DPDK patches and discussions
 help / color / mirror / Atom feed
* [RFC v3] eal: add bitset type
@ 2024-01-31 13:13 Mattias Rönnblom
  2024-01-31 16:02 ` Stephen Hemminger
                   ` (2 more replies)
  0 siblings, 3 replies; 63+ messages in thread
From: Mattias Rönnblom @ 2024-01-31 13:13 UTC (permalink / raw)
  To: dev; +Cc: hofors, Morten Brørup, Tyler Retzlaff, Mattias Rönnblom

Introduce a set of functions and macros that operate on sets of bits,
kept in arrays of 64-bit elements.

RTE bitset is designed for bitsets which are larger than what fits in
a single machine word (i.e., 64 bits). For very large bitsets, the
<rte_bitmap.h> API may be a more appropriate choice.

RFC v3:
 * Split the bitset from the htimer patchset, where it was originally
   hosted.
 * Rebase to current DPDK main.
 * Add note that rte_bitset_init() need not be called if bitset words
   have already been zeroed.
 * Use REGISTER_FAST_TEST instead of REGISTER_TEST_COMMAND.
 * Use rte_popcount64() instead of compiler builtin.

RFC v2:
 * Replaced <sys/types.h> with <stddef.h> include, to properly get
   size_t typedef.
 * Add <rte_compat.h> to get __rte_experimental in <rte_bitset.h>.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
---
 app/test/meson.build         |   1 +
 app/test/test_bitset.c       | 645 +++++++++++++++++++++++++
 lib/eal/common/meson.build   |   1 +
 lib/eal/common/rte_bitset.c  |  29 ++
 lib/eal/include/meson.build  |   1 +
 lib/eal/include/rte_bitset.h | 884 +++++++++++++++++++++++++++++++++++
 lib/eal/version.map          |   3 +
 7 files changed, 1564 insertions(+)
 create mode 100644 app/test/test_bitset.c
 create mode 100644 lib/eal/common/rte_bitset.c
 create mode 100644 lib/eal/include/rte_bitset.h

diff --git a/app/test/meson.build b/app/test/meson.build
index dcc93f4a43..e218be11d8 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -32,6 +32,7 @@ source_file_deps = {
     'test_bitcount.c': [],
     'test_bitmap.c': [],
     'test_bitops.c': [],
+    'test_bitset.c': [],
     'test_bitratestats.c': ['metrics', 'bitratestats', 'ethdev'] + sample_packet_forward_deps,
     'test_bpf.c': ['bpf', 'net'],
     'test_byteorder.c': [],
diff --git a/app/test/test_bitset.c b/app/test/test_bitset.c
new file mode 100644
index 0000000000..688349b03b
--- /dev/null
+++ b/app/test/test_bitset.c
@@ -0,0 +1,645 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Ericsson AB
+ */
+
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include <rte_random.h>
+
+#include <rte_bitset.h>
+
+#include "test.h"
+
+#define MAGIC UINT64_C(0xdeadbeefdeadbeef)
+
+static void
+rand_buf(void *buf, size_t n)
+{
+	size_t i;
+
+	for (i = 0; i < n; i++)
+		((char *)buf)[i] = (char)rte_rand();
+}
+
+/* Allocate a randomly filled bitset, fenced by a MAGIC guard word each side. */
+static uint64_t *
+alloc_bitset(size_t size)
+{
+	uint64_t *p;
+
+	p = malloc(RTE_BITSET_SIZE(size) + 2 * sizeof(uint64_t));
+	if (p == NULL)
+		rte_panic("Unable to allocate memory\n");
+
+	p[0] = MAGIC;
+	p[RTE_BITSET_NUM_WORDS(size) + 1] = MAGIC;
+	/* Fill only the words between the guards with random data. */
+	rand_buf(&p[1], RTE_BITSET_SIZE(size));
+
+	return p + 1;
+}
+
+
+static int
+free_bitset(uint64_t *bitset, size_t size)
+{
+	uint64_t *p;
+
+	p = bitset - 1;
+
+	if (p[0] != MAGIC)
+		return TEST_FAILED;
+
+	if (p[RTE_BITSET_NUM_WORDS(size) + 1] != MAGIC)
+		return TEST_FAILED;
+
+	free(p);
+
+	return TEST_SUCCESS;
+}
+
+static bool
+rand_bool(void)
+{
+	return rte_rand_max(2);
+}
+
+static void
+rand_bool_ary(bool *ary, size_t len)
+{
+	size_t i;
+
+	for (i = 0; i < len; i++)
+		ary[i] = rand_bool();
+}
+
+static int
+test_test_set_size(size_t size)
+{
+	size_t i;
+	bool reference[size];
+	uint64_t *bitset;
+
+	rand_bool_ary(reference, size);
+
+	bitset = alloc_bitset(size);
+
+	if (bitset == NULL)
+		return TEST_FAILED;
+
+	rte_bitset_init(bitset, size);
+
+	for (i = 0; i < size; i++) {
+		if (reference[i])
+			rte_bitset_set(bitset, i);
+		else
+			rte_bitset_clear(bitset, i);
+	}
+
+	for (i = 0; i < size; i++)
+		if (reference[i] != rte_bitset_test(bitset, i))
+			return TEST_FAILED;
+
+	if (free_bitset(bitset, size) != TEST_SUCCESS)
+		return TEST_FAILED;
+
+	return TEST_SUCCESS;
+}
+
+#define RAND_ITERATIONS (10000)
+#define RAND_SET_MAX_SIZE (1000)
+
+static int
+test_test_set(void)
+{
+	size_t i;
+
+	for (i = 0; i < RAND_ITERATIONS; i++) {
+		size_t size = 1 + rte_rand_max(RAND_SET_MAX_SIZE - 1);
+
+		if (test_test_set_size(size) != TEST_SUCCESS)
+			return TEST_FAILED;
+	}
+
+	return TEST_SUCCESS;
+}
+
+static ssize_t
+find(const bool *ary, size_t num_bools, size_t start, size_t len, bool set)
+{
+	size_t i;
+
+	for (i = 0; i < len; i++) {
+		ssize_t idx = (start + i) % num_bools;
+
+		if (ary[idx] == set)
+			return idx;
+	}
+
+	return -1;
+}
+
+static ssize_t
+find_set(const bool *ary, size_t num_bools, size_t start, size_t len)
+{
+	return find(ary, num_bools, start, len, true);
+}
+
+static ssize_t
+find_clear(const bool *ary, size_t num_bools, size_t start, size_t len)
+{
+	return find(ary, num_bools, start, len, false);
+}
+
+#define FFS_ITERATIONS (100)
+
+static int
+test_find_size(size_t size, bool set)
+{
+	uint64_t *bitset;
+	bool reference[size];
+	size_t i;
+
+	bitset = alloc_bitset(size);
+
+	if (bitset == NULL)
+		return TEST_FAILED;
+
+	rte_bitset_init(bitset, size);
+
+	for (i = 0; i < size; i++) {
+		bool bit = rand_bool();
+		reference[i] = bit;
+
+		if (bit)
+			rte_bitset_set(bitset, i);
+		else /* redundant, still useful for testing */
+			rte_bitset_clear(bitset, i);
+	}
+
+	for (i = 0; i < FFS_ITERATIONS; i++) {
+		size_t start_bit = rte_rand_max(size);
+		size_t len = rte_rand_max(size + 1);
+		bool full_range = len == size && start_bit == 0;
+		bool wraps = start_bit + len > size;
+		ssize_t rc;
+
+		if (set) {
+			if (full_range && rand_bool())
+				rc = rte_bitset_find_first_set(bitset,
+							       size);
+			else if (wraps || rand_bool()) {
+				rc = rte_bitset_find_set_wrap(bitset, size,
+							      start_bit, len);
+
+			} else
+				rc = rte_bitset_find_set(bitset, size,
+							 start_bit, len);
+
+			if (rc != find_set(reference, size, start_bit,
+					   len))
+				return TEST_FAILED;
+		} else {
+			if (full_range && rand_bool())
+				rc = rte_bitset_find_first_clear(bitset,
+								 size);
+			else if (wraps || rand_bool())
+				rc = rte_bitset_find_clear_wrap(bitset,
+								size,
+								start_bit, len);
+			else
+				rc = rte_bitset_find_clear(bitset, size,
+							   start_bit, len);
+
+			if (rc != find_clear(reference, size, start_bit,
+					     len))
+				return TEST_FAILED;
+		}
+
+	}
+
+	if (free_bitset(bitset, size) != TEST_SUCCESS)
+		return TEST_FAILED;
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_find_set_size(size_t size)
+{
+	return test_find_size(size, true);
+}
+
+static int
+test_find_clear_size(size_t size)
+{
+	return test_find_size(size, false);
+}
+
+static int
+test_find(void)
+{
+	size_t i;
+
+	for (i = 0; i < RAND_ITERATIONS; i++) {
+		size_t size = 2 + rte_rand_max(RAND_SET_MAX_SIZE - 2);
+
+		if (test_find_set_size(size) != TEST_SUCCESS)
+			return TEST_FAILED;
+
+		if (test_find_clear_size(size) != TEST_SUCCESS)
+			return TEST_FAILED;
+	}
+
+	return TEST_SUCCESS;
+}
+
+static int
+record_match(ssize_t match_idx, size_t size, int *calls)
+{
+	if (match_idx < 0 || (size_t)match_idx >= size)
+		return TEST_FAILED;
+
+	calls[match_idx]++;
+
+	return TEST_SUCCESS;
+}
+
+/* Verify the FOREACH macros visit exactly the matching bits of a random
+ * range (optionally wrapping) of a bitset holding 'size' bits. */
+static int
+test_foreach_size(ssize_t size, bool may_wrap, bool set)
+{
+	bool reference[size];
+	int calls[size];
+	uint64_t *bitset;
+	ssize_t i, start_bit, len;
+	bool full_range;
+	size_t total_calls = 0;
+
+	rand_bool_ary(reference, size);
+
+	bitset = alloc_bitset(size);
+
+	if (bitset == NULL)
+		return TEST_FAILED;
+
+	memset(calls, 0, sizeof(calls));
+
+	start_bit = rte_rand_max(size);
+	len = may_wrap ? rte_rand_max(size + 1) :
+		rte_rand_max(size - start_bit + 1);
+
+	rte_bitset_init(bitset, size);
+
+	/* random data in the unused bits should not matter */
+	rand_buf(bitset, RTE_BITSET_SIZE(size));
+
+	for (i = start_bit; i < start_bit + len; i++) {
+		size_t idx = i % size;
+
+		if (reference[idx])
+			rte_bitset_set(bitset, idx);
+		else
+			rte_bitset_clear(bitset, idx);
+
+		if (rte_bitset_test(bitset, idx) != reference[idx])
+			return TEST_FAILED;
+	}
+
+	full_range = (len == size && start_bit == 0);
+
+	/* XXX: verify iteration order as well */
+	if (set) {
+		if (full_range && rand_bool()) {
+			RTE_BITSET_FOREACH_SET(i, bitset, size) {
+				if (record_match(i, size, calls) !=
+				    TEST_SUCCESS)
+					return TEST_FAILED;
+			}
+		} else if (may_wrap) {
+			RTE_BITSET_FOREACH_SET_WRAP(i, bitset, size,
+						    start_bit, len) {
+				if (record_match(i, size, calls) !=
+				    TEST_SUCCESS) {
+					printf("failed\n");
+					return TEST_FAILED;
+				}
+			}
+		} else {
+			RTE_BITSET_FOREACH_SET_RANGE(i, bitset, size,
+						     start_bit, len) {
+				if (record_match(i, size, calls) !=
+				    TEST_SUCCESS)
+					return TEST_FAILED;
+			}
+		}
+	} else {
+		if (full_range && rand_bool()) {
+			RTE_BITSET_FOREACH_CLEAR(i, bitset, size)
+				if (record_match(i, size, calls) !=
+				    TEST_SUCCESS)
+					return TEST_FAILED;
+		} else if (may_wrap) {
+			RTE_BITSET_FOREACH_CLEAR_WRAP(i, bitset, size,
+						      start_bit, len) {
+				if (record_match(i, size, calls) !=
+				    TEST_SUCCESS)
+					return TEST_FAILED;
+			}
+		} else {
+			RTE_BITSET_FOREACH_CLEAR_RANGE(i, bitset, size,
+						       start_bit, len)
+				if (record_match(i, size, calls) !=
+				    TEST_SUCCESS)
+					return TEST_FAILED;
+		}
+	}
+
+	for (i = 0; i < len; i++) {
+		size_t idx = (start_bit + i) % size;
+
+		if (reference[idx] == set && calls[idx] != 1) {
+			printf("bit %zu shouldn't have been found %d "
+			       "times\n", idx, calls[idx]);
+			return TEST_FAILED;
+		}
+
+		if (reference[idx] != set && calls[idx] != 0) {
+			printf("bit %zu shouldn't have been found\n", idx);
+			return TEST_FAILED;
+		}
+
+		total_calls += calls[idx];
+	}
+
+	if (full_range) {
+		size_t count;
+
+		count = set ? rte_bitset_count_set(bitset, size) :
+			rte_bitset_count_clear(bitset, size);
+
+		if (count != total_calls)
+			return TEST_FAILED;
+	}
+
+	if (free_bitset(bitset, size) != TEST_SUCCESS)
+		return TEST_FAILED;
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_foreach(void)
+{
+	size_t i;
+
+	for (i = 0; i < RAND_ITERATIONS; i++) {
+		size_t size = 1 + rte_rand_max(RAND_SET_MAX_SIZE - 1);
+
+		if (test_foreach_size(size, false, true) != TEST_SUCCESS)
+			return TEST_FAILED;
+
+		if (test_foreach_size(size, false, false) != TEST_SUCCESS)
+			return TEST_FAILED;
+
+		if (test_foreach_size(size, true, true) != TEST_SUCCESS)
+			return TEST_FAILED;
+
+		if (test_foreach_size(size, true, false) != TEST_SUCCESS)
+			return TEST_FAILED;
+	}
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_count_size(size_t size)
+{
+	uint64_t *bitset;
+
+	bitset = alloc_bitset(size);
+
+	if (bitset == NULL)
+		return TEST_FAILED;
+
+	rte_bitset_init(bitset, size);
+
+	if (rte_bitset_count_set(bitset, size) != 0)
+		return TEST_FAILED;
+
+	if (rte_bitset_count_clear(bitset, size) != size)
+		return TEST_FAILED;
+
+	rte_bitset_set_all(bitset, size);
+
+	if (rte_bitset_count_set(bitset, size) != size)
+		return TEST_FAILED;
+
+	if (rte_bitset_count_clear(bitset, size) != 0)
+		return TEST_FAILED;
+
+	rte_bitset_clear_all(bitset, size);
+
+	if (rte_bitset_count_set(bitset, size) != 0)
+		return TEST_FAILED;
+
+	if (rte_bitset_count_clear(bitset, size) != size)
+		return TEST_FAILED;
+
+	rte_bitset_set(bitset, rte_rand_max(size));
+
+	if (rte_bitset_count_set(bitset, size) != 1)
+		return TEST_FAILED;
+
+	if (rte_bitset_count_clear(bitset, size) != (size - 1))
+		return TEST_FAILED;
+
+	rte_bitset_clear_all(bitset, size);
+	if (rte_bitset_count_set(bitset, size) != 0)
+		return TEST_FAILED;
+	if (rte_bitset_count_clear(bitset, size) != size)
+		return TEST_FAILED;
+
+	rte_bitset_set_all(bitset, size);
+	if (rte_bitset_count_set(bitset, size) != size)
+		return TEST_FAILED;
+	if (rte_bitset_count_clear(bitset, size) != 0)
+		return TEST_FAILED;
+
+	if (free_bitset(bitset, size) != TEST_SUCCESS)
+		return TEST_FAILED;
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_count(void)
+{
+	size_t i;
+
+	if (test_count_size(128) != TEST_SUCCESS)
+		return TEST_FAILED;
+	if (test_count_size(1) != TEST_SUCCESS)
+		return TEST_FAILED;
+	if (test_count_size(63) != TEST_SUCCESS)
+		return TEST_FAILED;
+	if (test_count_size(64) != TEST_SUCCESS)
+		return TEST_FAILED;
+	if (test_count_size(65) != TEST_SUCCESS)
+		return TEST_FAILED;
+
+	for (i = 0; i < RAND_ITERATIONS; i++) {
+		size_t size = 1 + rte_rand_max(RAND_SET_MAX_SIZE - 1);
+
+		if (test_count_size(size) != TEST_SUCCESS)
+			return TEST_FAILED;
+	}
+
+	return TEST_SUCCESS;
+}
+
+/* Declare a bitset of 'size' bits and sanity check it. Only returns (from
+ * the enclosing function) on failure, so several sizes may be chained. */
+#define GEN_DECLARE(size)						\
+	{								\
+		RTE_BITSET_DECLARE(bitset, size);			\
+		size_t idx = rte_rand_max(size);			\
+									\
+		rte_bitset_init(bitset, size);				\
+		rte_bitset_set(bitset, idx);				\
+		if (!rte_bitset_test(bitset, idx))			\
+			return TEST_FAILED;				\
+		if (rte_bitset_count_set(bitset, size) != 1)		\
+			return TEST_FAILED;				\
+	}
+
+static int
+test_define(void)
+{
+	GEN_DECLARE(1);
+	GEN_DECLARE(64);
+	GEN_DECLARE(65);
+	GEN_DECLARE(4097);
+	return TEST_SUCCESS;
+}
+
+static int
+test_equal(void)
+{
+	const size_t size = 100;
+	RTE_BITSET_DECLARE(bitset_a, size);
+	RTE_BITSET_DECLARE(bitset_b, size);
+
+	rand_buf(bitset_a, RTE_BITSET_SIZE(size));
+	rand_buf(bitset_b, RTE_BITSET_SIZE(size));
+
+	rte_bitset_init(bitset_a, size);
+	rte_bitset_init(bitset_b, size);
+
+	rte_bitset_set(bitset_a, 9);
+	rte_bitset_set(bitset_b, 9);
+	rte_bitset_set(bitset_a, 90);
+	rte_bitset_set(bitset_b, 90);
+
+	if (!rte_bitset_equal(bitset_a, bitset_b, size))
+		return TEST_FAILED;
+
+	/* set unused bit, which should be ignored */
+	rte_bitset_set(&bitset_a[1], 60);
+
+	if (!rte_bitset_equal(bitset_a, bitset_b, size))
+		return TEST_FAILED;
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_copy(void)
+{
+	const size_t size = 100;
+	RTE_BITSET_DECLARE(bitset_a, size);
+	RTE_BITSET_DECLARE(bitset_b, size);
+
+	rand_buf(bitset_a, RTE_BITSET_SIZE(size));
+	rand_buf(bitset_b, RTE_BITSET_SIZE(size));
+
+	if (rte_bitset_equal(bitset_a, bitset_b, size))
+		return TEST_FAILED;
+
+	rte_bitset_copy(bitset_a, bitset_b, size);
+
+	if (!rte_bitset_equal(bitset_a, bitset_b, size))
+		return TEST_FAILED;
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_to_str(void)
+{
+	char buf[1024];
+	RTE_BITSET_DECLARE(bitset, 128);
+
+	rte_bitset_init(bitset, 128);
+	rte_bitset_set(bitset, 1);
+
+	if (rte_bitset_to_str(bitset, 2, buf, 3) != 3)
+		return TEST_FAILED;
+	if (strcmp(buf, "10") != 0)
+		return TEST_FAILED;
+
+	rte_bitset_set(bitset, 0);
+
+	if (rte_bitset_to_str(bitset, 1, buf, sizeof(buf)) != 2)
+		return TEST_FAILED;
+	if (strcmp(buf, "1") != 0)
+		return TEST_FAILED;
+
+	rte_bitset_init(bitset, 99);
+	rte_bitset_set(bitset, 98);
+
+	if (rte_bitset_to_str(bitset, 99, buf, sizeof(buf)) != 100)
+		return TEST_FAILED;
+
+	if (buf[0] != '1' || strchr(&buf[1], '1') != NULL)
+		return TEST_FAILED;
+
+	if (rte_bitset_to_str(bitset, 128, buf, 64) != -EINVAL)
+		return TEST_FAILED;
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_bitset(void)
+{
+	if (test_test_set() != TEST_SUCCESS)
+		return TEST_FAILED;
+
+	if (test_find() != TEST_SUCCESS)
+		return TEST_FAILED;
+
+	if (test_foreach() != TEST_SUCCESS)
+		return TEST_FAILED;
+
+	if (test_count() != TEST_SUCCESS)
+		return TEST_FAILED;
+
+	if (test_define() != TEST_SUCCESS)
+		return TEST_FAILED;
+
+	if (test_equal() != TEST_SUCCESS)
+		return TEST_FAILED;
+
+	if (test_copy() != TEST_SUCCESS)
+		return TEST_FAILED;
+
+	if (test_to_str() != TEST_SUCCESS)
+		return TEST_FAILED;
+
+	return TEST_SUCCESS;
+}
+
+REGISTER_FAST_TEST(bitset_autotest, true, true, test_bitset);
diff --git a/lib/eal/common/meson.build b/lib/eal/common/meson.build
index 22a626ba6f..c1bbf26654 100644
--- a/lib/eal/common/meson.build
+++ b/lib/eal/common/meson.build
@@ -31,6 +31,7 @@ sources += files(
         'eal_common_uuid.c',
         'malloc_elem.c',
         'malloc_heap.c',
+        'rte_bitset.c',
         'rte_malloc.c',
         'rte_random.c',
         'rte_reciprocal.c',
diff --git a/lib/eal/common/rte_bitset.c b/lib/eal/common/rte_bitset.c
new file mode 100644
index 0000000000..35e55a64db
--- /dev/null
+++ b/lib/eal/common/rte_bitset.c
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Ericsson AB
+ */
+
+#include <errno.h>
+
+#include "rte_bitset.h"
+
+ssize_t
+rte_bitset_to_str(const uint64_t *bitset, size_t num_bits, char *buf,
+		  size_t capacity)
+{
+	size_t i;
+
+	if (capacity < (num_bits + 1))
+		return -EINVAL;
+
+	for (i = 0; i < num_bits; i++) {
+		bool value;
+
+		value = rte_bitset_test(bitset, num_bits - 1 - i);
+
+		buf[i] = value ? '1' : '0';
+	}
+
+	buf[num_bits] = '\0';
+
+	return num_bits + 1;
+}
diff --git a/lib/eal/include/meson.build b/lib/eal/include/meson.build
index e94b056d46..4b5f120a66 100644
--- a/lib/eal/include/meson.build
+++ b/lib/eal/include/meson.build
@@ -5,6 +5,7 @@ includes += include_directories('.')
 
 headers += files(
         'rte_alarm.h',
+        'rte_bitset.h',
         'rte_bitmap.h',
         'rte_bitops.h',
         'rte_branch_prediction.h',
diff --git a/lib/eal/include/rte_bitset.h b/lib/eal/include/rte_bitset.h
new file mode 100644
index 0000000000..24c6ec3703
--- /dev/null
+++ b/lib/eal/include/rte_bitset.h
@@ -0,0 +1,884 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Ericsson AB
+ */
+
+#ifndef _RTE_BITSET_H_
+#define _RTE_BITSET_H_
+
+/**
+ * @file
+ * RTE Bitset
+ *
+ * This file provides functions and macros for querying and
+ * manipulating sets of bits kept in arrays of @c uint64_t-sized
+ * elements.
+ *
+ * The bits in a bitset are numbered from 0 to @c size - 1, with the
+ * lowest index being the least significant bit.
+ *
+ * The bitset array must be properly aligned.
+ *
+ * For optimal performance, the @c size parameter, required by
+ * many of the API's functions, should be a compile-time constant.
+ *
+ * For large bitsets, the rte_bitmap.h API may be more appropriate.
+ *
+ * @warning
+ * All functions modifying a bitset may overwrite any unused bits of
+ * the last word. Such unused bits are ignored by all functions reading
+ * bits.
+ *
+ */
+
+#include <limits.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <rte_bitops.h>
+#include <rte_branch_prediction.h>
+#include <rte_common.h>
+#include <rte_compat.h>
+#include <rte_debug.h>
+#include <rte_memcpy.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * The size (in bytes) of each element in the array used to represent
+ * a bitset.
+ */
+#define RTE_BITSET_WORD_SIZE (sizeof(uint64_t))
+
+/**
+ * The size (in bits) of each element in the array used to represent
+ * a bitset.
+ */
+#define RTE_BITSET_WORD_BITS (RTE_BITSET_WORD_SIZE * CHAR_BIT)
+
+/**
+ * Computes the number of words required to store @c size bits.
+ */
+#define RTE_BITSET_NUM_WORDS(size)			       \
+	(((size) + RTE_BITSET_WORD_BITS - 1) / RTE_BITSET_WORD_BITS)
+
+/**
+ * Computes the amount of memory (in bytes) required to fit a bitset
+ * holding @c size bits.
+ */
+#define RTE_BITSET_SIZE(size)					\
+	((size_t)(RTE_BITSET_NUM_WORDS(size) * RTE_BITSET_WORD_SIZE))
+
+#define __RTE_BITSET_WORD_IDX(bit_num) ((bit_num) / RTE_BITSET_WORD_BITS)
+#define __RTE_BITSET_BIT_OFFSET(bit_num) ((bit_num) % RTE_BITSET_WORD_BITS)
+#define __RTE_BITSET_UNUSED(size)				\
+	((RTE_BITSET_NUM_WORDS(size) * RTE_BITSET_WORD_BITS) \
+	 - (size))
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Declare a bitset.
+ *
+ * Declare (e.g., as a struct field) or define (e.g., as a stack
+ * variable) a bitset of the specified size.
+ *
+ * @param size
+ *   The number of bits the bitset must be able to represent. Must be
+ *   a compile-time constant.
+ * @param name
+ *   The field or variable name of the resulting definition.
+ */
+#define RTE_BITSET_DECLARE(name, size)		\
+	uint64_t name[RTE_BITSET_NUM_WORDS(size)]
+
+/* XXX: should one include flags here and use to avoid a comparison? */
+/* XXX: would this be better off as a function? */
+
+#define __RTE_BITSET_FOREACH_LEFT(var, size, start_bit, len)		\
+	((len) - 1 - ((var) >= (start_bit) ? (var) - (start_bit) :	\
+		  (size) - (start_bit) + (var)))
+
+#define __RTE_BITSET_FOREACH(var, bitset, size, start_bit, len, flags)	\
+	for ((var) = __rte_bitset_find(bitset, size, start_bit, len,	\
+				       flags);				\
+	     (var) != -1;						\
+	     (var) = __RTE_BITSET_FOREACH_LEFT(var, size, start_bit,	\
+					       len) > 0	?		\
+		     __rte_bitset_find(bitset, size,			\
+				       ((var) + 1) % (size),		\
+				       __RTE_BITSET_FOREACH_LEFT(var,	\
+								 size,	\
+								 start_bit, \
+								 len),	\
+				       flags) : -1)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Iterate over all bits set.
+ *
+ * This macro iterates over all bits set (i.e., all ones) in the
+ * bitset, in the forward direction (i.e., starting with the least
+ * significant '1').
+ *
+ * @param var
+ *   An iterator variable of type @c ssize_t. For each successive
+ *   iteration, this variable will hold the bit index of a set bit.
+ * @param bitset
+ *   A <tt>const uint64_t *</tt> pointer to the bitset array.
+ * @param size
+ *   The size of the bitset (in bits).
+ */
+
+#define RTE_BITSET_FOREACH_SET(var, bitset, size)			\
+	__RTE_BITSET_FOREACH(var, bitset, size, 0, size, 0)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Iterate over all bits cleared.
+ *
+ * This macro iterates over all bits cleared in the bitset, in the
+ * forward direction (i.e., starting with the lowest-indexed cleared bit).
+ *
+ * @param var
+ *   An iterator variable of type @c ssize_t. For each successive iteration,
+ *   this variable will hold the bit index of a cleared bit.
+ * @param bitset
+ *   A <tt>const uint64_t *</tt> pointer to the bitset array.
+ * @param size
+ *   The size of the bitset (in bits).
+ */
+
+#define RTE_BITSET_FOREACH_CLEAR(var, bitset, size)			\
+	__RTE_BITSET_FOREACH(var, bitset, size, 0, size,		\
+			     __RTE_BITSET_FIND_FLAG_FIND_CLEAR)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Iterate over all bits set within a range.
+ *
+ * This macro iterates over all bits set (i.e., all ones) in the
+ * specified range, in the forward direction (i.e., starting with the
+ * least significant '1').
+ *
+ * @param var
+ *   An iterator variable of type @c ssize_t. For each successive iteration,
+ *   this variable will hold the bit index of a set bit.
+ * @param bitset
+ *   A <tt>const uint64_t *</tt> pointer to the bitset array.
+ * @param size
+ *   The size of the bitset (in bits).
+ * @param start_bit
+ *   The index of the first bit to check. Must be less than @c size.
+ * @param len
+ *   The length (in bits) of the range. @c start_bit + @c len must be less
+ *   than or equal to @c size.
+ */
+
+#define RTE_BITSET_FOREACH_SET_RANGE(var, bitset, size, start_bit,     \
+				     len)			       \
+	__RTE_BITSET_FOREACH(var, bitset, size, start_bit, len, 0)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Iterate over all cleared bits within a range.
+ *
+ * This macro iterates over all bits cleared (i.e., all zeroes) in the
+ * specified range, in the forward direction (i.e., starting with the
+ * least significant '0').
+ *
+ * @param var
+ *   An iterator variable of type @c ssize_t. For each successive iteration,
+ *   this variable will hold the bit index of a cleared bit.
+ * @param bitset
+ *   A <tt>const uint64_t *</tt> pointer to the bitset array.
+ * @param size
+ *   The size of the bitset (in bits).
+ * @param start_bit
+ *   The index of the first bit to check. Must be less than @c size.
+ * @param len
+ *   The length (in bits) of the range. @c start_bit + @c len must be less
+ *   than or equal to @c size.
+ */
+
+#define RTE_BITSET_FOREACH_CLEAR_RANGE(var, bitset, size, start_bit,	\
+				       len)				\
+	__RTE_BITSET_FOREACH(var, bitset, size, start_bit, len,		\
+			     __RTE_BITSET_FIND_FLAG_FIND_CLEAR)
+
+#define RTE_BITSET_FOREACH_SET_WRAP(var, bitset, size, start_bit,      \
+				    len)			       \
+	__RTE_BITSET_FOREACH(var, bitset, size, start_bit, len,	       \
+			     __RTE_BITSET_FIND_FLAG_WRAP)
+
+#define RTE_BITSET_FOREACH_CLEAR_WRAP(var, bitset, size, start_bit,	\
+				       len)				\
+	__RTE_BITSET_FOREACH(var, bitset, size, start_bit, len,		\
+			     __RTE_BITSET_FIND_FLAG_WRAP |		\
+			     __RTE_BITSET_FIND_FLAG_FIND_CLEAR)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Initializes a bitset.
+ *
+ * All bits are cleared.
+ *
+ * In case all words in the bitset array are already set to zero by
+ * other means (e.g., at the time of memory allocation), this function
+ * need not be called.
+ *
+ * @param bitset
+ *   A pointer to the array of bitset 64-bit words.
+ * @param size
+ *   The size of the bitset (in bits).
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_init(uint64_t *bitset, size_t size)
+{
+	memset(bitset, 0, RTE_BITSET_SIZE(size));
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Set a bit in the bitset.
+ *
+ * Bits are numbered from 0 to (size - 1) (inclusive).
+ *
+ * @param bitset
+ *   A pointer to the array words making up the bitset.
+ * @param bit_num
+ *   The index of the bit to be set.
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_set(uint64_t *bitset, size_t bit_num)
+{
+	size_t word;
+	size_t offset;
+	uint64_t mask;
+
+	word = __RTE_BITSET_WORD_IDX(bit_num);
+	offset = __RTE_BITSET_BIT_OFFSET(bit_num);
+	mask = UINT64_C(1) << offset;
+
+	bitset[word] |= mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Clear a bit in the bitset.
+ *
+ * Bits are numbered 0 to (size - 1) (inclusive).
+ *
+ * @param bitset
+ *   A pointer to the array words making up the bitset.
+ * @param bit_num
+ *   The index of the bit to be cleared.
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_clear(uint64_t *bitset, size_t bit_num)
+{
+	size_t word;
+	size_t offset;
+	uint64_t mask;
+
+	word = __RTE_BITSET_WORD_IDX(bit_num);
+	offset = __RTE_BITSET_BIT_OFFSET(bit_num);
+	mask = ~(UINT64_C(1) << offset);
+
+	bitset[word] &= mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Set all bits in the bitset.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param size
+ *   The size of the bitset (in bits).
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_set_all(uint64_t *bitset, size_t size)
+{
+	memset(bitset, 0xFF, RTE_BITSET_SIZE(size));
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Clear all bits in the bitset.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param size
+ *   The size of the bitset (in bits).
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_clear_all(uint64_t *bitset, size_t size)
+{
+	rte_bitset_init(bitset, size);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Count all set bits.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param size
+ *   The size of the bitset (in bits).
+ * @return
+ *   Returns the number of '1' bits in the bitset.
+ */
+
+__rte_experimental
+static inline size_t
+rte_bitset_count_set(const uint64_t *bitset, size_t size)
+{
+	size_t i;
+	size_t total = 0;
+	uint64_t unused_mask;
+
+	/* An empty bitset has no words; avoid the (0 - 1) wrap below. */
+	if (unlikely(size == 0))
+		return 0;
+
+	for (i = 0; i < RTE_BITSET_NUM_WORDS(size) - 1; i++)
+		total += rte_popcount64(bitset[i]);
+	/* Unused bits of the last word may hold garbage; mask them off. */
+	unused_mask = UINT64_MAX >> __RTE_BITSET_UNUSED(size);
+	total += rte_popcount64(bitset[i] & unused_mask);
+
+	return total;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Count all cleared bits.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param size
+ *   The size of the bitset (in bits).
+ * @return
+ *   Returns the number of '0' bits in the bitset.
+ */
+
+__rte_experimental
+static inline size_t
+rte_bitset_count_clear(const uint64_t *bitset, size_t size)
+{
+	return size - rte_bitset_count_set(bitset, size);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Test if a bit is set.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param bit_num
+ *   Index of the bit to test. Index 0 is the least significant bit.
+ * @return
+ *   Returns true if the bit is '1', and false if the bit is '0'.
+ */
+
+__rte_experimental
+static inline bool
+rte_bitset_test(const uint64_t *bitset, size_t bit_num)
+{
+	size_t word;
+	size_t offset;
+
+	word = __RTE_BITSET_WORD_IDX(bit_num);
+	offset = __RTE_BITSET_BIT_OFFSET(bit_num);
+
+	return (bitset[word] >> offset) & 1;
+}
+
+#define __RTE_BITSET_FIND_FLAG_FIND_CLEAR (1U << 0)
+#define __RTE_BITSET_FIND_FLAG_WRAP (1U << 1)
+
+__rte_experimental
+static inline ssize_t
+__rte_bitset_find_nowrap(const uint64_t *bitset, size_t __rte_unused size,
+			 size_t start_bit, size_t len, bool find_clear)
+{
+	size_t word_idx;
+	size_t offset;
+	size_t end_bit = start_bit + len;
+
+	RTE_ASSERT(end_bit <= size);
+
+	if (unlikely(len == 0))
+		return -1;
+
+	word_idx = __RTE_BITSET_WORD_IDX(start_bit);
+	offset = __RTE_BITSET_BIT_OFFSET(start_bit);
+
+	while (word_idx <= __RTE_BITSET_WORD_IDX(end_bit - 1)) {
+		uint64_t word;
+		int word_ffs;
+
+		word = bitset[word_idx];
+		if (find_clear)
+			word = ~word;
+
+		word >>= offset;
+
+		word_ffs = __builtin_ffsll(word);
+
+		if (word_ffs != 0) {
+			ssize_t ffs = start_bit + word_ffs - 1;
+
+			/*
+			 * Check if set bit were among the last,
+			 * unused bits, in the last word.
+			 */
+			if (unlikely(ffs >= (ssize_t)end_bit))
+				return -1;
+
+			return ffs;
+		}
+
+		start_bit += (RTE_BITSET_WORD_BITS - offset);
+		word_idx++;
+		offset = 0;
+	}
+
+	return -1;
+
+}
+
+__rte_experimental
+static inline ssize_t
+__rte_bitset_find(const uint64_t *bitset, size_t size, size_t start_bit,
+		  size_t len, unsigned int flags)
+{
+	bool find_clear = flags & __RTE_BITSET_FIND_FLAG_FIND_CLEAR;
+	bool may_wrap = flags & __RTE_BITSET_FIND_FLAG_WRAP;
+	bool does_wrap = (start_bit + len) > size;
+	ssize_t rc;
+
+	RTE_ASSERT(len <= size);
+	if (!may_wrap)
+		RTE_ASSERT(!does_wrap);
+
+	if (may_wrap && does_wrap) {
+		size_t len0 = size - start_bit;
+		size_t len1 = len - len0;
+
+		rc = __rte_bitset_find_nowrap(bitset, size, start_bit, len0,
+					      find_clear);
+		if (rc < 0)
+			rc =  __rte_bitset_find_nowrap(bitset, size,
+						       0, len1, find_clear);
+	} else
+		rc = __rte_bitset_find_nowrap(bitset, size, start_bit,
+					      len, find_clear);
+
+	return rc;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Find first bit set.
+ *
+ * Scans the bitset in the forward direction (i.e., starting at the
+ * least significant bit), and returns the index of the first '1'.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param size
+ *   The size of the bitset (in bits).
+ * @return
+ *   Returns the index of the least significant '1', or -1 if all
+ *   bits are '0'.
+ */
+
+__rte_experimental
+static inline ssize_t
+rte_bitset_find_first_set(const uint64_t *bitset, size_t size)
+{
+	return __rte_bitset_find(bitset, size, 0, size, 0);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Find first bit set at offset.
+ *
+ * Scans the bitset in the forward direction (i.e., starting at the
+ * least significant bit), starting at an offset @c start_bit into the
+ * bitset, and returns the index of the first '1' encountered. At most
+ * @c len bits are examined, and the scan never wraps around.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param size
+ *   The size of the bitset (in bits).
+ * @param start_bit
+ *   The index of the first bit to check. Must be less than @c size.
+ * @param len
+ *   The number of bits to scan. @c start_bit + @c len must be less
+ *   than or equal to @c size.
+ * @return
+ *   Returns the index of the least significant '1' within the scanned
+ *   range, or -1 if all scanned bits are '0'.
+ */
+
+__rte_experimental
+static inline ssize_t
+rte_bitset_find_set(const uint64_t *bitset, size_t size,
+		    size_t start_bit, size_t len)
+{
+	/* No flags: no wrap-around, and no find-clear. */
+	return __rte_bitset_find(bitset, size, start_bit, len, 0);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Find first bit set at offset, with wrap-around.
+ *
+ * Scans the bitset in the forward direction (i.e., starting at the
+ * least significant bit), starting at an offset @c start_bit into the
+ * bitset. If no '1' is encountered before the end of the bitset, the search
+ * will continue at index 0, until a total of @c len bits have been
+ * examined.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param size
+ *   The size of the bitset (in bits).
+ * @param start_bit
+ *   The index of the first bit to check. Must be less than @c size.
+ * @param len
+ *   The number of bits to scan. Must be less than or equal to
+ *   @c size. @c start_bit + @c len may exceed @c size, in which case
+ *   the scan wraps around to index 0.
+ * @return
+ *   Returns the index of the first '1' encountered in scan order
+ *   (which, after a wrap-around, is not necessarily the least
+ *   significant '1' in the bitset), or -1 if all scanned bits are '0'.
+ */
+
+__rte_experimental
+static inline ssize_t
+rte_bitset_find_set_wrap(const uint64_t *bitset, size_t size,
+			 size_t start_bit, size_t len)
+{
+	/* Only the WRAP flag: wrap-around permitted, no find-clear. */
+	return __rte_bitset_find(bitset, size, start_bit, len,
+				 __RTE_BITSET_FIND_FLAG_WRAP);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Find first cleared bit.
+ *
+ * Scans the whole bitset in the forward direction (i.e., starting at
+ * the least significant bit), and returns the index of the first '0'.
+ *
+ * This is the same search as rte_bitset_find_clear() performs when
+ * called with @c start_bit 0 and @c len equal to @c size.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param size
+ *   The size of the bitset (in bits).
+ * @return
+ *   Returns the index of the least significant '0', or -1 if all
+ *   bits are '1'.
+ */
+
+__rte_experimental
+static inline ssize_t
+rte_bitset_find_first_clear(const uint64_t *bitset, size_t size)
+{
+	/* Scan all 'size' bits from bit 0, with the FIND_CLEAR flag set. */
+	return __rte_bitset_find(bitset, size, 0, size,
+				 __RTE_BITSET_FIND_FLAG_FIND_CLEAR);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Find first cleared bit at offset.
+ *
+ * Scans the bitset in the forward direction (i.e., starting at the
+ * least significant bit), starting at an offset @c start_bit into the
+ * bitset, and returns the index of the first '0' encountered. At most
+ * @c len bits are examined, and the scan never wraps around.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param size
+ *   The size of the bitset (in bits).
+ * @param start_bit
+ *   The index of the first bit to check. Must be less than @c size.
+ * @param len
+ *   The number of bits to scan. @c start_bit + @c len must be less
+ *   than or equal to @c size.
+ * @return
+ *   Returns the index of the least significant '0' within the scanned
+ *   range, or -1 if all scanned bits are '1'.
+ */
+
+__rte_experimental
+static inline ssize_t
+rte_bitset_find_clear(const uint64_t *bitset, size_t size,
+			   size_t start_bit, size_t len)
+{
+	/* Only the FIND_CLEAR flag: no wrap-around. */
+	return __rte_bitset_find(bitset, size, start_bit, len,
+				 __RTE_BITSET_FIND_FLAG_FIND_CLEAR);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Find first cleared bit at offset, with wrap-around.
+ *
+ * Scans the bitset in the forward direction (i.e., starting at the
+ * least significant bit), starting at an offset @c start_bit into the
+ * bitset. If no '0' is encountered before the end of the bitset, the
+ * search will continue at index 0, until a total of @c len bits have
+ * been examined.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param size
+ *   The size of the bitset (in bits).
+ * @param start_bit
+ *   The index of the first bit to check. Must be less than @c size.
+ * @param len
+ *   The number of bits to scan. Must be less than or equal to
+ *   @c size. @c start_bit + @c len may exceed @c size, in which case
+ *   the scan wraps around to index 0.
+ * @return
+ *   Returns the index of the first '0' encountered in scan order
+ *   (which, after a wrap-around, is not necessarily the least
+ *   significant '0' in the bitset), or -1 if all scanned bits are '1'.
+ */
+
+__rte_experimental
+static inline ssize_t
+rte_bitset_find_clear_wrap(const uint64_t *bitset, size_t size,
+			   size_t start_bit, size_t len)
+{
+	/* Both flags: FIND_CLEAR, with wrap-around permitted. */
+	return __rte_bitset_find(bitset, size, start_bit, len,
+				 __RTE_BITSET_FIND_FLAG_FIND_CLEAR |
+				 __RTE_BITSET_FIND_FLAG_WRAP);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Copy bitset.
+ *
+ * Copy the bits of the @c src_bitset to the @c dst_bitset.
+ *
+ * The bitsets must not overlap and must be of equal size.
+ *
+ * @param dst_bitset
+ *   A pointer to the array of words making up the destination bitset,
+ *   which is overwritten.
+ * @param src_bitset
+ *   A pointer to the array of words making up the source bitset.
+ * @param size
+ *   The size of the bitsets (in bits).
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_copy(uint64_t *__rte_restrict dst_bitset,
+		const uint64_t *__rte_restrict src_bitset,
+		size_t size)
+{
+	/* RTE_BITSET_SIZE(size) is used as the byte count of the copy. */
+	rte_memcpy(dst_bitset, src_bitset, RTE_BITSET_SIZE(size));
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Compute the bitwise OR of two bitsets.
+ *
+ * Every word of @c dst_bitset is OR:ed, in place, with the word at
+ * the same index in @c src_bitset. The two bitsets must be of equal
+ * size.
+ *
+ * @param dst_bitset
+ *   A pointer to the destination bitset, which also serves as the
+ *   first operand.
+ * @param src_bitset
+ *   A pointer to the source bitset (the second operand).
+ * @param size
+ *   The size of the bitsets (in bits).
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_or(uint64_t *dst_bitset, const uint64_t *src_bitset, size_t size)
+{
+	const size_t num_words = RTE_BITSET_NUM_WORDS(size);
+	size_t word_idx;
+
+	/* Operate on whole words; the last word is processed in full. */
+	for (word_idx = 0; word_idx < num_words; word_idx++) {
+		const uint64_t merged = dst_bitset[word_idx] | src_bitset[word_idx];
+
+		dst_bitset[word_idx] = merged;
+	}
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Compute the bitwise AND of two bitsets.
+ *
+ * Every word of @c dst_bitset is AND:ed, in place, with the word at
+ * the same index in @c src_bitset. The two bitsets must be of equal
+ * size.
+ *
+ * @param dst_bitset
+ *   A pointer to the destination bitset, which also serves as the
+ *   first operand.
+ * @param src_bitset
+ *   A pointer to the source bitset (the second operand).
+ * @param size
+ *   The size of the bitsets (in bits).
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_and(uint64_t *dst_bitset, const uint64_t *src_bitset, size_t size)
+{
+	const size_t num_words = RTE_BITSET_NUM_WORDS(size);
+	size_t word_idx;
+
+	/* Operate on whole words; the last word is processed in full. */
+	for (word_idx = 0; word_idx < num_words; word_idx++) {
+		const uint64_t masked = dst_bitset[word_idx] & src_bitset[word_idx];
+
+		dst_bitset[word_idx] = masked;
+	}
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Bitwise xor two bitsets.
+ *
+ * Perform a bitwise XOR operation on all bits in the two equal-size
+ * bitsets @c dst_bitset and @c src_bitset, and store the results in
+ * @c dst_bitset.
+ *
+ * As with rte_bitset_or() and rte_bitset_and(), @c dst_bitset and
+ * @c src_bitset may refer to the same bitset. In that case, every
+ * word of the bitset is cleared.
+ *
+ * @param dst_bitset
+ *   A pointer to the destination bitset, which also serves as the
+ *   first operand.
+ * @param src_bitset
+ *   A pointer to the source bitset (the second operand).
+ * @param size
+ *   The size of the bitsets (in bits).
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_xor(uint64_t *dst_bitset, const uint64_t *src_bitset, size_t size)
+{
+	size_t i;
+
+	/*
+	 * The pointers are deliberately not restrict-qualified, so that
+	 * passing the same bitset as both operands is well-defined.
+	 */
+	for (i = 0; i < RTE_BITSET_NUM_WORDS(size); i++)
+		dst_bitset[i] ^= src_bitset[i];
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Compare two bitsets.
+ *
+ * Compare two bitsets of the same size for equality. Only the
+ * @c size bits making up each bitset take part in the comparison.
+ *
+ * @param bitset_a
+ *   A pointer to the first bitset.
+ * @param bitset_b
+ *   A pointer to the second bitset.
+ * @param size
+ *   The size of the bitsets (in bits).
+ * @return
+ *   Returns true if the bitsets are equal (two empty bitsets compare
+ *   equal), and false otherwise.
+ */
+
+__rte_experimental
+static inline bool
+rte_bitset_equal(const uint64_t *bitset_a, const uint64_t *bitset_b,
+		 size_t size)
+{
+	size_t i;
+	uint64_t last_a, last_b;
+
+	/*
+	 * Two empty bitsets are trivially equal. Returning early also
+	 * avoids evaluating "number of words - 1" below for an empty
+	 * bitset, which would wrap around if the word count is zero
+	 * and send the loop far beyond the end of the arrays.
+	 */
+	if (size == 0)
+		return true;
+
+	/* Every word except the last is compared in its entirety. */
+	for (i = 0; i < RTE_BITSET_NUM_WORDS(size) - 1; i++)
+		if (bitset_a[i] != bitset_b[i])
+			return false;
+
+	/*
+	 * Shift the unused, most significant, bits out of the last
+	 * word, so that they do not affect the result.
+	 */
+	last_a = bitset_a[i] << __RTE_BITSET_UNUSED(size);
+	last_b = bitset_b[i] << __RTE_BITSET_UNUSED(size);
+
+	return last_a == last_b;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Converts a bitset to a string.
+ *
+ * This function writes a string representation of the bitset to
+ * the supplied buffer.
+ *
+ * Each bit is represented either by '0' or '1' in the output. The
+ * resulting string is NUL terminated.
+ *
+ * @param bitset
+ *   A pointer to the array of bitset 64-bit words.
+ * @param size
+ *   The number of bits the bitset represents.
+ * @param buf
+ *   A buffer to hold the output.
+ * @param capacity
+ *   The size of the buffer (in bytes). Must be @c size + 1 or larger.
+ * @return
+ *   Returns the number of bytes written (i.e., @c size + 1), or -EINVAL
+ *   in case the buffer capacity was too small.
+ */
+
+__rte_experimental
+ssize_t
+rte_bitset_to_str(const uint64_t *bitset, size_t size, char *buf,
+		  size_t capacity);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_BITSET_H_ */
diff --git a/lib/eal/version.map b/lib/eal/version.map
index 5e0cd47c82..639ccfe4b0 100644
--- a/lib/eal/version.map
+++ b/lib/eal/version.map
@@ -393,6 +393,9 @@ EXPERIMENTAL {
 	# added in 23.07
 	rte_memzone_max_get;
 	rte_memzone_max_set;
+
+	# added in 24.03
+	rte_bitset_to_str;
 };
 
 INTERNAL {
-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [RFC v3] eal: add bitset type
  2024-01-31 13:13 [RFC v3] eal: add bitset type Mattias Rönnblom
@ 2024-01-31 16:02 ` Stephen Hemminger
  2024-01-31 16:28   ` Mattias Rönnblom
  2024-01-31 16:06 ` Stephen Hemminger
  2024-02-16 10:23 ` [RFC v4 1/4] " Mattias Rönnblom
  2 siblings, 1 reply; 63+ messages in thread
From: Stephen Hemminger @ 2024-01-31 16:02 UTC (permalink / raw)
  To: Mattias Rönnblom; +Cc: dev, hofors, Morten Brørup, Tyler Retzlaff

On Wed, 31 Jan 2024 14:13:01 +0100
Mattias Rönnblom <mattias.ronnblom@ericsson.com> wrote:

> Introduce a set of functions and macros that operate on sets of bits,
> kept in arrays of 64-bit elements.
> 
> RTE bitset is designed for bitsets which are larger than what fits in
> a single machine word (i.e., 64 bits). For very large bitsets, the
> <rte_bitmap.h> API may be a more appropriate choice.
> 
> RFC v3:
>  * Split the bitset from the htimer patchset, where it was originally
>    hosted.
>  * Rebase to current DPDK main.
>  * Add note that rte_bitset_init() need not be called if bitset words
>    have already been zeroed.
>  * Use REGISTER_FAST_TEST instead of REGISTER_TEST_COMMAND.
>  * Use rte_popcount64() instead of compiler builtin.
> 
> RFC v2:
>  * Replaced <sys/types.h> with <stddef.h> include, to properly get
>    size_t typedef.
>  * Add <rte_compat.h> to get __rte_experimental in <rte_bitset.h>.
> 
> Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
> ---
>  app/test/meson.build         |   1 +
>  app/test/test_bitset.c       | 645 +++++++++++++++++++++++++
>  lib/eal/common/meson.build   |   1 +
>  lib/eal/common/rte_bitset.c  |  29 ++
>  lib/eal/include/meson.build  |   1 +
>  lib/eal/include/rte_bitset.h | 884 +++++++++++++++++++++++++++++++++++
>  lib/eal/version.map          |   3 +
>  7 files changed, 1564 insertions(+)
>  create mode 100644 app/test/test_bitset.c
>  create mode 100644 lib/eal/common/rte_bitset.c
>  create mode 100644 lib/eal/include/rte_bitset.h
> 
> diff --git a/app/test/meson.build b/app/test/meson.build
> index dcc93f4a43..e218be11d8 100644
> --- a/app/test/meson.build
> +++ b/app/test/meson.build
> @@ -32,6 +32,7 @@ source_file_deps = {
>      'test_bitcount.c': [],
>      'test_bitmap.c': [],
>      'test_bitops.c': [],
> +    'test_bitset.c': [],
>      'test_bitratestats.c': ['metrics', 'bitratestats', 'ethdev'] + sample_packet_forward_deps,
>      'test_bpf.c': ['bpf', 'net'],
>      'test_byteorder.c': [],
> diff --git a/app/test/test_bitset.c b/app/test/test_bitset.c
> new file mode 100644
> index 0000000000..688349b03b
> --- /dev/null
> +++ b/app/test/test_bitset.c
> @@ -0,0 +1,645 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2023 Ericsson AB
> + */
> +
> +#include <stdlib.h>
> +#include <inttypes.h>
> +
> +#include <rte_random.h>
> +
> +#include <rte_bitset.h>
> +
> +#include "test.h"
> +
> +#define MAGIC UINT64_C(0xdeadbeefdeadbeef)
> +
> +static void
> +rand_buf(void *buf, size_t n)
> +{
> +	size_t i;
> +
> +	for (i = 0; i < n; i++)
> +		((char *)buf)[i] = (char)rte_rand();
Cast to char unneeded, and you don't want signed character here.
Use uint8_t

^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [RFC v3] eal: add bitset type
  2024-01-31 13:13 [RFC v3] eal: add bitset type Mattias Rönnblom
  2024-01-31 16:02 ` Stephen Hemminger
@ 2024-01-31 16:06 ` Stephen Hemminger
  2024-01-31 18:45   ` Mattias Rönnblom
  2024-02-16 10:23 ` [RFC v4 1/4] " Mattias Rönnblom
  2 siblings, 1 reply; 63+ messages in thread
From: Stephen Hemminger @ 2024-01-31 16:06 UTC (permalink / raw)
  To: Mattias Rönnblom; +Cc: dev, hofors, Morten Brørup, Tyler Retzlaff

On Wed, 31 Jan 2024 14:13:01 +0100
Mattias Rönnblom <mattias.ronnblom@ericsson.com> wrote:

> +/**
> + * @file
> + * RTE Bitset
> + *
> + * This file provides functions and macros for querying and
> + * manipulating sets of bits kept in arrays of @c uint64_t-sized
> + * elements.
> + *
> + * The bits in a bitset are numbered from 0 to @c size - 1, with the
> + * lowest index being the least significant bit.
> + *
> + * The bitset array must be properly aligned.
> + *
> + * For optimal performance, the @c size parameter, required by
> + * many of the API's functions, should be a compile-time constant.
> + *
> + * For large bitsets, the rte_bitmap.h API may be more appropriate.
> + *
> + * @warning
> + * All functions modifying a bitset may overwrite any unused bits of
> + * the last word. Such unused bits are ignored by all functions reading
> + * bits.
> + *
> + */

FYI - the linux kernel has a similar but more complete set of operations.
It might be more efficient to use unsigned long rather than requiring
the elements to be uint64_t. Thinking of the few 32 bit platforms.

Also, what if any thread safety guarantees? or atomic.

From kernel bitmap.h

/**
 * DOC: bitmap overview
 *
 * The available bitmap operations and their rough meaning in the
 * case that the bitmap is a single unsigned long are thus:
 *
 * The generated code is more efficient when nbits is known at
 * compile-time and at most BITS_PER_LONG.
 *
 * ::
 *
 *  bitmap_zero(dst, nbits)                     *dst = 0UL
 *  bitmap_fill(dst, nbits)                     *dst = ~0UL
 *  bitmap_copy(dst, src, nbits)                *dst = *src
 *  bitmap_and(dst, src1, src2, nbits)          *dst = *src1 & *src2
 *  bitmap_or(dst, src1, src2, nbits)           *dst = *src1 | *src2
 *  bitmap_xor(dst, src1, src2, nbits)          *dst = *src1 ^ *src2
 *  bitmap_andnot(dst, src1, src2, nbits)       *dst = *src1 & ~(*src2)
 *  bitmap_complement(dst, src, nbits)          *dst = ~(*src)
 *  bitmap_equal(src1, src2, nbits)             Are *src1 and *src2 equal?
 *  bitmap_intersects(src1, src2, nbits)        Do *src1 and *src2 overlap?
 *  bitmap_subset(src1, src2, nbits)            Is *src1 a subset of *src2?
 *  bitmap_empty(src, nbits)                    Are all bits zero in *src?
 *  bitmap_full(src, nbits)                     Are all bits set in *src?
 *  bitmap_weight(src, nbits)                   Hamming Weight: number set bits
 *  bitmap_weight_and(src1, src2, nbits)        Hamming Weight of and'ed bitmap
 *  bitmap_set(dst, pos, nbits)                 Set specified bit area
 *  bitmap_clear(dst, pos, nbits)               Clear specified bit area
 *  bitmap_find_next_zero_area(buf, len, pos, n, mask)  Find bit free area
 *  bitmap_find_next_zero_area_off(buf, len, pos, n, mask, mask_off)  as above
 *  bitmap_shift_right(dst, src, n, nbits)      *dst = *src >> n
 *  bitmap_shift_left(dst, src, n, nbits)       *dst = *src << n
 *  bitmap_cut(dst, src, first, n, nbits)       Cut n bits from first, copy rest
 *  bitmap_replace(dst, old, new, mask, nbits)  *dst = (*old & ~(*mask)) | (*new & *mask)
 *  bitmap_remap(dst, src, old, new, nbits)     *dst = map(old, new)(src)
 *  bitmap_bitremap(oldbit, old, new, nbits)    newbit = map(old, new)(oldbit)
 *  bitmap_onto(dst, orig, relmap, nbits)       *dst = orig relative to relmap
 *  bitmap_fold(dst, orig, sz, nbits)           dst bits = orig bits mod sz
 *  bitmap_parse(buf, buflen, dst, nbits)       Parse bitmap dst from kernel buf
 *  bitmap_parse_user(ubuf, ulen, dst, nbits)   Parse bitmap dst from user buf
 *  bitmap_parselist(buf, dst, nbits)           Parse bitmap dst from kernel buf
 *  bitmap_parselist_user(buf, dst, nbits)      Parse bitmap dst from user buf
 *  bitmap_find_free_region(bitmap, bits, order)  Find and allocate bit region
 *  bitmap_release_region(bitmap, pos, order)   Free specified bit region
 *  bitmap_allocate_region(bitmap, pos, order)  Allocate specified bit region
 *  bitmap_from_arr32(dst, buf, nbits)          Copy nbits from u32[] buf to dst
 *  bitmap_from_arr64(dst, buf, nbits)          Copy nbits from u64[] buf to dst
 *  bitmap_to_arr32(buf, src, nbits)            Copy nbits from buf to u32[] dst
 *  bitmap_to_arr64(buf, src, nbits)            Copy nbits from buf to u64[] dst
 *  bitmap_get_value8(map, start)               Get 8bit value from map at start
 *  bitmap_set_value8(map, value, start)        Set 8bit value to map at start
 *
 * Note, bitmap_zero() and bitmap_fill() operate over the region of
 * unsigned longs, that is, bits behind bitmap till the unsigned long
 * boundary will be zeroed or filled as well. Consider to use
 * bitmap_clear() or bitmap_set() to make explicit zeroing or filling
 * respectively.
 */


^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [RFC v3] eal: add bitset type
  2024-01-31 16:02 ` Stephen Hemminger
@ 2024-01-31 16:28   ` Mattias Rönnblom
  0 siblings, 0 replies; 63+ messages in thread
From: Mattias Rönnblom @ 2024-01-31 16:28 UTC (permalink / raw)
  To: Stephen Hemminger, Mattias Rönnblom
  Cc: dev, Morten Brørup, Tyler Retzlaff

On 2024-01-31 17:02, Stephen Hemminger wrote:
> On Wed, 31 Jan 2024 14:13:01 +0100
> Mattias Rönnblom <mattias.ronnblom@ericsson.com> wrote:
> 
>> Introduce a set of functions and macros that operate on sets of bits,
>> kept in arrays of 64-bit elements.
>>
>> RTE bitset is designed for bitsets which are larger than what fits in
>> a single machine word (i.e., 64 bits). For very large bitsets, the
>> <rte_bitmap.h> API may be a more appropriate choice.
>>
>> RFC v3:
>>   * Split the bitset from the htimer patchset, where it was originally
>>     hosted.
>>   * Rebase to current DPDK main.
>>   * Add note that rte_bitset_init() need not be called if bitset words
>>     have already been zeroed.
>>   * Use REGISTER_FAST_TEST instead of REGISTER_TEST_COMMAND.
>>   * Use rte_popcount64() instead of compiler builtin.
>>
>> RFC v2:
>>   * Replaced <sys/types.h> with <stddef.h> include, to properly get
>>     size_t typedef.
>>   * Add <rte_compat.h> to get __rte_experimental in <rte_bitset.h>.
>>
>> Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
>> ---
>>   app/test/meson.build         |   1 +
>>   app/test/test_bitset.c       | 645 +++++++++++++++++++++++++
>>   lib/eal/common/meson.build   |   1 +
>>   lib/eal/common/rte_bitset.c  |  29 ++
>>   lib/eal/include/meson.build  |   1 +
>>   lib/eal/include/rte_bitset.h | 884 +++++++++++++++++++++++++++++++++++
>>   lib/eal/version.map          |   3 +
>>   7 files changed, 1564 insertions(+)
>>   create mode 100644 app/test/test_bitset.c
>>   create mode 100644 lib/eal/common/rte_bitset.c
>>   create mode 100644 lib/eal/include/rte_bitset.h
>>
>> diff --git a/app/test/meson.build b/app/test/meson.build
>> index dcc93f4a43..e218be11d8 100644
>> --- a/app/test/meson.build
>> +++ b/app/test/meson.build
>> @@ -32,6 +32,7 @@ source_file_deps = {
>>       'test_bitcount.c': [],
>>       'test_bitmap.c': [],
>>       'test_bitops.c': [],
>> +    'test_bitset.c': [],
>>       'test_bitratestats.c': ['metrics', 'bitratestats', 'ethdev'] + sample_packet_forward_deps,
>>       'test_bpf.c': ['bpf', 'net'],
>>       'test_byteorder.c': [],
>> diff --git a/app/test/test_bitset.c b/app/test/test_bitset.c
>> new file mode 100644
>> index 0000000000..688349b03b
>> --- /dev/null
>> +++ b/app/test/test_bitset.c
>> @@ -0,0 +1,645 @@
>> +/* SPDX-License-Identifier: BSD-3-Clause
>> + * Copyright(c) 2023 Ericsson AB
>> + */
>> +
>> +#include <stdlib.h>
>> +#include <inttypes.h>
>> +
>> +#include <rte_random.h>
>> +
>> +#include <rte_bitset.h>
>> +
>> +#include "test.h"
>> +
>> +#define MAGIC UINT64_C(0xdeadbeefdeadbeef)
>> +
>> +static void
>> +rand_buf(void *buf, size_t n)
>> +{
>> +	size_t i;
>> +
>> +	for (i = 0; i < n; i++)
>> +		((char *)buf)[i] = (char)rte_rand();
> Cast to char unneeded, and you don't want signed character here.
> Use uint8_t

Going through a char pointer is useful in that it never aliases some 
other type. I'll change it to unsigned char.

Thanks.

^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [RFC v3] eal: add bitset type
  2024-01-31 16:06 ` Stephen Hemminger
@ 2024-01-31 18:45   ` Mattias Rönnblom
  2024-02-01  8:04     ` Morten Brørup
  0 siblings, 1 reply; 63+ messages in thread
From: Mattias Rönnblom @ 2024-01-31 18:45 UTC (permalink / raw)
  To: Stephen Hemminger, Mattias Rönnblom
  Cc: dev, Morten Brørup, Tyler Retzlaff

On 2024-01-31 17:06, Stephen Hemminger wrote:
> On Wed, 31 Jan 2024 14:13:01 +0100
> Mattias Rönnblom <mattias.ronnblom@ericsson.com> wrote:
> 
>> +/**
>> + * @file
>> + * RTE Bitset
>> + *
>> + * This file provides functions and macros for querying and
>> + * manipulating sets of bits kept in arrays of @c uint64_t-sized
>> + * elements.
>> + *
>> + * The bits in a bitset are numbered from 0 to @c size - 1, with the
>> + * lowest index being the least significant bit.
>> + *
>> + * The bitset array must be properly aligned.
>> + *
>> + * For optimal performance, the @c size parameter, required by
>> + * many of the API's functions, should be a compile-time constant.
>> + *
>> + * For large bitsets, the rte_bitmap.h API may be more appropriate.
>> + *
>> + * @warning
>> + * All functions modifying a bitset may overwrite any unused bits of
>> + * the last word. Such unused bits are ignored by all functions reading
>> + * bits.
>> + *
>> + */
> 
> FYI - the linux kernel has a similar but more complete set of operations.
> It might be more efficient to use unsigned long rather than requiring
> the elements to be uint64_t. Thinking of the few 32 bit platforms.
> 

Keeping it 64-bit avoids a popcount-related #ifdef. DPDK doesn't have an 
equivalent to __builtin_popcountl().

How much do we need to care about 32-bit ISA performance?

I'll go through the below API and some other APIs to see if there's 
something obvious missing.

When I originally wrote this code there were a few potential features 
where I wasn't sure to what extent they were useful. One example was the 
shift operation. Any input is appreciated.

> Also, what if any thread safety guarantees? or atomic.
> 

Currently, it's all MT unsafe.

An atomic set and get/test would make sense, and maybe other operations 
would as well.

Bringing in atomicity into the design makes it much less obvious:

Would the atomic operations imply some memory ordering, or be "relaxed"?
I would lean toward relaxed, but then shouldn't bit-level atomics be
consistent with the core DPDK atomics API? With that in mind, memory
ordering should be user-configurable.

If the code needs to be pure C11 atomics-wise, the words that make up
the bitset must be _Atomic uint64_t. Then you need to be careful, or you
end up with "lock"-prefixed instructions whenever you manipulate the
bitset words. Just a plain words[N] = 0; gives you a mov+mfence on x86,
for example, plus all the fun of memory_order_seq_cst in terms of
preventing compiler-level optimizations. So you definitely can't have
the bitset always using _Atomic uint64_t, since that would penalize
non-shared use cases. You could have a variant I guess. Just duplicate
the whole thing, or something with macros.

With GCC C11 builtins, you can both have the atomic cake and eat it, in 
that you both access the data non-atomically/normally, and in an atomic 
manner.

>  From kernel bitmap.h
> 
> /**
>   * DOC: bitmap overview
>   *
>   * The available bitmap operations and their rough meaning in the
>   * case that the bitmap is a single unsigned long are thus:
>   *
>   * The generated code is more efficient when nbits is known at
>   * compile-time and at most BITS_PER_LONG.
>   *
>   * ::
>   *
>   *  bitmap_zero(dst, nbits)                     *dst = 0UL
>   *  bitmap_fill(dst, nbits)                     *dst = ~0UL
>   *  bitmap_copy(dst, src, nbits)                *dst = *src
>   *  bitmap_and(dst, src1, src2, nbits)          *dst = *src1 & *src2
>   *  bitmap_or(dst, src1, src2, nbits)           *dst = *src1 | *src2
>   *  bitmap_xor(dst, src1, src2, nbits)          *dst = *src1 ^ *src2
>   *  bitmap_andnot(dst, src1, src2, nbits)       *dst = *src1 & ~(*src2)
>   *  bitmap_complement(dst, src, nbits)          *dst = ~(*src)
>   *  bitmap_equal(src1, src2, nbits)             Are *src1 and *src2 equal?
>   *  bitmap_intersects(src1, src2, nbits)        Do *src1 and *src2 overlap?
>   *  bitmap_subset(src1, src2, nbits)            Is *src1 a subset of *src2?
>   *  bitmap_empty(src, nbits)                    Are all bits zero in *src?
>   *  bitmap_full(src, nbits)                     Are all bits set in *src?
>   *  bitmap_weight(src, nbits)                   Hamming Weight: number set bits
>   *  bitmap_weight_and(src1, src2, nbits)        Hamming Weight of and'ed bitmap
>   *  bitmap_set(dst, pos, nbits)                 Set specified bit area
>   *  bitmap_clear(dst, pos, nbits)               Clear specified bit area
>   *  bitmap_find_next_zero_area(buf, len, pos, n, mask)  Find bit free area
>   *  bitmap_find_next_zero_area_off(buf, len, pos, n, mask, mask_off)  as above
>   *  bitmap_shift_right(dst, src, n, nbits)      *dst = *src >> n
>   *  bitmap_shift_left(dst, src, n, nbits)       *dst = *src << n
>   *  bitmap_cut(dst, src, first, n, nbits)       Cut n bits from first, copy rest
>   *  bitmap_replace(dst, old, new, mask, nbits)  *dst = (*old & ~(*mask)) | (*new & *mask)
>   *  bitmap_remap(dst, src, old, new, nbits)     *dst = map(old, new)(src)
>   *  bitmap_bitremap(oldbit, old, new, nbits)    newbit = map(old, new)(oldbit)
>   *  bitmap_onto(dst, orig, relmap, nbits)       *dst = orig relative to relmap
>   *  bitmap_fold(dst, orig, sz, nbits)           dst bits = orig bits mod sz
>   *  bitmap_parse(buf, buflen, dst, nbits)       Parse bitmap dst from kernel buf
>   *  bitmap_parse_user(ubuf, ulen, dst, nbits)   Parse bitmap dst from user buf
>   *  bitmap_parselist(buf, dst, nbits)           Parse bitmap dst from kernel buf
>   *  bitmap_parselist_user(buf, dst, nbits)      Parse bitmap dst from user buf
>   *  bitmap_find_free_region(bitmap, bits, order)  Find and allocate bit region
>   *  bitmap_release_region(bitmap, pos, order)   Free specified bit region
>   *  bitmap_allocate_region(bitmap, pos, order)  Allocate specified bit region
>   *  bitmap_from_arr32(dst, buf, nbits)          Copy nbits from u32[] buf to dst
>   *  bitmap_from_arr64(dst, buf, nbits)          Copy nbits from u64[] buf to dst
>   *  bitmap_to_arr32(buf, src, nbits)            Copy nbits from buf to u32[] dst
>   *  bitmap_to_arr64(buf, src, nbits)            Copy nbits from buf to u64[] dst
>   *  bitmap_get_value8(map, start)               Get 8bit value from map at start
>   *  bitmap_set_value8(map, value, start)        Set 8bit value to map at start
>   *
>   * Note, bitmap_zero() and bitmap_fill() operate over the region of
>   * unsigned longs, that is, bits behind bitmap till the unsigned long
>   * boundary will be zeroed or filled as well. Consider to use
>   * bitmap_clear() or bitmap_set() to make explicit zeroing or filling
>   * respectively.
>   */
> 

^ permalink raw reply	[flat|nested] 63+ messages in thread

* RE: [RFC v3] eal: add bitset type
  2024-01-31 18:45   ` Mattias Rönnblom
@ 2024-02-01  8:04     ` Morten Brørup
  2024-02-02 10:19       ` Mattias Rönnblom
  0 siblings, 1 reply; 63+ messages in thread
From: Morten Brørup @ 2024-02-01  8:04 UTC (permalink / raw)
  To: Mattias Rönnblom, Stephen Hemminger, Mattias Rönnblom
  Cc: dev, Tyler Retzlaff

> From: Mattias Rönnblom [mailto:hofors@lysator.liu.se]
> Sent: Wednesday, 31 January 2024 19.46
> 
> On 2024-01-31 17:06, Stephen Hemminger wrote:
> > On Wed, 31 Jan 2024 14:13:01 +0100
> > Mattias Rönnblom <mattias.ronnblom@ericsson.com> wrote:

[...]

> > FYI - the linux kernel has a similar but more complete set of
> operations.
> > It might be more efficient to use unsigned long rather than requiring
> > the elements to be uint64_t. Thinking of the few 32 bit platforms.
> >
> 
> Keeping it 64-bit avoids a popcount-related #ifdef. DPDK doesn't have
> an
> equivalent to __builtin_popcountl().
> 
> How much do we need to care about 32-bit ISA performance?

At the 2023 DPDK Summit I talked to someone at a very well known network equipment vendor using 32 bit CPUs in some of their products; some sort of CPE, IIRC. 32 bit CPUs are still out there, and 32-bit CPU support has not been deprecated in DPDK.

For the bitset parameter to functions, you could either use "unsigned long*" (as suggested by Stephen), or "void*" (followed by type casting inside the functions).

If only using this library for the command line argument parser and similar, performance is irrelevant. If we foresee using it in the fast path, e.g. with the htimer library, we shouldn't tie its API tightly to 64 bit.

> 
> I'll go through the below API and some other APIs to see if there's
> something obvious missing.
> 
> When I originally wrote this code there were a few potential features
> where I wasn't sure to what extent they were useful. One example was
> the
> shift operation. Any input is appreciated.

Start off with what you already have. If we need more operations, they can always be added later.

> 
> > Also, what if any thread safety guarantees? or atomic.
> >
> 
> Currently, it's all MT unsafe.
> 
> An atomic set and get/test would make sense, and maybe other operations
> would as well.
> 
> Bringing in atomicity into the design makes it much less obvious:
> 
> Would the atomic operations imply some memory ordering, or be
> "relaxed".
> I would lean toward relaxed, but then shouldn't bit-level atomics be
> consistent with the core DPDK atomics API? With that in mind, memory
> ordering should be user-configurable.
> 
> If the code needs to be pure C11 atomics-wise, the words that makes up
> the bitset must be _Atomic uint64_t. Then you need to be careful or end
> up with "lock"-prefixed instructions if you manipulate the bitset
> words.
> Just a pure words[N] = 0; gives you a mov+mfence on x86, for example,
> plus all the fun memory_order_seq_cst in terms of preventing
> compiler-level optimizations. So you definitely can't have the bitset
> always using _Atomic uint64_t, since would risk non-shared use cases.
> You could have a variant I guess. Just duplicate the whole thing, or
> something with macros.

It seems like MT unsafe suffices for the near term use cases.

We can add an atomic variant of the library later, if the need should arise.

> 
> With GCC C11 builtins, you can both have the atomic cake and eat it, in
> that you both access the data non-atomically/normally, and in an atomic
> manner.

Yep. And we care quite a lot about performance, so we are likely to keep using those until the compilers offer similar performance for C11 standard atomics.


^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [RFC v3] eal: add bitset type
  2024-02-01  8:04     ` Morten Brørup
@ 2024-02-02 10:19       ` Mattias Rönnblom
  2024-02-02 12:42         ` Morten Brørup
  0 siblings, 1 reply; 63+ messages in thread
From: Mattias Rönnblom @ 2024-02-02 10:19 UTC (permalink / raw)
  To: Morten Brørup, Stephen Hemminger, Mattias Rönnblom
  Cc: dev, Tyler Retzlaff

On 2024-02-01 09:04, Morten Brørup wrote:
>> From: Mattias Rönnblom [mailto:hofors@lysator.liu.se]
>> Sent: Wednesday, 31 January 2024 19.46
>>
>> On 2024-01-31 17:06, Stephen Hemminger wrote:
>>> On Wed, 31 Jan 2024 14:13:01 +0100
>>> Mattias Rönnblom <mattias.ronnblom@ericsson.com> wrote:
> 
> [...]
> 
>>> FYI - the linux kernel has a similar but more complete set of
>> operations.
>>> It might be more efficient to use unsigned long rather than requiring
>>> the elements to be uint64_t. Thinking of the few 32 bit platforms.
>>>
>>
>> Keeping it 64-bit avoids a popcount-related #ifdef. DPDK doesn't have
>> an
>> equivalent to __builtin_popcountl().
>>
>> How much do we need to care about 32-bit ISA performance?
> 
> At the 2023 DPDK Summit I talked to someone at a very well known network equipment vendor using 32 bit CPUs in some of their products; some sort of CPE, IIRC. 32 bit CPUs are still out there, and 32-bit CPU support has not been deprecated in DPDK.
> 
> For the bitset parameter to functions, you could either use "unsigned long*" (as suggested by Stephen), or "void*" (followed by type casting inside the functions).
> 
> If only using this library for the command line argument parser and similar, performance is irrelevant. If we foresee using it in the fast path, e.g. with the htimer library, we shouldn't tie its API tightly to 64 bit.
> 

I'm not even sure performance will be that much worse. Sure, two 
popcount instead of one. What is probably worse is older ISAs (32- or 
64-bit, e.g. original x86_64) that lack machine instructions for 
counting set bits of *any* word size.

That said, the only real concern I have about going "unsigned long" -> 
"uint64_t" is that I might feel I need to go fix <rte_bitops.h> first.

>>
>> I'll go through the below API and some other APIs to see if there's
>> something obvious missing.
>>
>> When I originally wrote this code there were a few potential features
>> where I wasn't sure to what extent they were useful. One example was
>> the
>> shift operation. Any input is appreciated.
> 
> Start off with what you already have. If we need more operations, they can always be added later.
> 
>>
>>> Also, what if any thread safety guarantees? or atomic.
>>>
>>
>> Currently, it's all MT unsafe.
>>
>> An atomic set and get/test would make sense, and maybe other operations
>> would as well.
>>
>> Bringing in atomicity into the design makes it much less obvious:
>>
>> Would the atomic operations imply some memory ordering, or be
>> "relaxed".
>> I would lean toward relaxed, but then shouldn't bit-level atomics be
>> consistent with the core DPDK atomics API? With that in mind, memory
>> ordering should be user-configurable.
>>
>> If the code needs to be pure C11 atomics-wise, the words that makes up
>> the bitset must be _Atomic uint64_t. Then you need to be careful or end
>> up with "lock"-prefixed instructions if you manipulate the bitset
>> words.
>> Just a pure words[N] = 0; gives you a mov+mfence on x86, for example,
>> plus all the fun memory_order_seq_cst in terms of preventing
>> compiler-level optimizations. So you definitely can't have the bitset
>> always using _Atomic uint64_t, since would risk non-shared use cases.
>> You could have a variant I guess. Just duplicate the whole thing, or
>> something with macros.
> 
> It seems like MT unsafe suffices for the near term use cases.
> 
> We can add an atomic variant of the library later, if the need should arise.
> 

Agreed. The only concern I have here is that you end up wanting to 
change the original design, to better be able to fit atomic bit operations.

>>
>> With GCC C11 builtins, you can both have the atomic cake and eat it, in
>> that you both access the data non-atomically/normally, and in an atomic
>> manner.
> 
> Yep. And we care quite a lot about performance, so we are likely to keep using those until the compilers offer similar performance for C11 standard atomics.
> 

^ permalink raw reply	[flat|nested] 63+ messages in thread

* RE: [RFC v3] eal: add bitset type
  2024-02-02 10:19       ` Mattias Rönnblom
@ 2024-02-02 12:42         ` Morten Brørup
  0 siblings, 0 replies; 63+ messages in thread
From: Morten Brørup @ 2024-02-02 12:42 UTC (permalink / raw)
  To: Mattias Rönnblom, Stephen Hemminger, Mattias Rönnblom
  Cc: dev, Tyler Retzlaff


> From: Mattias Rönnblom [mailto:hofors@lysator.liu.se]
> Sent: Friday, 2 February 2024 11.19
> 
> On 2024-02-01 09:04, Morten Brørup wrote:
> >> From: Mattias Rönnblom [mailto:hofors@lysator.liu.se]
> >> Sent: Wednesday, 31 January 2024 19.46
> >>
> >> On 2024-01-31 17:06, Stephen Hemminger wrote:
> >>> On Wed, 31 Jan 2024 14:13:01 +0100
> >>> Mattias Rönnblom <mattias.ronnblom@ericsson.com> wrote:
> >
> > [...]
> >
> >>> FYI - the linux kernel has a similar but more complete set of
> >> operations.
> >>> It might be more efficient to use unsigned long rather than
> requiring
> >>> the elements to be uint64_t. Thinking of the few 32 bit platforms.
> >>>
> >>
> >> Keeping it 64-bit avoids a popcount-related #ifdef. DPDK doesn't
> have
> >> an
> >> equivalent to __builtin_popcountl().
> >>
> >> How much do we need to care about 32-bit ISA performance?
> >
> > At the 2023 DPDK Summit I talked to someone at a very well known
> network equipment vendor using 32 bit CPUs in some of their products;
> some sort of CPE, IIRC. 32 bit CPUs are still out there, and 32-bit CPU
> support has not been deprecated in DPDK.
> >
> > For the bitset parameter to functions, you could either use "unsigned
> long*" (as suggested by Stephen), or "void*" (followed by type casting
> inside the functions).
> >
> > If only using this library for the command line argument parser and
> similar, performance is irrelevant. If we foresee using it in the fast
> path, e.g. with the htimer library, we shouldn't tie its API tightly to
> 64 bit.
> >
> 
> I'm not even sure performance will be that much worse. Sure, two
> popcount instead of one. What is probably worse is older ISAs (32- or
> 64-bit, e.g. original x86_64) that lack machine instructions for
> counting set bits of *any* word size.

I'm sorry about being unclear. I didn't mean to suggest supporting *any* word size; I was thinking about one word size, either 32 or 64 bit, automatically selected at build time depending on CPU architecture.

> 
> That said, the only real concern I have about going "unsigned long" ->
> "uint64_t" is that I might feel I need to go fix <rte_bitops.h> first.

I see.
Otherwise you'll end up with a bunch of #if RTE_ARCH_32 rte_bit_<op>32() #else rte_bit_<op>64() #endif in the implementation.
Perhaps a string concatenation macro could replace that with something like rte_bit_<op>##RTE_ARCH_BITS(), or RTE_POSTFIX_ARCH_BITS(rte_bit_<op>, (params)). Just thinking out loud.

> 
> >>
> >> I'll go through the below API and some other APIs to see if there's
> >> something obvious missing.
> >>
> >> When I originally wrote this code there were a few potential
> features
> >> where I wasn't sure to what extent they were useful. One example was
> >> the
> >> shift operation. Any input is appreciated.
> >
> > Start off with what you already have. If we need more operations,
> they can always be added later.
> >
> >>
> >>> Also, what if any thread safety guarantees? or atomic.
> >>>
> >>
> >> Currently, it's all MT unsafe.
> >>
> >> An atomic set and get/test would make sense, and maybe other
> operations
> >> would as well.
> >>
> >> Bringing in atomicity into the design makes it much less obvious:
> >>
> >> Would the atomic operations imply some memory ordering, or be
> >> "relaxed".
> >> I would lean toward relaxed, but then shouldn't bit-level atomics be
> >> consistent with the core DPDK atomics API? With that in mind, memory
> >> ordering should be user-configurable.
> >>
> >> If the code needs to be pure C11 atomics-wise, the words that makes
> up
> >> the bitset must be _Atomic uint64_t. Then you need to be careful or
> end
> >> up with "lock"-prefixed instructions if you manipulate the bitset
> >> words.
> >> Just a pure words[N] = 0; gives you a mov+mfence on x86, for
> example,
> >> plus all the fun memory_order_seq_cst in terms of preventing
> >> compiler-level optimizations. So you definitely can't have the
> bitset
> >> always using _Atomic uint64_t, since would risk non-shared use
> cases.
> >> You could have a variant I guess. Just duplicate the whole thing, or
> >> something with macros.
> >
> > It seems like MT unsafe suffices for the near term use cases.
> >
> > We can add an atomic variant of the library later, if the need should
> arise.
> >
> 
> Agreed. The only concern I have here is that you end up wanting to
> change the original design, to better be able to fit atomic bit
> operations.

In a perfect world, the design should have a roadmap leading towards atomic bit operations.
In a fast moving world, we could mark the lib experimental (and mean it!) - it is still an improvement over copy-pasting something similar all over the code.

If a potential roadmap towards atomic operations is not obvious after thinking a few moments about it, we have a clear conscience to simply deem the library unsafe for multithreading and proceed with it "as is".


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [RFC v4 1/4] eal: add bitset type
  2024-01-31 13:13 [RFC v3] eal: add bitset type Mattias Rönnblom
  2024-01-31 16:02 ` Stephen Hemminger
  2024-01-31 16:06 ` Stephen Hemminger
@ 2024-02-16 10:23 ` Mattias Rönnblom
  2024-02-16 10:23   ` [RFC v4 2/4] eal: add bitset test suite Mattias Rönnblom
                     ` (3 more replies)
  2 siblings, 4 replies; 63+ messages in thread
From: Mattias Rönnblom @ 2024-02-16 10:23 UTC (permalink / raw)
  To: dev
  Cc: hofors, Morten Brørup, Tyler Retzlaff, Stephen Hemminger,
	Harry van Haaren, Mattias Rönnblom

Introduce a set of functions and macros that operate on sets of bits,
kept in arrays of 64-bit words.

RTE bitset is designed for bitsets which are larger than what fits in
a single machine word (i.e., 64 bits). For very large bitsets, the
<rte_bitmap.h> API may be a more appropriate choice.

RFC v4:
 * Add function rte_bitset_flip() to change the value of a bit.
 * Add function rte_bitset_complement(), flipping the value of all bits.
 * Add function rte_bitset_assign(), setting the value of a bit based
   on a 'bool' parameter.
 * Add functions to logically shift the bitset left or right.
 * Add explicit destination bitset to logic operation type functions
   (e.g., rte_bitset_and()), to increase flexibility.
 * Split implementation and test suite into distinct commits.

RFC v3:
 * Split the bitset from the htimer patchset, where it was originally
   hosted.
 * Rebase to current DPDK main.
 * Add note that rte_bitset_init() need not be called if bitset words
   have already been zeroed.
 * Use REGISTER_FAST_TEST instead of REGISTER_TEST_COMMAND.
 * Use rte_popcount64() instead of compiler builtin.

RFC v2:
 * Replaced <sys/types.h> with <stddef.h> include, to properly get
   size_t typedef.
 * Add <rte_compat.h> to get __rte_experimental in <rte_bitset.h>.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
---
 lib/eal/common/meson.build   |    1 +
 lib/eal/common/rte_bitset.c  |   29 +
 lib/eal/include/meson.build  |    1 +
 lib/eal/include/rte_bitset.h | 1080 ++++++++++++++++++++++++++++++++++
 lib/eal/version.map          |    3 +
 5 files changed, 1114 insertions(+)
 create mode 100644 lib/eal/common/rte_bitset.c
 create mode 100644 lib/eal/include/rte_bitset.h

diff --git a/lib/eal/common/meson.build b/lib/eal/common/meson.build
index 22a626ba6f..c1bbf26654 100644
--- a/lib/eal/common/meson.build
+++ b/lib/eal/common/meson.build
@@ -31,6 +31,7 @@ sources += files(
         'eal_common_uuid.c',
         'malloc_elem.c',
         'malloc_heap.c',
+        'rte_bitset.c',
         'rte_malloc.c',
         'rte_random.c',
         'rte_reciprocal.c',
diff --git a/lib/eal/common/rte_bitset.c b/lib/eal/common/rte_bitset.c
new file mode 100644
index 0000000000..35e55a64db
--- /dev/null
+++ b/lib/eal/common/rte_bitset.c
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Ericsson AB
+ */
+
+#include <errno.h>
+
+#include "rte_bitset.h"
+
+ssize_t
+rte_bitset_to_str(const uint64_t *bitset, size_t num_bits, char *buf,
+		  size_t capacity)
+{
+	size_t i;
+
+	if (capacity < (num_bits + 1))
+		return -EINVAL;
+
+	for (i = 0; i < num_bits; i++) {
+		bool value;
+
+		value = rte_bitset_test(bitset, num_bits - 1 - i);
+
+		buf[i] = value ? '1' : '0';
+	}
+
+	buf[num_bits] = '\0';
+
+	return num_bits + 1;
+}
diff --git a/lib/eal/include/meson.build b/lib/eal/include/meson.build
index e94b056d46..4b5f120a66 100644
--- a/lib/eal/include/meson.build
+++ b/lib/eal/include/meson.build
@@ -5,6 +5,7 @@ includes += include_directories('.')
 
 headers += files(
         'rte_alarm.h',
+        'rte_bitset.h',
         'rte_bitmap.h',
         'rte_bitops.h',
         'rte_branch_prediction.h',
diff --git a/lib/eal/include/rte_bitset.h b/lib/eal/include/rte_bitset.h
new file mode 100644
index 0000000000..35631a2a12
--- /dev/null
+++ b/lib/eal/include/rte_bitset.h
@@ -0,0 +1,1080 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Ericsson AB
+ */
+
+#ifndef _RTE_BITSET_H_
+#define _RTE_BITSET_H_
+
+/**
+ * @file
+ * RTE Bitset
+ *
+ * This file provides functions and macros for querying and
+ * manipulating sets of bits kept in arrays of @c uint64_t-sized
+ * elements.
+ *
+ * The bits in a bitset are numbered from 0 to @c size - 1, with the
+ * lowest index being the least significant bit.
+ *
+ * The bitset array must be properly aligned.
+ *
+ * For optimal performance, the @c size parameter, required by
+ * many of the API's functions, should be a compile-time constant.
+ *
+ * For large bitsets, the rte_bitmap.h API may be more appropriate.
+ *
+ * @warning
+ * All functions modifying a bitset may overwrite any unused bits of
+ * the last word. Such unused bits are ignored by all functions reading
+ * bits.
+ *
+ */
+
+#include <limits.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <rte_bitops.h>
+#include <rte_branch_prediction.h>
+#include <rte_common.h>
+#include <rte_compat.h>
+#include <rte_debug.h>
+#include <rte_memcpy.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * The size (in bytes) of each element in the array used to represent
+ * a bitset.
+ */
+#define RTE_BITSET_WORD_SIZE (sizeof(uint64_t))
+
+/**
+ * The size (in bits) of each element in the array used to represent
+ * a bitset.
+ */
+#define RTE_BITSET_WORD_BITS (RTE_BITSET_WORD_SIZE * CHAR_BIT)
+
+/**
+ * Computes the number of words required to store @c size bits.
+ */
+#define RTE_BITSET_NUM_WORDS(size)					\
+	((size + RTE_BITSET_WORD_BITS - 1) / RTE_BITSET_WORD_BITS)
+
+/**
+ * Computes the amount of memory (in bytes) required to fit a bitset
+ * holding @c size bits.
+ */
+#define RTE_BITSET_SIZE(size)						\
+	((size_t)(RTE_BITSET_NUM_WORDS(size) * RTE_BITSET_WORD_SIZE))
+
+#define __RTE_BITSET_WORD_IDX(bit_num) ((bit_num) / RTE_BITSET_WORD_BITS)
+#define __RTE_BITSET_BIT_OFFSET(bit_num) ((bit_num) % RTE_BITSET_WORD_BITS)
+#define __RTE_BITSET_UNUSED(size)			     \
+	((RTE_BITSET_NUM_WORDS(size) * RTE_BITSET_WORD_BITS) \
+	 - (size))
+#define __RTE_BITSET_USED_MASK(size)			\
+	(UINT64_MAX >> __RTE_BITSET_UNUSED(size))
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Declare a bitset.
+ *
+ * Declare (e.g., as a struct field) or define (e.g., as a stack
+ * variable) a bitset of the specified size.
+ *
+ * @param size
+ *   The number of bits the bitset must be able to represent. Must be
+ *   a compile-time constant.
+ * @param name
+ *   The field or variable name of the resulting definition.
+ */
+#define RTE_BITSET_DECLARE(name, size)		\
+	uint64_t name[RTE_BITSET_NUM_WORDS(size)]
+
+/* XXX: should one include flags here and use to avoid a comparison? */
+/* XXX: would this be better off as a function? */
+
+#define __RTE_BITSET_FOREACH_LEFT(var, size, start_bit, len)		\
+	((len) - 1 - ((var) >= (start_bit) ? (var) - (start_bit) :	\
+		  (size) - (start_bit) + (var)))
+
+#define __RTE_BITSET_FOREACH(var, bitset, size, start_bit, len, flags)	\
+	for ((var) = __rte_bitset_find(bitset, size, start_bit, len,	\
+				       flags);				\
+	     (var) != -1;						\
+	     (var) = __RTE_BITSET_FOREACH_LEFT(var, size, start_bit,	\
+					       len) > 0	?		\
+		     __rte_bitset_find(bitset, size,			\
+				       ((var) + 1) % (size),		\
+				       __RTE_BITSET_FOREACH_LEFT(var,	\
+								 size,	\
+								 start_bit, \
+								 len),	\
+				       flags) : -1)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Iterate over all bits set.
+ *
+ * This macro iterates over all bits set (i.e., all ones) in the
+ * bitset, in the forward direction (i.e., starting with the least
+ * significant '1').
+ *
+ * @param var
+ *   An iterator variable of type @c ssize_t. For each successive
+ *   iteration, this variable will hold the bit index of a set bit.
+ * @param bitset
+ *   A <tt>const uint64_t *</tt> pointer to the bitset array.
+ * @param size
+ *   The size of the bitset (in bits).
+ */
+
+#define RTE_BITSET_FOREACH_SET(var, bitset, size)			\
+	__RTE_BITSET_FOREACH(var, bitset, size, 0, size, 0)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Iterate over all bits cleared.
+ *
+ * This macro iterates over all bits cleared in the bitset, in the
+ * forward direction (i.e., starting with the lowest-indexed cleared bit).
+ *
+ * @param var
+ *   An iterator variable of type @c ssize_t. For each successive iteration,
+ *   this variable will hold the bit index of a cleared bit.
+ * @param bitset
+ *   A <tt>const uint64_t *</tt> pointer to the bitset array.
+ * @param size
+ *   The size of the bitset (in bits).
+ */
+
+#define RTE_BITSET_FOREACH_CLEAR(var, bitset, size)			\
+	__RTE_BITSET_FOREACH(var, bitset, size, 0, size,		\
+			     __RTE_BITSET_FIND_FLAG_FIND_CLEAR)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Iterate over all bits set within a range.
+ *
+ * This macro iterates over all bits set (i.e., all ones) in the
+ * specified range, in the forward direction (i.e., starting with the
+ * least significant '1').
+ *
+ * @param var
+ *   An iterator variable of type @c ssize_t. For each successive iteration,
+ *   this variable will hold the bit index of a set bit.
+ * @param bitset
+ *   A <tt>const uint64_t *</tt> pointer to the bitset array.
+ * @param size
+ *   The size of the bitset (in bits).
+ * @param start_bit
+ *   The index of the first bit to check. Must be less than @c size.
+ * @param len
+ *   The length (in bits) of the range. @c start_bit + @c len must be less
+ *   than or equal to @c size.
+ */
+
+#define RTE_BITSET_FOREACH_SET_RANGE(var, bitset, size, start_bit,     \
+				     len)			       \
+	__RTE_BITSET_FOREACH(var, bitset, size, start_bit, len, 0)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Iterate over all cleared bits within a range.
+ *
+ * This macro iterates over all bits cleared (i.e., all zeroes) in the
+ * specified range, in the forward direction (i.e., starting with the
+ * least significant '0').
+ *
+ * @param var
+ *   An iterator variable of type @c ssize_t. For each successive iteration,
+ *   this variable will hold the bit index of a cleared bit.
+ * @param bitset
+ *   A <tt>const uint64_t *</tt> pointer to the bitset array.
+ * @param size
+ *   The size of the bitset (in bits).
+ * @param start_bit
+ *   The index of the first bit to check. Must be less than @c size.
+ * @param len
+ *   The length (in bits) of the range. @c start_bit + @c len must be less
+ *   than or equal to @c size.
+ */
+
+#define RTE_BITSET_FOREACH_CLEAR_RANGE(var, bitset, size, start_bit,	\
+				       len)				\
+	__RTE_BITSET_FOREACH(var, bitset, size, start_bit, len,		\
+			     __RTE_BITSET_FIND_FLAG_FIND_CLEAR)
+
+#define RTE_BITSET_FOREACH_SET_WRAP(var, bitset, size, start_bit,      \
+				    len)			       \
+	__RTE_BITSET_FOREACH(var, bitset, size, start_bit, len,	       \
+			     __RTE_BITSET_FIND_FLAG_WRAP)
+
+#define RTE_BITSET_FOREACH_CLEAR_WRAP(var, bitset, size, start_bit,	\
+				       len)				\
+	__RTE_BITSET_FOREACH(var, bitset, size, start_bit, len,		\
+			     __RTE_BITSET_FIND_FLAG_WRAP |		\
+			     __RTE_BITSET_FIND_FLAG_FIND_CLEAR)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Initializes a bitset.
+ *
+ * All bits are cleared.
+ *
+ * In case all words in the bitset array are already set to zero by
+ * other means (e.g., at the time of memory allocation), this function
+ * need not be called.
+ *
+ * @param bitset
+ *   A pointer to the array of bitset 64-bit words.
+ * @param size
+ *   The size of the bitset (in bits).
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_init(uint64_t *bitset, size_t size)
+{
+	memset(bitset, 0, RTE_BITSET_SIZE(size));
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Set a bit in the bitset.
+ *
+ * Bits are numbered from 0 to (size - 1) (inclusive).
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param bit_num
+ *   The index of the bit to be set.
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_set(uint64_t *bitset, size_t bit_num)
+{
+	size_t word;
+	size_t offset;
+	uint64_t mask;
+
+	word = __RTE_BITSET_WORD_IDX(bit_num);
+	offset = __RTE_BITSET_BIT_OFFSET(bit_num);
+	mask = UINT64_C(1) << offset;
+
+	bitset[word] |= mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Clear a bit in the bitset.
+ *
+ * Bits are numbered 0 to (size - 1) (inclusive).
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param bit_num
+ *   The index of the bit to be cleared.
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_clear(uint64_t *bitset, size_t bit_num)
+{
+	size_t word;
+	size_t offset;
+	uint64_t mask;
+
+	word = __RTE_BITSET_WORD_IDX(bit_num);
+	offset = __RTE_BITSET_BIT_OFFSET(bit_num);
+	mask = ~(UINT64_C(1) << offset);
+
+	bitset[word] &= mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Set or clear a bit in the bitset.
+ *
+ * Bits are numbered 0 to (size - 1) (inclusive).
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param bit_num
+ *   The index of the bit to be set or cleared.
+ * @param bit_value
+ *   Control if the bit should be set or cleared.
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_assign(uint64_t *bitset, size_t bit_num, bool bit_value)
+{
+	if (bit_value)
+		rte_bitset_set(bitset, bit_num);
+	else
+		rte_bitset_clear(bitset, bit_num);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Change the value of a bit in the bitset.
+ *
+ * Bits are numbered 0 to (size - 1) (inclusive).
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param bit_num
+ *   The index of the bit to be flipped.
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_flip(uint64_t *bitset, size_t bit_num)
+{
+	size_t word;
+	size_t offset;
+	uint64_t mask;
+
+	word = __RTE_BITSET_WORD_IDX(bit_num);
+	offset = __RTE_BITSET_BIT_OFFSET(bit_num);
+	mask = UINT64_C(1) << offset;
+
+	bitset[word] ^= mask;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Set all bits in the bitset.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param size
+ *   The size of the bitset (in bits).
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_set_all(uint64_t *bitset, size_t size)
+{
+	memset(bitset, 0xFF, RTE_BITSET_SIZE(size));
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Clear all bits in the bitset.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param size
+ *   The size of the bitset (in bits).
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_clear_all(uint64_t *bitset, size_t size)
+{
+	rte_bitset_init(bitset, size);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Count all set bits (also known as the @e weight).
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param size
+ *   The size of the bitset (in bits).
+ * @return
+ *   Returns the number of '1' bits in the bitset.
+ */
+
+__rte_experimental
+static inline size_t
+rte_bitset_count_set(const uint64_t *bitset, size_t size)
+{
+	size_t i;
+	size_t total = 0;
+
+	/*
+	 * Unused bits in the last word may hold any value, and thus
+	 * are masked out before the last word is counted.
+	 */
+	for (i = 0; i < RTE_BITSET_NUM_WORDS(size) - 1; i++)
+		total += rte_popcount64(bitset[i]);
+
+	total += rte_popcount64(bitset[i] & __RTE_BITSET_USED_MASK(size));
+
+	return total;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Count all cleared bits.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param size
+ *   The size of the bitset (in bits).
+ * @return
+ *   Returns the number of '0' bits in the bitset.
+ */
+
+__rte_experimental
+static inline size_t
+rte_bitset_count_clear(const uint64_t *bitset, size_t size)
+{
+	return size - rte_bitset_count_set(bitset, size);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Test if a bit is set.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param bit_num
+ *   Index of the bit to test. Index 0 is the least significant bit.
+ * @return
+ *   Returns true if the bit is '1', and false if the bit is '0'.
+ */
+
+__rte_experimental
+static inline bool
+rte_bitset_test(const uint64_t *bitset, size_t bit_num)
+{
+	size_t word;
+	size_t offset;
+
+	word = __RTE_BITSET_WORD_IDX(bit_num);
+	offset = __RTE_BITSET_BIT_OFFSET(bit_num);
+
+	return (bitset[word] >> offset) & 1;
+}
+
+#define __RTE_BITSET_FIND_FLAG_FIND_CLEAR (1U << 0)
+#define __RTE_BITSET_FIND_FLAG_WRAP (1U << 1)
+
+__rte_experimental
+static inline ssize_t
+__rte_bitset_find_nowrap(const uint64_t *bitset, size_t __rte_unused size,
+			 size_t start_bit, size_t len, bool find_clear)
+{
+	size_t word_idx;
+	size_t offset;
+	size_t end_bit = start_bit + len;
+
+	RTE_ASSERT(end_bit <= size);
+
+	if (unlikely(len == 0))
+		return -1;
+
+	word_idx = __RTE_BITSET_WORD_IDX(start_bit);
+	offset = __RTE_BITSET_BIT_OFFSET(start_bit);
+
+	while (word_idx <= __RTE_BITSET_WORD_IDX(end_bit - 1)) {
+		uint64_t word;
+		int word_ffs;
+
+		word = bitset[word_idx];
+		if (find_clear)
+			word = ~word;
+
+		word >>= offset;
+
+		word_ffs = __builtin_ffsll(word);
+
+		if (word_ffs != 0) {
+			ssize_t ffs = start_bit + word_ffs - 1;
+
+			/*
+			 * Check if the bit found lies past the end of the
+			 * range (e.g., among the unused bits of the last word).
+			 */
+			if (unlikely(ffs >= (ssize_t)end_bit))
+				return -1;
+
+			return ffs;
+		}
+
+		start_bit += (RTE_BITSET_WORD_BITS - offset);
+		word_idx++;
+		offset = 0;
+	}
+
+	return -1;
+
+}
+
+__rte_experimental
+static inline ssize_t
+__rte_bitset_find(const uint64_t *bitset, size_t size, size_t start_bit,
+		  size_t len, unsigned int flags)
+{
+	bool find_clear = flags & __RTE_BITSET_FIND_FLAG_FIND_CLEAR;
+	bool may_wrap = flags & __RTE_BITSET_FIND_FLAG_WRAP;
+	bool does_wrap = (start_bit + len) > size;
+	ssize_t rc;
+
+	RTE_ASSERT(len <= size);
+	if (!may_wrap)
+		RTE_ASSERT(!does_wrap);
+
+	if (may_wrap && does_wrap) {
+		size_t len0 = size - start_bit;
+		size_t len1 = len - len0;
+
+		rc = __rte_bitset_find_nowrap(bitset, size, start_bit, len0,
+					      find_clear);
+		if (rc < 0)
+			rc =  __rte_bitset_find_nowrap(bitset, size,
+						       0, len1, find_clear);
+	} else
+		rc = __rte_bitset_find_nowrap(bitset, size, start_bit,
+					      len, find_clear);
+
+	return rc;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Find first bit set.
+ *
+ * Scans the bitset in the forward direction (i.e., starting at the
+ * least significant bit), and returns the index of the first '1'.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param size
+ *   The size of the bitset (in bits).
+ * @return
+ *   Returns the index of the least significant '1', or -1 if all
+ *   bits are '0'.
+ */
+
+__rte_experimental
+static inline ssize_t
+rte_bitset_find_first_set(const uint64_t *bitset, size_t size)
+{
+	return __rte_bitset_find(bitset, size, 0, size, 0);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Find first bit set at offset.
+ *
+ * Scans the bitset in the forward direction (i.e., starting at the
+ * least significant bit), starting at an offset @c start_bit into the
+ * bitset, and returns the index of the first '1' encountered.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param size
+ *   The size of the bitset (in bits).
+ * @param start_bit
+ *   The index of the first bit to check. Must be less than @c size.
+ * @param len
+ *   The number of bits to scan. @c start_bit + @c len must be less
+ *   than or equal to @c size.
+ * @return
+ *   Returns the index of the least significant '1', or -1 if all
+ *   bits are '0'.
+ */
+
+__rte_experimental
+static inline ssize_t
+rte_bitset_find_set(const uint64_t *bitset, size_t size,
+		    size_t start_bit, size_t len)
+{
+	return __rte_bitset_find(bitset, size, start_bit, len, 0);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Find first bit set at offset, with wrap-around.
+ *
+ * Scans the bitset in the forward direction (i.e., starting at the
+ * least significant bit), starting at an offset @c start_bit into the
+ * bitset. If no '1' is encountered before the end of the bitset, the search
+ * will continue at index 0.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param size
+ *   The size of the bitset (in bits).
+ * @param start_bit
+ *   The index of the first bit to check. Must be less than @c size.
+ * @param len
+ *   The number of bits to scan. Must be less than or equal to
+ *   @c size. The scan wraps to index 0 if it passes the last bit.
+ * @return
+ *   Returns the index of the first '1' encountered, or -1 if all
+ *   scanned bits are '0'.
+ */
+
+__rte_experimental
+static inline ssize_t
+rte_bitset_find_set_wrap(const uint64_t *bitset, size_t size,
+			 size_t start_bit, size_t len)
+{
+	return __rte_bitset_find(bitset, size, start_bit, len,
+				 __RTE_BITSET_FIND_FLAG_WRAP);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Find first cleared bit.
+ *
+ * Scans the bitset in the forward direction (i.e., starting at the
+ * least significant bit), and returns the index of the first '0'.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param size
+ *   The size of the bitset (in bits).
+ * @return
+ *   Returns the index of the least significant '0', or -1 if all
+ *   bits are '1'.
+ */
+
+__rte_experimental
+static inline ssize_t
+rte_bitset_find_first_clear(const uint64_t *bitset, size_t size)
+{
+	return __rte_bitset_find(bitset, size, 0, size,
+				 __RTE_BITSET_FIND_FLAG_FIND_CLEAR);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Find first cleared bit at offset.
+ *
+ * Scans the bitset in the forward direction (i.e., starting at the
+ * least significant bit), starting at an offset @c start_bit into the
+ * bitset, and returns the index of the first '0' encountered.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param size
+ *   The size of the bitset (in bits).
+ * @param start_bit
+ *   The index of the first bit to check. Must be less than @c size.
+ * @param len
+ *   The number of bits to scan. @c start_bit + @c len must be less
+ *   than or equal to @c size.
+ * @return
+ *   Returns the index of the least significant '0', or -1 if all
+ *   bits are '1'.
+ */
+
+__rte_experimental
+static inline ssize_t
+rte_bitset_find_clear(const uint64_t *bitset, size_t size,
+			   size_t start_bit, size_t len)
+{
+	return __rte_bitset_find(bitset, size, start_bit, len,
+				 __RTE_BITSET_FIND_FLAG_FIND_CLEAR);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Find first cleared bit at offset, with wrap-around.
+ *
+ * Scans the bitset in the forward direction (i.e., starting at the
+ * least significant bit), starting at an offset @c start_bit into the
+ * bitset. If no '0' is encountered before the end of the bitset, the
+ * search will continue at index 0.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param size
+ *   The size of the bitset (in bits).
+ * @param start_bit
+ *   The index of the first bit to check. Must be less than @c size.
+ * @param len
+ *   The number of bits to scan, wrapping around to index 0 at the
+ *   end of the bitset. Must be less than or equal to @c size.
+ * @return
+ *   Returns the index of the first '0' encountered, or -1 if all
+ *   scanned bits are '1'.
+ */
+
+__rte_experimental
+static inline ssize_t
+rte_bitset_find_clear_wrap(const uint64_t *bitset, size_t size,
+			   size_t start_bit, size_t len)
+{
+	return __rte_bitset_find(bitset, size, start_bit, len,
+				 __RTE_BITSET_FIND_FLAG_FIND_CLEAR |
+				 __RTE_BITSET_FIND_FLAG_WRAP);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Copy bitset.
+ *
+ * Copy the bits of the @c src_bitset to the @c dst_bitset.
+ *
+ * The bitsets may not overlap and must be of equal size.
+ *
+ * @param dst_bitset
+ *   A pointer to the array of words making up the destination bitset.
+ * @param src_bitset
+ *   A pointer to the array of words making up the source bitset.
+ * @param size
+ *   The size of the bitsets (in bits).
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_copy(uint64_t *__rte_restrict dst_bitset,
+		const uint64_t *__rte_restrict src_bitset,
+		size_t size)
+{
+	rte_memcpy(dst_bitset, src_bitset, RTE_BITSET_SIZE(size));
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Bitwise or two bitsets.
+ *
+ * Perform a bitwise OR operation on all bits in the two equal-size
+ * bitsets @c src_bitset0 and @c src_bitset1, and store the results in
+ * @c dst_bitset.
+ *
+ * @param dst_bitset
+ *   A pointer to the destination bitset.
+ * @param src_bitset0
+ *   A pointer to the first source bitset.
+ * @param src_bitset1
+ *   A pointer to the second source bitset.
+ * @param size
+ *   The size of the bitsets (in bits).
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_or(uint64_t *dst_bitset, const uint64_t *src_bitset0,
+	      const uint64_t *src_bitset1, size_t size)
+{
+	size_t i;
+
+	for (i = 0; i < RTE_BITSET_NUM_WORDS(size); i++)
+		dst_bitset[i] = src_bitset0[i] | src_bitset1[i];
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Bitwise and two bitsets.
+ *
+ * Perform a bitwise AND operation on all bits in the two equal-size
+ * bitsets @c src_bitset0 and @c src_bitset1, and store the result in
+ * @c dst_bitset.
+ *
+ * @param dst_bitset
+ *   A pointer to the destination bitset.
+ * @param src_bitset0
+ *   A pointer to the first source bitset.
+ * @param src_bitset1
+ *   A pointer to the second source bitset.
+ * @param size
+ *   The size of the bitsets (in bits).
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_and(uint64_t *dst_bitset, const uint64_t *src_bitset0,
+	       const uint64_t *src_bitset1, size_t size)
+{
+	size_t i;
+
+	for (i = 0; i < RTE_BITSET_NUM_WORDS(size); i++)
+		dst_bitset[i] = src_bitset0[i] & src_bitset1[i];
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Bitwise xor two bitsets.
+ *
+ * Perform a bitwise XOR operation on all bits in the two equal-size
+ * bitsets @c src_bitset0 and @c src_bitset1, and store the result in
+ * @c dst_bitset.
+ *
+ * @param dst_bitset
+ *   A pointer to the destination bitset.
+ * @param src_bitset0
+ *   A pointer to the first source bitset.
+ * @param src_bitset1
+ *   A pointer to the second source bitset.
+ * @param size
+ *   The size of the bitsets (in bits).
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_xor(uint64_t *dst_bitset, const uint64_t *src_bitset0,
+	       const uint64_t *src_bitset1, size_t size)
+{
+	size_t i;
+
+	for (i = 0; i < RTE_BITSET_NUM_WORDS(size); i++)
+		dst_bitset[i] = src_bitset0[i] ^ src_bitset1[i];
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Compute the bitwise complement of a bitset.
+ *
+ * Flip every bit in the @c src_bitset, and store the result in @c
+ * dst_bitset.
+ *
+ * @param dst_bitset
+ *   A pointer to the destination bitset.
+ * @param src_bitset
+ *   A pointer to the source bitset.
+ * @param size
+ *   The size of the bitsets (in bits).
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_complement(uint64_t *dst_bitset, const uint64_t *src_bitset,
+		      size_t size)
+{
+	size_t i;
+
+	for (i = 0; i < RTE_BITSET_NUM_WORDS(size); i++)
+		dst_bitset[i] = ~src_bitset[i];
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Shift bitset left.
+ *
+ * Perform a logical shift left of (multiply) @c src_bitset, and store
+ * the result in @c dst_bitset.
+ *
+ * @param dst_bitset
+ *   A pointer to the destination bitset.
+ * @param src_bitset
+ *   A pointer to the source bitset.
+ * @param size
+ *   The size of the bitsets (in bits).
+ * @param shift_bits
+ *   The number of bits to shift the bitset.
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_shift_left(uint64_t *dst_bitset, const uint64_t *src_bitset,
+		      size_t size, size_t shift_bits)
+{
+	const int src_word_offset = shift_bits / RTE_BITSET_WORD_BITS;
+	const int src_bit_offset = shift_bits % RTE_BITSET_WORD_BITS;
+	unsigned int dst_idx;
+
+	for (dst_idx = 0; dst_idx < RTE_BITSET_NUM_WORDS(size); dst_idx++) {
+		int src_high_idx = dst_idx - src_word_offset;
+		uint64_t low_bits = 0;
+		uint64_t high_bits = 0;
+
+		if (src_high_idx >= 0) {
+			int src_low_idx = src_high_idx - 1;
+
+			high_bits = src_bitset[src_high_idx] << src_bit_offset;
+
+			if (src_bit_offset > 0 && src_low_idx >= 0)
+				low_bits = src_bitset[src_low_idx] >>
+					(RTE_BITSET_WORD_BITS - src_bit_offset);
+		}
+		dst_bitset[dst_idx] = low_bits | high_bits;
+	}
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Shift bitset right.
+ *
+ * Perform a logical shift right of (divide) @c src_bitset, and store
+ * the result in @c dst_bitset.
+ *
+ * @param dst_bitset
+ *   A pointer to the destination bitset.
+ * @param src_bitset
+ *   A pointer to the source bitset.
+ * @param size
+ *   The size of the bitsets (in bits).
+ * @param shift_bits
+ *   The number of bits to shift the bitset.
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_shift_right(uint64_t *dst_bitset, const uint64_t *src_bitset,
+		       size_t size, size_t shift_bits)
+{
+	const int num_words = RTE_BITSET_NUM_WORDS(size);
+	const uint64_t used_mask = __RTE_BITSET_USED_MASK(size);
+	const int src_word_offset = shift_bits / RTE_BITSET_WORD_BITS;
+	const int src_bit_offset = shift_bits % RTE_BITSET_WORD_BITS;
+	int dst_idx;
+
+	for (dst_idx = 0; dst_idx < num_words; dst_idx++) {
+		int src_low_idx = src_word_offset + dst_idx;
+		int src_high_idx = src_low_idx + 1;
+		uint64_t src_low_word_bits = 0;
+		uint64_t src_high_word_bits = 0;
+
+		if (src_low_idx < num_words) {
+			src_low_word_bits = src_bitset[src_low_idx];
+
+			if (src_low_idx == (num_words - 1))
+				src_low_word_bits &= used_mask;
+
+			src_low_word_bits >>= src_bit_offset;
+
+			if (src_bit_offset > 0 && src_high_idx < num_words) {
+				src_high_word_bits = src_bitset[src_high_idx];
+
+				if (src_high_idx == (num_words - 1))
+					src_high_word_bits &= used_mask;
+
+				src_high_word_bits <<=
+					(RTE_BITSET_WORD_BITS - src_bit_offset);
+			}
+		}
+		dst_bitset[dst_idx] = src_low_word_bits | src_high_word_bits;
+	}
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Compare two bitsets for equality.
+ *
+ * @param bitset_a
+ *   A pointer to the first bitset to compare.
+ * @param bitset_b
+ *   A pointer to the second bitset to compare.
+ * @param size
+ *   The size of the bitsets (in bits).
+ * @return
+ *   Returns true if the bitsets are equal, and false otherwise.
+ */
+
+__rte_experimental
+static inline bool
+rte_bitset_equal(const uint64_t *bitset_a, const uint64_t *bitset_b,
+		 size_t size)
+{
+	size_t i;
+	uint64_t last_a, last_b;
+
+	for (i = 0; i < RTE_BITSET_NUM_WORDS(size) - 1; i++)
+		if (bitset_a[i] != bitset_b[i])
+			return false;
+
+	last_a = bitset_a[i] << __RTE_BITSET_UNUSED(size);
+	last_b = bitset_b[i] << __RTE_BITSET_UNUSED(size);
+
+	return last_a == last_b;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Converts a bitset to a string.
+ *
+ * This function prints a string representation of the bitset to
+ * the supplied buffer.
+ *
+ * Each bit is represented either by '0' or '1' in the output, with
+ * the first (left-most) character in the output being the most
+ * significant bit. The resulting string is NUL terminated.
+ *
+ * @param bitset
+ *   A pointer to the array of bitset 64-bit words.
+ * @param size
+ *   The size of the bitset (in bits).
+ * @param buf
+ *   A buffer to hold the output.
+ * @param capacity
+ *   The size of the buffer. Must be @c size + 1 or larger.
+ * @return
+ *   Returns the number of bytes written (i.e., @c size + 1), or -EINVAL
+ *   in case the buffer capacity was too small.
+ */
+
+__rte_experimental
+ssize_t
+rte_bitset_to_str(const uint64_t *bitset, size_t size, char *buf,
+		  size_t capacity);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_BITSET_H_ */
diff --git a/lib/eal/version.map b/lib/eal/version.map
index 5e0cd47c82..639ccfe4b0 100644
--- a/lib/eal/version.map
+++ b/lib/eal/version.map
@@ -393,6 +393,9 @@ EXPERIMENTAL {
 	# added in 23.07
 	rte_memzone_max_get;
 	rte_memzone_max_set;
+
+	# added in 24.03
+	rte_bitset_to_str;
 };
 
 INTERNAL {
-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [RFC v4 2/4] eal: add bitset test suite
  2024-02-16 10:23 ` [RFC v4 1/4] " Mattias Rönnblom
@ 2024-02-16 10:23   ` Mattias Rönnblom
  2024-02-16 10:23   ` [RFC v4 3/4] service: use multi-word bitset to represent service flags Mattias Rönnblom
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 63+ messages in thread
From: Mattias Rönnblom @ 2024-02-16 10:23 UTC (permalink / raw)
  To: dev
  Cc: hofors, Morten Brørup, Tyler Retzlaff, Stephen Hemminger,
	Harry van Haaren, Mattias Rönnblom

Add test suite to exercise <rte_bitset.h>.

RFC v4:
 * Fix signed char issue in test cases. (Stephen Hemminger)
 * Add test cases for logic operations.
 * Use the unit test suite runner helper.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
---
 app/test/meson.build   |   1 +
 app/test/test_bitset.c | 870 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 871 insertions(+)
 create mode 100644 app/test/test_bitset.c

diff --git a/app/test/meson.build b/app/test/meson.build
index b4382cf4ad..d5a7f771ae 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -33,6 +33,7 @@ source_file_deps = {
     'test_bitcount.c': [],
     'test_bitmap.c': [],
     'test_bitops.c': [],
+    'test_bitset.c': [],
     'test_bitratestats.c': ['metrics', 'bitratestats', 'ethdev'] + sample_packet_forward_deps,
     'test_bpf.c': ['bpf', 'net'],
     'test_byteorder.c': [],
diff --git a/app/test/test_bitset.c b/app/test/test_bitset.c
new file mode 100644
index 0000000000..84c8a117ee
--- /dev/null
+++ b/app/test/test_bitset.c
@@ -0,0 +1,870 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Ericsson AB
+ */
+
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include <rte_random.h>
+
+#include <rte_bitset.h>
+
+#include "test.h"
+
+#define MAGIC UINT64_C(0xdeadbeefdeadbeef)
+
+static void
+rand_buf(void *buf, size_t n)
+{
+	size_t i;
+
+	for (i = 0; i < n; i++)
+		((unsigned char *)buf)[i] = rte_rand();
+}
+
+/* Allocate a bitset filled with random bits, fenced by MAGIC guard words. */
+static uint64_t *
+alloc_bitset(size_t size)
+{
+	uint64_t *p;
+
+	p = malloc(RTE_BITSET_SIZE(size) + 2 * sizeof(uint64_t));
+	if (p == NULL)
+		rte_panic("Unable to allocate memory\n");
+
+	/* Randomize the bitset words proper, which start after the guard. */
+	rand_buf(&p[1], RTE_BITSET_SIZE(size));
+	p[0] = MAGIC;
+	p[RTE_BITSET_NUM_WORDS(size) + 1] = MAGIC;
+
+	return p + 1;
+}
+
+
+static int
+free_bitset(uint64_t *bitset, size_t size)
+{
+	uint64_t *p;
+
+	p = bitset - 1;
+
+	if (p[0] != MAGIC)
+		return TEST_FAILED;
+
+	if (p[RTE_BITSET_NUM_WORDS(size) + 1] != MAGIC)
+		return TEST_FAILED;
+
+	free(p);
+
+	return TEST_SUCCESS;
+}
+
+static bool
+rand_bool(void)
+{
+	return rte_rand_max(2);
+}
+
+static void
+rand_bool_ary(bool *ary, size_t len)
+{
+	size_t i;
+
+	for (i = 0; i < len; i++)
+		ary[i] = rand_bool();
+}
+
+static void
+rand_unused_bits(uint64_t *bitset, size_t size)
+{
+	uint64_t bits = rte_rand() & ~__RTE_BITSET_USED_MASK(size);
+
+	bitset[RTE_BITSET_NUM_WORDS(size) - 1] |= bits;
+}
+
+static void
+rand_bitset(uint64_t *bitset, size_t size)
+{
+	size_t i;
+
+	rte_bitset_init(bitset, size);
+
+	for (i = 0; i < size; i++)
+		rte_bitset_assign(bitset, i, rand_bool());
+
+	rand_unused_bits(bitset, size);
+}
+
+static int
+test_set_clear_size(size_t size)
+{
+	size_t i;
+	bool reference[size];
+	uint64_t *bitset;
+
+	rand_bool_ary(reference, size);
+
+	bitset = alloc_bitset(size);
+
+	TEST_ASSERT(bitset != NULL, "Failed to allocate memory");
+
+	rte_bitset_init(bitset, size);
+
+	for (i = 0; i < size; i++) {
+		if (reference[i])
+			rte_bitset_set(bitset, i);
+		else
+			rte_bitset_clear(bitset, i);
+	}
+
+	for (i = 0; i < size; i++)
+		if (reference[i] != rte_bitset_test(bitset, i))
+			return TEST_FAILED;
+
+	TEST_ASSERT_EQUAL(free_bitset(bitset, size), TEST_SUCCESS,
+			  "Buffer over- or underrun detected");
+
+	return TEST_SUCCESS;
+}
+
+#define RAND_ITERATIONS (10000)
+#define RAND_SET_MAX_SIZE (1000)
+
+static int
+test_set_clear(void)
+{
+	size_t i;
+
+	for (i = 0; i < RAND_ITERATIONS; i++) {
+		size_t size = 1 + rte_rand_max(RAND_SET_MAX_SIZE - 1);
+
+		if (test_set_clear_size(size) != TEST_SUCCESS)
+			return TEST_FAILED;
+	}
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_flip_size(size_t size)
+{
+	size_t i;
+	uint64_t *bitset;
+
+	bitset = alloc_bitset(size);
+
+	TEST_ASSERT(bitset != NULL, "Failed to allocate memory");
+
+	rand_bitset(bitset, size);
+
+	for (i = 0; i < size; i++) {
+		RTE_BITSET_DECLARE(reference, size);
+
+		rte_bitset_copy(reference, bitset, size);
+
+		bool value = rte_bitset_test(bitset, i);
+
+		rte_bitset_flip(bitset, i);
+
+		TEST_ASSERT(rte_bitset_test(bitset, i) != value,
+			    "Bit %zd was not flipped", i);
+
+		rte_bitset_assign(reference, i, !value);
+
+		TEST_ASSERT(rte_bitset_equal(bitset, reference, size),
+			    "Not only the target bit %zd was flipped", i);
+
+
+	}
+
+	TEST_ASSERT_EQUAL(free_bitset(bitset, size), TEST_SUCCESS,
+			  "Buffer over- or underrun detected");
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_flip(void)
+{
+	size_t i;
+
+	for (i = 0; i < RAND_ITERATIONS; i++) {
+		size_t size = 1 + rte_rand_max(RAND_SET_MAX_SIZE - 1);
+
+		if (test_flip_size(size) != TEST_SUCCESS)
+			return TEST_FAILED;
+	}
+
+	return TEST_SUCCESS;
+}
+
+static ssize_t
+find(const bool *ary, size_t num_bools, size_t start, size_t len, bool set)
+{
+	size_t i;
+
+	for (i = 0; i < len; i++) {
+		ssize_t idx = (start + i) % num_bools;
+
+		if (ary[idx] == set)
+			return idx;
+	}
+
+	return -1;
+}
+
+static ssize_t
+find_set(const bool *ary, size_t num_bools, size_t start, size_t len)
+{
+	return find(ary, num_bools, start, len, true);
+}
+
+static ssize_t
+find_clear(const bool *ary, size_t num_bools, size_t start, size_t len)
+{
+	return find(ary, num_bools, start, len, false);
+}
+
+#define FFS_ITERATIONS (100)
+
+static int
+test_find_size(size_t size, bool set)
+{
+	uint64_t *bitset;
+	bool reference[size];
+	size_t i;
+
+	bitset = alloc_bitset(size);
+
+	TEST_ASSERT(bitset != NULL, "Failed to allocate memory");
+
+	rte_bitset_init(bitset, size);
+
+	for (i = 0; i < size; i++) {
+		bool bit = rand_bool();
+		reference[i] = bit;
+
+		if (bit)
+			rte_bitset_set(bitset, i);
+		else /* redundant, still useful for testing */
+			rte_bitset_clear(bitset, i);
+	}
+
+	for (i = 0; i < FFS_ITERATIONS; i++) {
+		size_t start_bit = rte_rand_max(size);
+		size_t len = rte_rand_max(size + 1);
+		bool full_range = len == size && start_bit == 0;
+		bool wraps = start_bit + len > size;
+		ssize_t rc;
+
+		if (set) {
+			if (full_range && rand_bool())
+				rc = rte_bitset_find_first_set(bitset,
+							       size);
+			else if (wraps || rand_bool()) {
+				rc = rte_bitset_find_set_wrap(bitset, size,
+							      start_bit, len);
+
+			} else
+				rc = rte_bitset_find_set(bitset, size,
+							 start_bit, len);
+
+			if (rc != find_set(reference, size, start_bit,
+					   len))
+				return TEST_FAILED;
+		} else {
+			if (full_range && rand_bool())
+				rc = rte_bitset_find_first_clear(bitset,
+								 size);
+			else if (wraps || rand_bool())
+				rc = rte_bitset_find_clear_wrap(bitset,
+								size,
+								start_bit, len);
+			else
+				rc = rte_bitset_find_clear(bitset, size,
+							   start_bit, len);
+
+			if (rc != find_clear(reference, size, start_bit,
+					     len))
+				return TEST_FAILED;
+		}
+
+	}
+
+	TEST_ASSERT_EQUAL(free_bitset(bitset, size), TEST_SUCCESS,
+			  "Buffer over- or underrun detected");
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_find_set_size(size_t size)
+{
+	return test_find_size(size, true);
+}
+
+static int
+test_find_clear_size(size_t size)
+{
+	return test_find_size(size, false);
+}
+
+static int
+test_find(void)
+{
+	size_t i;
+
+	for (i = 0; i < RAND_ITERATIONS; i++) {
+		size_t size = 2 + rte_rand_max(RAND_SET_MAX_SIZE - 2);
+
+		if (test_find_set_size(size) != TEST_SUCCESS)
+			return TEST_FAILED;
+
+		if (test_find_clear_size(size) != TEST_SUCCESS)
+			return TEST_FAILED;
+	}
+
+	return TEST_SUCCESS;
+}
+
+static int
+record_match(ssize_t match_idx, size_t size, int *calls)
+{
+	if (match_idx < 0 || (size_t)match_idx >= size)
+		return TEST_FAILED;
+
+	calls[match_idx]++;
+
+	return TEST_SUCCESS;
+}
+
+/* Verify that the FOREACH macros visit each matching bit exactly once. */
+static int
+test_foreach_size(ssize_t size, bool may_wrap, bool set)
+{
+	bool reference[size];
+	int calls[size];
+	uint64_t *bitset;
+	ssize_t i;
+	ssize_t start_bit;
+	ssize_t len;
+	bool full_range;
+	size_t total_calls = 0;
+
+	rand_bool_ary(reference, size);
+
+	bitset = alloc_bitset(size);
+
+	TEST_ASSERT(bitset != NULL, "Failed to allocate memory");
+	memset(calls, 0, sizeof(calls));
+
+	start_bit = rte_rand_max(size);
+	len = may_wrap ? rte_rand_max(size + 1) :
+		rte_rand_max(size - start_bit + 1);
+
+	rte_bitset_init(bitset, size);
+
+	/* random data in the unused bits should not matter */
+	rand_buf(bitset, RTE_BITSET_SIZE(size));
+
+	for (i = start_bit; i < start_bit + len; i++) {
+		size_t idx = i % size;
+
+		if (reference[idx])
+			rte_bitset_set(bitset, idx);
+		else
+			rte_bitset_clear(bitset, idx);
+
+		if (rte_bitset_test(bitset, idx) != reference[idx])
+			return TEST_FAILED;
+	}
+
+	full_range = (len == size && start_bit == 0);
+
+	/* XXX: verify iteration order as well */
+	if (set) {
+		if (full_range && rand_bool()) {
+			RTE_BITSET_FOREACH_SET(i, bitset, size) {
+				if (record_match(i, size, calls) !=
+				    TEST_SUCCESS)
+					return TEST_FAILED;
+			}
+		} else if (may_wrap) {
+			RTE_BITSET_FOREACH_SET_WRAP(i, bitset, size,
+						    start_bit, len) {
+				if (record_match(i, size, calls) !=
+				    TEST_SUCCESS) {
+					printf("Index %zd is out of range\n", i);
+					return TEST_FAILED;
+				}
+			}
+		} else {
+			RTE_BITSET_FOREACH_SET_RANGE(i, bitset, size,
+						     start_bit, len) {
+				if (record_match(i, size, calls) !=
+				    TEST_SUCCESS)
+					return TEST_FAILED;
+			}
+		}
+	} else {
+		if (full_range && rand_bool()) {
+			RTE_BITSET_FOREACH_CLEAR(i, bitset, size)
+				if (record_match(i, size, calls) !=
+				    TEST_SUCCESS)
+					return TEST_FAILED;
+		} else if (may_wrap) {
+			RTE_BITSET_FOREACH_CLEAR_WRAP(i, bitset, size,
+						      start_bit, len) {
+				if (record_match(i, size, calls) !=
+				    TEST_SUCCESS)
+					return TEST_FAILED;
+			}
+		} else {
+			RTE_BITSET_FOREACH_CLEAR_RANGE(i, bitset, size,
+						       start_bit, len)
+				if (record_match(i, size, calls) !=
+				    TEST_SUCCESS)
+					return TEST_FAILED;
+		}
+	}
+
+	for (i = 0; i < len; i++) {
+		size_t idx = (start_bit + i) % size;
+
+		if (reference[idx] == set && calls[idx] != 1) {
+			printf("bit %zu shouldn't have been found %d "
+			       "times\n", idx, calls[idx]);
+			return TEST_FAILED;
+		}
+
+		if (reference[idx] != set && calls[idx] != 0) {
+			printf("bit %zu shouldn't have been found\n", idx);
+			return TEST_FAILED;
+		}
+
+		total_calls += calls[idx];
+	}
+
+	if (full_range) {
+		size_t count;
+
+		count = set ? rte_bitset_count_set(bitset, size) :
+			rte_bitset_count_clear(bitset, size);
+
+		if (count != total_calls)
+			return TEST_FAILED;
+	}
+
+	TEST_ASSERT_EQUAL(free_bitset(bitset, size), TEST_SUCCESS,
+			  "Buffer over- or underrun detected");
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_foreach(void)
+{
+	size_t i;
+
+	for (i = 0; i < RAND_ITERATIONS; i++) {
+		size_t size = 1 + rte_rand_max(RAND_SET_MAX_SIZE - 1);
+
+		if (test_foreach_size(size, false, true) != TEST_SUCCESS)
+			return TEST_FAILED;
+
+		if (test_foreach_size(size, false, false) != TEST_SUCCESS)
+			return TEST_FAILED;
+
+		if (test_foreach_size(size, true, true) != TEST_SUCCESS)
+			return TEST_FAILED;
+
+		if (test_foreach_size(size, true, false) != TEST_SUCCESS)
+			return TEST_FAILED;
+	}
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_count_size(size_t size)
+{
+	uint64_t *bitset;
+
+	bitset = alloc_bitset(size);
+
+	TEST_ASSERT(bitset != NULL, "Failed to allocate memory");
+
+	rte_bitset_init(bitset, size);
+
+	rand_unused_bits(bitset, size);
+
+	if (rte_bitset_count_set(bitset, size) != 0)
+		return TEST_FAILED;
+
+	if (rte_bitset_count_clear(bitset, size) != size)
+		return TEST_FAILED;
+
+	rte_bitset_set_all(bitset, size);
+
+	if (rte_bitset_count_set(bitset, size) != size)
+		return TEST_FAILED;
+
+	if (rte_bitset_count_clear(bitset, size) != 0)
+		return TEST_FAILED;
+
+	rte_bitset_clear_all(bitset, size);
+
+	if (rte_bitset_count_set(bitset, size) != 0)
+		return TEST_FAILED;
+
+	if (rte_bitset_count_clear(bitset, size) != size)
+		return TEST_FAILED;
+
+	rte_bitset_set(bitset, rte_rand_max(size));
+
+	if (rte_bitset_count_set(bitset, size) != 1)
+		return TEST_FAILED;
+
+	if (rte_bitset_count_clear(bitset, size) != (size - 1))
+		return TEST_FAILED;
+
+	rte_bitset_clear_all(bitset, size);
+	if (rte_bitset_count_set(bitset, size) != 0)
+		return TEST_FAILED;
+	if (rte_bitset_count_clear(bitset, size) != size)
+		return TEST_FAILED;
+
+	rte_bitset_set_all(bitset, size);
+	if (rte_bitset_count_set(bitset, size) != size)
+		return TEST_FAILED;
+	if (rte_bitset_count_clear(bitset, size) != 0)
+		return TEST_FAILED;
+
+	TEST_ASSERT_EQUAL(free_bitset(bitset, size), TEST_SUCCESS,
+			  "Buffer over- or underrun detected");
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_count(void)
+{
+	size_t i;
+
+	if (test_count_size(128) != TEST_SUCCESS)
+		return TEST_FAILED;
+	if (test_count_size(1) != TEST_SUCCESS)
+		return TEST_FAILED;
+	if (test_count_size(63) != TEST_SUCCESS)
+		return TEST_FAILED;
+	if (test_count_size(64) != TEST_SUCCESS)
+		return TEST_FAILED;
+	if (test_count_size(65) != TEST_SUCCESS)
+		return TEST_FAILED;
+
+	for (i = 0; i < RAND_ITERATIONS; i++) {
+		size_t size = 1 + rte_rand_max(RAND_SET_MAX_SIZE - 1);
+
+		if (test_count_size(size) != TEST_SUCCESS)
+			return TEST_FAILED;
+	}
+
+	return TEST_SUCCESS;
+}
+
+/* Declare and probe a size-bit bitset; returns from caller on failure. */
+#define GEN_DECLARE(size)						\
+	{								\
+		RTE_BITSET_DECLARE(bitset, size);			\
+		size_t idx = rte_rand_max(size);			\
+									\
+		rte_bitset_init(bitset, size);				\
+		rte_bitset_set(bitset, idx);				\
+		if (!rte_bitset_test(bitset, idx))			\
+			return TEST_FAILED;				\
+		if (rte_bitset_count_set(bitset, size) != 1)		\
+			return TEST_FAILED;				\
+	}
+
+static int
+test_define(void)
+{
+	GEN_DECLARE(1);
+	GEN_DECLARE(64);
+	GEN_DECLARE(65);
+	GEN_DECLARE(4097);
+	/* The macro returns only on failure, so all sizes get exercised. */
+	return TEST_SUCCESS;
+}
+
+static int test_logic_op(void (*bitset_op)(uint64_t *, const uint64_t *,
+					   const uint64_t *, size_t),
+			 bool (*bool_op)(bool, bool))
+{
+	const size_t size = 1 + rte_rand_max(200);
+	RTE_BITSET_DECLARE(bitset_a, size);
+	RTE_BITSET_DECLARE(bitset_b, size);
+	RTE_BITSET_DECLARE(bitset_d, size);
+
+	bool ary_a[size];
+	bool ary_b[size];
+	bool ary_d[size];
+
+	rand_bool_ary(ary_a, size);
+	rand_bool_ary(ary_b, size);
+
+	size_t i;
+	for (i = 0; i < size; i++) {
+		rte_bitset_assign(bitset_a, i, ary_a[i]);
+		rte_bitset_assign(bitset_b, i, ary_b[i]);
+		ary_d[i] = bool_op(ary_a[i], ary_b[i]);
+	}
+
+	bitset_op(bitset_d, bitset_a, bitset_b, size);
+
+	for (i = 0; i < size; i++)
+		TEST_ASSERT_EQUAL(rte_bitset_test(bitset_d, i),
+				  ary_d[i], "Unexpected value of bit %zd", i);
+
+	return TEST_SUCCESS;
+}
+
+static bool
+bool_or(bool a, bool b)
+{
+	return a || b;
+}
+
+static int
+test_or(void)
+{
+	return test_logic_op(rte_bitset_or, bool_or);
+}
+
+static bool
+bool_and(bool a, bool b)
+{
+	return a && b;
+}
+
+static int
+test_and(void)
+{
+	return test_logic_op(rte_bitset_and, bool_and);
+}
+
+static bool
+bool_xor(bool a, bool b)
+{
+	return a != b;
+}
+
+static int
+test_xor(void)
+{
+	return test_logic_op(rte_bitset_xor, bool_xor);
+}
+
+/* Verify that rte_bitset_complement() inverts a randomly chosen bit. */
+static int
+test_complement(void)
+{
+	int i;
+
+	for (i = 0; i < RAND_ITERATIONS; i++) {
+		const size_t size = 1 + rte_rand_max(RAND_SET_MAX_SIZE - 1);
+
+		RTE_BITSET_DECLARE(src, size);
+		rand_bitset(src, size);
+
+		/* The index must be a size_t; a bool would only hit bit 0/1. */
+		size_t bit_idx = rte_rand_max(size);
+		bool bit_value = rte_bitset_test(src, bit_idx);
+
+		RTE_BITSET_DECLARE(dst, size);
+		rte_bitset_complement(dst, src, size);
+
+		TEST_ASSERT(bit_value != rte_bitset_test(dst, bit_idx),
+			    "Bit %zu was not flipped", bit_idx);
+	}
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_shift(bool right)
+{
+	int i;
+
+	const char *direction = right ? "right" : "left";
+
+	for (i = 0; i < 10000; i++) {
+		const int size = 1 + (int)rte_rand_max(500);
+		const int shift_count = (int)rte_rand_max(1.5 * size);
+		int src_idx;
+
+		RTE_BITSET_DECLARE(src, size);
+		RTE_BITSET_DECLARE(reference, size);
+
+		rte_bitset_init(src, size);
+		rte_bitset_init(reference, size);
+
+		rand_unused_bits(src, size);
+		rand_unused_bits(reference, size);
+
+		for (src_idx = 0; src_idx < size; src_idx++) {
+			bool value = rand_bool();
+
+			rte_bitset_assign(src, src_idx, value);
+
+			int dst_idx = right ? src_idx - shift_count :
+				src_idx + shift_count;
+
+			if (dst_idx >= 0 && dst_idx < size)
+				rte_bitset_assign(reference, dst_idx, value);
+		}
+
+		uint64_t *dst = alloc_bitset(size);
+
+		if (right)
+			rte_bitset_shift_right(dst, src, size, shift_count);
+		else
+			rte_bitset_shift_left(dst, src, size, shift_count);
+
+		TEST_ASSERT(rte_bitset_equal(dst, reference, size),
+			    "Unexpected result from shifting bitset of size "
+			    "%d bits %d bits %s", size, shift_count, direction);
+
+		TEST_ASSERT_EQUAL(free_bitset(dst, size), TEST_SUCCESS,
+				  "Shift %s operation overwrote buffer",
+				  direction);
+	}
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_shift_right(void)
+{
+	return test_shift(true);
+}
+
+static int
+test_shift_left(void)
+{
+	return test_shift(false);
+}
+
+static int
+test_equal(void)
+{
+	const size_t size = 100;
+	RTE_BITSET_DECLARE(bitset_a, size);
+	RTE_BITSET_DECLARE(bitset_b, size);
+
+	rand_buf(bitset_a, RTE_BITSET_SIZE(size));
+	rand_buf(bitset_b, RTE_BITSET_SIZE(size));
+
+	rte_bitset_init(bitset_a, size);
+	rte_bitset_init(bitset_b, size);
+
+	rte_bitset_set(bitset_a, 9);
+	rte_bitset_set(bitset_b, 9);
+	rte_bitset_set(bitset_a, 90);
+	rte_bitset_set(bitset_b, 90);
+
+	if (!rte_bitset_equal(bitset_a, bitset_b, size))
+		return TEST_FAILED;
+
+	/* set unused bit, which should be ignored */
+	rte_bitset_set(&bitset_a[1], 60);
+
+	if (!rte_bitset_equal(bitset_a, bitset_b, size))
+		return TEST_FAILED;
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_copy(void)
+{
+	const size_t size = 100;
+	RTE_BITSET_DECLARE(bitset_a, size);
+	RTE_BITSET_DECLARE(bitset_b, size);
+
+	rand_buf(bitset_a, RTE_BITSET_SIZE(size));
+	rand_buf(bitset_b, RTE_BITSET_SIZE(size));
+
+	rte_bitset_copy(bitset_a, bitset_b, size);
+
+	if (!rte_bitset_equal(bitset_a, bitset_b, size))
+		return TEST_FAILED;
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_to_str(void)
+{
+	char buf[1024];
+	RTE_BITSET_DECLARE(bitset, 128);
+
+	rte_bitset_init(bitset, 128);
+	rte_bitset_set(bitset, 1);
+
+	if (rte_bitset_to_str(bitset, 2, buf, 3) != 3)
+		return TEST_FAILED;
+	if (strcmp(buf, "10") != 0)
+		return TEST_FAILED;
+
+	rte_bitset_set(bitset, 0);
+
+	if (rte_bitset_to_str(bitset, 1, buf, sizeof(buf)) != 2)
+		return TEST_FAILED;
+	if (strcmp(buf, "1") != 0)
+		return TEST_FAILED;
+
+	rte_bitset_init(bitset, 99);
+	rte_bitset_set(bitset, 98);
+
+	if (rte_bitset_to_str(bitset, 99, buf, sizeof(buf)) != 100)
+		return TEST_FAILED;
+
+	if (buf[0] != '1' || strchr(&buf[1], '1') != NULL)
+		return TEST_FAILED;
+
+	if (rte_bitset_to_str(bitset, 128, buf, 64) != -EINVAL)
+		return TEST_FAILED;
+
+	return TEST_SUCCESS;
+}
+
+static struct unit_test_suite bitset_tests  = {
+	.suite_name = "bitset test suite",
+	.unit_test_cases = {
+		TEST_CASE_ST(NULL, NULL, test_set_clear),
+		TEST_CASE_ST(NULL, NULL, test_flip),
+		TEST_CASE_ST(NULL, NULL, test_find),
+		TEST_CASE_ST(NULL, NULL, test_foreach),
+		TEST_CASE_ST(NULL, NULL, test_count),
+		TEST_CASE_ST(NULL, NULL, test_define),
+		TEST_CASE_ST(NULL, NULL, test_or),
+		TEST_CASE_ST(NULL, NULL, test_and),
+		TEST_CASE_ST(NULL, NULL, test_xor),
+		TEST_CASE_ST(NULL, NULL, test_complement),
+		TEST_CASE_ST(NULL, NULL, test_shift_right),
+		TEST_CASE_ST(NULL, NULL, test_shift_left),
+		TEST_CASE_ST(NULL, NULL, test_equal),
+		TEST_CASE_ST(NULL, NULL, test_copy),
+		TEST_CASE_ST(NULL, NULL, test_to_str),
+		TEST_CASES_END()
+	}
+};
+
+static int
+test_bitset(void)
+{
+	return unit_test_suite_runner(&bitset_tests);
+}
+
+REGISTER_FAST_TEST(bitset_autotest, true, true, test_bitset);
-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [RFC v4 3/4] service: use multi-word bitset to represent service flags
  2024-02-16 10:23 ` [RFC v4 1/4] " Mattias Rönnblom
  2024-02-16 10:23   ` [RFC v4 2/4] eal: add bitset test suite Mattias Rönnblom
@ 2024-02-16 10:23   ` Mattias Rönnblom
  2024-02-16 10:23   ` [RFC v4 4/4] event/dsw: optimize serving port logic Mattias Rönnblom
  2024-05-05  7:33   ` [RFC v5 1/6] eal: add bitset type Mattias Rönnblom
  3 siblings, 0 replies; 63+ messages in thread
From: Mattias Rönnblom @ 2024-02-16 10:23 UTC (permalink / raw)
  To: dev
  Cc: hofors, Morten Brørup, Tyler Retzlaff, Stephen Hemminger,
	Harry van Haaren, Mattias Rönnblom

Use a multi-word bitset to track which services are mapped to which
lcores, allowing the RTE_SERVICE_NUM_MAX compile-time constant to be >
64.

Replace array-of-bytes service-currently-active flags with a more
compact multi-word bitset-based representation, reducing memory
footprint somewhat.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
---
 lib/eal/common/rte_service.c | 70 ++++++++++++++----------------------
 1 file changed, 27 insertions(+), 43 deletions(-)

diff --git a/lib/eal/common/rte_service.c b/lib/eal/common/rte_service.c
index d959c91459..ac96ecaca8 100644
--- a/lib/eal/common/rte_service.c
+++ b/lib/eal/common/rte_service.c
@@ -11,6 +11,7 @@
 
 #include <eal_trace_internal.h>
 #include <rte_lcore.h>
+#include <rte_bitset.h>
 #include <rte_branch_prediction.h>
 #include <rte_common.h>
 #include <rte_cycles.h>
@@ -63,11 +64,11 @@ struct service_stats {
 /* the internal values of a service core */
 struct core_state {
 	/* map of services IDs are run on this core */
-	uint64_t service_mask;
+	RTE_BITSET_DECLARE(mapped_services, RTE_SERVICE_NUM_MAX);
 	RTE_ATOMIC(uint8_t) runstate; /* running or stopped */
 	RTE_ATOMIC(uint8_t) thread_active; /* indicates when thread is in service_run() */
 	uint8_t is_service_core; /* set if core is currently a service core */
-	uint8_t service_active_on_lcore[RTE_SERVICE_NUM_MAX];
+	RTE_BITSET_DECLARE(service_active_on_lcore, RTE_SERVICE_NUM_MAX);
 	RTE_ATOMIC(uint64_t) loops;
 	RTE_ATOMIC(uint64_t) cycles;
 	struct service_stats service_stats[RTE_SERVICE_NUM_MAX];
@@ -81,11 +82,6 @@ static uint32_t rte_service_library_initialized;
 int32_t
 rte_service_init(void)
 {
-	/* Hard limit due to the use of an uint64_t-based bitmask (and the
-	 * clzl intrinsic).
-	 */
-	RTE_BUILD_BUG_ON(RTE_SERVICE_NUM_MAX > 64);
-
 	if (rte_service_library_initialized) {
 		EAL_LOG(NOTICE,
 			"service library init() called, init flag %d",
@@ -296,7 +292,7 @@ rte_service_component_unregister(uint32_t id)
 
 	/* clear the run-bit in all cores */
 	for (i = 0; i < RTE_MAX_LCORE; i++)
-		lcore_states[i].service_mask &= ~(UINT64_C(1) << id);
+		rte_bitset_clear(lcore_states[i].mapped_services, id);
 
 	memset(&rte_services[id], 0, sizeof(struct rte_service_spec_impl));
 
@@ -410,7 +406,7 @@ service_runner_do_callback(struct rte_service_spec_impl *s,
 
 /* Expects the service 's' is valid. */
 static int32_t
-service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
+service_run(uint32_t i, struct core_state *cs, const uint64_t *mapped_services,
 	    struct rte_service_spec_impl *s, uint32_t serialize_mt_unsafe)
 {
 	if (!s)
@@ -424,12 +420,12 @@ service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
 			RUNSTATE_RUNNING ||
 	    rte_atomic_load_explicit(&s->app_runstate, rte_memory_order_acquire) !=
 			RUNSTATE_RUNNING ||
-	    !(service_mask & (UINT64_C(1) << i))) {
-		cs->service_active_on_lcore[i] = 0;
+	    !rte_bitset_test(mapped_services, i)) {
+		rte_bitset_clear(cs->service_active_on_lcore, i);
 		return -ENOEXEC;
 	}
 
-	cs->service_active_on_lcore[i] = 1;
+	rte_bitset_set(cs->service_active_on_lcore, i);
 
 	if ((service_mt_safe(s) == 0) && (serialize_mt_unsafe == 1)) {
 		if (!rte_spinlock_trylock(&s->execute_lock))
@@ -454,7 +450,7 @@ rte_service_may_be_active(uint32_t id)
 		return -EINVAL;
 
 	for (i = 0; i < lcore_count; i++) {
-		if (lcore_states[ids[i]].service_active_on_lcore[id])
+		if (rte_bitset_test(lcore_states[ids[i]].service_active_on_lcore, id))
 			return 1;
 	}
 
@@ -474,7 +470,9 @@ rte_service_run_iter_on_app_lcore(uint32_t id, uint32_t serialize_mt_unsafe)
 	 */
 	rte_atomic_fetch_add_explicit(&s->num_mapped_cores, 1, rte_memory_order_relaxed);
 
-	int ret = service_run(id, cs, UINT64_MAX, s, serialize_mt_unsafe);
+	RTE_BITSET_DECLARE(all_services, RTE_SERVICE_NUM_MAX);
+	rte_bitset_set_all(all_services, RTE_SERVICE_NUM_MAX);
+	int ret = service_run(id, cs, all_services, s, serialize_mt_unsafe);
 
 	rte_atomic_fetch_sub_explicit(&s->num_mapped_cores, 1, rte_memory_order_relaxed);
 
@@ -485,7 +483,6 @@ static int32_t
 service_runner_func(void *arg)
 {
 	RTE_SET_USED(arg);
-	uint8_t i;
 	const int lcore = rte_lcore_id();
 	struct core_state *cs = &lcore_states[lcore];
 
@@ -497,20 +494,11 @@ service_runner_func(void *arg)
 	 */
 	while (rte_atomic_load_explicit(&cs->runstate, rte_memory_order_acquire) ==
 			RUNSTATE_RUNNING) {
+		ssize_t id;
 
-		const uint64_t service_mask = cs->service_mask;
-		uint8_t start_id;
-		uint8_t end_id;
-
-		if (service_mask == 0)
-			continue;
-
-		start_id = rte_ctz64(service_mask);
-		end_id = 64 - rte_clz64(service_mask);
-
-		for (i = start_id; i < end_id; i++) {
+		RTE_BITSET_FOREACH_SET(id, cs->mapped_services, RTE_SERVICE_NUM_MAX) {
 			/* return value ignored as no change to code flow */
-			service_run(i, cs, service_mask, service_get(i), 1);
+			service_run(id, cs, cs->mapped_services, service_get(id), 1);
 		}
 
 		rte_atomic_store_explicit(&cs->loops, cs->loops + 1, rte_memory_order_relaxed);
@@ -519,8 +507,7 @@ service_runner_func(void *arg)
 	/* Switch off this core for all services, to ensure that future
 	 * calls to may_be_active() know this core is switched off.
 	 */
-	for (i = 0; i < RTE_SERVICE_NUM_MAX; i++)
-		cs->service_active_on_lcore[i] = 0;
+	rte_bitset_clear_all(cs->service_active_on_lcore, RTE_SERVICE_NUM_MAX);
 
 	/* Use SEQ CST memory ordering to avoid any re-ordering around
 	 * this store, ensuring that once this store is visible, the service
@@ -586,7 +573,7 @@ rte_service_lcore_count_services(uint32_t lcore)
 	if (!cs->is_service_core)
 		return -ENOTSUP;
 
-	return rte_popcount64(cs->service_mask);
+	return rte_bitset_count_set(cs->mapped_services, RTE_SERVICE_NUM_MAX);
 }
 
 int32_t
@@ -639,25 +626,23 @@ service_update(uint32_t sid, uint32_t lcore, uint32_t *set, uint32_t *enabled)
 			!lcore_states[lcore].is_service_core)
 		return -EINVAL;
 
-	uint64_t sid_mask = UINT64_C(1) << sid;
 	if (set) {
-		uint64_t lcore_mapped = lcore_states[lcore].service_mask &
-			sid_mask;
+		uint64_t lcore_mapped = rte_bitset_test(lcore_states[lcore].mapped_services, sid);
 
 		if (*set && !lcore_mapped) {
-			lcore_states[lcore].service_mask |= sid_mask;
+			rte_bitset_set(lcore_states[lcore].mapped_services, sid);
 			rte_atomic_fetch_add_explicit(&rte_services[sid].num_mapped_cores,
 				1, rte_memory_order_relaxed);
 		}
 		if (!*set && lcore_mapped) {
-			lcore_states[lcore].service_mask &= ~(sid_mask);
+			rte_bitset_clear(lcore_states[lcore].mapped_services, sid);
 			rte_atomic_fetch_sub_explicit(&rte_services[sid].num_mapped_cores,
 				1, rte_memory_order_relaxed);
 		}
 	}
 
 	if (enabled)
-		*enabled = !!(lcore_states[lcore].service_mask & (sid_mask));
+		*enabled = rte_bitset_test(lcore_states[lcore].mapped_services, sid);
 
 	return 0;
 }
@@ -699,11 +684,11 @@ set_lcore_state(uint32_t lcore, int32_t state)
 int32_t
 rte_service_lcore_reset_all(void)
 {
-	/* loop over cores, reset all to mask 0 */
+	/* loop over cores, reset all mapped services */
 	uint32_t i;
 	for (i = 0; i < RTE_MAX_LCORE; i++) {
 		if (lcore_states[i].is_service_core) {
-			lcore_states[i].service_mask = 0;
+			rte_bitset_clear_all(lcore_states[i].mapped_services, RTE_SERVICE_NUM_MAX);
 			set_lcore_state(i, ROLE_RTE);
 			/* runstate act as guard variable Use
 			 * store-release memory order here to synchronize
@@ -731,7 +716,7 @@ rte_service_lcore_add(uint32_t lcore)
 	set_lcore_state(lcore, ROLE_SERVICE);
 
 	/* ensure that after adding a core the mask and state are defaults */
-	lcore_states[lcore].service_mask = 0;
+	rte_bitset_clear_all(lcore_states[lcore].mapped_services, RTE_SERVICE_NUM_MAX);
 	/* Use store-release memory order here to synchronize with
 	 * load-acquire in runstate read functions.
 	 */
@@ -814,12 +799,11 @@ rte_service_lcore_stop(uint32_t lcore)
 
 	uint32_t i;
 	struct core_state *cs = &lcore_states[lcore];
-	uint64_t service_mask = cs->service_mask;
 
 	for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
-		int32_t enabled = service_mask & (UINT64_C(1) << i);
-		int32_t service_running = rte_service_runstate_get(i);
-		int32_t only_core = (1 ==
+		bool enabled = rte_bitset_test(cs->mapped_services, i);
+		bool service_running = rte_service_runstate_get(i);
+		bool only_core = (1 ==
 			rte_atomic_load_explicit(&rte_services[i].num_mapped_cores,
 				rte_memory_order_relaxed));
 
-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [RFC v4 4/4] event/dsw: optimize serving port logic
  2024-02-16 10:23 ` [RFC v4 1/4] " Mattias Rönnblom
  2024-02-16 10:23   ` [RFC v4 2/4] eal: add bitset test suite Mattias Rönnblom
  2024-02-16 10:23   ` [RFC v4 3/4] service: use multi-word bitset to represent service flags Mattias Rönnblom
@ 2024-02-16 10:23   ` Mattias Rönnblom
  2024-05-05  7:33   ` [RFC v5 1/6] eal: add bitset type Mattias Rönnblom
  3 siblings, 0 replies; 63+ messages in thread
From: Mattias Rönnblom @ 2024-02-16 10:23 UTC (permalink / raw)
  To: dev
  Cc: hofors, Morten Brørup, Tyler Retzlaff, Stephen Hemminger,
	Harry van Haaren, Mattias Rönnblom

To reduce flow migration overhead, replace the array-based
representation of the set of ports bound to a particular queue
with a multi-word bitset.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
---
 drivers/event/dsw/dsw_evdev.c | 34 +++++++++++++++++++---------------
 drivers/event/dsw/dsw_evdev.h |  3 ++-
 drivers/event/dsw/dsw_event.c | 11 ++++-------
 3 files changed, 25 insertions(+), 23 deletions(-)

diff --git a/drivers/event/dsw/dsw_evdev.c b/drivers/event/dsw/dsw_evdev.c
index 1209e73a9d..a0781e4141 100644
--- a/drivers/event/dsw/dsw_evdev.c
+++ b/drivers/event/dsw/dsw_evdev.c
@@ -118,6 +118,7 @@ dsw_queue_setup(struct rte_eventdev *dev, uint8_t queue_id,
 		queue->schedule_type = conf->schedule_type;
 	}
 
+	rte_bitset_init(queue->serving_ports, DSW_MAX_PORTS);
 	queue->num_serving_ports = 0;
 
 	return 0;
@@ -144,24 +145,19 @@ dsw_queue_release(struct rte_eventdev *dev __rte_unused,
 static void
 queue_add_port(struct dsw_queue *queue, uint16_t port_id)
 {
-	queue->serving_ports[queue->num_serving_ports] = port_id;
+	rte_bitset_set(queue->serving_ports, port_id);
 	queue->num_serving_ports++;
 }
 
 static bool
 queue_remove_port(struct dsw_queue *queue, uint16_t port_id)
 {
-	uint16_t i;
+	if (rte_bitset_test(queue->serving_ports, port_id)) {
+		queue->num_serving_ports--;
+		rte_bitset_clear(queue->serving_ports, port_id);
+		return true;
+	}
 
-	for (i = 0; i < queue->num_serving_ports; i++)
-		if (queue->serving_ports[i] == port_id) {
-			uint16_t last_idx = queue->num_serving_ports - 1;
-			if (i != last_idx)
-				queue->serving_ports[i] =
-					queue->serving_ports[last_idx];
-			queue->num_serving_ports--;
-			return true;
-		}
 	return false;
 }
 
@@ -256,10 +252,18 @@ initial_flow_to_port_assignment(struct dsw_evdev *dsw)
 		struct dsw_queue *queue = &dsw->queues[queue_id];
 		uint16_t flow_hash;
 		for (flow_hash = 0; flow_hash < DSW_MAX_FLOWS; flow_hash++) {
-			uint8_t port_idx =
-				rte_rand() % queue->num_serving_ports;
-			uint8_t port_id =
-				queue->serving_ports[port_idx];
+			uint8_t skip = rte_rand_max(queue->num_serving_ports);
+			uint8_t port_id;
+
+			for (port_id = 0;; port_id++) {
+				if (rte_bitset_test(queue->serving_ports,
+						    port_id)) {
+					if (skip == 0)
+						break;
+					skip--;
+				}
+			}
+
 			dsw->queues[queue_id].flow_to_port_map[flow_hash] =
 				port_id;
 		}
diff --git a/drivers/event/dsw/dsw_evdev.h b/drivers/event/dsw/dsw_evdev.h
index 6416a8a898..503a63cbb2 100644
--- a/drivers/event/dsw/dsw_evdev.h
+++ b/drivers/event/dsw/dsw_evdev.h
@@ -7,6 +7,7 @@
 
 #include <eventdev_pmd.h>
 
+#include <rte_bitset.h>
 #include <rte_event_ring.h>
 #include <rte_eventdev.h>
 
@@ -234,7 +235,7 @@ struct dsw_port {
 
 struct dsw_queue {
 	uint8_t schedule_type;
-	uint8_t serving_ports[DSW_MAX_PORTS];
+	RTE_BITSET_DECLARE(serving_ports, DSW_MAX_PORTS);
 	uint16_t num_serving_ports;
 
 	uint8_t flow_to_port_map[DSW_MAX_FLOWS] __rte_cache_aligned;
diff --git a/drivers/event/dsw/dsw_event.c b/drivers/event/dsw/dsw_event.c
index 93bbeead2e..b855f9ecf1 100644
--- a/drivers/event/dsw/dsw_event.c
+++ b/drivers/event/dsw/dsw_event.c
@@ -447,13 +447,8 @@ static bool
 dsw_is_serving_port(struct dsw_evdev *dsw, uint8_t port_id, uint8_t queue_id)
 {
 	struct dsw_queue *queue = &dsw->queues[queue_id];
-	uint16_t i;
-
-	for (i = 0; i < queue->num_serving_ports; i++)
-		if (queue->serving_ports[i] == port_id)
-			return true;
 
-	return false;
+	return rte_bitset_test(queue->serving_ports, port_id);
 }
 
 static bool
@@ -575,7 +570,9 @@ dsw_schedule(struct dsw_evdev *dsw, uint8_t queue_id, uint16_t flow_hash)
 		/* A single-link queue, or atomic/ordered/parallel but
 		 * with just a single serving port.
 		 */
-		port_id = queue->serving_ports[0];
+		port_id = (uint8_t)rte_bitset_find_first_set(
+			queue->serving_ports, DSW_MAX_PORTS
+		);
 
 	DSW_LOG_DP(DEBUG, "Event with queue_id %d flow_hash %d is scheduled "
 		   "to port %d.\n", queue_id, flow_hash, port_id);
-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [RFC v5 1/6] eal: add bitset type
  2024-02-16 10:23 ` [RFC v4 1/4] " Mattias Rönnblom
                     ` (2 preceding siblings ...)
  2024-02-16 10:23   ` [RFC v4 4/4] event/dsw: optimize serving port logic Mattias Rönnblom
@ 2024-05-05  7:33   ` Mattias Rönnblom
  2024-05-05  7:33     ` [RFC v5 2/6] eal: add bitset test suite Mattias Rönnblom
                       ` (5 more replies)
  3 siblings, 6 replies; 63+ messages in thread
From: Mattias Rönnblom @ 2024-05-05  7:33 UTC (permalink / raw)
  To: dev
  Cc: hofors, Morten Brørup, Tyler Retzlaff, Stephen Hemminger,
	Harry van Haaren, Mattias Rönnblom

Introduce a set of functions and macros that operate on sets of bits,
kept in arrays of 64-bit words.

RTE bitset is designed for bitsets which are larger than what fits in
a single machine word (i.e., 64 bits). For very large bitsets, the
<rte_bitmap.h> API may be a more appropriate choice.

Depends-on: series-31863 ("Improve EAL bit operations API")

RFC v5:
 * Delegate bit test/set/clear/assign/flip to RTE bitops.
 * Note in the documentation that set/clear/assign/flip are not
   atomic.

RFC v4:
 * Add function rte_bitset_flip() to change the value of a bit.
 * Add function rte_bitset_complement(), flipping the value of all bits.
 * Add function rte_bitset_assign(), setting the value of a bit based
   on a 'bool' parameter.
 * Add functions to logically shift the bitset left or right.
 * Add explicit destination bitset to logic operation type functions
   (e.g., rte_bitset_and()), to increase flexibility.
 * Split implementation and test suite into distinct commits.

RFC v3:
 * Split the bitset from the htimer patchset, where it was originally
   hosted.
 * Rebase to current DPDK main.
 * Add note that rte_bitset_init() need not be called if bitset words
   have already been zeroed.
 * Use REGISTER_FAST_TEST instead of REGISTER_TEST_COMMAND.
 * Use rte_popcount64() instead of compiler builtin.

RFC v2:
 * Replaced <sys/types.h> with <stddef.h> include, to properly get
   size_t typedef.
 * Add <rte_compat.h> to get __rte_experimental in <rte_bitset.h>.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
---
 doc/api/doxy-api-index.md    |    1 +
 lib/eal/common/meson.build   |    1 +
 lib/eal/common/rte_bitset.c  |   29 +
 lib/eal/include/meson.build  |    1 +
 lib/eal/include/rte_bitset.h | 1061 ++++++++++++++++++++++++++++++++++
 lib/eal/version.map          |    2 +
 6 files changed, 1095 insertions(+)
 create mode 100644 lib/eal/common/rte_bitset.c
 create mode 100644 lib/eal/include/rte_bitset.h

diff --git a/doc/api/doxy-api-index.md b/doc/api/doxy-api-index.md
index 8c1eb8fafa..1ce04a8edf 100644
--- a/doc/api/doxy-api-index.md
+++ b/doc/api/doxy-api-index.md
@@ -173,6 +173,7 @@ The public API headers are grouped by topics:
   [ring](@ref rte_ring.h),
   [stack](@ref rte_stack.h),
   [tailq](@ref rte_tailq.h),
+  [bitset](@ref rte_bitset.h),
   [bitmap](@ref rte_bitmap.h)
 
 - **packet framework**:
diff --git a/lib/eal/common/meson.build b/lib/eal/common/meson.build
index 22a626ba6f..c1bbf26654 100644
--- a/lib/eal/common/meson.build
+++ b/lib/eal/common/meson.build
@@ -31,6 +31,7 @@ sources += files(
         'eal_common_uuid.c',
         'malloc_elem.c',
         'malloc_heap.c',
+        'rte_bitset.c',
         'rte_malloc.c',
         'rte_random.c',
         'rte_reciprocal.c',
diff --git a/lib/eal/common/rte_bitset.c b/lib/eal/common/rte_bitset.c
new file mode 100644
index 0000000000..35e55a64db
--- /dev/null
+++ b/lib/eal/common/rte_bitset.c
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Ericsson AB
+ */
+
+#include <errno.h>
+
+#include "rte_bitset.h"
+
+ssize_t
+rte_bitset_to_str(const uint64_t *bitset, size_t num_bits, char *buf,
+		  size_t capacity)
+{
+	size_t i;
+
+	if (capacity < (num_bits + 1))
+		return -EINVAL;
+
+	for (i = 0; i < num_bits; i++) {
+		bool value;
+
+		value = rte_bitset_test(bitset, num_bits - 1 - i);
+
+		buf[i] = value ? '1' : '0';
+	}
+
+	buf[num_bits] = '\0';
+
+	return num_bits + 1;
+}
diff --git a/lib/eal/include/meson.build b/lib/eal/include/meson.build
index e94b056d46..4b5f120a66 100644
--- a/lib/eal/include/meson.build
+++ b/lib/eal/include/meson.build
@@ -5,6 +5,7 @@ includes += include_directories('.')
 
 headers += files(
         'rte_alarm.h',
+        'rte_bitset.h',
         'rte_bitmap.h',
         'rte_bitops.h',
         'rte_branch_prediction.h',
diff --git a/lib/eal/include/rte_bitset.h b/lib/eal/include/rte_bitset.h
new file mode 100644
index 0000000000..49a07c77b8
--- /dev/null
+++ b/lib/eal/include/rte_bitset.h
@@ -0,0 +1,1061 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Ericsson AB
+ */
+
+#ifndef _RTE_BITSET_H_
+#define _RTE_BITSET_H_
+
+/**
+ * @file
+ * RTE Bitset
+ *
+ * This file provides functions and macros for querying and
+ * manipulating sets of bits kept in arrays of @c uint64_t-sized
+ * elements.
+ *
+ * The bits in a bitset are numbered from 0 to @c size - 1, with the
+ * lowest index being the least significant bit.
+ *
+ * The bitset array must be properly aligned.
+ *
+ * For optimal performance, the @c size parameter, required by
+ * many of the API's functions, should be a compile-time constant.
+ *
+ * For large bitsets, the rte_bitmap.h API may be more appropriate.
+ *
+ * @warning
+ * All functions modifying a bitset may overwrite any unused bits of
+ * the last word. Such unused bits are ignored by all functions reading
+ * bits.
+ *
+ */
+
+#include <limits.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <rte_bitops.h>
+#include <rte_branch_prediction.h>
+#include <rte_common.h>
+#include <rte_compat.h>
+#include <rte_debug.h>
+#include <rte_memcpy.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * The size (in bytes) of each element in the array used to represent
+ * a bitset.
+ */
+#define RTE_BITSET_WORD_SIZE (sizeof(uint64_t))
+
+/**
+ * The size (in bits) of each element in the array used to represent
+ * a bitset.
+ */
+#define RTE_BITSET_WORD_BITS (RTE_BITSET_WORD_SIZE * CHAR_BIT)
+
+/**
+ * Computes the number of words required to store @c size bits.
+ */
+#define RTE_BITSET_NUM_WORDS(size)					\
+	((size + RTE_BITSET_WORD_BITS - 1) / RTE_BITSET_WORD_BITS)
+
+/**
+ * Computes the amount of memory (in bytes) required to fit a bitset
+ * holding @c size bits.
+ */
+#define RTE_BITSET_SIZE(size)						\
+	((size_t)(RTE_BITSET_NUM_WORDS(size) * RTE_BITSET_WORD_SIZE))
+
+#define __RTE_BITSET_WORD_IDX(bit_num) ((bit_num) / RTE_BITSET_WORD_BITS)
+#define __RTE_BITSET_BIT_OFFSET(bit_num) ((bit_num) % RTE_BITSET_WORD_BITS)
+#define __RTE_BITSET_UNUSED(size)			     \
+	((RTE_BITSET_NUM_WORDS(size) * RTE_BITSET_WORD_BITS) \
+	 - (size))
+#define __RTE_BITSET_USED_MASK(size)			\
+	(UINT64_MAX >> __RTE_BITSET_UNUSED(size))
+
+#define __RTE_BITSET_DELEGATE_N(fun, bitset, bit_num, ...)		\
+	fun(&(bitset)[__RTE_BITSET_WORD_IDX(bit_num)],			\
+	    __RTE_BITSET_BIT_OFFSET(bit_num), __VA_ARGS__)
+
+/* MSVC doesn't have ##__VA_ARGS__, so argument-less -> special case */
+#define __RTE_BITSET_DELEGATE(fun, bitset, bit_num)			\
+	fun(&(bitset)[__RTE_BITSET_WORD_IDX(bit_num)],			\
+	    __RTE_BITSET_BIT_OFFSET(bit_num))
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Declare a bitset.
+ *
+ * Declare (e.g., as a struct field) or define (e.g., as a stack
+ * variable) a bitset of the specified size.
+ *
+ * @param size
+ *   The number of bits the bitset must be able to represent. Must be
+ *   a compile-time constant.
+ * @param name
+ *   The field or variable name of the resulting definition.
+ */
+#define RTE_BITSET_DECLARE(name, size)		\
+	uint64_t name[RTE_BITSET_NUM_WORDS(size)]
+
+#define __RTE_BITSET_FOREACH_LEFT(var, size, start_bit, len)		\
+	((len) - 1 - ((var) >= (start_bit) ? (var) - (start_bit) :	\
+		  (size) - (start_bit) + (var)))
+
+#define __RTE_BITSET_FOREACH(var, bitset, size, start_bit, len, flags)	\
+	for ((var) = __rte_bitset_find(bitset, size, start_bit, len,	\
+				       flags);				\
+	     (var) != -1;						\
+	     (var) = __RTE_BITSET_FOREACH_LEFT(var, size, start_bit,	\
+					       len) > 0	?		\
+		     __rte_bitset_find(bitset, size,			\
+				       ((var) + 1) % (size),		\
+				       __RTE_BITSET_FOREACH_LEFT(var,	\
+								 size,	\
+								 start_bit, \
+								 len),	\
+				       flags) : -1)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Iterate over all bits set.
+ *
+ * This macro iterates over all bits set (i.e., all ones) in the
+ * bitset, in the forward direction (i.e., starting with the least
+ * significant '1').
+ *
+ * @param var
+ *   An iterator variable of type @c ssize_t. For each successive
+ *   iteration, this variable will hold the bit index of a set bit.
+ * @param bitset
+ *   A <tt>const uint64_t *</tt> pointer to the bitset array.
+ * @param size
+ *   The size of the bitset (in bits).
+ */
+
+#define RTE_BITSET_FOREACH_SET(var, bitset, size)			\
+	__RTE_BITSET_FOREACH(var, bitset, size, 0, size, 0)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Iterate over all bits cleared.
+ *
+ * This macro iterates over all bits cleared in the bitset, in the
+ * forward direction (i.e., starting with the lowest-indexed cleared bit).
+ *
+ * @param var
+ *   An iterator variable of type @c ssize_t. For each successive iteration,
+ *   this variable will hold the bit index of a cleared bit.
+ * @param bitset
+ *   A <tt>const uint64_t *</tt> pointer to the bitset array.
+ * @param size
+ *   The size of the bitset (in bits).
+ */
+
+#define RTE_BITSET_FOREACH_CLEAR(var, bitset, size)			\
+	__RTE_BITSET_FOREACH(var, bitset, size, 0, size,		\
+			     __RTE_BITSET_FIND_FLAG_FIND_CLEAR)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Iterate over all bits set within a range.
+ *
+ * This macro iterates over all bits set (i.e., all ones) in the
+ * specified range, in the forward direction (i.e., starting with the
+ * least significant '1').
+ *
+ * @param var
+ *   An iterator variable of type @c ssize_t. For each successive iteration,
+ *   this variable will hold the bit index of a set bit.
+ * @param bitset
+ *   A <tt>const uint64_t *</tt> pointer to the bitset array.
+ * @param size
+ *   The size of the bitset (in bits).
+ * @param start_bit
+ *   The index of the first bit to check. Must be less than @c size.
+ * @param len
+ *   The length (in bits) of the range. @c start_bit + @c len must be less
+ *   than or equal to @c size.
+ */
+
+#define RTE_BITSET_FOREACH_SET_RANGE(var, bitset, size, start_bit,     \
+				     len)			       \
+	__RTE_BITSET_FOREACH(var, bitset, size, start_bit, len, 0)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Iterate over all cleared bits within a range.
+ *
+ * This macro iterates over all bits cleared (i.e., all zeroes) in the
+ * specified range, in the forward direction (i.e., starting with the
+ * least significant '0').
+ *
+ * @param var
+ *   An iterator variable of type @c ssize_t. For each successive iteration,
+ *   this variable will hold the bit index of a cleared bit.
+ * @param bitset
+ *   A <tt>const uint64_t *</tt> pointer to the bitset array.
+ * @param size
+ *   The size of the bitset (in bits).
+ * @param start_bit
+ *   The index of the first bit to check. Must be less than @c size.
+ * @param len
+ *   The length (in bits) of the range. @c start_bit + @c len must be less
+ *   than or equal to @c size.
+ */
+
+#define RTE_BITSET_FOREACH_CLEAR_RANGE(var, bitset, size, start_bit,	\
+				       len)				\
+	__RTE_BITSET_FOREACH(var, bitset, size, start_bit, len,		\
+			     __RTE_BITSET_FIND_FLAG_FIND_CLEAR)
+
+#define RTE_BITSET_FOREACH_SET_WRAP(var, bitset, size, start_bit,      \
+				    len)			       \
+	__RTE_BITSET_FOREACH(var, bitset, size, start_bit, len,	       \
+			     __RTE_BITSET_FIND_FLAG_WRAP)
+
+#define RTE_BITSET_FOREACH_CLEAR_WRAP(var, bitset, size, start_bit,	\
+				       len)				\
+	__RTE_BITSET_FOREACH(var, bitset, size, start_bit, len,		\
+			     __RTE_BITSET_FIND_FLAG_WRAP |		\
+			     __RTE_BITSET_FIND_FLAG_FIND_CLEAR)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Initializes a bitset.
+ *
+ * All bits are cleared.
+ *
+ * In case all words in the bitset array are already set to zero by
+ * other means (e.g., at the time of memory allocation), this function
+ * need not be called.
+ *
+ * @param bitset
+ *   A pointer to the array of bitset 64-bit words.
+ * @param size
+ *   The size of the bitset (in bits).
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_init(uint64_t *bitset, size_t size)
+{
+	memset(bitset, 0, RTE_BITSET_SIZE(size));
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Test if a bit is set.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param bit_num
+ *   Index of the bit to test. Index 0 is the least significant bit.
+ * @return
+ *   Returns true if the bit is '1', and false if the bit is '0'.
+ */
+
+__rte_experimental
+static inline bool
+rte_bitset_test(const uint64_t *bitset, size_t bit_num)
+{
+	return __RTE_BITSET_DELEGATE(rte_bit_test, bitset, bit_num);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Set a bit in the bitset.
+ *
+ * Bits are numbered from 0 to (size - 1) (inclusive).
+ *
+ * The operation is not guaranteed to be atomic.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param bit_num
+ *   The index of the bit to be set.
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_set(uint64_t *bitset, size_t bit_num)
+{
+	__RTE_BITSET_DELEGATE(rte_bit_set, bitset, bit_num);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Clear a bit in the bitset.
+ *
+ * Bits are numbered 0 to (size - 1) (inclusive).
+ *
+ * The operation is not guaranteed to be atomic.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param bit_num
+ *   The index of the bit to be cleared.
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_clear(uint64_t *bitset, size_t bit_num)
+{
+	__RTE_BITSET_DELEGATE(rte_bit_clear, bitset, bit_num);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Set or clear a bit in the bitset.
+ *
+ * Bits are numbered 0 to (size - 1) (inclusive).
+ *
+ * The operation is not guaranteed to be atomic.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param bit_num
+ *   The index of the bit to be set or cleared.
+ * @param bit_value
+ *   Control if the bit should be set or cleared.
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_assign(uint64_t *bitset, size_t bit_num, bool bit_value)
+{
+	__RTE_BITSET_DELEGATE_N(rte_bit_assign, bitset, bit_num, bit_value);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Change the value of a bit in the bitset.
+ *
+ * Bits are numbered 0 to (size - 1) (inclusive).
+ *
+ * The operation is not guaranteed to be atomic.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param bit_num
+ *   The index of the bit to be flipped.
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_flip(uint64_t *bitset, size_t bit_num)
+{
+	__RTE_BITSET_DELEGATE(rte_bit_flip, bitset, bit_num);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Set all bits in the bitset.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param size
+ *   The size of the bitset (in bits).
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_set_all(uint64_t *bitset, size_t size)
+{
+	memset(bitset, 0xFF, RTE_BITSET_SIZE(size));
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Clear all bits in the bitset.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param size
+ *   The size of the bitset (in bits).
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_clear_all(uint64_t *bitset, size_t size)
+{
+	rte_bitset_init(bitset, size);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Count all set bits (also known as the @e weight).
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param size
+ *   The size of the bitset (in bits).
+ * @return
+ *   Returns the number of '1' bits in the bitset.
+ */
+
+__rte_experimental
+static inline size_t
+rte_bitset_count_set(const uint64_t *bitset, size_t size)
+{
+	size_t i;
+	size_t total = 0;
+
+	/*
+	 * Unused bits in the last word may hold arbitrary values, and
+	 * are thus masked out of this count.
+	 */
+	for (i = 0; i < RTE_BITSET_NUM_WORDS(size) - 1; i++)
+		total += rte_popcount64(bitset[i]);
+
+	total += rte_popcount64(bitset[i] & __RTE_BITSET_USED_MASK(size));
+
+	return total;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Count all cleared bits.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param size
+ *   The size of the bitset (in bits).
+ * @return
+ *   Returns the number of '0' bits in the bitset.
+ */
+
+__rte_experimental
+static inline size_t
+rte_bitset_count_clear(const uint64_t *bitset, size_t size)
+{
+	return size - rte_bitset_count_set(bitset, size);
+}
+
+#define __RTE_BITSET_FIND_FLAG_FIND_CLEAR (1U << 0)
+#define __RTE_BITSET_FIND_FLAG_WRAP (1U << 1)
+
+/*
+ * Scan the bit range [start_bit, start_bit + len) for the first '1'
+ * (or, if find_clear is true, the first '0'), one word at a time.
+ * Returns the index of the first match, or -1 if len is zero or the
+ * range holds no match. The range must not extend beyond size; this
+ * is only verified by means of RTE_ASSERT().
+ */
+__rte_experimental
+static inline ssize_t
+__rte_bitset_find_nowrap(const uint64_t *bitset, size_t __rte_unused size,
+			 size_t start_bit, size_t len, bool find_clear)
+{
+	size_t word_idx;
+	size_t offset;
+	size_t end_bit = start_bit + len;
+
+	RTE_ASSERT(end_bit <= size);
+
+	if (unlikely(len == 0))
+		return -1;
+
+	word_idx = __RTE_BITSET_WORD_IDX(start_bit);
+	offset = __RTE_BITSET_BIT_OFFSET(start_bit);
+
+	while (word_idx <= __RTE_BITSET_WORD_IDX(end_bit - 1)) {
+		uint64_t word;
+		int word_ffs;
+
+		word = bitset[word_idx];
+		/* Searching for '0' is searching for '1' in the inverse. */
+		if (find_clear)
+			word = ~word;
+
+		/* Drop the bits located before start_bit in this word. */
+		word >>= offset;
+
+		/* One plus the index of the lowest '1', or 0 if none. */
+		word_ffs = __builtin_ffsll(word);
+
+		if (word_ffs != 0) {
+			ssize_t ffs = start_bit + word_ffs - 1;
+
+			/*
+			 * A match at or beyond end_bit lies outside
+			 * the requested range (e.g., among the
+			 * unused bits of the last word), and thus
+			 * does not count.
+			 */
+			if (unlikely(ffs >= (ssize_t)end_bit))
+				return -1;
+
+			return ffs;
+		}
+
+		/* Continue at the first bit of the next word. */
+		start_bit += (RTE_BITSET_WORD_BITS - offset);
+		word_idx++;
+		offset = 0;
+	}
+
+	return -1;
+
+}
+
+/*
+ * Common implementation of the public find functions.
+ *
+ * The flags select whether '1' or '0' bits are searched for
+ * (__RTE_BITSET_FIND_FLAG_FIND_CLEAR) and whether the scanned range
+ * may wrap around the end of the bitset (__RTE_BITSET_FIND_FLAG_WRAP).
+ * A wrapping range is split into a part reaching the end of the
+ * bitset and a part starting at index 0, searched in that order.
+ * Returns the index of the first match, or -1 if there is none.
+ */
+__rte_experimental
+static inline ssize_t
+__rte_bitset_find(const uint64_t *bitset, size_t size, size_t start_bit,
+		  size_t len, unsigned int flags)
+{
+	const bool find_clear = flags & __RTE_BITSET_FIND_FLAG_FIND_CLEAR;
+	const bool wrap_allowed = flags & __RTE_BITSET_FIND_FLAG_WRAP;
+	const bool past_end = (start_bit + len) > size;
+	size_t head_len;
+	ssize_t match;
+
+	RTE_ASSERT(len <= size);
+	if (!wrap_allowed)
+		RTE_ASSERT(!past_end);
+
+	/* The common case: the whole range fits before the end. */
+	if (!wrap_allowed || !past_end)
+		return __rte_bitset_find_nowrap(bitset, size, start_bit, len,
+						find_clear);
+
+	/* First scan from start_bit up to the end of the bitset... */
+	head_len = size - start_bit;
+
+	match = __rte_bitset_find_nowrap(bitset, size, start_bit, head_len,
+					 find_clear);
+	if (match >= 0)
+		return match;
+
+	/* ...then scan the remaining bits, starting over at index 0. */
+	return __rte_bitset_find_nowrap(bitset, size, 0, len - head_len,
+					find_clear);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Find first bit set.
+ *
+ * Scans the bitset in the forward direction (i.e., starting at the
+ * least significant bit), and returns the index of the first '1'.
+ *
+ * The scan covers all @c size bits of the bitset, starting at index
+ * 0, without wrap-around.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param size
+ *   The size of the bitset (in bits).
+ * @return
+ *   Returns the index of the least significant '1', or -1 if all
+ *   bits are '0'.
+ */
+
+__rte_experimental
+static inline ssize_t
+rte_bitset_find_first_set(const uint64_t *bitset, size_t size)
+{
+	return __rte_bitset_find(bitset, size, 0, size, 0);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Find first bit set at offset.
+ *
+ * Scans the bitset in the forward direction (i.e., starting at the
+ * least significant bit), starting at an offset @c start_bit into the
+ * bitset, and returns the index of the first '1' encountered.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param size
+ *   The size of the bitset (in bits).
+ * @param start_bit
+ *   The index of the first bit to check. Must be less than @c size.
+ * @param len
+ *   The number of bits to scan. @c start_bit + @c len must be less
+ *   than or equal to @c size.
+ * @return
+ *   Returns the index of the first '1' found in the scanned range,
+ *   or -1 if all bits in the range are '0' (or @c len is zero).
+ */
+
+__rte_experimental
+static inline ssize_t
+rte_bitset_find_set(const uint64_t *bitset, size_t size,
+		    size_t start_bit, size_t len)
+{
+	return __rte_bitset_find(bitset, size, start_bit, len, 0);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Find first bit set at offset, with wrap-around.
+ *
+ * Scans the bitset in the forward direction (i.e., starting at the
+ * least significant bit), starting at an offset @c start_bit into the
+ * bitset. If no '1' is encountered before the end of the bitset, the search
+ * will continue at index 0.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param size
+ *   The size of the bitset (in bits).
+ * @param start_bit
+ *   The index of the first bit to check. Must be less than @c size.
+ * @param len
+ *   The number of bits to scan. Must be less than or equal to @c
+ *   size. @c start_bit + @c len may exceed @c size, in which case the
+ *   scan wraps around and continues at index 0.
+ * @return
+ *   Returns the index of the first '1' encountered in scan order, or
+ *   -1 if all scanned bits are '0' (or @c len is zero).
+ */
+
+__rte_experimental
+static inline ssize_t
+rte_bitset_find_set_wrap(const uint64_t *bitset, size_t size,
+			 size_t start_bit, size_t len)
+{
+	return __rte_bitset_find(bitset, size, start_bit, len,
+				 __RTE_BITSET_FIND_FLAG_WRAP);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Find first cleared bit.
+ *
+ * Scans the bitset in the forward direction (i.e., starting at the
+ * least significant bit), and returns the index of the first '0'.
+ *
+ * The scan covers all @c size bits of the bitset, starting at index
+ * 0, without wrap-around.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param size
+ *   The size of the bitset (in bits).
+ * @return
+ *   Returns the index of the least significant '0', or -1 if all
+ *   bits are '1'.
+ */
+
+__rte_experimental
+static inline ssize_t
+rte_bitset_find_first_clear(const uint64_t *bitset, size_t size)
+{
+	return __rte_bitset_find(bitset, size, 0, size,
+				 __RTE_BITSET_FIND_FLAG_FIND_CLEAR);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Find first cleared bit at offset.
+ *
+ * Scans the bitset in the forward direction (i.e., starting at the
+ * least significant bit), starting at an offset @c start_bit into the
+ * bitset, and returns the index of the first '0' encountered.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param size
+ *   The size of the bitset (in bits).
+ * @param start_bit
+ *   The index of the first bit to check. Must be less than @c size.
+ * @param len
+ *   The number of bits to scan. @c start_bit + @c len must be less
+ *   than or equal to @c size.
+ * @return
+ *   Returns the index of the first '0' found in the scanned range,
+ *   or -1 if all bits in the range are '1' (or @c len is zero).
+ */
+
+__rte_experimental
+static inline ssize_t
+rte_bitset_find_clear(const uint64_t *bitset, size_t size,
+			   size_t start_bit, size_t len)
+{
+	return __rte_bitset_find(bitset, size, start_bit, len,
+				 __RTE_BITSET_FIND_FLAG_FIND_CLEAR);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Find first cleared bit at offset, with wrap-around.
+ *
+ * Scans the bitset in the forward direction (i.e., starting at the
+ * least significant bit), starting at an offset @c start_bit into the
+ * bitset. If no '0' is encountered before the end of the bitset, the
+ * search will continue at index 0.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param size
+ *   The size of the bitset (in bits).
+ * @param start_bit
+ *   The index of the first bit to check. Must be less than @c size.
+ * @param len
+ *   The number of bits to scan. Must be less than or equal to @c
+ *   size. @c start_bit + @c len may exceed @c size, in which case the
+ *   scan wraps around and continues at index 0.
+ * @return
+ *   Returns the index of the first '0' encountered in scan order, or
+ *   -1 if all scanned bits are '1' (or @c len is zero).
+ */
+
+__rte_experimental
+static inline ssize_t
+rte_bitset_find_clear_wrap(const uint64_t *bitset, size_t size,
+			   size_t start_bit, size_t len)
+{
+	return __rte_bitset_find(bitset, size, start_bit, len,
+				 __RTE_BITSET_FIND_FLAG_FIND_CLEAR |
+				 __RTE_BITSET_FIND_FLAG_WRAP);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Copy bitset.
+ *
+ * Copy the bits of the @c src_bitset to the @c dst_bitset.
+ *
+ * The bitsets may not overlap and must be of equal size.
+ *
+ * @param dst_bitset
+ *   A pointer to the array of words making up the destination bitset.
+ * @param src_bitset
+ *   A pointer to the array of words making up the source bitset.
+ * @param size
+ *   The size of the bitsets (in bits).
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_copy(uint64_t *__rte_restrict dst_bitset,
+		const uint64_t *__rte_restrict src_bitset,
+		size_t size)
+{
+	/* Whole words are copied, including any unused bits. */
+	rte_memcpy(dst_bitset, src_bitset, RTE_BITSET_SIZE(size));
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Bitwise or two bitsets.
+ *
+ * Compute the bitwise OR of the two equal-size bitsets @c src_bitset0
+ * and @c src_bitset1, word by word, and store the result in
+ * @c dst_bitset.
+ *
+ * @param dst_bitset
+ *   A pointer to the destination bitset.
+ * @param src_bitset0
+ *   A pointer to the first source bitset.
+ * @param src_bitset1
+ *   A pointer to the second source bitset.
+ * @param size
+ *   The size of the bitsets (in bits).
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_or(uint64_t *dst_bitset, const uint64_t *src_bitset0,
+	      const uint64_t *src_bitset1, size_t size)
+{
+	const size_t num_words = RTE_BITSET_NUM_WORDS(size);
+	size_t w;
+
+	for (w = 0; w < num_words; w++) {
+		const uint64_t word0 = src_bitset0[w];
+		const uint64_t word1 = src_bitset1[w];
+
+		dst_bitset[w] = word0 | word1;
+	}
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Bitwise and two bitsets.
+ *
+ * Compute the bitwise AND of the two equal-size bitsets @c src_bitset0
+ * and @c src_bitset1, word by word, and store the result in
+ * @c dst_bitset.
+ *
+ * @param dst_bitset
+ *   A pointer to the destination bitset.
+ * @param src_bitset0
+ *   A pointer to the first source bitset.
+ * @param src_bitset1
+ *   A pointer to the second source bitset.
+ * @param size
+ *   The size of the bitsets (in bits).
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_and(uint64_t *dst_bitset, const uint64_t *src_bitset0,
+	       const uint64_t *src_bitset1, size_t size)
+{
+	const size_t num_words = RTE_BITSET_NUM_WORDS(size);
+	size_t w;
+
+	for (w = 0; w < num_words; w++) {
+		const uint64_t word0 = src_bitset0[w];
+		const uint64_t word1 = src_bitset1[w];
+
+		dst_bitset[w] = word0 & word1;
+	}
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Bitwise xor two bitsets.
+ *
+ * Compute the bitwise XOR of the two equal-size bitsets @c src_bitset0
+ * and @c src_bitset1, word by word, and store the result in
+ * @c dst_bitset.
+ *
+ * @param dst_bitset
+ *   A pointer to the destination bitset.
+ * @param src_bitset0
+ *   A pointer to the first source bitset.
+ * @param src_bitset1
+ *   A pointer to the second source bitset.
+ * @param size
+ *   The size of the bitsets (in bits).
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_xor(uint64_t *dst_bitset, const uint64_t *src_bitset0,
+	       const uint64_t *src_bitset1, size_t size)
+{
+	const size_t num_words = RTE_BITSET_NUM_WORDS(size);
+	size_t w;
+
+	for (w = 0; w < num_words; w++) {
+		const uint64_t word0 = src_bitset0[w];
+		const uint64_t word1 = src_bitset1[w];
+
+		dst_bitset[w] = word0 ^ word1;
+	}
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Compute the bitwise complement of a bitset.
+ *
+ * Invert every word of @c src_bitset, and store the result in @c
+ * dst_bitset. Whole words are inverted, so any unused bits of the
+ * last word are flipped as well.
+ *
+ * @param dst_bitset
+ *   A pointer to the destination bitset.
+ * @param src_bitset
+ *   A pointer to the source bitset.
+ * @param size
+ *   The size of the bitsets (in bits).
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_complement(uint64_t *dst_bitset, const uint64_t *src_bitset,
+		      size_t size)
+{
+	const size_t num_words = RTE_BITSET_NUM_WORDS(size);
+	size_t w;
+
+	for (w = 0; w < num_words; w++) {
+		const uint64_t word = src_bitset[w];
+
+		dst_bitset[w] = ~word;
+	}
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Shift bitset left.
+ *
+ * Perform a logical shift left of (multiply) @c src_bitset, and store
+ * the result in @c dst_bitset.
+ *
+ * Bit positions vacated by the shift are set to '0'. Bits shifted
+ * beyond the last word of the bitset are lost.
+ *
+ * @param dst_bitset
+ *   A pointer to the destination bitset.
+ * @param src_bitset
+ *   A pointer to the source bitset.
+ * @param size
+ *   The size of the bitsets (in bits).
+ * @param shift_bits
+ *   The number of bits to shift the bitset.
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_shift_left(uint64_t *dst_bitset, const uint64_t *src_bitset,
+		      size_t size, size_t shift_bits)
+{
+	/* The shift is split into a whole-word part and a sub-word part. */
+	const int src_word_offset = shift_bits / RTE_BITSET_WORD_BITS;
+	const int src_bit_offset = shift_bits % RTE_BITSET_WORD_BITS;
+	unsigned int dst_idx;
+
+	for (dst_idx = 0; dst_idx < RTE_BITSET_NUM_WORDS(size); dst_idx++) {
+		/*
+		 * Each destination word is assembled from (at most)
+		 * two adjacent source words. A negative index means
+		 * there is no such source word, and zeros are used.
+		 */
+		int src_high_idx = dst_idx - src_word_offset;
+		uint64_t low_bits = 0;
+		uint64_t high_bits = 0;
+
+		if (src_high_idx >= 0) {
+			int src_low_idx = src_high_idx - 1;
+
+			high_bits = src_bitset[src_high_idx] << src_bit_offset;
+
+			/*
+			 * The bit offset check also avoids a shift by
+			 * a full word width below.
+			 */
+			if (src_bit_offset > 0 && src_low_idx >= 0)
+				low_bits = src_bitset[src_low_idx] >>
+					(RTE_BITSET_WORD_BITS - src_bit_offset);
+		}
+		dst_bitset[dst_idx] = low_bits | high_bits;
+	}
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Shift bitset right.
+ *
+ * Perform a logical shift right of (divide) @c src_bitset, and store
+ * the result in @c dst_bitset.
+ *
+ * Bit positions vacated by the shift are set to '0'. The unused bits
+ * of the last source word are masked off, and are thus never shifted
+ * into the result.
+ *
+ * @param dst_bitset
+ *   A pointer to the destination bitset.
+ * @param src_bitset
+ *   A pointer to the source bitset.
+ * @param size
+ *   The size of the bitsets (in bits).
+ * @param shift_bits
+ *   The number of bits to shift the bitset.
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_shift_right(uint64_t *dst_bitset, const uint64_t *src_bitset,
+		       size_t size, size_t shift_bits)
+{
+	const int num_words = RTE_BITSET_NUM_WORDS(size);
+	const uint64_t used_mask = __RTE_BITSET_USED_MASK(size);
+	/* The shift is split into a whole-word part and a sub-word part. */
+	const int src_word_offset = shift_bits / RTE_BITSET_WORD_BITS;
+	const int src_bit_offset = shift_bits % RTE_BITSET_WORD_BITS;
+	int dst_idx;
+
+	for (dst_idx = 0; dst_idx < num_words; dst_idx++) {
+		/*
+		 * Each destination word is assembled from (at most)
+		 * two adjacent source words. An index at or beyond
+		 * num_words means there is no such source word, and
+		 * zeros are used.
+		 */
+		int src_low_idx = src_word_offset + dst_idx;
+		int src_high_idx = src_low_idx + 1;
+		uint64_t src_low_word_bits = 0;
+		uint64_t src_high_word_bits = 0;
+
+		if (src_low_idx < num_words) {
+			src_low_word_bits = src_bitset[src_low_idx];
+
+			/* Only the last word may hold unused bits. */
+			if (src_low_idx == (num_words - 1))
+				src_low_word_bits &= used_mask;
+
+			src_low_word_bits >>= src_bit_offset;
+
+			/*
+			 * The bit offset check also avoids a shift by
+			 * a full word width below.
+			 */
+			if (src_bit_offset > 0 && src_high_idx < num_words) {
+				src_high_word_bits = src_bitset[src_high_idx];
+
+				if (src_high_idx == (num_words - 1))
+					src_high_word_bits &= used_mask;
+
+				src_high_word_bits <<=
+					(RTE_BITSET_WORD_BITS - src_bit_offset);
+			}
+		}
+		dst_bitset[dst_idx] = src_low_word_bits | src_high_word_bits;
+	}
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Compare two bitsets.
+ *
+ * Compare two bitsets for equality. Neither bitset is modified.
+ *
+ * @param bitset_a
+ *   A pointer to the first bitset to compare.
+ * @param bitset_b
+ *   A pointer to the second bitset to compare.
+ * @param size
+ *   The size of the bitsets (in bits).
+ * @return
+ *   Returns true if the two bitsets are equal, and false otherwise.
+ */
+
+__rte_experimental
+static inline bool
+rte_bitset_equal(const uint64_t *bitset_a, const uint64_t *bitset_b,
+		 size_t size)
+{
+	size_t i;
+	uint64_t last_a, last_b;
+
+	/* All words but the last are compared in their entirety. */
+	for (i = 0; i < RTE_BITSET_NUM_WORDS(size) - 1; i++)
+		if (bitset_a[i] != bitset_b[i])
+			return false;
+
+	/*
+	 * Left-shifting the last word by __RTE_BITSET_UNUSED(size)
+	 * discards that many of its most significant bits, so that
+	 * they do not take part in the comparison.
+	 */
+	last_a = bitset_a[i] << __RTE_BITSET_UNUSED(size);
+	last_b = bitset_b[i] << __RTE_BITSET_UNUSED(size);
+
+	return last_a == last_b;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Converts a bitset to a string.
+ *
+ * This function writes a string representation of the bitset to
+ * the supplied buffer.
+ *
+ * Each bit is represented either by '0' or '1' in the output, with
+ * the first (left-most) character in the output being the most
+ * significant bit. The resulting string is NUL terminated.
+ *
+ * @param bitset
+ *   A pointer to the array of bitset 64-bit words.
+ * @param size
+ *   The number of bits the bitset represents.
+ * @param buf
+ *   A buffer to hold the output.
+ * @param capacity
+ *   The size of the buffer (in bytes). Must be @c size + 1 or larger.
+ * @return
+ *   Returns the number of bytes written (i.e., @c size + 1), or -EINVAL
+ *   in case the buffer capacity was too small.
+ */
+
+__rte_experimental
+ssize_t
+rte_bitset_to_str(const uint64_t *bitset, size_t size, char *buf,
+		  size_t capacity);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_BITSET_H_ */
diff --git a/lib/eal/version.map b/lib/eal/version.map
index 3df50c3fbb..254d3fd4b2 100644
--- a/lib/eal/version.map
+++ b/lib/eal/version.map
@@ -396,6 +396,8 @@ EXPERIMENTAL {
 
 	# added in 24.03
 	rte_vfio_get_device_info; # WINDOWS_NO_EXPORT
+
+	rte_bitset_to_str;
 };
 
 INTERNAL {
-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [RFC v5 2/6] eal: add bitset test suite
  2024-05-05  7:33   ` [RFC v5 1/6] eal: add bitset type Mattias Rönnblom
@ 2024-05-05  7:33     ` Mattias Rönnblom
  2024-05-05  7:33     ` [RFC v5 3/6] eal: add atomic bitset functions Mattias Rönnblom
                       ` (4 subsequent siblings)
  5 siblings, 0 replies; 63+ messages in thread
From: Mattias Rönnblom @ 2024-05-05  7:33 UTC (permalink / raw)
  To: dev
  Cc: hofors, Morten Brørup, Tyler Retzlaff, Stephen Hemminger,
	Harry van Haaren, Mattias Rönnblom

Add test suite to exercise <rte_bitset.h>.

RFC v5:
 * Parameterize tests to allow reuse across both atomic and non-atomic
   functions.

RFC v4:
 * Fix signed char issue in test cases. (Stephen Hemminger)
 * Add test cases for logic operations.
 * Use the unit test suite runner helper.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
---
 app/test/meson.build   |   1 +
 app/test/test_bitset.c | 894 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 895 insertions(+)
 create mode 100644 app/test/test_bitset.c

diff --git a/app/test/meson.build b/app/test/meson.build
index 7d909039ae..633af5ce05 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -33,6 +33,7 @@ source_file_deps = {
     'test_bitcount.c': [],
     'test_bitmap.c': [],
     'test_bitops.c': [],
+    'test_bitset.c': [],
     'test_bitratestats.c': ['metrics', 'bitratestats', 'ethdev'] + sample_packet_forward_deps,
     'test_bpf.c': ['bpf', 'net'],
     'test_byteorder.c': [],
diff --git a/app/test/test_bitset.c b/app/test/test_bitset.c
new file mode 100644
index 0000000000..b3496df1c0
--- /dev/null
+++ b/app/test/test_bitset.c
@@ -0,0 +1,894 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Ericsson AB
+ */
+
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include <rte_random.h>
+
+#include <rte_bitset.h>
+
+#include "test.h"
+
+#define MAGIC UINT64_C(0xdeadbeefdeadbeef)
+
+/* Fill the n-byte buffer with random data, one byte at a time. */
+static void
+rand_buf(void *buf, size_t n)
+{
+	unsigned char *cursor = buf;
+	unsigned char *const end = cursor + n;
+
+	while (cursor != end)
+		*cursor++ = rte_rand();
+}
+
+/*
+ * Allocate a bitset of the specified size (in bits), surrounded by
+ * one guard word on each side. The guard words are set to MAGIC, and
+ * are verified by free_bitset() to detect buffer over- and underruns.
+ *
+ * The bitset words are filled with random data, to catch code
+ * relying on a newly allocated bitset being zeroed.
+ *
+ * Returns a pointer to the first bitset word (i.e., the word
+ * following the leading guard word). Panics on allocation failure.
+ */
+static uint64_t *
+alloc_bitset(size_t size)
+{
+	const size_t num_words = RTE_BITSET_NUM_WORDS(size);
+	uint64_t *p;
+
+	p = malloc(RTE_BITSET_SIZE(size) + 2 * sizeof(uint64_t));
+
+	if (p == NULL)
+		rte_panic("Unable to allocate memory\n");
+
+	p[0] = MAGIC;
+
+	/*
+	 * Randomize the bitset words proper, which start after the
+	 * leading guard word. Starting at p[0] would leave the last
+	 * bitset word untouched.
+	 */
+	rand_buf(&p[1], RTE_BITSET_SIZE(size));
+
+	p[num_words + 1] = MAGIC;
+
+	return p + 1;
+}
+
+
+/*
+ * Release a bitset obtained from alloc_bitset(), after first having
+ * verified that the guard words on either side of it are intact.
+ *
+ * Returns TEST_FAILED, without freeing the memory, in case a guard
+ * word has been overwritten, and TEST_SUCCESS otherwise.
+ */
+static int
+free_bitset(uint64_t *bitset, size_t size)
+{
+	uint64_t *const base = bitset - 1;
+	const size_t tail_idx = RTE_BITSET_NUM_WORDS(size) + 1;
+
+	if (base[0] != MAGIC || base[tail_idx] != MAGIC)
+		return TEST_FAILED;
+
+	free(base);
+
+	return TEST_SUCCESS;
+}
+
+/* Produce a random boolean, derived from rte_rand_max(2). */
+static bool
+rand_bool(void)
+{
+	return rte_rand_max(2) != 0;
+}
+
+/* Assign a random boolean to each of the len array elements. */
+static void
+rand_bool_ary(bool *ary, size_t len)
+{
+	bool *elem = ary;
+	bool *const end = ary + len;
+
+	while (elem != end)
+		*elem++ = rand_bool();
+}
+
+/*
+ * Set a random selection of the unused bits in the last word of the
+ * bitset. The used bits are left as-is.
+ */
+static void
+rand_unused_bits(uint64_t *bitset, size_t size)
+{
+	const uint64_t unused_mask = ~__RTE_BITSET_USED_MASK(size);
+	const size_t last_word = RTE_BITSET_NUM_WORDS(size) - 1;
+
+	bitset[last_word] |= rte_rand() & unused_mask;
+}
+
+/*
+ * Give every bit of the bitset a random value, and set a random
+ * selection of the unused bits in its last word as well.
+ */
+static void
+rand_bitset(uint64_t *bitset, size_t size)
+{
+	size_t bit_num;
+
+	rte_bitset_init(bitset, size);
+
+	for (bit_num = 0; bit_num < size; bit_num++) {
+		const bool value = rand_bool();
+
+		rte_bitset_assign(bitset, bit_num, value);
+	}
+
+	rand_unused_bits(bitset, size);
+}
+
+/*
+ * Function types for the single-bit operations. The test cases take
+ * the functions to exercise as arguments of these types, which allows
+ * the same test code to be run against different implementations.
+ */
+typedef bool test_fun(const uint64_t *bitset, size_t bit_num);
+typedef void set_fun(uint64_t *bitset, size_t bit_num);
+typedef void clear_fun(uint64_t *bitset, size_t bit_num);
+typedef void assign_fun(uint64_t *bitset, size_t bit_num, bool value);
+typedef void flip_fun(uint64_t *bitset, size_t bit_num);
+
+/*
+ * Set or clear every bit of a bitset of the specified size according
+ * to a random reference array, and verify that all bits read back
+ * with the expected values.
+ *
+ * Returns TEST_SUCCESS on success, or TEST_FAILED in case a bit held
+ * an unexpected value or the bitset's guard words were overwritten.
+ */
+static int
+test_set_clear_size(test_fun test_fun, set_fun set_fun, clear_fun clear_fun,
+		    size_t size)
+{
+	size_t i;
+	bool reference[size];
+	uint64_t *bitset;
+
+	rand_bool_ary(reference, size);
+
+	bitset = alloc_bitset(size);
+
+	TEST_ASSERT(bitset != NULL, "Failed to allocate memory");
+
+	rte_bitset_init(bitset, size);
+
+	for (i = 0; i < size; i++) {
+		if (reference[i])
+			set_fun(bitset, i);
+		else
+			clear_fun(bitset, i);
+	}
+
+	for (i = 0; i < size; i++)
+		if (reference[i] != test_fun(bitset, i)) {
+			/* Don't leak the bitset on the failure path. */
+			free_bitset(bitset, size);
+			return TEST_FAILED;
+		}
+
+	TEST_ASSERT_EQUAL(free_bitset(bitset, size), TEST_SUCCESS,
+			  "Buffer over- or underrun detected");
+
+	return TEST_SUCCESS;
+}
+
+/*
+ * The number of rounds run by the randomized test cases, and the
+ * upper bound used when drawing a random bitset size (in bits).
+ */
+#define RAND_ITERATIONS (10000)
+#define RAND_SET_MAX_SIZE (1000)
+
+/*
+ * Run the set/clear test RAND_ITERATIONS times, each time with a
+ * new, randomly sized, bitset. Stops at the first failure.
+ */
+static int
+test_set_clear_fun(test_fun test_fun, set_fun set_fun, clear_fun clear_fun)
+{
+	size_t round;
+
+	for (round = 0; round < RAND_ITERATIONS; round++) {
+		const size_t size = 1 + rte_rand_max(RAND_SET_MAX_SIZE - 1);
+		const int rc = test_set_clear_size(test_fun, set_fun,
+						   clear_fun, size);
+
+		if (rc != TEST_SUCCESS)
+			return TEST_FAILED;
+	}
+
+	return TEST_SUCCESS;
+}
+
+/* Run the set/clear test against rte_bitset_test/set/clear(). */
+static int
+test_set_clear(void)
+{
+	return test_set_clear_fun(rte_bitset_test, rte_bitset_set,
+				  rte_bitset_clear);
+}
+
+/*
+ * Flip each bit of a random bitset of the specified size, one at a
+ * time, and verify that the targeted bit changed value, and that no
+ * other bit did. The latter is done by comparing against a copy of
+ * the bitset, in which the target bit has been assigned the inverted
+ * value by means of assign_fun.
+ */
+static int
+test_flip_size(test_fun test_fun, assign_fun assign_fun, flip_fun flip_fun,
+	       size_t size)
+{
+	size_t i;
+	uint64_t *bitset;
+
+	bitset = alloc_bitset(size);
+
+	TEST_ASSERT(bitset != NULL, "Failed to allocate memory");
+
+	rand_bitset(bitset, size);
+
+	for (i = 0; i < size; i++) {
+		RTE_BITSET_DECLARE(reference, size);
+
+		rte_bitset_copy(reference, bitset, size);
+
+		bool value = test_fun(bitset, i);
+
+		flip_fun(bitset, i);
+
+		/* i is a size_t, and thus printed using %zu. */
+		TEST_ASSERT(test_fun(bitset, i) != value,
+			    "Bit %zu was not flipped", i);
+
+		assign_fun(reference, i, !value);
+
+		TEST_ASSERT(rte_bitset_equal(bitset, reference, size),
+			    "Not only the target bit %zu was flipped", i);
+	}
+
+	TEST_ASSERT_EQUAL(free_bitset(bitset, size), TEST_SUCCESS,
+			  "Buffer over- or underrun detected");
+
+	return TEST_SUCCESS;
+}
+
+/*
+ * Run the flip test RAND_ITERATIONS times, each time with a new,
+ * randomly sized, bitset. Stops at the first failure.
+ */
+static int
+test_flip_fun(test_fun test_fun, assign_fun assign_fun, flip_fun flip_fun)
+{
+	size_t round;
+
+	for (round = 0; round < RAND_ITERATIONS; round++) {
+		const size_t size = 1 + rte_rand_max(RAND_SET_MAX_SIZE - 1);
+		const int rc = test_flip_size(test_fun, assign_fun, flip_fun,
+					      size);
+
+		if (rc != TEST_SUCCESS)
+			return TEST_FAILED;
+	}
+
+	return TEST_SUCCESS;
+}
+
+/* Run the flip test against rte_bitset_test/assign/flip(). */
+static int
+test_flip(void)
+{
+	return test_flip_fun(rte_bitset_test, rte_bitset_assign,
+			     rte_bitset_flip);
+}
+
+/*
+ * Reference implementation of the find operations, operating on an
+ * array of booleans. Examines len elements, starting at index start
+ * and wrapping around at num_bools, and returns the index of the
+ * first element equal to set, or -1 if there is no such element.
+ */
+static ssize_t
+find(const bool *ary, size_t num_bools, size_t start, size_t len, bool set)
+{
+	size_t i;
+
+	for (i = 0; i < len; i++) {
+		/* The modulo makes the scan wrap around to index 0. */
+		ssize_t idx = (start + i) % num_bools;
+
+		if (ary[idx] == set)
+			return idx;
+	}
+
+	return -1;
+}
+
+/* Reference search for the first true element; see find(). */
+static ssize_t
+find_set(const bool *ary, size_t num_bools, size_t start, size_t len)
+{
+	return find(ary, num_bools, start, len, true);
+}
+
+/* Reference search for the first false element; see find(). */
+static ssize_t
+find_clear(const bool *ary, size_t num_bools, size_t start, size_t len)
+{
+	return find(ary, num_bools, start, len, false);
+}
+
+/* The number of random searches performed per bitset by test_find_size(). */
+#define FFS_ITERATIONS (100)
+
+/*
+ * Populate a bitset of the specified size with random bits, mirrored
+ * in a reference array of booleans, and perform FFS_ITERATIONS
+ * searches over random ranges. Each result is compared against the
+ * result of the reference implementation.
+ *
+ * Searches for '1' bits if set is true, and for '0' bits otherwise.
+ */
+static int
+test_find_size(size_t size, bool set)
+{
+	uint64_t *bitset;
+	bool reference[size];
+	size_t i;
+
+	bitset = alloc_bitset(size);
+
+	TEST_ASSERT(bitset != NULL, "Failed to allocate memory");
+
+	rte_bitset_init(bitset, size);
+
+	for (i = 0; i < size; i++) {
+		bool bit = rand_bool();
+		reference[i] = bit;
+
+		if (bit)
+			rte_bitset_set(bitset, i);
+		else /* redundant, still useful for testing */
+			rte_bitset_clear(bitset, i);
+	}
+
+	for (i = 0; i < FFS_ITERATIONS; i++) {
+		size_t start_bit = rte_rand_max(size);
+		size_t len = rte_rand_max(size + 1);
+		bool full_range = len == size && start_bit == 0;
+		bool wraps = start_bit + len > size;
+		ssize_t rc;
+
+		/*
+		 * A wrapping range requires the wrap variant. For
+		 * other ranges, the function to use is picked at
+		 * random among the applicable ones.
+		 */
+		if (set) {
+			if (full_range && rand_bool())
+				rc = rte_bitset_find_first_set(bitset,
+							       size);
+			else if (wraps || rand_bool()) {
+				rc = rte_bitset_find_set_wrap(bitset, size,
+							      start_bit, len);
+
+			} else
+				rc = rte_bitset_find_set(bitset, size,
+							 start_bit, len);
+
+			if (rc != find_set(reference, size, start_bit,
+					   len))
+				return TEST_FAILED;
+		} else {
+			if (full_range && rand_bool())
+				rc = rte_bitset_find_first_clear(bitset,
+								 size);
+			else if (wraps || rand_bool())
+				rc = rte_bitset_find_clear_wrap(bitset,
+								size,
+								start_bit, len);
+			else
+				rc = rte_bitset_find_clear(bitset, size,
+							   start_bit, len);
+
+			if (rc != find_clear(reference, size, start_bit,
+					     len))
+				return TEST_FAILED;
+		}
+
+	}
+
+	TEST_ASSERT_EQUAL(free_bitset(bitset, size), TEST_SUCCESS,
+			  "Buffer over- or underrun detected");
+
+	return TEST_SUCCESS;
+}
+
+/* Run the find test, searching for '1' bits. */
+static int
+test_find_set_size(size_t size)
+{
+	return test_find_size(size, true);
+}
+
+/* Run the find test, searching for '0' bits. */
+static int
+test_find_clear_size(size_t size)
+{
+	return test_find_size(size, false);
+}
+
+/*
+ * Run both the find-set and the find-clear test RAND_ITERATIONS
+ * times, each time with a new random bitset size. Stops at the first
+ * failure.
+ */
+static int
+test_find(void)
+{
+	size_t round;
+
+	for (round = 0; round < RAND_ITERATIONS; round++) {
+		const size_t size = 2 + rte_rand_max(RAND_SET_MAX_SIZE - 2);
+
+		if (test_find_set_size(size) != TEST_SUCCESS ||
+		    test_find_clear_size(size) != TEST_SUCCESS)
+			return TEST_FAILED;
+	}
+
+	return TEST_SUCCESS;
+}
+
+/*
+ * Note that a foreach iteration has visited bit match_idx, by
+ * incrementing the corresponding element of the calls array.
+ *
+ * Returns TEST_FAILED, leaving the array untouched, if the index is
+ * negative or not less than size, and TEST_SUCCESS otherwise.
+ */
+static int
+record_match(ssize_t match_idx, size_t size, int *calls)
+{
+	const bool valid = match_idx >= 0 && (size_t)match_idx < size;
+
+	if (!valid)
+		return TEST_FAILED;
+
+	calls[match_idx] += 1;
+
+	return TEST_SUCCESS;
+}
+
+/*
+ * Exercise the RTE_BITSET_FOREACH_*() macros on a bitset of the
+ * specified size.
+ *
+ * A random range of the bitset is populated from a reference array
+ * of booleans, while the rest of the bitset is left holding random
+ * data. The range is then iterated over, with the number of visits
+ * to each bit being recorded. Finally, it's verified that every
+ * matching bit in the range was visited exactly once, and that no
+ * non-matching bit in the range was visited.
+ *
+ * If may_wrap is true, the range may wrap around the end of the
+ * bitset, and the wrap variants of the macros are used. If set is
+ * true, iteration is over '1' bits, and otherwise over '0' bits.
+ */
+static int
+test_foreach_size(ssize_t size, bool may_wrap, bool set)
+{
+	bool reference[size];
+	int calls[size];
+	uint64_t *bitset;
+	ssize_t i;
+	ssize_t start_bit;
+	ssize_t len;
+	bool full_range;
+	size_t total_calls = 0;
+
+	rand_bool_ary(reference, size);
+
+	bitset = alloc_bitset(size);
+
+	TEST_ASSERT(bitset != NULL, "Failed to allocate memory");
+
+	memset(calls, 0, sizeof(calls));
+
+	start_bit = rte_rand_max(size);
+	len = may_wrap ? rte_rand_max(size + 1) :
+		rte_rand_max(size - start_bit + 1);
+
+	rte_bitset_init(bitset, size);
+
+	/* random data in the unused bits should not matter */
+	rand_buf(bitset, RTE_BITSET_SIZE(size));
+
+	for (i = start_bit; i < start_bit + len; i++) {
+		size_t idx = i % size;
+
+		if (reference[idx])
+			rte_bitset_set(bitset, idx);
+		else
+			rte_bitset_clear(bitset, idx);
+
+		if (rte_bitset_test(bitset, idx) != reference[idx])
+			return TEST_FAILED;
+	}
+
+	full_range = (len == size && start_bit == 0);
+
+	/* XXX: verify iteration order as well */
+	if (set) {
+		if (full_range && rand_bool()) {
+			RTE_BITSET_FOREACH_SET(i, bitset, size) {
+				if (record_match(i, size, calls) !=
+				    TEST_SUCCESS)
+					return TEST_FAILED;
+			}
+		} else if (may_wrap) {
+			RTE_BITSET_FOREACH_SET_WRAP(i, bitset, size,
+						    start_bit, len) {
+				if (record_match(i, size, calls) !=
+				    TEST_SUCCESS) {
+					printf("failed\n");
+					return TEST_FAILED;
+				}
+			}
+		} else {
+			RTE_BITSET_FOREACH_SET_RANGE(i, bitset, size,
+						     start_bit, len) {
+				if (record_match(i, size, calls) !=
+				    TEST_SUCCESS)
+					return TEST_FAILED;
+			}
+		}
+	} else {
+		if (full_range && rand_bool()) {
+			RTE_BITSET_FOREACH_CLEAR(i, bitset, size)
+				if (record_match(i, size, calls) !=
+				    TEST_SUCCESS)
+					return TEST_FAILED;
+		} else if (may_wrap) {
+			RTE_BITSET_FOREACH_CLEAR_WRAP(i, bitset, size,
+						      start_bit, len) {
+				if (record_match(i, size, calls) !=
+				    TEST_SUCCESS)
+					return TEST_FAILED;
+			}
+		} else {
+			RTE_BITSET_FOREACH_CLEAR_RANGE(i, bitset, size,
+						       start_bit, len)
+				if (record_match(i, size, calls) !=
+				    TEST_SUCCESS)
+					return TEST_FAILED;
+		}
+	}
+
+	for (i = 0; i < len; i++) {
+		size_t idx = (start_bit + i) % size;
+
+		/* A matching bit must be visited exactly once. */
+		if (reference[idx] == set && calls[idx] != 1) {
+			printf("bit %zu shouldn't have been found %d "
+			       "times\n", idx, calls[idx]);
+			return TEST_FAILED;
+		}
+
+		/* A non-matching bit must not be visited at all. */
+		if (reference[idx] != set && calls[idx] != 0) {
+			printf("bit %zu doesn't match, but was found %d "
+			       "times\n", idx, calls[idx]);
+			return TEST_FAILED;
+		}
+
+		total_calls += calls[idx];
+	}
+
+	/*
+	 * For the full range, the number of visits should agree with
+	 * the count functions.
+	 */
+	if (full_range) {
+		size_t count;
+
+		count = set ? rte_bitset_count_set(bitset, size) :
+			rte_bitset_count_clear(bitset, size);
+
+		if (count != total_calls)
+			return TEST_FAILED;
+	}
+
+	TEST_ASSERT_EQUAL(free_bitset(bitset, size), TEST_SUCCESS,
+			  "Buffer over- or underrun detected");
+
+	return TEST_SUCCESS;
+}
+
+/*
+ * Run the foreach test RAND_ITERATIONS times. In each round, a
+ * random bitset size is drawn, and all four combinations of
+ * non-wrapping/wrapping and set/clear iteration are exercised.
+ */
+static int
+test_foreach(void)
+{
+	/* { may_wrap, set } */
+	static const bool variants[][2] = {
+		{ false, true },
+		{ false, false },
+		{ true, true },
+		{ true, false }
+	};
+	const size_t num_variants = sizeof(variants) / sizeof(variants[0]);
+	size_t round;
+
+	for (round = 0; round < RAND_ITERATIONS; round++) {
+		const size_t size = 1 + rte_rand_max(RAND_SET_MAX_SIZE - 1);
+		size_t v;
+
+		for (v = 0; v < num_variants; v++)
+			if (test_foreach_size(size, variants[v][0],
+					      variants[v][1]) != TEST_SUCCESS)
+				return TEST_FAILED;
+	}
+
+	return TEST_SUCCESS;
+}
+
+/*
+ * Verify rte_bitset_count_set() and rte_bitset_count_clear() on a
+ * bitset of the specified size, for an empty bitset, a full bitset,
+ * and a bitset with a single, randomly selected, bit set.
+ */
+static int
+test_count_size(size_t size)
+{
+	uint64_t *bitset;
+
+	bitset = alloc_bitset(size);
+
+	TEST_ASSERT(bitset != NULL, "Failed to allocate memory");
+
+	rte_bitset_init(bitset, size);
+
+	/* Bits set among the unused bits must not affect the counts. */
+	rand_unused_bits(bitset, size);
+
+	if (rte_bitset_count_set(bitset, size) != 0)
+		return TEST_FAILED;
+
+	if (rte_bitset_count_clear(bitset, size) != size)
+		return TEST_FAILED;
+
+	rte_bitset_set_all(bitset, size);
+
+	if (rte_bitset_count_set(bitset, size) != size)
+		return TEST_FAILED;
+
+	if (rte_bitset_count_clear(bitset, size) != 0)
+		return TEST_FAILED;
+
+	rte_bitset_clear_all(bitset, size);
+
+	if (rte_bitset_count_set(bitset, size) != 0)
+		return TEST_FAILED;
+
+	if (rte_bitset_count_clear(bitset, size) != size)
+		return TEST_FAILED;
+
+	/* A single bit set, at a random position. */
+	rte_bitset_set(bitset, rte_rand_max(size));
+
+	if (rte_bitset_count_set(bitset, size) != 1)
+		return TEST_FAILED;
+
+	if (rte_bitset_count_clear(bitset, size) != (size - 1))
+		return TEST_FAILED;
+
+	rte_bitset_clear_all(bitset, size);
+	if (rte_bitset_count_set(bitset, size) != 0)
+		return TEST_FAILED;
+	if (rte_bitset_count_clear(bitset, size) != size)
+		return TEST_FAILED;
+
+	rte_bitset_set_all(bitset, size);
+	if (rte_bitset_count_set(bitset, size) != size)
+		return TEST_FAILED;
+	if (rte_bitset_count_clear(bitset, size) != 0)
+		return TEST_FAILED;
+
+	TEST_ASSERT_EQUAL(free_bitset(bitset, size), TEST_SUCCESS,
+			  "Buffer over- or underrun detected");
+
+	return TEST_SUCCESS;
+}
+
+/*
+ * Run the count test for a number of fixed bitset sizes, on and
+ * around the word boundaries, followed by RAND_ITERATIONS random
+ * sizes. Stops at the first failure.
+ */
+static int
+test_count(void)
+{
+	static const size_t fixed_sizes[] = { 128, 1, 63, 64, 65 };
+	const size_t num_fixed = sizeof(fixed_sizes) / sizeof(fixed_sizes[0]);
+	size_t i;
+
+	for (i = 0; i < num_fixed; i++)
+		if (test_count_size(fixed_sizes[i]) != TEST_SUCCESS)
+			return TEST_FAILED;
+
+	for (i = 0; i < RAND_ITERATIONS; i++) {
+		const size_t size = 1 + rte_rand_max(RAND_SET_MAX_SIZE - 1);
+
+		if (test_count_size(size) != TEST_SUCCESS)
+			return TEST_FAILED;
+	}
+
+	return TEST_SUCCESS;
+}
+
+#define GEN_DECLARE(size)						\
+	{								\
+		RTE_BITSET_DECLARE(bitset, size);			\
+		size_t idx;						\
+									\
+		idx = rte_rand_max(size);				\
+		rte_bitset_init(bitset, size);				\
+									\
+		rte_bitset_set(bitset, idx);				\
+		if (!rte_bitset_test(bitset, idx))			\
+			return TEST_FAILED;				\
+		if (rte_bitset_count_set(bitset, size) != 1)		\
+			return TEST_FAILED;				\
+		return TEST_SUCCESS;					\
+	}
+
+static int
+test_define(void)
+{
+	GEN_DECLARE(1);
+	GEN_DECLARE(64);
+	GEN_DECLARE(65);
+	GEN_DECLARE(4097);
+}
+
+/*
+ * Verify a binary bitset operation against the equivalent operation
+ * on booleans.
+ *
+ * Two randomly sized bitsets are populated from random boolean
+ * arrays, bitset_op is applied on the bitsets, and every bit of the
+ * result is compared to the result of applying bool_op to the
+ * corresponding pair of booleans.
+ */
+static int test_logic_op(void (*bitset_op)(uint64_t *, const uint64_t *,
+					   const uint64_t *, size_t),
+			 bool (*bool_op)(bool, bool))
+{
+	const size_t size = 1 + rte_rand_max(200);
+	RTE_BITSET_DECLARE(bitset_a, size);
+	RTE_BITSET_DECLARE(bitset_b, size);
+	RTE_BITSET_DECLARE(bitset_d, size);
+
+	bool ary_a[size];
+	bool ary_b[size];
+	bool ary_d[size];
+
+	rand_bool_ary(ary_a, size);
+	rand_bool_ary(ary_b, size);
+
+	/*
+	 * Assigning a bit is a read-modify-write operation on the
+	 * word holding it, so the source bitsets are initialized
+	 * first, to avoid operating on uninitialized memory.
+	 */
+	rte_bitset_init(bitset_a, size);
+	rte_bitset_init(bitset_b, size);
+
+	size_t i;
+	for (i = 0; i < size; i++) {
+		rte_bitset_assign(bitset_a, i, ary_a[i]);
+		rte_bitset_assign(bitset_b, i, ary_b[i]);
+		ary_d[i] = bool_op(ary_a[i], ary_b[i]);
+	}
+
+	bitset_op(bitset_d, bitset_a, bitset_b, size);
+
+	/* i is a size_t, and thus printed using %zu. */
+	for (i = 0; i < size; i++)
+		TEST_ASSERT_EQUAL(rte_bitset_test(bitset_d, i),
+				  ary_d[i], "Unexpected value of bit %zu", i);
+
+	return TEST_SUCCESS;
+}
+
+/* Boolean reference for rte_bitset_or(): logical OR. */
+static bool
+bool_or(bool a, bool b)
+{
+	return a || b;
+}
+
+/* Verify rte_bitset_or() against bool_or(). */
+static int
+test_or(void)
+{
+	return test_logic_op(rte_bitset_or, bool_or);
+}
+
+/* Boolean reference for rte_bitset_and(): logical AND. */
+static bool
+bool_and(bool a, bool b)
+{
+	return a && b;
+}
+
+/* Verify rte_bitset_and() against bool_and(). */
+static int
+test_and(void)
+{
+	return test_logic_op(rte_bitset_and, bool_and);
+}
+
+/* Single-bit reference for exclusive OR: true if the inputs differ. */
+static bool
+bool_xor(bool a, bool b)
+{
+	return a ? !b : b;
+}
+
+/* Check rte_bitset_xor() against the bool_xor() per-bit reference. */
+static int
+test_xor(void)
+{
+	return test_logic_op(rte_bitset_xor, bool_xor);
+}
+
+/*
+ * Verify rte_bitset_complement().
+ *
+ * For RAND_ITERATIONS randomly sized and randomly populated bitsets,
+ * pick one random bit and check that its value in the complemented
+ * bitset is the inverse of its value in the source bitset.
+ *
+ * Returns TEST_SUCCESS if the bit was flipped in every iteration;
+ * otherwise the failing assertion returns from this function.
+ */
+static int
+test_complement(void)
+{
+	int i;
+
+	for (i = 0; i < RAND_ITERATIONS; i++) {
+		const size_t size = 1 + rte_rand_max(RAND_SET_MAX_SIZE - 1);
+
+		RTE_BITSET_DECLARE(src, size);
+
+		rand_bitset(src, size);
+
+		/*
+		 * The index must be kept in a size_t. Stored in a
+		 * bool, it would collapse to 0 or 1, and no bit
+		 * beyond the two lowest would ever be verified.
+		 */
+		size_t bit_idx = rte_rand_max(size);
+		bool bit_value = rte_bitset_test(src, bit_idx);
+
+		RTE_BITSET_DECLARE(dst, size);
+
+		rte_bitset_complement(dst, src, size);
+
+		TEST_ASSERT(bit_value != rte_bitset_test(dst, bit_idx),
+			    "Bit %zu was not flipped", bit_idx);
+	}
+
+	return TEST_SUCCESS;
+}
+
+/*
+ * Verify rte_bitset_shift_right() (right == true) or
+ * rte_bitset_shift_left() (right == false).
+ *
+ * In each of 10000 iterations, a source bitset of random size (1-500
+ * bits) is filled with random values while a reference bitset is
+ * built with every bit moved to its shifted position. The shifted
+ * output must then be equal to the reference.
+ *
+ * Returns TEST_SUCCESS if all iterations pass; otherwise the failing
+ * assertion returns from this function.
+ */
+static int
+test_shift(bool right)
+{
+	int i;
+
+	const char *direction = right ? "right" : "left";
+
+	for (i = 0; i < 10000; i++) {
+		const int size = 1 + (int)rte_rand_max(500);
+		/* shift counts at, or beyond, the set size are included */
+		const int shift_count = (int)rte_rand_max(1.5 * size);
+		int src_idx;
+
+		RTE_BITSET_DECLARE(src, size);
+		RTE_BITSET_DECLARE(reference, size);
+
+		rte_bitset_init(src, size);
+		rte_bitset_init(reference, size);
+
+		/*
+		 * NOTE(review): presumably scrambles the bits beyond
+		 * 'size' in the last word - confirm against the
+		 * helper's definition.
+		 */
+		rand_unused_bits(src, size);
+		rand_unused_bits(reference, size);
+
+		for (src_idx = 0; src_idx < size; src_idx++) {
+			bool value = rand_bool();
+
+			rte_bitset_assign(src, src_idx, value);
+
+			/* a right shift moves bits toward lower indices */
+			int dst_idx = right ? src_idx - shift_count :
+				src_idx + shift_count;
+
+			/* bits shifted out of the set are dropped */
+			if (dst_idx >= 0 && dst_idx < size)
+				rte_bitset_assign(reference, dst_idx, value);
+		}
+
+		uint64_t *dst = alloc_bitset(size);
+
+		if (right)
+			rte_bitset_shift_right(dst, src, size, shift_count);
+		else
+			rte_bitset_shift_left(dst, src, size, shift_count);
+
+		TEST_ASSERT(rte_bitset_equal(dst, reference, size),
+			    "Unexpected result from shifting bitset of size "
+			    "%d bits %d bits %s", size, shift_count, direction);
+
+		/* free_bitset() also reports writes outside the bitset */
+		TEST_ASSERT_EQUAL(free_bitset(dst, size), TEST_SUCCESS,
+				  "Shift %s operation overwrote buffer",
+				  direction);
+	}
+
+	return TEST_SUCCESS;
+}
+
+/* Run the shift test in the right (toward lower indices) direction. */
+static int
+test_shift_right(void)
+{
+	return test_shift(true);
+}
+
+/* Run the shift test in the left (toward higher indices) direction. */
+static int
+test_shift_left(void)
+{
+	return test_shift(false);
+}
+
+/*
+ * Verify rte_bitset_equal() on two 100-bit sets.
+ *
+ * The sets must compare equal when the same bits are set in both,
+ * and must still compare equal after a bit beyond the set size has
+ * been set in one of them.
+ */
+static int
+test_equal(void)
+{
+	const size_t size = 100;
+	const size_t common_bits[] = { 9, 90 };
+	RTE_BITSET_DECLARE(bitset_a, size);
+	RTE_BITSET_DECLARE(bitset_b, size);
+	size_t n;
+
+	/* presumably leaves garbage for rte_bitset_init() to clear */
+	rand_buf(bitset_a, RTE_BITSET_SIZE(size));
+	rand_buf(bitset_b, RTE_BITSET_SIZE(size));
+
+	rte_bitset_init(bitset_a, size);
+	rte_bitset_init(bitset_b, size);
+
+	for (n = 0; n < sizeof(common_bits) / sizeof(common_bits[0]); n++) {
+		rte_bitset_set(bitset_a, common_bits[n]);
+		rte_bitset_set(bitset_b, common_bits[n]);
+	}
+
+	if (!rte_bitset_equal(bitset_a, bitset_b, size))
+		return TEST_FAILED;
+
+	/* set unused bit, which should be ignored */
+	rte_bitset_set(&bitset_a[1], 60);
+
+	return rte_bitset_equal(bitset_a, bitset_b, size) ?
+		TEST_SUCCESS : TEST_FAILED;
+}
+
+/*
+ * Verify rte_bitset_copy(): after a copy between two independently
+ * randomized 100-bit sets, the sets must compare equal.
+ */
+static int
+test_copy(void)
+{
+	const size_t size = 100;
+	RTE_BITSET_DECLARE(lhs, size);
+	RTE_BITSET_DECLARE(rhs, size);
+
+	rand_buf(lhs, RTE_BITSET_SIZE(size));
+	rand_buf(rhs, RTE_BITSET_SIZE(size));
+
+	rte_bitset_copy(lhs, rhs, size);
+
+	return rte_bitset_equal(lhs, rhs, size) ? TEST_SUCCESS : TEST_FAILED;
+}
+
+/*
+ * Verify rte_bitset_to_str(): the produced string, the returned
+ * length (which includes the terminating NUL), and the -EINVAL
+ * result for a too-small output buffer.
+ */
+static int
+test_to_str(void)
+{
+	char buf[1024];
+	RTE_BITSET_DECLARE(bitset, 128);
+
+	rte_bitset_init(bitset, 128);
+	rte_bitset_set(bitset, 1);
+
+	/* two bits in an exactly fitting, three-byte buffer */
+	if (rte_bitset_to_str(bitset, 2, buf, 3) != 3 ||
+	    strcmp(buf, "10") != 0)
+		return TEST_FAILED;
+
+	rte_bitset_set(bitset, 0);
+
+	/* only the least significant bit is printed */
+	if (rte_bitset_to_str(bitset, 1, buf, sizeof(buf)) != 2 ||
+	    strcmp(buf, "1") != 0)
+		return TEST_FAILED;
+
+	rte_bitset_init(bitset, 99);
+	rte_bitset_set(bitset, 98);
+
+	if (rte_bitset_to_str(bitset, 99, buf, sizeof(buf)) != 100)
+		return TEST_FAILED;
+
+	/* the most significant bit comes first; no other bit is set */
+	if (!(buf[0] == '1' && strchr(buf + 1, '1') == NULL))
+		return TEST_FAILED;
+
+	/* 128 bits cannot fit in 64 bytes */
+	if (rte_bitset_to_str(bitset, 128, buf, 64) != -EINVAL)
+		return TEST_FAILED;
+
+	return TEST_SUCCESS;
+}
+
+/*
+ * The bitset test suite. None of the test cases use a setup or
+ * teardown function (both are NULL).
+ */
+static struct unit_test_suite bitset_tests  = {
+	.suite_name = "bitset test suite",
+	.unit_test_cases = {
+		TEST_CASE_ST(NULL, NULL, test_set_clear),
+		TEST_CASE_ST(NULL, NULL, test_flip),
+		TEST_CASE_ST(NULL, NULL, test_find),
+		TEST_CASE_ST(NULL, NULL, test_foreach),
+		TEST_CASE_ST(NULL, NULL, test_count),
+		TEST_CASE_ST(NULL, NULL, test_define),
+		TEST_CASE_ST(NULL, NULL, test_or),
+		TEST_CASE_ST(NULL, NULL, test_and),
+		TEST_CASE_ST(NULL, NULL, test_xor),
+		TEST_CASE_ST(NULL, NULL, test_complement),
+		TEST_CASE_ST(NULL, NULL, test_shift_right),
+		TEST_CASE_ST(NULL, NULL, test_shift_left),
+		TEST_CASE_ST(NULL, NULL, test_equal),
+		TEST_CASE_ST(NULL, NULL, test_copy),
+		TEST_CASE_ST(NULL, NULL, test_to_str),
+		TEST_CASES_END()
+	}
+};
+
+/* Entry point of the bitset autotest: runs the whole bitset suite. */
+static int
+test_bitset(void)
+{
+	return unit_test_suite_runner(&bitset_tests);
+}
+
+/*
+ * NOTE(review): the two 'true' arguments are presumably the fast-test
+ * suitability flags (no-hugepage and sanitizer runs) - confirm
+ * against the REGISTER_FAST_TEST() definition.
+ */
+REGISTER_FAST_TEST(bitset_autotest, true, true, test_bitset);
-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [RFC v5 3/6] eal: add atomic bitset functions
  2024-05-05  7:33   ` [RFC v5 1/6] eal: add bitset type Mattias Rönnblom
  2024-05-05  7:33     ` [RFC v5 2/6] eal: add bitset test suite Mattias Rönnblom
@ 2024-05-05  7:33     ` Mattias Rönnblom
  2024-05-05  7:33     ` [RFC v5 4/6] eal: add unit tests for atomic bitset operations Mattias Rönnblom
                       ` (3 subsequent siblings)
  5 siblings, 0 replies; 63+ messages in thread
From: Mattias Rönnblom @ 2024-05-05  7:33 UTC (permalink / raw)
  To: dev
  Cc: hofors, Morten Brørup, Tyler Retzlaff, Stephen Hemminger,
	Harry van Haaren, Mattias Rönnblom

Extend the bitset API with atomic versions of the most basic bitset
operations.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
---
 lib/eal/include/rte_bitset.h | 155 +++++++++++++++++++++++++++++++++++
 1 file changed, 155 insertions(+)

diff --git a/lib/eal/include/rte_bitset.h b/lib/eal/include/rte_bitset.h
index 49a07c77b8..c0441b0e22 100644
--- a/lib/eal/include/rte_bitset.h
+++ b/lib/eal/include/rte_bitset.h
@@ -376,6 +376,161 @@ rte_bitset_flip(uint64_t *bitset, size_t bit_num)
 	__RTE_BITSET_DELEGATE(rte_bit_flip, bitset, bit_num);
 }
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Atomically test if a bit is set.
+ *
+ * Atomically test, using the specified memory ordering, if a bit in
+ * a bitset is set.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param bit_num
+ *   Index of the bit to test. Index 0 is the least significant bit.
+ * @param memory_order
+ *   The memory order to use (e.g., @c rte_memory_order_relaxed).
+ * @return
+ *   Returns true if the bit is '1', and false if the bit is '0'.
+ */
+
+__rte_experimental
+static inline bool
+rte_bitset_atomic_test(const uint64_t *bitset, size_t bit_num,
+		       int memory_order)
+{
+	return __RTE_BITSET_DELEGATE_N(rte_bit_atomic_test, bitset, bit_num,
+				       memory_order);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Atomically set a bit in the bitset.
+ *
+ * Set a bit in a bitset as an atomic operation, with the specified
+ * memory ordering.
+ *
+ * rte_bitset_atomic_set() is multi-thread safe, provided all threads
+ * acting in parallel on the same bitset do so through
+ * @c rte_bitset_atomic_*() functions.
+ *
+ * Bits are numbered from 0 to (size - 1) (inclusive).
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param bit_num
+ *   The index of the bit to be set.
+ * @param memory_order
+ *   The memory order to use (e.g., @c rte_memory_order_relaxed).
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_atomic_set(uint64_t *bitset, size_t bit_num, int memory_order)
+{
+	__RTE_BITSET_DELEGATE_N(rte_bit_atomic_set, bitset, bit_num,
+				memory_order);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Atomically clear a bit in the bitset.
+ *
+ * Clear a bit in a bitset as an atomic operation, with the specified
+ * memory ordering.
+ *
+ * rte_bitset_atomic_clear() is multi-thread safe, provided all
+ * threads acting in parallel on the same bitset do so through
+ * @c rte_bitset_atomic_*() functions.
+ *
+ * Bits are numbered from 0 to (size - 1) (inclusive).
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param bit_num
+ *   The index of the bit to be cleared.
+ * @param memory_order
+ *   The memory order to use (e.g., @c rte_memory_order_relaxed).
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_atomic_clear(uint64_t *bitset, size_t bit_num, int memory_order)
+{
+	__RTE_BITSET_DELEGATE_N(rte_bit_atomic_clear, bitset, bit_num,
+				memory_order);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Atomically set or clear a bit in the bitset.
+ *
+ * Assign a value to a bit in a bitset as an atomic operation, with
+ * the specified memory ordering.
+ *
+ * rte_bitset_atomic_assign() is multi-thread safe, provided all
+ * threads acting in parallel on the same bitset do so through
+ * @c rte_bitset_atomic_*() functions.
+ *
+ * Bits are numbered from 0 to (size - 1) (inclusive).
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param bit_num
+ *   The index of the bit to be set or cleared.
+ * @param bit_value
+ *   Control if the bit should be set or cleared.
+ * @param memory_order
+ *   The memory order to use (e.g., @c rte_memory_order_relaxed).
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_atomic_assign(uint64_t *bitset, size_t bit_num, bool bit_value,
+			 int memory_order)
+{
+	__RTE_BITSET_DELEGATE_N(rte_bit_atomic_assign, bitset, bit_num,
+				bit_value, memory_order);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Atomically change the value of a bit in the bitset.
+ *
+ * Flip a bit in a bitset as an atomic operation, with the specified
+ * memory ordering.
+ *
+ * rte_bitset_atomic_flip() is multi-thread safe, provided all threads
+ * acting in parallel on the same bitset do so through
+ * @c rte_bitset_atomic_*() functions.
+ *
+ * Bits are numbered from 0 to (size - 1) (inclusive).
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param bit_num
+ *   The index of the bit to be flipped.
+ * @param memory_order
+ *   The memory order to use (e.g., @c rte_memory_order_relaxed).
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_atomic_flip(uint64_t *bitset, size_t bit_num, int memory_order)
+{
+	__RTE_BITSET_DELEGATE_N(rte_bit_atomic_flip, bitset, bit_num,
+				memory_order);
+}
+
 /**
  * @warning
  * @b EXPERIMENTAL: this API may change without prior notice.
-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [RFC v5 4/6] eal: add unit tests for atomic bitset operations
  2024-05-05  7:33   ` [RFC v5 1/6] eal: add bitset type Mattias Rönnblom
  2024-05-05  7:33     ` [RFC v5 2/6] eal: add bitset test suite Mattias Rönnblom
  2024-05-05  7:33     ` [RFC v5 3/6] eal: add atomic bitset functions Mattias Rönnblom
@ 2024-05-05  7:33     ` Mattias Rönnblom
  2024-05-05  7:33     ` [RFC v5 5/6] service: use multi-word bitset to represent service flags Mattias Rönnblom
                       ` (2 subsequent siblings)
  5 siblings, 0 replies; 63+ messages in thread
From: Mattias Rönnblom @ 2024-05-05  7:33 UTC (permalink / raw)
  To: dev
  Cc: hofors, Morten Brørup, Tyler Retzlaff, Stephen Hemminger,
	Harry van Haaren, Mattias Rönnblom

Extend bitset tests to cover the basic operation of the
rte_bitset_atomic_*() family of functions.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
---
 app/test/test_bitset.c | 48 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/app/test/test_bitset.c b/app/test/test_bitset.c
index b3496df1c0..32224a1eee 100644
--- a/app/test/test_bitset.c
+++ b/app/test/test_bitset.c
@@ -222,6 +222,52 @@ test_flip(void)
 			     rte_bitset_flip);
 }
 
+/*
+ * Adapter reducing rte_bitset_atomic_test() to the (bitset, bit_num)
+ * signature passed to the generic test helpers, always using relaxed
+ * memory ordering.
+ */
+static bool
+bitset_atomic_test(const uint64_t *bitset, size_t bit_num)
+{
+	return rte_bitset_atomic_test(bitset, bit_num,
+				      rte_memory_order_relaxed);
+}
+
+/* Adapter: rte_bitset_atomic_set() with relaxed memory ordering. */
+static void
+bitset_atomic_set(uint64_t *bitset, size_t bit_num)
+{
+	rte_bitset_atomic_set(bitset, bit_num, rte_memory_order_relaxed);
+}
+
+/* Adapter: rte_bitset_atomic_clear() with relaxed memory ordering. */
+static void
+bitset_atomic_clear(uint64_t *bitset, size_t bit_num)
+{
+	rte_bitset_atomic_clear(bitset, bit_num, rte_memory_order_relaxed);
+}
+
+/* Adapter: rte_bitset_atomic_flip() with relaxed memory ordering. */
+static void
+bitset_atomic_flip(uint64_t *bitset, size_t bit_num)
+{
+	rte_bitset_atomic_flip(bitset, bit_num, rte_memory_order_relaxed);
+}
+
+/* Adapter: rte_bitset_atomic_assign() with relaxed memory ordering. */
+static void
+bitset_atomic_assign(uint64_t *bitset, size_t bit_num, bool bit_value)
+{
+	rte_bitset_atomic_assign(bitset, bit_num, bit_value,
+				 rte_memory_order_relaxed);
+}
+
+/*
+ * Run the generic set/clear test with the atomic test, set and clear
+ * adapters.
+ */
+static int
+test_atomic_set_clear(void)
+{
+	return test_set_clear_fun(bitset_atomic_test, bitset_atomic_set,
+				  bitset_atomic_clear);
+}
+
+/*
+ * Run the generic flip test with the atomic test, assign and flip
+ * adapters.
+ */
+static int
+test_atomic_flip(void)
+{
+	return test_flip_fun(bitset_atomic_test, bitset_atomic_assign,
+			     bitset_atomic_flip);
+}
+
 static ssize_t
 find(const bool *ary, size_t num_bools, size_t start, size_t len, bool set)
 {
@@ -868,6 +914,8 @@ static struct unit_test_suite bitset_tests  = {
 	.unit_test_cases = {
 		TEST_CASE_ST(NULL, NULL, test_set_clear),
 		TEST_CASE_ST(NULL, NULL, test_flip),
+		TEST_CASE_ST(NULL, NULL, test_atomic_set_clear),
+		TEST_CASE_ST(NULL, NULL, test_atomic_flip),
 		TEST_CASE_ST(NULL, NULL, test_find),
 		TEST_CASE_ST(NULL, NULL, test_foreach),
 		TEST_CASE_ST(NULL, NULL, test_count),
-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [RFC v5 5/6] service: use multi-word bitset to represent service flags
  2024-05-05  7:33   ` [RFC v5 1/6] eal: add bitset type Mattias Rönnblom
                       ` (2 preceding siblings ...)
  2024-05-05  7:33     ` [RFC v5 4/6] eal: add unit tests for atomic bitset operations Mattias Rönnblom
@ 2024-05-05  7:33     ` Mattias Rönnblom
  2024-05-05  7:33     ` [RFC v5 6/6] event/dsw: optimize serving port logic Mattias Rönnblom
  2024-08-09 20:14     ` [PATCH 1/6] eal: add bitset type Mattias Rönnblom
  5 siblings, 0 replies; 63+ messages in thread
From: Mattias Rönnblom @ 2024-05-05  7:33 UTC (permalink / raw)
  To: dev
  Cc: hofors, Morten Brørup, Tyler Retzlaff, Stephen Hemminger,
	Harry van Haaren, Mattias Rönnblom

Use a multi-word bitset to track which services are mapped to which
lcores, allowing the RTE_SERVICE_NUM_MAX compile-time constant to be >
64.

Replace array-of-bytes service-currently-active flags with a more
compact multi-word bitset-based representation, reducing memory
footprint somewhat.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
---
 lib/eal/common/rte_service.c | 70 ++++++++++++++----------------------
 1 file changed, 27 insertions(+), 43 deletions(-)

diff --git a/lib/eal/common/rte_service.c b/lib/eal/common/rte_service.c
index 56379930b6..ec0f47e141 100644
--- a/lib/eal/common/rte_service.c
+++ b/lib/eal/common/rte_service.c
@@ -11,6 +11,7 @@
 
 #include <eal_trace_internal.h>
 #include <rte_lcore.h>
+#include <rte_bitset.h>
 #include <rte_branch_prediction.h>
 #include <rte_common.h>
 #include <rte_cycles.h>
@@ -63,11 +64,11 @@ struct service_stats {
 /* the internal values of a service core */
 struct __rte_cache_aligned core_state {
 	/* map of services IDs are run on this core */
-	uint64_t service_mask;
+	RTE_BITSET_DECLARE(mapped_services, RTE_SERVICE_NUM_MAX);
 	RTE_ATOMIC(uint8_t) runstate; /* running or stopped */
 	RTE_ATOMIC(uint8_t) thread_active; /* indicates when thread is in service_run() */
 	uint8_t is_service_core; /* set if core is currently a service core */
-	uint8_t service_active_on_lcore[RTE_SERVICE_NUM_MAX];
+	RTE_BITSET_DECLARE(service_active_on_lcore, RTE_SERVICE_NUM_MAX);
 	RTE_ATOMIC(uint64_t) loops;
 	RTE_ATOMIC(uint64_t) cycles;
 	struct service_stats service_stats[RTE_SERVICE_NUM_MAX];
@@ -81,11 +82,6 @@ static uint32_t rte_service_library_initialized;
 int32_t
 rte_service_init(void)
 {
-	/* Hard limit due to the use of an uint64_t-based bitmask (and the
-	 * clzl intrinsic).
-	 */
-	RTE_BUILD_BUG_ON(RTE_SERVICE_NUM_MAX > 64);
-
 	if (rte_service_library_initialized) {
 		EAL_LOG(NOTICE,
 			"service library init() called, init flag %d",
@@ -296,7 +292,7 @@ rte_service_component_unregister(uint32_t id)
 
 	/* clear the run-bit in all cores */
 	for (i = 0; i < RTE_MAX_LCORE; i++)
-		lcore_states[i].service_mask &= ~(UINT64_C(1) << id);
+		rte_bitset_clear(lcore_states[i].mapped_services, id);
 
 	memset(&rte_services[id], 0, sizeof(struct rte_service_spec_impl));
 
@@ -410,7 +406,7 @@ service_runner_do_callback(struct rte_service_spec_impl *s,
 
 /* Expects the service 's' is valid. */
 static int32_t
-service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
+service_run(uint32_t i, struct core_state *cs, const uint64_t *mapped_services,
 	    struct rte_service_spec_impl *s, uint32_t serialize_mt_unsafe)
 {
 	if (!s)
@@ -424,12 +420,12 @@ service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
 			RUNSTATE_RUNNING ||
 	    rte_atomic_load_explicit(&s->app_runstate, rte_memory_order_acquire) !=
 			RUNSTATE_RUNNING ||
-	    !(service_mask & (UINT64_C(1) << i))) {
-		cs->service_active_on_lcore[i] = 0;
+	    !rte_bitset_test(mapped_services, i)) {
+		rte_bitset_clear(cs->service_active_on_lcore, i);
 		return -ENOEXEC;
 	}
 
-	cs->service_active_on_lcore[i] = 1;
+	rte_bitset_set(cs->service_active_on_lcore, i);
 
 	if ((service_mt_safe(s) == 0) && (serialize_mt_unsafe == 1)) {
 		if (!rte_spinlock_trylock(&s->execute_lock))
@@ -454,7 +450,7 @@ rte_service_may_be_active(uint32_t id)
 		return -EINVAL;
 
 	for (i = 0; i < lcore_count; i++) {
-		if (lcore_states[ids[i]].service_active_on_lcore[id])
+		if (rte_bitset_test(lcore_states[ids[i]].service_active_on_lcore, id))
 			return 1;
 	}
 
@@ -474,7 +470,9 @@ rte_service_run_iter_on_app_lcore(uint32_t id, uint32_t serialize_mt_unsafe)
 	 */
 	rte_atomic_fetch_add_explicit(&s->num_mapped_cores, 1, rte_memory_order_relaxed);
 
-	int ret = service_run(id, cs, UINT64_MAX, s, serialize_mt_unsafe);
+	RTE_BITSET_DECLARE(all_services, RTE_SERVICE_NUM_MAX);
+	rte_bitset_set_all(all_services, RTE_SERVICE_NUM_MAX);
+	int ret = service_run(id, cs, all_services, s, serialize_mt_unsafe);
 
 	rte_atomic_fetch_sub_explicit(&s->num_mapped_cores, 1, rte_memory_order_relaxed);
 
@@ -485,7 +483,6 @@ static int32_t
 service_runner_func(void *arg)
 {
 	RTE_SET_USED(arg);
-	uint8_t i;
 	const int lcore = rte_lcore_id();
 	struct core_state *cs = &lcore_states[lcore];
 
@@ -497,20 +494,11 @@ service_runner_func(void *arg)
 	 */
 	while (rte_atomic_load_explicit(&cs->runstate, rte_memory_order_acquire) ==
 			RUNSTATE_RUNNING) {
+		ssize_t id;
 
-		const uint64_t service_mask = cs->service_mask;
-		uint8_t start_id;
-		uint8_t end_id;
-
-		if (service_mask == 0)
-			continue;
-
-		start_id = rte_ctz64(service_mask);
-		end_id = 64 - rte_clz64(service_mask);
-
-		for (i = start_id; i < end_id; i++) {
+		RTE_BITSET_FOREACH_SET(id, cs->mapped_services, RTE_SERVICE_NUM_MAX) {
 			/* return value ignored as no change to code flow */
-			service_run(i, cs, service_mask, service_get(i), 1);
+			service_run(id, cs, cs->mapped_services, service_get(id), 1);
 		}
 
 		rte_atomic_store_explicit(&cs->loops, cs->loops + 1, rte_memory_order_relaxed);
@@ -519,8 +507,7 @@ service_runner_func(void *arg)
 	/* Switch off this core for all services, to ensure that future
 	 * calls to may_be_active() know this core is switched off.
 	 */
-	for (i = 0; i < RTE_SERVICE_NUM_MAX; i++)
-		cs->service_active_on_lcore[i] = 0;
+	rte_bitset_clear_all(cs->service_active_on_lcore, RTE_SERVICE_NUM_MAX);
 
 	/* Use SEQ CST memory ordering to avoid any re-ordering around
 	 * this store, ensuring that once this store is visible, the service
@@ -586,7 +573,7 @@ rte_service_lcore_count_services(uint32_t lcore)
 	if (!cs->is_service_core)
 		return -ENOTSUP;
 
-	return rte_popcount64(cs->service_mask);
+	return rte_bitset_count_set(cs->mapped_services, RTE_SERVICE_NUM_MAX);
 }
 
 int32_t
@@ -639,25 +626,23 @@ service_update(uint32_t sid, uint32_t lcore, uint32_t *set, uint32_t *enabled)
 			!lcore_states[lcore].is_service_core)
 		return -EINVAL;
 
-	uint64_t sid_mask = UINT64_C(1) << sid;
 	if (set) {
-		uint64_t lcore_mapped = lcore_states[lcore].service_mask &
-			sid_mask;
+		uint64_t lcore_mapped = rte_bitset_test(lcore_states[lcore].mapped_services, sid);
 
 		if (*set && !lcore_mapped) {
-			lcore_states[lcore].service_mask |= sid_mask;
+			rte_bitset_set(lcore_states[lcore].mapped_services, sid);
 			rte_atomic_fetch_add_explicit(&rte_services[sid].num_mapped_cores,
 				1, rte_memory_order_relaxed);
 		}
 		if (!*set && lcore_mapped) {
-			lcore_states[lcore].service_mask &= ~(sid_mask);
+			rte_bitset_clear(lcore_states[lcore].mapped_services, sid);
 			rte_atomic_fetch_sub_explicit(&rte_services[sid].num_mapped_cores,
 				1, rte_memory_order_relaxed);
 		}
 	}
 
 	if (enabled)
-		*enabled = !!(lcore_states[lcore].service_mask & (sid_mask));
+		*enabled = rte_bitset_test(lcore_states[lcore].mapped_services, sid);
 
 	return 0;
 }
@@ -699,11 +684,11 @@ set_lcore_state(uint32_t lcore, int32_t state)
 int32_t
 rte_service_lcore_reset_all(void)
 {
-	/* loop over cores, reset all to mask 0 */
+	/* loop over cores, reset all mapped services */
 	uint32_t i;
 	for (i = 0; i < RTE_MAX_LCORE; i++) {
 		if (lcore_states[i].is_service_core) {
-			lcore_states[i].service_mask = 0;
+			rte_bitset_clear_all(lcore_states[i].mapped_services, RTE_SERVICE_NUM_MAX);
 			set_lcore_state(i, ROLE_RTE);
 			/* runstate act as guard variable Use
 			 * store-release memory order here to synchronize
@@ -731,7 +716,7 @@ rte_service_lcore_add(uint32_t lcore)
 	set_lcore_state(lcore, ROLE_SERVICE);
 
 	/* ensure that after adding a core the mask and state are defaults */
-	lcore_states[lcore].service_mask = 0;
+	rte_bitset_clear_all(lcore_states[lcore].mapped_services, RTE_SERVICE_NUM_MAX);
 	/* Use store-release memory order here to synchronize with
 	 * load-acquire in runstate read functions.
 	 */
@@ -814,12 +799,11 @@ rte_service_lcore_stop(uint32_t lcore)
 
 	uint32_t i;
 	struct core_state *cs = &lcore_states[lcore];
-	uint64_t service_mask = cs->service_mask;
 
 	for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
-		int32_t enabled = service_mask & (UINT64_C(1) << i);
-		int32_t service_running = rte_service_runstate_get(i);
-		int32_t only_core = (1 ==
+		bool enabled = rte_bitset_test(cs->mapped_services, i);
+		bool service_running = rte_service_runstate_get(i);
+		bool only_core = (1 ==
 			rte_atomic_load_explicit(&rte_services[i].num_mapped_cores,
 				rte_memory_order_relaxed));
 
-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [RFC v5 6/6] event/dsw: optimize serving port logic
  2024-05-05  7:33   ` [RFC v5 1/6] eal: add bitset type Mattias Rönnblom
                       ` (3 preceding siblings ...)
  2024-05-05  7:33     ` [RFC v5 5/6] service: use multi-word bitset to represent service flags Mattias Rönnblom
@ 2024-05-05  7:33     ` Mattias Rönnblom
  2024-08-09 20:14     ` [PATCH 1/6] eal: add bitset type Mattias Rönnblom
  5 siblings, 0 replies; 63+ messages in thread
From: Mattias Rönnblom @ 2024-05-05  7:33 UTC (permalink / raw)
  To: dev
  Cc: hofors, Morten Brørup, Tyler Retzlaff, Stephen Hemminger,
	Harry van Haaren, Mattias Rönnblom

To reduce flow migration overhead, replace the array-based
representation of which set of ports are bound to a particular queue
by a multi-word bitset.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
---
 drivers/event/dsw/dsw_evdev.c | 19 +++++++------------
 drivers/event/dsw/dsw_evdev.h |  3 ++-
 drivers/event/dsw/dsw_event.c |  7 ++++---
 3 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/drivers/event/dsw/dsw_evdev.c b/drivers/event/dsw/dsw_evdev.c
index ab0420b549..f3ca99e935 100644
--- a/drivers/event/dsw/dsw_evdev.c
+++ b/drivers/event/dsw/dsw_evdev.c
@@ -118,6 +118,7 @@ dsw_queue_setup(struct rte_eventdev *dev, uint8_t queue_id,
 		queue->schedule_type = conf->schedule_type;
 	}
 
+	rte_bitset_init(queue->serving_ports, DSW_MAX_PORTS);
 	queue->num_serving_ports = 0;
 
 	return 0;
@@ -144,20 +145,16 @@ dsw_queue_release(struct rte_eventdev *dev __rte_unused,
 static void
 queue_add_port(struct dsw_queue *queue, uint16_t port_id)
 {
-	uint64_t port_mask = UINT64_C(1) << port_id;
-
-	queue->serving_ports |=	port_mask;
+	rte_bitset_set(queue->serving_ports, port_id);
 	queue->num_serving_ports++;
 }
 
 static bool
 queue_remove_port(struct dsw_queue *queue, uint16_t port_id)
 {
-	uint64_t port_mask = UINT64_C(1) << port_id;
-
-	if (queue->serving_ports & port_mask) {
+	if (rte_bitset_test(queue->serving_ports, port_id)) {
 		queue->num_serving_ports--;
-		queue->serving_ports ^= port_mask;
+		rte_bitset_clear(queue->serving_ports, port_id);
 		return true;
 	}
 
@@ -257,14 +254,12 @@ initial_flow_to_port_assignment(struct dsw_evdev *dsw)
 		struct dsw_queue *queue = &dsw->queues[queue_id];
 		uint16_t flow_hash;
 		for (flow_hash = 0; flow_hash < DSW_MAX_FLOWS; flow_hash++) {
-			uint8_t skip =
-				rte_rand_max(queue->num_serving_ports);
+			uint8_t skip = rte_rand_max(queue->num_serving_ports);
 			uint8_t port_id;
 
 			for (port_id = 0;; port_id++) {
-				uint64_t port_mask = UINT64_C(1) << port_id;
-
-				if (queue->serving_ports & port_mask) {
+				if (rte_bitset_test(queue->serving_ports,
+						    port_id)) {
 					if (skip == 0)
 						break;
 					skip--;
diff --git a/drivers/event/dsw/dsw_evdev.h b/drivers/event/dsw/dsw_evdev.h
index 3a5989f148..0c40c45e46 100644
--- a/drivers/event/dsw/dsw_evdev.h
+++ b/drivers/event/dsw/dsw_evdev.h
@@ -7,6 +7,7 @@
 
 #include <eventdev_pmd.h>
 
+#include <rte_bitset.h>
 #include <rte_event_ring.h>
 #include <rte_eventdev.h>
 
@@ -234,7 +235,7 @@ struct __rte_cache_aligned dsw_port {
 
 struct dsw_queue {
 	uint8_t schedule_type;
-	uint64_t serving_ports;
+	RTE_BITSET_DECLARE(serving_ports, DSW_MAX_PORTS);
 	uint16_t num_serving_ports;
 
 	alignas(RTE_CACHE_LINE_SIZE) uint8_t flow_to_port_map[DSW_MAX_FLOWS];
diff --git a/drivers/event/dsw/dsw_event.c b/drivers/event/dsw/dsw_event.c
index 23488d9030..b855f9ecf1 100644
--- a/drivers/event/dsw/dsw_event.c
+++ b/drivers/event/dsw/dsw_event.c
@@ -447,9 +447,8 @@ static bool
 dsw_is_serving_port(struct dsw_evdev *dsw, uint8_t port_id, uint8_t queue_id)
 {
 	struct dsw_queue *queue = &dsw->queues[queue_id];
-	uint64_t port_mask = UINT64_C(1) << port_id;
 
-	return queue->serving_ports & port_mask;
+	return rte_bitset_test(queue->serving_ports, port_id);
 }
 
 static bool
@@ -571,7 +570,9 @@ dsw_schedule(struct dsw_evdev *dsw, uint8_t queue_id, uint16_t flow_hash)
 		/* A single-link queue, or atomic/ordered/parallel but
 		 * with just a single serving port.
 		 */
-		port_id = rte_bsf64(queue->serving_ports);
+		port_id = (uint8_t)rte_bitset_find_first_set(
+			queue->serving_ports, DSW_MAX_PORTS
+		);
 
 	DSW_LOG_DP(DEBUG, "Event with queue_id %d flow_hash %d is scheduled "
 		   "to port %d.\n", queue_id, flow_hash, port_id);
-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [PATCH 1/6] eal: add bitset type
  2024-05-05  7:33   ` [RFC v5 1/6] eal: add bitset type Mattias Rönnblom
                       ` (4 preceding siblings ...)
  2024-05-05  7:33     ` [RFC v5 6/6] event/dsw: optimize serving port logic Mattias Rönnblom
@ 2024-08-09 20:14     ` Mattias Rönnblom
  2024-08-09 20:14       ` [PATCH 2/6] eal: add bitset test suite Mattias Rönnblom
                         ` (8 more replies)
  5 siblings, 9 replies; 63+ messages in thread
From: Mattias Rönnblom @ 2024-08-09 20:14 UTC (permalink / raw)
  To: dev
  Cc: hofors, Morten Brørup, Tyler Retzlaff, Stephen Hemminger,
	Harry van Haaren, Mattias Rönnblom

Introduce a set of functions and macros that operate on sets of bits,
kept in arrays of 64-bit words.

RTE bitset is designed for bitsets which are larger than what fits in
a single machine word (i.e., 64 bits). For very large bitsets, the
<rte_bitmap.h> API may be a more appropriate choice.

Depends-on: series-32740 ("Improve EAL bit operations API")

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>

--

PATCH:
 * Update MAINTAINERS.
 * Update release notes.

RFC v5:
 * Delegate bit test/set/clear/assign/flip to RTE bitops.
 * Note in the documentation that set/clear/assign/flip are not
   atomic.

RFC v4:
 * Add function rte_bitset_flip() to change the value of a bit.
 * Add function rte_bitset_complement(), flipping the value of all bits.
 * Add function rte_bitset_assign(), setting the value of a bit based
   on a 'bool' parameter.
 * Add functions to logically shift the bitset left or right.
 * Add explicit destination bitset to logic operation type functions
   (e.g., rte_bitset_and()), to increase flexibility.
 * Split implementation and test suite into distinct commits.

RFC v3:
 * Split the bitset from the htimer patchset, where it was originally
   hosted.
 * Rebase to current DPDK main.
 * Add note that rte_bitset_init() need not be called if bitset words
   have already been zeroed.
 * Use REGISTER_FAST_TEST instead of REGISTER_TEST_COMMAND.
 * Use rte_popcount64() instead of compiler builtin.

RFC v2:
 * Replaced <sys/types.h> with <stddef.h> include, to properly get
   size_t typedef.
 * Add <rte_compat.h> to get __rte_experimental in <rte_bitset.h>.
---
 MAINTAINERS                            |    6 +
 doc/api/doxy-api-index.md              |    1 +
 doc/guides/rel_notes/release_24_11.rst |   10 +
 lib/eal/common/meson.build             |    1 +
 lib/eal/common/rte_bitset.c            |   29 +
 lib/eal/include/meson.build            |    1 +
 lib/eal/include/rte_bitset.h           | 1061 ++++++++++++++++++++++++
 lib/eal/version.map                    |    3 +
 8 files changed, 1112 insertions(+)
 create mode 100644 lib/eal/common/rte_bitset.c
 create mode 100644 lib/eal/include/rte_bitset.h

diff --git a/MAINTAINERS b/MAINTAINERS
index c5a703b5c0..00ad6fd318 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -260,6 +260,12 @@ M: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
 F: lib/eal/include/rte_bitmap.h
 F: app/test/test_bitmap.c
 
+Bitset
+M: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
+F: lib/eal/include/rte_bitset.h
+F: lib/eal/common/rte_bitset.c
+F: app/test/test_bitset.c
+
 MCSlock
 M: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
 F: lib/eal/include/rte_mcslock.h
diff --git a/doc/api/doxy-api-index.md b/doc/api/doxy-api-index.md
index f9f0300126..abd44b1861 100644
--- a/doc/api/doxy-api-index.md
+++ b/doc/api/doxy-api-index.md
@@ -174,6 +174,7 @@ The public API headers are grouped by topics:
   [ring](@ref rte_ring.h),
   [stack](@ref rte_stack.h),
   [tailq](@ref rte_tailq.h),
+  [bitset](@ref rte_bitset.h),
   [bitmap](@ref rte_bitmap.h)
 
 - **packet framework**:
diff --git a/doc/guides/rel_notes/release_24_11.rst b/doc/guides/rel_notes/release_24_11.rst
index 3111b1e4c0..89716defac 100644
--- a/doc/guides/rel_notes/release_24_11.rst
+++ b/doc/guides/rel_notes/release_24_11.rst
@@ -73,6 +73,16 @@ New Features
   based macros (for C) and function overloading (in C++ translation
   units).
 
+* **Added multi-word bitset API.**
+
+  A new multi-word bitset API has been introduced in the EAL. The RTE
+  bitset is optimized for scenarios where the bitset size exceeds the
+  capacity of a single word (e.g., larger than 64 bits), but is not
+  large enough to justify the overhead and complexity of the more
+  scalable, yet slower, <rte_bitmap.h> API. This addition provides an
+  efficient and straightforward alternative for handling bitsets of
+  intermediate sizes.
+
 Removed Items
 -------------
 
diff --git a/lib/eal/common/meson.build b/lib/eal/common/meson.build
index 22a626ba6f..c1bbf26654 100644
--- a/lib/eal/common/meson.build
+++ b/lib/eal/common/meson.build
@@ -31,6 +31,7 @@ sources += files(
         'eal_common_uuid.c',
         'malloc_elem.c',
         'malloc_heap.c',
+        'rte_bitset.c',
         'rte_malloc.c',
         'rte_random.c',
         'rte_reciprocal.c',
diff --git a/lib/eal/common/rte_bitset.c b/lib/eal/common/rte_bitset.c
new file mode 100644
index 0000000000..35e55a64db
--- /dev/null
+++ b/lib/eal/common/rte_bitset.c
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Ericsson AB
+ */
+
+#include <errno.h>
+
+#include "rte_bitset.h"
+
+ssize_t
+rte_bitset_to_str(const uint64_t *bitset, size_t num_bits, char *buf,
+		  size_t capacity)
+{
+	char *out = buf;
+	size_t remaining;
+
+	/* One character per bit, plus the terminating NUL. */
+	if (num_bits + 1 > capacity)
+		return -EINVAL;
+
+	/* Walk from the highest bit index down, so the MSB comes first. */
+	for (remaining = num_bits; remaining > 0; remaining--) {
+		bool is_set = rte_bitset_test(bitset, remaining - 1);
+
+		*out++ = is_set ? '1' : '0';
+	}
+	*out = '\0';
+
+	return num_bits + 1;
+}
diff --git a/lib/eal/include/meson.build b/lib/eal/include/meson.build
index e94b056d46..4b5f120a66 100644
--- a/lib/eal/include/meson.build
+++ b/lib/eal/include/meson.build
@@ -5,6 +5,7 @@ includes += include_directories('.')
 
 headers += files(
         'rte_alarm.h',
+        'rte_bitset.h',
         'rte_bitmap.h',
         'rte_bitops.h',
         'rte_branch_prediction.h',
diff --git a/lib/eal/include/rte_bitset.h b/lib/eal/include/rte_bitset.h
new file mode 100644
index 0000000000..49a07c77b8
--- /dev/null
+++ b/lib/eal/include/rte_bitset.h
@@ -0,0 +1,1061 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Ericsson AB
+ */
+
+#ifndef _RTE_BITSET_H_
+#define _RTE_BITSET_H_
+
+/**
+ * @file
+ * RTE Bitset
+ *
+ * This file provides functions and macros for querying and
+ * manipulating sets of bits kept in arrays of @c uint64_t-sized
+ * elements.
+ *
+ * The bits in a bitset are numbered from 0 to @c size - 1, with the
+ * lowest index being the least significant bit.
+ *
+ * The bitset array must be properly aligned.
+ *
+ * For optimal performance, the @c size parameter, required by
+ * many of the API's functions, should be a compile-time constant.
+ *
+ * For large bitsets, the rte_bitmap.h API may be more appropriate.
+ *
+ * @warning
+ * All functions modifying a bitset may overwrite any unused bits of
+ * the last word. Such unused bits are ignored by all functions reading
+ * bits.
+ *
+ */
+
+#include <limits.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <rte_bitops.h>
+#include <rte_branch_prediction.h>
+#include <rte_common.h>
+#include <rte_compat.h>
+#include <rte_debug.h>
+#include <rte_memcpy.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * The size (in bytes) of each element in the array used to represent
+ * a bitset.
+ */
+#define RTE_BITSET_WORD_SIZE (sizeof(uint64_t))
+
+/**
+ * The size (in bits) of each element in the array used to represent
+ * a bitset.
+ */
+#define RTE_BITSET_WORD_BITS (RTE_BITSET_WORD_SIZE * CHAR_BIT)
+
+/**
+ * Computes the number of words required to store @c size bits.
+ */
+#define RTE_BITSET_NUM_WORDS(size)					\
+	(((size) + RTE_BITSET_WORD_BITS - 1) / RTE_BITSET_WORD_BITS)
+
+/**
+ * Computes the amount of memory (in bytes) required to fit a bitset
+ * holding @c size bits.
+ */
+#define RTE_BITSET_SIZE(size)						\
+	((size_t)(RTE_BITSET_NUM_WORDS(size) * RTE_BITSET_WORD_SIZE))
+
+/* Index of the bitset word holding bit @c bit_num. */
+#define __RTE_BITSET_WORD_IDX(bit_num) ((bit_num) / RTE_BITSET_WORD_BITS)
+/* Position of bit @c bit_num within the word holding it. */
+#define __RTE_BITSET_BIT_OFFSET(bit_num) ((bit_num) % RTE_BITSET_WORD_BITS)
+/* Number of bits, in the words backing a bitset, lying beyond @c size. */
+#define __RTE_BITSET_UNUSED(size)			     \
+	((RTE_BITSET_NUM_WORDS(size) * RTE_BITSET_WORD_BITS) \
+	 - (size))
+/* Mask selecting the bits of the last word that are within @c size. */
+#define __RTE_BITSET_USED_MASK(size)			\
+	(UINT64_MAX >> __RTE_BITSET_UNUSED(size))
+
+/*
+ * Invoke the single-word operation @c fun on the word holding bit
+ * @c bit_num, passing the address of that word, the offset of the bit
+ * within the word, and any additional arguments.
+ */
+#define __RTE_BITSET_DELEGATE_N(fun, bitset, bit_num, ...)		\
+	fun(&(bitset)[__RTE_BITSET_WORD_IDX(bit_num)],			\
+	    __RTE_BITSET_BIT_OFFSET(bit_num), __VA_ARGS__)
+
+/* MSVC doesn't have ##__VA_ARGS__, so argument-less -> special case */
+#define __RTE_BITSET_DELEGATE(fun, bitset, bit_num)			\
+	fun(&(bitset)[__RTE_BITSET_WORD_IDX(bit_num)],			\
+	    __RTE_BITSET_BIT_OFFSET(bit_num))
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Declare a bitset.
+ *
+ * Declare (e.g., as a struct field) or define (e.g., as a stack
+ * variable) a bitset of the specified size.
+ *
+ * @param name
+ *   The field or variable name of the resulting definition.
+ * @param size
+ *   The number of bits the bitset must be able to represent. Must be
+ *   a compile-time constant.
+ */
+#define RTE_BITSET_DECLARE(name, size)		\
+	uint64_t name[RTE_BITSET_NUM_WORDS(size)]
+
+/*
+ * Number of bits remaining to be scanned after bit @c var, for an
+ * iteration covering @c len bits starting at @c start_bit. A @c var
+ * lower than @c start_bit is taken to mean that the iteration has
+ * wrapped around the end of the bitset.
+ */
+#define __RTE_BITSET_FOREACH_LEFT(var, size, start_bit, len)		\
+	((len) - 1 - ((var) >= (start_bit) ? (var) - (start_bit) :	\
+		  (size) - (start_bit) + (var)))
+
+/*
+ * Generic for-loop header used by the RTE_BITSET_FOREACH_* macros.
+ *
+ * The loop starts at the first match reported by __rte_bitset_find()
+ * and ends when @c var becomes -1. After each iteration, the search
+ * resumes at the bit following @c var (modulo @c size), limited to
+ * the part of the range not yet visited. Note that the macro
+ * arguments are evaluated more than once.
+ */
+#define __RTE_BITSET_FOREACH(var, bitset, size, start_bit, len, flags)	\
+	for ((var) = __rte_bitset_find(bitset, size, start_bit, len,	\
+				       flags);				\
+	     (var) != -1;						\
+	     (var) = __RTE_BITSET_FOREACH_LEFT(var, size, start_bit,	\
+					       len) > 0	?		\
+		     __rte_bitset_find(bitset, size,			\
+				       ((var) + 1) % (size),		\
+				       __RTE_BITSET_FOREACH_LEFT(var,	\
+								 size,	\
+								 start_bit, \
+								 len),	\
+				       flags) : -1)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Iterate over all bits set.
+ *
+ * This macro iterates over all bits set (i.e., all ones) in the
+ * bitset, in the forward direction (i.e., starting with the least
+ * significant '1').
+ *
+ * @param var
+ *   An iterator variable of type @c ssize_t. For each successive
+ *   iteration, this variable will hold the bit index of a set bit.
+ * @param bitset
+ *   A <tt>const uint64_t *</tt> pointer to the bitset array.
+ * @param size
+ *   The size of the bitset (in bits).
+ */
+
+#define RTE_BITSET_FOREACH_SET(var, bitset, size)			\
+	__RTE_BITSET_FOREACH(var, bitset, size, 0, size, 0)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Iterate over all bits cleared.
+ *
+ * This macro iterates over all bits cleared in the bitset, in the
+ * forward direction (i.e., starting with the lowest-indexed cleared bit).
+ *
+ * @param var
+ *   An iterator variable of type @c ssize_t. For each successive iteration,
+ *   this variable will hold the bit index of a cleared bit.
+ * @param bitset
+ *   A <tt>const uint64_t *</tt> pointer to the bitset array.
+ * @param size
+ *   The size of the bitset (in bits).
+ */
+
+#define RTE_BITSET_FOREACH_CLEAR(var, bitset, size)			\
+	__RTE_BITSET_FOREACH(var, bitset, size, 0, size,		\
+			     __RTE_BITSET_FIND_FLAG_FIND_CLEAR)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Iterate over all bits set within a range.
+ *
+ * This macro iterates over all bits set (i.e., all ones) in the
+ * specified range, in the forward direction (i.e., starting with the
+ * least significant '1').
+ *
+ * @param var
+ *   An iterator variable of type @c ssize_t. For each successive iteration,
+ *   this variable will hold the bit index of a set bit.
+ * @param bitset
+ *   A <tt>const uint64_t *</tt> pointer to the bitset array.
+ * @param size
+ *   The size of the bitset (in bits).
+ * @param start_bit
+ *   The index of the first bit to check. Must be less than @c size.
+ * @param len
+ *   The length (in bits) of the range. @c start_bit + @c len must be less
+ *   than or equal to @c size.
+ */
+
+#define RTE_BITSET_FOREACH_SET_RANGE(var, bitset, size, start_bit,     \
+				     len)			       \
+	__RTE_BITSET_FOREACH(var, bitset, size, start_bit, len, 0)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Iterate over all cleared bits within a range.
+ *
+ * This macro iterates over all bits cleared (i.e., all zeroes) in the
+ * specified range, in the forward direction (i.e., starting with the
+ * least significant '0').
+ *
+ * @param var
+ *   An iterator variable of type @c ssize_t. For each successive iteration,
+ *   this variable will hold the bit index of a cleared bit.
+ * @param bitset
+ *   A <tt>const uint64_t *</tt> pointer to the bitset array.
+ * @param size
+ *   The size of the bitset (in bits).
+ * @param start_bit
+ *   The index of the first bit to check. Must be less than @c size.
+ * @param len
+ *   The length (in bits) of the range. @c start_bit + @c len must be less
+ *   than or equal to @c size.
+ */
+
+#define RTE_BITSET_FOREACH_CLEAR_RANGE(var, bitset, size, start_bit,	\
+				       len)				\
+	__RTE_BITSET_FOREACH(var, bitset, size, start_bit, len,		\
+			     __RTE_BITSET_FIND_FLAG_FIND_CLEAR)
+
+/**
+ * Iterate over all bits set within a range, with wrap-around.
+ *
+ * This macro iterates over all bits set (i.e., all ones) in the
+ * specified range. The range is allowed to extend past the end of
+ * the bitset, in which case the iteration continues at bit index 0.
+ *
+ * @param var
+ *   An iterator variable of type @c ssize_t. For each successive iteration,
+ *   this variable will hold the bit index of a set bit.
+ * @param bitset
+ *   A <tt>const uint64_t *</tt> pointer to the bitset array.
+ * @param size
+ *   The size of the bitset (in bits).
+ * @param start_bit
+ *   The index of the first bit to check. Must be less than @c size.
+ * @param len
+ *   The length (in bits) of the range. Must be less than or equal to
+ *   @c size.
+ */
+#define RTE_BITSET_FOREACH_SET_WRAP(var, bitset, size, start_bit,      \
+				    len)			       \
+	__RTE_BITSET_FOREACH(var, bitset, size, start_bit, len,	       \
+			     __RTE_BITSET_FIND_FLAG_WRAP)
+
+/**
+ * Iterate over all cleared bits within a range, with wrap-around.
+ *
+ * This macro iterates over all bits cleared (i.e., all zeroes) in the
+ * specified range. The range is allowed to extend past the end of
+ * the bitset, in which case the iteration continues at bit index 0.
+ *
+ * @param var
+ *   An iterator variable of type @c ssize_t. For each successive iteration,
+ *   this variable will hold the bit index of a cleared bit.
+ * @param bitset
+ *   A <tt>const uint64_t *</tt> pointer to the bitset array.
+ * @param size
+ *   The size of the bitset (in bits).
+ * @param start_bit
+ *   The index of the first bit to check. Must be less than @c size.
+ * @param len
+ *   The length (in bits) of the range. Must be less than or equal to
+ *   @c size.
+ */
+#define RTE_BITSET_FOREACH_CLEAR_WRAP(var, bitset, size, start_bit,	\
+				       len)				\
+	__RTE_BITSET_FOREACH(var, bitset, size, start_bit, len,		\
+			     __RTE_BITSET_FIND_FLAG_WRAP |		\
+			     __RTE_BITSET_FIND_FLAG_FIND_CLEAR)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Initializes a bitset.
+ *
+ * All bits are cleared.
+ *
+ * In case all words in the bitset array are already set to zero by
+ * other means (e.g., at the time of memory allocation), this function
+ * need not be called.
+ *
+ * @param bitset
+ *   A pointer to the array of bitset 64-bit words.
+ * @param size
+ *   The size of the bitset (in bits).
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_init(uint64_t *bitset, size_t size)
+{
+	/* Every word backing the bitset is zeroed, unused bits included. */
+	const size_t num_bytes = RTE_BITSET_SIZE(size);
+
+	memset(bitset, 0, num_bytes);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Test if a bit is set.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param bit_num
+ *   Index of the bit to test. Index 0 is the least significant bit.
+ * @return
+ *   Returns true if the bit is '1', and false if the bit is '0'.
+ */
+
+__rte_experimental
+static inline bool
+rte_bitset_test(const uint64_t *bitset, size_t bit_num)
+{
+	/*
+	 * Reads the bit at offset (bit_num % RTE_BITSET_WORD_BITS) of
+	 * word (bit_num / RTE_BITSET_WORD_BITS). The bit index is not
+	 * range-checked here.
+	 */
+	return __RTE_BITSET_DELEGATE(rte_bit_test, bitset, bit_num);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Set a bit in the bitset.
+ *
+ * Bits are numbered from 0 to (size - 1) (inclusive).
+ *
+ * The operation is not guaranteed to be atomic.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param bit_num
+ *   The index of the bit to be set.
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_set(uint64_t *bitset, size_t bit_num)
+{
+	/*
+	 * Only the word holding the bit is passed on to rte_bit_set().
+	 * The bit index is not range-checked here.
+	 */
+	__RTE_BITSET_DELEGATE(rte_bit_set, bitset, bit_num);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Clear a bit in the bitset.
+ *
+ * Bits are numbered 0 to (size - 1) (inclusive).
+ *
+ * The operation is not guaranteed to be atomic.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param bit_num
+ *   The index of the bit to be cleared.
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_clear(uint64_t *bitset, size_t bit_num)
+{
+	/*
+	 * Only the word holding the bit is passed on to rte_bit_clear().
+	 * The bit index is not range-checked here.
+	 */
+	__RTE_BITSET_DELEGATE(rte_bit_clear, bitset, bit_num);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Set or clear a bit in the bitset.
+ *
+ * Bits are numbered 0 to (size - 1) (inclusive).
+ *
+ * The operation is not guaranteed to be atomic.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param bit_num
+ *   The index of the bit to be set or cleared.
+ * @param bit_value
+ *   Control if the bit should be set or cleared.
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_assign(uint64_t *bitset, size_t bit_num, bool bit_value)
+{
+	/*
+	 * The word holding the bit, the bit's offset within that word,
+	 * and the new value are passed on to rte_bit_assign(). The bit
+	 * index is not range-checked here.
+	 */
+	__RTE_BITSET_DELEGATE_N(rte_bit_assign, bitset, bit_num, bit_value);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Change the value of a bit in the bitset.
+ *
+ * Bits are numbered 0 to (size - 1) (inclusive).
+ *
+ * The operation is not guaranteed to be atomic.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param bit_num
+ *   The index of the bit to be flipped.
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_flip(uint64_t *bitset, size_t bit_num)
+{
+	/*
+	 * Only the word holding the bit is passed on to rte_bit_flip().
+	 * The bit index is not range-checked here.
+	 */
+	__RTE_BITSET_DELEGATE(rte_bit_flip, bitset, bit_num);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Set all bits in the bitset.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param size
+ *   The size of the bitset (in bits).
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_set_all(uint64_t *bitset, size_t size)
+{
+	const size_t num_bytes = RTE_BITSET_SIZE(size);
+
+	/* Whole words are filled, so unused bits in the last word get set too. */
+	memset(bitset, 0xFF, num_bytes);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Clear all bits in the bitset.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param size
+ *   The size of the bitset (in bits).
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_clear_all(uint64_t *bitset, size_t size)
+{
+	/* Same effect as (re-)initializing the bitset: all words are zeroed. */
+	memset(bitset, 0, RTE_BITSET_SIZE(size));
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Count all set bits (also known as the @e weight).
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param size
+ *   The size of the bitset (in bits).
+ * @return
+ *   Returns the number of '1' bits in the bitset.
+ */
+
+__rte_experimental
+static inline size_t
+rte_bitset_count_set(const uint64_t *bitset, size_t size)
+{
+	size_t i;
+	size_t total = 0;
+
+	/*
+	 * An empty bitset has no words. Without this check, the
+	 * "number of words - 1" loop bound below would wrap around.
+	 */
+	if (unlikely(size == 0))
+		return 0;
+
+	/* All words but the last consist of valid bits only. */
+	for (i = 0; i < RTE_BITSET_NUM_WORDS(size) - 1; i++)
+		total += rte_popcount64(bitset[i]);
+
+	/*
+	 * The unused bits of the last word may hold arbitrary values,
+	 * and are masked out to keep them from being counted.
+	 */
+	total += rte_popcount64(bitset[i] & __RTE_BITSET_USED_MASK(size));
+
+	return total;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Count all cleared bits.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param size
+ *   The size of the bitset (in bits).
+ * @return
+ *   Returns the number of '0' bits in the bitset.
+ */
+
+__rte_experimental
+static inline size_t
+rte_bitset_count_clear(const uint64_t *bitset, size_t size)
+{
+	/* Every valid bit which is not set is cleared. */
+	const size_t num_set = rte_bitset_count_set(bitset, size);
+
+	return size - num_set;
+}
+
+#define __RTE_BITSET_FIND_FLAG_FIND_CLEAR (1U << 0)
+#define __RTE_BITSET_FIND_FLAG_WRAP (1U << 1)
+
+/*
+ * Find the lowest-indexed bit, among the @c len bits starting at
+ * @c start_bit, which is set (or cleared, if @c find_clear is true).
+ * The range must not extend past the end of the bitset.
+ *
+ * Returns the index of the bit, or -1 if there is no such bit in the
+ * range.
+ */
+__rte_experimental
+static inline ssize_t
+__rte_bitset_find_nowrap(const uint64_t *bitset, size_t __rte_unused size,
+			 size_t start_bit, size_t len, bool find_clear)
+{
+	size_t word_idx;
+	size_t offset;
+	size_t end_bit = start_bit + len;
+
+	RTE_ASSERT(end_bit <= size);
+
+	if (unlikely(len == 0))
+		return -1;
+
+	word_idx = __RTE_BITSET_WORD_IDX(start_bit);
+	offset = __RTE_BITSET_BIT_OFFSET(start_bit);
+
+	/* Visit the words overlapping the range, lowest word first. */
+	while (word_idx <= __RTE_BITSET_WORD_IDX(end_bit - 1)) {
+		uint64_t word;
+		int word_ffs;
+
+		word = bitset[word_idx];
+		/* Inverting the word turns a search for '0' into one for '1'. */
+		if (find_clear)
+			word = ~word;
+
+		/* Drop the bits below start_bit (non-zero offset: first word only). */
+		word >>= offset;
+
+		/* One-based position of the lowest '1', or 0 if there is none. */
+		word_ffs = __builtin_ffsll(word);
+
+		if (word_ffs != 0) {
+			ssize_t ffs = start_bit + word_ffs - 1;
+
+			/*
+			 * The match may lie beyond the end of the range
+			 * (e.g., among the unused bits of the last word),
+			 * in which case it does not count.
+			 */
+			if (unlikely(ffs >= (ssize_t)end_bit))
+				return -1;
+
+			return ffs;
+		}
+
+		/* No match in this word; continue at bit 0 of the next one. */
+		start_bit += (RTE_BITSET_WORD_BITS - offset);
+		word_idx++;
+		offset = 0;
+	}
+
+	return -1;
+
+}
+
+__rte_experimental
+static inline ssize_t
+__rte_bitset_find(const uint64_t *bitset, size_t size, size_t start_bit,
+		  size_t len, unsigned int flags)
+{
+	const bool find_clear = (flags & __RTE_BITSET_FIND_FLAG_FIND_CLEAR) != 0;
+	const bool wrap_allowed = (flags & __RTE_BITSET_FIND_FLAG_WRAP) != 0;
+	const bool past_end = (start_bit + len) > size;
+	size_t head_len;
+	ssize_t hit;
+
+	RTE_ASSERT(len <= size);
+	if (!wrap_allowed)
+		RTE_ASSERT(!past_end);
+
+	/* A range which does not wrap is covered by a single scan. */
+	if (!wrap_allowed || !past_end)
+		return __rte_bitset_find_nowrap(bitset, size, start_bit,
+						len, find_clear);
+
+	/* First scan the part reaching up to the end of the bitset... */
+	head_len = size - start_bit;
+	hit = __rte_bitset_find_nowrap(bitset, size, start_bit, head_len,
+				       find_clear);
+	if (hit >= 0)
+		return hit;
+
+	/* ...and then the remainder, starting over at bit 0. */
+	return __rte_bitset_find_nowrap(bitset, size, 0, len - head_len,
+					find_clear);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Find first bit set.
+ *
+ * Scans the bitset in the forward direction (i.e., starting at the
+ * least significant bit), and returns the index of the first '1'.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param size
+ *   The size of the bitset (in bits).
+ * @return
+ *   Returns the index of the least significant '1', or -1 if all
+ *   bits are '0'.
+ */
+
+__rte_experimental
+static inline ssize_t
+rte_bitset_find_first_set(const uint64_t *bitset, size_t size)
+{
+	/* No flags: look for a '1', without wrap-around. */
+	const unsigned int flags = 0;
+
+	/* The scan covers the whole bitset, starting at bit 0. */
+	return __rte_bitset_find(bitset, size, 0, size, flags);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Find first bit set at offset.
+ *
+ * Scans the bitset in the forward direction (i.e., starting at the
+ * least significant bit), starting at an offset @c start_bit into the
+ * bitset, and returns the index of the first '1' encountered.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param size
+ *   The size of the bitset (in bits).
+ * @param start_bit
+ *   The index of the first bit to check. Must be less than @c size.
+ * @param len
+ *   The number of bits to scan. @c start_bit + @c len must be less
+ *   than or equal to @c size.
+ * @return
+ *   Returns the index of the least significant '1', or -1 if all
+ *   bits are '0'.
+ */
+
+__rte_experimental
+static inline ssize_t
+rte_bitset_find_set(const uint64_t *bitset, size_t size,
+		    size_t start_bit, size_t len)
+{
+	/* No flags: look for a '1', without wrap-around. */
+	const unsigned int flags = 0;
+
+	return __rte_bitset_find(bitset, size, start_bit, len, flags);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Find first bit set at offset, with wrap-around.
+ *
+ * Scans the bitset in the forward direction (i.e., starting at the
+ * least significant bit), starting at an offset @c start_bit into the
+ * bitset. If no '1' is encountered before the end of the bitset, the search
+ * will continue at index 0.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param size
+ *   The size of the bitset (in bits).
+ * @param start_bit
+ *   The index of the first bit to check. Must be less than @c size.
+ * @param len
+ *   The number of bits to scan. Must be less than or equal to @c size;
+ *   @c start_bit + @c len may exceed @c size, causing a wrap-around.
+ * @return
+ *   Returns the index of the least significant '1', or -1 if all
+ *   bits are '0'.
+ */
+
+__rte_experimental
+static inline ssize_t
+rte_bitset_find_set_wrap(const uint64_t *bitset, size_t size,
+			 size_t start_bit, size_t len)
+{
+	/* Look for a '1', continuing at bit 0 if the range passes the end. */
+	const unsigned int flags = __RTE_BITSET_FIND_FLAG_WRAP;
+
+	return __rte_bitset_find(bitset, size, start_bit, len, flags);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Find first cleared bit.
+ *
+ * Scans the bitset in the forward direction (i.e., starting at the
+ * least significant bit), and returns the index of the first '0'.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param size
+ *   The size of the bitset (in bits).
+ * @return
+ *   Returns the index of the least significant '0', or -1 if all
+ *   bits are '1'.
+ */
+
+__rte_experimental
+static inline ssize_t
+rte_bitset_find_first_clear(const uint64_t *bitset, size_t size)
+{
+	/* Look for a '0', without wrap-around. */
+	const unsigned int flags = __RTE_BITSET_FIND_FLAG_FIND_CLEAR;
+
+	/* The scan covers the whole bitset, starting at bit 0. */
+	return __rte_bitset_find(bitset, size, 0, size, flags);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Find first cleared bit at offset.
+ *
+ * Scans the bitset in the forward direction (i.e., starting at the
+ * least significant bit), starting at an offset @c start_bit into the
+ * bitset, and returns the index of the first '0' encountered.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param size
+ *   The size of the bitset (in bits).
+ * @param start_bit
+ *   The index of the first bit to check. Must be less than @c size.
+ * @param len
+ *   The number of bits to scan. @c start_bit + @c len must be less
+ *   than or equal to @c size.
+ * @return
+ *   Returns the index of the least significant '0', or -1 if all
+ *   bits are '1'.
+ */
+
+__rte_experimental
+static inline ssize_t
+rte_bitset_find_clear(const uint64_t *bitset, size_t size,
+			   size_t start_bit, size_t len)
+{
+	/* Look for a '0', without wrap-around. */
+	const unsigned int flags = __RTE_BITSET_FIND_FLAG_FIND_CLEAR;
+
+	return __rte_bitset_find(bitset, size, start_bit, len, flags);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Find first cleared bit at offset, with wrap-around.
+ *
+ * Scans the bitset in the forward direction (i.e., starting at the
+ * least significant bit), starting at an offset @c start_bit into the
+ * bitset. If no '0' is encountered before the end of the bitset, the
+ * search will continue at index 0.
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param size
+ *   The size of the bitset (in bits).
+ * @param start_bit
+ *   The index of the first bit to check. Must be less than @c size.
+ * @param len
+ *   The number of bits to scan. Must be less than or equal to @c size;
+ *   @c start_bit + @c len may exceed @c size, causing a wrap-around.
+ * @return
+ *   Returns the index of the least significant '0', or -1 if all
+ *   bits are '1'.
+ */
+
+__rte_experimental
+static inline ssize_t
+rte_bitset_find_clear_wrap(const uint64_t *bitset, size_t size,
+			   size_t start_bit, size_t len)
+{
+	/* Look for a '0', continuing at bit 0 if the range passes the end. */
+	const unsigned int flags = __RTE_BITSET_FIND_FLAG_WRAP |
+		__RTE_BITSET_FIND_FLAG_FIND_CLEAR;
+
+	return __rte_bitset_find(bitset, size, start_bit, len, flags);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Copy bitset.
+ *
+ * Copy the bits of the @c src_bitset to the @c dst_bitset.
+ *
+ * The bitsets may not overlap and must be of equal size.
+ *
+ * @param dst_bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param src_bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param size
+ *   The size of the bitsets (in bits).
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_copy(uint64_t *__rte_restrict dst_bitset,
+		const uint64_t *__rte_restrict src_bitset,
+		size_t size)
+{
+	const size_t num_bytes = RTE_BITSET_SIZE(size);
+
+	/* Words are copied in full, unused bits of the last word included. */
+	rte_memcpy(dst_bitset, src_bitset, num_bytes);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Bitwise or two bitsets.
+ *
+ * Perform a bitwise OR operation on all bits in the two equal-size
+ * bitsets @c src_bitset0 and @c src_bitset1, and store the results in
+ * @c dst_bitset.
+ *
+ * @param dst_bitset
+ *   A pointer to the destination bitset.
+ * @param src_bitset0
+ *   A pointer to the first source bitset.
+ * @param src_bitset1
+ *   A pointer to the second source bitset.
+ * @param size
+ *   The size of the bitsets (in bits).
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_or(uint64_t *dst_bitset, const uint64_t *src_bitset0,
+	      const uint64_t *src_bitset1, size_t size)
+{
+	const size_t num_words = RTE_BITSET_NUM_WORDS(size);
+	size_t word_idx;
+
+	/* Word-by-word OR; unused bits in the last word are combined as well. */
+	for (word_idx = 0; word_idx < num_words; word_idx++) {
+		const uint64_t word0 = src_bitset0[word_idx];
+		const uint64_t word1 = src_bitset1[word_idx];
+
+		dst_bitset[word_idx] = word0 | word1;
+	}
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Bitwise and two bitsets.
+ *
+ * Perform a bitwise AND operation on all bits in the two equal-size
+ * bitsets @c src_bitset0 and @c src_bitset1, and store the result in
+ * @c dst_bitset.
+ *
+ * @param dst_bitset
+ *   A pointer to the destination bitset.
+ * @param src_bitset0
+ *   A pointer to the first source bitset.
+ * @param src_bitset1
+ *   A pointer to the second source bitset.
+ * @param size
+ *   The size of the bitsets (in bits).
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_and(uint64_t *dst_bitset, const uint64_t *src_bitset0,
+	       const uint64_t *src_bitset1, size_t size)
+{
+	const size_t num_words = RTE_BITSET_NUM_WORDS(size);
+	size_t word_idx;
+
+	/* Word-by-word AND; unused bits in the last word are combined as well. */
+	for (word_idx = 0; word_idx < num_words; word_idx++) {
+		const uint64_t word0 = src_bitset0[word_idx];
+		const uint64_t word1 = src_bitset1[word_idx];
+
+		dst_bitset[word_idx] = word0 & word1;
+	}
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Bitwise xor two bitsets.
+ *
+ * Perform a bitwise XOR operation on all bits in the two equal-size
+ * bitsets @c src_bitset0 and @c src_bitset1, and store the result in
+ * @c dst_bitset.
+ *
+ * @param dst_bitset
+ *   A pointer to the destination bitset.
+ * @param src_bitset0
+ *   A pointer to the first source bitset.
+ * @param src_bitset1
+ *   A pointer to the second source bitset.
+ * @param size
+ *   The size of the bitsets (in bits).
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_xor(uint64_t *dst_bitset, const uint64_t *src_bitset0,
+	       const uint64_t *src_bitset1, size_t size)
+{
+	const size_t num_words = RTE_BITSET_NUM_WORDS(size);
+	size_t word_idx;
+
+	/* Word-by-word XOR; unused bits in the last word are combined as well. */
+	for (word_idx = 0; word_idx < num_words; word_idx++) {
+		const uint64_t word0 = src_bitset0[word_idx];
+		const uint64_t word1 = src_bitset1[word_idx];
+
+		dst_bitset[word_idx] = word0 ^ word1;
+	}
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Compute the bitwise complement of a bitset.
+ *
+ * Flip every bit in the @c src_bitset, and store the result in @c
+ * dst_bitset.
+ *
+ * @param dst_bitset
+ *   A pointer to the destination bitset.
+ * @param src_bitset
+ *   A pointer to the source bitset.
+ * @param size
+ *   The size of the bitsets (in bits).
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_complement(uint64_t *dst_bitset, const uint64_t *src_bitset,
+		      size_t size)
+{
+	const size_t num_words = RTE_BITSET_NUM_WORDS(size);
+	size_t word_idx;
+
+	/* Whole words are inverted, so unused bits in the last word flip too. */
+	for (word_idx = 0; word_idx < num_words; word_idx++) {
+		const uint64_t word = src_bitset[word_idx];
+
+		dst_bitset[word_idx] = ~word;
+	}
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Shift bitset left.
+ *
+ * Perform a logical shift left of (multiply) @c src_bitset, and store
+ * the result in @c dst_bitset.
+ *
+ * @param dst_bitset
+ *   A pointer to the destination bitset.
+ * @param src_bitset
+ *   A pointer to the source bitset.
+ * @param size
+ *   The size of the bitsets (in bits).
+ * @param shift_bits
+ *   The number of bits to shift the bitset.
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_shift_left(uint64_t *dst_bitset, const uint64_t *src_bitset,
+		      size_t size, size_t shift_bits)
+{
+	/* The shift is split into a whole-word part and a sub-word part. */
+	const int src_word_offset = shift_bits / RTE_BITSET_WORD_BITS;
+	const int src_bit_offset = shift_bits % RTE_BITSET_WORD_BITS;
+	unsigned int dst_idx;
+
+	for (dst_idx = 0; dst_idx < RTE_BITSET_NUM_WORDS(size); dst_idx++) {
+		/* Source word supplying the upper bits of this destination word. */
+		int src_high_idx = dst_idx - src_word_offset;
+		uint64_t low_bits = 0;
+		uint64_t high_bits = 0;
+
+		/* Destination words below the shift distance stay zero. */
+		if (src_high_idx >= 0) {
+			int src_low_idx = src_high_idx - 1;
+
+			high_bits = src_bitset[src_high_idx] << src_bit_offset;
+
+			/*
+			 * Bits shifted out of the top of the next-lower
+			 * source word end up at the bottom of this word.
+			 * A zero bit offset is excluded, since that would
+			 * mean shifting by the full word width.
+			 */
+			if (src_bit_offset > 0 && src_low_idx >= 0)
+				low_bits = src_bitset[src_low_idx] >>
+					(RTE_BITSET_WORD_BITS - src_bit_offset);
+		}
+		dst_bitset[dst_idx] = low_bits | high_bits;
+	}
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Shift bitset right.
+ *
+ * Perform a logical shift right of (divide) @c src_bitset, and store
+ * the result in @c dst_bitset.
+ *
+ * @param dst_bitset
+ *   A pointer to the destination bitset.
+ * @param src_bitset
+ *   A pointer to the source bitset.
+ * @param size
+ *   The size of the bitsets (in bits).
+ * @param shift_bits
+ *   The number of bits to shift the bitset.
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_shift_right(uint64_t *dst_bitset, const uint64_t *src_bitset,
+		       size_t size, size_t shift_bits)
+{
+	const int num_words = RTE_BITSET_NUM_WORDS(size);
+	const uint64_t used_mask = __RTE_BITSET_USED_MASK(size);
+	/* The shift is split into a whole-word part and a sub-word part. */
+	const int src_word_offset = shift_bits / RTE_BITSET_WORD_BITS;
+	const int src_bit_offset = shift_bits % RTE_BITSET_WORD_BITS;
+	int dst_idx;
+
+	for (dst_idx = 0; dst_idx < num_words; dst_idx++) {
+		/* Source words supplying the lower and upper bits, respectively. */
+		int src_low_idx = src_word_offset + dst_idx;
+		int src_high_idx = src_low_idx + 1;
+		uint64_t src_low_word_bits = 0;
+		uint64_t src_high_word_bits = 0;
+
+		/* Destination words with no source word left stay zero. */
+		if (src_low_idx < num_words) {
+			src_low_word_bits = src_bitset[src_low_idx];
+
+			/*
+			 * Mask out the unused bits of the last source
+			 * word, to keep them from being shifted into
+			 * the result.
+			 */
+			if (src_low_idx == (num_words - 1))
+				src_low_word_bits &= used_mask;
+
+			src_low_word_bits >>= src_bit_offset;
+
+			/*
+			 * A zero bit offset is excluded, since that would
+			 * mean shifting by the full word width below.
+			 */
+			if (src_bit_offset > 0 && src_high_idx < num_words) {
+				src_high_word_bits = src_bitset[src_high_idx];
+
+				if (src_high_idx == (num_words - 1))
+					src_high_word_bits &= used_mask;
+
+				src_high_word_bits <<=
+					(RTE_BITSET_WORD_BITS - src_bit_offset);
+			}
+		}
+		dst_bitset[dst_idx] = src_low_word_bits | src_high_word_bits;
+	}
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Compare two bitsets.
+ *
+ * Compare two bitsets for equality. Only the @c size valid bits are
+ * compared; any unused bits in the last word are ignored.
+ *
+ * @param bitset_a
+ *   A pointer to the first bitset.
+ * @param bitset_b
+ *   A pointer to the second bitset.
+ * @param size
+ *   The size of the bitsets (in bits).
+ * @return
+ *   Returns true if the bitsets are equal, and false otherwise. Two
+ *   empty bitsets (i.e., @c size is 0) are considered equal.
+ */
+
+__rte_experimental
+static inline bool
+rte_bitset_equal(const uint64_t *bitset_a, const uint64_t *bitset_b,
+		 size_t size)
+{
+	size_t i;
+	uint64_t last_a, last_b;
+
+	/*
+	 * An empty bitset has no words. Without this check, the
+	 * "number of words - 1" loop bound below would wrap around.
+	 */
+	if (unlikely(size == 0))
+		return true;
+
+	/* All words but the last consist of valid bits only. */
+	for (i = 0; i < RTE_BITSET_NUM_WORDS(size) - 1; i++)
+		if (bitset_a[i] != bitset_b[i])
+			return false;
+
+	/* Shift the unused (most significant) bits out of the last word. */
+	last_a = bitset_a[i] << __RTE_BITSET_UNUSED(size);
+	last_b = bitset_b[i] << __RTE_BITSET_UNUSED(size);
+
+	return last_a == last_b;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Converts a bitset to a string.
+ *
+ * This function writes a string representation of the bitset to
+ * the supplied buffer.
+ *
+ * Each bit is represented either by '0' or '1' in the output, with
+ * the first (left-most) character in the output being the most
+ * significant bit. The resulting string is NUL terminated.
+ *
+ * @param bitset
+ *   A pointer to the array of bitset 64-bit words.
+ * @param size
+ *   The number of bits the bitset represents.
+ * @param buf
+ *   A buffer to hold the output.
+ * @param capacity
+ *   The size of the buffer. Must be @c size + 1 or larger.
+ * @return
+ *   Returns the number of bytes written (i.e., @c size + 1), or -EINVAL
+ *   in case the buffer capacity was too small.
+ */
+
+__rte_experimental
+ssize_t
+rte_bitset_to_str(const uint64_t *bitset, size_t size, char *buf,
+		  size_t capacity);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_BITSET_H_ */
diff --git a/lib/eal/version.map b/lib/eal/version.map
index e3ff412683..f493cd1ca7 100644
--- a/lib/eal/version.map
+++ b/lib/eal/version.map
@@ -396,6 +396,9 @@ EXPERIMENTAL {
 
 	# added in 24.03
 	rte_vfio_get_device_info; # WINDOWS_NO_EXPORT
+
+	# added in 24.11
+	rte_bitset_to_str;
 };
 
 INTERNAL {
-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [PATCH 2/6] eal: add bitset test suite
  2024-08-09 20:14     ` [PATCH 1/6] eal: add bitset type Mattias Rönnblom
@ 2024-08-09 20:14       ` Mattias Rönnblom
  2024-09-12  4:51         ` Tyler Retzlaff
  2024-08-09 20:14       ` [PATCH 3/6] eal: add atomic bitset functions Mattias Rönnblom
                         ` (7 subsequent siblings)
  8 siblings, 1 reply; 63+ messages in thread
From: Mattias Rönnblom @ 2024-08-09 20:14 UTC (permalink / raw)
  To: dev
  Cc: hofors, Morten Brørup, Tyler Retzlaff, Stephen Hemminger,
	Harry van Haaren, Mattias Rönnblom

Add test suite exercising <rte_bitset.h>.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>

--

RFC v5:
 * Parameterize tests to allow reuse across both atomic and non-atomic
   functions.

RFC v4:
 * Fix signed char issue in test cases. (Stephen Hemminger)
 * Add test cases for logic operations.
 * Use the unit test suite runner helper.
---
 app/test/meson.build   |   1 +
 app/test/test_bitset.c | 894 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 895 insertions(+)
 create mode 100644 app/test/test_bitset.c

diff --git a/app/test/meson.build b/app/test/meson.build
index e29258e6ec..fe248b786c 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -33,6 +33,7 @@ source_file_deps = {
     'test_bitcount.c': [],
     'test_bitmap.c': [],
     'test_bitops.c': [],
+    'test_bitset.c': [],
     'test_bitratestats.c': ['metrics', 'bitratestats', 'ethdev'] + sample_packet_forward_deps,
     'test_bpf.c': ['bpf', 'net'],
     'test_byteorder.c': [],
diff --git a/app/test/test_bitset.c b/app/test/test_bitset.c
new file mode 100644
index 0000000000..b3496df1c0
--- /dev/null
+++ b/app/test/test_bitset.c
@@ -0,0 +1,894 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Ericsson AB
+ */
+
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include <rte_random.h>
+
+#include <rte_bitset.h>
+
+#include "test.h"
+
+#define MAGIC UINT64_C(0xdeadbeefdeadbeef)
+
+/* Fill the first n bytes of buf with pseudo-random data. */
+static void
+rand_buf(void *buf, size_t n)
+{
+	unsigned char *bytes = buf;
+	size_t pos = 0;
+
+	while (pos < n)
+		bytes[pos++] = rte_rand();
+}
+
+/*
+ * Allocate a heap buffer holding a bitset of 'size' bits, with one extra
+ * guard word before and one after the bitset words.
+ *
+ * Both guard words are set to MAGIC, which free_bitset() verifies. The
+ * bitset words themselves are filled with random data, so callers
+ * cannot rely on the memory being zeroed.
+ *
+ * Returns a pointer to the first bitset word (i.e., just past the
+ * leading guard word). Panics if the allocation fails.
+ */
+static uint64_t *
+alloc_bitset(size_t size)
+{
+	uint64_t *p;
+
+	p = malloc(RTE_BITSET_SIZE(size) + 2 * sizeof(uint64_t));
+
+	if (p == NULL)
+		rte_panic("Unable to allocate memory\n");
+
+	/* randomize the bitset words proper, which follow the first guard */
+	rand_buf(&p[1], RTE_BITSET_SIZE(size));
+
+	p[0] = MAGIC;
+	p[RTE_BITSET_NUM_WORDS(size) + 1] = MAGIC;
+
+	return p + 1;
+}
+
+
+/*
+ * Check the guard words surrounding a bitset obtained from
+ * alloc_bitset(), and release the buffer.
+ *
+ * Returns TEST_FAILED, without freeing the memory, if either guard word
+ * no longer holds MAGIC. Otherwise frees the buffer and returns
+ * TEST_SUCCESS.
+ */
+static int
+free_bitset(uint64_t *bitset, size_t size)
+{
+	uint64_t *base = &bitset[-1];
+	const uint64_t *tail_guard = &base[RTE_BITSET_NUM_WORDS(size) + 1];
+
+	if (base[0] != MAGIC || *tail_guard != MAGIC)
+		return TEST_FAILED;
+
+	free(base);
+
+	return TEST_SUCCESS;
+}
+
+/* Return true or false, chosen at random. */
+static bool
+rand_bool(void)
+{
+	return rte_rand_max(2) != 0;
+}
+
+/* Assign a random boolean value to each of the len elements of ary. */
+static void
+rand_bool_ary(bool *ary, size_t len)
+{
+	bool *end = ary + len;
+
+	for (; ary != end; ary++)
+		*ary = rand_bool();
+}
+
+/*
+ * OR random data, restricted to the complement of
+ * __RTE_BITSET_USED_MASK(size), into the last word of the bitset.
+ * Bits already set are left as-is.
+ */
+static void
+rand_unused_bits(uint64_t *bitset, size_t size)
+{
+	const size_t last_word = RTE_BITSET_NUM_WORDS(size) - 1;
+	const uint64_t noise = rte_rand() & ~__RTE_BITSET_USED_MASK(size);
+
+	bitset[last_word] |= noise;
+}
+
+/*
+ * Initialize the bitset, give each of its 'size' bits a random value,
+ * and finally let rand_unused_bits() scramble the last word.
+ */
+static void
+rand_bitset(uint64_t *bitset, size_t size)
+{
+	size_t bit_num = 0;
+
+	rte_bitset_init(bitset, size);
+
+	while (bit_num < size) {
+		rte_bitset_assign(bitset, bit_num, rand_bool());
+		bit_num++;
+	}
+
+	rand_unused_bits(bitset, size);
+}
+
+/*
+ * Signatures of the per-bit operations under test. The parameterized
+ * test cases take functions of these types, allowing the same test
+ * logic to be reused for different implementations of an operation.
+ */
+typedef bool test_fun(const uint64_t *bitset, size_t bit_num);
+typedef void set_fun(uint64_t *bitset, size_t bit_num);
+typedef void clear_fun(uint64_t *bitset, size_t bit_num);
+typedef void assign_fun(uint64_t *bitset, size_t bit_num, bool value);
+typedef void flip_fun(uint64_t *bitset, size_t bit_num);
+
+/*
+ * Set or clear each bit of a 'size'-bit bitset according to a random
+ * reference array, and verify that test_fun reports the same values.
+ *
+ * Returns TEST_SUCCESS if all bits match and the guard words around the
+ * bitset are intact. The bitset is released on the mismatch path as
+ * well.
+ */
+static int
+test_set_clear_size(test_fun test_fun, set_fun set_fun, clear_fun clear_fun,
+		    size_t size)
+{
+	size_t i;
+	bool reference[size];
+	uint64_t *bitset;
+
+	rand_bool_ary(reference, size);
+
+	bitset = alloc_bitset(size);
+
+	TEST_ASSERT(bitset != NULL, "Failed to allocate memory");
+
+	rte_bitset_init(bitset, size);
+
+	for (i = 0; i < size; i++) {
+		if (reference[i])
+			set_fun(bitset, i);
+		else
+			clear_fun(bitset, i);
+	}
+
+	for (i = 0; i < size; i++) {
+		if (reference[i] != test_fun(bitset, i)) {
+			/* do not leak the bitset on the failure path */
+			free_bitset(bitset, size);
+			return TEST_FAILED;
+		}
+	}
+
+	TEST_ASSERT_EQUAL(free_bitset(bitset, size), TEST_SUCCESS,
+			  "Buffer over- or underrun detected");
+
+	return TEST_SUCCESS;
+}
+
+#define RAND_ITERATIONS (10000)
+#define RAND_SET_MAX_SIZE (1000)
+
+/*
+ * Run test_set_clear_size() RAND_ITERATIONS times, each time with a
+ * randomly chosen, non-zero bitset size.
+ */
+static int
+test_set_clear_fun(test_fun test_fun, set_fun set_fun, clear_fun clear_fun)
+{
+	size_t round;
+
+	for (round = 0; round < RAND_ITERATIONS; round++) {
+		size_t size = 1 + rte_rand_max(RAND_SET_MAX_SIZE - 1);
+		int rc;
+
+		rc = test_set_clear_size(test_fun, set_fun, clear_fun, size);
+
+		if (rc != TEST_SUCCESS)
+			return TEST_FAILED;
+	}
+
+	return TEST_SUCCESS;
+}
+
+/* Run the set/clear test against the non-atomic bitset functions. */
+static int
+test_set_clear(void)
+{
+	return test_set_clear_fun(rte_bitset_test, rte_bitset_set,
+				  rte_bitset_clear);
+}
+
+/*
+ * Flip each bit of a random 'size'-bit bitset in turn, and verify that
+ * the target bit changed value and that the bitset otherwise compares
+ * equal to a copy taken before the flip.
+ */
+static int
+test_flip_size(test_fun test_fun, assign_fun assign_fun, flip_fun flip_fun,
+	       size_t size)
+{
+	size_t i;
+	uint64_t *bitset;
+
+	bitset = alloc_bitset(size);
+
+	TEST_ASSERT(bitset != NULL, "Failed to allocate memory");
+
+	rand_bitset(bitset, size);
+
+	for (i = 0; i < size; i++) {
+		RTE_BITSET_DECLARE(reference, size);
+
+		rte_bitset_copy(reference, bitset, size);
+
+		bool value = test_fun(bitset, i);
+
+		flip_fun(bitset, i);
+
+		/* 'i' is a size_t, hence %zu (not %zd) */
+		TEST_ASSERT(test_fun(bitset, i) != value,
+			    "Bit %zu was not flipped", i);
+
+		/* apply the expected change to the pre-flip copy */
+		assign_fun(reference, i, !value);
+
+		TEST_ASSERT(rte_bitset_equal(bitset, reference, size),
+			    "Not only the target bit %zu was flipped", i);
+	}
+
+	TEST_ASSERT_EQUAL(free_bitset(bitset, size), TEST_SUCCESS,
+			  "Buffer over- or underrun detected");
+
+	return TEST_SUCCESS;
+}
+
+/*
+ * Run test_flip_size() RAND_ITERATIONS times, each time with a randomly
+ * chosen, non-zero bitset size.
+ */
+static int
+test_flip_fun(test_fun test_fun, assign_fun assign_fun, flip_fun flip_fun)
+{
+	size_t round;
+
+	for (round = 0; round < RAND_ITERATIONS; round++) {
+		size_t size = 1 + rte_rand_max(RAND_SET_MAX_SIZE - 1);
+		int rc;
+
+		rc = test_flip_size(test_fun, assign_fun, flip_fun, size);
+
+		if (rc != TEST_SUCCESS)
+			return TEST_FAILED;
+	}
+
+	return TEST_SUCCESS;
+}
+
+/* Run the flip test against the non-atomic bitset functions. */
+static int
+test_flip(void)
+{
+	return test_flip_fun(rte_bitset_test, rte_bitset_assign,
+			     rte_bitset_flip);
+}
+
+/*
+ * Reference implementation of the find operations, working on an array
+ * of booleans.
+ *
+ * Examines len elements, beginning at index 'start' and wrapping around
+ * at num_bools. Returns the index of the first element examined which
+ * is equal to 'set', or -1 if there is no such element.
+ */
+static ssize_t
+find(const bool *ary, size_t num_bools, size_t start, size_t len, bool set)
+{
+	size_t offset = 0;
+
+	while (offset < len) {
+		ssize_t candidate = (start + offset) % num_bools;
+
+		if (ary[candidate] == set)
+			return candidate;
+
+		offset++;
+	}
+
+	return -1;
+}
+
+/* Reference lookup of the first true element; see find(). */
+static ssize_t
+find_set(const bool *ary, size_t num_bools, size_t start, size_t len)
+{
+	return find(ary, num_bools, start, len, true);
+}
+
+/* Reference lookup of the first false element; see find(). */
+static ssize_t
+find_clear(const bool *ary, size_t num_bools, size_t start, size_t len)
+{
+	return find(ary, num_bools, start, len, false);
+}
+
+#define FFS_ITERATIONS (100)
+
+/*
+ * Compare the results of the rte_bitset_find_*() functions with those
+ * of the boolean-array reference implementation, for a random bitset of
+ * 'size' bits.
+ *
+ * If 'set' is true, the functions looking for set bits are tested;
+ * otherwise, the ones looking for cleared bits. FFS_ITERATIONS random
+ * (start_bit, len) ranges are tried.
+ */
+static int
+test_find_size(size_t size, bool set)
+{
+	uint64_t *bitset;
+	bool reference[size];
+	size_t i;
+
+	bitset = alloc_bitset(size);
+
+	TEST_ASSERT(bitset != NULL, "Failed to allocate memory");
+
+	rte_bitset_init(bitset, size);
+
+	/* populate the bitset and the reference array with the same bits */
+	for (i = 0; i < size; i++) {
+		bool bit = rand_bool();
+		reference[i] = bit;
+
+		if (bit)
+			rte_bitset_set(bitset, i);
+		else /* redundant, still useful for testing */
+			rte_bitset_clear(bitset, i);
+	}
+
+	for (i = 0; i < FFS_ITERATIONS; i++) {
+		size_t start_bit = rte_rand_max(size);
+		size_t len = rte_rand_max(size + 1);
+		bool full_range = len == size && start_bit == 0;
+		bool wraps = start_bit + len > size;
+		ssize_t rc;
+
+		/*
+		 * A range extending past the end of the bitset always
+		 * goes to the wrapping variant. Otherwise, the function
+		 * to use is picked at random among the applicable ones.
+		 */
+		if (set) {
+			if (full_range && rand_bool())
+				rc = rte_bitset_find_first_set(bitset,
+							       size);
+			else if (wraps || rand_bool()) {
+				rc = rte_bitset_find_set_wrap(bitset, size,
+							      start_bit, len);
+
+			} else
+				rc = rte_bitset_find_set(bitset, size,
+							 start_bit, len);
+
+			if (rc != find_set(reference, size, start_bit,
+					   len))
+				return TEST_FAILED;
+		} else {
+			if (full_range && rand_bool())
+				rc = rte_bitset_find_first_clear(bitset,
+								 size);
+			else if (wraps || rand_bool())
+				rc = rte_bitset_find_clear_wrap(bitset,
+								size,
+								start_bit, len);
+			else
+				rc = rte_bitset_find_clear(bitset, size,
+							   start_bit, len);
+
+			if (rc != find_clear(reference, size, start_bit,
+					     len))
+				return TEST_FAILED;
+		}
+
+	}
+
+	TEST_ASSERT_EQUAL(free_bitset(bitset, size), TEST_SUCCESS,
+			  "Buffer over- or underrun detected");
+
+	return TEST_SUCCESS;
+}
+
+/* Test the functions finding set bits, for a bitset of 'size' bits. */
+static int
+test_find_set_size(size_t size)
+{
+	return test_find_size(size, true);
+}
+
+/* Test the functions finding cleared bits, for a bitset of 'size' bits. */
+static int
+test_find_clear_size(size_t size)
+{
+	return test_find_size(size, false);
+}
+
+/*
+ * Run the find-set and find-clear tests RAND_ITERATIONS times, each
+ * time with a randomly chosen bitset size of at least two bits.
+ */
+static int
+test_find(void)
+{
+	size_t round;
+
+	for (round = 0; round < RAND_ITERATIONS; round++) {
+		size_t size = 2 + rte_rand_max(RAND_SET_MAX_SIZE - 2);
+
+		if (test_find_set_size(size) != TEST_SUCCESS ||
+		    test_find_clear_size(size) != TEST_SUCCESS)
+			return TEST_FAILED;
+	}
+
+	return TEST_SUCCESS;
+}
+
+/*
+ * Account for one visit of bit match_idx by incrementing its counter in
+ * 'calls'. Returns TEST_FAILED, leaving 'calls' untouched, if match_idx
+ * is negative or not less than 'size'.
+ */
+static int
+record_match(ssize_t match_idx, size_t size, int *calls)
+{
+	const bool in_range = match_idx >= 0 && (size_t)match_idx < size;
+
+	if (!in_range)
+		return TEST_FAILED;
+
+	calls[match_idx] += 1;
+
+	return TEST_SUCCESS;
+}
+
+/*
+ * Verify the RTE_BITSET_FOREACH_*() macros for a bitset of 'size' bits.
+ *
+ * A random range (start_bit, len) of the bitset is populated from a
+ * random reference array. If may_wrap is false, the range is confined
+ * to not extend past the end of the bitset. One of the foreach macros
+ * for set bits (if 'set' is true) or cleared bits (otherwise) is then
+ * run, and the number of visits to each bit in the range is checked
+ * against the reference array.
+ */
+static int
+test_foreach_size(ssize_t size, bool may_wrap, bool set)
+{
+	bool reference[size];
+	int calls[size];
+	uint64_t *bitset;
+	ssize_t i;
+	ssize_t start_bit;
+	ssize_t len;
+	bool full_range;
+	size_t total_calls = 0;
+
+	rand_bool_ary(reference, size);
+
+	bitset = alloc_bitset(size);
+
+	TEST_ASSERT(bitset != NULL, "Failed to allocate memory");
+
+	memset(calls, 0, sizeof(calls));
+
+	start_bit = rte_rand_max(size);
+	len = may_wrap ? rte_rand_max(size + 1) :
+		rte_rand_max(size - start_bit + 1);
+
+	rte_bitset_init(bitset, size);
+
+	/* random data in the unused bits should not matter */
+	rand_buf(bitset, RTE_BITSET_SIZE(size));
+
+	for (i = start_bit; i < start_bit + len; i++) {
+		size_t idx = i % size;
+
+		if (reference[idx])
+			rte_bitset_set(bitset, idx);
+		else
+			rte_bitset_clear(bitset, idx);
+
+		if (rte_bitset_test(bitset, idx) != reference[idx])
+			return TEST_FAILED;
+	}
+
+	full_range = (len == size && start_bit == 0);
+
+	/* XXX: verify iteration order as well */
+	if (set) {
+		if (full_range && rand_bool()) {
+			RTE_BITSET_FOREACH_SET(i, bitset, size) {
+				if (record_match(i, size, calls) !=
+				    TEST_SUCCESS)
+					return TEST_FAILED;
+			}
+		} else if (may_wrap) {
+			RTE_BITSET_FOREACH_SET_WRAP(i, bitset, size,
+						    start_bit, len) {
+				if (record_match(i, size, calls) !=
+				    TEST_SUCCESS)
+					return TEST_FAILED;
+			}
+		} else {
+			RTE_BITSET_FOREACH_SET_RANGE(i, bitset, size,
+						     start_bit, len) {
+				if (record_match(i, size, calls) !=
+				    TEST_SUCCESS)
+					return TEST_FAILED;
+			}
+		}
+	} else {
+		if (full_range && rand_bool()) {
+			RTE_BITSET_FOREACH_CLEAR(i, bitset, size)
+				if (record_match(i, size, calls) !=
+				    TEST_SUCCESS)
+					return TEST_FAILED;
+		} else if (may_wrap) {
+			RTE_BITSET_FOREACH_CLEAR_WRAP(i, bitset, size,
+						      start_bit, len) {
+				if (record_match(i, size, calls) !=
+				    TEST_SUCCESS)
+					return TEST_FAILED;
+			}
+		} else {
+			RTE_BITSET_FOREACH_CLEAR_RANGE(i, bitset, size,
+						       start_bit, len)
+				if (record_match(i, size, calls) !=
+				    TEST_SUCCESS)
+					return TEST_FAILED;
+		}
+	}
+
+	for (i = 0; i < len; i++) {
+		size_t idx = (start_bit + i) % size;
+
+		/* a matching bit must have been visited exactly once */
+		if (reference[idx] == set && calls[idx] != 1) {
+			printf("bit %zu shouldn't have been found %d "
+			       "times\n", idx, calls[idx]);
+			return TEST_FAILED;
+		}
+
+		/* a non-matching bit must not have been visited at all */
+		if (reference[idx] != set && calls[idx] != 0) {
+			printf("bit %zu shouldn't have been found, but was "
+			       "found %d times\n", idx, calls[idx]);
+			return TEST_FAILED;
+		}
+
+		total_calls += calls[idx];
+	}
+
+	if (full_range) {
+		size_t count;
+
+		count = set ? rte_bitset_count_set(bitset, size) :
+			rte_bitset_count_clear(bitset, size);
+
+		if (count != total_calls)
+			return TEST_FAILED;
+	}
+
+	TEST_ASSERT_EQUAL(free_bitset(bitset, size), TEST_SUCCESS,
+			  "Buffer over- or underrun detected");
+
+	return TEST_SUCCESS;
+}
+
+/*
+ * Run test_foreach_size() for all four combinations of may_wrap and
+ * set, RAND_ITERATIONS times, each time with a randomly chosen,
+ * non-zero bitset size.
+ */
+static int
+test_foreach(void)
+{
+	size_t round;
+
+	for (round = 0; round < RAND_ITERATIONS; round++) {
+		size_t size = 1 + rte_rand_max(RAND_SET_MAX_SIZE - 1);
+
+		if (test_foreach_size(size, false, true) != TEST_SUCCESS ||
+		    test_foreach_size(size, false, false) != TEST_SUCCESS ||
+		    test_foreach_size(size, true, true) != TEST_SUCCESS ||
+		    test_foreach_size(size, true, false) != TEST_SUCCESS)
+			return TEST_FAILED;
+	}
+
+	return TEST_SUCCESS;
+}
+
+/*
+ * Verify rte_bitset_count_set() and rte_bitset_count_clear() for a
+ * bitset of 'size' bits, in the all-cleared, all-set, and
+ * single-bit-set states.
+ */
+static int
+test_count_size(size_t size)
+{
+	uint64_t *bitset;
+
+	bitset = alloc_bitset(size);
+
+	TEST_ASSERT(bitset != NULL, "Failed to allocate memory");
+
+	rte_bitset_init(bitset, size);
+
+	/* the checks below expect the unused bits not to be counted */
+	rand_unused_bits(bitset, size);
+
+	if (rte_bitset_count_set(bitset, size) != 0)
+		return TEST_FAILED;
+
+	if (rte_bitset_count_clear(bitset, size) != size)
+		return TEST_FAILED;
+
+	rte_bitset_set_all(bitset, size);
+
+	if (rte_bitset_count_set(bitset, size) != size)
+		return TEST_FAILED;
+
+	if (rte_bitset_count_clear(bitset, size) != 0)
+		return TEST_FAILED;
+
+	rte_bitset_clear_all(bitset, size);
+
+	if (rte_bitset_count_set(bitset, size) != 0)
+		return TEST_FAILED;
+
+	if (rte_bitset_count_clear(bitset, size) != size)
+		return TEST_FAILED;
+
+	/* a single, randomly chosen, bit set */
+	rte_bitset_set(bitset, rte_rand_max(size));
+
+	if (rte_bitset_count_set(bitset, size) != 1)
+		return TEST_FAILED;
+
+	if (rte_bitset_count_clear(bitset, size) != (size - 1))
+		return TEST_FAILED;
+
+	rte_bitset_clear_all(bitset, size);
+	if (rte_bitset_count_set(bitset, size) != 0)
+		return TEST_FAILED;
+	if (rte_bitset_count_clear(bitset, size) != size)
+		return TEST_FAILED;
+
+	rte_bitset_set_all(bitset, size);
+	if (rte_bitset_count_set(bitset, size) != size)
+		return TEST_FAILED;
+	if (rte_bitset_count_clear(bitset, size) != 0)
+		return TEST_FAILED;
+
+	TEST_ASSERT_EQUAL(free_bitset(bitset, size), TEST_SUCCESS,
+			  "Buffer over- or underrun detected");
+
+	return TEST_SUCCESS;
+}
+
+/*
+ * Run test_count_size() for a number of fixed sizes, followed by
+ * RAND_ITERATIONS randomly chosen, non-zero sizes.
+ */
+static int
+test_count(void)
+{
+	static const size_t fixed_sizes[] = { 128, 1, 63, 64, 65 };
+	const size_t num_fixed = sizeof(fixed_sizes) / sizeof(fixed_sizes[0]);
+	size_t i;
+
+	for (i = 0; i < num_fixed; i++)
+		if (test_count_size(fixed_sizes[i]) != TEST_SUCCESS)
+			return TEST_FAILED;
+
+	for (i = 0; i < RAND_ITERATIONS; i++) {
+		size_t size = 1 + rte_rand_max(RAND_SET_MAX_SIZE - 1);
+
+		if (test_count_size(size) != TEST_SUCCESS)
+			return TEST_FAILED;
+	}
+
+	return TEST_SUCCESS;
+}
+
+/*
+ * Declare a bitset of 'size' bits using RTE_BITSET_DECLARE(), set one
+ * randomly chosen bit, and verify that this bit, and only this bit, is
+ * set.
+ *
+ * Makes the enclosing function return TEST_FAILED on failure. On
+ * success, execution continues after the macro, so that several
+ * invocations may follow each other in the same function.
+ */
+#define GEN_DECLARE(size)						\
+	do {								\
+		RTE_BITSET_DECLARE(bitset, size);			\
+		size_t idx;						\
+									\
+		idx = rte_rand_max(size);				\
+		rte_bitset_init(bitset, size);				\
+									\
+		rte_bitset_set(bitset, idx);				\
+		if (!rte_bitset_test(bitset, idx))			\
+			return TEST_FAILED;				\
+		if (rte_bitset_count_set(bitset, size) != 1)		\
+			return TEST_FAILED;				\
+	} while (0)
+
+/* Exercise RTE_BITSET_DECLARE() for a selection of bitset sizes. */
+static int
+test_define(void)
+{
+	GEN_DECLARE(1);
+	GEN_DECLARE(64);
+	GEN_DECLARE(65);
+	GEN_DECLARE(4097);
+
+	return TEST_SUCCESS;
+}
+
+/*
+ * Verify a two-operand bitset logic operation against its boolean
+ * counterpart.
+ *
+ * Two random bitsets of a random size are combined using bitset_op, and
+ * every bit of the result is compared with the value bool_op produces
+ * for the corresponding pair of input bits.
+ */
+static int test_logic_op(void (*bitset_op)(uint64_t *, const uint64_t *,
+					   const uint64_t *, size_t),
+			 bool (*bool_op)(bool, bool))
+{
+	const size_t size = 1 + rte_rand_max(200);
+	RTE_BITSET_DECLARE(bitset_a, size);
+	RTE_BITSET_DECLARE(bitset_b, size);
+	RTE_BITSET_DECLARE(bitset_d, size);
+
+	bool ary_a[size];
+	bool ary_b[size];
+	bool ary_d[size];
+
+	/*
+	 * The bitsets are variable-length arrays, and thus start out
+	 * with indeterminate contents. Give all words a defined value
+	 * before they are read (by the assign and logic operations).
+	 */
+	rte_bitset_init(bitset_a, size);
+	rte_bitset_init(bitset_b, size);
+	rte_bitset_init(bitset_d, size);
+
+	rand_bool_ary(ary_a, size);
+	rand_bool_ary(ary_b, size);
+
+	size_t i;
+	for (i = 0; i < size; i++) {
+		rte_bitset_assign(bitset_a, i, ary_a[i]);
+		rte_bitset_assign(bitset_b, i, ary_b[i]);
+		ary_d[i] = bool_op(ary_a[i], ary_b[i]);
+	}
+
+	bitset_op(bitset_d, bitset_a, bitset_b, size);
+
+	/* 'i' is a size_t, hence %zu (not %zd) */
+	for (i = 0; i < size; i++)
+		TEST_ASSERT_EQUAL(rte_bitset_test(bitset_d, i),
+				  ary_d[i], "Unexpected value of bit %zu", i);
+
+	return TEST_SUCCESS;
+}
+
+/* Boolean reference for rte_bitset_or(). */
+static bool
+bool_or(bool a, bool b)
+{
+	return a || b;
+}
+
+/* Verify rte_bitset_or() against bool_or(). */
+static int
+test_or(void)
+{
+	return test_logic_op(rte_bitset_or, bool_or);
+}
+
+/* Boolean reference for rte_bitset_and(). */
+static bool
+bool_and(bool a, bool b)
+{
+	return a && b;
+}
+
+/* Verify rte_bitset_and() against bool_and(). */
+static int
+test_and(void)
+{
+	return test_logic_op(rte_bitset_and, bool_and);
+}
+
+/* Boolean reference for rte_bitset_xor(): true if the operands differ. */
+static bool
+bool_xor(bool a, bool b)
+{
+	return a != b;
+}
+
+/* Verify rte_bitset_xor() against bool_xor(). */
+static int
+test_xor(void)
+{
+	return test_logic_op(rte_bitset_xor, bool_xor);
+}
+
+/*
+ * Complement random bitsets of random sizes, and verify that a randomly
+ * chosen bit has the opposite value in the result.
+ */
+static int
+test_complement(void)
+{
+	int i;
+
+	for (i = 0; i < RAND_ITERATIONS; i++) {
+		const size_t size = 1 + rte_rand_max(RAND_SET_MAX_SIZE - 1);
+
+		RTE_BITSET_DECLARE(src, size);
+
+		rand_bitset(src, size);
+
+		/*
+		 * The index must be a size_t. A bool would confine the
+		 * check to bit 0 and bit 1.
+		 */
+		size_t bit_idx = rte_rand_max(size);
+		bool bit_value = rte_bitset_test(src, bit_idx);
+
+		RTE_BITSET_DECLARE(dst, size);
+
+		rte_bitset_complement(dst, src, size);
+
+		TEST_ASSERT(bit_value != rte_bitset_test(dst, bit_idx),
+			    "Bit %zu was not flipped", bit_idx);
+	}
+
+	return TEST_SUCCESS;
+}
+
+/*
+ * Verify rte_bitset_shift_right() (if 'right' is true) or
+ * rte_bitset_shift_left() (otherwise).
+ *
+ * Random bitsets of random sizes are shifted a random number of bits,
+ * and the result is compared with a reference bitset built bit-by-bit.
+ * The destination bitset comes from alloc_bitset(), and has its guard
+ * words checked by free_bitset().
+ */
+static int
+test_shift(bool right)
+{
+	int i;
+
+	const char *direction = right ? "right" : "left";
+
+	for (i = 0; i < 10000; i++) {
+		const int size = 1 + (int)rte_rand_max(500);
+		/* the shift count may exceed the size of the bitset */
+		const int shift_count = (int)rte_rand_max(1.5 * size);
+		int src_idx;
+
+		RTE_BITSET_DECLARE(src, size);
+		RTE_BITSET_DECLARE(reference, size);
+
+		rte_bitset_init(src, size);
+		rte_bitset_init(reference, size);
+
+		rand_unused_bits(src, size);
+		rand_unused_bits(reference, size);
+
+		for (src_idx = 0; src_idx < size; src_idx++) {
+			bool value = rand_bool();
+
+			rte_bitset_assign(src, src_idx, value);
+
+			int dst_idx = right ? src_idx - shift_count :
+				src_idx + shift_count;
+
+			/* bits shifted out of the bitset are dropped */
+			if (dst_idx >= 0 && dst_idx < size)
+				rte_bitset_assign(reference, dst_idx, value);
+		}
+
+		uint64_t *dst = alloc_bitset(size);
+
+		if (right)
+			rte_bitset_shift_right(dst, src, size, shift_count);
+		else
+			rte_bitset_shift_left(dst, src, size, shift_count);
+
+		TEST_ASSERT(rte_bitset_equal(dst, reference, size),
+			    "Unexpected result from shifting bitset of size "
+			    "%d bits %d bits %s", size, shift_count, direction);
+
+		TEST_ASSERT_EQUAL(free_bitset(dst, size), TEST_SUCCESS,
+				  "Shift %s operation overwrote buffer",
+				  direction);
+	}
+
+	return TEST_SUCCESS;
+}
+
+/* Verify rte_bitset_shift_right(). */
+static int
+test_shift_right(void)
+{
+	return test_shift(true);
+}
+
+/* Verify rte_bitset_shift_left(). */
+static int
+test_shift_left(void)
+{
+	return test_shift(false);
+}
+
+/*
+ * Verify that rte_bitset_equal() considers two bitsets with the same
+ * bits set to be equal, also when they differ in a bit beyond 'size'.
+ */
+static int
+test_equal(void)
+{
+	const size_t size = 100;
+	RTE_BITSET_DECLARE(bitset_a, size);
+	RTE_BITSET_DECLARE(bitset_b, size);
+
+	rand_buf(bitset_a, RTE_BITSET_SIZE(size));
+	rand_buf(bitset_b, RTE_BITSET_SIZE(size));
+
+	rte_bitset_init(bitset_a, size);
+	rte_bitset_init(bitset_b, size);
+
+	rte_bitset_set(bitset_a, 9);
+	rte_bitset_set(bitset_b, 9);
+	rte_bitset_set(bitset_a, 90);
+	rte_bitset_set(bitset_b, 90);
+
+	if (!rte_bitset_equal(bitset_a, bitset_b, size))
+		return TEST_FAILED;
+
+	/* set unused bit, which should be ignored */
+	/* bit 60 of the second word is beyond the bitset's 100 bits */
+	rte_bitset_set(&bitset_a[1], 60);
+
+	if (!rte_bitset_equal(bitset_a, bitset_b, size))
+		return TEST_FAILED;
+
+	return TEST_SUCCESS;
+}
+
+/*
+ * Verify that a bitset compares equal to the bitset it was copied from
+ * using rte_bitset_copy().
+ */
+static int
+test_copy(void)
+{
+	const size_t size = 100;
+	RTE_BITSET_DECLARE(bitset_a, size);
+	RTE_BITSET_DECLARE(bitset_b, size);
+	bool identical;
+
+	rand_buf(bitset_a, RTE_BITSET_SIZE(size));
+	rand_buf(bitset_b, RTE_BITSET_SIZE(size));
+
+	rte_bitset_copy(bitset_a, bitset_b, size);
+
+	identical = rte_bitset_equal(bitset_a, bitset_b, size);
+
+	return identical ? TEST_SUCCESS : TEST_FAILED;
+}
+
+/*
+ * Verify the string produced by, and the value returned from,
+ * rte_bitset_to_str(), including the case of a too-small buffer.
+ */
+static int
+test_to_str(void)
+{
+	char buf[1024];
+	RTE_BITSET_DECLARE(bitset, 128);
+
+	rte_bitset_init(bitset, 128);
+	rte_bitset_set(bitset, 1);
+
+	/* two bits, of which bit 1 is set; buffer of the exact size needed */
+	if (rte_bitset_to_str(bitset, 2, buf, 3) != 3)
+		return TEST_FAILED;
+	if (strcmp(buf, "10") != 0)
+		return TEST_FAILED;
+
+	rte_bitset_set(bitset, 0);
+
+	/* bit 1 is still set, but is outside the one-bit size given */
+	if (rte_bitset_to_str(bitset, 1, buf, sizeof(buf)) != 2)
+		return TEST_FAILED;
+	if (strcmp(buf, "1") != 0)
+		return TEST_FAILED;
+
+	rte_bitset_init(bitset, 99);
+	rte_bitset_set(bitset, 98);
+
+	if (rte_bitset_to_str(bitset, 99, buf, sizeof(buf)) != 100)
+		return TEST_FAILED;
+
+	/* the highest-numbered bit is expected first in the string */
+	if (buf[0] != '1' || strchr(&buf[1], '1') != NULL)
+		return TEST_FAILED;
+
+	/* 128 bits do not fit in a 64-byte buffer */
+	if (rte_bitset_to_str(bitset, 128, buf, 64) != -EINVAL)
+		return TEST_FAILED;
+
+	return TEST_SUCCESS;
+}
+
+/* The bitset test suite. No per-test-case setup or teardown is used. */
+static struct unit_test_suite bitset_tests  = {
+	.suite_name = "bitset test suite",
+	.unit_test_cases = {
+		TEST_CASE_ST(NULL, NULL, test_set_clear),
+		TEST_CASE_ST(NULL, NULL, test_flip),
+		TEST_CASE_ST(NULL, NULL, test_find),
+		TEST_CASE_ST(NULL, NULL, test_foreach),
+		TEST_CASE_ST(NULL, NULL, test_count),
+		TEST_CASE_ST(NULL, NULL, test_define),
+		TEST_CASE_ST(NULL, NULL, test_or),
+		TEST_CASE_ST(NULL, NULL, test_and),
+		TEST_CASE_ST(NULL, NULL, test_xor),
+		TEST_CASE_ST(NULL, NULL, test_complement),
+		TEST_CASE_ST(NULL, NULL, test_shift_right),
+		TEST_CASE_ST(NULL, NULL, test_shift_left),
+		TEST_CASE_ST(NULL, NULL, test_equal),
+		TEST_CASE_ST(NULL, NULL, test_copy),
+		TEST_CASE_ST(NULL, NULL, test_to_str),
+		TEST_CASES_END()
+	}
+};
+
+/* Test entry point: hand the bitset test suite to the suite runner. */
+static int
+test_bitset(void)
+{
+	return unit_test_suite_runner(&bitset_tests);
+}
+
+REGISTER_FAST_TEST(bitset_autotest, true, true, test_bitset);
-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [PATCH 3/6] eal: add atomic bitset functions
  2024-08-09 20:14     ` [PATCH 1/6] eal: add bitset type Mattias Rönnblom
  2024-08-09 20:14       ` [PATCH 2/6] eal: add bitset test suite Mattias Rönnblom
@ 2024-08-09 20:14       ` Mattias Rönnblom
  2024-09-12  4:51         ` Tyler Retzlaff
  2024-08-09 20:14       ` [PATCH 4/6] eal: add unit tests for atomic bitset operations Mattias Rönnblom
                         ` (6 subsequent siblings)
  8 siblings, 1 reply; 63+ messages in thread
From: Mattias Rönnblom @ 2024-08-09 20:14 UTC (permalink / raw)
  To: dev
  Cc: hofors, Morten Brørup, Tyler Retzlaff, Stephen Hemminger,
	Harry van Haaren, Mattias Rönnblom

Extend the bitset API with atomic versions of the most basic bitset
operations.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
---
 lib/eal/include/rte_bitset.h | 155 +++++++++++++++++++++++++++++++++++
 1 file changed, 155 insertions(+)

diff --git a/lib/eal/include/rte_bitset.h b/lib/eal/include/rte_bitset.h
index 49a07c77b8..c0441b0e22 100644
--- a/lib/eal/include/rte_bitset.h
+++ b/lib/eal/include/rte_bitset.h
@@ -376,6 +376,161 @@ rte_bitset_flip(uint64_t *bitset, size_t bit_num)
 	__RTE_BITSET_DELEGATE(rte_bit_flip, bitset, bit_num);
 }
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Atomically test if a bit is set.
+ *
+ * Atomically test if a bit in a bitset is set, with the specified
+ * memory ordering.
+ *
+ * Bits are numbered from 0 to (size - 1) (inclusive).
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param bit_num
+ *   Index of the bit to test. Index 0 is the least significant bit.
+ * @param memory_order
+ *   The memory order to use.
+ * @return
+ *   Returns true if the bit is '1', and false if the bit is '0'.
+ */
+
+__rte_experimental
+static inline bool
+rte_bitset_atomic_test(const uint64_t *bitset, size_t bit_num,
+		       int memory_order)
+{
+	return __RTE_BITSET_DELEGATE_N(rte_bit_atomic_test, bitset, bit_num,
+				       memory_order);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Atomically set a bit in the bitset.
+ *
+ * Set a bit in a bitset as an atomic operation, with the specified
+ * memory ordering.
+ *
+ * rte_bitset_atomic_set() is multi-thread safe, provided all threads
+ * acting in parallel on the same bitset do so through
+ * @c rte_bitset_atomic_*() functions.
+ *
+ * Bits are numbered from 0 to (size - 1) (inclusive).
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param bit_num
+ *   The index of the bit to be set.
+ * @param memory_order
+ *   The memory order to use.
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_atomic_set(uint64_t *bitset, size_t bit_num, int memory_order)
+{
+	__RTE_BITSET_DELEGATE_N(rte_bit_atomic_set, bitset, bit_num,
+				memory_order);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Atomically clear a bit in the bitset.
+ *
+ * Clear a bit in a bitset as an atomic operation, with the specified
+ * memory ordering.
+ *
+ * rte_bitset_atomic_clear() is multi-thread safe, provided all
+ * threads acting in parallel on the same bitset do so through
+ * @c rte_bitset_atomic_*() functions.
+ *
+ * Bits are numbered from 0 to (size - 1) (inclusive).
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param bit_num
+ *   The index of the bit to be cleared.
+ * @param memory_order
+ *   The memory order to use.
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_atomic_clear(uint64_t *bitset, size_t bit_num, int memory_order)
+{
+	__RTE_BITSET_DELEGATE_N(rte_bit_atomic_clear, bitset, bit_num,
+				memory_order);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Atomically set or clear a bit in the bitset.
+ *
+ * Assign a value to a bit in a bitset as an atomic operation, with
+ * the specified memory ordering.
+ *
+ * rte_bitset_atomic_assign() is multi-thread safe, provided all
+ * threads acting in parallel on the same bitset do so through
+ * @c rte_bitset_atomic_*() functions.
+ *
+ * Bits are numbered from 0 to (size - 1) (inclusive).
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param bit_num
+ *   The index of the bit to be set or cleared.
+ * @param bit_value
+ *   Control if the bit should be set or cleared.
+ * @param memory_order
+ *   The memory order to use.
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_atomic_assign(uint64_t *bitset, size_t bit_num, bool bit_value,
+			 int memory_order)
+{
+	__RTE_BITSET_DELEGATE_N(rte_bit_atomic_assign, bitset, bit_num,
+				bit_value, memory_order);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Atomically change the value of a bit in the bitset.
+ *
+ * Flip a bit in a bitset as an atomic operation, with the specified
+ * memory ordering.
+ *
+ * rte_bitset_atomic_flip() is multi-thread safe, provided all threads
+ * acting in parallel on the same bitset do so through
+ * @c rte_bitset_atomic_*() functions.
+ *
+ * Bits are numbered from 0 to (size - 1) (inclusive).
+ *
+ * @param bitset
+ *   A pointer to the array of words making up the bitset.
+ * @param bit_num
+ *   The index of the bit to be flipped.
+ * @param memory_order
+ *   The memory order to use.
+ */
+
+__rte_experimental
+static inline void
+rte_bitset_atomic_flip(uint64_t *bitset, size_t bit_num, int memory_order)
+{
+	__RTE_BITSET_DELEGATE_N(rte_bit_atomic_flip, bitset, bit_num,
+				memory_order);
+}
+
 /**
  * @warning
  * @b EXPERIMENTAL: this API may change without prior notice.
-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [PATCH 4/6] eal: add unit tests for atomic bitset operations
  2024-08-09 20:14     ` [PATCH 1/6] eal: add bitset type Mattias Rönnblom
  2024-08-09 20:14       ` [PATCH 2/6] eal: add bitset test suite Mattias Rönnblom
  2024-08-09 20:14       ` [PATCH 3/6] eal: add atomic bitset functions Mattias Rönnblom
@ 2024-08-09 20:14       ` Mattias Rönnblom
  2024-09-12  4:52         ` Tyler Retzlaff
  2024-08-09 20:14       ` [PATCH 5/6] service: use multi-word bitset to represent service flags Mattias Rönnblom
                         ` (5 subsequent siblings)
  8 siblings, 1 reply; 63+ messages in thread
From: Mattias Rönnblom @ 2024-08-09 20:14 UTC (permalink / raw)
  To: dev
  Cc: hofors, Morten Brørup, Tyler Retzlaff, Stephen Hemminger,
	Harry van Haaren, Mattias Rönnblom

Extend bitset tests to cover the basic operation of the
rte_bitset_atomic_*() family of functions.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
---
 app/test/test_bitset.c | 48 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/app/test/test_bitset.c b/app/test/test_bitset.c
index b3496df1c0..32224a1eee 100644
--- a/app/test/test_bitset.c
+++ b/app/test/test_bitset.c
@@ -222,6 +222,52 @@ test_flip(void)
 			     rte_bitset_flip);
 }
 
+/* Adapt rte_bitset_atomic_test(), with relaxed ordering, to test_fun. */
+static bool
+bitset_atomic_test(const uint64_t *bitset, size_t bit_num)
+{
+	return rte_bitset_atomic_test(bitset, bit_num,
+				      rte_memory_order_relaxed);
+}
+
+/* Adapt rte_bitset_atomic_set(), with relaxed ordering, to set_fun. */
+static void
+bitset_atomic_set(uint64_t *bitset, size_t bit_num)
+{
+	rte_bitset_atomic_set(bitset, bit_num, rte_memory_order_relaxed);
+}
+
+/* Adapt rte_bitset_atomic_clear(), with relaxed ordering, to clear_fun. */
+static void
+bitset_atomic_clear(uint64_t *bitset, size_t bit_num)
+{
+	rte_bitset_atomic_clear(bitset, bit_num, rte_memory_order_relaxed);
+}
+
+/* Adapt rte_bitset_atomic_flip(), with relaxed ordering, to flip_fun. */
+static void
+bitset_atomic_flip(uint64_t *bitset, size_t bit_num)
+{
+	rte_bitset_atomic_flip(bitset, bit_num, rte_memory_order_relaxed);
+}
+
+/* Adapt rte_bitset_atomic_assign(), with relaxed ordering, to assign_fun. */
+static void
+bitset_atomic_assign(uint64_t *bitset, size_t bit_num, bool bit_value)
+{
+	rte_bitset_atomic_assign(bitset, bit_num, bit_value,
+				 rte_memory_order_relaxed);
+}
+
+/* Run the set/clear test against the atomic bitset functions. */
+static int
+test_atomic_set_clear(void)
+{
+	return test_set_clear_fun(bitset_atomic_test, bitset_atomic_set,
+				  bitset_atomic_clear);
+}
+
+/* Run the flip test against the atomic bitset functions. */
+static int
+test_atomic_flip(void)
+{
+	return test_flip_fun(bitset_atomic_test, bitset_atomic_assign,
+			     bitset_atomic_flip);
+}
+
 static ssize_t
 find(const bool *ary, size_t num_bools, size_t start, size_t len, bool set)
 {
@@ -868,6 +914,8 @@ static struct unit_test_suite bitset_tests  = {
 	.unit_test_cases = {
 		TEST_CASE_ST(NULL, NULL, test_set_clear),
 		TEST_CASE_ST(NULL, NULL, test_flip),
+		TEST_CASE_ST(NULL, NULL, test_atomic_set_clear),
+		TEST_CASE_ST(NULL, NULL, test_atomic_flip),
 		TEST_CASE_ST(NULL, NULL, test_find),
 		TEST_CASE_ST(NULL, NULL, test_foreach),
 		TEST_CASE_ST(NULL, NULL, test_count),
-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [PATCH 5/6] service: use multi-word bitset to represent service flags
  2024-08-09 20:14     ` [PATCH 1/6] eal: add bitset type Mattias Rönnblom
                         ` (2 preceding siblings ...)
  2024-08-09 20:14       ` [PATCH 4/6] eal: add unit tests for atomic bitset operations Mattias Rönnblom
@ 2024-08-09 20:14       ` Mattias Rönnblom
  2024-09-12  4:52         ` Tyler Retzlaff
  2024-08-09 20:14       ` [PATCH 6/6] event/dsw: add support for larger port count Mattias Rönnblom
                         ` (4 subsequent siblings)
  8 siblings, 1 reply; 63+ messages in thread
From: Mattias Rönnblom @ 2024-08-09 20:14 UTC (permalink / raw)
  To: dev
  Cc: hofors, Morten Brørup, Tyler Retzlaff, Stephen Hemminger,
	Harry van Haaren, Mattias Rönnblom

Use a multi-word bitset to track which services are mapped to which
lcores, allowing the RTE_SERVICE_NUM_MAX compile-time constant to be >
64.

Replace array-of-bytes service-currently-active flags with a more
compact multi-word bitset-based representation, reducing memory
footprint somewhat.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
---
 lib/eal/common/rte_service.c | 70 ++++++++++++++----------------------
 1 file changed, 27 insertions(+), 43 deletions(-)

diff --git a/lib/eal/common/rte_service.c b/lib/eal/common/rte_service.c
index 56379930b6..ec0f47e141 100644
--- a/lib/eal/common/rte_service.c
+++ b/lib/eal/common/rte_service.c
@@ -11,6 +11,7 @@
 
 #include <eal_trace_internal.h>
 #include <rte_lcore.h>
+#include <rte_bitset.h>
 #include <rte_branch_prediction.h>
 #include <rte_common.h>
 #include <rte_cycles.h>
@@ -63,11 +64,11 @@ struct service_stats {
 /* the internal values of a service core */
 struct __rte_cache_aligned core_state {
 	/* map of services IDs are run on this core */
-	uint64_t service_mask;
+	RTE_BITSET_DECLARE(mapped_services, RTE_SERVICE_NUM_MAX);
 	RTE_ATOMIC(uint8_t) runstate; /* running or stopped */
 	RTE_ATOMIC(uint8_t) thread_active; /* indicates when thread is in service_run() */
 	uint8_t is_service_core; /* set if core is currently a service core */
-	uint8_t service_active_on_lcore[RTE_SERVICE_NUM_MAX];
+	RTE_BITSET_DECLARE(service_active_on_lcore, RTE_SERVICE_NUM_MAX);
 	RTE_ATOMIC(uint64_t) loops;
 	RTE_ATOMIC(uint64_t) cycles;
 	struct service_stats service_stats[RTE_SERVICE_NUM_MAX];
@@ -81,11 +82,6 @@ static uint32_t rte_service_library_initialized;
 int32_t
 rte_service_init(void)
 {
-	/* Hard limit due to the use of an uint64_t-based bitmask (and the
-	 * clzl intrinsic).
-	 */
-	RTE_BUILD_BUG_ON(RTE_SERVICE_NUM_MAX > 64);
-
 	if (rte_service_library_initialized) {
 		EAL_LOG(NOTICE,
 			"service library init() called, init flag %d",
@@ -296,7 +292,7 @@ rte_service_component_unregister(uint32_t id)
 
 	/* clear the run-bit in all cores */
 	for (i = 0; i < RTE_MAX_LCORE; i++)
-		lcore_states[i].service_mask &= ~(UINT64_C(1) << id);
+		rte_bitset_clear(lcore_states[i].mapped_services, id);
 
 	memset(&rte_services[id], 0, sizeof(struct rte_service_spec_impl));
 
@@ -410,7 +406,7 @@ service_runner_do_callback(struct rte_service_spec_impl *s,
 
 /* Expects the service 's' is valid. */
 static int32_t
-service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
+service_run(uint32_t i, struct core_state *cs, const uint64_t *mapped_services,
 	    struct rte_service_spec_impl *s, uint32_t serialize_mt_unsafe)
 {
 	if (!s)
@@ -424,12 +420,12 @@ service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
 			RUNSTATE_RUNNING ||
 	    rte_atomic_load_explicit(&s->app_runstate, rte_memory_order_acquire) !=
 			RUNSTATE_RUNNING ||
-	    !(service_mask & (UINT64_C(1) << i))) {
-		cs->service_active_on_lcore[i] = 0;
+	    !rte_bitset_test(mapped_services, i)) {
+		rte_bitset_clear(cs->service_active_on_lcore, i);
 		return -ENOEXEC;
 	}
 
-	cs->service_active_on_lcore[i] = 1;
+	rte_bitset_set(cs->service_active_on_lcore, i);
 
 	if ((service_mt_safe(s) == 0) && (serialize_mt_unsafe == 1)) {
 		if (!rte_spinlock_trylock(&s->execute_lock))
@@ -454,7 +450,7 @@ rte_service_may_be_active(uint32_t id)
 		return -EINVAL;
 
 	for (i = 0; i < lcore_count; i++) {
-		if (lcore_states[ids[i]].service_active_on_lcore[id])
+		if (rte_bitset_test(lcore_states[ids[i]].service_active_on_lcore, id))
 			return 1;
 	}
 
@@ -474,7 +470,9 @@ rte_service_run_iter_on_app_lcore(uint32_t id, uint32_t serialize_mt_unsafe)
 	 */
 	rte_atomic_fetch_add_explicit(&s->num_mapped_cores, 1, rte_memory_order_relaxed);
 
-	int ret = service_run(id, cs, UINT64_MAX, s, serialize_mt_unsafe);
+	RTE_BITSET_DECLARE(all_services, RTE_SERVICE_NUM_MAX);
+	rte_bitset_set_all(all_services, RTE_SERVICE_NUM_MAX);
+	int ret = service_run(id, cs, all_services, s, serialize_mt_unsafe);
 
 	rte_atomic_fetch_sub_explicit(&s->num_mapped_cores, 1, rte_memory_order_relaxed);
 
@@ -485,7 +483,6 @@ static int32_t
 service_runner_func(void *arg)
 {
 	RTE_SET_USED(arg);
-	uint8_t i;
 	const int lcore = rte_lcore_id();
 	struct core_state *cs = &lcore_states[lcore];
 
@@ -497,20 +494,11 @@ service_runner_func(void *arg)
 	 */
 	while (rte_atomic_load_explicit(&cs->runstate, rte_memory_order_acquire) ==
 			RUNSTATE_RUNNING) {
+		ssize_t id;
 
-		const uint64_t service_mask = cs->service_mask;
-		uint8_t start_id;
-		uint8_t end_id;
-
-		if (service_mask == 0)
-			continue;
-
-		start_id = rte_ctz64(service_mask);
-		end_id = 64 - rte_clz64(service_mask);
-
-		for (i = start_id; i < end_id; i++) {
+		RTE_BITSET_FOREACH_SET(id, cs->mapped_services, RTE_SERVICE_NUM_MAX) {
 			/* return value ignored as no change to code flow */
-			service_run(i, cs, service_mask, service_get(i), 1);
+			service_run(id, cs, cs->mapped_services, service_get(id), 1);
 		}
 
 		rte_atomic_store_explicit(&cs->loops, cs->loops + 1, rte_memory_order_relaxed);
@@ -519,8 +507,7 @@ service_runner_func(void *arg)
 	/* Switch off this core for all services, to ensure that future
 	 * calls to may_be_active() know this core is switched off.
 	 */
-	for (i = 0; i < RTE_SERVICE_NUM_MAX; i++)
-		cs->service_active_on_lcore[i] = 0;
+	rte_bitset_clear_all(cs->service_active_on_lcore, RTE_SERVICE_NUM_MAX);
 
 	/* Use SEQ CST memory ordering to avoid any re-ordering around
 	 * this store, ensuring that once this store is visible, the service
@@ -586,7 +573,7 @@ rte_service_lcore_count_services(uint32_t lcore)
 	if (!cs->is_service_core)
 		return -ENOTSUP;
 
-	return rte_popcount64(cs->service_mask);
+	return rte_bitset_count_set(cs->mapped_services, RTE_SERVICE_NUM_MAX);
 }
 
 int32_t
@@ -639,25 +626,23 @@ service_update(uint32_t sid, uint32_t lcore, uint32_t *set, uint32_t *enabled)
 			!lcore_states[lcore].is_service_core)
 		return -EINVAL;
 
-	uint64_t sid_mask = UINT64_C(1) << sid;
 	if (set) {
-		uint64_t lcore_mapped = lcore_states[lcore].service_mask &
-			sid_mask;
+		uint64_t lcore_mapped = rte_bitset_test(lcore_states[lcore].mapped_services, sid);
 
 		if (*set && !lcore_mapped) {
-			lcore_states[lcore].service_mask |= sid_mask;
+			rte_bitset_set(lcore_states[lcore].mapped_services, sid);
 			rte_atomic_fetch_add_explicit(&rte_services[sid].num_mapped_cores,
 				1, rte_memory_order_relaxed);
 		}
 		if (!*set && lcore_mapped) {
-			lcore_states[lcore].service_mask &= ~(sid_mask);
+			rte_bitset_clear(lcore_states[lcore].mapped_services, sid);
 			rte_atomic_fetch_sub_explicit(&rte_services[sid].num_mapped_cores,
 				1, rte_memory_order_relaxed);
 		}
 	}
 
 	if (enabled)
-		*enabled = !!(lcore_states[lcore].service_mask & (sid_mask));
+		*enabled = rte_bitset_test(lcore_states[lcore].mapped_services, sid);
 
 	return 0;
 }
@@ -699,11 +684,11 @@ set_lcore_state(uint32_t lcore, int32_t state)
 int32_t
 rte_service_lcore_reset_all(void)
 {
-	/* loop over cores, reset all to mask 0 */
+	/* loop over cores, reset all mapped services */
 	uint32_t i;
 	for (i = 0; i < RTE_MAX_LCORE; i++) {
 		if (lcore_states[i].is_service_core) {
-			lcore_states[i].service_mask = 0;
+			rte_bitset_clear_all(lcore_states[i].mapped_services, RTE_SERVICE_NUM_MAX);
 			set_lcore_state(i, ROLE_RTE);
 			/* runstate act as guard variable Use
 			 * store-release memory order here to synchronize
@@ -731,7 +716,7 @@ rte_service_lcore_add(uint32_t lcore)
 	set_lcore_state(lcore, ROLE_SERVICE);
 
 	/* ensure that after adding a core the mask and state are defaults */
-	lcore_states[lcore].service_mask = 0;
+	rte_bitset_clear_all(lcore_states[lcore].mapped_services, RTE_SERVICE_NUM_MAX);
 	/* Use store-release memory order here to synchronize with
 	 * load-acquire in runstate read functions.
 	 */
@@ -814,12 +799,11 @@ rte_service_lcore_stop(uint32_t lcore)
 
 	uint32_t i;
 	struct core_state *cs = &lcore_states[lcore];
-	uint64_t service_mask = cs->service_mask;
 
 	for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
-		int32_t enabled = service_mask & (UINT64_C(1) << i);
-		int32_t service_running = rte_service_runstate_get(i);
-		int32_t only_core = (1 ==
+		bool enabled = rte_bitset_test(cs->mapped_services, i);
+		bool service_running = rte_service_runstate_get(i);
+		bool only_core = (1 ==
 			rte_atomic_load_explicit(&rte_services[i].num_mapped_cores,
 				rte_memory_order_relaxed));
 
-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [PATCH 6/6] event/dsw: add support for larger port count
  2024-08-09 20:14     ` [PATCH 1/6] eal: add bitset type Mattias Rönnblom
                         ` (3 preceding siblings ...)
  2024-08-09 20:14       ` [PATCH 5/6] service: use multi-word bitset to represent service flags Mattias Rönnblom
@ 2024-08-09 20:14       ` Mattias Rönnblom
  2024-09-12  4:53         ` Tyler Retzlaff
  2024-08-20 17:09       ` [PATCH 1/6] eal: add bitset type Mattias Rönnblom
                         ` (3 subsequent siblings)
  8 siblings, 1 reply; 63+ messages in thread
From: Mattias Rönnblom @ 2024-08-09 20:14 UTC (permalink / raw)
  To: dev
  Cc: hofors, Morten Brørup, Tyler Retzlaff, Stephen Hemminger,
	Harry van Haaren, Mattias Rönnblom

Switch from using an open-coded, single-word bitset to using
<rte_bitset.h> to represent which event ports are linked to a
particular event queue.

Besides the cleaner code, this also allows the user to extend the
maximum port count beyond 64, by means of changing an "event_dev.h"
macro.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
---
 drivers/event/dsw/dsw_evdev.c | 19 +++++++------------
 drivers/event/dsw/dsw_evdev.h |  3 ++-
 drivers/event/dsw/dsw_event.c |  7 ++++---
 3 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/drivers/event/dsw/dsw_evdev.c b/drivers/event/dsw/dsw_evdev.c
index 0dea1091e3..961217334e 100644
--- a/drivers/event/dsw/dsw_evdev.c
+++ b/drivers/event/dsw/dsw_evdev.c
@@ -123,6 +123,7 @@ dsw_queue_setup(struct rte_eventdev *dev, uint8_t queue_id,
 		queue->schedule_type = conf->schedule_type;
 	}
 
+	rte_bitset_init(queue->serving_ports, DSW_MAX_PORTS);
 	queue->num_serving_ports = 0;
 
 	return 0;
@@ -149,20 +150,16 @@ dsw_queue_release(struct rte_eventdev *dev __rte_unused,
 static void
 queue_add_port(struct dsw_queue *queue, uint16_t port_id)
 {
-	uint64_t port_mask = UINT64_C(1) << port_id;
-
-	queue->serving_ports |=	port_mask;
+	rte_bitset_set(queue->serving_ports, port_id);
 	queue->num_serving_ports++;
 }
 
 static bool
 queue_remove_port(struct dsw_queue *queue, uint16_t port_id)
 {
-	uint64_t port_mask = UINT64_C(1) << port_id;
-
-	if (queue->serving_ports & port_mask) {
+	if (rte_bitset_test(queue->serving_ports, port_id)) {
 		queue->num_serving_ports--;
-		queue->serving_ports ^= port_mask;
+		rte_bitset_clear(queue->serving_ports, port_id);
 		return true;
 	}
 
@@ -263,14 +260,12 @@ initial_flow_to_port_assignment(struct dsw_evdev *dsw)
 		struct dsw_queue *queue = &dsw->queues[queue_id];
 		uint16_t flow_hash;
 		for (flow_hash = 0; flow_hash < DSW_MAX_FLOWS; flow_hash++) {
-			uint8_t skip =
-				rte_rand_max(queue->num_serving_ports);
+			uint8_t skip = rte_rand_max(queue->num_serving_ports);
 			uint8_t port_id;
 
 			for (port_id = 0;; port_id++) {
-				uint64_t port_mask = UINT64_C(1) << port_id;
-
-				if (queue->serving_ports & port_mask) {
+				if (rte_bitset_test(queue->serving_ports,
+						    port_id)) {
 					if (skip == 0)
 						break;
 					skip--;
diff --git a/drivers/event/dsw/dsw_evdev.h b/drivers/event/dsw/dsw_evdev.h
index c9bf4f8b6b..606c1e9f8a 100644
--- a/drivers/event/dsw/dsw_evdev.h
+++ b/drivers/event/dsw/dsw_evdev.h
@@ -7,6 +7,7 @@
 
 #include <eventdev_pmd.h>
 
+#include <rte_bitset.h>
 #include <rte_event_ring.h>
 #include <rte_eventdev.h>
 
@@ -237,7 +238,7 @@ struct __rte_cache_aligned dsw_port {
 
 struct dsw_queue {
 	uint8_t schedule_type;
-	uint64_t serving_ports;
+	RTE_BITSET_DECLARE(serving_ports, DSW_MAX_PORTS);
 	uint16_t num_serving_ports;
 
 	alignas(RTE_CACHE_LINE_SIZE) uint8_t flow_to_port_map[DSW_MAX_FLOWS];
diff --git a/drivers/event/dsw/dsw_event.c b/drivers/event/dsw/dsw_event.c
index 33f741990f..f18c7e8fbc 100644
--- a/drivers/event/dsw/dsw_event.c
+++ b/drivers/event/dsw/dsw_event.c
@@ -457,9 +457,8 @@ static bool
 dsw_is_serving_port(struct dsw_evdev *dsw, uint8_t port_id, uint8_t queue_id)
 {
 	struct dsw_queue *queue = &dsw->queues[queue_id];
-	uint64_t port_mask = UINT64_C(1) << port_id;
 
-	return queue->serving_ports & port_mask;
+	return rte_bitset_test(queue->serving_ports, port_id);
 }
 
 static bool
@@ -583,7 +582,9 @@ dsw_schedule(struct dsw_evdev *dsw, uint8_t queue_id, uint16_t flow_hash)
 		/* A single-link queue, or atomic/ordered/parallel but
 		 * with just a single serving port.
 		 */
-		port_id = rte_bsf64(queue->serving_ports);
+		port_id = (uint8_t)rte_bitset_find_first_set(
+			queue->serving_ports, DSW_MAX_PORTS
+		);
 
 	DSW_LOG_DP(DEBUG, "Event with queue_id %d flow_hash %d is scheduled "
 		   "to port %d.\n", queue_id, flow_hash, port_id);
-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 1/6] eal: add bitset type
  2024-08-09 20:14     ` [PATCH 1/6] eal: add bitset type Mattias Rönnblom
                         ` (4 preceding siblings ...)
  2024-08-09 20:14       ` [PATCH 6/6] event/dsw: add support for larger port count Mattias Rönnblom
@ 2024-08-20 17:09       ` Mattias Rönnblom
  2024-09-02 13:55       ` Morten Brørup
                         ` (2 subsequent siblings)
  8 siblings, 0 replies; 63+ messages in thread
From: Mattias Rönnblom @ 2024-08-20 17:09 UTC (permalink / raw)
  To: Mattias Rönnblom, dev
  Cc: Morten Brørup, Tyler Retzlaff, Stephen Hemminger,
	Harry van Haaren, david.marchand, Thomas Monjalon

On 2024-08-09 22:14, Mattias Rönnblom wrote:
> Introduce a set of functions and macros that operate on sets of bits,
> kept in arrays of 64-bit words.
> 
> RTE bitset is designed for bitsets which are larger than what fits in
> a single machine word (i.e., 64 bits). For very large bitsets, the
> <rte_bitmap.h> API may be a more appropriate choice.
> 

Anyone else that might have time to have a look at this patch set?

I find this kind of bitset incredibly useful, so I really want to get
it into DPDK.

<snip>

^ permalink raw reply	[flat|nested] 63+ messages in thread

* RE: [PATCH 1/6] eal: add bitset type
  2024-08-09 20:14     ` [PATCH 1/6] eal: add bitset type Mattias Rönnblom
                         ` (5 preceding siblings ...)
  2024-08-20 17:09       ` [PATCH 1/6] eal: add bitset type Mattias Rönnblom
@ 2024-09-02 13:55       ` Morten Brørup
  2024-09-02 14:46         ` Mattias Rönnblom
  2024-09-02 14:49         ` Mattias Rönnblom
  2024-09-12  4:51       ` Tyler Retzlaff
  2024-09-17  9:36       ` [PATCH v7 0/6] Improve EAL bit operations API Mattias Rönnblom
  8 siblings, 2 replies; 63+ messages in thread
From: Morten Brørup @ 2024-09-02 13:55 UTC (permalink / raw)
  To: Mattias Rönnblom, dev
  Cc: hofors, Tyler Retzlaff, Stephen Hemminger, Harry van Haaren

> From: Mattias Rönnblom [mailto:mattias.ronnblom@ericsson.com]
> Sent: Friday, 9 August 2024 22.15
> 
> Introduce a set of functions and macros that operate on sets of bits,
> kept in arrays of 64-bit words.

This could be 32-bit words on 32 bit architectures. Just an idea.

> 
> RTE bitset is designed for bitsets which are larger than what fits in
> a single machine word (i.e., 64 bits). For very large bitsets, the
> <rte_bitmap.h> API may be a more appropriate choice.

RTE bitset uses size_t to index the bits.
This means that it supports very large bitsets.

If it is not supposed to support more than 2^32 bits, it could use uint32_t for indexing.
I don't know if it makes any practical difference.

> 
> Depends-on: series-32740 ("Improve EAL bit operations API")
> 
> Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
> 
> --

With or without considering the above ideas, this library is a good addition to DPDK.

For the series,
Acked-by: Morten Brørup <mb@smartsharesystems.com>


^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 1/6] eal: add bitset type
  2024-09-02 13:55       ` Morten Brørup
@ 2024-09-02 14:46         ` Mattias Rönnblom
  2024-09-02 14:49         ` Mattias Rönnblom
  1 sibling, 0 replies; 63+ messages in thread
From: Mattias Rönnblom @ 2024-09-02 14:46 UTC (permalink / raw)
  To: Morten Brørup, Mattias Rönnblom, dev
  Cc: Tyler Retzlaff, Stephen Hemminger, Harry van Haaren,
	david.marchand, Thomas Monjalon

On 2024-09-02 15:55, Morten Brørup wrote:
>> From: Mattias Rönnblom [mailto:mattias.ronnblom@ericsson.com]
>> Sent: Friday, 9 August 2024 22.15
>>
>> Introduce a set of functions and macros that operate on sets of bits,
>> kept in arrays of 64-bit words.
> 
> This could be 32-bit words on 32 bit architectures. Just an idea.
> 

The use of "unsigned long" instead of "uint64_t" as the type has been 
discussed before on the list.

I prefer uint64_t because it's less to type and the performance benefits 
of using "unsigned long" on 32-bit ISAs likely are small. That said, I 
have no strong opinion on the subject.

>>
>> RTE bitset is designed for bitsets which are larger than what fits in
>> a single machine word (i.e., 64 bits). For very large bitsets, the
>> <rte_bitmap.h> API may be a more appropriate choice.
> 
> RTE bitset uses size_t to index the bits.
> This means that it supports very large bitsets.
> 

Yes, it does, although I can't say I see a use case for enormous 
bitsets. But, who knows.

I used size_t in an attempt to slightly improve readability.

Performance wise, it does not matter, at least in the tests I did. I 
don't know if it ever could, considering these functions will pretty 
much always be inlined.

> If it is not supposed to support more than 2^32 bits, it could use uint32_t for indexing.
> I don't know if it makes any practical difference.
> 
>>
>> Depends-on: series-32740 ("Improve EAL bit operations API")
>>
>> Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
>>
>> --
> 
> With or without considering the above ideas, this library is a good addition to DPDK.
> 
> For the series,
> Acked-by: Morten Brørup <mb@smartsharesystems.com>
> 

^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 1/6] eal: add bitset type
  2024-09-02 13:55       ` Morten Brørup
  2024-09-02 14:46         ` Mattias Rönnblom
@ 2024-09-02 14:49         ` Mattias Rönnblom
  1 sibling, 0 replies; 63+ messages in thread
From: Mattias Rönnblom @ 2024-09-02 14:49 UTC (permalink / raw)
  To: Morten Brørup, Mattias Rönnblom, dev
  Cc: Tyler Retzlaff, Stephen Hemminger, Harry van Haaren,
	david.marchand, Thomas Monjalon

On 2024-09-02 15:55, Morten Brørup wrote:

>> --
> 
> With or without considering the above ideas, this library is a good addition to DPDK.
> 
> For the series,
> Acked-by: Morten Brørup <mb@smartsharesystems.com>
> 

Thomas and David, what else need I do to get this merged?

I have the same question for the bitops patch set.

Thanks.

^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 1/6] eal: add bitset type
  2024-08-09 20:14     ` [PATCH 1/6] eal: add bitset type Mattias Rönnblom
                         ` (6 preceding siblings ...)
  2024-09-02 13:55       ` Morten Brørup
@ 2024-09-12  4:51       ` Tyler Retzlaff
  2024-09-17  9:36       ` [PATCH v7 0/6] Improve EAL bit operations API Mattias Rönnblom
  8 siblings, 0 replies; 63+ messages in thread
From: Tyler Retzlaff @ 2024-09-12  4:51 UTC (permalink / raw)
  To: Mattias Rönnblom
  Cc: dev, hofors, Morten Brørup, Stephen Hemminger, Harry van Haaren

On Fri, Aug 09, 2024 at 10:14:35PM +0200, Mattias Rönnblom wrote:
> Introduce a set of functions and macros that operate on sets of bits,
> kept in arrays of 64-bit words.
> 
> RTE bitset is designed for bitsets which are larger than what fits in
> a single machine word (i.e., 64 bits). For very large bitsets, the
> <rte_bitmap.h> API may be a more appropriate choice.
> 
> Depends-on: series-32740 ("Improve EAL bit operations API")
> 
> Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
> 
> --

Acked-by: Tyler Retzlaff <roretzla@linux.microsoft.com>


^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 2/6] eal: add bitset test suite
  2024-08-09 20:14       ` [PATCH 2/6] eal: add bitset test suite Mattias Rönnblom
@ 2024-09-12  4:51         ` Tyler Retzlaff
  0 siblings, 0 replies; 63+ messages in thread
From: Tyler Retzlaff @ 2024-09-12  4:51 UTC (permalink / raw)
  To: Mattias Rönnblom
  Cc: dev, hofors, Morten Brørup, Stephen Hemminger, Harry van Haaren

On Fri, Aug 09, 2024 at 10:14:36PM +0200, Mattias Rönnblom wrote:
> Add test suite exercising <rte_bitset.h>.
> 
> Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
> 
> --
> 
> RFC v5:
>  * Parameterize tests to allow reuse across both atomic and non-atomic
>    functions.
> 
> RFC v4:
>  * Fix signed char issue in test cases. (Stephen Hemminger)
>  * Add test cases for logic operations.
>  * Use the unit test suite runner helper.
> ---

Acked-by: Tyler Retzlaff <roretzla@linux.microsoft.com>


^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 3/6] eal: add atomic bitset functions
  2024-08-09 20:14       ` [PATCH 3/6] eal: add atomic bitset functions Mattias Rönnblom
@ 2024-09-12  4:51         ` Tyler Retzlaff
  0 siblings, 0 replies; 63+ messages in thread
From: Tyler Retzlaff @ 2024-09-12  4:51 UTC (permalink / raw)
  To: Mattias Rönnblom
  Cc: dev, hofors, Morten Brørup, Stephen Hemminger, Harry van Haaren

On Fri, Aug 09, 2024 at 10:14:37PM +0200, Mattias Rönnblom wrote:
> Extend the bitset API with atomic versions of the most basic bitset
> operations.
> 
> Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
> ---

Acked-by: Tyler Retzlaff <roretzla@linux.microsoft.com>


^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 4/6] eal: add unit tests for atomic bitset operations
  2024-08-09 20:14       ` [PATCH 4/6] eal: add unit tests for atomic bitset operations Mattias Rönnblom
@ 2024-09-12  4:52         ` Tyler Retzlaff
  0 siblings, 0 replies; 63+ messages in thread
From: Tyler Retzlaff @ 2024-09-12  4:52 UTC (permalink / raw)
  To: Mattias Rönnblom
  Cc: dev, hofors, Morten Brørup, Stephen Hemminger, Harry van Haaren

On Fri, Aug 09, 2024 at 10:14:38PM +0200, Mattias Rönnblom wrote:
> Extend bitset tests to cover the basic operation of the
> rte_bitset_atomic_*() family of functions.
> 
> Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
> ---

Acked-by: Tyler Retzlaff <roretzla@linux.microsoft.com>


^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 5/6] service: use multi-word bitset to represent service flags
  2024-08-09 20:14       ` [PATCH 5/6] service: use multi-word bitset to represent service flags Mattias Rönnblom
@ 2024-09-12  4:52         ` Tyler Retzlaff
  0 siblings, 0 replies; 63+ messages in thread
From: Tyler Retzlaff @ 2024-09-12  4:52 UTC (permalink / raw)
  To: Mattias Rönnblom
  Cc: dev, hofors, Morten Brørup, Stephen Hemminger, Harry van Haaren

On Fri, Aug 09, 2024 at 10:14:39PM +0200, Mattias Rönnblom wrote:
> Use a multi-word bitset to track which services are mapped to which
> lcores, allowing the RTE_SERVICE_NUM_MAX compile-time constant to be >
> 64.
> 
> Replace array-of-bytes service-currently-active flags with a more
> compact multi-word bitset-based representation, reducing memory
> footprint somewhat.
> 
> Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
> ---

Acked-by: Tyler Retzlaff <roretzla@linux.microsoft.com>


^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 6/6] event/dsw: add support for larger port count
  2024-08-09 20:14       ` [PATCH 6/6] event/dsw: add support for larger port count Mattias Rönnblom
@ 2024-09-12  4:53         ` Tyler Retzlaff
  0 siblings, 0 replies; 63+ messages in thread
From: Tyler Retzlaff @ 2024-09-12  4:53 UTC (permalink / raw)
  To: Mattias Rönnblom
  Cc: dev, hofors, Morten Brørup, Stephen Hemminger, Harry van Haaren

On Fri, Aug 09, 2024 at 10:14:40PM +0200, Mattias Rönnblom wrote:
> Switch from using an open-coded, single-word bitset to using
> <rte_bitset.h> to represent which event ports are linked to a
> particular event queue.
> 
> Besides the cleaner code, this also allows the user to extend the
> maximum port count beyond 64, by means of changing an "event_dev.h"
> macro.
> 
> Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
> ---

Acked-by: Tyler Retzlaff <roretzla@linux.microsoft.com>


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [PATCH v7 0/6] Improve EAL bit operations API
  2024-08-09 20:14     ` [PATCH 1/6] eal: add bitset type Mattias Rönnblom
                         ` (7 preceding siblings ...)
  2024-09-12  4:51       ` Tyler Retzlaff
@ 2024-09-17  9:36       ` Mattias Rönnblom
  2024-09-17  9:36         ` [PATCH v7 1/6] dpdk: do not force C linkage on include file dependencies Mattias Rönnblom
                           ` (5 more replies)
  8 siblings, 6 replies; 63+ messages in thread
From: Mattias Rönnblom @ 2024-09-17  9:36 UTC (permalink / raw)
  To: dev
  Cc: hofors, Heng Wang, Stephen Hemminger, Tyler Retzlaff,
	Morten Brørup, Jack Bond-Preston, David Marchand,
	Chengwen Feng, Mattias Rönnblom

This patch set represents an attempt to improve and extend the RTE
bitops API, in particular for functions that operate on individual
bits.

All new functionality is exposed to the user as generic selection
macros, delegating the actual work to private (__-marked) static
inline functions. Public functions (e.g., rte_bit_set32()) would just
be bloating the API. Such generic selection macros will here be
referred to as "functions", although technically they are not.

The legacy <rte_bitops.h> rte_bit_relaxed_*() functions are replaced
with two new families:

rte_bit_[test|set|clear|assign|flip]() which provides no memory
ordering or atomicity guarantees, but does provide the best
performance. The performance degradation resulting from the use of
volatile (e.g., forcing loads and stores to actually occur and in the
number specified) and atomic (e.g., LOCK-prefixed instructions on x86)
may be significant. rte_bit_[test|set|clear|assign|flip]() may be
used with volatile word pointers, in which case they guarantee
that the program-level accesses actually occur.

rte_bit_atomic_*() which provides atomic bit-level operations,
including the possibility of specifying memory ordering constraints
(or the lack thereof).

The atomic functions take non-_Atomic pointers, to be flexible, just
like the GCC builtins and default <rte_stdatomic.h>. The issue with
_Atomic APIs is that it may well be the case that the user wants to
perform both non-atomic and atomic operations on the same word.

Having _Atomic-marked addresses would complicate supporting atomic
bit-level operations in the bitset API (proposed in a different RFC
patchset), and potentially other APIs depending on RTE bitops for
atomic bit-level ops. Either one needs two bitset variants, one
_Atomic bitset and one non-atomic one, or the bitset code needs to
cast the non-_Atomic pointer to an _Atomic one. Having a separate
_Atomic bitset would be bloat and also prevent the user from both, in
some situations, doing atomic operations against a bit set, while in
other situations (e.g., at times when MT safety is not a concern)
operating on the same objects in a non-atomic manner.

Unlike rte_bit_relaxed_*(), individual bits are represented by bool,
not uint32_t or uint64_t. The author found the use of such large types
confusing, and also failed to see any performance benefits.

A set of functions rte_bit_*_assign() are added, to assign a
particular boolean value to a particular bit.

All new functions have properly documented semantics.

All new functions operate on both 32 and 64-bit words, with type
checking.

_Generic allows the user code to be a little more compact. Having a
type-generic atomic test/set/clear/assign bit API also seems
consistent with the "core" (word-size) atomics API, which is generic
(both GCC builtins and <rte_stdatomic.h> are).

The _Generic versions avoid having explicit unsigned long versions of
all functions. If you have an unsigned long, it's safe to use the
generic version (e.g., rte_bit_set()) and _Generic will pick the right
function, provided long is either 32 or 64 bit on your platform (which
it is on all DPDK-supported ABIs).

The generic rte_bit_set() is a macro, and not a function, but
nevertheless has been given a lower-case name. That's how C11 does it
(for atomics, and other _Generic), and <rte_stdatomic.h>. Its address
can't be taken, but it does not evaluate its parameters more than
once.

C++ doesn't support generic selection. In C++ translation units the
_Generic macros are replaced with overloaded functions, implemented by
means of a huge, complicated C macro mess.

Mattias Rönnblom (6):
  dpdk: do not force C linkage on include file dependencies
  eal: extend bit manipulation functionality
  eal: add unit tests for bit operations
  eal: add atomic bit operations
  eal: add unit tests for atomic bit access functions
  eal: extend bitops to handle volatile pointers

 app/test/packet_burst_generator.h             |   8 +-
 app/test/test_bitops.c                        | 416 +++++++++-
 app/test/virtual_pmd.h                        |   4 +-
 doc/guides/rel_notes/release_24_11.rst        |  17 +
 drivers/bus/auxiliary/bus_auxiliary_driver.h  |   8 +-
 drivers/bus/cdx/bus_cdx_driver.h              |   8 +-
 drivers/bus/dpaa/include/fsl_qman.h           |   8 +-
 drivers/bus/fslmc/bus_fslmc_driver.h          |   8 +-
 drivers/bus/pci/bus_pci_driver.h              |   8 +-
 drivers/bus/pci/rte_bus_pci.h                 |   8 +-
 drivers/bus/platform/bus_platform_driver.h    |   8 +-
 drivers/bus/vdev/bus_vdev_driver.h            |   8 +-
 drivers/bus/vmbus/bus_vmbus_driver.h          |   8 +-
 drivers/bus/vmbus/rte_bus_vmbus.h             |   8 +-
 drivers/dma/cnxk/cnxk_dma_event_dp.h          |   8 +-
 drivers/dma/ioat/ioat_hw_defs.h               |   4 +-
 drivers/event/dlb2/rte_pmd_dlb2.h             |   8 +-
 drivers/mempool/dpaa2/rte_dpaa2_mempool.h     |   6 +-
 drivers/net/avp/rte_avp_fifo.h                |   8 +-
 drivers/net/bonding/rte_eth_bond.h            |   4 +-
 drivers/net/i40e/rte_pmd_i40e.h               |   8 +-
 drivers/net/mlx5/mlx5_trace.h                 |   8 +-
 drivers/net/ring/rte_eth_ring.h               |   4 +-
 drivers/net/vhost/rte_eth_vhost.h             |   8 +-
 drivers/raw/ifpga/afu_pmd_core.h              |   8 +-
 drivers/raw/ifpga/afu_pmd_he_hssi.h           |   6 +-
 drivers/raw/ifpga/afu_pmd_he_lpbk.h           |   6 +-
 drivers/raw/ifpga/afu_pmd_he_mem.h            |   6 +-
 drivers/raw/ifpga/afu_pmd_n3000.h             |   6 +-
 drivers/raw/ifpga/rte_pmd_afu.h               |   4 +-
 drivers/raw/ifpga/rte_pmd_ifpga.h             |   4 +-
 examples/ethtool/lib/rte_ethtool.h            |   8 +-
 examples/qos_sched/main.h                     |   4 +-
 examples/vm_power_manager/channel_manager.h   |   8 +-
 lib/acl/rte_acl_osdep.h                       |   8 +-
 lib/bbdev/rte_bbdev.h                         |   8 +-
 lib/bbdev/rte_bbdev_op.h                      |   8 +-
 lib/bbdev/rte_bbdev_pmd.h                     |   8 +-
 lib/bpf/bpf_def.h                             |   8 +-
 lib/compressdev/rte_comp.h                    |   4 +-
 lib/compressdev/rte_compressdev.h             |   6 +-
 lib/compressdev/rte_compressdev_internal.h    |   8 +-
 lib/compressdev/rte_compressdev_pmd.h         |   8 +-
 lib/cryptodev/cryptodev_pmd.h                 |   8 +-
 lib/cryptodev/cryptodev_trace.h               |   8 +-
 lib/cryptodev/rte_crypto.h                    |   8 +-
 lib/cryptodev/rte_crypto_asym.h               |   8 +-
 lib/cryptodev/rte_crypto_sym.h                |   8 +-
 lib/cryptodev/rte_cryptodev.h                 |   8 +-
 lib/cryptodev/rte_cryptodev_trace_fp.h        |   4 +-
 lib/dispatcher/rte_dispatcher.h               |   8 +-
 lib/dmadev/rte_dmadev.h                       |   8 +
 lib/eal/arm/include/rte_atomic_32.h           |   4 +-
 lib/eal/arm/include/rte_atomic_64.h           |   8 +-
 lib/eal/arm/include/rte_byteorder.h           |   8 +-
 lib/eal/arm/include/rte_cpuflags_32.h         |   8 +-
 lib/eal/arm/include/rte_cpuflags_64.h         |   8 +-
 lib/eal/arm/include/rte_cycles_32.h           |   4 +-
 lib/eal/arm/include/rte_cycles_64.h           |   4 +-
 lib/eal/arm/include/rte_io.h                  |   8 +-
 lib/eal/arm/include/rte_io_64.h               |   8 +-
 lib/eal/arm/include/rte_memcpy_32.h           |   8 +-
 lib/eal/arm/include/rte_memcpy_64.h           |   8 +-
 lib/eal/arm/include/rte_pause.h               |   8 +-
 lib/eal/arm/include/rte_pause_32.h            |   6 +-
 lib/eal/arm/include/rte_pause_64.h            |   8 +-
 lib/eal/arm/include/rte_power_intrinsics.h    |   8 +-
 lib/eal/arm/include/rte_prefetch_32.h         |   8 +-
 lib/eal/arm/include/rte_prefetch_64.h         |   8 +-
 lib/eal/arm/include/rte_rwlock.h              |   4 +-
 lib/eal/arm/include/rte_spinlock.h            |   6 +-
 lib/eal/freebsd/include/rte_os.h              |   8 +-
 lib/eal/include/bus_driver.h                  |   8 +-
 lib/eal/include/dev_driver.h                  |   6 +-
 lib/eal/include/eal_trace_internal.h          |   8 +-
 lib/eal/include/generic/rte_atomic.h          |   8 +
 lib/eal/include/generic/rte_byteorder.h       |   8 +
 lib/eal/include/generic/rte_cpuflags.h        |   8 +
 lib/eal/include/generic/rte_cycles.h          |   8 +
 lib/eal/include/generic/rte_io.h              |   8 +
 lib/eal/include/generic/rte_memcpy.h          |   8 +
 lib/eal/include/generic/rte_pause.h           |   8 +
 .../include/generic/rte_power_intrinsics.h    |   8 +
 lib/eal/include/generic/rte_prefetch.h        |   8 +
 lib/eal/include/generic/rte_rwlock.h          |   8 +-
 lib/eal/include/generic/rte_spinlock.h        |   8 +
 lib/eal/include/generic/rte_vect.h            |   8 +
 lib/eal/include/rte_alarm.h                   |   4 +-
 lib/eal/include/rte_bitmap.h                  |   8 +-
 lib/eal/include/rte_bitops.h                  | 768 +++++++++++++++++-
 lib/eal/include/rte_bus.h                     |   8 +-
 lib/eal/include/rte_class.h                   |   4 +-
 lib/eal/include/rte_common.h                  |   8 +-
 lib/eal/include/rte_dev.h                     |   8 +-
 lib/eal/include/rte_devargs.h                 |   8 +-
 lib/eal/include/rte_eal_trace.h               |   4 +-
 lib/eal/include/rte_errno.h                   |   4 +-
 lib/eal/include/rte_fbarray.h                 |   8 +-
 lib/eal/include/rte_keepalive.h               |   6 +-
 lib/eal/include/rte_mcslock.h                 |   8 +-
 lib/eal/include/rte_memory.h                  |   8 +-
 lib/eal/include/rte_pci_dev_features.h        |   4 +-
 lib/eal/include/rte_pflock.h                  |   8 +-
 lib/eal/include/rte_random.h                  |   4 +-
 lib/eal/include/rte_seqcount.h                |   8 +-
 lib/eal/include/rte_seqlock.h                 |   8 +-
 lib/eal/include/rte_service.h                 |   8 +-
 lib/eal/include/rte_service_component.h       |   4 +-
 lib/eal/include/rte_stdatomic.h               |   5 +-
 lib/eal/include/rte_string_fns.h              |  17 +-
 lib/eal/include/rte_tailq.h                   |   6 +-
 lib/eal/include/rte_ticketlock.h              |   8 +-
 lib/eal/include/rte_time.h                    |   6 +-
 lib/eal/include/rte_trace.h                   |   8 +-
 lib/eal/include/rte_trace_point.h             |   8 +-
 lib/eal/include/rte_trace_point_register.h    |   8 +-
 lib/eal/include/rte_uuid.h                    |   8 +-
 lib/eal/include/rte_version.h                 |   6 +-
 lib/eal/include/rte_vfio.h                    |   8 +-
 lib/eal/linux/include/rte_os.h                |   8 +-
 lib/eal/loongarch/include/rte_atomic.h        |   6 +-
 lib/eal/loongarch/include/rte_byteorder.h     |   4 +-
 lib/eal/loongarch/include/rte_cpuflags.h      |   8 +-
 lib/eal/loongarch/include/rte_cycles.h        |   4 +-
 lib/eal/loongarch/include/rte_io.h            |   4 +-
 lib/eal/loongarch/include/rte_memcpy.h        |   4 +-
 lib/eal/loongarch/include/rte_pause.h         |   8 +-
 .../loongarch/include/rte_power_intrinsics.h  |   8 +-
 lib/eal/loongarch/include/rte_prefetch.h      |   8 +-
 lib/eal/loongarch/include/rte_rwlock.h        |   4 +-
 lib/eal/loongarch/include/rte_spinlock.h      |   6 +-
 lib/eal/ppc/include/rte_atomic.h              |   6 +-
 lib/eal/ppc/include/rte_byteorder.h           |   6 +-
 lib/eal/ppc/include/rte_cpuflags.h            |   8 +-
 lib/eal/ppc/include/rte_cycles.h              |   8 +-
 lib/eal/ppc/include/rte_io.h                  |   4 +-
 lib/eal/ppc/include/rte_memcpy.h              |   4 +-
 lib/eal/ppc/include/rte_pause.h               |   8 +-
 lib/eal/ppc/include/rte_power_intrinsics.h    |   8 +-
 lib/eal/ppc/include/rte_prefetch.h            |   8 +-
 lib/eal/ppc/include/rte_rwlock.h              |   4 +-
 lib/eal/ppc/include/rte_spinlock.h            |   8 +-
 lib/eal/riscv/include/rte_atomic.h            |   8 +-
 lib/eal/riscv/include/rte_byteorder.h         |   8 +-
 lib/eal/riscv/include/rte_cpuflags.h          |   8 +-
 lib/eal/riscv/include/rte_cycles.h            |   4 +-
 lib/eal/riscv/include/rte_io.h                |   4 +-
 lib/eal/riscv/include/rte_memcpy.h            |   4 +-
 lib/eal/riscv/include/rte_pause.h             |   8 +-
 lib/eal/riscv/include/rte_power_intrinsics.h  |   8 +-
 lib/eal/riscv/include/rte_prefetch.h          |   8 +-
 lib/eal/riscv/include/rte_rwlock.h            |   4 +-
 lib/eal/riscv/include/rte_spinlock.h          |   6 +-
 lib/eal/windows/include/pthread.h             |   6 +-
 lib/eal/windows/include/regex.h               |   8 +-
 lib/eal/windows/include/rte_windows.h         |   8 +-
 lib/eal/x86/include/rte_atomic.h              |  25 +-
 lib/eal/x86/include/rte_byteorder.h           |  16 +-
 lib/eal/x86/include/rte_cpuflags.h            |   8 +-
 lib/eal/x86/include/rte_cycles.h              |   8 +-
 lib/eal/x86/include/rte_io.h                  |   8 +-
 lib/eal/x86/include/rte_pause.h               |   7 +-
 lib/eal/x86/include/rte_power_intrinsics.h    |   8 +-
 lib/eal/x86/include/rte_prefetch.h            |   8 +-
 lib/eal/x86/include/rte_rwlock.h              |   6 +-
 lib/eal/x86/include/rte_spinlock.h            |   9 +-
 lib/ethdev/ethdev_driver.h                    |   8 +-
 lib/ethdev/ethdev_pci.h                       |   8 +-
 lib/ethdev/ethdev_trace.h                     |   8 +-
 lib/ethdev/ethdev_vdev.h                      |   8 +-
 lib/ethdev/rte_cman.h                         |   4 +-
 lib/ethdev/rte_dev_info.h                     |   4 +-
 lib/ethdev/rte_ethdev.h                       |   8 +-
 lib/ethdev/rte_ethdev_trace_fp.h              |   4 +-
 lib/eventdev/event_timer_adapter_pmd.h        |   4 +-
 lib/eventdev/eventdev_pmd.h                   |   8 +-
 lib/eventdev/eventdev_pmd_pci.h               |   8 +-
 lib/eventdev/eventdev_pmd_vdev.h              |   8 +-
 lib/eventdev/eventdev_trace.h                 |   8 +-
 lib/eventdev/rte_event_crypto_adapter.h       |   8 +-
 lib/eventdev/rte_event_eth_rx_adapter.h       |   8 +-
 lib/eventdev/rte_event_eth_tx_adapter.h       |   8 +-
 lib/eventdev/rte_event_ring.h                 |   8 +-
 lib/eventdev/rte_event_timer_adapter.h        |   8 +-
 lib/eventdev/rte_eventdev.h                   |   8 +-
 lib/eventdev/rte_eventdev_trace_fp.h          |   4 +-
 lib/graph/rte_graph_model_mcore_dispatch.h    |   8 +-
 lib/graph/rte_graph_worker.h                  |   6 +-
 lib/gso/rte_gso.h                             |   6 +-
 lib/hash/rte_fbk_hash.h                       |   8 +-
 lib/hash/rte_hash_crc.h                       |   8 +-
 lib/hash/rte_jhash.h                          |   8 +-
 lib/hash/rte_thash.h                          |   8 +-
 lib/hash/rte_thash_gfni.h                     |   8 +-
 lib/ip_frag/rte_ip_frag.h                     |   8 +-
 lib/ipsec/rte_ipsec.h                         |   8 +-
 lib/log/rte_log.h                             |   8 +-
 lib/lpm/rte_lpm.h                             |   8 +-
 lib/member/rte_member.h                       |   8 +-
 lib/member/rte_member_sketch.h                |   6 +-
 lib/member/rte_member_sketch_avx512.h         |   8 +-
 lib/member/rte_member_x86.h                   |   4 +-
 lib/member/rte_xxh64_avx512.h                 |   6 +-
 lib/mempool/mempool_trace.h                   |   8 +-
 lib/mempool/rte_mempool_trace_fp.h            |   4 +-
 lib/meter/rte_meter.h                         |   8 +-
 lib/mldev/mldev_utils.h                       |   8 +-
 lib/mldev/rte_mldev_core.h                    |   8 +-
 lib/mldev/rte_mldev_pmd.h                     |   8 +-
 lib/net/rte_ether.h                           |   8 +-
 lib/net/rte_net.h                             |   8 +-
 lib/net/rte_sctp.h                            |   8 +-
 lib/node/rte_node_eth_api.h                   |   8 +-
 lib/node/rte_node_ip4_api.h                   |   8 +-
 lib/node/rte_node_ip6_api.h                   |   6 +-
 lib/node/rte_node_udp4_input_api.h            |   8 +-
 lib/pci/rte_pci.h                             |   8 +-
 lib/pdcp/rte_pdcp.h                           |   8 +-
 lib/pipeline/rte_pipeline.h                   |   8 +-
 lib/pipeline/rte_port_in_action.h             |   8 +-
 lib/pipeline/rte_swx_ctl.h                    |   8 +-
 lib/pipeline/rte_swx_extern.h                 |   8 +-
 lib/pipeline/rte_swx_ipsec.h                  |   8 +-
 lib/pipeline/rte_swx_pipeline.h               |   8 +-
 lib/pipeline/rte_swx_pipeline_spec.h          |   8 +-
 lib/pipeline/rte_table_action.h               |   8 +-
 lib/port/rte_port.h                           |   8 +-
 lib/port/rte_port_ethdev.h                    |   8 +-
 lib/port/rte_port_eventdev.h                  |   8 +-
 lib/port/rte_port_fd.h                        |   8 +-
 lib/port/rte_port_frag.h                      |   8 +-
 lib/port/rte_port_ras.h                       |   8 +-
 lib/port/rte_port_ring.h                      |   8 +-
 lib/port/rte_port_sched.h                     |   8 +-
 lib/port/rte_port_source_sink.h               |   8 +-
 lib/port/rte_port_sym_crypto.h                |   8 +-
 lib/port/rte_swx_port.h                       |   8 +-
 lib/port/rte_swx_port_ethdev.h                |   8 +-
 lib/port/rte_swx_port_fd.h                    |   8 +-
 lib/port/rte_swx_port_ring.h                  |   8 +-
 lib/port/rte_swx_port_source_sink.h           |   8 +-
 lib/rawdev/rte_rawdev.h                       |   6 +-
 lib/rawdev/rte_rawdev_pmd.h                   |   8 +-
 lib/rcu/rte_rcu_qsbr.h                        |   8 +-
 lib/regexdev/rte_regexdev.h                   |   8 +-
 lib/ring/rte_ring.h                           |   6 +-
 lib/ring/rte_ring_core.h                      |   8 +-
 lib/ring/rte_ring_elem.h                      |   8 +-
 lib/ring/rte_ring_hts.h                       |   4 +-
 lib/ring/rte_ring_peek.h                      |   4 +-
 lib/ring/rte_ring_peek_zc.h                   |   4 +-
 lib/ring/rte_ring_rts.h                       |   4 +-
 lib/sched/rte_approx.h                        |   8 +-
 lib/sched/rte_pie.h                           |   8 +-
 lib/sched/rte_red.h                           |   8 +-
 lib/sched/rte_sched.h                         |   8 +-
 lib/sched/rte_sched_common.h                  |   6 +-
 lib/security/rte_security.h                   |   8 +-
 lib/security/rte_security_driver.h            |   6 +-
 lib/stack/rte_stack.h                         |   8 +-
 lib/table/rte_lru.h                           |  12 +-
 lib/table/rte_lru_arm64.h                     |   8 +-
 lib/table/rte_lru_x86.h                       |   8 -
 lib/table/rte_swx_hash_func.h                 |   8 +-
 lib/table/rte_swx_keycmp.h                    |   8 +-
 lib/table/rte_swx_table.h                     |   8 +-
 lib/table/rte_swx_table_em.h                  |   8 +-
 lib/table/rte_swx_table_learner.h             |   8 +-
 lib/table/rte_swx_table_selector.h            |   8 +-
 lib/table/rte_swx_table_wm.h                  |   8 +-
 lib/table/rte_table.h                         |   8 +-
 lib/table/rte_table_acl.h                     |   8 +-
 lib/table/rte_table_array.h                   |   8 +-
 lib/table/rte_table_hash.h                    |   8 +-
 lib/table/rte_table_hash_cuckoo.h             |   8 +-
 lib/table/rte_table_hash_func.h               |  12 +-
 lib/table/rte_table_lpm.h                     |   8 +-
 lib/table/rte_table_lpm_ipv6.h                |   8 +-
 lib/table/rte_table_stub.h                    |   8 +-
 lib/telemetry/rte_telemetry.h                 |   8 +-
 lib/vhost/rte_vdpa.h                          |   8 +-
 lib/vhost/rte_vhost.h                         |   8 +-
 lib/vhost/rte_vhost_async.h                   |   8 +-
 lib/vhost/rte_vhost_crypto.h                  |   4 +-
 lib/vhost/vdpa_driver.h                       |   8 +-
 285 files changed, 2264 insertions(+), 998 deletions(-)

-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [PATCH v7 1/6] dpdk: do not force C linkage on include file dependencies
  2024-09-17  9:36       ` [PATCH v7 0/6] Improve EAL bit operations API Mattias Rönnblom
@ 2024-09-17  9:36         ` Mattias Rönnblom
  2024-09-17 10:48           ` [PATCH v8 0/6] Improve EAL bit operations API Mattias Rönnblom
  2024-09-17  9:36         ` [PATCH v7 2/6] eal: extend bit manipulation functionality Mattias Rönnblom
                           ` (4 subsequent siblings)
  5 siblings, 1 reply; 63+ messages in thread
From: Mattias Rönnblom @ 2024-09-17  9:36 UTC (permalink / raw)
  To: dev
  Cc: hofors, Heng Wang, Stephen Hemminger, Tyler Retzlaff,
	Morten Brørup, Jack Bond-Preston, David Marchand,
	Chengwen Feng, Mattias Rönnblom

Ensure that 'extern "C" { /../ }' does not cover files included from a
particular header file, and address minor issues resulting from this
change of order.

Dealing with C++ should be delegated to the individual include file level,
rather than being imposed by the user of that file. For example,
forcing C linkage prevents _Generic macros being replaced with
overloaded static inline functions in C++ translation units.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>

--

PATCH v7:
 * Fix issues in rte_io.h, rte_pause.h and rte_thash_gfni.h causing
   build failures on ARM. (David Marchand)
 * Fix issue in rte_vfio.h, causing build failures unless VFIO_PRESENT.

PATCH v6:
 * Add missing extern "C" in rte_atomic.h, rte_cpuflags.h, rte_io.h,
   rte_vect.h.
 * Fix 32-bit x86 build issues in rte_atomic.h.

PATCH v5:
 * rte_dmadev.h was still including files under extern "C" { /../ }.
   (Chengwen Feng)
 * Fix rte_byteorder.h, broken on 32-bit x86.
---
 app/test/packet_burst_generator.h             |  8 +++---
 app/test/virtual_pmd.h                        |  4 +--
 drivers/bus/auxiliary/bus_auxiliary_driver.h  |  8 +++---
 drivers/bus/cdx/bus_cdx_driver.h              |  8 +++---
 drivers/bus/dpaa/include/fsl_qman.h           |  8 +++---
 drivers/bus/fslmc/bus_fslmc_driver.h          |  8 +++---
 drivers/bus/pci/bus_pci_driver.h              |  8 +++---
 drivers/bus/pci/rte_bus_pci.h                 |  8 +++---
 drivers/bus/platform/bus_platform_driver.h    |  8 +++---
 drivers/bus/vdev/bus_vdev_driver.h            |  8 +++---
 drivers/bus/vmbus/bus_vmbus_driver.h          |  8 +++---
 drivers/bus/vmbus/rte_bus_vmbus.h             |  8 +++---
 drivers/dma/cnxk/cnxk_dma_event_dp.h          |  8 +++---
 drivers/dma/ioat/ioat_hw_defs.h               |  4 +--
 drivers/event/dlb2/rte_pmd_dlb2.h             |  8 +++---
 drivers/mempool/dpaa2/rte_dpaa2_mempool.h     |  6 ++---
 drivers/net/avp/rte_avp_fifo.h                |  8 +++---
 drivers/net/bonding/rte_eth_bond.h            |  4 +--
 drivers/net/i40e/rte_pmd_i40e.h               |  8 +++---
 drivers/net/mlx5/mlx5_trace.h                 |  8 +++---
 drivers/net/ring/rte_eth_ring.h               |  4 +--
 drivers/net/vhost/rte_eth_vhost.h             |  8 +++---
 drivers/raw/ifpga/afu_pmd_core.h              |  8 +++---
 drivers/raw/ifpga/afu_pmd_he_hssi.h           |  6 ++---
 drivers/raw/ifpga/afu_pmd_he_lpbk.h           |  6 ++---
 drivers/raw/ifpga/afu_pmd_he_mem.h            |  6 ++---
 drivers/raw/ifpga/afu_pmd_n3000.h             |  6 ++---
 drivers/raw/ifpga/rte_pmd_afu.h               |  4 +--
 drivers/raw/ifpga/rte_pmd_ifpga.h             |  4 +--
 examples/ethtool/lib/rte_ethtool.h            |  8 +++---
 examples/qos_sched/main.h                     |  4 +--
 examples/vm_power_manager/channel_manager.h   |  8 +++---
 lib/acl/rte_acl_osdep.h                       |  8 +++---
 lib/bbdev/rte_bbdev.h                         |  8 +++---
 lib/bbdev/rte_bbdev_op.h                      |  8 +++---
 lib/bbdev/rte_bbdev_pmd.h                     |  8 +++---
 lib/bpf/bpf_def.h                             |  8 +++---
 lib/compressdev/rte_comp.h                    |  4 +--
 lib/compressdev/rte_compressdev.h             |  6 ++---
 lib/compressdev/rte_compressdev_internal.h    |  8 +++---
 lib/compressdev/rte_compressdev_pmd.h         |  8 +++---
 lib/cryptodev/cryptodev_pmd.h                 |  8 +++---
 lib/cryptodev/cryptodev_trace.h               |  8 +++---
 lib/cryptodev/rte_crypto.h                    |  8 +++---
 lib/cryptodev/rte_crypto_asym.h               |  8 +++---
 lib/cryptodev/rte_crypto_sym.h                |  8 +++---
 lib/cryptodev/rte_cryptodev.h                 |  8 +++---
 lib/cryptodev/rte_cryptodev_trace_fp.h        |  4 +--
 lib/dispatcher/rte_dispatcher.h               |  8 +++---
 lib/dmadev/rte_dmadev.h                       |  8 ++++++
 lib/eal/arm/include/rte_atomic_32.h           |  4 +--
 lib/eal/arm/include/rte_atomic_64.h           |  8 +++---
 lib/eal/arm/include/rte_byteorder.h           |  8 +++---
 lib/eal/arm/include/rte_cpuflags_32.h         |  8 +++---
 lib/eal/arm/include/rte_cpuflags_64.h         |  8 +++---
 lib/eal/arm/include/rte_cycles_32.h           |  4 +--
 lib/eal/arm/include/rte_cycles_64.h           |  4 +--
 lib/eal/arm/include/rte_io.h                  |  8 +++---
 lib/eal/arm/include/rte_io_64.h               |  8 +++---
 lib/eal/arm/include/rte_memcpy_32.h           |  8 +++---
 lib/eal/arm/include/rte_memcpy_64.h           |  8 +++---
 lib/eal/arm/include/rte_pause.h               |  8 +++---
 lib/eal/arm/include/rte_pause_32.h            |  6 ++---
 lib/eal/arm/include/rte_pause_64.h            |  8 +++---
 lib/eal/arm/include/rte_power_intrinsics.h    |  8 +++---
 lib/eal/arm/include/rte_prefetch_32.h         |  8 +++---
 lib/eal/arm/include/rte_prefetch_64.h         |  8 +++---
 lib/eal/arm/include/rte_rwlock.h              |  4 +--
 lib/eal/arm/include/rte_spinlock.h            |  6 ++---
 lib/eal/freebsd/include/rte_os.h              |  8 +++---
 lib/eal/include/bus_driver.h                  |  8 +++---
 lib/eal/include/dev_driver.h                  |  6 ++---
 lib/eal/include/eal_trace_internal.h          |  8 +++---
 lib/eal/include/generic/rte_atomic.h          |  8 ++++++
 lib/eal/include/generic/rte_byteorder.h       |  8 ++++++
 lib/eal/include/generic/rte_cpuflags.h        |  8 ++++++
 lib/eal/include/generic/rte_cycles.h          |  8 ++++++
 lib/eal/include/generic/rte_io.h              |  8 ++++++
 lib/eal/include/generic/rte_memcpy.h          |  8 ++++++
 lib/eal/include/generic/rte_pause.h           |  8 ++++++
 .../include/generic/rte_power_intrinsics.h    |  8 ++++++
 lib/eal/include/generic/rte_prefetch.h        |  8 ++++++
 lib/eal/include/generic/rte_rwlock.h          |  8 +++---
 lib/eal/include/generic/rte_spinlock.h        |  8 ++++++
 lib/eal/include/generic/rte_vect.h            |  8 ++++++
 lib/eal/include/rte_alarm.h                   |  4 +--
 lib/eal/include/rte_bitmap.h                  |  8 +++---
 lib/eal/include/rte_bus.h                     |  8 +++---
 lib/eal/include/rte_class.h                   |  4 +--
 lib/eal/include/rte_common.h                  |  8 +++---
 lib/eal/include/rte_dev.h                     |  8 +++---
 lib/eal/include/rte_devargs.h                 |  8 +++---
 lib/eal/include/rte_eal_trace.h               |  4 +--
 lib/eal/include/rte_errno.h                   |  4 +--
 lib/eal/include/rte_fbarray.h                 |  8 +++---
 lib/eal/include/rte_keepalive.h               |  6 ++---
 lib/eal/include/rte_mcslock.h                 |  8 +++---
 lib/eal/include/rte_memory.h                  |  8 +++---
 lib/eal/include/rte_pci_dev_features.h        |  4 +--
 lib/eal/include/rte_pflock.h                  |  8 +++---
 lib/eal/include/rte_random.h                  |  4 +--
 lib/eal/include/rte_seqcount.h                |  8 +++---
 lib/eal/include/rte_seqlock.h                 |  8 +++---
 lib/eal/include/rte_service.h                 |  8 +++---
 lib/eal/include/rte_service_component.h       |  4 +--
 lib/eal/include/rte_stdatomic.h               |  5 +---
 lib/eal/include/rte_string_fns.h              | 17 +++++++++----
 lib/eal/include/rte_tailq.h                   |  6 ++---
 lib/eal/include/rte_ticketlock.h              |  8 +++---
 lib/eal/include/rte_time.h                    |  6 ++---
 lib/eal/include/rte_trace.h                   |  8 +++---
 lib/eal/include/rte_trace_point.h             |  8 +++---
 lib/eal/include/rte_trace_point_register.h    |  8 +++---
 lib/eal/include/rte_uuid.h                    |  8 +++---
 lib/eal/include/rte_version.h                 |  6 ++---
 lib/eal/include/rte_vfio.h                    |  8 +++---
 lib/eal/linux/include/rte_os.h                |  8 +++---
 lib/eal/loongarch/include/rte_atomic.h        |  6 ++---
 lib/eal/loongarch/include/rte_byteorder.h     |  4 +--
 lib/eal/loongarch/include/rte_cpuflags.h      |  8 +++---
 lib/eal/loongarch/include/rte_cycles.h        |  4 +--
 lib/eal/loongarch/include/rte_io.h            |  4 +--
 lib/eal/loongarch/include/rte_memcpy.h        |  4 +--
 lib/eal/loongarch/include/rte_pause.h         |  8 +++---
 .../loongarch/include/rte_power_intrinsics.h  |  8 +++---
 lib/eal/loongarch/include/rte_prefetch.h      |  8 +++---
 lib/eal/loongarch/include/rte_rwlock.h        |  4 +--
 lib/eal/loongarch/include/rte_spinlock.h      |  6 ++---
 lib/eal/ppc/include/rte_atomic.h              |  6 ++---
 lib/eal/ppc/include/rte_byteorder.h           |  6 ++---
 lib/eal/ppc/include/rte_cpuflags.h            |  8 +++---
 lib/eal/ppc/include/rte_cycles.h              |  8 +++---
 lib/eal/ppc/include/rte_io.h                  |  4 +--
 lib/eal/ppc/include/rte_memcpy.h              |  4 +--
 lib/eal/ppc/include/rte_pause.h               |  8 +++---
 lib/eal/ppc/include/rte_power_intrinsics.h    |  8 +++---
 lib/eal/ppc/include/rte_prefetch.h            |  8 +++---
 lib/eal/ppc/include/rte_rwlock.h              |  4 +--
 lib/eal/ppc/include/rte_spinlock.h            |  8 +++---
 lib/eal/riscv/include/rte_atomic.h            |  8 +++---
 lib/eal/riscv/include/rte_byteorder.h         |  8 +++---
 lib/eal/riscv/include/rte_cpuflags.h          |  8 +++---
 lib/eal/riscv/include/rte_cycles.h            |  4 +--
 lib/eal/riscv/include/rte_io.h                |  4 +--
 lib/eal/riscv/include/rte_memcpy.h            |  4 +--
 lib/eal/riscv/include/rte_pause.h             |  8 +++---
 lib/eal/riscv/include/rte_power_intrinsics.h  |  8 +++---
 lib/eal/riscv/include/rte_prefetch.h          |  8 +++---
 lib/eal/riscv/include/rte_rwlock.h            |  4 +--
 lib/eal/riscv/include/rte_spinlock.h          |  6 ++---
 lib/eal/windows/include/pthread.h             |  6 ++---
 lib/eal/windows/include/regex.h               |  8 +++---
 lib/eal/windows/include/rte_windows.h         |  8 +++---
 lib/eal/x86/include/rte_atomic.h              | 25 +++++++++++++------
 lib/eal/x86/include/rte_byteorder.h           | 16 ++++++------
 lib/eal/x86/include/rte_cpuflags.h            |  8 +++---
 lib/eal/x86/include/rte_cycles.h              |  8 +++---
 lib/eal/x86/include/rte_io.h                  |  8 +++---
 lib/eal/x86/include/rte_pause.h               |  7 +++---
 lib/eal/x86/include/rte_power_intrinsics.h    |  8 +++---
 lib/eal/x86/include/rte_prefetch.h            |  8 +++---
 lib/eal/x86/include/rte_rwlock.h              |  6 ++---
 lib/eal/x86/include/rte_spinlock.h            |  9 +++----
 lib/ethdev/ethdev_driver.h                    |  8 +++---
 lib/ethdev/ethdev_pci.h                       |  8 +++---
 lib/ethdev/ethdev_trace.h                     |  8 +++---
 lib/ethdev/ethdev_vdev.h                      |  8 +++---
 lib/ethdev/rte_cman.h                         |  4 +--
 lib/ethdev/rte_dev_info.h                     |  4 +--
 lib/ethdev/rte_ethdev.h                       |  8 +++---
 lib/ethdev/rte_ethdev_trace_fp.h              |  4 +--
 lib/eventdev/event_timer_adapter_pmd.h        |  4 +--
 lib/eventdev/eventdev_pmd.h                   |  8 +++---
 lib/eventdev/eventdev_pmd_pci.h               |  8 +++---
 lib/eventdev/eventdev_pmd_vdev.h              |  8 +++---
 lib/eventdev/eventdev_trace.h                 |  8 +++---
 lib/eventdev/rte_event_crypto_adapter.h       |  8 +++---
 lib/eventdev/rte_event_eth_rx_adapter.h       |  8 +++---
 lib/eventdev/rte_event_eth_tx_adapter.h       |  8 +++---
 lib/eventdev/rte_event_ring.h                 |  8 +++---
 lib/eventdev/rte_event_timer_adapter.h        |  8 +++---
 lib/eventdev/rte_eventdev.h                   |  8 +++---
 lib/eventdev/rte_eventdev_trace_fp.h          |  4 +--
 lib/graph/rte_graph_model_mcore_dispatch.h    |  8 +++---
 lib/graph/rte_graph_worker.h                  |  6 ++---
 lib/gso/rte_gso.h                             |  6 ++---
 lib/hash/rte_fbk_hash.h                       |  8 +++---
 lib/hash/rte_hash_crc.h                       |  8 +++---
 lib/hash/rte_jhash.h                          |  8 +++---
 lib/hash/rte_thash.h                          |  8 +++---
 lib/hash/rte_thash_gfni.h                     |  8 +++---
 lib/ip_frag/rte_ip_frag.h                     |  8 +++---
 lib/ipsec/rte_ipsec.h                         |  8 +++---
 lib/log/rte_log.h                             |  8 +++---
 lib/lpm/rte_lpm.h                             |  8 +++---
 lib/member/rte_member.h                       |  8 +++---
 lib/member/rte_member_sketch.h                |  6 ++---
 lib/member/rte_member_sketch_avx512.h         |  8 +++---
 lib/member/rte_member_x86.h                   |  4 +--
 lib/member/rte_xxh64_avx512.h                 |  6 ++---
 lib/mempool/mempool_trace.h                   |  8 +++---
 lib/mempool/rte_mempool_trace_fp.h            |  4 +--
 lib/meter/rte_meter.h                         |  8 +++---
 lib/mldev/mldev_utils.h                       |  8 +++---
 lib/mldev/rte_mldev_core.h                    |  8 +++---
 lib/mldev/rte_mldev_pmd.h                     |  8 +++---
 lib/net/rte_ether.h                           |  8 +++---
 lib/net/rte_net.h                             |  8 +++---
 lib/net/rte_sctp.h                            |  8 +++---
 lib/node/rte_node_eth_api.h                   |  8 +++---
 lib/node/rte_node_ip4_api.h                   |  8 +++---
 lib/node/rte_node_ip6_api.h                   |  6 ++---
 lib/node/rte_node_udp4_input_api.h            |  8 +++---
 lib/pci/rte_pci.h                             |  8 +++---
 lib/pdcp/rte_pdcp.h                           |  8 +++---
 lib/pipeline/rte_pipeline.h                   |  8 +++---
 lib/pipeline/rte_port_in_action.h             |  8 +++---
 lib/pipeline/rte_swx_ctl.h                    |  8 +++---
 lib/pipeline/rte_swx_extern.h                 |  8 +++---
 lib/pipeline/rte_swx_ipsec.h                  |  8 +++---
 lib/pipeline/rte_swx_pipeline.h               |  8 +++---
 lib/pipeline/rte_swx_pipeline_spec.h          |  8 +++---
 lib/pipeline/rte_table_action.h               |  8 +++---
 lib/port/rte_port.h                           |  8 +++---
 lib/port/rte_port_ethdev.h                    |  8 +++---
 lib/port/rte_port_eventdev.h                  |  8 +++---
 lib/port/rte_port_fd.h                        |  8 +++---
 lib/port/rte_port_frag.h                      |  8 +++---
 lib/port/rte_port_ras.h                       |  8 +++---
 lib/port/rte_port_ring.h                      |  8 +++---
 lib/port/rte_port_sched.h                     |  8 +++---
 lib/port/rte_port_source_sink.h               |  8 +++---
 lib/port/rte_port_sym_crypto.h                |  8 +++---
 lib/port/rte_swx_port.h                       |  8 +++---
 lib/port/rte_swx_port_ethdev.h                |  8 +++---
 lib/port/rte_swx_port_fd.h                    |  8 +++---
 lib/port/rte_swx_port_ring.h                  |  8 +++---
 lib/port/rte_swx_port_source_sink.h           |  8 +++---
 lib/rawdev/rte_rawdev.h                       |  6 ++---
 lib/rawdev/rte_rawdev_pmd.h                   |  8 +++---
 lib/rcu/rte_rcu_qsbr.h                        |  8 +++---
 lib/regexdev/rte_regexdev.h                   |  8 +++---
 lib/ring/rte_ring.h                           |  6 ++---
 lib/ring/rte_ring_core.h                      |  8 +++---
 lib/ring/rte_ring_elem.h                      |  8 +++---
 lib/ring/rte_ring_hts.h                       |  4 +--
 lib/ring/rte_ring_peek.h                      |  4 +--
 lib/ring/rte_ring_peek_zc.h                   |  4 +--
 lib/ring/rte_ring_rts.h                       |  4 +--
 lib/sched/rte_approx.h                        |  8 +++---
 lib/sched/rte_pie.h                           |  8 +++---
 lib/sched/rte_red.h                           |  8 +++---
 lib/sched/rte_sched.h                         |  8 +++---
 lib/sched/rte_sched_common.h                  |  6 ++---
 lib/security/rte_security.h                   |  8 +++---
 lib/security/rte_security_driver.h            |  6 ++---
 lib/stack/rte_stack.h                         |  8 +++---
 lib/table/rte_lru.h                           | 12 +++------
 lib/table/rte_lru_arm64.h                     |  8 +++---
 lib/table/rte_lru_x86.h                       |  8 ------
 lib/table/rte_swx_hash_func.h                 |  8 +++---
 lib/table/rte_swx_keycmp.h                    |  8 +++---
 lib/table/rte_swx_table.h                     |  8 +++---
 lib/table/rte_swx_table_em.h                  |  8 +++---
 lib/table/rte_swx_table_learner.h             |  8 +++---
 lib/table/rte_swx_table_selector.h            |  8 +++---
 lib/table/rte_swx_table_wm.h                  |  8 +++---
 lib/table/rte_table.h                         |  8 +++---
 lib/table/rte_table_acl.h                     |  8 +++---
 lib/table/rte_table_array.h                   |  8 +++---
 lib/table/rte_table_hash.h                    |  8 +++---
 lib/table/rte_table_hash_cuckoo.h             |  8 +++---
 lib/table/rte_table_hash_func.h               | 12 ++++++---
 lib/table/rte_table_lpm.h                     |  8 +++---
 lib/table/rte_table_lpm_ipv6.h                |  8 +++---
 lib/table/rte_table_stub.h                    |  8 +++---
 lib/telemetry/rte_telemetry.h                 |  8 +++---
 lib/vhost/rte_vdpa.h                          |  8 +++---
 lib/vhost/rte_vhost.h                         |  8 +++---
 lib/vhost/rte_vhost_async.h                   |  8 +++---
 lib/vhost/rte_vhost_crypto.h                  |  4 +--
 lib/vhost/vdpa_driver.h                       |  8 +++---
 282 files changed, 1081 insertions(+), 980 deletions(-)

diff --git a/app/test/packet_burst_generator.h b/app/test/packet_burst_generator.h
index b99286f50e..cce41bcd0f 100644
--- a/app/test/packet_burst_generator.h
+++ b/app/test/packet_burst_generator.h
@@ -5,10 +5,6 @@
 #ifndef PACKET_BURST_GENERATOR_H_
 #define PACKET_BURST_GENERATOR_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_mbuf.h>
 #include <rte_ether.h>
 #include <rte_arp.h>
@@ -17,6 +13,10 @@ extern "C" {
 #include <rte_tcp.h>
 #include <rte_sctp.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define IPV4_ADDR(a, b, c, d)(((a & 0xff) << 24) | ((b & 0xff) << 16) | \
 		((c & 0xff) << 8) | (d & 0xff))
 
diff --git a/app/test/virtual_pmd.h b/app/test/virtual_pmd.h
index 120b58b273..a5a71d7cb4 100644
--- a/app/test/virtual_pmd.h
+++ b/app/test/virtual_pmd.h
@@ -5,12 +5,12 @@
 #ifndef __VIRTUAL_ETHDEV_H_
 #define __VIRTUAL_ETHDEV_H_
 
+#include <rte_ether.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_ether.h>
-
 int
 virtual_ethdev_init(void);
 
diff --git a/drivers/bus/auxiliary/bus_auxiliary_driver.h b/drivers/bus/auxiliary/bus_auxiliary_driver.h
index 58fb7c7f69..40ab1f0912 100644
--- a/drivers/bus/auxiliary/bus_auxiliary_driver.h
+++ b/drivers/bus/auxiliary/bus_auxiliary_driver.h
@@ -11,10 +11,6 @@
  * Auxiliary Bus Interface.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdio.h>
 #include <stdlib.h>
 #include <limits.h>
@@ -28,6 +24,10 @@ extern "C" {
 #include <dev_driver.h>
 #include <rte_kvargs.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_BUS_AUXILIARY_NAME "auxiliary"
 
 /* Forward declarations */
diff --git a/drivers/bus/cdx/bus_cdx_driver.h b/drivers/bus/cdx/bus_cdx_driver.h
index 211f8e406b..d390e7b5a1 100644
--- a/drivers/bus/cdx/bus_cdx_driver.h
+++ b/drivers/bus/cdx/bus_cdx_driver.h
@@ -10,10 +10,6 @@
  * AMD CDX bus interface
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdlib.h>
 #include <inttypes.h>
 #include <linux/types.h>
@@ -22,6 +18,10 @@ extern "C" {
 #include <dev_driver.h>
 #include <rte_interrupts.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Forward declarations */
 struct rte_cdx_device;
 struct rte_cdx_driver;
diff --git a/drivers/bus/dpaa/include/fsl_qman.h b/drivers/bus/dpaa/include/fsl_qman.h
index c0677976e8..f39007b84d 100644
--- a/drivers/bus/dpaa/include/fsl_qman.h
+++ b/drivers/bus/dpaa/include/fsl_qman.h
@@ -8,14 +8,14 @@
 #ifndef __FSL_QMAN_H
 #define __FSL_QMAN_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <dpaa_rbtree.h>
 #include <rte_compat.h>
 #include <rte_eventdev.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* FQ lookups (turn this on for 64bit user-space) */
 #ifdef RTE_ARCH_64
 #define CONFIG_FSL_QMAN_FQ_LOOKUP
diff --git a/drivers/bus/fslmc/bus_fslmc_driver.h b/drivers/bus/fslmc/bus_fslmc_driver.h
index 7ac5fe6ff1..3095458133 100644
--- a/drivers/bus/fslmc/bus_fslmc_driver.h
+++ b/drivers/bus/fslmc/bus_fslmc_driver.h
@@ -13,10 +13,6 @@
  * RTE FSLMC Bus Interface
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdio.h>
 #include <stdlib.h>
 #include <limits.h>
@@ -40,6 +36,10 @@ extern "C" {
 #include "portal/dpaa2_hw_pvt.h"
 #include "portal/dpaa2_hw_dpio.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define FSLMC_OBJECT_MAX_LEN 32   /**< Length of each device on bus */
 
 #define DPAA2_INVALID_MBUF_SEQN        0
diff --git a/drivers/bus/pci/bus_pci_driver.h b/drivers/bus/pci/bus_pci_driver.h
index be32263a82..2cc1119072 100644
--- a/drivers/bus/pci/bus_pci_driver.h
+++ b/drivers/bus/pci/bus_pci_driver.h
@@ -6,14 +6,14 @@
 #ifndef BUS_PCI_DRIVER_H
 #define BUS_PCI_DRIVER_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_bus_pci.h>
 #include <dev_driver.h>
 #include <rte_compat.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Pathname of PCI devices directory. */
 __rte_internal
 const char *rte_pci_get_sysfs_path(void);
diff --git a/drivers/bus/pci/rte_bus_pci.h b/drivers/bus/pci/rte_bus_pci.h
index a3798cb1cb..19a7b15b99 100644
--- a/drivers/bus/pci/rte_bus_pci.h
+++ b/drivers/bus/pci/rte_bus_pci.h
@@ -11,10 +11,6 @@
  * PCI device & driver interface
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdio.h>
 #include <stdlib.h>
 #include <limits.h>
@@ -27,6 +23,10 @@ extern "C" {
 #include <rte_interrupts.h>
 #include <rte_pci.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Forward declarations */
 struct rte_pci_device;
 struct rte_pci_driver;
diff --git a/drivers/bus/platform/bus_platform_driver.h b/drivers/bus/platform/bus_platform_driver.h
index 5ac54fb739..a6f246f7c4 100644
--- a/drivers/bus/platform/bus_platform_driver.h
+++ b/drivers/bus/platform/bus_platform_driver.h
@@ -10,10 +10,6 @@
  * Platform bus interface.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stddef.h>
 #include <stdint.h>
 
@@ -23,6 +19,10 @@ extern "C" {
 #include <rte_os.h>
 #include <rte_vfio.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Forward declarations */
 struct rte_platform_bus;
 struct rte_platform_device;
diff --git a/drivers/bus/vdev/bus_vdev_driver.h b/drivers/bus/vdev/bus_vdev_driver.h
index bc7e30d7c6..cba1fb5269 100644
--- a/drivers/bus/vdev/bus_vdev_driver.h
+++ b/drivers/bus/vdev/bus_vdev_driver.h
@@ -5,15 +5,15 @@
 #ifndef BUS_VDEV_DRIVER_H
 #define BUS_VDEV_DRIVER_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_bus_vdev.h>
 #include <rte_compat.h>
 #include <dev_driver.h>
 #include <rte_devargs.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct rte_vdev_device {
 	RTE_TAILQ_ENTRY(rte_vdev_device) next;      /**< Next attached vdev */
 	struct rte_device device;               /**< Inherit core device */
diff --git a/drivers/bus/vmbus/bus_vmbus_driver.h b/drivers/bus/vmbus/bus_vmbus_driver.h
index e2475a642d..bc394208de 100644
--- a/drivers/bus/vmbus/bus_vmbus_driver.h
+++ b/drivers/bus/vmbus/bus_vmbus_driver.h
@@ -6,14 +6,14 @@
 #ifndef BUS_VMBUS_DRIVER_H
 #define BUS_VMBUS_DRIVER_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_bus_vmbus.h>
 #include <rte_compat.h>
 #include <dev_driver.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct vmbus_channel;
 struct vmbus_mon_page;
 
diff --git a/drivers/bus/vmbus/rte_bus_vmbus.h b/drivers/bus/vmbus/rte_bus_vmbus.h
index 9467bd8f3d..fd18bca73c 100644
--- a/drivers/bus/vmbus/rte_bus_vmbus.h
+++ b/drivers/bus/vmbus/rte_bus_vmbus.h
@@ -11,10 +11,6 @@
  *
  * VMBUS Interface
  */
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdio.h>
 #include <stdlib.h>
 #include <limits.h>
@@ -28,6 +24,10 @@ extern "C" {
 #include <rte_interrupts.h>
 #include <rte_vmbus_reg.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Forward declarations */
 struct rte_vmbus_device;
 struct rte_vmbus_driver;
diff --git a/drivers/dma/cnxk/cnxk_dma_event_dp.h b/drivers/dma/cnxk/cnxk_dma_event_dp.h
index 06b5ca8279..8c6cf5dd9a 100644
--- a/drivers/dma/cnxk/cnxk_dma_event_dp.h
+++ b/drivers/dma/cnxk/cnxk_dma_event_dp.h
@@ -5,16 +5,16 @@
 #ifndef _CNXK_DMA_EVENT_DP_H_
 #define _CNXK_DMA_EVENT_DP_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_common.h>
 #include <rte_compat.h>
 #include <rte_eventdev.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 __rte_internal
 uint16_t cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events);
 
diff --git a/drivers/dma/ioat/ioat_hw_defs.h b/drivers/dma/ioat/ioat_hw_defs.h
index dc3493a78f..11893951f2 100644
--- a/drivers/dma/ioat/ioat_hw_defs.h
+++ b/drivers/dma/ioat/ioat_hw_defs.h
@@ -5,12 +5,12 @@
 #ifndef IOAT_HW_DEFS_H
 #define IOAT_HW_DEFS_H
 
+#include <stdint.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-
 #define IOAT_PCI_CHANERR_INT_OFFSET	0x180
 
 #define IOAT_VER_3_0	0x30
diff --git a/drivers/event/dlb2/rte_pmd_dlb2.h b/drivers/event/dlb2/rte_pmd_dlb2.h
index 334c6c356d..dba7fd2f43 100644
--- a/drivers/event/dlb2/rte_pmd_dlb2.h
+++ b/drivers/event/dlb2/rte_pmd_dlb2.h
@@ -11,14 +11,14 @@
 #ifndef _RTE_PMD_DLB2_H_
 #define _RTE_PMD_DLB2_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_compat.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @warning
  * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
diff --git a/drivers/mempool/dpaa2/rte_dpaa2_mempool.h b/drivers/mempool/dpaa2/rte_dpaa2_mempool.h
index 7fe3d93f61..0286090b1b 100644
--- a/drivers/mempool/dpaa2/rte_dpaa2_mempool.h
+++ b/drivers/mempool/dpaa2/rte_dpaa2_mempool.h
@@ -12,13 +12,13 @@
  *
  */
 
+#include <rte_compat.h>
+#include <rte_mempool.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_compat.h>
-#include <rte_mempool.h>
-
 /**
  * Get BPID corresponding to the packet pool
  *
diff --git a/drivers/net/avp/rte_avp_fifo.h b/drivers/net/avp/rte_avp_fifo.h
index c1658da685..879de3b1c0 100644
--- a/drivers/net/avp/rte_avp_fifo.h
+++ b/drivers/net/avp/rte_avp_fifo.h
@@ -8,10 +8,6 @@
 
 #include "rte_avp_common.h"
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #ifdef __KERNEL__
 /* Write memory barrier for kernel compiles */
 #define AVP_WMB() smp_wmb()
@@ -27,6 +23,10 @@ extern "C" {
 #ifndef __KERNEL__
 #include <rte_debug.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Initializes the avp fifo structure
  */
diff --git a/drivers/net/bonding/rte_eth_bond.h b/drivers/net/bonding/rte_eth_bond.h
index f10165f2c6..e59ff8793e 100644
--- a/drivers/net/bonding/rte_eth_bond.h
+++ b/drivers/net/bonding/rte_eth_bond.h
@@ -17,12 +17,12 @@
  * load balancing of network ports
  */
 
+#include <rte_ether.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_ether.h>
-
 /* Supported modes of operation of link bonding library  */
 
 #define BONDING_MODE_ROUND_ROBIN		(0)
diff --git a/drivers/net/i40e/rte_pmd_i40e.h b/drivers/net/i40e/rte_pmd_i40e.h
index a802f989e9..5af7e2330f 100644
--- a/drivers/net/i40e/rte_pmd_i40e.h
+++ b/drivers/net/i40e/rte_pmd_i40e.h
@@ -14,14 +14,14 @@
  *
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include <rte_ethdev.h>
 #include <rte_ether.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Response sent back to i40e driver from user app after callback
  */
diff --git a/drivers/net/mlx5/mlx5_trace.h b/drivers/net/mlx5/mlx5_trace.h
index 888d96f60b..a8f0b372c8 100644
--- a/drivers/net/mlx5/mlx5_trace.h
+++ b/drivers/net/mlx5/mlx5_trace.h
@@ -11,14 +11,14 @@
  * API for mlx5 PMD trace support
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <mlx5_prm.h>
 #include <rte_mbuf.h>
 #include <rte_trace_point.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* TX burst subroutines trace points. */
 RTE_TRACE_POINT_FP(
 	rte_pmd_mlx5_trace_tx_entry,
diff --git a/drivers/net/ring/rte_eth_ring.h b/drivers/net/ring/rte_eth_ring.h
index 59e074d0ad..98292c7b33 100644
--- a/drivers/net/ring/rte_eth_ring.h
+++ b/drivers/net/ring/rte_eth_ring.h
@@ -5,12 +5,12 @@
 #ifndef _RTE_ETH_RING_H_
 #define _RTE_ETH_RING_H_
 
+#include <rte_ring.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_ring.h>
-
 /**
  * Create a new ethdev port from a set of rings
  *
diff --git a/drivers/net/vhost/rte_eth_vhost.h b/drivers/net/vhost/rte_eth_vhost.h
index 0e68b9f668..6ec59a7adc 100644
--- a/drivers/net/vhost/rte_eth_vhost.h
+++ b/drivers/net/vhost/rte_eth_vhost.h
@@ -5,15 +5,15 @@
 #ifndef _RTE_ETH_VHOST_H_
 #define _RTE_ETH_VHOST_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <stdbool.h>
 
 #include <rte_vhost.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /*
  * Event description.
  */
diff --git a/drivers/raw/ifpga/afu_pmd_core.h b/drivers/raw/ifpga/afu_pmd_core.h
index a8f1afe343..abf9e491f7 100644
--- a/drivers/raw/ifpga/afu_pmd_core.h
+++ b/drivers/raw/ifpga/afu_pmd_core.h
@@ -5,10 +5,6 @@
 #ifndef AFU_PMD_CORE_H
 #define AFU_PMD_CORE_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <stdio.h>
 #include <unistd.h>
@@ -20,6 +16,10 @@ extern "C" {
 
 #include "ifpga_rawdev.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define AFU_RAWDEV_MAX_DRVS  32
 
 struct afu_rawdev;
diff --git a/drivers/raw/ifpga/afu_pmd_he_hssi.h b/drivers/raw/ifpga/afu_pmd_he_hssi.h
index aebbe32d54..282289d912 100644
--- a/drivers/raw/ifpga/afu_pmd_he_hssi.h
+++ b/drivers/raw/ifpga/afu_pmd_he_hssi.h
@@ -5,13 +5,13 @@
 #ifndef AFU_PMD_HE_HSSI_H
 #define AFU_PMD_HE_HSSI_H
 
+#include "afu_pmd_core.h"
+#include "rte_pmd_afu.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "afu_pmd_core.h"
-#include "rte_pmd_afu.h"
-
 #define HE_HSSI_UUID_L    0xbb370242ac130002
 #define HE_HSSI_UUID_H    0x823c334c98bf11ea
 #define NUM_HE_HSSI_PORTS 8
diff --git a/drivers/raw/ifpga/afu_pmd_he_lpbk.h b/drivers/raw/ifpga/afu_pmd_he_lpbk.h
index eab7b55199..67b3653c21 100644
--- a/drivers/raw/ifpga/afu_pmd_he_lpbk.h
+++ b/drivers/raw/ifpga/afu_pmd_he_lpbk.h
@@ -5,13 +5,13 @@
 #ifndef AFU_PMD_HE_LPBK_H
 #define AFU_PMD_HE_LPBK_H
 
+#include "afu_pmd_core.h"
+#include "rte_pmd_afu.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "afu_pmd_core.h"
-#include "rte_pmd_afu.h"
-
 #define HE_LPBK_UUID_L     0xb94b12284c31e02b
 #define HE_LPBK_UUID_H     0x56e203e9864f49a7
 #define HE_MEM_LPBK_UUID_L 0xbb652a578330a8eb
diff --git a/drivers/raw/ifpga/afu_pmd_he_mem.h b/drivers/raw/ifpga/afu_pmd_he_mem.h
index 998ca92416..41854d8c58 100644
--- a/drivers/raw/ifpga/afu_pmd_he_mem.h
+++ b/drivers/raw/ifpga/afu_pmd_he_mem.h
@@ -5,13 +5,13 @@
 #ifndef AFU_PMD_HE_MEM_H
 #define AFU_PMD_HE_MEM_H
 
+#include "afu_pmd_core.h"
+#include "rte_pmd_afu.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "afu_pmd_core.h"
-#include "rte_pmd_afu.h"
-
 #define HE_MEM_TG_UUID_L  0xa3dc5b831f5cecbb
 #define HE_MEM_TG_UUID_H  0x4dadea342c7848cb
 
diff --git a/drivers/raw/ifpga/afu_pmd_n3000.h b/drivers/raw/ifpga/afu_pmd_n3000.h
index 403cc64b91..f6b6e07c6b 100644
--- a/drivers/raw/ifpga/afu_pmd_n3000.h
+++ b/drivers/raw/ifpga/afu_pmd_n3000.h
@@ -5,13 +5,13 @@
 #ifndef AFU_PMD_N3000_H
 #define AFU_PMD_N3000_H
 
+#include "afu_pmd_core.h"
+#include "rte_pmd_afu.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "afu_pmd_core.h"
-#include "rte_pmd_afu.h"
-
 #define N3000_AFU_UUID_L  0xc000c9660d824272
 #define N3000_AFU_UUID_H  0x9aeffe5f84570612
 #define N3000_NLB0_UUID_L 0xf89e433683f9040b
diff --git a/drivers/raw/ifpga/rte_pmd_afu.h b/drivers/raw/ifpga/rte_pmd_afu.h
index 5403ed25f5..0edacc3a9c 100644
--- a/drivers/raw/ifpga/rte_pmd_afu.h
+++ b/drivers/raw/ifpga/rte_pmd_afu.h
@@ -14,12 +14,12 @@
  *
  */
 
+#include <stdint.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-
 #define RTE_PMD_AFU_N3000_NLB   1
 #define RTE_PMD_AFU_N3000_DMA   2
 
diff --git a/drivers/raw/ifpga/rte_pmd_ifpga.h b/drivers/raw/ifpga/rte_pmd_ifpga.h
index 791543f2cd..36b7f9c018 100644
--- a/drivers/raw/ifpga/rte_pmd_ifpga.h
+++ b/drivers/raw/ifpga/rte_pmd_ifpga.h
@@ -14,12 +14,12 @@
  *
  */
 
+#include <stdint.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-
 #define IFPGA_MAX_PORT_NUM   4
 
 /**
diff --git a/examples/ethtool/lib/rte_ethtool.h b/examples/ethtool/lib/rte_ethtool.h
index d27e0102b1..c7dd3d9755 100644
--- a/examples/ethtool/lib/rte_ethtool.h
+++ b/examples/ethtool/lib/rte_ethtool.h
@@ -30,14 +30,14 @@
  * rte_ethtool_net_set_rx_mode      net_device_ops::ndo_set_rx_mode
  *
  */
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <rte_ethdev.h>
 #include <linux/ethtool.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Retrieve the Ethernet device driver information according to
  * attributes described by ethtool data structure, ethtool_drvinfo.
diff --git a/examples/qos_sched/main.h b/examples/qos_sched/main.h
index 04e77a4a10..ea66df0434 100644
--- a/examples/qos_sched/main.h
+++ b/examples/qos_sched/main.h
@@ -5,12 +5,12 @@
 #ifndef _MAIN_H_
 #define _MAIN_H_
 
+#include <rte_sched.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_sched.h>
-
 #define RTE_LOGTYPE_APP RTE_LOGTYPE_USER1
 
 /*
diff --git a/examples/vm_power_manager/channel_manager.h b/examples/vm_power_manager/channel_manager.h
index eb989b20ad..6f70539815 100644
--- a/examples/vm_power_manager/channel_manager.h
+++ b/examples/vm_power_manager/channel_manager.h
@@ -5,16 +5,16 @@
 #ifndef CHANNEL_MANAGER_H_
 #define CHANNEL_MANAGER_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <linux/limits.h>
 #include <linux/un.h>
 #include <stdbool.h>
 
 #include <rte_stdatomic.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Maximum name length including '\0' terminator */
 #define CHANNEL_MGR_MAX_NAME_LEN    64
 
diff --git a/lib/acl/rte_acl_osdep.h b/lib/acl/rte_acl_osdep.h
index 3c1dc402ca..e4c7d07c69 100644
--- a/lib/acl/rte_acl_osdep.h
+++ b/lib/acl/rte_acl_osdep.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_ACL_OSDEP_H_
 #define _RTE_ACL_OSDEP_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  *
@@ -49,6 +45,10 @@ extern "C" {
 #include <rte_cpuflags.h>
 #include <rte_debug.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/bbdev/rte_bbdev.h b/lib/bbdev/rte_bbdev.h
index 0cbfdd1c95..9e83dd2bb0 100644
--- a/lib/bbdev/rte_bbdev.h
+++ b/lib/bbdev/rte_bbdev.h
@@ -20,10 +20,6 @@
  * from the same queue.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <stdbool.h>
 
@@ -32,6 +28,10 @@ extern "C" {
 
 #include "rte_bbdev_op.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifndef RTE_BBDEV_MAX_DEVS
 #define RTE_BBDEV_MAX_DEVS 128  /**< Max number of devices */
 #endif
diff --git a/lib/bbdev/rte_bbdev_op.h b/lib/bbdev/rte_bbdev_op.h
index 459631d0d0..6f4bae7d0f 100644
--- a/lib/bbdev/rte_bbdev_op.h
+++ b/lib/bbdev/rte_bbdev_op.h
@@ -11,10 +11,6 @@
  * Defines wireless base band layer 1 operations and capabilities
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_compat.h>
@@ -23,6 +19,10 @@ extern "C" {
 #include <rte_memory.h>
 #include <rte_mempool.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Number of columns in sub-block interleaver (36.212, section 5.1.4.1.1) */
 #define RTE_BBDEV_TURBO_C_SUBBLOCK (32)
 /* Maximum size of Transport Block (36.213, Table, Table 7.1.7.2.5-1) */
diff --git a/lib/bbdev/rte_bbdev_pmd.h b/lib/bbdev/rte_bbdev_pmd.h
index 442b23943d..0a1738fc05 100644
--- a/lib/bbdev/rte_bbdev_pmd.h
+++ b/lib/bbdev/rte_bbdev_pmd.h
@@ -14,15 +14,15 @@
  * bbdev interface. User applications should not use this API.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <rte_log.h>
 
 #include "rte_bbdev.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Suggested value for SW based devices */
 #define RTE_BBDEV_DEFAULT_MAX_NB_QUEUES RTE_MAX_LCORE
 
diff --git a/lib/bpf/bpf_def.h b/lib/bpf/bpf_def.h
index f08cd9106b..9f2e162914 100644
--- a/lib/bpf/bpf_def.h
+++ b/lib/bpf/bpf_def.h
@@ -7,10 +7,6 @@
 #ifndef _RTE_BPF_DEF_H_
 #define _RTE_BPF_DEF_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  *
@@ -25,6 +21,10 @@ extern "C" {
 
 #include <stdint.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 
 /*
  * The instruction encodings.
diff --git a/lib/compressdev/rte_comp.h b/lib/compressdev/rte_comp.h
index 830a240b6b..d66a4b1cb9 100644
--- a/lib/compressdev/rte_comp.h
+++ b/lib/compressdev/rte_comp.h
@@ -11,12 +11,12 @@
  * RTE definitions for Data Compression Service
  */
 
+#include <rte_mbuf.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_mbuf.h>
-
 /**
  * compression service feature flags
  *
diff --git a/lib/compressdev/rte_compressdev.h b/lib/compressdev/rte_compressdev.h
index e0294a18bd..b3392553a6 100644
--- a/lib/compressdev/rte_compressdev.h
+++ b/lib/compressdev/rte_compressdev.h
@@ -13,13 +13,13 @@
  * Defines comp device APIs for the provisioning of compression operations.
  */
 
+
+#include "rte_comp.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-
-#include "rte_comp.h"
-
 /**
  * Parameter log base 2 range description.
  * Final value will be 2^value.
diff --git a/lib/compressdev/rte_compressdev_internal.h b/lib/compressdev/rte_compressdev_internal.h
index 67f8b51a37..a980d74cbf 100644
--- a/lib/compressdev/rte_compressdev_internal.h
+++ b/lib/compressdev/rte_compressdev_internal.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_COMPRESSDEV_INTERNAL_H_
 #define _RTE_COMPRESSDEV_INTERNAL_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /* rte_compressdev_internal.h
  * This file holds Compressdev private data structures.
  */
@@ -16,6 +12,10 @@ extern "C" {
 
 #include "rte_comp.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_COMPRESSDEV_NAME_MAX_LEN	(64)
 /**< Max length of name of comp PMD */
 
diff --git a/lib/compressdev/rte_compressdev_pmd.h b/lib/compressdev/rte_compressdev_pmd.h
index 32e29c9d16..ea721f014d 100644
--- a/lib/compressdev/rte_compressdev_pmd.h
+++ b/lib/compressdev/rte_compressdev_pmd.h
@@ -13,10 +13,6 @@
  * them directly.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <string.h>
 
 #include <dev_driver.h>
@@ -24,6 +20,10 @@ extern "C" {
 #include "rte_compressdev.h"
 #include "rte_compressdev_internal.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_COMPRESSDEV_PMD_NAME_ARG			("name")
 #define RTE_COMPRESSDEV_PMD_SOCKET_ID_ARG		("socket_id")
 
diff --git a/lib/cryptodev/cryptodev_pmd.h b/lib/cryptodev/cryptodev_pmd.h
index 6c114f7181..3e2e2673b8 100644
--- a/lib/cryptodev/cryptodev_pmd.h
+++ b/lib/cryptodev/cryptodev_pmd.h
@@ -5,10 +5,6 @@
 #ifndef _CRYPTODEV_PMD_H_
 #define _CRYPTODEV_PMD_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /** @file
  * RTE Crypto PMD APIs
  *
@@ -28,6 +24,10 @@ extern "C" {
 #include "rte_crypto.h"
 #include "rte_cryptodev.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 
 #define RTE_CRYPTODEV_PMD_DEFAULT_MAX_NB_QUEUE_PAIRS	8
 
diff --git a/lib/cryptodev/cryptodev_trace.h b/lib/cryptodev/cryptodev_trace.h
index 935f0d564b..e186f0f3c1 100644
--- a/lib/cryptodev/cryptodev_trace.h
+++ b/lib/cryptodev/cryptodev_trace.h
@@ -11,14 +11,14 @@
  * API for cryptodev trace support
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_trace_point.h>
 
 #include "rte_cryptodev.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 RTE_TRACE_POINT(
 	rte_cryptodev_trace_configure,
 	RTE_TRACE_POINT_ARGS(uint8_t dev_id,
diff --git a/lib/cryptodev/rte_crypto.h b/lib/cryptodev/rte_crypto.h
index dbc2700da5..dcf4a36fb2 100644
--- a/lib/cryptodev/rte_crypto.h
+++ b/lib/cryptodev/rte_crypto.h
@@ -11,10 +11,6 @@
  * RTE Cryptography Common Definitions
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 
 #include <rte_mbuf.h>
 #include <rte_memory.h>
@@ -24,6 +20,10 @@ extern "C" {
 #include "rte_crypto_sym.h"
 #include "rte_crypto_asym.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Crypto operation types */
 enum rte_crypto_op_type {
 	RTE_CRYPTO_OP_TYPE_UNDEFINED,
diff --git a/lib/cryptodev/rte_crypto_asym.h b/lib/cryptodev/rte_crypto_asym.h
index 39d3da3952..4b7ea36961 100644
--- a/lib/cryptodev/rte_crypto_asym.h
+++ b/lib/cryptodev/rte_crypto_asym.h
@@ -14,10 +14,6 @@
  * asymmetric crypto operations.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <string.h>
 #include <stdint.h>
 
@@ -27,6 +23,10 @@ extern "C" {
 
 #include "rte_crypto_sym.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct rte_cryptodev_asym_session;
 
 /** asym key exchange operation type name strings */
diff --git a/lib/cryptodev/rte_crypto_sym.h b/lib/cryptodev/rte_crypto_sym.h
index 53b18b9412..fb73024010 100644
--- a/lib/cryptodev/rte_crypto_sym.h
+++ b/lib/cryptodev/rte_crypto_sym.h
@@ -14,10 +14,6 @@
  * as supported symmetric crypto operation combinations.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <string.h>
 
 #include <rte_compat.h>
@@ -26,6 +22,10 @@ extern "C" {
 #include <rte_mempool.h>
 #include <rte_common.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Crypto IO Vector (in analogy with struct iovec)
  * Supposed be used to pass input/output data buffers for crypto data-path
diff --git a/lib/cryptodev/rte_cryptodev.h b/lib/cryptodev/rte_cryptodev.h
index bec947f6d5..8051c5a6a3 100644
--- a/lib/cryptodev/rte_cryptodev.h
+++ b/lib/cryptodev/rte_cryptodev.h
@@ -14,10 +14,6 @@
  * authentication operations.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include "rte_kvargs.h"
 #include "rte_crypto.h"
@@ -1859,6 +1855,10 @@ int rte_cryptodev_remove_deq_callback(uint8_t dev_id,
 				      struct rte_cryptodev_cb *cb);
 
 #include <rte_cryptodev_core.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
 /**
  *
  * Dequeue a burst of processed crypto operations from a queue on the crypto
diff --git a/lib/cryptodev/rte_cryptodev_trace_fp.h b/lib/cryptodev/rte_cryptodev_trace_fp.h
index dbfbc7b2e5..f23f882804 100644
--- a/lib/cryptodev/rte_cryptodev_trace_fp.h
+++ b/lib/cryptodev/rte_cryptodev_trace_fp.h
@@ -5,12 +5,12 @@
 #ifndef _RTE_CRYPTODEV_TRACE_FP_H_
 #define _RTE_CRYPTODEV_TRACE_FP_H_
 
+#include <rte_trace_point.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_trace_point.h>
-
 RTE_TRACE_POINT_FP(
 	rte_cryptodev_trace_enqueue_burst,
 	RTE_TRACE_POINT_ARGS(uint8_t dev_id, uint16_t qp_id, void **ops,
diff --git a/lib/dispatcher/rte_dispatcher.h b/lib/dispatcher/rte_dispatcher.h
index d8182d5f2c..ba2c353073 100644
--- a/lib/dispatcher/rte_dispatcher.h
+++ b/lib/dispatcher/rte_dispatcher.h
@@ -19,16 +19,16 @@
  * event device.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdbool.h>
 #include <stdint.h>
 
 #include <rte_compat.h>
 #include <rte_eventdev.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Function prototype for match callbacks.
  *
diff --git a/lib/dmadev/rte_dmadev.h b/lib/dmadev/rte_dmadev.h
index 5474a5281d..d174d325a1 100644
--- a/lib/dmadev/rte_dmadev.h
+++ b/lib/dmadev/rte_dmadev.h
@@ -772,9 +772,17 @@ struct rte_dma_sge {
 	uint32_t length; /**< The DMA operation length. */
 };
 
+#ifdef __cplusplus
+}
+#endif
+
 #include "rte_dmadev_core.h"
 #include "rte_dmadev_trace_fp.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**@{@name DMA operation flag
  * @see rte_dma_copy()
  * @see rte_dma_copy_sg()
diff --git a/lib/eal/arm/include/rte_atomic_32.h b/lib/eal/arm/include/rte_atomic_32.h
index 62fc33773d..0b9a0dfa30 100644
--- a/lib/eal/arm/include/rte_atomic_32.h
+++ b/lib/eal/arm/include/rte_atomic_32.h
@@ -9,12 +9,12 @@
 #  error Platform must be built with RTE_FORCE_INTRINSICS
 #endif
 
+#include "generic/rte_atomic.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_atomic.h"
-
 #define	rte_mb()  __sync_synchronize()
 
 #define	rte_wmb() do { asm volatile ("dmb st" : : : "memory"); } while (0)
diff --git a/lib/eal/arm/include/rte_atomic_64.h b/lib/eal/arm/include/rte_atomic_64.h
index 7c99fc0a02..181bb60929 100644
--- a/lib/eal/arm/include/rte_atomic_64.h
+++ b/lib/eal/arm/include/rte_atomic_64.h
@@ -10,14 +10,14 @@
 #  error Platform must be built with RTE_FORCE_INTRINSICS
 #endif
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include "generic/rte_atomic.h"
 #include <rte_branch_prediction.h>
 #include <rte_debug.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define rte_mb() asm volatile("dmb osh" : : : "memory")
 
 #define rte_wmb() asm volatile("dmb oshst" : : : "memory")
diff --git a/lib/eal/arm/include/rte_byteorder.h b/lib/eal/arm/include/rte_byteorder.h
index ff02052f2e..a0aaff4a28 100644
--- a/lib/eal/arm/include/rte_byteorder.h
+++ b/lib/eal/arm/include/rte_byteorder.h
@@ -9,14 +9,14 @@
 #  error Platform must be built with RTE_FORCE_INTRINSICS
 #endif
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <rte_common.h>
 #include "generic/rte_byteorder.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* ARM architecture is bi-endian (both big and little). */
 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
 
diff --git a/lib/eal/arm/include/rte_cpuflags_32.h b/lib/eal/arm/include/rte_cpuflags_32.h
index 770b09b99d..7e33acd9fb 100644
--- a/lib/eal/arm/include/rte_cpuflags_32.h
+++ b/lib/eal/arm/include/rte_cpuflags_32.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_CPUFLAGS_ARM32_H_
 #define _RTE_CPUFLAGS_ARM32_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * Enumeration of all CPU features supported
  */
@@ -46,6 +42,10 @@ enum rte_cpu_flag_t {
 
 #include "generic/rte_cpuflags.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/arm/include/rte_cpuflags_64.h b/lib/eal/arm/include/rte_cpuflags_64.h
index afe70209c3..f84633159e 100644
--- a/lib/eal/arm/include/rte_cpuflags_64.h
+++ b/lib/eal/arm/include/rte_cpuflags_64.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_CPUFLAGS_ARM64_H_
 #define _RTE_CPUFLAGS_ARM64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * Enumeration of all CPU features supported
  */
@@ -40,6 +36,10 @@ enum rte_cpu_flag_t {
 
 #include "generic/rte_cpuflags.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/arm/include/rte_cycles_32.h b/lib/eal/arm/include/rte_cycles_32.h
index 859cd2e5bb..2b20c8c6f5 100644
--- a/lib/eal/arm/include/rte_cycles_32.h
+++ b/lib/eal/arm/include/rte_cycles_32.h
@@ -15,12 +15,12 @@
 
 #include <time.h>
 
+#include "generic/rte_cycles.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_cycles.h"
-
 /**
  * Read the time base register.
  *
diff --git a/lib/eal/arm/include/rte_cycles_64.h b/lib/eal/arm/include/rte_cycles_64.h
index 8b05302f47..bb76e4d7e0 100644
--- a/lib/eal/arm/include/rte_cycles_64.h
+++ b/lib/eal/arm/include/rte_cycles_64.h
@@ -6,12 +6,12 @@
 #ifndef _RTE_CYCLES_ARM64_H_
 #define _RTE_CYCLES_ARM64_H_
 
+#include "generic/rte_cycles.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_cycles.h"
-
 /** Read generic counter frequency */
 static __rte_always_inline uint64_t
 __rte_arm64_cntfrq(void)
diff --git a/lib/eal/arm/include/rte_io.h b/lib/eal/arm/include/rte_io.h
index f4e66e6bad..ca1a353bed 100644
--- a/lib/eal/arm/include/rte_io.h
+++ b/lib/eal/arm/include/rte_io.h
@@ -5,16 +5,16 @@
 #ifndef _RTE_IO_ARM_H_
 #define _RTE_IO_ARM_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #ifdef RTE_ARCH_64
 #include "rte_io_64.h"
 #else
 #include "generic/rte_io.h"
 #endif
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/arm/include/rte_io_64.h b/lib/eal/arm/include/rte_io_64.h
index 96da7789ce..88db82a7eb 100644
--- a/lib/eal/arm/include/rte_io_64.h
+++ b/lib/eal/arm/include/rte_io_64.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_IO_ARM64_H_
 #define _RTE_IO_ARM64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #define RTE_OVERRIDE_IO_H
@@ -17,6 +13,10 @@ extern "C" {
 #include <rte_compat.h>
 #include "rte_atomic_64.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static __rte_always_inline uint8_t
 rte_read8_relaxed(const volatile void *addr)
 {
diff --git a/lib/eal/arm/include/rte_memcpy_32.h b/lib/eal/arm/include/rte_memcpy_32.h
index fb3245b59c..99fd5757ca 100644
--- a/lib/eal/arm/include/rte_memcpy_32.h
+++ b/lib/eal/arm/include/rte_memcpy_32.h
@@ -8,10 +8,6 @@
 #include <stdint.h>
 #include <string.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include "generic/rte_memcpy.h"
 
 #ifdef RTE_ARCH_ARM_NEON_MEMCPY
@@ -23,6 +19,10 @@ extern "C" {
 /* ARM NEON Intrinsics are used to copy data */
 #include <arm_neon.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void
 rte_mov16(uint8_t *dst, const uint8_t *src)
 {
diff --git a/lib/eal/arm/include/rte_memcpy_64.h b/lib/eal/arm/include/rte_memcpy_64.h
index 85ad587bd3..c7d0c345ad 100644
--- a/lib/eal/arm/include/rte_memcpy_64.h
+++ b/lib/eal/arm/include/rte_memcpy_64.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_MEMCPY_ARM64_H_
 #define _RTE_MEMCPY_ARM64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <string.h>
 
@@ -18,6 +14,10 @@ extern "C" {
 #include <rte_common.h>
 #include <rte_branch_prediction.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /*
  * The memory copy performance differs on different AArch64 micro-architectures.
  * And the most recent glibc (e.g. 2.23 or later) can provide a better memcpy()
diff --git a/lib/eal/arm/include/rte_pause.h b/lib/eal/arm/include/rte_pause.h
index 6c7002ad98..b8a3d64b3a 100644
--- a/lib/eal/arm/include/rte_pause.h
+++ b/lib/eal/arm/include/rte_pause.h
@@ -5,16 +5,16 @@
 #ifndef _RTE_PAUSE_ARM_H_
 #define _RTE_PAUSE_ARM_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #ifdef RTE_ARCH_64
 #include <rte_pause_64.h>
 #else
 #include <rte_pause_32.h>
 #endif
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/arm/include/rte_pause_32.h b/lib/eal/arm/include/rte_pause_32.h
index d4768c7a98..7870fac763 100644
--- a/lib/eal/arm/include/rte_pause_32.h
+++ b/lib/eal/arm/include/rte_pause_32.h
@@ -5,13 +5,13 @@
 #ifndef _RTE_PAUSE_ARM32_H_
 #define _RTE_PAUSE_ARM32_H_
 
+#include <rte_common.h>
+#include "generic/rte_pause.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_common.h>
-#include "generic/rte_pause.h"
-
 static inline void rte_pause(void)
 {
 }
diff --git a/lib/eal/arm/include/rte_pause_64.h b/lib/eal/arm/include/rte_pause_64.h
index 9e2dbf3531..1526bf87cc 100644
--- a/lib/eal/arm/include/rte_pause_64.h
+++ b/lib/eal/arm/include/rte_pause_64.h
@@ -6,10 +6,6 @@
 #ifndef _RTE_PAUSE_ARM64_H_
 #define _RTE_PAUSE_ARM64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 #include <rte_stdatomic.h>
 
@@ -19,6 +15,10 @@ extern "C" {
 
 #include "generic/rte_pause.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void rte_pause(void)
 {
 	asm volatile("yield" ::: "memory");
diff --git a/lib/eal/arm/include/rte_power_intrinsics.h b/lib/eal/arm/include/rte_power_intrinsics.h
index 9e498e9ebf..5481f45ad3 100644
--- a/lib/eal/arm/include/rte_power_intrinsics.h
+++ b/lib/eal/arm/include/rte_power_intrinsics.h
@@ -5,14 +5,14 @@
 #ifndef _RTE_POWER_INTRINSIC_ARM_H_
 #define _RTE_POWER_INTRINSIC_ARM_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 
 #include "generic/rte_power_intrinsics.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/arm/include/rte_prefetch_32.h b/lib/eal/arm/include/rte_prefetch_32.h
index 0e9a140c8a..619bf27c79 100644
--- a/lib/eal/arm/include/rte_prefetch_32.h
+++ b/lib/eal/arm/include/rte_prefetch_32.h
@@ -5,14 +5,14 @@
 #ifndef _RTE_PREFETCH_ARM32_H_
 #define _RTE_PREFETCH_ARM32_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include <rte_common.h>
 #include "generic/rte_prefetch.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void rte_prefetch0(const volatile void *p)
 {
 	asm volatile ("pld [%0]" : : "r" (p));
diff --git a/lib/eal/arm/include/rte_prefetch_64.h b/lib/eal/arm/include/rte_prefetch_64.h
index 22cba48e29..4f60123b8b 100644
--- a/lib/eal/arm/include/rte_prefetch_64.h
+++ b/lib/eal/arm/include/rte_prefetch_64.h
@@ -5,14 +5,14 @@
 #ifndef _RTE_PREFETCH_ARM_64_H_
 #define _RTE_PREFETCH_ARM_64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include <rte_common.h>
 #include "generic/rte_prefetch.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void rte_prefetch0(const volatile void *p)
 {
 	asm volatile ("PRFM PLDL1KEEP, [%0]" : : "r" (p));
diff --git a/lib/eal/arm/include/rte_rwlock.h b/lib/eal/arm/include/rte_rwlock.h
index 18bb37b036..727cabafec 100644
--- a/lib/eal/arm/include/rte_rwlock.h
+++ b/lib/eal/arm/include/rte_rwlock.h
@@ -5,12 +5,12 @@
 #ifndef _RTE_RWLOCK_ARM_H_
 #define _RTE_RWLOCK_ARM_H_
 
+#include "generic/rte_rwlock.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_rwlock.h"
-
 static inline void
 rte_rwlock_read_lock_tm(rte_rwlock_t *rwl)
 {
diff --git a/lib/eal/arm/include/rte_spinlock.h b/lib/eal/arm/include/rte_spinlock.h
index a973763c23..a5d01b0d21 100644
--- a/lib/eal/arm/include/rte_spinlock.h
+++ b/lib/eal/arm/include/rte_spinlock.h
@@ -9,13 +9,13 @@
 #  error Platform must be built with RTE_FORCE_INTRINSICS
 #endif
 
+#include <rte_common.h>
+#include "generic/rte_spinlock.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_common.h>
-#include "generic/rte_spinlock.h"
-
 static inline int rte_tm_supported(void)
 {
 	return 0;
diff --git a/lib/eal/freebsd/include/rte_os.h b/lib/eal/freebsd/include/rte_os.h
index 003468caff..f31f6af12d 100644
--- a/lib/eal/freebsd/include/rte_os.h
+++ b/lib/eal/freebsd/include/rte_os.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_OS_H_
 #define _RTE_OS_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * This header should contain any definition
  * which is not supported natively or named differently in FreeBSD.
@@ -17,6 +13,10 @@ extern "C" {
 #include <pthread_np.h>
 #include <sys/queue.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* These macros are compatible with system's sys/queue.h. */
 #define RTE_TAILQ_HEAD(name, type) TAILQ_HEAD(name, type)
 #define RTE_TAILQ_ENTRY(type) TAILQ_ENTRY(type)
diff --git a/lib/eal/include/bus_driver.h b/lib/eal/include/bus_driver.h
index 7b85a17a09..60527b75b6 100644
--- a/lib/eal/include/bus_driver.h
+++ b/lib/eal/include/bus_driver.h
@@ -5,16 +5,16 @@
 #ifndef BUS_DRIVER_H
 #define BUS_DRIVER_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_bus.h>
 #include <rte_compat.h>
 #include <rte_dev.h>
 #include <rte_eal.h>
 #include <rte_tailq.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct rte_devargs;
 struct rte_device;
 
diff --git a/lib/eal/include/dev_driver.h b/lib/eal/include/dev_driver.h
index 5efa8c437e..f7a9c17dc3 100644
--- a/lib/eal/include/dev_driver.h
+++ b/lib/eal/include/dev_driver.h
@@ -5,13 +5,13 @@
 #ifndef DEV_DRIVER_H
 #define DEV_DRIVER_H
 
+#include <rte_common.h>
+#include <rte_dev.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_common.h>
-#include <rte_dev.h>
-
 /**
  * A structure describing a device driver.
  */
diff --git a/lib/eal/include/eal_trace_internal.h b/lib/eal/include/eal_trace_internal.h
index 09c354717f..50f91d0929 100644
--- a/lib/eal/include/eal_trace_internal.h
+++ b/lib/eal/include/eal_trace_internal.h
@@ -11,16 +11,16 @@
  * API for EAL trace support
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_alarm.h>
 #include <rte_interrupts.h>
 #include <rte_trace_point.h>
 
 #include "eal_interrupts.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Alarm */
 RTE_TRACE_POINT(
 	rte_eal_trace_alarm_set,
diff --git a/lib/eal/include/generic/rte_atomic.h b/lib/eal/include/generic/rte_atomic.h
index f859707744..0a4f3f8528 100644
--- a/lib/eal/include/generic/rte_atomic.h
+++ b/lib/eal/include/generic/rte_atomic.h
@@ -17,6 +17,10 @@
 #include <rte_common.h>
 #include <rte_stdatomic.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __DOXYGEN__
 
 /** @name Memory Barrier
@@ -1156,4 +1160,8 @@ rte_atomic128_cmp_exchange(rte_int128_t *dst,
 
 #endif /* __DOXYGEN__ */
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* _RTE_ATOMIC_H_ */
diff --git a/lib/eal/include/generic/rte_byteorder.h b/lib/eal/include/generic/rte_byteorder.h
index f1c04ba83e..7973d6326f 100644
--- a/lib/eal/include/generic/rte_byteorder.h
+++ b/lib/eal/include/generic/rte_byteorder.h
@@ -24,6 +24,10 @@
 #include <rte_common.h>
 #include <rte_config.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /*
  * Compile-time endianness detection
  */
@@ -251,4 +255,8 @@ static uint64_t rte_be_to_cpu_64(rte_be64_t x);
 #endif
 #endif
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* _RTE_BYTEORDER_H_ */
diff --git a/lib/eal/include/generic/rte_cpuflags.h b/lib/eal/include/generic/rte_cpuflags.h
index d35551e931..bfe9df4516 100644
--- a/lib/eal/include/generic/rte_cpuflags.h
+++ b/lib/eal/include/generic/rte_cpuflags.h
@@ -15,6 +15,10 @@
 
 #include <rte_compat.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Structure used to describe platform-specific intrinsics that may or may not
  * be supported at runtime.
@@ -104,4 +108,8 @@ rte_cpu_getauxval(unsigned long type);
 int
 rte_cpu_strcmp_auxval(unsigned long type, const char *str);
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* _RTE_CPUFLAGS_H_ */
diff --git a/lib/eal/include/generic/rte_cycles.h b/lib/eal/include/generic/rte_cycles.h
index 075e899f5a..7cfd51f0eb 100644
--- a/lib/eal/include/generic/rte_cycles.h
+++ b/lib/eal/include/generic/rte_cycles.h
@@ -16,6 +16,10 @@
 #include <rte_debug.h>
 #include <rte_atomic.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define MS_PER_S 1000
 #define US_PER_S 1000000
 #define NS_PER_S 1000000000
@@ -175,4 +179,8 @@ void rte_delay_us_sleep(unsigned int us);
  */
 void rte_delay_us_callback_register(void(*userfunc)(unsigned int));
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* _RTE_CYCLES_H_ */
diff --git a/lib/eal/include/generic/rte_io.h b/lib/eal/include/generic/rte_io.h
index ebcf8051e1..73b0f7a9f4 100644
--- a/lib/eal/include/generic/rte_io.h
+++ b/lib/eal/include/generic/rte_io.h
@@ -17,6 +17,10 @@
 #include <rte_compat.h>
 #include <rte_atomic.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __DOXYGEN__
 
 /**
@@ -396,4 +400,8 @@ rte_write32_wc_relaxed(uint32_t value, volatile void *addr)
 
 #endif /* RTE_OVERRIDE_IO_H */
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* _RTE_IO_H_ */
diff --git a/lib/eal/include/generic/rte_memcpy.h b/lib/eal/include/generic/rte_memcpy.h
index e7f0f8eaa9..da53b72ca8 100644
--- a/lib/eal/include/generic/rte_memcpy.h
+++ b/lib/eal/include/generic/rte_memcpy.h
@@ -5,6 +5,10 @@
 #ifndef _RTE_MEMCPY_H_
 #define _RTE_MEMCPY_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @file
  *
@@ -113,4 +117,8 @@ rte_memcpy(void *dst, const void *src, size_t n);
 
 #endif /* __DOXYGEN__ */
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* _RTE_MEMCPY_H_ */
diff --git a/lib/eal/include/generic/rte_pause.h b/lib/eal/include/generic/rte_pause.h
index f2a1eadcbd..968c0886d3 100644
--- a/lib/eal/include/generic/rte_pause.h
+++ b/lib/eal/include/generic/rte_pause.h
@@ -19,6 +19,10 @@
 #include <rte_atomic.h>
 #include <rte_stdatomic.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Pause CPU execution for a short while
  *
@@ -136,4 +140,8 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
 } while (0)
 #endif /* ! RTE_WAIT_UNTIL_EQUAL_ARCH_DEFINED */
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* _RTE_PAUSE_H_ */
diff --git a/lib/eal/include/generic/rte_power_intrinsics.h b/lib/eal/include/generic/rte_power_intrinsics.h
index ea899f1bfa..86c0559468 100644
--- a/lib/eal/include/generic/rte_power_intrinsics.h
+++ b/lib/eal/include/generic/rte_power_intrinsics.h
@@ -9,6 +9,10 @@
 
 #include <rte_spinlock.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @file
  * Advanced power management operations.
@@ -147,4 +151,8 @@ int rte_power_pause(const uint64_t tsc_timestamp);
 int rte_power_monitor_multi(const struct rte_power_monitor_cond pmc[],
 		const uint32_t num, const uint64_t tsc_timestamp);
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* _RTE_POWER_INTRINSIC_H_ */
diff --git a/lib/eal/include/generic/rte_prefetch.h b/lib/eal/include/generic/rte_prefetch.h
index 773b3b8d1e..f7ac4ab48a 100644
--- a/lib/eal/include/generic/rte_prefetch.h
+++ b/lib/eal/include/generic/rte_prefetch.h
@@ -7,6 +7,10 @@
 
 #include <rte_compat.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @file
  *
@@ -146,4 +150,8 @@ __rte_experimental
 static inline void
 rte_cldemote(const volatile void *p);
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* _RTE_PREFETCH_H_ */
diff --git a/lib/eal/include/generic/rte_rwlock.h b/lib/eal/include/generic/rte_rwlock.h
index 5f939be98c..ac0474466a 100644
--- a/lib/eal/include/generic/rte_rwlock.h
+++ b/lib/eal/include/generic/rte_rwlock.h
@@ -22,10 +22,6 @@
  *  https://locklessinc.com/articles/locks/
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <errno.h>
 
 #include <rte_branch_prediction.h>
@@ -34,6 +30,10 @@ extern "C" {
 #include <rte_pause.h>
 #include <rte_stdatomic.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * The rte_rwlock_t type.
  *
diff --git a/lib/eal/include/generic/rte_spinlock.h b/lib/eal/include/generic/rte_spinlock.h
index 23fb04896f..c2980601b2 100644
--- a/lib/eal/include/generic/rte_spinlock.h
+++ b/lib/eal/include/generic/rte_spinlock.h
@@ -25,6 +25,10 @@
 #include <rte_pause.h>
 #include <rte_stdatomic.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * The rte_spinlock_t type.
  */
@@ -318,4 +322,8 @@ __rte_warn_unused_result
 static inline int rte_spinlock_recursive_trylock_tm(
 	rte_spinlock_recursive_t *slr);
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* _RTE_SPINLOCK_H_ */
diff --git a/lib/eal/include/generic/rte_vect.h b/lib/eal/include/generic/rte_vect.h
index 1f84292a41..b87520a4d9 100644
--- a/lib/eal/include/generic/rte_vect.h
+++ b/lib/eal/include/generic/rte_vect.h
@@ -209,6 +209,10 @@ enum rte_vect_max_simd {
 	 */
 };
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Get the supported SIMD bitwidth.
  *
@@ -230,4 +234,8 @@ uint16_t rte_vect_get_max_simd_bitwidth(void);
  */
 int rte_vect_set_max_simd_bitwidth(uint16_t bitwidth);
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* _RTE_VECT_H_ */
diff --git a/lib/eal/include/rte_alarm.h b/lib/eal/include/rte_alarm.h
index 7e4d0b2407..9b4721b77f 100644
--- a/lib/eal/include/rte_alarm.h
+++ b/lib/eal/include/rte_alarm.h
@@ -14,12 +14,12 @@
  * Does not require hpet support.
  */
 
+#include <stdint.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-
 /**
  * Signature of callback back function called when an alarm goes off.
  */
diff --git a/lib/eal/include/rte_bitmap.h b/lib/eal/include/rte_bitmap.h
index ebe46000a0..abb102f1d3 100644
--- a/lib/eal/include/rte_bitmap.h
+++ b/lib/eal/include/rte_bitmap.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_BITMAP_H__
 #define __INCLUDE_RTE_BITMAP_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Bitmap
@@ -43,6 +39,10 @@ extern "C" {
 #include <rte_branch_prediction.h>
 #include <rte_prefetch.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Slab */
 #define RTE_BITMAP_SLAB_BIT_SIZE                 64
 #define RTE_BITMAP_SLAB_BIT_SIZE_LOG2            6
diff --git a/lib/eal/include/rte_bus.h b/lib/eal/include/rte_bus.h
index dfe756fb11..519f7b35f0 100644
--- a/lib/eal/include/rte_bus.h
+++ b/lib/eal/include/rte_bus.h
@@ -14,14 +14,14 @@
  * over the devices and drivers in EAL.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdio.h>
 
 #include <rte_eal.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct rte_bus;
 struct rte_device;
 
diff --git a/lib/eal/include/rte_class.h b/lib/eal/include/rte_class.h
index 16e544ec9a..7631e36e82 100644
--- a/lib/eal/include/rte_class.h
+++ b/lib/eal/include/rte_class.h
@@ -18,12 +18,12 @@
  * cryptographic co-processor (crypto), etc.
  */
 
+#include <rte_dev.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_dev.h>
-
 /** Double linked list of classes */
 RTE_TAILQ_HEAD(rte_class_list, rte_class);
 
diff --git a/lib/eal/include/rte_common.h b/lib/eal/include/rte_common.h
index eec0400dad..2486caa471 100644
--- a/lib/eal/include/rte_common.h
+++ b/lib/eal/include/rte_common.h
@@ -12,10 +12,6 @@
  * for DPDK.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <assert.h>
 #include <limits.h>
 #include <stdint.h>
@@ -26,6 +22,10 @@ extern "C" {
 /* OS specific include */
 #include <rte_os.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifndef RTE_TOOLCHAIN_MSVC
 #ifndef typeof
 #define typeof __typeof__
diff --git a/lib/eal/include/rte_dev.h b/lib/eal/include/rte_dev.h
index cefa04f905..738400e8d1 100644
--- a/lib/eal/include/rte_dev.h
+++ b/lib/eal/include/rte_dev.h
@@ -13,16 +13,16 @@
  * This file manages the list of device drivers.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdio.h>
 
 #include <rte_config.h>
 #include <rte_common.h>
 #include <rte_log.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct rte_bus;
 struct rte_devargs;
 struct rte_device;
diff --git a/lib/eal/include/rte_devargs.h b/lib/eal/include/rte_devargs.h
index 515e978bbe..ed5a4675d9 100644
--- a/lib/eal/include/rte_devargs.h
+++ b/lib/eal/include/rte_devargs.h
@@ -16,14 +16,14 @@
  * list of rte_devargs structures.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdio.h>
 
 #include <rte_dev.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct rte_bus;
 
 /**
diff --git a/lib/eal/include/rte_eal_trace.h b/lib/eal/include/rte_eal_trace.h
index c3d15bbe5e..9ad2112801 100644
--- a/lib/eal/include/rte_eal_trace.h
+++ b/lib/eal/include/rte_eal_trace.h
@@ -11,12 +11,12 @@
  * API for EAL trace support
  */
 
+#include <rte_trace_point.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_trace_point.h>
-
 /* Generic */
 RTE_TRACE_POINT(
 	rte_eal_trace_generic_void,
diff --git a/lib/eal/include/rte_errno.h b/lib/eal/include/rte_errno.h
index ba45591d24..c49818a40e 100644
--- a/lib/eal/include/rte_errno.h
+++ b/lib/eal/include/rte_errno.h
@@ -11,12 +11,12 @@
 #ifndef _RTE_ERRNO_H_
 #define _RTE_ERRNO_H_
 
+#include <rte_per_lcore.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_per_lcore.h>
-
 RTE_DECLARE_PER_LCORE(int, _rte_errno); /**< Per core error number. */
 
 /**
diff --git a/lib/eal/include/rte_fbarray.h b/lib/eal/include/rte_fbarray.h
index e33076778f..27dbfc2d6c 100644
--- a/lib/eal/include/rte_fbarray.h
+++ b/lib/eal/include/rte_fbarray.h
@@ -30,14 +30,14 @@
  * another process is using ``rte_fbarray``.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdio.h>
 
 #include <rte_rwlock.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_FBARRAY_NAME_LEN 64
 
 struct rte_fbarray {
diff --git a/lib/eal/include/rte_keepalive.h b/lib/eal/include/rte_keepalive.h
index 3ec413da01..9ff870f6b4 100644
--- a/lib/eal/include/rte_keepalive.h
+++ b/lib/eal/include/rte_keepalive.h
@@ -10,13 +10,13 @@
 #ifndef _KEEPALIVE_H_
 #define _KEEPALIVE_H_
 
+#include <rte_config.h>
+#include <rte_memory.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_config.h>
-#include <rte_memory.h>
-
 #ifndef RTE_KEEPALIVE_MAXCORES
 /**
  * Number of cores to track.
diff --git a/lib/eal/include/rte_mcslock.h b/lib/eal/include/rte_mcslock.h
index 0aeb1a09f4..bb218d2e50 100644
--- a/lib/eal/include/rte_mcslock.h
+++ b/lib/eal/include/rte_mcslock.h
@@ -19,16 +19,16 @@
  * they acquired the lock.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_lcore.h>
 #include <rte_common.h>
 #include <rte_pause.h>
 #include <rte_branch_prediction.h>
 #include <rte_stdatomic.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * The rte_mcslock_t type.
  */
diff --git a/lib/eal/include/rte_memory.h b/lib/eal/include/rte_memory.h
index 842362d527..dbd0a6bedc 100644
--- a/lib/eal/include/rte_memory.h
+++ b/lib/eal/include/rte_memory.h
@@ -15,16 +15,16 @@
 #include <stddef.h>
 #include <stdio.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_bitops.h>
 #include <rte_common.h>
 #include <rte_config.h>
 #include <rte_eal_memconfig.h>
 #include <rte_fbarray.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_PGSIZE_4K   (1ULL << 12)
 #define RTE_PGSIZE_64K  (1ULL << 16)
 #define RTE_PGSIZE_256K (1ULL << 18)
diff --git a/lib/eal/include/rte_pci_dev_features.h b/lib/eal/include/rte_pci_dev_features.h
index ee6e10590c..bc6d3d4c1f 100644
--- a/lib/eal/include/rte_pci_dev_features.h
+++ b/lib/eal/include/rte_pci_dev_features.h
@@ -5,12 +5,12 @@
 #ifndef _RTE_PCI_DEV_FEATURES_H
 #define _RTE_PCI_DEV_FEATURES_H
 
+#include <rte_pci_dev_feature_defs.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_pci_dev_feature_defs.h>
-
 #define RTE_INTR_MODE_NONE_NAME "none"
 #define RTE_INTR_MODE_LEGACY_NAME "legacy"
 #define RTE_INTR_MODE_MSI_NAME "msi"
diff --git a/lib/eal/include/rte_pflock.h b/lib/eal/include/rte_pflock.h
index 37aa223ac3..6797ce5920 100644
--- a/lib/eal/include/rte_pflock.h
+++ b/lib/eal/include/rte_pflock.h
@@ -27,14 +27,14 @@
  * All locks must be initialised before use, and only initialised once.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 #include <rte_pause.h>
 #include <rte_stdatomic.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * The rte_pflock_t type.
  */
diff --git a/lib/eal/include/rte_random.h b/lib/eal/include/rte_random.h
index 5031c6fe5f..15cbe6215a 100644
--- a/lib/eal/include/rte_random.h
+++ b/lib/eal/include/rte_random.h
@@ -11,12 +11,12 @@
  * Pseudo-random Generators in RTE
  */
 
+#include <stdint.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-
 /**
  * Seed the pseudo-random generator.
  *
diff --git a/lib/eal/include/rte_seqcount.h b/lib/eal/include/rte_seqcount.h
index 88a6746900..d71afa6ab7 100644
--- a/lib/eal/include/rte_seqcount.h
+++ b/lib/eal/include/rte_seqcount.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_SEQCOUNT_H_
 #define _RTE_SEQCOUNT_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Seqcount
@@ -27,6 +23,10 @@ extern "C" {
 #include <rte_branch_prediction.h>
 #include <rte_stdatomic.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * The RTE seqcount type.
  */
diff --git a/lib/eal/include/rte_seqlock.h b/lib/eal/include/rte_seqlock.h
index 2677bd9440..e0e94900d1 100644
--- a/lib/eal/include/rte_seqlock.h
+++ b/lib/eal/include/rte_seqlock.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_SEQLOCK_H_
 #define _RTE_SEQLOCK_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Seqlock
@@ -95,6 +91,10 @@ extern "C" {
 #include <rte_seqcount.h>
 #include <rte_spinlock.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * The RTE seqlock type.
  */
diff --git a/lib/eal/include/rte_service.h b/lib/eal/include/rte_service.h
index e49a7a877e..94919ae584 100644
--- a/lib/eal/include/rte_service.h
+++ b/lib/eal/include/rte_service.h
@@ -23,16 +23,16 @@
  * application has access to the remaining lcores as normal.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include<stdio.h>
 #include <stdint.h>
 
 #include <rte_config.h>
 #include <rte_lcore.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_SERVICE_NAME_MAX 32
 
 /* Capabilities of a service.
diff --git a/lib/eal/include/rte_service_component.h b/lib/eal/include/rte_service_component.h
index a5350c97e5..acdf45cf60 100644
--- a/lib/eal/include/rte_service_component.h
+++ b/lib/eal/include/rte_service_component.h
@@ -10,12 +10,12 @@
  * operate, and you wish to run the component using service cores
  */
 
+#include <rte_service.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_service.h>
-
 /**
  * Signature of callback function to run a service.
  *
diff --git a/lib/eal/include/rte_stdatomic.h b/lib/eal/include/rte_stdatomic.h
index 7a081cb500..0f11a15e4e 100644
--- a/lib/eal/include/rte_stdatomic.h
+++ b/lib/eal/include/rte_stdatomic.h
@@ -7,10 +7,6 @@
 
 #include <assert.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #ifdef RTE_ENABLE_STDATOMIC
 #ifndef _MSC_VER
 #ifdef __STDC_NO_ATOMICS__
@@ -188,6 +184,7 @@ typedef int rte_memory_order;
 #endif
 
 #ifdef __cplusplus
+extern "C" {
 }
 #endif
 
diff --git a/lib/eal/include/rte_string_fns.h b/lib/eal/include/rte_string_fns.h
index 13badec7b3..702bd81251 100644
--- a/lib/eal/include/rte_string_fns.h
+++ b/lib/eal/include/rte_string_fns.h
@@ -11,10 +11,6 @@
 #ifndef _RTE_STRING_FNS_H_
 #define _RTE_STRING_FNS_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <ctype.h>
 #include <stdio.h>
 #include <string.h>
@@ -22,6 +18,10 @@ extern "C" {
 #include <rte_common.h>
 #include <rte_compat.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Takes string "string" parameter and splits it at character "delim"
  * up to maxtokens-1 times - to give "maxtokens" resulting tokens. Like
@@ -77,6 +77,10 @@ rte_strlcat(char *dst, const char *src, size_t size)
 	return l + strlen(src);
 }
 
+#ifdef __cplusplus
+}
+#endif
+
 /* pull in a strlcpy function */
 #ifdef RTE_EXEC_ENV_FREEBSD
 #ifndef __BSD_VISIBLE /* non-standard functions are hidden */
@@ -95,6 +99,10 @@ rte_strlcat(char *dst, const char *src, size_t size)
 #endif /* RTE_USE_LIBBSD */
 #endif /* FREEBSD */
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Copy string src to buffer dst of size dsize.
  * At most dsize-1 chars will be copied.
@@ -141,7 +149,6 @@ rte_str_skip_leading_spaces(const char *src)
 	return p;
 }
 
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/include/rte_tailq.h b/lib/eal/include/rte_tailq.h
index 931d549e59..89f7ef2134 100644
--- a/lib/eal/include/rte_tailq.h
+++ b/lib/eal/include/rte_tailq.h
@@ -10,13 +10,13 @@
  *  Here defines rte_tailq APIs for only internal use
  */
 
+#include <stdio.h>
+#include <rte_debug.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdio.h>
-#include <rte_debug.h>
-
 /** dummy structure type used by the rte_tailq APIs */
 struct rte_tailq_entry {
 	RTE_TAILQ_ENTRY(rte_tailq_entry) next; /**< Pointer entries for a tailq list */
diff --git a/lib/eal/include/rte_ticketlock.h b/lib/eal/include/rte_ticketlock.h
index 73884eb07b..e60f60699c 100644
--- a/lib/eal/include/rte_ticketlock.h
+++ b/lib/eal/include/rte_ticketlock.h
@@ -17,15 +17,15 @@
  * All locks must be initialised before use, and only initialised once.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 #include <rte_lcore.h>
 #include <rte_pause.h>
 #include <rte_stdatomic.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * The rte_ticketlock_t type.
  */
diff --git a/lib/eal/include/rte_time.h b/lib/eal/include/rte_time.h
index ec25f7b93d..c5c3a233e4 100644
--- a/lib/eal/include/rte_time.h
+++ b/lib/eal/include/rte_time.h
@@ -5,13 +5,13 @@
 #ifndef _RTE_TIME_H_
 #define _RTE_TIME_H_
 
+#include <stdint.h>
+#include <time.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-#include <time.h>
-
 #define NSEC_PER_SEC             1000000000L
 
 /**
diff --git a/lib/eal/include/rte_trace.h b/lib/eal/include/rte_trace.h
index a6e991fad3..1c824b2158 100644
--- a/lib/eal/include/rte_trace.h
+++ b/lib/eal/include/rte_trace.h
@@ -16,16 +16,16 @@
  * @b EXPERIMENTAL: this API may change without prior notice
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdbool.h>
 #include <stdio.h>
 
 #include <rte_common.h>
 #include <rte_compat.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  *  Test if trace is enabled.
  *
diff --git a/lib/eal/include/rte_trace_point.h b/lib/eal/include/rte_trace_point.h
index 41e2a7f99e..bc737d585e 100644
--- a/lib/eal/include/rte_trace_point.h
+++ b/lib/eal/include/rte_trace_point.h
@@ -16,10 +16,6 @@
  * @b EXPERIMENTAL: this API may change without prior notice
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdbool.h>
 #include <stdio.h>
 
@@ -32,6 +28,10 @@ extern "C" {
 #include <rte_string_fns.h>
 #include <rte_uuid.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** The tracepoint object. */
 typedef RTE_ATOMIC(uint64_t) rte_trace_point_t;
 
diff --git a/lib/eal/include/rte_trace_point_register.h b/lib/eal/include/rte_trace_point_register.h
index 41260e5964..8726338fe4 100644
--- a/lib/eal/include/rte_trace_point_register.h
+++ b/lib/eal/include/rte_trace_point_register.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_TRACE_POINT_REGISTER_H_
 #define _RTE_TRACE_POINT_REGISTER_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #ifdef _RTE_TRACE_POINT_H_
 #error for registration, include this file first before <rte_trace_point.h>
 #endif
@@ -16,6 +12,10 @@ extern "C" {
 #include <rte_per_lcore.h>
 #include <rte_trace_point.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 RTE_DECLARE_PER_LCORE(volatile int, trace_point_sz);
 
 #define RTE_TRACE_POINT_REGISTER(trace, name) \
diff --git a/lib/eal/include/rte_uuid.h b/lib/eal/include/rte_uuid.h
index cfefd4308a..def5907a00 100644
--- a/lib/eal/include/rte_uuid.h
+++ b/lib/eal/include/rte_uuid.h
@@ -10,14 +10,14 @@
 #ifndef _RTE_UUID_H_
 #define _RTE_UUID_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdbool.h>
 #include <stddef.h>
 #include <string.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Struct describing a Universal Unique Identifier
  */
diff --git a/lib/eal/include/rte_version.h b/lib/eal/include/rte_version.h
index 422d00fdff..be3f753617 100644
--- a/lib/eal/include/rte_version.h
+++ b/lib/eal/include/rte_version.h
@@ -10,13 +10,13 @@
 #ifndef _RTE_VERSION_H_
 #define _RTE_VERSION_H_
 
+#include <string.h>
+#include <stdio.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <string.h>
-#include <stdio.h>
-
 /**
  * Macro to compute a version number usable for comparisons
  */
diff --git a/lib/eal/include/rte_vfio.h b/lib/eal/include/rte_vfio.h
index b774625d9f..923293040b 100644
--- a/lib/eal/include/rte_vfio.h
+++ b/lib/eal/include/rte_vfio.h
@@ -10,10 +10,6 @@
  * RTE VFIO. This library provides various VFIO related utility functions.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdbool.h>
 #include <stdint.h>
 
@@ -32,6 +28,10 @@ extern "C" {
 #endif /* kernel version >= 4.0.0 */
 #endif /* RTE_EAL_VFIO */
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef VFIO_PRESENT
 
 #include <linux/vfio.h>
diff --git a/lib/eal/linux/include/rte_os.h b/lib/eal/linux/include/rte_os.h
index c72bf5b7e6..dba0e29827 100644
--- a/lib/eal/linux/include/rte_os.h
+++ b/lib/eal/linux/include/rte_os.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_OS_H_
 #define _RTE_OS_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * This header should contain any definition
  * which is not supported natively or named differently in Linux.
@@ -17,6 +13,10 @@ extern "C" {
 #include <sched.h>
 #include <sys/queue.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* These macros are compatible with system's sys/queue.h. */
 #define RTE_TAILQ_HEAD(name, type) TAILQ_HEAD(name, type)
 #define RTE_TAILQ_ENTRY(type) TAILQ_ENTRY(type)
diff --git a/lib/eal/loongarch/include/rte_atomic.h b/lib/eal/loongarch/include/rte_atomic.h
index 0510b8f781..c8066a4612 100644
--- a/lib/eal/loongarch/include/rte_atomic.h
+++ b/lib/eal/loongarch/include/rte_atomic.h
@@ -9,13 +9,13 @@
 #  error Platform must be built with RTE_FORCE_INTRINSICS
 #endif
 
+#include <rte_common.h>
+#include "generic/rte_atomic.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_common.h>
-#include "generic/rte_atomic.h"
-
 #define rte_mb()	do { asm volatile("dbar 0":::"memory"); } while (0)
 
 #define rte_wmb()	rte_mb()
diff --git a/lib/eal/loongarch/include/rte_byteorder.h b/lib/eal/loongarch/include/rte_byteorder.h
index 0da6097a4f..9b092e2a59 100644
--- a/lib/eal/loongarch/include/rte_byteorder.h
+++ b/lib/eal/loongarch/include/rte_byteorder.h
@@ -5,12 +5,12 @@
 #ifndef RTE_BYTEORDER_LOONGARCH_H
 #define RTE_BYTEORDER_LOONGARCH_H
 
+#include "generic/rte_byteorder.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_byteorder.h"
-
 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
 
 #define rte_cpu_to_le_16(x) (x)
diff --git a/lib/eal/loongarch/include/rte_cpuflags.h b/lib/eal/loongarch/include/rte_cpuflags.h
index 6b592c147c..c1e04ac545 100644
--- a/lib/eal/loongarch/include/rte_cpuflags.h
+++ b/lib/eal/loongarch/include/rte_cpuflags.h
@@ -5,10 +5,6 @@
 #ifndef RTE_CPUFLAGS_LOONGARCH_H
 #define RTE_CPUFLAGS_LOONGARCH_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * Enumeration of all CPU features supported
  */
@@ -30,6 +26,10 @@ enum rte_cpu_flag_t {
 
 #include "generic/rte_cpuflags.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/loongarch/include/rte_cycles.h b/lib/eal/loongarch/include/rte_cycles.h
index f612d1ad10..128c8646e9 100644
--- a/lib/eal/loongarch/include/rte_cycles.h
+++ b/lib/eal/loongarch/include/rte_cycles.h
@@ -5,12 +5,12 @@
 #ifndef RTE_CYCLES_LOONGARCH_H
 #define RTE_CYCLES_LOONGARCH_H
 
+#include "generic/rte_cycles.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_cycles.h"
-
 /**
  * Read the time base register.
  *
diff --git a/lib/eal/loongarch/include/rte_io.h b/lib/eal/loongarch/include/rte_io.h
index 40e40efa86..e32a4737b2 100644
--- a/lib/eal/loongarch/include/rte_io.h
+++ b/lib/eal/loongarch/include/rte_io.h
@@ -5,12 +5,12 @@
 #ifndef RTE_IO_LOONGARCH_H
 #define RTE_IO_LOONGARCH_H
 
+#include "generic/rte_io.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_io.h"
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/loongarch/include/rte_memcpy.h b/lib/eal/loongarch/include/rte_memcpy.h
index 22578d40f4..5412a0fdc1 100644
--- a/lib/eal/loongarch/include/rte_memcpy.h
+++ b/lib/eal/loongarch/include/rte_memcpy.h
@@ -10,12 +10,12 @@
 
 #include "rte_common.h"
 
+#include "generic/rte_memcpy.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_memcpy.h"
-
 static inline void
 rte_mov16(uint8_t *dst, const uint8_t *src)
 {
diff --git a/lib/eal/loongarch/include/rte_pause.h b/lib/eal/loongarch/include/rte_pause.h
index 4302e1b9be..cffa2874d6 100644
--- a/lib/eal/loongarch/include/rte_pause.h
+++ b/lib/eal/loongarch/include/rte_pause.h
@@ -5,14 +5,14 @@
 #ifndef RTE_PAUSE_LOONGARCH_H
 #define RTE_PAUSE_LOONGARCH_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include "rte_atomic.h"
 
 #include "generic/rte_pause.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void rte_pause(void)
 {
 }
diff --git a/lib/eal/loongarch/include/rte_power_intrinsics.h b/lib/eal/loongarch/include/rte_power_intrinsics.h
index d5dbd94567..9e11478206 100644
--- a/lib/eal/loongarch/include/rte_power_intrinsics.h
+++ b/lib/eal/loongarch/include/rte_power_intrinsics.h
@@ -5,14 +5,14 @@
 #ifndef RTE_POWER_INTRINSIC_LOONGARCH_H
 #define RTE_POWER_INTRINSIC_LOONGARCH_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 
 #include "generic/rte_power_intrinsics.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/loongarch/include/rte_prefetch.h b/lib/eal/loongarch/include/rte_prefetch.h
index 64b1fd2c2a..8da08a5566 100644
--- a/lib/eal/loongarch/include/rte_prefetch.h
+++ b/lib/eal/loongarch/include/rte_prefetch.h
@@ -5,14 +5,14 @@
 #ifndef RTE_PREFETCH_LOONGARCH_H
 #define RTE_PREFETCH_LOONGARCH_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include <rte_common.h>
 #include "generic/rte_prefetch.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void rte_prefetch0(const volatile void *p)
 {
 	__builtin_prefetch((const void *)(uintptr_t)p, 0, 3);
diff --git a/lib/eal/loongarch/include/rte_rwlock.h b/lib/eal/loongarch/include/rte_rwlock.h
index aedc6f3349..48924599c5 100644
--- a/lib/eal/loongarch/include/rte_rwlock.h
+++ b/lib/eal/loongarch/include/rte_rwlock.h
@@ -5,12 +5,12 @@
 #ifndef RTE_RWLOCK_LOONGARCH_H
 #define RTE_RWLOCK_LOONGARCH_H
 
+#include "generic/rte_rwlock.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_rwlock.h"
-
 static inline void
 rte_rwlock_read_lock_tm(rte_rwlock_t *rwl)
 {
diff --git a/lib/eal/loongarch/include/rte_spinlock.h b/lib/eal/loongarch/include/rte_spinlock.h
index e8d34e9728..38f00f631d 100644
--- a/lib/eal/loongarch/include/rte_spinlock.h
+++ b/lib/eal/loongarch/include/rte_spinlock.h
@@ -5,13 +5,13 @@
 #ifndef RTE_SPINLOCK_LOONGARCH_H
 #define RTE_SPINLOCK_LOONGARCH_H
 
+#include <rte_common.h>
+#include "generic/rte_spinlock.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_common.h>
-#include "generic/rte_spinlock.h"
-
 #ifndef RTE_FORCE_INTRINSICS
 #  error Platform must be built with RTE_FORCE_INTRINSICS
 #endif
diff --git a/lib/eal/ppc/include/rte_atomic.h b/lib/eal/ppc/include/rte_atomic.h
index 645c7132df..6ce2e5188a 100644
--- a/lib/eal/ppc/include/rte_atomic.h
+++ b/lib/eal/ppc/include/rte_atomic.h
@@ -12,13 +12,13 @@
 #ifndef _RTE_ATOMIC_PPC_64_H_
 #define _RTE_ATOMIC_PPC_64_H_
 
+#include <stdint.h>
+#include "generic/rte_atomic.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-#include "generic/rte_atomic.h"
-
 #define	rte_mb()  asm volatile("sync" : : : "memory")
 
 #define	rte_wmb() asm volatile("sync" : : : "memory")
diff --git a/lib/eal/ppc/include/rte_byteorder.h b/lib/eal/ppc/include/rte_byteorder.h
index de94e2ad32..1d19e96f72 100644
--- a/lib/eal/ppc/include/rte_byteorder.h
+++ b/lib/eal/ppc/include/rte_byteorder.h
@@ -8,13 +8,13 @@
 #ifndef _RTE_BYTEORDER_PPC_64_H_
 #define _RTE_BYTEORDER_PPC_64_H_
 
+#include <stdint.h>
+#include "generic/rte_byteorder.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-#include "generic/rte_byteorder.h"
-
 /*
  * An architecture-optimized byte swap for a 16-bit value.
  *
diff --git a/lib/eal/ppc/include/rte_cpuflags.h b/lib/eal/ppc/include/rte_cpuflags.h
index dedc1ab469..b7bb8f6872 100644
--- a/lib/eal/ppc/include/rte_cpuflags.h
+++ b/lib/eal/ppc/include/rte_cpuflags.h
@@ -6,10 +6,6 @@
 #ifndef _RTE_CPUFLAGS_PPC_64_H_
 #define _RTE_CPUFLAGS_PPC_64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * Enumeration of all CPU features supported
  */
@@ -52,6 +48,10 @@ enum rte_cpu_flag_t {
 
 #include "generic/rte_cpuflags.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/ppc/include/rte_cycles.h b/lib/eal/ppc/include/rte_cycles.h
index 666fc9b0bf..1e6e6cccc8 100644
--- a/lib/eal/ppc/include/rte_cycles.h
+++ b/lib/eal/ppc/include/rte_cycles.h
@@ -6,10 +6,6 @@
 #ifndef _RTE_CYCLES_PPC_64_H_
 #define _RTE_CYCLES_PPC_64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <features.h>
 #ifdef __GLIBC__
 #include <sys/platform/ppc.h>
@@ -20,6 +16,10 @@ extern "C" {
 #include <rte_byteorder.h>
 #include <rte_common.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Read the time base register.
  *
diff --git a/lib/eal/ppc/include/rte_io.h b/lib/eal/ppc/include/rte_io.h
index 01455065e5..c9371b784e 100644
--- a/lib/eal/ppc/include/rte_io.h
+++ b/lib/eal/ppc/include/rte_io.h
@@ -5,12 +5,12 @@
 #ifndef _RTE_IO_PPC_64_H_
 #define _RTE_IO_PPC_64_H_
 
+#include "generic/rte_io.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_io.h"
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/ppc/include/rte_memcpy.h b/lib/eal/ppc/include/rte_memcpy.h
index 6f388c0234..eae73128c4 100644
--- a/lib/eal/ppc/include/rte_memcpy.h
+++ b/lib/eal/ppc/include/rte_memcpy.h
@@ -12,12 +12,12 @@
 #include "rte_altivec.h"
 #include "rte_common.h"
 
+#include "generic/rte_memcpy.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_memcpy.h"
-
 #if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION >= 90000)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Warray-bounds"
diff --git a/lib/eal/ppc/include/rte_pause.h b/lib/eal/ppc/include/rte_pause.h
index 16e47ce22f..78a73aceed 100644
--- a/lib/eal/ppc/include/rte_pause.h
+++ b/lib/eal/ppc/include/rte_pause.h
@@ -5,14 +5,14 @@
 #ifndef _RTE_PAUSE_PPC64_H_
 #define _RTE_PAUSE_PPC64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include "rte_atomic.h"
 
 #include "generic/rte_pause.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void rte_pause(void)
 {
 	/* Set hardware multi-threading low priority */
diff --git a/lib/eal/ppc/include/rte_power_intrinsics.h b/lib/eal/ppc/include/rte_power_intrinsics.h
index c0e9ac279f..6207eeb04d 100644
--- a/lib/eal/ppc/include/rte_power_intrinsics.h
+++ b/lib/eal/ppc/include/rte_power_intrinsics.h
@@ -5,14 +5,14 @@
 #ifndef _RTE_POWER_INTRINSIC_PPC_H_
 #define _RTE_POWER_INTRINSIC_PPC_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 
 #include "generic/rte_power_intrinsics.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/ppc/include/rte_prefetch.h b/lib/eal/ppc/include/rte_prefetch.h
index 2e1b5751e0..bae95af7bf 100644
--- a/lib/eal/ppc/include/rte_prefetch.h
+++ b/lib/eal/ppc/include/rte_prefetch.h
@@ -6,14 +6,14 @@
 #ifndef _RTE_PREFETCH_PPC_64_H_
 #define _RTE_PREFETCH_PPC_64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include <rte_common.h>
 #include "generic/rte_prefetch.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void rte_prefetch0(const volatile void *p)
 {
 	asm volatile ("dcbt 0,%[p],0" : : [p] "r" (p));
diff --git a/lib/eal/ppc/include/rte_rwlock.h b/lib/eal/ppc/include/rte_rwlock.h
index 9fadc04076..bee8da4070 100644
--- a/lib/eal/ppc/include/rte_rwlock.h
+++ b/lib/eal/ppc/include/rte_rwlock.h
@@ -3,12 +3,12 @@
 #ifndef _RTE_RWLOCK_PPC_64_H_
 #define _RTE_RWLOCK_PPC_64_H_
 
+#include "generic/rte_rwlock.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_rwlock.h"
-
 static inline void
 rte_rwlock_read_lock_tm(rte_rwlock_t *rwl)
 {
diff --git a/lib/eal/ppc/include/rte_spinlock.h b/lib/eal/ppc/include/rte_spinlock.h
index 3a4c905b22..77f90f974a 100644
--- a/lib/eal/ppc/include/rte_spinlock.h
+++ b/lib/eal/ppc/include/rte_spinlock.h
@@ -6,14 +6,14 @@
 #ifndef _RTE_SPINLOCK_PPC_64_H_
 #define _RTE_SPINLOCK_PPC_64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 #include <rte_pause.h>
 #include "generic/rte_spinlock.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Fixme: Use intrinsics to implement the spinlock on Power architecture */
 
 #ifndef RTE_FORCE_INTRINSICS
diff --git a/lib/eal/riscv/include/rte_atomic.h b/lib/eal/riscv/include/rte_atomic.h
index 2603bc90ea..66346ad474 100644
--- a/lib/eal/riscv/include/rte_atomic.h
+++ b/lib/eal/riscv/include/rte_atomic.h
@@ -12,15 +12,15 @@
 #  error Platform must be built with RTE_FORCE_INTRINSICS
 #endif
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <rte_common.h>
 #include <rte_config.h>
 #include "generic/rte_atomic.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define rte_mb()	asm volatile("fence rw, rw" : : : "memory")
 
 #define rte_wmb()	asm volatile("fence w, w" : : : "memory")
diff --git a/lib/eal/riscv/include/rte_byteorder.h b/lib/eal/riscv/include/rte_byteorder.h
index 25bd0c275d..c9ff5c0dd1 100644
--- a/lib/eal/riscv/include/rte_byteorder.h
+++ b/lib/eal/riscv/include/rte_byteorder.h
@@ -8,14 +8,14 @@
 #ifndef RTE_BYTEORDER_RISCV_H
 #define RTE_BYTEORDER_RISCV_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <rte_common.h>
 #include "generic/rte_byteorder.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifndef RTE_BYTE_ORDER
 #define RTE_BYTE_ORDER RTE_LITTLE_ENDIAN
 #endif
diff --git a/lib/eal/riscv/include/rte_cpuflags.h b/lib/eal/riscv/include/rte_cpuflags.h
index d742efc40f..ac2004f02d 100644
--- a/lib/eal/riscv/include/rte_cpuflags.h
+++ b/lib/eal/riscv/include/rte_cpuflags.h
@@ -8,10 +8,6 @@
 #ifndef RTE_CPUFLAGS_RISCV_H
 #define RTE_CPUFLAGS_RISCV_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * Enumeration of all CPU features supported
  */
@@ -46,6 +42,10 @@ enum rte_cpu_flag_t {
 
 #include "generic/rte_cpuflags.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/riscv/include/rte_cycles.h b/lib/eal/riscv/include/rte_cycles.h
index 04750ca253..7926809a73 100644
--- a/lib/eal/riscv/include/rte_cycles.h
+++ b/lib/eal/riscv/include/rte_cycles.h
@@ -8,12 +8,12 @@
 #ifndef RTE_CYCLES_RISCV_H
 #define RTE_CYCLES_RISCV_H
 
+#include "generic/rte_cycles.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_cycles.h"
-
 #ifndef RTE_RISCV_RDTSC_USE_HPM
 #define RTE_RISCV_RDTSC_USE_HPM 0
 #endif
diff --git a/lib/eal/riscv/include/rte_io.h b/lib/eal/riscv/include/rte_io.h
index 29659c9590..911dbb6bd2 100644
--- a/lib/eal/riscv/include/rte_io.h
+++ b/lib/eal/riscv/include/rte_io.h
@@ -8,12 +8,12 @@
 #ifndef RTE_IO_RISCV_H
 #define RTE_IO_RISCV_H
 
+#include "generic/rte_io.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_io.h"
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/riscv/include/rte_memcpy.h b/lib/eal/riscv/include/rte_memcpy.h
index e34f19396e..d8a942c5d2 100644
--- a/lib/eal/riscv/include/rte_memcpy.h
+++ b/lib/eal/riscv/include/rte_memcpy.h
@@ -12,12 +12,12 @@
 
 #include "rte_common.h"
 
+#include "generic/rte_memcpy.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_memcpy.h"
-
 static inline void
 rte_mov16(uint8_t *dst, const uint8_t *src)
 {
diff --git a/lib/eal/riscv/include/rte_pause.h b/lib/eal/riscv/include/rte_pause.h
index cb8e9ca52d..3f473cd8db 100644
--- a/lib/eal/riscv/include/rte_pause.h
+++ b/lib/eal/riscv/include/rte_pause.h
@@ -7,14 +7,14 @@
 #ifndef RTE_PAUSE_RISCV_H
 #define RTE_PAUSE_RISCV_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include "rte_atomic.h"
 
 #include "generic/rte_pause.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void rte_pause(void)
 {
 	/* Insert pause hint directly to be compatible with old compilers.
diff --git a/lib/eal/riscv/include/rte_power_intrinsics.h b/lib/eal/riscv/include/rte_power_intrinsics.h
index 636e58e71f..3f7dba1640 100644
--- a/lib/eal/riscv/include/rte_power_intrinsics.h
+++ b/lib/eal/riscv/include/rte_power_intrinsics.h
@@ -7,14 +7,14 @@
 #ifndef RTE_POWER_INTRINSIC_RISCV_H
 #define RTE_POWER_INTRINSIC_RISCV_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 
 #include "generic/rte_power_intrinsics.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/riscv/include/rte_prefetch.h b/lib/eal/riscv/include/rte_prefetch.h
index 748cf1b626..42146491ea 100644
--- a/lib/eal/riscv/include/rte_prefetch.h
+++ b/lib/eal/riscv/include/rte_prefetch.h
@@ -8,14 +8,14 @@
 #ifndef RTE_PREFETCH_RISCV_H
 #define RTE_PREFETCH_RISCV_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include <rte_common.h>
 #include "generic/rte_prefetch.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void rte_prefetch0(const volatile void *p)
 {
 	RTE_SET_USED(p);
diff --git a/lib/eal/riscv/include/rte_rwlock.h b/lib/eal/riscv/include/rte_rwlock.h
index 9cdaf1b0ef..730970eecb 100644
--- a/lib/eal/riscv/include/rte_rwlock.h
+++ b/lib/eal/riscv/include/rte_rwlock.h
@@ -7,12 +7,12 @@
 #ifndef RTE_RWLOCK_RISCV_H
 #define RTE_RWLOCK_RISCV_H
 
+#include "generic/rte_rwlock.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_rwlock.h"
-
 static inline void
 rte_rwlock_read_lock_tm(rte_rwlock_t *rwl)
 {
diff --git a/lib/eal/riscv/include/rte_spinlock.h b/lib/eal/riscv/include/rte_spinlock.h
index 6af430735c..5fe4980e44 100644
--- a/lib/eal/riscv/include/rte_spinlock.h
+++ b/lib/eal/riscv/include/rte_spinlock.h
@@ -12,13 +12,13 @@
 #  error Platform must be built with RTE_FORCE_INTRINSICS
 #endif
 
+#include <rte_common.h>
+#include "generic/rte_spinlock.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_common.h>
-#include "generic/rte_spinlock.h"
-
 static inline int rte_tm_supported(void)
 {
 	return 0;
diff --git a/lib/eal/windows/include/pthread.h b/lib/eal/windows/include/pthread.h
index 051b9311c2..e1c31017d1 100644
--- a/lib/eal/windows/include/pthread.h
+++ b/lib/eal/windows/include/pthread.h
@@ -13,13 +13,13 @@
  * eal_common_thread.c and common\include\rte_per_lcore.h as Microsoft libc
  * does not contain pthread.h. This may be removed in future releases.
  */
+#include <rte_common.h>
+#include <rte_windows.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_common.h>
-#include <rte_windows.h>
-
 #define PTHREAD_BARRIER_SERIAL_THREAD TRUE
 
 /* defining pthread_t type on Windows since there is no in Microsoft libc*/
diff --git a/lib/eal/windows/include/regex.h b/lib/eal/windows/include/regex.h
index 827f938414..a224c0cd29 100644
--- a/lib/eal/windows/include/regex.h
+++ b/lib/eal/windows/include/regex.h
@@ -10,15 +10,15 @@
  * as Microsoft libc does not contain regex.h. This may be removed in
  * future releases.
  */
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #define REG_NOMATCH 1
 #define REG_ESPACE 12
 
 #include <rte_common.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* defining regex_t for Windows */
 typedef void *regex_t;
 /* defining regmatch_t for Windows */
diff --git a/lib/eal/windows/include/rte_windows.h b/lib/eal/windows/include/rte_windows.h
index 567ed7d820..e78f007ffa 100644
--- a/lib/eal/windows/include/rte_windows.h
+++ b/lib/eal/windows/include/rte_windows.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_WINDOWS_H_
 #define _RTE_WINDOWS_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file Windows-specific facilities
  *
@@ -44,6 +40,10 @@ extern "C" {
 #include <devguid.h>
 #include <rte_log.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Log GetLastError() with context, usually a Win32 API function and arguments.
  */
diff --git a/lib/eal/x86/include/rte_atomic.h b/lib/eal/x86/include/rte_atomic.h
index 74b1b24b7a..c72c47c83e 100644
--- a/lib/eal/x86/include/rte_atomic.h
+++ b/lib/eal/x86/include/rte_atomic.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_ATOMIC_X86_H_
 #define _RTE_ATOMIC_X86_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <rte_common.h>
 #include <rte_config.h>
@@ -31,6 +27,10 @@ extern "C" {
 
 #define rte_smp_rmb() rte_compiler_barrier()
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /*
  * From Intel Software Development Manual; Vol 3;
  * 8.2.2 Memory Ordering in P6 and More Recent Processor Families:
@@ -99,10 +99,18 @@ rte_atomic_thread_fence(rte_memory_order memorder)
 		__rte_atomic_thread_fence(memorder);
 }
 
+#ifdef __cplusplus
+}
+#endif
+
 #ifndef RTE_TOOLCHAIN_MSVC
 
 /*------------------------- 16 bit atomic operations -------------------------*/
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifndef RTE_FORCE_INTRINSICS
 static inline int
 rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src)
@@ -273,6 +281,11 @@ static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v)
 			);
 	return ret != 0;
 }
+
+#ifdef __cplusplus
+}
+#endif
+
 #endif
 
 #ifdef RTE_ARCH_I686
@@ -283,8 +296,4 @@ static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v)
 
 #endif
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* _RTE_ATOMIC_X86_H_ */
diff --git a/lib/eal/x86/include/rte_byteorder.h b/lib/eal/x86/include/rte_byteorder.h
index adbec0c157..5a49ffcd50 100644
--- a/lib/eal/x86/include/rte_byteorder.h
+++ b/lib/eal/x86/include/rte_byteorder.h
@@ -5,15 +5,15 @@
 #ifndef _RTE_BYTEORDER_X86_H_
 #define _RTE_BYTEORDER_X86_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <rte_common.h>
 #include <rte_config.h>
 #include "generic/rte_byteorder.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifndef RTE_BYTE_ORDER
 #define RTE_BYTE_ORDER RTE_LITTLE_ENDIAN
 #endif
@@ -48,6 +48,10 @@ static inline uint32_t rte_arch_bswap32(uint32_t _x)
 	return x;
 }
 
+#ifdef __cplusplus
+}
+#endif
+
 #define rte_bswap16(x) ((uint16_t)(__builtin_constant_p(x) ?		\
 				   rte_constant_bswap16(x) :		\
 				   rte_arch_bswap16(x)))
@@ -83,8 +87,4 @@ static inline uint32_t rte_arch_bswap32(uint32_t _x)
 #define rte_be_to_cpu_32(x) rte_bswap32(x)
 #define rte_be_to_cpu_64(x) rte_bswap64(x)
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* _RTE_BYTEORDER_X86_H_ */
diff --git a/lib/eal/x86/include/rte_cpuflags.h b/lib/eal/x86/include/rte_cpuflags.h
index 1ee00e70fe..e843d1e5f4 100644
--- a/lib/eal/x86/include/rte_cpuflags.h
+++ b/lib/eal/x86/include/rte_cpuflags.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_CPUFLAGS_X86_64_H_
 #define _RTE_CPUFLAGS_X86_64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 enum rte_cpu_flag_t {
 	/* (EAX 01h) ECX features*/
 	RTE_CPUFLAG_SSE3 = 0,               /**< SSE3 */
@@ -138,6 +134,10 @@ enum rte_cpu_flag_t {
 
 #include "generic/rte_cpuflags.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/x86/include/rte_cycles.h b/lib/eal/x86/include/rte_cycles.h
index 2afe85e28c..8de43840da 100644
--- a/lib/eal/x86/include/rte_cycles.h
+++ b/lib/eal/x86/include/rte_cycles.h
@@ -12,10 +12,6 @@
 #include <x86intrin.h>
 #endif
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include "generic/rte_cycles.h"
 
 #ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT
@@ -26,6 +22,10 @@ extern int rte_cycles_vmware_tsc_map;
 #include <rte_common.h>
 #include <rte_config.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline uint64_t
 rte_rdtsc(void)
 {
diff --git a/lib/eal/x86/include/rte_io.h b/lib/eal/x86/include/rte_io.h
index 0e1fefdee1..c11cb8cd89 100644
--- a/lib/eal/x86/include/rte_io.h
+++ b/lib/eal/x86/include/rte_io.h
@@ -5,16 +5,16 @@
 #ifndef _RTE_IO_X86_H_
 #define _RTE_IO_X86_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include "rte_cpuflags.h"
 
 #define RTE_NATIVE_WRITE32_WC
 #include "generic/rte_io.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @internal
  * MOVDIRI wrapper.
diff --git a/lib/eal/x86/include/rte_pause.h b/lib/eal/x86/include/rte_pause.h
index b4cf1df1d0..54f028b295 100644
--- a/lib/eal/x86/include/rte_pause.h
+++ b/lib/eal/x86/include/rte_pause.h
@@ -5,13 +5,14 @@
 #ifndef _RTE_PAUSE_X86_H_
 #define _RTE_PAUSE_X86_H_
 
+#include "generic/rte_pause.h"
+
+#include <emmintrin.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_pause.h"
-
-#include <emmintrin.h>
 static inline void rte_pause(void)
 {
 	_mm_pause();
diff --git a/lib/eal/x86/include/rte_power_intrinsics.h b/lib/eal/x86/include/rte_power_intrinsics.h
index e4c2b87f73..fcb780fc5b 100644
--- a/lib/eal/x86/include/rte_power_intrinsics.h
+++ b/lib/eal/x86/include/rte_power_intrinsics.h
@@ -5,14 +5,14 @@
 #ifndef _RTE_POWER_INTRINSIC_X86_H_
 #define _RTE_POWER_INTRINSIC_X86_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 
 #include "generic/rte_power_intrinsics.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/x86/include/rte_prefetch.h b/lib/eal/x86/include/rte_prefetch.h
index 8a9377714f..34a609cc65 100644
--- a/lib/eal/x86/include/rte_prefetch.h
+++ b/lib/eal/x86/include/rte_prefetch.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_PREFETCH_X86_64_H_
 #define _RTE_PREFETCH_X86_64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #ifdef RTE_TOOLCHAIN_MSVC
 #include <emmintrin.h>
 #endif
@@ -17,6 +13,10 @@ extern "C" {
 #include <rte_common.h>
 #include "generic/rte_prefetch.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void rte_prefetch0(const volatile void *p)
 {
 #ifdef RTE_TOOLCHAIN_MSVC
diff --git a/lib/eal/x86/include/rte_rwlock.h b/lib/eal/x86/include/rte_rwlock.h
index 1796b69265..281eff33b9 100644
--- a/lib/eal/x86/include/rte_rwlock.h
+++ b/lib/eal/x86/include/rte_rwlock.h
@@ -5,13 +5,13 @@
 #ifndef _RTE_RWLOCK_X86_64_H_
 #define _RTE_RWLOCK_X86_64_H_
 
+#include "generic/rte_rwlock.h"
+#include "rte_spinlock.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_rwlock.h"
-#include "rte_spinlock.h"
-
 static inline void
 rte_rwlock_read_lock_tm(rte_rwlock_t *rwl)
 	__rte_no_thread_safety_analysis
diff --git a/lib/eal/x86/include/rte_spinlock.h b/lib/eal/x86/include/rte_spinlock.h
index a6c23ea1f6..a14da41964 100644
--- a/lib/eal/x86/include/rte_spinlock.h
+++ b/lib/eal/x86/include/rte_spinlock.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_SPINLOCK_X86_64_H_
 #define _RTE_SPINLOCK_X86_64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include "generic/rte_spinlock.h"
 #include "rte_rtm.h"
 #include "rte_cpuflags.h"
@@ -17,6 +13,10 @@ extern "C" {
 #include "rte_pause.h"
 #include "rte_cycles.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_RTM_MAX_RETRIES (20)
 #define RTE_XABORT_LOCK_BUSY (0xff)
 
@@ -182,7 +182,6 @@ rte_spinlock_recursive_trylock_tm(rte_spinlock_recursive_t *slr)
 	return rte_spinlock_recursive_trylock(slr);
 }
 
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/ethdev/ethdev_driver.h b/lib/ethdev/ethdev_driver.h
index 883e59a927..ae00ead865 100644
--- a/lib/ethdev/ethdev_driver.h
+++ b/lib/ethdev/ethdev_driver.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_ETHDEV_DRIVER_H_
 #define _RTE_ETHDEV_DRIVER_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  *
@@ -24,6 +20,10 @@ extern "C" {
 #include <rte_compat.h>
 #include <rte_ethdev.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @internal
  * Structure used to hold information about the callbacks to be called for a
diff --git a/lib/ethdev/ethdev_pci.h b/lib/ethdev/ethdev_pci.h
index ec4f731270..2229ffa252 100644
--- a/lib/ethdev/ethdev_pci.h
+++ b/lib/ethdev/ethdev_pci.h
@@ -6,16 +6,16 @@
 #ifndef _RTE_ETHDEV_PCI_H_
 #define _RTE_ETHDEV_PCI_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_malloc.h>
 #include <rte_pci.h>
 #include <bus_pci_driver.h>
 #include <rte_config.h>
 #include <ethdev_driver.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Copy pci device info to the Ethernet device data.
  * Shared memory (eth_dev->data) only updated by primary process, so it is safe
diff --git a/lib/ethdev/ethdev_trace.h b/lib/ethdev/ethdev_trace.h
index 3bec87bfdb..36a38f718a 100644
--- a/lib/ethdev/ethdev_trace.h
+++ b/lib/ethdev/ethdev_trace.h
@@ -11,10 +11,6 @@
  * API for ethdev trace support
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <dev_driver.h>
 #include <rte_trace_point.h>
 
@@ -22,6 +18,10 @@ extern "C" {
 #include "rte_mtr.h"
 #include "rte_tm.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 RTE_TRACE_POINT(
 	rte_ethdev_trace_configure,
 	RTE_TRACE_POINT_ARGS(uint16_t port_id, uint16_t nb_rx_q,
diff --git a/lib/ethdev/ethdev_vdev.h b/lib/ethdev/ethdev_vdev.h
index 364f140f91..010ec75a00 100644
--- a/lib/ethdev/ethdev_vdev.h
+++ b/lib/ethdev/ethdev_vdev.h
@@ -6,15 +6,15 @@
 #ifndef _RTE_ETHDEV_VDEV_H_
 #define _RTE_ETHDEV_VDEV_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_config.h>
 #include <rte_malloc.h>
 #include <bus_vdev_driver.h>
 #include <ethdev_driver.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @internal
  * Allocates a new ethdev slot for an Ethernet device and returns the pointer
diff --git a/lib/ethdev/rte_cman.h b/lib/ethdev/rte_cman.h
index 297db8e095..dedd6cb71a 100644
--- a/lib/ethdev/rte_cman.h
+++ b/lib/ethdev/rte_cman.h
@@ -5,12 +5,12 @@
 #ifndef RTE_CMAN_H
 #define RTE_CMAN_H
 
+#include <rte_bitops.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_bitops.h>
-
 /**
  * @file
  * Congestion management related parameters for DPDK.
diff --git a/lib/ethdev/rte_dev_info.h b/lib/ethdev/rte_dev_info.h
index 67cf0ae526..4fde2ad408 100644
--- a/lib/ethdev/rte_dev_info.h
+++ b/lib/ethdev/rte_dev_info.h
@@ -5,12 +5,12 @@
 #ifndef _RTE_DEV_INFO_H_
 #define _RTE_DEV_INFO_H_
 
+#include <stdint.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-
 /*
  * Placeholder for accessing device registers
  */
diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
index 548fada1c7..a75e26bf07 100644
--- a/lib/ethdev/rte_ethdev.h
+++ b/lib/ethdev/rte_ethdev.h
@@ -145,10 +145,6 @@
  * a 0 value by the receive function of the driver for a given number of tries.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 /* Use this macro to check if LRO API is supported */
@@ -5966,6 +5962,10 @@ int rte_eth_cman_config_get(uint16_t port_id, struct rte_eth_cman_config *config
 
 #include <rte_ethdev_core.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @internal
  * Helper routine for rte_eth_rx_burst().
diff --git a/lib/ethdev/rte_ethdev_trace_fp.h b/lib/ethdev/rte_ethdev_trace_fp.h
index 40b6e4756b..c11b4f18f7 100644
--- a/lib/ethdev/rte_ethdev_trace_fp.h
+++ b/lib/ethdev/rte_ethdev_trace_fp.h
@@ -11,12 +11,12 @@
  * API for ethdev trace support
  */
 
+#include <rte_trace_point.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_trace_point.h>
-
 RTE_TRACE_POINT_FP(
 	rte_ethdev_trace_rx_burst,
 	RTE_TRACE_POINT_ARGS(uint16_t port_id, uint16_t queue_id,
diff --git a/lib/eventdev/event_timer_adapter_pmd.h b/lib/eventdev/event_timer_adapter_pmd.h
index cd5127f047..fffcd90c8f 100644
--- a/lib/eventdev/event_timer_adapter_pmd.h
+++ b/lib/eventdev/event_timer_adapter_pmd.h
@@ -16,12 +16,12 @@
  * versioning.
  */
 
+#include "rte_event_timer_adapter.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "rte_event_timer_adapter.h"
-
 /*
  * Definitions of functions exported by an event timer adapter implementation
  * through *rte_event_timer_adapter_ops* structure supplied in the
diff --git a/lib/eventdev/eventdev_pmd.h b/lib/eventdev/eventdev_pmd.h
index 7a5699f14b..fd5f7a14f4 100644
--- a/lib/eventdev/eventdev_pmd.h
+++ b/lib/eventdev/eventdev_pmd.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_EVENTDEV_PMD_H_
 #define _RTE_EVENTDEV_PMD_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /** @file
  * RTE Event PMD APIs
  *
@@ -31,6 +27,10 @@ extern "C" {
 #include "event_timer_adapter_pmd.h"
 #include "rte_eventdev.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 extern int rte_event_logtype;
 #define RTE_LOGTYPE_EVENTDEV rte_event_logtype
 
diff --git a/lib/eventdev/eventdev_pmd_pci.h b/lib/eventdev/eventdev_pmd_pci.h
index 26aa3a6635..5cb5916a84 100644
--- a/lib/eventdev/eventdev_pmd_pci.h
+++ b/lib/eventdev/eventdev_pmd_pci.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_EVENTDEV_PMD_PCI_H_
 #define _RTE_EVENTDEV_PMD_PCI_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /** @file
  * RTE Eventdev PCI PMD APIs
  *
@@ -28,6 +24,10 @@ extern "C" {
 
 #include "eventdev_pmd.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 typedef int (*eventdev_pmd_pci_callback_t)(struct rte_eventdev *dev);
 
 /**
diff --git a/lib/eventdev/eventdev_pmd_vdev.h b/lib/eventdev/eventdev_pmd_vdev.h
index bb433ba955..4eaefa0b0b 100644
--- a/lib/eventdev/eventdev_pmd_vdev.h
+++ b/lib/eventdev/eventdev_pmd_vdev.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_EVENTDEV_PMD_VDEV_H_
 #define _RTE_EVENTDEV_PMD_VDEV_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /** @file
  * RTE Eventdev VDEV PMD APIs
  *
@@ -27,6 +23,10 @@ extern "C" {
 
 #include "eventdev_pmd.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @internal
  * Creates a new virtual event device and returns the pointer to that device.
diff --git a/lib/eventdev/eventdev_trace.h b/lib/eventdev/eventdev_trace.h
index 9c2b261c06..8ff8841729 100644
--- a/lib/eventdev/eventdev_trace.h
+++ b/lib/eventdev/eventdev_trace.h
@@ -11,10 +11,6 @@
  * API for ethdev trace support
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_trace_point.h>
 
 #include "rte_eventdev.h"
@@ -22,6 +18,10 @@ extern "C" {
 #include "rte_event_eth_rx_adapter.h"
 #include "rte_event_timer_adapter.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 RTE_TRACE_POINT(
 	rte_eventdev_trace_configure,
 	RTE_TRACE_POINT_ARGS(uint8_t dev_id,
diff --git a/lib/eventdev/rte_event_crypto_adapter.h b/lib/eventdev/rte_event_crypto_adapter.h
index e07f159b77..c9b277c664 100644
--- a/lib/eventdev/rte_event_crypto_adapter.h
+++ b/lib/eventdev/rte_event_crypto_adapter.h
@@ -167,14 +167,14 @@
  * from the start of the rte_crypto_op including initialization vector (IV).
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include "rte_eventdev.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Crypto event adapter mode
  */
diff --git a/lib/eventdev/rte_event_eth_rx_adapter.h b/lib/eventdev/rte_event_eth_rx_adapter.h
index cf42c69b0d..9237e198a7 100644
--- a/lib/eventdev/rte_event_eth_rx_adapter.h
+++ b/lib/eventdev/rte_event_eth_rx_adapter.h
@@ -87,10 +87,6 @@
  * event based so the callback can also modify the event data if it needs to.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_compat.h>
@@ -98,6 +94,10 @@ extern "C" {
 
 #include "rte_eventdev.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_EVENT_ETH_RX_ADAPTER_MAX_INSTANCE 32
 
 /* struct rte_event_eth_rx_adapter_queue_conf flags definitions */
diff --git a/lib/eventdev/rte_event_eth_tx_adapter.h b/lib/eventdev/rte_event_eth_tx_adapter.h
index b38b3fce97..ef01345ac2 100644
--- a/lib/eventdev/rte_event_eth_tx_adapter.h
+++ b/lib/eventdev/rte_event_eth_tx_adapter.h
@@ -76,10 +76,6 @@
  * impact due to a change in how the transmit queue index is specified.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_compat.h>
@@ -87,6 +83,10 @@ extern "C" {
 
 #include "rte_eventdev.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Adapter configuration structure
  *
diff --git a/lib/eventdev/rte_event_ring.h b/lib/eventdev/rte_event_ring.h
index f9cf19ae16..5769da269e 100644
--- a/lib/eventdev/rte_event_ring.h
+++ b/lib/eventdev/rte_event_ring.h
@@ -14,10 +14,6 @@
 #ifndef _RTE_EVENT_RING_
 #define _RTE_EVENT_RING_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_common.h>
@@ -25,6 +21,10 @@ extern "C" {
 #include <rte_ring_elem.h>
 #include "rte_eventdev.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_TAILQ_EVENT_RING_NAME "RTE_EVENT_RING"
 
 /**
diff --git a/lib/eventdev/rte_event_timer_adapter.h b/lib/eventdev/rte_event_timer_adapter.h
index 0bd1b30045..256807b3bf 100644
--- a/lib/eventdev/rte_event_timer_adapter.h
+++ b/lib/eventdev/rte_event_timer_adapter.h
@@ -107,14 +107,14 @@
  * All these use cases require high resolution and low time drift.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 
 #include "rte_eventdev.h"
 #include "rte_eventdev_trace_fp.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Timer adapter clock source
  */
diff --git a/lib/eventdev/rte_eventdev.h b/lib/eventdev/rte_eventdev.h
index 08e5f9320b..e5c5b7df64 100644
--- a/lib/eventdev/rte_eventdev.h
+++ b/lib/eventdev/rte_eventdev.h
@@ -237,10 +237,6 @@
  * \endcode
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include <rte_common.h>
 #include <rte_errno.h>
@@ -2469,6 +2465,10 @@ rte_event_vector_pool_create(const char *name, unsigned int n,
 
 #include <rte_eventdev_core.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static __rte_always_inline uint16_t
 __rte_event_enqueue_burst(uint8_t dev_id, uint8_t port_id,
 			  const struct rte_event ev[], uint16_t nb_events,
diff --git a/lib/eventdev/rte_eventdev_trace_fp.h b/lib/eventdev/rte_eventdev_trace_fp.h
index 04d510ad00..8656f1e6e4 100644
--- a/lib/eventdev/rte_eventdev_trace_fp.h
+++ b/lib/eventdev/rte_eventdev_trace_fp.h
@@ -11,12 +11,12 @@
  * API for ethdev trace support
  */
 
+#include <rte_trace_point.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_trace_point.h>
-
 RTE_TRACE_POINT_FP(
 	rte_eventdev_trace_deq_burst,
 	RTE_TRACE_POINT_ARGS(uint8_t dev_id, uint8_t port_id, void *ev_table,
diff --git a/lib/graph/rte_graph_model_mcore_dispatch.h b/lib/graph/rte_graph_model_mcore_dispatch.h
index 732b89297f..f9ff3daa88 100644
--- a/lib/graph/rte_graph_model_mcore_dispatch.h
+++ b/lib/graph/rte_graph_model_mcore_dispatch.h
@@ -12,10 +12,6 @@
  * dispatch model.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_errno.h>
 #include <rte_mempool.h>
 #include <rte_memzone.h>
@@ -23,6 +19,10 @@ extern "C" {
 
 #include "rte_graph_worker_common.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_GRAPH_SCHED_WQ_SIZE_MULTIPLIER  8
 #define RTE_GRAPH_SCHED_WQ_SIZE(nb_nodes)   \
 	((typeof(nb_nodes))((nb_nodes) * RTE_GRAPH_SCHED_WQ_SIZE_MULTIPLIER))
diff --git a/lib/graph/rte_graph_worker.h b/lib/graph/rte_graph_worker.h
index 03d0e01b68..b0f952a82c 100644
--- a/lib/graph/rte_graph_worker.h
+++ b/lib/graph/rte_graph_worker.h
@@ -6,13 +6,13 @@
 #ifndef _RTE_GRAPH_WORKER_H_
 #define _RTE_GRAPH_WORKER_H_
 
+#include "rte_graph_model_rtc.h"
+#include "rte_graph_model_mcore_dispatch.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "rte_graph_model_rtc.h"
-#include "rte_graph_model_mcore_dispatch.h"
-
 /**
  * Perform graph walk on the circular buffer and invoke the process function
  * of the nodes and collect the stats.
diff --git a/lib/gso/rte_gso.h b/lib/gso/rte_gso.h
index d60cb65f18..75246989dc 100644
--- a/lib/gso/rte_gso.h
+++ b/lib/gso/rte_gso.h
@@ -10,13 +10,13 @@
  * Interface to GSO library
  */
 
+#include <stdint.h>
+#include <rte_mbuf.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-#include <rte_mbuf.h>
-
 /* Minimum GSO segment size for TCP based packets. */
 #define RTE_GSO_SEG_SIZE_MIN (sizeof(struct rte_ether_hdr) + \
 		sizeof(struct rte_ipv4_hdr) + sizeof(struct rte_tcp_hdr) + 1)
diff --git a/lib/hash/rte_fbk_hash.h b/lib/hash/rte_fbk_hash.h
index b01126999b..1f0c1d1b6c 100644
--- a/lib/hash/rte_fbk_hash.h
+++ b/lib/hash/rte_fbk_hash.h
@@ -18,15 +18,15 @@
 #include <stdint.h>
 #include <errno.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <string.h>
 
 #include <rte_hash_crc.h>
 #include <rte_jhash.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifndef RTE_FBK_HASH_INIT_VAL_DEFAULT
 /** Initialising value used when calculating hash. */
 #define RTE_FBK_HASH_INIT_VAL_DEFAULT		0xFFFFFFFF
diff --git a/lib/hash/rte_hash_crc.h b/lib/hash/rte_hash_crc.h
index 8ad2422ec3..fa07c97685 100644
--- a/lib/hash/rte_hash_crc.h
+++ b/lib/hash/rte_hash_crc.h
@@ -11,10 +11,6 @@
  * RTE CRC Hash
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_branch_prediction.h>
@@ -39,6 +35,10 @@ extern uint8_t rte_hash_crc32_alg;
 #include "rte_crc_generic.h"
 #endif
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Allow or disallow use of SSE4.2/ARMv8 intrinsics for CRC32 hash
  * calculation.
diff --git a/lib/hash/rte_jhash.h b/lib/hash/rte_jhash.h
index f2446f081e..b70799d209 100644
--- a/lib/hash/rte_jhash.h
+++ b/lib/hash/rte_jhash.h
@@ -11,10 +11,6 @@
  * jhash functions.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <string.h>
 #include <limits.h>
@@ -23,6 +19,10 @@ extern "C" {
 #include <rte_log.h>
 #include <rte_byteorder.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* jhash.h: Jenkins hash support.
  *
  * Copyright (C) 2006 Bob Jenkins (bob_jenkins@burtleburtle.net)
diff --git a/lib/hash/rte_thash.h b/lib/hash/rte_thash.h
index 30b657e67a..ec9bc57efa 100644
--- a/lib/hash/rte_thash.h
+++ b/lib/hash/rte_thash.h
@@ -15,10 +15,6 @@
  * after GRE header decapsulating)
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_byteorder.h>
@@ -28,6 +24,10 @@ extern "C" {
 
 #if defined(RTE_ARCH_X86) || defined(__ARM_NEON)
 #include <rte_vect.h>
 #endif
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef RTE_ARCH_X86
diff --git a/lib/hash/rte_thash_gfni.h b/lib/hash/rte_thash_gfni.h
index 132f37506d..e82378933c 100644
--- a/lib/hash/rte_thash_gfni.h
+++ b/lib/hash/rte_thash_gfni.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_THASH_GFNI_H_
 #define _RTE_THASH_GFNI_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include <rte_log.h>
 
@@ -18,6 +14,10 @@ extern "C" {
 
 #endif
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @internal
  * Stubs only used when GFNI is not available.
diff --git a/lib/ip_frag/rte_ip_frag.h b/lib/ip_frag/rte_ip_frag.h
index 2ad318096b..84fd717953 100644
--- a/lib/ip_frag/rte_ip_frag.h
+++ b/lib/ip_frag/rte_ip_frag.h
@@ -12,10 +12,6 @@
  * Implementation of IP packet fragmentation and reassembly.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <stdio.h>
 
@@ -25,6 +21,10 @@ extern "C" {
 #include <rte_ip.h>
 #include <rte_byteorder.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct rte_mbuf;
 
 /** death row size (in packets) */
diff --git a/lib/ipsec/rte_ipsec.h b/lib/ipsec/rte_ipsec.h
index f15f6f2966..28b7a61aea 100644
--- a/lib/ipsec/rte_ipsec.h
+++ b/lib/ipsec/rte_ipsec.h
@@ -17,10 +17,6 @@
 #include <rte_ipsec_sa.h>
 #include <rte_mbuf.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 struct rte_ipsec_session;
 
 /**
@@ -181,6 +177,10 @@ rte_ipsec_telemetry_sa_del(const struct rte_ipsec_sa *sa);
 
 #include <rte_ipsec_group.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/log/rte_log.h b/lib/log/rte_log.h
index f357c59548..3735137150 100644
--- a/lib/log/rte_log.h
+++ b/lib/log/rte_log.h
@@ -13,10 +13,6 @@
  * This file provides a log API to RTE applications.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <assert.h>
 #include <stdint.h>
 #include <stdio.h>
@@ -26,6 +22,10 @@ extern "C" {
 #include <rte_common.h>
 #include <rte_config.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* SDK log type */
 #define RTE_LOGTYPE_EAL        0 /**< Log related to eal. */
 				 /* was RTE_LOGTYPE_MALLOC */
diff --git a/lib/lpm/rte_lpm.h b/lib/lpm/rte_lpm.h
index 9c6df311cb..329dc1aad4 100644
--- a/lib/lpm/rte_lpm.h
+++ b/lib/lpm/rte_lpm.h
@@ -391,6 +391,10 @@ static inline void
 rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
 	uint32_t defv);
 
+#ifdef __cplusplus
+}
+#endif
+
 #if defined(RTE_ARCH_ARM)
 #ifdef RTE_HAS_SVE_ACLE
 #include "rte_lpm_sve.h"
@@ -407,8 +411,4 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
 #include "rte_lpm_scalar.h"
 #endif
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* _RTE_LPM_H_ */
diff --git a/lib/member/rte_member.h b/lib/member/rte_member.h
index aec192eba5..109bdd000b 100644
--- a/lib/member/rte_member.h
+++ b/lib/member/rte_member.h
@@ -54,10 +54,6 @@
 #ifndef _RTE_MEMBER_H_
 #define _RTE_MEMBER_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <stdbool.h>
 #include <inttypes.h>
@@ -100,6 +96,10 @@ typedef uint16_t member_set_t;
 #define MEMBER_HASH_FUNC       rte_jhash
 #endif
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** @internal setsummary structure. */
 struct rte_member_setsum;
 
diff --git a/lib/member/rte_member_sketch.h b/lib/member/rte_member_sketch.h
index 74f24ca223..6a8d5104dd 100644
--- a/lib/member/rte_member_sketch.h
+++ b/lib/member/rte_member_sketch.h
@@ -5,13 +5,13 @@
 #ifndef RTE_MEMBER_SKETCH_H
 #define RTE_MEMBER_SKETCH_H
 
+#include <rte_vect.h>
+#include <rte_ring_elem.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_vect.h>
-#include <rte_ring_elem.h>
-
 #define NUM_ROW_SCALAR 5
 #define INTERVAL (1 << 15)
 
diff --git a/lib/member/rte_member_sketch_avx512.h b/lib/member/rte_member_sketch_avx512.h
index 52666b5b4c..a8ef3b065e 100644
--- a/lib/member/rte_member_sketch_avx512.h
+++ b/lib/member/rte_member_sketch_avx512.h
@@ -5,14 +5,14 @@
 #ifndef RTE_MEMBER_SKETCH_AVX512_H
 #define RTE_MEMBER_SKETCH_AVX512_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_vect.h>
 #include "rte_member.h"
 #include "rte_member_sketch.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define NUM_ROW_VEC 8
 
 void
diff --git a/lib/member/rte_member_x86.h b/lib/member/rte_member_x86.h
index d115151f9f..4de453485b 100644
--- a/lib/member/rte_member_x86.h
+++ b/lib/member/rte_member_x86.h
@@ -5,12 +5,12 @@
 #ifndef _RTE_MEMBER_X86_H_
 #define _RTE_MEMBER_X86_H_
 
+#include <x86intrin.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <x86intrin.h>
-
 #if defined(__AVX2__)
 
 static inline int
diff --git a/lib/member/rte_xxh64_avx512.h b/lib/member/rte_xxh64_avx512.h
index ffe6cb79f9..58f896ebb8 100644
--- a/lib/member/rte_xxh64_avx512.h
+++ b/lib/member/rte_xxh64_avx512.h
@@ -5,13 +5,13 @@
 #ifndef RTE_XXH64_AVX512_H
 #define RTE_XXH64_AVX512_H
 
+#include <rte_common.h>
+#include <immintrin.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_common.h>
-#include <immintrin.h>
-
 /* 0b1001111000110111011110011011000110000101111010111100101010000111 */
 static const uint64_t PRIME64_1 = 0x9E3779B185EBCA87ULL;
 /* 0b1100001010110010101011100011110100100111110101001110101101001111 */
diff --git a/lib/mempool/mempool_trace.h b/lib/mempool/mempool_trace.h
index dffef062e4..c595a3116b 100644
--- a/lib/mempool/mempool_trace.h
+++ b/lib/mempool/mempool_trace.h
@@ -11,15 +11,15 @@
  * APIs for mempool trace support
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include "rte_mempool.h"
 
 #include <rte_memzone.h>
 #include <rte_trace_point.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 RTE_TRACE_POINT(
 	rte_mempool_trace_create,
 	RTE_TRACE_POINT_ARGS(const char *name, uint32_t nb_elts,
diff --git a/lib/mempool/rte_mempool_trace_fp.h b/lib/mempool/rte_mempool_trace_fp.h
index ed060e887c..9c5cdbb291 100644
--- a/lib/mempool/rte_mempool_trace_fp.h
+++ b/lib/mempool/rte_mempool_trace_fp.h
@@ -11,12 +11,12 @@
  * Mempool fast path API for trace support
  */
 
+#include <rte_trace_point.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_trace_point.h>
-
 RTE_TRACE_POINT_FP(
 	rte_mempool_trace_ops_dequeue_bulk,
 	RTE_TRACE_POINT_ARGS(void *mempool, void **obj_table,
diff --git a/lib/meter/rte_meter.h b/lib/meter/rte_meter.h
index bd68cbe389..e72bf93b3e 100644
--- a/lib/meter/rte_meter.h
+++ b/lib/meter/rte_meter.h
@@ -6,10 +6,6 @@
 #ifndef __INCLUDE_RTE_METER_H__
 #define __INCLUDE_RTE_METER_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Traffic Metering
@@ -22,6 +18,10 @@ extern "C" {
 
 #include <stdint.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /*
  * Application Programmer's Interface (API)
  */
diff --git a/lib/mldev/mldev_utils.h b/lib/mldev/mldev_utils.h
index 5e2a180adc..bf21067d38 100644
--- a/lib/mldev/mldev_utils.h
+++ b/lib/mldev/mldev_utils.h
@@ -5,10 +5,6 @@
 #ifndef RTE_MLDEV_UTILS_H
 #define RTE_MLDEV_UTILS_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  *
@@ -20,6 +16,10 @@ extern "C" {
 #include <rte_compat.h>
 #include <rte_mldev.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @internal
  *
diff --git a/lib/mldev/rte_mldev_core.h b/lib/mldev/rte_mldev_core.h
index b3bd281083..8dccf125fc 100644
--- a/lib/mldev/rte_mldev_core.h
+++ b/lib/mldev/rte_mldev_core.h
@@ -16,10 +16,6 @@
  * These APIs are for MLDEV PMDs and library only.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <dev_driver.h>
@@ -27,6 +23,10 @@ extern "C" {
 #include <rte_log.h>
 #include <rte_mldev.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Device state */
 #define ML_DEV_DETACHED (0)
 #define ML_DEV_ATTACHED (1)
diff --git a/lib/mldev/rte_mldev_pmd.h b/lib/mldev/rte_mldev_pmd.h
index fd5bbf4360..47c0f23223 100644
--- a/lib/mldev/rte_mldev_pmd.h
+++ b/lib/mldev/rte_mldev_pmd.h
@@ -14,10 +14,6 @@
  * These APIs are for MLDEV PMDs only and user applications should not call them directly.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_common.h>
@@ -25,6 +21,10 @@ extern "C" {
 #include <rte_mldev.h>
 #include <rte_mldev_core.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @internal
  *
diff --git a/lib/net/rte_ether.h b/lib/net/rte_ether.h
index 32ed515aef..403e84f50b 100644
--- a/lib/net/rte_ether.h
+++ b/lib/net/rte_ether.h
@@ -11,10 +11,6 @@
  * Ethernet Helpers in RTE
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <stdio.h>
 
@@ -22,6 +18,10 @@ extern "C" {
 #include <rte_mbuf.h>
 #include <rte_byteorder.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_ETHER_ADDR_LEN  6 /**< Length of Ethernet address. */
 #define RTE_ETHER_TYPE_LEN  2 /**< Length of Ethernet type field. */
 #define RTE_ETHER_CRC_LEN   4 /**< Length of Ethernet CRC. */
diff --git a/lib/net/rte_net.h b/lib/net/rte_net.h
index cdc6cf956d..40ad6a71a1 100644
--- a/lib/net/rte_net.h
+++ b/lib/net/rte_net.h
@@ -5,14 +5,14 @@
 #ifndef _RTE_NET_PTYPE_H_
 #define _RTE_NET_PTYPE_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_ip.h>
 #include <rte_udp.h>
 #include <rte_tcp.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Structure containing header lengths associated to a packet, filled
  * by rte_net_get_ptype().
diff --git a/lib/net/rte_sctp.h b/lib/net/rte_sctp.h
index 965682dc2b..a8ba9e49d8 100644
--- a/lib/net/rte_sctp.h
+++ b/lib/net/rte_sctp.h
@@ -14,14 +14,14 @@
 #ifndef _RTE_SCTP_H_
 #define _RTE_SCTP_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_byteorder.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * SCTP Header
  */
diff --git a/lib/node/rte_node_eth_api.h b/lib/node/rte_node_eth_api.h
index 143cf131b3..2b7019f6bb 100644
--- a/lib/node/rte_node_eth_api.h
+++ b/lib/node/rte_node_eth_api.h
@@ -16,15 +16,15 @@
  * and its queue associations.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include <rte_common.h>
 #include <rte_graph.h>
 #include <rte_mempool.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Port config for ethdev_rx and ethdev_tx node.
  */
diff --git a/lib/node/rte_node_ip4_api.h b/lib/node/rte_node_ip4_api.h
index 24f8ec843a..950751a525 100644
--- a/lib/node/rte_node_ip4_api.h
+++ b/lib/node/rte_node_ip4_api.h
@@ -15,15 +15,15 @@
  * This API allows to do control path functions of ip4_* nodes
  * like ip4_lookup, ip4_rewrite.
  */
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 #include <rte_compat.h>
 
 #include <rte_graph.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * IP4 lookup next nodes.
  */
diff --git a/lib/node/rte_node_ip6_api.h b/lib/node/rte_node_ip6_api.h
index a538dc2ea7..f467aac7b6 100644
--- a/lib/node/rte_node_ip6_api.h
+++ b/lib/node/rte_node_ip6_api.h
@@ -15,13 +15,13 @@
  * This API allows to do control path functions of ip6_* nodes
  * like ip6_lookup, ip6_rewrite.
  */
+#include <rte_common.h>
+#include <rte_compat.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_common.h>
-#include <rte_compat.h>
-
 /**
  * IP6 lookup next nodes.
  */
diff --git a/lib/node/rte_node_udp4_input_api.h b/lib/node/rte_node_udp4_input_api.h
index c873acbbe0..694660bd6a 100644
--- a/lib/node/rte_node_udp4_input_api.h
+++ b/lib/node/rte_node_udp4_input_api.h
@@ -16,14 +16,14 @@
  * like udp4_input.
  *
  */
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 #include <rte_compat.h>
 
 #include "rte_graph.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
 /**
  * UDP4 lookup next nodes.
  */
diff --git a/lib/pci/rte_pci.h b/lib/pci/rte_pci.h
index c26fc77209..9a50a12142 100644
--- a/lib/pci/rte_pci.h
+++ b/lib/pci/rte_pci.h
@@ -12,14 +12,14 @@
  * RTE PCI Library
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdio.h>
 #include <inttypes.h>
 #include <sys/types.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /*
  * Conventional PCI and PCI-X Mode 1 devices have 256 bytes of
  * configuration space.  PCI-X Mode 2 and PCIe devices have 4096 bytes of
diff --git a/lib/pdcp/rte_pdcp.h b/lib/pdcp/rte_pdcp.h
index f74524f83d..15fcbf9607 100644
--- a/lib/pdcp/rte_pdcp.h
+++ b/lib/pdcp/rte_pdcp.h
@@ -19,10 +19,6 @@
 #include <rte_pdcp_hdr.h>
 #include <rte_security.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /* Forward declarations. */
 struct rte_pdcp_entity;
 
@@ -373,6 +369,10 @@ rte_pdcp_t_reordering_expiry_handle(const struct rte_pdcp_entity *entity,
  */
 #include <rte_pdcp_group.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/pipeline/rte_pipeline.h b/lib/pipeline/rte_pipeline.h
index 0c7994b4f2..c9e7172453 100644
--- a/lib/pipeline/rte_pipeline.h
+++ b/lib/pipeline/rte_pipeline.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_PIPELINE_H__
 #define __INCLUDE_RTE_PIPELINE_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Pipeline
@@ -59,6 +55,10 @@ extern "C" {
 #include <rte_table.h>
 #include <rte_common.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct rte_mbuf;
 
 /*
diff --git a/lib/pipeline/rte_port_in_action.h b/lib/pipeline/rte_port_in_action.h
index ec2994599f..9d17bae988 100644
--- a/lib/pipeline/rte_port_in_action.h
+++ b/lib/pipeline/rte_port_in_action.h
@@ -46,10 +46,6 @@
  * @b EXPERIMENTAL: this API may change without prior notice
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_compat.h>
@@ -57,6 +53,10 @@ extern "C" {
 
 #include "rte_pipeline.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Input port actions. */
 enum rte_port_in_action_type {
 	/** Filter selected input packets. */
diff --git a/lib/pipeline/rte_swx_ctl.h b/lib/pipeline/rte_swx_ctl.h
index 6ef2551ab5..c4e63753f5 100644
--- a/lib/pipeline/rte_swx_ctl.h
+++ b/lib/pipeline/rte_swx_ctl.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_CTL_H__
 #define __INCLUDE_RTE_SWX_CTL_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Pipeline Control
@@ -22,6 +18,10 @@ extern "C" {
 #include "rte_swx_port.h"
 #include "rte_swx_table.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct rte_swx_pipeline;
 
 /** Name size. */
diff --git a/lib/pipeline/rte_swx_extern.h b/lib/pipeline/rte_swx_extern.h
index e10e963d63..1553fa81ec 100644
--- a/lib/pipeline/rte_swx_extern.h
+++ b/lib/pipeline/rte_swx_extern.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_EXTERN_H__
 #define __INCLUDE_RTE_SWX_EXTERN_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Extern objects and functions
@@ -19,6 +15,10 @@ extern "C" {
 
 #include <stdint.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /*
  * Extern type
  */
diff --git a/lib/pipeline/rte_swx_ipsec.h b/lib/pipeline/rte_swx_ipsec.h
index 7c07fdc739..d2e5abef7d 100644
--- a/lib/pipeline/rte_swx_ipsec.h
+++ b/lib/pipeline/rte_swx_ipsec.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_IPSEC_H__
 #define __INCLUDE_RTE_SWX_IPSEC_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Internet Protocol Security (IPsec)
@@ -53,6 +49,10 @@ extern "C" {
 #include <rte_compat.h>
 #include <rte_crypto_sym.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * IPsec Setup API
  */
diff --git a/lib/pipeline/rte_swx_pipeline.h b/lib/pipeline/rte_swx_pipeline.h
index 25df042d3b..882bd4bf6f 100644
--- a/lib/pipeline/rte_swx_pipeline.h
+++ b/lib/pipeline/rte_swx_pipeline.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_PIPELINE_H__
 #define __INCLUDE_RTE_SWX_PIPELINE_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Pipeline
@@ -22,6 +18,10 @@ extern "C" {
 #include "rte_swx_table.h"
 #include "rte_swx_extern.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Name size. */
 #ifndef RTE_SWX_NAME_SIZE
 #define RTE_SWX_NAME_SIZE 64
diff --git a/lib/pipeline/rte_swx_pipeline_spec.h b/lib/pipeline/rte_swx_pipeline_spec.h
index dd88c0bfab..077b407c0a 100644
--- a/lib/pipeline/rte_swx_pipeline_spec.h
+++ b/lib/pipeline/rte_swx_pipeline_spec.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_PIPELINE_SPEC_H__
 #define __INCLUDE_RTE_SWX_PIPELINE_SPEC_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <stdio.h>
 
@@ -15,6 +11,10 @@ extern "C" {
 
 #include <rte_swx_pipeline.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /*
  * extobj.
  *
diff --git a/lib/pipeline/rte_table_action.h b/lib/pipeline/rte_table_action.h
index 5dffbeb700..bab4bfd2e2 100644
--- a/lib/pipeline/rte_table_action.h
+++ b/lib/pipeline/rte_table_action.h
@@ -52,10 +52,6 @@
  * @b EXPERIMENTAL: this API may change without prior notice
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_compat.h>
@@ -65,6 +61,10 @@ extern "C" {
 
 #include "rte_pipeline.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Table actions. */
 enum rte_table_action_type {
 	/** Forward to next pipeline table, output port or drop. */
diff --git a/lib/port/rte_port.h b/lib/port/rte_port.h
index 0e30db371e..4b20872537 100644
--- a/lib/port/rte_port.h
+++ b/lib/port/rte_port.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_PORT_H__
 #define __INCLUDE_RTE_PORT_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Port
@@ -20,6 +16,10 @@ extern "C" {
 #include <stdint.h>
 #include <rte_mbuf.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**@{
  * Macros to allow accessing metadata stored in the mbuf headroom
  * just beyond the end of the mbuf data structure returned by a port
diff --git a/lib/port/rte_port_ethdev.h b/lib/port/rte_port_ethdev.h
index e07021cb89..7729ff0da3 100644
--- a/lib/port/rte_port_ethdev.h
+++ b/lib/port/rte_port_ethdev.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_PORT_ETHDEV_H__
 #define __INCLUDE_RTE_PORT_ETHDEV_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Port Ethernet Device
@@ -21,6 +17,10 @@ extern "C" {
 
 #include "rte_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** ethdev_reader port parameters */
 struct rte_port_ethdev_reader_params {
 	/** NIC RX port ID */
diff --git a/lib/port/rte_port_eventdev.h b/lib/port/rte_port_eventdev.h
index 0efb8e1021..d9eccf07d4 100644
--- a/lib/port/rte_port_eventdev.h
+++ b/lib/port/rte_port_eventdev.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_PORT_EVENTDEV_H__
 #define __INCLUDE_RTE_PORT_EVENTDEV_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Port Eventdev Interface
@@ -24,6 +20,10 @@ extern "C" {
 
 #include "rte_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Eventdev_reader port parameters */
 struct rte_port_eventdev_reader_params {
 	/** Eventdev Device ID */
diff --git a/lib/port/rte_port_fd.h b/lib/port/rte_port_fd.h
index 885b9ada22..40a5e4a426 100644
--- a/lib/port/rte_port_fd.h
+++ b/lib/port/rte_port_fd.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_PORT_FD_H__
 #define __INCLUDE_RTE_PORT_FD_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Port FD Device
@@ -21,6 +17,10 @@ extern "C" {
 
 #include "rte_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** fd_reader port parameters */
 struct rte_port_fd_reader_params {
 	/** File descriptor */
diff --git a/lib/port/rte_port_frag.h b/lib/port/rte_port_frag.h
index 4055872e8d..9a10f10523 100644
--- a/lib/port/rte_port_frag.h
+++ b/lib/port/rte_port_frag.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_PORT_IP_FRAG_H__
 #define __INCLUDE_RTE_PORT_IP_FRAG_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Port for IPv4 Fragmentation
@@ -31,6 +27,10 @@ extern "C" {
 
 #include "rte_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** ring_reader_ipv4_frag port parameters */
 struct rte_port_ring_reader_frag_params {
 	/** Underlying single consumer ring that has to be pre-initialized. */
diff --git a/lib/port/rte_port_ras.h b/lib/port/rte_port_ras.h
index 94cfb3ed92..86e36f5362 100644
--- a/lib/port/rte_port_ras.h
+++ b/lib/port/rte_port_ras.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_PORT_RAS_H__
 #define __INCLUDE_RTE_PORT_RAS_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Port for IPv4 Reassembly
@@ -31,6 +27,10 @@ extern "C" {
 
 #include "rte_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** ring_writer_ipv4_ras port parameters */
 struct rte_port_ring_writer_ras_params {
 	/** Underlying single consumer ring that has to be pre-initialized. */
diff --git a/lib/port/rte_port_ring.h b/lib/port/rte_port_ring.h
index 027928c924..2089d0889b 100644
--- a/lib/port/rte_port_ring.h
+++ b/lib/port/rte_port_ring.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_PORT_RING_H__
 #define __INCLUDE_RTE_PORT_RING_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Port Ring
@@ -27,6 +23,10 @@ extern "C" {
 
 #include "rte_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** ring_reader port parameters */
 struct rte_port_ring_reader_params {
 	/** Underlying consumer ring that has to be pre-initialized */
diff --git a/lib/port/rte_port_sched.h b/lib/port/rte_port_sched.h
index 251380ef80..1bf08ae6a9 100644
--- a/lib/port/rte_port_sched.h
+++ b/lib/port/rte_port_sched.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_PORT_SCHED_H__
 #define __INCLUDE_RTE_PORT_SCHED_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Port Hierarchical Scheduler
@@ -23,6 +19,10 @@ extern "C" {
 
 #include "rte_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** sched_reader port parameters */
 struct rte_port_sched_reader_params {
 	/** Underlying pre-initialized rte_sched_port */
diff --git a/lib/port/rte_port_source_sink.h b/lib/port/rte_port_source_sink.h
index bcdbaf1e40..3122dd5038 100644
--- a/lib/port/rte_port_source_sink.h
+++ b/lib/port/rte_port_source_sink.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_PORT_SOURCE_SINK_H__
 #define __INCLUDE_RTE_PORT_SOURCE_SINK_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Port Source/Sink
@@ -19,6 +15,10 @@ extern "C" {
 
 #include "rte_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** source port parameters */
 struct rte_port_source_params {
 	/** Pre-initialized buffer pool */
diff --git a/lib/port/rte_port_sym_crypto.h b/lib/port/rte_port_sym_crypto.h
index 6532b4388a..d03cdc1e8b 100644
--- a/lib/port/rte_port_sym_crypto.h
+++ b/lib/port/rte_port_sym_crypto.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_PORT_SYM_CRYPTO_H__
 #define __INCLUDE_RTE_PORT_SYM_CRYPTO_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Port sym crypto Interface
@@ -23,6 +19,10 @@ extern "C" {
 
 #include "rte_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Function prototype for reader post action. */
 typedef void (*rte_port_sym_crypto_reader_callback_fn)(struct rte_mbuf **pkts,
 		uint16_t n_pkts, void *arg);
diff --git a/lib/port/rte_swx_port.h b/lib/port/rte_swx_port.h
index 1dbd95ae87..b52b125572 100644
--- a/lib/port/rte_swx_port.h
+++ b/lib/port/rte_swx_port.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_PORT_H__
 #define __INCLUDE_RTE_SWX_PORT_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Port
@@ -17,6 +13,10 @@ extern "C" {
 
 #include <stdint.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Packet. */
 struct rte_swx_pkt {
 	/** Opaque packet handle. */
diff --git a/lib/port/rte_swx_port_ethdev.h b/lib/port/rte_swx_port_ethdev.h
index cbc2d7b213..1828031e67 100644
--- a/lib/port/rte_swx_port_ethdev.h
+++ b/lib/port/rte_swx_port_ethdev.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_PORT_ETHDEV_H__
 #define __INCLUDE_RTE_SWX_PORT_ETHDEV_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Ethernet Device Input and Output Ports
@@ -17,6 +13,10 @@ extern "C" {
 
 #include "rte_swx_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Ethernet device input port (reader) creation parameters. */
 struct rte_swx_port_ethdev_reader_params {
 	/** Name of a valid and fully configured Ethernet device. */
diff --git a/lib/port/rte_swx_port_fd.h b/lib/port/rte_swx_port_fd.h
index e61719c8f6..63529cf0ab 100644
--- a/lib/port/rte_swx_port_fd.h
+++ b/lib/port/rte_swx_port_fd.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_SWX_PORT_FD_H__
 #define __INCLUDE_RTE_SWX_PORT_FD_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX FD Input and Output Ports
@@ -18,6 +14,10 @@ extern "C" {
 
 #include "rte_swx_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** fd_reader port parameters */
 struct rte_swx_port_fd_reader_params {
 	/** File descriptor. Must be valid and opened in non-blocking mode. */
diff --git a/lib/port/rte_swx_port_ring.h b/lib/port/rte_swx_port_ring.h
index efc485fb08..ef241c3fee 100644
--- a/lib/port/rte_swx_port_ring.h
+++ b/lib/port/rte_swx_port_ring.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_SWX_PORT_RING_H__
 #define __INCLUDE_RTE_SWX_PORT_RING_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Ring Input and Output Ports
@@ -18,6 +14,10 @@ extern "C" {
 
 #include "rte_swx_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Ring input port (reader) creation parameters. */
 struct rte_swx_port_ring_reader_params {
 	/** Name of valid RTE ring. */
diff --git a/lib/port/rte_swx_port_source_sink.h b/lib/port/rte_swx_port_source_sink.h
index 91bcbf74f4..e3ca7cfbb4 100644
--- a/lib/port/rte_swx_port_source_sink.h
+++ b/lib/port/rte_swx_port_source_sink.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_PORT_SOURCE_SINK_H__
 #define __INCLUDE_RTE_SWX_PORT_SOURCE_SINK_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Source and Sink Ports
@@ -15,6 +11,10 @@ extern "C" {
 
 #include "rte_swx_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Maximum number of packets to read from the PCAP file. */
 #ifndef RTE_SWX_PORT_SOURCE_PKTS_MAX
 #define RTE_SWX_PORT_SOURCE_PKTS_MAX 1024
diff --git a/lib/rawdev/rte_rawdev.h b/lib/rawdev/rte_rawdev.h
index 640037b524..3fc471526e 100644
--- a/lib/rawdev/rte_rawdev.h
+++ b/lib/rawdev/rte_rawdev.h
@@ -14,13 +14,13 @@
  * no specific type already available in DPDK.
  */
 
+#include <rte_common.h>
+#include <rte_memory.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_common.h>
-#include <rte_memory.h>
-
 /* Rawdevice object - essentially a void to be typecast by implementation */
 typedef void *rte_rawdev_obj_t;
 
diff --git a/lib/rawdev/rte_rawdev_pmd.h b/lib/rawdev/rte_rawdev_pmd.h
index 22b406444d..408ed461a4 100644
--- a/lib/rawdev/rte_rawdev_pmd.h
+++ b/lib/rawdev/rte_rawdev_pmd.h
@@ -13,10 +13,6 @@
  * any application.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <string.h>
 
 #include <dev_driver.h>
@@ -26,6 +22,10 @@ extern "C" {
 
 #include "rte_rawdev.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 extern int librawdev_logtype;
 #define RTE_LOGTYPE_RAWDEV librawdev_logtype
 
diff --git a/lib/rcu/rte_rcu_qsbr.h b/lib/rcu/rte_rcu_qsbr.h
index ed3dd6d3d2..550fadf56a 100644
--- a/lib/rcu/rte_rcu_qsbr.h
+++ b/lib/rcu/rte_rcu_qsbr.h
@@ -21,10 +21,6 @@
  * entered quiescent state.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <inttypes.h>
 #include <stdalign.h>
 #include <stdbool.h>
@@ -36,6 +32,10 @@ extern "C" {
 #include <rte_atomic.h>
 #include <rte_ring.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 extern int rte_rcu_log_type;
 #define RTE_LOGTYPE_RCU rte_rcu_log_type
 
diff --git a/lib/regexdev/rte_regexdev.h b/lib/regexdev/rte_regexdev.h
index a50b841b1e..b18a1d4251 100644
--- a/lib/regexdev/rte_regexdev.h
+++ b/lib/regexdev/rte_regexdev.h
@@ -194,10 +194,6 @@
  * - rte_regexdev_dequeue_burst()
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include <rte_common.h>
 #include <rte_dev.h>
@@ -1428,6 +1424,10 @@ struct rte_regex_ops {
 
 #include "rte_regexdev_core.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @warning
  * @b EXPERIMENTAL: this API may change without prior notice.
diff --git a/lib/ring/rte_ring.h b/lib/ring/rte_ring.h
index c709f30497..11ca69c73d 100644
--- a/lib/ring/rte_ring.h
+++ b/lib/ring/rte_ring.h
@@ -34,13 +34,13 @@
  * for more information.
  */
 
+#include <rte_ring_core.h>
+#include <rte_ring_elem.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_ring_core.h>
-#include <rte_ring_elem.h>
-
 /**
  * Calculate the memory size needed for a ring
  *
diff --git a/lib/ring/rte_ring_core.h b/lib/ring/rte_ring_core.h
index 270869d214..222c5aeb3f 100644
--- a/lib/ring/rte_ring_core.h
+++ b/lib/ring/rte_ring_core.h
@@ -19,10 +19,6 @@
  * instead.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdalign.h>
 #include <stdio.h>
 #include <stdint.h>
@@ -38,6 +34,10 @@ extern "C" {
 #include <rte_pause.h>
 #include <rte_debug.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_TAILQ_RING_NAME "RTE_RING"
 
 /** enqueue/dequeue behavior types */
diff --git a/lib/ring/rte_ring_elem.h b/lib/ring/rte_ring_elem.h
index 7f7d4951d3..506f686884 100644
--- a/lib/ring/rte_ring_elem.h
+++ b/lib/ring/rte_ring_elem.h
@@ -16,10 +16,6 @@
  * RTE Ring with user defined element size
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_ring_core.h>
 #include <rte_ring_elem_pvt.h>
 
@@ -699,6 +695,10 @@ rte_ring_dequeue_burst_elem(struct rte_ring *r, void *obj_table,
 
 #include <rte_ring.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/ring/rte_ring_hts.h b/lib/ring/rte_ring_hts.h
index 9a5938ac58..a41acea740 100644
--- a/lib/ring/rte_ring_hts.h
+++ b/lib/ring/rte_ring_hts.h
@@ -24,12 +24,12 @@
  * To achieve that 64-bit CAS is used by head update routine.
  */
 
+#include <rte_ring_hts_elem_pvt.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_ring_hts_elem_pvt.h>
-
 /**
  * Enqueue several objects on the HTS ring (multi-producers safe).
  *
diff --git a/lib/ring/rte_ring_peek.h b/lib/ring/rte_ring_peek.h
index c0621d12e2..2312f52668 100644
--- a/lib/ring/rte_ring_peek.h
+++ b/lib/ring/rte_ring_peek.h
@@ -43,12 +43,12 @@
  * with enqueue(/dequeue) operation till _finish_ completes.
  */
 
+#include <rte_ring_peek_elem_pvt.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_ring_peek_elem_pvt.h>
-
 /**
  * Start to enqueue several objects on the ring.
  * Note that no actual objects are put in the queue by this function,
diff --git a/lib/ring/rte_ring_peek_zc.h b/lib/ring/rte_ring_peek_zc.h
index 0b5e34b731..3254fe0481 100644
--- a/lib/ring/rte_ring_peek_zc.h
+++ b/lib/ring/rte_ring_peek_zc.h
@@ -67,12 +67,12 @@
  * with enqueue/dequeue operation till _finish_ completes.
  */
 
+#include <rte_ring_peek_elem_pvt.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_ring_peek_elem_pvt.h>
-
 /**
  * Ring zero-copy information structure.
  *
diff --git a/lib/ring/rte_ring_rts.h b/lib/ring/rte_ring_rts.h
index 50fc8f74db..d7a3863c83 100644
--- a/lib/ring/rte_ring_rts.h
+++ b/lib/ring/rte_ring_rts.h
@@ -51,12 +51,12 @@
  * By default HTD_MAX == ring.capacity / 8.
  */
 
+#include <rte_ring_rts_elem_pvt.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_ring_rts_elem_pvt.h>
-
 /**
  * Enqueue several objects on the RTS ring (multi-producers safe).
  *
diff --git a/lib/sched/rte_approx.h b/lib/sched/rte_approx.h
index b60086330e..738e33a98b 100644
--- a/lib/sched/rte_approx.h
+++ b/lib/sched/rte_approx.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_APPROX_H__
 #define __INCLUDE_RTE_APPROX_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Rational Approximation
@@ -20,6 +16,10 @@ extern "C" {
 
 #include <stdint.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Find best rational approximation
  *
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
index 1477a47700..2a385ffdba 100644
--- a/lib/sched/rte_pie.h
+++ b/lib/sched/rte_pie.h
@@ -5,10 +5,6 @@
 #ifndef __RTE_PIE_H_INCLUDED__
 #define __RTE_PIE_H_INCLUDED__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * Proportional Integral controller Enhanced (PIE)
@@ -20,6 +16,10 @@ extern "C" {
 #include <rte_debug.h>
 #include <rte_cycles.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
 				     * to start measurement cycle (bytes)
 				     */
diff --git a/lib/sched/rte_red.h b/lib/sched/rte_red.h
index afaa35fcd6..e62abb9295 100644
--- a/lib/sched/rte_red.h
+++ b/lib/sched/rte_red.h
@@ -5,10 +5,6 @@
 #ifndef __RTE_RED_H_INCLUDED__
 #define __RTE_RED_H_INCLUDED__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Random Early Detection (RED)
@@ -20,6 +16,10 @@ extern "C" {
 #include <rte_cycles.h>
 #include <rte_branch_prediction.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_RED_SCALING                     10         /**< Fraction size for fixed-point */
 #define RTE_RED_S                           (1 << 22)  /**< Packet size multiplied by number of leaf queues */
 #define RTE_RED_MAX_TH_MAX                  1023       /**< Max threshold limit in fixed point format */
diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
index b882c4a882..222e6b3583 100644
--- a/lib/sched/rte_sched.h
+++ b/lib/sched/rte_sched.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_SCHED_H__
 #define __INCLUDE_RTE_SCHED_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Hierarchical Scheduler
@@ -62,6 +58,10 @@ extern "C" {
 #include "rte_red.h"
 #include "rte_pie.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Maximum number of queues per pipe.
  * Note that the multiple queues (power of 2) can only be assigned to
  * lowest priority (best-effort) traffic class. Other higher priority traffic
diff --git a/lib/sched/rte_sched_common.h b/lib/sched/rte_sched_common.h
index 573d164569..a5acb9c08a 100644
--- a/lib/sched/rte_sched_common.h
+++ b/lib/sched/rte_sched_common.h
@@ -5,13 +5,13 @@
 #ifndef __INCLUDE_RTE_SCHED_COMMON_H__
 #define __INCLUDE_RTE_SCHED_COMMON_H__
 
+#include <stdint.h>
+#include <sys/types.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-#include <sys/types.h>
-
 #if 0
 static inline uint32_t
 rte_min_pos_4_u16(uint16_t *x)
diff --git a/lib/security/rte_security.h b/lib/security/rte_security.h
index 1c8474b74f..7a9bafa0fa 100644
--- a/lib/security/rte_security.h
+++ b/lib/security/rte_security.h
@@ -12,10 +12,6 @@
  * RTE Security Common Definitions
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <sys/types.h>
 
 #include <rte_compat.h>
@@ -24,6 +20,10 @@ extern "C" {
 #include <rte_ip.h>
 #include <rte_mbuf_dyn.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** IPSec protocol mode */
 enum rte_security_ipsec_sa_mode {
 	RTE_SECURITY_IPSEC_SA_MODE_TRANSPORT = 1,
diff --git a/lib/security/rte_security_driver.h b/lib/security/rte_security_driver.h
index 9bb5052a4c..2ceb145066 100644
--- a/lib/security/rte_security_driver.h
+++ b/lib/security/rte_security_driver.h
@@ -12,13 +12,13 @@
  * RTE Security Common Definitions
  */
 
+#include <rte_compat.h>
+#include "rte_security.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_compat.h>
-#include "rte_security.h"
-
 /**
  * @internal
  * Security session to be used by library for internal usage
diff --git a/lib/stack/rte_stack.h b/lib/stack/rte_stack.h
index 3325757568..4439adfc42 100644
--- a/lib/stack/rte_stack.h
+++ b/lib/stack/rte_stack.h
@@ -15,10 +15,6 @@
 #ifndef _RTE_STACK_H_
 #define _RTE_STACK_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdalign.h>
 
 #include <rte_debug.h>
@@ -95,6 +91,10 @@ struct __rte_cache_aligned rte_stack {
 #include "rte_stack_std.h"
 #include "rte_stack_lf.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Push several objects on the stack (MT-safe).
  *
diff --git a/lib/table/rte_lru.h b/lib/table/rte_lru.h
index 88229d8632..bc1ad36500 100644
--- a/lib/table/rte_lru.h
+++ b/lib/table/rte_lru.h
@@ -5,15 +5,15 @@
 #ifndef __INCLUDE_RTE_LRU_H__
 #define __INCLUDE_RTE_LRU_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_config.h>
 #ifdef RTE_ARCH_X86_64
 #include "rte_lru_x86.h"
 #elif defined(RTE_ARCH_ARM64)
 #include "rte_lru_arm64.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
 #else
 #undef RTE_TABLE_HASH_LRU_STRATEGY
 #define RTE_TABLE_HASH_LRU_STRATEGY                        1
@@ -86,8 +86,4 @@ do {									\
 
 #endif
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif
diff --git a/lib/table/rte_lru_arm64.h b/lib/table/rte_lru_arm64.h
index f19b0bdb4e..f9a4678ee0 100644
--- a/lib/table/rte_lru_arm64.h
+++ b/lib/table/rte_lru_arm64.h
@@ -5,14 +5,14 @@
 #ifndef __RTE_LRU_ARM64_H__
 #define __RTE_LRU_ARM64_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <rte_vect.h>
 #include <rte_bitops.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifndef RTE_TABLE_HASH_LRU_STRATEGY
 #ifdef __ARM_NEON
 #define RTE_TABLE_HASH_LRU_STRATEGY                        3
diff --git a/lib/table/rte_lru_x86.h b/lib/table/rte_lru_x86.h
index ddfb8c1c8c..93f4a136a8 100644
--- a/lib/table/rte_lru_x86.h
+++ b/lib/table/rte_lru_x86.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_LRU_X86_H__
 #define __INCLUDE_RTE_LRU_X86_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_config.h>
@@ -97,8 +93,4 @@ do {									\
 
 #endif
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif
diff --git a/lib/table/rte_swx_hash_func.h b/lib/table/rte_swx_hash_func.h
index 04f3d543e7..9c65cfa913 100644
--- a/lib/table/rte_swx_hash_func.h
+++ b/lib/table/rte_swx_hash_func.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_HASH_FUNC_H__
 #define __INCLUDE_RTE_SWX_HASH_FUNC_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Hash Function
@@ -15,6 +11,10 @@ extern "C" {
 
 #include <stdint.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Hash function prototype
  *
diff --git a/lib/table/rte_swx_keycmp.h b/lib/table/rte_swx_keycmp.h
index 09fb1be869..b0ed819307 100644
--- a/lib/table/rte_swx_keycmp.h
+++ b/lib/table/rte_swx_keycmp.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_KEYCMP_H__
 #define __INCLUDE_RTE_SWX_KEYCMP_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Key Comparison Functions
@@ -16,6 +12,10 @@ extern "C" {
 #include <stdint.h>
 #include <string.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Key comparison function prototype
  *
diff --git a/lib/table/rte_swx_table.h b/lib/table/rte_swx_table.h
index ac01e19781..3c53459498 100644
--- a/lib/table/rte_swx_table.h
+++ b/lib/table/rte_swx_table.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_TABLE_H__
 #define __INCLUDE_RTE_SWX_TABLE_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Table
@@ -21,6 +17,10 @@ extern "C" {
 
 #include "rte_swx_hash_func.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Match type. */
 enum rte_swx_table_match_type {
 	/** Wildcard Match (WM). */
diff --git a/lib/table/rte_swx_table_em.h b/lib/table/rte_swx_table_em.h
index b7423dd060..592541f01f 100644
--- a/lib/table/rte_swx_table_em.h
+++ b/lib/table/rte_swx_table_em.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_TABLE_EM_H__
 #define __INCLUDE_RTE_SWX_TABLE_EM_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Exact Match Table
@@ -16,6 +12,10 @@ extern "C" {
 
 #include <rte_swx_table.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Exact match table operations - unoptimized. */
 extern struct rte_swx_table_ops rte_swx_table_exact_match_unoptimized_ops;
 
diff --git a/lib/table/rte_swx_table_learner.h b/lib/table/rte_swx_table_learner.h
index c5ea015b8d..9a18be083d 100644
--- a/lib/table/rte_swx_table_learner.h
+++ b/lib/table/rte_swx_table_learner.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_TABLE_LEARNER_H__
 #define __INCLUDE_RTE_SWX_TABLE_LEARNER_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Learner Table
@@ -53,6 +49,10 @@ extern "C" {
 
 #include "rte_swx_hash_func.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Maximum number of key timeout values per learner table. */
 #ifndef RTE_SWX_TABLE_LEARNER_N_KEY_TIMEOUTS_MAX
 #define RTE_SWX_TABLE_LEARNER_N_KEY_TIMEOUTS_MAX 16
diff --git a/lib/table/rte_swx_table_selector.h b/lib/table/rte_swx_table_selector.h
index 05863cc90b..ef29bdb6b0 100644
--- a/lib/table/rte_swx_table_selector.h
+++ b/lib/table/rte_swx_table_selector.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_TABLE_SELECTOR_H__
 #define __INCLUDE_RTE_SWX_TABLE_SELECTOR_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Selector Table
@@ -21,6 +17,10 @@ extern "C" {
 
 #include "rte_swx_table.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Selector table creation parameters. */
 struct rte_swx_table_selector_params {
 	/** Group ID offset. */
diff --git a/lib/table/rte_swx_table_wm.h b/lib/table/rte_swx_table_wm.h
index 4fd52c0a17..7eb6f8e2a6 100644
--- a/lib/table/rte_swx_table_wm.h
+++ b/lib/table/rte_swx_table_wm.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_TABLE_WM_H__
 #define __INCLUDE_RTE_SWX_TABLE_WM_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Wildcard Match Table
@@ -16,6 +12,10 @@ extern "C" {
 
 #include <rte_swx_table.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Wildcard match table operations. */
 extern struct rte_swx_table_ops rte_swx_table_wildcard_match_ops;
 
diff --git a/lib/table/rte_table.h b/lib/table/rte_table.h
index 9a5faf0e32..43a5a1a7b3 100644
--- a/lib/table/rte_table.h
+++ b/lib/table/rte_table.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_TABLE_H__
 #define __INCLUDE_RTE_TABLE_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Table
@@ -27,6 +23,10 @@ extern "C" {
 #include <stdint.h>
 #include <rte_port.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct rte_mbuf;
 
 /** Lookup table statistics */
diff --git a/lib/table/rte_table_acl.h b/lib/table/rte_table_acl.h
index 1cb7b9fbbd..61af7b88e4 100644
--- a/lib/table/rte_table_acl.h
+++ b/lib/table/rte_table_acl.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_TABLE_ACL_H__
 #define __INCLUDE_RTE_TABLE_ACL_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Table ACL
@@ -25,6 +21,10 @@ extern "C" {
 
 #include "rte_table.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** ACL table parameters */
 struct rte_table_acl_params {
 	/** Name */
diff --git a/lib/table/rte_table_array.h b/lib/table/rte_table_array.h
index fad83b0588..b2a7b95d68 100644
--- a/lib/table/rte_table_array.h
+++ b/lib/table/rte_table_array.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_TABLE_ARRAY_H__
 #define __INCLUDE_RTE_TABLE_ARRAY_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Table Array
@@ -20,6 +16,10 @@ extern "C" {
 
 #include "rte_table.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Array table parameters */
 struct rte_table_array_params {
 	/** Number of array entries. Has to be a power of two. */
diff --git a/lib/table/rte_table_hash.h b/lib/table/rte_table_hash.h
index 6698621dae..ff8fc9e9ce 100644
--- a/lib/table/rte_table_hash.h
+++ b/lib/table/rte_table_hash.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_TABLE_HASH_H__
 #define __INCLUDE_RTE_TABLE_HASH_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Table Hash
@@ -52,6 +48,10 @@ extern "C" {
 
 #include "rte_table.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Hash function */
 typedef uint64_t (*rte_table_hash_op_hash)(
 	void *key,
diff --git a/lib/table/rte_table_hash_cuckoo.h b/lib/table/rte_table_hash_cuckoo.h
index 3a55d28e9b..55aa12216a 100644
--- a/lib/table/rte_table_hash_cuckoo.h
+++ b/lib/table/rte_table_hash_cuckoo.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_TABLE_HASH_CUCKOO_H__
 #define __INCLUDE_RTE_TABLE_HASH_CUCKOO_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Table Hash Cuckoo
@@ -20,6 +16,10 @@ extern "C" {
 
 #include "rte_table.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Hash table parameters */
 struct rte_table_hash_cuckoo_params {
 	/** Name */
diff --git a/lib/table/rte_table_hash_func.h b/lib/table/rte_table_hash_func.h
index aa779c2182..cba7ec4c20 100644
--- a/lib/table/rte_table_hash_func.h
+++ b/lib/table/rte_table_hash_func.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_TABLE_HASH_FUNC_H__
 #define __INCLUDE_RTE_TABLE_HASH_FUNC_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_compat.h>
@@ -18,6 +14,10 @@ extern "C" {
 
 #include <x86intrin.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline uint64_t
 rte_crc32_u64(uint64_t crc, uint64_t v)
 {
@@ -28,6 +28,10 @@ rte_crc32_u64(uint64_t crc, uint64_t v)
 #include "rte_table_hash_func_arm64.h"
 #else
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline uint64_t
 rte_crc32_u64(uint64_t crc, uint64_t v)
 {
diff --git a/lib/table/rte_table_lpm.h b/lib/table/rte_table_lpm.h
index dde32deed9..59b9bdee89 100644
--- a/lib/table/rte_table_lpm.h
+++ b/lib/table/rte_table_lpm.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_TABLE_LPM_H__
 #define __INCLUDE_RTE_TABLE_LPM_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Table LPM for IPv4
@@ -45,6 +41,10 @@ extern "C" {
 
 #include "rte_table.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** LPM table parameters */
 struct rte_table_lpm_params {
 	/** Table name */
diff --git a/lib/table/rte_table_lpm_ipv6.h b/lib/table/rte_table_lpm_ipv6.h
index 96ddbd32c2..166a5ba9ee 100644
--- a/lib/table/rte_table_lpm_ipv6.h
+++ b/lib/table/rte_table_lpm_ipv6.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_TABLE_LPM_IPV6_H__
 #define __INCLUDE_RTE_TABLE_LPM_IPV6_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Table LPM for IPv6
@@ -45,6 +41,10 @@ extern "C" {
 
 #include "rte_table.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_LPM_IPV6_ADDR_SIZE 16
 
 /** LPM table parameters */
diff --git a/lib/table/rte_table_stub.h b/lib/table/rte_table_stub.h
index 846526ea99..f7e589df16 100644
--- a/lib/table/rte_table_stub.h
+++ b/lib/table/rte_table_stub.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_TABLE_STUB_H__
 #define __INCLUDE_RTE_TABLE_STUB_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Table Stub
@@ -18,6 +14,10 @@ extern "C" {
 
 #include "rte_table.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Stub table parameters: NONE */
 
 /** Stub table operations */
diff --git a/lib/telemetry/rte_telemetry.h b/lib/telemetry/rte_telemetry.h
index cab9daa6fe..463819e2bf 100644
--- a/lib/telemetry/rte_telemetry.h
+++ b/lib/telemetry/rte_telemetry.h
@@ -5,14 +5,14 @@
 #ifndef _RTE_TELEMETRY_H_
 #define _RTE_TELEMETRY_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <rte_compat.h>
 #include <rte_common.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Maximum length for string used in object. */
 #define RTE_TEL_MAX_STRING_LEN 128
 /** Maximum length of string. */
diff --git a/lib/vhost/rte_vdpa.h b/lib/vhost/rte_vdpa.h
index 6ac85d1bbf..18e273c20f 100644
--- a/lib/vhost/rte_vdpa.h
+++ b/lib/vhost/rte_vdpa.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_VDPA_H_
 #define _RTE_VDPA_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  *
@@ -17,6 +13,10 @@ extern "C" {
 
 #include <stdint.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Maximum name length for statistics counters */
 #define RTE_VDPA_STATS_NAME_SIZE 64
 
diff --git a/lib/vhost/rte_vhost.h b/lib/vhost/rte_vhost.h
index b0434c4b8d..c7a5f56df8 100644
--- a/lib/vhost/rte_vhost.h
+++ b/lib/vhost/rte_vhost.h
@@ -18,10 +18,6 @@
 #include <rte_memory.h>
 #include <rte_mempool.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #ifndef __cplusplus
 /* These are not C++-aware. */
 #include <linux/vhost.h>
@@ -29,6 +25,10 @@ extern "C" {
 #include <linux/virtio_net.h>
 #endif
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_VHOST_USER_CLIENT		(1ULL << 0)
 #define RTE_VHOST_USER_NO_RECONNECT	(1ULL << 1)
 #define RTE_VHOST_USER_RESERVED_1	(1ULL << 2)
diff --git a/lib/vhost/rte_vhost_async.h b/lib/vhost/rte_vhost_async.h
index 8f190dd44b..60995e4e62 100644
--- a/lib/vhost/rte_vhost_async.h
+++ b/lib/vhost/rte_vhost_async.h
@@ -5,15 +5,15 @@
 #ifndef _RTE_VHOST_ASYNC_H_
 #define _RTE_VHOST_ASYNC_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_compat.h>
 #include <rte_mbuf.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Register an async channel for a vhost queue
  *
diff --git a/lib/vhost/rte_vhost_crypto.h b/lib/vhost/rte_vhost_crypto.h
index f962a53818..af61f0907e 100644
--- a/lib/vhost/rte_vhost_crypto.h
+++ b/lib/vhost/rte_vhost_crypto.h
@@ -5,12 +5,12 @@
 #ifndef _VHOST_CRYPTO_H_
 #define _VHOST_CRYPTO_H_
 
+#include <stdint.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-
 /* pre-declare structs to avoid including full headers */
 struct rte_mempool;
 struct rte_crypto_op;
diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h
index 8db4ab9f4d..42392a0d14 100644
--- a/lib/vhost/vdpa_driver.h
+++ b/lib/vhost/vdpa_driver.h
@@ -5,10 +5,6 @@
 #ifndef _VDPA_DRIVER_H_
 #define _VDPA_DRIVER_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdbool.h>
 
 #include <rte_compat.h>
@@ -16,6 +12,10 @@ extern "C" {
 #include "rte_vhost.h"
 #include "rte_vdpa.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_VHOST_QUEUE_ALL UINT16_MAX
 
 /**
-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [PATCH v7 2/6] eal: extend bit manipulation functionality
  2024-09-17  9:36       ` [PATCH v7 0/6] Improve EAL bit operations API Mattias Rönnblom
  2024-09-17  9:36         ` [PATCH v7 1/6] dpdk: do not force C linkage on include file dependencies Mattias Rönnblom
@ 2024-09-17  9:36         ` Mattias Rönnblom
  2024-09-17  9:36         ` [PATCH v7 3/6] eal: add unit tests for bit operations Mattias Rönnblom
                           ` (3 subsequent siblings)
  5 siblings, 0 replies; 63+ messages in thread
From: Mattias Rönnblom @ 2024-09-17  9:36 UTC (permalink / raw)
  To: dev
  Cc: hofors, Heng Wang, Stephen Hemminger, Tyler Retzlaff,
	Morten Brørup, Jack Bond-Preston, David Marchand,
	Chengwen Feng, Mattias Rönnblom

Add functionality to test and modify the value of individual bits in
32-bit or 64-bit words.

These functions have no implications on memory ordering or atomicity,
and do not use volatile, and thus do not prevent any compiler
optimizations.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
Acked-by: Jack Bond-Preston <jack.bond-preston@foss.arm.com>

--

PATCH v3:
 * Remove unnecessary <rte_compat.h> include.
 * Remove redundant 'fun' parameter from the __RTE_GEN_BIT_*() macros
   (Jack Bond-Preston).
 * Introduce __RTE_GEN_BIT_OPS() macro, consistent with how things
   are done when generating the atomic bit operations.
 * Refer to volatile bit op functions as variants instead of families
   (macro parameter naming).

RFC v6:
 * Have rte_bit_test() accept const-marked bitsets.

RFC v4:
 * Add rte_bit_flip() which, believe it or not, flips the value of a bit.
 * Mark macro-generated private functions as experimental.
 * Use macros to generate *assign*() functions.

RFC v3:
 * Work around lack of C++ support for _Generic (Tyler Retzlaff).
 * Fix ','-related checkpatch warnings.
---
 lib/eal/include/rte_bitops.h | 260 ++++++++++++++++++++++++++++++++++-
 1 file changed, 258 insertions(+), 2 deletions(-)

diff --git a/lib/eal/include/rte_bitops.h b/lib/eal/include/rte_bitops.h
index 449565eeae..6915b945ba 100644
--- a/lib/eal/include/rte_bitops.h
+++ b/lib/eal/include/rte_bitops.h
@@ -2,6 +2,7 @@
  * Copyright(c) 2020 Arm Limited
  * Copyright(c) 2010-2019 Intel Corporation
  * Copyright(c) 2023 Microsoft Corporation
+ * Copyright(c) 2024 Ericsson AB
  */
 
 #ifndef _RTE_BITOPS_H_
@@ -11,12 +12,14 @@
  * @file
  * Bit Operations
  *
- * This file defines a family of APIs for bit operations
- * without enforcing memory ordering.
+ * This file provides functionality for low-level, single-word
+ * arithmetic and bit-level operations, such as counting or
+ * setting individual bits.
  */
 
 #include <stdint.h>
 
+#include <rte_compat.h>
 #include <rte_debug.h>
 
 #ifdef __cplusplus
@@ -105,6 +108,197 @@ extern "C" {
 #define RTE_FIELD_GET64(mask, reg) \
 		((typeof(mask))(((reg) & (mask)) >> rte_ctz64(mask)))
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Test bit in word.
+ *
+ * Generic selection macro to test the value of a bit in a 32-bit or
+ * 64-bit word. The type of operation depends on the type of the @c
+ * addr parameter.
+ *
+ * This macro does not give any guarantees in regards to memory
+ * ordering or atomicity.
+ *
+ * @param addr
+ *   A pointer to the word to query.
+ * @param nr
+ *   The index of the bit.
+ */
+#define rte_bit_test(addr, nr)					\
+	_Generic((addr),					\
+		uint32_t *: __rte_bit_test32,			\
+		const uint32_t *: __rte_bit_test32,		\
+		uint64_t *: __rte_bit_test64,			\
+		const uint64_t *: __rte_bit_test64)(addr, nr)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Set bit in word.
+ *
+ * Generic selection macro to set a bit in a 32-bit or 64-bit
+ * word. The type of operation depends on the type of the @c addr
+ * parameter.
+ *
+ * This macro does not give any guarantees in regards to memory
+ * ordering or atomicity.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ */
+#define rte_bit_set(addr, nr)				\
+	_Generic((addr),				\
+		 uint32_t *: __rte_bit_set32,		\
+		 uint64_t *: __rte_bit_set64)(addr, nr)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Clear bit in word.
+ *
+ * Generic selection macro to clear a bit in a 32-bit or 64-bit
+ * word. The type of operation depends on the type of the @c addr
+ * parameter.
+ *
+ * This macro does not give any guarantees in regards to memory
+ * ordering or atomicity.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ */
+#define rte_bit_clear(addr, nr)					\
+	_Generic((addr),					\
+		 uint32_t *: __rte_bit_clear32,			\
+		 uint64_t *: __rte_bit_clear64)(addr, nr)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Assign a value to a bit in word.
+ *
+ * Generic selection macro to assign a value to a bit in a 32-bit or 64-bit
+ * word. The type of operation depends on the type of the @c addr parameter.
+ *
+ * This macro does not give any guarantees in regards to memory
+ * ordering or atomicity.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ * @param value
+ *   The new value of the bit - true for '1', or false for '0'.
+ */
+#define rte_bit_assign(addr, nr, value)					\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_assign32,			\
+		 uint64_t *: __rte_bit_assign64)(addr, nr, value)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Flip a bit in word.
+ *
+ * Generic selection macro to change the value of a bit to '0' if '1'
+ * or '1' if '0' in a 32-bit or 64-bit word. The type of operation
+ * depends on the type of the @c addr parameter.
+ *
+ * This macro does not give any guarantees in regards to memory
+ * ordering or atomicity.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ */
+#define rte_bit_flip(addr, nr)						\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_flip32,				\
+		 uint64_t *: __rte_bit_flip64)(addr, nr)
+
+#define __RTE_GEN_BIT_TEST(variant, qualifier, size)			\
+	__rte_experimental						\
+	static inline bool						\
+	__rte_bit_ ## variant ## test ## size(const qualifier uint ## size ## _t *addr, \
+					    unsigned int nr)		\
+	{								\
+		RTE_ASSERT(nr < size);					\
+									\
+		uint ## size ## _t mask = (uint ## size ## _t)1 << nr;	\
+		return *addr & mask;					\
+	}
+
+#define __RTE_GEN_BIT_SET(variant, qualifier, size)			\
+	__rte_experimental						\
+	static inline void						\
+	__rte_bit_ ## variant ## set ## size(qualifier uint ## size ## _t *addr, \
+					    unsigned int nr)		\
+	{								\
+		RTE_ASSERT(nr < size);					\
+									\
+		uint ## size ## _t mask = (uint ## size ## _t)1 << nr;	\
+		*addr |= mask;						\
+	}								\
+
+#define __RTE_GEN_BIT_CLEAR(variant, qualifier, size)			\
+	__rte_experimental						\
+	static inline void						\
+	__rte_bit_ ## variant ## clear ## size(qualifier uint ## size ## _t *addr, \
+					    unsigned int nr)		\
+	{								\
+		RTE_ASSERT(nr < size);					\
+									\
+		uint ## size ## _t mask = ~((uint ## size ## _t)1 << nr); \
+		(*addr) &= mask;					\
+	}								\
+
+#define __RTE_GEN_BIT_ASSIGN(variant, qualifier, size)			\
+	__rte_experimental						\
+	static inline void						\
+	__rte_bit_ ## variant ## assign ## size(qualifier uint ## size ## _t *addr, \
+					    unsigned int nr, bool value) \
+	{								\
+		if (value)						\
+			__rte_bit_ ## variant ## set ## size(addr, nr);	\
+		else							\
+			__rte_bit_ ## variant ## clear ## size(addr, nr); \
+	}
+
+#define __RTE_GEN_BIT_FLIP(variant, qualifier, size)			\
+	__rte_experimental						\
+	static inline void						\
+	__rte_bit_ ## variant ## flip ## size(qualifier uint ## size ## _t *addr, \
+					    unsigned int nr)		\
+	{								\
+		bool value;						\
+									\
+		value = __rte_bit_ ## variant ## test ## size(addr, nr); \
+		__rte_bit_ ## variant ## assign ## size(addr, nr, !value); \
+	}
+
+#define __RTE_GEN_BIT_OPS(v, qualifier, size)	\
+	__RTE_GEN_BIT_TEST(v, qualifier, size)	\
+	__RTE_GEN_BIT_SET(v, qualifier, size)	\
+	__RTE_GEN_BIT_CLEAR(v, qualifier, size)	\
+	__RTE_GEN_BIT_ASSIGN(v, qualifier, size)	\
+	__RTE_GEN_BIT_FLIP(v, qualifier, size)
+
+#define __RTE_GEN_BIT_OPS_SIZE(size) \
+	__RTE_GEN_BIT_OPS(,, size)
+
+__RTE_GEN_BIT_OPS_SIZE(32)
+__RTE_GEN_BIT_OPS_SIZE(64)
+
 /*------------------------ 32-bit relaxed operations ------------------------*/
 
 /**
@@ -787,6 +981,68 @@ rte_log2_u64(uint64_t v)
 
 #ifdef __cplusplus
 }
+
+/*
+ * Since C++ doesn't support generic selection (i.e., _Generic),
+ * function overloading is used instead. Such functions must be
+ * defined outside 'extern "C"' to be accepted by the compiler.
+ */
+
+#undef rte_bit_test
+#undef rte_bit_set
+#undef rte_bit_clear
+#undef rte_bit_assign
+#undef rte_bit_flip
+
+#define __RTE_BIT_OVERLOAD_SZ_2(fun, qualifier, size, arg1_type, arg1_name) \
+	static inline void						\
+	rte_bit_ ## fun(qualifier uint ## size ## _t *addr,		\
+			arg1_type arg1_name)				\
+	{								\
+		__rte_bit_ ## fun ## size(addr, arg1_name);		\
+	}
+
+#define __RTE_BIT_OVERLOAD_2(fun, qualifier, arg1_type, arg1_name)	\
+	__RTE_BIT_OVERLOAD_SZ_2(fun, qualifier, 32, arg1_type, arg1_name) \
+	__RTE_BIT_OVERLOAD_SZ_2(fun, qualifier, 64, arg1_type, arg1_name)
+
+#define __RTE_BIT_OVERLOAD_SZ_2R(fun, qualifier, size, ret_type, arg1_type, \
+				 arg1_name)				\
+	static inline ret_type						\
+	rte_bit_ ## fun(qualifier uint ## size ## _t *addr,		\
+			arg1_type arg1_name)				\
+	{								\
+		return __rte_bit_ ## fun ## size(addr, arg1_name);	\
+	}
+
+#define __RTE_BIT_OVERLOAD_2R(fun, qualifier, ret_type, arg1_type, arg1_name) \
+	__RTE_BIT_OVERLOAD_SZ_2R(fun, qualifier, 32, ret_type, arg1_type, \
+				 arg1_name)				\
+	__RTE_BIT_OVERLOAD_SZ_2R(fun, qualifier, 64, ret_type, arg1_type, \
+				 arg1_name)
+
+#define __RTE_BIT_OVERLOAD_SZ_3(fun, qualifier, size, arg1_type, arg1_name, \
+				arg2_type, arg2_name)			\
+	static inline void						\
+	rte_bit_ ## fun(uint ## size ## _t *addr, arg1_type arg1_name,	\
+			arg2_type arg2_name)				\
+	{								\
+		__rte_bit_ ## fun ## size(addr, arg1_name, arg2_name);	\
+	}
+
+#define __RTE_BIT_OVERLOAD_3(fun, qualifier, arg1_type, arg1_name, arg2_type, \
+			     arg2_name)					\
+	__RTE_BIT_OVERLOAD_SZ_3(fun, qualifier, 32, arg1_type, arg1_name, \
+				arg2_type, arg2_name)			\
+	__RTE_BIT_OVERLOAD_SZ_3(fun, qualifier, 64, arg1_type, arg1_name, \
+				arg2_type, arg2_name)
+
+__RTE_BIT_OVERLOAD_2R(test, const, bool, unsigned int, nr)
+__RTE_BIT_OVERLOAD_2(set,, unsigned int, nr)
+__RTE_BIT_OVERLOAD_2(clear,, unsigned int, nr)
+__RTE_BIT_OVERLOAD_3(assign,, unsigned int, nr, bool, value)
+__RTE_BIT_OVERLOAD_2(flip,, unsigned int, nr)
+
 #endif
 
 #endif /* _RTE_BITOPS_H_ */
-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [PATCH v7 3/6] eal: add unit tests for bit operations
  2024-09-17  9:36       ` [PATCH v7 0/6] Improve EAL bit operations API Mattias Rönnblom
  2024-09-17  9:36         ` [PATCH v7 1/6] dpdk: do not force C linkage on include file dependencies Mattias Rönnblom
  2024-09-17  9:36         ` [PATCH v7 2/6] eal: extend bit manipulation functionality Mattias Rönnblom
@ 2024-09-17  9:36         ` Mattias Rönnblom
  2024-09-17  9:36         ` [PATCH v7 4/6] eal: add atomic " Mattias Rönnblom
                           ` (2 subsequent siblings)
  5 siblings, 0 replies; 63+ messages in thread
From: Mattias Rönnblom @ 2024-09-17  9:36 UTC (permalink / raw)
  To: dev
  Cc: hofors, Heng Wang, Stephen Hemminger, Tyler Retzlaff,
	Morten Brørup, Jack Bond-Preston, David Marchand,
	Chengwen Feng, Mattias Rönnblom

Extend bitops tests to cover the
rte_bit_[test|set|clear|assign|flip]()
functions.

The tests are converted to use the test suite runner framework.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
Acked-by: Jack Bond-Preston <jack.bond-preston@foss.arm.com>

--

RFC v6:
 * Test rte_bit_*test() usage through const pointers.

RFC v4:
 * Remove redundant line continuations.
---
 app/test/test_bitops.c | 85 ++++++++++++++++++++++++++++++++++--------
 1 file changed, 70 insertions(+), 15 deletions(-)

diff --git a/app/test/test_bitops.c b/app/test/test_bitops.c
index 0d4ccfb468..322f58c066 100644
--- a/app/test/test_bitops.c
+++ b/app/test/test_bitops.c
@@ -1,13 +1,68 @@
 /* SPDX-License-Identifier: BSD-3-Clause
  * Copyright(c) 2019 Arm Limited
+ * Copyright(c) 2024 Ericsson AB
  */
 
+#include <stdbool.h>
+
 #include <rte_launch.h>
 #include <rte_bitops.h>
+#include <rte_random.h>
 #include "test.h"
 
-uint32_t val32;
-uint64_t val64;
+#define GEN_TEST_BIT_ACCESS(test_name, set_fun, clear_fun, assign_fun,	\
+			    flip_fun, test_fun, size)			\
+	static int							\
+	test_name(void)							\
+	{								\
+		uint ## size ## _t reference = (uint ## size ## _t)rte_rand(); \
+		unsigned int bit_nr;					\
+		uint ## size ## _t word = (uint ## size ## _t)rte_rand(); \
+									\
+		for (bit_nr = 0; bit_nr < size; bit_nr++) {		\
+			bool reference_bit = (reference >> bit_nr) & 1;	\
+			bool assign = rte_rand() & 1;			\
+			if (assign)					\
+				assign_fun(&word, bit_nr, reference_bit); \
+			else {						\
+				if (reference_bit)			\
+					set_fun(&word, bit_nr);		\
+				else					\
+					clear_fun(&word, bit_nr);	\
+									\
+			}						\
+			TEST_ASSERT(test_fun(&word, bit_nr) == reference_bit, \
+				    "Bit %d had unexpected value", bit_nr); \
+			flip_fun(&word, bit_nr);			\
+			TEST_ASSERT(test_fun(&word, bit_nr) != reference_bit, \
+				    "Bit %d had unflipped value", bit_nr); \
+			flip_fun(&word, bit_nr);			\
+									\
+			const uint ## size ## _t *const_ptr = &word;	\
+			TEST_ASSERT(test_fun(const_ptr, bit_nr) ==	\
+				    reference_bit,			\
+				    "Bit %d had unexpected value", bit_nr); \
+		}							\
+									\
+		for (bit_nr = 0; bit_nr < size; bit_nr++) {		\
+			bool reference_bit = (reference >> bit_nr) & 1;	\
+			TEST_ASSERT(test_fun(&word, bit_nr) == reference_bit, \
+				    "Bit %d had unexpected value", bit_nr); \
+		}							\
+									\
+		TEST_ASSERT(reference == word, "Word had unexpected value"); \
+									\
+		return TEST_SUCCESS;					\
+	}
+
+GEN_TEST_BIT_ACCESS(test_bit_access32, rte_bit_set, rte_bit_clear,
+		    rte_bit_assign, rte_bit_flip, rte_bit_test, 32)
+
+GEN_TEST_BIT_ACCESS(test_bit_access64, rte_bit_set, rte_bit_clear,
+		    rte_bit_assign, rte_bit_flip, rte_bit_test, 64)
+
+static uint32_t val32;
+static uint64_t val64;
 
 #define MAX_BITS_32 32
 #define MAX_BITS_64 64
@@ -117,22 +172,22 @@ test_bit_relaxed_test_set_clear(void)
 	return TEST_SUCCESS;
 }
 
+static struct unit_test_suite test_suite = {
+	.suite_name = "Bitops test suite",
+	.unit_test_cases = {
+		TEST_CASE(test_bit_access32),
+		TEST_CASE(test_bit_access64),
+		TEST_CASE(test_bit_relaxed_set),
+		TEST_CASE(test_bit_relaxed_clear),
+		TEST_CASE(test_bit_relaxed_test_set_clear),
+		TEST_CASES_END()
+	}
+};
+
 static int
 test_bitops(void)
 {
-	val32 = 0;
-	val64 = 0;
-
-	if (test_bit_relaxed_set() < 0)
-		return TEST_FAILED;
-
-	if (test_bit_relaxed_clear() < 0)
-		return TEST_FAILED;
-
-	if (test_bit_relaxed_test_set_clear() < 0)
-		return TEST_FAILED;
-
-	return TEST_SUCCESS;
+	return unit_test_suite_runner(&test_suite);
 }
 
 REGISTER_FAST_TEST(bitops_autotest, true, true, test_bitops);
-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [PATCH v7 4/6] eal: add atomic bit operations
  2024-09-17  9:36       ` [PATCH v7 0/6] Improve EAL bit operations API Mattias Rönnblom
                           ` (2 preceding siblings ...)
  2024-09-17  9:36         ` [PATCH v7 3/6] eal: add unit tests for bit operations Mattias Rönnblom
@ 2024-09-17  9:36         ` Mattias Rönnblom
  2024-09-17  9:36         ` [PATCH v7 5/6] eal: add unit tests for atomic bit access functions Mattias Rönnblom
  2024-09-17  9:36         ` [PATCH v7 6/6] eal: extend bitops to handle volatile pointers Mattias Rönnblom
  5 siblings, 0 replies; 63+ messages in thread
From: Mattias Rönnblom @ 2024-09-17  9:36 UTC (permalink / raw)
  To: dev
  Cc: hofors, Heng Wang, Stephen Hemminger, Tyler Retzlaff,
	Morten Brørup, Jack Bond-Preston, David Marchand,
	Chengwen Feng, Mattias Rönnblom

Add atomic bit test/set/clear/assign/flip and
test-and-set/clear/assign/flip functions.

All atomic bit functions allow (and indeed, require) the caller to
specify a memory order.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
Acked-by: Jack Bond-Preston <jack.bond-preston@foss.arm.com>

--

PATCH v3:
 * Introduce __RTE_GEN_BIT_ATOMIC_*() 'qualifier' argument already in
   this patch (Jack Bond-Preston).
 * Refer to volatile bit op functions as variants instead of families
   (macro parameter naming).
 * Update release notes.

PATCH:
 * Add missing macro #undef for C++ version of atomic bit flip.

RFC v7:
 * Replace compare-exchange-based rte_bit_atomic_test_and_*() and
   flip() with implementations that use the previous value as returned
   by the atomic fetch function.
 * Reword documentation to match the non-atomic macro variants.
 * Remove pointer to <rte_stdatomic.h> for memory model documentation,
   since there is no documentation for that API.

RFC v6:
 * Have rte_bit_atomic_test() accept const-marked bitsets.

RFC v4:
 * Add atomic bit flip.
 * Mark macro-generated private functions experimental.

RFC v3:
 * Work around lack of C++ support for _Generic (Tyler Retzlaff).

RFC v2:
 o Add rte_bit_atomic_test_and_assign() (for consistency).
 o Fix bugs in rte_bit_atomic_test_and_[set|clear]().
 o Use <rte_stdatomic.h> to support MSVC.
---
 doc/guides/rel_notes/release_24_11.rst |  17 +
 lib/eal/include/rte_bitops.h           | 415 +++++++++++++++++++++++++
 2 files changed, 432 insertions(+)

diff --git a/doc/guides/rel_notes/release_24_11.rst b/doc/guides/rel_notes/release_24_11.rst
index 0ff70d9057..3111b1e4c0 100644
--- a/doc/guides/rel_notes/release_24_11.rst
+++ b/doc/guides/rel_notes/release_24_11.rst
@@ -56,6 +56,23 @@ New Features
      =======================================================
 
 
+* **Extended bit operations API.**
+
+  The support for bit-level operations on single 32- and 64-bit words
+  in <rte_bitops.h> has been extended with two families of
+  semantically well-defined functions.
+
+  rte_bit_[test|set|clear|assign|flip]() functions provide excellent
+  performance (by avoiding restricting the compiler and CPU), but give
+  no guarantees in regards to memory ordering or atomicity.
+
+  rte_bit_atomic_*() provide atomic bit-level operations, including
+  the possibility of specifying memory ordering constraints.
+
+  The new public API elements are polymorphic, using _Generic-based
+  macros (for C) and function overloading (in C++ translation
+  units).
+
 Removed Items
 -------------
 
diff --git a/lib/eal/include/rte_bitops.h b/lib/eal/include/rte_bitops.h
index 6915b945ba..3ad6795fd1 100644
--- a/lib/eal/include/rte_bitops.h
+++ b/lib/eal/include/rte_bitops.h
@@ -21,6 +21,7 @@
 
 #include <rte_compat.h>
 #include <rte_debug.h>
+#include <rte_stdatomic.h>
 
 #ifdef __cplusplus
 extern "C" {
@@ -226,6 +227,204 @@ extern "C" {
 		 uint32_t *: __rte_bit_flip32,				\
 		 uint64_t *: __rte_bit_flip64)(addr, nr)
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Test if a particular bit in a word is set with a particular memory
+ * order.
+ *
+ * Test a bit with the resulting memory load ordered as per the
+ * specified memory order.
+ *
+ * @param addr
+ *   A pointer to the word to query.
+ * @param nr
+ *   The index of the bit.
+ * @param memory_order
+ *   The memory order to use.
+ * @return
+ *   Returns true if the bit is set, and false otherwise.
+ */
+#define rte_bit_atomic_test(addr, nr, memory_order)			\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_atomic_test32,			\
+		 const uint32_t *: __rte_bit_atomic_test32,		\
+		 uint64_t *: __rte_bit_atomic_test64,			\
+		 const uint64_t *: __rte_bit_atomic_test64)(addr, nr,	\
+							    memory_order)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Atomically set bit in word.
+ *
+ * Generic selection macro to atomically set bit specified by @c nr in
+ * the word pointed to by @c addr to '1', with the memory ordering as
+ * specified by @c memory_order.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ * @param memory_order
+ *   The memory order to use.
+ */
+#define rte_bit_atomic_set(addr, nr, memory_order)			\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_atomic_set32,			\
+		 uint64_t *: __rte_bit_atomic_set64)(addr, nr, memory_order)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Atomically clear bit in word.
+ *
+ * Generic selection macro to atomically set bit specified by @c nr in
+ * the word pointed to by @c addr to '0', with the memory ordering as
+ * specified by @c memory_order.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ * @param memory_order
+ *   The memory order to use.
+ */
+#define rte_bit_atomic_clear(addr, nr, memory_order)			\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_atomic_clear32,			\
+		 uint64_t *: __rte_bit_atomic_clear64)(addr, nr, memory_order)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Atomically assign a value to bit in word.
+ *
+ * Generic selection macro to atomically set bit specified by @c nr in the
+ * word pointed to by @c addr to the value indicated by @c value, with
+ * the memory ordering as specified with @c memory_order.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ * @param value
+ *   The new value of the bit - true for '1', or false for '0'.
+ * @param memory_order
+ *   The memory order to use.
+ */
+#define rte_bit_atomic_assign(addr, nr, value, memory_order)		\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_atomic_assign32,			\
+		 uint64_t *: __rte_bit_atomic_assign64)(addr, nr, value, \
+							memory_order)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Atomically flip bit in word.
+ *
+ * Generic selection macro to atomically negate the value of the bit
+ * specified by @c nr in the word pointed to by @c addr (i.e., a '0'
+ * becomes '1' and vice versa), with the memory ordering as specified
+ * with @c memory_order.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ * @param memory_order
+ *   The memory order to use.
+ */
+#define rte_bit_atomic_flip(addr, nr, memory_order)			\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_atomic_flip32,			\
+		 uint64_t *: __rte_bit_atomic_flip64)(addr, nr, memory_order)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Atomically test and set a bit in word.
+ *
+ * Generic selection macro to atomically test and set bit specified by
+ * @c nr in the word pointed to by @c addr to '1', with the memory
+ * ordering as specified with @c memory_order.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ * @param memory_order
+ *   The memory order to use.
+ * @return
+ *   Returns true if the bit was set, and false otherwise.
+ */
+#define rte_bit_atomic_test_and_set(addr, nr, memory_order)		\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_atomic_test_and_set32,		\
+		 uint64_t *: __rte_bit_atomic_test_and_set64)(addr, nr,	\
+							      memory_order)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Atomically test and clear a bit in word.
+ *
+ * Generic selection macro to atomically test and clear bit specified
+ * by @c nr in the word pointed to by @c addr to '0', with the memory
+ * ordering as specified with @c memory_order.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ * @param memory_order
+ *   The memory order to use.
+ * @return
+ *   Returns true if the bit was set, and false otherwise.
+ */
+#define rte_bit_atomic_test_and_clear(addr, nr, memory_order)		\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_atomic_test_and_clear32,		\
+		 uint64_t *: __rte_bit_atomic_test_and_clear64)(addr, nr, \
+								memory_order)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Atomically test and assign a bit in word.
+ *
+ * Generic selection macro to atomically test and assign bit specified
+ * by @c nr in the word pointed to by @c addr to the value specified
+ * by @c value, with the memory ordering as specified with @c
+ * memory_order.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ * @param value
+ *   The new value of the bit - true for '1', or false for '0'.
+ * @param memory_order
+ *   The memory order to use.
+ * @return
+ *   Returns true if the bit was set, and false otherwise.
+ */
+#define rte_bit_atomic_test_and_assign(addr, nr, value, memory_order)	\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_atomic_test_and_assign32,	\
+		 uint64_t *: __rte_bit_atomic_test_and_assign64)(addr, nr, \
+								 value, \
+								 memory_order)
+
 #define __RTE_GEN_BIT_TEST(variant, qualifier, size)			\
 	__rte_experimental						\
 	static inline bool						\
@@ -299,6 +498,146 @@ extern "C" {
 __RTE_GEN_BIT_OPS_SIZE(32)
 __RTE_GEN_BIT_OPS_SIZE(64)
 
+#define __RTE_GEN_BIT_ATOMIC_TEST(variant, qualifier, size)		\
+	__rte_experimental						\
+	static inline bool						\
+	__rte_bit_atomic_ ## variant ## test ## size(const qualifier uint ## size ## _t *addr, \
+						     unsigned int nr, int memory_order) \
+	{								\
+		RTE_ASSERT(nr < size);					\
+									\
+		const qualifier RTE_ATOMIC(uint ## size ## _t) *a_addr = \
+			(const qualifier RTE_ATOMIC(uint ## size ## _t) *)addr;	\
+		uint ## size ## _t mask = (uint ## size ## _t)1 << nr;	\
+		return rte_atomic_load_explicit(a_addr, memory_order) & mask; \
+	}
+
+#define __RTE_GEN_BIT_ATOMIC_SET(variant, qualifier, size)		\
+	__rte_experimental						\
+	static inline void						\
+	__rte_bit_atomic_ ## variant ## set ## size(qualifier uint ## size ## _t *addr, \
+					      unsigned int nr, int memory_order) \
+	{								\
+		RTE_ASSERT(nr < size);					\
+									\
+		qualifier RTE_ATOMIC(uint ## size ## _t) *a_addr =	\
+			(qualifier RTE_ATOMIC(uint ## size ## _t) *)addr; \
+		uint ## size ## _t mask = (uint ## size ## _t)1 << nr;	\
+		rte_atomic_fetch_or_explicit(a_addr, mask, memory_order); \
+	}
+
+#define __RTE_GEN_BIT_ATOMIC_CLEAR(variant, qualifier, size)		\
+	__rte_experimental						\
+	static inline void						\
+	__rte_bit_atomic_ ## variant ## clear ## size(qualifier uint ## size ## _t *addr,	\
+						unsigned int nr, int memory_order) \
+	{								\
+		RTE_ASSERT(nr < size);					\
+									\
+		qualifier RTE_ATOMIC(uint ## size ## _t) *a_addr =	\
+			(qualifier RTE_ATOMIC(uint ## size ## _t) *)addr; \
+		uint ## size ## _t mask = (uint ## size ## _t)1 << nr;	\
+		rte_atomic_fetch_and_explicit(a_addr, ~mask, memory_order); \
+	}
+
+#define __RTE_GEN_BIT_ATOMIC_FLIP(variant, qualifier, size)		\
+	__rte_experimental						\
+	static inline void						\
+	__rte_bit_atomic_ ## variant ## flip ## size(qualifier uint ## size ## _t *addr, \
+					       unsigned int nr, int memory_order) \
+	{								\
+		RTE_ASSERT(nr < size);					\
+									\
+		qualifier RTE_ATOMIC(uint ## size ## _t) *a_addr =	\
+			(qualifier RTE_ATOMIC(uint ## size ## _t) *)addr; \
+		uint ## size ## _t mask = (uint ## size ## _t)1 << nr;	\
+		rte_atomic_fetch_xor_explicit(a_addr, mask, memory_order); \
+	}
+
+#define __RTE_GEN_BIT_ATOMIC_ASSIGN(variant, qualifier, size)		\
+	__rte_experimental						\
+	static inline void						\
+	__rte_bit_atomic_## variant ## assign ## size(qualifier uint ## size ## _t *addr, \
+						unsigned int nr, bool value, \
+						int memory_order)	\
+	{								\
+		if (value)						\
+			__rte_bit_atomic_ ## variant ## set ## size(addr, nr, memory_order); \
+		else							\
+			__rte_bit_atomic_ ## variant ## clear ## size(addr, nr, \
+								     memory_order); \
+	}
+
+#define __RTE_GEN_BIT_ATOMIC_TEST_AND_SET(variant, qualifier, size)	\
+	__rte_experimental						\
+	static inline bool						\
+	__rte_bit_atomic_ ## variant ## test_and_set ## size(qualifier uint ## size ## _t *addr, \
+						       unsigned int nr,	\
+						       int memory_order) \
+	{								\
+		RTE_ASSERT(nr < size);					\
+									\
+		qualifier RTE_ATOMIC(uint ## size ## _t) *a_addr =	\
+			(qualifier RTE_ATOMIC(uint ## size ## _t) *)addr; \
+		uint ## size ## _t mask = (uint ## size ## _t)1 << nr;	\
+		uint ## size ## _t prev;				\
+									\
+		prev = rte_atomic_fetch_or_explicit(a_addr, mask,	\
+						    memory_order);	\
+									\
+		return prev & mask;					\
+	}
+
+#define __RTE_GEN_BIT_ATOMIC_TEST_AND_CLEAR(variant, qualifier, size)	\
+	__rte_experimental						\
+	static inline bool						\
+	__rte_bit_atomic_ ## variant ## test_and_clear ## size(qualifier uint ## size ## _t *addr, \
+							 unsigned int nr, \
+							 int memory_order) \
+	{								\
+		RTE_ASSERT(nr < size);					\
+									\
+		qualifier RTE_ATOMIC(uint ## size ## _t) *a_addr =	\
+			(qualifier RTE_ATOMIC(uint ## size ## _t) *)addr; \
+		uint ## size ## _t mask = (uint ## size ## _t)1 << nr;	\
+		uint ## size ## _t prev;				\
+									\
+	        prev = rte_atomic_fetch_and_explicit(a_addr, ~mask,	\
+						     memory_order);	\
+									\
+		return prev & mask;					\
+	}
+
+#define __RTE_GEN_BIT_ATOMIC_TEST_AND_ASSIGN(variant, qualifier, size)	\
+	__rte_experimental						\
+	static inline bool						\
+	__rte_bit_atomic_ ## variant ## test_and_assign ## size(qualifier uint ## size ## _t *addr, \
+							  unsigned int nr, \
+							  bool value,	\
+							  int memory_order) \
+	{								\
+		if (value)						\
+			return __rte_bit_atomic_ ## variant ## test_and_set ## size(addr, nr, memory_order); \
+		else							\
+			return __rte_bit_atomic_ ## variant ## test_and_clear ## size(addr, nr, memory_order); \
+	}
+
+#define __RTE_GEN_BIT_ATOMIC_OPS(variant, qualifier, size)	\
+	__RTE_GEN_BIT_ATOMIC_TEST(variant, qualifier, size)	\
+	__RTE_GEN_BIT_ATOMIC_SET(variant, qualifier, size)	\
+	__RTE_GEN_BIT_ATOMIC_CLEAR(variant, qualifier, size)	\
+	__RTE_GEN_BIT_ATOMIC_ASSIGN(variant, qualifier, size)	\
+	__RTE_GEN_BIT_ATOMIC_TEST_AND_SET(variant, qualifier, size) \
+	__RTE_GEN_BIT_ATOMIC_TEST_AND_CLEAR(variant, qualifier, size) \
+	__RTE_GEN_BIT_ATOMIC_TEST_AND_ASSIGN(variant, qualifier, size) \
+	__RTE_GEN_BIT_ATOMIC_FLIP(variant, qualifier, size)
+
+#define __RTE_GEN_BIT_ATOMIC_OPS_SIZE(size) \
+	__RTE_GEN_BIT_ATOMIC_OPS(,, size)
+
+__RTE_GEN_BIT_ATOMIC_OPS_SIZE(32)
+__RTE_GEN_BIT_ATOMIC_OPS_SIZE(64)
+
 /*------------------------ 32-bit relaxed operations ------------------------*/
 
 /**
@@ -994,6 +1333,15 @@ rte_log2_u64(uint64_t v)
 #undef rte_bit_assign
 #undef rte_bit_flip
 
+#undef rte_bit_atomic_test
+#undef rte_bit_atomic_set
+#undef rte_bit_atomic_clear
+#undef rte_bit_atomic_assign
+#undef rte_bit_atomic_flip
+#undef rte_bit_atomic_test_and_set
+#undef rte_bit_atomic_test_and_clear
+#undef rte_bit_atomic_test_and_assign
+
 #define __RTE_BIT_OVERLOAD_SZ_2(fun, qualifier, size, arg1_type, arg1_name) \
 	static inline void						\
 	rte_bit_ ## fun(qualifier uint ## size ## _t *addr,		\
@@ -1037,12 +1385,79 @@ rte_log2_u64(uint64_t v)
 	__RTE_BIT_OVERLOAD_SZ_3(fun, qualifier, 64, arg1_type, arg1_name, \
 				arg2_type, arg2_name)
 
+#define __RTE_BIT_OVERLOAD_SZ_3R(fun, qualifier, size, ret_type, arg1_type, \
+				 arg1_name, arg2_type, arg2_name)	\
+	static inline ret_type						\
+	rte_bit_ ## fun(qualifier uint ## size ## _t *addr,		\
+			arg1_type arg1_name, arg2_type arg2_name)	\
+	{ /* 'qualifier' (e.g. const) is part of the overload's pointer type */ \
+		return __rte_bit_ ## fun ## size(addr, arg1_name, arg2_name); \
+	}
+
+#define __RTE_BIT_OVERLOAD_3R(fun, qualifier, ret_type, arg1_type, arg1_name, \
+			      arg2_type, arg2_name)			\
+	__RTE_BIT_OVERLOAD_SZ_3R(fun, qualifier, 32, ret_type, arg1_type, \
+				 arg1_name, arg2_type, arg2_name)	\
+	__RTE_BIT_OVERLOAD_SZ_3R(fun, qualifier, 64, ret_type, arg1_type, \
+				 arg1_name, arg2_type, arg2_name)
+
+#define __RTE_BIT_OVERLOAD_SZ_4(fun, qualifier, size, arg1_type, arg1_name, \
+				arg2_type, arg2_name, arg3_type, arg3_name) \
+	static inline void						\
+	rte_bit_ ## fun(qualifier uint ## size ## _t *addr,		\
+			arg1_type arg1_name, arg2_type arg2_name,	\
+			arg3_type arg3_name)				\
+	{ /* 'qualifier' is applied to the pointer type, as in SZ_2 */	\
+		__rte_bit_ ## fun ## size(addr, arg1_name, arg2_name, arg3_name); \
+	}
+
+#define __RTE_BIT_OVERLOAD_4(fun, qualifier, arg1_type, arg1_name, arg2_type, \
+			     arg2_name, arg3_type, arg3_name)		\
+	__RTE_BIT_OVERLOAD_SZ_4(fun, qualifier, 32, arg1_type, arg1_name, \
+				arg2_type, arg2_name, arg3_type, arg3_name) \
+	__RTE_BIT_OVERLOAD_SZ_4(fun, qualifier, 64, arg1_type, arg1_name, \
+				arg2_type, arg2_name, arg3_type, arg3_name)
+
+#define __RTE_BIT_OVERLOAD_SZ_4R(fun, qualifier, size, ret_type, arg1_type, \
+				 arg1_name, arg2_type, arg2_name, arg3_type, \
+				 arg3_name)				\
+	static inline ret_type						\
+	rte_bit_ ## fun(qualifier uint ## size ## _t *addr,		\
+			arg1_type arg1_name, arg2_type arg2_name,	\
+			arg3_type arg3_name)				\
+	{ /* 'qualifier' is applied to the pointer type, as in SZ_2 */	\
+		return __rte_bit_ ## fun ## size(addr, arg1_name, arg2_name, arg3_name); \
+	}
+
+#define __RTE_BIT_OVERLOAD_4R(fun, qualifier, ret_type, arg1_type, arg1_name, \
+			      arg2_type, arg2_name, arg3_type, arg3_name) \
+	__RTE_BIT_OVERLOAD_SZ_4R(fun, qualifier, 32, ret_type, arg1_type, \
+				 arg1_name, arg2_type, arg2_name, arg3_type, \
+				 arg3_name)				\
+	__RTE_BIT_OVERLOAD_SZ_4R(fun, qualifier, 64, ret_type, arg1_type, \
+				 arg1_name, arg2_type, arg2_name, arg3_type, \
+				 arg3_name)
+
 __RTE_BIT_OVERLOAD_2R(test, const, bool, unsigned int, nr)
 __RTE_BIT_OVERLOAD_2(set,, unsigned int, nr)
 __RTE_BIT_OVERLOAD_2(clear,, unsigned int, nr)
 __RTE_BIT_OVERLOAD_3(assign,, unsigned int, nr, bool, value)
 __RTE_BIT_OVERLOAD_2(flip,, unsigned int, nr)
 
+__RTE_BIT_OVERLOAD_3R(atomic_test, const, bool, unsigned int, nr,
+		      int, memory_order)
+__RTE_BIT_OVERLOAD_3(atomic_set,, unsigned int, nr, int, memory_order)
+__RTE_BIT_OVERLOAD_3(atomic_clear,, unsigned int, nr, int, memory_order)
+__RTE_BIT_OVERLOAD_4(atomic_assign,, unsigned int, nr, bool, value,
+		     int, memory_order)
+__RTE_BIT_OVERLOAD_3(atomic_flip,, unsigned int, nr, int, memory_order)
+__RTE_BIT_OVERLOAD_3R(atomic_test_and_set,, bool, unsigned int, nr,
+		      int, memory_order)
+__RTE_BIT_OVERLOAD_3R(atomic_test_and_clear,, bool, unsigned int, nr,
+		      int, memory_order)
+__RTE_BIT_OVERLOAD_4R(atomic_test_and_assign,, bool, unsigned int, nr,
+		      bool, value, int, memory_order)
+
 #endif
 
 #endif /* _RTE_BITOPS_H_ */
-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [PATCH v7 5/6] eal: add unit tests for atomic bit access functions
  2024-09-17  9:36       ` [PATCH v7 0/6] Improve EAL bit operations API Mattias Rönnblom
                           ` (3 preceding siblings ...)
  2024-09-17  9:36         ` [PATCH v7 4/6] eal: add atomic " Mattias Rönnblom
@ 2024-09-17  9:36         ` Mattias Rönnblom
  2024-09-17  9:36         ` [PATCH v7 6/6] eal: extend bitops to handle volatile pointers Mattias Rönnblom
  5 siblings, 0 replies; 63+ messages in thread
From: Mattias Rönnblom @ 2024-09-17  9:36 UTC (permalink / raw)
  To: dev
  Cc: hofors, Heng Wang, Stephen Hemminger, Tyler Retzlaff,
	Morten Brørup, Jack Bond-Preston, David Marchand,
	Chengwen Feng, Mattias Rönnblom

Extend bitops tests to cover the rte_bit_atomic_*() family of
functions.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
Acked-by: Jack Bond-Preston <jack.bond-preston@foss.arm.com>

--

RFC v4:
 * Add atomicity test for atomic bit flip.

RFC v3:
 * Rename variable 'main' to make ICC happy.
---
 app/test/test_bitops.c | 313 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 312 insertions(+), 1 deletion(-)

diff --git a/app/test/test_bitops.c b/app/test/test_bitops.c
index 322f58c066..b80216a0a1 100644
--- a/app/test/test_bitops.c
+++ b/app/test/test_bitops.c
@@ -3,10 +3,13 @@
  * Copyright(c) 2024 Ericsson AB
  */
 
+#include <inttypes.h>
 #include <stdbool.h>
 
-#include <rte_launch.h>
 #include <rte_bitops.h>
+#include <rte_cycles.h>
+#include <rte_launch.h>
+#include <rte_lcore.h>
 #include <rte_random.h>
 #include "test.h"
 
@@ -61,6 +64,304 @@ GEN_TEST_BIT_ACCESS(test_bit_access32, rte_bit_set, rte_bit_clear,
 GEN_TEST_BIT_ACCESS(test_bit_access64, rte_bit_set, rte_bit_clear,
 		    rte_bit_assign, rte_bit_flip, rte_bit_test, 64)
 
+#define bit_atomic_set(addr, nr)				\
+	rte_bit_atomic_set(addr, nr, rte_memory_order_relaxed)
+
+#define bit_atomic_clear(addr, nr)					\
+	rte_bit_atomic_clear(addr, nr, rte_memory_order_relaxed)
+
+#define bit_atomic_assign(addr, nr, value)				\
+	rte_bit_atomic_assign(addr, nr, value, rte_memory_order_relaxed)
+
+#define bit_atomic_flip(addr, nr) /* two-argument, relaxed-order wrapper */ \
+	rte_bit_atomic_flip(addr, nr, rte_memory_order_relaxed)
+
+#define bit_atomic_test(addr, nr)				\
+	rte_bit_atomic_test(addr, nr, rte_memory_order_relaxed)
+
+GEN_TEST_BIT_ACCESS(test_bit_atomic_access32, bit_atomic_set,
+		    bit_atomic_clear, bit_atomic_assign,
+		    bit_atomic_flip, bit_atomic_test, 32)
+
+GEN_TEST_BIT_ACCESS(test_bit_atomic_access64, bit_atomic_set,
+		    bit_atomic_clear, bit_atomic_assign,
+		    bit_atomic_flip, bit_atomic_test, 64)
+
+#define PARALLEL_TEST_RUNTIME 0.25
+
+#define GEN_TEST_BIT_PARALLEL_ASSIGN(size)				\
+									\
+	struct parallel_access_lcore ## size				\
+	{								\
+		unsigned int bit;					\
+		uint ## size ##_t *word;				\
+		bool failed;						\
+	};								\
+									\
+	static int							\
+	run_parallel_assign ## size(void *arg)				\
+	{								\
+		struct parallel_access_lcore ## size *lcore = arg;	\
+		uint64_t deadline = rte_get_timer_cycles() +		\
+			PARALLEL_TEST_RUNTIME * rte_get_timer_hz();	\
+		bool value = false;					\
+									\
+		do {							\
+			bool new_value = rte_rand() & 1;		\
+			bool use_test_and_modify = rte_rand() & 1;	\
+			bool use_assign = rte_rand() & 1;		\
+									\
+			if (rte_bit_atomic_test(lcore->word, lcore->bit, \
+						rte_memory_order_relaxed) != value) { \
+				lcore->failed = true;			\
+				break;					\
+			}						\
+									\
+			if (use_test_and_modify) {			\
+				bool old_value;				\
+				if (use_assign) 			\
+					old_value = rte_bit_atomic_test_and_assign( \
+						lcore->word, lcore->bit, new_value, \
+						rte_memory_order_relaxed); \
+				else {					\
+					old_value = new_value ?		\
+						rte_bit_atomic_test_and_set( \
+							lcore->word, lcore->bit, \
+							rte_memory_order_relaxed) : \
+						rte_bit_atomic_test_and_clear( \
+							lcore->word, lcore->bit, \
+							rte_memory_order_relaxed); \
+				}					\
+				if (old_value != value) {		\
+					lcore->failed = true;		\
+					break;				\
+				}					\
+			} else {					\
+				if (use_assign)				\
+					rte_bit_atomic_assign(lcore->word, lcore->bit, \
+							      new_value, \
+							      rte_memory_order_relaxed); \
+				else {					\
+					if (new_value)			\
+						rte_bit_atomic_set(	\
+							lcore->word, lcore->bit, \
+							rte_memory_order_relaxed); \
+					else				\
+						rte_bit_atomic_clear(	\
+							lcore->word, lcore->bit, \
+							rte_memory_order_relaxed); \
+				}					\
+			}						\
+									\
+			value = new_value;				\
+		} while (rte_get_timer_cycles() < deadline);		\
+									\
+		return 0;						\
+	}								\
+									\
+	static int							\
+	test_bit_atomic_parallel_assign ## size(void)			\
+	{								\
+		unsigned int worker_lcore_id;				\
+		uint ## size ## _t word = 0;				\
+		struct parallel_access_lcore ## size lmain = {		\
+			.word = &word					\
+		};							\
+		struct parallel_access_lcore ## size lworker = {	\
+			.word = &word					\
+		};							\
+									\
+		if (rte_lcore_count() < 2) {				\
+			printf("Need multiple cores to run parallel test.\n"); \
+			return TEST_SKIPPED;				\
+		}							\
+									\
+		worker_lcore_id = rte_get_next_lcore(-1, 1, 0);		\
+									\
+		lmain.bit = rte_rand_max(size);				\
+		do {							\
+			lworker.bit = rte_rand_max(size);		\
+		} while (lworker.bit == lmain.bit);			\
+									\
+		int rc = rte_eal_remote_launch(run_parallel_assign ## size, \
+					       &lworker, worker_lcore_id); \
+		TEST_ASSERT(rc == 0, "Worker thread launch failed");	\
+									\
+		run_parallel_assign ## size(&lmain);			\
+									\
+		rte_eal_mp_wait_lcore();				\
+									\
+		TEST_ASSERT(!lmain.failed, "Main lcore atomic access failed"); \
+		TEST_ASSERT(!lworker.failed, "Worker lcore atomic access " \
+			    "failed");					\
+									\
+		return TEST_SUCCESS;					\
+	}
+
+GEN_TEST_BIT_PARALLEL_ASSIGN(32)
+GEN_TEST_BIT_PARALLEL_ASSIGN(64)
+
+#define GEN_TEST_BIT_PARALLEL_TEST_AND_MODIFY(size)			\
+									\
+	struct parallel_test_and_set_lcore ## size			\
+	{								\
+		uint ## size ##_t *word;				\
+		unsigned int bit;					\
+		uint64_t flips;						\
+	};								\
+									\
+	static int							\
+	run_parallel_test_and_modify ## size(void *arg)		\
+	{								\
+		struct parallel_test_and_set_lcore ## size *lcore = arg; \
+		uint64_t deadline = rte_get_timer_cycles() +		\
+			PARALLEL_TEST_RUNTIME * rte_get_timer_hz();	\
+		do {							\
+			bool old_value;					\
+			bool new_value = rte_rand() & 1;		\
+			bool use_assign = rte_rand() & 1;		\
+									\
+			if (use_assign)					\
+				old_value = rte_bit_atomic_test_and_assign( \
+					lcore->word, lcore->bit, new_value, \
+					rte_memory_order_relaxed);	\
+			else						\
+				old_value = new_value ?			\
+					rte_bit_atomic_test_and_set(	\
+						lcore->word, lcore->bit, \
+						rte_memory_order_relaxed) : \
+					rte_bit_atomic_test_and_clear(	\
+						lcore->word, lcore->bit, \
+						rte_memory_order_relaxed); \
+			if (old_value != new_value)			\
+				lcore->flips++;				\
+		} while (rte_get_timer_cycles() < deadline);		\
+									\
+		return 0;						\
+	}								\
+									\
+	static int							\
+	test_bit_atomic_parallel_test_and_modify ## size(void)		\
+	{								\
+		unsigned int worker_lcore_id;				\
+		uint ## size ## _t word = 0;				\
+		unsigned int bit = rte_rand_max(size);			\
+		struct parallel_test_and_set_lcore ## size lmain = {	\
+			.word = &word,					\
+			.bit = bit					\
+		};							\
+		struct parallel_test_and_set_lcore ## size lworker = {	\
+			.word = &word,					\
+			.bit = bit					\
+		};							\
+		/* Both contexts target the same bit of the same word. */ \
+		if (rte_lcore_count() < 2) {				\
+			printf("Need multiple cores to run parallel test.\n"); \
+			return TEST_SKIPPED;				\
+		}							\
+									\
+		worker_lcore_id = rte_get_next_lcore(-1, 1, 0);		\
+									\
+		int rc = rte_eal_remote_launch(run_parallel_test_and_modify ## size, \
+					       &lworker, worker_lcore_id); \
+		TEST_ASSERT(rc == 0, "Worker thread launch failed");	\
+									\
+		run_parallel_test_and_modify ## size(&lmain);		\
+									\
+		rte_eal_mp_wait_lcore();				\
+		/* word started at 0: the bit is set iff the flip count is odd. */ \
+		uint64_t total_flips = lmain.flips + lworker.flips;	\
+		bool expected_value = total_flips % 2;			\
+									\
+		TEST_ASSERT(expected_value == rte_bit_test(&word, bit), \
+			    "After %"PRIu64" flips, the bit value "	\
+			    "should be %d", total_flips, expected_value); \
+		/* Every bit other than 'bit' must still be zero. */	\
+		uint64_t expected_word = 0;				\
+		rte_bit_assign(&expected_word, bit, expected_value);	\
+									\
+		TEST_ASSERT(expected_word == word, "Untouched bits have " \
+			    "changed value");				\
+									\
+		return TEST_SUCCESS;					\
+	}
+
+GEN_TEST_BIT_PARALLEL_TEST_AND_MODIFY(32)
+GEN_TEST_BIT_PARALLEL_TEST_AND_MODIFY(64)
+
+#define GEN_TEST_BIT_PARALLEL_FLIP(size)				\
+									\
+	struct parallel_flip_lcore ## size				\
+	{								\
+		uint ## size ##_t *word;				\
+		unsigned int bit;					\
+		uint64_t flips;						\
+	};								\
+									\
+	static int							\
+	run_parallel_flip ## size(void *arg)				\
+	{								\
+		struct parallel_flip_lcore ## size *lcore = arg; \
+		uint64_t deadline = rte_get_timer_cycles() +		\
+			PARALLEL_TEST_RUNTIME * rte_get_timer_hz();	\
+		do {							\
+			rte_bit_atomic_flip(lcore->word, lcore->bit,	\
+					    rte_memory_order_relaxed);	\
+			lcore->flips++;					\
+		} while (rte_get_timer_cycles() < deadline);		\
+									\
+		return 0;						\
+	}								\
+									\
+	static int							\
+	test_bit_atomic_parallel_flip ## size(void)			\
+	{								\
+		unsigned int worker_lcore_id;				\
+		uint ## size ## _t word = 0;				\
+		unsigned int bit = rte_rand_max(size);			\
+		struct parallel_flip_lcore ## size lmain = {		\
+			.word = &word,					\
+			.bit = bit					\
+		};							\
+		struct parallel_flip_lcore ## size lworker = {		\
+			.word = &word,					\
+			.bit = bit					\
+		};							\
+		/* Both contexts target the same bit of the same word. */ \
+		if (rte_lcore_count() < 2) {				\
+			printf("Need multiple cores to run parallel test.\n"); \
+			return TEST_SKIPPED;				\
+		}							\
+									\
+		worker_lcore_id = rte_get_next_lcore(-1, 1, 0);		\
+									\
+		int rc = rte_eal_remote_launch(run_parallel_flip ## size, \
+					       &lworker, worker_lcore_id); \
+		TEST_ASSERT(rc == 0, "Worker thread launch failed");	\
+									\
+		run_parallel_flip ## size(&lmain);			\
+									\
+		rte_eal_mp_wait_lcore();				\
+		/* word started at 0: the bit is set iff the flip count is odd. */ \
+		uint64_t total_flips = lmain.flips + lworker.flips;	\
+		bool expected_value = total_flips % 2;			\
+									\
+		TEST_ASSERT(expected_value == rte_bit_test(&word, bit), \
+			    "After %"PRIu64" flips, the bit value "	\
+			    "should be %d", total_flips, expected_value); \
+		/* Every bit other than 'bit' must still be zero. */	\
+		uint64_t expected_word = 0;				\
+		rte_bit_assign(&expected_word, bit, expected_value);	\
+									\
+		TEST_ASSERT(expected_word == word, "Untouched bits have " \
+			    "changed value");				\
+									\
+		return TEST_SUCCESS;					\
+	}
+
+GEN_TEST_BIT_PARALLEL_FLIP(32)
+GEN_TEST_BIT_PARALLEL_FLIP(64)
+
 static uint32_t val32;
 static uint64_t val64;
 
@@ -177,6 +478,16 @@ static struct unit_test_suite test_suite = {
 	.unit_test_cases = {
 		TEST_CASE(test_bit_access32),
 		TEST_CASE(test_bit_access64),
+		TEST_CASE(test_bit_access32),
+		TEST_CASE(test_bit_access64),
+		TEST_CASE(test_bit_atomic_access32),
+		TEST_CASE(test_bit_atomic_access64),
+		TEST_CASE(test_bit_atomic_parallel_assign32),
+		TEST_CASE(test_bit_atomic_parallel_assign64),
+		TEST_CASE(test_bit_atomic_parallel_test_and_modify32),
+		TEST_CASE(test_bit_atomic_parallel_test_and_modify64),
+		TEST_CASE(test_bit_atomic_parallel_flip32),
+		TEST_CASE(test_bit_atomic_parallel_flip64),
 		TEST_CASE(test_bit_relaxed_set),
 		TEST_CASE(test_bit_relaxed_clear),
 		TEST_CASE(test_bit_relaxed_test_set_clear),
-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [PATCH v7 6/6] eal: extend bitops to handle volatile pointers
  2024-09-17  9:36       ` [PATCH v7 0/6] Improve EAL bit operations API Mattias Rönnblom
                           ` (4 preceding siblings ...)
  2024-09-17  9:36         ` [PATCH v7 5/6] eal: add unit tests for atomic bit access functions Mattias Rönnblom
@ 2024-09-17  9:36         ` Mattias Rönnblom
  5 siblings, 0 replies; 63+ messages in thread
From: Mattias Rönnblom @ 2024-09-17  9:36 UTC (permalink / raw)
  To: dev
  Cc: hofors, Heng Wang, Stephen Hemminger, Tyler Retzlaff,
	Morten Brørup, Jack Bond-Preston, David Marchand,
	Chengwen Feng, Mattias Rönnblom

Have rte_bit_[test|set|clear|assign|flip]() and rte_bit_atomic_*()
handle volatile-marked pointers.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Jack Bond-Preston <jack.bond-preston@foss.arm.com>

--

PATCH v3:
 * Updated to reflect removed 'fun' parameter in __RTE_GEN_BIT_*()
   (Jack Bond-Preston).

PATCH v2:
 * Actually run the test_bit_atomic_v_access*() test functions.
---
 app/test/test_bitops.c       |  32 +++-
 lib/eal/include/rte_bitops.h | 301 +++++++++++++++++++++++------------
 2 files changed, 222 insertions(+), 111 deletions(-)

diff --git a/app/test/test_bitops.c b/app/test/test_bitops.c
index b80216a0a1..10e87f6776 100644
--- a/app/test/test_bitops.c
+++ b/app/test/test_bitops.c
@@ -14,13 +14,13 @@
 #include "test.h"
 
 #define GEN_TEST_BIT_ACCESS(test_name, set_fun, clear_fun, assign_fun,	\
-			    flip_fun, test_fun, size)			\
+			    flip_fun, test_fun, size, mod)		\
 	static int							\
 	test_name(void)							\
 	{								\
 		uint ## size ## _t reference = (uint ## size ## _t)rte_rand(); \
 		unsigned int bit_nr;					\
-		uint ## size ## _t word = (uint ## size ## _t)rte_rand(); \
+		mod uint ## size ## _t word = (uint ## size ## _t)rte_rand(); \
 									\
 		for (bit_nr = 0; bit_nr < size; bit_nr++) {		\
 			bool reference_bit = (reference >> bit_nr) & 1;	\
@@ -41,7 +41,7 @@
 				    "Bit %d had unflipped value", bit_nr); \
 			flip_fun(&word, bit_nr);			\
 									\
-			const uint ## size ## _t *const_ptr = &word;	\
+			const mod uint ## size ## _t *const_ptr = &word; \
 			TEST_ASSERT(test_fun(const_ptr, bit_nr) ==	\
 				    reference_bit,			\
 				    "Bit %d had unexpected value", bit_nr); \
@@ -59,10 +59,16 @@
 	}
 
 GEN_TEST_BIT_ACCESS(test_bit_access32, rte_bit_set, rte_bit_clear,
-		    rte_bit_assign, rte_bit_flip, rte_bit_test, 32)
+		    rte_bit_assign, rte_bit_flip, rte_bit_test, 32,)
 
 GEN_TEST_BIT_ACCESS(test_bit_access64, rte_bit_set, rte_bit_clear,
-		    rte_bit_assign, rte_bit_flip, rte_bit_test, 64)
+		    rte_bit_assign, rte_bit_flip, rte_bit_test, 64,)
+
+GEN_TEST_BIT_ACCESS(test_bit_v_access32, rte_bit_set, rte_bit_clear,
+		    rte_bit_assign, rte_bit_flip, rte_bit_test, 32, volatile)
+
+GEN_TEST_BIT_ACCESS(test_bit_v_access64, rte_bit_set, rte_bit_clear,
+		    rte_bit_assign, rte_bit_flip, rte_bit_test, 64, volatile)
 
 #define bit_atomic_set(addr, nr)				\
 	rte_bit_atomic_set(addr, nr, rte_memory_order_relaxed)
@@ -81,11 +87,19 @@ GEN_TEST_BIT_ACCESS(test_bit_access64, rte_bit_set, rte_bit_clear,
 
 GEN_TEST_BIT_ACCESS(test_bit_atomic_access32, bit_atomic_set,
 		    bit_atomic_clear, bit_atomic_assign,
-		    bit_atomic_flip, bit_atomic_test, 32)
+		    bit_atomic_flip, bit_atomic_test, 32,)
 
 GEN_TEST_BIT_ACCESS(test_bit_atomic_access64, bit_atomic_set,
 		    bit_atomic_clear, bit_atomic_assign,
-		    bit_atomic_flip, bit_atomic_test, 64)
+		    bit_atomic_flip, bit_atomic_test, 64,)
+
+GEN_TEST_BIT_ACCESS(test_bit_atomic_v_access32, bit_atomic_set,
+		    bit_atomic_clear, bit_atomic_assign,
+		    bit_atomic_flip, bit_atomic_test, 32, volatile)
+
+GEN_TEST_BIT_ACCESS(test_bit_atomic_v_access64, bit_atomic_set,
+		    bit_atomic_clear, bit_atomic_assign,
+		    bit_atomic_flip, bit_atomic_test, 64, volatile)
 
 #define PARALLEL_TEST_RUNTIME 0.25
 
@@ -480,8 +494,12 @@ static struct unit_test_suite test_suite = {
 		TEST_CASE(test_bit_access64),
 		TEST_CASE(test_bit_access32),
 		TEST_CASE(test_bit_access64),
+		TEST_CASE(test_bit_v_access32),
+		TEST_CASE(test_bit_v_access64),
 		TEST_CASE(test_bit_atomic_access32),
 		TEST_CASE(test_bit_atomic_access64),
+		TEST_CASE(test_bit_atomic_v_access32),
+		TEST_CASE(test_bit_atomic_v_access64),
 		TEST_CASE(test_bit_atomic_parallel_assign32),
 		TEST_CASE(test_bit_atomic_parallel_assign64),
 		TEST_CASE(test_bit_atomic_parallel_test_and_modify32),
diff --git a/lib/eal/include/rte_bitops.h b/lib/eal/include/rte_bitops.h
index 3ad6795fd1..d7a07c4099 100644
--- a/lib/eal/include/rte_bitops.h
+++ b/lib/eal/include/rte_bitops.h
@@ -127,12 +127,16 @@ extern "C" {
  * @param nr
  *   The index of the bit.
  */
-#define rte_bit_test(addr, nr)					\
-	_Generic((addr),					\
-		uint32_t *: __rte_bit_test32,			\
-		const uint32_t *: __rte_bit_test32,		\
-		uint64_t *: __rte_bit_test64,			\
-		const uint64_t *: __rte_bit_test64)(addr, nr)
+#define rte_bit_test(addr, nr)						\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_test32,				\
+		 const uint32_t *: __rte_bit_test32,			\
+		 volatile uint32_t *: __rte_bit_v_test32,		\
+		 const volatile uint32_t *: __rte_bit_v_test32,		\
+		 uint64_t *: __rte_bit_test64,				\
+		 const uint64_t *: __rte_bit_test64,			\
+		 volatile uint64_t *: __rte_bit_v_test64,		\
+		 const volatile uint64_t *: __rte_bit_v_test64)(addr, nr)
 
 /**
  * @warning
@@ -152,10 +156,12 @@ extern "C" {
  * @param nr
  *   The index of the bit.
  */
-#define rte_bit_set(addr, nr)				\
-	_Generic((addr),				\
-		 uint32_t *: __rte_bit_set32,		\
-		 uint64_t *: __rte_bit_set64)(addr, nr)
+#define rte_bit_set(addr, nr)						\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_set32,				\
+		 volatile uint32_t *: __rte_bit_v_set32,		\
+		 uint64_t *: __rte_bit_set64,				\
+		 volatile uint64_t *: __rte_bit_v_set64)(addr, nr)
 
 /**
  * @warning
@@ -175,10 +181,12 @@ extern "C" {
  * @param nr
  *   The index of the bit.
  */
-#define rte_bit_clear(addr, nr)					\
-	_Generic((addr),					\
-		 uint32_t *: __rte_bit_clear32,			\
-		 uint64_t *: __rte_bit_clear64)(addr, nr)
+#define rte_bit_clear(addr, nr)						\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_clear32,				\
+		 volatile uint32_t *: __rte_bit_v_clear32,		\
+		 uint64_t *: __rte_bit_clear64,				\
+		 volatile uint64_t *: __rte_bit_v_clear64)(addr, nr)
 
 /**
  * @warning
@@ -202,7 +210,9 @@ extern "C" {
 #define rte_bit_assign(addr, nr, value)					\
 	_Generic((addr),						\
 		 uint32_t *: __rte_bit_assign32,			\
-		 uint64_t *: __rte_bit_assign64)(addr, nr, value)
+		 volatile uint32_t *: __rte_bit_v_assign32,		\
+		 uint64_t *: __rte_bit_assign64,			\
+		 volatile uint64_t *: __rte_bit_v_assign64)(addr, nr, value)
 
 /**
  * @warning
@@ -225,7 +235,9 @@ extern "C" {
 #define rte_bit_flip(addr, nr)						\
 	_Generic((addr),						\
 		 uint32_t *: __rte_bit_flip32,				\
-		 uint64_t *: __rte_bit_flip64)(addr, nr)
+		 volatile uint32_t *: __rte_bit_v_flip32,		\
+		 uint64_t *: __rte_bit_flip64,				\
+		 volatile uint64_t *: __rte_bit_v_flip64)(addr, nr)
 
 /**
  * @warning
@@ -250,9 +262,13 @@ extern "C" {
 	_Generic((addr),						\
 		 uint32_t *: __rte_bit_atomic_test32,			\
 		 const uint32_t *: __rte_bit_atomic_test32,		\
+		 volatile uint32_t *: __rte_bit_atomic_v_test32,	\
+		 const volatile uint32_t *: __rte_bit_atomic_v_test32,	\
 		 uint64_t *: __rte_bit_atomic_test64,			\
-		 const uint64_t *: __rte_bit_atomic_test64)(addr, nr,	\
-							    memory_order)
+		 const uint64_t *: __rte_bit_atomic_test64,		\
+		 volatile uint64_t *: __rte_bit_atomic_v_test64,	\
+		 const volatile uint64_t *: __rte_bit_atomic_v_test64) \
+						    (addr, nr, memory_order)
 
 /**
  * @warning
@@ -274,7 +290,10 @@ extern "C" {
 #define rte_bit_atomic_set(addr, nr, memory_order)			\
 	_Generic((addr),						\
 		 uint32_t *: __rte_bit_atomic_set32,			\
-		 uint64_t *: __rte_bit_atomic_set64)(addr, nr, memory_order)
+		 volatile uint32_t *: __rte_bit_atomic_v_set32,		\
+		 uint64_t *: __rte_bit_atomic_set64,			\
+		 volatile uint64_t *: __rte_bit_atomic_v_set64)(addr, nr, \
+								memory_order)
 
 /**
  * @warning
@@ -296,7 +315,10 @@ extern "C" {
 #define rte_bit_atomic_clear(addr, nr, memory_order)			\
 	_Generic((addr),						\
 		 uint32_t *: __rte_bit_atomic_clear32,			\
-		 uint64_t *: __rte_bit_atomic_clear64)(addr, nr, memory_order)
+		 volatile uint32_t *: __rte_bit_atomic_v_clear32,	\
+		 uint64_t *: __rte_bit_atomic_clear64,			\
+		 volatile uint64_t *: __rte_bit_atomic_v_clear64)(addr, nr, \
+								  memory_order)
 
 /**
  * @warning
@@ -320,8 +342,11 @@ extern "C" {
 #define rte_bit_atomic_assign(addr, nr, value, memory_order)		\
 	_Generic((addr),						\
 		 uint32_t *: __rte_bit_atomic_assign32,			\
-		 uint64_t *: __rte_bit_atomic_assign64)(addr, nr, value, \
-							memory_order)
+		 volatile uint32_t *: __rte_bit_atomic_v_assign32,	\
+		 uint64_t *: __rte_bit_atomic_assign64,			\
+		 volatile uint64_t *: __rte_bit_atomic_v_assign64)(addr, nr, \
+								   value, \
+								   memory_order)
 
 /**
  * @warning
@@ -344,7 +369,10 @@ extern "C" {
 #define rte_bit_atomic_flip(addr, nr, memory_order)			\
 	_Generic((addr),						\
 		 uint32_t *: __rte_bit_atomic_flip32,			\
-		 uint64_t *: __rte_bit_atomic_flip64)(addr, nr, memory_order)
+		 volatile uint32_t *: __rte_bit_atomic_v_flip32,	\
+		 uint64_t *: __rte_bit_atomic_flip64,			\
+		 volatile uint64_t *: __rte_bit_atomic_v_flip64)(addr, nr, \
+								 memory_order)
 
 /**
  * @warning
@@ -368,8 +396,10 @@ extern "C" {
 #define rte_bit_atomic_test_and_set(addr, nr, memory_order)		\
 	_Generic((addr),						\
 		 uint32_t *: __rte_bit_atomic_test_and_set32,		\
-		 uint64_t *: __rte_bit_atomic_test_and_set64)(addr, nr,	\
-							      memory_order)
+		 volatile uint32_t *: __rte_bit_atomic_v_test_and_set32, \
+		 uint64_t *: __rte_bit_atomic_test_and_set64,		\
+		 volatile uint64_t *: __rte_bit_atomic_v_test_and_set64) \
+						    (addr, nr, memory_order)
 
 /**
  * @warning
@@ -393,8 +423,10 @@ extern "C" {
 #define rte_bit_atomic_test_and_clear(addr, nr, memory_order)		\
 	_Generic((addr),						\
 		 uint32_t *: __rte_bit_atomic_test_and_clear32,		\
-		 uint64_t *: __rte_bit_atomic_test_and_clear64)(addr, nr, \
-								memory_order)
+		 volatile uint32_t *: __rte_bit_atomic_v_test_and_clear32, \
+		 uint64_t *: __rte_bit_atomic_test_and_clear64,		\
+		 volatile uint64_t *: __rte_bit_atomic_v_test_and_clear64) \
+						       (addr, nr, memory_order)
 
 /**
  * @warning
@@ -421,9 +453,10 @@ extern "C" {
 #define rte_bit_atomic_test_and_assign(addr, nr, value, memory_order)	\
 	_Generic((addr),						\
 		 uint32_t *: __rte_bit_atomic_test_and_assign32,	\
-		 uint64_t *: __rte_bit_atomic_test_and_assign64)(addr, nr, \
-								 value, \
-								 memory_order)
+		 volatile uint32_t *: __rte_bit_atomic_v_test_and_assign32, \
+		 uint64_t *: __rte_bit_atomic_test_and_assign64,	\
+		 volatile uint64_t *: __rte_bit_atomic_v_test_and_assign64) \
+						(addr, nr, value, memory_order)
 
 #define __RTE_GEN_BIT_TEST(variant, qualifier, size)			\
 	__rte_experimental						\
@@ -493,7 +526,8 @@ extern "C" {
 	__RTE_GEN_BIT_FLIP(v, qualifier, size)
 
 #define __RTE_GEN_BIT_OPS_SIZE(size) \
-	__RTE_GEN_BIT_OPS(,, size)
+	__RTE_GEN_BIT_OPS(,, size) \
+	__RTE_GEN_BIT_OPS(v_, volatile, size)
 
 __RTE_GEN_BIT_OPS_SIZE(32)
 __RTE_GEN_BIT_OPS_SIZE(64)
@@ -633,7 +667,8 @@ __RTE_GEN_BIT_OPS_SIZE(64)
 	__RTE_GEN_BIT_ATOMIC_FLIP(variant, qualifier, size)
 
 #define __RTE_GEN_BIT_ATOMIC_OPS_SIZE(size) \
-	__RTE_GEN_BIT_ATOMIC_OPS(,, size)
+	__RTE_GEN_BIT_ATOMIC_OPS(,, size) \
+	__RTE_GEN_BIT_ATOMIC_OPS(v_, volatile, size)
 
 __RTE_GEN_BIT_ATOMIC_OPS_SIZE(32)
 __RTE_GEN_BIT_ATOMIC_OPS_SIZE(64)
@@ -1342,120 +1377,178 @@ rte_log2_u64(uint64_t v)
 #undef rte_bit_atomic_test_and_clear
 #undef rte_bit_atomic_test_and_assign
 
-#define __RTE_BIT_OVERLOAD_SZ_2(fun, qualifier, size, arg1_type, arg1_name) \
+#define __RTE_BIT_OVERLOAD_V_2(family, v, fun, c, size, arg1_type, arg1_name) \
 	static inline void						\
-	rte_bit_ ## fun(qualifier uint ## size ## _t *addr,		\
-			arg1_type arg1_name)				\
+	rte_bit_ ## family ## fun(c uint ## size ## _t *addr,		\
+				  arg1_type arg1_name)			\
 	{								\
-		__rte_bit_ ## fun ## size(addr, arg1_name);		\
+		__rte_bit_ ## family ## v ## fun ## size(addr, arg1_name); \
 	}
 
-#define __RTE_BIT_OVERLOAD_2(fun, qualifier, arg1_type, arg1_name)	\
-	__RTE_BIT_OVERLOAD_SZ_2(fun, qualifier, 32, arg1_type, arg1_name) \
-	__RTE_BIT_OVERLOAD_SZ_2(fun, qualifier, 64, arg1_type, arg1_name)
+#define __RTE_BIT_OVERLOAD_SZ_2(family, fun, c, size, arg1_type, arg1_name) \
+	__RTE_BIT_OVERLOAD_V_2(family,, fun, c, size, arg1_type,	\
+			       arg1_name)				\
+	__RTE_BIT_OVERLOAD_V_2(family, v_, fun, c volatile, size, \
+			       arg1_type, arg1_name)
 
-#define __RTE_BIT_OVERLOAD_SZ_2R(fun, qualifier, size, ret_type, arg1_type, \
-				 arg1_name)				\
+#define __RTE_BIT_OVERLOAD_2(family, fun, c, arg1_type, arg1_name)	\
+	__RTE_BIT_OVERLOAD_SZ_2(family, fun, c, 32, arg1_type, arg1_name) \
+	__RTE_BIT_OVERLOAD_SZ_2(family, fun, c, 64, arg1_type, arg1_name)
+
+#define __RTE_BIT_OVERLOAD_V_2R(family, v, fun, c, size, ret_type, arg1_type, \
+				arg1_name)				\
 	static inline ret_type						\
-	rte_bit_ ## fun(qualifier uint ## size ## _t *addr,		\
+	rte_bit_ ## family ## fun(c uint ## size ## _t *addr,		\
 			arg1_type arg1_name)				\
 	{								\
-		return __rte_bit_ ## fun ## size(addr, arg1_name);	\
+		return __rte_bit_ ## family ## v ## fun ## size(addr,	\
+								arg1_name); \
 	}
 
-#define __RTE_BIT_OVERLOAD_2R(fun, qualifier, ret_type, arg1_type, arg1_name) \
-	__RTE_BIT_OVERLOAD_SZ_2R(fun, qualifier, 32, ret_type, arg1_type, \
+#define __RTE_BIT_OVERLOAD_SZ_2R(family, fun, c, size, ret_type, arg1_type, \
+				 arg1_name)				\
+	__RTE_BIT_OVERLOAD_V_2R(family,, fun, c, size, ret_type, arg1_type, \
+				arg1_name)				\
+	__RTE_BIT_OVERLOAD_V_2R(family, v_, fun, c volatile,		\
+				size, ret_type, arg1_type, arg1_name)
+
+#define __RTE_BIT_OVERLOAD_2R(family, fun, c, ret_type, arg1_type, arg1_name) \
+	__RTE_BIT_OVERLOAD_SZ_2R(family, fun, c, 32, ret_type, arg1_type, \
 				 arg1_name)				\
-	__RTE_BIT_OVERLOAD_SZ_2R(fun, qualifier, 64, ret_type, arg1_type, \
+	__RTE_BIT_OVERLOAD_SZ_2R(family, fun, c, 64, ret_type, arg1_type, \
 				 arg1_name)
 
-#define __RTE_BIT_OVERLOAD_SZ_3(fun, qualifier, size, arg1_type, arg1_name, \
-				arg2_type, arg2_name)			\
+#define __RTE_BIT_OVERLOAD_V_3(family, v, fun, c, size, arg1_type, arg1_name, \
+			       arg2_type, arg2_name)			\
 	static inline void						\
-	rte_bit_ ## fun(uint ## size ## _t *addr, arg1_type arg1_name,	\
-			arg2_type arg2_name)				\
+	rte_bit_ ## family ## fun(c uint ## size ## _t *addr,		\
+				  arg1_type arg1_name, arg2_type arg2_name) \
 	{								\
-		__rte_bit_ ## fun ## size(addr, arg1_name, arg2_name);	\
+		__rte_bit_ ## family ## v ## fun ## size(addr, arg1_name, \
+							 arg2_name);	\
 	}
 
-#define __RTE_BIT_OVERLOAD_3(fun, qualifier, arg1_type, arg1_name, arg2_type, \
+#define __RTE_BIT_OVERLOAD_SZ_3(family, fun, c, size, arg1_type, arg1_name, \
+				arg2_type, arg2_name)			\
+	__RTE_BIT_OVERLOAD_V_3(family,, fun, c, size, arg1_type, arg1_name, \
+			       arg2_type, arg2_name)			\
+	__RTE_BIT_OVERLOAD_V_3(family, v_, fun, c volatile, size, arg1_type, \
+			       arg1_name, arg2_type, arg2_name)
+
+#define __RTE_BIT_OVERLOAD_3(family, fun, c, arg1_type, arg1_name, arg2_type, \
 			     arg2_name)					\
-	__RTE_BIT_OVERLOAD_SZ_3(fun, qualifier, 32, arg1_type, arg1_name, \
+	__RTE_BIT_OVERLOAD_SZ_3(family, fun, c, 32, arg1_type, arg1_name, \
 				arg2_type, arg2_name)			\
-	__RTE_BIT_OVERLOAD_SZ_3(fun, qualifier, 64, arg1_type, arg1_name, \
+	__RTE_BIT_OVERLOAD_SZ_3(family, fun, c, 64, arg1_type, arg1_name, \
 				arg2_type, arg2_name)
 
-#define __RTE_BIT_OVERLOAD_SZ_3R(fun, qualifier, size, ret_type, arg1_type, \
-				 arg1_name, arg2_type, arg2_name)	\
+#define __RTE_BIT_OVERLOAD_V_3R(family, v, fun, c, size, ret_type, arg1_type, \
+				arg1_name, arg2_type, arg2_name)	\
 	static inline ret_type						\
-	rte_bit_ ## fun(uint ## size ## _t *addr, arg1_type arg1_name,	\
-			arg2_type arg2_name)				\
+	rte_bit_ ## family ## fun(c uint ## size ## _t *addr,		\
+				  arg1_type arg1_name, arg2_type arg2_name) \
 	{								\
-		return __rte_bit_ ## fun ## size(addr, arg1_name, arg2_name); \
+		return __rte_bit_ ## family ## v ## fun ## size(addr,	\
+								arg1_name, \
+								arg2_name); \
 	}
 
-#define __RTE_BIT_OVERLOAD_3R(fun, qualifier, ret_type, arg1_type, arg1_name, \
-			      arg2_type, arg2_name)			\
-	__RTE_BIT_OVERLOAD_SZ_3R(fun, qualifier, 32, ret_type, arg1_type, \
+#define __RTE_BIT_OVERLOAD_SZ_3R(family, fun, c, size, ret_type, arg1_type, \
 				 arg1_name, arg2_type, arg2_name)	\
-	__RTE_BIT_OVERLOAD_SZ_3R(fun, qualifier, 64, ret_type, arg1_type, \
-				 arg1_name, arg2_type, arg2_name)
+	__RTE_BIT_OVERLOAD_V_3R(family,, fun, c, size, ret_type, \
+				arg1_type, arg1_name, arg2_type, arg2_name) \
+	__RTE_BIT_OVERLOAD_V_3R(family, v_, fun, c volatile, size, \
+				ret_type, arg1_type, arg1_name, arg2_type, \
+				arg2_name)
 
-#define __RTE_BIT_OVERLOAD_SZ_4(fun, qualifier, size, arg1_type, arg1_name, \
-				arg2_type, arg2_name, arg3_type, arg3_name) \
+#define __RTE_BIT_OVERLOAD_3R(family, fun, c, ret_type, arg1_type, arg1_name, \
+			      arg2_type, arg2_name)			\
+	__RTE_BIT_OVERLOAD_SZ_3R(family, fun, c, 32, ret_type,		\
+				 arg1_type, arg1_name, arg2_type, arg2_name) \
+	__RTE_BIT_OVERLOAD_SZ_3R(family, fun, c, 64, ret_type, \
+				 arg1_type, arg1_name, arg2_type, arg2_name)
+
+#define __RTE_BIT_OVERLOAD_V_4(family, v, fun, c, size, arg1_type, arg1_name, \
+			       arg2_type, arg2_name, arg3_type,	arg3_name) \
 	static inline void						\
-	rte_bit_ ## fun(uint ## size ## _t *addr, arg1_type arg1_name,	\
-			arg2_type arg2_name, arg3_type arg3_name)	\
+	rte_bit_ ## family ## fun(c uint ## size ## _t *addr,		\
+				  arg1_type arg1_name, arg2_type arg2_name, \
+				  arg3_type arg3_name)			\
 	{								\
-		__rte_bit_ ## fun ## size(addr, arg1_name, arg2_name,	\
-					  arg3_name);		      \
+		__rte_bit_ ## family ## v ## fun ## size(addr, arg1_name, \
+							 arg2_name,	\
+							 arg3_name);	\
 	}
 
-#define __RTE_BIT_OVERLOAD_4(fun, qualifier, arg1_type, arg1_name, arg2_type, \
-			     arg2_name, arg3_type, arg3_name)		\
-	__RTE_BIT_OVERLOAD_SZ_4(fun, qualifier, 32, arg1_type, arg1_name, \
+#define __RTE_BIT_OVERLOAD_SZ_4(family, fun, c, size, arg1_type, arg1_name, \
 				arg2_type, arg2_name, arg3_type, arg3_name) \
-	__RTE_BIT_OVERLOAD_SZ_4(fun, qualifier, 64, arg1_type, arg1_name, \
-				arg2_type, arg2_name, arg3_type, arg3_name)
-
-#define __RTE_BIT_OVERLOAD_SZ_4R(fun, qualifier, size, ret_type, arg1_type, \
-				 arg1_name, arg2_type, arg2_name, arg3_type, \
-				 arg3_name)				\
+	__RTE_BIT_OVERLOAD_V_4(family,, fun, c, size, arg1_type,	\
+			       arg1_name, arg2_type, arg2_name, arg3_type, \
+			       arg3_name)				\
+	__RTE_BIT_OVERLOAD_V_4(family, v_, fun, c volatile, size,	\
+			       arg1_type, arg1_name, arg2_type, arg2_name, \
+			       arg3_type, arg3_name)
+
+#define __RTE_BIT_OVERLOAD_4(family, fun, c, arg1_type, arg1_name, arg2_type, \
+			     arg2_name, arg3_type, arg3_name)		\
+	__RTE_BIT_OVERLOAD_SZ_4(family, fun, c, 32, arg1_type,		\
+				arg1_name, arg2_type, arg2_name, arg3_type, \
+				arg3_name)				\
+	__RTE_BIT_OVERLOAD_SZ_4(family, fun, c, 64, arg1_type,		\
+				arg1_name, arg2_type, arg2_name, arg3_type, \
+				arg3_name)
+
+#define __RTE_BIT_OVERLOAD_V_4R(family, v, fun, c, size, ret_type, arg1_type, \
+				arg1_name, arg2_type, arg2_name, arg3_type, \
+				arg3_name)				\
 	static inline ret_type						\
-	rte_bit_ ## fun(uint ## size ## _t *addr, arg1_type arg1_name,	\
-			arg2_type arg2_name, arg3_type arg3_name)	\
+	rte_bit_ ## family ## fun(c uint ## size ## _t *addr,		\
+				  arg1_type arg1_name, arg2_type arg2_name, \
+				  arg3_type arg3_name)			\
 	{								\
-		return __rte_bit_ ## fun ## size(addr, arg1_name, arg2_name, \
-						 arg3_name);		\
+		return __rte_bit_ ## family ## v ## fun ## size(addr,	\
+								arg1_name, \
+								arg2_name, \
+								arg3_name); \
 	}
 
-#define __RTE_BIT_OVERLOAD_4R(fun, qualifier, ret_type, arg1_type, arg1_name, \
-			      arg2_type, arg2_name, arg3_type, arg3_name) \
-	__RTE_BIT_OVERLOAD_SZ_4R(fun, qualifier, 32, ret_type, arg1_type, \
+#define __RTE_BIT_OVERLOAD_SZ_4R(family, fun, c, size, ret_type, arg1_type, \
 				 arg1_name, arg2_type, arg2_name, arg3_type, \
 				 arg3_name)				\
-	__RTE_BIT_OVERLOAD_SZ_4R(fun, qualifier, 64, ret_type, arg1_type, \
-				 arg1_name, arg2_type, arg2_name, arg3_type, \
-				 arg3_name)
-
-__RTE_BIT_OVERLOAD_2R(test, const, bool, unsigned int, nr)
-__RTE_BIT_OVERLOAD_2(set,, unsigned int, nr)
-__RTE_BIT_OVERLOAD_2(clear,, unsigned int, nr)
-__RTE_BIT_OVERLOAD_3(assign,, unsigned int, nr, bool, value)
-__RTE_BIT_OVERLOAD_2(flip,, unsigned int, nr)
-
-__RTE_BIT_OVERLOAD_3R(atomic_test, const, bool, unsigned int, nr,
+	__RTE_BIT_OVERLOAD_V_4R(family,, fun, c, size, ret_type, arg1_type, \
+				arg1_name, arg2_type, arg2_name, arg3_type, \
+				arg3_name)				\
+	__RTE_BIT_OVERLOAD_V_4R(family, v_, fun, c volatile, size,	\
+				ret_type, arg1_type, arg1_name, arg2_type, \
+				arg2_name, arg3_type, arg3_name)
+
+#define __RTE_BIT_OVERLOAD_4R(family, fun, c, ret_type, arg1_type, arg1_name, \
+			      arg2_type, arg2_name, arg3_type, arg3_name) \
+	__RTE_BIT_OVERLOAD_SZ_4R(family, fun, c, 32, ret_type,		\
+				 arg1_type, arg1_name, arg2_type, arg2_name, \
+				 arg3_type, arg3_name)			\
+	__RTE_BIT_OVERLOAD_SZ_4R(family, fun, c, 64, ret_type,		\
+				 arg1_type, arg1_name, arg2_type, arg2_name, \
+				 arg3_type, arg3_name)
+
+__RTE_BIT_OVERLOAD_2R(, test, const, bool, unsigned int, nr)
+__RTE_BIT_OVERLOAD_2(, set,, unsigned int, nr)
+__RTE_BIT_OVERLOAD_2(, clear,, unsigned int, nr)
+__RTE_BIT_OVERLOAD_3(, assign,, unsigned int, nr, bool, value)
+__RTE_BIT_OVERLOAD_2(, flip,, unsigned int, nr)
+
+__RTE_BIT_OVERLOAD_3R(atomic_, test, const, bool, unsigned int, nr,
 		      int, memory_order)
-__RTE_BIT_OVERLOAD_3(atomic_set,, unsigned int, nr, int, memory_order)
-__RTE_BIT_OVERLOAD_3(atomic_clear,, unsigned int, nr, int, memory_order)
-__RTE_BIT_OVERLOAD_4(atomic_assign,, unsigned int, nr, bool, value,
+__RTE_BIT_OVERLOAD_3(atomic_, set,, unsigned int, nr, int, memory_order)
+__RTE_BIT_OVERLOAD_3(atomic_, clear,, unsigned int, nr, int, memory_order)
+__RTE_BIT_OVERLOAD_4(atomic_, assign,, unsigned int, nr, bool, value,
 		     int, memory_order)
-__RTE_BIT_OVERLOAD_3(atomic_flip,, unsigned int, nr, int, memory_order)
-__RTE_BIT_OVERLOAD_3R(atomic_test_and_set,, bool, unsigned int, nr,
+__RTE_BIT_OVERLOAD_3(atomic_, flip,, unsigned int, nr, int, memory_order)
+__RTE_BIT_OVERLOAD_3R(atomic_, test_and_set,, bool, unsigned int, nr,
 		      int, memory_order)
-__RTE_BIT_OVERLOAD_3R(atomic_test_and_clear,, bool, unsigned int, nr,
+__RTE_BIT_OVERLOAD_3R(atomic_, test_and_clear,, bool, unsigned int, nr,
 		      int, memory_order)
-__RTE_BIT_OVERLOAD_4R(atomic_test_and_assign,, bool, unsigned int, nr,
+__RTE_BIT_OVERLOAD_4R(atomic_, test_and_assign,, bool, unsigned int, nr,
 		      bool, value, int, memory_order)
 
 #endif
-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [PATCH v8 0/6] Improve EAL bit operations API
  2024-09-17  9:36         ` [PATCH v7 1/6] dpdk: do not force C linkage on include file dependencies Mattias Rönnblom
@ 2024-09-17 10:48           ` Mattias Rönnblom
  2024-09-17 10:48             ` [PATCH v8 1/6] dpdk: do not force C linkage on include file dependencies Mattias Rönnblom
                               ` (5 more replies)
  0 siblings, 6 replies; 63+ messages in thread
From: Mattias Rönnblom @ 2024-09-17 10:48 UTC (permalink / raw)
  To: dev
  Cc: hofors, Heng Wang, Stephen Hemminger, Tyler Retzlaff,
	Morten Brørup, Jack Bond-Preston, David Marchand,
	Chengwen Feng, Mattias Rönnblom

This patch set represents an attempt to improve and extend the RTE
bitops API, in particular for functions that operate on individual
bits.

All new functionality is exposed to the user as generic selection
macros, delegating the actual work to private (__-marked) static
inline functions. Public functions (e.g., rte_bit_set32()) would just
be bloating the API. Such generic selection macros will here be
referred to as "functions", although technically they are not.

The legacy <rte_bitops.h> rte_bit_relaxed_*() functions are replaced
with two new families:

rte_bit_[test|set|clear|assign|flip]() which provides no memory
ordering or atomicity guarantees, but does provide the best
performance. The performance degradation resulting from the use of
volatile (e.g., forcing loads and stores to actually occur and in the
number specified) and atomic (e.g., LOCK-prefixed instructions on x86)
may be significant. rte_bit_[test|set|clear|assign|flip]() may be
used with volatile word pointers, in which case they guarantee
that the program-level accesses actually occur.

rte_bit_atomic_*() which provides atomic bit-level operations,
including the possibility of specifying memory ordering constraints
(or the lack thereof).

The atomic functions take non-_Atomic pointers, to be flexible, just
like the GCC builtins and default <rte_stdatomic.h>. The issue with
_Atomic APIs is that it may well be the case that the user wants to
perform both non-atomic and atomic operations on the same word.

Having _Atomic-marked addresses would complicate supporting atomic
bit-level operations in the bitset API (proposed in a different RFC
patchset), and potentially other APIs depending on RTE bitops for
atomic bit-level ops. Either one needs two bitset variants, one
_Atomic bitset and one non-atomic one, or the bitset code needs to
cast the non-_Atomic pointer to an _Atomic one. Having a separate
_Atomic bitset would be bloat and also prevent the user from both, in
some situations, doing atomic operations against a bit set, while in
other situations (e.g., at times when MT safety is not a concern)
operating on the same objects in a non-atomic manner.

Unlike rte_bit_relaxed_*(), individual bits are represented by bool,
not uint32_t or uint64_t. The author found the use of such large types
confusing, and also failed to see any performance benefits.

A set of functions rte_bit_*_assign() are added, to assign a
particular boolean value to a particular bit.

All new functions have properly documented semantics.

All new functions operate on both 32 and 64-bit words, with type
checking.

_Generic allows the user code to be a little more compact. Having a
type-generic atomic test/set/clear/assign bit API also seems
consistent with the "core" (word-size) atomics API, which is generic
(both GCC builtins and <rte_stdatomic.h> are).

The _Generic versions avoid having explicit unsigned long versions of
all functions. If you have an unsigned long, it's safe to use the
generic version (e.g., rte_bit_set()) and _Generic will pick the right
function, provided long is either 32 or 64 bit on your platform (which
it is on all DPDK-supported ABIs).

The generic rte_bit_set() is a macro, and not a function, but
nevertheless has been given a lower-case name. That's how C11 does it
(for atomics, and other _Generic), and <rte_stdatomic.h>. Its address
can't be taken, but it does not evaluate its parameters more than
once.

C++ doesn't support generic selection. In C++ translation units the
_Generic macros are replaced with overloaded functions, implemented by
means of a huge, complicated C macro mess.

Mattias Rönnblom (6):
  dpdk: do not force C linkage on include file dependencies
  eal: extend bit manipulation functionality
  eal: add unit tests for bit operations
  eal: add atomic bit operations
  eal: add unit tests for atomic bit access functions
  eal: extend bitops to handle volatile pointers

 app/test/packet_burst_generator.h             |   8 +-
 app/test/test_bitops.c                        | 416 +++++++++-
 app/test/virtual_pmd.h                        |   4 +-
 doc/guides/rel_notes/release_24_11.rst        |  17 +
 drivers/bus/auxiliary/bus_auxiliary_driver.h  |   8 +-
 drivers/bus/cdx/bus_cdx_driver.h              |   8 +-
 drivers/bus/dpaa/include/fsl_qman.h           |   8 +-
 drivers/bus/fslmc/bus_fslmc_driver.h          |   8 +-
 drivers/bus/pci/bus_pci_driver.h              |   8 +-
 drivers/bus/pci/rte_bus_pci.h                 |   8 +-
 drivers/bus/platform/bus_platform_driver.h    |   8 +-
 drivers/bus/vdev/bus_vdev_driver.h            |   8 +-
 drivers/bus/vmbus/bus_vmbus_driver.h          |   8 +-
 drivers/bus/vmbus/rte_bus_vmbus.h             |   8 +-
 drivers/dma/cnxk/cnxk_dma_event_dp.h          |   8 +-
 drivers/dma/ioat/ioat_hw_defs.h               |   4 +-
 drivers/event/dlb2/rte_pmd_dlb2.h             |   8 +-
 drivers/mempool/dpaa2/rte_dpaa2_mempool.h     |   6 +-
 drivers/net/avp/rte_avp_fifo.h                |   8 +-
 drivers/net/bonding/rte_eth_bond.h            |   4 +-
 drivers/net/i40e/rte_pmd_i40e.h               |   8 +-
 drivers/net/mlx5/mlx5_trace.h                 |   8 +-
 drivers/net/ring/rte_eth_ring.h               |   4 +-
 drivers/net/vhost/rte_eth_vhost.h             |   8 +-
 drivers/raw/ifpga/afu_pmd_core.h              |   8 +-
 drivers/raw/ifpga/afu_pmd_he_hssi.h           |   6 +-
 drivers/raw/ifpga/afu_pmd_he_lpbk.h           |   6 +-
 drivers/raw/ifpga/afu_pmd_he_mem.h            |   6 +-
 drivers/raw/ifpga/afu_pmd_n3000.h             |   6 +-
 drivers/raw/ifpga/rte_pmd_afu.h               |   4 +-
 drivers/raw/ifpga/rte_pmd_ifpga.h             |   4 +-
 examples/ethtool/lib/rte_ethtool.h            |   8 +-
 examples/qos_sched/main.h                     |   4 +-
 examples/vm_power_manager/channel_manager.h   |   8 +-
 lib/acl/rte_acl_osdep.h                       |   8 +-
 lib/bbdev/rte_bbdev.h                         |   8 +-
 lib/bbdev/rte_bbdev_op.h                      |   8 +-
 lib/bbdev/rte_bbdev_pmd.h                     |   8 +-
 lib/bpf/bpf_def.h                             |   8 +-
 lib/compressdev/rte_comp.h                    |   4 +-
 lib/compressdev/rte_compressdev.h             |   6 +-
 lib/compressdev/rte_compressdev_internal.h    |   8 +-
 lib/compressdev/rte_compressdev_pmd.h         |   8 +-
 lib/cryptodev/cryptodev_pmd.h                 |   8 +-
 lib/cryptodev/cryptodev_trace.h               |   8 +-
 lib/cryptodev/rte_crypto.h                    |   8 +-
 lib/cryptodev/rte_crypto_asym.h               |   8 +-
 lib/cryptodev/rte_crypto_sym.h                |   8 +-
 lib/cryptodev/rte_cryptodev.h                 |   8 +-
 lib/cryptodev/rte_cryptodev_trace_fp.h        |   4 +-
 lib/dispatcher/rte_dispatcher.h               |   8 +-
 lib/dmadev/rte_dmadev.h                       |   8 +
 lib/eal/arm/include/rte_atomic_32.h           |   4 +-
 lib/eal/arm/include/rte_atomic_64.h           |   8 +-
 lib/eal/arm/include/rte_byteorder.h           |   8 +-
 lib/eal/arm/include/rte_cpuflags_32.h         |   8 +-
 lib/eal/arm/include/rte_cpuflags_64.h         |   8 +-
 lib/eal/arm/include/rte_cycles_32.h           |   4 +-
 lib/eal/arm/include/rte_cycles_64.h           |   4 +-
 lib/eal/arm/include/rte_io.h                  |   8 +-
 lib/eal/arm/include/rte_io_64.h               |   8 +-
 lib/eal/arm/include/rte_memcpy_32.h           |   8 +-
 lib/eal/arm/include/rte_memcpy_64.h           |  12 +-
 lib/eal/arm/include/rte_pause.h               |   8 +-
 lib/eal/arm/include/rte_pause_32.h            |   6 +-
 lib/eal/arm/include/rte_pause_64.h            |   8 +-
 lib/eal/arm/include/rte_power_intrinsics.h    |   8 +-
 lib/eal/arm/include/rte_prefetch_32.h         |   8 +-
 lib/eal/arm/include/rte_prefetch_64.h         |   8 +-
 lib/eal/arm/include/rte_rwlock.h              |   4 +-
 lib/eal/arm/include/rte_spinlock.h            |   6 +-
 lib/eal/freebsd/include/rte_os.h              |   8 +-
 lib/eal/include/bus_driver.h                  |   8 +-
 lib/eal/include/dev_driver.h                  |   6 +-
 lib/eal/include/eal_trace_internal.h          |   8 +-
 lib/eal/include/generic/rte_atomic.h          |   8 +
 lib/eal/include/generic/rte_byteorder.h       |   8 +
 lib/eal/include/generic/rte_cpuflags.h        |   8 +
 lib/eal/include/generic/rte_cycles.h          |   8 +
 lib/eal/include/generic/rte_io.h              |   8 +
 lib/eal/include/generic/rte_memcpy.h          |   8 +
 lib/eal/include/generic/rte_pause.h           |   8 +
 .../include/generic/rte_power_intrinsics.h    |   8 +
 lib/eal/include/generic/rte_prefetch.h        |   8 +
 lib/eal/include/generic/rte_rwlock.h          |   8 +-
 lib/eal/include/generic/rte_spinlock.h        |   8 +
 lib/eal/include/generic/rte_vect.h            |   8 +
 lib/eal/include/rte_alarm.h                   |   4 +-
 lib/eal/include/rte_bitmap.h                  |   8 +-
 lib/eal/include/rte_bitops.h                  | 768 +++++++++++++++++-
 lib/eal/include/rte_bus.h                     |   8 +-
 lib/eal/include/rte_class.h                   |   4 +-
 lib/eal/include/rte_common.h                  |   8 +-
 lib/eal/include/rte_dev.h                     |   8 +-
 lib/eal/include/rte_devargs.h                 |   8 +-
 lib/eal/include/rte_eal_trace.h               |   4 +-
 lib/eal/include/rte_errno.h                   |   4 +-
 lib/eal/include/rte_fbarray.h                 |   8 +-
 lib/eal/include/rte_keepalive.h               |   6 +-
 lib/eal/include/rte_mcslock.h                 |   8 +-
 lib/eal/include/rte_memory.h                  |   8 +-
 lib/eal/include/rte_pci_dev_features.h        |   4 +-
 lib/eal/include/rte_pflock.h                  |   8 +-
 lib/eal/include/rte_random.h                  |   4 +-
 lib/eal/include/rte_seqcount.h                |   8 +-
 lib/eal/include/rte_seqlock.h                 |   8 +-
 lib/eal/include/rte_service.h                 |   8 +-
 lib/eal/include/rte_service_component.h       |   4 +-
 lib/eal/include/rte_stdatomic.h               |   5 +-
 lib/eal/include/rte_string_fns.h              |  17 +-
 lib/eal/include/rte_tailq.h                   |   6 +-
 lib/eal/include/rte_ticketlock.h              |   8 +-
 lib/eal/include/rte_time.h                    |   6 +-
 lib/eal/include/rte_trace.h                   |   8 +-
 lib/eal/include/rte_trace_point.h             |   8 +-
 lib/eal/include/rte_trace_point_register.h    |   8 +-
 lib/eal/include/rte_uuid.h                    |   8 +-
 lib/eal/include/rte_version.h                 |   6 +-
 lib/eal/include/rte_vfio.h                    |   8 +-
 lib/eal/linux/include/rte_os.h                |   8 +-
 lib/eal/loongarch/include/rte_atomic.h        |   6 +-
 lib/eal/loongarch/include/rte_byteorder.h     |   4 +-
 lib/eal/loongarch/include/rte_cpuflags.h      |   8 +-
 lib/eal/loongarch/include/rte_cycles.h        |   4 +-
 lib/eal/loongarch/include/rte_io.h            |   4 +-
 lib/eal/loongarch/include/rte_memcpy.h        |   4 +-
 lib/eal/loongarch/include/rte_pause.h         |   8 +-
 .../loongarch/include/rte_power_intrinsics.h  |   8 +-
 lib/eal/loongarch/include/rte_prefetch.h      |   8 +-
 lib/eal/loongarch/include/rte_rwlock.h        |   4 +-
 lib/eal/loongarch/include/rte_spinlock.h      |   6 +-
 lib/eal/ppc/include/rte_atomic.h              |   6 +-
 lib/eal/ppc/include/rte_byteorder.h           |   6 +-
 lib/eal/ppc/include/rte_cpuflags.h            |   8 +-
 lib/eal/ppc/include/rte_cycles.h              |   8 +-
 lib/eal/ppc/include/rte_io.h                  |   4 +-
 lib/eal/ppc/include/rte_memcpy.h              |   4 +-
 lib/eal/ppc/include/rte_pause.h               |   8 +-
 lib/eal/ppc/include/rte_power_intrinsics.h    |   8 +-
 lib/eal/ppc/include/rte_prefetch.h            |   8 +-
 lib/eal/ppc/include/rte_rwlock.h              |   4 +-
 lib/eal/ppc/include/rte_spinlock.h            |   8 +-
 lib/eal/riscv/include/rte_atomic.h            |   8 +-
 lib/eal/riscv/include/rte_byteorder.h         |   8 +-
 lib/eal/riscv/include/rte_cpuflags.h          |   8 +-
 lib/eal/riscv/include/rte_cycles.h            |   4 +-
 lib/eal/riscv/include/rte_io.h                |   4 +-
 lib/eal/riscv/include/rte_memcpy.h            |   4 +-
 lib/eal/riscv/include/rte_pause.h             |   8 +-
 lib/eal/riscv/include/rte_power_intrinsics.h  |   8 +-
 lib/eal/riscv/include/rte_prefetch.h          |   8 +-
 lib/eal/riscv/include/rte_rwlock.h            |   4 +-
 lib/eal/riscv/include/rte_spinlock.h          |   6 +-
 lib/eal/windows/include/pthread.h             |   6 +-
 lib/eal/windows/include/regex.h               |   8 +-
 lib/eal/windows/include/rte_windows.h         |   8 +-
 lib/eal/x86/include/rte_atomic.h              |  25 +-
 lib/eal/x86/include/rte_byteorder.h           |  16 +-
 lib/eal/x86/include/rte_cpuflags.h            |   8 +-
 lib/eal/x86/include/rte_cycles.h              |   8 +-
 lib/eal/x86/include/rte_io.h                  |   8 +-
 lib/eal/x86/include/rte_pause.h               |   7 +-
 lib/eal/x86/include/rte_power_intrinsics.h    |   8 +-
 lib/eal/x86/include/rte_prefetch.h            |   8 +-
 lib/eal/x86/include/rte_rwlock.h              |   6 +-
 lib/eal/x86/include/rte_spinlock.h            |   9 +-
 lib/ethdev/ethdev_driver.h                    |   8 +-
 lib/ethdev/ethdev_pci.h                       |   8 +-
 lib/ethdev/ethdev_trace.h                     |   8 +-
 lib/ethdev/ethdev_vdev.h                      |   8 +-
 lib/ethdev/rte_cman.h                         |   4 +-
 lib/ethdev/rte_dev_info.h                     |   4 +-
 lib/ethdev/rte_ethdev.h                       |   8 +-
 lib/ethdev/rte_ethdev_trace_fp.h              |   4 +-
 lib/eventdev/event_timer_adapter_pmd.h        |   4 +-
 lib/eventdev/eventdev_pmd.h                   |   8 +-
 lib/eventdev/eventdev_pmd_pci.h               |   8 +-
 lib/eventdev/eventdev_pmd_vdev.h              |   8 +-
 lib/eventdev/eventdev_trace.h                 |   8 +-
 lib/eventdev/rte_event_crypto_adapter.h       |   8 +-
 lib/eventdev/rte_event_eth_rx_adapter.h       |   8 +-
 lib/eventdev/rte_event_eth_tx_adapter.h       |   8 +-
 lib/eventdev/rte_event_ring.h                 |   8 +-
 lib/eventdev/rte_event_timer_adapter.h        |   8 +-
 lib/eventdev/rte_eventdev.h                   |   8 +-
 lib/eventdev/rte_eventdev_trace_fp.h          |   4 +-
 lib/graph/rte_graph_model_mcore_dispatch.h    |   8 +-
 lib/graph/rte_graph_worker.h                  |   6 +-
 lib/gso/rte_gso.h                             |   6 +-
 lib/hash/rte_fbk_hash.h                       |   8 +-
 lib/hash/rte_hash_crc.h                       |   8 +-
 lib/hash/rte_jhash.h                          |   8 +-
 lib/hash/rte_thash.h                          |   8 +-
 lib/hash/rte_thash_gfni.h                     |   8 +-
 lib/ip_frag/rte_ip_frag.h                     |   8 +-
 lib/ipsec/rte_ipsec.h                         |   8 +-
 lib/log/rte_log.h                             |   8 +-
 lib/lpm/rte_lpm.h                             |   8 +-
 lib/member/rte_member.h                       |   8 +-
 lib/member/rte_member_sketch.h                |   6 +-
 lib/member/rte_member_sketch_avx512.h         |   8 +-
 lib/member/rte_member_x86.h                   |   4 +-
 lib/member/rte_xxh64_avx512.h                 |   6 +-
 lib/mempool/mempool_trace.h                   |   8 +-
 lib/mempool/rte_mempool_trace_fp.h            |   4 +-
 lib/meter/rte_meter.h                         |   8 +-
 lib/mldev/mldev_utils.h                       |   8 +-
 lib/mldev/rte_mldev_core.h                    |   8 +-
 lib/mldev/rte_mldev_pmd.h                     |   8 +-
 lib/net/rte_ether.h                           |   8 +-
 lib/net/rte_net.h                             |   8 +-
 lib/net/rte_sctp.h                            |   8 +-
 lib/node/rte_node_eth_api.h                   |   8 +-
 lib/node/rte_node_ip4_api.h                   |   8 +-
 lib/node/rte_node_ip6_api.h                   |   6 +-
 lib/node/rte_node_udp4_input_api.h            |   8 +-
 lib/pci/rte_pci.h                             |   8 +-
 lib/pdcp/rte_pdcp.h                           |   8 +-
 lib/pipeline/rte_pipeline.h                   |   8 +-
 lib/pipeline/rte_port_in_action.h             |   8 +-
 lib/pipeline/rte_swx_ctl.h                    |   8 +-
 lib/pipeline/rte_swx_extern.h                 |   8 +-
 lib/pipeline/rte_swx_ipsec.h                  |   8 +-
 lib/pipeline/rte_swx_pipeline.h               |   8 +-
 lib/pipeline/rte_swx_pipeline_spec.h          |   8 +-
 lib/pipeline/rte_table_action.h               |   8 +-
 lib/port/rte_port.h                           |   8 +-
 lib/port/rte_port_ethdev.h                    |   8 +-
 lib/port/rte_port_eventdev.h                  |   8 +-
 lib/port/rte_port_fd.h                        |   8 +-
 lib/port/rte_port_frag.h                      |   8 +-
 lib/port/rte_port_ras.h                       |   8 +-
 lib/port/rte_port_ring.h                      |   8 +-
 lib/port/rte_port_sched.h                     |   8 +-
 lib/port/rte_port_source_sink.h               |   8 +-
 lib/port/rte_port_sym_crypto.h                |   8 +-
 lib/port/rte_swx_port.h                       |   8 +-
 lib/port/rte_swx_port_ethdev.h                |   8 +-
 lib/port/rte_swx_port_fd.h                    |   8 +-
 lib/port/rte_swx_port_ring.h                  |   8 +-
 lib/port/rte_swx_port_source_sink.h           |   8 +-
 lib/rawdev/rte_rawdev.h                       |   6 +-
 lib/rawdev/rte_rawdev_pmd.h                   |   8 +-
 lib/rcu/rte_rcu_qsbr.h                        |   8 +-
 lib/regexdev/rte_regexdev.h                   |   8 +-
 lib/ring/rte_ring.h                           |   6 +-
 lib/ring/rte_ring_core.h                      |   8 +-
 lib/ring/rte_ring_elem.h                      |   8 +-
 lib/ring/rte_ring_hts.h                       |   4 +-
 lib/ring/rte_ring_peek.h                      |   4 +-
 lib/ring/rte_ring_peek_zc.h                   |   4 +-
 lib/ring/rte_ring_rts.h                       |   4 +-
 lib/sched/rte_approx.h                        |   8 +-
 lib/sched/rte_pie.h                           |   8 +-
 lib/sched/rte_red.h                           |   8 +-
 lib/sched/rte_sched.h                         |   8 +-
 lib/sched/rte_sched_common.h                  |   6 +-
 lib/security/rte_security.h                   |   8 +-
 lib/security/rte_security_driver.h            |   6 +-
 lib/stack/rte_stack.h                         |   8 +-
 lib/table/rte_lru.h                           |  12 +-
 lib/table/rte_lru_arm64.h                     |   8 +-
 lib/table/rte_lru_x86.h                       |   8 -
 lib/table/rte_swx_hash_func.h                 |   8 +-
 lib/table/rte_swx_keycmp.h                    |   8 +-
 lib/table/rte_swx_table.h                     |   8 +-
 lib/table/rte_swx_table_em.h                  |   8 +-
 lib/table/rte_swx_table_learner.h             |   8 +-
 lib/table/rte_swx_table_selector.h            |   8 +-
 lib/table/rte_swx_table_wm.h                  |   8 +-
 lib/table/rte_table.h                         |   8 +-
 lib/table/rte_table_acl.h                     |   8 +-
 lib/table/rte_table_array.h                   |   8 +-
 lib/table/rte_table_hash.h                    |   8 +-
 lib/table/rte_table_hash_cuckoo.h             |   8 +-
 lib/table/rte_table_hash_func.h               |  12 +-
 lib/table/rte_table_lpm.h                     |   8 +-
 lib/table/rte_table_lpm_ipv6.h                |   8 +-
 lib/table/rte_table_stub.h                    |   8 +-
 lib/telemetry/rte_telemetry.h                 |   8 +-
 lib/vhost/rte_vdpa.h                          |   8 +-
 lib/vhost/rte_vhost.h                         |   8 +-
 lib/vhost/rte_vhost_async.h                   |   8 +-
 lib/vhost/rte_vhost_crypto.h                  |   4 +-
 lib/vhost/vdpa_driver.h                       |   8 +-
 285 files changed, 2266 insertions(+), 1000 deletions(-)

-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [PATCH v8 1/6] dpdk: do not force C linkage on include file dependencies
  2024-09-17 10:48           ` [PATCH v8 0/6] Improve EAL bit operations API Mattias Rönnblom
@ 2024-09-17 10:48             ` Mattias Rönnblom
  2024-09-18  9:04               ` [PATCH v9 0/6] Improve EAL bit operations API Mattias Rönnblom
  2024-09-17 10:48             ` [PATCH v8 2/6] eal: extend bit manipulation functionality Mattias Rönnblom
                               ` (4 subsequent siblings)
  5 siblings, 1 reply; 63+ messages in thread
From: Mattias Rönnblom @ 2024-09-17 10:48 UTC (permalink / raw)
  To: dev
  Cc: hofors, Heng Wang, Stephen Hemminger, Tyler Retzlaff,
	Morten Brørup, Jack Bond-Preston, David Marchand,
	Chengwen Feng, Mattias Rönnblom

Ensure that 'extern "C" { /../ }' does not cover files included from a
particular header file, and address minor issues resulting from this
change of order.

Dealing with C++ should be delegated to the individual include file level,
rather than being imposed by the user of that file. For example,
forcing C linkage prevents _Generic macros from being replaced with
overloaded static inline functions in C++ translation units.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>

--

PATCH v8:
 * Fix issues in rte_memcpy_64.h causing build failures on ARM.

PATCH v7:
 * Fix issues in rte_io.h, rte_pause.h and rte_thash_gfni.h causing
   build failures on ARM. (David Marchand)
 * Fix issue in rte_vfio.h, causing build failures unless VFIO_PRESENT.

PATCH v6:
 * Add missing extern "C" in rte_atomic.h, rte_cpuflags.h, rte_io.h,
   rte_vect.h.
 * Fix 32-bit x86 build issues in rte_atomic.h.

PATCH v5:
 * rte_dmadev.h was still including files under extern "C" { /../ }.
   (Chengwen Feng)
 * Fix rte_byteorder.h, broken on 32-bit x86.
---
 app/test/packet_burst_generator.h             |  8 +++---
 app/test/virtual_pmd.h                        |  4 +--
 drivers/bus/auxiliary/bus_auxiliary_driver.h  |  8 +++---
 drivers/bus/cdx/bus_cdx_driver.h              |  8 +++---
 drivers/bus/dpaa/include/fsl_qman.h           |  8 +++---
 drivers/bus/fslmc/bus_fslmc_driver.h          |  8 +++---
 drivers/bus/pci/bus_pci_driver.h              |  8 +++---
 drivers/bus/pci/rte_bus_pci.h                 |  8 +++---
 drivers/bus/platform/bus_platform_driver.h    |  8 +++---
 drivers/bus/vdev/bus_vdev_driver.h            |  8 +++---
 drivers/bus/vmbus/bus_vmbus_driver.h          |  8 +++---
 drivers/bus/vmbus/rte_bus_vmbus.h             |  8 +++---
 drivers/dma/cnxk/cnxk_dma_event_dp.h          |  8 +++---
 drivers/dma/ioat/ioat_hw_defs.h               |  4 +--
 drivers/event/dlb2/rte_pmd_dlb2.h             |  8 +++---
 drivers/mempool/dpaa2/rte_dpaa2_mempool.h     |  6 ++---
 drivers/net/avp/rte_avp_fifo.h                |  8 +++---
 drivers/net/bonding/rte_eth_bond.h            |  4 +--
 drivers/net/i40e/rte_pmd_i40e.h               |  8 +++---
 drivers/net/mlx5/mlx5_trace.h                 |  8 +++---
 drivers/net/ring/rte_eth_ring.h               |  4 +--
 drivers/net/vhost/rte_eth_vhost.h             |  8 +++---
 drivers/raw/ifpga/afu_pmd_core.h              |  8 +++---
 drivers/raw/ifpga/afu_pmd_he_hssi.h           |  6 ++---
 drivers/raw/ifpga/afu_pmd_he_lpbk.h           |  6 ++---
 drivers/raw/ifpga/afu_pmd_he_mem.h            |  6 ++---
 drivers/raw/ifpga/afu_pmd_n3000.h             |  6 ++---
 drivers/raw/ifpga/rte_pmd_afu.h               |  4 +--
 drivers/raw/ifpga/rte_pmd_ifpga.h             |  4 +--
 examples/ethtool/lib/rte_ethtool.h            |  8 +++---
 examples/qos_sched/main.h                     |  4 +--
 examples/vm_power_manager/channel_manager.h   |  8 +++---
 lib/acl/rte_acl_osdep.h                       |  8 +++---
 lib/bbdev/rte_bbdev.h                         |  8 +++---
 lib/bbdev/rte_bbdev_op.h                      |  8 +++---
 lib/bbdev/rte_bbdev_pmd.h                     |  8 +++---
 lib/bpf/bpf_def.h                             |  8 +++---
 lib/compressdev/rte_comp.h                    |  4 +--
 lib/compressdev/rte_compressdev.h             |  6 ++---
 lib/compressdev/rte_compressdev_internal.h    |  8 +++---
 lib/compressdev/rte_compressdev_pmd.h         |  8 +++---
 lib/cryptodev/cryptodev_pmd.h                 |  8 +++---
 lib/cryptodev/cryptodev_trace.h               |  8 +++---
 lib/cryptodev/rte_crypto.h                    |  8 +++---
 lib/cryptodev/rte_crypto_asym.h               |  8 +++---
 lib/cryptodev/rte_crypto_sym.h                |  8 +++---
 lib/cryptodev/rte_cryptodev.h                 |  8 +++---
 lib/cryptodev/rte_cryptodev_trace_fp.h        |  4 +--
 lib/dispatcher/rte_dispatcher.h               |  8 +++---
 lib/dmadev/rte_dmadev.h                       |  8 ++++++
 lib/eal/arm/include/rte_atomic_32.h           |  4 +--
 lib/eal/arm/include/rte_atomic_64.h           |  8 +++---
 lib/eal/arm/include/rte_byteorder.h           |  8 +++---
 lib/eal/arm/include/rte_cpuflags_32.h         |  8 +++---
 lib/eal/arm/include/rte_cpuflags_64.h         |  8 +++---
 lib/eal/arm/include/rte_cycles_32.h           |  4 +--
 lib/eal/arm/include/rte_cycles_64.h           |  4 +--
 lib/eal/arm/include/rte_io.h                  |  8 +++---
 lib/eal/arm/include/rte_io_64.h               |  8 +++---
 lib/eal/arm/include/rte_memcpy_32.h           |  8 +++---
 lib/eal/arm/include/rte_memcpy_64.h           | 12 ++++-----
 lib/eal/arm/include/rte_pause.h               |  8 +++---
 lib/eal/arm/include/rte_pause_32.h            |  6 ++---
 lib/eal/arm/include/rte_pause_64.h            |  8 +++---
 lib/eal/arm/include/rte_power_intrinsics.h    |  8 +++---
 lib/eal/arm/include/rte_prefetch_32.h         |  8 +++---
 lib/eal/arm/include/rte_prefetch_64.h         |  8 +++---
 lib/eal/arm/include/rte_rwlock.h              |  4 +--
 lib/eal/arm/include/rte_spinlock.h            |  6 ++---
 lib/eal/freebsd/include/rte_os.h              |  8 +++---
 lib/eal/include/bus_driver.h                  |  8 +++---
 lib/eal/include/dev_driver.h                  |  6 ++---
 lib/eal/include/eal_trace_internal.h          |  8 +++---
 lib/eal/include/generic/rte_atomic.h          |  8 ++++++
 lib/eal/include/generic/rte_byteorder.h       |  8 ++++++
 lib/eal/include/generic/rte_cpuflags.h        |  8 ++++++
 lib/eal/include/generic/rte_cycles.h          |  8 ++++++
 lib/eal/include/generic/rte_io.h              |  8 ++++++
 lib/eal/include/generic/rte_memcpy.h          |  8 ++++++
 lib/eal/include/generic/rte_pause.h           |  8 ++++++
 .../include/generic/rte_power_intrinsics.h    |  8 ++++++
 lib/eal/include/generic/rte_prefetch.h        |  8 ++++++
 lib/eal/include/generic/rte_rwlock.h          |  8 +++---
 lib/eal/include/generic/rte_spinlock.h        |  8 ++++++
 lib/eal/include/generic/rte_vect.h            |  8 ++++++
 lib/eal/include/rte_alarm.h                   |  4 +--
 lib/eal/include/rte_bitmap.h                  |  8 +++---
 lib/eal/include/rte_bus.h                     |  8 +++---
 lib/eal/include/rte_class.h                   |  4 +--
 lib/eal/include/rte_common.h                  |  8 +++---
 lib/eal/include/rte_dev.h                     |  8 +++---
 lib/eal/include/rte_devargs.h                 |  8 +++---
 lib/eal/include/rte_eal_trace.h               |  4 +--
 lib/eal/include/rte_errno.h                   |  4 +--
 lib/eal/include/rte_fbarray.h                 |  8 +++---
 lib/eal/include/rte_keepalive.h               |  6 ++---
 lib/eal/include/rte_mcslock.h                 |  8 +++---
 lib/eal/include/rte_memory.h                  |  8 +++---
 lib/eal/include/rte_pci_dev_features.h        |  4 +--
 lib/eal/include/rte_pflock.h                  |  8 +++---
 lib/eal/include/rte_random.h                  |  4 +--
 lib/eal/include/rte_seqcount.h                |  8 +++---
 lib/eal/include/rte_seqlock.h                 |  8 +++---
 lib/eal/include/rte_service.h                 |  8 +++---
 lib/eal/include/rte_service_component.h       |  4 +--
 lib/eal/include/rte_stdatomic.h               |  5 +---
 lib/eal/include/rte_string_fns.h              | 17 +++++++++----
 lib/eal/include/rte_tailq.h                   |  6 ++---
 lib/eal/include/rte_ticketlock.h              |  8 +++---
 lib/eal/include/rte_time.h                    |  6 ++---
 lib/eal/include/rte_trace.h                   |  8 +++---
 lib/eal/include/rte_trace_point.h             |  8 +++---
 lib/eal/include/rte_trace_point_register.h    |  8 +++---
 lib/eal/include/rte_uuid.h                    |  8 +++---
 lib/eal/include/rte_version.h                 |  6 ++---
 lib/eal/include/rte_vfio.h                    |  8 +++---
 lib/eal/linux/include/rte_os.h                |  8 +++---
 lib/eal/loongarch/include/rte_atomic.h        |  6 ++---
 lib/eal/loongarch/include/rte_byteorder.h     |  4 +--
 lib/eal/loongarch/include/rte_cpuflags.h      |  8 +++---
 lib/eal/loongarch/include/rte_cycles.h        |  4 +--
 lib/eal/loongarch/include/rte_io.h            |  4 +--
 lib/eal/loongarch/include/rte_memcpy.h        |  4 +--
 lib/eal/loongarch/include/rte_pause.h         |  8 +++---
 .../loongarch/include/rte_power_intrinsics.h  |  8 +++---
 lib/eal/loongarch/include/rte_prefetch.h      |  8 +++---
 lib/eal/loongarch/include/rte_rwlock.h        |  4 +--
 lib/eal/loongarch/include/rte_spinlock.h      |  6 ++---
 lib/eal/ppc/include/rte_atomic.h              |  6 ++---
 lib/eal/ppc/include/rte_byteorder.h           |  6 ++---
 lib/eal/ppc/include/rte_cpuflags.h            |  8 +++---
 lib/eal/ppc/include/rte_cycles.h              |  8 +++---
 lib/eal/ppc/include/rte_io.h                  |  4 +--
 lib/eal/ppc/include/rte_memcpy.h              |  4 +--
 lib/eal/ppc/include/rte_pause.h               |  8 +++---
 lib/eal/ppc/include/rte_power_intrinsics.h    |  8 +++---
 lib/eal/ppc/include/rte_prefetch.h            |  8 +++---
 lib/eal/ppc/include/rte_rwlock.h              |  4 +--
 lib/eal/ppc/include/rte_spinlock.h            |  8 +++---
 lib/eal/riscv/include/rte_atomic.h            |  8 +++---
 lib/eal/riscv/include/rte_byteorder.h         |  8 +++---
 lib/eal/riscv/include/rte_cpuflags.h          |  8 +++---
 lib/eal/riscv/include/rte_cycles.h            |  4 +--
 lib/eal/riscv/include/rte_io.h                |  4 +--
 lib/eal/riscv/include/rte_memcpy.h            |  4 +--
 lib/eal/riscv/include/rte_pause.h             |  8 +++---
 lib/eal/riscv/include/rte_power_intrinsics.h  |  8 +++---
 lib/eal/riscv/include/rte_prefetch.h          |  8 +++---
 lib/eal/riscv/include/rte_rwlock.h            |  4 +--
 lib/eal/riscv/include/rte_spinlock.h          |  6 ++---
 lib/eal/windows/include/pthread.h             |  6 ++---
 lib/eal/windows/include/regex.h               |  8 +++---
 lib/eal/windows/include/rte_windows.h         |  8 +++---
 lib/eal/x86/include/rte_atomic.h              | 25 +++++++++++++------
 lib/eal/x86/include/rte_byteorder.h           | 16 ++++++------
 lib/eal/x86/include/rte_cpuflags.h            |  8 +++---
 lib/eal/x86/include/rte_cycles.h              |  8 +++---
 lib/eal/x86/include/rte_io.h                  |  8 +++---
 lib/eal/x86/include/rte_pause.h               |  7 +++---
 lib/eal/x86/include/rte_power_intrinsics.h    |  8 +++---
 lib/eal/x86/include/rte_prefetch.h            |  8 +++---
 lib/eal/x86/include/rte_rwlock.h              |  6 ++---
 lib/eal/x86/include/rte_spinlock.h            |  9 +++----
 lib/ethdev/ethdev_driver.h                    |  8 +++---
 lib/ethdev/ethdev_pci.h                       |  8 +++---
 lib/ethdev/ethdev_trace.h                     |  8 +++---
 lib/ethdev/ethdev_vdev.h                      |  8 +++---
 lib/ethdev/rte_cman.h                         |  4 +--
 lib/ethdev/rte_dev_info.h                     |  4 +--
 lib/ethdev/rte_ethdev.h                       |  8 +++---
 lib/ethdev/rte_ethdev_trace_fp.h              |  4 +--
 lib/eventdev/event_timer_adapter_pmd.h        |  4 +--
 lib/eventdev/eventdev_pmd.h                   |  8 +++---
 lib/eventdev/eventdev_pmd_pci.h               |  8 +++---
 lib/eventdev/eventdev_pmd_vdev.h              |  8 +++---
 lib/eventdev/eventdev_trace.h                 |  8 +++---
 lib/eventdev/rte_event_crypto_adapter.h       |  8 +++---
 lib/eventdev/rte_event_eth_rx_adapter.h       |  8 +++---
 lib/eventdev/rte_event_eth_tx_adapter.h       |  8 +++---
 lib/eventdev/rte_event_ring.h                 |  8 +++---
 lib/eventdev/rte_event_timer_adapter.h        |  8 +++---
 lib/eventdev/rte_eventdev.h                   |  8 +++---
 lib/eventdev/rte_eventdev_trace_fp.h          |  4 +--
 lib/graph/rte_graph_model_mcore_dispatch.h    |  8 +++---
 lib/graph/rte_graph_worker.h                  |  6 ++---
 lib/gso/rte_gso.h                             |  6 ++---
 lib/hash/rte_fbk_hash.h                       |  8 +++---
 lib/hash/rte_hash_crc.h                       |  8 +++---
 lib/hash/rte_jhash.h                          |  8 +++---
 lib/hash/rte_thash.h                          |  8 +++---
 lib/hash/rte_thash_gfni.h                     |  8 +++---
 lib/ip_frag/rte_ip_frag.h                     |  8 +++---
 lib/ipsec/rte_ipsec.h                         |  8 +++---
 lib/log/rte_log.h                             |  8 +++---
 lib/lpm/rte_lpm.h                             |  8 +++---
 lib/member/rte_member.h                       |  8 +++---
 lib/member/rte_member_sketch.h                |  6 ++---
 lib/member/rte_member_sketch_avx512.h         |  8 +++---
 lib/member/rte_member_x86.h                   |  4 +--
 lib/member/rte_xxh64_avx512.h                 |  6 ++---
 lib/mempool/mempool_trace.h                   |  8 +++---
 lib/mempool/rte_mempool_trace_fp.h            |  4 +--
 lib/meter/rte_meter.h                         |  8 +++---
 lib/mldev/mldev_utils.h                       |  8 +++---
 lib/mldev/rte_mldev_core.h                    |  8 +++---
 lib/mldev/rte_mldev_pmd.h                     |  8 +++---
 lib/net/rte_ether.h                           |  8 +++---
 lib/net/rte_net.h                             |  8 +++---
 lib/net/rte_sctp.h                            |  8 +++---
 lib/node/rte_node_eth_api.h                   |  8 +++---
 lib/node/rte_node_ip4_api.h                   |  8 +++---
 lib/node/rte_node_ip6_api.h                   |  6 ++---
 lib/node/rte_node_udp4_input_api.h            |  8 +++---
 lib/pci/rte_pci.h                             |  8 +++---
 lib/pdcp/rte_pdcp.h                           |  8 +++---
 lib/pipeline/rte_pipeline.h                   |  8 +++---
 lib/pipeline/rte_port_in_action.h             |  8 +++---
 lib/pipeline/rte_swx_ctl.h                    |  8 +++---
 lib/pipeline/rte_swx_extern.h                 |  8 +++---
 lib/pipeline/rte_swx_ipsec.h                  |  8 +++---
 lib/pipeline/rte_swx_pipeline.h               |  8 +++---
 lib/pipeline/rte_swx_pipeline_spec.h          |  8 +++---
 lib/pipeline/rte_table_action.h               |  8 +++---
 lib/port/rte_port.h                           |  8 +++---
 lib/port/rte_port_ethdev.h                    |  8 +++---
 lib/port/rte_port_eventdev.h                  |  8 +++---
 lib/port/rte_port_fd.h                        |  8 +++---
 lib/port/rte_port_frag.h                      |  8 +++---
 lib/port/rte_port_ras.h                       |  8 +++---
 lib/port/rte_port_ring.h                      |  8 +++---
 lib/port/rte_port_sched.h                     |  8 +++---
 lib/port/rte_port_source_sink.h               |  8 +++---
 lib/port/rte_port_sym_crypto.h                |  8 +++---
 lib/port/rte_swx_port.h                       |  8 +++---
 lib/port/rte_swx_port_ethdev.h                |  8 +++---
 lib/port/rte_swx_port_fd.h                    |  8 +++---
 lib/port/rte_swx_port_ring.h                  |  8 +++---
 lib/port/rte_swx_port_source_sink.h           |  8 +++---
 lib/rawdev/rte_rawdev.h                       |  6 ++---
 lib/rawdev/rte_rawdev_pmd.h                   |  8 +++---
 lib/rcu/rte_rcu_qsbr.h                        |  8 +++---
 lib/regexdev/rte_regexdev.h                   |  8 +++---
 lib/ring/rte_ring.h                           |  6 ++---
 lib/ring/rte_ring_core.h                      |  8 +++---
 lib/ring/rte_ring_elem.h                      |  8 +++---
 lib/ring/rte_ring_hts.h                       |  4 +--
 lib/ring/rte_ring_peek.h                      |  4 +--
 lib/ring/rte_ring_peek_zc.h                   |  4 +--
 lib/ring/rte_ring_rts.h                       |  4 +--
 lib/sched/rte_approx.h                        |  8 +++---
 lib/sched/rte_pie.h                           |  8 +++---
 lib/sched/rte_red.h                           |  8 +++---
 lib/sched/rte_sched.h                         |  8 +++---
 lib/sched/rte_sched_common.h                  |  6 ++---
 lib/security/rte_security.h                   |  8 +++---
 lib/security/rte_security_driver.h            |  6 ++---
 lib/stack/rte_stack.h                         |  8 +++---
 lib/table/rte_lru.h                           | 12 +++------
 lib/table/rte_lru_arm64.h                     |  8 +++---
 lib/table/rte_lru_x86.h                       |  8 ------
 lib/table/rte_swx_hash_func.h                 |  8 +++---
 lib/table/rte_swx_keycmp.h                    |  8 +++---
 lib/table/rte_swx_table.h                     |  8 +++---
 lib/table/rte_swx_table_em.h                  |  8 +++---
 lib/table/rte_swx_table_learner.h             |  8 +++---
 lib/table/rte_swx_table_selector.h            |  8 +++---
 lib/table/rte_swx_table_wm.h                  |  8 +++---
 lib/table/rte_table.h                         |  8 +++---
 lib/table/rte_table_acl.h                     |  8 +++---
 lib/table/rte_table_array.h                   |  8 +++---
 lib/table/rte_table_hash.h                    |  8 +++---
 lib/table/rte_table_hash_cuckoo.h             |  8 +++---
 lib/table/rte_table_hash_func.h               | 12 ++++++---
 lib/table/rte_table_lpm.h                     |  8 +++---
 lib/table/rte_table_lpm_ipv6.h                |  8 +++---
 lib/table/rte_table_stub.h                    |  8 +++---
 lib/telemetry/rte_telemetry.h                 |  8 +++---
 lib/vhost/rte_vdpa.h                          |  8 +++---
 lib/vhost/rte_vhost.h                         |  8 +++---
 lib/vhost/rte_vhost_async.h                   |  8 +++---
 lib/vhost/rte_vhost_crypto.h                  |  4 +--
 lib/vhost/vdpa_driver.h                       |  8 +++---
 282 files changed, 1083 insertions(+), 982 deletions(-)

diff --git a/app/test/packet_burst_generator.h b/app/test/packet_burst_generator.h
index b99286f50e..cce41bcd0f 100644
--- a/app/test/packet_burst_generator.h
+++ b/app/test/packet_burst_generator.h
@@ -5,10 +5,6 @@
 #ifndef PACKET_BURST_GENERATOR_H_
 #define PACKET_BURST_GENERATOR_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_mbuf.h>
 #include <rte_ether.h>
 #include <rte_arp.h>
@@ -17,6 +13,10 @@ extern "C" {
 #include <rte_tcp.h>
 #include <rte_sctp.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define IPV4_ADDR(a, b, c, d)(((a & 0xff) << 24) | ((b & 0xff) << 16) | \
 		((c & 0xff) << 8) | (d & 0xff))
 
diff --git a/app/test/virtual_pmd.h b/app/test/virtual_pmd.h
index 120b58b273..a5a71d7cb4 100644
--- a/app/test/virtual_pmd.h
+++ b/app/test/virtual_pmd.h
@@ -5,12 +5,12 @@
 #ifndef __VIRTUAL_ETHDEV_H_
 #define __VIRTUAL_ETHDEV_H_
 
+#include <rte_ether.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_ether.h>
-
 int
 virtual_ethdev_init(void);
 
diff --git a/drivers/bus/auxiliary/bus_auxiliary_driver.h b/drivers/bus/auxiliary/bus_auxiliary_driver.h
index 58fb7c7f69..40ab1f0912 100644
--- a/drivers/bus/auxiliary/bus_auxiliary_driver.h
+++ b/drivers/bus/auxiliary/bus_auxiliary_driver.h
@@ -11,10 +11,6 @@
  * Auxiliary Bus Interface.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdio.h>
 #include <stdlib.h>
 #include <limits.h>
@@ -28,6 +24,10 @@ extern "C" {
 #include <dev_driver.h>
 #include <rte_kvargs.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_BUS_AUXILIARY_NAME "auxiliary"
 
 /* Forward declarations */
diff --git a/drivers/bus/cdx/bus_cdx_driver.h b/drivers/bus/cdx/bus_cdx_driver.h
index 211f8e406b..d390e7b5a1 100644
--- a/drivers/bus/cdx/bus_cdx_driver.h
+++ b/drivers/bus/cdx/bus_cdx_driver.h
@@ -10,10 +10,6 @@
  * AMD CDX bus interface
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdlib.h>
 #include <inttypes.h>
 #include <linux/types.h>
@@ -22,6 +18,10 @@ extern "C" {
 #include <dev_driver.h>
 #include <rte_interrupts.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Forward declarations */
 struct rte_cdx_device;
 struct rte_cdx_driver;
diff --git a/drivers/bus/dpaa/include/fsl_qman.h b/drivers/bus/dpaa/include/fsl_qman.h
index c0677976e8..f39007b84d 100644
--- a/drivers/bus/dpaa/include/fsl_qman.h
+++ b/drivers/bus/dpaa/include/fsl_qman.h
@@ -8,14 +8,14 @@
 #ifndef __FSL_QMAN_H
 #define __FSL_QMAN_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <dpaa_rbtree.h>
 #include <rte_compat.h>
 #include <rte_eventdev.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* FQ lookups (turn this on for 64bit user-space) */
 #ifdef RTE_ARCH_64
 #define CONFIG_FSL_QMAN_FQ_LOOKUP
diff --git a/drivers/bus/fslmc/bus_fslmc_driver.h b/drivers/bus/fslmc/bus_fslmc_driver.h
index 7ac5fe6ff1..3095458133 100644
--- a/drivers/bus/fslmc/bus_fslmc_driver.h
+++ b/drivers/bus/fslmc/bus_fslmc_driver.h
@@ -13,10 +13,6 @@
  * RTE FSLMC Bus Interface
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdio.h>
 #include <stdlib.h>
 #include <limits.h>
@@ -40,6 +36,10 @@ extern "C" {
 #include "portal/dpaa2_hw_pvt.h"
 #include "portal/dpaa2_hw_dpio.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define FSLMC_OBJECT_MAX_LEN 32   /**< Length of each device on bus */
 
 #define DPAA2_INVALID_MBUF_SEQN        0
diff --git a/drivers/bus/pci/bus_pci_driver.h b/drivers/bus/pci/bus_pci_driver.h
index be32263a82..2cc1119072 100644
--- a/drivers/bus/pci/bus_pci_driver.h
+++ b/drivers/bus/pci/bus_pci_driver.h
@@ -6,14 +6,14 @@
 #ifndef BUS_PCI_DRIVER_H
 #define BUS_PCI_DRIVER_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_bus_pci.h>
 #include <dev_driver.h>
 #include <rte_compat.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Pathname of PCI devices directory. */
 __rte_internal
 const char *rte_pci_get_sysfs_path(void);
diff --git a/drivers/bus/pci/rte_bus_pci.h b/drivers/bus/pci/rte_bus_pci.h
index a3798cb1cb..19a7b15b99 100644
--- a/drivers/bus/pci/rte_bus_pci.h
+++ b/drivers/bus/pci/rte_bus_pci.h
@@ -11,10 +11,6 @@
  * PCI device & driver interface
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdio.h>
 #include <stdlib.h>
 #include <limits.h>
@@ -27,6 +23,10 @@ extern "C" {
 #include <rte_interrupts.h>
 #include <rte_pci.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Forward declarations */
 struct rte_pci_device;
 struct rte_pci_driver;
diff --git a/drivers/bus/platform/bus_platform_driver.h b/drivers/bus/platform/bus_platform_driver.h
index 5ac54fb739..a6f246f7c4 100644
--- a/drivers/bus/platform/bus_platform_driver.h
+++ b/drivers/bus/platform/bus_platform_driver.h
@@ -10,10 +10,6 @@
  * Platform bus interface.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stddef.h>
 #include <stdint.h>
 
@@ -23,6 +19,10 @@ extern "C" {
 #include <rte_os.h>
 #include <rte_vfio.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Forward declarations */
 struct rte_platform_bus;
 struct rte_platform_device;
diff --git a/drivers/bus/vdev/bus_vdev_driver.h b/drivers/bus/vdev/bus_vdev_driver.h
index bc7e30d7c6..cba1fb5269 100644
--- a/drivers/bus/vdev/bus_vdev_driver.h
+++ b/drivers/bus/vdev/bus_vdev_driver.h
@@ -5,15 +5,15 @@
 #ifndef BUS_VDEV_DRIVER_H
 #define BUS_VDEV_DRIVER_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_bus_vdev.h>
 #include <rte_compat.h>
 #include <dev_driver.h>
 #include <rte_devargs.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct rte_vdev_device {
 	RTE_TAILQ_ENTRY(rte_vdev_device) next;      /**< Next attached vdev */
 	struct rte_device device;               /**< Inherit core device */
diff --git a/drivers/bus/vmbus/bus_vmbus_driver.h b/drivers/bus/vmbus/bus_vmbus_driver.h
index e2475a642d..bc394208de 100644
--- a/drivers/bus/vmbus/bus_vmbus_driver.h
+++ b/drivers/bus/vmbus/bus_vmbus_driver.h
@@ -6,14 +6,14 @@
 #ifndef BUS_VMBUS_DRIVER_H
 #define BUS_VMBUS_DRIVER_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_bus_vmbus.h>
 #include <rte_compat.h>
 #include <dev_driver.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct vmbus_channel;
 struct vmbus_mon_page;
 
diff --git a/drivers/bus/vmbus/rte_bus_vmbus.h b/drivers/bus/vmbus/rte_bus_vmbus.h
index 9467bd8f3d..fd18bca73c 100644
--- a/drivers/bus/vmbus/rte_bus_vmbus.h
+++ b/drivers/bus/vmbus/rte_bus_vmbus.h
@@ -11,10 +11,6 @@
  *
  * VMBUS Interface
  */
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdio.h>
 #include <stdlib.h>
 #include <limits.h>
@@ -28,6 +24,10 @@ extern "C" {
 #include <rte_interrupts.h>
 #include <rte_vmbus_reg.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Forward declarations */
 struct rte_vmbus_device;
 struct rte_vmbus_driver;
diff --git a/drivers/dma/cnxk/cnxk_dma_event_dp.h b/drivers/dma/cnxk/cnxk_dma_event_dp.h
index 06b5ca8279..8c6cf5dd9a 100644
--- a/drivers/dma/cnxk/cnxk_dma_event_dp.h
+++ b/drivers/dma/cnxk/cnxk_dma_event_dp.h
@@ -5,16 +5,16 @@
 #ifndef _CNXK_DMA_EVENT_DP_H_
 #define _CNXK_DMA_EVENT_DP_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_common.h>
 #include <rte_compat.h>
 #include <rte_eventdev.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 __rte_internal
 uint16_t cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events);
 
diff --git a/drivers/dma/ioat/ioat_hw_defs.h b/drivers/dma/ioat/ioat_hw_defs.h
index dc3493a78f..11893951f2 100644
--- a/drivers/dma/ioat/ioat_hw_defs.h
+++ b/drivers/dma/ioat/ioat_hw_defs.h
@@ -5,12 +5,12 @@
 #ifndef IOAT_HW_DEFS_H
 #define IOAT_HW_DEFS_H
 
+#include <stdint.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-
 #define IOAT_PCI_CHANERR_INT_OFFSET	0x180
 
 #define IOAT_VER_3_0	0x30
diff --git a/drivers/event/dlb2/rte_pmd_dlb2.h b/drivers/event/dlb2/rte_pmd_dlb2.h
index 334c6c356d..dba7fd2f43 100644
--- a/drivers/event/dlb2/rte_pmd_dlb2.h
+++ b/drivers/event/dlb2/rte_pmd_dlb2.h
@@ -11,14 +11,14 @@
 #ifndef _RTE_PMD_DLB2_H_
 #define _RTE_PMD_DLB2_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_compat.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @warning
  * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
diff --git a/drivers/mempool/dpaa2/rte_dpaa2_mempool.h b/drivers/mempool/dpaa2/rte_dpaa2_mempool.h
index 7fe3d93f61..0286090b1b 100644
--- a/drivers/mempool/dpaa2/rte_dpaa2_mempool.h
+++ b/drivers/mempool/dpaa2/rte_dpaa2_mempool.h
@@ -12,13 +12,13 @@
  *
  */
 
+#include <rte_compat.h>
+#include <rte_mempool.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_compat.h>
-#include <rte_mempool.h>
-
 /**
  * Get BPID corresponding to the packet pool
  *
diff --git a/drivers/net/avp/rte_avp_fifo.h b/drivers/net/avp/rte_avp_fifo.h
index c1658da685..879de3b1c0 100644
--- a/drivers/net/avp/rte_avp_fifo.h
+++ b/drivers/net/avp/rte_avp_fifo.h
@@ -8,10 +8,6 @@
 
 #include "rte_avp_common.h"
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #ifdef __KERNEL__
 /* Write memory barrier for kernel compiles */
 #define AVP_WMB() smp_wmb()
@@ -27,6 +23,10 @@ extern "C" {
 #ifndef __KERNEL__
 #include <rte_debug.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Initializes the avp fifo structure
  */
diff --git a/drivers/net/bonding/rte_eth_bond.h b/drivers/net/bonding/rte_eth_bond.h
index f10165f2c6..e59ff8793e 100644
--- a/drivers/net/bonding/rte_eth_bond.h
+++ b/drivers/net/bonding/rte_eth_bond.h
@@ -17,12 +17,12 @@
  * load balancing of network ports
  */
 
+#include <rte_ether.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_ether.h>
-
 /* Supported modes of operation of link bonding library  */
 
 #define BONDING_MODE_ROUND_ROBIN		(0)
diff --git a/drivers/net/i40e/rte_pmd_i40e.h b/drivers/net/i40e/rte_pmd_i40e.h
index a802f989e9..5af7e2330f 100644
--- a/drivers/net/i40e/rte_pmd_i40e.h
+++ b/drivers/net/i40e/rte_pmd_i40e.h
@@ -14,14 +14,14 @@
  *
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include <rte_ethdev.h>
 #include <rte_ether.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Response sent back to i40e driver from user app after callback
  */
diff --git a/drivers/net/mlx5/mlx5_trace.h b/drivers/net/mlx5/mlx5_trace.h
index 888d96f60b..a8f0b372c8 100644
--- a/drivers/net/mlx5/mlx5_trace.h
+++ b/drivers/net/mlx5/mlx5_trace.h
@@ -11,14 +11,14 @@
  * API for mlx5 PMD trace support
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <mlx5_prm.h>
 #include <rte_mbuf.h>
 #include <rte_trace_point.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* TX burst subroutines trace points. */
 RTE_TRACE_POINT_FP(
 	rte_pmd_mlx5_trace_tx_entry,
diff --git a/drivers/net/ring/rte_eth_ring.h b/drivers/net/ring/rte_eth_ring.h
index 59e074d0ad..98292c7b33 100644
--- a/drivers/net/ring/rte_eth_ring.h
+++ b/drivers/net/ring/rte_eth_ring.h
@@ -5,12 +5,12 @@
 #ifndef _RTE_ETH_RING_H_
 #define _RTE_ETH_RING_H_
 
+#include <rte_ring.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_ring.h>
-
 /**
  * Create a new ethdev port from a set of rings
  *
diff --git a/drivers/net/vhost/rte_eth_vhost.h b/drivers/net/vhost/rte_eth_vhost.h
index 0e68b9f668..6ec59a7adc 100644
--- a/drivers/net/vhost/rte_eth_vhost.h
+++ b/drivers/net/vhost/rte_eth_vhost.h
@@ -5,15 +5,15 @@
 #ifndef _RTE_ETH_VHOST_H_
 #define _RTE_ETH_VHOST_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <stdbool.h>
 
 #include <rte_vhost.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /*
  * Event description.
  */
diff --git a/drivers/raw/ifpga/afu_pmd_core.h b/drivers/raw/ifpga/afu_pmd_core.h
index a8f1afe343..abf9e491f7 100644
--- a/drivers/raw/ifpga/afu_pmd_core.h
+++ b/drivers/raw/ifpga/afu_pmd_core.h
@@ -5,10 +5,6 @@
 #ifndef AFU_PMD_CORE_H
 #define AFU_PMD_CORE_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <stdio.h>
 #include <unistd.h>
@@ -20,6 +16,10 @@ extern "C" {
 
 #include "ifpga_rawdev.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define AFU_RAWDEV_MAX_DRVS  32
 
 struct afu_rawdev;
diff --git a/drivers/raw/ifpga/afu_pmd_he_hssi.h b/drivers/raw/ifpga/afu_pmd_he_hssi.h
index aebbe32d54..282289d912 100644
--- a/drivers/raw/ifpga/afu_pmd_he_hssi.h
+++ b/drivers/raw/ifpga/afu_pmd_he_hssi.h
@@ -5,13 +5,13 @@
 #ifndef AFU_PMD_HE_HSSI_H
 #define AFU_PMD_HE_HSSI_H
 
+#include "afu_pmd_core.h"
+#include "rte_pmd_afu.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "afu_pmd_core.h"
-#include "rte_pmd_afu.h"
-
 #define HE_HSSI_UUID_L    0xbb370242ac130002
 #define HE_HSSI_UUID_H    0x823c334c98bf11ea
 #define NUM_HE_HSSI_PORTS 8
diff --git a/drivers/raw/ifpga/afu_pmd_he_lpbk.h b/drivers/raw/ifpga/afu_pmd_he_lpbk.h
index eab7b55199..67b3653c21 100644
--- a/drivers/raw/ifpga/afu_pmd_he_lpbk.h
+++ b/drivers/raw/ifpga/afu_pmd_he_lpbk.h
@@ -5,13 +5,13 @@
 #ifndef AFU_PMD_HE_LPBK_H
 #define AFU_PMD_HE_LPBK_H
 
+#include "afu_pmd_core.h"
+#include "rte_pmd_afu.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "afu_pmd_core.h"
-#include "rte_pmd_afu.h"
-
 #define HE_LPBK_UUID_L     0xb94b12284c31e02b
 #define HE_LPBK_UUID_H     0x56e203e9864f49a7
 #define HE_MEM_LPBK_UUID_L 0xbb652a578330a8eb
diff --git a/drivers/raw/ifpga/afu_pmd_he_mem.h b/drivers/raw/ifpga/afu_pmd_he_mem.h
index 998ca92416..41854d8c58 100644
--- a/drivers/raw/ifpga/afu_pmd_he_mem.h
+++ b/drivers/raw/ifpga/afu_pmd_he_mem.h
@@ -5,13 +5,13 @@
 #ifndef AFU_PMD_HE_MEM_H
 #define AFU_PMD_HE_MEM_H
 
+#include "afu_pmd_core.h"
+#include "rte_pmd_afu.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "afu_pmd_core.h"
-#include "rte_pmd_afu.h"
-
 #define HE_MEM_TG_UUID_L  0xa3dc5b831f5cecbb
 #define HE_MEM_TG_UUID_H  0x4dadea342c7848cb
 
diff --git a/drivers/raw/ifpga/afu_pmd_n3000.h b/drivers/raw/ifpga/afu_pmd_n3000.h
index 403cc64b91..f6b6e07c6b 100644
--- a/drivers/raw/ifpga/afu_pmd_n3000.h
+++ b/drivers/raw/ifpga/afu_pmd_n3000.h
@@ -5,13 +5,13 @@
 #ifndef AFU_PMD_N3000_H
 #define AFU_PMD_N3000_H
 
+#include "afu_pmd_core.h"
+#include "rte_pmd_afu.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "afu_pmd_core.h"
-#include "rte_pmd_afu.h"
-
 #define N3000_AFU_UUID_L  0xc000c9660d824272
 #define N3000_AFU_UUID_H  0x9aeffe5f84570612
 #define N3000_NLB0_UUID_L 0xf89e433683f9040b
diff --git a/drivers/raw/ifpga/rte_pmd_afu.h b/drivers/raw/ifpga/rte_pmd_afu.h
index 5403ed25f5..0edacc3a9c 100644
--- a/drivers/raw/ifpga/rte_pmd_afu.h
+++ b/drivers/raw/ifpga/rte_pmd_afu.h
@@ -14,12 +14,12 @@
  *
  */
 
+#include <stdint.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-
 #define RTE_PMD_AFU_N3000_NLB   1
 #define RTE_PMD_AFU_N3000_DMA   2
 
diff --git a/drivers/raw/ifpga/rte_pmd_ifpga.h b/drivers/raw/ifpga/rte_pmd_ifpga.h
index 791543f2cd..36b7f9c018 100644
--- a/drivers/raw/ifpga/rte_pmd_ifpga.h
+++ b/drivers/raw/ifpga/rte_pmd_ifpga.h
@@ -14,12 +14,12 @@
  *
  */
 
+#include <stdint.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-
 #define IFPGA_MAX_PORT_NUM   4
 
 /**
diff --git a/examples/ethtool/lib/rte_ethtool.h b/examples/ethtool/lib/rte_ethtool.h
index d27e0102b1..c7dd3d9755 100644
--- a/examples/ethtool/lib/rte_ethtool.h
+++ b/examples/ethtool/lib/rte_ethtool.h
@@ -30,14 +30,14 @@
  * rte_ethtool_net_set_rx_mode      net_device_ops::ndo_set_rx_mode
  *
  */
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <rte_ethdev.h>
 #include <linux/ethtool.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Retrieve the Ethernet device driver information according to
  * attributes described by ethtool data structure, ethtool_drvinfo.
diff --git a/examples/qos_sched/main.h b/examples/qos_sched/main.h
index 04e77a4a10..ea66df0434 100644
--- a/examples/qos_sched/main.h
+++ b/examples/qos_sched/main.h
@@ -5,12 +5,12 @@
 #ifndef _MAIN_H_
 #define _MAIN_H_
 
+#include <rte_sched.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_sched.h>
-
 #define RTE_LOGTYPE_APP RTE_LOGTYPE_USER1
 
 /*
diff --git a/examples/vm_power_manager/channel_manager.h b/examples/vm_power_manager/channel_manager.h
index eb989b20ad..6f70539815 100644
--- a/examples/vm_power_manager/channel_manager.h
+++ b/examples/vm_power_manager/channel_manager.h
@@ -5,16 +5,16 @@
 #ifndef CHANNEL_MANAGER_H_
 #define CHANNEL_MANAGER_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <linux/limits.h>
 #include <linux/un.h>
 #include <stdbool.h>
 
 #include <rte_stdatomic.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Maximum name length including '\0' terminator */
 #define CHANNEL_MGR_MAX_NAME_LEN    64
 
diff --git a/lib/acl/rte_acl_osdep.h b/lib/acl/rte_acl_osdep.h
index 3c1dc402ca..e4c7d07c69 100644
--- a/lib/acl/rte_acl_osdep.h
+++ b/lib/acl/rte_acl_osdep.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_ACL_OSDEP_H_
 #define _RTE_ACL_OSDEP_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  *
@@ -49,6 +45,10 @@ extern "C" {
 #include <rte_cpuflags.h>
 #include <rte_debug.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/bbdev/rte_bbdev.h b/lib/bbdev/rte_bbdev.h
index 0cbfdd1c95..9e83dd2bb0 100644
--- a/lib/bbdev/rte_bbdev.h
+++ b/lib/bbdev/rte_bbdev.h
@@ -20,10 +20,6 @@
  * from the same queue.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <stdbool.h>
 
@@ -32,6 +28,10 @@ extern "C" {
 
 #include "rte_bbdev_op.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifndef RTE_BBDEV_MAX_DEVS
 #define RTE_BBDEV_MAX_DEVS 128  /**< Max number of devices */
 #endif
diff --git a/lib/bbdev/rte_bbdev_op.h b/lib/bbdev/rte_bbdev_op.h
index 459631d0d0..6f4bae7d0f 100644
--- a/lib/bbdev/rte_bbdev_op.h
+++ b/lib/bbdev/rte_bbdev_op.h
@@ -11,10 +11,6 @@
  * Defines wireless base band layer 1 operations and capabilities
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_compat.h>
@@ -23,6 +19,10 @@ extern "C" {
 #include <rte_memory.h>
 #include <rte_mempool.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Number of columns in sub-block interleaver (36.212, section 5.1.4.1.1) */
 #define RTE_BBDEV_TURBO_C_SUBBLOCK (32)
 /* Maximum size of Transport Block (36.213, Table, Table 7.1.7.2.5-1) */
diff --git a/lib/bbdev/rte_bbdev_pmd.h b/lib/bbdev/rte_bbdev_pmd.h
index 442b23943d..0a1738fc05 100644
--- a/lib/bbdev/rte_bbdev_pmd.h
+++ b/lib/bbdev/rte_bbdev_pmd.h
@@ -14,15 +14,15 @@
  * bbdev interface. User applications should not use this API.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <rte_log.h>
 
 #include "rte_bbdev.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Suggested value for SW based devices */
 #define RTE_BBDEV_DEFAULT_MAX_NB_QUEUES RTE_MAX_LCORE
 
diff --git a/lib/bpf/bpf_def.h b/lib/bpf/bpf_def.h
index f08cd9106b..9f2e162914 100644
--- a/lib/bpf/bpf_def.h
+++ b/lib/bpf/bpf_def.h
@@ -7,10 +7,6 @@
 #ifndef _RTE_BPF_DEF_H_
 #define _RTE_BPF_DEF_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  *
@@ -25,6 +21,10 @@ extern "C" {
 
 #include <stdint.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 
 /*
  * The instruction encodings.
diff --git a/lib/compressdev/rte_comp.h b/lib/compressdev/rte_comp.h
index 830a240b6b..d66a4b1cb9 100644
--- a/lib/compressdev/rte_comp.h
+++ b/lib/compressdev/rte_comp.h
@@ -11,12 +11,12 @@
  * RTE definitions for Data Compression Service
  */
 
+#include <rte_mbuf.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_mbuf.h>
-
 /**
  * compression service feature flags
  *
diff --git a/lib/compressdev/rte_compressdev.h b/lib/compressdev/rte_compressdev.h
index e0294a18bd..b3392553a6 100644
--- a/lib/compressdev/rte_compressdev.h
+++ b/lib/compressdev/rte_compressdev.h
@@ -13,13 +13,13 @@
  * Defines comp device APIs for the provisioning of compression operations.
  */
 
+
+#include "rte_comp.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-
-#include "rte_comp.h"
-
 /**
  * Parameter log base 2 range description.
  * Final value will be 2^value.
diff --git a/lib/compressdev/rte_compressdev_internal.h b/lib/compressdev/rte_compressdev_internal.h
index 67f8b51a37..a980d74cbf 100644
--- a/lib/compressdev/rte_compressdev_internal.h
+++ b/lib/compressdev/rte_compressdev_internal.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_COMPRESSDEV_INTERNAL_H_
 #define _RTE_COMPRESSDEV_INTERNAL_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /* rte_compressdev_internal.h
  * This file holds Compressdev private data structures.
  */
@@ -16,6 +12,10 @@ extern "C" {
 
 #include "rte_comp.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_COMPRESSDEV_NAME_MAX_LEN	(64)
 /**< Max length of name of comp PMD */
 
diff --git a/lib/compressdev/rte_compressdev_pmd.h b/lib/compressdev/rte_compressdev_pmd.h
index 32e29c9d16..ea721f014d 100644
--- a/lib/compressdev/rte_compressdev_pmd.h
+++ b/lib/compressdev/rte_compressdev_pmd.h
@@ -13,10 +13,6 @@
  * them directly.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <string.h>
 
 #include <dev_driver.h>
@@ -24,6 +20,10 @@ extern "C" {
 #include "rte_compressdev.h"
 #include "rte_compressdev_internal.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_COMPRESSDEV_PMD_NAME_ARG			("name")
 #define RTE_COMPRESSDEV_PMD_SOCKET_ID_ARG		("socket_id")
 
diff --git a/lib/cryptodev/cryptodev_pmd.h b/lib/cryptodev/cryptodev_pmd.h
index 6c114f7181..3e2e2673b8 100644
--- a/lib/cryptodev/cryptodev_pmd.h
+++ b/lib/cryptodev/cryptodev_pmd.h
@@ -5,10 +5,6 @@
 #ifndef _CRYPTODEV_PMD_H_
 #define _CRYPTODEV_PMD_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /** @file
  * RTE Crypto PMD APIs
  *
@@ -28,6 +24,10 @@ extern "C" {
 #include "rte_crypto.h"
 #include "rte_cryptodev.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 
 #define RTE_CRYPTODEV_PMD_DEFAULT_MAX_NB_QUEUE_PAIRS	8
 
diff --git a/lib/cryptodev/cryptodev_trace.h b/lib/cryptodev/cryptodev_trace.h
index 935f0d564b..e186f0f3c1 100644
--- a/lib/cryptodev/cryptodev_trace.h
+++ b/lib/cryptodev/cryptodev_trace.h
@@ -11,14 +11,14 @@
  * API for cryptodev trace support
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_trace_point.h>
 
 #include "rte_cryptodev.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 RTE_TRACE_POINT(
 	rte_cryptodev_trace_configure,
 	RTE_TRACE_POINT_ARGS(uint8_t dev_id,
diff --git a/lib/cryptodev/rte_crypto.h b/lib/cryptodev/rte_crypto.h
index dbc2700da5..dcf4a36fb2 100644
--- a/lib/cryptodev/rte_crypto.h
+++ b/lib/cryptodev/rte_crypto.h
@@ -11,10 +11,6 @@
  * RTE Cryptography Common Definitions
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 
 #include <rte_mbuf.h>
 #include <rte_memory.h>
@@ -24,6 +20,10 @@ extern "C" {
 #include "rte_crypto_sym.h"
 #include "rte_crypto_asym.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Crypto operation types */
 enum rte_crypto_op_type {
 	RTE_CRYPTO_OP_TYPE_UNDEFINED,
diff --git a/lib/cryptodev/rte_crypto_asym.h b/lib/cryptodev/rte_crypto_asym.h
index 39d3da3952..4b7ea36961 100644
--- a/lib/cryptodev/rte_crypto_asym.h
+++ b/lib/cryptodev/rte_crypto_asym.h
@@ -14,10 +14,6 @@
  * asymmetric crypto operations.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <string.h>
 #include <stdint.h>
 
@@ -27,6 +23,10 @@ extern "C" {
 
 #include "rte_crypto_sym.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct rte_cryptodev_asym_session;
 
 /** asym key exchange operation type name strings */
diff --git a/lib/cryptodev/rte_crypto_sym.h b/lib/cryptodev/rte_crypto_sym.h
index 53b18b9412..fb73024010 100644
--- a/lib/cryptodev/rte_crypto_sym.h
+++ b/lib/cryptodev/rte_crypto_sym.h
@@ -14,10 +14,6 @@
  * as supported symmetric crypto operation combinations.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <string.h>
 
 #include <rte_compat.h>
@@ -26,6 +22,10 @@ extern "C" {
 #include <rte_mempool.h>
 #include <rte_common.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Crypto IO Vector (in analogy with struct iovec)
  * Supposed be used to pass input/output data buffers for crypto data-path
diff --git a/lib/cryptodev/rte_cryptodev.h b/lib/cryptodev/rte_cryptodev.h
index bec947f6d5..8051c5a6a3 100644
--- a/lib/cryptodev/rte_cryptodev.h
+++ b/lib/cryptodev/rte_cryptodev.h
@@ -14,10 +14,6 @@
  * authentication operations.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include "rte_kvargs.h"
 #include "rte_crypto.h"
@@ -1859,6 +1855,10 @@ int rte_cryptodev_remove_deq_callback(uint8_t dev_id,
 				      struct rte_cryptodev_cb *cb);
 
 #include <rte_cryptodev_core.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
 /**
  *
  * Dequeue a burst of processed crypto operations from a queue on the crypto
diff --git a/lib/cryptodev/rte_cryptodev_trace_fp.h b/lib/cryptodev/rte_cryptodev_trace_fp.h
index dbfbc7b2e5..f23f882804 100644
--- a/lib/cryptodev/rte_cryptodev_trace_fp.h
+++ b/lib/cryptodev/rte_cryptodev_trace_fp.h
@@ -5,12 +5,12 @@
 #ifndef _RTE_CRYPTODEV_TRACE_FP_H_
 #define _RTE_CRYPTODEV_TRACE_FP_H_
 
+#include <rte_trace_point.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_trace_point.h>
-
 RTE_TRACE_POINT_FP(
 	rte_cryptodev_trace_enqueue_burst,
 	RTE_TRACE_POINT_ARGS(uint8_t dev_id, uint16_t qp_id, void **ops,
diff --git a/lib/dispatcher/rte_dispatcher.h b/lib/dispatcher/rte_dispatcher.h
index d8182d5f2c..ba2c353073 100644
--- a/lib/dispatcher/rte_dispatcher.h
+++ b/lib/dispatcher/rte_dispatcher.h
@@ -19,16 +19,16 @@
  * event device.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdbool.h>
 #include <stdint.h>
 
 #include <rte_compat.h>
 #include <rte_eventdev.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Function prototype for match callbacks.
  *
diff --git a/lib/dmadev/rte_dmadev.h b/lib/dmadev/rte_dmadev.h
index 5474a5281d..d174d325a1 100644
--- a/lib/dmadev/rte_dmadev.h
+++ b/lib/dmadev/rte_dmadev.h
@@ -772,9 +772,17 @@ struct rte_dma_sge {
 	uint32_t length; /**< The DMA operation length. */
 };
 
+#ifdef __cplusplus
+}
+#endif
+
 #include "rte_dmadev_core.h"
 #include "rte_dmadev_trace_fp.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**@{@name DMA operation flag
  * @see rte_dma_copy()
  * @see rte_dma_copy_sg()
diff --git a/lib/eal/arm/include/rte_atomic_32.h b/lib/eal/arm/include/rte_atomic_32.h
index 62fc33773d..0b9a0dfa30 100644
--- a/lib/eal/arm/include/rte_atomic_32.h
+++ b/lib/eal/arm/include/rte_atomic_32.h
@@ -9,12 +9,12 @@
 #  error Platform must be built with RTE_FORCE_INTRINSICS
 #endif
 
+#include "generic/rte_atomic.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_atomic.h"
-
 #define	rte_mb()  __sync_synchronize()
 
 #define	rte_wmb() do { asm volatile ("dmb st" : : : "memory"); } while (0)
diff --git a/lib/eal/arm/include/rte_atomic_64.h b/lib/eal/arm/include/rte_atomic_64.h
index 7c99fc0a02..181bb60929 100644
--- a/lib/eal/arm/include/rte_atomic_64.h
+++ b/lib/eal/arm/include/rte_atomic_64.h
@@ -10,14 +10,14 @@
 #  error Platform must be built with RTE_FORCE_INTRINSICS
 #endif
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include "generic/rte_atomic.h"
 #include <rte_branch_prediction.h>
 #include <rte_debug.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define rte_mb() asm volatile("dmb osh" : : : "memory")
 
 #define rte_wmb() asm volatile("dmb oshst" : : : "memory")
diff --git a/lib/eal/arm/include/rte_byteorder.h b/lib/eal/arm/include/rte_byteorder.h
index ff02052f2e..a0aaff4a28 100644
--- a/lib/eal/arm/include/rte_byteorder.h
+++ b/lib/eal/arm/include/rte_byteorder.h
@@ -9,14 +9,14 @@
 #  error Platform must be built with RTE_FORCE_INTRINSICS
 #endif
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <rte_common.h>
 #include "generic/rte_byteorder.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* ARM architecture is bi-endian (both big and little). */
 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
 
diff --git a/lib/eal/arm/include/rte_cpuflags_32.h b/lib/eal/arm/include/rte_cpuflags_32.h
index 770b09b99d..7e33acd9fb 100644
--- a/lib/eal/arm/include/rte_cpuflags_32.h
+++ b/lib/eal/arm/include/rte_cpuflags_32.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_CPUFLAGS_ARM32_H_
 #define _RTE_CPUFLAGS_ARM32_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * Enumeration of all CPU features supported
  */
@@ -46,6 +42,10 @@ enum rte_cpu_flag_t {
 
 #include "generic/rte_cpuflags.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/arm/include/rte_cpuflags_64.h b/lib/eal/arm/include/rte_cpuflags_64.h
index afe70209c3..f84633159e 100644
--- a/lib/eal/arm/include/rte_cpuflags_64.h
+++ b/lib/eal/arm/include/rte_cpuflags_64.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_CPUFLAGS_ARM64_H_
 #define _RTE_CPUFLAGS_ARM64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * Enumeration of all CPU features supported
  */
@@ -40,6 +36,10 @@ enum rte_cpu_flag_t {
 
 #include "generic/rte_cpuflags.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/arm/include/rte_cycles_32.h b/lib/eal/arm/include/rte_cycles_32.h
index 859cd2e5bb..2b20c8c6f5 100644
--- a/lib/eal/arm/include/rte_cycles_32.h
+++ b/lib/eal/arm/include/rte_cycles_32.h
@@ -15,12 +15,12 @@
 
 #include <time.h>
 
+#include "generic/rte_cycles.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_cycles.h"
-
 /**
  * Read the time base register.
  *
diff --git a/lib/eal/arm/include/rte_cycles_64.h b/lib/eal/arm/include/rte_cycles_64.h
index 8b05302f47..bb76e4d7e0 100644
--- a/lib/eal/arm/include/rte_cycles_64.h
+++ b/lib/eal/arm/include/rte_cycles_64.h
@@ -6,12 +6,12 @@
 #ifndef _RTE_CYCLES_ARM64_H_
 #define _RTE_CYCLES_ARM64_H_
 
+#include "generic/rte_cycles.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_cycles.h"
-
 /** Read generic counter frequency */
 static __rte_always_inline uint64_t
 __rte_arm64_cntfrq(void)
diff --git a/lib/eal/arm/include/rte_io.h b/lib/eal/arm/include/rte_io.h
index f4e66e6bad..ca1a353bed 100644
--- a/lib/eal/arm/include/rte_io.h
+++ b/lib/eal/arm/include/rte_io.h
@@ -5,16 +5,16 @@
 #ifndef _RTE_IO_ARM_H_
 #define _RTE_IO_ARM_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #ifdef RTE_ARCH_64
 #include "rte_io_64.h"
 #else
 #include "generic/rte_io.h"
 #endif
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/arm/include/rte_io_64.h b/lib/eal/arm/include/rte_io_64.h
index 96da7789ce..88db82a7eb 100644
--- a/lib/eal/arm/include/rte_io_64.h
+++ b/lib/eal/arm/include/rte_io_64.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_IO_ARM64_H_
 #define _RTE_IO_ARM64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #define RTE_OVERRIDE_IO_H
@@ -17,6 +13,10 @@ extern "C" {
 #include <rte_compat.h>
 #include "rte_atomic_64.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static __rte_always_inline uint8_t
 rte_read8_relaxed(const volatile void *addr)
 {
diff --git a/lib/eal/arm/include/rte_memcpy_32.h b/lib/eal/arm/include/rte_memcpy_32.h
index fb3245b59c..99fd5757ca 100644
--- a/lib/eal/arm/include/rte_memcpy_32.h
+++ b/lib/eal/arm/include/rte_memcpy_32.h
@@ -8,10 +8,6 @@
 #include <stdint.h>
 #include <string.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include "generic/rte_memcpy.h"
 
 #ifdef RTE_ARCH_ARM_NEON_MEMCPY
@@ -23,6 +19,10 @@ extern "C" {
 /* ARM NEON Intrinsics are used to copy data */
 #include <arm_neon.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void
 rte_mov16(uint8_t *dst, const uint8_t *src)
 {
diff --git a/lib/eal/arm/include/rte_memcpy_64.h b/lib/eal/arm/include/rte_memcpy_64.h
index 85ad587bd3..5adc238149 100644
--- a/lib/eal/arm/include/rte_memcpy_64.h
+++ b/lib/eal/arm/include/rte_memcpy_64.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_MEMCPY_ARM64_H_
 #define _RTE_MEMCPY_ARM64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <string.h>
 
@@ -18,6 +14,10 @@ extern "C" {
 #include <rte_common.h>
 #include <rte_branch_prediction.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /*
  * The memory copy performance differs on different AArch64 micro-architectures.
  * And the most recent glibc (e.g. 2.23 or later) can provide a better memcpy()
@@ -363,10 +363,10 @@ rte_mov256(uint8_t *dst, const uint8_t *src)
 
 #define rte_memcpy(d, s, n)	memcpy((d), (s), (n))
 
-#endif /* RTE_ARCH_ARM64_MEMCPY */
-
 #ifdef __cplusplus
 }
 #endif
 
+#endif /* RTE_ARCH_ARM64_MEMCPY */
+
 #endif /* _RTE_MEMCPY_ARM_64_H_ */
diff --git a/lib/eal/arm/include/rte_pause.h b/lib/eal/arm/include/rte_pause.h
index 6c7002ad98..b8a3d64b3a 100644
--- a/lib/eal/arm/include/rte_pause.h
+++ b/lib/eal/arm/include/rte_pause.h
@@ -5,16 +5,16 @@
 #ifndef _RTE_PAUSE_ARM_H_
 #define _RTE_PAUSE_ARM_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #ifdef RTE_ARCH_64
 #include <rte_pause_64.h>
 #else
 #include <rte_pause_32.h>
 #endif
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/arm/include/rte_pause_32.h b/lib/eal/arm/include/rte_pause_32.h
index d4768c7a98..7870fac763 100644
--- a/lib/eal/arm/include/rte_pause_32.h
+++ b/lib/eal/arm/include/rte_pause_32.h
@@ -5,13 +5,13 @@
 #ifndef _RTE_PAUSE_ARM32_H_
 #define _RTE_PAUSE_ARM32_H_
 
+#include <rte_common.h>
+#include "generic/rte_pause.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_common.h>
-#include "generic/rte_pause.h"
-
 static inline void rte_pause(void)
 {
 }
diff --git a/lib/eal/arm/include/rte_pause_64.h b/lib/eal/arm/include/rte_pause_64.h
index 9e2dbf3531..1526bf87cc 100644
--- a/lib/eal/arm/include/rte_pause_64.h
+++ b/lib/eal/arm/include/rte_pause_64.h
@@ -6,10 +6,6 @@
 #ifndef _RTE_PAUSE_ARM64_H_
 #define _RTE_PAUSE_ARM64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 #include <rte_stdatomic.h>
 
@@ -19,6 +15,10 @@ extern "C" {
 
 #include "generic/rte_pause.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void rte_pause(void)
 {
 	asm volatile("yield" ::: "memory");
diff --git a/lib/eal/arm/include/rte_power_intrinsics.h b/lib/eal/arm/include/rte_power_intrinsics.h
index 9e498e9ebf..5481f45ad3 100644
--- a/lib/eal/arm/include/rte_power_intrinsics.h
+++ b/lib/eal/arm/include/rte_power_intrinsics.h
@@ -5,14 +5,14 @@
 #ifndef _RTE_POWER_INTRINSIC_ARM_H_
 #define _RTE_POWER_INTRINSIC_ARM_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 
 #include "generic/rte_power_intrinsics.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/arm/include/rte_prefetch_32.h b/lib/eal/arm/include/rte_prefetch_32.h
index 0e9a140c8a..619bf27c79 100644
--- a/lib/eal/arm/include/rte_prefetch_32.h
+++ b/lib/eal/arm/include/rte_prefetch_32.h
@@ -5,14 +5,14 @@
 #ifndef _RTE_PREFETCH_ARM32_H_
 #define _RTE_PREFETCH_ARM32_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include <rte_common.h>
 #include "generic/rte_prefetch.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void rte_prefetch0(const volatile void *p)
 {
 	asm volatile ("pld [%0]" : : "r" (p));
diff --git a/lib/eal/arm/include/rte_prefetch_64.h b/lib/eal/arm/include/rte_prefetch_64.h
index 22cba48e29..4f60123b8b 100644
--- a/lib/eal/arm/include/rte_prefetch_64.h
+++ b/lib/eal/arm/include/rte_prefetch_64.h
@@ -5,14 +5,14 @@
 #ifndef _RTE_PREFETCH_ARM_64_H_
 #define _RTE_PREFETCH_ARM_64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include <rte_common.h>
 #include "generic/rte_prefetch.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void rte_prefetch0(const volatile void *p)
 {
 	asm volatile ("PRFM PLDL1KEEP, [%0]" : : "r" (p));
diff --git a/lib/eal/arm/include/rte_rwlock.h b/lib/eal/arm/include/rte_rwlock.h
index 18bb37b036..727cabafec 100644
--- a/lib/eal/arm/include/rte_rwlock.h
+++ b/lib/eal/arm/include/rte_rwlock.h
@@ -5,12 +5,12 @@
 #ifndef _RTE_RWLOCK_ARM_H_
 #define _RTE_RWLOCK_ARM_H_
 
+#include "generic/rte_rwlock.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_rwlock.h"
-
 static inline void
 rte_rwlock_read_lock_tm(rte_rwlock_t *rwl)
 {
diff --git a/lib/eal/arm/include/rte_spinlock.h b/lib/eal/arm/include/rte_spinlock.h
index a973763c23..a5d01b0d21 100644
--- a/lib/eal/arm/include/rte_spinlock.h
+++ b/lib/eal/arm/include/rte_spinlock.h
@@ -9,13 +9,13 @@
 #  error Platform must be built with RTE_FORCE_INTRINSICS
 #endif
 
+#include <rte_common.h>
+#include "generic/rte_spinlock.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_common.h>
-#include "generic/rte_spinlock.h"
-
 static inline int rte_tm_supported(void)
 {
 	return 0;
diff --git a/lib/eal/freebsd/include/rte_os.h b/lib/eal/freebsd/include/rte_os.h
index 003468caff..f31f6af12d 100644
--- a/lib/eal/freebsd/include/rte_os.h
+++ b/lib/eal/freebsd/include/rte_os.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_OS_H_
 #define _RTE_OS_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * This header should contain any definition
  * which is not supported natively or named differently in FreeBSD.
@@ -17,6 +13,10 @@ extern "C" {
 #include <pthread_np.h>
 #include <sys/queue.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* These macros are compatible with system's sys/queue.h. */
 #define RTE_TAILQ_HEAD(name, type) TAILQ_HEAD(name, type)
 #define RTE_TAILQ_ENTRY(type) TAILQ_ENTRY(type)
diff --git a/lib/eal/include/bus_driver.h b/lib/eal/include/bus_driver.h
index 7b85a17a09..60527b75b6 100644
--- a/lib/eal/include/bus_driver.h
+++ b/lib/eal/include/bus_driver.h
@@ -5,16 +5,16 @@
 #ifndef BUS_DRIVER_H
 #define BUS_DRIVER_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_bus.h>
 #include <rte_compat.h>
 #include <rte_dev.h>
 #include <rte_eal.h>
 #include <rte_tailq.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct rte_devargs;
 struct rte_device;
 
diff --git a/lib/eal/include/dev_driver.h b/lib/eal/include/dev_driver.h
index 5efa8c437e..f7a9c17dc3 100644
--- a/lib/eal/include/dev_driver.h
+++ b/lib/eal/include/dev_driver.h
@@ -5,13 +5,13 @@
 #ifndef DEV_DRIVER_H
 #define DEV_DRIVER_H
 
+#include <rte_common.h>
+#include <rte_dev.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_common.h>
-#include <rte_dev.h>
-
 /**
  * A structure describing a device driver.
  */
diff --git a/lib/eal/include/eal_trace_internal.h b/lib/eal/include/eal_trace_internal.h
index 09c354717f..50f91d0929 100644
--- a/lib/eal/include/eal_trace_internal.h
+++ b/lib/eal/include/eal_trace_internal.h
@@ -11,16 +11,16 @@
  * API for EAL trace support
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_alarm.h>
 #include <rte_interrupts.h>
 #include <rte_trace_point.h>
 
 #include "eal_interrupts.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Alarm */
 RTE_TRACE_POINT(
 	rte_eal_trace_alarm_set,
diff --git a/lib/eal/include/generic/rte_atomic.h b/lib/eal/include/generic/rte_atomic.h
index f859707744..0a4f3f8528 100644
--- a/lib/eal/include/generic/rte_atomic.h
+++ b/lib/eal/include/generic/rte_atomic.h
@@ -17,6 +17,10 @@
 #include <rte_common.h>
 #include <rte_stdatomic.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __DOXYGEN__
 
 /** @name Memory Barrier
@@ -1156,4 +1160,8 @@ rte_atomic128_cmp_exchange(rte_int128_t *dst,
 
 #endif /* __DOXYGEN__ */
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* _RTE_ATOMIC_H_ */
diff --git a/lib/eal/include/generic/rte_byteorder.h b/lib/eal/include/generic/rte_byteorder.h
index f1c04ba83e..7973d6326f 100644
--- a/lib/eal/include/generic/rte_byteorder.h
+++ b/lib/eal/include/generic/rte_byteorder.h
@@ -24,6 +24,10 @@
 #include <rte_common.h>
 #include <rte_config.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /*
  * Compile-time endianness detection
  */
@@ -251,4 +255,8 @@ static uint64_t rte_be_to_cpu_64(rte_be64_t x);
 #endif
 #endif
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* _RTE_BYTEORDER_H_ */
diff --git a/lib/eal/include/generic/rte_cpuflags.h b/lib/eal/include/generic/rte_cpuflags.h
index d35551e931..bfe9df4516 100644
--- a/lib/eal/include/generic/rte_cpuflags.h
+++ b/lib/eal/include/generic/rte_cpuflags.h
@@ -15,6 +15,10 @@
 
 #include <rte_compat.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Structure used to describe platform-specific intrinsics that may or may not
  * be supported at runtime.
@@ -104,4 +108,8 @@ rte_cpu_getauxval(unsigned long type);
 int
 rte_cpu_strcmp_auxval(unsigned long type, const char *str);
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* _RTE_CPUFLAGS_H_ */
diff --git a/lib/eal/include/generic/rte_cycles.h b/lib/eal/include/generic/rte_cycles.h
index 075e899f5a..7cfd51f0eb 100644
--- a/lib/eal/include/generic/rte_cycles.h
+++ b/lib/eal/include/generic/rte_cycles.h
@@ -16,6 +16,10 @@
 #include <rte_debug.h>
 #include <rte_atomic.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define MS_PER_S 1000
 #define US_PER_S 1000000
 #define NS_PER_S 1000000000
@@ -175,4 +179,8 @@ void rte_delay_us_sleep(unsigned int us);
  */
 void rte_delay_us_callback_register(void(*userfunc)(unsigned int));
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* _RTE_CYCLES_H_ */
diff --git a/lib/eal/include/generic/rte_io.h b/lib/eal/include/generic/rte_io.h
index ebcf8051e1..73b0f7a9f4 100644
--- a/lib/eal/include/generic/rte_io.h
+++ b/lib/eal/include/generic/rte_io.h
@@ -17,6 +17,10 @@
 #include <rte_compat.h>
 #include <rte_atomic.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __DOXYGEN__
 
 /**
@@ -396,4 +400,8 @@ rte_write32_wc_relaxed(uint32_t value, volatile void *addr)
 
 #endif /* RTE_OVERRIDE_IO_H */
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* _RTE_IO_H_ */
diff --git a/lib/eal/include/generic/rte_memcpy.h b/lib/eal/include/generic/rte_memcpy.h
index e7f0f8eaa9..da53b72ca8 100644
--- a/lib/eal/include/generic/rte_memcpy.h
+++ b/lib/eal/include/generic/rte_memcpy.h
@@ -5,6 +5,10 @@
 #ifndef _RTE_MEMCPY_H_
 #define _RTE_MEMCPY_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @file
  *
@@ -113,4 +117,8 @@ rte_memcpy(void *dst, const void *src, size_t n);
 
 #endif /* __DOXYGEN__ */
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* _RTE_MEMCPY_H_ */
diff --git a/lib/eal/include/generic/rte_pause.h b/lib/eal/include/generic/rte_pause.h
index f2a1eadcbd..968c0886d3 100644
--- a/lib/eal/include/generic/rte_pause.h
+++ b/lib/eal/include/generic/rte_pause.h
@@ -19,6 +19,10 @@
 #include <rte_atomic.h>
 #include <rte_stdatomic.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Pause CPU execution for a short while
  *
@@ -136,4 +140,8 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
 } while (0)
 #endif /* ! RTE_WAIT_UNTIL_EQUAL_ARCH_DEFINED */
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* _RTE_PAUSE_H_ */
diff --git a/lib/eal/include/generic/rte_power_intrinsics.h b/lib/eal/include/generic/rte_power_intrinsics.h
index ea899f1bfa..86c0559468 100644
--- a/lib/eal/include/generic/rte_power_intrinsics.h
+++ b/lib/eal/include/generic/rte_power_intrinsics.h
@@ -9,6 +9,10 @@
 
 #include <rte_spinlock.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @file
  * Advanced power management operations.
@@ -147,4 +151,8 @@ int rte_power_pause(const uint64_t tsc_timestamp);
 int rte_power_monitor_multi(const struct rte_power_monitor_cond pmc[],
 		const uint32_t num, const uint64_t tsc_timestamp);
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* _RTE_POWER_INTRINSIC_H_ */
diff --git a/lib/eal/include/generic/rte_prefetch.h b/lib/eal/include/generic/rte_prefetch.h
index 773b3b8d1e..f7ac4ab48a 100644
--- a/lib/eal/include/generic/rte_prefetch.h
+++ b/lib/eal/include/generic/rte_prefetch.h
@@ -7,6 +7,10 @@
 
 #include <rte_compat.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @file
  *
@@ -146,4 +150,8 @@ __rte_experimental
 static inline void
 rte_cldemote(const volatile void *p);
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* _RTE_PREFETCH_H_ */
diff --git a/lib/eal/include/generic/rte_rwlock.h b/lib/eal/include/generic/rte_rwlock.h
index 5f939be98c..ac0474466a 100644
--- a/lib/eal/include/generic/rte_rwlock.h
+++ b/lib/eal/include/generic/rte_rwlock.h
@@ -22,10 +22,6 @@
  *  https://locklessinc.com/articles/locks/
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <errno.h>
 
 #include <rte_branch_prediction.h>
@@ -34,6 +30,10 @@ extern "C" {
 #include <rte_pause.h>
 #include <rte_stdatomic.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * The rte_rwlock_t type.
  *
diff --git a/lib/eal/include/generic/rte_spinlock.h b/lib/eal/include/generic/rte_spinlock.h
index 23fb04896f..c2980601b2 100644
--- a/lib/eal/include/generic/rte_spinlock.h
+++ b/lib/eal/include/generic/rte_spinlock.h
@@ -25,6 +25,10 @@
 #include <rte_pause.h>
 #include <rte_stdatomic.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * The rte_spinlock_t type.
  */
@@ -318,4 +322,8 @@ __rte_warn_unused_result
 static inline int rte_spinlock_recursive_trylock_tm(
 	rte_spinlock_recursive_t *slr);
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* _RTE_SPINLOCK_H_ */
diff --git a/lib/eal/include/generic/rte_vect.h b/lib/eal/include/generic/rte_vect.h
index 1f84292a41..b87520a4d9 100644
--- a/lib/eal/include/generic/rte_vect.h
+++ b/lib/eal/include/generic/rte_vect.h
@@ -209,6 +209,10 @@ enum rte_vect_max_simd {
 	 */
 };
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Get the supported SIMD bitwidth.
  *
@@ -230,4 +234,8 @@ uint16_t rte_vect_get_max_simd_bitwidth(void);
  */
 int rte_vect_set_max_simd_bitwidth(uint16_t bitwidth);
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* _RTE_VECT_H_ */
diff --git a/lib/eal/include/rte_alarm.h b/lib/eal/include/rte_alarm.h
index 7e4d0b2407..9b4721b77f 100644
--- a/lib/eal/include/rte_alarm.h
+++ b/lib/eal/include/rte_alarm.h
@@ -14,12 +14,12 @@
  * Does not require hpet support.
  */
 
+#include <stdint.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-
 /**
  * Signature of callback back function called when an alarm goes off.
  */
diff --git a/lib/eal/include/rte_bitmap.h b/lib/eal/include/rte_bitmap.h
index ebe46000a0..abb102f1d3 100644
--- a/lib/eal/include/rte_bitmap.h
+++ b/lib/eal/include/rte_bitmap.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_BITMAP_H__
 #define __INCLUDE_RTE_BITMAP_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Bitmap
@@ -43,6 +39,10 @@ extern "C" {
 #include <rte_branch_prediction.h>
 #include <rte_prefetch.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Slab */
 #define RTE_BITMAP_SLAB_BIT_SIZE                 64
 #define RTE_BITMAP_SLAB_BIT_SIZE_LOG2            6
diff --git a/lib/eal/include/rte_bus.h b/lib/eal/include/rte_bus.h
index dfe756fb11..519f7b35f0 100644
--- a/lib/eal/include/rte_bus.h
+++ b/lib/eal/include/rte_bus.h
@@ -14,14 +14,14 @@
  * over the devices and drivers in EAL.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdio.h>
 
 #include <rte_eal.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct rte_bus;
 struct rte_device;
 
diff --git a/lib/eal/include/rte_class.h b/lib/eal/include/rte_class.h
index 16e544ec9a..7631e36e82 100644
--- a/lib/eal/include/rte_class.h
+++ b/lib/eal/include/rte_class.h
@@ -18,12 +18,12 @@
  * cryptographic co-processor (crypto), etc.
  */
 
+#include <rte_dev.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_dev.h>
-
 /** Double linked list of classes */
 RTE_TAILQ_HEAD(rte_class_list, rte_class);
 
diff --git a/lib/eal/include/rte_common.h b/lib/eal/include/rte_common.h
index eec0400dad..2486caa471 100644
--- a/lib/eal/include/rte_common.h
+++ b/lib/eal/include/rte_common.h
@@ -12,10 +12,6 @@
  * for DPDK.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <assert.h>
 #include <limits.h>
 #include <stdint.h>
@@ -26,6 +22,10 @@ extern "C" {
 /* OS specific include */
 #include <rte_os.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifndef RTE_TOOLCHAIN_MSVC
 #ifndef typeof
 #define typeof __typeof__
diff --git a/lib/eal/include/rte_dev.h b/lib/eal/include/rte_dev.h
index cefa04f905..738400e8d1 100644
--- a/lib/eal/include/rte_dev.h
+++ b/lib/eal/include/rte_dev.h
@@ -13,16 +13,16 @@
  * This file manages the list of device drivers.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdio.h>
 
 #include <rte_config.h>
 #include <rte_common.h>
 #include <rte_log.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct rte_bus;
 struct rte_devargs;
 struct rte_device;
diff --git a/lib/eal/include/rte_devargs.h b/lib/eal/include/rte_devargs.h
index 515e978bbe..ed5a4675d9 100644
--- a/lib/eal/include/rte_devargs.h
+++ b/lib/eal/include/rte_devargs.h
@@ -16,14 +16,14 @@
  * list of rte_devargs structures.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdio.h>
 
 #include <rte_dev.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct rte_bus;
 
 /**
diff --git a/lib/eal/include/rte_eal_trace.h b/lib/eal/include/rte_eal_trace.h
index c3d15bbe5e..9ad2112801 100644
--- a/lib/eal/include/rte_eal_trace.h
+++ b/lib/eal/include/rte_eal_trace.h
@@ -11,12 +11,12 @@
  * API for EAL trace support
  */
 
+#include <rte_trace_point.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_trace_point.h>
-
 /* Generic */
 RTE_TRACE_POINT(
 	rte_eal_trace_generic_void,
diff --git a/lib/eal/include/rte_errno.h b/lib/eal/include/rte_errno.h
index ba45591d24..c49818a40e 100644
--- a/lib/eal/include/rte_errno.h
+++ b/lib/eal/include/rte_errno.h
@@ -11,12 +11,12 @@
 #ifndef _RTE_ERRNO_H_
 #define _RTE_ERRNO_H_
 
+#include <rte_per_lcore.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_per_lcore.h>
-
 RTE_DECLARE_PER_LCORE(int, _rte_errno); /**< Per core error number. */
 
 /**
diff --git a/lib/eal/include/rte_fbarray.h b/lib/eal/include/rte_fbarray.h
index e33076778f..27dbfc2d6c 100644
--- a/lib/eal/include/rte_fbarray.h
+++ b/lib/eal/include/rte_fbarray.h
@@ -30,14 +30,14 @@
  * another process is using ``rte_fbarray``.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdio.h>
 
 #include <rte_rwlock.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_FBARRAY_NAME_LEN 64
 
 struct rte_fbarray {
diff --git a/lib/eal/include/rte_keepalive.h b/lib/eal/include/rte_keepalive.h
index 3ec413da01..9ff870f6b4 100644
--- a/lib/eal/include/rte_keepalive.h
+++ b/lib/eal/include/rte_keepalive.h
@@ -10,13 +10,13 @@
 #ifndef _KEEPALIVE_H_
 #define _KEEPALIVE_H_
 
+#include <rte_config.h>
+#include <rte_memory.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_config.h>
-#include <rte_memory.h>
-
 #ifndef RTE_KEEPALIVE_MAXCORES
 /**
  * Number of cores to track.
diff --git a/lib/eal/include/rte_mcslock.h b/lib/eal/include/rte_mcslock.h
index 0aeb1a09f4..bb218d2e50 100644
--- a/lib/eal/include/rte_mcslock.h
+++ b/lib/eal/include/rte_mcslock.h
@@ -19,16 +19,16 @@
  * they acquired the lock.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_lcore.h>
 #include <rte_common.h>
 #include <rte_pause.h>
 #include <rte_branch_prediction.h>
 #include <rte_stdatomic.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * The rte_mcslock_t type.
  */
diff --git a/lib/eal/include/rte_memory.h b/lib/eal/include/rte_memory.h
index 842362d527..dbd0a6bedc 100644
--- a/lib/eal/include/rte_memory.h
+++ b/lib/eal/include/rte_memory.h
@@ -15,16 +15,16 @@
 #include <stddef.h>
 #include <stdio.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_bitops.h>
 #include <rte_common.h>
 #include <rte_config.h>
 #include <rte_eal_memconfig.h>
 #include <rte_fbarray.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_PGSIZE_4K   (1ULL << 12)
 #define RTE_PGSIZE_64K  (1ULL << 16)
 #define RTE_PGSIZE_256K (1ULL << 18)
diff --git a/lib/eal/include/rte_pci_dev_features.h b/lib/eal/include/rte_pci_dev_features.h
index ee6e10590c..bc6d3d4c1f 100644
--- a/lib/eal/include/rte_pci_dev_features.h
+++ b/lib/eal/include/rte_pci_dev_features.h
@@ -5,12 +5,12 @@
 #ifndef _RTE_PCI_DEV_FEATURES_H
 #define _RTE_PCI_DEV_FEATURES_H
 
+#include <rte_pci_dev_feature_defs.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_pci_dev_feature_defs.h>
-
 #define RTE_INTR_MODE_NONE_NAME "none"
 #define RTE_INTR_MODE_LEGACY_NAME "legacy"
 #define RTE_INTR_MODE_MSI_NAME "msi"
diff --git a/lib/eal/include/rte_pflock.h b/lib/eal/include/rte_pflock.h
index 37aa223ac3..6797ce5920 100644
--- a/lib/eal/include/rte_pflock.h
+++ b/lib/eal/include/rte_pflock.h
@@ -27,14 +27,14 @@
  * All locks must be initialised before use, and only initialised once.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 #include <rte_pause.h>
 #include <rte_stdatomic.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * The rte_pflock_t type.
  */
diff --git a/lib/eal/include/rte_random.h b/lib/eal/include/rte_random.h
index 5031c6fe5f..15cbe6215a 100644
--- a/lib/eal/include/rte_random.h
+++ b/lib/eal/include/rte_random.h
@@ -11,12 +11,12 @@
  * Pseudo-random Generators in RTE
  */
 
+#include <stdint.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-
 /**
  * Seed the pseudo-random generator.
  *
diff --git a/lib/eal/include/rte_seqcount.h b/lib/eal/include/rte_seqcount.h
index 88a6746900..d71afa6ab7 100644
--- a/lib/eal/include/rte_seqcount.h
+++ b/lib/eal/include/rte_seqcount.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_SEQCOUNT_H_
 #define _RTE_SEQCOUNT_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Seqcount
@@ -27,6 +23,10 @@ extern "C" {
 #include <rte_branch_prediction.h>
 #include <rte_stdatomic.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * The RTE seqcount type.
  */
diff --git a/lib/eal/include/rte_seqlock.h b/lib/eal/include/rte_seqlock.h
index 2677bd9440..e0e94900d1 100644
--- a/lib/eal/include/rte_seqlock.h
+++ b/lib/eal/include/rte_seqlock.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_SEQLOCK_H_
 #define _RTE_SEQLOCK_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Seqlock
@@ -95,6 +91,10 @@ extern "C" {
 #include <rte_seqcount.h>
 #include <rte_spinlock.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * The RTE seqlock type.
  */
diff --git a/lib/eal/include/rte_service.h b/lib/eal/include/rte_service.h
index e49a7a877e..94919ae584 100644
--- a/lib/eal/include/rte_service.h
+++ b/lib/eal/include/rte_service.h
@@ -23,16 +23,16 @@
  * application has access to the remaining lcores as normal.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include<stdio.h>
 #include <stdint.h>
 
 #include <rte_config.h>
 #include <rte_lcore.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_SERVICE_NAME_MAX 32
 
 /* Capabilities of a service.
diff --git a/lib/eal/include/rte_service_component.h b/lib/eal/include/rte_service_component.h
index a5350c97e5..acdf45cf60 100644
--- a/lib/eal/include/rte_service_component.h
+++ b/lib/eal/include/rte_service_component.h
@@ -10,12 +10,12 @@
  * operate, and you wish to run the component using service cores
  */
 
+#include <rte_service.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_service.h>
-
 /**
  * Signature of callback function to run a service.
  *
diff --git a/lib/eal/include/rte_stdatomic.h b/lib/eal/include/rte_stdatomic.h
index 7a081cb500..0f11a15e4e 100644
--- a/lib/eal/include/rte_stdatomic.h
+++ b/lib/eal/include/rte_stdatomic.h
@@ -7,10 +7,6 @@
 
 #include <assert.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #ifdef RTE_ENABLE_STDATOMIC
 #ifndef _MSC_VER
 #ifdef __STDC_NO_ATOMICS__
@@ -188,6 +184,7 @@ typedef int rte_memory_order;
 #endif
 
 #ifdef __cplusplus
+extern "C" {
 }
 #endif
 
diff --git a/lib/eal/include/rte_string_fns.h b/lib/eal/include/rte_string_fns.h
index 13badec7b3..702bd81251 100644
--- a/lib/eal/include/rte_string_fns.h
+++ b/lib/eal/include/rte_string_fns.h
@@ -11,10 +11,6 @@
 #ifndef _RTE_STRING_FNS_H_
 #define _RTE_STRING_FNS_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <ctype.h>
 #include <stdio.h>
 #include <string.h>
@@ -22,6 +18,10 @@ extern "C" {
 #include <rte_common.h>
 #include <rte_compat.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Takes string "string" parameter and splits it at character "delim"
  * up to maxtokens-1 times - to give "maxtokens" resulting tokens. Like
@@ -77,6 +77,10 @@ rte_strlcat(char *dst, const char *src, size_t size)
 	return l + strlen(src);
 }
 
+#ifdef __cplusplus
+}
+#endif
+
 /* pull in a strlcpy function */
 #ifdef RTE_EXEC_ENV_FREEBSD
 #ifndef __BSD_VISIBLE /* non-standard functions are hidden */
@@ -95,6 +99,10 @@ rte_strlcat(char *dst, const char *src, size_t size)
 #endif /* RTE_USE_LIBBSD */
 #endif /* FREEBSD */
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Copy string src to buffer dst of size dsize.
  * At most dsize-1 chars will be copied.
@@ -141,7 +149,6 @@ rte_str_skip_leading_spaces(const char *src)
 	return p;
 }
 
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/include/rte_tailq.h b/lib/eal/include/rte_tailq.h
index 931d549e59..89f7ef2134 100644
--- a/lib/eal/include/rte_tailq.h
+++ b/lib/eal/include/rte_tailq.h
@@ -10,13 +10,13 @@
  *  Here defines rte_tailq APIs for only internal use
  */
 
+#include <stdio.h>
+#include <rte_debug.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdio.h>
-#include <rte_debug.h>
-
 /** dummy structure type used by the rte_tailq APIs */
 struct rte_tailq_entry {
 	RTE_TAILQ_ENTRY(rte_tailq_entry) next; /**< Pointer entries for a tailq list */
diff --git a/lib/eal/include/rte_ticketlock.h b/lib/eal/include/rte_ticketlock.h
index 73884eb07b..e60f60699c 100644
--- a/lib/eal/include/rte_ticketlock.h
+++ b/lib/eal/include/rte_ticketlock.h
@@ -17,15 +17,15 @@
  * All locks must be initialised before use, and only initialised once.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 #include <rte_lcore.h>
 #include <rte_pause.h>
 #include <rte_stdatomic.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * The rte_ticketlock_t type.
  */
diff --git a/lib/eal/include/rte_time.h b/lib/eal/include/rte_time.h
index ec25f7b93d..c5c3a233e4 100644
--- a/lib/eal/include/rte_time.h
+++ b/lib/eal/include/rte_time.h
@@ -5,13 +5,13 @@
 #ifndef _RTE_TIME_H_
 #define _RTE_TIME_H_
 
+#include <stdint.h>
+#include <time.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-#include <time.h>
-
 #define NSEC_PER_SEC             1000000000L
 
 /**
diff --git a/lib/eal/include/rte_trace.h b/lib/eal/include/rte_trace.h
index a6e991fad3..1c824b2158 100644
--- a/lib/eal/include/rte_trace.h
+++ b/lib/eal/include/rte_trace.h
@@ -16,16 +16,16 @@
  * @b EXPERIMENTAL: this API may change without prior notice
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdbool.h>
 #include <stdio.h>
 
 #include <rte_common.h>
 #include <rte_compat.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  *  Test if trace is enabled.
  *
diff --git a/lib/eal/include/rte_trace_point.h b/lib/eal/include/rte_trace_point.h
index 41e2a7f99e..bc737d585e 100644
--- a/lib/eal/include/rte_trace_point.h
+++ b/lib/eal/include/rte_trace_point.h
@@ -16,10 +16,6 @@
  * @b EXPERIMENTAL: this API may change without prior notice
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdbool.h>
 #include <stdio.h>
 
@@ -32,6 +28,10 @@ extern "C" {
 #include <rte_string_fns.h>
 #include <rte_uuid.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** The tracepoint object. */
 typedef RTE_ATOMIC(uint64_t) rte_trace_point_t;
 
diff --git a/lib/eal/include/rte_trace_point_register.h b/lib/eal/include/rte_trace_point_register.h
index 41260e5964..8726338fe4 100644
--- a/lib/eal/include/rte_trace_point_register.h
+++ b/lib/eal/include/rte_trace_point_register.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_TRACE_POINT_REGISTER_H_
 #define _RTE_TRACE_POINT_REGISTER_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #ifdef _RTE_TRACE_POINT_H_
 #error for registration, include this file first before <rte_trace_point.h>
 #endif
@@ -16,6 +12,10 @@ extern "C" {
 #include <rte_per_lcore.h>
 #include <rte_trace_point.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 RTE_DECLARE_PER_LCORE(volatile int, trace_point_sz);
 
 #define RTE_TRACE_POINT_REGISTER(trace, name) \
diff --git a/lib/eal/include/rte_uuid.h b/lib/eal/include/rte_uuid.h
index cfefd4308a..def5907a00 100644
--- a/lib/eal/include/rte_uuid.h
+++ b/lib/eal/include/rte_uuid.h
@@ -10,14 +10,14 @@
 #ifndef _RTE_UUID_H_
 #define _RTE_UUID_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdbool.h>
 #include <stddef.h>
 #include <string.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Struct describing a Universal Unique Identifier
  */
diff --git a/lib/eal/include/rte_version.h b/lib/eal/include/rte_version.h
index 422d00fdff..be3f753617 100644
--- a/lib/eal/include/rte_version.h
+++ b/lib/eal/include/rte_version.h
@@ -10,13 +10,13 @@
 #ifndef _RTE_VERSION_H_
 #define _RTE_VERSION_H_
 
+#include <string.h>
+#include <stdio.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <string.h>
-#include <stdio.h>
-
 /**
  * Macro to compute a version number usable for comparisons
  */
diff --git a/lib/eal/include/rte_vfio.h b/lib/eal/include/rte_vfio.h
index b774625d9f..923293040b 100644
--- a/lib/eal/include/rte_vfio.h
+++ b/lib/eal/include/rte_vfio.h
@@ -10,10 +10,6 @@
  * RTE VFIO. This library provides various VFIO related utility functions.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdbool.h>
 #include <stdint.h>
 
@@ -32,6 +28,10 @@ extern "C" {
 #endif /* kernel version >= 4.0.0 */
 #endif /* RTE_EAL_VFIO */
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef VFIO_PRESENT
 
 #include <linux/vfio.h>
diff --git a/lib/eal/linux/include/rte_os.h b/lib/eal/linux/include/rte_os.h
index c72bf5b7e6..dba0e29827 100644
--- a/lib/eal/linux/include/rte_os.h
+++ b/lib/eal/linux/include/rte_os.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_OS_H_
 #define _RTE_OS_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * This header should contain any definition
  * which is not supported natively or named differently in Linux.
@@ -17,6 +13,10 @@ extern "C" {
 #include <sched.h>
 #include <sys/queue.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* These macros are compatible with system's sys/queue.h. */
 #define RTE_TAILQ_HEAD(name, type) TAILQ_HEAD(name, type)
 #define RTE_TAILQ_ENTRY(type) TAILQ_ENTRY(type)
diff --git a/lib/eal/loongarch/include/rte_atomic.h b/lib/eal/loongarch/include/rte_atomic.h
index 0510b8f781..c8066a4612 100644
--- a/lib/eal/loongarch/include/rte_atomic.h
+++ b/lib/eal/loongarch/include/rte_atomic.h
@@ -9,13 +9,13 @@
 #  error Platform must be built with RTE_FORCE_INTRINSICS
 #endif
 
+#include <rte_common.h>
+#include "generic/rte_atomic.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_common.h>
-#include "generic/rte_atomic.h"
-
 #define rte_mb()	do { asm volatile("dbar 0":::"memory"); } while (0)
 
 #define rte_wmb()	rte_mb()
diff --git a/lib/eal/loongarch/include/rte_byteorder.h b/lib/eal/loongarch/include/rte_byteorder.h
index 0da6097a4f..9b092e2a59 100644
--- a/lib/eal/loongarch/include/rte_byteorder.h
+++ b/lib/eal/loongarch/include/rte_byteorder.h
@@ -5,12 +5,12 @@
 #ifndef RTE_BYTEORDER_LOONGARCH_H
 #define RTE_BYTEORDER_LOONGARCH_H
 
+#include "generic/rte_byteorder.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_byteorder.h"
-
 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
 
 #define rte_cpu_to_le_16(x) (x)
diff --git a/lib/eal/loongarch/include/rte_cpuflags.h b/lib/eal/loongarch/include/rte_cpuflags.h
index 6b592c147c..c1e04ac545 100644
--- a/lib/eal/loongarch/include/rte_cpuflags.h
+++ b/lib/eal/loongarch/include/rte_cpuflags.h
@@ -5,10 +5,6 @@
 #ifndef RTE_CPUFLAGS_LOONGARCH_H
 #define RTE_CPUFLAGS_LOONGARCH_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * Enumeration of all CPU features supported
  */
@@ -30,6 +26,10 @@ enum rte_cpu_flag_t {
 
 #include "generic/rte_cpuflags.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/loongarch/include/rte_cycles.h b/lib/eal/loongarch/include/rte_cycles.h
index f612d1ad10..128c8646e9 100644
--- a/lib/eal/loongarch/include/rte_cycles.h
+++ b/lib/eal/loongarch/include/rte_cycles.h
@@ -5,12 +5,12 @@
 #ifndef RTE_CYCLES_LOONGARCH_H
 #define RTE_CYCLES_LOONGARCH_H
 
+#include "generic/rte_cycles.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_cycles.h"
-
 /**
  * Read the time base register.
  *
diff --git a/lib/eal/loongarch/include/rte_io.h b/lib/eal/loongarch/include/rte_io.h
index 40e40efa86..e32a4737b2 100644
--- a/lib/eal/loongarch/include/rte_io.h
+++ b/lib/eal/loongarch/include/rte_io.h
@@ -5,12 +5,12 @@
 #ifndef RTE_IO_LOONGARCH_H
 #define RTE_IO_LOONGARCH_H
 
+#include "generic/rte_io.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_io.h"
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/loongarch/include/rte_memcpy.h b/lib/eal/loongarch/include/rte_memcpy.h
index 22578d40f4..5412a0fdc1 100644
--- a/lib/eal/loongarch/include/rte_memcpy.h
+++ b/lib/eal/loongarch/include/rte_memcpy.h
@@ -10,12 +10,12 @@
 
 #include "rte_common.h"
 
+#include "generic/rte_memcpy.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_memcpy.h"
-
 static inline void
 rte_mov16(uint8_t *dst, const uint8_t *src)
 {
diff --git a/lib/eal/loongarch/include/rte_pause.h b/lib/eal/loongarch/include/rte_pause.h
index 4302e1b9be..cffa2874d6 100644
--- a/lib/eal/loongarch/include/rte_pause.h
+++ b/lib/eal/loongarch/include/rte_pause.h
@@ -5,14 +5,14 @@
 #ifndef RTE_PAUSE_LOONGARCH_H
 #define RTE_PAUSE_LOONGARCH_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include "rte_atomic.h"
 
 #include "generic/rte_pause.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void rte_pause(void)
 {
 }
diff --git a/lib/eal/loongarch/include/rte_power_intrinsics.h b/lib/eal/loongarch/include/rte_power_intrinsics.h
index d5dbd94567..9e11478206 100644
--- a/lib/eal/loongarch/include/rte_power_intrinsics.h
+++ b/lib/eal/loongarch/include/rte_power_intrinsics.h
@@ -5,14 +5,14 @@
 #ifndef RTE_POWER_INTRINSIC_LOONGARCH_H
 #define RTE_POWER_INTRINSIC_LOONGARCH_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 
 #include "generic/rte_power_intrinsics.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/loongarch/include/rte_prefetch.h b/lib/eal/loongarch/include/rte_prefetch.h
index 64b1fd2c2a..8da08a5566 100644
--- a/lib/eal/loongarch/include/rte_prefetch.h
+++ b/lib/eal/loongarch/include/rte_prefetch.h
@@ -5,14 +5,14 @@
 #ifndef RTE_PREFETCH_LOONGARCH_H
 #define RTE_PREFETCH_LOONGARCH_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include <rte_common.h>
 #include "generic/rte_prefetch.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void rte_prefetch0(const volatile void *p)
 {
 	__builtin_prefetch((const void *)(uintptr_t)p, 0, 3);
diff --git a/lib/eal/loongarch/include/rte_rwlock.h b/lib/eal/loongarch/include/rte_rwlock.h
index aedc6f3349..48924599c5 100644
--- a/lib/eal/loongarch/include/rte_rwlock.h
+++ b/lib/eal/loongarch/include/rte_rwlock.h
@@ -5,12 +5,12 @@
 #ifndef RTE_RWLOCK_LOONGARCH_H
 #define RTE_RWLOCK_LOONGARCH_H
 
+#include "generic/rte_rwlock.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_rwlock.h"
-
 static inline void
 rte_rwlock_read_lock_tm(rte_rwlock_t *rwl)
 {
diff --git a/lib/eal/loongarch/include/rte_spinlock.h b/lib/eal/loongarch/include/rte_spinlock.h
index e8d34e9728..38f00f631d 100644
--- a/lib/eal/loongarch/include/rte_spinlock.h
+++ b/lib/eal/loongarch/include/rte_spinlock.h
@@ -5,13 +5,13 @@
 #ifndef RTE_SPINLOCK_LOONGARCH_H
 #define RTE_SPINLOCK_LOONGARCH_H
 
+#include <rte_common.h>
+#include "generic/rte_spinlock.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_common.h>
-#include "generic/rte_spinlock.h"
-
 #ifndef RTE_FORCE_INTRINSICS
 #  error Platform must be built with RTE_FORCE_INTRINSICS
 #endif
diff --git a/lib/eal/ppc/include/rte_atomic.h b/lib/eal/ppc/include/rte_atomic.h
index 645c7132df..6ce2e5188a 100644
--- a/lib/eal/ppc/include/rte_atomic.h
+++ b/lib/eal/ppc/include/rte_atomic.h
@@ -12,13 +12,13 @@
 #ifndef _RTE_ATOMIC_PPC_64_H_
 #define _RTE_ATOMIC_PPC_64_H_
 
+#include <stdint.h>
+#include "generic/rte_atomic.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-#include "generic/rte_atomic.h"
-
 #define	rte_mb()  asm volatile("sync" : : : "memory")
 
 #define	rte_wmb() asm volatile("sync" : : : "memory")
diff --git a/lib/eal/ppc/include/rte_byteorder.h b/lib/eal/ppc/include/rte_byteorder.h
index de94e2ad32..1d19e96f72 100644
--- a/lib/eal/ppc/include/rte_byteorder.h
+++ b/lib/eal/ppc/include/rte_byteorder.h
@@ -8,13 +8,13 @@
 #ifndef _RTE_BYTEORDER_PPC_64_H_
 #define _RTE_BYTEORDER_PPC_64_H_
 
+#include <stdint.h>
+#include "generic/rte_byteorder.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-#include "generic/rte_byteorder.h"
-
 /*
  * An architecture-optimized byte swap for a 16-bit value.
  *
diff --git a/lib/eal/ppc/include/rte_cpuflags.h b/lib/eal/ppc/include/rte_cpuflags.h
index dedc1ab469..b7bb8f6872 100644
--- a/lib/eal/ppc/include/rte_cpuflags.h
+++ b/lib/eal/ppc/include/rte_cpuflags.h
@@ -6,10 +6,6 @@
 #ifndef _RTE_CPUFLAGS_PPC_64_H_
 #define _RTE_CPUFLAGS_PPC_64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * Enumeration of all CPU features supported
  */
@@ -52,6 +48,10 @@ enum rte_cpu_flag_t {
 
 #include "generic/rte_cpuflags.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/ppc/include/rte_cycles.h b/lib/eal/ppc/include/rte_cycles.h
index 666fc9b0bf..1e6e6cccc8 100644
--- a/lib/eal/ppc/include/rte_cycles.h
+++ b/lib/eal/ppc/include/rte_cycles.h
@@ -6,10 +6,6 @@
 #ifndef _RTE_CYCLES_PPC_64_H_
 #define _RTE_CYCLES_PPC_64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <features.h>
 #ifdef __GLIBC__
 #include <sys/platform/ppc.h>
@@ -20,6 +16,10 @@ extern "C" {
 #include <rte_byteorder.h>
 #include <rte_common.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Read the time base register.
  *
diff --git a/lib/eal/ppc/include/rte_io.h b/lib/eal/ppc/include/rte_io.h
index 01455065e5..c9371b784e 100644
--- a/lib/eal/ppc/include/rte_io.h
+++ b/lib/eal/ppc/include/rte_io.h
@@ -5,12 +5,12 @@
 #ifndef _RTE_IO_PPC_64_H_
 #define _RTE_IO_PPC_64_H_
 
+#include "generic/rte_io.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_io.h"
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/ppc/include/rte_memcpy.h b/lib/eal/ppc/include/rte_memcpy.h
index 6f388c0234..eae73128c4 100644
--- a/lib/eal/ppc/include/rte_memcpy.h
+++ b/lib/eal/ppc/include/rte_memcpy.h
@@ -12,12 +12,12 @@
 #include "rte_altivec.h"
 #include "rte_common.h"
 
+#include "generic/rte_memcpy.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_memcpy.h"
-
 #if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION >= 90000)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Warray-bounds"
diff --git a/lib/eal/ppc/include/rte_pause.h b/lib/eal/ppc/include/rte_pause.h
index 16e47ce22f..78a73aceed 100644
--- a/lib/eal/ppc/include/rte_pause.h
+++ b/lib/eal/ppc/include/rte_pause.h
@@ -5,14 +5,14 @@
 #ifndef _RTE_PAUSE_PPC64_H_
 #define _RTE_PAUSE_PPC64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include "rte_atomic.h"
 
 #include "generic/rte_pause.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void rte_pause(void)
 {
 	/* Set hardware multi-threading low priority */
diff --git a/lib/eal/ppc/include/rte_power_intrinsics.h b/lib/eal/ppc/include/rte_power_intrinsics.h
index c0e9ac279f..6207eeb04d 100644
--- a/lib/eal/ppc/include/rte_power_intrinsics.h
+++ b/lib/eal/ppc/include/rte_power_intrinsics.h
@@ -5,14 +5,14 @@
 #ifndef _RTE_POWER_INTRINSIC_PPC_H_
 #define _RTE_POWER_INTRINSIC_PPC_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 
 #include "generic/rte_power_intrinsics.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/ppc/include/rte_prefetch.h b/lib/eal/ppc/include/rte_prefetch.h
index 2e1b5751e0..bae95af7bf 100644
--- a/lib/eal/ppc/include/rte_prefetch.h
+++ b/lib/eal/ppc/include/rte_prefetch.h
@@ -6,14 +6,14 @@
 #ifndef _RTE_PREFETCH_PPC_64_H_
 #define _RTE_PREFETCH_PPC_64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include <rte_common.h>
 #include "generic/rte_prefetch.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void rte_prefetch0(const volatile void *p)
 {
 	asm volatile ("dcbt 0,%[p],0" : : [p] "r" (p));
diff --git a/lib/eal/ppc/include/rte_rwlock.h b/lib/eal/ppc/include/rte_rwlock.h
index 9fadc04076..bee8da4070 100644
--- a/lib/eal/ppc/include/rte_rwlock.h
+++ b/lib/eal/ppc/include/rte_rwlock.h
@@ -3,12 +3,12 @@
 #ifndef _RTE_RWLOCK_PPC_64_H_
 #define _RTE_RWLOCK_PPC_64_H_
 
+#include "generic/rte_rwlock.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_rwlock.h"
-
 static inline void
 rte_rwlock_read_lock_tm(rte_rwlock_t *rwl)
 {
diff --git a/lib/eal/ppc/include/rte_spinlock.h b/lib/eal/ppc/include/rte_spinlock.h
index 3a4c905b22..77f90f974a 100644
--- a/lib/eal/ppc/include/rte_spinlock.h
+++ b/lib/eal/ppc/include/rte_spinlock.h
@@ -6,14 +6,14 @@
 #ifndef _RTE_SPINLOCK_PPC_64_H_
 #define _RTE_SPINLOCK_PPC_64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 #include <rte_pause.h>
 #include "generic/rte_spinlock.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Fixme: Use intrinsics to implement the spinlock on Power architecture */
 
 #ifndef RTE_FORCE_INTRINSICS
diff --git a/lib/eal/riscv/include/rte_atomic.h b/lib/eal/riscv/include/rte_atomic.h
index 2603bc90ea..66346ad474 100644
--- a/lib/eal/riscv/include/rte_atomic.h
+++ b/lib/eal/riscv/include/rte_atomic.h
@@ -12,15 +12,15 @@
 #  error Platform must be built with RTE_FORCE_INTRINSICS
 #endif
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <rte_common.h>
 #include <rte_config.h>
 #include "generic/rte_atomic.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define rte_mb()	asm volatile("fence rw, rw" : : : "memory")
 
 #define rte_wmb()	asm volatile("fence w, w" : : : "memory")
diff --git a/lib/eal/riscv/include/rte_byteorder.h b/lib/eal/riscv/include/rte_byteorder.h
index 25bd0c275d..c9ff5c0dd1 100644
--- a/lib/eal/riscv/include/rte_byteorder.h
+++ b/lib/eal/riscv/include/rte_byteorder.h
@@ -8,14 +8,14 @@
 #ifndef RTE_BYTEORDER_RISCV_H
 #define RTE_BYTEORDER_RISCV_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <rte_common.h>
 #include "generic/rte_byteorder.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifndef RTE_BYTE_ORDER
 #define RTE_BYTE_ORDER RTE_LITTLE_ENDIAN
 #endif
diff --git a/lib/eal/riscv/include/rte_cpuflags.h b/lib/eal/riscv/include/rte_cpuflags.h
index d742efc40f..ac2004f02d 100644
--- a/lib/eal/riscv/include/rte_cpuflags.h
+++ b/lib/eal/riscv/include/rte_cpuflags.h
@@ -8,10 +8,6 @@
 #ifndef RTE_CPUFLAGS_RISCV_H
 #define RTE_CPUFLAGS_RISCV_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * Enumeration of all CPU features supported
  */
@@ -46,6 +42,10 @@ enum rte_cpu_flag_t {
 
 #include "generic/rte_cpuflags.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/riscv/include/rte_cycles.h b/lib/eal/riscv/include/rte_cycles.h
index 04750ca253..7926809a73 100644
--- a/lib/eal/riscv/include/rte_cycles.h
+++ b/lib/eal/riscv/include/rte_cycles.h
@@ -8,12 +8,12 @@
 #ifndef RTE_CYCLES_RISCV_H
 #define RTE_CYCLES_RISCV_H
 
+#include "generic/rte_cycles.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_cycles.h"
-
 #ifndef RTE_RISCV_RDTSC_USE_HPM
 #define RTE_RISCV_RDTSC_USE_HPM 0
 #endif
diff --git a/lib/eal/riscv/include/rte_io.h b/lib/eal/riscv/include/rte_io.h
index 29659c9590..911dbb6bd2 100644
--- a/lib/eal/riscv/include/rte_io.h
+++ b/lib/eal/riscv/include/rte_io.h
@@ -8,12 +8,12 @@
 #ifndef RTE_IO_RISCV_H
 #define RTE_IO_RISCV_H
 
+#include "generic/rte_io.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_io.h"
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/riscv/include/rte_memcpy.h b/lib/eal/riscv/include/rte_memcpy.h
index e34f19396e..d8a942c5d2 100644
--- a/lib/eal/riscv/include/rte_memcpy.h
+++ b/lib/eal/riscv/include/rte_memcpy.h
@@ -12,12 +12,12 @@
 
 #include "rte_common.h"
 
+#include "generic/rte_memcpy.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_memcpy.h"
-
 static inline void
 rte_mov16(uint8_t *dst, const uint8_t *src)
 {
diff --git a/lib/eal/riscv/include/rte_pause.h b/lib/eal/riscv/include/rte_pause.h
index cb8e9ca52d..3f473cd8db 100644
--- a/lib/eal/riscv/include/rte_pause.h
+++ b/lib/eal/riscv/include/rte_pause.h
@@ -7,14 +7,14 @@
 #ifndef RTE_PAUSE_RISCV_H
 #define RTE_PAUSE_RISCV_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include "rte_atomic.h"
 
 #include "generic/rte_pause.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void rte_pause(void)
 {
 	/* Insert pause hint directly to be compatible with old compilers.
diff --git a/lib/eal/riscv/include/rte_power_intrinsics.h b/lib/eal/riscv/include/rte_power_intrinsics.h
index 636e58e71f..3f7dba1640 100644
--- a/lib/eal/riscv/include/rte_power_intrinsics.h
+++ b/lib/eal/riscv/include/rte_power_intrinsics.h
@@ -7,14 +7,14 @@
 #ifndef RTE_POWER_INTRINSIC_RISCV_H
 #define RTE_POWER_INTRINSIC_RISCV_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 
 #include "generic/rte_power_intrinsics.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/riscv/include/rte_prefetch.h b/lib/eal/riscv/include/rte_prefetch.h
index 748cf1b626..42146491ea 100644
--- a/lib/eal/riscv/include/rte_prefetch.h
+++ b/lib/eal/riscv/include/rte_prefetch.h
@@ -8,14 +8,14 @@
 #ifndef RTE_PREFETCH_RISCV_H
 #define RTE_PREFETCH_RISCV_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include <rte_common.h>
 #include "generic/rte_prefetch.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void rte_prefetch0(const volatile void *p)
 {
 	RTE_SET_USED(p);
diff --git a/lib/eal/riscv/include/rte_rwlock.h b/lib/eal/riscv/include/rte_rwlock.h
index 9cdaf1b0ef..730970eecb 100644
--- a/lib/eal/riscv/include/rte_rwlock.h
+++ b/lib/eal/riscv/include/rte_rwlock.h
@@ -7,12 +7,12 @@
 #ifndef RTE_RWLOCK_RISCV_H
 #define RTE_RWLOCK_RISCV_H
 
+#include "generic/rte_rwlock.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_rwlock.h"
-
 static inline void
 rte_rwlock_read_lock_tm(rte_rwlock_t *rwl)
 {
diff --git a/lib/eal/riscv/include/rte_spinlock.h b/lib/eal/riscv/include/rte_spinlock.h
index 6af430735c..5fe4980e44 100644
--- a/lib/eal/riscv/include/rte_spinlock.h
+++ b/lib/eal/riscv/include/rte_spinlock.h
@@ -12,13 +12,13 @@
 #  error Platform must be built with RTE_FORCE_INTRINSICS
 #endif
 
+#include <rte_common.h>
+#include "generic/rte_spinlock.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_common.h>
-#include "generic/rte_spinlock.h"
-
 static inline int rte_tm_supported(void)
 {
 	return 0;
diff --git a/lib/eal/windows/include/pthread.h b/lib/eal/windows/include/pthread.h
index 051b9311c2..e1c31017d1 100644
--- a/lib/eal/windows/include/pthread.h
+++ b/lib/eal/windows/include/pthread.h
@@ -13,13 +13,13 @@
  * eal_common_thread.c and common\include\rte_per_lcore.h as Microsoft libc
  * does not contain pthread.h. This may be removed in future releases.
  */
+#include <rte_common.h>
+#include <rte_windows.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_common.h>
-#include <rte_windows.h>
-
 #define PTHREAD_BARRIER_SERIAL_THREAD TRUE
 
 /* defining pthread_t type on Windows since there is no in Microsoft libc*/
diff --git a/lib/eal/windows/include/regex.h b/lib/eal/windows/include/regex.h
index 827f938414..a224c0cd29 100644
--- a/lib/eal/windows/include/regex.h
+++ b/lib/eal/windows/include/regex.h
@@ -10,15 +10,15 @@
  * as Microsoft libc does not contain regex.h. This may be removed in
  * future releases.
  */
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #define REG_NOMATCH 1
 #define REG_ESPACE 12
 
 #include <rte_common.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* defining regex_t for Windows */
 typedef void *regex_t;
 /* defining regmatch_t for Windows */
diff --git a/lib/eal/windows/include/rte_windows.h b/lib/eal/windows/include/rte_windows.h
index 567ed7d820..e78f007ffa 100644
--- a/lib/eal/windows/include/rte_windows.h
+++ b/lib/eal/windows/include/rte_windows.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_WINDOWS_H_
 #define _RTE_WINDOWS_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file Windows-specific facilities
  *
@@ -44,6 +40,10 @@ extern "C" {
 #include <devguid.h>
 #include <rte_log.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Log GetLastError() with context, usually a Win32 API function and arguments.
  */
diff --git a/lib/eal/x86/include/rte_atomic.h b/lib/eal/x86/include/rte_atomic.h
index 74b1b24b7a..c72c47c83e 100644
--- a/lib/eal/x86/include/rte_atomic.h
+++ b/lib/eal/x86/include/rte_atomic.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_ATOMIC_X86_H_
 #define _RTE_ATOMIC_X86_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <rte_common.h>
 #include <rte_config.h>
@@ -31,6 +27,10 @@ extern "C" {
 
 #define rte_smp_rmb() rte_compiler_barrier()
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /*
  * From Intel Software Development Manual; Vol 3;
  * 8.2.2 Memory Ordering in P6 and More Recent Processor Families:
@@ -99,10 +99,18 @@ rte_atomic_thread_fence(rte_memory_order memorder)
 		__rte_atomic_thread_fence(memorder);
 }
 
+#ifdef __cplusplus
+}
+#endif
+
 #ifndef RTE_TOOLCHAIN_MSVC
 
 /*------------------------- 16 bit atomic operations -------------------------*/
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifndef RTE_FORCE_INTRINSICS
 static inline int
 rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src)
@@ -273,6 +281,11 @@ static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v)
 			);
 	return ret != 0;
 }
+
+#ifdef __cplusplus
+}
+#endif
+
 #endif
 
 #ifdef RTE_ARCH_I686
@@ -283,8 +296,4 @@ static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v)
 
 #endif
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* _RTE_ATOMIC_X86_H_ */
diff --git a/lib/eal/x86/include/rte_byteorder.h b/lib/eal/x86/include/rte_byteorder.h
index adbec0c157..5a49ffcd50 100644
--- a/lib/eal/x86/include/rte_byteorder.h
+++ b/lib/eal/x86/include/rte_byteorder.h
@@ -5,15 +5,15 @@
 #ifndef _RTE_BYTEORDER_X86_H_
 #define _RTE_BYTEORDER_X86_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <rte_common.h>
 #include <rte_config.h>
 #include "generic/rte_byteorder.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifndef RTE_BYTE_ORDER
 #define RTE_BYTE_ORDER RTE_LITTLE_ENDIAN
 #endif
@@ -48,6 +48,10 @@ static inline uint32_t rte_arch_bswap32(uint32_t _x)
 	return x;
 }
 
+#ifdef __cplusplus
+}
+#endif
+
 #define rte_bswap16(x) ((uint16_t)(__builtin_constant_p(x) ?		\
 				   rte_constant_bswap16(x) :		\
 				   rte_arch_bswap16(x)))
@@ -83,8 +87,4 @@ static inline uint32_t rte_arch_bswap32(uint32_t _x)
 #define rte_be_to_cpu_32(x) rte_bswap32(x)
 #define rte_be_to_cpu_64(x) rte_bswap64(x)
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* _RTE_BYTEORDER_X86_H_ */
diff --git a/lib/eal/x86/include/rte_cpuflags.h b/lib/eal/x86/include/rte_cpuflags.h
index 1ee00e70fe..e843d1e5f4 100644
--- a/lib/eal/x86/include/rte_cpuflags.h
+++ b/lib/eal/x86/include/rte_cpuflags.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_CPUFLAGS_X86_64_H_
 #define _RTE_CPUFLAGS_X86_64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 enum rte_cpu_flag_t {
 	/* (EAX 01h) ECX features*/
 	RTE_CPUFLAG_SSE3 = 0,               /**< SSE3 */
@@ -138,6 +134,10 @@ enum rte_cpu_flag_t {
 
 #include "generic/rte_cpuflags.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/x86/include/rte_cycles.h b/lib/eal/x86/include/rte_cycles.h
index 2afe85e28c..8de43840da 100644
--- a/lib/eal/x86/include/rte_cycles.h
+++ b/lib/eal/x86/include/rte_cycles.h
@@ -12,10 +12,6 @@
 #include <x86intrin.h>
 #endif
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include "generic/rte_cycles.h"
 
 #ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT
@@ -26,6 +22,10 @@ extern int rte_cycles_vmware_tsc_map;
 #include <rte_common.h>
 #include <rte_config.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline uint64_t
 rte_rdtsc(void)
 {
diff --git a/lib/eal/x86/include/rte_io.h b/lib/eal/x86/include/rte_io.h
index 0e1fefdee1..c11cb8cd89 100644
--- a/lib/eal/x86/include/rte_io.h
+++ b/lib/eal/x86/include/rte_io.h
@@ -5,16 +5,16 @@
 #ifndef _RTE_IO_X86_H_
 #define _RTE_IO_X86_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include "rte_cpuflags.h"
 
 #define RTE_NATIVE_WRITE32_WC
 #include "generic/rte_io.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @internal
  * MOVDIRI wrapper.
diff --git a/lib/eal/x86/include/rte_pause.h b/lib/eal/x86/include/rte_pause.h
index b4cf1df1d0..54f028b295 100644
--- a/lib/eal/x86/include/rte_pause.h
+++ b/lib/eal/x86/include/rte_pause.h
@@ -5,13 +5,14 @@
 #ifndef _RTE_PAUSE_X86_H_
 #define _RTE_PAUSE_X86_H_
 
+#include "generic/rte_pause.h"
+
+#include <emmintrin.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_pause.h"
-
-#include <emmintrin.h>
 static inline void rte_pause(void)
 {
 	_mm_pause();
diff --git a/lib/eal/x86/include/rte_power_intrinsics.h b/lib/eal/x86/include/rte_power_intrinsics.h
index e4c2b87f73..fcb780fc5b 100644
--- a/lib/eal/x86/include/rte_power_intrinsics.h
+++ b/lib/eal/x86/include/rte_power_intrinsics.h
@@ -5,14 +5,14 @@
 #ifndef _RTE_POWER_INTRINSIC_X86_H_
 #define _RTE_POWER_INTRINSIC_X86_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 
 #include "generic/rte_power_intrinsics.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/x86/include/rte_prefetch.h b/lib/eal/x86/include/rte_prefetch.h
index 8a9377714f..34a609cc65 100644
--- a/lib/eal/x86/include/rte_prefetch.h
+++ b/lib/eal/x86/include/rte_prefetch.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_PREFETCH_X86_64_H_
 #define _RTE_PREFETCH_X86_64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #ifdef RTE_TOOLCHAIN_MSVC
 #include <emmintrin.h>
 #endif
@@ -17,6 +13,10 @@ extern "C" {
 #include <rte_common.h>
 #include "generic/rte_prefetch.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void rte_prefetch0(const volatile void *p)
 {
 #ifdef RTE_TOOLCHAIN_MSVC
diff --git a/lib/eal/x86/include/rte_rwlock.h b/lib/eal/x86/include/rte_rwlock.h
index 1796b69265..281eff33b9 100644
--- a/lib/eal/x86/include/rte_rwlock.h
+++ b/lib/eal/x86/include/rte_rwlock.h
@@ -5,13 +5,13 @@
 #ifndef _RTE_RWLOCK_X86_64_H_
 #define _RTE_RWLOCK_X86_64_H_
 
+#include "generic/rte_rwlock.h"
+#include "rte_spinlock.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_rwlock.h"
-#include "rte_spinlock.h"
-
 static inline void
 rte_rwlock_read_lock_tm(rte_rwlock_t *rwl)
 	__rte_no_thread_safety_analysis
diff --git a/lib/eal/x86/include/rte_spinlock.h b/lib/eal/x86/include/rte_spinlock.h
index a6c23ea1f6..a14da41964 100644
--- a/lib/eal/x86/include/rte_spinlock.h
+++ b/lib/eal/x86/include/rte_spinlock.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_SPINLOCK_X86_64_H_
 #define _RTE_SPINLOCK_X86_64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include "generic/rte_spinlock.h"
 #include "rte_rtm.h"
 #include "rte_cpuflags.h"
@@ -17,6 +13,10 @@ extern "C" {
 #include "rte_pause.h"
 #include "rte_cycles.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_RTM_MAX_RETRIES (20)
 #define RTE_XABORT_LOCK_BUSY (0xff)
 
@@ -182,7 +182,6 @@ rte_spinlock_recursive_trylock_tm(rte_spinlock_recursive_t *slr)
 	return rte_spinlock_recursive_trylock(slr);
 }
 
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/ethdev/ethdev_driver.h b/lib/ethdev/ethdev_driver.h
index 883e59a927..ae00ead865 100644
--- a/lib/ethdev/ethdev_driver.h
+++ b/lib/ethdev/ethdev_driver.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_ETHDEV_DRIVER_H_
 #define _RTE_ETHDEV_DRIVER_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  *
@@ -24,6 +20,10 @@ extern "C" {
 #include <rte_compat.h>
 #include <rte_ethdev.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @internal
  * Structure used to hold information about the callbacks to be called for a
diff --git a/lib/ethdev/ethdev_pci.h b/lib/ethdev/ethdev_pci.h
index ec4f731270..2229ffa252 100644
--- a/lib/ethdev/ethdev_pci.h
+++ b/lib/ethdev/ethdev_pci.h
@@ -6,16 +6,16 @@
 #ifndef _RTE_ETHDEV_PCI_H_
 #define _RTE_ETHDEV_PCI_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_malloc.h>
 #include <rte_pci.h>
 #include <bus_pci_driver.h>
 #include <rte_config.h>
 #include <ethdev_driver.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Copy pci device info to the Ethernet device data.
  * Shared memory (eth_dev->data) only updated by primary process, so it is safe
diff --git a/lib/ethdev/ethdev_trace.h b/lib/ethdev/ethdev_trace.h
index 3bec87bfdb..36a38f718a 100644
--- a/lib/ethdev/ethdev_trace.h
+++ b/lib/ethdev/ethdev_trace.h
@@ -11,10 +11,6 @@
  * API for ethdev trace support
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <dev_driver.h>
 #include <rte_trace_point.h>
 
@@ -22,6 +18,10 @@ extern "C" {
 #include "rte_mtr.h"
 #include "rte_tm.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 RTE_TRACE_POINT(
 	rte_ethdev_trace_configure,
 	RTE_TRACE_POINT_ARGS(uint16_t port_id, uint16_t nb_rx_q,
diff --git a/lib/ethdev/ethdev_vdev.h b/lib/ethdev/ethdev_vdev.h
index 364f140f91..010ec75a00 100644
--- a/lib/ethdev/ethdev_vdev.h
+++ b/lib/ethdev/ethdev_vdev.h
@@ -6,15 +6,15 @@
 #ifndef _RTE_ETHDEV_VDEV_H_
 #define _RTE_ETHDEV_VDEV_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_config.h>
 #include <rte_malloc.h>
 #include <bus_vdev_driver.h>
 #include <ethdev_driver.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @internal
  * Allocates a new ethdev slot for an Ethernet device and returns the pointer
diff --git a/lib/ethdev/rte_cman.h b/lib/ethdev/rte_cman.h
index 297db8e095..dedd6cb71a 100644
--- a/lib/ethdev/rte_cman.h
+++ b/lib/ethdev/rte_cman.h
@@ -5,12 +5,12 @@
 #ifndef RTE_CMAN_H
 #define RTE_CMAN_H
 
+#include <rte_bitops.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_bitops.h>
-
 /**
  * @file
  * Congestion management related parameters for DPDK.
diff --git a/lib/ethdev/rte_dev_info.h b/lib/ethdev/rte_dev_info.h
index 67cf0ae526..4fde2ad408 100644
--- a/lib/ethdev/rte_dev_info.h
+++ b/lib/ethdev/rte_dev_info.h
@@ -5,12 +5,12 @@
 #ifndef _RTE_DEV_INFO_H_
 #define _RTE_DEV_INFO_H_
 
+#include <stdint.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-
 /*
  * Placeholder for accessing device registers
  */
diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
index 548fada1c7..a75e26bf07 100644
--- a/lib/ethdev/rte_ethdev.h
+++ b/lib/ethdev/rte_ethdev.h
@@ -145,10 +145,6 @@
  * a 0 value by the receive function of the driver for a given number of tries.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 /* Use this macro to check if LRO API is supported */
@@ -5966,6 +5962,10 @@ int rte_eth_cman_config_get(uint16_t port_id, struct rte_eth_cman_config *config
 
 #include <rte_ethdev_core.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @internal
  * Helper routine for rte_eth_rx_burst().
diff --git a/lib/ethdev/rte_ethdev_trace_fp.h b/lib/ethdev/rte_ethdev_trace_fp.h
index 40b6e4756b..c11b4f18f7 100644
--- a/lib/ethdev/rte_ethdev_trace_fp.h
+++ b/lib/ethdev/rte_ethdev_trace_fp.h
@@ -11,12 +11,12 @@
  * API for ethdev trace support
  */
 
+#include <rte_trace_point.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_trace_point.h>
-
 RTE_TRACE_POINT_FP(
 	rte_ethdev_trace_rx_burst,
 	RTE_TRACE_POINT_ARGS(uint16_t port_id, uint16_t queue_id,
diff --git a/lib/eventdev/event_timer_adapter_pmd.h b/lib/eventdev/event_timer_adapter_pmd.h
index cd5127f047..fffcd90c8f 100644
--- a/lib/eventdev/event_timer_adapter_pmd.h
+++ b/lib/eventdev/event_timer_adapter_pmd.h
@@ -16,12 +16,12 @@
  * versioning.
  */
 
+#include "rte_event_timer_adapter.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "rte_event_timer_adapter.h"
-
 /*
  * Definitions of functions exported by an event timer adapter implementation
  * through *rte_event_timer_adapter_ops* structure supplied in the
diff --git a/lib/eventdev/eventdev_pmd.h b/lib/eventdev/eventdev_pmd.h
index 7a5699f14b..fd5f7a14f4 100644
--- a/lib/eventdev/eventdev_pmd.h
+++ b/lib/eventdev/eventdev_pmd.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_EVENTDEV_PMD_H_
 #define _RTE_EVENTDEV_PMD_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /** @file
  * RTE Event PMD APIs
  *
@@ -31,6 +27,10 @@ extern "C" {
 #include "event_timer_adapter_pmd.h"
 #include "rte_eventdev.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 extern int rte_event_logtype;
 #define RTE_LOGTYPE_EVENTDEV rte_event_logtype
 
diff --git a/lib/eventdev/eventdev_pmd_pci.h b/lib/eventdev/eventdev_pmd_pci.h
index 26aa3a6635..5cb5916a84 100644
--- a/lib/eventdev/eventdev_pmd_pci.h
+++ b/lib/eventdev/eventdev_pmd_pci.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_EVENTDEV_PMD_PCI_H_
 #define _RTE_EVENTDEV_PMD_PCI_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /** @file
  * RTE Eventdev PCI PMD APIs
  *
@@ -28,6 +24,10 @@ extern "C" {
 
 #include "eventdev_pmd.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 typedef int (*eventdev_pmd_pci_callback_t)(struct rte_eventdev *dev);
 
 /**
diff --git a/lib/eventdev/eventdev_pmd_vdev.h b/lib/eventdev/eventdev_pmd_vdev.h
index bb433ba955..4eaefa0b0b 100644
--- a/lib/eventdev/eventdev_pmd_vdev.h
+++ b/lib/eventdev/eventdev_pmd_vdev.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_EVENTDEV_PMD_VDEV_H_
 #define _RTE_EVENTDEV_PMD_VDEV_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /** @file
  * RTE Eventdev VDEV PMD APIs
  *
@@ -27,6 +23,10 @@ extern "C" {
 
 #include "eventdev_pmd.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @internal
  * Creates a new virtual event device and returns the pointer to that device.
diff --git a/lib/eventdev/eventdev_trace.h b/lib/eventdev/eventdev_trace.h
index 9c2b261c06..8ff8841729 100644
--- a/lib/eventdev/eventdev_trace.h
+++ b/lib/eventdev/eventdev_trace.h
@@ -11,10 +11,6 @@
  * API for ethdev trace support
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_trace_point.h>
 
 #include "rte_eventdev.h"
@@ -22,6 +18,10 @@ extern "C" {
 #include "rte_event_eth_rx_adapter.h"
 #include "rte_event_timer_adapter.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 RTE_TRACE_POINT(
 	rte_eventdev_trace_configure,
 	RTE_TRACE_POINT_ARGS(uint8_t dev_id,
diff --git a/lib/eventdev/rte_event_crypto_adapter.h b/lib/eventdev/rte_event_crypto_adapter.h
index e07f159b77..c9b277c664 100644
--- a/lib/eventdev/rte_event_crypto_adapter.h
+++ b/lib/eventdev/rte_event_crypto_adapter.h
@@ -167,14 +167,14 @@
  * from the start of the rte_crypto_op including initialization vector (IV).
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include "rte_eventdev.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Crypto event adapter mode
  */
diff --git a/lib/eventdev/rte_event_eth_rx_adapter.h b/lib/eventdev/rte_event_eth_rx_adapter.h
index cf42c69b0d..9237e198a7 100644
--- a/lib/eventdev/rte_event_eth_rx_adapter.h
+++ b/lib/eventdev/rte_event_eth_rx_adapter.h
@@ -87,10 +87,6 @@
  * event based so the callback can also modify the event data if it needs to.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_compat.h>
@@ -98,6 +94,10 @@ extern "C" {
 
 #include "rte_eventdev.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_EVENT_ETH_RX_ADAPTER_MAX_INSTANCE 32
 
 /* struct rte_event_eth_rx_adapter_queue_conf flags definitions */
diff --git a/lib/eventdev/rte_event_eth_tx_adapter.h b/lib/eventdev/rte_event_eth_tx_adapter.h
index b38b3fce97..ef01345ac2 100644
--- a/lib/eventdev/rte_event_eth_tx_adapter.h
+++ b/lib/eventdev/rte_event_eth_tx_adapter.h
@@ -76,10 +76,6 @@
  * impact due to a change in how the transmit queue index is specified.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_compat.h>
@@ -87,6 +83,10 @@ extern "C" {
 
 #include "rte_eventdev.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Adapter configuration structure
  *
diff --git a/lib/eventdev/rte_event_ring.h b/lib/eventdev/rte_event_ring.h
index f9cf19ae16..5769da269e 100644
--- a/lib/eventdev/rte_event_ring.h
+++ b/lib/eventdev/rte_event_ring.h
@@ -14,10 +14,6 @@
 #ifndef _RTE_EVENT_RING_
 #define _RTE_EVENT_RING_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_common.h>
@@ -25,6 +21,10 @@ extern "C" {
 #include <rte_ring_elem.h>
 #include "rte_eventdev.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_TAILQ_EVENT_RING_NAME "RTE_EVENT_RING"
 
 /**
diff --git a/lib/eventdev/rte_event_timer_adapter.h b/lib/eventdev/rte_event_timer_adapter.h
index 0bd1b30045..256807b3bf 100644
--- a/lib/eventdev/rte_event_timer_adapter.h
+++ b/lib/eventdev/rte_event_timer_adapter.h
@@ -107,14 +107,14 @@
  * All these use cases require high resolution and low time drift.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 
 #include "rte_eventdev.h"
 #include "rte_eventdev_trace_fp.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Timer adapter clock source
  */
diff --git a/lib/eventdev/rte_eventdev.h b/lib/eventdev/rte_eventdev.h
index 08e5f9320b..e5c5b7df64 100644
--- a/lib/eventdev/rte_eventdev.h
+++ b/lib/eventdev/rte_eventdev.h
@@ -237,10 +237,6 @@
  * \endcode
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include <rte_common.h>
 #include <rte_errno.h>
@@ -2469,6 +2465,10 @@ rte_event_vector_pool_create(const char *name, unsigned int n,
 
 #include <rte_eventdev_core.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static __rte_always_inline uint16_t
 __rte_event_enqueue_burst(uint8_t dev_id, uint8_t port_id,
 			  const struct rte_event ev[], uint16_t nb_events,
diff --git a/lib/eventdev/rte_eventdev_trace_fp.h b/lib/eventdev/rte_eventdev_trace_fp.h
index 04d510ad00..8656f1e6e4 100644
--- a/lib/eventdev/rte_eventdev_trace_fp.h
+++ b/lib/eventdev/rte_eventdev_trace_fp.h
@@ -11,12 +11,12 @@
  * API for ethdev trace support
  */
 
+#include <rte_trace_point.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_trace_point.h>
-
 RTE_TRACE_POINT_FP(
 	rte_eventdev_trace_deq_burst,
 	RTE_TRACE_POINT_ARGS(uint8_t dev_id, uint8_t port_id, void *ev_table,
diff --git a/lib/graph/rte_graph_model_mcore_dispatch.h b/lib/graph/rte_graph_model_mcore_dispatch.h
index 732b89297f..f9ff3daa88 100644
--- a/lib/graph/rte_graph_model_mcore_dispatch.h
+++ b/lib/graph/rte_graph_model_mcore_dispatch.h
@@ -12,10 +12,6 @@
  * dispatch model.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_errno.h>
 #include <rte_mempool.h>
 #include <rte_memzone.h>
@@ -23,6 +19,10 @@ extern "C" {
 
 #include "rte_graph_worker_common.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_GRAPH_SCHED_WQ_SIZE_MULTIPLIER  8
 #define RTE_GRAPH_SCHED_WQ_SIZE(nb_nodes)   \
 	((typeof(nb_nodes))((nb_nodes) * RTE_GRAPH_SCHED_WQ_SIZE_MULTIPLIER))
diff --git a/lib/graph/rte_graph_worker.h b/lib/graph/rte_graph_worker.h
index 03d0e01b68..b0f952a82c 100644
--- a/lib/graph/rte_graph_worker.h
+++ b/lib/graph/rte_graph_worker.h
@@ -6,13 +6,13 @@
 #ifndef _RTE_GRAPH_WORKER_H_
 #define _RTE_GRAPH_WORKER_H_
 
+#include "rte_graph_model_rtc.h"
+#include "rte_graph_model_mcore_dispatch.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "rte_graph_model_rtc.h"
-#include "rte_graph_model_mcore_dispatch.h"
-
 /**
  * Perform graph walk on the circular buffer and invoke the process function
  * of the nodes and collect the stats.
diff --git a/lib/gso/rte_gso.h b/lib/gso/rte_gso.h
index d60cb65f18..75246989dc 100644
--- a/lib/gso/rte_gso.h
+++ b/lib/gso/rte_gso.h
@@ -10,13 +10,13 @@
  * Interface to GSO library
  */
 
+#include <stdint.h>
+#include <rte_mbuf.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-#include <rte_mbuf.h>
-
 /* Minimum GSO segment size for TCP based packets. */
 #define RTE_GSO_SEG_SIZE_MIN (sizeof(struct rte_ether_hdr) + \
 		sizeof(struct rte_ipv4_hdr) + sizeof(struct rte_tcp_hdr) + 1)
diff --git a/lib/hash/rte_fbk_hash.h b/lib/hash/rte_fbk_hash.h
index b01126999b..1f0c1d1b6c 100644
--- a/lib/hash/rte_fbk_hash.h
+++ b/lib/hash/rte_fbk_hash.h
@@ -18,15 +18,15 @@
 #include <stdint.h>
 #include <errno.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <string.h>
 
 #include <rte_hash_crc.h>
 #include <rte_jhash.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifndef RTE_FBK_HASH_INIT_VAL_DEFAULT
 /** Initialising value used when calculating hash. */
 #define RTE_FBK_HASH_INIT_VAL_DEFAULT		0xFFFFFFFF
diff --git a/lib/hash/rte_hash_crc.h b/lib/hash/rte_hash_crc.h
index 8ad2422ec3..fa07c97685 100644
--- a/lib/hash/rte_hash_crc.h
+++ b/lib/hash/rte_hash_crc.h
@@ -11,10 +11,6 @@
  * RTE CRC Hash
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_branch_prediction.h>
@@ -39,6 +35,10 @@ extern uint8_t rte_hash_crc32_alg;
 #include "rte_crc_generic.h"
 #endif
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Allow or disallow use of SSE4.2/ARMv8 intrinsics for CRC32 hash
  * calculation.
diff --git a/lib/hash/rte_jhash.h b/lib/hash/rte_jhash.h
index f2446f081e..b70799d209 100644
--- a/lib/hash/rte_jhash.h
+++ b/lib/hash/rte_jhash.h
@@ -11,10 +11,6 @@
  * jhash functions.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <string.h>
 #include <limits.h>
@@ -23,6 +19,10 @@ extern "C" {
 #include <rte_log.h>
 #include <rte_byteorder.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* jhash.h: Jenkins hash support.
  *
  * Copyright (C) 2006 Bob Jenkins (bob_jenkins@burtleburtle.net)
diff --git a/lib/hash/rte_thash.h b/lib/hash/rte_thash.h
index 30b657e67a..ec9bc57efa 100644
--- a/lib/hash/rte_thash.h
+++ b/lib/hash/rte_thash.h
@@ -15,10 +15,6 @@
  * after GRE header decapsulating)
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_byteorder.h>
@@ -28,6 +24,10 @@ extern "C" {
 
 #if defined(RTE_ARCH_X86) || defined(__ARM_NEON)
 #include <rte_vect.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
 #endif
 
 #ifdef RTE_ARCH_X86
diff --git a/lib/hash/rte_thash_gfni.h b/lib/hash/rte_thash_gfni.h
index 132f37506d..e82378933c 100644
--- a/lib/hash/rte_thash_gfni.h
+++ b/lib/hash/rte_thash_gfni.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_THASH_GFNI_H_
 #define _RTE_THASH_GFNI_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include <rte_log.h>
 
@@ -18,6 +14,10 @@ extern "C" {
 
 #endif
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @internal
  * Stubs only used when GFNI is not available.
diff --git a/lib/ip_frag/rte_ip_frag.h b/lib/ip_frag/rte_ip_frag.h
index 2ad318096b..84fd717953 100644
--- a/lib/ip_frag/rte_ip_frag.h
+++ b/lib/ip_frag/rte_ip_frag.h
@@ -12,10 +12,6 @@
  * Implementation of IP packet fragmentation and reassembly.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <stdio.h>
 
@@ -25,6 +21,10 @@ extern "C" {
 #include <rte_ip.h>
 #include <rte_byteorder.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct rte_mbuf;
 
 /** death row size (in packets) */
diff --git a/lib/ipsec/rte_ipsec.h b/lib/ipsec/rte_ipsec.h
index f15f6f2966..28b7a61aea 100644
--- a/lib/ipsec/rte_ipsec.h
+++ b/lib/ipsec/rte_ipsec.h
@@ -17,10 +17,6 @@
 #include <rte_ipsec_sa.h>
 #include <rte_mbuf.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 struct rte_ipsec_session;
 
 /**
@@ -181,6 +177,10 @@ rte_ipsec_telemetry_sa_del(const struct rte_ipsec_sa *sa);
 
 #include <rte_ipsec_group.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/log/rte_log.h b/lib/log/rte_log.h
index f357c59548..3735137150 100644
--- a/lib/log/rte_log.h
+++ b/lib/log/rte_log.h
@@ -13,10 +13,6 @@
  * This file provides a log API to RTE applications.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <assert.h>
 #include <stdint.h>
 #include <stdio.h>
@@ -26,6 +22,10 @@ extern "C" {
 #include <rte_common.h>
 #include <rte_config.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* SDK log type */
 #define RTE_LOGTYPE_EAL        0 /**< Log related to eal. */
 				 /* was RTE_LOGTYPE_MALLOC */
diff --git a/lib/lpm/rte_lpm.h b/lib/lpm/rte_lpm.h
index 9c6df311cb..329dc1aad4 100644
--- a/lib/lpm/rte_lpm.h
+++ b/lib/lpm/rte_lpm.h
@@ -391,6 +391,10 @@ static inline void
 rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
 	uint32_t defv);
 
+#ifdef __cplusplus
+}
+#endif
+
 #if defined(RTE_ARCH_ARM)
 #ifdef RTE_HAS_SVE_ACLE
 #include "rte_lpm_sve.h"
@@ -407,8 +411,4 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
 #include "rte_lpm_scalar.h"
 #endif
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* _RTE_LPM_H_ */
diff --git a/lib/member/rte_member.h b/lib/member/rte_member.h
index aec192eba5..109bdd000b 100644
--- a/lib/member/rte_member.h
+++ b/lib/member/rte_member.h
@@ -54,10 +54,6 @@
 #ifndef _RTE_MEMBER_H_
 #define _RTE_MEMBER_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <stdbool.h>
 #include <inttypes.h>
@@ -100,6 +96,10 @@ typedef uint16_t member_set_t;
 #define MEMBER_HASH_FUNC       rte_jhash
 #endif
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** @internal setsummary structure. */
 struct rte_member_setsum;
 
diff --git a/lib/member/rte_member_sketch.h b/lib/member/rte_member_sketch.h
index 74f24ca223..6a8d5104dd 100644
--- a/lib/member/rte_member_sketch.h
+++ b/lib/member/rte_member_sketch.h
@@ -5,13 +5,13 @@
 #ifndef RTE_MEMBER_SKETCH_H
 #define RTE_MEMBER_SKETCH_H
 
+#include <rte_vect.h>
+#include <rte_ring_elem.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_vect.h>
-#include <rte_ring_elem.h>
-
 #define NUM_ROW_SCALAR 5
 #define INTERVAL (1 << 15)
 
diff --git a/lib/member/rte_member_sketch_avx512.h b/lib/member/rte_member_sketch_avx512.h
index 52666b5b4c..a8ef3b065e 100644
--- a/lib/member/rte_member_sketch_avx512.h
+++ b/lib/member/rte_member_sketch_avx512.h
@@ -5,14 +5,14 @@
 #ifndef RTE_MEMBER_SKETCH_AVX512_H
 #define RTE_MEMBER_SKETCH_AVX512_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_vect.h>
 #include "rte_member.h"
 #include "rte_member_sketch.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define NUM_ROW_VEC 8
 
 void
diff --git a/lib/member/rte_member_x86.h b/lib/member/rte_member_x86.h
index d115151f9f..4de453485b 100644
--- a/lib/member/rte_member_x86.h
+++ b/lib/member/rte_member_x86.h
@@ -5,12 +5,12 @@
 #ifndef _RTE_MEMBER_X86_H_
 #define _RTE_MEMBER_X86_H_
 
+#include <x86intrin.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <x86intrin.h>
-
 #if defined(__AVX2__)
 
 static inline int
diff --git a/lib/member/rte_xxh64_avx512.h b/lib/member/rte_xxh64_avx512.h
index ffe6cb79f9..58f896ebb8 100644
--- a/lib/member/rte_xxh64_avx512.h
+++ b/lib/member/rte_xxh64_avx512.h
@@ -5,13 +5,13 @@
 #ifndef RTE_XXH64_AVX512_H
 #define RTE_XXH64_AVX512_H
 
+#include <rte_common.h>
+#include <immintrin.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_common.h>
-#include <immintrin.h>
-
 /* 0b1001111000110111011110011011000110000101111010111100101010000111 */
 static const uint64_t PRIME64_1 = 0x9E3779B185EBCA87ULL;
 /* 0b1100001010110010101011100011110100100111110101001110101101001111 */
diff --git a/lib/mempool/mempool_trace.h b/lib/mempool/mempool_trace.h
index dffef062e4..c595a3116b 100644
--- a/lib/mempool/mempool_trace.h
+++ b/lib/mempool/mempool_trace.h
@@ -11,15 +11,15 @@
  * APIs for mempool trace support
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include "rte_mempool.h"
 
 #include <rte_memzone.h>
 #include <rte_trace_point.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 RTE_TRACE_POINT(
 	rte_mempool_trace_create,
 	RTE_TRACE_POINT_ARGS(const char *name, uint32_t nb_elts,
diff --git a/lib/mempool/rte_mempool_trace_fp.h b/lib/mempool/rte_mempool_trace_fp.h
index ed060e887c..9c5cdbb291 100644
--- a/lib/mempool/rte_mempool_trace_fp.h
+++ b/lib/mempool/rte_mempool_trace_fp.h
@@ -11,12 +11,12 @@
  * Mempool fast path API for trace support
  */
 
+#include <rte_trace_point.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_trace_point.h>
-
 RTE_TRACE_POINT_FP(
 	rte_mempool_trace_ops_dequeue_bulk,
 	RTE_TRACE_POINT_ARGS(void *mempool, void **obj_table,
diff --git a/lib/meter/rte_meter.h b/lib/meter/rte_meter.h
index bd68cbe389..e72bf93b3e 100644
--- a/lib/meter/rte_meter.h
+++ b/lib/meter/rte_meter.h
@@ -6,10 +6,6 @@
 #ifndef __INCLUDE_RTE_METER_H__
 #define __INCLUDE_RTE_METER_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Traffic Metering
@@ -22,6 +18,10 @@ extern "C" {
 
 #include <stdint.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /*
  * Application Programmer's Interface (API)
  */
diff --git a/lib/mldev/mldev_utils.h b/lib/mldev/mldev_utils.h
index 5e2a180adc..bf21067d38 100644
--- a/lib/mldev/mldev_utils.h
+++ b/lib/mldev/mldev_utils.h
@@ -5,10 +5,6 @@
 #ifndef RTE_MLDEV_UTILS_H
 #define RTE_MLDEV_UTILS_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  *
@@ -20,6 +16,10 @@ extern "C" {
 #include <rte_compat.h>
 #include <rte_mldev.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @internal
  *
diff --git a/lib/mldev/rte_mldev_core.h b/lib/mldev/rte_mldev_core.h
index b3bd281083..8dccf125fc 100644
--- a/lib/mldev/rte_mldev_core.h
+++ b/lib/mldev/rte_mldev_core.h
@@ -16,10 +16,6 @@
  * These APIs are for MLDEV PMDs and library only.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <dev_driver.h>
@@ -27,6 +23,10 @@ extern "C" {
 #include <rte_log.h>
 #include <rte_mldev.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Device state */
 #define ML_DEV_DETACHED (0)
 #define ML_DEV_ATTACHED (1)
diff --git a/lib/mldev/rte_mldev_pmd.h b/lib/mldev/rte_mldev_pmd.h
index fd5bbf4360..47c0f23223 100644
--- a/lib/mldev/rte_mldev_pmd.h
+++ b/lib/mldev/rte_mldev_pmd.h
@@ -14,10 +14,6 @@
  * These APIs are for MLDEV PMDs only and user applications should not call them directly.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_common.h>
@@ -25,6 +21,10 @@ extern "C" {
 #include <rte_mldev.h>
 #include <rte_mldev_core.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @internal
  *
diff --git a/lib/net/rte_ether.h b/lib/net/rte_ether.h
index 32ed515aef..403e84f50b 100644
--- a/lib/net/rte_ether.h
+++ b/lib/net/rte_ether.h
@@ -11,10 +11,6 @@
  * Ethernet Helpers in RTE
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <stdio.h>
 
@@ -22,6 +18,10 @@ extern "C" {
 #include <rte_mbuf.h>
 #include <rte_byteorder.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_ETHER_ADDR_LEN  6 /**< Length of Ethernet address. */
 #define RTE_ETHER_TYPE_LEN  2 /**< Length of Ethernet type field. */
 #define RTE_ETHER_CRC_LEN   4 /**< Length of Ethernet CRC. */
diff --git a/lib/net/rte_net.h b/lib/net/rte_net.h
index cdc6cf956d..40ad6a71a1 100644
--- a/lib/net/rte_net.h
+++ b/lib/net/rte_net.h
@@ -5,14 +5,14 @@
 #ifndef _RTE_NET_PTYPE_H_
 #define _RTE_NET_PTYPE_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_ip.h>
 #include <rte_udp.h>
 #include <rte_tcp.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Structure containing header lengths associated to a packet, filled
  * by rte_net_get_ptype().
diff --git a/lib/net/rte_sctp.h b/lib/net/rte_sctp.h
index 965682dc2b..a8ba9e49d8 100644
--- a/lib/net/rte_sctp.h
+++ b/lib/net/rte_sctp.h
@@ -14,14 +14,14 @@
 #ifndef _RTE_SCTP_H_
 #define _RTE_SCTP_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_byteorder.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * SCTP Header
  */
diff --git a/lib/node/rte_node_eth_api.h b/lib/node/rte_node_eth_api.h
index 143cf131b3..2b7019f6bb 100644
--- a/lib/node/rte_node_eth_api.h
+++ b/lib/node/rte_node_eth_api.h
@@ -16,15 +16,15 @@
  * and its queue associations.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include <rte_common.h>
 #include <rte_graph.h>
 #include <rte_mempool.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Port config for ethdev_rx and ethdev_tx node.
  */
diff --git a/lib/node/rte_node_ip4_api.h b/lib/node/rte_node_ip4_api.h
index 24f8ec843a..950751a525 100644
--- a/lib/node/rte_node_ip4_api.h
+++ b/lib/node/rte_node_ip4_api.h
@@ -15,15 +15,15 @@
  * This API allows to do control path functions of ip4_* nodes
  * like ip4_lookup, ip4_rewrite.
  */
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 #include <rte_compat.h>
 
 #include <rte_graph.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * IP4 lookup next nodes.
  */
diff --git a/lib/node/rte_node_ip6_api.h b/lib/node/rte_node_ip6_api.h
index a538dc2ea7..f467aac7b6 100644
--- a/lib/node/rte_node_ip6_api.h
+++ b/lib/node/rte_node_ip6_api.h
@@ -15,13 +15,13 @@
  * This API allows to do control path functions of ip6_* nodes
  * like ip6_lookup, ip6_rewrite.
  */
+#include <rte_common.h>
+#include <rte_compat.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_common.h>
-#include <rte_compat.h>
-
 /**
  * IP6 lookup next nodes.
  */
diff --git a/lib/node/rte_node_udp4_input_api.h b/lib/node/rte_node_udp4_input_api.h
index c873acbbe0..694660bd6a 100644
--- a/lib/node/rte_node_udp4_input_api.h
+++ b/lib/node/rte_node_udp4_input_api.h
@@ -16,14 +16,14 @@
  * like udp4_input.
  *
  */
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 #include <rte_compat.h>
 
 #include "rte_graph.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
 /**
  * UDP4 lookup next nodes.
  */
diff --git a/lib/pci/rte_pci.h b/lib/pci/rte_pci.h
index c26fc77209..9a50a12142 100644
--- a/lib/pci/rte_pci.h
+++ b/lib/pci/rte_pci.h
@@ -12,14 +12,14 @@
  * RTE PCI Library
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdio.h>
 #include <inttypes.h>
 #include <sys/types.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /*
  * Conventional PCI and PCI-X Mode 1 devices have 256 bytes of
  * configuration space.  PCI-X Mode 2 and PCIe devices have 4096 bytes of
diff --git a/lib/pdcp/rte_pdcp.h b/lib/pdcp/rte_pdcp.h
index f74524f83d..15fcbf9607 100644
--- a/lib/pdcp/rte_pdcp.h
+++ b/lib/pdcp/rte_pdcp.h
@@ -19,10 +19,6 @@
 #include <rte_pdcp_hdr.h>
 #include <rte_security.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /* Forward declarations. */
 struct rte_pdcp_entity;
 
@@ -373,6 +369,10 @@ rte_pdcp_t_reordering_expiry_handle(const struct rte_pdcp_entity *entity,
  */
 #include <rte_pdcp_group.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/pipeline/rte_pipeline.h b/lib/pipeline/rte_pipeline.h
index 0c7994b4f2..c9e7172453 100644
--- a/lib/pipeline/rte_pipeline.h
+++ b/lib/pipeline/rte_pipeline.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_PIPELINE_H__
 #define __INCLUDE_RTE_PIPELINE_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Pipeline
@@ -59,6 +55,10 @@ extern "C" {
 #include <rte_table.h>
 #include <rte_common.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct rte_mbuf;
 
 /*
diff --git a/lib/pipeline/rte_port_in_action.h b/lib/pipeline/rte_port_in_action.h
index ec2994599f..9d17bae988 100644
--- a/lib/pipeline/rte_port_in_action.h
+++ b/lib/pipeline/rte_port_in_action.h
@@ -46,10 +46,6 @@
  * @b EXPERIMENTAL: this API may change without prior notice
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_compat.h>
@@ -57,6 +53,10 @@ extern "C" {
 
 #include "rte_pipeline.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Input port actions. */
 enum rte_port_in_action_type {
 	/** Filter selected input packets. */
diff --git a/lib/pipeline/rte_swx_ctl.h b/lib/pipeline/rte_swx_ctl.h
index 6ef2551ab5..c4e63753f5 100644
--- a/lib/pipeline/rte_swx_ctl.h
+++ b/lib/pipeline/rte_swx_ctl.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_CTL_H__
 #define __INCLUDE_RTE_SWX_CTL_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Pipeline Control
@@ -22,6 +18,10 @@ extern "C" {
 #include "rte_swx_port.h"
 #include "rte_swx_table.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct rte_swx_pipeline;
 
 /** Name size. */
diff --git a/lib/pipeline/rte_swx_extern.h b/lib/pipeline/rte_swx_extern.h
index e10e963d63..1553fa81ec 100644
--- a/lib/pipeline/rte_swx_extern.h
+++ b/lib/pipeline/rte_swx_extern.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_EXTERN_H__
 #define __INCLUDE_RTE_SWX_EXTERN_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Extern objects and functions
@@ -19,6 +15,10 @@ extern "C" {
 
 #include <stdint.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /*
  * Extern type
  */
diff --git a/lib/pipeline/rte_swx_ipsec.h b/lib/pipeline/rte_swx_ipsec.h
index 7c07fdc739..d2e5abef7d 100644
--- a/lib/pipeline/rte_swx_ipsec.h
+++ b/lib/pipeline/rte_swx_ipsec.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_IPSEC_H__
 #define __INCLUDE_RTE_SWX_IPSEC_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Internet Protocol Security (IPsec)
@@ -53,6 +49,10 @@ extern "C" {
 #include <rte_compat.h>
 #include <rte_crypto_sym.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * IPsec Setup API
  */
diff --git a/lib/pipeline/rte_swx_pipeline.h b/lib/pipeline/rte_swx_pipeline.h
index 25df042d3b..882bd4bf6f 100644
--- a/lib/pipeline/rte_swx_pipeline.h
+++ b/lib/pipeline/rte_swx_pipeline.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_PIPELINE_H__
 #define __INCLUDE_RTE_SWX_PIPELINE_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Pipeline
@@ -22,6 +18,10 @@ extern "C" {
 #include "rte_swx_table.h"
 #include "rte_swx_extern.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Name size. */
 #ifndef RTE_SWX_NAME_SIZE
 #define RTE_SWX_NAME_SIZE 64
diff --git a/lib/pipeline/rte_swx_pipeline_spec.h b/lib/pipeline/rte_swx_pipeline_spec.h
index dd88c0bfab..077b407c0a 100644
--- a/lib/pipeline/rte_swx_pipeline_spec.h
+++ b/lib/pipeline/rte_swx_pipeline_spec.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_PIPELINE_SPEC_H__
 #define __INCLUDE_RTE_SWX_PIPELINE_SPEC_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <stdio.h>
 
@@ -15,6 +11,10 @@ extern "C" {
 
 #include <rte_swx_pipeline.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /*
  * extobj.
  *
diff --git a/lib/pipeline/rte_table_action.h b/lib/pipeline/rte_table_action.h
index 5dffbeb700..bab4bfd2e2 100644
--- a/lib/pipeline/rte_table_action.h
+++ b/lib/pipeline/rte_table_action.h
@@ -52,10 +52,6 @@
  * @b EXPERIMENTAL: this API may change without prior notice
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_compat.h>
@@ -65,6 +61,10 @@ extern "C" {
 
 #include "rte_pipeline.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Table actions. */
 enum rte_table_action_type {
 	/** Forward to next pipeline table, output port or drop. */
diff --git a/lib/port/rte_port.h b/lib/port/rte_port.h
index 0e30db371e..4b20872537 100644
--- a/lib/port/rte_port.h
+++ b/lib/port/rte_port.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_PORT_H__
 #define __INCLUDE_RTE_PORT_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Port
@@ -20,6 +16,10 @@ extern "C" {
 #include <stdint.h>
 #include <rte_mbuf.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**@{
  * Macros to allow accessing metadata stored in the mbuf headroom
  * just beyond the end of the mbuf data structure returned by a port
diff --git a/lib/port/rte_port_ethdev.h b/lib/port/rte_port_ethdev.h
index e07021cb89..7729ff0da3 100644
--- a/lib/port/rte_port_ethdev.h
+++ b/lib/port/rte_port_ethdev.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_PORT_ETHDEV_H__
 #define __INCLUDE_RTE_PORT_ETHDEV_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Port Ethernet Device
@@ -21,6 +17,10 @@ extern "C" {
 
 #include "rte_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** ethdev_reader port parameters */
 struct rte_port_ethdev_reader_params {
 	/** NIC RX port ID */
diff --git a/lib/port/rte_port_eventdev.h b/lib/port/rte_port_eventdev.h
index 0efb8e1021..d9eccf07d4 100644
--- a/lib/port/rte_port_eventdev.h
+++ b/lib/port/rte_port_eventdev.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_PORT_EVENTDEV_H__
 #define __INCLUDE_RTE_PORT_EVENTDEV_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Port Eventdev Interface
@@ -24,6 +20,10 @@ extern "C" {
 
 #include "rte_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Eventdev_reader port parameters */
 struct rte_port_eventdev_reader_params {
 	/** Eventdev Device ID */
diff --git a/lib/port/rte_port_fd.h b/lib/port/rte_port_fd.h
index 885b9ada22..40a5e4a426 100644
--- a/lib/port/rte_port_fd.h
+++ b/lib/port/rte_port_fd.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_PORT_FD_H__
 #define __INCLUDE_RTE_PORT_FD_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Port FD Device
@@ -21,6 +17,10 @@ extern "C" {
 
 #include "rte_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** fd_reader port parameters */
 struct rte_port_fd_reader_params {
 	/** File descriptor */
diff --git a/lib/port/rte_port_frag.h b/lib/port/rte_port_frag.h
index 4055872e8d..9a10f10523 100644
--- a/lib/port/rte_port_frag.h
+++ b/lib/port/rte_port_frag.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_PORT_IP_FRAG_H__
 #define __INCLUDE_RTE_PORT_IP_FRAG_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Port for IPv4 Fragmentation
@@ -31,6 +27,10 @@ extern "C" {
 
 #include "rte_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** ring_reader_ipv4_frag port parameters */
 struct rte_port_ring_reader_frag_params {
 	/** Underlying single consumer ring that has to be pre-initialized. */
diff --git a/lib/port/rte_port_ras.h b/lib/port/rte_port_ras.h
index 94cfb3ed92..86e36f5362 100644
--- a/lib/port/rte_port_ras.h
+++ b/lib/port/rte_port_ras.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_PORT_RAS_H__
 #define __INCLUDE_RTE_PORT_RAS_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Port for IPv4 Reassembly
@@ -31,6 +27,10 @@ extern "C" {
 
 #include "rte_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** ring_writer_ipv4_ras port parameters */
 struct rte_port_ring_writer_ras_params {
 	/** Underlying single consumer ring that has to be pre-initialized. */
diff --git a/lib/port/rte_port_ring.h b/lib/port/rte_port_ring.h
index 027928c924..2089d0889b 100644
--- a/lib/port/rte_port_ring.h
+++ b/lib/port/rte_port_ring.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_PORT_RING_H__
 #define __INCLUDE_RTE_PORT_RING_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Port Ring
@@ -27,6 +23,10 @@ extern "C" {
 
 #include "rte_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** ring_reader port parameters */
 struct rte_port_ring_reader_params {
 	/** Underlying consumer ring that has to be pre-initialized */
diff --git a/lib/port/rte_port_sched.h b/lib/port/rte_port_sched.h
index 251380ef80..1bf08ae6a9 100644
--- a/lib/port/rte_port_sched.h
+++ b/lib/port/rte_port_sched.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_PORT_SCHED_H__
 #define __INCLUDE_RTE_PORT_SCHED_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Port Hierarchical Scheduler
@@ -23,6 +19,10 @@ extern "C" {
 
 #include "rte_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** sched_reader port parameters */
 struct rte_port_sched_reader_params {
 	/** Underlying pre-initialized rte_sched_port */
diff --git a/lib/port/rte_port_source_sink.h b/lib/port/rte_port_source_sink.h
index bcdbaf1e40..3122dd5038 100644
--- a/lib/port/rte_port_source_sink.h
+++ b/lib/port/rte_port_source_sink.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_PORT_SOURCE_SINK_H__
 #define __INCLUDE_RTE_PORT_SOURCE_SINK_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Port Source/Sink
@@ -19,6 +15,10 @@ extern "C" {
 
 #include "rte_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** source port parameters */
 struct rte_port_source_params {
 	/** Pre-initialized buffer pool */
diff --git a/lib/port/rte_port_sym_crypto.h b/lib/port/rte_port_sym_crypto.h
index 6532b4388a..d03cdc1e8b 100644
--- a/lib/port/rte_port_sym_crypto.h
+++ b/lib/port/rte_port_sym_crypto.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_PORT_SYM_CRYPTO_H__
 #define __INCLUDE_RTE_PORT_SYM_CRYPTO_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Port sym crypto Interface
@@ -23,6 +19,10 @@ extern "C" {
 
 #include "rte_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Function prototype for reader post action. */
 typedef void (*rte_port_sym_crypto_reader_callback_fn)(struct rte_mbuf **pkts,
 		uint16_t n_pkts, void *arg);
diff --git a/lib/port/rte_swx_port.h b/lib/port/rte_swx_port.h
index 1dbd95ae87..b52b125572 100644
--- a/lib/port/rte_swx_port.h
+++ b/lib/port/rte_swx_port.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_PORT_H__
 #define __INCLUDE_RTE_SWX_PORT_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Port
@@ -17,6 +13,10 @@ extern "C" {
 
 #include <stdint.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Packet. */
 struct rte_swx_pkt {
 	/** Opaque packet handle. */
diff --git a/lib/port/rte_swx_port_ethdev.h b/lib/port/rte_swx_port_ethdev.h
index cbc2d7b213..1828031e67 100644
--- a/lib/port/rte_swx_port_ethdev.h
+++ b/lib/port/rte_swx_port_ethdev.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_PORT_ETHDEV_H__
 #define __INCLUDE_RTE_SWX_PORT_ETHDEV_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Ethernet Device Input and Output Ports
@@ -17,6 +13,10 @@ extern "C" {
 
 #include "rte_swx_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Ethernet device input port (reader) creation parameters. */
 struct rte_swx_port_ethdev_reader_params {
 	/** Name of a valid and fully configured Ethernet device. */
diff --git a/lib/port/rte_swx_port_fd.h b/lib/port/rte_swx_port_fd.h
index e61719c8f6..63529cf0ab 100644
--- a/lib/port/rte_swx_port_fd.h
+++ b/lib/port/rte_swx_port_fd.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_SWX_PORT_FD_H__
 #define __INCLUDE_RTE_SWX_PORT_FD_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX FD Input and Output Ports
@@ -18,6 +14,10 @@ extern "C" {
 
 #include "rte_swx_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** fd_reader port parameters */
 struct rte_swx_port_fd_reader_params {
 	/** File descriptor. Must be valid and opened in non-blocking mode. */
diff --git a/lib/port/rte_swx_port_ring.h b/lib/port/rte_swx_port_ring.h
index efc485fb08..ef241c3fee 100644
--- a/lib/port/rte_swx_port_ring.h
+++ b/lib/port/rte_swx_port_ring.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_SWX_PORT_RING_H__
 #define __INCLUDE_RTE_SWX_PORT_RING_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Ring Input and Output Ports
@@ -18,6 +14,10 @@ extern "C" {
 
 #include "rte_swx_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Ring input port (reader) creation parameters. */
 struct rte_swx_port_ring_reader_params {
 	/** Name of valid RTE ring. */
diff --git a/lib/port/rte_swx_port_source_sink.h b/lib/port/rte_swx_port_source_sink.h
index 91bcbf74f4..e3ca7cfbb4 100644
--- a/lib/port/rte_swx_port_source_sink.h
+++ b/lib/port/rte_swx_port_source_sink.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_PORT_SOURCE_SINK_H__
 #define __INCLUDE_RTE_SWX_PORT_SOURCE_SINK_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Source and Sink Ports
@@ -15,6 +11,10 @@ extern "C" {
 
 #include "rte_swx_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Maximum number of packets to read from the PCAP file. */
 #ifndef RTE_SWX_PORT_SOURCE_PKTS_MAX
 #define RTE_SWX_PORT_SOURCE_PKTS_MAX 1024
diff --git a/lib/rawdev/rte_rawdev.h b/lib/rawdev/rte_rawdev.h
index 640037b524..3fc471526e 100644
--- a/lib/rawdev/rte_rawdev.h
+++ b/lib/rawdev/rte_rawdev.h
@@ -14,13 +14,13 @@
  * no specific type already available in DPDK.
  */
 
+#include <rte_common.h>
+#include <rte_memory.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_common.h>
-#include <rte_memory.h>
-
 /* Rawdevice object - essentially a void to be typecast by implementation */
 typedef void *rte_rawdev_obj_t;
 
diff --git a/lib/rawdev/rte_rawdev_pmd.h b/lib/rawdev/rte_rawdev_pmd.h
index 22b406444d..408ed461a4 100644
--- a/lib/rawdev/rte_rawdev_pmd.h
+++ b/lib/rawdev/rte_rawdev_pmd.h
@@ -13,10 +13,6 @@
  * any application.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <string.h>
 
 #include <dev_driver.h>
@@ -26,6 +22,10 @@ extern "C" {
 
 #include "rte_rawdev.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 extern int librawdev_logtype;
 #define RTE_LOGTYPE_RAWDEV librawdev_logtype
 
diff --git a/lib/rcu/rte_rcu_qsbr.h b/lib/rcu/rte_rcu_qsbr.h
index ed3dd6d3d2..550fadf56a 100644
--- a/lib/rcu/rte_rcu_qsbr.h
+++ b/lib/rcu/rte_rcu_qsbr.h
@@ -21,10 +21,6 @@
  * entered quiescent state.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <inttypes.h>
 #include <stdalign.h>
 #include <stdbool.h>
@@ -36,6 +32,10 @@ extern "C" {
 #include <rte_atomic.h>
 #include <rte_ring.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 extern int rte_rcu_log_type;
 #define RTE_LOGTYPE_RCU rte_rcu_log_type
 
diff --git a/lib/regexdev/rte_regexdev.h b/lib/regexdev/rte_regexdev.h
index a50b841b1e..b18a1d4251 100644
--- a/lib/regexdev/rte_regexdev.h
+++ b/lib/regexdev/rte_regexdev.h
@@ -194,10 +194,6 @@
  * - rte_regexdev_dequeue_burst()
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include <rte_common.h>
 #include <rte_dev.h>
@@ -1428,6 +1424,10 @@ struct rte_regex_ops {
 
 #include "rte_regexdev_core.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @warning
  * @b EXPERIMENTAL: this API may change without prior notice.
diff --git a/lib/ring/rte_ring.h b/lib/ring/rte_ring.h
index c709f30497..11ca69c73d 100644
--- a/lib/ring/rte_ring.h
+++ b/lib/ring/rte_ring.h
@@ -34,13 +34,13 @@
  * for more information.
  */
 
+#include <rte_ring_core.h>
+#include <rte_ring_elem.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_ring_core.h>
-#include <rte_ring_elem.h>
-
 /**
  * Calculate the memory size needed for a ring
  *
diff --git a/lib/ring/rte_ring_core.h b/lib/ring/rte_ring_core.h
index 270869d214..222c5aeb3f 100644
--- a/lib/ring/rte_ring_core.h
+++ b/lib/ring/rte_ring_core.h
@@ -19,10 +19,6 @@
  * instead.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdalign.h>
 #include <stdio.h>
 #include <stdint.h>
@@ -38,6 +34,10 @@ extern "C" {
 #include <rte_pause.h>
 #include <rte_debug.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_TAILQ_RING_NAME "RTE_RING"
 
 /** enqueue/dequeue behavior types */
diff --git a/lib/ring/rte_ring_elem.h b/lib/ring/rte_ring_elem.h
index 7f7d4951d3..506f686884 100644
--- a/lib/ring/rte_ring_elem.h
+++ b/lib/ring/rte_ring_elem.h
@@ -16,10 +16,6 @@
  * RTE Ring with user defined element size
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_ring_core.h>
 #include <rte_ring_elem_pvt.h>
 
@@ -699,6 +695,10 @@ rte_ring_dequeue_burst_elem(struct rte_ring *r, void *obj_table,
 
 #include <rte_ring.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/ring/rte_ring_hts.h b/lib/ring/rte_ring_hts.h
index 9a5938ac58..a41acea740 100644
--- a/lib/ring/rte_ring_hts.h
+++ b/lib/ring/rte_ring_hts.h
@@ -24,12 +24,12 @@
  * To achieve that 64-bit CAS is used by head update routine.
  */
 
+#include <rte_ring_hts_elem_pvt.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_ring_hts_elem_pvt.h>
-
 /**
  * Enqueue several objects on the HTS ring (multi-producers safe).
  *
diff --git a/lib/ring/rte_ring_peek.h b/lib/ring/rte_ring_peek.h
index c0621d12e2..2312f52668 100644
--- a/lib/ring/rte_ring_peek.h
+++ b/lib/ring/rte_ring_peek.h
@@ -43,12 +43,12 @@
  * with enqueue(/dequeue) operation till _finish_ completes.
  */
 
+#include <rte_ring_peek_elem_pvt.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_ring_peek_elem_pvt.h>
-
 /**
  * Start to enqueue several objects on the ring.
  * Note that no actual objects are put in the queue by this function,
diff --git a/lib/ring/rte_ring_peek_zc.h b/lib/ring/rte_ring_peek_zc.h
index 0b5e34b731..3254fe0481 100644
--- a/lib/ring/rte_ring_peek_zc.h
+++ b/lib/ring/rte_ring_peek_zc.h
@@ -67,12 +67,12 @@
  * with enqueue/dequeue operation till _finish_ completes.
  */
 
+#include <rte_ring_peek_elem_pvt.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_ring_peek_elem_pvt.h>
-
 /**
  * Ring zero-copy information structure.
  *
diff --git a/lib/ring/rte_ring_rts.h b/lib/ring/rte_ring_rts.h
index 50fc8f74db..d7a3863c83 100644
--- a/lib/ring/rte_ring_rts.h
+++ b/lib/ring/rte_ring_rts.h
@@ -51,12 +51,12 @@
  * By default HTD_MAX == ring.capacity / 8.
  */
 
+#include <rte_ring_rts_elem_pvt.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_ring_rts_elem_pvt.h>
-
 /**
  * Enqueue several objects on the RTS ring (multi-producers safe).
  *
diff --git a/lib/sched/rte_approx.h b/lib/sched/rte_approx.h
index b60086330e..738e33a98b 100644
--- a/lib/sched/rte_approx.h
+++ b/lib/sched/rte_approx.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_APPROX_H__
 #define __INCLUDE_RTE_APPROX_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Rational Approximation
@@ -20,6 +16,10 @@ extern "C" {
 
 #include <stdint.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Find best rational approximation
  *
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
index 1477a47700..2a385ffdba 100644
--- a/lib/sched/rte_pie.h
+++ b/lib/sched/rte_pie.h
@@ -5,10 +5,6 @@
 #ifndef __RTE_PIE_H_INCLUDED__
 #define __RTE_PIE_H_INCLUDED__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * Proportional Integral controller Enhanced (PIE)
@@ -20,6 +16,10 @@ extern "C" {
 #include <rte_debug.h>
 #include <rte_cycles.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
 				     * to start measurement cycle (bytes)
 				     */
diff --git a/lib/sched/rte_red.h b/lib/sched/rte_red.h
index afaa35fcd6..e62abb9295 100644
--- a/lib/sched/rte_red.h
+++ b/lib/sched/rte_red.h
@@ -5,10 +5,6 @@
 #ifndef __RTE_RED_H_INCLUDED__
 #define __RTE_RED_H_INCLUDED__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Random Early Detection (RED)
@@ -20,6 +16,10 @@ extern "C" {
 #include <rte_cycles.h>
 #include <rte_branch_prediction.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_RED_SCALING                     10         /**< Fraction size for fixed-point */
 #define RTE_RED_S                           (1 << 22)  /**< Packet size multiplied by number of leaf queues */
 #define RTE_RED_MAX_TH_MAX                  1023       /**< Max threshold limit in fixed point format */
diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
index b882c4a882..222e6b3583 100644
--- a/lib/sched/rte_sched.h
+++ b/lib/sched/rte_sched.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_SCHED_H__
 #define __INCLUDE_RTE_SCHED_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Hierarchical Scheduler
@@ -62,6 +58,10 @@ extern "C" {
 #include "rte_red.h"
 #include "rte_pie.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Maximum number of queues per pipe.
  * Note that the multiple queues (power of 2) can only be assigned to
  * lowest priority (best-effort) traffic class. Other higher priority traffic
diff --git a/lib/sched/rte_sched_common.h b/lib/sched/rte_sched_common.h
index 573d164569..a5acb9c08a 100644
--- a/lib/sched/rte_sched_common.h
+++ b/lib/sched/rte_sched_common.h
@@ -5,13 +5,13 @@
 #ifndef __INCLUDE_RTE_SCHED_COMMON_H__
 #define __INCLUDE_RTE_SCHED_COMMON_H__
 
+#include <stdint.h>
+#include <sys/types.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-#include <sys/types.h>
-
 #if 0
 static inline uint32_t
 rte_min_pos_4_u16(uint16_t *x)
diff --git a/lib/security/rte_security.h b/lib/security/rte_security.h
index 1c8474b74f..7a9bafa0fa 100644
--- a/lib/security/rte_security.h
+++ b/lib/security/rte_security.h
@@ -12,10 +12,6 @@
  * RTE Security Common Definitions
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <sys/types.h>
 
 #include <rte_compat.h>
@@ -24,6 +20,10 @@ extern "C" {
 #include <rte_ip.h>
 #include <rte_mbuf_dyn.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** IPSec protocol mode */
 enum rte_security_ipsec_sa_mode {
 	RTE_SECURITY_IPSEC_SA_MODE_TRANSPORT = 1,
diff --git a/lib/security/rte_security_driver.h b/lib/security/rte_security_driver.h
index 9bb5052a4c..2ceb145066 100644
--- a/lib/security/rte_security_driver.h
+++ b/lib/security/rte_security_driver.h
@@ -12,13 +12,13 @@
  * RTE Security Common Definitions
  */
 
+#include <rte_compat.h>
+#include "rte_security.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_compat.h>
-#include "rte_security.h"
-
 /**
  * @internal
  * Security session to be used by library for internal usage
diff --git a/lib/stack/rte_stack.h b/lib/stack/rte_stack.h
index 3325757568..4439adfc42 100644
--- a/lib/stack/rte_stack.h
+++ b/lib/stack/rte_stack.h
@@ -15,10 +15,6 @@
 #ifndef _RTE_STACK_H_
 #define _RTE_STACK_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdalign.h>
 
 #include <rte_debug.h>
@@ -95,6 +91,10 @@ struct __rte_cache_aligned rte_stack {
 #include "rte_stack_std.h"
 #include "rte_stack_lf.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Push several objects on the stack (MT-safe).
  *
diff --git a/lib/table/rte_lru.h b/lib/table/rte_lru.h
index 88229d8632..bc1ad36500 100644
--- a/lib/table/rte_lru.h
+++ b/lib/table/rte_lru.h
@@ -5,15 +5,15 @@
 #ifndef __INCLUDE_RTE_LRU_H__
 #define __INCLUDE_RTE_LRU_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_config.h>
 #ifdef RTE_ARCH_X86_64
 #include "rte_lru_x86.h"
 #elif defined(RTE_ARCH_ARM64)
 #include "rte_lru_arm64.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
 #else
 #undef RTE_TABLE_HASH_LRU_STRATEGY
 #define RTE_TABLE_HASH_LRU_STRATEGY                        1
@@ -86,8 +86,4 @@ do {									\
 
 #endif
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif
diff --git a/lib/table/rte_lru_arm64.h b/lib/table/rte_lru_arm64.h
index f19b0bdb4e..f9a4678ee0 100644
--- a/lib/table/rte_lru_arm64.h
+++ b/lib/table/rte_lru_arm64.h
@@ -5,14 +5,14 @@
 #ifndef __RTE_LRU_ARM64_H__
 #define __RTE_LRU_ARM64_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <rte_vect.h>
 #include <rte_bitops.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifndef RTE_TABLE_HASH_LRU_STRATEGY
 #ifdef __ARM_NEON
 #define RTE_TABLE_HASH_LRU_STRATEGY                        3
diff --git a/lib/table/rte_lru_x86.h b/lib/table/rte_lru_x86.h
index ddfb8c1c8c..93f4a136a8 100644
--- a/lib/table/rte_lru_x86.h
+++ b/lib/table/rte_lru_x86.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_LRU_X86_H__
 #define __INCLUDE_RTE_LRU_X86_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_config.h>
@@ -97,8 +93,4 @@ do {									\
 
 #endif
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif
diff --git a/lib/table/rte_swx_hash_func.h b/lib/table/rte_swx_hash_func.h
index 04f3d543e7..9c65cfa913 100644
--- a/lib/table/rte_swx_hash_func.h
+++ b/lib/table/rte_swx_hash_func.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_HASH_FUNC_H__
 #define __INCLUDE_RTE_SWX_HASH_FUNC_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Hash Function
@@ -15,6 +11,10 @@ extern "C" {
 
 #include <stdint.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Hash function prototype
  *
diff --git a/lib/table/rte_swx_keycmp.h b/lib/table/rte_swx_keycmp.h
index 09fb1be869..b0ed819307 100644
--- a/lib/table/rte_swx_keycmp.h
+++ b/lib/table/rte_swx_keycmp.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_KEYCMP_H__
 #define __INCLUDE_RTE_SWX_KEYCMP_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Key Comparison Functions
@@ -16,6 +12,10 @@ extern "C" {
 #include <stdint.h>
 #include <string.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Key comparison function prototype
  *
diff --git a/lib/table/rte_swx_table.h b/lib/table/rte_swx_table.h
index ac01e19781..3c53459498 100644
--- a/lib/table/rte_swx_table.h
+++ b/lib/table/rte_swx_table.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_TABLE_H__
 #define __INCLUDE_RTE_SWX_TABLE_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Table
@@ -21,6 +17,10 @@ extern "C" {
 
 #include "rte_swx_hash_func.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Match type. */
 enum rte_swx_table_match_type {
 	/** Wildcard Match (WM). */
diff --git a/lib/table/rte_swx_table_em.h b/lib/table/rte_swx_table_em.h
index b7423dd060..592541f01f 100644
--- a/lib/table/rte_swx_table_em.h
+++ b/lib/table/rte_swx_table_em.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_TABLE_EM_H__
 #define __INCLUDE_RTE_SWX_TABLE_EM_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Exact Match Table
@@ -16,6 +12,10 @@ extern "C" {
 
 #include <rte_swx_table.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Exact match table operations - unoptimized. */
 extern struct rte_swx_table_ops rte_swx_table_exact_match_unoptimized_ops;
 
diff --git a/lib/table/rte_swx_table_learner.h b/lib/table/rte_swx_table_learner.h
index c5ea015b8d..9a18be083d 100644
--- a/lib/table/rte_swx_table_learner.h
+++ b/lib/table/rte_swx_table_learner.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_TABLE_LEARNER_H__
 #define __INCLUDE_RTE_SWX_TABLE_LEARNER_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Learner Table
@@ -53,6 +49,10 @@ extern "C" {
 
 #include "rte_swx_hash_func.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Maximum number of key timeout values per learner table. */
 #ifndef RTE_SWX_TABLE_LEARNER_N_KEY_TIMEOUTS_MAX
 #define RTE_SWX_TABLE_LEARNER_N_KEY_TIMEOUTS_MAX 16
diff --git a/lib/table/rte_swx_table_selector.h b/lib/table/rte_swx_table_selector.h
index 05863cc90b..ef29bdb6b0 100644
--- a/lib/table/rte_swx_table_selector.h
+++ b/lib/table/rte_swx_table_selector.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_TABLE_SELECTOR_H__
 #define __INCLUDE_RTE_SWX_TABLE_SELECTOR_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Selector Table
@@ -21,6 +17,10 @@ extern "C" {
 
 #include "rte_swx_table.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Selector table creation parameters. */
 struct rte_swx_table_selector_params {
 	/** Group ID offset. */
diff --git a/lib/table/rte_swx_table_wm.h b/lib/table/rte_swx_table_wm.h
index 4fd52c0a17..7eb6f8e2a6 100644
--- a/lib/table/rte_swx_table_wm.h
+++ b/lib/table/rte_swx_table_wm.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_TABLE_WM_H__
 #define __INCLUDE_RTE_SWX_TABLE_WM_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Wildcard Match Table
@@ -16,6 +12,10 @@ extern "C" {
 
 #include <rte_swx_table.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Wildcard match table operations. */
 extern struct rte_swx_table_ops rte_swx_table_wildcard_match_ops;
 
diff --git a/lib/table/rte_table.h b/lib/table/rte_table.h
index 9a5faf0e32..43a5a1a7b3 100644
--- a/lib/table/rte_table.h
+++ b/lib/table/rte_table.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_TABLE_H__
 #define __INCLUDE_RTE_TABLE_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Table
@@ -27,6 +23,10 @@ extern "C" {
 #include <stdint.h>
 #include <rte_port.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct rte_mbuf;
 
 /** Lookup table statistics */
diff --git a/lib/table/rte_table_acl.h b/lib/table/rte_table_acl.h
index 1cb7b9fbbd..61af7b88e4 100644
--- a/lib/table/rte_table_acl.h
+++ b/lib/table/rte_table_acl.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_TABLE_ACL_H__
 #define __INCLUDE_RTE_TABLE_ACL_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Table ACL
@@ -25,6 +21,10 @@ extern "C" {
 
 #include "rte_table.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** ACL table parameters */
 struct rte_table_acl_params {
 	/** Name */
diff --git a/lib/table/rte_table_array.h b/lib/table/rte_table_array.h
index fad83b0588..b2a7b95d68 100644
--- a/lib/table/rte_table_array.h
+++ b/lib/table/rte_table_array.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_TABLE_ARRAY_H__
 #define __INCLUDE_RTE_TABLE_ARRAY_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Table Array
@@ -20,6 +16,10 @@ extern "C" {
 
 #include "rte_table.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Array table parameters */
 struct rte_table_array_params {
 	/** Number of array entries. Has to be a power of two. */
diff --git a/lib/table/rte_table_hash.h b/lib/table/rte_table_hash.h
index 6698621dae..ff8fc9e9ce 100644
--- a/lib/table/rte_table_hash.h
+++ b/lib/table/rte_table_hash.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_TABLE_HASH_H__
 #define __INCLUDE_RTE_TABLE_HASH_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Table Hash
@@ -52,6 +48,10 @@ extern "C" {
 
 #include "rte_table.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Hash function */
 typedef uint64_t (*rte_table_hash_op_hash)(
 	void *key,
diff --git a/lib/table/rte_table_hash_cuckoo.h b/lib/table/rte_table_hash_cuckoo.h
index 3a55d28e9b..55aa12216a 100644
--- a/lib/table/rte_table_hash_cuckoo.h
+++ b/lib/table/rte_table_hash_cuckoo.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_TABLE_HASH_CUCKOO_H__
 #define __INCLUDE_RTE_TABLE_HASH_CUCKOO_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Table Hash Cuckoo
@@ -20,6 +16,10 @@ extern "C" {
 
 #include "rte_table.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Hash table parameters */
 struct rte_table_hash_cuckoo_params {
 	/** Name */
diff --git a/lib/table/rte_table_hash_func.h b/lib/table/rte_table_hash_func.h
index aa779c2182..cba7ec4c20 100644
--- a/lib/table/rte_table_hash_func.h
+++ b/lib/table/rte_table_hash_func.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_TABLE_HASH_FUNC_H__
 #define __INCLUDE_RTE_TABLE_HASH_FUNC_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_compat.h>
@@ -18,6 +14,10 @@ extern "C" {
 
 #include <x86intrin.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline uint64_t
 rte_crc32_u64(uint64_t crc, uint64_t v)
 {
@@ -28,6 +28,10 @@ rte_crc32_u64(uint64_t crc, uint64_t v)
 #include "rte_table_hash_func_arm64.h"
 #else
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline uint64_t
 rte_crc32_u64(uint64_t crc, uint64_t v)
 {
diff --git a/lib/table/rte_table_lpm.h b/lib/table/rte_table_lpm.h
index dde32deed9..59b9bdee89 100644
--- a/lib/table/rte_table_lpm.h
+++ b/lib/table/rte_table_lpm.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_TABLE_LPM_H__
 #define __INCLUDE_RTE_TABLE_LPM_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Table LPM for IPv4
@@ -45,6 +41,10 @@ extern "C" {
 
 #include "rte_table.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** LPM table parameters */
 struct rte_table_lpm_params {
 	/** Table name */
diff --git a/lib/table/rte_table_lpm_ipv6.h b/lib/table/rte_table_lpm_ipv6.h
index 96ddbd32c2..166a5ba9ee 100644
--- a/lib/table/rte_table_lpm_ipv6.h
+++ b/lib/table/rte_table_lpm_ipv6.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_TABLE_LPM_IPV6_H__
 #define __INCLUDE_RTE_TABLE_LPM_IPV6_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Table LPM for IPv6
@@ -45,6 +41,10 @@ extern "C" {
 
 #include "rte_table.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_LPM_IPV6_ADDR_SIZE 16
 
 /** LPM table parameters */
diff --git a/lib/table/rte_table_stub.h b/lib/table/rte_table_stub.h
index 846526ea99..f7e589df16 100644
--- a/lib/table/rte_table_stub.h
+++ b/lib/table/rte_table_stub.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_TABLE_STUB_H__
 #define __INCLUDE_RTE_TABLE_STUB_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Table Stub
@@ -18,6 +14,10 @@ extern "C" {
 
 #include "rte_table.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Stub table parameters: NONE */
 
 /** Stub table operations */
diff --git a/lib/telemetry/rte_telemetry.h b/lib/telemetry/rte_telemetry.h
index cab9daa6fe..463819e2bf 100644
--- a/lib/telemetry/rte_telemetry.h
+++ b/lib/telemetry/rte_telemetry.h
@@ -5,14 +5,14 @@
 #ifndef _RTE_TELEMETRY_H_
 #define _RTE_TELEMETRY_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <rte_compat.h>
 #include <rte_common.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Maximum length for string used in object. */
 #define RTE_TEL_MAX_STRING_LEN 128
 /** Maximum length of string. */
diff --git a/lib/vhost/rte_vdpa.h b/lib/vhost/rte_vdpa.h
index 6ac85d1bbf..18e273c20f 100644
--- a/lib/vhost/rte_vdpa.h
+++ b/lib/vhost/rte_vdpa.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_VDPA_H_
 #define _RTE_VDPA_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  *
@@ -17,6 +13,10 @@ extern "C" {
 
 #include <stdint.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Maximum name length for statistics counters */
 #define RTE_VDPA_STATS_NAME_SIZE 64
 
diff --git a/lib/vhost/rte_vhost.h b/lib/vhost/rte_vhost.h
index b0434c4b8d..c7a5f56df8 100644
--- a/lib/vhost/rte_vhost.h
+++ b/lib/vhost/rte_vhost.h
@@ -18,10 +18,6 @@
 #include <rte_memory.h>
 #include <rte_mempool.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #ifndef __cplusplus
 /* These are not C++-aware. */
 #include <linux/vhost.h>
@@ -29,6 +25,10 @@ extern "C" {
 #include <linux/virtio_net.h>
 #endif
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_VHOST_USER_CLIENT		(1ULL << 0)
 #define RTE_VHOST_USER_NO_RECONNECT	(1ULL << 1)
 #define RTE_VHOST_USER_RESERVED_1	(1ULL << 2)
diff --git a/lib/vhost/rte_vhost_async.h b/lib/vhost/rte_vhost_async.h
index 8f190dd44b..60995e4e62 100644
--- a/lib/vhost/rte_vhost_async.h
+++ b/lib/vhost/rte_vhost_async.h
@@ -5,15 +5,15 @@
 #ifndef _RTE_VHOST_ASYNC_H_
 #define _RTE_VHOST_ASYNC_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_compat.h>
 #include <rte_mbuf.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Register an async channel for a vhost queue
  *
diff --git a/lib/vhost/rte_vhost_crypto.h b/lib/vhost/rte_vhost_crypto.h
index f962a53818..af61f0907e 100644
--- a/lib/vhost/rte_vhost_crypto.h
+++ b/lib/vhost/rte_vhost_crypto.h
@@ -5,12 +5,12 @@
 #ifndef _VHOST_CRYPTO_H_
 #define _VHOST_CRYPTO_H_
 
+#include <stdint.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-
 /* pre-declare structs to avoid including full headers */
 struct rte_mempool;
 struct rte_crypto_op;
diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h
index 8db4ab9f4d..42392a0d14 100644
--- a/lib/vhost/vdpa_driver.h
+++ b/lib/vhost/vdpa_driver.h
@@ -5,10 +5,6 @@
 #ifndef _VDPA_DRIVER_H_
 #define _VDPA_DRIVER_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdbool.h>
 
 #include <rte_compat.h>
@@ -16,6 +12,10 @@ extern "C" {
 #include "rte_vhost.h"
 #include "rte_vdpa.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_VHOST_QUEUE_ALL UINT16_MAX
 
 /**
-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [PATCH v8 2/6] eal: extend bit manipulation functionality
  2024-09-17 10:48           ` [PATCH v8 0/6] Improve EAL bit operations API Mattias Rönnblom
  2024-09-17 10:48             ` [PATCH v8 1/6] dpdk: do not force C linkage on include file dependencies Mattias Rönnblom
@ 2024-09-17 10:48             ` Mattias Rönnblom
  2024-09-17 10:48             ` [PATCH v8 3/6] eal: add unit tests for bit operations Mattias Rönnblom
                               ` (3 subsequent siblings)
  5 siblings, 0 replies; 63+ messages in thread
From: Mattias Rönnblom @ 2024-09-17 10:48 UTC (permalink / raw)
  To: dev
  Cc: hofors, Heng Wang, Stephen Hemminger, Tyler Retzlaff,
	Morten Brørup, Jack Bond-Preston, David Marchand,
	Chengwen Feng, Mattias Rönnblom

Add functionality to test and modify the value of individual bits in
32-bit or 64-bit words.

These functions have no implications on memory ordering or atomicity,
and do not use volatile, and thus do not prevent any compiler
optimizations.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
Acked-by: Jack Bond-Preston <jack.bond-preston@foss.arm.com>

--

PATCH v3:
 * Remove unnecessary <rte_compat.h> include.
 * Remove redundant 'fun' parameter from the __RTE_GEN_BIT_*() macros
   (Jack Bond-Preston).
 * Introduce __RTE_GEN_BIT_OPS() macro, consistent with how things
   are done when generating the atomic bit operations.
 * Refer to volatile bit op functions as variants instead of families
   (macro parameter naming).

RFC v6:
 * Have rte_bit_test() accept const-marked bitsets.

RFC v4:
 * Add rte_bit_flip() which, believe it or not, flips the value of a bit.
 * Mark macro-generated private functions as experimental.
 * Use macros to generate *assign*() functions.

RFC v3:
 * Work around lack of C++ support for _Generic (Tyler Retzlaff).
 * Fix ','-related checkpatch warnings.
---
 lib/eal/include/rte_bitops.h | 260 ++++++++++++++++++++++++++++++++++-
 1 file changed, 258 insertions(+), 2 deletions(-)

diff --git a/lib/eal/include/rte_bitops.h b/lib/eal/include/rte_bitops.h
index 449565eeae..6915b945ba 100644
--- a/lib/eal/include/rte_bitops.h
+++ b/lib/eal/include/rte_bitops.h
@@ -2,6 +2,7 @@
  * Copyright(c) 2020 Arm Limited
  * Copyright(c) 2010-2019 Intel Corporation
  * Copyright(c) 2023 Microsoft Corporation
+ * Copyright(c) 2024 Ericsson AB
  */
 
 #ifndef _RTE_BITOPS_H_
@@ -11,12 +12,14 @@
  * @file
  * Bit Operations
  *
- * This file defines a family of APIs for bit operations
- * without enforcing memory ordering.
+ * This file provides functionality for low-level, single-word
+ * arithmetic and bit-level operations, such as counting or
+ * setting individual bits.
  */
 
 #include <stdint.h>
 
+#include <rte_compat.h>
 #include <rte_debug.h>
 
 #ifdef __cplusplus
@@ -105,6 +108,197 @@ extern "C" {
 #define RTE_FIELD_GET64(mask, reg) \
 		((typeof(mask))(((reg) & (mask)) >> rte_ctz64(mask)))
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Test bit in word.
+ *
+ * Generic selection macro to test the value of a bit in a 32-bit or
+ * 64-bit word. The type of operation depends on the type of the @c
+ * addr parameter.
+ *
+ * This macro does not give any guarantees in regards to memory
+ * ordering or atomicity.
+ *
+ * @param addr
+ *   A pointer to the word to query.
+ * @param nr
+ *   The index of the bit.
+ */
+#define rte_bit_test(addr, nr)					\
+	_Generic((addr),					\
+		uint32_t *: __rte_bit_test32,			\
+		const uint32_t *: __rte_bit_test32,		\
+		uint64_t *: __rte_bit_test64,			\
+		const uint64_t *: __rte_bit_test64)(addr, nr)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Set bit in word.
+ *
+ * Generic selection macro to set a bit in a 32-bit or 64-bit
+ * word. The type of operation depends on the type of the @c addr
+ * parameter.
+ *
+ * This macro does not give any guarantees in regards to memory
+ * ordering or atomicity.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ */
+#define rte_bit_set(addr, nr)				\
+	_Generic((addr),				\
+		 uint32_t *: __rte_bit_set32,		\
+		 uint64_t *: __rte_bit_set64)(addr, nr)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Clear bit in word.
+ *
+ * Generic selection macro to clear a bit in a 32-bit or 64-bit
+ * word. The type of operation depends on the type of the @c addr
+ * parameter.
+ *
+ * This macro does not give any guarantees in regards to memory
+ * ordering or atomicity.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ */
+#define rte_bit_clear(addr, nr)					\
+	_Generic((addr),					\
+		 uint32_t *: __rte_bit_clear32,			\
+		 uint64_t *: __rte_bit_clear64)(addr, nr)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Assign a value to a bit in word.
+ *
+ * Generic selection macro to assign a value to a bit in a 32-bit or 64-bit
+ * word. The type of operation depends on the type of the @c addr parameter.
+ *
+ * This macro does not give any guarantees in regards to memory
+ * ordering or atomicity.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ * @param value
+ *   The new value of the bit - true for '1', or false for '0'.
+ */
+#define rte_bit_assign(addr, nr, value)					\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_assign32,			\
+		 uint64_t *: __rte_bit_assign64)(addr, nr, value)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Flip a bit in word.
+ *
+ * Generic selection macro to change the value of a bit to '0' if '1'
+ * or '1' if '0' in a 32-bit or 64-bit word. The type of operation
+ * depends on the type of the @c addr parameter.
+ *
+ * This macro does not give any guarantees in regards to memory
+ * ordering or atomicity.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ */
+#define rte_bit_flip(addr, nr)						\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_flip32,				\
+		 uint64_t *: __rte_bit_flip64)(addr, nr)
+
+#define __RTE_GEN_BIT_TEST(variant, qualifier, size)			\
+	__rte_experimental						\
+	static inline bool						\
+	__rte_bit_ ## variant ## test ## size(const qualifier uint ## size ## _t *addr, \
+					    unsigned int nr)		\
+	{								\
+		RTE_ASSERT(nr < size);					\
+									\
+		uint ## size ## _t mask = (uint ## size ## _t)1 << nr;	\
+		return *addr & mask;					\
+	}
+
+#define __RTE_GEN_BIT_SET(variant, qualifier, size)			\
+	__rte_experimental						\
+	static inline void						\
+	__rte_bit_ ## variant ## set ## size(qualifier uint ## size ## _t *addr, \
+					    unsigned int nr)		\
+	{								\
+		RTE_ASSERT(nr < size);					\
+									\
+		uint ## size ## _t mask = (uint ## size ## _t)1 << nr;	\
+		*addr |= mask;						\
+	}								\
+
+#define __RTE_GEN_BIT_CLEAR(variant, qualifier, size)			\
+	__rte_experimental						\
+	static inline void						\
+	__rte_bit_ ## variant ## clear ## size(qualifier uint ## size ## _t *addr, \
+					    unsigned int nr)		\
+	{								\
+		RTE_ASSERT(nr < size);					\
+									\
+		uint ## size ## _t mask = ~((uint ## size ## _t)1 << nr); \
+		(*addr) &= mask;					\
+	}								\
+
+#define __RTE_GEN_BIT_ASSIGN(variant, qualifier, size)			\
+	__rte_experimental						\
+	static inline void						\
+	__rte_bit_ ## variant ## assign ## size(qualifier uint ## size ## _t *addr, \
+					    unsigned int nr, bool value) \
+	{								\
+		if (value)						\
+			__rte_bit_ ## variant ## set ## size(addr, nr);	\
+		else							\
+			__rte_bit_ ## variant ## clear ## size(addr, nr); \
+	}
+
+#define __RTE_GEN_BIT_FLIP(variant, qualifier, size)			\
+	__rte_experimental						\
+	static inline void						\
+	__rte_bit_ ## variant ## flip ## size(qualifier uint ## size ## _t *addr, \
+					    unsigned int nr)		\
+	{								\
+		bool value;						\
+									\
+		value = __rte_bit_ ## variant ## test ## size(addr, nr); \
+		__rte_bit_ ## variant ## assign ## size(addr, nr, !value); \
+	}
+
+#define __RTE_GEN_BIT_OPS(v, qualifier, size)	\
+	__RTE_GEN_BIT_TEST(v, qualifier, size)	\
+	__RTE_GEN_BIT_SET(v, qualifier, size)	\
+	__RTE_GEN_BIT_CLEAR(v, qualifier, size)	\
+	__RTE_GEN_BIT_ASSIGN(v, qualifier, size)	\
+	__RTE_GEN_BIT_FLIP(v, qualifier, size)
+
+#define __RTE_GEN_BIT_OPS_SIZE(size) \
+	__RTE_GEN_BIT_OPS(,, size)
+
+__RTE_GEN_BIT_OPS_SIZE(32)
+__RTE_GEN_BIT_OPS_SIZE(64)
+
 /*------------------------ 32-bit relaxed operations ------------------------*/
 
 /**
@@ -787,6 +981,68 @@ rte_log2_u64(uint64_t v)
 
 #ifdef __cplusplus
 }
+
+/*
+ * Since C++ doesn't support generic selection (i.e., _Generic),
+ * function overloading is used instead. Such functions must be
+ * defined outside 'extern "C"' to be accepted by the compiler.
+ */
+
+#undef rte_bit_test
+#undef rte_bit_set
+#undef rte_bit_clear
+#undef rte_bit_assign
+#undef rte_bit_flip
+
+#define __RTE_BIT_OVERLOAD_SZ_2(fun, qualifier, size, arg1_type, arg1_name) \
+	static inline void						\
+	rte_bit_ ## fun(qualifier uint ## size ## _t *addr,		\
+			arg1_type arg1_name)				\
+	{								\
+		__rte_bit_ ## fun ## size(addr, arg1_name);		\
+	}
+
+#define __RTE_BIT_OVERLOAD_2(fun, qualifier, arg1_type, arg1_name)	\
+	__RTE_BIT_OVERLOAD_SZ_2(fun, qualifier, 32, arg1_type, arg1_name) \
+	__RTE_BIT_OVERLOAD_SZ_2(fun, qualifier, 64, arg1_type, arg1_name)
+
+#define __RTE_BIT_OVERLOAD_SZ_2R(fun, qualifier, size, ret_type, arg1_type, \
+				 arg1_name)				\
+	static inline ret_type						\
+	rte_bit_ ## fun(qualifier uint ## size ## _t *addr,		\
+			arg1_type arg1_name)				\
+	{								\
+		return __rte_bit_ ## fun ## size(addr, arg1_name);	\
+	}
+
+#define __RTE_BIT_OVERLOAD_2R(fun, qualifier, ret_type, arg1_type, arg1_name) \
+	__RTE_BIT_OVERLOAD_SZ_2R(fun, qualifier, 32, ret_type, arg1_type, \
+				 arg1_name)				\
+	__RTE_BIT_OVERLOAD_SZ_2R(fun, qualifier, 64, ret_type, arg1_type, \
+				 arg1_name)
+
+#define __RTE_BIT_OVERLOAD_SZ_3(fun, qualifier, size, arg1_type, arg1_name, \
+				arg2_type, arg2_name)			\
+	static inline void						\
+	rte_bit_ ## fun(uint ## size ## _t *addr, arg1_type arg1_name,	\
+			arg2_type arg2_name)				\
+	{								\
+		__rte_bit_ ## fun ## size(addr, arg1_name, arg2_name);	\
+	}
+
+#define __RTE_BIT_OVERLOAD_3(fun, qualifier, arg1_type, arg1_name, arg2_type, \
+			     arg2_name)					\
+	__RTE_BIT_OVERLOAD_SZ_3(fun, qualifier, 32, arg1_type, arg1_name, \
+				arg2_type, arg2_name)			\
+	__RTE_BIT_OVERLOAD_SZ_3(fun, qualifier, 64, arg1_type, arg1_name, \
+				arg2_type, arg2_name)
+
+__RTE_BIT_OVERLOAD_2R(test, const, bool, unsigned int, nr)
+__RTE_BIT_OVERLOAD_2(set,, unsigned int, nr)
+__RTE_BIT_OVERLOAD_2(clear,, unsigned int, nr)
+__RTE_BIT_OVERLOAD_3(assign,, unsigned int, nr, bool, value)
+__RTE_BIT_OVERLOAD_2(flip,, unsigned int, nr)
+
 #endif
 
 #endif /* _RTE_BITOPS_H_ */
-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [PATCH v8 3/6] eal: add unit tests for bit operations
  2024-09-17 10:48           ` [PATCH v8 0/6] Improve EAL bit operations API Mattias Rönnblom
  2024-09-17 10:48             ` [PATCH v8 1/6] dpdk: do not force C linkage on include file dependencies Mattias Rönnblom
  2024-09-17 10:48             ` [PATCH v8 2/6] eal: extend bit manipulation functionality Mattias Rönnblom
@ 2024-09-17 10:48             ` Mattias Rönnblom
  2024-09-17 10:48             ` [PATCH v8 4/6] eal: add atomic " Mattias Rönnblom
                               ` (2 subsequent siblings)
  5 siblings, 0 replies; 63+ messages in thread
From: Mattias Rönnblom @ 2024-09-17 10:48 UTC (permalink / raw)
  To: dev
  Cc: hofors, Heng Wang, Stephen Hemminger, Tyler Retzlaff,
	Morten Brørup, Jack Bond-Preston, David Marchand,
	Chengwen Feng, Mattias Rönnblom

Extend bitops tests to cover the
rte_bit_[test|set|clear|assign|flip]()
functions.

The tests are converted to use the test suite runner framework.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
Acked-by: Jack Bond-Preston <jack.bond-preston@foss.arm.com>

--

RFC v6:
 * Test rte_bit_*test() usage through const pointers.

RFC v4:
 * Remove redundant line continuations.
---
 app/test/test_bitops.c | 85 ++++++++++++++++++++++++++++++++++--------
 1 file changed, 70 insertions(+), 15 deletions(-)

diff --git a/app/test/test_bitops.c b/app/test/test_bitops.c
index 0d4ccfb468..322f58c066 100644
--- a/app/test/test_bitops.c
+++ b/app/test/test_bitops.c
@@ -1,13 +1,68 @@
 /* SPDX-License-Identifier: BSD-3-Clause
  * Copyright(c) 2019 Arm Limited
+ * Copyright(c) 2024 Ericsson AB
  */
 
+#include <stdbool.h>
+
 #include <rte_launch.h>
 #include <rte_bitops.h>
+#include <rte_random.h>
 #include "test.h"
 
-uint32_t val32;
-uint64_t val64;
+#define GEN_TEST_BIT_ACCESS(test_name, set_fun, clear_fun, assign_fun,	\
+			    flip_fun, test_fun, size)			\
+	static int							\
+	test_name(void)							\
+	{								\
+		uint ## size ## _t reference = (uint ## size ## _t)rte_rand(); \
+		unsigned int bit_nr;					\
+		uint ## size ## _t word = (uint ## size ## _t)rte_rand(); \
+									\
+		for (bit_nr = 0; bit_nr < size; bit_nr++) {		\
+			bool reference_bit = (reference >> bit_nr) & 1;	\
+			bool assign = rte_rand() & 1;			\
+			if (assign)					\
+				assign_fun(&word, bit_nr, reference_bit); \
+			else {						\
+				if (reference_bit)			\
+					set_fun(&word, bit_nr);		\
+				else					\
+					clear_fun(&word, bit_nr);	\
+									\
+			}						\
+			TEST_ASSERT(test_fun(&word, bit_nr) == reference_bit, \
+				    "Bit %d had unexpected value", bit_nr); \
+			flip_fun(&word, bit_nr);			\
+			TEST_ASSERT(test_fun(&word, bit_nr) != reference_bit, \
+				    "Bit %d had unflipped value", bit_nr); \
+			flip_fun(&word, bit_nr);			\
+									\
+			const uint ## size ## _t *const_ptr = &word;	\
+			TEST_ASSERT(test_fun(const_ptr, bit_nr) ==	\
+				    reference_bit,			\
+				    "Bit %d had unexpected value", bit_nr); \
+		}							\
+									\
+		for (bit_nr = 0; bit_nr < size; bit_nr++) {		\
+			bool reference_bit = (reference >> bit_nr) & 1;	\
+			TEST_ASSERT(test_fun(&word, bit_nr) == reference_bit, \
+				    "Bit %d had unexpected value", bit_nr); \
+		}							\
+									\
+		TEST_ASSERT(reference == word, "Word had unexpected value"); \
+									\
+		return TEST_SUCCESS;					\
+	}
+
+GEN_TEST_BIT_ACCESS(test_bit_access32, rte_bit_set, rte_bit_clear,
+		    rte_bit_assign, rte_bit_flip, rte_bit_test, 32)
+
+GEN_TEST_BIT_ACCESS(test_bit_access64, rte_bit_set, rte_bit_clear,
+		    rte_bit_assign, rte_bit_flip, rte_bit_test, 64)
+
+static uint32_t val32;
+static uint64_t val64;
 
 #define MAX_BITS_32 32
 #define MAX_BITS_64 64
@@ -117,22 +172,22 @@ test_bit_relaxed_test_set_clear(void)
 	return TEST_SUCCESS;
 }
 
+static struct unit_test_suite test_suite = {
+	.suite_name = "Bitops test suite",
+	.unit_test_cases = {
+		TEST_CASE(test_bit_access32),
+		TEST_CASE(test_bit_access64),
+		TEST_CASE(test_bit_relaxed_set),
+		TEST_CASE(test_bit_relaxed_clear),
+		TEST_CASE(test_bit_relaxed_test_set_clear),
+		TEST_CASES_END()
+	}
+};
+
 static int
 test_bitops(void)
 {
-	val32 = 0;
-	val64 = 0;
-
-	if (test_bit_relaxed_set() < 0)
-		return TEST_FAILED;
-
-	if (test_bit_relaxed_clear() < 0)
-		return TEST_FAILED;
-
-	if (test_bit_relaxed_test_set_clear() < 0)
-		return TEST_FAILED;
-
-	return TEST_SUCCESS;
+	return unit_test_suite_runner(&test_suite);
 }
 
 REGISTER_FAST_TEST(bitops_autotest, true, true, test_bitops);
-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [PATCH v8 4/6] eal: add atomic bit operations
  2024-09-17 10:48           ` [PATCH v8 0/6] Improve EAL bit operations API Mattias Rönnblom
                               ` (2 preceding siblings ...)
  2024-09-17 10:48             ` [PATCH v8 3/6] eal: add unit tests for bit operations Mattias Rönnblom
@ 2024-09-17 10:48             ` Mattias Rönnblom
  2024-09-17 10:48             ` [PATCH v8 5/6] eal: add unit tests for atomic bit access functions Mattias Rönnblom
  2024-09-17 10:48             ` [PATCH v8 6/6] eal: extend bitops to handle volatile pointers Mattias Rönnblom
  5 siblings, 0 replies; 63+ messages in thread
From: Mattias Rönnblom @ 2024-09-17 10:48 UTC (permalink / raw)
  To: dev
  Cc: hofors, Heng Wang, Stephen Hemminger, Tyler Retzlaff,
	Morten Brørup, Jack Bond-Preston, David Marchand,
	Chengwen Feng, Mattias Rönnblom

Add atomic bit test/set/clear/assign/flip and
test-and-set/clear/assign/flip functions.

All atomic bit functions allow (and indeed, require) the caller to
specify a memory order.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
Acked-by: Jack Bond-Preston <jack.bond-preston@foss.arm.com>

--

PATCH v3:
 * Introduce __RTE_GEN_BIT_ATOMIC_*() 'qualifier' argument already in
   this patch (Jack Bond-Preston).
 * Refer to volatile bit op functions as variants instead of families
   (macro parameter naming).
 * Update release notes.

PATCH:
 * Add missing macro #undef for C++ version of atomic bit flip.

RFC v7:
 * Replace compare-exchange-based rte_bit_atomic_test_and_*() and
   flip() with implementations that use the previous value as returned
   by the atomic fetch function.
 * Reword documentation to match the non-atomic macro variants.
 * Remove pointer to <rte_stdatomic.h> for memory model documentation,
   since there is no documentation for that API.

RFC v6:
 * Have rte_bit_atomic_test() accept const-marked bitsets.

RFC v4:
 * Add atomic bit flip.
 * Mark macro-generated private functions experimental.

RFC v3:
 * Work around lack of C++ support for _Generic (Tyler Retzlaff).

RFC v2:
 o Add rte_bit_atomic_test_and_assign() (for consistency).
 o Fix bugs in rte_bit_atomic_test_and_[set|clear]().
 o Use <rte_stdatomic.h> to support MSVC.
---
 doc/guides/rel_notes/release_24_11.rst |  17 +
 lib/eal/include/rte_bitops.h           | 415 +++++++++++++++++++++++++
 2 files changed, 432 insertions(+)

diff --git a/doc/guides/rel_notes/release_24_11.rst b/doc/guides/rel_notes/release_24_11.rst
index 0ff70d9057..3111b1e4c0 100644
--- a/doc/guides/rel_notes/release_24_11.rst
+++ b/doc/guides/rel_notes/release_24_11.rst
@@ -56,6 +56,23 @@ New Features
      =======================================================
 
 
+* **Extended bit operations API.**
+
+  The support for bit-level operations on single 32- and 64-bit words
+  in <rte_bitops.h> has been extended with two families of
+  semantically well-defined functions.
+
+  rte_bit_[test|set|clear|assign|flip]() functions provide excellent
+  performance (by avoiding restricting the compiler and CPU), but give
+  no guarantees in regards to memory ordering or atomicity.
+
+  rte_bit_atomic_*() provide atomic bit-level operations, including
+  the possibility to specify memory ordering constraints.
+
+  The new public API elements are polymorphic, using the _Generic-
+  based macros (for C) and function overloading (in C++ translation
+  units).
+
 Removed Items
 -------------
 
diff --git a/lib/eal/include/rte_bitops.h b/lib/eal/include/rte_bitops.h
index 6915b945ba..3ad6795fd1 100644
--- a/lib/eal/include/rte_bitops.h
+++ b/lib/eal/include/rte_bitops.h
@@ -21,6 +21,7 @@
 
 #include <rte_compat.h>
 #include <rte_debug.h>
+#include <rte_stdatomic.h>
 
 #ifdef __cplusplus
 extern "C" {
@@ -226,6 +227,204 @@ extern "C" {
 		 uint32_t *: __rte_bit_flip32,				\
 		 uint64_t *: __rte_bit_flip64)(addr, nr)
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Test if a particular bit in a word is set with a particular memory
+ * order.
+ *
+ * Test a bit with the resulting memory load ordered as per the
+ * specified memory order.
+ *
+ * @param addr
+ *   A pointer to the word to query.
+ * @param nr
+ *   The index of the bit.
+ * @param memory_order
+ *   The memory order to use.
+ * @return
+ *   Returns true if the bit is set, and false otherwise.
+ */
+#define rte_bit_atomic_test(addr, nr, memory_order)			\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_atomic_test32,			\
+		 const uint32_t *: __rte_bit_atomic_test32,		\
+		 uint64_t *: __rte_bit_atomic_test64,			\
+		 const uint64_t *: __rte_bit_atomic_test64)(addr, nr,	\
+							    memory_order)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Atomically set bit in word.
+ *
+ * Generic selection macro to atomically set bit specified by @c nr in
+ * the word pointed to by @c addr to '1', with the memory ordering as
+ * specified by @c memory_order.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ * @param memory_order
+ *   The memory order to use.
+ */
+#define rte_bit_atomic_set(addr, nr, memory_order)			\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_atomic_set32,			\
+		 uint64_t *: __rte_bit_atomic_set64)(addr, nr, memory_order)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Atomically clear bit in word.
+ *
+ * Generic selection macro to atomically set bit specified by @c nr in
+ * the word pointed to by @c addr to '0', with the memory ordering as
+ * specified by @c memory_order.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ * @param memory_order
+ *   The memory order to use.
+ */
+#define rte_bit_atomic_clear(addr, nr, memory_order)			\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_atomic_clear32,			\
+		 uint64_t *: __rte_bit_atomic_clear64)(addr, nr, memory_order)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Atomically assign a value to bit in word.
+ *
+ * Generic selection macro to atomically set bit specified by @c nr in the
+ * word pointed to by @c addr to the value indicated by @c value, with
+ * the memory ordering as specified with @c memory_order.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ * @param value
+ *   The new value of the bit - true for '1', or false for '0'.
+ * @param memory_order
+ *   The memory order to use.
+ */
+#define rte_bit_atomic_assign(addr, nr, value, memory_order)		\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_atomic_assign32,			\
+		 uint64_t *: __rte_bit_atomic_assign64)(addr, nr, value, \
+							memory_order)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Atomically flip bit in word.
+ *
+ * Generic selection macro to atomically negate the value of the bit
+ * specified by @c nr in the word pointed to by @c addr ('0' becomes
+ * '1', and '1' becomes '0'), with the memory ordering as specified
+ * with @c memory_order.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ * @param memory_order
+ *   The memory order to use.
+ */
+#define rte_bit_atomic_flip(addr, nr, memory_order)			\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_atomic_flip32,			\
+		 uint64_t *: __rte_bit_atomic_flip64)(addr, nr, memory_order)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Atomically test and set a bit in word.
+ *
+ * Generic selection macro to atomically test and set bit specified by
+ * @c nr in the word pointed to by @c addr to '1', with the memory
+ * ordering as specified with @c memory_order.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ * @param memory_order
+ *   The memory order to use.
+ * @return
+ *   Returns true if the bit was set, and false otherwise.
+ */
+#define rte_bit_atomic_test_and_set(addr, nr, memory_order)		\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_atomic_test_and_set32,		\
+		 uint64_t *: __rte_bit_atomic_test_and_set64)(addr, nr,	\
+							      memory_order)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Atomically test and clear a bit in word.
+ *
+ * Generic selection macro to atomically test and clear bit specified
+ * by @c nr in the word pointed to by @c addr to '0', with the memory
+ * ordering as specified with @c memory_order.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ * @param memory_order
+ *   The memory order to use.
+ * @return
+ *   Returns true if the bit was set, and false otherwise.
+ */
+#define rte_bit_atomic_test_and_clear(addr, nr, memory_order)		\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_atomic_test_and_clear32,		\
+		 uint64_t *: __rte_bit_atomic_test_and_clear64)(addr, nr, \
+								memory_order)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Atomically test and assign a bit in word.
+ *
+ * Generic selection macro to atomically test and assign bit specified
+ * by @c nr in the word pointed to by @c addr to the value specified
+ * by @c value, with the memory ordering as specified with @c
+ * memory_order.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ * @param value
+ *   The new value of the bit - true for '1', or false for '0'.
+ * @param memory_order
+ *   The memory order to use.
+ * @return
+ *   Returns true if the bit was set, and false otherwise.
+ */
+#define rte_bit_atomic_test_and_assign(addr, nr, value, memory_order)	\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_atomic_test_and_assign32,	\
+		 uint64_t *: __rte_bit_atomic_test_and_assign64)(addr, nr, \
+								 value, \
+								 memory_order)
+
 #define __RTE_GEN_BIT_TEST(variant, qualifier, size)			\
 	__rte_experimental						\
 	static inline bool						\
@@ -299,6 +498,146 @@ extern "C" {
 __RTE_GEN_BIT_OPS_SIZE(32)
 __RTE_GEN_BIT_OPS_SIZE(64)
 
+#define __RTE_GEN_BIT_ATOMIC_TEST(variant, qualifier, size)		\
+	__rte_experimental						\
+	static inline bool						\
+	__rte_bit_atomic_ ## variant ## test ## size(const qualifier uint ## size ## _t *addr, \
+						     unsigned int nr, int memory_order) \
+	{								\
+		RTE_ASSERT(nr < size);					\
+									\
+		const qualifier RTE_ATOMIC(uint ## size ## _t) *a_addr = \
+			(const qualifier RTE_ATOMIC(uint ## size ## _t) *)addr;	\
+		uint ## size ## _t mask = (uint ## size ## _t)1 << nr;	\
+		return rte_atomic_load_explicit(a_addr, memory_order) & mask; \
+	}
+
+#define __RTE_GEN_BIT_ATOMIC_SET(variant, qualifier, size)		\
+	__rte_experimental						\
+	static inline void						\
+	__rte_bit_atomic_ ## variant ## set ## size(qualifier uint ## size ## _t *addr, \
+					      unsigned int nr, int memory_order) \
+	{								\
+		RTE_ASSERT(nr < size);					\
+									\
+		qualifier RTE_ATOMIC(uint ## size ## _t) *a_addr =	\
+			(qualifier RTE_ATOMIC(uint ## size ## _t) *)addr; \
+		uint ## size ## _t mask = (uint ## size ## _t)1 << nr;	\
+		rte_atomic_fetch_or_explicit(a_addr, mask, memory_order); \
+	}
+
+#define __RTE_GEN_BIT_ATOMIC_CLEAR(variant, qualifier, size)		\
+	__rte_experimental						\
+	static inline void						\
+	__rte_bit_atomic_ ## variant ## clear ## size(qualifier uint ## size ## _t *addr,	\
+						unsigned int nr, int memory_order) \
+	{								\
+		RTE_ASSERT(nr < size);					\
+									\
+		qualifier RTE_ATOMIC(uint ## size ## _t) *a_addr =	\
+			(qualifier RTE_ATOMIC(uint ## size ## _t) *)addr; \
+		uint ## size ## _t mask = (uint ## size ## _t)1 << nr;	\
+		rte_atomic_fetch_and_explicit(a_addr, ~mask, memory_order); \
+	}
+
+#define __RTE_GEN_BIT_ATOMIC_FLIP(variant, qualifier, size)		\
+	__rte_experimental						\
+	static inline void						\
+	__rte_bit_atomic_ ## variant ## flip ## size(qualifier uint ## size ## _t *addr, \
+					       unsigned int nr, int memory_order) \
+	{								\
+		RTE_ASSERT(nr < size);					\
+									\
+		qualifier RTE_ATOMIC(uint ## size ## _t) *a_addr =	\
+			(qualifier RTE_ATOMIC(uint ## size ## _t) *)addr; \
+		uint ## size ## _t mask = (uint ## size ## _t)1 << nr;	\
+		rte_atomic_fetch_xor_explicit(a_addr, mask, memory_order); \
+	}
+
+#define __RTE_GEN_BIT_ATOMIC_ASSIGN(variant, qualifier, size)		\
+	__rte_experimental						\
+	static inline void						\
+	__rte_bit_atomic_## variant ## assign ## size(qualifier uint ## size ## _t *addr, \
+						unsigned int nr, bool value, \
+						int memory_order)	\
+	{								\
+		if (value)						\
+			__rte_bit_atomic_ ## variant ## set ## size(addr, nr, memory_order); \
+		else							\
+			__rte_bit_atomic_ ## variant ## clear ## size(addr, nr, \
+								     memory_order); \
+	}
+
+#define __RTE_GEN_BIT_ATOMIC_TEST_AND_SET(variant, qualifier, size)	\
+	__rte_experimental						\
+	static inline bool						\
+	__rte_bit_atomic_ ## variant ## test_and_set ## size(qualifier uint ## size ## _t *addr, \
+						       unsigned int nr,	\
+						       int memory_order) \
+	{								\
+		RTE_ASSERT(nr < size);					\
+									\
+		qualifier RTE_ATOMIC(uint ## size ## _t) *a_addr =	\
+			(qualifier RTE_ATOMIC(uint ## size ## _t) *)addr; \
+		uint ## size ## _t mask = (uint ## size ## _t)1 << nr;	\
+		uint ## size ## _t prev;				\
+									\
+		prev = rte_atomic_fetch_or_explicit(a_addr, mask,	\
+						    memory_order);	\
+									\
+		return prev & mask;					\
+	}
+
+#define __RTE_GEN_BIT_ATOMIC_TEST_AND_CLEAR(variant, qualifier, size)	\
+	__rte_experimental						\
+	static inline bool						\
+	__rte_bit_atomic_ ## variant ## test_and_clear ## size(qualifier uint ## size ## _t *addr, \
+							 unsigned int nr, \
+							 int memory_order) \
+	{								\
+		RTE_ASSERT(nr < size);					\
+									\
+		qualifier RTE_ATOMIC(uint ## size ## _t) *a_addr =	\
+			(qualifier RTE_ATOMIC(uint ## size ## _t) *)addr; \
+		uint ## size ## _t mask = (uint ## size ## _t)1 << nr;	\
+		uint ## size ## _t prev;				\
+									\
+	        prev = rte_atomic_fetch_and_explicit(a_addr, ~mask,	\
+						     memory_order);	\
+									\
+		return prev & mask;					\
+	}
+
+#define __RTE_GEN_BIT_ATOMIC_TEST_AND_ASSIGN(variant, qualifier, size)	\
+	__rte_experimental						\
+	static inline bool						\
+	__rte_bit_atomic_ ## variant ## test_and_assign ## size(qualifier uint ## size ## _t *addr, \
+							  unsigned int nr, \
+							  bool value,	\
+							  int memory_order) \
+	{								\
+		if (value)						\
+			return __rte_bit_atomic_ ## variant ## test_and_set ## size(addr, nr, memory_order); \
+		else							\
+			return __rte_bit_atomic_ ## variant ## test_and_clear ## size(addr, nr, memory_order); \
+	}
+
+#define __RTE_GEN_BIT_ATOMIC_OPS(variant, qualifier, size)	\
+	__RTE_GEN_BIT_ATOMIC_TEST(variant, qualifier, size)	\
+	__RTE_GEN_BIT_ATOMIC_SET(variant, qualifier, size)	\
+	__RTE_GEN_BIT_ATOMIC_CLEAR(variant, qualifier, size)	\
+	__RTE_GEN_BIT_ATOMIC_ASSIGN(variant, qualifier, size)	\
+	__RTE_GEN_BIT_ATOMIC_TEST_AND_SET(variant, qualifier, size) \
+	__RTE_GEN_BIT_ATOMIC_TEST_AND_CLEAR(variant, qualifier, size) \
+	__RTE_GEN_BIT_ATOMIC_TEST_AND_ASSIGN(variant, qualifier, size) \
+	__RTE_GEN_BIT_ATOMIC_FLIP(variant, qualifier, size)
+
+#define __RTE_GEN_BIT_ATOMIC_OPS_SIZE(size) \
+	__RTE_GEN_BIT_ATOMIC_OPS(,, size)
+
+__RTE_GEN_BIT_ATOMIC_OPS_SIZE(32)
+__RTE_GEN_BIT_ATOMIC_OPS_SIZE(64)
+
 /*------------------------ 32-bit relaxed operations ------------------------*/
 
 /**
@@ -994,6 +1333,15 @@ rte_log2_u64(uint64_t v)
 #undef rte_bit_assign
 #undef rte_bit_flip
 
+#undef rte_bit_atomic_test
+#undef rte_bit_atomic_set
+#undef rte_bit_atomic_clear
+#undef rte_bit_atomic_assign
+#undef rte_bit_atomic_flip
+#undef rte_bit_atomic_test_and_set
+#undef rte_bit_atomic_test_and_clear
+#undef rte_bit_atomic_test_and_assign
+
 #define __RTE_BIT_OVERLOAD_SZ_2(fun, qualifier, size, arg1_type, arg1_name) \
 	static inline void						\
 	rte_bit_ ## fun(qualifier uint ## size ## _t *addr,		\
@@ -1037,12 +1385,79 @@ rte_log2_u64(uint64_t v)
 	__RTE_BIT_OVERLOAD_SZ_3(fun, qualifier, 64, arg1_type, arg1_name, \
 				arg2_type, arg2_name)
 
+#define __RTE_BIT_OVERLOAD_SZ_3R(fun, qualifier, size, ret_type, arg1_type, \
+				 arg1_name, arg2_type, arg2_name)	\
+	static inline ret_type /* overload forwarding to the sized helper */ \
+	rte_bit_ ## fun(qualifier uint ## size ## _t *addr, arg1_type arg1_name, \
+			arg2_type arg2_name)				\
+	{ /* 'qualifier' (e.g., const) is applied to the addr pointee */ \
+		return __rte_bit_ ## fun ## size(addr, arg1_name, arg2_name); \
+	}
+
+#define __RTE_BIT_OVERLOAD_3R(fun, qualifier, ret_type, arg1_type, arg1_name, \
+			      arg2_type, arg2_name)			\
+	__RTE_BIT_OVERLOAD_SZ_3R(fun, qualifier, 32, ret_type, arg1_type, \
+				 arg1_name, arg2_type, arg2_name)	\
+	__RTE_BIT_OVERLOAD_SZ_3R(fun, qualifier, 64, ret_type, arg1_type, \
+				 arg1_name, arg2_type, arg2_name)
+
+#define __RTE_BIT_OVERLOAD_SZ_4(fun, qualifier, size, arg1_type, arg1_name, \
+				arg2_type, arg2_name, arg3_type, arg3_name) \
+	static inline void /* overload forwarding to the sized helper */ \
+	rte_bit_ ## fun(qualifier uint ## size ## _t *addr, arg1_type arg1_name, \
+			arg2_type arg2_name, arg3_type arg3_name)	\
+	{ /* 'qualifier' (e.g., const) is applied to the addr pointee */ \
+		__rte_bit_ ## fun ## size(addr, arg1_name, arg2_name,	\
+					  arg3_name);		      \
+	}
+
+#define __RTE_BIT_OVERLOAD_4(fun, qualifier, arg1_type, arg1_name, arg2_type, \
+			     arg2_name, arg3_type, arg3_name)		\
+	__RTE_BIT_OVERLOAD_SZ_4(fun, qualifier, 32, arg1_type, arg1_name, \
+				arg2_type, arg2_name, arg3_type, arg3_name) \
+	__RTE_BIT_OVERLOAD_SZ_4(fun, qualifier, 64, arg1_type, arg1_name, \
+				arg2_type, arg2_name, arg3_type, arg3_name)
+
+#define __RTE_BIT_OVERLOAD_SZ_4R(fun, qualifier, size, ret_type, arg1_type, \
+				 arg1_name, arg2_type, arg2_name, arg3_type, \
+				 arg3_name)				\
+	static inline ret_type /* overload forwarding to the sized helper */ \
+	rte_bit_ ## fun(qualifier uint ## size ## _t *addr, arg1_type arg1_name, \
+			arg2_type arg2_name, arg3_type arg3_name)	\
+	{ /* 'qualifier' (e.g., const) is applied to the addr pointee */ \
+		return __rte_bit_ ## fun ## size(addr, arg1_name, arg2_name, \
+						 arg3_name);		\
+	}
+
+#define __RTE_BIT_OVERLOAD_4R(fun, qualifier, ret_type, arg1_type, arg1_name, \
+			      arg2_type, arg2_name, arg3_type, arg3_name) \
+	__RTE_BIT_OVERLOAD_SZ_4R(fun, qualifier, 32, ret_type, arg1_type, \
+				 arg1_name, arg2_type, arg2_name, arg3_type, \
+				 arg3_name)				\
+	__RTE_BIT_OVERLOAD_SZ_4R(fun, qualifier, 64, ret_type, arg1_type, \
+				 arg1_name, arg2_type, arg2_name, arg3_type, \
+				 arg3_name)
+
 __RTE_BIT_OVERLOAD_2R(test, const, bool, unsigned int, nr)
 __RTE_BIT_OVERLOAD_2(set,, unsigned int, nr)
 __RTE_BIT_OVERLOAD_2(clear,, unsigned int, nr)
 __RTE_BIT_OVERLOAD_3(assign,, unsigned int, nr, bool, value)
 __RTE_BIT_OVERLOAD_2(flip,, unsigned int, nr)
 
+__RTE_BIT_OVERLOAD_3R(atomic_test, const, bool, unsigned int, nr,
+		      int, memory_order)
+__RTE_BIT_OVERLOAD_3(atomic_set,, unsigned int, nr, int, memory_order)
+__RTE_BIT_OVERLOAD_3(atomic_clear,, unsigned int, nr, int, memory_order)
+__RTE_BIT_OVERLOAD_4(atomic_assign,, unsigned int, nr, bool, value,
+		     int, memory_order)
+__RTE_BIT_OVERLOAD_3(atomic_flip,, unsigned int, nr, int, memory_order)
+__RTE_BIT_OVERLOAD_3R(atomic_test_and_set,, bool, unsigned int, nr,
+		      int, memory_order)
+__RTE_BIT_OVERLOAD_3R(atomic_test_and_clear,, bool, unsigned int, nr,
+		      int, memory_order)
+__RTE_BIT_OVERLOAD_4R(atomic_test_and_assign,, bool, unsigned int, nr,
+		      bool, value, int, memory_order)
+
 #endif
 
 #endif /* _RTE_BITOPS_H_ */
-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [PATCH v8 5/6] eal: add unit tests for atomic bit access functions
  2024-09-17 10:48           ` [PATCH v8 0/6] Improve EAL bit operations API Mattias Rönnblom
                               ` (3 preceding siblings ...)
  2024-09-17 10:48             ` [PATCH v8 4/6] eal: add atomic " Mattias Rönnblom
@ 2024-09-17 10:48             ` Mattias Rönnblom
  2024-09-17 10:48             ` [PATCH v8 6/6] eal: extend bitops to handle volatile pointers Mattias Rönnblom
  5 siblings, 0 replies; 63+ messages in thread
From: Mattias Rönnblom @ 2024-09-17 10:48 UTC (permalink / raw)
  To: dev
  Cc: hofors, Heng Wang, Stephen Hemminger, Tyler Retzlaff,
	Morten Brørup, Jack Bond-Preston, David Marchand,
	Chengwen Feng, Mattias Rönnblom

Extend bitops tests to cover the rte_bit_atomic_*() family of
functions.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
Acked-by: Jack Bond-Preston <jack.bond-preston@foss.arm.com>

--

RFC v4:
 * Add atomicity test for atomic bit flip.

RFC v3:
 * Rename variable 'main' to make ICC happy.
---
 app/test/test_bitops.c | 313 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 312 insertions(+), 1 deletion(-)

diff --git a/app/test/test_bitops.c b/app/test/test_bitops.c
index 322f58c066..b80216a0a1 100644
--- a/app/test/test_bitops.c
+++ b/app/test/test_bitops.c
@@ -3,10 +3,13 @@
  * Copyright(c) 2024 Ericsson AB
  */
 
+#include <inttypes.h>
 #include <stdbool.h>
 
-#include <rte_launch.h>
 #include <rte_bitops.h>
+#include <rte_cycles.h>
+#include <rte_launch.h>
+#include <rte_lcore.h>
 #include <rte_random.h>
 #include "test.h"
 
@@ -61,6 +64,304 @@ GEN_TEST_BIT_ACCESS(test_bit_access32, rte_bit_set, rte_bit_clear,
 GEN_TEST_BIT_ACCESS(test_bit_access64, rte_bit_set, rte_bit_clear,
 		    rte_bit_assign, rte_bit_flip, rte_bit_test, 64)
 
+#define bit_atomic_set(addr, nr)				\
+	rte_bit_atomic_set(addr, nr, rte_memory_order_relaxed)
+
+#define bit_atomic_clear(addr, nr)					\
+	rte_bit_atomic_clear(addr, nr, rte_memory_order_relaxed)
+
+#define bit_atomic_assign(addr, nr, value)				\
+	rte_bit_atomic_assign(addr, nr, value, rte_memory_order_relaxed)
+
+#define bit_atomic_flip(addr, nr)					\
+    rte_bit_atomic_flip(addr, nr, rte_memory_order_relaxed)
+
+#define bit_atomic_test(addr, nr)				\
+	rte_bit_atomic_test(addr, nr, rte_memory_order_relaxed)
+
+GEN_TEST_BIT_ACCESS(test_bit_atomic_access32, bit_atomic_set,
+		    bit_atomic_clear, bit_atomic_assign,
+		    bit_atomic_flip, bit_atomic_test, 32)
+
+GEN_TEST_BIT_ACCESS(test_bit_atomic_access64, bit_atomic_set,
+		    bit_atomic_clear, bit_atomic_assign,
+		    bit_atomic_flip, bit_atomic_test, 64)
+
+#define PARALLEL_TEST_RUNTIME 0.25
+
+#define GEN_TEST_BIT_PARALLEL_ASSIGN(size)				\
+									\
+	struct parallel_access_lcore ## size				\
+	{								\
+		unsigned int bit;					\
+		uint ## size ##_t *word;				\
+		bool failed;						\
+	};								\
+									\
+	static int							\
+	run_parallel_assign ## size(void *arg)				\
+	{								\
+		struct parallel_access_lcore ## size *lcore = arg;	\
+		uint64_t deadline = rte_get_timer_cycles() +		\
+			PARALLEL_TEST_RUNTIME * rte_get_timer_hz();	\
+		bool value = false;					\
+									\
+		do {							\
+			bool new_value = rte_rand() & 1;		\
+			bool use_test_and_modify = rte_rand() & 1;	\
+			bool use_assign = rte_rand() & 1;		\
+									\
+			if (rte_bit_atomic_test(lcore->word, lcore->bit, \
+						rte_memory_order_relaxed) != value) { \
+				lcore->failed = true;			\
+				break;					\
+			}						\
+									\
+			if (use_test_and_modify) {			\
+				bool old_value;				\
+				if (use_assign) 			\
+					old_value = rte_bit_atomic_test_and_assign( \
+						lcore->word, lcore->bit, new_value, \
+						rte_memory_order_relaxed); \
+				else {					\
+					old_value = new_value ?		\
+						rte_bit_atomic_test_and_set( \
+							lcore->word, lcore->bit, \
+							rte_memory_order_relaxed) : \
+						rte_bit_atomic_test_and_clear( \
+							lcore->word, lcore->bit, \
+							rte_memory_order_relaxed); \
+				}					\
+				if (old_value != value) {		\
+					lcore->failed = true;		\
+					break;				\
+				}					\
+			} else {					\
+				if (use_assign)				\
+					rte_bit_atomic_assign(lcore->word, lcore->bit, \
+							      new_value, \
+							      rte_memory_order_relaxed); \
+				else {					\
+					if (new_value)			\
+						rte_bit_atomic_set(	\
+							lcore->word, lcore->bit, \
+							rte_memory_order_relaxed); \
+					else				\
+						rte_bit_atomic_clear(	\
+							lcore->word, lcore->bit, \
+							rte_memory_order_relaxed); \
+				}					\
+			}						\
+									\
+			value = new_value;				\
+		} while (rte_get_timer_cycles() < deadline);		\
+									\
+		return 0;						\
+	}								\
+									\
+	static int							\
+	test_bit_atomic_parallel_assign ## size(void)			\
+	{								\
+		unsigned int worker_lcore_id;				\
+		uint ## size ## _t word = 0;				\
+		struct parallel_access_lcore ## size lmain = {		\
+			.word = &word					\
+		};							\
+		struct parallel_access_lcore ## size lworker = {	\
+			.word = &word					\
+		};							\
+									\
+		if (rte_lcore_count() < 2) {				\
+			printf("Need multiple cores to run parallel test.\n"); \
+			return TEST_SKIPPED;				\
+		}							\
+									\
+		worker_lcore_id = rte_get_next_lcore(-1, 1, 0);		\
+									\
+		lmain.bit = rte_rand_max(size);				\
+		do {							\
+			lworker.bit = rte_rand_max(size);		\
+		} while (lworker.bit == lmain.bit);			\
+									\
+		int rc = rte_eal_remote_launch(run_parallel_assign ## size, \
+					       &lworker, worker_lcore_id); \
+		TEST_ASSERT(rc == 0, "Worker thread launch failed");	\
+									\
+		run_parallel_assign ## size(&lmain);			\
+									\
+		rte_eal_mp_wait_lcore();				\
+									\
+		TEST_ASSERT(!lmain.failed, "Main lcore atomic access failed"); \
+		TEST_ASSERT(!lworker.failed, "Worker lcore atomic access " \
+			    "failed");					\
+									\
+		return TEST_SUCCESS;					\
+	}
+
+GEN_TEST_BIT_PARALLEL_ASSIGN(32)
+GEN_TEST_BIT_PARALLEL_ASSIGN(64)
+
+#define GEN_TEST_BIT_PARALLEL_TEST_AND_MODIFY(size)			\
+	/* Two lcores race test-and-modify operations on one shared bit. */ \
+	struct parallel_test_and_set_lcore ## size			\
+	{								\
+		uint ## size ##_t *word;				\
+		unsigned int bit;					\
+		uint64_t flips;						\
+	};								\
+									\
+	static int							\
+	run_parallel_test_and_modify ## size(void *arg)		\
+	{								\
+		struct parallel_test_and_set_lcore ## size *lcore = arg; \
+		uint64_t deadline = rte_get_timer_cycles() +		\
+			PARALLEL_TEST_RUNTIME * rte_get_timer_hz();	\
+		do {							\
+			bool old_value;					\
+			bool new_value = rte_rand() & 1;		\
+			bool use_assign = rte_rand() & 1;		\
+			/* Only operations that change the bit count as flips. */ \
+			if (use_assign)					\
+				old_value = rte_bit_atomic_test_and_assign( \
+					lcore->word, lcore->bit, new_value, \
+					rte_memory_order_relaxed);	\
+			else						\
+				old_value = new_value ?			\
+					rte_bit_atomic_test_and_set(	\
+						lcore->word, lcore->bit, \
+						rte_memory_order_relaxed) : \
+					rte_bit_atomic_test_and_clear(	\
+						lcore->word, lcore->bit, \
+						rte_memory_order_relaxed); \
+			if (old_value != new_value)			\
+				lcore->flips++;				\
+		} while (rte_get_timer_cycles() < deadline);		\
+									\
+		return 0;						\
+	}								\
+									\
+	static int							\
+	test_bit_atomic_parallel_test_and_modify ## size(void)		\
+	{								\
+		unsigned int worker_lcore_id;				\
+		uint ## size ## _t word = 0;				\
+		unsigned int bit = rte_rand_max(size);			\
+		struct parallel_test_and_set_lcore ## size lmain = {	\
+			.word = &word,					\
+			.bit = bit					\
+		};							\
+		struct parallel_test_and_set_lcore ## size lworker = {	\
+			.word = &word,					\
+			.bit = bit					\
+		};							\
+									\
+		if (rte_lcore_count() < 2) {				\
+			printf("Need multiple cores to run parallel test.\n"); \
+			return TEST_SKIPPED;				\
+		}							\
+									\
+		worker_lcore_id = rte_get_next_lcore(-1, 1, 0);		\
+									\
+		int rc = rte_eal_remote_launch(run_parallel_test_and_modify ## size, \
+					       &lworker, worker_lcore_id); \
+		TEST_ASSERT(rc == 0, "Worker thread launch failed");	\
+									\
+		run_parallel_test_and_modify ## size(&lmain);		\
+									\
+		rte_eal_mp_wait_lcore();				\
+		/* The word started at 0: final bit value is the flip parity. */ \
+		uint64_t total_flips = lmain.flips + lworker.flips;	\
+		bool expected_value = total_flips % 2;			\
+									\
+		TEST_ASSERT(expected_value == rte_bit_test(&word, bit), \
+			    "After %"PRIu64" flips, the bit value "	\
+			    "should be %d", total_flips, expected_value); \
+		/* All bits other than 'bit' must still be zero. */	\
+		uint64_t expected_word = 0;				\
+		rte_bit_assign(&expected_word, bit, expected_value);	\
+									\
+		TEST_ASSERT(expected_word == word, "Untouched bits have " \
+			    "changed value");				\
+									\
+		return TEST_SUCCESS;					\
+	}
+
+GEN_TEST_BIT_PARALLEL_TEST_AND_MODIFY(32)
+GEN_TEST_BIT_PARALLEL_TEST_AND_MODIFY(64)
+
+#define GEN_TEST_BIT_PARALLEL_FLIP(size)				\
+	/* Two lcores concurrently flip one shared bit atomically. */	\
+	struct parallel_flip_lcore ## size				\
+	{								\
+		uint ## size ##_t *word;				\
+		unsigned int bit;					\
+		uint64_t flips;						\
+	};								\
+									\
+	static int							\
+	run_parallel_flip ## size(void *arg)				\
+	{								\
+		struct parallel_flip_lcore ## size *lcore = arg; \
+		uint64_t deadline = rte_get_timer_cycles() +		\
+			PARALLEL_TEST_RUNTIME * rte_get_timer_hz();	\
+		do {							\
+			rte_bit_atomic_flip(lcore->word, lcore->bit,	\
+					    rte_memory_order_relaxed);	\
+			lcore->flips++;					\
+		} while (rte_get_timer_cycles() < deadline);		\
+									\
+		return 0;						\
+	}								\
+									\
+	static int							\
+	test_bit_atomic_parallel_flip ## size(void)			\
+	{								\
+		unsigned int worker_lcore_id;				\
+		uint ## size ## _t word = 0;				\
+		unsigned int bit = rte_rand_max(size);			\
+		struct parallel_flip_lcore ## size lmain = {		\
+			.word = &word,					\
+			.bit = bit					\
+		};							\
+		struct parallel_flip_lcore ## size lworker = {		\
+			.word = &word,					\
+			.bit = bit					\
+		};							\
+									\
+		if (rte_lcore_count() < 2) {				\
+			printf("Need multiple cores to run parallel test.\n"); \
+			return TEST_SKIPPED;				\
+		}							\
+									\
+		worker_lcore_id = rte_get_next_lcore(-1, 1, 0);		\
+									\
+		int rc = rte_eal_remote_launch(run_parallel_flip ## size, \
+					       &lworker, worker_lcore_id); \
+		TEST_ASSERT(rc == 0, "Worker thread launch failed");	\
+									\
+		run_parallel_flip ## size(&lmain);			\
+									\
+		rte_eal_mp_wait_lcore();				\
+		/* The word started at 0: final bit value is the flip parity. */ \
+		uint64_t total_flips = lmain.flips + lworker.flips;	\
+		bool expected_value = total_flips % 2;			\
+									\
+		TEST_ASSERT(expected_value == rte_bit_test(&word, bit), \
+			    "After %"PRIu64" flips, the bit value "	\
+			    "should be %d", total_flips, expected_value); \
+		/* All bits other than 'bit' must still be zero. */	\
+		uint64_t expected_word = 0;				\
+		rte_bit_assign(&expected_word, bit, expected_value);	\
+									\
+		TEST_ASSERT(expected_word == word, "Untouched bits have " \
+			    "changed value");				\
+									\
+		return TEST_SUCCESS;					\
+	}
+
+GEN_TEST_BIT_PARALLEL_FLIP(32)
+GEN_TEST_BIT_PARALLEL_FLIP(64)
+
 static uint32_t val32;
 static uint64_t val64;
 
@@ -177,6 +478,16 @@ static struct unit_test_suite test_suite = {
 	.unit_test_cases = {
 		TEST_CASE(test_bit_access32),
 		TEST_CASE(test_bit_access64),
+		TEST_CASE(test_bit_access32),
+		TEST_CASE(test_bit_access64),
+		TEST_CASE(test_bit_atomic_access32),
+		TEST_CASE(test_bit_atomic_access64),
+		TEST_CASE(test_bit_atomic_parallel_assign32),
+		TEST_CASE(test_bit_atomic_parallel_assign64),
+		TEST_CASE(test_bit_atomic_parallel_test_and_modify32),
+		TEST_CASE(test_bit_atomic_parallel_test_and_modify64),
+		TEST_CASE(test_bit_atomic_parallel_flip32),
+		TEST_CASE(test_bit_atomic_parallel_flip64),
 		TEST_CASE(test_bit_relaxed_set),
 		TEST_CASE(test_bit_relaxed_clear),
 		TEST_CASE(test_bit_relaxed_test_set_clear),
-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [PATCH v8 6/6] eal: extend bitops to handle volatile pointers
  2024-09-17 10:48           ` [PATCH v8 0/6] Improve EAL bit operations API Mattias Rönnblom
                               ` (4 preceding siblings ...)
  2024-09-17 10:48             ` [PATCH v8 5/6] eal: add unit tests for atomic bit access functions Mattias Rönnblom
@ 2024-09-17 10:48             ` Mattias Rönnblom
  5 siblings, 0 replies; 63+ messages in thread
From: Mattias Rönnblom @ 2024-09-17 10:48 UTC (permalink / raw)
  To: dev
  Cc: hofors, Heng Wang, Stephen Hemminger, Tyler Retzlaff,
	Morten Brørup, Jack Bond-Preston, David Marchand,
	Chengwen Feng, Mattias Rönnblom

Have rte_bit_[test|set|clear|assign|flip]() and rte_bit_atomic_*()
handle volatile-marked pointers.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Jack Bond-Preston <jack.bond-preston@foss.arm.com>

--

PATCH v3:
 * Updated to reflect removed 'fun' parameter in __RTE_GEN_BIT_*()
   (Jack Bond-Preston).

PATCH v2:
 * Actually run the test_bit_atomic_v_access*() test functions.
---
 app/test/test_bitops.c       |  32 +++-
 lib/eal/include/rte_bitops.h | 301 +++++++++++++++++++++++------------
 2 files changed, 222 insertions(+), 111 deletions(-)

diff --git a/app/test/test_bitops.c b/app/test/test_bitops.c
index b80216a0a1..10e87f6776 100644
--- a/app/test/test_bitops.c
+++ b/app/test/test_bitops.c
@@ -14,13 +14,13 @@
 #include "test.h"
 
 #define GEN_TEST_BIT_ACCESS(test_name, set_fun, clear_fun, assign_fun,	\
-			    flip_fun, test_fun, size)			\
+			    flip_fun, test_fun, size, mod)		\
 	static int							\
 	test_name(void)							\
 	{								\
 		uint ## size ## _t reference = (uint ## size ## _t)rte_rand(); \
 		unsigned int bit_nr;					\
-		uint ## size ## _t word = (uint ## size ## _t)rte_rand(); \
+		mod uint ## size ## _t word = (uint ## size ## _t)rte_rand(); \
 									\
 		for (bit_nr = 0; bit_nr < size; bit_nr++) {		\
 			bool reference_bit = (reference >> bit_nr) & 1;	\
@@ -41,7 +41,7 @@
 				    "Bit %d had unflipped value", bit_nr); \
 			flip_fun(&word, bit_nr);			\
 									\
-			const uint ## size ## _t *const_ptr = &word;	\
+			const mod uint ## size ## _t *const_ptr = &word; \
 			TEST_ASSERT(test_fun(const_ptr, bit_nr) ==	\
 				    reference_bit,			\
 				    "Bit %d had unexpected value", bit_nr); \
@@ -59,10 +59,16 @@
 	}
 
 GEN_TEST_BIT_ACCESS(test_bit_access32, rte_bit_set, rte_bit_clear,
-		    rte_bit_assign, rte_bit_flip, rte_bit_test, 32)
+		    rte_bit_assign, rte_bit_flip, rte_bit_test, 32,)
 
 GEN_TEST_BIT_ACCESS(test_bit_access64, rte_bit_set, rte_bit_clear,
-		    rte_bit_assign, rte_bit_flip, rte_bit_test, 64)
+		    rte_bit_assign, rte_bit_flip, rte_bit_test, 64,)
+
+GEN_TEST_BIT_ACCESS(test_bit_v_access32, rte_bit_set, rte_bit_clear,
+		    rte_bit_assign, rte_bit_flip, rte_bit_test, 32, volatile)
+
+GEN_TEST_BIT_ACCESS(test_bit_v_access64, rte_bit_set, rte_bit_clear,
+		    rte_bit_assign, rte_bit_flip, rte_bit_test, 64, volatile)
 
 #define bit_atomic_set(addr, nr)				\
 	rte_bit_atomic_set(addr, nr, rte_memory_order_relaxed)
@@ -81,11 +87,19 @@ GEN_TEST_BIT_ACCESS(test_bit_access64, rte_bit_set, rte_bit_clear,
 
 GEN_TEST_BIT_ACCESS(test_bit_atomic_access32, bit_atomic_set,
 		    bit_atomic_clear, bit_atomic_assign,
-		    bit_atomic_flip, bit_atomic_test, 32)
+		    bit_atomic_flip, bit_atomic_test, 32,)
 
 GEN_TEST_BIT_ACCESS(test_bit_atomic_access64, bit_atomic_set,
 		    bit_atomic_clear, bit_atomic_assign,
-		    bit_atomic_flip, bit_atomic_test, 64)
+		    bit_atomic_flip, bit_atomic_test, 64,)
+
+GEN_TEST_BIT_ACCESS(test_bit_atomic_v_access32, bit_atomic_set,
+		    bit_atomic_clear, bit_atomic_assign,
+		    bit_atomic_flip, bit_atomic_test, 32, volatile)
+
+GEN_TEST_BIT_ACCESS(test_bit_atomic_v_access64, bit_atomic_set,
+		    bit_atomic_clear, bit_atomic_assign,
+		    bit_atomic_flip, bit_atomic_test, 64, volatile)
 
 #define PARALLEL_TEST_RUNTIME 0.25
 
@@ -480,8 +494,12 @@ static struct unit_test_suite test_suite = {
 		TEST_CASE(test_bit_access64),
 		TEST_CASE(test_bit_access32),
 		TEST_CASE(test_bit_access64),
+		TEST_CASE(test_bit_v_access32),
+		TEST_CASE(test_bit_v_access64),
 		TEST_CASE(test_bit_atomic_access32),
 		TEST_CASE(test_bit_atomic_access64),
+		TEST_CASE(test_bit_atomic_v_access32),
+		TEST_CASE(test_bit_atomic_v_access64),
 		TEST_CASE(test_bit_atomic_parallel_assign32),
 		TEST_CASE(test_bit_atomic_parallel_assign64),
 		TEST_CASE(test_bit_atomic_parallel_test_and_modify32),
diff --git a/lib/eal/include/rte_bitops.h b/lib/eal/include/rte_bitops.h
index 3ad6795fd1..d7a07c4099 100644
--- a/lib/eal/include/rte_bitops.h
+++ b/lib/eal/include/rte_bitops.h
@@ -127,12 +127,16 @@ extern "C" {
  * @param nr
  *   The index of the bit.
  */
-#define rte_bit_test(addr, nr)					\
-	_Generic((addr),					\
-		uint32_t *: __rte_bit_test32,			\
-		const uint32_t *: __rte_bit_test32,		\
-		uint64_t *: __rte_bit_test64,			\
-		const uint64_t *: __rte_bit_test64)(addr, nr)
+#define rte_bit_test(addr, nr)						\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_test32,				\
+		 const uint32_t *: __rte_bit_test32,			\
+		 volatile uint32_t *: __rte_bit_v_test32,		\
+		 const volatile uint32_t *: __rte_bit_v_test32,		\
+		 uint64_t *: __rte_bit_test64,				\
+		 const uint64_t *: __rte_bit_test64,			\
+		 volatile uint64_t *: __rte_bit_v_test64,		\
+		 const volatile uint64_t *: __rte_bit_v_test64)(addr, nr)
 
 /**
  * @warning
@@ -152,10 +156,12 @@ extern "C" {
  * @param nr
  *   The index of the bit.
  */
-#define rte_bit_set(addr, nr)				\
-	_Generic((addr),				\
-		 uint32_t *: __rte_bit_set32,		\
-		 uint64_t *: __rte_bit_set64)(addr, nr)
+#define rte_bit_set(addr, nr)						\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_set32,				\
+		 volatile uint32_t *: __rte_bit_v_set32,		\
+		 uint64_t *: __rte_bit_set64,				\
+		 volatile uint64_t *: __rte_bit_v_set64)(addr, nr)
 
 /**
  * @warning
@@ -175,10 +181,12 @@ extern "C" {
  * @param nr
  *   The index of the bit.
  */
-#define rte_bit_clear(addr, nr)					\
-	_Generic((addr),					\
-		 uint32_t *: __rte_bit_clear32,			\
-		 uint64_t *: __rte_bit_clear64)(addr, nr)
+#define rte_bit_clear(addr, nr)						\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_clear32,				\
+		 volatile uint32_t *: __rte_bit_v_clear32,		\
+		 uint64_t *: __rte_bit_clear64,				\
+		 volatile uint64_t *: __rte_bit_v_clear64)(addr, nr)
 
 /**
  * @warning
@@ -202,7 +210,9 @@ extern "C" {
 #define rte_bit_assign(addr, nr, value)					\
 	_Generic((addr),						\
 		 uint32_t *: __rte_bit_assign32,			\
-		 uint64_t *: __rte_bit_assign64)(addr, nr, value)
+		 volatile uint32_t *: __rte_bit_v_assign32,		\
+		 uint64_t *: __rte_bit_assign64,			\
+		 volatile uint64_t *: __rte_bit_v_assign64)(addr, nr, value)
 
 /**
  * @warning
@@ -225,7 +235,9 @@ extern "C" {
 #define rte_bit_flip(addr, nr)						\
 	_Generic((addr),						\
 		 uint32_t *: __rte_bit_flip32,				\
-		 uint64_t *: __rte_bit_flip64)(addr, nr)
+		 volatile uint32_t *: __rte_bit_v_flip32,		\
+		 uint64_t *: __rte_bit_flip64,				\
+		 volatile uint64_t *: __rte_bit_v_flip64)(addr, nr)
 
 /**
  * @warning
@@ -250,9 +262,13 @@ extern "C" {
 	_Generic((addr),						\
 		 uint32_t *: __rte_bit_atomic_test32,			\
 		 const uint32_t *: __rte_bit_atomic_test32,		\
+		 volatile uint32_t *: __rte_bit_atomic_v_test32,	\
+		 const volatile uint32_t *: __rte_bit_atomic_v_test32,	\
 		 uint64_t *: __rte_bit_atomic_test64,			\
-		 const uint64_t *: __rte_bit_atomic_test64)(addr, nr,	\
-							    memory_order)
+		 const uint64_t *: __rte_bit_atomic_test64,		\
+		 volatile uint64_t *: __rte_bit_atomic_v_test64,	\
+		 const volatile uint64_t *: __rte_bit_atomic_v_test64) \
+						    (addr, nr, memory_order)
 
 /**
  * @warning
@@ -274,7 +290,10 @@ extern "C" {
 #define rte_bit_atomic_set(addr, nr, memory_order)			\
 	_Generic((addr),						\
 		 uint32_t *: __rte_bit_atomic_set32,			\
-		 uint64_t *: __rte_bit_atomic_set64)(addr, nr, memory_order)
+		 volatile uint32_t *: __rte_bit_atomic_v_set32,		\
+		 uint64_t *: __rte_bit_atomic_set64,			\
+		 volatile uint64_t *: __rte_bit_atomic_v_set64)(addr, nr, \
+								memory_order)
 
 /**
  * @warning
@@ -296,7 +315,10 @@ extern "C" {
 #define rte_bit_atomic_clear(addr, nr, memory_order)			\
 	_Generic((addr),						\
 		 uint32_t *: __rte_bit_atomic_clear32,			\
-		 uint64_t *: __rte_bit_atomic_clear64)(addr, nr, memory_order)
+		 volatile uint32_t *: __rte_bit_atomic_v_clear32,	\
+		 uint64_t *: __rte_bit_atomic_clear64,			\
+		 volatile uint64_t *: __rte_bit_atomic_v_clear64)(addr, nr, \
+								  memory_order)
 
 /**
  * @warning
@@ -320,8 +342,11 @@ extern "C" {
 #define rte_bit_atomic_assign(addr, nr, value, memory_order)		\
 	_Generic((addr),						\
 		 uint32_t *: __rte_bit_atomic_assign32,			\
-		 uint64_t *: __rte_bit_atomic_assign64)(addr, nr, value, \
-							memory_order)
+		 volatile uint32_t *: __rte_bit_atomic_v_assign32,	\
+		 uint64_t *: __rte_bit_atomic_assign64,			\
+		 volatile uint64_t *: __rte_bit_atomic_v_assign64)(addr, nr, \
+								   value, \
+								   memory_order)
 
 /**
  * @warning
@@ -344,7 +369,10 @@ extern "C" {
 #define rte_bit_atomic_flip(addr, nr, memory_order)			\
 	_Generic((addr),						\
 		 uint32_t *: __rte_bit_atomic_flip32,			\
-		 uint64_t *: __rte_bit_atomic_flip64)(addr, nr, memory_order)
+		 volatile uint32_t *: __rte_bit_atomic_v_flip32,	\
+		 uint64_t *: __rte_bit_atomic_flip64,			\
+		 volatile uint64_t *: __rte_bit_atomic_v_flip64)(addr, nr, \
+								 memory_order)
 
 /**
  * @warning
@@ -368,8 +396,10 @@ extern "C" {
 #define rte_bit_atomic_test_and_set(addr, nr, memory_order)		\
 	_Generic((addr),						\
 		 uint32_t *: __rte_bit_atomic_test_and_set32,		\
-		 uint64_t *: __rte_bit_atomic_test_and_set64)(addr, nr,	\
-							      memory_order)
+		 volatile uint32_t *: __rte_bit_atomic_v_test_and_set32, \
+		 uint64_t *: __rte_bit_atomic_test_and_set64,		\
+		 volatile uint64_t *: __rte_bit_atomic_v_test_and_set64) \
+						    (addr, nr, memory_order)
 
 /**
  * @warning
@@ -393,8 +423,10 @@ extern "C" {
 #define rte_bit_atomic_test_and_clear(addr, nr, memory_order)		\
 	_Generic((addr),						\
 		 uint32_t *: __rte_bit_atomic_test_and_clear32,		\
-		 uint64_t *: __rte_bit_atomic_test_and_clear64)(addr, nr, \
-								memory_order)
+		 volatile uint32_t *: __rte_bit_atomic_v_test_and_clear32, \
+		 uint64_t *: __rte_bit_atomic_test_and_clear64,		\
+		 volatile uint64_t *: __rte_bit_atomic_v_test_and_clear64) \
+						       (addr, nr, memory_order)
 
 /**
  * @warning
@@ -421,9 +453,10 @@ extern "C" {
 #define rte_bit_atomic_test_and_assign(addr, nr, value, memory_order)	\
 	_Generic((addr),						\
 		 uint32_t *: __rte_bit_atomic_test_and_assign32,	\
-		 uint64_t *: __rte_bit_atomic_test_and_assign64)(addr, nr, \
-								 value, \
-								 memory_order)
+		 volatile uint32_t *: __rte_bit_atomic_v_test_and_assign32, \
+		 uint64_t *: __rte_bit_atomic_test_and_assign64,	\
+		 volatile uint64_t *: __rte_bit_atomic_v_test_and_assign64) \
+						(addr, nr, value, memory_order)
 
 #define __RTE_GEN_BIT_TEST(variant, qualifier, size)			\
 	__rte_experimental						\
@@ -493,7 +526,8 @@ extern "C" {
 	__RTE_GEN_BIT_FLIP(v, qualifier, size)
 
 #define __RTE_GEN_BIT_OPS_SIZE(size) \
-	__RTE_GEN_BIT_OPS(,, size)
+	__RTE_GEN_BIT_OPS(,, size) \
+	__RTE_GEN_BIT_OPS(v_, volatile, size)
 
 __RTE_GEN_BIT_OPS_SIZE(32)
 __RTE_GEN_BIT_OPS_SIZE(64)
@@ -633,7 +667,8 @@ __RTE_GEN_BIT_OPS_SIZE(64)
 	__RTE_GEN_BIT_ATOMIC_FLIP(variant, qualifier, size)
 
 #define __RTE_GEN_BIT_ATOMIC_OPS_SIZE(size) \
-	__RTE_GEN_BIT_ATOMIC_OPS(,, size)
+	__RTE_GEN_BIT_ATOMIC_OPS(,, size) \
+	__RTE_GEN_BIT_ATOMIC_OPS(v_, volatile, size)
 
 __RTE_GEN_BIT_ATOMIC_OPS_SIZE(32)
 __RTE_GEN_BIT_ATOMIC_OPS_SIZE(64)
@@ -1342,120 +1377,178 @@ rte_log2_u64(uint64_t v)
 #undef rte_bit_atomic_test_and_clear
 #undef rte_bit_atomic_test_and_assign
 
-#define __RTE_BIT_OVERLOAD_SZ_2(fun, qualifier, size, arg1_type, arg1_name) \
+#define __RTE_BIT_OVERLOAD_V_2(family, v, fun, c, size, arg1_type, arg1_name) \
 	static inline void						\
-	rte_bit_ ## fun(qualifier uint ## size ## _t *addr,		\
-			arg1_type arg1_name)				\
+	rte_bit_ ## family ## fun(c uint ## size ## _t *addr,		\
+				  arg1_type arg1_name)			\
 	{								\
-		__rte_bit_ ## fun ## size(addr, arg1_name);		\
+		__rte_bit_ ## family ## v ## fun ## size(addr, arg1_name); \
 	}
 
-#define __RTE_BIT_OVERLOAD_2(fun, qualifier, arg1_type, arg1_name)	\
-	__RTE_BIT_OVERLOAD_SZ_2(fun, qualifier, 32, arg1_type, arg1_name) \
-	__RTE_BIT_OVERLOAD_SZ_2(fun, qualifier, 64, arg1_type, arg1_name)
+#define __RTE_BIT_OVERLOAD_SZ_2(family, fun, c, size, arg1_type, arg1_name) \
+	__RTE_BIT_OVERLOAD_V_2(family,, fun, c, size, arg1_type,	\
+			       arg1_name)				\
+	__RTE_BIT_OVERLOAD_V_2(family, v_, fun, c volatile, size, \
+			       arg1_type, arg1_name)
 
-#define __RTE_BIT_OVERLOAD_SZ_2R(fun, qualifier, size, ret_type, arg1_type, \
-				 arg1_name)				\
+#define __RTE_BIT_OVERLOAD_2(family, fun, c, arg1_type, arg1_name)	\
+	__RTE_BIT_OVERLOAD_SZ_2(family, fun, c, 32, arg1_type, arg1_name) \
+	__RTE_BIT_OVERLOAD_SZ_2(family, fun, c, 64, arg1_type, arg1_name)
+
+#define __RTE_BIT_OVERLOAD_V_2R(family, v, fun, c, size, ret_type, arg1_type, \
+				arg1_name)				\
 	static inline ret_type						\
-	rte_bit_ ## fun(qualifier uint ## size ## _t *addr,		\
+	rte_bit_ ## family ## fun(c uint ## size ## _t *addr,		\
 			arg1_type arg1_name)				\
 	{								\
-		return __rte_bit_ ## fun ## size(addr, arg1_name);	\
+		return __rte_bit_ ## family ## v ## fun ## size(addr,	\
+								arg1_name); \
 	}
 
-#define __RTE_BIT_OVERLOAD_2R(fun, qualifier, ret_type, arg1_type, arg1_name) \
-	__RTE_BIT_OVERLOAD_SZ_2R(fun, qualifier, 32, ret_type, arg1_type, \
+#define __RTE_BIT_OVERLOAD_SZ_2R(family, fun, c, size, ret_type, arg1_type, \
+				 arg1_name)				\
+	__RTE_BIT_OVERLOAD_V_2R(family,, fun, c, size, ret_type, arg1_type, \
+				arg1_name)				\
+	__RTE_BIT_OVERLOAD_V_2R(family, v_, fun, c volatile,		\
+				size, ret_type, arg1_type, arg1_name)
+
+#define __RTE_BIT_OVERLOAD_2R(family, fun, c, ret_type, arg1_type, arg1_name) \
+	__RTE_BIT_OVERLOAD_SZ_2R(family, fun, c, 32, ret_type, arg1_type, \
 				 arg1_name)				\
-	__RTE_BIT_OVERLOAD_SZ_2R(fun, qualifier, 64, ret_type, arg1_type, \
+	__RTE_BIT_OVERLOAD_SZ_2R(family, fun, c, 64, ret_type, arg1_type, \
 				 arg1_name)
 
-#define __RTE_BIT_OVERLOAD_SZ_3(fun, qualifier, size, arg1_type, arg1_name, \
-				arg2_type, arg2_name)			\
+#define __RTE_BIT_OVERLOAD_V_3(family, v, fun, c, size, arg1_type, arg1_name, \
+			       arg2_type, arg2_name)			\
 	static inline void						\
-	rte_bit_ ## fun(uint ## size ## _t *addr, arg1_type arg1_name,	\
-			arg2_type arg2_name)				\
+	rte_bit_ ## family ## fun(c uint ## size ## _t *addr,		\
+				  arg1_type arg1_name, arg2_type arg2_name) \
 	{								\
-		__rte_bit_ ## fun ## size(addr, arg1_name, arg2_name);	\
+		__rte_bit_ ## family ## v ## fun ## size(addr, arg1_name, \
+							 arg2_name);	\
 	}
 
-#define __RTE_BIT_OVERLOAD_3(fun, qualifier, arg1_type, arg1_name, arg2_type, \
+#define __RTE_BIT_OVERLOAD_SZ_3(family, fun, c, size, arg1_type, arg1_name, \
+				arg2_type, arg2_name)			\
+	__RTE_BIT_OVERLOAD_V_3(family,, fun, c, size, arg1_type, arg1_name, \
+			       arg2_type, arg2_name)			\
+	__RTE_BIT_OVERLOAD_V_3(family, v_, fun, c volatile, size, arg1_type, \
+			       arg1_name, arg2_type, arg2_name)
+
+#define __RTE_BIT_OVERLOAD_3(family, fun, c, arg1_type, arg1_name, arg2_type, \
 			     arg2_name)					\
-	__RTE_BIT_OVERLOAD_SZ_3(fun, qualifier, 32, arg1_type, arg1_name, \
+	__RTE_BIT_OVERLOAD_SZ_3(family, fun, c, 32, arg1_type, arg1_name, \
 				arg2_type, arg2_name)			\
-	__RTE_BIT_OVERLOAD_SZ_3(fun, qualifier, 64, arg1_type, arg1_name, \
+	__RTE_BIT_OVERLOAD_SZ_3(family, fun, c, 64, arg1_type, arg1_name, \
 				arg2_type, arg2_name)
 
-#define __RTE_BIT_OVERLOAD_SZ_3R(fun, qualifier, size, ret_type, arg1_type, \
-				 arg1_name, arg2_type, arg2_name)	\
+#define __RTE_BIT_OVERLOAD_V_3R(family, v, fun, c, size, ret_type, arg1_type, \
+				arg1_name, arg2_type, arg2_name)	\
 	static inline ret_type						\
-	rte_bit_ ## fun(uint ## size ## _t *addr, arg1_type arg1_name,	\
-			arg2_type arg2_name)				\
+	rte_bit_ ## family ## fun(c uint ## size ## _t *addr,		\
+				  arg1_type arg1_name, arg2_type arg2_name) \
 	{								\
-		return __rte_bit_ ## fun ## size(addr, arg1_name, arg2_name); \
+		return __rte_bit_ ## family ## v ## fun ## size(addr,	\
+								arg1_name, \
+								arg2_name); \
 	}
 
-#define __RTE_BIT_OVERLOAD_3R(fun, qualifier, ret_type, arg1_type, arg1_name, \
-			      arg2_type, arg2_name)			\
-	__RTE_BIT_OVERLOAD_SZ_3R(fun, qualifier, 32, ret_type, arg1_type, \
+#define __RTE_BIT_OVERLOAD_SZ_3R(family, fun, c, size, ret_type, arg1_type, \
 				 arg1_name, arg2_type, arg2_name)	\
-	__RTE_BIT_OVERLOAD_SZ_3R(fun, qualifier, 64, ret_type, arg1_type, \
-				 arg1_name, arg2_type, arg2_name)
+	__RTE_BIT_OVERLOAD_V_3R(family,, fun, c, size, ret_type, \
+				arg1_type, arg1_name, arg2_type, arg2_name) \
+	__RTE_BIT_OVERLOAD_V_3R(family, v_, fun, c volatile, size, \
+				ret_type, arg1_type, arg1_name, arg2_type, \
+				arg2_name)
 
-#define __RTE_BIT_OVERLOAD_SZ_4(fun, qualifier, size, arg1_type, arg1_name, \
-				arg2_type, arg2_name, arg3_type, arg3_name) \
+#define __RTE_BIT_OVERLOAD_3R(family, fun, c, ret_type, arg1_type, arg1_name, \
+			      arg2_type, arg2_name)			\
+	__RTE_BIT_OVERLOAD_SZ_3R(family, fun, c, 32, ret_type,		\
+				 arg1_type, arg1_name, arg2_type, arg2_name) \
+	__RTE_BIT_OVERLOAD_SZ_3R(family, fun, c, 64, ret_type, \
+				 arg1_type, arg1_name, arg2_type, arg2_name)
+
+#define __RTE_BIT_OVERLOAD_V_4(family, v, fun, c, size, arg1_type, arg1_name, \
+			       arg2_type, arg2_name, arg3_type,	arg3_name) \
 	static inline void						\
-	rte_bit_ ## fun(uint ## size ## _t *addr, arg1_type arg1_name,	\
-			arg2_type arg2_name, arg3_type arg3_name)	\
+	rte_bit_ ## family ## fun(c uint ## size ## _t *addr,		\
+				  arg1_type arg1_name, arg2_type arg2_name, \
+				  arg3_type arg3_name)			\
 	{								\
-		__rte_bit_ ## fun ## size(addr, arg1_name, arg2_name,	\
-					  arg3_name);		      \
+		__rte_bit_ ## family ## v ## fun ## size(addr, arg1_name, \
+							 arg2_name,	\
+							 arg3_name);	\
 	}
 
-#define __RTE_BIT_OVERLOAD_4(fun, qualifier, arg1_type, arg1_name, arg2_type, \
-			     arg2_name, arg3_type, arg3_name)		\
-	__RTE_BIT_OVERLOAD_SZ_4(fun, qualifier, 32, arg1_type, arg1_name, \
+#define __RTE_BIT_OVERLOAD_SZ_4(family, fun, c, size, arg1_type, arg1_name, \
 				arg2_type, arg2_name, arg3_type, arg3_name) \
-	__RTE_BIT_OVERLOAD_SZ_4(fun, qualifier, 64, arg1_type, arg1_name, \
-				arg2_type, arg2_name, arg3_type, arg3_name)
-
-#define __RTE_BIT_OVERLOAD_SZ_4R(fun, qualifier, size, ret_type, arg1_type, \
-				 arg1_name, arg2_type, arg2_name, arg3_type, \
-				 arg3_name)				\
+	__RTE_BIT_OVERLOAD_V_4(family,, fun, c, size, arg1_type,	\
+			       arg1_name, arg2_type, arg2_name, arg3_type, \
+			       arg3_name)				\
+	__RTE_BIT_OVERLOAD_V_4(family, v_, fun, c volatile, size,	\
+			       arg1_type, arg1_name, arg2_type, arg2_name, \
+			       arg3_type, arg3_name)
+
+#define __RTE_BIT_OVERLOAD_4(family, fun, c, arg1_type, arg1_name, arg2_type, \
+			     arg2_name, arg3_type, arg3_name)		\
+	__RTE_BIT_OVERLOAD_SZ_4(family, fun, c, 32, arg1_type,		\
+				arg1_name, arg2_type, arg2_name, arg3_type, \
+				arg3_name)				\
+	__RTE_BIT_OVERLOAD_SZ_4(family, fun, c, 64, arg1_type,		\
+				arg1_name, arg2_type, arg2_name, arg3_type, \
+				arg3_name)
+
+#define __RTE_BIT_OVERLOAD_V_4R(family, v, fun, c, size, ret_type, arg1_type, \
+				arg1_name, arg2_type, arg2_name, arg3_type, \
+				arg3_name)				\
 	static inline ret_type						\
-	rte_bit_ ## fun(uint ## size ## _t *addr, arg1_type arg1_name,	\
-			arg2_type arg2_name, arg3_type arg3_name)	\
+	rte_bit_ ## family ## fun(c uint ## size ## _t *addr,		\
+				  arg1_type arg1_name, arg2_type arg2_name, \
+				  arg3_type arg3_name)			\
 	{								\
-		return __rte_bit_ ## fun ## size(addr, arg1_name, arg2_name, \
-						 arg3_name);		\
+		return __rte_bit_ ## family ## v ## fun ## size(addr,	\
+								arg1_name, \
+								arg2_name, \
+								arg3_name); \
 	}
 
-#define __RTE_BIT_OVERLOAD_4R(fun, qualifier, ret_type, arg1_type, arg1_name, \
-			      arg2_type, arg2_name, arg3_type, arg3_name) \
-	__RTE_BIT_OVERLOAD_SZ_4R(fun, qualifier, 32, ret_type, arg1_type, \
+#define __RTE_BIT_OVERLOAD_SZ_4R(family, fun, c, size, ret_type, arg1_type, \
 				 arg1_name, arg2_type, arg2_name, arg3_type, \
 				 arg3_name)				\
-	__RTE_BIT_OVERLOAD_SZ_4R(fun, qualifier, 64, ret_type, arg1_type, \
-				 arg1_name, arg2_type, arg2_name, arg3_type, \
-				 arg3_name)
-
-__RTE_BIT_OVERLOAD_2R(test, const, bool, unsigned int, nr)
-__RTE_BIT_OVERLOAD_2(set,, unsigned int, nr)
-__RTE_BIT_OVERLOAD_2(clear,, unsigned int, nr)
-__RTE_BIT_OVERLOAD_3(assign,, unsigned int, nr, bool, value)
-__RTE_BIT_OVERLOAD_2(flip,, unsigned int, nr)
-
-__RTE_BIT_OVERLOAD_3R(atomic_test, const, bool, unsigned int, nr,
+	__RTE_BIT_OVERLOAD_V_4R(family,, fun, c, size, ret_type, arg1_type, \
+				arg1_name, arg2_type, arg2_name, arg3_type, \
+				arg3_name)				\
+	__RTE_BIT_OVERLOAD_V_4R(family, v_, fun, c volatile, size,	\
+				ret_type, arg1_type, arg1_name, arg2_type, \
+				arg2_name, arg3_type, arg3_name)
+
+#define __RTE_BIT_OVERLOAD_4R(family, fun, c, ret_type, arg1_type, arg1_name, \
+			      arg2_type, arg2_name, arg3_type, arg3_name) \
+	__RTE_BIT_OVERLOAD_SZ_4R(family, fun, c, 32, ret_type,		\
+				 arg1_type, arg1_name, arg2_type, arg2_name, \
+				 arg3_type, arg3_name)			\
+	__RTE_BIT_OVERLOAD_SZ_4R(family, fun, c, 64, ret_type,		\
+				 arg1_type, arg1_name, arg2_type, arg2_name, \
+				 arg3_type, arg3_name)
+
+__RTE_BIT_OVERLOAD_2R(, test, const, bool, unsigned int, nr)
+__RTE_BIT_OVERLOAD_2(, set,, unsigned int, nr)
+__RTE_BIT_OVERLOAD_2(, clear,, unsigned int, nr)
+__RTE_BIT_OVERLOAD_3(, assign,, unsigned int, nr, bool, value)
+__RTE_BIT_OVERLOAD_2(, flip,, unsigned int, nr)
+
+__RTE_BIT_OVERLOAD_3R(atomic_, test, const, bool, unsigned int, nr,
 		      int, memory_order)
-__RTE_BIT_OVERLOAD_3(atomic_set,, unsigned int, nr, int, memory_order)
-__RTE_BIT_OVERLOAD_3(atomic_clear,, unsigned int, nr, int, memory_order)
-__RTE_BIT_OVERLOAD_4(atomic_assign,, unsigned int, nr, bool, value,
+__RTE_BIT_OVERLOAD_3(atomic_, set,, unsigned int, nr, int, memory_order)
+__RTE_BIT_OVERLOAD_3(atomic_, clear,, unsigned int, nr, int, memory_order)
+__RTE_BIT_OVERLOAD_4(atomic_, assign,, unsigned int, nr, bool, value,
 		     int, memory_order)
-__RTE_BIT_OVERLOAD_3(atomic_flip,, unsigned int, nr, int, memory_order)
-__RTE_BIT_OVERLOAD_3R(atomic_test_and_set,, bool, unsigned int, nr,
+__RTE_BIT_OVERLOAD_3(atomic_, flip,, unsigned int, nr, int, memory_order)
+__RTE_BIT_OVERLOAD_3R(atomic_, test_and_set,, bool, unsigned int, nr,
 		      int, memory_order)
-__RTE_BIT_OVERLOAD_3R(atomic_test_and_clear,, bool, unsigned int, nr,
+__RTE_BIT_OVERLOAD_3R(atomic_, test_and_clear,, bool, unsigned int, nr,
 		      int, memory_order)
-__RTE_BIT_OVERLOAD_4R(atomic_test_and_assign,, bool, unsigned int, nr,
+__RTE_BIT_OVERLOAD_4R(atomic_, test_and_assign,, bool, unsigned int, nr,
 		      bool, value, int, memory_order)
 
 #endif
-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [PATCH v9 0/6] Improve EAL bit operations API
  2024-09-17 10:48             ` [PATCH v8 1/6] dpdk: do not force C linkage on include file dependencies Mattias Rönnblom
@ 2024-09-18  9:04               ` Mattias Rönnblom
  2024-09-18  9:04                 ` [PATCH v9 1/6] dpdk: do not force C linkage on include file dependencies Mattias Rönnblom
                                   ` (5 more replies)
  0 siblings, 6 replies; 63+ messages in thread
From: Mattias Rönnblom @ 2024-09-18  9:04 UTC (permalink / raw)
  To: dev
  Cc: hofors, Heng Wang, Stephen Hemminger, Tyler Retzlaff,
	Morten Brørup, Jack Bond-Preston, David Marchand,
	Chengwen Feng, Mattias Rönnblom

This patch set represents an attempt to improve and extend the RTE
bitops API, in particular for functions that operate on individual
bits.

All new functionality is exposed to the user as generic selection
macros, delegating the actual work to private (__-marked) static
inline functions. Public functions (e.g., rte_bit_set32()) would just
be bloating the API. Such generic selection macros will here be
referred to as "functions", although technically they are not.

The legacy <rte_bitops.h> rte_bit_relaxed_*() functions are replaced
with two new families:

rte_bit_[test|set|clear|assign|flip]() which provides no memory
ordering or atomicity guarantees, but does provide the best
performance. The performance degradation resulting from the use of
volatile (e.g., forcing loads and stores to actually occur and in the
number specified) and atomic (e.g., LOCK-prefixed instructions on x86)
may be significant. rte_bit_[test|set|clear|assign|flip]() may be
used with volatile word pointers, in which case they guarantee
that the program-level accesses actually occur.

rte_bit_atomic_*() which provides atomic bit-level operations,
including the possibility of specifying memory ordering constraints
(or the lack thereof).

The atomic functions take non-_Atomic pointers, to be flexible, just
like the GCC builtins and default <rte_stdatomic.h>. The issue with
_Atomic APIs is that it may well be the case that the user wants to
perform both non-atomic and atomic operations on the same word.

Having _Atomic-marked addresses would complicate supporting atomic
bit-level operations in the bitset API (proposed in a different RFC
patchset), and potentially in other APIs depending on RTE bitops for
atomic bit-level ops. Either one needs two bitset variants, one
_Atomic bitset and one non-atomic one, or the bitset code needs to
cast the non-_Atomic pointer to an _Atomic one. Having a separate
_Atomic bitset would be bloat and also prevent the user from both, in
some situations, doing atomic operations against a bit set, while in
other situations (e.g., at times when MT safety is not a concern)
operating on the same objects in a non-atomic manner.

Unlike rte_bit_relaxed_*(), individual bits are represented by bool,
not uint32_t or uint64_t. The author found the use of such large types
confusing, and also failed to see any performance benefits.

A set of functions rte_bit_*_assign() are added, to assign a
particular boolean value to a particular bit.

All new functions have properly documented semantics.

All new functions operate on both 32 and 64-bit words, with type
checking.

_Generic allows the user code to be a little more compact. Having a
type-generic atomic test/set/clear/assign bit API also seems
consistent with the "core" (word-size) atomics API, which is generic
(both GCC builtins and <rte_stdatomic.h> are).

The _Generic versions avoid having explicit unsigned long versions of
all functions. If you have an unsigned long, it's safe to use the
generic version (e.g., rte_bit_set()) and _Generic will pick the right
function, provided long is either 32 or 64 bit on your platform (which
it is on all DPDK-supported ABIs).

The generic rte_bit_set() is a macro, and not a function, but
nevertheless has been given a lower-case name. That's how C11 does it
(for atomics, and other _Generic), and <rte_stdatomic.h>. Its address
can't be taken, but it does not evaluate its parameters more than
once.

C++ doesn't support generic selection. In C++ translation units the
_Generic macros are replaced with overloaded functions, implemented by
means of a huge, complicated C macro mess.

Mattias Rönnblom (6):
  dpdk: do not force C linkage on include file dependencies
  eal: extend bit manipulation functionality
  eal: add unit tests for bit operations
  eal: add atomic bit operations
  eal: add unit tests for atomic bit access functions
  eal: extend bitops to handle volatile pointers

 app/test/packet_burst_generator.h             |   8 +-
 app/test/test_bitops.c                        | 416 +++++++++-
 app/test/virtual_pmd.h                        |   4 +-
 doc/guides/rel_notes/release_24_11.rst        |  17 +
 drivers/bus/auxiliary/bus_auxiliary_driver.h  |   8 +-
 drivers/bus/cdx/bus_cdx_driver.h              |   8 +-
 drivers/bus/dpaa/include/fsl_qman.h           |   8 +-
 drivers/bus/fslmc/bus_fslmc_driver.h          |   8 +-
 drivers/bus/pci/bus_pci_driver.h              |   8 +-
 drivers/bus/pci/rte_bus_pci.h                 |   8 +-
 drivers/bus/platform/bus_platform_driver.h    |   8 +-
 drivers/bus/vdev/bus_vdev_driver.h            |   8 +-
 drivers/bus/vmbus/bus_vmbus_driver.h          |   8 +-
 drivers/bus/vmbus/rte_bus_vmbus.h             |   8 +-
 drivers/dma/cnxk/cnxk_dma_event_dp.h          |   8 +-
 drivers/dma/ioat/ioat_hw_defs.h               |   4 +-
 drivers/event/dlb2/rte_pmd_dlb2.h             |   8 +-
 drivers/mempool/dpaa2/rte_dpaa2_mempool.h     |   6 +-
 drivers/net/avp/rte_avp_fifo.h                |   8 +-
 drivers/net/bonding/rte_eth_bond.h            |   4 +-
 drivers/net/i40e/rte_pmd_i40e.h               |   8 +-
 drivers/net/mlx5/mlx5_trace.h                 |   8 +-
 drivers/net/ring/rte_eth_ring.h               |   4 +-
 drivers/net/vhost/rte_eth_vhost.h             |   8 +-
 drivers/raw/ifpga/afu_pmd_core.h              |   8 +-
 drivers/raw/ifpga/afu_pmd_he_hssi.h           |   6 +-
 drivers/raw/ifpga/afu_pmd_he_lpbk.h           |   6 +-
 drivers/raw/ifpga/afu_pmd_he_mem.h            |   6 +-
 drivers/raw/ifpga/afu_pmd_n3000.h             |   6 +-
 drivers/raw/ifpga/rte_pmd_afu.h               |   4 +-
 drivers/raw/ifpga/rte_pmd_ifpga.h             |   4 +-
 examples/ethtool/lib/rte_ethtool.h            |   8 +-
 examples/qos_sched/main.h                     |   4 +-
 examples/vm_power_manager/channel_manager.h   |   8 +-
 lib/acl/rte_acl_osdep.h                       |   8 +-
 lib/bbdev/rte_bbdev.h                         |   8 +-
 lib/bbdev/rte_bbdev_op.h                      |   8 +-
 lib/bbdev/rte_bbdev_pmd.h                     |   8 +-
 lib/bpf/bpf_def.h                             |   8 +-
 lib/compressdev/rte_comp.h                    |   4 +-
 lib/compressdev/rte_compressdev.h             |   6 +-
 lib/compressdev/rte_compressdev_internal.h    |   8 +-
 lib/compressdev/rte_compressdev_pmd.h         |   8 +-
 lib/cryptodev/cryptodev_pmd.h                 |   8 +-
 lib/cryptodev/cryptodev_trace.h               |   8 +-
 lib/cryptodev/rte_crypto.h                    |   8 +-
 lib/cryptodev/rte_crypto_asym.h               |   8 +-
 lib/cryptodev/rte_crypto_sym.h                |   8 +-
 lib/cryptodev/rte_cryptodev.h                 |   8 +-
 lib/cryptodev/rte_cryptodev_trace_fp.h        |   4 +-
 lib/dispatcher/rte_dispatcher.h               |   8 +-
 lib/dmadev/rte_dmadev.h                       |   8 +
 lib/eal/arm/include/rte_atomic_32.h           |   4 +-
 lib/eal/arm/include/rte_atomic_64.h           |   8 +-
 lib/eal/arm/include/rte_byteorder.h           |   8 +-
 lib/eal/arm/include/rte_cpuflags_32.h         |   8 +-
 lib/eal/arm/include/rte_cpuflags_64.h         |   8 +-
 lib/eal/arm/include/rte_cycles_32.h           |   4 +-
 lib/eal/arm/include/rte_cycles_64.h           |   4 +-
 lib/eal/arm/include/rte_io.h                  |   8 +-
 lib/eal/arm/include/rte_io_64.h               |   8 +-
 lib/eal/arm/include/rte_memcpy_32.h           |   8 +-
 lib/eal/arm/include/rte_memcpy_64.h           |  23 +-
 lib/eal/arm/include/rte_pause.h               |   8 +-
 lib/eal/arm/include/rte_pause_32.h            |   6 +-
 lib/eal/arm/include/rte_pause_64.h            |   8 +-
 lib/eal/arm/include/rte_power_intrinsics.h    |   8 +-
 lib/eal/arm/include/rte_prefetch_32.h         |   8 +-
 lib/eal/arm/include/rte_prefetch_64.h         |   8 +-
 lib/eal/arm/include/rte_rwlock.h              |   4 +-
 lib/eal/arm/include/rte_spinlock.h            |   6 +-
 lib/eal/freebsd/include/rte_os.h              |   8 +-
 lib/eal/include/bus_driver.h                  |   8 +-
 lib/eal/include/dev_driver.h                  |   6 +-
 lib/eal/include/eal_trace_internal.h          |   8 +-
 lib/eal/include/generic/rte_atomic.h          |   8 +
 lib/eal/include/generic/rte_byteorder.h       |   8 +
 lib/eal/include/generic/rte_cpuflags.h        |   8 +
 lib/eal/include/generic/rte_cycles.h          |   8 +
 lib/eal/include/generic/rte_io.h              |   8 +
 lib/eal/include/generic/rte_memcpy.h          |   8 +
 lib/eal/include/generic/rte_pause.h           |   8 +
 .../include/generic/rte_power_intrinsics.h    |   8 +
 lib/eal/include/generic/rte_prefetch.h        |   8 +
 lib/eal/include/generic/rte_rwlock.h          |   8 +-
 lib/eal/include/generic/rte_spinlock.h        |   8 +
 lib/eal/include/generic/rte_vect.h            |   8 +
 lib/eal/include/rte_alarm.h                   |   4 +-
 lib/eal/include/rte_bitmap.h                  |   8 +-
 lib/eal/include/rte_bitops.h                  | 768 +++++++++++++++++-
 lib/eal/include/rte_bus.h                     |   8 +-
 lib/eal/include/rte_class.h                   |   4 +-
 lib/eal/include/rte_common.h                  |   8 +-
 lib/eal/include/rte_dev.h                     |   8 +-
 lib/eal/include/rte_devargs.h                 |   8 +-
 lib/eal/include/rte_eal_trace.h               |   4 +-
 lib/eal/include/rte_errno.h                   |   4 +-
 lib/eal/include/rte_fbarray.h                 |   8 +-
 lib/eal/include/rte_keepalive.h               |   6 +-
 lib/eal/include/rte_mcslock.h                 |   8 +-
 lib/eal/include/rte_memory.h                  |   8 +-
 lib/eal/include/rte_pci_dev_features.h        |   4 +-
 lib/eal/include/rte_pflock.h                  |   8 +-
 lib/eal/include/rte_random.h                  |   4 +-
 lib/eal/include/rte_seqcount.h                |   8 +-
 lib/eal/include/rte_seqlock.h                 |   8 +-
 lib/eal/include/rte_service.h                 |   8 +-
 lib/eal/include/rte_service_component.h       |   4 +-
 lib/eal/include/rte_stdatomic.h               |   5 +-
 lib/eal/include/rte_string_fns.h              |  17 +-
 lib/eal/include/rte_tailq.h                   |   6 +-
 lib/eal/include/rte_ticketlock.h              |   8 +-
 lib/eal/include/rte_time.h                    |   6 +-
 lib/eal/include/rte_trace.h                   |   8 +-
 lib/eal/include/rte_trace_point.h             |   8 +-
 lib/eal/include/rte_trace_point_register.h    |   8 +-
 lib/eal/include/rte_uuid.h                    |   8 +-
 lib/eal/include/rte_version.h                 |   6 +-
 lib/eal/include/rte_vfio.h                    |   8 +-
 lib/eal/linux/include/rte_os.h                |   8 +-
 lib/eal/loongarch/include/rte_atomic.h        |   6 +-
 lib/eal/loongarch/include/rte_byteorder.h     |   4 +-
 lib/eal/loongarch/include/rte_cpuflags.h      |   8 +-
 lib/eal/loongarch/include/rte_cycles.h        |   4 +-
 lib/eal/loongarch/include/rte_io.h            |   4 +-
 lib/eal/loongarch/include/rte_memcpy.h        |   4 +-
 lib/eal/loongarch/include/rte_pause.h         |   8 +-
 .../loongarch/include/rte_power_intrinsics.h  |   8 +-
 lib/eal/loongarch/include/rte_prefetch.h      |   8 +-
 lib/eal/loongarch/include/rte_rwlock.h        |   4 +-
 lib/eal/loongarch/include/rte_spinlock.h      |   6 +-
 lib/eal/ppc/include/rte_atomic.h              |   6 +-
 lib/eal/ppc/include/rte_byteorder.h           |   6 +-
 lib/eal/ppc/include/rte_cpuflags.h            |   8 +-
 lib/eal/ppc/include/rte_cycles.h              |   8 +-
 lib/eal/ppc/include/rte_io.h                  |   4 +-
 lib/eal/ppc/include/rte_memcpy.h              |   4 +-
 lib/eal/ppc/include/rte_pause.h               |   8 +-
 lib/eal/ppc/include/rte_power_intrinsics.h    |   8 +-
 lib/eal/ppc/include/rte_prefetch.h            |   8 +-
 lib/eal/ppc/include/rte_rwlock.h              |   4 +-
 lib/eal/ppc/include/rte_spinlock.h            |   8 +-
 lib/eal/riscv/include/rte_atomic.h            |   8 +-
 lib/eal/riscv/include/rte_byteorder.h         |   8 +-
 lib/eal/riscv/include/rte_cpuflags.h          |   8 +-
 lib/eal/riscv/include/rte_cycles.h            |   4 +-
 lib/eal/riscv/include/rte_io.h                |   4 +-
 lib/eal/riscv/include/rte_memcpy.h            |   4 +-
 lib/eal/riscv/include/rte_pause.h             |   8 +-
 lib/eal/riscv/include/rte_power_intrinsics.h  |   8 +-
 lib/eal/riscv/include/rte_prefetch.h          |   8 +-
 lib/eal/riscv/include/rte_rwlock.h            |   4 +-
 lib/eal/riscv/include/rte_spinlock.h          |   6 +-
 lib/eal/windows/include/pthread.h             |   6 +-
 lib/eal/windows/include/regex.h               |   8 +-
 lib/eal/windows/include/rte_windows.h         |   8 +-
 lib/eal/x86/include/rte_atomic.h              |  25 +-
 lib/eal/x86/include/rte_byteorder.h           |  16 +-
 lib/eal/x86/include/rte_cpuflags.h            |   8 +-
 lib/eal/x86/include/rte_cycles.h              |   8 +-
 lib/eal/x86/include/rte_io.h                  |   8 +-
 lib/eal/x86/include/rte_pause.h               |   7 +-
 lib/eal/x86/include/rte_power_intrinsics.h    |   8 +-
 lib/eal/x86/include/rte_prefetch.h            |   8 +-
 lib/eal/x86/include/rte_rwlock.h              |   6 +-
 lib/eal/x86/include/rte_spinlock.h            |   9 +-
 lib/ethdev/ethdev_driver.h                    |   8 +-
 lib/ethdev/ethdev_pci.h                       |   8 +-
 lib/ethdev/ethdev_trace.h                     |   8 +-
 lib/ethdev/ethdev_vdev.h                      |   8 +-
 lib/ethdev/rte_cman.h                         |   4 +-
 lib/ethdev/rte_dev_info.h                     |   4 +-
 lib/ethdev/rte_ethdev.h                       |   8 +-
 lib/ethdev/rte_ethdev_trace_fp.h              |   4 +-
 lib/eventdev/event_timer_adapter_pmd.h        |   4 +-
 lib/eventdev/eventdev_pmd.h                   |   8 +-
 lib/eventdev/eventdev_pmd_pci.h               |   8 +-
 lib/eventdev/eventdev_pmd_vdev.h              |   8 +-
 lib/eventdev/eventdev_trace.h                 |   8 +-
 lib/eventdev/rte_event_crypto_adapter.h       |   8 +-
 lib/eventdev/rte_event_eth_rx_adapter.h       |   8 +-
 lib/eventdev/rte_event_eth_tx_adapter.h       |   8 +-
 lib/eventdev/rte_event_ring.h                 |   8 +-
 lib/eventdev/rte_event_timer_adapter.h        |   8 +-
 lib/eventdev/rte_eventdev.h                   |   8 +-
 lib/eventdev/rte_eventdev_trace_fp.h          |   4 +-
 lib/graph/rte_graph_model_mcore_dispatch.h    |   8 +-
 lib/graph/rte_graph_worker.h                  |   6 +-
 lib/gso/rte_gso.h                             |   6 +-
 lib/hash/rte_fbk_hash.h                       |   8 +-
 lib/hash/rte_hash_crc.h                       |   8 +-
 lib/hash/rte_jhash.h                          |   8 +-
 lib/hash/rte_thash.h                          |   8 +-
 lib/hash/rte_thash_gfni.h                     |   8 +-
 lib/ip_frag/rte_ip_frag.h                     |   8 +-
 lib/ipsec/rte_ipsec.h                         |   8 +-
 lib/log/rte_log.h                             |   8 +-
 lib/lpm/rte_lpm.h                             |   8 +-
 lib/member/rte_member.h                       |   8 +-
 lib/member/rte_member_sketch.h                |   6 +-
 lib/member/rte_member_sketch_avx512.h         |   8 +-
 lib/member/rte_member_x86.h                   |   4 +-
 lib/member/rte_xxh64_avx512.h                 |   6 +-
 lib/mempool/mempool_trace.h                   |   8 +-
 lib/mempool/rte_mempool_trace_fp.h            |   4 +-
 lib/meter/rte_meter.h                         |   8 +-
 lib/mldev/mldev_utils.h                       |   8 +-
 lib/mldev/rte_mldev_core.h                    |   8 +-
 lib/mldev/rte_mldev_pmd.h                     |   8 +-
 lib/net/rte_ether.h                           |   8 +-
 lib/net/rte_net.h                             |   8 +-
 lib/net/rte_sctp.h                            |   8 +-
 lib/node/rte_node_eth_api.h                   |   8 +-
 lib/node/rte_node_ip4_api.h                   |   8 +-
 lib/node/rte_node_ip6_api.h                   |   6 +-
 lib/node/rte_node_udp4_input_api.h            |   8 +-
 lib/pci/rte_pci.h                             |   8 +-
 lib/pdcp/rte_pdcp.h                           |   8 +-
 lib/pipeline/rte_pipeline.h                   |   8 +-
 lib/pipeline/rte_port_in_action.h             |   8 +-
 lib/pipeline/rte_swx_ctl.h                    |   8 +-
 lib/pipeline/rte_swx_extern.h                 |   8 +-
 lib/pipeline/rte_swx_ipsec.h                  |   8 +-
 lib/pipeline/rte_swx_pipeline.h               |   8 +-
 lib/pipeline/rte_swx_pipeline_spec.h          |   8 +-
 lib/pipeline/rte_table_action.h               |   8 +-
 lib/port/rte_port.h                           |   8 +-
 lib/port/rte_port_ethdev.h                    |   8 +-
 lib/port/rte_port_eventdev.h                  |   8 +-
 lib/port/rte_port_fd.h                        |   8 +-
 lib/port/rte_port_frag.h                      |   8 +-
 lib/port/rte_port_ras.h                       |   8 +-
 lib/port/rte_port_ring.h                      |   8 +-
 lib/port/rte_port_sched.h                     |   8 +-
 lib/port/rte_port_source_sink.h               |   8 +-
 lib/port/rte_port_sym_crypto.h                |   8 +-
 lib/port/rte_swx_port.h                       |   8 +-
 lib/port/rte_swx_port_ethdev.h                |   8 +-
 lib/port/rte_swx_port_fd.h                    |   8 +-
 lib/port/rte_swx_port_ring.h                  |   8 +-
 lib/port/rte_swx_port_source_sink.h           |   8 +-
 lib/rawdev/rte_rawdev.h                       |   6 +-
 lib/rawdev/rte_rawdev_pmd.h                   |   8 +-
 lib/rcu/rte_rcu_qsbr.h                        |   8 +-
 lib/regexdev/rte_regexdev.h                   |   8 +-
 lib/ring/rte_ring.h                           |   6 +-
 lib/ring/rte_ring_core.h                      |   8 +-
 lib/ring/rte_ring_elem.h                      |   8 +-
 lib/ring/rte_ring_hts.h                       |   4 +-
 lib/ring/rte_ring_peek.h                      |   4 +-
 lib/ring/rte_ring_peek_zc.h                   |   4 +-
 lib/ring/rte_ring_rts.h                       |   4 +-
 lib/sched/rte_approx.h                        |   8 +-
 lib/sched/rte_pie.h                           |   8 +-
 lib/sched/rte_red.h                           |   8 +-
 lib/sched/rte_sched.h                         |   8 +-
 lib/sched/rte_sched_common.h                  |   6 +-
 lib/security/rte_security.h                   |   8 +-
 lib/security/rte_security_driver.h            |   6 +-
 lib/stack/rte_stack.h                         |   8 +-
 lib/table/rte_lru.h                           |  12 +-
 lib/table/rte_lru_arm64.h                     |   8 +-
 lib/table/rte_lru_x86.h                       |   8 -
 lib/table/rte_swx_hash_func.h                 |   8 +-
 lib/table/rte_swx_keycmp.h                    |   8 +-
 lib/table/rte_swx_table.h                     |   8 +-
 lib/table/rte_swx_table_em.h                  |   8 +-
 lib/table/rte_swx_table_learner.h             |   8 +-
 lib/table/rte_swx_table_selector.h            |   8 +-
 lib/table/rte_swx_table_wm.h                  |   8 +-
 lib/table/rte_table.h                         |   8 +-
 lib/table/rte_table_acl.h                     |   8 +-
 lib/table/rte_table_array.h                   |   8 +-
 lib/table/rte_table_hash.h                    |   8 +-
 lib/table/rte_table_hash_cuckoo.h             |   8 +-
 lib/table/rte_table_hash_func.h               |  12 +-
 lib/table/rte_table_lpm.h                     |   8 +-
 lib/table/rte_table_lpm_ipv6.h                |   8 +-
 lib/table/rte_table_stub.h                    |   8 +-
 lib/telemetry/rte_telemetry.h                 |   8 +-
 lib/vhost/rte_vdpa.h                          |   8 +-
 lib/vhost/rte_vhost.h                         |   8 +-
 lib/vhost/rte_vhost_async.h                   |   8 +-
 lib/vhost/rte_vhost_crypto.h                  |   4 +-
 lib/vhost/vdpa_driver.h                       |   8 +-
 285 files changed, 2276 insertions(+), 1001 deletions(-)

-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [PATCH v9 1/6] dpdk: do not force C linkage on include file dependencies
  2024-09-18  9:04               ` [PATCH v9 0/6] Improve EAL bit operations API Mattias Rönnblom
@ 2024-09-18  9:04                 ` Mattias Rönnblom
  2024-09-19 19:31                   ` [PATCH v10 0/7] Improve EAL bit operations API Mattias Rönnblom
  2024-09-18  9:04                 ` [PATCH v9 2/6] eal: extend bit manipulation functionality Mattias Rönnblom
                                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 63+ messages in thread
From: Mattias Rönnblom @ 2024-09-18  9:04 UTC (permalink / raw)
  To: dev
  Cc: hofors, Heng Wang, Stephen Hemminger, Tyler Retzlaff,
	Morten Brørup, Jack Bond-Preston, David Marchand,
	Chengwen Feng, Mattias Rönnblom

Ensure that 'extern "C" { /../ }' does not cover files included from a
particular header file, and address minor issues resulting from this
change of order.

Dealing with C++ should be delegated to the individual include file
level, rather than being imposed by the user of that file. For
example, forcing C linkage prevents _Generic macros from being
replaced with overloaded static inline functions in C++ translation
units.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>

--

PATCH v9:
 * Fix yet another issue in the ARM build. Author really needs an
   ARM system powerful enough to build DPDK.

PATCH v8:
 * Fix issues in rte_memcpy_64.h causing build failures on ARM.

PATCH v7:
 * Fix issues in rte_io.h, rte_pause.h and rte_thash_gfni.h causing
   build failures on ARM. (David Marchand)
 * Fix issue in rte_vfio.h, causing build failures unless VFIO_PRESENT.

PATCH v6:
 * Add missing extern "C" in rte_atomic.h, rte_cpuflags.h, rte_io.h,
   rte_vect.h.
 * Fix 32-bit x86 build issues in rte_atomic.h.

PATCH v5:
 * rte_dmadev.h was still including files under extern "C" { /../ }.
   (Chengwen Feng)
 * Fix rte_byteorder.h, broken on 32-bit x86.
---
 app/test/packet_burst_generator.h             |  8 +++---
 app/test/virtual_pmd.h                        |  4 +--
 drivers/bus/auxiliary/bus_auxiliary_driver.h  |  8 +++---
 drivers/bus/cdx/bus_cdx_driver.h              |  8 +++---
 drivers/bus/dpaa/include/fsl_qman.h           |  8 +++---
 drivers/bus/fslmc/bus_fslmc_driver.h          |  8 +++---
 drivers/bus/pci/bus_pci_driver.h              |  8 +++---
 drivers/bus/pci/rte_bus_pci.h                 |  8 +++---
 drivers/bus/platform/bus_platform_driver.h    |  8 +++---
 drivers/bus/vdev/bus_vdev_driver.h            |  8 +++---
 drivers/bus/vmbus/bus_vmbus_driver.h          |  8 +++---
 drivers/bus/vmbus/rte_bus_vmbus.h             |  8 +++---
 drivers/dma/cnxk/cnxk_dma_event_dp.h          |  8 +++---
 drivers/dma/ioat/ioat_hw_defs.h               |  4 +--
 drivers/event/dlb2/rte_pmd_dlb2.h             |  8 +++---
 drivers/mempool/dpaa2/rte_dpaa2_mempool.h     |  6 ++---
 drivers/net/avp/rte_avp_fifo.h                |  8 +++---
 drivers/net/bonding/rte_eth_bond.h            |  4 +--
 drivers/net/i40e/rte_pmd_i40e.h               |  8 +++---
 drivers/net/mlx5/mlx5_trace.h                 |  8 +++---
 drivers/net/ring/rte_eth_ring.h               |  4 +--
 drivers/net/vhost/rte_eth_vhost.h             |  8 +++---
 drivers/raw/ifpga/afu_pmd_core.h              |  8 +++---
 drivers/raw/ifpga/afu_pmd_he_hssi.h           |  6 ++---
 drivers/raw/ifpga/afu_pmd_he_lpbk.h           |  6 ++---
 drivers/raw/ifpga/afu_pmd_he_mem.h            |  6 ++---
 drivers/raw/ifpga/afu_pmd_n3000.h             |  6 ++---
 drivers/raw/ifpga/rte_pmd_afu.h               |  4 +--
 drivers/raw/ifpga/rte_pmd_ifpga.h             |  4 +--
 examples/ethtool/lib/rte_ethtool.h            |  8 +++---
 examples/qos_sched/main.h                     |  4 +--
 examples/vm_power_manager/channel_manager.h   |  8 +++---
 lib/acl/rte_acl_osdep.h                       |  8 +++---
 lib/bbdev/rte_bbdev.h                         |  8 +++---
 lib/bbdev/rte_bbdev_op.h                      |  8 +++---
 lib/bbdev/rte_bbdev_pmd.h                     |  8 +++---
 lib/bpf/bpf_def.h                             |  8 +++---
 lib/compressdev/rte_comp.h                    |  4 +--
 lib/compressdev/rte_compressdev.h             |  6 ++---
 lib/compressdev/rte_compressdev_internal.h    |  8 +++---
 lib/compressdev/rte_compressdev_pmd.h         |  8 +++---
 lib/cryptodev/cryptodev_pmd.h                 |  8 +++---
 lib/cryptodev/cryptodev_trace.h               |  8 +++---
 lib/cryptodev/rte_crypto.h                    |  8 +++---
 lib/cryptodev/rte_crypto_asym.h               |  8 +++---
 lib/cryptodev/rte_crypto_sym.h                |  8 +++---
 lib/cryptodev/rte_cryptodev.h                 |  8 +++---
 lib/cryptodev/rte_cryptodev_trace_fp.h        |  4 +--
 lib/dispatcher/rte_dispatcher.h               |  8 +++---
 lib/dmadev/rte_dmadev.h                       |  8 ++++++
 lib/eal/arm/include/rte_atomic_32.h           |  4 +--
 lib/eal/arm/include/rte_atomic_64.h           |  8 +++---
 lib/eal/arm/include/rte_byteorder.h           |  8 +++---
 lib/eal/arm/include/rte_cpuflags_32.h         |  8 +++---
 lib/eal/arm/include/rte_cpuflags_64.h         |  8 +++---
 lib/eal/arm/include/rte_cycles_32.h           |  4 +--
 lib/eal/arm/include/rte_cycles_64.h           |  4 +--
 lib/eal/arm/include/rte_io.h                  |  8 +++---
 lib/eal/arm/include/rte_io_64.h               |  8 +++---
 lib/eal/arm/include/rte_memcpy_32.h           |  8 +++---
 lib/eal/arm/include/rte_memcpy_64.h           | 23 +++++++++++------
 lib/eal/arm/include/rte_pause.h               |  8 +++---
 lib/eal/arm/include/rte_pause_32.h            |  6 ++---
 lib/eal/arm/include/rte_pause_64.h            |  8 +++---
 lib/eal/arm/include/rte_power_intrinsics.h    |  8 +++---
 lib/eal/arm/include/rte_prefetch_32.h         |  8 +++---
 lib/eal/arm/include/rte_prefetch_64.h         |  8 +++---
 lib/eal/arm/include/rte_rwlock.h              |  4 +--
 lib/eal/arm/include/rte_spinlock.h            |  6 ++---
 lib/eal/freebsd/include/rte_os.h              |  8 +++---
 lib/eal/include/bus_driver.h                  |  8 +++---
 lib/eal/include/dev_driver.h                  |  6 ++---
 lib/eal/include/eal_trace_internal.h          |  8 +++---
 lib/eal/include/generic/rte_atomic.h          |  8 ++++++
 lib/eal/include/generic/rte_byteorder.h       |  8 ++++++
 lib/eal/include/generic/rte_cpuflags.h        |  8 ++++++
 lib/eal/include/generic/rte_cycles.h          |  8 ++++++
 lib/eal/include/generic/rte_io.h              |  8 ++++++
 lib/eal/include/generic/rte_memcpy.h          |  8 ++++++
 lib/eal/include/generic/rte_pause.h           |  8 ++++++
 .../include/generic/rte_power_intrinsics.h    |  8 ++++++
 lib/eal/include/generic/rte_prefetch.h        |  8 ++++++
 lib/eal/include/generic/rte_rwlock.h          |  8 +++---
 lib/eal/include/generic/rte_spinlock.h        |  8 ++++++
 lib/eal/include/generic/rte_vect.h            |  8 ++++++
 lib/eal/include/rte_alarm.h                   |  4 +--
 lib/eal/include/rte_bitmap.h                  |  8 +++---
 lib/eal/include/rte_bus.h                     |  8 +++---
 lib/eal/include/rte_class.h                   |  4 +--
 lib/eal/include/rte_common.h                  |  8 +++---
 lib/eal/include/rte_dev.h                     |  8 +++---
 lib/eal/include/rte_devargs.h                 |  8 +++---
 lib/eal/include/rte_eal_trace.h               |  4 +--
 lib/eal/include/rte_errno.h                   |  4 +--
 lib/eal/include/rte_fbarray.h                 |  8 +++---
 lib/eal/include/rte_keepalive.h               |  6 ++---
 lib/eal/include/rte_mcslock.h                 |  8 +++---
 lib/eal/include/rte_memory.h                  |  8 +++---
 lib/eal/include/rte_pci_dev_features.h        |  4 +--
 lib/eal/include/rte_pflock.h                  |  8 +++---
 lib/eal/include/rte_random.h                  |  4 +--
 lib/eal/include/rte_seqcount.h                |  8 +++---
 lib/eal/include/rte_seqlock.h                 |  8 +++---
 lib/eal/include/rte_service.h                 |  8 +++---
 lib/eal/include/rte_service_component.h       |  4 +--
 lib/eal/include/rte_stdatomic.h               |  5 +---
 lib/eal/include/rte_string_fns.h              | 17 +++++++++----
 lib/eal/include/rte_tailq.h                   |  6 ++---
 lib/eal/include/rte_ticketlock.h              |  8 +++---
 lib/eal/include/rte_time.h                    |  6 ++---
 lib/eal/include/rte_trace.h                   |  8 +++---
 lib/eal/include/rte_trace_point.h             |  8 +++---
 lib/eal/include/rte_trace_point_register.h    |  8 +++---
 lib/eal/include/rte_uuid.h                    |  8 +++---
 lib/eal/include/rte_version.h                 |  6 ++---
 lib/eal/include/rte_vfio.h                    |  8 +++---
 lib/eal/linux/include/rte_os.h                |  8 +++---
 lib/eal/loongarch/include/rte_atomic.h        |  6 ++---
 lib/eal/loongarch/include/rte_byteorder.h     |  4 +--
 lib/eal/loongarch/include/rte_cpuflags.h      |  8 +++---
 lib/eal/loongarch/include/rte_cycles.h        |  4 +--
 lib/eal/loongarch/include/rte_io.h            |  4 +--
 lib/eal/loongarch/include/rte_memcpy.h        |  4 +--
 lib/eal/loongarch/include/rte_pause.h         |  8 +++---
 .../loongarch/include/rte_power_intrinsics.h  |  8 +++---
 lib/eal/loongarch/include/rte_prefetch.h      |  8 +++---
 lib/eal/loongarch/include/rte_rwlock.h        |  4 +--
 lib/eal/loongarch/include/rte_spinlock.h      |  6 ++---
 lib/eal/ppc/include/rte_atomic.h              |  6 ++---
 lib/eal/ppc/include/rte_byteorder.h           |  6 ++---
 lib/eal/ppc/include/rte_cpuflags.h            |  8 +++---
 lib/eal/ppc/include/rte_cycles.h              |  8 +++---
 lib/eal/ppc/include/rte_io.h                  |  4 +--
 lib/eal/ppc/include/rte_memcpy.h              |  4 +--
 lib/eal/ppc/include/rte_pause.h               |  8 +++---
 lib/eal/ppc/include/rte_power_intrinsics.h    |  8 +++---
 lib/eal/ppc/include/rte_prefetch.h            |  8 +++---
 lib/eal/ppc/include/rte_rwlock.h              |  4 +--
 lib/eal/ppc/include/rte_spinlock.h            |  8 +++---
 lib/eal/riscv/include/rte_atomic.h            |  8 +++---
 lib/eal/riscv/include/rte_byteorder.h         |  8 +++---
 lib/eal/riscv/include/rte_cpuflags.h          |  8 +++---
 lib/eal/riscv/include/rte_cycles.h            |  4 +--
 lib/eal/riscv/include/rte_io.h                |  4 +--
 lib/eal/riscv/include/rte_memcpy.h            |  4 +--
 lib/eal/riscv/include/rte_pause.h             |  8 +++---
 lib/eal/riscv/include/rte_power_intrinsics.h  |  8 +++---
 lib/eal/riscv/include/rte_prefetch.h          |  8 +++---
 lib/eal/riscv/include/rte_rwlock.h            |  4 +--
 lib/eal/riscv/include/rte_spinlock.h          |  6 ++---
 lib/eal/windows/include/pthread.h             |  6 ++---
 lib/eal/windows/include/regex.h               |  8 +++---
 lib/eal/windows/include/rte_windows.h         |  8 +++---
 lib/eal/x86/include/rte_atomic.h              | 25 +++++++++++++------
 lib/eal/x86/include/rte_byteorder.h           | 16 ++++++------
 lib/eal/x86/include/rte_cpuflags.h            |  8 +++---
 lib/eal/x86/include/rte_cycles.h              |  8 +++---
 lib/eal/x86/include/rte_io.h                  |  8 +++---
 lib/eal/x86/include/rte_pause.h               |  7 +++---
 lib/eal/x86/include/rte_power_intrinsics.h    |  8 +++---
 lib/eal/x86/include/rte_prefetch.h            |  8 +++---
 lib/eal/x86/include/rte_rwlock.h              |  6 ++---
 lib/eal/x86/include/rte_spinlock.h            |  9 +++----
 lib/ethdev/ethdev_driver.h                    |  8 +++---
 lib/ethdev/ethdev_pci.h                       |  8 +++---
 lib/ethdev/ethdev_trace.h                     |  8 +++---
 lib/ethdev/ethdev_vdev.h                      |  8 +++---
 lib/ethdev/rte_cman.h                         |  4 +--
 lib/ethdev/rte_dev_info.h                     |  4 +--
 lib/ethdev/rte_ethdev.h                       |  8 +++---
 lib/ethdev/rte_ethdev_trace_fp.h              |  4 +--
 lib/eventdev/event_timer_adapter_pmd.h        |  4 +--
 lib/eventdev/eventdev_pmd.h                   |  8 +++---
 lib/eventdev/eventdev_pmd_pci.h               |  8 +++---
 lib/eventdev/eventdev_pmd_vdev.h              |  8 +++---
 lib/eventdev/eventdev_trace.h                 |  8 +++---
 lib/eventdev/rte_event_crypto_adapter.h       |  8 +++---
 lib/eventdev/rte_event_eth_rx_adapter.h       |  8 +++---
 lib/eventdev/rte_event_eth_tx_adapter.h       |  8 +++---
 lib/eventdev/rte_event_ring.h                 |  8 +++---
 lib/eventdev/rte_event_timer_adapter.h        |  8 +++---
 lib/eventdev/rte_eventdev.h                   |  8 +++---
 lib/eventdev/rte_eventdev_trace_fp.h          |  4 +--
 lib/graph/rte_graph_model_mcore_dispatch.h    |  8 +++---
 lib/graph/rte_graph_worker.h                  |  6 ++---
 lib/gso/rte_gso.h                             |  6 ++---
 lib/hash/rte_fbk_hash.h                       |  8 +++---
 lib/hash/rte_hash_crc.h                       |  8 +++---
 lib/hash/rte_jhash.h                          |  8 +++---
 lib/hash/rte_thash.h                          |  8 +++---
 lib/hash/rte_thash_gfni.h                     |  8 +++---
 lib/ip_frag/rte_ip_frag.h                     |  8 +++---
 lib/ipsec/rte_ipsec.h                         |  8 +++---
 lib/log/rte_log.h                             |  8 +++---
 lib/lpm/rte_lpm.h                             |  8 +++---
 lib/member/rte_member.h                       |  8 +++---
 lib/member/rte_member_sketch.h                |  6 ++---
 lib/member/rte_member_sketch_avx512.h         |  8 +++---
 lib/member/rte_member_x86.h                   |  4 +--
 lib/member/rte_xxh64_avx512.h                 |  6 ++---
 lib/mempool/mempool_trace.h                   |  8 +++---
 lib/mempool/rte_mempool_trace_fp.h            |  4 +--
 lib/meter/rte_meter.h                         |  8 +++---
 lib/mldev/mldev_utils.h                       |  8 +++---
 lib/mldev/rte_mldev_core.h                    |  8 +++---
 lib/mldev/rte_mldev_pmd.h                     |  8 +++---
 lib/net/rte_ether.h                           |  8 +++---
 lib/net/rte_net.h                             |  8 +++---
 lib/net/rte_sctp.h                            |  8 +++---
 lib/node/rte_node_eth_api.h                   |  8 +++---
 lib/node/rte_node_ip4_api.h                   |  8 +++---
 lib/node/rte_node_ip6_api.h                   |  6 ++---
 lib/node/rte_node_udp4_input_api.h            |  8 +++---
 lib/pci/rte_pci.h                             |  8 +++---
 lib/pdcp/rte_pdcp.h                           |  8 +++---
 lib/pipeline/rte_pipeline.h                   |  8 +++---
 lib/pipeline/rte_port_in_action.h             |  8 +++---
 lib/pipeline/rte_swx_ctl.h                    |  8 +++---
 lib/pipeline/rte_swx_extern.h                 |  8 +++---
 lib/pipeline/rte_swx_ipsec.h                  |  8 +++---
 lib/pipeline/rte_swx_pipeline.h               |  8 +++---
 lib/pipeline/rte_swx_pipeline_spec.h          |  8 +++---
 lib/pipeline/rte_table_action.h               |  8 +++---
 lib/port/rte_port.h                           |  8 +++---
 lib/port/rte_port_ethdev.h                    |  8 +++---
 lib/port/rte_port_eventdev.h                  |  8 +++---
 lib/port/rte_port_fd.h                        |  8 +++---
 lib/port/rte_port_frag.h                      |  8 +++---
 lib/port/rte_port_ras.h                       |  8 +++---
 lib/port/rte_port_ring.h                      |  8 +++---
 lib/port/rte_port_sched.h                     |  8 +++---
 lib/port/rte_port_source_sink.h               |  8 +++---
 lib/port/rte_port_sym_crypto.h                |  8 +++---
 lib/port/rte_swx_port.h                       |  8 +++---
 lib/port/rte_swx_port_ethdev.h                |  8 +++---
 lib/port/rte_swx_port_fd.h                    |  8 +++---
 lib/port/rte_swx_port_ring.h                  |  8 +++---
 lib/port/rte_swx_port_source_sink.h           |  8 +++---
 lib/rawdev/rte_rawdev.h                       |  6 ++---
 lib/rawdev/rte_rawdev_pmd.h                   |  8 +++---
 lib/rcu/rte_rcu_qsbr.h                        |  8 +++---
 lib/regexdev/rte_regexdev.h                   |  8 +++---
 lib/ring/rte_ring.h                           |  6 ++---
 lib/ring/rte_ring_core.h                      |  8 +++---
 lib/ring/rte_ring_elem.h                      |  8 +++---
 lib/ring/rte_ring_hts.h                       |  4 +--
 lib/ring/rte_ring_peek.h                      |  4 +--
 lib/ring/rte_ring_peek_zc.h                   |  4 +--
 lib/ring/rte_ring_rts.h                       |  4 +--
 lib/sched/rte_approx.h                        |  8 +++---
 lib/sched/rte_pie.h                           |  8 +++---
 lib/sched/rte_red.h                           |  8 +++---
 lib/sched/rte_sched.h                         |  8 +++---
 lib/sched/rte_sched_common.h                  |  6 ++---
 lib/security/rte_security.h                   |  8 +++---
 lib/security/rte_security_driver.h            |  6 ++---
 lib/stack/rte_stack.h                         |  8 +++---
 lib/table/rte_lru.h                           | 12 +++------
 lib/table/rte_lru_arm64.h                     |  8 +++---
 lib/table/rte_lru_x86.h                       |  8 ------
 lib/table/rte_swx_hash_func.h                 |  8 +++---
 lib/table/rte_swx_keycmp.h                    |  8 +++---
 lib/table/rte_swx_table.h                     |  8 +++---
 lib/table/rte_swx_table_em.h                  |  8 +++---
 lib/table/rte_swx_table_learner.h             |  8 +++---
 lib/table/rte_swx_table_selector.h            |  8 +++---
 lib/table/rte_swx_table_wm.h                  |  8 +++---
 lib/table/rte_table.h                         |  8 +++---
 lib/table/rte_table_acl.h                     |  8 +++---
 lib/table/rte_table_array.h                   |  8 +++---
 lib/table/rte_table_hash.h                    |  8 +++---
 lib/table/rte_table_hash_cuckoo.h             |  8 +++---
 lib/table/rte_table_hash_func.h               | 12 ++++++---
 lib/table/rte_table_lpm.h                     |  8 +++---
 lib/table/rte_table_lpm_ipv6.h                |  8 +++---
 lib/table/rte_table_stub.h                    |  8 +++---
 lib/telemetry/rte_telemetry.h                 |  8 +++---
 lib/vhost/rte_vdpa.h                          |  8 +++---
 lib/vhost/rte_vhost.h                         |  8 +++---
 lib/vhost/rte_vhost_async.h                   |  8 +++---
 lib/vhost/rte_vhost_crypto.h                  |  4 +--
 lib/vhost/vdpa_driver.h                       |  8 +++---
 282 files changed, 1093 insertions(+), 983 deletions(-)

diff --git a/app/test/packet_burst_generator.h b/app/test/packet_burst_generator.h
index b99286f50e..cce41bcd0f 100644
--- a/app/test/packet_burst_generator.h
+++ b/app/test/packet_burst_generator.h
@@ -5,10 +5,6 @@
 #ifndef PACKET_BURST_GENERATOR_H_
 #define PACKET_BURST_GENERATOR_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_mbuf.h>
 #include <rte_ether.h>
 #include <rte_arp.h>
@@ -17,6 +13,10 @@ extern "C" {
 #include <rte_tcp.h>
 #include <rte_sctp.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define IPV4_ADDR(a, b, c, d)(((a & 0xff) << 24) | ((b & 0xff) << 16) | \
 		((c & 0xff) << 8) | (d & 0xff))
 
diff --git a/app/test/virtual_pmd.h b/app/test/virtual_pmd.h
index 120b58b273..a5a71d7cb4 100644
--- a/app/test/virtual_pmd.h
+++ b/app/test/virtual_pmd.h
@@ -5,12 +5,12 @@
 #ifndef __VIRTUAL_ETHDEV_H_
 #define __VIRTUAL_ETHDEV_H_
 
+#include <rte_ether.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_ether.h>
-
 int
 virtual_ethdev_init(void);
 
diff --git a/drivers/bus/auxiliary/bus_auxiliary_driver.h b/drivers/bus/auxiliary/bus_auxiliary_driver.h
index 58fb7c7f69..40ab1f0912 100644
--- a/drivers/bus/auxiliary/bus_auxiliary_driver.h
+++ b/drivers/bus/auxiliary/bus_auxiliary_driver.h
@@ -11,10 +11,6 @@
  * Auxiliary Bus Interface.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdio.h>
 #include <stdlib.h>
 #include <limits.h>
@@ -28,6 +24,10 @@ extern "C" {
 #include <dev_driver.h>
 #include <rte_kvargs.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_BUS_AUXILIARY_NAME "auxiliary"
 
 /* Forward declarations */
diff --git a/drivers/bus/cdx/bus_cdx_driver.h b/drivers/bus/cdx/bus_cdx_driver.h
index 211f8e406b..d390e7b5a1 100644
--- a/drivers/bus/cdx/bus_cdx_driver.h
+++ b/drivers/bus/cdx/bus_cdx_driver.h
@@ -10,10 +10,6 @@
  * AMD CDX bus interface
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdlib.h>
 #include <inttypes.h>
 #include <linux/types.h>
@@ -22,6 +18,10 @@ extern "C" {
 #include <dev_driver.h>
 #include <rte_interrupts.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Forward declarations */
 struct rte_cdx_device;
 struct rte_cdx_driver;
diff --git a/drivers/bus/dpaa/include/fsl_qman.h b/drivers/bus/dpaa/include/fsl_qman.h
index c0677976e8..f39007b84d 100644
--- a/drivers/bus/dpaa/include/fsl_qman.h
+++ b/drivers/bus/dpaa/include/fsl_qman.h
@@ -8,14 +8,14 @@
 #ifndef __FSL_QMAN_H
 #define __FSL_QMAN_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <dpaa_rbtree.h>
 #include <rte_compat.h>
 #include <rte_eventdev.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* FQ lookups (turn this on for 64bit user-space) */
 #ifdef RTE_ARCH_64
 #define CONFIG_FSL_QMAN_FQ_LOOKUP
diff --git a/drivers/bus/fslmc/bus_fslmc_driver.h b/drivers/bus/fslmc/bus_fslmc_driver.h
index 7ac5fe6ff1..3095458133 100644
--- a/drivers/bus/fslmc/bus_fslmc_driver.h
+++ b/drivers/bus/fslmc/bus_fslmc_driver.h
@@ -13,10 +13,6 @@
  * RTE FSLMC Bus Interface
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdio.h>
 #include <stdlib.h>
 #include <limits.h>
@@ -40,6 +36,10 @@ extern "C" {
 #include "portal/dpaa2_hw_pvt.h"
 #include "portal/dpaa2_hw_dpio.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define FSLMC_OBJECT_MAX_LEN 32   /**< Length of each device on bus */
 
 #define DPAA2_INVALID_MBUF_SEQN        0
diff --git a/drivers/bus/pci/bus_pci_driver.h b/drivers/bus/pci/bus_pci_driver.h
index be32263a82..2cc1119072 100644
--- a/drivers/bus/pci/bus_pci_driver.h
+++ b/drivers/bus/pci/bus_pci_driver.h
@@ -6,14 +6,14 @@
 #ifndef BUS_PCI_DRIVER_H
 #define BUS_PCI_DRIVER_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_bus_pci.h>
 #include <dev_driver.h>
 #include <rte_compat.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Pathname of PCI devices directory. */
 __rte_internal
 const char *rte_pci_get_sysfs_path(void);
diff --git a/drivers/bus/pci/rte_bus_pci.h b/drivers/bus/pci/rte_bus_pci.h
index a3798cb1cb..19a7b15b99 100644
--- a/drivers/bus/pci/rte_bus_pci.h
+++ b/drivers/bus/pci/rte_bus_pci.h
@@ -11,10 +11,6 @@
  * PCI device & driver interface
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdio.h>
 #include <stdlib.h>
 #include <limits.h>
@@ -27,6 +23,10 @@ extern "C" {
 #include <rte_interrupts.h>
 #include <rte_pci.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Forward declarations */
 struct rte_pci_device;
 struct rte_pci_driver;
diff --git a/drivers/bus/platform/bus_platform_driver.h b/drivers/bus/platform/bus_platform_driver.h
index 5ac54fb739..a6f246f7c4 100644
--- a/drivers/bus/platform/bus_platform_driver.h
+++ b/drivers/bus/platform/bus_platform_driver.h
@@ -10,10 +10,6 @@
  * Platform bus interface.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stddef.h>
 #include <stdint.h>
 
@@ -23,6 +19,10 @@ extern "C" {
 #include <rte_os.h>
 #include <rte_vfio.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Forward declarations */
 struct rte_platform_bus;
 struct rte_platform_device;
diff --git a/drivers/bus/vdev/bus_vdev_driver.h b/drivers/bus/vdev/bus_vdev_driver.h
index bc7e30d7c6..cba1fb5269 100644
--- a/drivers/bus/vdev/bus_vdev_driver.h
+++ b/drivers/bus/vdev/bus_vdev_driver.h
@@ -5,15 +5,15 @@
 #ifndef BUS_VDEV_DRIVER_H
 #define BUS_VDEV_DRIVER_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_bus_vdev.h>
 #include <rte_compat.h>
 #include <dev_driver.h>
 #include <rte_devargs.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct rte_vdev_device {
 	RTE_TAILQ_ENTRY(rte_vdev_device) next;      /**< Next attached vdev */
 	struct rte_device device;               /**< Inherit core device */
diff --git a/drivers/bus/vmbus/bus_vmbus_driver.h b/drivers/bus/vmbus/bus_vmbus_driver.h
index e2475a642d..bc394208de 100644
--- a/drivers/bus/vmbus/bus_vmbus_driver.h
+++ b/drivers/bus/vmbus/bus_vmbus_driver.h
@@ -6,14 +6,14 @@
 #ifndef BUS_VMBUS_DRIVER_H
 #define BUS_VMBUS_DRIVER_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_bus_vmbus.h>
 #include <rte_compat.h>
 #include <dev_driver.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct vmbus_channel;
 struct vmbus_mon_page;
 
diff --git a/drivers/bus/vmbus/rte_bus_vmbus.h b/drivers/bus/vmbus/rte_bus_vmbus.h
index 9467bd8f3d..fd18bca73c 100644
--- a/drivers/bus/vmbus/rte_bus_vmbus.h
+++ b/drivers/bus/vmbus/rte_bus_vmbus.h
@@ -11,10 +11,6 @@
  *
  * VMBUS Interface
  */
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdio.h>
 #include <stdlib.h>
 #include <limits.h>
@@ -28,6 +24,10 @@ extern "C" {
 #include <rte_interrupts.h>
 #include <rte_vmbus_reg.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Forward declarations */
 struct rte_vmbus_device;
 struct rte_vmbus_driver;
diff --git a/drivers/dma/cnxk/cnxk_dma_event_dp.h b/drivers/dma/cnxk/cnxk_dma_event_dp.h
index 06b5ca8279..8c6cf5dd9a 100644
--- a/drivers/dma/cnxk/cnxk_dma_event_dp.h
+++ b/drivers/dma/cnxk/cnxk_dma_event_dp.h
@@ -5,16 +5,16 @@
 #ifndef _CNXK_DMA_EVENT_DP_H_
 #define _CNXK_DMA_EVENT_DP_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_common.h>
 #include <rte_compat.h>
 #include <rte_eventdev.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 __rte_internal
 uint16_t cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events);
 
diff --git a/drivers/dma/ioat/ioat_hw_defs.h b/drivers/dma/ioat/ioat_hw_defs.h
index dc3493a78f..11893951f2 100644
--- a/drivers/dma/ioat/ioat_hw_defs.h
+++ b/drivers/dma/ioat/ioat_hw_defs.h
@@ -5,12 +5,12 @@
 #ifndef IOAT_HW_DEFS_H
 #define IOAT_HW_DEFS_H
 
+#include <stdint.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-
 #define IOAT_PCI_CHANERR_INT_OFFSET	0x180
 
 #define IOAT_VER_3_0	0x30
diff --git a/drivers/event/dlb2/rte_pmd_dlb2.h b/drivers/event/dlb2/rte_pmd_dlb2.h
index 334c6c356d..dba7fd2f43 100644
--- a/drivers/event/dlb2/rte_pmd_dlb2.h
+++ b/drivers/event/dlb2/rte_pmd_dlb2.h
@@ -11,14 +11,14 @@
 #ifndef _RTE_PMD_DLB2_H_
 #define _RTE_PMD_DLB2_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_compat.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @warning
  * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
diff --git a/drivers/mempool/dpaa2/rte_dpaa2_mempool.h b/drivers/mempool/dpaa2/rte_dpaa2_mempool.h
index 7fe3d93f61..0286090b1b 100644
--- a/drivers/mempool/dpaa2/rte_dpaa2_mempool.h
+++ b/drivers/mempool/dpaa2/rte_dpaa2_mempool.h
@@ -12,13 +12,13 @@
  *
  */
 
+#include <rte_compat.h>
+#include <rte_mempool.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_compat.h>
-#include <rte_mempool.h>
-
 /**
  * Get BPID corresponding to the packet pool
  *
diff --git a/drivers/net/avp/rte_avp_fifo.h b/drivers/net/avp/rte_avp_fifo.h
index c1658da685..879de3b1c0 100644
--- a/drivers/net/avp/rte_avp_fifo.h
+++ b/drivers/net/avp/rte_avp_fifo.h
@@ -8,10 +8,6 @@
 
 #include "rte_avp_common.h"
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #ifdef __KERNEL__
 /* Write memory barrier for kernel compiles */
 #define AVP_WMB() smp_wmb()
@@ -27,6 +23,10 @@ extern "C" {
 #ifndef __KERNEL__
 #include <rte_debug.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Initializes the avp fifo structure
  */
diff --git a/drivers/net/bonding/rte_eth_bond.h b/drivers/net/bonding/rte_eth_bond.h
index f10165f2c6..e59ff8793e 100644
--- a/drivers/net/bonding/rte_eth_bond.h
+++ b/drivers/net/bonding/rte_eth_bond.h
@@ -17,12 +17,12 @@
  * load balancing of network ports
  */
 
+#include <rte_ether.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_ether.h>
-
 /* Supported modes of operation of link bonding library  */
 
 #define BONDING_MODE_ROUND_ROBIN		(0)
diff --git a/drivers/net/i40e/rte_pmd_i40e.h b/drivers/net/i40e/rte_pmd_i40e.h
index a802f989e9..5af7e2330f 100644
--- a/drivers/net/i40e/rte_pmd_i40e.h
+++ b/drivers/net/i40e/rte_pmd_i40e.h
@@ -14,14 +14,14 @@
  *
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include <rte_ethdev.h>
 #include <rte_ether.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Response sent back to i40e driver from user app after callback
  */
diff --git a/drivers/net/mlx5/mlx5_trace.h b/drivers/net/mlx5/mlx5_trace.h
index 888d96f60b..a8f0b372c8 100644
--- a/drivers/net/mlx5/mlx5_trace.h
+++ b/drivers/net/mlx5/mlx5_trace.h
@@ -11,14 +11,14 @@
  * API for mlx5 PMD trace support
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <mlx5_prm.h>
 #include <rte_mbuf.h>
 #include <rte_trace_point.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* TX burst subroutines trace points. */
 RTE_TRACE_POINT_FP(
 	rte_pmd_mlx5_trace_tx_entry,
diff --git a/drivers/net/ring/rte_eth_ring.h b/drivers/net/ring/rte_eth_ring.h
index 59e074d0ad..98292c7b33 100644
--- a/drivers/net/ring/rte_eth_ring.h
+++ b/drivers/net/ring/rte_eth_ring.h
@@ -5,12 +5,12 @@
 #ifndef _RTE_ETH_RING_H_
 #define _RTE_ETH_RING_H_
 
+#include <rte_ring.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_ring.h>
-
 /**
  * Create a new ethdev port from a set of rings
  *
diff --git a/drivers/net/vhost/rte_eth_vhost.h b/drivers/net/vhost/rte_eth_vhost.h
index 0e68b9f668..6ec59a7adc 100644
--- a/drivers/net/vhost/rte_eth_vhost.h
+++ b/drivers/net/vhost/rte_eth_vhost.h
@@ -5,15 +5,15 @@
 #ifndef _RTE_ETH_VHOST_H_
 #define _RTE_ETH_VHOST_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <stdbool.h>
 
 #include <rte_vhost.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /*
  * Event description.
  */
diff --git a/drivers/raw/ifpga/afu_pmd_core.h b/drivers/raw/ifpga/afu_pmd_core.h
index a8f1afe343..abf9e491f7 100644
--- a/drivers/raw/ifpga/afu_pmd_core.h
+++ b/drivers/raw/ifpga/afu_pmd_core.h
@@ -5,10 +5,6 @@
 #ifndef AFU_PMD_CORE_H
 #define AFU_PMD_CORE_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <stdio.h>
 #include <unistd.h>
@@ -20,6 +16,10 @@ extern "C" {
 
 #include "ifpga_rawdev.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define AFU_RAWDEV_MAX_DRVS  32
 
 struct afu_rawdev;
diff --git a/drivers/raw/ifpga/afu_pmd_he_hssi.h b/drivers/raw/ifpga/afu_pmd_he_hssi.h
index aebbe32d54..282289d912 100644
--- a/drivers/raw/ifpga/afu_pmd_he_hssi.h
+++ b/drivers/raw/ifpga/afu_pmd_he_hssi.h
@@ -5,13 +5,13 @@
 #ifndef AFU_PMD_HE_HSSI_H
 #define AFU_PMD_HE_HSSI_H
 
+#include "afu_pmd_core.h"
+#include "rte_pmd_afu.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "afu_pmd_core.h"
-#include "rte_pmd_afu.h"
-
 #define HE_HSSI_UUID_L    0xbb370242ac130002
 #define HE_HSSI_UUID_H    0x823c334c98bf11ea
 #define NUM_HE_HSSI_PORTS 8
diff --git a/drivers/raw/ifpga/afu_pmd_he_lpbk.h b/drivers/raw/ifpga/afu_pmd_he_lpbk.h
index eab7b55199..67b3653c21 100644
--- a/drivers/raw/ifpga/afu_pmd_he_lpbk.h
+++ b/drivers/raw/ifpga/afu_pmd_he_lpbk.h
@@ -5,13 +5,13 @@
 #ifndef AFU_PMD_HE_LPBK_H
 #define AFU_PMD_HE_LPBK_H
 
+#include "afu_pmd_core.h"
+#include "rte_pmd_afu.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "afu_pmd_core.h"
-#include "rte_pmd_afu.h"
-
 #define HE_LPBK_UUID_L     0xb94b12284c31e02b
 #define HE_LPBK_UUID_H     0x56e203e9864f49a7
 #define HE_MEM_LPBK_UUID_L 0xbb652a578330a8eb
diff --git a/drivers/raw/ifpga/afu_pmd_he_mem.h b/drivers/raw/ifpga/afu_pmd_he_mem.h
index 998ca92416..41854d8c58 100644
--- a/drivers/raw/ifpga/afu_pmd_he_mem.h
+++ b/drivers/raw/ifpga/afu_pmd_he_mem.h
@@ -5,13 +5,13 @@
 #ifndef AFU_PMD_HE_MEM_H
 #define AFU_PMD_HE_MEM_H
 
+#include "afu_pmd_core.h"
+#include "rte_pmd_afu.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "afu_pmd_core.h"
-#include "rte_pmd_afu.h"
-
 #define HE_MEM_TG_UUID_L  0xa3dc5b831f5cecbb
 #define HE_MEM_TG_UUID_H  0x4dadea342c7848cb
 
diff --git a/drivers/raw/ifpga/afu_pmd_n3000.h b/drivers/raw/ifpga/afu_pmd_n3000.h
index 403cc64b91..f6b6e07c6b 100644
--- a/drivers/raw/ifpga/afu_pmd_n3000.h
+++ b/drivers/raw/ifpga/afu_pmd_n3000.h
@@ -5,13 +5,13 @@
 #ifndef AFU_PMD_N3000_H
 #define AFU_PMD_N3000_H
 
+#include "afu_pmd_core.h"
+#include "rte_pmd_afu.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "afu_pmd_core.h"
-#include "rte_pmd_afu.h"
-
 #define N3000_AFU_UUID_L  0xc000c9660d824272
 #define N3000_AFU_UUID_H  0x9aeffe5f84570612
 #define N3000_NLB0_UUID_L 0xf89e433683f9040b
diff --git a/drivers/raw/ifpga/rte_pmd_afu.h b/drivers/raw/ifpga/rte_pmd_afu.h
index 5403ed25f5..0edacc3a9c 100644
--- a/drivers/raw/ifpga/rte_pmd_afu.h
+++ b/drivers/raw/ifpga/rte_pmd_afu.h
@@ -14,12 +14,12 @@
  *
  */
 
+#include <stdint.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-
 #define RTE_PMD_AFU_N3000_NLB   1
 #define RTE_PMD_AFU_N3000_DMA   2
 
diff --git a/drivers/raw/ifpga/rte_pmd_ifpga.h b/drivers/raw/ifpga/rte_pmd_ifpga.h
index 791543f2cd..36b7f9c018 100644
--- a/drivers/raw/ifpga/rte_pmd_ifpga.h
+++ b/drivers/raw/ifpga/rte_pmd_ifpga.h
@@ -14,12 +14,12 @@
  *
  */
 
+#include <stdint.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-
 #define IFPGA_MAX_PORT_NUM   4
 
 /**
diff --git a/examples/ethtool/lib/rte_ethtool.h b/examples/ethtool/lib/rte_ethtool.h
index d27e0102b1..c7dd3d9755 100644
--- a/examples/ethtool/lib/rte_ethtool.h
+++ b/examples/ethtool/lib/rte_ethtool.h
@@ -30,14 +30,14 @@
  * rte_ethtool_net_set_rx_mode      net_device_ops::ndo_set_rx_mode
  *
  */
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <rte_ethdev.h>
 #include <linux/ethtool.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Retrieve the Ethernet device driver information according to
  * attributes described by ethtool data structure, ethtool_drvinfo.
diff --git a/examples/qos_sched/main.h b/examples/qos_sched/main.h
index 04e77a4a10..ea66df0434 100644
--- a/examples/qos_sched/main.h
+++ b/examples/qos_sched/main.h
@@ -5,12 +5,12 @@
 #ifndef _MAIN_H_
 #define _MAIN_H_
 
+#include <rte_sched.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_sched.h>
-
 #define RTE_LOGTYPE_APP RTE_LOGTYPE_USER1
 
 /*
diff --git a/examples/vm_power_manager/channel_manager.h b/examples/vm_power_manager/channel_manager.h
index eb989b20ad..6f70539815 100644
--- a/examples/vm_power_manager/channel_manager.h
+++ b/examples/vm_power_manager/channel_manager.h
@@ -5,16 +5,16 @@
 #ifndef CHANNEL_MANAGER_H_
 #define CHANNEL_MANAGER_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <linux/limits.h>
 #include <linux/un.h>
 #include <stdbool.h>
 
 #include <rte_stdatomic.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Maximum name length including '\0' terminator */
 #define CHANNEL_MGR_MAX_NAME_LEN    64
 
diff --git a/lib/acl/rte_acl_osdep.h b/lib/acl/rte_acl_osdep.h
index 3c1dc402ca..e4c7d07c69 100644
--- a/lib/acl/rte_acl_osdep.h
+++ b/lib/acl/rte_acl_osdep.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_ACL_OSDEP_H_
 #define _RTE_ACL_OSDEP_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  *
@@ -49,6 +45,10 @@ extern "C" {
 #include <rte_cpuflags.h>
 #include <rte_debug.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/bbdev/rte_bbdev.h b/lib/bbdev/rte_bbdev.h
index 0cbfdd1c95..9e83dd2bb0 100644
--- a/lib/bbdev/rte_bbdev.h
+++ b/lib/bbdev/rte_bbdev.h
@@ -20,10 +20,6 @@
  * from the same queue.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <stdbool.h>
 
@@ -32,6 +28,10 @@ extern "C" {
 
 #include "rte_bbdev_op.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifndef RTE_BBDEV_MAX_DEVS
 #define RTE_BBDEV_MAX_DEVS 128  /**< Max number of devices */
 #endif
diff --git a/lib/bbdev/rte_bbdev_op.h b/lib/bbdev/rte_bbdev_op.h
index 459631d0d0..6f4bae7d0f 100644
--- a/lib/bbdev/rte_bbdev_op.h
+++ b/lib/bbdev/rte_bbdev_op.h
@@ -11,10 +11,6 @@
  * Defines wireless base band layer 1 operations and capabilities
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_compat.h>
@@ -23,6 +19,10 @@ extern "C" {
 #include <rte_memory.h>
 #include <rte_mempool.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Number of columns in sub-block interleaver (36.212, section 5.1.4.1.1) */
 #define RTE_BBDEV_TURBO_C_SUBBLOCK (32)
 /* Maximum size of Transport Block (36.213, Table, Table 7.1.7.2.5-1) */
diff --git a/lib/bbdev/rte_bbdev_pmd.h b/lib/bbdev/rte_bbdev_pmd.h
index 442b23943d..0a1738fc05 100644
--- a/lib/bbdev/rte_bbdev_pmd.h
+++ b/lib/bbdev/rte_bbdev_pmd.h
@@ -14,15 +14,15 @@
  * bbdev interface. User applications should not use this API.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <rte_log.h>
 
 #include "rte_bbdev.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Suggested value for SW based devices */
 #define RTE_BBDEV_DEFAULT_MAX_NB_QUEUES RTE_MAX_LCORE
 
diff --git a/lib/bpf/bpf_def.h b/lib/bpf/bpf_def.h
index f08cd9106b..9f2e162914 100644
--- a/lib/bpf/bpf_def.h
+++ b/lib/bpf/bpf_def.h
@@ -7,10 +7,6 @@
 #ifndef _RTE_BPF_DEF_H_
 #define _RTE_BPF_DEF_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  *
@@ -25,6 +21,10 @@ extern "C" {
 
 #include <stdint.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 
 /*
  * The instruction encodings.
diff --git a/lib/compressdev/rte_comp.h b/lib/compressdev/rte_comp.h
index 830a240b6b..d66a4b1cb9 100644
--- a/lib/compressdev/rte_comp.h
+++ b/lib/compressdev/rte_comp.h
@@ -11,12 +11,12 @@
  * RTE definitions for Data Compression Service
  */
 
+#include <rte_mbuf.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_mbuf.h>
-
 /**
  * compression service feature flags
  *
diff --git a/lib/compressdev/rte_compressdev.h b/lib/compressdev/rte_compressdev.h
index e0294a18bd..b3392553a6 100644
--- a/lib/compressdev/rte_compressdev.h
+++ b/lib/compressdev/rte_compressdev.h
@@ -13,13 +13,13 @@
  * Defines comp device APIs for the provisioning of compression operations.
  */
 
+
+#include "rte_comp.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-
-#include "rte_comp.h"
-
 /**
  * Parameter log base 2 range description.
  * Final value will be 2^value.
diff --git a/lib/compressdev/rte_compressdev_internal.h b/lib/compressdev/rte_compressdev_internal.h
index 67f8b51a37..a980d74cbf 100644
--- a/lib/compressdev/rte_compressdev_internal.h
+++ b/lib/compressdev/rte_compressdev_internal.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_COMPRESSDEV_INTERNAL_H_
 #define _RTE_COMPRESSDEV_INTERNAL_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /* rte_compressdev_internal.h
  * This file holds Compressdev private data structures.
  */
@@ -16,6 +12,10 @@ extern "C" {
 
 #include "rte_comp.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_COMPRESSDEV_NAME_MAX_LEN	(64)
 /**< Max length of name of comp PMD */
 
diff --git a/lib/compressdev/rte_compressdev_pmd.h b/lib/compressdev/rte_compressdev_pmd.h
index 32e29c9d16..ea721f014d 100644
--- a/lib/compressdev/rte_compressdev_pmd.h
+++ b/lib/compressdev/rte_compressdev_pmd.h
@@ -13,10 +13,6 @@
  * them directly.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <string.h>
 
 #include <dev_driver.h>
@@ -24,6 +20,10 @@ extern "C" {
 #include "rte_compressdev.h"
 #include "rte_compressdev_internal.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_COMPRESSDEV_PMD_NAME_ARG			("name")
 #define RTE_COMPRESSDEV_PMD_SOCKET_ID_ARG		("socket_id")
 
diff --git a/lib/cryptodev/cryptodev_pmd.h b/lib/cryptodev/cryptodev_pmd.h
index 6c114f7181..3e2e2673b8 100644
--- a/lib/cryptodev/cryptodev_pmd.h
+++ b/lib/cryptodev/cryptodev_pmd.h
@@ -5,10 +5,6 @@
 #ifndef _CRYPTODEV_PMD_H_
 #define _CRYPTODEV_PMD_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /** @file
  * RTE Crypto PMD APIs
  *
@@ -28,6 +24,10 @@ extern "C" {
 #include "rte_crypto.h"
 #include "rte_cryptodev.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 
 #define RTE_CRYPTODEV_PMD_DEFAULT_MAX_NB_QUEUE_PAIRS	8
 
diff --git a/lib/cryptodev/cryptodev_trace.h b/lib/cryptodev/cryptodev_trace.h
index 935f0d564b..e186f0f3c1 100644
--- a/lib/cryptodev/cryptodev_trace.h
+++ b/lib/cryptodev/cryptodev_trace.h
@@ -11,14 +11,14 @@
  * API for cryptodev trace support
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_trace_point.h>
 
 #include "rte_cryptodev.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 RTE_TRACE_POINT(
 	rte_cryptodev_trace_configure,
 	RTE_TRACE_POINT_ARGS(uint8_t dev_id,
diff --git a/lib/cryptodev/rte_crypto.h b/lib/cryptodev/rte_crypto.h
index dbc2700da5..dcf4a36fb2 100644
--- a/lib/cryptodev/rte_crypto.h
+++ b/lib/cryptodev/rte_crypto.h
@@ -11,10 +11,6 @@
  * RTE Cryptography Common Definitions
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 
 #include <rte_mbuf.h>
 #include <rte_memory.h>
@@ -24,6 +20,10 @@ extern "C" {
 #include "rte_crypto_sym.h"
 #include "rte_crypto_asym.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Crypto operation types */
 enum rte_crypto_op_type {
 	RTE_CRYPTO_OP_TYPE_UNDEFINED,
diff --git a/lib/cryptodev/rte_crypto_asym.h b/lib/cryptodev/rte_crypto_asym.h
index 39d3da3952..4b7ea36961 100644
--- a/lib/cryptodev/rte_crypto_asym.h
+++ b/lib/cryptodev/rte_crypto_asym.h
@@ -14,10 +14,6 @@
  * asymmetric crypto operations.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <string.h>
 #include <stdint.h>
 
@@ -27,6 +23,10 @@ extern "C" {
 
 #include "rte_crypto_sym.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct rte_cryptodev_asym_session;
 
 /** asym key exchange operation type name strings */
diff --git a/lib/cryptodev/rte_crypto_sym.h b/lib/cryptodev/rte_crypto_sym.h
index 53b18b9412..fb73024010 100644
--- a/lib/cryptodev/rte_crypto_sym.h
+++ b/lib/cryptodev/rte_crypto_sym.h
@@ -14,10 +14,6 @@
  * as supported symmetric crypto operation combinations.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <string.h>
 
 #include <rte_compat.h>
@@ -26,6 +22,10 @@ extern "C" {
 #include <rte_mempool.h>
 #include <rte_common.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Crypto IO Vector (in analogy with struct iovec)
  * Supposed be used to pass input/output data buffers for crypto data-path
diff --git a/lib/cryptodev/rte_cryptodev.h b/lib/cryptodev/rte_cryptodev.h
index bec947f6d5..8051c5a6a3 100644
--- a/lib/cryptodev/rte_cryptodev.h
+++ b/lib/cryptodev/rte_cryptodev.h
@@ -14,10 +14,6 @@
  * authentication operations.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include "rte_kvargs.h"
 #include "rte_crypto.h"
@@ -1859,6 +1855,10 @@ int rte_cryptodev_remove_deq_callback(uint8_t dev_id,
 				      struct rte_cryptodev_cb *cb);
 
 #include <rte_cryptodev_core.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
 /**
  *
  * Dequeue a burst of processed crypto operations from a queue on the crypto
diff --git a/lib/cryptodev/rte_cryptodev_trace_fp.h b/lib/cryptodev/rte_cryptodev_trace_fp.h
index dbfbc7b2e5..f23f882804 100644
--- a/lib/cryptodev/rte_cryptodev_trace_fp.h
+++ b/lib/cryptodev/rte_cryptodev_trace_fp.h
@@ -5,12 +5,12 @@
 #ifndef _RTE_CRYPTODEV_TRACE_FP_H_
 #define _RTE_CRYPTODEV_TRACE_FP_H_
 
+#include <rte_trace_point.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_trace_point.h>
-
 RTE_TRACE_POINT_FP(
 	rte_cryptodev_trace_enqueue_burst,
 	RTE_TRACE_POINT_ARGS(uint8_t dev_id, uint16_t qp_id, void **ops,
diff --git a/lib/dispatcher/rte_dispatcher.h b/lib/dispatcher/rte_dispatcher.h
index d8182d5f2c..ba2c353073 100644
--- a/lib/dispatcher/rte_dispatcher.h
+++ b/lib/dispatcher/rte_dispatcher.h
@@ -19,16 +19,16 @@
  * event device.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdbool.h>
 #include <stdint.h>
 
 #include <rte_compat.h>
 #include <rte_eventdev.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Function prototype for match callbacks.
  *
diff --git a/lib/dmadev/rte_dmadev.h b/lib/dmadev/rte_dmadev.h
index 5474a5281d..d174d325a1 100644
--- a/lib/dmadev/rte_dmadev.h
+++ b/lib/dmadev/rte_dmadev.h
@@ -772,9 +772,17 @@ struct rte_dma_sge {
 	uint32_t length; /**< The DMA operation length. */
 };
 
+#ifdef __cplusplus
+}
+#endif
+
 #include "rte_dmadev_core.h"
 #include "rte_dmadev_trace_fp.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**@{@name DMA operation flag
  * @see rte_dma_copy()
  * @see rte_dma_copy_sg()
diff --git a/lib/eal/arm/include/rte_atomic_32.h b/lib/eal/arm/include/rte_atomic_32.h
index 62fc33773d..0b9a0dfa30 100644
--- a/lib/eal/arm/include/rte_atomic_32.h
+++ b/lib/eal/arm/include/rte_atomic_32.h
@@ -9,12 +9,12 @@
 #  error Platform must be built with RTE_FORCE_INTRINSICS
 #endif
 
+#include "generic/rte_atomic.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_atomic.h"
-
 #define	rte_mb()  __sync_synchronize()
 
 #define	rte_wmb() do { asm volatile ("dmb st" : : : "memory"); } while (0)
diff --git a/lib/eal/arm/include/rte_atomic_64.h b/lib/eal/arm/include/rte_atomic_64.h
index 7c99fc0a02..181bb60929 100644
--- a/lib/eal/arm/include/rte_atomic_64.h
+++ b/lib/eal/arm/include/rte_atomic_64.h
@@ -10,14 +10,14 @@
 #  error Platform must be built with RTE_FORCE_INTRINSICS
 #endif
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include "generic/rte_atomic.h"
 #include <rte_branch_prediction.h>
 #include <rte_debug.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define rte_mb() asm volatile("dmb osh" : : : "memory")
 
 #define rte_wmb() asm volatile("dmb oshst" : : : "memory")
diff --git a/lib/eal/arm/include/rte_byteorder.h b/lib/eal/arm/include/rte_byteorder.h
index ff02052f2e..a0aaff4a28 100644
--- a/lib/eal/arm/include/rte_byteorder.h
+++ b/lib/eal/arm/include/rte_byteorder.h
@@ -9,14 +9,14 @@
 #  error Platform must be built with RTE_FORCE_INTRINSICS
 #endif
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <rte_common.h>
 #include "generic/rte_byteorder.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* ARM architecture is bi-endian (both big and little). */
 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
 
diff --git a/lib/eal/arm/include/rte_cpuflags_32.h b/lib/eal/arm/include/rte_cpuflags_32.h
index 770b09b99d..7e33acd9fb 100644
--- a/lib/eal/arm/include/rte_cpuflags_32.h
+++ b/lib/eal/arm/include/rte_cpuflags_32.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_CPUFLAGS_ARM32_H_
 #define _RTE_CPUFLAGS_ARM32_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * Enumeration of all CPU features supported
  */
@@ -46,6 +42,10 @@ enum rte_cpu_flag_t {
 
 #include "generic/rte_cpuflags.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/arm/include/rte_cpuflags_64.h b/lib/eal/arm/include/rte_cpuflags_64.h
index afe70209c3..f84633159e 100644
--- a/lib/eal/arm/include/rte_cpuflags_64.h
+++ b/lib/eal/arm/include/rte_cpuflags_64.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_CPUFLAGS_ARM64_H_
 #define _RTE_CPUFLAGS_ARM64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * Enumeration of all CPU features supported
  */
@@ -40,6 +36,10 @@ enum rte_cpu_flag_t {
 
 #include "generic/rte_cpuflags.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/arm/include/rte_cycles_32.h b/lib/eal/arm/include/rte_cycles_32.h
index 859cd2e5bb..2b20c8c6f5 100644
--- a/lib/eal/arm/include/rte_cycles_32.h
+++ b/lib/eal/arm/include/rte_cycles_32.h
@@ -15,12 +15,12 @@
 
 #include <time.h>
 
+#include "generic/rte_cycles.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_cycles.h"
-
 /**
  * Read the time base register.
  *
diff --git a/lib/eal/arm/include/rte_cycles_64.h b/lib/eal/arm/include/rte_cycles_64.h
index 8b05302f47..bb76e4d7e0 100644
--- a/lib/eal/arm/include/rte_cycles_64.h
+++ b/lib/eal/arm/include/rte_cycles_64.h
@@ -6,12 +6,12 @@
 #ifndef _RTE_CYCLES_ARM64_H_
 #define _RTE_CYCLES_ARM64_H_
 
+#include "generic/rte_cycles.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_cycles.h"
-
 /** Read generic counter frequency */
 static __rte_always_inline uint64_t
 __rte_arm64_cntfrq(void)
diff --git a/lib/eal/arm/include/rte_io.h b/lib/eal/arm/include/rte_io.h
index f4e66e6bad..ca1a353bed 100644
--- a/lib/eal/arm/include/rte_io.h
+++ b/lib/eal/arm/include/rte_io.h
@@ -5,16 +5,16 @@
 #ifndef _RTE_IO_ARM_H_
 #define _RTE_IO_ARM_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #ifdef RTE_ARCH_64
 #include "rte_io_64.h"
 #else
 #include "generic/rte_io.h"
 #endif
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/arm/include/rte_io_64.h b/lib/eal/arm/include/rte_io_64.h
index 96da7789ce..88db82a7eb 100644
--- a/lib/eal/arm/include/rte_io_64.h
+++ b/lib/eal/arm/include/rte_io_64.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_IO_ARM64_H_
 #define _RTE_IO_ARM64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #define RTE_OVERRIDE_IO_H
@@ -17,6 +13,10 @@ extern "C" {
 #include <rte_compat.h>
 #include "rte_atomic_64.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static __rte_always_inline uint8_t
 rte_read8_relaxed(const volatile void *addr)
 {
diff --git a/lib/eal/arm/include/rte_memcpy_32.h b/lib/eal/arm/include/rte_memcpy_32.h
index fb3245b59c..99fd5757ca 100644
--- a/lib/eal/arm/include/rte_memcpy_32.h
+++ b/lib/eal/arm/include/rte_memcpy_32.h
@@ -8,10 +8,6 @@
 #include <stdint.h>
 #include <string.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include "generic/rte_memcpy.h"
 
 #ifdef RTE_ARCH_ARM_NEON_MEMCPY
@@ -23,6 +19,10 @@ extern "C" {
 /* ARM NEON Intrinsics are used to copy data */
 #include <arm_neon.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void
 rte_mov16(uint8_t *dst, const uint8_t *src)
 {
diff --git a/lib/eal/arm/include/rte_memcpy_64.h b/lib/eal/arm/include/rte_memcpy_64.h
index 85ad587bd3..90039039be 100644
--- a/lib/eal/arm/include/rte_memcpy_64.h
+++ b/lib/eal/arm/include/rte_memcpy_64.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_MEMCPY_ARM64_H_
 #define _RTE_MEMCPY_ARM64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <string.h>
 
@@ -18,6 +14,10 @@ extern "C" {
 #include <rte_common.h>
 #include <rte_branch_prediction.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /*
  * The memory copy performance differs on different AArch64 micro-architectures.
  * And the most recent glibc (e.g. 2.23 or later) can provide a better memcpy()
@@ -324,7 +324,16 @@ void *rte_memcpy(void *dst, const void *src, size_t n)
 }
 #endif /* RTE_CACHE_LINE_SIZE >= 128 */
 
-#else
+#ifdef __cplusplus
+}
+#endif
+
+#else /* RTE_ARCH_ARM64_MEMCPY */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void
 rte_mov16(uint8_t *dst, const uint8_t *src)
 {
@@ -363,10 +372,10 @@ rte_mov256(uint8_t *dst, const uint8_t *src)
 
 #define rte_memcpy(d, s, n)	memcpy((d), (s), (n))
 
-#endif /* RTE_ARCH_ARM64_MEMCPY */
-
 #ifdef __cplusplus
 }
 #endif
 
+#endif /* RTE_ARCH_ARM64_MEMCPY */
+
 #endif /* _RTE_MEMCPY_ARM_64_H_ */
diff --git a/lib/eal/arm/include/rte_pause.h b/lib/eal/arm/include/rte_pause.h
index 6c7002ad98..b8a3d64b3a 100644
--- a/lib/eal/arm/include/rte_pause.h
+++ b/lib/eal/arm/include/rte_pause.h
@@ -5,16 +5,16 @@
 #ifndef _RTE_PAUSE_ARM_H_
 #define _RTE_PAUSE_ARM_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #ifdef RTE_ARCH_64
 #include <rte_pause_64.h>
 #else
 #include <rte_pause_32.h>
 #endif
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/arm/include/rte_pause_32.h b/lib/eal/arm/include/rte_pause_32.h
index d4768c7a98..7870fac763 100644
--- a/lib/eal/arm/include/rte_pause_32.h
+++ b/lib/eal/arm/include/rte_pause_32.h
@@ -5,13 +5,13 @@
 #ifndef _RTE_PAUSE_ARM32_H_
 #define _RTE_PAUSE_ARM32_H_
 
+#include <rte_common.h>
+#include "generic/rte_pause.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_common.h>
-#include "generic/rte_pause.h"
-
 static inline void rte_pause(void)
 {
 }
diff --git a/lib/eal/arm/include/rte_pause_64.h b/lib/eal/arm/include/rte_pause_64.h
index 9e2dbf3531..1526bf87cc 100644
--- a/lib/eal/arm/include/rte_pause_64.h
+++ b/lib/eal/arm/include/rte_pause_64.h
@@ -6,10 +6,6 @@
 #ifndef _RTE_PAUSE_ARM64_H_
 #define _RTE_PAUSE_ARM64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 #include <rte_stdatomic.h>
 
@@ -19,6 +15,10 @@ extern "C" {
 
 #include "generic/rte_pause.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void rte_pause(void)
 {
 	asm volatile("yield" ::: "memory");
diff --git a/lib/eal/arm/include/rte_power_intrinsics.h b/lib/eal/arm/include/rte_power_intrinsics.h
index 9e498e9ebf..5481f45ad3 100644
--- a/lib/eal/arm/include/rte_power_intrinsics.h
+++ b/lib/eal/arm/include/rte_power_intrinsics.h
@@ -5,14 +5,14 @@
 #ifndef _RTE_POWER_INTRINSIC_ARM_H_
 #define _RTE_POWER_INTRINSIC_ARM_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 
 #include "generic/rte_power_intrinsics.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/arm/include/rte_prefetch_32.h b/lib/eal/arm/include/rte_prefetch_32.h
index 0e9a140c8a..619bf27c79 100644
--- a/lib/eal/arm/include/rte_prefetch_32.h
+++ b/lib/eal/arm/include/rte_prefetch_32.h
@@ -5,14 +5,14 @@
 #ifndef _RTE_PREFETCH_ARM32_H_
 #define _RTE_PREFETCH_ARM32_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include <rte_common.h>
 #include "generic/rte_prefetch.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void rte_prefetch0(const volatile void *p)
 {
 	asm volatile ("pld [%0]" : : "r" (p));
diff --git a/lib/eal/arm/include/rte_prefetch_64.h b/lib/eal/arm/include/rte_prefetch_64.h
index 22cba48e29..4f60123b8b 100644
--- a/lib/eal/arm/include/rte_prefetch_64.h
+++ b/lib/eal/arm/include/rte_prefetch_64.h
@@ -5,14 +5,14 @@
 #ifndef _RTE_PREFETCH_ARM_64_H_
 #define _RTE_PREFETCH_ARM_64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include <rte_common.h>
 #include "generic/rte_prefetch.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void rte_prefetch0(const volatile void *p)
 {
 	asm volatile ("PRFM PLDL1KEEP, [%0]" : : "r" (p));
diff --git a/lib/eal/arm/include/rte_rwlock.h b/lib/eal/arm/include/rte_rwlock.h
index 18bb37b036..727cabafec 100644
--- a/lib/eal/arm/include/rte_rwlock.h
+++ b/lib/eal/arm/include/rte_rwlock.h
@@ -5,12 +5,12 @@
 #ifndef _RTE_RWLOCK_ARM_H_
 #define _RTE_RWLOCK_ARM_H_
 
+#include "generic/rte_rwlock.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_rwlock.h"
-
 static inline void
 rte_rwlock_read_lock_tm(rte_rwlock_t *rwl)
 {
diff --git a/lib/eal/arm/include/rte_spinlock.h b/lib/eal/arm/include/rte_spinlock.h
index a973763c23..a5d01b0d21 100644
--- a/lib/eal/arm/include/rte_spinlock.h
+++ b/lib/eal/arm/include/rte_spinlock.h
@@ -9,13 +9,13 @@
 #  error Platform must be built with RTE_FORCE_INTRINSICS
 #endif
 
+#include <rte_common.h>
+#include "generic/rte_spinlock.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_common.h>
-#include "generic/rte_spinlock.h"
-
 static inline int rte_tm_supported(void)
 {
 	return 0;
diff --git a/lib/eal/freebsd/include/rte_os.h b/lib/eal/freebsd/include/rte_os.h
index 003468caff..f31f6af12d 100644
--- a/lib/eal/freebsd/include/rte_os.h
+++ b/lib/eal/freebsd/include/rte_os.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_OS_H_
 #define _RTE_OS_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * This header should contain any definition
  * which is not supported natively or named differently in FreeBSD.
@@ -17,6 +13,10 @@ extern "C" {
 #include <pthread_np.h>
 #include <sys/queue.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* These macros are compatible with system's sys/queue.h. */
 #define RTE_TAILQ_HEAD(name, type) TAILQ_HEAD(name, type)
 #define RTE_TAILQ_ENTRY(type) TAILQ_ENTRY(type)
diff --git a/lib/eal/include/bus_driver.h b/lib/eal/include/bus_driver.h
index 7b85a17a09..60527b75b6 100644
--- a/lib/eal/include/bus_driver.h
+++ b/lib/eal/include/bus_driver.h
@@ -5,16 +5,16 @@
 #ifndef BUS_DRIVER_H
 #define BUS_DRIVER_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_bus.h>
 #include <rte_compat.h>
 #include <rte_dev.h>
 #include <rte_eal.h>
 #include <rte_tailq.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct rte_devargs;
 struct rte_device;
 
diff --git a/lib/eal/include/dev_driver.h b/lib/eal/include/dev_driver.h
index 5efa8c437e..f7a9c17dc3 100644
--- a/lib/eal/include/dev_driver.h
+++ b/lib/eal/include/dev_driver.h
@@ -5,13 +5,13 @@
 #ifndef DEV_DRIVER_H
 #define DEV_DRIVER_H
 
+#include <rte_common.h>
+#include <rte_dev.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_common.h>
-#include <rte_dev.h>
-
 /**
  * A structure describing a device driver.
  */
diff --git a/lib/eal/include/eal_trace_internal.h b/lib/eal/include/eal_trace_internal.h
index 09c354717f..50f91d0929 100644
--- a/lib/eal/include/eal_trace_internal.h
+++ b/lib/eal/include/eal_trace_internal.h
@@ -11,16 +11,16 @@
  * API for EAL trace support
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_alarm.h>
 #include <rte_interrupts.h>
 #include <rte_trace_point.h>
 
 #include "eal_interrupts.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Alarm */
 RTE_TRACE_POINT(
 	rte_eal_trace_alarm_set,
diff --git a/lib/eal/include/generic/rte_atomic.h b/lib/eal/include/generic/rte_atomic.h
index f859707744..0a4f3f8528 100644
--- a/lib/eal/include/generic/rte_atomic.h
+++ b/lib/eal/include/generic/rte_atomic.h
@@ -17,6 +17,10 @@
 #include <rte_common.h>
 #include <rte_stdatomic.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __DOXYGEN__
 
 /** @name Memory Barrier
@@ -1156,4 +1160,8 @@ rte_atomic128_cmp_exchange(rte_int128_t *dst,
 
 #endif /* __DOXYGEN__ */
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* _RTE_ATOMIC_H_ */
diff --git a/lib/eal/include/generic/rte_byteorder.h b/lib/eal/include/generic/rte_byteorder.h
index f1c04ba83e..7973d6326f 100644
--- a/lib/eal/include/generic/rte_byteorder.h
+++ b/lib/eal/include/generic/rte_byteorder.h
@@ -24,6 +24,10 @@
 #include <rte_common.h>
 #include <rte_config.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /*
  * Compile-time endianness detection
  */
@@ -251,4 +255,8 @@ static uint64_t rte_be_to_cpu_64(rte_be64_t x);
 #endif
 #endif
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* _RTE_BYTEORDER_H_ */
diff --git a/lib/eal/include/generic/rte_cpuflags.h b/lib/eal/include/generic/rte_cpuflags.h
index d35551e931..bfe9df4516 100644
--- a/lib/eal/include/generic/rte_cpuflags.h
+++ b/lib/eal/include/generic/rte_cpuflags.h
@@ -15,6 +15,10 @@
 
 #include <rte_compat.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Structure used to describe platform-specific intrinsics that may or may not
  * be supported at runtime.
@@ -104,4 +108,8 @@ rte_cpu_getauxval(unsigned long type);
 int
 rte_cpu_strcmp_auxval(unsigned long type, const char *str);
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* _RTE_CPUFLAGS_H_ */
diff --git a/lib/eal/include/generic/rte_cycles.h b/lib/eal/include/generic/rte_cycles.h
index 075e899f5a..7cfd51f0eb 100644
--- a/lib/eal/include/generic/rte_cycles.h
+++ b/lib/eal/include/generic/rte_cycles.h
@@ -16,6 +16,10 @@
 #include <rte_debug.h>
 #include <rte_atomic.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define MS_PER_S 1000
 #define US_PER_S 1000000
 #define NS_PER_S 1000000000
@@ -175,4 +179,8 @@ void rte_delay_us_sleep(unsigned int us);
  */
 void rte_delay_us_callback_register(void(*userfunc)(unsigned int));
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* _RTE_CYCLES_H_ */
diff --git a/lib/eal/include/generic/rte_io.h b/lib/eal/include/generic/rte_io.h
index ebcf8051e1..73b0f7a9f4 100644
--- a/lib/eal/include/generic/rte_io.h
+++ b/lib/eal/include/generic/rte_io.h
@@ -17,6 +17,10 @@
 #include <rte_compat.h>
 #include <rte_atomic.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __DOXYGEN__
 
 /**
@@ -396,4 +400,8 @@ rte_write32_wc_relaxed(uint32_t value, volatile void *addr)
 
 #endif /* RTE_OVERRIDE_IO_H */
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* _RTE_IO_H_ */
diff --git a/lib/eal/include/generic/rte_memcpy.h b/lib/eal/include/generic/rte_memcpy.h
index e7f0f8eaa9..da53b72ca8 100644
--- a/lib/eal/include/generic/rte_memcpy.h
+++ b/lib/eal/include/generic/rte_memcpy.h
@@ -5,6 +5,10 @@
 #ifndef _RTE_MEMCPY_H_
 #define _RTE_MEMCPY_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @file
  *
@@ -113,4 +117,8 @@ rte_memcpy(void *dst, const void *src, size_t n);
 
 #endif /* __DOXYGEN__ */
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* _RTE_MEMCPY_H_ */
diff --git a/lib/eal/include/generic/rte_pause.h b/lib/eal/include/generic/rte_pause.h
index f2a1eadcbd..968c0886d3 100644
--- a/lib/eal/include/generic/rte_pause.h
+++ b/lib/eal/include/generic/rte_pause.h
@@ -19,6 +19,10 @@
 #include <rte_atomic.h>
 #include <rte_stdatomic.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Pause CPU execution for a short while
  *
@@ -136,4 +140,8 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
 } while (0)
 #endif /* ! RTE_WAIT_UNTIL_EQUAL_ARCH_DEFINED */
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* _RTE_PAUSE_H_ */
diff --git a/lib/eal/include/generic/rte_power_intrinsics.h b/lib/eal/include/generic/rte_power_intrinsics.h
index ea899f1bfa..86c0559468 100644
--- a/lib/eal/include/generic/rte_power_intrinsics.h
+++ b/lib/eal/include/generic/rte_power_intrinsics.h
@@ -9,6 +9,10 @@
 
 #include <rte_spinlock.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @file
  * Advanced power management operations.
@@ -147,4 +151,8 @@ int rte_power_pause(const uint64_t tsc_timestamp);
 int rte_power_monitor_multi(const struct rte_power_monitor_cond pmc[],
 		const uint32_t num, const uint64_t tsc_timestamp);
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* _RTE_POWER_INTRINSIC_H_ */
diff --git a/lib/eal/include/generic/rte_prefetch.h b/lib/eal/include/generic/rte_prefetch.h
index 773b3b8d1e..f7ac4ab48a 100644
--- a/lib/eal/include/generic/rte_prefetch.h
+++ b/lib/eal/include/generic/rte_prefetch.h
@@ -7,6 +7,10 @@
 
 #include <rte_compat.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @file
  *
@@ -146,4 +150,8 @@ __rte_experimental
 static inline void
 rte_cldemote(const volatile void *p);
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* _RTE_PREFETCH_H_ */
diff --git a/lib/eal/include/generic/rte_rwlock.h b/lib/eal/include/generic/rte_rwlock.h
index 5f939be98c..ac0474466a 100644
--- a/lib/eal/include/generic/rte_rwlock.h
+++ b/lib/eal/include/generic/rte_rwlock.h
@@ -22,10 +22,6 @@
  *  https://locklessinc.com/articles/locks/
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <errno.h>
 
 #include <rte_branch_prediction.h>
@@ -34,6 +30,10 @@ extern "C" {
 #include <rte_pause.h>
 #include <rte_stdatomic.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * The rte_rwlock_t type.
  *
diff --git a/lib/eal/include/generic/rte_spinlock.h b/lib/eal/include/generic/rte_spinlock.h
index 23fb04896f..c2980601b2 100644
--- a/lib/eal/include/generic/rte_spinlock.h
+++ b/lib/eal/include/generic/rte_spinlock.h
@@ -25,6 +25,10 @@
 #include <rte_pause.h>
 #include <rte_stdatomic.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * The rte_spinlock_t type.
  */
@@ -318,4 +322,8 @@ __rte_warn_unused_result
 static inline int rte_spinlock_recursive_trylock_tm(
 	rte_spinlock_recursive_t *slr);
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* _RTE_SPINLOCK_H_ */
diff --git a/lib/eal/include/generic/rte_vect.h b/lib/eal/include/generic/rte_vect.h
index 1f84292a41..b87520a4d9 100644
--- a/lib/eal/include/generic/rte_vect.h
+++ b/lib/eal/include/generic/rte_vect.h
@@ -209,6 +209,10 @@ enum rte_vect_max_simd {
 	 */
 };
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Get the supported SIMD bitwidth.
  *
@@ -230,4 +234,8 @@ uint16_t rte_vect_get_max_simd_bitwidth(void);
  */
 int rte_vect_set_max_simd_bitwidth(uint16_t bitwidth);
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* _RTE_VECT_H_ */
diff --git a/lib/eal/include/rte_alarm.h b/lib/eal/include/rte_alarm.h
index 7e4d0b2407..9b4721b77f 100644
--- a/lib/eal/include/rte_alarm.h
+++ b/lib/eal/include/rte_alarm.h
@@ -14,12 +14,12 @@
  * Does not require hpet support.
  */
 
+#include <stdint.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-
 /**
  * Signature of callback back function called when an alarm goes off.
  */
diff --git a/lib/eal/include/rte_bitmap.h b/lib/eal/include/rte_bitmap.h
index ebe46000a0..abb102f1d3 100644
--- a/lib/eal/include/rte_bitmap.h
+++ b/lib/eal/include/rte_bitmap.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_BITMAP_H__
 #define __INCLUDE_RTE_BITMAP_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Bitmap
@@ -43,6 +39,10 @@ extern "C" {
 #include <rte_branch_prediction.h>
 #include <rte_prefetch.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Slab */
 #define RTE_BITMAP_SLAB_BIT_SIZE                 64
 #define RTE_BITMAP_SLAB_BIT_SIZE_LOG2            6
diff --git a/lib/eal/include/rte_bus.h b/lib/eal/include/rte_bus.h
index dfe756fb11..519f7b35f0 100644
--- a/lib/eal/include/rte_bus.h
+++ b/lib/eal/include/rte_bus.h
@@ -14,14 +14,14 @@
  * over the devices and drivers in EAL.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdio.h>
 
 #include <rte_eal.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct rte_bus;
 struct rte_device;
 
diff --git a/lib/eal/include/rte_class.h b/lib/eal/include/rte_class.h
index 16e544ec9a..7631e36e82 100644
--- a/lib/eal/include/rte_class.h
+++ b/lib/eal/include/rte_class.h
@@ -18,12 +18,12 @@
  * cryptographic co-processor (crypto), etc.
  */
 
+#include <rte_dev.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_dev.h>
-
 /** Double linked list of classes */
 RTE_TAILQ_HEAD(rte_class_list, rte_class);
 
diff --git a/lib/eal/include/rte_common.h b/lib/eal/include/rte_common.h
index eec0400dad..2486caa471 100644
--- a/lib/eal/include/rte_common.h
+++ b/lib/eal/include/rte_common.h
@@ -12,10 +12,6 @@
  * for DPDK.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <assert.h>
 #include <limits.h>
 #include <stdint.h>
@@ -26,6 +22,10 @@ extern "C" {
 /* OS specific include */
 #include <rte_os.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifndef RTE_TOOLCHAIN_MSVC
 #ifndef typeof
 #define typeof __typeof__
diff --git a/lib/eal/include/rte_dev.h b/lib/eal/include/rte_dev.h
index cefa04f905..738400e8d1 100644
--- a/lib/eal/include/rte_dev.h
+++ b/lib/eal/include/rte_dev.h
@@ -13,16 +13,16 @@
  * This file manages the list of device drivers.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdio.h>
 
 #include <rte_config.h>
 #include <rte_common.h>
 #include <rte_log.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct rte_bus;
 struct rte_devargs;
 struct rte_device;
diff --git a/lib/eal/include/rte_devargs.h b/lib/eal/include/rte_devargs.h
index 515e978bbe..ed5a4675d9 100644
--- a/lib/eal/include/rte_devargs.h
+++ b/lib/eal/include/rte_devargs.h
@@ -16,14 +16,14 @@
  * list of rte_devargs structures.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdio.h>
 
 #include <rte_dev.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct rte_bus;
 
 /**
diff --git a/lib/eal/include/rte_eal_trace.h b/lib/eal/include/rte_eal_trace.h
index c3d15bbe5e..9ad2112801 100644
--- a/lib/eal/include/rte_eal_trace.h
+++ b/lib/eal/include/rte_eal_trace.h
@@ -11,12 +11,12 @@
  * API for EAL trace support
  */
 
+#include <rte_trace_point.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_trace_point.h>
-
 /* Generic */
 RTE_TRACE_POINT(
 	rte_eal_trace_generic_void,
diff --git a/lib/eal/include/rte_errno.h b/lib/eal/include/rte_errno.h
index ba45591d24..c49818a40e 100644
--- a/lib/eal/include/rte_errno.h
+++ b/lib/eal/include/rte_errno.h
@@ -11,12 +11,12 @@
 #ifndef _RTE_ERRNO_H_
 #define _RTE_ERRNO_H_
 
+#include <rte_per_lcore.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_per_lcore.h>
-
 RTE_DECLARE_PER_LCORE(int, _rte_errno); /**< Per core error number. */
 
 /**
diff --git a/lib/eal/include/rte_fbarray.h b/lib/eal/include/rte_fbarray.h
index e33076778f..27dbfc2d6c 100644
--- a/lib/eal/include/rte_fbarray.h
+++ b/lib/eal/include/rte_fbarray.h
@@ -30,14 +30,14 @@
  * another process is using ``rte_fbarray``.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdio.h>
 
 #include <rte_rwlock.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_FBARRAY_NAME_LEN 64
 
 struct rte_fbarray {
diff --git a/lib/eal/include/rte_keepalive.h b/lib/eal/include/rte_keepalive.h
index 3ec413da01..9ff870f6b4 100644
--- a/lib/eal/include/rte_keepalive.h
+++ b/lib/eal/include/rte_keepalive.h
@@ -10,13 +10,13 @@
 #ifndef _KEEPALIVE_H_
 #define _KEEPALIVE_H_
 
+#include <rte_config.h>
+#include <rte_memory.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_config.h>
-#include <rte_memory.h>
-
 #ifndef RTE_KEEPALIVE_MAXCORES
 /**
  * Number of cores to track.
diff --git a/lib/eal/include/rte_mcslock.h b/lib/eal/include/rte_mcslock.h
index 0aeb1a09f4..bb218d2e50 100644
--- a/lib/eal/include/rte_mcslock.h
+++ b/lib/eal/include/rte_mcslock.h
@@ -19,16 +19,16 @@
  * they acquired the lock.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_lcore.h>
 #include <rte_common.h>
 #include <rte_pause.h>
 #include <rte_branch_prediction.h>
 #include <rte_stdatomic.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * The rte_mcslock_t type.
  */
diff --git a/lib/eal/include/rte_memory.h b/lib/eal/include/rte_memory.h
index 842362d527..dbd0a6bedc 100644
--- a/lib/eal/include/rte_memory.h
+++ b/lib/eal/include/rte_memory.h
@@ -15,16 +15,16 @@
 #include <stddef.h>
 #include <stdio.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_bitops.h>
 #include <rte_common.h>
 #include <rte_config.h>
 #include <rte_eal_memconfig.h>
 #include <rte_fbarray.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_PGSIZE_4K   (1ULL << 12)
 #define RTE_PGSIZE_64K  (1ULL << 16)
 #define RTE_PGSIZE_256K (1ULL << 18)
diff --git a/lib/eal/include/rte_pci_dev_features.h b/lib/eal/include/rte_pci_dev_features.h
index ee6e10590c..bc6d3d4c1f 100644
--- a/lib/eal/include/rte_pci_dev_features.h
+++ b/lib/eal/include/rte_pci_dev_features.h
@@ -5,12 +5,12 @@
 #ifndef _RTE_PCI_DEV_FEATURES_H
 #define _RTE_PCI_DEV_FEATURES_H
 
+#include <rte_pci_dev_feature_defs.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_pci_dev_feature_defs.h>
-
 #define RTE_INTR_MODE_NONE_NAME "none"
 #define RTE_INTR_MODE_LEGACY_NAME "legacy"
 #define RTE_INTR_MODE_MSI_NAME "msi"
diff --git a/lib/eal/include/rte_pflock.h b/lib/eal/include/rte_pflock.h
index 37aa223ac3..6797ce5920 100644
--- a/lib/eal/include/rte_pflock.h
+++ b/lib/eal/include/rte_pflock.h
@@ -27,14 +27,14 @@
  * All locks must be initialised before use, and only initialised once.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 #include <rte_pause.h>
 #include <rte_stdatomic.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * The rte_pflock_t type.
  */
diff --git a/lib/eal/include/rte_random.h b/lib/eal/include/rte_random.h
index 5031c6fe5f..15cbe6215a 100644
--- a/lib/eal/include/rte_random.h
+++ b/lib/eal/include/rte_random.h
@@ -11,12 +11,12 @@
  * Pseudo-random Generators in RTE
  */
 
+#include <stdint.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-
 /**
  * Seed the pseudo-random generator.
  *
diff --git a/lib/eal/include/rte_seqcount.h b/lib/eal/include/rte_seqcount.h
index 88a6746900..d71afa6ab7 100644
--- a/lib/eal/include/rte_seqcount.h
+++ b/lib/eal/include/rte_seqcount.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_SEQCOUNT_H_
 #define _RTE_SEQCOUNT_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Seqcount
@@ -27,6 +23,10 @@ extern "C" {
 #include <rte_branch_prediction.h>
 #include <rte_stdatomic.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * The RTE seqcount type.
  */
diff --git a/lib/eal/include/rte_seqlock.h b/lib/eal/include/rte_seqlock.h
index 2677bd9440..e0e94900d1 100644
--- a/lib/eal/include/rte_seqlock.h
+++ b/lib/eal/include/rte_seqlock.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_SEQLOCK_H_
 #define _RTE_SEQLOCK_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Seqlock
@@ -95,6 +91,10 @@ extern "C" {
 #include <rte_seqcount.h>
 #include <rte_spinlock.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * The RTE seqlock type.
  */
diff --git a/lib/eal/include/rte_service.h b/lib/eal/include/rte_service.h
index e49a7a877e..94919ae584 100644
--- a/lib/eal/include/rte_service.h
+++ b/lib/eal/include/rte_service.h
@@ -23,16 +23,16 @@
  * application has access to the remaining lcores as normal.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include<stdio.h>
 #include <stdint.h>
 
 #include <rte_config.h>
 #include <rte_lcore.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_SERVICE_NAME_MAX 32
 
 /* Capabilities of a service.
diff --git a/lib/eal/include/rte_service_component.h b/lib/eal/include/rte_service_component.h
index a5350c97e5..acdf45cf60 100644
--- a/lib/eal/include/rte_service_component.h
+++ b/lib/eal/include/rte_service_component.h
@@ -10,12 +10,12 @@
  * operate, and you wish to run the component using service cores
  */
 
+#include <rte_service.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_service.h>
-
 /**
  * Signature of callback function to run a service.
  *
diff --git a/lib/eal/include/rte_stdatomic.h b/lib/eal/include/rte_stdatomic.h
index 7a081cb500..0f11a15e4e 100644
--- a/lib/eal/include/rte_stdatomic.h
+++ b/lib/eal/include/rte_stdatomic.h
@@ -7,10 +7,6 @@
 
 #include <assert.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #ifdef RTE_ENABLE_STDATOMIC
 #ifndef _MSC_VER
 #ifdef __STDC_NO_ATOMICS__
@@ -188,6 +184,7 @@ typedef int rte_memory_order;
 #endif
 
 #ifdef __cplusplus
+extern "C" {
 }
 #endif
 
diff --git a/lib/eal/include/rte_string_fns.h b/lib/eal/include/rte_string_fns.h
index 13badec7b3..702bd81251 100644
--- a/lib/eal/include/rte_string_fns.h
+++ b/lib/eal/include/rte_string_fns.h
@@ -11,10 +11,6 @@
 #ifndef _RTE_STRING_FNS_H_
 #define _RTE_STRING_FNS_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <ctype.h>
 #include <stdio.h>
 #include <string.h>
@@ -22,6 +18,10 @@ extern "C" {
 #include <rte_common.h>
 #include <rte_compat.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Takes string "string" parameter and splits it at character "delim"
  * up to maxtokens-1 times - to give "maxtokens" resulting tokens. Like
@@ -77,6 +77,10 @@ rte_strlcat(char *dst, const char *src, size_t size)
 	return l + strlen(src);
 }
 
+#ifdef __cplusplus
+}
+#endif
+
 /* pull in a strlcpy function */
 #ifdef RTE_EXEC_ENV_FREEBSD
 #ifndef __BSD_VISIBLE /* non-standard functions are hidden */
@@ -95,6 +99,10 @@ rte_strlcat(char *dst, const char *src, size_t size)
 #endif /* RTE_USE_LIBBSD */
 #endif /* FREEBSD */
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Copy string src to buffer dst of size dsize.
  * At most dsize-1 chars will be copied.
@@ -141,7 +149,6 @@ rte_str_skip_leading_spaces(const char *src)
 	return p;
 }
 
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/include/rte_tailq.h b/lib/eal/include/rte_tailq.h
index 931d549e59..89f7ef2134 100644
--- a/lib/eal/include/rte_tailq.h
+++ b/lib/eal/include/rte_tailq.h
@@ -10,13 +10,13 @@
  *  Here defines rte_tailq APIs for only internal use
  */
 
+#include <stdio.h>
+#include <rte_debug.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdio.h>
-#include <rte_debug.h>
-
 /** dummy structure type used by the rte_tailq APIs */
 struct rte_tailq_entry {
 	RTE_TAILQ_ENTRY(rte_tailq_entry) next; /**< Pointer entries for a tailq list */
diff --git a/lib/eal/include/rte_ticketlock.h b/lib/eal/include/rte_ticketlock.h
index 73884eb07b..e60f60699c 100644
--- a/lib/eal/include/rte_ticketlock.h
+++ b/lib/eal/include/rte_ticketlock.h
@@ -17,15 +17,15 @@
  * All locks must be initialised before use, and only initialised once.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 #include <rte_lcore.h>
 #include <rte_pause.h>
 #include <rte_stdatomic.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * The rte_ticketlock_t type.
  */
diff --git a/lib/eal/include/rte_time.h b/lib/eal/include/rte_time.h
index ec25f7b93d..c5c3a233e4 100644
--- a/lib/eal/include/rte_time.h
+++ b/lib/eal/include/rte_time.h
@@ -5,13 +5,13 @@
 #ifndef _RTE_TIME_H_
 #define _RTE_TIME_H_
 
+#include <stdint.h>
+#include <time.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-#include <time.h>
-
 #define NSEC_PER_SEC             1000000000L
 
 /**
diff --git a/lib/eal/include/rte_trace.h b/lib/eal/include/rte_trace.h
index a6e991fad3..1c824b2158 100644
--- a/lib/eal/include/rte_trace.h
+++ b/lib/eal/include/rte_trace.h
@@ -16,16 +16,16 @@
  * @b EXPERIMENTAL: this API may change without prior notice
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdbool.h>
 #include <stdio.h>
 
 #include <rte_common.h>
 #include <rte_compat.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  *  Test if trace is enabled.
  *
diff --git a/lib/eal/include/rte_trace_point.h b/lib/eal/include/rte_trace_point.h
index 41e2a7f99e..bc737d585e 100644
--- a/lib/eal/include/rte_trace_point.h
+++ b/lib/eal/include/rte_trace_point.h
@@ -16,10 +16,6 @@
  * @b EXPERIMENTAL: this API may change without prior notice
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdbool.h>
 #include <stdio.h>
 
@@ -32,6 +28,10 @@ extern "C" {
 #include <rte_string_fns.h>
 #include <rte_uuid.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** The tracepoint object. */
 typedef RTE_ATOMIC(uint64_t) rte_trace_point_t;
 
diff --git a/lib/eal/include/rte_trace_point_register.h b/lib/eal/include/rte_trace_point_register.h
index 41260e5964..8726338fe4 100644
--- a/lib/eal/include/rte_trace_point_register.h
+++ b/lib/eal/include/rte_trace_point_register.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_TRACE_POINT_REGISTER_H_
 #define _RTE_TRACE_POINT_REGISTER_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #ifdef _RTE_TRACE_POINT_H_
 #error for registration, include this file first before <rte_trace_point.h>
 #endif
@@ -16,6 +12,10 @@ extern "C" {
 #include <rte_per_lcore.h>
 #include <rte_trace_point.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 RTE_DECLARE_PER_LCORE(volatile int, trace_point_sz);
 
 #define RTE_TRACE_POINT_REGISTER(trace, name) \
diff --git a/lib/eal/include/rte_uuid.h b/lib/eal/include/rte_uuid.h
index cfefd4308a..def5907a00 100644
--- a/lib/eal/include/rte_uuid.h
+++ b/lib/eal/include/rte_uuid.h
@@ -10,14 +10,14 @@
 #ifndef _RTE_UUID_H_
 #define _RTE_UUID_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdbool.h>
 #include <stddef.h>
 #include <string.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Struct describing a Universal Unique Identifier
  */
diff --git a/lib/eal/include/rte_version.h b/lib/eal/include/rte_version.h
index 422d00fdff..be3f753617 100644
--- a/lib/eal/include/rte_version.h
+++ b/lib/eal/include/rte_version.h
@@ -10,13 +10,13 @@
 #ifndef _RTE_VERSION_H_
 #define _RTE_VERSION_H_
 
+#include <string.h>
+#include <stdio.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <string.h>
-#include <stdio.h>
-
 /**
  * Macro to compute a version number usable for comparisons
  */
diff --git a/lib/eal/include/rte_vfio.h b/lib/eal/include/rte_vfio.h
index b774625d9f..923293040b 100644
--- a/lib/eal/include/rte_vfio.h
+++ b/lib/eal/include/rte_vfio.h
@@ -10,10 +10,6 @@
  * RTE VFIO. This library provides various VFIO related utility functions.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdbool.h>
 #include <stdint.h>
 
@@ -32,6 +28,10 @@ extern "C" {
 #endif /* kernel version >= 4.0.0 */
 #endif /* RTE_EAL_VFIO */
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef VFIO_PRESENT
 
 #include <linux/vfio.h>
diff --git a/lib/eal/linux/include/rte_os.h b/lib/eal/linux/include/rte_os.h
index c72bf5b7e6..dba0e29827 100644
--- a/lib/eal/linux/include/rte_os.h
+++ b/lib/eal/linux/include/rte_os.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_OS_H_
 #define _RTE_OS_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * This header should contain any definition
  * which is not supported natively or named differently in Linux.
@@ -17,6 +13,10 @@ extern "C" {
 #include <sched.h>
 #include <sys/queue.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* These macros are compatible with system's sys/queue.h. */
 #define RTE_TAILQ_HEAD(name, type) TAILQ_HEAD(name, type)
 #define RTE_TAILQ_ENTRY(type) TAILQ_ENTRY(type)
diff --git a/lib/eal/loongarch/include/rte_atomic.h b/lib/eal/loongarch/include/rte_atomic.h
index 0510b8f781..c8066a4612 100644
--- a/lib/eal/loongarch/include/rte_atomic.h
+++ b/lib/eal/loongarch/include/rte_atomic.h
@@ -9,13 +9,13 @@
 #  error Platform must be built with RTE_FORCE_INTRINSICS
 #endif
 
+#include <rte_common.h>
+#include "generic/rte_atomic.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_common.h>
-#include "generic/rte_atomic.h"
-
 #define rte_mb()	do { asm volatile("dbar 0":::"memory"); } while (0)
 
 #define rte_wmb()	rte_mb()
diff --git a/lib/eal/loongarch/include/rte_byteorder.h b/lib/eal/loongarch/include/rte_byteorder.h
index 0da6097a4f..9b092e2a59 100644
--- a/lib/eal/loongarch/include/rte_byteorder.h
+++ b/lib/eal/loongarch/include/rte_byteorder.h
@@ -5,12 +5,12 @@
 #ifndef RTE_BYTEORDER_LOONGARCH_H
 #define RTE_BYTEORDER_LOONGARCH_H
 
+#include "generic/rte_byteorder.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_byteorder.h"
-
 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
 
 #define rte_cpu_to_le_16(x) (x)
diff --git a/lib/eal/loongarch/include/rte_cpuflags.h b/lib/eal/loongarch/include/rte_cpuflags.h
index 6b592c147c..c1e04ac545 100644
--- a/lib/eal/loongarch/include/rte_cpuflags.h
+++ b/lib/eal/loongarch/include/rte_cpuflags.h
@@ -5,10 +5,6 @@
 #ifndef RTE_CPUFLAGS_LOONGARCH_H
 #define RTE_CPUFLAGS_LOONGARCH_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * Enumeration of all CPU features supported
  */
@@ -30,6 +26,10 @@ enum rte_cpu_flag_t {
 
 #include "generic/rte_cpuflags.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/loongarch/include/rte_cycles.h b/lib/eal/loongarch/include/rte_cycles.h
index f612d1ad10..128c8646e9 100644
--- a/lib/eal/loongarch/include/rte_cycles.h
+++ b/lib/eal/loongarch/include/rte_cycles.h
@@ -5,12 +5,12 @@
 #ifndef RTE_CYCLES_LOONGARCH_H
 #define RTE_CYCLES_LOONGARCH_H
 
+#include "generic/rte_cycles.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_cycles.h"
-
 /**
  * Read the time base register.
  *
diff --git a/lib/eal/loongarch/include/rte_io.h b/lib/eal/loongarch/include/rte_io.h
index 40e40efa86..e32a4737b2 100644
--- a/lib/eal/loongarch/include/rte_io.h
+++ b/lib/eal/loongarch/include/rte_io.h
@@ -5,12 +5,12 @@
 #ifndef RTE_IO_LOONGARCH_H
 #define RTE_IO_LOONGARCH_H
 
+#include "generic/rte_io.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_io.h"
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/loongarch/include/rte_memcpy.h b/lib/eal/loongarch/include/rte_memcpy.h
index 22578d40f4..5412a0fdc1 100644
--- a/lib/eal/loongarch/include/rte_memcpy.h
+++ b/lib/eal/loongarch/include/rte_memcpy.h
@@ -10,12 +10,12 @@
 
 #include "rte_common.h"
 
+#include "generic/rte_memcpy.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_memcpy.h"
-
 static inline void
 rte_mov16(uint8_t *dst, const uint8_t *src)
 {
diff --git a/lib/eal/loongarch/include/rte_pause.h b/lib/eal/loongarch/include/rte_pause.h
index 4302e1b9be..cffa2874d6 100644
--- a/lib/eal/loongarch/include/rte_pause.h
+++ b/lib/eal/loongarch/include/rte_pause.h
@@ -5,14 +5,14 @@
 #ifndef RTE_PAUSE_LOONGARCH_H
 #define RTE_PAUSE_LOONGARCH_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include "rte_atomic.h"
 
 #include "generic/rte_pause.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void rte_pause(void)
 {
 }
diff --git a/lib/eal/loongarch/include/rte_power_intrinsics.h b/lib/eal/loongarch/include/rte_power_intrinsics.h
index d5dbd94567..9e11478206 100644
--- a/lib/eal/loongarch/include/rte_power_intrinsics.h
+++ b/lib/eal/loongarch/include/rte_power_intrinsics.h
@@ -5,14 +5,14 @@
 #ifndef RTE_POWER_INTRINSIC_LOONGARCH_H
 #define RTE_POWER_INTRINSIC_LOONGARCH_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 
 #include "generic/rte_power_intrinsics.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/loongarch/include/rte_prefetch.h b/lib/eal/loongarch/include/rte_prefetch.h
index 64b1fd2c2a..8da08a5566 100644
--- a/lib/eal/loongarch/include/rte_prefetch.h
+++ b/lib/eal/loongarch/include/rte_prefetch.h
@@ -5,14 +5,14 @@
 #ifndef RTE_PREFETCH_LOONGARCH_H
 #define RTE_PREFETCH_LOONGARCH_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include <rte_common.h>
 #include "generic/rte_prefetch.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void rte_prefetch0(const volatile void *p)
 {
 	__builtin_prefetch((const void *)(uintptr_t)p, 0, 3);
diff --git a/lib/eal/loongarch/include/rte_rwlock.h b/lib/eal/loongarch/include/rte_rwlock.h
index aedc6f3349..48924599c5 100644
--- a/lib/eal/loongarch/include/rte_rwlock.h
+++ b/lib/eal/loongarch/include/rte_rwlock.h
@@ -5,12 +5,12 @@
 #ifndef RTE_RWLOCK_LOONGARCH_H
 #define RTE_RWLOCK_LOONGARCH_H
 
+#include "generic/rte_rwlock.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_rwlock.h"
-
 static inline void
 rte_rwlock_read_lock_tm(rte_rwlock_t *rwl)
 {
diff --git a/lib/eal/loongarch/include/rte_spinlock.h b/lib/eal/loongarch/include/rte_spinlock.h
index e8d34e9728..38f00f631d 100644
--- a/lib/eal/loongarch/include/rte_spinlock.h
+++ b/lib/eal/loongarch/include/rte_spinlock.h
@@ -5,13 +5,13 @@
 #ifndef RTE_SPINLOCK_LOONGARCH_H
 #define RTE_SPINLOCK_LOONGARCH_H
 
+#include <rte_common.h>
+#include "generic/rte_spinlock.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_common.h>
-#include "generic/rte_spinlock.h"
-
 #ifndef RTE_FORCE_INTRINSICS
 #  error Platform must be built with RTE_FORCE_INTRINSICS
 #endif
diff --git a/lib/eal/ppc/include/rte_atomic.h b/lib/eal/ppc/include/rte_atomic.h
index 645c7132df..6ce2e5188a 100644
--- a/lib/eal/ppc/include/rte_atomic.h
+++ b/lib/eal/ppc/include/rte_atomic.h
@@ -12,13 +12,13 @@
 #ifndef _RTE_ATOMIC_PPC_64_H_
 #define _RTE_ATOMIC_PPC_64_H_
 
+#include <stdint.h>
+#include "generic/rte_atomic.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-#include "generic/rte_atomic.h"
-
 #define	rte_mb()  asm volatile("sync" : : : "memory")
 
 #define	rte_wmb() asm volatile("sync" : : : "memory")
diff --git a/lib/eal/ppc/include/rte_byteorder.h b/lib/eal/ppc/include/rte_byteorder.h
index de94e2ad32..1d19e96f72 100644
--- a/lib/eal/ppc/include/rte_byteorder.h
+++ b/lib/eal/ppc/include/rte_byteorder.h
@@ -8,13 +8,13 @@
 #ifndef _RTE_BYTEORDER_PPC_64_H_
 #define _RTE_BYTEORDER_PPC_64_H_
 
+#include <stdint.h>
+#include "generic/rte_byteorder.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-#include "generic/rte_byteorder.h"
-
 /*
  * An architecture-optimized byte swap for a 16-bit value.
  *
diff --git a/lib/eal/ppc/include/rte_cpuflags.h b/lib/eal/ppc/include/rte_cpuflags.h
index dedc1ab469..b7bb8f6872 100644
--- a/lib/eal/ppc/include/rte_cpuflags.h
+++ b/lib/eal/ppc/include/rte_cpuflags.h
@@ -6,10 +6,6 @@
 #ifndef _RTE_CPUFLAGS_PPC_64_H_
 #define _RTE_CPUFLAGS_PPC_64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * Enumeration of all CPU features supported
  */
@@ -52,6 +48,10 @@ enum rte_cpu_flag_t {
 
 #include "generic/rte_cpuflags.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/ppc/include/rte_cycles.h b/lib/eal/ppc/include/rte_cycles.h
index 666fc9b0bf..1e6e6cccc8 100644
--- a/lib/eal/ppc/include/rte_cycles.h
+++ b/lib/eal/ppc/include/rte_cycles.h
@@ -6,10 +6,6 @@
 #ifndef _RTE_CYCLES_PPC_64_H_
 #define _RTE_CYCLES_PPC_64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <features.h>
 #ifdef __GLIBC__
 #include <sys/platform/ppc.h>
@@ -20,6 +16,10 @@ extern "C" {
 #include <rte_byteorder.h>
 #include <rte_common.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Read the time base register.
  *
diff --git a/lib/eal/ppc/include/rte_io.h b/lib/eal/ppc/include/rte_io.h
index 01455065e5..c9371b784e 100644
--- a/lib/eal/ppc/include/rte_io.h
+++ b/lib/eal/ppc/include/rte_io.h
@@ -5,12 +5,12 @@
 #ifndef _RTE_IO_PPC_64_H_
 #define _RTE_IO_PPC_64_H_
 
+#include "generic/rte_io.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_io.h"
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/ppc/include/rte_memcpy.h b/lib/eal/ppc/include/rte_memcpy.h
index 6f388c0234..eae73128c4 100644
--- a/lib/eal/ppc/include/rte_memcpy.h
+++ b/lib/eal/ppc/include/rte_memcpy.h
@@ -12,12 +12,12 @@
 #include "rte_altivec.h"
 #include "rte_common.h"
 
+#include "generic/rte_memcpy.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_memcpy.h"
-
 #if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION >= 90000)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Warray-bounds"
diff --git a/lib/eal/ppc/include/rte_pause.h b/lib/eal/ppc/include/rte_pause.h
index 16e47ce22f..78a73aceed 100644
--- a/lib/eal/ppc/include/rte_pause.h
+++ b/lib/eal/ppc/include/rte_pause.h
@@ -5,14 +5,14 @@
 #ifndef _RTE_PAUSE_PPC64_H_
 #define _RTE_PAUSE_PPC64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include "rte_atomic.h"
 
 #include "generic/rte_pause.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void rte_pause(void)
 {
 	/* Set hardware multi-threading low priority */
diff --git a/lib/eal/ppc/include/rte_power_intrinsics.h b/lib/eal/ppc/include/rte_power_intrinsics.h
index c0e9ac279f..6207eeb04d 100644
--- a/lib/eal/ppc/include/rte_power_intrinsics.h
+++ b/lib/eal/ppc/include/rte_power_intrinsics.h
@@ -5,14 +5,14 @@
 #ifndef _RTE_POWER_INTRINSIC_PPC_H_
 #define _RTE_POWER_INTRINSIC_PPC_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 
 #include "generic/rte_power_intrinsics.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/ppc/include/rte_prefetch.h b/lib/eal/ppc/include/rte_prefetch.h
index 2e1b5751e0..bae95af7bf 100644
--- a/lib/eal/ppc/include/rte_prefetch.h
+++ b/lib/eal/ppc/include/rte_prefetch.h
@@ -6,14 +6,14 @@
 #ifndef _RTE_PREFETCH_PPC_64_H_
 #define _RTE_PREFETCH_PPC_64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include <rte_common.h>
 #include "generic/rte_prefetch.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void rte_prefetch0(const volatile void *p)
 {
 	asm volatile ("dcbt 0,%[p],0" : : [p] "r" (p));
diff --git a/lib/eal/ppc/include/rte_rwlock.h b/lib/eal/ppc/include/rte_rwlock.h
index 9fadc04076..bee8da4070 100644
--- a/lib/eal/ppc/include/rte_rwlock.h
+++ b/lib/eal/ppc/include/rte_rwlock.h
@@ -3,12 +3,12 @@
 #ifndef _RTE_RWLOCK_PPC_64_H_
 #define _RTE_RWLOCK_PPC_64_H_
 
+#include "generic/rte_rwlock.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_rwlock.h"
-
 static inline void
 rte_rwlock_read_lock_tm(rte_rwlock_t *rwl)
 {
diff --git a/lib/eal/ppc/include/rte_spinlock.h b/lib/eal/ppc/include/rte_spinlock.h
index 3a4c905b22..77f90f974a 100644
--- a/lib/eal/ppc/include/rte_spinlock.h
+++ b/lib/eal/ppc/include/rte_spinlock.h
@@ -6,14 +6,14 @@
 #ifndef _RTE_SPINLOCK_PPC_64_H_
 #define _RTE_SPINLOCK_PPC_64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 #include <rte_pause.h>
 #include "generic/rte_spinlock.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Fixme: Use intrinsics to implement the spinlock on Power architecture */
 
 #ifndef RTE_FORCE_INTRINSICS
diff --git a/lib/eal/riscv/include/rte_atomic.h b/lib/eal/riscv/include/rte_atomic.h
index 2603bc90ea..66346ad474 100644
--- a/lib/eal/riscv/include/rte_atomic.h
+++ b/lib/eal/riscv/include/rte_atomic.h
@@ -12,15 +12,15 @@
 #  error Platform must be built with RTE_FORCE_INTRINSICS
 #endif
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <rte_common.h>
 #include <rte_config.h>
 #include "generic/rte_atomic.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define rte_mb()	asm volatile("fence rw, rw" : : : "memory")
 
 #define rte_wmb()	asm volatile("fence w, w" : : : "memory")
diff --git a/lib/eal/riscv/include/rte_byteorder.h b/lib/eal/riscv/include/rte_byteorder.h
index 25bd0c275d..c9ff5c0dd1 100644
--- a/lib/eal/riscv/include/rte_byteorder.h
+++ b/lib/eal/riscv/include/rte_byteorder.h
@@ -8,14 +8,14 @@
 #ifndef RTE_BYTEORDER_RISCV_H
 #define RTE_BYTEORDER_RISCV_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <rte_common.h>
 #include "generic/rte_byteorder.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifndef RTE_BYTE_ORDER
 #define RTE_BYTE_ORDER RTE_LITTLE_ENDIAN
 #endif
diff --git a/lib/eal/riscv/include/rte_cpuflags.h b/lib/eal/riscv/include/rte_cpuflags.h
index d742efc40f..ac2004f02d 100644
--- a/lib/eal/riscv/include/rte_cpuflags.h
+++ b/lib/eal/riscv/include/rte_cpuflags.h
@@ -8,10 +8,6 @@
 #ifndef RTE_CPUFLAGS_RISCV_H
 #define RTE_CPUFLAGS_RISCV_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * Enumeration of all CPU features supported
  */
@@ -46,6 +42,10 @@ enum rte_cpu_flag_t {
 
 #include "generic/rte_cpuflags.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/riscv/include/rte_cycles.h b/lib/eal/riscv/include/rte_cycles.h
index 04750ca253..7926809a73 100644
--- a/lib/eal/riscv/include/rte_cycles.h
+++ b/lib/eal/riscv/include/rte_cycles.h
@@ -8,12 +8,12 @@
 #ifndef RTE_CYCLES_RISCV_H
 #define RTE_CYCLES_RISCV_H
 
+#include "generic/rte_cycles.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_cycles.h"
-
 #ifndef RTE_RISCV_RDTSC_USE_HPM
 #define RTE_RISCV_RDTSC_USE_HPM 0
 #endif
diff --git a/lib/eal/riscv/include/rte_io.h b/lib/eal/riscv/include/rte_io.h
index 29659c9590..911dbb6bd2 100644
--- a/lib/eal/riscv/include/rte_io.h
+++ b/lib/eal/riscv/include/rte_io.h
@@ -8,12 +8,12 @@
 #ifndef RTE_IO_RISCV_H
 #define RTE_IO_RISCV_H
 
+#include "generic/rte_io.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_io.h"
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/riscv/include/rte_memcpy.h b/lib/eal/riscv/include/rte_memcpy.h
index e34f19396e..d8a942c5d2 100644
--- a/lib/eal/riscv/include/rte_memcpy.h
+++ b/lib/eal/riscv/include/rte_memcpy.h
@@ -12,12 +12,12 @@
 
 #include "rte_common.h"
 
+#include "generic/rte_memcpy.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_memcpy.h"
-
 static inline void
 rte_mov16(uint8_t *dst, const uint8_t *src)
 {
diff --git a/lib/eal/riscv/include/rte_pause.h b/lib/eal/riscv/include/rte_pause.h
index cb8e9ca52d..3f473cd8db 100644
--- a/lib/eal/riscv/include/rte_pause.h
+++ b/lib/eal/riscv/include/rte_pause.h
@@ -7,14 +7,14 @@
 #ifndef RTE_PAUSE_RISCV_H
 #define RTE_PAUSE_RISCV_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include "rte_atomic.h"
 
 #include "generic/rte_pause.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void rte_pause(void)
 {
 	/* Insert pause hint directly to be compatible with old compilers.
diff --git a/lib/eal/riscv/include/rte_power_intrinsics.h b/lib/eal/riscv/include/rte_power_intrinsics.h
index 636e58e71f..3f7dba1640 100644
--- a/lib/eal/riscv/include/rte_power_intrinsics.h
+++ b/lib/eal/riscv/include/rte_power_intrinsics.h
@@ -7,14 +7,14 @@
 #ifndef RTE_POWER_INTRINSIC_RISCV_H
 #define RTE_POWER_INTRINSIC_RISCV_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 
 #include "generic/rte_power_intrinsics.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/riscv/include/rte_prefetch.h b/lib/eal/riscv/include/rte_prefetch.h
index 748cf1b626..42146491ea 100644
--- a/lib/eal/riscv/include/rte_prefetch.h
+++ b/lib/eal/riscv/include/rte_prefetch.h
@@ -8,14 +8,14 @@
 #ifndef RTE_PREFETCH_RISCV_H
 #define RTE_PREFETCH_RISCV_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include <rte_common.h>
 #include "generic/rte_prefetch.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void rte_prefetch0(const volatile void *p)
 {
 	RTE_SET_USED(p);
diff --git a/lib/eal/riscv/include/rte_rwlock.h b/lib/eal/riscv/include/rte_rwlock.h
index 9cdaf1b0ef..730970eecb 100644
--- a/lib/eal/riscv/include/rte_rwlock.h
+++ b/lib/eal/riscv/include/rte_rwlock.h
@@ -7,12 +7,12 @@
 #ifndef RTE_RWLOCK_RISCV_H
 #define RTE_RWLOCK_RISCV_H
 
+#include "generic/rte_rwlock.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_rwlock.h"
-
 static inline void
 rte_rwlock_read_lock_tm(rte_rwlock_t *rwl)
 {
diff --git a/lib/eal/riscv/include/rte_spinlock.h b/lib/eal/riscv/include/rte_spinlock.h
index 6af430735c..5fe4980e44 100644
--- a/lib/eal/riscv/include/rte_spinlock.h
+++ b/lib/eal/riscv/include/rte_spinlock.h
@@ -12,13 +12,13 @@
 #  error Platform must be built with RTE_FORCE_INTRINSICS
 #endif
 
+#include <rte_common.h>
+#include "generic/rte_spinlock.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_common.h>
-#include "generic/rte_spinlock.h"
-
 static inline int rte_tm_supported(void)
 {
 	return 0;
diff --git a/lib/eal/windows/include/pthread.h b/lib/eal/windows/include/pthread.h
index 051b9311c2..e1c31017d1 100644
--- a/lib/eal/windows/include/pthread.h
+++ b/lib/eal/windows/include/pthread.h
@@ -13,13 +13,13 @@
  * eal_common_thread.c and common\include\rte_per_lcore.h as Microsoft libc
  * does not contain pthread.h. This may be removed in future releases.
  */
+#include <rte_common.h>
+#include <rte_windows.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_common.h>
-#include <rte_windows.h>
-
 #define PTHREAD_BARRIER_SERIAL_THREAD TRUE
 
 /* defining pthread_t type on Windows since there is no in Microsoft libc*/
diff --git a/lib/eal/windows/include/regex.h b/lib/eal/windows/include/regex.h
index 827f938414..a224c0cd29 100644
--- a/lib/eal/windows/include/regex.h
+++ b/lib/eal/windows/include/regex.h
@@ -10,15 +10,15 @@
  * as Microsoft libc does not contain regex.h. This may be removed in
  * future releases.
  */
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #define REG_NOMATCH 1
 #define REG_ESPACE 12
 
 #include <rte_common.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* defining regex_t for Windows */
 typedef void *regex_t;
 /* defining regmatch_t for Windows */
diff --git a/lib/eal/windows/include/rte_windows.h b/lib/eal/windows/include/rte_windows.h
index 567ed7d820..e78f007ffa 100644
--- a/lib/eal/windows/include/rte_windows.h
+++ b/lib/eal/windows/include/rte_windows.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_WINDOWS_H_
 #define _RTE_WINDOWS_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file Windows-specific facilities
  *
@@ -44,6 +40,10 @@ extern "C" {
 #include <devguid.h>
 #include <rte_log.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Log GetLastError() with context, usually a Win32 API function and arguments.
  */
diff --git a/lib/eal/x86/include/rte_atomic.h b/lib/eal/x86/include/rte_atomic.h
index 74b1b24b7a..c72c47c83e 100644
--- a/lib/eal/x86/include/rte_atomic.h
+++ b/lib/eal/x86/include/rte_atomic.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_ATOMIC_X86_H_
 #define _RTE_ATOMIC_X86_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <rte_common.h>
 #include <rte_config.h>
@@ -31,6 +27,10 @@ extern "C" {
 
 #define rte_smp_rmb() rte_compiler_barrier()
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /*
  * From Intel Software Development Manual; Vol 3;
  * 8.2.2 Memory Ordering in P6 and More Recent Processor Families:
@@ -99,10 +99,18 @@ rte_atomic_thread_fence(rte_memory_order memorder)
 		__rte_atomic_thread_fence(memorder);
 }
 
+#ifdef __cplusplus
+}
+#endif
+
 #ifndef RTE_TOOLCHAIN_MSVC
 
 /*------------------------- 16 bit atomic operations -------------------------*/
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifndef RTE_FORCE_INTRINSICS
 static inline int
 rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src)
@@ -273,6 +281,11 @@ static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v)
 			);
 	return ret != 0;
 }
+
+#ifdef __cplusplus
+}
+#endif
+
 #endif
 
 #ifdef RTE_ARCH_I686
@@ -283,8 +296,4 @@ static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v)
 
 #endif
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* _RTE_ATOMIC_X86_H_ */
diff --git a/lib/eal/x86/include/rte_byteorder.h b/lib/eal/x86/include/rte_byteorder.h
index adbec0c157..5a49ffcd50 100644
--- a/lib/eal/x86/include/rte_byteorder.h
+++ b/lib/eal/x86/include/rte_byteorder.h
@@ -5,15 +5,15 @@
 #ifndef _RTE_BYTEORDER_X86_H_
 #define _RTE_BYTEORDER_X86_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <rte_common.h>
 #include <rte_config.h>
 #include "generic/rte_byteorder.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifndef RTE_BYTE_ORDER
 #define RTE_BYTE_ORDER RTE_LITTLE_ENDIAN
 #endif
@@ -48,6 +48,10 @@ static inline uint32_t rte_arch_bswap32(uint32_t _x)
 	return x;
 }
 
+#ifdef __cplusplus
+}
+#endif
+
 #define rte_bswap16(x) ((uint16_t)(__builtin_constant_p(x) ?		\
 				   rte_constant_bswap16(x) :		\
 				   rte_arch_bswap16(x)))
@@ -83,8 +87,4 @@ static inline uint32_t rte_arch_bswap32(uint32_t _x)
 #define rte_be_to_cpu_32(x) rte_bswap32(x)
 #define rte_be_to_cpu_64(x) rte_bswap64(x)
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* _RTE_BYTEORDER_X86_H_ */
diff --git a/lib/eal/x86/include/rte_cpuflags.h b/lib/eal/x86/include/rte_cpuflags.h
index 1ee00e70fe..e843d1e5f4 100644
--- a/lib/eal/x86/include/rte_cpuflags.h
+++ b/lib/eal/x86/include/rte_cpuflags.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_CPUFLAGS_X86_64_H_
 #define _RTE_CPUFLAGS_X86_64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 enum rte_cpu_flag_t {
 	/* (EAX 01h) ECX features*/
 	RTE_CPUFLAG_SSE3 = 0,               /**< SSE3 */
@@ -138,6 +134,10 @@ enum rte_cpu_flag_t {
 
 #include "generic/rte_cpuflags.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/x86/include/rte_cycles.h b/lib/eal/x86/include/rte_cycles.h
index 2afe85e28c..8de43840da 100644
--- a/lib/eal/x86/include/rte_cycles.h
+++ b/lib/eal/x86/include/rte_cycles.h
@@ -12,10 +12,6 @@
 #include <x86intrin.h>
 #endif
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include "generic/rte_cycles.h"
 
 #ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT
@@ -26,6 +22,10 @@ extern int rte_cycles_vmware_tsc_map;
 #include <rte_common.h>
 #include <rte_config.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline uint64_t
 rte_rdtsc(void)
 {
diff --git a/lib/eal/x86/include/rte_io.h b/lib/eal/x86/include/rte_io.h
index 0e1fefdee1..c11cb8cd89 100644
--- a/lib/eal/x86/include/rte_io.h
+++ b/lib/eal/x86/include/rte_io.h
@@ -5,16 +5,16 @@
 #ifndef _RTE_IO_X86_H_
 #define _RTE_IO_X86_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include "rte_cpuflags.h"
 
 #define RTE_NATIVE_WRITE32_WC
 #include "generic/rte_io.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @internal
  * MOVDIRI wrapper.
diff --git a/lib/eal/x86/include/rte_pause.h b/lib/eal/x86/include/rte_pause.h
index b4cf1df1d0..54f028b295 100644
--- a/lib/eal/x86/include/rte_pause.h
+++ b/lib/eal/x86/include/rte_pause.h
@@ -5,13 +5,14 @@
 #ifndef _RTE_PAUSE_X86_H_
 #define _RTE_PAUSE_X86_H_
 
+#include "generic/rte_pause.h"
+
+#include <emmintrin.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_pause.h"
-
-#include <emmintrin.h>
 static inline void rte_pause(void)
 {
 	_mm_pause();
diff --git a/lib/eal/x86/include/rte_power_intrinsics.h b/lib/eal/x86/include/rte_power_intrinsics.h
index e4c2b87f73..fcb780fc5b 100644
--- a/lib/eal/x86/include/rte_power_intrinsics.h
+++ b/lib/eal/x86/include/rte_power_intrinsics.h
@@ -5,14 +5,14 @@
 #ifndef _RTE_POWER_INTRINSIC_X86_H_
 #define _RTE_POWER_INTRINSIC_X86_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 
 #include "generic/rte_power_intrinsics.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/x86/include/rte_prefetch.h b/lib/eal/x86/include/rte_prefetch.h
index 8a9377714f..34a609cc65 100644
--- a/lib/eal/x86/include/rte_prefetch.h
+++ b/lib/eal/x86/include/rte_prefetch.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_PREFETCH_X86_64_H_
 #define _RTE_PREFETCH_X86_64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #ifdef RTE_TOOLCHAIN_MSVC
 #include <emmintrin.h>
 #endif
@@ -17,6 +13,10 @@ extern "C" {
 #include <rte_common.h>
 #include "generic/rte_prefetch.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void rte_prefetch0(const volatile void *p)
 {
 #ifdef RTE_TOOLCHAIN_MSVC
diff --git a/lib/eal/x86/include/rte_rwlock.h b/lib/eal/x86/include/rte_rwlock.h
index 1796b69265..281eff33b9 100644
--- a/lib/eal/x86/include/rte_rwlock.h
+++ b/lib/eal/x86/include/rte_rwlock.h
@@ -5,13 +5,13 @@
 #ifndef _RTE_RWLOCK_X86_64_H_
 #define _RTE_RWLOCK_X86_64_H_
 
+#include "generic/rte_rwlock.h"
+#include "rte_spinlock.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_rwlock.h"
-#include "rte_spinlock.h"
-
 static inline void
 rte_rwlock_read_lock_tm(rte_rwlock_t *rwl)
 	__rte_no_thread_safety_analysis
diff --git a/lib/eal/x86/include/rte_spinlock.h b/lib/eal/x86/include/rte_spinlock.h
index a6c23ea1f6..a14da41964 100644
--- a/lib/eal/x86/include/rte_spinlock.h
+++ b/lib/eal/x86/include/rte_spinlock.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_SPINLOCK_X86_64_H_
 #define _RTE_SPINLOCK_X86_64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include "generic/rte_spinlock.h"
 #include "rte_rtm.h"
 #include "rte_cpuflags.h"
@@ -17,6 +13,10 @@ extern "C" {
 #include "rte_pause.h"
 #include "rte_cycles.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_RTM_MAX_RETRIES (20)
 #define RTE_XABORT_LOCK_BUSY (0xff)
 
@@ -182,7 +182,6 @@ rte_spinlock_recursive_trylock_tm(rte_spinlock_recursive_t *slr)
 	return rte_spinlock_recursive_trylock(slr);
 }
 
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/ethdev/ethdev_driver.h b/lib/ethdev/ethdev_driver.h
index 883e59a927..ae00ead865 100644
--- a/lib/ethdev/ethdev_driver.h
+++ b/lib/ethdev/ethdev_driver.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_ETHDEV_DRIVER_H_
 #define _RTE_ETHDEV_DRIVER_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  *
@@ -24,6 +20,10 @@ extern "C" {
 #include <rte_compat.h>
 #include <rte_ethdev.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @internal
  * Structure used to hold information about the callbacks to be called for a
diff --git a/lib/ethdev/ethdev_pci.h b/lib/ethdev/ethdev_pci.h
index ec4f731270..2229ffa252 100644
--- a/lib/ethdev/ethdev_pci.h
+++ b/lib/ethdev/ethdev_pci.h
@@ -6,16 +6,16 @@
 #ifndef _RTE_ETHDEV_PCI_H_
 #define _RTE_ETHDEV_PCI_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_malloc.h>
 #include <rte_pci.h>
 #include <bus_pci_driver.h>
 #include <rte_config.h>
 #include <ethdev_driver.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Copy pci device info to the Ethernet device data.
  * Shared memory (eth_dev->data) only updated by primary process, so it is safe
diff --git a/lib/ethdev/ethdev_trace.h b/lib/ethdev/ethdev_trace.h
index 3bec87bfdb..36a38f718a 100644
--- a/lib/ethdev/ethdev_trace.h
+++ b/lib/ethdev/ethdev_trace.h
@@ -11,10 +11,6 @@
  * API for ethdev trace support
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <dev_driver.h>
 #include <rte_trace_point.h>
 
@@ -22,6 +18,10 @@ extern "C" {
 #include "rte_mtr.h"
 #include "rte_tm.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 RTE_TRACE_POINT(
 	rte_ethdev_trace_configure,
 	RTE_TRACE_POINT_ARGS(uint16_t port_id, uint16_t nb_rx_q,
diff --git a/lib/ethdev/ethdev_vdev.h b/lib/ethdev/ethdev_vdev.h
index 364f140f91..010ec75a00 100644
--- a/lib/ethdev/ethdev_vdev.h
+++ b/lib/ethdev/ethdev_vdev.h
@@ -6,15 +6,15 @@
 #ifndef _RTE_ETHDEV_VDEV_H_
 #define _RTE_ETHDEV_VDEV_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_config.h>
 #include <rte_malloc.h>
 #include <bus_vdev_driver.h>
 #include <ethdev_driver.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @internal
  * Allocates a new ethdev slot for an Ethernet device and returns the pointer
diff --git a/lib/ethdev/rte_cman.h b/lib/ethdev/rte_cman.h
index 297db8e095..dedd6cb71a 100644
--- a/lib/ethdev/rte_cman.h
+++ b/lib/ethdev/rte_cman.h
@@ -5,12 +5,12 @@
 #ifndef RTE_CMAN_H
 #define RTE_CMAN_H
 
+#include <rte_bitops.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_bitops.h>
-
 /**
  * @file
  * Congestion management related parameters for DPDK.
diff --git a/lib/ethdev/rte_dev_info.h b/lib/ethdev/rte_dev_info.h
index 67cf0ae526..4fde2ad408 100644
--- a/lib/ethdev/rte_dev_info.h
+++ b/lib/ethdev/rte_dev_info.h
@@ -5,12 +5,12 @@
 #ifndef _RTE_DEV_INFO_H_
 #define _RTE_DEV_INFO_H_
 
+#include <stdint.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-
 /*
  * Placeholder for accessing device registers
  */
diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
index 548fada1c7..a75e26bf07 100644
--- a/lib/ethdev/rte_ethdev.h
+++ b/lib/ethdev/rte_ethdev.h
@@ -145,10 +145,6 @@
  * a 0 value by the receive function of the driver for a given number of tries.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 /* Use this macro to check if LRO API is supported */
@@ -5966,6 +5962,10 @@ int rte_eth_cman_config_get(uint16_t port_id, struct rte_eth_cman_config *config
 
 #include <rte_ethdev_core.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @internal
  * Helper routine for rte_eth_rx_burst().
diff --git a/lib/ethdev/rte_ethdev_trace_fp.h b/lib/ethdev/rte_ethdev_trace_fp.h
index 40b6e4756b..c11b4f18f7 100644
--- a/lib/ethdev/rte_ethdev_trace_fp.h
+++ b/lib/ethdev/rte_ethdev_trace_fp.h
@@ -11,12 +11,12 @@
  * API for ethdev trace support
  */
 
+#include <rte_trace_point.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_trace_point.h>
-
 RTE_TRACE_POINT_FP(
 	rte_ethdev_trace_rx_burst,
 	RTE_TRACE_POINT_ARGS(uint16_t port_id, uint16_t queue_id,
diff --git a/lib/eventdev/event_timer_adapter_pmd.h b/lib/eventdev/event_timer_adapter_pmd.h
index cd5127f047..fffcd90c8f 100644
--- a/lib/eventdev/event_timer_adapter_pmd.h
+++ b/lib/eventdev/event_timer_adapter_pmd.h
@@ -16,12 +16,12 @@
  * versioning.
  */
 
+#include "rte_event_timer_adapter.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "rte_event_timer_adapter.h"
-
 /*
  * Definitions of functions exported by an event timer adapter implementation
  * through *rte_event_timer_adapter_ops* structure supplied in the
diff --git a/lib/eventdev/eventdev_pmd.h b/lib/eventdev/eventdev_pmd.h
index 7a5699f14b..fd5f7a14f4 100644
--- a/lib/eventdev/eventdev_pmd.h
+++ b/lib/eventdev/eventdev_pmd.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_EVENTDEV_PMD_H_
 #define _RTE_EVENTDEV_PMD_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /** @file
  * RTE Event PMD APIs
  *
@@ -31,6 +27,10 @@ extern "C" {
 #include "event_timer_adapter_pmd.h"
 #include "rte_eventdev.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 extern int rte_event_logtype;
 #define RTE_LOGTYPE_EVENTDEV rte_event_logtype
 
diff --git a/lib/eventdev/eventdev_pmd_pci.h b/lib/eventdev/eventdev_pmd_pci.h
index 26aa3a6635..5cb5916a84 100644
--- a/lib/eventdev/eventdev_pmd_pci.h
+++ b/lib/eventdev/eventdev_pmd_pci.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_EVENTDEV_PMD_PCI_H_
 #define _RTE_EVENTDEV_PMD_PCI_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /** @file
  * RTE Eventdev PCI PMD APIs
  *
@@ -28,6 +24,10 @@ extern "C" {
 
 #include "eventdev_pmd.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 typedef int (*eventdev_pmd_pci_callback_t)(struct rte_eventdev *dev);
 
 /**
diff --git a/lib/eventdev/eventdev_pmd_vdev.h b/lib/eventdev/eventdev_pmd_vdev.h
index bb433ba955..4eaefa0b0b 100644
--- a/lib/eventdev/eventdev_pmd_vdev.h
+++ b/lib/eventdev/eventdev_pmd_vdev.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_EVENTDEV_PMD_VDEV_H_
 #define _RTE_EVENTDEV_PMD_VDEV_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /** @file
  * RTE Eventdev VDEV PMD APIs
  *
@@ -27,6 +23,10 @@ extern "C" {
 
 #include "eventdev_pmd.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @internal
  * Creates a new virtual event device and returns the pointer to that device.
diff --git a/lib/eventdev/eventdev_trace.h b/lib/eventdev/eventdev_trace.h
index 9c2b261c06..8ff8841729 100644
--- a/lib/eventdev/eventdev_trace.h
+++ b/lib/eventdev/eventdev_trace.h
@@ -11,10 +11,6 @@
  * API for ethdev trace support
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_trace_point.h>
 
 #include "rte_eventdev.h"
@@ -22,6 +18,10 @@ extern "C" {
 #include "rte_event_eth_rx_adapter.h"
 #include "rte_event_timer_adapter.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 RTE_TRACE_POINT(
 	rte_eventdev_trace_configure,
 	RTE_TRACE_POINT_ARGS(uint8_t dev_id,
diff --git a/lib/eventdev/rte_event_crypto_adapter.h b/lib/eventdev/rte_event_crypto_adapter.h
index e07f159b77..c9b277c664 100644
--- a/lib/eventdev/rte_event_crypto_adapter.h
+++ b/lib/eventdev/rte_event_crypto_adapter.h
@@ -167,14 +167,14 @@
  * from the start of the rte_crypto_op including initialization vector (IV).
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include "rte_eventdev.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Crypto event adapter mode
  */
diff --git a/lib/eventdev/rte_event_eth_rx_adapter.h b/lib/eventdev/rte_event_eth_rx_adapter.h
index cf42c69b0d..9237e198a7 100644
--- a/lib/eventdev/rte_event_eth_rx_adapter.h
+++ b/lib/eventdev/rte_event_eth_rx_adapter.h
@@ -87,10 +87,6 @@
  * event based so the callback can also modify the event data if it needs to.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_compat.h>
@@ -98,6 +94,10 @@ extern "C" {
 
 #include "rte_eventdev.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_EVENT_ETH_RX_ADAPTER_MAX_INSTANCE 32
 
 /* struct rte_event_eth_rx_adapter_queue_conf flags definitions */
diff --git a/lib/eventdev/rte_event_eth_tx_adapter.h b/lib/eventdev/rte_event_eth_tx_adapter.h
index b38b3fce97..ef01345ac2 100644
--- a/lib/eventdev/rte_event_eth_tx_adapter.h
+++ b/lib/eventdev/rte_event_eth_tx_adapter.h
@@ -76,10 +76,6 @@
  * impact due to a change in how the transmit queue index is specified.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_compat.h>
@@ -87,6 +83,10 @@ extern "C" {
 
 #include "rte_eventdev.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Adapter configuration structure
  *
diff --git a/lib/eventdev/rte_event_ring.h b/lib/eventdev/rte_event_ring.h
index f9cf19ae16..5769da269e 100644
--- a/lib/eventdev/rte_event_ring.h
+++ b/lib/eventdev/rte_event_ring.h
@@ -14,10 +14,6 @@
 #ifndef _RTE_EVENT_RING_
 #define _RTE_EVENT_RING_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_common.h>
@@ -25,6 +21,10 @@ extern "C" {
 #include <rte_ring_elem.h>
 #include "rte_eventdev.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_TAILQ_EVENT_RING_NAME "RTE_EVENT_RING"
 
 /**
diff --git a/lib/eventdev/rte_event_timer_adapter.h b/lib/eventdev/rte_event_timer_adapter.h
index 0bd1b30045..256807b3bf 100644
--- a/lib/eventdev/rte_event_timer_adapter.h
+++ b/lib/eventdev/rte_event_timer_adapter.h
@@ -107,14 +107,14 @@
  * All these use cases require high resolution and low time drift.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 
 #include "rte_eventdev.h"
 #include "rte_eventdev_trace_fp.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Timer adapter clock source
  */
diff --git a/lib/eventdev/rte_eventdev.h b/lib/eventdev/rte_eventdev.h
index 08e5f9320b..e5c5b7df64 100644
--- a/lib/eventdev/rte_eventdev.h
+++ b/lib/eventdev/rte_eventdev.h
@@ -237,10 +237,6 @@
  * \endcode
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include <rte_common.h>
 #include <rte_errno.h>
@@ -2469,6 +2465,10 @@ rte_event_vector_pool_create(const char *name, unsigned int n,
 
 #include <rte_eventdev_core.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static __rte_always_inline uint16_t
 __rte_event_enqueue_burst(uint8_t dev_id, uint8_t port_id,
 			  const struct rte_event ev[], uint16_t nb_events,
diff --git a/lib/eventdev/rte_eventdev_trace_fp.h b/lib/eventdev/rte_eventdev_trace_fp.h
index 04d510ad00..8656f1e6e4 100644
--- a/lib/eventdev/rte_eventdev_trace_fp.h
+++ b/lib/eventdev/rte_eventdev_trace_fp.h
@@ -11,12 +11,12 @@
  * API for ethdev trace support
  */
 
+#include <rte_trace_point.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_trace_point.h>
-
 RTE_TRACE_POINT_FP(
 	rte_eventdev_trace_deq_burst,
 	RTE_TRACE_POINT_ARGS(uint8_t dev_id, uint8_t port_id, void *ev_table,
diff --git a/lib/graph/rte_graph_model_mcore_dispatch.h b/lib/graph/rte_graph_model_mcore_dispatch.h
index 732b89297f..f9ff3daa88 100644
--- a/lib/graph/rte_graph_model_mcore_dispatch.h
+++ b/lib/graph/rte_graph_model_mcore_dispatch.h
@@ -12,10 +12,6 @@
  * dispatch model.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_errno.h>
 #include <rte_mempool.h>
 #include <rte_memzone.h>
@@ -23,6 +19,10 @@ extern "C" {
 
 #include "rte_graph_worker_common.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_GRAPH_SCHED_WQ_SIZE_MULTIPLIER  8
 #define RTE_GRAPH_SCHED_WQ_SIZE(nb_nodes)   \
 	((typeof(nb_nodes))((nb_nodes) * RTE_GRAPH_SCHED_WQ_SIZE_MULTIPLIER))
diff --git a/lib/graph/rte_graph_worker.h b/lib/graph/rte_graph_worker.h
index 03d0e01b68..b0f952a82c 100644
--- a/lib/graph/rte_graph_worker.h
+++ b/lib/graph/rte_graph_worker.h
@@ -6,13 +6,13 @@
 #ifndef _RTE_GRAPH_WORKER_H_
 #define _RTE_GRAPH_WORKER_H_
 
+#include "rte_graph_model_rtc.h"
+#include "rte_graph_model_mcore_dispatch.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "rte_graph_model_rtc.h"
-#include "rte_graph_model_mcore_dispatch.h"
-
 /**
  * Perform graph walk on the circular buffer and invoke the process function
  * of the nodes and collect the stats.
diff --git a/lib/gso/rte_gso.h b/lib/gso/rte_gso.h
index d60cb65f18..75246989dc 100644
--- a/lib/gso/rte_gso.h
+++ b/lib/gso/rte_gso.h
@@ -10,13 +10,13 @@
  * Interface to GSO library
  */
 
+#include <stdint.h>
+#include <rte_mbuf.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-#include <rte_mbuf.h>
-
 /* Minimum GSO segment size for TCP based packets. */
 #define RTE_GSO_SEG_SIZE_MIN (sizeof(struct rte_ether_hdr) + \
 		sizeof(struct rte_ipv4_hdr) + sizeof(struct rte_tcp_hdr) + 1)
diff --git a/lib/hash/rte_fbk_hash.h b/lib/hash/rte_fbk_hash.h
index b01126999b..1f0c1d1b6c 100644
--- a/lib/hash/rte_fbk_hash.h
+++ b/lib/hash/rte_fbk_hash.h
@@ -18,15 +18,15 @@
 #include <stdint.h>
 #include <errno.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <string.h>
 
 #include <rte_hash_crc.h>
 #include <rte_jhash.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifndef RTE_FBK_HASH_INIT_VAL_DEFAULT
 /** Initialising value used when calculating hash. */
 #define RTE_FBK_HASH_INIT_VAL_DEFAULT		0xFFFFFFFF
diff --git a/lib/hash/rte_hash_crc.h b/lib/hash/rte_hash_crc.h
index 8ad2422ec3..fa07c97685 100644
--- a/lib/hash/rte_hash_crc.h
+++ b/lib/hash/rte_hash_crc.h
@@ -11,10 +11,6 @@
  * RTE CRC Hash
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_branch_prediction.h>
@@ -39,6 +35,10 @@ extern uint8_t rte_hash_crc32_alg;
 #include "rte_crc_generic.h"
 #endif
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Allow or disallow use of SSE4.2/ARMv8 intrinsics for CRC32 hash
  * calculation.
diff --git a/lib/hash/rte_jhash.h b/lib/hash/rte_jhash.h
index f2446f081e..b70799d209 100644
--- a/lib/hash/rte_jhash.h
+++ b/lib/hash/rte_jhash.h
@@ -11,10 +11,6 @@
  * jhash functions.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <string.h>
 #include <limits.h>
@@ -23,6 +19,10 @@ extern "C" {
 #include <rte_log.h>
 #include <rte_byteorder.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* jhash.h: Jenkins hash support.
  *
  * Copyright (C) 2006 Bob Jenkins (bob_jenkins@burtleburtle.net)
diff --git a/lib/hash/rte_thash.h b/lib/hash/rte_thash.h
index 30b657e67a..ec9bc57efa 100644
--- a/lib/hash/rte_thash.h
+++ b/lib/hash/rte_thash.h
@@ -15,10 +15,6 @@
  * after GRE header decapsulating)
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_byteorder.h>
@@ -28,6 +24,10 @@ extern "C" {
 
 #if defined(RTE_ARCH_X86) || defined(__ARM_NEON)
 #include <rte_vect.h>
 #endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
 
 #ifdef RTE_ARCH_X86
diff --git a/lib/hash/rte_thash_gfni.h b/lib/hash/rte_thash_gfni.h
index 132f37506d..e82378933c 100644
--- a/lib/hash/rte_thash_gfni.h
+++ b/lib/hash/rte_thash_gfni.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_THASH_GFNI_H_
 #define _RTE_THASH_GFNI_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include <rte_log.h>
 
@@ -18,6 +14,10 @@ extern "C" {
 
 #endif
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @internal
  * Stubs only used when GFNI is not available.
diff --git a/lib/ip_frag/rte_ip_frag.h b/lib/ip_frag/rte_ip_frag.h
index 2ad318096b..84fd717953 100644
--- a/lib/ip_frag/rte_ip_frag.h
+++ b/lib/ip_frag/rte_ip_frag.h
@@ -12,10 +12,6 @@
  * Implementation of IP packet fragmentation and reassembly.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <stdio.h>
 
@@ -25,6 +21,10 @@ extern "C" {
 #include <rte_ip.h>
 #include <rte_byteorder.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct rte_mbuf;
 
 /** death row size (in packets) */
diff --git a/lib/ipsec/rte_ipsec.h b/lib/ipsec/rte_ipsec.h
index f15f6f2966..28b7a61aea 100644
--- a/lib/ipsec/rte_ipsec.h
+++ b/lib/ipsec/rte_ipsec.h
@@ -17,10 +17,6 @@
 #include <rte_ipsec_sa.h>
 #include <rte_mbuf.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 struct rte_ipsec_session;
 
 /**
@@ -181,6 +177,10 @@ rte_ipsec_telemetry_sa_del(const struct rte_ipsec_sa *sa);
 
 #include <rte_ipsec_group.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/log/rte_log.h b/lib/log/rte_log.h
index f357c59548..3735137150 100644
--- a/lib/log/rte_log.h
+++ b/lib/log/rte_log.h
@@ -13,10 +13,6 @@
  * This file provides a log API to RTE applications.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <assert.h>
 #include <stdint.h>
 #include <stdio.h>
@@ -26,6 +22,10 @@ extern "C" {
 #include <rte_common.h>
 #include <rte_config.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* SDK log type */
 #define RTE_LOGTYPE_EAL        0 /**< Log related to eal. */
 				 /* was RTE_LOGTYPE_MALLOC */
diff --git a/lib/lpm/rte_lpm.h b/lib/lpm/rte_lpm.h
index 9c6df311cb..329dc1aad4 100644
--- a/lib/lpm/rte_lpm.h
+++ b/lib/lpm/rte_lpm.h
@@ -391,6 +391,10 @@ static inline void
 rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
 	uint32_t defv);
 
+#ifdef __cplusplus
+}
+#endif
+
 #if defined(RTE_ARCH_ARM)
 #ifdef RTE_HAS_SVE_ACLE
 #include "rte_lpm_sve.h"
@@ -407,8 +411,4 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
 #include "rte_lpm_scalar.h"
 #endif
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* _RTE_LPM_H_ */
diff --git a/lib/member/rte_member.h b/lib/member/rte_member.h
index aec192eba5..109bdd000b 100644
--- a/lib/member/rte_member.h
+++ b/lib/member/rte_member.h
@@ -54,10 +54,6 @@
 #ifndef _RTE_MEMBER_H_
 #define _RTE_MEMBER_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <stdbool.h>
 #include <inttypes.h>
@@ -100,6 +96,10 @@ typedef uint16_t member_set_t;
 #define MEMBER_HASH_FUNC       rte_jhash
 #endif
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** @internal setsummary structure. */
 struct rte_member_setsum;
 
diff --git a/lib/member/rte_member_sketch.h b/lib/member/rte_member_sketch.h
index 74f24ca223..6a8d5104dd 100644
--- a/lib/member/rte_member_sketch.h
+++ b/lib/member/rte_member_sketch.h
@@ -5,13 +5,13 @@
 #ifndef RTE_MEMBER_SKETCH_H
 #define RTE_MEMBER_SKETCH_H
 
+#include <rte_vect.h>
+#include <rte_ring_elem.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_vect.h>
-#include <rte_ring_elem.h>
-
 #define NUM_ROW_SCALAR 5
 #define INTERVAL (1 << 15)
 
diff --git a/lib/member/rte_member_sketch_avx512.h b/lib/member/rte_member_sketch_avx512.h
index 52666b5b4c..a8ef3b065e 100644
--- a/lib/member/rte_member_sketch_avx512.h
+++ b/lib/member/rte_member_sketch_avx512.h
@@ -5,14 +5,14 @@
 #ifndef RTE_MEMBER_SKETCH_AVX512_H
 #define RTE_MEMBER_SKETCH_AVX512_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_vect.h>
 #include "rte_member.h"
 #include "rte_member_sketch.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define NUM_ROW_VEC 8
 
 void
diff --git a/lib/member/rte_member_x86.h b/lib/member/rte_member_x86.h
index d115151f9f..4de453485b 100644
--- a/lib/member/rte_member_x86.h
+++ b/lib/member/rte_member_x86.h
@@ -5,12 +5,12 @@
 #ifndef _RTE_MEMBER_X86_H_
 #define _RTE_MEMBER_X86_H_
 
+#include <x86intrin.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <x86intrin.h>
-
 #if defined(__AVX2__)
 
 static inline int
diff --git a/lib/member/rte_xxh64_avx512.h b/lib/member/rte_xxh64_avx512.h
index ffe6cb79f9..58f896ebb8 100644
--- a/lib/member/rte_xxh64_avx512.h
+++ b/lib/member/rte_xxh64_avx512.h
@@ -5,13 +5,13 @@
 #ifndef RTE_XXH64_AVX512_H
 #define RTE_XXH64_AVX512_H
 
+#include <rte_common.h>
+#include <immintrin.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_common.h>
-#include <immintrin.h>
-
 /* 0b1001111000110111011110011011000110000101111010111100101010000111 */
 static const uint64_t PRIME64_1 = 0x9E3779B185EBCA87ULL;
 /* 0b1100001010110010101011100011110100100111110101001110101101001111 */
diff --git a/lib/mempool/mempool_trace.h b/lib/mempool/mempool_trace.h
index dffef062e4..c595a3116b 100644
--- a/lib/mempool/mempool_trace.h
+++ b/lib/mempool/mempool_trace.h
@@ -11,15 +11,15 @@
  * APIs for mempool trace support
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include "rte_mempool.h"
 
 #include <rte_memzone.h>
 #include <rte_trace_point.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 RTE_TRACE_POINT(
 	rte_mempool_trace_create,
 	RTE_TRACE_POINT_ARGS(const char *name, uint32_t nb_elts,
diff --git a/lib/mempool/rte_mempool_trace_fp.h b/lib/mempool/rte_mempool_trace_fp.h
index ed060e887c..9c5cdbb291 100644
--- a/lib/mempool/rte_mempool_trace_fp.h
+++ b/lib/mempool/rte_mempool_trace_fp.h
@@ -11,12 +11,12 @@
  * Mempool fast path API for trace support
  */
 
+#include <rte_trace_point.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_trace_point.h>
-
 RTE_TRACE_POINT_FP(
 	rte_mempool_trace_ops_dequeue_bulk,
 	RTE_TRACE_POINT_ARGS(void *mempool, void **obj_table,
diff --git a/lib/meter/rte_meter.h b/lib/meter/rte_meter.h
index bd68cbe389..e72bf93b3e 100644
--- a/lib/meter/rte_meter.h
+++ b/lib/meter/rte_meter.h
@@ -6,10 +6,6 @@
 #ifndef __INCLUDE_RTE_METER_H__
 #define __INCLUDE_RTE_METER_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Traffic Metering
@@ -22,6 +18,10 @@ extern "C" {
 
 #include <stdint.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /*
  * Application Programmer's Interface (API)
  */
diff --git a/lib/mldev/mldev_utils.h b/lib/mldev/mldev_utils.h
index 5e2a180adc..bf21067d38 100644
--- a/lib/mldev/mldev_utils.h
+++ b/lib/mldev/mldev_utils.h
@@ -5,10 +5,6 @@
 #ifndef RTE_MLDEV_UTILS_H
 #define RTE_MLDEV_UTILS_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  *
@@ -20,6 +16,10 @@ extern "C" {
 #include <rte_compat.h>
 #include <rte_mldev.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @internal
  *
diff --git a/lib/mldev/rte_mldev_core.h b/lib/mldev/rte_mldev_core.h
index b3bd281083..8dccf125fc 100644
--- a/lib/mldev/rte_mldev_core.h
+++ b/lib/mldev/rte_mldev_core.h
@@ -16,10 +16,6 @@
  * These APIs are for MLDEV PMDs and library only.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <dev_driver.h>
@@ -27,6 +23,10 @@ extern "C" {
 #include <rte_log.h>
 #include <rte_mldev.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Device state */
 #define ML_DEV_DETACHED (0)
 #define ML_DEV_ATTACHED (1)
diff --git a/lib/mldev/rte_mldev_pmd.h b/lib/mldev/rte_mldev_pmd.h
index fd5bbf4360..47c0f23223 100644
--- a/lib/mldev/rte_mldev_pmd.h
+++ b/lib/mldev/rte_mldev_pmd.h
@@ -14,10 +14,6 @@
  * These APIs are for MLDEV PMDs only and user applications should not call them directly.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_common.h>
@@ -25,6 +21,10 @@ extern "C" {
 #include <rte_mldev.h>
 #include <rte_mldev_core.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @internal
  *
diff --git a/lib/net/rte_ether.h b/lib/net/rte_ether.h
index 32ed515aef..403e84f50b 100644
--- a/lib/net/rte_ether.h
+++ b/lib/net/rte_ether.h
@@ -11,10 +11,6 @@
  * Ethernet Helpers in RTE
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <stdio.h>
 
@@ -22,6 +18,10 @@ extern "C" {
 #include <rte_mbuf.h>
 #include <rte_byteorder.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_ETHER_ADDR_LEN  6 /**< Length of Ethernet address. */
 #define RTE_ETHER_TYPE_LEN  2 /**< Length of Ethernet type field. */
 #define RTE_ETHER_CRC_LEN   4 /**< Length of Ethernet CRC. */
diff --git a/lib/net/rte_net.h b/lib/net/rte_net.h
index cdc6cf956d..40ad6a71a1 100644
--- a/lib/net/rte_net.h
+++ b/lib/net/rte_net.h
@@ -5,14 +5,14 @@
 #ifndef _RTE_NET_PTYPE_H_
 #define _RTE_NET_PTYPE_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_ip.h>
 #include <rte_udp.h>
 #include <rte_tcp.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Structure containing header lengths associated to a packet, filled
  * by rte_net_get_ptype().
diff --git a/lib/net/rte_sctp.h b/lib/net/rte_sctp.h
index 965682dc2b..a8ba9e49d8 100644
--- a/lib/net/rte_sctp.h
+++ b/lib/net/rte_sctp.h
@@ -14,14 +14,14 @@
 #ifndef _RTE_SCTP_H_
 #define _RTE_SCTP_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_byteorder.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * SCTP Header
  */
diff --git a/lib/node/rte_node_eth_api.h b/lib/node/rte_node_eth_api.h
index 143cf131b3..2b7019f6bb 100644
--- a/lib/node/rte_node_eth_api.h
+++ b/lib/node/rte_node_eth_api.h
@@ -16,15 +16,15 @@
  * and its queue associations.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include <rte_common.h>
 #include <rte_graph.h>
 #include <rte_mempool.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Port config for ethdev_rx and ethdev_tx node.
  */
diff --git a/lib/node/rte_node_ip4_api.h b/lib/node/rte_node_ip4_api.h
index 24f8ec843a..950751a525 100644
--- a/lib/node/rte_node_ip4_api.h
+++ b/lib/node/rte_node_ip4_api.h
@@ -15,15 +15,15 @@
  * This API allows to do control path functions of ip4_* nodes
  * like ip4_lookup, ip4_rewrite.
  */
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 #include <rte_compat.h>
 
 #include <rte_graph.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * IP4 lookup next nodes.
  */
diff --git a/lib/node/rte_node_ip6_api.h b/lib/node/rte_node_ip6_api.h
index a538dc2ea7..f467aac7b6 100644
--- a/lib/node/rte_node_ip6_api.h
+++ b/lib/node/rte_node_ip6_api.h
@@ -15,13 +15,13 @@
  * This API allows to do control path functions of ip6_* nodes
  * like ip6_lookup, ip6_rewrite.
  */
+#include <rte_common.h>
+#include <rte_compat.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_common.h>
-#include <rte_compat.h>
-
 /**
  * IP6 lookup next nodes.
  */
diff --git a/lib/node/rte_node_udp4_input_api.h b/lib/node/rte_node_udp4_input_api.h
index c873acbbe0..694660bd6a 100644
--- a/lib/node/rte_node_udp4_input_api.h
+++ b/lib/node/rte_node_udp4_input_api.h
@@ -16,14 +16,14 @@
  * like udp4_input.
  *
  */
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 #include <rte_compat.h>
 
 #include "rte_graph.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
 /**
  * UDP4 lookup next nodes.
  */
diff --git a/lib/pci/rte_pci.h b/lib/pci/rte_pci.h
index c26fc77209..9a50a12142 100644
--- a/lib/pci/rte_pci.h
+++ b/lib/pci/rte_pci.h
@@ -12,14 +12,14 @@
  * RTE PCI Library
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdio.h>
 #include <inttypes.h>
 #include <sys/types.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /*
  * Conventional PCI and PCI-X Mode 1 devices have 256 bytes of
  * configuration space.  PCI-X Mode 2 and PCIe devices have 4096 bytes of
diff --git a/lib/pdcp/rte_pdcp.h b/lib/pdcp/rte_pdcp.h
index f74524f83d..15fcbf9607 100644
--- a/lib/pdcp/rte_pdcp.h
+++ b/lib/pdcp/rte_pdcp.h
@@ -19,10 +19,6 @@
 #include <rte_pdcp_hdr.h>
 #include <rte_security.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /* Forward declarations. */
 struct rte_pdcp_entity;
 
@@ -373,6 +369,10 @@ rte_pdcp_t_reordering_expiry_handle(const struct rte_pdcp_entity *entity,
  */
 #include <rte_pdcp_group.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/pipeline/rte_pipeline.h b/lib/pipeline/rte_pipeline.h
index 0c7994b4f2..c9e7172453 100644
--- a/lib/pipeline/rte_pipeline.h
+++ b/lib/pipeline/rte_pipeline.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_PIPELINE_H__
 #define __INCLUDE_RTE_PIPELINE_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Pipeline
@@ -59,6 +55,10 @@ extern "C" {
 #include <rte_table.h>
 #include <rte_common.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct rte_mbuf;
 
 /*
diff --git a/lib/pipeline/rte_port_in_action.h b/lib/pipeline/rte_port_in_action.h
index ec2994599f..9d17bae988 100644
--- a/lib/pipeline/rte_port_in_action.h
+++ b/lib/pipeline/rte_port_in_action.h
@@ -46,10 +46,6 @@
  * @b EXPERIMENTAL: this API may change without prior notice
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_compat.h>
@@ -57,6 +53,10 @@ extern "C" {
 
 #include "rte_pipeline.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Input port actions. */
 enum rte_port_in_action_type {
 	/** Filter selected input packets. */
diff --git a/lib/pipeline/rte_swx_ctl.h b/lib/pipeline/rte_swx_ctl.h
index 6ef2551ab5..c4e63753f5 100644
--- a/lib/pipeline/rte_swx_ctl.h
+++ b/lib/pipeline/rte_swx_ctl.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_CTL_H__
 #define __INCLUDE_RTE_SWX_CTL_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Pipeline Control
@@ -22,6 +18,10 @@ extern "C" {
 #include "rte_swx_port.h"
 #include "rte_swx_table.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct rte_swx_pipeline;
 
 /** Name size. */
diff --git a/lib/pipeline/rte_swx_extern.h b/lib/pipeline/rte_swx_extern.h
index e10e963d63..1553fa81ec 100644
--- a/lib/pipeline/rte_swx_extern.h
+++ b/lib/pipeline/rte_swx_extern.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_EXTERN_H__
 #define __INCLUDE_RTE_SWX_EXTERN_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Extern objects and functions
@@ -19,6 +15,10 @@ extern "C" {
 
 #include <stdint.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /*
  * Extern type
  */
diff --git a/lib/pipeline/rte_swx_ipsec.h b/lib/pipeline/rte_swx_ipsec.h
index 7c07fdc739..d2e5abef7d 100644
--- a/lib/pipeline/rte_swx_ipsec.h
+++ b/lib/pipeline/rte_swx_ipsec.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_IPSEC_H__
 #define __INCLUDE_RTE_SWX_IPSEC_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Internet Protocol Security (IPsec)
@@ -53,6 +49,10 @@ extern "C" {
 #include <rte_compat.h>
 #include <rte_crypto_sym.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * IPsec Setup API
  */
diff --git a/lib/pipeline/rte_swx_pipeline.h b/lib/pipeline/rte_swx_pipeline.h
index 25df042d3b..882bd4bf6f 100644
--- a/lib/pipeline/rte_swx_pipeline.h
+++ b/lib/pipeline/rte_swx_pipeline.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_PIPELINE_H__
 #define __INCLUDE_RTE_SWX_PIPELINE_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Pipeline
@@ -22,6 +18,10 @@ extern "C" {
 #include "rte_swx_table.h"
 #include "rte_swx_extern.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Name size. */
 #ifndef RTE_SWX_NAME_SIZE
 #define RTE_SWX_NAME_SIZE 64
diff --git a/lib/pipeline/rte_swx_pipeline_spec.h b/lib/pipeline/rte_swx_pipeline_spec.h
index dd88c0bfab..077b407c0a 100644
--- a/lib/pipeline/rte_swx_pipeline_spec.h
+++ b/lib/pipeline/rte_swx_pipeline_spec.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_PIPELINE_SPEC_H__
 #define __INCLUDE_RTE_SWX_PIPELINE_SPEC_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <stdio.h>
 
@@ -15,6 +11,10 @@ extern "C" {
 
 #include <rte_swx_pipeline.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /*
  * extobj.
  *
diff --git a/lib/pipeline/rte_table_action.h b/lib/pipeline/rte_table_action.h
index 5dffbeb700..bab4bfd2e2 100644
--- a/lib/pipeline/rte_table_action.h
+++ b/lib/pipeline/rte_table_action.h
@@ -52,10 +52,6 @@
  * @b EXPERIMENTAL: this API may change without prior notice
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_compat.h>
@@ -65,6 +61,10 @@ extern "C" {
 
 #include "rte_pipeline.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Table actions. */
 enum rte_table_action_type {
 	/** Forward to next pipeline table, output port or drop. */
diff --git a/lib/port/rte_port.h b/lib/port/rte_port.h
index 0e30db371e..4b20872537 100644
--- a/lib/port/rte_port.h
+++ b/lib/port/rte_port.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_PORT_H__
 #define __INCLUDE_RTE_PORT_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Port
@@ -20,6 +16,10 @@ extern "C" {
 #include <stdint.h>
 #include <rte_mbuf.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**@{
  * Macros to allow accessing metadata stored in the mbuf headroom
  * just beyond the end of the mbuf data structure returned by a port
diff --git a/lib/port/rte_port_ethdev.h b/lib/port/rte_port_ethdev.h
index e07021cb89..7729ff0da3 100644
--- a/lib/port/rte_port_ethdev.h
+++ b/lib/port/rte_port_ethdev.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_PORT_ETHDEV_H__
 #define __INCLUDE_RTE_PORT_ETHDEV_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Port Ethernet Device
@@ -21,6 +17,10 @@ extern "C" {
 
 #include "rte_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** ethdev_reader port parameters */
 struct rte_port_ethdev_reader_params {
 	/** NIC RX port ID */
diff --git a/lib/port/rte_port_eventdev.h b/lib/port/rte_port_eventdev.h
index 0efb8e1021..d9eccf07d4 100644
--- a/lib/port/rte_port_eventdev.h
+++ b/lib/port/rte_port_eventdev.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_PORT_EVENTDEV_H__
 #define __INCLUDE_RTE_PORT_EVENTDEV_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Port Eventdev Interface
@@ -24,6 +20,10 @@ extern "C" {
 
 #include "rte_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Eventdev_reader port parameters */
 struct rte_port_eventdev_reader_params {
 	/** Eventdev Device ID */
diff --git a/lib/port/rte_port_fd.h b/lib/port/rte_port_fd.h
index 885b9ada22..40a5e4a426 100644
--- a/lib/port/rte_port_fd.h
+++ b/lib/port/rte_port_fd.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_PORT_FD_H__
 #define __INCLUDE_RTE_PORT_FD_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Port FD Device
@@ -21,6 +17,10 @@ extern "C" {
 
 #include "rte_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** fd_reader port parameters */
 struct rte_port_fd_reader_params {
 	/** File descriptor */
diff --git a/lib/port/rte_port_frag.h b/lib/port/rte_port_frag.h
index 4055872e8d..9a10f10523 100644
--- a/lib/port/rte_port_frag.h
+++ b/lib/port/rte_port_frag.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_PORT_IP_FRAG_H__
 #define __INCLUDE_RTE_PORT_IP_FRAG_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Port for IPv4 Fragmentation
@@ -31,6 +27,10 @@ extern "C" {
 
 #include "rte_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** ring_reader_ipv4_frag port parameters */
 struct rte_port_ring_reader_frag_params {
 	/** Underlying single consumer ring that has to be pre-initialized. */
diff --git a/lib/port/rte_port_ras.h b/lib/port/rte_port_ras.h
index 94cfb3ed92..86e36f5362 100644
--- a/lib/port/rte_port_ras.h
+++ b/lib/port/rte_port_ras.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_PORT_RAS_H__
 #define __INCLUDE_RTE_PORT_RAS_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Port for IPv4 Reassembly
@@ -31,6 +27,10 @@ extern "C" {
 
 #include "rte_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** ring_writer_ipv4_ras port parameters */
 struct rte_port_ring_writer_ras_params {
 	/** Underlying single consumer ring that has to be pre-initialized. */
diff --git a/lib/port/rte_port_ring.h b/lib/port/rte_port_ring.h
index 027928c924..2089d0889b 100644
--- a/lib/port/rte_port_ring.h
+++ b/lib/port/rte_port_ring.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_PORT_RING_H__
 #define __INCLUDE_RTE_PORT_RING_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Port Ring
@@ -27,6 +23,10 @@ extern "C" {
 
 #include "rte_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** ring_reader port parameters */
 struct rte_port_ring_reader_params {
 	/** Underlying consumer ring that has to be pre-initialized */
diff --git a/lib/port/rte_port_sched.h b/lib/port/rte_port_sched.h
index 251380ef80..1bf08ae6a9 100644
--- a/lib/port/rte_port_sched.h
+++ b/lib/port/rte_port_sched.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_PORT_SCHED_H__
 #define __INCLUDE_RTE_PORT_SCHED_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Port Hierarchical Scheduler
@@ -23,6 +19,10 @@ extern "C" {
 
 #include "rte_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** sched_reader port parameters */
 struct rte_port_sched_reader_params {
 	/** Underlying pre-initialized rte_sched_port */
diff --git a/lib/port/rte_port_source_sink.h b/lib/port/rte_port_source_sink.h
index bcdbaf1e40..3122dd5038 100644
--- a/lib/port/rte_port_source_sink.h
+++ b/lib/port/rte_port_source_sink.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_PORT_SOURCE_SINK_H__
 #define __INCLUDE_RTE_PORT_SOURCE_SINK_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Port Source/Sink
@@ -19,6 +15,10 @@ extern "C" {
 
 #include "rte_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** source port parameters */
 struct rte_port_source_params {
 	/** Pre-initialized buffer pool */
diff --git a/lib/port/rte_port_sym_crypto.h b/lib/port/rte_port_sym_crypto.h
index 6532b4388a..d03cdc1e8b 100644
--- a/lib/port/rte_port_sym_crypto.h
+++ b/lib/port/rte_port_sym_crypto.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_PORT_SYM_CRYPTO_H__
 #define __INCLUDE_RTE_PORT_SYM_CRYPTO_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Port sym crypto Interface
@@ -23,6 +19,10 @@ extern "C" {
 
 #include "rte_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Function prototype for reader post action. */
 typedef void (*rte_port_sym_crypto_reader_callback_fn)(struct rte_mbuf **pkts,
 		uint16_t n_pkts, void *arg);
diff --git a/lib/port/rte_swx_port.h b/lib/port/rte_swx_port.h
index 1dbd95ae87..b52b125572 100644
--- a/lib/port/rte_swx_port.h
+++ b/lib/port/rte_swx_port.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_PORT_H__
 #define __INCLUDE_RTE_SWX_PORT_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Port
@@ -17,6 +13,10 @@ extern "C" {
 
 #include <stdint.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Packet. */
 struct rte_swx_pkt {
 	/** Opaque packet handle. */
diff --git a/lib/port/rte_swx_port_ethdev.h b/lib/port/rte_swx_port_ethdev.h
index cbc2d7b213..1828031e67 100644
--- a/lib/port/rte_swx_port_ethdev.h
+++ b/lib/port/rte_swx_port_ethdev.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_PORT_ETHDEV_H__
 #define __INCLUDE_RTE_SWX_PORT_ETHDEV_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Ethernet Device Input and Output Ports
@@ -17,6 +13,10 @@ extern "C" {
 
 #include "rte_swx_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Ethernet device input port (reader) creation parameters. */
 struct rte_swx_port_ethdev_reader_params {
 	/** Name of a valid and fully configured Ethernet device. */
diff --git a/lib/port/rte_swx_port_fd.h b/lib/port/rte_swx_port_fd.h
index e61719c8f6..63529cf0ab 100644
--- a/lib/port/rte_swx_port_fd.h
+++ b/lib/port/rte_swx_port_fd.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_SWX_PORT_FD_H__
 #define __INCLUDE_RTE_SWX_PORT_FD_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX FD Input and Output Ports
@@ -18,6 +14,10 @@ extern "C" {
 
 #include "rte_swx_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** fd_reader port parameters */
 struct rte_swx_port_fd_reader_params {
 	/** File descriptor. Must be valid and opened in non-blocking mode. */
diff --git a/lib/port/rte_swx_port_ring.h b/lib/port/rte_swx_port_ring.h
index efc485fb08..ef241c3fee 100644
--- a/lib/port/rte_swx_port_ring.h
+++ b/lib/port/rte_swx_port_ring.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_SWX_PORT_RING_H__
 #define __INCLUDE_RTE_SWX_PORT_RING_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Ring Input and Output Ports
@@ -18,6 +14,10 @@ extern "C" {
 
 #include "rte_swx_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Ring input port (reader) creation parameters. */
 struct rte_swx_port_ring_reader_params {
 	/** Name of valid RTE ring. */
diff --git a/lib/port/rte_swx_port_source_sink.h b/lib/port/rte_swx_port_source_sink.h
index 91bcbf74f4..e3ca7cfbb4 100644
--- a/lib/port/rte_swx_port_source_sink.h
+++ b/lib/port/rte_swx_port_source_sink.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_PORT_SOURCE_SINK_H__
 #define __INCLUDE_RTE_SWX_PORT_SOURCE_SINK_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Source and Sink Ports
@@ -15,6 +11,10 @@ extern "C" {
 
 #include "rte_swx_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Maximum number of packets to read from the PCAP file. */
 #ifndef RTE_SWX_PORT_SOURCE_PKTS_MAX
 #define RTE_SWX_PORT_SOURCE_PKTS_MAX 1024
diff --git a/lib/rawdev/rte_rawdev.h b/lib/rawdev/rte_rawdev.h
index 640037b524..3fc471526e 100644
--- a/lib/rawdev/rte_rawdev.h
+++ b/lib/rawdev/rte_rawdev.h
@@ -14,13 +14,13 @@
  * no specific type already available in DPDK.
  */
 
+#include <rte_common.h>
+#include <rte_memory.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_common.h>
-#include <rte_memory.h>
-
 /* Rawdevice object - essentially a void to be typecast by implementation */
 typedef void *rte_rawdev_obj_t;
 
diff --git a/lib/rawdev/rte_rawdev_pmd.h b/lib/rawdev/rte_rawdev_pmd.h
index 22b406444d..408ed461a4 100644
--- a/lib/rawdev/rte_rawdev_pmd.h
+++ b/lib/rawdev/rte_rawdev_pmd.h
@@ -13,10 +13,6 @@
  * any application.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <string.h>
 
 #include <dev_driver.h>
@@ -26,6 +22,10 @@ extern "C" {
 
 #include "rte_rawdev.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 extern int librawdev_logtype;
 #define RTE_LOGTYPE_RAWDEV librawdev_logtype
 
diff --git a/lib/rcu/rte_rcu_qsbr.h b/lib/rcu/rte_rcu_qsbr.h
index ed3dd6d3d2..550fadf56a 100644
--- a/lib/rcu/rte_rcu_qsbr.h
+++ b/lib/rcu/rte_rcu_qsbr.h
@@ -21,10 +21,6 @@
  * entered quiescent state.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <inttypes.h>
 #include <stdalign.h>
 #include <stdbool.h>
@@ -36,6 +32,10 @@ extern "C" {
 #include <rte_atomic.h>
 #include <rte_ring.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 extern int rte_rcu_log_type;
 #define RTE_LOGTYPE_RCU rte_rcu_log_type
 
diff --git a/lib/regexdev/rte_regexdev.h b/lib/regexdev/rte_regexdev.h
index a50b841b1e..b18a1d4251 100644
--- a/lib/regexdev/rte_regexdev.h
+++ b/lib/regexdev/rte_regexdev.h
@@ -194,10 +194,6 @@
  * - rte_regexdev_dequeue_burst()
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include <rte_common.h>
 #include <rte_dev.h>
@@ -1428,6 +1424,10 @@ struct rte_regex_ops {
 
 #include "rte_regexdev_core.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @warning
  * @b EXPERIMENTAL: this API may change without prior notice.
diff --git a/lib/ring/rte_ring.h b/lib/ring/rte_ring.h
index c709f30497..11ca69c73d 100644
--- a/lib/ring/rte_ring.h
+++ b/lib/ring/rte_ring.h
@@ -34,13 +34,13 @@
  * for more information.
  */
 
+#include <rte_ring_core.h>
+#include <rte_ring_elem.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_ring_core.h>
-#include <rte_ring_elem.h>
-
 /**
  * Calculate the memory size needed for a ring
  *
diff --git a/lib/ring/rte_ring_core.h b/lib/ring/rte_ring_core.h
index 270869d214..222c5aeb3f 100644
--- a/lib/ring/rte_ring_core.h
+++ b/lib/ring/rte_ring_core.h
@@ -19,10 +19,6 @@
  * instead.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdalign.h>
 #include <stdio.h>
 #include <stdint.h>
@@ -38,6 +34,10 @@ extern "C" {
 #include <rte_pause.h>
 #include <rte_debug.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_TAILQ_RING_NAME "RTE_RING"
 
 /** enqueue/dequeue behavior types */
diff --git a/lib/ring/rte_ring_elem.h b/lib/ring/rte_ring_elem.h
index 7f7d4951d3..506f686884 100644
--- a/lib/ring/rte_ring_elem.h
+++ b/lib/ring/rte_ring_elem.h
@@ -16,10 +16,6 @@
  * RTE Ring with user defined element size
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_ring_core.h>
 #include <rte_ring_elem_pvt.h>
 
@@ -699,6 +695,10 @@ rte_ring_dequeue_burst_elem(struct rte_ring *r, void *obj_table,
 
 #include <rte_ring.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/ring/rte_ring_hts.h b/lib/ring/rte_ring_hts.h
index 9a5938ac58..a41acea740 100644
--- a/lib/ring/rte_ring_hts.h
+++ b/lib/ring/rte_ring_hts.h
@@ -24,12 +24,12 @@
  * To achieve that 64-bit CAS is used by head update routine.
  */
 
+#include <rte_ring_hts_elem_pvt.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_ring_hts_elem_pvt.h>
-
 /**
  * Enqueue several objects on the HTS ring (multi-producers safe).
  *
diff --git a/lib/ring/rte_ring_peek.h b/lib/ring/rte_ring_peek.h
index c0621d12e2..2312f52668 100644
--- a/lib/ring/rte_ring_peek.h
+++ b/lib/ring/rte_ring_peek.h
@@ -43,12 +43,12 @@
  * with enqueue(/dequeue) operation till _finish_ completes.
  */
 
+#include <rte_ring_peek_elem_pvt.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_ring_peek_elem_pvt.h>
-
 /**
  * Start to enqueue several objects on the ring.
  * Note that no actual objects are put in the queue by this function,
diff --git a/lib/ring/rte_ring_peek_zc.h b/lib/ring/rte_ring_peek_zc.h
index 0b5e34b731..3254fe0481 100644
--- a/lib/ring/rte_ring_peek_zc.h
+++ b/lib/ring/rte_ring_peek_zc.h
@@ -67,12 +67,12 @@
  * with enqueue/dequeue operation till _finish_ completes.
  */
 
+#include <rte_ring_peek_elem_pvt.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_ring_peek_elem_pvt.h>
-
 /**
  * Ring zero-copy information structure.
  *
diff --git a/lib/ring/rte_ring_rts.h b/lib/ring/rte_ring_rts.h
index 50fc8f74db..d7a3863c83 100644
--- a/lib/ring/rte_ring_rts.h
+++ b/lib/ring/rte_ring_rts.h
@@ -51,12 +51,12 @@
  * By default HTD_MAX == ring.capacity / 8.
  */
 
+#include <rte_ring_rts_elem_pvt.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_ring_rts_elem_pvt.h>
-
 /**
  * Enqueue several objects on the RTS ring (multi-producers safe).
  *
diff --git a/lib/sched/rte_approx.h b/lib/sched/rte_approx.h
index b60086330e..738e33a98b 100644
--- a/lib/sched/rte_approx.h
+++ b/lib/sched/rte_approx.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_APPROX_H__
 #define __INCLUDE_RTE_APPROX_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Rational Approximation
@@ -20,6 +16,10 @@ extern "C" {
 
 #include <stdint.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Find best rational approximation
  *
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
index 1477a47700..2a385ffdba 100644
--- a/lib/sched/rte_pie.h
+++ b/lib/sched/rte_pie.h
@@ -5,10 +5,6 @@
 #ifndef __RTE_PIE_H_INCLUDED__
 #define __RTE_PIE_H_INCLUDED__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * Proportional Integral controller Enhanced (PIE)
@@ -20,6 +16,10 @@ extern "C" {
 #include <rte_debug.h>
 #include <rte_cycles.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
 				     * to start measurement cycle (bytes)
 				     */
diff --git a/lib/sched/rte_red.h b/lib/sched/rte_red.h
index afaa35fcd6..e62abb9295 100644
--- a/lib/sched/rte_red.h
+++ b/lib/sched/rte_red.h
@@ -5,10 +5,6 @@
 #ifndef __RTE_RED_H_INCLUDED__
 #define __RTE_RED_H_INCLUDED__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Random Early Detection (RED)
@@ -20,6 +16,10 @@ extern "C" {
 #include <rte_cycles.h>
 #include <rte_branch_prediction.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_RED_SCALING                     10         /**< Fraction size for fixed-point */
 #define RTE_RED_S                           (1 << 22)  /**< Packet size multiplied by number of leaf queues */
 #define RTE_RED_MAX_TH_MAX                  1023       /**< Max threshold limit in fixed point format */
diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
index b882c4a882..222e6b3583 100644
--- a/lib/sched/rte_sched.h
+++ b/lib/sched/rte_sched.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_SCHED_H__
 #define __INCLUDE_RTE_SCHED_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Hierarchical Scheduler
@@ -62,6 +58,10 @@ extern "C" {
 #include "rte_red.h"
 #include "rte_pie.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Maximum number of queues per pipe.
  * Note that the multiple queues (power of 2) can only be assigned to
  * lowest priority (best-effort) traffic class. Other higher priority traffic
diff --git a/lib/sched/rte_sched_common.h b/lib/sched/rte_sched_common.h
index 573d164569..a5acb9c08a 100644
--- a/lib/sched/rte_sched_common.h
+++ b/lib/sched/rte_sched_common.h
@@ -5,13 +5,13 @@
 #ifndef __INCLUDE_RTE_SCHED_COMMON_H__
 #define __INCLUDE_RTE_SCHED_COMMON_H__
 
+#include <stdint.h>
+#include <sys/types.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-#include <sys/types.h>
-
 #if 0
 static inline uint32_t
 rte_min_pos_4_u16(uint16_t *x)
diff --git a/lib/security/rte_security.h b/lib/security/rte_security.h
index 1c8474b74f..7a9bafa0fa 100644
--- a/lib/security/rte_security.h
+++ b/lib/security/rte_security.h
@@ -12,10 +12,6 @@
  * RTE Security Common Definitions
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <sys/types.h>
 
 #include <rte_compat.h>
@@ -24,6 +20,10 @@ extern "C" {
 #include <rte_ip.h>
 #include <rte_mbuf_dyn.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** IPSec protocol mode */
 enum rte_security_ipsec_sa_mode {
 	RTE_SECURITY_IPSEC_SA_MODE_TRANSPORT = 1,
diff --git a/lib/security/rte_security_driver.h b/lib/security/rte_security_driver.h
index 9bb5052a4c..2ceb145066 100644
--- a/lib/security/rte_security_driver.h
+++ b/lib/security/rte_security_driver.h
@@ -12,13 +12,13 @@
  * RTE Security Common Definitions
  */
 
+#include <rte_compat.h>
+#include "rte_security.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_compat.h>
-#include "rte_security.h"
-
 /**
  * @internal
  * Security session to be used by library for internal usage
diff --git a/lib/stack/rte_stack.h b/lib/stack/rte_stack.h
index 3325757568..4439adfc42 100644
--- a/lib/stack/rte_stack.h
+++ b/lib/stack/rte_stack.h
@@ -15,10 +15,6 @@
 #ifndef _RTE_STACK_H_
 #define _RTE_STACK_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdalign.h>
 
 #include <rte_debug.h>
@@ -95,6 +91,10 @@ struct __rte_cache_aligned rte_stack {
 #include "rte_stack_std.h"
 #include "rte_stack_lf.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Push several objects on the stack (MT-safe).
  *
diff --git a/lib/table/rte_lru.h b/lib/table/rte_lru.h
index 88229d8632..bc1ad36500 100644
--- a/lib/table/rte_lru.h
+++ b/lib/table/rte_lru.h
@@ -5,15 +5,15 @@
 #ifndef __INCLUDE_RTE_LRU_H__
 #define __INCLUDE_RTE_LRU_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_config.h>
 #ifdef RTE_ARCH_X86_64
 #include "rte_lru_x86.h"
 #elif defined(RTE_ARCH_ARM64)
 #include "rte_lru_arm64.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
 #else
 #undef RTE_TABLE_HASH_LRU_STRATEGY
 #define RTE_TABLE_HASH_LRU_STRATEGY                        1
@@ -86,8 +86,4 @@ do {									\
 
 #endif
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif
diff --git a/lib/table/rte_lru_arm64.h b/lib/table/rte_lru_arm64.h
index f19b0bdb4e..f9a4678ee0 100644
--- a/lib/table/rte_lru_arm64.h
+++ b/lib/table/rte_lru_arm64.h
@@ -5,14 +5,14 @@
 #ifndef __RTE_LRU_ARM64_H__
 #define __RTE_LRU_ARM64_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <rte_vect.h>
 #include <rte_bitops.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifndef RTE_TABLE_HASH_LRU_STRATEGY
 #ifdef __ARM_NEON
 #define RTE_TABLE_HASH_LRU_STRATEGY                        3
diff --git a/lib/table/rte_lru_x86.h b/lib/table/rte_lru_x86.h
index ddfb8c1c8c..93f4a136a8 100644
--- a/lib/table/rte_lru_x86.h
+++ b/lib/table/rte_lru_x86.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_LRU_X86_H__
 #define __INCLUDE_RTE_LRU_X86_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_config.h>
@@ -97,8 +93,4 @@ do {									\
 
 #endif
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif
diff --git a/lib/table/rte_swx_hash_func.h b/lib/table/rte_swx_hash_func.h
index 04f3d543e7..9c65cfa913 100644
--- a/lib/table/rte_swx_hash_func.h
+++ b/lib/table/rte_swx_hash_func.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_HASH_FUNC_H__
 #define __INCLUDE_RTE_SWX_HASH_FUNC_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Hash Function
@@ -15,6 +11,10 @@ extern "C" {
 
 #include <stdint.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Hash function prototype
  *
diff --git a/lib/table/rte_swx_keycmp.h b/lib/table/rte_swx_keycmp.h
index 09fb1be869..b0ed819307 100644
--- a/lib/table/rte_swx_keycmp.h
+++ b/lib/table/rte_swx_keycmp.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_KEYCMP_H__
 #define __INCLUDE_RTE_SWX_KEYCMP_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Key Comparison Functions
@@ -16,6 +12,10 @@ extern "C" {
 #include <stdint.h>
 #include <string.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Key comparison function prototype
  *
diff --git a/lib/table/rte_swx_table.h b/lib/table/rte_swx_table.h
index ac01e19781..3c53459498 100644
--- a/lib/table/rte_swx_table.h
+++ b/lib/table/rte_swx_table.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_TABLE_H__
 #define __INCLUDE_RTE_SWX_TABLE_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Table
@@ -21,6 +17,10 @@ extern "C" {
 
 #include "rte_swx_hash_func.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Match type. */
 enum rte_swx_table_match_type {
 	/** Wildcard Match (WM). */
diff --git a/lib/table/rte_swx_table_em.h b/lib/table/rte_swx_table_em.h
index b7423dd060..592541f01f 100644
--- a/lib/table/rte_swx_table_em.h
+++ b/lib/table/rte_swx_table_em.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_TABLE_EM_H__
 #define __INCLUDE_RTE_SWX_TABLE_EM_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Exact Match Table
@@ -16,6 +12,10 @@ extern "C" {
 
 #include <rte_swx_table.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Exact match table operations - unoptimized. */
 extern struct rte_swx_table_ops rte_swx_table_exact_match_unoptimized_ops;
 
diff --git a/lib/table/rte_swx_table_learner.h b/lib/table/rte_swx_table_learner.h
index c5ea015b8d..9a18be083d 100644
--- a/lib/table/rte_swx_table_learner.h
+++ b/lib/table/rte_swx_table_learner.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_TABLE_LEARNER_H__
 #define __INCLUDE_RTE_SWX_TABLE_LEARNER_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Learner Table
@@ -53,6 +49,10 @@ extern "C" {
 
 #include "rte_swx_hash_func.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Maximum number of key timeout values per learner table. */
 #ifndef RTE_SWX_TABLE_LEARNER_N_KEY_TIMEOUTS_MAX
 #define RTE_SWX_TABLE_LEARNER_N_KEY_TIMEOUTS_MAX 16
diff --git a/lib/table/rte_swx_table_selector.h b/lib/table/rte_swx_table_selector.h
index 05863cc90b..ef29bdb6b0 100644
--- a/lib/table/rte_swx_table_selector.h
+++ b/lib/table/rte_swx_table_selector.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_TABLE_SELECTOR_H__
 #define __INCLUDE_RTE_SWX_TABLE_SELECTOR_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Selector Table
@@ -21,6 +17,10 @@ extern "C" {
 
 #include "rte_swx_table.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Selector table creation parameters. */
 struct rte_swx_table_selector_params {
 	/** Group ID offset. */
diff --git a/lib/table/rte_swx_table_wm.h b/lib/table/rte_swx_table_wm.h
index 4fd52c0a17..7eb6f8e2a6 100644
--- a/lib/table/rte_swx_table_wm.h
+++ b/lib/table/rte_swx_table_wm.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_TABLE_WM_H__
 #define __INCLUDE_RTE_SWX_TABLE_WM_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Wildcard Match Table
@@ -16,6 +12,10 @@ extern "C" {
 
 #include <rte_swx_table.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Wildcard match table operations. */
 extern struct rte_swx_table_ops rte_swx_table_wildcard_match_ops;
 
diff --git a/lib/table/rte_table.h b/lib/table/rte_table.h
index 9a5faf0e32..43a5a1a7b3 100644
--- a/lib/table/rte_table.h
+++ b/lib/table/rte_table.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_TABLE_H__
 #define __INCLUDE_RTE_TABLE_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Table
@@ -27,6 +23,10 @@ extern "C" {
 #include <stdint.h>
 #include <rte_port.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct rte_mbuf;
 
 /** Lookup table statistics */
diff --git a/lib/table/rte_table_acl.h b/lib/table/rte_table_acl.h
index 1cb7b9fbbd..61af7b88e4 100644
--- a/lib/table/rte_table_acl.h
+++ b/lib/table/rte_table_acl.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_TABLE_ACL_H__
 #define __INCLUDE_RTE_TABLE_ACL_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Table ACL
@@ -25,6 +21,10 @@ extern "C" {
 
 #include "rte_table.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** ACL table parameters */
 struct rte_table_acl_params {
 	/** Name */
diff --git a/lib/table/rte_table_array.h b/lib/table/rte_table_array.h
index fad83b0588..b2a7b95d68 100644
--- a/lib/table/rte_table_array.h
+++ b/lib/table/rte_table_array.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_TABLE_ARRAY_H__
 #define __INCLUDE_RTE_TABLE_ARRAY_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Table Array
@@ -20,6 +16,10 @@ extern "C" {
 
 #include "rte_table.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Array table parameters */
 struct rte_table_array_params {
 	/** Number of array entries. Has to be a power of two. */
diff --git a/lib/table/rte_table_hash.h b/lib/table/rte_table_hash.h
index 6698621dae..ff8fc9e9ce 100644
--- a/lib/table/rte_table_hash.h
+++ b/lib/table/rte_table_hash.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_TABLE_HASH_H__
 #define __INCLUDE_RTE_TABLE_HASH_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Table Hash
@@ -52,6 +48,10 @@ extern "C" {
 
 #include "rte_table.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Hash function */
 typedef uint64_t (*rte_table_hash_op_hash)(
 	void *key,
diff --git a/lib/table/rte_table_hash_cuckoo.h b/lib/table/rte_table_hash_cuckoo.h
index 3a55d28e9b..55aa12216a 100644
--- a/lib/table/rte_table_hash_cuckoo.h
+++ b/lib/table/rte_table_hash_cuckoo.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_TABLE_HASH_CUCKOO_H__
 #define __INCLUDE_RTE_TABLE_HASH_CUCKOO_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Table Hash Cuckoo
@@ -20,6 +16,10 @@ extern "C" {
 
 #include "rte_table.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Hash table parameters */
 struct rte_table_hash_cuckoo_params {
 	/** Name */
diff --git a/lib/table/rte_table_hash_func.h b/lib/table/rte_table_hash_func.h
index aa779c2182..cba7ec4c20 100644
--- a/lib/table/rte_table_hash_func.h
+++ b/lib/table/rte_table_hash_func.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_TABLE_HASH_FUNC_H__
 #define __INCLUDE_RTE_TABLE_HASH_FUNC_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_compat.h>
@@ -18,6 +14,10 @@ extern "C" {
 
 #include <x86intrin.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline uint64_t
 rte_crc32_u64(uint64_t crc, uint64_t v)
 {
@@ -28,6 +28,10 @@ rte_crc32_u64(uint64_t crc, uint64_t v)
 #include "rte_table_hash_func_arm64.h"
 #else
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline uint64_t
 rte_crc32_u64(uint64_t crc, uint64_t v)
 {
diff --git a/lib/table/rte_table_lpm.h b/lib/table/rte_table_lpm.h
index dde32deed9..59b9bdee89 100644
--- a/lib/table/rte_table_lpm.h
+++ b/lib/table/rte_table_lpm.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_TABLE_LPM_H__
 #define __INCLUDE_RTE_TABLE_LPM_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Table LPM for IPv4
@@ -45,6 +41,10 @@ extern "C" {
 
 #include "rte_table.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** LPM table parameters */
 struct rte_table_lpm_params {
 	/** Table name */
diff --git a/lib/table/rte_table_lpm_ipv6.h b/lib/table/rte_table_lpm_ipv6.h
index 96ddbd32c2..166a5ba9ee 100644
--- a/lib/table/rte_table_lpm_ipv6.h
+++ b/lib/table/rte_table_lpm_ipv6.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_TABLE_LPM_IPV6_H__
 #define __INCLUDE_RTE_TABLE_LPM_IPV6_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Table LPM for IPv6
@@ -45,6 +41,10 @@ extern "C" {
 
 #include "rte_table.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_LPM_IPV6_ADDR_SIZE 16
 
 /** LPM table parameters */
diff --git a/lib/table/rte_table_stub.h b/lib/table/rte_table_stub.h
index 846526ea99..f7e589df16 100644
--- a/lib/table/rte_table_stub.h
+++ b/lib/table/rte_table_stub.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_TABLE_STUB_H__
 #define __INCLUDE_RTE_TABLE_STUB_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Table Stub
@@ -18,6 +14,10 @@ extern "C" {
 
 #include "rte_table.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Stub table parameters: NONE */
 
 /** Stub table operations */
diff --git a/lib/telemetry/rte_telemetry.h b/lib/telemetry/rte_telemetry.h
index cab9daa6fe..463819e2bf 100644
--- a/lib/telemetry/rte_telemetry.h
+++ b/lib/telemetry/rte_telemetry.h
@@ -5,14 +5,14 @@
 #ifndef _RTE_TELEMETRY_H_
 #define _RTE_TELEMETRY_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <rte_compat.h>
 #include <rte_common.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Maximum length for string used in object. */
 #define RTE_TEL_MAX_STRING_LEN 128
 /** Maximum length of string. */
diff --git a/lib/vhost/rte_vdpa.h b/lib/vhost/rte_vdpa.h
index 6ac85d1bbf..18e273c20f 100644
--- a/lib/vhost/rte_vdpa.h
+++ b/lib/vhost/rte_vdpa.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_VDPA_H_
 #define _RTE_VDPA_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  *
@@ -17,6 +13,10 @@ extern "C" {
 
 #include <stdint.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Maximum name length for statistics counters */
 #define RTE_VDPA_STATS_NAME_SIZE 64
 
diff --git a/lib/vhost/rte_vhost.h b/lib/vhost/rte_vhost.h
index b0434c4b8d..c7a5f56df8 100644
--- a/lib/vhost/rte_vhost.h
+++ b/lib/vhost/rte_vhost.h
@@ -18,10 +18,6 @@
 #include <rte_memory.h>
 #include <rte_mempool.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #ifndef __cplusplus
 /* These are not C++-aware. */
 #include <linux/vhost.h>
@@ -29,6 +25,10 @@ extern "C" {
 #include <linux/virtio_net.h>
 #endif
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_VHOST_USER_CLIENT		(1ULL << 0)
 #define RTE_VHOST_USER_NO_RECONNECT	(1ULL << 1)
 #define RTE_VHOST_USER_RESERVED_1	(1ULL << 2)
diff --git a/lib/vhost/rte_vhost_async.h b/lib/vhost/rte_vhost_async.h
index 8f190dd44b..60995e4e62 100644
--- a/lib/vhost/rte_vhost_async.h
+++ b/lib/vhost/rte_vhost_async.h
@@ -5,15 +5,15 @@
 #ifndef _RTE_VHOST_ASYNC_H_
 #define _RTE_VHOST_ASYNC_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_compat.h>
 #include <rte_mbuf.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Register an async channel for a vhost queue
  *
diff --git a/lib/vhost/rte_vhost_crypto.h b/lib/vhost/rte_vhost_crypto.h
index f962a53818..af61f0907e 100644
--- a/lib/vhost/rte_vhost_crypto.h
+++ b/lib/vhost/rte_vhost_crypto.h
@@ -5,12 +5,12 @@
 #ifndef _VHOST_CRYPTO_H_
 #define _VHOST_CRYPTO_H_
 
+#include <stdint.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-
 /* pre-declare structs to avoid including full headers */
 struct rte_mempool;
 struct rte_crypto_op;
diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h
index 8db4ab9f4d..42392a0d14 100644
--- a/lib/vhost/vdpa_driver.h
+++ b/lib/vhost/vdpa_driver.h
@@ -5,10 +5,6 @@
 #ifndef _VDPA_DRIVER_H_
 #define _VDPA_DRIVER_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdbool.h>
 
 #include <rte_compat.h>
@@ -16,6 +12,10 @@ extern "C" {
 #include "rte_vhost.h"
 #include "rte_vdpa.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_VHOST_QUEUE_ALL UINT16_MAX
 
 /**
-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [PATCH v9 2/6] eal: extend bit manipulation functionality
  2024-09-18  9:04               ` [PATCH v9 0/6] Improve EAL bit operations API Mattias Rönnblom
  2024-09-18  9:04                 ` [PATCH v9 1/6] dpdk: do not force C linkage on include file dependencies Mattias Rönnblom
@ 2024-09-18  9:04                 ` Mattias Rönnblom
  2024-09-18  9:04                 ` [PATCH v9 3/6] eal: add unit tests for bit operations Mattias Rönnblom
                                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 63+ messages in thread
From: Mattias Rönnblom @ 2024-09-18  9:04 UTC (permalink / raw)
  To: dev
  Cc: hofors, Heng Wang, Stephen Hemminger, Tyler Retzlaff,
	Morten Brørup, Jack Bond-Preston, David Marchand,
	Chengwen Feng, Mattias Rönnblom

Add functionality to test and modify the value of individual bits in
32-bit or 64-bit words.

These functions have no implications on memory ordering or atomicity.
They do not use volatile, and thus do not prevent any compiler
optimizations.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
Acked-by: Jack Bond-Preston <jack.bond-preston@foss.arm.com>

--

PATCH v3:
 * Remove unnecessary <rte_compat.h> include.
 * Remove redundant 'fun' parameter from the __RTE_GEN_BIT_*() macros
   (Jack Bond-Preston).
 * Introduce __RTE_GEN_BIT_OPS() macro, consistent with how things
   are done when generating the atomic bit operations.
 * Refer to volatile bit op functions as variants instead of families
   (macro parameter naming).

RFC v6:
 * Have rte_bit_test() accept const-marked bitsets.

RFC v4:
 * Add rte_bit_flip() which, believe it or not, flips the value of a bit.
 * Mark macro-generated private functions as experimental.
 * Use macros to generate *assign*() functions.

RFC v3:
 * Work around lack of C++ support for _Generic (Tyler Retzlaff).
 * Fix ','-related checkpatch warnings.
---
 lib/eal/include/rte_bitops.h | 260 ++++++++++++++++++++++++++++++++++-
 1 file changed, 258 insertions(+), 2 deletions(-)

diff --git a/lib/eal/include/rte_bitops.h b/lib/eal/include/rte_bitops.h
index 449565eeae..6915b945ba 100644
--- a/lib/eal/include/rte_bitops.h
+++ b/lib/eal/include/rte_bitops.h
@@ -2,6 +2,7 @@
  * Copyright(c) 2020 Arm Limited
  * Copyright(c) 2010-2019 Intel Corporation
  * Copyright(c) 2023 Microsoft Corporation
+ * Copyright(c) 2024 Ericsson AB
  */
 
 #ifndef _RTE_BITOPS_H_
@@ -11,12 +12,14 @@
  * @file
  * Bit Operations
  *
- * This file defines a family of APIs for bit operations
- * without enforcing memory ordering.
+ * This file provides functionality for low-level, single-word
+ * arithmetic and bit-level operations, such as counting or
+ * setting individual bits.
  */
 
 #include <stdint.h>
 
+#include <rte_compat.h>
 #include <rte_debug.h>
 
 #ifdef __cplusplus
@@ -105,6 +108,197 @@ extern "C" {
 #define RTE_FIELD_GET64(mask, reg) \
 		((typeof(mask))(((reg) & (mask)) >> rte_ctz64(mask)))
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Test bit in word.
+ *
+ * Generic selection macro to test the value of a bit in a 32-bit or
+ * 64-bit word. The type of operation depends on the type of the @c
+ * addr parameter.
+ *
+ * This macro does not give any guarantees in regards to memory
+ * ordering or atomicity.
+ *
+ * @param addr
+ *   A pointer to the word to query.
+ * @param nr
+ *   The index of the bit.
+ */
+#define rte_bit_test(addr, nr)					\
+	_Generic((addr),					\
+		uint32_t *: __rte_bit_test32,			\
+		const uint32_t *: __rte_bit_test32,		\
+		uint64_t *: __rte_bit_test64,			\
+		const uint64_t *: __rte_bit_test64)(addr, nr)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Set bit in word.
+ *
+ * Generic selection macro to set a bit in a 32-bit or 64-bit
+ * word. The type of operation depends on the type of the @c addr
+ * parameter.
+ *
+ * This macro does not give any guarantees in regards to memory
+ * ordering or atomicity.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ */
+#define rte_bit_set(addr, nr)				\
+	_Generic((addr),				\
+		 uint32_t *: __rte_bit_set32,		\
+		 uint64_t *: __rte_bit_set64)(addr, nr)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Clear bit in word.
+ *
+ * Generic selection macro to clear a bit in a 32-bit or 64-bit
+ * word. The type of operation depends on the type of the @c addr
+ * parameter.
+ *
+ * This macro does not give any guarantees in regards to memory
+ * ordering or atomicity.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ */
+#define rte_bit_clear(addr, nr)					\
+	_Generic((addr),					\
+		 uint32_t *: __rte_bit_clear32,			\
+		 uint64_t *: __rte_bit_clear64)(addr, nr)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Assign a value to a bit in word.
+ *
+ * Generic selection macro to assign a value to a bit in a 32-bit or 64-bit
+ * word. The type of operation depends on the type of the @c addr parameter.
+ *
+ * This macro does not give any guarantees in regards to memory
+ * ordering or atomicity.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ * @param value
+ *   The new value of the bit - true for '1', or false for '0'.
+ */
+#define rte_bit_assign(addr, nr, value)					\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_assign32,			\
+		 uint64_t *: __rte_bit_assign64)(addr, nr, value)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Flip a bit in word.
+ *
+ * Generic selection macro to change the value of a bit to '0' if '1'
+ * or '1' if '0' in a 32-bit or 64-bit word. The type of operation
+ * depends on the type of the @c addr parameter.
+ *
+ * This macro does not give any guarantees in regards to memory
+ * ordering or atomicity.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ */
+#define rte_bit_flip(addr, nr)						\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_flip32,				\
+		 uint64_t *: __rte_bit_flip64)(addr, nr)
+
+#define __RTE_GEN_BIT_TEST(variant, qualifier, size)			\
+	__rte_experimental						\
+	static inline bool						\
+	__rte_bit_ ## variant ## test ## size(const qualifier uint ## size ## _t *addr, \
+					    unsigned int nr)		\
+	{								\
+		RTE_ASSERT(nr < size);					\
+									\
+		uint ## size ## _t mask = (uint ## size ## _t)1 << nr;	\
+		return *addr & mask;					\
+	}
+
+#define __RTE_GEN_BIT_SET(variant, qualifier, size)			\
+	__rte_experimental						\
+	static inline void						\
+	__rte_bit_ ## variant ## set ## size(qualifier uint ## size ## _t *addr, \
+					    unsigned int nr)		\
+	{								\
+		RTE_ASSERT(nr < size);					\
+									\
+		uint ## size ## _t mask = (uint ## size ## _t)1 << nr;	\
+		*addr |= mask;						\
+	}								\
+
+#define __RTE_GEN_BIT_CLEAR(variant, qualifier, size)			\
+	__rte_experimental						\
+	static inline void						\
+	__rte_bit_ ## variant ## clear ## size(qualifier uint ## size ## _t *addr, \
+					    unsigned int nr)		\
+	{								\
+		RTE_ASSERT(nr < size);					\
+									\
+		uint ## size ## _t mask = ~((uint ## size ## _t)1 << nr); \
+		(*addr) &= mask;					\
+	}								\
+
+#define __RTE_GEN_BIT_ASSIGN(variant, qualifier, size)			\
+	__rte_experimental						\
+	static inline void						\
+	__rte_bit_ ## variant ## assign ## size(qualifier uint ## size ## _t *addr, \
+					    unsigned int nr, bool value) \
+	{								\
+		if (value)						\
+			__rte_bit_ ## variant ## set ## size(addr, nr);	\
+		else							\
+			__rte_bit_ ## variant ## clear ## size(addr, nr); \
+	}
+
+#define __RTE_GEN_BIT_FLIP(variant, qualifier, size)			\
+	__rte_experimental						\
+	static inline void						\
+	__rte_bit_ ## variant ## flip ## size(qualifier uint ## size ## _t *addr, \
+					    unsigned int nr)		\
+	{								\
+		bool value;						\
+									\
+		value = __rte_bit_ ## variant ## test ## size(addr, nr); \
+		__rte_bit_ ## variant ## assign ## size(addr, nr, !value); \
+	}
+
+#define __RTE_GEN_BIT_OPS(v, qualifier, size)	\
+	__RTE_GEN_BIT_TEST(v, qualifier, size)	\
+	__RTE_GEN_BIT_SET(v, qualifier, size)	\
+	__RTE_GEN_BIT_CLEAR(v, qualifier, size)	\
+	__RTE_GEN_BIT_ASSIGN(v, qualifier, size)	\
+	__RTE_GEN_BIT_FLIP(v, qualifier, size)
+
+#define __RTE_GEN_BIT_OPS_SIZE(size) \
+	__RTE_GEN_BIT_OPS(,, size)
+
+__RTE_GEN_BIT_OPS_SIZE(32)
+__RTE_GEN_BIT_OPS_SIZE(64)
+
 /*------------------------ 32-bit relaxed operations ------------------------*/
 
 /**
@@ -787,6 +981,68 @@ rte_log2_u64(uint64_t v)
 
 #ifdef __cplusplus
 }
+
+/*
+ * Since C++ doesn't support generic selection (i.e., _Generic),
+ * function overloading is used instead. Such functions must be
+ * defined outside 'extern "C"' to be accepted by the compiler.
+ */
+
+#undef rte_bit_test
+#undef rte_bit_set
+#undef rte_bit_clear
+#undef rte_bit_assign
+#undef rte_bit_flip
+
+#define __RTE_BIT_OVERLOAD_SZ_2(fun, qualifier, size, arg1_type, arg1_name) \
+	static inline void						\
+	rte_bit_ ## fun(qualifier uint ## size ## _t *addr,		\
+			arg1_type arg1_name)				\
+	{								\
+		__rte_bit_ ## fun ## size(addr, arg1_name);		\
+	}
+
+#define __RTE_BIT_OVERLOAD_2(fun, qualifier, arg1_type, arg1_name)	\
+	__RTE_BIT_OVERLOAD_SZ_2(fun, qualifier, 32, arg1_type, arg1_name) \
+	__RTE_BIT_OVERLOAD_SZ_2(fun, qualifier, 64, arg1_type, arg1_name)
+
+#define __RTE_BIT_OVERLOAD_SZ_2R(fun, qualifier, size, ret_type, arg1_type, \
+				 arg1_name)				\
+	static inline ret_type						\
+	rte_bit_ ## fun(qualifier uint ## size ## _t *addr,		\
+			arg1_type arg1_name)				\
+	{								\
+		return __rte_bit_ ## fun ## size(addr, arg1_name);	\
+	}
+
+#define __RTE_BIT_OVERLOAD_2R(fun, qualifier, ret_type, arg1_type, arg1_name) \
+	__RTE_BIT_OVERLOAD_SZ_2R(fun, qualifier, 32, ret_type, arg1_type, \
+				 arg1_name)				\
+	__RTE_BIT_OVERLOAD_SZ_2R(fun, qualifier, 64, ret_type, arg1_type, \
+				 arg1_name)
+
+#define __RTE_BIT_OVERLOAD_SZ_3(fun, qualifier, size, arg1_type, arg1_name, \
+				arg2_type, arg2_name)			\
+	static inline void						\
+	rte_bit_ ## fun(uint ## size ## _t *addr, arg1_type arg1_name,	\
+			arg2_type arg2_name)				\
+	{								\
+		__rte_bit_ ## fun ## size(addr, arg1_name, arg2_name);	\
+	}
+
+#define __RTE_BIT_OVERLOAD_3(fun, qualifier, arg1_type, arg1_name, arg2_type, \
+			     arg2_name)					\
+	__RTE_BIT_OVERLOAD_SZ_3(fun, qualifier, 32, arg1_type, arg1_name, \
+				arg2_type, arg2_name)			\
+	__RTE_BIT_OVERLOAD_SZ_3(fun, qualifier, 64, arg1_type, arg1_name, \
+				arg2_type, arg2_name)
+
+__RTE_BIT_OVERLOAD_2R(test, const, bool, unsigned int, nr)
+__RTE_BIT_OVERLOAD_2(set,, unsigned int, nr)
+__RTE_BIT_OVERLOAD_2(clear,, unsigned int, nr)
+__RTE_BIT_OVERLOAD_3(assign,, unsigned int, nr, bool, value)
+__RTE_BIT_OVERLOAD_2(flip,, unsigned int, nr)
+
 #endif
 
 #endif /* _RTE_BITOPS_H_ */
-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [PATCH v9 3/6] eal: add unit tests for bit operations
  2024-09-18  9:04               ` [PATCH v9 0/6] Improve EAL bit operations API Mattias Rönnblom
  2024-09-18  9:04                 ` [PATCH v9 1/6] dpdk: do not force C linkage on include file dependencies Mattias Rönnblom
  2024-09-18  9:04                 ` [PATCH v9 2/6] eal: extend bit manipulation functionality Mattias Rönnblom
@ 2024-09-18  9:04                 ` Mattias Rönnblom
  2024-09-18  9:04                 ` [PATCH v9 4/6] eal: add atomic " Mattias Rönnblom
                                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 63+ messages in thread
From: Mattias Rönnblom @ 2024-09-18  9:04 UTC (permalink / raw)
  To: dev
  Cc: hofors, Heng Wang, Stephen Hemminger, Tyler Retzlaff,
	Morten Brørup, Jack Bond-Preston, David Marchand,
	Chengwen Feng, Mattias Rönnblom

Extend bitops tests to cover the
rte_bit_[test|set|clear|assign|flip]()
functions.

The tests are converted to use the test suite runner framework.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
Acked-by: Jack Bond-Preston <jack.bond-preston@foss.arm.com>

--

RFC v6:
 * Test rte_bit_*test() usage through const pointers.

RFC v4:
 * Remove redundant line continuations.
---
 app/test/test_bitops.c | 85 ++++++++++++++++++++++++++++++++++--------
 1 file changed, 70 insertions(+), 15 deletions(-)

diff --git a/app/test/test_bitops.c b/app/test/test_bitops.c
index 0d4ccfb468..322f58c066 100644
--- a/app/test/test_bitops.c
+++ b/app/test/test_bitops.c
@@ -1,13 +1,68 @@
 /* SPDX-License-Identifier: BSD-3-Clause
  * Copyright(c) 2019 Arm Limited
+ * Copyright(c) 2024 Ericsson AB
  */
 
+#include <stdbool.h>
+
 #include <rte_launch.h>
 #include <rte_bitops.h>
+#include <rte_random.h>
 #include "test.h"
 
-uint32_t val32;
-uint64_t val64;
+#define GEN_TEST_BIT_ACCESS(test_name, set_fun, clear_fun, assign_fun,	\
+			    flip_fun, test_fun, size)			\
+	static int							\
+	test_name(void)							\
+	{								\
+		uint ## size ## _t reference = (uint ## size ## _t)rte_rand(); \
+		unsigned int bit_nr;					\
+		uint ## size ## _t word = (uint ## size ## _t)rte_rand(); \
+									\
+		for (bit_nr = 0; bit_nr < size; bit_nr++) {		\
+			bool reference_bit = (reference >> bit_nr) & 1;	\
+			bool assign = rte_rand() & 1;			\
+			if (assign)					\
+				assign_fun(&word, bit_nr, reference_bit); \
+			else {						\
+				if (reference_bit)			\
+					set_fun(&word, bit_nr);		\
+				else					\
+					clear_fun(&word, bit_nr);	\
+									\
+			}						\
+			TEST_ASSERT(test_fun(&word, bit_nr) == reference_bit, \
+				    "Bit %d had unexpected value", bit_nr); \
+			flip_fun(&word, bit_nr);			\
+			TEST_ASSERT(test_fun(&word, bit_nr) != reference_bit, \
+				    "Bit %d had unflipped value", bit_nr); \
+			flip_fun(&word, bit_nr);			\
+									\
+			const uint ## size ## _t *const_ptr = &word;	\
+			TEST_ASSERT(test_fun(const_ptr, bit_nr) ==	\
+				    reference_bit,			\
+				    "Bit %d had unexpected value", bit_nr); \
+		}							\
+									\
+		for (bit_nr = 0; bit_nr < size; bit_nr++) {		\
+			bool reference_bit = (reference >> bit_nr) & 1;	\
+			TEST_ASSERT(test_fun(&word, bit_nr) == reference_bit, \
+				    "Bit %d had unexpected value", bit_nr); \
+		}							\
+									\
+		TEST_ASSERT(reference == word, "Word had unexpected value"); \
+									\
+		return TEST_SUCCESS;					\
+	}
+
+GEN_TEST_BIT_ACCESS(test_bit_access32, rte_bit_set, rte_bit_clear,
+		    rte_bit_assign, rte_bit_flip, rte_bit_test, 32)
+
+GEN_TEST_BIT_ACCESS(test_bit_access64, rte_bit_set, rte_bit_clear,
+		    rte_bit_assign, rte_bit_flip, rte_bit_test, 64)
+
+static uint32_t val32;
+static uint64_t val64;
 
 #define MAX_BITS_32 32
 #define MAX_BITS_64 64
@@ -117,22 +172,22 @@ test_bit_relaxed_test_set_clear(void)
 	return TEST_SUCCESS;
 }
 
+static struct unit_test_suite test_suite = {
+	.suite_name = "Bitops test suite",
+	.unit_test_cases = {
+		TEST_CASE(test_bit_access32),
+		TEST_CASE(test_bit_access64),
+		TEST_CASE(test_bit_relaxed_set),
+		TEST_CASE(test_bit_relaxed_clear),
+		TEST_CASE(test_bit_relaxed_test_set_clear),
+		TEST_CASES_END()
+	}
+};
+
 static int
 test_bitops(void)
 {
-	val32 = 0;
-	val64 = 0;
-
-	if (test_bit_relaxed_set() < 0)
-		return TEST_FAILED;
-
-	if (test_bit_relaxed_clear() < 0)
-		return TEST_FAILED;
-
-	if (test_bit_relaxed_test_set_clear() < 0)
-		return TEST_FAILED;
-
-	return TEST_SUCCESS;
+	return unit_test_suite_runner(&test_suite);
 }
 
 REGISTER_FAST_TEST(bitops_autotest, true, true, test_bitops);
-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [PATCH v9 4/6] eal: add atomic bit operations
  2024-09-18  9:04               ` [PATCH v9 0/6] Improve EAL bit operations API Mattias Rönnblom
                                   ` (2 preceding siblings ...)
  2024-09-18  9:04                 ` [PATCH v9 3/6] eal: add unit tests for bit operations Mattias Rönnblom
@ 2024-09-18  9:04                 ` Mattias Rönnblom
  2024-09-18  9:04                 ` [PATCH v9 5/6] eal: add unit tests for atomic bit access functions Mattias Rönnblom
  2024-09-18  9:04                 ` [PATCH v9 6/6] eal: extend bitops to handle volatile pointers Mattias Rönnblom
  5 siblings, 0 replies; 63+ messages in thread
From: Mattias Rönnblom @ 2024-09-18  9:04 UTC (permalink / raw)
  To: dev
  Cc: hofors, Heng Wang, Stephen Hemminger, Tyler Retzlaff,
	Morten Brørup, Jack Bond-Preston, David Marchand,
	Chengwen Feng, Mattias Rönnblom

Add atomic bit test/set/clear/assign/flip and
test-and-set/clear/assign/flip functions.

All atomic bit functions allow (and indeed, require) the caller to
specify a memory order.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
Acked-by: Jack Bond-Preston <jack.bond-preston@foss.arm.com>

--

PATCH v3:
 * Introduce __RTE_GEN_BIT_ATOMIC_*() 'qualifier' argument already in
   this patch (Jack Bond-Preston).
 * Refer to volatile bit op functions as variants instead of families
   (macro parameter naming).
 * Update release notes.

PATCH:
 * Add missing macro #undef for C++ version of atomic bit flip.

RFC v7:
 * Replace compare-exchange-based rte_bit_atomic_test_and_*() and
   flip() with implementations that use the previous value as returned
   by the atomic fetch function.
 * Reword documentation to match the non-atomic macro variants.
 * Remove pointer to <rte_stdatomic.h> for memory model documentation,
   since there is no documentation for that API.

RFC v6:
 * Have rte_bit_atomic_test() accept const-marked bitsets.

RFC v4:
 * Add atomic bit flip.
 * Mark macro-generated private functions experimental.

RFC v3:
 * Work around lack of C++ support for _Generic (Tyler Retzlaff).

RFC v2:
 o Add rte_bit_atomic_test_and_assign() (for consistency).
 o Fix bugs in rte_bit_atomic_test_and_[set|clear]().
 o Use <rte_stdatomic.h> to support MSVC.
---
 doc/guides/rel_notes/release_24_11.rst |  17 +
 lib/eal/include/rte_bitops.h           | 415 +++++++++++++++++++++++++
 2 files changed, 432 insertions(+)

diff --git a/doc/guides/rel_notes/release_24_11.rst b/doc/guides/rel_notes/release_24_11.rst
index 0ff70d9057..3111b1e4c0 100644
--- a/doc/guides/rel_notes/release_24_11.rst
+++ b/doc/guides/rel_notes/release_24_11.rst
@@ -56,6 +56,23 @@ New Features
      =======================================================
 
 
+* **Extended bit operations API.**
+
+  The support for bit-level operations on single 32- and 64-bit words
+  in <rte_bitops.h> has been extended with two families of
+  semantically well-defined functions.
+
+  rte_bit_[test|set|clear|assign|flip]() functions provide excellent
+  performance (by avoiding restricting the compiler and CPU), but give
+  no guarantees with regard to memory ordering or atomicity.
+
+  rte_bit_atomic_*() provides atomic bit-level operations, including
+  the possibility of specifying memory ordering constraints.
+
+  The new public API elements are polymorphic, using the _Generic-
+  based macros (for C) and function overloading (in C++ translation
+  units).
+
 Removed Items
 -------------
 
diff --git a/lib/eal/include/rte_bitops.h b/lib/eal/include/rte_bitops.h
index 6915b945ba..3ad6795fd1 100644
--- a/lib/eal/include/rte_bitops.h
+++ b/lib/eal/include/rte_bitops.h
@@ -21,6 +21,7 @@
 
 #include <rte_compat.h>
 #include <rte_debug.h>
+#include <rte_stdatomic.h>
 
 #ifdef __cplusplus
 extern "C" {
@@ -226,6 +227,204 @@ extern "C" {
 		 uint32_t *: __rte_bit_flip32,				\
 		 uint64_t *: __rte_bit_flip64)(addr, nr)
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Test if a particular bit in a word is set with a particular memory
+ * order.
+ *
+ * Test a bit with the resulting memory load ordered as per the
+ * specified memory order.
+ *
+ * @param addr
+ *   A pointer to the word to query.
+ * @param nr
+ *   The index of the bit.
+ * @param memory_order
+ *   The memory order to use.
+ * @return
+ *   Returns true if the bit is set, and false otherwise.
+ */
+#define rte_bit_atomic_test(addr, nr, memory_order)			\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_atomic_test32,			\
+		 const uint32_t *: __rte_bit_atomic_test32,		\
+		 uint64_t *: __rte_bit_atomic_test64,			\
+		 const uint64_t *: __rte_bit_atomic_test64)(addr, nr,	\
+							    memory_order)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Atomically set bit in word.
+ *
+ * Generic selection macro to atomically set bit specified by @c nr in
+ * the word pointed to by @c addr to '1', with the memory ordering as
+ * specified by @c memory_order.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ * @param memory_order
+ *   The memory order to use.
+ */
+#define rte_bit_atomic_set(addr, nr, memory_order)			\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_atomic_set32,			\
+		 uint64_t *: __rte_bit_atomic_set64)(addr, nr, memory_order)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Atomically clear bit in word.
+ *
+ * Generic selection macro to atomically set bit specified by @c nr in
+ * the word pointed to by @c addr to '0', with the memory ordering as
+ * specified by @c memory_order.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ * @param memory_order
+ *   The memory order to use.
+ */
+#define rte_bit_atomic_clear(addr, nr, memory_order)			\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_atomic_clear32,			\
+		 uint64_t *: __rte_bit_atomic_clear64)(addr, nr, memory_order)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Atomically assign a value to bit in word.
+ *
+ * Generic selection macro to atomically set bit specified by @c nr in the
+ * word pointed to by @c addr to the value indicated by @c value, with
+ * the memory ordering as specified with @c memory_order.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ * @param value
+ *   The new value of the bit - true for '1', or false for '0'.
+ * @param memory_order
+ *   The memory order to use.
+ */
+#define rte_bit_atomic_assign(addr, nr, value, memory_order)		\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_atomic_assign32,			\
+		 uint64_t *: __rte_bit_atomic_assign64)(addr, nr, value, \
+							memory_order)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Atomically flip bit in word.
+ *
+ * Generic selection macro to atomically negate the value of the bit
+ * specified by @c nr in the word pointed to by @c addr (i.e., a set
+ * bit is cleared and vice versa), with the memory ordering as
+ * specified with @c memory_order.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ * @param memory_order
+ *   The memory order to use.
+ */
+#define rte_bit_atomic_flip(addr, nr, memory_order)			\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_atomic_flip32,			\
+		 uint64_t *: __rte_bit_atomic_flip64)(addr, nr, memory_order)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Atomically test and set a bit in word.
+ *
+ * Generic selection macro to atomically test and set bit specified by
+ * @c nr in the word pointed to by @c addr to '1', with the memory
+ * ordering as specified with @c memory_order.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ * @param memory_order
+ *   The memory order to use.
+ * @return
+ *   Returns true if the bit was set, and false otherwise.
+ */
+#define rte_bit_atomic_test_and_set(addr, nr, memory_order)		\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_atomic_test_and_set32,		\
+		 uint64_t *: __rte_bit_atomic_test_and_set64)(addr, nr,	\
+							      memory_order)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Atomically test and clear a bit in word.
+ *
+ * Generic selection macro to atomically test and clear bit specified
+ * by @c nr in the word pointed to by @c addr to '0', with the memory
+ * ordering as specified with @c memory_order.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ * @param memory_order
+ *   The memory order to use.
+ * @return
+ *   Returns true if the bit was set, and false otherwise.
+ */
+#define rte_bit_atomic_test_and_clear(addr, nr, memory_order)		\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_atomic_test_and_clear32,		\
+		 uint64_t *: __rte_bit_atomic_test_and_clear64)(addr, nr, \
+								memory_order)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Atomically test and assign a bit in word.
+ *
+ * Generic selection macro to atomically test and assign bit specified
+ * by @c nr in the word pointed to by @c addr to the value specified by
+ * @c value, with the memory ordering as specified with @c
+ * memory_order.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ * @param value
+ *   The new value of the bit - true for '1', or false for '0'.
+ * @param memory_order
+ *   The memory order to use.
+ * @return
+ *   Returns true if the bit was set, and false otherwise.
+ */
+#define rte_bit_atomic_test_and_assign(addr, nr, value, memory_order)	\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_atomic_test_and_assign32,	\
+		 uint64_t *: __rte_bit_atomic_test_and_assign64)(addr, nr, \
+								 value, \
+								 memory_order)
+
 #define __RTE_GEN_BIT_TEST(variant, qualifier, size)			\
 	__rte_experimental						\
 	static inline bool						\
@@ -299,6 +498,146 @@ extern "C" {
 __RTE_GEN_BIT_OPS_SIZE(32)
 __RTE_GEN_BIT_OPS_SIZE(64)
 
+#define __RTE_GEN_BIT_ATOMIC_TEST(variant, qualifier, size)		\
+	__rte_experimental						\
+	static inline bool						\
+	__rte_bit_atomic_ ## variant ## test ## size(const qualifier uint ## size ## _t *addr, \
+						     unsigned int nr, int memory_order) \
+	{								\
+		RTE_ASSERT(nr < size);					\
+									\
+		const qualifier RTE_ATOMIC(uint ## size ## _t) *a_addr = \
+			(const qualifier RTE_ATOMIC(uint ## size ## _t) *)addr;	\
+		uint ## size ## _t mask = (uint ## size ## _t)1 << nr;	\
+		return rte_atomic_load_explicit(a_addr, memory_order) & mask; \
+	}
+
+#define __RTE_GEN_BIT_ATOMIC_SET(variant, qualifier, size)		\
+	__rte_experimental						\
+	static inline void						\
+	__rte_bit_atomic_ ## variant ## set ## size(qualifier uint ## size ## _t *addr, \
+					      unsigned int nr, int memory_order) \
+	{								\
+		RTE_ASSERT(nr < size);					\
+									\
+		qualifier RTE_ATOMIC(uint ## size ## _t) *a_addr =	\
+			(qualifier RTE_ATOMIC(uint ## size ## _t) *)addr; \
+		uint ## size ## _t mask = (uint ## size ## _t)1 << nr;	\
+		rte_atomic_fetch_or_explicit(a_addr, mask, memory_order); \
+	}
+
+#define __RTE_GEN_BIT_ATOMIC_CLEAR(variant, qualifier, size)		\
+	__rte_experimental						\
+	static inline void						\
+	__rte_bit_atomic_ ## variant ## clear ## size(qualifier uint ## size ## _t *addr,	\
+						unsigned int nr, int memory_order) \
+	{								\
+		RTE_ASSERT(nr < size);					\
+									\
+		qualifier RTE_ATOMIC(uint ## size ## _t) *a_addr =	\
+			(qualifier RTE_ATOMIC(uint ## size ## _t) *)addr; \
+		uint ## size ## _t mask = (uint ## size ## _t)1 << nr;	\
+		rte_atomic_fetch_and_explicit(a_addr, ~mask, memory_order); \
+	}
+
+#define __RTE_GEN_BIT_ATOMIC_FLIP(variant, qualifier, size)		\
+	__rte_experimental						\
+	static inline void						\
+	__rte_bit_atomic_ ## variant ## flip ## size(qualifier uint ## size ## _t *addr, \
+					       unsigned int nr, int memory_order) \
+	{								\
+		RTE_ASSERT(nr < size);					\
+									\
+		qualifier RTE_ATOMIC(uint ## size ## _t) *a_addr =	\
+			(qualifier RTE_ATOMIC(uint ## size ## _t) *)addr; \
+		uint ## size ## _t mask = (uint ## size ## _t)1 << nr;	\
+		rte_atomic_fetch_xor_explicit(a_addr, mask, memory_order); \
+	}
+
+#define __RTE_GEN_BIT_ATOMIC_ASSIGN(variant, qualifier, size)		\
+	__rte_experimental						\
+	static inline void						\
+	__rte_bit_atomic_## variant ## assign ## size(qualifier uint ## size ## _t *addr, \
+						unsigned int nr, bool value, \
+						int memory_order)	\
+	{								\
+		if (value)						\
+			__rte_bit_atomic_ ## variant ## set ## size(addr, nr, memory_order); \
+		else							\
+			__rte_bit_atomic_ ## variant ## clear ## size(addr, nr, \
+								     memory_order); \
+	}
+
+#define __RTE_GEN_BIT_ATOMIC_TEST_AND_SET(variant, qualifier, size)	\
+	__rte_experimental						\
+	static inline bool						\
+	__rte_bit_atomic_ ## variant ## test_and_set ## size(qualifier uint ## size ## _t *addr, \
+						       unsigned int nr,	\
+						       int memory_order) \
+	{								\
+		RTE_ASSERT(nr < size);					\
+									\
+		qualifier RTE_ATOMIC(uint ## size ## _t) *a_addr =	\
+			(qualifier RTE_ATOMIC(uint ## size ## _t) *)addr; \
+		uint ## size ## _t mask = (uint ## size ## _t)1 << nr;	\
+		uint ## size ## _t prev;				\
+									\
+		prev = rte_atomic_fetch_or_explicit(a_addr, mask,	\
+						    memory_order);	\
+									\
+		return prev & mask;					\
+	}
+
+#define __RTE_GEN_BIT_ATOMIC_TEST_AND_CLEAR(variant, qualifier, size)	\
+	__rte_experimental						\
+	static inline bool						\
+	__rte_bit_atomic_ ## variant ## test_and_clear ## size(qualifier uint ## size ## _t *addr, \
+							 unsigned int nr, \
+							 int memory_order) \
+	{								\
+		RTE_ASSERT(nr < size);					\
+									\
+		qualifier RTE_ATOMIC(uint ## size ## _t) *a_addr =	\
+			(qualifier RTE_ATOMIC(uint ## size ## _t) *)addr; \
+		uint ## size ## _t mask = (uint ## size ## _t)1 << nr;	\
+		uint ## size ## _t prev;				\
+									\
+	        prev = rte_atomic_fetch_and_explicit(a_addr, ~mask,	\
+						     memory_order);	\
+									\
+		return prev & mask;					\
+	}
+
+#define __RTE_GEN_BIT_ATOMIC_TEST_AND_ASSIGN(variant, qualifier, size)	\
+	__rte_experimental						\
+	static inline bool						\
+	__rte_bit_atomic_ ## variant ## test_and_assign ## size(qualifier uint ## size ## _t *addr, \
+							  unsigned int nr, \
+							  bool value,	\
+							  int memory_order) \
+	{								\
+		if (value)						\
+			return __rte_bit_atomic_ ## variant ## test_and_set ## size(addr, nr, memory_order); \
+		else							\
+			return __rte_bit_atomic_ ## variant ## test_and_clear ## size(addr, nr, memory_order); \
+	}
+
+#define __RTE_GEN_BIT_ATOMIC_OPS(variant, qualifier, size)	\
+	__RTE_GEN_BIT_ATOMIC_TEST(variant, qualifier, size)	\
+	__RTE_GEN_BIT_ATOMIC_SET(variant, qualifier, size)	\
+	__RTE_GEN_BIT_ATOMIC_CLEAR(variant, qualifier, size)	\
+	__RTE_GEN_BIT_ATOMIC_ASSIGN(variant, qualifier, size)	\
+	__RTE_GEN_BIT_ATOMIC_TEST_AND_SET(variant, qualifier, size) \
+	__RTE_GEN_BIT_ATOMIC_TEST_AND_CLEAR(variant, qualifier, size) \
+	__RTE_GEN_BIT_ATOMIC_TEST_AND_ASSIGN(variant, qualifier, size) \
+	__RTE_GEN_BIT_ATOMIC_FLIP(variant, qualifier, size)
+
+#define __RTE_GEN_BIT_ATOMIC_OPS_SIZE(size) \
+	__RTE_GEN_BIT_ATOMIC_OPS(,, size)
+
+__RTE_GEN_BIT_ATOMIC_OPS_SIZE(32)
+__RTE_GEN_BIT_ATOMIC_OPS_SIZE(64)
+
 /*------------------------ 32-bit relaxed operations ------------------------*/
 
 /**
@@ -994,6 +1333,15 @@ rte_log2_u64(uint64_t v)
 #undef rte_bit_assign
 #undef rte_bit_flip
 
+#undef rte_bit_atomic_test
+#undef rte_bit_atomic_set
+#undef rte_bit_atomic_clear
+#undef rte_bit_atomic_assign
+#undef rte_bit_atomic_flip
+#undef rte_bit_atomic_test_and_set
+#undef rte_bit_atomic_test_and_clear
+#undef rte_bit_atomic_test_and_assign
+
 #define __RTE_BIT_OVERLOAD_SZ_2(fun, qualifier, size, arg1_type, arg1_name) \
 	static inline void						\
 	rte_bit_ ## fun(qualifier uint ## size ## _t *addr,		\
@@ -1037,12 +1385,79 @@ rte_log2_u64(uint64_t v)
 	__RTE_BIT_OVERLOAD_SZ_3(fun, qualifier, 64, arg1_type, arg1_name, \
 				arg2_type, arg2_name)
 
+#define __RTE_BIT_OVERLOAD_SZ_3R(fun, qualifier, size, ret_type, arg1_type, \
+				 arg1_name, arg2_type, arg2_name)	\
+	static inline ret_type						\
+	rte_bit_ ## fun(uint ## size ## _t *addr, arg1_type arg1_name,	\
+			arg2_type arg2_name)				\
+	{								\
+		return __rte_bit_ ## fun ## size(addr, arg1_name, arg2_name); \
+	}
+
+#define __RTE_BIT_OVERLOAD_3R(fun, qualifier, ret_type, arg1_type, arg1_name, \
+			      arg2_type, arg2_name)			\
+	__RTE_BIT_OVERLOAD_SZ_3R(fun, qualifier, 32, ret_type, arg1_type, \
+				 arg1_name, arg2_type, arg2_name)	\
+	__RTE_BIT_OVERLOAD_SZ_3R(fun, qualifier, 64, ret_type, arg1_type, \
+				 arg1_name, arg2_type, arg2_name)
+
+#define __RTE_BIT_OVERLOAD_SZ_4(fun, qualifier, size, arg1_type, arg1_name, \
+				arg2_type, arg2_name, arg3_type, arg3_name) \
+	static inline void						\
+	rte_bit_ ## fun(uint ## size ## _t *addr, arg1_type arg1_name,	\
+			arg2_type arg2_name, arg3_type arg3_name)	\
+	{								\
+		__rte_bit_ ## fun ## size(addr, arg1_name, arg2_name,	\
+					  arg3_name);		      \
+	}
+
+#define __RTE_BIT_OVERLOAD_4(fun, qualifier, arg1_type, arg1_name, arg2_type, \
+			     arg2_name, arg3_type, arg3_name)		\
+	__RTE_BIT_OVERLOAD_SZ_4(fun, qualifier, 32, arg1_type, arg1_name, \
+				arg2_type, arg2_name, arg3_type, arg3_name) \
+	__RTE_BIT_OVERLOAD_SZ_4(fun, qualifier, 64, arg1_type, arg1_name, \
+				arg2_type, arg2_name, arg3_type, arg3_name)
+
+#define __RTE_BIT_OVERLOAD_SZ_4R(fun, qualifier, size, ret_type, arg1_type, \
+				 arg1_name, arg2_type, arg2_name, arg3_type, \
+				 arg3_name)				\
+	static inline ret_type						\
+	rte_bit_ ## fun(uint ## size ## _t *addr, arg1_type arg1_name,	\
+			arg2_type arg2_name, arg3_type arg3_name)	\
+	{								\
+		return __rte_bit_ ## fun ## size(addr, arg1_name, arg2_name, \
+						 arg3_name);		\
+	}
+
+#define __RTE_BIT_OVERLOAD_4R(fun, qualifier, ret_type, arg1_type, arg1_name, \
+			      arg2_type, arg2_name, arg3_type, arg3_name) \
+	__RTE_BIT_OVERLOAD_SZ_4R(fun, qualifier, 32, ret_type, arg1_type, \
+				 arg1_name, arg2_type, arg2_name, arg3_type, \
+				 arg3_name)				\
+	__RTE_BIT_OVERLOAD_SZ_4R(fun, qualifier, 64, ret_type, arg1_type, \
+				 arg1_name, arg2_type, arg2_name, arg3_type, \
+				 arg3_name)
+
 __RTE_BIT_OVERLOAD_2R(test, const, bool, unsigned int, nr)
 __RTE_BIT_OVERLOAD_2(set,, unsigned int, nr)
 __RTE_BIT_OVERLOAD_2(clear,, unsigned int, nr)
 __RTE_BIT_OVERLOAD_3(assign,, unsigned int, nr, bool, value)
 __RTE_BIT_OVERLOAD_2(flip,, unsigned int, nr)
 
+__RTE_BIT_OVERLOAD_3R(atomic_test, const, bool, unsigned int, nr,
+		      int, memory_order)
+__RTE_BIT_OVERLOAD_3(atomic_set,, unsigned int, nr, int, memory_order)
+__RTE_BIT_OVERLOAD_3(atomic_clear,, unsigned int, nr, int, memory_order)
+__RTE_BIT_OVERLOAD_4(atomic_assign,, unsigned int, nr, bool, value,
+		     int, memory_order)
+__RTE_BIT_OVERLOAD_3(atomic_flip,, unsigned int, nr, int, memory_order)
+__RTE_BIT_OVERLOAD_3R(atomic_test_and_set,, bool, unsigned int, nr,
+		      int, memory_order)
+__RTE_BIT_OVERLOAD_3R(atomic_test_and_clear,, bool, unsigned int, nr,
+		      int, memory_order)
+__RTE_BIT_OVERLOAD_4R(atomic_test_and_assign,, bool, unsigned int, nr,
+		      bool, value, int, memory_order)
+
 #endif
 
 #endif /* _RTE_BITOPS_H_ */
-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [PATCH v9 5/6] eal: add unit tests for atomic bit access functions
  2024-09-18  9:04               ` [PATCH v9 0/6] Improve EAL bit operations API Mattias Rönnblom
                                   ` (3 preceding siblings ...)
  2024-09-18  9:04                 ` [PATCH v9 4/6] eal: add atomic " Mattias Rönnblom
@ 2024-09-18  9:04                 ` Mattias Rönnblom
  2024-09-18  9:04                 ` [PATCH v9 6/6] eal: extend bitops to handle volatile pointers Mattias Rönnblom
  5 siblings, 0 replies; 63+ messages in thread
From: Mattias Rönnblom @ 2024-09-18  9:04 UTC (permalink / raw)
  To: dev
  Cc: hofors, Heng Wang, Stephen Hemminger, Tyler Retzlaff,
	Morten Brørup, Jack Bond-Preston, David Marchand,
	Chengwen Feng, Mattias Rönnblom

Extend bitops tests to cover the rte_bit_atomic_*() family of
functions.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
Acked-by: Jack Bond-Preston <jack.bond-preston@foss.arm.com>

--

RFC v4:
 * Add atomicity test for atomic bit flip.

RFC v3:
 * Rename variable 'main' to make ICC happy.
---
 app/test/test_bitops.c | 313 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 312 insertions(+), 1 deletion(-)

diff --git a/app/test/test_bitops.c b/app/test/test_bitops.c
index 322f58c066..b80216a0a1 100644
--- a/app/test/test_bitops.c
+++ b/app/test/test_bitops.c
@@ -3,10 +3,13 @@
  * Copyright(c) 2024 Ericsson AB
  */
 
+#include <inttypes.h>
 #include <stdbool.h>
 
-#include <rte_launch.h>
 #include <rte_bitops.h>
+#include <rte_cycles.h>
+#include <rte_launch.h>
+#include <rte_lcore.h>
 #include <rte_random.h>
 #include "test.h"
 
@@ -61,6 +64,304 @@ GEN_TEST_BIT_ACCESS(test_bit_access32, rte_bit_set, rte_bit_clear,
 GEN_TEST_BIT_ACCESS(test_bit_access64, rte_bit_set, rte_bit_clear,
 		    rte_bit_assign, rte_bit_flip, rte_bit_test, 64)
 
+#define bit_atomic_set(addr, nr)				\
+	rte_bit_atomic_set(addr, nr, rte_memory_order_relaxed)
+
+#define bit_atomic_clear(addr, nr)					\
+	rte_bit_atomic_clear(addr, nr, rte_memory_order_relaxed)
+
+#define bit_atomic_assign(addr, nr, value)				\
+	rte_bit_atomic_assign(addr, nr, value, rte_memory_order_relaxed)
+
+#define bit_atomic_flip(addr, nr)					\
+    rte_bit_atomic_flip(addr, nr, rte_memory_order_relaxed)
+
+#define bit_atomic_test(addr, nr)				\
+	rte_bit_atomic_test(addr, nr, rte_memory_order_relaxed)
+
+GEN_TEST_BIT_ACCESS(test_bit_atomic_access32, bit_atomic_set,
+		    bit_atomic_clear, bit_atomic_assign,
+		    bit_atomic_flip, bit_atomic_test, 32)
+
+GEN_TEST_BIT_ACCESS(test_bit_atomic_access64, bit_atomic_set,
+		    bit_atomic_clear, bit_atomic_assign,
+		    bit_atomic_flip, bit_atomic_test, 64)
+
+#define PARALLEL_TEST_RUNTIME 0.25
+
+#define GEN_TEST_BIT_PARALLEL_ASSIGN(size)				\
+									\
+	struct parallel_access_lcore ## size				\
+	{								\
+		unsigned int bit;					\
+		uint ## size ##_t *word;				\
+		bool failed;						\
+	};								\
+									\
+	static int							\
+	run_parallel_assign ## size(void *arg)				\
+	{								\
+		struct parallel_access_lcore ## size *lcore = arg;	\
+		uint64_t deadline = rte_get_timer_cycles() +		\
+			PARALLEL_TEST_RUNTIME * rte_get_timer_hz();	\
+		bool value = false;					\
+									\
+		do {							\
+			bool new_value = rte_rand() & 1;		\
+			bool use_test_and_modify = rte_rand() & 1;	\
+			bool use_assign = rte_rand() & 1;		\
+									\
+			if (rte_bit_atomic_test(lcore->word, lcore->bit, \
+						rte_memory_order_relaxed) != value) { \
+				lcore->failed = true;			\
+				break;					\
+			}						\
+									\
+			if (use_test_and_modify) {			\
+				bool old_value;				\
+				if (use_assign) 			\
+					old_value = rte_bit_atomic_test_and_assign( \
+						lcore->word, lcore->bit, new_value, \
+						rte_memory_order_relaxed); \
+				else {					\
+					old_value = new_value ?		\
+						rte_bit_atomic_test_and_set( \
+							lcore->word, lcore->bit, \
+							rte_memory_order_relaxed) : \
+						rte_bit_atomic_test_and_clear( \
+							lcore->word, lcore->bit, \
+							rte_memory_order_relaxed); \
+				}					\
+				if (old_value != value) {		\
+					lcore->failed = true;		\
+					break;				\
+				}					\
+			} else {					\
+				if (use_assign)				\
+					rte_bit_atomic_assign(lcore->word, lcore->bit, \
+							      new_value, \
+							      rte_memory_order_relaxed); \
+				else {					\
+					if (new_value)			\
+						rte_bit_atomic_set(	\
+							lcore->word, lcore->bit, \
+							rte_memory_order_relaxed); \
+					else				\
+						rte_bit_atomic_clear(	\
+							lcore->word, lcore->bit, \
+							rte_memory_order_relaxed); \
+				}					\
+			}						\
+									\
+			value = new_value;				\
+		} while (rte_get_timer_cycles() < deadline);		\
+									\
+		return 0;						\
+	}								\
+									\
+	static int							\
+	test_bit_atomic_parallel_assign ## size(void)			\
+	{								\
+		unsigned int worker_lcore_id;				\
+		uint ## size ## _t word = 0;				\
+		struct parallel_access_lcore ## size lmain = {		\
+			.word = &word					\
+		};							\
+		struct parallel_access_lcore ## size lworker = {	\
+			.word = &word					\
+		};							\
+									\
+		if (rte_lcore_count() < 2) {				\
+			printf("Need multiple cores to run parallel test.\n"); \
+			return TEST_SKIPPED;				\
+		}							\
+									\
+		worker_lcore_id = rte_get_next_lcore(-1, 1, 0);		\
+									\
+		lmain.bit = rte_rand_max(size);				\
+		do {							\
+			lworker.bit = rte_rand_max(size);		\
+		} while (lworker.bit == lmain.bit);			\
+									\
+		int rc = rte_eal_remote_launch(run_parallel_assign ## size, \
+					       &lworker, worker_lcore_id); \
+		TEST_ASSERT(rc == 0, "Worker thread launch failed");	\
+									\
+		run_parallel_assign ## size(&lmain);			\
+									\
+		rte_eal_mp_wait_lcore();				\
+									\
+		TEST_ASSERT(!lmain.failed, "Main lcore atomic access failed"); \
+		TEST_ASSERT(!lworker.failed, "Worker lcore atomic access " \
+			    "failed");					\
+									\
+		return TEST_SUCCESS;					\
+	}
+
+GEN_TEST_BIT_PARALLEL_ASSIGN(32)
+GEN_TEST_BIT_PARALLEL_ASSIGN(64)
+
+#define GEN_TEST_BIT_PARALLEL_TEST_AND_MODIFY(size)			\
+									\
+	struct parallel_test_and_set_lcore ## size			\
+	{								\
+		uint ## size ##_t *word;				\
+		unsigned int bit;					\
+		uint64_t flips;						\
+	};								\
+									\
+	static int							\
+	run_parallel_test_and_modify ## size(void *arg)		\
+	{								\
+		struct parallel_test_and_set_lcore ## size *lcore = arg; \
+		uint64_t deadline = rte_get_timer_cycles() +		\
+			PARALLEL_TEST_RUNTIME * rte_get_timer_hz();	\
+		do {							\
+			bool old_value;					\
+			bool new_value = rte_rand() & 1;		\
+			bool use_assign = rte_rand() & 1;		\
+									\
+			if (use_assign)					\
+				old_value = rte_bit_atomic_test_and_assign( \
+					lcore->word, lcore->bit, new_value, \
+					rte_memory_order_relaxed);	\
+			else						\
+				old_value = new_value ?			\
+					rte_bit_atomic_test_and_set(	\
+						lcore->word, lcore->bit, \
+						rte_memory_order_relaxed) : \
+					rte_bit_atomic_test_and_clear(	\
+						lcore->word, lcore->bit, \
+						rte_memory_order_relaxed); \
+			if (old_value != new_value)			\
+				lcore->flips++;				\
+		} while (rte_get_timer_cycles() < deadline);		\
+									\
+		return 0;						\
+	}								\
+									\
+	static int							\
+	test_bit_atomic_parallel_test_and_modify ## size(void)		\
+	{								\
+		unsigned int worker_lcore_id;				\
+		uint ## size ## _t word = 0;				\
+		unsigned int bit = rte_rand_max(size);			\
+		struct parallel_test_and_set_lcore ## size lmain = {	\
+			.word = &word,					\
+			.bit = bit					\
+		};							\
+		struct parallel_test_and_set_lcore ## size lworker = {	\
+			.word = &word,					\
+			.bit = bit					\
+		};							\
+									\
+		if (rte_lcore_count() < 2) {				\
+			printf("Need multiple cores to run parallel test.\n"); \
+			return TEST_SKIPPED;				\
+		}							\
+									\
+		worker_lcore_id = rte_get_next_lcore(-1, 1, 0);		\
+									\
+		int rc = rte_eal_remote_launch(run_parallel_test_and_modify ## size, \
+					       &lworker, worker_lcore_id); \
+		TEST_ASSERT(rc == 0, "Worker thread launch failed");	\
+									\
+		run_parallel_test_and_modify ## size(&lmain);		\
+									\
+		rte_eal_mp_wait_lcore();				\
+									\
+		uint64_t total_flips = lmain.flips + lworker.flips;	\
+		bool expected_value = total_flips % 2;			\
+									\
+		TEST_ASSERT(expected_value == rte_bit_test(&word, bit), \
+			    "After %"PRId64" flips, the bit value "	\
+			    "should be %d", total_flips, expected_value); \
+									\
+		uint64_t expected_word = 0;				\
+		rte_bit_assign(&expected_word, bit, expected_value);	\
+									\
+		TEST_ASSERT(expected_word == word, "Untouched bits have " \
+			    "changed value");				\
+									\
+		return TEST_SUCCESS;					\
+	}
+
+GEN_TEST_BIT_PARALLEL_TEST_AND_MODIFY(32)
+GEN_TEST_BIT_PARALLEL_TEST_AND_MODIFY(64)
+
+#define GEN_TEST_BIT_PARALLEL_FLIP(size)				\
+									\
+	struct parallel_flip_lcore ## size				\
+	{								\
+		uint ## size ##_t *word;				\
+		unsigned int bit;					\
+		uint64_t flips;						\
+	};								\
+									\
+	static int							\
+	run_parallel_flip ## size(void *arg)				\
+	{								\
+		struct parallel_flip_lcore ## size *lcore = arg; \
+		uint64_t deadline = rte_get_timer_cycles() +		\
+			PARALLEL_TEST_RUNTIME * rte_get_timer_hz();	\
+		do {							\
+			rte_bit_atomic_flip(lcore->word, lcore->bit,	\
+					    rte_memory_order_relaxed);	\
+			lcore->flips++;					\
+		} while (rte_get_timer_cycles() < deadline);		\
+									\
+		return 0;						\
+	}								\
+									\
+	static int							\
+	test_bit_atomic_parallel_flip ## size(void)			\
+	{								\
+		unsigned int worker_lcore_id;				\
+		uint ## size ## _t word = 0;				\
+		unsigned int bit = rte_rand_max(size);			\
+		struct parallel_flip_lcore ## size lmain = {		\
+			.word = &word,					\
+			.bit = bit					\
+		};							\
+		struct parallel_flip_lcore ## size lworker = {		\
+			.word = &word,					\
+			.bit = bit					\
+		};							\
+									\
+		if (rte_lcore_count() < 2) {				\
+			printf("Need multiple cores to run parallel test.\n"); \
+			return TEST_SKIPPED;				\
+		}							\
+									\
+		worker_lcore_id = rte_get_next_lcore(-1, 1, 0);		\
+									\
+		int rc = rte_eal_remote_launch(run_parallel_flip ## size, \
+					       &lworker, worker_lcore_id); \
+		TEST_ASSERT(rc == 0, "Worker thread launch failed");	\
+									\
+		run_parallel_flip ## size(&lmain);			\
+									\
+		rte_eal_mp_wait_lcore();				\
+									\
+		uint64_t total_flips = lmain.flips + lworker.flips;	\
+		bool expected_value = total_flips % 2;			\
+									\
+		TEST_ASSERT(expected_value == rte_bit_test(&word, bit), \
+			    "After %"PRId64" flips, the bit value "	\
+			    "should be %d", total_flips, expected_value); \
+									\
+		uint64_t expected_word = 0;				\
+		rte_bit_assign(&expected_word, bit, expected_value);	\
+									\
+		TEST_ASSERT(expected_word == word, "Untouched bits have " \
+			    "changed value");				\
+									\
+		return TEST_SUCCESS;					\
+	}
+
+GEN_TEST_BIT_PARALLEL_FLIP(32)
+GEN_TEST_BIT_PARALLEL_FLIP(64)
+
 static uint32_t val32;
 static uint64_t val64;
 
@@ -177,6 +478,16 @@ static struct unit_test_suite test_suite = {
 	.unit_test_cases = {
 		TEST_CASE(test_bit_access32),
 		TEST_CASE(test_bit_access64),
+		TEST_CASE(test_bit_access32),
+		TEST_CASE(test_bit_access64),
+		TEST_CASE(test_bit_atomic_access32),
+		TEST_CASE(test_bit_atomic_access64),
+		TEST_CASE(test_bit_atomic_parallel_assign32),
+		TEST_CASE(test_bit_atomic_parallel_assign64),
+		TEST_CASE(test_bit_atomic_parallel_test_and_modify32),
+		TEST_CASE(test_bit_atomic_parallel_test_and_modify64),
+		TEST_CASE(test_bit_atomic_parallel_flip32),
+		TEST_CASE(test_bit_atomic_parallel_flip64),
 		TEST_CASE(test_bit_relaxed_set),
 		TEST_CASE(test_bit_relaxed_clear),
 		TEST_CASE(test_bit_relaxed_test_set_clear),
-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [PATCH v9 6/6] eal: extend bitops to handle volatile pointers
  2024-09-18  9:04               ` [PATCH v9 0/6] Improve EAL bit operations API Mattias Rönnblom
                                   ` (4 preceding siblings ...)
  2024-09-18  9:04                 ` [PATCH v9 5/6] eal: add unit tests for atomic bit access functions Mattias Rönnblom
@ 2024-09-18  9:04                 ` Mattias Rönnblom
  5 siblings, 0 replies; 63+ messages in thread
From: Mattias Rönnblom @ 2024-09-18  9:04 UTC (permalink / raw)
  To: dev
  Cc: hofors, Heng Wang, Stephen Hemminger, Tyler Retzlaff,
	Morten Brørup, Jack Bond-Preston, David Marchand,
	Chengwen Feng, Mattias Rönnblom

Have rte_bit_[test|set|clear|assign|flip]() and rte_bit_atomic_*()
handle volatile-marked pointers.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Jack Bond-Preston <jack.bond-preston@foss.arm.com>

--

PATCH v3:
 * Updated to reflect removed 'fun' parameter in __RTE_GEN_BIT_*()
   (Jack Bond-Preston).

PATCH v2:
 * Actually run the test_bit_atomic_v_access*() test functions.
---
 app/test/test_bitops.c       |  32 +++-
 lib/eal/include/rte_bitops.h | 301 +++++++++++++++++++++++------------
 2 files changed, 222 insertions(+), 111 deletions(-)

diff --git a/app/test/test_bitops.c b/app/test/test_bitops.c
index b80216a0a1..10e87f6776 100644
--- a/app/test/test_bitops.c
+++ b/app/test/test_bitops.c
@@ -14,13 +14,13 @@
 #include "test.h"
 
 #define GEN_TEST_BIT_ACCESS(test_name, set_fun, clear_fun, assign_fun,	\
-			    flip_fun, test_fun, size)			\
+			    flip_fun, test_fun, size, mod)		\
 	static int							\
 	test_name(void)							\
 	{								\
 		uint ## size ## _t reference = (uint ## size ## _t)rte_rand(); \
 		unsigned int bit_nr;					\
-		uint ## size ## _t word = (uint ## size ## _t)rte_rand(); \
+		mod uint ## size ## _t word = (uint ## size ## _t)rte_rand(); \
 									\
 		for (bit_nr = 0; bit_nr < size; bit_nr++) {		\
 			bool reference_bit = (reference >> bit_nr) & 1;	\
@@ -41,7 +41,7 @@
 				    "Bit %d had unflipped value", bit_nr); \
 			flip_fun(&word, bit_nr);			\
 									\
-			const uint ## size ## _t *const_ptr = &word;	\
+			const mod uint ## size ## _t *const_ptr = &word; \
 			TEST_ASSERT(test_fun(const_ptr, bit_nr) ==	\
 				    reference_bit,			\
 				    "Bit %d had unexpected value", bit_nr); \
@@ -59,10 +59,16 @@
 	}
 
 GEN_TEST_BIT_ACCESS(test_bit_access32, rte_bit_set, rte_bit_clear,
-		    rte_bit_assign, rte_bit_flip, rte_bit_test, 32)
+		    rte_bit_assign, rte_bit_flip, rte_bit_test, 32,)
 
 GEN_TEST_BIT_ACCESS(test_bit_access64, rte_bit_set, rte_bit_clear,
-		    rte_bit_assign, rte_bit_flip, rte_bit_test, 64)
+		    rte_bit_assign, rte_bit_flip, rte_bit_test, 64,)
+
+GEN_TEST_BIT_ACCESS(test_bit_v_access32, rte_bit_set, rte_bit_clear,
+		    rte_bit_assign, rte_bit_flip, rte_bit_test, 32, volatile)
+
+GEN_TEST_BIT_ACCESS(test_bit_v_access64, rte_bit_set, rte_bit_clear,
+		    rte_bit_assign, rte_bit_flip, rte_bit_test, 64, volatile)
 
 #define bit_atomic_set(addr, nr)				\
 	rte_bit_atomic_set(addr, nr, rte_memory_order_relaxed)
@@ -81,11 +87,19 @@ GEN_TEST_BIT_ACCESS(test_bit_access64, rte_bit_set, rte_bit_clear,
 
 GEN_TEST_BIT_ACCESS(test_bit_atomic_access32, bit_atomic_set,
 		    bit_atomic_clear, bit_atomic_assign,
-		    bit_atomic_flip, bit_atomic_test, 32)
+		    bit_atomic_flip, bit_atomic_test, 32,)
 
 GEN_TEST_BIT_ACCESS(test_bit_atomic_access64, bit_atomic_set,
 		    bit_atomic_clear, bit_atomic_assign,
-		    bit_atomic_flip, bit_atomic_test, 64)
+		    bit_atomic_flip, bit_atomic_test, 64,)
+
+GEN_TEST_BIT_ACCESS(test_bit_atomic_v_access32, bit_atomic_set,
+		    bit_atomic_clear, bit_atomic_assign,
+		    bit_atomic_flip, bit_atomic_test, 32, volatile)
+
+GEN_TEST_BIT_ACCESS(test_bit_atomic_v_access64, bit_atomic_set,
+		    bit_atomic_clear, bit_atomic_assign,
+		    bit_atomic_flip, bit_atomic_test, 64, volatile)
 
 #define PARALLEL_TEST_RUNTIME 0.25
 
@@ -480,8 +494,12 @@ static struct unit_test_suite test_suite = {
 		TEST_CASE(test_bit_access64),
 		TEST_CASE(test_bit_access32),
 		TEST_CASE(test_bit_access64),
+		TEST_CASE(test_bit_v_access32),
+		TEST_CASE(test_bit_v_access64),
 		TEST_CASE(test_bit_atomic_access32),
 		TEST_CASE(test_bit_atomic_access64),
+		TEST_CASE(test_bit_atomic_v_access32),
+		TEST_CASE(test_bit_atomic_v_access64),
 		TEST_CASE(test_bit_atomic_parallel_assign32),
 		TEST_CASE(test_bit_atomic_parallel_assign64),
 		TEST_CASE(test_bit_atomic_parallel_test_and_modify32),
diff --git a/lib/eal/include/rte_bitops.h b/lib/eal/include/rte_bitops.h
index 3ad6795fd1..d7a07c4099 100644
--- a/lib/eal/include/rte_bitops.h
+++ b/lib/eal/include/rte_bitops.h
@@ -127,12 +127,16 @@ extern "C" {
  * @param nr
  *   The index of the bit.
  */
-#define rte_bit_test(addr, nr)					\
-	_Generic((addr),					\
-		uint32_t *: __rte_bit_test32,			\
-		const uint32_t *: __rte_bit_test32,		\
-		uint64_t *: __rte_bit_test64,			\
-		const uint64_t *: __rte_bit_test64)(addr, nr)
+#define rte_bit_test(addr, nr)						\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_test32,				\
+		 const uint32_t *: __rte_bit_test32,			\
+		 volatile uint32_t *: __rte_bit_v_test32,		\
+		 const volatile uint32_t *: __rte_bit_v_test32,		\
+		 uint64_t *: __rte_bit_test64,				\
+		 const uint64_t *: __rte_bit_test64,			\
+		 volatile uint64_t *: __rte_bit_v_test64,		\
+		 const volatile uint64_t *: __rte_bit_v_test64)(addr, nr)
 
 /**
  * @warning
@@ -152,10 +156,12 @@ extern "C" {
  * @param nr
  *   The index of the bit.
  */
-#define rte_bit_set(addr, nr)				\
-	_Generic((addr),				\
-		 uint32_t *: __rte_bit_set32,		\
-		 uint64_t *: __rte_bit_set64)(addr, nr)
+#define rte_bit_set(addr, nr)						\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_set32,				\
+		 volatile uint32_t *: __rte_bit_v_set32,		\
+		 uint64_t *: __rte_bit_set64,				\
+		 volatile uint64_t *: __rte_bit_v_set64)(addr, nr)
 
 /**
  * @warning
@@ -175,10 +181,12 @@ extern "C" {
  * @param nr
  *   The index of the bit.
  */
-#define rte_bit_clear(addr, nr)					\
-	_Generic((addr),					\
-		 uint32_t *: __rte_bit_clear32,			\
-		 uint64_t *: __rte_bit_clear64)(addr, nr)
+#define rte_bit_clear(addr, nr)						\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_clear32,				\
+		 volatile uint32_t *: __rte_bit_v_clear32,		\
+		 uint64_t *: __rte_bit_clear64,				\
+		 volatile uint64_t *: __rte_bit_v_clear64)(addr, nr)
 
 /**
  * @warning
@@ -202,7 +210,9 @@ extern "C" {
 #define rte_bit_assign(addr, nr, value)					\
 	_Generic((addr),						\
 		 uint32_t *: __rte_bit_assign32,			\
-		 uint64_t *: __rte_bit_assign64)(addr, nr, value)
+		 volatile uint32_t *: __rte_bit_v_assign32,		\
+		 uint64_t *: __rte_bit_assign64,			\
+		 volatile uint64_t *: __rte_bit_v_assign64)(addr, nr, value)
 
 /**
  * @warning
@@ -225,7 +235,9 @@ extern "C" {
 #define rte_bit_flip(addr, nr)						\
 	_Generic((addr),						\
 		 uint32_t *: __rte_bit_flip32,				\
-		 uint64_t *: __rte_bit_flip64)(addr, nr)
+		 volatile uint32_t *: __rte_bit_v_flip32,		\
+		 uint64_t *: __rte_bit_flip64,				\
+		 volatile uint64_t *: __rte_bit_v_flip64)(addr, nr)
 
 /**
  * @warning
@@ -250,9 +262,13 @@ extern "C" {
 	_Generic((addr),						\
 		 uint32_t *: __rte_bit_atomic_test32,			\
 		 const uint32_t *: __rte_bit_atomic_test32,		\
+		 volatile uint32_t *: __rte_bit_atomic_v_test32,	\
+		 const volatile uint32_t *: __rte_bit_atomic_v_test32,	\
 		 uint64_t *: __rte_bit_atomic_test64,			\
-		 const uint64_t *: __rte_bit_atomic_test64)(addr, nr,	\
-							    memory_order)
+		 const uint64_t *: __rte_bit_atomic_test64,		\
+		 volatile uint64_t *: __rte_bit_atomic_v_test64,	\
+		 const volatile uint64_t *: __rte_bit_atomic_v_test64) \
+						    (addr, nr, memory_order)
 
 /**
  * @warning
@@ -274,7 +290,10 @@ extern "C" {
 #define rte_bit_atomic_set(addr, nr, memory_order)			\
 	_Generic((addr),						\
 		 uint32_t *: __rte_bit_atomic_set32,			\
-		 uint64_t *: __rte_bit_atomic_set64)(addr, nr, memory_order)
+		 volatile uint32_t *: __rte_bit_atomic_v_set32,		\
+		 uint64_t *: __rte_bit_atomic_set64,			\
+		 volatile uint64_t *: __rte_bit_atomic_v_set64)(addr, nr, \
+								memory_order)
 
 /**
  * @warning
@@ -296,7 +315,10 @@ extern "C" {
 #define rte_bit_atomic_clear(addr, nr, memory_order)			\
 	_Generic((addr),						\
 		 uint32_t *: __rte_bit_atomic_clear32,			\
-		 uint64_t *: __rte_bit_atomic_clear64)(addr, nr, memory_order)
+		 volatile uint32_t *: __rte_bit_atomic_v_clear32,	\
+		 uint64_t *: __rte_bit_atomic_clear64,			\
+		 volatile uint64_t *: __rte_bit_atomic_v_clear64)(addr, nr, \
+								  memory_order)
 
 /**
  * @warning
@@ -320,8 +342,11 @@ extern "C" {
 #define rte_bit_atomic_assign(addr, nr, value, memory_order)		\
 	_Generic((addr),						\
 		 uint32_t *: __rte_bit_atomic_assign32,			\
-		 uint64_t *: __rte_bit_atomic_assign64)(addr, nr, value, \
-							memory_order)
+		 volatile uint32_t *: __rte_bit_atomic_v_assign32,	\
+		 uint64_t *: __rte_bit_atomic_assign64,			\
+		 volatile uint64_t *: __rte_bit_atomic_v_assign64)(addr, nr, \
+								   value, \
+								   memory_order)
 
 /**
  * @warning
@@ -344,7 +369,10 @@ extern "C" {
 #define rte_bit_atomic_flip(addr, nr, memory_order)			\
 	_Generic((addr),						\
 		 uint32_t *: __rte_bit_atomic_flip32,			\
-		 uint64_t *: __rte_bit_atomic_flip64)(addr, nr, memory_order)
+		 volatile uint32_t *: __rte_bit_atomic_v_flip32,	\
+		 uint64_t *: __rte_bit_atomic_flip64,			\
+		 volatile uint64_t *: __rte_bit_atomic_v_flip64)(addr, nr, \
+								 memory_order)
 
 /**
  * @warning
@@ -368,8 +396,10 @@ extern "C" {
 #define rte_bit_atomic_test_and_set(addr, nr, memory_order)		\
 	_Generic((addr),						\
 		 uint32_t *: __rte_bit_atomic_test_and_set32,		\
-		 uint64_t *: __rte_bit_atomic_test_and_set64)(addr, nr,	\
-							      memory_order)
+		 volatile uint32_t *: __rte_bit_atomic_v_test_and_set32, \
+		 uint64_t *: __rte_bit_atomic_test_and_set64,		\
+		 volatile uint64_t *: __rte_bit_atomic_v_test_and_set64) \
+						    (addr, nr, memory_order)
 
 /**
  * @warning
@@ -393,8 +423,10 @@ extern "C" {
 #define rte_bit_atomic_test_and_clear(addr, nr, memory_order)		\
 	_Generic((addr),						\
 		 uint32_t *: __rte_bit_atomic_test_and_clear32,		\
-		 uint64_t *: __rte_bit_atomic_test_and_clear64)(addr, nr, \
-								memory_order)
+		 volatile uint32_t *: __rte_bit_atomic_v_test_and_clear32, \
+		 uint64_t *: __rte_bit_atomic_test_and_clear64,		\
+		 volatile uint64_t *: __rte_bit_atomic_v_test_and_clear64) \
+						       (addr, nr, memory_order)
 
 /**
  * @warning
@@ -421,9 +453,10 @@ extern "C" {
 #define rte_bit_atomic_test_and_assign(addr, nr, value, memory_order)	\
 	_Generic((addr),						\
 		 uint32_t *: __rte_bit_atomic_test_and_assign32,	\
-		 uint64_t *: __rte_bit_atomic_test_and_assign64)(addr, nr, \
-								 value, \
-								 memory_order)
+		 volatile uint32_t *: __rte_bit_atomic_v_test_and_assign32, \
+		 uint64_t *: __rte_bit_atomic_test_and_assign64,	\
+		 volatile uint64_t *: __rte_bit_atomic_v_test_and_assign64) \
+						(addr, nr, value, memory_order)
 
 #define __RTE_GEN_BIT_TEST(variant, qualifier, size)			\
 	__rte_experimental						\
@@ -493,7 +526,8 @@ extern "C" {
 	__RTE_GEN_BIT_FLIP(v, qualifier, size)
 
 #define __RTE_GEN_BIT_OPS_SIZE(size) \
-	__RTE_GEN_BIT_OPS(,, size)
+	__RTE_GEN_BIT_OPS(,, size) \
+	__RTE_GEN_BIT_OPS(v_, volatile, size)
 
 __RTE_GEN_BIT_OPS_SIZE(32)
 __RTE_GEN_BIT_OPS_SIZE(64)
@@ -633,7 +667,8 @@ __RTE_GEN_BIT_OPS_SIZE(64)
 	__RTE_GEN_BIT_ATOMIC_FLIP(variant, qualifier, size)
 
 #define __RTE_GEN_BIT_ATOMIC_OPS_SIZE(size) \
-	__RTE_GEN_BIT_ATOMIC_OPS(,, size)
+	__RTE_GEN_BIT_ATOMIC_OPS(,, size) \
+	__RTE_GEN_BIT_ATOMIC_OPS(v_, volatile, size)
 
 __RTE_GEN_BIT_ATOMIC_OPS_SIZE(32)
 __RTE_GEN_BIT_ATOMIC_OPS_SIZE(64)
@@ -1342,120 +1377,178 @@ rte_log2_u64(uint64_t v)
 #undef rte_bit_atomic_test_and_clear
 #undef rte_bit_atomic_test_and_assign
 
-#define __RTE_BIT_OVERLOAD_SZ_2(fun, qualifier, size, arg1_type, arg1_name) \
+#define __RTE_BIT_OVERLOAD_V_2(family, v, fun, c, size, arg1_type, arg1_name) \
 	static inline void						\
-	rte_bit_ ## fun(qualifier uint ## size ## _t *addr,		\
-			arg1_type arg1_name)				\
+	rte_bit_ ## family ## fun(c uint ## size ## _t *addr,		\
+				  arg1_type arg1_name)			\
 	{								\
-		__rte_bit_ ## fun ## size(addr, arg1_name);		\
+		__rte_bit_ ## family ## v ## fun ## size(addr, arg1_name); \
 	}
 
-#define __RTE_BIT_OVERLOAD_2(fun, qualifier, arg1_type, arg1_name)	\
-	__RTE_BIT_OVERLOAD_SZ_2(fun, qualifier, 32, arg1_type, arg1_name) \
-	__RTE_BIT_OVERLOAD_SZ_2(fun, qualifier, 64, arg1_type, arg1_name)
+#define __RTE_BIT_OVERLOAD_SZ_2(family, fun, c, size, arg1_type, arg1_name) \
+	__RTE_BIT_OVERLOAD_V_2(family,, fun, c, size, arg1_type,	\
+			       arg1_name)				\
+	__RTE_BIT_OVERLOAD_V_2(family, v_, fun, c volatile, size, \
+			       arg1_type, arg1_name)
 
-#define __RTE_BIT_OVERLOAD_SZ_2R(fun, qualifier, size, ret_type, arg1_type, \
-				 arg1_name)				\
+#define __RTE_BIT_OVERLOAD_2(family, fun, c, arg1_type, arg1_name)	\
+	__RTE_BIT_OVERLOAD_SZ_2(family, fun, c, 32, arg1_type, arg1_name) \
+	__RTE_BIT_OVERLOAD_SZ_2(family, fun, c, 64, arg1_type, arg1_name)
+
+#define __RTE_BIT_OVERLOAD_V_2R(family, v, fun, c, size, ret_type, arg1_type, \
+				arg1_name)				\
 	static inline ret_type						\
-	rte_bit_ ## fun(qualifier uint ## size ## _t *addr,		\
+	rte_bit_ ## family ## fun(c uint ## size ## _t *addr,		\
 			arg1_type arg1_name)				\
 	{								\
-		return __rte_bit_ ## fun ## size(addr, arg1_name);	\
+		return __rte_bit_ ## family ## v ## fun ## size(addr,	\
+								arg1_name); \
 	}
 
-#define __RTE_BIT_OVERLOAD_2R(fun, qualifier, ret_type, arg1_type, arg1_name) \
-	__RTE_BIT_OVERLOAD_SZ_2R(fun, qualifier, 32, ret_type, arg1_type, \
+#define __RTE_BIT_OVERLOAD_SZ_2R(family, fun, c, size, ret_type, arg1_type, \
+				 arg1_name)				\
+	__RTE_BIT_OVERLOAD_V_2R(family,, fun, c, size, ret_type, arg1_type, \
+				arg1_name)				\
+	__RTE_BIT_OVERLOAD_V_2R(family, v_, fun, c volatile,		\
+				size, ret_type, arg1_type, arg1_name)
+
+#define __RTE_BIT_OVERLOAD_2R(family, fun, c, ret_type, arg1_type, arg1_name) \
+	__RTE_BIT_OVERLOAD_SZ_2R(family, fun, c, 32, ret_type, arg1_type, \
 				 arg1_name)				\
-	__RTE_BIT_OVERLOAD_SZ_2R(fun, qualifier, 64, ret_type, arg1_type, \
+	__RTE_BIT_OVERLOAD_SZ_2R(family, fun, c, 64, ret_type, arg1_type, \
 				 arg1_name)
 
-#define __RTE_BIT_OVERLOAD_SZ_3(fun, qualifier, size, arg1_type, arg1_name, \
-				arg2_type, arg2_name)			\
+#define __RTE_BIT_OVERLOAD_V_3(family, v, fun, c, size, arg1_type, arg1_name, \
+			       arg2_type, arg2_name)			\
 	static inline void						\
-	rte_bit_ ## fun(uint ## size ## _t *addr, arg1_type arg1_name,	\
-			arg2_type arg2_name)				\
+	rte_bit_ ## family ## fun(c uint ## size ## _t *addr,		\
+				  arg1_type arg1_name, arg2_type arg2_name) \
 	{								\
-		__rte_bit_ ## fun ## size(addr, arg1_name, arg2_name);	\
+		__rte_bit_ ## family ## v ## fun ## size(addr, arg1_name, \
+							 arg2_name);	\
 	}
 
-#define __RTE_BIT_OVERLOAD_3(fun, qualifier, arg1_type, arg1_name, arg2_type, \
+#define __RTE_BIT_OVERLOAD_SZ_3(family, fun, c, size, arg1_type, arg1_name, \
+				arg2_type, arg2_name)			\
+	__RTE_BIT_OVERLOAD_V_3(family,, fun, c, size, arg1_type, arg1_name, \
+			       arg2_type, arg2_name)			\
+	__RTE_BIT_OVERLOAD_V_3(family, v_, fun, c volatile, size, arg1_type, \
+			       arg1_name, arg2_type, arg2_name)
+
+#define __RTE_BIT_OVERLOAD_3(family, fun, c, arg1_type, arg1_name, arg2_type, \
 			     arg2_name)					\
-	__RTE_BIT_OVERLOAD_SZ_3(fun, qualifier, 32, arg1_type, arg1_name, \
+	__RTE_BIT_OVERLOAD_SZ_3(family, fun, c, 32, arg1_type, arg1_name, \
 				arg2_type, arg2_name)			\
-	__RTE_BIT_OVERLOAD_SZ_3(fun, qualifier, 64, arg1_type, arg1_name, \
+	__RTE_BIT_OVERLOAD_SZ_3(family, fun, c, 64, arg1_type, arg1_name, \
 				arg2_type, arg2_name)
 
-#define __RTE_BIT_OVERLOAD_SZ_3R(fun, qualifier, size, ret_type, arg1_type, \
-				 arg1_name, arg2_type, arg2_name)	\
+#define __RTE_BIT_OVERLOAD_V_3R(family, v, fun, c, size, ret_type, arg1_type, \
+				arg1_name, arg2_type, arg2_name)	\
 	static inline ret_type						\
-	rte_bit_ ## fun(uint ## size ## _t *addr, arg1_type arg1_name,	\
-			arg2_type arg2_name)				\
+	rte_bit_ ## family ## fun(c uint ## size ## _t *addr,		\
+				  arg1_type arg1_name, arg2_type arg2_name) \
 	{								\
-		return __rte_bit_ ## fun ## size(addr, arg1_name, arg2_name); \
+		return __rte_bit_ ## family ## v ## fun ## size(addr,	\
+								arg1_name, \
+								arg2_name); \
 	}
 
-#define __RTE_BIT_OVERLOAD_3R(fun, qualifier, ret_type, arg1_type, arg1_name, \
-			      arg2_type, arg2_name)			\
-	__RTE_BIT_OVERLOAD_SZ_3R(fun, qualifier, 32, ret_type, arg1_type, \
+#define __RTE_BIT_OVERLOAD_SZ_3R(family, fun, c, size, ret_type, arg1_type, \
 				 arg1_name, arg2_type, arg2_name)	\
-	__RTE_BIT_OVERLOAD_SZ_3R(fun, qualifier, 64, ret_type, arg1_type, \
-				 arg1_name, arg2_type, arg2_name)
+	__RTE_BIT_OVERLOAD_V_3R(family,, fun, c, size, ret_type, \
+				arg1_type, arg1_name, arg2_type, arg2_name) \
+	__RTE_BIT_OVERLOAD_V_3R(family, v_, fun, c volatile, size, \
+				ret_type, arg1_type, arg1_name, arg2_type, \
+				arg2_name)
 
-#define __RTE_BIT_OVERLOAD_SZ_4(fun, qualifier, size, arg1_type, arg1_name, \
-				arg2_type, arg2_name, arg3_type, arg3_name) \
+#define __RTE_BIT_OVERLOAD_3R(family, fun, c, ret_type, arg1_type, arg1_name, \
+			      arg2_type, arg2_name)			\
+	__RTE_BIT_OVERLOAD_SZ_3R(family, fun, c, 32, ret_type,		\
+				 arg1_type, arg1_name, arg2_type, arg2_name) \
+	__RTE_BIT_OVERLOAD_SZ_3R(family, fun, c, 64, ret_type, \
+				 arg1_type, arg1_name, arg2_type, arg2_name)
+
+#define __RTE_BIT_OVERLOAD_V_4(family, v, fun, c, size, arg1_type, arg1_name, \
+			       arg2_type, arg2_name, arg3_type,	arg3_name) \
 	static inline void						\
-	rte_bit_ ## fun(uint ## size ## _t *addr, arg1_type arg1_name,	\
-			arg2_type arg2_name, arg3_type arg3_name)	\
+	rte_bit_ ## family ## fun(c uint ## size ## _t *addr,		\
+				  arg1_type arg1_name, arg2_type arg2_name, \
+				  arg3_type arg3_name)			\
 	{								\
-		__rte_bit_ ## fun ## size(addr, arg1_name, arg2_name,	\
-					  arg3_name);		      \
+		__rte_bit_ ## family ## v ## fun ## size(addr, arg1_name, \
+							 arg2_name,	\
+							 arg3_name);	\
 	}
 
-#define __RTE_BIT_OVERLOAD_4(fun, qualifier, arg1_type, arg1_name, arg2_type, \
-			     arg2_name, arg3_type, arg3_name)		\
-	__RTE_BIT_OVERLOAD_SZ_4(fun, qualifier, 32, arg1_type, arg1_name, \
+#define __RTE_BIT_OVERLOAD_SZ_4(family, fun, c, size, arg1_type, arg1_name, \
 				arg2_type, arg2_name, arg3_type, arg3_name) \
-	__RTE_BIT_OVERLOAD_SZ_4(fun, qualifier, 64, arg1_type, arg1_name, \
-				arg2_type, arg2_name, arg3_type, arg3_name)
-
-#define __RTE_BIT_OVERLOAD_SZ_4R(fun, qualifier, size, ret_type, arg1_type, \
-				 arg1_name, arg2_type, arg2_name, arg3_type, \
-				 arg3_name)				\
+	__RTE_BIT_OVERLOAD_V_4(family,, fun, c, size, arg1_type,	\
+			       arg1_name, arg2_type, arg2_name, arg3_type, \
+			       arg3_name)				\
+	__RTE_BIT_OVERLOAD_V_4(family, v_, fun, c volatile, size,	\
+			       arg1_type, arg1_name, arg2_type, arg2_name, \
+			       arg3_type, arg3_name)
+
+#define __RTE_BIT_OVERLOAD_4(family, fun, c, arg1_type, arg1_name, arg2_type, \
+			     arg2_name, arg3_type, arg3_name)		\
+	__RTE_BIT_OVERLOAD_SZ_4(family, fun, c, 32, arg1_type,		\
+				arg1_name, arg2_type, arg2_name, arg3_type, \
+				arg3_name)				\
+	__RTE_BIT_OVERLOAD_SZ_4(family, fun, c, 64, arg1_type,		\
+				arg1_name, arg2_type, arg2_name, arg3_type, \
+				arg3_name)
+
+#define __RTE_BIT_OVERLOAD_V_4R(family, v, fun, c, size, ret_type, arg1_type, \
+				arg1_name, arg2_type, arg2_name, arg3_type, \
+				arg3_name)				\
 	static inline ret_type						\
-	rte_bit_ ## fun(uint ## size ## _t *addr, arg1_type arg1_name,	\
-			arg2_type arg2_name, arg3_type arg3_name)	\
+	rte_bit_ ## family ## fun(c uint ## size ## _t *addr,		\
+				  arg1_type arg1_name, arg2_type arg2_name, \
+				  arg3_type arg3_name)			\
 	{								\
-		return __rte_bit_ ## fun ## size(addr, arg1_name, arg2_name, \
-						 arg3_name);		\
+		return __rte_bit_ ## family ## v ## fun ## size(addr,	\
+								arg1_name, \
+								arg2_name, \
+								arg3_name); \
 	}
 
-#define __RTE_BIT_OVERLOAD_4R(fun, qualifier, ret_type, arg1_type, arg1_name, \
-			      arg2_type, arg2_name, arg3_type, arg3_name) \
-	__RTE_BIT_OVERLOAD_SZ_4R(fun, qualifier, 32, ret_type, arg1_type, \
+#define __RTE_BIT_OVERLOAD_SZ_4R(family, fun, c, size, ret_type, arg1_type, \
 				 arg1_name, arg2_type, arg2_name, arg3_type, \
 				 arg3_name)				\
-	__RTE_BIT_OVERLOAD_SZ_4R(fun, qualifier, 64, ret_type, arg1_type, \
-				 arg1_name, arg2_type, arg2_name, arg3_type, \
-				 arg3_name)
-
-__RTE_BIT_OVERLOAD_2R(test, const, bool, unsigned int, nr)
-__RTE_BIT_OVERLOAD_2(set,, unsigned int, nr)
-__RTE_BIT_OVERLOAD_2(clear,, unsigned int, nr)
-__RTE_BIT_OVERLOAD_3(assign,, unsigned int, nr, bool, value)
-__RTE_BIT_OVERLOAD_2(flip,, unsigned int, nr)
-
-__RTE_BIT_OVERLOAD_3R(atomic_test, const, bool, unsigned int, nr,
+	__RTE_BIT_OVERLOAD_V_4R(family,, fun, c, size, ret_type, arg1_type, \
+				arg1_name, arg2_type, arg2_name, arg3_type, \
+				arg3_name)				\
+	__RTE_BIT_OVERLOAD_V_4R(family, v_, fun, c volatile, size,	\
+				ret_type, arg1_type, arg1_name, arg2_type, \
+				arg2_name, arg3_type, arg3_name)
+
+#define __RTE_BIT_OVERLOAD_4R(family, fun, c, ret_type, arg1_type, arg1_name, \
+			      arg2_type, arg2_name, arg3_type, arg3_name) \
+	__RTE_BIT_OVERLOAD_SZ_4R(family, fun, c, 32, ret_type,		\
+				 arg1_type, arg1_name, arg2_type, arg2_name, \
+				 arg3_type, arg3_name)			\
+	__RTE_BIT_OVERLOAD_SZ_4R(family, fun, c, 64, ret_type,		\
+				 arg1_type, arg1_name, arg2_type, arg2_name, \
+				 arg3_type, arg3_name)
+
+__RTE_BIT_OVERLOAD_2R(, test, const, bool, unsigned int, nr)
+__RTE_BIT_OVERLOAD_2(, set,, unsigned int, nr)
+__RTE_BIT_OVERLOAD_2(, clear,, unsigned int, nr)
+__RTE_BIT_OVERLOAD_3(, assign,, unsigned int, nr, bool, value)
+__RTE_BIT_OVERLOAD_2(, flip,, unsigned int, nr)
+
+__RTE_BIT_OVERLOAD_3R(atomic_, test, const, bool, unsigned int, nr,
 		      int, memory_order)
-__RTE_BIT_OVERLOAD_3(atomic_set,, unsigned int, nr, int, memory_order)
-__RTE_BIT_OVERLOAD_3(atomic_clear,, unsigned int, nr, int, memory_order)
-__RTE_BIT_OVERLOAD_4(atomic_assign,, unsigned int, nr, bool, value,
+__RTE_BIT_OVERLOAD_3(atomic_, set,, unsigned int, nr, int, memory_order)
+__RTE_BIT_OVERLOAD_3(atomic_, clear,, unsigned int, nr, int, memory_order)
+__RTE_BIT_OVERLOAD_4(atomic_, assign,, unsigned int, nr, bool, value,
 		     int, memory_order)
-__RTE_BIT_OVERLOAD_3(atomic_flip,, unsigned int, nr, int, memory_order)
-__RTE_BIT_OVERLOAD_3R(atomic_test_and_set,, bool, unsigned int, nr,
+__RTE_BIT_OVERLOAD_3(atomic_, flip,, unsigned int, nr, int, memory_order)
+__RTE_BIT_OVERLOAD_3R(atomic_, test_and_set,, bool, unsigned int, nr,
 		      int, memory_order)
-__RTE_BIT_OVERLOAD_3R(atomic_test_and_clear,, bool, unsigned int, nr,
+__RTE_BIT_OVERLOAD_3R(atomic_, test_and_clear,, bool, unsigned int, nr,
 		      int, memory_order)
-__RTE_BIT_OVERLOAD_4R(atomic_test_and_assign,, bool, unsigned int, nr,
+__RTE_BIT_OVERLOAD_4R(atomic_, test_and_assign,, bool, unsigned int, nr,
 		      bool, value, int, memory_order)
 
 #endif
-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [PATCH v10 0/7] Improve EAL bit operations API
  2024-09-18  9:04                 ` [PATCH v9 1/6] dpdk: do not force C linkage on include file dependencies Mattias Rönnblom
@ 2024-09-19 19:31                   ` Mattias Rönnblom
  2024-09-19 19:31                     ` [PATCH v10 1/7] buildtools/chkincs: relax C linkage requirement Mattias Rönnblom
                                       ` (6 more replies)
  0 siblings, 7 replies; 63+ messages in thread
From: Mattias Rönnblom @ 2024-09-19 19:31 UTC (permalink / raw)
  To: dev
  Cc: hofors, Heng Wang, Stephen Hemminger, Tyler Retzlaff,
	Morten Brørup, Jack Bond-Preston, David Marchand,
	Chengwen Feng, Mattias Rönnblom

This patch set represents an attempt to improve and extend the RTE
bitops API, in particular for functions that operate on individual
bits.

All new functionality is exposed to the user as generic selection
macros, delegating the actual work to private (__-marked) static
inline functions. Public functions (e.g., rte_bit_set32()) would just
be bloating the API. Such generic selection macros will here be
referred to as "functions", although technically they are not.

The legacy <rte_bitops.h> rte_bit_relaxed_*() functions are replaced
with two new families:

rte_bit_[test|set|clear|assign|flip]() which provides no memory
ordering or atomicity guarantees, but does provide the best
performance. The performance degradation resulting from the use of
volatile (e.g., forcing loads and stores to actually occur and in the
number specified) and atomic (e.g., LOCK-prefixed instructions on x86)
may be significant. rte_bit_[test|set|clear|assign|flip]() may be
used with volatile word pointers, in which case they guarantee
that the program-level accesses actually occur.

rte_bit_atomic_*() which provides atomic bit-level operations,
including the possibility of specifying memory ordering constraints
(or the lack thereof).

The atomic functions take non-_Atomic pointers, to be flexible, just
like the GCC builtins and default <rte_stdatomic.h>. The issue with
_Atomic APIs is that it may well be the case that the user wants to
perform both non-atomic and atomic operations on the same word.

Having _Atomic-marked addresses would complicate supporting atomic
bit-level operations in the bitset API (proposed in a different RFC
patchset), and potentially other APIs depending on RTE bitops for
atomic bit-level ops. Either one needs two bitset variants, one
_Atomic bitset and one non-atomic one, or the bitset code needs to
cast the non-_Atomic pointer to an _Atomic one. Having a separate
_Atomic bitset would be bloat and also prevent the user from both, in
some situations, doing atomic operations against a bit set, while in
other situations (e.g., at times when MT safety is not a concern)
operating on the same objects in a non-atomic manner.

Unlike rte_bit_relaxed_*(), individual bits are represented by bool,
not uint32_t or uint64_t. The author found the use of such large types
confusing, and also failed to see any performance benefits.

A set of functions rte_bit_*_assign() are added, to assign a
particular boolean value to a particular bit.

All new functions have properly documented semantics.

All new functions operate on both 32 and 64-bit words, with type
checking.

_Generic allows the user code to be a little more compact. Having a
type-generic atomic test/set/clear/assign bit API also seems
consistent with the "core" (word-size) atomics API, which is generic
(both GCC builtins and <rte_stdatomic.h> are).

The _Generic versions avoid having explicit unsigned long versions of
all functions. If you have an unsigned long, it's safe to use the
generic version (e.g., rte_bit_set()) and _Generic will pick the right
function, provided long is either 32 or 64 bit on your platform (which
it is on all DPDK-supported ABIs).

The generic rte_bit_set() is a macro, and not a function, but
nevertheless has been given a lower-case name. That's how C11 does it
(for atomics, and other _Generic), and <rte_stdatomic.h>. Its address
can't be taken, but it does not evaluate its parameters more than
once.

C++ doesn't support generic selection. In C++ translation units the
_Generic macros are replaced with overloaded functions, implemented by
means of a huge, complicated C macro mess.

Mattias Rönnblom (7):
  buildtools/chkincs: relax C linkage requirement
  dpdk: use C linkage only where appropriate
  eal: extend bit manipulation functionality
  eal: add unit tests for bit operations
  eal: add atomic bit operations
  eal: add unit tests for atomic bit access functions
  eal: extend bitops to handle volatile pointers

 app/test/packet_burst_generator.h             |   8 +-
 app/test/test_bitops.c                        | 416 +++++++++-
 app/test/virtual_pmd.h                        |   4 +-
 buildtools/chkincs/chkextern.py               |  84 ++
 buildtools/chkincs/meson.build                |  21 +-
 doc/guides/rel_notes/release_24_11.rst        |  17 +
 drivers/bus/auxiliary/bus_auxiliary_driver.h  |   8 +-
 drivers/bus/cdx/bus_cdx_driver.h              |   8 +-
 drivers/bus/dpaa/include/fsl_qman.h           |   8 +-
 drivers/bus/fslmc/bus_fslmc_driver.h          |   8 +-
 drivers/bus/pci/bus_pci_driver.h              |   8 +-
 drivers/bus/pci/rte_bus_pci.h                 |   8 +-
 drivers/bus/platform/bus_platform_driver.h    |   8 +-
 drivers/bus/vdev/bus_vdev_driver.h            |   8 +-
 drivers/bus/vmbus/bus_vmbus_driver.h          |   8 +-
 drivers/bus/vmbus/rte_bus_vmbus.h             |   8 +-
 drivers/dma/cnxk/cnxk_dma_event_dp.h          |   8 +-
 drivers/dma/ioat/ioat_hw_defs.h               |   4 +-
 drivers/event/dlb2/rte_pmd_dlb2.h             |   8 +-
 drivers/mempool/dpaa2/rte_dpaa2_mempool.h     |   6 +-
 drivers/net/avp/rte_avp_fifo.h                |   8 +-
 drivers/net/bonding/rte_eth_bond.h            |   4 +-
 drivers/net/i40e/rte_pmd_i40e.h               |   8 +-
 drivers/net/mlx5/mlx5_trace.h                 |   8 +-
 drivers/net/ring/rte_eth_ring.h               |   4 +-
 drivers/net/vhost/rte_eth_vhost.h             |   8 +-
 drivers/raw/ifpga/afu_pmd_core.h              |   8 +-
 drivers/raw/ifpga/afu_pmd_he_hssi.h           |   6 +-
 drivers/raw/ifpga/afu_pmd_he_lpbk.h           |   6 +-
 drivers/raw/ifpga/afu_pmd_he_mem.h            |   6 +-
 drivers/raw/ifpga/afu_pmd_n3000.h             |   6 +-
 drivers/raw/ifpga/rte_pmd_afu.h               |   4 +-
 drivers/raw/ifpga/rte_pmd_ifpga.h             |   4 +-
 examples/ethtool/lib/rte_ethtool.h            |   8 +-
 examples/qos_sched/main.h                     |   4 +-
 examples/vm_power_manager/channel_manager.h   |   8 +-
 lib/acl/rte_acl_osdep.h                       |   8 -
 lib/bbdev/rte_bbdev.h                         |   8 +-
 lib/bbdev/rte_bbdev_op.h                      |   8 +-
 lib/bbdev/rte_bbdev_pmd.h                     |   8 +-
 lib/bpf/bpf_def.h                             |   9 -
 lib/compressdev/rte_comp.h                    |   4 +-
 lib/compressdev/rte_compressdev.h             |   6 +-
 lib/compressdev/rte_compressdev_internal.h    |   8 +-
 lib/compressdev/rte_compressdev_pmd.h         |   8 +-
 lib/cryptodev/cryptodev_pmd.h                 |   8 +-
 lib/cryptodev/cryptodev_trace.h               |   8 +-
 lib/cryptodev/rte_crypto.h                    |   8 +-
 lib/cryptodev/rte_crypto_asym.h               |   8 -
 lib/cryptodev/rte_crypto_sym.h                |   8 +-
 lib/cryptodev/rte_cryptodev.h                 |   8 +-
 lib/cryptodev/rte_cryptodev_trace_fp.h        |   4 +-
 lib/dispatcher/rte_dispatcher.h               |   8 +-
 lib/dmadev/rte_dmadev.h                       |   8 +
 lib/eal/arm/include/rte_atomic_32.h           |   4 +-
 lib/eal/arm/include/rte_atomic_64.h           |   8 +-
 lib/eal/arm/include/rte_byteorder.h           |   8 +-
 lib/eal/arm/include/rte_cpuflags_32.h         |   8 -
 lib/eal/arm/include/rte_cpuflags_64.h         |   8 -
 lib/eal/arm/include/rte_cycles_32.h           |   4 +-
 lib/eal/arm/include/rte_cycles_64.h           |   4 +-
 lib/eal/arm/include/rte_io.h                  |   8 -
 lib/eal/arm/include/rte_io_64.h               |   8 +-
 lib/eal/arm/include/rte_memcpy_32.h           |   8 +-
 lib/eal/arm/include/rte_memcpy_64.h           |  23 +-
 lib/eal/arm/include/rte_pause.h               |   8 -
 lib/eal/arm/include/rte_pause_32.h            |   6 +-
 lib/eal/arm/include/rte_pause_64.h            |   8 +-
 lib/eal/arm/include/rte_power_intrinsics.h    |   8 -
 lib/eal/arm/include/rte_prefetch_32.h         |   8 +-
 lib/eal/arm/include/rte_prefetch_64.h         |   8 +-
 lib/eal/arm/include/rte_rwlock.h              |   4 +-
 lib/eal/arm/include/rte_spinlock.h            |   6 +-
 lib/eal/freebsd/include/rte_os.h              |   8 -
 lib/eal/include/bus_driver.h                  |   8 +-
 lib/eal/include/dev_driver.h                  |   8 -
 lib/eal/include/eal_trace_internal.h          |   8 +-
 lib/eal/include/generic/rte_atomic.h          |   8 +
 lib/eal/include/generic/rte_byteorder.h       |   8 +
 lib/eal/include/generic/rte_cpuflags.h        |   8 +
 lib/eal/include/generic/rte_cycles.h          |   8 +
 lib/eal/include/generic/rte_io.h              |   8 +
 lib/eal/include/generic/rte_memcpy.h          |   8 +
 lib/eal/include/generic/rte_pause.h           |   8 +
 .../include/generic/rte_power_intrinsics.h    |   8 +
 lib/eal/include/generic/rte_prefetch.h        |   8 +
 lib/eal/include/generic/rte_rwlock.h          |   8 +-
 lib/eal/include/generic/rte_spinlock.h        |   8 +
 lib/eal/include/generic/rte_vect.h            |   8 +
 lib/eal/include/rte_alarm.h                   |   4 +-
 lib/eal/include/rte_bitmap.h                  |   8 +-
 lib/eal/include/rte_bitops.h                  | 768 +++++++++++++++++-
 lib/eal/include/rte_branch_prediction.h       |   8 -
 lib/eal/include/rte_bus.h                     |   8 +-
 lib/eal/include/rte_class.h                   |   4 +-
 lib/eal/include/rte_common.h                  |   8 +-
 lib/eal/include/rte_compat.h                  |   8 -
 lib/eal/include/rte_dev.h                     |   8 +-
 lib/eal/include/rte_devargs.h                 |   8 +-
 lib/eal/include/rte_eal_trace.h               |   4 +-
 lib/eal/include/rte_errno.h                   |   4 +-
 lib/eal/include/rte_fbarray.h                 |   8 +-
 lib/eal/include/rte_keepalive.h               |   6 +-
 lib/eal/include/rte_mcslock.h                 |   8 +-
 lib/eal/include/rte_memory.h                  |   8 +-
 lib/eal/include/rte_pci_dev_feature_defs.h    |   8 -
 lib/eal/include/rte_pci_dev_features.h        |   8 -
 lib/eal/include/rte_per_lcore.h               |   8 -
 lib/eal/include/rte_pflock.h                  |   8 +-
 lib/eal/include/rte_random.h                  |   4 +-
 lib/eal/include/rte_seqcount.h                |   8 +-
 lib/eal/include/rte_seqlock.h                 |   8 +-
 lib/eal/include/rte_service.h                 |   8 +-
 lib/eal/include/rte_service_component.h       |   4 +-
 lib/eal/include/rte_stdatomic.h               |   5 +-
 lib/eal/include/rte_string_fns.h              |  17 +-
 lib/eal/include/rte_tailq.h                   |   6 +-
 lib/eal/include/rte_ticketlock.h              |   8 +-
 lib/eal/include/rte_time.h                    |   6 +-
 lib/eal/include/rte_trace.h                   |   8 +-
 lib/eal/include/rte_trace_point.h             |   8 +-
 lib/eal/include/rte_trace_point_register.h    |   8 +-
 lib/eal/include/rte_uuid.h                    |   8 +-
 lib/eal/include/rte_version.h                 |   6 +-
 lib/eal/include/rte_vfio.h                    |   8 +-
 lib/eal/linux/include/rte_os.h                |   8 -
 lib/eal/loongarch/include/rte_atomic.h        |   6 +-
 lib/eal/loongarch/include/rte_byteorder.h     |   4 +-
 lib/eal/loongarch/include/rte_cpuflags.h      |   8 -
 lib/eal/loongarch/include/rte_cycles.h        |   4 +-
 lib/eal/loongarch/include/rte_io.h            |   8 -
 lib/eal/loongarch/include/rte_memcpy.h        |   4 +-
 lib/eal/loongarch/include/rte_pause.h         |   8 +-
 .../loongarch/include/rte_power_intrinsics.h  |   8 -
 lib/eal/loongarch/include/rte_prefetch.h      |   8 +-
 lib/eal/loongarch/include/rte_rwlock.h        |   4 +-
 lib/eal/loongarch/include/rte_spinlock.h      |   6 +-
 lib/eal/ppc/include/rte_atomic.h              |   6 +-
 lib/eal/ppc/include/rte_byteorder.h           |   6 +-
 lib/eal/ppc/include/rte_cpuflags.h            |   8 -
 lib/eal/ppc/include/rte_cycles.h              |   8 +-
 lib/eal/ppc/include/rte_io.h                  |   8 -
 lib/eal/ppc/include/rte_memcpy.h              |   4 +-
 lib/eal/ppc/include/rte_pause.h               |   8 +-
 lib/eal/ppc/include/rte_power_intrinsics.h    |   8 -
 lib/eal/ppc/include/rte_prefetch.h            |   8 +-
 lib/eal/ppc/include/rte_rwlock.h              |   4 +-
 lib/eal/ppc/include/rte_spinlock.h            |   8 +-
 lib/eal/riscv/include/rte_atomic.h            |   8 +-
 lib/eal/riscv/include/rte_byteorder.h         |   8 +-
 lib/eal/riscv/include/rte_cpuflags.h          |   8 -
 lib/eal/riscv/include/rte_cycles.h            |   4 +-
 lib/eal/riscv/include/rte_io.h                |   8 -
 lib/eal/riscv/include/rte_memcpy.h            |   4 +-
 lib/eal/riscv/include/rte_pause.h             |   8 +-
 lib/eal/riscv/include/rte_power_intrinsics.h  |   8 -
 lib/eal/riscv/include/rte_prefetch.h          |   8 +-
 lib/eal/riscv/include/rte_rwlock.h            |   4 +-
 lib/eal/riscv/include/rte_spinlock.h          |   6 +-
 lib/eal/windows/include/pthread.h             |   6 +-
 lib/eal/windows/include/regex.h               |   8 +-
 lib/eal/windows/include/rte_os.h              |   8 -
 lib/eal/windows/include/rte_windows.h         |   8 -
 lib/eal/x86/include/rte_atomic.h              |  25 +-
 lib/eal/x86/include/rte_byteorder.h           |  16 +-
 lib/eal/x86/include/rte_cpuflags.h            |   8 -
 lib/eal/x86/include/rte_cycles.h              |   8 +-
 lib/eal/x86/include/rte_io.h                  |   8 +-
 lib/eal/x86/include/rte_pause.h               |   7 +-
 lib/eal/x86/include/rte_power_intrinsics.h    |   8 -
 lib/eal/x86/include/rte_prefetch.h            |   8 +-
 lib/eal/x86/include/rte_rwlock.h              |   6 +-
 lib/eal/x86/include/rte_spinlock.h            |   9 +-
 lib/ethdev/ethdev_driver.h                    |   8 +-
 lib/ethdev/ethdev_pci.h                       |   8 +-
 lib/ethdev/ethdev_trace.h                     |   8 +-
 lib/ethdev/ethdev_vdev.h                      |   8 +-
 lib/ethdev/rte_cman.h                         |   8 -
 lib/ethdev/rte_dev_info.h                     |   8 -
 lib/ethdev/rte_eth_ctrl.h                     |   8 -
 lib/ethdev/rte_ethdev.h                       |   8 +-
 lib/ethdev/rte_ethdev_trace_fp.h              |   4 +-
 lib/eventdev/event_timer_adapter_pmd.h        |   8 -
 lib/eventdev/eventdev_pmd.h                   |   8 +-
 lib/eventdev/eventdev_pmd_pci.h               |   8 +-
 lib/eventdev/eventdev_pmd_vdev.h              |   8 +-
 lib/eventdev/eventdev_trace.h                 |   8 +-
 lib/eventdev/rte_event_crypto_adapter.h       |   8 +-
 lib/eventdev/rte_event_eth_rx_adapter.h       |   8 +-
 lib/eventdev/rte_event_eth_tx_adapter.h       |   8 +-
 lib/eventdev/rte_event_ring.h                 |   8 +-
 lib/eventdev/rte_event_timer_adapter.h        |   8 +-
 lib/eventdev/rte_eventdev.h                   |   8 +-
 lib/eventdev/rte_eventdev_trace_fp.h          |   4 +-
 lib/graph/rte_graph_model_mcore_dispatch.h    |   8 +-
 lib/graph/rte_graph_worker.h                  |   6 +-
 lib/gso/rte_gso.h                             |   6 +-
 lib/hash/rte_fbk_hash.h                       |   8 +-
 lib/hash/rte_hash_crc.h                       |   8 +-
 lib/hash/rte_jhash.h                          |   8 +-
 lib/hash/rte_thash.h                          |   8 +-
 lib/hash/rte_thash_gfni.h                     |   8 +-
 lib/ip_frag/rte_ip_frag.h                     |   8 +-
 lib/ipsec/rte_ipsec.h                         |   8 +-
 lib/log/rte_log.h                             |   8 +-
 lib/lpm/rte_lpm.h                             |   8 +-
 lib/member/rte_member.h                       |   8 +-
 lib/member/rte_member_sketch.h                |   6 +-
 lib/member/rte_member_sketch_avx512.h         |   8 +-
 lib/member/rte_member_x86.h                   |   4 +-
 lib/member/rte_xxh64_avx512.h                 |   6 +-
 lib/mempool/mempool_trace.h                   |   8 +-
 lib/mempool/rte_mempool_trace_fp.h            |   4 +-
 lib/meter/rte_meter.h                         |   8 +-
 lib/mldev/mldev_utils.h                       |   8 +-
 lib/mldev/rte_mldev_core.h                    |   8 -
 lib/mldev/rte_mldev_pmd.h                     |   8 +-
 lib/net/rte_dtls.h                            |   8 -
 lib/net/rte_ecpri.h                           |   8 -
 lib/net/rte_esp.h                             |   8 -
 lib/net/rte_ether.h                           |   8 +-
 lib/net/rte_geneve.h                          |   8 -
 lib/net/rte_gre.h                             |   8 -
 lib/net/rte_gtp.h                             |   8 -
 lib/net/rte_higig.h                           |   8 -
 lib/net/rte_ib.h                              |   8 -
 lib/net/rte_icmp.h                            |   8 -
 lib/net/rte_l2tpv2.h                          |   8 -
 lib/net/rte_macsec.h                          |   8 -
 lib/net/rte_mpls.h                            |   8 -
 lib/net/rte_net.h                             |   8 +-
 lib/net/rte_pdcp_hdr.h                        |   8 -
 lib/net/rte_ppp.h                             |   8 -
 lib/net/rte_sctp.h                            |   8 -
 lib/net/rte_tcp.h                             |   8 -
 lib/net/rte_tls.h                             |   8 -
 lib/net/rte_udp.h                             |   8 -
 lib/net/rte_vxlan.h                           |  10 -
 lib/node/rte_node_eth_api.h                   |   8 +-
 lib/node/rte_node_ip4_api.h                   |   8 +-
 lib/node/rte_node_ip6_api.h                   |   6 +-
 lib/node/rte_node_udp4_input_api.h            |   8 +-
 lib/pci/rte_pci.h                             |   8 +-
 lib/pdcp/rte_pdcp.h                           |   8 +-
 lib/pipeline/rte_pipeline.h                   |   8 +-
 lib/pipeline/rte_port_in_action.h             |   8 +-
 lib/pipeline/rte_swx_ctl.h                    |   8 +-
 lib/pipeline/rte_swx_extern.h                 |   8 -
 lib/pipeline/rte_swx_ipsec.h                  |   8 +-
 lib/pipeline/rte_swx_pipeline.h               |   8 +-
 lib/pipeline/rte_swx_pipeline_spec.h          |   8 +-
 lib/pipeline/rte_table_action.h               |   8 +-
 lib/port/rte_port.h                           |   8 -
 lib/port/rte_port_ethdev.h                    |   8 +-
 lib/port/rte_port_eventdev.h                  |   8 +-
 lib/port/rte_port_fd.h                        |   8 +-
 lib/port/rte_port_frag.h                      |   8 +-
 lib/port/rte_port_ras.h                       |   8 +-
 lib/port/rte_port_ring.h                      |   8 +-
 lib/port/rte_port_sched.h                     |   8 +-
 lib/port/rte_port_source_sink.h               |   8 +-
 lib/port/rte_port_sym_crypto.h                |   8 +-
 lib/port/rte_swx_port.h                       |   8 -
 lib/port/rte_swx_port_ethdev.h                |   8 +-
 lib/port/rte_swx_port_fd.h                    |   8 +-
 lib/port/rte_swx_port_ring.h                  |   8 +-
 lib/port/rte_swx_port_source_sink.h           |   8 +-
 lib/rawdev/rte_rawdev.h                       |   6 +-
 lib/rawdev/rte_rawdev_pmd.h                   |   8 +-
 lib/rcu/rte_rcu_qsbr.h                        |   8 +-
 lib/regexdev/rte_regexdev.h                   |   8 +-
 lib/ring/rte_ring.h                           |   6 +-
 lib/ring/rte_ring_core.h                      |   8 -
 lib/ring/rte_ring_elem.h                      |   8 +-
 lib/ring/rte_ring_hts.h                       |   4 +-
 lib/ring/rte_ring_peek.h                      |   4 +-
 lib/ring/rte_ring_peek_zc.h                   |   4 +-
 lib/ring/rte_ring_rts.h                       |   4 +-
 lib/sched/rte_approx.h                        |   8 +-
 lib/sched/rte_pie.h                           |   8 +-
 lib/sched/rte_red.h                           |   8 +-
 lib/sched/rte_sched.h                         |   8 +-
 lib/sched/rte_sched_common.h                  |   6 +-
 lib/security/rte_security.h                   |   8 +-
 lib/security/rte_security_driver.h            |   6 +-
 lib/stack/rte_stack.h                         |   8 +-
 lib/table/rte_lru.h                           |   8 -
 lib/table/rte_lru_arm64.h                     |   8 +-
 lib/table/rte_lru_x86.h                       |   8 -
 lib/table/rte_swx_hash_func.h                 |   8 -
 lib/table/rte_swx_keycmp.h                    |   8 +-
 lib/table/rte_swx_table.h                     |   8 -
 lib/table/rte_swx_table_em.h                  |   8 +-
 lib/table/rte_swx_table_learner.h             |   8 +-
 lib/table/rte_swx_table_selector.h            |   8 +-
 lib/table/rte_swx_table_wm.h                  |   8 +-
 lib/table/rte_table.h                         |   8 -
 lib/table/rte_table_acl.h                     |   8 +-
 lib/table/rte_table_array.h                   |   8 +-
 lib/table/rte_table_hash.h                    |   8 +-
 lib/table/rte_table_hash_cuckoo.h             |   8 +-
 lib/table/rte_table_hash_func.h               |  24 +-
 lib/table/rte_table_lpm.h                     |   8 +-
 lib/table/rte_table_lpm_ipv6.h                |   8 +-
 lib/table/rte_table_stub.h                    |   8 +-
 lib/telemetry/rte_telemetry.h                 |   8 +-
 lib/vhost/rte_vdpa.h                          |   8 +-
 lib/vhost/rte_vhost.h                         |   8 +-
 lib/vhost/rte_vhost_async.h                   |   8 +-
 lib/vhost/rte_vhost_crypto.h                  |   4 +-
 lib/vhost/vdpa_driver.h                       |   8 +-
 311 files changed, 2253 insertions(+), 1362 deletions(-)
 create mode 100755 buildtools/chkincs/chkextern.py

-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [PATCH v10 1/7] buildtools/chkincs: relax C linkage requirement
  2024-09-19 19:31                   ` [PATCH v10 0/7] Improve EAL bit operations API Mattias Rönnblom
@ 2024-09-19 19:31                     ` Mattias Rönnblom
  2024-09-19 19:31                     ` [PATCH v10 2/7] dpdk: use C linkage only where appropriate Mattias Rönnblom
                                       ` (5 subsequent siblings)
  6 siblings, 0 replies; 63+ messages in thread
From: Mattias Rönnblom @ 2024-09-19 19:31 UTC (permalink / raw)
  To: dev
  Cc: hofors, Heng Wang, Stephen Hemminger, Tyler Retzlaff,
	Morten Brørup, Jack Bond-Preston, David Marchand,
	Chengwen Feng, Mattias Rönnblom

Relax chkincs requirement of all DPDK header files having to contain
'extern "C"'.

Instructing a C++ toolchain to use C linkage is only necessary if the
header file declares symbols (i.e., functions or global variables).

With this change, chkincs tries to find if any functions or references
to global variables are declared in the header file, and if not, no C
linkage is required.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
---
 buildtools/chkincs/chkextern.py | 84 +++++++++++++++++++++++++++++++++
 buildtools/chkincs/meson.build  | 14 +++---
 2 files changed, 91 insertions(+), 7 deletions(-)
 create mode 100755 buildtools/chkincs/chkextern.py

diff --git a/buildtools/chkincs/chkextern.py b/buildtools/chkincs/chkextern.py
new file mode 100755
index 0000000000..c9747fad1e
--- /dev/null
+++ b/buildtools/chkincs/chkextern.py
@@ -0,0 +1,84 @@
+#! /usr/bin/env python3
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2024 Ericsson AB
+
+import sys
+import re
+
+def strip_cpp(header):
+    no_cpp = ""
+    header = header.replace("\\\n", " ")
+
+    for line in header.split("\n"):
+        if re.match(r'^\s*#.*', line) is None and len(line) > 0:
+            no_cpp += "%s\n" % line
+
+    return no_cpp
+
+
+def strip_comments(header):
+    no_c_comments = re.sub(r'/\*.*?\*/', '', header, flags=re.DOTALL)
+    no_cxx_comments = re.sub(r'//.*', '', no_c_comments)
+    return no_cxx_comments
+
+
+def strip(header):
+    header = strip_comments(header)
+    header = strip_cpp(header)
+    return header
+
+
+def has_extern_c(header):
+    return header.find('extern "C"') != -1
+
+
+def has_vars(header):
+    return re.search(r'^extern\s+[a-z0-9_]+\s.*;', header, flags=re.MULTILINE) is not None
+
+
+FUNCTION_RES = [
+    r'rte_[a-z0-9_]+\(',
+    r'cmdline_[a-z0-9_]+\(',
+    r'vt100_[a-z0-9_]+\(',
+    r'rdline_[a-z0-9_]+\(',
+    r'cirbuf_[a-z0-9_]+\('
+]
+
+
+def has_functions(header):
+    for function_re in FUNCTION_RES:
+        if re.search(function_re, header) is not None:
+            return True
+    return False
+
+
+def has_symbols(header):
+    return has_functions(header) or has_vars(header)
+
+
+def chk_missing(filename):
+    header = open(filename).read()
+    if has_symbols(header) and not has_extern_c(header):
+        print(filename)
+
+
+def chk_redundant(filename):
+    header = open(filename).read()
+    if not has_symbols(header) and has_extern_c(header):
+        print(filename)
+
+if len(sys.argv) < 3:
+    print("%s missing|redundant <header-file> ..." % sys.argv[0])
+    sys.exit(1)
+
+op = sys.argv[1]
+headers = sys.argv[2:]
+
+for header in headers:
+    if op == 'missing':
+        chk_missing(header)
+    elif op == 'redundant':
+        chk_redundant(header)
+    else:
+        print("Unknown operation.")
+        sys.exit(1)
diff --git a/buildtools/chkincs/meson.build b/buildtools/chkincs/meson.build
index f2dadcae18..762f85efe5 100644
--- a/buildtools/chkincs/meson.build
+++ b/buildtools/chkincs/meson.build
@@ -38,13 +38,13 @@ if not add_languages('cpp', required: false)
 endif
 
 # check for extern C in files, since this is not detected as an error by the compiler
-grep = find_program('grep', required: false)
-if grep.found()
-    errlist = run_command([grep, '--files-without-match', '^extern "C"', dpdk_chkinc_headers],
-            check: false, capture: true).stdout().split()
-    if errlist != []
-        error('Files missing C++ \'extern "C"\' guards:\n- ' + '\n- '.join(errlist))
-    endif
+chkextern = find_program('chkextern.py')
+
+missing_extern_headers = run_command(chkextern, 'missing', dpdk_chkinc_headers,
+      capture: true, check: true).stdout().split()
+
+if missing_extern_headers != []
+    error('Files missing C++ \'extern "C"\' guards:\n- ' + '\n- '.join(missing_extern_headers))
 endif
 
 gen_cpp_files = generator(gen_c_file_for_header,
-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [PATCH v10 2/7] dpdk: use C linkage only where appropriate
  2024-09-19 19:31                   ` [PATCH v10 0/7] Improve EAL bit operations API Mattias Rönnblom
  2024-09-19 19:31                     ` [PATCH v10 1/7] buildtools/chkincs: relax C linkage requirement Mattias Rönnblom
@ 2024-09-19 19:31                     ` Mattias Rönnblom
  2024-09-19 19:31                     ` [PATCH v10 3/7] eal: extend bit manipulation functionality Mattias Rönnblom
                                       ` (4 subsequent siblings)
  6 siblings, 0 replies; 63+ messages in thread
From: Mattias Rönnblom @ 2024-09-19 19:31 UTC (permalink / raw)
  To: dev
  Cc: hofors, Heng Wang, Stephen Hemminger, Tyler Retzlaff,
	Morten Brørup, Jack Bond-Preston, David Marchand,
	Chengwen Feng, Mattias Rönnblom

Ensure that 'extern "C" { /../ }' does not cover files included from a
particular header file, and address minor issues resulting from this
change of order.

Dealing with C++ should be delegated to the individual include file,
rather than being imposed by the user of that file. For example,
forcing C linkage prevents _Generic macros from being replaced with
overloaded static inline functions in C++ translation units.

Eliminate 'extern "C"' from files which do not declare any symbols
(e.g., only macros or struct types). With this change, the
chkextern.py tool fails the build in case it detects such
redundancy.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>

--

PATCH v10:
 * Enable redundant C linkage checks in chkincs, and fix headers
   which had such.
 * Fix more ARM build issues.

PATCH v9:
 * Fix yet another issue in the ARM build. Author really needs an
   ARM system powerful enough to build DPDK.

PATCH v8:
 * Fix issues in rte_memcpy_64.h causing build failures on ARM.

PATCH v7:
 * Fix issues in rte_io.h, rte_pause.h and rte_thash_gfni.h causing
   build failures on ARM. (David Marchand)
 * Fix issue in rte_vfio.h, causing build failures unless VFIO_PRESENT.

PATCH v6:
 * Add missing extern "C" in rte_atomic.h, rte_cpuflags.h, rte_io.h,
   rte_vect.h.
 * Fix 32-bit x86 build issues in rte_atomic.h.

PATCH v5:
 * rte_dmadev.h was still including files under extern "C" { /../ }.
   (Chengwen Feng)
 * Fix rte_byteorder.h, broken on 32-bit x86.
---
 app/test/packet_burst_generator.h             |  8 +++---
 app/test/virtual_pmd.h                        |  4 +--
 buildtools/chkincs/meson.build                |  7 ++++++
 drivers/bus/auxiliary/bus_auxiliary_driver.h  |  8 +++---
 drivers/bus/cdx/bus_cdx_driver.h              |  8 +++---
 drivers/bus/dpaa/include/fsl_qman.h           |  8 +++---
 drivers/bus/fslmc/bus_fslmc_driver.h          |  8 +++---
 drivers/bus/pci/bus_pci_driver.h              |  8 +++---
 drivers/bus/pci/rte_bus_pci.h                 |  8 +++---
 drivers/bus/platform/bus_platform_driver.h    |  8 +++---
 drivers/bus/vdev/bus_vdev_driver.h            |  8 +++---
 drivers/bus/vmbus/bus_vmbus_driver.h          |  8 +++---
 drivers/bus/vmbus/rte_bus_vmbus.h             |  8 +++---
 drivers/dma/cnxk/cnxk_dma_event_dp.h          |  8 +++---
 drivers/dma/ioat/ioat_hw_defs.h               |  4 +--
 drivers/event/dlb2/rte_pmd_dlb2.h             |  8 +++---
 drivers/mempool/dpaa2/rte_dpaa2_mempool.h     |  6 ++---
 drivers/net/avp/rte_avp_fifo.h                |  8 +++---
 drivers/net/bonding/rte_eth_bond.h            |  4 +--
 drivers/net/i40e/rte_pmd_i40e.h               |  8 +++---
 drivers/net/mlx5/mlx5_trace.h                 |  8 +++---
 drivers/net/ring/rte_eth_ring.h               |  4 +--
 drivers/net/vhost/rte_eth_vhost.h             |  8 +++---
 drivers/raw/ifpga/afu_pmd_core.h              |  8 +++---
 drivers/raw/ifpga/afu_pmd_he_hssi.h           |  6 ++---
 drivers/raw/ifpga/afu_pmd_he_lpbk.h           |  6 ++---
 drivers/raw/ifpga/afu_pmd_he_mem.h            |  6 ++---
 drivers/raw/ifpga/afu_pmd_n3000.h             |  6 ++---
 drivers/raw/ifpga/rte_pmd_afu.h               |  4 +--
 drivers/raw/ifpga/rte_pmd_ifpga.h             |  4 +--
 examples/ethtool/lib/rte_ethtool.h            |  8 +++---
 examples/qos_sched/main.h                     |  4 +--
 examples/vm_power_manager/channel_manager.h   |  8 +++---
 lib/acl/rte_acl_osdep.h                       |  8 ------
 lib/bbdev/rte_bbdev.h                         |  8 +++---
 lib/bbdev/rte_bbdev_op.h                      |  8 +++---
 lib/bbdev/rte_bbdev_pmd.h                     |  8 +++---
 lib/bpf/bpf_def.h                             |  9 -------
 lib/compressdev/rte_comp.h                    |  4 +--
 lib/compressdev/rte_compressdev.h             |  6 ++---
 lib/compressdev/rte_compressdev_internal.h    |  8 +++---
 lib/compressdev/rte_compressdev_pmd.h         |  8 +++---
 lib/cryptodev/cryptodev_pmd.h                 |  8 +++---
 lib/cryptodev/cryptodev_trace.h               |  8 +++---
 lib/cryptodev/rte_crypto.h                    |  8 +++---
 lib/cryptodev/rte_crypto_asym.h               |  8 ------
 lib/cryptodev/rte_crypto_sym.h                |  8 +++---
 lib/cryptodev/rte_cryptodev.h                 |  8 +++---
 lib/cryptodev/rte_cryptodev_trace_fp.h        |  4 +--
 lib/dispatcher/rte_dispatcher.h               |  8 +++---
 lib/dmadev/rte_dmadev.h                       |  8 ++++++
 lib/eal/arm/include/rte_atomic_32.h           |  4 +--
 lib/eal/arm/include/rte_atomic_64.h           |  8 +++---
 lib/eal/arm/include/rte_byteorder.h           |  8 +++---
 lib/eal/arm/include/rte_cpuflags_32.h         |  8 ------
 lib/eal/arm/include/rte_cpuflags_64.h         |  8 ------
 lib/eal/arm/include/rte_cycles_32.h           |  4 +--
 lib/eal/arm/include/rte_cycles_64.h           |  4 +--
 lib/eal/arm/include/rte_io.h                  |  8 ------
 lib/eal/arm/include/rte_io_64.h               |  8 +++---
 lib/eal/arm/include/rte_memcpy_32.h           |  8 +++---
 lib/eal/arm/include/rte_memcpy_64.h           | 23 +++++++++++------
 lib/eal/arm/include/rte_pause.h               |  8 ------
 lib/eal/arm/include/rte_pause_32.h            |  6 ++---
 lib/eal/arm/include/rte_pause_64.h            |  8 +++---
 lib/eal/arm/include/rte_power_intrinsics.h    |  8 ------
 lib/eal/arm/include/rte_prefetch_32.h         |  8 +++---
 lib/eal/arm/include/rte_prefetch_64.h         |  8 +++---
 lib/eal/arm/include/rte_rwlock.h              |  4 +--
 lib/eal/arm/include/rte_spinlock.h            |  6 ++---
 lib/eal/freebsd/include/rte_os.h              |  8 ------
 lib/eal/include/bus_driver.h                  |  8 +++---
 lib/eal/include/dev_driver.h                  |  8 ------
 lib/eal/include/eal_trace_internal.h          |  8 +++---
 lib/eal/include/generic/rte_atomic.h          |  8 ++++++
 lib/eal/include/generic/rte_byteorder.h       |  8 ++++++
 lib/eal/include/generic/rte_cpuflags.h        |  8 ++++++
 lib/eal/include/generic/rte_cycles.h          |  8 ++++++
 lib/eal/include/generic/rte_io.h              |  8 ++++++
 lib/eal/include/generic/rte_memcpy.h          |  8 ++++++
 lib/eal/include/generic/rte_pause.h           |  8 ++++++
 .../include/generic/rte_power_intrinsics.h    |  8 ++++++
 lib/eal/include/generic/rte_prefetch.h        |  8 ++++++
 lib/eal/include/generic/rte_rwlock.h          |  8 +++---
 lib/eal/include/generic/rte_spinlock.h        |  8 ++++++
 lib/eal/include/generic/rte_vect.h            |  8 ++++++
 lib/eal/include/rte_alarm.h                   |  4 +--
 lib/eal/include/rte_bitmap.h                  |  8 +++---
 lib/eal/include/rte_branch_prediction.h       |  8 ------
 lib/eal/include/rte_bus.h                     |  8 +++---
 lib/eal/include/rte_class.h                   |  4 +--
 lib/eal/include/rte_common.h                  |  8 +++---
 lib/eal/include/rte_compat.h                  |  8 ------
 lib/eal/include/rte_dev.h                     |  8 +++---
 lib/eal/include/rte_devargs.h                 |  8 +++---
 lib/eal/include/rte_eal_trace.h               |  4 +--
 lib/eal/include/rte_errno.h                   |  4 +--
 lib/eal/include/rte_fbarray.h                 |  8 +++---
 lib/eal/include/rte_keepalive.h               |  6 ++---
 lib/eal/include/rte_mcslock.h                 |  8 +++---
 lib/eal/include/rte_memory.h                  |  8 +++---
 lib/eal/include/rte_pci_dev_feature_defs.h    |  8 ------
 lib/eal/include/rte_pci_dev_features.h        |  8 ------
 lib/eal/include/rte_per_lcore.h               |  8 ------
 lib/eal/include/rte_pflock.h                  |  8 +++---
 lib/eal/include/rte_random.h                  |  4 +--
 lib/eal/include/rte_seqcount.h                |  8 +++---
 lib/eal/include/rte_seqlock.h                 |  8 +++---
 lib/eal/include/rte_service.h                 |  8 +++---
 lib/eal/include/rte_service_component.h       |  4 +--
 lib/eal/include/rte_stdatomic.h               |  5 +---
 lib/eal/include/rte_string_fns.h              | 17 +++++++++----
 lib/eal/include/rte_tailq.h                   |  6 ++---
 lib/eal/include/rte_ticketlock.h              |  8 +++---
 lib/eal/include/rte_time.h                    |  6 ++---
 lib/eal/include/rte_trace.h                   |  8 +++---
 lib/eal/include/rte_trace_point.h             |  8 +++---
 lib/eal/include/rte_trace_point_register.h    |  8 +++---
 lib/eal/include/rte_uuid.h                    |  8 +++---
 lib/eal/include/rte_version.h                 |  6 ++---
 lib/eal/include/rte_vfio.h                    |  8 +++---
 lib/eal/linux/include/rte_os.h                |  8 ------
 lib/eal/loongarch/include/rte_atomic.h        |  6 ++---
 lib/eal/loongarch/include/rte_byteorder.h     |  4 +--
 lib/eal/loongarch/include/rte_cpuflags.h      |  8 ------
 lib/eal/loongarch/include/rte_cycles.h        |  4 +--
 lib/eal/loongarch/include/rte_io.h            |  8 ------
 lib/eal/loongarch/include/rte_memcpy.h        |  4 +--
 lib/eal/loongarch/include/rte_pause.h         |  8 +++---
 .../loongarch/include/rte_power_intrinsics.h  |  8 ------
 lib/eal/loongarch/include/rte_prefetch.h      |  8 +++---
 lib/eal/loongarch/include/rte_rwlock.h        |  4 +--
 lib/eal/loongarch/include/rte_spinlock.h      |  6 ++---
 lib/eal/ppc/include/rte_atomic.h              |  6 ++---
 lib/eal/ppc/include/rte_byteorder.h           |  6 ++---
 lib/eal/ppc/include/rte_cpuflags.h            |  8 ------
 lib/eal/ppc/include/rte_cycles.h              |  8 +++---
 lib/eal/ppc/include/rte_io.h                  |  8 ------
 lib/eal/ppc/include/rte_memcpy.h              |  4 +--
 lib/eal/ppc/include/rte_pause.h               |  8 +++---
 lib/eal/ppc/include/rte_power_intrinsics.h    |  8 ------
 lib/eal/ppc/include/rte_prefetch.h            |  8 +++---
 lib/eal/ppc/include/rte_rwlock.h              |  4 +--
 lib/eal/ppc/include/rte_spinlock.h            |  8 +++---
 lib/eal/riscv/include/rte_atomic.h            |  8 +++---
 lib/eal/riscv/include/rte_byteorder.h         |  8 +++---
 lib/eal/riscv/include/rte_cpuflags.h          |  8 ------
 lib/eal/riscv/include/rte_cycles.h            |  4 +--
 lib/eal/riscv/include/rte_io.h                |  8 ------
 lib/eal/riscv/include/rte_memcpy.h            |  4 +--
 lib/eal/riscv/include/rte_pause.h             |  8 +++---
 lib/eal/riscv/include/rte_power_intrinsics.h  |  8 ------
 lib/eal/riscv/include/rte_prefetch.h          |  8 +++---
 lib/eal/riscv/include/rte_rwlock.h            |  4 +--
 lib/eal/riscv/include/rte_spinlock.h          |  6 ++---
 lib/eal/windows/include/pthread.h             |  6 ++---
 lib/eal/windows/include/regex.h               |  8 +++---
 lib/eal/windows/include/rte_os.h              |  8 ------
 lib/eal/windows/include/rte_windows.h         |  8 ------
 lib/eal/x86/include/rte_atomic.h              | 25 +++++++++++++------
 lib/eal/x86/include/rte_byteorder.h           | 16 ++++++------
 lib/eal/x86/include/rte_cpuflags.h            |  8 ------
 lib/eal/x86/include/rte_cycles.h              |  8 +++---
 lib/eal/x86/include/rte_io.h                  |  8 +++---
 lib/eal/x86/include/rte_pause.h               |  7 +++---
 lib/eal/x86/include/rte_power_intrinsics.h    |  8 ------
 lib/eal/x86/include/rte_prefetch.h            |  8 +++---
 lib/eal/x86/include/rte_rwlock.h              |  6 ++---
 lib/eal/x86/include/rte_spinlock.h            |  9 +++----
 lib/ethdev/ethdev_driver.h                    |  8 +++---
 lib/ethdev/ethdev_pci.h                       |  8 +++---
 lib/ethdev/ethdev_trace.h                     |  8 +++---
 lib/ethdev/ethdev_vdev.h                      |  8 +++---
 lib/ethdev/rte_cman.h                         |  8 ------
 lib/ethdev/rte_dev_info.h                     |  8 ------
 lib/ethdev/rte_eth_ctrl.h                     |  8 ------
 lib/ethdev/rte_ethdev.h                       |  8 +++---
 lib/ethdev/rte_ethdev_trace_fp.h              |  4 +--
 lib/eventdev/event_timer_adapter_pmd.h        |  8 ------
 lib/eventdev/eventdev_pmd.h                   |  8 +++---
 lib/eventdev/eventdev_pmd_pci.h               |  8 +++---
 lib/eventdev/eventdev_pmd_vdev.h              |  8 +++---
 lib/eventdev/eventdev_trace.h                 |  8 +++---
 lib/eventdev/rte_event_crypto_adapter.h       |  8 +++---
 lib/eventdev/rte_event_eth_rx_adapter.h       |  8 +++---
 lib/eventdev/rte_event_eth_tx_adapter.h       |  8 +++---
 lib/eventdev/rte_event_ring.h                 |  8 +++---
 lib/eventdev/rte_event_timer_adapter.h        |  8 +++---
 lib/eventdev/rte_eventdev.h                   |  8 +++---
 lib/eventdev/rte_eventdev_trace_fp.h          |  4 +--
 lib/graph/rte_graph_model_mcore_dispatch.h    |  8 +++---
 lib/graph/rte_graph_worker.h                  |  6 ++---
 lib/gso/rte_gso.h                             |  6 ++---
 lib/hash/rte_fbk_hash.h                       |  8 +++---
 lib/hash/rte_hash_crc.h                       |  8 +++---
 lib/hash/rte_jhash.h                          |  8 +++---
 lib/hash/rte_thash.h                          |  8 +++---
 lib/hash/rte_thash_gfni.h                     |  8 +++---
 lib/ip_frag/rte_ip_frag.h                     |  8 +++---
 lib/ipsec/rte_ipsec.h                         |  8 +++---
 lib/log/rte_log.h                             |  8 +++---
 lib/lpm/rte_lpm.h                             |  8 +++---
 lib/member/rte_member.h                       |  8 +++---
 lib/member/rte_member_sketch.h                |  6 ++---
 lib/member/rte_member_sketch_avx512.h         |  8 +++---
 lib/member/rte_member_x86.h                   |  4 +--
 lib/member/rte_xxh64_avx512.h                 |  6 ++---
 lib/mempool/mempool_trace.h                   |  8 +++---
 lib/mempool/rte_mempool_trace_fp.h            |  4 +--
 lib/meter/rte_meter.h                         |  8 +++---
 lib/mldev/mldev_utils.h                       |  8 +++---
 lib/mldev/rte_mldev_core.h                    |  8 ------
 lib/mldev/rte_mldev_pmd.h                     |  8 +++---
 lib/net/rte_dtls.h                            |  8 ------
 lib/net/rte_ecpri.h                           |  8 ------
 lib/net/rte_esp.h                             |  8 ------
 lib/net/rte_ether.h                           |  8 +++---
 lib/net/rte_geneve.h                          |  8 ------
 lib/net/rte_gre.h                             |  8 ------
 lib/net/rte_gtp.h                             |  8 ------
 lib/net/rte_higig.h                           |  8 ------
 lib/net/rte_ib.h                              |  8 ------
 lib/net/rte_icmp.h                            |  8 ------
 lib/net/rte_l2tpv2.h                          |  8 ------
 lib/net/rte_macsec.h                          |  8 ------
 lib/net/rte_mpls.h                            |  8 ------
 lib/net/rte_net.h                             |  8 +++---
 lib/net/rte_pdcp_hdr.h                        |  8 ------
 lib/net/rte_ppp.h                             |  8 ------
 lib/net/rte_sctp.h                            |  8 ------
 lib/net/rte_tcp.h                             |  8 ------
 lib/net/rte_tls.h                             |  8 ------
 lib/net/rte_udp.h                             |  8 ------
 lib/net/rte_vxlan.h                           | 10 --------
 lib/node/rte_node_eth_api.h                   |  8 +++---
 lib/node/rte_node_ip4_api.h                   |  8 +++---
 lib/node/rte_node_ip6_api.h                   |  6 ++---
 lib/node/rte_node_udp4_input_api.h            |  8 +++---
 lib/pci/rte_pci.h                             |  8 +++---
 lib/pdcp/rte_pdcp.h                           |  8 +++---
 lib/pipeline/rte_pipeline.h                   |  8 +++---
 lib/pipeline/rte_port_in_action.h             |  8 +++---
 lib/pipeline/rte_swx_ctl.h                    |  8 +++---
 lib/pipeline/rte_swx_extern.h                 |  8 ------
 lib/pipeline/rte_swx_ipsec.h                  |  8 +++---
 lib/pipeline/rte_swx_pipeline.h               |  8 +++---
 lib/pipeline/rte_swx_pipeline_spec.h          |  8 +++---
 lib/pipeline/rte_table_action.h               |  8 +++---
 lib/port/rte_port.h                           |  8 ------
 lib/port/rte_port_ethdev.h                    |  8 +++---
 lib/port/rte_port_eventdev.h                  |  8 +++---
 lib/port/rte_port_fd.h                        |  8 +++---
 lib/port/rte_port_frag.h                      |  8 +++---
 lib/port/rte_port_ras.h                       |  8 +++---
 lib/port/rte_port_ring.h                      |  8 +++---
 lib/port/rte_port_sched.h                     |  8 +++---
 lib/port/rte_port_source_sink.h               |  8 +++---
 lib/port/rte_port_sym_crypto.h                |  8 +++---
 lib/port/rte_swx_port.h                       |  8 ------
 lib/port/rte_swx_port_ethdev.h                |  8 +++---
 lib/port/rte_swx_port_fd.h                    |  8 +++---
 lib/port/rte_swx_port_ring.h                  |  8 +++---
 lib/port/rte_swx_port_source_sink.h           |  8 +++---
 lib/rawdev/rte_rawdev.h                       |  6 ++---
 lib/rawdev/rte_rawdev_pmd.h                   |  8 +++---
 lib/rcu/rte_rcu_qsbr.h                        |  8 +++---
 lib/regexdev/rte_regexdev.h                   |  8 +++---
 lib/ring/rte_ring.h                           |  6 ++---
 lib/ring/rte_ring_core.h                      |  8 ------
 lib/ring/rte_ring_elem.h                      |  8 +++---
 lib/ring/rte_ring_hts.h                       |  4 +--
 lib/ring/rte_ring_peek.h                      |  4 +--
 lib/ring/rte_ring_peek_zc.h                   |  4 +--
 lib/ring/rte_ring_rts.h                       |  4 +--
 lib/sched/rte_approx.h                        |  8 +++---
 lib/sched/rte_pie.h                           |  8 +++---
 lib/sched/rte_red.h                           |  8 +++---
 lib/sched/rte_sched.h                         |  8 +++---
 lib/sched/rte_sched_common.h                  |  6 ++---
 lib/security/rte_security.h                   |  8 +++---
 lib/security/rte_security_driver.h            |  6 ++---
 lib/stack/rte_stack.h                         |  8 +++---
 lib/table/rte_lru.h                           |  8 ------
 lib/table/rte_lru_arm64.h                     |  8 +++---
 lib/table/rte_lru_x86.h                       |  8 ------
 lib/table/rte_swx_hash_func.h                 |  8 ------
 lib/table/rte_swx_keycmp.h                    |  8 +++---
 lib/table/rte_swx_table.h                     |  8 ------
 lib/table/rte_swx_table_em.h                  |  8 +++---
 lib/table/rte_swx_table_learner.h             |  8 +++---
 lib/table/rte_swx_table_selector.h            |  8 +++---
 lib/table/rte_swx_table_wm.h                  |  8 +++---
 lib/table/rte_table.h                         |  8 ------
 lib/table/rte_table_acl.h                     |  8 +++---
 lib/table/rte_table_array.h                   |  8 +++---
 lib/table/rte_table_hash.h                    |  8 +++---
 lib/table/rte_table_hash_cuckoo.h             |  8 +++---
 lib/table/rte_table_hash_func.h               | 24 +++++++++++++++---
 lib/table/rte_table_lpm.h                     |  8 +++---
 lib/table/rte_table_lpm_ipv6.h                |  8 +++---
 lib/table/rte_table_stub.h                    |  8 +++---
 lib/telemetry/rte_telemetry.h                 |  8 +++---
 lib/vhost/rte_vdpa.h                          |  8 +++---
 lib/vhost/rte_vhost.h                         |  8 +++---
 lib/vhost/rte_vhost_async.h                   |  8 +++---
 lib/vhost/rte_vhost_crypto.h                  |  4 +--
 lib/vhost/vdpa_driver.h                       |  8 +++---
 307 files changed, 979 insertions(+), 1337 deletions(-)

diff --git a/app/test/packet_burst_generator.h b/app/test/packet_burst_generator.h
index b99286f50e..cce41bcd0f 100644
--- a/app/test/packet_burst_generator.h
+++ b/app/test/packet_burst_generator.h
@@ -5,10 +5,6 @@
 #ifndef PACKET_BURST_GENERATOR_H_
 #define PACKET_BURST_GENERATOR_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_mbuf.h>
 #include <rte_ether.h>
 #include <rte_arp.h>
@@ -17,6 +13,10 @@ extern "C" {
 #include <rte_tcp.h>
 #include <rte_sctp.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define IPV4_ADDR(a, b, c, d)(((a & 0xff) << 24) | ((b & 0xff) << 16) | \
 		((c & 0xff) << 8) | (d & 0xff))
 
diff --git a/app/test/virtual_pmd.h b/app/test/virtual_pmd.h
index 120b58b273..a5a71d7cb4 100644
--- a/app/test/virtual_pmd.h
+++ b/app/test/virtual_pmd.h
@@ -5,12 +5,12 @@
 #ifndef __VIRTUAL_ETHDEV_H_
 #define __VIRTUAL_ETHDEV_H_
 
+#include <rte_ether.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_ether.h>
-
 int
 virtual_ethdev_init(void);
 
diff --git a/buildtools/chkincs/meson.build b/buildtools/chkincs/meson.build
index 762f85efe5..9f45b49275 100644
--- a/buildtools/chkincs/meson.build
+++ b/buildtools/chkincs/meson.build
@@ -43,10 +43,17 @@ chkextern = find_program('chkextern.py')
 missing_extern_headers = run_command(chkextern, 'missing', dpdk_chkinc_headers,
       capture: true, check: true).stdout().split()
 
+redundant_extern_headers = run_command(chkextern, 'redundant', dpdk_chkinc_headers,
+      capture: true, check: true).stdout().split()
+
 if missing_extern_headers != []
     error('Files missing C++ \'extern "C"\' guards:\n- ' + '\n- '.join(missing_extern_headers))
 endif
 
+if redundant_extern_headers != []
+    error('Redundant C++ \'extern "C"\' guards:\n- ' + '\n- '.join(redundant_extern_headers))
+endif
+
 gen_cpp_files = generator(gen_c_file_for_header,
         output: '@BASENAME@.cpp',
         arguments: ['@INPUT@', '@OUTPUT@'])
diff --git a/drivers/bus/auxiliary/bus_auxiliary_driver.h b/drivers/bus/auxiliary/bus_auxiliary_driver.h
index 58fb7c7f69..40ab1f0912 100644
--- a/drivers/bus/auxiliary/bus_auxiliary_driver.h
+++ b/drivers/bus/auxiliary/bus_auxiliary_driver.h
@@ -11,10 +11,6 @@
  * Auxiliary Bus Interface.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdio.h>
 #include <stdlib.h>
 #include <limits.h>
@@ -28,6 +24,10 @@ extern "C" {
 #include <dev_driver.h>
 #include <rte_kvargs.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_BUS_AUXILIARY_NAME "auxiliary"
 
 /* Forward declarations */
diff --git a/drivers/bus/cdx/bus_cdx_driver.h b/drivers/bus/cdx/bus_cdx_driver.h
index 211f8e406b..d390e7b5a1 100644
--- a/drivers/bus/cdx/bus_cdx_driver.h
+++ b/drivers/bus/cdx/bus_cdx_driver.h
@@ -10,10 +10,6 @@
  * AMD CDX bus interface
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdlib.h>
 #include <inttypes.h>
 #include <linux/types.h>
@@ -22,6 +18,10 @@ extern "C" {
 #include <dev_driver.h>
 #include <rte_interrupts.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Forward declarations */
 struct rte_cdx_device;
 struct rte_cdx_driver;
diff --git a/drivers/bus/dpaa/include/fsl_qman.h b/drivers/bus/dpaa/include/fsl_qman.h
index c0677976e8..f39007b84d 100644
--- a/drivers/bus/dpaa/include/fsl_qman.h
+++ b/drivers/bus/dpaa/include/fsl_qman.h
@@ -8,14 +8,14 @@
 #ifndef __FSL_QMAN_H
 #define __FSL_QMAN_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <dpaa_rbtree.h>
 #include <rte_compat.h>
 #include <rte_eventdev.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* FQ lookups (turn this on for 64bit user-space) */
 #ifdef RTE_ARCH_64
 #define CONFIG_FSL_QMAN_FQ_LOOKUP
diff --git a/drivers/bus/fslmc/bus_fslmc_driver.h b/drivers/bus/fslmc/bus_fslmc_driver.h
index 7ac5fe6ff1..3095458133 100644
--- a/drivers/bus/fslmc/bus_fslmc_driver.h
+++ b/drivers/bus/fslmc/bus_fslmc_driver.h
@@ -13,10 +13,6 @@
  * RTE FSLMC Bus Interface
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdio.h>
 #include <stdlib.h>
 #include <limits.h>
@@ -40,6 +36,10 @@ extern "C" {
 #include "portal/dpaa2_hw_pvt.h"
 #include "portal/dpaa2_hw_dpio.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define FSLMC_OBJECT_MAX_LEN 32   /**< Length of each device on bus */
 
 #define DPAA2_INVALID_MBUF_SEQN        0
diff --git a/drivers/bus/pci/bus_pci_driver.h b/drivers/bus/pci/bus_pci_driver.h
index be32263a82..2cc1119072 100644
--- a/drivers/bus/pci/bus_pci_driver.h
+++ b/drivers/bus/pci/bus_pci_driver.h
@@ -6,14 +6,14 @@
 #ifndef BUS_PCI_DRIVER_H
 #define BUS_PCI_DRIVER_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_bus_pci.h>
 #include <dev_driver.h>
 #include <rte_compat.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Pathname of PCI devices directory. */
 __rte_internal
 const char *rte_pci_get_sysfs_path(void);
diff --git a/drivers/bus/pci/rte_bus_pci.h b/drivers/bus/pci/rte_bus_pci.h
index a3798cb1cb..19a7b15b99 100644
--- a/drivers/bus/pci/rte_bus_pci.h
+++ b/drivers/bus/pci/rte_bus_pci.h
@@ -11,10 +11,6 @@
  * PCI device & driver interface
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdio.h>
 #include <stdlib.h>
 #include <limits.h>
@@ -27,6 +23,10 @@ extern "C" {
 #include <rte_interrupts.h>
 #include <rte_pci.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Forward declarations */
 struct rte_pci_device;
 struct rte_pci_driver;
diff --git a/drivers/bus/platform/bus_platform_driver.h b/drivers/bus/platform/bus_platform_driver.h
index 5ac54fb739..a6f246f7c4 100644
--- a/drivers/bus/platform/bus_platform_driver.h
+++ b/drivers/bus/platform/bus_platform_driver.h
@@ -10,10 +10,6 @@
  * Platform bus interface.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stddef.h>
 #include <stdint.h>
 
@@ -23,6 +19,10 @@ extern "C" {
 #include <rte_os.h>
 #include <rte_vfio.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Forward declarations */
 struct rte_platform_bus;
 struct rte_platform_device;
diff --git a/drivers/bus/vdev/bus_vdev_driver.h b/drivers/bus/vdev/bus_vdev_driver.h
index bc7e30d7c6..cba1fb5269 100644
--- a/drivers/bus/vdev/bus_vdev_driver.h
+++ b/drivers/bus/vdev/bus_vdev_driver.h
@@ -5,15 +5,15 @@
 #ifndef BUS_VDEV_DRIVER_H
 #define BUS_VDEV_DRIVER_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_bus_vdev.h>
 #include <rte_compat.h>
 #include <dev_driver.h>
 #include <rte_devargs.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct rte_vdev_device {
 	RTE_TAILQ_ENTRY(rte_vdev_device) next;      /**< Next attached vdev */
 	struct rte_device device;               /**< Inherit core device */
diff --git a/drivers/bus/vmbus/bus_vmbus_driver.h b/drivers/bus/vmbus/bus_vmbus_driver.h
index e2475a642d..bc394208de 100644
--- a/drivers/bus/vmbus/bus_vmbus_driver.h
+++ b/drivers/bus/vmbus/bus_vmbus_driver.h
@@ -6,14 +6,14 @@
 #ifndef BUS_VMBUS_DRIVER_H
 #define BUS_VMBUS_DRIVER_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_bus_vmbus.h>
 #include <rte_compat.h>
 #include <dev_driver.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct vmbus_channel;
 struct vmbus_mon_page;
 
diff --git a/drivers/bus/vmbus/rte_bus_vmbus.h b/drivers/bus/vmbus/rte_bus_vmbus.h
index 9467bd8f3d..fd18bca73c 100644
--- a/drivers/bus/vmbus/rte_bus_vmbus.h
+++ b/drivers/bus/vmbus/rte_bus_vmbus.h
@@ -11,10 +11,6 @@
  *
  * VMBUS Interface
  */
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdio.h>
 #include <stdlib.h>
 #include <limits.h>
@@ -28,6 +24,10 @@ extern "C" {
 #include <rte_interrupts.h>
 #include <rte_vmbus_reg.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Forward declarations */
 struct rte_vmbus_device;
 struct rte_vmbus_driver;
diff --git a/drivers/dma/cnxk/cnxk_dma_event_dp.h b/drivers/dma/cnxk/cnxk_dma_event_dp.h
index 06b5ca8279..8c6cf5dd9a 100644
--- a/drivers/dma/cnxk/cnxk_dma_event_dp.h
+++ b/drivers/dma/cnxk/cnxk_dma_event_dp.h
@@ -5,16 +5,16 @@
 #ifndef _CNXK_DMA_EVENT_DP_H_
 #define _CNXK_DMA_EVENT_DP_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_common.h>
 #include <rte_compat.h>
 #include <rte_eventdev.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 __rte_internal
 uint16_t cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events);
 
diff --git a/drivers/dma/ioat/ioat_hw_defs.h b/drivers/dma/ioat/ioat_hw_defs.h
index dc3493a78f..11893951f2 100644
--- a/drivers/dma/ioat/ioat_hw_defs.h
+++ b/drivers/dma/ioat/ioat_hw_defs.h
@@ -5,12 +5,12 @@
 #ifndef IOAT_HW_DEFS_H
 #define IOAT_HW_DEFS_H
 
+#include <stdint.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-
 #define IOAT_PCI_CHANERR_INT_OFFSET	0x180
 
 #define IOAT_VER_3_0	0x30
diff --git a/drivers/event/dlb2/rte_pmd_dlb2.h b/drivers/event/dlb2/rte_pmd_dlb2.h
index 334c6c356d..dba7fd2f43 100644
--- a/drivers/event/dlb2/rte_pmd_dlb2.h
+++ b/drivers/event/dlb2/rte_pmd_dlb2.h
@@ -11,14 +11,14 @@
 #ifndef _RTE_PMD_DLB2_H_
 #define _RTE_PMD_DLB2_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_compat.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @warning
  * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
diff --git a/drivers/mempool/dpaa2/rte_dpaa2_mempool.h b/drivers/mempool/dpaa2/rte_dpaa2_mempool.h
index 7fe3d93f61..0286090b1b 100644
--- a/drivers/mempool/dpaa2/rte_dpaa2_mempool.h
+++ b/drivers/mempool/dpaa2/rte_dpaa2_mempool.h
@@ -12,13 +12,13 @@
  *
  */
 
+#include <rte_compat.h>
+#include <rte_mempool.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_compat.h>
-#include <rte_mempool.h>
-
 /**
  * Get BPID corresponding to the packet pool
  *
diff --git a/drivers/net/avp/rte_avp_fifo.h b/drivers/net/avp/rte_avp_fifo.h
index c1658da685..879de3b1c0 100644
--- a/drivers/net/avp/rte_avp_fifo.h
+++ b/drivers/net/avp/rte_avp_fifo.h
@@ -8,10 +8,6 @@
 
 #include "rte_avp_common.h"
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #ifdef __KERNEL__
 /* Write memory barrier for kernel compiles */
 #define AVP_WMB() smp_wmb()
@@ -27,6 +23,10 @@ extern "C" {
 #ifndef __KERNEL__
 #include <rte_debug.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Initializes the avp fifo structure
  */
diff --git a/drivers/net/bonding/rte_eth_bond.h b/drivers/net/bonding/rte_eth_bond.h
index f10165f2c6..e59ff8793e 100644
--- a/drivers/net/bonding/rte_eth_bond.h
+++ b/drivers/net/bonding/rte_eth_bond.h
@@ -17,12 +17,12 @@
  * load balancing of network ports
  */
 
+#include <rte_ether.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_ether.h>
-
 /* Supported modes of operation of link bonding library  */
 
 #define BONDING_MODE_ROUND_ROBIN		(0)
diff --git a/drivers/net/i40e/rte_pmd_i40e.h b/drivers/net/i40e/rte_pmd_i40e.h
index a802f989e9..5af7e2330f 100644
--- a/drivers/net/i40e/rte_pmd_i40e.h
+++ b/drivers/net/i40e/rte_pmd_i40e.h
@@ -14,14 +14,14 @@
  *
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include <rte_ethdev.h>
 #include <rte_ether.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Response sent back to i40e driver from user app after callback
  */
diff --git a/drivers/net/mlx5/mlx5_trace.h b/drivers/net/mlx5/mlx5_trace.h
index 888d96f60b..a8f0b372c8 100644
--- a/drivers/net/mlx5/mlx5_trace.h
+++ b/drivers/net/mlx5/mlx5_trace.h
@@ -11,14 +11,14 @@
  * API for mlx5 PMD trace support
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <mlx5_prm.h>
 #include <rte_mbuf.h>
 #include <rte_trace_point.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* TX burst subroutines trace points. */
 RTE_TRACE_POINT_FP(
 	rte_pmd_mlx5_trace_tx_entry,
diff --git a/drivers/net/ring/rte_eth_ring.h b/drivers/net/ring/rte_eth_ring.h
index 59e074d0ad..98292c7b33 100644
--- a/drivers/net/ring/rte_eth_ring.h
+++ b/drivers/net/ring/rte_eth_ring.h
@@ -5,12 +5,12 @@
 #ifndef _RTE_ETH_RING_H_
 #define _RTE_ETH_RING_H_
 
+#include <rte_ring.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_ring.h>
-
 /**
  * Create a new ethdev port from a set of rings
  *
diff --git a/drivers/net/vhost/rte_eth_vhost.h b/drivers/net/vhost/rte_eth_vhost.h
index 0e68b9f668..6ec59a7adc 100644
--- a/drivers/net/vhost/rte_eth_vhost.h
+++ b/drivers/net/vhost/rte_eth_vhost.h
@@ -5,15 +5,15 @@
 #ifndef _RTE_ETH_VHOST_H_
 #define _RTE_ETH_VHOST_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <stdbool.h>
 
 #include <rte_vhost.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /*
  * Event description.
  */
diff --git a/drivers/raw/ifpga/afu_pmd_core.h b/drivers/raw/ifpga/afu_pmd_core.h
index a8f1afe343..abf9e491f7 100644
--- a/drivers/raw/ifpga/afu_pmd_core.h
+++ b/drivers/raw/ifpga/afu_pmd_core.h
@@ -5,10 +5,6 @@
 #ifndef AFU_PMD_CORE_H
 #define AFU_PMD_CORE_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <stdio.h>
 #include <unistd.h>
@@ -20,6 +16,10 @@ extern "C" {
 
 #include "ifpga_rawdev.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define AFU_RAWDEV_MAX_DRVS  32
 
 struct afu_rawdev;
diff --git a/drivers/raw/ifpga/afu_pmd_he_hssi.h b/drivers/raw/ifpga/afu_pmd_he_hssi.h
index aebbe32d54..282289d912 100644
--- a/drivers/raw/ifpga/afu_pmd_he_hssi.h
+++ b/drivers/raw/ifpga/afu_pmd_he_hssi.h
@@ -5,13 +5,13 @@
 #ifndef AFU_PMD_HE_HSSI_H
 #define AFU_PMD_HE_HSSI_H
 
+#include "afu_pmd_core.h"
+#include "rte_pmd_afu.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "afu_pmd_core.h"
-#include "rte_pmd_afu.h"
-
 #define HE_HSSI_UUID_L    0xbb370242ac130002
 #define HE_HSSI_UUID_H    0x823c334c98bf11ea
 #define NUM_HE_HSSI_PORTS 8
diff --git a/drivers/raw/ifpga/afu_pmd_he_lpbk.h b/drivers/raw/ifpga/afu_pmd_he_lpbk.h
index eab7b55199..67b3653c21 100644
--- a/drivers/raw/ifpga/afu_pmd_he_lpbk.h
+++ b/drivers/raw/ifpga/afu_pmd_he_lpbk.h
@@ -5,13 +5,13 @@
 #ifndef AFU_PMD_HE_LPBK_H
 #define AFU_PMD_HE_LPBK_H
 
+#include "afu_pmd_core.h"
+#include "rte_pmd_afu.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "afu_pmd_core.h"
-#include "rte_pmd_afu.h"
-
 #define HE_LPBK_UUID_L     0xb94b12284c31e02b
 #define HE_LPBK_UUID_H     0x56e203e9864f49a7
 #define HE_MEM_LPBK_UUID_L 0xbb652a578330a8eb
diff --git a/drivers/raw/ifpga/afu_pmd_he_mem.h b/drivers/raw/ifpga/afu_pmd_he_mem.h
index 998ca92416..41854d8c58 100644
--- a/drivers/raw/ifpga/afu_pmd_he_mem.h
+++ b/drivers/raw/ifpga/afu_pmd_he_mem.h
@@ -5,13 +5,13 @@
 #ifndef AFU_PMD_HE_MEM_H
 #define AFU_PMD_HE_MEM_H
 
+#include "afu_pmd_core.h"
+#include "rte_pmd_afu.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "afu_pmd_core.h"
-#include "rte_pmd_afu.h"
-
 #define HE_MEM_TG_UUID_L  0xa3dc5b831f5cecbb
 #define HE_MEM_TG_UUID_H  0x4dadea342c7848cb
 
diff --git a/drivers/raw/ifpga/afu_pmd_n3000.h b/drivers/raw/ifpga/afu_pmd_n3000.h
index 403cc64b91..f6b6e07c6b 100644
--- a/drivers/raw/ifpga/afu_pmd_n3000.h
+++ b/drivers/raw/ifpga/afu_pmd_n3000.h
@@ -5,13 +5,13 @@
 #ifndef AFU_PMD_N3000_H
 #define AFU_PMD_N3000_H
 
+#include "afu_pmd_core.h"
+#include "rte_pmd_afu.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "afu_pmd_core.h"
-#include "rte_pmd_afu.h"
-
 #define N3000_AFU_UUID_L  0xc000c9660d824272
 #define N3000_AFU_UUID_H  0x9aeffe5f84570612
 #define N3000_NLB0_UUID_L 0xf89e433683f9040b
diff --git a/drivers/raw/ifpga/rte_pmd_afu.h b/drivers/raw/ifpga/rte_pmd_afu.h
index 5403ed25f5..0edacc3a9c 100644
--- a/drivers/raw/ifpga/rte_pmd_afu.h
+++ b/drivers/raw/ifpga/rte_pmd_afu.h
@@ -14,12 +14,12 @@
  *
  */
 
+#include <stdint.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-
 #define RTE_PMD_AFU_N3000_NLB   1
 #define RTE_PMD_AFU_N3000_DMA   2
 
diff --git a/drivers/raw/ifpga/rte_pmd_ifpga.h b/drivers/raw/ifpga/rte_pmd_ifpga.h
index 791543f2cd..36b7f9c018 100644
--- a/drivers/raw/ifpga/rte_pmd_ifpga.h
+++ b/drivers/raw/ifpga/rte_pmd_ifpga.h
@@ -14,12 +14,12 @@
  *
  */
 
+#include <stdint.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-
 #define IFPGA_MAX_PORT_NUM   4
 
 /**
diff --git a/examples/ethtool/lib/rte_ethtool.h b/examples/ethtool/lib/rte_ethtool.h
index d27e0102b1..c7dd3d9755 100644
--- a/examples/ethtool/lib/rte_ethtool.h
+++ b/examples/ethtool/lib/rte_ethtool.h
@@ -30,14 +30,14 @@
  * rte_ethtool_net_set_rx_mode      net_device_ops::ndo_set_rx_mode
  *
  */
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <rte_ethdev.h>
 #include <linux/ethtool.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Retrieve the Ethernet device driver information according to
  * attributes described by ethtool data structure, ethtool_drvinfo.
diff --git a/examples/qos_sched/main.h b/examples/qos_sched/main.h
index 04e77a4a10..ea66df0434 100644
--- a/examples/qos_sched/main.h
+++ b/examples/qos_sched/main.h
@@ -5,12 +5,12 @@
 #ifndef _MAIN_H_
 #define _MAIN_H_
 
+#include <rte_sched.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_sched.h>
-
 #define RTE_LOGTYPE_APP RTE_LOGTYPE_USER1
 
 /*
diff --git a/examples/vm_power_manager/channel_manager.h b/examples/vm_power_manager/channel_manager.h
index eb989b20ad..6f70539815 100644
--- a/examples/vm_power_manager/channel_manager.h
+++ b/examples/vm_power_manager/channel_manager.h
@@ -5,16 +5,16 @@
 #ifndef CHANNEL_MANAGER_H_
 #define CHANNEL_MANAGER_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <linux/limits.h>
 #include <linux/un.h>
 #include <stdbool.h>
 
 #include <rte_stdatomic.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Maximum name length including '\0' terminator */
 #define CHANNEL_MGR_MAX_NAME_LEN    64
 
diff --git a/lib/acl/rte_acl_osdep.h b/lib/acl/rte_acl_osdep.h
index 3c1dc402ca..b2c262dee7 100644
--- a/lib/acl/rte_acl_osdep.h
+++ b/lib/acl/rte_acl_osdep.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_ACL_OSDEP_H_
 #define _RTE_ACL_OSDEP_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  *
@@ -49,8 +45,4 @@ extern "C" {
 #include <rte_cpuflags.h>
 #include <rte_debug.h>
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* _RTE_ACL_OSDEP_H_ */
diff --git a/lib/bbdev/rte_bbdev.h b/lib/bbdev/rte_bbdev.h
index 0cbfdd1c95..9e83dd2bb0 100644
--- a/lib/bbdev/rte_bbdev.h
+++ b/lib/bbdev/rte_bbdev.h
@@ -20,10 +20,6 @@
  * from the same queue.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <stdbool.h>
 
@@ -32,6 +28,10 @@ extern "C" {
 
 #include "rte_bbdev_op.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifndef RTE_BBDEV_MAX_DEVS
 #define RTE_BBDEV_MAX_DEVS 128  /**< Max number of devices */
 #endif
diff --git a/lib/bbdev/rte_bbdev_op.h b/lib/bbdev/rte_bbdev_op.h
index 459631d0d0..6f4bae7d0f 100644
--- a/lib/bbdev/rte_bbdev_op.h
+++ b/lib/bbdev/rte_bbdev_op.h
@@ -11,10 +11,6 @@
  * Defines wireless base band layer 1 operations and capabilities
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_compat.h>
@@ -23,6 +19,10 @@ extern "C" {
 #include <rte_memory.h>
 #include <rte_mempool.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Number of columns in sub-block interleaver (36.212, section 5.1.4.1.1) */
 #define RTE_BBDEV_TURBO_C_SUBBLOCK (32)
 /* Maximum size of Transport Block (36.213, Table, Table 7.1.7.2.5-1) */
diff --git a/lib/bbdev/rte_bbdev_pmd.h b/lib/bbdev/rte_bbdev_pmd.h
index 442b23943d..0a1738fc05 100644
--- a/lib/bbdev/rte_bbdev_pmd.h
+++ b/lib/bbdev/rte_bbdev_pmd.h
@@ -14,15 +14,15 @@
  * bbdev interface. User applications should not use this API.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <rte_log.h>
 
 #include "rte_bbdev.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Suggested value for SW based devices */
 #define RTE_BBDEV_DEFAULT_MAX_NB_QUEUES RTE_MAX_LCORE
 
diff --git a/lib/bpf/bpf_def.h b/lib/bpf/bpf_def.h
index f08cd9106b..1593a29296 100644
--- a/lib/bpf/bpf_def.h
+++ b/lib/bpf/bpf_def.h
@@ -7,10 +7,6 @@
 #ifndef _RTE_BPF_DEF_H_
 #define _RTE_BPF_DEF_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  *
@@ -25,7 +21,6 @@ extern "C" {
 
 #include <stdint.h>
 
-
 /*
  * The instruction encodings.
  */
@@ -144,8 +139,4 @@ struct ebpf_insn {
  */
 #define	EBPF_FUNC_MAX_ARGS	(EBPF_REG_6 - EBPF_REG_1)
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* RTE_BPF_DEF_H_ */
diff --git a/lib/compressdev/rte_comp.h b/lib/compressdev/rte_comp.h
index 830a240b6b..d66a4b1cb9 100644
--- a/lib/compressdev/rte_comp.h
+++ b/lib/compressdev/rte_comp.h
@@ -11,12 +11,12 @@
  * RTE definitions for Data Compression Service
  */
 
+#include <rte_mbuf.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_mbuf.h>
-
 /**
  * compression service feature flags
  *
diff --git a/lib/compressdev/rte_compressdev.h b/lib/compressdev/rte_compressdev.h
index e0294a18bd..b3392553a6 100644
--- a/lib/compressdev/rte_compressdev.h
+++ b/lib/compressdev/rte_compressdev.h
@@ -13,13 +13,13 @@
  * Defines comp device APIs for the provisioning of compression operations.
  */
 
+
+#include "rte_comp.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-
-#include "rte_comp.h"
-
 /**
  * Parameter log base 2 range description.
  * Final value will be 2^value.
diff --git a/lib/compressdev/rte_compressdev_internal.h b/lib/compressdev/rte_compressdev_internal.h
index 67f8b51a37..a980d74cbf 100644
--- a/lib/compressdev/rte_compressdev_internal.h
+++ b/lib/compressdev/rte_compressdev_internal.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_COMPRESSDEV_INTERNAL_H_
 #define _RTE_COMPRESSDEV_INTERNAL_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /* rte_compressdev_internal.h
  * This file holds Compressdev private data structures.
  */
@@ -16,6 +12,10 @@ extern "C" {
 
 #include "rte_comp.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_COMPRESSDEV_NAME_MAX_LEN	(64)
 /**< Max length of name of comp PMD */
 
diff --git a/lib/compressdev/rte_compressdev_pmd.h b/lib/compressdev/rte_compressdev_pmd.h
index 32e29c9d16..ea721f014d 100644
--- a/lib/compressdev/rte_compressdev_pmd.h
+++ b/lib/compressdev/rte_compressdev_pmd.h
@@ -13,10 +13,6 @@
  * them directly.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <string.h>
 
 #include <dev_driver.h>
@@ -24,6 +20,10 @@ extern "C" {
 #include "rte_compressdev.h"
 #include "rte_compressdev_internal.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_COMPRESSDEV_PMD_NAME_ARG			("name")
 #define RTE_COMPRESSDEV_PMD_SOCKET_ID_ARG		("socket_id")
 
diff --git a/lib/cryptodev/cryptodev_pmd.h b/lib/cryptodev/cryptodev_pmd.h
index 6c114f7181..3e2e2673b8 100644
--- a/lib/cryptodev/cryptodev_pmd.h
+++ b/lib/cryptodev/cryptodev_pmd.h
@@ -5,10 +5,6 @@
 #ifndef _CRYPTODEV_PMD_H_
 #define _CRYPTODEV_PMD_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /** @file
  * RTE Crypto PMD APIs
  *
@@ -28,6 +24,10 @@ extern "C" {
 #include "rte_crypto.h"
 #include "rte_cryptodev.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 
 #define RTE_CRYPTODEV_PMD_DEFAULT_MAX_NB_QUEUE_PAIRS	8
 
diff --git a/lib/cryptodev/cryptodev_trace.h b/lib/cryptodev/cryptodev_trace.h
index 935f0d564b..e186f0f3c1 100644
--- a/lib/cryptodev/cryptodev_trace.h
+++ b/lib/cryptodev/cryptodev_trace.h
@@ -11,14 +11,14 @@
  * API for cryptodev trace support
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_trace_point.h>
 
 #include "rte_cryptodev.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 RTE_TRACE_POINT(
 	rte_cryptodev_trace_configure,
 	RTE_TRACE_POINT_ARGS(uint8_t dev_id,
diff --git a/lib/cryptodev/rte_crypto.h b/lib/cryptodev/rte_crypto.h
index dbc2700da5..dcf4a36fb2 100644
--- a/lib/cryptodev/rte_crypto.h
+++ b/lib/cryptodev/rte_crypto.h
@@ -11,10 +11,6 @@
  * RTE Cryptography Common Definitions
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 
 #include <rte_mbuf.h>
 #include <rte_memory.h>
@@ -24,6 +20,10 @@ extern "C" {
 #include "rte_crypto_sym.h"
 #include "rte_crypto_asym.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Crypto operation types */
 enum rte_crypto_op_type {
 	RTE_CRYPTO_OP_TYPE_UNDEFINED,
diff --git a/lib/cryptodev/rte_crypto_asym.h b/lib/cryptodev/rte_crypto_asym.h
index 39d3da3952..6b82eec810 100644
--- a/lib/cryptodev/rte_crypto_asym.h
+++ b/lib/cryptodev/rte_crypto_asym.h
@@ -14,10 +14,6 @@
  * asymmetric crypto operations.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <string.h>
 #include <stdint.h>
 
@@ -728,8 +724,4 @@ struct rte_crypto_asym_op {
 	 */
 };
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* _RTE_CRYPTO_ASYM_H_ */
diff --git a/lib/cryptodev/rte_crypto_sym.h b/lib/cryptodev/rte_crypto_sym.h
index 53b18b9412..fb73024010 100644
--- a/lib/cryptodev/rte_crypto_sym.h
+++ b/lib/cryptodev/rte_crypto_sym.h
@@ -14,10 +14,6 @@
  * as supported symmetric crypto operation combinations.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <string.h>
 
 #include <rte_compat.h>
@@ -26,6 +22,10 @@ extern "C" {
 #include <rte_mempool.h>
 #include <rte_common.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Crypto IO Vector (in analogy with struct iovec)
  * Supposed be used to pass input/output data buffers for crypto data-path
diff --git a/lib/cryptodev/rte_cryptodev.h b/lib/cryptodev/rte_cryptodev.h
index bec947f6d5..8051c5a6a3 100644
--- a/lib/cryptodev/rte_cryptodev.h
+++ b/lib/cryptodev/rte_cryptodev.h
@@ -14,10 +14,6 @@
  * authentication operations.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include "rte_kvargs.h"
 #include "rte_crypto.h"
@@ -1859,6 +1855,10 @@ int rte_cryptodev_remove_deq_callback(uint8_t dev_id,
 				      struct rte_cryptodev_cb *cb);
 
 #include <rte_cryptodev_core.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
 /**
  *
  * Dequeue a burst of processed crypto operations from a queue on the crypto
diff --git a/lib/cryptodev/rte_cryptodev_trace_fp.h b/lib/cryptodev/rte_cryptodev_trace_fp.h
index dbfbc7b2e5..f23f882804 100644
--- a/lib/cryptodev/rte_cryptodev_trace_fp.h
+++ b/lib/cryptodev/rte_cryptodev_trace_fp.h
@@ -5,12 +5,12 @@
 #ifndef _RTE_CRYPTODEV_TRACE_FP_H_
 #define _RTE_CRYPTODEV_TRACE_FP_H_
 
+#include <rte_trace_point.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_trace_point.h>
-
 RTE_TRACE_POINT_FP(
 	rte_cryptodev_trace_enqueue_burst,
 	RTE_TRACE_POINT_ARGS(uint8_t dev_id, uint16_t qp_id, void **ops,
diff --git a/lib/dispatcher/rte_dispatcher.h b/lib/dispatcher/rte_dispatcher.h
index d8182d5f2c..ba2c353073 100644
--- a/lib/dispatcher/rte_dispatcher.h
+++ b/lib/dispatcher/rte_dispatcher.h
@@ -19,16 +19,16 @@
  * event device.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdbool.h>
 #include <stdint.h>
 
 #include <rte_compat.h>
 #include <rte_eventdev.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Function prototype for match callbacks.
  *
diff --git a/lib/dmadev/rte_dmadev.h b/lib/dmadev/rte_dmadev.h
index 5474a5281d..d174d325a1 100644
--- a/lib/dmadev/rte_dmadev.h
+++ b/lib/dmadev/rte_dmadev.h
@@ -772,9 +772,17 @@ struct rte_dma_sge {
 	uint32_t length; /**< The DMA operation length. */
 };
 
+#ifdef __cplusplus
+}
+#endif
+
 #include "rte_dmadev_core.h"
 #include "rte_dmadev_trace_fp.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**@{@name DMA operation flag
  * @see rte_dma_copy()
  * @see rte_dma_copy_sg()
diff --git a/lib/eal/arm/include/rte_atomic_32.h b/lib/eal/arm/include/rte_atomic_32.h
index 62fc33773d..0b9a0dfa30 100644
--- a/lib/eal/arm/include/rte_atomic_32.h
+++ b/lib/eal/arm/include/rte_atomic_32.h
@@ -9,12 +9,12 @@
 #  error Platform must be built with RTE_FORCE_INTRINSICS
 #endif
 
+#include "generic/rte_atomic.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_atomic.h"
-
 #define	rte_mb()  __sync_synchronize()
 
 #define	rte_wmb() do { asm volatile ("dmb st" : : : "memory"); } while (0)
diff --git a/lib/eal/arm/include/rte_atomic_64.h b/lib/eal/arm/include/rte_atomic_64.h
index 7c99fc0a02..181bb60929 100644
--- a/lib/eal/arm/include/rte_atomic_64.h
+++ b/lib/eal/arm/include/rte_atomic_64.h
@@ -10,14 +10,14 @@
 #  error Platform must be built with RTE_FORCE_INTRINSICS
 #endif
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include "generic/rte_atomic.h"
 #include <rte_branch_prediction.h>
 #include <rte_debug.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define rte_mb() asm volatile("dmb osh" : : : "memory")
 
 #define rte_wmb() asm volatile("dmb oshst" : : : "memory")
diff --git a/lib/eal/arm/include/rte_byteorder.h b/lib/eal/arm/include/rte_byteorder.h
index ff02052f2e..a0aaff4a28 100644
--- a/lib/eal/arm/include/rte_byteorder.h
+++ b/lib/eal/arm/include/rte_byteorder.h
@@ -9,14 +9,14 @@
 #  error Platform must be built with RTE_FORCE_INTRINSICS
 #endif
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <rte_common.h>
 #include "generic/rte_byteorder.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* ARM architecture is bi-endian (both big and little). */
 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
 
diff --git a/lib/eal/arm/include/rte_cpuflags_32.h b/lib/eal/arm/include/rte_cpuflags_32.h
index 770b09b99d..b166d9d877 100644
--- a/lib/eal/arm/include/rte_cpuflags_32.h
+++ b/lib/eal/arm/include/rte_cpuflags_32.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_CPUFLAGS_ARM32_H_
 #define _RTE_CPUFLAGS_ARM32_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * Enumeration of all CPU features supported
  */
@@ -46,8 +42,4 @@ enum rte_cpu_flag_t {
 
 #include "generic/rte_cpuflags.h"
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* _RTE_CPUFLAGS_ARM32_H_ */
diff --git a/lib/eal/arm/include/rte_cpuflags_64.h b/lib/eal/arm/include/rte_cpuflags_64.h
index afe70209c3..2fa2e82e45 100644
--- a/lib/eal/arm/include/rte_cpuflags_64.h
+++ b/lib/eal/arm/include/rte_cpuflags_64.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_CPUFLAGS_ARM64_H_
 #define _RTE_CPUFLAGS_ARM64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * Enumeration of all CPU features supported
  */
@@ -40,8 +36,4 @@ enum rte_cpu_flag_t {
 
 #include "generic/rte_cpuflags.h"
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* _RTE_CPUFLAGS_ARM64_H_ */
diff --git a/lib/eal/arm/include/rte_cycles_32.h b/lib/eal/arm/include/rte_cycles_32.h
index 859cd2e5bb..2b20c8c6f5 100644
--- a/lib/eal/arm/include/rte_cycles_32.h
+++ b/lib/eal/arm/include/rte_cycles_32.h
@@ -15,12 +15,12 @@
 
 #include <time.h>
 
+#include "generic/rte_cycles.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_cycles.h"
-
 /**
  * Read the time base register.
  *
diff --git a/lib/eal/arm/include/rte_cycles_64.h b/lib/eal/arm/include/rte_cycles_64.h
index 8b05302f47..bb76e4d7e0 100644
--- a/lib/eal/arm/include/rte_cycles_64.h
+++ b/lib/eal/arm/include/rte_cycles_64.h
@@ -6,12 +6,12 @@
 #ifndef _RTE_CYCLES_ARM64_H_
 #define _RTE_CYCLES_ARM64_H_
 
+#include "generic/rte_cycles.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_cycles.h"
-
 /** Read generic counter frequency */
 static __rte_always_inline uint64_t
 __rte_arm64_cntfrq(void)
diff --git a/lib/eal/arm/include/rte_io.h b/lib/eal/arm/include/rte_io.h
index f4e66e6bad..781774dd72 100644
--- a/lib/eal/arm/include/rte_io.h
+++ b/lib/eal/arm/include/rte_io.h
@@ -5,18 +5,10 @@
 #ifndef _RTE_IO_ARM_H_
 #define _RTE_IO_ARM_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #ifdef RTE_ARCH_64
 #include "rte_io_64.h"
 #else
 #include "generic/rte_io.h"
 #endif
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* _RTE_IO_ARM_H_ */
diff --git a/lib/eal/arm/include/rte_io_64.h b/lib/eal/arm/include/rte_io_64.h
index 96da7789ce..88db82a7eb 100644
--- a/lib/eal/arm/include/rte_io_64.h
+++ b/lib/eal/arm/include/rte_io_64.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_IO_ARM64_H_
 #define _RTE_IO_ARM64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #define RTE_OVERRIDE_IO_H
@@ -17,6 +13,10 @@ extern "C" {
 #include <rte_compat.h>
 #include "rte_atomic_64.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static __rte_always_inline uint8_t
 rte_read8_relaxed(const volatile void *addr)
 {
diff --git a/lib/eal/arm/include/rte_memcpy_32.h b/lib/eal/arm/include/rte_memcpy_32.h
index fb3245b59c..99fd5757ca 100644
--- a/lib/eal/arm/include/rte_memcpy_32.h
+++ b/lib/eal/arm/include/rte_memcpy_32.h
@@ -8,10 +8,6 @@
 #include <stdint.h>
 #include <string.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include "generic/rte_memcpy.h"
 
 #ifdef RTE_ARCH_ARM_NEON_MEMCPY
@@ -23,6 +19,10 @@ extern "C" {
 /* ARM NEON Intrinsics are used to copy data */
 #include <arm_neon.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void
 rte_mov16(uint8_t *dst, const uint8_t *src)
 {
diff --git a/lib/eal/arm/include/rte_memcpy_64.h b/lib/eal/arm/include/rte_memcpy_64.h
index 85ad587bd3..90039039be 100644
--- a/lib/eal/arm/include/rte_memcpy_64.h
+++ b/lib/eal/arm/include/rte_memcpy_64.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_MEMCPY_ARM64_H_
 #define _RTE_MEMCPY_ARM64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <string.h>
 
@@ -18,6 +14,10 @@ extern "C" {
 #include <rte_common.h>
 #include <rte_branch_prediction.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /*
  * The memory copy performance differs on different AArch64 micro-architectures.
  * And the most recent glibc (e.g. 2.23 or later) can provide a better memcpy()
@@ -324,7 +324,16 @@ void *rte_memcpy(void *dst, const void *src, size_t n)
 }
 #endif /* RTE_CACHE_LINE_SIZE >= 128 */
 
-#else
+#ifdef __cplusplus
+}
+#endif
+
+#else /* RTE_ARCH_ARM64_MEMCPY */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void
 rte_mov16(uint8_t *dst, const uint8_t *src)
 {
@@ -363,10 +372,10 @@ rte_mov256(uint8_t *dst, const uint8_t *src)
 
 #define rte_memcpy(d, s, n)	memcpy((d), (s), (n))
 
-#endif /* RTE_ARCH_ARM64_MEMCPY */
-
 #ifdef __cplusplus
 }
 #endif
 
+#endif /* RTE_ARCH_ARM64_MEMCPY */
+
 #endif /* _RTE_MEMCPY_ARM_64_H_ */
diff --git a/lib/eal/arm/include/rte_pause.h b/lib/eal/arm/include/rte_pause.h
index 6c7002ad98..9c793c8803 100644
--- a/lib/eal/arm/include/rte_pause.h
+++ b/lib/eal/arm/include/rte_pause.h
@@ -5,18 +5,10 @@
 #ifndef _RTE_PAUSE_ARM_H_
 #define _RTE_PAUSE_ARM_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #ifdef RTE_ARCH_64
 #include <rte_pause_64.h>
 #else
 #include <rte_pause_32.h>
 #endif
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* _RTE_PAUSE_ARM_H_ */
diff --git a/lib/eal/arm/include/rte_pause_32.h b/lib/eal/arm/include/rte_pause_32.h
index d4768c7a98..7870fac763 100644
--- a/lib/eal/arm/include/rte_pause_32.h
+++ b/lib/eal/arm/include/rte_pause_32.h
@@ -5,13 +5,13 @@
 #ifndef _RTE_PAUSE_ARM32_H_
 #define _RTE_PAUSE_ARM32_H_
 
+#include <rte_common.h>
+#include "generic/rte_pause.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_common.h>
-#include "generic/rte_pause.h"
-
 static inline void rte_pause(void)
 {
 }
diff --git a/lib/eal/arm/include/rte_pause_64.h b/lib/eal/arm/include/rte_pause_64.h
index 9e2dbf3531..1526bf87cc 100644
--- a/lib/eal/arm/include/rte_pause_64.h
+++ b/lib/eal/arm/include/rte_pause_64.h
@@ -6,10 +6,6 @@
 #ifndef _RTE_PAUSE_ARM64_H_
 #define _RTE_PAUSE_ARM64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 #include <rte_stdatomic.h>
 
@@ -19,6 +15,10 @@ extern "C" {
 
 #include "generic/rte_pause.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void rte_pause(void)
 {
 	asm volatile("yield" ::: "memory");
diff --git a/lib/eal/arm/include/rte_power_intrinsics.h b/lib/eal/arm/include/rte_power_intrinsics.h
index 9e498e9ebf..2676db3c29 100644
--- a/lib/eal/arm/include/rte_power_intrinsics.h
+++ b/lib/eal/arm/include/rte_power_intrinsics.h
@@ -5,16 +5,8 @@
 #ifndef _RTE_POWER_INTRINSIC_ARM_H_
 #define _RTE_POWER_INTRINSIC_ARM_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 
 #include "generic/rte_power_intrinsics.h"
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* _RTE_POWER_INTRINSIC_ARM_H_ */
diff --git a/lib/eal/arm/include/rte_prefetch_32.h b/lib/eal/arm/include/rte_prefetch_32.h
index 0e9a140c8a..619bf27c79 100644
--- a/lib/eal/arm/include/rte_prefetch_32.h
+++ b/lib/eal/arm/include/rte_prefetch_32.h
@@ -5,14 +5,14 @@
 #ifndef _RTE_PREFETCH_ARM32_H_
 #define _RTE_PREFETCH_ARM32_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include <rte_common.h>
 #include "generic/rte_prefetch.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void rte_prefetch0(const volatile void *p)
 {
 	asm volatile ("pld [%0]" : : "r" (p));
diff --git a/lib/eal/arm/include/rte_prefetch_64.h b/lib/eal/arm/include/rte_prefetch_64.h
index 22cba48e29..4f60123b8b 100644
--- a/lib/eal/arm/include/rte_prefetch_64.h
+++ b/lib/eal/arm/include/rte_prefetch_64.h
@@ -5,14 +5,14 @@
 #ifndef _RTE_PREFETCH_ARM_64_H_
 #define _RTE_PREFETCH_ARM_64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include <rte_common.h>
 #include "generic/rte_prefetch.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void rte_prefetch0(const volatile void *p)
 {
 	asm volatile ("PRFM PLDL1KEEP, [%0]" : : "r" (p));
diff --git a/lib/eal/arm/include/rte_rwlock.h b/lib/eal/arm/include/rte_rwlock.h
index 18bb37b036..727cabafec 100644
--- a/lib/eal/arm/include/rte_rwlock.h
+++ b/lib/eal/arm/include/rte_rwlock.h
@@ -5,12 +5,12 @@
 #ifndef _RTE_RWLOCK_ARM_H_
 #define _RTE_RWLOCK_ARM_H_
 
+#include "generic/rte_rwlock.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_rwlock.h"
-
 static inline void
 rte_rwlock_read_lock_tm(rte_rwlock_t *rwl)
 {
diff --git a/lib/eal/arm/include/rte_spinlock.h b/lib/eal/arm/include/rte_spinlock.h
index a973763c23..a5d01b0d21 100644
--- a/lib/eal/arm/include/rte_spinlock.h
+++ b/lib/eal/arm/include/rte_spinlock.h
@@ -9,13 +9,13 @@
 #  error Platform must be built with RTE_FORCE_INTRINSICS
 #endif
 
+#include <rte_common.h>
+#include "generic/rte_spinlock.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_common.h>
-#include "generic/rte_spinlock.h"
-
 static inline int rte_tm_supported(void)
 {
 	return 0;
diff --git a/lib/eal/freebsd/include/rte_os.h b/lib/eal/freebsd/include/rte_os.h
index 003468caff..62e70dc15b 100644
--- a/lib/eal/freebsd/include/rte_os.h
+++ b/lib/eal/freebsd/include/rte_os.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_OS_H_
 #define _RTE_OS_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * This header should contain any definition
  * which is not supported natively or named differently in FreeBSD.
@@ -78,8 +74,4 @@ typedef cpuset_t rte_cpuset_t;
 
 #endif /* RTE_EAL_FREEBSD_CPUSET_LEGACY */
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* _RTE_OS_H_ */
diff --git a/lib/eal/include/bus_driver.h b/lib/eal/include/bus_driver.h
index 7b85a17a09..60527b75b6 100644
--- a/lib/eal/include/bus_driver.h
+++ b/lib/eal/include/bus_driver.h
@@ -5,16 +5,16 @@
 #ifndef BUS_DRIVER_H
 #define BUS_DRIVER_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_bus.h>
 #include <rte_compat.h>
 #include <rte_dev.h>
 #include <rte_eal.h>
 #include <rte_tailq.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct rte_devargs;
 struct rte_device;
 
diff --git a/lib/eal/include/dev_driver.h b/lib/eal/include/dev_driver.h
index 5efa8c437e..c07d83a43a 100644
--- a/lib/eal/include/dev_driver.h
+++ b/lib/eal/include/dev_driver.h
@@ -5,10 +5,6 @@
 #ifndef DEV_DRIVER_H
 #define DEV_DRIVER_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 #include <rte_dev.h>
 
@@ -34,8 +30,4 @@ struct rte_device {
 	struct rte_devargs *devargs;  /**< Arguments for latest probing */
 };
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* DEV_DRIVER_H */
diff --git a/lib/eal/include/eal_trace_internal.h b/lib/eal/include/eal_trace_internal.h
index 09c354717f..50f91d0929 100644
--- a/lib/eal/include/eal_trace_internal.h
+++ b/lib/eal/include/eal_trace_internal.h
@@ -11,16 +11,16 @@
  * API for EAL trace support
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_alarm.h>
 #include <rte_interrupts.h>
 #include <rte_trace_point.h>
 
 #include "eal_interrupts.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Alarm */
 RTE_TRACE_POINT(
 	rte_eal_trace_alarm_set,
diff --git a/lib/eal/include/generic/rte_atomic.h b/lib/eal/include/generic/rte_atomic.h
index f859707744..0a4f3f8528 100644
--- a/lib/eal/include/generic/rte_atomic.h
+++ b/lib/eal/include/generic/rte_atomic.h
@@ -17,6 +17,10 @@
 #include <rte_common.h>
 #include <rte_stdatomic.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __DOXYGEN__
 
 /** @name Memory Barrier
@@ -1156,4 +1160,8 @@ rte_atomic128_cmp_exchange(rte_int128_t *dst,
 
 #endif /* __DOXYGEN__ */
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* _RTE_ATOMIC_H_ */
diff --git a/lib/eal/include/generic/rte_byteorder.h b/lib/eal/include/generic/rte_byteorder.h
index f1c04ba83e..7973d6326f 100644
--- a/lib/eal/include/generic/rte_byteorder.h
+++ b/lib/eal/include/generic/rte_byteorder.h
@@ -24,6 +24,10 @@
 #include <rte_common.h>
 #include <rte_config.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /*
  * Compile-time endianness detection
  */
@@ -251,4 +255,8 @@ static uint64_t rte_be_to_cpu_64(rte_be64_t x);
 #endif
 #endif
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* _RTE_BYTEORDER_H_ */
diff --git a/lib/eal/include/generic/rte_cpuflags.h b/lib/eal/include/generic/rte_cpuflags.h
index d35551e931..bfe9df4516 100644
--- a/lib/eal/include/generic/rte_cpuflags.h
+++ b/lib/eal/include/generic/rte_cpuflags.h
@@ -15,6 +15,10 @@
 
 #include <rte_compat.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Structure used to describe platform-specific intrinsics that may or may not
  * be supported at runtime.
@@ -104,4 +108,8 @@ rte_cpu_getauxval(unsigned long type);
 int
 rte_cpu_strcmp_auxval(unsigned long type, const char *str);
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* _RTE_CPUFLAGS_H_ */
diff --git a/lib/eal/include/generic/rte_cycles.h b/lib/eal/include/generic/rte_cycles.h
index 075e899f5a..7cfd51f0eb 100644
--- a/lib/eal/include/generic/rte_cycles.h
+++ b/lib/eal/include/generic/rte_cycles.h
@@ -16,6 +16,10 @@
 #include <rte_debug.h>
 #include <rte_atomic.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define MS_PER_S 1000
 #define US_PER_S 1000000
 #define NS_PER_S 1000000000
@@ -175,4 +179,8 @@ void rte_delay_us_sleep(unsigned int us);
  */
 void rte_delay_us_callback_register(void(*userfunc)(unsigned int));
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* _RTE_CYCLES_H_ */
diff --git a/lib/eal/include/generic/rte_io.h b/lib/eal/include/generic/rte_io.h
index ebcf8051e1..73b0f7a9f4 100644
--- a/lib/eal/include/generic/rte_io.h
+++ b/lib/eal/include/generic/rte_io.h
@@ -17,6 +17,10 @@
 #include <rte_compat.h>
 #include <rte_atomic.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __DOXYGEN__
 
 /**
@@ -396,4 +400,8 @@ rte_write32_wc_relaxed(uint32_t value, volatile void *addr)
 
 #endif /* RTE_OVERRIDE_IO_H */
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* _RTE_IO_H_ */
diff --git a/lib/eal/include/generic/rte_memcpy.h b/lib/eal/include/generic/rte_memcpy.h
index e7f0f8eaa9..da53b72ca8 100644
--- a/lib/eal/include/generic/rte_memcpy.h
+++ b/lib/eal/include/generic/rte_memcpy.h
@@ -5,6 +5,10 @@
 #ifndef _RTE_MEMCPY_H_
 #define _RTE_MEMCPY_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @file
  *
@@ -113,4 +117,8 @@ rte_memcpy(void *dst, const void *src, size_t n);
 
 #endif /* __DOXYGEN__ */
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* _RTE_MEMCPY_H_ */
diff --git a/lib/eal/include/generic/rte_pause.h b/lib/eal/include/generic/rte_pause.h
index f2a1eadcbd..968c0886d3 100644
--- a/lib/eal/include/generic/rte_pause.h
+++ b/lib/eal/include/generic/rte_pause.h
@@ -19,6 +19,10 @@
 #include <rte_atomic.h>
 #include <rte_stdatomic.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Pause CPU execution for a short while
  *
@@ -136,4 +140,8 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
 } while (0)
 #endif /* ! RTE_WAIT_UNTIL_EQUAL_ARCH_DEFINED */
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* _RTE_PAUSE_H_ */
diff --git a/lib/eal/include/generic/rte_power_intrinsics.h b/lib/eal/include/generic/rte_power_intrinsics.h
index ea899f1bfa..86c0559468 100644
--- a/lib/eal/include/generic/rte_power_intrinsics.h
+++ b/lib/eal/include/generic/rte_power_intrinsics.h
@@ -9,6 +9,10 @@
 
 #include <rte_spinlock.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @file
  * Advanced power management operations.
@@ -147,4 +151,8 @@ int rte_power_pause(const uint64_t tsc_timestamp);
 int rte_power_monitor_multi(const struct rte_power_monitor_cond pmc[],
 		const uint32_t num, const uint64_t tsc_timestamp);
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* _RTE_POWER_INTRINSIC_H_ */
diff --git a/lib/eal/include/generic/rte_prefetch.h b/lib/eal/include/generic/rte_prefetch.h
index 773b3b8d1e..f7ac4ab48a 100644
--- a/lib/eal/include/generic/rte_prefetch.h
+++ b/lib/eal/include/generic/rte_prefetch.h
@@ -7,6 +7,10 @@
 
 #include <rte_compat.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @file
  *
@@ -146,4 +150,8 @@ __rte_experimental
 static inline void
 rte_cldemote(const volatile void *p);
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* _RTE_PREFETCH_H_ */
diff --git a/lib/eal/include/generic/rte_rwlock.h b/lib/eal/include/generic/rte_rwlock.h
index 5f939be98c..ac0474466a 100644
--- a/lib/eal/include/generic/rte_rwlock.h
+++ b/lib/eal/include/generic/rte_rwlock.h
@@ -22,10 +22,6 @@
  *  https://locklessinc.com/articles/locks/
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <errno.h>
 
 #include <rte_branch_prediction.h>
@@ -34,6 +30,10 @@ extern "C" {
 #include <rte_pause.h>
 #include <rte_stdatomic.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * The rte_rwlock_t type.
  *
diff --git a/lib/eal/include/generic/rte_spinlock.h b/lib/eal/include/generic/rte_spinlock.h
index 23fb04896f..c2980601b2 100644
--- a/lib/eal/include/generic/rte_spinlock.h
+++ b/lib/eal/include/generic/rte_spinlock.h
@@ -25,6 +25,10 @@
 #include <rte_pause.h>
 #include <rte_stdatomic.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * The rte_spinlock_t type.
  */
@@ -318,4 +322,8 @@ __rte_warn_unused_result
 static inline int rte_spinlock_recursive_trylock_tm(
 	rte_spinlock_recursive_t *slr);
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* _RTE_SPINLOCK_H_ */
diff --git a/lib/eal/include/generic/rte_vect.h b/lib/eal/include/generic/rte_vect.h
index 1f84292a41..b87520a4d9 100644
--- a/lib/eal/include/generic/rte_vect.h
+++ b/lib/eal/include/generic/rte_vect.h
@@ -209,6 +209,10 @@ enum rte_vect_max_simd {
 	 */
 };
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Get the supported SIMD bitwidth.
  *
@@ -230,4 +234,8 @@ uint16_t rte_vect_get_max_simd_bitwidth(void);
  */
 int rte_vect_set_max_simd_bitwidth(uint16_t bitwidth);
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* _RTE_VECT_H_ */
diff --git a/lib/eal/include/rte_alarm.h b/lib/eal/include/rte_alarm.h
index 7e4d0b2407..9b4721b77f 100644
--- a/lib/eal/include/rte_alarm.h
+++ b/lib/eal/include/rte_alarm.h
@@ -14,12 +14,12 @@
  * Does not require hpet support.
  */
 
+#include <stdint.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-
 /**
  * Signature of callback back function called when an alarm goes off.
  */
diff --git a/lib/eal/include/rte_bitmap.h b/lib/eal/include/rte_bitmap.h
index ebe46000a0..abb102f1d3 100644
--- a/lib/eal/include/rte_bitmap.h
+++ b/lib/eal/include/rte_bitmap.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_BITMAP_H__
 #define __INCLUDE_RTE_BITMAP_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Bitmap
@@ -43,6 +39,10 @@ extern "C" {
 #include <rte_branch_prediction.h>
 #include <rte_prefetch.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Slab */
 #define RTE_BITMAP_SLAB_BIT_SIZE                 64
 #define RTE_BITMAP_SLAB_BIT_SIZE_LOG2            6
diff --git a/lib/eal/include/rte_branch_prediction.h b/lib/eal/include/rte_branch_prediction.h
index c0356ca080..dd47c13ddc 100644
--- a/lib/eal/include/rte_branch_prediction.h
+++ b/lib/eal/include/rte_branch_prediction.h
@@ -10,10 +10,6 @@
 #ifndef _RTE_BRANCH_PREDICTION_H_
 #define _RTE_BRANCH_PREDICTION_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * Check if a branch is likely to be taken.
  *
@@ -48,8 +44,4 @@ extern "C" {
 #endif
 #endif /* unlikely */
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* _RTE_BRANCH_PREDICTION_H_ */
diff --git a/lib/eal/include/rte_bus.h b/lib/eal/include/rte_bus.h
index dfe756fb11..519f7b35f0 100644
--- a/lib/eal/include/rte_bus.h
+++ b/lib/eal/include/rte_bus.h
@@ -14,14 +14,14 @@
  * over the devices and drivers in EAL.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdio.h>
 
 #include <rte_eal.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct rte_bus;
 struct rte_device;
 
diff --git a/lib/eal/include/rte_class.h b/lib/eal/include/rte_class.h
index 16e544ec9a..7631e36e82 100644
--- a/lib/eal/include/rte_class.h
+++ b/lib/eal/include/rte_class.h
@@ -18,12 +18,12 @@
  * cryptographic co-processor (crypto), etc.
  */
 
+#include <rte_dev.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_dev.h>
-
 /** Double linked list of classes */
 RTE_TAILQ_HEAD(rte_class_list, rte_class);
 
diff --git a/lib/eal/include/rte_common.h b/lib/eal/include/rte_common.h
index eec0400dad..2486caa471 100644
--- a/lib/eal/include/rte_common.h
+++ b/lib/eal/include/rte_common.h
@@ -12,10 +12,6 @@
  * for DPDK.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <assert.h>
 #include <limits.h>
 #include <stdint.h>
@@ -26,6 +22,10 @@ extern "C" {
 /* OS specific include */
 #include <rte_os.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifndef RTE_TOOLCHAIN_MSVC
 #ifndef typeof
 #define typeof __typeof__
diff --git a/lib/eal/include/rte_compat.h b/lib/eal/include/rte_compat.h
index 716bc03616..97c1540bd0 100644
--- a/lib/eal/include/rte_compat.h
+++ b/lib/eal/include/rte_compat.h
@@ -6,10 +6,6 @@
 #ifndef _RTE_COMPAT_H_
 #define _RTE_COMPAT_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #ifndef ALLOW_EXPERIMENTAL_API
 
 #ifdef RTE_TOOLCHAIN_MSVC
@@ -70,8 +66,4 @@ __attribute__((section(".text.internal")))
 
 #endif
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* _RTE_COMPAT_H_ */
diff --git a/lib/eal/include/rte_dev.h b/lib/eal/include/rte_dev.h
index cefa04f905..738400e8d1 100644
--- a/lib/eal/include/rte_dev.h
+++ b/lib/eal/include/rte_dev.h
@@ -13,16 +13,16 @@
  * This file manages the list of device drivers.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdio.h>
 
 #include <rte_config.h>
 #include <rte_common.h>
 #include <rte_log.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct rte_bus;
 struct rte_devargs;
 struct rte_device;
diff --git a/lib/eal/include/rte_devargs.h b/lib/eal/include/rte_devargs.h
index 515e978bbe..ed5a4675d9 100644
--- a/lib/eal/include/rte_devargs.h
+++ b/lib/eal/include/rte_devargs.h
@@ -16,14 +16,14 @@
  * list of rte_devargs structures.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdio.h>
 
 #include <rte_dev.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct rte_bus;
 
 /**
diff --git a/lib/eal/include/rte_eal_trace.h b/lib/eal/include/rte_eal_trace.h
index c3d15bbe5e..9ad2112801 100644
--- a/lib/eal/include/rte_eal_trace.h
+++ b/lib/eal/include/rte_eal_trace.h
@@ -11,12 +11,12 @@
  * API for EAL trace support
  */
 
+#include <rte_trace_point.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_trace_point.h>
-
 /* Generic */
 RTE_TRACE_POINT(
 	rte_eal_trace_generic_void,
diff --git a/lib/eal/include/rte_errno.h b/lib/eal/include/rte_errno.h
index ba45591d24..c49818a40e 100644
--- a/lib/eal/include/rte_errno.h
+++ b/lib/eal/include/rte_errno.h
@@ -11,12 +11,12 @@
 #ifndef _RTE_ERRNO_H_
 #define _RTE_ERRNO_H_
 
+#include <rte_per_lcore.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_per_lcore.h>
-
 RTE_DECLARE_PER_LCORE(int, _rte_errno); /**< Per core error number. */
 
 /**
diff --git a/lib/eal/include/rte_fbarray.h b/lib/eal/include/rte_fbarray.h
index e33076778f..27dbfc2d6c 100644
--- a/lib/eal/include/rte_fbarray.h
+++ b/lib/eal/include/rte_fbarray.h
@@ -30,14 +30,14 @@
  * another process is using ``rte_fbarray``.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdio.h>
 
 #include <rte_rwlock.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_FBARRAY_NAME_LEN 64
 
 struct rte_fbarray {
diff --git a/lib/eal/include/rte_keepalive.h b/lib/eal/include/rte_keepalive.h
index 3ec413da01..9ff870f6b4 100644
--- a/lib/eal/include/rte_keepalive.h
+++ b/lib/eal/include/rte_keepalive.h
@@ -10,13 +10,13 @@
 #ifndef _KEEPALIVE_H_
 #define _KEEPALIVE_H_
 
+#include <rte_config.h>
+#include <rte_memory.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_config.h>
-#include <rte_memory.h>
-
 #ifndef RTE_KEEPALIVE_MAXCORES
 /**
  * Number of cores to track.
diff --git a/lib/eal/include/rte_mcslock.h b/lib/eal/include/rte_mcslock.h
index 0aeb1a09f4..bb218d2e50 100644
--- a/lib/eal/include/rte_mcslock.h
+++ b/lib/eal/include/rte_mcslock.h
@@ -19,16 +19,16 @@
  * they acquired the lock.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_lcore.h>
 #include <rte_common.h>
 #include <rte_pause.h>
 #include <rte_branch_prediction.h>
 #include <rte_stdatomic.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * The rte_mcslock_t type.
  */
diff --git a/lib/eal/include/rte_memory.h b/lib/eal/include/rte_memory.h
index 842362d527..dbd0a6bedc 100644
--- a/lib/eal/include/rte_memory.h
+++ b/lib/eal/include/rte_memory.h
@@ -15,16 +15,16 @@
 #include <stddef.h>
 #include <stdio.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_bitops.h>
 #include <rte_common.h>
 #include <rte_config.h>
 #include <rte_eal_memconfig.h>
 #include <rte_fbarray.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_PGSIZE_4K   (1ULL << 12)
 #define RTE_PGSIZE_64K  (1ULL << 16)
 #define RTE_PGSIZE_256K (1ULL << 18)
diff --git a/lib/eal/include/rte_pci_dev_feature_defs.h b/lib/eal/include/rte_pci_dev_feature_defs.h
index c5bb631286..e12c22081f 100644
--- a/lib/eal/include/rte_pci_dev_feature_defs.h
+++ b/lib/eal/include/rte_pci_dev_feature_defs.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_PCI_DEV_DEFS_H_
 #define _RTE_PCI_DEV_DEFS_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /* interrupt mode */
 enum rte_intr_mode {
 	RTE_INTR_MODE_NONE = 0,
@@ -17,8 +13,4 @@ enum rte_intr_mode {
 	RTE_INTR_MODE_MSIX
 };
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* _RTE_PCI_DEV_DEFS_H_ */
diff --git a/lib/eal/include/rte_pci_dev_features.h b/lib/eal/include/rte_pci_dev_features.h
index ee6e10590c..6104123d27 100644
--- a/lib/eal/include/rte_pci_dev_features.h
+++ b/lib/eal/include/rte_pci_dev_features.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_PCI_DEV_FEATURES_H
 #define _RTE_PCI_DEV_FEATURES_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_pci_dev_feature_defs.h>
 
 #define RTE_INTR_MODE_NONE_NAME "none"
@@ -16,8 +12,4 @@ extern "C" {
 #define RTE_INTR_MODE_MSI_NAME "msi"
 #define RTE_INTR_MODE_MSIX_NAME "msix"
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif
diff --git a/lib/eal/include/rte_per_lcore.h b/lib/eal/include/rte_per_lcore.h
index 529995ee17..68a485bd6b 100644
--- a/lib/eal/include/rte_per_lcore.h
+++ b/lib/eal/include/rte_per_lcore.h
@@ -18,10 +18,6 @@
  * Parts of this are execution environment specific.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #ifdef RTE_TOOLCHAIN_MSVC
 #define RTE_DEFINE_PER_LCORE(type, name)			\
 	__declspec(thread) type per_lcore_##name
@@ -49,8 +45,4 @@ extern "C" {
  */
 #define RTE_PER_LCORE(name) (per_lcore_##name)
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* _RTE_PER_LCORE_H_ */
diff --git a/lib/eal/include/rte_pflock.h b/lib/eal/include/rte_pflock.h
index 37aa223ac3..6797ce5920 100644
--- a/lib/eal/include/rte_pflock.h
+++ b/lib/eal/include/rte_pflock.h
@@ -27,14 +27,14 @@
  * All locks must be initialised before use, and only initialised once.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 #include <rte_pause.h>
 #include <rte_stdatomic.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * The rte_pflock_t type.
  */
diff --git a/lib/eal/include/rte_random.h b/lib/eal/include/rte_random.h
index 5031c6fe5f..15cbe6215a 100644
--- a/lib/eal/include/rte_random.h
+++ b/lib/eal/include/rte_random.h
@@ -11,12 +11,12 @@
  * Pseudo-random Generators in RTE
  */
 
+#include <stdint.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-
 /**
  * Seed the pseudo-random generator.
  *
diff --git a/lib/eal/include/rte_seqcount.h b/lib/eal/include/rte_seqcount.h
index 88a6746900..d71afa6ab7 100644
--- a/lib/eal/include/rte_seqcount.h
+++ b/lib/eal/include/rte_seqcount.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_SEQCOUNT_H_
 #define _RTE_SEQCOUNT_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Seqcount
@@ -27,6 +23,10 @@ extern "C" {
 #include <rte_branch_prediction.h>
 #include <rte_stdatomic.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * The RTE seqcount type.
  */
diff --git a/lib/eal/include/rte_seqlock.h b/lib/eal/include/rte_seqlock.h
index 2677bd9440..e0e94900d1 100644
--- a/lib/eal/include/rte_seqlock.h
+++ b/lib/eal/include/rte_seqlock.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_SEQLOCK_H_
 #define _RTE_SEQLOCK_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Seqlock
@@ -95,6 +91,10 @@ extern "C" {
 #include <rte_seqcount.h>
 #include <rte_spinlock.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * The RTE seqlock type.
  */
diff --git a/lib/eal/include/rte_service.h b/lib/eal/include/rte_service.h
index e49a7a877e..94919ae584 100644
--- a/lib/eal/include/rte_service.h
+++ b/lib/eal/include/rte_service.h
@@ -23,16 +23,16 @@
  * application has access to the remaining lcores as normal.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include<stdio.h>
 #include <stdint.h>
 
 #include <rte_config.h>
 #include <rte_lcore.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_SERVICE_NAME_MAX 32
 
 /* Capabilities of a service.
diff --git a/lib/eal/include/rte_service_component.h b/lib/eal/include/rte_service_component.h
index a5350c97e5..acdf45cf60 100644
--- a/lib/eal/include/rte_service_component.h
+++ b/lib/eal/include/rte_service_component.h
@@ -10,12 +10,12 @@
  * operate, and you wish to run the component using service cores
  */
 
+#include <rte_service.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_service.h>
-
 /**
  * Signature of callback function to run a service.
  *
diff --git a/lib/eal/include/rte_stdatomic.h b/lib/eal/include/rte_stdatomic.h
index 7a081cb500..0f11a15e4e 100644
--- a/lib/eal/include/rte_stdatomic.h
+++ b/lib/eal/include/rte_stdatomic.h
@@ -7,10 +7,6 @@
 
 #include <assert.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #ifdef RTE_ENABLE_STDATOMIC
 #ifndef _MSC_VER
 #ifdef __STDC_NO_ATOMICS__
@@ -188,6 +184,7 @@ typedef int rte_memory_order;
 #endif
 
 #ifdef __cplusplus
+extern "C" {
 }
 #endif
 
diff --git a/lib/eal/include/rte_string_fns.h b/lib/eal/include/rte_string_fns.h
index 13badec7b3..702bd81251 100644
--- a/lib/eal/include/rte_string_fns.h
+++ b/lib/eal/include/rte_string_fns.h
@@ -11,10 +11,6 @@
 #ifndef _RTE_STRING_FNS_H_
 #define _RTE_STRING_FNS_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <ctype.h>
 #include <stdio.h>
 #include <string.h>
@@ -22,6 +18,10 @@ extern "C" {
 #include <rte_common.h>
 #include <rte_compat.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Takes string "string" parameter and splits it at character "delim"
  * up to maxtokens-1 times - to give "maxtokens" resulting tokens. Like
@@ -77,6 +77,10 @@ rte_strlcat(char *dst, const char *src, size_t size)
 	return l + strlen(src);
 }
 
+#ifdef __cplusplus
+}
+#endif
+
 /* pull in a strlcpy function */
 #ifdef RTE_EXEC_ENV_FREEBSD
 #ifndef __BSD_VISIBLE /* non-standard functions are hidden */
@@ -95,6 +99,10 @@ rte_strlcat(char *dst, const char *src, size_t size)
 #endif /* RTE_USE_LIBBSD */
 #endif /* FREEBSD */
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Copy string src to buffer dst of size dsize.
  * At most dsize-1 chars will be copied.
@@ -141,7 +149,6 @@ rte_str_skip_leading_spaces(const char *src)
 	return p;
 }
 
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/include/rte_tailq.h b/lib/eal/include/rte_tailq.h
index 931d549e59..89f7ef2134 100644
--- a/lib/eal/include/rte_tailq.h
+++ b/lib/eal/include/rte_tailq.h
@@ -10,13 +10,13 @@
  *  Here defines rte_tailq APIs for only internal use
  */
 
+#include <stdio.h>
+#include <rte_debug.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdio.h>
-#include <rte_debug.h>
-
 /** dummy structure type used by the rte_tailq APIs */
 struct rte_tailq_entry {
 	RTE_TAILQ_ENTRY(rte_tailq_entry) next; /**< Pointer entries for a tailq list */
diff --git a/lib/eal/include/rte_ticketlock.h b/lib/eal/include/rte_ticketlock.h
index 73884eb07b..e60f60699c 100644
--- a/lib/eal/include/rte_ticketlock.h
+++ b/lib/eal/include/rte_ticketlock.h
@@ -17,15 +17,15 @@
  * All locks must be initialised before use, and only initialised once.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 #include <rte_lcore.h>
 #include <rte_pause.h>
 #include <rte_stdatomic.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * The rte_ticketlock_t type.
  */
diff --git a/lib/eal/include/rte_time.h b/lib/eal/include/rte_time.h
index ec25f7b93d..c5c3a233e4 100644
--- a/lib/eal/include/rte_time.h
+++ b/lib/eal/include/rte_time.h
@@ -5,13 +5,13 @@
 #ifndef _RTE_TIME_H_
 #define _RTE_TIME_H_
 
+#include <stdint.h>
+#include <time.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-#include <time.h>
-
 #define NSEC_PER_SEC             1000000000L
 
 /**
diff --git a/lib/eal/include/rte_trace.h b/lib/eal/include/rte_trace.h
index a6e991fad3..1c824b2158 100644
--- a/lib/eal/include/rte_trace.h
+++ b/lib/eal/include/rte_trace.h
@@ -16,16 +16,16 @@
  * @b EXPERIMENTAL: this API may change without prior notice
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdbool.h>
 #include <stdio.h>
 
 #include <rte_common.h>
 #include <rte_compat.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  *  Test if trace is enabled.
  *
diff --git a/lib/eal/include/rte_trace_point.h b/lib/eal/include/rte_trace_point.h
index 41e2a7f99e..bc737d585e 100644
--- a/lib/eal/include/rte_trace_point.h
+++ b/lib/eal/include/rte_trace_point.h
@@ -16,10 +16,6 @@
  * @b EXPERIMENTAL: this API may change without prior notice
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdbool.h>
 #include <stdio.h>
 
@@ -32,6 +28,10 @@ extern "C" {
 #include <rte_string_fns.h>
 #include <rte_uuid.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** The tracepoint object. */
 typedef RTE_ATOMIC(uint64_t) rte_trace_point_t;
 
diff --git a/lib/eal/include/rte_trace_point_register.h b/lib/eal/include/rte_trace_point_register.h
index 41260e5964..8726338fe4 100644
--- a/lib/eal/include/rte_trace_point_register.h
+++ b/lib/eal/include/rte_trace_point_register.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_TRACE_POINT_REGISTER_H_
 #define _RTE_TRACE_POINT_REGISTER_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #ifdef _RTE_TRACE_POINT_H_
 #error for registration, include this file first before <rte_trace_point.h>
 #endif
@@ -16,6 +12,10 @@ extern "C" {
 #include <rte_per_lcore.h>
 #include <rte_trace_point.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 RTE_DECLARE_PER_LCORE(volatile int, trace_point_sz);
 
 #define RTE_TRACE_POINT_REGISTER(trace, name) \
diff --git a/lib/eal/include/rte_uuid.h b/lib/eal/include/rte_uuid.h
index cfefd4308a..def5907a00 100644
--- a/lib/eal/include/rte_uuid.h
+++ b/lib/eal/include/rte_uuid.h
@@ -10,14 +10,14 @@
 #ifndef _RTE_UUID_H_
 #define _RTE_UUID_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdbool.h>
 #include <stddef.h>
 #include <string.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Struct describing a Universal Unique Identifier
  */
diff --git a/lib/eal/include/rte_version.h b/lib/eal/include/rte_version.h
index 422d00fdff..be3f753617 100644
--- a/lib/eal/include/rte_version.h
+++ b/lib/eal/include/rte_version.h
@@ -10,13 +10,13 @@
 #ifndef _RTE_VERSION_H_
 #define _RTE_VERSION_H_
 
+#include <string.h>
+#include <stdio.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <string.h>
-#include <stdio.h>
-
 /**
  * Macro to compute a version number usable for comparisons
  */
diff --git a/lib/eal/include/rte_vfio.h b/lib/eal/include/rte_vfio.h
index b774625d9f..923293040b 100644
--- a/lib/eal/include/rte_vfio.h
+++ b/lib/eal/include/rte_vfio.h
@@ -10,10 +10,6 @@
  * RTE VFIO. This library provides various VFIO related utility functions.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdbool.h>
 #include <stdint.h>
 
@@ -32,6 +28,10 @@ extern "C" {
 #endif /* kernel version >= 4.0.0 */
 #endif /* RTE_EAL_VFIO */
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef VFIO_PRESENT
 
 #include <linux/vfio.h>
diff --git a/lib/eal/linux/include/rte_os.h b/lib/eal/linux/include/rte_os.h
index c72bf5b7e6..35c07c70cb 100644
--- a/lib/eal/linux/include/rte_os.h
+++ b/lib/eal/linux/include/rte_os.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_OS_H_
 #define _RTE_OS_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * This header should contain any definition
  * which is not supported natively or named differently in Linux.
@@ -46,8 +42,4 @@ typedef cpu_set_t rte_cpuset_t;
 } while (0)
 #endif
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* _RTE_OS_H_ */
diff --git a/lib/eal/loongarch/include/rte_atomic.h b/lib/eal/loongarch/include/rte_atomic.h
index 0510b8f781..c8066a4612 100644
--- a/lib/eal/loongarch/include/rte_atomic.h
+++ b/lib/eal/loongarch/include/rte_atomic.h
@@ -9,13 +9,13 @@
 #  error Platform must be built with RTE_FORCE_INTRINSICS
 #endif
 
+#include <rte_common.h>
+#include "generic/rte_atomic.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_common.h>
-#include "generic/rte_atomic.h"
-
 #define rte_mb()	do { asm volatile("dbar 0":::"memory"); } while (0)
 
 #define rte_wmb()	rte_mb()
diff --git a/lib/eal/loongarch/include/rte_byteorder.h b/lib/eal/loongarch/include/rte_byteorder.h
index 0da6097a4f..9b092e2a59 100644
--- a/lib/eal/loongarch/include/rte_byteorder.h
+++ b/lib/eal/loongarch/include/rte_byteorder.h
@@ -5,12 +5,12 @@
 #ifndef RTE_BYTEORDER_LOONGARCH_H
 #define RTE_BYTEORDER_LOONGARCH_H
 
+#include "generic/rte_byteorder.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_byteorder.h"
-
 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
 
 #define rte_cpu_to_le_16(x) (x)
diff --git a/lib/eal/loongarch/include/rte_cpuflags.h b/lib/eal/loongarch/include/rte_cpuflags.h
index 6b592c147c..c4e93255a5 100644
--- a/lib/eal/loongarch/include/rte_cpuflags.h
+++ b/lib/eal/loongarch/include/rte_cpuflags.h
@@ -5,10 +5,6 @@
 #ifndef RTE_CPUFLAGS_LOONGARCH_H
 #define RTE_CPUFLAGS_LOONGARCH_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * Enumeration of all CPU features supported
  */
@@ -30,8 +26,4 @@ enum rte_cpu_flag_t {
 
 #include "generic/rte_cpuflags.h"
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* RTE_CPUFLAGS_LOONGARCH_H */
diff --git a/lib/eal/loongarch/include/rte_cycles.h b/lib/eal/loongarch/include/rte_cycles.h
index f612d1ad10..128c8646e9 100644
--- a/lib/eal/loongarch/include/rte_cycles.h
+++ b/lib/eal/loongarch/include/rte_cycles.h
@@ -5,12 +5,12 @@
 #ifndef RTE_CYCLES_LOONGARCH_H
 #define RTE_CYCLES_LOONGARCH_H
 
+#include "generic/rte_cycles.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_cycles.h"
-
 /**
  * Read the time base register.
  *
diff --git a/lib/eal/loongarch/include/rte_io.h b/lib/eal/loongarch/include/rte_io.h
index 40e40efa86..551774a0c7 100644
--- a/lib/eal/loongarch/include/rte_io.h
+++ b/lib/eal/loongarch/include/rte_io.h
@@ -5,14 +5,6 @@
 #ifndef RTE_IO_LOONGARCH_H
 #define RTE_IO_LOONGARCH_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include "generic/rte_io.h"
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* RTE_IO_LOONGARCH_H */
diff --git a/lib/eal/loongarch/include/rte_memcpy.h b/lib/eal/loongarch/include/rte_memcpy.h
index 22578d40f4..5412a0fdc1 100644
--- a/lib/eal/loongarch/include/rte_memcpy.h
+++ b/lib/eal/loongarch/include/rte_memcpy.h
@@ -10,12 +10,12 @@
 
 #include "rte_common.h"
 
+#include "generic/rte_memcpy.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_memcpy.h"
-
 static inline void
 rte_mov16(uint8_t *dst, const uint8_t *src)
 {
diff --git a/lib/eal/loongarch/include/rte_pause.h b/lib/eal/loongarch/include/rte_pause.h
index 4302e1b9be..cffa2874d6 100644
--- a/lib/eal/loongarch/include/rte_pause.h
+++ b/lib/eal/loongarch/include/rte_pause.h
@@ -5,14 +5,14 @@
 #ifndef RTE_PAUSE_LOONGARCH_H
 #define RTE_PAUSE_LOONGARCH_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include "rte_atomic.h"
 
 #include "generic/rte_pause.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void rte_pause(void)
 {
 }
diff --git a/lib/eal/loongarch/include/rte_power_intrinsics.h b/lib/eal/loongarch/include/rte_power_intrinsics.h
index d5dbd94567..63cb837240 100644
--- a/lib/eal/loongarch/include/rte_power_intrinsics.h
+++ b/lib/eal/loongarch/include/rte_power_intrinsics.h
@@ -5,16 +5,8 @@
 #ifndef RTE_POWER_INTRINSIC_LOONGARCH_H
 #define RTE_POWER_INTRINSIC_LOONGARCH_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 
 #include "generic/rte_power_intrinsics.h"
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* RTE_POWER_INTRINSIC_LOONGARCH_H */
diff --git a/lib/eal/loongarch/include/rte_prefetch.h b/lib/eal/loongarch/include/rte_prefetch.h
index 64b1fd2c2a..8da08a5566 100644
--- a/lib/eal/loongarch/include/rte_prefetch.h
+++ b/lib/eal/loongarch/include/rte_prefetch.h
@@ -5,14 +5,14 @@
 #ifndef RTE_PREFETCH_LOONGARCH_H
 #define RTE_PREFETCH_LOONGARCH_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include <rte_common.h>
 #include "generic/rte_prefetch.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void rte_prefetch0(const volatile void *p)
 {
 	__builtin_prefetch((const void *)(uintptr_t)p, 0, 3);
diff --git a/lib/eal/loongarch/include/rte_rwlock.h b/lib/eal/loongarch/include/rte_rwlock.h
index aedc6f3349..48924599c5 100644
--- a/lib/eal/loongarch/include/rte_rwlock.h
+++ b/lib/eal/loongarch/include/rte_rwlock.h
@@ -5,12 +5,12 @@
 #ifndef RTE_RWLOCK_LOONGARCH_H
 #define RTE_RWLOCK_LOONGARCH_H
 
+#include "generic/rte_rwlock.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_rwlock.h"
-
 static inline void
 rte_rwlock_read_lock_tm(rte_rwlock_t *rwl)
 {
diff --git a/lib/eal/loongarch/include/rte_spinlock.h b/lib/eal/loongarch/include/rte_spinlock.h
index e8d34e9728..38f00f631d 100644
--- a/lib/eal/loongarch/include/rte_spinlock.h
+++ b/lib/eal/loongarch/include/rte_spinlock.h
@@ -5,13 +5,13 @@
 #ifndef RTE_SPINLOCK_LOONGARCH_H
 #define RTE_SPINLOCK_LOONGARCH_H
 
+#include <rte_common.h>
+#include "generic/rte_spinlock.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_common.h>
-#include "generic/rte_spinlock.h"
-
 #ifndef RTE_FORCE_INTRINSICS
 #  error Platform must be built with RTE_FORCE_INTRINSICS
 #endif
diff --git a/lib/eal/ppc/include/rte_atomic.h b/lib/eal/ppc/include/rte_atomic.h
index 645c7132df..6ce2e5188a 100644
--- a/lib/eal/ppc/include/rte_atomic.h
+++ b/lib/eal/ppc/include/rte_atomic.h
@@ -12,13 +12,13 @@
 #ifndef _RTE_ATOMIC_PPC_64_H_
 #define _RTE_ATOMIC_PPC_64_H_
 
+#include <stdint.h>
+#include "generic/rte_atomic.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-#include "generic/rte_atomic.h"
-
 #define	rte_mb()  asm volatile("sync" : : : "memory")
 
 #define	rte_wmb() asm volatile("sync" : : : "memory")
diff --git a/lib/eal/ppc/include/rte_byteorder.h b/lib/eal/ppc/include/rte_byteorder.h
index de94e2ad32..1d19e96f72 100644
--- a/lib/eal/ppc/include/rte_byteorder.h
+++ b/lib/eal/ppc/include/rte_byteorder.h
@@ -8,13 +8,13 @@
 #ifndef _RTE_BYTEORDER_PPC_64_H_
 #define _RTE_BYTEORDER_PPC_64_H_
 
+#include <stdint.h>
+#include "generic/rte_byteorder.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-#include "generic/rte_byteorder.h"
-
 /*
  * An architecture-optimized byte swap for a 16-bit value.
  *
diff --git a/lib/eal/ppc/include/rte_cpuflags.h b/lib/eal/ppc/include/rte_cpuflags.h
index dedc1ab469..fda7075a2d 100644
--- a/lib/eal/ppc/include/rte_cpuflags.h
+++ b/lib/eal/ppc/include/rte_cpuflags.h
@@ -6,10 +6,6 @@
 #ifndef _RTE_CPUFLAGS_PPC_64_H_
 #define _RTE_CPUFLAGS_PPC_64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * Enumeration of all CPU features supported
  */
@@ -52,8 +48,4 @@ enum rte_cpu_flag_t {
 
 #include "generic/rte_cpuflags.h"
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* _RTE_CPUFLAGS_PPC_64_H_ */
diff --git a/lib/eal/ppc/include/rte_cycles.h b/lib/eal/ppc/include/rte_cycles.h
index 666fc9b0bf..1e6e6cccc8 100644
--- a/lib/eal/ppc/include/rte_cycles.h
+++ b/lib/eal/ppc/include/rte_cycles.h
@@ -6,10 +6,6 @@
 #ifndef _RTE_CYCLES_PPC_64_H_
 #define _RTE_CYCLES_PPC_64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <features.h>
 #ifdef __GLIBC__
 #include <sys/platform/ppc.h>
@@ -20,6 +16,10 @@ extern "C" {
 #include <rte_byteorder.h>
 #include <rte_common.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Read the time base register.
  *
diff --git a/lib/eal/ppc/include/rte_io.h b/lib/eal/ppc/include/rte_io.h
index 01455065e5..87beded069 100644
--- a/lib/eal/ppc/include/rte_io.h
+++ b/lib/eal/ppc/include/rte_io.h
@@ -5,14 +5,6 @@
 #ifndef _RTE_IO_PPC_64_H_
 #define _RTE_IO_PPC_64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include "generic/rte_io.h"
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* _RTE_IO_PPC_64_H_ */
diff --git a/lib/eal/ppc/include/rte_memcpy.h b/lib/eal/ppc/include/rte_memcpy.h
index 6f388c0234..eae73128c4 100644
--- a/lib/eal/ppc/include/rte_memcpy.h
+++ b/lib/eal/ppc/include/rte_memcpy.h
@@ -12,12 +12,12 @@
 #include "rte_altivec.h"
 #include "rte_common.h"
 
+#include "generic/rte_memcpy.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_memcpy.h"
-
 #if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION >= 90000)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Warray-bounds"
diff --git a/lib/eal/ppc/include/rte_pause.h b/lib/eal/ppc/include/rte_pause.h
index 16e47ce22f..78a73aceed 100644
--- a/lib/eal/ppc/include/rte_pause.h
+++ b/lib/eal/ppc/include/rte_pause.h
@@ -5,14 +5,14 @@
 #ifndef _RTE_PAUSE_PPC64_H_
 #define _RTE_PAUSE_PPC64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include "rte_atomic.h"
 
 #include "generic/rte_pause.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void rte_pause(void)
 {
 	/* Set hardware multi-threading low priority */
diff --git a/lib/eal/ppc/include/rte_power_intrinsics.h b/lib/eal/ppc/include/rte_power_intrinsics.h
index c0e9ac279f..3255f75448 100644
--- a/lib/eal/ppc/include/rte_power_intrinsics.h
+++ b/lib/eal/ppc/include/rte_power_intrinsics.h
@@ -5,16 +5,8 @@
 #ifndef _RTE_POWER_INTRINSIC_PPC_H_
 #define _RTE_POWER_INTRINSIC_PPC_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 
 #include "generic/rte_power_intrinsics.h"
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* _RTE_POWER_INTRINSIC_PPC_H_ */
diff --git a/lib/eal/ppc/include/rte_prefetch.h b/lib/eal/ppc/include/rte_prefetch.h
index 2e1b5751e0..bae95af7bf 100644
--- a/lib/eal/ppc/include/rte_prefetch.h
+++ b/lib/eal/ppc/include/rte_prefetch.h
@@ -6,14 +6,14 @@
 #ifndef _RTE_PREFETCH_PPC_64_H_
 #define _RTE_PREFETCH_PPC_64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include <rte_common.h>
 #include "generic/rte_prefetch.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void rte_prefetch0(const volatile void *p)
 {
 	asm volatile ("dcbt 0,%[p],0" : : [p] "r" (p));
diff --git a/lib/eal/ppc/include/rte_rwlock.h b/lib/eal/ppc/include/rte_rwlock.h
index 9fadc04076..bee8da4070 100644
--- a/lib/eal/ppc/include/rte_rwlock.h
+++ b/lib/eal/ppc/include/rte_rwlock.h
@@ -3,12 +3,12 @@
 #ifndef _RTE_RWLOCK_PPC_64_H_
 #define _RTE_RWLOCK_PPC_64_H_
 
+#include "generic/rte_rwlock.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_rwlock.h"
-
 static inline void
 rte_rwlock_read_lock_tm(rte_rwlock_t *rwl)
 {
diff --git a/lib/eal/ppc/include/rte_spinlock.h b/lib/eal/ppc/include/rte_spinlock.h
index 3a4c905b22..77f90f974a 100644
--- a/lib/eal/ppc/include/rte_spinlock.h
+++ b/lib/eal/ppc/include/rte_spinlock.h
@@ -6,14 +6,14 @@
 #ifndef _RTE_SPINLOCK_PPC_64_H_
 #define _RTE_SPINLOCK_PPC_64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 #include <rte_pause.h>
 #include "generic/rte_spinlock.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* Fixme: Use intrinsics to implement the spinlock on Power architecture */
 
 #ifndef RTE_FORCE_INTRINSICS
diff --git a/lib/eal/riscv/include/rte_atomic.h b/lib/eal/riscv/include/rte_atomic.h
index 2603bc90ea..66346ad474 100644
--- a/lib/eal/riscv/include/rte_atomic.h
+++ b/lib/eal/riscv/include/rte_atomic.h
@@ -12,15 +12,15 @@
 #  error Platform must be built with RTE_FORCE_INTRINSICS
 #endif
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <rte_common.h>
 #include <rte_config.h>
 #include "generic/rte_atomic.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define rte_mb()	asm volatile("fence rw, rw" : : : "memory")
 
 #define rte_wmb()	asm volatile("fence w, w" : : : "memory")
diff --git a/lib/eal/riscv/include/rte_byteorder.h b/lib/eal/riscv/include/rte_byteorder.h
index 25bd0c275d..c9ff5c0dd1 100644
--- a/lib/eal/riscv/include/rte_byteorder.h
+++ b/lib/eal/riscv/include/rte_byteorder.h
@@ -8,14 +8,14 @@
 #ifndef RTE_BYTEORDER_RISCV_H
 #define RTE_BYTEORDER_RISCV_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <rte_common.h>
 #include "generic/rte_byteorder.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifndef RTE_BYTE_ORDER
 #define RTE_BYTE_ORDER RTE_LITTLE_ENDIAN
 #endif
diff --git a/lib/eal/riscv/include/rte_cpuflags.h b/lib/eal/riscv/include/rte_cpuflags.h
index d742efc40f..b1bd7953d4 100644
--- a/lib/eal/riscv/include/rte_cpuflags.h
+++ b/lib/eal/riscv/include/rte_cpuflags.h
@@ -8,10 +8,6 @@
 #ifndef RTE_CPUFLAGS_RISCV_H
 #define RTE_CPUFLAGS_RISCV_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * Enumeration of all CPU features supported
  */
@@ -46,8 +42,4 @@ enum rte_cpu_flag_t {
 
 #include "generic/rte_cpuflags.h"
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* RTE_CPUFLAGS_RISCV_H */
diff --git a/lib/eal/riscv/include/rte_cycles.h b/lib/eal/riscv/include/rte_cycles.h
index 04750ca253..7926809a73 100644
--- a/lib/eal/riscv/include/rte_cycles.h
+++ b/lib/eal/riscv/include/rte_cycles.h
@@ -8,12 +8,12 @@
 #ifndef RTE_CYCLES_RISCV_H
 #define RTE_CYCLES_RISCV_H
 
+#include "generic/rte_cycles.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_cycles.h"
-
 #ifndef RTE_RISCV_RDTSC_USE_HPM
 #define RTE_RISCV_RDTSC_USE_HPM 0
 #endif
diff --git a/lib/eal/riscv/include/rte_io.h b/lib/eal/riscv/include/rte_io.h
index 29659c9590..4ae1f087ba 100644
--- a/lib/eal/riscv/include/rte_io.h
+++ b/lib/eal/riscv/include/rte_io.h
@@ -8,14 +8,6 @@
 #ifndef RTE_IO_RISCV_H
 #define RTE_IO_RISCV_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include "generic/rte_io.h"
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* RTE_IO_RISCV_H */
diff --git a/lib/eal/riscv/include/rte_memcpy.h b/lib/eal/riscv/include/rte_memcpy.h
index e34f19396e..d8a942c5d2 100644
--- a/lib/eal/riscv/include/rte_memcpy.h
+++ b/lib/eal/riscv/include/rte_memcpy.h
@@ -12,12 +12,12 @@
 
 #include "rte_common.h"
 
+#include "generic/rte_memcpy.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_memcpy.h"
-
 static inline void
 rte_mov16(uint8_t *dst, const uint8_t *src)
 {
diff --git a/lib/eal/riscv/include/rte_pause.h b/lib/eal/riscv/include/rte_pause.h
index cb8e9ca52d..3f473cd8db 100644
--- a/lib/eal/riscv/include/rte_pause.h
+++ b/lib/eal/riscv/include/rte_pause.h
@@ -7,14 +7,14 @@
 #ifndef RTE_PAUSE_RISCV_H
 #define RTE_PAUSE_RISCV_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include "rte_atomic.h"
 
 #include "generic/rte_pause.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void rte_pause(void)
 {
 	/* Insert pause hint directly to be compatible with old compilers.
diff --git a/lib/eal/riscv/include/rte_power_intrinsics.h b/lib/eal/riscv/include/rte_power_intrinsics.h
index 636e58e71f..9e7107f0ea 100644
--- a/lib/eal/riscv/include/rte_power_intrinsics.h
+++ b/lib/eal/riscv/include/rte_power_intrinsics.h
@@ -7,16 +7,8 @@
 #ifndef RTE_POWER_INTRINSIC_RISCV_H
 #define RTE_POWER_INTRINSIC_RISCV_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 
 #include "generic/rte_power_intrinsics.h"
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* RTE_POWER_INTRINSIC_RISCV_H */
diff --git a/lib/eal/riscv/include/rte_prefetch.h b/lib/eal/riscv/include/rte_prefetch.h
index 748cf1b626..42146491ea 100644
--- a/lib/eal/riscv/include/rte_prefetch.h
+++ b/lib/eal/riscv/include/rte_prefetch.h
@@ -8,14 +8,14 @@
 #ifndef RTE_PREFETCH_RISCV_H
 #define RTE_PREFETCH_RISCV_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include <rte_common.h>
 #include "generic/rte_prefetch.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void rte_prefetch0(const volatile void *p)
 {
 	RTE_SET_USED(p);
diff --git a/lib/eal/riscv/include/rte_rwlock.h b/lib/eal/riscv/include/rte_rwlock.h
index 9cdaf1b0ef..730970eecb 100644
--- a/lib/eal/riscv/include/rte_rwlock.h
+++ b/lib/eal/riscv/include/rte_rwlock.h
@@ -7,12 +7,12 @@
 #ifndef RTE_RWLOCK_RISCV_H
 #define RTE_RWLOCK_RISCV_H
 
+#include "generic/rte_rwlock.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_rwlock.h"
-
 static inline void
 rte_rwlock_read_lock_tm(rte_rwlock_t *rwl)
 {
diff --git a/lib/eal/riscv/include/rte_spinlock.h b/lib/eal/riscv/include/rte_spinlock.h
index 6af430735c..5fe4980e44 100644
--- a/lib/eal/riscv/include/rte_spinlock.h
+++ b/lib/eal/riscv/include/rte_spinlock.h
@@ -12,13 +12,13 @@
 #  error Platform must be built with RTE_FORCE_INTRINSICS
 #endif
 
+#include <rte_common.h>
+#include "generic/rte_spinlock.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_common.h>
-#include "generic/rte_spinlock.h"
-
 static inline int rte_tm_supported(void)
 {
 	return 0;
diff --git a/lib/eal/windows/include/pthread.h b/lib/eal/windows/include/pthread.h
index 051b9311c2..e1c31017d1 100644
--- a/lib/eal/windows/include/pthread.h
+++ b/lib/eal/windows/include/pthread.h
@@ -13,13 +13,13 @@
  * eal_common_thread.c and common\include\rte_per_lcore.h as Microsoft libc
  * does not contain pthread.h. This may be removed in future releases.
  */
+#include <rte_common.h>
+#include <rte_windows.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_common.h>
-#include <rte_windows.h>
-
 #define PTHREAD_BARRIER_SERIAL_THREAD TRUE
 
 /* defining pthread_t type on Windows since there is no in Microsoft libc*/
diff --git a/lib/eal/windows/include/regex.h b/lib/eal/windows/include/regex.h
index 827f938414..a224c0cd29 100644
--- a/lib/eal/windows/include/regex.h
+++ b/lib/eal/windows/include/regex.h
@@ -10,15 +10,15 @@
  * as Microsoft libc does not contain regex.h. This may be removed in
  * future releases.
  */
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #define REG_NOMATCH 1
 #define REG_ESPACE 12
 
 #include <rte_common.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* defining regex_t for Windows */
 typedef void *regex_t;
 /* defining regmatch_t for Windows */
diff --git a/lib/eal/windows/include/rte_os.h b/lib/eal/windows/include/rte_os.h
index 1c33058cbe..9d69467aaa 100644
--- a/lib/eal/windows/include/rte_os.h
+++ b/lib/eal/windows/include/rte_os.h
@@ -16,10 +16,6 @@
 
 #include <sched.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /* These macros are compatible with bundled sys/queue.h. */
 #define RTE_TAILQ_HEAD(name, type) \
 struct name { \
@@ -59,8 +55,4 @@ struct { \
  */
 typedef long long ssize_t;
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* _RTE_OS_H_ */
diff --git a/lib/eal/windows/include/rte_windows.h b/lib/eal/windows/include/rte_windows.h
index 567ed7d820..9bba8e74d5 100644
--- a/lib/eal/windows/include/rte_windows.h
+++ b/lib/eal/windows/include/rte_windows.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_WINDOWS_H_
 #define _RTE_WINDOWS_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file Windows-specific facilities
  *
@@ -51,8 +47,4 @@ extern "C" {
 	RTE_LOG_LINE_PREFIX(DEBUG, EAL, \
 		"GetLastError()=%lu: ", GetLastError(), __VA_ARGS__)
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* _RTE_WINDOWS_H_ */
diff --git a/lib/eal/x86/include/rte_atomic.h b/lib/eal/x86/include/rte_atomic.h
index 74b1b24b7a..c72c47c83e 100644
--- a/lib/eal/x86/include/rte_atomic.h
+++ b/lib/eal/x86/include/rte_atomic.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_ATOMIC_X86_H_
 #define _RTE_ATOMIC_X86_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <rte_common.h>
 #include <rte_config.h>
@@ -31,6 +27,10 @@ extern "C" {
 
 #define rte_smp_rmb() rte_compiler_barrier()
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /*
  * From Intel Software Development Manual; Vol 3;
  * 8.2.2 Memory Ordering in P6 and More Recent Processor Families:
@@ -99,10 +99,18 @@ rte_atomic_thread_fence(rte_memory_order memorder)
 		__rte_atomic_thread_fence(memorder);
 }
 
+#ifdef __cplusplus
+}
+#endif
+
 #ifndef RTE_TOOLCHAIN_MSVC
 
 /*------------------------- 16 bit atomic operations -------------------------*/
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifndef RTE_FORCE_INTRINSICS
 static inline int
 rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src)
@@ -273,6 +281,11 @@ static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v)
 			);
 	return ret != 0;
 }
+
+#ifdef __cplusplus
+}
+#endif
+
 #endif
 
 #ifdef RTE_ARCH_I686
@@ -283,8 +296,4 @@ static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v)
 
 #endif
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* _RTE_ATOMIC_X86_H_ */
diff --git a/lib/eal/x86/include/rte_byteorder.h b/lib/eal/x86/include/rte_byteorder.h
index adbec0c157..5a49ffcd50 100644
--- a/lib/eal/x86/include/rte_byteorder.h
+++ b/lib/eal/x86/include/rte_byteorder.h
@@ -5,15 +5,15 @@
 #ifndef _RTE_BYTEORDER_X86_H_
 #define _RTE_BYTEORDER_X86_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <rte_common.h>
 #include <rte_config.h>
 #include "generic/rte_byteorder.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifndef RTE_BYTE_ORDER
 #define RTE_BYTE_ORDER RTE_LITTLE_ENDIAN
 #endif
@@ -48,6 +48,10 @@ static inline uint32_t rte_arch_bswap32(uint32_t _x)
 	return x;
 }
 
+#ifdef __cplusplus
+}
+#endif
+
 #define rte_bswap16(x) ((uint16_t)(__builtin_constant_p(x) ?		\
 				   rte_constant_bswap16(x) :		\
 				   rte_arch_bswap16(x)))
@@ -83,8 +87,4 @@ static inline uint32_t rte_arch_bswap32(uint32_t _x)
 #define rte_be_to_cpu_32(x) rte_bswap32(x)
 #define rte_be_to_cpu_64(x) rte_bswap64(x)
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* _RTE_BYTEORDER_X86_H_ */
diff --git a/lib/eal/x86/include/rte_cpuflags.h b/lib/eal/x86/include/rte_cpuflags.h
index 1ee00e70fe..e9f4957f15 100644
--- a/lib/eal/x86/include/rte_cpuflags.h
+++ b/lib/eal/x86/include/rte_cpuflags.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_CPUFLAGS_X86_64_H_
 #define _RTE_CPUFLAGS_X86_64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 enum rte_cpu_flag_t {
 	/* (EAX 01h) ECX features*/
 	RTE_CPUFLAG_SSE3 = 0,               /**< SSE3 */
@@ -138,8 +134,4 @@ enum rte_cpu_flag_t {
 
 #include "generic/rte_cpuflags.h"
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* _RTE_CPUFLAGS_X86_64_H_ */
diff --git a/lib/eal/x86/include/rte_cycles.h b/lib/eal/x86/include/rte_cycles.h
index 2afe85e28c..8de43840da 100644
--- a/lib/eal/x86/include/rte_cycles.h
+++ b/lib/eal/x86/include/rte_cycles.h
@@ -12,10 +12,6 @@
 #include <x86intrin.h>
 #endif
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include "generic/rte_cycles.h"
 
 #ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT
@@ -26,6 +22,10 @@ extern int rte_cycles_vmware_tsc_map;
 #include <rte_common.h>
 #include <rte_config.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline uint64_t
 rte_rdtsc(void)
 {
diff --git a/lib/eal/x86/include/rte_io.h b/lib/eal/x86/include/rte_io.h
index 0e1fefdee1..c11cb8cd89 100644
--- a/lib/eal/x86/include/rte_io.h
+++ b/lib/eal/x86/include/rte_io.h
@@ -5,16 +5,16 @@
 #ifndef _RTE_IO_X86_H_
 #define _RTE_IO_X86_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include "rte_cpuflags.h"
 
 #define RTE_NATIVE_WRITE32_WC
 #include "generic/rte_io.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @internal
  * MOVDIRI wrapper.
diff --git a/lib/eal/x86/include/rte_pause.h b/lib/eal/x86/include/rte_pause.h
index b4cf1df1d0..54f028b295 100644
--- a/lib/eal/x86/include/rte_pause.h
+++ b/lib/eal/x86/include/rte_pause.h
@@ -5,13 +5,14 @@
 #ifndef _RTE_PAUSE_X86_H_
 #define _RTE_PAUSE_X86_H_
 
+#include "generic/rte_pause.h"
+
+#include <emmintrin.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_pause.h"
-
-#include <emmintrin.h>
 static inline void rte_pause(void)
 {
 	_mm_pause();
diff --git a/lib/eal/x86/include/rte_power_intrinsics.h b/lib/eal/x86/include/rte_power_intrinsics.h
index e4c2b87f73..f315c9c454 100644
--- a/lib/eal/x86/include/rte_power_intrinsics.h
+++ b/lib/eal/x86/include/rte_power_intrinsics.h
@@ -5,16 +5,8 @@
 #ifndef _RTE_POWER_INTRINSIC_X86_H_
 #define _RTE_POWER_INTRINSIC_X86_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 
 #include "generic/rte_power_intrinsics.h"
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* _RTE_POWER_INTRINSIC_X86_H_ */
diff --git a/lib/eal/x86/include/rte_prefetch.h b/lib/eal/x86/include/rte_prefetch.h
index 8a9377714f..34a609cc65 100644
--- a/lib/eal/x86/include/rte_prefetch.h
+++ b/lib/eal/x86/include/rte_prefetch.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_PREFETCH_X86_64_H_
 #define _RTE_PREFETCH_X86_64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #ifdef RTE_TOOLCHAIN_MSVC
 #include <emmintrin.h>
 #endif
@@ -17,6 +13,10 @@ extern "C" {
 #include <rte_common.h>
 #include "generic/rte_prefetch.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline void rte_prefetch0(const volatile void *p)
 {
 #ifdef RTE_TOOLCHAIN_MSVC
diff --git a/lib/eal/x86/include/rte_rwlock.h b/lib/eal/x86/include/rte_rwlock.h
index 1796b69265..281eff33b9 100644
--- a/lib/eal/x86/include/rte_rwlock.h
+++ b/lib/eal/x86/include/rte_rwlock.h
@@ -5,13 +5,13 @@
 #ifndef _RTE_RWLOCK_X86_64_H_
 #define _RTE_RWLOCK_X86_64_H_
 
+#include "generic/rte_rwlock.h"
+#include "rte_spinlock.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "generic/rte_rwlock.h"
-#include "rte_spinlock.h"
-
 static inline void
 rte_rwlock_read_lock_tm(rte_rwlock_t *rwl)
 	__rte_no_thread_safety_analysis
diff --git a/lib/eal/x86/include/rte_spinlock.h b/lib/eal/x86/include/rte_spinlock.h
index a6c23ea1f6..a14da41964 100644
--- a/lib/eal/x86/include/rte_spinlock.h
+++ b/lib/eal/x86/include/rte_spinlock.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_SPINLOCK_X86_64_H_
 #define _RTE_SPINLOCK_X86_64_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include "generic/rte_spinlock.h"
 #include "rte_rtm.h"
 #include "rte_cpuflags.h"
@@ -17,6 +13,10 @@ extern "C" {
 #include "rte_pause.h"
 #include "rte_cycles.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_RTM_MAX_RETRIES (20)
 #define RTE_XABORT_LOCK_BUSY (0xff)
 
@@ -182,7 +182,6 @@ rte_spinlock_recursive_trylock_tm(rte_spinlock_recursive_t *slr)
 	return rte_spinlock_recursive_trylock(slr);
 }
 
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/ethdev/ethdev_driver.h b/lib/ethdev/ethdev_driver.h
index 883e59a927..ae00ead865 100644
--- a/lib/ethdev/ethdev_driver.h
+++ b/lib/ethdev/ethdev_driver.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_ETHDEV_DRIVER_H_
 #define _RTE_ETHDEV_DRIVER_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  *
@@ -24,6 +20,10 @@ extern "C" {
 #include <rte_compat.h>
 #include <rte_ethdev.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @internal
  * Structure used to hold information about the callbacks to be called for a
diff --git a/lib/ethdev/ethdev_pci.h b/lib/ethdev/ethdev_pci.h
index ec4f731270..2229ffa252 100644
--- a/lib/ethdev/ethdev_pci.h
+++ b/lib/ethdev/ethdev_pci.h
@@ -6,16 +6,16 @@
 #ifndef _RTE_ETHDEV_PCI_H_
 #define _RTE_ETHDEV_PCI_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_malloc.h>
 #include <rte_pci.h>
 #include <bus_pci_driver.h>
 #include <rte_config.h>
 #include <ethdev_driver.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Copy pci device info to the Ethernet device data.
  * Shared memory (eth_dev->data) only updated by primary process, so it is safe
diff --git a/lib/ethdev/ethdev_trace.h b/lib/ethdev/ethdev_trace.h
index 3bec87bfdb..36a38f718a 100644
--- a/lib/ethdev/ethdev_trace.h
+++ b/lib/ethdev/ethdev_trace.h
@@ -11,10 +11,6 @@
  * API for ethdev trace support
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <dev_driver.h>
 #include <rte_trace_point.h>
 
@@ -22,6 +18,10 @@ extern "C" {
 #include "rte_mtr.h"
 #include "rte_tm.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 RTE_TRACE_POINT(
 	rte_ethdev_trace_configure,
 	RTE_TRACE_POINT_ARGS(uint16_t port_id, uint16_t nb_rx_q,
diff --git a/lib/ethdev/ethdev_vdev.h b/lib/ethdev/ethdev_vdev.h
index 364f140f91..010ec75a00 100644
--- a/lib/ethdev/ethdev_vdev.h
+++ b/lib/ethdev/ethdev_vdev.h
@@ -6,15 +6,15 @@
 #ifndef _RTE_ETHDEV_VDEV_H_
 #define _RTE_ETHDEV_VDEV_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_config.h>
 #include <rte_malloc.h>
 #include <bus_vdev_driver.h>
 #include <ethdev_driver.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @internal
  * Allocates a new ethdev slot for an Ethernet device and returns the pointer
diff --git a/lib/ethdev/rte_cman.h b/lib/ethdev/rte_cman.h
index 297db8e095..f33a9c7969 100644
--- a/lib/ethdev/rte_cman.h
+++ b/lib/ethdev/rte_cman.h
@@ -5,10 +5,6 @@
 #ifndef RTE_CMAN_H
 #define RTE_CMAN_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_bitops.h>
 
 /**
@@ -48,8 +44,4 @@ struct rte_cman_red_params {
 	uint16_t maxp_inv;
 };
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* RTE_CMAN_H */
diff --git a/lib/ethdev/rte_dev_info.h b/lib/ethdev/rte_dev_info.h
index 67cf0ae526..cb2fe0ae97 100644
--- a/lib/ethdev/rte_dev_info.h
+++ b/lib/ethdev/rte_dev_info.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_DEV_INFO_H_
 #define _RTE_DEV_INFO_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 /*
@@ -52,8 +48,4 @@ struct rte_eth_dev_module_info {
 #define RTE_ETH_MODULE_SFF_8436_LEN         256
 #define RTE_ETH_MODULE_SFF_8436_MAX_LEN     640
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* _RTE_DEV_INFO_H_ */
diff --git a/lib/ethdev/rte_eth_ctrl.h b/lib/ethdev/rte_eth_ctrl.h
index fc511f227d..9ec3f5eff9 100644
--- a/lib/ethdev/rte_eth_ctrl.h
+++ b/lib/ethdev/rte_eth_ctrl.h
@@ -19,10 +19,6 @@
  * by control APIs should be defined in this file.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * Define all structures for ntuple Filter type.
  */
@@ -487,8 +483,4 @@ struct rte_eth_fdir_stats {
 	uint32_t best_cnt;     /**< Number of filters in best effort spaces. */
 };
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* _RTE_ETH_CTRL_H_ */
diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
index 548fada1c7..a75e26bf07 100644
--- a/lib/ethdev/rte_ethdev.h
+++ b/lib/ethdev/rte_ethdev.h
@@ -145,10 +145,6 @@
  * a 0 value by the receive function of the driver for a given number of tries.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 /* Use this macro to check if LRO API is supported */
@@ -5966,6 +5962,10 @@ int rte_eth_cman_config_get(uint16_t port_id, struct rte_eth_cman_config *config
 
 #include <rte_ethdev_core.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @internal
  * Helper routine for rte_eth_rx_burst().
diff --git a/lib/ethdev/rte_ethdev_trace_fp.h b/lib/ethdev/rte_ethdev_trace_fp.h
index 40b6e4756b..c11b4f18f7 100644
--- a/lib/ethdev/rte_ethdev_trace_fp.h
+++ b/lib/ethdev/rte_ethdev_trace_fp.h
@@ -11,12 +11,12 @@
  * API for ethdev trace support
  */
 
+#include <rte_trace_point.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_trace_point.h>
-
 RTE_TRACE_POINT_FP(
 	rte_ethdev_trace_rx_burst,
 	RTE_TRACE_POINT_ARGS(uint16_t port_id, uint16_t queue_id,
diff --git a/lib/eventdev/event_timer_adapter_pmd.h b/lib/eventdev/event_timer_adapter_pmd.h
index cd5127f047..f06e85dc0b 100644
--- a/lib/eventdev/event_timer_adapter_pmd.h
+++ b/lib/eventdev/event_timer_adapter_pmd.h
@@ -16,10 +16,6 @@
  * versioning.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include "rte_event_timer_adapter.h"
 
 /*
@@ -112,8 +108,4 @@ struct __rte_cache_aligned rte_event_timer_adapter_data {
 	/**< Flag to indicate adapter started. */
 };
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* __EVENT_TIMER_ADAPTER_PMD_H__ */
diff --git a/lib/eventdev/eventdev_pmd.h b/lib/eventdev/eventdev_pmd.h
index 7a5699f14b..fd5f7a14f4 100644
--- a/lib/eventdev/eventdev_pmd.h
+++ b/lib/eventdev/eventdev_pmd.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_EVENTDEV_PMD_H_
 #define _RTE_EVENTDEV_PMD_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /** @file
  * RTE Event PMD APIs
  *
@@ -31,6 +27,10 @@ extern "C" {
 #include "event_timer_adapter_pmd.h"
 #include "rte_eventdev.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 extern int rte_event_logtype;
 #define RTE_LOGTYPE_EVENTDEV rte_event_logtype
 
diff --git a/lib/eventdev/eventdev_pmd_pci.h b/lib/eventdev/eventdev_pmd_pci.h
index 26aa3a6635..5cb5916a84 100644
--- a/lib/eventdev/eventdev_pmd_pci.h
+++ b/lib/eventdev/eventdev_pmd_pci.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_EVENTDEV_PMD_PCI_H_
 #define _RTE_EVENTDEV_PMD_PCI_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /** @file
  * RTE Eventdev PCI PMD APIs
  *
@@ -28,6 +24,10 @@ extern "C" {
 
 #include "eventdev_pmd.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 typedef int (*eventdev_pmd_pci_callback_t)(struct rte_eventdev *dev);
 
 /**
diff --git a/lib/eventdev/eventdev_pmd_vdev.h b/lib/eventdev/eventdev_pmd_vdev.h
index bb433ba955..4eaefa0b0b 100644
--- a/lib/eventdev/eventdev_pmd_vdev.h
+++ b/lib/eventdev/eventdev_pmd_vdev.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_EVENTDEV_PMD_VDEV_H_
 #define _RTE_EVENTDEV_PMD_VDEV_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /** @file
  * RTE Eventdev VDEV PMD APIs
  *
@@ -27,6 +23,10 @@ extern "C" {
 
 #include "eventdev_pmd.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @internal
  * Creates a new virtual event device and returns the pointer to that device.
diff --git a/lib/eventdev/eventdev_trace.h b/lib/eventdev/eventdev_trace.h
index 9c2b261c06..8ff8841729 100644
--- a/lib/eventdev/eventdev_trace.h
+++ b/lib/eventdev/eventdev_trace.h
@@ -11,10 +11,6 @@
  * API for ethdev trace support
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_trace_point.h>
 
 #include "rte_eventdev.h"
@@ -22,6 +18,10 @@ extern "C" {
 #include "rte_event_eth_rx_adapter.h"
 #include "rte_event_timer_adapter.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 RTE_TRACE_POINT(
 	rte_eventdev_trace_configure,
 	RTE_TRACE_POINT_ARGS(uint8_t dev_id,
diff --git a/lib/eventdev/rte_event_crypto_adapter.h b/lib/eventdev/rte_event_crypto_adapter.h
index e07f159b77..c9b277c664 100644
--- a/lib/eventdev/rte_event_crypto_adapter.h
+++ b/lib/eventdev/rte_event_crypto_adapter.h
@@ -167,14 +167,14 @@
  * from the start of the rte_crypto_op including initialization vector (IV).
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include "rte_eventdev.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Crypto event adapter mode
  */
diff --git a/lib/eventdev/rte_event_eth_rx_adapter.h b/lib/eventdev/rte_event_eth_rx_adapter.h
index cf42c69b0d..9237e198a7 100644
--- a/lib/eventdev/rte_event_eth_rx_adapter.h
+++ b/lib/eventdev/rte_event_eth_rx_adapter.h
@@ -87,10 +87,6 @@
  * event based so the callback can also modify the event data if it needs to.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_compat.h>
@@ -98,6 +94,10 @@ extern "C" {
 
 #include "rte_eventdev.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_EVENT_ETH_RX_ADAPTER_MAX_INSTANCE 32
 
 /* struct rte_event_eth_rx_adapter_queue_conf flags definitions */
diff --git a/lib/eventdev/rte_event_eth_tx_adapter.h b/lib/eventdev/rte_event_eth_tx_adapter.h
index b38b3fce97..ef01345ac2 100644
--- a/lib/eventdev/rte_event_eth_tx_adapter.h
+++ b/lib/eventdev/rte_event_eth_tx_adapter.h
@@ -76,10 +76,6 @@
  * impact due to a change in how the transmit queue index is specified.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_compat.h>
@@ -87,6 +83,10 @@ extern "C" {
 
 #include "rte_eventdev.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Adapter configuration structure
  *
diff --git a/lib/eventdev/rte_event_ring.h b/lib/eventdev/rte_event_ring.h
index f9cf19ae16..5769da269e 100644
--- a/lib/eventdev/rte_event_ring.h
+++ b/lib/eventdev/rte_event_ring.h
@@ -14,10 +14,6 @@
 #ifndef _RTE_EVENT_RING_
 #define _RTE_EVENT_RING_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_common.h>
@@ -25,6 +21,10 @@ extern "C" {
 #include <rte_ring_elem.h>
 #include "rte_eventdev.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_TAILQ_EVENT_RING_NAME "RTE_EVENT_RING"
 
 /**
diff --git a/lib/eventdev/rte_event_timer_adapter.h b/lib/eventdev/rte_event_timer_adapter.h
index 0bd1b30045..256807b3bf 100644
--- a/lib/eventdev/rte_event_timer_adapter.h
+++ b/lib/eventdev/rte_event_timer_adapter.h
@@ -107,14 +107,14 @@
  * All these use cases require high resolution and low time drift.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 
 #include "rte_eventdev.h"
 #include "rte_eventdev_trace_fp.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Timer adapter clock source
  */
diff --git a/lib/eventdev/rte_eventdev.h b/lib/eventdev/rte_eventdev.h
index 08e5f9320b..e5c5b7df64 100644
--- a/lib/eventdev/rte_eventdev.h
+++ b/lib/eventdev/rte_eventdev.h
@@ -237,10 +237,6 @@
  * \endcode
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include <rte_common.h>
 #include <rte_errno.h>
@@ -2469,6 +2465,10 @@ rte_event_vector_pool_create(const char *name, unsigned int n,
 
 #include <rte_eventdev_core.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static __rte_always_inline uint16_t
 __rte_event_enqueue_burst(uint8_t dev_id, uint8_t port_id,
 			  const struct rte_event ev[], uint16_t nb_events,
diff --git a/lib/eventdev/rte_eventdev_trace_fp.h b/lib/eventdev/rte_eventdev_trace_fp.h
index 04d510ad00..8656f1e6e4 100644
--- a/lib/eventdev/rte_eventdev_trace_fp.h
+++ b/lib/eventdev/rte_eventdev_trace_fp.h
@@ -11,12 +11,12 @@
  * API for ethdev trace support
  */
 
+#include <rte_trace_point.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_trace_point.h>
-
 RTE_TRACE_POINT_FP(
 	rte_eventdev_trace_deq_burst,
 	RTE_TRACE_POINT_ARGS(uint8_t dev_id, uint8_t port_id, void *ev_table,
diff --git a/lib/graph/rte_graph_model_mcore_dispatch.h b/lib/graph/rte_graph_model_mcore_dispatch.h
index 732b89297f..f9ff3daa88 100644
--- a/lib/graph/rte_graph_model_mcore_dispatch.h
+++ b/lib/graph/rte_graph_model_mcore_dispatch.h
@@ -12,10 +12,6 @@
  * dispatch model.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_errno.h>
 #include <rte_mempool.h>
 #include <rte_memzone.h>
@@ -23,6 +19,10 @@ extern "C" {
 
 #include "rte_graph_worker_common.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_GRAPH_SCHED_WQ_SIZE_MULTIPLIER  8
 #define RTE_GRAPH_SCHED_WQ_SIZE(nb_nodes)   \
 	((typeof(nb_nodes))((nb_nodes) * RTE_GRAPH_SCHED_WQ_SIZE_MULTIPLIER))
diff --git a/lib/graph/rte_graph_worker.h b/lib/graph/rte_graph_worker.h
index 03d0e01b68..b0f952a82c 100644
--- a/lib/graph/rte_graph_worker.h
+++ b/lib/graph/rte_graph_worker.h
@@ -6,13 +6,13 @@
 #ifndef _RTE_GRAPH_WORKER_H_
 #define _RTE_GRAPH_WORKER_H_
 
+#include "rte_graph_model_rtc.h"
+#include "rte_graph_model_mcore_dispatch.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "rte_graph_model_rtc.h"
-#include "rte_graph_model_mcore_dispatch.h"
-
 /**
  * Perform graph walk on the circular buffer and invoke the process function
  * of the nodes and collect the stats.
diff --git a/lib/gso/rte_gso.h b/lib/gso/rte_gso.h
index d60cb65f18..75246989dc 100644
--- a/lib/gso/rte_gso.h
+++ b/lib/gso/rte_gso.h
@@ -10,13 +10,13 @@
  * Interface to GSO library
  */
 
+#include <stdint.h>
+#include <rte_mbuf.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-#include <rte_mbuf.h>
-
 /* Minimum GSO segment size for TCP based packets. */
 #define RTE_GSO_SEG_SIZE_MIN (sizeof(struct rte_ether_hdr) + \
 		sizeof(struct rte_ipv4_hdr) + sizeof(struct rte_tcp_hdr) + 1)
diff --git a/lib/hash/rte_fbk_hash.h b/lib/hash/rte_fbk_hash.h
index b01126999b..1f0c1d1b6c 100644
--- a/lib/hash/rte_fbk_hash.h
+++ b/lib/hash/rte_fbk_hash.h
@@ -18,15 +18,15 @@
 #include <stdint.h>
 #include <errno.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <string.h>
 
 #include <rte_hash_crc.h>
 #include <rte_jhash.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifndef RTE_FBK_HASH_INIT_VAL_DEFAULT
 /** Initialising value used when calculating hash. */
 #define RTE_FBK_HASH_INIT_VAL_DEFAULT		0xFFFFFFFF
diff --git a/lib/hash/rte_hash_crc.h b/lib/hash/rte_hash_crc.h
index 8ad2422ec3..fa07c97685 100644
--- a/lib/hash/rte_hash_crc.h
+++ b/lib/hash/rte_hash_crc.h
@@ -11,10 +11,6 @@
  * RTE CRC Hash
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_branch_prediction.h>
@@ -39,6 +35,10 @@ extern uint8_t rte_hash_crc32_alg;
 #include "rte_crc_generic.h"
 #endif
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Allow or disallow use of SSE4.2/ARMv8 intrinsics for CRC32 hash
  * calculation.
diff --git a/lib/hash/rte_jhash.h b/lib/hash/rte_jhash.h
index f2446f081e..b70799d209 100644
--- a/lib/hash/rte_jhash.h
+++ b/lib/hash/rte_jhash.h
@@ -11,10 +11,6 @@
  * jhash functions.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <string.h>
 #include <limits.h>
@@ -23,6 +19,10 @@ extern "C" {
 #include <rte_log.h>
 #include <rte_byteorder.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* jhash.h: Jenkins hash support.
  *
  * Copyright (C) 2006 Bob Jenkins (bob_jenkins@burtleburtle.net)
diff --git a/lib/hash/rte_thash.h b/lib/hash/rte_thash.h
index 30b657e67a..ec9bc57efa 100644
--- a/lib/hash/rte_thash.h
+++ b/lib/hash/rte_thash.h
@@ -15,10 +15,6 @@
  * after GRE header decapsulating)
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_byteorder.h>
@@ -28,6 +24,10 @@ extern "C" {
 
 #if defined(RTE_ARCH_X86) || defined(__ARM_NEON)
 #include <rte_vect.h>
 #endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
 
 #ifdef RTE_ARCH_X86
diff --git a/lib/hash/rte_thash_gfni.h b/lib/hash/rte_thash_gfni.h
index 132f37506d..e82378933c 100644
--- a/lib/hash/rte_thash_gfni.h
+++ b/lib/hash/rte_thash_gfni.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_THASH_GFNI_H_
 #define _RTE_THASH_GFNI_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include <rte_log.h>
 
@@ -18,6 +14,10 @@ extern "C" {
 
 #endif
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @internal
  * Stubs only used when GFNI is not available.
diff --git a/lib/ip_frag/rte_ip_frag.h b/lib/ip_frag/rte_ip_frag.h
index 2ad318096b..84fd717953 100644
--- a/lib/ip_frag/rte_ip_frag.h
+++ b/lib/ip_frag/rte_ip_frag.h
@@ -12,10 +12,6 @@
  * Implementation of IP packet fragmentation and reassembly.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <stdio.h>
 
@@ -25,6 +21,10 @@ extern "C" {
 #include <rte_ip.h>
 #include <rte_byteorder.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct rte_mbuf;
 
 /** death row size (in packets) */
diff --git a/lib/ipsec/rte_ipsec.h b/lib/ipsec/rte_ipsec.h
index f15f6f2966..28b7a61aea 100644
--- a/lib/ipsec/rte_ipsec.h
+++ b/lib/ipsec/rte_ipsec.h
@@ -17,10 +17,6 @@
 #include <rte_ipsec_sa.h>
 #include <rte_mbuf.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 struct rte_ipsec_session;
 
 /**
@@ -181,6 +177,10 @@ rte_ipsec_telemetry_sa_del(const struct rte_ipsec_sa *sa);
 
 #include <rte_ipsec_group.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/log/rte_log.h b/lib/log/rte_log.h
index f357c59548..3735137150 100644
--- a/lib/log/rte_log.h
+++ b/lib/log/rte_log.h
@@ -13,10 +13,6 @@
  * This file provides a log API to RTE applications.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <assert.h>
 #include <stdint.h>
 #include <stdio.h>
@@ -26,6 +22,10 @@ extern "C" {
 #include <rte_common.h>
 #include <rte_config.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* SDK log type */
 #define RTE_LOGTYPE_EAL        0 /**< Log related to eal. */
 				 /* was RTE_LOGTYPE_MALLOC */
diff --git a/lib/lpm/rte_lpm.h b/lib/lpm/rte_lpm.h
index 9c6df311cb..329dc1aad4 100644
--- a/lib/lpm/rte_lpm.h
+++ b/lib/lpm/rte_lpm.h
@@ -391,6 +391,10 @@ static inline void
 rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
 	uint32_t defv);
 
+#ifdef __cplusplus
+}
+#endif
+
 #if defined(RTE_ARCH_ARM)
 #ifdef RTE_HAS_SVE_ACLE
 #include "rte_lpm_sve.h"
@@ -407,8 +411,4 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
 #include "rte_lpm_scalar.h"
 #endif
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* _RTE_LPM_H_ */
diff --git a/lib/member/rte_member.h b/lib/member/rte_member.h
index aec192eba5..109bdd000b 100644
--- a/lib/member/rte_member.h
+++ b/lib/member/rte_member.h
@@ -54,10 +54,6 @@
 #ifndef _RTE_MEMBER_H_
 #define _RTE_MEMBER_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <stdbool.h>
 #include <inttypes.h>
@@ -100,6 +96,10 @@ typedef uint16_t member_set_t;
 #define MEMBER_HASH_FUNC       rte_jhash
 #endif
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** @internal setsummary structure. */
 struct rte_member_setsum;
 
diff --git a/lib/member/rte_member_sketch.h b/lib/member/rte_member_sketch.h
index 74f24ca223..6a8d5104dd 100644
--- a/lib/member/rte_member_sketch.h
+++ b/lib/member/rte_member_sketch.h
@@ -5,13 +5,13 @@
 #ifndef RTE_MEMBER_SKETCH_H
 #define RTE_MEMBER_SKETCH_H
 
+#include <rte_vect.h>
+#include <rte_ring_elem.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_vect.h>
-#include <rte_ring_elem.h>
-
 #define NUM_ROW_SCALAR 5
 #define INTERVAL (1 << 15)
 
diff --git a/lib/member/rte_member_sketch_avx512.h b/lib/member/rte_member_sketch_avx512.h
index 52666b5b4c..a8ef3b065e 100644
--- a/lib/member/rte_member_sketch_avx512.h
+++ b/lib/member/rte_member_sketch_avx512.h
@@ -5,14 +5,14 @@
 #ifndef RTE_MEMBER_SKETCH_AVX512_H
 #define RTE_MEMBER_SKETCH_AVX512_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_vect.h>
 #include "rte_member.h"
 #include "rte_member_sketch.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define NUM_ROW_VEC 8
 
 void
diff --git a/lib/member/rte_member_x86.h b/lib/member/rte_member_x86.h
index d115151f9f..4de453485b 100644
--- a/lib/member/rte_member_x86.h
+++ b/lib/member/rte_member_x86.h
@@ -5,12 +5,12 @@
 #ifndef _RTE_MEMBER_X86_H_
 #define _RTE_MEMBER_X86_H_
 
+#include <x86intrin.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <x86intrin.h>
-
 #if defined(__AVX2__)
 
 static inline int
diff --git a/lib/member/rte_xxh64_avx512.h b/lib/member/rte_xxh64_avx512.h
index ffe6cb79f9..58f896ebb8 100644
--- a/lib/member/rte_xxh64_avx512.h
+++ b/lib/member/rte_xxh64_avx512.h
@@ -5,13 +5,13 @@
 #ifndef RTE_XXH64_AVX512_H
 #define RTE_XXH64_AVX512_H
 
+#include <rte_common.h>
+#include <immintrin.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_common.h>
-#include <immintrin.h>
-
 /* 0b1001111000110111011110011011000110000101111010111100101010000111 */
 static const uint64_t PRIME64_1 = 0x9E3779B185EBCA87ULL;
 /* 0b1100001010110010101011100011110100100111110101001110101101001111 */
diff --git a/lib/mempool/mempool_trace.h b/lib/mempool/mempool_trace.h
index dffef062e4..c595a3116b 100644
--- a/lib/mempool/mempool_trace.h
+++ b/lib/mempool/mempool_trace.h
@@ -11,15 +11,15 @@
  * APIs for mempool trace support
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include "rte_mempool.h"
 
 #include <rte_memzone.h>
 #include <rte_trace_point.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 RTE_TRACE_POINT(
 	rte_mempool_trace_create,
 	RTE_TRACE_POINT_ARGS(const char *name, uint32_t nb_elts,
diff --git a/lib/mempool/rte_mempool_trace_fp.h b/lib/mempool/rte_mempool_trace_fp.h
index ed060e887c..9c5cdbb291 100644
--- a/lib/mempool/rte_mempool_trace_fp.h
+++ b/lib/mempool/rte_mempool_trace_fp.h
@@ -11,12 +11,12 @@
  * Mempool fast path API for trace support
  */
 
+#include <rte_trace_point.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_trace_point.h>
-
 RTE_TRACE_POINT_FP(
 	rte_mempool_trace_ops_dequeue_bulk,
 	RTE_TRACE_POINT_ARGS(void *mempool, void **obj_table,
diff --git a/lib/meter/rte_meter.h b/lib/meter/rte_meter.h
index bd68cbe389..e72bf93b3e 100644
--- a/lib/meter/rte_meter.h
+++ b/lib/meter/rte_meter.h
@@ -6,10 +6,6 @@
 #ifndef __INCLUDE_RTE_METER_H__
 #define __INCLUDE_RTE_METER_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Traffic Metering
@@ -22,6 +18,10 @@ extern "C" {
 
 #include <stdint.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /*
  * Application Programmer's Interface (API)
  */
diff --git a/lib/mldev/mldev_utils.h b/lib/mldev/mldev_utils.h
index 5e2a180adc..bf21067d38 100644
--- a/lib/mldev/mldev_utils.h
+++ b/lib/mldev/mldev_utils.h
@@ -5,10 +5,6 @@
 #ifndef RTE_MLDEV_UTILS_H
 #define RTE_MLDEV_UTILS_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  *
@@ -20,6 +16,10 @@ extern "C" {
 #include <rte_compat.h>
 #include <rte_mldev.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @internal
  *
diff --git a/lib/mldev/rte_mldev_core.h b/lib/mldev/rte_mldev_core.h
index b3bd281083..75d3466bc7 100644
--- a/lib/mldev/rte_mldev_core.h
+++ b/lib/mldev/rte_mldev_core.h
@@ -16,10 +16,6 @@
  * These APIs are for MLDEV PMDs and library only.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <dev_driver.h>
@@ -668,8 +664,4 @@ struct rte_ml_dev_global {
 	uint8_t max_devs;
 };
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* RTE_MLDEV_INTERNAL_H */
diff --git a/lib/mldev/rte_mldev_pmd.h b/lib/mldev/rte_mldev_pmd.h
index fd5bbf4360..47c0f23223 100644
--- a/lib/mldev/rte_mldev_pmd.h
+++ b/lib/mldev/rte_mldev_pmd.h
@@ -14,10 +14,6 @@
  * These APIs are for MLDEV PMDs only and user applications should not call them directly.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_common.h>
@@ -25,6 +21,10 @@ extern "C" {
 #include <rte_mldev.h>
 #include <rte_mldev_core.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @internal
  *
diff --git a/lib/net/rte_dtls.h b/lib/net/rte_dtls.h
index 4f541df89c..246cd8a72d 100644
--- a/lib/net/rte_dtls.h
+++ b/lib/net/rte_dtls.h
@@ -13,10 +13,6 @@
 
 #include <rte_byteorder.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #define RTE_DTLS_TYPE_INVALID               0 /**< Invalid DTLS message type. */
 #define RTE_DTLS_TYPE_CHANGE_CIPHER_SPEC   20 /**< Change cipher spec message. */
 #define RTE_DTLS_TYPE_ALERT                21 /**< Alert message. */
@@ -54,8 +50,4 @@ struct rte_dtls_hdr {
 	rte_be16_t length;
 } __rte_packed;
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* RTE_DTLS_H */
diff --git a/lib/net/rte_ecpri.h b/lib/net/rte_ecpri.h
index 137c44f3b3..19821336a6 100644
--- a/lib/net/rte_ecpri.h
+++ b/lib/net/rte_ecpri.h
@@ -17,10 +17,6 @@
 #include <stdint.h>
 #include <rte_byteorder.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /*
  * eCPRI Protocol Revision 1.0, 1.1, 1.2, 2.0: 0001b
  * Other values are reserved for future
@@ -182,8 +178,4 @@ struct rte_ecpri_combined_msg_hdr {
 	};
 };
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* _RTE_ECPRI_H_ */
diff --git a/lib/net/rte_esp.h b/lib/net/rte_esp.h
index 464c513e2b..745a9847fe 100644
--- a/lib/net/rte_esp.h
+++ b/lib/net/rte_esp.h
@@ -13,10 +13,6 @@
 
 #include <rte_byteorder.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * ESP Header
  */
@@ -33,8 +29,4 @@ struct rte_esp_tail {
 	uint8_t next_proto;  /**< IPv4 or IPv6 or next layer header */
 } __rte_packed;
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* RTE_ESP_H_ */
diff --git a/lib/net/rte_ether.h b/lib/net/rte_ether.h
index 32ed515aef..403e84f50b 100644
--- a/lib/net/rte_ether.h
+++ b/lib/net/rte_ether.h
@@ -11,10 +11,6 @@
  * Ethernet Helpers in RTE
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <stdio.h>
 
@@ -22,6 +18,10 @@ extern "C" {
 #include <rte_mbuf.h>
 #include <rte_byteorder.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_ETHER_ADDR_LEN  6 /**< Length of Ethernet address. */
 #define RTE_ETHER_TYPE_LEN  2 /**< Length of Ethernet type field. */
 #define RTE_ETHER_CRC_LEN   4 /**< Length of Ethernet CRC. */
diff --git a/lib/net/rte_geneve.h b/lib/net/rte_geneve.h
index 3bbc561847..eb2c85f1e9 100644
--- a/lib/net/rte_geneve.h
+++ b/lib/net/rte_geneve.h
@@ -14,10 +14,6 @@
 
 #include <rte_byteorder.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /** GENEVE default port. */
 #define RTE_GENEVE_DEFAULT_PORT 6081
 
@@ -61,8 +57,4 @@ struct rte_geneve_hdr {
 /* GENEVE ETH next protocol types */
 #define RTE_GENEVE_TYPE_ETH	0x6558 /**< Ethernet Protocol. */
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* RTE_GENEVE_H_ */
diff --git a/lib/net/rte_gre.h b/lib/net/rte_gre.h
index 8da8027b43..1483e1b42d 100644
--- a/lib/net/rte_gre.h
+++ b/lib/net/rte_gre.h
@@ -19,10 +19,6 @@
  * over an Internet Protocol network.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * GRE Header
  */
@@ -70,8 +66,4 @@ struct rte_gre_hdr_opt_sequence {
 	rte_be32_t sequence;
 } __rte_packed;
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* RTE_GRE_H_ */
diff --git a/lib/net/rte_gtp.h b/lib/net/rte_gtp.h
index 9849872366..ab06e23a6e 100644
--- a/lib/net/rte_gtp.h
+++ b/lib/net/rte_gtp.h
@@ -17,10 +17,6 @@
 #include <stdint.h>
 #include <rte_byteorder.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * Simplified GTP protocol header.
  * Contains 8-bit header info, 8-bit message type,
@@ -150,8 +146,4 @@ struct rte_gtp_psc_type1_hdr {
 #define RTE_GTPC_UDP_PORT 2123 /**< GTP-C UDP destination port */
 #define RTE_GTPU_UDP_PORT 2152 /**< GTP-U UDP destination port */
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* RTE_GTP_H_ */
diff --git a/lib/net/rte_higig.h b/lib/net/rte_higig.h
index 7991f6db79..275e09a23b 100644
--- a/lib/net/rte_higig.h
+++ b/lib/net/rte_higig.h
@@ -16,10 +16,6 @@
 #include <stdint.h>
 #include <rte_byteorder.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  *
  * higig2 frc header.
@@ -144,8 +140,4 @@ struct rte_higig2_hdr {
 	};
 };
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* RTE_HIGIG_H_ */
diff --git a/lib/net/rte_ib.h b/lib/net/rte_ib.h
index 9eab5f9e15..a551f3753f 100644
--- a/lib/net/rte_ib.h
+++ b/lib/net/rte_ib.h
@@ -17,10 +17,6 @@
 
 #include <rte_byteorder.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * InfiniBand Base Transport Header according to
  * IB Specification Vol 1-Release-1.4.
@@ -63,8 +59,4 @@ struct rte_ib_bth {
 /** RoCEv2 default port. */
 #define RTE_ROCEV2_DEFAULT_PORT 4791
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* RTE_IB_H */
diff --git a/lib/net/rte_icmp.h b/lib/net/rte_icmp.h
index 4bf64d70ad..7a33280aa1 100644
--- a/lib/net/rte_icmp.h
+++ b/lib/net/rte_icmp.h
@@ -18,10 +18,6 @@
 
 #include <rte_byteorder.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * ICMP base header
  */
@@ -59,8 +55,4 @@ struct rte_icmp_hdr {
 #define RTE_ICMP6_ECHO_REQUEST 128
 #define RTE_ICMP6_ECHO_REPLY   129
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* RTE_ICMP_H_ */
diff --git a/lib/net/rte_l2tpv2.h b/lib/net/rte_l2tpv2.h
index 9cda347d45..ac16657856 100644
--- a/lib/net/rte_l2tpv2.h
+++ b/lib/net/rte_l2tpv2.h
@@ -74,10 +74,6 @@
 #include <stdint.h>
 #include <rte_byteorder.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /*
  * L2TPv2 Message Type
  */
@@ -239,8 +235,4 @@ struct rte_l2tpv2_combined_msg_hdr {
 	};
 } __rte_packed;
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* _RTE_L2TPV2_H_ */
diff --git a/lib/net/rte_macsec.h b/lib/net/rte_macsec.h
index d0cd2f69ab..beeeb8effe 100644
--- a/lib/net/rte_macsec.h
+++ b/lib/net/rte_macsec.h
@@ -13,10 +13,6 @@
 
 #include <rte_byteorder.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #define RTE_MACSEC_TCI_VER_MASK	0x80 /**< Version mask for MACsec. Should be 0. */
 #define RTE_MACSEC_TCI_ES	0x40 /**< Mask for End station (ES) bit - SCI is not valid. */
 #define RTE_MACSEC_TCI_SC	0x20 /**< Mask for SCI present bit. */
@@ -56,8 +52,4 @@ struct rte_macsec_sci_hdr {
 	uint8_t sci[RTE_MACSEC_SCI_LEN]; /**< Optional secure channel ID. */
 } __rte_packed;
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* RTE_MACSEC_H */
diff --git a/lib/net/rte_mpls.h b/lib/net/rte_mpls.h
index 51523e7a11..35a356efd3 100644
--- a/lib/net/rte_mpls.h
+++ b/lib/net/rte_mpls.h
@@ -14,10 +14,6 @@
 #include <stdint.h>
 #include <rte_byteorder.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * MPLS header.
  */
@@ -36,8 +32,4 @@ struct rte_mpls_hdr {
 	uint8_t  ttl;       /**< Time to live. */
 } __rte_packed;
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* RTE_MPLS_H_ */
diff --git a/lib/net/rte_net.h b/lib/net/rte_net.h
index cdc6cf956d..40ad6a71a1 100644
--- a/lib/net/rte_net.h
+++ b/lib/net/rte_net.h
@@ -5,14 +5,14 @@
 #ifndef _RTE_NET_PTYPE_H_
 #define _RTE_NET_PTYPE_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_ip.h>
 #include <rte_udp.h>
 #include <rte_tcp.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Structure containing header lengths associated to a packet, filled
  * by rte_net_get_ptype().
diff --git a/lib/net/rte_pdcp_hdr.h b/lib/net/rte_pdcp_hdr.h
index 72ae9a66cb..c22b66bf93 100644
--- a/lib/net/rte_pdcp_hdr.h
+++ b/lib/net/rte_pdcp_hdr.h
@@ -16,10 +16,6 @@
 
 #include <rte_byteorder.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * 4.3.1
  *
@@ -140,8 +136,4 @@ struct rte_pdcp_up_ctrl_pdu_hdr {
 	uint8_t bitmap[];
 } __rte_packed;
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* RTE_PDCP_HDR_H */
diff --git a/lib/net/rte_ppp.h b/lib/net/rte_ppp.h
index 7b86ac4363..63c72a9392 100644
--- a/lib/net/rte_ppp.h
+++ b/lib/net/rte_ppp.h
@@ -14,10 +14,6 @@
 #include <stdint.h>
 #include <rte_byteorder.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * PPP Header
  */
@@ -27,8 +23,4 @@ struct rte_ppp_hdr {
 	rte_be16_t proto_id; /**< PPP protocol identifier(16) */
 } __rte_packed;
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* _RTE_PPP_H_ */
diff --git a/lib/net/rte_sctp.h b/lib/net/rte_sctp.h
index 965682dc2b..e757c57db3 100644
--- a/lib/net/rte_sctp.h
+++ b/lib/net/rte_sctp.h
@@ -14,10 +14,6 @@
 #ifndef _RTE_SCTP_H_
 #define _RTE_SCTP_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_byteorder.h>
@@ -32,8 +28,4 @@ struct rte_sctp_hdr {
 	rte_be32_t cksum;    /**< Checksum. */
 } __rte_packed;
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* RTE_SCTP_H_ */
diff --git a/lib/net/rte_tcp.h b/lib/net/rte_tcp.h
index 506ac4e8ce..1bcacbf038 100644
--- a/lib/net/rte_tcp.h
+++ b/lib/net/rte_tcp.h
@@ -18,10 +18,6 @@
 
 #include <rte_byteorder.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * TCP Header
  */
@@ -49,8 +45,4 @@ struct rte_tcp_hdr {
 #define RTE_TCP_SYN_FLAG 0x02 /**< Synchronize sequence numbers */
 #define RTE_TCP_FIN_FLAG 0x01 /**< No more data from sender */
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* RTE_TCP_H_ */
diff --git a/lib/net/rte_tls.h b/lib/net/rte_tls.h
index 2eb3c6d453..595567e3e9 100644
--- a/lib/net/rte_tls.h
+++ b/lib/net/rte_tls.h
@@ -13,10 +13,6 @@
 
 #include <rte_byteorder.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #define RTE_TLS_TYPE_INVALID              0 /**< Invalid TLS message type. */
 #define RTE_TLS_TYPE_CHANGE_CIPHER_SPEC  20 /**< Change cipher spec message. */
 #define RTE_TLS_TYPE_ALERT               21 /**< Alert message. */
@@ -41,8 +37,4 @@ struct rte_tls_hdr {
 	rte_be16_t length;
 } __rte_packed;
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* RTE_TLS_H */
diff --git a/lib/net/rte_udp.h b/lib/net/rte_udp.h
index 6135494c4a..c01dad9c9b 100644
--- a/lib/net/rte_udp.h
+++ b/lib/net/rte_udp.h
@@ -18,10 +18,6 @@
 
 #include <rte_byteorder.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * UDP Header
  */
@@ -32,8 +28,4 @@ struct rte_udp_hdr {
 	rte_be16_t dgram_cksum; /**< UDP datagram checksum */
 } __rte_packed;
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* RTE_UDP_H_ */
diff --git a/lib/net/rte_vxlan.h b/lib/net/rte_vxlan.h
index 140c1d589b..bd1c89835e 100644
--- a/lib/net/rte_vxlan.h
+++ b/lib/net/rte_vxlan.h
@@ -16,11 +16,6 @@
 #include <rte_byteorder.h>
 #include <rte_udp.h>
 
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /** VXLAN default port. */
 #define RTE_VXLAN_DEFAULT_PORT 4789
 /** VXLAN GPE port. */
@@ -152,9 +147,4 @@ struct rte_vxlan_gpe_hdr {
 #define RTE_VXLAN_GPE_TYPE_GBP  6 /**< GBP Protocol. */
 #define RTE_VXLAN_GPE_TYPE_VBNG 7 /**< vBNG Protocol. */
 
-
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* RTE_VXLAN_H_ */
diff --git a/lib/node/rte_node_eth_api.h b/lib/node/rte_node_eth_api.h
index 143cf131b3..2b7019f6bb 100644
--- a/lib/node/rte_node_eth_api.h
+++ b/lib/node/rte_node_eth_api.h
@@ -16,15 +16,15 @@
  * and its queue associations.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include <rte_common.h>
 #include <rte_graph.h>
 #include <rte_mempool.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Port config for ethdev_rx and ethdev_tx node.
  */
diff --git a/lib/node/rte_node_ip4_api.h b/lib/node/rte_node_ip4_api.h
index 24f8ec843a..950751a525 100644
--- a/lib/node/rte_node_ip4_api.h
+++ b/lib/node/rte_node_ip4_api.h
@@ -15,15 +15,15 @@
  * This API allows to do control path functions of ip4_* nodes
  * like ip4_lookup, ip4_rewrite.
  */
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 #include <rte_compat.h>
 
 #include <rte_graph.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * IP4 lookup next nodes.
  */
diff --git a/lib/node/rte_node_ip6_api.h b/lib/node/rte_node_ip6_api.h
index a538dc2ea7..f467aac7b6 100644
--- a/lib/node/rte_node_ip6_api.h
+++ b/lib/node/rte_node_ip6_api.h
@@ -15,13 +15,13 @@
  * This API allows to do control path functions of ip6_* nodes
  * like ip6_lookup, ip6_rewrite.
  */
+#include <rte_common.h>
+#include <rte_compat.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_common.h>
-#include <rte_compat.h>
-
 /**
  * IP6 lookup next nodes.
  */
diff --git a/lib/node/rte_node_udp4_input_api.h b/lib/node/rte_node_udp4_input_api.h
index c873acbbe0..694660bd6a 100644
--- a/lib/node/rte_node_udp4_input_api.h
+++ b/lib/node/rte_node_udp4_input_api.h
@@ -16,14 +16,14 @@
  * like udp4_input.
  *
  */
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_common.h>
 #include <rte_compat.h>
 
 #include "rte_graph.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
 /**
  * UDP4 lookup next nodes.
  */
diff --git a/lib/pci/rte_pci.h b/lib/pci/rte_pci.h
index c26fc77209..9a50a12142 100644
--- a/lib/pci/rte_pci.h
+++ b/lib/pci/rte_pci.h
@@ -12,14 +12,14 @@
  * RTE PCI Library
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdio.h>
 #include <inttypes.h>
 #include <sys/types.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /*
  * Conventional PCI and PCI-X Mode 1 devices have 256 bytes of
  * configuration space.  PCI-X Mode 2 and PCIe devices have 4096 bytes of
diff --git a/lib/pdcp/rte_pdcp.h b/lib/pdcp/rte_pdcp.h
index f74524f83d..15fcbf9607 100644
--- a/lib/pdcp/rte_pdcp.h
+++ b/lib/pdcp/rte_pdcp.h
@@ -19,10 +19,6 @@
 #include <rte_pdcp_hdr.h>
 #include <rte_security.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /* Forward declarations. */
 struct rte_pdcp_entity;
 
@@ -373,6 +369,10 @@ rte_pdcp_t_reordering_expiry_handle(const struct rte_pdcp_entity *entity,
  */
 #include <rte_pdcp_group.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/pipeline/rte_pipeline.h b/lib/pipeline/rte_pipeline.h
index 0c7994b4f2..c9e7172453 100644
--- a/lib/pipeline/rte_pipeline.h
+++ b/lib/pipeline/rte_pipeline.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_PIPELINE_H__
 #define __INCLUDE_RTE_PIPELINE_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Pipeline
@@ -59,6 +55,10 @@ extern "C" {
 #include <rte_table.h>
 #include <rte_common.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct rte_mbuf;
 
 /*
diff --git a/lib/pipeline/rte_port_in_action.h b/lib/pipeline/rte_port_in_action.h
index ec2994599f..9d17bae988 100644
--- a/lib/pipeline/rte_port_in_action.h
+++ b/lib/pipeline/rte_port_in_action.h
@@ -46,10 +46,6 @@
  * @b EXPERIMENTAL: this API may change without prior notice
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_compat.h>
@@ -57,6 +53,10 @@ extern "C" {
 
 #include "rte_pipeline.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Input port actions. */
 enum rte_port_in_action_type {
 	/** Filter selected input packets. */
diff --git a/lib/pipeline/rte_swx_ctl.h b/lib/pipeline/rte_swx_ctl.h
index 6ef2551ab5..c4e63753f5 100644
--- a/lib/pipeline/rte_swx_ctl.h
+++ b/lib/pipeline/rte_swx_ctl.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_CTL_H__
 #define __INCLUDE_RTE_SWX_CTL_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Pipeline Control
@@ -22,6 +18,10 @@ extern "C" {
 #include "rte_swx_port.h"
 #include "rte_swx_table.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct rte_swx_pipeline;
 
 /** Name size. */
diff --git a/lib/pipeline/rte_swx_extern.h b/lib/pipeline/rte_swx_extern.h
index e10e963d63..5d5b508833 100644
--- a/lib/pipeline/rte_swx_extern.h
+++ b/lib/pipeline/rte_swx_extern.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_EXTERN_H__
 #define __INCLUDE_RTE_SWX_EXTERN_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Extern objects and functions
@@ -91,8 +87,4 @@ typedef int
 typedef int
 (*rte_swx_extern_func_t)(void *mailbox);
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif
diff --git a/lib/pipeline/rte_swx_ipsec.h b/lib/pipeline/rte_swx_ipsec.h
index 7c07fdc739..d2e5abef7d 100644
--- a/lib/pipeline/rte_swx_ipsec.h
+++ b/lib/pipeline/rte_swx_ipsec.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_IPSEC_H__
 #define __INCLUDE_RTE_SWX_IPSEC_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Internet Protocol Security (IPsec)
@@ -53,6 +49,10 @@ extern "C" {
 #include <rte_compat.h>
 #include <rte_crypto_sym.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * IPsec Setup API
  */
diff --git a/lib/pipeline/rte_swx_pipeline.h b/lib/pipeline/rte_swx_pipeline.h
index 25df042d3b..882bd4bf6f 100644
--- a/lib/pipeline/rte_swx_pipeline.h
+++ b/lib/pipeline/rte_swx_pipeline.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_PIPELINE_H__
 #define __INCLUDE_RTE_SWX_PIPELINE_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Pipeline
@@ -22,6 +18,10 @@ extern "C" {
 #include "rte_swx_table.h"
 #include "rte_swx_extern.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Name size. */
 #ifndef RTE_SWX_NAME_SIZE
 #define RTE_SWX_NAME_SIZE 64
diff --git a/lib/pipeline/rte_swx_pipeline_spec.h b/lib/pipeline/rte_swx_pipeline_spec.h
index dd88c0bfab..077b407c0a 100644
--- a/lib/pipeline/rte_swx_pipeline_spec.h
+++ b/lib/pipeline/rte_swx_pipeline_spec.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_PIPELINE_SPEC_H__
 #define __INCLUDE_RTE_SWX_PIPELINE_SPEC_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <stdio.h>
 
@@ -15,6 +11,10 @@ extern "C" {
 
 #include <rte_swx_pipeline.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /*
  * extobj.
  *
diff --git a/lib/pipeline/rte_table_action.h b/lib/pipeline/rte_table_action.h
index 5dffbeb700..bab4bfd2e2 100644
--- a/lib/pipeline/rte_table_action.h
+++ b/lib/pipeline/rte_table_action.h
@@ -52,10 +52,6 @@
  * @b EXPERIMENTAL: this API may change without prior notice
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_compat.h>
@@ -65,6 +61,10 @@ extern "C" {
 
 #include "rte_pipeline.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Table actions. */
 enum rte_table_action_type {
 	/** Forward to next pipeline table, output port or drop. */
diff --git a/lib/port/rte_port.h b/lib/port/rte_port.h
index 0e30db371e..81f57c41a5 100644
--- a/lib/port/rte_port.h
+++ b/lib/port/rte_port.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_PORT_H__
 #define __INCLUDE_RTE_PORT_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Port
@@ -224,8 +220,4 @@ struct rte_port_out_ops {
 	rte_port_out_op_stats_read f_stats;   /**< Stats */
 };
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif
diff --git a/lib/port/rte_port_ethdev.h b/lib/port/rte_port_ethdev.h
index e07021cb89..7729ff0da3 100644
--- a/lib/port/rte_port_ethdev.h
+++ b/lib/port/rte_port_ethdev.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_PORT_ETHDEV_H__
 #define __INCLUDE_RTE_PORT_ETHDEV_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Port Ethernet Device
@@ -21,6 +17,10 @@ extern "C" {
 
 #include "rte_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** ethdev_reader port parameters */
 struct rte_port_ethdev_reader_params {
 	/** NIC RX port ID */
diff --git a/lib/port/rte_port_eventdev.h b/lib/port/rte_port_eventdev.h
index 0efb8e1021..d9eccf07d4 100644
--- a/lib/port/rte_port_eventdev.h
+++ b/lib/port/rte_port_eventdev.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_PORT_EVENTDEV_H__
 #define __INCLUDE_RTE_PORT_EVENTDEV_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Port Eventdev Interface
@@ -24,6 +20,10 @@ extern "C" {
 
 #include "rte_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Eventdev_reader port parameters */
 struct rte_port_eventdev_reader_params {
 	/** Eventdev Device ID */
diff --git a/lib/port/rte_port_fd.h b/lib/port/rte_port_fd.h
index 885b9ada22..40a5e4a426 100644
--- a/lib/port/rte_port_fd.h
+++ b/lib/port/rte_port_fd.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_PORT_FD_H__
 #define __INCLUDE_RTE_PORT_FD_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Port FD Device
@@ -21,6 +17,10 @@ extern "C" {
 
 #include "rte_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** fd_reader port parameters */
 struct rte_port_fd_reader_params {
 	/** File descriptor */
diff --git a/lib/port/rte_port_frag.h b/lib/port/rte_port_frag.h
index 4055872e8d..9a10f10523 100644
--- a/lib/port/rte_port_frag.h
+++ b/lib/port/rte_port_frag.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_PORT_IP_FRAG_H__
 #define __INCLUDE_RTE_PORT_IP_FRAG_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Port for IPv4 Fragmentation
@@ -31,6 +27,10 @@ extern "C" {
 
 #include "rte_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** ring_reader_ipv4_frag port parameters */
 struct rte_port_ring_reader_frag_params {
 	/** Underlying single consumer ring that has to be pre-initialized. */
diff --git a/lib/port/rte_port_ras.h b/lib/port/rte_port_ras.h
index 94cfb3ed92..86e36f5362 100644
--- a/lib/port/rte_port_ras.h
+++ b/lib/port/rte_port_ras.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_PORT_RAS_H__
 #define __INCLUDE_RTE_PORT_RAS_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Port for IPv4 Reassembly
@@ -31,6 +27,10 @@ extern "C" {
 
 #include "rte_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** ring_writer_ipv4_ras port parameters */
 struct rte_port_ring_writer_ras_params {
 	/** Underlying single consumer ring that has to be pre-initialized. */
diff --git a/lib/port/rte_port_ring.h b/lib/port/rte_port_ring.h
index 027928c924..2089d0889b 100644
--- a/lib/port/rte_port_ring.h
+++ b/lib/port/rte_port_ring.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_PORT_RING_H__
 #define __INCLUDE_RTE_PORT_RING_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Port Ring
@@ -27,6 +23,10 @@ extern "C" {
 
 #include "rte_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** ring_reader port parameters */
 struct rte_port_ring_reader_params {
 	/** Underlying consumer ring that has to be pre-initialized */
diff --git a/lib/port/rte_port_sched.h b/lib/port/rte_port_sched.h
index 251380ef80..1bf08ae6a9 100644
--- a/lib/port/rte_port_sched.h
+++ b/lib/port/rte_port_sched.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_PORT_SCHED_H__
 #define __INCLUDE_RTE_PORT_SCHED_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Port Hierarchical Scheduler
@@ -23,6 +19,10 @@ extern "C" {
 
 #include "rte_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** sched_reader port parameters */
 struct rte_port_sched_reader_params {
 	/** Underlying pre-initialized rte_sched_port */
diff --git a/lib/port/rte_port_source_sink.h b/lib/port/rte_port_source_sink.h
index bcdbaf1e40..3122dd5038 100644
--- a/lib/port/rte_port_source_sink.h
+++ b/lib/port/rte_port_source_sink.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_PORT_SOURCE_SINK_H__
 #define __INCLUDE_RTE_PORT_SOURCE_SINK_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Port Source/Sink
@@ -19,6 +15,10 @@ extern "C" {
 
 #include "rte_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** source port parameters */
 struct rte_port_source_params {
 	/** Pre-initialized buffer pool */
diff --git a/lib/port/rte_port_sym_crypto.h b/lib/port/rte_port_sym_crypto.h
index 6532b4388a..d03cdc1e8b 100644
--- a/lib/port/rte_port_sym_crypto.h
+++ b/lib/port/rte_port_sym_crypto.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_PORT_SYM_CRYPTO_H__
 #define __INCLUDE_RTE_PORT_SYM_CRYPTO_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Port sym crypto Interface
@@ -23,6 +19,10 @@ extern "C" {
 
 #include "rte_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Function prototype for reader post action. */
 typedef void (*rte_port_sym_crypto_reader_callback_fn)(struct rte_mbuf **pkts,
 		uint16_t n_pkts, void *arg);
diff --git a/lib/port/rte_swx_port.h b/lib/port/rte_swx_port.h
index 1dbd95ae87..99f6dea2f3 100644
--- a/lib/port/rte_swx_port.h
+++ b/lib/port/rte_swx_port.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_PORT_H__
 #define __INCLUDE_RTE_SWX_PORT_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Port
@@ -240,8 +236,4 @@ struct rte_swx_port_out_ops {
 	rte_swx_port_out_stats_read_t stats_read;
 };
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif
diff --git a/lib/port/rte_swx_port_ethdev.h b/lib/port/rte_swx_port_ethdev.h
index cbc2d7b213..1828031e67 100644
--- a/lib/port/rte_swx_port_ethdev.h
+++ b/lib/port/rte_swx_port_ethdev.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_PORT_ETHDEV_H__
 #define __INCLUDE_RTE_SWX_PORT_ETHDEV_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Ethernet Device Input and Output Ports
@@ -17,6 +13,10 @@ extern "C" {
 
 #include "rte_swx_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Ethernet device input port (reader) creation parameters. */
 struct rte_swx_port_ethdev_reader_params {
 	/** Name of a valid and fully configured Ethernet device. */
diff --git a/lib/port/rte_swx_port_fd.h b/lib/port/rte_swx_port_fd.h
index e61719c8f6..63529cf0ab 100644
--- a/lib/port/rte_swx_port_fd.h
+++ b/lib/port/rte_swx_port_fd.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_SWX_PORT_FD_H__
 #define __INCLUDE_RTE_SWX_PORT_FD_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX FD Input and Output Ports
@@ -18,6 +14,10 @@ extern "C" {
 
 #include "rte_swx_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** fd_reader port parameters */
 struct rte_swx_port_fd_reader_params {
 	/** File descriptor. Must be valid and opened in non-blocking mode. */
diff --git a/lib/port/rte_swx_port_ring.h b/lib/port/rte_swx_port_ring.h
index efc485fb08..ef241c3fee 100644
--- a/lib/port/rte_swx_port_ring.h
+++ b/lib/port/rte_swx_port_ring.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_SWX_PORT_RING_H__
 #define __INCLUDE_RTE_SWX_PORT_RING_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Ring Input and Output Ports
@@ -18,6 +14,10 @@ extern "C" {
 
 #include "rte_swx_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Ring input port (reader) creation parameters. */
 struct rte_swx_port_ring_reader_params {
 	/** Name of valid RTE ring. */
diff --git a/lib/port/rte_swx_port_source_sink.h b/lib/port/rte_swx_port_source_sink.h
index 91bcbf74f4..e3ca7cfbb4 100644
--- a/lib/port/rte_swx_port_source_sink.h
+++ b/lib/port/rte_swx_port_source_sink.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_PORT_SOURCE_SINK_H__
 #define __INCLUDE_RTE_SWX_PORT_SOURCE_SINK_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Source and Sink Ports
@@ -15,6 +11,10 @@ extern "C" {
 
 #include "rte_swx_port.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Maximum number of packets to read from the PCAP file. */
 #ifndef RTE_SWX_PORT_SOURCE_PKTS_MAX
 #define RTE_SWX_PORT_SOURCE_PKTS_MAX 1024
diff --git a/lib/rawdev/rte_rawdev.h b/lib/rawdev/rte_rawdev.h
index 640037b524..3fc471526e 100644
--- a/lib/rawdev/rte_rawdev.h
+++ b/lib/rawdev/rte_rawdev.h
@@ -14,13 +14,13 @@
  * no specific type already available in DPDK.
  */
 
+#include <rte_common.h>
+#include <rte_memory.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_common.h>
-#include <rte_memory.h>
-
 /* Rawdevice object - essentially a void to be typecast by implementation */
 typedef void *rte_rawdev_obj_t;
 
diff --git a/lib/rawdev/rte_rawdev_pmd.h b/lib/rawdev/rte_rawdev_pmd.h
index 22b406444d..408ed461a4 100644
--- a/lib/rawdev/rte_rawdev_pmd.h
+++ b/lib/rawdev/rte_rawdev_pmd.h
@@ -13,10 +13,6 @@
  * any application.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <string.h>
 
 #include <dev_driver.h>
@@ -26,6 +22,10 @@ extern "C" {
 
 #include "rte_rawdev.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 extern int librawdev_logtype;
 #define RTE_LOGTYPE_RAWDEV librawdev_logtype
 
diff --git a/lib/rcu/rte_rcu_qsbr.h b/lib/rcu/rte_rcu_qsbr.h
index ed3dd6d3d2..550fadf56a 100644
--- a/lib/rcu/rte_rcu_qsbr.h
+++ b/lib/rcu/rte_rcu_qsbr.h
@@ -21,10 +21,6 @@
  * entered quiescent state.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <inttypes.h>
 #include <stdalign.h>
 #include <stdbool.h>
@@ -36,6 +32,10 @@ extern "C" {
 #include <rte_atomic.h>
 #include <rte_ring.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 extern int rte_rcu_log_type;
 #define RTE_LOGTYPE_RCU rte_rcu_log_type
 
diff --git a/lib/regexdev/rte_regexdev.h b/lib/regexdev/rte_regexdev.h
index a50b841b1e..b18a1d4251 100644
--- a/lib/regexdev/rte_regexdev.h
+++ b/lib/regexdev/rte_regexdev.h
@@ -194,10 +194,6 @@
  * - rte_regexdev_dequeue_burst()
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_compat.h>
 #include <rte_common.h>
 #include <rte_dev.h>
@@ -1428,6 +1424,10 @@ struct rte_regex_ops {
 
 #include "rte_regexdev_core.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @warning
  * @b EXPERIMENTAL: this API may change without prior notice.
diff --git a/lib/ring/rte_ring.h b/lib/ring/rte_ring.h
index c709f30497..11ca69c73d 100644
--- a/lib/ring/rte_ring.h
+++ b/lib/ring/rte_ring.h
@@ -34,13 +34,13 @@
  * for more information.
  */
 
+#include <rte_ring_core.h>
+#include <rte_ring_elem.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_ring_core.h>
-#include <rte_ring_elem.h>
-
 /**
  * Calculate the memory size needed for a ring
  *
diff --git a/lib/ring/rte_ring_core.h b/lib/ring/rte_ring_core.h
index 270869d214..6cd6ce9884 100644
--- a/lib/ring/rte_ring_core.h
+++ b/lib/ring/rte_ring_core.h
@@ -19,10 +19,6 @@
  * instead.
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdalign.h>
 #include <stdio.h>
 #include <stdint.h>
@@ -167,8 +163,4 @@ struct rte_ring {
 #define RING_F_MP_HTS_ENQ 0x0020 /**< The default enqueue is "MP HTS". */
 #define RING_F_MC_HTS_DEQ 0x0040 /**< The default dequeue is "MC HTS". */
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* _RTE_RING_CORE_H_ */
diff --git a/lib/ring/rte_ring_elem.h b/lib/ring/rte_ring_elem.h
index 7f7d4951d3..506f686884 100644
--- a/lib/ring/rte_ring_elem.h
+++ b/lib/ring/rte_ring_elem.h
@@ -16,10 +16,6 @@
  * RTE Ring with user defined element size
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_ring_core.h>
 #include <rte_ring_elem_pvt.h>
 
@@ -699,6 +695,10 @@ rte_ring_dequeue_burst_elem(struct rte_ring *r, void *obj_table,
 
 #include <rte_ring.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/ring/rte_ring_hts.h b/lib/ring/rte_ring_hts.h
index 9a5938ac58..a41acea740 100644
--- a/lib/ring/rte_ring_hts.h
+++ b/lib/ring/rte_ring_hts.h
@@ -24,12 +24,12 @@
  * To achieve that 64-bit CAS is used by head update routine.
  */
 
+#include <rte_ring_hts_elem_pvt.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_ring_hts_elem_pvt.h>
-
 /**
  * Enqueue several objects on the HTS ring (multi-producers safe).
  *
diff --git a/lib/ring/rte_ring_peek.h b/lib/ring/rte_ring_peek.h
index c0621d12e2..2312f52668 100644
--- a/lib/ring/rte_ring_peek.h
+++ b/lib/ring/rte_ring_peek.h
@@ -43,12 +43,12 @@
  * with enqueue(/dequeue) operation till _finish_ completes.
  */
 
+#include <rte_ring_peek_elem_pvt.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_ring_peek_elem_pvt.h>
-
 /**
  * Start to enqueue several objects on the ring.
  * Note that no actual objects are put in the queue by this function,
diff --git a/lib/ring/rte_ring_peek_zc.h b/lib/ring/rte_ring_peek_zc.h
index 0b5e34b731..3254fe0481 100644
--- a/lib/ring/rte_ring_peek_zc.h
+++ b/lib/ring/rte_ring_peek_zc.h
@@ -67,12 +67,12 @@
  * with enqueue/dequeue operation till _finish_ completes.
  */
 
+#include <rte_ring_peek_elem_pvt.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_ring_peek_elem_pvt.h>
-
 /**
  * Ring zero-copy information structure.
  *
diff --git a/lib/ring/rte_ring_rts.h b/lib/ring/rte_ring_rts.h
index 50fc8f74db..d7a3863c83 100644
--- a/lib/ring/rte_ring_rts.h
+++ b/lib/ring/rte_ring_rts.h
@@ -51,12 +51,12 @@
  * By default HTD_MAX == ring.capacity / 8.
  */
 
+#include <rte_ring_rts_elem_pvt.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_ring_rts_elem_pvt.h>
-
 /**
  * Enqueue several objects on the RTS ring (multi-producers safe).
  *
diff --git a/lib/sched/rte_approx.h b/lib/sched/rte_approx.h
index b60086330e..738e33a98b 100644
--- a/lib/sched/rte_approx.h
+++ b/lib/sched/rte_approx.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_APPROX_H__
 #define __INCLUDE_RTE_APPROX_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Rational Approximation
@@ -20,6 +16,10 @@ extern "C" {
 
 #include <stdint.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Find best rational approximation
  *
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
index 1477a47700..2a385ffdba 100644
--- a/lib/sched/rte_pie.h
+++ b/lib/sched/rte_pie.h
@@ -5,10 +5,6 @@
 #ifndef __RTE_PIE_H_INCLUDED__
 #define __RTE_PIE_H_INCLUDED__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * Proportional Integral controller Enhanced (PIE)
@@ -20,6 +16,10 @@ extern "C" {
 #include <rte_debug.h>
 #include <rte_cycles.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
 				     * to start measurement cycle (bytes)
 				     */
diff --git a/lib/sched/rte_red.h b/lib/sched/rte_red.h
index afaa35fcd6..e62abb9295 100644
--- a/lib/sched/rte_red.h
+++ b/lib/sched/rte_red.h
@@ -5,10 +5,6 @@
 #ifndef __RTE_RED_H_INCLUDED__
 #define __RTE_RED_H_INCLUDED__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Random Early Detection (RED)
@@ -20,6 +16,10 @@ extern "C" {
 #include <rte_cycles.h>
 #include <rte_branch_prediction.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_RED_SCALING                     10         /**< Fraction size for fixed-point */
 #define RTE_RED_S                           (1 << 22)  /**< Packet size multiplied by number of leaf queues */
 #define RTE_RED_MAX_TH_MAX                  1023       /**< Max threshold limit in fixed point format */
diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
index b882c4a882..222e6b3583 100644
--- a/lib/sched/rte_sched.h
+++ b/lib/sched/rte_sched.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_SCHED_H__
 #define __INCLUDE_RTE_SCHED_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Hierarchical Scheduler
@@ -62,6 +58,10 @@ extern "C" {
 #include "rte_red.h"
 #include "rte_pie.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Maximum number of queues per pipe.
  * Note that the multiple queues (power of 2) can only be assigned to
  * lowest priority (best-effort) traffic class. Other higher priority traffic
diff --git a/lib/sched/rte_sched_common.h b/lib/sched/rte_sched_common.h
index 573d164569..a5acb9c08a 100644
--- a/lib/sched/rte_sched_common.h
+++ b/lib/sched/rte_sched_common.h
@@ -5,13 +5,13 @@
 #ifndef __INCLUDE_RTE_SCHED_COMMON_H__
 #define __INCLUDE_RTE_SCHED_COMMON_H__
 
+#include <stdint.h>
+#include <sys/types.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-#include <sys/types.h>
-
 #if 0
 static inline uint32_t
 rte_min_pos_4_u16(uint16_t *x)
diff --git a/lib/security/rte_security.h b/lib/security/rte_security.h
index 1c8474b74f..7a9bafa0fa 100644
--- a/lib/security/rte_security.h
+++ b/lib/security/rte_security.h
@@ -12,10 +12,6 @@
  * RTE Security Common Definitions
  */
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <sys/types.h>
 
 #include <rte_compat.h>
@@ -24,6 +20,10 @@ extern "C" {
 #include <rte_ip.h>
 #include <rte_mbuf_dyn.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** IPSec protocol mode */
 enum rte_security_ipsec_sa_mode {
 	RTE_SECURITY_IPSEC_SA_MODE_TRANSPORT = 1,
diff --git a/lib/security/rte_security_driver.h b/lib/security/rte_security_driver.h
index 9bb5052a4c..2ceb145066 100644
--- a/lib/security/rte_security_driver.h
+++ b/lib/security/rte_security_driver.h
@@ -12,13 +12,13 @@
  * RTE Security Common Definitions
  */
 
+#include <rte_compat.h>
+#include "rte_security.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <rte_compat.h>
-#include "rte_security.h"
-
 /**
  * @internal
  * Security session to be used by library for internal usage
diff --git a/lib/stack/rte_stack.h b/lib/stack/rte_stack.h
index 3325757568..4439adfc42 100644
--- a/lib/stack/rte_stack.h
+++ b/lib/stack/rte_stack.h
@@ -15,10 +15,6 @@
 #ifndef _RTE_STACK_H_
 #define _RTE_STACK_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdalign.h>
 
 #include <rte_debug.h>
@@ -95,6 +91,10 @@ struct __rte_cache_aligned rte_stack {
 #include "rte_stack_std.h"
 #include "rte_stack_lf.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Push several objects on the stack (MT-safe).
  *
diff --git a/lib/table/rte_lru.h b/lib/table/rte_lru.h
index 88229d8632..28aab12923 100644
--- a/lib/table/rte_lru.h
+++ b/lib/table/rte_lru.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_LRU_H__
 #define __INCLUDE_RTE_LRU_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <rte_config.h>
 #ifdef RTE_ARCH_X86_64
 #include "rte_lru_x86.h"
@@ -86,8 +82,4 @@ do {									\
 
 #endif
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif
diff --git a/lib/table/rte_lru_arm64.h b/lib/table/rte_lru_arm64.h
index f19b0bdb4e..f9a4678ee0 100644
--- a/lib/table/rte_lru_arm64.h
+++ b/lib/table/rte_lru_arm64.h
@@ -5,14 +5,14 @@
 #ifndef __RTE_LRU_ARM64_H__
 #define __RTE_LRU_ARM64_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <rte_vect.h>
 #include <rte_bitops.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifndef RTE_TABLE_HASH_LRU_STRATEGY
 #ifdef __ARM_NEON
 #define RTE_TABLE_HASH_LRU_STRATEGY                        3
diff --git a/lib/table/rte_lru_x86.h b/lib/table/rte_lru_x86.h
index ddfb8c1c8c..93f4a136a8 100644
--- a/lib/table/rte_lru_x86.h
+++ b/lib/table/rte_lru_x86.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_LRU_X86_H__
 #define __INCLUDE_RTE_LRU_X86_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_config.h>
@@ -97,8 +93,4 @@ do {									\
 
 #endif
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif
diff --git a/lib/table/rte_swx_hash_func.h b/lib/table/rte_swx_hash_func.h
index 04f3d543e7..25a93ac60a 100644
--- a/lib/table/rte_swx_hash_func.h
+++ b/lib/table/rte_swx_hash_func.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_HASH_FUNC_H__
 #define __INCLUDE_RTE_SWX_HASH_FUNC_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Hash Function
@@ -32,8 +28,4 @@ typedef uint32_t
 		       uint32_t length,
 		       uint32_t seed);
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif
diff --git a/lib/table/rte_swx_keycmp.h b/lib/table/rte_swx_keycmp.h
index 09fb1be869..b0ed819307 100644
--- a/lib/table/rte_swx_keycmp.h
+++ b/lib/table/rte_swx_keycmp.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_KEYCMP_H__
 #define __INCLUDE_RTE_SWX_KEYCMP_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Key Comparison Functions
@@ -16,6 +12,10 @@ extern "C" {
 #include <stdint.h>
 #include <string.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Key comparison function prototype
  *
diff --git a/lib/table/rte_swx_table.h b/lib/table/rte_swx_table.h
index ac01e19781..cf7dcea8a1 100644
--- a/lib/table/rte_swx_table.h
+++ b/lib/table/rte_swx_table.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_TABLE_H__
 #define __INCLUDE_RTE_SWX_TABLE_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Table
@@ -314,8 +310,4 @@ struct rte_swx_table_ops {
 	rte_swx_table_free_t free;
 };
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif
diff --git a/lib/table/rte_swx_table_em.h b/lib/table/rte_swx_table_em.h
index b7423dd060..592541f01f 100644
--- a/lib/table/rte_swx_table_em.h
+++ b/lib/table/rte_swx_table_em.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_TABLE_EM_H__
 #define __INCLUDE_RTE_SWX_TABLE_EM_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Exact Match Table
@@ -16,6 +12,10 @@ extern "C" {
 
 #include <rte_swx_table.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Exact match table operations - unoptimized. */
 extern struct rte_swx_table_ops rte_swx_table_exact_match_unoptimized_ops;
 
diff --git a/lib/table/rte_swx_table_learner.h b/lib/table/rte_swx_table_learner.h
index c5ea015b8d..9a18be083d 100644
--- a/lib/table/rte_swx_table_learner.h
+++ b/lib/table/rte_swx_table_learner.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_TABLE_LEARNER_H__
 #define __INCLUDE_RTE_SWX_TABLE_LEARNER_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Learner Table
@@ -53,6 +49,10 @@ extern "C" {
 
 #include "rte_swx_hash_func.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Maximum number of key timeout values per learner table. */
 #ifndef RTE_SWX_TABLE_LEARNER_N_KEY_TIMEOUTS_MAX
 #define RTE_SWX_TABLE_LEARNER_N_KEY_TIMEOUTS_MAX 16
diff --git a/lib/table/rte_swx_table_selector.h b/lib/table/rte_swx_table_selector.h
index 05863cc90b..ef29bdb6b0 100644
--- a/lib/table/rte_swx_table_selector.h
+++ b/lib/table/rte_swx_table_selector.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_TABLE_SELECTOR_H__
 #define __INCLUDE_RTE_SWX_TABLE_SELECTOR_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Selector Table
@@ -21,6 +17,10 @@ extern "C" {
 
 #include "rte_swx_table.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Selector table creation parameters. */
 struct rte_swx_table_selector_params {
 	/** Group ID offset. */
diff --git a/lib/table/rte_swx_table_wm.h b/lib/table/rte_swx_table_wm.h
index 4fd52c0a17..7eb6f8e2a6 100644
--- a/lib/table/rte_swx_table_wm.h
+++ b/lib/table/rte_swx_table_wm.h
@@ -4,10 +4,6 @@
 #ifndef __INCLUDE_RTE_SWX_TABLE_WM_H__
 #define __INCLUDE_RTE_SWX_TABLE_WM_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE SWX Wildcard Match Table
@@ -16,6 +12,10 @@ extern "C" {
 
 #include <rte_swx_table.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Wildcard match table operations. */
 extern struct rte_swx_table_ops rte_swx_table_wildcard_match_ops;
 
diff --git a/lib/table/rte_table.h b/lib/table/rte_table.h
index 9a5faf0e32..2743070b32 100644
--- a/lib/table/rte_table.h
+++ b/lib/table/rte_table.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_TABLE_H__
 #define __INCLUDE_RTE_TABLE_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Table
@@ -264,8 +260,4 @@ struct rte_table_ops {
 	rte_table_op_stats_read f_stats;              /**< Stats */
 };
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif
diff --git a/lib/table/rte_table_acl.h b/lib/table/rte_table_acl.h
index 1cb7b9fbbd..61af7b88e4 100644
--- a/lib/table/rte_table_acl.h
+++ b/lib/table/rte_table_acl.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_TABLE_ACL_H__
 #define __INCLUDE_RTE_TABLE_ACL_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Table ACL
@@ -25,6 +21,10 @@ extern "C" {
 
 #include "rte_table.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** ACL table parameters */
 struct rte_table_acl_params {
 	/** Name */
diff --git a/lib/table/rte_table_array.h b/lib/table/rte_table_array.h
index fad83b0588..b2a7b95d68 100644
--- a/lib/table/rte_table_array.h
+++ b/lib/table/rte_table_array.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_TABLE_ARRAY_H__
 #define __INCLUDE_RTE_TABLE_ARRAY_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Table Array
@@ -20,6 +16,10 @@ extern "C" {
 
 #include "rte_table.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Array table parameters */
 struct rte_table_array_params {
 	/** Number of array entries. Has to be a power of two. */
diff --git a/lib/table/rte_table_hash.h b/lib/table/rte_table_hash.h
index 6698621dae..ff8fc9e9ce 100644
--- a/lib/table/rte_table_hash.h
+++ b/lib/table/rte_table_hash.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_TABLE_HASH_H__
 #define __INCLUDE_RTE_TABLE_HASH_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Table Hash
@@ -52,6 +48,10 @@ extern "C" {
 
 #include "rte_table.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Hash function */
 typedef uint64_t (*rte_table_hash_op_hash)(
 	void *key,
diff --git a/lib/table/rte_table_hash_cuckoo.h b/lib/table/rte_table_hash_cuckoo.h
index 3a55d28e9b..55aa12216a 100644
--- a/lib/table/rte_table_hash_cuckoo.h
+++ b/lib/table/rte_table_hash_cuckoo.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_TABLE_HASH_CUCKOO_H__
 #define __INCLUDE_RTE_TABLE_HASH_CUCKOO_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Table Hash Cuckoo
@@ -20,6 +16,10 @@ extern "C" {
 
 #include "rte_table.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Hash table parameters */
 struct rte_table_hash_cuckoo_params {
 	/** Name */
diff --git a/lib/table/rte_table_hash_func.h b/lib/table/rte_table_hash_func.h
index aa779c2182..ca56e6c885 100644
--- a/lib/table/rte_table_hash_func.h
+++ b/lib/table/rte_table_hash_func.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_TABLE_HASH_FUNC_H__
 #define __INCLUDE_RTE_TABLE_HASH_FUNC_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_compat.h>
@@ -18,16 +14,28 @@ extern "C" {
 
 #include <x86intrin.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline uint64_t
 rte_crc32_u64(uint64_t crc, uint64_t v)
 {
 	return _mm_crc32_u64(crc, v);
 }
 
+#ifdef __cplusplus
+}
+#endif
+
 #elif defined(RTE_ARCH_ARM64) && defined(__ARM_FEATURE_CRC32)
 #include "rte_table_hash_func_arm64.h"
 #else
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 static inline uint64_t
 rte_crc32_u64(uint64_t crc, uint64_t v)
 {
@@ -44,6 +52,14 @@ rte_crc32_u64(uint64_t crc, uint64_t v)
 	return crc;
 }
 
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
+#ifdef __cplusplus
+extern "C" {
 #endif
 
 __rte_experimental
diff --git a/lib/table/rte_table_lpm.h b/lib/table/rte_table_lpm.h
index dde32deed9..59b9bdee89 100644
--- a/lib/table/rte_table_lpm.h
+++ b/lib/table/rte_table_lpm.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_TABLE_LPM_H__
 #define __INCLUDE_RTE_TABLE_LPM_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Table LPM for IPv4
@@ -45,6 +41,10 @@ extern "C" {
 
 #include "rte_table.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** LPM table parameters */
 struct rte_table_lpm_params {
 	/** Table name */
diff --git a/lib/table/rte_table_lpm_ipv6.h b/lib/table/rte_table_lpm_ipv6.h
index 96ddbd32c2..166a5ba9ee 100644
--- a/lib/table/rte_table_lpm_ipv6.h
+++ b/lib/table/rte_table_lpm_ipv6.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_TABLE_LPM_IPV6_H__
 #define __INCLUDE_RTE_TABLE_LPM_IPV6_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Table LPM for IPv6
@@ -45,6 +41,10 @@ extern "C" {
 
 #include "rte_table.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_LPM_IPV6_ADDR_SIZE 16
 
 /** LPM table parameters */
diff --git a/lib/table/rte_table_stub.h b/lib/table/rte_table_stub.h
index 846526ea99..f7e589df16 100644
--- a/lib/table/rte_table_stub.h
+++ b/lib/table/rte_table_stub.h
@@ -5,10 +5,6 @@
 #ifndef __INCLUDE_RTE_TABLE_STUB_H__
 #define __INCLUDE_RTE_TABLE_STUB_H__
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  * RTE Table Stub
@@ -18,6 +14,10 @@ extern "C" {
 
 #include "rte_table.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Stub table parameters: NONE */
 
 /** Stub table operations */
diff --git a/lib/telemetry/rte_telemetry.h b/lib/telemetry/rte_telemetry.h
index cab9daa6fe..463819e2bf 100644
--- a/lib/telemetry/rte_telemetry.h
+++ b/lib/telemetry/rte_telemetry.h
@@ -5,14 +5,14 @@
 #ifndef _RTE_TELEMETRY_H_
 #define _RTE_TELEMETRY_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 #include <rte_compat.h>
 #include <rte_common.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Maximum length for string used in object. */
 #define RTE_TEL_MAX_STRING_LEN 128
 /** Maximum length of string. */
diff --git a/lib/vhost/rte_vdpa.h b/lib/vhost/rte_vdpa.h
index 6ac85d1bbf..18e273c20f 100644
--- a/lib/vhost/rte_vdpa.h
+++ b/lib/vhost/rte_vdpa.h
@@ -5,10 +5,6 @@
 #ifndef _RTE_VDPA_H_
 #define _RTE_VDPA_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * @file
  *
@@ -17,6 +13,10 @@ extern "C" {
 
 #include <stdint.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Maximum name length for statistics counters */
 #define RTE_VDPA_STATS_NAME_SIZE 64
 
diff --git a/lib/vhost/rte_vhost.h b/lib/vhost/rte_vhost.h
index b0434c4b8d..c7a5f56df8 100644
--- a/lib/vhost/rte_vhost.h
+++ b/lib/vhost/rte_vhost.h
@@ -18,10 +18,6 @@
 #include <rte_memory.h>
 #include <rte_mempool.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #ifndef __cplusplus
 /* These are not C++-aware. */
 #include <linux/vhost.h>
@@ -29,6 +25,10 @@ extern "C" {
 #include <linux/virtio_net.h>
 #endif
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_VHOST_USER_CLIENT		(1ULL << 0)
 #define RTE_VHOST_USER_NO_RECONNECT	(1ULL << 1)
 #define RTE_VHOST_USER_RESERVED_1	(1ULL << 2)
diff --git a/lib/vhost/rte_vhost_async.h b/lib/vhost/rte_vhost_async.h
index 8f190dd44b..60995e4e62 100644
--- a/lib/vhost/rte_vhost_async.h
+++ b/lib/vhost/rte_vhost_async.h
@@ -5,15 +5,15 @@
 #ifndef _RTE_VHOST_ASYNC_H_
 #define _RTE_VHOST_ASYNC_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdint.h>
 
 #include <rte_compat.h>
 #include <rte_mbuf.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Register an async channel for a vhost queue
  *
diff --git a/lib/vhost/rte_vhost_crypto.h b/lib/vhost/rte_vhost_crypto.h
index f962a53818..af61f0907e 100644
--- a/lib/vhost/rte_vhost_crypto.h
+++ b/lib/vhost/rte_vhost_crypto.h
@@ -5,12 +5,12 @@
 #ifndef _VHOST_CRYPTO_H_
 #define _VHOST_CRYPTO_H_
 
+#include <stdint.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdint.h>
-
 /* pre-declare structs to avoid including full headers */
 struct rte_mempool;
 struct rte_crypto_op;
diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h
index 8db4ab9f4d..42392a0d14 100644
--- a/lib/vhost/vdpa_driver.h
+++ b/lib/vhost/vdpa_driver.h
@@ -5,10 +5,6 @@
 #ifndef _VDPA_DRIVER_H_
 #define _VDPA_DRIVER_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #include <stdbool.h>
 
 #include <rte_compat.h>
@@ -16,6 +12,10 @@ extern "C" {
 #include "rte_vhost.h"
 #include "rte_vdpa.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define RTE_VHOST_QUEUE_ALL UINT16_MAX
 
 /**
-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [PATCH v10 3/7] eal: extend bit manipulation functionality
  2024-09-19 19:31                   ` [PATCH v10 0/7] Improve EAL bit operations API Mattias Rönnblom
  2024-09-19 19:31                     ` [PATCH v10 1/7] buildtools/chkincs: relax C linkage requirement Mattias Rönnblom
  2024-09-19 19:31                     ` [PATCH v10 2/7] dpdk: use C linkage only where appropriate Mattias Rönnblom
@ 2024-09-19 19:31                     ` Mattias Rönnblom
  2024-09-19 19:31                     ` [PATCH v10 4/7] eal: add unit tests for bit operations Mattias Rönnblom
                                       ` (3 subsequent siblings)
  6 siblings, 0 replies; 63+ messages in thread
From: Mattias Rönnblom @ 2024-09-19 19:31 UTC (permalink / raw)
  To: dev
  Cc: hofors, Heng Wang, Stephen Hemminger, Tyler Retzlaff,
	Morten Brørup, Jack Bond-Preston, David Marchand,
	Chengwen Feng, Mattias Rönnblom

Add functionality to test and modify the value of individual bits in
32-bit or 64-bit words.

These functions have no implications on memory ordering or atomicity,
do not use volatile, and thus do not prevent any compiler
optimizations.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
Acked-by: Jack Bond-Preston <jack.bond-preston@foss.arm.com>

--

PATCH v3:
 * Remove unnecessary <rte_compat.h> include.
 * Remove redundant 'fun' parameter from the __RTE_GEN_BIT_*() macros
   (Jack Bond-Preston).
 * Introduce __RTE_GEN_BIT_OPS() macro, consistent with how things
   are done when generating the atomic bit operations.
 * Refer to volatile bit op functions as variants instead of families
   (macro parameter naming).

RFC v6:
 * Have rte_bit_test() accept const-marked bitsets.

RFC v4:
 * Add rte_bit_flip() which, believe it or not, flips the value of a bit.
 * Mark macro-generated private functions as experimental.
 * Use macros to generate *assign*() functions.

RFC v3:
 * Work around lack of C++ support for _Generic (Tyler Retzlaff).
 * Fix ','-related checkpatch warnings.
---
 lib/eal/include/rte_bitops.h | 260 ++++++++++++++++++++++++++++++++++-
 1 file changed, 258 insertions(+), 2 deletions(-)

diff --git a/lib/eal/include/rte_bitops.h b/lib/eal/include/rte_bitops.h
index 449565eeae..6915b945ba 100644
--- a/lib/eal/include/rte_bitops.h
+++ b/lib/eal/include/rte_bitops.h
@@ -2,6 +2,7 @@
  * Copyright(c) 2020 Arm Limited
  * Copyright(c) 2010-2019 Intel Corporation
  * Copyright(c) 2023 Microsoft Corporation
+ * Copyright(c) 2024 Ericsson AB
  */
 
 #ifndef _RTE_BITOPS_H_
@@ -11,12 +12,14 @@
  * @file
  * Bit Operations
  *
- * This file defines a family of APIs for bit operations
- * without enforcing memory ordering.
+ * This file provides functionality for low-level, single-word
+ * arithmetic and bit-level operations, such as counting or
+ * setting individual bits.
  */
 
 #include <stdint.h>
 
+#include <rte_compat.h>
 #include <rte_debug.h>
 
 #ifdef __cplusplus
@@ -105,6 +108,197 @@ extern "C" {
 #define RTE_FIELD_GET64(mask, reg) \
 		((typeof(mask))(((reg) & (mask)) >> rte_ctz64(mask)))
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Test bit in word.
+ *
+ * Generic selection macro to test the value of a bit in a 32-bit or
+ * 64-bit word. The type of operation depends on the type of the @c
+ * addr parameter.
+ *
+ * This macro does not give any guarantees in regards to memory
+ * ordering or atomicity.
+ *
+ * @param addr
+ *   A pointer to the word to query.
+ * @param nr
+ *   The index of the bit.
+ */
+#define rte_bit_test(addr, nr)					\
+	_Generic((addr),					\
+		uint32_t *: __rte_bit_test32,			\
+		const uint32_t *: __rte_bit_test32,		\
+		uint64_t *: __rte_bit_test64,			\
+		const uint64_t *: __rte_bit_test64)(addr, nr)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Set bit in word.
+ *
+ * Generic selection macro to set a bit in a 32-bit or 64-bit
+ * word. The type of operation depends on the type of the @c addr
+ * parameter.
+ *
+ * This macro does not give any guarantees in regards to memory
+ * ordering or atomicity.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ */
+#define rte_bit_set(addr, nr)				\
+	_Generic((addr),				\
+		 uint32_t *: __rte_bit_set32,		\
+		 uint64_t *: __rte_bit_set64)(addr, nr)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Clear bit in word.
+ *
+ * Generic selection macro to clear a bit in a 32-bit or 64-bit
+ * word. The type of operation depends on the type of the @c addr
+ * parameter.
+ *
+ * This macro does not give any guarantees in regards to memory
+ * ordering or atomicity.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ */
+#define rte_bit_clear(addr, nr)					\
+	_Generic((addr),					\
+		 uint32_t *: __rte_bit_clear32,			\
+		 uint64_t *: __rte_bit_clear64)(addr, nr)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Assign a value to a bit in word.
+ *
+ * Generic selection macro to assign a value to a bit in a 32-bit or 64-bit
+ * word. The type of operation depends on the type of the @c addr parameter.
+ *
+ * This macro does not give any guarantees in regards to memory
+ * ordering or atomicity.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ * @param value
+ *   The new value of the bit - true for '1', or false for '0'.
+ */
+#define rte_bit_assign(addr, nr, value)					\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_assign32,			\
+		 uint64_t *: __rte_bit_assign64)(addr, nr, value)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Flip a bit in word.
+ *
+ * Generic selection macro to change the value of a bit to '0' if '1'
+ * or '1' if '0' in a 32-bit or 64-bit word. The type of operation
+ * depends on the type of the @c addr parameter.
+ *
+ * This macro does not give any guarantees in regards to memory
+ * ordering or atomicity.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ */
+#define rte_bit_flip(addr, nr)						\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_flip32,				\
+		 uint64_t *: __rte_bit_flip64)(addr, nr)
+
+#define __RTE_GEN_BIT_TEST(variant, qualifier, size)			\
+	__rte_experimental						\
+	static inline bool						\
+	__rte_bit_ ## variant ## test ## size(const qualifier uint ## size ## _t *addr, \
+					    unsigned int nr)		\
+	{								\
+		RTE_ASSERT(nr < size);					\
+									\
+		uint ## size ## _t mask = (uint ## size ## _t)1 << nr;	\
+		return *addr & mask;					\
+	}
+
+#define __RTE_GEN_BIT_SET(variant, qualifier, size)			\
+	__rte_experimental						\
+	static inline void						\
+	__rte_bit_ ## variant ## set ## size(qualifier uint ## size ## _t *addr, \
+					    unsigned int nr)		\
+	{								\
+		RTE_ASSERT(nr < size);					\
+									\
+		uint ## size ## _t mask = (uint ## size ## _t)1 << nr;	\
+		*addr |= mask;						\
+	}								\
+
+#define __RTE_GEN_BIT_CLEAR(variant, qualifier, size)			\
+	__rte_experimental						\
+	static inline void						\
+	__rte_bit_ ## variant ## clear ## size(qualifier uint ## size ## _t *addr, \
+					    unsigned int nr)		\
+	{								\
+		RTE_ASSERT(nr < size);					\
+									\
+		uint ## size ## _t mask = ~((uint ## size ## _t)1 << nr); \
+		(*addr) &= mask;					\
+	}								\
+
+#define __RTE_GEN_BIT_ASSIGN(variant, qualifier, size)			\
+	__rte_experimental						\
+	static inline void						\
+	__rte_bit_ ## variant ## assign ## size(qualifier uint ## size ## _t *addr, \
+					    unsigned int nr, bool value) \
+	{								\
+		if (value)						\
+			__rte_bit_ ## variant ## set ## size(addr, nr);	\
+		else							\
+			__rte_bit_ ## variant ## clear ## size(addr, nr); \
+	}
+
+#define __RTE_GEN_BIT_FLIP(variant, qualifier, size)			\
+	__rte_experimental						\
+	static inline void						\
+	__rte_bit_ ## variant ## flip ## size(qualifier uint ## size ## _t *addr, \
+					    unsigned int nr)		\
+	{								\
+		bool value;						\
+									\
+		value = __rte_bit_ ## variant ## test ## size(addr, nr); \
+		__rte_bit_ ## variant ## assign ## size(addr, nr, !value); \
+	}
+
+#define __RTE_GEN_BIT_OPS(v, qualifier, size)	\
+	__RTE_GEN_BIT_TEST(v, qualifier, size)	\
+	__RTE_GEN_BIT_SET(v, qualifier, size)	\
+	__RTE_GEN_BIT_CLEAR(v, qualifier, size)	\
+	__RTE_GEN_BIT_ASSIGN(v, qualifier, size)	\
+	__RTE_GEN_BIT_FLIP(v, qualifier, size)
+
+#define __RTE_GEN_BIT_OPS_SIZE(size) \
+	__RTE_GEN_BIT_OPS(,, size)
+
+__RTE_GEN_BIT_OPS_SIZE(32)
+__RTE_GEN_BIT_OPS_SIZE(64)
+
 /*------------------------ 32-bit relaxed operations ------------------------*/
 
 /**
@@ -787,6 +981,68 @@ rte_log2_u64(uint64_t v)
 
 #ifdef __cplusplus
 }
+
+/*
+ * Since C++ doesn't support generic selection (i.e., _Generic),
+ * function overloading is used instead. Such functions must be
+ * defined outside 'extern "C"' to be accepted by the compiler.
+ */
+
+#undef rte_bit_test
+#undef rte_bit_set
+#undef rte_bit_clear
+#undef rte_bit_assign
+#undef rte_bit_flip
+
+#define __RTE_BIT_OVERLOAD_SZ_2(fun, qualifier, size, arg1_type, arg1_name) \
+	static inline void						\
+	rte_bit_ ## fun(qualifier uint ## size ## _t *addr,		\
+			arg1_type arg1_name)				\
+	{								\
+		__rte_bit_ ## fun ## size(addr, arg1_name);		\
+	}
+
+#define __RTE_BIT_OVERLOAD_2(fun, qualifier, arg1_type, arg1_name)	\
+	__RTE_BIT_OVERLOAD_SZ_2(fun, qualifier, 32, arg1_type, arg1_name) \
+	__RTE_BIT_OVERLOAD_SZ_2(fun, qualifier, 64, arg1_type, arg1_name)
+
+#define __RTE_BIT_OVERLOAD_SZ_2R(fun, qualifier, size, ret_type, arg1_type, \
+				 arg1_name)				\
+	static inline ret_type						\
+	rte_bit_ ## fun(qualifier uint ## size ## _t *addr,		\
+			arg1_type arg1_name)				\
+	{								\
+		return __rte_bit_ ## fun ## size(addr, arg1_name);	\
+	}
+
+#define __RTE_BIT_OVERLOAD_2R(fun, qualifier, ret_type, arg1_type, arg1_name) \
+	__RTE_BIT_OVERLOAD_SZ_2R(fun, qualifier, 32, ret_type, arg1_type, \
+				 arg1_name)				\
+	__RTE_BIT_OVERLOAD_SZ_2R(fun, qualifier, 64, ret_type, arg1_type, \
+				 arg1_name)
+
+#define __RTE_BIT_OVERLOAD_SZ_3(fun, qualifier, size, arg1_type, arg1_name, \
+				arg2_type, arg2_name)			\
+	static inline void /* C++ overload; 'qualifier' applies to addr */ \
+	rte_bit_ ## fun(qualifier uint ## size ## _t *addr,		\
+			arg1_type arg1_name, arg2_type arg2_name)	\
+	{								\
+		__rte_bit_ ## fun ## size(addr, arg1_name, arg2_name);	\
+	}
+
+#define __RTE_BIT_OVERLOAD_3(fun, qualifier, arg1_type, arg1_name, arg2_type, \
+			     arg2_name)					\
+	__RTE_BIT_OVERLOAD_SZ_3(fun, qualifier, 32, arg1_type, arg1_name, \
+				arg2_type, arg2_name)			\
+	__RTE_BIT_OVERLOAD_SZ_3(fun, qualifier, 64, arg1_type, arg1_name, \
+				arg2_type, arg2_name)
+
+__RTE_BIT_OVERLOAD_2R(test, const, bool, unsigned int, nr)
+__RTE_BIT_OVERLOAD_2(set,, unsigned int, nr)
+__RTE_BIT_OVERLOAD_2(clear,, unsigned int, nr)
+__RTE_BIT_OVERLOAD_3(assign,, unsigned int, nr, bool, value)
+__RTE_BIT_OVERLOAD_2(flip,, unsigned int, nr)
+
 #endif
 
 #endif /* _RTE_BITOPS_H_ */
-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [PATCH v10 4/7] eal: add unit tests for bit operations
  2024-09-19 19:31                   ` [PATCH v10 0/7] Improve EAL bit operations API Mattias Rönnblom
                                       ` (2 preceding siblings ...)
  2024-09-19 19:31                     ` [PATCH v10 3/7] eal: extend bit manipulation functionality Mattias Rönnblom
@ 2024-09-19 19:31                     ` Mattias Rönnblom
  2024-09-19 19:31                     ` [PATCH v10 5/7] eal: add atomic " Mattias Rönnblom
                                       ` (2 subsequent siblings)
  6 siblings, 0 replies; 63+ messages in thread
From: Mattias Rönnblom @ 2024-09-19 19:31 UTC (permalink / raw)
  To: dev
  Cc: hofors, Heng Wang, Stephen Hemminger, Tyler Retzlaff,
	Morten Brørup, Jack Bond-Preston, David Marchand,
	Chengwen Feng, Mattias Rönnblom

Extend bitops tests to cover the
rte_bit_[test|set|clear|assign|flip]()
functions.

The tests are converted to use the test suite runner framework.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
Acked-by: Jack Bond-Preston <jack.bond-preston@foss.arm.com>

--

RFC v6:
 * Test rte_bit_*test() usage through const pointers.

RFC v4:
 * Remove redundant line continuations.
---
 app/test/test_bitops.c | 85 ++++++++++++++++++++++++++++++++++--------
 1 file changed, 70 insertions(+), 15 deletions(-)

diff --git a/app/test/test_bitops.c b/app/test/test_bitops.c
index 0d4ccfb468..322f58c066 100644
--- a/app/test/test_bitops.c
+++ b/app/test/test_bitops.c
@@ -1,13 +1,68 @@
 /* SPDX-License-Identifier: BSD-3-Clause
  * Copyright(c) 2019 Arm Limited
+ * Copyright(c) 2024 Ericsson AB
  */
 
+#include <stdbool.h>
+
 #include <rte_launch.h>
 #include <rte_bitops.h>
+#include <rte_random.h>
 #include "test.h"
 
-uint32_t val32;
-uint64_t val64;
+#define GEN_TEST_BIT_ACCESS(test_name, set_fun, clear_fun, assign_fun,	\
+			    flip_fun, test_fun, size)			\
+	static int							\
+	test_name(void)							\
+	{								\
+		uint ## size ## _t reference = (uint ## size ## _t)rte_rand(); \
+		unsigned int bit_nr;					\
+		uint ## size ## _t word = (uint ## size ## _t)rte_rand(); \
+		/* Drive each bit of 'word' to its 'reference' value. */ \
+		for (bit_nr = 0; bit_nr < size; bit_nr++) {		\
+			bool reference_bit = (reference >> bit_nr) & 1;	\
+			bool assign = rte_rand() & 1;			\
+			if (assign)					\
+				assign_fun(&word, bit_nr, reference_bit); \
+			else {						\
+				if (reference_bit)			\
+					set_fun(&word, bit_nr);		\
+				else					\
+					clear_fun(&word, bit_nr);	\
+									\
+			}						\
+			TEST_ASSERT(test_fun(&word, bit_nr) == reference_bit, \
+				    "Bit %u had unexpected value", bit_nr); \
+			flip_fun(&word, bit_nr);			\
+			TEST_ASSERT(test_fun(&word, bit_nr) != reference_bit, \
+				    "Bit %u had unflipped value", bit_nr); \
+			flip_fun(&word, bit_nr);			\
+			/* test_fun must also accept a pointer-to-const. */ \
+			const uint ## size ## _t *const_ptr = &word;	\
+			TEST_ASSERT(test_fun(const_ptr, bit_nr) ==	\
+				    reference_bit,			\
+				    "Bit %u had unexpected value", bit_nr); \
+		}							\
+		/* Re-check every bit once all positions are written. */ \
+		for (bit_nr = 0; bit_nr < size; bit_nr++) {		\
+			bool reference_bit = (reference >> bit_nr) & 1;	\
+			TEST_ASSERT(test_fun(&word, bit_nr) == reference_bit, \
+				    "Bit %u had unexpected value", bit_nr); \
+		}							\
+									\
+		TEST_ASSERT(reference == word, "Word had unexpected value"); \
+									\
+		return TEST_SUCCESS;					\
+	}
+
+GEN_TEST_BIT_ACCESS(test_bit_access32, rte_bit_set, rte_bit_clear,
+		    rte_bit_assign, rte_bit_flip, rte_bit_test, 32)
+
+GEN_TEST_BIT_ACCESS(test_bit_access64, rte_bit_set, rte_bit_clear,
+		    rte_bit_assign, rte_bit_flip, rte_bit_test, 64)
+
+static uint32_t val32;
+static uint64_t val64;
 
 #define MAX_BITS_32 32
 #define MAX_BITS_64 64
@@ -117,22 +172,22 @@ test_bit_relaxed_test_set_clear(void)
 	return TEST_SUCCESS;
 }
 
+static struct unit_test_suite test_suite = {
+	.suite_name = "Bitops test suite",
+	.unit_test_cases = {
+		TEST_CASE(test_bit_access32),
+		TEST_CASE(test_bit_access64),
+		TEST_CASE(test_bit_relaxed_set),
+		TEST_CASE(test_bit_relaxed_clear),
+		TEST_CASE(test_bit_relaxed_test_set_clear),
+		TEST_CASES_END()
+	}
+};
+
 static int
 test_bitops(void)
 {
-	val32 = 0;
-	val64 = 0;
-
-	if (test_bit_relaxed_set() < 0)
-		return TEST_FAILED;
-
-	if (test_bit_relaxed_clear() < 0)
-		return TEST_FAILED;
-
-	if (test_bit_relaxed_test_set_clear() < 0)
-		return TEST_FAILED;
-
-	return TEST_SUCCESS;
+	return unit_test_suite_runner(&test_suite);
 }
 
 REGISTER_FAST_TEST(bitops_autotest, true, true, test_bitops);
-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [PATCH v10 5/7] eal: add atomic bit operations
  2024-09-19 19:31                   ` [PATCH v10 0/7] Improve EAL bit operations API Mattias Rönnblom
                                       ` (3 preceding siblings ...)
  2024-09-19 19:31                     ` [PATCH v10 4/7] eal: add unit tests for bit operations Mattias Rönnblom
@ 2024-09-19 19:31                     ` Mattias Rönnblom
  2024-09-19 19:31                     ` [PATCH v10 6/7] eal: add unit tests for atomic bit access functions Mattias Rönnblom
  2024-09-19 19:31                     ` [PATCH v10 7/7] eal: extend bitops to handle volatile pointers Mattias Rönnblom
  6 siblings, 0 replies; 63+ messages in thread
From: Mattias Rönnblom @ 2024-09-19 19:31 UTC (permalink / raw)
  To: dev
  Cc: hofors, Heng Wang, Stephen Hemminger, Tyler Retzlaff,
	Morten Brørup, Jack Bond-Preston, David Marchand,
	Chengwen Feng, Mattias Rönnblom

Add atomic bit test/set/clear/assign/flip and
test-and-set/clear/assign/flip functions.

All atomic bit functions allow (and indeed, require) the caller to
specify a memory order.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
Acked-by: Jack Bond-Preston <jack.bond-preston@foss.arm.com>

--

PATCH v3:
 * Introduce __RTE_GEN_BIT_ATOMIC_*() 'qualifier' argument already in
   this patch (Jack Bond-Preston).
 * Refer to volatile bit op functions as variants instead of families
   (macro parameter naming).
 * Update release notes.

PATCH:
 * Add missing macro #undef for C++ version of atomic bit flip.

RFC v7:
 * Replace compare-exchange-based rte_bit_atomic_test_and_*() and
   flip() with implementations that use the previous value as returned
   by the atomic fetch function.
 * Reword documentation to match the non-atomic macro variants.
 * Remove pointer to <rte_stdatomic.h> for memory model documentation,
   since there is no documentation for that API.

RFC v6:
 * Have rte_bit_atomic_test() accept const-marked bitsets.

RFC v4:
 * Add atomic bit flip.
 * Mark macro-generated private functions experimental.

RFC v3:
 * Work around lack of C++ support for _Generic (Tyler Retzlaff).

RFC v2:
 o Add rte_bit_atomic_test_and_assign() (for consistency).
 o Fix bugs in rte_bit_atomic_test_and_[set|clear]().
 o Use <rte_stdatomic.h> to support MSVC.
---
 doc/guides/rel_notes/release_24_11.rst |  17 +
 lib/eal/include/rte_bitops.h           | 415 +++++++++++++++++++++++++
 2 files changed, 432 insertions(+)

diff --git a/doc/guides/rel_notes/release_24_11.rst b/doc/guides/rel_notes/release_24_11.rst
index 0ff70d9057..3111b1e4c0 100644
--- a/doc/guides/rel_notes/release_24_11.rst
+++ b/doc/guides/rel_notes/release_24_11.rst
@@ -56,6 +56,23 @@ New Features
      =======================================================
 
 
+* **Extended bit operations API.**
+
+  The support for bit-level operations on single 32- and 64-bit words
+  in <rte_bitops.h> has been extended with two families of
+  semantically well-defined functions.
+
+  rte_bit_[test|set|clear|assign|flip]() functions provide excellent
+  performance (by avoiding restricting the compiler and CPU), but give
+  no guarantees in regards to memory ordering or atomicity.
+
+  rte_bit_atomic_*() provides atomic bit-level operations, including
+  the possibility of specifying memory ordering constraints.
+
+  The new public API elements are polymorphic, using the _Generic-
+  based macros (for C) and function overloading (in C++ translation
+  units).
+
 Removed Items
 -------------
 
diff --git a/lib/eal/include/rte_bitops.h b/lib/eal/include/rte_bitops.h
index 6915b945ba..3ad6795fd1 100644
--- a/lib/eal/include/rte_bitops.h
+++ b/lib/eal/include/rte_bitops.h
@@ -21,6 +21,7 @@
 
 #include <rte_compat.h>
 #include <rte_debug.h>
+#include <rte_stdatomic.h>
 
 #ifdef __cplusplus
 extern "C" {
@@ -226,6 +227,204 @@ extern "C" {
 		 uint32_t *: __rte_bit_flip32,				\
 		 uint64_t *: __rte_bit_flip64)(addr, nr)
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Test if a particular bit in a word is set with a particular memory
+ * order.
+ *
+ * Test a bit with the resulting memory load ordered as per the
+ * specified memory order.
+ *
+ * @param addr
+ *   A pointer to the word to query.
+ * @param nr
+ *   The index of the bit.
+ * @param memory_order
+ *   The memory order to use.
+ * @return
+ *   Returns true if the bit is set, and false otherwise.
+ */
+#define rte_bit_atomic_test(addr, nr, memory_order)			\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_atomic_test32,			\
+		 const uint32_t *: __rte_bit_atomic_test32,		\
+		 uint64_t *: __rte_bit_atomic_test64,			\
+		 const uint64_t *: __rte_bit_atomic_test64)(addr, nr,	\
+							    memory_order)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Atomically set bit in word.
+ *
+ * Generic selection macro to atomically set bit specified by @c nr in
+ * the word pointed to by @c addr to '1', with the memory ordering as
+ * specified by @c memory_order.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ * @param memory_order
+ *   The memory order to use.
+ */
+#define rte_bit_atomic_set(addr, nr, memory_order)			\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_atomic_set32,			\
+		 uint64_t *: __rte_bit_atomic_set64)(addr, nr, memory_order)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Atomically clear bit in word.
+ *
+ * Generic selection macro to atomically set bit specified by @c nr in
+ * the word pointed to by @c addr to '0', with the memory ordering as
+ * specified by @c memory_order.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ * @param memory_order
+ *   The memory order to use.
+ */
+#define rte_bit_atomic_clear(addr, nr, memory_order)			\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_atomic_clear32,			\
+		 uint64_t *: __rte_bit_atomic_clear64)(addr, nr, memory_order)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Atomically assign a value to bit in word.
+ *
+ * Generic selection macro to atomically set bit specified by @c nr in the
+ * word pointed to by @c addr to the value indicated by @c value, with
+ * the memory ordering as specified with @c memory_order.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ * @param value
+ *   The new value of the bit - true for '1', or false for '0'.
+ * @param memory_order
+ *   The memory order to use.
+ */
+#define rte_bit_atomic_assign(addr, nr, value, memory_order)		\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_atomic_assign32,			\
+		 uint64_t *: __rte_bit_atomic_assign64)(addr, nr, value, \
+							memory_order)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Atomically flip bit in word.
+ *
+ * Generic selection macro to atomically negate the value of the
+ * bit specified by @c nr in the word pointed to by @c addr (i.e.,
+ * a set bit is cleared and a cleared bit is set), with the memory
+ * ordering as specified with @c memory_order.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ * @param memory_order
+ *   The memory order to use.
+ */
+#define rte_bit_atomic_flip(addr, nr, memory_order)			\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_atomic_flip32,			\
+		 uint64_t *: __rte_bit_atomic_flip64)(addr, nr, memory_order)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Atomically test and set a bit in word.
+ *
+ * Generic selection macro to atomically test and set bit specified by
+ * @c nr in the word pointed to by @c addr to '1', with the memory
+ * ordering as specified with @c memory_order.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ * @param memory_order
+ *   The memory order to use.
+ * @return
+ *   Returns true if the bit was set, and false otherwise.
+ */
+#define rte_bit_atomic_test_and_set(addr, nr, memory_order)		\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_atomic_test_and_set32,		\
+		 uint64_t *: __rte_bit_atomic_test_and_set64)(addr, nr,	\
+							      memory_order)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Atomically test and clear a bit in word.
+ *
+ * Generic selection macro to atomically test and clear bit specified
+ * by @c nr in the word pointed to by @c addr to '0', with the memory
+ * ordering as specified with @c memory_order.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ * @param memory_order
+ *   The memory order to use.
+ * @return
+ *   Returns true if the bit was set, and false otherwise.
+ */
+#define rte_bit_atomic_test_and_clear(addr, nr, memory_order)		\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_atomic_test_and_clear32,		\
+		 uint64_t *: __rte_bit_atomic_test_and_clear64)(addr, nr, \
+								memory_order)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Atomically test and assign a bit in word.
+ *
+ * Generic selection macro to atomically test and assign bit specified
+ * by @c nr in the word pointed to by @c addr to the value specified
+ * by @c value, with the memory ordering as specified with
+ * @c memory_order.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ * @param value
+ *   The new value of the bit - true for '1', or false for '0'.
+ * @param memory_order
+ *   The memory order to use.
+ * @return
+ *   Returns true if the bit was set, and false otherwise.
+ */
+#define rte_bit_atomic_test_and_assign(addr, nr, value, memory_order)	\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_atomic_test_and_assign32,	\
+		 uint64_t *: __rte_bit_atomic_test_and_assign64)(addr, nr, \
+								 value, \
+								 memory_order)
+
 #define __RTE_GEN_BIT_TEST(variant, qualifier, size)			\
 	__rte_experimental						\
 	static inline bool						\
@@ -299,6 +498,146 @@ extern "C" {
 __RTE_GEN_BIT_OPS_SIZE(32)
 __RTE_GEN_BIT_OPS_SIZE(64)
 
+#define __RTE_GEN_BIT_ATOMIC_TEST(variant, qualifier, size)		\
+	__rte_experimental						\
+	static inline bool						\
+	__rte_bit_atomic_ ## variant ## test ## size(const qualifier uint ## size ## _t *addr, \
+						     unsigned int nr, int memory_order) \
+	{								\
+		RTE_ASSERT(nr < size);					\
+									\
+		const qualifier RTE_ATOMIC(uint ## size ## _t) *a_addr = \
+			(const qualifier RTE_ATOMIC(uint ## size ## _t) *)addr;	\
+		uint ## size ## _t mask = (uint ## size ## _t)1 << nr;	\
+		return rte_atomic_load_explicit(a_addr, memory_order) & mask; \
+	}
+
+#define __RTE_GEN_BIT_ATOMIC_SET(variant, qualifier, size)		\
+	__rte_experimental						\
+	static inline void						\
+	__rte_bit_atomic_ ## variant ## set ## size(qualifier uint ## size ## _t *addr, \
+					      unsigned int nr, int memory_order) \
+	{								\
+		RTE_ASSERT(nr < size);					\
+									\
+		qualifier RTE_ATOMIC(uint ## size ## _t) *a_addr =	\
+			(qualifier RTE_ATOMIC(uint ## size ## _t) *)addr; \
+		uint ## size ## _t mask = (uint ## size ## _t)1 << nr;	\
+		rte_atomic_fetch_or_explicit(a_addr, mask, memory_order); \
+	}
+
+#define __RTE_GEN_BIT_ATOMIC_CLEAR(variant, qualifier, size)		\
+	__rte_experimental						\
+	static inline void						\
+	__rte_bit_atomic_ ## variant ## clear ## size(qualifier uint ## size ## _t *addr,	\
+						unsigned int nr, int memory_order) \
+	{								\
+		RTE_ASSERT(nr < size);					\
+									\
+		qualifier RTE_ATOMIC(uint ## size ## _t) *a_addr =	\
+			(qualifier RTE_ATOMIC(uint ## size ## _t) *)addr; \
+		uint ## size ## _t mask = (uint ## size ## _t)1 << nr;	\
+		rte_atomic_fetch_and_explicit(a_addr, ~mask, memory_order); \
+	}
+
+#define __RTE_GEN_BIT_ATOMIC_FLIP(variant, qualifier, size)		\
+	__rte_experimental						\
+	static inline void						\
+	__rte_bit_atomic_ ## variant ## flip ## size(qualifier uint ## size ## _t *addr, \
+					       unsigned int nr, int memory_order) \
+	{								\
+		RTE_ASSERT(nr < size);					\
+									\
+		qualifier RTE_ATOMIC(uint ## size ## _t) *a_addr =	\
+			(qualifier RTE_ATOMIC(uint ## size ## _t) *)addr; \
+		uint ## size ## _t mask = (uint ## size ## _t)1 << nr;	\
+		rte_atomic_fetch_xor_explicit(a_addr, mask, memory_order); \
+	}
+
+#define __RTE_GEN_BIT_ATOMIC_ASSIGN(variant, qualifier, size)		\
+	__rte_experimental						\
+	static inline void						\
+	__rte_bit_atomic_## variant ## assign ## size(qualifier uint ## size ## _t *addr, \
+						unsigned int nr, bool value, \
+						int memory_order)	\
+	{								\
+		if (value)						\
+			__rte_bit_atomic_ ## variant ## set ## size(addr, nr, memory_order); \
+		else							\
+			__rte_bit_atomic_ ## variant ## clear ## size(addr, nr, \
+								     memory_order); \
+	}
+
+#define __RTE_GEN_BIT_ATOMIC_TEST_AND_SET(variant, qualifier, size)	\
+	__rte_experimental						\
+	static inline bool						\
+	__rte_bit_atomic_ ## variant ## test_and_set ## size(qualifier uint ## size ## _t *addr, \
+						       unsigned int nr,	\
+						       int memory_order) \
+	{								\
+		RTE_ASSERT(nr < size);					\
+									\
+		qualifier RTE_ATOMIC(uint ## size ## _t) *a_addr =	\
+			(qualifier RTE_ATOMIC(uint ## size ## _t) *)addr; \
+		uint ## size ## _t mask = (uint ## size ## _t)1 << nr;	\
+		uint ## size ## _t prev;				\
+									\
+		prev = rte_atomic_fetch_or_explicit(a_addr, mask,	\
+						    memory_order);	\
+									\
+		return prev & mask;					\
+	}
+
+#define __RTE_GEN_BIT_ATOMIC_TEST_AND_CLEAR(variant, qualifier, size)	\
+	__rte_experimental						\
+	static inline bool						\
+	__rte_bit_atomic_ ## variant ## test_and_clear ## size(qualifier uint ## size ## _t *addr, \
+							 unsigned int nr, \
+							 int memory_order) \
+	{								\
+		RTE_ASSERT(nr < size);					\
+									\
+		qualifier RTE_ATOMIC(uint ## size ## _t) *a_addr =	\
+			(qualifier RTE_ATOMIC(uint ## size ## _t) *)addr; \
+		uint ## size ## _t mask = (uint ## size ## _t)1 << nr;	\
+		uint ## size ## _t prev;				\
+									\
+	        prev = rte_atomic_fetch_and_explicit(a_addr, ~mask,	\
+						     memory_order);	\
+									\
+		return prev & mask;					\
+	}
+
+#define __RTE_GEN_BIT_ATOMIC_TEST_AND_ASSIGN(variant, qualifier, size)	\
+	__rte_experimental						\
+	static inline bool						\
+	__rte_bit_atomic_ ## variant ## test_and_assign ## size(qualifier uint ## size ## _t *addr, \
+							  unsigned int nr, \
+							  bool value,	\
+							  int memory_order) \
+	{								\
+		if (value)						\
+			return __rte_bit_atomic_ ## variant ## test_and_set ## size(addr, nr, memory_order); \
+		else							\
+			return __rte_bit_atomic_ ## variant ## test_and_clear ## size(addr, nr, memory_order); \
+	}
+
+#define __RTE_GEN_BIT_ATOMIC_OPS(variant, qualifier, size)	\
+	__RTE_GEN_BIT_ATOMIC_TEST(variant, qualifier, size)	\
+	__RTE_GEN_BIT_ATOMIC_SET(variant, qualifier, size)	\
+	__RTE_GEN_BIT_ATOMIC_CLEAR(variant, qualifier, size)	\
+	__RTE_GEN_BIT_ATOMIC_ASSIGN(variant, qualifier, size)	\
+	__RTE_GEN_BIT_ATOMIC_TEST_AND_SET(variant, qualifier, size) \
+	__RTE_GEN_BIT_ATOMIC_TEST_AND_CLEAR(variant, qualifier, size) \
+	__RTE_GEN_BIT_ATOMIC_TEST_AND_ASSIGN(variant, qualifier, size) \
+	__RTE_GEN_BIT_ATOMIC_FLIP(variant, qualifier, size)
+
+#define __RTE_GEN_BIT_ATOMIC_OPS_SIZE(size) \
+	__RTE_GEN_BIT_ATOMIC_OPS(,, size)
+
+__RTE_GEN_BIT_ATOMIC_OPS_SIZE(32)
+__RTE_GEN_BIT_ATOMIC_OPS_SIZE(64)
+
 /*------------------------ 32-bit relaxed operations ------------------------*/
 
 /**
@@ -994,6 +1333,15 @@ rte_log2_u64(uint64_t v)
 #undef rte_bit_assign
 #undef rte_bit_flip
 
+#undef rte_bit_atomic_test
+#undef rte_bit_atomic_set
+#undef rte_bit_atomic_clear
+#undef rte_bit_atomic_assign
+#undef rte_bit_atomic_flip
+#undef rte_bit_atomic_test_and_set
+#undef rte_bit_atomic_test_and_clear
+#undef rte_bit_atomic_test_and_assign
+
 #define __RTE_BIT_OVERLOAD_SZ_2(fun, qualifier, size, arg1_type, arg1_name) \
 	static inline void						\
 	rte_bit_ ## fun(qualifier uint ## size ## _t *addr,		\
@@ -1037,12 +1385,79 @@ rte_log2_u64(uint64_t v)
 	__RTE_BIT_OVERLOAD_SZ_3(fun, qualifier, 64, arg1_type, arg1_name, \
 				arg2_type, arg2_name)
 
+#define __RTE_BIT_OVERLOAD_SZ_3R(fun, qualifier, size, ret_type, arg1_type, \
+				 arg1_name, arg2_type, arg2_name)	\
+	static inline ret_type /* C++ overload; 'qualifier' applies to addr */ \
+	rte_bit_ ## fun(qualifier uint ## size ## _t *addr,		\
+			arg1_type arg1_name, arg2_type arg2_name)	\
+	{								\
+		return __rte_bit_ ## fun ## size(addr, arg1_name, arg2_name); \
+	}
+
+#define __RTE_BIT_OVERLOAD_3R(fun, qualifier, ret_type, arg1_type, arg1_name, \
+			      arg2_type, arg2_name)			\
+	__RTE_BIT_OVERLOAD_SZ_3R(fun, qualifier, 32, ret_type, arg1_type, \
+				 arg1_name, arg2_type, arg2_name)	\
+	__RTE_BIT_OVERLOAD_SZ_3R(fun, qualifier, 64, ret_type, arg1_type, \
+				 arg1_name, arg2_type, arg2_name)
+
+#define __RTE_BIT_OVERLOAD_SZ_4(fun, qualifier, size, arg1_type, arg1_name, \
+				arg2_type, arg2_name, arg3_type, arg3_name) \
+	static inline void /* C++ overload; 'qualifier' applies to addr */ \
+	rte_bit_ ## fun(qualifier uint ## size ## _t *addr, arg1_type arg1_name, \
+			arg2_type arg2_name, arg3_type arg3_name)	\
+	{								\
+		__rte_bit_ ## fun ## size(addr, arg1_name, arg2_name,	\
+					  arg3_name);			\
+	}
+
+#define __RTE_BIT_OVERLOAD_4(fun, qualifier, arg1_type, arg1_name, arg2_type, \
+			     arg2_name, arg3_type, arg3_name)		\
+	__RTE_BIT_OVERLOAD_SZ_4(fun, qualifier, 32, arg1_type, arg1_name, \
+				arg2_type, arg2_name, arg3_type, arg3_name) \
+	__RTE_BIT_OVERLOAD_SZ_4(fun, qualifier, 64, arg1_type, arg1_name, \
+				arg2_type, arg2_name, arg3_type, arg3_name)
+
+#define __RTE_BIT_OVERLOAD_SZ_4R(fun, qualifier, size, ret_type, arg1_type, \
+				 arg1_name, arg2_type, arg2_name, arg3_type, \
+				 arg3_name)				\
+	static inline ret_type /* C++ overload; 'qualifier' applies to addr */ \
+	rte_bit_ ## fun(qualifier uint ## size ## _t *addr, arg1_type arg1_name, \
+			arg2_type arg2_name, arg3_type arg3_name)	\
+	{								\
+		return __rte_bit_ ## fun ## size(addr, arg1_name, arg2_name, \
+						 arg3_name);		\
+	}
+
+#define __RTE_BIT_OVERLOAD_4R(fun, qualifier, ret_type, arg1_type, arg1_name, \
+			      arg2_type, arg2_name, arg3_type, arg3_name) \
+	__RTE_BIT_OVERLOAD_SZ_4R(fun, qualifier, 32, ret_type, arg1_type, \
+				 arg1_name, arg2_type, arg2_name, arg3_type, \
+				 arg3_name)				\
+	__RTE_BIT_OVERLOAD_SZ_4R(fun, qualifier, 64, ret_type, arg1_type, \
+				 arg1_name, arg2_type, arg2_name, arg3_type, \
+				 arg3_name)
+
 __RTE_BIT_OVERLOAD_2R(test, const, bool, unsigned int, nr)
 __RTE_BIT_OVERLOAD_2(set,, unsigned int, nr)
 __RTE_BIT_OVERLOAD_2(clear,, unsigned int, nr)
 __RTE_BIT_OVERLOAD_3(assign,, unsigned int, nr, bool, value)
 __RTE_BIT_OVERLOAD_2(flip,, unsigned int, nr)
 
+__RTE_BIT_OVERLOAD_3R(atomic_test, const, bool, unsigned int, nr,
+		      int, memory_order)
+__RTE_BIT_OVERLOAD_3(atomic_set,, unsigned int, nr, int, memory_order)
+__RTE_BIT_OVERLOAD_3(atomic_clear,, unsigned int, nr, int, memory_order)
+__RTE_BIT_OVERLOAD_4(atomic_assign,, unsigned int, nr, bool, value,
+		     int, memory_order)
+__RTE_BIT_OVERLOAD_3(atomic_flip,, unsigned int, nr, int, memory_order)
+__RTE_BIT_OVERLOAD_3R(atomic_test_and_set,, bool, unsigned int, nr,
+		      int, memory_order)
+__RTE_BIT_OVERLOAD_3R(atomic_test_and_clear,, bool, unsigned int, nr,
+		      int, memory_order)
+__RTE_BIT_OVERLOAD_4R(atomic_test_and_assign,, bool, unsigned int, nr,
+		      bool, value, int, memory_order)
+
 #endif
 
 #endif /* _RTE_BITOPS_H_ */
-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [PATCH v10 6/7] eal: add unit tests for atomic bit access functions
  2024-09-19 19:31                   ` [PATCH v10 0/7] Improve EAL bit operations API Mattias Rönnblom
                                       ` (4 preceding siblings ...)
  2024-09-19 19:31                     ` [PATCH v10 5/7] eal: add atomic " Mattias Rönnblom
@ 2024-09-19 19:31                     ` Mattias Rönnblom
  2024-09-19 19:31                     ` [PATCH v10 7/7] eal: extend bitops to handle volatile pointers Mattias Rönnblom
  6 siblings, 0 replies; 63+ messages in thread
From: Mattias Rönnblom @ 2024-09-19 19:31 UTC (permalink / raw)
  To: dev
  Cc: hofors, Heng Wang, Stephen Hemminger, Tyler Retzlaff,
	Morten Brørup, Jack Bond-Preston, David Marchand,
	Chengwen Feng, Mattias Rönnblom

Extend bitops tests to cover the rte_bit_atomic_*() family of
functions.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
Acked-by: Jack Bond-Preston <jack.bond-preston@foss.arm.com>

--

RFC v4:
 * Add atomicity test for atomic bit flip.

RFC v3:
 * Rename variable 'main' to make ICC happy.
---
 app/test/test_bitops.c | 313 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 312 insertions(+), 1 deletion(-)

diff --git a/app/test/test_bitops.c b/app/test/test_bitops.c
index 322f58c066..b80216a0a1 100644
--- a/app/test/test_bitops.c
+++ b/app/test/test_bitops.c
@@ -3,10 +3,13 @@
  * Copyright(c) 2024 Ericsson AB
  */
 
+#include <inttypes.h>
 #include <stdbool.h>
 
-#include <rte_launch.h>
 #include <rte_bitops.h>
+#include <rte_cycles.h>
+#include <rte_launch.h>
+#include <rte_lcore.h>
 #include <rte_random.h>
 #include "test.h"
 
@@ -61,6 +64,304 @@ GEN_TEST_BIT_ACCESS(test_bit_access32, rte_bit_set, rte_bit_clear,
 GEN_TEST_BIT_ACCESS(test_bit_access64, rte_bit_set, rte_bit_clear,
 		    rte_bit_assign, rte_bit_flip, rte_bit_test, 64)
 
+#define bit_atomic_set(addr, nr)				\
+	rte_bit_atomic_set(addr, nr, rte_memory_order_relaxed)
+
+#define bit_atomic_clear(addr, nr)					\
+	rte_bit_atomic_clear(addr, nr, rte_memory_order_relaxed)
+
+#define bit_atomic_assign(addr, nr, value)				\
+	rte_bit_atomic_assign(addr, nr, value, rte_memory_order_relaxed)
+
+#define bit_atomic_flip(addr, nr)					\
+    rte_bit_atomic_flip(addr, nr, rte_memory_order_relaxed)
+
+#define bit_atomic_test(addr, nr)				\
+	rte_bit_atomic_test(addr, nr, rte_memory_order_relaxed)
+
+GEN_TEST_BIT_ACCESS(test_bit_atomic_access32, bit_atomic_set,
+		    bit_atomic_clear, bit_atomic_assign,
+		    bit_atomic_flip, bit_atomic_test, 32)
+
+GEN_TEST_BIT_ACCESS(test_bit_atomic_access64, bit_atomic_set,
+		    bit_atomic_clear, bit_atomic_assign,
+		    bit_atomic_flip, bit_atomic_test, 64)
+
+#define PARALLEL_TEST_RUNTIME 0.25
+
+#define GEN_TEST_BIT_PARALLEL_ASSIGN(size)				\
+	/* Two lcores repeatedly update distinct bits of one shared word. */ \
+	struct parallel_access_lcore ## size				\
+	{								\
+		unsigned int bit;					\
+		uint ## size ##_t *word;				\
+		bool failed;						\
+	};								\
+									\
+	static int							\
+	run_parallel_assign ## size(void *arg)				\
+	{								\
+		struct parallel_access_lcore ## size *lcore = arg;	\
+		uint64_t deadline = rte_get_timer_cycles() +		\
+			PARALLEL_TEST_RUNTIME * rte_get_timer_hz();	\
+		bool value = false;					\
+									\
+		do {							\
+			bool new_value = rte_rand() & 1;		\
+			bool use_test_and_modify = rte_rand() & 1;	\
+			bool use_assign = rte_rand() & 1;		\
+			/* The bit must still hold what this lcore last wrote. */ \
+			if (rte_bit_atomic_test(lcore->word, lcore->bit, \
+						rte_memory_order_relaxed) != value) { \
+				lcore->failed = true;			\
+				break;					\
+			}						\
+			/* Write new_value using a randomly chosen variant. */ \
+			if (use_test_and_modify) {			\
+				bool old_value;				\
+				if (use_assign) 			\
+					old_value = rte_bit_atomic_test_and_assign( \
+						lcore->word, lcore->bit, new_value, \
+						rte_memory_order_relaxed); \
+				else {					\
+					old_value = new_value ?		\
+						rte_bit_atomic_test_and_set( \
+							lcore->word, lcore->bit, \
+							rte_memory_order_relaxed) : \
+						rte_bit_atomic_test_and_clear( \
+							lcore->word, lcore->bit, \
+							rte_memory_order_relaxed); \
+				}					\
+				if (old_value != value) {		\
+					lcore->failed = true;		\
+					break;				\
+				}					\
+			} else {					\
+				if (use_assign)				\
+					rte_bit_atomic_assign(lcore->word, lcore->bit, \
+							      new_value, \
+							      rte_memory_order_relaxed); \
+				else {					\
+					if (new_value)			\
+						rte_bit_atomic_set(	\
+							lcore->word, lcore->bit, \
+							rte_memory_order_relaxed); \
+					else				\
+						rte_bit_atomic_clear(	\
+							lcore->word, lcore->bit, \
+							rte_memory_order_relaxed); \
+				}					\
+			}						\
+									\
+			value = new_value;				\
+		} while (rte_get_timer_cycles() < deadline);		\
+									\
+		return 0;						\
+	}								\
+									\
+	static int							\
+	test_bit_atomic_parallel_assign ## size(void)			\
+	{								\
+		unsigned int worker_lcore_id;				\
+		uint ## size ## _t word = 0;				\
+		struct parallel_access_lcore ## size lmain = {		\
+			.word = &word					\
+		};							\
+		struct parallel_access_lcore ## size lworker = {	\
+			.word = &word					\
+		};							\
+									\
+		if (rte_lcore_count() < 2) {				\
+			printf("Need multiple cores to run parallel test.\n"); \
+			return TEST_SKIPPED;				\
+		}							\
+									\
+		worker_lcore_id = rte_get_next_lcore(-1, 1, 0);		\
+		/* Pick two different bit positions within the word. */	\
+		lmain.bit = rte_rand_max(size);				\
+		do {							\
+			lworker.bit = rte_rand_max(size);		\
+		} while (lworker.bit == lmain.bit);			\
+									\
+		int rc = rte_eal_remote_launch(run_parallel_assign ## size, \
+					       &lworker, worker_lcore_id); \
+		TEST_ASSERT(rc == 0, "Worker thread launch failed");	\
+									\
+		run_parallel_assign ## size(&lmain);			\
+									\
+		rte_eal_mp_wait_lcore();				\
+									\
+		TEST_ASSERT(!lmain.failed, "Main lcore atomic access failed"); \
+		TEST_ASSERT(!lworker.failed, "Worker lcore atomic access " \
+			    "failed");					\
+									\
+		return TEST_SUCCESS;					\
+	}
+
+GEN_TEST_BIT_PARALLEL_ASSIGN(32)
+GEN_TEST_BIT_PARALLEL_ASSIGN(64)
+
+#define GEN_TEST_BIT_PARALLEL_TEST_AND_MODIFY(size)			\
+	/* Two lcores race test-and-modify operations on the same bit. */ \
+	struct parallel_test_and_set_lcore ## size			\
+	{								\
+		uint ## size ##_t *word;				\
+		unsigned int bit;					\
+		uint64_t flips;						\
+	};								\
+									\
+	static int							\
+	run_parallel_test_and_modify ## size(void *arg)		\
+	{								\
+		struct parallel_test_and_set_lcore ## size *lcore = arg; \
+		uint64_t deadline = rte_get_timer_cycles() +		\
+			PARALLEL_TEST_RUNTIME * rte_get_timer_hz();	\
+		do {							\
+			bool old_value;					\
+			bool new_value = rte_rand() & 1;		\
+			bool use_assign = rte_rand() & 1;		\
+									\
+			if (use_assign)					\
+				old_value = rte_bit_atomic_test_and_assign( \
+					lcore->word, lcore->bit, new_value, \
+					rte_memory_order_relaxed);	\
+			else						\
+				old_value = new_value ?			\
+					rte_bit_atomic_test_and_set(	\
+						lcore->word, lcore->bit, \
+						rte_memory_order_relaxed) : \
+					rte_bit_atomic_test_and_clear(	\
+						lcore->word, lcore->bit, \
+						rte_memory_order_relaxed); \
+			if (old_value != new_value)			\
+				lcore->flips++;				\
+		} while (rte_get_timer_cycles() < deadline);		\
+									\
+		return 0;						\
+	}								\
+									\
+	static int							\
+	test_bit_atomic_parallel_test_and_modify ## size(void)		\
+	{								\
+		unsigned int worker_lcore_id;				\
+		uint ## size ## _t word = 0;				\
+		unsigned int bit = rte_rand_max(size);			\
+		struct parallel_test_and_set_lcore ## size lmain = {	\
+			.word = &word,					\
+			.bit = bit					\
+		};							\
+		struct parallel_test_and_set_lcore ## size lworker = {	\
+			.word = &word,					\
+			.bit = bit					\
+		};							\
+									\
+		if (rte_lcore_count() < 2) {				\
+			printf("Need multiple cores to run parallel test.\n"); \
+			return TEST_SKIPPED;				\
+		}							\
+									\
+		worker_lcore_id = rte_get_next_lcore(-1, 1, 0);		\
+									\
+		int rc = rte_eal_remote_launch(run_parallel_test_and_modify ## size, \
+					       &lworker, worker_lcore_id); \
+		TEST_ASSERT(rc == 0, "Worker thread launch failed");	\
+									\
+		run_parallel_test_and_modify ## size(&lmain);		\
+									\
+		rte_eal_mp_wait_lcore();				\
+		/* The word starts at 0, so flip parity gives the final bit. */ \
+		uint64_t total_flips = lmain.flips + lworker.flips;	\
+		bool expected_value = total_flips % 2;			\
+									\
+		TEST_ASSERT(expected_value == rte_bit_test(&word, bit), \
+			    "After %"PRIu64" flips, the bit value "	\
+			    "should be %d", total_flips, expected_value); \
+									\
+		uint64_t expected_word = 0;				\
+		rte_bit_assign(&expected_word, bit, expected_value);	\
+									\
+		TEST_ASSERT(expected_word == word, "Untouched bits have " \
+			    "changed value");				\
+									\
+		return TEST_SUCCESS;					\
+	}
+
+GEN_TEST_BIT_PARALLEL_TEST_AND_MODIFY(32)
+GEN_TEST_BIT_PARALLEL_TEST_AND_MODIFY(64)
+
+#define GEN_TEST_BIT_PARALLEL_FLIP(size)				\
+	/* Two lcores concurrently flip the same bit, counting each flip. */ \
+	struct parallel_flip_lcore ## size				\
+	{								\
+		uint ## size ##_t *word;				\
+		unsigned int bit;					\
+		uint64_t flips;						\
+	};								\
+									\
+	static int							\
+	run_parallel_flip ## size(void *arg)				\
+	{								\
+		struct parallel_flip_lcore ## size *lcore = arg; \
+		uint64_t deadline = rte_get_timer_cycles() +		\
+			PARALLEL_TEST_RUNTIME * rte_get_timer_hz();	\
+		do {							\
+			rte_bit_atomic_flip(lcore->word, lcore->bit,	\
+					    rte_memory_order_relaxed);	\
+			lcore->flips++;					\
+		} while (rte_get_timer_cycles() < deadline);		\
+									\
+		return 0;						\
+	}								\
+									\
+	static int							\
+	test_bit_atomic_parallel_flip ## size(void)			\
+	{								\
+		unsigned int worker_lcore_id;				\
+		uint ## size ## _t word = 0;				\
+		unsigned int bit = rte_rand_max(size);			\
+		struct parallel_flip_lcore ## size lmain = {		\
+			.word = &word,					\
+			.bit = bit					\
+		};							\
+		struct parallel_flip_lcore ## size lworker = {		\
+			.word = &word,					\
+			.bit = bit					\
+		};							\
+									\
+		if (rte_lcore_count() < 2) {				\
+			printf("Need multiple cores to run parallel test.\n"); \
+			return TEST_SKIPPED;				\
+		}							\
+									\
+		worker_lcore_id = rte_get_next_lcore(-1, 1, 0);		\
+									\
+		int rc = rte_eal_remote_launch(run_parallel_flip ## size, \
+					       &lworker, worker_lcore_id); \
+		TEST_ASSERT(rc == 0, "Worker thread launch failed");	\
+									\
+		run_parallel_flip ## size(&lmain);			\
+									\
+		rte_eal_mp_wait_lcore();				\
+		/* The word starts at 0, so flip parity gives the final bit. */ \
+		uint64_t total_flips = lmain.flips + lworker.flips;	\
+		bool expected_value = total_flips % 2;			\
+									\
+		TEST_ASSERT(expected_value == rte_bit_test(&word, bit), \
+			    "After %"PRIu64" flips, the bit value "	\
+			    "should be %d", total_flips, expected_value); \
+									\
+		uint64_t expected_word = 0;				\
+		rte_bit_assign(&expected_word, bit, expected_value);	\
+									\
+		TEST_ASSERT(expected_word == word, "Untouched bits have " \
+			    "changed value");				\
+									\
+		return TEST_SUCCESS;					\
+	}
+
+GEN_TEST_BIT_PARALLEL_FLIP(32)
+GEN_TEST_BIT_PARALLEL_FLIP(64)
+
 static uint32_t val32;
 static uint64_t val64;
 
@@ -177,6 +478,16 @@ static struct unit_test_suite test_suite = {
 	.unit_test_cases = {
 		TEST_CASE(test_bit_access32),
 		TEST_CASE(test_bit_access64),
+		TEST_CASE(test_bit_access32),
+		TEST_CASE(test_bit_access64),
+		TEST_CASE(test_bit_atomic_access32),
+		TEST_CASE(test_bit_atomic_access64),
+		TEST_CASE(test_bit_atomic_parallel_assign32),
+		TEST_CASE(test_bit_atomic_parallel_assign64),
+		TEST_CASE(test_bit_atomic_parallel_test_and_modify32),
+		TEST_CASE(test_bit_atomic_parallel_test_and_modify64),
+		TEST_CASE(test_bit_atomic_parallel_flip32),
+		TEST_CASE(test_bit_atomic_parallel_flip64),
 		TEST_CASE(test_bit_relaxed_set),
 		TEST_CASE(test_bit_relaxed_clear),
 		TEST_CASE(test_bit_relaxed_test_set_clear),
-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [PATCH v10 7/7] eal: extend bitops to handle volatile pointers
  2024-09-19 19:31                   ` [PATCH v10 0/7] Improve EAL bit operations API Mattias Rönnblom
                                       ` (5 preceding siblings ...)
  2024-09-19 19:31                     ` [PATCH v10 6/7] eal: add unit tests for atomic bit access functions Mattias Rönnblom
@ 2024-09-19 19:31                     ` Mattias Rönnblom
  6 siblings, 0 replies; 63+ messages in thread
From: Mattias Rönnblom @ 2024-09-19 19:31 UTC (permalink / raw)
  To: dev
  Cc: hofors, Heng Wang, Stephen Hemminger, Tyler Retzlaff,
	Morten Brørup, Jack Bond-Preston, David Marchand,
	Chengwen Feng, Mattias Rönnblom

Have rte_bit_[test|set|clear|assign|flip]() and rte_bit_atomic_*()
handle volatile-marked pointers.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Jack Bond-Preston <jack.bond-preston@foss.arm.com>

--

PATCH v3:
 * Updated to reflect removed 'fun' parameter in __RTE_GEN_BIT_*()
   (Jack Bond-Preston).

PATCH v2:
 * Actually run the test_bit_atomic_v_access*() test functions.
---
 app/test/test_bitops.c       |  32 +++-
 lib/eal/include/rte_bitops.h | 301 +++++++++++++++++++++++------------
 2 files changed, 222 insertions(+), 111 deletions(-)

diff --git a/app/test/test_bitops.c b/app/test/test_bitops.c
index b80216a0a1..10e87f6776 100644
--- a/app/test/test_bitops.c
+++ b/app/test/test_bitops.c
@@ -14,13 +14,13 @@
 #include "test.h"
 
 #define GEN_TEST_BIT_ACCESS(test_name, set_fun, clear_fun, assign_fun,	\
-			    flip_fun, test_fun, size)			\
+			    flip_fun, test_fun, size, mod)		\
 	static int							\
 	test_name(void)							\
 	{								\
 		uint ## size ## _t reference = (uint ## size ## _t)rte_rand(); \
 		unsigned int bit_nr;					\
-		uint ## size ## _t word = (uint ## size ## _t)rte_rand(); \
+		mod uint ## size ## _t word = (uint ## size ## _t)rte_rand(); \
 									\
 		for (bit_nr = 0; bit_nr < size; bit_nr++) {		\
 			bool reference_bit = (reference >> bit_nr) & 1;	\
@@ -41,7 +41,7 @@
 				    "Bit %d had unflipped value", bit_nr); \
 			flip_fun(&word, bit_nr);			\
 									\
-			const uint ## size ## _t *const_ptr = &word;	\
+			const mod uint ## size ## _t *const_ptr = &word; \
 			TEST_ASSERT(test_fun(const_ptr, bit_nr) ==	\
 				    reference_bit,			\
 				    "Bit %d had unexpected value", bit_nr); \
@@ -59,10 +59,16 @@
 	}
 
 GEN_TEST_BIT_ACCESS(test_bit_access32, rte_bit_set, rte_bit_clear,
-		    rte_bit_assign, rte_bit_flip, rte_bit_test, 32)
+		    rte_bit_assign, rte_bit_flip, rte_bit_test, 32,)
 
 GEN_TEST_BIT_ACCESS(test_bit_access64, rte_bit_set, rte_bit_clear,
-		    rte_bit_assign, rte_bit_flip, rte_bit_test, 64)
+		    rte_bit_assign, rte_bit_flip, rte_bit_test, 64,)
+
+GEN_TEST_BIT_ACCESS(test_bit_v_access32, rte_bit_set, rte_bit_clear,
+		    rte_bit_assign, rte_bit_flip, rte_bit_test, 32, volatile)
+
+GEN_TEST_BIT_ACCESS(test_bit_v_access64, rte_bit_set, rte_bit_clear,
+		    rte_bit_assign, rte_bit_flip, rte_bit_test, 64, volatile)
 
 #define bit_atomic_set(addr, nr)				\
 	rte_bit_atomic_set(addr, nr, rte_memory_order_relaxed)
@@ -81,11 +87,19 @@ GEN_TEST_BIT_ACCESS(test_bit_access64, rte_bit_set, rte_bit_clear,
 
 GEN_TEST_BIT_ACCESS(test_bit_atomic_access32, bit_atomic_set,
 		    bit_atomic_clear, bit_atomic_assign,
-		    bit_atomic_flip, bit_atomic_test, 32)
+		    bit_atomic_flip, bit_atomic_test, 32,)
 
 GEN_TEST_BIT_ACCESS(test_bit_atomic_access64, bit_atomic_set,
 		    bit_atomic_clear, bit_atomic_assign,
-		    bit_atomic_flip, bit_atomic_test, 64)
+		    bit_atomic_flip, bit_atomic_test, 64,)
+
+GEN_TEST_BIT_ACCESS(test_bit_atomic_v_access32, bit_atomic_set,
+		    bit_atomic_clear, bit_atomic_assign,
+		    bit_atomic_flip, bit_atomic_test, 32, volatile)
+
+GEN_TEST_BIT_ACCESS(test_bit_atomic_v_access64, bit_atomic_set,
+		    bit_atomic_clear, bit_atomic_assign,
+		    bit_atomic_flip, bit_atomic_test, 64, volatile)
 
 #define PARALLEL_TEST_RUNTIME 0.25
 
@@ -480,8 +494,12 @@ static struct unit_test_suite test_suite = {
 		TEST_CASE(test_bit_access64),
 		TEST_CASE(test_bit_access32),
 		TEST_CASE(test_bit_access64),
+		TEST_CASE(test_bit_v_access32),
+		TEST_CASE(test_bit_v_access64),
 		TEST_CASE(test_bit_atomic_access32),
 		TEST_CASE(test_bit_atomic_access64),
+		TEST_CASE(test_bit_atomic_v_access32),
+		TEST_CASE(test_bit_atomic_v_access64),
 		TEST_CASE(test_bit_atomic_parallel_assign32),
 		TEST_CASE(test_bit_atomic_parallel_assign64),
 		TEST_CASE(test_bit_atomic_parallel_test_and_modify32),
diff --git a/lib/eal/include/rte_bitops.h b/lib/eal/include/rte_bitops.h
index 3ad6795fd1..d7a07c4099 100644
--- a/lib/eal/include/rte_bitops.h
+++ b/lib/eal/include/rte_bitops.h
@@ -127,12 +127,16 @@ extern "C" {
  * @param nr
  *   The index of the bit.
  */
-#define rte_bit_test(addr, nr)					\
-	_Generic((addr),					\
-		uint32_t *: __rte_bit_test32,			\
-		const uint32_t *: __rte_bit_test32,		\
-		uint64_t *: __rte_bit_test64,			\
-		const uint64_t *: __rte_bit_test64)(addr, nr)
+#define rte_bit_test(addr, nr)						\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_test32,				\
+		 const uint32_t *: __rte_bit_test32,			\
+		 volatile uint32_t *: __rte_bit_v_test32,		\
+		 const volatile uint32_t *: __rte_bit_v_test32,		\
+		 uint64_t *: __rte_bit_test64,				\
+		 const uint64_t *: __rte_bit_test64,			\
+		 volatile uint64_t *: __rte_bit_v_test64,		\
+		 const volatile uint64_t *: __rte_bit_v_test64)(addr, nr)
 
 /**
  * @warning
@@ -152,10 +156,12 @@ extern "C" {
  * @param nr
  *   The index of the bit.
  */
-#define rte_bit_set(addr, nr)				\
-	_Generic((addr),				\
-		 uint32_t *: __rte_bit_set32,		\
-		 uint64_t *: __rte_bit_set64)(addr, nr)
+#define rte_bit_set(addr, nr)						\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_set32,				\
+		 volatile uint32_t *: __rte_bit_v_set32,		\
+		 uint64_t *: __rte_bit_set64,				\
+		 volatile uint64_t *: __rte_bit_v_set64)(addr, nr)
 
 /**
  * @warning
@@ -175,10 +181,12 @@ extern "C" {
  * @param nr
  *   The index of the bit.
  */
-#define rte_bit_clear(addr, nr)					\
-	_Generic((addr),					\
-		 uint32_t *: __rte_bit_clear32,			\
-		 uint64_t *: __rte_bit_clear64)(addr, nr)
+#define rte_bit_clear(addr, nr)						\
+	_Generic((addr),						\
+		 uint32_t *: __rte_bit_clear32,				\
+		 volatile uint32_t *: __rte_bit_v_clear32,		\
+		 uint64_t *: __rte_bit_clear64,				\
+		 volatile uint64_t *: __rte_bit_v_clear64)(addr, nr)
 
 /**
  * @warning
@@ -202,7 +210,9 @@ extern "C" {
 #define rte_bit_assign(addr, nr, value)					\
 	_Generic((addr),						\
 		 uint32_t *: __rte_bit_assign32,			\
-		 uint64_t *: __rte_bit_assign64)(addr, nr, value)
+		 volatile uint32_t *: __rte_bit_v_assign32,		\
+		 uint64_t *: __rte_bit_assign64,			\
+		 volatile uint64_t *: __rte_bit_v_assign64)(addr, nr, value)
 
 /**
  * @warning
@@ -225,7 +235,9 @@ extern "C" {
 #define rte_bit_flip(addr, nr)						\
 	_Generic((addr),						\
 		 uint32_t *: __rte_bit_flip32,				\
-		 uint64_t *: __rte_bit_flip64)(addr, nr)
+		 volatile uint32_t *: __rte_bit_v_flip32,		\
+		 uint64_t *: __rte_bit_flip64,				\
+		 volatile uint64_t *: __rte_bit_v_flip64)(addr, nr)
 
 /**
  * @warning
@@ -250,9 +262,13 @@ extern "C" {
 	_Generic((addr),						\
 		 uint32_t *: __rte_bit_atomic_test32,			\
 		 const uint32_t *: __rte_bit_atomic_test32,		\
+		 volatile uint32_t *: __rte_bit_atomic_v_test32,	\
+		 const volatile uint32_t *: __rte_bit_atomic_v_test32,	\
 		 uint64_t *: __rte_bit_atomic_test64,			\
-		 const uint64_t *: __rte_bit_atomic_test64)(addr, nr,	\
-							    memory_order)
+		 const uint64_t *: __rte_bit_atomic_test64,		\
+		 volatile uint64_t *: __rte_bit_atomic_v_test64,	\
+		 const volatile uint64_t *: __rte_bit_atomic_v_test64) \
+						    (addr, nr, memory_order)
 
 /**
  * @warning
@@ -274,7 +290,10 @@ extern "C" {
 #define rte_bit_atomic_set(addr, nr, memory_order)			\
 	_Generic((addr),						\
 		 uint32_t *: __rte_bit_atomic_set32,			\
-		 uint64_t *: __rte_bit_atomic_set64)(addr, nr, memory_order)
+		 volatile uint32_t *: __rte_bit_atomic_v_set32,		\
+		 uint64_t *: __rte_bit_atomic_set64,			\
+		 volatile uint64_t *: __rte_bit_atomic_v_set64)(addr, nr, \
+								memory_order)
 
 /**
  * @warning
@@ -296,7 +315,10 @@ extern "C" {
 #define rte_bit_atomic_clear(addr, nr, memory_order)			\
 	_Generic((addr),						\
 		 uint32_t *: __rte_bit_atomic_clear32,			\
-		 uint64_t *: __rte_bit_atomic_clear64)(addr, nr, memory_order)
+		 volatile uint32_t *: __rte_bit_atomic_v_clear32,	\
+		 uint64_t *: __rte_bit_atomic_clear64,			\
+		 volatile uint64_t *: __rte_bit_atomic_v_clear64)(addr, nr, \
+								  memory_order)
 
 /**
  * @warning
@@ -320,8 +342,11 @@ extern "C" {
 #define rte_bit_atomic_assign(addr, nr, value, memory_order)		\
 	_Generic((addr),						\
 		 uint32_t *: __rte_bit_atomic_assign32,			\
-		 uint64_t *: __rte_bit_atomic_assign64)(addr, nr, value, \
-							memory_order)
+		 volatile uint32_t *: __rte_bit_atomic_v_assign32,	\
+		 uint64_t *: __rte_bit_atomic_assign64,			\
+		 volatile uint64_t *: __rte_bit_atomic_v_assign64)(addr, nr, \
+								   value, \
+								   memory_order)
 
 /**
  * @warning
@@ -344,7 +369,10 @@ extern "C" {
 #define rte_bit_atomic_flip(addr, nr, memory_order)			\
 	_Generic((addr),						\
 		 uint32_t *: __rte_bit_atomic_flip32,			\
-		 uint64_t *: __rte_bit_atomic_flip64)(addr, nr, memory_order)
+		 volatile uint32_t *: __rte_bit_atomic_v_flip32,	\
+		 uint64_t *: __rte_bit_atomic_flip64,			\
+		 volatile uint64_t *: __rte_bit_atomic_v_flip64)(addr, nr, \
+								 memory_order)
 
 /**
  * @warning
@@ -368,8 +396,10 @@ extern "C" {
 #define rte_bit_atomic_test_and_set(addr, nr, memory_order)		\
 	_Generic((addr),						\
 		 uint32_t *: __rte_bit_atomic_test_and_set32,		\
-		 uint64_t *: __rte_bit_atomic_test_and_set64)(addr, nr,	\
-							      memory_order)
+		 volatile uint32_t *: __rte_bit_atomic_v_test_and_set32, \
+		 uint64_t *: __rte_bit_atomic_test_and_set64,		\
+		 volatile uint64_t *: __rte_bit_atomic_v_test_and_set64) \
+						    (addr, nr, memory_order)
 
 /**
  * @warning
@@ -393,8 +423,10 @@ extern "C" {
 #define rte_bit_atomic_test_and_clear(addr, nr, memory_order)		\
 	_Generic((addr),						\
 		 uint32_t *: __rte_bit_atomic_test_and_clear32,		\
-		 uint64_t *: __rte_bit_atomic_test_and_clear64)(addr, nr, \
-								memory_order)
+		 volatile uint32_t *: __rte_bit_atomic_v_test_and_clear32, \
+		 uint64_t *: __rte_bit_atomic_test_and_clear64,		\
+		 volatile uint64_t *: __rte_bit_atomic_v_test_and_clear64) \
+						       (addr, nr, memory_order)
 
 /**
  * @warning
@@ -421,9 +453,10 @@ extern "C" {
 #define rte_bit_atomic_test_and_assign(addr, nr, value, memory_order)	\
 	_Generic((addr),						\
 		 uint32_t *: __rte_bit_atomic_test_and_assign32,	\
-		 uint64_t *: __rte_bit_atomic_test_and_assign64)(addr, nr, \
-								 value, \
-								 memory_order)
+		 volatile uint32_t *: __rte_bit_atomic_v_test_and_assign32, \
+		 uint64_t *: __rte_bit_atomic_test_and_assign64,	\
+		 volatile uint64_t *: __rte_bit_atomic_v_test_and_assign64) \
+						(addr, nr, value, memory_order)
 
 #define __RTE_GEN_BIT_TEST(variant, qualifier, size)			\
 	__rte_experimental						\
@@ -493,7 +526,8 @@ extern "C" {
 	__RTE_GEN_BIT_FLIP(v, qualifier, size)
 
 #define __RTE_GEN_BIT_OPS_SIZE(size) \
-	__RTE_GEN_BIT_OPS(,, size)
+	__RTE_GEN_BIT_OPS(,, size) \
+	__RTE_GEN_BIT_OPS(v_, volatile, size)
 
 __RTE_GEN_BIT_OPS_SIZE(32)
 __RTE_GEN_BIT_OPS_SIZE(64)
@@ -633,7 +667,8 @@ __RTE_GEN_BIT_OPS_SIZE(64)
 	__RTE_GEN_BIT_ATOMIC_FLIP(variant, qualifier, size)
 
 #define __RTE_GEN_BIT_ATOMIC_OPS_SIZE(size) \
-	__RTE_GEN_BIT_ATOMIC_OPS(,, size)
+	__RTE_GEN_BIT_ATOMIC_OPS(,, size) \
+	__RTE_GEN_BIT_ATOMIC_OPS(v_, volatile, size)
 
 __RTE_GEN_BIT_ATOMIC_OPS_SIZE(32)
 __RTE_GEN_BIT_ATOMIC_OPS_SIZE(64)
@@ -1342,120 +1377,178 @@ rte_log2_u64(uint64_t v)
 #undef rte_bit_atomic_test_and_clear
 #undef rte_bit_atomic_test_and_assign
 
-#define __RTE_BIT_OVERLOAD_SZ_2(fun, qualifier, size, arg1_type, arg1_name) \
+#define __RTE_BIT_OVERLOAD_V_2(family, v, fun, c, size, arg1_type, arg1_name) \
 	static inline void						\
-	rte_bit_ ## fun(qualifier uint ## size ## _t *addr,		\
-			arg1_type arg1_name)				\
+	rte_bit_ ## family ## fun(c uint ## size ## _t *addr,		\
+				  arg1_type arg1_name)			\
 	{								\
-		__rte_bit_ ## fun ## size(addr, arg1_name);		\
+		__rte_bit_ ## family ## v ## fun ## size(addr, arg1_name); \
 	}
 
-#define __RTE_BIT_OVERLOAD_2(fun, qualifier, arg1_type, arg1_name)	\
-	__RTE_BIT_OVERLOAD_SZ_2(fun, qualifier, 32, arg1_type, arg1_name) \
-	__RTE_BIT_OVERLOAD_SZ_2(fun, qualifier, 64, arg1_type, arg1_name)
+#define __RTE_BIT_OVERLOAD_SZ_2(family, fun, c, size, arg1_type, arg1_name) \
+	__RTE_BIT_OVERLOAD_V_2(family,, fun, c, size, arg1_type,	\
+			       arg1_name)				\
+	__RTE_BIT_OVERLOAD_V_2(family, v_, fun, c volatile, size, \
+			       arg1_type, arg1_name)
 
-#define __RTE_BIT_OVERLOAD_SZ_2R(fun, qualifier, size, ret_type, arg1_type, \
-				 arg1_name)				\
+#define __RTE_BIT_OVERLOAD_2(family, fun, c, arg1_type, arg1_name)	\
+	__RTE_BIT_OVERLOAD_SZ_2(family, fun, c, 32, arg1_type, arg1_name) \
+	__RTE_BIT_OVERLOAD_SZ_2(family, fun, c, 64, arg1_type, arg1_name)
+
+#define __RTE_BIT_OVERLOAD_V_2R(family, v, fun, c, size, ret_type, arg1_type, \
+				arg1_name)				\
 	static inline ret_type						\
-	rte_bit_ ## fun(qualifier uint ## size ## _t *addr,		\
+	rte_bit_ ## family ## fun(c uint ## size ## _t *addr,		\
 			arg1_type arg1_name)				\
 	{								\
-		return __rte_bit_ ## fun ## size(addr, arg1_name);	\
+		return __rte_bit_ ## family ## v ## fun ## size(addr,	\
+								arg1_name); \
 	}
 
-#define __RTE_BIT_OVERLOAD_2R(fun, qualifier, ret_type, arg1_type, arg1_name) \
-	__RTE_BIT_OVERLOAD_SZ_2R(fun, qualifier, 32, ret_type, arg1_type, \
+#define __RTE_BIT_OVERLOAD_SZ_2R(family, fun, c, size, ret_type, arg1_type, \
+				 arg1_name)				\
+	__RTE_BIT_OVERLOAD_V_2R(family,, fun, c, size, ret_type, arg1_type, \
+				arg1_name)				\
+	__RTE_BIT_OVERLOAD_V_2R(family, v_, fun, c volatile,		\
+				size, ret_type, arg1_type, arg1_name)
+
+#define __RTE_BIT_OVERLOAD_2R(family, fun, c, ret_type, arg1_type, arg1_name) \
+	__RTE_BIT_OVERLOAD_SZ_2R(family, fun, c, 32, ret_type, arg1_type, \
 				 arg1_name)				\
-	__RTE_BIT_OVERLOAD_SZ_2R(fun, qualifier, 64, ret_type, arg1_type, \
+	__RTE_BIT_OVERLOAD_SZ_2R(family, fun, c, 64, ret_type, arg1_type, \
 				 arg1_name)
 
-#define __RTE_BIT_OVERLOAD_SZ_3(fun, qualifier, size, arg1_type, arg1_name, \
-				arg2_type, arg2_name)			\
+#define __RTE_BIT_OVERLOAD_V_3(family, v, fun, c, size, arg1_type, arg1_name, \
+			       arg2_type, arg2_name)			\
 	static inline void						\
-	rte_bit_ ## fun(uint ## size ## _t *addr, arg1_type arg1_name,	\
-			arg2_type arg2_name)				\
+	rte_bit_ ## family ## fun(c uint ## size ## _t *addr,		\
+				  arg1_type arg1_name, arg2_type arg2_name) \
 	{								\
-		__rte_bit_ ## fun ## size(addr, arg1_name, arg2_name);	\
+		__rte_bit_ ## family ## v ## fun ## size(addr, arg1_name, \
+							 arg2_name);	\
 	}
 
-#define __RTE_BIT_OVERLOAD_3(fun, qualifier, arg1_type, arg1_name, arg2_type, \
+#define __RTE_BIT_OVERLOAD_SZ_3(family, fun, c, size, arg1_type, arg1_name, \
+				arg2_type, arg2_name)			\
+	__RTE_BIT_OVERLOAD_V_3(family,, fun, c, size, arg1_type, arg1_name, \
+			       arg2_type, arg2_name)			\
+	__RTE_BIT_OVERLOAD_V_3(family, v_, fun, c volatile, size, arg1_type, \
+			       arg1_name, arg2_type, arg2_name)
+
+#define __RTE_BIT_OVERLOAD_3(family, fun, c, arg1_type, arg1_name, arg2_type, \
 			     arg2_name)					\
-	__RTE_BIT_OVERLOAD_SZ_3(fun, qualifier, 32, arg1_type, arg1_name, \
+	__RTE_BIT_OVERLOAD_SZ_3(family, fun, c, 32, arg1_type, arg1_name, \
 				arg2_type, arg2_name)			\
-	__RTE_BIT_OVERLOAD_SZ_3(fun, qualifier, 64, arg1_type, arg1_name, \
+	__RTE_BIT_OVERLOAD_SZ_3(family, fun, c, 64, arg1_type, arg1_name, \
 				arg2_type, arg2_name)
 
-#define __RTE_BIT_OVERLOAD_SZ_3R(fun, qualifier, size, ret_type, arg1_type, \
-				 arg1_name, arg2_type, arg2_name)	\
+#define __RTE_BIT_OVERLOAD_V_3R(family, v, fun, c, size, ret_type, arg1_type, \
+				arg1_name, arg2_type, arg2_name)	\
 	static inline ret_type						\
-	rte_bit_ ## fun(uint ## size ## _t *addr, arg1_type arg1_name,	\
-			arg2_type arg2_name)				\
+	rte_bit_ ## family ## fun(c uint ## size ## _t *addr,		\
+				  arg1_type arg1_name, arg2_type arg2_name) \
 	{								\
-		return __rte_bit_ ## fun ## size(addr, arg1_name, arg2_name); \
+		return __rte_bit_ ## family ## v ## fun ## size(addr,	\
+								arg1_name, \
+								arg2_name); \
 	}
 
-#define __RTE_BIT_OVERLOAD_3R(fun, qualifier, ret_type, arg1_type, arg1_name, \
-			      arg2_type, arg2_name)			\
-	__RTE_BIT_OVERLOAD_SZ_3R(fun, qualifier, 32, ret_type, arg1_type, \
+#define __RTE_BIT_OVERLOAD_SZ_3R(family, fun, c, size, ret_type, arg1_type, \
 				 arg1_name, arg2_type, arg2_name)	\
-	__RTE_BIT_OVERLOAD_SZ_3R(fun, qualifier, 64, ret_type, arg1_type, \
-				 arg1_name, arg2_type, arg2_name)
+	__RTE_BIT_OVERLOAD_V_3R(family,, fun, c, size, ret_type, \
+				arg1_type, arg1_name, arg2_type, arg2_name) \
+	__RTE_BIT_OVERLOAD_V_3R(family, v_, fun, c volatile, size, \
+				ret_type, arg1_type, arg1_name, arg2_type, \
+				arg2_name)
 
-#define __RTE_BIT_OVERLOAD_SZ_4(fun, qualifier, size, arg1_type, arg1_name, \
-				arg2_type, arg2_name, arg3_type, arg3_name) \
+#define __RTE_BIT_OVERLOAD_3R(family, fun, c, ret_type, arg1_type, arg1_name, \
+			      arg2_type, arg2_name)			\
+	__RTE_BIT_OVERLOAD_SZ_3R(family, fun, c, 32, ret_type,		\
+				 arg1_type, arg1_name, arg2_type, arg2_name) \
+	__RTE_BIT_OVERLOAD_SZ_3R(family, fun, c, 64, ret_type, \
+				 arg1_type, arg1_name, arg2_type, arg2_name)
+
+#define __RTE_BIT_OVERLOAD_V_4(family, v, fun, c, size, arg1_type, arg1_name, \
+			       arg2_type, arg2_name, arg3_type,	arg3_name) \
 	static inline void						\
-	rte_bit_ ## fun(uint ## size ## _t *addr, arg1_type arg1_name,	\
-			arg2_type arg2_name, arg3_type arg3_name)	\
+	rte_bit_ ## family ## fun(c uint ## size ## _t *addr,		\
+				  arg1_type arg1_name, arg2_type arg2_name, \
+				  arg3_type arg3_name)			\
 	{								\
-		__rte_bit_ ## fun ## size(addr, arg1_name, arg2_name,	\
-					  arg3_name);		      \
+		__rte_bit_ ## family ## v ## fun ## size(addr, arg1_name, \
+							 arg2_name,	\
+							 arg3_name);	\
 	}
 
-#define __RTE_BIT_OVERLOAD_4(fun, qualifier, arg1_type, arg1_name, arg2_type, \
-			     arg2_name, arg3_type, arg3_name)		\
-	__RTE_BIT_OVERLOAD_SZ_4(fun, qualifier, 32, arg1_type, arg1_name, \
+#define __RTE_BIT_OVERLOAD_SZ_4(family, fun, c, size, arg1_type, arg1_name, \
 				arg2_type, arg2_name, arg3_type, arg3_name) \
-	__RTE_BIT_OVERLOAD_SZ_4(fun, qualifier, 64, arg1_type, arg1_name, \
-				arg2_type, arg2_name, arg3_type, arg3_name)
-
-#define __RTE_BIT_OVERLOAD_SZ_4R(fun, qualifier, size, ret_type, arg1_type, \
-				 arg1_name, arg2_type, arg2_name, arg3_type, \
-				 arg3_name)				\
+	__RTE_BIT_OVERLOAD_V_4(family,, fun, c, size, arg1_type,	\
+			       arg1_name, arg2_type, arg2_name, arg3_type, \
+			       arg3_name)				\
+	__RTE_BIT_OVERLOAD_V_4(family, v_, fun, c volatile, size,	\
+			       arg1_type, arg1_name, arg2_type, arg2_name, \
+			       arg3_type, arg3_name)
+
+#define __RTE_BIT_OVERLOAD_4(family, fun, c, arg1_type, arg1_name, arg2_type, \
+			     arg2_name, arg3_type, arg3_name)		\
+	__RTE_BIT_OVERLOAD_SZ_4(family, fun, c, 32, arg1_type,		\
+				arg1_name, arg2_type, arg2_name, arg3_type, \
+				arg3_name)				\
+	__RTE_BIT_OVERLOAD_SZ_4(family, fun, c, 64, arg1_type,		\
+				arg1_name, arg2_type, arg2_name, arg3_type, \
+				arg3_name)
+
+#define __RTE_BIT_OVERLOAD_V_4R(family, v, fun, c, size, ret_type, arg1_type, \
+				arg1_name, arg2_type, arg2_name, arg3_type, \
+				arg3_name)				\
 	static inline ret_type						\
-	rte_bit_ ## fun(uint ## size ## _t *addr, arg1_type arg1_name,	\
-			arg2_type arg2_name, arg3_type arg3_name)	\
+	rte_bit_ ## family ## fun(c uint ## size ## _t *addr,		\
+				  arg1_type arg1_name, arg2_type arg2_name, \
+				  arg3_type arg3_name)			\
 	{								\
-		return __rte_bit_ ## fun ## size(addr, arg1_name, arg2_name, \
-						 arg3_name);		\
+		return __rte_bit_ ## family ## v ## fun ## size(addr,	\
+								arg1_name, \
+								arg2_name, \
+								arg3_name); \
 	}
 
-#define __RTE_BIT_OVERLOAD_4R(fun, qualifier, ret_type, arg1_type, arg1_name, \
-			      arg2_type, arg2_name, arg3_type, arg3_name) \
-	__RTE_BIT_OVERLOAD_SZ_4R(fun, qualifier, 32, ret_type, arg1_type, \
+#define __RTE_BIT_OVERLOAD_SZ_4R(family, fun, c, size, ret_type, arg1_type, \
 				 arg1_name, arg2_type, arg2_name, arg3_type, \
 				 arg3_name)				\
-	__RTE_BIT_OVERLOAD_SZ_4R(fun, qualifier, 64, ret_type, arg1_type, \
-				 arg1_name, arg2_type, arg2_name, arg3_type, \
-				 arg3_name)
-
-__RTE_BIT_OVERLOAD_2R(test, const, bool, unsigned int, nr)
-__RTE_BIT_OVERLOAD_2(set,, unsigned int, nr)
-__RTE_BIT_OVERLOAD_2(clear,, unsigned int, nr)
-__RTE_BIT_OVERLOAD_3(assign,, unsigned int, nr, bool, value)
-__RTE_BIT_OVERLOAD_2(flip,, unsigned int, nr)
-
-__RTE_BIT_OVERLOAD_3R(atomic_test, const, bool, unsigned int, nr,
+	__RTE_BIT_OVERLOAD_V_4R(family,, fun, c, size, ret_type, arg1_type, \
+				arg1_name, arg2_type, arg2_name, arg3_type, \
+				arg3_name)				\
+	__RTE_BIT_OVERLOAD_V_4R(family, v_, fun, c volatile, size,	\
+				ret_type, arg1_type, arg1_name, arg2_type, \
+				arg2_name, arg3_type, arg3_name)
+
+#define __RTE_BIT_OVERLOAD_4R(family, fun, c, ret_type, arg1_type, arg1_name, \
+			      arg2_type, arg2_name, arg3_type, arg3_name) \
+	__RTE_BIT_OVERLOAD_SZ_4R(family, fun, c, 32, ret_type,		\
+				 arg1_type, arg1_name, arg2_type, arg2_name, \
+				 arg3_type, arg3_name)			\
+	__RTE_BIT_OVERLOAD_SZ_4R(family, fun, c, 64, ret_type,		\
+				 arg1_type, arg1_name, arg2_type, arg2_name, \
+				 arg3_type, arg3_name)
+
+__RTE_BIT_OVERLOAD_2R(, test, const, bool, unsigned int, nr)
+__RTE_BIT_OVERLOAD_2(, set,, unsigned int, nr)
+__RTE_BIT_OVERLOAD_2(, clear,, unsigned int, nr)
+__RTE_BIT_OVERLOAD_3(, assign,, unsigned int, nr, bool, value)
+__RTE_BIT_OVERLOAD_2(, flip,, unsigned int, nr)
+
+__RTE_BIT_OVERLOAD_3R(atomic_, test, const, bool, unsigned int, nr,
 		      int, memory_order)
-__RTE_BIT_OVERLOAD_3(atomic_set,, unsigned int, nr, int, memory_order)
-__RTE_BIT_OVERLOAD_3(atomic_clear,, unsigned int, nr, int, memory_order)
-__RTE_BIT_OVERLOAD_4(atomic_assign,, unsigned int, nr, bool, value,
+__RTE_BIT_OVERLOAD_3(atomic_, set,, unsigned int, nr, int, memory_order)
+__RTE_BIT_OVERLOAD_3(atomic_, clear,, unsigned int, nr, int, memory_order)
+__RTE_BIT_OVERLOAD_4(atomic_, assign,, unsigned int, nr, bool, value,
 		     int, memory_order)
-__RTE_BIT_OVERLOAD_3(atomic_flip,, unsigned int, nr, int, memory_order)
-__RTE_BIT_OVERLOAD_3R(atomic_test_and_set,, bool, unsigned int, nr,
+__RTE_BIT_OVERLOAD_3(atomic_, flip,, unsigned int, nr, int, memory_order)
+__RTE_BIT_OVERLOAD_3R(atomic_, test_and_set,, bool, unsigned int, nr,
 		      int, memory_order)
-__RTE_BIT_OVERLOAD_3R(atomic_test_and_clear,, bool, unsigned int, nr,
+__RTE_BIT_OVERLOAD_3R(atomic_, test_and_clear,, bool, unsigned int, nr,
 		      int, memory_order)
-__RTE_BIT_OVERLOAD_4R(atomic_test_and_assign,, bool, unsigned int, nr,
+__RTE_BIT_OVERLOAD_4R(atomic_, test_and_assign,, bool, unsigned int, nr,
 		      bool, value, int, memory_order)
 
 #endif
-- 
2.34.1


^ permalink raw reply	[flat|nested] 63+ messages in thread

end of thread, other threads:[~2024-09-19 19:41 UTC | newest]

Thread overview: 63+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-01-31 13:13 [RFC v3] eal: add bitset type Mattias Rönnblom
2024-01-31 16:02 ` Stephen Hemminger
2024-01-31 16:28   ` Mattias Rönnblom
2024-01-31 16:06 ` Stephen Hemminger
2024-01-31 18:45   ` Mattias Rönnblom
2024-02-01  8:04     ` Morten Brørup
2024-02-02 10:19       ` Mattias Rönnblom
2024-02-02 12:42         ` Morten Brørup
2024-02-16 10:23 ` [RFC v4 1/4] " Mattias Rönnblom
2024-02-16 10:23   ` [RFC v4 2/4] eal: add bitset test suite Mattias Rönnblom
2024-02-16 10:23   ` [RFC v4 3/4] service: use multi-word bitset to represent service flags Mattias Rönnblom
2024-02-16 10:23   ` [RFC v4 4/4] event/dsw: optimize serving port logic Mattias Rönnblom
2024-05-05  7:33   ` [RFC v5 1/6] eal: add bitset type Mattias Rönnblom
2024-05-05  7:33     ` [RFC v5 2/6] eal: add bitset test suite Mattias Rönnblom
2024-05-05  7:33     ` [RFC v5 3/6] eal: add atomic bitset functions Mattias Rönnblom
2024-05-05  7:33     ` [RFC v5 4/6] eal: add unit tests for atomic bitset operations Mattias Rönnblom
2024-05-05  7:33     ` [RFC v5 5/6] service: use multi-word bitset to represent service flags Mattias Rönnblom
2024-05-05  7:33     ` [RFC v5 6/6] event/dsw: optimize serving port logic Mattias Rönnblom
2024-08-09 20:14     ` [PATCH 1/6] eal: add bitset type Mattias Rönnblom
2024-08-09 20:14       ` [PATCH 2/6] eal: add bitset test suite Mattias Rönnblom
2024-09-12  4:51         ` Tyler Retzlaff
2024-08-09 20:14       ` [PATCH 3/6] eal: add atomic bitset functions Mattias Rönnblom
2024-09-12  4:51         ` Tyler Retzlaff
2024-08-09 20:14       ` [PATCH 4/6] eal: add unit tests for atomic bitset operations Mattias Rönnblom
2024-09-12  4:52         ` Tyler Retzlaff
2024-08-09 20:14       ` [PATCH 5/6] service: use multi-word bitset to represent service flags Mattias Rönnblom
2024-09-12  4:52         ` Tyler Retzlaff
2024-08-09 20:14       ` [PATCH 6/6] event/dsw: add support for larger port count Mattias Rönnblom
2024-09-12  4:53         ` Tyler Retzlaff
2024-08-20 17:09       ` [PATCH 1/6] eal: add bitset type Mattias Rönnblom
2024-09-02 13:55       ` Morten Brørup
2024-09-02 14:46         ` Mattias Rönnblom
2024-09-02 14:49         ` Mattias Rönnblom
2024-09-12  4:51       ` Tyler Retzlaff
2024-09-17  9:36       ` [PATCH v7 0/6] Improve EAL bit operations API Mattias Rönnblom
2024-09-17  9:36         ` [PATCH v7 1/6] dpdk: do not force C linkage on include file dependencies Mattias Rönnblom
2024-09-17 10:48           ` [PATCH v8 0/6] Improve EAL bit operations API Mattias Rönnblom
2024-09-17 10:48             ` [PATCH v8 1/6] dpdk: do not force C linkage on include file dependencies Mattias Rönnblom
2024-09-18  9:04               ` [PATCH v9 0/6] Improve EAL bit operations API Mattias Rönnblom
2024-09-18  9:04                 ` [PATCH v9 1/6] dpdk: do not force C linkage on include file dependencies Mattias Rönnblom
2024-09-19 19:31                   ` [PATCH v10 0/7] Improve EAL bit operations API Mattias Rönnblom
2024-09-19 19:31                     ` [PATCH v10 1/7] buildtools/chkincs: relax C linkage requirement Mattias Rönnblom
2024-09-19 19:31                     ` [PATCH v10 2/7] dpdk: use C linkage only where appropriate Mattias Rönnblom
2024-09-19 19:31                     ` [PATCH v10 3/7] eal: extend bit manipulation functionality Mattias Rönnblom
2024-09-19 19:31                     ` [PATCH v10 4/7] eal: add unit tests for bit operations Mattias Rönnblom
2024-09-19 19:31                     ` [PATCH v10 5/7] eal: add atomic " Mattias Rönnblom
2024-09-19 19:31                     ` [PATCH v10 6/7] eal: add unit tests for atomic bit access functions Mattias Rönnblom
2024-09-19 19:31                     ` [PATCH v10 7/7] eal: extend bitops to handle volatile pointers Mattias Rönnblom
2024-09-18  9:04                 ` [PATCH v9 2/6] eal: extend bit manipulation functionality Mattias Rönnblom
2024-09-18  9:04                 ` [PATCH v9 3/6] eal: add unit tests for bit operations Mattias Rönnblom
2024-09-18  9:04                 ` [PATCH v9 4/6] eal: add atomic " Mattias Rönnblom
2024-09-18  9:04                 ` [PATCH v9 5/6] eal: add unit tests for atomic bit access functions Mattias Rönnblom
2024-09-18  9:04                 ` [PATCH v9 6/6] eal: extend bitops to handle volatile pointers Mattias Rönnblom
2024-09-17 10:48             ` [PATCH v8 2/6] eal: extend bit manipulation functionality Mattias Rönnblom
2024-09-17 10:48             ` [PATCH v8 3/6] eal: add unit tests for bit operations Mattias Rönnblom
2024-09-17 10:48             ` [PATCH v8 4/6] eal: add atomic " Mattias Rönnblom
2024-09-17 10:48             ` [PATCH v8 5/6] eal: add unit tests for atomic bit access functions Mattias Rönnblom
2024-09-17 10:48             ` [PATCH v8 6/6] eal: extend bitops to handle volatile pointers Mattias Rönnblom
2024-09-17  9:36         ` [PATCH v7 2/6] eal: extend bit manipulation functionality Mattias Rönnblom
2024-09-17  9:36         ` [PATCH v7 3/6] eal: add unit tests for bit operations Mattias Rönnblom
2024-09-17  9:36         ` [PATCH v7 4/6] eal: add atomic " Mattias Rönnblom
2024-09-17  9:36         ` [PATCH v7 5/6] eal: add unit tests for atomic bit access functions Mattias Rönnblom
2024-09-17  9:36         ` [PATCH v7 6/6] eal: extend bitops to handle volatile pointers Mattias Rönnblom

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as for the URLs of the NNTP newsgroup(s).