DPDK patches and discussions
 help / color / mirror / Atom feed
* [PATCH 0/5] use abstracted bit count functions
@ 2023-11-02  1:05 Tyler Retzlaff
  2023-11-02  1:05 ` [PATCH 1/5] table: " Tyler Retzlaff
                   ` (7 more replies)
  0 siblings, 8 replies; 29+ messages in thread
From: Tyler Retzlaff @ 2023-11-02  1:05 UTC (permalink / raw)
  To: dev
  Cc: Bruce Richardson, Cristian Dumitrescu, David Hunt,
	Honnappa Nagarahalli, Ruifeng Wang, Sameh Gobriel,
	Tyler Retzlaff, Vladimir Medvedkin, Yipeng Wang

The first set of conversions missed the long 'l' versions of the
builtins that were being used. This series completes the conversion
of remaining libraries from __builtin_ctzl and __builtin_clzl.

Tyler Retzlaff (5):
  table: use abstracted bit count functions
  distributor: use abstracted bit count functions
  hash: use abstracted bit count functions
  member: use abstracted bit count functions
  rcu: use abstracted bit count functions

 lib/distributor/rte_distributor_single.c |  2 +-
 lib/hash/rte_cuckoo_hash.c               | 16 ++++++++--------
 lib/member/rte_member_vbf.c              | 12 ++++++------
 lib/member/rte_member_x86.h              |  6 +++---
 lib/rcu/rte_rcu_qsbr.c                   |  4 ++--
 lib/rcu/rte_rcu_qsbr.h                   |  2 +-
 lib/table/rte_lru_arm64.h                |  2 +-
 lib/table/rte_swx_table_em.c             |  4 ++--
 lib/table/rte_table_hash_ext.c           |  4 ++--
 lib/table/rte_table_hash_lru.c           |  4 ++--
 10 files changed, 28 insertions(+), 28 deletions(-)

-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH 1/5] table: use abstracted bit count functions
  2023-11-02  1:05 [PATCH 0/5] use abstracted bit count functions Tyler Retzlaff
@ 2023-11-02  1:05 ` Tyler Retzlaff
  2023-11-02  1:05 ` [PATCH 2/5] distributor: " Tyler Retzlaff
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 29+ messages in thread
From: Tyler Retzlaff @ 2023-11-02  1:05 UTC (permalink / raw)
  To: dev
  Cc: Bruce Richardson, Cristian Dumitrescu, David Hunt,
	Honnappa Nagarahalli, Ruifeng Wang, Sameh Gobriel,
	Tyler Retzlaff, Vladimir Medvedkin, Yipeng Wang

Use rte_clz64 instead of __builtin_clzl
Use rte_ctz64 instead of __builtin_ctzl

Fixes: 18898c4d06f9 ("eal: use abstracted bit count functions")

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/table/rte_lru_arm64.h      | 2 +-
 lib/table/rte_swx_table_em.c   | 4 ++--
 lib/table/rte_table_hash_ext.c | 4 ++--
 lib/table/rte_table_hash_lru.c | 4 ++--
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/lib/table/rte_lru_arm64.h b/lib/table/rte_lru_arm64.h
index add889a..ddfd841 100644
--- a/lib/table/rte_lru_arm64.h
+++ b/lib/table/rte_lru_arm64.h
@@ -33,7 +33,7 @@
 	uint16x4_t min_vec = vmov_n_u16(vminv_u16(lru_vec));
 	uint64_t mask = vget_lane_u64(vreinterpret_u64_u16(
 			vceq_u16(min_vec, lru_vec)), 0);
-	return __builtin_clzl(mask) >> 4;
+	return rte_clz64(mask) >> 4;
 }
 #define lru_pos(bucket) f_lru_pos(bucket->lru_list)
 
diff --git a/lib/table/rte_swx_table_em.c b/lib/table/rte_swx_table_em.c
index 84837c8..8d67c05 100644
--- a/lib/table/rte_swx_table_em.c
+++ b/lib/table/rte_swx_table_em.c
@@ -260,8 +260,8 @@ struct table {
 	if (!params->hash_func)
 		t->params.hash_func = rte_hash_crc;
 
-	t->key_size_shl = __builtin_ctzl(key_size);
-	t->data_size_shl = __builtin_ctzl(key_data_size);
+	t->key_size_shl = rte_ctz64(key_size);
+	t->data_size_shl = rte_ctz64(key_data_size);
 	t->n_buckets = n_buckets;
 	t->n_buckets_ext = n_buckets_ext;
 	t->total_size = total_size;
diff --git a/lib/table/rte_table_hash_ext.c b/lib/table/rte_table_hash_ext.c
index 51a20ac..1cf0fc2 100644
--- a/lib/table/rte_table_hash_ext.c
+++ b/lib/table/rte_table_hash_ext.c
@@ -243,8 +243,8 @@ struct rte_table_hash {
 
 	/* Internal */
 	t->bucket_mask = t->n_buckets - 1;
-	t->key_size_shl = __builtin_ctzl(p->key_size);
-	t->data_size_shl = __builtin_ctzl(entry_size);
+	t->key_size_shl = rte_ctz64(p->key_size);
+	t->data_size_shl = rte_ctz64(entry_size);
 
 	/* Tables */
 	key_mask_offset = 0;
diff --git a/lib/table/rte_table_hash_lru.c b/lib/table/rte_table_hash_lru.c
index a4e1a05..5f28710 100644
--- a/lib/table/rte_table_hash_lru.c
+++ b/lib/table/rte_table_hash_lru.c
@@ -220,8 +220,8 @@ struct rte_table_hash {
 
 	/* Internal */
 	t->bucket_mask = t->n_buckets - 1;
-	t->key_size_shl = __builtin_ctzl(p->key_size);
-	t->data_size_shl = __builtin_ctzl(entry_size);
+	t->key_size_shl = rte_ctz64(p->key_size);
+	t->data_size_shl = rte_ctz64(entry_size);
 
 	/* Tables */
 	key_mask_offset = 0;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH 2/5] distributor: use abstracted bit count functions
  2023-11-02  1:05 [PATCH 0/5] use abstracted bit count functions Tyler Retzlaff
  2023-11-02  1:05 ` [PATCH 1/5] table: " Tyler Retzlaff
@ 2023-11-02  1:05 ` Tyler Retzlaff
  2023-11-02  1:05 ` [PATCH 3/5] hash: " Tyler Retzlaff
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 29+ messages in thread
From: Tyler Retzlaff @ 2023-11-02  1:05 UTC (permalink / raw)
  To: dev
  Cc: Bruce Richardson, Cristian Dumitrescu, David Hunt,
	Honnappa Nagarahalli, Ruifeng Wang, Sameh Gobriel,
	Tyler Retzlaff, Vladimir Medvedkin, Yipeng Wang

Use rte_ctz64 instead of __builtin_ctzl

Fixes: 18898c4d06f9 ("eal: use abstracted bit count functions")

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/distributor/rte_distributor_single.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/distributor/rte_distributor_single.c b/lib/distributor/rte_distributor_single.c
index ad43c13..08144e5 100644
--- a/lib/distributor/rte_distributor_single.c
+++ b/lib/distributor/rte_distributor_single.c
@@ -252,7 +252,7 @@ struct rte_mbuf *
 
 			if (match) {
 				next_mb = NULL;
-				unsigned worker = __builtin_ctzl(match);
+				unsigned worker = rte_ctz64(match);
 				if (add_to_backlog(&d->backlog[worker],
 						next_value) < 0)
 					next_idx--;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH 3/5] hash: use abstracted bit count functions
  2023-11-02  1:05 [PATCH 0/5] use abstracted bit count functions Tyler Retzlaff
  2023-11-02  1:05 ` [PATCH 1/5] table: " Tyler Retzlaff
  2023-11-02  1:05 ` [PATCH 2/5] distributor: " Tyler Retzlaff
@ 2023-11-02  1:05 ` Tyler Retzlaff
  2023-11-02  1:05 ` [PATCH 4/5] member: " Tyler Retzlaff
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 29+ messages in thread
From: Tyler Retzlaff @ 2023-11-02  1:05 UTC (permalink / raw)
  To: dev
  Cc: Bruce Richardson, Cristian Dumitrescu, David Hunt,
	Honnappa Nagarahalli, Ruifeng Wang, Sameh Gobriel,
	Tyler Retzlaff, Vladimir Medvedkin, Yipeng Wang

Use rte_ctz64 instead of __builtin_ctzl

Fixes: 18898c4d06f9 ("eal: use abstracted bit count functions")

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/hash/rte_cuckoo_hash.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/lib/hash/rte_cuckoo_hash.c b/lib/hash/rte_cuckoo_hash.c
index b2cf60d..d8d4cc1 100644
--- a/lib/hash/rte_cuckoo_hash.c
+++ b/lib/hash/rte_cuckoo_hash.c
@@ -1931,7 +1931,7 @@ struct rte_hash *
 
 		if (prim_hitmask[i]) {
 			uint32_t first_hit =
-					__builtin_ctzl(prim_hitmask[i])
+					rte_ctz64(prim_hitmask[i])
 					>> 1;
 			uint32_t key_idx =
 				primary_bkt[i]->key_idx[first_hit];
@@ -1945,7 +1945,7 @@ struct rte_hash *
 
 		if (sec_hitmask[i]) {
 			uint32_t first_hit =
-					__builtin_ctzl(sec_hitmask[i])
+					rte_ctz64(sec_hitmask[i])
 					>> 1;
 			uint32_t key_idx =
 				secondary_bkt[i]->key_idx[first_hit];
@@ -1962,7 +1962,7 @@ struct rte_hash *
 		positions[i] = -ENOENT;
 		while (prim_hitmask[i]) {
 			uint32_t hit_index =
-					__builtin_ctzl(prim_hitmask[i])
+					rte_ctz64(prim_hitmask[i])
 					>> 1;
 			uint32_t key_idx =
 				primary_bkt[i]->key_idx[hit_index];
@@ -1990,7 +1990,7 @@ struct rte_hash *
 
 		while (sec_hitmask[i]) {
 			uint32_t hit_index =
-					__builtin_ctzl(sec_hitmask[i])
+					rte_ctz64(sec_hitmask[i])
 					>> 1;
 			uint32_t key_idx =
 				secondary_bkt[i]->key_idx[hit_index];
@@ -2088,7 +2088,7 @@ struct rte_hash *
 
 			if (prim_hitmask[i]) {
 				uint32_t first_hit =
-						__builtin_ctzl(prim_hitmask[i])
+						rte_ctz64(prim_hitmask[i])
 						>> 1;
 				uint32_t key_idx =
 					primary_bkt[i]->key_idx[first_hit];
@@ -2102,7 +2102,7 @@ struct rte_hash *
 
 			if (sec_hitmask[i]) {
 				uint32_t first_hit =
-						__builtin_ctzl(sec_hitmask[i])
+						rte_ctz64(sec_hitmask[i])
 						>> 1;
 				uint32_t key_idx =
 					secondary_bkt[i]->key_idx[first_hit];
@@ -2118,7 +2118,7 @@ struct rte_hash *
 		for (i = 0; i < num_keys; i++) {
 			while (prim_hitmask[i]) {
 				uint32_t hit_index =
-						__builtin_ctzl(prim_hitmask[i])
+						rte_ctz64(prim_hitmask[i])
 						>> 1;
 				uint32_t key_idx =
 				rte_atomic_load_explicit(
@@ -2150,7 +2150,7 @@ struct rte_hash *
 
 			while (sec_hitmask[i]) {
 				uint32_t hit_index =
-						__builtin_ctzl(sec_hitmask[i])
+						rte_ctz64(sec_hitmask[i])
 						>> 1;
 				uint32_t key_idx =
 				rte_atomic_load_explicit(
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH 4/5] member: use abstracted bit count functions
  2023-11-02  1:05 [PATCH 0/5] use abstracted bit count functions Tyler Retzlaff
                   ` (2 preceding siblings ...)
  2023-11-02  1:05 ` [PATCH 3/5] hash: " Tyler Retzlaff
@ 2023-11-02  1:05 ` Tyler Retzlaff
  2023-11-02  1:05 ` [PATCH 5/5] rcu: " Tyler Retzlaff
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 29+ messages in thread
From: Tyler Retzlaff @ 2023-11-02  1:05 UTC (permalink / raw)
  To: dev
  Cc: Bruce Richardson, Cristian Dumitrescu, David Hunt,
	Honnappa Nagarahalli, Ruifeng Wang, Sameh Gobriel,
	Tyler Retzlaff, Vladimir Medvedkin, Yipeng Wang

Use rte_ctz64 instead of __builtin_ctzl

Fixes: 18898c4d06f9 ("eal: use abstracted bit count functions")

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/member/rte_member_vbf.c | 12 ++++++------
 lib/member/rte_member_x86.h |  6 +++---
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/lib/member/rte_member_vbf.c b/lib/member/rte_member_vbf.c
index 9df4620..6440e35 100644
--- a/lib/member/rte_member_vbf.c
+++ b/lib/member/rte_member_vbf.c
@@ -108,8 +108,8 @@
 	 * div_shift is used for division shift, to be divided by number of bits
 	 * represented by a uint32_t variable
 	 */
-	ss->mul_shift = __builtin_ctzl(ss->num_set);
-	ss->div_shift = __builtin_ctzl(32 >> ss->mul_shift);
+	ss->mul_shift = rte_ctz64(ss->num_set);
+	ss->div_shift = rte_ctz64(32 >> ss->mul_shift);
 
 	RTE_MEMBER_LOG(DEBUG, "vector bloom filter created, "
 		"each bloom filter expects %u keys, needs %u bits, %u hashes, "
@@ -174,7 +174,7 @@
 	}
 
 	if (mask) {
-		*set_id = __builtin_ctzl(mask) + 1;
+		*set_id = rte_ctz64(mask) + 1;
 		return 1;
 	}
 
@@ -207,7 +207,7 @@
 	}
 	for (i = 0; i < num_keys; i++) {
 		if (mask[i]) {
-			set_ids[i] = __builtin_ctzl(mask[i]) + 1;
+			set_ids[i] = rte_ctz64(mask[i]) + 1;
 			num_matches++;
 		} else
 			set_ids[i] = RTE_MEMBER_NO_MATCH;
@@ -233,7 +233,7 @@
 		mask &= test_bit(bit_loc, ss);
 	}
 	while (mask) {
-		uint32_t loc = __builtin_ctzl(mask);
+		uint32_t loc = rte_ctz64(mask);
 		set_id[num_matches] = loc + 1;
 		num_matches++;
 		if (num_matches >= match_per_key)
@@ -272,7 +272,7 @@
 	for (i = 0; i < num_keys; i++) {
 		match_cnt_t = 0;
 		while (mask[i]) {
-			uint32_t loc = __builtin_ctzl(mask[i]);
+			uint32_t loc = rte_ctz64(mask[i]);
 			set_ids[i * match_per_key + match_cnt_t] = loc + 1;
 			match_cnt_t++;
 			if (match_cnt_t >= match_per_key)
diff --git a/lib/member/rte_member_x86.h b/lib/member/rte_member_x86.h
index 74c8e38..ee830f5 100644
--- a/lib/member/rte_member_x86.h
+++ b/lib/member/rte_member_x86.h
@@ -22,7 +22,7 @@
 		_mm256_load_si256((__m256i const *)buckets[bucket_id].sigs),
 		_mm256_set1_epi16(tmp_sig)));
 	if (hitmask) {
-		uint32_t hit_idx = __builtin_ctzl(hitmask) >> 1;
+		uint32_t hit_idx = rte_ctz64(hitmask) >> 1;
 		buckets[bucket_id].sets[hit_idx] = set_id;
 		return 1;
 	}
@@ -38,7 +38,7 @@
 		_mm256_load_si256((__m256i const *)buckets[bucket_id].sigs),
 		_mm256_set1_epi16(tmp_sig)));
 	while (hitmask) {
-		uint32_t hit_idx = __builtin_ctzl(hitmask) >> 1;
+		uint32_t hit_idx = rte_ctz64(hitmask) >> 1;
 		if (buckets[bucket_id].sets[hit_idx] != RTE_MEMBER_NO_MATCH) {
 			*set_id = buckets[bucket_id].sets[hit_idx];
 			return 1;
@@ -59,7 +59,7 @@
 		_mm256_load_si256((__m256i const *)buckets[bucket_id].sigs),
 		_mm256_set1_epi16(tmp_sig)));
 	while (hitmask) {
-		uint32_t hit_idx = __builtin_ctzl(hitmask) >> 1;
+		uint32_t hit_idx = rte_ctz64(hitmask) >> 1;
 		if (buckets[bucket_id].sets[hit_idx] != RTE_MEMBER_NO_MATCH) {
 			set_id[*counter] = buckets[bucket_id].sets[hit_idx];
 			(*counter)++;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH 5/5] rcu: use abstracted bit count functions
  2023-11-02  1:05 [PATCH 0/5] use abstracted bit count functions Tyler Retzlaff
                   ` (3 preceding siblings ...)
  2023-11-02  1:05 ` [PATCH 4/5] member: " Tyler Retzlaff
@ 2023-11-02  1:05 ` Tyler Retzlaff
  2023-11-02  7:39 ` [PATCH 0/5] " Morten Brørup
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 29+ messages in thread
From: Tyler Retzlaff @ 2023-11-02  1:05 UTC (permalink / raw)
  To: dev
  Cc: Bruce Richardson, Cristian Dumitrescu, David Hunt,
	Honnappa Nagarahalli, Ruifeng Wang, Sameh Gobriel,
	Tyler Retzlaff, Vladimir Medvedkin, Yipeng Wang

Use rte_ctz64 instead of __builtin_ctzl

Fixes: 18898c4d06f9 ("eal: use abstracted bit count functions")

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/rcu/rte_rcu_qsbr.c | 4 ++--
 lib/rcu/rte_rcu_qsbr.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/rcu/rte_rcu_qsbr.c b/lib/rcu/rte_rcu_qsbr.c
index 4dc7714..a9f3d6c 100644
--- a/lib/rcu/rte_rcu_qsbr.c
+++ b/lib/rcu/rte_rcu_qsbr.c
@@ -231,7 +231,7 @@
 					rte_memory_order_acquire);
 		id = i << __RTE_QSBR_THRID_INDEX_SHIFT;
 		while (bmap) {
-			t = __builtin_ctzl(bmap);
+			t = rte_ctz64(bmap);
 			fprintf(f, "%u ", id + t);
 
 			bmap &= ~(1UL << t);
@@ -252,7 +252,7 @@
 					rte_memory_order_acquire);
 		id = i << __RTE_QSBR_THRID_INDEX_SHIFT;
 		while (bmap) {
-			t = __builtin_ctzl(bmap);
+			t = rte_ctz64(bmap);
 			fprintf(f, "thread ID = %u, count = %" PRIu64 ", lock count = %u\n",
 				id + t,
 				rte_atomic_load_explicit(
diff --git a/lib/rcu/rte_rcu_qsbr.h b/lib/rcu/rte_rcu_qsbr.h
index 9f4aed2..13461f8 100644
--- a/lib/rcu/rte_rcu_qsbr.h
+++ b/lib/rcu/rte_rcu_qsbr.h
@@ -530,7 +530,7 @@ struct rte_rcu_qsbr_dq_parameters {
 		id = i << __RTE_QSBR_THRID_INDEX_SHIFT;
 
 		while (bmap) {
-			j = __builtin_ctzl(bmap);
+			j = rte_ctz64(bmap);
 			__RTE_RCU_DP_LOG(DEBUG,
 				"%s: check: token = %" PRIu64 ", wait = %d, Bit Map = 0x%" PRIx64 ", Thread ID = %d",
 				__func__, t, wait, bmap, id + j);
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 29+ messages in thread

* RE: [PATCH 0/5] use abstracted bit count functions
  2023-11-02  1:05 [PATCH 0/5] use abstracted bit count functions Tyler Retzlaff
                   ` (4 preceding siblings ...)
  2023-11-02  1:05 ` [PATCH 5/5] rcu: " Tyler Retzlaff
@ 2023-11-02  7:39 ` Morten Brørup
  2023-11-02 15:27   ` Tyler Retzlaff
  2023-11-07 19:10 ` [PATCH v2 " Tyler Retzlaff
  2023-11-07 23:38 ` [PATCH v3 0/7] " Tyler Retzlaff
  7 siblings, 1 reply; 29+ messages in thread
From: Morten Brørup @ 2023-11-02  7:39 UTC (permalink / raw)
  To: Tyler Retzlaff, dev
  Cc: Bruce Richardson, Cristian Dumitrescu, David Hunt,
	Honnappa Nagarahalli, Ruifeng Wang, Sameh Gobriel,
	Vladimir Medvedkin, Yipeng Wang

> From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> Sent: Thursday, 2 November 2023 02.05
> 
> The first set of conversions missed the long 'l' versions of the
> builtins that were being used. This series completes the conversion
> of remaining libraries from __builtin_ctzl and __builtin_clzl.

NAK to blind search/replace of __builtin_clzl()/clzl().

Although the size of long is 64 bit on 64 bit architectures, it only 32 bit on 32 bit architectures.

You need to look at the types these builtins operate on:
- E.g. in the hash library (patch 3/5) prim_hitmask[i]/sec_hitmask[i] are uint32_t, so rte_ctz32() would be the correct replacement. (I am now asking myself why they were using __builtin_ctzl() instead of __builtin_ctz() here... Probably by mistake.)
- And if the type is "long", you need conditional compiling (or a wrapper macro) to choose between the 32 bit or 64 bit variants.

NB: You can blindly replace __builtin_ctzll()/clzll(), if any, by 64 bit functions.


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/5] use abstracted bit count functions
  2023-11-02  7:39 ` [PATCH 0/5] " Morten Brørup
@ 2023-11-02 15:27   ` Tyler Retzlaff
  2023-11-02 15:33     ` Morten Brørup
  0 siblings, 1 reply; 29+ messages in thread
From: Tyler Retzlaff @ 2023-11-02 15:27 UTC (permalink / raw)
  To: Morten Brørup
  Cc: dev, Bruce Richardson, Cristian Dumitrescu, David Hunt,
	Honnappa Nagarahalli, Ruifeng Wang, Sameh Gobriel,
	Vladimir Medvedkin, Yipeng Wang

On Thu, Nov 02, 2023 at 08:39:04AM +0100, Morten Brørup wrote:
> > From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> > Sent: Thursday, 2 November 2023 02.05
> > 
> > The first set of conversions missed the long 'l' versions of the
> > builtins that were being used. This series completes the conversion
> > of remaining libraries from __builtin_ctzl and __builtin_clzl.
> 
> NAK to blind search/replace of __builtin_clzl()/clzl().
> 
> Although the size of long is 64 bit on 64 bit architectures, it only 32 bit on 32 bit architectures.
> 
> You need to look at the types these builtins operate on:
> - E.g. in the hash library (patch 3/5) prim_hitmask[i]/sec_hitmask[i] are uint32_t, so rte_ctz32() would be the correct replacement. (I am now asking myself why they were using __builtin_ctzl() instead of __builtin_ctz() here... Probably by mistake.)
> - And if the type is "long", you need conditional compiling (or a wrapper macro) to choose between the 32 bit or 64 bit variants.
> 
> NB: You can blindly replace __builtin_ctzll()/clzll(), if any, by 64 bit functions.

they haven't been blindly replaced. but i would like you to validate my
thinking.

in the case of counting trailing 0s it seems fine if the type is
promoted to 64-bits, in the case of leading i checked the type to make
sure it was already operating on a 64-bit type.

too naive?

^ permalink raw reply	[flat|nested] 29+ messages in thread

* RE: [PATCH 0/5] use abstracted bit count functions
  2023-11-02 15:27   ` Tyler Retzlaff
@ 2023-11-02 15:33     ` Morten Brørup
  2023-11-02 15:36       ` Tyler Retzlaff
  0 siblings, 1 reply; 29+ messages in thread
From: Morten Brørup @ 2023-11-02 15:33 UTC (permalink / raw)
  To: Tyler Retzlaff
  Cc: dev, Bruce Richardson, Cristian Dumitrescu, David Hunt,
	Honnappa Nagarahalli, Ruifeng Wang, Sameh Gobriel,
	Vladimir Medvedkin, Yipeng Wang

> From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> Sent: Thursday, 2 November 2023 16.28
> 
> On Thu, Nov 02, 2023 at 08:39:04AM +0100, Morten Brørup wrote:
> > > From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> > > Sent: Thursday, 2 November 2023 02.05
> > >
> > > The first set of conversions missed the long 'l' versions of the
> > > builtins that were being used. This series completes the conversion
> > > of remaining libraries from __builtin_ctzl and __builtin_clzl.
> >
> > NAK to blind search/replace of __builtin_clzl()/clzl().
> >
> > Although the size of long is 64 bit on 64 bit architectures, it only
> 32 bit on 32 bit architectures.
> >
> > You need to look at the types these builtins operate on:
> > - E.g. in the hash library (patch 3/5) prim_hitmask[i]/sec_hitmask[i]
> are uint32_t, so rte_ctz32() would be the correct replacement. (I am
> now asking myself why they were using __builtin_ctzl() instead of
> __builtin_ctz() here... Probably by mistake.)
> > - And if the type is "long", you need conditional compiling (or a
> wrapper macro) to choose between the 32 bit or 64 bit variants.
> >
> > NB: You can blindly replace __builtin_ctzll()/clzll(), if any, by 64
> bit functions.
> 
> they haven't been blindly replaced. but i would like you to validate my
> thinking.
> 
> in the case of counting trailing 0s it seems fine if the type is
> promoted to 64-bits,

This will give the correct result, yes. However the 64-bit operation might have a higher performance cost than the 32-bit operation, especially on 32-bit architectures.

> in the case of leading i checked the type to make
> sure it was already operating on a 64-bit type.

If already operating on a 64-bit type, using the 64-bit function is obviously correct.

> 
> too naive?

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/5] use abstracted bit count functions
  2023-11-02 15:33     ` Morten Brørup
@ 2023-11-02 15:36       ` Tyler Retzlaff
  0 siblings, 0 replies; 29+ messages in thread
From: Tyler Retzlaff @ 2023-11-02 15:36 UTC (permalink / raw)
  To: Morten Brørup
  Cc: dev, Bruce Richardson, Cristian Dumitrescu, David Hunt,
	Honnappa Nagarahalli, Ruifeng Wang, Sameh Gobriel,
	Vladimir Medvedkin, Yipeng Wang

On Thu, Nov 02, 2023 at 04:33:57PM +0100, Morten Brørup wrote:
> > From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> > Sent: Thursday, 2 November 2023 16.28
> > 
> > On Thu, Nov 02, 2023 at 08:39:04AM +0100, Morten Brørup wrote:
> > > > From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> > > > Sent: Thursday, 2 November 2023 02.05
> > > >
> > > > The first set of conversions missed the long 'l' versions of the
> > > > builtins that were being used. This series completes the conversion
> > > > of remaining libraries from __builtin_ctzl and __builtin_clzl.
> > >
> > > NAK to blind search/replace of __builtin_clzl()/clzl().
> > >
> > > Although the size of long is 64 bit on 64 bit architectures, it only
> > 32 bit on 32 bit architectures.
> > >
> > > You need to look at the types these builtins operate on:
> > > - E.g. in the hash library (patch 3/5) prim_hitmask[i]/sec_hitmask[i]
> > are uint32_t, so rte_ctz32() would be the correct replacement. (I am
> > now asking myself why they were using __builtin_ctzl() instead of
> > __builtin_ctz() here... Probably by mistake.)
> > > - And if the type is "long", you need conditional compiling (or a
> > wrapper macro) to choose between the 32 bit or 64 bit variants.
> > >
> > > NB: You can blindly replace __builtin_ctzll()/clzll(), if any, by 64
> > bit functions.
> > 
> > they haven't been blindly replaced. but i would like you to validate my
> > thinking.
> > 
> > in the case of counting trailing 0s it seems fine if the type is
> > promoted to 64-bits,
> 
> This will give the correct result, yes. However the 64-bit operation might have a higher performance cost than the 32-bit operation, especially on 32-bit architectures.

true. okay let me clean this up. thanks for the feedback.

> 
> > in the case of leading i checked the type to make
> > sure it was already operating on a 64-bit type.
> 
> If already operating on a 64-bit type, using the 64-bit function is obviously correct.
> 
> > 
> > too naive?

^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH v2 0/5] use abstracted bit count functions
  2023-11-02  1:05 [PATCH 0/5] use abstracted bit count functions Tyler Retzlaff
                   ` (5 preceding siblings ...)
  2023-11-02  7:39 ` [PATCH 0/5] " Morten Brørup
@ 2023-11-07 19:10 ` Tyler Retzlaff
  2023-11-07 19:10   ` [PATCH v2 1/5] distributor: " Tyler Retzlaff
                     ` (5 more replies)
  2023-11-07 23:38 ` [PATCH v3 0/7] " Tyler Retzlaff
  7 siblings, 6 replies; 29+ messages in thread
From: Tyler Retzlaff @ 2023-11-07 19:10 UTC (permalink / raw)
  To: dev
  Cc: Bruce Richardson, Cristian Dumitrescu, David Hunt,
	Honnappa Nagarahalli, Ruifeng Wang, Sameh Gobriel,
	Tyler Retzlaff, Vladimir Medvedkin, Yipeng Wang, mb

The first set of conversions missed the long 'l' versions of the
builtins that were being used. This series completes the conversion
of remaining libraries from __builtin_ctzl and __builtin_clzl.

v2: be explicit and use appropriate 32-bit and 64-bit leading
    and trailing counting functions depending on the type of the
    expression passed as an argument to the builtin.

Tyler Retzlaff (5):
  distributor: use abstracted bit count functions
  hash: use abstracted bit count functions
  member: use abstracted bit count functions
  rcu: use abstracted bit count functions
  table: use abstracted bit count functions

 lib/distributor/rte_distributor_single.c |  2 +-
 lib/hash/rte_cuckoo_hash.c               | 16 ++++++++--------
 lib/member/rte_member_vbf.c              | 12 ++++++------
 lib/member/rte_member_x86.h              |  6 +++---
 lib/rcu/rte_rcu_qsbr.c                   |  4 ++--
 lib/rcu/rte_rcu_qsbr.h                   |  2 +-
 lib/table/rte_lru_arm64.h                |  2 +-
 lib/table/rte_swx_table_em.c             |  4 ++--
 lib/table/rte_table_hash_ext.c           |  4 ++--
 lib/table/rte_table_hash_lru.c           |  4 ++--
 10 files changed, 28 insertions(+), 28 deletions(-)

-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH v2 1/5] distributor: use abstracted bit count functions
  2023-11-07 19:10 ` [PATCH v2 " Tyler Retzlaff
@ 2023-11-07 19:10   ` Tyler Retzlaff
  2023-11-07 19:10   ` [PATCH v2 2/5] hash: " Tyler Retzlaff
                     ` (4 subsequent siblings)
  5 siblings, 0 replies; 29+ messages in thread
From: Tyler Retzlaff @ 2023-11-07 19:10 UTC (permalink / raw)
  To: dev
  Cc: Bruce Richardson, Cristian Dumitrescu, David Hunt,
	Honnappa Nagarahalli, Ruifeng Wang, Sameh Gobriel,
	Tyler Retzlaff, Vladimir Medvedkin, Yipeng Wang, mb

Use rte_ctz32 or rte_ctz64 respectively instead of __builtin_ctzl
depending on the resultant type of the expression passed as an argument

Fixes: 18898c4d06f9 ("eal: use abstracted bit count functions")

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/distributor/rte_distributor_single.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/distributor/rte_distributor_single.c b/lib/distributor/rte_distributor_single.c
index ad43c13..08144e5 100644
--- a/lib/distributor/rte_distributor_single.c
+++ b/lib/distributor/rte_distributor_single.c
@@ -252,7 +252,7 @@ struct rte_mbuf *
 
 			if (match) {
 				next_mb = NULL;
-				unsigned worker = __builtin_ctzl(match);
+				unsigned worker = rte_ctz64(match);
 				if (add_to_backlog(&d->backlog[worker],
 						next_value) < 0)
 					next_idx--;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH v2 2/5] hash: use abstracted bit count functions
  2023-11-07 19:10 ` [PATCH v2 " Tyler Retzlaff
  2023-11-07 19:10   ` [PATCH v2 1/5] distributor: " Tyler Retzlaff
@ 2023-11-07 19:10   ` Tyler Retzlaff
  2023-11-07 19:10   ` [PATCH v2 3/5] member: " Tyler Retzlaff
                     ` (3 subsequent siblings)
  5 siblings, 0 replies; 29+ messages in thread
From: Tyler Retzlaff @ 2023-11-07 19:10 UTC (permalink / raw)
  To: dev
  Cc: Bruce Richardson, Cristian Dumitrescu, David Hunt,
	Honnappa Nagarahalli, Ruifeng Wang, Sameh Gobriel,
	Tyler Retzlaff, Vladimir Medvedkin, Yipeng Wang, mb

Use rte_ctz32 or rte_ctz64 respectively instead of __builtin_ctzl
depending on the resultant type of the expression passed as an argument

Fixes: 18898c4d06f9 ("eal: use abstracted bit count functions")

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/hash/rte_cuckoo_hash.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/lib/hash/rte_cuckoo_hash.c b/lib/hash/rte_cuckoo_hash.c
index b2cf60d..19ee53a 100644
--- a/lib/hash/rte_cuckoo_hash.c
+++ b/lib/hash/rte_cuckoo_hash.c
@@ -1931,7 +1931,7 @@ struct rte_hash *
 
 		if (prim_hitmask[i]) {
 			uint32_t first_hit =
-					__builtin_ctzl(prim_hitmask[i])
+					rte_ctz32(prim_hitmask[i])
 					>> 1;
 			uint32_t key_idx =
 				primary_bkt[i]->key_idx[first_hit];
@@ -1945,7 +1945,7 @@ struct rte_hash *
 
 		if (sec_hitmask[i]) {
 			uint32_t first_hit =
-					__builtin_ctzl(sec_hitmask[i])
+					rte_ctz32(sec_hitmask[i])
 					>> 1;
 			uint32_t key_idx =
 				secondary_bkt[i]->key_idx[first_hit];
@@ -1962,7 +1962,7 @@ struct rte_hash *
 		positions[i] = -ENOENT;
 		while (prim_hitmask[i]) {
 			uint32_t hit_index =
-					__builtin_ctzl(prim_hitmask[i])
+					rte_ctz32(prim_hitmask[i])
 					>> 1;
 			uint32_t key_idx =
 				primary_bkt[i]->key_idx[hit_index];
@@ -1990,7 +1990,7 @@ struct rte_hash *
 
 		while (sec_hitmask[i]) {
 			uint32_t hit_index =
-					__builtin_ctzl(sec_hitmask[i])
+					rte_ctz32(sec_hitmask[i])
 					>> 1;
 			uint32_t key_idx =
 				secondary_bkt[i]->key_idx[hit_index];
@@ -2088,7 +2088,7 @@ struct rte_hash *
 
 			if (prim_hitmask[i]) {
 				uint32_t first_hit =
-						__builtin_ctzl(prim_hitmask[i])
+						rte_ctz32(prim_hitmask[i])
 						>> 1;
 				uint32_t key_idx =
 					primary_bkt[i]->key_idx[first_hit];
@@ -2102,7 +2102,7 @@ struct rte_hash *
 
 			if (sec_hitmask[i]) {
 				uint32_t first_hit =
-						__builtin_ctzl(sec_hitmask[i])
+						rte_ctz32(sec_hitmask[i])
 						>> 1;
 				uint32_t key_idx =
 					secondary_bkt[i]->key_idx[first_hit];
@@ -2118,7 +2118,7 @@ struct rte_hash *
 		for (i = 0; i < num_keys; i++) {
 			while (prim_hitmask[i]) {
 				uint32_t hit_index =
-						__builtin_ctzl(prim_hitmask[i])
+						rte_ctz32(prim_hitmask[i])
 						>> 1;
 				uint32_t key_idx =
 				rte_atomic_load_explicit(
@@ -2150,7 +2150,7 @@ struct rte_hash *
 
 			while (sec_hitmask[i]) {
 				uint32_t hit_index =
-						__builtin_ctzl(sec_hitmask[i])
+						rte_ctz32(sec_hitmask[i])
 						>> 1;
 				uint32_t key_idx =
 				rte_atomic_load_explicit(
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH v2 3/5] member: use abstracted bit count functions
  2023-11-07 19:10 ` [PATCH v2 " Tyler Retzlaff
  2023-11-07 19:10   ` [PATCH v2 1/5] distributor: " Tyler Retzlaff
  2023-11-07 19:10   ` [PATCH v2 2/5] hash: " Tyler Retzlaff
@ 2023-11-07 19:10   ` Tyler Retzlaff
  2023-11-07 19:10   ` [PATCH v2 4/5] rcu: " Tyler Retzlaff
                     ` (2 subsequent siblings)
  5 siblings, 0 replies; 29+ messages in thread
From: Tyler Retzlaff @ 2023-11-07 19:10 UTC (permalink / raw)
  To: dev
  Cc: Bruce Richardson, Cristian Dumitrescu, David Hunt,
	Honnappa Nagarahalli, Ruifeng Wang, Sameh Gobriel,
	Tyler Retzlaff, Vladimir Medvedkin, Yipeng Wang, mb

Use rte_ctz32 or rte_ctz64 respectively instead of __builtin_ctzl
depending on the resultant type of the expression passed as an argument

Fixes: 18898c4d06f9 ("eal: use abstracted bit count functions")

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/member/rte_member_vbf.c | 12 ++++++------
 lib/member/rte_member_x86.h |  6 +++---
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/lib/member/rte_member_vbf.c b/lib/member/rte_member_vbf.c
index 9df4620..5a0c51e 100644
--- a/lib/member/rte_member_vbf.c
+++ b/lib/member/rte_member_vbf.c
@@ -108,8 +108,8 @@
 	 * div_shift is used for division shift, to be divided by number of bits
 	 * represented by a uint32_t variable
 	 */
-	ss->mul_shift = __builtin_ctzl(ss->num_set);
-	ss->div_shift = __builtin_ctzl(32 >> ss->mul_shift);
+	ss->mul_shift = rte_ctz32(ss->num_set);
+	ss->div_shift = rte_ctz32(32 >> ss->mul_shift);
 
 	RTE_MEMBER_LOG(DEBUG, "vector bloom filter created, "
 		"each bloom filter expects %u keys, needs %u bits, %u hashes, "
@@ -174,7 +174,7 @@
 	}
 
 	if (mask) {
-		*set_id = __builtin_ctzl(mask) + 1;
+		*set_id = rte_ctz32(mask) + 1;
 		return 1;
 	}
 
@@ -207,7 +207,7 @@
 	}
 	for (i = 0; i < num_keys; i++) {
 		if (mask[i]) {
-			set_ids[i] = __builtin_ctzl(mask[i]) + 1;
+			set_ids[i] = rte_ctz32(mask[i]) + 1;
 			num_matches++;
 		} else
 			set_ids[i] = RTE_MEMBER_NO_MATCH;
@@ -233,7 +233,7 @@
 		mask &= test_bit(bit_loc, ss);
 	}
 	while (mask) {
-		uint32_t loc = __builtin_ctzl(mask);
+		uint32_t loc = rte_ctz32(mask);
 		set_id[num_matches] = loc + 1;
 		num_matches++;
 		if (num_matches >= match_per_key)
@@ -272,7 +272,7 @@
 	for (i = 0; i < num_keys; i++) {
 		match_cnt_t = 0;
 		while (mask[i]) {
-			uint32_t loc = __builtin_ctzl(mask[i]);
+			uint32_t loc = rte_ctz32(mask[i]);
 			set_ids[i * match_per_key + match_cnt_t] = loc + 1;
 			match_cnt_t++;
 			if (match_cnt_t >= match_per_key)
diff --git a/lib/member/rte_member_x86.h b/lib/member/rte_member_x86.h
index 74c8e38..d115151 100644
--- a/lib/member/rte_member_x86.h
+++ b/lib/member/rte_member_x86.h
@@ -22,7 +22,7 @@
 		_mm256_load_si256((__m256i const *)buckets[bucket_id].sigs),
 		_mm256_set1_epi16(tmp_sig)));
 	if (hitmask) {
-		uint32_t hit_idx = __builtin_ctzl(hitmask) >> 1;
+		uint32_t hit_idx = rte_ctz32(hitmask) >> 1;
 		buckets[bucket_id].sets[hit_idx] = set_id;
 		return 1;
 	}
@@ -38,7 +38,7 @@
 		_mm256_load_si256((__m256i const *)buckets[bucket_id].sigs),
 		_mm256_set1_epi16(tmp_sig)));
 	while (hitmask) {
-		uint32_t hit_idx = __builtin_ctzl(hitmask) >> 1;
+		uint32_t hit_idx = rte_ctz32(hitmask) >> 1;
 		if (buckets[bucket_id].sets[hit_idx] != RTE_MEMBER_NO_MATCH) {
 			*set_id = buckets[bucket_id].sets[hit_idx];
 			return 1;
@@ -59,7 +59,7 @@
 		_mm256_load_si256((__m256i const *)buckets[bucket_id].sigs),
 		_mm256_set1_epi16(tmp_sig)));
 	while (hitmask) {
-		uint32_t hit_idx = __builtin_ctzl(hitmask) >> 1;
+		uint32_t hit_idx = rte_ctz32(hitmask) >> 1;
 		if (buckets[bucket_id].sets[hit_idx] != RTE_MEMBER_NO_MATCH) {
 			set_id[*counter] = buckets[bucket_id].sets[hit_idx];
 			(*counter)++;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH v2 4/5] rcu: use abstracted bit count functions
  2023-11-07 19:10 ` [PATCH v2 " Tyler Retzlaff
                     ` (2 preceding siblings ...)
  2023-11-07 19:10   ` [PATCH v2 3/5] member: " Tyler Retzlaff
@ 2023-11-07 19:10   ` Tyler Retzlaff
  2023-11-07 19:10   ` [PATCH v2 5/5] table: " Tyler Retzlaff
  2023-11-08  8:25   ` [PATCH v2 0/5] " Morten Brørup
  5 siblings, 0 replies; 29+ messages in thread
From: Tyler Retzlaff @ 2023-11-07 19:10 UTC (permalink / raw)
  To: dev
  Cc: Bruce Richardson, Cristian Dumitrescu, David Hunt,
	Honnappa Nagarahalli, Ruifeng Wang, Sameh Gobriel,
	Tyler Retzlaff, Vladimir Medvedkin, Yipeng Wang, mb

Use rte_ctz32 or rte_ctz64 respectively instead of __builtin_ctzl
depending on the resultant type of the expression passed as an argument

Fixes: 18898c4d06f9 ("eal: use abstracted bit count functions")

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/rcu/rte_rcu_qsbr.c | 4 ++--
 lib/rcu/rte_rcu_qsbr.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/rcu/rte_rcu_qsbr.c b/lib/rcu/rte_rcu_qsbr.c
index 4dc7714..a9f3d6c 100644
--- a/lib/rcu/rte_rcu_qsbr.c
+++ b/lib/rcu/rte_rcu_qsbr.c
@@ -231,7 +231,7 @@
 					rte_memory_order_acquire);
 		id = i << __RTE_QSBR_THRID_INDEX_SHIFT;
 		while (bmap) {
-			t = __builtin_ctzl(bmap);
+			t = rte_ctz64(bmap);
 			fprintf(f, "%u ", id + t);
 
 			bmap &= ~(1UL << t);
@@ -252,7 +252,7 @@
 					rte_memory_order_acquire);
 		id = i << __RTE_QSBR_THRID_INDEX_SHIFT;
 		while (bmap) {
-			t = __builtin_ctzl(bmap);
+			t = rte_ctz64(bmap);
 			fprintf(f, "thread ID = %u, count = %" PRIu64 ", lock count = %u\n",
 				id + t,
 				rte_atomic_load_explicit(
diff --git a/lib/rcu/rte_rcu_qsbr.h b/lib/rcu/rte_rcu_qsbr.h
index 9f4aed2..13461f8 100644
--- a/lib/rcu/rte_rcu_qsbr.h
+++ b/lib/rcu/rte_rcu_qsbr.h
@@ -530,7 +530,7 @@ struct rte_rcu_qsbr_dq_parameters {
 		id = i << __RTE_QSBR_THRID_INDEX_SHIFT;
 
 		while (bmap) {
-			j = __builtin_ctzl(bmap);
+			j = rte_ctz64(bmap);
 			__RTE_RCU_DP_LOG(DEBUG,
 				"%s: check: token = %" PRIu64 ", wait = %d, Bit Map = 0x%" PRIx64 ", Thread ID = %d",
 				__func__, t, wait, bmap, id + j);
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH v2 5/5] table: use abstracted bit count functions
  2023-11-07 19:10 ` [PATCH v2 " Tyler Retzlaff
                     ` (3 preceding siblings ...)
  2023-11-07 19:10   ` [PATCH v2 4/5] rcu: " Tyler Retzlaff
@ 2023-11-07 19:10   ` Tyler Retzlaff
  2023-11-08  8:25   ` [PATCH v2 0/5] " Morten Brørup
  5 siblings, 0 replies; 29+ messages in thread
From: Tyler Retzlaff @ 2023-11-07 19:10 UTC (permalink / raw)
  To: dev
  Cc: Bruce Richardson, Cristian Dumitrescu, David Hunt,
	Honnappa Nagarahalli, Ruifeng Wang, Sameh Gobriel,
	Tyler Retzlaff, Vladimir Medvedkin, Yipeng Wang, mb

Use rte_clz32 or rte_clz64 respectively instead of __builtin_clzl
depending on the resultant type of the expression passed as an argument

Use rte_ctz32 or rte_ctz64 respectively instead of __builtin_ctzl
depending on the resultant type of the expression passed as an argument

Fixes: 18898c4d06f9 ("eal: use abstracted bit count functions")

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/table/rte_lru_arm64.h      | 2 +-
 lib/table/rte_swx_table_em.c   | 4 ++--
 lib/table/rte_table_hash_ext.c | 4 ++--
 lib/table/rte_table_hash_lru.c | 4 ++--
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/lib/table/rte_lru_arm64.h b/lib/table/rte_lru_arm64.h
index add889a..ddfd841 100644
--- a/lib/table/rte_lru_arm64.h
+++ b/lib/table/rte_lru_arm64.h
@@ -33,7 +33,7 @@
 	uint16x4_t min_vec = vmov_n_u16(vminv_u16(lru_vec));
 	uint64_t mask = vget_lane_u64(vreinterpret_u64_u16(
 			vceq_u16(min_vec, lru_vec)), 0);
-	return __builtin_clzl(mask) >> 4;
+	return rte_clz64(mask) >> 4;
 }
 #define lru_pos(bucket) f_lru_pos(bucket->lru_list)
 
diff --git a/lib/table/rte_swx_table_em.c b/lib/table/rte_swx_table_em.c
index 84837c8..2f042d7 100644
--- a/lib/table/rte_swx_table_em.c
+++ b/lib/table/rte_swx_table_em.c
@@ -260,8 +260,8 @@ struct table {
 	if (!params->hash_func)
 		t->params.hash_func = rte_hash_crc;
 
-	t->key_size_shl = __builtin_ctzl(key_size);
-	t->data_size_shl = __builtin_ctzl(key_data_size);
+	t->key_size_shl = rte_ctz32(key_size);
+	t->data_size_shl = rte_ctz32(key_data_size);
 	t->n_buckets = n_buckets;
 	t->n_buckets_ext = n_buckets_ext;
 	t->total_size = total_size;
diff --git a/lib/table/rte_table_hash_ext.c b/lib/table/rte_table_hash_ext.c
index 51a20ac..9f0220d 100644
--- a/lib/table/rte_table_hash_ext.c
+++ b/lib/table/rte_table_hash_ext.c
@@ -243,8 +243,8 @@ struct rte_table_hash {
 
 	/* Internal */
 	t->bucket_mask = t->n_buckets - 1;
-	t->key_size_shl = __builtin_ctzl(p->key_size);
-	t->data_size_shl = __builtin_ctzl(entry_size);
+	t->key_size_shl = rte_ctz32(p->key_size);
+	t->data_size_shl = rte_ctz32(entry_size);
 
 	/* Tables */
 	key_mask_offset = 0;
diff --git a/lib/table/rte_table_hash_lru.c b/lib/table/rte_table_hash_lru.c
index a4e1a05..758ec4f 100644
--- a/lib/table/rte_table_hash_lru.c
+++ b/lib/table/rte_table_hash_lru.c
@@ -220,8 +220,8 @@ struct rte_table_hash {
 
 	/* Internal */
 	t->bucket_mask = t->n_buckets - 1;
-	t->key_size_shl = __builtin_ctzl(p->key_size);
-	t->data_size_shl = __builtin_ctzl(entry_size);
+	t->key_size_shl = rte_ctz32(p->key_size);
+	t->data_size_shl = rte_ctz32(entry_size);
 
 	/* Tables */
 	key_mask_offset = 0;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH v3 0/7] use abstracted bit count functions
  2023-11-02  1:05 [PATCH 0/5] use abstracted bit count functions Tyler Retzlaff
                   ` (6 preceding siblings ...)
  2023-11-07 19:10 ` [PATCH v2 " Tyler Retzlaff
@ 2023-11-07 23:38 ` Tyler Retzlaff
  2023-11-07 23:38   ` [PATCH v3 1/7] distributor: " Tyler Retzlaff
                     ` (7 more replies)
  7 siblings, 8 replies; 29+ messages in thread
From: Tyler Retzlaff @ 2023-11-07 23:38 UTC (permalink / raw)
  To: dev
  Cc: Bruce Richardson, Cristian Dumitrescu, David Hunt,
	Honnappa Nagarahalli, Ruifeng Wang, Sameh Gobriel,
	Tyler Retzlaff, Vladimir Medvedkin, Yipeng Wang, mb

The first set of conversions missed the long 'l' versions of the
builtins that were being used. This series completes the conversion
of remaining libraries from __builtin_ctzl, __builtin_clzl and
__builtin_popcountl.

v3:
  * add missing include of rte_bitops.h
  * add 2 patches to cover use of __builtin_popcountl

v2:
  * be explicit and use appropriate 32-bit and 64-bit leading
    and trailing counting functions depending on the type of the
    expression passed as an argument to the builtin.


Tyler Retzlaff (7):
  distributor: use abstracted bit count functions
  hash: use abstracted bit count functions
  member: use abstracted bit count functions
  rcu: use abstracted bit count functions
  table: use abstracted bit count functions
  distributor: use abstracted bit count functions
  hash: use abstracted bit count functions

 lib/distributor/rte_distributor_single.c |  4 ++--
 lib/hash/rte_cuckoo_hash.c               | 20 ++++++++++----------
 lib/member/rte_member_vbf.c              | 12 ++++++------
 lib/member/rte_member_x86.h              |  6 +++---
 lib/rcu/rte_rcu_qsbr.c                   |  4 ++--
 lib/rcu/rte_rcu_qsbr.h                   |  2 +-
 lib/table/rte_lru_arm64.h                |  3 ++-
 lib/table/rte_swx_table_em.c             |  4 ++--
 lib/table/rte_table_hash_ext.c           |  4 ++--
 lib/table/rte_table_hash_lru.c           |  4 ++--
 10 files changed, 32 insertions(+), 31 deletions(-)

-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH v3 1/7] distributor: use abstracted bit count functions
  2023-11-07 23:38 ` [PATCH v3 0/7] " Tyler Retzlaff
@ 2023-11-07 23:38   ` Tyler Retzlaff
  2023-11-07 23:38   ` [PATCH v3 2/7] hash: " Tyler Retzlaff
                     ` (6 subsequent siblings)
  7 siblings, 0 replies; 29+ messages in thread
From: Tyler Retzlaff @ 2023-11-07 23:38 UTC (permalink / raw)
  To: dev
  Cc: Bruce Richardson, Cristian Dumitrescu, David Hunt,
	Honnappa Nagarahalli, Ruifeng Wang, Sameh Gobriel,
	Tyler Retzlaff, Vladimir Medvedkin, Yipeng Wang, mb

Use rte_ctz32 or rte_ctz64 respectively instead of __builtin_ctzl
depending on the resultant type of the expression passed as an argument

Fixes: 18898c4d06f9 ("eal: use abstracted bit count functions")

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/distributor/rte_distributor_single.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/distributor/rte_distributor_single.c b/lib/distributor/rte_distributor_single.c
index ad43c13..08144e5 100644
--- a/lib/distributor/rte_distributor_single.c
+++ b/lib/distributor/rte_distributor_single.c
@@ -252,7 +252,7 @@ struct rte_mbuf *
 
 			if (match) {
 				next_mb = NULL;
-				unsigned worker = __builtin_ctzl(match);
+				unsigned worker = rte_ctz64(match);
 				if (add_to_backlog(&d->backlog[worker],
 						next_value) < 0)
 					next_idx--;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH v3 2/7] hash: use abstracted bit count functions
  2023-11-07 23:38 ` [PATCH v3 0/7] " Tyler Retzlaff
  2023-11-07 23:38   ` [PATCH v3 1/7] distributor: " Tyler Retzlaff
@ 2023-11-07 23:38   ` Tyler Retzlaff
  2023-11-07 23:38   ` [PATCH v3 3/7] member: " Tyler Retzlaff
                     ` (5 subsequent siblings)
  7 siblings, 0 replies; 29+ messages in thread
From: Tyler Retzlaff @ 2023-11-07 23:38 UTC (permalink / raw)
  To: dev
  Cc: Bruce Richardson, Cristian Dumitrescu, David Hunt,
	Honnappa Nagarahalli, Ruifeng Wang, Sameh Gobriel,
	Tyler Retzlaff, Vladimir Medvedkin, Yipeng Wang, mb

Use rte_ctz32 or rte_ctz64 respectively instead of __builtin_ctzl
depending on the resultant type of the expression passed as an argument

Fixes: 18898c4d06f9 ("eal: use abstracted bit count functions")

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/hash/rte_cuckoo_hash.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/lib/hash/rte_cuckoo_hash.c b/lib/hash/rte_cuckoo_hash.c
index b2cf60d..19ee53a 100644
--- a/lib/hash/rte_cuckoo_hash.c
+++ b/lib/hash/rte_cuckoo_hash.c
@@ -1931,7 +1931,7 @@ struct rte_hash *
 
 		if (prim_hitmask[i]) {
 			uint32_t first_hit =
-					__builtin_ctzl(prim_hitmask[i])
+					rte_ctz32(prim_hitmask[i])
 					>> 1;
 			uint32_t key_idx =
 				primary_bkt[i]->key_idx[first_hit];
@@ -1945,7 +1945,7 @@ struct rte_hash *
 
 		if (sec_hitmask[i]) {
 			uint32_t first_hit =
-					__builtin_ctzl(sec_hitmask[i])
+					rte_ctz32(sec_hitmask[i])
 					>> 1;
 			uint32_t key_idx =
 				secondary_bkt[i]->key_idx[first_hit];
@@ -1962,7 +1962,7 @@ struct rte_hash *
 		positions[i] = -ENOENT;
 		while (prim_hitmask[i]) {
 			uint32_t hit_index =
-					__builtin_ctzl(prim_hitmask[i])
+					rte_ctz32(prim_hitmask[i])
 					>> 1;
 			uint32_t key_idx =
 				primary_bkt[i]->key_idx[hit_index];
@@ -1990,7 +1990,7 @@ struct rte_hash *
 
 		while (sec_hitmask[i]) {
 			uint32_t hit_index =
-					__builtin_ctzl(sec_hitmask[i])
+					rte_ctz32(sec_hitmask[i])
 					>> 1;
 			uint32_t key_idx =
 				secondary_bkt[i]->key_idx[hit_index];
@@ -2088,7 +2088,7 @@ struct rte_hash *
 
 			if (prim_hitmask[i]) {
 				uint32_t first_hit =
-						__builtin_ctzl(prim_hitmask[i])
+						rte_ctz32(prim_hitmask[i])
 						>> 1;
 				uint32_t key_idx =
 					primary_bkt[i]->key_idx[first_hit];
@@ -2102,7 +2102,7 @@ struct rte_hash *
 
 			if (sec_hitmask[i]) {
 				uint32_t first_hit =
-						__builtin_ctzl(sec_hitmask[i])
+						rte_ctz32(sec_hitmask[i])
 						>> 1;
 				uint32_t key_idx =
 					secondary_bkt[i]->key_idx[first_hit];
@@ -2118,7 +2118,7 @@ struct rte_hash *
 		for (i = 0; i < num_keys; i++) {
 			while (prim_hitmask[i]) {
 				uint32_t hit_index =
-						__builtin_ctzl(prim_hitmask[i])
+						rte_ctz32(prim_hitmask[i])
 						>> 1;
 				uint32_t key_idx =
 				rte_atomic_load_explicit(
@@ -2150,7 +2150,7 @@ struct rte_hash *
 
 			while (sec_hitmask[i]) {
 				uint32_t hit_index =
-						__builtin_ctzl(sec_hitmask[i])
+						rte_ctz32(sec_hitmask[i])
 						>> 1;
 				uint32_t key_idx =
 				rte_atomic_load_explicit(
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH v3 3/7] member: use abstracted bit count functions
  2023-11-07 23:38 ` [PATCH v3 0/7] " Tyler Retzlaff
  2023-11-07 23:38   ` [PATCH v3 1/7] distributor: " Tyler Retzlaff
  2023-11-07 23:38   ` [PATCH v3 2/7] hash: " Tyler Retzlaff
@ 2023-11-07 23:38   ` Tyler Retzlaff
  2023-11-07 23:38   ` [PATCH v3 4/7] rcu: " Tyler Retzlaff
                     ` (4 subsequent siblings)
  7 siblings, 0 replies; 29+ messages in thread
From: Tyler Retzlaff @ 2023-11-07 23:38 UTC (permalink / raw)
  To: dev
  Cc: Bruce Richardson, Cristian Dumitrescu, David Hunt,
	Honnappa Nagarahalli, Ruifeng Wang, Sameh Gobriel,
	Tyler Retzlaff, Vladimir Medvedkin, Yipeng Wang, mb

Use rte_ctz32 or rte_ctz64 respectively instead of __builtin_ctzl
depending on the resultant type of the expression passed as an argument

Fixes: 18898c4d06f9 ("eal: use abstracted bit count functions")

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/member/rte_member_vbf.c | 12 ++++++------
 lib/member/rte_member_x86.h |  6 +++---
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/lib/member/rte_member_vbf.c b/lib/member/rte_member_vbf.c
index 9df4620..5a0c51e 100644
--- a/lib/member/rte_member_vbf.c
+++ b/lib/member/rte_member_vbf.c
@@ -108,8 +108,8 @@
 	 * div_shift is used for division shift, to be divided by number of bits
 	 * represented by a uint32_t variable
 	 */
-	ss->mul_shift = __builtin_ctzl(ss->num_set);
-	ss->div_shift = __builtin_ctzl(32 >> ss->mul_shift);
+	ss->mul_shift = rte_ctz32(ss->num_set);
+	ss->div_shift = rte_ctz32(32 >> ss->mul_shift);
 
 	RTE_MEMBER_LOG(DEBUG, "vector bloom filter created, "
 		"each bloom filter expects %u keys, needs %u bits, %u hashes, "
@@ -174,7 +174,7 @@
 	}
 
 	if (mask) {
-		*set_id = __builtin_ctzl(mask) + 1;
+		*set_id = rte_ctz32(mask) + 1;
 		return 1;
 	}
 
@@ -207,7 +207,7 @@
 	}
 	for (i = 0; i < num_keys; i++) {
 		if (mask[i]) {
-			set_ids[i] = __builtin_ctzl(mask[i]) + 1;
+			set_ids[i] = rte_ctz32(mask[i]) + 1;
 			num_matches++;
 		} else
 			set_ids[i] = RTE_MEMBER_NO_MATCH;
@@ -233,7 +233,7 @@
 		mask &= test_bit(bit_loc, ss);
 	}
 	while (mask) {
-		uint32_t loc = __builtin_ctzl(mask);
+		uint32_t loc = rte_ctz32(mask);
 		set_id[num_matches] = loc + 1;
 		num_matches++;
 		if (num_matches >= match_per_key)
@@ -272,7 +272,7 @@
 	for (i = 0; i < num_keys; i++) {
 		match_cnt_t = 0;
 		while (mask[i]) {
-			uint32_t loc = __builtin_ctzl(mask[i]);
+			uint32_t loc = rte_ctz32(mask[i]);
 			set_ids[i * match_per_key + match_cnt_t] = loc + 1;
 			match_cnt_t++;
 			if (match_cnt_t >= match_per_key)
diff --git a/lib/member/rte_member_x86.h b/lib/member/rte_member_x86.h
index 74c8e38..d115151 100644
--- a/lib/member/rte_member_x86.h
+++ b/lib/member/rte_member_x86.h
@@ -22,7 +22,7 @@
 		_mm256_load_si256((__m256i const *)buckets[bucket_id].sigs),
 		_mm256_set1_epi16(tmp_sig)));
 	if (hitmask) {
-		uint32_t hit_idx = __builtin_ctzl(hitmask) >> 1;
+		uint32_t hit_idx = rte_ctz32(hitmask) >> 1;
 		buckets[bucket_id].sets[hit_idx] = set_id;
 		return 1;
 	}
@@ -38,7 +38,7 @@
 		_mm256_load_si256((__m256i const *)buckets[bucket_id].sigs),
 		_mm256_set1_epi16(tmp_sig)));
 	while (hitmask) {
-		uint32_t hit_idx = __builtin_ctzl(hitmask) >> 1;
+		uint32_t hit_idx = rte_ctz32(hitmask) >> 1;
 		if (buckets[bucket_id].sets[hit_idx] != RTE_MEMBER_NO_MATCH) {
 			*set_id = buckets[bucket_id].sets[hit_idx];
 			return 1;
@@ -59,7 +59,7 @@
 		_mm256_load_si256((__m256i const *)buckets[bucket_id].sigs),
 		_mm256_set1_epi16(tmp_sig)));
 	while (hitmask) {
-		uint32_t hit_idx = __builtin_ctzl(hitmask) >> 1;
+		uint32_t hit_idx = rte_ctz32(hitmask) >> 1;
 		if (buckets[bucket_id].sets[hit_idx] != RTE_MEMBER_NO_MATCH) {
 			set_id[*counter] = buckets[bucket_id].sets[hit_idx];
 			(*counter)++;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH v3 4/7] rcu: use abstracted bit count functions
  2023-11-07 23:38 ` [PATCH v3 0/7] " Tyler Retzlaff
                     ` (2 preceding siblings ...)
  2023-11-07 23:38   ` [PATCH v3 3/7] member: " Tyler Retzlaff
@ 2023-11-07 23:38   ` Tyler Retzlaff
  2023-11-07 23:38   ` [PATCH v3 5/7] table: " Tyler Retzlaff
                     ` (3 subsequent siblings)
  7 siblings, 0 replies; 29+ messages in thread
From: Tyler Retzlaff @ 2023-11-07 23:38 UTC (permalink / raw)
  To: dev
  Cc: Bruce Richardson, Cristian Dumitrescu, David Hunt,
	Honnappa Nagarahalli, Ruifeng Wang, Sameh Gobriel,
	Tyler Retzlaff, Vladimir Medvedkin, Yipeng Wang, mb

Use rte_ctz32 or rte_ctz64 respectively instead of __builtin_ctzl
depending on the resultant type of the expression passed as an argument

Fixes: 18898c4d06f9 ("eal: use abstracted bit count functions")

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/rcu/rte_rcu_qsbr.c | 4 ++--
 lib/rcu/rte_rcu_qsbr.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/rcu/rte_rcu_qsbr.c b/lib/rcu/rte_rcu_qsbr.c
index 4dc7714..a9f3d6c 100644
--- a/lib/rcu/rte_rcu_qsbr.c
+++ b/lib/rcu/rte_rcu_qsbr.c
@@ -231,7 +231,7 @@
 					rte_memory_order_acquire);
 		id = i << __RTE_QSBR_THRID_INDEX_SHIFT;
 		while (bmap) {
-			t = __builtin_ctzl(bmap);
+			t = rte_ctz64(bmap);
 			fprintf(f, "%u ", id + t);
 
 			bmap &= ~(1UL << t);
@@ -252,7 +252,7 @@
 					rte_memory_order_acquire);
 		id = i << __RTE_QSBR_THRID_INDEX_SHIFT;
 		while (bmap) {
-			t = __builtin_ctzl(bmap);
+			t = rte_ctz64(bmap);
 			fprintf(f, "thread ID = %u, count = %" PRIu64 ", lock count = %u\n",
 				id + t,
 				rte_atomic_load_explicit(
diff --git a/lib/rcu/rte_rcu_qsbr.h b/lib/rcu/rte_rcu_qsbr.h
index 9f4aed2..13461f8 100644
--- a/lib/rcu/rte_rcu_qsbr.h
+++ b/lib/rcu/rte_rcu_qsbr.h
@@ -530,7 +530,7 @@ struct rte_rcu_qsbr_dq_parameters {
 		id = i << __RTE_QSBR_THRID_INDEX_SHIFT;
 
 		while (bmap) {
-			j = __builtin_ctzl(bmap);
+			j = rte_ctz64(bmap);
 			__RTE_RCU_DP_LOG(DEBUG,
 				"%s: check: token = %" PRIu64 ", wait = %d, Bit Map = 0x%" PRIx64 ", Thread ID = %d",
 				__func__, t, wait, bmap, id + j);
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH v3 5/7] table: use abstracted bit count functions
  2023-11-07 23:38 ` [PATCH v3 0/7] " Tyler Retzlaff
                     ` (3 preceding siblings ...)
  2023-11-07 23:38   ` [PATCH v3 4/7] rcu: " Tyler Retzlaff
@ 2023-11-07 23:38   ` Tyler Retzlaff
  2023-11-07 23:38   ` [PATCH v3 6/7] distributor: " Tyler Retzlaff
                     ` (2 subsequent siblings)
  7 siblings, 0 replies; 29+ messages in thread
From: Tyler Retzlaff @ 2023-11-07 23:38 UTC (permalink / raw)
  To: dev
  Cc: Bruce Richardson, Cristian Dumitrescu, David Hunt,
	Honnappa Nagarahalli, Ruifeng Wang, Sameh Gobriel,
	Tyler Retzlaff, Vladimir Medvedkin, Yipeng Wang, mb

Use rte_clz32 or rte_clz64 respectively instead of __builtin_clzl
depending on the resultant type of the expression passed as an argument

Use rte_ctz32 or rte_ctz64 respectively instead of __builtin_ctzl
depending on the resultant type of the expression passed as an argument

Fixes: 18898c4d06f9 ("eal: use abstracted bit count functions")

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/table/rte_lru_arm64.h      | 3 ++-
 lib/table/rte_swx_table_em.c   | 4 ++--
 lib/table/rte_table_hash_ext.c | 4 ++--
 lib/table/rte_table_hash_lru.c | 4 ++--
 4 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/lib/table/rte_lru_arm64.h b/lib/table/rte_lru_arm64.h
index add889a..f19b0bd 100644
--- a/lib/table/rte_lru_arm64.h
+++ b/lib/table/rte_lru_arm64.h
@@ -11,6 +11,7 @@
 
 #include <stdint.h>
 #include <rte_vect.h>
+#include <rte_bitops.h>
 
 #ifndef RTE_TABLE_HASH_LRU_STRATEGY
 #ifdef __ARM_NEON
@@ -33,7 +34,7 @@
 	uint16x4_t min_vec = vmov_n_u16(vminv_u16(lru_vec));
 	uint64_t mask = vget_lane_u64(vreinterpret_u64_u16(
 			vceq_u16(min_vec, lru_vec)), 0);
-	return __builtin_clzl(mask) >> 4;
+	return rte_clz64(mask) >> 4;
 }
 #define lru_pos(bucket) f_lru_pos(bucket->lru_list)
 
diff --git a/lib/table/rte_swx_table_em.c b/lib/table/rte_swx_table_em.c
index 84837c8..2f042d7 100644
--- a/lib/table/rte_swx_table_em.c
+++ b/lib/table/rte_swx_table_em.c
@@ -260,8 +260,8 @@ struct table {
 	if (!params->hash_func)
 		t->params.hash_func = rte_hash_crc;
 
-	t->key_size_shl = __builtin_ctzl(key_size);
-	t->data_size_shl = __builtin_ctzl(key_data_size);
+	t->key_size_shl = rte_ctz32(key_size);
+	t->data_size_shl = rte_ctz32(key_data_size);
 	t->n_buckets = n_buckets;
 	t->n_buckets_ext = n_buckets_ext;
 	t->total_size = total_size;
diff --git a/lib/table/rte_table_hash_ext.c b/lib/table/rte_table_hash_ext.c
index 51a20ac..9f0220d 100644
--- a/lib/table/rte_table_hash_ext.c
+++ b/lib/table/rte_table_hash_ext.c
@@ -243,8 +243,8 @@ struct rte_table_hash {
 
 	/* Internal */
 	t->bucket_mask = t->n_buckets - 1;
-	t->key_size_shl = __builtin_ctzl(p->key_size);
-	t->data_size_shl = __builtin_ctzl(entry_size);
+	t->key_size_shl = rte_ctz32(p->key_size);
+	t->data_size_shl = rte_ctz32(entry_size);
 
 	/* Tables */
 	key_mask_offset = 0;
diff --git a/lib/table/rte_table_hash_lru.c b/lib/table/rte_table_hash_lru.c
index a4e1a05..758ec4f 100644
--- a/lib/table/rte_table_hash_lru.c
+++ b/lib/table/rte_table_hash_lru.c
@@ -220,8 +220,8 @@ struct rte_table_hash {
 
 	/* Internal */
 	t->bucket_mask = t->n_buckets - 1;
-	t->key_size_shl = __builtin_ctzl(p->key_size);
-	t->data_size_shl = __builtin_ctzl(entry_size);
+	t->key_size_shl = rte_ctz32(p->key_size);
+	t->data_size_shl = rte_ctz32(entry_size);
 
 	/* Tables */
 	key_mask_offset = 0;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH v3 6/7] distributor: use abstracted bit count functions
  2023-11-07 23:38 ` [PATCH v3 0/7] " Tyler Retzlaff
                     ` (4 preceding siblings ...)
  2023-11-07 23:38   ` [PATCH v3 5/7] table: " Tyler Retzlaff
@ 2023-11-07 23:38   ` Tyler Retzlaff
  2023-11-07 23:38   ` [PATCH v3 7/7] hash: " Tyler Retzlaff
  2023-11-08  8:34   ` [PATCH v3 0/7] use abstracted bit count functions Morten Brørup
  7 siblings, 0 replies; 29+ messages in thread
From: Tyler Retzlaff @ 2023-11-07 23:38 UTC (permalink / raw)
  To: dev
  Cc: Bruce Richardson, Cristian Dumitrescu, David Hunt,
	Honnappa Nagarahalli, Ruifeng Wang, Sameh Gobriel,
	Tyler Retzlaff, Vladimir Medvedkin, Yipeng Wang, mb

Use rte_popcount64 instead of __builtin_popcountl where the argument
type passed to the intrinsic was 64-bits.

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/distributor/rte_distributor_single.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/distributor/rte_distributor_single.c b/lib/distributor/rte_distributor_single.c
index 08144e5..84d88e3 100644
--- a/lib/distributor/rte_distributor_single.c
+++ b/lib/distributor/rte_distributor_single.c
@@ -341,7 +341,7 @@ struct rte_mbuf *
 {
 	unsigned wkr, total_outstanding;
 
-	total_outstanding = __builtin_popcountl(d->in_flight_bitmask);
+	total_outstanding = rte_popcount64(d->in_flight_bitmask);
 
 	for (wkr = 0; wkr < d->num_workers; wkr++)
 		total_outstanding += d->backlog[wkr].count;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH v3 7/7] hash: use abstracted bit count functions
  2023-11-07 23:38 ` [PATCH v3 0/7] " Tyler Retzlaff
                     ` (5 preceding siblings ...)
  2023-11-07 23:38   ` [PATCH v3 6/7] distributor: " Tyler Retzlaff
@ 2023-11-07 23:38   ` Tyler Retzlaff
  2023-11-08  8:47     ` CI test system not catching truncation bugs for 32-bit architectures? Morten Brørup
  2023-11-08  8:34   ` [PATCH v3 0/7] use abstracted bit count functions Morten Brørup
  7 siblings, 1 reply; 29+ messages in thread
From: Tyler Retzlaff @ 2023-11-07 23:38 UTC (permalink / raw)
  To: dev
  Cc: Bruce Richardson, Cristian Dumitrescu, David Hunt,
	Honnappa Nagarahalli, Ruifeng Wang, Sameh Gobriel,
	Tyler Retzlaff, Vladimir Medvedkin, Yipeng Wang, mb

Use rte_popcount64 instead of __builtin_popcountl where the argument
type passed to the intrinsic was 64-bits.

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/hash/rte_cuckoo_hash.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/hash/rte_cuckoo_hash.c b/lib/hash/rte_cuckoo_hash.c
index 19ee53a..ccdc3b9 100644
--- a/lib/hash/rte_cuckoo_hash.c
+++ b/lib/hash/rte_cuckoo_hash.c
@@ -2357,7 +2357,7 @@ struct rte_hash *
 	__rte_hash_lookup_bulk(h, keys, num_keys, positions, hit_mask, data);
 
 	/* Return number of hits */
-	return __builtin_popcountl(*hit_mask);
+	return rte_popcount64(*hit_mask);
 }
 
 
@@ -2474,7 +2474,7 @@ struct rte_hash *
 			positions, hit_mask, data);
 
 	/* Return number of hits */
-	return __builtin_popcountl(*hit_mask);
+	return rte_popcount64(*hit_mask);
 }
 
 int32_t
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 29+ messages in thread

* RE: [PATCH v2 0/5] use abstracted bit count functions
  2023-11-07 19:10 ` [PATCH v2 " Tyler Retzlaff
                     ` (4 preceding siblings ...)
  2023-11-07 19:10   ` [PATCH v2 5/5] table: " Tyler Retzlaff
@ 2023-11-08  8:25   ` Morten Brørup
  5 siblings, 0 replies; 29+ messages in thread
From: Morten Brørup @ 2023-11-08  8:25 UTC (permalink / raw)
  To: Tyler Retzlaff, dev
  Cc: Bruce Richardson, Cristian Dumitrescu, David Hunt,
	Honnappa Nagarahalli, Ruifeng Wang, Sameh Gobriel,
	Vladimir Medvedkin, Yipeng Wang

> From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> Sent: Tuesday, 7 November 2023 20.10
> 
> The first set of conversions missed the long 'l' versions of the
> builtins that were being used. This series completes the conversion
> of remaining libraries from __builtin_ctzl and __builtin_clzl.
> 
> v2: be explicit and use appropriate 32-bit and 64-bit leading
>     and trailing counting functions depending on the type of the
>     expression passed as an argument to the builtin.

Series-acked-by: Morten Brørup <mb@smartsharesystems.com>


^ permalink raw reply	[flat|nested] 29+ messages in thread

* RE: [PATCH v3 0/7] use abstracted bit count functions
  2023-11-07 23:38 ` [PATCH v3 0/7] " Tyler Retzlaff
                     ` (6 preceding siblings ...)
  2023-11-07 23:38   ` [PATCH v3 7/7] hash: " Tyler Retzlaff
@ 2023-11-08  8:34   ` Morten Brørup
  2023-11-08 16:57     ` Thomas Monjalon
  7 siblings, 1 reply; 29+ messages in thread
From: Morten Brørup @ 2023-11-08  8:34 UTC (permalink / raw)
  To: Tyler Retzlaff, dev
  Cc: Bruce Richardson, Cristian Dumitrescu, David Hunt,
	Honnappa Nagarahalli, Ruifeng Wang, Sameh Gobriel,
	Vladimir Medvedkin, Yipeng Wang

> From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> Sent: Wednesday, 8 November 2023 00.38
> 
> The first set of conversions missed the long 'l' versions of the
> builtins that were being used. This series completes the conversion
> of remaining libraries from __builtin_ctzl, __builtin_clzl and
> __builtin_popcountl.
> 
> v3:
>   * add missing include of rte_bitops.h
>   * add 2 patches to cover use of __builtin_popcountl
> 
> v2:
>   * be explicit and use appropriate 32-bit and 64-bit leading
>     and trailing counting functions depending on the type of the
>     expression passed as an argument to the builtin.

Didn't notice v3 before ack'ing v2.

Series-acked-by: Morten Brørup <mb@smartsharesystems.com>


^ permalink raw reply	[flat|nested] 29+ messages in thread

* CI test system not catching truncation bugs for 32-bit architectures?
  2023-11-07 23:38   ` [PATCH v3 7/7] hash: " Tyler Retzlaff
@ 2023-11-08  8:47     ` Morten Brørup
  0 siblings, 0 replies; 29+ messages in thread
From: Morten Brørup @ 2023-11-08  8:47 UTC (permalink / raw)
  To: dev, ci
  Cc: Bruce Richardson, Cristian Dumitrescu, David Hunt,
	Honnappa Nagarahalli, Ruifeng Wang, Sameh Gobriel,
	Vladimir Medvedkin, Yipeng Wang, Tyler Retzlaff

> From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> Sent: Wednesday, 8 November 2023 00.38
> 
> Use rte_popcount64 instead of __builtin_popcountl where the argument
> type passed to the intrinsic was 64-bits.

Can someone please explain why our CI test system accepted passing a 64-bit value to __builtin_popcountl(unsigned long) when building for 32-bit architectures, where an unsigned long is 32 bit? The 32 most significant bits were blindly truncated here.

It looks like this patch also fixes a bug (which should have been caught by the CI system) for 32-bit architectures.

> 
> Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> ---
>  lib/hash/rte_cuckoo_hash.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/lib/hash/rte_cuckoo_hash.c b/lib/hash/rte_cuckoo_hash.c
> index 19ee53a..ccdc3b9 100644
> --- a/lib/hash/rte_cuckoo_hash.c
> +++ b/lib/hash/rte_cuckoo_hash.c
> @@ -2357,7 +2357,7 @@ struct rte_hash *
>  	__rte_hash_lookup_bulk(h, keys, num_keys, positions, hit_mask,
> data);
> 
>  	/* Return number of hits */
> -	return __builtin_popcountl(*hit_mask);
> +	return rte_popcount64(*hit_mask);
>  }
> 
> 
> @@ -2474,7 +2474,7 @@ struct rte_hash *
>  			positions, hit_mask, data);
> 
>  	/* Return number of hits */
> -	return __builtin_popcountl(*hit_mask);
> +	return rte_popcount64(*hit_mask);
>  }
> 
>  int32_t
> --
> 1.8.3.1


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v3 0/7] use abstracted bit count functions
  2023-11-08  8:34   ` [PATCH v3 0/7] use abstracted bit count functions Morten Brørup
@ 2023-11-08 16:57     ` Thomas Monjalon
  2023-11-08 18:42       ` Tyler Retzlaff
  0 siblings, 1 reply; 29+ messages in thread
From: Thomas Monjalon @ 2023-11-08 16:57 UTC (permalink / raw)
  To: Tyler Retzlaff
  Cc: dev, Bruce Richardson, Cristian Dumitrescu, David Hunt,
	Honnappa Nagarahalli, Ruifeng Wang, Sameh Gobriel,
	Vladimir Medvedkin, Yipeng Wang, Morten Brørup

08/11/2023 09:34, Morten Brørup:
> > From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> > Sent: Wednesday, 8 November 2023 00.38
> > 
> > The first set of conversions missed the long 'l' versions of the
> > builtins that were being used. This series completes the conversion
> > of remaining libraries from __builtin_ctzl, __builtin_clzl and
> > __builtin_popcountl.
> > 
> > v3:
> >   * add missing include of rte_bitops.h
> >   * add 2 patches to cover use of __builtin_popcountl
> > 
> > v2:
> >   * be explicit and use appropriate 32-bit and 64-bit leading
> >     and trailing counting functions depending on the type of the
> >     expression passed as an argument to the builtin.
> 
> Didn't notice v3 before ack'ing v2.
> 
> Series-acked-by: Morten Brørup <mb@smartsharesystems.com>

Squashed and applied, thanks.

Note: there are few builtin occurences in drivers.




^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v3 0/7] use abstracted bit count functions
  2023-11-08 16:57     ` Thomas Monjalon
@ 2023-11-08 18:42       ` Tyler Retzlaff
  0 siblings, 0 replies; 29+ messages in thread
From: Tyler Retzlaff @ 2023-11-08 18:42 UTC (permalink / raw)
  To: Thomas Monjalon
  Cc: dev, Bruce Richardson, Cristian Dumitrescu, David Hunt,
	Honnappa Nagarahalli, Ruifeng Wang, Sameh Gobriel,
	Vladimir Medvedkin, Yipeng Wang, Morten Brørup

On Wed, Nov 08, 2023 at 05:57:01PM +0100, Thomas Monjalon wrote:
> 08/11/2023 09:34, Morten Brørup:
> > > From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> > > Sent: Wednesday, 8 November 2023 00.38
> > > 
> > > The first set of conversions missed the long 'l' versions of the
> > > builtins that were being used. This series completes the conversion
> > > of remaining libraries from __builtin_ctzl, __builtin_clzl and
> > > __builtin_popcountl.
> > > 
> > > v3:
> > >   * add missing include of rte_bitops.h
> > >   * add 2 patches to cover use of __builtin_popcountl
> > > 
> > > v2:
> > >   * be explicit and use appropriate 32-bit and 64-bit leading
> > >     and trailing counting functions depending on the type of the
> > >     expression passed as an argument to the builtin.
> > 
> > Didn't notice v3 before ack'ing v2.
> > 
> > Series-acked-by: Morten Brørup <mb@smartsharesystems.com>
> 
> Squashed and applied, thanks.
> 
> Note: there are few builtin occurences in drivers.

yes, tests and drivers are on my list. i'm mostly addressing conversions
in libs right now.

> 
> 

^ permalink raw reply	[flat|nested] 29+ messages in thread

end of thread, other threads:[~2023-11-08 18:42 UTC | newest]

Thread overview: 29+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-11-02  1:05 [PATCH 0/5] use abstracted bit count functions Tyler Retzlaff
2023-11-02  1:05 ` [PATCH 1/5] table: " Tyler Retzlaff
2023-11-02  1:05 ` [PATCH 2/5] distributor: " Tyler Retzlaff
2023-11-02  1:05 ` [PATCH 3/5] hash: " Tyler Retzlaff
2023-11-02  1:05 ` [PATCH 4/5] member: " Tyler Retzlaff
2023-11-02  1:05 ` [PATCH 5/5] rcu: " Tyler Retzlaff
2023-11-02  7:39 ` [PATCH 0/5] " Morten Brørup
2023-11-02 15:27   ` Tyler Retzlaff
2023-11-02 15:33     ` Morten Brørup
2023-11-02 15:36       ` Tyler Retzlaff
2023-11-07 19:10 ` [PATCH v2 " Tyler Retzlaff
2023-11-07 19:10   ` [PATCH v2 1/5] distributor: " Tyler Retzlaff
2023-11-07 19:10   ` [PATCH v2 2/5] hash: " Tyler Retzlaff
2023-11-07 19:10   ` [PATCH v2 3/5] member: " Tyler Retzlaff
2023-11-07 19:10   ` [PATCH v2 4/5] rcu: " Tyler Retzlaff
2023-11-07 19:10   ` [PATCH v2 5/5] table: " Tyler Retzlaff
2023-11-08  8:25   ` [PATCH v2 0/5] " Morten Brørup
2023-11-07 23:38 ` [PATCH v3 0/7] " Tyler Retzlaff
2023-11-07 23:38   ` [PATCH v3 1/7] distributor: " Tyler Retzlaff
2023-11-07 23:38   ` [PATCH v3 2/7] hash: " Tyler Retzlaff
2023-11-07 23:38   ` [PATCH v3 3/7] member: " Tyler Retzlaff
2023-11-07 23:38   ` [PATCH v3 4/7] rcu: " Tyler Retzlaff
2023-11-07 23:38   ` [PATCH v3 5/7] table: " Tyler Retzlaff
2023-11-07 23:38   ` [PATCH v3 6/7] distributor: " Tyler Retzlaff
2023-11-07 23:38   ` [PATCH v3 7/7] hash: " Tyler Retzlaff
2023-11-08  8:47     ` CI test system not catching truncation bugs for 32-bit architectures? Morten Brørup
2023-11-08  8:34   ` [PATCH v3 0/7] use abstracted bit count functions Morten Brørup
2023-11-08 16:57     ` Thomas Monjalon
2023-11-08 18:42       ` Tyler Retzlaff

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).