- * [PATCH 1/7] ring: replace rte atomics with GCC builtin atomics
  2023-03-17 20:19 [PATCH 0/7] replace rte atomics with GCC builtin atomics Tyler Retzlaff
@ 2023-03-17 20:19 ` Tyler Retzlaff
  2023-03-17 20:36   ` Tyler Retzlaff
  2023-03-17 20:19 ` [PATCH 2/7] stack: " Tyler Retzlaff
                   ` (10 subsequent siblings)
  11 siblings, 1 reply; 83+ messages in thread
From: Tyler Retzlaff @ 2023-03-17 20:19 UTC (permalink / raw)
  To: dev; +Cc: Honnappa.Nagarahalli, Ruifeng.Wang, thomas, Tyler Retzlaff
Replace the use of rte_atomic.h types and functions, instead use GCC
supplied C++11 memory model builtins.
Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/ring/rte_ring_core.h        |  1 -
 lib/ring/rte_ring_generic_pvt.h | 10 ++++++----
 2 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/lib/ring/rte_ring_core.h b/lib/ring/rte_ring_core.h
index 82b2370..b9c7860 100644
--- a/lib/ring/rte_ring_core.h
+++ b/lib/ring/rte_ring_core.h
@@ -31,7 +31,6 @@
 #include <rte_config.h>
 #include <rte_memory.h>
 #include <rte_lcore.h>
-#include <rte_atomic.h>
 #include <rte_branch_prediction.h>
 #include <rte_memzone.h>
 #include <rte_pause.h>
diff --git a/lib/ring/rte_ring_generic_pvt.h b/lib/ring/rte_ring_generic_pvt.h
index 5acb6e5..f9a15b6 100644
--- a/lib/ring/rte_ring_generic_pvt.h
+++ b/lib/ring/rte_ring_generic_pvt.h
@@ -92,8 +92,9 @@
 		if (is_sp)
 			r->prod.head = *new_head, success = 1;
 		else
-			success = rte_atomic32_cmpset(&r->prod.head,
-					*old_head, *new_head);
+			success = __atomic_compare_exchange_n(&r->prod.head,
+					old_head, *new_head, 0,
+					__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
 	} while (unlikely(success == 0));
 	return n;
 }
@@ -162,8 +163,9 @@
 			rte_smp_rmb();
 			success = 1;
 		} else {
-			success = rte_atomic32_cmpset(&r->cons.head, *old_head,
-					*new_head);
+			success = __atomic_compare_exchange_n(&r->cons.head,
+					old_head, *new_head, 0,
+					__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
 		}
 	} while (unlikely(success == 0));
 	return n;
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 83+ messages in thread
- * Re: [PATCH 1/7] ring: replace rte atomics with GCC builtin atomics
  2023-03-17 20:19 ` [PATCH 1/7] ring: " Tyler Retzlaff
@ 2023-03-17 20:36   ` Tyler Retzlaff
  0 siblings, 0 replies; 83+ messages in thread
From: Tyler Retzlaff @ 2023-03-17 20:36 UTC (permalink / raw)
  To: dev; +Cc: Honnappa.Nagarahalli, Ruifeng.Wang, thomas
On Fri, Mar 17, 2023 at 01:19:42PM -0700, Tyler Retzlaff wrote:
> Replace the use of rte_atomic.h types and functions, instead use GCC
> supplied C++11 memory model builtins.
> 
> Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> ---
>  lib/ring/rte_ring_core.h        |  1 -
>  lib/ring/rte_ring_generic_pvt.h | 10 ++++++----
>  2 files changed, 6 insertions(+), 5 deletions(-)
> 
> diff --git a/lib/ring/rte_ring_core.h b/lib/ring/rte_ring_core.h
> index 82b2370..b9c7860 100644
> --- a/lib/ring/rte_ring_core.h
> +++ b/lib/ring/rte_ring_core.h
> @@ -31,7 +31,6 @@
>  #include <rte_config.h>
>  #include <rte_memory.h>
>  #include <rte_lcore.h>
> -#include <rte_atomic.h>
>  #include <rte_branch_prediction.h>
>  #include <rte_memzone.h>
>  #include <rte_pause.h>
> diff --git a/lib/ring/rte_ring_generic_pvt.h b/lib/ring/rte_ring_generic_pvt.h
> index 5acb6e5..f9a15b6 100644
> --- a/lib/ring/rte_ring_generic_pvt.h
> +++ b/lib/ring/rte_ring_generic_pvt.h
> @@ -92,8 +92,9 @@
>  		if (is_sp)
>  			r->prod.head = *new_head, success = 1;
>  		else
> -			success = rte_atomic32_cmpset(&r->prod.head,
> -					*old_head, *new_head);
> +			success = __atomic_compare_exchange_n(&r->prod.head,
> +					old_head, *new_head, 0,
> +					__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
>  	} while (unlikely(success == 0));
>  	return n;
>  }
> @@ -162,8 +163,9 @@
>  			rte_smp_rmb();
>  			success = 1;
>  		} else {
> -			success = rte_atomic32_cmpset(&r->cons.head, *old_head,
> -					*new_head);
> +			success = __atomic_compare_exchange_n(&r->cons.head,
> +					old_head, *new_head, 0,
> +					__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
>  		}
>  	} while (unlikely(success == 0));
>  	return n;
just something i noticed and not related to this change.
i note that both __rte_ring_move_prod_head and
__rte_ring_move_cons_head perform a non-atomic load to
initialize `*old_head`, which is probably not the best idea.
^ permalink raw reply	[flat|nested] 83+ messages in thread
 
- * [PATCH 2/7] stack: replace rte atomics with GCC builtin atomics
  2023-03-17 20:19 [PATCH 0/7] replace rte atomics with GCC builtin atomics Tyler Retzlaff
  2023-03-17 20:19 ` [PATCH 1/7] ring: " Tyler Retzlaff
@ 2023-03-17 20:19 ` Tyler Retzlaff
  2023-03-17 20:19 ` [PATCH 3/7] dma/idxd: " Tyler Retzlaff
                   ` (9 subsequent siblings)
  11 siblings, 0 replies; 83+ messages in thread
From: Tyler Retzlaff @ 2023-03-17 20:19 UTC (permalink / raw)
  To: dev; +Cc: Honnappa.Nagarahalli, Ruifeng.Wang, thomas, Tyler Retzlaff
Replace the use of rte_atomic.h types and functions, instead use GCC
supplied C++11 memory model builtins.
Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/stack/rte_stack_lf_generic.h | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)
diff --git a/lib/stack/rte_stack_lf_generic.h b/lib/stack/rte_stack_lf_generic.h
index 7fa29ce..3ef0f74 100644
--- a/lib/stack/rte_stack_lf_generic.h
+++ b/lib/stack/rte_stack_lf_generic.h
@@ -26,8 +26,7 @@
 	 * elements. If the mempool is near-empty to the point that this is a
 	 * concern, the user should consider increasing the mempool size.
 	 */
-	return (unsigned int)rte_atomic64_read((rte_atomic64_t *)
-			&s->stack_lf.used.len);
+	return __atomic_load_n(&s->stack_lf.used.len, __ATOMIC_SEQ_CST);
 }
 
 static __rte_always_inline void
@@ -68,7 +67,7 @@
 				__ATOMIC_RELAXED);
 	} while (success == 0);
 
-	rte_atomic64_add((rte_atomic64_t *)&list->len, num);
+	__atomic_fetch_add(&list->len, num, __ATOMIC_SEQ_CST);
 }
 
 static __rte_always_inline struct rte_stack_lf_elem *
@@ -82,14 +81,14 @@
 
 	/* Reserve num elements, if available */
 	while (1) {
-		uint64_t len = rte_atomic64_read((rte_atomic64_t *)&list->len);
+		uint64_t len = __atomic_load_n(&list->len, __ATOMIC_SEQ_CST);
 
 		/* Does the list contain enough elements? */
 		if (unlikely(len < num))
 			return NULL;
 
-		if (rte_atomic64_cmpset((volatile uint64_t *)&list->len,
-					len, len - num))
+		if (__atomic_compare_exchange_n(&list->len, &len, len - num,
+			0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
 			break;
 	}
 
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 83+ messages in thread
- * [PATCH 3/7] dma/idxd: replace rte atomics with GCC builtin atomics
  2023-03-17 20:19 [PATCH 0/7] replace rte atomics with GCC builtin atomics Tyler Retzlaff
  2023-03-17 20:19 ` [PATCH 1/7] ring: " Tyler Retzlaff
  2023-03-17 20:19 ` [PATCH 2/7] stack: " Tyler Retzlaff
@ 2023-03-17 20:19 ` Tyler Retzlaff
  2023-03-17 20:19 ` [PATCH 4/7] net/ice: " Tyler Retzlaff
                   ` (8 subsequent siblings)
  11 siblings, 0 replies; 83+ messages in thread
From: Tyler Retzlaff @ 2023-03-17 20:19 UTC (permalink / raw)
  To: dev; +Cc: Honnappa.Nagarahalli, Ruifeng.Wang, thomas, Tyler Retzlaff
Replace the use of rte_atomic.h types and functions, instead use GCC
supplied C++11 memory model builtins.
Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 drivers/dma/idxd/idxd_internal.h | 3 +--
 drivers/dma/idxd/idxd_pci.c      | 6 +++---
 2 files changed, 4 insertions(+), 5 deletions(-)
diff --git a/drivers/dma/idxd/idxd_internal.h b/drivers/dma/idxd/idxd_internal.h
index 180a858..53a0c8e 100644
--- a/drivers/dma/idxd/idxd_internal.h
+++ b/drivers/dma/idxd/idxd_internal.h
@@ -7,7 +7,6 @@
 
 #include <rte_dmadev_pmd.h>
 #include <rte_spinlock.h>
-#include <rte_atomic.h>
 
 #include "idxd_hw_defs.h"
 
@@ -34,7 +33,7 @@ struct idxd_pci_common {
 	rte_spinlock_t lk;
 
 	uint8_t wq_cfg_sz;
-	rte_atomic16_t ref_count;
+	int16_t ref_count;
 	volatile struct rte_idxd_bar0 *regs;
 	volatile uint32_t *wq_regs_base;
 	volatile struct rte_idxd_grpcfg *grp_regs;
diff --git a/drivers/dma/idxd/idxd_pci.c b/drivers/dma/idxd/idxd_pci.c
index 781fa02..e869d33 100644
--- a/drivers/dma/idxd/idxd_pci.c
+++ b/drivers/dma/idxd/idxd_pci.c
@@ -6,7 +6,6 @@
 #include <rte_devargs.h>
 #include <rte_dmadev_pmd.h>
 #include <rte_malloc.h>
-#include <rte_atomic.h>
 
 #include "idxd_internal.h"
 
@@ -136,7 +135,8 @@
 	/* if this is the last WQ on the device, disable the device and free
 	 * the PCI struct
 	 */
-	is_last_wq = rte_atomic16_dec_and_test(&idxd->u.pci->ref_count);
+	is_last_wq = __atomic_fetch_sub(&idxd->u.pci->ref_count, 1,
+		__ATOMIC_SEQ_CST) - 1 == 0;
 	if (is_last_wq) {
 		/* disable the device */
 		err_code = idxd_pci_dev_command(idxd, idxd_disable_dev);
@@ -350,7 +350,7 @@
 				free(idxd.u.pci);
 			return ret;
 		}
-		rte_atomic16_inc(&idxd.u.pci->ref_count);
+		__atomic_fetch_add(&idxd.u.pci->ref_count, 1, __ATOMIC_SEQ_CST);
 	}
 
 	return 0;
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 83+ messages in thread
- * [PATCH 4/7] net/ice: replace rte atomics with GCC builtin atomics
  2023-03-17 20:19 [PATCH 0/7] replace rte atomics with GCC builtin atomics Tyler Retzlaff
                   ` (2 preceding siblings ...)
  2023-03-17 20:19 ` [PATCH 3/7] dma/idxd: " Tyler Retzlaff
@ 2023-03-17 20:19 ` Tyler Retzlaff
  2023-03-17 20:41   ` Tyler Retzlaff
  2023-03-17 20:19 ` [PATCH 5/7] net/ixgbe: " Tyler Retzlaff
                   ` (7 subsequent siblings)
  11 siblings, 1 reply; 83+ messages in thread
From: Tyler Retzlaff @ 2023-03-17 20:19 UTC (permalink / raw)
  To: dev; +Cc: Honnappa.Nagarahalli, Ruifeng.Wang, thomas, Tyler Retzlaff
Replace the use of rte_atomic.h types and functions, instead use GCC
supplied C++11 memory model builtins.
Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 drivers/net/ice/ice_dcf.c        |  1 -
 drivers/net/ice/ice_dcf_ethdev.c |  1 -
 drivers/net/ice/ice_ethdev.c     | 10 ++++++----
 3 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/drivers/net/ice/ice_dcf.c b/drivers/net/ice/ice_dcf.c
index 1c3d22a..80d2cbd 100644
--- a/drivers/net/ice/ice_dcf.c
+++ b/drivers/net/ice/ice_dcf.c
@@ -14,7 +14,6 @@
 #include <rte_common.h>
 
 #include <rte_pci.h>
-#include <rte_atomic.h>
 #include <rte_eal.h>
 #include <rte_ether.h>
 #include <ethdev_driver.h>
diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c
index dcbf2af..13ff245 100644
--- a/drivers/net/ice/ice_dcf_ethdev.c
+++ b/drivers/net/ice/ice_dcf_ethdev.c
@@ -11,7 +11,6 @@
 #include <rte_interrupts.h>
 #include <rte_debug.h>
 #include <rte_pci.h>
-#include <rte_atomic.h>
 #include <rte_eal.h>
 #include <rte_ether.h>
 #include <ethdev_pci.h>
diff --git a/drivers/net/ice/ice_ethdev.c b/drivers/net/ice/ice_ethdev.c
index 9a88cf9..bdf4569 100644
--- a/drivers/net/ice/ice_ethdev.c
+++ b/drivers/net/ice/ice_ethdev.c
@@ -3927,8 +3927,9 @@ static int ice_init_rss(struct ice_pf *pf)
 	struct rte_eth_link *dst = link;
 	struct rte_eth_link *src = &dev->data->dev_link;
 
-	if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
-				*(uint64_t *)src) == 0)
+	if (!__atomic_compare_exchange_n((uint64_t *)dst,
+		(uint64_t *)dst, *(uint64_t *)src, 0,
+		__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
 		return -1;
 
 	return 0;
@@ -3941,8 +3942,9 @@ static int ice_init_rss(struct ice_pf *pf)
 	struct rte_eth_link *dst = &dev->data->dev_link;
 	struct rte_eth_link *src = link;
 
-	if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
-				*(uint64_t *)src) == 0)
+	if (!__atomic_compare_exchange_n((uint64_t *)dst,
+		(uint64_t *)dst, *(uint64_t *)src, 0,
+		__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
 		return -1;
 
 	return 0;
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 83+ messages in thread
- * Re: [PATCH 4/7] net/ice: replace rte atomics with GCC builtin atomics
  2023-03-17 20:19 ` [PATCH 4/7] net/ice: " Tyler Retzlaff
@ 2023-03-17 20:41   ` Tyler Retzlaff
  0 siblings, 0 replies; 83+ messages in thread
From: Tyler Retzlaff @ 2023-03-17 20:41 UTC (permalink / raw)
  To: dev; +Cc: Honnappa.Nagarahalli, Ruifeng.Wang, thomas
On Fri, Mar 17, 2023 at 01:19:45PM -0700, Tyler Retzlaff wrote:
> Replace the use of rte_atomic.h types and functions, instead use GCC
> supplied C++11 memory model builtins.
> 
> Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> ---
>  drivers/net/ice/ice_dcf.c        |  1 -
>  drivers/net/ice/ice_dcf_ethdev.c |  1 -
>  drivers/net/ice/ice_ethdev.c     | 10 ++++++----
>  3 files changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/net/ice/ice_dcf.c b/drivers/net/ice/ice_dcf.c
> index 1c3d22a..80d2cbd 100644
> --- a/drivers/net/ice/ice_dcf.c
> +++ b/drivers/net/ice/ice_dcf.c
> @@ -14,7 +14,6 @@
>  #include <rte_common.h>
>  
>  #include <rte_pci.h>
> -#include <rte_atomic.h>
>  #include <rte_eal.h>
>  #include <rte_ether.h>
>  #include <ethdev_driver.h>
> diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c
> index dcbf2af..13ff245 100644
> --- a/drivers/net/ice/ice_dcf_ethdev.c
> +++ b/drivers/net/ice/ice_dcf_ethdev.c
> @@ -11,7 +11,6 @@
>  #include <rte_interrupts.h>
>  #include <rte_debug.h>
>  #include <rte_pci.h>
> -#include <rte_atomic.h>
>  #include <rte_eal.h>
>  #include <rte_ether.h>
>  #include <ethdev_pci.h>
> diff --git a/drivers/net/ice/ice_ethdev.c b/drivers/net/ice/ice_ethdev.c
> index 9a88cf9..bdf4569 100644
> --- a/drivers/net/ice/ice_ethdev.c
> +++ b/drivers/net/ice/ice_ethdev.c
> @@ -3927,8 +3927,9 @@ static int ice_init_rss(struct ice_pf *pf)
>  	struct rte_eth_link *dst = link;
>  	struct rte_eth_link *src = &dev->data->dev_link;
>  
> -	if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
> -				*(uint64_t *)src) == 0)
> +	if (!__atomic_compare_exchange_n((uint64_t *)dst,
> +		(uint64_t *)dst, *(uint64_t *)src, 0,
> +		__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
>  		return -1;
>  
>  	return 0;
> @@ -3941,8 +3942,9 @@ static int ice_init_rss(struct ice_pf *pf)
>  	struct rte_eth_link *dst = &dev->data->dev_link;
>  	struct rte_eth_link *src = link;
>  
> -	if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
> -				*(uint64_t *)src) == 0)
> +	if (!__atomic_compare_exchange_n((uint64_t *)dst,
> +		(uint64_t *)dst, *(uint64_t *)src, 0,
> +		__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
>  		return -1;
>  
*(uint64_t *)dst for the second parameter looks like a bug to me;
a non-atomic load will be generated.
probably this code should be corrected by performing __atomic_load_n(dst, ...)
into a stack variable and then performing the cmpset/compare_exchange.
^ permalink raw reply	[flat|nested] 83+ messages in thread 
 
- * [PATCH 5/7] net/ixgbe: replace rte atomics with GCC builtin atomics
  2023-03-17 20:19 [PATCH 0/7] replace rte atomics with GCC builtin atomics Tyler Retzlaff
                   ` (3 preceding siblings ...)
  2023-03-17 20:19 ` [PATCH 4/7] net/ice: " Tyler Retzlaff
@ 2023-03-17 20:19 ` Tyler Retzlaff
  2023-03-17 20:19 ` [PATCH 6/7] net/null: " Tyler Retzlaff
                   ` (6 subsequent siblings)
  11 siblings, 0 replies; 83+ messages in thread
From: Tyler Retzlaff @ 2023-03-17 20:19 UTC (permalink / raw)
  To: dev; +Cc: Honnappa.Nagarahalli, Ruifeng.Wang, thomas, Tyler Retzlaff
Replace the use of rte_atomic.h types and functions, instead use GCC
supplied C++11 memory model builtins.
Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 drivers/net/ixgbe/ixgbe_bypass.c |  1 -
 drivers/net/ixgbe/ixgbe_ethdev.c | 12 ++++++------
 drivers/net/ixgbe/ixgbe_ethdev.h |  3 ++-
 drivers/net/ixgbe/ixgbe_flow.c   |  1 -
 drivers/net/ixgbe/ixgbe_rxtx.c   |  1 -
 5 files changed, 8 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ixgbe/ixgbe_bypass.c b/drivers/net/ixgbe/ixgbe_bypass.c
index 94f34a2..f615d18 100644
--- a/drivers/net/ixgbe/ixgbe_bypass.c
+++ b/drivers/net/ixgbe/ixgbe_bypass.c
@@ -3,7 +3,6 @@
  */
 
 #include <time.h>
-#include <rte_atomic.h>
 #include <ethdev_driver.h>
 #include "ixgbe_ethdev.h"
 #include "ixgbe_bypass_api.h"
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index 88118bc..3efb5ff 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -1127,7 +1127,7 @@ struct rte_ixgbe_xstats_name_off {
 		return 0;
 	}
 
-	rte_atomic32_clear(&ad->link_thread_running);
+	__atomic_clear(&ad->link_thread_running, __ATOMIC_SEQ_CST);
 	ixgbe_parse_devargs(eth_dev->data->dev_private,
 			    pci_dev->device.devargs);
 	rte_eth_copy_pci_info(eth_dev, pci_dev);
@@ -1625,7 +1625,7 @@ static int ixgbe_l2_tn_filter_init(struct rte_eth_dev *eth_dev)
 		return 0;
 	}
 
-	rte_atomic32_clear(&ad->link_thread_running);
+	__atomic_clear(&ad->link_thread_running, __ATOMIC_SEQ_CST);
 	ixgbevf_parse_devargs(eth_dev->data->dev_private,
 			      pci_dev->device.devargs);
 
@@ -4186,7 +4186,7 @@ static int ixgbevf_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
 	struct ixgbe_adapter *ad = dev->data->dev_private;
 	uint32_t timeout = timeout_ms ? timeout_ms : WARNING_TIMEOUT;
 
-	while (rte_atomic32_read(&ad->link_thread_running)) {
+	while (__atomic_load_n(&ad->link_thread_running, __ATOMIC_SEQ_CST)) {
 		msec_delay(1);
 		timeout--;
 
@@ -4222,7 +4222,7 @@ static int ixgbevf_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
 	ixgbe_setup_link(hw, speed, true);
 
 	intr->flags &= ~IXGBE_FLAG_NEED_LINK_CONFIG;
-	rte_atomic32_clear(&ad->link_thread_running);
+	__atomic_clear(&ad->link_thread_running, __ATOMIC_SEQ_CST);
 	return NULL;
 }
 
@@ -4317,7 +4317,7 @@ static int ixgbevf_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
 	if (link_up == 0) {
 		if (ixgbe_get_media_type(hw) == ixgbe_media_type_fiber) {
 			ixgbe_dev_wait_setup_link_complete(dev, 0);
-			if (rte_atomic32_test_and_set(&ad->link_thread_running)) {
+			if (__atomic_test_and_set(&ad->link_thread_running, __ATOMIC_SEQ_CST)) {
 				/* To avoid race condition between threads, set
 				 * the IXGBE_FLAG_NEED_LINK_CONFIG flag only
 				 * when there is no link thread running.
@@ -4330,7 +4330,7 @@ static int ixgbevf_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
 					dev) < 0) {
 					PMD_DRV_LOG(ERR,
 						"Create link thread failed!");
-					rte_atomic32_clear(&ad->link_thread_running);
+					__atomic_clear(&ad->link_thread_running, __ATOMIC_SEQ_CST);
 				}
 			} else {
 				PMD_DRV_LOG(ERR,
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.h b/drivers/net/ixgbe/ixgbe_ethdev.h
index 48290af..2ca6998 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.h
+++ b/drivers/net/ixgbe/ixgbe_ethdev.h
@@ -6,6 +6,7 @@
 #define _IXGBE_ETHDEV_H_
 
 #include <stdint.h>
+#include <stdbool.h>
 #include <sys/queue.h>
 
 #include "base/ixgbe_type.h"
@@ -510,7 +511,7 @@ struct ixgbe_adapter {
 	 */
 	uint8_t pflink_fullchk;
 	uint8_t mac_ctrl_frame_fwd;
-	rte_atomic32_t link_thread_running;
+	bool link_thread_running;
 	pthread_t link_thread_tid;
 };
 
diff --git a/drivers/net/ixgbe/ixgbe_flow.c b/drivers/net/ixgbe/ixgbe_flow.c
index eac81ee..687341c 100644
--- a/drivers/net/ixgbe/ixgbe_flow.c
+++ b/drivers/net/ixgbe/ixgbe_flow.c
@@ -18,7 +18,6 @@
 #include <rte_log.h>
 #include <rte_debug.h>
 #include <rte_pci.h>
-#include <rte_atomic.h>
 #include <rte_branch_prediction.h>
 #include <rte_memory.h>
 #include <rte_eal.h>
diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index c9d6ca9..8d7251d 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -27,7 +27,6 @@
 #include <rte_eal.h>
 #include <rte_per_lcore.h>
 #include <rte_lcore.h>
-#include <rte_atomic.h>
 #include <rte_branch_prediction.h>
 #include <rte_mempool.h>
 #include <rte_malloc.h>
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 83+ messages in thread
- * [PATCH 6/7] net/null: replace rte atomics with GCC builtin atomics
  2023-03-17 20:19 [PATCH 0/7] replace rte atomics with GCC builtin atomics Tyler Retzlaff
                   ` (4 preceding siblings ...)
  2023-03-17 20:19 ` [PATCH 5/7] net/ixgbe: " Tyler Retzlaff
@ 2023-03-17 20:19 ` Tyler Retzlaff
  2023-03-17 20:44   ` Tyler Retzlaff
  2023-03-17 20:19 ` [PATCH 7/7] net/ring: " Tyler Retzlaff
                   ` (5 subsequent siblings)
  11 siblings, 1 reply; 83+ messages in thread
From: Tyler Retzlaff @ 2023-03-17 20:19 UTC (permalink / raw)
  To: dev; +Cc: Honnappa.Nagarahalli, Ruifeng.Wang, thomas, Tyler Retzlaff
Replace the use of rte_atomic.h types and functions, instead use GCC
supplied C++11 memory model builtins.
Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 drivers/net/null/rte_eth_null.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/drivers/net/null/rte_eth_null.c b/drivers/net/null/rte_eth_null.c
index 47d9554..195c3bd 100644
--- a/drivers/net/null/rte_eth_null.c
+++ b/drivers/net/null/rte_eth_null.c
@@ -37,8 +37,8 @@ struct null_queue {
 	struct rte_mempool *mb_pool;
 	struct rte_mbuf *dummy_packet;
 
-	rte_atomic64_t rx_pkts;
-	rte_atomic64_t tx_pkts;
+	int64_t rx_pkts;
+	int64_t tx_pkts;
 };
 
 struct pmd_options {
@@ -101,7 +101,7 @@ struct pmd_internals {
 		bufs[i]->port = h->internals->port_id;
 	}
 
-	rte_atomic64_add(&(h->rx_pkts), i);
+	__atomic_fetch_add(&h->rx_pkts, i, __ATOMIC_SEQ_CST);
 
 	return i;
 }
@@ -128,7 +128,7 @@ struct pmd_internals {
 		bufs[i]->port = h->internals->port_id;
 	}
 
-	rte_atomic64_add(&(h->rx_pkts), i);
+	__atomic_fetch_add(&h->rx_pkts, i, __ATOMIC_SEQ_CST);
 
 	return i;
 }
@@ -152,7 +152,7 @@ struct pmd_internals {
 	for (i = 0; i < nb_bufs; i++)
 		rte_pktmbuf_free(bufs[i]);
 
-	rte_atomic64_add(&(h->tx_pkts), i);
+	__atomic_fetch_add(&h->tx_pkts, i, __ATOMIC_SEQ_CST);
 
 	return i;
 }
@@ -174,7 +174,7 @@ struct pmd_internals {
 		rte_pktmbuf_free(bufs[i]);
 	}
 
-	rte_atomic64_add(&(h->tx_pkts), i);
+	__atomic_fetch_add(&h->tx_pkts, i, __ATOMIC_SEQ_CST);
 
 	return i;
 }
@@ -317,7 +317,7 @@ struct pmd_internals {
 				RTE_DIM(internal->rx_null_queues)));
 	for (i = 0; i < num_stats; i++) {
 		igb_stats->q_ipackets[i] =
-			internal->rx_null_queues[i].rx_pkts.cnt;
+			internal->rx_null_queues[i].rx_pkts;
 		rx_total += igb_stats->q_ipackets[i];
 	}
 
@@ -326,7 +326,7 @@ struct pmd_internals {
 				RTE_DIM(internal->tx_null_queues)));
 	for (i = 0; i < num_stats; i++) {
 		igb_stats->q_opackets[i] =
-			internal->tx_null_queues[i].tx_pkts.cnt;
+			internal->tx_null_queues[i].tx_pkts;
 		tx_total += igb_stats->q_opackets[i];
 	}
 
@@ -347,9 +347,9 @@ struct pmd_internals {
 
 	internal = dev->data->dev_private;
 	for (i = 0; i < RTE_DIM(internal->rx_null_queues); i++)
-		internal->rx_null_queues[i].rx_pkts.cnt = 0;
+		internal->rx_null_queues[i].rx_pkts = 0;
 	for (i = 0; i < RTE_DIM(internal->tx_null_queues); i++)
-		internal->tx_null_queues[i].tx_pkts.cnt = 0;
+		internal->tx_null_queues[i].tx_pkts = 0;
 
 	return 0;
 }
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 83+ messages in thread
- * Re: [PATCH 6/7] net/null: replace rte atomics with GCC builtin atomics
  2023-03-17 20:19 ` [PATCH 6/7] net/null: " Tyler Retzlaff
@ 2023-03-17 20:44   ` Tyler Retzlaff
  0 siblings, 0 replies; 83+ messages in thread
From: Tyler Retzlaff @ 2023-03-17 20:44 UTC (permalink / raw)
  To: dev; +Cc: Honnappa.Nagarahalli, Ruifeng.Wang, thomas
On Fri, Mar 17, 2023 at 01:19:47PM -0700, Tyler Retzlaff wrote:
> Replace the use of rte_atomic.h types and functions, instead use GCC
> supplied C++11 memory model builtins.
> 
> Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> ---
>  drivers/net/null/rte_eth_null.c | 20 ++++++++++----------
>  1 file changed, 10 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/net/null/rte_eth_null.c b/drivers/net/null/rte_eth_null.c
> index 47d9554..195c3bd 100644
> --- a/drivers/net/null/rte_eth_null.c
> +++ b/drivers/net/null/rte_eth_null.c
> @@ -37,8 +37,8 @@ struct null_queue {
>  	struct rte_mempool *mb_pool;
>  	struct rte_mbuf *dummy_packet;
>  
> -	rte_atomic64_t rx_pkts;
> -	rte_atomic64_t tx_pkts;
> +	int64_t rx_pkts;
> +	int64_t tx_pkts;
>  };
>  
>  struct pmd_options {
> @@ -101,7 +101,7 @@ struct pmd_internals {
>  		bufs[i]->port = h->internals->port_id;
>  	}
>  
> -	rte_atomic64_add(&(h->rx_pkts), i);
> +	__atomic_fetch_add(&h->rx_pkts, i, __ATOMIC_SEQ_CST);
>  
>  	return i;
>  }
> @@ -128,7 +128,7 @@ struct pmd_internals {
>  		bufs[i]->port = h->internals->port_id;
>  	}
>  
> -	rte_atomic64_add(&(h->rx_pkts), i);
> +	__atomic_fetch_add(&h->rx_pkts, i, __ATOMIC_SEQ_CST);
>  
>  	return i;
>  }
> @@ -152,7 +152,7 @@ struct pmd_internals {
>  	for (i = 0; i < nb_bufs; i++)
>  		rte_pktmbuf_free(bufs[i]);
>  
> -	rte_atomic64_add(&(h->tx_pkts), i);
> +	__atomic_fetch_add(&h->tx_pkts, i, __ATOMIC_SEQ_CST);
>  
>  	return i;
>  }
> @@ -174,7 +174,7 @@ struct pmd_internals {
>  		rte_pktmbuf_free(bufs[i]);
>  	}
>  
> -	rte_atomic64_add(&(h->tx_pkts), i);
> +	__atomic_fetch_add(&h->tx_pkts, i, __ATOMIC_SEQ_CST);
>  
>  	return i;
>  }
> @@ -317,7 +317,7 @@ struct pmd_internals {
>  				RTE_DIM(internal->rx_null_queues)));
>  	for (i = 0; i < num_stats; i++) {
>  		igb_stats->q_ipackets[i] =
> -			internal->rx_null_queues[i].rx_pkts.cnt;
> +			internal->rx_null_queues[i].rx_pkts;
>  		rx_total += igb_stats->q_ipackets[i];
>  	}
>  
> @@ -326,7 +326,7 @@ struct pmd_internals {
>  				RTE_DIM(internal->tx_null_queues)));
>  	for (i = 0; i < num_stats; i++) {
>  		igb_stats->q_opackets[i] =
> -			internal->tx_null_queues[i].tx_pkts.cnt;
> +			internal->tx_null_queues[i].tx_pkts;
>  		tx_total += igb_stats->q_opackets[i];
>  	}
>  
these variables are operated on with atomic builtins in other places,
yet here they are being non-atomically loaded. should probably be using
__atomic_load_n(...)
> @@ -347,9 +347,9 @@ struct pmd_internals {
>  
>  	internal = dev->data->dev_private;
>  	for (i = 0; i < RTE_DIM(internal->rx_null_queues); i++)
> -		internal->rx_null_queues[i].rx_pkts.cnt = 0;
> +		internal->rx_null_queues[i].rx_pkts = 0;
>  	for (i = 0; i < RTE_DIM(internal->tx_null_queues); i++)
> -		internal->tx_null_queues[i].tx_pkts.cnt = 0;
> +		internal->tx_null_queues[i].tx_pkts = 0;
same thing, these should probably be __atomic_store_n(...)
^ permalink raw reply	[flat|nested] 83+ messages in thread
 
- * [PATCH 7/7] net/ring: replace rte atomics with GCC builtin atomics
  2023-03-17 20:19 [PATCH 0/7] replace rte atomics with GCC builtin atomics Tyler Retzlaff
                   ` (5 preceding siblings ...)
  2023-03-17 20:19 ` [PATCH 6/7] net/null: " Tyler Retzlaff
@ 2023-03-17 20:19 ` Tyler Retzlaff
  2023-03-17 21:42 ` [PATCH 0/7] " Stephen Hemminger
                   ` (4 subsequent siblings)
  11 siblings, 0 replies; 83+ messages in thread
From: Tyler Retzlaff @ 2023-03-17 20:19 UTC (permalink / raw)
  To: dev; +Cc: Honnappa.Nagarahalli, Ruifeng.Wang, thomas, Tyler Retzlaff
Replace the use of rte_atomic.h types and functions, instead use GCC
supplied C++11 memory model builtins.
Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 drivers/net/ring/rte_eth_ring.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ring/rte_eth_ring.c b/drivers/net/ring/rte_eth_ring.c
index e8bc9b6..15d4a3d 100644
--- a/drivers/net/ring/rte_eth_ring.c
+++ b/drivers/net/ring/rte_eth_ring.c
@@ -44,8 +44,8 @@ enum dev_action {
 
 struct ring_queue {
 	struct rte_ring *rng;
-	rte_atomic64_t rx_pkts;
-	rte_atomic64_t tx_pkts;
+	int64_t rx_pkts;
+	int64_t tx_pkts;
 };
 
 struct pmd_internals {
@@ -80,9 +80,9 @@ struct pmd_internals {
 	const uint16_t nb_rx = (uint16_t)rte_ring_dequeue_burst(r->rng,
 			ptrs, nb_bufs, NULL);
 	if (r->rng->flags & RING_F_SC_DEQ)
-		r->rx_pkts.cnt += nb_rx;
+		r->rx_pkts += nb_rx;
 	else
-		rte_atomic64_add(&(r->rx_pkts), nb_rx);
+		__atomic_fetch_add(&r->rx_pkts, nb_rx, __ATOMIC_SEQ_CST);
 	return nb_rx;
 }
 
@@ -94,9 +94,9 @@ struct pmd_internals {
 	const uint16_t nb_tx = (uint16_t)rte_ring_enqueue_burst(r->rng,
 			ptrs, nb_bufs, NULL);
 	if (r->rng->flags & RING_F_SP_ENQ)
-		r->tx_pkts.cnt += nb_tx;
+		r->tx_pkts += nb_tx;
 	else
-		rte_atomic64_add(&(r->tx_pkts), nb_tx);
+		__atomic_fetch_add(&r->tx_pkts, nb_tx, __ATOMIC_SEQ_CST);
 	return nb_tx;
 }
 
@@ -184,13 +184,13 @@ struct pmd_internals {
 
 	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
 			i < dev->data->nb_rx_queues; i++) {
-		stats->q_ipackets[i] = internal->rx_ring_queues[i].rx_pkts.cnt;
+		stats->q_ipackets[i] = internal->rx_ring_queues[i].rx_pkts;
 		rx_total += stats->q_ipackets[i];
 	}
 
 	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
 			i < dev->data->nb_tx_queues; i++) {
-		stats->q_opackets[i] = internal->tx_ring_queues[i].tx_pkts.cnt;
+		stats->q_opackets[i] = internal->tx_ring_queues[i].tx_pkts;
 		tx_total += stats->q_opackets[i];
 	}
 
@@ -207,9 +207,9 @@ struct pmd_internals {
 	struct pmd_internals *internal = dev->data->dev_private;
 
 	for (i = 0; i < dev->data->nb_rx_queues; i++)
-		internal->rx_ring_queues[i].rx_pkts.cnt = 0;
+		internal->rx_ring_queues[i].rx_pkts = 0;
 	for (i = 0; i < dev->data->nb_tx_queues; i++)
-		internal->tx_ring_queues[i].tx_pkts.cnt = 0;
+		internal->tx_ring_queues[i].tx_pkts = 0;
 
 	return 0;
 }
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 83+ messages in thread
- * Re: [PATCH 0/7] replace rte atomics with GCC builtin atomics
  2023-03-17 20:19 [PATCH 0/7] replace rte atomics with GCC builtin atomics Tyler Retzlaff
                   ` (6 preceding siblings ...)
  2023-03-17 20:19 ` [PATCH 7/7] net/ring: " Tyler Retzlaff
@ 2023-03-17 21:42 ` Stephen Hemminger
  2023-03-17 21:49   ` Tyler Retzlaff
  2023-03-23 22:34 ` [PATCH v2 " Tyler Retzlaff
                   ` (3 subsequent siblings)
  11 siblings, 1 reply; 83+ messages in thread
From: Stephen Hemminger @ 2023-03-17 21:42 UTC (permalink / raw)
  To: Tyler Retzlaff; +Cc: dev, Honnappa.Nagarahalli, Ruifeng.Wang, thomas
On Fri, 17 Mar 2023 13:19:41 -0700
Tyler Retzlaff <roretzla@linux.microsoft.com> wrote:
> Replace the use of rte_atomic.h types and functions, instead use GCC
> supplied C++11 memory model builtins.
> 
> This series covers the libraries and drivers that are built on Windows.
> 
> The code has been converted to use the __atomic builtins, but
> during conversion i noticed that there may be some additional issues
> that need to be addressed.
I don't think all these cmpset need to use SEQ_CST.
Especially for the places where it is used in a loop, it might
be more efficient with some of the other memory models.
^ permalink raw reply	[flat|nested] 83+ messages in thread
- * Re: [PATCH 0/7] replace rte atomics with GCC builtin atomics
  2023-03-17 21:42 ` [PATCH 0/7] " Stephen Hemminger
@ 2023-03-17 21:49   ` Tyler Retzlaff
  2023-03-22 11:28     ` Morten Brørup
  0 siblings, 1 reply; 83+ messages in thread
From: Tyler Retzlaff @ 2023-03-17 21:49 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dev, Honnappa.Nagarahalli, Ruifeng.Wang, thomas
On Fri, Mar 17, 2023 at 02:42:26PM -0700, Stephen Hemminger wrote:
> On Fri, 17 Mar 2023 13:19:41 -0700
> Tyler Retzlaff <roretzla@linux.microsoft.com> wrote:
> 
> > Replace the use of rte_atomic.h types and functions, instead use GCC
> > supplied C++11 memory model builtins.
> > 
> > This series covers the libraries and drivers that are built on Windows.
> > 
> > The code has been converted to use the __atomic builtins, but
> > during conversion i noticed that there may be some additional issues
> > that need to be addressed.
> 
> I don't think all these cmpset need to use SEQ_CST.
> Especially for the places where it is used in a loop, it might
> be more efficient with some of the other memory models.
i agree.
however, i'm not trying to improve the code with this change, just
decouple it from rte_atomic.h, so i'm trying my best to avoid any
unnecessary semantic change.
certainly if the maintainers of this code wish to weaken the ordering
where appropriate after the change is merged they can do so, and handily
this change has enabled them to do so easily, allowing them to test just
their change in isolation.
^ permalink raw reply	[flat|nested] 83+ messages in thread 
- * RE: [PATCH 0/7] replace rte atomics with GCC builtin atomics
  2023-03-17 21:49   ` Tyler Retzlaff
@ 2023-03-22 11:28     ` Morten Brørup
  2023-03-22 14:21       ` Tyler Retzlaff
  0 siblings, 1 reply; 83+ messages in thread
From: Morten Brørup @ 2023-03-22 11:28 UTC (permalink / raw)
  To: Tyler Retzlaff, Stephen Hemminger
  Cc: dev, Honnappa.Nagarahalli, Ruifeng.Wang, thomas
> From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> Sent: Friday, 17 March 2023 22.49
> 
> On Fri, Mar 17, 2023 at 02:42:26PM -0700, Stephen Hemminger wrote:
> > On Fri, 17 Mar 2023 13:19:41 -0700
> > Tyler Retzlaff <roretzla@linux.microsoft.com> wrote:
> >
> > > Replace the use of rte_atomic.h types and functions, instead use GCC
> > > supplied C++11 memory model builtins.
> > >
> > > This series covers the libraries and drivers that are built on Windows.
> > >
> > > The code has be converted to use the __atomic builtins but there are
> > > additional during conversion i notice that there may be some issues
> > > that need to be addressed.
> >
> > I don't think all these cmpset need to use SEQ_CST.
> > Especially for the places where it is used a loop, might
> > be more efficient with some of the other memory models.
> 
> i agree.
> 
> however, i'm not trying to improve the code with this change, just
> decouple it from rte_atomics.h so trying my best to avoid any
> unnecessary semantic change.
> 
> certainly if the maintainers of this code wish to weaken the ordering
> where appropriate after the change is merged they can do so and handily
> this change has enabled them to do so easily allowing them to test just
> their change in isolation.
I agree with the two-step approach, where this first step is a simple search-and-replacement; but I insist that you add a FIXME or similar note where you have blindly used SEQ_CST, indicating that the memory order needs to be reviewed and potentially corrected.
Also, in a couple of the drivers, you are using int64_t for packet counters. These cannot be negative and should be uint64_t. And AFAIK, such counters can use RELAXED memory order.
^ permalink raw reply	[flat|nested] 83+ messages in thread 
- * Re: [PATCH 0/7] replace rte atomics with GCC builtin atomics
  2023-03-22 11:28     ` Morten Brørup
@ 2023-03-22 14:21       ` Tyler Retzlaff
  2023-03-22 14:58         ` Morten Brørup
  0 siblings, 1 reply; 83+ messages in thread
From: Tyler Retzlaff @ 2023-03-22 14:21 UTC (permalink / raw)
  To: Morten Brørup
  Cc: Stephen Hemminger, dev, Honnappa.Nagarahalli, Ruifeng.Wang, thomas
On Wed, Mar 22, 2023 at 12:28:44PM +0100, Morten Brørup wrote:
> > From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> > Sent: Friday, 17 March 2023 22.49
> > 
> > On Fri, Mar 17, 2023 at 02:42:26PM -0700, Stephen Hemminger wrote:
> > > On Fri, 17 Mar 2023 13:19:41 -0700
> > > Tyler Retzlaff <roretzla@linux.microsoft.com> wrote:
> > >
> > > > Replace the use of rte_atomic.h types and functions, instead use GCC
> > > > supplied C++11 memory model builtins.
> > > >
> > > > This series covers the libraries and drivers that are built on Windows.
> > > >
> > > > The code has be converted to use the __atomic builtins but there are
> > > > additional during conversion i notice that there may be some issues
> > > > that need to be addressed.
> > >
> > > I don't think all these cmpset need to use SEQ_CST.
> > > Especially for the places where it is used a loop, might
> > > be more efficient with some of the other memory models.
> > 
> > i agree.
> > 
> > however, i'm not trying to improve the code with this change, just
> > decouple it from rte_atomics.h so trying my best to avoid any
> > unnecessary semantic change.
> > 
> > certainly if the maintainers of this code wish to weaken the ordering
> > where appropriate after the change is merged they can do so and handily
> > this change has enabled them to do so easily allowing them to test just
> > their change in isolation.
> 
> I agree with the two-step approach, where this first step is a simple search-and-replacement; but I insist that you add a FIXME or similar note where you have blindly used SEQ_CST, indicating that the memory order needs to be reviewed and potentially corrected.
i think the maintainers need to take some responsibility, if they see
optimizations they missed when previously writing the code they need to
follow up with a patch themselves. i can't do everything for them and
marking things i'm not sure about will only lead to me having to churn
patch series to remove the unwanted comments later.
keep in mind i have to touch each of these again when converting to
standard so that's a better time to review ~everything in more detail
because when converting to standard that's when suddenly you get a bunch
of code generation that is "fallback" to seq_cst that isn't happening now.
the series that converts to standard needs to be up for review as soon
as possible to maximize available time for feedback before 23.11 so it
would be better to get the simpler cut & paste normalizing the code out
of the way to unblock that series submission.
> 
> Also, in a couple of the drivers, you are using int64_t for packet counters. These cannot be negative and should be uint64_t. And AFAIK, such counters can use RELAXED memory order.
i know you don't mean to say i selected the types and rather that the
types that were selected are not quite correct for their usage. again,
the review that actually adopts std atomics is a better place to make
any potential type changes since we are "breaking" the API for 23.11
anyway. further, the std atomics series technically changes all the
types so it's probably better to make one type change then rather than
one now and one later.
i think it would be best to get these validated and merged asap so we
can get to the std atomics review. when that series is up let's discuss
further how i can mark areas of concern, with that series i expect there
will have to be some changes in order to avoid minor regressions.
thanks!
^ permalink raw reply	[flat|nested] 83+ messages in thread 
- * RE: [PATCH 0/7] replace rte atomics with GCC builtin atomics
  2023-03-22 14:21       ` Tyler Retzlaff
@ 2023-03-22 14:58         ` Morten Brørup
  2023-03-22 15:29           ` Tyler Retzlaff
  0 siblings, 1 reply; 83+ messages in thread
From: Morten Brørup @ 2023-03-22 14:58 UTC (permalink / raw)
  To: Tyler Retzlaff
  Cc: Stephen Hemminger, dev, Honnappa.Nagarahalli, Ruifeng.Wang, thomas
> From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> Sent: Wednesday, 22 March 2023 15.22
> 
> On Wed, Mar 22, 2023 at 12:28:44PM +0100, Morten Brørup wrote:
> > > From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> > > Sent: Friday, 17 March 2023 22.49
> > >
> > > On Fri, Mar 17, 2023 at 02:42:26PM -0700, Stephen Hemminger wrote:
> > > > On Fri, 17 Mar 2023 13:19:41 -0700
> > > > Tyler Retzlaff <roretzla@linux.microsoft.com> wrote:
> > > >
> > > > > Replace the use of rte_atomic.h types and functions, instead use GCC
> > > > > supplied C++11 memory model builtins.
> > > > >
> > > > > This series covers the libraries and drivers that are built on
> Windows.
> > > > >
> > > > > The code has be converted to use the __atomic builtins but there are
> > > > > additional during conversion i notice that there may be some issues
> > > > > that need to be addressed.
> > > >
> > > > I don't think all these cmpset need to use SEQ_CST.
> > > > Especially for the places where it is used a loop, might
> > > > be more efficient with some of the other memory models.
> > >
> > > i agree.
> > >
> > > however, i'm not trying to improve the code with this change, just
> > > decouple it from rte_atomics.h so trying my best to avoid any
> > > unnecessary semantic change.
> > >
> > > certainly if the maintainers of this code wish to weaken the ordering
> > > where appropriate after the change is merged they can do so and handily
> > > this change has enabled them to do so easily allowing them to test just
> > > their change in isolation.
> >
> > I agree with the two-step approach, where this first step is a simple
> search-and-replacement; but I insist that you add a FIXME or similar note
> where you have blindly used SEQ_CST, indicating that the memory order needs to
> be reviewed and potentially corrected.
> 
> i think the maintainers need to take some responsibility, if they see
> optimizations they missed when previously writing the code they need to
> follow up with a patch themselves. i can't do everything for them and
> marking things i'm not sure about will only lead to me having to churn
> patch series to remove the unwanted comments later.
The previous atomic functions didn't have the "memory order" parameter, so the maintainers didn't have to think about it - and thus they didn't miss any optimizations when accepting the code.
I also agree 100 % that it is not your responsibility to consider or determine which memory order is appropriate!
But I think you should mark the locations where you are changing from the old rte_atomic functions (where no memory order optimization was available) to the new functions - to highlight where the option of memory ordering has been introduced and knowingly ignored (by you).
> 
> keep in mind i have to touch each of these again when converting to
> standard so that's a better time to review ~everything in more detail
> because when converting to standard that's when suddenly you get a bunch
> of code generation that is "fallback" to seq_cst that isn't happening now.
I think you should do it when replacing the rte_atomic functions with the __atomic functions. It will make it easier to see where the memory order was knowingly ignored, and should be reviewed for optimization.
> 
> the series that converts to standard needs to be up for review as soon
> as possible to maximize available time for feedback before 23.11 so it
> would be better to get the simpler cut & paste normalizing the code out
> of the way to unblock that series submission.
> 
> >
> > Also, in a couple of the drivers, you are using int64_t for packet counters.
> These cannot be negative and should be uint64_t. And AFAIK, such counters can
> use RELAXED memory order.
> 
> i know you don't mean to say i selected the types and rather that the
> types that were selected are not quite correct for their usage.
Yes; the previous types were also signed, and you didn't change that.
> again
> on the review that actually adopts std atomics is a better place to make
> any potential type changes since we are "breaking" the API for 23.11
> anyway. further, the std atomics series technically changes all the
> types so it's probably better to make one type change then rather than
> one now and one later.
> 
> i think it would be best to get these validated and merged asap so we
> can get to the std atomics review. when that series is up let's discuss
> further how i can mark areas of concern, with that series i expect there
> will have to be some changes in order to avoid minor regressions.
> 
> thanks!
I thought it would be better to catch these details (i.e. memory ordering and signedness) early on, but I now understand that you planned to do it in a later step. So I'll let you proceed as you have planned.
Thanks for all your work on this, Tyler. It is much appreciated!
-Morten
^ permalink raw reply	[flat|nested] 83+ messages in thread 
- * Re: [PATCH 0/7] replace rte atomics with GCC builtin atomics
  2023-03-22 14:58         ` Morten Brørup
@ 2023-03-22 15:29           ` Tyler Retzlaff
  2023-03-22 16:13             ` Morten Brørup
  0 siblings, 1 reply; 83+ messages in thread
From: Tyler Retzlaff @ 2023-03-22 15:29 UTC (permalink / raw)
  To: Morten Brørup
  Cc: Stephen Hemminger, dev, Honnappa.Nagarahalli, Ruifeng.Wang, thomas
On Wed, Mar 22, 2023 at 03:58:07PM +0100, Morten Brørup wrote:
> > From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> > Sent: Wednesday, 22 March 2023 15.22
> > 
> > On Wed, Mar 22, 2023 at 12:28:44PM +0100, Morten Brørup wrote:
> > > > From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> > > > Sent: Friday, 17 March 2023 22.49
> > > >
> > > > On Fri, Mar 17, 2023 at 02:42:26PM -0700, Stephen Hemminger wrote:
> > > > > On Fri, 17 Mar 2023 13:19:41 -0700
> > > > > Tyler Retzlaff <roretzla@linux.microsoft.com> wrote:
> > > > >
> > > > > > Replace the use of rte_atomic.h types and functions, instead use GCC
> > > > > > supplied C++11 memory model builtins.
> > > > > >
> > > > > > This series covers the libraries and drivers that are built on
> > Windows.
> > > > > >
> > > > > > The code has be converted to use the __atomic builtins but there are
> > > > > > additional during conversion i notice that there may be some issues
> > > > > > that need to be addressed.
> > > > >
> > > > > I don't think all these cmpset need to use SEQ_CST.
> > > > > Especially for the places where it is used a loop, might
> > > > > be more efficient with some of the other memory models.
> > > >
> > > > i agree.
> > > >
> > > > however, i'm not trying to improve the code with this change, just
> > > > decouple it from rte_atomics.h so trying my best to avoid any
> > > > unnecessary semantic change.
> > > >
> > > > certainly if the maintainers of this code wish to weaken the ordering
> > > > where appropriate after the change is merged they can do so and handily
> > > > this change has enabled them to do so easily allowing them to test just
> > > > their change in isolation.
> > >
> > > I agree with the two-step approach, where this first step is a simple
> > search-and-replacement; but I insist that you add a FIXME or similar note
> > where you have blindly used SEQ_CST, indicating that the memory order needs to
> > be reviewed and potentially corrected.
> > 
> > i think the maintainers need to take some responsibility, if they see
> > optimizations they missed when previously writing the code they need to
> > follow up with a patch themselves. i can't do everything for them and
> > marking things i'm not sure about will only lead to me having to churn
> > patch series to remove the unwanted comments later.
> 
> The previous atomic functions didn't have the "memory order" parameter, so the maintainers didn't have to think about it - and thus they didn't miss any optimizations when accepting the code.
> 
> I also agree 100 % that it is not your responsibility to consider or determine which memory order is appropriate!
> 
> But I think you should mark the locations where you are changing from the old rte_atomic functions (where no memory order optimization was available) to the new functions - to highlight where the option of memory ordering has been introduced and knowingly ignored (by you).
> 
first, i have to apologize i confused myself about which of the many
patch series i have up right now that you were commenting on.
let me ask for clarification in relation to this series.
isn't that every single usage of the rte_atomic APIs? i mean are you
literally asking for the entire patch series to look like the following
patch snippet with the expectation that maintainers will come along and
clean up/review after this series is merged?
-rte_atomic_add32(&o, v);
+//FIXME: opportunity for relaxing ordering constraint, please review
+__atomic_fetch_add(&o, v, order);
this would just be a mechanical addition to this series so i can
certainly accommodate that. i thought something more complicated was
being asked for. if this is all, then sure, no problem.
> > keep in mind i have to touch each of these again when converting to
> > standard so that's a better time to review ~everything in more detail
> > because when converting to standard that's when suddenly you get a bunch
> > of code generation that is "fallback" to seq_cst that isn't happening now.
> 
> I think you should to do it when replacing the rte_atomic functions with the __atomic functions. It will make it easier to see where the memory order was knowingly ignored, and should be reviewed for optimization.
> 
> > 
> > the series that converts to standard needs to be up for review as soon
> > as possible to maximize available time for feedback before 23.11 so it
> > would be better to get the simpler cut & paste normalizing the code out
> > of the way to unblock that series submission.
> > 
> > >
> > > Also, in a couple of the drivers, you are using int64_t for packet counters.
> > These cannot be negative and should be uint64_t. And AFAIK, such counters can
> > use RELAXED memory order.
> > 
> > i know you don't mean to say i selected the types and rather that the
> > types that were selected are not quite correct for their usage.
> 
> Yes; the previous types were also signed, and you didn't change that.
> 
> > again
> > on the review that actually adopts std atomics is a better place to make
> > any potential type changes since we are "breaking" the API for 23.11
> > anyway. further, the std atomics series technically changes all the
> > types so it's probably better to make one type change then rather than
> > one now and one later.
> > 
> > i think it would be best to get these validated and merged asap so we
> > can get to the std atomics review. when that series is up let's discuss
> > further how i can mark areas of concern, with that series i expect there
> > will have to be some changes in order to avoid minor regressions.
> > 
> > thanks!
> 
> I thought it would be better to catch these details (i.e. memory ordering and signedness) early on, but I now understand that you planned to do it in a later step. So I'll let you proceed as you have planned.
> 
> Thanks for all your work on this, Tyler. It is much appreciated!
again, sorry for the confusion. the sooner i can get some of these merged,
the easier it will be for me to manage the final series. i hope
david/thomas can merge the simple normalization patches as soon as the 23.03
cycle is complete.
> 
> -Morten
^ permalink raw reply	[flat|nested] 83+ messages in thread 
- * RE: [PATCH 0/7] replace rte atomics with GCC builtin atomics
  2023-03-22 15:29           ` Tyler Retzlaff
@ 2023-03-22 16:13             ` Morten Brørup
  2023-03-22 16:40               ` Honnappa Nagarahalli
  0 siblings, 1 reply; 83+ messages in thread
From: Morten Brørup @ 2023-03-22 16:13 UTC (permalink / raw)
  To: Tyler Retzlaff
  Cc: Stephen Hemminger, dev, Honnappa.Nagarahalli, Ruifeng.Wang, thomas
> From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> Sent: Wednesday, 22 March 2023 16.30
> 
> On Wed, Mar 22, 2023 at 03:58:07PM +0100, Morten Brørup wrote:
> > > From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> > > Sent: Wednesday, 22 March 2023 15.22
> > >
> > > On Wed, Mar 22, 2023 at 12:28:44PM +0100, Morten Brørup wrote:
> > > > > From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> > > > > Sent: Friday, 17 March 2023 22.49
> > > > >
> > > > > On Fri, Mar 17, 2023 at 02:42:26PM -0700, Stephen Hemminger wrote:
> > > > > > On Fri, 17 Mar 2023 13:19:41 -0700
> > > > > > Tyler Retzlaff <roretzla@linux.microsoft.com> wrote:
> > > > > >
> > > > > > > Replace the use of rte_atomic.h types and functions, instead use
> GCC
> > > > > > > supplied C++11 memory model builtins.
> > > > > > >
> > > > > > > This series covers the libraries and drivers that are built on
> > > Windows.
> > > > > > >
> > > > > > > The code has be converted to use the __atomic builtins but there
> are
> > > > > > > additional during conversion i notice that there may be some
> issues
> > > > > > > that need to be addressed.
> > > > > >
> > > > > > I don't think all these cmpset need to use SEQ_CST.
> > > > > > Especially for the places where it is used a loop, might
> > > > > > be more efficient with some of the other memory models.
> > > > >
> > > > > i agree.
> > > > >
> > > > > however, i'm not trying to improve the code with this change, just
> > > > > decouple it from rte_atomics.h so trying my best to avoid any
> > > > > unnecessary semantic change.
> > > > >
> > > > > certainly if the maintainers of this code wish to weaken the ordering
> > > > > where appropriate after the change is merged they can do so and
> handily
> > > > > this change has enabled them to do so easily allowing them to test
> just
> > > > > their change in isolation.
> > > >
> > > > I agree with the two-step approach, where this first step is a simple
> > > search-and-replacement; but I insist that you add a FIXME or similar note
> > > where you have blindly used SEQ_CST, indicating that the memory order
> needs to
> > > be reviewed and potentially corrected.
> > >
> > > i think the maintainers need to take some responsibility, if they see
> > > optimizations they missed when previously writing the code they need to
> > > follow up with a patch themselves. i can't do everything for them and
> > > marking things i'm not sure about will only lead to me having to churn
> > > patch series to remove the unwanted comments later.
> >
> > The previous atomic functions didn't have the "memory order" parameter, so
> the maintainers didn't have to think about it - and thus they didn't miss any
> optimizations when accepting the code.
> >
> > I also agree 100 % that it is not your responsibility to consider or
> determine which memory order is appropriate!
> >
> > But I think you should mark the locations where you are changing from the
> old rte_atomic functions (where no memory order optimization was available) to
> the new functions - to highlight where the option of memory ordering has been
> introduced and knowingly ignored (by you).
> >
> 
> first, i have to apologize i confused myself about which of the many
> patch series i have up right now that you were commenting on.
No worries... you are rushing through quite an effort for this, so a little confusion is perfectly understandable. Especially when I'm replying to an ageing email. :-)
> 
> let me ask for clarification in relation to this series.
> 
> isn't that every single usage of the rte_atomic APIs?
Probably, yes.
> i mean are you
> literally asking for the entire patch series to look like the following
> patch snippet with the expectation that maintainers will come along and
> clean up/review after this series is merged?
> 
> -rte_atomic_add32(&o, v);
> +//FIXME: opportunity for relaxing ordering constraint, please review
> +__atomic_fetch_add(&o, v, order);
Exactly. And something similar for the rte_atomicXX_t variables changed to intXX_t, such as the packet counters.
Realistically, I don't expect the maintainers to clean them up anytime soon. The purpose is to make the FIXMEs stick until someone eventually cleans them up, so they are not forgotten as time passes.
> 
> this would just be a mechanical addition to this series so i can
> certainly accomodate that, i thought something more complicated was
> being asked for. if this is all, then sure no problem.
Great.
> 
> > > keep in mind i have to touch each of these again when converting to
> > > standard so that's a better time to review ~everything in more detail
> > > because when converting to standard that's when suddenly you get a bunch
> > > of code generation that is "fallback" to seq_cst that isn't happening now.
> >
> > I think you should to do it when replacing the rte_atomic functions with the
> __atomic functions. It will make it easier to see where the memory order was
> knowingly ignored, and should be reviewed for optimization.
> >
> > >
> > > the series that converts to standard needs to be up for review as soon
> > > as possible to maximize available time for feedback before 23.11 so it
> > > would be better to get the simpler cut & paste normalizing the code out
> > > of the way to unblock that series submission.
> > >
> > > >
> > > > Also, in a couple of the drivers, you are using int64_t for packet
> counters.
> > > These cannot be negative and should be uint64_t. And AFAIK, such counters
> can
> > > use RELAXED memory order.
> > >
> > > i know you don't mean to say i selected the types and rather that the
> > > types that were selected are not quite correct for their usage.
> >
> > Yes; the previous types were also signed, and you didn't change that.
> >
> > > again
> > > on the review that actually adopts std atomics is a better place to make
> > > any potential type changes since we are "breaking" the API for 23.11
> > > anyway. further, the std atomics series technically changes all the
> > > types so it's probably better to make one type change then rather than
> > > one now and one later.
> > >
> > > i think it would be best to get these validated and merged asap so we
> > > can get to the std atomics review. when that series is up let's discuss
> > > further how i can mark areas of concern, with that series i expect there
> > > will have to be some changes in order to avoid minor regressions.
> > >
> > > thanks!
> >
> > I thought it would be better to catch these details (i.e. memory ordering
> and signedness) early on, but I now understand that you planned to do it in a
> later step. So I'll let you proceed as you have planned.
> >
> > Thanks for all your work on this, Tyler. It is much appreciated!
> 
> again, sorry for the confusion the sooner i can get some of these merged
> the easier it will be for me to manage the final series. i hope
> david/thomas can merge the simple normalization patches as soon as 23.03
> cycle is complete.
Yes. An early merge would also provide more time for reviewing and optimizing the memory order of the most important atomic operations.
^ permalink raw reply	[flat|nested] 83+ messages in thread 
- * RE: [PATCH 0/7] replace rte atomics with GCC builtin atomics
  2023-03-22 16:13             ` Morten Brørup
@ 2023-03-22 16:40               ` Honnappa Nagarahalli
  2023-03-22 17:07                 ` Morten Brørup
  0 siblings, 1 reply; 83+ messages in thread
From: Honnappa Nagarahalli @ 2023-03-22 16:40 UTC (permalink / raw)
  To: Morten Brørup, Tyler Retzlaff
  Cc: Stephen Hemminger, dev, Ruifeng Wang, thomas, nd, nd
> -----Original Message-----
> From: Morten Brørup <mb@smartsharesystems.com>
> Sent: Wednesday, March 22, 2023 11:14 AM
> To: Tyler Retzlaff <roretzla@linux.microsoft.com>
> Cc: Stephen Hemminger <stephen@networkplumber.org>; dev@dpdk.org;
> Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>; thomas@monjalon.net
> Subject: RE: [PATCH 0/7] replace rte atomics with GCC builtin atomics
> 
> > From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> > Sent: Wednesday, 22 March 2023 16.30
> >
> > On Wed, Mar 22, 2023 at 03:58:07PM +0100, Morten Brørup wrote:
> > > > From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> > > > Sent: Wednesday, 22 March 2023 15.22
> > > >
> > > > On Wed, Mar 22, 2023 at 12:28:44PM +0100, Morten Brørup wrote:
> > > > > > From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> > > > > > Sent: Friday, 17 March 2023 22.49
> > > > > >
> > > > > > On Fri, Mar 17, 2023 at 02:42:26PM -0700, Stephen Hemminger
> wrote:
> > > > > > > On Fri, 17 Mar 2023 13:19:41 -0700 Tyler Retzlaff
> > > > > > > <roretzla@linux.microsoft.com> wrote:
> > > > > > >
> > > > > > > > Replace the use of rte_atomic.h types and functions,
> > > > > > > > instead use
> > GCC
> > > > > > > > supplied C++11 memory model builtins.
> > > > > > > >
> > > > > > > > This series covers the libraries and drivers that are
> > > > > > > > built on
> > > > Windows.
> > > > > > > >
> > > > > > > > The code has be converted to use the __atomic builtins but
> > > > > > > > there
> > are
> > > > > > > > additional during conversion i notice that there may be
> > > > > > > > some
> > issues
> > > > > > > > that need to be addressed.
> > > > > > >
> > > > > > > I don't think all these cmpset need to use SEQ_CST.
> > > > > > > Especially for the places where it is used a loop, might be
> > > > > > > more efficient with some of the other memory models.
> > > > > >
> > > > > > i agree.
> > > > > >
> > > > > > however, i'm not trying to improve the code with this change,
> > > > > > just decouple it from rte_atomics.h so trying my best to avoid
> > > > > > any unnecessary semantic change.
> > > > > >
> > > > > > certainly if the maintainers of this code wish to weaken the
> > > > > > ordering where appropriate after the change is merged they can
> > > > > > do so and
> > handily
> > > > > > this change has enabled them to do so easily allowing them to
> > > > > > test
> > just
> > > > > > their change in isolation.
> > > > >
> > > > > I agree with the two-step approach, where this first step is a
> > > > > simple
> > > > search-and-replacement; but I insist that you add a FIXME or
> > > > similar note where you have blindly used SEQ_CST, indicating that
> > > > the memory order
> > needs to
> > > > be reviewed and potentially corrected.
> > > >
> > > > i think the maintainers need to take some responsibility, if they
> > > > see optimizations they missed when previously writing the code
> > > > they need to follow up with a patch themselves. i can't do
> > > > everything for them and marking things i'm not sure about will
> > > > only lead to me having to churn patch series to remove the unwanted
> comments later.
> > >
> > > The previous atomic functions didn't have the "memory order"
> > > parameter, so
> > the maintainers didn't have to think about it - and thus they didn't
> > miss any optimizations when accepting the code.
> > >
> > > I also agree 100 % that it is not your responsibility to consider or
> > determine which memory order is appropriate!
> > >
> > > But I think you should mark the locations where you are changing
> > > from the
> > old rte_atomic functions (where no memory order optimization was
> > available) to the new functions - to highlight where the option of
> > memory ordering has been introduced and knowingly ignored (by you).
> > >
> >
> > first, i have to apologize i confused myself about which of the many
> > patch series i have up right now that you were commenting on.
> 
> No worries... you are rushing through quite an effort for this, so a little
> confusion is perfectly understandable. Especially when I'm replying to an ageing
> email. :-)
> 
> >
> > let me ask for clarification in relation to this series.
> >
> > isn't that every single usage of the rte_atomic APIs?
> 
> Probably, yes.
> 
> > i mean are you
> > literally asking for the entire patch series to look like the
> > following patch snippet with the expectation that maintainers will
> > come along and clean up/review after this series is merged?
> >
> > -rte_atomic_add32(&o, v);
> > +//FIXME: opportunity for relaxing ordering constraint, please review
> > +__atomic_fetch_add(&o, v, order);
> 
> Exactly. And something similar for the rte_atomicXX_t variables changed to
> intXX_t, such as the packet counters.
> 
> Realistically, I don't expect the maintainers to clean them up anytime soon. The
> purpose is to make the FIXMEs stick until someone eventually cleans them up, so
> they are not forgotten as time passes.
Cleaning up the rte_atomic APIs is a different effort. There is already a lot of effort that has gone into this, and there is more effort happening (rte_ring being a painful one).
Instead of having FIXME, why not just send a separate patch with SEQ_CST (still a search and replace)? We can leave the tougher ones like rte_ring as they are being worked on.
> 
> >
> > this would just be a mechanical addition to this series so i can
> > certainly accomodate that, i thought something more complicated was
> > being asked for. if this is all, then sure no problem.
> 
> Great.
> 
> >
> > > > keep in mind i have to touch each of these again when converting
> > > > to standard so that's a better time to review ~everything in more
> > > > detail because when converting to standard that's when suddenly
> > > > you get a bunch of code generation that is "fallback" to seq_cst that isn't
> happening now.
> > >
> > > I think you should to do it when replacing the rte_atomic functions
> > > with the
> > __atomic functions. It will make it easier to see where the memory
> > order was knowingly ignored, and should be reviewed for optimization.
> > >
> > > >
> > > > the series that converts to standard needs to be up for review as
> > > > soon as possible to maximize available time for feedback before
> > > > 23.11 so it would be better to get the simpler cut & paste
> > > > normalizing the code out of the way to unblock that series submission.
> > > >
> > > > >
> > > > > Also, in a couple of the drivers, you are using int64_t for
> > > > > packet
> > counters.
> > > > These cannot be negative and should be uint64_t. And AFAIK, such
> > > > counters
> > can
> > > > use RELAXED memory order.
> > > >
> > > > i know you don't mean to say i selected the types and rather that
> > > > the types that were selected are not quite correct for their usage.
> > >
> > > Yes; the previous types were also signed, and you didn't change that.
> > >
> > > > again
> > > > on the review that actually adopts std atomics is a better place
> > > > to make any potential type changes since we are "breaking" the API
> > > > for 23.11 anyway. further, the std atomics series technically
> > > > changes all the types so it's probably better to make one type
> > > > change then rather than one now and one later.
> > > >
> > > > i think it would be best to get these validated and merged asap so
> > > > we can get to the std atomics review. when that series is up let's
> > > > discuss further how i can mark areas of concern, with that series
> > > > i expect there will have to be some changes in order to avoid minor
> regressions.
> > > >
> > > > thanks!
> > >
> > > I thought it would be better to catch these details (i.e. memory
> > > ordering
> > and signedness) early on, but I now understand that you planned to do
> > it in a later step. So I'll let you proceed as you have planned.
> > >
> > > Thanks for all your work on this, Tyler. It is much appreciated!
> >
> > again, sorry for the confusion the sooner i can get some of these
> > merged the easier it will be for me to manage the final series. i hope
> > david/thomas can merge the simple normalization patches as soon as
> > 23.03 cycle is complete.
> 
> Yes. An early merge would also provide more time for reviewing and optimizing
> the memory order of the most important atomic operations.
^ permalink raw reply	[flat|nested] 83+ messages in thread 
- * RE: [PATCH 0/7] replace rte atomics with GCC builtin atomics
  2023-03-22 16:40               ` Honnappa Nagarahalli
@ 2023-03-22 17:07                 ` Morten Brørup
  2023-03-22 17:38                   ` Honnappa Nagarahalli
  0 siblings, 1 reply; 83+ messages in thread
From: Morten Brørup @ 2023-03-22 17:07 UTC (permalink / raw)
  To: Honnappa Nagarahalli, Tyler Retzlaff
  Cc: Stephen Hemminger, dev, Ruifeng Wang, thomas, nd, nd
> From: Honnappa Nagarahalli [mailto:Honnappa.Nagarahalli@arm.com]
> Sent: Wednesday, 22 March 2023 17.40
> 
> > From: Morten Brørup <mb@smartsharesystems.com>
> > Sent: Wednesday, March 22, 2023 11:14 AM
> >
> > > From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> > > Sent: Wednesday, 22 March 2023 16.30
> > >
> > > On Wed, Mar 22, 2023 at 03:58:07PM +0100, Morten Brørup wrote:
> > > > > From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> > > > > Sent: Wednesday, 22 March 2023 15.22
> > > > >
> > > > > On Wed, Mar 22, 2023 at 12:28:44PM +0100, Morten Brørup wrote:
> > > > > > > From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> > > > > > > Sent: Friday, 17 March 2023 22.49
> > > > > > >
> > > > > > > On Fri, Mar 17, 2023 at 02:42:26PM -0700, Stephen Hemminger
> > wrote:
> > > > > > > > On Fri, 17 Mar 2023 13:19:41 -0700 Tyler Retzlaff
> > > > > > > > <roretzla@linux.microsoft.com> wrote:
> > > > > > > >
> > > > > > > > > Replace the use of rte_atomic.h types and functions,
> > > > > > > > > instead use
> > > GCC
> > > > > > > > > supplied C++11 memory model builtins.
> > > > > > > > >
> > > > > > > > > This series covers the libraries and drivers that are
> > > > > > > > > built on
> > > > > Windows.
> > > > > > > > >
> > > > > > > > > The code has be converted to use the __atomic builtins
> but
> > > > > > > > > there
> > > are
> > > > > > > > > additional during conversion i notice that there may be
> > > > > > > > > some
> > > issues
> > > > > > > > > that need to be addressed.
> > > > > > > >
> > > > > > > > I don't think all these cmpset need to use SEQ_CST.
> > > > > > > > Especially for the places where it is used a loop, might
> be
> > > > > > > > more efficient with some of the other memory models.
> > > > > > >
> > > > > > > i agree.
> > > > > > >
> > > > > > > however, i'm not trying to improve the code with this
> change,
> > > > > > > just decouple it from rte_atomics.h so trying my best to
> avoid
> > > > > > > any unnecessary semantic change.
> > > > > > >
> > > > > > > certainly if the maintainers of this code wish to weaken the
> > > > > > > ordering where appropriate after the change is merged they
> can
> > > > > > > do so and
> > > handily
> > > > > > > this change has enabled them to do so easily allowing them
> to
> > > > > > > test
> > > just
> > > > > > > their change in isolation.
> > > > > >
> > > > > > I agree with the two-step approach, where this first step is a
> > > > > > simple
> > > > > search-and-replacement; but I insist that you add a FIXME or
> > > > > similar note where you have blindly used SEQ_CST, indicating
> that
> > > > > the memory order
> > > needs to
> > > > > be reviewed and potentially corrected.
> > > > >
> > > > > i think the maintainers need to take some responsibility, if
> they
> > > > > see optimizations they missed when previously writing the code
> > > > > they need to follow up with a patch themselves. i can't do
> > > > > everything for them and marking things i'm not sure about will
> > > > > only lead to me having to churn patch series to remove the
> unwanted
> > comments later.
> > > >
> > > > The previous atomic functions didn't have the "memory order"
> > > > parameter, so
> > > the maintainers didn't have to think about it - and thus they didn't
> > > miss any optimizations when accepting the code.
> > > >
> > > > I also agree 100 % that it is not your responsibility to consider
> or
> > > determine which memory order is appropriate!
> > > >
> > > > But I think you should mark the locations where you are changing
> > > > from the
> > > old rte_atomic functions (where no memory order optimization was
> > > available) to the new functions - to highlight where the option of
> > > memory ordering has been introduced and knowingly ignored (by you).
> > > >
> > >
> > > first, i have to apologize i confused myself about which of the many
> > > patch series i have up right now that you were commenting on.
> >
> > No worries... you are rushing through quite an effort for this, so a
> little
> > confusion is perfectly understandable. Especially when I'm replying to
> an ageing
> > email. :-)
> >
> > >
> > > let me ask for clarification in relation to this series.
> > >
> > > isn't that every single usage of the rte_atomic APIs?
> >
> > Probably, yes.
> >
> > > i mean are you
> > > literally asking for the entire patch series to look like the
> > > following patch snippet with the expectation that maintainers will
> > > come along and clean up/review after this series is merged?
> > >
> > > -rte_atomic_add32(&o, v);
> > > +//FIXME: opportunity for relaxing ordering constraint, please
> review
> > > +__atomic_fetch_add(&o, v, order);
> >
> > Exactly. And something similar for the rte_atomicXX_t variables
> changed to
> > intXX_t, such as the packet counters.
> >
> > Realistically, I don't expect the maintainers to clean them up anytime
> soon. The
> > purpose is to make the FIXMEs stick until someone eventually cleans
> them up, so
> > they are not forgotten as time passes.
> Cleaning up the rte_atomic APIs is a different effort. There is already
> lot of effort that has gone into this and there is more effort happening
> (rte_ring being a painful one)
> 
> Instead of having FIXME, why not just send a separate patch with SEQ_CST
> (still a search and replace)? We can leave the tougher ones like
> rte_ring as they are being worked on.
The FIXME makes it possible in the future to differentiate between the instances that still need review and the instances that have been reviewed where SEQ_CST was the correct choice. (Similarly for the choice of type for variables previously rte_atomicNN_t.)
> 
> >
> > >
> > > this would just be a mechanical addition to this series so i can
> > > certainly accommodate that, i thought something more complicated was
> > > being asked for. if this is all, then sure no problem.
> >
> > Great.
> >
> > >
> > > > > keep in mind i have to touch each of these again when converting
> > > > > to standard so that's a better time to review ~everything in
> more
> > > > > detail because when converting to standard that's when suddenly
> > > > > you get a bunch of code generation that is "fallback" to seq_cst
> that isn't
> > happening now.
> > > >
> > > > I think you should to do it when replacing the rte_atomic
> functions
> > > > with the
> > > __atomic functions. It will make it easier to see where the memory
> > > order was knowingly ignored, and should be reviewed for
> optimization.
> > > >
> > > > >
> > > > > the series that converts to standard needs to be up for review
> as
> > > > > soon as possible to maximize available time for feedback before
> > > > > 23.11 so it would be better to get the simpler cut & paste
> > > > > normalizing the code out of the way to unblock that series
> submission.
> > > > >
> > > > > >
> > > > > > Also, in a couple of the drivers, you are using int64_t for
> > > > > > packet
> > > counters.
> > > > > These cannot be negative and should be uint64_t. And AFAIK, such
> > > > > counters
> > > can
> > > > > use RELAXED memory order.
> > > > >
> > > > > i know you don't mean to say i selected the types and rather
> that
> > > > > the types that were selected are not quite correct for their
> usage.
> > > >
> > > > Yes; the previous types were also signed, and you didn't change
> that.
> > > >
> > > > > again
> > > > > on the review that actually adopts std atomics is a better place
> > > > > to make any potential type changes since we are "breaking" the
> API
> > > > > for 23.11 anyway. further, the std atomics series technically
> > > > > changes all the types so it's probably better to make one type
> > > > > change then rather than one now and one later.
> > > > >
> > > > > i think it would be best to get these validated and merged asap
> so
> > > > > we can get to the std atomics review. when that series is up
> let's
> > > > > discuss further how i can mark areas of concern, with that
> series
> > > > > i expect there will have to be some changes in order to avoid
> minor
> > regressions.
> > > > >
> > > > > thanks!
> > > >
> > > > I thought it would be better to catch these details (i.e. memory
> > > > ordering
> > > and signedness) early on, but I now understand that you planned to
> do
> > > it in a later step. So I'll let you proceed as you have planned.
> > > >
> > > > Thanks for all your work on this, Tyler. It is much appreciated!
> > >
> > > again, sorry for the confusion the sooner i can get some of these
> > > merged the easier it will be for me to manage the final series. i
> hope
> > > david/thomas can merge the simple normalization patches as soon as
> > > 23.03 cycle is complete.
> >
> > Yes. An early merge would also provide more time for reviewing and
> optimizing
> > the memory order of the most important atomic operations.
> 
^ permalink raw reply	[flat|nested] 83+ messages in thread 
- * RE: [PATCH 0/7] replace rte atomics with GCC builtin atomics
  2023-03-22 17:07                 ` Morten Brørup
@ 2023-03-22 17:38                   ` Honnappa Nagarahalli
  2023-03-22 18:06                     ` Tyler Retzlaff
  0 siblings, 1 reply; 83+ messages in thread
From: Honnappa Nagarahalli @ 2023-03-22 17:38 UTC (permalink / raw)
  To: Morten Brørup, Tyler Retzlaff
  Cc: Stephen Hemminger, dev, Ruifeng Wang, thomas, nd, nd
> -----Original Message-----
> From: Morten Brørup <mb@smartsharesystems.com>
> Sent: Wednesday, March 22, 2023 12:08 PM
> To: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>; Tyler Retzlaff
> <roretzla@linux.microsoft.com>
> Cc: Stephen Hemminger <stephen@networkplumber.org>; dev@dpdk.org;
> Ruifeng Wang <Ruifeng.Wang@arm.com>; thomas@monjalon.net; nd
> <nd@arm.com>; nd <nd@arm.com>
> Subject: RE: [PATCH 0/7] replace rte atomics with GCC builtin atomics
> 
> > From: Honnappa Nagarahalli [mailto:Honnappa.Nagarahalli@arm.com]
> > Sent: Wednesday, 22 March 2023 17.40
> >
> > > From: Morten Brørup <mb@smartsharesystems.com>
> > > Sent: Wednesday, March 22, 2023 11:14 AM
> > >
> > > > From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> > > > Sent: Wednesday, 22 March 2023 16.30
> > > >
> > > > On Wed, Mar 22, 2023 at 03:58:07PM +0100, Morten Brørup wrote:
> > > > > > From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> > > > > > Sent: Wednesday, 22 March 2023 15.22
> > > > > >
> > > > > > On Wed, Mar 22, 2023 at 12:28:44PM +0100, Morten Brørup wrote:
> > > > > > > > From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> > > > > > > > Sent: Friday, 17 March 2023 22.49
> > > > > > > >
> > > > > > > > On Fri, Mar 17, 2023 at 02:42:26PM -0700, Stephen
> > > > > > > > Hemminger
> > > wrote:
> > > > > > > > > On Fri, 17 Mar 2023 13:19:41 -0700 Tyler Retzlaff
> > > > > > > > > <roretzla@linux.microsoft.com> wrote:
> > > > > > > > >
> > > > > > > > > > Replace the use of rte_atomic.h types and functions,
> > > > > > > > > > instead use
> > > > GCC
> > > > > > > > > > supplied C++11 memory model builtins.
> > > > > > > > > >
> > > > > > > > > > This series covers the libraries and drivers that are
> > > > > > > > > > built on
> > > > > > Windows.
> > > > > > > > > >
> > > > > > > > > > The code has be converted to use the __atomic builtins
> > but
> > > > > > > > > > there
> > > > are
> > > > > > > > > > additional during conversion i notice that there may
> > > > > > > > > > be some
> > > > issues
> > > > > > > > > > that need to be addressed.
> > > > > > > > >
> > > > > > > > > I don't think all these cmpset need to use SEQ_CST.
> > > > > > > > > Especially for the places where it is used a loop, might
> > be
> > > > > > > > > more efficient with some of the other memory models.
> > > > > > > >
> > > > > > > > i agree.
> > > > > > > >
> > > > > > > > however, i'm not trying to improve the code with this
> > change,
> > > > > > > > just decouple it from rte_atomics.h so trying my best to
> > avoid
> > > > > > > > any unnecessary semantic change.
> > > > > > > >
> > > > > > > > certainly if the maintainers of this code wish to weaken
> > > > > > > > the ordering where appropriate after the change is merged
> > > > > > > > they
> > can
> > > > > > > > do so and
> > > > handily
> > > > > > > > this change has enabled them to do so easily allowing them
> > to
> > > > > > > > test
> > > > just
> > > > > > > > their change in isolation.
> > > > > > >
> > > > > > > I agree with the two-step approach, where this first step is
> > > > > > > a simple
> > > > > > search-and-replacement; but I insist that you add a FIXME or
> > > > > > similar note where you have blindly used SEQ_CST, indicating
> > that
> > > > > > the memory order
> > > > needs to
> > > > > > be reviewed and potentially corrected.
> > > > > >
> > > > > > i think the maintainers need to take some responsibility, if
> > they
> > > > > > see optimizations they missed when previously writing the code
> > > > > > they need to follow up with a patch themselves. i can't do
> > > > > > everything for them and marking things i'm not sure about will
> > > > > > only lead to me having to churn patch series to remove the
> > unwanted
> > > comments later.
> > > > >
> > > > > The previous atomic functions didn't have the "memory order"
> > > > > parameter, so
> > > > the maintainers didn't have to think about it - and thus they
> > > > didn't miss any optimizations when accepting the code.
> > > > >
> > > > > I also agree 100 % that it is not your responsibility to
> > > > > consider
> > or
> > > > determine which memory order is appropriate!
> > > > >
> > > > > But I think you should mark the locations where you are changing
> > > > > from the
> > > > old rte_atomic functions (where no memory order optimization was
> > > > available) to the new functions - to highlight where the option of
> > > > memory ordering has been introduced and knowingly ignored (by you).
> > > > >
> > > >
> > > > first, i have to apologize i confused myself about which of the
> > > > many patch series i have up right now that you were commenting on.
> > >
> > > No worries... you are rushing through quite an effort for this, so a
> > little
> > > confusion is perfectly understandable. Especially when I'm replying
> > > to
> > an ageing
> > > email. :-)
> > >
> > > >
> > > > let me ask for clarification in relation to this series.
> > > >
> > > > isn't that every single usage of the rte_atomic APIs?
> > >
> > > Probably, yes.
> > >
> > > > i mean are you
> > > > literally asking for the entire patch series to look like the
> > > > following patch snippet with the expectation that maintainers will
> > > > come along and clean up/review after this series is merged?
> > > >
> > > > -rte_atomic_add32(&o, v);
> > > > +//FIXME: opportunity for relaxing ordering constraint, please
> > review
> > > > +__atomic_fetch_add(&o, v, order);
> > >
> > > Exactly. And something similar for the rte_atomicXX_t variables
> > changed to
> > > intXX_t, such as the packet counters.
> > >
> > > Realistically, I don't expect the maintainers to clean them up
> > > anytime
> > soon. The
> > > purpose is to make the FIXMEs stick until someone eventually cleans
> > them up, so
> > > they are not forgotten as time passes.
> > Cleaning up the rte_atomic APIs is a different effort. There is
> > already lot of effort that has gone into this and there is more effort
> > happening (rte_ring being a painful one)
> >
> > Instead of having FIXME, why not just send a separate patch with
> > SEQ_CST (still a search and replace)? We can leave the tougher ones
> > like rte_ring as they are being worked on.
> 
> The FIXME makes it possible in the future to differentiate between the instances
> that still need review and the instances that have been reviewed where
> SEQ_CST was the correct choice. (Similarly for the choice of type for variables
> previously rte_atomicNN_t.)
Apologies, I relooked at the heading of this patch; I had confused it with other patches.
The changes Arm had done for rte_atomic_ to __atomic_xxx were not direct replacements. The algorithms were studied, relaxed where required, race conditions fixed, performance benchmarked. IMO, we need to go through the same steps here.
I looked at the series; we should just review the patch and make the suggested changes. Are we constrained by any deadlines for this work?
I would suggest dropping 1/7. Arm is working on removing the non-C11 algorithm for rte_ring (not sure if we will be successful). I think it is better to explore this approach rather than the changes in patch 1/7.
> 
> >
> > >
> > > >
> > > > this would just be a mechanical addition to this series so i can
> > > > certainly accommodate that, i thought something more complicated
> > > > was being asked for. if this is all, then sure no problem.
> > >
> > > Great.
> > >
> > > >
> > > > > > keep in mind i have to touch each of these again when
> > > > > > converting to standard so that's a better time to review
> > > > > > ~everything in
> > more
> > > > > > detail because when converting to standard that's when
> > > > > > suddenly you get a bunch of code generation that is "fallback"
> > > > > > to seq_cst
> > that isn't
> > > happening now.
> > > > >
> > > > > I think you should to do it when replacing the rte_atomic
> > functions
> > > > > with the
> > > > __atomic functions. It will make it easier to see where the memory
> > > > order was knowingly ignored, and should be reviewed for
> > optimization.
> > > > >
> > > > > >
> > > > > > the series that converts to standard needs to be up for review
> > as
> > > > > > soon as possible to maximize available time for feedback
> > > > > > before
> > > > > > 23.11 so it would be better to get the simpler cut & paste
> > > > > > normalizing the code out of the way to unblock that series
> > submission.
> > > > > >
> > > > > > >
> > > > > > > Also, in a couple of the drivers, you are using int64_t for
> > > > > > > packet
> > > > counters.
> > > > > > These cannot be negative and should be uint64_t. And AFAIK,
> > > > > > such counters
> > > > can
> > > > > > use RELAXED memory order.
> > > > > >
> > > > > > i know you don't mean to say i selected the types and rather
> > that
> > > > > > the types that were selected are not quite correct for their
> > usage.
> > > > >
> > > > > Yes; the previous types were also signed, and you didn't change
> > that.
> > > > >
> > > > > > again
> > > > > > on the review that actually adopts std atomics is a better
> > > > > > place to make any potential type changes since we are
> > > > > > "breaking" the
> > API
> > > > > > for 23.11 anyway. further, the std atomics series technically
> > > > > > changes all the types so it's probably better to make one type
> > > > > > change then rather than one now and one later.
> > > > > >
> > > > > > i think it would be best to get these validated and merged
> > > > > > asap
> > so
> > > > > > we can get to the std atomics review. when that series is up
> > let's
> > > > > > discuss further how i can mark areas of concern, with that
> > series
> > > > > > i expect there will have to be some changes in order to avoid
> > minor
> > > regressions.
> > > > > >
> > > > > > thanks!
> > > > >
> > > > > I thought it would be better to catch these details (i.e. memory
> > > > > ordering
> > > > and signedness) early on, but I now understand that you planned to
> > do
> > > > it in a later step. So I'll let you proceed as you have planned.
> > > > >
> > > > > Thanks for all your work on this, Tyler. It is much appreciated!
> > > >
> > > > again, sorry for the confusion the sooner i can get some of these
> > > > merged the easier it will be for me to manage the final series. i
> > hope
> > > > david/thomas can merge the simple normalization patches as soon as
> > > > 23.03 cycle is complete.
> > >
> > > Yes. An early merge would also provide more time for reviewing and
> > optimizing
> > > the memory order of the most important atomic operations.
> >
^ permalink raw reply	[flat|nested] 83+ messages in thread 
- * Re: [PATCH 0/7] replace rte atomics with GCC builtin atomics
  2023-03-22 17:38                   ` Honnappa Nagarahalli
@ 2023-03-22 18:06                     ` Tyler Retzlaff
  2023-05-02  3:37                       ` Tyler Retzlaff
  0 siblings, 1 reply; 83+ messages in thread
From: Tyler Retzlaff @ 2023-03-22 18:06 UTC (permalink / raw)
  To: Honnappa Nagarahalli
  Cc: Morten Brørup, Stephen Hemminger, dev, Ruifeng Wang, thomas, nd
On Wed, Mar 22, 2023 at 05:38:12PM +0000, Honnappa Nagarahalli wrote:
> 
> 
> > -----Original Message-----
> > From: Morten Brørup <mb@smartsharesystems.com>
> > Sent: Wednesday, March 22, 2023 12:08 PM
> > To: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>; Tyler Retzlaff
> > <roretzla@linux.microsoft.com>
> > Cc: Stephen Hemminger <stephen@networkplumber.org>; dev@dpdk.org;
> > Ruifeng Wang <Ruifeng.Wang@arm.com>; thomas@monjalon.net; nd
> > <nd@arm.com>; nd <nd@arm.com>
> > Subject: RE: [PATCH 0/7] replace rte atomics with GCC builtin atomics
> > 
> > > From: Honnappa Nagarahalli [mailto:Honnappa.Nagarahalli@arm.com]
> > > Sent: Wednesday, 22 March 2023 17.40
> > >
> > > > From: Morten Brørup <mb@smartsharesystems.com>
> > > > Sent: Wednesday, March 22, 2023 11:14 AM
> > > >
> > > > > From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> > > > > Sent: Wednesday, 22 March 2023 16.30
> > > > >
> > > > > On Wed, Mar 22, 2023 at 03:58:07PM +0100, Morten Brørup wrote:
> > > > > > > From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> > > > > > > Sent: Wednesday, 22 March 2023 15.22
> > > > > > >
> > > > > > > On Wed, Mar 22, 2023 at 12:28:44PM +0100, Morten Brørup wrote:
> > > > > > > > > From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> > > > > > > > > Sent: Friday, 17 March 2023 22.49
> > > > > > > > >
> > > > > > > > > On Fri, Mar 17, 2023 at 02:42:26PM -0700, Stephen
> > > > > > > > > Hemminger
> > > > wrote:
> > > > > > > > > > On Fri, 17 Mar 2023 13:19:41 -0700 Tyler Retzlaff
> > > > > > > > > > <roretzla@linux.microsoft.com> wrote:
> > > > > > > > > >
> > > > > > > > > > > Replace the use of rte_atomic.h types and functions,
> > > > > > > > > > > instead use
> > > > > GCC
> > > > > > > > > > > supplied C++11 memory model builtins.
> > > > > > > > > > >
> > > > > > > > > > > This series covers the libraries and drivers that are
> > > > > > > > > > > built on
> > > > > > > Windows.
> > > > > > > > > > >
> > > > > > > > > > > The code has be converted to use the __atomic builtins
> > > but
> > > > > > > > > > > there
> > > > > are
> > > > > > > > > > > additional during conversion i notice that there may
> > > > > > > > > > > be some
> > > > > issues
> > > > > > > > > > > that need to be addressed.
> > > > > > > > > >
> > > > > > > > > > I don't think all these cmpset need to use SEQ_CST.
> > > > > > > > > > Especially for the places where it is used a loop, might
> > > be
> > > > > > > > > > more efficient with some of the other memory models.
> > > > > > > > >
> > > > > > > > > i agree.
> > > > > > > > >
> > > > > > > > > however, i'm not trying to improve the code with this
> > > change,
> > > > > > > > > just decouple it from rte_atomics.h so trying my best to
> > > avoid
> > > > > > > > > any unnecessary semantic change.
> > > > > > > > >
> > > > > > > > > certainly if the maintainers of this code wish to weaken
> > > > > > > > > the ordering where appropriate after the change is merged
> > > > > > > > > they
> > > can
> > > > > > > > > do so and
> > > > > handily
> > > > > > > > > this change has enabled them to do so easily allowing them
> > > to
> > > > > > > > > test
> > > > > just
> > > > > > > > > their change in isolation.
> > > > > > > >
> > > > > > > > I agree with the two-step approach, where this first step is
> > > > > > > > a simple
> > > > > > > search-and-replacement; but I insist that you add a FIXME or
> > > > > > > similar note where you have blindly used SEQ_CST, indicating
> > > that
> > > > > > > the memory order
> > > > > needs to
> > > > > > > be reviewed and potentially corrected.
> > > > > > >
> > > > > > > i think the maintainers need to take some responsibility, if
> > > they
> > > > > > > see optimizations they missed when previously writing the code
> > > > > > > they need to follow up with a patch themselves. i can't do
> > > > > > > everything for them and marking things i'm not sure about will
> > > > > > > only lead to me having to churn patch series to remove the
> > > unwanted
> > > > comments later.
> > > > > >
> > > > > > The previous atomic functions didn't have the "memory order"
> > > > > > parameter, so
> > > > > the maintainers didn't have to think about it - and thus they
> > > > > didn't miss any optimizations when accepting the code.
> > > > > >
> > > > > > I also agree 100 % that it is not your responsibility to
> > > > > > consider
> > > or
> > > > > determine which memory order is appropriate!
> > > > > >
> > > > > > But I think you should mark the locations where you are changing
> > > > > > from the
> > > > > old rte_atomic functions (where no memory order optimization was
> > > > > available) to the new functions - to highlight where the option of
> > > > > memory ordering has been introduced and knowingly ignored (by you).
> > > > > >
> > > > >
> > > > > first, i have to apologize i confused myself about which of the
> > > > > many patch series i have up right now that you were commenting on.
> > > >
> > > > No worries... you are rushing through quite an effort for this, so a
> > > little
> > > > confusion is perfectly understandable. Especially when I'm replying
> > > > to
> > > an ageing
> > > > email. :-)
> > > >
> > > > >
> > > > > let me ask for clarification in relation to this series.
> > > > >
> > > > > isn't that every single usage of the rte_atomic APIs?
> > > >
> > > > Probably, yes.
> > > >
> > > > > i mean are you
> > > > > literally asking for the entire patch series to look like the
> > > > > following patch snippet with the expectation that maintainers will
> > > > > come along and clean up/review after this series is merged?
> > > > >
> > > > > -rte_atomic_add32(&o, v);
> > > > > +//FIXME: opportunity for relaxing ordering constraint, please
> > > review
> > > > > +__atomic_fetch_add(&o, v, order);
> > > >
> > > > Exactly. And something similar for the rte_atomicXX_t variables
> > > changed to
> > > > intXX_t, such as the packet counters.
> > > >
> > > > Realistically, I don't expect the maintainers to clean them up
> > > > anytime
> > > soon. The
> > > > purpose is to make the FIXMEs stick until someone eventually cleans
> > > them up, so
> > > > they are not forgotten as time passes.
> > > Cleaning up the rte_atomic APIs is a different effort. There is
> > > already lot of effort that has gone into this and there is more effort
> > > happening (rte_ring being a painful one)
> > >
> > > Instead of having FIXME, why not just send a separate patch with
> > > SEQ_CST (still a search and replace)? We can leave the tougher ones
> > > like rte_ring as they are being worked on.
> > 
> > The FIXME makes it possible in the future to differentiate between the instances
> > that still need review and the instances that have been reviewed where
> > SEQ_CST was the correct choice. (Similarly for the choice of type for variables
> > previously rte_atomicNN_t.)
> Apologies, relooked at the heading of this patch, got confused with other patches.
yeah, i did the same thing this morning :)
> 
> The changes Arm had done for rte_atomic_ to __atomic_xxx were not direct replacements. The algorithms were studied, relaxed where required, race conditions fixed, performance benchmarked. IMO, we need to go through the same steps here.
> 
> I looked at the series, we should just review the patch and make suggested changes. Are we constrained by any deadlines for this work?
i'm going to say yes but i'll qualify. the use of the rte_atomic_xxx
APIs drags in extra work when creating a series that performs the actual
conversions to the standard atomics.
if i don't decouple ring from rte_atomic_xxx that means i have to go
convert all the rte_atomic.h to standard atomics and working around some
of the implementation detail to do it is very time consuming. which
then has further flow on effects because then i have to go fix every
single driver that is still using rte_atomic.h.
incidentally i have a work in progress to decouple everything from
rte_atomic.h (including all drivers) but it would really negatively
impact getting standard atomics introduced if we had to serialize the
introduction behind a total removal of rte_atomic or had to make
changes to every consumer of the old rte_atomic APIs.
if we can get by with a comment on the rte_atomic_xxx lines in this
series it would be helpful. when we bring the next series for standard
atomics i'm not averse to introducing changes to the ordering in that series
if requested so long as i can get the series up 'soon' so there is lots
of review time runway for 23.11.
> 
> I would suggest to drop 1/7. Arm is working on removing the non-C11 algorithm for rte_ring (not sure if we will be successful). I think it is better to explore this approach rather than the changes in patch 1/7.
i think my answer here is timing. i'd rather take the work from arm but
if it isn't coming for a while then it becomes a blocker.
we're waiting for the 23.07 start before this series can be merged. how
about we re-evaluate where arm is at when the merge window opens. we can
then decide to drop 1/7 or not at that time?
ty
^ permalink raw reply	[flat|nested] 83+ messages in thread 
- * Re: [PATCH 0/7] replace rte atomics with GCC builtin atomics
  2023-03-22 18:06                     ` Tyler Retzlaff
@ 2023-05-02  3:37                       ` Tyler Retzlaff
  2023-05-02  4:31                         ` Honnappa Nagarahalli
  0 siblings, 1 reply; 83+ messages in thread
From: Tyler Retzlaff @ 2023-05-02  3:37 UTC (permalink / raw)
  To: Honnappa Nagarahalli
  Cc: Morten Brørup, Stephen Hemminger, dev, Ruifeng Wang, thomas, nd
On Wed, Mar 22, 2023 at 11:06:08AM -0700, Tyler Retzlaff wrote:
> On Wed, Mar 22, 2023 at 05:38:12PM +0000, Honnappa Nagarahalli wrote:
> > 
> > 
> > > -----Original Message-----
> > > From: Morten Brørup <mb@smartsharesystems.com>
> > > Sent: Wednesday, March 22, 2023 12:08 PM
> > > To: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>; Tyler Retzlaff
> > > <roretzla@linux.microsoft.com>
> > > Cc: Stephen Hemminger <stephen@networkplumber.org>; dev@dpdk.org;
> > > Ruifeng Wang <Ruifeng.Wang@arm.com>; thomas@monjalon.net; nd
> > > <nd@arm.com>; nd <nd@arm.com>
> > > Subject: RE: [PATCH 0/7] replace rte atomics with GCC builtin atomics
> > > 
> > > > From: Honnappa Nagarahalli [mailto:Honnappa.Nagarahalli@arm.com]
> > > > Sent: Wednesday, 22 March 2023 17.40
> > > >
> > > > > From: Morten Brørup <mb@smartsharesystems.com>
> > > > > Sent: Wednesday, March 22, 2023 11:14 AM
> > > > >
> > > > > > From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> > > > > > Sent: Wednesday, 22 March 2023 16.30
> > > > > >
> > > > > > On Wed, Mar 22, 2023 at 03:58:07PM +0100, Morten Brørup wrote:
> > > > > > > > From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> > > > > > > > Sent: Wednesday, 22 March 2023 15.22
> > > > > > > >
> > > > > > > > On Wed, Mar 22, 2023 at 12:28:44PM +0100, Morten Brørup wrote:
> > > > > > > > > > From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> > > > > > > > > > Sent: Friday, 17 March 2023 22.49
> > > > > > > > > >
> > > > > > > > > > On Fri, Mar 17, 2023 at 02:42:26PM -0700, Stephen
> > > > > > > > > > Hemminger
> > > > > wrote:
> > > > > > > > > > > On Fri, 17 Mar 2023 13:19:41 -0700 Tyler Retzlaff
> > > > > > > > > > > <roretzla@linux.microsoft.com> wrote:
> > > > > > > > > > >
> > > > > > > > > > > > Replace the use of rte_atomic.h types and functions,
> > > > > > > > > > > > instead use
> > > > > > GCC
> > > > > > > > > > > > supplied C++11 memory model builtins.
> > > > > > > > > > > >
> > > > > > > > > > > > This series covers the libraries and drivers that are
> > > > > > > > > > > > built on
> > > > > > > > Windows.
> > > > > > > > > > > >
> > > > > > > > > > > > The code has be converted to use the __atomic builtins
> > > > but
> > > > > > > > > > > > there
> > > > > > are
> > > > > > > > > > > > additional during conversion i notice that there may
> > > > > > > > > > > > be some
> > > > > > issues
> > > > > > > > > > > > that need to be addressed.
> > > > > > > > > > >
> > > > > > > > > > > I don't think all these cmpset need to use SEQ_CST.
> > > > > > > > > > > Especially for the places where it is used a loop, might
> > > > be
> > > > > > > > > > > more efficient with some of the other memory models.
> > > > > > > > > >
> > > > > > > > > > i agree.
> > > > > > > > > >
> > > > > > > > > > however, i'm not trying to improve the code with this
> > > > change,
> > > > > > > > > > just decouple it from rte_atomics.h so trying my best to
> > > > avoid
> > > > > > > > > > any unnecessary semantic change.
> > > > > > > > > >
> > > > > > > > > > certainly if the maintainers of this code wish to weaken
> > > > > > > > > > the ordering where appropriate after the change is merged
> > > > > > > > > > they
> > > > can
> > > > > > > > > > do so and
> > > > > > handily
> > > > > > > > > > this change has enabled them to do so easily allowing them
> > > > to
> > > > > > > > > > test
> > > > > > just
> > > > > > > > > > their change in isolation.
> > > > > > > > >
> > > > > > > > > I agree with the two-step approach, where this first step is
> > > > > > > > > a simple
> > > > > > > > search-and-replacement; but I insist that you add a FIXME or
> > > > > > > > similar note where you have blindly used SEQ_CST, indicating
> > > > that
> > > > > > > > the memory order
> > > > > > needs to
> > > > > > > > be reviewed and potentially corrected.
> > > > > > > >
> > > > > > > > i think the maintainers need to take some responsibility, if
> > > > they
> > > > > > > > see optimizations they missed when previously writing the code
> > > > > > > > they need to follow up with a patch themselves. i can't do
> > > > > > > > everything for them and marking things i'm not sure about will
> > > > > > > > only lead to me having to churn patch series to remove the
> > > > unwanted
> > > > > comments later.
> > > > > > >
> > > > > > > The previous atomic functions didn't have the "memory order"
> > > > > > > parameter, so
> > > > > > the maintainers didn't have to think about it - and thus they
> > > > > > didn't miss any optimizations when accepting the code.
> > > > > > >
> > > > > > > I also agree 100 % that it is not your responsibility to
> > > > > > > consider
> > > > or
> > > > > > determine which memory order is appropriate!
> > > > > > >
> > > > > > > But I think you should mark the locations where you are changing
> > > > > > > from the
> > > > > > old rte_atomic functions (where no memory order optimization was
> > > > > > available) to the new functions - to highlight where the option of
> > > > > > memory ordering has been introduced and knowingly ignored (by you).
> > > > > > >
> > > > > >
> > > > > > first, i have to apologize i confused myself about which of the
> > > > > > many patch series i have up right now that you were commenting on.
> > > > >
> > > > > No worries... you are rushing through quite an effort for this, so a
> > > > little
> > > > > confusion is perfectly understandable. Especially when I'm replying
> > > > > to
> > > > an ageing
> > > > > email. :-)
> > > > >
> > > > > >
> > > > > > let me ask for clarification in relation to this series.
> > > > > >
> > > > > > isn't that every single usage of the rte_atomic APIs?
> > > > >
> > > > > Probably, yes.
> > > > >
> > > > > > i mean are you
> > > > > > literally asking for the entire patch series to look like the
> > > > > > following patch snippet with the expectation that maintainers will
> > > > > > come along and clean up/review after this series is merged?
> > > > > >
> > > > > > -rte_atomic_add32(&o, v);
> > > > > > +//FIXME: opportunity for relaxing ordering constraint, please
> > > > review
> > > > > > +__atomic_fetch_add(&o, v, order);
> > > > >
> > > > > Exactly. And something similar for the rte_atomicXX_t variables
> > > > changed to
> > > > > intXX_t, such as the packet counters.
> > > > >
> > > > > Realistically, I don't expect the maintainers to clean them up
> > > > > anytime
> > > > soon. The
> > > > > purpose is to make the FIXMEs stick until someone eventually cleans
> > > > them up, so
> > > > > they are not forgotten as time passes.
> > > > Cleaning up the rte_atomic APIs is a different effort. There is
> > > > already lot of effort that has gone into this and there is more effort
> > > > happening (rte_ring being a painful one)
> > > >
> > > > Instead of having FIXME, why not just send a separate patch with
> > > > SEQ_CST (still a search and replace)? We can leave the tougher ones
> > > > like rte_ring as they are being worked on.
> > > 
> > > The FIXME makes it possible in the future to differentiate between the instances
> > > that still need review and the instances that have been reviewed where
> > > SEQ_CST was the correct choice. (Similarly for the choice of type for variables
> > > previously rte_atomicNN_t.)
> > Apologies, relooked at the heading of this patch, got confused with other patches.
> 
> yeah, i did the same thing this morning :)
> 
> > 
> > The changes Arm had done for rte_atomic_ to __atomic_xxx were not direct replacements. The algorithms were studied, relaxed where required, race conditions fixed, performance benchmarked. IMO, we need to go through the same steps here.
> > 
> > I looked at the series, we should just review the patch and make suggested changes. Are we constrained by any deadlines for this work?
> 
> i'm going to say yes but i'll qualify. the use of the rte_atomic_xxx
> APIs drags in extra work when creating a series that performs the actual
> conversions to the standard atomics.
> 
> if i don't decouple ring from rte_atomic_xxx that means i have to go
> convert all the rte_atomic.h to standard atomics and working around some
> of the implementation detail to do it is very time consuming. which
> then has further flow on effects because then i have to go fix every
> single driver that is still using rte_atomic.h.
> 
> incidentally i have a work in progress to decouple everything from
> rte_atomic.h (including all drivers) but it would really negatively
> impact getting standard atomics introduced if we had to serialize the
> introduction behind a total removal of rte_atomic or had to make
> changes to every consumer of the old rte_atomic APIs.
> 
> if we can get by with a comment on the rte_atomic_xxx lines in this
> series it would be helpful. when we bring the next series for standard
> atomics i'm not averse to introducing changes to the ordering in that series
> if requested so long as i can get the series up 'soon' so there is lots
> of review time runway for 23.11.
> 
> > 
> > I would suggest to drop 1/7. Arm is working on removing the non-C11 algorithm for rte_ring (not sure if we will be successful). I think it is better to explore this approach rather than the changes in patch 1/7.
> 
> i think my answer here is timing. i'd rather take the work from arm but
> if it isn't coming for a while then it becomes a blocker.
> 
> we're waiting for the 23.07 start before this series can be merged. how
> about we re-evaluate where arm is at when the merge window opens. we can
> then decide to drop 1/7 or not at that time?
ping?
any update if there is going to be a series from arm as an acceptable
replacement for patch 1/7? otherwise i think we should take the patch as
is. it isn't altering the semantics of the code and is fairly low line
count change so shouldn't disrupt any out of tree work as a result of
the churn.
please update asap, this is one of the two series that is preventing
submission of the first series converting to standard atomics for
review.
thanks!
> 
> ty
^ permalink raw reply	[flat|nested] 83+ messages in thread 
- * RE: [PATCH 0/7] replace rte atomics with GCC builtin atomics
  2023-05-02  3:37                       ` Tyler Retzlaff
@ 2023-05-02  4:31                         ` Honnappa Nagarahalli
  0 siblings, 0 replies; 83+ messages in thread
From: Honnappa Nagarahalli @ 2023-05-02  4:31 UTC (permalink / raw)
  To: Tyler Retzlaff
  Cc: Morten Brørup, Stephen Hemminger, dev, Ruifeng Wang, thomas,
	nd, Wathsala Wathawana Vithanage, nd
> -----Original Message-----
> From: Tyler Retzlaff <roretzla@linux.microsoft.com>
> Sent: Monday, May 1, 2023 10:38 PM
> To: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>
> Cc: Morten Brørup <mb@smartsharesystems.com>; Stephen Hemminger
> <stephen@networkplumber.org>; dev@dpdk.org; Ruifeng Wang
> <Ruifeng.Wang@arm.com>; thomas@monjalon.net; nd <nd@arm.com>
> Subject: Re: [PATCH 0/7] replace rte atomics with GCC builtin atomics
> 
> 
> On Wed, Mar 22, 2023 at 11:06:08AM -0700, Tyler Retzlaff wrote:
> > On Wed, Mar 22, 2023 at 05:38:12PM +0000, Honnappa Nagarahalli wrote:
> > >
> > >
> > > > -----Original Message-----
> > > > > From: Morten Brørup <mb@smartsharesystems.com>
> > > > Sent: Wednesday, March 22, 2023 12:08 PM
> > > > To: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>; Tyler
> > > > Retzlaff <roretzla@linux.microsoft.com>
> > > > Cc: Stephen Hemminger <stephen@networkplumber.org>;
> dev@dpdk.org;
> > > > Ruifeng Wang <Ruifeng.Wang@arm.com>; thomas@monjalon.net; nd
> > > > <nd@arm.com>; nd <nd@arm.com>
> > > > Subject: RE: [PATCH 0/7] replace rte atomics with GCC builtin
> > > > atomics
> > > >
> > > > > From: Honnappa Nagarahalli [mailto:Honnappa.Nagarahalli@arm.com]
> > > > > Sent: Wednesday, 22 March 2023 17.40
> > > > >
> > > > > > > From: Morten Brørup <mb@smartsharesystems.com>
> > > > > > Sent: Wednesday, March 22, 2023 11:14 AM
> > > > > >
> > > > > > > From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> > > > > > > Sent: Wednesday, 22 March 2023 16.30
> > > > > > >
> > > > > > > > On Wed, Mar 22, 2023 at 03:58:07PM +0100, Morten Brørup
> wrote:
> > > > > > > > > From: Tyler Retzlaff
> > > > > > > > > [mailto:roretzla@linux.microsoft.com]
> > > > > > > > > Sent: Wednesday, 22 March 2023 15.22
> > > > > > > > >
> > > > > > > > > > On Wed, Mar 22, 2023 at 12:28:44PM +0100, Morten Brørup
> wrote:
> > > > > > > > > > > From: Tyler Retzlaff
> > > > > > > > > > > [mailto:roretzla@linux.microsoft.com]
> > > > > > > > > > > Sent: Friday, 17 March 2023 22.49
> > > > > > > > > > >
> > > > > > > > > > > On Fri, Mar 17, 2023 at 02:42:26PM -0700, Stephen
> > > > > > > > > > > Hemminger
> > > > > > wrote:
> > > > > > > > > > > > On Fri, 17 Mar 2023 13:19:41 -0700 Tyler Retzlaff
> > > > > > > > > > > > <roretzla@linux.microsoft.com> wrote:
> > > > > > > > > > > >
> > > > > > > > > > > > > Replace the use of rte_atomic.h types and
> > > > > > > > > > > > > functions, instead use
> > > > > > > GCC
> > > > > > > > > > > > > supplied C++11 memory model builtins.
> > > > > > > > > > > > >
> > > > > > > > > > > > > This series covers the libraries and drivers
> > > > > > > > > > > > > that are built on
> > > > > > > > > Windows.
> > > > > > > > > > > > >
> > > > > > > > > > > > > The code has be converted to use the __atomic
> > > > > > > > > > > > > builtins
> > > > > but
> > > > > > > > > > > > > there
> > > > > > > are
> > > > > > > > > > > > > additional during conversion i notice that there
> > > > > > > > > > > > > may be some
> > > > > > > issues
> > > > > > > > > > > > > that need to be addressed.
> > > > > > > > > > > >
> > > > > > > > > > > > I don't think all these cmpset need to use SEQ_CST.
> > > > > > > > > > > > Especially for the places where it is used a loop,
> > > > > > > > > > > > might
> > > > > be
> > > > > > > > > > > > more efficient with some of the other memory models.
> > > > > > > > > > >
> > > > > > > > > > > i agree.
> > > > > > > > > > >
> > > > > > > > > > > however, i'm not trying to improve the code with
> > > > > > > > > > > this
> > > > > change,
> > > > > > > > > > > just decouple it from rte_atomics.h so trying my
> > > > > > > > > > > best to
> > > > > avoid
> > > > > > > > > > > any unnecessary semantic change.
> > > > > > > > > > >
> > > > > > > > > > > certainly if the maintainers of this code wish to
> > > > > > > > > > > weaken the ordering where appropriate after the
> > > > > > > > > > > change is merged they
> > > > > can
> > > > > > > > > > > do so and
> > > > > > > handily
> > > > > > > > > > > this change has enabled them to do so easily
> > > > > > > > > > > allowing them
> > > > > to
> > > > > > > > > > > test
> > > > > > > just
> > > > > > > > > > > their change in isolation.
> > > > > > > > > >
> > > > > > > > > > I agree with the two-step approach, where this first
> > > > > > > > > > step is a simple
> > > > > > > > > search-and-replacement; but I insist that you add a
> > > > > > > > > FIXME or similar note where you have blindly used
> > > > > > > > > SEQ_CST, indicating
> > > > > that
> > > > > > > > > the memory order
> > > > > > > needs to
> > > > > > > > > be reviewed and potentially corrected.
> > > > > > > > >
> > > > > > > > > i think the maintainers need to take some
> > > > > > > > > responsibility, if
> > > > > they
> > > > > > > > > see optimizations they missed when previously writing
> > > > > > > > > the code they need to follow up with a patch themselves.
> > > > > > > > > i can't do everything for them and marking things i'm
> > > > > > > > > not sure about will only lead to me having to churn
> > > > > > > > > patch series to remove the
> > > > > unwanted
> > > > > > comments later.
> > > > > > > >
> > > > > > > > The previous atomic functions didn't have the "memory order"
> > > > > > > > parameter, so
> > > > > > > the maintainers didn't have to think about it - and thus
> > > > > > > they didn't miss any optimizations when accepting the code.
> > > > > > > >
> > > > > > > > I also agree 100 % that it is not your responsibility to
> > > > > > > > consider
> > > > > or
> > > > > > > determine which memory order is appropriate!
> > > > > > > >
> > > > > > > > But I think you should mark the locations where you are
> > > > > > > > changing from the
> > > > > > > old rte_atomic functions (where no memory order optimization
> > > > > > > was
> > > > > > > available) to the new functions - to highlight where the
> > > > > > > option of memory ordering has been introduced and knowingly
> ignored (by you).
> > > > > > > >
> > > > > > >
> > > > > > > first, i have to apologize i confused myself about which of
> > > > > > > the many patch series i have up right now that you were commenting
> on.
> > > > > >
> > > > > > No worries... you are rushing through quite an effort for
> > > > > > this, so a
> > > > > little
> > > > > > confusion is perfectly understandable. Especially when I'm
> > > > > > replying to
> > > > > an ageing
> > > > > > email. :-)
> > > > > >
> > > > > > >
> > > > > > > let me ask for clarification in relation to this series.
> > > > > > >
> > > > > > > isn't that every single usage of the rte_atomic APIs?
> > > > > >
> > > > > > Probably, yes.
> > > > > >
> > > > > > > i mean are you
> > > > > > > literally asking for the entire patch series to look like
> > > > > > > the following patch snippet with the expectation that
> > > > > > > maintainers will come along and clean up/review after this series is
> merged?
> > > > > > >
> > > > > > > -rte_atomic_add32(&o, v);
> > > > > > > +//FIXME: opportunity for relaxing ordering constraint,
> > > > > > > +please
> > > > > review
> > > > > > > +__atomic_fetch_add(&o, v, order);
> > > > > >
> > > > > > Exactly. And something similar for the rte_atomicXX_t
> > > > > > variables
> > > > > changed to
> > > > > > intXX_t, such as the packet counters.
> > > > > >
> > > > > > Realistically, I don't expect the maintainers to clean them up
> > > > > > anytime
> > > > > soon. The
> > > > > > purpose is to make the FIXMEs stick until someone eventually
> > > > > > cleans
> > > > > them up, so
> > > > > > they are not forgotten as time passes.
> > > > > Cleaning up the rte_atomic APIs is a different effort. There is
> > > > > already lot of effort that has gone into this and there is more
> > > > > effort happening (rte_ring being a painful one)
> > > > >
> > > > > Instead of having FIXME, why not just send a separate patch with
> > > > > SEQ_CST (still a search and replace)? We can leave the tougher
> > > > > ones like rte_ring as they are being worked on.
> > > >
> > > > The FIXME makes it possible in the future to differentiate between
> > > > the instances that still need review and the instances that have
> > > > been reviewed where SEQ_CST was the correct choice. (Similarly for
> > > > the choice of type for variables previously rte_atomicNN_t.)
> > > Apologies, relooked at the heading of this patch, got confused with other
> patches.
> >
> > yeah, i did the same thing this morning :)
> >
> > >
> > > The changes Arm had done for rte_atomic_ to __atomic_xxx were not direct
> replacements. The algorithms were studied, relaxed where required, race
> conditions fixed, performance benchmarked. IMO, we need to go through the
> same steps here.
> > >
> > > I looked at the series, we should just review the patch and make suggested
> changes. Are we constrained by any deadlines for this work?
> >
> > i'm going to say yes but i'll qualify. the use of the rte_atomic_xxx
> > APIs drags in extra work when creating a series that performs the
> > actual conversions to the standard atomics.
> >
> > if i don't decouple ring from rte_atomic_xxx that means i have to go
> > convert all the rte_atomic.h to standard atomics and working around
> > some of the implementation detail to do it is very time consuming.
> > which then has further flow on effects because then i have to go fix
> > every single driver that is still using rte_atomic.h.
> >
> > incidentally i have a work in progress to decouple everything from
> > rte_atomic.h (including all drivers) but it would really negatively
> > impact getting standard atomics introduced if we had to serialize the
> > introduction behind a total removal of rte_atomic or had to make
> > changes to every consumer of the old rte_atomic APIs.
> >
> > if we can get by with a comment on the rte_atomic_xxx lines in this
> > series it would be helpful. when we bring the next series for standard
> > atomics i'm not averse to introducing changes to the ordering in that
> > series if requested so long as i can get the series up 'soon' so there
> > is lots of review time runway for 23.11.
> >
> > >
> > > I would suggest to drop 1/7. Arm is working on removing the non-C11
> algorithm for rte_ring (not sure if we will be successful). I think it is better to
> explore this approach rather than the changes in patch 1/7.
> >
> > i think my answer here is timing. i'd rather take the work from arm
> > but if it isn't coming for a while then it becomes a blocker.
> >
> > we're waiting for the 23.07 start before this series can be merged.
> > how about we re-evaluate where arm is at when the merge window opens.
> > we can then decide to drop 1/7 or not at that time?
> 
> ping?
> 
> any update if there is going to be a series from arm as an acceptable
> replacement for patch 1/7? otherwise i think we should take the patch as is. it
> isn't altering the semantics of the code and is fairly low line count change so
> shouldn't disrupt any out of tree work as a result of the churn.
Yes, we are working on a patch. There is an RFC [1], but we are still working on proving that the algorithm is correct. But, the plan is to find a solution (or present alternatives if there are no solutions) in the 23.07 release.
[1] https://patchwork.dpdk.org/project/dpdk/patch/20230421191642.217011-1-wathsala.vithanage@arm.com/
> 
> please update asap, this is one of the two series that is preventing submission of
> the first series converting to standard atomics for review.
> 
> thanks!
> 
> >
> > ty
^ permalink raw reply	[flat|nested] 83+ messages in thread 
 
 
 
 
 
 
 
 
 
 
 
 
- * [PATCH v2 0/7] replace rte atomics with GCC builtin atomics
  2023-03-17 20:19 [PATCH 0/7] replace rte atomics with GCC builtin atomics Tyler Retzlaff
                   ` (7 preceding siblings ...)
  2023-03-17 21:42 ` [PATCH 0/7] " Stephen Hemminger
@ 2023-03-23 22:34 ` Tyler Retzlaff
  2023-03-23 22:34   ` [PATCH v2 1/7] ring: " Tyler Retzlaff
                     ` (7 more replies)
  2023-03-23 22:53 ` [PATCH v3 " Tyler Retzlaff
                   ` (2 subsequent siblings)
  11 siblings, 8 replies; 83+ messages in thread
From: Tyler Retzlaff @ 2023-03-23 22:34 UTC (permalink / raw)
  To: dev
  Cc: Honnappa.Nagarahalli, Ruifeng.Wang, thomas, stephen, mb, Tyler Retzlaff
Replace the use of rte_atomic.h types and functions, instead use GCC
supplied C++11 memory model builtins.
This series covers the libraries and drivers that are built on Windows.
The code has been converted to use the __atomic builtins, but during
conversion I noticed that there may be some additional issues
that need to be addressed.
I'll comment in the patches where my concerns are so the maintainers
may comment.
v2:
  * comment code where optimizations may be possible now that memory
    order can be specified.
  * comment code where operations should potentially be atomic so that
    maintainers can review.
  * change a couple of variables labeled as counters to be unsigned.
Tyler Retzlaff (7):
  ring: replace rte atomics with GCC builtin atomics
  stack: replace rte atomics with GCC builtin atomics
  dma/idxd: replace rte atomics with GCC builtin atomics
  net/ice: replace rte atomics with GCC builtin atomics
  net/ixgbe: replace rte atomics with GCC builtin atomics
  net/null: replace rte atomics with GCC builtin atomics
  net/ring: replace rte atomics with GCC builtin atomics
 drivers/dma/idxd/idxd_internal.h |  3 +--
 drivers/dma/idxd/idxd_pci.c      |  8 +++++---
 drivers/net/ice/ice_dcf.c        |  1 -
 drivers/net/ice/ice_dcf_ethdev.c |  1 -
 drivers/net/ice/ice_ethdev.c     | 12 ++++++++----
 drivers/net/ixgbe/ixgbe_bypass.c |  1 -
 drivers/net/ixgbe/ixgbe_ethdev.c | 18 ++++++++++++------
 drivers/net/ixgbe/ixgbe_ethdev.h |  3 ++-
 drivers/net/ixgbe/ixgbe_flow.c   |  1 -
 drivers/net/ixgbe/ixgbe_rxtx.c   |  1 -
 drivers/net/null/rte_eth_null.c  | 28 ++++++++++++++++++----------
 drivers/net/ring/rte_eth_ring.c  | 26 ++++++++++++++++----------
 lib/ring/rte_ring_core.h         |  1 -
 lib/ring/rte_ring_generic_pvt.h  | 12 ++++++++----
 lib/stack/rte_stack_lf_generic.h | 16 +++++++++-------
 15 files changed, 79 insertions(+), 53 deletions(-)
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 83+ messages in thread
- * [PATCH v2 1/7] ring: replace rte atomics with GCC builtin atomics
  2023-03-23 22:34 ` [PATCH v2 " Tyler Retzlaff
@ 2023-03-23 22:34   ` Tyler Retzlaff
  2023-03-23 22:34   ` [PATCH v2 2/7] stack: " Tyler Retzlaff
                     ` (6 subsequent siblings)
  7 siblings, 0 replies; 83+ messages in thread
From: Tyler Retzlaff @ 2023-03-23 22:34 UTC (permalink / raw)
  To: dev
  Cc: Honnappa.Nagarahalli, Ruifeng.Wang, thomas, stephen, mb, Tyler Retzlaff
Replace the use of rte_atomic.h types and functions, instead use GCC
supplied C++11 memory model builtins.
Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/ring/rte_ring_core.h        |  1 -
 lib/ring/rte_ring_generic_pvt.h | 12 ++++++++----
 2 files changed, 8 insertions(+), 5 deletions(-)
diff --git a/lib/ring/rte_ring_core.h b/lib/ring/rte_ring_core.h
index 82b2370..b9c7860 100644
--- a/lib/ring/rte_ring_core.h
+++ b/lib/ring/rte_ring_core.h
@@ -31,7 +31,6 @@
 #include <rte_config.h>
 #include <rte_memory.h>
 #include <rte_lcore.h>
-#include <rte_atomic.h>
 #include <rte_branch_prediction.h>
 #include <rte_memzone.h>
 #include <rte_pause.h>
diff --git a/lib/ring/rte_ring_generic_pvt.h b/lib/ring/rte_ring_generic_pvt.h
index 5acb6e5..c284040 100644
--- a/lib/ring/rte_ring_generic_pvt.h
+++ b/lib/ring/rte_ring_generic_pvt.h
@@ -92,8 +92,10 @@
 		if (is_sp)
 			r->prod.head = *new_head, success = 1;
 		else
-			success = rte_atomic32_cmpset(&r->prod.head,
-					*old_head, *new_head);
+			// NOTE: review for potential ordering optimization
+			success = __atomic_compare_exchange_n(&r->prod.head,
+					old_head, *new_head, 0,
+					__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
 	} while (unlikely(success == 0));
 	return n;
 }
@@ -162,8 +164,10 @@
 			rte_smp_rmb();
 			success = 1;
 		} else {
-			success = rte_atomic32_cmpset(&r->cons.head, *old_head,
-					*new_head);
+			// NOTE: review for potential ordering optimization
+			success = __atomic_compare_exchange_n(&r->cons.head,
+					old_head, *new_head, 0,
+					__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
 		}
 	} while (unlikely(success == 0));
 	return n;
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 83+ messages in thread
- * [PATCH v2 2/7] stack: replace rte atomics with GCC builtin atomics
  2023-03-23 22:34 ` [PATCH v2 " Tyler Retzlaff
  2023-03-23 22:34   ` [PATCH v2 1/7] ring: " Tyler Retzlaff
@ 2023-03-23 22:34   ` Tyler Retzlaff
  2023-03-23 22:34   ` [PATCH v2 3/7] dma/idxd: " Tyler Retzlaff
                     ` (5 subsequent siblings)
  7 siblings, 0 replies; 83+ messages in thread
From: Tyler Retzlaff @ 2023-03-23 22:34 UTC (permalink / raw)
  To: dev
  Cc: Honnappa.Nagarahalli, Ruifeng.Wang, thomas, stephen, mb, Tyler Retzlaff
Replace the use of rte_atomic.h types and functions, instead use GCC
supplied C++11 memory model builtins.
Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/stack/rte_stack_lf_generic.h | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)
diff --git a/lib/stack/rte_stack_lf_generic.h b/lib/stack/rte_stack_lf_generic.h
index 7fa29ce..ffed2bf 100644
--- a/lib/stack/rte_stack_lf_generic.h
+++ b/lib/stack/rte_stack_lf_generic.h
@@ -26,8 +26,8 @@
 	 * elements. If the mempool is near-empty to the point that this is a
 	 * concern, the user should consider increasing the mempool size.
 	 */
-	return (unsigned int)rte_atomic64_read((rte_atomic64_t *)
-			&s->stack_lf.used.len);
+	// NOTE: review for potential ordering optimization
+	return __atomic_load_n(&s->stack_lf.used.len, __ATOMIC_SEQ_CST);
 }
 
 static __rte_always_inline void
@@ -67,8 +67,8 @@
 				1, __ATOMIC_RELEASE,
 				__ATOMIC_RELAXED);
 	} while (success == 0);
-
-	rte_atomic64_add((rte_atomic64_t *)&list->len, num);
+	// NOTE: review for potential ordering optimization
+	__atomic_fetch_add(&list->len, num, __ATOMIC_SEQ_CST);
 }
 
 static __rte_always_inline struct rte_stack_lf_elem *
@@ -82,14 +82,16 @@
 
 	/* Reserve num elements, if available */
 	while (1) {
-		uint64_t len = rte_atomic64_read((rte_atomic64_t *)&list->len);
+		// NOTE: review for potential ordering optimization
+		uint64_t len = __atomic_load_n(&list->len, __ATOMIC_SEQ_CST);
 
 		/* Does the list contain enough elements? */
 		if (unlikely(len < num))
 			return NULL;
 
-		if (rte_atomic64_cmpset((volatile uint64_t *)&list->len,
-					len, len - num))
+		// NOTE: review for potential ordering optimization
+		if (__atomic_compare_exchange_n(&list->len, &len, len - num,
+			0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
 			break;
 	}
 
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 83+ messages in thread
- * [PATCH v2 3/7] dma/idxd: replace rte atomics with GCC builtin atomics
  2023-03-23 22:34 ` [PATCH v2 " Tyler Retzlaff
  2023-03-23 22:34   ` [PATCH v2 1/7] ring: " Tyler Retzlaff
  2023-03-23 22:34   ` [PATCH v2 2/7] stack: " Tyler Retzlaff
@ 2023-03-23 22:34   ` Tyler Retzlaff
  2023-03-23 22:34   ` [PATCH v2 4/7] net/ice: " Tyler Retzlaff
                     ` (4 subsequent siblings)
  7 siblings, 0 replies; 83+ messages in thread
From: Tyler Retzlaff @ 2023-03-23 22:34 UTC (permalink / raw)
  To: dev
  Cc: Honnappa.Nagarahalli, Ruifeng.Wang, thomas, stephen, mb, Tyler Retzlaff
Replace the use of rte_atomic.h types and functions, instead use GCC
supplied C++11 memory model builtins.
Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 drivers/dma/idxd/idxd_internal.h | 3 +--
 drivers/dma/idxd/idxd_pci.c      | 8 +++++---
 2 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/drivers/dma/idxd/idxd_internal.h b/drivers/dma/idxd/idxd_internal.h
index 180a858..cd41777 100644
--- a/drivers/dma/idxd/idxd_internal.h
+++ b/drivers/dma/idxd/idxd_internal.h
@@ -7,7 +7,6 @@
 
 #include <rte_dmadev_pmd.h>
 #include <rte_spinlock.h>
-#include <rte_atomic.h>
 
 #include "idxd_hw_defs.h"
 
@@ -34,7 +33,7 @@ struct idxd_pci_common {
 	rte_spinlock_t lk;
 
 	uint8_t wq_cfg_sz;
-	rte_atomic16_t ref_count;
+	uint16_t ref_count;
 	volatile struct rte_idxd_bar0 *regs;
 	volatile uint32_t *wq_regs_base;
 	volatile struct rte_idxd_grpcfg *grp_regs;
diff --git a/drivers/dma/idxd/idxd_pci.c b/drivers/dma/idxd/idxd_pci.c
index 781fa02..89cce1d 100644
--- a/drivers/dma/idxd/idxd_pci.c
+++ b/drivers/dma/idxd/idxd_pci.c
@@ -6,7 +6,6 @@
 #include <rte_devargs.h>
 #include <rte_dmadev_pmd.h>
 #include <rte_malloc.h>
-#include <rte_atomic.h>
 
 #include "idxd_internal.h"
 
@@ -136,7 +135,9 @@
 	/* if this is the last WQ on the device, disable the device and free
 	 * the PCI struct
 	 */
-	is_last_wq = rte_atomic16_dec_and_test(&idxd->u.pci->ref_count);
+	// NOTE: review for potential ordering optimization
+	is_last_wq = __atomic_fetch_sub(&idxd->u.pci->ref_count, 1,
+		__ATOMIC_SEQ_CST) - 1 == 0;
 	if (is_last_wq) {
 		/* disable the device */
 		err_code = idxd_pci_dev_command(idxd, idxd_disable_dev);
@@ -350,7 +351,8 @@
 				free(idxd.u.pci);
 			return ret;
 		}
-		rte_atomic16_inc(&idxd.u.pci->ref_count);
+		// NOTE: review for potential ordering optimization
+		__atomic_fetch_add(&idxd.u.pci->ref_count, 1, __ATOMIC_SEQ_CST);
 	}
 
 	return 0;
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 83+ messages in thread
- * [PATCH v2 4/7] net/ice: replace rte atomics with GCC builtin atomics
  2023-03-23 22:34 ` [PATCH v2 " Tyler Retzlaff
                     ` (2 preceding siblings ...)
  2023-03-23 22:34   ` [PATCH v2 3/7] dma/idxd: " Tyler Retzlaff
@ 2023-03-23 22:34   ` Tyler Retzlaff
  2023-03-23 22:34   ` [PATCH v2 5/7] net/ixgbe: " Tyler Retzlaff
                     ` (3 subsequent siblings)
  7 siblings, 0 replies; 83+ messages in thread
From: Tyler Retzlaff @ 2023-03-23 22:34 UTC (permalink / raw)
  To: dev
  Cc: Honnappa.Nagarahalli, Ruifeng.Wang, thomas, stephen, mb, Tyler Retzlaff
Replace the use of rte_atomic.h types and functions, instead use GCC
supplied C++11 memory model builtins.
Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 drivers/net/ice/ice_dcf.c        |  1 -
 drivers/net/ice/ice_dcf_ethdev.c |  1 -
 drivers/net/ice/ice_ethdev.c     | 12 ++++++++----
 3 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/drivers/net/ice/ice_dcf.c b/drivers/net/ice/ice_dcf.c
index 1c3d22a..80d2cbd 100644
--- a/drivers/net/ice/ice_dcf.c
+++ b/drivers/net/ice/ice_dcf.c
@@ -14,7 +14,6 @@
 #include <rte_common.h>
 
 #include <rte_pci.h>
-#include <rte_atomic.h>
 #include <rte_eal.h>
 #include <rte_ether.h>
 #include <ethdev_driver.h>
diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c
index dcbf2af..13ff245 100644
--- a/drivers/net/ice/ice_dcf_ethdev.c
+++ b/drivers/net/ice/ice_dcf_ethdev.c
@@ -11,7 +11,6 @@
 #include <rte_interrupts.h>
 #include <rte_debug.h>
 #include <rte_pci.h>
-#include <rte_atomic.h>
 #include <rte_eal.h>
 #include <rte_ether.h>
 #include <ethdev_pci.h>
diff --git a/drivers/net/ice/ice_ethdev.c b/drivers/net/ice/ice_ethdev.c
index 9a88cf9..5608f6a 100644
--- a/drivers/net/ice/ice_ethdev.c
+++ b/drivers/net/ice/ice_ethdev.c
@@ -3927,8 +3927,10 @@ static int ice_init_rss(struct ice_pf *pf)
 	struct rte_eth_link *dst = link;
 	struct rte_eth_link *src = &dev->data->dev_link;
 
-	if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
-				*(uint64_t *)src) == 0)
+	// NOTE: review for potential ordering optimization
+	if (!__atomic_compare_exchange_n((uint64_t *)dst,
+		(uint64_t *)dst, *(uint64_t *)src, 0,
+		__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
 		return -1;
 
 	return 0;
@@ -3941,8 +3943,10 @@ static int ice_init_rss(struct ice_pf *pf)
 	struct rte_eth_link *dst = &dev->data->dev_link;
 	struct rte_eth_link *src = link;
 
-	if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
-				*(uint64_t *)src) == 0)
+	// NOTE: review for potential ordering optimization
+	if (!__atomic_compare_exchange_n((uint64_t *)dst,
+		(uint64_t *)dst, *(uint64_t *)src, 0,
+		__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
 		return -1;
 
 	return 0;
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 83+ messages in thread
- * [PATCH v2 5/7] net/ixgbe: replace rte atomics with GCC builtin atomics
  2023-03-23 22:34 ` [PATCH v2 " Tyler Retzlaff
                     ` (3 preceding siblings ...)
  2023-03-23 22:34   ` [PATCH v2 4/7] net/ice: " Tyler Retzlaff
@ 2023-03-23 22:34   ` Tyler Retzlaff
  2023-03-23 22:34   ` [PATCH v2 6/7] net/null: " Tyler Retzlaff
                     ` (2 subsequent siblings)
  7 siblings, 0 replies; 83+ messages in thread
From: Tyler Retzlaff @ 2023-03-23 22:34 UTC (permalink / raw)
  To: dev
  Cc: Honnappa.Nagarahalli, Ruifeng.Wang, thomas, stephen, mb, Tyler Retzlaff
Replace the use of rte_atomic.h types and functions, instead use GCC
supplied C++11 memory model builtins.
Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 drivers/net/ixgbe/ixgbe_bypass.c |  1 -
 drivers/net/ixgbe/ixgbe_ethdev.c | 18 ++++++++++++------
 drivers/net/ixgbe/ixgbe_ethdev.h |  3 ++-
 drivers/net/ixgbe/ixgbe_flow.c   |  1 -
 drivers/net/ixgbe/ixgbe_rxtx.c   |  1 -
 5 files changed, 14 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ixgbe/ixgbe_bypass.c b/drivers/net/ixgbe/ixgbe_bypass.c
index 94f34a2..f615d18 100644
--- a/drivers/net/ixgbe/ixgbe_bypass.c
+++ b/drivers/net/ixgbe/ixgbe_bypass.c
@@ -3,7 +3,6 @@
  */
 
 #include <time.h>
-#include <rte_atomic.h>
 #include <ethdev_driver.h>
 #include "ixgbe_ethdev.h"
 #include "ixgbe_bypass_api.h"
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index 88118bc..2d575f5 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -1127,7 +1127,8 @@ struct rte_ixgbe_xstats_name_off {
 		return 0;
 	}
 
-	rte_atomic32_clear(&ad->link_thread_running);
+	// NOTE: review for potential ordering optimization
+	__atomic_clear(&ad->link_thread_running, __ATOMIC_SEQ_CST);
 	ixgbe_parse_devargs(eth_dev->data->dev_private,
 			    pci_dev->device.devargs);
 	rte_eth_copy_pci_info(eth_dev, pci_dev);
@@ -1625,7 +1626,8 @@ static int ixgbe_l2_tn_filter_init(struct rte_eth_dev *eth_dev)
 		return 0;
 	}
 
-	rte_atomic32_clear(&ad->link_thread_running);
+	// NOTE: review for potential ordering optimization
+	__atomic_clear(&ad->link_thread_running, __ATOMIC_SEQ_CST);
 	ixgbevf_parse_devargs(eth_dev->data->dev_private,
 			      pci_dev->device.devargs);
 
@@ -4186,7 +4188,8 @@ static int ixgbevf_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
 	struct ixgbe_adapter *ad = dev->data->dev_private;
 	uint32_t timeout = timeout_ms ? timeout_ms : WARNING_TIMEOUT;
 
-	while (rte_atomic32_read(&ad->link_thread_running)) {
+	// NOTE: review for potential ordering optimization
+	while (__atomic_load_n(&ad->link_thread_running, __ATOMIC_SEQ_CST)) {
 		msec_delay(1);
 		timeout--;
 
@@ -4222,7 +4225,8 @@ static int ixgbevf_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
 	ixgbe_setup_link(hw, speed, true);
 
 	intr->flags &= ~IXGBE_FLAG_NEED_LINK_CONFIG;
-	rte_atomic32_clear(&ad->link_thread_running);
+	// NOTE: review for potential ordering optimization
+	__atomic_clear(&ad->link_thread_running, __ATOMIC_SEQ_CST);
 	return NULL;
 }
 
@@ -4317,7 +4321,8 @@ static int ixgbevf_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
 	if (link_up == 0) {
 		if (ixgbe_get_media_type(hw) == ixgbe_media_type_fiber) {
 			ixgbe_dev_wait_setup_link_complete(dev, 0);
-			if (rte_atomic32_test_and_set(&ad->link_thread_running)) {
+			// NOTE: review for potential ordering optimization
+			if (__atomic_test_and_set(&ad->link_thread_running, __ATOMIC_SEQ_CST)) {
 				/* To avoid race condition between threads, set
 				 * the IXGBE_FLAG_NEED_LINK_CONFIG flag only
 				 * when there is no link thread running.
@@ -4330,7 +4335,8 @@ static int ixgbevf_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
 					dev) < 0) {
 					PMD_DRV_LOG(ERR,
 						"Create link thread failed!");
-					rte_atomic32_clear(&ad->link_thread_running);
+					// NOTE: review for potential ordering optimization
+					__atomic_clear(&ad->link_thread_running, __ATOMIC_SEQ_CST);
 				}
 			} else {
 				PMD_DRV_LOG(ERR,
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.h b/drivers/net/ixgbe/ixgbe_ethdev.h
index 48290af..2ca6998 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.h
+++ b/drivers/net/ixgbe/ixgbe_ethdev.h
@@ -6,6 +6,7 @@
 #define _IXGBE_ETHDEV_H_
 
 #include <stdint.h>
+#include <stdbool.h>
 #include <sys/queue.h>
 
 #include "base/ixgbe_type.h"
@@ -510,7 +511,7 @@ struct ixgbe_adapter {
 	 */
 	uint8_t pflink_fullchk;
 	uint8_t mac_ctrl_frame_fwd;
-	rte_atomic32_t link_thread_running;
+	bool link_thread_running;
 	pthread_t link_thread_tid;
 };
 
diff --git a/drivers/net/ixgbe/ixgbe_flow.c b/drivers/net/ixgbe/ixgbe_flow.c
index eac81ee..687341c 100644
--- a/drivers/net/ixgbe/ixgbe_flow.c
+++ b/drivers/net/ixgbe/ixgbe_flow.c
@@ -18,7 +18,6 @@
 #include <rte_log.h>
 #include <rte_debug.h>
 #include <rte_pci.h>
-#include <rte_atomic.h>
 #include <rte_branch_prediction.h>
 #include <rte_memory.h>
 #include <rte_eal.h>
diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index c9d6ca9..8d7251d 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -27,7 +27,6 @@
 #include <rte_eal.h>
 #include <rte_per_lcore.h>
 #include <rte_lcore.h>
-#include <rte_atomic.h>
 #include <rte_branch_prediction.h>
 #include <rte_mempool.h>
 #include <rte_malloc.h>
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 83+ messages in thread
- * [PATCH v2 6/7] net/null: replace rte atomics with GCC builtin atomics
  2023-03-23 22:34 ` [PATCH v2 " Tyler Retzlaff
                     ` (4 preceding siblings ...)
  2023-03-23 22:34   ` [PATCH v2 5/7] net/ixgbe: " Tyler Retzlaff
@ 2023-03-23 22:34   ` Tyler Retzlaff
  2023-03-23 22:34   ` [PATCH v2 7/7] net/ring: " Tyler Retzlaff
  2023-03-24  7:07   ` [PATCH v2 0/7] " Morten Brørup
  7 siblings, 0 replies; 83+ messages in thread
From: Tyler Retzlaff @ 2023-03-23 22:34 UTC (permalink / raw)
  To: dev
  Cc: Honnappa.Nagarahalli, Ruifeng.Wang, thomas, stephen, mb, Tyler Retzlaff
Replace the use of rte_atomic.h types and functions, instead use GCC
supplied C++11 memory model builtins.
Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 drivers/net/null/rte_eth_null.c | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)
diff --git a/drivers/net/null/rte_eth_null.c b/drivers/net/null/rte_eth_null.c
index 47d9554..6a115f8 100644
--- a/drivers/net/null/rte_eth_null.c
+++ b/drivers/net/null/rte_eth_null.c
@@ -37,8 +37,8 @@ struct null_queue {
 	struct rte_mempool *mb_pool;
 	struct rte_mbuf *dummy_packet;
 
-	rte_atomic64_t rx_pkts;
-	rte_atomic64_t tx_pkts;
+	uint64_t rx_pkts;
+	uint64_t tx_pkts;
 };
 
 struct pmd_options {
@@ -101,7 +101,8 @@ struct pmd_internals {
 		bufs[i]->port = h->internals->port_id;
 	}
 
-	rte_atomic64_add(&(h->rx_pkts), i);
+	// NOTE: review for potential ordering optimization
+	__atomic_fetch_add(&h->rx_pkts, i, __ATOMIC_SEQ_CST);
 
 	return i;
 }
@@ -128,7 +129,8 @@ struct pmd_internals {
 		bufs[i]->port = h->internals->port_id;
 	}
 
-	rte_atomic64_add(&(h->rx_pkts), i);
+	// NOTE: review for potential ordering optimization
+	__atomic_fetch_add(&h->rx_pkts, i, __ATOMIC_SEQ_CST);
 
 	return i;
 }
@@ -152,7 +154,8 @@ struct pmd_internals {
 	for (i = 0; i < nb_bufs; i++)
 		rte_pktmbuf_free(bufs[i]);
 
-	rte_atomic64_add(&(h->tx_pkts), i);
+	// NOTE: review for potential ordering optimization
+	__atomic_fetch_add(&h->tx_pkts, i, __ATOMIC_SEQ_CST);
 
 	return i;
 }
@@ -174,7 +177,8 @@ struct pmd_internals {
 		rte_pktmbuf_free(bufs[i]);
 	}
 
-	rte_atomic64_add(&(h->tx_pkts), i);
+	// NOTE: review for potential ordering optimization
+	__atomic_fetch_add(&h->tx_pkts, i, __ATOMIC_SEQ_CST);
 
 	return i;
 }
@@ -316,8 +320,9 @@ struct pmd_internals {
 			RTE_MIN(dev->data->nb_rx_queues,
 				RTE_DIM(internal->rx_null_queues)));
 	for (i = 0; i < num_stats; i++) {
+		// NOTE: review for atomic access
 		igb_stats->q_ipackets[i] =
-			internal->rx_null_queues[i].rx_pkts.cnt;
+			internal->rx_null_queues[i].rx_pkts;
 		rx_total += igb_stats->q_ipackets[i];
 	}
 
@@ -325,8 +330,9 @@ struct pmd_internals {
 			RTE_MIN(dev->data->nb_tx_queues,
 				RTE_DIM(internal->tx_null_queues)));
 	for (i = 0; i < num_stats; i++) {
+		// NOTE: review for atomic access
 		igb_stats->q_opackets[i] =
-			internal->tx_null_queues[i].tx_pkts.cnt;
+			internal->tx_null_queues[i].tx_pkts;
 		tx_total += igb_stats->q_opackets[i];
 	}
 
@@ -347,9 +353,11 @@ struct pmd_internals {
 
 	internal = dev->data->dev_private;
 	for (i = 0; i < RTE_DIM(internal->rx_null_queues); i++)
-		internal->rx_null_queues[i].rx_pkts.cnt = 0;
+		// NOTE: review for atomic access
+		internal->rx_null_queues[i].rx_pkts = 0;
 	for (i = 0; i < RTE_DIM(internal->tx_null_queues); i++)
-		internal->tx_null_queues[i].tx_pkts.cnt = 0;
+		// NOTE: review for atomic access
+		internal->tx_null_queues[i].tx_pkts = 0;
 
 	return 0;
 }
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 83+ messages in thread
- * [PATCH v2 7/7] net/ring: replace rte atomics with GCC builtin atomics
  2023-03-23 22:34 ` [PATCH v2 " Tyler Retzlaff
                     ` (5 preceding siblings ...)
  2023-03-23 22:34   ` [PATCH v2 6/7] net/null: " Tyler Retzlaff
@ 2023-03-23 22:34   ` Tyler Retzlaff
  2023-03-24  7:07   ` [PATCH v2 0/7] " Morten Brørup
  7 siblings, 0 replies; 83+ messages in thread
From: Tyler Retzlaff @ 2023-03-23 22:34 UTC (permalink / raw)
  To: dev
  Cc: Honnappa.Nagarahalli, Ruifeng.Wang, thomas, stephen, mb, Tyler Retzlaff
Replace the use of rte_atomic.h types and functions, instead use GCC
supplied C++11 memory model builtins.
Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 drivers/net/ring/rte_eth_ring.c | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ring/rte_eth_ring.c b/drivers/net/ring/rte_eth_ring.c
index e8bc9b6..fb7f0a0 100644
--- a/drivers/net/ring/rte_eth_ring.c
+++ b/drivers/net/ring/rte_eth_ring.c
@@ -44,8 +44,8 @@ enum dev_action {
 
 struct ring_queue {
 	struct rte_ring *rng;
-	rte_atomic64_t rx_pkts;
-	rte_atomic64_t tx_pkts;
+	uint64_t rx_pkts;
+	uint64_t tx_pkts;
 };
 
 struct pmd_internals {
@@ -80,9 +80,10 @@ struct pmd_internals {
 	const uint16_t nb_rx = (uint16_t)rte_ring_dequeue_burst(r->rng,
 			ptrs, nb_bufs, NULL);
 	if (r->rng->flags & RING_F_SC_DEQ)
-		r->rx_pkts.cnt += nb_rx;
+		r->rx_pkts += nb_rx;
 	else
-		rte_atomic64_add(&(r->rx_pkts), nb_rx);
+		// NOTE: review for potential ordering optimization
+		__atomic_fetch_add(&r->rx_pkts, nb_rx, __ATOMIC_SEQ_CST);
 	return nb_rx;
 }
 
@@ -94,9 +95,10 @@ struct pmd_internals {
 	const uint16_t nb_tx = (uint16_t)rte_ring_enqueue_burst(r->rng,
 			ptrs, nb_bufs, NULL);
 	if (r->rng->flags & RING_F_SP_ENQ)
-		r->tx_pkts.cnt += nb_tx;
+		r->tx_pkts += nb_tx;
 	else
-		rte_atomic64_add(&(r->tx_pkts), nb_tx);
+		// NOTE: review for potential ordering optimization
+		__atomic_fetch_add(&r->tx_pkts, nb_tx, __ATOMIC_SEQ_CST);
 	return nb_tx;
 }
 
@@ -184,13 +186,15 @@ struct pmd_internals {
 
 	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
 			i < dev->data->nb_rx_queues; i++) {
-		stats->q_ipackets[i] = internal->rx_ring_queues[i].rx_pkts.cnt;
+		// NOTE: review for atomic access
+		stats->q_ipackets[i] = internal->rx_ring_queues[i].rx_pkts;
 		rx_total += stats->q_ipackets[i];
 	}
 
 	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
 			i < dev->data->nb_tx_queues; i++) {
-		stats->q_opackets[i] = internal->tx_ring_queues[i].tx_pkts.cnt;
+		// NOTE: review for atomic access
+		stats->q_opackets[i] = internal->tx_ring_queues[i].tx_pkts;
 		tx_total += stats->q_opackets[i];
 	}
 
@@ -207,9 +211,11 @@ struct pmd_internals {
 	struct pmd_internals *internal = dev->data->dev_private;
 
 	for (i = 0; i < dev->data->nb_rx_queues; i++)
-		internal->rx_ring_queues[i].rx_pkts.cnt = 0;
+		// NOTE: review for atomic access
+		internal->rx_ring_queues[i].rx_pkts = 0;
 	for (i = 0; i < dev->data->nb_tx_queues; i++)
-		internal->tx_ring_queues[i].tx_pkts.cnt = 0;
+		// NOTE: review for atomic access
+		internal->tx_ring_queues[i].tx_pkts = 0;
 
 	return 0;
 }
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 83+ messages in thread
- * RE: [PATCH v2 0/7] replace rte atomics with GCC builtin atomics
  2023-03-23 22:34 ` [PATCH v2 " Tyler Retzlaff
                     ` (6 preceding siblings ...)
  2023-03-23 22:34   ` [PATCH v2 7/7] net/ring: " Tyler Retzlaff
@ 2023-03-24  7:07   ` Morten Brørup
  7 siblings, 0 replies; 83+ messages in thread
From: Morten Brørup @ 2023-03-24  7:07 UTC (permalink / raw)
  To: Tyler Retzlaff, dev; +Cc: Honnappa.Nagarahalli, Ruifeng.Wang, thomas, stephen
> From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> Sent: Thursday, 23 March 2023 23.35
> 
> Replace the use of rte_atomic.h types and functions, instead use GCC
> supplied C++11 memory model builtins.
> 
> This series covers the libraries and drivers that are built on Windows.
> 
> The code has been converted to use the __atomic builtins, but during
> conversion I noticed that there may be some additional issues that need
> to be addressed.
> 
> I'll comment in the patches where my concerns are so the maintainers
> may comment.
> 
> v2:
>   * comment code where optimizations may be possible now that memory
>     order can be specified.
>   * comment code where operations should potentially be atomic so that
>     maintainers can review.
>   * change a couple of variables labeled as counters to be unsigned.
> 
All good.
Series-acked-by: Morten Brørup <mb@smartsharesystems.com>
^ permalink raw reply	[flat|nested] 83+ messages in thread
 
- * [PATCH v3 0/7] replace rte atomics with GCC builtin atomics
  2023-03-17 20:19 [PATCH 0/7] replace rte atomics with GCC builtin atomics Tyler Retzlaff
                   ` (8 preceding siblings ...)
  2023-03-23 22:34 ` [PATCH v2 " Tyler Retzlaff
@ 2023-03-23 22:53 ` Tyler Retzlaff
  2023-03-23 22:53   ` [PATCH v3 1/7] ring: " Tyler Retzlaff
                     ` (8 more replies)
  2023-06-02 19:45 ` [PATCH v4 0/6] " Tyler Retzlaff
  2023-06-06 21:45 ` [PATCH v5 0/6] " Tyler Retzlaff
  11 siblings, 9 replies; 83+ messages in thread
From: Tyler Retzlaff @ 2023-03-23 22:53 UTC (permalink / raw)
  To: dev
  Cc: Honnappa.Nagarahalli, Ruifeng.Wang, thomas, stephen, mb, Tyler Retzlaff
Replace the use of rte_atomic.h types and functions, instead use GCC
supplied C++11 memory model builtins.
This series covers the libraries and drivers that are built on Windows.
The code has been converted to use the __atomic builtins, but during
conversion I noticed that there may be some additional issues that need
to be addressed.
I'll comment in the patches where my concerns are so the maintainers
may comment.
v3:
  * style, don't use c99 comments
v2:
  * comment code where optimizations may be possible now that memory
    order can be specified.
  * comment code where operations should potentially be atomic so that
    maintainers can review.
  * change a couple of variables labeled as counters to be unsigned.
Tyler Retzlaff (7):
  ring: replace rte atomics with GCC builtin atomics
  stack: replace rte atomics with GCC builtin atomics
  dma/idxd: replace rte atomics with GCC builtin atomics
  net/ice: replace rte atomics with GCC builtin atomics
  net/ixgbe: replace rte atomics with GCC builtin atomics
  net/null: replace rte atomics with GCC builtin atomics
  net/ring: replace rte atomics with GCC builtin atomics
 drivers/dma/idxd/idxd_internal.h |  3 +--
 drivers/dma/idxd/idxd_pci.c      |  8 +++++---
 drivers/net/ice/ice_dcf.c        |  1 -
 drivers/net/ice/ice_dcf_ethdev.c |  1 -
 drivers/net/ice/ice_ethdev.c     | 12 ++++++++----
 drivers/net/ixgbe/ixgbe_bypass.c |  1 -
 drivers/net/ixgbe/ixgbe_ethdev.c | 18 ++++++++++++------
 drivers/net/ixgbe/ixgbe_ethdev.h |  3 ++-
 drivers/net/ixgbe/ixgbe_flow.c   |  1 -
 drivers/net/ixgbe/ixgbe_rxtx.c   |  1 -
 drivers/net/null/rte_eth_null.c  | 28 ++++++++++++++++++----------
 drivers/net/ring/rte_eth_ring.c  | 26 ++++++++++++++++----------
 lib/ring/rte_ring_core.h         |  1 -
 lib/ring/rte_ring_generic_pvt.h  | 12 ++++++++----
 lib/stack/rte_stack_lf_generic.h | 16 +++++++++-------
 15 files changed, 79 insertions(+), 53 deletions(-)
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 83+ messages in thread
- * [PATCH v3 1/7] ring: replace rte atomics with GCC builtin atomics
  2023-03-23 22:53 ` [PATCH v3 " Tyler Retzlaff
@ 2023-03-23 22:53   ` Tyler Retzlaff
  2023-03-23 22:53   ` [PATCH v3 2/7] stack: " Tyler Retzlaff
                     ` (7 subsequent siblings)
  8 siblings, 0 replies; 83+ messages in thread
From: Tyler Retzlaff @ 2023-03-23 22:53 UTC (permalink / raw)
  To: dev
  Cc: Honnappa.Nagarahalli, Ruifeng.Wang, thomas, stephen, mb, Tyler Retzlaff
Replace the use of rte_atomic.h types and functions, instead use GCC
supplied C++11 memory model builtins.
Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/ring/rte_ring_core.h        |  1 -
 lib/ring/rte_ring_generic_pvt.h | 12 ++++++++----
 2 files changed, 8 insertions(+), 5 deletions(-)
diff --git a/lib/ring/rte_ring_core.h b/lib/ring/rte_ring_core.h
index 82b2370..b9c7860 100644
--- a/lib/ring/rte_ring_core.h
+++ b/lib/ring/rte_ring_core.h
@@ -31,7 +31,6 @@
 #include <rte_config.h>
 #include <rte_memory.h>
 #include <rte_lcore.h>
-#include <rte_atomic.h>
 #include <rte_branch_prediction.h>
 #include <rte_memzone.h>
 #include <rte_pause.h>
diff --git a/lib/ring/rte_ring_generic_pvt.h b/lib/ring/rte_ring_generic_pvt.h
index 5acb6e5..caa4c74 100644
--- a/lib/ring/rte_ring_generic_pvt.h
+++ b/lib/ring/rte_ring_generic_pvt.h
@@ -92,8 +92,10 @@
 		if (is_sp)
 			r->prod.head = *new_head, success = 1;
 		else
-			success = rte_atomic32_cmpset(&r->prod.head,
-					*old_head, *new_head);
+			/* NOTE: review for potential ordering optimization */
+			success = __atomic_compare_exchange_n(&r->prod.head,
+					old_head, *new_head, 0,
+					__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
 	} while (unlikely(success == 0));
 	return n;
 }
@@ -162,8 +164,10 @@
 			rte_smp_rmb();
 			success = 1;
 		} else {
-			success = rte_atomic32_cmpset(&r->cons.head, *old_head,
-					*new_head);
+			/* NOTE: review for potential ordering optimization */
+			success = __atomic_compare_exchange_n(&r->cons.head,
+					old_head, *new_head, 0,
+					__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
 		}
 	} while (unlikely(success == 0));
 	return n;
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 83+ messages in thread
- * [PATCH v3 2/7] stack: replace rte atomics with GCC builtin atomics
  2023-03-23 22:53 ` [PATCH v3 " Tyler Retzlaff
  2023-03-23 22:53   ` [PATCH v3 1/7] ring: " Tyler Retzlaff
@ 2023-03-23 22:53   ` Tyler Retzlaff
  2023-05-24 20:08     ` David Marchand
  2023-03-23 22:53   ` [PATCH v3 3/7] dma/idxd: " Tyler Retzlaff
                     ` (6 subsequent siblings)
  8 siblings, 1 reply; 83+ messages in thread
From: Tyler Retzlaff @ 2023-03-23 22:53 UTC (permalink / raw)
  To: dev
  Cc: Honnappa.Nagarahalli, Ruifeng.Wang, thomas, stephen, mb, Tyler Retzlaff
Replace the use of rte_atomic.h types and functions, instead use GCC
supplied C++11 memory model builtins.
Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/stack/rte_stack_lf_generic.h | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)
diff --git a/lib/stack/rte_stack_lf_generic.h b/lib/stack/rte_stack_lf_generic.h
index 7fa29ce..aad3747 100644
--- a/lib/stack/rte_stack_lf_generic.h
+++ b/lib/stack/rte_stack_lf_generic.h
@@ -26,8 +26,8 @@
 	 * elements. If the mempool is near-empty to the point that this is a
 	 * concern, the user should consider increasing the mempool size.
 	 */
-	return (unsigned int)rte_atomic64_read((rte_atomic64_t *)
-			&s->stack_lf.used.len);
+	/* NOTE: review for potential ordering optimization */
+	return __atomic_load_n(&s->stack_lf.used.len, __ATOMIC_SEQ_CST);
 }
 
 static __rte_always_inline void
@@ -67,8 +67,8 @@
 				1, __ATOMIC_RELEASE,
 				__ATOMIC_RELAXED);
 	} while (success == 0);
-
-	rte_atomic64_add((rte_atomic64_t *)&list->len, num);
+	/* NOTE: review for potential ordering optimization */
+	__atomic_fetch_add(&list->len, num, __ATOMIC_SEQ_CST);
 }
 
 static __rte_always_inline struct rte_stack_lf_elem *
@@ -82,14 +82,16 @@
 
 	/* Reserve num elements, if available */
 	while (1) {
-		uint64_t len = rte_atomic64_read((rte_atomic64_t *)&list->len);
+		/* NOTE: review for potential ordering optimization */
+		uint64_t len = __atomic_load_n(&list->len, __ATOMIC_SEQ_CST);
 
 		/* Does the list contain enough elements? */
 		if (unlikely(len < num))
 			return NULL;
 
-		if (rte_atomic64_cmpset((volatile uint64_t *)&list->len,
-					len, len - num))
+		/* NOTE: review for potential ordering optimization */
+		if (__atomic_compare_exchange_n(&list->len, &len, len - num,
+			0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
 			break;
 	}
 
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 83+ messages in thread
- * Re: [PATCH v3 2/7] stack: replace rte atomics with GCC builtin atomics
  2023-03-23 22:53   ` [PATCH v3 2/7] stack: " Tyler Retzlaff
@ 2023-05-24 20:08     ` David Marchand
  0 siblings, 0 replies; 83+ messages in thread
From: David Marchand @ 2023-05-24 20:08 UTC (permalink / raw)
  To: Olivier Matz
  Cc: dev, Honnappa.Nagarahalli, Ruifeng.Wang, thomas, stephen, mb,
	Tyler Retzlaff
Hello Olivier,
Review please.
On Thu, Mar 23, 2023 at 11:54 PM Tyler Retzlaff
<roretzla@linux.microsoft.com> wrote:
>
> Replace the use of rte_atomic.h types and functions, instead use GCC
> supplied C++11 memory model builtins.
>
> Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> ---
>  lib/stack/rte_stack_lf_generic.h | 16 +++++++++-------
>  1 file changed, 9 insertions(+), 7 deletions(-)
>
> diff --git a/lib/stack/rte_stack_lf_generic.h b/lib/stack/rte_stack_lf_generic.h
> index 7fa29ce..aad3747 100644
> --- a/lib/stack/rte_stack_lf_generic.h
> +++ b/lib/stack/rte_stack_lf_generic.h
> @@ -26,8 +26,8 @@
>          * elements. If the mempool is near-empty to the point that this is a
>          * concern, the user should consider increasing the mempool size.
>          */
> -       return (unsigned int)rte_atomic64_read((rte_atomic64_t *)
> -                       &s->stack_lf.used.len);
> +       /* NOTE: review for potential ordering optimization */
> +       return __atomic_load_n(&s->stack_lf.used.len, __ATOMIC_SEQ_CST);
>  }
>
>  static __rte_always_inline void
> @@ -67,8 +67,8 @@
>                                 1, __ATOMIC_RELEASE,
>                                 __ATOMIC_RELAXED);
>         } while (success == 0);
> -
> -       rte_atomic64_add((rte_atomic64_t *)&list->len, num);
> +       /* NOTE: review for potential ordering optimization */
> +       __atomic_fetch_add(&list->len, num, __ATOMIC_SEQ_CST);
>  }
>
>  static __rte_always_inline struct rte_stack_lf_elem *
> @@ -82,14 +82,16 @@
>
>         /* Reserve num elements, if available */
>         while (1) {
> -               uint64_t len = rte_atomic64_read((rte_atomic64_t *)&list->len);
> +               /* NOTE: review for potential ordering optimization */
> +               uint64_t len = __atomic_load_n(&list->len, __ATOMIC_SEQ_CST);
>
>                 /* Does the list contain enough elements? */
>                 if (unlikely(len < num))
>                         return NULL;
>
> -               if (rte_atomic64_cmpset((volatile uint64_t *)&list->len,
> -                                       len, len - num))
> +               /* NOTE: review for potential ordering optimization */
> +               if (__atomic_compare_exchange_n(&list->len, &len, len - num,
> +                       0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
>                         break;
>         }
>
> --
> 1.8.3.1
>
-- 
David Marchand
^ permalink raw reply	[flat|nested] 83+ messages in thread
 
- * [PATCH v3 3/7] dma/idxd: replace rte atomics with GCC builtin atomics
  2023-03-23 22:53 ` [PATCH v3 " Tyler Retzlaff
  2023-03-23 22:53   ` [PATCH v3 1/7] ring: " Tyler Retzlaff
  2023-03-23 22:53   ` [PATCH v3 2/7] stack: " Tyler Retzlaff
@ 2023-03-23 22:53   ` Tyler Retzlaff
  2023-05-24 20:09     ` David Marchand
  2023-05-25 12:57     ` Kevin Laatz
  2023-03-23 22:53   ` [PATCH v3 4/7] net/ice: " Tyler Retzlaff
                     ` (5 subsequent siblings)
  8 siblings, 2 replies; 83+ messages in thread
From: Tyler Retzlaff @ 2023-03-23 22:53 UTC (permalink / raw)
  To: dev
  Cc: Honnappa.Nagarahalli, Ruifeng.Wang, thomas, stephen, mb, Tyler Retzlaff
Replace the use of rte_atomic.h types and functions, instead use GCC
supplied C++11 memory model builtins.
Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 drivers/dma/idxd/idxd_internal.h | 3 +--
 drivers/dma/idxd/idxd_pci.c      | 8 +++++---
 2 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/drivers/dma/idxd/idxd_internal.h b/drivers/dma/idxd/idxd_internal.h
index 180a858..cd41777 100644
--- a/drivers/dma/idxd/idxd_internal.h
+++ b/drivers/dma/idxd/idxd_internal.h
@@ -7,7 +7,6 @@
 
 #include <rte_dmadev_pmd.h>
 #include <rte_spinlock.h>
-#include <rte_atomic.h>
 
 #include "idxd_hw_defs.h"
 
@@ -34,7 +33,7 @@ struct idxd_pci_common {
 	rte_spinlock_t lk;
 
 	uint8_t wq_cfg_sz;
-	rte_atomic16_t ref_count;
+	uint16_t ref_count;
 	volatile struct rte_idxd_bar0 *regs;
 	volatile uint32_t *wq_regs_base;
 	volatile struct rte_idxd_grpcfg *grp_regs;
diff --git a/drivers/dma/idxd/idxd_pci.c b/drivers/dma/idxd/idxd_pci.c
index 781fa02..2de5d15 100644
--- a/drivers/dma/idxd/idxd_pci.c
+++ b/drivers/dma/idxd/idxd_pci.c
@@ -6,7 +6,6 @@
 #include <rte_devargs.h>
 #include <rte_dmadev_pmd.h>
 #include <rte_malloc.h>
-#include <rte_atomic.h>
 
 #include "idxd_internal.h"
 
@@ -136,7 +135,9 @@
 	/* if this is the last WQ on the device, disable the device and free
 	 * the PCI struct
 	 */
-	is_last_wq = rte_atomic16_dec_and_test(&idxd->u.pci->ref_count);
+	/* NOTE: review for potential ordering optimization */
+	is_last_wq = __atomic_fetch_sub(&idxd->u.pci->ref_count, 1,
+		__ATOMIC_SEQ_CST) - 1 == 0;
 	if (is_last_wq) {
 		/* disable the device */
 		err_code = idxd_pci_dev_command(idxd, idxd_disable_dev);
@@ -350,7 +351,8 @@
 				free(idxd.u.pci);
 			return ret;
 		}
-		rte_atomic16_inc(&idxd.u.pci->ref_count);
+		/* NOTE: review for potential ordering optimization */
+		__atomic_fetch_add(&idxd.u.pci->ref_count, 1, __ATOMIC_SEQ_CST);
 	}
 
 	return 0;
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 83+ messages in thread
- * Re: [PATCH v3 3/7] dma/idxd: replace rte atomics with GCC builtin atomics
  2023-03-23 22:53   ` [PATCH v3 3/7] dma/idxd: " Tyler Retzlaff
@ 2023-05-24 20:09     ` David Marchand
  2023-05-25  8:41       ` Bruce Richardson
  2023-05-25 12:57     ` Kevin Laatz
  1 sibling, 1 reply; 83+ messages in thread
From: David Marchand @ 2023-05-24 20:09 UTC (permalink / raw)
  To: Bruce Richardson, Kevin Laatz
  Cc: dev, Honnappa.Nagarahalli, Ruifeng.Wang, thomas, stephen, mb,
	Tyler Retzlaff
Hello Bruce, Kevin,
Review please.
On Thu, Mar 23, 2023 at 11:54 PM Tyler Retzlaff
<roretzla@linux.microsoft.com> wrote:
>
> Replace the use of rte_atomic.h types and functions, instead use GCC
> supplied C++11 memory model builtins.
>
> Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> ---
>  drivers/dma/idxd/idxd_internal.h | 3 +--
>  drivers/dma/idxd/idxd_pci.c      | 8 +++++---
>  2 files changed, 6 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/dma/idxd/idxd_internal.h b/drivers/dma/idxd/idxd_internal.h
> index 180a858..cd41777 100644
> --- a/drivers/dma/idxd/idxd_internal.h
> +++ b/drivers/dma/idxd/idxd_internal.h
> @@ -7,7 +7,6 @@
>
>  #include <rte_dmadev_pmd.h>
>  #include <rte_spinlock.h>
> -#include <rte_atomic.h>
>
>  #include "idxd_hw_defs.h"
>
> @@ -34,7 +33,7 @@ struct idxd_pci_common {
>         rte_spinlock_t lk;
>
>         uint8_t wq_cfg_sz;
> -       rte_atomic16_t ref_count;
> +       uint16_t ref_count;
>         volatile struct rte_idxd_bar0 *regs;
>         volatile uint32_t *wq_regs_base;
>         volatile struct rte_idxd_grpcfg *grp_regs;
> diff --git a/drivers/dma/idxd/idxd_pci.c b/drivers/dma/idxd/idxd_pci.c
> index 781fa02..2de5d15 100644
> --- a/drivers/dma/idxd/idxd_pci.c
> +++ b/drivers/dma/idxd/idxd_pci.c
> @@ -6,7 +6,6 @@
>  #include <rte_devargs.h>
>  #include <rte_dmadev_pmd.h>
>  #include <rte_malloc.h>
> -#include <rte_atomic.h>
>
>  #include "idxd_internal.h"
>
> @@ -136,7 +135,9 @@
>         /* if this is the last WQ on the device, disable the device and free
>          * the PCI struct
>          */
> -       is_last_wq = rte_atomic16_dec_and_test(&idxd->u.pci->ref_count);
> +       /* NOTE: review for potential ordering optimization */
> +       is_last_wq = __atomic_fetch_sub(&idxd->u.pci->ref_count, 1,
> +               __ATOMIC_SEQ_CST) - 1 == 0;
>         if (is_last_wq) {
>                 /* disable the device */
>                 err_code = idxd_pci_dev_command(idxd, idxd_disable_dev);
> @@ -350,7 +351,8 @@
>                                 free(idxd.u.pci);
>                         return ret;
>                 }
> -               rte_atomic16_inc(&idxd.u.pci->ref_count);
> +               /* NOTE: review for potential ordering optimization */
> +               __atomic_fetch_add(&idxd.u.pci->ref_count, 1, __ATOMIC_SEQ_CST);
>         }
>
>         return 0;
> --
> 1.8.3.1
>
-- 
David Marchand
^ permalink raw reply	[flat|nested] 83+ messages in thread
- * Re: [PATCH v3 3/7] dma/idxd: replace rte atomics with GCC builtin atomics
  2023-05-24 20:09     ` David Marchand
@ 2023-05-25  8:41       ` Bruce Richardson
  2023-05-25 13:59         ` Morten Brørup
  0 siblings, 1 reply; 83+ messages in thread
From: Bruce Richardson @ 2023-05-25  8:41 UTC (permalink / raw)
  To: David Marchand
  Cc: Kevin Laatz, dev, Honnappa.Nagarahalli, Ruifeng.Wang, thomas,
	stephen, mb, Tyler Retzlaff
On Wed, May 24, 2023 at 10:09:04PM +0200, David Marchand wrote:
> Hello Bruce, Kevin,
> 
> Review please.
> 
> 
> On Thu, Mar 23, 2023 at 11:54 PM Tyler Retzlaff
> <roretzla@linux.microsoft.com> wrote:
> >
> > Replace the use of rte_atomic.h types and functions, instead use GCC
> > supplied C++11 memory model builtins.
> >
> > Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
Two small comments inline below.
Acked-by: Bruce Richardson <bruce.richardson@intel.com>
> > ---
> >  drivers/dma/idxd/idxd_internal.h | 3 +--
> >  drivers/dma/idxd/idxd_pci.c      | 8 +++++---
> >  2 files changed, 6 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/dma/idxd/idxd_internal.h b/drivers/dma/idxd/idxd_internal.h
> > index 180a858..cd41777 100644
> > --- a/drivers/dma/idxd/idxd_internal.h
> > +++ b/drivers/dma/idxd/idxd_internal.h
> > @@ -7,7 +7,6 @@
> >
> >  #include <rte_dmadev_pmd.h>
> >  #include <rte_spinlock.h>
> > -#include <rte_atomic.h>
> >
> >  #include "idxd_hw_defs.h"
> >
> > @@ -34,7 +33,7 @@ struct idxd_pci_common {
> >         rte_spinlock_t lk;
> >
> >         uint8_t wq_cfg_sz;
> > -       rte_atomic16_t ref_count;
> > +       uint16_t ref_count;
> >         volatile struct rte_idxd_bar0 *regs;
> >         volatile uint32_t *wq_regs_base;
> >         volatile struct rte_idxd_grpcfg *grp_regs;
> > diff --git a/drivers/dma/idxd/idxd_pci.c b/drivers/dma/idxd/idxd_pci.c
> > index 781fa02..2de5d15 100644
> > --- a/drivers/dma/idxd/idxd_pci.c
> > +++ b/drivers/dma/idxd/idxd_pci.c
> > @@ -6,7 +6,6 @@
> >  #include <rte_devargs.h>
> >  #include <rte_dmadev_pmd.h>
> >  #include <rte_malloc.h>
> > -#include <rte_atomic.h>
> >
> >  #include "idxd_internal.h"
> >
> > @@ -136,7 +135,9 @@
> >         /* if this is the last WQ on the device, disable the device and free
> >          * the PCI struct
> >          */
> > -       is_last_wq = rte_atomic16_dec_and_test(&idxd->u.pci->ref_count);
> > +       /* NOTE: review for potential ordering optimization */
> > +       is_last_wq = __atomic_fetch_sub(&idxd->u.pci->ref_count, 1,
> > +               __ATOMIC_SEQ_CST) - 1 == 0;
Rather than "__atomic_fetch_sub(...) - 1 == 0", I think just comparing
"== 1" is simpler and better. I would also bracket the comparison for
clarity.
> >         if (is_last_wq) {
> >                 /* disable the device */
> >                 err_code = idxd_pci_dev_command(idxd, idxd_disable_dev);
> > @@ -350,7 +351,8 @@
> >                                 free(idxd.u.pci);
> >                         return ret;
> >                 }
> > -               rte_atomic16_inc(&idxd.u.pci->ref_count);
> > +               /* NOTE: review for potential ordering optimization */
I think we can drop the note. Since this is not datapath code the perf is
not that important.
> > +               __atomic_fetch_add(&idxd.u.pci->ref_count, 1, __ATOMIC_SEQ_CST);
> >         }
> >
> >         return 0;
> > --
> > 1.8.3.1
> >
> 
> -- 
> David Marchand
> 
^ permalink raw reply	[flat|nested] 83+ messages in thread
- * RE: [PATCH v3 3/7] dma/idxd: replace rte atomics with GCC builtin atomics
  2023-05-25  8:41       ` Bruce Richardson
@ 2023-05-25 13:59         ` Morten Brørup
  0 siblings, 0 replies; 83+ messages in thread
From: Morten Brørup @ 2023-05-25 13:59 UTC (permalink / raw)
  To: Bruce Richardson, David Marchand, Honnappa.Nagarahalli, Tyler Retzlaff
  Cc: Kevin Laatz, dev, Ruifeng.Wang, thomas, stephen
> From: Bruce Richardson [mailto:bruce.richardson@intel.com]
> Sent: Thursday, 25 May 2023 10.42
> 
> On Wed, May 24, 2023 at 10:09:04PM +0200, David Marchand wrote:
> > Hello Bruce, Kevin,
> >
> > Review please.
> >
> >
> > On Thu, Mar 23, 2023 at 11:54 PM Tyler Retzlaff
> > <roretzla@linux.microsoft.com> wrote:
> > >
> > > Replace the use of rte_atomic.h types and functions, instead use GCC
> > > supplied C++11 memory model builtins.
> > >
> > > Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> 
> Two small comments inline below.
> 
> Acked-by: Bruce Richardson <bruce.richardson@intel.com>
> 
> > > ---
> > >  drivers/dma/idxd/idxd_internal.h | 3 +--
> > >  drivers/dma/idxd/idxd_pci.c      | 8 +++++---
> > >  2 files changed, 6 insertions(+), 5 deletions(-)
> > >
> > > diff --git a/drivers/dma/idxd/idxd_internal.h
> b/drivers/dma/idxd/idxd_internal.h
> > > index 180a858..cd41777 100644
> > > --- a/drivers/dma/idxd/idxd_internal.h
> > > +++ b/drivers/dma/idxd/idxd_internal.h
> > > @@ -7,7 +7,6 @@
> > >
> > >  #include <rte_dmadev_pmd.h>
> > >  #include <rte_spinlock.h>
> > > -#include <rte_atomic.h>
> > >
> > >  #include "idxd_hw_defs.h"
> > >
> > > @@ -34,7 +33,7 @@ struct idxd_pci_common {
> > >         rte_spinlock_t lk;
> > >
> > >         uint8_t wq_cfg_sz;
> > > -       rte_atomic16_t ref_count;
> > > +       uint16_t ref_count;
> > >         volatile struct rte_idxd_bar0 *regs;
> > >         volatile uint32_t *wq_regs_base;
> > >         volatile struct rte_idxd_grpcfg *grp_regs;
> > > diff --git a/drivers/dma/idxd/idxd_pci.c b/drivers/dma/idxd/idxd_pci.c
> > > index 781fa02..2de5d15 100644
> > > --- a/drivers/dma/idxd/idxd_pci.c
> > > +++ b/drivers/dma/idxd/idxd_pci.c
> > > @@ -6,7 +6,6 @@
> > >  #include <rte_devargs.h>
> > >  #include <rte_dmadev_pmd.h>
> > >  #include <rte_malloc.h>
> > > -#include <rte_atomic.h>
> > >
> > >  #include "idxd_internal.h"
> > >
> > > @@ -136,7 +135,9 @@
> > >         /* if this is the last WQ on the device, disable the device and
> free
> > >          * the PCI struct
> > >          */
> > > -       is_last_wq = rte_atomic16_dec_and_test(&idxd->u.pci->ref_count);
> > > +       /* NOTE: review for potential ordering optimization */
> > > +       is_last_wq = __atomic_fetch_sub(&idxd->u.pci->ref_count, 1,
> > > +               __ATOMIC_SEQ_CST) - 1 == 0;
> 
> Rather than "__atomic_fetch_sub(...) - 1 == 0", I think just comparing
> "== 1" is simpler and better. I would also bracket the comparison for
> clarity.
> 
> > >         if (is_last_wq) {
> > >                 /* disable the device */
> > >                 err_code = idxd_pci_dev_command(idxd, idxd_disable_dev);
> > > @@ -350,7 +351,8 @@
> > >                                 free(idxd.u.pci);
> > >                         return ret;
> > >                 }
> > > -               rte_atomic16_inc(&idxd.u.pci->ref_count);
> > > +               /* NOTE: review for potential ordering optimization */
> 
> I think we can drop the note. Since this is not datapath code the perf is
> not that important.
Following up on my previous input to the discussion about these notes...
I agree with Bruce on this location. Here it is used purely in the control plane, and atomicity is required, but optimizing it would be a waste of brain power, so we can drop the notes in such situations. Perhaps Honnappa was referring to something similar - and then I agree with Honnappa too. ;-)
In principle: This specific note has been actively considered for optimization, and the conclusion was that further optimization is not required, and thus SEQ_CST is the correct choice here. Ideally this would be changed now, but it could also be changed in a later (separate) patch.
> 
> > > +               __atomic_fetch_add(&idxd.u.pci->ref_count, 1,
> __ATOMIC_SEQ_CST);
> > >         }
> > >
> > >         return 0;
> > > --
> > > 1.8.3.1
> > >
> >
> > --
> > David Marchand
> >
^ permalink raw reply	[flat|nested] 83+ messages in thread
 
 
- * Re: [PATCH v3 3/7] dma/idxd: replace rte atomics with GCC builtin atomics
  2023-03-23 22:53   ` [PATCH v3 3/7] dma/idxd: " Tyler Retzlaff
  2023-05-24 20:09     ` David Marchand
@ 2023-05-25 12:57     ` Kevin Laatz
  1 sibling, 0 replies; 83+ messages in thread
From: Kevin Laatz @ 2023-05-25 12:57 UTC (permalink / raw)
  To: Tyler Retzlaff, dev
  Cc: Honnappa.Nagarahalli, Ruifeng.Wang, thomas, stephen, mb
On 23/03/2023 22:53, Tyler Retzlaff wrote:
> Replace the use of rte_atomic.h types and functions, instead use GCC
> supplied C++11 memory model builtins.
>
> Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> ---
>   drivers/dma/idxd/idxd_internal.h | 3 +--
>   drivers/dma/idxd/idxd_pci.c      | 8 +++++---
>   2 files changed, 6 insertions(+), 5 deletions(-)
>
Acked-by: Kevin Laatz <kevin.laatz@intel.com>
^ permalink raw reply	[flat|nested] 83+ messages in thread 
 
- * [PATCH v3 4/7] net/ice: replace rte atomics with GCC builtin atomics
  2023-03-23 22:53 ` [PATCH v3 " Tyler Retzlaff
                     ` (2 preceding siblings ...)
  2023-03-23 22:53   ` [PATCH v3 3/7] dma/idxd: " Tyler Retzlaff
@ 2023-03-23 22:53   ` Tyler Retzlaff
  2023-05-24 20:10     ` David Marchand
  2023-03-23 22:53   ` [PATCH v3 5/7] net/ixgbe: " Tyler Retzlaff
                     ` (4 subsequent siblings)
  8 siblings, 1 reply; 83+ messages in thread
From: Tyler Retzlaff @ 2023-03-23 22:53 UTC (permalink / raw)
  To: dev
  Cc: Honnappa.Nagarahalli, Ruifeng.Wang, thomas, stephen, mb, Tyler Retzlaff
Replace the use of rte_atomic.h types and functions with the
GCC-supplied C++11 memory model builtins.
Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 drivers/net/ice/ice_dcf.c        |  1 -
 drivers/net/ice/ice_dcf_ethdev.c |  1 -
 drivers/net/ice/ice_ethdev.c     | 12 ++++++++----
 3 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/drivers/net/ice/ice_dcf.c b/drivers/net/ice/ice_dcf.c
index 1c3d22a..80d2cbd 100644
--- a/drivers/net/ice/ice_dcf.c
+++ b/drivers/net/ice/ice_dcf.c
@@ -14,7 +14,6 @@
 #include <rte_common.h>
 
 #include <rte_pci.h>
-#include <rte_atomic.h>
 #include <rte_eal.h>
 #include <rte_ether.h>
 #include <ethdev_driver.h>
diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c
index dcbf2af..13ff245 100644
--- a/drivers/net/ice/ice_dcf_ethdev.c
+++ b/drivers/net/ice/ice_dcf_ethdev.c
@@ -11,7 +11,6 @@
 #include <rte_interrupts.h>
 #include <rte_debug.h>
 #include <rte_pci.h>
-#include <rte_atomic.h>
 #include <rte_eal.h>
 #include <rte_ether.h>
 #include <ethdev_pci.h>
diff --git a/drivers/net/ice/ice_ethdev.c b/drivers/net/ice/ice_ethdev.c
index 9a88cf9..a04fca8 100644
--- a/drivers/net/ice/ice_ethdev.c
+++ b/drivers/net/ice/ice_ethdev.c
@@ -3927,8 +3927,10 @@ static int ice_init_rss(struct ice_pf *pf)
 	struct rte_eth_link *dst = link;
 	struct rte_eth_link *src = &dev->data->dev_link;
 
-	if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
-				*(uint64_t *)src) == 0)
+	/* NOTE: review for potential ordering optimization */
+	if (!__atomic_compare_exchange_n((uint64_t *)dst,
+		(uint64_t *)dst, *(uint64_t *)src, 0,
+		__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
 		return -1;
 
 	return 0;
@@ -3941,8 +3943,10 @@ static int ice_init_rss(struct ice_pf *pf)
 	struct rte_eth_link *dst = &dev->data->dev_link;
 	struct rte_eth_link *src = link;
 
-	if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
-				*(uint64_t *)src) == 0)
+	/* NOTE: review for potential ordering optimization */
+	if (!__atomic_compare_exchange_n((uint64_t *)dst,
+		(uint64_t *)dst, *(uint64_t *)src, 0,
+		__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
 		return -1;
 
 	return 0;
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 83+ messages in thread
- * Re: [PATCH v3 4/7] net/ice: replace rte atomics with GCC builtin atomics
  2023-03-23 22:53   ` [PATCH v3 4/7] net/ice: " Tyler Retzlaff
@ 2023-05-24 20:10     ` David Marchand
  0 siblings, 0 replies; 83+ messages in thread
From: David Marchand @ 2023-05-24 20:10 UTC (permalink / raw)
  To: Qiming Yang, Qi Zhang
  Cc: dev, Honnappa.Nagarahalli, Ruifeng.Wang, thomas, stephen, mb,
	Tyler Retzlaff
Hello Qiming, Qi,
Review please.
On Thu, Mar 23, 2023 at 11:54 PM Tyler Retzlaff
<roretzla@linux.microsoft.com> wrote:
>
> Replace the use of rte_atomic.h types and functions, instead use GCC
> supplied C++11 memory model builtins.
>
> Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> ---
>  drivers/net/ice/ice_dcf.c        |  1 -
>  drivers/net/ice/ice_dcf_ethdev.c |  1 -
>  drivers/net/ice/ice_ethdev.c     | 12 ++++++++----
>  3 files changed, 8 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/net/ice/ice_dcf.c b/drivers/net/ice/ice_dcf.c
> index 1c3d22a..80d2cbd 100644
> --- a/drivers/net/ice/ice_dcf.c
> +++ b/drivers/net/ice/ice_dcf.c
> @@ -14,7 +14,6 @@
>  #include <rte_common.h>
>
>  #include <rte_pci.h>
> -#include <rte_atomic.h>
>  #include <rte_eal.h>
>  #include <rte_ether.h>
>  #include <ethdev_driver.h>
> diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c
> index dcbf2af..13ff245 100644
> --- a/drivers/net/ice/ice_dcf_ethdev.c
> +++ b/drivers/net/ice/ice_dcf_ethdev.c
> @@ -11,7 +11,6 @@
>  #include <rte_interrupts.h>
>  #include <rte_debug.h>
>  #include <rte_pci.h>
> -#include <rte_atomic.h>
>  #include <rte_eal.h>
>  #include <rte_ether.h>
>  #include <ethdev_pci.h>
> diff --git a/drivers/net/ice/ice_ethdev.c b/drivers/net/ice/ice_ethdev.c
> index 9a88cf9..a04fca8 100644
> --- a/drivers/net/ice/ice_ethdev.c
> +++ b/drivers/net/ice/ice_ethdev.c
> @@ -3927,8 +3927,10 @@ static int ice_init_rss(struct ice_pf *pf)
>         struct rte_eth_link *dst = link;
>         struct rte_eth_link *src = &dev->data->dev_link;
>
> -       if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
> -                               *(uint64_t *)src) == 0)
> +       /* NOTE: review for potential ordering optimization */
> +       if (!__atomic_compare_exchange_n((uint64_t *)dst,
> +               (uint64_t *)dst, *(uint64_t *)src, 0,
> +               __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
>                 return -1;
>
>         return 0;
> @@ -3941,8 +3943,10 @@ static int ice_init_rss(struct ice_pf *pf)
>         struct rte_eth_link *dst = &dev->data->dev_link;
>         struct rte_eth_link *src = link;
>
> -       if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
> -                               *(uint64_t *)src) == 0)
> +       /* NOTE: review for potential ordering optimization */
> +       if (!__atomic_compare_exchange_n((uint64_t *)dst,
> +               (uint64_t *)dst, *(uint64_t *)src, 0,
> +               __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
>                 return -1;
>
>         return 0;
> --
> 1.8.3.1
>
-- 
David Marchand
^ permalink raw reply	[flat|nested] 83+ messages in thread 
 
- * [PATCH v3 5/7] net/ixgbe: replace rte atomics with GCC builtin atomics
  2023-03-23 22:53 ` [PATCH v3 " Tyler Retzlaff
                     ` (3 preceding siblings ...)
  2023-03-23 22:53   ` [PATCH v3 4/7] net/ice: " Tyler Retzlaff
@ 2023-03-23 22:53   ` Tyler Retzlaff
  2023-05-24 20:11     ` David Marchand
  2023-03-23 22:53   ` [PATCH v3 6/7] net/null: " Tyler Retzlaff
                     ` (3 subsequent siblings)
  8 siblings, 1 reply; 83+ messages in thread
From: Tyler Retzlaff @ 2023-03-23 22:53 UTC (permalink / raw)
  To: dev
  Cc: Honnappa.Nagarahalli, Ruifeng.Wang, thomas, stephen, mb, Tyler Retzlaff
Replace the use of rte_atomic.h types and functions with the
GCC-supplied C++11 memory model builtins.
Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 drivers/net/ixgbe/ixgbe_bypass.c |  1 -
 drivers/net/ixgbe/ixgbe_ethdev.c | 18 ++++++++++++------
 drivers/net/ixgbe/ixgbe_ethdev.h |  3 ++-
 drivers/net/ixgbe/ixgbe_flow.c   |  1 -
 drivers/net/ixgbe/ixgbe_rxtx.c   |  1 -
 5 files changed, 14 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ixgbe/ixgbe_bypass.c b/drivers/net/ixgbe/ixgbe_bypass.c
index 94f34a2..f615d18 100644
--- a/drivers/net/ixgbe/ixgbe_bypass.c
+++ b/drivers/net/ixgbe/ixgbe_bypass.c
@@ -3,7 +3,6 @@
  */
 
 #include <time.h>
-#include <rte_atomic.h>
 #include <ethdev_driver.h>
 #include "ixgbe_ethdev.h"
 #include "ixgbe_bypass_api.h"
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index 88118bc..4bb85af 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -1127,7 +1127,8 @@ struct rte_ixgbe_xstats_name_off {
 		return 0;
 	}
 
-	rte_atomic32_clear(&ad->link_thread_running);
+	/* NOTE: review for potential ordering optimization */
+	__atomic_clear(&ad->link_thread_running, __ATOMIC_SEQ_CST);
 	ixgbe_parse_devargs(eth_dev->data->dev_private,
 			    pci_dev->device.devargs);
 	rte_eth_copy_pci_info(eth_dev, pci_dev);
@@ -1625,7 +1626,8 @@ static int ixgbe_l2_tn_filter_init(struct rte_eth_dev *eth_dev)
 		return 0;
 	}
 
-	rte_atomic32_clear(&ad->link_thread_running);
+	/* NOTE: review for potential ordering optimization */
+	__atomic_clear(&ad->link_thread_running, __ATOMIC_SEQ_CST);
 	ixgbevf_parse_devargs(eth_dev->data->dev_private,
 			      pci_dev->device.devargs);
 
@@ -4186,7 +4188,8 @@ static int ixgbevf_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
 	struct ixgbe_adapter *ad = dev->data->dev_private;
 	uint32_t timeout = timeout_ms ? timeout_ms : WARNING_TIMEOUT;
 
-	while (rte_atomic32_read(&ad->link_thread_running)) {
+	/* NOTE: review for potential ordering optimization */
+	while (__atomic_load_n(&ad->link_thread_running, __ATOMIC_SEQ_CST)) {
 		msec_delay(1);
 		timeout--;
 
@@ -4222,7 +4225,8 @@ static int ixgbevf_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
 	ixgbe_setup_link(hw, speed, true);
 
 	intr->flags &= ~IXGBE_FLAG_NEED_LINK_CONFIG;
-	rte_atomic32_clear(&ad->link_thread_running);
+	/* NOTE: review for potential ordering optimization */
+	__atomic_clear(&ad->link_thread_running, __ATOMIC_SEQ_CST);
 	return NULL;
 }
 
@@ -4317,7 +4321,8 @@ static int ixgbevf_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
 	if (link_up == 0) {
 		if (ixgbe_get_media_type(hw) == ixgbe_media_type_fiber) {
 			ixgbe_dev_wait_setup_link_complete(dev, 0);
-			if (rte_atomic32_test_and_set(&ad->link_thread_running)) {
+			/* NOTE: review for potential ordering optimization */
+			if (__atomic_test_and_set(&ad->link_thread_running, __ATOMIC_SEQ_CST)) {
 				/* To avoid race condition between threads, set
 				 * the IXGBE_FLAG_NEED_LINK_CONFIG flag only
 				 * when there is no link thread running.
@@ -4330,7 +4335,8 @@ static int ixgbevf_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
 					dev) < 0) {
 					PMD_DRV_LOG(ERR,
 						"Create link thread failed!");
-					rte_atomic32_clear(&ad->link_thread_running);
+					/* NOTE: review for potential ordering optimization */
+					__atomic_clear(&ad->link_thread_running, __ATOMIC_SEQ_CST);
 				}
 			} else {
 				PMD_DRV_LOG(ERR,
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.h b/drivers/net/ixgbe/ixgbe_ethdev.h
index 48290af..2ca6998 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.h
+++ b/drivers/net/ixgbe/ixgbe_ethdev.h
@@ -6,6 +6,7 @@
 #define _IXGBE_ETHDEV_H_
 
 #include <stdint.h>
+#include <stdbool.h>
 #include <sys/queue.h>
 
 #include "base/ixgbe_type.h"
@@ -510,7 +511,7 @@ struct ixgbe_adapter {
 	 */
 	uint8_t pflink_fullchk;
 	uint8_t mac_ctrl_frame_fwd;
-	rte_atomic32_t link_thread_running;
+	bool link_thread_running;
 	pthread_t link_thread_tid;
 };
 
diff --git a/drivers/net/ixgbe/ixgbe_flow.c b/drivers/net/ixgbe/ixgbe_flow.c
index eac81ee..687341c 100644
--- a/drivers/net/ixgbe/ixgbe_flow.c
+++ b/drivers/net/ixgbe/ixgbe_flow.c
@@ -18,7 +18,6 @@
 #include <rte_log.h>
 #include <rte_debug.h>
 #include <rte_pci.h>
-#include <rte_atomic.h>
 #include <rte_branch_prediction.h>
 #include <rte_memory.h>
 #include <rte_eal.h>
diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index c9d6ca9..8d7251d 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -27,7 +27,6 @@
 #include <rte_eal.h>
 #include <rte_per_lcore.h>
 #include <rte_lcore.h>
-#include <rte_atomic.h>
 #include <rte_branch_prediction.h>
 #include <rte_mempool.h>
 #include <rte_malloc.h>
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 83+ messages in thread
- * Re: [PATCH v3 5/7] net/ixgbe: replace rte atomics with GCC builtin atomics
  2023-03-23 22:53   ` [PATCH v3 5/7] net/ixgbe: " Tyler Retzlaff
@ 2023-05-24 20:11     ` David Marchand
  0 siblings, 0 replies; 83+ messages in thread
From: David Marchand @ 2023-05-24 20:11 UTC (permalink / raw)
  To: Qiming Yang, Wenjun Wu
  Cc: dev, Honnappa.Nagarahalli, Ruifeng.Wang, thomas, stephen, mb,
	Tyler Retzlaff
Hello Qiming, Wenjun,
Review please.
On Thu, Mar 23, 2023 at 11:54 PM Tyler Retzlaff
<roretzla@linux.microsoft.com> wrote:
>
> Replace the use of rte_atomic.h types and functions, instead use GCC
> supplied C++11 memory model builtins.
>
> Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> ---
>  drivers/net/ixgbe/ixgbe_bypass.c |  1 -
>  drivers/net/ixgbe/ixgbe_ethdev.c | 18 ++++++++++++------
>  drivers/net/ixgbe/ixgbe_ethdev.h |  3 ++-
>  drivers/net/ixgbe/ixgbe_flow.c   |  1 -
>  drivers/net/ixgbe/ixgbe_rxtx.c   |  1 -
>  5 files changed, 14 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/net/ixgbe/ixgbe_bypass.c b/drivers/net/ixgbe/ixgbe_bypass.c
> index 94f34a2..f615d18 100644
> --- a/drivers/net/ixgbe/ixgbe_bypass.c
> +++ b/drivers/net/ixgbe/ixgbe_bypass.c
> @@ -3,7 +3,6 @@
>   */
>
>  #include <time.h>
> -#include <rte_atomic.h>
>  #include <ethdev_driver.h>
>  #include "ixgbe_ethdev.h"
>  #include "ixgbe_bypass_api.h"
> diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
> index 88118bc..4bb85af 100644
> --- a/drivers/net/ixgbe/ixgbe_ethdev.c
> +++ b/drivers/net/ixgbe/ixgbe_ethdev.c
> @@ -1127,7 +1127,8 @@ struct rte_ixgbe_xstats_name_off {
>                 return 0;
>         }
>
> -       rte_atomic32_clear(&ad->link_thread_running);
> +       /* NOTE: review for potential ordering optimization */
> +       __atomic_clear(&ad->link_thread_running, __ATOMIC_SEQ_CST);
>         ixgbe_parse_devargs(eth_dev->data->dev_private,
>                             pci_dev->device.devargs);
>         rte_eth_copy_pci_info(eth_dev, pci_dev);
> @@ -1625,7 +1626,8 @@ static int ixgbe_l2_tn_filter_init(struct rte_eth_dev *eth_dev)
>                 return 0;
>         }
>
> -       rte_atomic32_clear(&ad->link_thread_running);
> +       /* NOTE: review for potential ordering optimization */
> +       __atomic_clear(&ad->link_thread_running, __ATOMIC_SEQ_CST);
>         ixgbevf_parse_devargs(eth_dev->data->dev_private,
>                               pci_dev->device.devargs);
>
> @@ -4186,7 +4188,8 @@ static int ixgbevf_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
>         struct ixgbe_adapter *ad = dev->data->dev_private;
>         uint32_t timeout = timeout_ms ? timeout_ms : WARNING_TIMEOUT;
>
> -       while (rte_atomic32_read(&ad->link_thread_running)) {
> +       /* NOTE: review for potential ordering optimization */
> +       while (__atomic_load_n(&ad->link_thread_running, __ATOMIC_SEQ_CST)) {
>                 msec_delay(1);
>                 timeout--;
>
> @@ -4222,7 +4225,8 @@ static int ixgbevf_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
>         ixgbe_setup_link(hw, speed, true);
>
>         intr->flags &= ~IXGBE_FLAG_NEED_LINK_CONFIG;
> -       rte_atomic32_clear(&ad->link_thread_running);
> +       /* NOTE: review for potential ordering optimization */
> +       __atomic_clear(&ad->link_thread_running, __ATOMIC_SEQ_CST);
>         return NULL;
>  }
>
> @@ -4317,7 +4321,8 @@ static int ixgbevf_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
>         if (link_up == 0) {
>                 if (ixgbe_get_media_type(hw) == ixgbe_media_type_fiber) {
>                         ixgbe_dev_wait_setup_link_complete(dev, 0);
> -                       if (rte_atomic32_test_and_set(&ad->link_thread_running)) {
> +                       /* NOTE: review for potential ordering optimization */
> +                       if (__atomic_test_and_set(&ad->link_thread_running, __ATOMIC_SEQ_CST)) {
>                                 /* To avoid race condition between threads, set
>                                  * the IXGBE_FLAG_NEED_LINK_CONFIG flag only
>                                  * when there is no link thread running.
> @@ -4330,7 +4335,8 @@ static int ixgbevf_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
>                                         dev) < 0) {
>                                         PMD_DRV_LOG(ERR,
>                                                 "Create link thread failed!");
> -                                       rte_atomic32_clear(&ad->link_thread_running);
> +                                       /* NOTE: review for potential ordering optimization */
> +                                       __atomic_clear(&ad->link_thread_running, __ATOMIC_SEQ_CST);
>                                 }
>                         } else {
>                                 PMD_DRV_LOG(ERR,
> diff --git a/drivers/net/ixgbe/ixgbe_ethdev.h b/drivers/net/ixgbe/ixgbe_ethdev.h
> index 48290af..2ca6998 100644
> --- a/drivers/net/ixgbe/ixgbe_ethdev.h
> +++ b/drivers/net/ixgbe/ixgbe_ethdev.h
> @@ -6,6 +6,7 @@
>  #define _IXGBE_ETHDEV_H_
>
>  #include <stdint.h>
> +#include <stdbool.h>
>  #include <sys/queue.h>
>
>  #include "base/ixgbe_type.h"
> @@ -510,7 +511,7 @@ struct ixgbe_adapter {
>          */
>         uint8_t pflink_fullchk;
>         uint8_t mac_ctrl_frame_fwd;
> -       rte_atomic32_t link_thread_running;
> +       bool link_thread_running;
>         pthread_t link_thread_tid;
>  };
>
> diff --git a/drivers/net/ixgbe/ixgbe_flow.c b/drivers/net/ixgbe/ixgbe_flow.c
> index eac81ee..687341c 100644
> --- a/drivers/net/ixgbe/ixgbe_flow.c
> +++ b/drivers/net/ixgbe/ixgbe_flow.c
> @@ -18,7 +18,6 @@
>  #include <rte_log.h>
>  #include <rte_debug.h>
>  #include <rte_pci.h>
> -#include <rte_atomic.h>
>  #include <rte_branch_prediction.h>
>  #include <rte_memory.h>
>  #include <rte_eal.h>
> diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
> index c9d6ca9..8d7251d 100644
> --- a/drivers/net/ixgbe/ixgbe_rxtx.c
> +++ b/drivers/net/ixgbe/ixgbe_rxtx.c
> @@ -27,7 +27,6 @@
>  #include <rte_eal.h>
>  #include <rte_per_lcore.h>
>  #include <rte_lcore.h>
> -#include <rte_atomic.h>
>  #include <rte_branch_prediction.h>
>  #include <rte_mempool.h>
>  #include <rte_malloc.h>
> --
> 1.8.3.1
>
-- 
David Marchand
^ permalink raw reply	[flat|nested] 83+ messages in thread
 
- * [PATCH v3 6/7] net/null: replace rte atomics with GCC builtin atomics
  2023-03-23 22:53 ` [PATCH v3 " Tyler Retzlaff
                     ` (4 preceding siblings ...)
  2023-03-23 22:53   ` [PATCH v3 5/7] net/ixgbe: " Tyler Retzlaff
@ 2023-03-23 22:53   ` Tyler Retzlaff
  2023-05-24 20:13     ` David Marchand
  2023-03-23 22:53   ` [PATCH v3 7/7] net/ring: " Tyler Retzlaff
                     ` (2 subsequent siblings)
  8 siblings, 1 reply; 83+ messages in thread
From: Tyler Retzlaff @ 2023-03-23 22:53 UTC (permalink / raw)
  To: dev
  Cc: Honnappa.Nagarahalli, Ruifeng.Wang, thomas, stephen, mb, Tyler Retzlaff
Replace the use of rte_atomic.h types and functions with the
GCC-supplied C++11 memory model builtins.
Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 drivers/net/null/rte_eth_null.c | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)
diff --git a/drivers/net/null/rte_eth_null.c b/drivers/net/null/rte_eth_null.c
index 47d9554..31081af 100644
--- a/drivers/net/null/rte_eth_null.c
+++ b/drivers/net/null/rte_eth_null.c
@@ -37,8 +37,8 @@ struct null_queue {
 	struct rte_mempool *mb_pool;
 	struct rte_mbuf *dummy_packet;
 
-	rte_atomic64_t rx_pkts;
-	rte_atomic64_t tx_pkts;
+	uint64_t rx_pkts;
+	uint64_t tx_pkts;
 };
 
 struct pmd_options {
@@ -101,7 +101,8 @@ struct pmd_internals {
 		bufs[i]->port = h->internals->port_id;
 	}
 
-	rte_atomic64_add(&(h->rx_pkts), i);
+	/* NOTE: review for potential ordering optimization */
+	__atomic_fetch_add(&h->rx_pkts, i, __ATOMIC_SEQ_CST);
 
 	return i;
 }
@@ -128,7 +129,8 @@ struct pmd_internals {
 		bufs[i]->port = h->internals->port_id;
 	}
 
-	rte_atomic64_add(&(h->rx_pkts), i);
+	/* NOTE: review for potential ordering optimization */
+	__atomic_fetch_add(&h->rx_pkts, i, __ATOMIC_SEQ_CST);
 
 	return i;
 }
@@ -152,7 +154,8 @@ struct pmd_internals {
 	for (i = 0; i < nb_bufs; i++)
 		rte_pktmbuf_free(bufs[i]);
 
-	rte_atomic64_add(&(h->tx_pkts), i);
+	/* NOTE: review for potential ordering optimization */
+	__atomic_fetch_add(&h->tx_pkts, i, __ATOMIC_SEQ_CST);
 
 	return i;
 }
@@ -174,7 +177,8 @@ struct pmd_internals {
 		rte_pktmbuf_free(bufs[i]);
 	}
 
-	rte_atomic64_add(&(h->tx_pkts), i);
+	/* NOTE: review for potential ordering optimization */
+	__atomic_fetch_add(&h->tx_pkts, i, __ATOMIC_SEQ_CST);
 
 	return i;
 }
@@ -316,8 +320,9 @@ struct pmd_internals {
 			RTE_MIN(dev->data->nb_rx_queues,
 				RTE_DIM(internal->rx_null_queues)));
 	for (i = 0; i < num_stats; i++) {
+		/* NOTE: review for atomic access */
 		igb_stats->q_ipackets[i] =
-			internal->rx_null_queues[i].rx_pkts.cnt;
+			internal->rx_null_queues[i].rx_pkts;
 		rx_total += igb_stats->q_ipackets[i];
 	}
 
@@ -325,8 +330,9 @@ struct pmd_internals {
 			RTE_MIN(dev->data->nb_tx_queues,
 				RTE_DIM(internal->tx_null_queues)));
 	for (i = 0; i < num_stats; i++) {
+		/* NOTE: review for atomic access */
 		igb_stats->q_opackets[i] =
-			internal->tx_null_queues[i].tx_pkts.cnt;
+			internal->tx_null_queues[i].tx_pkts;
 		tx_total += igb_stats->q_opackets[i];
 	}
 
@@ -347,9 +353,11 @@ struct pmd_internals {
 
 	internal = dev->data->dev_private;
 	for (i = 0; i < RTE_DIM(internal->rx_null_queues); i++)
-		internal->rx_null_queues[i].rx_pkts.cnt = 0;
+		/* NOTE: review for atomic access */
+		internal->rx_null_queues[i].rx_pkts = 0;
 	for (i = 0; i < RTE_DIM(internal->tx_null_queues); i++)
-		internal->tx_null_queues[i].tx_pkts.cnt = 0;
+		/* NOTE: review for atomic access */
+		internal->tx_null_queues[i].tx_pkts = 0;
 
 	return 0;
 }
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 83+ messages in thread
- * Re: [PATCH v3 6/7] net/null: replace rte atomics with GCC builtin atomics
  2023-03-23 22:53   ` [PATCH v3 6/7] net/null: " Tyler Retzlaff
@ 2023-05-24 20:13     ` David Marchand
  0 siblings, 0 replies; 83+ messages in thread
From: David Marchand @ 2023-05-24 20:13 UTC (permalink / raw)
  To: Tetsuya Mukawa
  Cc: dev, Honnappa.Nagarahalli, Ruifeng.Wang, thomas, stephen, mb,
	Tyler Retzlaff
Hello Tetsuya,
Review please.
On Thu, Mar 23, 2023 at 11:54 PM Tyler Retzlaff
<roretzla@linux.microsoft.com> wrote:
>
> Replace the use of rte_atomic.h types and functions, instead use GCC
> supplied C++11 memory model builtins.
>
> Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> ---
>  drivers/net/null/rte_eth_null.c | 28 ++++++++++++++++++----------
>  1 file changed, 18 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/net/null/rte_eth_null.c b/drivers/net/null/rte_eth_null.c
> index 47d9554..31081af 100644
> --- a/drivers/net/null/rte_eth_null.c
> +++ b/drivers/net/null/rte_eth_null.c
> @@ -37,8 +37,8 @@ struct null_queue {
>         struct rte_mempool *mb_pool;
>         struct rte_mbuf *dummy_packet;
>
> -       rte_atomic64_t rx_pkts;
> -       rte_atomic64_t tx_pkts;
> +       uint64_t rx_pkts;
> +       uint64_t tx_pkts;
>  };
>
>  struct pmd_options {
> @@ -101,7 +101,8 @@ struct pmd_internals {
>                 bufs[i]->port = h->internals->port_id;
>         }
>
> -       rte_atomic64_add(&(h->rx_pkts), i);
> +       /* NOTE: review for potential ordering optimization */
> +       __atomic_fetch_add(&h->rx_pkts, i, __ATOMIC_SEQ_CST);
>
>         return i;
>  }
> @@ -128,7 +129,8 @@ struct pmd_internals {
>                 bufs[i]->port = h->internals->port_id;
>         }
>
> -       rte_atomic64_add(&(h->rx_pkts), i);
> +       /* NOTE: review for potential ordering optimization */
> +       __atomic_fetch_add(&h->rx_pkts, i, __ATOMIC_SEQ_CST);
>
>         return i;
>  }
> @@ -152,7 +154,8 @@ struct pmd_internals {
>         for (i = 0; i < nb_bufs; i++)
>                 rte_pktmbuf_free(bufs[i]);
>
> -       rte_atomic64_add(&(h->tx_pkts), i);
> +       /* NOTE: review for potential ordering optimization */
> +       __atomic_fetch_add(&h->tx_pkts, i, __ATOMIC_SEQ_CST);
>
>         return i;
>  }
> @@ -174,7 +177,8 @@ struct pmd_internals {
>                 rte_pktmbuf_free(bufs[i]);
>         }
>
> -       rte_atomic64_add(&(h->tx_pkts), i);
> +       /* NOTE: review for potential ordering optimization */
> +       __atomic_fetch_add(&h->tx_pkts, i, __ATOMIC_SEQ_CST);
>
>         return i;
>  }
> @@ -316,8 +320,9 @@ struct pmd_internals {
>                         RTE_MIN(dev->data->nb_rx_queues,
>                                 RTE_DIM(internal->rx_null_queues)));
>         for (i = 0; i < num_stats; i++) {
> +               /* NOTE: review for atomic access */
>                 igb_stats->q_ipackets[i] =
> -                       internal->rx_null_queues[i].rx_pkts.cnt;
> +                       internal->rx_null_queues[i].rx_pkts;
>                 rx_total += igb_stats->q_ipackets[i];
>         }
>
> @@ -325,8 +330,9 @@ struct pmd_internals {
>                         RTE_MIN(dev->data->nb_tx_queues,
>                                 RTE_DIM(internal->tx_null_queues)));
>         for (i = 0; i < num_stats; i++) {
> +               /* NOTE: review for atomic access */
>                 igb_stats->q_opackets[i] =
> -                       internal->tx_null_queues[i].tx_pkts.cnt;
> +                       internal->tx_null_queues[i].tx_pkts;
>                 tx_total += igb_stats->q_opackets[i];
>         }
>
> @@ -347,9 +353,11 @@ struct pmd_internals {
>
>         internal = dev->data->dev_private;
>         for (i = 0; i < RTE_DIM(internal->rx_null_queues); i++)
> -               internal->rx_null_queues[i].rx_pkts.cnt = 0;
> +               /* NOTE: review for atomic access */
> +               internal->rx_null_queues[i].rx_pkts = 0;
>         for (i = 0; i < RTE_DIM(internal->tx_null_queues); i++)
> -               internal->tx_null_queues[i].tx_pkts.cnt = 0;
> +               /* NOTE: review for atomic access */
> +               internal->tx_null_queues[i].tx_pkts = 0;
>
>         return 0;
>  }
> --
> 1.8.3.1
>
-- 
David Marchand
^ permalink raw reply	[flat|nested] 83+ messages in thread
 
- * [PATCH v3 7/7] net/ring: replace rte atomics with GCC builtin atomics
  2023-03-23 22:53 ` [PATCH v3 " Tyler Retzlaff
                     ` (5 preceding siblings ...)
  2023-03-23 22:53   ` [PATCH v3 6/7] net/null: " Tyler Retzlaff
@ 2023-03-23 22:53   ` Tyler Retzlaff
  2023-05-24 20:12     ` David Marchand
  2023-03-24  7:09   ` [PATCH v3 0/7] " Morten Brørup
  2023-05-24 12:40   ` David Marchand
  8 siblings, 1 reply; 83+ messages in thread
From: Tyler Retzlaff @ 2023-03-23 22:53 UTC (permalink / raw)
  To: dev
  Cc: Honnappa.Nagarahalli, Ruifeng.Wang, thomas, stephen, mb, Tyler Retzlaff
Replace the use of rte_atomic.h types and functions, instead use GCC
supplied C++11 memory model builtins.
Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 drivers/net/ring/rte_eth_ring.c | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ring/rte_eth_ring.c b/drivers/net/ring/rte_eth_ring.c
index e8bc9b6..43eb627 100644
--- a/drivers/net/ring/rte_eth_ring.c
+++ b/drivers/net/ring/rte_eth_ring.c
@@ -44,8 +44,8 @@ enum dev_action {
 
 struct ring_queue {
 	struct rte_ring *rng;
-	rte_atomic64_t rx_pkts;
-	rte_atomic64_t tx_pkts;
+	uint64_t rx_pkts;
+	uint64_t tx_pkts;
 };
 
 struct pmd_internals {
@@ -80,9 +80,10 @@ struct pmd_internals {
 	const uint16_t nb_rx = (uint16_t)rte_ring_dequeue_burst(r->rng,
 			ptrs, nb_bufs, NULL);
 	if (r->rng->flags & RING_F_SC_DEQ)
-		r->rx_pkts.cnt += nb_rx;
+		r->rx_pkts += nb_rx;
 	else
-		rte_atomic64_add(&(r->rx_pkts), nb_rx);
+		/* NOTE: review for potential ordering optimization */
+		__atomic_fetch_add(&r->rx_pkts, nb_rx, __ATOMIC_SEQ_CST);
 	return nb_rx;
 }
 
@@ -94,9 +95,10 @@ struct pmd_internals {
 	const uint16_t nb_tx = (uint16_t)rte_ring_enqueue_burst(r->rng,
 			ptrs, nb_bufs, NULL);
 	if (r->rng->flags & RING_F_SP_ENQ)
-		r->tx_pkts.cnt += nb_tx;
+		r->tx_pkts += nb_tx;
 	else
-		rte_atomic64_add(&(r->tx_pkts), nb_tx);
+		/* NOTE: review for potential ordering optimization */
+		__atomic_fetch_add(&r->tx_pkts, nb_tx, __ATOMIC_SEQ_CST);
 	return nb_tx;
 }
 
@@ -184,13 +186,15 @@ struct pmd_internals {
 
 	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
 			i < dev->data->nb_rx_queues; i++) {
-		stats->q_ipackets[i] = internal->rx_ring_queues[i].rx_pkts.cnt;
+		/* NOTE: review for atomic access */
+		stats->q_ipackets[i] = internal->rx_ring_queues[i].rx_pkts;
 		rx_total += stats->q_ipackets[i];
 	}
 
 	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
 			i < dev->data->nb_tx_queues; i++) {
-		stats->q_opackets[i] = internal->tx_ring_queues[i].tx_pkts.cnt;
+		/* NOTE: review for atomic access */
+		stats->q_opackets[i] = internal->tx_ring_queues[i].tx_pkts;
 		tx_total += stats->q_opackets[i];
 	}
 
@@ -207,9 +211,11 @@ struct pmd_internals {
 	struct pmd_internals *internal = dev->data->dev_private;
 
 	for (i = 0; i < dev->data->nb_rx_queues; i++)
-		internal->rx_ring_queues[i].rx_pkts.cnt = 0;
+		/* NOTE: review for atomic access */
+		internal->rx_ring_queues[i].rx_pkts = 0;
 	for (i = 0; i < dev->data->nb_tx_queues; i++)
-		internal->tx_ring_queues[i].tx_pkts.cnt = 0;
+		/* NOTE: review for atomic access */
+		internal->tx_ring_queues[i].tx_pkts = 0;
 
 	return 0;
 }
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 83+ messages in thread
- * Re: [PATCH v3 7/7] net/ring: replace rte atomics with GCC builtin atomics
  2023-03-23 22:53   ` [PATCH v3 7/7] net/ring: " Tyler Retzlaff
@ 2023-05-24 20:12     ` David Marchand
  2023-05-25  8:44       ` Bruce Richardson
  0 siblings, 1 reply; 83+ messages in thread
From: David Marchand @ 2023-05-24 20:12 UTC (permalink / raw)
  To: Bruce Richardson
  Cc: dev, Honnappa.Nagarahalli, Ruifeng.Wang, thomas, stephen, mb,
	Tyler Retzlaff
Hello Bruce,
For you again, review please.
On Thu, Mar 23, 2023 at 11:54 PM Tyler Retzlaff
<roretzla@linux.microsoft.com> wrote:
>
> Replace the use of rte_atomic.h types and functions, instead use GCC
> supplied C++11 memory model builtins.
>
> Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> ---
>  drivers/net/ring/rte_eth_ring.c | 26 ++++++++++++++++----------
>  1 file changed, 16 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/net/ring/rte_eth_ring.c b/drivers/net/ring/rte_eth_ring.c
> index e8bc9b6..43eb627 100644
> --- a/drivers/net/ring/rte_eth_ring.c
> +++ b/drivers/net/ring/rte_eth_ring.c
> @@ -44,8 +44,8 @@ enum dev_action {
>
>  struct ring_queue {
>         struct rte_ring *rng;
> -       rte_atomic64_t rx_pkts;
> -       rte_atomic64_t tx_pkts;
> +       uint64_t rx_pkts;
> +       uint64_t tx_pkts;
>  };
>
>  struct pmd_internals {
> @@ -80,9 +80,10 @@ struct pmd_internals {
>         const uint16_t nb_rx = (uint16_t)rte_ring_dequeue_burst(r->rng,
>                         ptrs, nb_bufs, NULL);
>         if (r->rng->flags & RING_F_SC_DEQ)
> -               r->rx_pkts.cnt += nb_rx;
> +               r->rx_pkts += nb_rx;
>         else
> -               rte_atomic64_add(&(r->rx_pkts), nb_rx);
> +               /* NOTE: review for potential ordering optimization */
> +               __atomic_fetch_add(&r->rx_pkts, nb_rx, __ATOMIC_SEQ_CST);
>         return nb_rx;
>  }
>
> @@ -94,9 +95,10 @@ struct pmd_internals {
>         const uint16_t nb_tx = (uint16_t)rte_ring_enqueue_burst(r->rng,
>                         ptrs, nb_bufs, NULL);
>         if (r->rng->flags & RING_F_SP_ENQ)
> -               r->tx_pkts.cnt += nb_tx;
> +               r->tx_pkts += nb_tx;
>         else
> -               rte_atomic64_add(&(r->tx_pkts), nb_tx);
> +               /* NOTE: review for potential ordering optimization */
> +               __atomic_fetch_add(&r->tx_pkts, nb_tx, __ATOMIC_SEQ_CST);
>         return nb_tx;
>  }
>
> @@ -184,13 +186,15 @@ struct pmd_internals {
>
>         for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
>                         i < dev->data->nb_rx_queues; i++) {
> -               stats->q_ipackets[i] = internal->rx_ring_queues[i].rx_pkts.cnt;
> +               /* NOTE: review for atomic access */
> +               stats->q_ipackets[i] = internal->rx_ring_queues[i].rx_pkts;
>                 rx_total += stats->q_ipackets[i];
>         }
>
>         for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
>                         i < dev->data->nb_tx_queues; i++) {
> -               stats->q_opackets[i] = internal->tx_ring_queues[i].tx_pkts.cnt;
> +               /* NOTE: review for atomic access */
> +               stats->q_opackets[i] = internal->tx_ring_queues[i].tx_pkts;
>                 tx_total += stats->q_opackets[i];
>         }
>
> @@ -207,9 +211,11 @@ struct pmd_internals {
>         struct pmd_internals *internal = dev->data->dev_private;
>
>         for (i = 0; i < dev->data->nb_rx_queues; i++)
> -               internal->rx_ring_queues[i].rx_pkts.cnt = 0;
> +               /* NOTE: review for atomic access */
> +               internal->rx_ring_queues[i].rx_pkts = 0;
>         for (i = 0; i < dev->data->nb_tx_queues; i++)
> -               internal->tx_ring_queues[i].tx_pkts.cnt = 0;
> +               /* NOTE: review for atomic access */
> +               internal->tx_ring_queues[i].tx_pkts = 0;
>
>         return 0;
>  }
> --
> 1.8.3.1
>
-- 
David Marchand
^ permalink raw reply	[flat|nested] 83+ messages in thread
- * Re: [PATCH v3 7/7] net/ring: replace rte atomics with GCC builtin atomics
  2023-05-24 20:12     ` David Marchand
@ 2023-05-25  8:44       ` Bruce Richardson
  0 siblings, 0 replies; 83+ messages in thread
From: Bruce Richardson @ 2023-05-25  8:44 UTC (permalink / raw)
  To: David Marchand
  Cc: dev, Honnappa.Nagarahalli, Ruifeng.Wang, thomas, stephen, mb,
	Tyler Retzlaff
On Wed, May 24, 2023 at 10:12:44PM +0200, David Marchand wrote:
> Hello Bruce,
> 
> For you again, review please.
> 
> 
> On Thu, Mar 23, 2023 at 11:54 PM Tyler Retzlaff
> <roretzla@linux.microsoft.com> wrote:
> >
> > Replace the use of rte_atomic.h types and functions, instead use GCC
> > supplied C++11 memory model builtins.
> >
> > Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> > ---
Acked-by: Bruce Richardson <bruce.richardson@intel.com>
^ permalink raw reply	[flat|nested] 83+ messages in thread 
 
 
- * RE: [PATCH v3 0/7] replace rte atomics with GCC builtin atomics
  2023-03-23 22:53 ` [PATCH v3 " Tyler Retzlaff
                     ` (6 preceding siblings ...)
  2023-03-23 22:53   ` [PATCH v3 7/7] net/ring: " Tyler Retzlaff
@ 2023-03-24  7:09   ` Morten Brørup
  2023-03-24 19:22     ` Tyler Retzlaff
  2023-05-24 12:40   ` David Marchand
  8 siblings, 1 reply; 83+ messages in thread
From: Morten Brørup @ 2023-03-24  7:09 UTC (permalink / raw)
  To: Tyler Retzlaff, dev; +Cc: Honnappa.Nagarahalli, Ruifeng.Wang, thomas, stephen
> From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> Sent: Thursday, 23 March 2023 23.54
> 
> Replace the use of rte_atomic.h types and functions, instead use GCC
> supplied C++11 memory model builtins.
> 
> This series covers the libraries and drivers that are built on Windows.
> 
> The code has been converted to use the __atomic builtins, but during
> conversion I noticed that there may be some additional issues
> that need to be addressed.
> 
> I'll comment in the patches where my concerns are so the maintainers
> may comment.
> 
> v3:
>   * style, don't use c99 comments
> 
> v2:
>   * comment code where optimizations may be possible now that memory
>     order can be specified.
>   * comment code where operations should potentially be atomic so that
>     maintainers can review.
>   * change a couple of variables labeled as counters to be unsigned.
> 
I didn't see the v3 when ack'ing the v2, so in case v2 is quickly skipped by maintainers...
Series-acked-by: Morten Brørup <mb@smartsharesystems.com>
^ permalink raw reply	[flat|nested] 83+ messages in thread
- * Re: [PATCH v3 0/7] replace rte atomics with GCC builtin atomics
  2023-03-24  7:09   ` [PATCH v3 0/7] " Morten Brørup
@ 2023-03-24 19:22     ` Tyler Retzlaff
  0 siblings, 0 replies; 83+ messages in thread
From: Tyler Retzlaff @ 2023-03-24 19:22 UTC (permalink / raw)
  To: Morten Brørup
  Cc: dev, Honnappa.Nagarahalli, Ruifeng.Wang, thomas, stephen
On Fri, Mar 24, 2023 at 08:09:50AM +0100, Morten Brørup wrote:
> > From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> > Sent: Thursday, 23 March 2023 23.54
> > 
> > Replace the use of rte_atomic.h types and functions, instead use GCC
> > supplied C++11 memory model builtins.
> > 
> > This series covers the libraries and drivers that are built on Windows.
> > 
> > The code has been converted to use the __atomic builtins, but during
> > conversion I noticed that there may be some additional issues
> > that need to be addressed.
> > 
> > I'll comment in the patches where my concerns are so the maintainers
> > may comment.
> > 
> > v3:
> >   * style, don't use c99 comments
> > 
> > v2:
> >   * comment code where optimizations may be possible now that memory
> >     order can be specified.
> >   * comment code where operations should potentially be atomic so that
> >     maintainers can review.
> >   * change a couple of variables labeled as counters to be unsigned.
> > 
> 
> I didn't see the v3 when ack'ing the v2, so in case v2 is quickly skipped by maintainers...
yeah, my fault. i hammed up the comment style used and needed to quickly
submit v3 to satisfy checkpatches.
thanks!
> 
> Series-acked-by: Morten Brørup <mb@smartsharesystems.com>
^ permalink raw reply	[flat|nested] 83+ messages in thread 
 
- * Re: [PATCH v3 0/7] replace rte atomics with GCC builtin atomics
  2023-03-23 22:53 ` [PATCH v3 " Tyler Retzlaff
                     ` (7 preceding siblings ...)
  2023-03-24  7:09   ` [PATCH v3 0/7] " Morten Brørup
@ 2023-05-24 12:40   ` David Marchand
  2023-05-24 15:47     ` Tyler Retzlaff
  8 siblings, 1 reply; 83+ messages in thread
From: David Marchand @ 2023-05-24 12:40 UTC (permalink / raw)
  To: Tyler Retzlaff
  Cc: dev, Honnappa.Nagarahalli, Ruifeng.Wang, thomas, stephen, mb,
	Ferruh Yigit
Hello Tyler,
On Thu, Mar 23, 2023 at 11:54 PM Tyler Retzlaff
<roretzla@linux.microsoft.com> wrote:
>
> Replace the use of rte_atomic.h types and functions, instead use GCC
> supplied C++11 memory model builtins.
>
> This series covers the libraries and drivers that are built on Windows.
>
> The code has been converted to use the __atomic builtins, but during
> conversion I noticed that there may be some additional issues
> that need to be addressed.
>
> I'll comment in the patches where my concerns are so the maintainers
> may comment.
>
> v3:
>   * style, don't use c99 comments
>
> v2:
>   * comment code where optimizations may be possible now that memory
>     order can be specified.
>   * comment code where operations should potentially be atomic so that
>     maintainers can review.
>   * change a couple of variables labeled as counters to be unsigned.
>
> Tyler Retzlaff (7):
>   ring: replace rte atomics with GCC builtin atomics
>   stack: replace rte atomics with GCC builtin atomics
>   dma/idxd: replace rte atomics with GCC builtin atomics
>   net/ice: replace rte atomics with GCC builtin atomics
>   net/ixgbe: replace rte atomics with GCC builtin atomics
>   net/null: replace rte atomics with GCC builtin atomics
>   net/ring: replace rte atomics with GCC builtin atomics
>
>  drivers/dma/idxd/idxd_internal.h |  3 +--
>  drivers/dma/idxd/idxd_pci.c      |  8 +++++---
>  drivers/net/ice/ice_dcf.c        |  1 -
>  drivers/net/ice/ice_dcf_ethdev.c |  1 -
>  drivers/net/ice/ice_ethdev.c     | 12 ++++++++----
>  drivers/net/ixgbe/ixgbe_bypass.c |  1 -
>  drivers/net/ixgbe/ixgbe_ethdev.c | 18 ++++++++++++------
>  drivers/net/ixgbe/ixgbe_ethdev.h |  3 ++-
>  drivers/net/ixgbe/ixgbe_flow.c   |  1 -
>  drivers/net/ixgbe/ixgbe_rxtx.c   |  1 -
>  drivers/net/null/rte_eth_null.c  | 28 ++++++++++++++++++----------
>  drivers/net/ring/rte_eth_ring.c  | 26 ++++++++++++++++----------
>  lib/ring/rte_ring_core.h         |  1 -
>  lib/ring/rte_ring_generic_pvt.h  | 12 ++++++++----
>  lib/stack/rte_stack_lf_generic.h | 16 +++++++++-------
>  15 files changed, 79 insertions(+), 53 deletions(-)
>
There is still some code using the DPDK "legacy" atomic API, but I
guess this will be converted later.
As you proposed, I dropped patch 1 on the ring library (waiting for
ARM to provide an alternative) and applied this series, thanks.
Note: Thomas, Ferruh, we will have to be careful when merging subtrees
to make sure we are not reintroducing those again (like for example
net/ice).
-- 
David Marchand
^ permalink raw reply	[flat|nested] 83+ messages in thread
- * Re: [PATCH v3 0/7] replace rte atomics with GCC builtin atomics
  2023-05-24 12:40   ` David Marchand
@ 2023-05-24 15:47     ` Tyler Retzlaff
  2023-05-24 20:06       ` David Marchand
  0 siblings, 1 reply; 83+ messages in thread
From: Tyler Retzlaff @ 2023-05-24 15:47 UTC (permalink / raw)
  To: David Marchand
  Cc: dev, Honnappa.Nagarahalli, Ruifeng.Wang, thomas, stephen, mb,
	Ferruh Yigit
On Wed, May 24, 2023 at 02:40:43PM +0200, David Marchand wrote:
> Hello Tyler,
> 
> On Thu, Mar 23, 2023 at 11:54 PM Tyler Retzlaff
> <roretzla@linux.microsoft.com> wrote:
> >
> > Replace the use of rte_atomic.h types and functions, instead use GCC
> > supplied C++11 memory model builtins.
> >
> > This series covers the libraries and drivers that are built on Windows.
> >
> > The code has been converted to use the __atomic builtins, but during
> > conversion I noticed that there may be some additional issues
> > that need to be addressed.
> >
> > I'll comment in the patches where my concerns are so the maintainers
> > may comment.
> >
> > v3:
> >   * style, don't use c99 comments
> >
> > v2:
> >   * comment code where optimizations may be possible now that memory
> >     order can be specified.
> >   * comment code where operations should potentially be atomic so that
> >     maintainers can review.
> >   * change a couple of variables labeled as counters to be unsigned.
> >
> > Tyler Retzlaff (7):
> >   ring: replace rte atomics with GCC builtin atomics
> >   stack: replace rte atomics with GCC builtin atomics
> >   dma/idxd: replace rte atomics with GCC builtin atomics
> >   net/ice: replace rte atomics with GCC builtin atomics
> >   net/ixgbe: replace rte atomics with GCC builtin atomics
> >   net/null: replace rte atomics with GCC builtin atomics
> >   net/ring: replace rte atomics with GCC builtin atomics
> >
> >  drivers/dma/idxd/idxd_internal.h |  3 +--
> >  drivers/dma/idxd/idxd_pci.c      |  8 +++++---
> >  drivers/net/ice/ice_dcf.c        |  1 -
> >  drivers/net/ice/ice_dcf_ethdev.c |  1 -
> >  drivers/net/ice/ice_ethdev.c     | 12 ++++++++----
> >  drivers/net/ixgbe/ixgbe_bypass.c |  1 -
> >  drivers/net/ixgbe/ixgbe_ethdev.c | 18 ++++++++++++------
> >  drivers/net/ixgbe/ixgbe_ethdev.h |  3 ++-
> >  drivers/net/ixgbe/ixgbe_flow.c   |  1 -
> >  drivers/net/ixgbe/ixgbe_rxtx.c   |  1 -
> >  drivers/net/null/rte_eth_null.c  | 28 ++++++++++++++++++----------
> >  drivers/net/ring/rte_eth_ring.c  | 26 ++++++++++++++++----------
> >  lib/ring/rte_ring_core.h         |  1 -
> >  lib/ring/rte_ring_generic_pvt.h  | 12 ++++++++----
> >  lib/stack/rte_stack_lf_generic.h | 16 +++++++++-------
> >  15 files changed, 79 insertions(+), 53 deletions(-)
> >
> 
> There is still some code using the DPDK "legacy" atomic API, but I
> guess this will be converted later.
Yes, it will be converted later.
If I did it correctly... the series was an attempt to move away
from the legacy API where there was a dependency on EAL that would
change when moving to stdatomic. I'm hoping that the remaining uses of
the legacy API are not sensitive to the theoretical ABI surface
changing when that move is complete.
> As you proposed, I dropped patch 1 on the ring library (waiting for
> ARM to provide an alternative) and applied this series, thanks.
> 
> Note: Thomas, Ferruh, we will have to be careful when merging subtrees
> to make sure we are not reintroducing those again (like for example
> net/ice).
> 
> -- 
> David Marchand
^ permalink raw reply	[flat|nested] 83+ messages in thread 
- * Re: [PATCH v3 0/7] replace rte atomics with GCC builtin atomics
  2023-05-24 15:47     ` Tyler Retzlaff
@ 2023-05-24 20:06       ` David Marchand
  2023-05-24 22:50         ` Tyler Retzlaff
  0 siblings, 1 reply; 83+ messages in thread
From: David Marchand @ 2023-05-24 20:06 UTC (permalink / raw)
  To: Tyler Retzlaff
  Cc: dev, Honnappa.Nagarahalli, Ruifeng.Wang, thomas, stephen, mb,
	Ferruh Yigit
On Wed, May 24, 2023 at 5:47 PM Tyler Retzlaff
<roretzla@linux.microsoft.com> wrote:
> On Wed, May 24, 2023 at 02:40:43PM +0200, David Marchand wrote:
> > Hello Tyler,
> >
> > On Thu, Mar 23, 2023 at 11:54 PM Tyler Retzlaff
> > <roretzla@linux.microsoft.com> wrote:
> > >
> > > Replace the use of rte_atomic.h types and functions, instead use GCC
> > > supplied C++11 memory model builtins.
> > >
> > > This series covers the libraries and drivers that are built on Windows.
> > >
> > > The code has been converted to use the __atomic builtins, but during
> > > conversion I noticed that there may be some additional issues
> > > that need to be addressed.
> > >
> > > I'll comment in the patches where my concerns are so the maintainers
> > > may comment.
> > >
> > > v3:
> > >   * style, don't use c99 comments
> > >
> > > v2:
> > >   * comment code where optimizations may be possible now that memory
> > >     order can be specified.
> > >   * comment code where operations should potentially be atomic so that
> > >     maintainers can review.
> > >   * change a couple of variables labeled as counters to be unsigned.
> > >
> > > Tyler Retzlaff (7):
> > >   ring: replace rte atomics with GCC builtin atomics
> > >   stack: replace rte atomics with GCC builtin atomics
> > >   dma/idxd: replace rte atomics with GCC builtin atomics
> > >   net/ice: replace rte atomics with GCC builtin atomics
> > >   net/ixgbe: replace rte atomics with GCC builtin atomics
> > >   net/null: replace rte atomics with GCC builtin atomics
> > >   net/ring: replace rte atomics with GCC builtin atomics
> > >
> > >  drivers/dma/idxd/idxd_internal.h |  3 +--
> > >  drivers/dma/idxd/idxd_pci.c      |  8 +++++---
> > >  drivers/net/ice/ice_dcf.c        |  1 -
> > >  drivers/net/ice/ice_dcf_ethdev.c |  1 -
> > >  drivers/net/ice/ice_ethdev.c     | 12 ++++++++----
> > >  drivers/net/ixgbe/ixgbe_bypass.c |  1 -
> > >  drivers/net/ixgbe/ixgbe_ethdev.c | 18 ++++++++++++------
> > >  drivers/net/ixgbe/ixgbe_ethdev.h |  3 ++-
> > >  drivers/net/ixgbe/ixgbe_flow.c   |  1 -
> > >  drivers/net/ixgbe/ixgbe_rxtx.c   |  1 -
> > >  drivers/net/null/rte_eth_null.c  | 28 ++++++++++++++++++----------
> > >  drivers/net/ring/rte_eth_ring.c  | 26 ++++++++++++++++----------
> > >  lib/ring/rte_ring_core.h         |  1 -
> > >  lib/ring/rte_ring_generic_pvt.h  | 12 ++++++++----
> > >  lib/stack/rte_stack_lf_generic.h | 16 +++++++++-------
> > >  15 files changed, 79 insertions(+), 53 deletions(-)
> > >
> >
> > There is still some code using the DPDK "legacy" atomic API, but I
> > guess this will be converted later.
>
> Yes, it will be converted later.
>
> If I did it correctly... the series was an attempt to move away
> from the legacy API where there was a dependency on EAL that would
> > change when moving to stdatomic. I'm hoping that the remaining uses of
> the legacy API are not sensitive to the theoretical ABI surface
> changing when that move is complete.
Ok.
> > As you proposed, I dropped patch 1 on the ring library (waiting for
> > ARM to provide an alternative) and applied this series, thanks.
> >
> > Note: Thomas, Ferruh, we will have to be careful when merging subtrees
> > to make sure we are not reintroducing those again (like for example
> > net/ice).
Well, I have some second thoughts about this series, so I did not push
it to dpdk.org yet.
Driver maintainers were not copied, so I would like another pair of
eyes on the series: ideally no /* Note: */ should be left when merging
those patches.
I'll reply individually on the patches.
-- 
David Marchand
^ permalink raw reply	[flat|nested] 83+ messages in thread 
- * Re: [PATCH v3 0/7] replace rte atomics with GCC builtin atomics
  2023-05-24 20:06       ` David Marchand
@ 2023-05-24 22:50         ` Tyler Retzlaff
  2023-05-24 22:56           ` Honnappa Nagarahalli
  0 siblings, 1 reply; 83+ messages in thread
From: Tyler Retzlaff @ 2023-05-24 22:50 UTC (permalink / raw)
  To: David Marchand
  Cc: dev, Honnappa.Nagarahalli, Ruifeng.Wang, thomas, stephen, mb,
	Ferruh Yigit
On Wed, May 24, 2023 at 10:06:24PM +0200, David Marchand wrote:
> On Wed, May 24, 2023 at 5:47 PM Tyler Retzlaff
> <roretzla@linux.microsoft.com> wrote:
> > On Wed, May 24, 2023 at 02:40:43PM +0200, David Marchand wrote:
> > > Hello Tyler,
> > >
> > > On Thu, Mar 23, 2023 at 11:54 PM Tyler Retzlaff
> > > <roretzla@linux.microsoft.com> wrote:
> > > >
> > > > Replace the use of rte_atomic.h types and functions, instead use GCC
> > > > supplied C++11 memory model builtins.
> > > >
> > > > This series covers the libraries and drivers that are built on Windows.
> > > >
> > > > The code has been converted to use the __atomic builtins, but during
> > > > conversion I noticed that there may be some additional issues
> > > > that need to be addressed.
> > > >
> > > > I'll comment in the patches where my concerns are so the maintainers
> > > > may comment.
> > > >
> > > > v3:
> > > >   * style, don't use c99 comments
> > > >
> > > > v2:
> > > >   * comment code where optimizations may be possible now that memory
> > > >     order can be specified.
> > > >   * comment code where operations should potentially be atomic so that
> > > >     maintainers can review.
> > > >   * change a couple of variables labeled as counters to be unsigned.
> > > >
> > > > Tyler Retzlaff (7):
> > > >   ring: replace rte atomics with GCC builtin atomics
> > > >   stack: replace rte atomics with GCC builtin atomics
> > > >   dma/idxd: replace rte atomics with GCC builtin atomics
> > > >   net/ice: replace rte atomics with GCC builtin atomics
> > > >   net/ixgbe: replace rte atomics with GCC builtin atomics
> > > >   net/null: replace rte atomics with GCC builtin atomics
> > > >   net/ring: replace rte atomics with GCC builtin atomics
> > > >
> > > >  drivers/dma/idxd/idxd_internal.h |  3 +--
> > > >  drivers/dma/idxd/idxd_pci.c      |  8 +++++---
> > > >  drivers/net/ice/ice_dcf.c        |  1 -
> > > >  drivers/net/ice/ice_dcf_ethdev.c |  1 -
> > > >  drivers/net/ice/ice_ethdev.c     | 12 ++++++++----
> > > >  drivers/net/ixgbe/ixgbe_bypass.c |  1 -
> > > >  drivers/net/ixgbe/ixgbe_ethdev.c | 18 ++++++++++++------
> > > >  drivers/net/ixgbe/ixgbe_ethdev.h |  3 ++-
> > > >  drivers/net/ixgbe/ixgbe_flow.c   |  1 -
> > > >  drivers/net/ixgbe/ixgbe_rxtx.c   |  1 -
> > > >  drivers/net/null/rte_eth_null.c  | 28 ++++++++++++++++++----------
> > > >  drivers/net/ring/rte_eth_ring.c  | 26 ++++++++++++++++----------
> > > >  lib/ring/rte_ring_core.h         |  1 -
> > > >  lib/ring/rte_ring_generic_pvt.h  | 12 ++++++++----
> > > >  lib/stack/rte_stack_lf_generic.h | 16 +++++++++-------
> > > >  15 files changed, 79 insertions(+), 53 deletions(-)
> > > >
> > >
> > > There is still some code using the DPDK "legacy" atomic API, but I
> > > guess this will be converted later.
> >
> > Yes, it will be converted later.
> >
> > If I did it correctly... the series was an attempt to move away
> > from the legacy API where there was a dependency on EAL that would
> > > change when moving to stdatomic. I'm hoping that the remaining uses of
> > the legacy API are not sensitive to the theoretical ABI surface
> > changing when that move is complete.
> 
> Ok.
> 
> 
> > > As you proposed, I dropped patch 1 on the ring library (waiting for
> > > ARM to provide an alternative) and applied this series, thanks.
> > >
> > > Note: Thomas, Ferruh, we will have to be careful when merging subtrees
> > > to make sure we are not reintroducing those again (like for example
> > > net/ice).
> 
> Well, I have some second thoughts about this series, so I did not push
> it to dpdk.org yet.
Understood. It's very important to have these reviewed well, so no
objection; I just hope we can get them reviewed properly soon.
> Driver maintainers were not copied, so I would like another pair of
> eyes on the series: ideally no /* Note: */ should be left when merging
> those patches.
The /* Note: */ was explicitly requested by other reviewers as they were
concerned we would lose track of opportunities to weaken ordering after
switching from __sync to __atomic.
Is your request that the comments now be removed?
Thanks!
> I'll reply individually on the patches.
> 
> 
> -- 
> David Marchand
^ permalink raw reply	[flat|nested] 83+ messages in thread 
- * RE: [PATCH v3 0/7] replace rte atomics with GCC builtin atomics
  2023-05-24 22:50         ` Tyler Retzlaff
@ 2023-05-24 22:56           ` Honnappa Nagarahalli
  2023-05-25  0:02             ` Tyler Retzlaff
  0 siblings, 1 reply; 83+ messages in thread
From: Honnappa Nagarahalli @ 2023-05-24 22:56 UTC (permalink / raw)
  To: Tyler Retzlaff, David Marchand
  Cc: dev, Ruifeng Wang, thomas, stephen, mb, Ferruh Yigit, nd, nd
> -----Original Message-----
> From: Tyler Retzlaff <roretzla@linux.microsoft.com>
> Sent: Wednesday, May 24, 2023 5:51 PM
> To: David Marchand <david.marchand@redhat.com>
> Cc: dev@dpdk.org; Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>;
> Ruifeng Wang <Ruifeng.Wang@arm.com>; thomas@monjalon.net;
> stephen@networkplumber.org; mb@smartsharesystems.com; Ferruh Yigit
> <ferruh.yigit@amd.com>
> Subject: Re: [PATCH v3 0/7] replace rte atomics with GCC builtin atomics
> 
> On Wed, May 24, 2023 at 10:06:24PM +0200, David Marchand wrote:
> > On Wed, May 24, 2023 at 5:47 PM Tyler Retzlaff
> > <roretzla@linux.microsoft.com> wrote:
> > > On Wed, May 24, 2023 at 02:40:43PM +0200, David Marchand wrote:
> > > > Hello Tyler,
> > > >
> > > > On Thu, Mar 23, 2023 at 11:54 PM Tyler Retzlaff
> > > > <roretzla@linux.microsoft.com> wrote:
> > > > >
> > > > > Replace the use of rte_atomic.h types and functions, instead use
> > > > > GCC supplied C++11 memory model builtins.
> > > > >
> > > > > This series covers the libraries and drivers that are built on Windows.
> > > > >
> > > > > The code has be converted to use the __atomic builtins but there
> > > > > are additional during conversion i notice that there may be some
> > > > > issues that need to be addressed.
> > > > >
> > > > > I'll comment in the patches where my concerns are so the
> > > > > maintainers may comment.
> > > > >
> > > > > v3:
> > > > >   * style, don't use c99 comments
> > > > >
> > > > > v2:
> > > > >   * comment code where optimizations may be possible now that
> memory
> > > > >     order can be specified.
> > > > >   * comment code where operations should potentially be atomic so that
> > > > >     maintainers can review.
> > > > >   * change a couple of variables labeled as counters to be unsigned.
> > > > >
> > > > > Tyler Retzlaff (7):
> > > > >   ring: replace rte atomics with GCC builtin atomics
> > > > >   stack: replace rte atomics with GCC builtin atomics
> > > > >   dma/idxd: replace rte atomics with GCC builtin atomics
> > > > >   net/ice: replace rte atomics with GCC builtin atomics
> > > > >   net/ixgbe: replace rte atomics with GCC builtin atomics
> > > > >   net/null: replace rte atomics with GCC builtin atomics
> > > > >   net/ring: replace rte atomics with GCC builtin atomics
> > > > >
> > > > >  drivers/dma/idxd/idxd_internal.h |  3 +--
> > > > >  drivers/dma/idxd/idxd_pci.c      |  8 +++++---
> > > > >  drivers/net/ice/ice_dcf.c        |  1 -
> > > > >  drivers/net/ice/ice_dcf_ethdev.c |  1 -
> > > > >  drivers/net/ice/ice_ethdev.c     | 12 ++++++++----
> > > > >  drivers/net/ixgbe/ixgbe_bypass.c |  1 -
> > > > > drivers/net/ixgbe/ixgbe_ethdev.c | 18 ++++++++++++------
> > > > > drivers/net/ixgbe/ixgbe_ethdev.h |  3 ++-
> > > > >  drivers/net/ixgbe/ixgbe_flow.c   |  1 -
> > > > >  drivers/net/ixgbe/ixgbe_rxtx.c   |  1 -
> > > > >  drivers/net/null/rte_eth_null.c  | 28
> > > > > ++++++++++++++++++----------  drivers/net/ring/rte_eth_ring.c  | 26
> ++++++++++++++++----------
> > > > >  lib/ring/rte_ring_core.h         |  1 -
> > > > >  lib/ring/rte_ring_generic_pvt.h  | 12 ++++++++----
> > > > > lib/stack/rte_stack_lf_generic.h | 16 +++++++++-------
> > > > >  15 files changed, 79 insertions(+), 53 deletions(-)
> > > > >
> > > >
> > > > There is still some code using the DPDK "legacy" atomic API, but I
> > > > guess this will be converted later.
> > >
> > > Yes, it will be converted later.
> > >
> > > If I did it correctly... the series was an attempt to move away from
> > > the legacy API where there was a dependency on EAL that would change
> > > when moving to stdatomic. I'm hoping that the remaining use of the
> > > legacy API are not sensitive to the theoretical ABI surface changing
> > > when that move is complete.
> >
> > Ok.
> >
> >
> > > > As you proposed, I dropped patch 1 on the ring library (waiting
> > > > for ARM to provide an alternative) and applied this series, thanks.
> > > >
> > > > Note: Thomas, Ferruh, we will have to be careful when merging
> > > > subtrees to make sure we are not reintroducing those again (like
> > > > for example net/ice).
> >
> > Well, I have some second thought about this series so I did not push
> > it to dpdk.org yet.
> 
> Understood. It's very important to have these reviewed well so no objection just
> hope we can get them reviewed properly soon.
> 
> > Drivers maintainers were not copied so I would like another pair of
> > eyes on the series: ideally no /* Note: */ should be left when merging
> > those patches.
> 
> The /* Note: */ was explicitly requested by other reviewers as they were
> concerned we would lose track of opportunities to weaken ordering after
> switching from __sync to __atomic.
Note that some of the changes that I checked are in the control plane. While it is good to optimize those, the benefits might not be much. The presence of SEQ_CST can also act as a note.
> 
> Is your request that the comments now be removed?
> 
> Thanks!
> 
> > I'll reply individually on the patches.
> >
> >
> > --
> > David Marchand
^ permalink raw reply	[flat|nested] 83+ messages in thread 
- * Re: [PATCH v3 0/7] replace rte atomics with GCC builtin atomics
  2023-05-24 22:56           ` Honnappa Nagarahalli
@ 2023-05-25  0:02             ` Tyler Retzlaff
  2023-05-25  7:50               ` Morten Brørup
  0 siblings, 1 reply; 83+ messages in thread
From: Tyler Retzlaff @ 2023-05-25  0:02 UTC (permalink / raw)
  To: Honnappa Nagarahalli
  Cc: David Marchand, dev, Ruifeng Wang, thomas, stephen, mb, Ferruh Yigit, nd
Morten,
David and Honnappa are discussing the /* NOTE: */ comments that were
added. If the three of you could come to conclusion about keeping or
removing them it would be appreciated.
Thanks!
On Wed, May 24, 2023 at 10:56:01PM +0000, Honnappa Nagarahalli wrote:
> 
> 
> > -----Original Message-----
> > From: Tyler Retzlaff <roretzla@linux.microsoft.com>
> > Sent: Wednesday, May 24, 2023 5:51 PM
> > To: David Marchand <david.marchand@redhat.com>
> > Cc: dev@dpdk.org; Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>;
> > Ruifeng Wang <Ruifeng.Wang@arm.com>; thomas@monjalon.net;
> > stephen@networkplumber.org; mb@smartsharesystems.com; Ferruh Yigit
> > <ferruh.yigit@amd.com>
> > Subject: Re: [PATCH v3 0/7] replace rte atomics with GCC builtin atomics
> > 
> > On Wed, May 24, 2023 at 10:06:24PM +0200, David Marchand wrote:
> > > On Wed, May 24, 2023 at 5:47 PM Tyler Retzlaff
> > > <roretzla@linux.microsoft.com> wrote:
> > > > On Wed, May 24, 2023 at 02:40:43PM +0200, David Marchand wrote:
> > > > > Hello Tyler,
> > > > >
> > > > > On Thu, Mar 23, 2023 at 11:54 PM Tyler Retzlaff
> > > > > <roretzla@linux.microsoft.com> wrote:
> > > > > >
> > > > > > Replace the use of rte_atomic.h types and functions, instead use
> > > > > > GCC supplied C++11 memory model builtins.
> > > > > >
> > > > > > This series covers the libraries and drivers that are built on Windows.
> > > > > >
> > > > > > The code has be converted to use the __atomic builtins but there
> > > > > > are additional during conversion i notice that there may be some
> > > > > > issues that need to be addressed.
> > > > > >
> > > > > > I'll comment in the patches where my concerns are so the
> > > > > > maintainers may comment.
> > > > > >
> > > > > > v3:
> > > > > >   * style, don't use c99 comments
> > > > > >
> > > > > > v2:
> > > > > >   * comment code where optimizations may be possible now that
> > memory
> > > > > >     order can be specified.
> > > > > >   * comment code where operations should potentially be atomic so that
> > > > > >     maintainers can review.
> > > > > >   * change a couple of variables labeled as counters to be unsigned.
> > > > > >
> > > > > > Tyler Retzlaff (7):
> > > > > >   ring: replace rte atomics with GCC builtin atomics
> > > > > >   stack: replace rte atomics with GCC builtin atomics
> > > > > >   dma/idxd: replace rte atomics with GCC builtin atomics
> > > > > >   net/ice: replace rte atomics with GCC builtin atomics
> > > > > >   net/ixgbe: replace rte atomics with GCC builtin atomics
> > > > > >   net/null: replace rte atomics with GCC builtin atomics
> > > > > >   net/ring: replace rte atomics with GCC builtin atomics
> > > > > >
> > > > > >  drivers/dma/idxd/idxd_internal.h |  3 +--
> > > > > >  drivers/dma/idxd/idxd_pci.c      |  8 +++++---
> > > > > >  drivers/net/ice/ice_dcf.c        |  1 -
> > > > > >  drivers/net/ice/ice_dcf_ethdev.c |  1 -
> > > > > >  drivers/net/ice/ice_ethdev.c     | 12 ++++++++----
> > > > > >  drivers/net/ixgbe/ixgbe_bypass.c |  1 -
> > > > > > drivers/net/ixgbe/ixgbe_ethdev.c | 18 ++++++++++++------
> > > > > > drivers/net/ixgbe/ixgbe_ethdev.h |  3 ++-
> > > > > >  drivers/net/ixgbe/ixgbe_flow.c   |  1 -
> > > > > >  drivers/net/ixgbe/ixgbe_rxtx.c   |  1 -
> > > > > >  drivers/net/null/rte_eth_null.c  | 28
> > > > > > ++++++++++++++++++----------  drivers/net/ring/rte_eth_ring.c  | 26
> > ++++++++++++++++----------
> > > > > >  lib/ring/rte_ring_core.h         |  1 -
> > > > > >  lib/ring/rte_ring_generic_pvt.h  | 12 ++++++++----
> > > > > > lib/stack/rte_stack_lf_generic.h | 16 +++++++++-------
> > > > > >  15 files changed, 79 insertions(+), 53 deletions(-)
> > > > > >
> > > > >
> > > > > There is still some code using the DPDK "legacy" atomic API, but I
> > > > > guess this will be converted later.
> > > >
> > > > Yes, it will be converted later.
> > > >
> > > > If I did it correctly... the series was an attempt to move away from
> > > > the legacy API where there was a dependency on EAL that would change
> > > > when moving to stdatomic. I'm hoping that the remaining use of the
> > > > legacy API are not sensitive to the theoretical ABI surface changing
> > > > when that move is complete.
> > >
> > > Ok.
> > >
> > >
> > > > > As you proposed, I dropped patch 1 on the ring library (waiting
> > > > > for ARM to provide an alternative) and applied this series, thanks.
> > > > >
> > > > > Note: Thomas, Ferruh, we will have to be careful when merging
> > > > > subtrees to make sure we are not reintroducing those again (like
> > > > > for example net/ice).
> > >
> > > Well, I have some second thought about this series so I did not push
> > > it to dpdk.org yet.
> > 
> > Understood. It's very important to have these reviewed well so no objection just
> > hope we can get them reviewed properly soon.
> > 
> > > Drivers maintainers were not copied so I would like another pair of
> > > eyes on the series: ideally no /* Note: */ should be left when merging
> > > those patches.
> > 
> > The /* Note: */ was explicitly requested by other reviewers as they were
> > concerned we would lose track of opportunities to weaken ordering after
> > switching from __sync to __atomic.
> Note that some of the changes that I checked are in control plane. While it is good to optimize those, but the benefits might not be much. The presence of SEQ_CST also can act as a note.
> 
> > 
> > Is your request that the comments now be removed?
> > 
> > Thanks!
> > 
> > > I'll reply individually on the patches.
> > >
> > >
> > > --
> > > David Marchand
^ permalink raw reply	[flat|nested] 83+ messages in thread 
- * RE: [PATCH v3 0/7] replace rte atomics with GCC builtin atomics
  2023-05-25  0:02             ` Tyler Retzlaff
@ 2023-05-25  7:50               ` Morten Brørup
  0 siblings, 0 replies; 83+ messages in thread
From: Morten Brørup @ 2023-05-25  7:50 UTC (permalink / raw)
  To: Tyler Retzlaff, Honnappa Nagarahalli, David Marchand
  Cc: dev, Ruifeng Wang, thomas, stephen, Ferruh Yigit, nd
> From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> Sent: Thursday, 25 May 2023 02.03
> 
> Morten,
> 
> David and Honnappa are discussing the /* NOTE: */ comments that were
> added. If the three of you could come to conclusion about keeping or
> removing them it would be appreciated.
> 
> Thanks!
> 
> On Wed, May 24, 2023 at 10:56:01PM +0000, Honnappa Nagarahalli wrote:
> >
> > > From: Tyler Retzlaff <roretzla@linux.microsoft.com>
> > > Sent: Wednesday, May 24, 2023 5:51 PM
> > >
> > > On Wed, May 24, 2023 at 10:06:24PM +0200, David Marchand wrote:
> > > > On Wed, May 24, 2023 at 5:47 PM Tyler Retzlaff
> > > > <roretzla@linux.microsoft.com> wrote:
> > > > > On Wed, May 24, 2023 at 02:40:43PM +0200, David Marchand wrote:
> > > > > > Hello Tyler,
> > > > > >
> > > > > > On Thu, Mar 23, 2023 at 11:54 PM Tyler Retzlaff
> > > > > > <roretzla@linux.microsoft.com> wrote:
> > > > > > >
> > > > > > > Replace the use of rte_atomic.h types and functions, instead use
> > > > > > > GCC supplied C++11 memory model builtins.
> > > > > > >
> > > > > > > This series covers the libraries and drivers that are built on
> Windows.
> > > > > > >
> > > > > > > The code has be converted to use the __atomic builtins but there
> > > > > > > are additional during conversion i notice that there may be some
> > > > > > > issues that need to be addressed.
[...]
> > > > Well, I have some second thought about this series so I did not push
> > > > it to dpdk.org yet.
> > >
> > > Understood. It's very important to have these reviewed well so no
> objection just
> > > hope we can get them reviewed properly soon.
> > >
> > > > Drivers maintainers were not copied so I would like another pair of
> > > > eyes on the series: ideally no /* Note: */ should be left when merging
> > > > those patches.
> > >
> > > The /* Note: */ was explicitly requested by other reviewers as they were
> > > concerned we would lose track of opportunities to weaken ordering after
> > > switching from __sync to __atomic.
This patch series is an important step towards the more flexible C11 atomics, and I consider further optimization "nice to have", not "must have".
So I don't think we should hold back these patches and require the maintainers to optimize the atomic accesses first. I would rather leave the notes in the code, so they can be optimized by anyone with the required skills and/or testing facilities at a later time.
I agree that it would be ideal if anyone (e.g. the maintainers) can optimize the affected libraries/drivers in time for the coming release, but they can be separate patches after this series.
> > Note that some of the changes that I checked are in control plane. While it
> is good to optimize those, but the benefits might not be much. The presence of
> SEQ_CST also can act as a note.
I vote against using SEQ_CST as a note. SEQ_CST might be the correct memory order in some locations, so it would require a note in those locations that SEQ_CST has been reviewed and is the optimal memory order there. I would rather have notes where we know that further consideration for optimization is warranted.
If atomics are used in the control plane, the memory order still needs to be correct (i.e. not causing failure, which SEQ_CST should assure). So the note should remain, if not reviewed for optimization. A reviewer can add to the note that this is control plane only, so optimization is not important. Alternatively, if those control plane variables don't need to be atomics, they can be replaced by non-atomic types and accesses - such a modification can also be considered an optimization.
PS: If someone spotted an opportunity for optimization anywhere in DPDK, but was unable to implement and/or test it himself, adding a note about it in the source code could be an alternative. On the other hand, such ideas might belong in Bugzilla instead... (Just arguing for keeping the notes. Not trying to broaden the discussion!)
^ permalink raw reply	[flat|nested] 83+ messages in thread 
 
 
 
 
 
 
 
- * [PATCH v4 0/6] replace rte atomics with GCC builtin atomics
  2023-03-17 20:19 [PATCH 0/7] replace rte atomics with GCC builtin atomics Tyler Retzlaff
                   ` (9 preceding siblings ...)
  2023-03-23 22:53 ` [PATCH v3 " Tyler Retzlaff
@ 2023-06-02 19:45 ` Tyler Retzlaff
  2023-06-02 19:45   ` [PATCH v4 1/6] stack: " Tyler Retzlaff
                     ` (5 more replies)
  2023-06-06 21:45 ` [PATCH v5 0/6] " Tyler Retzlaff
  11 siblings, 6 replies; 83+ messages in thread
From: Tyler Retzlaff @ 2023-06-02 19:45 UTC (permalink / raw)
  To: dev, david.marchand
  Cc: Olivier Matz, Bruce Richardson, Kevin Laatz, Qiming Yang,
	Qi Zhang, Wenjun Wu, Tetsuya Mukawa, Honnappa.Nagarahalli,
	thomas, Tyler Retzlaff
Replace the use of rte_atomic.h types and functions, instead use GCC
supplied C++11 memory model builtins.
This series covers the libraries and drivers that are built on Windows.
The code has been converted to use the __atomic builtins, but during
conversion I noticed that there may be some additional issues that
need to be addressed.
I'll comment in the patches where my concerns are so the maintainers
may comment.
v4:
  * drop patch for lib/ring it will be provided by ARM / Honnappa
  * rebase for changes in dma/idxd merge
  * adapt __atomic_fetch_sub(...) - 1 == 0 to be (__atomic_fetch_sub(...) == 1)
    as per feedback.
  * drop one /* NOTE: review for potential ordering optimization */ since
    the note referenced a control path that is not performance-critical.
  note:
  The remainder of the NOTE comments have been retained, since there
  seems to be no consensus but the stronger opinion/argument expressed
  was to keep them. While I generally agree that changes should not
  include ``TODO'' style comments, I also agree that without these
  comments in your face people are very unlikely to feel compelled
  to make the review the comments are trying to solicit. If it is
  absolute that the series won't be merged with them then I will
  remove them, but please be explicit soon.
v3:
  * style, don't use c99 comments
v2:
  * comment code where optimizations may be possible now that memory
    order can be specified.
  * comment code where operations should potentially be atomic so that
    maintainers can review.
  * change a couple of variables labeled as counters to be unsigned.
Tyler Retzlaff (6):
  stack: replace rte atomics with GCC builtin atomics
  dma/idxd: replace rte atomics with GCC builtin atomics
  net/ice: replace rte atomics with GCC builtin atomics
  net/ixgbe: replace rte atomics with GCC builtin atomics
  net/null: replace rte atomics with GCC builtin atomics
  net/ring: replace rte atomics with GCC builtin atomics
 drivers/dma/idxd/idxd_internal.h |  3 +--
 drivers/dma/idxd/idxd_pci.c      | 11 ++++++-----
 drivers/net/ice/ice_dcf.c        |  1 -
 drivers/net/ice/ice_dcf_ethdev.c |  1 -
 drivers/net/ice/ice_ethdev.c     | 12 ++++++++----
 drivers/net/ixgbe/ixgbe_bypass.c |  1 -
 drivers/net/ixgbe/ixgbe_ethdev.c | 18 ++++++++++++------
 drivers/net/ixgbe/ixgbe_ethdev.h |  3 ++-
 drivers/net/ixgbe/ixgbe_flow.c   |  1 -
 drivers/net/ixgbe/ixgbe_rxtx.c   |  1 -
 drivers/net/null/rte_eth_null.c  | 28 ++++++++++++++++++----------
 drivers/net/ring/rte_eth_ring.c  | 26 ++++++++++++++++----------
 lib/stack/rte_stack_lf_generic.h | 16 +++++++++-------
 13 files changed, 72 insertions(+), 50 deletions(-)
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 83+ messages in thread
- * [PATCH v4 1/6] stack: replace rte atomics with GCC builtin atomics
  2023-06-02 19:45 ` [PATCH v4 0/6] " Tyler Retzlaff
@ 2023-06-02 19:45   ` Tyler Retzlaff
  2023-06-02 19:45   ` [PATCH v4 2/6] dma/idxd: " Tyler Retzlaff
                     ` (4 subsequent siblings)
  5 siblings, 0 replies; 83+ messages in thread
From: Tyler Retzlaff @ 2023-06-02 19:45 UTC (permalink / raw)
  To: dev, david.marchand
  Cc: Olivier Matz, Bruce Richardson, Kevin Laatz, Qiming Yang,
	Qi Zhang, Wenjun Wu, Tetsuya Mukawa, Honnappa.Nagarahalli,
	thomas, Tyler Retzlaff
Replace the use of rte_atomic.h types and functions, instead use GCC
supplied C++11 memory model builtins.
Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
---
 lib/stack/rte_stack_lf_generic.h | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)
diff --git a/lib/stack/rte_stack_lf_generic.h b/lib/stack/rte_stack_lf_generic.h
index 7fa29ce..aad3747 100644
--- a/lib/stack/rte_stack_lf_generic.h
+++ b/lib/stack/rte_stack_lf_generic.h
@@ -26,8 +26,8 @@
 	 * elements. If the mempool is near-empty to the point that this is a
 	 * concern, the user should consider increasing the mempool size.
 	 */
-	return (unsigned int)rte_atomic64_read((rte_atomic64_t *)
-			&s->stack_lf.used.len);
+	/* NOTE: review for potential ordering optimization */
+	return __atomic_load_n(&s->stack_lf.used.len, __ATOMIC_SEQ_CST);
 }
 
 static __rte_always_inline void
@@ -67,8 +67,8 @@
 				1, __ATOMIC_RELEASE,
 				__ATOMIC_RELAXED);
 	} while (success == 0);
-
-	rte_atomic64_add((rte_atomic64_t *)&list->len, num);
+	/* NOTE: review for potential ordering optimization */
+	__atomic_fetch_add(&list->len, num, __ATOMIC_SEQ_CST);
 }
 
 static __rte_always_inline struct rte_stack_lf_elem *
@@ -82,14 +82,16 @@
 
 	/* Reserve num elements, if available */
 	while (1) {
-		uint64_t len = rte_atomic64_read((rte_atomic64_t *)&list->len);
+		/* NOTE: review for potential ordering optimization */
+		uint64_t len = __atomic_load_n(&list->len, __ATOMIC_SEQ_CST);
 
 		/* Does the list contain enough elements? */
 		if (unlikely(len < num))
 			return NULL;
 
-		if (rte_atomic64_cmpset((volatile uint64_t *)&list->len,
-					len, len - num))
+		/* NOTE: review for potential ordering optimization */
+		if (__atomic_compare_exchange_n(&list->len, &len, len - num,
+			0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
 			break;
 	}
 
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 83+ messages in thread
- * [PATCH v4 2/6] dma/idxd: replace rte atomics with GCC builtin atomics
  2023-06-02 19:45 ` [PATCH v4 0/6] " Tyler Retzlaff
  2023-06-02 19:45   ` [PATCH v4 1/6] stack: " Tyler Retzlaff
@ 2023-06-02 19:45   ` Tyler Retzlaff
  2023-06-02 19:45   ` [PATCH v4 3/6] net/ice: " Tyler Retzlaff
                     ` (3 subsequent siblings)
  5 siblings, 0 replies; 83+ messages in thread
From: Tyler Retzlaff @ 2023-06-02 19:45 UTC (permalink / raw)
  To: dev, david.marchand
  Cc: Olivier Matz, Bruce Richardson, Kevin Laatz, Qiming Yang,
	Qi Zhang, Wenjun Wu, Tetsuya Mukawa, Honnappa.Nagarahalli,
	thomas, Tyler Retzlaff
Replace the use of rte_atomic.h types and functions, instead use GCC
supplied C++11 memory model builtins.
Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Bruce Richardson <bruce.richardson@intel.com>
Acked-by: Kevin Laatz <kevin.laatz@intel.com>
---
 drivers/dma/idxd/idxd_internal.h |  3 +--
 drivers/dma/idxd/idxd_pci.c      | 11 ++++++-----
 2 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/drivers/dma/idxd/idxd_internal.h b/drivers/dma/idxd/idxd_internal.h
index 180a858..cd41777 100644
--- a/drivers/dma/idxd/idxd_internal.h
+++ b/drivers/dma/idxd/idxd_internal.h
@@ -7,7 +7,6 @@
 
 #include <rte_dmadev_pmd.h>
 #include <rte_spinlock.h>
-#include <rte_atomic.h>
 
 #include "idxd_hw_defs.h"
 
@@ -34,7 +33,7 @@ struct idxd_pci_common {
 	rte_spinlock_t lk;
 
 	uint8_t wq_cfg_sz;
-	rte_atomic16_t ref_count;
+	uint16_t ref_count;
 	volatile struct rte_idxd_bar0 *regs;
 	volatile uint32_t *wq_regs_base;
 	volatile struct rte_idxd_grpcfg *grp_regs;
diff --git a/drivers/dma/idxd/idxd_pci.c b/drivers/dma/idxd/idxd_pci.c
index 5e56240..3696c7f 100644
--- a/drivers/dma/idxd/idxd_pci.c
+++ b/drivers/dma/idxd/idxd_pci.c
@@ -6,7 +6,6 @@
 #include <rte_devargs.h>
 #include <rte_dmadev_pmd.h>
 #include <rte_malloc.h>
-#include <rte_atomic.h>
 
 #include "idxd_internal.h"
 
@@ -136,7 +135,8 @@
 	/* if this is the last WQ on the device, disable the device and free
 	 * the PCI struct
 	 */
-	is_last_wq = rte_atomic16_dec_and_test(&idxd->u.pci->ref_count);
+	/* NOTE: review for potential ordering optimization */
+	is_last_wq = (__atomic_fetch_sub(&idxd->u.pci->ref_count, 1, __ATOMIC_SEQ_CST) == 1);
 	if (is_last_wq) {
 		/* disable the device */
 		err_code = idxd_pci_dev_command(idxd, idxd_disable_dev);
@@ -322,8 +322,9 @@
 			return ret;
 		}
 		qid = rte_dma_get_dev_id_by_name(qname);
-		max_qid = rte_atomic16_read(
-			&((struct idxd_dmadev *)rte_dma_fp_objs[qid].dev_private)->u.pci->ref_count);
+		max_qid = __atomic_load_n(
+			&((struct idxd_dmadev *)rte_dma_fp_objs[qid].dev_private)->u.pci->ref_count,
+			__ATOMIC_SEQ_CST);
 
 		/* we have queue 0 done, now configure the rest of the queues */
 		for (qid = 1; qid < max_qid; qid++) {
@@ -380,7 +381,7 @@
 				free(idxd.u.pci);
 			return ret;
 		}
-		rte_atomic16_inc(&idxd.u.pci->ref_count);
+		__atomic_fetch_add(&idxd.u.pci->ref_count, 1, __ATOMIC_SEQ_CST);
 	}
 
 	return 0;
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 83+ messages in thread
- * [PATCH v4 3/6] net/ice: replace rte atomics with GCC builtin atomics
  2023-06-02 19:45 ` [PATCH v4 0/6] " Tyler Retzlaff
  2023-06-02 19:45   ` [PATCH v4 1/6] stack: " Tyler Retzlaff
  2023-06-02 19:45   ` [PATCH v4 2/6] dma/idxd: " Tyler Retzlaff
@ 2023-06-02 19:45   ` Tyler Retzlaff
  2023-06-02 19:45   ` [PATCH v4 4/6] net/ixgbe: " Tyler Retzlaff
                     ` (2 subsequent siblings)
  5 siblings, 0 replies; 83+ messages in thread
From: Tyler Retzlaff @ 2023-06-02 19:45 UTC (permalink / raw)
  To: dev, david.marchand
  Cc: Olivier Matz, Bruce Richardson, Kevin Laatz, Qiming Yang,
	Qi Zhang, Wenjun Wu, Tetsuya Mukawa, Honnappa.Nagarahalli,
	thomas, Tyler Retzlaff
Replace the use of rte_atomic.h types and functions, instead use GCC
supplied C++11 memory model builtins.
Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
---
 drivers/net/ice/ice_dcf.c        |  1 -
 drivers/net/ice/ice_dcf_ethdev.c |  1 -
 drivers/net/ice/ice_ethdev.c     | 12 ++++++++----
 3 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/drivers/net/ice/ice_dcf.c b/drivers/net/ice/ice_dcf.c
index 1c3d22a..80d2cbd 100644
--- a/drivers/net/ice/ice_dcf.c
+++ b/drivers/net/ice/ice_dcf.c
@@ -14,7 +14,6 @@
 #include <rte_common.h>
 
 #include <rte_pci.h>
-#include <rte_atomic.h>
 #include <rte_eal.h>
 #include <rte_ether.h>
 #include <ethdev_driver.h>
diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c
index dcbf2af..13ff245 100644
--- a/drivers/net/ice/ice_dcf_ethdev.c
+++ b/drivers/net/ice/ice_dcf_ethdev.c
@@ -11,7 +11,6 @@
 #include <rte_interrupts.h>
 #include <rte_debug.h>
 #include <rte_pci.h>
-#include <rte_atomic.h>
 #include <rte_eal.h>
 #include <rte_ether.h>
 #include <ethdev_pci.h>
diff --git a/drivers/net/ice/ice_ethdev.c b/drivers/net/ice/ice_ethdev.c
index 9a88cf9..a04fca8 100644
--- a/drivers/net/ice/ice_ethdev.c
+++ b/drivers/net/ice/ice_ethdev.c
@@ -3927,8 +3927,10 @@ static int ice_init_rss(struct ice_pf *pf)
 	struct rte_eth_link *dst = link;
 	struct rte_eth_link *src = &dev->data->dev_link;
 
-	if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
-				*(uint64_t *)src) == 0)
+	/* NOTE: review for potential ordering optimization */
+	if (!__atomic_compare_exchange_n((uint64_t *)dst,
+		(uint64_t *)dst, *(uint64_t *)src, 0,
+		__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
 		return -1;
 
 	return 0;
@@ -3941,8 +3943,10 @@ static int ice_init_rss(struct ice_pf *pf)
 	struct rte_eth_link *dst = &dev->data->dev_link;
 	struct rte_eth_link *src = link;
 
-	if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
-				*(uint64_t *)src) == 0)
+	/* NOTE: review for potential ordering optimization */
+	if (!__atomic_compare_exchange_n((uint64_t *)dst,
+		(uint64_t *)dst, *(uint64_t *)src, 0,
+		__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
 		return -1;
 
 	return 0;
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 83+ messages in thread
- * [PATCH v4 4/6] net/ixgbe: replace rte atomics with GCC builtin atomics
  2023-06-02 19:45 ` [PATCH v4 0/6] " Tyler Retzlaff
                     ` (2 preceding siblings ...)
  2023-06-02 19:45   ` [PATCH v4 3/6] net/ice: " Tyler Retzlaff
@ 2023-06-02 19:45   ` Tyler Retzlaff
  2023-06-02 19:45   ` [PATCH v4 5/6] net/null: " Tyler Retzlaff
  2023-06-02 19:45   ` [PATCH v4 6/6] net/ring: " Tyler Retzlaff
  5 siblings, 0 replies; 83+ messages in thread
From: Tyler Retzlaff @ 2023-06-02 19:45 UTC (permalink / raw)
  To: dev, david.marchand
  Cc: Olivier Matz, Bruce Richardson, Kevin Laatz, Qiming Yang,
	Qi Zhang, Wenjun Wu, Tetsuya Mukawa, Honnappa.Nagarahalli,
	thomas, Tyler Retzlaff
Replace the use of rte_atomic.h types and functions, instead use GCC
supplied C++11 memory model builtins.
Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
---
 drivers/net/ixgbe/ixgbe_bypass.c |  1 -
 drivers/net/ixgbe/ixgbe_ethdev.c | 18 ++++++++++++------
 drivers/net/ixgbe/ixgbe_ethdev.h |  3 ++-
 drivers/net/ixgbe/ixgbe_flow.c   |  1 -
 drivers/net/ixgbe/ixgbe_rxtx.c   |  1 -
 5 files changed, 14 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ixgbe/ixgbe_bypass.c b/drivers/net/ixgbe/ixgbe_bypass.c
index 94f34a2..f615d18 100644
--- a/drivers/net/ixgbe/ixgbe_bypass.c
+++ b/drivers/net/ixgbe/ixgbe_bypass.c
@@ -3,7 +3,6 @@
  */
 
 #include <time.h>
-#include <rte_atomic.h>
 #include <ethdev_driver.h>
 #include "ixgbe_ethdev.h"
 #include "ixgbe_bypass_api.h"
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index 88118bc..4bb85af 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -1127,7 +1127,8 @@ struct rte_ixgbe_xstats_name_off {
 		return 0;
 	}
 
-	rte_atomic32_clear(&ad->link_thread_running);
+	/* NOTE: review for potential ordering optimization */
+	__atomic_clear(&ad->link_thread_running, __ATOMIC_SEQ_CST);
 	ixgbe_parse_devargs(eth_dev->data->dev_private,
 			    pci_dev->device.devargs);
 	rte_eth_copy_pci_info(eth_dev, pci_dev);
@@ -1625,7 +1626,8 @@ static int ixgbe_l2_tn_filter_init(struct rte_eth_dev *eth_dev)
 		return 0;
 	}
 
-	rte_atomic32_clear(&ad->link_thread_running);
+	/* NOTE: review for potential ordering optimization */
+	__atomic_clear(&ad->link_thread_running, __ATOMIC_SEQ_CST);
 	ixgbevf_parse_devargs(eth_dev->data->dev_private,
 			      pci_dev->device.devargs);
 
@@ -4186,7 +4188,8 @@ static int ixgbevf_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
 	struct ixgbe_adapter *ad = dev->data->dev_private;
 	uint32_t timeout = timeout_ms ? timeout_ms : WARNING_TIMEOUT;
 
-	while (rte_atomic32_read(&ad->link_thread_running)) {
+	/* NOTE: review for potential ordering optimization */
+	while (__atomic_load_n(&ad->link_thread_running, __ATOMIC_SEQ_CST)) {
 		msec_delay(1);
 		timeout--;
 
@@ -4222,7 +4225,8 @@ static int ixgbevf_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
 	ixgbe_setup_link(hw, speed, true);
 
 	intr->flags &= ~IXGBE_FLAG_NEED_LINK_CONFIG;
-	rte_atomic32_clear(&ad->link_thread_running);
+	/* NOTE: review for potential ordering optimization */
+	__atomic_clear(&ad->link_thread_running, __ATOMIC_SEQ_CST);
 	return NULL;
 }
 
@@ -4317,7 +4321,8 @@ static int ixgbevf_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
 	if (link_up == 0) {
 		if (ixgbe_get_media_type(hw) == ixgbe_media_type_fiber) {
 			ixgbe_dev_wait_setup_link_complete(dev, 0);
-			if (rte_atomic32_test_and_set(&ad->link_thread_running)) {
+			/* NOTE: review for potential ordering optimization */
+			if (__atomic_test_and_set(&ad->link_thread_running, __ATOMIC_SEQ_CST)) {
 				/* To avoid race condition between threads, set
 				 * the IXGBE_FLAG_NEED_LINK_CONFIG flag only
 				 * when there is no link thread running.
@@ -4330,7 +4335,8 @@ static int ixgbevf_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
 					dev) < 0) {
 					PMD_DRV_LOG(ERR,
 						"Create link thread failed!");
-					rte_atomic32_clear(&ad->link_thread_running);
+					/* NOTE: review for potential ordering optimization */
+					__atomic_clear(&ad->link_thread_running, __ATOMIC_SEQ_CST);
 				}
 			} else {
 				PMD_DRV_LOG(ERR,
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.h b/drivers/net/ixgbe/ixgbe_ethdev.h
index 48290af..2ca6998 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.h
+++ b/drivers/net/ixgbe/ixgbe_ethdev.h
@@ -6,6 +6,7 @@
 #define _IXGBE_ETHDEV_H_
 
 #include <stdint.h>
+#include <stdbool.h>
 #include <sys/queue.h>
 
 #include "base/ixgbe_type.h"
@@ -510,7 +511,7 @@ struct ixgbe_adapter {
 	 */
 	uint8_t pflink_fullchk;
 	uint8_t mac_ctrl_frame_fwd;
-	rte_atomic32_t link_thread_running;
+	bool link_thread_running;
 	pthread_t link_thread_tid;
 };
 
diff --git a/drivers/net/ixgbe/ixgbe_flow.c b/drivers/net/ixgbe/ixgbe_flow.c
index eac81ee..687341c 100644
--- a/drivers/net/ixgbe/ixgbe_flow.c
+++ b/drivers/net/ixgbe/ixgbe_flow.c
@@ -18,7 +18,6 @@
 #include <rte_log.h>
 #include <rte_debug.h>
 #include <rte_pci.h>
-#include <rte_atomic.h>
 #include <rte_branch_prediction.h>
 #include <rte_memory.h>
 #include <rte_eal.h>
diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index c9d6ca9..8d7251d 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -27,7 +27,6 @@
 #include <rte_eal.h>
 #include <rte_per_lcore.h>
 #include <rte_lcore.h>
-#include <rte_atomic.h>
 #include <rte_branch_prediction.h>
 #include <rte_mempool.h>
 #include <rte_malloc.h>
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 83+ messages in thread
- * [PATCH v4 5/6] net/null: replace rte atomics with GCC builtin atomics
  2023-06-02 19:45 ` [PATCH v4 0/6] " Tyler Retzlaff
                     ` (3 preceding siblings ...)
  2023-06-02 19:45   ` [PATCH v4 4/6] net/ixgbe: " Tyler Retzlaff
@ 2023-06-02 19:45   ` Tyler Retzlaff
  2023-06-02 19:45   ` [PATCH v4 6/6] net/ring: " Tyler Retzlaff
  5 siblings, 0 replies; 83+ messages in thread
From: Tyler Retzlaff @ 2023-06-02 19:45 UTC (permalink / raw)
  To: dev, david.marchand
  Cc: Olivier Matz, Bruce Richardson, Kevin Laatz, Qiming Yang,
	Qi Zhang, Wenjun Wu, Tetsuya Mukawa, Honnappa.Nagarahalli,
	thomas, Tyler Retzlaff
Replace the use of rte_atomic.h types and functions, instead use GCC
supplied C++11 memory model builtins.
Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
---
 drivers/net/null/rte_eth_null.c | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)
diff --git a/drivers/net/null/rte_eth_null.c b/drivers/net/null/rte_eth_null.c
index 47d9554..31081af 100644
--- a/drivers/net/null/rte_eth_null.c
+++ b/drivers/net/null/rte_eth_null.c
@@ -37,8 +37,8 @@ struct null_queue {
 	struct rte_mempool *mb_pool;
 	struct rte_mbuf *dummy_packet;
 
-	rte_atomic64_t rx_pkts;
-	rte_atomic64_t tx_pkts;
+	uint64_t rx_pkts;
+	uint64_t tx_pkts;
 };
 
 struct pmd_options {
@@ -101,7 +101,8 @@ struct pmd_internals {
 		bufs[i]->port = h->internals->port_id;
 	}
 
-	rte_atomic64_add(&(h->rx_pkts), i);
+	/* NOTE: review for potential ordering optimization */
+	__atomic_fetch_add(&h->rx_pkts, i, __ATOMIC_SEQ_CST);
 
 	return i;
 }
@@ -128,7 +129,8 @@ struct pmd_internals {
 		bufs[i]->port = h->internals->port_id;
 	}
 
-	rte_atomic64_add(&(h->rx_pkts), i);
+	/* NOTE: review for potential ordering optimization */
+	__atomic_fetch_add(&h->rx_pkts, i, __ATOMIC_SEQ_CST);
 
 	return i;
 }
@@ -152,7 +154,8 @@ struct pmd_internals {
 	for (i = 0; i < nb_bufs; i++)
 		rte_pktmbuf_free(bufs[i]);
 
-	rte_atomic64_add(&(h->tx_pkts), i);
+	/* NOTE: review for potential ordering optimization */
+	__atomic_fetch_add(&h->tx_pkts, i, __ATOMIC_SEQ_CST);
 
 	return i;
 }
@@ -174,7 +177,8 @@ struct pmd_internals {
 		rte_pktmbuf_free(bufs[i]);
 	}
 
-	rte_atomic64_add(&(h->tx_pkts), i);
+	/* NOTE: review for potential ordering optimization */
+	__atomic_fetch_add(&h->tx_pkts, i, __ATOMIC_SEQ_CST);
 
 	return i;
 }
@@ -316,8 +320,9 @@ struct pmd_internals {
 			RTE_MIN(dev->data->nb_rx_queues,
 				RTE_DIM(internal->rx_null_queues)));
 	for (i = 0; i < num_stats; i++) {
+		/* NOTE: review for atomic access */
 		igb_stats->q_ipackets[i] =
-			internal->rx_null_queues[i].rx_pkts.cnt;
+			internal->rx_null_queues[i].rx_pkts;
 		rx_total += igb_stats->q_ipackets[i];
 	}
 
@@ -325,8 +330,9 @@ struct pmd_internals {
 			RTE_MIN(dev->data->nb_tx_queues,
 				RTE_DIM(internal->tx_null_queues)));
 	for (i = 0; i < num_stats; i++) {
+		/* NOTE: review for atomic access */
 		igb_stats->q_opackets[i] =
-			internal->tx_null_queues[i].tx_pkts.cnt;
+			internal->tx_null_queues[i].tx_pkts;
 		tx_total += igb_stats->q_opackets[i];
 	}
 
@@ -347,9 +353,11 @@ struct pmd_internals {
 
 	internal = dev->data->dev_private;
 	for (i = 0; i < RTE_DIM(internal->rx_null_queues); i++)
-		internal->rx_null_queues[i].rx_pkts.cnt = 0;
+		/* NOTE: review for atomic access */
+		internal->rx_null_queues[i].rx_pkts = 0;
 	for (i = 0; i < RTE_DIM(internal->tx_null_queues); i++)
-		internal->tx_null_queues[i].tx_pkts.cnt = 0;
+		/* NOTE: review for atomic access */
+		internal->tx_null_queues[i].tx_pkts = 0;
 
 	return 0;
 }
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 83+ messages in thread
- * [PATCH v4 6/6] net/ring: replace rte atomics with GCC builtin atomics
  2023-06-02 19:45 ` [PATCH v4 0/6] " Tyler Retzlaff
                     ` (4 preceding siblings ...)
  2023-06-02 19:45   ` [PATCH v4 5/6] net/null: " Tyler Retzlaff
@ 2023-06-02 19:45   ` Tyler Retzlaff
  2023-06-05  8:27     ` Olivier Matz
  5 siblings, 1 reply; 83+ messages in thread
From: Tyler Retzlaff @ 2023-06-02 19:45 UTC (permalink / raw)
  To: dev, david.marchand
  Cc: Olivier Matz, Bruce Richardson, Kevin Laatz, Qiming Yang,
	Qi Zhang, Wenjun Wu, Tetsuya Mukawa, Honnappa.Nagarahalli,
	thomas, Tyler Retzlaff
Replace the use of rte_atomic.h types and functions, instead use GCC
supplied C++11 memory model builtins.
Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Bruce Richardson <bruce.richardson@intel.com>
---
 drivers/net/ring/rte_eth_ring.c | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ring/rte_eth_ring.c b/drivers/net/ring/rte_eth_ring.c
index e8bc9b6..43eb627 100644
--- a/drivers/net/ring/rte_eth_ring.c
+++ b/drivers/net/ring/rte_eth_ring.c
@@ -44,8 +44,8 @@ enum dev_action {
 
 struct ring_queue {
 	struct rte_ring *rng;
-	rte_atomic64_t rx_pkts;
-	rte_atomic64_t tx_pkts;
+	uint64_t rx_pkts;
+	uint64_t tx_pkts;
 };
 
 struct pmd_internals {
@@ -80,9 +80,10 @@ struct pmd_internals {
 	const uint16_t nb_rx = (uint16_t)rte_ring_dequeue_burst(r->rng,
 			ptrs, nb_bufs, NULL);
 	if (r->rng->flags & RING_F_SC_DEQ)
-		r->rx_pkts.cnt += nb_rx;
+		r->rx_pkts += nb_rx;
 	else
-		rte_atomic64_add(&(r->rx_pkts), nb_rx);
+		/* NOTE: review for potential ordering optimization */
+		__atomic_fetch_add(&r->rx_pkts, nb_rx, __ATOMIC_SEQ_CST);
 	return nb_rx;
 }
 
@@ -94,9 +95,10 @@ struct pmd_internals {
 	const uint16_t nb_tx = (uint16_t)rte_ring_enqueue_burst(r->rng,
 			ptrs, nb_bufs, NULL);
 	if (r->rng->flags & RING_F_SP_ENQ)
-		r->tx_pkts.cnt += nb_tx;
+		r->tx_pkts += nb_tx;
 	else
-		rte_atomic64_add(&(r->tx_pkts), nb_tx);
+		/* NOTE: review for potential ordering optimization */
+		__atomic_fetch_add(&r->tx_pkts, nb_tx, __ATOMIC_SEQ_CST);
 	return nb_tx;
 }
 
@@ -184,13 +186,15 @@ struct pmd_internals {
 
 	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
 			i < dev->data->nb_rx_queues; i++) {
-		stats->q_ipackets[i] = internal->rx_ring_queues[i].rx_pkts.cnt;
+		/* NOTE: review for atomic access */
+		stats->q_ipackets[i] = internal->rx_ring_queues[i].rx_pkts;
 		rx_total += stats->q_ipackets[i];
 	}
 
 	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
 			i < dev->data->nb_tx_queues; i++) {
-		stats->q_opackets[i] = internal->tx_ring_queues[i].tx_pkts.cnt;
+		/* NOTE: review for atomic access */
+		stats->q_opackets[i] = internal->tx_ring_queues[i].tx_pkts;
 		tx_total += stats->q_opackets[i];
 	}
 
@@ -207,9 +211,11 @@ struct pmd_internals {
 	struct pmd_internals *internal = dev->data->dev_private;
 
 	for (i = 0; i < dev->data->nb_rx_queues; i++)
-		internal->rx_ring_queues[i].rx_pkts.cnt = 0;
+		/* NOTE: review for atomic access */
+		internal->rx_ring_queues[i].rx_pkts = 0;
 	for (i = 0; i < dev->data->nb_tx_queues; i++)
-		internal->tx_ring_queues[i].tx_pkts.cnt = 0;
+		/* NOTE: review for atomic access */
+		internal->tx_ring_queues[i].tx_pkts = 0;
 
 	return 0;
 }
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 83+ messages in thread
- * Re: [PATCH v4 6/6] net/ring: replace rte atomics with GCC builtin atomics
  2023-06-02 19:45   ` [PATCH v4 6/6] net/ring: " Tyler Retzlaff
@ 2023-06-05  8:27     ` Olivier Matz
  0 siblings, 0 replies; 83+ messages in thread
From: Olivier Matz @ 2023-06-05  8:27 UTC (permalink / raw)
  To: Tyler Retzlaff
  Cc: dev, david.marchand, Bruce Richardson, Kevin Laatz, Qiming Yang,
	Qi Zhang, Wenjun Wu, Tetsuya Mukawa, Honnappa.Nagarahalli,
	thomas
Hi Tyler,
Few comments below.
On Fri, Jun 02, 2023 at 12:45:07PM -0700, Tyler Retzlaff wrote:
> Replace the use of rte_atomic.h types and functions, instead use GCC
> supplied C++11 memory model builtins.
> 
> Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> Acked-by: Morten Brørup <mb@smartsharesystems.com>
> Acked-by: Bruce Richardson <bruce.richardson@intel.com>
> ---
>  drivers/net/ring/rte_eth_ring.c | 26 ++++++++++++++++----------
>  1 file changed, 16 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/net/ring/rte_eth_ring.c b/drivers/net/ring/rte_eth_ring.c
> index e8bc9b6..43eb627 100644
> --- a/drivers/net/ring/rte_eth_ring.c
> +++ b/drivers/net/ring/rte_eth_ring.c
> @@ -44,8 +44,8 @@ enum dev_action {
>  
>  struct ring_queue {
>  	struct rte_ring *rng;
> -	rte_atomic64_t rx_pkts;
> -	rte_atomic64_t tx_pkts;
> +	uint64_t rx_pkts;
> +	uint64_t tx_pkts;
>  };
>  
>  struct pmd_internals {
> @@ -80,9 +80,10 @@ struct pmd_internals {
>  	const uint16_t nb_rx = (uint16_t)rte_ring_dequeue_burst(r->rng,
>  			ptrs, nb_bufs, NULL);
>  	if (r->rng->flags & RING_F_SC_DEQ)
> -		r->rx_pkts.cnt += nb_rx;
> +		r->rx_pkts += nb_rx;
>  	else
> -		rte_atomic64_add(&(r->rx_pkts), nb_rx);
> +		/* NOTE: review for potential ordering optimization */
> +		__atomic_fetch_add(&r->rx_pkts, nb_rx, __ATOMIC_SEQ_CST);
We can use __ATOMIC_RELAXED here (and below too), since there is no ordering
constraint. We only want statistics to be correct.
You can remove the other NOTEs from the patch.
>  	return nb_rx;
>  }
>  
> @@ -94,9 +95,10 @@ struct pmd_internals {
>  	const uint16_t nb_tx = (uint16_t)rte_ring_enqueue_burst(r->rng,
>  			ptrs, nb_bufs, NULL);
>  	if (r->rng->flags & RING_F_SP_ENQ)
> -		r->tx_pkts.cnt += nb_tx;
> +		r->tx_pkts += nb_tx;
>  	else
> -		rte_atomic64_add(&(r->tx_pkts), nb_tx);
> +		/* NOTE: review for potential ordering optimization */
> +		__atomic_fetch_add(&r->tx_pkts, nb_tx, __ATOMIC_SEQ_CST);
>  	return nb_tx;
>  }
>  
> @@ -184,13 +186,15 @@ struct pmd_internals {
>  
>  	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
>  			i < dev->data->nb_rx_queues; i++) {
> -		stats->q_ipackets[i] = internal->rx_ring_queues[i].rx_pkts.cnt;
> +		/* NOTE: review for atomic access */
> +		stats->q_ipackets[i] = internal->rx_ring_queues[i].rx_pkts;
>  		rx_total += stats->q_ipackets[i];
>  	}
>  
>  	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
>  			i < dev->data->nb_tx_queues; i++) {
> -		stats->q_opackets[i] = internal->tx_ring_queues[i].tx_pkts.cnt;
> +		/* NOTE: review for atomic access */
> +		stats->q_opackets[i] = internal->tx_ring_queues[i].tx_pkts;
>  		tx_total += stats->q_opackets[i];
>  	}
>  
> @@ -207,9 +211,11 @@ struct pmd_internals {
>  	struct pmd_internals *internal = dev->data->dev_private;
>  
>  	for (i = 0; i < dev->data->nb_rx_queues; i++)
> -		internal->rx_ring_queues[i].rx_pkts.cnt = 0;
> +		/* NOTE: review for atomic access */
> +		internal->rx_ring_queues[i].rx_pkts = 0;
>  	for (i = 0; i < dev->data->nb_tx_queues; i++)
> -		internal->tx_ring_queues[i].tx_pkts.cnt = 0;
> +		/* NOTE: review for atomic access */
> +		internal->tx_ring_queues[i].tx_pkts = 0;
>  
>  	return 0;
>  }
> -- 
> 1.8.3.1
> 
^ permalink raw reply	[flat|nested] 83+ messages in thread
 
 
- * [PATCH v5 0/6] replace rte atomics with GCC builtin atomics
  2023-03-17 20:19 [PATCH 0/7] replace rte atomics with GCC builtin atomics Tyler Retzlaff
                   ` (10 preceding siblings ...)
  2023-06-02 19:45 ` [PATCH v4 0/6] " Tyler Retzlaff
@ 2023-06-06 21:45 ` Tyler Retzlaff
  2023-06-06 21:45   ` [PATCH v5 1/6] stack: " Tyler Retzlaff
                     ` (6 more replies)
  11 siblings, 7 replies; 83+ messages in thread
From: Tyler Retzlaff @ 2023-06-06 21:45 UTC (permalink / raw)
  To: dev, david.marchand
  Cc: Olivier Matz, Bruce Richardson, Kevin Laatz, Qiming Yang,
	Qi Zhang, Wenjun Wu, Tetsuya Mukawa, Honnappa.Nagarahalli,
	thomas, Tyler Retzlaff
Replace the use of rte_atomic.h types and functions, instead use GCC
supplied C++11 memory model builtins.
This series covers the libraries and drivers that are built on Windows.
The code has been converted to use the __atomic builtins, but during
conversion I noticed that there may be some additional issues that
need to be addressed.
I'll note my concerns in the relevant patches so that the maintainers
can respond.
v5:
  * use relaxed ordering for counter increments in net/ring patch
  * remove note comments from net/ring patch
v4:
  * drop patch for lib/ring it will be provided by ARM / Honnappa
  * rebase for changes in dma/idxd merge
  * adapt __atomic_fetch_sub(...) - 1 == 0 to be (__atomic_fetch_sub(...) == 1)
    as per feedback.
  * drop one /* NOTE: review for potential ordering optimization */ since
    the note refers to a control path that is not performance critical.
  note:
  The remainder of the NOTE comments have been retained since there
  seems to be no consensus, and the stronger opinion/argument expressed
  was to keep them. While I generally agree that changes should not
  include ``TODO'' style comments, I also agree that without these
  comments in plain view people are very unlikely to feel compelled
  to make the review the comments are trying to solicit. If it is
  absolute that the series won't be merged with them then I will
  remove them, but please be explicit soon.
v3:
  * style, don't use c99 comments
v2:
  * comment code where optimizations may be possible now that memory
    order can be specified.
  * comment code where operations should potentially be atomic so that
    maintainers can review.
  * change a couple of variables labeled as counters to be unsigned.
Tyler Retzlaff (6):
  stack: replace rte atomics with GCC builtin atomics
  dma/idxd: replace rte atomics with GCC builtin atomics
  net/ice: replace rte atomics with GCC builtin atomics
  net/ixgbe: replace rte atomics with GCC builtin atomics
  net/null: replace rte atomics with GCC builtin atomics
  net/ring: replace rte atomics with GCC builtin atomics
 drivers/dma/idxd/idxd_internal.h |  3 +--
 drivers/dma/idxd/idxd_pci.c      | 11 ++++++-----
 drivers/net/ice/ice_dcf.c        |  1 -
 drivers/net/ice/ice_dcf_ethdev.c |  1 -
 drivers/net/ice/ice_ethdev.c     | 12 ++++++++----
 drivers/net/ixgbe/ixgbe_bypass.c |  1 -
 drivers/net/ixgbe/ixgbe_ethdev.c | 18 ++++++++++++------
 drivers/net/ixgbe/ixgbe_ethdev.h |  3 ++-
 drivers/net/ixgbe/ixgbe_flow.c   |  1 -
 drivers/net/ixgbe/ixgbe_rxtx.c   |  1 -
 drivers/net/null/rte_eth_null.c  | 28 ++++++++++++++++++----------
 drivers/net/ring/rte_eth_ring.c  | 20 ++++++++++----------
 lib/stack/rte_stack_lf_generic.h | 16 +++++++++-------
 13 files changed, 66 insertions(+), 50 deletions(-)
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 83+ messages in thread
- * [PATCH v5 1/6] stack: replace rte atomics with GCC builtin atomics
  2023-06-06 21:45 ` [PATCH v5 0/6] " Tyler Retzlaff
@ 2023-06-06 21:45   ` Tyler Retzlaff
  2023-06-06 21:45   ` [PATCH v5 2/6] dma/idxd: " Tyler Retzlaff
                     ` (5 subsequent siblings)
  6 siblings, 0 replies; 83+ messages in thread
From: Tyler Retzlaff @ 2023-06-06 21:45 UTC (permalink / raw)
  To: dev, david.marchand
  Cc: Olivier Matz, Bruce Richardson, Kevin Laatz, Qiming Yang,
	Qi Zhang, Wenjun Wu, Tetsuya Mukawa, Honnappa.Nagarahalli,
	thomas, Tyler Retzlaff
Replace the use of rte_atomic.h types and functions, instead use GCC
supplied C++11 memory model builtins.
Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
---
 lib/stack/rte_stack_lf_generic.h | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)
diff --git a/lib/stack/rte_stack_lf_generic.h b/lib/stack/rte_stack_lf_generic.h
index 7fa29ce..aad3747 100644
--- a/lib/stack/rte_stack_lf_generic.h
+++ b/lib/stack/rte_stack_lf_generic.h
@@ -26,8 +26,8 @@
 	 * elements. If the mempool is near-empty to the point that this is a
 	 * concern, the user should consider increasing the mempool size.
 	 */
-	return (unsigned int)rte_atomic64_read((rte_atomic64_t *)
-			&s->stack_lf.used.len);
+	/* NOTE: review for potential ordering optimization */
+	return __atomic_load_n(&s->stack_lf.used.len, __ATOMIC_SEQ_CST);
 }
 
 static __rte_always_inline void
@@ -67,8 +67,8 @@
 				1, __ATOMIC_RELEASE,
 				__ATOMIC_RELAXED);
 	} while (success == 0);
-
-	rte_atomic64_add((rte_atomic64_t *)&list->len, num);
+	/* NOTE: review for potential ordering optimization */
+	__atomic_fetch_add(&list->len, num, __ATOMIC_SEQ_CST);
 }
 
 static __rte_always_inline struct rte_stack_lf_elem *
@@ -82,14 +82,16 @@
 
 	/* Reserve num elements, if available */
 	while (1) {
-		uint64_t len = rte_atomic64_read((rte_atomic64_t *)&list->len);
+		/* NOTE: review for potential ordering optimization */
+		uint64_t len = __atomic_load_n(&list->len, __ATOMIC_SEQ_CST);
 
 		/* Does the list contain enough elements? */
 		if (unlikely(len < num))
 			return NULL;
 
-		if (rte_atomic64_cmpset((volatile uint64_t *)&list->len,
-					len, len - num))
+		/* NOTE: review for potential ordering optimization */
+		if (__atomic_compare_exchange_n(&list->len, &len, len - num,
+			0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
 			break;
 	}
 
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 83+ messages in thread
- * [PATCH v5 2/6] dma/idxd: replace rte atomics with GCC builtin atomics
  2023-06-06 21:45 ` [PATCH v5 0/6] " Tyler Retzlaff
  2023-06-06 21:45   ` [PATCH v5 1/6] stack: " Tyler Retzlaff
@ 2023-06-06 21:45   ` Tyler Retzlaff
  2023-06-06 21:45   ` [PATCH v5 3/6] net/ice: " Tyler Retzlaff
                     ` (4 subsequent siblings)
  6 siblings, 0 replies; 83+ messages in thread
From: Tyler Retzlaff @ 2023-06-06 21:45 UTC (permalink / raw)
  To: dev, david.marchand
  Cc: Olivier Matz, Bruce Richardson, Kevin Laatz, Qiming Yang,
	Qi Zhang, Wenjun Wu, Tetsuya Mukawa, Honnappa.Nagarahalli,
	thomas, Tyler Retzlaff
Replace the use of rte_atomic.h types and functions, instead use GCC
supplied C++11 memory model builtins.
Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Bruce Richardson <bruce.richardson@intel.com>
Acked-by: Kevin Laatz <kevin.laatz@intel.com>
---
 drivers/dma/idxd/idxd_internal.h |  3 +--
 drivers/dma/idxd/idxd_pci.c      | 11 ++++++-----
 2 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/drivers/dma/idxd/idxd_internal.h b/drivers/dma/idxd/idxd_internal.h
index 180a858..cd41777 100644
--- a/drivers/dma/idxd/idxd_internal.h
+++ b/drivers/dma/idxd/idxd_internal.h
@@ -7,7 +7,6 @@
 
 #include <rte_dmadev_pmd.h>
 #include <rte_spinlock.h>
-#include <rte_atomic.h>
 
 #include "idxd_hw_defs.h"
 
@@ -34,7 +33,7 @@ struct idxd_pci_common {
 	rte_spinlock_t lk;
 
 	uint8_t wq_cfg_sz;
-	rte_atomic16_t ref_count;
+	uint16_t ref_count;
 	volatile struct rte_idxd_bar0 *regs;
 	volatile uint32_t *wq_regs_base;
 	volatile struct rte_idxd_grpcfg *grp_regs;
diff --git a/drivers/dma/idxd/idxd_pci.c b/drivers/dma/idxd/idxd_pci.c
index 5e56240..3696c7f 100644
--- a/drivers/dma/idxd/idxd_pci.c
+++ b/drivers/dma/idxd/idxd_pci.c
@@ -6,7 +6,6 @@
 #include <rte_devargs.h>
 #include <rte_dmadev_pmd.h>
 #include <rte_malloc.h>
-#include <rte_atomic.h>
 
 #include "idxd_internal.h"
 
@@ -136,7 +135,8 @@
 	/* if this is the last WQ on the device, disable the device and free
 	 * the PCI struct
 	 */
-	is_last_wq = rte_atomic16_dec_and_test(&idxd->u.pci->ref_count);
+	/* NOTE: review for potential ordering optimization */
+	is_last_wq = (__atomic_fetch_sub(&idxd->u.pci->ref_count, 1, __ATOMIC_SEQ_CST) == 1);
 	if (is_last_wq) {
 		/* disable the device */
 		err_code = idxd_pci_dev_command(idxd, idxd_disable_dev);
@@ -322,8 +322,9 @@
 			return ret;
 		}
 		qid = rte_dma_get_dev_id_by_name(qname);
-		max_qid = rte_atomic16_read(
-			&((struct idxd_dmadev *)rte_dma_fp_objs[qid].dev_private)->u.pci->ref_count);
+		max_qid = __atomic_load_n(
+			&((struct idxd_dmadev *)rte_dma_fp_objs[qid].dev_private)->u.pci->ref_count,
+			__ATOMIC_SEQ_CST);
 
 		/* we have queue 0 done, now configure the rest of the queues */
 		for (qid = 1; qid < max_qid; qid++) {
@@ -380,7 +381,7 @@
 				free(idxd.u.pci);
 			return ret;
 		}
-		rte_atomic16_inc(&idxd.u.pci->ref_count);
+		__atomic_fetch_add(&idxd.u.pci->ref_count, 1, __ATOMIC_SEQ_CST);
 	}
 
 	return 0;
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 83+ messages in thread
- * [PATCH v5 3/6] net/ice: replace rte atomics with GCC builtin atomics
  2023-06-06 21:45 ` [PATCH v5 0/6] " Tyler Retzlaff
  2023-06-06 21:45   ` [PATCH v5 1/6] stack: " Tyler Retzlaff
  2023-06-06 21:45   ` [PATCH v5 2/6] dma/idxd: " Tyler Retzlaff
@ 2023-06-06 21:45   ` Tyler Retzlaff
  2023-06-06 21:45   ` [PATCH v5 4/6] net/ixgbe: " Tyler Retzlaff
                     ` (3 subsequent siblings)
  6 siblings, 0 replies; 83+ messages in thread
From: Tyler Retzlaff @ 2023-06-06 21:45 UTC (permalink / raw)
  To: dev, david.marchand
  Cc: Olivier Matz, Bruce Richardson, Kevin Laatz, Qiming Yang,
	Qi Zhang, Wenjun Wu, Tetsuya Mukawa, Honnappa.Nagarahalli,
	thomas, Tyler Retzlaff
Replace the use of rte_atomic.h types and functions, instead use GCC
supplied C++11 memory model builtins.
Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
---
 drivers/net/ice/ice_dcf.c        |  1 -
 drivers/net/ice/ice_dcf_ethdev.c |  1 -
 drivers/net/ice/ice_ethdev.c     | 12 ++++++++----
 3 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/drivers/net/ice/ice_dcf.c b/drivers/net/ice/ice_dcf.c
index 1c3d22a..80d2cbd 100644
--- a/drivers/net/ice/ice_dcf.c
+++ b/drivers/net/ice/ice_dcf.c
@@ -14,7 +14,6 @@
 #include <rte_common.h>
 
 #include <rte_pci.h>
-#include <rte_atomic.h>
 #include <rte_eal.h>
 #include <rte_ether.h>
 #include <ethdev_driver.h>
diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c
index dcbf2af..13ff245 100644
--- a/drivers/net/ice/ice_dcf_ethdev.c
+++ b/drivers/net/ice/ice_dcf_ethdev.c
@@ -11,7 +11,6 @@
 #include <rte_interrupts.h>
 #include <rte_debug.h>
 #include <rte_pci.h>
-#include <rte_atomic.h>
 #include <rte_eal.h>
 #include <rte_ether.h>
 #include <ethdev_pci.h>
diff --git a/drivers/net/ice/ice_ethdev.c b/drivers/net/ice/ice_ethdev.c
index 9a88cf9..a04fca8 100644
--- a/drivers/net/ice/ice_ethdev.c
+++ b/drivers/net/ice/ice_ethdev.c
@@ -3927,8 +3927,10 @@ static int ice_init_rss(struct ice_pf *pf)
 	struct rte_eth_link *dst = link;
 	struct rte_eth_link *src = &dev->data->dev_link;
 
-	if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
-				*(uint64_t *)src) == 0)
+	/* NOTE: review for potential ordering optimization */
+	if (!__atomic_compare_exchange_n((uint64_t *)dst,
+		(uint64_t *)dst, *(uint64_t *)src, 0,
+		__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
 		return -1;
 
 	return 0;
@@ -3941,8 +3943,10 @@ static int ice_init_rss(struct ice_pf *pf)
 	struct rte_eth_link *dst = &dev->data->dev_link;
 	struct rte_eth_link *src = link;
 
-	if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
-				*(uint64_t *)src) == 0)
+	/* NOTE: review for potential ordering optimization */
+	if (!__atomic_compare_exchange_n((uint64_t *)dst,
+		(uint64_t *)dst, *(uint64_t *)src, 0,
+		__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
 		return -1;
 
 	return 0;
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 83+ messages in thread
- * [PATCH v5 4/6] net/ixgbe: replace rte atomics with GCC builtin atomics
  2023-06-06 21:45 ` [PATCH v5 0/6] " Tyler Retzlaff
                     ` (2 preceding siblings ...)
  2023-06-06 21:45   ` [PATCH v5 3/6] net/ice: " Tyler Retzlaff
@ 2023-06-06 21:45   ` Tyler Retzlaff
  2023-06-06 21:45   ` [PATCH v5 5/6] net/null: " Tyler Retzlaff
                     ` (2 subsequent siblings)
  6 siblings, 0 replies; 83+ messages in thread
From: Tyler Retzlaff @ 2023-06-06 21:45 UTC (permalink / raw)
  To: dev, david.marchand
  Cc: Olivier Matz, Bruce Richardson, Kevin Laatz, Qiming Yang,
	Qi Zhang, Wenjun Wu, Tetsuya Mukawa, Honnappa.Nagarahalli,
	thomas, Tyler Retzlaff
Replace the use of rte_atomic.h types and functions, instead use GCC
supplied C++11 memory model builtins.
Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
---
 drivers/net/ixgbe/ixgbe_bypass.c |  1 -
 drivers/net/ixgbe/ixgbe_ethdev.c | 18 ++++++++++++------
 drivers/net/ixgbe/ixgbe_ethdev.h |  3 ++-
 drivers/net/ixgbe/ixgbe_flow.c   |  1 -
 drivers/net/ixgbe/ixgbe_rxtx.c   |  1 -
 5 files changed, 14 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ixgbe/ixgbe_bypass.c b/drivers/net/ixgbe/ixgbe_bypass.c
index 94f34a2..f615d18 100644
--- a/drivers/net/ixgbe/ixgbe_bypass.c
+++ b/drivers/net/ixgbe/ixgbe_bypass.c
@@ -3,7 +3,6 @@
  */
 
 #include <time.h>
-#include <rte_atomic.h>
 #include <ethdev_driver.h>
 #include "ixgbe_ethdev.h"
 #include "ixgbe_bypass_api.h"
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index 88118bc..4bb85af 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -1127,7 +1127,8 @@ struct rte_ixgbe_xstats_name_off {
 		return 0;
 	}
 
-	rte_atomic32_clear(&ad->link_thread_running);
+	/* NOTE: review for potential ordering optimization */
+	__atomic_clear(&ad->link_thread_running, __ATOMIC_SEQ_CST);
 	ixgbe_parse_devargs(eth_dev->data->dev_private,
 			    pci_dev->device.devargs);
 	rte_eth_copy_pci_info(eth_dev, pci_dev);
@@ -1625,7 +1626,8 @@ static int ixgbe_l2_tn_filter_init(struct rte_eth_dev *eth_dev)
 		return 0;
 	}
 
-	rte_atomic32_clear(&ad->link_thread_running);
+	/* NOTE: review for potential ordering optimization */
+	__atomic_clear(&ad->link_thread_running, __ATOMIC_SEQ_CST);
 	ixgbevf_parse_devargs(eth_dev->data->dev_private,
 			      pci_dev->device.devargs);
 
@@ -4186,7 +4188,8 @@ static int ixgbevf_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
 	struct ixgbe_adapter *ad = dev->data->dev_private;
 	uint32_t timeout = timeout_ms ? timeout_ms : WARNING_TIMEOUT;
 
-	while (rte_atomic32_read(&ad->link_thread_running)) {
+	/* NOTE: review for potential ordering optimization */
+	while (__atomic_load_n(&ad->link_thread_running, __ATOMIC_SEQ_CST)) {
 		msec_delay(1);
 		timeout--;
 
@@ -4222,7 +4225,8 @@ static int ixgbevf_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
 	ixgbe_setup_link(hw, speed, true);
 
 	intr->flags &= ~IXGBE_FLAG_NEED_LINK_CONFIG;
-	rte_atomic32_clear(&ad->link_thread_running);
+	/* NOTE: review for potential ordering optimization */
+	__atomic_clear(&ad->link_thread_running, __ATOMIC_SEQ_CST);
 	return NULL;
 }
 
@@ -4317,7 +4321,8 @@ static int ixgbevf_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
 	if (link_up == 0) {
 		if (ixgbe_get_media_type(hw) == ixgbe_media_type_fiber) {
 			ixgbe_dev_wait_setup_link_complete(dev, 0);
-			if (rte_atomic32_test_and_set(&ad->link_thread_running)) {
+			/* NOTE: review for potential ordering optimization */
+			if (__atomic_test_and_set(&ad->link_thread_running, __ATOMIC_SEQ_CST)) {
 				/* To avoid race condition between threads, set
 				 * the IXGBE_FLAG_NEED_LINK_CONFIG flag only
 				 * when there is no link thread running.
@@ -4330,7 +4335,8 @@ static int ixgbevf_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
 					dev) < 0) {
 					PMD_DRV_LOG(ERR,
 						"Create link thread failed!");
-					rte_atomic32_clear(&ad->link_thread_running);
+					/* NOTE: review for potential ordering optimization */
+					__atomic_clear(&ad->link_thread_running, __ATOMIC_SEQ_CST);
 				}
 			} else {
 				PMD_DRV_LOG(ERR,
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.h b/drivers/net/ixgbe/ixgbe_ethdev.h
index 48290af..2ca6998 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.h
+++ b/drivers/net/ixgbe/ixgbe_ethdev.h
@@ -6,6 +6,7 @@
 #define _IXGBE_ETHDEV_H_
 
 #include <stdint.h>
+#include <stdbool.h>
 #include <sys/queue.h>
 
 #include "base/ixgbe_type.h"
@@ -510,7 +511,7 @@ struct ixgbe_adapter {
 	 */
 	uint8_t pflink_fullchk;
 	uint8_t mac_ctrl_frame_fwd;
-	rte_atomic32_t link_thread_running;
+	bool link_thread_running;
 	pthread_t link_thread_tid;
 };
 
diff --git a/drivers/net/ixgbe/ixgbe_flow.c b/drivers/net/ixgbe/ixgbe_flow.c
index eac81ee..687341c 100644
--- a/drivers/net/ixgbe/ixgbe_flow.c
+++ b/drivers/net/ixgbe/ixgbe_flow.c
@@ -18,7 +18,6 @@
 #include <rte_log.h>
 #include <rte_debug.h>
 #include <rte_pci.h>
-#include <rte_atomic.h>
 #include <rte_branch_prediction.h>
 #include <rte_memory.h>
 #include <rte_eal.h>
diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index c9d6ca9..8d7251d 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -27,7 +27,6 @@
 #include <rte_eal.h>
 #include <rte_per_lcore.h>
 #include <rte_lcore.h>
-#include <rte_atomic.h>
 #include <rte_branch_prediction.h>
 #include <rte_mempool.h>
 #include <rte_malloc.h>
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 83+ messages in thread
- * [PATCH v5 5/6] net/null: replace rte atomics with GCC builtin atomics
  2023-06-06 21:45 ` [PATCH v5 0/6] " Tyler Retzlaff
                     ` (3 preceding siblings ...)
  2023-06-06 21:45   ` [PATCH v5 4/6] net/ixgbe: " Tyler Retzlaff
@ 2023-06-06 21:45   ` Tyler Retzlaff
  2023-06-06 21:45   ` [PATCH v5 6/6] net/ring: " Tyler Retzlaff
  2023-06-09 15:01   ` [PATCH v5 0/6] " David Marchand
  6 siblings, 0 replies; 83+ messages in thread
From: Tyler Retzlaff @ 2023-06-06 21:45 UTC (permalink / raw)
  To: dev, david.marchand
  Cc: Olivier Matz, Bruce Richardson, Kevin Laatz, Qiming Yang,
	Qi Zhang, Wenjun Wu, Tetsuya Mukawa, Honnappa.Nagarahalli,
	thomas, Tyler Retzlaff
Replace the use of rte_atomic.h types and functions, instead use GCC
supplied C++11 memory model builtins.
Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
---
 drivers/net/null/rte_eth_null.c | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)
diff --git a/drivers/net/null/rte_eth_null.c b/drivers/net/null/rte_eth_null.c
index 47d9554..31081af 100644
--- a/drivers/net/null/rte_eth_null.c
+++ b/drivers/net/null/rte_eth_null.c
@@ -37,8 +37,8 @@ struct null_queue {
 	struct rte_mempool *mb_pool;
 	struct rte_mbuf *dummy_packet;
 
-	rte_atomic64_t rx_pkts;
-	rte_atomic64_t tx_pkts;
+	uint64_t rx_pkts;
+	uint64_t tx_pkts;
 };
 
 struct pmd_options {
@@ -101,7 +101,8 @@ struct pmd_internals {
 		bufs[i]->port = h->internals->port_id;
 	}
 
-	rte_atomic64_add(&(h->rx_pkts), i);
+	/* NOTE: review for potential ordering optimization */
+	__atomic_fetch_add(&h->rx_pkts, i, __ATOMIC_SEQ_CST);
 
 	return i;
 }
@@ -128,7 +129,8 @@ struct pmd_internals {
 		bufs[i]->port = h->internals->port_id;
 	}
 
-	rte_atomic64_add(&(h->rx_pkts), i);
+	/* NOTE: review for potential ordering optimization */
+	__atomic_fetch_add(&h->rx_pkts, i, __ATOMIC_SEQ_CST);
 
 	return i;
 }
@@ -152,7 +154,8 @@ struct pmd_internals {
 	for (i = 0; i < nb_bufs; i++)
 		rte_pktmbuf_free(bufs[i]);
 
-	rte_atomic64_add(&(h->tx_pkts), i);
+	/* NOTE: review for potential ordering optimization */
+	__atomic_fetch_add(&h->tx_pkts, i, __ATOMIC_SEQ_CST);
 
 	return i;
 }
@@ -174,7 +177,8 @@ struct pmd_internals {
 		rte_pktmbuf_free(bufs[i]);
 	}
 
-	rte_atomic64_add(&(h->tx_pkts), i);
+	/* NOTE: review for potential ordering optimization */
+	__atomic_fetch_add(&h->tx_pkts, i, __ATOMIC_SEQ_CST);
 
 	return i;
 }
@@ -316,8 +320,9 @@ struct pmd_internals {
 			RTE_MIN(dev->data->nb_rx_queues,
 				RTE_DIM(internal->rx_null_queues)));
 	for (i = 0; i < num_stats; i++) {
+		/* NOTE: review for atomic access */
 		igb_stats->q_ipackets[i] =
-			internal->rx_null_queues[i].rx_pkts.cnt;
+			internal->rx_null_queues[i].rx_pkts;
 		rx_total += igb_stats->q_ipackets[i];
 	}
 
@@ -325,8 +330,9 @@ struct pmd_internals {
 			RTE_MIN(dev->data->nb_tx_queues,
 				RTE_DIM(internal->tx_null_queues)));
 	for (i = 0; i < num_stats; i++) {
+		/* NOTE: review for atomic access */
 		igb_stats->q_opackets[i] =
-			internal->tx_null_queues[i].tx_pkts.cnt;
+			internal->tx_null_queues[i].tx_pkts;
 		tx_total += igb_stats->q_opackets[i];
 	}
 
@@ -347,9 +353,11 @@ struct pmd_internals {
 
 	internal = dev->data->dev_private;
 	for (i = 0; i < RTE_DIM(internal->rx_null_queues); i++)
-		internal->rx_null_queues[i].rx_pkts.cnt = 0;
+		/* NOTE: review for atomic access */
+		internal->rx_null_queues[i].rx_pkts = 0;
 	for (i = 0; i < RTE_DIM(internal->tx_null_queues); i++)
-		internal->tx_null_queues[i].tx_pkts.cnt = 0;
+		/* NOTE: review for atomic access */
+		internal->tx_null_queues[i].tx_pkts = 0;
 
 	return 0;
 }
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 83+ messages in thread
- * [PATCH v5 6/6] net/ring: replace rte atomics with GCC builtin atomics
  2023-06-06 21:45 ` [PATCH v5 0/6] " Tyler Retzlaff
                     ` (4 preceding siblings ...)
  2023-06-06 21:45   ` [PATCH v5 5/6] net/null: " Tyler Retzlaff
@ 2023-06-06 21:45   ` Tyler Retzlaff
  2023-06-09 15:01   ` [PATCH v5 0/6] " David Marchand
  6 siblings, 0 replies; 83+ messages in thread
From: Tyler Retzlaff @ 2023-06-06 21:45 UTC (permalink / raw)
  To: dev, david.marchand
  Cc: Olivier Matz, Bruce Richardson, Kevin Laatz, Qiming Yang,
	Qi Zhang, Wenjun Wu, Tetsuya Mukawa, Honnappa.Nagarahalli,
	thomas, Tyler Retzlaff
Replace the use of rte_atomic.h types and functions, instead use GCC
supplied C++11 memory model builtins.
Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Bruce Richardson <bruce.richardson@intel.com>
---
 drivers/net/ring/rte_eth_ring.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ring/rte_eth_ring.c b/drivers/net/ring/rte_eth_ring.c
index e8bc9b6..c43dcce 100644
--- a/drivers/net/ring/rte_eth_ring.c
+++ b/drivers/net/ring/rte_eth_ring.c
@@ -44,8 +44,8 @@ enum dev_action {
 
 struct ring_queue {
 	struct rte_ring *rng;
-	rte_atomic64_t rx_pkts;
-	rte_atomic64_t tx_pkts;
+	uint64_t rx_pkts;
+	uint64_t tx_pkts;
 };
 
 struct pmd_internals {
@@ -80,9 +80,9 @@ struct pmd_internals {
 	const uint16_t nb_rx = (uint16_t)rte_ring_dequeue_burst(r->rng,
 			ptrs, nb_bufs, NULL);
 	if (r->rng->flags & RING_F_SC_DEQ)
-		r->rx_pkts.cnt += nb_rx;
+		r->rx_pkts += nb_rx;
 	else
-		rte_atomic64_add(&(r->rx_pkts), nb_rx);
+		__atomic_fetch_add(&r->rx_pkts, nb_rx, __ATOMIC_RELAXED);
 	return nb_rx;
 }
 
@@ -94,9 +94,9 @@ struct pmd_internals {
 	const uint16_t nb_tx = (uint16_t)rte_ring_enqueue_burst(r->rng,
 			ptrs, nb_bufs, NULL);
 	if (r->rng->flags & RING_F_SP_ENQ)
-		r->tx_pkts.cnt += nb_tx;
+		r->tx_pkts += nb_tx;
 	else
-		rte_atomic64_add(&(r->tx_pkts), nb_tx);
+		__atomic_fetch_add(&r->tx_pkts, nb_tx, __ATOMIC_RELAXED);
 	return nb_tx;
 }
 
@@ -184,13 +184,13 @@ struct pmd_internals {
 
 	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
 			i < dev->data->nb_rx_queues; i++) {
-		stats->q_ipackets[i] = internal->rx_ring_queues[i].rx_pkts.cnt;
+		stats->q_ipackets[i] = internal->rx_ring_queues[i].rx_pkts;
 		rx_total += stats->q_ipackets[i];
 	}
 
 	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
 			i < dev->data->nb_tx_queues; i++) {
-		stats->q_opackets[i] = internal->tx_ring_queues[i].tx_pkts.cnt;
+		stats->q_opackets[i] = internal->tx_ring_queues[i].tx_pkts;
 		tx_total += stats->q_opackets[i];
 	}
 
@@ -207,9 +207,9 @@ struct pmd_internals {
 	struct pmd_internals *internal = dev->data->dev_private;
 
 	for (i = 0; i < dev->data->nb_rx_queues; i++)
-		internal->rx_ring_queues[i].rx_pkts.cnt = 0;
+		internal->rx_ring_queues[i].rx_pkts = 0;
 	for (i = 0; i < dev->data->nb_tx_queues; i++)
-		internal->tx_ring_queues[i].tx_pkts.cnt = 0;
+		internal->tx_ring_queues[i].tx_pkts = 0;
 
 	return 0;
 }
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 83+ messages in thread
- * Re: [PATCH v5 0/6] replace rte atomics with GCC builtin atomics
  2023-06-06 21:45 ` [PATCH v5 0/6] " Tyler Retzlaff
                     ` (5 preceding siblings ...)
  2023-06-06 21:45   ` [PATCH v5 6/6] net/ring: " Tyler Retzlaff
@ 2023-06-09 15:01   ` David Marchand
  2023-06-09 15:13     ` Tyler Retzlaff
  6 siblings, 1 reply; 83+ messages in thread
From: David Marchand @ 2023-06-09 15:01 UTC (permalink / raw)
  To: Tyler Retzlaff
  Cc: dev, Olivier Matz, Bruce Richardson, Kevin Laatz, Qiming Yang,
	Qi Zhang, Wenjun Wu, Tetsuya Mukawa, Honnappa.Nagarahalli,
	thomas
On Tue, Jun 6, 2023 at 11:45 PM Tyler Retzlaff
<roretzla@linux.microsoft.com> wrote:
>
> Replace the use of rte_atomic.h types and functions, instead use GCC
> supplied C++11 memory model builtins.
>
> This series covers the libraries and drivers that are built on Windows.
>
> The code has be converted to use the __atomic builtins but there are
> additional during conversion I notice that there may be some issues
> that need to be addressed.
>
> I'll comment in the patches where my concerns are so the maintainers
> may comment.
>
> v5:
>   * use relaxed ordering for counter increments in net/ring patch
>   * remove note comments from net/ring patch
>
> v4:
>
>   * drop patch for lib/ring it will be provided by ARM / Honnappa
>   * rebase for changes in dma/idxd merge
>   * adapt __atomic_fetch_sub(...) - 1 == 0 to be (__atomic_fetch_sub(...) == 1)
>     as per feedback.
>   * drop one /* NOTE: review for potential ordering optimization */ since
>     the note reference non-critical to perf control path.
>
>   note:
>
>   Remainder of the NOTE comments have been retained since there
>   seems to be no consensus but stronger opinion/argument to keep
>   expressed. while I generally agree that changes should not
>   include ``TODO'' style comments I also agree that without these
>   comments in your face people are very unlikely to feel compelled
>   to make the review they are trying to solicit without them. if
>   it is absolute that the series won't be merged with them then I
>   will remove them, but please be explicit soon.
>
> v3:
>   * style, don't use c99 comments
>
> v2:
>   * comment code where optimizations may be possible now that memory
>     order can be specified.
>   * comment code where operations should potentially be atomic so that
>     maintainers can review.
>   * change a couple of variables labeled as counters to be unsigned.
>
> Tyler Retzlaff (6):
>   stack: replace rte atomics with GCC builtin atomics
>   dma/idxd: replace rte atomics with GCC builtin atomics
>   net/ice: replace rte atomics with GCC builtin atomics
>   net/ixgbe: replace rte atomics with GCC builtin atomics
>   net/null: replace rte atomics with GCC builtin atomics
>   net/ring: replace rte atomics with GCC builtin atomics
>
>  drivers/dma/idxd/idxd_internal.h |  3 +--
>  drivers/dma/idxd/idxd_pci.c      | 11 ++++++-----
>  drivers/net/ice/ice_dcf.c        |  1 -
>  drivers/net/ice/ice_dcf_ethdev.c |  1 -
>  drivers/net/ice/ice_ethdev.c     | 12 ++++++++----
>  drivers/net/ixgbe/ixgbe_bypass.c |  1 -
>  drivers/net/ixgbe/ixgbe_ethdev.c | 18 ++++++++++++------
>  drivers/net/ixgbe/ixgbe_ethdev.h |  3 ++-
>  drivers/net/ixgbe/ixgbe_flow.c   |  1 -
>  drivers/net/ixgbe/ixgbe_rxtx.c   |  1 -
>  drivers/net/null/rte_eth_null.c  | 28 ++++++++++++++++++----------
>  drivers/net/ring/rte_eth_ring.c  | 20 ++++++++++----------
>  lib/stack/rte_stack_lf_generic.h | 16 +++++++++-------
>  13 files changed, 66 insertions(+), 50 deletions(-)
I am not really enthusiastic about those NOTE:.
I would prefer we get an explicit go/nogo from each maintainer, but
this did not happen.
I think that this indicates that those NOTE: will rot in the code now.
Thomas proposed to track those NOTE: in the release announce mail and
that we ping maintainers regularly.
Let's see how it goes.
I am merging this series so we can progress on the $SUBJECT.
Series applied, thanks.
Tyler, about the patch on the ring library that was dropped but got no
viable alternative, I'll wait for a decision from ARM and you.
-- 
David Marchand
^ permalink raw reply	[flat|nested] 83+ messages in thread
- * Re: [PATCH v5 0/6] replace rte atomics with GCC builtin atomics
  2023-06-09 15:01   ` [PATCH v5 0/6] " David Marchand
@ 2023-06-09 15:13     ` Tyler Retzlaff
  2023-06-22 19:59       ` Patrick Robb
  0 siblings, 1 reply; 83+ messages in thread
From: Tyler Retzlaff @ 2023-06-09 15:13 UTC (permalink / raw)
  To: David Marchand
  Cc: dev, Olivier Matz, Bruce Richardson, Kevin Laatz, Qiming Yang,
	Qi Zhang, Wenjun Wu, Tetsuya Mukawa, Honnappa.Nagarahalli,
	thomas
On Fri, Jun 09, 2023 at 05:01:53PM +0200, David Marchand wrote:
> On Tue, Jun 6, 2023 at 11:45 PM Tyler Retzlaff
> <roretzla@linux.microsoft.com> wrote:
> >
> > Replace the use of rte_atomic.h types and functions, instead use GCC
> > supplied C++11 memory model builtins.
> >
> > This series covers the libraries and drivers that are built on Windows.
> >
> > The code has be converted to use the __atomic builtins but there are
> > additional during conversion I notice that there may be some issues
> > that need to be addressed.
> >
> > I'll comment in the patches where my concerns are so the maintainers
> > may comment.
> >
> > v5:
> >   * use relaxed ordering for counter increments in net/ring patch
> >   * remove note comments from net/ring patch
> >
> > v4:
> >
> >   * drop patch for lib/ring it will be provided by ARM / Honnappa
> >   * rebase for changes in dma/idxd merge
> >   * adapt __atomic_fetch_sub(...) - 1 == 0 to be (__atomic_fetch_sub(...) == 1)
> >     as per feedback.
> >   * drop one /* NOTE: review for potential ordering optimization */ since
> >     the note reference non-critical to perf control path.
> >
> >   note:
> >
> >   Remainder of the NOTE comments have been retained since there
> >   seems to be no consensus but stronger opinion/argument to keep
> >   expressed. while I generally agree that changes should not
> >   include ``TODO'' style comments I also agree that without these
> >   comments in your face people are very unlikely to feel compelled
> >   to make the review they are trying to solicit without them. if
> >   it is absolute that the series won't be merged with them then I
> >   will remove them, but please be explicit soon.
> >
> > v3:
> >   * style, don't use c99 comments
> >
> > v2:
> >   * comment code where optimizations may be possible now that memory
> >     order can be specified.
> >   * comment code where operations should potentially be atomic so that
> >     maintainers can review.
> >   * change a couple of variables labeled as counters to be unsigned.
> >
> > Tyler Retzlaff (6):
> >   stack: replace rte atomics with GCC builtin atomics
> >   dma/idxd: replace rte atomics with GCC builtin atomics
> >   net/ice: replace rte atomics with GCC builtin atomics
> >   net/ixgbe: replace rte atomics with GCC builtin atomics
> >   net/null: replace rte atomics with GCC builtin atomics
> >   net/ring: replace rte atomics with GCC builtin atomics
> >
> >  drivers/dma/idxd/idxd_internal.h |  3 +--
> >  drivers/dma/idxd/idxd_pci.c      | 11 ++++++-----
> >  drivers/net/ice/ice_dcf.c        |  1 -
> >  drivers/net/ice/ice_dcf_ethdev.c |  1 -
> >  drivers/net/ice/ice_ethdev.c     | 12 ++++++++----
> >  drivers/net/ixgbe/ixgbe_bypass.c |  1 -
> >  drivers/net/ixgbe/ixgbe_ethdev.c | 18 ++++++++++++------
> >  drivers/net/ixgbe/ixgbe_ethdev.h |  3 ++-
> >  drivers/net/ixgbe/ixgbe_flow.c   |  1 -
> >  drivers/net/ixgbe/ixgbe_rxtx.c   |  1 -
> >  drivers/net/null/rte_eth_null.c  | 28 ++++++++++++++++++----------
> >  drivers/net/ring/rte_eth_ring.c  | 20 ++++++++++----------
> >  lib/stack/rte_stack_lf_generic.h | 16 +++++++++-------
> >  13 files changed, 66 insertions(+), 50 deletions(-)
> 
> I am not really enthousiastic about those NOTE:.
> I would prefer we get an explicit go/nogo from each maintainers, but
> this did not happen.
> I think that this indicates that those NOTE: will rot in the code now.
> 
> Thomas proposed to track those NOTE: in the release announce mail and
> that we ping maintainers regularly.
> Let's see how it goes.
Let's leave it for one release cycle; if, even with the announce mail,
maintainers take no action within that time, I'll commit to going
through and cleaning them out before 23.11 rc1.
> 
> I am merging this series so we can progress on the $SUBJECT.
> Series applied, thanks.
Thanks David, this will allow forward progress.
> 
> 
> Tyler, about the patch on the ring library that was dropped by got no
> viable alternative, I'll wait for a decision from ARM and you.
I'll wait for Honnappa to follow up and we'll decide what to do when he
does.
> 
> -- 
> David Marchand
^ permalink raw reply	[flat|nested] 83+ messages in thread 
- * Re: [PATCH v5 0/6] replace rte atomics with GCC builtin atomics
  2023-06-09 15:13     ` Tyler Retzlaff
@ 2023-06-22 19:59       ` Patrick Robb
  2023-06-23  8:53         ` David Marchand
  2023-06-23 21:35         ` Tyler Retzlaff
  0 siblings, 2 replies; 83+ messages in thread
From: Patrick Robb @ 2023-06-22 19:59 UTC (permalink / raw)
  To: Tyler Retzlaff
  Cc: David Marchand, dev, Olivier Matz, Bruce Richardson, Kevin Laatz,
	Qiming Yang, Qi Zhang, Wenjun Wu, Tetsuya Mukawa,
	Honnappa.Nagarahalli, thomas
[-- Attachment #1: Type: text/plain, Size: 6622 bytes --]
I want to report a possible regression from this patch series seen from CI
testing on our Intel 82599ES 10G NIC, which we failed to report to
patchwork when this initially went under CI due to a bug in our Jenkins
reporting scripts. Use of the ixgbe driver appears to be affected. Tyler I
apologize for the issues seen with reporting. We've made some temporary
changes to avoid this happening again, and are currently reworking our
reporting process entirely to provide greater reliability.
Here is a DTS snippet showing the issue, and the full log for the
failing virtio_smoke test can be downloaded here:
https://dpdkdashboard.iol.unh.edu/results/dashboard/patchsets/26560/
06/06/2023 18:22:58                TestVirtioSmoke: Start send packets and
verify
06/06/2023 18:22:58                         tester: ifconfig enp134s0f0 mtu
9000
06/06/2023 18:22:58                         tester:
06/06/2023 18:42:59                TestVirtioSmoke: Test Case
test_virtio_pvp Result FAILED: TIMEOUT on port start 0
06/06/2023 18:42:59                TestVirtioSmoke: port start 0
ixgbe_dev_wait_setup_link_complete(): IXGBE link thread not complete too
long time!
ixgbe_dev_wait_setup_link_complete(): IXGBE link thread not complete too
long time!
ixgbe_dev_wait_setup_link_complete(): IXGBE link thread not complete too
long time!
We initially took this Intel10G testing offline to investigate as we
thought it was a lab infra failure. Obviously that wasn't the case, so
ideally we will bring this back online when appropriate. But, I don't want
to do so right now and start failing everyone's patchseries which are
obviously unrelated to this. Comments on this are welcome, otherwise of
course I will just return this test coverage to our CI when the state of
the git tree allows for it.
Apologies for the missing report and the timeline on this. We are taking
action to deliver results more reliably going forward.
On Fri, Jun 9, 2023 at 11:13 AM Tyler Retzlaff <roretzla@linux.microsoft.com>
wrote:
> On Fri, Jun 09, 2023 at 05:01:53PM +0200, David Marchand wrote:
> > On Tue, Jun 6, 2023 at 11:45 PM Tyler Retzlaff
> > <roretzla@linux.microsoft.com> wrote:
> > >
> > > Replace the use of rte_atomic.h types and functions, instead use GCC
> > > supplied C++11 memory model builtins.
> > >
> > > This series covers the libraries and drivers that are built on Windows.
> > >
> > > The code has be converted to use the __atomic builtins but there are
> > > additional during conversion I notice that there may be some issues
> > > that need to be addressed.
> > >
> > > I'll comment in the patches where my concerns are so the maintainers
> > > may comment.
> > >
> > > v5:
> > >   * use relaxed ordering for counter increments in net/ring patch
> > >   * remove note comments from net/ring patch
> > >
> > > v4:
> > >
> > >   * drop patch for lib/ring it will be provided by ARM / Honnappa
> > >   * rebase for changes in dma/idxd merge
> > >   * adapt __atomic_fetch_sub(...) - 1 == 0 to be
> (__atomic_fetch_sub(...) == 1)
> > >     as per feedback.
> > >   * drop one /* NOTE: review for potential ordering optimization */
> since
> > >     the note reference non-critical to perf control path.
> > >
> > >   note:
> > >
> > >   Remainder of the NOTE comments have been retained since there
> > >   seems to be no consensus but stronger opinion/argument to keep
> > >   expressed. while I generally agree that changes should not
> > >   include ``TODO'' style comments I also agree that without these
> > >   comments in your face people are very unlikely to feel compelled
> > >   to make the review they are trying to solicit without them. if
> > >   it is absolute that the series won't be merged with them then I
> > >   will remove them, but please be explicit soon.
> > >
> > > v3:
> > >   * style, don't use c99 comments
> > >
> > > v2:
> > >   * comment code where optimizations may be possible now that memory
> > >     order can be specified.
> > >   * comment code where operations should potentially be atomic so that
> > >     maintainers can review.
> > >   * change a couple of variables labeled as counters to be unsigned.
> > >
> > > Tyler Retzlaff (6):
> > >   stack: replace rte atomics with GCC builtin atomics
> > >   dma/idxd: replace rte atomics with GCC builtin atomics
> > >   net/ice: replace rte atomics with GCC builtin atomics
> > >   net/ixgbe: replace rte atomics with GCC builtin atomics
> > >   net/null: replace rte atomics with GCC builtin atomics
> > >   net/ring: replace rte atomics with GCC builtin atomics
> > >
> > >  drivers/dma/idxd/idxd_internal.h |  3 +--
> > >  drivers/dma/idxd/idxd_pci.c      | 11 ++++++-----
> > >  drivers/net/ice/ice_dcf.c        |  1 -
> > >  drivers/net/ice/ice_dcf_ethdev.c |  1 -
> > >  drivers/net/ice/ice_ethdev.c     | 12 ++++++++----
> > >  drivers/net/ixgbe/ixgbe_bypass.c |  1 -
> > >  drivers/net/ixgbe/ixgbe_ethdev.c | 18 ++++++++++++------
> > >  drivers/net/ixgbe/ixgbe_ethdev.h |  3 ++-
> > >  drivers/net/ixgbe/ixgbe_flow.c   |  1 -
> > >  drivers/net/ixgbe/ixgbe_rxtx.c   |  1 -
> > >  drivers/net/null/rte_eth_null.c  | 28 ++++++++++++++++++----------
> > >  drivers/net/ring/rte_eth_ring.c  | 20 ++++++++++----------
> > >  lib/stack/rte_stack_lf_generic.h | 16 +++++++++-------
> > >  13 files changed, 66 insertions(+), 50 deletions(-)
> >
> > I am not really enthousiastic about those NOTE:.
> > I would prefer we get an explicit go/nogo from each maintainers, but
> > this did not happen.
> > I think that this indicates that those NOTE: will rot in the code now.
> >
> > Thomas proposed to track those NOTE: in the release announce mail and
> > that we ping maintainers regularly.
> > Let's see how it goes.
>
> Let's leave it for one release cycle, if with the the announce mail
> maintainers take no action within that time I'll commit to going
> through and cleaning them out before 23.11 rc1.
>
> >
> > I am merging this series so we can progress on the $SUBJECT.
> > Series applied, thanks.
>
> Thanks David, this will allow forward progress.
>
> >
> >
> > Tyler, about the patch on the ring library that was dropped by got no
> > viable alternative, I'll wait for a decision from ARM and you.
>
> I'll wait for Honnappa to follow up and we'll decide what to do when he
> does.
>
> >
> > --
> > David Marchand
>
-- 
Patrick Robb
Technical Service Manager
UNH InterOperability Laboratory
21 Madbury Rd, Suite 100, Durham, NH 03824
www.iol.unh.edu
[-- Attachment #2: Type: text/html, Size: 9910 bytes --]
^ permalink raw reply	[flat|nested] 83+ messages in thread 
- * Re: [PATCH v5 0/6] replace rte atomics with GCC builtin atomics
  2023-06-22 19:59       ` Patrick Robb
@ 2023-06-23  8:53         ` David Marchand
  2023-06-23 21:37           ` Tyler Retzlaff
  2023-06-23 21:35         ` Tyler Retzlaff
  1 sibling, 1 reply; 83+ messages in thread
From: David Marchand @ 2023-06-23  8:53 UTC (permalink / raw)
  To: Patrick Robb
  Cc: Tyler Retzlaff, dev, Qiming Yang, Qi Zhang, Wenjun Wu, thomas
Hello Patrick,
On Thu, Jun 22, 2023 at 10:00 PM Patrick Robb <probb@iol.unh.edu> wrote:
>
> I want to report a possible regression from this patch series seen from CI testing on our Intel 82599ES 10G NIC, which we failed to report to patchwork when this initially went under CI due to a bug in our Jenkins reporting scripts. Use of the ixgbe driver appears to be affected. Tyler I apologize for the issues seen with reporting. We've made some temporary changes to avoid this happening again, and are currently reworking our reporting process entirely to provide greater reliability.
>
> Here is a DTS snippet showing the issue, and the full log for the failing virtio_smoke test can be downloaded here: https://dpdkdashboard.iol.unh.edu/results/dashboard/patchsets/26560/
>
> 06/06/2023 18:22:58                TestVirtioSmoke: Start send packets and verify
> 06/06/2023 18:22:58                         tester: ifconfig enp134s0f0 mtu 9000
> 06/06/2023 18:22:58                         tester:
> 06/06/2023 18:42:59                TestVirtioSmoke: Test Case test_virtio_pvp Result FAILED: TIMEOUT on port start 0
> 06/06/2023 18:42:59                TestVirtioSmoke: port start 0
>
> ixgbe_dev_wait_setup_link_complete(): IXGBE link thread not complete too long time!
> ixgbe_dev_wait_setup_link_complete(): IXGBE link thread not complete too long time!
> ixgbe_dev_wait_setup_link_complete(): IXGBE link thread not complete too long time!
>
> We initially took this Intel10G testing offline to investigate as we thought it was a lab infra failure. Obviously that wasn't the case, so ideally we will bring this back online when appropriate. But, I don't want to do so right now and start failing everyone's patchseries which are obviously unrelated to this. Comments on this are welcome, otherwise of course I will just return this test coverage to our CI when the state of the git tree allows for it.
>
> Apologies for the missing report and the timeline on this. We are taking action to deliver results more reliably going forward.
(reduced the cc list a bit)
This is probably the same issue as the one reported by Intel
validation: https://bugs.dpdk.org/show_bug.cgi?id=1249
A fix has been merged in next-net-intel, it will reach the main repo soon.
https://git.dpdk.org/next/dpdk-next-net-intel/commit/?id=fe4ce0aee766969a0e27fe28ced8ee7c761a2c4e
-- 
David Marchand
^ permalink raw reply	[flat|nested] 83+ messages in thread 
- * Re: [PATCH v5 0/6] replace rte atomics with GCC builtin atomics
  2023-06-23  8:53         ` David Marchand
@ 2023-06-23 21:37           ` Tyler Retzlaff
  2023-06-28 14:01             ` Patrick Robb
  0 siblings, 1 reply; 83+ messages in thread
From: Tyler Retzlaff @ 2023-06-23 21:37 UTC (permalink / raw)
  To: David Marchand
  Cc: Patrick Robb, dev, Qiming Yang, Qi Zhang, Wenjun Wu, thomas
On Fri, Jun 23, 2023 at 10:53:22AM +0200, David Marchand wrote:
> Hello Patrick,
> 
> On Thu, Jun 22, 2023 at 10:00 PM Patrick Robb <probb@iol.unh.edu> wrote:
> >
> > I want to report a possible regression from this patch series seen from CI testing on our Intel 82599ES 10G NIC, which we failed to report to patchwork when this initially went under CI due to a bug in our Jenkins reporting scripts. Use of the ixgbe driver appears to be affected. Tyler I apologize for the issues seen with reporting. We've made some temporary changes to avoid this happening again, and are currently reworking our reporting process entirely to provide greater reliability.
> >
> > Here is a DTS snippet showing the issue, and the full log for the failing virtio_smoke test can be downloaded here: https://dpdkdashboard.iol.unh.edu/results/dashboard/patchsets/26560/
> >
> > 06/06/2023 18:22:58                TestVirtioSmoke: Start send packets and verify
> > 06/06/2023 18:22:58                         tester: ifconfig enp134s0f0 mtu 9000
> > 06/06/2023 18:22:58                         tester:
> > 06/06/2023 18:42:59                TestVirtioSmoke: Test Case test_virtio_pvp Result FAILED: TIMEOUT on port start 0
> > 06/06/2023 18:42:59                TestVirtioSmoke: port start 0
> >
> > ixgbe_dev_wait_setup_link_complete(): IXGBE link thread not complete too long time!
> > ixgbe_dev_wait_setup_link_complete(): IXGBE link thread not complete too long time!
> > ixgbe_dev_wait_setup_link_complete(): IXGBE link thread not complete too long time!
> >
> > We initially took this Intel10G testing offline to investigate as we thought it was a lab infra failure. Obviously that wasn't the case, so ideally we will bring this back online when appropriate. But, I don't want to do so right now and start failing everyone's patchseries which are obviously unrelated to this. Comments on this are welcome, otherwise of course I will just return this test coverage to our CI when the state of the git tree allows for it.
> >
> > Apologies for the missing report and the timeline on this. We are taking action to deliver results more reliably going forward.
> 
> (reduced the cc list a bit)
> 
> This is probably the same issue than what was reported by Intel
> validation: https://bugs.dpdk.org/show_bug.cgi?id=1249
> 
Thanks David
I should have read the next thread in the mail chain before replying.
> A fix has been merged in next-net-intel, it will reach the main repo soon.
> https://git.dpdk.org/next/dpdk-next-net-intel/commit/?id=fe4ce0aee766969a0e27fe28ced8ee7c761a2c4e
Patrick please let me know if after this integration I still need to
investigate further.
Thanks
> 
> 
> -- 
> David Marchand
^ permalink raw reply	[flat|nested] 83+ messages in thread 
- * Re: [PATCH v5 0/6] replace rte atomics with GCC builtin atomics
  2023-06-23 21:37           ` Tyler Retzlaff
@ 2023-06-28 14:01             ` Patrick Robb
  2023-06-28 14:49               ` David Marchand
  0 siblings, 1 reply; 83+ messages in thread
From: Patrick Robb @ 2023-06-28 14:01 UTC (permalink / raw)
  To: Tyler Retzlaff
  Cc: David Marchand, dev, Qiming Yang, Qi Zhang, Wenjun Wu, thomas
[-- Attachment #1: Type: text/plain, Size: 3385 bytes --]
Thanks David, Tyler,
I ran the next-net-intel branch through DTS with the nic utilizing the
ixgbe driver, and everything is passing now. When this reaches the main
repo I will return the nic in question to UNH CI testing.
Best,
Patrick
On Fri, Jun 23, 2023 at 5:37 PM Tyler Retzlaff <roretzla@linux.microsoft.com>
wrote:
> On Fri, Jun 23, 2023 at 10:53:22AM +0200, David Marchand wrote:
> > Hello Patrick,
> >
> > On Thu, Jun 22, 2023 at 10:00 PM Patrick Robb <probb@iol.unh.edu> wrote:
> > >
> > > I want to report a possible regression from this patch series seen
> from CI testing on our Intel 82599ES 10G NIC, which we failed to report to
> patchwork when this initially went under CI due to a bug in our Jenkins
> reporting scripts. Use of the ixgbe driver appears to be affected. Tyler I
> apologize for the issues seen with reporting. We've made some temporary
> changes to avoid this happening again, and are currently reworking our
> reporting process entirely to provide greater reliability.
> > >
> > > Here is a DTS snippet showing the issue, and the full log for the
> failing virtio_smoke test can be downloaded here:
> https://dpdkdashboard.iol.unh.edu/results/dashboard/patchsets/26560/
> > >
> > > 06/06/2023 18:22:58                TestVirtioSmoke: Start send packets
> and verify
> > > 06/06/2023 18:22:58                         tester: ifconfig
> enp134s0f0 mtu 9000
> > > 06/06/2023 18:22:58                         tester:
> > > 06/06/2023 18:42:59                TestVirtioSmoke: Test Case
> test_virtio_pvp Result FAILED: TIMEOUT on port start 0
> > > 06/06/2023 18:42:59                TestVirtioSmoke: port start 0
> > >
> > > ixgbe_dev_wait_setup_link_complete(): IXGBE link thread not complete
> too long time!
> > > ixgbe_dev_wait_setup_link_complete(): IXGBE link thread not complete
> too long time!
> > > ixgbe_dev_wait_setup_link_complete(): IXGBE link thread not complete
> too long time!
> > >
> > > We initially took this Intel10G testing offline to investigate as we
> thought it was a lab infra failure. Obviously that wasn't the case, so
> ideally we will bring this back online when appropriate. But, I don't want
> to do so right now and start failing everyone's patchseries which are
> obviously unrelated to this. Comments on this are welcome, otherwise of
> course I will just return this test coverage to our CI when the state of
> the git tree allows for it.
> > >
> > > Apologies for the missing report and the timeline on this. We are
> taking action to deliver results more reliably going forward.
> >
> > (reduced the cc list a bit)
> >
> > This is probably the same issue than what was reported by Intel
> > validation: https://bugs.dpdk.org/show_bug.cgi?id=1249
> >
>
> Thanks David
>
> I should have read the next thread in the mail chain before replying.
>
> > A fix has been merged in next-net-intel, it will reach the main repo
> soon.
> >
> https://git.dpdk.org/next/dpdk-next-net-intel/commit/?id=fe4ce0aee766969a0e27fe28ced8ee7c761a2c4e
>
> Patrick please let me know if after this integration I still need to
> investigate further.
>
> Thanks
>
> >
> >
> > --
> > David Marchand
>
-- 
Patrick Robb
Technical Service Manager
UNH InterOperability Laboratory
21 Madbury Rd, Suite 100, Durham, NH 03824
www.iol.unh.edu
[-- Attachment #2: Type: text/html, Size: 6281 bytes --]
^ permalink raw reply	[flat|nested] 83+ messages in thread 
- * Re: [PATCH v5 0/6] replace rte atomics with GCC builtin atomics
  2023-06-28 14:01             ` Patrick Robb
@ 2023-06-28 14:49               ` David Marchand
  0 siblings, 0 replies; 83+ messages in thread
From: David Marchand @ 2023-06-28 14:49 UTC (permalink / raw)
  To: Patrick Robb, Ajit Khaparde, Jerin Jacob Kollanukkaran,
	Raslan Darawsheh, Akhil Goyal
  Cc: Tyler Retzlaff, dev, Qiming Yang, Qi Zhang, Wenjun Wu, thomas
On Wed, Jun 28, 2023 at 4:01 PM Patrick Robb <probb@iol.unh.edu> wrote:
>
> Thanks David, Tyler,
>
> I ran the next-net-intel branch through DTS with the nic utilizing the ixgbe driver, and everything is passing now. When this reaches the main repo I will return the nic in question to UNH CI testing.
Thomas pulled the fix in main and tagged v23.07-rc2.
At the time I write this mail, main, next-baseband, next-net,
next-net-intel and next-virtio branches are fine.
Other subtree repositories will get updated soon.
Ping Ajit, Jerin, Raslan, Akhil for their respective subtrees.
Thanks.
-- 
David Marchand
^ permalink raw reply	[flat|nested] 83+ messages in thread 
 
 
 
- * Re: [PATCH v5 0/6] replace rte atomics with GCC builtin atomics
  2023-06-22 19:59       ` Patrick Robb
  2023-06-23  8:53         ` David Marchand
@ 2023-06-23 21:35         ` Tyler Retzlaff
  1 sibling, 0 replies; 83+ messages in thread
From: Tyler Retzlaff @ 2023-06-23 21:35 UTC (permalink / raw)
  To: Patrick Robb
  Cc: David Marchand, dev, Olivier Matz, Bruce Richardson, Kevin Laatz,
	Qiming Yang, Qi Zhang, Wenjun Wu, Tetsuya Mukawa,
	Honnappa.Nagarahalli, thomas
Hi Patrick,
I will take a look at this as a priority asap.
Thanks for bringing it to my attention.
On Thu, Jun 22, 2023 at 03:59:42PM -0400, Patrick Robb wrote:
> I want to report a possible regression from this patch series seen from CI
> testing on our Intel 82599ES 10G NIC, which we failed to report to
> patchwork when this initially went under CI due to a bug in our Jenkins
> reporting scripts. Use of the ixgbe driver appears to be affected. Tyler I
> apologize for the issues seen with reporting. We've made some temporary
> changes to avoid this happening again, and are currently reworking our
> reporting process entirely to provide greater reliability.
> 
> Here is a DTS snippet showing the issue, and the full log for the
> failing virtio_smoke test can be downloaded here:
> https://dpdkdashboard.iol.unh.edu/results/dashboard/patchsets/26560/
> 
> 06/06/2023 18:22:58                TestVirtioSmoke: Start send packets and
> verify
> 06/06/2023 18:22:58                         tester: ifconfig enp134s0f0 mtu
> 9000
> 06/06/2023 18:22:58                         tester:
> 06/06/2023 18:42:59                TestVirtioSmoke: Test Case
> test_virtio_pvp Result FAILED: TIMEOUT on port start 0
> 06/06/2023 18:42:59                TestVirtioSmoke: port start 0
> 
> ixgbe_dev_wait_setup_link_complete(): IXGBE link thread not complete too
> long time!
> ixgbe_dev_wait_setup_link_complete(): IXGBE link thread not complete too
> long time!
> ixgbe_dev_wait_setup_link_complete(): IXGBE link thread not complete too
> long time!
> 
> We initially took this Intel10G testing offline to investigate as we
> thought it was a lab infra failure. Obviously that wasn't the case, so
> ideally we will bring this back online when appropriate. But, I don't want
> to do so right now and start failing everyone's patchseries which are
> obviously unrelated to this. Comments on this are welcome, otherwise of
> course I will just return this test coverage to our CI when the state of
> the git tree allows for it.
> 
> Apologies for the missing report and the timeline on this. We are taking
> action to deliver results more reliably going forward.
> 
> 
> On Fri, Jun 9, 2023 at 11:13 AM Tyler Retzlaff <roretzla@linux.microsoft.com>
> wrote:
> 
> > On Fri, Jun 09, 2023 at 05:01:53PM +0200, David Marchand wrote:
> > > On Tue, Jun 6, 2023 at 11:45 PM Tyler Retzlaff
> > > <roretzla@linux.microsoft.com> wrote:
> > > >
> > > > Replace the use of rte_atomic.h types and functions, instead use GCC
> > > > supplied C++11 memory model builtins.
> > > >
> > > > This series covers the libraries and drivers that are built on Windows.
> > > >
> > > > > The code has been converted to use the __atomic builtins, but during
> > > > > conversion I noticed that there may be some additional issues that
> > > > > need to be addressed.
> > > >
> > > > I'll comment in the patches where my concerns are so the maintainers
> > > > may comment.
> > > >
> > > > v5:
> > > >   * use relaxed ordering for counter increments in net/ring patch
> > > >   * remove note comments from net/ring patch
> > > >
> > > > v4:
> > > >
> > > >   * drop patch for lib/ring it will be provided by ARM / Honnappa
> > > >   * rebase for changes in dma/idxd merge
> > > >   * adapt __atomic_fetch_sub(...) - 1 == 0 to be
> > (__atomic_fetch_sub(...) == 1)
> > > >     as per feedback.
> > > >   * drop one /* NOTE: review for potential ordering optimization */
> > since
> > > > >     the note references a control path that is not performance-critical.
> > > >
> > > >   note:
> > > >
> > > >   Remainder of the NOTE comments have been retained since there
> > > >   seems to be no consensus but stronger opinion/argument to keep
> > > >   expressed. while I generally agree that changes should not
> > > >   include ``TODO'' style comments I also agree that without these
> > > >   comments in your face people are very unlikely to feel compelled
> > > >   to make the review they are trying to solicit without them. if
> > > >   it is absolute that the series won't be merged with them then I
> > > >   will remove them, but please be explicit soon.
> > > >
> > > > v3:
> > > >   * style, don't use c99 comments
> > > >
> > > > v2:
> > > >   * comment code where optimizations may be possible now that memory
> > > >     order can be specified.
> > > >   * comment code where operations should potentially be atomic so that
> > > >     maintainers can review.
> > > >   * change a couple of variables labeled as counters to be unsigned.
> > > >
> > > > Tyler Retzlaff (6):
> > > >   stack: replace rte atomics with GCC builtin atomics
> > > >   dma/idxd: replace rte atomics with GCC builtin atomics
> > > >   net/ice: replace rte atomics with GCC builtin atomics
> > > >   net/ixgbe: replace rte atomics with GCC builtin atomics
> > > >   net/null: replace rte atomics with GCC builtin atomics
> > > >   net/ring: replace rte atomics with GCC builtin atomics
> > > >
> > > >  drivers/dma/idxd/idxd_internal.h |  3 +--
> > > >  drivers/dma/idxd/idxd_pci.c      | 11 ++++++-----
> > > >  drivers/net/ice/ice_dcf.c        |  1 -
> > > >  drivers/net/ice/ice_dcf_ethdev.c |  1 -
> > > >  drivers/net/ice/ice_ethdev.c     | 12 ++++++++----
> > > >  drivers/net/ixgbe/ixgbe_bypass.c |  1 -
> > > >  drivers/net/ixgbe/ixgbe_ethdev.c | 18 ++++++++++++------
> > > >  drivers/net/ixgbe/ixgbe_ethdev.h |  3 ++-
> > > >  drivers/net/ixgbe/ixgbe_flow.c   |  1 -
> > > >  drivers/net/ixgbe/ixgbe_rxtx.c   |  1 -
> > > >  drivers/net/null/rte_eth_null.c  | 28 ++++++++++++++++++----------
> > > >  drivers/net/ring/rte_eth_ring.c  | 20 ++++++++++----------
> > > >  lib/stack/rte_stack_lf_generic.h | 16 +++++++++-------
> > > >  13 files changed, 66 insertions(+), 50 deletions(-)
> > >
> > > > I am not really enthusiastic about those NOTE:.
> > > > I would prefer we get an explicit go/nogo from each maintainer, but
> > > this did not happen.
> > > I think that this indicates that those NOTE: will rot in the code now.
> > >
> > > Thomas proposed to track those NOTE: in the release announce mail and
> > > that we ping maintainers regularly.
> > > Let's see how it goes.
> >
> > > Let's leave it for one release cycle, if with the announce mail
> > maintainers take no action within that time I'll commit to going
> > through and cleaning them out before 23.11 rc1.
> >
> > >
> > > I am merging this series so we can progress on the $SUBJECT.
> > > Series applied, thanks.
> >
> > Thanks David, this will allow forward progress.
> >
> > >
> > >
> > > > Tyler, about the patch on the ring library that was dropped but got no
> > > viable alternative, I'll wait for a decision from ARM and you.
> >
> > I'll wait for Honnappa to follow up and we'll decide what to do when he
> > does.
> >
> > >
> > > --
> > > David Marchand
> >
> 
> 
> -- 
> 
> Patrick Robb
> 
> Technical Service Manager
> 
> UNH InterOperability Laboratory
> 
> 21 Madbury Rd, Suite 100, Durham, NH 03824
> 
> www.iol.unh.edu
^ permalink raw reply	[flat|nested] 83+ messages in thread