DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [RFC PATCH v1 0/5] add new API for wait until scheme
@ 2021-09-02  5:32 Feifei Wang
  2021-09-02  5:32 ` [dpdk-dev] [RFC PATCH v1 1/5] eal: " Feifei Wang
                   ` (12 more replies)
  0 siblings, 13 replies; 113+ messages in thread
From: Feifei Wang @ 2021-09-02  5:32 UTC (permalink / raw)
  Cc: dev, nd, Feifei Wang

Add new APIs for the wait_until scheme, and apply these new APIs in
libraries to replace rte_pause.

Feifei Wang (5):
  eal: add new API for wait until scheme
  eal: use wait until scheme for read pflock
  eal: use wait until scheme for mcslock
  lib/bpf: use wait until scheme for Rx/Tx iteration
  lib/distributor: use wait until scheme

 lib/bpf/bpf_pkt.c                        |  11 +-
 lib/distributor/rte_distributor_single.c |  10 +-
 lib/eal/arm/include/rte_pause_64.h       | 271 ++++++++++++++++----
 lib/eal/include/generic/rte_mcslock.h    |   9 +-
 lib/eal/include/generic/rte_pause.h      | 309 +++++++++++++++++++++++
 lib/eal/include/generic/rte_pflock.h     |   5 +-
 6 files changed, 543 insertions(+), 72 deletions(-)

-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [RFC PATCH v1 1/5] eal: add new API for wait until scheme
  2021-09-02  5:32 [dpdk-dev] [RFC PATCH v1 0/5] add new API for wait until scheme Feifei Wang
@ 2021-09-02  5:32 ` Feifei Wang
  2021-09-02  5:32 ` [dpdk-dev] [RFC PATCH v1 2/5] eal: use wait until scheme for read pflock Feifei Wang
                   ` (11 subsequent siblings)
  12 siblings, 0 replies; 113+ messages in thread
From: Feifei Wang @ 2021-09-02  5:32 UTC (permalink / raw)
  To: Ruifeng Wang; +Cc: dev, nd, Feifei Wang

For 'wait until' scheme, add new APIs for more cases:
1. add wait_until_unequal API
2. add wait_until_part_equal API

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 lib/eal/arm/include/rte_pause_64.h  | 271 +++++++++++++++++++-----
 lib/eal/include/generic/rte_pause.h | 309 ++++++++++++++++++++++++++++
 2 files changed, 526 insertions(+), 54 deletions(-)

diff --git a/lib/eal/arm/include/rte_pause_64.h b/lib/eal/arm/include/rte_pause_64.h
index e87d10b8cc..19716276fc 100644
--- a/lib/eal/arm/include/rte_pause_64.h
+++ b/lib/eal/arm/include/rte_pause_64.h
@@ -31,20 +31,12 @@ static inline void rte_pause(void)
 /* Put processor into low power WFE(Wait For Event) state. */
 #define __WFE() { asm volatile("wfe" : : : "memory"); }
 
-static __rte_always_inline void
-rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
-		int memorder)
-{
-	uint16_t value;
-
-	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
-
-	/*
-	 * Atomic exclusive load from addr, it returns the 16-bit content of
-	 * *addr while making it 'monitored',when it is written by someone
-	 * else, the 'monitored' state is cleared and a event is generated
-	 * implicitly to exit WFE.
-	 */
+/*
+ * Atomic exclusive load from addr, it returns the 16-bit content of
+ * *addr while making it 'monitored', when it is written by someone
+ * else, the 'monitored' state is cleared and a event is generated
+ * implicitly to exit WFE.
+ */
 #define __LOAD_EXC_16(src, dst, memorder) {               \
 	if (memorder == __ATOMIC_RELAXED) {               \
 		asm volatile("ldxrh %w[tmp], [%x[addr]]"  \
@@ -58,6 +50,52 @@ rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
 			: "memory");                      \
 	} }
 
+/*
+ * Atomic exclusive load from addr, it returns the 32-bit content of
+ * *addr while making it 'monitored', when it is written by someone
+ * else, the 'monitored' state is cleared and a event is generated
+ * implicitly to exit WFE.
+ */
+#define __LOAD_EXC_32(src, dst, memorder) {              \
+	if (memorder == __ATOMIC_RELAXED) {              \
+		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
+			: [tmp] "=&r" (dst)              \
+			: [addr] "r"(src)                \
+			: "memory");                     \
+	} else {                                         \
+		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
+			: [tmp] "=&r" (dst)              \
+			: [addr] "r"(src)                \
+			: "memory");                     \
+	} }
+
+/*
+ * Atomic exclusive load from addr, it returns the 64-bit content of
+ * *addr while making it 'monitored', when it is written by someone
+ * else, the 'monitored' state is cleared and a event is generated
+ * implicitly to exit WFE.
+ */
+#define __LOAD_EXC_64(src, dst, memorder) {              \
+	if (memorder == __ATOMIC_RELAXED) {              \
+		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
+			: [tmp] "=&r" (dst)              \
+			: [addr] "r"(src)                \
+			: "memory");                     \
+	} else {                                         \
+		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
+			: [tmp] "=&r" (dst)              \
+			: [addr] "r"(src)                \
+			: "memory");                     \
+	} }
+
+static __rte_always_inline void
+rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
+		int memorder)
+{
+	uint16_t value;
+
+	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
+
 	__LOAD_EXC_16(addr, value, memorder)
 	if (value != expected) {
 		__SEVL()
@@ -66,7 +104,6 @@ rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
 			__LOAD_EXC_16(addr, value, memorder)
 		} while (value != expected);
 	}
-#undef __LOAD_EXC_16
 }
 
 static __rte_always_inline void
@@ -77,25 +114,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
 
 	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
 
-	/*
-	 * Atomic exclusive load from addr, it returns the 32-bit content of
-	 * *addr while making it 'monitored',when it is written by someone
-	 * else, the 'monitored' state is cleared and a event is generated
-	 * implicitly to exit WFE.
-	 */
-#define __LOAD_EXC_32(src, dst, memorder) {              \
-	if (memorder == __ATOMIC_RELAXED) {              \
-		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
-			: [tmp] "=&r" (dst)              \
-			: [addr] "r"(src)                \
-			: "memory");                     \
-	} else {                                         \
-		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
-			: [tmp] "=&r" (dst)              \
-			: [addr] "r"(src)                \
-			: "memory");                     \
-	} }
-
 	__LOAD_EXC_32(addr, value, memorder)
 	if (value != expected) {
 		__SEVL()
@@ -104,7 +122,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
 			__LOAD_EXC_32(addr, value, memorder)
 		} while (value != expected);
 	}
-#undef __LOAD_EXC_32
 }
 
 static __rte_always_inline void
@@ -115,25 +132,6 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
 
 	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
 
-	/*
-	 * Atomic exclusive load from addr, it returns the 64-bit content of
-	 * *addr while making it 'monitored',when it is written by someone
-	 * else, the 'monitored' state is cleared and a event is generated
-	 * implicitly to exit WFE.
-	 */
-#define __LOAD_EXC_64(src, dst, memorder) {              \
-	if (memorder == __ATOMIC_RELAXED) {              \
-		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
-			: [tmp] "=&r" (dst)              \
-			: [addr] "r"(src)                \
-			: "memory");                     \
-	} else {                                         \
-		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
-			: [tmp] "=&r" (dst)              \
-			: [addr] "r"(src)                \
-			: "memory");                     \
-	} }
-
 	__LOAD_EXC_64(addr, value, memorder)
 	if (value != expected) {
 		__SEVL()
@@ -143,6 +141,171 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
 		} while (value != expected);
 	}
 }
+
+static __rte_always_inline void
+rte_wait_until_part_equal_16(volatile uint16_t *addr, uint16_t mask,
+		uint16_t expected, int memorder)
+{
+	uint16_t value;
+
+	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
+
+	__LOAD_EXC_16(addr, value, memorder)
+	if ((value & mask) != expected) {
+		__SEVL()
+		do {
+			__WFE()
+			__LOAD_EXC_16(addr, value, memorder)
+		} while ((value & mask) != expected);
+	}
+}
+
+static __rte_always_inline void
+rte_wait_until_part_equal_32(volatile uint32_t *addr, uint32_t mask,
+		uint32_t expected, int memorder)
+{
+	uint32_t value;
+
+	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
+
+	__LOAD_EXC_32(addr, value, memorder)
+	if ((value & mask) != expected) {
+		__SEVL()
+		do {
+			__WFE()
+			__LOAD_EXC_32(addr, value, memorder)
+		} while ((value & mask) != expected);
+	}
+}
+
+static __rte_always_inline void
+rte_wait_until_part_equal_64(volatile uint64_t *addr, uint64_t mask,
+		uint64_t expected, int memorder)
+{
+	uint64_t value;
+
+	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
+
+	__LOAD_EXC_64(addr, value, memorder)
+	if ((value & mask) != expected) {
+		__SEVL()
+		do {
+			__WFE()
+			__LOAD_EXC_64(addr, value, memorder)
+		} while ((value & mask) != expected);
+	}
+}
+
+static __rte_always_inline void
+rte_wait_until_unequal_16(volatile uint16_t *addr, uint16_t original,
+		int memorder)
+{
+	uint16_t value;
+
+	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
+
+	__LOAD_EXC_16(addr, value, memorder)
+	if (value == original) {
+		__SEVL()
+		do {
+			__WFE()
+			__LOAD_EXC_16(addr, value, memorder)
+		} while (value == original);
+	}
+}
+
+static __rte_always_inline void
+rte_wait_until_unequal_32(volatile uint32_t *addr, uint32_t original,
+		int memorder)
+{
+	uint32_t value;
+
+	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
+
+	__LOAD_EXC_32(addr, value, memorder)
+	if (value == original) {
+		__SEVL()
+		do {
+			__WFE()
+			__LOAD_EXC_32(addr, value, memorder)
+		} while (value == original);
+	}
+}
+
+static __rte_always_inline void
+rte_wait_until_unequal_64(volatile uint64_t *addr, uint64_t original,
+		int memorder)
+{
+	uint64_t value;
+
+	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
+
+	__LOAD_EXC_64(addr, value, memorder)
+	if (value == original) {
+		__SEVL()
+		do {
+			__WFE()
+			__LOAD_EXC_64(addr, value, memorder)
+		} while (value == original);
+	}
+}
+
+static __rte_always_inline void
+rte_wait_until_part_unequal_16(volatile uint16_t *addr, uint16_t mask,
+		uint16_t original, int memorder)
+{
+	uint16_t value;
+
+	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
+
+	__LOAD_EXC_16(addr, value, memorder)
+	if ((value & mask) == original) {
+		__SEVL()
+		do {
+			__WFE()
+			__LOAD_EXC_16(addr, value, memorder)
+		} while ((value & mask) == original);
+	}
+}
+
+static __rte_always_inline void
+rte_wait_until_part_unequal_32(volatile uint32_t *addr, uint32_t mask,
+		uint32_t original, int memorder)
+{
+	uint32_t value;
+
+	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
+
+	__LOAD_EXC_32(addr, value, memorder)
+	if ((value & mask) == original) {
+		__SEVL()
+		do {
+			__WFE()
+			__LOAD_EXC_32(addr, value, memorder)
+		} while ((value & mask) == original);
+	}
+}
+
+static __rte_always_inline void
+rte_wait_until_part_unequal_64(volatile uint64_t *addr, uint64_t mask,
+		uint64_t original, int memorder)
+{
+	uint64_t value;
+
+	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
+
+	__LOAD_EXC_64(addr, value, memorder)
+	if ((value & mask) == original) {
+		__SEVL()
+		do {
+			__WFE()
+			__LOAD_EXC_64(addr, value, memorder)
+		} while ((value & mask) == original);
+	}
+}
+
+#undef __LOAD_EXC_16
+#undef __LOAD_EXC_32
 #undef __LOAD_EXC_64
 
 #undef __SEVL
diff --git a/lib/eal/include/generic/rte_pause.h b/lib/eal/include/generic/rte_pause.h
index 668ee4a184..943a886f01 100644
--- a/lib/eal/include/generic/rte_pause.h
+++ b/lib/eal/include/generic/rte_pause.h
@@ -81,6 +81,222 @@ static __rte_always_inline void
 rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
 		int memorder);
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Wait for part bits of *addr to be equal with a 16-bit expected value, with
+ * a relaxed memory ordering model meaning the loads around this API can be
+ * reordered.
+ *
+ * @param addr
+ *  A pointer to the memory location.
+ * @param mask
+ * value mask of a specific location
+ * @param expected
+ *  A 16-bit expected value to be in the memory location.
+ * @param memorder
+ *  Two different memory orders that can be specified:
+ *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
+ *  C++11 memory orders with the same names, see the C++11 standard or
+ *  the GCC wiki on atomic synchronization for detailed definition.
+ */
+__rte_experimental
+static __rte_always_inline void
+rte_wait_until_part_equal_16(volatile uint16_t *addr, uint16_t mask,
+		uint16_t expected, int memorder);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Wait for part bits of *addr to be equal with a 32-bit expected value, with
+ * a relaxed memory ordering model meaning the loads around this API can be
+ * reordered.
+ *
+ * @param addr
+ *  A pointer to the memory location.
+ * @param mask
+ * value mask of a specific location
+ * @param expected
+ *  A 32-bit expected value to be in the memory location.
+ * @param memorder
+ *  Two different memory orders that can be specified:
+ *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
+ *  C++11 memory orders with the same names, see the C++11 standard or
+ *  the GCC wiki on atomic synchronization for detailed definition.
+ */
+__rte_experimental
+static __rte_always_inline void
+rte_wait_until_part_equal_32(volatile uint32_t *addr, uint32_t mask,
+		uint32_t expected, int memorder);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Wait for part bits of *addr to be equal with a 64-bit expected value, with
+ * a relaxed memory ordering model meaning the loads around this API can be
+ * reordered.
+ *
+ * @param addr
+ *  A pointer to the memory location.
+ * @param mask
+ * value mask of a specific location
+ * @param expected
+ *  A 64-bit expected value to be in the memory location.
+ * @param memorder
+ *  Two different memory orders that can be specified:
+ *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
+ *  C++11 memory orders with the same names, see the C++11 standard or
+ *  the GCC wiki on atomic synchronization for detailed definition.
+ */
+__rte_experimental
+static __rte_always_inline void
+rte_wait_until_part_equal_64(volatile uint64_t *addr, uint64_t mask,
+		uint64_t expected, int memorder);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Wait for *addr to be unequal with a 16-bit original value, with a relaxed
+ * memory ordering model meaning the loads around this API can be reordered.
+ *
+ * @param addr
+ *  A pointer to the memory location.
+ * @param original
+ *  A 16-bit original value to be in the memory location.
+ * @param memorder
+ *  Two different memory orders that can be specified:
+ *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
+ *  C++11 memory orders with the same names, see the C++11 standard or
+ *  the GCC wiki on atomic synchronization for detailed definition.
+ */
+__rte_experimental
+static __rte_always_inline void
+rte_wait_until_unequal_16(volatile uint16_t *addr, uint16_t original,
+		int memorder);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Wait for *addr to be unequal with a 32-bit original value, with a relaxed
+ * memory ordering model meaning the loads around this API can be reordered.
+ *
+ * @param addr
+ *  A pointer to the memory location.
+ * @param original
+ *  A 32-bit original value to be in the memory location.
+ * @param memorder
+ *  Two different memory orders that can be specified:
+ *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
+ *  C++11 memory orders with the same names, see the C++11 standard or
+ *  the GCC wiki on atomic synchronization for detailed definition.
+ */
+__rte_experimental
+static __rte_always_inline void
+rte_wait_until_unequal_32(volatile uint32_t *addr, uint32_t original,
+		int memorder);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Wait for *addr to be unequal with a 64-bit original value, with a relaxed
+ * memory ordering model meaning the loads around this API can be reordered.
+ *
+ * @param addr
+ *  A pointer to the memory location.
+ * @param original
+ *  A 64-bit original value to be in the memory location.
+ * @param memorder
+ *  Two different memory orders that can be specified:
+ *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
+ *  C++11 memory orders with the same names, see the C++11 standard or
+ *  the GCC wiki on atomic synchronization for detailed definition.
+ */
+__rte_experimental
+static __rte_always_inline void
+rte_wait_until_unequal_64(volatile uint64_t *addr, uint64_t original,
+		int memorder);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Wait for part bits of *addr to be unequal with a 16-bit original value, with
+ * a relaxed memory ordering model meaning the loads around this API can be
+ * reordered.
+ *
+ * @param addr
+ *  A pointer to the memory location.
+ * @param mask
+ * value mask of a specific location
+ * @param original
+ *  A 16-bit original value to be in the memory location.
+ * @param memorder
+ *  Two different memory orders that can be specified:
+ *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
+ *  C++11 memory orders with the same names, see the C++11 standard or
+ *  the GCC wiki on atomic synchronization for detailed definition.
+ */
+__rte_experimental
+static __rte_always_inline void
+rte_wait_until_part_unequal_16(volatile uint16_t *addr, uint16_t mask,
+		uint16_t original, int memorder);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Wait for part bits of *addr to be unequal with a 32-bit original value, with
+ * a relaxed memory ordering model meaning the loads around this API can be
+ * reordered.
+ *
+ * @param addr
+ *  A pointer to the memory location.
+ * @param mask
+ * value mask of a specific location
+ * @param original
+ *  A 32-bit original value to be in the memory location.
+ * @param memorder
+ *  Two different memory orders that can be specified:
+ *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
+ *  C++11 memory orders with the same names, see the C++11 standard or
+ *  the GCC wiki on atomic synchronization for detailed definition.
+ */
+__rte_experimental
+static __rte_always_inline void
+rte_wait_until_part_unequal_32(volatile uint32_t *addr, uint32_t mask,
+		uint32_t original, int memorder);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Wait for part bits of *addr to be unequal with a 64-bit original value, with
+ * a relaxed memory ordering model meaning the loads around this API can be
+ * reordered.
+ *
+ * @param addr
+ *  A pointer to the memory location.
+ * @param mask
+ * value mask of a specific location
+ * @param original
+ *  A 64-bit original value to be in the memory location.
+ * @param memorder
+ *  Two different memory orders that can be specified:
+ *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
+ *  C++11 memory orders with the same names, see the C++11 standard or
+ *  the GCC wiki on atomic synchronization for detailed definition.
+ */
+__rte_experimental
+static __rte_always_inline void
+rte_wait_until_part_unequal_64(volatile uint64_t *addr, uint64_t mask,
+		uint64_t original, int memorder);
+
 #ifndef RTE_WAIT_UNTIL_EQUAL_ARCH_DEFINED
 static __rte_always_inline void
 rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
@@ -111,6 +327,99 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
 	while (__atomic_load_n(addr, memorder) != expected)
 		rte_pause();
 }
+
+static __rte_always_inline void
+rte_wait_until_part_equal_16(volatile uint16_t *addr, uint16_t mask,
+		uint16_t expected, int memorder)
+{
+	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
+
+	while ((__atomic_load_n(addr, memorder) & mask) != expected)
+		rte_pause();
+
+}
+
+static __rte_always_inline void
+rte_wait_until_part_equal_32(volatile uint32_t *addr, uint32_t mask,
+		uint32_t expected, int memorder)
+{
+	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
+
+	while ((__atomic_load_n(addr, memorder) & mask) != expected)
+		rte_pause();
+
+}
+
+static __rte_always_inline void
+rte_wait_until_part_equal_64(volatile uint64_t *addr, uint64_t mask,
+		uint64_t expected, int memorder)
+{
+	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
+
+	while ((__atomic_load_n(addr, memorder) & mask) != expected)
+		rte_pause();
+
+}
+
+static __rte_always_inline void
+rte_wait_until_unequal_16(volatile uint16_t *addr, uint16_t original,
+		int memorder)
+{
+	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
+
+	while (__atomic_load_n(addr, memorder) == original)
+		rte_pause();
+}
+
+static __rte_always_inline void
+rte_wait_until_unequal_32(volatile uint32_t *addr, uint32_t original,
+		int memorder)
+{
+	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
+
+	while (__atomic_load_n(addr, memorder) == original)
+		rte_pause();
+}
+
+static __rte_always_inline void
+rte_wait_until_unequal_64(volatile uint64_t *addr, uint64_t original,
+		int memorder)
+{
+	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
+
+	while (__atomic_load_n(addr, memorder) == original)
+		rte_pause();
+}
+
+static __rte_always_inline void
+rte_wait_until_part_unequal_16(volatile uint16_t *addr, uint16_t mask,
+		uint16_t original, int memorder)
+{
+	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
+
+	while ((__atomic_load_n(addr, memorder) & mask) == original)
+		rte_pause();
+}
+
+static __rte_always_inline void
+rte_wait_until_part_unequal_32(volatile uint32_t *addr, uint32_t mask,
+		uint32_t original, int memorder)
+{
+	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
+
+	while ((__atomic_load_n(addr, memorder) & mask) == original)
+		rte_pause();
+}
+
+static __rte_always_inline void
+rte_wait_until_part_unequal_64(volatile uint64_t *addr, uint64_t mask,
+		uint64_t original, int memorder)
+{
+	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
+
+	while ((__atomic_load_n(addr, memorder) & mask) == original)
+		rte_pause();
+}
 #endif
 
 #endif /* _RTE_PAUSE_H_ */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [RFC PATCH v1 2/5] eal: use wait until scheme for read pflock
  2021-09-02  5:32 [dpdk-dev] [RFC PATCH v1 0/5] add new API for wait until scheme Feifei Wang
  2021-09-02  5:32 ` [dpdk-dev] [RFC PATCH v1 1/5] eal: " Feifei Wang
@ 2021-09-02  5:32 ` Feifei Wang
  2021-09-02  5:32 ` [dpdk-dev] [RFC PATCH v1 3/5] eal: use wait until scheme for mcslock Feifei Wang
                   ` (10 subsequent siblings)
  12 siblings, 0 replies; 113+ messages in thread
From: Feifei Wang @ 2021-09-02  5:32 UTC (permalink / raw)
  Cc: dev, nd, Feifei Wang, Ruifeng Wang

Instead of polling for read pflock update, use wait_until_part_unequal
for this case.

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 lib/eal/include/generic/rte_pflock.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/lib/eal/include/generic/rte_pflock.h b/lib/eal/include/generic/rte_pflock.h
index e57c179ef2..5298dec7b8 100644
--- a/lib/eal/include/generic/rte_pflock.h
+++ b/lib/eal/include/generic/rte_pflock.h
@@ -121,9 +121,8 @@ rte_pflock_read_lock(rte_pflock_t *pf)
 		return;
 
 	/* Wait for current write phase to complete. */
-	while ((__atomic_load_n(&pf->rd.in, __ATOMIC_ACQUIRE)
-		& RTE_PFLOCK_WBITS) == w)
-		rte_pause();
+	rte_wait_until_part_unequal_16(&pf->rd.in,
+			RTE_PFLOCK_WBITS, w, __ATOMIC_ACQUIRE);
 }
 
 /**
-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [RFC PATCH v1 3/5] eal: use wait until scheme for mcslock
  2021-09-02  5:32 [dpdk-dev] [RFC PATCH v1 0/5] add new API for wait until scheme Feifei Wang
  2021-09-02  5:32 ` [dpdk-dev] [RFC PATCH v1 1/5] eal: " Feifei Wang
  2021-09-02  5:32 ` [dpdk-dev] [RFC PATCH v1 2/5] eal: use wait until scheme for read pflock Feifei Wang
@ 2021-09-02  5:32 ` Feifei Wang
  2021-09-02  5:32 ` [dpdk-dev] [RFC PATCH v1 4/5] lib/bpf: use wait until scheme for Rx/Tx iteration Feifei Wang
                   ` (9 subsequent siblings)
  12 siblings, 0 replies; 113+ messages in thread
From: Feifei Wang @ 2021-09-02  5:32 UTC (permalink / raw)
  To: Honnappa Nagarahalli; +Cc: dev, nd, Feifei Wang, Ruifeng Wang

Instead of polling for mcslock to be updated, use wait_until_unequal
for this case.

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 lib/eal/include/generic/rte_mcslock.h | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/lib/eal/include/generic/rte_mcslock.h b/lib/eal/include/generic/rte_mcslock.h
index 9f323bd2a2..dabad0d4e0 100644
--- a/lib/eal/include/generic/rte_mcslock.h
+++ b/lib/eal/include/generic/rte_mcslock.h
@@ -117,8 +117,13 @@ rte_mcslock_unlock(rte_mcslock_t **msl, rte_mcslock_t *me)
 		/* More nodes added to the queue by other CPUs.
 		 * Wait until the next pointer is set.
 		 */
-		while (__atomic_load_n(&me->next, __ATOMIC_RELAXED) == NULL)
-			rte_pause();
+#ifdef RTE_ARCH_32
+		rte_wait_until_unequal_32((volatile uint32_t *)&me->next,
+				0, __ATOMIC_RELAXED);
+#else
+		rte_wait_until_unequal_64((volatile uint64_t *)&me->next,
+				0, __ATOMIC_RELAXED);
+#endif
 	}
 
 	/* Pass lock to next waiter. */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [RFC PATCH v1 4/5] lib/bpf: use wait until scheme for Rx/Tx iteration
  2021-09-02  5:32 [dpdk-dev] [RFC PATCH v1 0/5] add new API for wait until scheme Feifei Wang
                   ` (2 preceding siblings ...)
  2021-09-02  5:32 ` [dpdk-dev] [RFC PATCH v1 3/5] eal: use wait until scheme for mcslock Feifei Wang
@ 2021-09-02  5:32 ` Feifei Wang
  2021-09-02  5:32 ` [dpdk-dev] [RFC PATCH v1 5/5] lib/distributor: use wait until scheme Feifei Wang
                   ` (8 subsequent siblings)
  12 siblings, 0 replies; 113+ messages in thread
From: Feifei Wang @ 2021-09-02  5:32 UTC (permalink / raw)
  To: Konstantin Ananyev, Ferruh Yigit
  Cc: dev, nd, Feifei Wang, stable, Ruifeng Wang

First, fix the bug that the 'const' keyword of the function argument
should be placed after "*". This is because 'const' before "*" means the
value pointed to by "cbi" cannot be changed. But we need to monitor
changes to cbi->use so that we can jump out of the loop.

Second, instead of polling for cbi->use to be updated, use
wait_until_unequal api.

Fixes: a93ff62a8938 ("bpf: introduce basic Rx/Tx filters")
Cc: konstantin.ananyev@intel.com
Cc: stable@dpdk.org

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 lib/bpf/bpf_pkt.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/lib/bpf/bpf_pkt.c b/lib/bpf/bpf_pkt.c
index 6e8248f0d6..ed63e00219 100644
--- a/lib/bpf/bpf_pkt.c
+++ b/lib/bpf/bpf_pkt.c
@@ -111,9 +111,9 @@ bpf_eth_cbi_unuse(struct bpf_eth_cbi *cbi)
  * Waits till datapath finished using given callback.
  */
 static void
-bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
+bpf_eth_cbi_wait(struct bpf_eth_cbi *const cbi)
 {
-	uint32_t nuse, puse;
+	uint32_t puse;
 
 	/* make sure all previous loads and stores are completed */
 	rte_smp_mb();
@@ -122,11 +122,8 @@ bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
 
 	/* in use, busy wait till current RX/TX iteration is finished */
 	if ((puse & BPF_ETH_CBI_INUSE) != 0) {
-		do {
-			rte_pause();
-			rte_compiler_barrier();
-			nuse = cbi->use;
-		} while (nuse == puse);
+		rte_compiler_barrier();
+		rte_wait_until_unequal_32(&cbi->use, puse, __ATOMIC_RELAXED);
 	}
 }
 
-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [RFC PATCH v1 5/5] lib/distributor: use wait until scheme
  2021-09-02  5:32 [dpdk-dev] [RFC PATCH v1 0/5] add new API for wait until scheme Feifei Wang
                   ` (3 preceding siblings ...)
  2021-09-02  5:32 ` [dpdk-dev] [RFC PATCH v1 4/5] lib/bpf: use wait until scheme for Rx/Tx iteration Feifei Wang
@ 2021-09-02  5:32 ` Feifei Wang
  2021-09-02 15:22 ` [dpdk-dev] [RFC PATCH v1 0/5] add new API for " Stephen Hemminger
                   ` (7 subsequent siblings)
  12 siblings, 0 replies; 113+ messages in thread
From: Feifei Wang @ 2021-09-02  5:32 UTC (permalink / raw)
  To: David Hunt; +Cc: dev, nd, Feifei Wang, Ruifeng Wang

Instead of polling for bufptr64 to be updated, use
rte_wait_until_part_equal for this case.

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 lib/distributor/rte_distributor_single.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/lib/distributor/rte_distributor_single.c b/lib/distributor/rte_distributor_single.c
index f4725b1d0b..95de42f41a 100644
--- a/lib/distributor/rte_distributor_single.c
+++ b/lib/distributor/rte_distributor_single.c
@@ -33,9 +33,8 @@ rte_distributor_request_pkt_single(struct rte_distributor_single *d,
 	union rte_distributor_buffer_single *buf = &d->bufs[worker_id];
 	int64_t req = (((int64_t)(uintptr_t)oldpkt) << RTE_DISTRIB_FLAG_BITS)
 			| RTE_DISTRIB_GET_BUF;
-	while (unlikely(__atomic_load_n(&buf->bufptr64, __ATOMIC_RELAXED)
-			& RTE_DISTRIB_FLAGS_MASK))
-		rte_pause();
+	rte_wait_until_part_equal_64((volatile uint64_t *)&buf->bufptr64,
+			RTE_DISTRIB_FLAGS_MASK, 0, __ATOMIC_RELAXED);
 
 	/* Sync with distributor on GET_BUF flag. */
 	__atomic_store_n(&(buf->bufptr64), req, __ATOMIC_RELEASE);
@@ -74,9 +73,8 @@ rte_distributor_return_pkt_single(struct rte_distributor_single *d,
 	union rte_distributor_buffer_single *buf = &d->bufs[worker_id];
 	uint64_t req = (((int64_t)(uintptr_t)oldpkt) << RTE_DISTRIB_FLAG_BITS)
 			| RTE_DISTRIB_RETURN_BUF;
-	while (unlikely(__atomic_load_n(&buf->bufptr64, __ATOMIC_RELAXED)
-			& RTE_DISTRIB_FLAGS_MASK))
-		rte_pause();
+	rte_wait_until_part_equal_64((volatile uint64_t *)&buf->bufptr64,
+			RTE_DISTRIB_FLAGS_MASK, 0, __ATOMIC_RELAXED);
 
 	/* Sync with distributor on RETURN_BUF flag. */
 	__atomic_store_n(&(buf->bufptr64), req, __ATOMIC_RELEASE);
-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [dpdk-dev] [RFC PATCH v1 0/5] add new API for wait until scheme
  2021-09-02  5:32 [dpdk-dev] [RFC PATCH v1 0/5] add new API for wait until scheme Feifei Wang
                   ` (4 preceding siblings ...)
  2021-09-02  5:32 ` [dpdk-dev] [RFC PATCH v1 5/5] lib/distributor: use wait until scheme Feifei Wang
@ 2021-09-02 15:22 ` Stephen Hemminger
  2021-09-03  7:02   ` [dpdk-dev] 回复: " Feifei Wang
  2021-09-23  9:58 ` [dpdk-dev] [RFC PATCH v2 0/5] add new definitions for wait scheme Feifei Wang
                   ` (6 subsequent siblings)
  12 siblings, 1 reply; 113+ messages in thread
From: Stephen Hemminger @ 2021-09-02 15:22 UTC (permalink / raw)
  To: Feifei Wang; +Cc: dev, nd

On Thu,  2 Sep 2021 13:32:48 +0800
Feifei Wang <feifei.wang2@arm.com> wrote:

> Add new API for wait_until scheme, and apply this new API into lib to
> replace rte_pause.
> 
> Feifei Wang (5):
>   eal: add new API for wait until scheme
>   eal: use wait until scheme for read pflock
>   eal: use wait until scheme for mcslock
>   lib/bpf: use wait until scheme for Rx/Tx iteration
>   lib/distributor: use wait until scheme
> 
>  lib/bpf/bpf_pkt.c                        |  11 +-
>  lib/distributor/rte_distributor_single.c |  10 +-
>  lib/eal/arm/include/rte_pause_64.h       | 271 ++++++++++++++++----
>  lib/eal/include/generic/rte_mcslock.h    |   9 +-
>  lib/eal/include/generic/rte_pause.h      | 309 +++++++++++++++++++++++
>  lib/eal/include/generic/rte_pflock.h     |   5 +-
>  6 files changed, 543 insertions(+), 72 deletions(-)
> 

Since these are all inline, would it be possible to make
this a macro and have the caller pass a condition function?

Look at Linux wait_event() for an example of that.

^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] 回复:  [RFC PATCH v1 0/5] add new API for wait until scheme
  2021-09-02 15:22 ` [dpdk-dev] [RFC PATCH v1 0/5] add new API for " Stephen Hemminger
@ 2021-09-03  7:02   ` Feifei Wang
  0 siblings, 0 replies; 113+ messages in thread
From: Feifei Wang @ 2021-09-03  7:02 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dev, nd, nd

Hi, Stephen

Thanks for the review. I think it is a good comment.
According to the comments, we plan to change this API as follows:

#define wait_until_event_16(addr, mask, expected, op, memorder)
	uint16_t value
	__LOAD_EXC_16(addr, value, memorder)
	if ((value&mask) op expected) {
		__SEVL()
		do {
			__WFE()
			__LOAD_EXC_16(addr, value, memorder)
		} while ((value&mask) op expected);

1. According to the size, there will be three definitions: 16/32/64 bits
2. op is defined as a symbol (!= or ==); I'm not sure whether it is legal in DPDK.
3. If the case is not 'wait_part_equal/unequal', mask can be 0xFF. 

Have you any more comments for this change?

Best Regards
Feifei
> -----邮件原件-----
> 发件人: Stephen Hemminger <stephen@networkplumber.org>
> 发送时间: Thursday, September 2, 2021 11:22 PM
> 收件人: Feifei Wang <Feifei.Wang2@arm.com>
> 抄送: dev@dpdk.org; nd <nd@arm.com>
> 主题: Re: [dpdk-dev] [RFC PATCH v1 0/5] add new API for wait until scheme
> 
> On Thu,  2 Sep 2021 13:32:48 +0800
> Feifei Wang <feifei.wang2@arm.com> wrote:
> 
> > Add new API for wait_until scheme, and apply this new API into lib to
> > replace rte_pause.
> >
> > Feifei Wang (5):
> >   eal: add new API for wait until scheme
> >   eal: use wait until scheme for read pflock
> >   eal: use wait until scheme for mcslock
> >   lib/bpf: use wait until scheme for Rx/Tx iteration
> >   lib/distributor: use wait until scheme
> >
> >  lib/bpf/bpf_pkt.c                        |  11 +-
> >  lib/distributor/rte_distributor_single.c |  10 +-
> >  lib/eal/arm/include/rte_pause_64.h       | 271 ++++++++++++++++----
> >  lib/eal/include/generic/rte_mcslock.h    |   9 +-
> >  lib/eal/include/generic/rte_pause.h      | 309 +++++++++++++++++++++++
> >  lib/eal/include/generic/rte_pflock.h     |   5 +-
> >  6 files changed, 543 insertions(+), 72 deletions(-)
> >
> 
> Since these are all inline, would it be possible to make this a macro and have
> the caller pass a condition function?
> 
> Look at Linux wait_event() for an example of that.

^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [RFC PATCH v2 0/5] add new definitions for wait scheme
  2021-09-02  5:32 [dpdk-dev] [RFC PATCH v1 0/5] add new API for wait until scheme Feifei Wang
                   ` (5 preceding siblings ...)
  2021-09-02 15:22 ` [dpdk-dev] [RFC PATCH v1 0/5] add new API for " Stephen Hemminger
@ 2021-09-23  9:58 ` Feifei Wang
  2021-09-23  9:58   ` [dpdk-dev] [RFC PATCH v2 1/5] eal: " Feifei Wang
                     ` (4 more replies)
  2021-09-26  6:32 ` [dpdk-dev] [RFC PATCH v3 0/5] add new definitions for wait scheme Feifei Wang
                   ` (5 subsequent siblings)
  12 siblings, 5 replies; 113+ messages in thread
From: Feifei Wang @ 2021-09-23  9:58 UTC (permalink / raw)
  Cc: dev, nd, Feifei Wang

Add new definitions for the wait scheme, and apply these new definitions in
lib to replace rte_pause.

v2:
1. use macro to create new wait scheme (Stephen)

Feifei Wang (5):
  eal: add new definitions for wait scheme
  eal: use wait event for read pflock
  eal: use wait event scheme for mcslock
  lib/bpf: use wait event scheme for Rx/Tx iteration
  lib/distributor: use wait event scheme

 lib/bpf/bpf_pkt.c                        |  11 +-
 lib/distributor/rte_distributor_single.c |  10 +-
 lib/eal/arm/include/rte_pause_64.h       | 151 +++++++++++++++--------
 lib/eal/include/generic/rte_mcslock.h    |  12 +-
 lib/eal/include/generic/rte_pause.h      |  78 ++++++++++++
 lib/eal/include/generic/rte_pflock.h     |   4 +-
 6 files changed, 192 insertions(+), 74 deletions(-)

-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [RFC PATCH v2 1/5] eal: add new definitions for wait scheme
  2021-09-23  9:58 ` [dpdk-dev] [RFC PATCH v2 0/5] add new definitions for wait scheme Feifei Wang
@ 2021-09-23  9:58   ` Feifei Wang
  2021-09-23  9:58   ` [dpdk-dev] [RFC PATCH v2 2/5] eal: use wait event for read pflock Feifei Wang
                     ` (3 subsequent siblings)
  4 siblings, 0 replies; 113+ messages in thread
From: Feifei Wang @ 2021-09-23  9:58 UTC (permalink / raw)
  To: Ruifeng Wang; +Cc: dev, nd, Feifei Wang

Introduce macros as generic interface for address monitoring.

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 lib/eal/arm/include/rte_pause_64.h  | 151 ++++++++++++++++++----------
 lib/eal/include/generic/rte_pause.h |  78 ++++++++++++++
 2 files changed, 175 insertions(+), 54 deletions(-)

diff --git a/lib/eal/arm/include/rte_pause_64.h b/lib/eal/arm/include/rte_pause_64.h
index e87d10b8cc..205510e044 100644
--- a/lib/eal/arm/include/rte_pause_64.h
+++ b/lib/eal/arm/include/rte_pause_64.h
@@ -31,20 +31,12 @@ static inline void rte_pause(void)
 /* Put processor into low power WFE(Wait For Event) state. */
 #define __WFE() { asm volatile("wfe" : : : "memory"); }
 
-static __rte_always_inline void
-rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
-		int memorder)
-{
-	uint16_t value;
-
-	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
-
-	/*
-	 * Atomic exclusive load from addr, it returns the 16-bit content of
-	 * *addr while making it 'monitored',when it is written by someone
-	 * else, the 'monitored' state is cleared and a event is generated
-	 * implicitly to exit WFE.
-	 */
+/*
+ * Atomic exclusive load from addr, it returns the 16-bit content of
+ * *addr while making it 'monitored', when it is written by someone
+ * else, the 'monitored' state is cleared and a event is generated
+ * implicitly to exit WFE.
+ */
 #define __LOAD_EXC_16(src, dst, memorder) {               \
 	if (memorder == __ATOMIC_RELAXED) {               \
 		asm volatile("ldxrh %w[tmp], [%x[addr]]"  \
@@ -58,6 +50,52 @@ rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
 			: "memory");                      \
 	} }
 
+/*
+ * Atomic exclusive load from addr, it returns the 32-bit content of
+ * *addr while making it 'monitored', when it is written by someone
+ * else, the 'monitored' state is cleared and a event is generated
+ * implicitly to exit WFE.
+ */
+#define __LOAD_EXC_32(src, dst, memorder) {              \
+	if (memorder == __ATOMIC_RELAXED) {              \
+		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
+			: [tmp] "=&r" (dst)              \
+			: [addr] "r"(src)                \
+			: "memory");                     \
+	} else {                                         \
+		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
+			: [tmp] "=&r" (dst)              \
+			: [addr] "r"(src)                \
+			: "memory");                     \
+	} }
+
+/*
+ * Atomic exclusive load from addr, it returns the 64-bit content of
+ * *addr while making it 'monitored', when it is written by someone
+ * else, the 'monitored' state is cleared and a event is generated
+ * implicitly to exit WFE.
+ */
+#define __LOAD_EXC_64(src, dst, memorder) {              \
+	if (memorder == __ATOMIC_RELAXED) {              \
+		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
+			: [tmp] "=&r" (dst)              \
+			: [addr] "r"(src)                \
+			: "memory");                     \
+	} else {                                         \
+		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
+			: [tmp] "=&r" (dst)              \
+			: [addr] "r"(src)                \
+			: "memory");                     \
+	} }
+
+static __rte_always_inline void
+rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
+		int memorder)
+{
+	uint16_t value;
+
+	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
+
 	__LOAD_EXC_16(addr, value, memorder)
 	if (value != expected) {
 		__SEVL()
@@ -66,7 +104,6 @@ rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
 			__LOAD_EXC_16(addr, value, memorder)
 		} while (value != expected);
 	}
-#undef __LOAD_EXC_16
 }
 
 static __rte_always_inline void
@@ -77,25 +114,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
 
 	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
 
-	/*
-	 * Atomic exclusive load from addr, it returns the 32-bit content of
-	 * *addr while making it 'monitored',when it is written by someone
-	 * else, the 'monitored' state is cleared and a event is generated
-	 * implicitly to exit WFE.
-	 */
-#define __LOAD_EXC_32(src, dst, memorder) {              \
-	if (memorder == __ATOMIC_RELAXED) {              \
-		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
-			: [tmp] "=&r" (dst)              \
-			: [addr] "r"(src)                \
-			: "memory");                     \
-	} else {                                         \
-		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
-			: [tmp] "=&r" (dst)              \
-			: [addr] "r"(src)                \
-			: "memory");                     \
-	} }
-
 	__LOAD_EXC_32(addr, value, memorder)
 	if (value != expected) {
 		__SEVL()
@@ -104,7 +122,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
 			__LOAD_EXC_32(addr, value, memorder)
 		} while (value != expected);
 	}
-#undef __LOAD_EXC_32
 }
 
 static __rte_always_inline void
@@ -115,25 +132,6 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
 
 	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
 
-	/*
-	 * Atomic exclusive load from addr, it returns the 64-bit content of
-	 * *addr while making it 'monitored',when it is written by someone
-	 * else, the 'monitored' state is cleared and a event is generated
-	 * implicitly to exit WFE.
-	 */
-#define __LOAD_EXC_64(src, dst, memorder) {              \
-	if (memorder == __ATOMIC_RELAXED) {              \
-		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
-			: [tmp] "=&r" (dst)              \
-			: [addr] "r"(src)                \
-			: "memory");                     \
-	} else {                                         \
-		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
-			: [tmp] "=&r" (dst)              \
-			: [addr] "r"(src)                \
-			: "memory");                     \
-	} }
-
 	__LOAD_EXC_64(addr, value, memorder)
 	if (value != expected) {
 		__SEVL()
@@ -143,6 +141,51 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
 		} while (value != expected);
 	}
 }
+
+#define rte_wait_event_16(addr, mask, expected, cond, memorder)                \
+do {									       \
+	uint16_t value                                                         \
+	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);  \
+	__LOAD_EXC_16(addr, value, memorder)				       \
+	if ((value & mask) cond expected) {				       \
+		__SEVL()						       \
+		do {							       \
+			__WFE()						       \
+			__LOAD_EXC_16(addr, value, memorder)		       \
+		} while ((value & mask) cond expected);			       \
+	}								       \
+} while (0)
+
+#define rte_wait_event_32(addr, mask, expected, cond, memorder)                \
+do {                                                                           \
+	uint32_t value                                                         \
+	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);  \
+	__LOAD_EXC_32(addr, value, memorder)                                   \
+	if ((value & mask) op expected) {                                      \
+		__SEVL()                                                       \
+		do {                                                           \
+			__WFE()                                                \
+			__LOAD_EXC_32(addr, value, memorder)                   \
+		} while ((value & mask) cond expected);                        \
+	}                                                                      \
+} while (0)
+
+#define rte_wait_event_64(addr, mask, expected, cond, memorder)                \
+do {                                                                           \
+	uint64_t value                                                         \
+	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);  \
+	__LOAD_EXC_64(addr, value, memorder)                                   \
+	if ((value & mask) cond expected) {                                    \
+		__SEVL()                                                       \
+		do {                                                           \
+			__WFE()                                                \
+			__LOAD_EXC_64(addr, value, memorder)                   \
+		} while ((value & mask) cond expected);                        \
+	}                                                                      \
+} while (0)
+
+#undef __LOAD_EXC_16
+#undef __LOAD_EXC_32
 #undef __LOAD_EXC_64
 
 #undef __SEVL
diff --git a/lib/eal/include/generic/rte_pause.h b/lib/eal/include/generic/rte_pause.h
index 668ee4a184..4e32107eca 100644
--- a/lib/eal/include/generic/rte_pause.h
+++ b/lib/eal/include/generic/rte_pause.h
@@ -111,6 +111,84 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
 	while (__atomic_load_n(addr, memorder) != expected)
 		rte_pause();
 }
+
+/*
+ * Wait until a 16-bit *addr breaks the condition, with a relaxed memory
+ * ordering model meaning the loads around this API can be reordered.
+ *
+ * @param addr
+ *  A pointer to the memory location.
+ * @param mask
+ *  A mask of value bits in interest
+ * @param expected
+ *  A 16-bit expected value to be in the memory location.
+ * @param cond
+ *  A symbol representing the condition (==, !=).
+ * @param memorder
+ *  Two different memory orders that can be specified:
+ *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
+ *  C++11 memory orders with the same names, see the C++11 standard or
+ *  the GCC wiki on atomic synchronization for detailed definition.
+ */
+#define rte_wait_event_16(addr, mask, expected, cond, memorder)		       \
+do {									       \
+	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);  \
+									       \
+	while ((__atomic_load_n(addr, memorder) & mask) cond expected)	       \
+		rte_pause();						       \
+} while (0)
+
+/*
+ * Wait until a 32-bit *addr breaks the condition, with a relaxed memory
+ * ordering model meaning the loads around this API can be reordered.
+ *
+ * @param addr
+ *  A pointer to the memory location.
+ * @param mask
+ *  A mask of value bits in interest.
+ * @param expected
+ *  A 32-bit expected value to be in the memory location.
+ * @param cond
+ *  A symbol representing the condition (==, !=).
+ * @param memorder
+ *  Two different memory orders that can be specified:
+ *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
+ *  C++11 memory orders with the same names, see the C++11 standard or
+ *  the GCC wiki on atomic synchronization for detailed definition.
+ */
+#define rte_wait_event_32(addr, mask, expected, cond, memorder)		       \
+do {									       \
+	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);  \
+									       \
+	while ((__atomic_load_n(addr, memorder) & mask) cond expected)	       \
+		rte_pause();						       \
+} while (0)
+
+/*
+ * Wait until a 64-bit *addr breaks the condition, with a relaxed memory
+ * ordering model meaning the loads around this API can be reordered.
+ *
+ * @param addr
+ *  A pointer to the memory location.
+ * @param mask
+ *  A mask of value bits in interest
+ * @param expected
+ *  A 64-bit expected value to be in the memory location.
+ * @param cond
+ *  A symbol representing the condition (==, !=).
+ * @param memorder
+ *  Two different memory orders that can be specified:
+ *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
+ *  C++11 memory orders with the same names, see the C++11 standard or
+ *  the GCC wiki on atomic synchronization for detailed definition.
+ */
+#define rte_wait_event_64(addr, mask, expected, cond, memorder)		       \
+do {									       \
+	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);  \
+									       \
+	while ((__atomic_load_n(addr, memorder) & mask) cond expected)	       \
+		rte_pause();						       \
+} while (0)
 #endif
 
 #endif /* _RTE_PAUSE_H_ */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [RFC PATCH v2 2/5] eal: use wait event for read pflock
  2021-09-23  9:58 ` [dpdk-dev] [RFC PATCH v2 0/5] add new definitions for wait scheme Feifei Wang
  2021-09-23  9:58   ` [dpdk-dev] [RFC PATCH v2 1/5] eal: " Feifei Wang
@ 2021-09-23  9:58   ` Feifei Wang
  2021-09-23  9:59   ` [dpdk-dev] [RFC PATCH v2 3/5] eal: use wait event scheme for mcslock Feifei Wang
                     ` (2 subsequent siblings)
  4 siblings, 0 replies; 113+ messages in thread
From: Feifei Wang @ 2021-09-23  9:58 UTC (permalink / raw)
  Cc: dev, nd, Feifei Wang, Ruifeng Wang

Instead of polling for read pflock update, use wait event scheme for
this case.

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 lib/eal/include/generic/rte_pflock.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/lib/eal/include/generic/rte_pflock.h b/lib/eal/include/generic/rte_pflock.h
index e57c179ef2..9865f1349c 100644
--- a/lib/eal/include/generic/rte_pflock.h
+++ b/lib/eal/include/generic/rte_pflock.h
@@ -121,9 +121,7 @@ rte_pflock_read_lock(rte_pflock_t *pf)
 		return;
 
 	/* Wait for current write phase to complete. */
-	while ((__atomic_load_n(&pf->rd.in, __ATOMIC_ACQUIRE)
-		& RTE_PFLOCK_WBITS) == w)
-		rte_pause();
+	rte_wait_event_16(&pf->rd.in, RTE_PFLOCK_WBITS, w, ==, __ATOMIC_ACQUIRE);
 }
 
 /**
-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [RFC PATCH v2 3/5] eal: use wait event scheme for mcslock
  2021-09-23  9:58 ` [dpdk-dev] [RFC PATCH v2 0/5] add new definitions for wait scheme Feifei Wang
  2021-09-23  9:58   ` [dpdk-dev] [RFC PATCH v2 1/5] eal: " Feifei Wang
  2021-09-23  9:58   ` [dpdk-dev] [RFC PATCH v2 2/5] eal: use wait event for read pflock Feifei Wang
@ 2021-09-23  9:59   ` Feifei Wang
  2021-09-23  9:59   ` [dpdk-dev] [RFC PATCH v2 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration Feifei Wang
  2021-09-23  9:59   ` [dpdk-dev] [RFC PATCH v2 5/5] lib/distributor: use wait event scheme Feifei Wang
  4 siblings, 0 replies; 113+ messages in thread
From: Feifei Wang @ 2021-09-23  9:59 UTC (permalink / raw)
  To: Honnappa Nagarahalli; +Cc: dev, nd, Feifei Wang, Ruifeng Wang

Instead of polling for mcslock to be updated, use wait event scheme
for this case.

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 lib/eal/include/generic/rte_mcslock.h | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/lib/eal/include/generic/rte_mcslock.h b/lib/eal/include/generic/rte_mcslock.h
index 9f323bd2a2..264c04021f 100644
--- a/lib/eal/include/generic/rte_mcslock.h
+++ b/lib/eal/include/generic/rte_mcslock.h
@@ -84,8 +84,7 @@ rte_mcslock_lock(rte_mcslock_t **msl, rte_mcslock_t *me)
 	 * to spin on me->locked until the previous lock holder resets
 	 * the me->locked using mcslock_unlock().
 	 */
-	while (__atomic_load_n(&me->locked, __ATOMIC_ACQUIRE))
-		rte_pause();
+	rte_wait_event_32(&me->locked, INT_MAX, 0, !=, __ATOMIC_ACQUIRE);
 }
 
 /**
@@ -117,8 +116,13 @@ rte_mcslock_unlock(rte_mcslock_t **msl, rte_mcslock_t *me)
 		/* More nodes added to the queue by other CPUs.
 		 * Wait until the next pointer is set.
 		 */
-		while (__atomic_load_n(&me->next, __ATOMIC_RELAXED) == NULL)
-			rte_pause();
+#ifdef RTE_ARCH_32
+		rte_wait_event_32((volatile uint32_t *)&me->next, UINT_MAX, 0, ==,
+				__ATOMIC_RELAXED);
+#else
+		rte_wait_event_64((volatile uint64_t *)&me->next, ULLONG_MAX, 0, ==,
+				__ATOMIC_RELAXED);
+#endif
 	}
 
 	/* Pass lock to next waiter. */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [RFC PATCH v2 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration
  2021-09-23  9:58 ` [dpdk-dev] [RFC PATCH v2 0/5] add new definitions for wait scheme Feifei Wang
                     ` (2 preceding siblings ...)
  2021-09-23  9:59   ` [dpdk-dev] [RFC PATCH v2 3/5] eal: use wait event scheme for mcslock Feifei Wang
@ 2021-09-23  9:59   ` Feifei Wang
  2021-09-24 18:07     ` Ananyev, Konstantin
  2021-09-23  9:59   ` [dpdk-dev] [RFC PATCH v2 5/5] lib/distributor: use wait event scheme Feifei Wang
  4 siblings, 1 reply; 113+ messages in thread
From: Feifei Wang @ 2021-09-23  9:59 UTC (permalink / raw)
  To: Konstantin Ananyev, Ferruh Yigit
  Cc: dev, nd, Feifei Wang, stable, Ruifeng Wang

First, fix the bug that keyword const of func arg should be after "*".
This is because const before "*" means the value of "cbi" should not be
changed. But we should monitor that cbi->use changed and then we can
jump out of loop.

Second, instead of polling for cbi->use to be updated, use
wait event scheme.

Fixes: a93ff62a8938 ("bpf: introduce basic Rx/Tx filters")
Cc: konstantin.ananyev@intel.com
Cc: stable@dpdk.org

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 lib/bpf/bpf_pkt.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/lib/bpf/bpf_pkt.c b/lib/bpf/bpf_pkt.c
index 6e8248f0d6..08ed8ff68c 100644
--- a/lib/bpf/bpf_pkt.c
+++ b/lib/bpf/bpf_pkt.c
@@ -111,9 +111,9 @@ bpf_eth_cbi_unuse(struct bpf_eth_cbi *cbi)
  * Waits till datapath finished using given callback.
  */
 static void
-bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
+bpf_eth_cbi_wait(struct bpf_eth_cbi *const cbi)
 {
-	uint32_t nuse, puse;
+	uint32_t puse;
 
 	/* make sure all previous loads and stores are completed */
 	rte_smp_mb();
@@ -122,11 +122,8 @@ bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
 
 	/* in use, busy wait till current RX/TX iteration is finished */
 	if ((puse & BPF_ETH_CBI_INUSE) != 0) {
-		do {
-			rte_pause();
-			rte_compiler_barrier();
-			nuse = cbi->use;
-		} while (nuse == puse);
+		rte_compiler_barrier();
+		rte_wait_event_32(&cbi->use, UINT_MAX, puse, ==, __ATOMIC_RELAXED);
 	}
 }
 
-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [RFC PATCH v2 5/5] lib/distributor: use wait event scheme
  2021-09-23  9:58 ` [dpdk-dev] [RFC PATCH v2 0/5] add new definitions for wait scheme Feifei Wang
                     ` (3 preceding siblings ...)
  2021-09-23  9:59   ` [dpdk-dev] [RFC PATCH v2 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration Feifei Wang
@ 2021-09-23  9:59   ` Feifei Wang
  4 siblings, 0 replies; 113+ messages in thread
From: Feifei Wang @ 2021-09-23  9:59 UTC (permalink / raw)
  To: David Hunt; +Cc: dev, nd, Feifei Wang, Ruifeng Wang

Instead of polling for bufptr64 to be updated, use
wait event for this case.

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 lib/distributor/rte_distributor_single.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/lib/distributor/rte_distributor_single.c b/lib/distributor/rte_distributor_single.c
index f4725b1d0b..815305444a 100644
--- a/lib/distributor/rte_distributor_single.c
+++ b/lib/distributor/rte_distributor_single.c
@@ -33,9 +33,8 @@ rte_distributor_request_pkt_single(struct rte_distributor_single *d,
 	union rte_distributor_buffer_single *buf = &d->bufs[worker_id];
 	int64_t req = (((int64_t)(uintptr_t)oldpkt) << RTE_DISTRIB_FLAG_BITS)
 			| RTE_DISTRIB_GET_BUF;
-	while (unlikely(__atomic_load_n(&buf->bufptr64, __ATOMIC_RELAXED)
-			& RTE_DISTRIB_FLAGS_MASK))
-		rte_pause();
+	rte_wait_event_64((volatile uint64_t *)&buf->bufptr64,
+			RTE_DISTRIB_FLAGS_MASK, 0, !=, __ATOMIC_RELAXED);
 
 	/* Sync with distributor on GET_BUF flag. */
 	__atomic_store_n(&(buf->bufptr64), req, __ATOMIC_RELEASE);
@@ -74,9 +73,8 @@ rte_distributor_return_pkt_single(struct rte_distributor_single *d,
 	union rte_distributor_buffer_single *buf = &d->bufs[worker_id];
 	uint64_t req = (((int64_t)(uintptr_t)oldpkt) << RTE_DISTRIB_FLAG_BITS)
 			| RTE_DISTRIB_RETURN_BUF;
-	while (unlikely(__atomic_load_n(&buf->bufptr64, __ATOMIC_RELAXED)
-			& RTE_DISTRIB_FLAGS_MASK))
-		rte_pause();
+	rte_wait_event_64((volatile uint64_t *)&buf->bufptr64,
+			RTE_DISTRIB_FLAGS_MASK, 0, !=, __ATOMIC_RELAXED);
 
 	/* Sync with distributor on RETURN_BUF flag. */
 	__atomic_store_n(&(buf->bufptr64), req, __ATOMIC_RELEASE);
-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [dpdk-dev] [RFC PATCH v2 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration
  2021-09-23  9:59   ` [dpdk-dev] [RFC PATCH v2 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration Feifei Wang
@ 2021-09-24 18:07     ` Ananyev, Konstantin
  2021-09-26  2:19       ` [dpdk-dev] 回复: " Feifei Wang
  0 siblings, 1 reply; 113+ messages in thread
From: Ananyev, Konstantin @ 2021-09-24 18:07 UTC (permalink / raw)
  To: Feifei Wang, Yigit, Ferruh; +Cc: dev, nd, stable, Ruifeng Wang


> 
> First, fix the bug that keyword const of func arg should be after "*".

I believe there is no bug here.

> This is because const before "*" means the value of "cbi" should not be
> changed. 

Exactly, it says that the function itself will not change the value of "cbi".
It just waits for the value to be changed by someone else.
So please keep parameter list intact.

> But we should monitor that cbi->use changed and then we can
> jump out of loop.
> 
> Second, instead of polling for cbi->use to be updated, use
> wait event scheme.
> 
> Fixes: a93ff62a8938 ("bpf: introduce basic Rx/Tx filters")
> Cc: konstantin.ananyev@intel.com
> Cc: stable@dpdk.org
> 
> Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> ---
>  lib/bpf/bpf_pkt.c | 11 ++++-------
>  1 file changed, 4 insertions(+), 7 deletions(-)
> 
> diff --git a/lib/bpf/bpf_pkt.c b/lib/bpf/bpf_pkt.c
> index 6e8248f0d6..08ed8ff68c 100644
> --- a/lib/bpf/bpf_pkt.c
> +++ b/lib/bpf/bpf_pkt.c
> @@ -111,9 +111,9 @@ bpf_eth_cbi_unuse(struct bpf_eth_cbi *cbi)
>   * Waits till datapath finished using given callback.
>   */
>  static void
> -bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
> +bpf_eth_cbi_wait(struct bpf_eth_cbi *const cbi)
>  {
> -	uint32_t nuse, puse;
> +	uint32_t puse;
> 
>  	/* make sure all previous loads and stores are completed */
>  	rte_smp_mb();
> @@ -122,11 +122,8 @@ bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
> 
>  	/* in use, busy wait till current RX/TX iteration is finished */
>  	if ((puse & BPF_ETH_CBI_INUSE) != 0) {
> -		do {
> -			rte_pause();
> -			rte_compiler_barrier();
> -			nuse = cbi->use;
> -		} while (nuse == puse);
> +		rte_compiler_barrier();
> +		rte_wait_event_32(&cbi->use, UINT_MAX, puse, ==, __ATOMIC_RELAXED);
>  	}
>  }
> 
> --
> 2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] 回复: [RFC PATCH v2 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration
  2021-09-24 18:07     ` Ananyev, Konstantin
@ 2021-09-26  2:19       ` Feifei Wang
  0 siblings, 0 replies; 113+ messages in thread
From: Feifei Wang @ 2021-09-26  2:19 UTC (permalink / raw)
  To: Ananyev, Konstantin, Yigit, Ferruh; +Cc: dev, nd, stable, Ruifeng Wang, nd


> -----邮件原件-----
> 发件人: Ananyev, Konstantin <konstantin.ananyev@intel.com>
> 发送时间: Saturday, September 25, 2021 2:08 AM
> 收件人: Feifei Wang <Feifei.Wang2@arm.com>; Yigit, Ferruh
> <ferruh.yigit@intel.com>
> 抄送: dev@dpdk.org; nd <nd@arm.com>; stable@dpdk.org; Ruifeng Wang
> <Ruifeng.Wang@arm.com>
> 主题: RE: [RFC PATCH v2 4/5] lib/bpf: use wait event scheme for Rx/Tx
> iteration
> 
> 
> >
> > First, fix the bug that keyword const of func arg should be after "*".
> 
> I believe there is no bug here.
> 
> > This is because const before "*" means the value of "cbi" should not
> > be changed.
> 
> Exactly, it says that the function itself will not change the value of "cbi".
> It just waits for the value to be changed by someone else.
> So please keep parameter list intact.

Thanks for your explanation. The reason I changed it is that I previously used the rte_wait_until_xx(volatile *addr) API here,
and there is a conflict between "const" and "volatile", so the compiler will report a warning here.
But now I think that if I keep it as it is, there will be no warning, because the new macro has no "volatile".
I will delete this unnecessary bug fix.
> 
> > But we should monitor that cbi->use changed and then we can jump out
> > of loop.
> >
> > Second, instead of polling for cbi->use to be updated, use wait event
> > scheme.
> >
> > Fixes: a93ff62a8938 ("bpf: introduce basic Rx/Tx filters")
> > Cc: konstantin.ananyev@intel.com
> > Cc: stable@dpdk.org
> >
> > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > ---
> >  lib/bpf/bpf_pkt.c | 11 ++++-------
> >  1 file changed, 4 insertions(+), 7 deletions(-)
> >
> > diff --git a/lib/bpf/bpf_pkt.c b/lib/bpf/bpf_pkt.c index
> > 6e8248f0d6..08ed8ff68c 100644
> > --- a/lib/bpf/bpf_pkt.c
> > +++ b/lib/bpf/bpf_pkt.c
> > @@ -111,9 +111,9 @@ bpf_eth_cbi_unuse(struct bpf_eth_cbi *cbi)
> >   * Waits till datapath finished using given callback.
> >   */
> >  static void
> > -bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
> > +bpf_eth_cbi_wait(struct bpf_eth_cbi *const cbi)
> >  {
> > -	uint32_t nuse, puse;
> > +	uint32_t puse;
> >
> >  	/* make sure all previous loads and stores are completed */
> >  	rte_smp_mb();
> > @@ -122,11 +122,8 @@ bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
> >
> >  	/* in use, busy wait till current RX/TX iteration is finished */
> >  	if ((puse & BPF_ETH_CBI_INUSE) != 0) {
> > -		do {
> > -			rte_pause();
> > -			rte_compiler_barrier();
> > -			nuse = cbi->use;
> > -		} while (nuse == puse);
> > +		rte_compiler_barrier();
> > +		rte_wait_event_32(&cbi->use, UINT_MAX, puse, ==,
> __ATOMIC_RELAXED);
> >  	}
> >  }
> >
> > --
> > 2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [RFC PATCH v3 0/5] add new definitions for wait scheme
  2021-09-02  5:32 [dpdk-dev] [RFC PATCH v1 0/5] add new API for wait until scheme Feifei Wang
                   ` (6 preceding siblings ...)
  2021-09-23  9:58 ` [dpdk-dev] [RFC PATCH v2 0/5] add new definitions for wait scheme Feifei Wang
@ 2021-09-26  6:32 ` Feifei Wang
  2021-09-26  6:32   ` [dpdk-dev] [RFC PATCH v3 1/5] eal: " Feifei Wang
                     ` (5 more replies)
  2021-10-26  8:01 ` [dpdk-dev] [PATCH v5 0/5] add new definitions for wait scheme Feifei Wang
                   ` (4 subsequent siblings)
  12 siblings, 6 replies; 113+ messages in thread
From: Feifei Wang @ 2021-09-26  6:32 UTC (permalink / raw)
  Cc: dev, nd, Feifei Wang

Add new definitions for the wait scheme, and apply these new definitions in
lib to replace rte_pause.

v2:
1. use macro to create new wait scheme (Stephen)

v3:
1. delete unnecessary bug fix in bpf (Konstantin)

Feifei Wang (5):
  eal: add new definitions for wait scheme
  eal: use wait event for read pflock
  eal: use wait event scheme for mcslock
  lib/bpf: use wait event scheme for Rx/Tx iteration
  lib/distributor: use wait event scheme

 lib/bpf/bpf_pkt.c                        |   9 +-
 lib/distributor/rte_distributor_single.c |  10 +-
 lib/eal/arm/include/rte_pause_64.h       | 151 +++++++++++++++--------
 lib/eal/include/generic/rte_mcslock.h    |  12 +-
 lib/eal/include/generic/rte_pause.h      |  78 ++++++++++++
 lib/eal/include/generic/rte_pflock.h     |   4 +-
 6 files changed, 191 insertions(+), 73 deletions(-)

-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [RFC PATCH v3 1/5] eal: add new definitions for wait scheme
  2021-09-26  6:32 ` [dpdk-dev] [RFC PATCH v3 0/5] add new definitions for wait scheme Feifei Wang
@ 2021-09-26  6:32   ` Feifei Wang
  2021-10-07 16:18     ` Ananyev, Konstantin
  2021-09-26  6:32   ` [dpdk-dev] [RFC PATCH v3 2/5] eal: use wait event for read pflock Feifei Wang
                     ` (4 subsequent siblings)
  5 siblings, 1 reply; 113+ messages in thread
From: Feifei Wang @ 2021-09-26  6:32 UTC (permalink / raw)
  To: Ruifeng Wang; +Cc: dev, nd, Feifei Wang

Introduce macros as generic interface for address monitoring.

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 lib/eal/arm/include/rte_pause_64.h  | 151 ++++++++++++++++++----------
 lib/eal/include/generic/rte_pause.h |  78 ++++++++++++++
 2 files changed, 175 insertions(+), 54 deletions(-)

diff --git a/lib/eal/arm/include/rte_pause_64.h b/lib/eal/arm/include/rte_pause_64.h
index e87d10b8cc..205510e044 100644
--- a/lib/eal/arm/include/rte_pause_64.h
+++ b/lib/eal/arm/include/rte_pause_64.h
@@ -31,20 +31,12 @@ static inline void rte_pause(void)
 /* Put processor into low power WFE(Wait For Event) state. */
 #define __WFE() { asm volatile("wfe" : : : "memory"); }
 
-static __rte_always_inline void
-rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
-		int memorder)
-{
-	uint16_t value;
-
-	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
-
-	/*
-	 * Atomic exclusive load from addr, it returns the 16-bit content of
-	 * *addr while making it 'monitored',when it is written by someone
-	 * else, the 'monitored' state is cleared and a event is generated
-	 * implicitly to exit WFE.
-	 */
+/*
+ * Atomic exclusive load from addr, it returns the 16-bit content of
+ * *addr while making it 'monitored', when it is written by someone
+ * else, the 'monitored' state is cleared and an event is generated
+ * implicitly to exit WFE.
+ */
 #define __LOAD_EXC_16(src, dst, memorder) {               \
 	if (memorder == __ATOMIC_RELAXED) {               \
 		asm volatile("ldxrh %w[tmp], [%x[addr]]"  \
@@ -58,6 +50,52 @@ rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
 			: "memory");                      \
 	} }
 
+/*
+ * Atomic exclusive load from addr, it returns the 32-bit content of
+ * *addr while making it 'monitored', when it is written by someone
+ * else, the 'monitored' state is cleared and an event is generated
+ * implicitly to exit WFE.
+ */
+#define __LOAD_EXC_32(src, dst, memorder) {              \
+	if (memorder == __ATOMIC_RELAXED) {              \
+		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
+			: [tmp] "=&r" (dst)              \
+			: [addr] "r"(src)                \
+			: "memory");                     \
+	} else {                                         \
+		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
+			: [tmp] "=&r" (dst)              \
+			: [addr] "r"(src)                \
+			: "memory");                     \
+	} }
+
+/*
+ * Atomic exclusive load from addr, it returns the 64-bit content of
+ * *addr while making it 'monitored', when it is written by someone
+ * else, the 'monitored' state is cleared and an event is generated
+ * implicitly to exit WFE.
+ */
+#define __LOAD_EXC_64(src, dst, memorder) {              \
+	if (memorder == __ATOMIC_RELAXED) {              \
+		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
+			: [tmp] "=&r" (dst)              \
+			: [addr] "r"(src)                \
+			: "memory");                     \
+	} else {                                         \
+		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
+			: [tmp] "=&r" (dst)              \
+			: [addr] "r"(src)                \
+			: "memory");                     \
+	} }
+
+static __rte_always_inline void
+rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
+		int memorder)
+{
+	uint16_t value;
+
+	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
+
 	__LOAD_EXC_16(addr, value, memorder)
 	if (value != expected) {
 		__SEVL()
@@ -66,7 +104,6 @@ rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
 			__LOAD_EXC_16(addr, value, memorder)
 		} while (value != expected);
 	}
-#undef __LOAD_EXC_16
 }
 
 static __rte_always_inline void
@@ -77,25 +114,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
 
 	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
 
-	/*
-	 * Atomic exclusive load from addr, it returns the 32-bit content of
-	 * *addr while making it 'monitored',when it is written by someone
-	 * else, the 'monitored' state is cleared and a event is generated
-	 * implicitly to exit WFE.
-	 */
-#define __LOAD_EXC_32(src, dst, memorder) {              \
-	if (memorder == __ATOMIC_RELAXED) {              \
-		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
-			: [tmp] "=&r" (dst)              \
-			: [addr] "r"(src)                \
-			: "memory");                     \
-	} else {                                         \
-		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
-			: [tmp] "=&r" (dst)              \
-			: [addr] "r"(src)                \
-			: "memory");                     \
-	} }
-
 	__LOAD_EXC_32(addr, value, memorder)
 	if (value != expected) {
 		__SEVL()
@@ -104,7 +122,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
 			__LOAD_EXC_32(addr, value, memorder)
 		} while (value != expected);
 	}
-#undef __LOAD_EXC_32
 }
 
 static __rte_always_inline void
@@ -115,25 +132,6 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
 
 	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
 
-	/*
-	 * Atomic exclusive load from addr, it returns the 64-bit content of
-	 * *addr while making it 'monitored',when it is written by someone
-	 * else, the 'monitored' state is cleared and a event is generated
-	 * implicitly to exit WFE.
-	 */
-#define __LOAD_EXC_64(src, dst, memorder) {              \
-	if (memorder == __ATOMIC_RELAXED) {              \
-		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
-			: [tmp] "=&r" (dst)              \
-			: [addr] "r"(src)                \
-			: "memory");                     \
-	} else {                                         \
-		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
-			: [tmp] "=&r" (dst)              \
-			: [addr] "r"(src)                \
-			: "memory");                     \
-	} }
-
 	__LOAD_EXC_64(addr, value, memorder)
 	if (value != expected) {
 		__SEVL()
@@ -143,6 +141,51 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
 		} while (value != expected);
 	}
 }
+
+#define rte_wait_event_16(addr, mask, expected, cond, memorder)                \
+do {									       \
+	uint16_t value;                                                        \
+	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);  \
+	__LOAD_EXC_16(addr, value, memorder)				       \
+	if ((value & mask) cond expected) {				       \
+		__SEVL()						       \
+		do {							       \
+			__WFE()						       \
+			__LOAD_EXC_16(addr, value, memorder)		       \
+		} while ((value & mask) cond expected);			       \
+	}								       \
+} while (0)
+
+#define rte_wait_event_32(addr, mask, expected, cond, memorder)                \
+do {                                                                           \
+	uint32_t value;                                                        \
+	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);  \
+	__LOAD_EXC_32(addr, value, memorder)                                   \
+	if ((value & mask) cond expected) {                                    \
+		__SEVL()                                                       \
+		do {                                                           \
+			__WFE()                                                \
+			__LOAD_EXC_32(addr, value, memorder)                   \
+		} while ((value & mask) cond expected);                        \
+	}                                                                      \
+} while (0)
+
+#define rte_wait_event_64(addr, mask, expected, cond, memorder)                \
+do {                                                                           \
+	uint64_t value;                                                        \
+	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);  \
+	__LOAD_EXC_64(addr, value, memorder)                                   \
+	if ((value & mask) cond expected) {                                    \
+		__SEVL()                                                       \
+		do {                                                           \
+			__WFE()                                                \
+			__LOAD_EXC_64(addr, value, memorder)                   \
+		} while ((value & mask) cond expected);                        \
+	}                                                                      \
+} while (0)
+
+#undef __LOAD_EXC_16
+#undef __LOAD_EXC_32
 #undef __LOAD_EXC_64
 
 #undef __SEVL
diff --git a/lib/eal/include/generic/rte_pause.h b/lib/eal/include/generic/rte_pause.h
index 668ee4a184..4e32107eca 100644
--- a/lib/eal/include/generic/rte_pause.h
+++ b/lib/eal/include/generic/rte_pause.h
@@ -111,6 +111,84 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
 	while (__atomic_load_n(addr, memorder) != expected)
 		rte_pause();
 }
+
+/*
+ * Wait until a 16-bit *addr breaks the condition, with a relaxed memory
+ * ordering model meaning the loads around this API can be reordered.
+ *
+ * @param addr
+ *  A pointer to the memory location.
+ * @param mask
+ *  A mask of value bits in interest
+ * @param expected
+ *  A 16-bit expected value to be in the memory location.
+ * @param cond
+ *  A symbol representing the condition (==, !=).
+ * @param memorder
+ *  Two different memory orders that can be specified:
+ *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
+ *  C++11 memory orders with the same names, see the C++11 standard or
+ *  the GCC wiki on atomic synchronization for detailed definition.
+ */
+#define rte_wait_event_16(addr, mask, expected, cond, memorder)		       \
+do {									       \
+	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);  \
+									       \
+	while ((__atomic_load_n(addr, memorder) & mask) cond expected)	       \
+		rte_pause();						       \
+} while (0)
+
+/*
+ * Wait until a 32-bit *addr breaks the condition, with a relaxed memory
+ * ordering model meaning the loads around this API can be reordered.
+ *
+ * @param addr
+ *  A pointer to the memory location.
+ * @param mask
+ *  A mask of value bits in interest.
+ * @param expected
+ *  A 32-bit expected value to be in the memory location.
+ * @param cond
+ *  A symbol representing the condition (==, !=).
+ * @param memorder
+ *  Two different memory orders that can be specified:
+ *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
+ *  C++11 memory orders with the same names, see the C++11 standard or
+ *  the GCC wiki on atomic synchronization for detailed definition.
+ */
+#define rte_wait_event_32(addr, mask, expected, cond, memorder)		       \
+do {									       \
+	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);  \
+									       \
+	while ((__atomic_load_n(addr, memorder) & mask) cond expected)	       \
+		rte_pause();						       \
+} while (0)
+
+/*
+ * Wait until a 64-bit *addr breaks the condition, with a relaxed memory
+ * ordering model meaning the loads around this API can be reordered.
+ *
+ * @param addr
+ *  A pointer to the memory location.
+ * @param mask
+ *  A mask of value bits in interest
+ * @param expected
+ *  A 64-bit expected value to be in the memory location.
+ * @param cond
+ *  A symbol representing the condition (==, !=).
+ * @param memorder
+ *  Two different memory orders that can be specified:
+ *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
+ *  C++11 memory orders with the same names, see the C++11 standard or
+ *  the GCC wiki on atomic synchronization for detailed definition.
+ */
+#define rte_wait_event_64(addr, mask, expected, cond, memorder)		       \
+do {									       \
+	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);  \
+									       \
+	while ((__atomic_load_n(addr, memorder) & mask) cond expected)	       \
+		rte_pause();						       \
+} while (0)
 #endif
 
 #endif /* _RTE_PAUSE_H_ */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [RFC PATCH v3 2/5] eal: use wait event for read pflock
  2021-09-26  6:32 ` [dpdk-dev] [RFC PATCH v3 0/5] add new definitions for wait scheme Feifei Wang
  2021-09-26  6:32   ` [dpdk-dev] [RFC PATCH v3 1/5] eal: " Feifei Wang
@ 2021-09-26  6:32   ` Feifei Wang
  2021-09-26  6:33   ` [dpdk-dev] [RFC PATCH v3 3/5] eal: use wait event scheme for mcslock Feifei Wang
                     ` (3 subsequent siblings)
  5 siblings, 0 replies; 113+ messages in thread
From: Feifei Wang @ 2021-09-26  6:32 UTC (permalink / raw)
  Cc: dev, nd, Feifei Wang, Ruifeng Wang

Instead of polling for read pflock update, use wait event scheme for
this case.

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 lib/eal/include/generic/rte_pflock.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/lib/eal/include/generic/rte_pflock.h b/lib/eal/include/generic/rte_pflock.h
index e57c179ef2..9865f1349c 100644
--- a/lib/eal/include/generic/rte_pflock.h
+++ b/lib/eal/include/generic/rte_pflock.h
@@ -121,9 +121,7 @@ rte_pflock_read_lock(rte_pflock_t *pf)
 		return;
 
 	/* Wait for current write phase to complete. */
-	while ((__atomic_load_n(&pf->rd.in, __ATOMIC_ACQUIRE)
-		& RTE_PFLOCK_WBITS) == w)
-		rte_pause();
+	rte_wait_event_16(&pf->rd.in, RTE_PFLOCK_WBITS, w, ==, __ATOMIC_ACQUIRE);
 }
 
 /**
-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [RFC PATCH v3 3/5] eal: use wait event scheme for mcslock
  2021-09-26  6:32 ` [dpdk-dev] [RFC PATCH v3 0/5] add new definitions for wait scheme Feifei Wang
  2021-09-26  6:32   ` [dpdk-dev] [RFC PATCH v3 1/5] eal: " Feifei Wang
  2021-09-26  6:32   ` [dpdk-dev] [RFC PATCH v3 2/5] eal: use wait event for read pflock Feifei Wang
@ 2021-09-26  6:33   ` Feifei Wang
  2021-09-26  6:33   ` [dpdk-dev] [RFC PATCH v3 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration Feifei Wang
                     ` (2 subsequent siblings)
  5 siblings, 0 replies; 113+ messages in thread
From: Feifei Wang @ 2021-09-26  6:33 UTC (permalink / raw)
  To: Honnappa Nagarahalli; +Cc: dev, nd, Feifei Wang, Ruifeng Wang

Instead of polling for mcslock to be updated, use wait event scheme
for this case.

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 lib/eal/include/generic/rte_mcslock.h | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/lib/eal/include/generic/rte_mcslock.h b/lib/eal/include/generic/rte_mcslock.h
index 9f323bd2a2..c8d1c4f38f 100644
--- a/lib/eal/include/generic/rte_mcslock.h
+++ b/lib/eal/include/generic/rte_mcslock.h
@@ -84,8 +84,7 @@ rte_mcslock_lock(rte_mcslock_t **msl, rte_mcslock_t *me)
 	 * to spin on me->locked until the previous lock holder resets
 	 * the me->locked using mcslock_unlock().
 	 */
-	while (__atomic_load_n(&me->locked, __ATOMIC_ACQUIRE))
-		rte_pause();
+	rte_wait_event_32(&me->locked, INT_MAX, 0, !=, __ATOMIC_ACQUIRE);
 }
 
 /**
@@ -117,8 +116,13 @@ rte_mcslock_unlock(rte_mcslock_t **msl, rte_mcslock_t *me)
 		/* More nodes added to the queue by other CPUs.
 		 * Wait until the next pointer is set.
 		 */
-		while (__atomic_load_n(&me->next, __ATOMIC_RELAXED) == NULL)
-			rte_pause();
+#ifdef RTE_ARCH_32
+		rte_wait_event_32((uint32_t *)&me->next, UINT_MAX, 0, ==,
+				__ATOMIC_RELAXED);
+#else
+		rte_wait_event_64((uint64_t *)&me->next, ULONG_MAX, 0, ==,
+				__ATOMIC_RELAXED);
+#endif
 	}
 
 	/* Pass lock to next waiter. */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [RFC PATCH v3 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration
  2021-09-26  6:32 ` [dpdk-dev] [RFC PATCH v3 0/5] add new definitions for wait scheme Feifei Wang
                     ` (2 preceding siblings ...)
  2021-09-26  6:33   ` [dpdk-dev] [RFC PATCH v3 3/5] eal: use wait event scheme for mcslock Feifei Wang
@ 2021-09-26  6:33   ` Feifei Wang
  2021-10-07 15:50     ` Ananyev, Konstantin
  2021-09-26  6:33   ` [dpdk-dev] [RFC PATCH v3 5/5] lib/distributor: use wait event scheme Feifei Wang
  2021-10-20  8:45   ` [dpdk-dev] [PATCH v4 0/5] add new definitions for wait scheme Feifei Wang
  5 siblings, 1 reply; 113+ messages in thread
From: Feifei Wang @ 2021-09-26  6:33 UTC (permalink / raw)
  To: Konstantin Ananyev; +Cc: dev, nd, Feifei Wang, Ruifeng Wang

Instead of polling for cbi->use to be updated, use wait event scheme.

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 lib/bpf/bpf_pkt.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/lib/bpf/bpf_pkt.c b/lib/bpf/bpf_pkt.c
index 6e8248f0d6..3af15ae97b 100644
--- a/lib/bpf/bpf_pkt.c
+++ b/lib/bpf/bpf_pkt.c
@@ -113,7 +113,7 @@ bpf_eth_cbi_unuse(struct bpf_eth_cbi *cbi)
 static void
 bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
 {
-	uint32_t nuse, puse;
+	uint32_t puse;
 
 	/* make sure all previous loads and stores are completed */
 	rte_smp_mb();
@@ -122,11 +122,8 @@ bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
 
 	/* in use, busy wait till current RX/TX iteration is finished */
 	if ((puse & BPF_ETH_CBI_INUSE) != 0) {
-		do {
-			rte_pause();
-			rte_compiler_barrier();
-			nuse = cbi->use;
-		} while (nuse == puse);
+		rte_compiler_barrier();
+		rte_wait_event_32(&cbi->use, UINT_MAX, puse, ==, __ATOMIC_RELAXED);
 	}
 }
 
-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [RFC PATCH v3 5/5] lib/distributor: use wait event scheme
  2021-09-26  6:32 ` [dpdk-dev] [RFC PATCH v3 0/5] add new definitions for wait scheme Feifei Wang
                     ` (3 preceding siblings ...)
  2021-09-26  6:33   ` [dpdk-dev] [RFC PATCH v3 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration Feifei Wang
@ 2021-09-26  6:33   ` Feifei Wang
  2021-10-20  8:45   ` [dpdk-dev] [PATCH v4 0/5] add new definitions for wait scheme Feifei Wang
  5 siblings, 0 replies; 113+ messages in thread
From: Feifei Wang @ 2021-09-26  6:33 UTC (permalink / raw)
  To: David Hunt; +Cc: dev, nd, Feifei Wang, Ruifeng Wang

Instead of polling for bufptr64 to be updated, use
wait event for this case.

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 lib/distributor/rte_distributor_single.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/lib/distributor/rte_distributor_single.c b/lib/distributor/rte_distributor_single.c
index f4725b1d0b..86cab349f4 100644
--- a/lib/distributor/rte_distributor_single.c
+++ b/lib/distributor/rte_distributor_single.c
@@ -33,9 +33,8 @@ rte_distributor_request_pkt_single(struct rte_distributor_single *d,
 	union rte_distributor_buffer_single *buf = &d->bufs[worker_id];
 	int64_t req = (((int64_t)(uintptr_t)oldpkt) << RTE_DISTRIB_FLAG_BITS)
 			| RTE_DISTRIB_GET_BUF;
-	while (unlikely(__atomic_load_n(&buf->bufptr64, __ATOMIC_RELAXED)
-			& RTE_DISTRIB_FLAGS_MASK))
-		rte_pause();
+	rte_wait_event_64(&buf->bufptr64, RTE_DISTRIB_FLAGS_MASK,
+			0, !=, __ATOMIC_RELAXED);
 
 	/* Sync with distributor on GET_BUF flag. */
 	__atomic_store_n(&(buf->bufptr64), req, __ATOMIC_RELEASE);
@@ -74,9 +73,8 @@ rte_distributor_return_pkt_single(struct rte_distributor_single *d,
 	union rte_distributor_buffer_single *buf = &d->bufs[worker_id];
 	uint64_t req = (((int64_t)(uintptr_t)oldpkt) << RTE_DISTRIB_FLAG_BITS)
 			| RTE_DISTRIB_RETURN_BUF;
-	while (unlikely(__atomic_load_n(&buf->bufptr64, __ATOMIC_RELAXED)
-			& RTE_DISTRIB_FLAGS_MASK))
-		rte_pause();
+	rte_wait_event_64(&buf->bufptr64, RTE_DISTRIB_FLAGS_MASK,
+			0, !=, __ATOMIC_RELAXED);
 
 	/* Sync with distributor on RETURN_BUF flag. */
 	__atomic_store_n(&(buf->bufptr64), req, __ATOMIC_RELEASE);
-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [dpdk-dev] [RFC PATCH v3 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration
  2021-09-26  6:33   ` [dpdk-dev] [RFC PATCH v3 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration Feifei Wang
@ 2021-10-07 15:50     ` Ananyev, Konstantin
  2021-10-07 17:40       ` Ananyev, Konstantin
  0 siblings, 1 reply; 113+ messages in thread
From: Ananyev, Konstantin @ 2021-10-07 15:50 UTC (permalink / raw)
  To: Feifei Wang; +Cc: dev, nd, Ruifeng Wang



> 
> Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> ---
>  lib/bpf/bpf_pkt.c | 9 +++------
>  1 file changed, 3 insertions(+), 6 deletions(-)
> 
> diff --git a/lib/bpf/bpf_pkt.c b/lib/bpf/bpf_pkt.c
> index 6e8248f0d6..3af15ae97b 100644
> --- a/lib/bpf/bpf_pkt.c
> +++ b/lib/bpf/bpf_pkt.c
> @@ -113,7 +113,7 @@ bpf_eth_cbi_unuse(struct bpf_eth_cbi *cbi)
>  static void
>  bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
>  {
> -	uint32_t nuse, puse;
> +	uint32_t puse;
> 
>  	/* make sure all previous loads and stores are completed */
>  	rte_smp_mb();
> @@ -122,11 +122,8 @@ bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
> 
>  	/* in use, busy wait till current RX/TX iteration is finished */
>  	if ((puse & BPF_ETH_CBI_INUSE) != 0) {
> -		do {
> -			rte_pause();
> -			rte_compiler_barrier();
> -			nuse = cbi->use;
> -		} while (nuse == puse);
> +		rte_compiler_barrier();
> +		rte_wait_event_32(&cbi->use, UINT_MAX, puse, ==, __ATOMIC_RELAXED);

If we do use atomic load, why do we still need a compiler_barrier() here?

>  	}
>  }
> 
> --
> 2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [dpdk-dev] [RFC PATCH v3 1/5] eal: add new definitions for wait scheme
  2021-09-26  6:32   ` [dpdk-dev] [RFC PATCH v3 1/5] eal: " Feifei Wang
@ 2021-10-07 16:18     ` Ananyev, Konstantin
  2021-10-12  8:09       ` [dpdk-dev] 回复: " Feifei Wang
  0 siblings, 1 reply; 113+ messages in thread
From: Ananyev, Konstantin @ 2021-10-07 16:18 UTC (permalink / raw)
  To: Feifei Wang, Ruifeng Wang; +Cc: dev, nd


> Introduce macros as generic interface for address monitoring.
> 
> Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> ---
>  lib/eal/arm/include/rte_pause_64.h  | 151 ++++++++++++++++++----------
>  lib/eal/include/generic/rte_pause.h |  78 ++++++++++++++
>  2 files changed, 175 insertions(+), 54 deletions(-)
> 
> diff --git a/lib/eal/arm/include/rte_pause_64.h b/lib/eal/arm/include/rte_pause_64.h
> index e87d10b8cc..205510e044 100644
> --- a/lib/eal/arm/include/rte_pause_64.h
> +++ b/lib/eal/arm/include/rte_pause_64.h
> @@ -31,20 +31,12 @@ static inline void rte_pause(void)
>  /* Put processor into low power WFE(Wait For Event) state. */
>  #define __WFE() { asm volatile("wfe" : : : "memory"); }
> 
> -static __rte_always_inline void
> -rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> -		int memorder)
> -{
> -	uint16_t value;
> -
> -	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
> -
> -	/*
> -	 * Atomic exclusive load from addr, it returns the 16-bit content of
> -	 * *addr while making it 'monitored',when it is written by someone
> -	 * else, the 'monitored' state is cleared and a event is generated
> -	 * implicitly to exit WFE.
> -	 */
> +/*
> + * Atomic exclusive load from addr, it returns the 16-bit content of
> + * *addr while making it 'monitored', when it is written by someone
> + * else, the 'monitored' state is cleared and a event is generated
> + * implicitly to exit WFE.
> + */
>  #define __LOAD_EXC_16(src, dst, memorder) {               \
>  	if (memorder == __ATOMIC_RELAXED) {               \
>  		asm volatile("ldxrh %w[tmp], [%x[addr]]"  \
> @@ -58,6 +50,52 @@ rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
>  			: "memory");                      \
>  	} }
> 
> +/*
> + * Atomic exclusive load from addr, it returns the 32-bit content of
> + * *addr while making it 'monitored', when it is written by someone
> + * else, the 'monitored' state is cleared and a event is generated
> + * implicitly to exit WFE.
> + */
> +#define __LOAD_EXC_32(src, dst, memorder) {              \
> +	if (memorder == __ATOMIC_RELAXED) {              \
> +		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> +			: [tmp] "=&r" (dst)              \
> +			: [addr] "r"(src)                \
> +			: "memory");                     \
> +	} else {                                         \
> +		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> +			: [tmp] "=&r" (dst)              \
> +			: [addr] "r"(src)                \
> +			: "memory");                     \
> +	} }
> +
> +/*
> + * Atomic exclusive load from addr, it returns the 64-bit content of
> + * *addr while making it 'monitored', when it is written by someone
> + * else, the 'monitored' state is cleared and a event is generated
> + * implicitly to exit WFE.
> + */
> +#define __LOAD_EXC_64(src, dst, memorder) {              \
> +	if (memorder == __ATOMIC_RELAXED) {              \
> +		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> +			: [tmp] "=&r" (dst)              \
> +			: [addr] "r"(src)                \
> +			: "memory");                     \
> +	} else {                                         \
> +		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> +			: [tmp] "=&r" (dst)              \
> +			: [addr] "r"(src)                \
> +			: "memory");                     \
> +	} }
> +
> +static __rte_always_inline void
> +rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> +		int memorder)
> +{
> +	uint16_t value;
> +
> +	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
> +
>  	__LOAD_EXC_16(addr, value, memorder)
>  	if (value != expected) {
>  		__SEVL()
> @@ -66,7 +104,6 @@ rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
>  			__LOAD_EXC_16(addr, value, memorder)
>  		} while (value != expected);
>  	}
> -#undef __LOAD_EXC_16
>  }
> 
>  static __rte_always_inline void
> @@ -77,25 +114,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
> 
>  	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
> 
> -	/*
> -	 * Atomic exclusive load from addr, it returns the 32-bit content of
> -	 * *addr while making it 'monitored',when it is written by someone
> -	 * else, the 'monitored' state is cleared and a event is generated
> -	 * implicitly to exit WFE.
> -	 */
> -#define __LOAD_EXC_32(src, dst, memorder) {              \
> -	if (memorder == __ATOMIC_RELAXED) {              \
> -		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> -			: [tmp] "=&r" (dst)              \
> -			: [addr] "r"(src)                \
> -			: "memory");                     \
> -	} else {                                         \
> -		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> -			: [tmp] "=&r" (dst)              \
> -			: [addr] "r"(src)                \
> -			: "memory");                     \
> -	} }
> -
>  	__LOAD_EXC_32(addr, value, memorder)
>  	if (value != expected) {
>  		__SEVL()
> @@ -104,7 +122,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
>  			__LOAD_EXC_32(addr, value, memorder)
>  		} while (value != expected);
>  	}
> -#undef __LOAD_EXC_32
>  }
> 
>  static __rte_always_inline void
> @@ -115,25 +132,6 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
> 
>  	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
> 
> -	/*
> -	 * Atomic exclusive load from addr, it returns the 64-bit content of
> -	 * *addr while making it 'monitored',when it is written by someone
> -	 * else, the 'monitored' state is cleared and a event is generated
> -	 * implicitly to exit WFE.
> -	 */
> -#define __LOAD_EXC_64(src, dst, memorder) {              \
> -	if (memorder == __ATOMIC_RELAXED) {              \
> -		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> -			: [tmp] "=&r" (dst)              \
> -			: [addr] "r"(src)                \
> -			: "memory");                     \
> -	} else {                                         \
> -		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> -			: [tmp] "=&r" (dst)              \
> -			: [addr] "r"(src)                \
> -			: "memory");                     \
> -	} }
> -
>  	__LOAD_EXC_64(addr, value, memorder)
>  	if (value != expected) {
>  		__SEVL()
> @@ -143,6 +141,51 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
>  		} while (value != expected);
>  	}
>  }
> +
> +#define rte_wait_event_16(addr, mask, expected, cond, memorder)                \
> +do {									       \
> +	uint16_t value                                                         \
> +	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);  \
> +	__LOAD_EXC_16(addr, value, memorder)				       \
> +	if ((value & mask) cond expected) {				       \
> +		__SEVL()						       \
> +		do {							       \
> +			__WFE()						       \
> +			__LOAD_EXC_16(addr, value, memorder)		       \
> +		} while ((value & mask) cond expected);			       \
> +	}								       \
> +} while (0)
> +
> +#define rte_wait_event_32(addr, mask, expected, cond, memorder)                \
> +do {                                                                           \
> +	uint32_t value                                                         \
> +	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);  \
> +	__LOAD_EXC_32(addr, value, memorder)                                   \
> +	if ((value & mask) op expected) {                                      \
> +		__SEVL()                                                       \
> +		do {                                                           \
> +			__WFE()                                                \
> +			__LOAD_EXC_32(addr, value, memorder)                   \
> +		} while ((value & mask) cond expected);                        \
> +	}                                                                      \
> +} while (0)
> +
> +#define rte_wait_event_64(addr, mask, expected, cond, memorder)                \
> +do {                                                                           \
> +	uint64_t value                                                         \
> +	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);  \
> +	__LOAD_EXC_64(addr, value, memorder)                                   \
> +	if ((value & mask) cond expected) {                                    \
> +		__SEVL()                                                       \
> +		do {                                                           \
> +			__WFE()                                                \
> +			__LOAD_EXC_64(addr, value, memorder)                   \
> +		} while ((value & mask) cond expected);                        \
> +	}                                                                      \
> +} while (0)
> +
> +#undef __LOAD_EXC_16
> +#undef __LOAD_EXC_32
>  #undef __LOAD_EXC_64
> 
>  #undef __SEVL
> diff --git a/lib/eal/include/generic/rte_pause.h b/lib/eal/include/generic/rte_pause.h
> index 668ee4a184..4e32107eca 100644
> --- a/lib/eal/include/generic/rte_pause.h
> +++ b/lib/eal/include/generic/rte_pause.h
> @@ -111,6 +111,84 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
>  	while (__atomic_load_n(addr, memorder) != expected)
>  		rte_pause();
>  }
> +
> +/*
> + * Wait until a 16-bit *addr breaks the condition, with a relaxed memory
> + * ordering model meaning the loads around this API can be reordered.
> + *
> + * @param addr
> + *  A pointer to the memory location.
> + * @param mask
> + *  A mask of value bits in interest
> + * @param expected
> + *  A 16-bit expected value to be in the memory location.
> + * @param cond
> + *  A symbol representing the condition (==, !=).
> + * @param memorder
> + *  Two different memory orders that can be specified:
> + *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> + *  C++11 memory orders with the same names, see the C++11 standard or
> + *  the GCC wiki on atomic synchronization for detailed definition.
> + */

Hmm, so now we have 2 APIs doing similar thing:
rte_wait_until_equal_n() and rte_wait_event_n().
Can we probably unite them somehow?
At least make rte_wait_until_equal_n() to use rte_wait_event_n() underneath.

> +#define rte_wait_event_16(addr, mask, expected, cond, memorder)		       \
> +do {									       \
> +	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);  \

And why user is not allowed to use __ATOMIC_SEQ_CST here?
BTW, if we expect memorder to always be a constant, might be better BUILD_BUG_ON()?

> +									       \
> +	while ((__atomic_load_n(addr, memorder) & mask) cond expected)	       \
> +		rte_pause();						       \
> +} while (0)

Two thoughts with these macros:
1. It is a good practice to put () around macro parameters in the macro body.
It will save you from a lot of unexpected trouble.
2. I think these 3 macros can be united into one.
Something like:

#define rte_wait_event(addr, mask, expected, cond, memorder) do {\
        typeof (*(addr)) val = __atomic_load_n((addr), (memorder)); \
        if ((val & (typeof(val))(mask)) cond (typeof(val))(expected)) \
                break; \
        rte_pause(); \
} while (1);


> +
> +/*
> + * Wait until a 32-bit *addr breaks the condition, with a relaxed memory
> + * ordering model meaning the loads around this API can be reordered.
> + *
> + * @param addr
> + *  A pointer to the memory location.
> + * @param mask
> + *  A mask of value bits in interest.
> + * @param expected
> + *  A 32-bit expected value to be in the memory location.
> + * @param cond
> + *  A symbol representing the condition (==, !=).
> + * @param memorder
> + *  Two different memory orders that can be specified:
> + *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> + *  C++11 memory orders with the same names, see the C++11 standard or
> + *  the GCC wiki on atomic synchronization for detailed definition.
> + */
> +#define rte_wait_event_32(addr, mask, expected, cond, memorder)		       \
> +do {									       \
> +	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);  \
> +									       \
> +	while ((__atomic_load_n(addr, memorder) & mask) cond expected)	       \
> +		rte_pause();						       \
> +} while (0)
> +
> +/*
> + * Wait until a 64-bit *addr breaks the condition, with a relaxed memory
> + * ordering model meaning the loads around this API can be reordered.
> + *
> + * @param addr
> + *  A pointer to the memory location.
> + * @param mask
> + *  A mask of value bits in interest
> + * @param expected
> + *  A 64-bit expected value to be in the memory location.
> + * @param cond
> + *  A symbol representing the condition (==, !=).
> + * @param memorder
> + *  Two different memory orders that can be specified:
> + *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> + *  C++11 memory orders with the same names, see the C++11 standard or
> + *  the GCC wiki on atomic synchronization for detailed definition.
> + */
> +#define rte_wait_event_64(addr, mask, expected, cond, memorder)		       \
> +do {									       \
> +	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);  \
> +									       \
> +	while ((__atomic_load_n(addr, memorder) & mask) cond expected)	       \
> +		rte_pause();						       \
> +} while (0)
>  #endif
> 
>  #endif /* _RTE_PAUSE_H_ */
> --
> 2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [dpdk-dev] [RFC PATCH v3 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration
  2021-10-07 15:50     ` Ananyev, Konstantin
@ 2021-10-07 17:40       ` Ananyev, Konstantin
  2021-10-20  6:20         ` [dpdk-dev] 回复: " Feifei Wang
  0 siblings, 1 reply; 113+ messages in thread
From: Ananyev, Konstantin @ 2021-10-07 17:40 UTC (permalink / raw)
  To: Feifei Wang; +Cc: dev, nd, Ruifeng Wang



> -----Original Message-----
> From: Ananyev, Konstantin
> Sent: Thursday, October 7, 2021 4:50 PM
> To: Feifei Wang <feifei.wang2@arm.com>
> Cc: dev@dpdk.org; nd@arm.com; Ruifeng Wang <ruifeng.wang@arm.com>
> Subject: RE: [RFC PATCH v3 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration
> 
> 
> 
> >
> > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > ---
> >  lib/bpf/bpf_pkt.c | 9 +++------
> >  1 file changed, 3 insertions(+), 6 deletions(-)
> >
> > diff --git a/lib/bpf/bpf_pkt.c b/lib/bpf/bpf_pkt.c
> > index 6e8248f0d6..3af15ae97b 100644
> > --- a/lib/bpf/bpf_pkt.c
> > +++ b/lib/bpf/bpf_pkt.c
> > @@ -113,7 +113,7 @@ bpf_eth_cbi_unuse(struct bpf_eth_cbi *cbi)
> >  static void
> >  bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
> >  {
> > -	uint32_t nuse, puse;
> > +	uint32_t puse;
> >
> >  	/* make sure all previous loads and stores are completed */
> >  	rte_smp_mb();
> > @@ -122,11 +122,8 @@ bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
> >
> >  	/* in use, busy wait till current RX/TX iteration is finished */
> >  	if ((puse & BPF_ETH_CBI_INUSE) != 0) {
> > -		do {
> > -			rte_pause();
> > -			rte_compiler_barrier();
> > -			nuse = cbi->use;
> > -		} while (nuse == puse);
> > +		rte_compiler_barrier();
> > +		rte_wait_event_32(&cbi->use, UINT_MAX, puse, ==, __ATOMIC_RELAXED);

Probably UINT32_MAX will be a bit better here.

> 
> If we do use atomic load, while we still need a compiler_barrier() here?
> 
> >  	}
> >  }
> >
> > --
> > 2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] 回复:  [RFC PATCH v3 1/5] eal: add new definitions for wait scheme
  2021-10-07 16:18     ` Ananyev, Konstantin
@ 2021-10-12  8:09       ` Feifei Wang
  2021-10-13 15:03         ` [dpdk-dev] " Ananyev, Konstantin
  0 siblings, 1 reply; 113+ messages in thread
From: Feifei Wang @ 2021-10-12  8:09 UTC (permalink / raw)
  To: Ananyev, Konstantin, Ruifeng Wang; +Cc: dev, nd, nd

> -----邮件原件-----
> 发件人: Ananyev, Konstantin <konstantin.ananyev@intel.com>
> 发送时间: Friday, October 8, 2021 12:19 AM
> 收件人: Feifei Wang <Feifei.Wang2@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>
> 抄送: dev@dpdk.org; nd <nd@arm.com>
> 主题: RE: [dpdk-dev] [RFC PATCH v3 1/5] eal: add new definitions for wait
> scheme

[snip]

> > diff --git a/lib/eal/include/generic/rte_pause.h
> > b/lib/eal/include/generic/rte_pause.h
> > index 668ee4a184..4e32107eca 100644
> > --- a/lib/eal/include/generic/rte_pause.h
> > +++ b/lib/eal/include/generic/rte_pause.h
> > @@ -111,6 +111,84 @@ rte_wait_until_equal_64(volatile uint64_t *addr,
> uint64_t expected,
> >  	while (__atomic_load_n(addr, memorder) != expected)
> >  		rte_pause();
> >  }
> > +
> > +/*
> > + * Wait until a 16-bit *addr breaks the condition, with a relaxed
> > +memory
> > + * ordering model meaning the loads around this API can be reordered.
> > + *
> > + * @param addr
> > + *  A pointer to the memory location.
> > + * @param mask
> > + *  A mask of value bits in interest
> > + * @param expected
> > + *  A 16-bit expected value to be in the memory location.
> > + * @param cond
> > + *  A symbol representing the condition (==, !=).
> > + * @param memorder
> > + *  Two different memory orders that can be specified:
> > + *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> > + *  C++11 memory orders with the same names, see the C++11 standard
> > +or
> > + *  the GCC wiki on atomic synchronization for detailed definition.
> > + */
> 
> Hmm, so now we have 2 APIs doing similar thing:
> rte_wait_until_equal_n() and rte_wait_event_n().
> Can we probably unite them somehow?
> At least make rte_wait_until_equal_n() to use rte_wait_event_n() underneath.
> 
You are right. We plan to change rte_wait_until_equal API after this new scheme
is achieved.  And then, we will merge wait_until into wait_event definition in the next new
patch series.
 
> > +#define rte_wait_event_16(addr, mask, expected, cond, memorder)
> 		       \
> > +do {									       \
> > +	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > +__ATOMIC_RELAXED);  \
> 
> And why user is not allowed to use __ATOMIC_SEQ_CST here?
Actually this is just a load operation, and acquire here is enough to make sure the
load of the addr value happens before the subsequent operations.
 
> BTW, if we expect memorder to always be a constant, might be better
> BUILD_BUG_ON()?
If I understand correctly, you mean we can replace 'assert' by 'build_bug_on':
RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE && memorder !=__ATOMIC_RELAXED);  

> 
> > +									       \
> > +	while ((__atomic_load_n(addr, memorder) & mask) cond expected)
> 	       \
> > +		rte_pause();						       \
> > +} while (0)
> 
> Two thoughts with these macros:
> 1. It is a good practice to put () around macro parameters in the macro body.
> It will save you from a lot of unexpected trouble.
> 2. I think these 3 macros can be united into one.
> Something like:
> 
> #define rte_wait_event(addr, mask, expected, cond, memorder) do {\
>         typeof (*(addr)) val = __atomic_load_n((addr), (memorder)); \
>         if ((val & (typeof(val))(mask)) cond (typeof(val))(expected)) \
>                 break; \
>         rte_pause(); \
> } while (1);
For this point, I think it is because different sizes need to use different assembly instructions
on the Arm architecture. For example,
load 16 bits instruction is "ldxrh %w[tmp], [%x[addr]"
load 32 bits instruction is " ldxr %w[tmp], [%x[addr]" 
load 64 bits instruction is " ldxr %x[tmp], [%x[addr] "
And for consistency, we also use 3 APIs in generic path.
> 
> 
> > +
> > +/*
> > + * Wait until a 32-bit *addr breaks the condition, with a relaxed
> > +memory
> > + * ordering model meaning the loads around this API can be reordered.
> > + *
> > + * @param addr
> > + *  A pointer to the memory location.
> > + * @param mask
> > + *  A mask of value bits in interest.
> > + * @param expected
> > + *  A 32-bit expected value to be in the memory location.
> > + * @param cond
> > + *  A symbol representing the condition (==, !=).
> > + * @param memorder
> > + *  Two different memory orders that can be specified:
> > + *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> > + *  C++11 memory orders with the same names, see the C++11 standard
> > +or
> > + *  the GCC wiki on atomic synchronization for detailed definition.
> > + */
> > +#define rte_wait_event_32(addr, mask, expected, cond, memorder)
> 		       \
> > +do {									       \
> > +	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> __ATOMIC_RELAXED);  \
> > +									       \
> > +	while ((__atomic_load_n(addr, memorder) & mask) cond expected)
> 	       \
> > +		rte_pause();						       \
> > +} while (0)
> > +
> > +/*
> > + * Wait until a 64-bit *addr breaks the condition, with a relaxed
> > +memory
> > + * ordering model meaning the loads around this API can be reordered.
> > + *
> > + * @param addr
> > + *  A pointer to the memory location.
> > + * @param mask
> > + *  A mask of value bits in interest
> > + * @param expected
> > + *  A 64-bit expected value to be in the memory location.
> > + * @param cond
> > + *  A symbol representing the condition (==, !=).
> > + * @param memorder
> > + *  Two different memory orders that can be specified:
> > + *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> > + *  C++11 memory orders with the same names, see the C++11 standard
> > +or
> > + *  the GCC wiki on atomic synchronization for detailed definition.
> > + */
> > +#define rte_wait_event_64(addr, mask, expected, cond, memorder)
> 		       \
> > +do {									       \
> > +	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> __ATOMIC_RELAXED);  \
> > +									       \
> > +	while ((__atomic_load_n(addr, memorder) & mask) cond expected)
> 	       \
> > +		rte_pause();						       \
> > +} while (0)
> >  #endif
> >
> >  #endif /* _RTE_PAUSE_H_ */
> > --
> > 2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [dpdk-dev] [RFC PATCH v3 1/5] eal: add new definitions for wait scheme
  2021-10-12  8:09       ` [dpdk-dev] 回复: " Feifei Wang
@ 2021-10-13 15:03         ` Ananyev, Konstantin
  2021-10-13 17:00           ` Stephen Hemminger
  2021-10-14  3:08           ` Feifei Wang
  0 siblings, 2 replies; 113+ messages in thread
From: Ananyev, Konstantin @ 2021-10-13 15:03 UTC (permalink / raw)
  To: Feifei Wang, Ruifeng Wang; +Cc: dev, nd, nd

> 
> [snip]
> 
> > > diff --git a/lib/eal/include/generic/rte_pause.h
> > > b/lib/eal/include/generic/rte_pause.h
> > > index 668ee4a184..4e32107eca 100644
> > > --- a/lib/eal/include/generic/rte_pause.h
> > > +++ b/lib/eal/include/generic/rte_pause.h
> > > @@ -111,6 +111,84 @@ rte_wait_until_equal_64(volatile uint64_t *addr,
> > uint64_t expected,
> > >  	while (__atomic_load_n(addr, memorder) != expected)
> > >  		rte_pause();
> > >  }
> > > +
> > > +/*
> > > + * Wait until a 16-bit *addr breaks the condition, with a relaxed
> > > +memory
> > > + * ordering model meaning the loads around this API can be reordered.
> > > + *
> > > + * @param addr
> > > + *  A pointer to the memory location.
> > > + * @param mask
> > > + *  A mask of value bits in interest
> > > + * @param expected
> > > + *  A 16-bit expected value to be in the memory location.
> > > + * @param cond
> > > + *  A symbol representing the condition (==, !=).
> > > + * @param memorder
> > > + *  Two different memory orders that can be specified:
> > > + *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> > > + *  C++11 memory orders with the same names, see the C++11 standard
> > > +or
> > > + *  the GCC wiki on atomic synchronization for detailed definition.
> > > + */
> >
> > Hmm, so now we have 2 APIs doing similar thing:
> > rte_wait_until_equal_n() and rte_wait_event_n().
> > Can we probably unite them somehow?
> > At least make rte_wait_until_equal_n() to use rte_wait_event_n() underneath.
> >
> You are right. We plan to change rte_wait_until_equal API after this new scheme
> is achieved.  And then, we will merge wait_unil into wait_event definition in the next new
> patch series.
> 
> > > +#define rte_wait_event_16(addr, mask, expected, cond, memorder)
> > 		       \
> > > +do {									       \
> > > +	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > +__ATOMIC_RELAXED);  \
> >
> > And why user is not allowed to use __ATOMIC_SEQ_CST here?
> Actually this is just a load operation, and acquire here is enough to make sure 'load
> addr value' can be before other operations.
> 
> > BTW, if we expect memorder to always be a constant, might be better
> > BUILD_BUG_ON()?
> If I understand correctly, you mean we can replace 'assert' by 'build_bug_on':
> RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE && memorder !=__ATOMIC_RELAXED);

Yes, that was my thought.
In that case I think we should be able to catch wrong memorder at compilation stage.

> 
> >
> > > +									       \
> > > +	while ((__atomic_load_n(addr, memorder) & mask) cond expected)
> > 	       \
> > > +		rte_pause();						       \
> > > +} while (0)
> >
> > Two thoughts with these macros:
> > 1. It is a good practice to put () around macro parameters in the macro body.
> > It will save you from a lot of unexpected trouble.
> > 2. I think these 3 macros can be united into one.
> > Something like:
> >
> > #define rte_wait_event(addr, mask, expected, cond, memorder) do {\
> >         typeof (*(addr)) val = __atomic_load_n((addr), (memorder)); \
> >         if ((val & (typeof(val))(mask)) cond (typeof(val))(expected)) \
> >                 break; \
> >         rte_pause(); \
> > } while (1);
> For this point, I think it is due to different size need to use different assembly instructions
> in arm architecture. For example,
> load 16 bits instruction is "ldxrh %w[tmp], [%x[addr]"
> load 32 bits instruction is " ldxr %w[tmp], [%x[addr]"
> load 64 bits instruction is " ldxr %x[tmp], [%x[addr] "

Ok, but it could be then something like that for arm specific code:
if (sizeof(val) == sizeof(uint16_t)) \
	__LOAD_EXC_16(...); \
else if (sizeof(val) == sizeof(uint32_t)) \	
	__LOAD_EXC_32(...); \
else if (sizeof(val) == sizeof(uint64_t)) \
	__LOAD_EXC_64(...); \
...

> And for consistency, we also use 3 APIs in generic path.
Honestly, even one multi-line macro doesn't look nice.
Having 3 identical ones looks even worse.


^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [dpdk-dev] [RFC PATCH v3 1/5] eal: add new definitions for wait scheme
  2021-10-13 15:03         ` [dpdk-dev] " Ananyev, Konstantin
@ 2021-10-13 17:00           ` Stephen Hemminger
  2021-10-14  3:14             ` [dpdk-dev] 回复: " Feifei Wang
  2021-10-14  3:08           ` Feifei Wang
  1 sibling, 1 reply; 113+ messages in thread
From: Stephen Hemminger @ 2021-10-13 17:00 UTC (permalink / raw)
  To: Ananyev, Konstantin; +Cc: Feifei Wang, Ruifeng Wang, dev, nd

On Wed, 13 Oct 2021 15:03:56 +0000
"Ananyev, Konstantin" <konstantin.ananyev@intel.com> wrote:

> > addr value' can be before other operations.
> >   
> > > BTW, if we expect memorder to always be a constant, might be better
> > > BUILD_BUG_ON()?  
> > If I understand correctly, you mean we can replace 'assert' by 'build_bug_on':
> > RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE && memorder !=__ATOMIC_RELAXED);  
> 
> Yes, that was my thought.
> In that case I think we should be able to catch wrong memorder at compilation stage.

Maybe:
   RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));
   RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE && memorder !=__ATOMIC_RELAXED);  
 

^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] 回复:  [RFC PATCH v3 1/5] eal: add new definitions for wait scheme
  2021-10-13 15:03         ` [dpdk-dev] " Ananyev, Konstantin
  2021-10-13 17:00           ` Stephen Hemminger
@ 2021-10-14  3:08           ` Feifei Wang
  1 sibling, 0 replies; 113+ messages in thread
From: Feifei Wang @ 2021-10-14  3:08 UTC (permalink / raw)
  To: Ananyev, Konstantin, Ruifeng Wang; +Cc: dev, nd, nd, nd

> -----邮件原件-----
> 发件人: Ananyev, Konstantin <konstantin.ananyev@intel.com>
> 发送时间: Wednesday, October 13, 2021 11:04 PM
> 收件人: Feifei Wang <Feifei.Wang2@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>
> 抄送: dev@dpdk.org; nd <nd@arm.com>; nd <nd@arm.com>
> 主题: RE: [dpdk-dev] [RFC PATCH v3 1/5] eal: add new definitions for wait
> scheme
> 
> >
> > [snip]
> >
> > > > diff --git a/lib/eal/include/generic/rte_pause.h
> > > > b/lib/eal/include/generic/rte_pause.h
> > > > index 668ee4a184..4e32107eca 100644
> > > > --- a/lib/eal/include/generic/rte_pause.h
> > > > +++ b/lib/eal/include/generic/rte_pause.h
> > > > @@ -111,6 +111,84 @@ rte_wait_until_equal_64(volatile uint64_t
> > > > *addr,
> > > uint64_t expected,
> > > >  	while (__atomic_load_n(addr, memorder) != expected)
> > > >  		rte_pause();
> > > >  }
> > > > +
> > > > +/*
> > > > + * Wait until a 16-bit *addr breaks the condition, with a relaxed
> > > > +memory
> > > > + * ordering model meaning the loads around this API can be reordered.
> > > > + *
> > > > + * @param addr
> > > > + *  A pointer to the memory location.
> > > > + * @param mask
> > > > + *  A mask of value bits in interest
> > > > + * @param expected
> > > > + *  A 16-bit expected value to be in the memory location.
> > > > + * @param cond
> > > > + *  A symbol representing the condition (==, !=).
> > > > + * @param memorder
> > > > + *  Two different memory orders that can be specified:
> > > > + *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> > > > + *  C++11 memory orders with the same names, see the C++11
> > > > +standard or
> > > > + *  the GCC wiki on atomic synchronization for detailed definition.
> > > > + */
> > >
> > > Hmm, so now we have 2 APIs doing similar thing:
> > > rte_wait_until_equal_n() and rte_wait_event_n().
> > > Can we probably unite them somehow?
> > > At least make rte_wait_until_equal_n() to use rte_wait_event_n()
> underneath.
> > >
> > You are right. We plan to change rte_wait_until_equal API after this
> > new scheme is achieved.  And then, we will merge wait_until into
> > wait_event definition in the next new patch series.
> >
> > > > +#define rte_wait_event_16(addr, mask, expected, cond, memorder)
> > > 		       \
> > > > +do {
> 	       \
> > > > +	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > > +__ATOMIC_RELAXED);  \
> > >
> > > And why user is not allowed to use __ATOMIC_SEQ_CST here?
> > Actually this is just a load operation, and acquire here is enough to
> > make sure 'load addr value' can be before other operations.
> >
> > > BTW, if we expect memorder to always be a constant, might be better
> > > BUILD_BUG_ON()?
> > If I understand correctly, you mean we can replace 'assert' by
> 'build_bug_on':
> > RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE && memorder
> > !=__ATOMIC_RELAXED);
> 
> Yes, that was my thought.
> In that case I think we should be able to catch wrong memorder at compilation
> stage.
> 
> >
> > >
> > > > +									       \
> > > > +	while ((__atomic_load_n(addr, memorder) & mask) cond expected)
> > > 	       \
> > > > +		rte_pause();						       \
> > > > +} while (0)
> > >
> > > Two thoughts with these macros:
> > > 1. It is a goof practise to put () around macro parameters in the macro
> body.
> > > Will save from a lot of unexpected troubles.
> > > 2. I think these 3 macros can be united into one.
> > > Something like:
> > >
> > > #define rte_wait_event(addr, mask, expected, cond, memorder) do {\
> > >         typeof (*(addr)) val = __atomic_load_n((addr), (memorder)); \
> > >         if ((val & (typeof(val))(mask)) cond (typeof(val))(expected)) \
> > >                 break; \
> > >         rte_pause(); \
> > > } while (1);
> > For this point, I think it is due to different size need to use
> > different assembly instructions in arm architecture. For example, load
> > 16 bits instruction is "ldxrh %w[tmp], [%x[addr]"
> > load 32 bits instruction is " ldxr %w[tmp], [%x[addr]"
> > load 64 bits instruction is " ldxr %x[tmp], [%x[addr] "
> 
> Ok, but it could be then something like that for arm specific code:
> if (sizeof(val) == sizeof(uint16_t)) \
> 	__LOAD_EXC_16(...); \
> else if (sizeof(val) == sizeof(uint32_t)) \
> 	__LOAD_EXC_32(...); \
> else if (sizeof(val) == sizeof(uint64_t)) \
> 	__LOAD_EXC_64(...); \
> ...
> 
I think we should use "addr" as the judgement:

rte_wait_event(addr, mask, expected, cond, memorder)
if (sizeof(*addr)) == sizeof(uint16_t) 
	uint16_t value                                                         \
	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);  \
	__LOAD_EXC_16(addr, value, memorder)				       \
	if ((value & mask) cond expected) {				       \
		__SEVL()						       \
		do {							       \
			__WFE()						       \
			__LOAD_EXC_16(addr, value, memorder)		       \
		} while ((value & mask) cond expected);			       \
	}	
if (sizeof(*addr)) == sizeof(uint32_t) 
	..........
if (sizeof(*addr)) == sizeof(uint64_t) 
	...........

> > And for consistency, we also use 3 APIs in generic path.
> Honestly, even one multi-line macro doesn't look nice.
> Having 3 identical ones looks even worse.


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] 回复:  [RFC PATCH v3 1/5] eal: add new definitions for wait scheme
  2021-10-13 17:00           ` Stephen Hemminger
@ 2021-10-14  3:14             ` Feifei Wang
  0 siblings, 0 replies; 113+ messages in thread
From: Feifei Wang @ 2021-10-14  3:14 UTC (permalink / raw)
  To: Stephen Hemminger, Ananyev, Konstantin; +Cc: Ruifeng Wang, dev, nd, nd



> -----邮件原件-----
> 发件人: Stephen Hemminger <stephen@networkplumber.org>
> 发送时间: Thursday, October 14, 2021 1:00 AM
> 收件人: Ananyev, Konstantin <konstantin.ananyev@intel.com>
> 抄送: Feifei Wang <Feifei.Wang2@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>; dev@dpdk.org; nd <nd@arm.com>
> 主题: Re: [dpdk-dev] [RFC PATCH v3 1/5] eal: add new definitions for wait
> scheme
> 
> On Wed, 13 Oct 2021 15:03:56 +0000
> "Ananyev, Konstantin" <konstantin.ananyev@intel.com> wrote:
> 
> > > addr value' can be before other operations.
> > >
> > > > BTW, if we expect memorder to always be a constant, might be
> > > > better BUILD_BUG_ON()?
> > > If I understand correctly, you means we can replace 'assert' by
> 'build_bug_on':
> > > RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE && memorder
> > > !=__ATOMIC_RELAXED);
> >
> > Yes, that was my thought.
> > In that case I think we should be able to catch wrong memorder at
> compilation stage.
> 
> Maybe:
>    RTE_BUILD_BUG_ON(!_constant_p(memorder));
>    RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&
> memorder !=__ATOMIC_RELAXED);
> 
Thanks for your comments. One question about this: I do not understand why we should check whether memorder is a constant.
Is it to check whether memorder has been assigned, or whether it is NULL?

^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] 回复: [RFC PATCH v3 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration
  2021-10-07 17:40       ` Ananyev, Konstantin
@ 2021-10-20  6:20         ` Feifei Wang
  0 siblings, 0 replies; 113+ messages in thread
From: Feifei Wang @ 2021-10-20  6:20 UTC (permalink / raw)
  To: Ananyev, Konstantin; +Cc: dev, nd, Ruifeng Wang, nd

> -----邮件原件-----
> 发件人: dev <dev-bounces@dpdk.org> 代表 Ananyev, Konstantin
> 发送时间: Friday, October 8, 2021 1:40 AM
> 收件人: Feifei Wang <Feifei.Wang2@arm.com>
> 抄送: dev@dpdk.org; nd <nd@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>
> 主题: Re: [dpdk-dev] [RFC PATCH v3 4/5] lib/bpf: use wait event scheme for
> Rx/Tx iteration
> 
> 
> 
> > -----Original Message-----
> > From: Ananyev, Konstantin
> > Sent: Thursday, October 7, 2021 4:50 PM
> > To: Feifei Wang <feifei.wang2@arm.com>
> > Cc: dev@dpdk.org; nd@arm.com; Ruifeng Wang <ruifeng.wang@arm.com>
> > Subject: RE: [RFC PATCH v3 4/5] lib/bpf: use wait event scheme for
> > Rx/Tx iteration
> >
> >
> >
> > >
> > > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > > ---
> > >  lib/bpf/bpf_pkt.c | 9 +++------
> > >  1 file changed, 3 insertions(+), 6 deletions(-)
> > >
> > > diff --git a/lib/bpf/bpf_pkt.c b/lib/bpf/bpf_pkt.c index
> > > 6e8248f0d6..3af15ae97b 100644
> > > --- a/lib/bpf/bpf_pkt.c
> > > +++ b/lib/bpf/bpf_pkt.c
> > > @@ -113,7 +113,7 @@ bpf_eth_cbi_unuse(struct bpf_eth_cbi *cbi)
> > > static void  bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)  {
> > > -	uint32_t nuse, puse;
> > > +	uint32_t puse;
> > >
> > >  	/* make sure all previous loads and stores are completed */
> > >  	rte_smp_mb();
> > > @@ -122,11 +122,8 @@ bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
> > >
> > >  	/* in use, busy wait till current RX/TX iteration is finished */
> > >  	if ((puse & BPF_ETH_CBI_INUSE) != 0) {
> > > -		do {
> > > -			rte_pause();
> > > -			rte_compiler_barrier();
> > > -			nuse = cbi->use;
> > > -		} while (nuse == puse);
> > > +		rte_compiler_barrier();
> > > +		rte_wait_event_32(&cbi->use, UINT_MAX, puse, ==,
> > > +__ATOMIC_RELAXED);
> 
> Probably UINT32_MAX will be a bit better here.
That's right, UINT32_MAX is more suitable.
> 
> >
> > If we do use atomic load, while we still need a compiler_barrier() here?
Yes, compiler_barrier can be removed here since atomic_load can update the value in time.
> >
> > >  	}
> > >  }
> > >
> > > --
> > > 2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [PATCH v4 0/5] add new definitions for wait scheme
  2021-09-26  6:32 ` [dpdk-dev] [RFC PATCH v3 0/5] add new definitions for wait scheme Feifei Wang
                     ` (4 preceding siblings ...)
  2021-09-26  6:33   ` [dpdk-dev] [RFC PATCH v3 5/5] lib/distributor: use wait event scheme Feifei Wang
@ 2021-10-20  8:45   ` Feifei Wang
  2021-10-20  8:45     ` [dpdk-dev] [PATCH v4 1/5] eal: " Feifei Wang
                       ` (4 more replies)
  5 siblings, 5 replies; 113+ messages in thread
From: Feifei Wang @ 2021-10-20  8:45 UTC (permalink / raw)
  Cc: konstantin.ananyev, dev, nd, Feifei Wang

Add new definitions for wait scheme, and apply this new definitions into
lib to replace rte_pause.

v2:
1. use macro to create new wait scheme (Stephen)

v3:
1. delete unnecessary bug fix in bpf (Konstantin)

v4:
1. put size into the macro body (Konstantin)
2. replace assert with BUILD_BUG_ON (Stephen)
3. delete unnecessary compiler barrier for bpf (Konstantin)

Feifei Wang (5):
  eal: add new definitions for wait scheme
  eal: use wait event for read pflock
  eal: use wait event scheme for mcslock
  lib/bpf: use wait event scheme for Rx/Tx iteration
  lib/distributor: use wait event scheme

 lib/bpf/bpf_pkt.c                        |   9 +-
 lib/distributor/rte_distributor_single.c |  10 +-
 lib/eal/arm/include/rte_pause_64.h       | 126 +++++++++++++----------
 lib/eal/include/generic/rte_mcslock.h    |   9 +-
 lib/eal/include/generic/rte_pause.h      |  32 ++++++
 lib/eal/include/generic/rte_pflock.h     |   4 +-
 6 files changed, 119 insertions(+), 71 deletions(-)

-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [PATCH v4 1/5] eal: add new definitions for wait scheme
  2021-10-20  8:45   ` [dpdk-dev] [PATCH v4 0/5] add new definitions for wait scheme Feifei Wang
@ 2021-10-20  8:45     ` Feifei Wang
  2021-10-21 16:24       ` Ananyev, Konstantin
  2021-10-22  0:10       ` [dpdk-dev] " Jerin Jacob
  2021-10-20  8:45     ` [dpdk-dev] [PATCH v4 2/5] eal: use wait event for read pflock Feifei Wang
                       ` (3 subsequent siblings)
  4 siblings, 2 replies; 113+ messages in thread
From: Feifei Wang @ 2021-10-20  8:45 UTC (permalink / raw)
  To: Ruifeng Wang; +Cc: konstantin.ananyev, dev, nd, Feifei Wang

Introduce macros as generic interface for address monitoring.

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 lib/eal/arm/include/rte_pause_64.h  | 126 ++++++++++++++++------------
 lib/eal/include/generic/rte_pause.h |  32 +++++++
 2 files changed, 104 insertions(+), 54 deletions(-)

diff --git a/lib/eal/arm/include/rte_pause_64.h b/lib/eal/arm/include/rte_pause_64.h
index e87d10b8cc..23954c2de2 100644
--- a/lib/eal/arm/include/rte_pause_64.h
+++ b/lib/eal/arm/include/rte_pause_64.h
@@ -31,20 +31,12 @@ static inline void rte_pause(void)
 /* Put processor into low power WFE(Wait For Event) state. */
 #define __WFE() { asm volatile("wfe" : : : "memory"); }
 
-static __rte_always_inline void
-rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
-		int memorder)
-{
-	uint16_t value;
-
-	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
-
-	/*
-	 * Atomic exclusive load from addr, it returns the 16-bit content of
-	 * *addr while making it 'monitored',when it is written by someone
-	 * else, the 'monitored' state is cleared and a event is generated
-	 * implicitly to exit WFE.
-	 */
+/*
+ * Atomic exclusive load from addr, it returns the 16-bit content of
+ * *addr while making it 'monitored', when it is written by someone
+ * else, the 'monitored' state is cleared and a event is generated
+ * implicitly to exit WFE.
+ */
 #define __LOAD_EXC_16(src, dst, memorder) {               \
 	if (memorder == __ATOMIC_RELAXED) {               \
 		asm volatile("ldxrh %w[tmp], [%x[addr]]"  \
@@ -58,6 +50,52 @@ rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
 			: "memory");                      \
 	} }
 
+/*
+ * Atomic exclusive load from addr, it returns the 32-bit content of
+ * *addr while making it 'monitored', when it is written by someone
+ * else, the 'monitored' state is cleared and a event is generated
+ * implicitly to exit WFE.
+ */
+#define __LOAD_EXC_32(src, dst, memorder) {              \
+	if (memorder == __ATOMIC_RELAXED) {              \
+		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
+			: [tmp] "=&r" (dst)              \
+			: [addr] "r"(src)                \
+			: "memory");                     \
+	} else {                                         \
+		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
+			: [tmp] "=&r" (dst)              \
+			: [addr] "r"(src)                \
+			: "memory");                     \
+	} }
+
+/*
+ * Atomic exclusive load from addr, it returns the 64-bit content of
+ * *addr while making it 'monitored', when it is written by someone
+ * else, the 'monitored' state is cleared and a event is generated
+ * implicitly to exit WFE.
+ */
+#define __LOAD_EXC_64(src, dst, memorder) {              \
+	if (memorder == __ATOMIC_RELAXED) {              \
+		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
+			: [tmp] "=&r" (dst)              \
+			: [addr] "r"(src)                \
+			: "memory");                     \
+	} else {                                         \
+		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
+			: [tmp] "=&r" (dst)              \
+			: [addr] "r"(src)                \
+			: "memory");                     \
+	} }
+
+static __rte_always_inline void
+rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
+		int memorder)
+{
+	uint16_t value;
+
+	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
+
 	__LOAD_EXC_16(addr, value, memorder)
 	if (value != expected) {
 		__SEVL()
@@ -66,7 +104,6 @@ rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
 			__LOAD_EXC_16(addr, value, memorder)
 		} while (value != expected);
 	}
-#undef __LOAD_EXC_16
 }
 
 static __rte_always_inline void
@@ -77,25 +114,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
 
 	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
 
-	/*
-	 * Atomic exclusive load from addr, it returns the 32-bit content of
-	 * *addr while making it 'monitored',when it is written by someone
-	 * else, the 'monitored' state is cleared and a event is generated
-	 * implicitly to exit WFE.
-	 */
-#define __LOAD_EXC_32(src, dst, memorder) {              \
-	if (memorder == __ATOMIC_RELAXED) {              \
-		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
-			: [tmp] "=&r" (dst)              \
-			: [addr] "r"(src)                \
-			: "memory");                     \
-	} else {                                         \
-		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
-			: [tmp] "=&r" (dst)              \
-			: [addr] "r"(src)                \
-			: "memory");                     \
-	} }
-
 	__LOAD_EXC_32(addr, value, memorder)
 	if (value != expected) {
 		__SEVL()
@@ -104,7 +122,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
 			__LOAD_EXC_32(addr, value, memorder)
 		} while (value != expected);
 	}
-#undef __LOAD_EXC_32
 }
 
 static __rte_always_inline void
@@ -115,25 +132,6 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
 
 	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
 
-	/*
-	 * Atomic exclusive load from addr, it returns the 64-bit content of
-	 * *addr while making it 'monitored',when it is written by someone
-	 * else, the 'monitored' state is cleared and a event is generated
-	 * implicitly to exit WFE.
-	 */
-#define __LOAD_EXC_64(src, dst, memorder) {              \
-	if (memorder == __ATOMIC_RELAXED) {              \
-		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
-			: [tmp] "=&r" (dst)              \
-			: [addr] "r"(src)                \
-			: "memory");                     \
-	} else {                                         \
-		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
-			: [tmp] "=&r" (dst)              \
-			: [addr] "r"(src)                \
-			: "memory");                     \
-	} }
-
 	__LOAD_EXC_64(addr, value, memorder)
 	if (value != expected) {
 		__SEVL()
@@ -143,6 +141,26 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
 		} while (value != expected);
 	}
 }
+
+#define rte_wait_event(addr, mask, expected, cond, memorder, size) \
+do {                                                               \
+	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));         \
+	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&           \
+	memorder != __ATOMIC_RELAXED);                             \
+	RTE_BUILD_BUG_ON(size != 16 && size != 32 && size != 64);  \
+	uint##size_t value;                                        \
+	__LOAD_EXC_##size(addr, value, memorder)                   \
+	if ((value & mask) cond expected) {		           \
+		__SEVL()                                           \
+		do {                                               \
+			__WFE()                                    \
+			__LOAD_EXC_##size(addr, value, memorder)   \
+		} while ((value & mask) cond expected);            \
+	}                                                          \
+} while (0)
+
+#undef __LOAD_EXC_16
+#undef __LOAD_EXC_32
 #undef __LOAD_EXC_64
 
 #undef __SEVL
diff --git a/lib/eal/include/generic/rte_pause.h b/lib/eal/include/generic/rte_pause.h
index 668ee4a184..20a5d2a9fd 100644
--- a/lib/eal/include/generic/rte_pause.h
+++ b/lib/eal/include/generic/rte_pause.h
@@ -111,6 +111,38 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
 	while (__atomic_load_n(addr, memorder) != expected)
 		rte_pause();
 }
+
+/*
+ * Wait until *addr breaks the condition, with a relaxed memory
+ * ordering model meaning the loads around this API can be reordered.
+ *
+ * @param addr
+ *  A pointer to the memory location.
+ * @param mask
+ *  A mask of value bits in interest.
+ * @param expected
+ *  A 16-bit expected value to be in the memory location.
+ * @param cond
+ *  A symbol representing the condition (==, !=).
+ * @param memorder
+ *  Two different memory orders that can be specified:
+ *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
+ *  C++11 memory orders with the same names, see the C++11 standard or
+ *  the GCC wiki on atomic synchronization for detailed definition.
+ * @param size
+ * The bit size of *addr:
+ * It is used for arm architecture to choose load instructions,
+ * and the optional value is 16, 32 and 64.
+ */
+#define rte_wait_event(addr, mask, expected, cond, memorder, size)     \
+do {                                                                   \
+	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));             \
+	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&               \
+				memorder != __ATOMIC_RELAXED);         \
+	RTE_BUILD_BUG_ON(size != 16 && size != 32 && size != 64);      \
+	while ((__atomic_load_n(addr, memorder) & mask) cond expected) \
+		rte_pause();                                           \
+} while (0)
 #endif
 
 #endif /* _RTE_PAUSE_H_ */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [PATCH v4 2/5] eal: use wait event for read pflock
  2021-10-20  8:45   ` [dpdk-dev] [PATCH v4 0/5] add new definitions for wait scheme Feifei Wang
  2021-10-20  8:45     ` [dpdk-dev] [PATCH v4 1/5] eal: " Feifei Wang
@ 2021-10-20  8:45     ` Feifei Wang
  2021-10-20  8:45     ` [dpdk-dev] [PATCH v4 3/5] eal: use wait event scheme for mcslock Feifei Wang
                       ` (2 subsequent siblings)
  4 siblings, 0 replies; 113+ messages in thread
From: Feifei Wang @ 2021-10-20  8:45 UTC (permalink / raw)
  Cc: konstantin.ananyev, dev, nd, Feifei Wang, Ruifeng Wang

Instead of polling for read pflock update, use wait event scheme for
this case.

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 lib/eal/include/generic/rte_pflock.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/lib/eal/include/generic/rte_pflock.h b/lib/eal/include/generic/rte_pflock.h
index e57c179ef2..c1c230d131 100644
--- a/lib/eal/include/generic/rte_pflock.h
+++ b/lib/eal/include/generic/rte_pflock.h
@@ -121,9 +121,7 @@ rte_pflock_read_lock(rte_pflock_t *pf)
 		return;
 
 	/* Wait for current write phase to complete. */
-	while ((__atomic_load_n(&pf->rd.in, __ATOMIC_ACQUIRE)
-		& RTE_PFLOCK_WBITS) == w)
-		rte_pause();
+	rte_wait_event(&pf->rd.in, RTE_PFLOCK_WBITS, w, ==, __ATOMIC_ACQUIRE, 16);
 }
 
 /**
-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [PATCH v4 3/5] eal: use wait event scheme for mcslock
  2021-10-20  8:45   ` [dpdk-dev] [PATCH v4 0/5] add new definitions for wait scheme Feifei Wang
  2021-10-20  8:45     ` [dpdk-dev] [PATCH v4 1/5] eal: " Feifei Wang
  2021-10-20  8:45     ` [dpdk-dev] [PATCH v4 2/5] eal: use wait event for read pflock Feifei Wang
@ 2021-10-20  8:45     ` Feifei Wang
  2021-10-20  8:45     ` [dpdk-dev] [PATCH v4 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration Feifei Wang
  2021-10-20  8:45     ` [dpdk-dev] [PATCH v4 5/5] lib/distributor: use wait event scheme Feifei Wang
  4 siblings, 0 replies; 113+ messages in thread
From: Feifei Wang @ 2021-10-20  8:45 UTC (permalink / raw)
  To: Honnappa Nagarahalli
  Cc: konstantin.ananyev, dev, nd, Feifei Wang, Ruifeng Wang

Instead of polling for mcslock to be updated, use wait event scheme
for this case.

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 lib/eal/include/generic/rte_mcslock.h | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/lib/eal/include/generic/rte_mcslock.h b/lib/eal/include/generic/rte_mcslock.h
index 34f33c64a5..08137c361b 100644
--- a/lib/eal/include/generic/rte_mcslock.h
+++ b/lib/eal/include/generic/rte_mcslock.h
@@ -116,8 +116,13 @@ rte_mcslock_unlock(rte_mcslock_t **msl, rte_mcslock_t *me)
 		/* More nodes added to the queue by other CPUs.
 		 * Wait until the next pointer is set.
 		 */
-		while (__atomic_load_n(&me->next, __ATOMIC_RELAXED) == NULL)
-			rte_pause();
+#ifdef RTE_ARCH_32
+		rte_wait_event((uint32_t *)&me->next, UINT32_MAX, 0, ==,
+				__ATOMIC_RELAXED, 32);
+#else
+		rte_wait_event((uint64_t *)&me->next, UINT64_MAX, 0, ==,
+				__ATOMIC_RELAXED, 64);
+#endif
 	}
 
 	/* Pass lock to next waiter. */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [PATCH v4 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration
  2021-10-20  8:45   ` [dpdk-dev] [PATCH v4 0/5] add new definitions for wait scheme Feifei Wang
                       ` (2 preceding siblings ...)
  2021-10-20  8:45     ` [dpdk-dev] [PATCH v4 3/5] eal: use wait event scheme for mcslock Feifei Wang
@ 2021-10-20  8:45     ` Feifei Wang
  2021-10-20  8:45     ` [dpdk-dev] [PATCH v4 5/5] lib/distributor: use wait event scheme Feifei Wang
  4 siblings, 0 replies; 113+ messages in thread
From: Feifei Wang @ 2021-10-20  8:45 UTC (permalink / raw)
  To: Konstantin Ananyev; +Cc: dev, nd, Feifei Wang, Ruifeng Wang

Instead of polling for cbi->use to be updated, use wait event scheme.

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 lib/bpf/bpf_pkt.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/lib/bpf/bpf_pkt.c b/lib/bpf/bpf_pkt.c
index 6e8248f0d6..00a5748061 100644
--- a/lib/bpf/bpf_pkt.c
+++ b/lib/bpf/bpf_pkt.c
@@ -113,7 +113,7 @@ bpf_eth_cbi_unuse(struct bpf_eth_cbi *cbi)
 static void
 bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
 {
-	uint32_t nuse, puse;
+	uint32_t puse;
 
 	/* make sure all previous loads and stores are completed */
 	rte_smp_mb();
@@ -122,11 +122,8 @@ bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
 
 	/* in use, busy wait till current RX/TX iteration is finished */
 	if ((puse & BPF_ETH_CBI_INUSE) != 0) {
-		do {
-			rte_pause();
-			rte_compiler_barrier();
-			nuse = cbi->use;
-		} while (nuse == puse);
+		rte_wait_event(&cbi->use, UINT32_MAX, puse, ==,
+				__ATOMIC_RELAXED, 32);
 	}
 }
 
-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [PATCH v4 5/5] lib/distributor: use wait event scheme
  2021-10-20  8:45   ` [dpdk-dev] [PATCH v4 0/5] add new definitions for wait scheme Feifei Wang
                       ` (3 preceding siblings ...)
  2021-10-20  8:45     ` [dpdk-dev] [PATCH v4 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration Feifei Wang
@ 2021-10-20  8:45     ` Feifei Wang
  4 siblings, 0 replies; 113+ messages in thread
From: Feifei Wang @ 2021-10-20  8:45 UTC (permalink / raw)
  To: David Hunt; +Cc: konstantin.ananyev, dev, nd, Feifei Wang, Ruifeng Wang

Instead of polling for bufptr64 to be updated, use
wait event for this case.

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 lib/distributor/rte_distributor_single.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/lib/distributor/rte_distributor_single.c b/lib/distributor/rte_distributor_single.c
index f4725b1d0b..c623bb135d 100644
--- a/lib/distributor/rte_distributor_single.c
+++ b/lib/distributor/rte_distributor_single.c
@@ -33,9 +33,8 @@ rte_distributor_request_pkt_single(struct rte_distributor_single *d,
 	union rte_distributor_buffer_single *buf = &d->bufs[worker_id];
 	int64_t req = (((int64_t)(uintptr_t)oldpkt) << RTE_DISTRIB_FLAG_BITS)
 			| RTE_DISTRIB_GET_BUF;
-	while (unlikely(__atomic_load_n(&buf->bufptr64, __ATOMIC_RELAXED)
-			& RTE_DISTRIB_FLAGS_MASK))
-		rte_pause();
+	rte_wait_event(&buf->bufptr64, RTE_DISTRIB_FLAGS_MASK,
+			0, !=, __ATOMIC_RELAXED, 64);
 
 	/* Sync with distributor on GET_BUF flag. */
 	__atomic_store_n(&(buf->bufptr64), req, __ATOMIC_RELEASE);
@@ -74,9 +73,8 @@ rte_distributor_return_pkt_single(struct rte_distributor_single *d,
 	union rte_distributor_buffer_single *buf = &d->bufs[worker_id];
 	uint64_t req = (((int64_t)(uintptr_t)oldpkt) << RTE_DISTRIB_FLAG_BITS)
 			| RTE_DISTRIB_RETURN_BUF;
-	while (unlikely(__atomic_load_n(&buf->bufptr64, __ATOMIC_RELAXED)
-			& RTE_DISTRIB_FLAGS_MASK))
-		rte_pause();
+	rte_wait_event(&buf->bufptr64, RTE_DISTRIB_FLAGS_MASK,
+			0, !=, __ATOMIC_RELAXED, 64);
 
 	/* Sync with distributor on RETURN_BUF flag. */
 	__atomic_store_n(&(buf->bufptr64), req, __ATOMIC_RELEASE);
-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [dpdk-dev] [PATCH v4 1/5] eal: add new definitions for wait scheme
  2021-10-20  8:45     ` [dpdk-dev] [PATCH v4 1/5] eal: " Feifei Wang
@ 2021-10-21 16:24       ` Ananyev, Konstantin
  2021-10-25  9:20         ` [dpdk-dev] 回复: " Feifei Wang
  2021-10-22  0:10       ` [dpdk-dev] " Jerin Jacob
  1 sibling, 1 reply; 113+ messages in thread
From: Ananyev, Konstantin @ 2021-10-21 16:24 UTC (permalink / raw)
  To: Feifei Wang, Ruifeng Wang; +Cc: dev, nd

> Introduce macros as generic interface for address monitoring.
> 
> Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> ---
>  lib/eal/arm/include/rte_pause_64.h  | 126 ++++++++++++++++------------
>  lib/eal/include/generic/rte_pause.h |  32 +++++++
>  2 files changed, 104 insertions(+), 54 deletions(-)
> 
> diff --git a/lib/eal/arm/include/rte_pause_64.h b/lib/eal/arm/include/rte_pause_64.h
> index e87d10b8cc..23954c2de2 100644
> --- a/lib/eal/arm/include/rte_pause_64.h
> +++ b/lib/eal/arm/include/rte_pause_64.h
> @@ -31,20 +31,12 @@ static inline void rte_pause(void)
>  /* Put processor into low power WFE(Wait For Event) state. */
>  #define __WFE() { asm volatile("wfe" : : : "memory"); }
> 
> -static __rte_always_inline void
> -rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> -		int memorder)
> -{
> -	uint16_t value;
> -
> -	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
> -
> -	/*
> -	 * Atomic exclusive load from addr, it returns the 16-bit content of
> -	 * *addr while making it 'monitored',when it is written by someone
> -	 * else, the 'monitored' state is cleared and a event is generated
> -	 * implicitly to exit WFE.
> -	 */
> +/*
> + * Atomic exclusive load from addr, it returns the 16-bit content of
> + * *addr while making it 'monitored', when it is written by someone
> + * else, the 'monitored' state is cleared and a event is generated
> + * implicitly to exit WFE.
> + */
>  #define __LOAD_EXC_16(src, dst, memorder) {               \
>  	if (memorder == __ATOMIC_RELAXED) {               \
>  		asm volatile("ldxrh %w[tmp], [%x[addr]]"  \
> @@ -58,6 +50,52 @@ rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
>  			: "memory");                      \
>  	} }
> 
> +/*
> + * Atomic exclusive load from addr, it returns the 32-bit content of
> + * *addr while making it 'monitored', when it is written by someone
> + * else, the 'monitored' state is cleared and a event is generated
> + * implicitly to exit WFE.
> + */
> +#define __LOAD_EXC_32(src, dst, memorder) {              \
> +	if (memorder == __ATOMIC_RELAXED) {              \
> +		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> +			: [tmp] "=&r" (dst)              \
> +			: [addr] "r"(src)                \
> +			: "memory");                     \
> +	} else {                                         \
> +		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> +			: [tmp] "=&r" (dst)              \
> +			: [addr] "r"(src)                \
> +			: "memory");                     \
> +	} }
> +
> +/*
> + * Atomic exclusive load from addr, it returns the 64-bit content of
> + * *addr while making it 'monitored', when it is written by someone
> + * else, the 'monitored' state is cleared and a event is generated
> + * implicitly to exit WFE.
> + */
> +#define __LOAD_EXC_64(src, dst, memorder) {              \
> +	if (memorder == __ATOMIC_RELAXED) {              \
> +		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> +			: [tmp] "=&r" (dst)              \
> +			: [addr] "r"(src)                \
> +			: "memory");                     \
> +	} else {                                         \
> +		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> +			: [tmp] "=&r" (dst)              \
> +			: [addr] "r"(src)                \
> +			: "memory");                     \
> +	} }
> +
> +static __rte_always_inline void
> +rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> +		int memorder)
> +{
> +	uint16_t value;
> +
> +	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
> +
>  	__LOAD_EXC_16(addr, value, memorder)
>  	if (value != expected) {
>  		__SEVL()
> @@ -66,7 +104,6 @@ rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
>  			__LOAD_EXC_16(addr, value, memorder)
>  		} while (value != expected);
>  	}
> -#undef __LOAD_EXC_16
>  }
> 
>  static __rte_always_inline void
> @@ -77,25 +114,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
> 
>  	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
> 
> -	/*
> -	 * Atomic exclusive load from addr, it returns the 32-bit content of
> -	 * *addr while making it 'monitored',when it is written by someone
> -	 * else, the 'monitored' state is cleared and a event is generated
> -	 * implicitly to exit WFE.
> -	 */
> -#define __LOAD_EXC_32(src, dst, memorder) {              \
> -	if (memorder == __ATOMIC_RELAXED) {              \
> -		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> -			: [tmp] "=&r" (dst)              \
> -			: [addr] "r"(src)                \
> -			: "memory");                     \
> -	} else {                                         \
> -		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> -			: [tmp] "=&r" (dst)              \
> -			: [addr] "r"(src)                \
> -			: "memory");                     \
> -	} }
> -
>  	__LOAD_EXC_32(addr, value, memorder)
>  	if (value != expected) {
>  		__SEVL()
> @@ -104,7 +122,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
>  			__LOAD_EXC_32(addr, value, memorder)
>  		} while (value != expected);
>  	}
> -#undef __LOAD_EXC_32
>  }
> 
>  static __rte_always_inline void
> @@ -115,25 +132,6 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
> 
>  	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
> 
> -	/*
> -	 * Atomic exclusive load from addr, it returns the 64-bit content of
> -	 * *addr while making it 'monitored',when it is written by someone
> -	 * else, the 'monitored' state is cleared and a event is generated
> -	 * implicitly to exit WFE.
> -	 */
> -#define __LOAD_EXC_64(src, dst, memorder) {              \
> -	if (memorder == __ATOMIC_RELAXED) {              \
> -		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> -			: [tmp] "=&r" (dst)              \
> -			: [addr] "r"(src)                \
> -			: "memory");                     \
> -	} else {                                         \
> -		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> -			: [tmp] "=&r" (dst)              \
> -			: [addr] "r"(src)                \
> -			: "memory");                     \
> -	} }
> -
>  	__LOAD_EXC_64(addr, value, memorder)
>  	if (value != expected) {
>  		__SEVL()
> @@ -143,6 +141,26 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
>  		} while (value != expected);
>  	}
>  }
> +
> +#define rte_wait_event(addr, mask, expected, cond, memorder, size) \
> +do {                                                               \
> +	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));         \
> +	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&           \
> +	memorder != __ATOMIC_RELAXED);                             \
> +	RTE_BUILD_BUG_ON(size != 16 && size != 32 && size != 64);  \
> +	uint##size_t value;                                        \
> +	__LOAD_EXC_##size(addr, value, memorder)                   \
> +	if ((value & mask) cond expected) {		           \
> +		__SEVL()                                           \
> +		do {                                               \
> +			__WFE()                                    \
> +			__LOAD_EXC_##size(addr, value, memorder)   \
> +		} while ((value & mask) cond expected);            \
> +	}                                                          \
> +} while (0)
> +
> +#undef __LOAD_EXC_16
> +#undef __LOAD_EXC_32
>  #undef __LOAD_EXC_64
> 
>  #undef __SEVL
> diff --git a/lib/eal/include/generic/rte_pause.h b/lib/eal/include/generic/rte_pause.h
> index 668ee4a184..20a5d2a9fd 100644
> --- a/lib/eal/include/generic/rte_pause.h
> +++ b/lib/eal/include/generic/rte_pause.h
> @@ -111,6 +111,38 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
>  	while (__atomic_load_n(addr, memorder) != expected)
>  		rte_pause();
>  }
> +
> +/*
> + * Wait until *addr breaks the condition, with a relaxed memory
> + * ordering model meaning the loads around this API can be reordered.
> + *
> + * @param addr
> + *  A pointer to the memory location.
> + * @param mask
> + *  A mask of value bits in interest.
> + * @param expected
> + *  A 16-bit expected value to be in the memory location.
> + * @param cond
> + *  A symbol representing the condition (==, !=).
> + * @param memorder
> + *  Two different memory orders that can be specified:
> + *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> + *  C++11 memory orders with the same names, see the C++11 standard or
> + *  the GCC wiki on atomic synchronization for detailed definition.
> + * @param size
> + * The bit size of *addr:
> + * It is used for arm architecture to choose load instructions,
> + * and the optional value is 16, 32 and 64.
> + */
> +#define rte_wait_event(addr, mask, expected, cond, memorder, size)     \
> +do {                                                                   \
> +	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));             \
> +	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&               \
> +				memorder != __ATOMIC_RELAXED);         \
> +	RTE_BUILD_BUG_ON(size != 16 && size != 32 && size != 64);      \

I don't really understand why you need 'size' passed as a parameter.
Can't it be:
size_t size = sizeof(*(addr));
And then:
RTE_BUILD_BUG_ON(size != sizeof(uint16_t) && size != sizeof(uint32_t) && size != sizeof(uint64_t));  
?

> +	while ((__atomic_load_n(addr, memorder) & mask) cond expected) \
> +		rte_pause();                                           \

Just to repeat my own comment from the previous version's review: 
put () around macro parameters in the macro body.
It will save you from a lot of unexpected trouble.

> +} while (0)
>  #endif
> 
>  #endif /* _RTE_PAUSE_H_ */
> --
> 2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [dpdk-dev] [PATCH v4 1/5] eal: add new definitions for wait scheme
  2021-10-20  8:45     ` [dpdk-dev] [PATCH v4 1/5] eal: " Feifei Wang
  2021-10-21 16:24       ` Ananyev, Konstantin
@ 2021-10-22  0:10       ` Jerin Jacob
  2021-10-25  9:30         ` [dpdk-dev] 回复: " Feifei Wang
  1 sibling, 1 reply; 113+ messages in thread
From: Jerin Jacob @ 2021-10-22  0:10 UTC (permalink / raw)
  To: Feifei Wang; +Cc: Ruifeng Wang, Ananyev, Konstantin, dpdk-dev, nd

On Wed, Oct 20, 2021 at 2:16 PM Feifei Wang <feifei.wang2@arm.com> wrote:
>
> Introduce macros as generic interface for address monitoring.
>
> Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> ---
>  lib/eal/arm/include/rte_pause_64.h  | 126 ++++++++++++++++------------
>  lib/eal/include/generic/rte_pause.h |  32 +++++++
>  2 files changed, 104 insertions(+), 54 deletions(-)
>
> diff --git a/lib/eal/arm/include/rte_pause_64.h b/lib/eal/arm/include/rte_pause_64.h
> index e87d10b8cc..23954c2de2 100644
> --- a/lib/eal/arm/include/rte_pause_64.h
> +++ b/lib/eal/arm/include/rte_pause_64.h
> @@ -31,20 +31,12 @@ static inline void rte_pause(void)
>  /* Put processor into low power WFE(Wait For Event) state. */
>  #define __WFE() { asm volatile("wfe" : : : "memory"); }
>
> -static __rte_always_inline void
> -rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> -               int memorder)
> -{
> -       uint16_t value;
> -
> -       assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
> -
> -       /*
> -        * Atomic exclusive load from addr, it returns the 16-bit content of
> -        * *addr while making it 'monitored',when it is written by someone
> -        * else, the 'monitored' state is cleared and a event is generated

a event -> an event in all occurrences.

> -        * implicitly to exit WFE.
> -        */
> +/*
> + * Atomic exclusive load from addr, it returns the 16-bit content of
> + * *addr while making it 'monitored', when it is written by someone
> + * else, the 'monitored' state is cleared and a event is generated
> + * implicitly to exit WFE.
> + */
>  #define __LOAD_EXC_16(src, dst, memorder) {               \
>         if (memorder == __ATOMIC_RELAXED) {               \
>                 asm volatile("ldxrh %w[tmp], [%x[addr]]"  \
> @@ -58,6 +50,52 @@ rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
>                         : "memory");                      \
>         } }
>
> +/*
> + * Atomic exclusive load from addr, it returns the 32-bit content of
> + * *addr while making it 'monitored', when it is written by someone
> + * else, the 'monitored' state is cleared and a event is generated
> + * implicitly to exit WFE.
> + */
> +#define __LOAD_EXC_32(src, dst, memorder) {              \
> +       if (memorder == __ATOMIC_RELAXED) {              \
> +               asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> +                       : [tmp] "=&r" (dst)              \
> +                       : [addr] "r"(src)                \
> +                       : "memory");                     \
> +       } else {                                         \
> +               asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> +                       : [tmp] "=&r" (dst)              \
> +                       : [addr] "r"(src)                \
> +                       : "memory");                     \
> +       } }
> +
> +/*
> + * Atomic exclusive load from addr, it returns the 64-bit content of
> + * *addr while making it 'monitored', when it is written by someone
> + * else, the 'monitored' state is cleared and a event is generated
> + * implicitly to exit WFE.
> + */
> +#define __LOAD_EXC_64(src, dst, memorder) {              \
> +       if (memorder == __ATOMIC_RELAXED) {              \
> +               asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> +                       : [tmp] "=&r" (dst)              \
> +                       : [addr] "r"(src)                \
> +                       : "memory");                     \
> +       } else {                                         \
> +               asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> +                       : [tmp] "=&r" (dst)              \
> +                       : [addr] "r"(src)                \
> +                       : "memory");                     \
> +       } }
> +
> +static __rte_always_inline void
> +rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> +               int memorder)
> +{
> +       uint16_t value;
> +
> +       assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
> +
>         __LOAD_EXC_16(addr, value, memorder)
>         if (value != expected) {
>                 __SEVL()
> @@ -66,7 +104,6 @@ rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
>                         __LOAD_EXC_16(addr, value, memorder)
>                 } while (value != expected);
>         }
> -#undef __LOAD_EXC_16
>  }
>
>  static __rte_always_inline void
> @@ -77,25 +114,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
>
>         assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
>
> -       /*
> -        * Atomic exclusive load from addr, it returns the 32-bit content of
> -        * *addr while making it 'monitored',when it is written by someone
> -        * else, the 'monitored' state is cleared and a event is generated
> -        * implicitly to exit WFE.
> -        */
> -#define __LOAD_EXC_32(src, dst, memorder) {              \
> -       if (memorder == __ATOMIC_RELAXED) {              \
> -               asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> -                       : [tmp] "=&r" (dst)              \
> -                       : [addr] "r"(src)                \
> -                       : "memory");                     \
> -       } else {                                         \
> -               asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> -                       : [tmp] "=&r" (dst)              \
> -                       : [addr] "r"(src)                \
> -                       : "memory");                     \
> -       } }
> -
>         __LOAD_EXC_32(addr, value, memorder)
>         if (value != expected) {
>                 __SEVL()
> @@ -104,7 +122,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
>                         __LOAD_EXC_32(addr, value, memorder)
>                 } while (value != expected);
>         }
> -#undef __LOAD_EXC_32
>  }
>
>  static __rte_always_inline void
> @@ -115,25 +132,6 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
>
>         assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
>
> -       /*
> -        * Atomic exclusive load from addr, it returns the 64-bit content of
> -        * *addr while making it 'monitored',when it is written by someone
> -        * else, the 'monitored' state is cleared and a event is generated
> -        * implicitly to exit WFE.
> -        */
> -#define __LOAD_EXC_64(src, dst, memorder) {              \
> -       if (memorder == __ATOMIC_RELAXED) {              \
> -               asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> -                       : [tmp] "=&r" (dst)              \
> -                       : [addr] "r"(src)                \
> -                       : "memory");                     \
> -       } else {                                         \
> -               asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> -                       : [tmp] "=&r" (dst)              \
> -                       : [addr] "r"(src)                \
> -                       : "memory");                     \
> -       } }
> -
>         __LOAD_EXC_64(addr, value, memorder)
>         if (value != expected) {
>                 __SEVL()
> @@ -143,6 +141,26 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
>                 } while (value != expected);
>         }
>  }
> +
> +#define rte_wait_event(addr, mask, expected, cond, memorder, size) \

I think it is better to swap "cond" and "expected" positions to get
better readability.

 rte_wait_event(&buf->bufptr64, RTE_DISTRIB_FLAGS_MASK, 0, !=,
__ATOMIC_RELAXED, 64);

Vs

 rte_wait_event(&buf->bufptr64, RTE_DISTRIB_FLAGS_MASK, !=, 0,
__ATOMIC_RELAXED, 64);

> +do {                                                               \

Any reason to not make an inline function instead of macro?

> +       RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));         \

Shouldn't we add a __builtin_constant_p(size) check?

> +       RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&           \
> +       memorder != __ATOMIC_RELAXED);                             \
> +       RTE_BUILD_BUG_ON(size != 16 && size != 32 && size != 64);  \
> +       uint##size_t value;


                               \
> +       __LOAD_EXC_##size(addr, value, memorder)                   \
> +       if ((value & mask) cond expected) {                        \
> +               __SEVL()                                           \
> +               do {                                               \
> +                       __WFE()                                    \
> +                       __LOAD_EXC_##size(addr, value, memorder)   \
> +               } while ((value & mask) cond expected);            \
> +       }                                                          \
> +} while (0)
> +
> +#undef __LOAD_EXC_16
> +#undef __LOAD_EXC_32
>  #undef __LOAD_EXC_64
>
>  #undef __SEVL
> diff --git a/lib/eal/include/generic/rte_pause.h b/lib/eal/include/generic/rte_pause.h
> index 668ee4a184..20a5d2a9fd 100644
> --- a/lib/eal/include/generic/rte_pause.h
> +++ b/lib/eal/include/generic/rte_pause.h
> @@ -111,6 +111,38 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
>         while (__atomic_load_n(addr, memorder) != expected)
>                 rte_pause();
>  }
> +
> +/*
> + * Wait until *addr breaks the condition, with a relaxed memory
> + * ordering model meaning the loads around this API can be reordered.
> + *
> + * @param addr
> + *  A pointer to the memory location.
> + * @param mask
> + *  A mask of value bits in interest.
> + * @param expected
> + *  A 16-bit expected value to be in the memory location.
> + * @param cond
> + *  A symbol representing the condition (==, !=).
> + * @param memorder
> + *  Two different memory orders that can be specified:
> + *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> + *  C++11 memory orders with the same names, see the C++11 standard or
> + *  the GCC wiki on atomic synchronization for detailed definition.
> + * @param size
> + * The bit size of *addr:
> + * It is used for arm architecture to choose load instructions,
> + * and the optional value is 16, 32 and 64.
> + */
> +#define rte_wait_event(addr, mask, expected, cond, memorder, size)     \
> +do {                                                                   \
> +       RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));             \
> +       RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&               \
> +                               memorder != __ATOMIC_RELAXED);         \
> +       RTE_BUILD_BUG_ON(size != 16 && size != 32 && size != 64);      \
> +       while ((__atomic_load_n(addr, memorder) & mask) cond expected) \
> +               rte_pause();                                           \
> +} while (0)
>  #endif
>
>  #endif /* _RTE_PAUSE_H_ */
> --
> 2.25.1
>

^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] 回复: [PATCH v4 1/5] eal: add new definitions for wait scheme
  2021-10-21 16:24       ` Ananyev, Konstantin
@ 2021-10-25  9:20         ` Feifei Wang
  2021-10-25 14:28           ` [dpdk-dev] " Ananyev, Konstantin
  0 siblings, 1 reply; 113+ messages in thread
From: Feifei Wang @ 2021-10-25  9:20 UTC (permalink / raw)
  To: Ananyev, Konstantin, Ruifeng Wang; +Cc: dev, nd, nd



> -----邮件原件-----
> 发件人: Ananyev, Konstantin <konstantin.ananyev@intel.com>
> 发送时间: Friday, October 22, 2021 12:25 AM
> 收件人: Feifei Wang <Feifei.Wang2@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>
> 抄送: dev@dpdk.org; nd <nd@arm.com>
> 主题: RE: [PATCH v4 1/5] eal: add new definitions for wait scheme
> 
> > Introduce macros as generic interface for address monitoring.
> >
> > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > ---
> >  lib/eal/arm/include/rte_pause_64.h  | 126
> > ++++++++++++++++------------  lib/eal/include/generic/rte_pause.h |
> > 32 +++++++
> >  2 files changed, 104 insertions(+), 54 deletions(-)
> >
> > diff --git a/lib/eal/arm/include/rte_pause_64.h
> > b/lib/eal/arm/include/rte_pause_64.h
> > index e87d10b8cc..23954c2de2 100644
> > --- a/lib/eal/arm/include/rte_pause_64.h
> > +++ b/lib/eal/arm/include/rte_pause_64.h
> > @@ -31,20 +31,12 @@ static inline void rte_pause(void)
> >  /* Put processor into low power WFE(Wait For Event) state. */
> > #define __WFE() { asm volatile("wfe" : : : "memory"); }
> >
> > -static __rte_always_inline void
> > -rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> > -		int memorder)
> > -{
> > -	uint16_t value;
> > -
> > -	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> __ATOMIC_RELAXED);
> > -
> > -	/*
> > -	 * Atomic exclusive load from addr, it returns the 16-bit content of
> > -	 * *addr while making it 'monitored',when it is written by someone
> > -	 * else, the 'monitored' state is cleared and a event is generated
> > -	 * implicitly to exit WFE.
> > -	 */
> > +/*
> > + * Atomic exclusive load from addr, it returns the 16-bit content of
> > + * *addr while making it 'monitored', when it is written by someone
> > + * else, the 'monitored' state is cleared and a event is generated
> > + * implicitly to exit WFE.
> > + */
> >  #define __LOAD_EXC_16(src, dst, memorder) {               \
> >  	if (memorder == __ATOMIC_RELAXED) {               \
> >  		asm volatile("ldxrh %w[tmp], [%x[addr]]"  \ @@ -58,6 +50,52
> @@
> > rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> >  			: "memory");                      \
> >  	} }
> >
> > +/*
> > + * Atomic exclusive load from addr, it returns the 32-bit content of
> > + * *addr while making it 'monitored', when it is written by someone
> > + * else, the 'monitored' state is cleared and a event is generated
> > + * implicitly to exit WFE.
> > + */
> > +#define __LOAD_EXC_32(src, dst, memorder) {              \
> > +	if (memorder == __ATOMIC_RELAXED) {              \
> > +		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> > +			: [tmp] "=&r" (dst)              \
> > +			: [addr] "r"(src)                \
> > +			: "memory");                     \
> > +	} else {                                         \
> > +		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> > +			: [tmp] "=&r" (dst)              \
> > +			: [addr] "r"(src)                \
> > +			: "memory");                     \
> > +	} }
> > +
> > +/*
> > + * Atomic exclusive load from addr, it returns the 64-bit content of
> > + * *addr while making it 'monitored', when it is written by someone
> > + * else, the 'monitored' state is cleared and a event is generated
> > + * implicitly to exit WFE.
> > + */
> > +#define __LOAD_EXC_64(src, dst, memorder) {              \
> > +	if (memorder == __ATOMIC_RELAXED) {              \
> > +		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> > +			: [tmp] "=&r" (dst)              \
> > +			: [addr] "r"(src)                \
> > +			: "memory");                     \
> > +	} else {                                         \
> > +		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> > +			: [tmp] "=&r" (dst)              \
> > +			: [addr] "r"(src)                \
> > +			: "memory");                     \
> > +	} }
> > +
> > +static __rte_always_inline void
> > +rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> > +		int memorder)
> > +{
> > +	uint16_t value;
> > +
> > +	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > +__ATOMIC_RELAXED);
> > +
> >  	__LOAD_EXC_16(addr, value, memorder)
> >  	if (value != expected) {
> >  		__SEVL()
> > @@ -66,7 +104,6 @@ rte_wait_until_equal_16(volatile uint16_t *addr,
> uint16_t expected,
> >  			__LOAD_EXC_16(addr, value, memorder)
> >  		} while (value != expected);
> >  	}
> > -#undef __LOAD_EXC_16
> >  }
> >
> >  static __rte_always_inline void
> > @@ -77,25 +114,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr,
> > uint32_t expected,
> >
> >  	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > __ATOMIC_RELAXED);
> >
> > -	/*
> > -	 * Atomic exclusive load from addr, it returns the 32-bit content of
> > -	 * *addr while making it 'monitored',when it is written by someone
> > -	 * else, the 'monitored' state is cleared and a event is generated
> > -	 * implicitly to exit WFE.
> > -	 */
> > -#define __LOAD_EXC_32(src, dst, memorder) {              \
> > -	if (memorder == __ATOMIC_RELAXED) {              \
> > -		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> > -			: [tmp] "=&r" (dst)              \
> > -			: [addr] "r"(src)                \
> > -			: "memory");                     \
> > -	} else {                                         \
> > -		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> > -			: [tmp] "=&r" (dst)              \
> > -			: [addr] "r"(src)                \
> > -			: "memory");                     \
> > -	} }
> > -
> >  	__LOAD_EXC_32(addr, value, memorder)
> >  	if (value != expected) {
> >  		__SEVL()
> > @@ -104,7 +122,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr,
> uint32_t expected,
> >  			__LOAD_EXC_32(addr, value, memorder)
> >  		} while (value != expected);
> >  	}
> > -#undef __LOAD_EXC_32
> >  }
> >
> >  static __rte_always_inline void
> > @@ -115,25 +132,6 @@ rte_wait_until_equal_64(volatile uint64_t *addr,
> > uint64_t expected,
> >
> >  	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > __ATOMIC_RELAXED);
> >
> > -	/*
> > -	 * Atomic exclusive load from addr, it returns the 64-bit content of
> > -	 * *addr while making it 'monitored',when it is written by someone
> > -	 * else, the 'monitored' state is cleared and a event is generated
> > -	 * implicitly to exit WFE.
> > -	 */
> > -#define __LOAD_EXC_64(src, dst, memorder) {              \
> > -	if (memorder == __ATOMIC_RELAXED) {              \
> > -		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> > -			: [tmp] "=&r" (dst)              \
> > -			: [addr] "r"(src)                \
> > -			: "memory");                     \
> > -	} else {                                         \
> > -		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> > -			: [tmp] "=&r" (dst)              \
> > -			: [addr] "r"(src)                \
> > -			: "memory");                     \
> > -	} }
> > -
> >  	__LOAD_EXC_64(addr, value, memorder)
> >  	if (value != expected) {
> >  		__SEVL()
> > @@ -143,6 +141,26 @@ rte_wait_until_equal_64(volatile uint64_t *addr,
> uint64_t expected,
> >  		} while (value != expected);
> >  	}
> >  }
> > +
> > +#define rte_wait_event(addr, mask, expected, cond, memorder, size) \
> > +do {                                                               \
> > +	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));         \
> > +	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&           \
> > +	memorder != __ATOMIC_RELAXED);                             \
> > +	RTE_BUILD_BUG_ON(size != 16 && size != 32 && size != 64);  \
> > +	uint##size_t value;                                        \
> > +	__LOAD_EXC_##size(addr, value, memorder)                   \
> > +	if ((value & mask) cond expected) {		           \
> > +		__SEVL()                                           \
> > +		do {                                               \
> > +			__WFE()                                    \
> > +			__LOAD_EXC_##size(addr, value, memorder)   \
> > +		} while ((value & mask) cond expected);            \
> > +	}                                                          \
> > +} while (0)
> > +
> > +#undef __LOAD_EXC_16
> > +#undef __LOAD_EXC_32
> >  #undef __LOAD_EXC_64
> >
> >  #undef __SEVL
> > diff --git a/lib/eal/include/generic/rte_pause.h
> > b/lib/eal/include/generic/rte_pause.h
> > index 668ee4a184..20a5d2a9fd 100644
> > --- a/lib/eal/include/generic/rte_pause.h
> > +++ b/lib/eal/include/generic/rte_pause.h
> > @@ -111,6 +111,38 @@ rte_wait_until_equal_64(volatile uint64_t *addr,
> uint64_t expected,
> >  	while (__atomic_load_n(addr, memorder) != expected)
> >  		rte_pause();
> >  }
> > +
> > +/*
> > + * Wait until *addr breaks the condition, with a relaxed memory
> > + * ordering model meaning the loads around this API can be reordered.
> > + *
> > + * @param addr
> > + *  A pointer to the memory location.
> > + * @param mask
> > + *  A mask of value bits in interest.
> > + * @param expected
> > + *  A 16-bit expected value to be in the memory location.
> > + * @param cond
> > + *  A symbol representing the condition (==, !=).
> > + * @param memorder
> > + *  Two different memory orders that can be specified:
> > + *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> > + *  C++11 memory orders with the same names, see the C++11 standard
> > +or
> > + *  the GCC wiki on atomic synchronization for detailed definition.
> > + * @param size
> > + * The bit size of *addr:
> > + * It is used for arm architecture to choose load instructions,
> > + * and the optional value is 16, 32 and 64.
> > + */
> > +#define rte_wait_event(addr, mask, expected, cond, memorder, size)     \
> > +do {                                                                   \
> > +	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));             \
> > +	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&               \
> > +				memorder != __ATOMIC_RELAXED);         \
> > +	RTE_BUILD_BUG_ON(size != 16 && size != 32 && size != 64);      \
> 
> I don't' really understand why you do need 'size' passed as parameter.
> Can't it be:
> size_t size = sizeof(*(addr));
> And then:
> RTE_BUILD_BUG_ON(size != sizeof(uint16_t) && size != sizeof(uint32_t) &&
> size != sizeof(uint64_t)); ?
> 
> > +	while ((__atomic_load_n(addr, memorder) & mask) cond expected) \
> > +		rte_pause();                                           \
> 
> Just to repeat my own comment from previous version review:
> put () around macro parameters in the macro body.
> Will save from a lot of unexpected troubles.

Sorry, I didn't catch the point.
In this version, I initially wanted to use '__LOAD_EXC_##size' to select the
load instruction, so I passed 'size' as a parameter. In the next version, I
will update this as:

#define __LOAD_EXC(src, dst, memorder, size) {    \
	if (size == 16)                               \
		__LOAD_EXC_16(src, dst, memorder)     \
	else if (size == 32)                          \
		__LOAD_EXC_32(src, dst, memorder)     \
	else if (size == 64)                          \
		__LOAD_EXC_64(src, dst, memorder)     \
}

#define rte_wait_event(addr, mask, cond, expected, memorder)    \
do {                                                            \
	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));      \
	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&        \
				memorder != __ATOMIC_RELAXED);  \
	uint32_t size = sizeof(*addr) << 3;                     \
	typeof(*addr) value = 0;                                \
	__LOAD_EXC(addr, value, memorder, size)                 \
	if ((value & mask) cond expected) {                     \
		__SEVL()                                        \
		do {                                            \
			__WFE()                                 \
			__LOAD_EXC(addr, value, memorder, size) \
		} while ((value & mask) cond expected);         \
	}                                                       \
} while (0)

^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] 回复:  [PATCH v4 1/5] eal: add new definitions for wait scheme
  2021-10-22  0:10       ` [dpdk-dev] " Jerin Jacob
@ 2021-10-25  9:30         ` Feifei Wang
  2021-10-25  9:43           ` [dpdk-dev] " Jerin Jacob
  0 siblings, 1 reply; 113+ messages in thread
From: Feifei Wang @ 2021-10-25  9:30 UTC (permalink / raw)
  To: Jerin Jacob; +Cc: Ruifeng Wang, Ananyev, Konstantin, dpdk-dev, nd, nd

> -----邮件原件-----
> 发件人: Jerin Jacob <jerinjacobk@gmail.com>
> 发送时间: Friday, October 22, 2021 8:10 AM
> 收件人: Feifei Wang <Feifei.Wang2@arm.com>
> 抄送: Ruifeng Wang <Ruifeng.Wang@arm.com>; Ananyev, Konstantin
> <konstantin.ananyev@intel.com>; dpdk-dev <dev@dpdk.org>; nd
> <nd@arm.com>
> 主题: Re: [dpdk-dev] [PATCH v4 1/5] eal: add new definitions for wait scheme
> 
> On Wed, Oct 20, 2021 at 2:16 PM Feifei Wang <feifei.wang2@arm.com>
> wrote:
> >
> > Introduce macros as generic interface for address monitoring.
> >
> > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > ---
> >  lib/eal/arm/include/rte_pause_64.h  | 126
> > ++++++++++++++++------------  lib/eal/include/generic/rte_pause.h |
> > 32 +++++++
> >  2 files changed, 104 insertions(+), 54 deletions(-)
> >
> > diff --git a/lib/eal/arm/include/rte_pause_64.h
> > b/lib/eal/arm/include/rte_pause_64.h
> > index e87d10b8cc..23954c2de2 100644
> > --- a/lib/eal/arm/include/rte_pause_64.h
> > +++ b/lib/eal/arm/include/rte_pause_64.h
> > @@ -31,20 +31,12 @@ static inline void rte_pause(void)
> >  /* Put processor into low power WFE(Wait For Event) state. */
> > #define __WFE() { asm volatile("wfe" : : : "memory"); }
> >
> > -static __rte_always_inline void
> > -rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> > -               int memorder)
> > -{
> > -       uint16_t value;
> > -
> > -       assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> __ATOMIC_RELAXED);
> > -
> > -       /*
> > -        * Atomic exclusive load from addr, it returns the 16-bit content of
> > -        * *addr while making it 'monitored',when it is written by someone
> > -        * else, the 'monitored' state is cleared and a event is generated
> 
> a event -> an event in all the occurrence.
> 
> > -        * implicitly to exit WFE.
> > -        */
> > +/*
> > + * Atomic exclusive load from addr, it returns the 16-bit content of
> > + * *addr while making it 'monitored', when it is written by someone
> > + * else, the 'monitored' state is cleared and a event is generated
> > + * implicitly to exit WFE.
> > + */
> >  #define __LOAD_EXC_16(src, dst, memorder) {               \
> >         if (memorder == __ATOMIC_RELAXED) {               \
> >                 asm volatile("ldxrh %w[tmp], [%x[addr]]"  \ @@ -58,6
> > +50,52 @@ rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t
> expected,
> >                         : "memory");                      \
> >         } }
> >
> > +/*
> > + * Atomic exclusive load from addr, it returns the 32-bit content of
> > + * *addr while making it 'monitored', when it is written by someone
> > + * else, the 'monitored' state is cleared and a event is generated
> > + * implicitly to exit WFE.
> > + */
> > +#define __LOAD_EXC_32(src, dst, memorder) {              \
> > +       if (memorder == __ATOMIC_RELAXED) {              \
> > +               asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> > +                       : [tmp] "=&r" (dst)              \
> > +                       : [addr] "r"(src)                \
> > +                       : "memory");                     \
> > +       } else {                                         \
> > +               asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> > +                       : [tmp] "=&r" (dst)              \
> > +                       : [addr] "r"(src)                \
> > +                       : "memory");                     \
> > +       } }
> > +
> > +/*
> > + * Atomic exclusive load from addr, it returns the 64-bit content of
> > + * *addr while making it 'monitored', when it is written by someone
> > + * else, the 'monitored' state is cleared and a event is generated
> > + * implicitly to exit WFE.
> > + */
> > +#define __LOAD_EXC_64(src, dst, memorder) {              \
> > +       if (memorder == __ATOMIC_RELAXED) {              \
> > +               asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> > +                       : [tmp] "=&r" (dst)              \
> > +                       : [addr] "r"(src)                \
> > +                       : "memory");                     \
> > +       } else {                                         \
> > +               asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> > +                       : [tmp] "=&r" (dst)              \
> > +                       : [addr] "r"(src)                \
> > +                       : "memory");                     \
> > +       } }
> > +
> > +static __rte_always_inline void
> > +rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> > +               int memorder)
> > +{
> > +       uint16_t value;
> > +
> > +       assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > + __ATOMIC_RELAXED);
> > +
> >         __LOAD_EXC_16(addr, value, memorder)
> >         if (value != expected) {
> >                 __SEVL()
> > @@ -66,7 +104,6 @@ rte_wait_until_equal_16(volatile uint16_t *addr,
> uint16_t expected,
> >                         __LOAD_EXC_16(addr, value, memorder)
> >                 } while (value != expected);
> >         }
> > -#undef __LOAD_EXC_16
> >  }
> >
> >  static __rte_always_inline void
> > @@ -77,25 +114,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr,
> > uint32_t expected,
> >
> >         assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > __ATOMIC_RELAXED);
> >
> > -       /*
> > -        * Atomic exclusive load from addr, it returns the 32-bit content of
> > -        * *addr while making it 'monitored',when it is written by someone
> > -        * else, the 'monitored' state is cleared and a event is generated
> > -        * implicitly to exit WFE.
> > -        */
> > -#define __LOAD_EXC_32(src, dst, memorder) {              \
> > -       if (memorder == __ATOMIC_RELAXED) {              \
> > -               asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> > -                       : [tmp] "=&r" (dst)              \
> > -                       : [addr] "r"(src)                \
> > -                       : "memory");                     \
> > -       } else {                                         \
> > -               asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> > -                       : [tmp] "=&r" (dst)              \
> > -                       : [addr] "r"(src)                \
> > -                       : "memory");                     \
> > -       } }
> > -
> >         __LOAD_EXC_32(addr, value, memorder)
> >         if (value != expected) {
> >                 __SEVL()
> > @@ -104,7 +122,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr,
> uint32_t expected,
> >                         __LOAD_EXC_32(addr, value, memorder)
> >                 } while (value != expected);
> >         }
> > -#undef __LOAD_EXC_32
> >  }
> >
> >  static __rte_always_inline void
> > @@ -115,25 +132,6 @@ rte_wait_until_equal_64(volatile uint64_t *addr,
> > uint64_t expected,
> >
> >         assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > __ATOMIC_RELAXED);
> >
> > -       /*
> > -        * Atomic exclusive load from addr, it returns the 64-bit content of
> > -        * *addr while making it 'monitored',when it is written by someone
> > -        * else, the 'monitored' state is cleared and a event is generated
> > -        * implicitly to exit WFE.
> > -        */
> > -#define __LOAD_EXC_64(src, dst, memorder) {              \
> > -       if (memorder == __ATOMIC_RELAXED) {              \
> > -               asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> > -                       : [tmp] "=&r" (dst)              \
> > -                       : [addr] "r"(src)                \
> > -                       : "memory");                     \
> > -       } else {                                         \
> > -               asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> > -                       : [tmp] "=&r" (dst)              \
> > -                       : [addr] "r"(src)                \
> > -                       : "memory");                     \
> > -       } }
> > -
> >         __LOAD_EXC_64(addr, value, memorder)
> >         if (value != expected) {
> >                 __SEVL()
> > @@ -143,6 +141,26 @@ rte_wait_until_equal_64(volatile uint64_t *addr,
> uint64_t expected,
> >                 } while (value != expected);
> >         }
> >  }
> > +
> > +#define rte_wait_event(addr, mask, expected, cond, memorder, size) \
> 
> I think it is better to swap "cond" and "expected" positions to get better
> readability.
Thanks for the comment; this ordering is more readable than before, and I will update it in the next version.
> 
>  rte_wait_event(&buf->bufptr64, RTE_DISTRIB_FLAGS_MASK, 0, !=,
> __ATOMIC_RELAXED, 64);
> 
> Vs
> 
>  rte_wait_event(&buf->bufptr64, RTE_DISTRIB_FLAGS_MASK, !=, 0,
> __ATOMIC_RELAXED, 64);
> 
> > +do {                                                               \
> 
> Any reason to not make an inline function instead of macro?
A macro was chosen because an inline-function approach would have required many
new APIs for the different cases; we also referred to the Linux 'wait_event'
code as an example. Please see the first version and its discussion:
http://patches.dpdk.org/project/dpdk/cover/20210902053253.3017858-1-feifei.wang2@arm.com/
> 
> > +       RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));         \
> 
> Should n't we add __builtin_constant_p(size) of check?

Please see the discussion with Konstantin.
'size' will no longer be a parameter (it will be derived from sizeof(*addr)),
so it is unnecessary to check it with RTE_BUILD_BUG_ON.
> 
> > +       RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&           \
> > +       memorder != __ATOMIC_RELAXED);                             \
> > +       RTE_BUILD_BUG_ON(size != 16 && size != 32 && size != 64);  \
> > +       uint##size_t value;
> 
> 
>                                \
> > +       __LOAD_EXC_##size(addr, value, memorder)                   \
> > +       if ((value & mask) cond expected) {                        \
> > +               __SEVL()                                           \
> > +               do {                                               \
> > +                       __WFE()                                    \
> > +                       __LOAD_EXC_##size(addr, value, memorder)   \
> > +               } while ((value & mask) cond expected);            \
> > +       }                                                          \
> > +} while (0)
> > +
> > +#undef __LOAD_EXC_16
> > +#undef __LOAD_EXC_32
> >  #undef __LOAD_EXC_64
> >
> >  #undef __SEVL
> > diff --git a/lib/eal/include/generic/rte_pause.h
> > b/lib/eal/include/generic/rte_pause.h
> > index 668ee4a184..20a5d2a9fd 100644
> > --- a/lib/eal/include/generic/rte_pause.h
> > +++ b/lib/eal/include/generic/rte_pause.h
> > @@ -111,6 +111,38 @@ rte_wait_until_equal_64(volatile uint64_t *addr,
> uint64_t expected,
> >         while (__atomic_load_n(addr, memorder) != expected)
> >                 rte_pause();
> >  }
> > +
> > +/*
> > + * Wait until *addr breaks the condition, with a relaxed memory
> > + * ordering model meaning the loads around this API can be reordered.
> > + *
> > + * @param addr
> > + *  A pointer to the memory location.
> > + * @param mask
> > + *  A mask of value bits in interest.
> > + * @param expected
> > + *  A 16-bit expected value to be in the memory location.
> > + * @param cond
> > + *  A symbol representing the condition (==, !=).
> > + * @param memorder
> > + *  Two different memory orders that can be specified:
> > + *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> > + *  C++11 memory orders with the same names, see the C++11 standard
> > +or
> > + *  the GCC wiki on atomic synchronization for detailed definition.
> > + * @param size
> > + * The bit size of *addr:
> > + * It is used for arm architecture to choose load instructions,
> > + * and the optional value is 16, 32 and 64.
> > + */
> > +#define rte_wait_event(addr, mask, expected, cond, memorder, size)     \
> > +do {                                                                   \
> > +       RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));             \
> > +       RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&               \
> > +                               memorder != __ATOMIC_RELAXED);         \
> > +       RTE_BUILD_BUG_ON(size != 16 && size != 32 && size != 64);      \
> > +       while ((__atomic_load_n(addr, memorder) & mask) cond expected) \
> > +               rte_pause();                                           \
> > +} while (0)
> >  #endif
> >
> >  #endif /* _RTE_PAUSE_H_ */
> > --
> > 2.25.1
> >

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [dpdk-dev] [PATCH v4 1/5] eal: add new definitions for wait scheme
  2021-10-25  9:30         ` [dpdk-dev] 回复: " Feifei Wang
@ 2021-10-25  9:43           ` Jerin Jacob
  2021-10-26  1:11             ` [dpdk-dev] 回复: " Feifei Wang
  0 siblings, 1 reply; 113+ messages in thread
From: Jerin Jacob @ 2021-10-25  9:43 UTC (permalink / raw)
  To: Feifei Wang; +Cc: Ruifeng Wang, Ananyev, Konstantin, dpdk-dev, nd

On Mon, Oct 25, 2021 at 3:01 PM Feifei Wang <Feifei.Wang2@arm.com> wrote:
>
> > -----邮件原件-----
> > 发件人: Jerin Jacob <jerinjacobk@gmail.com>
> > 发送时间: Friday, October 22, 2021 8:10 AM
> > 收件人: Feifei Wang <Feifei.Wang2@arm.com>
> > 抄送: Ruifeng Wang <Ruifeng.Wang@arm.com>; Ananyev, Konstantin
> > <konstantin.ananyev@intel.com>; dpdk-dev <dev@dpdk.org>; nd
> > <nd@arm.com>
> > 主题: Re: [dpdk-dev] [PATCH v4 1/5] eal: add new definitions for wait scheme
> >
> > On Wed, Oct 20, 2021 at 2:16 PM Feifei Wang <feifei.wang2@arm.com>
> > wrote:
> > >
> > > Introduce macros as generic interface for address monitoring.
> > >
> > > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > > ---
> > >  lib/eal/arm/include/rte_pause_64.h  | 126
> > > ++++++++++++++++------------  lib/eal/include/generic/rte_pause.h |
> > > 32 +++++++
> > >  2 files changed, 104 insertions(+), 54 deletions(-)
> > >
> > > diff --git a/lib/eal/arm/include/rte_pause_64.h
> > > b/lib/eal/arm/include/rte_pause_64.h
> > > index e87d10b8cc..23954c2de2 100644
> > > --- a/lib/eal/arm/include/rte_pause_64.h
> > > +++ b/lib/eal/arm/include/rte_pause_64.h
> > > @@ -31,20 +31,12 @@ static inline void rte_pause(void)
> > >  /* Put processor into low power WFE(Wait For Event) state. */
> > > #define __WFE() { asm volatile("wfe" : : : "memory"); }
> > >
> > > -static __rte_always_inline void
> > > -rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> > > -               int memorder)
> > > -{
> > > -       uint16_t value;
> > > -
> > > -       assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > __ATOMIC_RELAXED);
> > > -
> > > -       /*
> > > -        * Atomic exclusive load from addr, it returns the 16-bit content of
> > > -        * *addr while making it 'monitored',when it is written by someone
> > > -        * else, the 'monitored' state is cleared and a event is generated
> >
> > "a event" -> "an event", in all occurrences.
> >
> > > -        * implicitly to exit WFE.
> > > -        */
> > > +/*
> > > + * Atomic exclusive load from addr, it returns the 16-bit content of
> > > + * *addr while making it 'monitored', when it is written by someone
> > > + * else, the 'monitored' state is cleared and a event is generated
> > > + * implicitly to exit WFE.
> > > + */
> > >  #define __LOAD_EXC_16(src, dst, memorder) {               \
> > >         if (memorder == __ATOMIC_RELAXED) {               \
> > >                 asm volatile("ldxrh %w[tmp], [%x[addr]]"  \ @@ -58,6
> > > +50,52 @@ rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t
> > expected,
> > >                         : "memory");                      \
> > >         } }
> > >
> > > +/*
> > > + * Atomic exclusive load from addr, it returns the 32-bit content of
> > > + * *addr while making it 'monitored', when it is written by someone
> > > + * else, the 'monitored' state is cleared and a event is generated
> > > + * implicitly to exit WFE.
> > > + */
> > > +#define __LOAD_EXC_32(src, dst, memorder) {              \
> > > +       if (memorder == __ATOMIC_RELAXED) {              \
> > > +               asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> > > +                       : [tmp] "=&r" (dst)              \
> > > +                       : [addr] "r"(src)                \
> > > +                       : "memory");                     \
> > > +       } else {                                         \
> > > +               asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> > > +                       : [tmp] "=&r" (dst)              \
> > > +                       : [addr] "r"(src)                \
> > > +                       : "memory");                     \
> > > +       } }
> > > +
> > > +/*
> > > + * Atomic exclusive load from addr, it returns the 64-bit content of
> > > + * *addr while making it 'monitored', when it is written by someone
> > > + * else, the 'monitored' state is cleared and a event is generated
> > > + * implicitly to exit WFE.
> > > + */
> > > +#define __LOAD_EXC_64(src, dst, memorder) {              \
> > > +       if (memorder == __ATOMIC_RELAXED) {              \
> > > +               asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> > > +                       : [tmp] "=&r" (dst)              \
> > > +                       : [addr] "r"(src)                \
> > > +                       : "memory");                     \
> > > +       } else {                                         \
> > > +               asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> > > +                       : [tmp] "=&r" (dst)              \
> > > +                       : [addr] "r"(src)                \
> > > +                       : "memory");                     \
> > > +       } }
> > > +
> > > +static __rte_always_inline void
> > > +rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> > > +               int memorder)
> > > +{
> > > +       uint16_t value;
> > > +
> > > +       assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > + __ATOMIC_RELAXED);
> > > +
> > >         __LOAD_EXC_16(addr, value, memorder)
> > >         if (value != expected) {
> > >                 __SEVL()
> > > @@ -66,7 +104,6 @@ rte_wait_until_equal_16(volatile uint16_t *addr,
> > uint16_t expected,
> > >                         __LOAD_EXC_16(addr, value, memorder)
> > >                 } while (value != expected);
> > >         }
> > > -#undef __LOAD_EXC_16
> > >  }
> > >
> > >  static __rte_always_inline void
> > > @@ -77,25 +114,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr,
> > > uint32_t expected,
> > >
> > >         assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > __ATOMIC_RELAXED);
> > >
> > > -       /*
> > > -        * Atomic exclusive load from addr, it returns the 32-bit content of
> > > -        * *addr while making it 'monitored',when it is written by someone
> > > -        * else, the 'monitored' state is cleared and a event is generated
> > > -        * implicitly to exit WFE.
> > > -        */
> > > -#define __LOAD_EXC_32(src, dst, memorder) {              \
> > > -       if (memorder == __ATOMIC_RELAXED) {              \
> > > -               asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> > > -                       : [tmp] "=&r" (dst)              \
> > > -                       : [addr] "r"(src)                \
> > > -                       : "memory");                     \
> > > -       } else {                                         \
> > > -               asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> > > -                       : [tmp] "=&r" (dst)              \
> > > -                       : [addr] "r"(src)                \
> > > -                       : "memory");                     \
> > > -       } }
> > > -
> > >         __LOAD_EXC_32(addr, value, memorder)
> > >         if (value != expected) {
> > >                 __SEVL()
> > > @@ -104,7 +122,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr,
> > uint32_t expected,
> > >                         __LOAD_EXC_32(addr, value, memorder)
> > >                 } while (value != expected);
> > >         }
> > > -#undef __LOAD_EXC_32
> > >  }
> > >
> > >  static __rte_always_inline void
> > > @@ -115,25 +132,6 @@ rte_wait_until_equal_64(volatile uint64_t *addr,
> > > uint64_t expected,
> > >
> > >         assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > __ATOMIC_RELAXED);
> > >
> > > -       /*
> > > -        * Atomic exclusive load from addr, it returns the 64-bit content of
> > > -        * *addr while making it 'monitored',when it is written by someone
> > > -        * else, the 'monitored' state is cleared and a event is generated
> > > -        * implicitly to exit WFE.
> > > -        */
> > > -#define __LOAD_EXC_64(src, dst, memorder) {              \
> > > -       if (memorder == __ATOMIC_RELAXED) {              \
> > > -               asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> > > -                       : [tmp] "=&r" (dst)              \
> > > -                       : [addr] "r"(src)                \
> > > -                       : "memory");                     \
> > > -       } else {                                         \
> > > -               asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> > > -                       : [tmp] "=&r" (dst)              \
> > > -                       : [addr] "r"(src)                \
> > > -                       : "memory");                     \
> > > -       } }
> > > -
> > >         __LOAD_EXC_64(addr, value, memorder)
> > >         if (value != expected) {
> > >                 __SEVL()
> > > @@ -143,6 +141,26 @@ rte_wait_until_equal_64(volatile uint64_t *addr,
> > uint64_t expected,
> > >                 } while (value != expected);
> > >         }
> > >  }
> > > +
> > > +#define rte_wait_event(addr, mask, expected, cond, memorder, size) \
> >
> > I think it is better to swap "cond" and "expected" positions to get better
> > readability.
> Thanks for the comments, it is better than before and I will update in the next version.
> >
> >  rte_wait_event(&buf->bufptr64, RTE_DISTRIB_FLAGS_MASK, 0, !=,
> > __ATOMIC_RELAXED, 64);
> >
> > Vs
> >
> >  rte_wait_event(&buf->bufptr64, RTE_DISTRIB_FLAGS_MASK, !=, 0,
> > __ATOMIC_RELAXED, 64);
> >
> > > +do {                                                               \
> >
> > Any reason to not make an inline function instead of macro?
> Because there were many new APIs for different cases, we referred to the
> Linux 'wait_event' code as an example. Please see the first version and its discussion:
> http://patches.dpdk.org/project/dpdk/cover/20210902053253.3017858-1-feifei.wang2@arm.com/


OK.


> >
> > > +       RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));         \
> >
> > Should n't we add __builtin_constant_p(size) of check?
>
> Please see the discussion with Konstantin.
> 'size' will not be passed as a parameter, so it is unnecessary to check it with build_bug.

Makes sense to remove 'size'. My comment was meant for the case
where 'size' is required to be passed.

> >
> > > +       RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&           \
> > > +       memorder != __ATOMIC_RELAXED);                             \
> > > +       RTE_BUILD_BUG_ON(size != 16 && size != 32 && size != 64);  \
> > > +       uint##size_t value;
> >
> >
> >                                \
> > > +       __LOAD_EXC_##size(addr, value, memorder)                   \
> > > +       if ((value & mask) cond expected) {                        \
> > > +               __SEVL()                                           \
> > > +               do {                                               \
> > > +                       __WFE()                                    \
> > > +                       __LOAD_EXC_##size(addr, value, memorder)   \
> > > +               } while ((value & mask) cond expected);            \
> > > +       }                                                          \
> > > +} while (0)
> > > +
> > > +#undef __LOAD_EXC_16
> > > +#undef __LOAD_EXC_32
> > >  #undef __LOAD_EXC_64
> > >
> > >  #undef __SEVL
> > > diff --git a/lib/eal/include/generic/rte_pause.h
> > > b/lib/eal/include/generic/rte_pause.h
> > > index 668ee4a184..20a5d2a9fd 100644
> > > --- a/lib/eal/include/generic/rte_pause.h
> > > +++ b/lib/eal/include/generic/rte_pause.h
> > > @@ -111,6 +111,38 @@ rte_wait_until_equal_64(volatile uint64_t *addr,
> > uint64_t expected,
> > >         while (__atomic_load_n(addr, memorder) != expected)
> > >                 rte_pause();
> > >  }
> > > +
> > > +/*
> > > + * Wait until *addr breaks the condition, with a relaxed memory
> > > + * ordering model meaning the loads around this API can be reordered.
> > > + *
> > > + * @param addr
> > > + *  A pointer to the memory location.
> > > + * @param mask
> > > + *  A mask of value bits in interest.
> > > + * @param expected
> > > + *  A 16-bit expected value to be in the memory location.
> > > + * @param cond
> > > + *  A symbol representing the condition (==, !=).
> > > + * @param memorder
> > > + *  Two different memory orders that can be specified:
> > > + *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> > > + *  C++11 memory orders with the same names, see the C++11 standard
> > > +or
> > > + *  the GCC wiki on atomic synchronization for detailed definition.
> > > + * @param size
> > > + * The bit size of *addr:
> > > + * It is used for arm architecture to choose load instructions,
> > > + * and the optional value is 16, 32 and 64.
> > > + */
> > > +#define rte_wait_event(addr, mask, expected, cond, memorder, size)     \
> > > +do {                                                                   \
> > > +       RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));             \
> > > +       RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&               \
> > > +                               memorder != __ATOMIC_RELAXED);         \
> > > +       RTE_BUILD_BUG_ON(size != 16 && size != 32 && size != 64);      \
> > > +       while ((__atomic_load_n(addr, memorder) & mask) cond expected) \
> > > +               rte_pause();                                           \
> > > +} while (0)
> > >  #endif
> > >
> > >  #endif /* _RTE_PAUSE_H_ */
> > > --
> > > 2.25.1
> > >

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [dpdk-dev] [PATCH v4 1/5] eal: add new definitions for wait scheme
  2021-10-25  9:20         ` [dpdk-dev] 回复: " Feifei Wang
@ 2021-10-25 14:28           ` Ananyev, Konstantin
  2021-10-26  1:08             ` [dpdk-dev] 回复: " Feifei Wang
  0 siblings, 1 reply; 113+ messages in thread
From: Ananyev, Konstantin @ 2021-10-25 14:28 UTC (permalink / raw)
  To: Feifei Wang, Ruifeng Wang; +Cc: dev, nd, nd


> > > Introduce macros as generic interface for address monitoring.
> > >
> > > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > > ---
> > >  lib/eal/arm/include/rte_pause_64.h  | 126
> > > ++++++++++++++++------------  lib/eal/include/generic/rte_pause.h |
> > > 32 +++++++
> > >  2 files changed, 104 insertions(+), 54 deletions(-)
> > >
> > > diff --git a/lib/eal/arm/include/rte_pause_64.h
> > > b/lib/eal/arm/include/rte_pause_64.h
> > > index e87d10b8cc..23954c2de2 100644
> > > --- a/lib/eal/arm/include/rte_pause_64.h
> > > +++ b/lib/eal/arm/include/rte_pause_64.h
> > > @@ -31,20 +31,12 @@ static inline void rte_pause(void)
> > >  /* Put processor into low power WFE(Wait For Event) state. */
> > > #define __WFE() { asm volatile("wfe" : : : "memory"); }
> > >
> > > -static __rte_always_inline void
> > > -rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> > > -		int memorder)
> > > -{
> > > -	uint16_t value;
> > > -
> > > -	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > __ATOMIC_RELAXED);
> > > -
> > > -	/*
> > > -	 * Atomic exclusive load from addr, it returns the 16-bit content of
> > > -	 * *addr while making it 'monitored',when it is written by someone
> > > -	 * else, the 'monitored' state is cleared and a event is generated
> > > -	 * implicitly to exit WFE.
> > > -	 */
> > > +/*
> > > + * Atomic exclusive load from addr, it returns the 16-bit content of
> > > + * *addr while making it 'monitored', when it is written by someone
> > > + * else, the 'monitored' state is cleared and a event is generated
> > > + * implicitly to exit WFE.
> > > + */
> > >  #define __LOAD_EXC_16(src, dst, memorder) {               \
> > >  	if (memorder == __ATOMIC_RELAXED) {               \
> > >  		asm volatile("ldxrh %w[tmp], [%x[addr]]"  \ @@ -58,6 +50,52
> > @@
> > > rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> > >  			: "memory");                      \
> > >  	} }
> > >
> > > +/*
> > > + * Atomic exclusive load from addr, it returns the 32-bit content of
> > > + * *addr while making it 'monitored', when it is written by someone
> > > + * else, the 'monitored' state is cleared and a event is generated
> > > + * implicitly to exit WFE.
> > > + */
> > > +#define __LOAD_EXC_32(src, dst, memorder) {              \
> > > +	if (memorder == __ATOMIC_RELAXED) {              \
> > > +		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> > > +			: [tmp] "=&r" (dst)              \
> > > +			: [addr] "r"(src)                \
> > > +			: "memory");                     \
> > > +	} else {                                         \
> > > +		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> > > +			: [tmp] "=&r" (dst)              \
> > > +			: [addr] "r"(src)                \
> > > +			: "memory");                     \
> > > +	} }
> > > +
> > > +/*
> > > + * Atomic exclusive load from addr, it returns the 64-bit content of
> > > + * *addr while making it 'monitored', when it is written by someone
> > > + * else, the 'monitored' state is cleared and a event is generated
> > > + * implicitly to exit WFE.
> > > + */
> > > +#define __LOAD_EXC_64(src, dst, memorder) {              \
> > > +	if (memorder == __ATOMIC_RELAXED) {              \
> > > +		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> > > +			: [tmp] "=&r" (dst)              \
> > > +			: [addr] "r"(src)                \
> > > +			: "memory");                     \
> > > +	} else {                                         \
> > > +		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> > > +			: [tmp] "=&r" (dst)              \
> > > +			: [addr] "r"(src)                \
> > > +			: "memory");                     \
> > > +	} }
> > > +
> > > +static __rte_always_inline void
> > > +rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> > > +		int memorder)
> > > +{
> > > +	uint16_t value;
> > > +
> > > +	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > +__ATOMIC_RELAXED);
> > > +
> > >  	__LOAD_EXC_16(addr, value, memorder)
> > >  	if (value != expected) {
> > >  		__SEVL()
> > > @@ -66,7 +104,6 @@ rte_wait_until_equal_16(volatile uint16_t *addr,
> > uint16_t expected,
> > >  			__LOAD_EXC_16(addr, value, memorder)
> > >  		} while (value != expected);
> > >  	}
> > > -#undef __LOAD_EXC_16
> > >  }
> > >
> > >  static __rte_always_inline void
> > > @@ -77,25 +114,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr,
> > > uint32_t expected,
> > >
> > >  	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > __ATOMIC_RELAXED);
> > >
> > > -	/*
> > > -	 * Atomic exclusive load from addr, it returns the 32-bit content of
> > > -	 * *addr while making it 'monitored',when it is written by someone
> > > -	 * else, the 'monitored' state is cleared and a event is generated
> > > -	 * implicitly to exit WFE.
> > > -	 */
> > > -#define __LOAD_EXC_32(src, dst, memorder) {              \
> > > -	if (memorder == __ATOMIC_RELAXED) {              \
> > > -		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> > > -			: [tmp] "=&r" (dst)              \
> > > -			: [addr] "r"(src)                \
> > > -			: "memory");                     \
> > > -	} else {                                         \
> > > -		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> > > -			: [tmp] "=&r" (dst)              \
> > > -			: [addr] "r"(src)                \
> > > -			: "memory");                     \
> > > -	} }
> > > -
> > >  	__LOAD_EXC_32(addr, value, memorder)
> > >  	if (value != expected) {
> > >  		__SEVL()
> > > @@ -104,7 +122,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr,
> > uint32_t expected,
> > >  			__LOAD_EXC_32(addr, value, memorder)
> > >  		} while (value != expected);
> > >  	}
> > > -#undef __LOAD_EXC_32
> > >  }
> > >
> > >  static __rte_always_inline void
> > > @@ -115,25 +132,6 @@ rte_wait_until_equal_64(volatile uint64_t *addr,
> > > uint64_t expected,
> > >
> > >  	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > __ATOMIC_RELAXED);
> > >
> > > -	/*
> > > -	 * Atomic exclusive load from addr, it returns the 64-bit content of
> > > -	 * *addr while making it 'monitored',when it is written by someone
> > > -	 * else, the 'monitored' state is cleared and a event is generated
> > > -	 * implicitly to exit WFE.
> > > -	 */
> > > -#define __LOAD_EXC_64(src, dst, memorder) {              \
> > > -	if (memorder == __ATOMIC_RELAXED) {              \
> > > -		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> > > -			: [tmp] "=&r" (dst)              \
> > > -			: [addr] "r"(src)                \
> > > -			: "memory");                     \
> > > -	} else {                                         \
> > > -		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> > > -			: [tmp] "=&r" (dst)              \
> > > -			: [addr] "r"(src)                \
> > > -			: "memory");                     \
> > > -	} }
> > > -
> > >  	__LOAD_EXC_64(addr, value, memorder)
> > >  	if (value != expected) {
> > >  		__SEVL()
> > > @@ -143,6 +141,26 @@ rte_wait_until_equal_64(volatile uint64_t *addr,
> > uint64_t expected,
> > >  		} while (value != expected);
> > >  	}
> > >  }
> > > +
> > > +#define rte_wait_event(addr, mask, expected, cond, memorder, size) \
> > > +do {                                                               \
> > > +	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));         \
> > > +	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&           \
> > > +	memorder != __ATOMIC_RELAXED);                             \
> > > +	RTE_BUILD_BUG_ON(size != 16 && size != 32 && size != 64);  \
> > > +	uint##size_t value;                                        \
> > > +	__LOAD_EXC_##size(addr, value, memorder)                   \
> > > +	if ((value & mask) cond expected) {		           \
> > > +		__SEVL()                                           \
> > > +		do {                                               \
> > > +			__WFE()                                    \
> > > +			__LOAD_EXC_##size(addr, value, memorder)   \
> > > +		} while ((value & mask) cond expected);            \
> > > +	}                                                          \
> > > +} while (0)
> > > +
> > > +#undef __LOAD_EXC_16
> > > +#undef __LOAD_EXC_32
> > >  #undef __LOAD_EXC_64
> > >
> > >  #undef __SEVL
> > > diff --git a/lib/eal/include/generic/rte_pause.h
> > > b/lib/eal/include/generic/rte_pause.h
> > > index 668ee4a184..20a5d2a9fd 100644
> > > --- a/lib/eal/include/generic/rte_pause.h
> > > +++ b/lib/eal/include/generic/rte_pause.h
> > > @@ -111,6 +111,38 @@ rte_wait_until_equal_64(volatile uint64_t *addr,
> > uint64_t expected,
> > >  	while (__atomic_load_n(addr, memorder) != expected)
> > >  		rte_pause();
> > >  }
> > > +
> > > +/*
> > > + * Wait until *addr breaks the condition, with a relaxed memory
> > > + * ordering model meaning the loads around this API can be reordered.
> > > + *
> > > + * @param addr
> > > + *  A pointer to the memory location.
> > > + * @param mask
> > > + *  A mask of value bits in interest.
> > > + * @param expected
> > > + *  A 16-bit expected value to be in the memory location.
> > > + * @param cond
> > > + *  A symbol representing the condition (==, !=).
> > > + * @param memorder
> > > + *  Two different memory orders that can be specified:
> > > + *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> > > + *  C++11 memory orders with the same names, see the C++11 standard
> > > +or
> > > + *  the GCC wiki on atomic synchronization for detailed definition.
> > > + * @param size
> > > + * The bit size of *addr:
> > > + * It is used for arm architecture to choose load instructions,
> > > + * and the optional value is 16, 32 and 64.
> > > + */
> > > +#define rte_wait_event(addr, mask, expected, cond, memorder, size)     \
> > > +do {                                                                   \
> > > +	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));             \
> > > +	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&               \
> > > +				memorder != __ATOMIC_RELAXED);         \
> > > +	RTE_BUILD_BUG_ON(size != 16 && size != 32 && size != 64);      \
> >
> > I don't' really understand why you do need 'size' passed as parameter.
> > Can't it be:
> > size_t size = sizeof(*(addr));
> > And then:
> > RTE_BUILD_BUG_ON(size != sizeof(uint16_t) && size != sizeof(uint32_t) &&
> > size != sizeof(uint64_t)); ?
> >
> > > +	while ((__atomic_load_n(addr, memorder) & mask) cond expected) \
> > > +		rte_pause();                                           \
> >
> > Just to repeat my own comment from previous version review:
> > put () around macro parameters in the macro body.
> > Will save from a lot of unexpected troubles.
> 
> Sorry, I didn't catch the point.
> In this version, I first wanted to use '__LOAD_EXC_##size' to choose the load
> instruction, so I used size as a parameter. And in the next version, I will update this as:
> 
> #define __LOAD_EXC(src, dst, memorder, size) {    \
> 	if (size == 16)                               \
> 		__LOAD_EXC_16(src, dst, memorder)     \
> 	else if (size == 32)                          \
> 		__LOAD_EXC_32(src, dst, memorder)     \
> 	else if (size == 64)                          \
> 		__LOAD_EXC_64(src, dst, memorder)     \
> }
> 
> #define rte_wait_event(addr, mask, cond, expected, memorder)    \
> do {                                                            \
> 	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));      \
> 	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&        \
> 				memorder != __ATOMIC_RELAXED);  \
> 	uint32_t size = sizeof(*addr) << 3;                     \
> 	typeof(*addr) value = 0;                                \
> 	__LOAD_EXC(addr, value, memorder, size)                 \
> 	if ((value & mask) cond expected) {                     \
> 		__SEVL()                                        \
> 		do {                                            \
> 			__WFE()                                 \
> 			__LOAD_EXC(addr, value, memorder, size) \
> 		} while ((value & mask) cond expected);         \
> 	}                                                       \
> } while (0)

Sorry, I probably wasn't clear enough.
I meant: use '(' ')' around macro arguments (to avoid unexpected side effects from operator associativity):
uint32_t size = sizeof(*(addr)) ...;
...
if ((value & (mask)) cond (expected))
...







^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] 回复: [PATCH v4 1/5] eal: add new definitions for wait scheme
  2021-10-25 14:28           ` [dpdk-dev] " Ananyev, Konstantin
@ 2021-10-26  1:08             ` Feifei Wang
  0 siblings, 0 replies; 113+ messages in thread
From: Feifei Wang @ 2021-10-26  1:08 UTC (permalink / raw)
  To: Ananyev, Konstantin, Ruifeng Wang; +Cc: dev, nd, nd, nd



> -----邮件原件-----
> 发件人: Ananyev, Konstantin <konstantin.ananyev@intel.com>
> 发送时间: Monday, October 25, 2021 10:29 PM
> 收件人: Feifei Wang <Feifei.Wang2@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>
> 抄送: dev@dpdk.org; nd <nd@arm.com>; nd <nd@arm.com>
> 主题: RE: [PATCH v4 1/5] eal: add new definitions for wait scheme
> 
> 
> > > > Introduce macros as generic interface for address monitoring.
> > > >
> > > > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > > > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > > > ---
> > > >  lib/eal/arm/include/rte_pause_64.h  | 126
> > > > ++++++++++++++++------------  lib/eal/include/generic/rte_pause.h
> > > > ++++++++++++++++|
> > > > 32 +++++++
> > > >  2 files changed, 104 insertions(+), 54 deletions(-)
> > > >
> > > > diff --git a/lib/eal/arm/include/rte_pause_64.h
> > > > b/lib/eal/arm/include/rte_pause_64.h
> > > > index e87d10b8cc..23954c2de2 100644
> > > > --- a/lib/eal/arm/include/rte_pause_64.h
> > > > +++ b/lib/eal/arm/include/rte_pause_64.h
> > > > @@ -31,20 +31,12 @@ static inline void rte_pause(void)
> > > >  /* Put processor into low power WFE(Wait For Event) state. */
> > > > #define __WFE() { asm volatile("wfe" : : : "memory"); }
> > > >
> > > > -static __rte_always_inline void
> > > > -rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> > > > -		int memorder)
> > > > -{
> > > > -	uint16_t value;
> > > > -
> > > > -	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > __ATOMIC_RELAXED);
> > > > -
> > > > -	/*
> > > > -	 * Atomic exclusive load from addr, it returns the 16-bit content of
> > > > -	 * *addr while making it 'monitored',when it is written by someone
> > > > -	 * else, the 'monitored' state is cleared and a event is generated
> > > > -	 * implicitly to exit WFE.
> > > > -	 */
> > > > +/*
> > > > + * Atomic exclusive load from addr, it returns the 16-bit content
> > > > +of
> > > > + * *addr while making it 'monitored', when it is written by
> > > > +someone
> > > > + * else, the 'monitored' state is cleared and a event is
> > > > +generated
> > > > + * implicitly to exit WFE.
> > > > + */
> > > >  #define __LOAD_EXC_16(src, dst, memorder) {               \
> > > >  	if (memorder == __ATOMIC_RELAXED) {               \
> > > >  		asm volatile("ldxrh %w[tmp], [%x[addr]]"  \ @@ -58,6 +50,52
> > > @@
> > > > rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> > > >  			: "memory");                      \
> > > >  	} }
> > > >
> > > > +/*
> > > > + * Atomic exclusive load from addr, it returns the 32-bit content
> > > > +of
> > > > + * *addr while making it 'monitored', when it is written by
> > > > +someone
> > > > + * else, the 'monitored' state is cleared and a event is
> > > > +generated
> > > > + * implicitly to exit WFE.
> > > > + */
> > > > +#define __LOAD_EXC_32(src, dst, memorder) {              \
> > > > +	if (memorder == __ATOMIC_RELAXED) {              \
> > > > +		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> > > > +			: [tmp] "=&r" (dst)              \
> > > > +			: [addr] "r"(src)                \
> > > > +			: "memory");                     \
> > > > +	} else {                                         \
> > > > +		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> > > > +			: [tmp] "=&r" (dst)              \
> > > > +			: [addr] "r"(src)                \
> > > > +			: "memory");                     \
> > > > +	} }
> > > > +
> > > > +/*
> > > > + * Atomic exclusive load from addr, it returns the 64-bit content
> > > > +of
> > > > + * *addr while making it 'monitored', when it is written by
> > > > +someone
> > > > + * else, the 'monitored' state is cleared and a event is
> > > > +generated
> > > > + * implicitly to exit WFE.
> > > > + */
> > > > +#define __LOAD_EXC_64(src, dst, memorder) {              \
> > > > +	if (memorder == __ATOMIC_RELAXED) {              \
> > > > +		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> > > > +			: [tmp] "=&r" (dst)              \
> > > > +			: [addr] "r"(src)                \
> > > > +			: "memory");                     \
> > > > +	} else {                                         \
> > > > +		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> > > > +			: [tmp] "=&r" (dst)              \
> > > > +			: [addr] "r"(src)                \
> > > > +			: "memory");                     \
> > > > +	} }
> > > > +
> > > > +static __rte_always_inline void
> > > > +rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> > > > +		int memorder)
> > > > +{
> > > > +	uint16_t value;
> > > > +
> > > > +	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > > +__ATOMIC_RELAXED);
> > > > +
> > > >  	__LOAD_EXC_16(addr, value, memorder)
> > > >  	if (value != expected) {
> > > >  		__SEVL()
> > > > @@ -66,7 +104,6 @@ rte_wait_until_equal_16(volatile uint16_t
> > > > *addr,
> > > uint16_t expected,
> > > >  			__LOAD_EXC_16(addr, value, memorder)
> > > >  		} while (value != expected);
> > > >  	}
> > > > -#undef __LOAD_EXC_16
> > > >  }
> > > >
> > > >  static __rte_always_inline void
> > > > @@ -77,25 +114,6 @@ rte_wait_until_equal_32(volatile uint32_t
> > > > *addr, uint32_t expected,
> > > >
> > > >  	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > > __ATOMIC_RELAXED);
> > > >
> > > > -	/*
> > > > -	 * Atomic exclusive load from addr, it returns the 32-bit content of
> > > > -	 * *addr while making it 'monitored',when it is written by someone
> > > > -	 * else, the 'monitored' state is cleared and a event is generated
> > > > -	 * implicitly to exit WFE.
> > > > -	 */
> > > > -#define __LOAD_EXC_32(src, dst, memorder) {              \
> > > > -	if (memorder == __ATOMIC_RELAXED) {              \
> > > > -		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> > > > -			: [tmp] "=&r" (dst)              \
> > > > -			: [addr] "r"(src)                \
> > > > -			: "memory");                     \
> > > > -	} else {                                         \
> > > > -		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> > > > -			: [tmp] "=&r" (dst)              \
> > > > -			: [addr] "r"(src)                \
> > > > -			: "memory");                     \
> > > > -	} }
> > > > -
> > > >  	__LOAD_EXC_32(addr, value, memorder)
> > > >  	if (value != expected) {
> > > >  		__SEVL()
> > > > @@ -104,7 +122,6 @@ rte_wait_until_equal_32(volatile uint32_t
> > > > *addr,
> > > uint32_t expected,
> > > >  			__LOAD_EXC_32(addr, value, memorder)
> > > >  		} while (value != expected);
> > > >  	}
> > > > -#undef __LOAD_EXC_32
> > > >  }
> > > >
> > > >  static __rte_always_inline void
> > > > @@ -115,25 +132,6 @@ rte_wait_until_equal_64(volatile uint64_t
> > > > *addr, uint64_t expected,
> > > >
> > > >  	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > > __ATOMIC_RELAXED);
> > > >
> > > > -	/*
> > > > -	 * Atomic exclusive load from addr, it returns the 64-bit content of
> > > > -	 * *addr while making it 'monitored',when it is written by someone
> > > > -	 * else, the 'monitored' state is cleared and a event is generated
> > > > -	 * implicitly to exit WFE.
> > > > -	 */
> > > > -#define __LOAD_EXC_64(src, dst, memorder) {              \
> > > > -	if (memorder == __ATOMIC_RELAXED) {              \
> > > > -		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> > > > -			: [tmp] "=&r" (dst)              \
> > > > -			: [addr] "r"(src)                \
> > > > -			: "memory");                     \
> > > > -	} else {                                         \
> > > > -		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> > > > -			: [tmp] "=&r" (dst)              \
> > > > -			: [addr] "r"(src)                \
> > > > -			: "memory");                     \
> > > > -	} }
> > > > -
> > > >  	__LOAD_EXC_64(addr, value, memorder)
> > > >  	if (value != expected) {
> > > >  		__SEVL()
> > > > @@ -143,6 +141,26 @@ rte_wait_until_equal_64(volatile uint64_t
> > > > *addr,
> > > uint64_t expected,
> > > >  		} while (value != expected);
> > > >  	}
> > > >  }
> > > > +
> > > > +#define rte_wait_event(addr, mask, expected, cond, memorder, size) \
> > > > +do {                                                               \
> > > > +	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));         \
> > > > +	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&           \
> > > > +	memorder != __ATOMIC_RELAXED);                             \
> > > > +	RTE_BUILD_BUG_ON(size != 16 && size != 32 && size != 64);  \
> > > > +	uint##size_t value;                                        \
> > > > +	__LOAD_EXC_##size(addr, value, memorder)                   \
> > > > +	if ((value & mask) cond expected) {		           \
> > > > +		__SEVL()                                           \
> > > > +		do {                                               \
> > > > +			__WFE()                                    \
> > > > +			__LOAD_EXC_##size(addr, value, memorder)   \
> > > > +		} while ((value & mask) cond expected);            \
> > > > +	}                                                          \
> > > > +} while (0)
> > > > +
> > > > +#undef __LOAD_EXC_16
> > > > +#undef __LOAD_EXC_32
> > > >  #undef __LOAD_EXC_64
> > > >
> > > >  #undef __SEVL
> > > > diff --git a/lib/eal/include/generic/rte_pause.h
> > > > b/lib/eal/include/generic/rte_pause.h
> > > > index 668ee4a184..20a5d2a9fd 100644
> > > > --- a/lib/eal/include/generic/rte_pause.h
> > > > +++ b/lib/eal/include/generic/rte_pause.h
> > > > @@ -111,6 +111,38 @@ rte_wait_until_equal_64(volatile uint64_t
> > > > *addr,
> > > uint64_t expected,
> > > >  	while (__atomic_load_n(addr, memorder) != expected)
> > > >  		rte_pause();
> > > >  }
> > > > +
> > > > +/*
> > > > + * Wait until *addr breaks the condition, with a relaxed memory
> > > > + * ordering model meaning the loads around this API can be reordered.
> > > > + *
> > > > + * @param addr
> > > > + *  A pointer to the memory location.
> > > > + * @param mask
> > > > + *  A mask of value bits in interest.
> > > > + * @param expected
> > > > + *  A 16-bit expected value to be in the memory location.
> > > > + * @param cond
> > > > + *  A symbol representing the condition (==, !=).
> > > > + * @param memorder
> > > > + *  Two different memory orders that can be specified:
> > > > + *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> > > > + *  C++11 memory orders with the same names, see the C++11
> > > > +standard or
> > > > + *  the GCC wiki on atomic synchronization for detailed definition.
> > > > + * @param size
> > > > + * The bit size of *addr:
> > > > + * It is used for arm architecture to choose load instructions,
> > > > + * and the optional value is 16, 32 and 64.
> > > > + */
> > > > +#define rte_wait_event(addr, mask, expected, cond, memorder, size)
> \
> > > > +do {                                                                   \
> > > > +	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));             \
> > > > +	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&               \
> > > > +				memorder != __ATOMIC_RELAXED);         \
> > > > +	RTE_BUILD_BUG_ON(size != 16 && size != 32 && size != 64);      \
> > >
> > > I don't' really understand why you do need 'size' passed as parameter.
> > > Can't it be:
> > > size_t size = sizeof(*(addr));
> > > And then:
> > > RTE_BUILD_BUG_ON(size != sizeof(uint16_t) && size !=
> > > sizeof(uint32_t) && size != sizeof(uint64_t)); ?
> > >
> > > > +	while ((__atomic_load_n(addr, memorder) & mask) cond expected) \
> > > > +		rte_pause();                                           \
> > >
> > > Just to repeat my own comment from previous version review:
> > > put () around macro parameters in the macro body.
> > > Will save from a lot of unexpected troubles.
> >
> > Sorry I didn't catch the point.
> > In this version, I firstly want to use '__LOAD_EXC_##size' to choose ,
> > so I use size as a parameter.  And in the next version, I will update
> > this as:
> >
> > #define __LOAD_EXC(src, dst, memorder, size) {    \
> > 	if (size == 16)                               \
> > 		__LOAD_EXC_16(src, dst, memorder)     \
> > 	else if (size == 32)                          \
> > 		__LOAD_EXC_32(src, dst, memorder)     \
> > 	else if (size == 64)                          \
> > 		__LOAD_EXC_64(src, dst, memorder)     \
> > }
> >
> > #define rte_wait_event(addr, mask, cond, expected, memorder)    \
> > do {                                                            \
> > 	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));      \
> > 	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&        \
> > 				memorder != __ATOMIC_RELAXED);  \
> > 	uint32_t size = sizeof(*addr) << 3;                     \
> > 	typeof(*addr) value = 0;                                \
> > 	__LOAD_EXC(addr, value, memorder, size)                 \
> > 	if ((value & mask) cond expected) {                     \
> > 		__SEVL()                                        \
> > 		do {                                            \
> > 			__WFE()                                 \
> > 			__LOAD_EXC(addr, value, memorder, size) \
> > 		} while ((value & mask) cond expected);         \
> > 	}                                                       \
> > } while (0)
> 
> Sorry, I probably wasn't clear enough.
> I meant use '(' ')' around  macro arguments (to avoid un-predicted side-effects
> with operands associativity):
> uint32_t size = sizeof(*(addr)) ...;
> ...
> if ((value & (mask)) cond (expected))
> ...
That's Ok. So in the next version, I will change the following:
1. size will not be a parameter
2. I will add '()' around macro arguments
> 
> 
> 
> 
> 


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] 回复:  [PATCH v4 1/5] eal: add new definitions for wait scheme
  2021-10-25  9:43           ` [dpdk-dev] " Jerin Jacob
@ 2021-10-26  1:11             ` Feifei Wang
  0 siblings, 0 replies; 113+ messages in thread
From: Feifei Wang @ 2021-10-26  1:11 UTC (permalink / raw)
  To: Jerin Jacob; +Cc: Ruifeng Wang, Ananyev, Konstantin, dpdk-dev, nd, nd


> -----邮件原件-----
> 发件人: dev <dev-bounces@dpdk.org> 代表 Jerin Jacob
> 发送时间: Monday, October 25, 2021 5:44 PM
> 收件人: Feifei Wang <Feifei.Wang2@arm.com>
> 抄送: Ruifeng Wang <Ruifeng.Wang@arm.com>; Ananyev, Konstantin
> <konstantin.ananyev@intel.com>; dpdk-dev <dev@dpdk.org>; nd
> <nd@arm.com>
> 主题: Re: [dpdk-dev] [PATCH v4 1/5] eal: add new definitions for wait scheme
> 
> On Mon, Oct 25, 2021 at 3:01 PM Feifei Wang <Feifei.Wang2@arm.com>
> wrote:
> >
> > > -----邮件原件-----
> > > 发件人: Jerin Jacob <jerinjacobk@gmail.com>
> > > 发送时间: Friday, October 22, 2021 8:10 AM
> > > 收件人: Feifei Wang <Feifei.Wang2@arm.com>
> > > 抄送: Ruifeng Wang <Ruifeng.Wang@arm.com>; Ananyev, Konstantin
> > > <konstantin.ananyev@intel.com>; dpdk-dev <dev@dpdk.org>; nd
> > > <nd@arm.com>
> > > 主题: Re: [dpdk-dev] [PATCH v4 1/5] eal: add new definitions for wait
> > > scheme
> > >
> > > On Wed, Oct 20, 2021 at 2:16 PM Feifei Wang <feifei.wang2@arm.com>
> > > wrote:
> > > >
> > > > Introduce macros as generic interface for address monitoring.
> > > >
> > > > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > > > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > > > ---
> > > >  lib/eal/arm/include/rte_pause_64.h  | 126
> > > > ++++++++++++++++------------  lib/eal/include/generic/rte_pause.h
> > > > ++++++++++++++++|
> > > > 32 +++++++
> > > >  2 files changed, 104 insertions(+), 54 deletions(-)
> > > >
> > > > diff --git a/lib/eal/arm/include/rte_pause_64.h
> > > > b/lib/eal/arm/include/rte_pause_64.h
> > > > index e87d10b8cc..23954c2de2 100644
> > > > --- a/lib/eal/arm/include/rte_pause_64.h
> > > > +++ b/lib/eal/arm/include/rte_pause_64.h
> > > > @@ -31,20 +31,12 @@ static inline void rte_pause(void)
> > > >  /* Put processor into low power WFE(Wait For Event) state. */
> > > > #define __WFE() { asm volatile("wfe" : : : "memory"); }
> > > >
> > > > -static __rte_always_inline void
> > > > -rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> > > > -               int memorder)
> > > > -{
> > > > -       uint16_t value;
> > > > -
> > > > -       assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > __ATOMIC_RELAXED);
> > > > -
> > > > -       /*
> > > > -        * Atomic exclusive load from addr, it returns the 16-bit content of
> > > > -        * *addr while making it 'monitored',when it is written by someone
> > > > -        * else, the 'monitored' state is cleared and a event is generated
> > >
> > > a event -> an event in all the occurrence.
> > >
> > > > -        * implicitly to exit WFE.
> > > > -        */
> > > > +/*
> > > > + * Atomic exclusive load from addr, it returns the 16-bit content
> > > > +of
> > > > + * *addr while making it 'monitored', when it is written by
> > > > +someone
> > > > + * else, the 'monitored' state is cleared and a event is
> > > > +generated
> > > > + * implicitly to exit WFE.
> > > > + */
> > > >  #define __LOAD_EXC_16(src, dst, memorder) {               \
> > > >         if (memorder == __ATOMIC_RELAXED) {               \
> > > >                 asm volatile("ldxrh %w[tmp], [%x[addr]]"  \ @@
> > > > -58,6
> > > > +50,52 @@ rte_wait_until_equal_16(volatile uint16_t *addr,
> > > > +uint16_t
> > > expected,
> > > >                         : "memory");                      \
> > > >         } }
> > > >
> > > > +/*
> > > > + * Atomic exclusive load from addr, it returns the 32-bit content
> > > > +of
> > > > + * *addr while making it 'monitored', when it is written by
> > > > +someone
> > > > + * else, the 'monitored' state is cleared and a event is
> > > > +generated
> > > > + * implicitly to exit WFE.
> > > > + */
> > > > +#define __LOAD_EXC_32(src, dst, memorder) {              \
> > > > +       if (memorder == __ATOMIC_RELAXED) {              \
> > > > +               asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> > > > +                       : [tmp] "=&r" (dst)              \
> > > > +                       : [addr] "r"(src)                \
> > > > +                       : "memory");                     \
> > > > +       } else {                                         \
> > > > +               asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> > > > +                       : [tmp] "=&r" (dst)              \
> > > > +                       : [addr] "r"(src)                \
> > > > +                       : "memory");                     \
> > > > +       } }
> > > > +
> > > > +/*
> > > > + * Atomic exclusive load from addr, it returns the 64-bit content
> > > > +of
> > > > + * *addr while making it 'monitored', when it is written by
> > > > +someone
> > > > + * else, the 'monitored' state is cleared and a event is
> > > > +generated
> > > > + * implicitly to exit WFE.
> > > > + */
> > > > +#define __LOAD_EXC_64(src, dst, memorder) {              \
> > > > +       if (memorder == __ATOMIC_RELAXED) {              \
> > > > +               asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> > > > +                       : [tmp] "=&r" (dst)              \
> > > > +                       : [addr] "r"(src)                \
> > > > +                       : "memory");                     \
> > > > +       } else {                                         \
> > > > +               asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> > > > +                       : [tmp] "=&r" (dst)              \
> > > > +                       : [addr] "r"(src)                \
> > > > +                       : "memory");                     \
> > > > +       } }
> > > > +
> > > > +static __rte_always_inline void
> > > > +rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> > > > +               int memorder)
> > > > +{
> > > > +       uint16_t value;
> > > > +
> > > > +       assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > > + __ATOMIC_RELAXED);
> > > > +
> > > >         __LOAD_EXC_16(addr, value, memorder)
> > > >         if (value != expected) {
> > > >                 __SEVL()
> > > > @@ -66,7 +104,6 @@ rte_wait_until_equal_16(volatile uint16_t
> > > > *addr,
> > > uint16_t expected,
> > > >                         __LOAD_EXC_16(addr, value, memorder)
> > > >                 } while (value != expected);
> > > >         }
> > > > -#undef __LOAD_EXC_16
> > > >  }
> > > >
> > > >  static __rte_always_inline void
> > > > @@ -77,25 +114,6 @@ rte_wait_until_equal_32(volatile uint32_t
> > > > *addr, uint32_t expected,
> > > >
> > > >         assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > > __ATOMIC_RELAXED);
> > > >
> > > > -       /*
> > > > -        * Atomic exclusive load from addr, it returns the 32-bit content of
> > > > -        * *addr while making it 'monitored',when it is written by someone
> > > > -        * else, the 'monitored' state is cleared and a event is generated
> > > > -        * implicitly to exit WFE.
> > > > -        */
> > > > -#define __LOAD_EXC_32(src, dst, memorder) {              \
> > > > -       if (memorder == __ATOMIC_RELAXED) {              \
> > > > -               asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> > > > -                       : [tmp] "=&r" (dst)              \
> > > > -                       : [addr] "r"(src)                \
> > > > -                       : "memory");                     \
> > > > -       } else {                                         \
> > > > -               asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> > > > -                       : [tmp] "=&r" (dst)              \
> > > > -                       : [addr] "r"(src)                \
> > > > -                       : "memory");                     \
> > > > -       } }
> > > > -
> > > >         __LOAD_EXC_32(addr, value, memorder)
> > > >         if (value != expected) {
> > > >                 __SEVL()
> > > > @@ -104,7 +122,6 @@ rte_wait_until_equal_32(volatile uint32_t
> > > > *addr,
> > > uint32_t expected,
> > > >                         __LOAD_EXC_32(addr, value, memorder)
> > > >                 } while (value != expected);
> > > >         }
> > > > -#undef __LOAD_EXC_32
> > > >  }
> > > >
> > > >  static __rte_always_inline void
> > > > @@ -115,25 +132,6 @@ rte_wait_until_equal_64(volatile uint64_t
> > > > *addr, uint64_t expected,
> > > >
> > > >         assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > > __ATOMIC_RELAXED);
> > > >
> > > > -       /*
> > > > -        * Atomic exclusive load from addr, it returns the 64-bit content of
> > > > -        * *addr while making it 'monitored',when it is written by someone
> > > > -        * else, the 'monitored' state is cleared and a event is generated
> > > > -        * implicitly to exit WFE.
> > > > -        */
> > > > -#define __LOAD_EXC_64(src, dst, memorder) {              \
> > > > -       if (memorder == __ATOMIC_RELAXED) {              \
> > > > -               asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> > > > -                       : [tmp] "=&r" (dst)              \
> > > > -                       : [addr] "r"(src)                \
> > > > -                       : "memory");                     \
> > > > -       } else {                                         \
> > > > -               asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> > > > -                       : [tmp] "=&r" (dst)              \
> > > > -                       : [addr] "r"(src)                \
> > > > -                       : "memory");                     \
> > > > -       } }
> > > > -
> > > >         __LOAD_EXC_64(addr, value, memorder)
> > > >         if (value != expected) {
> > > >                 __SEVL()
> > > > @@ -143,6 +141,26 @@ rte_wait_until_equal_64(volatile uint64_t
> > > > *addr,
> > > uint64_t expected,
> > > >                 } while (value != expected);
> > > >         }
> > > >  }
> > > > +
> > > > +#define rte_wait_event(addr, mask, expected, cond, memorder,
> > > > +size) \
> > >
> > > I think it is better to swap "cond" and "expected" positions to get
> > > better readability.
> > Thanks for the comments, it is better than before and I will update in the
> next version.
> > >
> > >  rte_wait_event(&buf->bufptr64, RTE_DISTRIB_FLAGS_MASK, 0, !=,
> > > __ATOMIC_RELAXED, 64);
> > >
> > > Vs
> > >
> > >  rte_wait_event(&buf->bufptr64, RTE_DISTRIB_FLAGS_MASK, !=, 0,
> > > __ATOMIC_RELAXED, 64);
> > >
> > > > +do {                                                               \
> > >
> > > Any reason to not make an inline function instead of macro?
> > Because there were many new APIs for different cases. And then we
> > refer to Linux 'wait_event' code for an example. Please see the first version
> and its discussion:
> > http://patches.dpdk.org/project/dpdk/cover/20210902053253.3017858-1-
> fe
> > ifei.wang2@arm.com/
> 
> 
> OK.
> 
> 
> > >
> > > > +       RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));         \
> > >
> > > Should n't we add __builtin_constant_p(size) of check?
> >
> > Please see the discussion with Konstantin.
> > 'size' will not be as a parameter and then it is unnecessary to check it with
> build_bug.
> 
> Make sense to remove the 'size'. My comment was more in the direction of, if
> the 'size' is required to pass.
That's Ok. Thanks very much for your valuable comments.
> 
> > >
> > > > +       RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&           \
> > > > +       memorder != __ATOMIC_RELAXED);                             \
> > > > +       RTE_BUILD_BUG_ON(size != 16 && size != 32 && size != 64);  \
> > > > +       uint##size_t value;
> > >
> > >
> > >                                \
> > > > +       __LOAD_EXC_##size(addr, value, memorder)                   \
> > > > +       if ((value & mask) cond expected) {                        \
> > > > +               __SEVL()                                           \
> > > > +               do {                                               \
> > > > +                       __WFE()                                    \
> > > > +                       __LOAD_EXC_##size(addr, value, memorder)   \
> > > > +               } while ((value & mask) cond expected);            \
> > > > +       }                                                          \
> > > > +} while (0)
> > > > +
> > > > +#undef __LOAD_EXC_16
> > > > +#undef __LOAD_EXC_32
> > > >  #undef __LOAD_EXC_64
> > > >
> > > >  #undef __SEVL
> > > > diff --git a/lib/eal/include/generic/rte_pause.h
> > > > b/lib/eal/include/generic/rte_pause.h
> > > > index 668ee4a184..20a5d2a9fd 100644
> > > > --- a/lib/eal/include/generic/rte_pause.h
> > > > +++ b/lib/eal/include/generic/rte_pause.h
> > > > @@ -111,6 +111,38 @@ rte_wait_until_equal_64(volatile uint64_t
> > > > *addr,
> > > uint64_t expected,
> > > >         while (__atomic_load_n(addr, memorder) != expected)
> > > >                 rte_pause();
> > > >  }
> > > > +
> > > > +/*
> > > > + * Wait until *addr breaks the condition, with a relaxed memory
> > > > + * ordering model meaning the loads around this API can be reordered.
> > > > + *
> > > > + * @param addr
> > > > + *  A pointer to the memory location.
> > > > + * @param mask
> > > > + *  A mask of value bits in interest.
> > > > + * @param expected
> > > > + *  A 16-bit expected value to be in the memory location.
> > > > + * @param cond
> > > > + *  A symbol representing the condition (==, !=).
> > > > + * @param memorder
> > > > + *  Two different memory orders that can be specified:
> > > > + *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> > > > + *  C++11 memory orders with the same names, see the C++11
> > > > +standard or
> > > > + *  the GCC wiki on atomic synchronization for detailed definition.
> > > > + * @param size
> > > > + * The bit size of *addr:
> > > > + * It is used for arm architecture to choose load instructions,
> > > > + * and the optional value is 16, 32 and 64.
> > > > + */
> > > > +#define rte_wait_event(addr, mask, expected, cond, memorder, size)
> \
> > > > +do {                                                                   \
> > > > +       RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));             \
> > > > +       RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&
> \
> > > > +                               memorder != __ATOMIC_RELAXED);         \
> > > > +       RTE_BUILD_BUG_ON(size != 16 && size != 32 && size != 64);      \
> > > > +       while ((__atomic_load_n(addr, memorder) & mask) cond expected)
> \
> > > > +               rte_pause();                                           \
> > > > +} while (0)
> > > >  #endif
> > > >
> > > >  #endif /* _RTE_PAUSE_H_ */
> > > > --
> > > > 2.25.1
> > > >

^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [PATCH v5 0/5] add new definitions for wait scheme
  2021-09-02  5:32 [dpdk-dev] [RFC PATCH v1 0/5] add new API for wait until scheme Feifei Wang
                   ` (7 preceding siblings ...)
  2021-09-26  6:32 ` [dpdk-dev] [RFC PATCH v3 0/5] add new definitions for wait scheme Feifei Wang
@ 2021-10-26  8:01 ` Feifei Wang
  2021-10-26  8:02   ` [dpdk-dev] [PATCH v5 1/5] eal: " Feifei Wang
                     ` (4 more replies)
  2021-10-27  8:10 ` [dpdk-dev] [PATCH v6 0/4] add new definitions for wait scheme Feifei Wang
                   ` (3 subsequent siblings)
  12 siblings, 5 replies; 113+ messages in thread
From: Feifei Wang @ 2021-10-26  8:01 UTC (permalink / raw)
  Cc: dev, nd, Feifei Wang

Add new definitions for wait scheme, and apply this new definitions into
lib to replace rte_pause.

v2:
1. use macro to create new wait scheme (Stephen)

v3:
1. delete unnecessary bug fix in bpf (Konstantin)

v4:
1. put size into the macro body (Konstantin)
2. replace assert with BUILD_BUG_ON (Stephen)
3. delete unnecessary compiler barrier for bpf (Konstantin)

v5:
1. 'size' is not the parameter (Konstantin)
2. put () around macro parameters (Konstantin)
3. fix some original typo issues (Jerin)
4. swap 'rte_wait_event' parameter locations (Jerin)
5. add new macro '__LOAD_EXC'
6. delete 'undef' to prevent compilation warnings
 
Feifei Wang (5):
  eal: add new definitions for wait scheme
  eal: use wait event for read pflock
  eal: use wait event scheme for mcslock
  lib/bpf: use wait event scheme for Rx/Tx iteration
  lib/distributor: use wait event scheme

 lib/bpf/bpf_pkt.c                        |  11 +-
 lib/distributor/rte_distributor_single.c |  10 +-
 lib/eal/arm/include/rte_pause_64.h       | 135 +++++++++++++----------
 lib/eal/include/generic/rte_mcslock.h    |   9 +-
 lib/eal/include/generic/rte_pause.h      |  27 +++++
 lib/eal/include/generic/rte_pflock.h     |   4 +-
 6 files changed, 121 insertions(+), 75 deletions(-)

-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [PATCH v5 1/5] eal: add new definitions for wait scheme
  2021-10-26  8:01 ` [dpdk-dev] [PATCH v5 0/5] add new definitions for wait scheme Feifei Wang
@ 2021-10-26  8:02   ` Feifei Wang
  2021-10-26  8:08     ` [dpdk-dev] 回复: " Feifei Wang
  2021-10-26  8:02   ` [dpdk-dev] [PATCH v5 2/5] eal: use wait event for read pflock Feifei Wang
                     ` (3 subsequent siblings)
  4 siblings, 1 reply; 113+ messages in thread
From: Feifei Wang @ 2021-10-26  8:02 UTC (permalink / raw)
  To: Ruifeng Wang; +Cc: dev, nd, Feifei Wang

Introduce macros as generic interface for address monitoring.
For different size, encapsulate '__LOAD_EXC_16', '__LOAD_EXC_32'
and '__LOAD_EXC_64' into a new macro '__LOAD_EXC'.

Furthermore, to prevent compilation warning in arm:
----------------------------------------------
'warning: implicit declaration of function ...'
----------------------------------------------
Delete 'undef' constructions for '__LOAD_EXC_xx', '__SEVL' and '__WFE'.

This is because the original macros are undefined at the end of the file.
If new macro 'rte_wait_event' calls them in other files, they will be
seen as 'not defined'.

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 lib/eal/arm/include/rte_pause_64.h  | 135 ++++++++++++++++------------
 lib/eal/include/generic/rte_pause.h |  27 ++++++
 2 files changed, 105 insertions(+), 57 deletions(-)

diff --git a/lib/eal/arm/include/rte_pause_64.h b/lib/eal/arm/include/rte_pause_64.h
index e87d10b8cc..1fea0dec63 100644
--- a/lib/eal/arm/include/rte_pause_64.h
+++ b/lib/eal/arm/include/rte_pause_64.h
@@ -31,20 +31,12 @@ static inline void rte_pause(void)
 /* Put processor into low power WFE(Wait For Event) state. */
 #define __WFE() { asm volatile("wfe" : : : "memory"); }
 
-static __rte_always_inline void
-rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
-		int memorder)
-{
-	uint16_t value;
-
-	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
-
-	/*
-	 * Atomic exclusive load from addr, it returns the 16-bit content of
-	 * *addr while making it 'monitored',when it is written by someone
-	 * else, the 'monitored' state is cleared and a event is generated
-	 * implicitly to exit WFE.
-	 */
+/*
+ * Atomic exclusive load from addr, it returns the 16-bit content of
+ * *addr while making it 'monitored', when it is written by someone
+ * else, the 'monitored' state is cleared and an event is generated
+ * implicitly to exit WFE.
+ */
 #define __LOAD_EXC_16(src, dst, memorder) {               \
 	if (memorder == __ATOMIC_RELAXED) {               \
 		asm volatile("ldxrh %w[tmp], [%x[addr]]"  \
@@ -58,6 +50,62 @@ rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
 			: "memory");                      \
 	} }
 
+/*
+ * Atomic exclusive load from addr, it returns the 32-bit content of
+ * *addr while making it 'monitored', when it is written by someone
+ * else, the 'monitored' state is cleared and an event is generated
+ * implicitly to exit WFE.
+ */
+#define __LOAD_EXC_32(src, dst, memorder) {              \
+	if (memorder == __ATOMIC_RELAXED) {              \
+		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
+			: [tmp] "=&r" (dst)              \
+			: [addr] "r"(src)                \
+			: "memory");                     \
+	} else {                                         \
+		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
+			: [tmp] "=&r" (dst)              \
+			: [addr] "r"(src)                \
+			: "memory");                     \
+	} }
+
+/*
+ * Atomic exclusive load from addr, it returns the 64-bit content of
+ * *addr while making it 'monitored', when it is written by someone
+ * else, the 'monitored' state is cleared and an event is generated
+ * implicitly to exit WFE.
+ */
+#define __LOAD_EXC_64(src, dst, memorder) {              \
+	if (memorder == __ATOMIC_RELAXED) {              \
+		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
+			: [tmp] "=&r" (dst)              \
+			: [addr] "r"(src)                \
+			: "memory");                     \
+	} else {                                         \
+		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
+			: [tmp] "=&r" (dst)              \
+			: [addr] "r"(src)                \
+			: "memory");                     \
+	} }
+
+#define __LOAD_EXC(src, dst, memorder, size) {          \
+	assert(size == 16 || size == 32 || size == 64); \
+	if (size == 16)                                 \
+		__LOAD_EXC_16(src, dst, memorder)       \
+	else if (size == 32)                            \
+		__LOAD_EXC_32(src, dst, memorder)       \
+	else if (size == 64)                            \
+		__LOAD_EXC_64(src, dst, memorder)       \
+}
+
+static __rte_always_inline void
+rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
+		int memorder)
+{
+	uint16_t value;
+
+	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
+
 	__LOAD_EXC_16(addr, value, memorder)
 	if (value != expected) {
 		__SEVL()
@@ -66,7 +114,6 @@ rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
 			__LOAD_EXC_16(addr, value, memorder)
 		} while (value != expected);
 	}
-#undef __LOAD_EXC_16
 }
 
 static __rte_always_inline void
@@ -77,25 +124,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
 
 	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
 
-	/*
-	 * Atomic exclusive load from addr, it returns the 32-bit content of
-	 * *addr while making it 'monitored',when it is written by someone
-	 * else, the 'monitored' state is cleared and a event is generated
-	 * implicitly to exit WFE.
-	 */
-#define __LOAD_EXC_32(src, dst, memorder) {              \
-	if (memorder == __ATOMIC_RELAXED) {              \
-		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
-			: [tmp] "=&r" (dst)              \
-			: [addr] "r"(src)                \
-			: "memory");                     \
-	} else {                                         \
-		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
-			: [tmp] "=&r" (dst)              \
-			: [addr] "r"(src)                \
-			: "memory");                     \
-	} }
-
 	__LOAD_EXC_32(addr, value, memorder)
 	if (value != expected) {
 		__SEVL()
@@ -104,7 +132,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
 			__LOAD_EXC_32(addr, value, memorder)
 		} while (value != expected);
 	}
-#undef __LOAD_EXC_32
 }
 
 static __rte_always_inline void
@@ -115,25 +142,6 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
 
 	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
 
-	/*
-	 * Atomic exclusive load from addr, it returns the 64-bit content of
-	 * *addr while making it 'monitored',when it is written by someone
-	 * else, the 'monitored' state is cleared and a event is generated
-	 * implicitly to exit WFE.
-	 */
-#define __LOAD_EXC_64(src, dst, memorder) {              \
-	if (memorder == __ATOMIC_RELAXED) {              \
-		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
-			: [tmp] "=&r" (dst)              \
-			: [addr] "r"(src)                \
-			: "memory");                     \
-	} else {                                         \
-		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
-			: [tmp] "=&r" (dst)              \
-			: [addr] "r"(src)                \
-			: "memory");                     \
-	} }
-
 	__LOAD_EXC_64(addr, value, memorder)
 	if (value != expected) {
 		__SEVL()
@@ -143,10 +151,23 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
 		} while (value != expected);
 	}
 }
-#undef __LOAD_EXC_64
 
-#undef __SEVL
-#undef __WFE
+#define rte_wait_event(addr, mask, cond, expected, memorder)      \
+do {                                                              \
+	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));        \
+	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&          \
+				memorder != __ATOMIC_RELAXED);    \
+	uint32_t size = sizeof(*(addr)) << 3;                     \
+	typeof(*(addr)) value = 0;                                \
+	__LOAD_EXC((addr), value, memorder, size)                 \
+	if ((value & (mask)) cond expected) {                     \
+		__SEVL()                                          \
+		do {                                              \
+			__WFE()                                   \
+			__LOAD_EXC((addr), value, memorder, size) \
+		} while ((value & (mask)) cond expected);         \
+	}                                                         \
+} while (0)
 
 #endif
 
diff --git a/lib/eal/include/generic/rte_pause.h b/lib/eal/include/generic/rte_pause.h
index 668ee4a184..e31a006844 100644
--- a/lib/eal/include/generic/rte_pause.h
+++ b/lib/eal/include/generic/rte_pause.h
@@ -111,6 +111,33 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
 	while (__atomic_load_n(addr, memorder) != expected)
 		rte_pause();
 }
+
+/*
+ * Wait until *addr breaks the condition, with a relaxed memory
+ * ordering model meaning the loads around this API can be reordered.
+ *
+ * @param addr
+ *  A pointer to the memory location.
+ * @param mask
+ *  A mask of value bits in interest.
+ * @param cond
+ *  A symbol representing the condition.
+ * @param expected
+ *  An expected value to be in the memory location.
+ * @param memorder
+ *  Two different memory orders that can be specified:
+ *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
+ *  C++11 memory orders with the same names, see the C++11 standard or
+ *  the GCC wiki on atomic synchronization for detailed definition.
+ */
+#define rte_wait_event(addr, mask, cond, expected, memorder)                   \
+do {                                                                           \
+	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));                     \
+	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&                       \
+				memorder != __ATOMIC_RELAXED);                 \
+	while ((__atomic_load_n((addr), (memorder)) & (mask)) cond expected)   \
+		rte_pause();                                                   \
+} while (0)
 #endif
 
 #endif /* _RTE_PAUSE_H_ */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [PATCH v5 2/5] eal: use wait event for read pflock
  2021-10-26  8:01 ` [dpdk-dev] [PATCH v5 0/5] add new definitions for wait scheme Feifei Wang
  2021-10-26  8:02   ` [dpdk-dev] [PATCH v5 1/5] eal: " Feifei Wang
@ 2021-10-26  8:02   ` Feifei Wang
  2021-10-26  8:02   ` [dpdk-dev] [PATCH v5 3/5] eal: use wait event scheme for mcslock Feifei Wang
                     ` (2 subsequent siblings)
  4 siblings, 0 replies; 113+ messages in thread
From: Feifei Wang @ 2021-10-26  8:02 UTC (permalink / raw)
  Cc: dev, nd, Feifei Wang, Ruifeng Wang

Instead of polling for read pflock update, use wait event scheme for
this case.

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 lib/eal/include/generic/rte_pflock.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/lib/eal/include/generic/rte_pflock.h b/lib/eal/include/generic/rte_pflock.h
index e57c179ef2..7573b036bf 100644
--- a/lib/eal/include/generic/rte_pflock.h
+++ b/lib/eal/include/generic/rte_pflock.h
@@ -121,9 +121,7 @@ rte_pflock_read_lock(rte_pflock_t *pf)
 		return;
 
 	/* Wait for current write phase to complete. */
-	while ((__atomic_load_n(&pf->rd.in, __ATOMIC_ACQUIRE)
-		& RTE_PFLOCK_WBITS) == w)
-		rte_pause();
+	rte_wait_event(&pf->rd.in, RTE_PFLOCK_WBITS, ==, w, __ATOMIC_ACQUIRE);
 }
 
 /**
-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [PATCH v5 3/5] eal: use wait event scheme for mcslock
  2021-10-26  8:01 ` [dpdk-dev] [PATCH v5 0/5] add new definitions for wait scheme Feifei Wang
  2021-10-26  8:02   ` [dpdk-dev] [PATCH v5 1/5] eal: " Feifei Wang
  2021-10-26  8:02   ` [dpdk-dev] [PATCH v5 2/5] eal: use wait event for read pflock Feifei Wang
@ 2021-10-26  8:02   ` Feifei Wang
  2021-10-26  8:02   ` [dpdk-dev] [PATCH v5 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration Feifei Wang
  2021-10-26  8:02   ` [dpdk-dev] [PATCH v5 5/5] lib/distributor: use wait event scheme Feifei Wang
  4 siblings, 0 replies; 113+ messages in thread
From: Feifei Wang @ 2021-10-26  8:02 UTC (permalink / raw)
  To: Honnappa Nagarahalli; +Cc: dev, nd, Feifei Wang, Ruifeng Wang

Instead of polling for mcslock to be updated, use wait event scheme
for this case.

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 lib/eal/include/generic/rte_mcslock.h | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/lib/eal/include/generic/rte_mcslock.h b/lib/eal/include/generic/rte_mcslock.h
index 34f33c64a5..806a2b2c7e 100644
--- a/lib/eal/include/generic/rte_mcslock.h
+++ b/lib/eal/include/generic/rte_mcslock.h
@@ -116,8 +116,13 @@ rte_mcslock_unlock(rte_mcslock_t **msl, rte_mcslock_t *me)
 		/* More nodes added to the queue by other CPUs.
 		 * Wait until the next pointer is set.
 		 */
-		while (__atomic_load_n(&me->next, __ATOMIC_RELAXED) == NULL)
-			rte_pause();
+#ifdef RTE_ARCH_32
+		rte_wait_event((uint32_t *)&me->next, UINT32_MAX, ==, 0,
+				__ATOMIC_RELAXED);
+#else
+		rte_wait_event((uint64_t *)&me->next, UINT64_MAX, ==, 0,
+				__ATOMIC_RELAXED);
+#endif
 	}
 
 	/* Pass lock to next waiter. */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [PATCH v5 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration
  2021-10-26  8:01 ` [dpdk-dev] [PATCH v5 0/5] add new definitions for wait scheme Feifei Wang
                     ` (2 preceding siblings ...)
  2021-10-26  8:02   ` [dpdk-dev] [PATCH v5 3/5] eal: use wait event scheme for mcslock Feifei Wang
@ 2021-10-26  8:02   ` Feifei Wang
  2021-10-26  8:18     ` [dpdk-dev] 回复: " Feifei Wang
  2021-10-26  8:02   ` [dpdk-dev] [PATCH v5 5/5] lib/distributor: use wait event scheme Feifei Wang
  4 siblings, 1 reply; 113+ messages in thread
From: Feifei Wang @ 2021-10-26  8:02 UTC (permalink / raw)
  To: Konstantin Ananyev; +Cc: dev, nd, Feifei Wang, Ruifeng Wang

Instead of polling for cbi->use to be updated, use wait event scheme.

Furthermore, delete 'const' for 'bpf_eth_cbi_wait'. This is because of
a compilation error:
-----------------------------------------------------------------------
../lib/eal/include/rte_common.h:36:13: error: read-only variable ‘value’
used as ‘asm’ output
   36 | #define asm __asm__
      |             ^~~~~~~

../lib/eal/arm/include/rte_pause_64.h:66:3: note: in expansion of macro
‘asm’
   66 |   asm volatile("ldaxr %w[tmp], [%x[addr]]" \
      |   ^~~

../lib/eal/arm/include/rte_pause_64.h:96:3: note: in expansion of macro
‘__LOAD_EXC_32’
   96 |   __LOAD_EXC_32((src), dst, memorder)     \
      |   ^~~~~~~~~~~~~

../lib/eal/arm/include/rte_pause_64.h:167:4: note: in expansion of macro
‘__LOAD_EXC’
  167 |    __LOAD_EXC((addr), value, memorder, size) \
      |    ^~~~~~~~~~

../lib/bpf/bpf_pkt.c:125:3: note: in expansion of macro ‘rte_wait_event’
  125 |   rte_wait_event(&cbi->use, UINT32_MAX, ==, puse,
-----------------------------------------------------------------------

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 lib/bpf/bpf_pkt.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/lib/bpf/bpf_pkt.c b/lib/bpf/bpf_pkt.c
index 6e8248f0d6..213d44a75a 100644
--- a/lib/bpf/bpf_pkt.c
+++ b/lib/bpf/bpf_pkt.c
@@ -111,9 +111,9 @@ bpf_eth_cbi_unuse(struct bpf_eth_cbi *cbi)
  * Waits till datapath finished using given callback.
  */
 static void
-bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
+bpf_eth_cbi_wait(struct bpf_eth_cbi *cbi)
 {
-	uint32_t nuse, puse;
+	uint32_t puse;
 
 	/* make sure all previous loads and stores are completed */
 	rte_smp_mb();
@@ -122,11 +122,8 @@ bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
 
 	/* in use, busy wait till current RX/TX iteration is finished */
 	if ((puse & BPF_ETH_CBI_INUSE) != 0) {
-		do {
-			rte_pause();
-			rte_compiler_barrier();
-			nuse = cbi->use;
-		} while (nuse == puse);
+		rte_wait_event(&cbi->use, UINT32_MAX, ==, puse,
+				__ATOMIC_RELAXED);
 	}
 }
 
-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [PATCH v5 5/5] lib/distributor: use wait event scheme
  2021-10-26  8:01 ` [dpdk-dev] [PATCH v5 0/5] add new definitions for wait scheme Feifei Wang
                     ` (3 preceding siblings ...)
  2021-10-26  8:02   ` [dpdk-dev] [PATCH v5 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration Feifei Wang
@ 2021-10-26  8:02   ` Feifei Wang
  4 siblings, 0 replies; 113+ messages in thread
From: Feifei Wang @ 2021-10-26  8:02 UTC (permalink / raw)
  To: David Hunt; +Cc: dev, nd, Feifei Wang, Ruifeng Wang

Instead of polling for bufptr64 to be updated, use
wait event for this case.

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 lib/distributor/rte_distributor_single.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/lib/distributor/rte_distributor_single.c b/lib/distributor/rte_distributor_single.c
index f4725b1d0b..d52b24a453 100644
--- a/lib/distributor/rte_distributor_single.c
+++ b/lib/distributor/rte_distributor_single.c
@@ -33,9 +33,8 @@ rte_distributor_request_pkt_single(struct rte_distributor_single *d,
 	union rte_distributor_buffer_single *buf = &d->bufs[worker_id];
 	int64_t req = (((int64_t)(uintptr_t)oldpkt) << RTE_DISTRIB_FLAG_BITS)
 			| RTE_DISTRIB_GET_BUF;
-	while (unlikely(__atomic_load_n(&buf->bufptr64, __ATOMIC_RELAXED)
-			& RTE_DISTRIB_FLAGS_MASK))
-		rte_pause();
+	rte_wait_event(&buf->bufptr64, RTE_DISTRIB_FLAGS_MASK,
+			!=, 0, __ATOMIC_RELAXED);
 
 	/* Sync with distributor on GET_BUF flag. */
 	__atomic_store_n(&(buf->bufptr64), req, __ATOMIC_RELEASE);
@@ -74,9 +73,8 @@ rte_distributor_return_pkt_single(struct rte_distributor_single *d,
 	union rte_distributor_buffer_single *buf = &d->bufs[worker_id];
 	uint64_t req = (((int64_t)(uintptr_t)oldpkt) << RTE_DISTRIB_FLAG_BITS)
 			| RTE_DISTRIB_RETURN_BUF;
-	while (unlikely(__atomic_load_n(&buf->bufptr64, __ATOMIC_RELAXED)
-			& RTE_DISTRIB_FLAGS_MASK))
-		rte_pause();
+	rte_wait_event(&buf->bufptr64, RTE_DISTRIB_FLAGS_MASK,
+			!=, 0, __ATOMIC_RELAXED);
 
 	/* Sync with distributor on RETURN_BUF flag. */
 	__atomic_store_n(&(buf->bufptr64), req, __ATOMIC_RELEASE);
-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] 回复: [PATCH v5 1/5] eal: add new definitions for wait scheme
  2021-10-26  8:02   ` [dpdk-dev] [PATCH v5 1/5] eal: " Feifei Wang
@ 2021-10-26  8:08     ` Feifei Wang
  2021-10-26  9:46       ` [dpdk-dev] " Ananyev, Konstantin
  0 siblings, 1 reply; 113+ messages in thread
From: Feifei Wang @ 2021-10-26  8:08 UTC (permalink / raw)
  To: Ananyev, Konstantin; +Cc: dev, nd, nd

> -----邮件原件-----
> 发件人: Feifei Wang <feifei.wang2@arm.com>
> 发送时间: Tuesday, October 26, 2021 4:02 PM
> 收件人: Ruifeng Wang <Ruifeng.Wang@arm.com>
> 抄送: dev@dpdk.org; nd <nd@arm.com>; Feifei Wang
> <Feifei.Wang2@arm.com>
> 主题: [PATCH v5 1/5] eal: add new definitions for wait scheme
> 
> Introduce macros as generic interface for address monitoring.
> For different size, encapsulate '__LOAD_EXC_16', '__LOAD_EXC_32'
> and '__LOAD_EXC_64' into a new macro '__LOAD_EXC'.
> 
> Furthermore, to prevent compilation warning in arm:
> ----------------------------------------------
> 'warning: implicit declaration of function ...'
> ----------------------------------------------
> Delete 'undef' constructions for '__LOAD_EXC_xx', '__SEVL' and '__WFE'.
> 
> This is because original macros are undefine at the end of the file.
> If new macro 'rte_wait_event' calls them in other files, they will be seen as
> 'not defined'.
> 
> Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> ---
>  lib/eal/arm/include/rte_pause_64.h  | 135 ++++++++++++++++------------
> lib/eal/include/generic/rte_pause.h |  27 ++++++
>  2 files changed, 105 insertions(+), 57 deletions(-)
> 
> diff --git a/lib/eal/arm/include/rte_pause_64.h
> b/lib/eal/arm/include/rte_pause_64.h
> index e87d10b8cc..1fea0dec63 100644
> --- a/lib/eal/arm/include/rte_pause_64.h
> +++ b/lib/eal/arm/include/rte_pause_64.h
> @@ -31,20 +31,12 @@ static inline void rte_pause(void)
>  /* Put processor into low power WFE(Wait For Event) state. */  #define
> __WFE() { asm volatile("wfe" : : : "memory"); }
> 
> -static __rte_always_inline void
> -rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> -		int memorder)
> -{
> -	uint16_t value;
> -
> -	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> __ATOMIC_RELAXED);
> -
> -	/*
> -	 * Atomic exclusive load from addr, it returns the 16-bit content of
> -	 * *addr while making it 'monitored',when it is written by someone
> -	 * else, the 'monitored' state is cleared and a event is generated
> -	 * implicitly to exit WFE.
> -	 */
> +/*
> + * Atomic exclusive load from addr, it returns the 16-bit content of
> + * *addr while making it 'monitored', when it is written by someone
> + * else, the 'monitored' state is cleared and an event is generated
> + * implicitly to exit WFE.
> + */
>  #define __LOAD_EXC_16(src, dst, memorder) {               \
>  	if (memorder == __ATOMIC_RELAXED) {               \
>  		asm volatile("ldxrh %w[tmp], [%x[addr]]"  \ @@ -58,6 +50,62
> @@ rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
>  			: "memory");                      \
>  	} }
> 
> +/*
> + * Atomic exclusive load from addr, it returns the 32-bit content of
> + * *addr while making it 'monitored', when it is written by someone
> + * else, the 'monitored' state is cleared and an event is generated
> + * implicitly to exit WFE.
> + */
> +#define __LOAD_EXC_32(src, dst, memorder) {              \
> +	if (memorder == __ATOMIC_RELAXED) {              \
> +		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> +			: [tmp] "=&r" (dst)              \
> +			: [addr] "r"(src)                \
> +			: "memory");                     \
> +	} else {                                         \
> +		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> +			: [tmp] "=&r" (dst)              \
> +			: [addr] "r"(src)                \
> +			: "memory");                     \
> +	} }
> +
> +/*
> + * Atomic exclusive load from addr, it returns the 64-bit content of
> + * *addr while making it 'monitored', when it is written by someone
> + * else, the 'monitored' state is cleared and an event is generated
> + * implicitly to exit WFE.
> + */
> +#define __LOAD_EXC_64(src, dst, memorder) {              \
> +	if (memorder == __ATOMIC_RELAXED) {              \
> +		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> +			: [tmp] "=&r" (dst)              \
> +			: [addr] "r"(src)                \
> +			: "memory");                     \
> +	} else {                                         \
> +		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> +			: [tmp] "=&r" (dst)              \
> +			: [addr] "r"(src)                \
> +			: "memory");                     \
> +	} }
> +
> +#define __LOAD_EXC(src, dst, memorder, size) {          \
> +	assert(size == 16 || size == 32 || size == 64); \
> +	if (size == 16)                                 \
> +		__LOAD_EXC_16(src, dst, memorder)       \
> +	else if (size == 32)                            \
> +		__LOAD_EXC_32(src, dst, memorder)       \
> +	else if (size == 64)                            \
> +		__LOAD_EXC_64(src, dst, memorder)       \
> +}
> +
> +static __rte_always_inline void
> +rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> +		int memorder)
> +{
> +	uint16_t value;
> +
> +	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> __ATOMIC_RELAXED);
> +
>  	__LOAD_EXC_16(addr, value, memorder)
>  	if (value != expected) {
>  		__SEVL()
> @@ -66,7 +114,6 @@ rte_wait_until_equal_16(volatile uint16_t *addr,
> uint16_t expected,
>  			__LOAD_EXC_16(addr, value, memorder)
>  		} while (value != expected);
>  	}
> -#undef __LOAD_EXC_16
>  }
> 
>  static __rte_always_inline void
> @@ -77,25 +124,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr,
> uint32_t expected,
> 
>  	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> __ATOMIC_RELAXED);
> 
> -	/*
> -	 * Atomic exclusive load from addr, it returns the 32-bit content of
> -	 * *addr while making it 'monitored',when it is written by someone
> -	 * else, the 'monitored' state is cleared and a event is generated
> -	 * implicitly to exit WFE.
> -	 */
> -#define __LOAD_EXC_32(src, dst, memorder) {              \
> -	if (memorder == __ATOMIC_RELAXED) {              \
> -		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> -			: [tmp] "=&r" (dst)              \
> -			: [addr] "r"(src)                \
> -			: "memory");                     \
> -	} else {                                         \
> -		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> -			: [tmp] "=&r" (dst)              \
> -			: [addr] "r"(src)                \
> -			: "memory");                     \
> -	} }
> -
>  	__LOAD_EXC_32(addr, value, memorder)
>  	if (value != expected) {
>  		__SEVL()
> @@ -104,7 +132,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr,
> uint32_t expected,
>  			__LOAD_EXC_32(addr, value, memorder)
>  		} while (value != expected);
>  	}
> -#undef __LOAD_EXC_32
>  }
> 
>  static __rte_always_inline void
> @@ -115,25 +142,6 @@ rte_wait_until_equal_64(volatile uint64_t *addr,
> uint64_t expected,
> 
>  	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> __ATOMIC_RELAXED);
> 
> -	/*
> -	 * Atomic exclusive load from addr, it returns the 64-bit content of
> -	 * *addr while making it 'monitored',when it is written by someone
> -	 * else, the 'monitored' state is cleared and a event is generated
> -	 * implicitly to exit WFE.
> -	 */
> -#define __LOAD_EXC_64(src, dst, memorder) {              \
> -	if (memorder == __ATOMIC_RELAXED) {              \
> -		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> -			: [tmp] "=&r" (dst)              \
> -			: [addr] "r"(src)                \
> -			: "memory");                     \
> -	} else {                                         \
> -		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> -			: [tmp] "=&r" (dst)              \
> -			: [addr] "r"(src)                \
> -			: "memory");                     \
> -	} }
> -
>  	__LOAD_EXC_64(addr, value, memorder)
>  	if (value != expected) {
>  		__SEVL()
> @@ -143,10 +151,23 @@ rte_wait_until_equal_64(volatile uint64_t *addr,
> uint64_t expected,
>  		} while (value != expected);
>  	}
>  }
> -#undef __LOAD_EXC_64
> 
> -#undef __SEVL
> -#undef __WFE
> +#define rte_wait_event(addr, mask, cond, expected, memorder)      \
> +do {                                                              \
> +	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));        \
> +	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&          \
> +				memorder != __ATOMIC_RELAXED);    \
> +	uint32_t size = sizeof(*(addr)) << 3;                     \
> +	typeof(*(addr)) value = 0;                                \
> +	__LOAD_EXC((addr), value, memorder, size)                 \
> +	if ((value & (mask)) cond expected) {                     \
> +		__SEVL()                                          \
> +		do {                                              \
> +			__WFE()                                   \
> +			__LOAD_EXC((addr), value, memorder, size) \
> +		} while ((value & (mask)) cond expected);         \

Hi, Konstantin

For this patch, I cannot add '()' around 'expected' because the patch style check will report:
-------------------------------------------------------------------------------------------------------------------
WARNING:SPACING: space prohibited between function name and open parenthesis '('
#203: FILE: lib/eal/arm/include/rte_pause_64.h:163:
+       if ((value & (mask)) cond (expected)) {                   \

WARNING:SPACING: space prohibited between function name and open parenthesis '('
#208: FILE: lib/eal/arm/include/rte_pause_64.h:168:
+               } while ((value & (mask)) cond (expected));       \

WARNING:SPACING: space prohibited between function name and open parenthesis '('
#246: FILE: lib/eal/include/generic/rte_pause.h:138:
+       while ((__atomic_load_n((addr), (memorder)) & mask) cond (expected)) \

total: 1 errors, 3 warnings, 211 lines checked
-------------------------------------------------------------------------------------------------------------------
So I just add '()' for 'addr' and 'mask'.


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] 回复: [PATCH v5 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration
  2021-10-26  8:02   ` [dpdk-dev] [PATCH v5 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration Feifei Wang
@ 2021-10-26  8:18     ` Feifei Wang
  2021-10-26  9:43       ` [dpdk-dev] " Ananyev, Konstantin
  0 siblings, 1 reply; 113+ messages in thread
From: Feifei Wang @ 2021-10-26  8:18 UTC (permalink / raw)
  To: Konstantin Ananyev; +Cc: dev, nd, Ruifeng Wang, nd



> -----邮件原件-----
> 发件人: Feifei Wang <feifei.wang2@arm.com>
> 发送时间: Tuesday, October 26, 2021 4:02 PM
> 收件人: Konstantin Ananyev <konstantin.ananyev@intel.com>
> 抄送: dev@dpdk.org; nd <nd@arm.com>; Feifei Wang
> <Feifei.Wang2@arm.com>; Ruifeng Wang <Ruifeng.Wang@arm.com>
> 主题: [PATCH v5 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration
> 
> Instead of polling for cbi->use to be updated, use wait event scheme.
> 
> Furthermore, delete 'const' for 'bpf_eth_cbi_wait'. This is because of a
> compilation error:
> -----------------------------------------------------------------------
> ../lib/eal/include/rte_common.h:36:13: error: read-only variable ‘value’
> used as ‘asm’ output
>    36 | #define asm __asm__
>       |             ^~~~~~~
> 
> ../lib/eal/arm/include/rte_pause_64.h:66:3: note: in expansion of macro ‘asm’
>    66 |   asm volatile("ldaxr %w[tmp], [%x[addr]]" \
>       |   ^~~
> 
> ../lib/eal/arm/include/rte_pause_64.h:96:3: note: in expansion of macro
> ‘__LOAD_EXC_32’
>    96 |   __LOAD_EXC_32((src), dst, memorder)     \
>       |   ^~~~~~~~~~~~~
> 
> ../lib/eal/arm/include/rte_pause_64.h:167:4: note: in expansion of macro
> ‘__LOAD_EXC’
>   167 |    __LOAD_EXC((addr), value, memorder, size) \
>       |    ^~~~~~~~~~
> 
> ../lib/bpf/bpf_pkt.c:125:3: note: in expansion of macro ‘rte_wait_event’
>   125 |   rte_wait_event(&cbi->use, UINT32_MAX, ==, puse,
> -----------------------------------------------------------------------
> 
> Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> ---
>  lib/bpf/bpf_pkt.c | 11 ++++-------
>  1 file changed, 4 insertions(+), 7 deletions(-)
> 
> diff --git a/lib/bpf/bpf_pkt.c b/lib/bpf/bpf_pkt.c index
> 6e8248f0d6..213d44a75a 100644
> --- a/lib/bpf/bpf_pkt.c
> +++ b/lib/bpf/bpf_pkt.c
> @@ -111,9 +111,9 @@ bpf_eth_cbi_unuse(struct bpf_eth_cbi *cbi)
>   * Waits till datapath finished using given callback.
>   */
>  static void
> -bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
> +bpf_eth_cbi_wait(struct bpf_eth_cbi *cbi)

Hi, Konstantin

For this bpf patch, I deleted 'const' even though this is contrary to what we
discussed earlier. This is because if we keep 'const' here and use the new
'rte_wait_event' macro, the compiler will report an error. Earlier, the arm version
could not be compiled because I forgot to enable the "wfe" config in the meson file,
so this issue could not happen before.

>  {
> -	uint32_t nuse, puse;
> +	uint32_t puse;
> 
>  	/* make sure all previous loads and stores are completed */
>  	rte_smp_mb();
> @@ -122,11 +122,8 @@ bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
> 
>  	/* in use, busy wait till current RX/TX iteration is finished */
>  	if ((puse & BPF_ETH_CBI_INUSE) != 0) {
> -		do {
> -			rte_pause();
> -			rte_compiler_barrier();
> -			nuse = cbi->use;
> -		} while (nuse == puse);
> +		rte_wait_event(&cbi->use, UINT32_MAX, ==, puse,
> +				__ATOMIC_RELAXED);
>  	}
>  }
> 
> --
> 2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [dpdk-dev] [PATCH v5 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration
  2021-10-26  8:18     ` [dpdk-dev] 回复: " Feifei Wang
@ 2021-10-26  9:43       ` Ananyev, Konstantin
  2021-10-26 12:56         ` Ananyev, Konstantin
  0 siblings, 1 reply; 113+ messages in thread
From: Ananyev, Konstantin @ 2021-10-26  9:43 UTC (permalink / raw)
  To: Feifei Wang; +Cc: dev, nd, Ruifeng Wang, nd

Hi Feifei,

> > Instead of polling for cbi->use to be updated, use wait event scheme.
> >
> > Furthermore, delete 'const' for 'bpf_eth_cbi_wait'. This is because of a
> > compilation error:
> > -----------------------------------------------------------------------
> > ../lib/eal/include/rte_common.h:36:13: error: read-only variable ‘value’
> > used as ‘asm’ output
> >    36 | #define asm __asm__
> >       |             ^~~~~~~
> >
> > ../lib/eal/arm/include/rte_pause_64.h:66:3: note: in expansion of macro ‘asm’
> >    66 |   asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> >       |   ^~~
> >
> > ../lib/eal/arm/include/rte_pause_64.h:96:3: note: in expansion of macro
> > ‘__LOAD_EXC_32’
> >    96 |   __LOAD_EXC_32((src), dst, memorder)     \
> >       |   ^~~~~~~~~~~~~
> >
> > ../lib/eal/arm/include/rte_pause_64.h:167:4: note: in expansion of macro
> > ‘__LOAD_EXC’
> >   167 |    __LOAD_EXC((addr), value, memorder, size) \
> >       |    ^~~~~~~~~~
> >
> > ../lib/bpf/bpf_pkt.c:125:3: note: in expansion of macro ‘rte_wait_event’
> >   125 |   rte_wait_event(&cbi->use, UINT32_MAX, ==, puse,
> > -----------------------------------------------------------------------
> >
> > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > ---
> >  lib/bpf/bpf_pkt.c | 11 ++++-------
> >  1 file changed, 4 insertions(+), 7 deletions(-)
> >
> > diff --git a/lib/bpf/bpf_pkt.c b/lib/bpf/bpf_pkt.c index
> > 6e8248f0d6..213d44a75a 100644
> > --- a/lib/bpf/bpf_pkt.c
> > +++ b/lib/bpf/bpf_pkt.c
> > @@ -111,9 +111,9 @@ bpf_eth_cbi_unuse(struct bpf_eth_cbi *cbi)
> >   * Waits till datapath finished using given callback.
> >   */
> >  static void
> > -bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
> > +bpf_eth_cbi_wait(struct bpf_eth_cbi *cbi)	
> 
> Hi, Konstantin
> 
> For this bpf patch, I delete 'const' through this is contrary to what we
> discussed earlier. This is because if  we keep 'constant' here and use 'rte_wait_event'
> new macro, compiler will report error. And earlier the arm version cannot be compiled
> due to I forgot enable "wfe" config in the meson file, so this issue can not happen before.


Honestly, I don't understand why we have to remove the perfectly valid 'const' qualifier here.
If this macro can't be used with pointers to const (I still don't understand why),
then let's just not use this macro here.
Strictly speaking, I don't see much benefit from it here.

> 
> >  {
> > -	uint32_t nuse, puse;
> > +	uint32_t puse;
> >
> >  	/* make sure all previous loads and stores are completed */
> >  	rte_smp_mb();
> > @@ -122,11 +122,8 @@ bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
> >
> >  	/* in use, busy wait till current RX/TX iteration is finished */
> >  	if ((puse & BPF_ETH_CBI_INUSE) != 0) {
> > -		do {
> > -			rte_pause();
> > -			rte_compiler_barrier();
> > -			nuse = cbi->use;
> > -		} while (nuse == puse);
> > +		rte_wait_event(&cbi->use, UINT32_MAX, ==, puse,
> > +				__ATOMIC_RELAXED);
> >  	}
> >  }
> >
> > --
> > 2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [dpdk-dev] [PATCH v5 1/5] eal: add new definitions for wait scheme
  2021-10-26  8:08     ` [dpdk-dev] 回复: " Feifei Wang
@ 2021-10-26  9:46       ` Ananyev, Konstantin
  2021-10-26  9:59         ` Ananyev, Konstantin
  0 siblings, 1 reply; 113+ messages in thread
From: Ananyev, Konstantin @ 2021-10-26  9:46 UTC (permalink / raw)
  To: Feifei Wang; +Cc: dev, nd, nd



> >
> > Introduce macros as generic interface for address monitoring.
> > For different size, encapsulate '__LOAD_EXC_16', '__LOAD_EXC_32'
> > and '__LOAD_EXC_64' into a new macro '__LOAD_EXC'.
> >
> > Furthermore, to prevent compilation warning in arm:
> > ----------------------------------------------
> > 'warning: implicit declaration of function ...'
> > ----------------------------------------------
> > Delete 'undef' constructions for '__LOAD_EXC_xx', '__SEVL' and '__WFE'.
> >
> > This is because original macros are undefine at the end of the file.
> > If new macro 'rte_wait_event' calls them in other files, they will be seen as
> > 'not defined'.
> >
> > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > ---
> >  lib/eal/arm/include/rte_pause_64.h  | 135 ++++++++++++++++------------
> > lib/eal/include/generic/rte_pause.h |  27 ++++++
> >  2 files changed, 105 insertions(+), 57 deletions(-)
> >
> > diff --git a/lib/eal/arm/include/rte_pause_64.h
> > b/lib/eal/arm/include/rte_pause_64.h
> > index e87d10b8cc..1fea0dec63 100644
> > --- a/lib/eal/arm/include/rte_pause_64.h
> > +++ b/lib/eal/arm/include/rte_pause_64.h
> > @@ -31,20 +31,12 @@ static inline void rte_pause(void)
> >  /* Put processor into low power WFE(Wait For Event) state. */  #define
> > __WFE() { asm volatile("wfe" : : : "memory"); }
> >
> > -static __rte_always_inline void
> > -rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> > -		int memorder)
> > -{
> > -	uint16_t value;
> > -
> > -	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > __ATOMIC_RELAXED);
> > -
> > -	/*
> > -	 * Atomic exclusive load from addr, it returns the 16-bit content of
> > -	 * *addr while making it 'monitored',when it is written by someone
> > -	 * else, the 'monitored' state is cleared and a event is generated
> > -	 * implicitly to exit WFE.
> > -	 */
> > +/*
> > + * Atomic exclusive load from addr, it returns the 16-bit content of
> > + * *addr while making it 'monitored', when it is written by someone
> > + * else, the 'monitored' state is cleared and an event is generated
> > + * implicitly to exit WFE.
> > + */
> >  #define __LOAD_EXC_16(src, dst, memorder) {               \
> >  	if (memorder == __ATOMIC_RELAXED) {               \
> >  		asm volatile("ldxrh %w[tmp], [%x[addr]]"  \ @@ -58,6 +50,62
> > @@ rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> >  			: "memory");                      \
> >  	} }
> >
> > +/*
> > + * Atomic exclusive load from addr, it returns the 32-bit content of
> > + * *addr while making it 'monitored', when it is written by someone
> > + * else, the 'monitored' state is cleared and an event is generated
> > + * implicitly to exit WFE.
> > + */
> > +#define __LOAD_EXC_32(src, dst, memorder) {              \
> > +	if (memorder == __ATOMIC_RELAXED) {              \
> > +		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> > +			: [tmp] "=&r" (dst)              \
> > +			: [addr] "r"(src)                \
> > +			: "memory");                     \
> > +	} else {                                         \
> > +		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> > +			: [tmp] "=&r" (dst)              \
> > +			: [addr] "r"(src)                \
> > +			: "memory");                     \
> > +	} }
> > +
> > +/*
> > + * Atomic exclusive load from addr, it returns the 64-bit content of
> > + * *addr while making it 'monitored', when it is written by someone
> > + * else, the 'monitored' state is cleared and an event is generated
> > + * implicitly to exit WFE.
> > + */
> > +#define __LOAD_EXC_64(src, dst, memorder) {              \
> > +	if (memorder == __ATOMIC_RELAXED) {              \
> > +		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> > +			: [tmp] "=&r" (dst)              \
> > +			: [addr] "r"(src)                \
> > +			: "memory");                     \
> > +	} else {                                         \
> > +		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> > +			: [tmp] "=&r" (dst)              \
> > +			: [addr] "r"(src)                \
> > +			: "memory");                     \
> > +	} }
> > +
> > +#define __LOAD_EXC(src, dst, memorder, size) {          \
> > +	assert(size == 16 || size == 32 || size == 64); \
> > +	if (size == 16)                                 \
> > +		__LOAD_EXC_16(src, dst, memorder)       \
> > +	else if (size == 32)                            \
> > +		__LOAD_EXC_32(src, dst, memorder)       \
> > +	else if (size == 64)                            \
> > +		__LOAD_EXC_64(src, dst, memorder)       \
> > +}
> > +
> > +static __rte_always_inline void
> > +rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> > +		int memorder)
> > +{
> > +	uint16_t value;
> > +
> > +	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > __ATOMIC_RELAXED);
> > +
> >  	__LOAD_EXC_16(addr, value, memorder)
> >  	if (value != expected) {
> >  		__SEVL()
> > @@ -66,7 +114,6 @@ rte_wait_until_equal_16(volatile uint16_t *addr,
> > uint16_t expected,
> >  			__LOAD_EXC_16(addr, value, memorder)
> >  		} while (value != expected);
> >  	}
> > -#undef __LOAD_EXC_16
> >  }
> >
> >  static __rte_always_inline void
> > @@ -77,25 +124,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr,
> > uint32_t expected,
> >
> >  	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > __ATOMIC_RELAXED);
> >
> > -	/*
> > -	 * Atomic exclusive load from addr, it returns the 32-bit content of
> > -	 * *addr while making it 'monitored',when it is written by someone
> > -	 * else, the 'monitored' state is cleared and a event is generated
> > -	 * implicitly to exit WFE.
> > -	 */
> > -#define __LOAD_EXC_32(src, dst, memorder) {              \
> > -	if (memorder == __ATOMIC_RELAXED) {              \
> > -		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> > -			: [tmp] "=&r" (dst)              \
> > -			: [addr] "r"(src)                \
> > -			: "memory");                     \
> > -	} else {                                         \
> > -		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> > -			: [tmp] "=&r" (dst)              \
> > -			: [addr] "r"(src)                \
> > -			: "memory");                     \
> > -	} }
> > -
> >  	__LOAD_EXC_32(addr, value, memorder)
> >  	if (value != expected) {
> >  		__SEVL()
> > @@ -104,7 +132,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr,
> > uint32_t expected,
> >  			__LOAD_EXC_32(addr, value, memorder)
> >  		} while (value != expected);
> >  	}
> > -#undef __LOAD_EXC_32
> >  }
> >
> >  static __rte_always_inline void
> > @@ -115,25 +142,6 @@ rte_wait_until_equal_64(volatile uint64_t *addr,
> > uint64_t expected,
> >
> >  	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > __ATOMIC_RELAXED);
> >
> > -	/*
> > -	 * Atomic exclusive load from addr, it returns the 64-bit content of
> > -	 * *addr while making it 'monitored',when it is written by someone
> > -	 * else, the 'monitored' state is cleared and a event is generated
> > -	 * implicitly to exit WFE.
> > -	 */
> > -#define __LOAD_EXC_64(src, dst, memorder) {              \
> > -	if (memorder == __ATOMIC_RELAXED) {              \
> > -		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> > -			: [tmp] "=&r" (dst)              \
> > -			: [addr] "r"(src)                \
> > -			: "memory");                     \
> > -	} else {                                         \
> > -		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> > -			: [tmp] "=&r" (dst)              \
> > -			: [addr] "r"(src)                \
> > -			: "memory");                     \
> > -	} }
> > -
> >  	__LOAD_EXC_64(addr, value, memorder)
> >  	if (value != expected) {
> >  		__SEVL()
> > @@ -143,10 +151,23 @@ rte_wait_until_equal_64(volatile uint64_t *addr,
> > uint64_t expected,
> >  		} while (value != expected);
> >  	}
> >  }
> > -#undef __LOAD_EXC_64
> >
> > -#undef __SEVL
> > -#undef __WFE
> > +#define rte_wait_event(addr, mask, cond, expected, memorder)      \
> > +do {                                                              \
> > +	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));        \
> > +	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&          \
> > +				memorder != __ATOMIC_RELAXED);    \
> > +	uint32_t size = sizeof(*(addr)) << 3;                     \
> > +	typeof(*(addr)) value = 0;                                \
> > +	__LOAD_EXC((addr), value, memorder, size)                 \
> > +	if ((value & (mask)) cond expected) {                     \
> > +		__SEVL()                                          \
> > +		do {                                              \
> > +			__WFE()                                   \
> > +			__LOAD_EXC((addr), value, memorder, size) \
> > +		} while ((value & (mask)) cond expected);         \
> 
> Hi, Konstantin
> 
> For this patch, I cannot add '()' for expected due to patch style check will report:
> -------------------------------------------------------------------------------------------------------------------
> WARNING:SPACING: space prohibited between function name and open parenthesis '('
> #203: FILE: lib/eal/arm/include/rte_pause_64.h:163:
> +       if ((value & (mask)) cond (expected)) {                   \
> 
> WARNING:SPACING: space prohibited between function name and open parenthesis '('
> #208: FILE: lib/eal/arm/include/rte_pause_64.h:168:
> +               } while ((value & (mask)) cond (expected));       \
> 
> WARNING:SPACING: space prohibited between function name and open parenthesis '('
> #246: FILE: lib/eal/include/generic/rte_pause.h:138:
> +       while ((__atomic_load_n((addr), (memorder)) & mask) cond (expected)) \
> 
> total: 1 errors, 3 warnings, 211 lines checked

Those are just checkpatch warnings.
Personally, I'd rather live with checkpatch complaints than with a problematic macro.

> -------------------------------------------------------------------------------------------------------------------
> So I just add '()' for 'addr' and 'mask'.


^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [dpdk-dev] [PATCH v5 1/5] eal: add new definitions for wait scheme
  2021-10-26  9:46       ` [dpdk-dev] " Ananyev, Konstantin
@ 2021-10-26  9:59         ` Ananyev, Konstantin
  2021-10-27  6:56           ` [dpdk-dev] 回复: " Feifei Wang
  0 siblings, 1 reply; 113+ messages in thread
From: Ananyev, Konstantin @ 2021-10-26  9:59 UTC (permalink / raw)
  To: Feifei Wang; +Cc: dev, nd, nd


> > > Introduce macros as generic interface for address monitoring.
> > > For different size, encapsulate '__LOAD_EXC_16', '__LOAD_EXC_32'
> > > and '__LOAD_EXC_64' into a new macro '__LOAD_EXC'.
> > >
> > > Furthermore, to prevent compilation warning in arm:
> > > ----------------------------------------------
> > > 'warning: implicit declaration of function ...'
> > > ----------------------------------------------
> > > Delete 'undef' constructions for '__LOAD_EXC_xx', '__SEVL' and '__WFE'.
> > >
> > > This is because original macros are undefine at the end of the file.
> > > If new macro 'rte_wait_event' calls them in other files, they will be seen as
> > > 'not defined'.
> > >
> > > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > > ---
> > >  lib/eal/arm/include/rte_pause_64.h  | 135 ++++++++++++++++------------
> > > lib/eal/include/generic/rte_pause.h |  27 ++++++
> > >  2 files changed, 105 insertions(+), 57 deletions(-)
> > >
> > > diff --git a/lib/eal/arm/include/rte_pause_64.h
> > > b/lib/eal/arm/include/rte_pause_64.h
> > > index e87d10b8cc..1fea0dec63 100644
> > > --- a/lib/eal/arm/include/rte_pause_64.h
> > > +++ b/lib/eal/arm/include/rte_pause_64.h
> > > @@ -31,20 +31,12 @@ static inline void rte_pause(void)
> > >  /* Put processor into low power WFE(Wait For Event) state. */  #define
> > > __WFE() { asm volatile("wfe" : : : "memory"); }
> > >
> > > -static __rte_always_inline void
> > > -rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> > > -		int memorder)
> > > -{
> > > -	uint16_t value;
> > > -
> > > -	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > __ATOMIC_RELAXED);
> > > -
> > > -	/*
> > > -	 * Atomic exclusive load from addr, it returns the 16-bit content of
> > > -	 * *addr while making it 'monitored',when it is written by someone
> > > -	 * else, the 'monitored' state is cleared and a event is generated
> > > -	 * implicitly to exit WFE.
> > > -	 */
> > > +/*
> > > + * Atomic exclusive load from addr, it returns the 16-bit content of
> > > + * *addr while making it 'monitored', when it is written by someone
> > > + * else, the 'monitored' state is cleared and an event is generated
> > > + * implicitly to exit WFE.
> > > + */
> > >  #define __LOAD_EXC_16(src, dst, memorder) {               \
> > >  	if (memorder == __ATOMIC_RELAXED) {               \
> > >  		asm volatile("ldxrh %w[tmp], [%x[addr]]"  \ @@ -58,6 +50,62
> > > @@ rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> > >  			: "memory");                      \
> > >  	} }
> > >
> > > +/*
> > > + * Atomic exclusive load from addr, it returns the 32-bit content of
> > > + * *addr while making it 'monitored', when it is written by someone
> > > + * else, the 'monitored' state is cleared and an event is generated
> > > + * implicitly to exit WFE.
> > > + */
> > > +#define __LOAD_EXC_32(src, dst, memorder) {              \
> > > +	if (memorder == __ATOMIC_RELAXED) {              \
> > > +		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> > > +			: [tmp] "=&r" (dst)              \
> > > +			: [addr] "r"(src)                \
> > > +			: "memory");                     \
> > > +	} else {                                         \
> > > +		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> > > +			: [tmp] "=&r" (dst)              \
> > > +			: [addr] "r"(src)                \
> > > +			: "memory");                     \
> > > +	} }
> > > +
> > > +/*
> > > + * Atomic exclusive load from addr, it returns the 64-bit content of
> > > + * *addr while making it 'monitored', when it is written by someone
> > > + * else, the 'monitored' state is cleared and an event is generated
> > > + * implicitly to exit WFE.
> > > + */
> > > +#define __LOAD_EXC_64(src, dst, memorder) {              \
> > > +	if (memorder == __ATOMIC_RELAXED) {              \
> > > +		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> > > +			: [tmp] "=&r" (dst)              \
> > > +			: [addr] "r"(src)                \
> > > +			: "memory");                     \
> > > +	} else {                                         \
> > > +		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> > > +			: [tmp] "=&r" (dst)              \
> > > +			: [addr] "r"(src)                \
> > > +			: "memory");                     \
> > > +	} }
> > > +
> > > +#define __LOAD_EXC(src, dst, memorder, size) {          \
> > > +	assert(size == 16 || size == 32 || size == 64); \
> > > +	if (size == 16)                                 \
> > > +		__LOAD_EXC_16(src, dst, memorder)       \
> > > +	else if (size == 32)                            \
> > > +		__LOAD_EXC_32(src, dst, memorder)       \
> > > +	else if (size == 64)                            \
> > > +		__LOAD_EXC_64(src, dst, memorder)       \
> > > +}
> > > +
> > > +static __rte_always_inline void
> > > +rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> > > +		int memorder)
> > > +{
> > > +	uint16_t value;
> > > +
> > > +	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > __ATOMIC_RELAXED);
> > > +
> > >  	__LOAD_EXC_16(addr, value, memorder)
> > >  	if (value != expected) {
> > >  		__SEVL()
> > > @@ -66,7 +114,6 @@ rte_wait_until_equal_16(volatile uint16_t *addr,
> > > uint16_t expected,
> > >  			__LOAD_EXC_16(addr, value, memorder)
> > >  		} while (value != expected);
> > >  	}
> > > -#undef __LOAD_EXC_16
> > >  }
> > >
> > >  static __rte_always_inline void
> > > @@ -77,25 +124,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr,
> > > uint32_t expected,
> > >
> > >  	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > __ATOMIC_RELAXED);
> > >
> > > -	/*
> > > -	 * Atomic exclusive load from addr, it returns the 32-bit content of
> > > -	 * *addr while making it 'monitored',when it is written by someone
> > > -	 * else, the 'monitored' state is cleared and a event is generated
> > > -	 * implicitly to exit WFE.
> > > -	 */
> > > -#define __LOAD_EXC_32(src, dst, memorder) {              \
> > > -	if (memorder == __ATOMIC_RELAXED) {              \
> > > -		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> > > -			: [tmp] "=&r" (dst)              \
> > > -			: [addr] "r"(src)                \
> > > -			: "memory");                     \
> > > -	} else {                                         \
> > > -		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> > > -			: [tmp] "=&r" (dst)              \
> > > -			: [addr] "r"(src)                \
> > > -			: "memory");                     \
> > > -	} }
> > > -
> > >  	__LOAD_EXC_32(addr, value, memorder)
> > >  	if (value != expected) {
> > >  		__SEVL()
> > > @@ -104,7 +132,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr,
> > > uint32_t expected,
> > >  			__LOAD_EXC_32(addr, value, memorder)
> > >  		} while (value != expected);
> > >  	}
> > > -#undef __LOAD_EXC_32
> > >  }
> > >
> > >  static __rte_always_inline void
> > > @@ -115,25 +142,6 @@ rte_wait_until_equal_64(volatile uint64_t *addr,
> > > uint64_t expected,
> > >
> > >  	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > __ATOMIC_RELAXED);
> > >
> > > -	/*
> > > -	 * Atomic exclusive load from addr, it returns the 64-bit content of
> > > -	 * *addr while making it 'monitored',when it is written by someone
> > > -	 * else, the 'monitored' state is cleared and a event is generated
> > > -	 * implicitly to exit WFE.
> > > -	 */
> > > -#define __LOAD_EXC_64(src, dst, memorder) {              \
> > > -	if (memorder == __ATOMIC_RELAXED) {              \
> > > -		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> > > -			: [tmp] "=&r" (dst)              \
> > > -			: [addr] "r"(src)                \
> > > -			: "memory");                     \
> > > -	} else {                                         \
> > > -		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> > > -			: [tmp] "=&r" (dst)              \
> > > -			: [addr] "r"(src)                \
> > > -			: "memory");                     \
> > > -	} }
> > > -
> > >  	__LOAD_EXC_64(addr, value, memorder)
> > >  	if (value != expected) {
> > >  		__SEVL()
> > > @@ -143,10 +151,23 @@ rte_wait_until_equal_64(volatile uint64_t *addr,
> > > uint64_t expected,
> > >  		} while (value != expected);
> > >  	}
> > >  }
> > > -#undef __LOAD_EXC_64
> > >
> > > -#undef __SEVL
> > > -#undef __WFE
> > > +#define rte_wait_event(addr, mask, cond, expected, memorder)      \
> > > +do {                                                              \
> > > +	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));        \
> > > +	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&          \
> > > +				memorder != __ATOMIC_RELAXED);    \
> > > +	uint32_t size = sizeof(*(addr)) << 3;                     \
> > > +	typeof(*(addr)) value = 0;                                \
> > > +	__LOAD_EXC((addr), value, memorder, size)                 \
> > > +	if ((value & (mask)) cond expected) {                     \
> > > +		__SEVL()                                          \
> > > +		do {                                              \
> > > +			__WFE()                                   \
> > > +			__LOAD_EXC((addr), value, memorder, size) \
> > > +		} while ((value & (mask)) cond expected);         \
> >
> > Hi, Konstantin
> >
> > For this patch, I cannot add '()' for expected due to patch style check will report:
> > -------------------------------------------------------------------------------------------------------------------
> > WARNING:SPACING: space prohibited between function name and open parenthesis '('
> > #203: FILE: lib/eal/arm/include/rte_pause_64.h:163:
> > +       if ((value & (mask)) cond (expected)) {                   \
> >
> > WARNING:SPACING: space prohibited between function name and open parenthesis '('
> > #208: FILE: lib/eal/arm/include/rte_pause_64.h:168:
> > +               } while ((value & (mask)) cond (expected));       \
> >
> > WARNING:SPACING: space prohibited between function name and open parenthesis '('
> > #246: FILE: lib/eal/include/generic/rte_pause.h:138:
> > +       while ((__atomic_load_n((addr), (memorder)) & mask) cond (expected)) \
> >
> > total: 1 errors, 3 warnings, 211 lines checked
> 
> It is just checkpatch warnings.
> Personally I's better live with checkpatch complaints then with problematic macro.
> 
> > -------------------------------------------------------------------------------------------------------------------
> > So I just add '()' for 'addr' and 'mask'.

I wonder whether we can overcome it by:
typeof(*(addr)) expected_value = (expected); \
...
if ((value & (mask)) cond expected_value) \
  ...
?




^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [dpdk-dev] [PATCH v5 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration
  2021-10-26  9:43       ` [dpdk-dev] " Ananyev, Konstantin
@ 2021-10-26 12:56         ` Ananyev, Konstantin
  2021-10-27  7:04           ` [dpdk-dev] 回复: " Feifei Wang
  0 siblings, 1 reply; 113+ messages in thread
From: Ananyev, Konstantin @ 2021-10-26 12:56 UTC (permalink / raw)
  To: Feifei Wang; +Cc: dev, nd, Ruifeng Wang, nd


> Hi Feifei,
> 
> > > Instead of polling for cbi->use to be updated, use wait event scheme.
> > >
> > > Furthermore, delete 'const' for 'bpf_eth_cbi_wait'. This is because of a
> > > compilation error:
> > > -----------------------------------------------------------------------
> > > ../lib/eal/include/rte_common.h:36:13: error: read-only variable ‘value’
> > > used as ‘asm’ output
> > >    36 | #define asm __asm__
> > >       |             ^~~~~~~
> > >
> > > ../lib/eal/arm/include/rte_pause_64.h:66:3: note: in expansion of macro ‘asm’
> > >    66 |   asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> > >       |   ^~~
> > >
> > > ../lib/eal/arm/include/rte_pause_64.h:96:3: note: in expansion of macro
> > > ‘__LOAD_EXC_32’
> > >    96 |   __LOAD_EXC_32((src), dst, memorder)     \
> > >       |   ^~~~~~~~~~~~~
> > >
> > > ../lib/eal/arm/include/rte_pause_64.h:167:4: note: in expansion of macro
> > > ‘__LOAD_EXC’
> > >   167 |    __LOAD_EXC((addr), value, memorder, size) \
> > >       |    ^~~~~~~~~~
> > >
> > > ../lib/bpf/bpf_pkt.c:125:3: note: in expansion of macro ‘rte_wait_event’
> > >   125 |   rte_wait_event(&cbi->use, UINT32_MAX, ==, puse,
> > > -----------------------------------------------------------------------
> > >
> > > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > > ---
> > >  lib/bpf/bpf_pkt.c | 11 ++++-------
> > >  1 file changed, 4 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/lib/bpf/bpf_pkt.c b/lib/bpf/bpf_pkt.c index
> > > 6e8248f0d6..213d44a75a 100644
> > > --- a/lib/bpf/bpf_pkt.c
> > > +++ b/lib/bpf/bpf_pkt.c
> > > @@ -111,9 +111,9 @@ bpf_eth_cbi_unuse(struct bpf_eth_cbi *cbi)
> > >   * Waits till datapath finished using given callback.
> > >   */
> > >  static void
> > > -bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
> > > +bpf_eth_cbi_wait(struct bpf_eth_cbi *cbi)
> >
> > Hi, Konstantin
> >
> > For this bpf patch, I delete 'const' through this is contrary to what we
> > discussed earlier. This is because if  we keep 'constant' here and use 'rte_wait_event'
> > new macro, compiler will report error. And earlier the arm version cannot be compiled
> > due to I forgot enable "wfe" config in the meson file, so this issue can not happen before.
> 
> 
> Honestly, I don't understand why we have to remove perfectly valid 'const' qualifier here.
> If this macro can't be used with pointers to const (still don't understand why),
> then let's just not use this macro here.
> Strictly speaking I don't see much benefit here from it.
> 
> >
> > >  {
> > > -	uint32_t nuse, puse;
> > > +	uint32_t puse;
> > >
> > >  	/* make sure all previous loads and stores are completed */
> > >  	rte_smp_mb();
> > > @@ -122,11 +122,8 @@ bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
> > >
> > >  	/* in use, busy wait till current RX/TX iteration is finished */
> > >  	if ((puse & BPF_ETH_CBI_INUSE) != 0) {
> > > -		do {
> > > -			rte_pause();
> > > -			rte_compiler_barrier();
> > > -			nuse = cbi->use;
> > > -		} while (nuse == puse);
> > > +		rte_wait_event(&cbi->use, UINT32_MAX, ==, puse,
> > > +				__ATOMIC_RELAXED);

On further thought, what if we do the type conversion at macro invocation time:

bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
{
  ...
  rte_wait_event((uint32_t *)&cbi->use, UINT32_MAX, ==, puse, __ATOMIC_RELAXED);

would that help?


> > >  	}
> > >  }
> > >
> > > --
> > > 2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] 回复: [PATCH v5 1/5] eal: add new definitions for wait scheme
  2021-10-26  9:59         ` Ananyev, Konstantin
@ 2021-10-27  6:56           ` Feifei Wang
  0 siblings, 0 replies; 113+ messages in thread
From: Feifei Wang @ 2021-10-27  6:56 UTC (permalink / raw)
  To: Ananyev, Konstantin; +Cc: dev, nd, nd, nd



> -----邮件原件-----
> 发件人: Ananyev, Konstantin <konstantin.ananyev@intel.com>
> 发送时间: Tuesday, October 26, 2021 5:59 PM
> 收件人: Feifei Wang <Feifei.Wang2@arm.com>
> 抄送: dev@dpdk.org; nd <nd@arm.com>; nd <nd@arm.com>
> 主题: RE: [PATCH v5 1/5] eal: add new definitions for wait scheme
> 
> 
> > > > Introduce macros as generic interface for address monitoring.
> > > > For different size, encapsulate '__LOAD_EXC_16', '__LOAD_EXC_32'
> > > > and '__LOAD_EXC_64' into a new macro '__LOAD_EXC'.
> > > >
> > > > Furthermore, to prevent compilation warning in arm:
> > > > ----------------------------------------------
> > > > 'warning: implicit declaration of function ...'
> > > > ----------------------------------------------
> > > > Delete 'undef' constructions for '__LOAD_EXC_xx', '__SEVL' and
> '__WFE'.
> > > >
> > > > This is because original macros are undefine at the end of the file.
> > > > If new macro 'rte_wait_event' calls them in other files, they will
> > > > be seen as 'not defined'.
> > > >
> > > > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > > > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > > > ---
> > > >  lib/eal/arm/include/rte_pause_64.h  | 135
> > > > ++++++++++++++++------------ lib/eal/include/generic/rte_pause.h |
> > > > 27 ++++++
> > > >  2 files changed, 105 insertions(+), 57 deletions(-)
> > > >
> > > > diff --git a/lib/eal/arm/include/rte_pause_64.h
> > > > b/lib/eal/arm/include/rte_pause_64.h
> > > > index e87d10b8cc..1fea0dec63 100644
> > > > --- a/lib/eal/arm/include/rte_pause_64.h
> > > > +++ b/lib/eal/arm/include/rte_pause_64.h
> > > > @@ -31,20 +31,12 @@ static inline void rte_pause(void)
> > > >  /* Put processor into low power WFE(Wait For Event) state. */
> > > > #define
> > > > __WFE() { asm volatile("wfe" : : : "memory"); }
> > > >
> > > > -static __rte_always_inline void
> > > > -rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> > > > -		int memorder)
> > > > -{
> > > > -	uint16_t value;
> > > > -
> > > > -	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > > __ATOMIC_RELAXED);
> > > > -
> > > > -	/*
> > > > -	 * Atomic exclusive load from addr, it returns the 16-bit content of
> > > > -	 * *addr while making it 'monitored',when it is written by someone
> > > > -	 * else, the 'monitored' state is cleared and a event is generated
> > > > -	 * implicitly to exit WFE.
> > > > -	 */
> > > > +/*
> > > > + * Atomic exclusive load from addr, it returns the 16-bit content
> > > > +of
> > > > + * *addr while making it 'monitored', when it is written by
> > > > +someone
> > > > + * else, the 'monitored' state is cleared and an event is
> > > > +generated
> > > > + * implicitly to exit WFE.
> > > > + */
> > > >  #define __LOAD_EXC_16(src, dst, memorder) {               \
> > > >  	if (memorder == __ATOMIC_RELAXED) {               \
> > > >  		asm volatile("ldxrh %w[tmp], [%x[addr]]"  \ @@ -58,6 +50,62
> @@
> > > > rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> > > >  			: "memory");                      \
> > > >  	} }
> > > >
> > > > +/*
> > > > + * Atomic exclusive load from addr, it returns the 32-bit content
> > > > +of
> > > > + * *addr while making it 'monitored', when it is written by
> > > > +someone
> > > > + * else, the 'monitored' state is cleared and an event is
> > > > +generated
> > > > + * implicitly to exit WFE.
> > > > + */
> > > > +#define __LOAD_EXC_32(src, dst, memorder) {              \
> > > > +	if (memorder == __ATOMIC_RELAXED) {              \
> > > > +		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> > > > +			: [tmp] "=&r" (dst)              \
> > > > +			: [addr] "r"(src)                \
> > > > +			: "memory");                     \
> > > > +	} else {                                         \
> > > > +		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> > > > +			: [tmp] "=&r" (dst)              \
> > > > +			: [addr] "r"(src)                \
> > > > +			: "memory");                     \
> > > > +	} }
> > > > +
> > > > +/*
> > > > + * Atomic exclusive load from addr, it returns the 64-bit content
> > > > +of
> > > > + * *addr while making it 'monitored', when it is written by
> > > > +someone
> > > > + * else, the 'monitored' state is cleared and an event is
> > > > +generated
> > > > + * implicitly to exit WFE.
> > > > + */
> > > > +#define __LOAD_EXC_64(src, dst, memorder) {              \
> > > > +	if (memorder == __ATOMIC_RELAXED) {              \
> > > > +		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> > > > +			: [tmp] "=&r" (dst)              \
> > > > +			: [addr] "r"(src)                \
> > > > +			: "memory");                     \
> > > > +	} else {                                         \
> > > > +		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> > > > +			: [tmp] "=&r" (dst)              \
> > > > +			: [addr] "r"(src)                \
> > > > +			: "memory");                     \
> > > > +	} }
> > > > +
> > > > +#define __LOAD_EXC(src, dst, memorder, size) {          \
> > > > +	assert(size == 16 || size == 32 || size == 64); \
> > > > +	if (size == 16)                                 \
> > > > +		__LOAD_EXC_16(src, dst, memorder)       \
> > > > +	else if (size == 32)                            \
> > > > +		__LOAD_EXC_32(src, dst, memorder)       \
> > > > +	else if (size == 64)                            \
> > > > +		__LOAD_EXC_64(src, dst, memorder)       \
> > > > +}
> > > > +
> > > > +static __rte_always_inline void
> > > > +rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> > > > +		int memorder)
> > > > +{
> > > > +	uint16_t value;
> > > > +
> > > > +	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > > __ATOMIC_RELAXED);
> > > > +
> > > >  	__LOAD_EXC_16(addr, value, memorder)
> > > >  	if (value != expected) {
> > > >  		__SEVL()
> > > > @@ -66,7 +114,6 @@ rte_wait_until_equal_16(volatile uint16_t
> > > > *addr, uint16_t expected,
> > > >  			__LOAD_EXC_16(addr, value, memorder)
> > > >  		} while (value != expected);
> > > >  	}
> > > > -#undef __LOAD_EXC_16
> > > >  }
> > > >
> > > >  static __rte_always_inline void
> > > > @@ -77,25 +124,6 @@ rte_wait_until_equal_32(volatile uint32_t
> > > > *addr, uint32_t expected,
> > > >
> > > >  	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > > __ATOMIC_RELAXED);
> > > >
> > > > -	/*
> > > > -	 * Atomic exclusive load from addr, it returns the 32-bit content of
> > > > -	 * *addr while making it 'monitored',when it is written by someone
> > > > -	 * else, the 'monitored' state is cleared and a event is generated
> > > > -	 * implicitly to exit WFE.
> > > > -	 */
> > > > -#define __LOAD_EXC_32(src, dst, memorder) {              \
> > > > -	if (memorder == __ATOMIC_RELAXED) {              \
> > > > -		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> > > > -			: [tmp] "=&r" (dst)              \
> > > > -			: [addr] "r"(src)                \
> > > > -			: "memory");                     \
> > > > -	} else {                                         \
> > > > -		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> > > > -			: [tmp] "=&r" (dst)              \
> > > > -			: [addr] "r"(src)                \
> > > > -			: "memory");                     \
> > > > -	} }
> > > > -
> > > >  	__LOAD_EXC_32(addr, value, memorder)
> > > >  	if (value != expected) {
> > > >  		__SEVL()
> > > > @@ -104,7 +132,6 @@ rte_wait_until_equal_32(volatile uint32_t
> > > > *addr, uint32_t expected,
> > > >  			__LOAD_EXC_32(addr, value, memorder)
> > > >  		} while (value != expected);
> > > >  	}
> > > > -#undef __LOAD_EXC_32
> > > >  }
> > > >
> > > >  static __rte_always_inline void
> > > > @@ -115,25 +142,6 @@ rte_wait_until_equal_64(volatile uint64_t
> > > > *addr, uint64_t expected,
> > > >
> > > >  	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > > __ATOMIC_RELAXED);
> > > >
> > > > -	/*
> > > > -	 * Atomic exclusive load from addr, it returns the 64-bit content of
> > > > -	 * *addr while making it 'monitored',when it is written by someone
> > > > -	 * else, the 'monitored' state is cleared and a event is generated
> > > > -	 * implicitly to exit WFE.
> > > > -	 */
> > > > -#define __LOAD_EXC_64(src, dst, memorder) {              \
> > > > -	if (memorder == __ATOMIC_RELAXED) {              \
> > > > -		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> > > > -			: [tmp] "=&r" (dst)              \
> > > > -			: [addr] "r"(src)                \
> > > > -			: "memory");                     \
> > > > -	} else {                                         \
> > > > -		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> > > > -			: [tmp] "=&r" (dst)              \
> > > > -			: [addr] "r"(src)                \
> > > > -			: "memory");                     \
> > > > -	} }
> > > > -
> > > >  	__LOAD_EXC_64(addr, value, memorder)
> > > >  	if (value != expected) {
> > > >  		__SEVL()
> > > > @@ -143,10 +151,23 @@ rte_wait_until_equal_64(volatile uint64_t
> > > > *addr, uint64_t expected,
> > > >  		} while (value != expected);
> > > >  	}
> > > >  }
> > > > -#undef __LOAD_EXC_64
> > > >
> > > > -#undef __SEVL
> > > > -#undef __WFE
> > > > +#define rte_wait_event(addr, mask, cond, expected, memorder)      \
> > > > +do {                                                              \
> > > > +	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));        \
> > > > +	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&          \
> > > > +				memorder != __ATOMIC_RELAXED);    \
> > > > +	uint32_t size = sizeof(*(addr)) << 3;                     \
> > > > +	typeof(*(addr)) value = 0;                                \
> > > > +	__LOAD_EXC((addr), value, memorder, size)                 \
> > > > +	if ((value & (mask)) cond expected) {                     \
> > > > +		__SEVL()                                          \
> > > > +		do {                                              \
> > > > +			__WFE()                                   \
> > > > +			__LOAD_EXC((addr), value, memorder, size) \
> > > > +		} while ((value & (mask)) cond expected);         \
> > >
> > > Hi, Konstantin
> > >
> > > For this patch, I cannot add '()' for expected due to patch style check will
> report:
> > > --------------------------------------------------------------------
> > > -----------------------------------------------
> > > WARNING:SPACING: space prohibited between function name and open
> parenthesis '('
> > > #203: FILE: lib/eal/arm/include/rte_pause_64.h:163:
> > > +       if ((value & (mask)) cond (expected)) {                   \
> > >
> > > WARNING:SPACING: space prohibited between function name and open
> parenthesis '('
> > > #208: FILE: lib/eal/arm/include/rte_pause_64.h:168:
> > > +               } while ((value & (mask)) cond (expected));       \
> > >
> > > WARNING:SPACING: space prohibited between function name and open
> parenthesis '('
> > > #246: FILE: lib/eal/include/generic/rte_pause.h:138:
> > > +       while ((__atomic_load_n((addr), (memorder)) & mask) cond
> > > + (expected)) \
> > >
> > > total: 1 errors, 3 warnings, 211 lines checked
> >
> > It is just checkpatch warnings.
> > Personally I's better live with checkpatch complaints then with problematic
> macro.
> >
> > > --------------------------------------------------------------------
> > > -----------------------------------------------
> > > So I just add '()' for 'addr' and 'mask'.
> 
> I wonder can we overcome it by:
> typeof(*(addr)) expected_value = (expected); \ ...
> if ((value & (mask)) cond expected_value) \
>   ...
> ?
That's a good suggestion; I tried it and no checkpatch warning occurs.
Thanks.
> 
> 


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] 回复: [PATCH v5 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration
  2021-10-26 12:56         ` Ananyev, Konstantin
@ 2021-10-27  7:04           ` Feifei Wang
  2021-10-27  7:31             ` Feifei Wang
  2021-10-27 14:47             ` [dpdk-dev] " Ananyev, Konstantin
  0 siblings, 2 replies; 113+ messages in thread
From: Feifei Wang @ 2021-10-27  7:04 UTC (permalink / raw)
  To: Ananyev, Konstantin; +Cc: dev, nd, Ruifeng Wang, nd, nd



> -----邮件原件-----
> 发件人: dev <dev-bounces@dpdk.org> 代表 Ananyev, Konstantin
> 发送时间: Tuesday, October 26, 2021 8:57 PM
> 收件人: Feifei Wang <Feifei.Wang2@arm.com>
> 抄送: dev@dpdk.org; nd <nd@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>; nd <nd@arm.com>
> 主题: Re: [dpdk-dev] [PATCH v5 4/5] lib/bpf: use wait event scheme for Rx/Tx
> iteration
> 
> 
> > Hi Feifei,
> >
> > > > Instead of polling for cbi->use to be updated, use wait event scheme.
> > > >
> > > > Furthermore, delete 'const' for 'bpf_eth_cbi_wait'. This is
> > > > because of a compilation error:
> > > > ------------------------------------------------------------------
> > > > -----
> > > > ../lib/eal/include/rte_common.h:36:13: error: read-only variable ‘value’
> > > > used as ‘asm’ output
> > > >    36 | #define asm __asm__
> > > >       |             ^~~~~~~
> > > >
> > > > ../lib/eal/arm/include/rte_pause_64.h:66:3: note: in expansion of
> macro ‘asm’
> > > >    66 |   asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> > > >       |   ^~~
> > > >
> > > > ../lib/eal/arm/include/rte_pause_64.h:96:3: note: in expansion of
> > > > macro ‘__LOAD_EXC_32’
> > > >    96 |   __LOAD_EXC_32((src), dst, memorder)     \
> > > >       |   ^~~~~~~~~~~~~
> > > >
> > > > ../lib/eal/arm/include/rte_pause_64.h:167:4: note: in expansion of
> > > > macro ‘__LOAD_EXC’
> > > >   167 |    __LOAD_EXC((addr), value, memorder, size) \
> > > >       |    ^~~~~~~~~~
> > > >
> > > > ../lib/bpf/bpf_pkt.c:125:3: note: in expansion of macro ‘rte_wait_event’
> > > >   125 |   rte_wait_event(&cbi->use, UINT32_MAX, ==, puse,
> > > > ------------------------------------------------------------------
> > > > -----
> > > >
> > > > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > > > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > > > ---
> > > >  lib/bpf/bpf_pkt.c | 11 ++++-------
> > > >  1 file changed, 4 insertions(+), 7 deletions(-)
> > > >
> > > > diff --git a/lib/bpf/bpf_pkt.c b/lib/bpf/bpf_pkt.c index
> > > > 6e8248f0d6..213d44a75a 100644
> > > > --- a/lib/bpf/bpf_pkt.c
> > > > +++ b/lib/bpf/bpf_pkt.c
> > > > @@ -111,9 +111,9 @@ bpf_eth_cbi_unuse(struct bpf_eth_cbi *cbi)
> > > >   * Waits till datapath finished using given callback.
> > > >   */
> > > >  static void
> > > > -bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
> > > > +bpf_eth_cbi_wait(struct bpf_eth_cbi *cbi)
> > >
> > > Hi, Konstantin
> > >
> > > For this bpf patch, I delete 'const' through this is contrary to
> > > what we discussed earlier. This is because if  we keep 'constant' here and
> use 'rte_wait_event'
> > > new macro, compiler will report error. And earlier the arm version
> > > cannot be compiled due to I forgot enable "wfe" config in the meson file,
> so this issue can not happen before.
> >
> >
> > Honestly, I don't understand why we have to remove perfectly valid 'const'
> qualifier here.
> > If this macro can't be used with pointers to const (still don't
> > understand why), then let's just not use this macro here.
> > Strictly speaking I don't see much benefit here from it.
> >
> > >
> > > >  {
> > > > -	uint32_t nuse, puse;
> > > > +	uint32_t puse;
> > > >
> > > >  	/* make sure all previous loads and stores are completed */
> > > >  	rte_smp_mb();
> > > > @@ -122,11 +122,8 @@ bpf_eth_cbi_wait(const struct bpf_eth_cbi
> > > > *cbi)
> > > >
> > > >  	/* in use, busy wait till current RX/TX iteration is finished */
> > > >  	if ((puse & BPF_ETH_CBI_INUSE) != 0) {
> > > > -		do {
> > > > -			rte_pause();
> > > > -			rte_compiler_barrier();
> > > > -			nuse = cbi->use;
> > > > -		} while (nuse == puse);
> > > > +		rte_wait_event(&cbi->use, UINT32_MAX, ==, puse,
> > > > +				__ATOMIC_RELAXED);
> 
> After another thought, if we do type conversion at macro invocation time:
> 
> bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi) {
>   ...
>   rte_wait_event((uint32_t *)&cbi->use, UINT32_MAX, ==, puse,
> __ATOMIC_RELAXED);
> 
> would that help?

I tried this and it reports a compiler warning:
'cast discards ‘const’ qualifier'.
I think this is because in the rte_wait_event macro we use
typeof(*(addr)) value = 0;
so 'value' is deduced as "const uint32_t",
but it needs to be writable.

Furthermore, this reflects a limitation of the new macro: it cannot be applied
when 'addr' is 'const'-qualified. So I think I should drop the change for "bpf".
> 
> 
> > > >  	}
> > > >  }
> > > >
> > > > --
> > > > 2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] 回复: [PATCH v5 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration
  2021-10-27  7:04           ` [dpdk-dev] 回复: " Feifei Wang
@ 2021-10-27  7:31             ` Feifei Wang
  2021-10-27 14:47             ` [dpdk-dev] " Ananyev, Konstantin
  1 sibling, 0 replies; 113+ messages in thread
From: Feifei Wang @ 2021-10-27  7:31 UTC (permalink / raw)
  To: Ananyev, Konstantin; +Cc: dev, nd, Ruifeng Wang, nd, nd, nd



> -----邮件原件-----
> 发件人: Feifei Wang
> 发送时间: Wednesday, October 27, 2021 3:04 PM
> 收件人: Ananyev, Konstantin <konstantin.ananyev@intel.com>
> 抄送: dev@dpdk.org; nd <nd@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>; nd <nd@arm.com>; nd <nd@arm.com>
> 主题: 回复: [PATCH v5 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration
> 
> 
> 
> > -----邮件原件-----
> > 发件人: dev <dev-bounces@dpdk.org> 代表 Ananyev, Konstantin
> > 发送时间: Tuesday, October 26, 2021 8:57 PM
> > 收件人: Feifei Wang <Feifei.Wang2@arm.com>
> > 抄送: dev@dpdk.org; nd <nd@arm.com>; Ruifeng Wang
> > <Ruifeng.Wang@arm.com>; nd <nd@arm.com>
> > 主题: Re: [dpdk-dev] [PATCH v5 4/5] lib/bpf: use wait event scheme for
> > Rx/Tx iteration
> >
> >
> > > Hi Feifei,
> > >
> > > > > Instead of polling for cbi->use to be updated, use wait event scheme.
> > > > >
> > > > > Furthermore, delete 'const' for 'bpf_eth_cbi_wait'. This is
> > > > > because of a compilation error:
> > > > > ----------------------------------------------------------------
> > > > > --
> > > > > -----
> > > > > ../lib/eal/include/rte_common.h:36:13: error: read-only variable
> ‘value’
> > > > > used as ‘asm’ output
> > > > >    36 | #define asm __asm__
> > > > >       |             ^~~~~~~
> > > > >
> > > > > ../lib/eal/arm/include/rte_pause_64.h:66:3: note: in expansion
> > > > > of
> > macro ‘asm’
> > > > >    66 |   asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> > > > >       |   ^~~
> > > > >
> > > > > ../lib/eal/arm/include/rte_pause_64.h:96:3: note: in expansion
> > > > > of macro ‘__LOAD_EXC_32’
> > > > >    96 |   __LOAD_EXC_32((src), dst, memorder)     \
> > > > >       |   ^~~~~~~~~~~~~
> > > > >
> > > > > ../lib/eal/arm/include/rte_pause_64.h:167:4: note: in expansion
> > > > > of macro ‘__LOAD_EXC’
> > > > >   167 |    __LOAD_EXC((addr), value, memorder, size) \
> > > > >       |    ^~~~~~~~~~
> > > > >
> > > > > ../lib/bpf/bpf_pkt.c:125:3: note: in expansion of macro
> ‘rte_wait_event’
> > > > >   125 |   rte_wait_event(&cbi->use, UINT32_MAX, ==, puse,
> > > > > ----------------------------------------------------------------
> > > > > --
> > > > > -----
> > > > >
> > > > > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > > > > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > > > > ---
> > > > >  lib/bpf/bpf_pkt.c | 11 ++++-------
> > > > >  1 file changed, 4 insertions(+), 7 deletions(-)
> > > > >
> > > > > diff --git a/lib/bpf/bpf_pkt.c b/lib/bpf/bpf_pkt.c index
> > > > > 6e8248f0d6..213d44a75a 100644
> > > > > --- a/lib/bpf/bpf_pkt.c
> > > > > +++ b/lib/bpf/bpf_pkt.c
> > > > > @@ -111,9 +111,9 @@ bpf_eth_cbi_unuse(struct bpf_eth_cbi *cbi)
> > > > >   * Waits till datapath finished using given callback.
> > > > >   */
> > > > >  static void
> > > > > -bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
> > > > > +bpf_eth_cbi_wait(struct bpf_eth_cbi *cbi)
> > > >
> > > > Hi, Konstantin
> > > >
> > > > For this bpf patch, I delete 'const' through this is contrary to
> > > > what we discussed earlier. This is because if  we keep 'constant'
> > > > here and
> > use 'rte_wait_event'
> > > > new macro, compiler will report error. And earlier the arm version
> > > > cannot be compiled due to I forgot enable "wfe" config in the
> > > > meson file,
> > so this issue can not happen before.
> > >
> > >
> > > Honestly, I don't understand why we have to remove perfectly valid
> 'const'
> > qualifier here.
> > > If this macro can't be used with pointers to const (still don't
> > > understand why), then let's just not use this macro here.
> > > Strictly speaking I don't see much benefit here from it.
> > >
> > > >
> > > > >  {
> > > > > -	uint32_t nuse, puse;
> > > > > +	uint32_t puse;
> > > > >
> > > > >  	/* make sure all previous loads and stores are completed */
> > > > >  	rte_smp_mb();
> > > > > @@ -122,11 +122,8 @@ bpf_eth_cbi_wait(const struct bpf_eth_cbi
> > > > > *cbi)
> > > > >
> > > > >  	/* in use, busy wait till current RX/TX iteration is finished */
> > > > >  	if ((puse & BPF_ETH_CBI_INUSE) != 0) {
> > > > > -		do {
> > > > > -			rte_pause();
> > > > > -			rte_compiler_barrier();
> > > > > -			nuse = cbi->use;
> > > > > -		} while (nuse == puse);
> > > > > +		rte_wait_event(&cbi->use, UINT32_MAX, ==, puse,
> > > > > +				__ATOMIC_RELAXED);
> >
> > After another thought, if we do type conversion at macro invocation time:
> >
> > bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi) {
> >   ...
> >   rte_wait_event((uint32_t *)&cbi->use, UINT32_MAX, ==, puse,
> > __ATOMIC_RELAXED);
> >
> > would that help?
> 
> I try to with this and it will report compiler warning:
> ' cast discards ‘const’ qualifier'.
> I think this is due to that in rte_wait_event macro, we use
> typeof(*(addr)) value = 0;
>  and value is defined as "const uint32_t", but it should be able to be updated.
> 

A small correction:
the explanation above refers to the 'asm error' in the commit message.

> Furthermore, this reflects the limitations of the new macro, it cannot be
> applied when 'addr' is type of 'const'. Finally, I think I should give up the
> change for "bpf".
> >
> >
> > > > >  	}
> > > > >  }
> > > > >
> > > > > --
> > > > > 2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [PATCH v6 0/4] add new definitions for wait scheme
  2021-09-02  5:32 [dpdk-dev] [RFC PATCH v1 0/5] add new API for wait until scheme Feifei Wang
                   ` (8 preceding siblings ...)
  2021-10-26  8:01 ` [dpdk-dev] [PATCH v5 0/5] add new definitions for wait scheme Feifei Wang
@ 2021-10-27  8:10 ` Feifei Wang
  2021-10-27  8:10   ` [dpdk-dev] [PATCH v6 1/4] eal: " Feifei Wang
                     ` (4 more replies)
  2021-10-28  6:56 ` [dpdk-dev] [PATCH v7 0/5] " Feifei Wang
                   ` (2 subsequent siblings)
  12 siblings, 5 replies; 113+ messages in thread
From: Feifei Wang @ 2021-10-27  8:10 UTC (permalink / raw)
  Cc: dev, nd, Feifei Wang

Add new definitions for wait scheme, and apply this new definitions into
lib to replace rte_pause.

v2:
1. use macro to create new wait scheme (Stephen)

v3:
1. delete unnecessary bug fix in bpf (Konstantin)

v4:
1. put size into the macro body (Konstantin)
2. replace assert with BUILD_BUG_ON (Stephen)
3. delete unnecessary compiler barrier for bpf (Konstantin)

v5:
1. 'size' is not the parameter (Konstantin)
2. put () around macro parameters (Konstantin)
3. fix some original typo issue (Jerin)
4. swap 'rte_wait_event' parameter location (Jerin)
5. add new macro '__LOAD_EXC'
6. delete 'undef' to prevent compilation warning

v6:
1. fix patch style check warning
2. delete 'bpf' patch due to 'const' limit

Feifei Wang (4):
  eal: add new definitions for wait scheme
  eal: use wait event for read pflock
  eal: use wait event scheme for mcslock
  lib/distributor: use wait event scheme

 lib/distributor/rte_distributor_single.c |  10 +-
 lib/eal/arm/include/rte_pause_64.h       | 136 +++++++++++++----------
 lib/eal/include/generic/rte_mcslock.h    |   9 +-
 lib/eal/include/generic/rte_pause.h      |  28 +++++
 lib/eal/include/generic/rte_pflock.h     |   4 +-
 5 files changed, 119 insertions(+), 68 deletions(-)

-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [PATCH v6 1/4] eal: add new definitions for wait scheme
  2021-10-27  8:10 ` [dpdk-dev] [PATCH v6 0/4] add new definitions for wait scheme Feifei Wang
@ 2021-10-27  8:10   ` Feifei Wang
  2021-10-27  8:10   ` [dpdk-dev] [PATCH v6 2/4] eal: use wait event for read pflock Feifei Wang
                     ` (3 subsequent siblings)
  4 siblings, 0 replies; 113+ messages in thread
From: Feifei Wang @ 2021-10-27  8:10 UTC (permalink / raw)
  To: Ruifeng Wang; +Cc: dev, nd, Feifei Wang

Introduce macros as generic interface for address monitoring.
For different size, encapsulate '__LOAD_EXC_16', '__LOAD_EXC_32'
and '__LOAD_EXC_64' into a new macro '__LOAD_EXC'.

Furthermore, to prevent compilation warning in arm:
----------------------------------------------
'warning: implicit declaration of function ...'
----------------------------------------------
Delete 'undef' constructions for '__LOAD_EXC_xx', '__SEVL' and '__WFE'.

This is because original macros are undefine at the end of the file.
If new macro 'rte_wait_event' calls them in other files, they will be
seen as 'not defined'.

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 lib/eal/arm/include/rte_pause_64.h  | 136 ++++++++++++++++------------
 lib/eal/include/generic/rte_pause.h |  28 ++++++
 2 files changed, 107 insertions(+), 57 deletions(-)

diff --git a/lib/eal/arm/include/rte_pause_64.h b/lib/eal/arm/include/rte_pause_64.h
index e87d10b8cc..87df224ac1 100644
--- a/lib/eal/arm/include/rte_pause_64.h
+++ b/lib/eal/arm/include/rte_pause_64.h
@@ -31,20 +31,12 @@ static inline void rte_pause(void)
 /* Put processor into low power WFE(Wait For Event) state. */
 #define __WFE() { asm volatile("wfe" : : : "memory"); }
 
-static __rte_always_inline void
-rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
-		int memorder)
-{
-	uint16_t value;
-
-	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
-
-	/*
-	 * Atomic exclusive load from addr, it returns the 16-bit content of
-	 * *addr while making it 'monitored',when it is written by someone
-	 * else, the 'monitored' state is cleared and a event is generated
-	 * implicitly to exit WFE.
-	 */
+/*
+ * Atomic exclusive load from addr, it returns the 16-bit content of
+ * *addr while making it 'monitored', when it is written by someone
+ * else, the 'monitored' state is cleared and an event is generated
+ * implicitly to exit WFE.
+ */
 #define __LOAD_EXC_16(src, dst, memorder) {               \
 	if (memorder == __ATOMIC_RELAXED) {               \
 		asm volatile("ldxrh %w[tmp], [%x[addr]]"  \
@@ -58,6 +50,62 @@ rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
 			: "memory");                      \
 	} }
 
+/*
+ * Atomic exclusive load from addr, it returns the 32-bit content of
+ * *addr while making it 'monitored', when it is written by someone
+ * else, the 'monitored' state is cleared and an event is generated
+ * implicitly to exit WFE.
+ */
+#define __LOAD_EXC_32(src, dst, memorder) {              \
+	if (memorder == __ATOMIC_RELAXED) {              \
+		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
+			: [tmp] "=&r" (dst)              \
+			: [addr] "r"(src)                \
+			: "memory");                     \
+	} else {                                         \
+		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
+			: [tmp] "=&r" (dst)              \
+			: [addr] "r"(src)                \
+			: "memory");                     \
+	} }
+
+/*
+ * Atomic exclusive load from addr, it returns the 64-bit content of
+ * *addr while making it 'monitored', when it is written by someone
+ * else, the 'monitored' state is cleared and an event is generated
+ * implicitly to exit WFE.
+ */
+#define __LOAD_EXC_64(src, dst, memorder) {              \
+	if (memorder == __ATOMIC_RELAXED) {              \
+		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
+			: [tmp] "=&r" (dst)              \
+			: [addr] "r"(src)                \
+			: "memory");                     \
+	} else {                                         \
+		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
+			: [tmp] "=&r" (dst)              \
+			: [addr] "r"(src)                \
+			: "memory");                     \
+	} }
+
+#define __LOAD_EXC(src, dst, memorder, size) {          \
+	assert(size == 16 || size == 32 || size == 64); \
+	if (size == 16)                                 \
+		__LOAD_EXC_16(src, dst, memorder)       \
+	else if (size == 32)                            \
+		__LOAD_EXC_32(src, dst, memorder)       \
+	else if (size == 64)                            \
+		__LOAD_EXC_64(src, dst, memorder)       \
+}
+
+static __rte_always_inline void
+rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
+		int memorder)
+{
+	uint16_t value;
+
+	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
+
 	__LOAD_EXC_16(addr, value, memorder)
 	if (value != expected) {
 		__SEVL()
@@ -66,7 +114,6 @@ rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
 			__LOAD_EXC_16(addr, value, memorder)
 		} while (value != expected);
 	}
-#undef __LOAD_EXC_16
 }
 
 static __rte_always_inline void
@@ -77,25 +124,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
 
 	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
 
-	/*
-	 * Atomic exclusive load from addr, it returns the 32-bit content of
-	 * *addr while making it 'monitored',when it is written by someone
-	 * else, the 'monitored' state is cleared and a event is generated
-	 * implicitly to exit WFE.
-	 */
-#define __LOAD_EXC_32(src, dst, memorder) {              \
-	if (memorder == __ATOMIC_RELAXED) {              \
-		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
-			: [tmp] "=&r" (dst)              \
-			: [addr] "r"(src)                \
-			: "memory");                     \
-	} else {                                         \
-		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
-			: [tmp] "=&r" (dst)              \
-			: [addr] "r"(src)                \
-			: "memory");                     \
-	} }
-
 	__LOAD_EXC_32(addr, value, memorder)
 	if (value != expected) {
 		__SEVL()
@@ -104,7 +132,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
 			__LOAD_EXC_32(addr, value, memorder)
 		} while (value != expected);
 	}
-#undef __LOAD_EXC_32
 }
 
 static __rte_always_inline void
@@ -115,25 +142,6 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
 
 	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
 
-	/*
-	 * Atomic exclusive load from addr, it returns the 64-bit content of
-	 * *addr while making it 'monitored',when it is written by someone
-	 * else, the 'monitored' state is cleared and a event is generated
-	 * implicitly to exit WFE.
-	 */
-#define __LOAD_EXC_64(src, dst, memorder) {              \
-	if (memorder == __ATOMIC_RELAXED) {              \
-		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
-			: [tmp] "=&r" (dst)              \
-			: [addr] "r"(src)                \
-			: "memory");                     \
-	} else {                                         \
-		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
-			: [tmp] "=&r" (dst)              \
-			: [addr] "r"(src)                \
-			: "memory");                     \
-	} }
-
 	__LOAD_EXC_64(addr, value, memorder)
 	if (value != expected) {
 		__SEVL()
@@ -143,10 +151,24 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
 		} while (value != expected);
 	}
 }
-#undef __LOAD_EXC_64
 
-#undef __SEVL
-#undef __WFE
+#define rte_wait_event(addr, mask, cond, expected, memorder)      \
+do {                                                              \
+	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));        \
+	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&          \
+				memorder != __ATOMIC_RELAXED);    \
+	uint32_t size = sizeof(*(addr)) << 3;                     \
+	typeof(*(addr)) expected_value = (expected);              \
+	typeof(*(addr)) value = 0;                                \
+	__LOAD_EXC((addr), value, memorder, size)                 \
+	if ((value & (mask)) cond expected_value) {               \
+		__SEVL()                                          \
+		do {                                              \
+			__WFE()                                   \
+			__LOAD_EXC((addr), value, memorder, size) \
+		} while ((value & (mask)) cond expected_value);   \
+	}                                                         \
+} while (0)
 
 #endif
 
diff --git a/lib/eal/include/generic/rte_pause.h b/lib/eal/include/generic/rte_pause.h
index 668ee4a184..d0c5b5a415 100644
--- a/lib/eal/include/generic/rte_pause.h
+++ b/lib/eal/include/generic/rte_pause.h
@@ -111,6 +111,34 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
 	while (__atomic_load_n(addr, memorder) != expected)
 		rte_pause();
 }
+
+/*
+ * Wait until *addr breaks the condition, with a relaxed memory
+ * ordering model meaning the loads around this API can be reordered.
+ *
+ * @param addr
+ *  A pointer to the memory location.
+ * @param mask
+ *  A mask of value bits in interest.
+ * @param cond
+ *  A symbol representing the condition.
+ * @param expected
+ *  An expected value to be in the memory location.
+ * @param memorder
+ *  Two different memory orders that can be specified:
+ *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
+ *  C++11 memory orders with the same names, see the C++11 standard or
+ *  the GCC wiki on atomic synchronization for detailed definition.
+ */
+#define rte_wait_event(addr, mask, cond, expected, memorder)                       \
+do {                                                                               \
+	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));                         \
+	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&                           \
+				memorder != __ATOMIC_RELAXED);                     \
+	typeof(*(addr)) expected_value = (expected);                               \
+	while ((__atomic_load_n((addr), (memorder)) & (mask)) cond expected_value) \
+		rte_pause();                                                       \
+} while (0)
 #endif
 
 #endif /* _RTE_PAUSE_H_ */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [PATCH v6 2/4] eal: use wait event for read pflock
  2021-10-27  8:10 ` [dpdk-dev] [PATCH v6 0/4] add new definitions for wait scheme Feifei Wang
  2021-10-27  8:10   ` [dpdk-dev] [PATCH v6 1/4] eal: " Feifei Wang
@ 2021-10-27  8:10   ` Feifei Wang
  2021-10-27  8:10   ` [dpdk-dev] [PATCH v6 3/4] eal: use wait event scheme for mcslock Feifei Wang
                     ` (2 subsequent siblings)
  4 siblings, 0 replies; 113+ messages in thread
From: Feifei Wang @ 2021-10-27  8:10 UTC (permalink / raw)
  Cc: dev, nd, Feifei Wang, Ruifeng Wang

Instead of polling for read pflock update, use wait event scheme for
this case.

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 lib/eal/include/generic/rte_pflock.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/lib/eal/include/generic/rte_pflock.h b/lib/eal/include/generic/rte_pflock.h
index e57c179ef2..7573b036bf 100644
--- a/lib/eal/include/generic/rte_pflock.h
+++ b/lib/eal/include/generic/rte_pflock.h
@@ -121,9 +121,7 @@ rte_pflock_read_lock(rte_pflock_t *pf)
 		return;
 
 	/* Wait for current write phase to complete. */
-	while ((__atomic_load_n(&pf->rd.in, __ATOMIC_ACQUIRE)
-		& RTE_PFLOCK_WBITS) == w)
-		rte_pause();
+	rte_wait_event(&pf->rd.in, RTE_PFLOCK_WBITS, ==, w, __ATOMIC_ACQUIRE);
 }
 
 /**
-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [PATCH v6 3/4] eal: use wait event scheme for mcslock
  2021-10-27  8:10 ` [dpdk-dev] [PATCH v6 0/4] add new definitions for wait scheme Feifei Wang
  2021-10-27  8:10   ` [dpdk-dev] [PATCH v6 1/4] eal: " Feifei Wang
  2021-10-27  8:10   ` [dpdk-dev] [PATCH v6 2/4] eal: use wait event for read pflock Feifei Wang
@ 2021-10-27  8:10   ` Feifei Wang
  2021-10-27 11:16     ` Mattias Rönnblom
  2021-10-27  8:10   ` [dpdk-dev] [PATCH v6 4/4] lib/distributor: use wait event scheme Feifei Wang
  2021-10-27 10:57   ` [dpdk-dev] [PATCH v6 0/4] add new definitions for wait scheme Jerin Jacob
  4 siblings, 1 reply; 113+ messages in thread
From: Feifei Wang @ 2021-10-27  8:10 UTC (permalink / raw)
  To: Honnappa Nagarahalli; +Cc: dev, nd, Feifei Wang, Ruifeng Wang

Instead of polling for mcslock to be updated, use wait event scheme
for this case.

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 lib/eal/include/generic/rte_mcslock.h | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/lib/eal/include/generic/rte_mcslock.h b/lib/eal/include/generic/rte_mcslock.h
index 34f33c64a5..806a2b2c7e 100644
--- a/lib/eal/include/generic/rte_mcslock.h
+++ b/lib/eal/include/generic/rte_mcslock.h
@@ -116,8 +116,13 @@ rte_mcslock_unlock(rte_mcslock_t **msl, rte_mcslock_t *me)
 		/* More nodes added to the queue by other CPUs.
 		 * Wait until the next pointer is set.
 		 */
-		while (__atomic_load_n(&me->next, __ATOMIC_RELAXED) == NULL)
-			rte_pause();
+#ifdef RTE_ARCH_32
+		rte_wait_event((uint32_t *)&me->next, UINT32_MAX, ==, 0,
+				__ATOMIC_RELAXED);
+#else
+		rte_wait_event((uint64_t *)&me->next, UINT64_MAX, ==, 0,
+				__ATOMIC_RELAXED);
+#endif
 	}
 
 	/* Pass lock to next waiter. */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [PATCH v6 4/4] lib/distributor: use wait event scheme
  2021-10-27  8:10 ` [dpdk-dev] [PATCH v6 0/4] add new definitions for wait scheme Feifei Wang
                     ` (2 preceding siblings ...)
  2021-10-27  8:10   ` [dpdk-dev] [PATCH v6 3/4] eal: use wait event scheme for mcslock Feifei Wang
@ 2021-10-27  8:10   ` Feifei Wang
  2021-10-27 10:57   ` [dpdk-dev] [PATCH v6 0/4] add new definitions for wait scheme Jerin Jacob
  4 siblings, 0 replies; 113+ messages in thread
From: Feifei Wang @ 2021-10-27  8:10 UTC (permalink / raw)
  To: David Hunt; +Cc: dev, nd, Feifei Wang, Ruifeng Wang

Instead of polling for bufptr64 to be updated, use
wait event for this case.

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 lib/distributor/rte_distributor_single.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/lib/distributor/rte_distributor_single.c b/lib/distributor/rte_distributor_single.c
index f4725b1d0b..d52b24a453 100644
--- a/lib/distributor/rte_distributor_single.c
+++ b/lib/distributor/rte_distributor_single.c
@@ -33,9 +33,8 @@ rte_distributor_request_pkt_single(struct rte_distributor_single *d,
 	union rte_distributor_buffer_single *buf = &d->bufs[worker_id];
 	int64_t req = (((int64_t)(uintptr_t)oldpkt) << RTE_DISTRIB_FLAG_BITS)
 			| RTE_DISTRIB_GET_BUF;
-	while (unlikely(__atomic_load_n(&buf->bufptr64, __ATOMIC_RELAXED)
-			& RTE_DISTRIB_FLAGS_MASK))
-		rte_pause();
+	rte_wait_event(&buf->bufptr64, RTE_DISTRIB_FLAGS_MASK,
+			!=, 0, __ATOMIC_RELAXED);
 
 	/* Sync with distributor on GET_BUF flag. */
 	__atomic_store_n(&(buf->bufptr64), req, __ATOMIC_RELEASE);
@@ -74,9 +73,8 @@ rte_distributor_return_pkt_single(struct rte_distributor_single *d,
 	union rte_distributor_buffer_single *buf = &d->bufs[worker_id];
 	uint64_t req = (((int64_t)(uintptr_t)oldpkt) << RTE_DISTRIB_FLAG_BITS)
 			| RTE_DISTRIB_RETURN_BUF;
-	while (unlikely(__atomic_load_n(&buf->bufptr64, __ATOMIC_RELAXED)
-			& RTE_DISTRIB_FLAGS_MASK))
-		rte_pause();
+	rte_wait_event(&buf->bufptr64, RTE_DISTRIB_FLAGS_MASK,
+			!=, 0, __ATOMIC_RELAXED);
 
 	/* Sync with distributor on RETURN_BUF flag. */
 	__atomic_store_n(&(buf->bufptr64), req, __ATOMIC_RELEASE);
-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [dpdk-dev] [PATCH v6 0/4] add new definitions for wait scheme
  2021-10-27  8:10 ` [dpdk-dev] [PATCH v6 0/4] add new definitions for wait scheme Feifei Wang
                     ` (3 preceding siblings ...)
  2021-10-27  8:10   ` [dpdk-dev] [PATCH v6 4/4] lib/distributor: use wait event scheme Feifei Wang
@ 2021-10-27 10:57   ` Jerin Jacob
  2021-10-28  6:33     ` [dpdk-dev] 回复: " Feifei Wang
  4 siblings, 1 reply; 113+ messages in thread
From: Jerin Jacob @ 2021-10-27 10:57 UTC (permalink / raw)
  To: Feifei Wang, Ananyev, Konstantin, Stephen Hemminger,
	David Marchand, Thomas Monjalon
  Cc: dpdk-dev, nd

On Wed, Oct 27, 2021 at 1:40 PM Feifei Wang <feifei.wang2@arm.com> wrote:
>
> Add new definitions for wait scheme, and apply this new definitions into
> lib to replace rte_pause.
>
> v2:
> 1. use macro to create new wait scheme (Stephen)
>
> v3:
> 1. delete unnecessary bug fix in bpf (Konstantin)
>
> v4:
> 1. put size into the macro body (Konstantin)
> 2. replace assert with BUILD_BUG_ON (Stephen)
> 3. delete unnecessary compiler barrier for bpf (Konstantin)
>
> v5:
> 1. 'size' is not the parameter (Konstantin)
> 2. put () around macro parameters (Konstantin)
> 3. fix some original typo issue (Jerin)
> 4. swap 'rte_wait_event' parameter location (Jerin)
> 5. add new macro '__LOAD_EXC'
> 6. delete 'undef' to prevent compilation warning

+ David, Konstantin, Stephen,

Please make a practice to add existing reviewers.

undef of the local macro may result in conflicts with other libraries.
Please add __RTE_ARM_ for existing macros (mark as internal) to fix
the namespace if we are taking that path

>
> v6:
> 1. fix patch style check warning
> 2. delete 'bpf' patch due to 'const' limit
>
> Feifei Wang (4):
>   eal: add new definitions for wait scheme
>   eal: use wait event for read pflock
>   eal: use wait event scheme for mcslock
>   lib/distributor: use wait event scheme
>
>  lib/distributor/rte_distributor_single.c |  10 +-
>  lib/eal/arm/include/rte_pause_64.h       | 136 +++++++++++++----------
>  lib/eal/include/generic/rte_mcslock.h    |   9 +-
>  lib/eal/include/generic/rte_pause.h      |  28 +++++
>  lib/eal/include/generic/rte_pflock.h     |   4 +-
>  5 files changed, 119 insertions(+), 68 deletions(-)
>
> --
> 2.25.1
>

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [dpdk-dev] [PATCH v6 3/4] eal: use wait event scheme for mcslock
  2021-10-27  8:10   ` [dpdk-dev] [PATCH v6 3/4] eal: use wait event scheme for mcslock Feifei Wang
@ 2021-10-27 11:16     ` Mattias Rönnblom
  2021-10-28  6:32       ` [dpdk-dev] 回复: " Feifei Wang
  0 siblings, 1 reply; 113+ messages in thread
From: Mattias Rönnblom @ 2021-10-27 11:16 UTC (permalink / raw)
  To: Feifei Wang, Honnappa Nagarahalli; +Cc: dev, nd, Ruifeng Wang

On 2021-10-27 10:10, Feifei Wang wrote:
> Instead of polling for mcslock to be updated, use wait event scheme
> for this case.
>
> Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> ---
>   lib/eal/include/generic/rte_mcslock.h | 9 +++++++--
>   1 file changed, 7 insertions(+), 2 deletions(-)
>
> diff --git a/lib/eal/include/generic/rte_mcslock.h b/lib/eal/include/generic/rte_mcslock.h
> index 34f33c64a5..806a2b2c7e 100644
> --- a/lib/eal/include/generic/rte_mcslock.h
> +++ b/lib/eal/include/generic/rte_mcslock.h
> @@ -116,8 +116,13 @@ rte_mcslock_unlock(rte_mcslock_t **msl, rte_mcslock_t *me)
>   		/* More nodes added to the queue by other CPUs.
>   		 * Wait until the next pointer is set.
>   		 */
> -		while (__atomic_load_n(&me->next, __ATOMIC_RELAXED) == NULL)
> -			rte_pause();
> +#ifdef RTE_ARCH_32
> +		rte_wait_event((uint32_t *)&me->next, UINT32_MAX, ==, 0,
> +				__ATOMIC_RELAXED);
> +#else
> +		rte_wait_event((uint64_t *)&me->next, UINT64_MAX, ==, 0,
> +				__ATOMIC_RELAXED);
> +#endif
>   	}
>   
>   	/* Pass lock to next waiter. */

You could do something like

rte_wait_event((uintptr_t *)&me->next, UINTPTR_MAX, ==, 0, __ATOMIC_RELAXED);

and avoid the #ifdef.


^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [dpdk-dev] [PATCH v5 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration
  2021-10-27  7:04           ` [dpdk-dev] 回复: " Feifei Wang
  2021-10-27  7:31             ` Feifei Wang
@ 2021-10-27 14:47             ` Ananyev, Konstantin
  2021-10-28  6:24               ` [dpdk-dev] 回复: " Feifei Wang
  1 sibling, 1 reply; 113+ messages in thread
From: Ananyev, Konstantin @ 2021-10-27 14:47 UTC (permalink / raw)
  To: Feifei Wang; +Cc: dev, nd, Ruifeng Wang, nd, nd



> 
> > -----邮件原件-----
> > 发件人: dev <dev-bounces@dpdk.org> 代表 Ananyev, Konstantin
> > 发送时间: Tuesday, October 26, 2021 8:57 PM
> > 收件人: Feifei Wang <Feifei.Wang2@arm.com>
> > 抄送: dev@dpdk.org; nd <nd@arm.com>; Ruifeng Wang
> > <Ruifeng.Wang@arm.com>; nd <nd@arm.com>
> > 主题: Re: [dpdk-dev] [PATCH v5 4/5] lib/bpf: use wait event scheme for Rx/Tx
> > iteration
> >
> >
> > > Hi Feifei,
> > >
> > > > > Instead of polling for cbi->use to be updated, use wait event scheme.
> > > > >
> > > > > Furthermore, delete 'const' for 'bpf_eth_cbi_wait'. This is
> > > > > because of a compilation error:
> > > > > ------------------------------------------------------------------
> > > > > -----
> > > > > ../lib/eal/include/rte_common.h:36:13: error: read-only variable ‘value’
> > > > > used as ‘asm’ output
> > > > >    36 | #define asm __asm__
> > > > >       |             ^~~~~~~
> > > > >
> > > > > ../lib/eal/arm/include/rte_pause_64.h:66:3: note: in expansion of
> > macro ‘asm’
> > > > >    66 |   asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> > > > >       |   ^~~
> > > > >
> > > > > ../lib/eal/arm/include/rte_pause_64.h:96:3: note: in expansion of
> > > > > macro ‘__LOAD_EXC_32’
> > > > >    96 |   __LOAD_EXC_32((src), dst, memorder)     \
> > > > >       |   ^~~~~~~~~~~~~
> > > > >
> > > > > ../lib/eal/arm/include/rte_pause_64.h:167:4: note: in expansion of
> > > > > macro ‘__LOAD_EXC’
> > > > >   167 |    __LOAD_EXC((addr), value, memorder, size) \
> > > > >       |    ^~~~~~~~~~
> > > > >
> > > > > ../lib/bpf/bpf_pkt.c:125:3: note: in expansion of macro ‘rte_wait_event’
> > > > >   125 |   rte_wait_event(&cbi->use, UINT32_MAX, ==, puse,
> > > > > ------------------------------------------------------------------
> > > > > -----
> > > > >
> > > > > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > > > > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > > > > ---
> > > > >  lib/bpf/bpf_pkt.c | 11 ++++-------
> > > > >  1 file changed, 4 insertions(+), 7 deletions(-)
> > > > >
> > > > > diff --git a/lib/bpf/bpf_pkt.c b/lib/bpf/bpf_pkt.c index
> > > > > 6e8248f0d6..213d44a75a 100644
> > > > > --- a/lib/bpf/bpf_pkt.c
> > > > > +++ b/lib/bpf/bpf_pkt.c
> > > > > @@ -111,9 +111,9 @@ bpf_eth_cbi_unuse(struct bpf_eth_cbi *cbi)
> > > > >   * Waits till datapath finished using given callback.
> > > > >   */
> > > > >  static void
> > > > > -bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
> > > > > +bpf_eth_cbi_wait(struct bpf_eth_cbi *cbi)
> > > >
> > > > Hi, Konstantin
> > > >
> > > > For this bpf patch, I delete 'const' through this is contrary to
> > > > what we discussed earlier. This is because if  we keep 'constant' here and
> > use 'rte_wait_event'
> > > > new macro, compiler will report error. And earlier the arm version
> > > > cannot be compiled due to I forgot enable "wfe" config in the meson file,
> > so this issue can not happen before.
> > >
> > >
> > > Honestly, I don't understand why we have to remove perfectly valid 'const'
> > qualifier here.
> > > If this macro can't be used with pointers to const (still don't
> > > understand why), then let's just not use this macro here.
> > > Strictly speaking I don't see much benefit here from it.
> > >
> > > >
> > > > >  {
> > > > > -	uint32_t nuse, puse;
> > > > > +	uint32_t puse;
> > > > >
> > > > >  	/* make sure all previous loads and stores are completed */
> > > > >  	rte_smp_mb();
> > > > > @@ -122,11 +122,8 @@ bpf_eth_cbi_wait(const struct bpf_eth_cbi
> > > > > *cbi)
> > > > >
> > > > >  	/* in use, busy wait till current RX/TX iteration is finished */
> > > > >  	if ((puse & BPF_ETH_CBI_INUSE) != 0) {
> > > > > -		do {
> > > > > -			rte_pause();
> > > > > -			rte_compiler_barrier();
> > > > > -			nuse = cbi->use;
> > > > > -		} while (nuse == puse);
> > > > > +		rte_wait_event(&cbi->use, UINT32_MAX, ==, puse,
> > > > > +				__ATOMIC_RELAXED);
> >
> > After another thought, if we do type conversion at macro invocation time:
> >
> > bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi) {
> >   ...
> >   rte_wait_event((uint32_t *)&cbi->use, UINT32_MAX, ==, puse,
> > __ATOMIC_RELAXED);
> >
> > would that help?
> 
> I try to with this and it will report compiler warning:
> ' cast discards ‘const’ qualifier'.

Something like:
(uint32_t *)(uintptr_t)&cbi->use
?

> I think this is due to that in rte_wait_event macro, we use
> typeof(*(addr)) value = 0;
>  and value is defined as "const uint32_t",
> but it should be able to be updated.
> Furthermore, this reflects the limitations of the new macro, it cannot be applied
> when 'addr' is type of 'const'. Finally, I think I should give up the change for "bpf".

Ah yes, I see.
One trick to avoid it:
typeof (*(addr) + 0) value;
...
But it would cause integer promotion for uint16_t.
So probably wouldn't suit you here.

^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] 回复: [PATCH v5 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration
  2021-10-27 14:47             ` [dpdk-dev] " Ananyev, Konstantin
@ 2021-10-28  6:24               ` Feifei Wang
  0 siblings, 0 replies; 113+ messages in thread
From: Feifei Wang @ 2021-10-28  6:24 UTC (permalink / raw)
  To: Ananyev, Konstantin; +Cc: dev, nd, Ruifeng Wang, nd, nd, nd



> -----邮件原件-----
> 发件人: Ananyev, Konstantin <konstantin.ananyev@intel.com>
> 发送时间: Wednesday, October 27, 2021 10:48 PM
> 收件人: Feifei Wang <Feifei.Wang2@arm.com>
> 抄送: dev@dpdk.org; nd <nd@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>; nd <nd@arm.com>; nd <nd@arm.com>
> 主题: RE: [PATCH v5 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration
> 
> 
> 
> >
> > > -----邮件原件-----
> > > 发件人: dev <dev-bounces@dpdk.org> 代表 Ananyev, Konstantin
> > > 发送时间: Tuesday, October 26, 2021 8:57 PM
> > > 收件人: Feifei Wang <Feifei.Wang2@arm.com>
> > > 抄送: dev@dpdk.org; nd <nd@arm.com>; Ruifeng Wang
> > > <Ruifeng.Wang@arm.com>; nd <nd@arm.com>
> > > 主题: Re: [dpdk-dev] [PATCH v5 4/5] lib/bpf: use wait event scheme for
> > > Rx/Tx iteration
> > >
> > >
> > > > Hi Feifei,
> > > >
> > > > > > Instead of polling for cbi->use to be updated, use wait event scheme.
> > > > > >
> > > > > > Furthermore, delete 'const' for 'bpf_eth_cbi_wait'. This is
> > > > > > because of a compilation error:
> > > > > > --------------------------------------------------------------
> > > > > > ----
> > > > > > -----
> > > > > > ../lib/eal/include/rte_common.h:36:13: error: read-only variable
> ‘value’
> > > > > > used as ‘asm’ output
> > > > > >    36 | #define asm __asm__
> > > > > >       |             ^~~~~~~
> > > > > >
> > > > > > ../lib/eal/arm/include/rte_pause_64.h:66:3: note: in expansion
> > > > > > of
> > > macro ‘asm’
> > > > > >    66 |   asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> > > > > >       |   ^~~
> > > > > >
> > > > > > ../lib/eal/arm/include/rte_pause_64.h:96:3: note: in expansion
> > > > > > of macro ‘__LOAD_EXC_32’
> > > > > >    96 |   __LOAD_EXC_32((src), dst, memorder)     \
> > > > > >       |   ^~~~~~~~~~~~~
> > > > > >
> > > > > > ../lib/eal/arm/include/rte_pause_64.h:167:4: note: in
> > > > > > expansion of macro ‘__LOAD_EXC’
> > > > > >   167 |    __LOAD_EXC((addr), value, memorder, size) \
> > > > > >       |    ^~~~~~~~~~
> > > > > >
> > > > > > ../lib/bpf/bpf_pkt.c:125:3: note: in expansion of macro
> ‘rte_wait_event’
> > > > > >   125 |   rte_wait_event(&cbi->use, UINT32_MAX, ==, puse,
> > > > > > --------------------------------------------------------------
> > > > > > ----
> > > > > > -----
> > > > > >
> > > > > > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > > > > > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > > > > > ---
> > > > > >  lib/bpf/bpf_pkt.c | 11 ++++-------
> > > > > >  1 file changed, 4 insertions(+), 7 deletions(-)
> > > > > >
> > > > > > diff --git a/lib/bpf/bpf_pkt.c b/lib/bpf/bpf_pkt.c index
> > > > > > 6e8248f0d6..213d44a75a 100644
> > > > > > --- a/lib/bpf/bpf_pkt.c
> > > > > > +++ b/lib/bpf/bpf_pkt.c
> > > > > > @@ -111,9 +111,9 @@ bpf_eth_cbi_unuse(struct bpf_eth_cbi *cbi)
> > > > > >   * Waits till datapath finished using given callback.
> > > > > >   */
> > > > > >  static void
> > > > > > -bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
> > > > > > +bpf_eth_cbi_wait(struct bpf_eth_cbi *cbi)
> > > > >
> > > > > Hi, Konstantin
> > > > >
> > > > > For this bpf patch, I delete 'const' through this is contrary to
> > > > > what we discussed earlier. This is because if  we keep
> > > > > 'constant' here and
> > > use 'rte_wait_event'
> > > > > new macro, compiler will report error. And earlier the arm
> > > > > version cannot be compiled due to I forgot enable "wfe" config
> > > > > in the meson file,
> > > so this issue can not happen before.
> > > >
> > > >
> > > > Honestly, I don't understand why we have to remove perfectly valid
> 'const'
> > > qualifier here.
> > > > If this macro can't be used with pointers to const (still don't
> > > > understand why), then let's just not use this macro here.
> > > > Strictly speaking I don't see much benefit here from it.
> > > >
> > > > >
> > > > > >  {
> > > > > > -	uint32_t nuse, puse;
> > > > > > +	uint32_t puse;
> > > > > >
> > > > > >  	/* make sure all previous loads and stores are completed */
> > > > > >  	rte_smp_mb();
> > > > > > @@ -122,11 +122,8 @@ bpf_eth_cbi_wait(const struct bpf_eth_cbi
> > > > > > *cbi)
> > > > > >
> > > > > >  	/* in use, busy wait till current RX/TX iteration is finished */
> > > > > >  	if ((puse & BPF_ETH_CBI_INUSE) != 0) {
> > > > > > -		do {
> > > > > > -			rte_pause();
> > > > > > -			rte_compiler_barrier();
> > > > > > -			nuse = cbi->use;
> > > > > > -		} while (nuse == puse);
> > > > > > +		rte_wait_event(&cbi->use, UINT32_MAX, ==, puse,
> > > > > > +				__ATOMIC_RELAXED);
> > >
> > > After another thought, if we do type conversion at macro invocation time:
> > >
> > > bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi) {
> > >   ...
> > >   rte_wait_event((uint32_t *)&cbi->use, UINT32_MAX, ==, puse,
> > > __ATOMIC_RELAXED);
> > >
> > > would that help?
> >
> > I try to with this and it will report compiler warning:
> > ' cast discards ‘const’ qualifier'.
> 
> Something like:
> (uint32_t *)(uintptr_t)&cbi->use
> ?
I try to apply this and it is OK to fix complier warning.
Good comments and with this change I think wfe new macro
can be applied in this bpf API. Thanks.
> 
> > I think this is due to that in rte_wait_event macro, we use
> > typeof(*(addr)) value = 0;
> >  and value is defined as "const uint32_t", but it should be able to be
> > updated.
> > Furthermore, this reflects the limitations of the new macro, it cannot
> > be applied when 'addr' is type of 'const'. Finally, I think I should give up the
> change for "bpf".
> 
> Ah yes, I see.
> One trick to avoid it:
> typeof (*(addr) + 0) value;
> ...
> But it would cause integer promotion for uint16_t.
> So probably wouldn't suit you here.
I also tried this change, and it can also fix our issue. But as you say,
if *addr is uint16_t, integer promotion will enlarge its size. It is a really good suggestion, but
I'm willing to apply the last strategy.

^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] 回复:  [PATCH v6 3/4] eal: use wait event scheme for mcslock
  2021-10-27 11:16     ` Mattias Rönnblom
@ 2021-10-28  6:32       ` Feifei Wang
  0 siblings, 0 replies; 113+ messages in thread
From: Feifei Wang @ 2021-10-28  6:32 UTC (permalink / raw)
  To: Mattias Rönnblom, Honnappa Nagarahalli; +Cc: dev, nd, Ruifeng Wang, nd



> -----邮件原件-----
> 发件人: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
> 发送时间: Wednesday, October 27, 2021 7:16 PM
> 收件人: Feifei Wang <Feifei.Wang2@arm.com>; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>
> 抄送: dev@dpdk.org; nd <nd@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>
> 主题: Re: [dpdk-dev] [PATCH v6 3/4] eal: use wait event scheme for mcslock
> 
> On 2021-10-27 10:10, Feifei Wang wrote:
> > Instead of polling for mcslock to be updated, use wait event scheme
> > for this case.
> >
> > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > ---
> >   lib/eal/include/generic/rte_mcslock.h | 9 +++++++--
> >   1 file changed, 7 insertions(+), 2 deletions(-)
> >
> > diff --git a/lib/eal/include/generic/rte_mcslock.h
> > b/lib/eal/include/generic/rte_mcslock.h
> > index 34f33c64a5..806a2b2c7e 100644
> > --- a/lib/eal/include/generic/rte_mcslock.h
> > +++ b/lib/eal/include/generic/rte_mcslock.h
> > @@ -116,8 +116,13 @@ rte_mcslock_unlock(rte_mcslock_t **msl,
> rte_mcslock_t *me)
> >   		/* More nodes added to the queue by other CPUs.
> >   		 * Wait until the next pointer is set.
> >   		 */
> > -		while (__atomic_load_n(&me->next, __ATOMIC_RELAXED) ==
> NULL)
> > -			rte_pause();
> > +#ifdef RTE_ARCH_32
> > +		rte_wait_event((uint32_t *)&me->next, UINT32_MAX, ==, 0,
> > +				__ATOMIC_RELAXED);
> > +#else
> > +		rte_wait_event((uint64_t *)&me->next, UINT64_MAX, ==, 0,
> > +				__ATOMIC_RELAXED);
> > +#endif
> >   	}
> >
> >   	/* Pass lock to next waiter. */
> 
> You could do something like
> 
> rte_wait_event((uintptr_t *)&me->next, UINTPTR_MAX, ==, 0,
> __ATOMIC_RELAXED);
> 
> and avoid the #ifdef.
Good comments, it can fix the problem. Thanks for this comments.


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] 回复:  [PATCH v6 0/4] add new definitions for wait scheme
  2021-10-27 10:57   ` [dpdk-dev] [PATCH v6 0/4] add new definitions for wait scheme Jerin Jacob
@ 2021-10-28  6:33     ` Feifei Wang
  0 siblings, 0 replies; 113+ messages in thread
From: Feifei Wang @ 2021-10-28  6:33 UTC (permalink / raw)
  To: Jerin Jacob, Ananyev, Konstantin, Stephen Hemminger,
	David Marchand, thomas
  Cc: dpdk-dev, nd, nd



> -----邮件原件-----
> 发件人: Jerin Jacob <jerinjacobk@gmail.com>
> 发送时间: Wednesday, October 27, 2021 6:58 PM
> 收件人: Feifei Wang <Feifei.Wang2@arm.com>; Ananyev, Konstantin
> <konstantin.ananyev@intel.com>; Stephen Hemminger
> <stephen@networkplumber.org>; David Marchand
> <david.marchand@redhat.com>; thomas@monjalon.net
> 抄送: dpdk-dev <dev@dpdk.org>; nd <nd@arm.com>
> 主题: Re: [dpdk-dev] [PATCH v6 0/4] add new definitions for wait scheme
> 
> On Wed, Oct 27, 2021 at 1:40 PM Feifei Wang <feifei.wang2@arm.com>
> wrote:
> >
> > Add new definitions for wait scheme, and apply this new definitions
> > into lib to replace rte_pause.
> >
> > v2:
> > 1. use macro to create new wait scheme (Stephen)
> >
> > v3:
> > 1. delete unnecessary bug fix in bpf (Konstantin)
> >
> > v4:
> > 1. put size into the macro body (Konstantin) 2. replace assert with
> > BUILD_BUG_ON (Stephen) 3. delete unnecessary compiler barrier for bpf
> > (Konstantin)
> >
> > v5:
> > 1. 'size' is not the parameter (Konstantin) 2. put () around macro
> > parameters (Konstantin) 3. fix some original typo issue (Jerin) 4.
> > swap 'rte_wait_event' parameter location (Jerin) 5. add new macro
> > '__LOAD_EXC'
> > 6. delete 'undef' to prevent compilation warning
> 
> + David, Konstantin, Stephen,
> 
> Please make a practice to add exiting reviewers.
That's Ok.
> 
> undef the local marco may result in conflict with other libraries.
> Please add __RTE_ARM_ for existing macros (mark as internal) to fix the
> namespace if we are taking that path
Thanks for the comments, I will update this in the next version.
> 
> >
> > v6:
> > 1. fix patch style check warning
> > 2. delete 'bpf' patch due to 'const' limit
> >
> > Feifei Wang (4):
> >   eal: add new definitions for wait scheme
> >   eal: use wait event for read pflock
> >   eal: use wait event scheme for mcslock
> >   lib/distributor: use wait event scheme
> >
> >  lib/distributor/rte_distributor_single.c |  10 +-
> >  lib/eal/arm/include/rte_pause_64.h       | 136 +++++++++++++----------
> >  lib/eal/include/generic/rte_mcslock.h    |   9 +-
> >  lib/eal/include/generic/rte_pause.h      |  28 +++++
> >  lib/eal/include/generic/rte_pflock.h     |   4 +-
> >  5 files changed, 119 insertions(+), 68 deletions(-)
> >
> > --
> > 2.25.1
> >

^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [PATCH v7 0/5] add new definitions for wait scheme
  2021-09-02  5:32 [dpdk-dev] [RFC PATCH v1 0/5] add new API for wait until scheme Feifei Wang
                   ` (9 preceding siblings ...)
  2021-10-27  8:10 ` [dpdk-dev] [PATCH v6 0/4] add new definitions for wait scheme Feifei Wang
@ 2021-10-28  6:56 ` Feifei Wang
  2021-10-28  6:56   ` [dpdk-dev] [PATCH v7 1/5] eal: " Feifei Wang
                     ` (4 more replies)
  2021-10-29  8:20 ` [dpdk-dev] [PATCH v8 0/5] add new definitions for wait scheme Feifei Wang
  2021-11-01  6:00 ` [dpdk-dev] [PATCH v9 0/5] add new helper for wait scheme Feifei Wang
  12 siblings, 5 replies; 113+ messages in thread
From: Feifei Wang @ 2021-10-28  6:56 UTC (permalink / raw)
  Cc: dev, nd, jerinjacobk, konstantin.ananyev, stephen,
	david.marchand, thomas, mattias.ronnblom, Feifei Wang

Add new definitions for wait scheme, and apply this new definitions into
lib to replace rte_pause.

v2:
1. use macro to create new wait scheme (Stephen)

v3:
1. delete unnecessary bug fix in bpf (Konstantin)

v4:
1. put size into the macro body (Konstantin)
2. replace assert with BUILD_BUG_ON (Stephen)
3. delete unnecessary compiler barrier for bpf (Konstantin)

v5:
1. 'size' is not the parameter (Konstantin)
2. put () around macro parameters (Konstantin)
3. fix some original typo issue (Jerin)
4. swap 'rte_wait_event' parameter location (Jerin)
5. add new macro '__LOAD_EXC'
6. delete 'undef' to prevent compilation warning

v6:
1. fix patch style check warning
2. delete 'bpf' patch due to 'const' limit

v7:
1. add __RTE_ARM to fix the namespace (Jerin)
2. use 'uintptr_t *' in mcslock for different
architecture(32/64) (Mattias)
3. add a new pointer 'next' in mcslock to fix
compiler issue
4. add bpf patch and use 'uintptr_t' to fix const
discard warning (Konstantin)

Feifei Wang (5):
  eal: add new definitions for wait scheme
  eal: use wait event for read pflock
  eal: use wait event scheme for mcslock
  lib/bpf: use wait event scheme for Rx/Tx iteration
  lib/distributor: use wait event scheme

 lib/bpf/bpf_pkt.c                        |   9 +-
 lib/distributor/rte_distributor_single.c |  10 +-
 lib/eal/arm/include/rte_pause_64.h       | 166 +++++++++++++----------
 lib/eal/include/generic/rte_mcslock.h    |   5 +-
 lib/eal/include/generic/rte_pause.h      |  28 ++++
 lib/eal/include/generic/rte_pflock.h     |   4 +-
 6 files changed, 133 insertions(+), 89 deletions(-)

-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [PATCH v7 1/5] eal: add new definitions for wait scheme
  2021-10-28  6:56 ` [dpdk-dev] [PATCH v7 0/5] " Feifei Wang
@ 2021-10-28  6:56   ` Feifei Wang
  2021-10-28  7:15     ` Jerin Jacob
  2021-10-28 13:14     ` [dpdk-dev] " Ananyev, Konstantin
  2021-10-28  6:56   ` [dpdk-dev] [PATCH v7 2/5] eal: use wait event for read pflock Feifei Wang
                     ` (3 subsequent siblings)
  4 siblings, 2 replies; 113+ messages in thread
From: Feifei Wang @ 2021-10-28  6:56 UTC (permalink / raw)
  To: Ruifeng Wang
  Cc: dev, nd, jerinjacobk, konstantin.ananyev, stephen,
	david.marchand, thomas, mattias.ronnblom, Feifei Wang

Introduce macros as a generic interface for address monitoring.
For different size, encapsulate '__LOAD_EXC_16', '__LOAD_EXC_32'
and '__LOAD_EXC_64' into a new macro '__LOAD_EXC'.

Furthermore, to prevent a compilation warning on Arm:
----------------------------------------------
'warning: implicit declaration of function ...'
----------------------------------------------
Delete 'undef' constructions for '__LOAD_EXC_xx', '__SEVL' and '__WFE',
and add the '__RTE_ARM' prefix to these macros to fix the namespace.

This is because the original macros were undefined at the end of the file.
If the new macro 'rte_wait_event' calls them in other files, they will be
seen as 'not defined'.

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 lib/eal/arm/include/rte_pause_64.h  | 166 ++++++++++++++++------------
 lib/eal/include/generic/rte_pause.h |  28 +++++
 2 files changed, 122 insertions(+), 72 deletions(-)

diff --git a/lib/eal/arm/include/rte_pause_64.h b/lib/eal/arm/include/rte_pause_64.h
index e87d10b8cc..d547226a8d 100644
--- a/lib/eal/arm/include/rte_pause_64.h
+++ b/lib/eal/arm/include/rte_pause_64.h
@@ -26,26 +26,18 @@ static inline void rte_pause(void)
 #ifdef RTE_WAIT_UNTIL_EQUAL_ARCH_DEFINED
 
 /* Send an event to quit WFE. */
-#define __SEVL() { asm volatile("sevl" : : : "memory"); }
+#define __RTE_ARM_SEVL() { asm volatile("sevl" : : : "memory"); }
 
 /* Put processor into low power WFE(Wait For Event) state. */
-#define __WFE() { asm volatile("wfe" : : : "memory"); }
+#define __RTE_ARM_WFE() { asm volatile("wfe" : : : "memory"); }
 
-static __rte_always_inline void
-rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
-		int memorder)
-{
-	uint16_t value;
-
-	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
-
-	/*
-	 * Atomic exclusive load from addr, it returns the 16-bit content of
-	 * *addr while making it 'monitored',when it is written by someone
-	 * else, the 'monitored' state is cleared and a event is generated
-	 * implicitly to exit WFE.
-	 */
-#define __LOAD_EXC_16(src, dst, memorder) {               \
+/*
+ * Atomic exclusive load from addr, it returns the 16-bit content of
+ * *addr while making it 'monitored', when it is written by someone
+ * else, the 'monitored' state is cleared and an event is generated
+ * implicitly to exit WFE.
+ */
+#define __RTE_ARM_LOAD_EXC_16(src, dst, memorder) {       \
 	if (memorder == __ATOMIC_RELAXED) {               \
 		asm volatile("ldxrh %w[tmp], [%x[addr]]"  \
 			: [tmp] "=&r" (dst)               \
@@ -58,15 +50,70 @@ rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
 			: "memory");                      \
 	} }
 
-	__LOAD_EXC_16(addr, value, memorder)
+/*
+ * Atomic exclusive load from addr, it returns the 32-bit content of
+ * *addr while making it 'monitored', when it is written by someone
+ * else, the 'monitored' state is cleared and an event is generated
+ * implicitly to exit WFE.
+ */
+#define __RTE_ARM_LOAD_EXC_32(src, dst, memorder) {      \
+	if (memorder == __ATOMIC_RELAXED) {              \
+		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
+			: [tmp] "=&r" (dst)              \
+			: [addr] "r"(src)                \
+			: "memory");                     \
+	} else {                                         \
+		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
+			: [tmp] "=&r" (dst)              \
+			: [addr] "r"(src)                \
+			: "memory");                     \
+	} }
+
+/*
+ * Atomic exclusive load from addr, it returns the 64-bit content of
+ * *addr while making it 'monitored', when it is written by someone
+ * else, the 'monitored' state is cleared and an event is generated
+ * implicitly to exit WFE.
+ */
+#define __RTE_ARM_LOAD_EXC_64(src, dst, memorder) {      \
+	if (memorder == __ATOMIC_RELAXED) {              \
+		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
+			: [tmp] "=&r" (dst)              \
+			: [addr] "r"(src)                \
+			: "memory");                     \
+	} else {                                         \
+		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
+			: [tmp] "=&r" (dst)              \
+			: [addr] "r"(src)                \
+			: "memory");                     \
+	} }
+
+#define __RTE_ARM_LOAD_EXC(src, dst, memorder, size) {    \
+	assert(size == 16 || size == 32 || size == 64);   \
+	if (size == 16)                                   \
+		__RTE_ARM_LOAD_EXC_16(src, dst, memorder) \
+	else if (size == 32)                              \
+		__RTE_ARM_LOAD_EXC_32(src, dst, memorder) \
+	else if (size == 64)                              \
+		__RTE_ARM_LOAD_EXC_64(src, dst, memorder) \
+}
+
+static __rte_always_inline void
+rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
+		int memorder)
+{
+	uint16_t value;
+
+	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
+
+	__RTE_ARM_LOAD_EXC_16(addr, value, memorder)
 	if (value != expected) {
-		__SEVL()
+		 __RTE_ARM_SEVL()
 		do {
-			__WFE()
-			__LOAD_EXC_16(addr, value, memorder)
+			__RTE_ARM_WFE()
+			__RTE_ARM_LOAD_EXC_16(addr, value, memorder)
 		} while (value != expected);
 	}
-#undef __LOAD_EXC_16
 }
 
 static __rte_always_inline void
@@ -77,34 +124,14 @@ rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
 
 	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
 
-	/*
-	 * Atomic exclusive load from addr, it returns the 32-bit content of
-	 * *addr while making it 'monitored',when it is written by someone
-	 * else, the 'monitored' state is cleared and a event is generated
-	 * implicitly to exit WFE.
-	 */
-#define __LOAD_EXC_32(src, dst, memorder) {              \
-	if (memorder == __ATOMIC_RELAXED) {              \
-		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
-			: [tmp] "=&r" (dst)              \
-			: [addr] "r"(src)                \
-			: "memory");                     \
-	} else {                                         \
-		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
-			: [tmp] "=&r" (dst)              \
-			: [addr] "r"(src)                \
-			: "memory");                     \
-	} }
-
-	__LOAD_EXC_32(addr, value, memorder)
+	__RTE_ARM_LOAD_EXC_32(addr, value, memorder)
 	if (value != expected) {
-		__SEVL()
+		__RTE_ARM_SEVL()
 		do {
-			__WFE()
-			__LOAD_EXC_32(addr, value, memorder)
+			__RTE_ARM_WFE()
+			__RTE_ARM_LOAD_EXC_32(addr, value, memorder)
 		} while (value != expected);
 	}
-#undef __LOAD_EXC_32
 }
 
 static __rte_always_inline void
@@ -115,38 +142,33 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
 
 	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
 
-	/*
-	 * Atomic exclusive load from addr, it returns the 64-bit content of
-	 * *addr while making it 'monitored',when it is written by someone
-	 * else, the 'monitored' state is cleared and a event is generated
-	 * implicitly to exit WFE.
-	 */
-#define __LOAD_EXC_64(src, dst, memorder) {              \
-	if (memorder == __ATOMIC_RELAXED) {              \
-		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
-			: [tmp] "=&r" (dst)              \
-			: [addr] "r"(src)                \
-			: "memory");                     \
-	} else {                                         \
-		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
-			: [tmp] "=&r" (dst)              \
-			: [addr] "r"(src)                \
-			: "memory");                     \
-	} }
-
-	__LOAD_EXC_64(addr, value, memorder)
+	__RTE_ARM_LOAD_EXC_64(addr, value, memorder)
 	if (value != expected) {
-		__SEVL()
+		__RTE_ARM_SEVL()
 		do {
-			__WFE()
-			__LOAD_EXC_64(addr, value, memorder)
+			__RTE_ARM_WFE()
+			__RTE_ARM_LOAD_EXC_64(addr, value, memorder)
 		} while (value != expected);
 	}
 }
-#undef __LOAD_EXC_64
 
-#undef __SEVL
-#undef __WFE
+#define rte_wait_event(addr, mask, cond, expected, memorder)              \
+do {                                                                      \
+	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));                \
+	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&                  \
+				memorder != __ATOMIC_RELAXED);            \
+	uint32_t size = sizeof(*(addr)) << 3;                             \
+	typeof(*(addr)) expected_value = (expected);                      \
+	typeof(*(addr)) value = 0;                                        \
+	__RTE_ARM_LOAD_EXC((addr), value, memorder, size)                 \
+	if ((value & (mask)) cond expected_value) {                       \
+		__RTE_ARM_SEVL()                                          \
+		do {                                                      \
+			__RTE_ARM_WFE()                                   \
+			__RTE_ARM_LOAD_EXC((addr), value, memorder, size) \
+		} while ((value & (mask)) cond expected_value);           \
+	}                                                                 \
+} while (0)
 
 #endif
 
diff --git a/lib/eal/include/generic/rte_pause.h b/lib/eal/include/generic/rte_pause.h
index 668ee4a184..d0c5b5a415 100644
--- a/lib/eal/include/generic/rte_pause.h
+++ b/lib/eal/include/generic/rte_pause.h
@@ -111,6 +111,34 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
 	while (__atomic_load_n(addr, memorder) != expected)
 		rte_pause();
 }
+
+/*
+ * Wait until *addr breaks the condition, with a relaxed memory
+ * ordering model meaning the loads around this API can be reordered.
+ *
+ * @param addr
+ *  A pointer to the memory location.
+ * @param mask
+ *  A mask of value bits in interest.
+ * @param cond
+ *  A symbol representing the condition.
+ * @param expected
+ *  An expected value to be in the memory location.
+ * @param memorder
+ *  Two different memory orders that can be specified:
+ *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
+ *  C++11 memory orders with the same names, see the C++11 standard or
+ *  the GCC wiki on atomic synchronization for detailed definition.
+ */
+#define rte_wait_event(addr, mask, cond, expected, memorder)                       \
+do {                                                                               \
+	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));                         \
+	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&                           \
+				memorder != __ATOMIC_RELAXED);                     \
+	typeof(*(addr)) expected_value = (expected);                               \
+	while ((__atomic_load_n((addr), (memorder)) & (mask)) cond expected_value) \
+		rte_pause();                                                       \
+} while (0)
 #endif
 
 #endif /* _RTE_PAUSE_H_ */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [PATCH v7 2/5] eal: use wait event for read pflock
  2021-10-28  6:56 ` [dpdk-dev] [PATCH v7 0/5] " Feifei Wang
  2021-10-28  6:56   ` [dpdk-dev] [PATCH v7 1/5] eal: " Feifei Wang
@ 2021-10-28  6:56   ` Feifei Wang
  2021-10-28  6:56   ` [dpdk-dev] [PATCH v7 3/5] eal: use wait event scheme for mcslock Feifei Wang
                     ` (2 subsequent siblings)
  4 siblings, 0 replies; 113+ messages in thread
From: Feifei Wang @ 2021-10-28  6:56 UTC (permalink / raw)
  Cc: dev, nd, jerinjacobk, konstantin.ananyev, stephen,
	david.marchand, thomas, mattias.ronnblom, Feifei Wang,
	Ruifeng Wang

Instead of polling for read pflock update, use wait event scheme for
this case.

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 lib/eal/include/generic/rte_pflock.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/lib/eal/include/generic/rte_pflock.h b/lib/eal/include/generic/rte_pflock.h
index e57c179ef2..7573b036bf 100644
--- a/lib/eal/include/generic/rte_pflock.h
+++ b/lib/eal/include/generic/rte_pflock.h
@@ -121,9 +121,7 @@ rte_pflock_read_lock(rte_pflock_t *pf)
 		return;
 
 	/* Wait for current write phase to complete. */
-	while ((__atomic_load_n(&pf->rd.in, __ATOMIC_ACQUIRE)
-		& RTE_PFLOCK_WBITS) == w)
-		rte_pause();
+	rte_wait_event(&pf->rd.in, RTE_PFLOCK_WBITS, ==, w, __ATOMIC_ACQUIRE);
 }
 
 /**
-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [PATCH v7 3/5] eal: use wait event scheme for mcslock
  2021-10-28  6:56 ` [dpdk-dev] [PATCH v7 0/5] " Feifei Wang
  2021-10-28  6:56   ` [dpdk-dev] [PATCH v7 1/5] eal: " Feifei Wang
  2021-10-28  6:56   ` [dpdk-dev] [PATCH v7 2/5] eal: use wait event for read pflock Feifei Wang
@ 2021-10-28  6:56   ` Feifei Wang
  2021-10-28  7:02     ` Jerin Jacob
  2021-10-28  6:56   ` [dpdk-dev] [PATCH v7 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration Feifei Wang
  2021-10-28  6:56   ` [dpdk-dev] [PATCH v7 5/5] lib/distributor: use wait event scheme Feifei Wang
  4 siblings, 1 reply; 113+ messages in thread
From: Feifei Wang @ 2021-10-28  6:56 UTC (permalink / raw)
  To: Honnappa Nagarahalli
  Cc: dev, nd, jerinjacobk, konstantin.ananyev, stephen,
	david.marchand, thomas, mattias.ronnblom, Feifei Wang,
	Ruifeng Wang

Instead of polling for mcslock to be updated, use wait event scheme
for this case.

Furthermore, 'uintptr_t *' is used to accommodate the different pointer
sizes of 32/64-bit architectures.

And define a new pointer 'next' to avoid the compilation error:
-------------------------------------------------------------------
'dereferencing type-punned pointer will break strict-aliasing rules'
-------------------------------------------------------------------

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 lib/eal/include/generic/rte_mcslock.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/lib/eal/include/generic/rte_mcslock.h b/lib/eal/include/generic/rte_mcslock.h
index 34f33c64a5..d5b9b293cd 100644
--- a/lib/eal/include/generic/rte_mcslock.h
+++ b/lib/eal/include/generic/rte_mcslock.h
@@ -116,8 +116,9 @@ rte_mcslock_unlock(rte_mcslock_t **msl, rte_mcslock_t *me)
 		/* More nodes added to the queue by other CPUs.
 		 * Wait until the next pointer is set.
 		 */
-		while (__atomic_load_n(&me->next, __ATOMIC_RELAXED) == NULL)
-			rte_pause();
+		uintptr_t *next = NULL;
+		next = (uintptr_t *)&me->next;
+		rte_wait_event(next, UINTPTR_MAX, ==, 0, __ATOMIC_RELAXED);
 	}
 
 	/* Pass lock to next waiter. */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [PATCH v7 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration
  2021-10-28  6:56 ` [dpdk-dev] [PATCH v7 0/5] " Feifei Wang
                     ` (2 preceding siblings ...)
  2021-10-28  6:56   ` [dpdk-dev] [PATCH v7 3/5] eal: use wait event scheme for mcslock Feifei Wang
@ 2021-10-28  6:56   ` Feifei Wang
  2021-10-28 13:15     ` Ananyev, Konstantin
  2021-10-28  6:56   ` [dpdk-dev] [PATCH v7 5/5] lib/distributor: use wait event scheme Feifei Wang
  4 siblings, 1 reply; 113+ messages in thread
From: Feifei Wang @ 2021-10-28  6:56 UTC (permalink / raw)
  To: Konstantin Ananyev
  Cc: dev, nd, jerinjacobk, stephen, david.marchand, thomas,
	mattias.ronnblom, Feifei Wang, Ruifeng Wang

Instead of polling for cbi->use to be updated, use wait event scheme.

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 lib/bpf/bpf_pkt.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/lib/bpf/bpf_pkt.c b/lib/bpf/bpf_pkt.c
index 6e8248f0d6..c8a1cd1eb8 100644
--- a/lib/bpf/bpf_pkt.c
+++ b/lib/bpf/bpf_pkt.c
@@ -113,7 +113,7 @@ bpf_eth_cbi_unuse(struct bpf_eth_cbi *cbi)
 static void
 bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
 {
-	uint32_t nuse, puse;
+	uint32_t puse;
 
 	/* make sure all previous loads and stores are completed */
 	rte_smp_mb();
@@ -122,11 +122,8 @@ bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
 
 	/* in use, busy wait till current RX/TX iteration is finished */
 	if ((puse & BPF_ETH_CBI_INUSE) != 0) {
-		do {
-			rte_pause();
-			rte_compiler_barrier();
-			nuse = cbi->use;
-		} while (nuse == puse);
+		rte_wait_event((uint32_t *)(uintptr_t)&cbi->use, UINT32_MAX,
+				==, puse, __ATOMIC_RELAXED);
 	}
 }
 
-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [PATCH v7 5/5] lib/distributor: use wait event scheme
  2021-10-28  6:56 ` [dpdk-dev] [PATCH v7 0/5] " Feifei Wang
                     ` (3 preceding siblings ...)
  2021-10-28  6:56   ` [dpdk-dev] [PATCH v7 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration Feifei Wang
@ 2021-10-28  6:56   ` Feifei Wang
  4 siblings, 0 replies; 113+ messages in thread
From: Feifei Wang @ 2021-10-28  6:56 UTC (permalink / raw)
  To: David Hunt
  Cc: dev, nd, jerinjacobk, konstantin.ananyev, stephen,
	david.marchand, thomas, mattias.ronnblom, Feifei Wang,
	Ruifeng Wang

Instead of polling for bufptr64 to be updated, use
wait event for this case.

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 lib/distributor/rte_distributor_single.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/lib/distributor/rte_distributor_single.c b/lib/distributor/rte_distributor_single.c
index f4725b1d0b..d52b24a453 100644
--- a/lib/distributor/rte_distributor_single.c
+++ b/lib/distributor/rte_distributor_single.c
@@ -33,9 +33,8 @@ rte_distributor_request_pkt_single(struct rte_distributor_single *d,
 	union rte_distributor_buffer_single *buf = &d->bufs[worker_id];
 	int64_t req = (((int64_t)(uintptr_t)oldpkt) << RTE_DISTRIB_FLAG_BITS)
 			| RTE_DISTRIB_GET_BUF;
-	while (unlikely(__atomic_load_n(&buf->bufptr64, __ATOMIC_RELAXED)
-			& RTE_DISTRIB_FLAGS_MASK))
-		rte_pause();
+	rte_wait_event(&buf->bufptr64, RTE_DISTRIB_FLAGS_MASK,
+			!=, 0, __ATOMIC_RELAXED);
 
 	/* Sync with distributor on GET_BUF flag. */
 	__atomic_store_n(&(buf->bufptr64), req, __ATOMIC_RELEASE);
@@ -74,9 +73,8 @@ rte_distributor_return_pkt_single(struct rte_distributor_single *d,
 	union rte_distributor_buffer_single *buf = &d->bufs[worker_id];
 	uint64_t req = (((int64_t)(uintptr_t)oldpkt) << RTE_DISTRIB_FLAG_BITS)
 			| RTE_DISTRIB_RETURN_BUF;
-	while (unlikely(__atomic_load_n(&buf->bufptr64, __ATOMIC_RELAXED)
-			& RTE_DISTRIB_FLAGS_MASK))
-		rte_pause();
+	rte_wait_event(&buf->bufptr64, RTE_DISTRIB_FLAGS_MASK,
+			!=, 0, __ATOMIC_RELAXED);
 
 	/* Sync with distributor on RETURN_BUF flag. */
 	__atomic_store_n(&(buf->bufptr64), req, __ATOMIC_RELEASE);
-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [dpdk-dev] [PATCH v7 3/5] eal: use wait event scheme for mcslock
  2021-10-28  6:56   ` [dpdk-dev] [PATCH v7 3/5] eal: use wait event scheme for mcslock Feifei Wang
@ 2021-10-28  7:02     ` Jerin Jacob
  2021-10-28  7:14       ` [dpdk-dev] 回复: " Feifei Wang
  0 siblings, 1 reply; 113+ messages in thread
From: Jerin Jacob @ 2021-10-28  7:02 UTC (permalink / raw)
  To: Feifei Wang
  Cc: Honnappa Nagarahalli, dpdk-dev, nd, Ananyev, Konstantin,
	Stephen Hemminger, David Marchand, Thomas Monjalon,
	Mattias Rönnblom, Ruifeng Wang

On Thu, Oct 28, 2021 at 12:27 PM Feifei Wang <feifei.wang2@arm.com> wrote:
>
> Instead of polling for mcslock to be updated, use wait event scheme
> for this case.
>
> Furthermore, use 'uintptr_t *' is for different size of pointer in 32/64
> bits architecture.
>
> And define a new pointer 'next' for the compilation error:
> -------------------------------------------------------------------
> 'dereferencing type-punned pointer will break strict-aliasing rules'
> -------------------------------------------------------------------
>
> Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> ---
>  lib/eal/include/generic/rte_mcslock.h | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/lib/eal/include/generic/rte_mcslock.h b/lib/eal/include/generic/rte_mcslock.h
> index 34f33c64a5..d5b9b293cd 100644
> --- a/lib/eal/include/generic/rte_mcslock.h
> +++ b/lib/eal/include/generic/rte_mcslock.h
> @@ -116,8 +116,9 @@ rte_mcslock_unlock(rte_mcslock_t **msl, rte_mcslock_t *me)
>                 /* More nodes added to the queue by other CPUs.
>                  * Wait until the next pointer is set.
>                  */
> -               while (__atomic_load_n(&me->next, __ATOMIC_RELAXED) == NULL)
> -                       rte_pause();
> +               uintptr_t *next = NULL;

It is going to update in the next line. Why explicit NULL assignment?

> +               next = (uintptr_t *)&me->next;
> +               rte_wait_event(next, UINTPTR_MAX, ==, 0, __ATOMIC_RELAXED);
>         }
>
>         /* Pass lock to next waiter. */
> --
> 2.25.1
>

^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] 回复:  [PATCH v7 3/5] eal: use wait event scheme for mcslock
  2021-10-28  7:02     ` Jerin Jacob
@ 2021-10-28  7:14       ` Feifei Wang
  0 siblings, 0 replies; 113+ messages in thread
From: Feifei Wang @ 2021-10-28  7:14 UTC (permalink / raw)
  To: Jerin Jacob
  Cc: Honnappa Nagarahalli, dpdk-dev, nd, Ananyev, Konstantin,
	Stephen Hemminger, David Marchand, thomas, Mattias Rönnblom,
	Ruifeng Wang, nd



> -----邮件原件-----
> 发件人: dev <dev-bounces@dpdk.org> 代表 Jerin Jacob
> 发送时间: Thursday, October 28, 2021 3:02 PM
> 收件人: Feifei Wang <Feifei.Wang2@arm.com>
> 抄送: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>; dpdk-dev
> <dev@dpdk.org>; nd <nd@arm.com>; Ananyev, Konstantin
> <konstantin.ananyev@intel.com>; Stephen Hemminger
> <stephen@networkplumber.org>; David Marchand
> <david.marchand@redhat.com>; thomas@monjalon.net; Mattias Rönnblom
> <mattias.ronnblom@ericsson.com>; Ruifeng Wang <Ruifeng.Wang@arm.com>
> 主题: Re: [dpdk-dev] [PATCH v7 3/5] eal: use wait event scheme for mcslock
> 
> On Thu, Oct 28, 2021 at 12:27 PM Feifei Wang <feifei.wang2@arm.com>
> wrote:
> >
> > Instead of polling for mcslock to be updated, use wait event scheme
> > for this case.
> >
> > Furthermore, use 'uintptr_t *' is for different size of pointer in
> > 32/64 bits architecture.
> >
> > And define a new pointer 'next' for the compilation error:
> > -------------------------------------------------------------------
> > 'dereferencing type-punned pointer will break strict-aliasing rules'
> > -------------------------------------------------------------------
> >
> > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > ---
> >  lib/eal/include/generic/rte_mcslock.h | 5 +++--
> >  1 file changed, 3 insertions(+), 2 deletions(-)
> >
> > diff --git a/lib/eal/include/generic/rte_mcslock.h
> > b/lib/eal/include/generic/rte_mcslock.h
> > index 34f33c64a5..d5b9b293cd 100644
> > --- a/lib/eal/include/generic/rte_mcslock.h
> > +++ b/lib/eal/include/generic/rte_mcslock.h
> > @@ -116,8 +116,9 @@ rte_mcslock_unlock(rte_mcslock_t **msl,
> rte_mcslock_t *me)
> >                 /* More nodes added to the queue by other CPUs.
> >                  * Wait until the next pointer is set.
> >                  */
> > -               while (__atomic_load_n(&me->next, __ATOMIC_RELAXED) ==
> NULL)
> > -                       rte_pause();
> > +               uintptr_t *next = NULL;
> 
> It is going to update in the next line. Why explicit NULL assignment?
You are right, it is unnecessary to initialize it as NULL. I will update this.
> 
> > +               next = (uintptr_t *)&me->next;
> > +               rte_wait_event(next, UINTPTR_MAX, ==, 0,
> > + __ATOMIC_RELAXED);
> >         }
> >
> >         /* Pass lock to next waiter. */
> > --
> > 2.25.1
> >

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [dpdk-dev] [PATCH v7 1/5] eal: add new definitions for wait scheme
  2021-10-28  6:56   ` [dpdk-dev] [PATCH v7 1/5] eal: " Feifei Wang
@ 2021-10-28  7:15     ` Jerin Jacob
  2021-10-28  7:40       ` [dpdk-dev] 回复: " Feifei Wang
  2021-10-28 13:14     ` [dpdk-dev] " Ananyev, Konstantin
  1 sibling, 1 reply; 113+ messages in thread
From: Jerin Jacob @ 2021-10-28  7:15 UTC (permalink / raw)
  To: Feifei Wang
  Cc: Ruifeng Wang, dpdk-dev, nd, Ananyev, Konstantin,
	Stephen Hemminger, David Marchand, Thomas Monjalon,
	Mattias Rönnblom

On Thu, Oct 28, 2021 at 12:26 PM Feifei Wang <feifei.wang2@arm.com> wrote:
>
> Introduce macros as generic interface for address monitoring.
> For different size, encapsulate '__LOAD_EXC_16', '__LOAD_EXC_32'
> and '__LOAD_EXC_64' into a new macro '__LOAD_EXC'.
>
> Furthermore, to prevent compilation warning in arm:
> ----------------------------------------------
> 'warning: implicit declaration of function ...'
> ----------------------------------------------
> Delete 'undef' constructions for '__LOAD_EXC_xx', '__SEVL' and '__WFE'.
> And add ‘__RTE_ARM’ for these macros to fix the namespace.
>
> This is because original macros are undefine at the end of the file.
> If new macro 'rte_wait_event' calls them in other files, they will be
> seen as 'not defined'.
>
> Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> ---

> +static __rte_always_inline void
> +rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> +               int memorder)
> +{
> +       uint16_t value;
> +
> +       assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);

Assert is not good in the library, Why not RTE_BUILD_BUG_ON here


> +
> +       __RTE_ARM_LOAD_EXC_16(addr, value, memorder)
>         if (value != expected) {
> -               __SEVL()
> +                __RTE_ARM_SEVL()
>                 do {
> -                       __WFE()
> -                       __LOAD_EXC_16(addr, value, memorder)
> +                       __RTE_ARM_WFE()
> +                       __RTE_ARM_LOAD_EXC_16(addr, value, memorder)
>                 } while (value != expected);
>         }
> -#undef __LOAD_EXC_16
>  }
>
>  static __rte_always_inline void
> @@ -77,34 +124,14 @@ rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
>
>         assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
>
> -       /*
> -        * Atomic exclusive load from addr, it returns the 32-bit content of
> -        * *addr while making it 'monitored',when it is written by someone
> -        * else, the 'monitored' state is cleared and a event is generated
> -        * implicitly to exit WFE.
> -        */
> -#define __LOAD_EXC_32(src, dst, memorder) {              \
> -       if (memorder == __ATOMIC_RELAXED) {              \
> -               asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> -                       : [tmp] "=&r" (dst)              \
> -                       : [addr] "r"(src)                \
> -                       : "memory");                     \
> -       } else {                                         \
> -               asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> -                       : [tmp] "=&r" (dst)              \
> -                       : [addr] "r"(src)                \
> -                       : "memory");                     \
> -       } }
> -
> -       __LOAD_EXC_32(addr, value, memorder)
> +       __RTE_ARM_LOAD_EXC_32(addr, value, memorder)
>         if (value != expected) {
> -               __SEVL()
> +               __RTE_ARM_SEVL()
>                 do {
> -                       __WFE()
> -                       __LOAD_EXC_32(addr, value, memorder)
> +                       __RTE_ARM_WFE()
> +                       __RTE_ARM_LOAD_EXC_32(addr, value, memorder)
>                 } while (value != expected);
>         }
> -#undef __LOAD_EXC_32
>  }
>
>  static __rte_always_inline void
> @@ -115,38 +142,33 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
>
>         assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);

remove assert and change to BUILD_BUG_ON

>
> -       /*
> -        * Atomic exclusive load from addr, it returns the 64-bit content of
> -        * *addr while making it 'monitored',when it is written by someone
> -        * else, the 'monitored' state is cleared and a event is generated
> -        * implicitly to exit WFE.
> -        */
> -#define __LOAD_EXC_64(src, dst, memorder) {              \
> -       if (memorder == __ATOMIC_RELAXED) {              \
> -               asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> -                       : [tmp] "=&r" (dst)              \
> -                       : [addr] "r"(src)                \
> -                       : "memory");                     \
> -       } else {                                         \
> -               asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> -                       : [tmp] "=&r" (dst)              \
> -                       : [addr] "r"(src)                \
> -                       : "memory");                     \
> -       } }
> -
> -       __LOAD_EXC_64(addr, value, memorder)
> +       __RTE_ARM_LOAD_EXC_64(addr, value, memorder)
>         if (value != expected) {
> -               __SEVL()
> +               __RTE_ARM_SEVL()
>                 do {
> -                       __WFE()
> -                       __LOAD_EXC_64(addr, value, memorder)
> +                       __RTE_ARM_WFE()
> +                       __RTE_ARM_LOAD_EXC_64(addr, value, memorder)
>                 } while (value != expected);
>         }
>  }
> -#undef __LOAD_EXC_64
>
> -#undef __SEVL
> -#undef __WFE
> +#define rte_wait_event(addr, mask, cond, expected, memorder)              \
> +do {                                                                      \
> +       RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));                \
> +       RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&                  \
> +                               memorder != __ATOMIC_RELAXED);            \
> +       uint32_t size = sizeof(*(addr)) << 3;

Add const

> +       typeof(*(addr)) expected_value = (expected);                      \
> +       typeof(*(addr)) value = 0;

Why zero assignment
                                        \
> +       __RTE_ARM_LOAD_EXC((addr), value, memorder, size)                 \

Assert is not good in the library, Why not RTE_BUILD_BUG_ON here


> +       if ((value & (mask)) cond expected_value) {                       \
> +               __RTE_ARM_SEVL()                                          \
> +               do {                                                      \
> +                       __RTE_ARM_WFE()                                   \
> +                       __RTE_ARM_LOAD_EXC((addr), value, memorder, size) \

if the address is the type of __int128_t. This logic will fail? Could
you add 128bit support too and
remove the assert from __RTE_ARM_LOAD_EXC


> +               } while ((value & (mask)) cond expected_value);           \
> +       }                                                                 \
> +} while (0)
>
>  #endif
>
> diff --git a/lib/eal/include/generic/rte_pause.h b/lib/eal/include/generic/rte_pause.h
> index 668ee4a184..d0c5b5a415 100644
> --- a/lib/eal/include/generic/rte_pause.h
> +++ b/lib/eal/include/generic/rte_pause.h
> @@ -111,6 +111,34 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
>         while (__atomic_load_n(addr, memorder) != expected)
>                 rte_pause();
>  }
> +
> +/*
> + * Wait until *addr breaks the condition, with a relaxed memory
> + * ordering model meaning the loads around this API can be reordered.
> + *
> + * @param addr
> + *  A pointer to the memory location.
> + * @param mask
> + *  A mask of value bits in interest.
> + * @param cond
> + *  A symbol representing the condition.
> + * @param expected
> + *  An expected value to be in the memory location.
> + * @param memorder
> + *  Two different memory orders that can be specified:
> + *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> + *  C++11 memory orders with the same names, see the C++11 standard or
> + *  the GCC wiki on atomic synchronization for detailed definition.
> + */
> +#define rte_wait_event(addr, mask, cond, expected, memorder)                       \
> +do {                                                                               \
> +       RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));                         \
> +       RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&                           \
> +                               memorder != __ATOMIC_RELAXED);                     \
> +       typeof(*(addr)) expected_value = (expected);                               \
> +       while ((__atomic_load_n((addr), (memorder)) & (mask)) cond expected_value) \
> +               rte_pause();                                                       \
> +} while (0)
>  #endif
>
>  #endif /* _RTE_PAUSE_H_ */
> --
> 2.25.1
>

^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] 回复: [PATCH v7 1/5] eal: add new definitions for wait scheme
  2021-10-28  7:15     ` Jerin Jacob
@ 2021-10-28  7:40       ` Feifei Wang
  2021-10-28  7:51         ` [dpdk-dev] " Jerin Jacob
  0 siblings, 1 reply; 113+ messages in thread
From: Feifei Wang @ 2021-10-28  7:40 UTC (permalink / raw)
  To: Jerin Jacob
  Cc: Ruifeng Wang, dpdk-dev, nd, Ananyev, Konstantin,
	Stephen Hemminger, David Marchand, thomas, Mattias Rönnblom,
	nd



> -----邮件原件-----
> 发件人: Jerin Jacob <jerinjacobk@gmail.com>
> 发送时间: Thursday, October 28, 2021 3:16 PM
> 收件人: Feifei Wang <Feifei.Wang2@arm.com>
> 抄送: Ruifeng Wang <Ruifeng.Wang@arm.com>; dpdk-dev <dev@dpdk.org>;
> nd <nd@arm.com>; Ananyev, Konstantin <konstantin.ananyev@intel.com>;
> Stephen Hemminger <stephen@networkplumber.org>; David Marchand
> <david.marchand@redhat.com>; thomas@monjalon.net; Mattias Rönnblom
> <mattias.ronnblom@ericsson.com>
> 主题: Re: [PATCH v7 1/5] eal: add new definitions for wait scheme
> 
> On Thu, Oct 28, 2021 at 12:26 PM Feifei Wang <feifei.wang2@arm.com>
> wrote:
> >
> > Introduce macros as generic interface for address monitoring.
> > For different size, encapsulate '__LOAD_EXC_16', '__LOAD_EXC_32'
> > and '__LOAD_EXC_64' into a new macro '__LOAD_EXC'.
> >
> > Furthermore, to prevent compilation warning in arm:
> > ----------------------------------------------
> > 'warning: implicit declaration of function ...'
> > ----------------------------------------------
> > Delete 'undef' constructions for '__LOAD_EXC_xx', '__SEVL' and '__WFE'.
> > And add ‘__RTE_ARM’ for these macros to fix the namespace.
> >
> > This is because original macros are undefine at the end of the file.
> > If new macro 'rte_wait_event' calls them in other files, they will be
> > seen as 'not defined'.
> >
> > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > ---
> 
> > +static __rte_always_inline void
> > +rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> > +               int memorder)
> > +{
> > +       uint16_t value;
> > +
> > +       assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > + __ATOMIC_RELAXED);
> 
> Assert is not good in the library, Why not RTE_BUILD_BUG_ON here
[Feifei] This line is the original code which has nothing to do with this patch, 
I can change it in the next version.
> 
> 
> > +
> > +       __RTE_ARM_LOAD_EXC_16(addr, value, memorder)
> >         if (value != expected) {
> > -               __SEVL()
> > +                __RTE_ARM_SEVL()
> >                 do {
> > -                       __WFE()
> > -                       __LOAD_EXC_16(addr, value, memorder)
> > +                       __RTE_ARM_WFE()
> > +                       __RTE_ARM_LOAD_EXC_16(addr, value, memorder)
> >                 } while (value != expected);
> >         }
> > -#undef __LOAD_EXC_16
> >  }
> >
> >  static __rte_always_inline void
> > @@ -77,34 +124,14 @@ rte_wait_until_equal_32(volatile uint32_t *addr,
> > uint32_t expected,
> >
> >         assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > __ATOMIC_RELAXED);
> >
> > -       /*
> > -        * Atomic exclusive load from addr, it returns the 32-bit content of
> > -        * *addr while making it 'monitored',when it is written by someone
> > -        * else, the 'monitored' state is cleared and a event is generated
> > -        * implicitly to exit WFE.
> > -        */
> > -#define __LOAD_EXC_32(src, dst, memorder) {              \
> > -       if (memorder == __ATOMIC_RELAXED) {              \
> > -               asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> > -                       : [tmp] "=&r" (dst)              \
> > -                       : [addr] "r"(src)                \
> > -                       : "memory");                     \
> > -       } else {                                         \
> > -               asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> > -                       : [tmp] "=&r" (dst)              \
> > -                       : [addr] "r"(src)                \
> > -                       : "memory");                     \
> > -       } }
> > -
> > -       __LOAD_EXC_32(addr, value, memorder)
> > +       __RTE_ARM_LOAD_EXC_32(addr, value, memorder)
> >         if (value != expected) {
> > -               __SEVL()
> > +               __RTE_ARM_SEVL()
> >                 do {
> > -                       __WFE()
> > -                       __LOAD_EXC_32(addr, value, memorder)
> > +                       __RTE_ARM_WFE()
> > +                       __RTE_ARM_LOAD_EXC_32(addr, value, memorder)
> >                 } while (value != expected);
> >         }
> > -#undef __LOAD_EXC_32
> >  }
> >
> >  static __rte_always_inline void
> > @@ -115,38 +142,33 @@ rte_wait_until_equal_64(volatile uint64_t *addr,
> > uint64_t expected,
> >
> >         assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > __ATOMIC_RELAXED);
> 
> remove assert and change to BUILD_BUG_ON
[Feifei] OK
> 
> >
> > -       /*
> > -        * Atomic exclusive load from addr, it returns the 64-bit content of
> > -        * *addr while making it 'monitored',when it is written by someone
> > -        * else, the 'monitored' state is cleared and a event is generated
> > -        * implicitly to exit WFE.
> > -        */
> > -#define __LOAD_EXC_64(src, dst, memorder) {              \
> > -       if (memorder == __ATOMIC_RELAXED) {              \
> > -               asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> > -                       : [tmp] "=&r" (dst)              \
> > -                       : [addr] "r"(src)                \
> > -                       : "memory");                     \
> > -       } else {                                         \
> > -               asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> > -                       : [tmp] "=&r" (dst)              \
> > -                       : [addr] "r"(src)                \
> > -                       : "memory");                     \
> > -       } }
> > -
> > -       __LOAD_EXC_64(addr, value, memorder)
> > +       __RTE_ARM_LOAD_EXC_64(addr, value, memorder)
> >         if (value != expected) {
> > -               __SEVL()
> > +               __RTE_ARM_SEVL()
> >                 do {
> > -                       __WFE()
> > -                       __LOAD_EXC_64(addr, value, memorder)
> > +                       __RTE_ARM_WFE()
> > +                       __RTE_ARM_LOAD_EXC_64(addr, value, memorder)
> >                 } while (value != expected);
> >         }
> >  }
> > -#undef __LOAD_EXC_64
> >
> > -#undef __SEVL
> > -#undef __WFE
> > +#define rte_wait_event(addr, mask, cond, expected, memorder)              \
> > +do {                                                                      \
> > +       RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));                \
> > +       RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&
> \
> > +                               memorder != __ATOMIC_RELAXED);            \
> > +       uint32_t size = sizeof(*(addr)) << 3;
> 
> Add const
[Feifei] OK. 
> > +       typeof(*(addr)) expected_value = (expected);                      \
> > +       typeof(*(addr)) value = 0;
> 
> Why zero assignment
I will delete this initialization.
>                                         \
> > +       __RTE_ARM_LOAD_EXC((addr), value, memorder, size)                 \
> 
> Assert is not good in the library, Why not RTE_BUILD_BUG_ON here
[Feifei] For __RTE_ARM_LOAD_EXC, 'size' is not known until the code is running.
So 'size' cannot be checked at compile time, and BUILD_BUG_ON doesn't work here.
> 
> 
> > +       if ((value & (mask)) cond expected_value) {                       \
> > +               __RTE_ARM_SEVL()                                          \
> > +               do {                                                      \
> > +                       __RTE_ARM_WFE()                                   \
> > +                       __RTE_ARM_LOAD_EXC((addr), value, memorder,
> > + size) \
> 
> if the address is the type of __int128_t. This logic will fail? Could you add
> 128bit support too and remove the assert from __RTE_ARM_LOAD_EXC
[Feifei] There is no 128-bit case in the library. If a 128-bit case appears later, we can
add a 128-bit path here. For now there is an assert check in __RTE_ARM_LOAD_EXC to check
whether size is '16/32/64'.
> 
> 
> > +               } while ((value & (mask)) cond expected_value);           \
> > +       }                                                                 \
> > +} while (0)
> >
> >  #endif
> >
> > diff --git a/lib/eal/include/generic/rte_pause.h
> > b/lib/eal/include/generic/rte_pause.h
> > index 668ee4a184..d0c5b5a415 100644
> > --- a/lib/eal/include/generic/rte_pause.h
> > +++ b/lib/eal/include/generic/rte_pause.h
> > @@ -111,6 +111,34 @@ rte_wait_until_equal_64(volatile uint64_t *addr,
> uint64_t expected,
> >         while (__atomic_load_n(addr, memorder) != expected)
> >                 rte_pause();
> >  }
> > +
> > +/*
> > + * Wait until *addr breaks the condition, with a relaxed memory
> > + * ordering model meaning the loads around this API can be reordered.
> > + *
> > + * @param addr
> > + *  A pointer to the memory location.
> > + * @param mask
> > + *  A mask of value bits in interest.
> > + * @param cond
> > + *  A symbol representing the condition.
> > + * @param expected
> > + *  An expected value to be in the memory location.
> > + * @param memorder
> > + *  Two different memory orders that can be specified:
> > + *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> > + *  C++11 memory orders with the same names, see the C++11 standard
> > +or
> > + *  the GCC wiki on atomic synchronization for detailed definition.
> > + */
> > +#define rte_wait_event(addr, mask, cond, expected, memorder)
> \
> > +do {                                                                               \
> > +       RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));
> \
> > +       RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&
> \
> > +                               memorder != __ATOMIC_RELAXED);                     \
> > +       typeof(*(addr)) expected_value = (expected);                               \
> > +       while ((__atomic_load_n((addr), (memorder)) & (mask)) cond
> expected_value) \
> > +               rte_pause();                                                       \
> > +} while (0)
> >  #endif
> >
> >  #endif /* _RTE_PAUSE_H_ */
> > --
> > 2.25.1
> >

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [dpdk-dev] [PATCH v7 1/5] eal: add new definitions for wait scheme
  2021-10-28  7:40       ` [dpdk-dev] 回复: " Feifei Wang
@ 2021-10-28  7:51         ` Jerin Jacob
  2021-10-28  9:27           ` [dpdk-dev] 回复: " Feifei Wang
  0 siblings, 1 reply; 113+ messages in thread
From: Jerin Jacob @ 2021-10-28  7:51 UTC (permalink / raw)
  To: Feifei Wang
  Cc: Ruifeng Wang, dpdk-dev, nd, Ananyev, Konstantin,
	Stephen Hemminger, David Marchand, thomas, Mattias Rönnblom

On Thu, Oct 28, 2021 at 1:11 PM Feifei Wang <Feifei.Wang2@arm.com> wrote:
>
>
>
> > -----邮件原件-----
> > 发件人: Jerin Jacob <jerinjacobk@gmail.com>
> > 发送时间: Thursday, October 28, 2021 3:16 PM
> > 收件人: Feifei Wang <Feifei.Wang2@arm.com>
> > 抄送: Ruifeng Wang <Ruifeng.Wang@arm.com>; dpdk-dev <dev@dpdk.org>;
> > nd <nd@arm.com>; Ananyev, Konstantin <konstantin.ananyev@intel.com>;
> > Stephen Hemminger <stephen@networkplumber.org>; David Marchand
> > <david.marchand@redhat.com>; thomas@monjalon.net; Mattias Rönnblom
> > <mattias.ronnblom@ericsson.com>
> > 主题: Re: [PATCH v7 1/5] eal: add new definitions for wait scheme
> >
> > On Thu, Oct 28, 2021 at 12:26 PM Feifei Wang <feifei.wang2@arm.com>
> > wrote:
> > >
> > > Introduce macros as generic interface for address monitoring.
> > > For different size, encapsulate '__LOAD_EXC_16', '__LOAD_EXC_32'
> > > and '__LOAD_EXC_64' into a new macro '__LOAD_EXC'.
> > >
> > > Furthermore, to prevent compilation warning in arm:
> > > ----------------------------------------------
> > > 'warning: implicit declaration of function ...'
> > > ----------------------------------------------
> > > Delete 'undef' constructions for '__LOAD_EXC_xx', '__SEVL' and '__WFE'.
> > > And add ‘__RTE_ARM’ for these macros to fix the namespace.
> > >
> > > This is because original macros are undefine at the end of the file.
> > > If new macro 'rte_wait_event' calls them in other files, they will be
> > > seen as 'not defined'.
> > >
> > > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > > ---
> >
> > > +static __rte_always_inline void
> > > +rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> > > +               int memorder)
> > > +{
> > > +       uint16_t value;
> > > +
> > > +       assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > + __ATOMIC_RELAXED);
> >
> > Assert is not good in the library, Why not RTE_BUILD_BUG_ON here
> [Feifei] This line is the original code which has nothing to do with this patch,
> I can change it in the next version.
> >
> >
> > > +
> > > +       __RTE_ARM_LOAD_EXC_16(addr, value, memorder)
> > >         if (value != expected) {
> > > -               __SEVL()
> > > +                __RTE_ARM_SEVL()
> > >                 do {
> > > -                       __WFE()
> > > -                       __LOAD_EXC_16(addr, value, memorder)
> > > +                       __RTE_ARM_WFE()
> > > +                       __RTE_ARM_LOAD_EXC_16(addr, value, memorder)
> > >                 } while (value != expected);
> > >         }
> > > -#undef __LOAD_EXC_16
> > >  }
> > >
> > >  static __rte_always_inline void
> > > @@ -77,34 +124,14 @@ rte_wait_until_equal_32(volatile uint32_t *addr,
> > > uint32_t expected,
> > >
> > >         assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > __ATOMIC_RELAXED);
> > >
> > > -       /*
> > > -        * Atomic exclusive load from addr, it returns the 32-bit content of
> > > -        * *addr while making it 'monitored',when it is written by someone
> > > -        * else, the 'monitored' state is cleared and a event is generated
> > > -        * implicitly to exit WFE.
> > > -        */
> > > -#define __LOAD_EXC_32(src, dst, memorder) {              \
> > > -       if (memorder == __ATOMIC_RELAXED) {              \
> > > -               asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> > > -                       : [tmp] "=&r" (dst)              \
> > > -                       : [addr] "r"(src)                \
> > > -                       : "memory");                     \
> > > -       } else {                                         \
> > > -               asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> > > -                       : [tmp] "=&r" (dst)              \
> > > -                       : [addr] "r"(src)                \
> > > -                       : "memory");                     \
> > > -       } }
> > > -
> > > -       __LOAD_EXC_32(addr, value, memorder)
> > > +       __RTE_ARM_LOAD_EXC_32(addr, value, memorder)
> > >         if (value != expected) {
> > > -               __SEVL()
> > > +               __RTE_ARM_SEVL()
> > >                 do {
> > > -                       __WFE()
> > > -                       __LOAD_EXC_32(addr, value, memorder)
> > > +                       __RTE_ARM_WFE()
> > > +                       __RTE_ARM_LOAD_EXC_32(addr, value, memorder)
> > >                 } while (value != expected);
> > >         }
> > > -#undef __LOAD_EXC_32
> > >  }
> > >
> > >  static __rte_always_inline void
> > > @@ -115,38 +142,33 @@ rte_wait_until_equal_64(volatile uint64_t *addr,
> > > uint64_t expected,
> > >
> > >         assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > __ATOMIC_RELAXED);
> >
> > remove assert and change to BUILD_BUG_ON
> [Feifei] OK
> >
> > >
> > > -       /*
> > > -        * Atomic exclusive load from addr, it returns the 64-bit content of
> > > -        * *addr while making it 'monitored',when it is written by someone
> > > -        * else, the 'monitored' state is cleared and a event is generated
> > > -        * implicitly to exit WFE.
> > > -        */
> > > -#define __LOAD_EXC_64(src, dst, memorder) {              \
> > > -       if (memorder == __ATOMIC_RELAXED) {              \
> > > -               asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> > > -                       : [tmp] "=&r" (dst)              \
> > > -                       : [addr] "r"(src)                \
> > > -                       : "memory");                     \
> > > -       } else {                                         \
> > > -               asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> > > -                       : [tmp] "=&r" (dst)              \
> > > -                       : [addr] "r"(src)                \
> > > -                       : "memory");                     \
> > > -       } }
> > > -
> > > -       __LOAD_EXC_64(addr, value, memorder)
> > > +       __RTE_ARM_LOAD_EXC_64(addr, value, memorder)
> > >         if (value != expected) {
> > > -               __SEVL()
> > > +               __RTE_ARM_SEVL()
> > >                 do {
> > > -                       __WFE()
> > > -                       __LOAD_EXC_64(addr, value, memorder)
> > > +                       __RTE_ARM_WFE()
> > > +                       __RTE_ARM_LOAD_EXC_64(addr, value, memorder)
> > >                 } while (value != expected);
> > >         }
> > >  }
> > > -#undef __LOAD_EXC_64
> > >
> > > -#undef __SEVL
> > > -#undef __WFE
> > > +#define rte_wait_event(addr, mask, cond, expected, memorder)              \
> > > +do {                                                                      \
> > > +       RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));                \
> > > +       RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&
> > \
> > > +                               memorder != __ATOMIC_RELAXED);            \
> > > +       uint32_t size = sizeof(*(addr)) << 3;
> >
> > Add const
> [Feifei] OK.
> > > +       typeof(*(addr)) expected_value = (expected);                      \
> > > +       typeof(*(addr)) value = 0;
> >
> > Why zero assignment
> I will delete this initialization.
> >                                         \
> > > +       __RTE_ARM_LOAD_EXC((addr), value, memorder, size)                 \
> >
> > Assert is not good in the library, Why not RTE_BUILD_BUG_ON here
> [Feifei] For __RTE_ARM_LOAD_EXC, 'size' is known until code is running.
> So it cannot check 'size' in the compile time and BUILD_BUG_ON doesn't work here.

The value of uint32_t size = sizeof(*(addr)) << 3 is determined at compile time,
as _sizeof_ is evaluated by the compiler.
So I think BUILD_BUG_ON is fine.



> >
> >
> > > +       if ((value & (mask)) cond expected_value) {                       \
> > > +               __RTE_ARM_SEVL()                                          \
> > > +               do {                                                      \
> > > +                       __RTE_ARM_WFE()                                   \
> > > +                       __RTE_ARM_LOAD_EXC((addr), value, memorder,
> > > + size) \
> >
> > if the address is the type of __int128_t. This logic will fail? Could you add
> > 128bit support too and remove the assert from __RTE_ARM_LOAD_EXC
> [Feifei] There is no 128bit case in library. And maybe there will be 128bits case, we can
> add 128 path here. Now there is assert check in  __RTE_ARM_LOAD_EXC to check
> whether size is '16/32/64'.

The API expects only "addr" without any type, so the application can use
128-bit too.

Worst case, for now we can fall back to __atomic_load_n() for size
128; we don't want
to break applications while using this API. Or add support for 128-bit in the code.


> >
> >
> > > +               } while ((value & (mask)) cond expected_value);           \
> > > +       }                                                                 \
> > > +} while (0)
> > >
> > >  #endif
> > >
> > > diff --git a/lib/eal/include/generic/rte_pause.h
> > > b/lib/eal/include/generic/rte_pause.h
> > > index 668ee4a184..d0c5b5a415 100644
> > > --- a/lib/eal/include/generic/rte_pause.h
> > > +++ b/lib/eal/include/generic/rte_pause.h
> > > @@ -111,6 +111,34 @@ rte_wait_until_equal_64(volatile uint64_t *addr,
> > uint64_t expected,
> > >         while (__atomic_load_n(addr, memorder) != expected)
> > >                 rte_pause();
> > >  }
> > > +
> > > +/*
> > > + * Wait until *addr breaks the condition, with a relaxed memory
> > > + * ordering model meaning the loads around this API can be reordered.
> > > + *
> > > + * @param addr
> > > + *  A pointer to the memory location.
> > > + * @param mask
> > > + *  A mask of value bits in interest.
> > > + * @param cond
> > > + *  A symbol representing the condition.
> > > + * @param expected
> > > + *  An expected value to be in the memory location.
> > > + * @param memorder
> > > + *  Two different memory orders that can be specified:
> > > + *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> > > + *  C++11 memory orders with the same names, see the C++11 standard
> > > +or
> > > + *  the GCC wiki on atomic synchronization for detailed definition.
> > > + */
> > > +#define rte_wait_event(addr, mask, cond, expected, memorder)
> > \
> > > +do {                                                                               \
> > > +       RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));
> > \
> > > +       RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&
> > \
> > > +                               memorder != __ATOMIC_RELAXED);                     \
> > > +       typeof(*(addr)) expected_value = (expected);                               \
> > > +       while ((__atomic_load_n((addr), (memorder)) & (mask)) cond
> > expected_value) \
> > > +               rte_pause();                                                       \
> > > +} while (0)
> > >  #endif
> > >
> > >  #endif /* _RTE_PAUSE_H_ */
> > > --
> > > 2.25.1
> > >

^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] 回复: [PATCH v7 1/5] eal: add new definitions for wait scheme
  2021-10-28  7:51         ` [dpdk-dev] " Jerin Jacob
@ 2021-10-28  9:27           ` Feifei Wang
  0 siblings, 0 replies; 113+ messages in thread
From: Feifei Wang @ 2021-10-28  9:27 UTC (permalink / raw)
  To: Jerin Jacob
  Cc: Ruifeng Wang, dpdk-dev, nd, Ananyev, Konstantin,
	Stephen Hemminger, David Marchand, thomas, Mattias Rönnblom,
	nd



> -----邮件原件-----
> 发件人: Jerin Jacob <jerinjacobk@gmail.com>
> 发送时间: Thursday, October 28, 2021 3:51 PM
> 收件人: Feifei Wang <Feifei.Wang2@arm.com>
> 抄送: Ruifeng Wang <Ruifeng.Wang@arm.com>; dpdk-dev <dev@dpdk.org>;
> nd <nd@arm.com>; Ananyev, Konstantin <konstantin.ananyev@intel.com>;
> Stephen Hemminger <stephen@networkplumber.org>; David Marchand
> <david.marchand@redhat.com>; thomas@monjalon.net; Mattias Rönnblom
> <mattias.ronnblom@ericsson.com>
> 主题: Re: [PATCH v7 1/5] eal: add new definitions for wait scheme
> 
> On Thu, Oct 28, 2021 at 1:11 PM Feifei Wang <Feifei.Wang2@arm.com>
> wrote:
> >
> >
> >
> > > -----邮件原件-----
> > > 发件人: Jerin Jacob <jerinjacobk@gmail.com>
> > > 发送时间: Thursday, October 28, 2021 3:16 PM
> > > 收件人: Feifei Wang <Feifei.Wang2@arm.com>
> > > 抄送: Ruifeng Wang <Ruifeng.Wang@arm.com>; dpdk-dev
> <dev@dpdk.org>; nd
> > > <nd@arm.com>; Ananyev, Konstantin <konstantin.ananyev@intel.com>;
> > > Stephen Hemminger <stephen@networkplumber.org>; David Marchand
> > > <david.marchand@redhat.com>; thomas@monjalon.net; Mattias
> Rönnblom
> > > <mattias.ronnblom@ericsson.com>
> > > 主题: Re: [PATCH v7 1/5] eal: add new definitions for wait scheme
> > >
> > > On Thu, Oct 28, 2021 at 12:26 PM Feifei Wang <feifei.wang2@arm.com>
> > > wrote:
> > > >
> > > > Introduce macros as generic interface for address monitoring.
> > > > For different size, encapsulate '__LOAD_EXC_16', '__LOAD_EXC_32'
> > > > and '__LOAD_EXC_64' into a new macro '__LOAD_EXC'.
> > > >
> > > > Furthermore, to prevent compilation warning in arm:
> > > > ----------------------------------------------
> > > > 'warning: implicit declaration of function ...'
> > > > ----------------------------------------------
> > > > Delete 'undef' constructions for '__LOAD_EXC_xx', '__SEVL' and
> '__WFE'.
> > > > And add ‘__RTE_ARM’ for these macros to fix the namespace.
> > > >
> > > > This is because original macros are undefine at the end of the file.
> > > > If new macro 'rte_wait_event' calls them in other files, they will
> > > > be seen as 'not defined'.
> > > >
> > > > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > > > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > > > ---
> > >
> > > > +static __rte_always_inline void
> > > > +rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> > > > +               int memorder)
> > > > +{
> > > > +       uint16_t value;
> > > > +
> > > > +       assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > > + __ATOMIC_RELAXED);
> > >
> > > Assert is not good in the library, Why not RTE_BUILD_BUG_ON here
> > [Feifei] This line is the original code which has nothing to do with
> > this patch, I can change it in the next version.
> > >
> > >
> > > > +
> > > > +       __RTE_ARM_LOAD_EXC_16(addr, value, memorder)
> > > >         if (value != expected) {
> > > > -               __SEVL()
> > > > +                __RTE_ARM_SEVL()
> > > >                 do {
> > > > -                       __WFE()
> > > > -                       __LOAD_EXC_16(addr, value, memorder)
> > > > +                       __RTE_ARM_WFE()
> > > > +                       __RTE_ARM_LOAD_EXC_16(addr, value,
> > > > + memorder)
> > > >                 } while (value != expected);
> > > >         }
> > > > -#undef __LOAD_EXC_16
> > > >  }
> > > >
> > > >  static __rte_always_inline void
> > > > @@ -77,34 +124,14 @@ rte_wait_until_equal_32(volatile uint32_t
> > > > *addr, uint32_t expected,
> > > >
> > > >         assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > > __ATOMIC_RELAXED);
> > > >
> > > > -       /*
> > > > -        * Atomic exclusive load from addr, it returns the 32-bit content of
> > > > -        * *addr while making it 'monitored',when it is written by someone
> > > > -        * else, the 'monitored' state is cleared and a event is generated
> > > > -        * implicitly to exit WFE.
> > > > -        */
> > > > -#define __LOAD_EXC_32(src, dst, memorder) {              \
> > > > -       if (memorder == __ATOMIC_RELAXED) {              \
> > > > -               asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> > > > -                       : [tmp] "=&r" (dst)              \
> > > > -                       : [addr] "r"(src)                \
> > > > -                       : "memory");                     \
> > > > -       } else {                                         \
> > > > -               asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> > > > -                       : [tmp] "=&r" (dst)              \
> > > > -                       : [addr] "r"(src)                \
> > > > -                       : "memory");                     \
> > > > -       } }
> > > > -
> > > > -       __LOAD_EXC_32(addr, value, memorder)
> > > > +       __RTE_ARM_LOAD_EXC_32(addr, value, memorder)
> > > >         if (value != expected) {
> > > > -               __SEVL()
> > > > +               __RTE_ARM_SEVL()
> > > >                 do {
> > > > -                       __WFE()
> > > > -                       __LOAD_EXC_32(addr, value, memorder)
> > > > +                       __RTE_ARM_WFE()
> > > > +                       __RTE_ARM_LOAD_EXC_32(addr, value,
> > > > + memorder)
> > > >                 } while (value != expected);
> > > >         }
> > > > -#undef __LOAD_EXC_32
> > > >  }
> > > >
> > > >  static __rte_always_inline void
> > > > @@ -115,38 +142,33 @@ rte_wait_until_equal_64(volatile uint64_t
> > > > *addr, uint64_t expected,
> > > >
> > > >         assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > > __ATOMIC_RELAXED);
> > >
> > > remove assert and change to BUILD_BUG_ON
> > [Feifei] OK
> > >
> > > >
> > > > -       /*
> > > > -        * Atomic exclusive load from addr, it returns the 64-bit content of
> > > > -        * *addr while making it 'monitored',when it is written by someone
> > > > -        * else, the 'monitored' state is cleared and a event is generated
> > > > -        * implicitly to exit WFE.
> > > > -        */
> > > > -#define __LOAD_EXC_64(src, dst, memorder) {              \
> > > > -       if (memorder == __ATOMIC_RELAXED) {              \
> > > > -               asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> > > > -                       : [tmp] "=&r" (dst)              \
> > > > -                       : [addr] "r"(src)                \
> > > > -                       : "memory");                     \
> > > > -       } else {                                         \
> > > > -               asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> > > > -                       : [tmp] "=&r" (dst)              \
> > > > -                       : [addr] "r"(src)                \
> > > > -                       : "memory");                     \
> > > > -       } }
> > > > -
> > > > -       __LOAD_EXC_64(addr, value, memorder)
> > > > +       __RTE_ARM_LOAD_EXC_64(addr, value, memorder)
> > > >         if (value != expected) {
> > > > -               __SEVL()
> > > > +               __RTE_ARM_SEVL()
> > > >                 do {
> > > > -                       __WFE()
> > > > -                       __LOAD_EXC_64(addr, value, memorder)
> > > > +                       __RTE_ARM_WFE()
> > > > +                       __RTE_ARM_LOAD_EXC_64(addr, value,
> > > > + memorder)
> > > >                 } while (value != expected);
> > > >         }
> > > >  }
> > > > -#undef __LOAD_EXC_64
> > > >
> > > > -#undef __SEVL
> > > > -#undef __WFE
> > > > +#define rte_wait_event(addr, mask, cond, expected, memorder)
> \
> > > > +do {                                                                      \
> > > > +       RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));                \
> > > > +       RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&
> > > \
> > > > +                               memorder != __ATOMIC_RELAXED);            \
> > > > +       uint32_t size = sizeof(*(addr)) << 3;
> > >
> > > Add const
> > [Feifei] OK.
> > > > +       typeof(*(addr)) expected_value = (expected);                      \
> > > > +       typeof(*(addr)) value = 0;
> > >
> > > Why zero assignment
> > I will delete this initialization.
> > >                                         \
> > > > +       __RTE_ARM_LOAD_EXC((addr), value, memorder, size)                 \
> > >
> > > Assert is not good in the library, Why not RTE_BUILD_BUG_ON here
> > [Feifei] For __RTE_ARM_LOAD_EXC, 'size' is known until code is running.
> > So it cannot check 'size' in the compile time and BUILD_BUG_ON doesn't
> work here.
> 
> uint32_t size = sizeof(*(addr)) << 3 value will get in comple time as _sizeof_ is
> preprocessor function.
> So I think, BUILD_BUG_ON is fine.
[Feifei] You are right. I tried with build_bug_on, and it works for checking 'size'.
> 
> 
> 
> > >
> > >
> > > > +       if ((value & (mask)) cond expected_value) {                       \
> > > > +               __RTE_ARM_SEVL()                                          \
> > > > +               do {                                                      \
> > > > +                       __RTE_ARM_WFE()                                   \
> > > > +                       __RTE_ARM_LOAD_EXC((addr), value,
> > > > + memorder,
> > > > + size) \
> > >
> > > if the address is the type of __int128_t. This logic will fail?
> > > Could you add 128bit support too and remove the assert from
> > > __RTE_ARM_LOAD_EXC
> > [Feifei] There is no 128bit case in library. And maybe there will be
> > 128bits case, we can add 128 path here. Now there is assert check in
> > __RTE_ARM_LOAD_EXC to check whether size is '16/32/64'.
> 
> API expects is only "addr" without any type so the application can use 128bit
> too.
> 
> Worst case for now we can fall back to __atomic_load_n() for  size 128, we
> dont want to break applications while using this API. Or add support for 128 in
> code.
> 
[Feifei] All right, I will try to add a 128-bit load in the next version.
> 
> > >
> > >
> > > > +               } while ((value & (mask)) cond expected_value);           \
> > > > +       }                                                                 \
> > > > +} while (0)
> > > >
> > > >  #endif
> > > >
> > > > diff --git a/lib/eal/include/generic/rte_pause.h
> > > > b/lib/eal/include/generic/rte_pause.h
> > > > index 668ee4a184..d0c5b5a415 100644
> > > > --- a/lib/eal/include/generic/rte_pause.h
> > > > +++ b/lib/eal/include/generic/rte_pause.h
> > > > @@ -111,6 +111,34 @@ rte_wait_until_equal_64(volatile uint64_t
> > > > *addr,
> > > uint64_t expected,
> > > >         while (__atomic_load_n(addr, memorder) != expected)
> > > >                 rte_pause();
> > > >  }
> > > > +
> > > > +/*
> > > > + * Wait until *addr breaks the condition, with a relaxed memory
> > > > + * ordering model meaning the loads around this API can be reordered.
> > > > + *
> > > > + * @param addr
> > > > + *  A pointer to the memory location.
> > > > + * @param mask
> > > > + *  A mask of value bits in interest.
> > > > + * @param cond
> > > > + *  A symbol representing the condition.
> > > > + * @param expected
> > > > + *  An expected value to be in the memory location.
> > > > + * @param memorder
> > > > + *  Two different memory orders that can be specified:
> > > > + *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> > > > + *  C++11 memory orders with the same names, see the C++11
> > > > +standard or
> > > > + *  the GCC wiki on atomic synchronization for detailed definition.
> > > > + */
> > > > +#define rte_wait_event(addr, mask, cond, expected, memorder)
> > > \
> > > > +do {                                                                               \
> > > > +       RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));
> > > \
> > > > +       RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&
> > > \
> > > > +                               memorder != __ATOMIC_RELAXED);                     \
> > > > +       typeof(*(addr)) expected_value = (expected);                               \
> > > > +       while ((__atomic_load_n((addr), (memorder)) & (mask)) cond
> > > expected_value) \
> > > > +               rte_pause();                                                       \
> > > > +} while (0)
> > > >  #endif
> > > >
> > > >  #endif /* _RTE_PAUSE_H_ */
> > > > --
> > > > 2.25.1
> > > >

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [dpdk-dev] [PATCH v7 1/5] eal: add new definitions for wait scheme
  2021-10-28  6:56   ` [dpdk-dev] [PATCH v7 1/5] eal: " Feifei Wang
  2021-10-28  7:15     ` Jerin Jacob
@ 2021-10-28 13:14     ` Ananyev, Konstantin
  1 sibling, 0 replies; 113+ messages in thread
From: Ananyev, Konstantin @ 2021-10-28 13:14 UTC (permalink / raw)
  To: Feifei Wang, Ruifeng Wang
  Cc: dev, nd, jerinjacobk, stephen, david.marchand, thomas, mattias.ronnblom

> 
> diff --git a/lib/eal/include/generic/rte_pause.h b/lib/eal/include/generic/rte_pause.h
> index 668ee4a184..d0c5b5a415 100644
> --- a/lib/eal/include/generic/rte_pause.h
> +++ b/lib/eal/include/generic/rte_pause.h
> @@ -111,6 +111,34 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
>  	while (__atomic_load_n(addr, memorder) != expected)
>  		rte_pause();
>  }
> +
> +/*
> + * Wait until *addr breaks the condition, with a relaxed memory
> + * ordering model meaning the loads around this API can be reordered.
> + *
> + * @param addr
> + *  A pointer to the memory location.
> + * @param mask
> + *  A mask of value bits in interest.
> + * @param cond
> + *  A symbol representing the condition.
> + * @param expected
> + *  An expected value to be in the memory location.
> + * @param memorder
> + *  Two different memory orders that can be specified:
> + *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> + *  C++11 memory orders with the same names, see the C++11 standard or
> + *  the GCC wiki on atomic synchronization for detailed definition.
> + */
> +#define rte_wait_event(addr, mask, cond, expected, memorder)                       \
> +do {                                                                               \
> +	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));                         \
> +	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&                           \
> +				memorder != __ATOMIC_RELAXED);                     \
> +	typeof(*(addr)) expected_value = (expected);                               \
> +	while ((__atomic_load_n((addr), (memorder)) & (mask)) cond expected_value) \
> +		rte_pause();                                                       \
> +} while (0)
>  #endif
> 
>  #endif /* _RTE_PAUSE_H_ */
> --

From generic/x86 perspective:
Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>

> 2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [dpdk-dev] [PATCH v7 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration
  2021-10-28  6:56   ` [dpdk-dev] [PATCH v7 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration Feifei Wang
@ 2021-10-28 13:15     ` Ananyev, Konstantin
  0 siblings, 0 replies; 113+ messages in thread
From: Ananyev, Konstantin @ 2021-10-28 13:15 UTC (permalink / raw)
  To: Feifei Wang
  Cc: dev, nd, jerinjacobk, stephen, david.marchand, thomas,
	mattias.ronnblom, Ruifeng Wang



> Instead of polling for cbi->use to be updated, use wait event scheme.
> 
> Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> ---
>  lib/bpf/bpf_pkt.c | 9 +++------
>  1 file changed, 3 insertions(+), 6 deletions(-)
> 
> diff --git a/lib/bpf/bpf_pkt.c b/lib/bpf/bpf_pkt.c
> index 6e8248f0d6..c8a1cd1eb8 100644
> --- a/lib/bpf/bpf_pkt.c
> +++ b/lib/bpf/bpf_pkt.c
> @@ -113,7 +113,7 @@ bpf_eth_cbi_unuse(struct bpf_eth_cbi *cbi)
>  static void
>  bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
>  {
> -	uint32_t nuse, puse;
> +	uint32_t puse;
> 
>  	/* make sure all previous loads and stores are completed */
>  	rte_smp_mb();
> @@ -122,11 +122,8 @@ bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
> 
>  	/* in use, busy wait till current RX/TX iteration is finished */
>  	if ((puse & BPF_ETH_CBI_INUSE) != 0) {
> -		do {
> -			rte_pause();
> -			rte_compiler_barrier();
> -			nuse = cbi->use;
> -		} while (nuse == puse);
> +		rte_wait_event((uint32_t *)(uintptr_t)&cbi->use, UINT32_MAX,
> +				==, puse, __ATOMIC_RELAXED);
>  	}
>  }
> 
> --

Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>

> 2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [PATCH v8 0/5] add new definitions for wait scheme
  2021-09-02  5:32 [dpdk-dev] [RFC PATCH v1 0/5] add new API for wait until scheme Feifei Wang
                   ` (10 preceding siblings ...)
  2021-10-28  6:56 ` [dpdk-dev] [PATCH v7 0/5] " Feifei Wang
@ 2021-10-29  8:20 ` Feifei Wang
  2021-10-29  8:20   ` [dpdk-dev] [PATCH v8 1/5] eal: " Feifei Wang
                     ` (4 more replies)
  2021-11-01  6:00 ` [dpdk-dev] [PATCH v9 0/5] add new helper for wait scheme Feifei Wang
  12 siblings, 5 replies; 113+ messages in thread
From: Feifei Wang @ 2021-10-29  8:20 UTC (permalink / raw)
  Cc: dev, nd, jerinjacobk, stephen, david.marchand, thomas,
	mattias.ronnblom, konstantin.ananyev, Feifei Wang

Add new definitions for the wait scheme, and apply these new definitions in
the lib to replace rte_pause.

v2:
1. use macro to create new wait scheme (Stephen)

v3:
1. delete unnecessary bug fix in bpf (Konstantin)

v4:
1. put size into the macro body (Konstantin)
2. replace assert with BUILD_BUG_ON (Stephen)
3. delete unnecessary compiler barrier for bpf (Konstantin)

v5:
1. 'size' is not the parameter (Konstantin)
2. put () around macro parameters (Konstantin)
3. fix some original typo issue (Jerin)
4. swap 'rte_wait_event' parameter location (Jerin)
5. add new macro '__LOAD_EXC'
6. delete 'undef' to prevent compilation warning

v6:
1. fix patch style check warning
2. delete 'bpf' patch due to 'const' limit

v7:
1. add __RTE_ARM to to fix the namespace (Jerin)
2. use 'uintptr_t *' in mcslock for different
architecture(32/64) (Mattias)
3. add a new pointer 'next' in mcslock to fix
compiler issue
4. add bpf patch and use 'uintptr_t' to fix const
discard warning (Konstantin)

v8:
1. add __LOAD_EXC_128 (Jerin)
2. use BUILD_BUG_ON to replace assert (Jerin)

Feifei Wang (5):
  eal: add new definitions for wait scheme
  eal: use wait event for read pflock
  eal: use wait event scheme for mcslock
  lib/bpf: use wait event scheme for Rx/Tx iteration
  lib/distributor: use wait event scheme

 lib/bpf/bpf_pkt.c                        |   9 +-
 lib/distributor/rte_distributor_single.c |  10 +-
 lib/eal/arm/include/rte_pause_64.h       | 202 ++++++++++++++---------
 lib/eal/include/generic/rte_mcslock.h    |   5 +-
 lib/eal/include/generic/rte_pause.h      |  28 ++++
 lib/eal/include/generic/rte_pflock.h     |   4 +-
 6 files changed, 165 insertions(+), 93 deletions(-)

-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [PATCH v8 1/5] eal: add new definitions for wait scheme
  2021-10-29  8:20 ` [dpdk-dev] [PATCH v8 0/5] add new definitions for wait scheme Feifei Wang
@ 2021-10-29  8:20   ` Feifei Wang
  2021-10-29 13:54     ` Jerin Jacob
  2021-10-31  8:38     ` David Marchand
  2021-10-29  8:20   ` [dpdk-dev] [PATCH v8 2/5] eal: use wait event for read pflock Feifei Wang
                     ` (3 subsequent siblings)
  4 siblings, 2 replies; 113+ messages in thread
From: Feifei Wang @ 2021-10-29  8:20 UTC (permalink / raw)
  To: Ruifeng Wang
  Cc: dev, nd, jerinjacobk, stephen, david.marchand, thomas,
	mattias.ronnblom, konstantin.ananyev, Feifei Wang

Introduce macros as a generic interface for address monitoring.

Add '__LOAD_EXC_128' for size of 128. For different size, encapsulate
'__LOAD_EXC_16', '__LOAD_EXC_32', '__LOAD_EXC_64' and '__LOAD_EXC_128'
into a new macro '__LOAD_EXC'.

Furthermore, to prevent compilation warning in arm:
----------------------------------------------
'warning: implicit declaration of function ...'
----------------------------------------------
Delete 'undef' constructions for '__LOAD_EXC_xx', '__SEVL' and '__WFE'.
And add ‘__RTE_ARM’ for these macros to fix the namespace.
This is because the original macros are undefined at the end of the file.
If the new macro 'rte_wait_event' calls them in other files, they will be
seen as 'not defined'.

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
---
 lib/eal/arm/include/rte_pause_64.h  | 202 +++++++++++++++++-----------
 lib/eal/include/generic/rte_pause.h |  28 ++++
 2 files changed, 154 insertions(+), 76 deletions(-)

diff --git a/lib/eal/arm/include/rte_pause_64.h b/lib/eal/arm/include/rte_pause_64.h
index e87d10b8cc..783c6aae87 100644
--- a/lib/eal/arm/include/rte_pause_64.h
+++ b/lib/eal/arm/include/rte_pause_64.h
@@ -26,47 +26,120 @@ static inline void rte_pause(void)
 #ifdef RTE_WAIT_UNTIL_EQUAL_ARCH_DEFINED
 
 /* Send an event to quit WFE. */
-#define __SEVL() { asm volatile("sevl" : : : "memory"); }
+#define __RTE_ARM_SEVL() { asm volatile("sevl" : : : "memory"); }
 
 /* Put processor into low power WFE(Wait For Event) state. */
-#define __WFE() { asm volatile("wfe" : : : "memory"); }
+#define __RTE_ARM_WFE() { asm volatile("wfe" : : : "memory"); }
 
-static __rte_always_inline void
-rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
-		int memorder)
-{
-	uint16_t value;
-
-	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
-
-	/*
-	 * Atomic exclusive load from addr, it returns the 16-bit content of
-	 * *addr while making it 'monitored',when it is written by someone
-	 * else, the 'monitored' state is cleared and a event is generated
-	 * implicitly to exit WFE.
-	 */
-#define __LOAD_EXC_16(src, dst, memorder) {               \
+/*
+ * Atomic exclusive load from addr, it returns the 16-bit content of
+ * *addr while making it 'monitored', when it is written by someone
+ * else, the 'monitored' state is cleared and an event is generated
+ * implicitly to exit WFE.
+ */
+#define __RTE_ARM_LOAD_EXC_16(src, dst, memorder) {       \
 	if (memorder == __ATOMIC_RELAXED) {               \
 		asm volatile("ldxrh %w[tmp], [%x[addr]]"  \
 			: [tmp] "=&r" (dst)               \
-			: [addr] "r"(src)                 \
+			: [addr] "r" (src)                \
 			: "memory");                      \
 	} else {                                          \
 		asm volatile("ldaxrh %w[tmp], [%x[addr]]" \
 			: [tmp] "=&r" (dst)               \
-			: [addr] "r"(src)                 \
+			: [addr] "r" (src)                \
 			: "memory");                      \
 	} }
 
-	__LOAD_EXC_16(addr, value, memorder)
+/*
+ * Atomic exclusive load from addr, it returns the 32-bit content of
+ * *addr while making it 'monitored', when it is written by someone
+ * else, the 'monitored' state is cleared and an event is generated
+ * implicitly to exit WFE.
+ */
+#define __RTE_ARM_LOAD_EXC_32(src, dst, memorder) {      \
+	if (memorder == __ATOMIC_RELAXED) {              \
+		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
+			: [tmp] "=&r" (dst)              \
+			: [addr] "r" (src)               \
+			: "memory");                     \
+	} else {                                         \
+		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
+			: [tmp] "=&r" (dst)              \
+			: [addr] "r" (src)               \
+			: "memory");                     \
+	} }
+
+/*
+ * Atomic exclusive load from addr, it returns the 64-bit content of
+ * *addr while making it 'monitored', when it is written by someone
+ * else, the 'monitored' state is cleared and an event is generated
+ * implicitly to exit WFE.
+ */
+#define __RTE_ARM_LOAD_EXC_64(src, dst, memorder) {      \
+	if (memorder == __ATOMIC_RELAXED) {              \
+		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
+			: [tmp] "=&r" (dst)              \
+			: [addr] "r" (src)               \
+			: "memory");                     \
+	} else {                                         \
+		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
+			: [tmp] "=&r" (dst)              \
+			: [addr] "r" (src)               \
+			: "memory");                     \
+	} }
+
+/*
+ * Atomic exclusive load from addr, it returns the 128-bit content of
+ * *addr while making it 'monitored', when it is written by someone
+ * else, the 'monitored' state is cleared and an event is generated
+ * implicitly to exit WFE.
+ */
+#define __RTE_ARM_LOAD_EXC_128(src, dst, memorder) {                    \
+	volatile rte_int128_t *dst_128 = (volatile rte_int128_t *)&dst; \
+	if (memorder == __ATOMIC_RELAXED) {                             \
+		asm volatile("ldxp %x[tmp0], %x[tmp1], [%x[addr]]"      \
+			: [tmp0] "=&r" (dst_128->val[0]),               \
+			  [tmp1] "=&r" (dst_128->val[1])                \
+			: [addr] "r" (src)                              \
+			: "memory");                                    \
+	} else {                                                        \
+		asm volatile("ldaxp %x[tmp0], %x[tmp1], [%x[addr]]"     \
+			: [tmp0] "=&r" (dst_128->val[0]),               \
+			  [tmp1] "=&r" (dst_128->val[1])                \
+			: [addr] "r" (src)                              \
+			: "memory");                                    \
+	} }                                                             \
+
+#define __RTE_ARM_LOAD_EXC(src, dst, memorder, size) {          \
+	RTE_BUILD_BUG_ON(size != 16 && size != 32 && size != 64 \
+				&& size != 128);                \
+	if (size == 16)                                         \
+		__RTE_ARM_LOAD_EXC_16(src, dst, memorder)       \
+	else if (size == 32)                                    \
+		__RTE_ARM_LOAD_EXC_32(src, dst, memorder)       \
+	else if (size == 64)                                    \
+		__RTE_ARM_LOAD_EXC_64(src, dst, memorder)       \
+	else if (size == 128)                                   \
+		__RTE_ARM_LOAD_EXC_128(src, dst, memorder)      \
+}
+
+static __rte_always_inline void
+rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
+		int memorder)
+{
+	uint16_t value;
+
+	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&
+				memorder != __ATOMIC_RELAXED);
+
+	__RTE_ARM_LOAD_EXC_16(addr, value, memorder)
 	if (value != expected) {
-		__SEVL()
+		__RTE_ARM_SEVL()
 		do {
-			__WFE()
-			__LOAD_EXC_16(addr, value, memorder)
+			__RTE_ARM_WFE()
+			__RTE_ARM_LOAD_EXC_16(addr, value, memorder)
 		} while (value != expected);
 	}
-#undef __LOAD_EXC_16
 }
 
 static __rte_always_inline void
@@ -75,36 +148,17 @@ rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
 {
 	uint32_t value;
 
-	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
-
-	/*
-	 * Atomic exclusive load from addr, it returns the 32-bit content of
-	 * *addr while making it 'monitored',when it is written by someone
-	 * else, the 'monitored' state is cleared and a event is generated
-	 * implicitly to exit WFE.
-	 */
-#define __LOAD_EXC_32(src, dst, memorder) {              \
-	if (memorder == __ATOMIC_RELAXED) {              \
-		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
-			: [tmp] "=&r" (dst)              \
-			: [addr] "r"(src)                \
-			: "memory");                     \
-	} else {                                         \
-		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
-			: [tmp] "=&r" (dst)              \
-			: [addr] "r"(src)                \
-			: "memory");                     \
-	} }
+	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&
+				memorder != __ATOMIC_RELAXED);
 
-	__LOAD_EXC_32(addr, value, memorder)
+	__RTE_ARM_LOAD_EXC_32(addr, value, memorder)
 	if (value != expected) {
-		__SEVL()
+		__RTE_ARM_SEVL()
 		do {
-			__WFE()
-			__LOAD_EXC_32(addr, value, memorder)
+			__RTE_ARM_WFE()
+			__RTE_ARM_LOAD_EXC_32(addr, value, memorder)
 		} while (value != expected);
 	}
-#undef __LOAD_EXC_32
 }
 
 static __rte_always_inline void
@@ -113,40 +167,36 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
 {
 	uint64_t value;
 
-	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
-
-	/*
-	 * Atomic exclusive load from addr, it returns the 64-bit content of
-	 * *addr while making it 'monitored',when it is written by someone
-	 * else, the 'monitored' state is cleared and a event is generated
-	 * implicitly to exit WFE.
-	 */
-#define __LOAD_EXC_64(src, dst, memorder) {              \
-	if (memorder == __ATOMIC_RELAXED) {              \
-		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
-			: [tmp] "=&r" (dst)              \
-			: [addr] "r"(src)                \
-			: "memory");                     \
-	} else {                                         \
-		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
-			: [tmp] "=&r" (dst)              \
-			: [addr] "r"(src)                \
-			: "memory");                     \
-	} }
+	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&
+				memorder != __ATOMIC_RELAXED);
 
-	__LOAD_EXC_64(addr, value, memorder)
+	__RTE_ARM_LOAD_EXC_64(addr, value, memorder)
 	if (value != expected) {
-		__SEVL()
+		__RTE_ARM_SEVL()
 		do {
-			__WFE()
-			__LOAD_EXC_64(addr, value, memorder)
+			__RTE_ARM_WFE()
+			__RTE_ARM_LOAD_EXC_64(addr, value, memorder)
 		} while (value != expected);
 	}
 }
-#undef __LOAD_EXC_64
 
-#undef __SEVL
-#undef __WFE
+#define rte_wait_event(addr, mask, cond, expected, memorder)              \
+do {                                                                      \
+	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));                \
+	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&                  \
+				memorder != __ATOMIC_RELAXED);            \
+	const uint32_t size = sizeof(*(addr)) << 3;                       \
+	typeof(*(addr)) expected_value = (expected);                      \
+	typeof(*(addr)) value;                                            \
+	__RTE_ARM_LOAD_EXC((addr), value, memorder, size)                 \
+	if ((value & (mask)) cond expected_value) {                       \
+		__RTE_ARM_SEVL()                                          \
+		do {                                                      \
+			__RTE_ARM_WFE()                                   \
+			__RTE_ARM_LOAD_EXC((addr), value, memorder, size) \
+		} while ((value & (mask)) cond expected_value);           \
+	}                                                                 \
+} while (0)
 
 #endif
 
diff --git a/lib/eal/include/generic/rte_pause.h b/lib/eal/include/generic/rte_pause.h
index 668ee4a184..d0c5b5a415 100644
--- a/lib/eal/include/generic/rte_pause.h
+++ b/lib/eal/include/generic/rte_pause.h
@@ -111,6 +111,34 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
 	while (__atomic_load_n(addr, memorder) != expected)
 		rte_pause();
 }
+
+/*
+ * Wait until *addr breaks the condition, with a relaxed memory
+ * ordering model meaning the loads around this API can be reordered.
+ *
+ * @param addr
+ *  A pointer to the memory location.
+ * @param mask
+ *  A mask of value bits in interest.
+ * @param cond
+ *  A symbol representing the condition.
+ * @param expected
+ *  An expected value to be in the memory location.
+ * @param memorder
+ *  Two different memory orders that can be specified:
+ *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
+ *  C++11 memory orders with the same names, see the C++11 standard or
+ *  the GCC wiki on atomic synchronization for detailed definition.
+ */
+#define rte_wait_event(addr, mask, cond, expected, memorder)                       \
+do {                                                                               \
+	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));                         \
+	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&                           \
+				memorder != __ATOMIC_RELAXED);                     \
+	typeof(*(addr)) expected_value = (expected);                               \
+	while ((__atomic_load_n((addr), (memorder)) & (mask)) cond expected_value) \
+		rte_pause();                                                       \
+} while (0)
 #endif
 
 #endif /* _RTE_PAUSE_H_ */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [PATCH v8 2/5] eal: use wait event for read pflock
  2021-10-29  8:20 ` [dpdk-dev] [PATCH v8 0/5] add new definitions for wait scheme Feifei Wang
  2021-10-29  8:20   ` [dpdk-dev] [PATCH v8 1/5] eal: " Feifei Wang
@ 2021-10-29  8:20   ` Feifei Wang
  2021-10-29 13:55     ` Jerin Jacob
  2021-10-31  8:37     ` David Marchand
  2021-10-29  8:20   ` [dpdk-dev] [PATCH v8 3/5] eal: use wait event scheme for mcslock Feifei Wang
                     ` (2 subsequent siblings)
  4 siblings, 2 replies; 113+ messages in thread
From: Feifei Wang @ 2021-10-29  8:20 UTC (permalink / raw)
  Cc: dev, nd, jerinjacobk, stephen, david.marchand, thomas,
	mattias.ronnblom, konstantin.ananyev, Feifei Wang, Ruifeng Wang

Instead of polling for read pflock update, use wait event scheme for
this case.

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 lib/eal/include/generic/rte_pflock.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/lib/eal/include/generic/rte_pflock.h b/lib/eal/include/generic/rte_pflock.h
index e57c179ef2..7573b036bf 100644
--- a/lib/eal/include/generic/rte_pflock.h
+++ b/lib/eal/include/generic/rte_pflock.h
@@ -121,9 +121,7 @@ rte_pflock_read_lock(rte_pflock_t *pf)
 		return;
 
 	/* Wait for current write phase to complete. */
-	while ((__atomic_load_n(&pf->rd.in, __ATOMIC_ACQUIRE)
-		& RTE_PFLOCK_WBITS) == w)
-		rte_pause();
+	rte_wait_event(&pf->rd.in, RTE_PFLOCK_WBITS, ==, w, __ATOMIC_ACQUIRE);
 }
 
 /**
-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [PATCH v8 3/5] eal: use wait event scheme for mcslock
  2021-10-29  8:20 ` [dpdk-dev] [PATCH v8 0/5] add new definitions for wait scheme Feifei Wang
  2021-10-29  8:20   ` [dpdk-dev] [PATCH v8 1/5] eal: " Feifei Wang
  2021-10-29  8:20   ` [dpdk-dev] [PATCH v8 2/5] eal: use wait event for read pflock Feifei Wang
@ 2021-10-29  8:20   ` Feifei Wang
  2021-10-29 13:55     ` Jerin Jacob
  2021-10-31  8:37     ` David Marchand
  2021-10-29  8:20   ` [dpdk-dev] [PATCH v8 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration Feifei Wang
  2021-10-29  8:20   ` [dpdk-dev] [PATCH v8 5/5] lib/distributor: use wait event scheme Feifei Wang
  4 siblings, 2 replies; 113+ messages in thread
From: Feifei Wang @ 2021-10-29  8:20 UTC (permalink / raw)
  To: Honnappa Nagarahalli
  Cc: dev, nd, jerinjacobk, stephen, david.marchand, thomas,
	mattias.ronnblom, konstantin.ananyev, Feifei Wang, Ruifeng Wang

Instead of polling for mcslock to be updated, use wait event scheme
for this case.

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 lib/eal/include/generic/rte_mcslock.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/lib/eal/include/generic/rte_mcslock.h b/lib/eal/include/generic/rte_mcslock.h
index 34f33c64a5..b4e07648ff 100644
--- a/lib/eal/include/generic/rte_mcslock.h
+++ b/lib/eal/include/generic/rte_mcslock.h
@@ -116,8 +116,9 @@ rte_mcslock_unlock(rte_mcslock_t **msl, rte_mcslock_t *me)
 		/* More nodes added to the queue by other CPUs.
 		 * Wait until the next pointer is set.
 		 */
-		while (__atomic_load_n(&me->next, __ATOMIC_RELAXED) == NULL)
-			rte_pause();
+		uintptr_t *next;
+		next = (uintptr_t *)&me->next;
+		rte_wait_event(next, UINTPTR_MAX, ==, 0, __ATOMIC_RELAXED);
 	}
 
 	/* Pass lock to next waiter. */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [PATCH v8 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration
  2021-10-29  8:20 ` [dpdk-dev] [PATCH v8 0/5] add new definitions for wait scheme Feifei Wang
                     ` (2 preceding siblings ...)
  2021-10-29  8:20   ` [dpdk-dev] [PATCH v8 3/5] eal: use wait event scheme for mcslock Feifei Wang
@ 2021-10-29  8:20   ` Feifei Wang
  2021-10-31  8:37     ` David Marchand
  2021-10-29  8:20   ` [dpdk-dev] [PATCH v8 5/5] lib/distributor: use wait event scheme Feifei Wang
  4 siblings, 1 reply; 113+ messages in thread
From: Feifei Wang @ 2021-10-29  8:20 UTC (permalink / raw)
  To: Konstantin Ananyev
  Cc: dev, nd, jerinjacobk, stephen, david.marchand, thomas,
	mattias.ronnblom, Feifei Wang, Ruifeng Wang

Instead of polling for cbi->use to be updated, use wait event scheme.

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
---
 lib/bpf/bpf_pkt.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/lib/bpf/bpf_pkt.c b/lib/bpf/bpf_pkt.c
index 6e8248f0d6..c8a1cd1eb8 100644
--- a/lib/bpf/bpf_pkt.c
+++ b/lib/bpf/bpf_pkt.c
@@ -113,7 +113,7 @@ bpf_eth_cbi_unuse(struct bpf_eth_cbi *cbi)
 static void
 bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
 {
-	uint32_t nuse, puse;
+	uint32_t puse;
 
 	/* make sure all previous loads and stores are completed */
 	rte_smp_mb();
@@ -122,11 +122,8 @@ bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
 
 	/* in use, busy wait till current RX/TX iteration is finished */
 	if ((puse & BPF_ETH_CBI_INUSE) != 0) {
-		do {
-			rte_pause();
-			rte_compiler_barrier();
-			nuse = cbi->use;
-		} while (nuse == puse);
+		rte_wait_event((uint32_t *)(uintptr_t)&cbi->use, UINT32_MAX,
+				==, puse, __ATOMIC_RELAXED);
 	}
 }
 
-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [PATCH v8 5/5] lib/distributor: use wait event scheme
  2021-10-29  8:20 ` [dpdk-dev] [PATCH v8 0/5] add new definitions for wait scheme Feifei Wang
                     ` (3 preceding siblings ...)
  2021-10-29  8:20   ` [dpdk-dev] [PATCH v8 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration Feifei Wang
@ 2021-10-29  8:20   ` Feifei Wang
  2021-10-29 13:58     ` Jerin Jacob
  4 siblings, 1 reply; 113+ messages in thread
From: Feifei Wang @ 2021-10-29  8:20 UTC (permalink / raw)
  To: David Hunt
  Cc: dev, nd, jerinjacobk, stephen, david.marchand, thomas,
	mattias.ronnblom, konstantin.ananyev, Feifei Wang, Ruifeng Wang

Instead of polling for bufptr64 to be updated, use
wait event for this case.

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 lib/distributor/rte_distributor_single.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/lib/distributor/rte_distributor_single.c b/lib/distributor/rte_distributor_single.c
index f4725b1d0b..d52b24a453 100644
--- a/lib/distributor/rte_distributor_single.c
+++ b/lib/distributor/rte_distributor_single.c
@@ -33,9 +33,8 @@ rte_distributor_request_pkt_single(struct rte_distributor_single *d,
 	union rte_distributor_buffer_single *buf = &d->bufs[worker_id];
 	int64_t req = (((int64_t)(uintptr_t)oldpkt) << RTE_DISTRIB_FLAG_BITS)
 			| RTE_DISTRIB_GET_BUF;
-	while (unlikely(__atomic_load_n(&buf->bufptr64, __ATOMIC_RELAXED)
-			& RTE_DISTRIB_FLAGS_MASK))
-		rte_pause();
+	rte_wait_event(&buf->bufptr64, RTE_DISTRIB_FLAGS_MASK,
+			!=, 0, __ATOMIC_RELAXED);
 
 	/* Sync with distributor on GET_BUF flag. */
 	__atomic_store_n(&(buf->bufptr64), req, __ATOMIC_RELEASE);
@@ -74,9 +73,8 @@ rte_distributor_return_pkt_single(struct rte_distributor_single *d,
 	union rte_distributor_buffer_single *buf = &d->bufs[worker_id];
 	uint64_t req = (((int64_t)(uintptr_t)oldpkt) << RTE_DISTRIB_FLAG_BITS)
 			| RTE_DISTRIB_RETURN_BUF;
-	while (unlikely(__atomic_load_n(&buf->bufptr64, __ATOMIC_RELAXED)
-			& RTE_DISTRIB_FLAGS_MASK))
-		rte_pause();
+	rte_wait_event(&buf->bufptr64, RTE_DISTRIB_FLAGS_MASK,
+			!=, 0, __ATOMIC_RELAXED);
 
 	/* Sync with distributor on RETURN_BUF flag. */
 	__atomic_store_n(&(buf->bufptr64), req, __ATOMIC_RELEASE);
-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [dpdk-dev] [PATCH v8 1/5] eal: add new definitions for wait scheme
  2021-10-29  8:20   ` [dpdk-dev] [PATCH v8 1/5] eal: " Feifei Wang
@ 2021-10-29 13:54     ` Jerin Jacob
  2021-10-31  8:38     ` David Marchand
  1 sibling, 0 replies; 113+ messages in thread
From: Jerin Jacob @ 2021-10-29 13:54 UTC (permalink / raw)
  To: Feifei Wang
  Cc: Ruifeng Wang, dpdk-dev, nd, Stephen Hemminger, David Marchand,
	Thomas Monjalon, Mattias Rönnblom, Ananyev, Konstantin

On Fri, Oct 29, 2021 at 1:50 PM Feifei Wang <feifei.wang2@arm.com> wrote:
>
> Introduce macros as generic interface for address monitoring.
>
> Add '__LOAD_EXC_128' for size of 128. For different size, encapsulate
> '__LOAD_EXC_16', '__LOAD_EXC_32', '__LOAD_EXC_64' and '__LOAD_EXC_128'
> into a new macro '__LOAD_EXC'.
>
> Furthermore, to prevent compilation warning in arm:
> ----------------------------------------------
> 'warning: implicit declaration of function ...'
> ----------------------------------------------
> Delete 'undef' constructions for '__LOAD_EXC_xx', '__SEVL' and '__WFE'.
> And add ‘__RTE_ARM’ for these macros to fix the namespace.
> This is because original macros are undefine at the end of the file.
> If new macro 'rte_wait_event' calls them in other files, they will be
> seen as 'not defined'.
>
> Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>

Acked-by: Jerin Jacob <jerinj@marvell.com>

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [dpdk-dev] [PATCH v8 2/5] eal: use wait event for read pflock
  2021-10-29  8:20   ` [dpdk-dev] [PATCH v8 2/5] eal: use wait event for read pflock Feifei Wang
@ 2021-10-29 13:55     ` Jerin Jacob
  2021-10-31  8:37     ` David Marchand
  1 sibling, 0 replies; 113+ messages in thread
From: Jerin Jacob @ 2021-10-29 13:55 UTC (permalink / raw)
  To: Feifei Wang
  Cc: dpdk-dev, nd, Stephen Hemminger, David Marchand, Thomas Monjalon,
	Mattias Rönnblom, Ananyev, Konstantin, Ruifeng Wang

On Fri, Oct 29, 2021 at 1:50 PM Feifei Wang <feifei.wang2@arm.com> wrote:
>
> Instead of polling for read pflock update, use wait event scheme for
> this case.
>
> Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>

Acked-by: Jerin Jacob <jerinj@marvell.com>


> ---
>  lib/eal/include/generic/rte_pflock.h | 4 +---
>  1 file changed, 1 insertion(+), 3 deletions(-)
>
> diff --git a/lib/eal/include/generic/rte_pflock.h b/lib/eal/include/generic/rte_pflock.h
> index e57c179ef2..7573b036bf 100644
> --- a/lib/eal/include/generic/rte_pflock.h
> +++ b/lib/eal/include/generic/rte_pflock.h
> @@ -121,9 +121,7 @@ rte_pflock_read_lock(rte_pflock_t *pf)
>                 return;
>
>         /* Wait for current write phase to complete. */
> -       while ((__atomic_load_n(&pf->rd.in, __ATOMIC_ACQUIRE)
> -               & RTE_PFLOCK_WBITS) == w)
> -               rte_pause();
> +       rte_wait_event(&pf->rd.in, RTE_PFLOCK_WBITS, ==, w, __ATOMIC_ACQUIRE);
>  }
>
>  /**
> --
> 2.25.1
>

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [dpdk-dev] [PATCH v8 3/5] eal: use wait event scheme for mcslock
  2021-10-29  8:20   ` [dpdk-dev] [PATCH v8 3/5] eal: use wait event scheme for mcslock Feifei Wang
@ 2021-10-29 13:55     ` Jerin Jacob
  2021-10-31  8:37     ` David Marchand
  1 sibling, 0 replies; 113+ messages in thread
From: Jerin Jacob @ 2021-10-29 13:55 UTC (permalink / raw)
  To: Feifei Wang
  Cc: Honnappa Nagarahalli, dpdk-dev, nd, Stephen Hemminger,
	David Marchand, Thomas Monjalon, Mattias Rönnblom, Ananyev,
	Konstantin, Ruifeng Wang

On Fri, Oct 29, 2021 at 1:50 PM Feifei Wang <feifei.wang2@arm.com> wrote:
>
> Instead of polling for mcslock to be updated, use wait event scheme
> for this case.
>
> Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>

Acked-by: Jerin Jacob <jerinj@marvell.com>



> ---
>  lib/eal/include/generic/rte_mcslock.h | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/lib/eal/include/generic/rte_mcslock.h b/lib/eal/include/generic/rte_mcslock.h
> index 34f33c64a5..b4e07648ff 100644
> --- a/lib/eal/include/generic/rte_mcslock.h
> +++ b/lib/eal/include/generic/rte_mcslock.h
> @@ -116,8 +116,9 @@ rte_mcslock_unlock(rte_mcslock_t **msl, rte_mcslock_t *me)
>                 /* More nodes added to the queue by other CPUs.
>                  * Wait until the next pointer is set.
>                  */
> -               while (__atomic_load_n(&me->next, __ATOMIC_RELAXED) == NULL)
> -                       rte_pause();
> +               uintptr_t *next;
> +               next = (uintptr_t *)&me->next;
> +               rte_wait_event(next, UINTPTR_MAX, ==, 0, __ATOMIC_RELAXED);
>         }
>
>         /* Pass lock to next waiter. */
> --
> 2.25.1
>

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [dpdk-dev] [PATCH v8 5/5] lib/distributor: use wait event scheme
  2021-10-29  8:20   ` [dpdk-dev] [PATCH v8 5/5] lib/distributor: use wait event scheme Feifei Wang
@ 2021-10-29 13:58     ` Jerin Jacob
  2021-10-31  8:38       ` David Marchand
  2021-11-01 12:44       ` David Hunt
  0 siblings, 2 replies; 113+ messages in thread
From: Jerin Jacob @ 2021-10-29 13:58 UTC (permalink / raw)
  To: Feifei Wang
  Cc: David Hunt, dpdk-dev, nd, Stephen Hemminger, David Marchand,
	Thomas Monjalon, Mattias Rönnblom, Ananyev, Konstantin,
	Ruifeng Wang

On Fri, Oct 29, 2021 at 1:50 PM Feifei Wang <feifei.wang2@arm.com> wrote:
>
> Instead of polling for bufptr64 to be updated, use
> wait event for this case.
>
> Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>


With the change in the subject as distributor: ...
Probably can be fixed in merge time.

Acked-by: Jerin Jacob <jerinj@marvell.com>

> ---
>  lib/distributor/rte_distributor_single.c | 10 ++++------
>  1 file changed, 4 insertions(+), 6 deletions(-)
>
> diff --git a/lib/distributor/rte_distributor_single.c b/lib/distributor/rte_distributor_single.c
> index f4725b1d0b..d52b24a453 100644
> --- a/lib/distributor/rte_distributor_single.c
> +++ b/lib/distributor/rte_distributor_single.c
> @@ -33,9 +33,8 @@ rte_distributor_request_pkt_single(struct rte_distributor_single *d,
>         union rte_distributor_buffer_single *buf = &d->bufs[worker_id];
>         int64_t req = (((int64_t)(uintptr_t)oldpkt) << RTE_DISTRIB_FLAG_BITS)
>                         | RTE_DISTRIB_GET_BUF;
> -       while (unlikely(__atomic_load_n(&buf->bufptr64, __ATOMIC_RELAXED)
> -                       & RTE_DISTRIB_FLAGS_MASK))
> -               rte_pause();
> +       rte_wait_event(&buf->bufptr64, RTE_DISTRIB_FLAGS_MASK,
> +                       !=, 0, __ATOMIC_RELAXED);
>
>         /* Sync with distributor on GET_BUF flag. */
>         __atomic_store_n(&(buf->bufptr64), req, __ATOMIC_RELEASE);
> @@ -74,9 +73,8 @@ rte_distributor_return_pkt_single(struct rte_distributor_single *d,
>         union rte_distributor_buffer_single *buf = &d->bufs[worker_id];
>         uint64_t req = (((int64_t)(uintptr_t)oldpkt) << RTE_DISTRIB_FLAG_BITS)
>                         | RTE_DISTRIB_RETURN_BUF;
> -       while (unlikely(__atomic_load_n(&buf->bufptr64, __ATOMIC_RELAXED)
> -                       & RTE_DISTRIB_FLAGS_MASK))
> -               rte_pause();
> +       rte_wait_event(&buf->bufptr64, RTE_DISTRIB_FLAGS_MASK,
> +                       !=, 0, __ATOMIC_RELAXED);
>
>         /* Sync with distributor on RETURN_BUF flag. */
>         __atomic_store_n(&(buf->bufptr64), req, __ATOMIC_RELEASE);
> --
> 2.25.1
>

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [dpdk-dev] [PATCH v8 2/5] eal: use wait event for read pflock
  2021-10-29  8:20   ` [dpdk-dev] [PATCH v8 2/5] eal: use wait event for read pflock Feifei Wang
  2021-10-29 13:55     ` Jerin Jacob
@ 2021-10-31  8:37     ` David Marchand
  1 sibling, 0 replies; 113+ messages in thread
From: David Marchand @ 2021-10-31  8:37 UTC (permalink / raw)
  To: Feifei Wang
  Cc: dev, nd, Jerin Jacob, Stephen Hemminger, Thomas Monjalon,
	Mattias Rönnblom, Ananyev, Konstantin, Ruifeng Wang

On Fri, Oct 29, 2021 at 10:20 AM Feifei Wang <feifei.wang2@arm.com> wrote:
>
> Instead of polling for read pflock update, use wait event scheme for
> this case.

Please look at git history for this file.
Prefix should be pflock:



>
> Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>


-- 
David Marchand


^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [dpdk-dev] [PATCH v8 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration
  2021-10-29  8:20   ` [dpdk-dev] [PATCH v8 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration Feifei Wang
@ 2021-10-31  8:37     ` David Marchand
  0 siblings, 0 replies; 113+ messages in thread
From: David Marchand @ 2021-10-31  8:37 UTC (permalink / raw)
  To: Feifei Wang
  Cc: Konstantin Ananyev, dev, nd, Jerin Jacob, Stephen Hemminger,
	Thomas Monjalon, Mattias Rönnblom, Ruifeng Wang

On Fri, Oct 29, 2021 at 10:20 AM Feifei Wang <feifei.wang2@arm.com> wrote:
>
> Instead of polling for cbi->use to be updated, use wait event scheme.

We don't put lib/ in titles.



>
> Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>


-- 
David Marchand


^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [dpdk-dev] [PATCH v8 3/5] eal: use wait event scheme for mcslock
  2021-10-29  8:20   ` [dpdk-dev] [PATCH v8 3/5] eal: use wait event scheme for mcslock Feifei Wang
  2021-10-29 13:55     ` Jerin Jacob
@ 2021-10-31  8:37     ` David Marchand
  1 sibling, 0 replies; 113+ messages in thread
From: David Marchand @ 2021-10-31  8:37 UTC (permalink / raw)
  To: Feifei Wang
  Cc: Honnappa Nagarahalli, dev, nd, Jerin Jacob, Stephen Hemminger,
	Thomas Monjalon, Mattias Rönnblom, Ananyev, Konstantin,
	Ruifeng Wang

On Fri, Oct 29, 2021 at 10:21 AM Feifei Wang <feifei.wang2@arm.com> wrote:
>
> Instead of polling for mcslock to be updated, use wait event scheme
> for this case.

Title prefix mcslock:


>
> Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>


-- 
David Marchand


^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [dpdk-dev] [PATCH v8 5/5] lib/distributor: use wait event scheme
  2021-10-29 13:58     ` Jerin Jacob
@ 2021-10-31  8:38       ` David Marchand
  2021-11-01 12:44       ` David Hunt
  1 sibling, 0 replies; 113+ messages in thread
From: David Marchand @ 2021-10-31  8:38 UTC (permalink / raw)
  To: Feifei Wang
  Cc: Jerin Jacob, David Hunt, dpdk-dev, nd, Stephen Hemminger,
	Thomas Monjalon, Mattias Rönnblom, Ananyev, Konstantin,
	Ruifeng Wang

On Fri, Oct 29, 2021 at 3:58 PM Jerin Jacob <jerinjacobk@gmail.com> wrote:
>
> On Fri, Oct 29, 2021 at 1:50 PM Feifei Wang <feifei.wang2@arm.com> wrote:
> >
> > Instead of polling for bufptr64 to be updated, use
> > wait event for this case.
> >
> > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
>
>
> With the change in the subject as distributor: ...

Yes.

-- 
David Marchand


^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [dpdk-dev] [PATCH v8 1/5] eal: add new definitions for wait scheme
  2021-10-29  8:20   ` [dpdk-dev] [PATCH v8 1/5] eal: " Feifei Wang
  2021-10-29 13:54     ` Jerin Jacob
@ 2021-10-31  8:38     ` David Marchand
  2021-11-01  2:29       ` [dpdk-dev] 回复: " Feifei Wang
  1 sibling, 1 reply; 113+ messages in thread
From: David Marchand @ 2021-10-31  8:38 UTC (permalink / raw)
  To: Feifei Wang
  Cc: Ruifeng Wang, dev, nd, Jerin Jacob, Stephen Hemminger,
	Thomas Monjalon, Mattias Rönnblom, Ananyev, Konstantin

On Fri, Oct 29, 2021 at 10:20 AM Feifei Wang <feifei.wang2@arm.com> wrote:
>
> Introduce macros as generic interface for address monitoring.

The main point of this patch is to add a new generic helper.


>
> Add '__LOAD_EXC_128' for size of 128. For different size, encapsulate
> '__LOAD_EXC_16', '__LOAD_EXC_32', '__LOAD_EXC_64' and '__LOAD_EXC_128'
> into a new macro '__LOAD_EXC'.

ARM macros are just a result of introducing this new helper as a macro.
I would not mention them.


>
> Furthermore, to prevent compilation warning in arm:
> ----------------------------------------------
> 'warning: implicit declaration of function ...'
> ----------------------------------------------
> Delete 'undef' constructions for '__LOAD_EXC_xx', '__SEVL' and '__WFE'.
> And add ‘__RTE_ARM’ for these macros to fix the namespace.
> This is because original macros are undefine at the end of the file.
> If new macro 'rte_wait_event' calls them in other files, they will be
> seen as 'not defined'.


About this new helper, it's rather confusing:
- it is a macro, should be in capital letters,
- "rte_wait_event(addr, mask, cond, expected)" waits until "*addr &
mask cond expected" becomes false. I find this confusing. I would
invert the condition.
- so far, we had rte_wait_until_* helpers, rte_wait_event seems like a
step backward as it seems to talk about the ARM stuff (wfe),
- the masking part is artificial in some cases, at least let's avoid
using a too generic name, we can decide to add a non-masked helper
later.

For those reasons, I'd prefer we have something like:

/*
 * Wait until *addr & mask makes the condition true. With a relaxed memory
 * ordering model, the loads around this helper can be reordered.
 *
 * @param addr
 *  A pointer to the memory location.
 * @param mask
 *  A mask of *addr bits in interest.
 * @param cond
 *  A symbol representing the condition.
 * @param expected
 *  An expected value to be in the memory location.
 * @param memorder
 *  Two different memory orders that can be specified:
 *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
 *  C++11 memory orders with the same names, see the C++11 standard or
 *  the GCC wiki on atomic synchronization for detailed definition.
 */
#define RTE_WAIT_UNTIL_MASKED(addr, mask, cond, expected, memorder)
           \
do {
           \
        RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&
           \
                memorder != __ATOMIC_RELAXED);
           \
        typeof(*(addr)) expected_value = expected;
           \
        while (!((__atomic_load_n(addr, memorder) & (mask)) cond
expected_value)) \
                rte_pause();
           \
} while (0)


Comments below.

>
> Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
> ---
>  lib/eal/arm/include/rte_pause_64.h  | 202 +++++++++++++++++-----------
>  lib/eal/include/generic/rte_pause.h |  28 ++++
>  2 files changed, 154 insertions(+), 76 deletions(-)
>
> diff --git a/lib/eal/arm/include/rte_pause_64.h b/lib/eal/arm/include/rte_pause_64.h
> index e87d10b8cc..783c6aae87 100644
> --- a/lib/eal/arm/include/rte_pause_64.h
> +++ b/lib/eal/arm/include/rte_pause_64.h

[snip]

> +/*
> + * Atomic exclusive load from addr, it returns the 64-bit content of
> + * *addr while making it 'monitored', when it is written by someone
> + * else, the 'monitored' state is cleared and an event is generated
> + * implicitly to exit WFE.
> + */
> +#define __RTE_ARM_LOAD_EXC_64(src, dst, memorder) {      \
> +       if (memorder == __ATOMIC_RELAXED) {              \
> +               asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> +                       : [tmp] "=&r" (dst)              \
> +                       : [addr] "r" (src)               \
> +                       : "memory");                     \
> +       } else {                                         \
> +               asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> +                       : [tmp] "=&r" (dst)              \
> +                       : [addr] "r" (src)               \
> +                       : "memory");                     \
> +       } }
> +
> +/*
> + * Atomic exclusive load from addr, it returns the 128-bit content of
> + * *addr while making it 'monitored', when it is written by someone
> + * else, the 'monitored' state is cleared and an event is generated
> + * implicitly to exit WFE.
> + */
> +#define __RTE_ARM_LOAD_EXC_128(src, dst, memorder) {                    \
> +       volatile rte_int128_t *dst_128 = (volatile rte_int128_t *)&dst; \

dst needs some () protection => &(dst)
Is volatile necessary?


> +       if (memorder == __ATOMIC_RELAXED) {                             \
> +               asm volatile("ldxp %x[tmp0], %x[tmp1], [%x[addr]]"      \
> +                       : [tmp0] "=&r" (dst_128->val[0]),               \
> +                         [tmp1] "=&r" (dst_128->val[1])                \
> +                       : [addr] "r" (src)                              \
> +                       : "memory");                                    \
> +       } else {                                                        \
> +               asm volatile("ldaxp %x[tmp0], %x[tmp1], [%x[addr]]"     \
> +                       : [tmp0] "=&r" (dst_128->val[0]),               \
> +                         [tmp1] "=&r" (dst_128->val[1])                \
> +                       : [addr] "r" (src)                              \
> +                       : "memory");                                    \
> +       } }                                                             \
> +
> +#define __RTE_ARM_LOAD_EXC(src, dst, memorder, size) {          \
> +       RTE_BUILD_BUG_ON(size != 16 && size != 32 && size != 64 \
> +                               && size != 128);                \

Indent should be one tab (idem in other places of this patch).
Double tab is when we have line continuation in tests.


> +       if (size == 16)                                         \
> +               __RTE_ARM_LOAD_EXC_16(src, dst, memorder)       \
> +       else if (size == 32)                                    \
> +               __RTE_ARM_LOAD_EXC_32(src, dst, memorder)       \
> +       else if (size == 64)                                    \
> +               __RTE_ARM_LOAD_EXC_64(src, dst, memorder)       \
> +       else if (size == 128)                                   \
> +               __RTE_ARM_LOAD_EXC_128(src, dst, memorder)      \
> +}
> +

[snip]

> -#undef __LOAD_EXC_64
>
> -#undef __SEVL
> -#undef __WFE
> +#define rte_wait_event(addr, mask, cond, expected, memorder)              \
> +do {                                                                      \
> +       RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));                \

Is this check on memorder being constant necessary?
We have a build bug on, right after, would it not catch non constant cases?


> +       RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&                  \
> +                               memorder != __ATOMIC_RELAXED);            \
> +       const uint32_t size = sizeof(*(addr)) << 3;                       \
> +       typeof(*(addr)) expected_value = (expected);                      \

No need for () around expected.


> +       typeof(*(addr)) value;                                            \
> +       __RTE_ARM_LOAD_EXC((addr), value, memorder, size)                 \

No need for () around addr.


> +       if ((value & (mask)) cond expected_value) {                       \
> +               __RTE_ARM_SEVL()                                          \
> +               do {                                                      \
> +                       __RTE_ARM_WFE()                                   \
> +                       __RTE_ARM_LOAD_EXC((addr), value, memorder, size) \

Idem.


> +               } while ((value & (mask)) cond expected_value);           \
> +       }                                                                 \
> +} while (0)
>
>  #endif
>
> diff --git a/lib/eal/include/generic/rte_pause.h b/lib/eal/include/generic/rte_pause.h
> index 668ee4a184..d0c5b5a415 100644
> --- a/lib/eal/include/generic/rte_pause.h
> +++ b/lib/eal/include/generic/rte_pause.h
> @@ -111,6 +111,34 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
>         while (__atomic_load_n(addr, memorder) != expected)
>                 rte_pause();
>  }

With this patch, ARM header goes though a conversion of assert() to
compilation checks (build bug on).
I don't see a reason not to do the same in generic header.

As a result of this conversion, #include <assert.h> then can be removed.
Though it triggers build failure on following files (afaics) who were
implictly relying on this inclusion:
drivers/net/ark/ark_ddm.c
drivers/net/ark/ark_udm.c
drivers/net/ice/ice_fdir_filter.c
drivers/net/ionic/ionic_rxtx.c
drivers/net/mlx4/mlx4_txq.c


-- 
David Marchand


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] 回复: [PATCH v8 1/5] eal: add new definitions for wait scheme
  2021-10-31  8:38     ` David Marchand
@ 2021-11-01  2:29       ` Feifei Wang
  0 siblings, 0 replies; 113+ messages in thread
From: Feifei Wang @ 2021-11-01  2:29 UTC (permalink / raw)
  To: David Marchand
  Cc: Ruifeng Wang, dev, nd, Jerin Jacob, Stephen Hemminger, thomas,
	Mattias Rönnblom, Ananyev, Konstantin, nd


> -----邮件原件-----
> 发件人: David Marchand <david.marchand@redhat.com>
> 发送时间: Sunday, October 31, 2021 4:39 PM
> 收件人: Feifei Wang <Feifei.Wang2@arm.com>
> 抄送: Ruifeng Wang <Ruifeng.Wang@arm.com>; dev <dev@dpdk.org>; nd
> <nd@arm.com>; Jerin Jacob <jerinjacobk@gmail.com>; Stephen Hemminger
> <stephen@networkplumber.org>; thomas@monjalon.net; Mattias Rönnblom
> <mattias.ronnblom@ericsson.com>; Ananyev, Konstantin
> <konstantin.ananyev@intel.com>
> 主题: Re: [PATCH v8 1/5] eal: add new definitions for wait scheme
> 
> On Fri, Oct 29, 2021 at 10:20 AM Feifei Wang <feifei.wang2@arm.com>
> wrote:
> >
> > Introduce macros as generic interface for address monitoring.
> 
> The main point of this patch is to add a new generic helper.
[Feifei] Thanks for the comments, I will change this commit message.
> 
> 
> >
> > Add '__LOAD_EXC_128' for size of 128. For different size, encapsulate
> > '__LOAD_EXC_16', '__LOAD_EXC_32', '__LOAD_EXC_64' and
> '__LOAD_EXC_128'
> > into a new macro '__LOAD_EXC'.
> 
> ARM macros are just a result of introducing this new helper as a macro.
> I would not mention them.
[Feifei] Ok, I will delete it.
> 
> 
> >
> > Furthermore, to prevent compilation warning in arm:
> > ----------------------------------------------
> > 'warning: implicit declaration of function ...'
> > ----------------------------------------------
> > Delete 'undef' constructions for '__LOAD_EXC_xx', '__SEVL' and '__WFE'.
> > And add ‘__RTE_ARM’ for these macros to fix the namespace.
> > This is because original macros are undefine at the end of the file.
> > If new macro 'rte_wait_event' calls them in other files, they will be
> > seen as 'not defined'.
> 
> 
> About this new helper, it's rather confusing:
> - it is a macro, should be in capital letters,
> - "rte_wait_event(addr, mask, cond, expected)" waits until "*addr & mask
> cond expected" becomes false. I find this confusing. I would invert the
> condition.
> - so far, we had rte_wait_until_* helpers, rte_wait_event seems like a step
> backward as it seems to talk about the ARM stuff (wfe),
[Feifei] So if I understand correctly, we need to avoid using 'wait_event' as the name.

> - the masking part is artificial in some cases, at least let's avoid using a too
> generic name, we can decide to add a non-masked helper later.
[Feifei] Ok, I will change this name to match the mask.
> 
> For those reasons, I'd prefer we have something like:
> 
> /*
>  * Wait until *addr & mask makes the condition true. With a relaxed memory
>  * ordering model, the loads around this helper can be reordered.
>  *
>  * @param addr
>  *  A pointer to the memory location.
>  * @param mask
>  *  A mask of *addr bits in interest.
>  * @param cond
>  *  A symbol representing the condition.
>  * @param expected
>  *  An expected value to be in the memory location.
>  * @param memorder
>  *  Two different memory orders that can be specified:
>  *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
>  *  C++11 memory orders with the same names, see the C++11 standard or
>  *  the GCC wiki on atomic synchronization for detailed definition.
>  */
> #define RTE_WAIT_UNTIL_MASKED(addr, mask, cond, expected, memorder)
>            \
> do {
>            \
>         RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&
>            \
>                 memorder != __ATOMIC_RELAXED);
>            \
>         typeof(*(addr)) expected_value = expected;
>            \
>         while (!((__atomic_load_n(addr, memorder) & (mask)) cond
> expected_value)) \
>                 rte_pause();
>            \
> } while (0)
> 
> 
> Comments below.
> 
> >
> > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
> > ---
> >  lib/eal/arm/include/rte_pause_64.h  | 202
> > +++++++++++++++++-----------  lib/eal/include/generic/rte_pause.h |
> > 28 ++++
> >  2 files changed, 154 insertions(+), 76 deletions(-)
> >
> > diff --git a/lib/eal/arm/include/rte_pause_64.h
> > b/lib/eal/arm/include/rte_pause_64.h
> > index e87d10b8cc..783c6aae87 100644
> > --- a/lib/eal/arm/include/rte_pause_64.h
> > +++ b/lib/eal/arm/include/rte_pause_64.h
> 
> [snip]
> 
> > +/*
> > + * Atomic exclusive load from addr, it returns the 64-bit content of
> > + * *addr while making it 'monitored', when it is written by someone
> > + * else, the 'monitored' state is cleared and an event is generated
> > + * implicitly to exit WFE.
> > + */
> > +#define __RTE_ARM_LOAD_EXC_64(src, dst, memorder) {      \
> > +       if (memorder == __ATOMIC_RELAXED) {              \
> > +               asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> > +                       : [tmp] "=&r" (dst)              \
> > +                       : [addr] "r" (src)               \
> > +                       : "memory");                     \
> > +       } else {                                         \
> > +               asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> > +                       : [tmp] "=&r" (dst)              \
> > +                       : [addr] "r" (src)               \
> > +                       : "memory");                     \
> > +       } }
> > +
> > +/*
> > + * Atomic exclusive load from addr, it returns the 128-bit content of
> > + * *addr while making it 'monitored', when it is written by someone
> > + * else, the 'monitored' state is cleared and an event is generated
> > + * implicitly to exit WFE.
> > + */
> > +#define __RTE_ARM_LOAD_EXC_128(src, dst, memorder) {                    \
> > +       volatile rte_int128_t *dst_128 = (volatile rte_int128_t
> > +*)&dst; \
> 
> dst needs some () protection => &(dst)
[Feifei] dst comes from the internally defined variable 'value' in
'rte_wait_event', so we can be sure it is just a simple variable
and we do not need to add '()'
> Is volatile necessary?
[Feifei] This volatile is for the case where our parameter 'addr' is volatile.
And we use 'typeof' for 'value', so the value will be defined as 'volatile':
+ typeof(*(addr)) value
So if there is no 'volatile' here, the compiler will report the warning:
'volatile is discarded'.
> 
> 
> > +       if (memorder == __ATOMIC_RELAXED) {                             \
> > +               asm volatile("ldxp %x[tmp0], %x[tmp1], [%x[addr]]"      \
> > +                       : [tmp0] "=&r" (dst_128->val[0]),               \
> > +                         [tmp1] "=&r" (dst_128->val[1])                \
> > +                       : [addr] "r" (src)                              \
> > +                       : "memory");                                    \
> > +       } else {                                                        \
> > +               asm volatile("ldaxp %x[tmp0], %x[tmp1], [%x[addr]]"     \
> > +                       : [tmp0] "=&r" (dst_128->val[0]),               \
> > +                         [tmp1] "=&r" (dst_128->val[1])                \
> > +                       : [addr] "r" (src)                              \
> > +                       : "memory");                                    \
> > +       } }                                                             \
> > +
> > +#define __RTE_ARM_LOAD_EXC(src, dst, memorder, size) {          \
> > +       RTE_BUILD_BUG_ON(size != 16 && size != 32 && size != 64 \
> > +                               && size != 128);                \
> 
> Indent should be one tab (idem in other places of this patch).
> Double tab is when we have line continuation in tests.
[Feifei] Ok.
> 
> 
> > +       if (size == 16)                                         \
> > +               __RTE_ARM_LOAD_EXC_16(src, dst, memorder)       \
> > +       else if (size == 32)                                    \
> > +               __RTE_ARM_LOAD_EXC_32(src, dst, memorder)       \
> > +       else if (size == 64)                                    \
> > +               __RTE_ARM_LOAD_EXC_64(src, dst, memorder)       \
> > +       else if (size == 128)                                   \
> > +               __RTE_ARM_LOAD_EXC_128(src, dst, memorder)      \
> > +}
> > +
> 
> [snip]
> 
> > -#undef __LOAD_EXC_64
> >
> > -#undef __SEVL
> > -#undef __WFE
> > +#define rte_wait_event(addr, mask, cond, expected, memorder)              \
> > +do {                                                                      \
> > +       RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));                \
> 
> Is this check on memorder being constant necessary?
> We have a build bug on, right after, would it not catch non constant cases?
I think this can first check whether memorder has been assigned or is NULL.
> 
> > +       RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&
> \
> > +                               memorder != __ATOMIC_RELAXED);            \
> > +       const uint32_t size = sizeof(*(addr)) << 3;                       \
> > +       typeof(*(addr)) expected_value = (expected);                      \
> 
> No need for () around expected.
[Feifei] expected and addr are macro arguments, and we cannot
know in what form users will define them, so in order to avoid unpredicted
side effects from operator associativity, it is necessary to add them.
Please see the discussion with Konstantin:
http://patches.dpdk.org/project/dpdk/patch/20211020084523.1309177-2-feifei.wang2@arm.com/
> 
> 
> > +       typeof(*(addr)) value;                                            \
> > +       __RTE_ARM_LOAD_EXC((addr), value, memorder, size)                 \
> 
> No need for () around addr.
> 
> 
> > +       if ((value & (mask)) cond expected_value) {                       \
> > +               __RTE_ARM_SEVL()                                          \
> > +               do {                                                      \
> > +                       __RTE_ARM_WFE()                                   \
> > +                       __RTE_ARM_LOAD_EXC((addr), value, memorder,
> > + size) \
> 
> Idem.
> 
> 
> > +               } while ((value & (mask)) cond expected_value);           \
> > +       }                                                                 \
> > +} while (0)
> >
> >  #endif
> >
> > diff --git a/lib/eal/include/generic/rte_pause.h
> > b/lib/eal/include/generic/rte_pause.h
> > index 668ee4a184..d0c5b5a415 100644
> > --- a/lib/eal/include/generic/rte_pause.h
> > +++ b/lib/eal/include/generic/rte_pause.h
> > @@ -111,6 +111,34 @@ rte_wait_until_equal_64(volatile uint64_t *addr,
> uint64_t expected,
> >         while (__atomic_load_n(addr, memorder) != expected)
> >                 rte_pause();
> >  }
> 
> With this patch, ARM header goes though a conversion of assert() to
> compilation checks (build bug on).
> I don't see a reason not to do the same in generic header.
> 
> As a result of this conversion, #include <assert.h> then can be removed.
> Though it triggers build failure on following files (afaics) who were implictly
> relying on this inclusion:
> drivers/net/ark/ark_ddm.c
> drivers/net/ark/ark_udm.c
> drivers/net/ice/ice_fdir_filter.c
> drivers/net/ionic/ionic_rxtx.c
> drivers/net/mlx4/mlx4_txq.c
[Feifei] You are right, and we can put this change in another patch series.
> 
> 
> --
> David Marchand


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [PATCH v9 0/5] add new helper for wait scheme
  2021-09-02  5:32 [dpdk-dev] [RFC PATCH v1 0/5] add new API for wait until scheme Feifei Wang
                   ` (11 preceding siblings ...)
  2021-10-29  8:20 ` [dpdk-dev] [PATCH v8 0/5] add new definitions for wait scheme Feifei Wang
@ 2021-11-01  6:00 ` Feifei Wang
  2021-11-01  6:00   ` [dpdk-dev] [PATCH v9 1/5] eal: add a new generic " Feifei Wang
                     ` (5 more replies)
  12 siblings, 6 replies; 113+ messages in thread
From: Feifei Wang @ 2021-11-01  6:00 UTC (permalink / raw)
  Cc: dev, nd, jerinjacobk, stephen, thomas, david.marchand, Feifei Wang

Add new helper for wait scheme, and apply this new definitions into
lib to replace rte_pause.

v2:
1. use macro to create new wait scheme (Stephen)

v3:
1. delete unnecessary bug fix in bpf (Konstantin)

v4:
1. put size into the macro body (Konstantin)
2. replace assert with BUILD_BUG_ON (Stephen)
3. delete unnecessary compiler barrier for bpf (Konstantin)

v5:
1. 'size' is not the parameter (Konstantin)
2. put () around macro parameters (Konstantin)
3. fix some original typo issue (Jerin)
4. swap 'rte_wait_event' parameter location (Jerin)
5. add new macro '__LOAD_EXC'
6. delete 'undef' to prevent compilation warning

v6:
1. fix patch style check warning
2. delete 'bpf' patch due to 'const' limit

v7:
1. add __RTE_ARM to to fix the namespace (Jerin)
2. use 'uintptr_t *' in mcslock for different
architecture(32/64) (Mattias)
3. add a new pointer 'next' in mcslock to fix
compiler issue
4. add bpf patch and use 'uintptr_t' to fix const
discard warning (Konstantin)

v8:
1. add __LOAD_EXC_128 (Jerin)
2. use BUILD_BUG_ON to replace assert (Jerin)

v9:
1. rename 'wait_event' as 'wait_until' (David)
2. change commit message (David)

Feifei Wang (5):
  eal: add a new generic helper for wait scheme
  pflock: use wait until scheme for read pflock
  mcslock: use wait until scheme for mcslock
  bpf: use wait until scheme for Rx/Tx iteration
  distributor: use wait until scheme

 lib/bpf/bpf_pkt.c                        |   9 +-
 lib/distributor/rte_distributor_single.c |  10 +-
 lib/eal/arm/include/rte_pause_64.h       | 202 ++++++++++++++---------
 lib/eal/include/generic/rte_mcslock.h    |   5 +-
 lib/eal/include/generic/rte_pause.h      |  29 ++++
 lib/eal/include/generic/rte_pflock.h     |   4 +-
 6 files changed, 166 insertions(+), 93 deletions(-)

-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [PATCH v9 1/5] eal: add a new generic helper for wait scheme
  2021-11-01  6:00 ` [dpdk-dev] [PATCH v9 0/5] add new helper for wait scheme Feifei Wang
@ 2021-11-01  6:00   ` Feifei Wang
  2021-11-01  6:00   ` [dpdk-dev] [PATCH v9 2/5] pflock: use wait until scheme for read pflock Feifei Wang
                     ` (4 subsequent siblings)
  5 siblings, 0 replies; 113+ messages in thread
From: Feifei Wang @ 2021-11-01  6:00 UTC (permalink / raw)
  To: Ruifeng Wang
  Cc: dev, nd, jerinjacobk, stephen, thomas, david.marchand,
	Feifei Wang, Konstantin Ananyev, Jerin Jacob

Add a new generic helper which is a macro for wait scheme.

Furthermore, to prevent compilation warning in arm:
----------------------------------------------
'warning: implicit declaration of function ...'
----------------------------------------------
Delete 'undef' constructions for '__LOAD_EXC_xx', '__SEVL' and '__WFE'.
And add ‘__RTE_ARM’ for these macros to fix the namespace.
This is because original macros are undefine at the end of the file.
If new macro 'rte_wait_event' calls them in other files, they will be
seen as 'not defined'.

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
Acked-by: Jerin Jacob <jerinj@marvell.com>
---
 lib/eal/arm/include/rte_pause_64.h  | 202 +++++++++++++++++-----------
 lib/eal/include/generic/rte_pause.h |  29 ++++
 2 files changed, 155 insertions(+), 76 deletions(-)

diff --git a/lib/eal/arm/include/rte_pause_64.h b/lib/eal/arm/include/rte_pause_64.h
index e87d10b8cc..0ca03c6130 100644
--- a/lib/eal/arm/include/rte_pause_64.h
+++ b/lib/eal/arm/include/rte_pause_64.h
@@ -26,47 +26,120 @@ static inline void rte_pause(void)
 #ifdef RTE_WAIT_UNTIL_EQUAL_ARCH_DEFINED
 
 /* Send an event to quit WFE. */
-#define __SEVL() { asm volatile("sevl" : : : "memory"); }
+#define __RTE_ARM_SEVL() { asm volatile("sevl" : : : "memory"); }
 
 /* Put processor into low power WFE(Wait For Event) state. */
-#define __WFE() { asm volatile("wfe" : : : "memory"); }
+#define __RTE_ARM_WFE() { asm volatile("wfe" : : : "memory"); }
 
-static __rte_always_inline void
-rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
-		int memorder)
-{
-	uint16_t value;
-
-	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
-
-	/*
-	 * Atomic exclusive load from addr, it returns the 16-bit content of
-	 * *addr while making it 'monitored',when it is written by someone
-	 * else, the 'monitored' state is cleared and a event is generated
-	 * implicitly to exit WFE.
-	 */
-#define __LOAD_EXC_16(src, dst, memorder) {               \
+/*
+ * Atomic exclusive load from addr, it returns the 16-bit content of
+ * *addr while making it 'monitored', when it is written by someone
+ * else, the 'monitored' state is cleared and an event is generated
+ * implicitly to exit WFE.
+ */
+#define __RTE_ARM_LOAD_EXC_16(src, dst, memorder) {       \
 	if (memorder == __ATOMIC_RELAXED) {               \
 		asm volatile("ldxrh %w[tmp], [%x[addr]]"  \
 			: [tmp] "=&r" (dst)               \
-			: [addr] "r"(src)                 \
+			: [addr] "r" (src)                \
 			: "memory");                      \
 	} else {                                          \
 		asm volatile("ldaxrh %w[tmp], [%x[addr]]" \
 			: [tmp] "=&r" (dst)               \
-			: [addr] "r"(src)                 \
+			: [addr] "r" (src)                \
 			: "memory");                      \
 	} }
 
-	__LOAD_EXC_16(addr, value, memorder)
+/*
+ * Atomic exclusive load from addr, it returns the 32-bit content of
+ * *addr while making it 'monitored', when it is written by someone
+ * else, the 'monitored' state is cleared and an event is generated
+ * implicitly to exit WFE.
+ */
+#define __RTE_ARM_LOAD_EXC_32(src, dst, memorder) {      \
+	if (memorder == __ATOMIC_RELAXED) {              \
+		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
+			: [tmp] "=&r" (dst)              \
+			: [addr] "r" (src)               \
+			: "memory");                     \
+	} else {                                         \
+		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
+			: [tmp] "=&r" (dst)              \
+			: [addr] "r" (src)               \
+			: "memory");                     \
+	} }
+
+/*
+ * Atomic exclusive load from addr, it returns the 64-bit content of
+ * *addr while making it 'monitored', when it is written by someone
+ * else, the 'monitored' state is cleared and an event is generated
+ * implicitly to exit WFE.
+ */
+#define __RTE_ARM_LOAD_EXC_64(src, dst, memorder) {      \
+	if (memorder == __ATOMIC_RELAXED) {              \
+		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
+			: [tmp] "=&r" (dst)              \
+			: [addr] "r" (src)               \
+			: "memory");                     \
+	} else {                                         \
+		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
+			: [tmp] "=&r" (dst)              \
+			: [addr] "r" (src)               \
+			: "memory");                     \
+	} }
+
+/*
+ * Atomic exclusive load from addr, it returns the 128-bit content of
+ * *addr while making it 'monitored', when it is written by someone
+ * else, the 'monitored' state is cleared and an event is generated
+ * implicitly to exit WFE.
+ */
+#define __RTE_ARM_LOAD_EXC_128(src, dst, memorder) {                    \
+	volatile rte_int128_t *dst_128 = (volatile rte_int128_t *)&dst; \
+	if (memorder == __ATOMIC_RELAXED) {                             \
+		asm volatile("ldxp %x[tmp0], %x[tmp1], [%x[addr]]"      \
+			: [tmp0] "=&r" (dst_128->val[0]),               \
+			  [tmp1] "=&r" (dst_128->val[1])                \
+			: [addr] "r" (src)                              \
+			: "memory");                                    \
+	} else {                                                        \
+		asm volatile("ldaxp %x[tmp0], %x[tmp1], [%x[addr]]"     \
+			: [tmp0] "=&r" (dst_128->val[0]),               \
+			  [tmp1] "=&r" (dst_128->val[1])                \
+			: [addr] "r" (src)                              \
+			: "memory");                                    \
+	} }                                                             \
+
+#define __RTE_ARM_LOAD_EXC(src, dst, memorder, size) {          \
+	RTE_BUILD_BUG_ON(size != 16 && size != 32 && size != 64 \
+		&& size != 128);                                \
+	if (size == 16)                                         \
+		__RTE_ARM_LOAD_EXC_16(src, dst, memorder)       \
+	else if (size == 32)                                    \
+		__RTE_ARM_LOAD_EXC_32(src, dst, memorder)       \
+	else if (size == 64)                                    \
+		__RTE_ARM_LOAD_EXC_64(src, dst, memorder)       \
+	else if (size == 128)                                   \
+		__RTE_ARM_LOAD_EXC_128(src, dst, memorder)      \
+}
+
+static __rte_always_inline void
+rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
+		int memorder)
+{
+	uint16_t value;
+
+	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&
+		memorder != __ATOMIC_RELAXED);
+
+	__RTE_ARM_LOAD_EXC_16(addr, value, memorder)
 	if (value != expected) {
-		__SEVL()
+		__RTE_ARM_SEVL()
 		do {
-			__WFE()
-			__LOAD_EXC_16(addr, value, memorder)
+			__RTE_ARM_WFE()
+			__RTE_ARM_LOAD_EXC_16(addr, value, memorder)
 		} while (value != expected);
 	}
-#undef __LOAD_EXC_16
 }
 
 static __rte_always_inline void
@@ -75,36 +148,17 @@ rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
 {
 	uint32_t value;
 
-	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
-
-	/*
-	 * Atomic exclusive load from addr, it returns the 32-bit content of
-	 * *addr while making it 'monitored',when it is written by someone
-	 * else, the 'monitored' state is cleared and a event is generated
-	 * implicitly to exit WFE.
-	 */
-#define __LOAD_EXC_32(src, dst, memorder) {              \
-	if (memorder == __ATOMIC_RELAXED) {              \
-		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
-			: [tmp] "=&r" (dst)              \
-			: [addr] "r"(src)                \
-			: "memory");                     \
-	} else {                                         \
-		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
-			: [tmp] "=&r" (dst)              \
-			: [addr] "r"(src)                \
-			: "memory");                     \
-	} }
+	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&
+		memorder != __ATOMIC_RELAXED);
 
-	__LOAD_EXC_32(addr, value, memorder)
+	__RTE_ARM_LOAD_EXC_32(addr, value, memorder)
 	if (value != expected) {
-		__SEVL()
+		__RTE_ARM_SEVL()
 		do {
-			__WFE()
-			__LOAD_EXC_32(addr, value, memorder)
+			__RTE_ARM_WFE()
+			__RTE_ARM_LOAD_EXC_32(addr, value, memorder)
 		} while (value != expected);
 	}
-#undef __LOAD_EXC_32
 }
 
 static __rte_always_inline void
@@ -113,40 +167,36 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
 {
 	uint64_t value;
 
-	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
-
-	/*
-	 * Atomic exclusive load from addr, it returns the 64-bit content of
-	 * *addr while making it 'monitored',when it is written by someone
-	 * else, the 'monitored' state is cleared and a event is generated
-	 * implicitly to exit WFE.
-	 */
-#define __LOAD_EXC_64(src, dst, memorder) {              \
-	if (memorder == __ATOMIC_RELAXED) {              \
-		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
-			: [tmp] "=&r" (dst)              \
-			: [addr] "r"(src)                \
-			: "memory");                     \
-	} else {                                         \
-		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
-			: [tmp] "=&r" (dst)              \
-			: [addr] "r"(src)                \
-			: "memory");                     \
-	} }
+	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&
+		memorder != __ATOMIC_RELAXED);
 
-	__LOAD_EXC_64(addr, value, memorder)
+	__RTE_ARM_LOAD_EXC_64(addr, value, memorder)
 	if (value != expected) {
-		__SEVL()
+		__RTE_ARM_SEVL()
 		do {
-			__WFE()
-			__LOAD_EXC_64(addr, value, memorder)
+			__RTE_ARM_WFE()
+			__RTE_ARM_LOAD_EXC_64(addr, value, memorder)
 		} while (value != expected);
 	}
 }
-#undef __LOAD_EXC_64
 
-#undef __SEVL
-#undef __WFE
+#define RTE_WAIT_UNTIL_MASKED(addr, mask, cond, expected, memorder)       \
+do {                                                                      \
+	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));                \
+	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&                  \
+		memorder != __ATOMIC_RELAXED);                            \
+	const uint32_t size = sizeof(*(addr)) << 3;                       \
+	typeof(*(addr)) expected_value = (expected);                      \
+	typeof(*(addr)) value;                                            \
+	__RTE_ARM_LOAD_EXC((addr), value, memorder, size)                 \
+	if (!((value & (mask)) cond expected_value)) {                    \
+		__RTE_ARM_SEVL()                                          \
+		do {                                                      \
+			__RTE_ARM_WFE()                                   \
+			__RTE_ARM_LOAD_EXC((addr), value, memorder, size) \
+		} while (!((value & (mask)) cond expected_value));        \
+	}                                                                 \
+} while (0)
 
 #endif
 
diff --git a/lib/eal/include/generic/rte_pause.h b/lib/eal/include/generic/rte_pause.h
index 668ee4a184..5894a0ad94 100644
--- a/lib/eal/include/generic/rte_pause.h
+++ b/lib/eal/include/generic/rte_pause.h
@@ -111,6 +111,35 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
 	while (__atomic_load_n(addr, memorder) != expected)
 		rte_pause();
 }
+
+/*
+ * Wait until *addr & mask makes the condition true. With a relaxed memory
+ * ordering model, the loads around this helper can be reordered.
+ *
+ * @param addr
+ *  A pointer to the memory location.
+ * @param mask
+ *  A mask of value bits in interest.
+ * @param cond
+ *  A symbol representing the condition.
+ * @param expected
+ *  An expected value to be in the memory location.
+ * @param memorder
+ *  Two different memory orders that can be specified:
+ *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
+ *  C++11 memory orders with the same names, see the C++11 standard or
+ *  the GCC wiki on atomic synchronization for detailed definition.
+ */
+#define RTE_WAIT_UNTIL_MASKED(addr, mask, cond, expected, memorder)  \
+do {                                                                 \
+	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));           \
+	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&             \
+		memorder != __ATOMIC_RELAXED);                       \
+	typeof(*(addr)) expected_value = (expected);                 \
+	while (!((__atomic_load_n((addr), (memorder)) & (mask)) cond \
+		expected_value))                                     \
+		rte_pause();                                         \
+} while (0)
 #endif
 
 #endif /* _RTE_PAUSE_H_ */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [PATCH v9 2/5] pflock: use wait until scheme for read pflock
  2021-11-01  6:00 ` [dpdk-dev] [PATCH v9 0/5] add new helper for wait scheme Feifei Wang
  2021-11-01  6:00   ` [dpdk-dev] [PATCH v9 1/5] eal: add a new generic " Feifei Wang
@ 2021-11-01  6:00   ` Feifei Wang
  2021-11-03 14:46     ` David Marchand
  2021-11-01  6:00   ` [dpdk-dev] [PATCH v9 3/5] mcslock: use wait until scheme for mcslock Feifei Wang
                     ` (3 subsequent siblings)
  5 siblings, 1 reply; 113+ messages in thread
From: Feifei Wang @ 2021-11-01  6:00 UTC (permalink / raw)
  Cc: dev, nd, jerinjacobk, stephen, thomas, david.marchand,
	Feifei Wang, Ruifeng Wang, Jerin Jacob

Instead of polling for read pflock update, use wait until scheme for
this case.

Jira: ENTNET-2903
Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Acked-by: Jerin Jacob <jerinj@marvell.com>
---
 lib/eal/include/generic/rte_pflock.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/lib/eal/include/generic/rte_pflock.h b/lib/eal/include/generic/rte_pflock.h
index e57c179ef2..5d2864ee8e 100644
--- a/lib/eal/include/generic/rte_pflock.h
+++ b/lib/eal/include/generic/rte_pflock.h
@@ -121,9 +121,7 @@ rte_pflock_read_lock(rte_pflock_t *pf)
 		return;
 
 	/* Wait for current write phase to complete. */
-	while ((__atomic_load_n(&pf->rd.in, __ATOMIC_ACQUIRE)
-		& RTE_PFLOCK_WBITS) == w)
-		rte_pause();
+	RTE_WAIT_UNTIL_MASKED(&pf->rd.in, RTE_PFLOCK_WBITS, !=, w, __ATOMIC_ACQUIRE);
 }
 
 /**
-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [PATCH v9 3/5] mcslock: use wait until scheme for mcslock
  2021-11-01  6:00 ` [dpdk-dev] [PATCH v9 0/5] add new helper for wait scheme Feifei Wang
  2021-11-01  6:00   ` [dpdk-dev] [PATCH v9 1/5] eal: add a new generic " Feifei Wang
  2021-11-01  6:00   ` [dpdk-dev] [PATCH v9 2/5] pflock: use wait until scheme for read pflock Feifei Wang
@ 2021-11-01  6:00   ` Feifei Wang
  2021-11-01  6:00   ` [dpdk-dev] [PATCH v9 4/5] bpf: use wait until scheme for Rx/Tx iteration Feifei Wang
                     ` (2 subsequent siblings)
  5 siblings, 0 replies; 113+ messages in thread
From: Feifei Wang @ 2021-11-01  6:00 UTC (permalink / raw)
  To: Honnappa Nagarahalli
  Cc: dev, nd, jerinjacobk, stephen, thomas, david.marchand,
	Feifei Wang, Ruifeng Wang, Jerin Jacob

Instead of polling for mcslock to be updated, use wait until scheme
for this case.

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Acked-by: Jerin Jacob <jerinj@marvell.com>
---
 lib/eal/include/generic/rte_mcslock.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/lib/eal/include/generic/rte_mcslock.h b/lib/eal/include/generic/rte_mcslock.h
index 34f33c64a5..753836d23c 100644
--- a/lib/eal/include/generic/rte_mcslock.h
+++ b/lib/eal/include/generic/rte_mcslock.h
@@ -116,8 +116,9 @@ rte_mcslock_unlock(rte_mcslock_t **msl, rte_mcslock_t *me)
 		/* More nodes added to the queue by other CPUs.
 		 * Wait until the next pointer is set.
 		 */
-		while (__atomic_load_n(&me->next, __ATOMIC_RELAXED) == NULL)
-			rte_pause();
+		uintptr_t *next;
+		next = (uintptr_t *)&me->next;
+		RTE_WAIT_UNTIL_MASKED(next, UINTPTR_MAX, !=, 0, __ATOMIC_RELAXED);
 	}
 
 	/* Pass lock to next waiter. */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [PATCH v9 4/5] bpf: use wait until scheme for Rx/Tx iteration
  2021-11-01  6:00 ` [dpdk-dev] [PATCH v9 0/5] add new helper for wait scheme Feifei Wang
                     ` (2 preceding siblings ...)
  2021-11-01  6:00   ` [dpdk-dev] [PATCH v9 3/5] mcslock: use wait until scheme for mcslock Feifei Wang
@ 2021-11-01  6:00   ` Feifei Wang
  2021-11-01  6:00   ` [dpdk-dev] [PATCH v9 5/5] distributor: use wait until scheme Feifei Wang
  2021-11-03 14:55   ` [dpdk-dev] [PATCH v9 0/5] add new helper for wait scheme David Marchand
  5 siblings, 0 replies; 113+ messages in thread
From: Feifei Wang @ 2021-11-01  6:00 UTC (permalink / raw)
  To: Konstantin Ananyev
  Cc: dev, nd, jerinjacobk, stephen, thomas, david.marchand,
	Feifei Wang, Ruifeng Wang

Instead of polling for cbi->use to be updated, use wait until scheme.

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
---
 lib/bpf/bpf_pkt.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/lib/bpf/bpf_pkt.c b/lib/bpf/bpf_pkt.c
index 6e8248f0d6..000b82d935 100644
--- a/lib/bpf/bpf_pkt.c
+++ b/lib/bpf/bpf_pkt.c
@@ -113,7 +113,7 @@ bpf_eth_cbi_unuse(struct bpf_eth_cbi *cbi)
 static void
 bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
 {
-	uint32_t nuse, puse;
+	uint32_t puse;
 
 	/* make sure all previous loads and stores are completed */
 	rte_smp_mb();
@@ -122,11 +122,8 @@ bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
 
 	/* in use, busy wait till current RX/TX iteration is finished */
 	if ((puse & BPF_ETH_CBI_INUSE) != 0) {
-		do {
-			rte_pause();
-			rte_compiler_barrier();
-			nuse = cbi->use;
-		} while (nuse == puse);
+		RTE_WAIT_UNTIL_MASKED((uint32_t *)(uintptr_t)&cbi->use, UINT32_MAX,
+			!=, puse, __ATOMIC_RELAXED);
 	}
 }
 
-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] [PATCH v9 5/5] distributor: use wait until scheme
  2021-11-01  6:00 ` [dpdk-dev] [PATCH v9 0/5] add new helper for wait scheme Feifei Wang
                     ` (3 preceding siblings ...)
  2021-11-01  6:00   ` [dpdk-dev] [PATCH v9 4/5] bpf: use wait until scheme for Rx/Tx iteration Feifei Wang
@ 2021-11-01  6:00   ` Feifei Wang
  2021-11-01 16:05     ` Pattan, Reshma
  2021-11-03 14:55   ` [dpdk-dev] [PATCH v9 0/5] add new helper for wait scheme David Marchand
  5 siblings, 1 reply; 113+ messages in thread
From: Feifei Wang @ 2021-11-01  6:00 UTC (permalink / raw)
  To: David Hunt
  Cc: dev, nd, jerinjacobk, stephen, thomas, david.marchand,
	Feifei Wang, Ruifeng Wang, Jerin Jacob

Instead of polling for bufptr64 to be updated, use
wait until scheme for this case.

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Acked-by: Jerin Jacob <jerinj@marvell.com>
---
 lib/distributor/rte_distributor_single.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/lib/distributor/rte_distributor_single.c b/lib/distributor/rte_distributor_single.c
index f4725b1d0b..b653620688 100644
--- a/lib/distributor/rte_distributor_single.c
+++ b/lib/distributor/rte_distributor_single.c
@@ -33,9 +33,8 @@ rte_distributor_request_pkt_single(struct rte_distributor_single *d,
 	union rte_distributor_buffer_single *buf = &d->bufs[worker_id];
 	int64_t req = (((int64_t)(uintptr_t)oldpkt) << RTE_DISTRIB_FLAG_BITS)
 			| RTE_DISTRIB_GET_BUF;
-	while (unlikely(__atomic_load_n(&buf->bufptr64, __ATOMIC_RELAXED)
-			& RTE_DISTRIB_FLAGS_MASK))
-		rte_pause();
+	RTE_WAIT_UNTIL_MASKED(&buf->bufptr64, RTE_DISTRIB_FLAGS_MASK,
+		==, 0, __ATOMIC_RELAXED);
 
 	/* Sync with distributor on GET_BUF flag. */
 	__atomic_store_n(&(buf->bufptr64), req, __ATOMIC_RELEASE);
@@ -74,9 +73,8 @@ rte_distributor_return_pkt_single(struct rte_distributor_single *d,
 	union rte_distributor_buffer_single *buf = &d->bufs[worker_id];
 	uint64_t req = (((int64_t)(uintptr_t)oldpkt) << RTE_DISTRIB_FLAG_BITS)
 			| RTE_DISTRIB_RETURN_BUF;
-	while (unlikely(__atomic_load_n(&buf->bufptr64, __ATOMIC_RELAXED)
-			& RTE_DISTRIB_FLAGS_MASK))
-		rte_pause();
+	RTE_WAIT_UNTIL_MASKED(&buf->bufptr64, RTE_DISTRIB_FLAGS_MASK,
+		==, 0, __ATOMIC_RELAXED);
 
 	/* Sync with distributor on RETURN_BUF flag. */
 	__atomic_store_n(&(buf->bufptr64), req, __ATOMIC_RELEASE);
-- 
2.25.1


^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [dpdk-dev] [PATCH v8 5/5] lib/distributor: use wait event scheme
  2021-10-29 13:58     ` Jerin Jacob
  2021-10-31  8:38       ` David Marchand
@ 2021-11-01 12:44       ` David Hunt
  1 sibling, 0 replies; 113+ messages in thread
From: David Hunt @ 2021-11-01 12:44 UTC (permalink / raw)
  To: Jerin Jacob, Feifei Wang
  Cc: dpdk-dev, nd, Stephen Hemminger, David Marchand, Thomas Monjalon,
	Mattias Rönnblom, Ananyev, Konstantin, Ruifeng Wang


On 29/10/2021 2:58 PM, Jerin Jacob wrote:
> On Fri, Oct 29, 2021 at 1:50 PM Feifei Wang <feifei.wang2@arm.com> wrote:
>> Instead of polling for bufptr64 to be updated, use
>> wait event for this case.
>>
>> Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
>
> With the change in the subject as distributor: ...
> Probably can be fixed in merge time.
>
> Acked-by: Jerin Jacob <jerinj@marvell.com>


Acked-by: David Hunt <david.hunt@intel.com>


>> ---
>>   lib/distributor/rte_distributor_single.c | 10 ++++------
>>   1 file changed, 4 insertions(+), 6 deletions(-)
>>
>> diff --git a/lib/distributor/rte_distributor_single.c b/lib/distributor/rte_distributor_single.c
>> index f4725b1d0b..d52b24a453 100644
>> --- a/lib/distributor/rte_distributor_single.c
>> +++ b/lib/distributor/rte_distributor_single.c
>> @@ -33,9 +33,8 @@ rte_distributor_request_pkt_single(struct rte_distributor_single *d,
>>          union rte_distributor_buffer_single *buf = &d->bufs[worker_id];
>>          int64_t req = (((int64_t)(uintptr_t)oldpkt) << RTE_DISTRIB_FLAG_BITS)
>>                          | RTE_DISTRIB_GET_BUF;
>> -       while (unlikely(__atomic_load_n(&buf->bufptr64, __ATOMIC_RELAXED)
>> -                       & RTE_DISTRIB_FLAGS_MASK))
>> -               rte_pause();
>> +       rte_wait_event(&buf->bufptr64, RTE_DISTRIB_FLAGS_MASK,
>> +                       !=, 0, __ATOMIC_RELAXED);
>>
>>          /* Sync with distributor on GET_BUF flag. */
>>          __atomic_store_n(&(buf->bufptr64), req, __ATOMIC_RELEASE);
>> @@ -74,9 +73,8 @@ rte_distributor_return_pkt_single(struct rte_distributor_single *d,
>>          union rte_distributor_buffer_single *buf = &d->bufs[worker_id];
>>          uint64_t req = (((int64_t)(uintptr_t)oldpkt) << RTE_DISTRIB_FLAG_BITS)
>>                          | RTE_DISTRIB_RETURN_BUF;
>> -       while (unlikely(__atomic_load_n(&buf->bufptr64, __ATOMIC_RELAXED)
>> -                       & RTE_DISTRIB_FLAGS_MASK))
>> -               rte_pause();
>> +       rte_wait_event(&buf->bufptr64, RTE_DISTRIB_FLAGS_MASK,
>> +                       !=, 0, __ATOMIC_RELAXED);
>>
>>          /* Sync with distributor on RETURN_BUF flag. */
>>          __atomic_store_n(&(buf->bufptr64), req, __ATOMIC_RELEASE);
>> --
>> 2.25.1
>>

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [dpdk-dev] [PATCH v9 5/5] distributor: use wait until scheme
  2021-11-01  6:00   ` [dpdk-dev] [PATCH v9 5/5] distributor: use wait until scheme Feifei Wang
@ 2021-11-01 16:05     ` Pattan, Reshma
  2021-11-02  2:00       ` [dpdk-dev] 回复: " Feifei Wang
  0 siblings, 1 reply; 113+ messages in thread
From: Pattan, Reshma @ 2021-11-01 16:05 UTC (permalink / raw)
  To: Feifei Wang, Hunt, David
  Cc: dev, nd, jerinjacobk, stephen, thomas, david.marchand,
	Ruifeng Wang, Jerin Jacob



> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Feifei Wang
> ---
>  lib/distributor/rte_distributor_single.c | 10 ++++------

Hi ,

rte_distributor.c  file also has couple of places where similar change can be done, just wondering was that missed ?

Thanks,
Reshma


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] 回复:  [PATCH v9 5/5] distributor: use wait until scheme
  2021-11-01 16:05     ` Pattan, Reshma
@ 2021-11-02  2:00       ` Feifei Wang
  0 siblings, 0 replies; 113+ messages in thread
From: Feifei Wang @ 2021-11-02  2:00 UTC (permalink / raw)
  To: Pattan, Reshma, Hunt, David
  Cc: dev, nd, jerinjacobk, stephen, thomas, david.marchand,
	Ruifeng Wang, jerinj, nd



> -----邮件原件-----
> 发件人: dev <dev-bounces@dpdk.org> 代表 Pattan, Reshma
> 发送时间: Tuesday, November 2, 2021 12:05 AM
> 收件人: Feifei Wang <Feifei.Wang2@arm.com>; Hunt, David
> <david.hunt@intel.com>
> 抄送: dev@dpdk.org; nd <nd@arm.com>; jerinjacobk@gmail.com;
> stephen@networkplumber.org; thomas@monjalon.net;
> david.marchand@redhat.com; Ruifeng Wang <Ruifeng.Wang@arm.com>;
> jerinj@marvell.com
> 主题: Re: [dpdk-dev] [PATCH v9 5/5] distributor: use wait until scheme
> 
> 
> 
> > -----Original Message-----
> > From: dev <dev-bounces@dpdk.org> On Behalf Of Feifei Wang
> > ---
> >  lib/distributor/rte_distributor_single.c | 10 ++++------
> 
> Hi ,
> 
> rte_distributor.c  file also has couple of places where similar change can be
> done, just wondering was that missed ?
Thanks for the reminder.
I just checked rte_distributor.c and there are 6 places which use rte_pause.

However, I think these places cannot apply the new helper because they need
to do other actions in the loop besides waiting.
Our new helper can only wait for a change of the variable; it cannot perform other actions.

Best Regards
Feifei 
> 
> Thanks,
> Reshma


^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [dpdk-dev] [PATCH v9 2/5] pflock: use wait until scheme for read pflock
  2021-11-01  6:00   ` [dpdk-dev] [PATCH v9 2/5] pflock: use wait until scheme for read pflock Feifei Wang
@ 2021-11-03 14:46     ` David Marchand
  2021-11-04  1:24       ` [dpdk-dev] 回复: " Feifei Wang
  0 siblings, 1 reply; 113+ messages in thread
From: David Marchand @ 2021-11-03 14:46 UTC (permalink / raw)
  To: Feifei Wang
  Cc: dev, nd, Jerin Jacob, Stephen Hemminger, Thomas Monjalon,
	Ruifeng Wang, Jerin Jacob

On Mon, Nov 1, 2021 at 7:00 AM Feifei Wang <feifei.wang2@arm.com> wrote:
>
> Instead of polling for read pflock update, use wait until scheme for
> this case.
>
> Jira: ENTNET-2903

I dropped what looks like an internal tag.

> Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> Acked-by: Jerin Jacob <jerinj@marvell.com>


-- 
David Marchand


^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [dpdk-dev] [PATCH v9 0/5] add new helper for wait scheme
  2021-11-01  6:00 ` [dpdk-dev] [PATCH v9 0/5] add new helper for wait scheme Feifei Wang
                     ` (4 preceding siblings ...)
  2021-11-01  6:00   ` [dpdk-dev] [PATCH v9 5/5] distributor: use wait until scheme Feifei Wang
@ 2021-11-03 14:55   ` David Marchand
  5 siblings, 0 replies; 113+ messages in thread
From: David Marchand @ 2021-11-03 14:55 UTC (permalink / raw)
  To: Feifei Wang; +Cc: dev, nd, Jerin Jacob, Stephen Hemminger, Thomas Monjalon

On Mon, Nov 1, 2021 at 7:00 AM Feifei Wang <feifei.wang2@arm.com> wrote:
>
> Add new helper for wait scheme, and apply this new definitions into
> lib to replace rte_pause.
>
> v2:
> 1. use macro to create new wait scheme (Stephen)
>
> v3:
> 1. delete unnecessary bug fix in bpf (Konstantin)
>
> v4:
> 1. put size into the macro body (Konstantin)
> 2. replace assert with BUILD_BUG_ON (Stephen)
> 3. delete unnecessary compiler barrier for bpf (Konstantin)
>
> v5:
> 1. 'size' is not the parameter (Konstantin)
> 2. put () around macro parameters (Konstantin)
> 3. fix some original typo issue (Jerin)
> 4. swap 'rte_wait_event' parameter location (Jerin)
> 5. add new macro '__LOAD_EXC'
> 6. delete 'undef' to prevent compilation warning
>
> v6:
> 1. fix patch style check warning
> 2. delete 'bpf' patch due to 'const' limit
>
> v7:
> 1. add __RTE_ARM to to fix the namespace (Jerin)
> 2. use 'uintptr_t *' in mcslock for different
> architecture(32/64) (Mattias)
> 3. add a new pointer 'next' in mcslock to fix
> compiler issue
> 4. add bpf patch and use 'uintptr_t' to fix const
> discard warning (Konstantin)
>
> v8:
> 1. add __LOAD_EXC_128 (Jerin)
> 2. use BUILD_BUG_ON to replace assert (Jerin)
>
> v9:
> 1. rename 'wait_event' as 'wait_until' (David)
> 2. change commit message (David)
>
> Feifei Wang (5):
>   eal: add a new generic helper for wait scheme
>   pflock: use wait until scheme for read pflock
>   mcslock: use wait until scheme for mcslock
>   bpf: use wait until scheme for Rx/Tx iteration
>   distributor: use wait until scheme

Series applied, thanks.


-- 
David Marchand


^ permalink raw reply	[flat|nested] 113+ messages in thread

* [dpdk-dev] 回复: [PATCH v9 2/5] pflock: use wait until scheme for read pflock
  2021-11-03 14:46     ` David Marchand
@ 2021-11-04  1:24       ` Feifei Wang
  0 siblings, 0 replies; 113+ messages in thread
From: Feifei Wang @ 2021-11-04  1:24 UTC (permalink / raw)
  To: David Marchand
  Cc: dev, nd, Jerin Jacob, Stephen Hemminger, thomas, Ruifeng Wang,
	jerinj, nd



> -----邮件原件-----
> 发件人: David Marchand <david.marchand@redhat.com>
> 发送时间: Wednesday, November 3, 2021 10:47 PM
> 收件人: Feifei Wang <Feifei.Wang2@arm.com>
> 抄送: dev <dev@dpdk.org>; nd <nd@arm.com>; Jerin Jacob
> <jerinjacobk@gmail.com>; Stephen Hemminger
> <stephen@networkplumber.org>; thomas@monjalon.net; Ruifeng Wang
> <Ruifeng.Wang@arm.com>; jerinj@marvell.com
> 主题: Re: [PATCH v9 2/5] pflock: use wait until scheme for read pflock
> 
> On Mon, Nov 1, 2021 at 7:00 AM Feifei Wang <feifei.wang2@arm.com>
> wrote:
> >
> > Instead of polling for read pflock update, use wait until scheme for
> > this case.
> >
> > Jira: ENTNET-2903
> 
> I dropped what looks like an internal tag.
Thanks very much. It's my fault — I forgot to delete this.
> 
> > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > Acked-by: Jerin Jacob <jerinj@marvell.com>
> 
> 
> --
> David Marchand


^ permalink raw reply	[flat|nested] 113+ messages in thread

end of thread, other threads:[~2021-11-04  1:24 UTC | newest]

Thread overview: 113+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-02  5:32 [dpdk-dev] [RFC PATCH v1 0/5] add new API for wait until scheme Feifei Wang
2021-09-02  5:32 ` [dpdk-dev] [RFC PATCH v1 1/5] eal: " Feifei Wang
2021-09-02  5:32 ` [dpdk-dev] [RFC PATCH v1 2/5] eal: use wait until scheme for read pflock Feifei Wang
2021-09-02  5:32 ` [dpdk-dev] [RFC PATCH v1 3/5] eal: use wait until scheme for mcslock Feifei Wang
2021-09-02  5:32 ` [dpdk-dev] [RFC PATCH v1 4/5] lib/bpf: use wait until scheme for Rx/Tx iteration Feifei Wang
2021-09-02  5:32 ` [dpdk-dev] [RFC PATCH v1 5/5] lib/distributor: use wait until scheme Feifei Wang
2021-09-02 15:22 ` [dpdk-dev] [RFC PATCH v1 0/5] add new API for " Stephen Hemminger
2021-09-03  7:02   ` [dpdk-dev] 回复: " Feifei Wang
2021-09-23  9:58 ` [dpdk-dev] [RFC PATCH v2 0/5] add new definitions for wait scheme Feifei Wang
2021-09-23  9:58   ` [dpdk-dev] [RFC PATCH v2 1/5] eal: " Feifei Wang
2021-09-23  9:58   ` [dpdk-dev] [RFC PATCH v2 2/5] eal: use wait event for read pflock Feifei Wang
2021-09-23  9:59   ` [dpdk-dev] [RFC PATCH v2 3/5] eal: use wait event scheme for mcslock Feifei Wang
2021-09-23  9:59   ` [dpdk-dev] [RFC PATCH v2 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration Feifei Wang
2021-09-24 18:07     ` Ananyev, Konstantin
2021-09-26  2:19       ` [dpdk-dev] 回复: " Feifei Wang
2021-09-23  9:59   ` [dpdk-dev] [RFC PATCH v2 5/5] lib/distributor: use wait event scheme Feifei Wang
2021-09-26  6:32 ` [dpdk-dev] [RFC PATCH v3 0/5] add new definitions for wait scheme Feifei Wang
2021-09-26  6:32   ` [dpdk-dev] [RFC PATCH v3 1/5] eal: " Feifei Wang
2021-10-07 16:18     ` Ananyev, Konstantin
2021-10-12  8:09       ` [dpdk-dev] 回复: " Feifei Wang
2021-10-13 15:03         ` [dpdk-dev] " Ananyev, Konstantin
2021-10-13 17:00           ` Stephen Hemminger
2021-10-14  3:14             ` [dpdk-dev] 回复: " Feifei Wang
2021-10-14  3:08           ` Feifei Wang
2021-09-26  6:32   ` [dpdk-dev] [RFC PATCH v3 2/5] eal: use wait event for read pflock Feifei Wang
2021-09-26  6:33   ` [dpdk-dev] [RFC PATCH v3 3/5] eal: use wait event scheme for mcslock Feifei Wang
2021-09-26  6:33   ` [dpdk-dev] [RFC PATCH v3 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration Feifei Wang
2021-10-07 15:50     ` Ananyev, Konstantin
2021-10-07 17:40       ` Ananyev, Konstantin
2021-10-20  6:20         ` [dpdk-dev] 回复: " Feifei Wang
2021-09-26  6:33   ` [dpdk-dev] [RFC PATCH v3 5/5] lib/distributor: use wait event scheme Feifei Wang
2021-10-20  8:45   ` [dpdk-dev] [PATCH v4 0/5] add new definitions for wait scheme Feifei Wang
2021-10-20  8:45     ` [dpdk-dev] [PATCH v4 1/5] eal: " Feifei Wang
2021-10-21 16:24       ` Ananyev, Konstantin
2021-10-25  9:20         ` [dpdk-dev] 回复: " Feifei Wang
2021-10-25 14:28           ` [dpdk-dev] " Ananyev, Konstantin
2021-10-26  1:08             ` [dpdk-dev] 回复: " Feifei Wang
2021-10-22  0:10       ` [dpdk-dev] " Jerin Jacob
2021-10-25  9:30         ` [dpdk-dev] 回复: " Feifei Wang
2021-10-25  9:43           ` [dpdk-dev] " Jerin Jacob
2021-10-26  1:11             ` [dpdk-dev] 回复: " Feifei Wang
2021-10-20  8:45     ` [dpdk-dev] [PATCH v4 2/5] eal: use wait event for read pflock Feifei Wang
2021-10-20  8:45     ` [dpdk-dev] [PATCH v4 3/5] eal: use wait event scheme for mcslock Feifei Wang
2021-10-20  8:45     ` [dpdk-dev] [PATCH v4 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration Feifei Wang
2021-10-20  8:45     ` [dpdk-dev] [PATCH v4 5/5] lib/distributor: use wait event scheme Feifei Wang
2021-10-26  8:01 ` [dpdk-dev] [PATCH v5 0/5] add new definitions for wait scheme Feifei Wang
2021-10-26  8:02   ` [dpdk-dev] [PATCH v5 1/5] eal: " Feifei Wang
2021-10-26  8:08     ` [dpdk-dev] 回复: " Feifei Wang
2021-10-26  9:46       ` [dpdk-dev] " Ananyev, Konstantin
2021-10-26  9:59         ` Ananyev, Konstantin
2021-10-27  6:56           ` [dpdk-dev] 回复: " Feifei Wang
2021-10-26  8:02   ` [dpdk-dev] [PATCH v5 2/5] eal: use wait event for read pflock Feifei Wang
2021-10-26  8:02   ` [dpdk-dev] [PATCH v5 3/5] eal: use wait event scheme for mcslock Feifei Wang
2021-10-26  8:02   ` [dpdk-dev] [PATCH v5 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration Feifei Wang
2021-10-26  8:18     ` [dpdk-dev] 回复: " Feifei Wang
2021-10-26  9:43       ` [dpdk-dev] " Ananyev, Konstantin
2021-10-26 12:56         ` Ananyev, Konstantin
2021-10-27  7:04           ` [dpdk-dev] 回复: " Feifei Wang
2021-10-27  7:31             ` Feifei Wang
2021-10-27 14:47             ` [dpdk-dev] " Ananyev, Konstantin
2021-10-28  6:24               ` [dpdk-dev] 回复: " Feifei Wang
2021-10-26  8:02   ` [dpdk-dev] [PATCH v5 5/5] lib/distributor: use wait event scheme Feifei Wang
2021-10-27  8:10 ` [dpdk-dev] [PATCH v6 0/4] add new definitions for wait scheme Feifei Wang
2021-10-27  8:10   ` [dpdk-dev] [PATCH v6 1/4] eal: " Feifei Wang
2021-10-27  8:10   ` [dpdk-dev] [PATCH v6 2/4] eal: use wait event for read pflock Feifei Wang
2021-10-27  8:10   ` [dpdk-dev] [PATCH v6 3/4] eal: use wait event scheme for mcslock Feifei Wang
2021-10-27 11:16     ` Mattias Rönnblom
2021-10-28  6:32       ` [dpdk-dev] 回复: " Feifei Wang
2021-10-27  8:10   ` [dpdk-dev] [PATCH v6 4/4] lib/distributor: use wait event scheme Feifei Wang
2021-10-27 10:57   ` [dpdk-dev] [PATCH v6 0/4] add new definitions for wait scheme Jerin Jacob
2021-10-28  6:33     ` [dpdk-dev] 回复: " Feifei Wang
2021-10-28  6:56 ` [dpdk-dev] [PATCH v7 0/5] " Feifei Wang
2021-10-28  6:56   ` [dpdk-dev] [PATCH v7 1/5] eal: " Feifei Wang
2021-10-28  7:15     ` Jerin Jacob
2021-10-28  7:40       ` [dpdk-dev] 回复: " Feifei Wang
2021-10-28  7:51         ` [dpdk-dev] " Jerin Jacob
2021-10-28  9:27           ` [dpdk-dev] 回复: " Feifei Wang
2021-10-28 13:14     ` [dpdk-dev] " Ananyev, Konstantin
2021-10-28  6:56   ` [dpdk-dev] [PATCH v7 2/5] eal: use wait event for read pflock Feifei Wang
2021-10-28  6:56   ` [dpdk-dev] [PATCH v7 3/5] eal: use wait event scheme for mcslock Feifei Wang
2021-10-28  7:02     ` Jerin Jacob
2021-10-28  7:14       ` [dpdk-dev] 回复: " Feifei Wang
2021-10-28  6:56   ` [dpdk-dev] [PATCH v7 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration Feifei Wang
2021-10-28 13:15     ` Ananyev, Konstantin
2021-10-28  6:56   ` [dpdk-dev] [PATCH v7 5/5] lib/distributor: use wait event scheme Feifei Wang
2021-10-29  8:20 ` [dpdk-dev] [PATCH v8 0/5] add new definitions for wait scheme Feifei Wang
2021-10-29  8:20   ` [dpdk-dev] [PATCH v8 1/5] eal: " Feifei Wang
2021-10-29 13:54     ` Jerin Jacob
2021-10-31  8:38     ` David Marchand
2021-11-01  2:29       ` [dpdk-dev] 回复: " Feifei Wang
2021-10-29  8:20   ` [dpdk-dev] [PATCH v8 2/5] eal: use wait event for read pflock Feifei Wang
2021-10-29 13:55     ` Jerin Jacob
2021-10-31  8:37     ` David Marchand
2021-10-29  8:20   ` [dpdk-dev] [PATCH v8 3/5] eal: use wait event scheme for mcslock Feifei Wang
2021-10-29 13:55     ` Jerin Jacob
2021-10-31  8:37     ` David Marchand
2021-10-29  8:20   ` [dpdk-dev] [PATCH v8 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration Feifei Wang
2021-10-31  8:37     ` David Marchand
2021-10-29  8:20   ` [dpdk-dev] [PATCH v8 5/5] lib/distributor: use wait event scheme Feifei Wang
2021-10-29 13:58     ` Jerin Jacob
2021-10-31  8:38       ` David Marchand
2021-11-01 12:44       ` David Hunt
2021-11-01  6:00 ` [dpdk-dev] [PATCH v9 0/5] add new helper for wait scheme Feifei Wang
2021-11-01  6:00   ` [dpdk-dev] [PATCH v9 1/5] eal: add a new generic " Feifei Wang
2021-11-01  6:00   ` [dpdk-dev] [PATCH v9 2/5] pflock: use wait until scheme for read pflock Feifei Wang
2021-11-03 14:46     ` David Marchand
2021-11-04  1:24       ` [dpdk-dev] 回复: " Feifei Wang
2021-11-01  6:00   ` [dpdk-dev] [PATCH v9 3/5] mcslock: use wait until scheme for mcslock Feifei Wang
2021-11-01  6:00   ` [dpdk-dev] [PATCH v9 4/5] bpf: use wait until scheme for Rx/Tx iteration Feifei Wang
2021-11-01  6:00   ` [dpdk-dev] [PATCH v9 5/5] distributor: use wait until scheme Feifei Wang
2021-11-01 16:05     ` Pattan, Reshma
2021-11-02  2:00       ` [dpdk-dev] 回复: " Feifei Wang
2021-11-03 14:55   ` [dpdk-dev] [PATCH v9 0/5] add new helper for wait scheme David Marchand

DPDK patches and discussions

This inbox may be cloned and mirrored by anyone:

	git clone --mirror http://inbox.dpdk.org/dev/0 dev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 dev dev/ http://inbox.dpdk.org/dev \
		dev@dpdk.org
	public-inbox-index dev

Example config snippet for mirrors.
Newsgroup available over NNTP:
	nntp://inbox.dpdk.org/inbox.dpdk.dev


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git