From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <gavin.hu@arm.com>
Received: from foss.arm.com (foss.arm.com [217.140.101.70])
 by dpdk.org (Postfix) with ESMTP id 50E254C9F
 for <dev@dpdk.org>; Thu, 27 Dec 2018 05:14:24 +0100 (CET)
Received: from usa-sjc-imap-foss1.foss.arm.com (unknown [10.72.51.249])
 by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id B677E15BF;
 Wed, 26 Dec 2018 20:14:23 -0800 (PST)
Received: from net-debian.shanghai.arm.com (net-debian.shanghai.arm.com
 [10.169.36.53])
 by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPA id EE89F3F5AF;
 Wed, 26 Dec 2018 20:14:21 -0800 (PST)
From: Gavin Hu <gavin.hu@arm.com>
To: dev@dpdk.org
Cc: thomas@monjalon.net, jerinj@marvell.com, hemant.agrawal@nxp.com,
 bruce.richardson@intel.com, chaozhu@linux.vnet.ibm.com,
 Honnappa.Nagarahalli@arm.com, stephen@networkplumber.org,
 david.marchand@redhat.com, nd@arm.com, Joyce Kong <joyce.kong@arm.com>
Date: Thu, 27 Dec 2018 12:13:49 +0800
Message-Id: <20181227041349.3058-7-gavin.hu@arm.com>
X-Mailer: git-send-email 2.11.0
In-Reply-To: <20181227041349.3058-1-gavin.hu@arm.com>
References: <20181227041349.3058-1-gavin.hu@arm.com>
Subject: [dpdk-dev] [PATCH v3 6/6] spinlock: make lock ticket-based to improve fairness
X-BeenThere: dev@dpdk.org
X-Mailman-Version: 2.1.15
Precedence: list
List-Id: DPDK patches and discussions <dev.dpdk.org>
List-Unsubscribe: <https://mails.dpdk.org/options/dev>,
 <mailto:dev-request@dpdk.org?subject=unsubscribe>
List-Archive: <http://mails.dpdk.org/archives/dev/>
List-Post: <mailto:dev@dpdk.org>
List-Help: <mailto:dev-request@dpdk.org?subject=help>
List-Subscribe: <https://mails.dpdk.org/listinfo/dev>,
 <mailto:dev-request@dpdk.org?subject=subscribe>
X-List-Received-Date: Thu, 27 Dec 2018 04:14:25 -0000

From: Joyce Kong <joyce.kong@arm.com>

The old implementation is unfair: some threads may take the lock aggressively
while leaving other threads starving for a long time. As shown in the
following test, within the same period of time some threads take the lock
many more times than others.

The new implementation gives each waiting thread a ticket, and the threads
take the lock one by one in ticket order: first come, first served. This
avoids prolonged starvation and makes lock acquisition more predictable.
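
A minimal, self-contained sketch of the ticket-lock idea (illustration
only; the ticket_lock_t/ticket_lock/ticket_unlock names are hypothetical,
the actual change is in the diff below):

#include <stdint.h>

typedef struct {
	uint16_t current; /* ticket currently being served */
	uint16_t next;    /* next ticket to hand out */
} ticket_lock_t;

static inline void
ticket_lock(ticket_lock_t *tl)
{
	/* Take a ticket; RELAXED is enough, the ACQUIRE load below
	 * orders the critical section. Wrap-around is harmless as long
	 * as fewer than 65536 threads wait at once.
	 */
	uint16_t me = __atomic_fetch_add(&tl->next, 1, __ATOMIC_RELAXED);

	/* Spin (a real implementation would add a CPU pause hint) until
	 * it is this ticket's turn, which enforces FIFO order.
	 */
	while (__atomic_load_n(&tl->current, __ATOMIC_ACQUIRE) != me)
		;
}

static inline void
ticket_unlock(ticket_lock_t *tl)
{
	/* Serve the next ticket; RELEASE publishes the critical section
	 * writes to the next lock holder.
	 */
	uint16_t nxt = __atomic_load_n(&tl->current, __ATOMIC_RELAXED) + 1;
	__atomic_store_n(&tl->current, nxt, __ATOMIC_RELEASE);
}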

*** spinlock_autotest without this patch ***
Core [0] count = 89
Core [1] count = 84
Core [2] count = 94
...
Core [208] count = 171
Core [209] count = 152
Core [210] count = 161
Core [211] count = 187

*** spinlock_autotest with this patch ***
Core [0] count = 534
Core [1] count = 533
Core [2] count = 542
...
Core [208] count = 554
Core [209] count = 556
Core [210] count = 555
Core [211] count = 551

The overall spinlock fairness increased on ThunderX2.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
---
 .../common/include/arch/ppc_64/rte_spinlock.h      |  5 ++
 .../common/include/arch/x86/rte_spinlock.h         |  6 +++
 .../common/include/generic/rte_spinlock.h          | 53 +++++++++++++---------
 3 files changed, 42 insertions(+), 22 deletions(-)

diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h b/lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h
index 39815d9ee..9fa904f92 100644
--- a/lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h
@@ -65,6 +65,11 @@ rte_spinlock_trylock(rte_spinlock_t *sl)
 	return __sync_lock_test_and_set(&sl->locked, 1) == 0;
 }
 
+static inline int
+rte_spinlock_is_locked(rte_spinlock_t *sl)
+{
+	return sl->locked;
+}
 #endif
 
 static inline int rte_tm_supported(void)
diff --git a/lib/librte_eal/common/include/arch/x86/rte_spinlock.h b/lib/librte_eal/common/include/arch/x86/rte_spinlock.h
index e2e2b2643..db80fa420 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_spinlock.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_spinlock.h
@@ -65,6 +65,12 @@ rte_spinlock_trylock (rte_spinlock_t *sl)
 
 	return lockval == 0;
 }
+
+static inline int
+rte_spinlock_is_locked(rte_spinlock_t *sl)
+{
+	return sl->locked;
+}
 #endif
 
 extern uint8_t rte_rtm_supported;
diff --git a/lib/librte_eal/common/include/generic/rte_spinlock.h b/lib/librte_eal/common/include/generic/rte_spinlock.h
index 87ae7a4f1..607abd400 100644
--- a/lib/librte_eal/common/include/generic/rte_spinlock.h
+++ b/lib/librte_eal/common/include/generic/rte_spinlock.h
@@ -27,8 +27,12 @@
 /**
  * The rte_spinlock_t type.
  */
-typedef struct {
-	volatile int locked; /**< lock status 0 = unlocked, 1 = locked */
+typedef union {
+	volatile int locked; /**< raw lock word; locked when s.current != s.next */
+	struct {
+		uint16_t current; /**< ticket currently being served */
+		uint16_t next;    /**< next ticket to hand out */
+	} s;
 } rte_spinlock_t;
 
 /**
@@ -45,7 +49,8 @@ typedef struct {
 static inline void
 rte_spinlock_init(rte_spinlock_t *sl)
 {
-	sl->locked = 0;
+	__atomic_store_n(&sl->s.current, 0, __ATOMIC_RELAXED);
+	__atomic_store_n(&sl->s.next, 0, __ATOMIC_RELAXED);
 }
 
 /**
@@ -61,14 +66,9 @@ rte_spinlock_lock(rte_spinlock_t *sl);
 static inline void
 rte_spinlock_lock(rte_spinlock_t *sl)
 {
-	int exp = 0;
-
-	while (!__atomic_compare_exchange_n(&sl->locked, &exp, 1, 0,
-				__ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) {
-		while (__atomic_load_n(&sl->locked, __ATOMIC_RELAXED))
-			rte_pause();
-		exp = 0;
-	}
+	uint16_t me = __atomic_fetch_add(&sl->s.next, 1, __ATOMIC_RELAXED);
+	while (__atomic_load_n(&sl->s.current, __ATOMIC_ACQUIRE) != me)
+		rte_pause();
 }
 #endif
 
@@ -79,13 +79,15 @@ rte_spinlock_lock(rte_spinlock_t *sl)
  *   A pointer to the spinlock.
  */
 static inline void
-rte_spinlock_unlock (rte_spinlock_t *sl);
+rte_spinlock_unlock(rte_spinlock_t *sl);
 
 #ifdef RTE_FORCE_INTRINSICS
 static inline void
-rte_spinlock_unlock (rte_spinlock_t *sl)
+rte_spinlock_unlock(rte_spinlock_t *sl)
 {
-	__atomic_store_n(&sl->locked, 0, __ATOMIC_RELEASE);
+	uint16_t i = __atomic_load_n(&sl->s.current, __ATOMIC_RELAXED);
+	i++;
+	__atomic_store_n(&sl->s.current, i, __ATOMIC_RELEASE);
 }
 #endif
 
@@ -98,16 +100,19 @@ rte_spinlock_unlock (rte_spinlock_t *sl)
  *   1 if the lock is successfully taken; 0 otherwise.
  */
 static inline int
-rte_spinlock_trylock (rte_spinlock_t *sl);
+rte_spinlock_trylock(rte_spinlock_t *sl);
 
 #ifdef RTE_FORCE_INTRINSICS
 static inline int
-rte_spinlock_trylock (rte_spinlock_t *sl)
+rte_spinlock_trylock(rte_spinlock_t *sl)
 {
-	int exp = 0;
-	return __atomic_compare_exchange_n(&sl->locked, &exp, 1,
-				0, /* disallow spurious failure */
-				__ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
+	uint16_t me = __atomic_fetch_add(&sl->s.next, 1, __ATOMIC_RELAXED);
+	if (__atomic_load_n(&sl->s.current, __ATOMIC_RELAXED) != me) {
+		__atomic_sub_fetch(&sl->s.next, 1, __ATOMIC_RELAXED);
+		return 0;
+	}
+
+	return 1;
 }
 #endif
 
@@ -119,10 +124,14 @@ rte_spinlock_trylock (rte_spinlock_t *sl)
  * @return
  *   1 if the lock is currently taken; 0 otherwise.
  */
-static inline int rte_spinlock_is_locked (rte_spinlock_t *sl)
+#ifdef RTE_FORCE_INTRINSICS
+static inline int
+rte_spinlock_is_locked(rte_spinlock_t *sl)
 {
-	return __atomic_load_n(&sl->locked, __ATOMIC_ACQUIRE);
+	return (__atomic_load_n(&sl->s.current, __ATOMIC_RELAXED) !=
+			__atomic_load_n(&sl->s.next, __ATOMIC_RELAXED));
 }
+#endif
 
 /**
  * Test if hardware transactional memory (lock elision) is supported
-- 
2.11.0