DPDK patches and discussions
 help / color / mirror / Atom feed
From: Phil Yang <phil.yang@arm.com>
To: jgrajcia@cisco.com, dev@dpdk.org
Cc: Honnappa.Nagarahalli@arm.com, Ruifeng.Wang@arm.com, nd@arm.com
Subject: [dpdk-dev] [PATCH] net/memif: relax barrier for zero copy path
Date: Fri, 11 Sep 2020 13:38:19 +0800
Message-ID: <1599802699-20876-1-git-send-email-phil.yang@arm.com> (raw)

Using 'rte_mb' to synchronize the shared ring head/tail between producer
and consumer stalls the pipeline and hurts performance on platforms with
a weak memory model, such as aarch64.

Replacing the expensive barrier with C11 atomics that use explicit memory
ordering can improve throughput by 3.6%.

Signed-off-by: Phil Yang <phil.yang@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 drivers/net/memif/rte_eth_memif.c | 35 +++++++++++++++++++++++++----------
 1 file changed, 25 insertions(+), 10 deletions(-)

diff --git a/drivers/net/memif/rte_eth_memif.c b/drivers/net/memif/rte_eth_memif.c
index c1c7e9f..a19c0f3 100644
--- a/drivers/net/memif/rte_eth_memif.c
+++ b/drivers/net/memif/rte_eth_memif.c
@@ -253,7 +253,12 @@ memif_free_stored_mbufs(struct pmd_process_private *proc_private, struct memif_q
 	memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
 
 	/* FIXME: improve performance */
-	while (mq->last_tail != ring->tail) {
+	/* The ring->tail acts as a guard variable between Tx and Rx
+	 * threads, so using load-acquire pairs with store-release
+	 * to synchronize it between threads.
+	 */
+	while (mq->last_tail != __atomic_load_n(&ring->tail,
+						__ATOMIC_ACQUIRE)) {
 		RTE_MBUF_PREFETCH_TO_FREE(mq->buffers[(mq->last_tail + 1) & mask]);
 		/* Decrement refcnt and free mbuf. (current segment) */
 		rte_mbuf_refcnt_update(mq->buffers[mq->last_tail & mask], -1);
@@ -455,7 +460,11 @@ eth_memif_rx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	mask = ring_size - 1;
 
 	cur_slot = mq->last_tail;
-	last_slot = ring->tail;
+	/* The ring->tail acts as a guard variable between Tx and Rx
+	 * threads, so using load-acquire pairs with store-release
+	 * to synchronize it between threads.
+	 */
+	last_slot = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE);
 	if (cur_slot == last_slot)
 		goto refill;
 	n_slots = last_slot - cur_slot;
@@ -501,7 +510,11 @@ eth_memif_rx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 /* Supply master with new buffers */
 refill:
-	head = ring->head;
+	/* The ring->head acts as a guard variable between Tx and Rx
+	 * threads, so using load-acquire pairs with store-release
+	 * to synchronize it between threads.
+	 */
+	head = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE);
 	n_slots = ring_size - head + mq->last_tail;
 
 	if (n_slots < 32)
@@ -526,8 +539,7 @@ eth_memif_rx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			(uint8_t *)proc_private->regions[d0->region]->addr;
 	}
 no_free_mbufs:
-	rte_mb();
-	ring->head = head;
+	__atomic_store_n(&ring->head, head, __ATOMIC_RELEASE);
 
 	mq->n_pkts += n_rx_pkts;
 
@@ -723,8 +735,12 @@ eth_memif_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	memif_free_stored_mbufs(proc_private, mq);
 
 	/* ring type always MEMIF_RING_S2M */
-	slot = ring->head;
-	n_free = ring_size - ring->head + mq->last_tail;
+	/* The ring->head acts as a guard variable between Tx and Rx
+	 * threads, so using load-acquire pairs with store-release
+	 * to synchronize it between threads.
+	 */
+	slot = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE);
+	n_free = ring_size - slot + mq->last_tail;
 
 	int used_slots;
 
@@ -778,12 +794,11 @@ eth_memif_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	}
 
 no_free_slots:
-	rte_mb();
 	/* update ring pointers */
 	if (type == MEMIF_RING_S2M)
-		ring->head = slot;
+		__atomic_store_n(&ring->head, slot, __ATOMIC_RELEASE);
 	else
-		ring->tail = slot;
+		__atomic_store_n(&ring->tail, slot, __ATOMIC_RELEASE);
 
 	/* Send interrupt, if enabled. */
 	if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) {
-- 
2.7.4


             reply	other threads:[~2020-09-11  5:38 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-09-11  5:38 Phil Yang [this message]
2020-09-18 11:58 ` Ferruh Yigit
2020-09-18 22:49 ` Honnappa Nagarahalli
2020-09-21  9:03   ` Jakub Grajciar -X (jgrajcia - PANTHEON TECH SRO at Cisco)
2020-09-21 10:22     ` Phil Yang
2020-09-21 12:21       ` Jakub Grajciar -X (jgrajcia - PANTHEON TECH SRO at Cisco)
2020-09-21 13:27         ` Ferruh Yigit
2020-09-21 19:28     ` Honnappa Nagarahalli

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1599802699-20876-1-git-send-email-phil.yang@arm.com \
    --to=phil.yang@arm.com \
    --cc=Honnappa.Nagarahalli@arm.com \
    --cc=Ruifeng.Wang@arm.com \
    --cc=dev@dpdk.org \
    --cc=jgrajcia@cisco.com \
    --cc=nd@arm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

DPDK patches and discussions

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://inbox.dpdk.org/dev/0 dev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 dev dev/ https://inbox.dpdk.org/dev \
		dev@dpdk.org
	public-inbox-index dev

Example config snippet for mirrors.
Newsgroup available over NNTP:
	nntp://inbox.dpdk.org/inbox.dpdk.dev


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git