From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <stephen@networkplumber.org>
Received: from mail-pa0-f52.google.com (mail-pa0-f52.google.com
 [209.85.220.52]) by dpdk.org (Postfix) with ESMTP id DD40B3975
 for <dev@dpdk.org>; Tue, 26 Aug 2014 04:04:44 +0200 (CEST)
Received: by mail-pa0-f52.google.com with SMTP id bj1so22491952pad.11
 for <dev@dpdk.org>; Mon, 25 Aug 2014 19:08:41 -0700 (PDT)
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
 d=1e100.net; s=20130820;
 h=x-gm-message-state:message-id:user-agent:date:from:to:cc:subject
 :references:mime-version:content-type:content-disposition;
 bh=wLFpgG1Cb0h9ul7AhHid9yZh29SbLJ74ewzxzYyA5yM=;
 b=SmEKg2gAipY21mGEHQQc9sV4kOwb1bpDX8rhjn5YE2c/6ekguxXHzheHGX2DYX2g3T
 YUpUoIJLxNN6cw2pm9fs0zzvSoAqLkNzQAQHFQVLlPIPaTiUXENyZYekHWzEJrr76hKT
 lXaXzx1qq0FBH8BSFdrPyrRgA9J28oIWaKyTuYY3GyAObh1KbCsj3Nidy4S6vZj5qmz2
 nVX69P7nmdrZgYe66I+H9e36aq5y3P58Y+60cRTZ8mJpeLWV5VCPJzOeTnG8IChR18h3
 9BLwNV+iSXUvbE+qx0+sEZR/S94O83qljOY8sooDblpwvEj/tJUoEkxI4qewM5DL7kMb
 28wA==
X-Gm-Message-State: ALoCoQmD+v8YPXfWGSqtu0WovW+MQ3721PSGqpr/wFWes8DEjq8Ulx/w/qpJ4GdBObeVdT8UkoO6
X-Received: by 10.66.249.34 with SMTP id yr2mr9015778pac.149.1409018921871;
 Mon, 25 Aug 2014 19:08:41 -0700 (PDT)
Received: from localhost (static-50-53-65-80.bvtn.or.frontiernet.net.
 [50.53.65.80])
 by mx.google.com with ESMTPSA id ur5sm4467129pac.46.2014.08.25.19.08.40
 for <multiple recipients>
 (version=TLSv1.2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128);
 Mon, 25 Aug 2014 19:08:41 -0700 (PDT)
Message-Id: <20140826020839.926714726@networkplumber.org>
User-Agent: quilt/0.63-1
Date: Mon, 25 Aug 2014 19:07:48 -0700
From: Stephen Hemminger <stephen@networkplumber.org>
To: Ouyang Changchun <changchun.ouyang@intel.com>
References: <20140826020746.062748014@networkplumber.org>
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Disposition: inline; filename=virtio-barrier.patch
Cc: dev@dpdk.org
Subject: [dpdk-dev] [RFC 02/10] virtio: use weak barriers
X-BeenThere: dev@dpdk.org
X-Mailman-Version: 2.1.15
Precedence: list
List-Id: patches and discussions about DPDK <dev.dpdk.org>
List-Unsubscribe: <http://dpdk.org/ml/options/dev>,
 <mailto:dev-request@dpdk.org?subject=unsubscribe>
List-Archive: <http://dpdk.org/ml/archives/dev/>
List-Post: <mailto:dev@dpdk.org>
List-Help: <mailto:dev-request@dpdk.org?subject=help>
List-Subscribe: <http://dpdk.org/ml/listinfo/dev>,
 <mailto:dev-request@dpdk.org?subject=subscribe>
X-List-Received-Date: Tue, 26 Aug 2014 02:04:46 -0000

The DPDK virtio driver only has to handle the case of virtio over PCI
on an SMP system. In that case the code can use weaker (compiler-only)
barriers for read and write ordering instead of hard (fence) barriers,
which helps performance. The rationale is explained in the Linux
kernel's virtio_ring.h.

To make it clearer that this is a virtio thing and not some generic
barrier, prefix the barrier calls with virtio_.

Add the missing (and needed) barrier between updating the ring data
structures and notifying the host.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>


---
 lib/librte_pmd_virtio/virtio_ethdev.c |    2 +-
 lib/librte_pmd_virtio/virtio_rxtx.c   |    8 +++++---
 lib/librte_pmd_virtio/virtqueue.h     |   19 ++++++++++++++-----
 3 files changed, 20 insertions(+), 9 deletions(-)

--- a/lib/librte_pmd_virtio/virtio_rxtx.c	2014-08-25 19:00:04.146518448 -0700
+++ b/lib/librte_pmd_virtio/virtio_rxtx.c	2014-08-25 19:00:04.142518425 -0700
@@ -454,7 +454,7 @@ virtio_recv_pkts(void *rx_queue, struct
 
 	nb_used = VIRTQUEUE_NUSED(rxvq);
 
-	rmb();
+	virtio_rmb();
 
 	num = (uint16_t)(likely(nb_used <= nb_pkts) ? nb_used : nb_pkts);
 	num = (uint16_t)(likely(num <= VIRTIO_MBUF_BURST_SZ) ? num : VIRTIO_MBUF_BURST_SZ);
@@ -514,6 +514,7 @@ virtio_recv_pkts(void *rx_queue, struct
 	}
 
 	if (likely(nb_enqueued)) {
+		virtio_wmb();
 		if (unlikely(virtqueue_kick_prepare(rxvq))) {
 			virtqueue_notify(rxvq);
 			PMD_RX_LOG(DEBUG, "Notified\n");
@@ -545,7 +546,7 @@ virtio_recv_mergeable_pkts(void *rx_queu
 
 	nb_used = VIRTQUEUE_NUSED(rxvq);
 
-	rmb();
+	virtio_rmb();
 
 	if (nb_used == 0)
 		return 0;
@@ -694,7 +695,7 @@ virtio_xmit_pkts(void *tx_queue, struct
 	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
 	nb_used = VIRTQUEUE_NUSED(txvq);
 
-	rmb();
+	virtio_rmb();
 
 	num = (uint16_t)(likely(nb_used < VIRTIO_MBUF_BURST_SZ) ? nb_used : VIRTIO_MBUF_BURST_SZ);
 
@@ -729,6 +730,7 @@ virtio_xmit_pkts(void *tx_queue, struct
 		}
 	}
 	vq_update_avail_idx(txvq);
+	virtio_wmb();
 
 	txvq->packets += nb_tx;
 
--- a/lib/librte_pmd_virtio/virtqueue.h	2014-08-25 19:00:04.146518448 -0700
+++ b/lib/librte_pmd_virtio/virtqueue.h	2014-08-25 19:00:04.142518425 -0700
@@ -46,9 +46,18 @@
 #include "virtio_ring.h"
 #include "virtio_logs.h"
 
-#define mb()  rte_mb()
-#define wmb() rte_wmb()
-#define rmb() rte_rmb()
+/*
+ * Per virtio_ring.h in Linux.
+ *     For virtio_pci on SMP, we don't need to order with respect to MMIO
+ *     accesses through relaxed memory I/O windows, so smp_mb() et al are
+ *     sufficient.
+ *
+ * This driver is for virtio_pci on SMP and can therefore use the
+ * weaker (compiler-only) barriers for read and write ordering.
+ */
+#define virtio_mb()	rte_mb()
+#define virtio_rmb()	rte_compiler_barrier()
+#define virtio_wmb()	rte_compiler_barrier()
 
 #ifdef RTE_PMD_PACKET_PREFETCH
 #define rte_packet_prefetch(p)  rte_prefetch1(p)
@@ -226,7 +235,7 @@ virtqueue_full(const struct virtqueue *v
 static inline void
 vq_update_avail_idx(struct virtqueue *vq)
 {
-	rte_compiler_barrier();
+	virtio_wmb();	/* write barrier: earlier stores precede avail->idx */
 	vq->vq_ring.avail->idx = vq->vq_avail_idx;
 }
 
@@ -256,7 +265,7 @@ static inline void
 virtqueue_notify(struct virtqueue *vq)
 {
 	/*
-	 * Ensure updated avail->idx is visible to host. mb() necessary?
+	 * Ensure updated avail->idx is visible to host.
 	 * For virtio on IA, the notificaiton is through io port operation
 	 * which is a serialization instruction itself.
 	 */
--- a/lib/librte_pmd_virtio/virtio_ethdev.c	2014-08-25 19:00:04.146518448 -0700
+++ b/lib/librte_pmd_virtio/virtio_ethdev.c	2014-08-25 19:00:04.142518425 -0700
@@ -171,7 +171,7 @@ virtio_send_command(struct virtqueue *vq
 		uint32_t idx, desc_idx, used_idx;
 		struct vring_used_elem *uep;
 
-		rmb();
+		virtio_rmb();
 
 		used_idx = (uint32_t)(vq->vq_used_cons_idx
 				& (vq->vq_nentries - 1));