From: Yuanhan Liu
To: dev@dpdk.org
Cc: huawei.xie@intel.com, Yuanhan Liu
Date: Mon, 2 May 2016 14:23:49 -0700
Message-Id: <1462224230-19460-8-git-send-email-yuanhan.liu@linux.intel.com>
X-Mailer: git-send-email 1.9.0
In-Reply-To: <1462224230-19460-1-git-send-email-yuanhan.liu@linux.intel.com>
References: <1461645951-14603-1-git-send-email-yuanhan.liu@linux.intel.com>
 <1462224230-19460-1-git-send-email-yuanhan.liu@linux.intel.com>
Subject: [dpdk-dev] [PATCH v2 7/8] examples/vhost: switch_worker cleanup

switch_worker() is the last piece of messy code that touches the
virtio/vhost device. Clean it up here, so that the later vhost ABI
refactoring will be less painful.

The cleanup is straightforward: break long lines and move some code
into functions. Lastly, add a few comments to switch_worker().

Signed-off-by: Yuanhan Liu
---
 examples/vhost/main.c | 253 +++++++++++++++++++++++++++-----------------------
 1 file changed, 136 insertions(+), 117 deletions(-)

diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index dbb42ee..66d3bf2 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -213,6 +213,8 @@ struct mbuf_table {
 /* TX queue for each data core. */
 struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE];
 
+#define MBUF_TABLE_DRAIN_TSC	((rte_get_tsc_hz() + US_PER_S - 1) \
+				 / US_PER_S * BURST_TX_DRAIN_US)
 #define VLAN_HLEN 4
 
 /* Per-device statistics struct */
@@ -915,16 +917,35 @@ static void virtio_tx_offload(struct rte_mbuf *m)
 	tcp_hdr->cksum = get_psd_sum(l3_hdr, m->ol_flags);
 }
 
+static inline void
+free_pkts(struct rte_mbuf **pkts, uint16_t n)
+{
+	while (n--)
+		rte_pktmbuf_free(pkts[n]);
+}
+
+static inline void __attribute__((always_inline))
+do_drain_mbuf_table(struct mbuf_table *tx_q)
+{
+	uint16_t count;
+
+	count = rte_eth_tx_burst(ports[0], tx_q->txq_id,
+				 tx_q->m_table, tx_q->len);
+	if (unlikely(count < tx_q->len))
+		free_pkts(&tx_q->m_table[count], tx_q->len - count);
+
+	tx_q->len = 0;
+}
+
 /*
- * This function routes the TX packet to the correct interface. This may be a local device
- * or the physical port.
+ * This function routes the TX packet to the correct interface. This
+ * may be a local device or the physical port.
  */
 static inline void __attribute__((always_inline))
 virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, uint16_t vlan_tag)
 {
 	struct mbuf_table *tx_q;
-	struct rte_mbuf **m_table;
-	unsigned len, ret, offset = 0;
+	unsigned offset = 0;
 	const uint16_t lcore_id = rte_lcore_id();
 	struct virtio_net *dev = vdev->dev;
 	struct ether_hdr *nh;
@@ -960,7 +981,6 @@ queue2nic:
 
 	/*Add packet to the port tx queue*/
 	tx_q = &lcore_tx_queue[lcore_id];
-	len = tx_q->len;
 
 	nh = rte_pktmbuf_mtod(m, struct ether_hdr *);
 	if (unlikely(nh->ether_type == rte_cpu_to_be_16(ETHER_TYPE_VLAN))) {
@@ -998,55 +1018,130 @@ queue2nic:
 	if (m->ol_flags & PKT_TX_TCP_SEG)
 		virtio_tx_offload(m);
 
-	tx_q->m_table[len] = m;
-	len++;
+	tx_q->m_table[tx_q->len++] = m;
 	if (enable_stats) {
 		dev_statistics[dev->device_fh].tx_total++;
 		dev_statistics[dev->device_fh].tx++;
 	}
 
-	if (unlikely(len == MAX_PKT_BURST)) {
-		m_table = (struct rte_mbuf **)tx_q->m_table;
-		ret = rte_eth_tx_burst(ports[0], (uint16_t)tx_q->txq_id, m_table, (uint16_t) len);
-		/* Free any buffers not handled by TX and update the port stats. */
-		if (unlikely(ret < len)) {
-			do {
-				rte_pktmbuf_free(m_table[ret]);
-			} while (++ret < len);
+	if (unlikely(tx_q->len == MAX_PKT_BURST))
+		do_drain_mbuf_table(tx_q);
+}
+
+
+static inline void __attribute__((always_inline))
+drain_mbuf_table(struct mbuf_table *tx_q)
+{
+	static uint64_t prev_tsc;
+	uint64_t cur_tsc;
+
+	if (tx_q->len == 0)
+		return;
+
+	cur_tsc = rte_rdtsc();
+	if (unlikely(cur_tsc - prev_tsc > MBUF_TABLE_DRAIN_TSC)) {
+		prev_tsc = cur_tsc;
+
+		RTE_LOG(DEBUG, VHOST_DATA,
+			"TX queue drained after timeout with burst size %u\n",
+			tx_q->len);
+		do_drain_mbuf_table(tx_q);
+	}
+}
+
+static inline void __attribute__((always_inline))
+drain_eth_rx(struct vhost_dev *vdev)
+{
+	uint16_t rx_count, enqueue_count;
+	struct virtio_net *dev = vdev->dev;
+	struct rte_mbuf *pkts[MAX_PKT_BURST];
+
+	rx_count = rte_eth_rx_burst(ports[0], vdev->vmdq_rx_q,
+				    pkts, MAX_PKT_BURST);
+	if (!rx_count)
+		return;
+
+	/*
+	 * When "enable_retry" is set, here we wait and retry when there
+	 * are not enough free slots in the queue to hold @rx_count packets,
+	 * to diminish packet loss.
+	 */
+	if (enable_retry &&
+	    unlikely(rx_count > rte_vring_available_entries(dev,
+			VIRTIO_RXQ))) {
+		uint32_t retry;
+
+		for (retry = 0; retry < burst_rx_retry_num; retry++) {
+			rte_delay_us(burst_rx_delay_time);
+			if (rx_count <= rte_vring_available_entries(dev,
+					VIRTIO_RXQ))
+				break;
 		}
+	}
+
+	enqueue_count = rte_vhost_enqueue_burst(dev, VIRTIO_RXQ,
+						pkts, rx_count);
+	if (enable_stats) {
+		uint64_t fh = dev->device_fh;
+
+		rte_atomic64_add(&dev_statistics[fh].rx_total_atomic, rx_count);
+		rte_atomic64_add(&dev_statistics[fh].rx_atomic, enqueue_count);
+	}
 
-		len = 0;
+	free_pkts(pkts, rx_count);
+}
+
+static inline void __attribute__((always_inline))
+drain_virtio_tx(struct vhost_dev *vdev)
+{
+	struct rte_mbuf *pkts[MAX_PKT_BURST];
+	uint16_t count;
+	uint16_t i;
+
+	count = rte_vhost_dequeue_burst(vdev->dev, VIRTIO_TXQ, mbuf_pool,
+					pkts, MAX_PKT_BURST);
+
+	/* setup VMDq for the first packet */
+	if (unlikely(vdev->ready == DEVICE_MAC_LEARNING) && count) {
+		if (vdev->remove || link_vmdq(vdev, pkts[0]) == -1)
+			free_pkts(pkts, count);
 	}
 
-	tx_q->len = len;
-	return;
+	for (i = 0; i < count; ++i) {
+		virtio_tx_route(vdev, pkts[i],
+			vlan_tags[(uint16_t)vdev->dev->device_fh]);
+	}
 }
 
+
 /*
- * This function is called by each data core. It handles all RX/TX registered with the
- * core. For TX the specific lcore linked list is used. For RX, MAC addresses are compared
- * with all devices in the main linked list.
+ * Main function of vhost-switch. It basically does:
+ *
+ * for each vhost device {
+ *    - drain_eth_rx()
+ *
+ *      Which drains the host eth Rx queue linked to the vhost device,
+ *      and delivers all of them to the guest virtio Rx ring associated
+ *      with this vhost device.
+ *
+ *    - drain_virtio_tx()
+ *
+ *      Which drains the guest virtio Tx queue and delivers all of them
+ *      to the target, which could be another vhost device, or the
+ *      physical eth dev. The routing is done in virtio_tx_route().
+ * }
  */
 static int
-switch_worker(__attribute__((unused)) void *arg)
+switch_worker(void *arg __rte_unused)
 {
-	struct virtio_net *dev = NULL;
-	struct vhost_dev *vdev = NULL;
-	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
+	unsigned i;
+	unsigned lcore_id = rte_lcore_id();
+	struct vhost_dev *vdev;
 	struct mbuf_table *tx_q;
-	const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;
-	uint64_t prev_tsc, diff_tsc, cur_tsc, ret_count = 0;
-	unsigned ret, i;
-	const uint16_t lcore_id = rte_lcore_id();
-	const uint16_t num_cores = (uint16_t)rte_lcore_count();
-	uint16_t rx_count = 0;
-	uint16_t tx_count;
-	uint32_t retry = 0;
 
 	RTE_LOG(INFO, VHOST_DATA, "Procesing on Core %u started\n", lcore_id);
 
-	prev_tsc = 0;
-
 	tx_q = &lcore_tx_queue[lcore_id];
-	for (i = 0; i < num_cores; i ++) {
+	for (i = 0; i < rte_lcore_count(); i++) {
 		if (lcore_ids[i] == lcore_id) {
 			tx_q->txq_id = i;
 			break;
@@ -1054,34 +1149,7 @@ switch_worker(__attribute__((unused)) void *arg)
 	}
 
 	while(1) {
-		cur_tsc = rte_rdtsc();
-
-		/*
-		 * TX burst queue drain
-		 */
-		diff_tsc = cur_tsc - prev_tsc;
-		if (unlikely(diff_tsc > drain_tsc)) {
-
-			if (tx_q->len) {
-				RTE_LOG(DEBUG, VHOST_DATA,
-					"TX queue drained after timeout with burst size %u\n",
-					tx_q->len);
-
-				/*Tx any packets in the queue*/
-				ret = rte_eth_tx_burst(ports[0], (uint16_t)tx_q->txq_id,
-						       (struct rte_mbuf **)tx_q->m_table,
-						       (uint16_t)tx_q->len);
-				if (unlikely(ret < tx_q->len)) {
-					do {
-						rte_pktmbuf_free(tx_q->m_table[ret]);
-					} while (++ret < tx_q->len);
-				}
-
-				tx_q->len = 0;
-			}
-
-			prev_tsc = cur_tsc;
-
-		}
+		drain_mbuf_table(tx_q);
 
 		/*
 		 * Inform the configuration core that we have exited the
@@ -1091,69 +1159,20 @@ switch_worker(__attribute__((unused)) void *arg)
 		lcore_info[lcore_id].dev_removal_flag = ACK_DEV_REMOVAL;
 
 		/*
-		 * Process devices
+		 * Process vhost devices
 		 */
 		TAILQ_FOREACH(vdev, &lcore_info[lcore_id].vdev_list, next) {
-			uint64_t fh;
-
-			dev = vdev->dev;
-			fh = dev->device_fh;
-
 			if (unlikely(vdev->remove)) {
 				unlink_vmdq(vdev);
 				vdev->ready = DEVICE_SAFE_REMOVE;
 				continue;
 			}
-			if (likely(vdev->ready == DEVICE_RX)) {
-				/*Handle guest RX*/
-				rx_count = rte_eth_rx_burst(ports[0],
-					vdev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST);
-
-				if (rx_count) {
-					/*
-					 * Retry is enabled and the queue is full then we wait and retry to avoid packet loss
-					 * Here MAX_PKT_BURST must be less than virtio queue size
-					 */
-					if (enable_retry && unlikely(rx_count > rte_vring_available_entries(dev, VIRTIO_RXQ))) {
-						for (retry = 0; retry < burst_rx_retry_num; retry++) {
-							rte_delay_us(burst_rx_delay_time);
-							if (rx_count <= rte_vring_available_entries(dev, VIRTIO_RXQ))
-								break;
-						}
-					}
-					ret_count = rte_vhost_enqueue_burst(dev, VIRTIO_RXQ, pkts_burst, rx_count);
-					if (enable_stats) {
-						rte_atomic64_add(
-						&dev_statistics[fh].rx_total_atomic,
-						rx_count);
-						rte_atomic64_add(
-						&dev_statistics[fh].rx_atomic,
-						ret_count);
-					}
-					while (likely(rx_count)) {
-						rx_count--;
-						rte_pktmbuf_free(pkts_burst[rx_count]);
-					}
-
-				}
-			}
+			if (likely(vdev->ready == DEVICE_RX))
+				drain_eth_rx(vdev);
 
-			if (likely(!vdev->remove)) {
-				/* Handle guest TX*/
-				tx_count = rte_vhost_dequeue_burst(dev, VIRTIO_TXQ, mbuf_pool, pkts_burst, MAX_PKT_BURST);
-				/* If this is the first received packet we need to learn the MAC and setup VMDQ */
-				if (unlikely(vdev->ready == DEVICE_MAC_LEARNING) && tx_count) {
-					if (vdev->remove || (link_vmdq(vdev, pkts_burst[0]) == -1)) {
-						while (tx_count)
-							rte_pktmbuf_free(pkts_burst[--tx_count]);
-					}
-				}
-				for (i = 0; i < tx_count; ++i) {
-					virtio_tx_route(vdev, pkts_burst[i],
-						vlan_tags[(uint16_t)dev->device_fh]);
-				}
-			}
+			if (likely(!vdev->remove))
+				drain_virtio_tx(vdev);
 		}
 	}
 
-- 
1.9.3
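
For reviewers who want the resulting control flow at a glance, the sketch
below condenses the per-lcore loop shape that main.c ends up with after this
patch. It is a standalone illustration, not code from the patch: the types
and the drain_* helper bodies are simplified stand-ins for the real
definitions in examples/vhost/main.c and the DPDK headers.

/*
 * Standalone sketch of the switch_worker() loop shape after the cleanup.
 * Everything here is a simplified stand-in (assumption), not DPDK code.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct vhost_dev {              /* stand-in for the real vhost_dev          */
	bool remove;            /* device is being torn down                */
	bool ready_rx;          /* stand-in for ready == DEVICE_RX          */
	struct vhost_dev *next; /* stand-in for the TAILQ linkage           */
};

struct mbuf_table {             /* stand-in for the per-lcore TX queue      */
	uint16_t len;
};

/* Stand-ins for the helpers the patch factors out of switch_worker(). */
static void drain_mbuf_table(struct mbuf_table *tx_q)
{
	/* real code: flush tx_q to the NIC once MBUF_TABLE_DRAIN_TSC passed */
	if (tx_q->len)
		tx_q->len = 0;
}

static void drain_eth_rx(struct vhost_dev *vdev)
{
	/* real code: eth Rx burst -> rte_vhost_enqueue_burst (guest Rx)     */
	(void)vdev;
}

static void drain_virtio_tx(struct vhost_dev *vdev)
{
	/* real code: rte_vhost_dequeue_burst (guest Tx) -> virtio_tx_route  */
	(void)vdev;
}

/* One pass of the while(1) body in switch_worker() after the cleanup. */
static void switch_worker_pass(struct mbuf_table *tx_q, struct vhost_dev *devs)
{
	struct vhost_dev *vdev;

	drain_mbuf_table(tx_q);              /* timeout-based TX queue drain */

	for (vdev = devs; vdev != NULL; vdev = vdev->next) {
		if (vdev->remove)
			continue;            /* real code: unlink_vmdq() etc. */
		if (vdev->ready_rx)
			drain_eth_rx(vdev);      /* host NIC -> guest Rx ring */
		if (!vdev->remove)
			drain_virtio_tx(vdev);   /* guest Tx ring -> target   */
	}
}

int main(void)
{
	struct mbuf_table tx_q = { .len = 3 };
	struct vhost_dev dev = { .remove = false, .ready_rx = true, .next = NULL };

	switch_worker_pass(&tx_q, &dev);     /* a single loop iteration      */
	printf("tx_q.len after drain: %u\n", tx_q.len);
	return 0;
}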