From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <couyang@shecgisg004.sh.intel.com>
Received: from mga03.intel.com (mga03.intel.com [134.134.136.65])
 by dpdk.org (Postfix) with ESMTP id 9D38320F
 for <dev@dpdk.org>; Wed, 12 Aug 2015 10:03:25 +0200 (CEST)
Received: from fmsmga001.fm.intel.com ([10.253.24.23])
 by orsmga103.jf.intel.com with ESMTP; 12 Aug 2015 01:03:25 -0700
X-ExtLoop1: 1
X-IronPort-AV: E=Sophos;i="5.15,659,1432623600"; d="scan'208";a="767051330"
Received: from shvmail01.sh.intel.com ([10.239.29.42])
 by fmsmga001.fm.intel.com with ESMTP; 12 Aug 2015 01:03:24 -0700
Received: from shecgisg004.sh.intel.com (shecgisg004.sh.intel.com
 [10.239.29.89])
 by shvmail01.sh.intel.com with ESMTP id t7C83M4b027101;
 Wed, 12 Aug 2015 16:03:22 +0800
Received: from shecgisg004.sh.intel.com (localhost [127.0.0.1])
 by shecgisg004.sh.intel.com (8.13.6/8.13.6/SuSE Linux 0.8) with ESMTP id
 t7C83JZb003703; Wed, 12 Aug 2015 16:03:21 +0800
Received: (from couyang@localhost)
 by shecgisg004.sh.intel.com (8.13.6/8.13.6/Submit) id t7C83JTI003699;
 Wed, 12 Aug 2015 16:03:19 +0800
From: Ouyang Changchun <changchun.ouyang@intel.com>
To: dev@dpdk.org
Date: Wed, 12 Aug 2015 16:02:46 +0800
Message-Id: <1439366567-3402-12-git-send-email-changchun.ouyang@intel.com>
X-Mailer: git-send-email 1.7.12.2
In-Reply-To: <1439366567-3402-1-git-send-email-changchun.ouyang@intel.com>
References: <1434355006-30583-1-git-send-email-changchun.ouyang@intel.com>
 <1439366567-3402-1-git-send-email-changchun.ouyang@intel.com>
Subject: [dpdk-dev] [PATCH v4 11/12] vhost: alloc core to virtq
X-BeenThere: dev@dpdk.org
X-Mailman-Version: 2.1.15
Precedence: list
List-Id: patches and discussions about DPDK <dev.dpdk.org>
List-Unsubscribe: <http://dpdk.org/ml/options/dev>,
 <mailto:dev-request@dpdk.org?subject=unsubscribe>
List-Archive: <http://dpdk.org/ml/archives/dev/>
List-Post: <mailto:dev@dpdk.org>
List-Help: <mailto:dev-request@dpdk.org?subject=help>
List-Subscribe: <http://dpdk.org/ml/listinfo/dev>,
 <mailto:dev-request@dpdk.org?subject=subscribe>
X-List-Received-Date: Wed, 12 Aug 2015 08:03:26 -0000

This patch allocates data cores at the granularity of a virtq instead of a whole virtio device.
This lets vhost poll different virtqs of the same device on different cores,
which gives better performance on vhost/virtio ports when more cores are available.

Add two APIs: rte_vhost_core_id_get and rte_vhost_core_id_set.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
This patch is new in v4.

 examples/vhost/Makefile           |   4 +-
 examples/vhost/main.c             | 243 ++++++++++++++++++++------------------
 examples/vhost/main.h             |   3 +-
 lib/librte_vhost/rte_virtio_net.h |  25 ++++
 lib/librte_vhost/virtio-net.c     |  22 ++++
 5 files changed, 178 insertions(+), 119 deletions(-)

diff --git a/examples/vhost/Makefile b/examples/vhost/Makefile
index c269466..32a3dec 100644
--- a/examples/vhost/Makefile
+++ b/examples/vhost/Makefile
@@ -50,8 +50,8 @@ APP = vhost-switch
 # all source are stored in SRCS-y
 SRCS-y := main.c
 
-CFLAGS += -O2 -D_FILE_OFFSET_BITS=64
-CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -O0 -g -D_FILE_OFFSET_BITS=64
+CFLAGS += $(WERROR_FLAGS) -Wno-maybe-uninitialized
 
 include $(RTE_SDK)/mk/rte.extapp.mk
 
diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 54f9648..0a36c61 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -1386,60 +1386,58 @@ switch_worker(__attribute__((unused)) void *arg)
 			}
 			if (likely(vdev->ready == DEVICE_RX)) {
 				/*Handle guest RX*/
-				for (i = 0; i < rxq; i++) {
-					rx_count = rte_eth_rx_burst(ports[0],
-						vdev->vmdq_rx_q + i, pkts_burst, MAX_PKT_BURST);
+				uint16_t q_idx = dev_ll->work_q_idx;
+				rx_count = rte_eth_rx_burst(ports[0],
+					vdev->vmdq_rx_q + q_idx, pkts_burst, MAX_PKT_BURST);
 
-					if (rx_count) {
-						/*
-						* Retry is enabled and the queue is full then we wait and retry to avoid packet loss
-						* Here MAX_PKT_BURST must be less than virtio queue size
-						*/
-						if (enable_retry && unlikely(rx_count > rte_vring_available_entries(dev,
-											VIRTIO_RXQ + i * VIRTIO_QNUM))) {
-							for (retry = 0; retry < burst_rx_retry_num; retry++) {
-								rte_delay_us(burst_rx_delay_time);
-								if (rx_count <= rte_vring_available_entries(dev,
-											VIRTIO_RXQ + i * VIRTIO_QNUM))
-									break;
-							}
-						}
-						ret_count = rte_vhost_enqueue_burst(dev, VIRTIO_RXQ + i * VIRTIO_QNUM,
-											pkts_burst, rx_count);
-						if (enable_stats) {
-							rte_atomic64_add(
-							&dev_statistics[dev_ll->vdev->dev->device_fh].qp_stats[i].rx_total_atomic,
-							rx_count);
-							rte_atomic64_add(
-							&dev_statistics[dev_ll->vdev->dev->device_fh].qp_stats[i].rx_atomic, ret_count);
-						}
-						while (likely(rx_count)) {
-							rx_count--;
-							rte_pktmbuf_free(pkts_burst[rx_count]);
+				if (rx_count) {
+					/*
+					* Retry is enabled and the queue is full then we wait and retry to avoid packet loss
+					* Here MAX_PKT_BURST must be less than virtio queue size
+					*/
+					if (enable_retry && unlikely(rx_count > rte_vring_available_entries(dev,
+										VIRTIO_RXQ + q_idx * VIRTIO_QNUM))) {
+						for (retry = 0; retry < burst_rx_retry_num; retry++) {
+							rte_delay_us(burst_rx_delay_time);
+							if (rx_count <= rte_vring_available_entries(dev,
+										VIRTIO_RXQ + q_idx * VIRTIO_QNUM))
+								break;
 						}
 					}
+					ret_count = rte_vhost_enqueue_burst(dev, VIRTIO_RXQ + q_idx * VIRTIO_QNUM,
+										pkts_burst, rx_count);
+					if (enable_stats) {
+						rte_atomic64_add(
+						&dev_statistics[dev_ll->vdev->dev->device_fh].qp_stats[q_idx].rx_total_atomic,
+						rx_count);
+						rte_atomic64_add(
+						&dev_statistics[dev_ll->vdev->dev->device_fh].qp_stats[q_idx].rx_atomic, ret_count);
+					}
+					while (likely(rx_count)) {
+						rx_count--;
+						rte_pktmbuf_free(pkts_burst[rx_count]);
+					}
 				}
 			}
 
 			if (likely(!vdev->remove)) {
 				/* Handle guest TX*/
-				for (i = 0; i < rxq; i++) {
-					tx_count = rte_vhost_dequeue_burst(dev, VIRTIO_TXQ + i * VIRTIO_QNUM,
-							mbuf_pool, pkts_burst, MAX_PKT_BURST);
-					/*
-					 * If this is the first received packet we need to learn
-					 * the MAC and setup VMDQ
-					 */
-					if (unlikely(vdev->ready == DEVICE_MAC_LEARNING) && tx_count) {
-						if (vdev->remove || (link_vmdq(vdev, pkts_burst[0]) == -1)) {
-							while (tx_count)
-								rte_pktmbuf_free(pkts_burst[--tx_count]);
-						}
+				uint16_t q_idx = dev_ll->work_q_idx;
+				tx_count = rte_vhost_dequeue_burst(dev, VIRTIO_TXQ + q_idx * VIRTIO_QNUM,
+						mbuf_pool, pkts_burst, MAX_PKT_BURST);
+				/*
+				 * If this is the first received packet we need to learn
+				 * the MAC and setup VMDQ
+				 */
+				if (unlikely(vdev->ready == DEVICE_MAC_LEARNING) && tx_count) {
+					if (vdev->remove || (link_vmdq(vdev, pkts_burst[0]) == -1)) {
+						while (tx_count)
+							rte_pktmbuf_free(pkts_burst[--tx_count]);
 					}
-					while (tx_count)
-						virtio_tx_route(vdev, pkts_burst[--tx_count],
-								(uint16_t)dev->device_fh, i);
 				}
+				while (tx_count)
+					virtio_tx_route(vdev, pkts_burst[--tx_count],
+						(uint16_t)dev->device_fh, q_idx);
 			}
 
 			/*move to the next device in the list*/
@@ -2427,6 +2425,7 @@ destroy_device (volatile struct virtio_net *dev)
 	struct virtio_net_data_ll *ll_main_dev_last = NULL;
 	struct vhost_dev *vdev;
 	int lcore;
+	uint32_t i;
 
 	dev->flags &= ~VIRTIO_DEV_RUNNING;
 
@@ -2438,61 +2437,73 @@ destroy_device (volatile struct virtio_net *dev)
 	}
 
 	/* Search for entry to be removed from lcore ll */
-	ll_lcore_dev_cur = lcore_info[vdev->coreid].lcore_ll->ll_root_used;
-	while (ll_lcore_dev_cur != NULL) {
-		if (ll_lcore_dev_cur->vdev == vdev) {
-			break;
-		} else {
-			ll_lcore_dev_last = ll_lcore_dev_cur;
-			ll_lcore_dev_cur = ll_lcore_dev_cur->next;
+	for (i = 0; i < rxq; i++) {
+		uint16_t core_id = rte_vhost_core_id_get(dev, i);
+
+		ll_lcore_dev_cur = lcore_info[core_id].lcore_ll->ll_root_used;
+
+		while (ll_lcore_dev_cur != NULL) {
+			if (ll_lcore_dev_cur->vdev == vdev) {
+				break;
+			} else {
+				ll_lcore_dev_last = ll_lcore_dev_cur;
+				ll_lcore_dev_cur = ll_lcore_dev_cur->next;
+			}
 		}
-	}
 
-	if (ll_lcore_dev_cur == NULL) {
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"(%"PRIu64") Failed to find the dev to be destroy.\n",
-			dev->device_fh);
-		return;
-	}
+		if (ll_lcore_dev_cur == NULL) {
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"(%"PRIu64") Failed to find the dev to be destroy.\n",
+				dev->device_fh);
+			if (i == 0)
+				return;
+			else
+				break;
+		}
 
-	/* Search for entry to be removed from main ll */
-	ll_main_dev_cur = ll_root_used;
-	ll_main_dev_last = NULL;
-	while (ll_main_dev_cur != NULL) {
-		if (ll_main_dev_cur->vdev == vdev) {
-			break;
-		} else {
-			ll_main_dev_last = ll_main_dev_cur;
-			ll_main_dev_cur = ll_main_dev_cur->next;
+		/* Search for entry to be removed from main ll */
+		if (i == 0) {
+			ll_main_dev_cur = ll_root_used;
+			ll_main_dev_last = NULL;
+			while (ll_main_dev_cur != NULL) {
+				if (ll_main_dev_cur->vdev == vdev) {
+					break;
+				} else {
+					ll_main_dev_last = ll_main_dev_cur;
+					ll_main_dev_cur = ll_main_dev_cur->next;
+				}
+			}
 		}
-	}
 
-	/* Remove entries from the lcore and main ll. */
-	rm_data_ll_entry(&lcore_info[vdev->coreid].lcore_ll->ll_root_used, ll_lcore_dev_cur, ll_lcore_dev_last);
-	rm_data_ll_entry(&ll_root_used, ll_main_dev_cur, ll_main_dev_last);
+		/* Remove entries from the lcore and main ll. */
+		rm_data_ll_entry(&lcore_info[core_id].lcore_ll->ll_root_used, ll_lcore_dev_cur, ll_lcore_dev_last);
+		if (i == 0)
+			rm_data_ll_entry(&ll_root_used, ll_main_dev_cur, ll_main_dev_last);
 
-	/* Set the dev_removal_flag on each lcore. */
-	RTE_LCORE_FOREACH_SLAVE(lcore) {
-		lcore_info[lcore].lcore_ll->dev_removal_flag = REQUEST_DEV_REMOVAL;
-	}
+		/* Set the dev_removal_flag on each lcore. */
+		RTE_LCORE_FOREACH_SLAVE(lcore) {
+			lcore_info[lcore].lcore_ll->dev_removal_flag = REQUEST_DEV_REMOVAL;
+		}
 
-	/*
-	 * Once each core has set the dev_removal_flag to ACK_DEV_REMOVAL we can be sure that
-	 * they can no longer access the device removed from the linked lists and that the devices
-	 * are no longer in use.
-	 */
-	RTE_LCORE_FOREACH_SLAVE(lcore) {
-		while (lcore_info[lcore].lcore_ll->dev_removal_flag != ACK_DEV_REMOVAL) {
-			rte_pause();
+		/*
+		 * Once each core has set the dev_removal_flag to ACK_DEV_REMOVAL we can be sure that
+		 * they can no longer access the device removed from the linked lists and that the devices
+		 * are no longer in use.
+		 */
+		RTE_LCORE_FOREACH_SLAVE(lcore) {
+			while (lcore_info[lcore].lcore_ll->dev_removal_flag != ACK_DEV_REMOVAL)
+				rte_pause();
 		}
-	}
 
-	/* Add the entries back to the lcore and main free ll.*/
-	put_data_ll_free_entry(&lcore_info[vdev->coreid].lcore_ll->ll_root_free, ll_lcore_dev_cur);
-	put_data_ll_free_entry(&ll_root_free, ll_main_dev_cur);
+		/* Add the entries back to the lcore and main free ll.*/
+		put_data_ll_free_entry(&lcore_info[core_id].lcore_ll->ll_root_free, ll_lcore_dev_cur);
 
-	/* Decrement number of device on the lcore. */
-	lcore_info[vdev->coreid].lcore_ll->device_num--;
+		if (i == 0)
+			put_data_ll_free_entry(&ll_root_free, ll_main_dev_cur);
+
+		/* Decrement number of device on the lcore. */
+		lcore_info[core_id].lcore_ll->device_num--;
+	}
 
 	RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") Device has been removed from data core\n", dev->device_fh);
 
@@ -2846,42 +2857,44 @@ new_device (struct virtio_net *dev)
 	vdev->remove = 0;
 
 	/* Find a suitable lcore to add the device. */
-	RTE_LCORE_FOREACH_SLAVE(lcore) {
-		if (lcore_info[lcore].lcore_ll->device_num < device_num_min) {
-			device_num_min = lcore_info[lcore].lcore_ll->device_num;
-			core_add = lcore;
+	for (i = 0; i < rxq; i++) {
+		device_num_min = num_devices;
+		RTE_LCORE_FOREACH_SLAVE(lcore) {
+			if (lcore_info[lcore].lcore_ll->device_num < device_num_min) {
+				device_num_min = lcore_info[lcore].lcore_ll->device_num;
+				core_add = lcore;
+			}
 		}
-	}
-	/* Add device to lcore ll */
-	ll_dev = get_data_ll_free_entry(&lcore_info[core_add].lcore_ll->ll_root_free);
-	if (ll_dev == NULL) {
-		RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") Failed to add device to data core\n", dev->device_fh);
-		vdev->ready = DEVICE_SAFE_REMOVE;
-		destroy_device(dev);
-		rte_free(vdev->regions_hpa);
-		rte_free(vdev);
-		return -1;
-	}
-	ll_dev->vdev = vdev;
-	vdev->coreid = core_add;
+		/* Add device to lcore ll */
+		ll_dev = get_data_ll_free_entry(&lcore_info[core_add].lcore_ll->ll_root_free);
+		if (ll_dev == NULL) {
+			RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") Failed to add device to data core\n", dev->device_fh);
+			vdev->ready = DEVICE_SAFE_REMOVE;
+			destroy_device(dev);
+			rte_free(vdev->regions_hpa);
+			rte_free(vdev);
+			return -1;
+		}
+		ll_dev->vdev = vdev;
+		ll_dev->work_q_idx = i;
+		rte_vhost_core_id_set(dev, i, core_add);
+		add_data_ll_entry(&lcore_info[core_add].lcore_ll->ll_root_used, ll_dev);
 
-	add_data_ll_entry(&lcore_info[vdev->coreid].lcore_ll->ll_root_used, ll_dev);
+		/* Disable notifications. */
+		rte_vhost_enable_guest_notification(dev, i * VIRTIO_QNUM + VIRTIO_RXQ, 0);
+		rte_vhost_enable_guest_notification(dev, i * VIRTIO_QNUM + VIRTIO_TXQ, 0);
+		lcore_info[core_add].lcore_ll->device_num++;
+		RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") Device has been added to data core %d for vq: %d\n",
+			dev->device_fh, core_add, i);
+	}
 
 	/* Initialize device stats */
 	if (enable_stats)
 		memset(dev_statistics[dev->device_fh].qp_stats, 0,
 			VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX * sizeof(struct qp_statistics));
 
-	/* Disable notifications. */
-	for (i = 0; i < rxq; i++) {
-		rte_vhost_enable_guest_notification(dev, i * VIRTIO_QNUM + VIRTIO_RXQ, 0);
-		rte_vhost_enable_guest_notification(dev, i * VIRTIO_QNUM + VIRTIO_TXQ, 0);
-	}
-
-	lcore_info[vdev->coreid].lcore_ll->device_num++;
 	dev->flags |= VIRTIO_DEV_RUNNING;
 
-	RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") Device has been added to data core %d\n", dev->device_fh, vdev->coreid);
 
 	return 0;
 }
diff --git a/examples/vhost/main.h b/examples/vhost/main.h
index d04e2be..42336bc 100644
--- a/examples/vhost/main.h
+++ b/examples/vhost/main.h
@@ -82,8 +82,6 @@ struct vhost_dev {
 	uint16_t vmdq_rx_q;
 	/**< Vlan tag assigned to the pool */
 	uint32_t vlan_tag;
-	/**< Data core that the device is added to. */
-	uint16_t coreid;
 	/**< A device is set as ready if the MAC address has been set. */
 	volatile uint8_t ready;
 	/**< Device is marked for removal from the data core. */
@@ -94,6 +92,7 @@ struct virtio_net_data_ll
 {
 	struct vhost_dev		*vdev;	/* Pointer to device created by configuration core. */
 	struct virtio_net_data_ll	*next;  /* Pointer to next device in linked list. */
+	uint32_t work_q_idx;
 };
 
 /*
diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
index e16ad3a..93d3e27 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -89,6 +89,7 @@ struct vhost_virtqueue {
 	eventfd_t		callfd;			/**< Used to notify the guest (trigger interrupt). */
 	eventfd_t		kickfd;			/**< Currently unused as polling mode is enabled. */
 	uint32_t		enabled;		/**< Indicate the queue is enabled or not. */
+	uint16_t		core_id;		/**< Data core that the vq is added to. */
 	struct buf_vector	buf_vec[BUF_VECTOR_MAX];	/**< for scatter RX. */
 } __rte_cache_aligned;
 
@@ -241,8 +242,32 @@ uint16_t rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
 
 /**
  * This function get the queue pair number of one vhost device.
+ * @param dev
+ *  virtio-net device
  * @return
  *  num of queue pair of specified virtio device.
  */
 uint16_t rte_vhost_qp_num_get(struct virtio_net *dev);
+
+/**
+ * This function gets the data core id for a queue pair in one vhost device.
+ * @param dev
+ *  virtio-net device
+ * @param queue_id
+ *  virtio queue index in mq case
+ * @return
+ *  core id of the queue pair of the specified virtio device.
+ */
+uint16_t rte_vhost_core_id_get(volatile struct virtio_net *dev, uint16_t queue_id);
+
+/**
+ * This function sets the data core id for a queue pair in one vhost device.
+ * @param dev
+ *  virtio-net device
+ * @param queue_id
+ *  virtio queue index in mq case
+ * @param core_id
+ *  data core id for virtio queue pair in mq case
+ */
+void rte_vhost_core_id_set(struct virtio_net *dev, uint16_t queue_id, uint16_t core_id);
 #endif /* _VIRTIO_NET_H_ */
diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index 24d0c53..d4c55c6 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -965,6 +965,28 @@ uint16_t rte_vhost_qp_num_get(struct virtio_net *dev)
 	return dev->virt_qp_nb;
 }
 
+uint16_t rte_vhost_core_id_get(volatile struct virtio_net *dev, uint16_t queue_id)
+{
+	if (dev == NULL)
+		return 0;
+
+	if (dev->virtqueue == NULL || dev->virtqueue[queue_id] == NULL)
+		return 0;
+
+	return dev->virtqueue[queue_id]->core_id;
+}
+
+void rte_vhost_core_id_set(struct virtio_net *dev, uint16_t queue_id, uint16_t core_id)
+{
+	if (dev == NULL)
+		return;
+
+	if (dev->virtqueue == NULL || dev->virtqueue[queue_id] == NULL)
+		return;
+
+	dev->virtqueue[queue_id]->core_id = core_id;
+}
+
 /*
  * Register ops so that we can add/remove device to data core.
  */
-- 
1.8.4.2