From: Andy Pei <andy.pei@intel.com>
To: dev@dpdk.org
Cc: chenbo.xia@intel.com, maxime.coquelin@redhat.com, gang.cao@intel.com,
 changpeng.liu@intel.com
Subject: [PATCH v3 05/15] vdpa/ifc: add blk dev sw live migration
Date: Sat, 29 Jan 2022 11:03:27 +0800
Message-Id: <1643425417-215270-6-git-send-email-andy.pei@intel.com>
X-Mailer: git-send-email 1.8.3.1
In-Reply-To: <1643425417-215270-1-git-send-email-andy.pei@intel.com>
References: <1643093258-47258-2-git-send-email-andy.pei@intel.com>
 <1643425417-215270-1-git-send-email-andy.pei@intel.com>

Enable virtio-blk SW live migration: relay the callfd and log the dirty
pages. In this version the write command is not inspected; the page is
simply marked dirty regardless.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c |   4 +-
 drivers/vdpa/ifc/base/ifcvf.h |   6 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 128 +++++++++++++++++++++++++++++++++++-------
 3 files changed, 116 insertions(+), 22 deletions(-)
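
Note for reviewers: below is a minimal sketch of the SW dirty-page logging
step this patch performs once a blk device is paused. Each queue's used
ring is logged as dirty through the vhost library so the live migration
process copies it to the destination. The helper name
mark_used_rings_dirty() is illustrative only; rte_vhost_log_used_vring()
and IFCVF_USED_RING_LEN() are the same calls used in the diff below.

/* Sketch only: mark every used ring dirty after the device stops.
 * "vid" is the vhost device id and "hw" the paused ifcvf device;
 * this mirrors the loop added to vdpa_ifcvf_stop()/blk_pause().
 */
static void
mark_used_rings_dirty(int vid, struct ifcvf_hw *hw)
{
	uint64_t len;
	int i;

	for (i = 0; i < hw->nr_vring; i++) {
		/* byte length of the used ring for this queue size */
		len = IFCVF_USED_RING_LEN(hw->vring[i].size);
		/* log offset [0, len) of queue i's used ring as dirty */
		rte_vhost_log_used_vring(vid, i, 0, len);
	}
}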

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index 721cb1d..3a69e53 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -189,7 +189,7 @@
 	IFCVF_WRITE_REG32(val >> 32, hi);
 }
 
-STATIC int
+int
 ifcvf_hw_enable(struct ifcvf_hw *hw)
 {
 	struct ifcvf_pci_common_cfg *cfg;
@@ -238,7 +238,7 @@
 	return 0;
 }
 
-STATIC void
+void
 ifcvf_hw_disable(struct ifcvf_hw *hw)
 {
 	u32 i;
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 769c603..6dd7925 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -179,4 +179,10 @@ struct ifcvf_hw {
 u64
 ifcvf_get_queue_notify_off(struct ifcvf_hw *hw, int qid);
 
+int
+ifcvf_hw_enable(struct ifcvf_hw *hw);
+
+void
+ifcvf_hw_disable(struct ifcvf_hw *hw);
+
 #endif /* _IFCVF_H_ */
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 4f99bb3..a930825 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -332,10 +332,67 @@ struct rte_vdpa_dev_info {
 
 	rte_vhost_get_negotiated_features(vid, &features);
 	if (RTE_VHOST_NEED_LOG(features)) {
-		ifcvf_disable_logging(hw);
-		rte_vhost_get_log_base(internal->vid, &log_base, &log_size);
-		rte_vfio_container_dma_unmap(internal->vfio_container_fd,
-				log_base, IFCVF_LOG_BASE, log_size);
+		if (internal->device_type == IFCVF_NET) {
+			ifcvf_disable_logging(hw);
+			rte_vhost_get_log_base(internal->vid, &log_base,
+				&log_size);
+			rte_vfio_container_dma_unmap(
+				internal->vfio_container_fd, log_base,
+				IFCVF_LOG_BASE, log_size);
+		}
+		/* IFCVF marks dirty memory pages for only packet buffer,
+		 * SW helps to mark the used ring as dirty after device stops.
+		 */
+		for (i = 0; i < hw->nr_vring; i++) {
+			len = IFCVF_USED_RING_LEN(hw->vring[i].size);
+			rte_vhost_log_used_vring(vid, i, 0, len);
+		}
+	}
+}
+
+static void
+vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal)
+{
+	struct ifcvf_hw *hw = &internal->hw;
+	struct rte_vhost_vring vq;
+	int i, vid;
+	uint64_t features = 0;
+	uint64_t log_base = 0, log_size = 0;
+	uint64_t len;
+
+	vid = internal->vid;
+
+	if (internal->device_type == IFCVF_BLK) {
+		for (i = 0; i < hw->nr_vring; i++) {
+			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
+			while (vq.avail->idx != vq.used->idx) {
+				ifcvf_notify_queue(hw, i);
+				usleep(10);
+			}
+			hw->vring[i].last_avail_idx = vq.avail->idx;
+			hw->vring[i].last_used_idx = vq.used->idx;
+		}
+	}
+
+	ifcvf_hw_disable(hw);
+
+	for (i = 0; i < hw->nr_vring; i++)
+		rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
+				hw->vring[i].last_used_idx);
+
+	if (internal->sw_lm)
+		return;
+
+	rte_vhost_get_negotiated_features(vid, &features);
+	if (RTE_VHOST_NEED_LOG(features)) {
+		if (internal->device_type == IFCVF_NET) {
+			ifcvf_disable_logging(hw);
+			rte_vhost_get_log_base(internal->vid, &log_base,
+				&log_size);
+			rte_vfio_container_dma_unmap(
+				internal->vfio_container_fd, log_base,
+				IFCVF_LOG_BASE, log_size);
+		}
 		/*
 		 * IFCVF marks dirty memory pages for only packet buffer,
 		 * SW helps to mark the used ring as dirty after device stops.
@@ -661,15 +718,17 @@ struct rte_vdpa_dev_info {
 		}
 		hw->vring[i].avail = gpa;
 
-		/* Direct I/O for Tx queue, relay for Rx queue */
-		if (i & 1) {
+		/* NETWORK: Direct I/O for Tx queue, relay for Rx queue
+		 * BLK: relay every queue
+		 */
+		if ((i & 1) && (internal->device_type == IFCVF_NET)) {
 			gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
 			if (gpa == 0) {
 				DRV_LOG(ERR, "Fail to get GPA for used ring.");
 				return -1;
 			}
 			hw->vring[i].used = gpa;
-		} else {
+		} else if (internal->device_type == IFCVF_BLK) {
 			hw->vring[i].used = m_vring_iova +
 				(char *)internal->m_vring[i].used -
 				(char *)internal->m_vring[i].desc;
@@ -688,7 +747,10 @@ struct rte_vdpa_dev_info {
 	}
 	hw->nr_vring = nr_vring;
 
-	return ifcvf_start_hw(&internal->hw);
+	if (internal->device_type == IFCVF_NET)
+		return ifcvf_start_hw(&internal->hw);
+	else if (internal->device_type == IFCVF_BLK)
+		return ifcvf_hw_enable(&internal->hw);
 
 error:
 	for (i = 0; i < nr_vring; i++)
@@ -713,8 +775,10 @@ struct rte_vdpa_dev_info {
 
 	for (i = 0; i < hw->nr_vring; i++) {
 		/* synchronize remaining new used entries if any */
-		if ((i & 1) == 0)
+		if (((i & 1) == 0 && internal->device_type == IFCVF_NET) ||
+		     internal->device_type == IFCVF_BLK) {
 			update_used_ring(internal, i);
+		}
 
 		rte_vhost_get_vhost_vring(vid, i, &vq);
 		len = IFCVF_USED_RING_LEN(vq.size);
@@ -726,6 +790,8 @@ struct rte_vdpa_dev_info {
 			(uint64_t)(uintptr_t)internal->m_vring[i].desc,
 			m_vring_iova, size);
 
+		hw->vring[i].last_avail_idx = vq.used->idx;
+		hw->vring[i].last_used_idx = vq.used->idx;
 		rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
 				hw->vring[i].last_used_idx);
 		rte_free(internal->m_vring[i].desc);
@@ -776,17 +842,36 @@ struct rte_vdpa_dev_info {
 		}
 	}
 
-	for (qid = 0; qid < q_num; qid += 2) {
-		ev.events = EPOLLIN | EPOLLPRI;
-		/* leave a flag to mark it's for interrupt */
-		ev.data.u64 = 1 | qid << 1 |
-			(uint64_t)internal->intr_fd[qid] << 32;
-		if (epoll_ctl(epfd, EPOLL_CTL_ADD, internal->intr_fd[qid], &ev)
-				< 0) {
-			DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
-			return NULL;
+	if (internal->device_type == IFCVF_NET) {
+		for (qid = 0; qid < q_num; qid += 2) {
+			ev.events = EPOLLIN | EPOLLPRI;
+			/* leave a flag to mark it's for interrupt */
+			ev.data.u64 = 1 | qid << 1 |
+				(uint64_t)internal->intr_fd[qid] << 32;
+			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
+				      internal->intr_fd[qid], &ev)
+					< 0) {
+				DRV_LOG(ERR, "epoll add error: %s",
+					strerror(errno));
+				return NULL;
+			}
+			update_used_ring(internal, qid);
+		}
+	} else if (internal->device_type == IFCVF_BLK) {
+		for (qid = 0; qid < q_num; qid += 1) {
+			ev.events = EPOLLIN | EPOLLPRI;
+			/* leave a flag to mark it's for interrupt */
+			ev.data.u64 = 1 | qid << 1 |
+				(uint64_t)internal->intr_fd[qid] << 32;
+			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
+				      internal->intr_fd[qid], &ev)
+					< 0) {
+				DRV_LOG(ERR, "epoll add error: %s",
+					strerror(errno));
+				return NULL;
+			}
+			update_used_ring(internal, qid);
 		}
-		update_used_ring(internal, qid);
 	}
 
 	/* start relay with a first kick */
@@ -874,7 +959,10 @@ struct rte_vdpa_dev_info {
 
 	/* stop the direct IO data path */
 	unset_notify_relay(internal);
-	vdpa_ifcvf_stop(internal);
+	if (internal->device_type == IFCVF_NET)
+		vdpa_ifcvf_stop(internal);
+	else if (internal->device_type == IFCVF_BLK)
+		vdpa_ifcvf_blk_pause(internal);
 	vdpa_disable_vfio_intr(internal);
 
 	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);
-- 
1.8.3.1