From: Andy Pei <andy.pei@intel.com>
To: dev@dpdk.org
Cc: chenbo.xia@intel.com, maxime.coquelin@redhat.com, gang.cao@intel.com,
 changpeng.liu@intel.com
Subject: [PATCH v4 06/16] vdpa/ifc: add block device SW live-migration
Date: Sun, 27 Mar 2022 22:51:29 +0800
Message-Id: <1648392699-60113-7-git-send-email-andy.pei@intel.com>
X-Mailer: git-send-email 1.8.3.1
In-Reply-To: <1648392699-60113-1-git-send-email-andy.pei@intel.com>
References: <1643093258-47258-2-git-send-email-andy.pei@intel.com>
 <1648392699-60113-1-git-send-email-andy.pei@intel.com>

Add SW live-migration support to the block device.
Add dirty page logging for the block device.
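For clarity, the pause path added for the block device behaves roughly as in
the stand-alone sketch below. This is illustrative only and not part of the
diff: the demo_* struct and helpers are simplified stand-ins for the real
ifcvf_hw / rte_vhost_vring types and the rte_vhost_* APIs, and the used-ring
size ignores the optional event suffix.

/*
 * Illustrative sketch: drain each queue until the device has used every
 * available descriptor, then report the used-ring span that must be marked
 * dirty, since SW (not HW) wrote the used ring during the relay.
 */
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for a vring; not the real rte_vhost_vring. */
struct demo_vring {
	uint16_t avail_idx; /* last index made available by the driver */
	uint16_t used_idx;  /* last index completed by the device */
	uint16_t size;      /* ring size in descriptors */
};

/* A block queue is drained once used catches up with avail. */
static int
demo_vring_drained(const struct demo_vring *vq)
{
	return vq->avail_idx == vq->used_idx;
}

/* Used ring bytes: flags(2) + idx(2) + one 8-byte element per descriptor. */
static uint64_t
demo_used_ring_len(uint16_t size)
{
	return (uint64_t)size * 8 + 4;
}

int
main(void)
{
	struct demo_vring vq = { .avail_idx = 7, .used_idx = 5, .size = 256 };

	/* Pause: wait until the two in-flight requests complete.
	 * The real code kicks the HW queue and sleeps between polls.
	 */
	while (!demo_vring_drained(&vq))
		vq.used_idx++;

	/* Log: after the device stops, this span is what the driver marks
	 * dirty via rte_vhost_log_used_vring().
	 */
	printf("drained at index %u, used ring to log: %llu bytes\n",
	       (unsigned)vq.avail_idx,
	       (unsigned long long)demo_used_ring_len(vq.size));
	return 0;
}

Draining before disabling the HW avoids losing in-flight block requests, and
the explicit used-ring logging is needed because the relay, not the device,
updates the used ring.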
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c |   4 +-
 drivers/vdpa/ifc/base/ifcvf.h |   6 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 128 +++++++++++++++++++++++++++++++++++-------
 3 files changed, 115 insertions(+), 23 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index d10c1fd..e417c50 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -191,7 +191,7 @@
 	IFCVF_WRITE_REG32(val >> 32, hi);
 }
 
-STATIC int
+int
 ifcvf_hw_enable(struct ifcvf_hw *hw)
 {
 	struct ifcvf_pci_common_cfg *cfg;
@@ -240,7 +240,7 @@
 	return 0;
 }
 
-STATIC void
+void
 ifcvf_hw_disable(struct ifcvf_hw *hw)
 {
 	u32 i;
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 769c603..6dd7925 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -179,4 +179,10 @@ struct ifcvf_hw {
 u64
 ifcvf_get_queue_notify_off(struct ifcvf_hw *hw, int qid);
 
+int
+ifcvf_hw_enable(struct ifcvf_hw *hw);
+
+void
+ifcvf_hw_disable(struct ifcvf_hw *hw);
+
 #endif /* _IFCVF_H_ */
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 8d104b7..a23dc2d 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -345,6 +345,56 @@ struct rte_vdpa_dev_info {
 	}
 }
 
+static void
+vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal)
+{
+	struct ifcvf_hw *hw = &internal->hw;
+	struct rte_vhost_vring vq;
+	int i, vid;
+	uint64_t features = 0;
+	uint64_t log_base = 0, log_size = 0;
+	uint64_t len;
+
+	vid = internal->vid;
+
+	if (internal->device_type == IFCVF_BLK) {
+		for (i = 0; i < hw->nr_vring; i++) {
+			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
+			while (vq.avail->idx != vq.used->idx) {
+				ifcvf_notify_queue(hw, i);
+				usleep(10);
+			}
+			hw->vring[i].last_avail_idx = vq.avail->idx;
+			hw->vring[i].last_used_idx = vq.used->idx;
+		}
+	}
+
+	ifcvf_hw_disable(hw);
+
+	for (i = 0; i < hw->nr_vring; i++)
+		rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
+				hw->vring[i].last_used_idx);
+
+	if (internal->sw_lm)
+		return;
+
+	rte_vhost_get_negotiated_features(vid, &features);
+	if (RTE_VHOST_NEED_LOG(features)) {
+		ifcvf_disable_logging(hw);
+		rte_vhost_get_log_base(internal->vid, &log_base, &log_size);
+		rte_vfio_container_dma_unmap(internal->vfio_container_fd,
+				log_base, IFCVF_LOG_BASE, log_size);
+		/*
+		 * IFCVF marks dirty memory pages for only packet buffer,
+		 * SW helps to mark the used ring as dirty after device stops.
+		 */
+		for (i = 0; i < hw->nr_vring; i++) {
+			len = IFCVF_USED_RING_LEN(hw->vring[i].size);
+			rte_vhost_log_used_vring(vid, i, 0, len);
+		}
+	}
+}
+
 #define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \
 		sizeof(int) * (IFCVF_MAX_QUEUES * 2 + 1))
 static int
@@ -659,15 +709,22 @@ struct rte_vdpa_dev_info {
 		}
 		hw->vring[i].avail = gpa;
 
-		/* Direct I/O for Tx queue, relay for Rx queue */
-		if (i & 1) {
-			gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
-			if (gpa == 0) {
-				DRV_LOG(ERR, "Fail to get GPA for used ring.");
-				return -1;
+		if (internal->device_type == IFCVF_NET) {
+			/* Direct I/O for Tx queue, relay for Rx queue */
+			if (i & 1) {
+				gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
+				if (gpa == 0) {
+					DRV_LOG(ERR, "Fail to get GPA for used ring.");
+					return -1;
+				}
+				hw->vring[i].used = gpa;
+			} else {
+				hw->vring[i].used = m_vring_iova +
+					(char *)internal->m_vring[i].used -
+					(char *)internal->m_vring[i].desc;
 			}
-			hw->vring[i].used = gpa;
-		} else {
+		} else if (internal->device_type == IFCVF_BLK) {
+			/* BLK: relay every queue */
 			hw->vring[i].used = m_vring_iova +
 				(char *)internal->m_vring[i].used -
 				(char *)internal->m_vring[i].desc;
@@ -686,7 +743,10 @@ struct rte_vdpa_dev_info {
 	}
 	hw->nr_vring = nr_vring;
 
-	return ifcvf_start_hw(&internal->hw);
+	if (internal->device_type == IFCVF_NET)
+		return ifcvf_start_hw(&internal->hw);
+	else if (internal->device_type == IFCVF_BLK)
+		return ifcvf_hw_enable(&internal->hw);
 
 error:
 	for (i = 0; i < nr_vring; i++)
@@ -710,8 +770,12 @@ struct rte_vdpa_dev_info {
 
 	for (i = 0; i < hw->nr_vring; i++) {
 		/* synchronize remaining new used entries if any */
-		if ((i & 1) == 0)
+		if (internal->device_type == IFCVF_NET) {
+			if ((i & 1) == 0)
+				update_used_ring(internal, i);
+		} else if (internal->device_type == IFCVF_BLK) {
 			update_used_ring(internal, i);
+		}
 
 		rte_vhost_get_vhost_vring(vid, i, &vq);
 		len = IFCVF_USED_RING_LEN(vq.size);
@@ -773,17 +837,36 @@ struct rte_vdpa_dev_info {
 		}
 	}
 
-	for (qid = 0; qid < q_num; qid += 2) {
-		ev.events = EPOLLIN | EPOLLPRI;
-		/* leave a flag to mark it's for interrupt */
-		ev.data.u64 = 1 | qid << 1 |
-			(uint64_t)internal->intr_fd[qid] << 32;
-		if (epoll_ctl(epfd, EPOLL_CTL_ADD, internal->intr_fd[qid], &ev)
-				< 0) {
-			DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
-			return NULL;
+	if (internal->device_type == IFCVF_NET) {
+		for (qid = 0; qid < q_num; qid += 2) {
+			ev.events = EPOLLIN | EPOLLPRI;
+			/* leave a flag to mark it's for interrupt */
+			ev.data.u64 = 1 | qid << 1 |
+				(uint64_t)internal->intr_fd[qid] << 32;
+			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
+					internal->intr_fd[qid], &ev)
+					< 0) {
+				DRV_LOG(ERR, "epoll add error: %s",
+					strerror(errno));
+				return NULL;
+			}
+			update_used_ring(internal, qid);
+		}
+	} else if (internal->device_type == IFCVF_BLK) {
+		for (qid = 0; qid < q_num; qid += 1) {
+			ev.events = EPOLLIN | EPOLLPRI;
+			/* leave a flag to mark it's for interrupt */
+			ev.data.u64 = 1 | qid << 1 |
+				(uint64_t)internal->intr_fd[qid] << 32;
+			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
+					internal->intr_fd[qid], &ev)
+					< 0) {
+				DRV_LOG(ERR, "epoll add error: %s",
+					strerror(errno));
+				return NULL;
+			}
+			update_used_ring(internal, qid);
 		}
-		update_used_ring(internal, qid);
 	}
 
 	/* start relay with a first kick */
@@ -871,7 +954,10 @@ struct rte_vdpa_dev_info {
 	/* stop the direct IO data path */
 	unset_notify_relay(internal);
 
-	vdpa_ifcvf_stop(internal);
+	if (internal->device_type == IFCVF_NET)
+		vdpa_ifcvf_stop(internal);
+	else if (internal->device_type == IFCVF_BLK)
+		vdpa_ifcvf_blk_pause(internal);
 
 	vdpa_disable_vfio_intr(internal);
 
 	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);
-- 
1.8.3.1