From: Anatoly Burakov <anatoly.burakov@intel.com>
To: dev@dpdk.org
Cc: Hemant Agrawal <hemant.agrawal@nxp.com>,
 Shreyansh Jain <shreyansh.jain@nxp.com>, keith.wiles@intel.com,
 jianfeng.tan@intel.com, andras.kovacs@ericsson.com,
 laszlo.vadkeri@ericsson.com, benjamin.walker@intel.com,
 bruce.richardson@intel.com, thomas@monjalon.net,
 konstantin.ananyev@intel.com, kuralamudhan.ramakrishnan@intel.com,
 louise.m.daly@intel.com, nelio.laranjeiro@6wind.com,
 yskoh@mellanox.com, pepperjo@japf.ch, jerin.jacob@caviumnetworks.com,
 olivier.matz@6wind.com, gowrishankar.m@linux.vnet.ibm.com
Date: Wed, 11 Apr 2018 13:30:41 +0100
Message-Id: <ffc78a8531c05e19e8b0e0a222abbb3a3c2c852c.1523448978.git.anatoly.burakov@intel.com>
X-Mailer: git-send-email 1.7.0.7
In-Reply-To: <cover.1523448978.git.anatoly.burakov@intel.com>
References: <cover.1523296700.git.anatoly.burakov@intel.com>
 <cover.1523448978.git.anatoly.burakov@intel.com>
Subject: [dpdk-dev] [PATCH v6 66/70] bus/fslmc: enable support for mem event callbacks for vfio

VFIO needs to map and unmap segments for DMA whenever they
become available or unavailable, so register a callback for
memory events, and provide map/unmap functions.

Remove the now-unneeded check for the number of segments: in
non-legacy mode, having no memsegs allocated at startup is a
valid scenario.

Signed-off-by: Shreyansh Jain <shreyansh.jain@nxp.com>
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
Tested-by: Santosh Shukla <santosh.shukla@caviumnetworks.com>
Tested-by: Hemant Agrawal <hemant.agrawal@nxp.com>
Tested-by: Gowrishankar Muthukrishnan <gowrishankar.m@linux.vnet.ibm.com>
---
 drivers/bus/fslmc/fslmc_vfio.c | 153 ++++++++++++++++++++++++++++++++++++-----
 1 file changed, 135 insertions(+), 18 deletions(-)
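
Note: the hotplug-handling pattern added by this patch (a named
callback registered with rte_mem_event_callback_register() that walks
the affected range memseg by memseg, since a single event may span
several segments) generalizes to any bus that has to keep an IOMMU in
sync with hotplugged memory. Below is a minimal sketch of that
pattern; my_dma_map()/my_dma_unmap() are hypothetical stand-ins for a
bus' own VFIO helpers, and the rte_mem_event_* APIs are still
experimental in this release.

#include <stdint.h>
#include <rte_common.h>
#include <rte_memory.h>
#include <rte_errno.h>

/* Hypothetical stand-ins for the bus' own VFIO map/unmap routines. */
static int my_dma_map(uint64_t va, rte_iova_t iova, size_t len);
static int my_dma_unmap(uint64_t va, rte_iova_t iova, size_t len);

static void
my_memevent_cb(enum rte_mem_event type, const void *addr, size_t len)
{
	const struct rte_memseg_list *msl = rte_mem_virt2memseg_list(addr);
	size_t cur_len = 0;

	/* One event may cover several memsegs; walk them one by one. */
	while (cur_len < len) {
		const void *va = RTE_PTR_ADD(addr, cur_len);
		const struct rte_memseg *ms = rte_mem_virt2memseg(va, msl);

		if (type == RTE_MEM_EVENT_ALLOC)
			my_dma_map(ms->addr_64, ms->iova, ms->len);
		else /* RTE_MEM_EVENT_FREE */
			my_dma_unmap(ms->addr_64, ms->iova, ms->len);

		cur_len += ms->len;
	}
}

static int
my_bus_register_cb(void)
{
	/* ENOTSUP means the memory subsystem cannot issue callbacks
	 * (e.g. legacy memory mode); that case is benign.
	 */
	if (rte_mem_event_callback_register("my_memevent_cb",
			my_memevent_cb) < 0 && rte_errno != ENOTSUP)
		return -1;
	return 0;
}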

diff --git a/drivers/bus/fslmc/fslmc_vfio.c b/drivers/bus/fslmc/fslmc_vfio.c
index db3eb61..625fa7c 100644
--- a/drivers/bus/fslmc/fslmc_vfio.c
+++ b/drivers/bus/fslmc/fslmc_vfio.c
@@ -30,6 +30,7 @@
 #include <rte_kvargs.h>
 #include <rte_dev.h>
 #include <rte_bus.h>
+#include <rte_eal_memconfig.h>
 
 #include "rte_fslmc.h"
 #include "fslmc_vfio.h"
@@ -188,11 +189,62 @@ static int vfio_map_irq_region(struct fslmc_vfio_group *group)
 	return -errno;
 }
 
+static int fslmc_map_dma(uint64_t vaddr, rte_iova_t iovaddr, size_t len);
+static int fslmc_unmap_dma(uint64_t vaddr, rte_iova_t iovaddr, size_t len);
+
+static void
+fslmc_memevent_cb(enum rte_mem_event type, const void *addr, size_t len)
+{
+	struct rte_memseg_list *msl;
+	struct rte_memseg *ms;
+	size_t cur_len = 0, map_len = 0;
+	uint64_t virt_addr;
+	rte_iova_t iova_addr;
+	int ret;
+
+	msl = rte_mem_virt2memseg_list(addr);
+
+	while (cur_len < len) {
+		const void *va = RTE_PTR_ADD(addr, cur_len);
+
+		ms = rte_mem_virt2memseg(va, msl);
+		iova_addr = ms->iova;
+		virt_addr = ms->addr_64;
+		map_len = ms->len;
+
+		DPAA2_BUS_DEBUG("Request for %s, va=%p, "
+				"virt_addr=0x%" PRIx64 ", "
+				"iova=0x%" PRIx64 ", map_len=%zu",
+				type == RTE_MEM_EVENT_ALLOC ?
+					"alloc" : "dealloc",
+				va, virt_addr, iova_addr, map_len);
+
+		if (type == RTE_MEM_EVENT_ALLOC)
+			ret = fslmc_map_dma(virt_addr, iova_addr, map_len);
+		else
+			ret = fslmc_unmap_dma(virt_addr, iova_addr, map_len);
+
+		if (ret != 0) {
+			DPAA2_BUS_ERR("DMA mapping/unmapping failed. "
+					"type=%d, addr=%p, len=%zu, err=%d",
+					type, va, map_len, ret);
+			return;
+		}
+
+		cur_len += map_len;
+	}
+
+	if (type == RTE_MEM_EVENT_ALLOC)
+		DPAA2_BUS_DEBUG("Total Mapped: addr=%p, len=%zu",
+				addr, len);
+	else
+		DPAA2_BUS_DEBUG("Total Unmapped: addr=%p, len=%zu",
+				addr, len);
+}
+
 static int
-fslmc_vfio_map(const struct rte_memseg_list *msl __rte_unused,
-		const struct rte_memseg *ms, void *arg)
+fslmc_map_dma(uint64_t vaddr, rte_iova_t iovaddr __rte_unused, size_t len)
 {
-	int *n_segs = arg;
 	struct fslmc_vfio_group *group;
 	struct vfio_iommu_type1_dma_map dma_map = {
 		.argsz = sizeof(struct vfio_iommu_type1_dma_map),
@@ -200,10 +252,11 @@ fslmc_vfio_map(const struct rte_memseg_list *msl __rte_unused,
 	};
 	int ret;
 
-	dma_map.size = ms->len;
-	dma_map.vaddr = ms->addr_64;
+	dma_map.size = len;
+	dma_map.vaddr = vaddr;
+
 #ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
-	dma_map.iova = ms->iova;
+	dma_map.iova = iovaddr;
 #else
 	dma_map.iova = dma_map.vaddr;
 #endif
@@ -216,32 +269,91 @@ fslmc_vfio_map(const struct rte_memseg_list *msl __rte_unused,
 		return -1;
 	}
 
-	DPAA2_BUS_DEBUG("-->Initial SHM Virtual ADDR %llX",
-			dma_map.vaddr);
-	DPAA2_BUS_DEBUG("-----> DMA size 0x%llX", dma_map.size);
-	ret = ioctl(group->container->fd, VFIO_IOMMU_MAP_DMA,
-			&dma_map);
+	DPAA2_BUS_DEBUG("--> Map address: %llX, size: 0x%llX",
+			dma_map.vaddr, dma_map.size);
+	ret = ioctl(group->container->fd, VFIO_IOMMU_MAP_DMA, &dma_map);
 	if (ret) {
 		DPAA2_BUS_ERR("VFIO_IOMMU_MAP_DMA API(errno = %d)",
 				errno);
 		return -1;
 	}
-	(*n_segs)++;
+
 	return 0;
 }
 
-int rte_fslmc_vfio_dmamap(void)
+static int
+fslmc_unmap_dma(uint64_t vaddr, rte_iova_t iovaddr __rte_unused, size_t len)
 {
-	int i = 0;
+	struct fslmc_vfio_group *group;
+	struct vfio_iommu_type1_dma_unmap dma_unmap = {
+		.argsz = sizeof(struct vfio_iommu_type1_dma_unmap),
+		.flags = 0,
+	};
+	int ret;
+
+	dma_unmap.size = len;
+	dma_unmap.iova = vaddr;
 
-	if (rte_memseg_walk(fslmc_vfio_map, &i) < 0)
+	/* SET DMA UNMAP for IOMMU */
+	group = &vfio_group;
+
+	if (!group->container) {
+		DPAA2_BUS_ERR("Container is not connected");
 		return -1;
+	}
 
-	/* Verifying that at least single segment is available */
-	if (i <= 0) {
-		DPAA2_BUS_ERR("No Segments found for VFIO Mapping");
+	DPAA2_BUS_DEBUG("--> Unmap address: %llX, size: 0x%llX",
+			dma_unmap.iova, dma_unmap.size);
+	ret = ioctl(group->container->fd, VFIO_IOMMU_UNMAP_DMA, &dma_unmap);
+	if (ret) {
+		DPAA2_BUS_ERR("VFIO_IOMMU_UNMAP_DMA API(errno = %d)",
+				errno);
 		return -1;
 	}
+
+	return 0;
+}
+
+static int
+fslmc_dmamap_seg(const struct rte_memseg_list *msl __rte_unused,
+		 const struct rte_memseg *ms, void *arg)
+{
+	int *n_segs = arg;
+	int ret;
+
+	ret = fslmc_map_dma(ms->addr_64, ms->iova, ms->len);
+	if (ret)
+		DPAA2_BUS_ERR("Unable to VFIO map (addr=%p, len=%zu)",
+				ms->addr, ms->len);
+	else
+		(*n_segs)++;
+
+	return ret;
+}
+
+int rte_fslmc_vfio_dmamap(void)
+{
+	int i = 0, ret;
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	rte_rwlock_t *mem_lock = &mcfg->memory_hotplug_lock;
+
+	/* Lock before parsing and registering callback to memory subsystem */
+	rte_rwlock_read_lock(mem_lock);
+
+	if (rte_memseg_walk(fslmc_dmamap_seg, &i) < 0) {
+		rte_rwlock_read_unlock(mem_lock);
+		return -1;
+	}
+
+	ret = rte_mem_event_callback_register("fslmc_memevent_clb",
+					      fslmc_memevent_cb);
+	if (ret && rte_errno == ENOTSUP)
+		DPAA2_BUS_DEBUG("Memory event callbacks not supported");
+	else if (ret)
+		DPAA2_BUS_ERR("Unable to install memory handler");
+	else
+		DPAA2_BUS_DEBUG("Installed memory callback handler");
+
 	DPAA2_BUS_DEBUG("Total %d segments found.", i);
 
 	/* TODO - This is a W.A. as VFIO currently does not add the mapping of
@@ -250,6 +362,11 @@ int rte_fslmc_vfio_dmamap(void)
 	 */
 	vfio_map_irq_region(&vfio_group);
 
+	/* Existing segments have been mapped and memory callback for hotplug
+	 * has been installed.
+	 */
+	rte_rwlock_read_unlock(mem_lock);
+
 	return 0;
 }
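
For reference, the new fslmc_map_dma()/fslmc_unmap_dma() helpers use
the standard VFIO type1 ioctl pair; nothing here is fslmc-specific
beyond the container lookup. A stripped-down sketch of that ioctl
pattern (type1_map()/type1_unmap() are illustrative names,
container_fd is assumed to be an already-configured /dev/vfio/vfio
fd, and error handling is trimmed):

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

static int
type1_map(int container_fd, uint64_t vaddr, uint64_t iova, uint64_t len)
{
	struct vfio_iommu_type1_dma_map dma_map = {
		.argsz = sizeof(dma_map),
		.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
		.vaddr = vaddr,
		.iova = iova,
		.size = len,
	};
	return ioctl(container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);
}

static int
type1_unmap(int container_fd, uint64_t iova, uint64_t len)
{
	/* Unmap is keyed by IOVA and size only; no vaddr is needed. */
	struct vfio_iommu_type1_dma_unmap dma_unmap = {
		.argsz = sizeof(dma_unmap),
		.iova = iova,
		.size = len,
	};
	return ioctl(container_fd, VFIO_IOMMU_UNMAP_DMA, &dma_unmap);
}

Note that fslmc_unmap_dma() in the patch passes the virtual address
as dma_unmap.iova, which assumes VA == IOVA, i.e. a build without
RTE_LIBRTE_DPAA2_USE_PHYS_IOVA; the map side makes the same choice in
that configuration.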
 
-- 
2.7.4