From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga12.intel.com (mga12.intel.com [192.55.52.136]) by dpdk.org (Postfix) with ESMTP id 04F0629D6 for ; Thu, 3 May 2018 05:05:33 +0200 (CEST) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga001.fm.intel.com ([10.253.24.23]) by fmsmga106.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 02 May 2018 20:05:33 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.49,356,1520924400"; d="scan'208";a="51969580" Received: from jguo15x-mobl3.ccr.corp.intel.com (HELO [10.67.68.67]) ([10.67.68.67]) by fmsmga001.fm.intel.com with ESMTP; 02 May 2018 20:05:30 -0700 To: "Ananyev, Konstantin" , "stephen@networkplumber.org" , "Richardson, Bruce" , "Yigit, Ferruh" , "gaetan.rivet@6wind.com" , "Wu, Jingjing" , "thomas@monjalon.net" , "motih@mellanox.com" , "matan@mellanox.com" , "Van Haaren, Harry" , "Tan, Jianfeng" References: <1498711073-42917-1-git-send-email-jia.guo@intel.com> <1524058689-4954-1-git-send-email-jia.guo@intel.com> <1524058689-4954-2-git-send-email-jia.guo@intel.com> <2601191342CEEE43887BDE71AB977258AE918A3B@IRSMSX102.ger.corp.intel.com> Cc: "jblunck@infradead.org" , "shreyansh.jain@nxp.com" , "dev@dpdk.org" , "Zhang, Helin" From: "Guo, Jia" Message-ID: <00555e8a-3490-23c6-c9b3-11df83c433cc@intel.com> Date: Thu, 3 May 2018 11:05:29 +0800 User-Agent: Mozilla/5.0 (Windows NT 6.3; WOW64; rv:45.0) Gecko/20100101 Thunderbird/45.4.0 MIME-Version: 1.0 In-Reply-To: <2601191342CEEE43887BDE71AB977258AE918A3B@IRSMSX102.ger.corp.intel.com> Content-Type: text/plain; charset=windows-1252; format=flowed Content-Transfer-Encoding: 7bit Subject: Re: [dpdk-dev] [PATCH V20 1/4] bus/pci: introduce device hot unplug handle X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 03 May 2018 03:05:34 -0000 On 4/20/2018 6:32 PM, Ananyev, Konstantin wrote: > Hi Jeff, > >> As of device hot unplug, we need some preparatory measures so that we will >> not encounter memory fault after device be plug out of the system, >> and also let we could recover the running data path but not been break. >> This patch allows the buses to handle device hot unplug event. >> The patch only enable the ops in pci bus, when handle device hot unplug >> event, remap a dummy memory to avoid bus read/write error. >> Other buses could accordingly implement this ops specific by themselves. >> >> Signed-off-by: Jeff Guo >> --- >> v20->19: >> clean the code >> --- >> drivers/bus/pci/pci_common.c | 67 +++++++++++++++++++++++++++++++++ >> drivers/bus/pci/pci_common_uio.c | 32 ++++++++++++++++ >> drivers/bus/pci/private.h | 12 ++++++ >> lib/librte_eal/common/include/rte_bus.h | 16 ++++++++ >> 4 files changed, 127 insertions(+) >> >> diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c >> index 2a00f36..709eaf3 100644 >> --- a/drivers/bus/pci/pci_common.c >> +++ b/drivers/bus/pci/pci_common.c >> @@ -474,6 +474,72 @@ pci_find_device(const struct rte_device *start, rte_dev_cmp_t cmp, >> } >> >> static int >> +pci_handle_hot_unplug(struct rte_device *dev, void *failure_addr) >> +{ >> + struct rte_pci_device *pdev = NULL; >> + int ret = 0, i, isfound = 0; >> + >> + if (failure_addr != NULL) { >> + FOREACH_DEVICE_ON_PCIBUS(pdev) { >> + for (i = 0; i != sizeof(pdev->mem_resource) / >> + sizeof(pdev->mem_resource[0]); i++) { > You can do i != RTE_DIM(pdev->mem_resource) here. sure. >> + if ((uint64_t)failure_addr >= >> + (uint64_t)pdev->mem_resource[i].addr && >> + (uint64_t)failure_addr <= >> + (uint64_t)pdev->mem_resource[i].addr + >> + pdev->mem_resource[i].len) { > > I think it should be failure_addr < addr + len i think you are right. >> + RTE_LOG(ERR, EAL, "Failure address " >> + "%16.16"PRIx64" is belong to " >> + "resource of device %s!\n", >> + (uint64_t)failure_addr, >> + pdev->device.name); >> + isfound = 1; >> + break; >> + } >> + } >> + if (isfound) >> + break; > > Might be it is a good thing to put the code that searches for address into a separate function. good idea. >> + } >> + } else if (dev != NULL) { >> + pdev = RTE_DEV_TO_PCI(dev); >> + } else { >> + return -EINVAL; >> + } >> + >> + if (!pdev) >> + return -1; >> + >> + /* remap resources for devices */ >> + switch (pdev->kdrv) { >> + case RTE_KDRV_VFIO: >> +#ifdef VFIO_PRESENT >> + /* TODO */ >> +#endif > Should set ret =-1 as not implemented now. ok. >> + break; >> + case RTE_KDRV_IGB_UIO: >> + case RTE_KDRV_UIO_GENERIC: >> + if (rte_eal_using_phys_addrs()) { >> + /* map resources for devices that use uio */ >> + ret = pci_uio_remap_resource(pdev); >> + } >> + break; >> + case RTE_KDRV_NIC_UIO: >> + ret = pci_uio_remap_resource(pdev); >> + break; >> + default: >> + RTE_LOG(DEBUG, EAL, >> + " Not managed by a supported kernel driver, skipped\n"); >> + ret = -1; >> + break; >> + } >> + >> + if (ret != 0) >> + RTE_LOG(ERR, EAL, "failed to handle hot unplug of %s", >> + pdev->name); >> + return ret; >> +} >> + >> +static int >> pci_plug(struct rte_device *dev) >> { >> return pci_probe_all_drivers(RTE_DEV_TO_PCI(dev)); >> @@ -503,6 +569,7 @@ struct rte_pci_bus rte_pci_bus = { >> .unplug = pci_unplug, >> .parse = pci_parse, >> .get_iommu_class = rte_pci_get_iommu_class, >> + .handle_hot_unplug = pci_handle_hot_unplug, >> }, >> .device_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.device_list), >> .driver_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.driver_list), >> diff --git a/drivers/bus/pci/pci_common_uio.c b/drivers/bus/pci/pci_common_uio.c >> index 54bc20b..ba2c458 100644 >> --- a/drivers/bus/pci/pci_common_uio.c >> +++ b/drivers/bus/pci/pci_common_uio.c >> @@ -146,6 +146,38 @@ pci_uio_unmap(struct mapped_pci_resource *uio_res) >> } >> } >> >> +/* remap the PCI resource of a PCI device in anonymous virtual memory */ >> +int >> +pci_uio_remap_resource(struct rte_pci_device *dev) >> +{ >> + int i; >> + void *map_address; >> + >> + if (dev == NULL) >> + return -1; >> + >> + /* Remap all BARs */ >> + for (i = 0; i != PCI_MAX_RESOURCE; i++) { >> + /* skip empty BAR */ >> + if (dev->mem_resource[i].phys_addr == 0) >> + continue; >> + pci_unmap_resource(dev->mem_resource[i].addr, >> + (size_t)dev->mem_resource[i].len); >> + map_address = pci_map_resource( >> + dev->mem_resource[i].addr, -1, 0, >> + (size_t)dev->mem_resource[i].len, >> + MAP_ANONYMOUS | MAP_FIXED); > Instead of using mumap/mmap() can we use mremap() here? > Might be a bit safer approach. because of mremap not have the can not map an anonymous memory, so that is not fit for this case, and i check and found that MAP_FIXED could overlap the part of the existing mapping, no need to use unmap at first before remap. >> + if (map_address == MAP_FAILED) { >> + RTE_LOG(ERR, EAL, >> + "Cannot remap resource for device %s\n", >> + dev->name); >> + return -1; >> + } >> + } >> + >> + return 0; >> +} >> + >> static struct mapped_pci_resource * >> pci_uio_find_resource(struct rte_pci_device *dev) >> { >> diff --git a/drivers/bus/pci/private.h b/drivers/bus/pci/private.h >> index 88fa587..cc1668c 100644 >> --- a/drivers/bus/pci/private.h >> +++ b/drivers/bus/pci/private.h >> @@ -173,6 +173,18 @@ void pci_uio_free_resource(struct rte_pci_device *dev, >> struct mapped_pci_resource *uio_res); >> >> /** >> + * remap the pci uio resource. >> + * >> + * @param dev >> + * Point to the struct rte pci device. >> + * @return >> + * - On success, zero. >> + * - On failure, a negative value. >> + */ >> +int >> +pci_uio_remap_resource(struct rte_pci_device *dev); >> + >> +/** >> * Map device memory to uio resource >> * >> * This function is private to EAL. >> diff --git a/lib/librte_eal/common/include/rte_bus.h b/lib/librte_eal/common/include/rte_bus.h >> index 6fb0834..d2c5778 100644 >> --- a/lib/librte_eal/common/include/rte_bus.h >> +++ b/lib/librte_eal/common/include/rte_bus.h >> @@ -168,6 +168,20 @@ typedef int (*rte_bus_unplug_t)(struct rte_device *dev); >> typedef int (*rte_bus_parse_t)(const char *name, void *addr); >> >> /** >> + * Implementation specific hot unplug handler function which is responsible >> + * for handle the failure when hot unplug the device, guaranty the system >> + * would not crash in the case. >> + * @param dev >> + * Pointer of the device structure. >> + * >> + * @return >> + * 0 on success. >> + * !0 on error. >> + */ >> +typedef int (*rte_bus_handle_hot_unplug_t)(struct rte_device *dev, >> + void *dev_addr); >> + >> +/** >> * Bus scan policies >> */ >> enum rte_bus_scan_mode { >> @@ -209,6 +223,8 @@ struct rte_bus { >> rte_bus_plug_t plug; /**< Probe single device for drivers */ >> rte_bus_unplug_t unplug; /**< Remove single device from driver */ >> rte_bus_parse_t parse; /**< Parse a device name */ >> + rte_bus_handle_hot_unplug_t handle_hot_unplug; /**< handle hot unplug >> + device event */ >> struct rte_bus_conf conf; /**< Bus configuration */ >> rte_bus_get_iommu_class_t get_iommu_class; /**< Get iommu class */ >> }; >> -- >> 2.7.4