DPDK patches and discussions
 help / color / mirror / Atom feed
From: Jianfeng Tan <jianfeng.tan@intel.com>
To: dev@dpdk.org
Cc: thomas.monjalon@6wind.com, david.marchand@6wind.com,
	ferruh.yigit@intel.com, stephen@networkplumber.org,
	jing.d.chen@intel.com, Jianfeng Tan <jianfeng.tan@intel.com>
Subject: [dpdk-dev] [PATCH] igb_uio: stop device when closing /dev/uioX
Date: Fri,  2 Dec 2016 16:45:46 +0000	[thread overview]
Message-ID: <1480697146-118038-1-git-send-email-jianfeng.tan@intel.com> (raw)
In-Reply-To: <1480696111-116651-1-git-send-email-jianfeng.tan@intel.com>

Depends-on: http://dpdk.org/dev/patchwork/patch/17493/

When a DPDK application grab a PCI device, device and driver work
together to Rx/Tx packets. If the DPDK app crashes or gets killed,
there's no chance for DPDK driver to stop the device, which means
rte_eth_dev_stop() will not be called.

This could result in problems. In virtio's case, the device (the
vhost backend), will keep working. If packets come, the vhost will
copy them into the vring, which is negotiated with the previous DPDK
app. But the resources, especially hugepages, are recycled by VM
kernel. What's worse, the memory could be allocated for other usage,
and re-written. This leads to an uncertain situation. Like this post
has reported, https://bugs.launchpad.net/qemu/+bug/1558175, QEMU's
vhost finds out wrong value, and exits the whole QEMU process.

To make it right, we need to restart (or reset) the device and make
the device go into the initial state, when uio-generic or igb_uio
recycles the PCI device. There's a chance in uio framework to disable
devices when /dev/uioX gets closed. Here we enable the pci device
in open() hook and disable it in release() hook.

However, if device is not enabled in probe() phase any more, we can
not register irq in probe() through uio_register_device(). To address
that, we invoke request_irq() to register callback directly.

Similar change needs to be done in uio-generic driver. And vfio-pci
seems to have done that already.

Signed-off-by: Jianfeng Tan <jianfeng.tan@intel.com>
---
 lib/librte_eal/linuxapp/igb_uio/igb_uio.c | 171 +++++++++++++++++++-----------
 1 file changed, 108 insertions(+), 63 deletions(-)

diff --git a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
index e9d78fb..048390d 100644
--- a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
+++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
@@ -157,8 +157,10 @@ igbuio_pci_irqcontrol(struct uio_info *info, s32 irq_state)
  * If yes, disable it here and will be enable later.
  */
 static irqreturn_t
-igbuio_pci_irqhandler(int irq, struct uio_info *info)
+igbuio_pci_irqhandler(int irq, void *dev_id)
 {
+	struct uio_device *idev = (struct uio_device *)dev_id;
+	struct uio_info *info = idev->info;
 	struct rte_uio_pci_dev *udev = info->priv;
 
 	/* Legacy mode need to mask in hardware */
@@ -166,6 +168,8 @@ igbuio_pci_irqhandler(int irq, struct uio_info *info)
 	    !pci_check_and_mask_intx(udev->pdev))
 		return IRQ_NONE;
 
+	uio_event_notify(info);
+
 	/* Message signal mode, no share IRQ and automasked */
 	return IRQ_HANDLED;
 }
@@ -216,20 +220,58 @@ igbuio_dom0_pci_mmap(struct uio_info *info, struct vm_area_struct *vma)
 }
 #endif
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
-static int __devinit
-#else
 static int
-#endif
-igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
+igbuio_intr_init(struct uio_info *info)
 {
-	struct rte_uio_pci_dev *udev;
+	struct rte_uio_pci_dev *udev = info->priv;
+	struct pci_dev *dev = udev->pdev;
 	struct msix_entry msix_entry;
-	int err;
+	int ret = 0;
 
-	udev = kzalloc(sizeof(struct rte_uio_pci_dev), GFP_KERNEL);
-	if (!udev)
-		return -ENOMEM;
+	switch (igbuio_intr_mode_preferred) {
+	case RTE_INTR_MODE_MSIX:
+		/* Only 1 msi-x vector needed */
+		msix_entry.entry = 0;
+		if (pci_enable_msix(dev, &msix_entry, 1) == 0) {
+			dev_dbg(&dev->dev, "using MSI-X");
+			info->irq = msix_entry.vector;
+			udev->mode = RTE_INTR_MODE_MSIX;
+			break;
+		}
+		/* fall back to INTX */
+	case RTE_INTR_MODE_LEGACY:
+		if (pci_intx_mask_supported(dev)) {
+			dev_dbg(&dev->dev, "using INTX");
+			info->irq_flags = IRQF_SHARED;
+			info->irq = dev->irq;
+			udev->mode = RTE_INTR_MODE_LEGACY;
+			break;
+		}
+		dev_notice(&dev->dev, "PCI INTX mask not supported\n");
+		ret = -EOPNOTSUPP;
+		/* fall back to no IRQ */
+	default:
+		udev->mode = RTE_INTR_MODE_NONE;
+		info->irq = 0;
+	}
+
+	if (info->irq) {
+		ret = request_irq(info->irq, igbuio_pci_irqhandler,
+				  info->irq_flags, info->name,
+				  info->uio_dev);
+		if (ret && udev->mode == RTE_INTR_MODE_MSIX)
+			pci_disable_msix(udev->pdev);
+	}
+
+	return ret;
+}
+
+static int
+igbuio_pci_open(struct uio_info *info, struct inode *inode)
+{
+	struct rte_uio_pci_dev *udev = info->priv;
+	struct pci_dev *dev = udev->pdev;
+	int err;
 
 	/*
 	 * enable device: ask low-level code to enable I/O and
@@ -238,30 +280,77 @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
 	err = pci_enable_device(dev);
 	if (err != 0) {
 		dev_err(&dev->dev, "Cannot enable PCI device\n");
-		goto fail_free;
+		return err;
 	}
 
 	/* enable bus mastering on the device */
 	pci_set_master(dev);
 
 	/* set 64-bit DMA mask */
-	err = pci_set_dma_mask(dev,  DMA_BIT_MASK(64));
-	if (err != 0) {
+	err = pci_set_dma_mask(dev, DMA_BIT_MASK(64));
+	if (err) {
 		dev_err(&dev->dev, "Cannot set DMA mask\n");
-		goto fail_disable_dev;
+		goto error;
 	}
 
 	err = pci_set_consistent_dma_mask(dev, DMA_BIT_MASK(64));
-	if (err != 0) {
+	if (err) {
 		dev_err(&dev->dev, "Cannot set consistent DMA mask\n");
-		goto fail_disable_dev;
+		goto error;
+	}
+
+	err = igbuio_intr_init(info);
+	if (err) {
+		dev_err(&dev->dev, "Enable interrupt fails\n");
+		goto error;
+	} else {
+		dev_info(&dev->dev, "uio device registered with irq %lx\n",
+			 udev->info.irq);
+	}
+
+	return 0;
+error:
+	pci_disable_device(dev);
+	return err;
+}
+
+static int
+igbuio_pci_release(struct uio_info *info, struct inode *inode)
+{
+	struct rte_uio_pci_dev *udev = info->priv;
+	struct pci_dev *dev = udev->pdev;
+
+	dev_info(&udev->pdev->dev, "will be disable\n");
+	if (info->irq) {
+		free_irq(info->irq, info->uio_dev);
+		info->irq = 0;
 	}
+	if (udev->mode == RTE_INTR_MODE_MSIX)
+		pci_disable_msix(dev);
+	pci_disable_device(udev->pdev);
+	return 0;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
+static int __devinit
+#else
+static int
+#endif
+igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
+{
+	struct rte_uio_pci_dev *udev;
+	int err;
+
+	udev = kzalloc(sizeof(struct rte_uio_pci_dev), GFP_KERNEL);
+	if (!udev)
+		return -ENOMEM;
 
 	/* fill uio infos */
 	udev->info.name = "igb_uio";
 	udev->info.version = "0.1";
-	udev->info.handler = igbuio_pci_irqhandler;
 	udev->info.irqcontrol = igbuio_pci_irqcontrol;
+	udev->info.release = igbuio_pci_release;
+	udev->info.open = igbuio_pci_open;
 #ifdef CONFIG_XEN_DOM0
 	/* check if the driver run on Xen Dom0 */
 	if (xen_initial_domain())
@@ -270,42 +359,9 @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
 	udev->info.priv = udev;
 	udev->pdev = dev;
 
-	switch (igbuio_intr_mode_preferred) {
-	case RTE_INTR_MODE_MSIX:
-		/* Only 1 msi-x vector needed */
-		msix_entry.entry = 0;
-		if (pci_enable_msix(dev, &msix_entry, 1) == 0) {
-			dev_dbg(&dev->dev, "using MSI-X");
-			udev->info.irq = msix_entry.vector;
-			udev->mode = RTE_INTR_MODE_MSIX;
-			break;
-		}
-		/* fall back to INTX */
-	case RTE_INTR_MODE_LEGACY:
-		if (pci_intx_mask_supported(dev)) {
-			dev_dbg(&dev->dev, "using INTX");
-			udev->info.irq_flags = IRQF_SHARED;
-			udev->info.irq = dev->irq;
-			udev->mode = RTE_INTR_MODE_LEGACY;
-			break;
-		}
-		dev_notice(&dev->dev, "PCI INTX mask not supported\n");
-		/* fall back to no IRQ */
-	case RTE_INTR_MODE_NONE:
-		udev->mode = RTE_INTR_MODE_NONE;
-		udev->info.irq = 0;
-		break;
-
-	default:
-		dev_err(&dev->dev, "invalid IRQ mode %u",
-			igbuio_intr_mode_preferred);
-		err = -EINVAL;
-		goto fail_disable_dev;
-	}
-
 	err = sysfs_create_group(&dev->dev.kobj, &dev_attr_grp);
 	if (err != 0)
-		goto fail_disable_irq;
+		goto fail_free;
 
 	/* register uio driver */
 	err = uio_register_device(&dev->dev, &udev->info);
@@ -314,18 +370,10 @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
 
 	pci_set_drvdata(dev, udev);
 
-	dev_info(&dev->dev, "uio device registered with irq %lx\n",
-		 udev->info.irq);
-
 	return 0;
 
 fail_remove_group:
 	sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp);
-fail_disable_irq:
-	if (udev->mode == RTE_INTR_MODE_MSIX)
-		pci_disable_msix(udev->pdev);
-fail_disable_dev:
-	pci_disable_device(dev);
 fail_free:
 	kfree(udev);
 
@@ -339,9 +387,6 @@ igbuio_pci_remove(struct pci_dev *dev)
 
 	sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp);
 	uio_unregister_device(&udev->info);
-	if (udev->mode == RTE_INTR_MODE_MSIX)
-		pci_disable_msix(dev);
-	pci_disable_device(dev);
 	pci_set_drvdata(dev, NULL);
 	kfree(udev);
 }
-- 
2.7.4

  reply	other threads:[~2016-12-02 16:45 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-09-01  2:16 [dpdk-dev] [RFC] igb_uio: deprecate iomem and ioport mapping Jianfeng Tan
2016-09-02 12:31 ` Ferruh Yigit
2016-09-02 12:59   ` Tan, Jianfeng
2016-09-09  9:06 ` David Marchand
2016-09-09  9:31   ` Tan, Jianfeng
2016-09-22  5:44 ` [dpdk-dev] [PATCH] doc: remove iomem and ioport handling in igb_uio Jianfeng Tan
2016-09-30 10:13   ` Ferruh Yigit
2016-11-11  2:12   ` Remy Horton
2016-11-13  8:55     ` Thomas Monjalon
2016-12-02 16:28 ` [dpdk-dev] [PATCH] igb_uio: deprecate iomem and ioport mapping Jianfeng Tan
2016-12-02 16:45   ` Jianfeng Tan [this message]
2017-03-30 20:22     ` [dpdk-dev] [PATCH] igb_uio: stop device when closing /dev/uioX Thomas Monjalon
2017-03-31 14:16       ` Ferruh Yigit
2016-12-02 23:47 ` [dpdk-dev] [RFC] igb_uio: deprecate iomem and ioport mapping Stephen Hemminger
2016-12-05  7:04   ` Tan, Jianfeng
2017-01-05 15:23     ` Ferruh Yigit
2017-01-06  1:52       ` Tan, Jianfeng

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1480697146-118038-1-git-send-email-jianfeng.tan@intel.com \
    --to=jianfeng.tan@intel.com \
    --cc=david.marchand@6wind.com \
    --cc=dev@dpdk.org \
    --cc=ferruh.yigit@intel.com \
    --cc=jing.d.chen@intel.com \
    --cc=stephen@networkplumber.org \
    --cc=thomas.monjalon@6wind.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).