* [dpdk-dev] [PATCH v2] pci: support both PIO and MMIO BAR for legacy virtio on x86
@ 2020-09-30 14:59 谢华伟(此时此刻)
  2020-10-01 10:22 ` Burakov, Anatoly
                   ` (3 more replies)
  0 siblings, 4 replies; 58+ messages in thread
From: 谢华伟(此时此刻) @ 2020-09-30 14:59 UTC (permalink / raw)
  To: dev, ferruh.yigit
  Cc: Maxime Coquelin, Anatoly Burakov, David Marchand, Wang, Zhihong,
	Xia, Chenbo, Gaetan Rivet, 杨航(行宪)
 From c13f981e287254cd0877cc7b98ee2dd7b80c3b69 Mon Sep 17 00:00:00 2001
From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
Date: Wed, 30 Sep 2020 22:37:03 +0800
Subject: [PATCH v2] pci:  support both PIO and MMIO BAR for legacy virtio on
  x86
Legacy virtio-pci only supports a PIO BAR resource. As we need to create
lots of virtio devices and PIO resources on x86 are very limited, we expose
an MMIO BAR.
The kernel supports both PIO and MMIO BARs for legacy virtio-pci devices. We
handle the different types of BAR in a similar way.
In the previous implementation, with igb_uio we get the PIO address from the
igb_uio sysfs entry; with uio_pci_generic, we get the PIO address from
/proc/ioports.
For PIO/MMIO RW, there are different paths for different drivers and archs.
For VFIO, PIO/MMIO RW goes through a syscall, which has a big performance
issue.
On x86, it is assumed that only PIO is supported.
All of the above is far too convoluted.
This patch unifies the way to get both PIO and MMIO addresses for different
drivers and archs, all from the standard resource attribute under PCI sysfs.
We distinguish PIO and MMIO by their address, as the kernel does. It is
ugly but it works.
Signed-off-by: huawei.xhw <huawei.xhw@alibaba-inc.com>
---
  drivers/bus/pci/linux/pci.c     |  89 +--------------------
  drivers/bus/pci/linux/pci_uio.c | 166 
+++++++++++++++++++++++++++-------------
  2 files changed, 118 insertions(+), 137 deletions(-)
diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
index bf27594..885e54e 100644
--- a/drivers/bus/pci/linux/pci.c
+++ b/drivers/bus/pci/linux/pci.c
@@ -687,71 +687,6 @@ int rte_pci_write_config(const struct 
rte_pci_device *device,
      }
  }
-#if defined(RTE_ARCH_X86)
-static int
-pci_ioport_map(struct rte_pci_device *dev, int bar __rte_unused,
-        struct rte_pci_ioport *p)
-{
-    uint16_t start, end;
-    FILE *fp;
-    char *line = NULL;
-    char pci_id[16];
-    int found = 0;
-    size_t linesz;
-
-    if (rte_eal_iopl_init() != 0) {
-        RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for 
PCI device %s\n",
-            __func__, dev->name);
-        return -1;
-    }
-
-    snprintf(pci_id, sizeof(pci_id), PCI_PRI_FMT,
-         dev->addr.domain, dev->addr.bus,
-         dev->addr.devid, dev->addr.function);
-
-    fp = fopen("/proc/ioports", "r");
-    if (fp == NULL) {
-        RTE_LOG(ERR, EAL, "%s(): can't open ioports\n", __func__);
-        return -1;
-    }
-
-    while (getdelim(&line, &linesz, '\n', fp) > 0) {
-        char *ptr = line;
-        char *left;
-        int n;
-
-        n = strcspn(ptr, ":");
-        ptr[n] = 0;
-        left = &ptr[n + 1];
-
-        while (*left && isspace(*left))
-            left++;
-
-        if (!strncmp(left, pci_id, strlen(pci_id))) {
-            found = 1;
-
-            while (*ptr && isspace(*ptr))
-                ptr++;
-
-            sscanf(ptr, "%04hx-%04hx", &start, &end);
-
-            break;
-        }
-    }
-
-    free(line);
-    fclose(fp);
-
-    if (!found)
-        return -1;
-
-    p->base = start;
-    RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%x\n", start);
-
-    return 0;
-}
-#endif
-
  int
  rte_pci_ioport_map(struct rte_pci_device *dev, int bar,
          struct rte_pci_ioport *p)
@@ -762,18 +697,12 @@ int rte_pci_write_config(const struct 
rte_pci_device *device,
  #ifdef VFIO_PRESENT
      case RTE_PCI_KDRV_VFIO:
          if (pci_vfio_is_enabled())
-            ret = pci_vfio_ioport_map(dev, bar, p);
+            ret = pci_uio_ioport_map(dev, bar, p);
          break;
  #endif
      case RTE_PCI_KDRV_IGB_UIO:
-        ret = pci_uio_ioport_map(dev, bar, p);
-        break;
      case RTE_PCI_KDRV_UIO_GENERIC:
-#if defined(RTE_ARCH_X86)
-        ret = pci_ioport_map(dev, bar, p);
-#else
          ret = pci_uio_ioport_map(dev, bar, p);
-#endif
          break;
      default:
          break;
@@ -792,12 +721,10 @@ int rte_pci_write_config(const struct 
rte_pci_device *device,
      switch (p->dev->kdrv) {
  #ifdef VFIO_PRESENT
      case RTE_PCI_KDRV_VFIO:
-        pci_vfio_ioport_read(p, data, len, offset);
+        pci_uio_ioport_read(p, data, len, offset);
          break;
  #endif
      case RTE_PCI_KDRV_IGB_UIO:
-        pci_uio_ioport_read(p, data, len, offset);
-        break;
      case RTE_PCI_KDRV_UIO_GENERIC:
          pci_uio_ioport_read(p, data, len, offset);
          break;
@@ -813,12 +740,10 @@ int rte_pci_write_config(const struct 
rte_pci_device *device,
      switch (p->dev->kdrv) {
  #ifdef VFIO_PRESENT
      case RTE_PCI_KDRV_VFIO:
-        pci_vfio_ioport_write(p, data, len, offset);
+        pci_uio_ioport_write(p, data, len, offset);
          break;
  #endif
      case RTE_PCI_KDRV_IGB_UIO:
-        pci_uio_ioport_write(p, data, len, offset);
-        break;
      case RTE_PCI_KDRV_UIO_GENERIC:
          pci_uio_ioport_write(p, data, len, offset);
          break;
@@ -836,18 +761,12 @@ int rte_pci_write_config(const struct 
rte_pci_device *device,
  #ifdef VFIO_PRESENT
      case RTE_PCI_KDRV_VFIO:
          if (pci_vfio_is_enabled())
-            ret = pci_vfio_ioport_unmap(p);
+            ret = pci_uio_ioport_unmap(p);
          break;
  #endif
      case RTE_PCI_KDRV_IGB_UIO:
-        ret = pci_uio_ioport_unmap(p);
-        break;
      case RTE_PCI_KDRV_UIO_GENERIC:
-#if defined(RTE_ARCH_X86)
-        ret = 0;
-#else
          ret = pci_uio_ioport_unmap(p);
-#endif
          break;
      default:
          break;
diff --git a/drivers/bus/pci/linux/pci_uio.c 
b/drivers/bus/pci/linux/pci_uio.c
index f3305a2..cf49e8f 100644
--- a/drivers/bus/pci/linux/pci_uio.c
+++ b/drivers/bus/pci/linux/pci_uio.c
@@ -373,52 +373,83 @@
  pci_uio_ioport_map(struct rte_pci_device *dev, int bar,
             struct rte_pci_ioport *p)
  {
+    FILE *f = NULL;
      char dirname[PATH_MAX];
      char filename[PATH_MAX];
+    char buf[BUFSIZ];
+    uint64_t phys_addr, end_addr, flags;
      int uio_num;
-    unsigned long start;
+    unsigned long base;
+    bool iobar;
+    int i;
-    if (rte_eal_iopl_init() != 0) {
-        RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for 
PCI device %s\n",
-            __func__, dev->name);
+    /* open and read addresses of the corresponding resource in sysfs */
+    snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT "/resource",
+        rte_pci_get_sysfs_path(), dev->addr.domain, dev->addr.bus,
+        dev->addr.devid, dev->addr.function);
+    f = fopen(filename, "r");
+    if (f == NULL) {
+        RTE_LOG(ERR, EAL, "Cannot open sysfs resource: %s\n",
+            strerror(errno));
          return -1;
      }
-    uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 0);
-    if (uio_num < 0)
-        return -1;
+    for (i = 0; i < bar + 1; i++) {
+        if (fgets(buf, sizeof(buf), f) == NULL) {
+            RTE_LOG(ERR, EAL, "Cannot read sysfs resource\n");
+            goto error;
+        }
+    }
+    if (pci_parse_one_sysfs_resource(buf, sizeof(buf), &phys_addr,
+        &end_addr, &flags) < 0)
+        goto error;
-    /* get portio start */
-    snprintf(filename, sizeof(filename),
-         "%s/portio/port%d/start", dirname, bar);
-    if (eal_parse_sysfs_value(filename, &start) < 0) {
-        RTE_LOG(ERR, EAL, "%s(): cannot parse portio start\n",
-            __func__);
-        return -1;
+    if (flags & IORESOURCE_IO) {
+        iobar = 1;
+        base = (unsigned long)phys_addr;
+        RTE_LOG(INFO, EAL, "%s(): PIO BAR %08lx detected\n", __func__, 
base);
+    } else if (flags & IORESOURCE_MEM) {
+        iobar = 0;
+        base = (unsigned long)dev->mem_resource[bar].addr;
+        RTE_LOG(INFO, EAL, "%s(): MMIO BAR %08lx detected\n", __func__, 
base);
+    } else {
+        RTE_LOG(ERR, EAL, "%s(): unknown BAR type\n", __func__);
+        goto error;
+    }
+
+    if (iobar && rte_eal_iopl_init() != 0) {
+        RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for 
PCI device %s\n",
+            __func__, dev->name);
+        goto error;
      }
-    /* ensure we don't get anything funny here, read/write will cast to
-     * uin16_t */
-    if (start > UINT16_MAX)
-        return -1;
      /* FIXME only for primary process ? */
-    if (dev->intr_handle.type == RTE_INTR_HANDLE_UNKNOWN) {
+    if (dev->intr_handle.type == RTE_INTR_HANDLE_UNKNOWN &&
+        dev->kdrv == RTE_KDRV_UIO_GENERIC) {
+        uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 0);
+        if (uio_num < 0)
+            goto error;
          snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num);
          dev->intr_handle.fd = open(filename, O_RDWR);
          if (dev->intr_handle.fd < 0) {
              RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
                  filename, strerror(errno));
-            return -1;
+            goto error;
          }
          dev->intr_handle.type = RTE_INTR_HANDLE_UIO;
      }
-    RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%lx\n", start);
+    RTE_LOG(DEBUG, EAL, "PCI IO port found start=0x%lx\n", base);
-    p->base = start;
+    p->base = base;
      p->len = 0;
+    fclose(f);
      return 0;
+error:
+    if (f)
+        fclose(f);
+    return -1;
  }
  #else
  int
@@ -489,6 +520,61 @@
  }
  #endif
+#define PIO_MAX 0x10000
+static inline uint8_t ioread8(void *addr)
+{
+    uint8_t val;
+
+    val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
+        *(volatile uint8_t *)addr :
+        inb((unsigned long)addr);
+
+    return val;
+}
+
+static inline uint16_t ioread16(void *addr)
+{
+    uint16_t val;
+
+    val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
+        *(volatile uint16_t *)addr :
+        inw((unsigned long)addr);
+
+    return val;
+}
+
+static inline uint32_t ioread32(void *addr)
+{
+    uint32_t val;
+
+    val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
+        *(volatile uint32_t *)addr :
+    inl((unsigned long)addr);
+
+    return val;
+}
+
+static inline void iowrite8(uint8_t val, void *addr)
+{
+    (uint64_t)(uintptr_t)addr >= PIO_MAX ?
+        *(volatile uint8_t *)addr = val :
+        outb(val, (unsigned long)addr);
+}
+
+static inline void iowrite16(uint16_t val, void *addr)
+{
+    (uint64_t)(uintptr_t)addr >= PIO_MAX ?
+        *(volatile uint16_t *)addr = val :
+        outw(val, (unsigned long)addr);
+}
+
+static inline void iowrite32(uint32_t val, void *addr)
+{
+    (uint64_t)(uintptr_t)addr >= PIO_MAX ?
+        *(volatile uint32_t *)addr = val :
+        outl(val, (unsigned long)addr);
+}
+
  void
  pci_uio_ioport_read(struct rte_pci_ioport *p,
              void *data, size_t len, off_t offset)
@@ -500,25 +586,13 @@
      for (d = data; len > 0; d += size, reg += size, len -= size) {
          if (len >= 4) {
              size = 4;
-#if defined(RTE_ARCH_X86)
-            *(uint32_t *)d = inl(reg);
-#else
-            *(uint32_t *)d = *(volatile uint32_t *)reg;
-#endif
+            *(uint32_t *)d = ioread32((void *)reg);
          } else if (len >= 2) {
              size = 2;
-#if defined(RTE_ARCH_X86)
-            *(uint16_t *)d = inw(reg);
-#else
-            *(uint16_t *)d = *(volatile uint16_t *)reg;
-#endif
+            *(uint16_t *)d = ioread16((void *)reg);
          } else {
              size = 1;
-#if defined(RTE_ARCH_X86)
-            *d = inb(reg);
-#else
-            *d = *(volatile uint8_t *)reg;
-#endif
+            *d = ioread8((void *)reg);
          }
      }
  }
@@ -534,25 +608,13 @@
      for (s = data; len > 0; s += size, reg += size, len -= size) {
          if (len >= 4) {
              size = 4;
-#if defined(RTE_ARCH_X86)
-            outl_p(*(const uint32_t *)s, reg);
-#else
-            *(volatile uint32_t *)reg = *(const uint32_t *)s;
-#endif
+            iowrite32(*(const uint32_t *)s, (void *)reg);
          } else if (len >= 2) {
              size = 2;
-#if defined(RTE_ARCH_X86)
-            outw_p(*(const uint16_t *)s, reg);
-#else
-            *(volatile uint16_t *)reg = *(const uint16_t *)s;
-#endif
+            iowrite16(*(const uint16_t *)s, (void *)reg);
          } else {
              size = 1;
-#if defined(RTE_ARCH_X86)
-            outb_p(*s, reg);
-#else
-            *(volatile uint8_t *)reg = *s;
-#endif
+            iowrite8(*s, (void *)reg);
          }
      }
  }
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v2] pci: support both PIO and MMIO BAR for legacy virtio on x86
  2020-09-30 14:59 [dpdk-dev] [PATCH v2] pci: support both PIO and MMIO BAR for legacy virtio on x86 谢华伟(此时此刻)
@ 2020-10-01 10:22 ` Burakov, Anatoly
  2020-10-02  5:44   ` 谢华伟(此时此刻)
  2020-10-09  8:36 ` [dpdk-dev] [PATCH v3] " 谢华伟(此时此刻)
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 58+ messages in thread
From: Burakov, Anatoly @ 2020-10-01 10:22 UTC (permalink / raw)
  To: 谢华伟(此时此刻),
	dev, ferruh.yigit
  Cc: Maxime Coquelin, David Marchand, Wang, Zhihong, Xia, Chenbo,
	Gaetan Rivet, 杨航(行宪)
On 30-Sep-20 3:59 PM, 谢华伟(此时此刻) wrote:
>  From c13f981e287254cd0877cc7b98ee2dd7b80c3b69 Mon Sep 17 00:00:00 2001
> From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
> Date: Wed, 30 Sep 2020 22:37:03 +0800
> Subject: [PATCH v2] pci:  support both PIO and MMIO BAR for legacy 
> virtio on
>   x86
> 
> Legacy virtio-pci only supports PIO BAR resource. As we need to create 
> lots of
> virtio devices and PIO resource on x86 is very limited, we expose MMIO BAR.
> 
> Kernel supports both PIO  and MMIO BAR for legacy virtio-pci device. We 
> handles
> different type of BAR in the similar way.
> 
> In previous implementation, with igb_uio we get PIO address from igb_uio
> sysfs entry; with uio_pci_generic, we get PIO address from
> /proc/ioports.
> For PIO/MMIO RW, there is different path for different drivers and arch.
> For VFIO, PIO/MMIO RW is through syscall, which has big performance
> issue.
> On X86, it assumes only PIO is supported.
> 
> All of the above is too much twisted.
> This patch unifies the way to get both PIO and MMIO address for 
> different driver
> and arch, all from standard resource attr under pci sysfs.
> 
> We distinguish PIO and MMIO by their address like how kernel does. It is 
> ugly but works.
> 
> Signed-off-by: huawei.xhw <huawei.xhw@alibaba-inc.com>
> ---
Your patches are somehow malformed (at least according to my inline diff
viewer). Are you using git-send-email to send patches?
-- 
Thanks,
Anatoly
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v2] pci: support both PIO and MMIO BAR for legacy virtio on x86
  2020-10-01 10:22 ` Burakov, Anatoly
@ 2020-10-02  5:44   ` 谢华伟(此时此刻)
  0 siblings, 0 replies; 58+ messages in thread
From: 谢华伟(此时此刻) @ 2020-10-02  5:44 UTC (permalink / raw)
  To: Burakov, Anatoly, dev, ferruh.yigit
  Cc: Maxime Coquelin, David Marchand, Wang, Zhihong, Xia, Chenbo,
	Gaetan Rivet, 杨航(行宪)
[-- Attachment #1: Type: text/plain, Size: 1756 bytes --]
On 2020/10/1 18:22, Burakov, Anatoly wrote:
> On 30-Sep-20 3:59 PM, 谢华伟(此时此刻) wrote:
>>  From c13f981e287254cd0877cc7b98ee2dd7b80c3b69 Mon Sep 17 00:00:00 2001
>> From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
>> Date: Wed, 30 Sep 2020 22:37:03 +0800
>> Subject: [PATCH v2] pci:  support both PIO and MMIO BAR for legacy 
>> virtio on
>>   x86
>>
>> Legacy virtio-pci only supports PIO BAR resource. As we need to 
>> create lots of
>> virtio devices and PIO resource on x86 is very limited, we expose 
>> MMIO BAR.
>>
>> Kernel supports both PIO  and MMIO BAR for legacy virtio-pci device. 
>> We handles
>> different type of BAR in the similar way.
>>
>> In previous implementation, with igb_uio we get PIO address from igb_uio
>> sysfs entry; with uio_pci_generic, we get PIO address from
>> /proc/ioports.
>> For PIO/MMIO RW, there is different path for different drivers and arch.
>> For VFIO, PIO/MMIO RW is through syscall, which has big performance
>> issue.
>> On X86, it assumes only PIO is supported.
>>
>> All of the above is too much twisted.
>> This patch unifies the way to get both PIO and MMIO address for 
>> different driver
>> and arch, all from standard resource attr under pci sysfs.
>>
>> We distinguish PIO and MMIO by their address like how kernel does. It 
>> is ugly but works.
>>
>> Signed-off-by: huawei.xhw <huawei.xhw@alibaba-inc.com>
>> ---
>
> You patches are somehow malformed (at least according to my inline 
> diff viewer). Are you using git-send-email to send patches?
>
Very sorry for the format issue. I still have not managed to get send-email
configured and working, and Thunderbird had issues with its plain text settings.
Zhihong, Chenbo:
I have attached this patch. Could you help send it for me?
Thanks!
[-- Attachment #2: v2.patch --]
[-- Type: text/plain, Size: 11178 bytes --]
From c13f981e287254cd0877cc7b98ee2dd7b80c3b69 Mon Sep 17 00:00:00 2001
From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
Date: Wed, 30 Sep 2020 22:37:03 +0800
Subject: [PATCH v2] pci:  support both PIO and MMIO BAR for legacy virtio on x86
Legacy virtio-pci only supports PIO BAR resource. As we need to create lots of
virtio devices and PIO resource on x86 is very limited, we expose MMIO BAR.
Kernel supports both PIO  and MMIO BAR for legacy virtio-pci device. We handles
different type of BAR in the similar way.
In previous implementation, with igb_uio we get PIO address from igb_uio
sysfs entry; with uio_pci_generic, we get PIO address from
/proc/ioports.
For PIO/MMIO RW, there is different path for different drivers and arch.
For VFIO, PIO/MMIO RW is through syscall, which has big performance
issue.
On X86, it assumes only PIO is supported.
All of the above is too much twisted.
This patch unifies the way to get both PIO and MMIO address for different driver
and arch, all from standard resource attr under pci sysfs.
We distinguish PIO and MMIO by their address like how kernel does. It is ugly but works.
Signed-off-by: huawei.xhw <huawei.xhw@alibaba-inc.com>
---
 drivers/bus/pci/linux/pci.c     |  89 +--------------------
 drivers/bus/pci/linux/pci_uio.c | 166 +++++++++++++++++++++++++++-------------
 2 files changed, 118 insertions(+), 137 deletions(-)
diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
index bf27594..885e54e 100644
--- a/drivers/bus/pci/linux/pci.c
+++ b/drivers/bus/pci/linux/pci.c
@@ -687,71 +687,6 @@ int rte_pci_write_config(const struct rte_pci_device *device,
 	}
 }
 
-#if defined(RTE_ARCH_X86)
-static int
-pci_ioport_map(struct rte_pci_device *dev, int bar __rte_unused,
-		struct rte_pci_ioport *p)
-{
-	uint16_t start, end;
-	FILE *fp;
-	char *line = NULL;
-	char pci_id[16];
-	int found = 0;
-	size_t linesz;
-
-	if (rte_eal_iopl_init() != 0) {
-		RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for PCI device %s\n",
-			__func__, dev->name);
-		return -1;
-	}
-
-	snprintf(pci_id, sizeof(pci_id), PCI_PRI_FMT,
-		 dev->addr.domain, dev->addr.bus,
-		 dev->addr.devid, dev->addr.function);
-
-	fp = fopen("/proc/ioports", "r");
-	if (fp == NULL) {
-		RTE_LOG(ERR, EAL, "%s(): can't open ioports\n", __func__);
-		return -1;
-	}
-
-	while (getdelim(&line, &linesz, '\n', fp) > 0) {
-		char *ptr = line;
-		char *left;
-		int n;
-
-		n = strcspn(ptr, ":");
-		ptr[n] = 0;
-		left = &ptr[n + 1];
-
-		while (*left && isspace(*left))
-			left++;
-
-		if (!strncmp(left, pci_id, strlen(pci_id))) {
-			found = 1;
-
-			while (*ptr && isspace(*ptr))
-				ptr++;
-
-			sscanf(ptr, "%04hx-%04hx", &start, &end);
-
-			break;
-		}
-	}
-
-	free(line);
-	fclose(fp);
-
-	if (!found)
-		return -1;
-
-	p->base = start;
-	RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%x\n", start);
-
-	return 0;
-}
-#endif
-
 int
 rte_pci_ioport_map(struct rte_pci_device *dev, int bar,
 		struct rte_pci_ioport *p)
@@ -762,18 +697,12 @@ int rte_pci_write_config(const struct rte_pci_device *device,
 #ifdef VFIO_PRESENT
 	case RTE_PCI_KDRV_VFIO:
 		if (pci_vfio_is_enabled())
-			ret = pci_vfio_ioport_map(dev, bar, p);
+			ret = pci_uio_ioport_map(dev, bar, p);
 		break;
 #endif
 	case RTE_PCI_KDRV_IGB_UIO:
-		ret = pci_uio_ioport_map(dev, bar, p);
-		break;
 	case RTE_PCI_KDRV_UIO_GENERIC:
-#if defined(RTE_ARCH_X86)
-		ret = pci_ioport_map(dev, bar, p);
-#else
 		ret = pci_uio_ioport_map(dev, bar, p);
-#endif
 		break;
 	default:
 		break;
@@ -792,12 +721,10 @@ int rte_pci_write_config(const struct rte_pci_device *device,
 	switch (p->dev->kdrv) {
 #ifdef VFIO_PRESENT
 	case RTE_PCI_KDRV_VFIO:
-		pci_vfio_ioport_read(p, data, len, offset);
+		pci_uio_ioport_read(p, data, len, offset);
 		break;
 #endif
 	case RTE_PCI_KDRV_IGB_UIO:
-		pci_uio_ioport_read(p, data, len, offset);
-		break;
 	case RTE_PCI_KDRV_UIO_GENERIC:
 		pci_uio_ioport_read(p, data, len, offset);
 		break;
@@ -813,12 +740,10 @@ int rte_pci_write_config(const struct rte_pci_device *device,
 	switch (p->dev->kdrv) {
 #ifdef VFIO_PRESENT
 	case RTE_PCI_KDRV_VFIO:
-		pci_vfio_ioport_write(p, data, len, offset);
+		pci_uio_ioport_write(p, data, len, offset);
 		break;
 #endif
 	case RTE_PCI_KDRV_IGB_UIO:
-		pci_uio_ioport_write(p, data, len, offset);
-		break;
 	case RTE_PCI_KDRV_UIO_GENERIC:
 		pci_uio_ioport_write(p, data, len, offset);
 		break;
@@ -836,18 +761,12 @@ int rte_pci_write_config(const struct rte_pci_device *device,
 #ifdef VFIO_PRESENT
 	case RTE_PCI_KDRV_VFIO:
 		if (pci_vfio_is_enabled())
-			ret = pci_vfio_ioport_unmap(p);
+			ret = pci_uio_ioport_unmap(p);
 		break;
 #endif
 	case RTE_PCI_KDRV_IGB_UIO:
-		ret = pci_uio_ioport_unmap(p);
-		break;
 	case RTE_PCI_KDRV_UIO_GENERIC:
-#if defined(RTE_ARCH_X86)
-		ret = 0;
-#else
 		ret = pci_uio_ioport_unmap(p);
-#endif
 		break;
 	default:
 		break;
diff --git a/drivers/bus/pci/linux/pci_uio.c b/drivers/bus/pci/linux/pci_uio.c
index f3305a2..cf49e8f 100644
--- a/drivers/bus/pci/linux/pci_uio.c
+++ b/drivers/bus/pci/linux/pci_uio.c
@@ -373,52 +373,83 @@
 pci_uio_ioport_map(struct rte_pci_device *dev, int bar,
 		   struct rte_pci_ioport *p)
 {
+	FILE *f = NULL;
 	char dirname[PATH_MAX];
 	char filename[PATH_MAX];
+	char buf[BUFSIZ];
+	uint64_t phys_addr, end_addr, flags;
 	int uio_num;
-	unsigned long start;
+	unsigned long base;
+	bool iobar;
+	int i;
 
-	if (rte_eal_iopl_init() != 0) {
-		RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for PCI device %s\n",
-			__func__, dev->name);
+	/* open and read addresses of the corresponding resource in sysfs */
+	snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT "/resource",
+		rte_pci_get_sysfs_path(), dev->addr.domain, dev->addr.bus,
+		dev->addr.devid, dev->addr.function);
+	f = fopen(filename, "r");
+	if (f == NULL) {
+		RTE_LOG(ERR, EAL, "Cannot open sysfs resource: %s\n",
+			strerror(errno));
 		return -1;
 	}
 
-	uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 0);
-	if (uio_num < 0)
-		return -1;
+	for (i = 0; i < bar + 1; i++) {
+		if (fgets(buf, sizeof(buf), f) == NULL) {
+			RTE_LOG(ERR, EAL, "Cannot read sysfs resource\n");
+			goto error;
+		}
+	}
+	if (pci_parse_one_sysfs_resource(buf, sizeof(buf), &phys_addr,
+		&end_addr, &flags) < 0)
+		goto error;
 
-	/* get portio start */
-	snprintf(filename, sizeof(filename),
-		 "%s/portio/port%d/start", dirname, bar);
-	if (eal_parse_sysfs_value(filename, &start) < 0) {
-		RTE_LOG(ERR, EAL, "%s(): cannot parse portio start\n",
-			__func__);
-		return -1;
+	if (flags & IORESOURCE_IO) {
+		iobar = 1;
+		base = (unsigned long)phys_addr;
+		RTE_LOG(INFO, EAL, "%s(): PIO BAR %08lx detected\n", __func__, base);
+	} else if (flags & IORESOURCE_MEM) {
+		iobar = 0;
+		base = (unsigned long)dev->mem_resource[bar].addr;
+		RTE_LOG(INFO, EAL, "%s(): MMIO BAR %08lx detected\n", __func__, base);
+	} else {
+		RTE_LOG(ERR, EAL, "%s(): unknown BAR type\n", __func__);
+		goto error;
+	}
+
+	if (iobar && rte_eal_iopl_init() != 0) {
+		RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for PCI device %s\n",
+			__func__, dev->name);
+		goto error;
 	}
-	/* ensure we don't get anything funny here, read/write will cast to
-	 * uin16_t */
-	if (start > UINT16_MAX)
-		return -1;
 
 	/* FIXME only for primary process ? */
-	if (dev->intr_handle.type == RTE_INTR_HANDLE_UNKNOWN) {
+	if (dev->intr_handle.type == RTE_INTR_HANDLE_UNKNOWN &&
+	    dev->kdrv == RTE_KDRV_UIO_GENERIC) {
+		uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 0);
+		if (uio_num < 0)
+			goto error;
 
 		snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num);
 		dev->intr_handle.fd = open(filename, O_RDWR);
 		if (dev->intr_handle.fd < 0) {
 			RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
 				filename, strerror(errno));
-			return -1;
+			goto error;
 		}
 		dev->intr_handle.type = RTE_INTR_HANDLE_UIO;
 	}
 
-	RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%lx\n", start);
+	RTE_LOG(DEBUG, EAL, "PCI IO port found start=0x%lx\n", base);
 
-	p->base = start;
+	p->base = base;
 	p->len = 0;
+	fclose(f);
 	return 0;
+error:
+	if (f)
+		fclose(f);
+	return -1;
 }
 #else
 int
@@ -489,6 +520,61 @@
 }
 #endif
 
+#define PIO_MAX 0x10000
+static inline uint8_t ioread8(void *addr)
+{
+	uint8_t val;
+
+	val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
+		*(volatile uint8_t *)addr :
+		inb((unsigned long)addr);
+
+	return val;
+}
+
+static inline uint16_t ioread16(void *addr)
+{
+	uint16_t val;
+
+	val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
+		*(volatile uint16_t *)addr :
+		inw((unsigned long)addr);
+
+	return val;
+}
+
+static inline uint32_t ioread32(void *addr)
+{
+	uint32_t val;
+
+	val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
+		*(volatile uint32_t *)addr :
+	inl((unsigned long)addr);
+
+	return val;
+}
+
+static inline void iowrite8(uint8_t val, void *addr)
+{
+	(uint64_t)(uintptr_t)addr >= PIO_MAX ?
+		*(volatile uint8_t *)addr = val :
+		outb(val, (unsigned long)addr);
+}
+
+static inline void iowrite16(uint16_t val, void *addr)
+{
+	(uint64_t)(uintptr_t)addr >= PIO_MAX ?
+		*(volatile uint16_t *)addr = val :
+		outw(val, (unsigned long)addr);
+}
+
+static inline void iowrite32(uint32_t val, void *addr)
+{
+	(uint64_t)(uintptr_t)addr >= PIO_MAX ?
+		*(volatile uint32_t *)addr = val :
+		outl(val, (unsigned long)addr);
+}
+
 void
 pci_uio_ioport_read(struct rte_pci_ioport *p,
 		    void *data, size_t len, off_t offset)
@@ -500,25 +586,13 @@
 	for (d = data; len > 0; d += size, reg += size, len -= size) {
 		if (len >= 4) {
 			size = 4;
-#if defined(RTE_ARCH_X86)
-			*(uint32_t *)d = inl(reg);
-#else
-			*(uint32_t *)d = *(volatile uint32_t *)reg;
-#endif
+			*(uint32_t *)d = ioread32((void *)reg);
 		} else if (len >= 2) {
 			size = 2;
-#if defined(RTE_ARCH_X86)
-			*(uint16_t *)d = inw(reg);
-#else
-			*(uint16_t *)d = *(volatile uint16_t *)reg;
-#endif
+			*(uint16_t *)d = ioread16((void *)reg);
 		} else {
 			size = 1;
-#if defined(RTE_ARCH_X86)
-			*d = inb(reg);
-#else
-			*d = *(volatile uint8_t *)reg;
-#endif
+			*d = ioread8((void *)reg);
 		}
 	}
 }
@@ -534,25 +608,13 @@
 	for (s = data; len > 0; s += size, reg += size, len -= size) {
 		if (len >= 4) {
 			size = 4;
-#if defined(RTE_ARCH_X86)
-			outl_p(*(const uint32_t *)s, reg);
-#else
-			*(volatile uint32_t *)reg = *(const uint32_t *)s;
-#endif
+			iowrite32(*(const uint32_t *)s, (void *)reg);
 		} else if (len >= 2) {
 			size = 2;
-#if defined(RTE_ARCH_X86)
-			outw_p(*(const uint16_t *)s, reg);
-#else
-			*(volatile uint16_t *)reg = *(const uint16_t *)s;
-#endif
+			iowrite16(*(const uint16_t *)s, (void *)reg);
 		} else {
 			size = 1;
-#if defined(RTE_ARCH_X86)
-			outb_p(*s, reg);
-#else
-			*(volatile uint8_t *)reg = *s;
-#endif
+			iowrite8(*s, (void *)reg);
 		}
 	}
 }
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 58+ messages in thread
* [dpdk-dev] [PATCH v3] pci: support both PIO and MMIO BAR for legacy virtio on x86
  2020-09-30 14:59 [dpdk-dev] [PATCH v2] pci: support both PIO and MMIO BAR for legacy virtio on x86 谢华伟(此时此刻)
  2020-10-01 10:22 ` Burakov, Anatoly
@ 2020-10-09  8:36 ` 谢华伟(此时此刻)
  2020-10-13  8:41 ` [dpdk-dev] [PATCH v4] support both PIO and MMIO bar for virtio pci device 谢华伟(此时此刻)
  2020-10-22 15:51 ` [dpdk-dev] [PATCH v5 0/3] support both PIO and MMIO BAR for virtio PMD 谢华伟(此时此刻)
  3 siblings, 0 replies; 58+ messages in thread
From: 谢华伟(此时此刻) @ 2020-10-09  8:36 UTC (permalink / raw)
  To: dev; +Cc: 谢华伟(此时此刻)
From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
Legacy virtio-pci only supports a PIO BAR resource. As we need to create
lots of virtio devices and PIO resources on x86 are very limited, we expose
an MMIO BAR.
The kernel supports both PIO and MMIO BARs for legacy virtio-pci devices. We
handle the different types of BAR in a similar way.
In the previous implementation, with igb_uio we get the PIO address from the
igb_uio sysfs entry; with uio_pci_generic, we get the PIO address from
/proc/ioports.
For PIO/MMIO RW, there are different paths for different drivers and archs.
For VFIO, PIO/MMIO RW goes through a syscall, which has a big performance
issue.
On x86, it is assumed that only PIO is supported.
All of the above is far too convoluted.
This patch unifies the way to get both PIO and MMIO addresses for different
drivers and archs, all from the standard resource attribute under PCI sysfs.
We distinguish PIO and MMIO by their address, as the kernel does. It is
ugly but it works.
Signed-off-by: huawei.xhw <huawei.xhw@alibaba-inc.com>
---
 drivers/bus/pci/linux/pci.c     |  89 +--------------------
 drivers/bus/pci/linux/pci_uio.c | 166 +++++++++++++++++++++++++++-------------
 2 files changed, 118 insertions(+), 137 deletions(-)
diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
index bf27594..885e54e 100644
--- a/drivers/bus/pci/linux/pci.c
+++ b/drivers/bus/pci/linux/pci.c
@@ -687,71 +687,6 @@ int rte_pci_write_config(const struct rte_pci_device *device,
 	}
 }
 
-#if defined(RTE_ARCH_X86)
-static int
-pci_ioport_map(struct rte_pci_device *dev, int bar __rte_unused,
-		struct rte_pci_ioport *p)
-{
-	uint16_t start, end;
-	FILE *fp;
-	char *line = NULL;
-	char pci_id[16];
-	int found = 0;
-	size_t linesz;
-
-	if (rte_eal_iopl_init() != 0) {
-		RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for PCI device %s\n",
-			__func__, dev->name);
-		return -1;
-	}
-
-	snprintf(pci_id, sizeof(pci_id), PCI_PRI_FMT,
-		 dev->addr.domain, dev->addr.bus,
-		 dev->addr.devid, dev->addr.function);
-
-	fp = fopen("/proc/ioports", "r");
-	if (fp == NULL) {
-		RTE_LOG(ERR, EAL, "%s(): can't open ioports\n", __func__);
-		return -1;
-	}
-
-	while (getdelim(&line, &linesz, '\n', fp) > 0) {
-		char *ptr = line;
-		char *left;
-		int n;
-
-		n = strcspn(ptr, ":");
-		ptr[n] = 0;
-		left = &ptr[n + 1];
-
-		while (*left && isspace(*left))
-			left++;
-
-		if (!strncmp(left, pci_id, strlen(pci_id))) {
-			found = 1;
-
-			while (*ptr && isspace(*ptr))
-				ptr++;
-
-			sscanf(ptr, "%04hx-%04hx", &start, &end);
-
-			break;
-		}
-	}
-
-	free(line);
-	fclose(fp);
-
-	if (!found)
-		return -1;
-
-	p->base = start;
-	RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%x\n", start);
-
-	return 0;
-}
-#endif
-
 int
 rte_pci_ioport_map(struct rte_pci_device *dev, int bar,
 		struct rte_pci_ioport *p)
@@ -762,18 +697,12 @@ int rte_pci_write_config(const struct rte_pci_device *device,
 #ifdef VFIO_PRESENT
 	case RTE_PCI_KDRV_VFIO:
 		if (pci_vfio_is_enabled())
-			ret = pci_vfio_ioport_map(dev, bar, p);
+			ret = pci_uio_ioport_map(dev, bar, p);
 		break;
 #endif
 	case RTE_PCI_KDRV_IGB_UIO:
-		ret = pci_uio_ioport_map(dev, bar, p);
-		break;
 	case RTE_PCI_KDRV_UIO_GENERIC:
-#if defined(RTE_ARCH_X86)
-		ret = pci_ioport_map(dev, bar, p);
-#else
 		ret = pci_uio_ioport_map(dev, bar, p);
-#endif
 		break;
 	default:
 		break;
@@ -792,12 +721,10 @@ int rte_pci_write_config(const struct rte_pci_device *device,
 	switch (p->dev->kdrv) {
 #ifdef VFIO_PRESENT
 	case RTE_PCI_KDRV_VFIO:
-		pci_vfio_ioport_read(p, data, len, offset);
+		pci_uio_ioport_read(p, data, len, offset);
 		break;
 #endif
 	case RTE_PCI_KDRV_IGB_UIO:
-		pci_uio_ioport_read(p, data, len, offset);
-		break;
 	case RTE_PCI_KDRV_UIO_GENERIC:
 		pci_uio_ioport_read(p, data, len, offset);
 		break;
@@ -813,12 +740,10 @@ int rte_pci_write_config(const struct rte_pci_device *device,
 	switch (p->dev->kdrv) {
 #ifdef VFIO_PRESENT
 	case RTE_PCI_KDRV_VFIO:
-		pci_vfio_ioport_write(p, data, len, offset);
+		pci_uio_ioport_write(p, data, len, offset);
 		break;
 #endif
 	case RTE_PCI_KDRV_IGB_UIO:
-		pci_uio_ioport_write(p, data, len, offset);
-		break;
 	case RTE_PCI_KDRV_UIO_GENERIC:
 		pci_uio_ioport_write(p, data, len, offset);
 		break;
@@ -836,18 +761,12 @@ int rte_pci_write_config(const struct rte_pci_device *device,
 #ifdef VFIO_PRESENT
 	case RTE_PCI_KDRV_VFIO:
 		if (pci_vfio_is_enabled())
-			ret = pci_vfio_ioport_unmap(p);
+			ret = pci_uio_ioport_unmap(p);
 		break;
 #endif
 	case RTE_PCI_KDRV_IGB_UIO:
-		ret = pci_uio_ioport_unmap(p);
-		break;
 	case RTE_PCI_KDRV_UIO_GENERIC:
-#if defined(RTE_ARCH_X86)
-		ret = 0;
-#else
 		ret = pci_uio_ioport_unmap(p);
-#endif
 		break;
 	default:
 		break;
diff --git a/drivers/bus/pci/linux/pci_uio.c b/drivers/bus/pci/linux/pci_uio.c
index f3305a2..cf49e8f 100644
--- a/drivers/bus/pci/linux/pci_uio.c
+++ b/drivers/bus/pci/linux/pci_uio.c
@@ -373,52 +373,83 @@
 pci_uio_ioport_map(struct rte_pci_device *dev, int bar,
 		   struct rte_pci_ioport *p)
 {
+	FILE *f = NULL;
 	char dirname[PATH_MAX];
 	char filename[PATH_MAX];
+	char buf[BUFSIZ];
+	uint64_t phys_addr, end_addr, flags;
 	int uio_num;
-	unsigned long start;
+	unsigned long base;
+	bool iobar;
+	int i;
 
-	if (rte_eal_iopl_init() != 0) {
-		RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for PCI device %s\n",
-			__func__, dev->name);
+	/* open and read addresses of the corresponding resource in sysfs */
+	snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT "/resource",
+		rte_pci_get_sysfs_path(), dev->addr.domain, dev->addr.bus,
+		dev->addr.devid, dev->addr.function);
+	f = fopen(filename, "r");
+	if (f == NULL) {
+		RTE_LOG(ERR, EAL, "Cannot open sysfs resource: %s\n",
+			strerror(errno));
 		return -1;
 	}
 
-	uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 0);
-	if (uio_num < 0)
-		return -1;
+	for (i = 0; i < bar + 1; i++) {
+		if (fgets(buf, sizeof(buf), f) == NULL) {
+			RTE_LOG(ERR, EAL, "Cannot read sysfs resource\n");
+			goto error;
+		}
+	}
+	if (pci_parse_one_sysfs_resource(buf, sizeof(buf), &phys_addr,
+		&end_addr, &flags) < 0)
+		goto error;
 
-	/* get portio start */
-	snprintf(filename, sizeof(filename),
-		 "%s/portio/port%d/start", dirname, bar);
-	if (eal_parse_sysfs_value(filename, &start) < 0) {
-		RTE_LOG(ERR, EAL, "%s(): cannot parse portio start\n",
-			__func__);
-		return -1;
+	if (flags & IORESOURCE_IO) {
+		iobar = 1;
+		base = (unsigned long)phys_addr;
+		RTE_LOG(INFO, EAL, "%s(): PIO BAR %08lx detected\n", __func__, base);
+	} else if (flags & IORESOURCE_MEM) {
+		iobar = 0;
+		base = (unsigned long)dev->mem_resource[bar].addr;
+		RTE_LOG(INFO, EAL, "%s(): MMIO BAR %08lx detected\n", __func__, base);
+	} else {
+		RTE_LOG(ERR, EAL, "%s(): unknown BAR type\n", __func__);
+		goto error;
+	}
+
+	if (iobar && rte_eal_iopl_init() != 0) {
+		RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for PCI device %s\n",
+			__func__, dev->name);
+		goto error;
 	}
-	/* ensure we don't get anything funny here, read/write will cast to
-	 * uin16_t */
-	if (start > UINT16_MAX)
-		return -1;
 
 	/* FIXME only for primary process ? */
-	if (dev->intr_handle.type == RTE_INTR_HANDLE_UNKNOWN) {
+	if (dev->intr_handle.type == RTE_INTR_HANDLE_UNKNOWN &&
+	    dev->kdrv == RTE_KDRV_UIO_GENERIC) {
+		uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 0);
+		if (uio_num < 0)
+			goto error;
 
 		snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num);
 		dev->intr_handle.fd = open(filename, O_RDWR);
 		if (dev->intr_handle.fd < 0) {
 			RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
 				filename, strerror(errno));
-			return -1;
+			goto error;
 		}
 		dev->intr_handle.type = RTE_INTR_HANDLE_UIO;
 	}
 
-	RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%lx\n", start);
+	RTE_LOG(DEBUG, EAL, "PCI IO port found start=0x%lx\n", base);
 
-	p->base = start;
+	p->base = base;
 	p->len = 0;
+	fclose(f);
 	return 0;
+error:
+	if (f)
+		fclose(f);
+	return -1;
 }
 #else
 int
@@ -489,6 +520,61 @@
 }
 #endif
 
+#define PIO_MAX 0x10000
+static inline uint8_t ioread8(void *addr)
+{
+	uint8_t val;
+
+	val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
+		*(volatile uint8_t *)addr :
+		inb((unsigned long)addr);
+
+	return val;
+}
+
+static inline uint16_t ioread16(void *addr)
+{
+	uint16_t val;
+
+	val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
+		*(volatile uint16_t *)addr :
+		inw((unsigned long)addr);
+
+	return val;
+}
+
+static inline uint32_t ioread32(void *addr)
+{
+	uint32_t val;
+
+	val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
+		*(volatile uint32_t *)addr :
+	inl((unsigned long)addr);
+
+	return val;
+}
+
+static inline void iowrite8(uint8_t val, void *addr)
+{
+	(uint64_t)(uintptr_t)addr >= PIO_MAX ?
+		*(volatile uint8_t *)addr = val :
+		outb(val, (unsigned long)addr);
+}
+
+static inline void iowrite16(uint16_t val, void *addr)
+{
+	(uint64_t)(uintptr_t)addr >= PIO_MAX ?
+		*(volatile uint16_t *)addr = val :
+		outw(val, (unsigned long)addr);
+}
+
+static inline void iowrite32(uint32_t val, void *addr)
+{
+	(uint64_t)(uintptr_t)addr >= PIO_MAX ?
+		*(volatile uint32_t *)addr = val :
+		outl(val, (unsigned long)addr);
+}
+
 void
 pci_uio_ioport_read(struct rte_pci_ioport *p,
 		    void *data, size_t len, off_t offset)
@@ -500,25 +586,13 @@
 	for (d = data; len > 0; d += size, reg += size, len -= size) {
 		if (len >= 4) {
 			size = 4;
-#if defined(RTE_ARCH_X86)
-			*(uint32_t *)d = inl(reg);
-#else
-			*(uint32_t *)d = *(volatile uint32_t *)reg;
-#endif
+			*(uint32_t *)d = ioread32((void *)reg);
 		} else if (len >= 2) {
 			size = 2;
-#if defined(RTE_ARCH_X86)
-			*(uint16_t *)d = inw(reg);
-#else
-			*(uint16_t *)d = *(volatile uint16_t *)reg;
-#endif
+			*(uint16_t *)d = ioread16((void *)reg);
 		} else {
 			size = 1;
-#if defined(RTE_ARCH_X86)
-			*d = inb(reg);
-#else
-			*d = *(volatile uint8_t *)reg;
-#endif
+			*d = ioread8((void *)reg);
 		}
 	}
 }
@@ -534,25 +608,13 @@
 	for (s = data; len > 0; s += size, reg += size, len -= size) {
 		if (len >= 4) {
 			size = 4;
-#if defined(RTE_ARCH_X86)
-			outl_p(*(const uint32_t *)s, reg);
-#else
-			*(volatile uint32_t *)reg = *(const uint32_t *)s;
-#endif
+			iowrite32(*(const uint32_t *)s, (void *)reg);
 		} else if (len >= 2) {
 			size = 2;
-#if defined(RTE_ARCH_X86)
-			outw_p(*(const uint16_t *)s, reg);
-#else
-			*(volatile uint16_t *)reg = *(const uint16_t *)s;
-#endif
+			iowrite16(*(const uint16_t *)s, (void *)reg);
 		} else {
 			size = 1;
-#if defined(RTE_ARCH_X86)
-			outb_p(*s, reg);
-#else
-			*(volatile uint8_t *)reg = *s;
-#endif
+			iowrite8(*s, (void *)reg);
 		}
 	}
 }
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 58+ messages in thread
* [dpdk-dev] [PATCH v4] support both PIO and MMIO bar for virtio pci device
  2020-09-30 14:59 [dpdk-dev] [PATCH v2] pci: support both PIO and MMIO BAR for legacy virtio on x86 谢华伟(此时此刻)
  2020-10-01 10:22 ` Burakov, Anatoly
  2020-10-09  8:36 ` [dpdk-dev] [PATCH v3] " 谢华伟(此时此刻)
@ 2020-10-13  8:41 ` 谢华伟(此时此刻)
  2020-10-13  8:41   ` [dpdk-dev] [PATCH v4] pci: support both PIO and MMIO BAR for legacy virtio on x86 谢华伟(此时此刻)
  2020-10-22 15:51 ` [dpdk-dev] [PATCH v5 0/3] support both PIO and MMIO BAR for virtio PMD 谢华伟(此时此刻)
  3 siblings, 1 reply; 58+ messages in thread
From: 谢华伟(此时此刻) @ 2020-10-13  8:41 UTC (permalink / raw)
  To: ferruh.yigit
  Cc: dev, anatoly.burakov, maxime.coquelin, david.marchand, grive,
	zhihong.wang, chenbo.xia,
	谢华伟(此时此刻)
From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
v2 changes:
	 - add more explanation in the commit message
v3 changes:
	 - fix patch format issues
v4 changes:
	 - fixes for RTE_KDRV_UIO_GENERIC -> RTE_PCI_KDRV_UIO_GENERIC
huawei.xhw (1):
  pci: support both PIO and MMIO BAR for legacy virtio on x86
 drivers/bus/pci/linux/pci.c     |  89 +--------------------
 drivers/bus/pci/linux/pci_uio.c | 167 +++++++++++++++++++++++++++-------------
 2 files changed, 119 insertions(+), 137 deletions(-)
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 58+ messages in thread
* [dpdk-dev] [PATCH v4] pci: support both PIO and MMIO BAR for legacy virtio on x86
  2020-10-13  8:41 ` [dpdk-dev] [PATCH v4] support both PIO and MMIO bar for virtio pci device 谢华伟(此时此刻)
@ 2020-10-13  8:41   ` 谢华伟(此时此刻)
  2020-10-13 12:34     ` 谢华伟(此时此刻)
                       ` (2 more replies)
  0 siblings, 3 replies; 58+ messages in thread
From: 谢华伟(此时此刻) @ 2020-10-13  8:41 UTC (permalink / raw)
  To: ferruh.yigit
  Cc: dev, anatoly.burakov, maxime.coquelin, david.marchand, grive,
	zhihong.wang, chenbo.xia,
	谢华伟(此时此刻)
From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
Legacy virtio-pci only supports PIO BAR resource. As we need to create lots of
virtio devices and PIO resource on x86 is very limited, we expose MMIO BAR.
The kernel supports both PIO and MMIO BARs for legacy virtio-pci devices. We handle
the different types of BAR in a similar way.
In previous implementation, with igb_uio we get PIO address from igb_uio
sysfs entry; with uio_pci_generic, we get PIO address from
/proc/ioports.
For PIO/MMIO RW, there is different path for different drivers and arch.
For VFIO, PIO/MMIO RW is through syscall, which has big performance
issue.
On X86, it assumes only PIO is supported.
All of the above is far too convoluted.
This patch unifies the way to get both PIO and MMIO addresses for the different drivers
and architectures, all from the standard resource attribute under PCI sysfs.
We distinguish PIO and MMIO by their address, as the kernel does. It is ugly but it works.
Signed-off-by: huawei.xhw <huawei.xhw@alibaba-inc.com>
---
 drivers/bus/pci/linux/pci.c     |  89 +--------------------
 drivers/bus/pci/linux/pci_uio.c | 167 +++++++++++++++++++++++++++-------------
 2 files changed, 119 insertions(+), 137 deletions(-)
diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
index bf27594..885e54e 100644
--- a/drivers/bus/pci/linux/pci.c
+++ b/drivers/bus/pci/linux/pci.c
@@ -687,71 +687,6 @@ int rte_pci_write_config(const struct rte_pci_device *device,
 	}
 }
 
-#if defined(RTE_ARCH_X86)
-static int
-pci_ioport_map(struct rte_pci_device *dev, int bar __rte_unused,
-		struct rte_pci_ioport *p)
-{
-	uint16_t start, end;
-	FILE *fp;
-	char *line = NULL;
-	char pci_id[16];
-	int found = 0;
-	size_t linesz;
-
-	if (rte_eal_iopl_init() != 0) {
-		RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for PCI device %s\n",
-			__func__, dev->name);
-		return -1;
-	}
-
-	snprintf(pci_id, sizeof(pci_id), PCI_PRI_FMT,
-		 dev->addr.domain, dev->addr.bus,
-		 dev->addr.devid, dev->addr.function);
-
-	fp = fopen("/proc/ioports", "r");
-	if (fp == NULL) {
-		RTE_LOG(ERR, EAL, "%s(): can't open ioports\n", __func__);
-		return -1;
-	}
-
-	while (getdelim(&line, &linesz, '\n', fp) > 0) {
-		char *ptr = line;
-		char *left;
-		int n;
-
-		n = strcspn(ptr, ":");
-		ptr[n] = 0;
-		left = &ptr[n + 1];
-
-		while (*left && isspace(*left))
-			left++;
-
-		if (!strncmp(left, pci_id, strlen(pci_id))) {
-			found = 1;
-
-			while (*ptr && isspace(*ptr))
-				ptr++;
-
-			sscanf(ptr, "%04hx-%04hx", &start, &end);
-
-			break;
-		}
-	}
-
-	free(line);
-	fclose(fp);
-
-	if (!found)
-		return -1;
-
-	p->base = start;
-	RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%x\n", start);
-
-	return 0;
-}
-#endif
-
 int
 rte_pci_ioport_map(struct rte_pci_device *dev, int bar,
 		struct rte_pci_ioport *p)
@@ -762,18 +697,12 @@ int rte_pci_write_config(const struct rte_pci_device *device,
 #ifdef VFIO_PRESENT
 	case RTE_PCI_KDRV_VFIO:
 		if (pci_vfio_is_enabled())
-			ret = pci_vfio_ioport_map(dev, bar, p);
+			ret = pci_uio_ioport_map(dev, bar, p);
 		break;
 #endif
 	case RTE_PCI_KDRV_IGB_UIO:
-		ret = pci_uio_ioport_map(dev, bar, p);
-		break;
 	case RTE_PCI_KDRV_UIO_GENERIC:
-#if defined(RTE_ARCH_X86)
-		ret = pci_ioport_map(dev, bar, p);
-#else
 		ret = pci_uio_ioport_map(dev, bar, p);
-#endif
 		break;
 	default:
 		break;
@@ -792,12 +721,10 @@ int rte_pci_write_config(const struct rte_pci_device *device,
 	switch (p->dev->kdrv) {
 #ifdef VFIO_PRESENT
 	case RTE_PCI_KDRV_VFIO:
-		pci_vfio_ioport_read(p, data, len, offset);
+		pci_uio_ioport_read(p, data, len, offset);
 		break;
 #endif
 	case RTE_PCI_KDRV_IGB_UIO:
-		pci_uio_ioport_read(p, data, len, offset);
-		break;
 	case RTE_PCI_KDRV_UIO_GENERIC:
 		pci_uio_ioport_read(p, data, len, offset);
 		break;
@@ -813,12 +740,10 @@ int rte_pci_write_config(const struct rte_pci_device *device,
 	switch (p->dev->kdrv) {
 #ifdef VFIO_PRESENT
 	case RTE_PCI_KDRV_VFIO:
-		pci_vfio_ioport_write(p, data, len, offset);
+		pci_uio_ioport_write(p, data, len, offset);
 		break;
 #endif
 	case RTE_PCI_KDRV_IGB_UIO:
-		pci_uio_ioport_write(p, data, len, offset);
-		break;
 	case RTE_PCI_KDRV_UIO_GENERIC:
 		pci_uio_ioport_write(p, data, len, offset);
 		break;
@@ -836,18 +761,12 @@ int rte_pci_write_config(const struct rte_pci_device *device,
 #ifdef VFIO_PRESENT
 	case RTE_PCI_KDRV_VFIO:
 		if (pci_vfio_is_enabled())
-			ret = pci_vfio_ioport_unmap(p);
+			ret = pci_uio_ioport_unmap(p);
 		break;
 #endif
 	case RTE_PCI_KDRV_IGB_UIO:
-		ret = pci_uio_ioport_unmap(p);
-		break;
 	case RTE_PCI_KDRV_UIO_GENERIC:
-#if defined(RTE_ARCH_X86)
-		ret = 0;
-#else
 		ret = pci_uio_ioport_unmap(p);
-#endif
 		break;
 	default:
 		break;
diff --git a/drivers/bus/pci/linux/pci_uio.c b/drivers/bus/pci/linux/pci_uio.c
index f3305a2..0062ac0 100644
--- a/drivers/bus/pci/linux/pci_uio.c
+++ b/drivers/bus/pci/linux/pci_uio.c
@@ -22,6 +22,7 @@
 #include <rte_bus_pci.h>
 #include <rte_common.h>
 #include <rte_malloc.h>
+#include <rte_bus.h>
 
 #include "eal_filesystem.h"
 #include "pci_init.h"
@@ -373,52 +374,83 @@
 pci_uio_ioport_map(struct rte_pci_device *dev, int bar,
 		   struct rte_pci_ioport *p)
 {
+	FILE *f = NULL;
 	char dirname[PATH_MAX];
 	char filename[PATH_MAX];
+	char buf[BUFSIZ];
+	uint64_t phys_addr, end_addr, flags;
 	int uio_num;
-	unsigned long start;
+	unsigned long base;
+	bool iobar;
+	int i;
 
-	if (rte_eal_iopl_init() != 0) {
-		RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for PCI device %s\n",
-			__func__, dev->name);
+	/* open and read addresses of the corresponding resource in sysfs */
+	snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT "/resource",
+		rte_pci_get_sysfs_path(), dev->addr.domain, dev->addr.bus,
+		dev->addr.devid, dev->addr.function);
+	f = fopen(filename, "r");
+	if (f == NULL) {
+		RTE_LOG(ERR, EAL, "Cannot open sysfs resource: %s\n",
+			strerror(errno));
 		return -1;
 	}
 
-	uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 0);
-	if (uio_num < 0)
-		return -1;
+	for (i = 0; i < bar + 1; i++) {
+		if (fgets(buf, sizeof(buf), f) == NULL) {
+			RTE_LOG(ERR, EAL, "Cannot read sysfs resource\n");
+			goto error;
+		}
+	}
+	if (pci_parse_one_sysfs_resource(buf, sizeof(buf), &phys_addr,
+		&end_addr, &flags) < 0)
+		goto error;
 
-	/* get portio start */
-	snprintf(filename, sizeof(filename),
-		 "%s/portio/port%d/start", dirname, bar);
-	if (eal_parse_sysfs_value(filename, &start) < 0) {
-		RTE_LOG(ERR, EAL, "%s(): cannot parse portio start\n",
-			__func__);
-		return -1;
+	if (flags & IORESOURCE_IO) {
+		iobar = 1;
+		base = (unsigned long)phys_addr;
+		RTE_LOG(INFO, EAL, "%s(): PIO BAR %08lx detected\n", __func__, base);
+	} else if (flags & IORESOURCE_MEM) {
+		iobar = 0;
+		base = (unsigned long)dev->mem_resource[bar].addr;
+		RTE_LOG(INFO, EAL, "%s(): MMIO BAR %08lx detected\n", __func__, base);
+	} else {
+		RTE_LOG(ERR, EAL, "%s(): unknown BAR type\n", __func__);
+		goto error;
+	}
+
+	if (iobar && rte_eal_iopl_init() != 0) {
+		RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for PCI device %s\n",
+			__func__, dev->name);
+		goto error;
 	}
-	/* ensure we don't get anything funny here, read/write will cast to
-	 * uin16_t */
-	if (start > UINT16_MAX)
-		return -1;
 
 	/* FIXME only for primary process ? */
-	if (dev->intr_handle.type == RTE_INTR_HANDLE_UNKNOWN) {
+	if (dev->intr_handle.type == RTE_INTR_HANDLE_UNKNOWN &&
+	    dev->kdrv == RTE_PCI_KDRV_UIO_GENERIC) {
+		uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 0);
+		if (uio_num < 0)
+			goto error;
 
 		snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num);
 		dev->intr_handle.fd = open(filename, O_RDWR);
 		if (dev->intr_handle.fd < 0) {
 			RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
 				filename, strerror(errno));
-			return -1;
+			goto error;
 		}
 		dev->intr_handle.type = RTE_INTR_HANDLE_UIO;
 	}
 
-	RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%lx\n", start);
+	RTE_LOG(DEBUG, EAL, "PCI IO port found start=0x%lx\n", base);
 
-	p->base = start;
+	p->base = base;
 	p->len = 0;
+	fclose(f);
 	return 0;
+error:
+	if (f)
+		fclose(f);
+	return -1;
 }
 #else
 int
@@ -489,6 +521,61 @@
 }
 #endif
 
+#define PIO_MAX 0x10000
+static inline uint8_t ioread8(void *addr)
+{
+	uint8_t val;
+
+	val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
+		*(volatile uint8_t *)addr :
+		inb((unsigned long)addr);
+
+	return val;
+}
+
+static inline uint16_t ioread16(void *addr)
+{
+	uint16_t val;
+
+	val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
+		*(volatile uint16_t *)addr :
+		inw((unsigned long)addr);
+
+	return val;
+}
+
+static inline uint32_t ioread32(void *addr)
+{
+	uint32_t val;
+
+	val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
+		*(volatile uint32_t *)addr :
+	inl((unsigned long)addr);
+
+	return val;
+}
+
+static inline void iowrite8(uint8_t val, void *addr)
+{
+	(uint64_t)(uintptr_t)addr >= PIO_MAX ?
+		*(volatile uint8_t *)addr = val :
+		outb(val, (unsigned long)addr);
+}
+
+static inline void iowrite16(uint16_t val, void *addr)
+{
+	(uint64_t)(uintptr_t)addr >= PIO_MAX ?
+		*(volatile uint16_t *)addr = val :
+		outw(val, (unsigned long)addr);
+}
+
+static inline void iowrite32(uint32_t val, void *addr)
+{
+	(uint64_t)(uintptr_t)addr >= PIO_MAX ?
+		*(volatile uint32_t *)addr = val :
+		outl(val, (unsigned long)addr);
+}
+
 void
 pci_uio_ioport_read(struct rte_pci_ioport *p,
 		    void *data, size_t len, off_t offset)
@@ -500,25 +587,13 @@
 	for (d = data; len > 0; d += size, reg += size, len -= size) {
 		if (len >= 4) {
 			size = 4;
-#if defined(RTE_ARCH_X86)
-			*(uint32_t *)d = inl(reg);
-#else
-			*(uint32_t *)d = *(volatile uint32_t *)reg;
-#endif
+			*(uint32_t *)d = ioread32((void *)reg);
 		} else if (len >= 2) {
 			size = 2;
-#if defined(RTE_ARCH_X86)
-			*(uint16_t *)d = inw(reg);
-#else
-			*(uint16_t *)d = *(volatile uint16_t *)reg;
-#endif
+			*(uint16_t *)d = ioread16((void *)reg);
 		} else {
 			size = 1;
-#if defined(RTE_ARCH_X86)
-			*d = inb(reg);
-#else
-			*d = *(volatile uint8_t *)reg;
-#endif
+			*d = ioread8((void *)reg);
 		}
 	}
 }
@@ -534,25 +609,13 @@
 	for (s = data; len > 0; s += size, reg += size, len -= size) {
 		if (len >= 4) {
 			size = 4;
-#if defined(RTE_ARCH_X86)
-			outl_p(*(const uint32_t *)s, reg);
-#else
-			*(volatile uint32_t *)reg = *(const uint32_t *)s;
-#endif
+			iowrite32(*(const uint32_t *)s, (void *)reg);
 		} else if (len >= 2) {
 			size = 2;
-#if defined(RTE_ARCH_X86)
-			outw_p(*(const uint16_t *)s, reg);
-#else
-			*(volatile uint16_t *)reg = *(const uint16_t *)s;
-#endif
+			iowrite16(*(const uint16_t *)s, (void *)reg);
 		} else {
 			size = 1;
-#if defined(RTE_ARCH_X86)
-			outb_p(*s, reg);
-#else
-			*(volatile uint8_t *)reg = *s;
-#endif
+			iowrite8(*s, (void *)reg);
 		}
 	}
 }
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v4] pci: support both PIO and MMIO BAR for legacy virtio on x86
  2020-10-13  8:41   ` [dpdk-dev] [PATCH v4] pci: support both PIO and MMIO BAR for legacy virtio on x86 谢华伟(此时此刻)
@ 2020-10-13 12:34     ` 谢华伟(此时此刻)
  2020-10-21  8:46     ` 谢华伟(此时此刻)
  2020-10-21 11:49     ` Ferruh Yigit
  2 siblings, 0 replies; 58+ messages in thread
From: 谢华伟(此时此刻) @ 2020-10-13 12:34 UTC (permalink / raw)
  To: Maxime Coquelin; +Cc: dev
>   	switch (p->dev->kdrv) {
>   #ifdef VFIO_PRESENT
>   	case RTE_PCI_KDRV_VFIO:
> -		pci_vfio_ioport_read(p, data, len, offset);
> +		pci_uio_ioport_read(p, data, len, offset);
>   		break;
>   #endif
>   	case RTE_PCI_KDRV_IGB_UIO:
> -		pci_uio_ioport_read(p, data, len, offset);
> -		break;
>   	case RTE_PCI_KDRV_UIO_GENERIC:
>   		pci_uio_ioport_read(p, data, len, offset);
>   }
Maxime:
With this patch, virtio PIO/MMIO port RW is directly through user space 
instruction instead of vfio ioctl syscall.
/huawei
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v4] pci: support both PIO and MMIO BAR for legacy virtio on x86
  2020-10-13  8:41   ` [dpdk-dev] [PATCH v4] pci: support both PIO and MMIO BAR for legacy virtio on x86 谢华伟(此时此刻)
  2020-10-13 12:34     ` 谢华伟(此时此刻)
@ 2020-10-21  8:46     ` 谢华伟(此时此刻)
  2020-10-21 11:49     ` Ferruh Yigit
  2 siblings, 0 replies; 58+ messages in thread
From: 谢华伟(此时此刻) @ 2020-10-21  8:46 UTC (permalink / raw)
  To: ferruh.yigit
  Cc: dev, anatoly.burakov, maxime.coquelin, david.marchand, grive,
	zhihong.wang, chenbo.xia
Hi Ferruh:
Comments to this patch? Customers are urging us to run DPDK with virtio 
mmio support.
@david
Though this patch is meant to support MMIO BARs, it is the right thing to do.
The previous code for virtio (IO/MMIO) port map/RW under the different drivers
is too complicated.
This patch also fixes the performance issue with VFIO port writes (virtio
only).
Besides, the next thing we could do is to move some of this PCI code to
the virtio PMD, as it is used by the virtio
PMD only.
/huawei
On 2020/10/13 16:41, 谢华伟(此时此刻) wrote:
> From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
>
> Legacy virtio-pci only supports PIO BAR resource. As we need to create lots of
> virtio devices and PIO resource on x86 is very limited, we expose MMIO BAR.
>
> Kernel supports both PIO  and MMIO BAR for legacy virtio-pci device. We handles
> different type of BAR in the similar way.
>
> In previous implementation, with igb_uio we get PIO address from igb_uio
> sysfs entry; with uio_pci_generic, we get PIO address from
> /proc/ioports.
> For PIO/MMIO RW, there is different path for different drivers and arch.
> For VFIO, PIO/MMIO RW is through syscall, which has big performance
> issue.
> On X86, it assumes only PIO is supported.
>
> All of the above is too much twisted.
> This patch unifies the way to get both PIO and MMIO address for different driver
> and arch, all from standard resource attr under pci sysfs.
>
> We distinguish PIO and MMIO by their address like how kernel does. It is ugly but works.
>
> Signed-off-by: huawei.xhw <huawei.xhw@alibaba-inc.com>
> ---
>   drivers/bus/pci/linux/pci.c     |  89 +--------------------
>   drivers/bus/pci/linux/pci_uio.c | 167 +++++++++++++++++++++++++++-------------
>   2 files changed, 119 insertions(+), 137 deletions(-)
>
> diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
> index bf27594..885e54e 100644
> --- a/drivers/bus/pci/linux/pci.c
> +++ b/drivers/bus/pci/linux/pci.c
> @@ -687,71 +687,6 @@ int rte_pci_write_config(const struct rte_pci_device *device,
>   	}
>   }
>   
>
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v4] pci: support both PIO and MMIO BAR for legacy virtio on x86
  2020-10-13  8:41   ` [dpdk-dev] [PATCH v4] pci: support both PIO and MMIO BAR for legacy virtio on x86 谢华伟(此时此刻)
  2020-10-13 12:34     ` 谢华伟(此时此刻)
  2020-10-21  8:46     ` 谢华伟(此时此刻)
@ 2020-10-21 11:49     ` Ferruh Yigit
  2020-10-21 12:32       ` 谢华伟(此时此刻)
  2 siblings, 1 reply; 58+ messages in thread
From: Ferruh Yigit @ 2020-10-21 11:49 UTC (permalink / raw)
  To: 谢华伟(此时此刻),
	Maxime Coquelin
  Cc: dev, anatoly.burakov, david.marchand, grive, zhihong.wang, chenbo.xia
On 10/13/2020 9:41 AM, 谢华伟(此时此刻) wrote:
> From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
> 
> Legacy virtio-pci only supports PIO BAR resource. As we need to create lots of
> virtio devices and PIO resource on x86 is very limited, we expose MMIO BAR.
> 
> Kernel supports both PIO  and MMIO BAR for legacy virtio-pci device. We handles
> different type of BAR in the similar way.
> 
> In previous implementation, with igb_uio we get PIO address from igb_uio
> sysfs entry; with uio_pci_generic, we get PIO address from
> /proc/ioports.
> For PIO/MMIO RW, there is different path for different drivers and arch.
> For VFIO, PIO/MMIO RW is through syscall, which has big performance
> issue.
> On X86, it assumes only PIO is supported.
> 
> All of the above is too much twisted.
> This patch unifies the way to get both PIO and MMIO address for different driver
> and arch, all from standard resource attr under pci sysfs.
> 
As mentioned above, this patch does multiple things.
The main target is, as far as I understand, you have a legacy virtio device 
which supports "memory-mapped I/O" and "port-mapped I/O", but virtio logic 
forces legacy devices to use the PIO but you want to be able to use the MMIO 
with this device.
The solution below adds MMIO support to the PIO function, and distinguishes
MMIO from PIO based on an address check.
Instead of this, can't this be resolved on the virtio side, e.g. if the legacy
device supports MMIO (detect this somehow), use MMIO instead of hacking the PIO
mapping to support MMIO?
I have other concerns, especially about merging the VFIO mapping too, but let's clarify the above
first.
Thanks,
ferruh
> We distinguish PIO and MMIO by their address like how kernel does. It is ugly but works.
> 
> Signed-off-by: huawei.xhw <huawei.xhw@alibaba-inc.com>
> ---
>   drivers/bus/pci/linux/pci.c     |  89 +--------------------
>   drivers/bus/pci/linux/pci_uio.c | 167 +++++++++++++++++++++++++++-------------
>   2 files changed, 119 insertions(+), 137 deletions(-)
> 
> diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
> index bf27594..885e54e 100644
> --- a/drivers/bus/pci/linux/pci.c
> +++ b/drivers/bus/pci/linux/pci.c
> @@ -687,71 +687,6 @@ int rte_pci_write_config(const struct rte_pci_device *device,
>   	}
>   }
>   
> -#if defined(RTE_ARCH_X86)
> -static int
> -pci_ioport_map(struct rte_pci_device *dev, int bar __rte_unused,
> -		struct rte_pci_ioport *p)
> -{
> -	uint16_t start, end;
> -	FILE *fp;
> -	char *line = NULL;
> -	char pci_id[16];
> -	int found = 0;
> -	size_t linesz;
> -
> -	if (rte_eal_iopl_init() != 0) {
> -		RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for PCI device %s\n",
> -			__func__, dev->name);
> -		return -1;
> -	}
> -
> -	snprintf(pci_id, sizeof(pci_id), PCI_PRI_FMT,
> -		 dev->addr.domain, dev->addr.bus,
> -		 dev->addr.devid, dev->addr.function);
> -
> -	fp = fopen("/proc/ioports", "r");
> -	if (fp == NULL) {
> -		RTE_LOG(ERR, EAL, "%s(): can't open ioports\n", __func__);
> -		return -1;
> -	}
> -
> -	while (getdelim(&line, &linesz, '\n', fp) > 0) {
> -		char *ptr = line;
> -		char *left;
> -		int n;
> -
> -		n = strcspn(ptr, ":");
> -		ptr[n] = 0;
> -		left = &ptr[n + 1];
> -
> -		while (*left && isspace(*left))
> -			left++;
> -
> -		if (!strncmp(left, pci_id, strlen(pci_id))) {
> -			found = 1;
> -
> -			while (*ptr && isspace(*ptr))
> -				ptr++;
> -
> -			sscanf(ptr, "%04hx-%04hx", &start, &end);
> -
> -			break;
> -		}
> -	}
> -
> -	free(line);
> -	fclose(fp);
> -
> -	if (!found)
> -		return -1;
> -
> -	p->base = start;
> -	RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%x\n", start);
> -
> -	return 0;
> -}
> -#endif
> -
>   int
>   rte_pci_ioport_map(struct rte_pci_device *dev, int bar,
>   		struct rte_pci_ioport *p)
> @@ -762,18 +697,12 @@ int rte_pci_write_config(const struct rte_pci_device *device,
>   #ifdef VFIO_PRESENT
>   	case RTE_PCI_KDRV_VFIO:
>   		if (pci_vfio_is_enabled())
> -			ret = pci_vfio_ioport_map(dev, bar, p);
> +			ret = pci_uio_ioport_map(dev, bar, p);
>   		break;
>   #endif
>   	case RTE_PCI_KDRV_IGB_UIO:
> -		ret = pci_uio_ioport_map(dev, bar, p);
> -		break;
>   	case RTE_PCI_KDRV_UIO_GENERIC:
> -#if defined(RTE_ARCH_X86)
> -		ret = pci_ioport_map(dev, bar, p);
> -#else
>   		ret = pci_uio_ioport_map(dev, bar, p);
> -#endif
>   		break;
>   	default:
>   		break;
> @@ -792,12 +721,10 @@ int rte_pci_write_config(const struct rte_pci_device *device,
>   	switch (p->dev->kdrv) {
>   #ifdef VFIO_PRESENT
>   	case RTE_PCI_KDRV_VFIO:
> -		pci_vfio_ioport_read(p, data, len, offset);
> +		pci_uio_ioport_read(p, data, len, offset);
>   		break;
>   #endif
>   	case RTE_PCI_KDRV_IGB_UIO:
> -		pci_uio_ioport_read(p, data, len, offset);
> -		break;
>   	case RTE_PCI_KDRV_UIO_GENERIC:
>   		pci_uio_ioport_read(p, data, len, offset);
>   		break;
> @@ -813,12 +740,10 @@ int rte_pci_write_config(const struct rte_pci_device *device,
>   	switch (p->dev->kdrv) {
>   #ifdef VFIO_PRESENT
>   	case RTE_PCI_KDRV_VFIO:
> -		pci_vfio_ioport_write(p, data, len, offset);
> +		pci_uio_ioport_write(p, data, len, offset);
>   		break;
>   #endif
>   	case RTE_PCI_KDRV_IGB_UIO:
> -		pci_uio_ioport_write(p, data, len, offset);
> -		break;
>   	case RTE_PCI_KDRV_UIO_GENERIC:
>   		pci_uio_ioport_write(p, data, len, offset);
>   		break;
> @@ -836,18 +761,12 @@ int rte_pci_write_config(const struct rte_pci_device *device,
>   #ifdef VFIO_PRESENT
>   	case RTE_PCI_KDRV_VFIO:
>   		if (pci_vfio_is_enabled())
> -			ret = pci_vfio_ioport_unmap(p);
> +			ret = pci_uio_ioport_unmap(p);
>   		break;
>   #endif
>   	case RTE_PCI_KDRV_IGB_UIO:
> -		ret = pci_uio_ioport_unmap(p);
> -		break;
>   	case RTE_PCI_KDRV_UIO_GENERIC:
> -#if defined(RTE_ARCH_X86)
> -		ret = 0;
> -#else
>   		ret = pci_uio_ioport_unmap(p);
> -#endif
>   		break;
>   	default:
>   		break;
> diff --git a/drivers/bus/pci/linux/pci_uio.c b/drivers/bus/pci/linux/pci_uio.c
> index f3305a2..0062ac0 100644
> --- a/drivers/bus/pci/linux/pci_uio.c
> +++ b/drivers/bus/pci/linux/pci_uio.c
> @@ -22,6 +22,7 @@
>   #include <rte_bus_pci.h>
>   #include <rte_common.h>
>   #include <rte_malloc.h>
> +#include <rte_bus.h>
>   
>   #include "eal_filesystem.h"
>   #include "pci_init.h"
> @@ -373,52 +374,83 @@
>   pci_uio_ioport_map(struct rte_pci_device *dev, int bar,
>   		   struct rte_pci_ioport *p)
>   {
> +	FILE *f = NULL;
>   	char dirname[PATH_MAX];
>   	char filename[PATH_MAX];
> +	char buf[BUFSIZ];
> +	uint64_t phys_addr, end_addr, flags;
>   	int uio_num;
> -	unsigned long start;
> +	unsigned long base;
> +	bool iobar;
> +	int i;
>   
> -	if (rte_eal_iopl_init() != 0) {
> -		RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for PCI device %s\n",
> -			__func__, dev->name);
> +	/* open and read addresses of the corresponding resource in sysfs */
> +	snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT "/resource",
> +		rte_pci_get_sysfs_path(), dev->addr.domain, dev->addr.bus,
> +		dev->addr.devid, dev->addr.function);
> +	f = fopen(filename, "r");
> +	if (f == NULL) {
> +		RTE_LOG(ERR, EAL, "Cannot open sysfs resource: %s\n",
> +			strerror(errno));
>   		return -1;
>   	}
>   
> -	uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 0);
> -	if (uio_num < 0)
> -		return -1;
> +	for (i = 0; i < bar + 1; i++) {
> +		if (fgets(buf, sizeof(buf), f) == NULL) {
> +			RTE_LOG(ERR, EAL, "Cannot read sysfs resource\n");
> +			goto error;
> +		}
> +	}
> +	if (pci_parse_one_sysfs_resource(buf, sizeof(buf), &phys_addr,
> +		&end_addr, &flags) < 0)
> +		goto error;
>   
> -	/* get portio start */
> -	snprintf(filename, sizeof(filename),
> -		 "%s/portio/port%d/start", dirname, bar);
> -	if (eal_parse_sysfs_value(filename, &start) < 0) {
> -		RTE_LOG(ERR, EAL, "%s(): cannot parse portio start\n",
> -			__func__);
> -		return -1;
> +	if (flags & IORESOURCE_IO) {
> +		iobar = 1;
> +		base = (unsigned long)phys_addr;
> +		RTE_LOG(INFO, EAL, "%s(): PIO BAR %08lx detected\n", __func__, base);
> +	} else if (flags & IORESOURCE_MEM) {
> +		iobar = 0;
> +		base = (unsigned long)dev->mem_resource[bar].addr;
> +		RTE_LOG(INFO, EAL, "%s(): MMIO BAR %08lx detected\n", __func__, base);
> +	} else {
> +		RTE_LOG(ERR, EAL, "%s(): unknown BAR type\n", __func__);
> +		goto error;
> +	}
> +
> +	if (iobar && rte_eal_iopl_init() != 0) {
> +		RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for PCI device %s\n",
> +			__func__, dev->name);
> +		goto error;
>   	}
> -	/* ensure we don't get anything funny here, read/write will cast to
> -	 * uin16_t */
> -	if (start > UINT16_MAX)
> -		return -1;
>   
>   	/* FIXME only for primary process ? */
> -	if (dev->intr_handle.type == RTE_INTR_HANDLE_UNKNOWN) {
> +	if (dev->intr_handle.type == RTE_INTR_HANDLE_UNKNOWN &&
> +	    dev->kdrv == RTE_PCI_KDRV_UIO_GENERIC) {
> +		uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 0);
> +		if (uio_num < 0)
> +			goto error;
>   
>   		snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num);
>   		dev->intr_handle.fd = open(filename, O_RDWR);
>   		if (dev->intr_handle.fd < 0) {
>   			RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
>   				filename, strerror(errno));
> -			return -1;
> +			goto error;
>   		}
>   		dev->intr_handle.type = RTE_INTR_HANDLE_UIO;
>   	}
>   
> -	RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%lx\n", start);
> +	RTE_LOG(DEBUG, EAL, "PCI IO port found start=0x%lx\n", base);
>   
> -	p->base = start;
> +	p->base = base;
>   	p->len = 0;
> +	fclose(f);
>   	return 0;
> +error:
> +	if (f)
> +		fclose(f);
> +	return -1;
>   }
>   #else
>   int
> @@ -489,6 +521,61 @@
>   }
>   #endif
>   
> +#define PIO_MAX 0x10000
> +static inline uint8_t ioread8(void *addr)
> +{
> +	uint8_t val;
> +
> +	val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
> +		*(volatile uint8_t *)addr :
> +		inb((unsigned long)addr);
> +
> +	return val;
> +}
> +
> +static inline uint16_t ioread16(void *addr)
> +{
> +	uint16_t val;
> +
> +	val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
> +		*(volatile uint16_t *)addr :
> +		inw((unsigned long)addr);
> +
> +	return val;
> +}
> +
> +static inline uint32_t ioread32(void *addr)
> +{
> +	uint32_t val;
> +
> +	val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
> +		*(volatile uint32_t *)addr :
> +	inl((unsigned long)addr);
> +
> +	return val;
> +}
> +
> +static inline void iowrite8(uint8_t val, void *addr)
> +{
> +	(uint64_t)(uintptr_t)addr >= PIO_MAX ?
> +		*(volatile uint8_t *)addr = val :
> +		outb(val, (unsigned long)addr);
> +}
> +
> +static inline void iowrite16(uint16_t val, void *addr)
> +{
> +	(uint64_t)(uintptr_t)addr >= PIO_MAX ?
> +		*(volatile uint16_t *)addr = val :
> +		outw(val, (unsigned long)addr);
> +}
> +
> +static inline void iowrite32(uint32_t val, void *addr)
> +{
> +	(uint64_t)(uintptr_t)addr >= PIO_MAX ?
> +		*(volatile uint32_t *)addr = val :
> +		outl(val, (unsigned long)addr);
> +}
> +
>   void
>   pci_uio_ioport_read(struct rte_pci_ioport *p,
>   		    void *data, size_t len, off_t offset)
> @@ -500,25 +587,13 @@
>   	for (d = data; len > 0; d += size, reg += size, len -= size) {
>   		if (len >= 4) {
>   			size = 4;
> -#if defined(RTE_ARCH_X86)
> -			*(uint32_t *)d = inl(reg);
> -#else
> -			*(uint32_t *)d = *(volatile uint32_t *)reg;
> -#endif
> +			*(uint32_t *)d = ioread32((void *)reg);
>   		} else if (len >= 2) {
>   			size = 2;
> -#if defined(RTE_ARCH_X86)
> -			*(uint16_t *)d = inw(reg);
> -#else
> -			*(uint16_t *)d = *(volatile uint16_t *)reg;
> -#endif
> +			*(uint16_t *)d = ioread16((void *)reg);
>   		} else {
>   			size = 1;
> -#if defined(RTE_ARCH_X86)
> -			*d = inb(reg);
> -#else
> -			*d = *(volatile uint8_t *)reg;
> -#endif
> +			*d = ioread8((void *)reg);
>   		}
>   	}
>   }
> @@ -534,25 +609,13 @@
>   	for (s = data; len > 0; s += size, reg += size, len -= size) {
>   		if (len >= 4) {
>   			size = 4;
> -#if defined(RTE_ARCH_X86)
> -			outl_p(*(const uint32_t *)s, reg);
> -#else
> -			*(volatile uint32_t *)reg = *(const uint32_t *)s;
> -#endif
> +			iowrite32(*(const uint32_t *)s, (void *)reg);
>   		} else if (len >= 2) {
>   			size = 2;
> -#if defined(RTE_ARCH_X86)
> -			outw_p(*(const uint16_t *)s, reg);
> -#else
> -			*(volatile uint16_t *)reg = *(const uint16_t *)s;
> -#endif
> +			iowrite16(*(const uint16_t *)s, (void *)reg);
>   		} else {
>   			size = 1;
> -#if defined(RTE_ARCH_X86)
> -			outb_p(*s, reg);
> -#else
> -			*(volatile uint8_t *)reg = *s;
> -#endif
> +			iowrite8(*s, (void *)reg);
>   		}
>   	}
>   }
> 
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v4] pci: support both PIO and MMIO BAR for legacy virtio on x86
  2020-10-21 11:49     ` Ferruh Yigit
@ 2020-10-21 12:32       ` 谢华伟(此时此刻)
  2020-10-21 17:24         ` Ferruh Yigit
  0 siblings, 1 reply; 58+ messages in thread
From: 谢华伟(此时此刻) @ 2020-10-21 12:32 UTC (permalink / raw)
  To: Ferruh Yigit, Maxime Coquelin
  Cc: dev, anatoly.burakov, david.marchand, grive, zhihong.wang, chenbo.xia
On 2020/10/21 19:49, Ferruh Yigit wrote:
> On 10/13/2020 9:41 AM, 谢华伟(此时此刻) wrote:
>> From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
>>
>> Legacy virtio-pci only supports PIO BAR resource. As we need to 
>> create lots of
>> virtio devices and PIO resource on x86 is very limited, we expose 
>> MMIO BAR.
>>
>> Kernel supports both PIO  and MMIO BAR for legacy virtio-pci device. 
>> We handles
>> different type of BAR in the similar way.
>>
>> In previous implementation, with igb_uio we get PIO address from igb_uio
>> sysfs entry; with uio_pci_generic, we get PIO address from
>> /proc/ioports.
>> For PIO/MMIO RW, there is different path for different drivers and arch.
>> For VFIO, PIO/MMIO RW is through syscall, which has big performance
>> issue.
>> On X86, it assumes only PIO is supported.
>>
>> All of the above is too much twisted.
>> This patch unifies the way to get both PIO and MMIO address for 
>> different driver
>> and arch, all from standard resource attr under pci sysfs.
>>
>
> As mentined above this patch does multiple things.
>
> The main target is, as far as I understand, you have a legacy virtio 
> device which supports "memory-mapped I/O" and "port-mapped I/O", but 
> virtio logic forces legacy devices to use the PIO but you want to be 
> able to use the MMIO with this device.
yes.
>
> The solution below is adding MMIO support in the PIO funciton, and 
> distinguish MMIO or PIO based on their address check.
Yes, the kernel does this in a similar way.
>
>
> Instead of this, can't this be resolved in the virtio side, like if 
> the legacy device supports MMIO (detect this somehow) use the MMIO 
> istead of hacking PIO mapping to support MMIO?
I understand your concern.
1>
If we move anything, I think we should move all of that PCI code to the 
virtio side, not just the MMIO part.
Without my patch, that PCI code is virtio-pci device specific, not 
generic.
With this patch, the PCI ioport map/rw code could also be used for 
other devices if they support both PIO and MMIO.
Either option is OK. I hope I have made myself clear.
2>  I don't think this is hacking. For rte_pci_ioport_map/read/write, if 
an ioport could be either PIO or MMIO, then everything is reasonable.
Take how kernel does port map for example:
     vp_dev->ioaddr = pci_iomap(pci_dev, 0, 0);
Here "io" doesn't mean PIO only. It could also be MMIO. The kernel then uses 
ioread/iowrite to access the PIO/MMIO port.
Actually, our interface is pretty much the same.
I think this patch extends rather than hacks the ioport interface to 
support MMIO.
>
> I have other concerns, specially mergin VFIO mapping too, but lets 
> clarify above first.
The vfio change doesn't affect any driver other than virtio.
igb_uio, uio_pci_generic and vfio-pci all use the same way to map/rw 
ioport.
>
> Thanks,
> ferruh
>
>
>
>> We distinguish PIO and MMIO by their address like how kernel does. It 
>> is ugly but works.
>>
>> Signed-off-by: huawei.xhw <huawei.xhw@alibaba-inc.com>
>> ---
>>   drivers/bus/pci/linux/pci.c     |  89 +--------------------
>>   drivers/bus/pci/linux/pci_uio.c | 167 
>> +++++++++++++++++++++++++++-------------
>>   2 files changed, 119 insertions(+), 137 deletions(-)
>>
>> diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
>> index bf27594..885e54e 100644
>> --- a/drivers/bus/pci/linux/pci.c
>> +++ b/drivers/bus/pci/linux/pci.c
>> @@ -687,71 +687,6 @@ int rte_pci_write_config(const struct 
>> rte_pci_device *device,
>>       }
>>   }
>>   -#if defined(RTE_ARCH_X86)
>> -static int
>> -pci_ioport_map(struct rte_pci_device *dev, int bar __rte_unused,
>> -        struct rte_pci_ioport *p)
>> -{
>> -    uint16_t start, end;
>> -    FILE *fp;
>> -    char *line = NULL;
>> -    char pci_id[16];
>> -    int found = 0;
>> -    size_t linesz;
>> -
>> -    if (rte_eal_iopl_init() != 0) {
>> -        RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for 
>> PCI device %s\n",
>> -            __func__, dev->name);
>> -        return -1;
>> -    }
>> -
>> -    snprintf(pci_id, sizeof(pci_id), PCI_PRI_FMT,
>> -         dev->addr.domain, dev->addr.bus,
>> -         dev->addr.devid, dev->addr.function);
>> -
>> -    fp = fopen("/proc/ioports", "r");
>> -    if (fp == NULL) {
>> -        RTE_LOG(ERR, EAL, "%s(): can't open ioports\n", __func__);
>> -        return -1;
>> -    }
>> -
>> -    while (getdelim(&line, &linesz, '\n', fp) > 0) {
>> -        char *ptr = line;
>> -        char *left;
>> -        int n;
>> -
>> -        n = strcspn(ptr, ":");
>> -        ptr[n] = 0;
>> -        left = &ptr[n + 1];
>> -
>> -        while (*left && isspace(*left))
>> -            left++;
>> -
>> -        if (!strncmp(left, pci_id, strlen(pci_id))) {
>> -            found = 1;
>> -
>> -            while (*ptr && isspace(*ptr))
>> -                ptr++;
>> -
>> -            sscanf(ptr, "%04hx-%04hx", &start, &end);
>> -
>> -            break;
>> -        }
>> -    }
>> -
>> -    free(line);
>> -    fclose(fp);
>> -
>> -    if (!found)
>> -        return -1;
>> -
>> -    p->base = start;
>> -    RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%x\n", start);
>> -
>> -    return 0;
>> -}
>> -#endif
>> -
>>   int
>>   rte_pci_ioport_map(struct rte_pci_device *dev, int bar,
>>           struct rte_pci_ioport *p)
>> @@ -762,18 +697,12 @@ int rte_pci_write_config(const struct 
>> rte_pci_device *device,
>>   #ifdef VFIO_PRESENT
>>       case RTE_PCI_KDRV_VFIO:
>>           if (pci_vfio_is_enabled())
>> -            ret = pci_vfio_ioport_map(dev, bar, p);
>> +            ret = pci_uio_ioport_map(dev, bar, p);
>>           break;
>>   #endif
>>       case RTE_PCI_KDRV_IGB_UIO:
>> -        ret = pci_uio_ioport_map(dev, bar, p);
>> -        break;
>>       case RTE_PCI_KDRV_UIO_GENERIC:
>> -#if defined(RTE_ARCH_X86)
>> -        ret = pci_ioport_map(dev, bar, p);
>> -#else
>>           ret = pci_uio_ioport_map(dev, bar, p);
>> -#endif
>>           break;
>>       default:
>>           break;
>> @@ -792,12 +721,10 @@ int rte_pci_write_config(const struct 
>> rte_pci_device *device,
>>       switch (p->dev->kdrv) {
>>   #ifdef VFIO_PRESENT
>>       case RTE_PCI_KDRV_VFIO:
>> -        pci_vfio_ioport_read(p, data, len, offset);
>> +        pci_uio_ioport_read(p, data, len, offset);
>>           break;
>>   #endif
>>       case RTE_PCI_KDRV_IGB_UIO:
>> -        pci_uio_ioport_read(p, data, len, offset);
>> -        break;
>>       case RTE_PCI_KDRV_UIO_GENERIC:
>>           pci_uio_ioport_read(p, data, len, offset);
>>           break;
>> @@ -813,12 +740,10 @@ int rte_pci_write_config(const struct 
>> rte_pci_device *device,
>>       switch (p->dev->kdrv) {
>>   #ifdef VFIO_PRESENT
>>       case RTE_PCI_KDRV_VFIO:
>> -        pci_vfio_ioport_write(p, data, len, offset);
>> +        pci_uio_ioport_write(p, data, len, offset);
>>           break;
>>   #endif
>>       case RTE_PCI_KDRV_IGB_UIO:
>> -        pci_uio_ioport_write(p, data, len, offset);
>> -        break;
>>       case RTE_PCI_KDRV_UIO_GENERIC:
>>           pci_uio_ioport_write(p, data, len, offset);
>>           break;
>> @@ -836,18 +761,12 @@ int rte_pci_write_config(const struct 
>> rte_pci_device *device,
>>   #ifdef VFIO_PRESENT
>>       case RTE_PCI_KDRV_VFIO:
>>           if (pci_vfio_is_enabled())
>> -            ret = pci_vfio_ioport_unmap(p);
>> +            ret = pci_uio_ioport_unmap(p);
>>           break;
>>   #endif
>>       case RTE_PCI_KDRV_IGB_UIO:
>> -        ret = pci_uio_ioport_unmap(p);
>> -        break;
>>       case RTE_PCI_KDRV_UIO_GENERIC:
>> -#if defined(RTE_ARCH_X86)
>> -        ret = 0;
>> -#else
>>           ret = pci_uio_ioport_unmap(p);
>> -#endif
>>           break;
>>       default:
>>           break;
>> diff --git a/drivers/bus/pci/linux/pci_uio.c 
>> b/drivers/bus/pci/linux/pci_uio.c
>> index f3305a2..0062ac0 100644
>> --- a/drivers/bus/pci/linux/pci_uio.c
>> +++ b/drivers/bus/pci/linux/pci_uio.c
>> @@ -22,6 +22,7 @@
>>   #include <rte_bus_pci.h>
>>   #include <rte_common.h>
>>   #include <rte_malloc.h>
>> +#include <rte_bus.h>
>>     #include "eal_filesystem.h"
>>   #include "pci_init.h"
>> @@ -373,52 +374,83 @@
>>   pci_uio_ioport_map(struct rte_pci_device *dev, int bar,
>>              struct rte_pci_ioport *p)
>>   {
>> +    FILE *f = NULL;
>>       char dirname[PATH_MAX];
>>       char filename[PATH_MAX];
>> +    char buf[BUFSIZ];
>> +    uint64_t phys_addr, end_addr, flags;
>>       int uio_num;
>> -    unsigned long start;
>> +    unsigned long base;
>> +    bool iobar;
>> +    int i;
>>   -    if (rte_eal_iopl_init() != 0) {
>> -        RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for 
>> PCI device %s\n",
>> -            __func__, dev->name);
>> +    /* open and read addresses of the corresponding resource in 
>> sysfs */
>> +    snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT "/resource",
>> +        rte_pci_get_sysfs_path(), dev->addr.domain, dev->addr.bus,
>> +        dev->addr.devid, dev->addr.function);
>> +    f = fopen(filename, "r");
>> +    if (f == NULL) {
>> +        RTE_LOG(ERR, EAL, "Cannot open sysfs resource: %s\n",
>> +            strerror(errno));
>>           return -1;
>>       }
>>   -    uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 0);
>> -    if (uio_num < 0)
>> -        return -1;
>> +    for (i = 0; i < bar + 1; i++) {
>> +        if (fgets(buf, sizeof(buf), f) == NULL) {
>> +            RTE_LOG(ERR, EAL, "Cannot read sysfs resource\n");
>> +            goto error;
>> +        }
>> +    }
>> +    if (pci_parse_one_sysfs_resource(buf, sizeof(buf), &phys_addr,
>> +        &end_addr, &flags) < 0)
>> +        goto error;
>>   -    /* get portio start */
>> -    snprintf(filename, sizeof(filename),
>> -         "%s/portio/port%d/start", dirname, bar);
>> -    if (eal_parse_sysfs_value(filename, &start) < 0) {
>> -        RTE_LOG(ERR, EAL, "%s(): cannot parse portio start\n",
>> -            __func__);
>> -        return -1;
>> +    if (flags & IORESOURCE_IO) {
>> +        iobar = 1;
>> +        base = (unsigned long)phys_addr;
>> +        RTE_LOG(INFO, EAL, "%s(): PIO BAR %08lx detected\n", 
>> __func__, base);
>> +    } else if (flags & IORESOURCE_MEM) {
>> +        iobar = 0;
>> +        base = (unsigned long)dev->mem_resource[bar].addr;
>> +        RTE_LOG(INFO, EAL, "%s(): MMIO BAR %08lx detected\n", 
>> __func__, base);
>> +    } else {
>> +        RTE_LOG(ERR, EAL, "%s(): unknown BAR type\n", __func__);
>> +        goto error;
>> +    }
>> +
>> +    if (iobar && rte_eal_iopl_init() != 0) {
>> +        RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for 
>> PCI device %s\n",
>> +            __func__, dev->name);
>> +        goto error;
>>       }
>> -    /* ensure we don't get anything funny here, read/write will cast to
>> -     * uin16_t */
>> -    if (start > UINT16_MAX)
>> -        return -1;
>>         /* FIXME only for primary process ? */
>> -    if (dev->intr_handle.type == RTE_INTR_HANDLE_UNKNOWN) {
>> +    if (dev->intr_handle.type == RTE_INTR_HANDLE_UNKNOWN &&
>> +        dev->kdrv == RTE_PCI_KDRV_UIO_GENERIC) {
>> +        uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 0);
>> +        if (uio_num < 0)
>> +            goto error;
>>             snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num);
>>           dev->intr_handle.fd = open(filename, O_RDWR);
>>           if (dev->intr_handle.fd < 0) {
>>               RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
>>                   filename, strerror(errno));
>> -            return -1;
>> +            goto error;
>>           }
>>           dev->intr_handle.type = RTE_INTR_HANDLE_UIO;
>>       }
>>   -    RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%lx\n", start);
>> +    RTE_LOG(DEBUG, EAL, "PCI IO port found start=0x%lx\n", base);
>>   -    p->base = start;
>> +    p->base = base;
>>       p->len = 0;
>> +    fclose(f);
>>       return 0;
>> +error:
>> +    if (f)
>> +        fclose(f);
>> +    return -1;
>>   }
>>   #else
>>   int
>> @@ -489,6 +521,61 @@
>>   }
>>   #endif
>>   +#define PIO_MAX 0x10000
>> +static inline uint8_t ioread8(void *addr)
>> +{
>> +    uint8_t val;
>> +
>> +    val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
>> +        *(volatile uint8_t *)addr :
>> +        inb((unsigned long)addr);
>> +
>> +    return val;
>> +}
>> +
>> +static inline uint16_t ioread16(void *addr)
>> +{
>> +    uint16_t val;
>> +
>> +    val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
>> +        *(volatile uint16_t *)addr :
>> +        inw((unsigned long)addr);
>> +
>> +    return val;
>> +}
>> +
>> +static inline uint32_t ioread32(void *addr)
>> +{
>> +    uint32_t val;
>> +
>> +    val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
>> +        *(volatile uint32_t *)addr :
>> +    inl((unsigned long)addr);
>> +
>> +    return val;
>> +}
>> +
>> +static inline void iowrite8(uint8_t val, void *addr)
>> +{
>> +    (uint64_t)(uintptr_t)addr >= PIO_MAX ?
>> +        *(volatile uint8_t *)addr = val :
>> +        outb(val, (unsigned long)addr);
>> +}
>> +
>> +static inline void iowrite16(uint16_t val, void *addr)
>> +{
>> +    (uint64_t)(uintptr_t)addr >= PIO_MAX ?
>> +        *(volatile uint16_t *)addr = val :
>> +        outw(val, (unsigned long)addr);
>> +}
>> +
>> +static inline void iowrite32(uint32_t val, void *addr)
>> +{
>> +    (uint64_t)(uintptr_t)addr >= PIO_MAX ?
>> +        *(volatile uint32_t *)addr = val :
>> +        outl(val, (unsigned long)addr);
>> +}
>> +
>>   void
>>   pci_uio_ioport_read(struct rte_pci_ioport *p,
>>               void *data, size_t len, off_t offset)
>> @@ -500,25 +587,13 @@
>>       for (d = data; len > 0; d += size, reg += size, len -= size) {
>>           if (len >= 4) {
>>               size = 4;
>> -#if defined(RTE_ARCH_X86)
>> -            *(uint32_t *)d = inl(reg);
>> -#else
>> -            *(uint32_t *)d = *(volatile uint32_t *)reg;
>> -#endif
>> +            *(uint32_t *)d = ioread32((void *)reg);
>>           } else if (len >= 2) {
>>               size = 2;
>> -#if defined(RTE_ARCH_X86)
>> -            *(uint16_t *)d = inw(reg);
>> -#else
>> -            *(uint16_t *)d = *(volatile uint16_t *)reg;
>> -#endif
>> +            *(uint16_t *)d = ioread16((void *)reg);
>>           } else {
>>               size = 1;
>> -#if defined(RTE_ARCH_X86)
>> -            *d = inb(reg);
>> -#else
>> -            *d = *(volatile uint8_t *)reg;
>> -#endif
>> +            *d = ioread8((void *)reg);
>>           }
>>       }
>>   }
>> @@ -534,25 +609,13 @@
>>       for (s = data; len > 0; s += size, reg += size, len -= size) {
>>           if (len >= 4) {
>>               size = 4;
>> -#if defined(RTE_ARCH_X86)
>> -            outl_p(*(const uint32_t *)s, reg);
>> -#else
>> -            *(volatile uint32_t *)reg = *(const uint32_t *)s;
>> -#endif
>> +            iowrite32(*(const uint32_t *)s, (void *)reg);
>>           } else if (len >= 2) {
>>               size = 2;
>> -#if defined(RTE_ARCH_X86)
>> -            outw_p(*(const uint16_t *)s, reg);
>> -#else
>> -            *(volatile uint16_t *)reg = *(const uint16_t *)s;
>> -#endif
>> +            iowrite16(*(const uint16_t *)s, (void *)reg);
>>           } else {
>>               size = 1;
>> -#if defined(RTE_ARCH_X86)
>> -            outb_p(*s, reg);
>> -#else
>> -            *(volatile uint8_t *)reg = *s;
>> -#endif
>> +            iowrite8(*s, (void *)reg);
>>           }
>>       }
>>   }
>>
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v4] pci: support both PIO and MMIO BAR for legacy virtio on x86
  2020-10-21 12:32       ` 谢华伟(此时此刻)
@ 2020-10-21 17:24         ` Ferruh Yigit
  2020-10-22  9:15           ` 谢华伟(此时此刻)
  0 siblings, 1 reply; 58+ messages in thread
From: Ferruh Yigit @ 2020-10-21 17:24 UTC (permalink / raw)
  To: 谢华伟(此时此刻),
	Maxime Coquelin
  Cc: dev, anatoly.burakov, david.marchand, grive, zhihong.wang, chenbo.xia
On 10/21/2020 1:32 PM, 谢华伟(此时此刻) wrote:
> 
> On 2020/10/21 19:49, Ferruh Yigit wrote:
>> On 10/13/2020 9:41 AM, 谢华伟(此时此刻) wrote:
>>> From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
>>>
>>> Legacy virtio-pci only supports PIO BAR resource. As we need to create lots of
>>> virtio devices and PIO resource on x86 is very limited, we expose MMIO BAR.
>>>
>>> Kernel supports both PIO  and MMIO BAR for legacy virtio-pci device. We handles
>>> different type of BAR in the similar way.
>>>
>>> In previous implementation, with igb_uio we get PIO address from igb_uio
>>> sysfs entry; with uio_pci_generic, we get PIO address from
>>> /proc/ioports.
>>> For PIO/MMIO RW, there is different path for different drivers and arch.
>>> For VFIO, PIO/MMIO RW is through syscall, which has big performance
>>> issue.
>>> On X86, it assumes only PIO is supported.
>>>
>>> All of the above is too much twisted.
>>> This patch unifies the way to get both PIO and MMIO address for different driver
>>> and arch, all from standard resource attr under pci sysfs.
>>>
>>
>> As mentined above this patch does multiple things.
>>
>> The main target is, as far as I understand, you have a legacy virtio device 
>> which supports "memory-mapped I/O" and "port-mapped I/O", but virtio logic 
>> forces legacy devices to use the PIO but you want to be able to use the MMIO 
>> with this device.
> yes.
>>
>> The solution below is adding MMIO support in the PIO funciton, and distinguish 
>> MMIO or PIO based on their address check.
> Yes, kernel does this in the similar way.
>>
>>
>> Instead of this, can't this be resolved in the virtio side, like if the legacy 
>> device supports MMIO (detect this somehow) use the MMIO istead of hacking PIO 
>> mapping to support MMIO?
> 
> Get your concern.
> 
> 1>
> 
> If we move, I think we should move all those PCI codes into virtio side, not 
> just the mmio part.
> 
> Without my patch, those PCI codes are virtio-pci device specific, not generic.
> 
> With this patch, those pci ioport map/rw code could also be used for other 
> devices if they support both PIO and MMIO.
> 
I was not suggesting moving any code into virtio, but within 'vtpci_init()' what 
happens when "hw->modern = 1;" is set?
And if this is set for your device, will it work without change?
> Every option is ok. Hope i make myself clear.
> 
> 2>  I don't think this is hacking. for rte_pci_ioport_map/read/write, if ioport 
> could be both PIO and MMIO, then everything is reasonable.
> 
> Take how kernel does port map for example:
> 
>      vp_dev->ioaddr = pci_iomap(pci_dev, 0, 0);
> 
> Here io doesn't mean PIO only. It could also be MMIO. Kernel then uses 
> ioread/write to access PIO/MMIO port.
> 
> Actually we are pretty much the same in the interface.
> 
> I think this patch extends rather then hacks the ioport interface to support MMIO.
> 
>>
>> I have other concerns, specially mergin VFIO mapping too, but lets clarify 
>> above first.
> 
> vfio doesn't affect other driver but only virtio.
> 
Why doesn't it affect other drivers? Can't there be other drivers using PIO?
> igb_uio, uio_pci_generic and vfio-pci all uses the same way to map/rw ioport.
> 
For vfio, the code changes 'pci_vfio_ioport_read()' to a direct address read. 
First, I don't know if this is always safe, and my question is: why was a 
syscall introduced in the first place if you can read from the address directly?
Does your device work as expected when the vfio-pci kernel module is used? It is 
not suffering from the PIO limitation, right?
And I wonder if the patch can be split into three patches to simplify it, as:
1) Combine 'RTE_PCI_KDRV_IGB_UIO' & 'RTE_PCI_KDRV_UIO_GENERIC' (remove 
pci_ioport_map)
2) Update 'pci_uio_ioport_map()' to add memory map support (and update 
read/write functions according)
3) Combine vfio & uio
>>
>> Thanks,
>> ferruh
>>
>>
>>
>>> We distinguish PIO and MMIO by their address like how kernel does. It is ugly 
>>> but works.
>>>
>>> Signed-off-by: huawei.xhw <huawei.xhw@alibaba-inc.com>
<...>
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v4] pci: support both PIO and MMIO BAR for legacy virtio on x86
  2020-10-21 17:24         ` Ferruh Yigit
@ 2020-10-22  9:15           ` 谢华伟(此时此刻)
  2020-10-22  9:44             ` Ferruh Yigit
  0 siblings, 1 reply; 58+ messages in thread
From: 谢华伟(此时此刻) @ 2020-10-22  9:15 UTC (permalink / raw)
  To: Ferruh Yigit, Maxime Coquelin
  Cc: dev, anatoly.burakov, david.marchand, grive, zhihong.wang, chenbo.xia
On 2020/10/22 1:24, Ferruh Yigit wrote:
> On 10/21/2020 1:32 PM, 谢华伟(此时此刻) wrote:
>>
>> On 2020/10/21 19:49, Ferruh Yigit wrote:
>>> On 10/13/2020 9:41 AM, 谢华伟(此时此刻) wrote:
>>>> From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
>>>>
>>>> Legacy virtio-pci only supports PIO BAR resource. As we need to 
>>>> create lots of
>>>> virtio devices and PIO resource on x86 is very limited, we expose 
>>>> MMIO BAR.
>>>>
>>>> Kernel supports both PIO  and MMIO BAR for legacy virtio-pci 
>>>> device. We handles
>>>> different type of BAR in the similar way.
>>>>
>>>> In previous implementation, with igb_uio we get PIO address from 
>>>> igb_uio
>>>> sysfs entry; with uio_pci_generic, we get PIO address from
>>>> /proc/ioports.
>>>> For PIO/MMIO RW, there is different path for different drivers and 
>>>> arch.
>>>> For VFIO, PIO/MMIO RW is through syscall, which has big performance
>>>> issue.
>>>> On X86, it assumes only PIO is supported.
>>>>
>>>> All of the above is too much twisted.
>>>> This patch unifies the way to get both PIO and MMIO address for 
>>>> different driver
>>>> and arch, all from standard resource attr under pci sysfs.
>>>>
>>>
>>> As mentined above this patch does multiple things.
>>>
>>> The main target is, as far as I understand, you have a legacy virtio 
>>> device which supports "memory-mapped I/O" and "port-mapped I/O", but 
>>> virtio logic forces legacy devices to use the PIO but you want to be 
>>> able to use the MMIO with this device.
>> yes.
>>>
>>> The solution below is adding MMIO support in the PIO funciton, and 
>>> distinguish MMIO or PIO based on their address check.
>> Yes, kernel does this in the similar way.
>>>
>>>
>>> Instead of this, can't this be resolved in the virtio side, like if 
>>> the legacy device supports MMIO (detect this somehow) use the MMIO 
>>> istead of hacking PIO mapping to support MMIO?
>>
>> Get your concern.
>>
>> 1>
>>
>> If we move, I think we should move all those PCI codes into virtio 
>> side, not just the mmio part.
>>
>> Without my patch, those PCI codes are virtio-pci device specific, not 
>> generic.
>>
>> With this patch, those pci ioport map/rw code could also be used for 
>> other devices if they support both PIO and MMIO.
>>
>
> I was not suggesting moving any code into virtio, but within 
> 'vtpci_init()' what happens when "hw->modern = 1;" is set?
> And if this is set for your device, will it work without change?
Yes, this will only affect a legacy device, which uses legacy_ops to 
access port IO.
If it is a modern device, port access will go through modern_ops.
We only change the implementation in legacy_ops.
>
>> Every option is ok. Hope i make myself clear.
>>
>> 2>  I don't think this is hacking. for rte_pci_ioport_map/read/write, 
>> if ioport could be both PIO and MMIO, then everything is reasonable.
>>
>> Take how kernel does port map for example:
>>
>>      vp_dev->ioaddr = pci_iomap(pci_dev, 0, 0);
>>
>> Here io doesn't mean PIO only. It could also be MMIO. Kernel then 
>> uses ioread/write to access PIO/MMIO port.
>>
>> Actually we are pretty much the same in the interface.
>>
>> I think this patch extends rather then hacks the ioport interface to 
>> support MMIO.
>>
>>>
>>> I have other concerns, specially mergin VFIO mapping too, but lets 
>>> clarify above first.
>>
>> vfio doesn't affect other driver but only virtio.
>>
>
> Why it doesn't affect other drivers, can't there be other driver using 
> PIO?
Currently only virtio-pci uses PIO, and only the virtio PMD uses these port 
map/read/write functions.
I don't foresee any new device using PIO in the future.
/huawei
>
>> igb_uio, uio_pci_generic and vfio-pci all uses the same way to map/rw 
>> ioport.
>>
>
> For vfio, code changes 'pci_vfio_ioport_read()' to the direct address 
> read, first I don't know if this is always safe, and my question why 
> there is a syscall introduced at first place if you can read from 
> address directly?
The original vfio way works, but we don't need that syscall. Whatever the 
driver, we can use the simple approach in this patch.
/huawei
>
> Is your device works as expected when vfio-pci kernel module used? 
> Since it is not suffering from PIO limitation, right?
Certainly, I tested with the vfio module. Initially, I didn't intend to fix the 
vfio performance issue, but I heard that igb_uio will be removed.
/huawei
>
>
> And I wonder if the patch can be done as three patches to simply it, as:
> 1) Combine 'RTE_PCI_KDRV_IGB_UIO' & 'RTE_PCI_KDRV_UIO_GENERIC' (remove 
> pci_ioport_map)
> 2) Update 'pci_uio_ioport_map()' to add memory map support (and update 
> read/write functions according)
> 3) Combine vfio & uio
>
Got it. It makes sense to split, but I think this patch is already 
simple enough.
Let me check.
/huawei
>>>
>>> Thanks,
>>> ferruh
>>>
>>>
>>>
>>>> We distinguish PIO and MMIO by their address like how kernel does. 
>>>> It is ugly but works.
>>>>
>>>> Signed-off-by: huawei.xhw <huawei.xhw@alibaba-inc.com>
>
> <...>
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v4] pci: support both PIO and MMIO BAR for legacy virtio on x86
  2020-10-22  9:15           ` 谢华伟(此时此刻)
@ 2020-10-22  9:44             ` Ferruh Yigit
  2020-10-22  9:57               ` 谢华伟(此时此刻)
  0 siblings, 1 reply; 58+ messages in thread
From: Ferruh Yigit @ 2020-10-22  9:44 UTC (permalink / raw)
  To: 谢华伟(此时此刻),
	Maxime Coquelin
  Cc: dev, anatoly.burakov, david.marchand, grive, zhihong.wang, chenbo.xia
On 10/22/2020 10:15 AM, 谢华伟(此时此刻) wrote:
> 
> On 2020/10/22 1:24, Ferruh Yigit wrote:
>> On 10/21/2020 1:32 PM, 谢华伟(此时此刻) wrote:
>>>
>>> On 2020/10/21 19:49, Ferruh Yigit wrote:
>>>> On 10/13/2020 9:41 AM, 谢华伟(此时此刻) wrote:
>>>>> From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
>>>>>
>>>>> Legacy virtio-pci only supports PIO BAR resource. As we need to create lots of
>>>>> virtio devices and PIO resource on x86 is very limited, we expose MMIO BAR.
>>>>>
>>>>> Kernel supports both PIO  and MMIO BAR for legacy virtio-pci device. We 
>>>>> handles
>>>>> different type of BAR in the similar way.
>>>>>
>>>>> In previous implementation, with igb_uio we get PIO address from igb_uio
>>>>> sysfs entry; with uio_pci_generic, we get PIO address from
>>>>> /proc/ioports.
>>>>> For PIO/MMIO RW, there is different path for different drivers and arch.
>>>>> For VFIO, PIO/MMIO RW is through syscall, which has big performance
>>>>> issue.
>>>>> On X86, it assumes only PIO is supported.
>>>>>
>>>>> All of the above is too much twisted.
>>>>> This patch unifies the way to get both PIO and MMIO address for different 
>>>>> driver
>>>>> and arch, all from standard resource attr under pci sysfs.
>>>>>
>>>>
>>>> As mentined above this patch does multiple things.
>>>>
>>>> The main target is, as far as I understand, you have a legacy virtio device 
>>>> which supports "memory-mapped I/O" and "port-mapped I/O", but virtio logic 
>>>> forces legacy devices to use the PIO but you want to be able to use the MMIO 
>>>> with this device.
>>> yes.
>>>>
>>>> The solution below is adding MMIO support in the PIO funciton, and 
>>>> distinguish MMIO or PIO based on their address check.
>>> Yes, kernel does this in the similar way.
>>>>
>>>>
>>>> Instead of this, can't this be resolved in the virtio side, like if the 
>>>> legacy device supports MMIO (detect this somehow) use the MMIO istead of 
>>>> hacking PIO mapping to support MMIO?
>>>
>>> Get your concern.
>>>
>>> 1>
>>>
>>> If we move, I think we should move all those PCI codes into virtio side, not 
>>> just the mmio part.
>>>
>>> Without my patch, those PCI codes are virtio-pci device specific, not generic.
>>>
>>> With this patch, those pci ioport map/rw code could also be used for other 
>>> devices if they support both PIO and MMIO.
>>>
>>
>> I was not suggesting moving any code into virtio, but within 'vtpci_init()' 
>> what happens when "hw->modern = 1;" is set?
>> And if this is set for your device, will it work without change?
> 
> Yes, this will only affect legacy_device, which uses legacy_ops to access port io.
> 
> If is is modern_device, port access will go through modern_ops.
> 
> We only change the implementation in legacy_ops.
> 
I am saying something else.
When a device is marked as "hw->modern = 1;", it will use MMIO, right?
If, somehow, your device is marked as "hw->modern = 1;", will that path work as 
expected for your device?
> 
>>
>>> Every option is ok. Hope i make myself clear.
>>>
>>> 2>  I don't think this is hacking. for rte_pci_ioport_map/read/write, if 
>>> ioport could be both PIO and MMIO, then everything is reasonable.
>>>
>>> Take how kernel does port map for example:
>>>
>>>      vp_dev->ioaddr = pci_iomap(pci_dev, 0, 0);
>>>
>>> Here io doesn't mean PIO only. It could also be MMIO. Kernel then uses 
>>> ioread/write to access PIO/MMIO port.
>>>
>>> Actually we are pretty much the same in the interface.
>>>
>>> I think this patch extends rather then hacks the ioport interface to support 
>>> MMIO.
>>>
>>>>
>>>> I have other concerns, specially mergin VFIO mapping too, but lets clarify 
>>>> above first.
>>>
>>> vfio doesn't affect other driver but only virtio.
>>>
>>
>> Why it doesn't affect other drivers, can't there be other driver using PIO?
> 
> Currently only virtio-pci uses PIO, and only virtio PMD uses these port 
> map/read/write functions.
> 
> I don't foresee in future any new device uses PIO.
> 
I see but technically there can be other users.
> /huawei
> 
>>
>>> igb_uio, uio_pci_generic and vfio-pci all uses the same way to map/rw ioport.
>>>
>>
>> For vfio, code changes 'pci_vfio_ioport_read()' to the direct address read, 
>> first I don't know if this is always safe, and my question why there is a 
>> syscall introduced at first place if you can read from address directly?
> 
> Original vfio way works, but we don't need that syscall. Under whatever driver, 
> we could use the simple way as in this patch.
> 
If vfio works, you have already a solution, that is good. But I see you are not 
happy with its performance.
> /huawei
> 
>>
>> Is your device works as expected when vfio-pci kernel module used? Since it is 
>> not suffering from PIO limitation, right?
> 
> Certainly i tested vfio module. Firstly, i didn't intend to fix vfio performance 
> issue, but i heard that igb_uio will be removed.
> 
Yes, it will be removed in the long run.
> /huawei
> 
>>
>>
>> And I wonder if the patch can be done as three patches to simply it, as:
>> 1) Combine 'RTE_PCI_KDRV_IGB_UIO' & 'RTE_PCI_KDRV_UIO_GENERIC' (remove 
>> pci_ioport_map)
>> 2) Update 'pci_uio_ioport_map()' to add memory map support (and update 
>> read/write functions according)
>> 3) Combine vfio & uio
>>
> Got it. It makes sense to split, but i think this patch is already simple enough.
> 
The patch is doing many things at once; I think it is better to split 
logically separate issues into separate patches, even though each is simple.
> Let me check.
> 
> /huawei
> 
>>>>
>>>> Thanks,
>>>> ferruh
>>>>
>>>>
>>>>
>>>>> We distinguish PIO and MMIO by their address like how kernel does. It is 
>>>>> ugly but works.
>>>>>
>>>>> Signed-off-by: huawei.xhw <huawei.xhw@alibaba-inc.com>
>>
>> <...>
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v4] pci: support both PIO and MMIO BAR for legacy virtio on x86
  2020-10-22  9:44             ` Ferruh Yigit
@ 2020-10-22  9:57               ` 谢华伟(此时此刻)
  0 siblings, 0 replies; 58+ messages in thread
From: 谢华伟(此时此刻) @ 2020-10-22  9:57 UTC (permalink / raw)
  To: Ferruh Yigit, Maxime Coquelin
  Cc: dev, anatoly.burakov, david.marchand, grive, zhihong.wang, chenbo.xia
On 2020/10/22 17:44, Ferruh Yigit wrote:
> On 10/22/2020 10:15 AM, 谢华伟(此时此刻) wrote:
>>
>> On 2020/10/22 1:24, Ferruh Yigit wrote:
>>> On 10/21/2020 1:32 PM, 谢华伟(此时此刻) wrote:
>>>>
>>>> On 2020/10/21 19:49, Ferruh Yigit wrote:
>>>>> On 10/13/2020 9:41 AM, 谢华伟(此时此刻) wrote:
>>>>>> From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
>>>>>>
>>>>>> Legacy virtio-pci only supports PIO BAR resource. As we need to 
>>>>>> create lots of
>>>>>> virtio devices and PIO resource on x86 is very limited, we expose 
>>>>>> MMIO BAR.
>>>>>>
>>>>>> Kernel supports both PIO  and MMIO BAR for legacy virtio-pci 
>>>>>> device. We handles
>>>>>> different type of BAR in the similar way.
>>>>>>
>>>>>> In previous implementation, with igb_uio we get PIO address from 
>>>>>> igb_uio
>>>>>> sysfs entry; with uio_pci_generic, we get PIO address from
>>>>>> /proc/ioports.
>>>>>> For PIO/MMIO RW, there is different path for different drivers 
>>>>>> and arch.
>>>>>> For VFIO, PIO/MMIO RW is through syscall, which has big performance
>>>>>> issue.
>>>>>> On X86, it assumes only PIO is supported.
>>>>>>
>>>>>> All of the above is too much twisted.
>>>>>> This patch unifies the way to get both PIO and MMIO address for 
>>>>>> different driver
>>>>>> and arch, all from standard resource attr under pci sysfs.
>>>>>>
>>>>>
>>>>> As mentined above this patch does multiple things.
>>>>>
>>>>> The main target is, as far as I understand, you have a legacy 
>>>>> virtio device which supports "memory-mapped I/O" and "port-mapped 
>>>>> I/O", but virtio logic forces legacy devices to use the PIO but 
>>>>> you want to be able to use the MMIO with this device.
>>>> yes.
>>>>>
>>>>> The solution below is adding MMIO support in the PIO funciton, and 
>>>>> distinguish MMIO or PIO based on their address check.
>>>> Yes, kernel does this in the similar way.
>>>>>
>>>>>
>>>>> Instead of this, can't this be resolved in the virtio side, like 
>>>>> if the legacy device supports MMIO (detect this somehow) use the 
>>>>> MMIO istead of hacking PIO mapping to support MMIO?
>>>>
>>>> Get your concern.
>>>>
>>>> 1>
>>>>
>>>> If we move, I think we should move all those PCI codes into virtio 
>>>> side, not just the mmio part.
>>>>
>>>> Without my patch, those PCI codes are virtio-pci device specific, 
>>>> not generic.
>>>>
>>>> With this patch, those pci ioport map/rw code could also be used 
>>>> for other devices if they support both PIO and MMIO.
>>>>
>>>
>>> I was not suggesting moving any code into virtio, but within 
>>> 'vtpci_init()' what happens when "hw->modern = 1;" is set?
>>> And if this is set for your device, will it work without change?
>>
>> Yes, this will only affect legacy_device, which uses legacy_ops to 
>> access port io.
>>
>> If is is modern_device, port access will go through modern_ops.
>>
>> We only change the implementation in legacy_ops.
>>
>
> I am saying something else.
>
> When a device is marked as "hw->modern = 1;", it will use MMIO, right?
> If, somehow, your device marked as "hw->modern = 1;", will that path 
> work as expected for your device?
A modern device means virtio 1.0 and above. It has a different register 
layout, so I can't mark a legacy virtio device with an MMIO BAR as modern to 
make it work.
Is this your question?
/huawei
>
>>
>>>
>>>> Every option is ok. Hope i make myself clear.
>>>>
>>>> 2>  I don't think this is hacking. for 
>>>> rte_pci_ioport_map/read/write, if ioport could be both PIO and 
>>>> MMIO, then everything is reasonable.
>>>>
>>>> Take how kernel does port map for example:
>>>>
>>>>      vp_dev->ioaddr = pci_iomap(pci_dev, 0, 0);
>>>>
>>>> Here io doesn't mean PIO only. It could also be MMIO. Kernel then 
>>>> uses ioread/write to access PIO/MMIO port.
>>>>
>>>> Actually we are pretty much the same in the interface.
>>>>
>>>> I think this patch extends rather then hacks the ioport interface 
>>>> to support MMIO.
>>>>
>>>>>
>>>>> I have other concerns, specially mergin VFIO mapping too, but lets 
>>>>> clarify above first.
>>>>
>>>> vfio doesn't affect other driver but only virtio.
>>>>
>>>
>>> Why it doesn't affect other drivers, can't there be other driver 
>>> using PIO?
>>
>> Currently only virtio-pci uses PIO, and only virtio PMD uses these 
>> port map/read/write functions.
>>
>> I don't foresee in future any new device uses PIO.
>>
>
> I see but technically there can be other users.
If there are other PIO users, what matters is that we keep these PCI port 
map/RW functions in the PCI layer rather than move them to the virtio PMD.
Our patch only makes things better, as it supports both PIO and MMIO.
>
>> /huawei
>>
>>>
>>>> igb_uio, uio_pci_generic and vfio-pci all uses the same way to 
>>>> map/rw ioport.
>>>>
>>>
>>> For vfio, code changes 'pci_vfio_ioport_read()' to the direct 
>>> address read, first I don't know if this is always safe, and my 
>>> question why there is a syscall introduced at first place if you can 
>>> read from address directly?
>>
>> Original vfio way works, but we don't need that syscall. Under 
>> whatever driver, we could use the simple way as in this patch.
>>
>
> If vfio works, you have already a solution, that is good. But I see 
> you are not happy with its performance.
Different driver could go through the same code path. It doesn't need to 
be that complicated.
Or we can say, this patch solves vfio performance issue in the mean time.
>
>> /huawei
>>
>>>
>>> Is your device works as expected when vfio-pci kernel module used? 
>>> Since it is not suffering from PIO limitation, right?
>>
>> Certainly i tested vfio module. Firstly, i didn't intend to fix vfio 
>> performance issue, but i heard that igb_uio will be removed.
>>
>
> Yes, it will be removed in the long run.
>
>> /huawei
>>
>>>
>>>
>>> And I wonder if the patch can be done as three patches to simply it, 
>>> as:
>>> 1) Combine 'RTE_PCI_KDRV_IGB_UIO' & 'RTE_PCI_KDRV_UIO_GENERIC' 
>>> (remove pci_ioport_map)
>>> 2) Update 'pci_uio_ioport_map()' to add memory map support (and 
>>> update read/write functions according)
>>> 3) Combine vfio & uio
>>>
>> Got it. It makes sense to split, but i think this patch is already 
>> simple enough.
>>
>
> The patch is doing many things in one patch, I think it is better to 
> separate logically separate issues, although they are simple.
Splitting.
>
>> Let me check.
>>
>> /huawei
>>
>>>>>
>>>>> Thanks,
>>>>> ferruh
>>>>>
>>>>>
>>>>>
>>>>>> We distinguish PIO and MMIO by their address like how kernel 
>>>>>> does. It is ugly but works.
>>>>>>
>>>>>> Signed-off-by: huawei.xhw <huawei.xhw@alibaba-inc.com>
>>>
>>> <...>
^ permalink raw reply	[flat|nested] 58+ messages in thread
* [dpdk-dev] [PATCH v5 0/3] support both PIO and MMIO BAR for virtio PMD
  2020-09-30 14:59 [dpdk-dev] [PATCH v2] pci: support both PIO and MMIO BAR for legacy virtio on x86 谢华伟(此时此刻)
                   ` (2 preceding siblings ...)
  2020-10-13  8:41 ` [dpdk-dev] [PATCH v4] support both PIO and MMIO bar for virtio pci device 谢华伟(此时此刻)
@ 2020-10-22 15:51 ` 谢华伟(此时此刻)
  2020-10-22 15:51   ` [dpdk-dev] [PATCH v5 1/3] PCI: use PCI standard sysfs entry to get PIO address 谢华伟(此时此刻)
                     ` (6 more replies)
  3 siblings, 7 replies; 58+ messages in thread
From: 谢华伟(此时此刻) @ 2020-10-22 15:51 UTC (permalink / raw)
  To: ferruh.yigit
  Cc: dev, maxime.coquelin, anatoly.burakov, david.marchand,
	zhihong.wang, chenbo.xia, grive,
	谢华伟(此时此刻)
From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
Legacy virtio-pci only supports PIO BAR resource. As we need to create lots of
virtio devices and PIO resource on x86 is very limited, we expose MMIO BAR.
The kernel supports both PIO and MMIO BARs for legacy virtio-pci devices. We handle
the different types of BAR in a similar way.
In previous implementation, with igb_uio we get PIO address from igb_uio
sysfs entry; with uio_pci_generic, we get PIO address from
/proc/ioports.
For PIO/MMIO RW, there is different path for different drivers and arch.
For VFIO, PIO/MMIO RW is through syscall, which has big performance
issue.
On X86, it assumes only PIO is supported.
All of the above is too convoluted.
This patch unifies the way to get both PIO and MMIO addresses for different drivers
and architectures, all from the standard resource attribute under PCI sysfs.
We distinguish PIO and MMIO by their address like how kernel does. It is ugly but works.
v2 changes:
	 - add more explanation in the commit message
v3 changes:
	 - fix patch format issues
v4 changes:
	 - fixes for RTE_KDRV_UIO_GENERIC -> RTE_PCI_KDRV_UIO_GENERIC
v5 changes:
	 - split into three separate patches
huawei.xhw (3):
  PCI: use PCI standard sysfs entry to get PIO address
  PCI: support MMIO in rte_pci_ioport_map/unap/read/write
  PCI: don't use vfio ioctl call to access PIO resource
 drivers/bus/pci/linux/pci.c     |  89 +-------------------
 drivers/bus/pci/linux/pci_uio.c | 177 ++++++++++++++++++++++++++++------------
 2 files changed, 128 insertions(+), 138 deletions(-)
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 58+ messages in thread
* [dpdk-dev] [PATCH v5 1/3] PCI: use PCI standard sysfs entry to get PIO address
  2020-10-22 15:51 ` [dpdk-dev] [PATCH v5 0/3] support both PIO and MMIO BAR for virtio PMD 谢华伟(此时此刻)
@ 2020-10-22 15:51   ` 谢华伟(此时此刻)
  2021-01-12  8:07     ` Maxime Coquelin
  2020-10-22 15:51   ` [dpdk-dev] [PATCH v5 2/3] PCI: support MMIO in rte_pci_ioport_map/unap/read/write 谢华伟(此时此刻)
                     ` (5 subsequent siblings)
  6 siblings, 1 reply; 58+ messages in thread
From: 谢华伟(此时此刻) @ 2020-10-22 15:51 UTC (permalink / raw)
  To: ferruh.yigit
  Cc: dev, maxime.coquelin, anatoly.burakov, david.marchand,
	zhihong.wang, chenbo.xia, grive,
	谢华伟(此时此刻)
From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
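Previously, with igb_uio we got the PIO address from the igb_uio sysfs entry, and
with uio_pci_generic we got the PIO address from /proc/ioports.
Read it from the standard "resource" attribute under PCI sysfs for both drivers instead.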
Signed-off-by: huawei.xhw <huawei.xhw@alibaba-inc.com>
---
 drivers/bus/pci/linux/pci.c     | 77 -----------------------------------------
 drivers/bus/pci/linux/pci_uio.c | 64 ++++++++++++++++++++++++----------
 2 files changed, 46 insertions(+), 95 deletions(-)
diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
index 2e1808b..0f38abf 100644
--- a/drivers/bus/pci/linux/pci.c
+++ b/drivers/bus/pci/linux/pci.c
@@ -677,71 +677,6 @@ int rte_pci_write_config(const struct rte_pci_device *device,
 	}
 }
 
-#if defined(RTE_ARCH_X86)
-static int
-pci_ioport_map(struct rte_pci_device *dev, int bar __rte_unused,
-		struct rte_pci_ioport *p)
-{
-	uint16_t start, end;
-	FILE *fp;
-	char *line = NULL;
-	char pci_id[16];
-	int found = 0;
-	size_t linesz;
-
-	if (rte_eal_iopl_init() != 0) {
-		RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for PCI device %s\n",
-			__func__, dev->name);
-		return -1;
-	}
-
-	snprintf(pci_id, sizeof(pci_id), PCI_PRI_FMT,
-		 dev->addr.domain, dev->addr.bus,
-		 dev->addr.devid, dev->addr.function);
-
-	fp = fopen("/proc/ioports", "r");
-	if (fp == NULL) {
-		RTE_LOG(ERR, EAL, "%s(): can't open ioports\n", __func__);
-		return -1;
-	}
-
-	while (getdelim(&line, &linesz, '\n', fp) > 0) {
-		char *ptr = line;
-		char *left;
-		int n;
-
-		n = strcspn(ptr, ":");
-		ptr[n] = 0;
-		left = &ptr[n + 1];
-
-		while (*left && isspace(*left))
-			left++;
-
-		if (!strncmp(left, pci_id, strlen(pci_id))) {
-			found = 1;
-
-			while (*ptr && isspace(*ptr))
-				ptr++;
-
-			sscanf(ptr, "%04hx-%04hx", &start, &end);
-
-			break;
-		}
-	}
-
-	free(line);
-	fclose(fp);
-
-	if (!found)
-		return -1;
-
-	p->base = start;
-	RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%x\n", start);
-
-	return 0;
-}
-#endif
-
 int
 rte_pci_ioport_map(struct rte_pci_device *dev, int bar,
 		struct rte_pci_ioport *p)
@@ -756,14 +691,8 @@ int rte_pci_write_config(const struct rte_pci_device *device,
 		break;
 #endif
 	case RTE_PCI_KDRV_IGB_UIO:
-		ret = pci_uio_ioport_map(dev, bar, p);
-		break;
 	case RTE_PCI_KDRV_UIO_GENERIC:
-#if defined(RTE_ARCH_X86)
-		ret = pci_ioport_map(dev, bar, p);
-#else
 		ret = pci_uio_ioport_map(dev, bar, p);
-#endif
 		break;
 	default:
 		break;
@@ -830,14 +759,8 @@ int rte_pci_write_config(const struct rte_pci_device *device,
 		break;
 #endif
 	case RTE_PCI_KDRV_IGB_UIO:
-		ret = pci_uio_ioport_unmap(p);
-		break;
 	case RTE_PCI_KDRV_UIO_GENERIC:
-#if defined(RTE_ARCH_X86)
-		ret = 0;
-#else
 		ret = pci_uio_ioport_unmap(p);
-#endif
 		break;
 	default:
 		break;
diff --git a/drivers/bus/pci/linux/pci_uio.c b/drivers/bus/pci/linux/pci_uio.c
index f3305a2..01f2a40 100644
--- a/drivers/bus/pci/linux/pci_uio.c
+++ b/drivers/bus/pci/linux/pci_uio.c
@@ -373,10 +373,13 @@
 pci_uio_ioport_map(struct rte_pci_device *dev, int bar,
 		   struct rte_pci_ioport *p)
 {
+	FILE *f = NULL;
 	char dirname[PATH_MAX];
 	char filename[PATH_MAX];
-	int uio_num;
-	unsigned long start;
+	char buf[BUFSIZ];
+	uint64_t phys_addr, end_addr, flags;
+	unsigned long base;
+	int i;
 
 	if (rte_eal_iopl_init() != 0) {
 		RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for PCI device %s\n",
@@ -384,41 +387,66 @@
 		return -1;
 	}
 
-	uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 0);
-	if (uio_num < 0)
+	/* open and read addresses of the corresponding resource in sysfs */
+	snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT "/resource",
+		rte_pci_get_sysfs_path(), dev->addr.domain, dev->addr.bus,
+		dev->addr.devid, dev->addr.function);
+	f = fopen(filename, "r");
+	if (f == NULL) {
+		RTE_LOG(ERR, EAL, "%s(): Cannot open sysfs resource: %s\n",
+			__func__, strerror(errno));
 		return -1;
+	}
 
-	/* get portio start */
-	snprintf(filename, sizeof(filename),
-		 "%s/portio/port%d/start", dirname, bar);
-	if (eal_parse_sysfs_value(filename, &start) < 0) {
-		RTE_LOG(ERR, EAL, "%s(): cannot parse portio start\n",
-			__func__);
-		return -1;
+	for (i = 0; i < bar + 1; i++) {
+		if (fgets(buf, sizeof(buf), f) == NULL) {
+			RTE_LOG(ERR, EAL, "%s(): Cannot read sysfs resource\n", __func__);
+			goto error;
+		}
 	}
-	/* ensure we don't get anything funny here, read/write will cast to
-	 * uin16_t */
-	if (start > UINT16_MAX)
-		return -1;
+	if (pci_parse_one_sysfs_resource(buf, sizeof(buf), &phys_addr,
+		&end_addr, &flags) < 0)
+		goto error;
+
+	if (!(flags & IORESOURCE_IO)) {
+		RTE_LOG(ERR, EAL, "%s(): bar resource other than IO is not supported\n", __func__);
+		goto error;
+	}
+	base = (unsigned long)phys_addr;
+	RTE_LOG(INFO, EAL, "%s(): PIO BAR %08lx detected\n", __func__, base);
+
+	if (base > UINT16_MAX)
+		goto error;
 
 	/* FIXME only for primary process ? */
 	if (dev->intr_handle.type == RTE_INTR_HANDLE_UNKNOWN) {
+		int uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 0);
+		if (uio_num < 0) {
+			RTE_LOG(ERR, EAL, "cannot open %s: %s\n",
+				dirname, strerror(errno));
+			goto error;
+		}
 
 		snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num);
 		dev->intr_handle.fd = open(filename, O_RDWR);
 		if (dev->intr_handle.fd < 0) {
 			RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
 				filename, strerror(errno));
-			return -1;
+			goto error;
 		}
 		dev->intr_handle.type = RTE_INTR_HANDLE_UIO;
 	}
 
-	RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%lx\n", start);
+	RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%lx\n", base);
 
-	p->base = start;
+	p->base = base;
 	p->len = 0;
+	fclose(f);
 	return 0;
+error:
+	if (f)
+		fclose(f);
+	return -1;
 }
 #else
 int
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 58+ messages in thread
* [dpdk-dev] [PATCH v5 2/3] PCI: support MMIO in rte_pci_ioport_map/unap/read/write
  2020-10-22 15:51 ` [dpdk-dev] [PATCH v5 0/3] support both PIO and MMIO BAR for virtio PMD 谢华伟(此时此刻)
  2020-10-22 15:51   ` [dpdk-dev] [PATCH v5 1/3] PCI: use PCI standard sysfs entry to get PIO address 谢华伟(此时此刻)
@ 2020-10-22 15:51   ` 谢华伟(此时此刻)
  2021-01-12  8:23     ` Maxime Coquelin
                       ` (2 more replies)
  2020-10-22 15:51   ` [dpdk-dev] [PATCH v5 3/3] PCI: don't use vfio ioctl call to access PIO resource 谢华伟(此时此刻)
                     ` (4 subsequent siblings)
  6 siblings, 3 replies; 58+ messages in thread
From: 谢华伟(此时此刻) @ 2020-10-22 15:51 UTC (permalink / raw)
  To: ferruh.yigit
  Cc: dev, maxime.coquelin, anatoly.burakov, david.marchand,
	zhihong.wang, chenbo.xia, grive,
	谢华伟(此时此刻)
From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
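If the BAR is an IO BAR, we get the PIO address.
If the BAR is an MMIO BAR, we get the mapped virtual address.
We distinguish PIO and MMIO by their address, like the kernel does: an address
below PIO_MAX (0x10000) is treated as a port number, anything else as a mapped
virtual address.
ioread/write8/16/32 are provided to access PIO/MMIO.
BTW, for virtio on architectures other than x86, the BAR flag indicates PIO but
the BAR is memory-mapped.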
Signed-off-by: huawei.xhw <huawei.xhw@alibaba-inc.com>
---
 drivers/bus/pci/linux/pci.c     |   4 --
 drivers/bus/pci/linux/pci_uio.c | 123 ++++++++++++++++++++++++++--------------
 2 files changed, 82 insertions(+), 45 deletions(-)
diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
index 0f38abf..0dc99e9 100644
--- a/drivers/bus/pci/linux/pci.c
+++ b/drivers/bus/pci/linux/pci.c
@@ -715,8 +715,6 @@ int rte_pci_write_config(const struct rte_pci_device *device,
 		break;
 #endif
 	case RTE_PCI_KDRV_IGB_UIO:
-		pci_uio_ioport_read(p, data, len, offset);
-		break;
 	case RTE_PCI_KDRV_UIO_GENERIC:
 		pci_uio_ioport_read(p, data, len, offset);
 		break;
@@ -736,8 +734,6 @@ int rte_pci_write_config(const struct rte_pci_device *device,
 		break;
 #endif
 	case RTE_PCI_KDRV_IGB_UIO:
-		pci_uio_ioport_write(p, data, len, offset);
-		break;
 	case RTE_PCI_KDRV_UIO_GENERIC:
 		pci_uio_ioport_write(p, data, len, offset);
 		break;
diff --git a/drivers/bus/pci/linux/pci_uio.c b/drivers/bus/pci/linux/pci_uio.c
index 01f2a40..c19382f 100644
--- a/drivers/bus/pci/linux/pci_uio.c
+++ b/drivers/bus/pci/linux/pci_uio.c
@@ -379,14 +379,9 @@
 	char buf[BUFSIZ];
 	uint64_t phys_addr, end_addr, flags;
 	unsigned long base;
+	bool iobar;
 	int i;
 
-	if (rte_eal_iopl_init() != 0) {
-		RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for PCI device %s\n",
-			__func__, dev->name);
-		return -1;
-	}
-
 	/* open and read addresses of the corresponding resource in sysfs */
 	snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT "/resource",
 		rte_pci_get_sysfs_path(), dev->addr.domain, dev->addr.bus,
@@ -408,15 +403,30 @@
 		&end_addr, &flags) < 0)
 		goto error;
 
-	if (!(flags & IORESOURCE_IO)) {
-		RTE_LOG(ERR, EAL, "%s(): bar resource other than IO is not supported\n", __func__);
+	if (flags & IORESOURCE_IO) {
+		iobar = 1;
+		base = (unsigned long)phys_addr;
+		RTE_LOG(INFO, EAL, "%s(): PIO BAR %08lx detected\n", __func__, base);
+	} else if (flags & IORESOURCE_MEM) {
+		iobar = 0;
+		base = (unsigned long)dev->mem_resource[bar].addr;
+		RTE_LOG(INFO, EAL, "%s(): MMIO BAR %08lx detected\n", __func__, base);
+	} else {
+		RTE_LOG(ERR, EAL, "%s(): unknown BAR type\n", __func__);
+		goto error;
+	}
+
+
+	if (iobar && rte_eal_iopl_init() != 0) {
+		RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for PCI device %s\n",
+			__func__, dev->name);
 		goto error;
 	}
-	base = (unsigned long)phys_addr;
-	RTE_LOG(INFO, EAL, "%s(): PIO BAR %08lx detected\n", __func__, base);
 
-	if (base > UINT16_MAX)
+	if (iobar && (base > UINT16_MAX)) {
+		RTE_LOG(ERR, EAL, "%s(): %08lx too large PIO resource\n", __func__, base);
 		goto error;
+	}
 
 	/* FIXME only for primary process ? */
 	if (dev->intr_handle.type == RTE_INTR_HANDLE_UNKNOWN) {
@@ -517,6 +527,61 @@
 }
 #endif
 
+#define PIO_MAX 0x10000
+static inline uint8_t ioread8(void *addr)
+{
+	uint8_t val;
+
+	val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
+		*(volatile uint8_t *)addr :
+		inb((unsigned long)addr);
+
+	return val;
+}
+
+static inline uint16_t ioread16(void *addr)
+{
+	uint16_t val;
+
+	val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
+		*(volatile uint16_t *)addr :
+		inw((unsigned long)addr);
+
+	return val;
+}
+
+static inline uint32_t ioread32(void *addr)
+{
+	uint32_t val;
+
+	val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
+		*(volatile uint32_t *)addr :
+		inl((unsigned long)addr);
+
+	return val;
+}
+
+static inline void iowrite8(uint8_t val, void *addr)
+{
+	(uint64_t)(uintptr_t)addr >= PIO_MAX ?
+		*(volatile uint8_t *)addr = val :
+		outb(val, (unsigned long)addr);
+}
+
+static inline void iowrite16(uint16_t val, void *addr)
+{
+	(uint64_t)(uintptr_t)addr >= PIO_MAX ?
+		*(volatile uint16_t *)addr = val :
+		outw(val, (unsigned long)addr);
+}
+
+static inline void iowrite32(uint32_t val, void *addr)
+{
+	(uint64_t)(uintptr_t)addr >= PIO_MAX ?
+		*(volatile uint32_t *)addr = val :
+		outl(val, (unsigned long)addr);
+}
+
 void
 pci_uio_ioport_read(struct rte_pci_ioport *p,
 		    void *data, size_t len, off_t offset)
@@ -528,25 +593,13 @@
 	for (d = data; len > 0; d += size, reg += size, len -= size) {
 		if (len >= 4) {
 			size = 4;
-#if defined(RTE_ARCH_X86)
-			*(uint32_t *)d = inl(reg);
-#else
-			*(uint32_t *)d = *(volatile uint32_t *)reg;
-#endif
+			*(uint32_t *)d = ioread32((void *)reg);
 		} else if (len >= 2) {
 			size = 2;
-#if defined(RTE_ARCH_X86)
-			*(uint16_t *)d = inw(reg);
-#else
-			*(uint16_t *)d = *(volatile uint16_t *)reg;
-#endif
+			*(uint16_t *)d = ioread16((void *)reg);
 		} else {
 			size = 1;
-#if defined(RTE_ARCH_X86)
-			*d = inb(reg);
-#else
-			*d = *(volatile uint8_t *)reg;
-#endif
+			*d = ioread8((void *)reg);
 		}
 	}
 }
@@ -562,25 +615,13 @@
 	for (s = data; len > 0; s += size, reg += size, len -= size) {
 		if (len >= 4) {
 			size = 4;
-#if defined(RTE_ARCH_X86)
-			outl_p(*(const uint32_t *)s, reg);
-#else
-			*(volatile uint32_t *)reg = *(const uint32_t *)s;
-#endif
+			iowrite32(*(const uint32_t *)s, (void *)reg);
 		} else if (len >= 2) {
 			size = 2;
-#if defined(RTE_ARCH_X86)
-			outw_p(*(const uint16_t *)s, reg);
-#else
-			*(volatile uint16_t *)reg = *(const uint16_t *)s;
-#endif
+			iowrite16(*(const uint16_t *)s, (void *)reg);
 		} else {
 			size = 1;
-#if defined(RTE_ARCH_X86)
-			outb_p(*s, reg);
-#else
-			*(volatile uint8_t *)reg = *s;
-#endif
+			iowrite8(*s, (void *)reg);
 		}
 	}
 }
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 58+ messages in thread
* [dpdk-dev] [PATCH v5 3/3] PCI: don't use vfio ioctl call to access PIO resource
  2020-10-22 15:51 ` [dpdk-dev] [PATCH v5 0/3] support both PIO and MMIO BAR for virtio PMD 谢华伟(此时此刻)
  2020-10-22 15:51   ` [dpdk-dev] [PATCH v5 1/3] PCI: use PCI standard sysfs entry to get PIO address 谢华伟(此时此刻)
  2020-10-22 15:51   ` [dpdk-dev] [PATCH v5 2/3] PCI: support MMIO in rte_pci_ioport_map/unap/read/write 谢华伟(此时此刻)
@ 2020-10-22 15:51   ` 谢华伟(此时此刻)
  2021-01-12  9:37     ` Maxime Coquelin
  2020-10-27  8:50   ` [dpdk-dev] [PATCH v5 0/3] support both PIO and MMIO BAR for virtio PMD 谢华伟(此时此刻)
                     ` (3 subsequent siblings)
  6 siblings, 1 reply; 58+ messages in thread
From: 谢华伟(此时此刻) @ 2020-10-22 15:51 UTC (permalink / raw)
  To: ferruh.yigit
  Cc: dev, maxime.coquelin, anatoly.burakov, david.marchand,
	zhihong.wang, chenbo.xia, grive,
	谢华伟(此时此刻)
From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
VFIO should use the same way to map/read/write PORT IO as UIO, for
virtio PMD.
Signed-off-by: huawei.xhw <huawei.xhw@alibaba-inc.com>
---
 drivers/bus/pci/linux/pci.c     | 8 ++++----
 drivers/bus/pci/linux/pci_uio.c | 4 +++-
 2 files changed, 7 insertions(+), 5 deletions(-)
diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
index 0dc99e9..2ed9f2b 100644
--- a/drivers/bus/pci/linux/pci.c
+++ b/drivers/bus/pci/linux/pci.c
@@ -687,7 +687,7 @@ int rte_pci_write_config(const struct rte_pci_device *device,
 #ifdef VFIO_PRESENT
 	case RTE_PCI_KDRV_VFIO:
 		if (pci_vfio_is_enabled())
-			ret = pci_vfio_ioport_map(dev, bar, p);
+			ret = pci_uio_ioport_map(dev, bar, p);
 		break;
 #endif
 	case RTE_PCI_KDRV_IGB_UIO:
@@ -711,7 +711,7 @@ int rte_pci_write_config(const struct rte_pci_device *device,
 	switch (p->dev->kdrv) {
 #ifdef VFIO_PRESENT
 	case RTE_PCI_KDRV_VFIO:
-		pci_vfio_ioport_read(p, data, len, offset);
+		pci_uio_ioport_read(p, data, len, offset);
 		break;
 #endif
 	case RTE_PCI_KDRV_IGB_UIO:
@@ -730,7 +730,7 @@ int rte_pci_write_config(const struct rte_pci_device *device,
 	switch (p->dev->kdrv) {
 #ifdef VFIO_PRESENT
 	case RTE_PCI_KDRV_VFIO:
-		pci_vfio_ioport_write(p, data, len, offset);
+		pci_uio_ioport_write(p, data, len, offset);
 		break;
 #endif
 	case RTE_PCI_KDRV_IGB_UIO:
@@ -751,7 +751,7 @@ int rte_pci_write_config(const struct rte_pci_device *device,
 #ifdef VFIO_PRESENT
 	case RTE_PCI_KDRV_VFIO:
 		if (pci_vfio_is_enabled())
-			ret = pci_vfio_ioport_unmap(p);
+			ret = pci_uio_ioport_unmap(p);
 		break;
 #endif
 	case RTE_PCI_KDRV_IGB_UIO:
diff --git a/drivers/bus/pci/linux/pci_uio.c b/drivers/bus/pci/linux/pci_uio.c
index c19382f..463792b 100644
--- a/drivers/bus/pci/linux/pci_uio.c
+++ b/drivers/bus/pci/linux/pci_uio.c
@@ -429,7 +429,9 @@
 	}
 
 	/* FIXME only for primary process ? */
-	if (dev->intr_handle.type == RTE_INTR_HANDLE_UNKNOWN) {
+	if (dev->intr_handle.type == RTE_INTR_HANDLE_UNKNOWN &&
+		(dev->kdrv == RTE_PCI_KDRV_IGB_UIO ||
+		 dev->kdrv == RTE_PCI_KDRV_UIO_GENERIC)) {
 		int uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 0);
 		if (uio_num < 0) {
 			RTE_LOG(ERR, EAL, "cannot open %s: %s\n",
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v5 0/3] support both PIO and MMIO BAR for virtio PMD
  2020-10-22 15:51 ` [dpdk-dev] [PATCH v5 0/3] support both PIO and MMIO BAR for virtio PMD 谢华伟(此时此刻)
                     ` (2 preceding siblings ...)
  2020-10-22 15:51   ` [dpdk-dev] [PATCH v5 3/3] PCI: don't use vfio ioctl call to access PIO resource 谢华伟(此时此刻)
@ 2020-10-27  8:50   ` 谢华伟(此时此刻)
  2020-10-28  3:48     ` 谢华伟(此时此刻)
  2020-11-02 11:56   ` 谢华伟(此时此刻)
                     ` (2 subsequent siblings)
  6 siblings, 1 reply; 58+ messages in thread
From: 谢华伟(此时此刻) @ 2020-10-27  8:50 UTC (permalink / raw)
  To: ferruh.yigit
  Cc: dev, maxime.coquelin, anatoly.burakov, david.marchand,
	zhihong.wang, chenbo.xia, grive
@Ferruh: this patch is tested with both PIO and MMIO bar using testpmd 
and start tx_first.
vfio/igb_uio tested with MMIO bar (uio_pci_generic doesn't work with 
msix, so it isn't tested)
uio_pci_generic tested with PIO bar (igb_uio has unknown symbols, not 
tested).
Weird: igb_uio doesn't have a Makefile.
On 2020/10/22 23:51, 谢华伟(此时此刻) wrote:
> From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
>
> Legacy virtio-pci only supports PIO BAR resource. As we need to create lots of
> virtio devices and PIO resource on x86 is very limited, we expose MMIO BAR.
>
> Kernel supports both PIO  and MMIO BAR for legacy virtio-pci device. We handles
> different type of BAR in the similar way.
>
> In previous implementation, with igb_uio we get PIO address from igb_uio
> sysfs entry; with uio_pci_generic, we get PIO address from
> /proc/ioports.
> For PIO/MMIO RW, there is different path for different drivers and arch.
> For VFIO, PIO/MMIO RW is through syscall, which has big performance
> issue.
> On X86, it assumes only PIO is supported.
>
> All of the above is too much twisted.
> This patch unifies the way to get both PIO and MMIO address for different driver
> and arch, all from standard resource attr under pci sysfs.
>
> We distinguish PIO and MMIO by their address like how kernel does. It is ugly but works.
>
> v2 changes:
> 	 - add more explanation in the commit message
>
> v3 changes:
> 	 - fix patch format issues
>
> v4 changes:
> 	 - fixes for RTE_KDRV_UIO_GENERIC -> RTE_PCI_KDRV_UIO_GENERIC
>
> v5 changes:
> 	 - split into three seperate patches
>
> huawei.xhw (3):
>    PCI: use PCI standard sysfs entry to get PIO address
>    PCI: support MMIO in rte_pci_ioport_map/unap/read/write
>    PCI: don't use vfio ioctl call to access PIO resource
>
>   drivers/bus/pci/linux/pci.c     |  89 +-------------------
>   drivers/bus/pci/linux/pci_uio.c | 177 ++++++++++++++++++++++++++++------------
>   2 files changed, 128 insertions(+), 138 deletions(-)
>
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v5 0/3] support both PIO and MMIO BAR for virtio PMD
  2020-10-27  8:50   ` [dpdk-dev] [PATCH v5 0/3] support both PIO and MMIO BAR for virtio PMD 谢华伟(此时此刻)
@ 2020-10-28  3:48     ` 谢华伟(此时此刻)
  0 siblings, 0 replies; 58+ messages in thread
From: 谢华伟(此时此刻) @ 2020-10-28  3:48 UTC (permalink / raw)
  To: ferruh.yigit
  Cc: dev, maxime.coquelin, anatoly.burakov, david.marchand,
	zhihong.wang, chenbo.xia, grive
On 2020/10/27 16:50, chris wrote:
> @Ferruh: this patch is tested with both PIO and MMIO bar using testpmd 
> and start tx_first.
>
> vfio/igb_uio tested with MMIO bar (uio_pci_generic doesn't work with 
> msix, so it isn't tested)
>
> uio_pci_generic tested with PIO bar (igb_uio has unknown symbols, not 
> tested).
     igb_uio with PIO bar is also tested.
>
> Weird igb_uio doens't have Makefile.
>
>
> On 2020/10/22 23:51, 谢华伟(此时此刻) wrote:
>> From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
>>
>> Legacy virtio-pci only supports PIO BAR resource. As we need to 
>> create lots of
>> virtio devices and PIO resource on x86 is very limited, we expose 
>> MMIO BAR.
>>
>> Kernel supports both PIO  and MMIO BAR for legacy virtio-pci device. 
>> We handles
>> different type of BAR in the similar way.
>>
>> In previous implementation, with igb_uio we get PIO address from igb_uio
>> sysfs entry; with uio_pci_generic, we get PIO address from
>> /proc/ioports.
>> For PIO/MMIO RW, there is different path for different drivers and arch.
>> For VFIO, PIO/MMIO RW is through syscall, which has big performance
>> issue.
>> On X86, it assumes only PIO is supported.
>>
>> All of the above is too much twisted.
>> This patch unifies the way to get both PIO and MMIO address for 
>> different driver
>> and arch, all from standard resource attr under pci sysfs.
>>
>> We distinguish PIO and MMIO by their address like how kernel does. It 
>> is ugly but works.
>>
>> v2 changes:
>>      - add more explanation in the commit message
>>
>> v3 changes:
>>      - fix patch format issues
>>
>> v4 changes:
>>      - fixes for RTE_KDRV_UIO_GENERIC -> RTE_PCI_KDRV_UIO_GENERIC
>>
>> v5 changes:
>>      - split into three seperate patches
>>
>> huawei.xhw (3):
>>    PCI: use PCI standard sysfs entry to get PIO address
>>    PCI: support MMIO in rte_pci_ioport_map/unap/read/write
>>    PCI: don't use vfio ioctl call to access PIO resource
>>
>>   drivers/bus/pci/linux/pci.c     |  89 +-------------------
>>   drivers/bus/pci/linux/pci_uio.c | 177 
>> ++++++++++++++++++++++++++++------------
>>   2 files changed, 128 insertions(+), 138 deletions(-)
>>
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v5 0/3] support both PIO and MMIO BAR for virtio PMD
  2020-10-22 15:51 ` [dpdk-dev] [PATCH v5 0/3] support both PIO and MMIO BAR for virtio PMD 谢华伟(此时此刻)
                     ` (3 preceding siblings ...)
  2020-10-27  8:50   ` [dpdk-dev] [PATCH v5 0/3] support both PIO and MMIO BAR for virtio PMD 谢华伟(此时此刻)
@ 2020-11-02 11:56   ` 谢华伟(此时此刻)
  2020-11-10 12:35   ` 谢华伟(此时此刻)
  2021-01-12 17:37   ` Maxime Coquelin
  6 siblings, 0 replies; 58+ messages in thread
From: 谢华伟(此时此刻) @ 2020-11-02 11:56 UTC (permalink / raw)
  To: ferruh.yigit
  Cc: dev, maxime.coquelin, anatoly.burakov, david.marchand,
	zhihong.wang, chenbo.xia, grive
Hi Ferruh:
Comments to this v5 version?
On 2020/10/22 23:51, 谢华伟(此时此刻) wrote:
> From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
>
> Legacy virtio-pci only supports PIO BAR resource. As we need to create lots of
> virtio devices and PIO resource on x86 is very limited, we expose MMIO BAR.
>
> Kernel supports both PIO  and MMIO BAR for legacy virtio-pci device. We handles
> different type of BAR in the similar way.
>
> In previous implementation, with igb_uio we get PIO address from igb_uio
> sysfs entry; with uio_pci_generic, we get PIO address from
> /proc/ioports.
> For PIO/MMIO RW, there is different path for different drivers and arch.
> For VFIO, PIO/MMIO RW is through syscall, which has big performance
> issue.
> On X86, it assumes only PIO is supported.
>
> All of the above is too much twisted.
> This patch unifies the way to get both PIO and MMIO address for different driver
> and arch, all from standard resource attr under pci sysfs.
>
> We distinguish PIO and MMIO by their address like how kernel does. It is ugly but works.
>
> v2 changes:
> 	 - add more explanation in the commit message
>
> v3 changes:
> 	 - fix patch format issues
>
> v4 changes:
> 	 - fixes for RTE_KDRV_UIO_GENERIC -> RTE_PCI_KDRV_UIO_GENERIC
>
> v5 changes:
> 	 - split into three seperate patches
>
> huawei.xhw (3):
>    PCI: use PCI standard sysfs entry to get PIO address
>    PCI: support MMIO in rte_pci_ioport_map/unap/read/write
>    PCI: don't use vfio ioctl call to access PIO resource
>
>   drivers/bus/pci/linux/pci.c     |  89 +-------------------
>   drivers/bus/pci/linux/pci_uio.c | 177 ++++++++++++++++++++++++++++------------
>   2 files changed, 128 insertions(+), 138 deletions(-)
>
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v5 0/3] support both PIO and MMIO BAR for virtio PMD
  2020-10-22 15:51 ` [dpdk-dev] [PATCH v5 0/3] support both PIO and MMIO BAR for virtio PMD 谢华伟(此时此刻)
                     ` (4 preceding siblings ...)
  2020-11-02 11:56   ` 谢华伟(此时此刻)
@ 2020-11-10 12:35   ` 谢华伟(此时此刻)
  2020-11-10 12:42     ` David Marchand
  2021-01-12 17:37   ` Maxime Coquelin
  6 siblings, 1 reply; 58+ messages in thread
From: 谢华伟(此时此刻) @ 2020-11-10 12:35 UTC (permalink / raw)
  To: David Marchand, ferruh.yigit
  Cc: dev, maxime.coquelin, anatoly.burakov, zhihong.wang, chenbo.xia,
	grive, Thomas Monjalon
Hi David:
I see that you are assigned as the reviewer of this patch, and Ferruh has 
helped review it. I rebased this patch based on his comments.
Previously there are different ways to get port address based on 
different DPDK uio driver(IGB_UIO/UIO_PCI_GENERIC/VFIO), which is 
actually not necessary.
This patch makes IO/MMIO port map/RW API more generic, which also 
supports MMIO.  It also fixes performance issue with vfio.
Could you spare some time to review this?
Thanks
On 2020/10/22 23:51, 谢华伟(此时此刻) wrote:
> From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
>
> Legacy virtio-pci only supports PIO BAR resource. As we need to create lots of
> virtio devices and PIO resource on x86 is very limited, we expose MMIO BAR.
>
> Kernel supports both PIO  and MMIO BAR for legacy virtio-pci device. We handles
> different type of BAR in the similar way.
>
> In previous implementation, with igb_uio we get PIO address from igb_uio
> sysfs entry; with uio_pci_generic, we get PIO address from
> /proc/ioports.
> For PIO/MMIO RW, there is different path for different drivers and arch.
> For VFIO, PIO/MMIO RW is through syscall, which has big performance
> issue.
> On X86, it assumes only PIO is supported.
>
> All of the above is too much twisted.
> This patch unifies the way to get both PIO and MMIO address for different driver
> and arch, all from standard resource attr under pci sysfs.
>
> We distinguish PIO and MMIO by their address like how kernel does. It is ugly but works.
>
> v2 changes:
> 	 - add more explanation in the commit message
>
> v3 changes:
> 	 - fix patch format issues
>
> v4 changes:
> 	 - fixes for RTE_KDRV_UIO_GENERIC -> RTE_PCI_KDRV_UIO_GENERIC
>
> v5 changes:
> 	 - split into three seperate patches
>
> huawei.xhw (3):
>    PCI: use PCI standard sysfs entry to get PIO address
>    PCI: support MMIO in rte_pci_ioport_map/unap/read/write
>    PCI: don't use vfio ioctl call to access PIO resource
>
>   drivers/bus/pci/linux/pci.c     |  89 +-------------------
>   drivers/bus/pci/linux/pci_uio.c | 177 ++++++++++++++++++++++++++++------------
>   2 files changed, 128 insertions(+), 138 deletions(-)
>
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v5 0/3] support both PIO and MMIO BAR for virtio PMD
  2020-11-10 12:35   ` 谢华伟(此时此刻)
@ 2020-11-10 12:42     ` David Marchand
  2020-11-12 13:35       ` 谢华伟(此时此刻)
  2020-12-14 14:24       ` 谢华伟(此时此刻)
  0 siblings, 2 replies; 58+ messages in thread
From: David Marchand @ 2020-11-10 12:42 UTC (permalink / raw)
  To: 谢华伟(此时此刻)
  Cc: ferruh.yigit, dev, Maxime Coquelin, Burakov, Anatoly,
	Zhihong Wang, Xia, Chenbo, Gaetan Rivet, Thomas Monjalon
On Tue, Nov 10, 2020 at 1:35 PM 谢华伟(此时此刻) <huawei.xhw@alibaba-inc.com> wrote:
> Previously there are different ways to get port address based on
> different DPDK uio driver(IGB_UIO/UIO_PCI_GENERIC/VFIO), which is
> actually not necessary.
>
> This patch makes IO/MMIO port map/RW API more generic, which also
> supports MMIO.  It also fixes performance issue with vfio.
>
> Could you spare some time to have time to review this?
This is too touchy and I don't want to mess virtio support this late
in the release.
I asked for Maxime to have a look, but he seems really busy.
This will have to wait next release.
-- 
David Marchand
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v5 0/3] support both PIO and MMIO BAR for virtio PMD
  2020-11-10 12:42     ` David Marchand
@ 2020-11-12 13:35       ` 谢华伟(此时此刻)
  2020-12-14 14:24       ` 谢华伟(此时此刻)
  1 sibling, 0 replies; 58+ messages in thread
From: 谢华伟(此时此刻) @ 2020-11-12 13:35 UTC (permalink / raw)
  To: David Marchand, Maxime Coquelin, ferruh.yigit
  Cc: ferruh.yigit, dev, Burakov, Anatoly, Zhihong Wang, Xia, Chenbo,
	Gaetan Rivet, Thomas Monjalon
On 2020/11/10 20:42, David Marchand wrote:
> On Tue, Nov 10, 2020 at 1:35 PM 谢华伟(此时此刻) <huawei.xhw@alibaba-inc.com> wrote:
>> Previously there are different ways to get port address based on
>> different DPDK uio driver(IGB_UIO/UIO_PCI_GENERIC/VFIO), which is
>> actually not necessary.
>>
>> This patch makes IO/MMIO port map/RW API more generic, which also
>> supports MMIO.  It also fixes performance issue with vfio.
>>
>> Could you spare some time to have time to review this?
> This is too touchy and I don't want to mess virtio support this late
> in the release.
> I asked for Maxime to have a look, but he seems really busy.
>
> This will have to wait next release.
OK.  Actually it isn't as intrusive as it looks.
Then customers have to use git to clone latest DPDK when they run virtio 
with MMIO.  How about backporting this patch to this release after we 
merge it later?
Maxime:
Could we give this patch a somewhat higher priority? Customers are frequently 
pushing us.
Thanks.
/huawei
>
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v5 0/3] support both PIO and MMIO BAR for virtio PMD
  2020-11-10 12:42     ` David Marchand
  2020-11-12 13:35       ` 谢华伟(此时此刻)
@ 2020-12-14 14:24       ` 谢华伟(此时此刻)
  2020-12-16  7:54         ` Maxime Coquelin
  1 sibling, 1 reply; 58+ messages in thread
From: 谢华伟(此时此刻) @ 2020-12-14 14:24 UTC (permalink / raw)
  To: David Marchand, Maxime Coquelin
  Cc: ferruh.yigit, dev, Burakov, Anatoly, Zhihong Wang, Xia, Chenbo,
	Gaetan Rivet, Thomas Monjalon
Hi Maxime and David:
Could we start to review this patch?
/Thanks, huawei
On 2020/11/10 20:42, David Marchand wrote:
> On Tue, Nov 10, 2020 at 1:35 PM 谢华伟(此时此刻) <huawei.xhw@alibaba-inc.com> wrote:
>> Previously there are different ways to get port address based on
>> different DPDK uio driver(IGB_UIO/UIO_PCI_GENERIC/VFIO), which is
>> actually not necessary.
>>
>> This patch makes IO/MMIO port map/RW API more generic, which also
>> supports MMIO.  It also fixes performance issue with vfio.
>>
>> Could you spare some time to have time to review this?
> This is too touchy and I don't want to mess virtio support this late
> in the release.
> I asked for Maxime to have a look, but he seems really busy.
>
> This will have to wait next release.
>
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v5 0/3] support both PIO and MMIO BAR for virtio PMD
  2020-12-14 14:24       ` 谢华伟(此时此刻)
@ 2020-12-16  7:54         ` Maxime Coquelin
  0 siblings, 0 replies; 58+ messages in thread
From: Maxime Coquelin @ 2020-12-16  7:54 UTC (permalink / raw)
  To: 谢华伟(此时此刻),
	David Marchand
  Cc: ferruh.yigit, dev, Burakov, Anatoly, Zhihong Wang, Xia, Chenbo,
	Gaetan Rivet, Thomas Monjalon
Hi Huawei,
On 12/14/20 3:24 PM, 谢华伟(此时此刻) wrote:
> Hi Maxime and David:
> 
> Could we start to review this patch?
Yes, I plan to work on it after the holidays, let's target -rc1.
Thanks,
Maxime
> /Thanks, huawei
> 
> 
> On 2020/11/10 20:42, David Marchand wrote:
>> On Tue, Nov 10, 2020 at 1:35 PM 谢华伟(此时此刻)
>> <huawei.xhw@alibaba-inc.com> wrote:
>>> Previously there are different ways to get port address based on
>>> different DPDK uio driver(IGB_UIO/UIO_PCI_GENERIC/VFIO), which is
>>> actually not necessary.
>>>
>>> This patch makes IO/MMIO port map/RW API more generic, which also
>>> supports MMIO.  It also fixes performance issue with vfio.
>>>
>>> Could you spare some time to have time to review this?
>> This is too touchy and I don't want to mess virtio support this late
>> in the release.
>> I asked for Maxime to have a look, but he seems really busy.
>>
>> This will have to wait next release.
>>
> 
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v5 1/3] PCI: use PCI standard sysfs entry to get PIO address
  2020-10-22 15:51   ` [dpdk-dev] [PATCH v5 1/3] PCI: use PCI standard sysfs entry to get PIO address 谢华伟(此时此刻)
@ 2021-01-12  8:07     ` Maxime Coquelin
  2021-01-14 18:23       ` 谢华伟(此时此刻)
  0 siblings, 1 reply; 58+ messages in thread
From: Maxime Coquelin @ 2021-01-12  8:07 UTC (permalink / raw)
  To: 谢华伟(此时此刻),
	ferruh.yigit
  Cc: dev, anatoly.burakov, david.marchand, zhihong.wang, chenbo.xia, grive
Hi Huawei,
The title should be under the form:
"bus/pci: use PCI standard sysfs entry to get PIO address"
On 10/22/20 5:51 PM, 谢华伟(此时此刻) wrote:
> From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
> 
> Previously with igb_uio we get PIO address from igb_uio sysfs entry, with
> uio_pci_generic, we get PIO address from /proc/ioports.
> 
> Signed-off-by: huawei.xhw <huawei.xhw@alibaba-inc.com>
In order to comply with the contribution rules, your name must be
displayed under the form:
Signed-off-by: Firstname Lastname <huawei.xhw@alibaba-inc.com>
> ---
>  drivers/bus/pci/linux/pci.c     | 77 -----------------------------------------
>  drivers/bus/pci/linux/pci_uio.c | 64 ++++++++++++++++++++++++----------
>  2 files changed, 46 insertions(+), 95 deletions(-)
> 
> diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
> index 2e1808b..0f38abf 100644
> --- a/drivers/bus/pci/linux/pci.c
> +++ b/drivers/bus/pci/linux/pci.c
> @@ -677,71 +677,6 @@ int rte_pci_write_config(const struct rte_pci_device *device,
>  	}
>  }
>  
> -#if defined(RTE_ARCH_X86)
> -static int
> -pci_ioport_map(struct rte_pci_device *dev, int bar __rte_unused,
> -		struct rte_pci_ioport *p)
> -{
> -	uint16_t start, end;
> -	FILE *fp;
> -	char *line = NULL;
> -	char pci_id[16];
> -	int found = 0;
> -	size_t linesz;
> -
> -	if (rte_eal_iopl_init() != 0) {
> -		RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for PCI device %s\n",
> -			__func__, dev->name);
> -		return -1;
> -	}
> -
> -	snprintf(pci_id, sizeof(pci_id), PCI_PRI_FMT,
> -		 dev->addr.domain, dev->addr.bus,
> -		 dev->addr.devid, dev->addr.function);
> -
> -	fp = fopen("/proc/ioports", "r");
> -	if (fp == NULL) {
> -		RTE_LOG(ERR, EAL, "%s(): can't open ioports\n", __func__);
> -		return -1;
> -	}
> -
> -	while (getdelim(&line, &linesz, '\n', fp) > 0) {
> -		char *ptr = line;
> -		char *left;
> -		int n;
> -
> -		n = strcspn(ptr, ":");
> -		ptr[n] = 0;
> -		left = &ptr[n + 1];
> -
> -		while (*left && isspace(*left))
> -			left++;
> -
> -		if (!strncmp(left, pci_id, strlen(pci_id))) {
> -			found = 1;
> -
> -			while (*ptr && isspace(*ptr))
> -				ptr++;
> -
> -			sscanf(ptr, "%04hx-%04hx", &start, &end);
> -
> -			break;
> -		}
> -	}
> -
> -	free(line);
> -	fclose(fp);
> -
> -	if (!found)
> -		return -1;
> -
> -	p->base = start;
> -	RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%x\n", start);
> -
> -	return 0;
> -}
> -#endif
> -
>  int
>  rte_pci_ioport_map(struct rte_pci_device *dev, int bar,
>  		struct rte_pci_ioport *p)
> @@ -756,14 +691,8 @@ int rte_pci_write_config(const struct rte_pci_device *device,
>  		break;
>  #endif
>  	case RTE_PCI_KDRV_IGB_UIO:
> -		ret = pci_uio_ioport_map(dev, bar, p);
> -		break;
>  	case RTE_PCI_KDRV_UIO_GENERIC:
> -#if defined(RTE_ARCH_X86)
> -		ret = pci_ioport_map(dev, bar, p);
> -#else
>  		ret = pci_uio_ioport_map(dev, bar, p);
> -#endif
>  		break;
>  	default:
>  		break;
> @@ -830,14 +759,8 @@ int rte_pci_write_config(const struct rte_pci_device *device,
>  		break;
>  #endif
>  	case RTE_PCI_KDRV_IGB_UIO:
> -		ret = pci_uio_ioport_unmap(p);
> -		break;
>  	case RTE_PCI_KDRV_UIO_GENERIC:
> -#if defined(RTE_ARCH_X86)
> -		ret = 0;
> -#else
>  		ret = pci_uio_ioport_unmap(p);
> -#endif
>  		break;
>  	default:
>  		break;
> diff --git a/drivers/bus/pci/linux/pci_uio.c b/drivers/bus/pci/linux/pci_uio.c
> index f3305a2..01f2a40 100644
> --- a/drivers/bus/pci/linux/pci_uio.c
> +++ b/drivers/bus/pci/linux/pci_uio.c
> @@ -373,10 +373,13 @@
>  pci_uio_ioport_map(struct rte_pci_device *dev, int bar,
>  		   struct rte_pci_ioport *p)
>  {
> +	FILE *f = NULL;
>  	char dirname[PATH_MAX];
>  	char filename[PATH_MAX];
> -	int uio_num;
> -	unsigned long start;
> +	char buf[BUFSIZ];
> +	uint64_t phys_addr, end_addr, flags;
> +	unsigned long base;
> +	int i;
>  
>  	if (rte_eal_iopl_init() != 0) {
>  		RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for PCI device %s\n",
> @@ -384,41 +387,66 @@
>  		return -1;
>  	}
>  
> -	uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 0);
> -	if (uio_num < 0)
> +	/* open and read addresses of the corresponding resource in sysfs */
> +	snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT "/resource",
> +		rte_pci_get_sysfs_path(), dev->addr.domain, dev->addr.bus,
> +		dev->addr.devid, dev->addr.function);
> +	f = fopen(filename, "r");
> +	if (f == NULL) {
> +		RTE_LOG(ERR, EAL, "%s(): Cannot open sysfs resource: %s\n",
> +			__func__, strerror(errno));
>  		return -1;
> +	}
>  
> -	/* get portio start */
> -	snprintf(filename, sizeof(filename),
> -		 "%s/portio/port%d/start", dirname, bar);
> -	if (eal_parse_sysfs_value(filename, &start) < 0) {
> -		RTE_LOG(ERR, EAL, "%s(): cannot parse portio start\n",
> -			__func__);
> -		return -1;
> +	for (i = 0; i < bar + 1; i++) {
> +		if (fgets(buf, sizeof(buf), f) == NULL) {
> +			RTE_LOG(ERR, EAL, "%s(): Cannot read sysfs resource\n", __func__);
> +			goto error;
> +		}
>  	}
> -	/* ensure we don't get anything funny here, read/write will cast to
> -	 * uin16_t */
> -	if (start > UINT16_MAX)
> -		return -1;
> +	if (pci_parse_one_sysfs_resource(buf, sizeof(buf), &phys_addr,
> +		&end_addr, &flags) < 0)
> +		goto error;
> +
> +	if (!(flags & IORESOURCE_IO)) {
> +		RTE_LOG(ERR, EAL, "%s(): bar resource other than IO is not supported\n", __func__);
> +		goto error;
> +	}
> +	base = (unsigned long)phys_addr;
> +	RTE_LOG(INFO, EAL, "%s(): PIO BAR %08lx detected\n", __func__, base);
> +
> +	if (base > UINT16_MAX)
> +		goto error;
>  
>  	/* FIXME only for primary process ? */
>  	if (dev->intr_handle.type == RTE_INTR_HANDLE_UNKNOWN) {
> +		int uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 0);
> +		if (uio_num < 0) {
> +			RTE_LOG(ERR, EAL, "cannot open %s: %s\n",
> +				dirname, strerror(errno));
> +			goto error;
> +		}
>  
>  		snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num);
>  		dev->intr_handle.fd = open(filename, O_RDWR);
>  		if (dev->intr_handle.fd < 0) {
>  			RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
>  				filename, strerror(errno));
> -			return -1;
> +			goto error;
>  		}
>  		dev->intr_handle.type = RTE_INTR_HANDLE_UIO;
>  	}
>  
> -	RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%lx\n", start);
> +	RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%lx\n", base);
>  
> -	p->base = start;
> +	p->base = base;
>  	p->len = 0;
> +	fclose(f);
>  	return 0;
> +error:
> +	if (f)
> +		fclose(f);
> +	return -1;
>  }
>  #else
>  int
> 
I think it makes sense to have a common way for both igb_uio and
uio_pci_generic to get the PIO base address.
With commit message and title fixed, feel free to add my:
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Thanks,
Maxime
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v5 2/3] PCI: support MMIO in rte_pci_ioport_map/unap/read/write
  2020-10-22 15:51   ` [dpdk-dev] [PATCH v5 2/3] PCI: support MMIO in rte_pci_ioport_map/unap/read/write 谢华伟(此时此刻)
@ 2021-01-12  8:23     ` Maxime Coquelin
  2021-01-21  6:30       ` 谢华伟(此时此刻)
  2021-01-24 15:22     ` Xueming(Steven) Li
  2021-01-27 10:40     ` Ferruh Yigit
  2 siblings, 1 reply; 58+ messages in thread
From: Maxime Coquelin @ 2021-01-12  8:23 UTC (permalink / raw)
  To: 谢华伟(此时此刻),
	ferruh.yigit
  Cc: dev, anatoly.burakov, david.marchand, zhihong.wang, chenbo.xia, grive
Title should be something like:
"bus/pci: support MMIO in PCI ioport accessors"
On 10/22/20 5:51 PM, 谢华伟(此时此刻) wrote:
> From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
> 
> If IO BAR, we get PIO address.
> If MMIO BAR, we get mapped virtual address.
> We distinguish PIO and MMIO by their address like how kernel does.
> ioread/write8/16/32 is provided to access PIO/MMIO.
> BTW, for virtio on arch other than x86, BAR flag indicates PIO but is mapped.
No acronym in the commit message.
Also, I am not sure I understand this comment.
Does it mean that in the case of ARM, for example, the IORESOURCE_IO flag is
set but the base address is above PIO_MAX?
> 
> Signed-off-by: huawei.xhw <huawei.xhw@alibaba-inc.com>
As in previous patch, we need your full name for the sign-off.
> ---
>  drivers/bus/pci/linux/pci.c     |   4 --
>  drivers/bus/pci/linux/pci_uio.c | 123 ++++++++++++++++++++++++++--------------
>  2 files changed, 82 insertions(+), 45 deletions(-)
> 
> diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
> index 0f38abf..0dc99e9 100644
> --- a/drivers/bus/pci/linux/pci.c
> +++ b/drivers/bus/pci/linux/pci.c
> @@ -715,8 +715,6 @@ int rte_pci_write_config(const struct rte_pci_device *device,
>  		break;
>  #endif
>  	case RTE_PCI_KDRV_IGB_UIO:
> -		pci_uio_ioport_read(p, data, len, offset);
> -		break;
I think this part should be in patch 1.
>  	case RTE_PCI_KDRV_UIO_GENERIC:
>  		pci_uio_ioport_read(p, data, len, offset);
>  		break;
> @@ -736,8 +734,6 @@ int rte_pci_write_config(const struct rte_pci_device *device,
>  		break;
>  #endif
>  	case RTE_PCI_KDRV_IGB_UIO:
> -		pci_uio_ioport_write(p, data, len, offset);
> -		break;
Same here.
>  	case RTE_PCI_KDRV_UIO_GENERIC:
>  		pci_uio_ioport_write(p, data, len, offset);
>  		break;
> diff --git a/drivers/bus/pci/linux/pci_uio.c b/drivers/bus/pci/linux/pci_uio.c
> index 01f2a40..c19382f 100644
> --- a/drivers/bus/pci/linux/pci_uio.c
> +++ b/drivers/bus/pci/linux/pci_uio.c
> @@ -379,14 +379,9 @@
>  	char buf[BUFSIZ];
>  	uint64_t phys_addr, end_addr, flags;
>  	unsigned long base;
> +	bool iobar;
>  	int i;
>  
> -	if (rte_eal_iopl_init() != 0) {
> -		RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for PCI device %s\n",
> -			__func__, dev->name);
> -		return -1;
> -	}
> -
>  	/* open and read addresses of the corresponding resource in sysfs */
>  	snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT "/resource",
>  		rte_pci_get_sysfs_path(), dev->addr.domain, dev->addr.bus,
> @@ -408,15 +403,30 @@
>  		&end_addr, &flags) < 0)
>  		goto error;
>  
> -	if (!(flags & IORESOURCE_IO)) {
> -		RTE_LOG(ERR, EAL, "%s(): bar resource other than IO is not supported\n", __func__);
> +	if (flags & IORESOURCE_IO) {
> +		iobar = 1;
> +		base = (unsigned long)phys_addr;
> +		RTE_LOG(INFO, EAL, "%s(): PIO BAR %08lx detected\n", __func__, base);
> +	} else if (flags & IORESOURCE_MEM) {
> +		iobar = 0;
> +		base = (unsigned long)dev->mem_resource[bar].addr;
> +		RTE_LOG(INFO, EAL, "%s(): MMIO BAR %08lx detected\n", __func__, base);
> +	} else {
> +		RTE_LOG(ERR, EAL, "%s(): unknown BAR type\n", __func__);
> +		goto error;
> +	}
> +
> +
> +	if (iobar && rte_eal_iopl_init() != 0) {
> +		RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for PCI device %s\n",
> +			__func__, dev->name);
>  		goto error;
>  	}
> -	base = (unsigned long)phys_addr;
> -	RTE_LOG(INFO, EAL, "%s(): PIO BAR %08lx detected\n", __func__, base);
>  
> -	if (base > UINT16_MAX)
> +	if (iobar && (base > UINT16_MAX)) {
> +		RTE_LOG(ERR, EAL, "%s(): %08lx too large PIO resource\n", __func__, base);
>  		goto error;
> +	}
It looks like the above check could be moved directly into the (flags &
IORESOURCE_IO) case, so the iobar boolean is not needed.
>  
>  	/* FIXME only for primary process ? */
>  	if (dev->intr_handle.type == RTE_INTR_HANDLE_UNKNOWN) {
> @@ -517,6 +527,61 @@
>  }
>  #endif
>  
> +#define PIO_MAX 0x10000
> +static inline uint8_t ioread8(void *addr)
> +{
> +	uint8_t val;
> +
> +	val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
> +		*(volatile uint8_t *)addr :
> +		inb((unsigned long)addr);
> +
> +	return val;
> +}
> +
> +static inline uint16_t ioread16(void *addr)
> +{
> +	uint16_t val;
> +
> +	val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
> +		*(volatile uint16_t *)addr :
> +		inw((unsigned long)addr);
> +
> +	return val;
> +}
> +
> +static inline uint32_t ioread32(void *addr)
> +{
> +	uint32_t val;
> +
> +	val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
> +		*(volatile uint32_t *)addr :
> +		inl((unsigned long)addr);
> +
> +	return val;
> +}
> +
> +static inline void iowrite8(uint8_t val, void *addr)
> +{
> +	(uint64_t)(uintptr_t)addr >= PIO_MAX ?
> +		*(volatile uint8_t *)addr = val :
> +		outb(val, (unsigned long)addr);
> +}
> +
> +static inline void iowrite16(uint16_t val, void *addr)
> +{
> +	(uint64_t)(uintptr_t)addr >= PIO_MAX ?
> +		*(volatile uint16_t *)addr = val :
> +		outw(val, (unsigned long)addr);
> +}
> +
> +static inline void iowrite32(uint32_t val, void *addr)
> +{
> +	(uint64_t)(uintptr_t)addr >= PIO_MAX ?
> +		*(volatile uint32_t *)addr = val :
> +		outl(val, (unsigned long)addr);
> +}
> +
>  void
>  pci_uio_ioport_read(struct rte_pci_ioport *p,
>  		    void *data, size_t len, off_t offset)
> @@ -528,25 +593,13 @@
>  	for (d = data; len > 0; d += size, reg += size, len -= size) {
>  		if (len >= 4) {
>  			size = 4;
> -#if defined(RTE_ARCH_X86)
> -			*(uint32_t *)d = inl(reg);
> -#else
> -			*(uint32_t *)d = *(volatile uint32_t *)reg;
> -#endif
> +			*(uint32_t *)d = ioread32((void *)reg);
>  		} else if (len >= 2) {
>  			size = 2;
> -#if defined(RTE_ARCH_X86)
> -			*(uint16_t *)d = inw(reg);
> -#else
> -			*(uint16_t *)d = *(volatile uint16_t *)reg;
> -#endif
> +			*(uint16_t *)d = ioread16((void *)reg);
>  		} else {
>  			size = 1;
> -#if defined(RTE_ARCH_X86)
> -			*d = inb(reg);
> -#else
> -			*d = *(volatile uint8_t *)reg;
> -#endif
> +			*d = ioread8((void *)reg);
>  		}
>  	}
>  }
> @@ -562,25 +615,13 @@
>  	for (s = data; len > 0; s += size, reg += size, len -= size) {
>  		if (len >= 4) {
>  			size = 4;
> -#if defined(RTE_ARCH_X86)
> -			outl_p(*(const uint32_t *)s, reg);
> -#else
> -			*(volatile uint32_t *)reg = *(const uint32_t *)s;
> -#endif
> +			iowrite32(*(const uint32_t *)s, (void *)reg);
>  		} else if (len >= 2) {
>  			size = 2;
> -#if defined(RTE_ARCH_X86)
> -			outw_p(*(const uint16_t *)s, reg);
> -#else
> -			*(volatile uint16_t *)reg = *(const uint16_t *)s;
> -#endif
> +			iowrite16(*(const uint16_t *)s, (void *)reg);
>  		} else {
>  			size = 1;
> -#if defined(RTE_ARCH_X86)
> -			outb_p(*s, reg);
> -#else
> -			*(volatile uint8_t *)reg = *s;
> -#endif
> +			iowrite8(*s, (void *)reg);
>  		}
>  	}
>  }
> 
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v5 3/3] PCI: don't use vfio ioctl call to access PIO resource
  2020-10-22 15:51   ` [dpdk-dev] [PATCH v5 3/3] PCI: don't use vfio ioctl call to access PIO resource 谢华伟(此时此刻)
@ 2021-01-12  9:37     ` Maxime Coquelin
  2021-01-12 16:58       ` Maxime Coquelin
  0 siblings, 1 reply; 58+ messages in thread
From: Maxime Coquelin @ 2021-01-12  9:37 UTC (permalink / raw)
  To: 谢华伟(此时此刻),
	ferruh.yigit
  Cc: dev, anatoly.burakov, david.marchand, zhihong.wang, chenbo.xia, grive
bus/pci: ...
On 10/22/20 5:51 PM, 谢华伟(此时此刻) wrote:
> From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
> 
> VFIO should use the same way to map/read/write PORT IO as UIO, for
> virtio PMD.
Please provide more details in the commit message on why the way VFIO
works today is wrong (The cover letter is lost once applied).
> Signed-off-by: huawei.xhw <huawei.xhw@alibaba-inc.com>
Same comment about name format as on previous patches.
> ---
>  drivers/bus/pci/linux/pci.c     | 8 ++++----
>  drivers/bus/pci/linux/pci_uio.c | 4 +++-
>  2 files changed, 7 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
> index 0dc99e9..2ed9f2b 100644
> --- a/drivers/bus/pci/linux/pci.c
> +++ b/drivers/bus/pci/linux/pci.c
> @@ -687,7 +687,7 @@ int rte_pci_write_config(const struct rte_pci_device *device,
>  #ifdef VFIO_PRESENT
>  	case RTE_PCI_KDRV_VFIO:
>  		if (pci_vfio_is_enabled())
> -			ret = pci_vfio_ioport_map(dev, bar, p);
> +			ret = pci_uio_ioport_map(dev, bar, p);
Doesn't it create a regression with regards to needed capabilities?
My understanding is that before this patch we don't need to call iopl(),
whereas once applied it is required, correct?
Regards,
Maxime
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v5 3/3] PCI: don't use vfio ioctl call to access PIO resource
  2021-01-12  9:37     ` Maxime Coquelin
@ 2021-01-12 16:58       ` Maxime Coquelin
  2021-01-20 14:54         ` 谢华伟(此时此刻)
  0 siblings, 1 reply; 58+ messages in thread
From: Maxime Coquelin @ 2021-01-12 16:58 UTC (permalink / raw)
  To: 谢华伟(此时此刻),
	ferruh.yigit
  Cc: dev, anatoly.burakov, david.marchand, zhihong.wang, chenbo.xia, grive
On 1/12/21 10:37 AM, Maxime Coquelin wrote:
> bus/pci: ...
> 
> On 10/22/20 5:51 PM, 谢华伟(此时此刻) wrote:
>> From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
>>
>> VFIO should use the same way to map/read/write PORT IO as UIO, for
>> virtio PMD.
> 
> Please provide more details in the commit message on why the way VFIO
> works today is wrong (The cover letter is lost once applied).
> 
>> Signed-off-by: huawei.xhw <huawei.xhw@alibaba-inc.com>
> 
> Same comment about name format as on previous patches.
> 
>> ---
>>  drivers/bus/pci/linux/pci.c     | 8 ++++----
>>  drivers/bus/pci/linux/pci_uio.c | 4 +++-
>>  2 files changed, 7 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
>> index 0dc99e9..2ed9f2b 100644
>> --- a/drivers/bus/pci/linux/pci.c
>> +++ b/drivers/bus/pci/linux/pci.c
>> @@ -687,7 +687,7 @@ int rte_pci_write_config(const struct rte_pci_device *device,
>>  #ifdef VFIO_PRESENT
>>  	case RTE_PCI_KDRV_VFIO:
>>  		if (pci_vfio_is_enabled())
>> -			ret = pci_vfio_ioport_map(dev, bar, p);
>> +			ret = pci_uio_ioport_map(dev, bar, p);
> 
> Doesn't it create a regression with regards to needed capabilities?
> My understanding is that before this patch we don't need to call iopl(),
> whereas once applied it is required, correct?
I did some testing today, and think it is not a regression with para-
virtualized Virtio devices.
Indeed, I thought it would be a regression with Legacy devices when
IOMMU is enabled and the program is run as non-root (IOMMU enabled
just to support IOVA as VA mode). But it turns out para-virtualized
Virtio legacy device and vIOMMU enabled is not a supported configuration
by QEMU.
Note that when noiommu mode is enabled, the app needs cap_sys_rawio, so
same as iopl(). No regression in this case too.
That said, with real (non para-virtualized) Virtio device using PIO like
yours, doesn't your patch introduce a restriction for your device that
it will require cap_sys_rawio whereas it would not be needed?
Thanks,
Maxime
> Regards,
> Maxime
> 
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v5 0/3] support both PIO and MMIO BAR for virtio PMD
  2020-10-22 15:51 ` [dpdk-dev] [PATCH v5 0/3] support both PIO and MMIO BAR for virtio PMD 谢华伟(此时此刻)
                     ` (5 preceding siblings ...)
  2020-11-10 12:35   ` 谢华伟(此时此刻)
@ 2021-01-12 17:37   ` Maxime Coquelin
  2021-01-14 18:19     ` 谢华伟(此时此刻)
  2021-01-21  4:12     ` 谢华伟(此时此刻)
  6 siblings, 2 replies; 58+ messages in thread
From: Maxime Coquelin @ 2021-01-12 17:37 UTC (permalink / raw)
  To: 谢华伟(此时此刻),
	ferruh.yigit
  Cc: dev, anatoly.burakov, david.marchand, zhihong.wang, chenbo.xia, grive
On 10/22/20 5:51 PM, 谢华伟(此时此刻) wrote:
> From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
> 
> Legacy virtio-pci only supports PIO BAR resource. As we need to create lots of
> virtio devices and PIO resource on x86 is very limited, we expose MMIO BAR.
> 
> Kernel supports both PIO  and MMIO BAR for legacy virtio-pci device. We handles
> different type of BAR in the similar way.
> 
> In previous implementation, with igb_uio we get PIO address from igb_uio
> sysfs entry; with uio_pci_generic, we get PIO address from
> /proc/ioports.
> For PIO/MMIO RW, there is different path for different drivers and arch.
> For VFIO, PIO/MMIO RW is through syscall, which has big performance
> issue.
Regarding the performance issue, do you have some numbers to share?
AFAICS, it can only have an impact on performance when interrupt mode is
used or queue notification is enabled.
Does your HW Virtio implementation requires notification?
Is performance the only issue to have your HW working with Virtio PMD,
or is this series also fixing some functional issues?
Best regards,
Maxime
> On X86, it assumes only PIO is supported.
> 
> All of the above is too much twisted.
> This patch unifies the way to get both PIO and MMIO address for different driver
> and arch, all from standard resource attr under pci sysfs.
> 
> We distinguish PIO and MMIO by their address like how kernel does. It is ugly but works.
> 
> v2 changes:
> 	 - add more explanation in the commit message
> 
> v3 changes:
> 	 - fix patch format issues
> 
> v4 changes:
> 	 - fixes for RTE_KDRV_UIO_GENERIC -> RTE_PCI_KDRV_UIO_GENERIC
> 
> v5 changes:
> 	 - split into three seperate patches
> 
> huawei.xhw (3):
>   PCI: use PCI standard sysfs entry to get PIO address
>   PCI: support MMIO in rte_pci_ioport_map/unap/read/write
>   PCI: don't use vfio ioctl call to access PIO resource
> 
>  drivers/bus/pci/linux/pci.c     |  89 +-------------------
>  drivers/bus/pci/linux/pci_uio.c | 177 ++++++++++++++++++++++++++++------------
>  2 files changed, 128 insertions(+), 138 deletions(-)
> 
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v5 0/3] support both PIO and MMIO BAR for virtio PMD
  2021-01-12 17:37   ` Maxime Coquelin
@ 2021-01-14 18:19     ` 谢华伟(此时此刻)
  2021-01-21  4:12     ` 谢华伟(此时此刻)
  1 sibling, 0 replies; 58+ messages in thread
From: 谢华伟(此时此刻) @ 2021-01-14 18:19 UTC (permalink / raw)
  To: Maxime Coquelin, ferruh.yigit
  Cc: dev, anatoly.burakov, david.marchand, zhihong.wang, chenbo.xia, grive
On 2021/1/13 1:37, Maxime Coquelin wrote:
>
> On 10/22/20 5:51 PM, 谢华伟(此时此刻) wrote:
>> From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
>>
>> Legacy virtio-pci only supports PIO BAR resource. As we need to create lots of
>> virtio devices and PIO resource on x86 is very limited, we expose MMIO BAR.
>>
>> Kernel supports both PIO  and MMIO BAR for legacy virtio-pci device. We handles
>> different type of BAR in the similar way.
>>
>> In previous implementation, with igb_uio we get PIO address from igb_uio
>> sysfs entry; with uio_pci_generic, we get PIO address from
>> /proc/ioports.
>> For PIO/MMIO RW, there is different path for different drivers and arch.
>> For VFIO, PIO/MMIO RW is through syscall, which has big performance
>> issue.
> Regarding the performance issue, do you have some numbers to share?
> AFAICS, it can only have an impact on performance when interrupt mode is
> used or queue notification is enabled.
I don't have performance numbers yet, but will measure when I have time.
Yes, it is not needed when virtio backend/device is working in polling mode.
But anyway, ioctl isn't needed at all.
/huawei
>
> Does your HW Virtio implementation requires notification?
>
> Is performance the only issue to have your HW working with Virtio PMD,
> or is this series also fixing some functionnal issues?
There are two purposes for this patch. One is to support MMIO, and the
other is to unify/simplify the way to get the IO/MMIO resource and
read/write the IO/MMIO port for virtio PMD.
The current implementation is too complicated.
/huawei
>
> Best regards,
> Maxime
>> On X86, it assumes only PIO is supported.
>>
>> All of the above is too much twisted.
>> This patch unifies the way to get both PIO and MMIO address for different driver
>> and arch, all from standard resource attr under pci sysfs.
>>
>> We distinguish PIO and MMIO by their address like how kernel does. It is ugly but works.
>>
>> v2 changes:
>> 	 - add more explanation in the commit message
>>
>> v3 changes:
>> 	 - fix patch format issues
>>
>> v4 changes:
>> 	 - fixes for RTE_KDRV_UIO_GENERIC -> RTE_PCI_KDRV_UIO_GENERIC
>>
>> v5 changes:
>> 	 - split into three seperate patches
>>
>> huawei.xhw (3):
>>    PCI: use PCI standard sysfs entry to get PIO address
>>    PCI: support MMIO in rte_pci_ioport_map/unap/read/write
>>    PCI: don't use vfio ioctl call to access PIO resource
>>
>>   drivers/bus/pci/linux/pci.c     |  89 +-------------------
>>   drivers/bus/pci/linux/pci_uio.c | 177 ++++++++++++++++++++++++++++------------
>>   2 files changed, 128 insertions(+), 138 deletions(-)
>>
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v5 1/3] PCI: use PCI standard sysfs entry to get PIO address
  2021-01-12  8:07     ` Maxime Coquelin
@ 2021-01-14 18:23       ` 谢华伟(此时此刻)
  2021-01-24 15:10         ` Xueming(Steven) Li
  0 siblings, 1 reply; 58+ messages in thread
From: 谢华伟(此时此刻) @ 2021-01-14 18:23 UTC (permalink / raw)
  To: Maxime Coquelin, ferruh.yigit
  Cc: dev, anatoly.burakov, david.marchand, zhihong.wang, chenbo.xia, grive
On 2021/1/12 16:07, Maxime Coquelin wrote:
> Hi Huawei,
>
> The title should be under the form:
> "bus/pci: use PCI standard sysfs entry to get PIO address"
>
> On 10/22/20 5:51 PM, 谢华伟(此时此刻) wrote:
>> From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
>>
>> Previously with igb_uio we get PIO address from igb_uio sysfs entry, with
>> uio_pci_generic, we get PIO address from /proc/ioports.
>>
>> Signed-off-by: huawei.xhw <huawei.xhw@alibaba-inc.com>
> In order to comply with the contribution rules, your name must be
> disaplyed under the form:
>
> Signed-off-by: Firstname Lastname <huawei.xhw@alibaba-inc.com>
Would fix this.
>> ---
>>   drivers/bus/pci/linux/pci.c     | 77 -----------------------------------------
>>   drivers/bus/pci/linux/pci_uio.c | 64 ++++++++++++++++++++++++----------
>>   2 files changed, 46 insertions(+), 95 deletions(-)
>>
>> diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
>> index 2e1808b..0f38abf 100644
>> --- a/drivers/bus/pci/linux/pci.c
>> +++ b/drivers/bus/pci/linux/pci.c
>> @@ -677,71 +677,6 @@ int rte_pci_write_config(const struct rte_pci_device *device,
>>   	}
>>   }
>>   
>> -#if defined(RTE_ARCH_X86)
>> -static int
>> -pci_ioport_map(struct rte_pci_device *dev, int bar __rte_unused,
>> -		struct rte_pci_ioport *p)
>> -{
>> -	uint16_t start, end;
>> -	FILE *fp;
>> -	char *line = NULL;
>> -	char pci_id[16];
>> -	int found = 0;
>> -	size_t linesz;
>> -
>> -	if (rte_eal_iopl_init() != 0) {
>> -		RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for PCI device %s\n",
>> -			__func__, dev->name);
>> -		return -1;
>> -	}
>> -
>> -	snprintf(pci_id, sizeof(pci_id), PCI_PRI_FMT,
>> -		 dev->addr.domain, dev->addr.bus,
>> -		 dev->addr.devid, dev->addr.function);
>> -
>> -	fp = fopen("/proc/ioports", "r");
>> -	if (fp == NULL) {
>> -		RTE_LOG(ERR, EAL, "%s(): can't open ioports\n", __func__);
>> -		return -1;
>> -	}
>> -
>> -	while (getdelim(&line, &linesz, '\n', fp) > 0) {
>> -		char *ptr = line;
>> -		char *left;
>> -		int n;
>> -
>> -		n = strcspn(ptr, ":");
>> -		ptr[n] = 0;
>> -		left = &ptr[n + 1];
>> -
>> -		while (*left && isspace(*left))
>> -			left++;
>> -
>> -		if (!strncmp(left, pci_id, strlen(pci_id))) {
>> -			found = 1;
>> -
>> -			while (*ptr && isspace(*ptr))
>> -				ptr++;
>> -
>> -			sscanf(ptr, "%04hx-%04hx", &start, &end);
>> -
>> -			break;
>> -		}
>> -	}
>> -
>> -	free(line);
>> -	fclose(fp);
>> -
>> -	if (!found)
>> -		return -1;
>> -
>> -	p->base = start;
>> -	RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%x\n", start);
>> -
>> -	return 0;
>> -}
>> -#endif
>> -
>>   int
>>   rte_pci_ioport_map(struct rte_pci_device *dev, int bar,
>>   		struct rte_pci_ioport *p)
>> @@ -756,14 +691,8 @@ int rte_pci_write_config(const struct rte_pci_device *device,
>>   		break;
>>   #endif
>>   	case RTE_PCI_KDRV_IGB_UIO:
>> -		ret = pci_uio_ioport_map(dev, bar, p);
>> -		break;
>>   	case RTE_PCI_KDRV_UIO_GENERIC:
>> -#if defined(RTE_ARCH_X86)
>> -		ret = pci_ioport_map(dev, bar, p);
>> -#else
>>   		ret = pci_uio_ioport_map(dev, bar, p);
>> -#endif
>>   		break;
>>   	default:
>>   		break;
>> @@ -830,14 +759,8 @@ int rte_pci_write_config(const struct rte_pci_device *device,
>>   		break;
>>   #endif
>>   	case RTE_PCI_KDRV_IGB_UIO:
>> -		ret = pci_uio_ioport_unmap(p);
>> -		break;
>>   	case RTE_PCI_KDRV_UIO_GENERIC:
>> -#if defined(RTE_ARCH_X86)
>> -		ret = 0;
>> -#else
>>   		ret = pci_uio_ioport_unmap(p);
>> -#endif
>>   		break;
>>   	default:
>>   		break;
>> diff --git a/drivers/bus/pci/linux/pci_uio.c b/drivers/bus/pci/linux/pci_uio.c
>> index f3305a2..01f2a40 100644
>> --- a/drivers/bus/pci/linux/pci_uio.c
>> +++ b/drivers/bus/pci/linux/pci_uio.c
>> @@ -373,10 +373,13 @@
>>   pci_uio_ioport_map(struct rte_pci_device *dev, int bar,
>>   		   struct rte_pci_ioport *p)
>>   {
>> +	FILE *f = NULL;
>>   	char dirname[PATH_MAX];
>>   	char filename[PATH_MAX];
>> -	int uio_num;
>> -	unsigned long start;
>> +	char buf[BUFSIZ];
>> +	uint64_t phys_addr, end_addr, flags;
>> +	unsigned long base;
>> +	int i;
>>   
>>   	if (rte_eal_iopl_init() != 0) {
>>   		RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for PCI device %s\n",
>> @@ -384,41 +387,66 @@
>>   		return -1;
>>   	}
>>   
>> -	uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 0);
>> -	if (uio_num < 0)
>> +	/* open and read addresses of the corresponding resource in sysfs */
>> +	snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT "/resource",
>> +		rte_pci_get_sysfs_path(), dev->addr.domain, dev->addr.bus,
>> +		dev->addr.devid, dev->addr.function);
>> +	f = fopen(filename, "r");
>> +	if (f == NULL) {
>> +		RTE_LOG(ERR, EAL, "%s(): Cannot open sysfs resource: %s\n",
>> +			__func__, strerror(errno));
>>   		return -1;
>> +	}
>>   
>> -	/* get portio start */
>> -	snprintf(filename, sizeof(filename),
>> -		 "%s/portio/port%d/start", dirname, bar);
>> -	if (eal_parse_sysfs_value(filename, &start) < 0) {
>> -		RTE_LOG(ERR, EAL, "%s(): cannot parse portio start\n",
>> -			__func__);
>> -		return -1;
>> +	for (i = 0; i < bar + 1; i++) {
>> +		if (fgets(buf, sizeof(buf), f) == NULL) {
>> +			RTE_LOG(ERR, EAL, "%s(): Cannot read sysfs resource\n", __func__);
>> +			goto error;
>> +		}
>>   	}
>> -	/* ensure we don't get anything funny here, read/write will cast to
>> -	 * uin16_t */
>> -	if (start > UINT16_MAX)
>> -		return -1;
>> +	if (pci_parse_one_sysfs_resource(buf, sizeof(buf), &phys_addr,
>> +		&end_addr, &flags) < 0)
>> +		goto error;
>> +
>> +	if (!(flags & IORESOURCE_IO)) {
>> +		RTE_LOG(ERR, EAL, "%s(): bar resource other than IO is not supported\n", __func__);
>> +		goto error;
>> +	}
>> +	base = (unsigned long)phys_addr;
>> +	RTE_LOG(INFO, EAL, "%s(): PIO BAR %08lx detected\n", __func__, base);
>> +
>> +	if (base > UINT16_MAX)
>> +		goto error;
>>   
>>   	/* FIXME only for primary process ? */
>>   	if (dev->intr_handle.type == RTE_INTR_HANDLE_UNKNOWN) {
>> +		int uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 0);
>> +		if (uio_num < 0) {
>> +			RTE_LOG(ERR, EAL, "cannot open %s: %s\n",
>> +				dirname, strerror(errno));
>> +			goto error;
>> +		}
>>   
>>   		snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num);
>>   		dev->intr_handle.fd = open(filename, O_RDWR);
>>   		if (dev->intr_handle.fd < 0) {
>>   			RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
>>   				filename, strerror(errno));
>> -			return -1;
>> +			goto error;
>>   		}
>>   		dev->intr_handle.type = RTE_INTR_HANDLE_UIO;
>>   	}
>>   
>> -	RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%lx\n", start);
>> +	RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%lx\n", base);
>>   
>> -	p->base = start;
>> +	p->base = base;
>>   	p->len = 0;
>> +	fclose(f);
>>   	return 0;
>> +error:
>> +	if (f)
>> +		fclose(f);
>> +	return -1;
>>   }
>>   #else
>>   int
>>
> I think it makes sense to have a common way for both igb_uio and
> uio_pci_generic to get the PIO base address.
>
> With commit message and title fixed, feel free to add my:
>
> Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Thanks Maxime.
>
> Thanks,
> Maxime
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v5 3/3] PCI: don't use vfio ioctl call to access PIO resource
  2021-01-12 16:58       ` Maxime Coquelin
@ 2021-01-20 14:54         ` 谢华伟(此时此刻)
  2021-01-21  8:29           ` Maxime Coquelin
  0 siblings, 1 reply; 58+ messages in thread
From: 谢华伟(此时此刻) @ 2021-01-20 14:54 UTC (permalink / raw)
  To: Maxime Coquelin, ferruh.yigit
  Cc: dev, anatoly.burakov, david.marchand, zhihong.wang, chenbo.xia, grive
On 2021/1/13 0:58, Maxime Coquelin wrote:
>
> On 1/12/21 10:37 AM, Maxime Coquelin wrote:
>> bus/pci: ...
>>
>> On 10/22/20 5:51 PM, 谢华伟(此时此刻) wrote:
>>> From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
>>>
>>> VFIO should use the same way to map/read/write PORT IO as UIO, for
>>> virtio PMD.
>> Please provide more details in the commit message on why the way VFIO
>> works today is wrong (The cover letter is lost once applied).
ok
>>
>>> Signed-off-by: huawei.xhw <huawei.xhw@alibaba-inc.com>
>> Same comment about name format as on previous patches.
>>
>>> ---
>>>   drivers/bus/pci/linux/pci.c     | 8 ++++----
>>>   drivers/bus/pci/linux/pci_uio.c | 4 +++-
>>>   2 files changed, 7 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
>>> index 0dc99e9..2ed9f2b 100644
>>> --- a/drivers/bus/pci/linux/pci.c
>>> +++ b/drivers/bus/pci/linux/pci.c
>>> @@ -687,7 +687,7 @@ int rte_pci_write_config(const struct rte_pci_device *device,
>>>   #ifdef VFIO_PRESENT
>>>   	case RTE_PCI_KDRV_VFIO:
>>>   		if (pci_vfio_is_enabled())
>>> -			ret = pci_vfio_ioport_map(dev, bar, p);
>>> +			ret = pci_uio_ioport_map(dev, bar, p);
>> Doesn't it create a regression with regards to needed capabilities?
>> My understanding is that before this patch we don't need to call iopl(),
>> whereas once applied it is required, correct?
> I did some testing today, and think it is not a regression with para-
> virtualized Virtio devices.
>
> Indeed, I thought it would be a regression with Legacy devices when
> IOMMU is enabled and the program is run as non-root (IOMMU enabled
> just to suport IOVA as VA mode). But it turns out para-virtualized
> Virtio legacy device and vIOMMU enabled is not a supported configuration
> by QEMU.
>
> Note that when noiommu mode is enabled, the app needs cap_sys_rawio, so
> same as iopl(). No regression in this case too.
>
> That said, with real (non para-virtualized) Virtio device using PIO like
> yours, doesn't your patch introduce a restriction for your device that
> it will require cap_sys_rawio whereas it would not be needed?
I don't see the regression issue.
With a real virtio device (hardware implemented), if it is using MMIO, no
cap_sys_rawio is required.
If it is using PIO, iopl is always required.
> Thanks,
> Maxime
>
>> Regards,
>> Maxime
>>
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v5 0/3] support both PIO and MMIO BAR for virtio PMD
  2021-01-12 17:37   ` Maxime Coquelin
  2021-01-14 18:19     ` 谢华伟(此时此刻)
@ 2021-01-21  4:12     ` 谢华伟(此时此刻)
  2021-01-21  8:47       ` Maxime Coquelin
  1 sibling, 1 reply; 58+ messages in thread
From: 谢华伟(此时此刻) @ 2021-01-21  4:12 UTC (permalink / raw)
  To: Maxime Coquelin, ferruh.yigit
  Cc: dev, anatoly.burakov, david.marchand, zhihong.wang, chenbo.xia, grive
On 2021/1/13 1:37, Maxime Coquelin wrote:
>
> On 10/22/20 5:51 PM, 谢华伟(此时此刻) wrote:
>> From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
>>
>> Legacy virtio-pci only supports PIO BAR resource. As we need to create lots of
>> virtio devices and PIO resource on x86 is very limited, we expose MMIO BAR.
>>
>> Kernel supports both PIO  and MMIO BAR for legacy virtio-pci device. We handles
>> different type of BAR in the similar way.
>>
>> In previous implementation, with igb_uio we get PIO address from igb_uio
>> sysfs entry; with uio_pci_generic, we get PIO address from
>> /proc/ioports.
>> For PIO/MMIO RW, there is different path for different drivers and arch.
>> For VFIO, PIO/MMIO RW is through syscall, which has big performance
>> issue.
> Regarding the performance issue, do you have some numbers to share?
> AFAICS, it can only have an impact on performance when interrupt mode is
> used or queue notification is enabled.
>
> Does your HW Virtio implementation requires notification?
Yes, hardware needs notification to tell which queue has more buffers.
The vhost backend also needs notification when it is not running in polling
mode.
It is easy for a software backend to sync with the frontend through memory
on whether it needs notification, but that is a big burden for hardware.
Anyway, using the vfio ioctl isn't needed at all. virtio PMD is the only
consumer of pci_vfio_ioport_read.
We could consider whether we still need the pci_vfio_ioport_read related API
in the future.
/huawei
>
> Is performance the only issue to have your HW working with Virtio PMD,
> or is this series also fixing some functionnal issues?
>
> Best regards,
> Maxime
>
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v5 2/3] PCI: support MMIO in rte_pci_ioport_map/unap/read/write
  2021-01-12  8:23     ` Maxime Coquelin
@ 2021-01-21  6:30       ` 谢华伟(此时此刻)
  0 siblings, 0 replies; 58+ messages in thread
From: 谢华伟(此时此刻) @ 2021-01-21  6:30 UTC (permalink / raw)
  To: Maxime Coquelin, ferruh.yigit
  Cc: dev, anatoly.burakov, david.marchand, zhihong.wang, chenbo.xia, grive
On 2021/1/12 16:23, Maxime Coquelin wrote:
> Title should be something like:
>
> "bus/pci: support MMIO in PCI ioport accessors
>
> On 10/22/20 5:51 PM, 谢华伟(此时此刻) wrote:
>> From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
>>
>> If IO BAR, we get PIO address.
>> If MMIO BAR, we get mapped virtual address.
>> We distinguish PIO and MMIO by their address like how kernel does.
>> ioread/write8/16/32 is provided to access PIO/MMIO.
>> BTW, for virtio on arch other than x86, BAR flag indicates PIO but is mapped.
> No acronym in the commit message.
BTW? fixed. PIO(programmed IO) and MMIO(memory mapped IO) explained.
> Also, I am not sure to understand this comment.
> Does it means in the case of ARM for example, the IORESOURCE_IO flag is
> set but the base address is above PIO_MAX?
ARM doesn't have PIO, only MMIO. The device sets the IORESOURCE_IO flag
anyway.
Should I remove this message, as it causes confusion?
>
>> Signed-off-by: huawei.xhw <huawei.xhw@alibaba-inc.com>
> As in previous patch, we need your full name for the sign-off.
fixed.
>
>> ---
>>   drivers/bus/pci/linux/pci.c     |   4 --
>>   drivers/bus/pci/linux/pci_uio.c | 123 ++++++++++++++++++++++++++--------------
>>   2 files changed, 82 insertions(+), 45 deletions(-)
>>
>> diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
>> index 0f38abf..0dc99e9 100644
>> --- a/drivers/bus/pci/linux/pci.c
>> +++ b/drivers/bus/pci/linux/pci.c
>> @@ -715,8 +715,6 @@ int rte_pci_write_config(const struct rte_pci_device *device,
>>   		break;
>>   #endif
>>   	case RTE_PCI_KDRV_IGB_UIO:
>> -		pci_uio_ioport_read(p, data, len, offset);
>> -		break;
> I think this part should be in patch 1.
Patch 1 handles IO port map.
Patch 2 unifies IO/MMIO.
Patch 3 handles vfio.
I feel the current split is clearer.
>
>>   	case RTE_PCI_KDRV_UIO_GENERIC:
>>   		pci_uio_ioport_read(p, data, len, offset);
>>   		break;
>> @@ -736,8 +734,6 @@ int rte_pci_write_config(const struct rte_pci_device *device,
>>   		break;
>>   #endif
>>   	case RTE_PCI_KDRV_IGB_UIO:
>> -		pci_uio_ioport_write(p, data, len, offset);
>> -		break;
> Same here.
>
>>   	case RTE_PCI_KDRV_UIO_GENERIC:
>>   		pci_uio_ioport_write(p, data, len, offset);
>>   		break;
>> diff --git a/drivers/bus/pci/linux/pci_uio.c b/drivers/bus/pci/linux/pci_uio.c
>> index 01f2a40..c19382f 100644
>> --- a/drivers/bus/pci/linux/pci_uio.c
>> +++ b/drivers/bus/pci/linux/pci_uio.c
>> @@ -379,14 +379,9 @@
>>   	char buf[BUFSIZ];
>>   	uint64_t phys_addr, end_addr, flags;
>>   	unsigned long base;
>> +	bool iobar;
>>   	int i;
>>   
>> -	if (rte_eal_iopl_init() != 0) {
>> -		RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for PCI device %s\n",
>> -			__func__, dev->name);
>> -		return -1;
>> -	}
>> -
>>   	/* open and read addresses of the corresponding resource in sysfs */
>>   	snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT "/resource",
>>   		rte_pci_get_sysfs_path(), dev->addr.domain, dev->addr.bus,
>> @@ -408,15 +403,30 @@
>>   		&end_addr, &flags) < 0)
>>   		goto error;
>>   
>> -	if (!(flags & IORESOURCE_IO)) {
>> -		RTE_LOG(ERR, EAL, "%s(): bar resource other than IO is not supported\n", __func__);
>> +	if (flags & IORESOURCE_IO) {
>> +		iobar = 1;
>> +		base = (unsigned long)phys_addr;
>> +		RTE_LOG(INFO, EAL, "%s(): PIO BAR %08lx detected\n", __func__, base);
>> +	} else if (flags & IORESOURCE_MEM) {
>> +		iobar = 0;
>> +		base = (unsigned long)dev->mem_resource[bar].addr;
>> +		RTE_LOG(INFO, EAL, "%s(): MMIO BAR %08lx detected\n", __func__, base);
>> +	} else {
>> +		RTE_LOG(ERR, EAL, "%s(): unknown BAR type\n", __func__);
>> +		goto error;
>> +	}
>> +
>> +
>> +	if (iobar && rte_eal_iopl_init() != 0) {
>> +		RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for PCI device %s\n",
>> +			__func__, dev->name);
>>   		goto error;
>>   	}
>> -	base = (unsigned long)phys_addr;
>> -	RTE_LOG(INFO, EAL, "%s(): PIO BAR %08lx detected\n", __func__, base);
>>   
>> -	if (base > UINT16_MAX)
>> +	if (iobar && (base > UINT16_MAX)) {
>> +		RTE_LOG(ERR, EAL, "%s(): %08lx too large PIO resource\n", __func__, base);
>>   		goto error;
>> +	}
> It looks like above check could be moved directly to (flags &
> IORESOURCE_IO) case, so iobar boolean is not needed.
Yes, the code is clearer with your suggestion.
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v5 3/3] PCI: don't use vfio ioctl call to access PIO resource
  2021-01-20 14:54         ` 谢华伟(此时此刻)
@ 2021-01-21  8:29           ` Maxime Coquelin
  2021-01-21 14:57             ` 谢华伟(此时此刻)
  0 siblings, 1 reply; 58+ messages in thread
From: Maxime Coquelin @ 2021-01-21  8:29 UTC (permalink / raw)
  To: 谢华伟(此时此刻),
	ferruh.yigit
  Cc: dev, anatoly.burakov, david.marchand, zhihong.wang, chenbo.xia, grive
On 1/20/21 3:54 PM, 谢华伟(此时此刻) wrote:
> 
> On 2021/1/13 0:58, Maxime Coquelin wrote:
>>
>> On 1/12/21 10:37 AM, Maxime Coquelin wrote:
>>> bus/pci: ...
>>>
>>> On 10/22/20 5:51 PM, 谢华伟(此时此刻) wrote:
>>>> From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
>>>>
>>>> VFIO should use the same way to map/read/write PORT IO as UIO, for
>>>> virtio PMD.
>>> Please provide more details in the commit message on why the way VFIO
>>> works today is wrong (The cover letter is lost once applied).
> ok
>>>
>>>> Signed-off-by: huawei.xhw <huawei.xhw@alibaba-inc.com>
>>> Same comment about name format as on previous patches.
>>>
>>>> ---
>>>>   drivers/bus/pci/linux/pci.c     | 8 ++++----
>>>>   drivers/bus/pci/linux/pci_uio.c | 4 +++-
>>>>   2 files changed, 7 insertions(+), 5 deletions(-)
>>>>
>>>> diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
>>>> index 0dc99e9..2ed9f2b 100644
>>>> --- a/drivers/bus/pci/linux/pci.c
>>>> +++ b/drivers/bus/pci/linux/pci.c
>>>> @@ -687,7 +687,7 @@ int rte_pci_write_config(const struct
>>>> rte_pci_device *device,
>>>>   #ifdef VFIO_PRESENT
>>>>       case RTE_PCI_KDRV_VFIO:
>>>>           if (pci_vfio_is_enabled())
>>>> -            ret = pci_vfio_ioport_map(dev, bar, p);
>>>> +            ret = pci_uio_ioport_map(dev, bar, p);
>>> Doesn't it create a regression with regards to needed capabilities?
>>> My understanding is that before this patch we don't need to call iopl(),
>>> whereas once applied it is required, correct?
>> I did some testing today, and think it is not a regression with para-
>> virtualized Virtio devices.
>>
>> Indeed, I thought it would be a regression with Legacy devices when
>> IOMMU is enabled and the program is run as non-root (IOMMU enabled
>> just to suport IOVA as VA mode). But it turns out para-virtualized
>> Virtio legacy device and vIOMMU enabled is not a supported configuration
>> by QEMU.
>>
>> Note that when noiommu mode is enabled, the app needs cap_sys_rawio, so
>> same as iopl(). No regression in this case too.
>>
>> That said, with real (non para-virtualized) Virtio device using PIO like
>> yours, doesn't your patch introduce a restriction for your device that
>> it will require cap_sys_rawio whereas it would not be needed?
> 
> I don't catch the regression issue.
> 
> With real virtio device(hardware implemented), if it is using MMIO, no
> cap_sys_rawio is required.
> 
> If it is using PIO, iopl is required always.
My understanding of the Kernel VFIO driver is that cap_sys_rawio is only
necessary in noiommu mode, i.e. when VFIO is loaded with the
enable_unsafe_noiommu_mode parameter set. The doc for this parameter seems
to validate my understanding of the code:
"
MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU
mode.  This mode provides no device isolation, no DMA translation, no
host kernel protection, cannot be used for device assignment to virtual
machines, requires RAWIO permissions, and will taint the kernel.  If you
do not know what this is for, step away. (default: false)");
"
I think that using inb/outb in the case of VFIO with IOMMU enabled won't
work without cap_sys_rawio, and using it in the case of VFIO with IOMMU
disabled just bypasses VFIO and so is not correct.
In my opinion, what we should do is to add something like this in the
DPDK documentation:
 - MMIO BAR: VFIO with IOMMU enabled recommended. Equivalent performance
as with IGB UIO or VFIO with NOIOMMU. VFIO with IOMMU is recommended for
security reasons.
 - PIO BAR: VFIO with IOMMU enabled is recommended for security reasons,
providing proper isolation and not requiring cap_sys_rawio. However, use
of IOMMU is not always possible in some cases (e.g. para-virtualized
Virtio-net legacy device). Also, using VFIO for PIO BAR accesses
has an impact on performance, as it uses pread/pwrite syscalls,
whereas UIO drivers use inb/outb. If security is not a concern or IOMMU
is not available, one might consider using UIO driver in this case for
performance reasons.
What do you think?
Thanks,
Maxime
> 
>> Thanks,
>> Maxime
>>
>>> Regards,
>>> Maxime
>>>
> 
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v5 0/3] support both PIO and MMIO BAR for virtio PMD
  2021-01-21  4:12     ` 谢华伟(此时此刻)
@ 2021-01-21  8:47       ` Maxime Coquelin
  2021-01-21 13:51         ` 谢华伟(此时此刻)
  0 siblings, 1 reply; 58+ messages in thread
From: Maxime Coquelin @ 2021-01-21  8:47 UTC (permalink / raw)
  To: 谢华伟(此时此刻),
	ferruh.yigit
  Cc: dev, anatoly.burakov, david.marchand, zhihong.wang, chenbo.xia, grive
On 1/21/21 5:12 AM, 谢华伟(此时此刻) wrote:
> 
> On 2021/1/13 1:37, Maxime Coquelin wrote:
>>
>> On 10/22/20 5:51 PM, 谢华伟(此时此刻) wrote:
>>> From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
>>>
>>> Legacy virtio-pci only supports PIO BAR resource. As we need to
>>> create lots of
>>> virtio devices and PIO resource on x86 is very limited, we expose
>>> MMIO BAR.
>>>
>>> Kernel supports both PIO  and MMIO BAR for legacy virtio-pci device.
>>> We handles
>>> different type of BAR in the similar way.
>>>
>>> In previous implementation, with igb_uio we get PIO address from igb_uio
>>> sysfs entry; with uio_pci_generic, we get PIO address from
>>> /proc/ioports.
>>> For PIO/MMIO RW, there is different path for different drivers and arch.
>>> For VFIO, PIO/MMIO RW is through syscall, which has big performance
>>> issue.
>> Regarding the performance issue, do you have some numbers to share?
>> AFAICS, it can only have an impact on performance when interrupt mode is
>> used or queue notification is enabled.
>>
>> Does your HW Virtio implementation requires notification?
> 
> Yes, hardware needs notification to tell which queue has more buffer.
> 
> vhost backend also needs notification when it is not running in polling
> mode.
> 
> It is easy for software backend to sync with frontend whether it needs
> notification through memory but a big burden for hardware.
Yes, I understand, thanks for the clarification.
> Anyway, using vfio ioctl isn't needed at all. virtio PMD is only the
> consumer of pci_vfio_ioport_read.
My understanding is that using VFIO read/write ops is required for IOMMU
enabled case without cap_sys_rawio. And anyway, using inb/outb is just
bypassing VFIO. As I suggest in my other reply, it is better to document
that in the case of devices having PIO BARs, the user should consider
using UIO driver if performance is a concern.
> we could consider if we still need pci_vfio_ioport_read related API in
> future.
I disagree. I think the pci_vfio_ioport_* API is required at least for
the IOMMU enabled case.
Documentation is the way to go in my opinion, we can also add a warning
that performance may be degraded compared to UIO in
pci_vfio_ioport_map() when IOMMU is disabled if you think it may help
the users.
Thanks,
Maxime
> /huawei
>>
>> Is performance the only issue to have your HW working with Virtio PMD,
>> or is this series also fixing some functionnal issues?
>>
>> Best regards,
>> Maxime
>>
> 
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v5 0/3] support both PIO and MMIO BAR for virtio PMD
  2021-01-21  8:47       ` Maxime Coquelin
@ 2021-01-21 13:51         ` 谢华伟(此时此刻)
  0 siblings, 0 replies; 58+ messages in thread
From: 谢华伟(此时此刻) @ 2021-01-21 13:51 UTC (permalink / raw)
  To: Maxime Coquelin, ferruh.yigit
  Cc: dev, anatoly.burakov, david.marchand, zhihong.wang, chenbo.xia, grive
On 2021/1/21 16:47, Maxime Coquelin wrote:
>
> On 1/21/21 5:12 AM, 谢华伟(此时此刻) wrote:
>> On 2021/1/13 1:37, Maxime Coquelin wrote:
>>> On 10/22/20 5:51 PM, 谢华伟(此时此刻) wrote:
>>>> From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
>>>>
>>>> Legacy virtio-pci only supports PIO BAR resource. As we need to
>>>> create lots of
>>>> virtio devices and PIO resource on x86 is very limited, we expose
>>>> MMIO BAR.
>>>>
>>>> Kernel supports both PIO  and MMIO BAR for legacy virtio-pci device.
>>>> We handles
>>>> different type of BAR in the similar way.
>>>>
>>>> In previous implementation, with igb_uio we get PIO address from igb_uio
>>>> sysfs entry; with uio_pci_generic, we get PIO address from
>>>> /proc/ioports.
>>>> For PIO/MMIO RW, there is different path for different drivers and arch.
>>>> For VFIO, PIO/MMIO RW is through syscall, which has big performance
>>>> issue.
>>> Regarding the performance issue, do you have some numbers to share?
>>> AFAICS, it can only have an impact on performance when interrupt mode is
>>> used or queue notification is enabled.
>>>
>>> Does your HW Virtio implementation requires notification?
>> Yes, hardware needs notification to tell which queue has more buffer.
>>
>> vhost backend also needs notification when it is not running in polling
>> mode.
>>
>> It is easy for software backend to sync with frontend whether it needs
>> notification through memory but a big burden for hardware.
> Yes, I understand, thanks for the clarification.
>
>> Anyway, using vfio ioctl isn't needed at all. virtio PMD is only the
>> consumer of pci_vfio_ioport_read.
> My understanding is that using VFIO read/write ops is required for IOMMU
> enabled case without cap_sys_rawio. And anyway, using inb/outb is just
> bypassing VFIO. As I suggest in my other reply, it is better to document
> that in the case of devices having PIO BARs, the user should consider
> using UIO driver if performance is a concern.
Got it. So the user could read/write PIO using VFIO without iopl permission,
with some performance penalty.
>> we could consider if we still need pci_vfio_ioport_read related API in
>> future.
> I disagree. I think the pci_vfio_ioport_* API is required at least for
> the IOMMU enabled case.
>
> Documentation is the way to go in my opinion, we can also add a warning
> that performance may be degraded compared to UIO in
> pci_vfio_ioport_map() when IOMMU is disabled if you think it may help
> the users.
>
> Thanks,
> Maxime
>
>> /huawei
>>> Is performance the only issue to have your HW working with Virtio PMD,
>>> or is this series also fixing some functionnal issues?
>>>
>>> Best regards,
>>> Maxime
>>>
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v5 3/3] PCI: don't use vfio ioctl call to access PIO resource
  2021-01-21  8:29           ` Maxime Coquelin
@ 2021-01-21 14:57             ` 谢华伟(此时此刻)
  2021-01-21 15:00               ` 谢华伟(此时此刻)
  2021-01-21 15:38               ` Maxime Coquelin
  0 siblings, 2 replies; 58+ messages in thread
From: 谢华伟(此时此刻) @ 2021-01-21 14:57 UTC (permalink / raw)
  To: Maxime Coquelin, ferruh.yigit
  Cc: dev, anatoly.burakov, david.marchand, zhihong.wang, chenbo.xia, grive
On 2021/1/21 16:29, Maxime Coquelin wrote:
>
> On 1/20/21 3:54 PM, 谢华伟(此时此刻) wrote:
>> On 2021/1/13 0:58, Maxime Coquelin wrote:
>>> On 1/12/21 10:37 AM, Maxime Coquelin wrote:
>>>> bus/pci: ...
>>>>
>>>> On 10/22/20 5:51 PM, 谢华伟(此时此刻) wrote:
>>>>> From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
>>>>>
>>>>> VFIO should use the same way to map/read/write PORT IO as UIO, for
>>>>> virtio PMD.
>>>> Please provide more details in the commit message on why the way VFIO
>>>> works today is wrong (The cover letter is lost once applied).
>> ok
>>>>> Signed-off-by: huawei.xhw <huawei.xhw@alibaba-inc.com>
>>>> Same comment about name format as on previous patches.
>>>>
>>>>> ---
>>>>>    drivers/bus/pci/linux/pci.c     | 8 ++++----
>>>>>    drivers/bus/pci/linux/pci_uio.c | 4 +++-
>>>>>    2 files changed, 7 insertions(+), 5 deletions(-)
>>>>>
>>>>> diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
>>>>> index 0dc99e9..2ed9f2b 100644
>>>>> --- a/drivers/bus/pci/linux/pci.c
>>>>> +++ b/drivers/bus/pci/linux/pci.c
>>>>> @@ -687,7 +687,7 @@ int rte_pci_write_config(const struct
>>>>> rte_pci_device *device,
>>>>>    #ifdef VFIO_PRESENT
>>>>>        case RTE_PCI_KDRV_VFIO:
>>>>>            if (pci_vfio_is_enabled())
>>>>> -            ret = pci_vfio_ioport_map(dev, bar, p);
>>>>> +            ret = pci_uio_ioport_map(dev, bar, p);
>>>> Doesn't it create a regression with regards to needed capabilities?
>>>> My understanding is that before this patch we don't need to call iopl(),
>>>> whereas once applied it is required, correct?
>>> I did some testing today, and think it is not a regression with para-
>>> virtualized Virtio devices.
>>>
>>> Indeed, I thought it would be a regression with Legacy devices when
>>> IOMMU is enabled and the program is run as non-root (IOMMU enabled
>>> just to suport IOVA as VA mode). But it turns out para-virtualized
>>> Virtio legacy device and vIOMMU enabled is not a supported configuration
>>> by QEMU.
>>>
>>> Note that when noiommu mode is enabled, the app needs cap_sys_rawio, so
>>> same as iopl(). No regression in this case too.
>>>
>>> That said, with real (non para-virtualized) Virtio device using PIO like
>>> yours, doesn't your patch introduce a restriction for your device that
>>> it will require cap_sys_rawio whereas it would not be needed?
>> I don't catch the regression issue.
>>
>> With real virtio device(hardware implemented), if it is using MMIO, no
>> cap_sys_rawio is required.
>>
>> If it is using PIO, iopl is required always.
> My understanding of the Kernel VFIO driver is that cap_sys_rawio is only
> necessary in noiommu mode, i.e. when VFIO is loaded with
> enable_unsafe_noiommu parameter set. The doc for this parameters seems
> to validate my understanding of the code:
> "
> MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU
> mode.  This mode provides no device isolation, no DMA translation, no
> host kernel protection, cannot be used for device assignment to virtual
> machines, requires RAWIO permissions, and will taint the kernel.  If you
> do not know what this is for, step away. (default: false)");
> "
>
> I think that using inb/outb in the case of VFIO with IOMMU enabled won't
> work without cap_sys_rawio, and using it in the case of VFIO with IOMMU
> disabled just bypasses VFIO and so is not correct.
Get your concern.
PIO bar:
     HW virtio on HW machine: does any vendor implement hardware virtio
using a PIO bar? I think this isn't right. And I doubt that vfio doesn't
check rawio permission in the syscall in this case.
     Para virtio:  you have no choice to enable unsafe no-iommu mode.  
You must have RAWIO permission.
so with PIO bar, the regression doesn't exist in real world.
Btw, our virtio device is basically MMIO bar, either in hardware machine 
or in pass-throughed virtual machine.
Do you mean we should apply or abandon patch 3? I am OK with either. My first
priority is to enable MMIO bar support.
> In my opinion, what we should do is to add something like this in the
> DPDK documentation:
>
>   - MMIO BAR: VFIO with IOMMU enabled recommended. Equivalent performance
> as with IGB UIO or VFIO with NOIOMMU. VFIO with IOMMU is recommended for
> security reasons.
>   - PIO BAR: VFIO with IOMMU enabled is recommended for security reasons,
> providing proper isolation and not requiring cap_sys_rawio. However, use
> of IOMMU is not always possible in some cases (e.g. para-virtualized
> Virtio-net legacy device). Also, performance of using VFIO for PIO BARs
> accesses has an impact on performance as it uses pread/pwrite syscalls,
> whereas UIO drivers use inb/outb. If security is not a concern or IOMMU
> is not available, one might consider using UIO driver in this case for
> performance reasons.
>
> What do you think?
>>> Thanks,
>>> Maxime
>>>
>>>> Regards,
>>>> Maxime
>>>>
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v5 3/3] PCI: don't use vfio ioctl call to access PIO resource
  2021-01-21 14:57             ` 谢华伟(此时此刻)
@ 2021-01-21 15:00               ` 谢华伟(此时此刻)
  2021-01-21 15:38               ` Maxime Coquelin
  1 sibling, 0 replies; 58+ messages in thread
From: 谢华伟(此时此刻) @ 2021-01-21 15:00 UTC (permalink / raw)
  To: Maxime Coquelin, ferruh.yigit
  Cc: dev, anatoly.burakov, david.marchand, chenbo.xia, grive
>> "
>>
>> I think that using inb/outb in the case of VFIO with IOMMU enabled won't
>> work without cap_sys_rawio, and using it in the case of VFIO with IOMMU
>> disabled just bypasses VFIO and so is not correct.
>
> Get your concern.
>
> PIO bar:
>
>     HW virtio on HW machine: any vendor implements hardware virtio 
> using PIO bar? I think this isn't right. And i dout if vfio doesn't 
> check rawio perssion in the syscall in this case.
>
>     Para virtio:  you have no choice to enable unsafe no-iommu mode.  
> You must have RAWIO permission.
Sorry. typo.  "you have no choice but to enable unsafe no-iommu mode. "
>
> so with PIO bar, the regression doesn't exist in real world.
>
> Btw, our virtio device is basically MMIO bar, either in hardware 
> machine or in pass-throughed virtual machine.
>
>
> Do you mean we apply or abandon patch 3? I am both OK. The first 
> priority to me is to enable MMIO bar support.
>
>
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v5 3/3] PCI: don't use vfio ioctl call to access PIO resource
  2021-01-21 14:57             ` 谢华伟(此时此刻)
  2021-01-21 15:00               ` 谢华伟(此时此刻)
@ 2021-01-21 15:38               ` Maxime Coquelin
  2021-01-22  7:25                 ` 谢华伟(此时此刻)
  1 sibling, 1 reply; 58+ messages in thread
From: Maxime Coquelin @ 2021-01-21 15:38 UTC (permalink / raw)
  To: 谢华伟(此时此刻),
	ferruh.yigit
  Cc: dev, anatoly.burakov, david.marchand, zhihong.wang, chenbo.xia, grive
On 1/21/21 3:57 PM, 谢华伟(此时此刻) wrote:
> 
> On 2021/1/21 16:29, Maxime Coquelin wrote:
>>
>> On 1/20/21 3:54 PM, 谢华伟(此时此刻) wrote:
>>> On 2021/1/13 0:58, Maxime Coquelin wrote:
>>>> On 1/12/21 10:37 AM, Maxime Coquelin wrote:
>>>>> bus/pci: ...
>>>>>
>>>>> On 10/22/20 5:51 PM, 谢华伟(此时此刻) wrote:
>>>>>> From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
>>>>>>
>>>>>> VFIO should use the same way to map/read/write PORT IO as UIO, for
>>>>>> virtio PMD.
>>>>> Please provide more details in the commit message on why the way VFIO
>>>>> works today is wrong (The cover letter is lost once applied).
>>> ok
>>>>>> Signed-off-by: huawei.xhw <huawei.xhw@alibaba-inc.com>
>>>>> Same comment about name format as on previous patches.
>>>>>
>>>>>> ---
>>>>>>    drivers/bus/pci/linux/pci.c     | 8 ++++----
>>>>>>    drivers/bus/pci/linux/pci_uio.c | 4 +++-
>>>>>>    2 files changed, 7 insertions(+), 5 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/bus/pci/linux/pci.c
>>>>>> b/drivers/bus/pci/linux/pci.c
>>>>>> index 0dc99e9..2ed9f2b 100644
>>>>>> --- a/drivers/bus/pci/linux/pci.c
>>>>>> +++ b/drivers/bus/pci/linux/pci.c
>>>>>> @@ -687,7 +687,7 @@ int rte_pci_write_config(const struct
>>>>>> rte_pci_device *device,
>>>>>>    #ifdef VFIO_PRESENT
>>>>>>        case RTE_PCI_KDRV_VFIO:
>>>>>>            if (pci_vfio_is_enabled())
>>>>>> -            ret = pci_vfio_ioport_map(dev, bar, p);
>>>>>> +            ret = pci_uio_ioport_map(dev, bar, p);
>>>>> Doesn't it create a regression with regards to needed capabilities?
>>>>> My understanding is that before this patch we don't need to call
>>>>> iopl(),
>>>>> whereas once applied it is required, correct?
>>>> I did some testing today, and think it is not a regression with para-
>>>> virtualized Virtio devices.
>>>>
>>>> Indeed, I thought it would be a regression with Legacy devices when
>>>> IOMMU is enabled and the program is run as non-root (IOMMU enabled
>>>> just to suport IOVA as VA mode). But it turns out para-virtualized
>>>> Virtio legacy device and vIOMMU enabled is not a supported
>>>> configuration
>>>> by QEMU.
>>>>
>>>> Note that when noiommu mode is enabled, the app needs cap_sys_rawio, so
>>>> same as iopl(). No regression in this case too.
>>>>
>>>> That said, with real (non para-virtualized) Virtio device using PIO
>>>> like
>>>> yours, doesn't your patch introduce a restriction for your device that
>>>> it will require cap_sys_rawio whereas it would not be needed?
>>> I don't catch the regression issue.
>>>
>>> With real virtio device(hardware implemented), if it is using MMIO, no
>>> cap_sys_rawio is required.
>>>
>>> If it is using PIO, iopl is required always.
>> My understanding of the Kernel VFIO driver is that cap_sys_rawio is only
>> necessary in noiommu mode, i.e. when VFIO is loaded with
>> enable_unsafe_noiommu parameter set. The doc for this parameters seems
>> to validate my understanding of the code:
>> "
>> MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU
>> mode.  This mode provides no device isolation, no DMA translation, no
>> host kernel protection, cannot be used for device assignment to virtual
>> machines, requires RAWIO permissions, and will taint the kernel.  If you
>> do not know what this is for, step away. (default: false)");
>> "
>>
>> I think that using inb/outb in the case of VFIO with IOMMU enabled won't
>> work without cap_sys_rawio, and using it in the case of VFIO with IOMMU
>> disabled just bypasses VFIO and so is not correct.
> 
> Get your concern.
> 
> PIO bar:
> 
>     HW virtio on HW machine: any vendor implements hardware virtio using
> PIO bar? I think this isn't right. And i dout if vfio doesn't check
> rawio perssion in the syscall in this case.
I checked the VFIO code, and it only checks for rawio permission if noiommu
mode is enabled.
>     Para virtio:  you have no choice to enable unsafe no-iommu mode. 
> You must have RAWIO permission.
> 
> so with PIO bar, the regression doesn't exist in real world.
>
> 
> Btw, our virtio device is basically MMIO bar, either in hardware machine
> or in pass-throughed virtual machine.
OK, that thing was not clear to me.
> 
> Do you mean we apply or abandon patch 3? I am both OK. The first
> priority to me is to enable MMIO bar support.
OK, so yes, I think we should abandon patch 2 and patch 3.
For patch 1, it looks valid to me, but I'll let Ferruh decide.
For your device, if my understanding is correct, what we need to do is
to support MMIO for legacy devices. Correct?
If so, the change should be in virtio_pci.c. In vtpci_init(), after
modern detection has failed, we should check whether the BAR is PIO or MMIO
based on the flag. The result can be saved in struct virtio_pci_dev.
We would introduce new wrappers like vtpci_legacy_read,
vtpci_legacy_write that would either call rte_pci_ioport_read,
rte_pci_ioport_write in case of PIO, or rte_read32, rte_write32 in case
of MMIO.
It is not too late for this release, as the change will not be that
intrusive. But if you prepare such patch, please base it on top of my
virtio rework series; To make it easier to you, I added it to the dpdk-
next-virtio tree:
https://git.dpdk.org/next/dpdk-next-virtio/log/?h=virtio_pmd_rework_v2
Thanks,
Maxime
> 
>> In my opinion, what we should do is to add something like this in the
>> DPDK documentation:
>>
>>   - MMIO BAR: VFIO with IOMMU enabled recommended. Equivalent performance
>> as with IGB UIO or VFIO with NOIOMMU. VFIO with IOMMU is recommended for
>> security reasons.
>>   - PIO BAR: VFIO with IOMMU enabled is recommended for security reasons,
>> providing proper isolation and not requiring cap_sys_rawio. However, use
>> of IOMMU is not always possible in some cases (e.g. para-virtualized
>> Virtio-net legacy device). Also, performance of using VFIO for PIO BARs
>> accesses has an impact on performance as it uses pread/pwrite syscalls,
>> whereas UIO drivers use inb/outb. If security is not a concern or IOMMU
>> is not available, one might consider using UIO driver in this case for
>> performance reasons.
>>
>> What do you think?
>>>> Thanks,
>>>> Maxime
>>>>
>>>>> Regards,
>>>>> Maxime
>>>>>
> 
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v5 3/3] PCI: don't use vfio ioctl call to access PIO resource
  2021-01-21 15:38               ` Maxime Coquelin
@ 2021-01-22  7:25                 ` 谢华伟(此时此刻)
  2021-01-26 10:44                   ` Maxime Coquelin
  2021-01-26 12:30                   ` 谢华伟(此时此刻)
  0 siblings, 2 replies; 58+ messages in thread
From: 谢华伟(此时此刻) @ 2021-01-22  7:25 UTC (permalink / raw)
  To: Maxime Coquelin, ferruh.yigit
  Cc: dev, anatoly.burakov, david.marchand, zhihong.wang, chenbo.xia, grive
On 2021/1/21 23:38, Maxime Coquelin wrote:
>> Do you mean we apply or abandon patch 3? I am both OK. The first
>> priority to me is to enable MMIO bar support.
> OK, so yes, I think we should abandon patch 2 and patch 3.
> For patch 1, it looks valid to me, but I'll let Ferruh decide.
>
> For your device, if my understanding is correct, what we need to do is
> to support MMIO for legacy devices. Correct?
yes.
> If so, the change should be in virtio_pci.c. In vtpci_init(), after
> modern detection has failed, we should check the the BAR is PIO or MMIO
> based on the flag. the result can be saved in struct virtio_pci_dev.
>
>
> We would introduce new wrappers like vtpci_legacy_read,
> vtpci_legacy_write that would either call rte_pci_ioport_read,
> rte_pci_ioport_read in case of PIO, or rte_read32, rte_write32 in case
> of MMIO.
There are two choices.
1, apply patch 2.
     IO/MMIO ports are mapped and accessed using the same API. The kernel
does it the same way, as follows:
             io_addr = pci_iomap
                 get PIO directly or ioremap
             iowrite16/32(val, io_addr + offset)
I think applying patch 2 is the correct choice. It is a fix. The driver
should not need to know whether the bar is PIO or MMIO. The "ioport" in the
ioport_xx API means IO, not PIO.
Btw, it only affects virtio PMD,  not that intrusive.
  2, virtio specific change to enable MMIO support.
Compared with choice 1, I feel it is not as clean and pretty.
>
> It is not too late for this release, as the change will not be that
> intrusive. But if you prepare such patch, please base it on top of my
> virtio rework series; To make it easier to you, I added it to the dpdk-
> next-virtio tree:
> https://git.dpdk.org/next/dpdk-next-virtio/log/?h=virtio_pmd_rework_v2
>
> Thanks,
> Maxime
>
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v5 1/3] PCI: use PCI standard sysfs entry to get PIO address
  2021-01-14 18:23       ` 谢华伟(此时此刻)
@ 2021-01-24 15:10         ` Xueming(Steven) Li
  0 siblings, 0 replies; 58+ messages in thread
From: Xueming(Steven) Li @ 2021-01-24 15:10 UTC (permalink / raw)
  To: 谢华伟(此时此刻),
	Maxime Coquelin, ferruh.yigit
  Cc: dev, anatoly.burakov, david.marchand, zhihong.wang, chenbo.xia, grive
Hi Huawei,
>-----Original Message-----
>From: dev <dev-bounces@dpdk.org> On Behalf Of 谢华伟(此时此刻)
>Sent: Friday, January 15, 2021 2:24 AM
>To: Maxime Coquelin <maxime.coquelin@redhat.com>;
>ferruh.yigit@intel.com
>Cc: dev@dpdk.org; anatoly.burakov@intel.com;
>david.marchand@redhat.com; zhihong.wang@intel.com;
>chenbo.xia@intel.com; grive@u256.net
>Subject: Re: [dpdk-dev] [PATCH v5 1/3] PCI: use PCI standard sysfs entry to get
>PIO address
>
>
>On 2021/1/12 16:07, Maxime Coquelin wrote:
>> Hi Huawei,
>>
>> The title should be under the form:
>> "bus/pci: use PCI standard sysfs entry to get PIO address"
>>
>> On 10/22/20 5:51 PM, 谢华伟(此时此刻) wrote:
>>> From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
>>>
>>> Previously with igb_uio we get PIO address from igb_uio sysfs entry,
>>> with uio_pci_generic, we get PIO address from /proc/ioports.
It would be great to explain a little bit more about what this patch is trying to do.
>>>
>>> Signed-off-by: huawei.xhw <huawei.xhw@alibaba-inc.com>
>> In order to comply with the contribution rules, your name must be
>> disaplyed under the form:
>>
>> Signed-off-by: Firstname Lastname <huawei.xhw@alibaba-inc.com>
>Would fix this.
>>> ---
>>>   drivers/bus/pci/linux/pci.c     | 77 -----------------------------------------
>>>   drivers/bus/pci/linux/pci_uio.c | 64 ++++++++++++++++++++++++----------
>>>   2 files changed, 46 insertions(+), 95 deletions(-)
>>>
>>> diff --git a/drivers/bus/pci/linux/pci.c
>>> b/drivers/bus/pci/linux/pci.c index 2e1808b..0f38abf 100644
>>> --- a/drivers/bus/pci/linux/pci.c
>>> +++ b/drivers/bus/pci/linux/pci.c
>>> @@ -677,71 +677,6 @@ int rte_pci_write_config(const struct
>rte_pci_device *device,
>>>   	}
>>>   }
>>>
>>> -#if defined(RTE_ARCH_X86)
>>> -static int
>>> -pci_ioport_map(struct rte_pci_device *dev, int bar __rte_unused,
>>> -		struct rte_pci_ioport *p)
>>> -{
>>> -	uint16_t start, end;
>>> -	FILE *fp;
>>> -	char *line = NULL;
>>> -	char pci_id[16];
>>> -	int found = 0;
>>> -	size_t linesz;
>>> -
>>> -	if (rte_eal_iopl_init() != 0) {
>>> -		RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for
>PCI device %s\n",
>>> -			__func__, dev->name);
>>> -		return -1;
>>> -	}
>>> -
>>> -	snprintf(pci_id, sizeof(pci_id), PCI_PRI_FMT,
>>> -		 dev->addr.domain, dev->addr.bus,
>>> -		 dev->addr.devid, dev->addr.function);
>>> -
>>> -	fp = fopen("/proc/ioports", "r");
>>> -	if (fp == NULL) {
>>> -		RTE_LOG(ERR, EAL, "%s(): can't open ioports\n", __func__);
>>> -		return -1;
>>> -	}
>>> -
>>> -	while (getdelim(&line, &linesz, '\n', fp) > 0) {
>>> -		char *ptr = line;
>>> -		char *left;
>>> -		int n;
>>> -
>>> -		n = strcspn(ptr, ":");
>>> -		ptr[n] = 0;
>>> -		left = &ptr[n + 1];
>>> -
>>> -		while (*left && isspace(*left))
>>> -			left++;
>>> -
>>> -		if (!strncmp(left, pci_id, strlen(pci_id))) {
>>> -			found = 1;
>>> -
>>> -			while (*ptr && isspace(*ptr))
>>> -				ptr++;
>>> -
>>> -			sscanf(ptr, "%04hx-%04hx", &start, &end);
>>> -
>>> -			break;
>>> -		}
>>> -	}
>>> -
>>> -	free(line);
>>> -	fclose(fp);
>>> -
>>> -	if (!found)
>>> -		return -1;
>>> -
>>> -	p->base = start;
>>> -	RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%x\n", start);
>>> -
>>> -	return 0;
>>> -}
>>> -#endif
>>> -
>>>   int
>>>   rte_pci_ioport_map(struct rte_pci_device *dev, int bar,
>>>   		struct rte_pci_ioport *p)
>>> @@ -756,14 +691,8 @@ int rte_pci_write_config(const struct
>rte_pci_device *device,
>>>   		break;
>>>   #endif
>>>   	case RTE_PCI_KDRV_IGB_UIO:
>>> -		ret = pci_uio_ioport_map(dev, bar, p);
>>> -		break;
>>>   	case RTE_PCI_KDRV_UIO_GENERIC:
>>> -#if defined(RTE_ARCH_X86)
>>> -		ret = pci_ioport_map(dev, bar, p);
>>> -#else
>>>   		ret = pci_uio_ioport_map(dev, bar, p); -#endif
>>>   		break;
>>>   	default:
>>>   		break;
>>> @@ -830,14 +759,8 @@ int rte_pci_write_config(const struct
>rte_pci_device *device,
>>>   		break;
>>>   #endif
>>>   	case RTE_PCI_KDRV_IGB_UIO:
>>> -		ret = pci_uio_ioport_unmap(p);
>>> -		break;
>>>   	case RTE_PCI_KDRV_UIO_GENERIC:
>>> -#if defined(RTE_ARCH_X86)
>>> -		ret = 0;
>>> -#else
>>>   		ret = pci_uio_ioport_unmap(p);
>>> -#endif
>>>   		break;
>>>   	default:
>>>   		break;
>>> diff --git a/drivers/bus/pci/linux/pci_uio.c
>>> b/drivers/bus/pci/linux/pci_uio.c index f3305a2..01f2a40 100644
>>> --- a/drivers/bus/pci/linux/pci_uio.c
>>> +++ b/drivers/bus/pci/linux/pci_uio.c
>>> @@ -373,10 +373,13 @@
>>>   pci_uio_ioport_map(struct rte_pci_device *dev, int bar,
>>>   		   struct rte_pci_ioport *p)
>>>   {
>>> +	FILE *f = NULL;
>>>   	char dirname[PATH_MAX];
>>>   	char filename[PATH_MAX];
>>> -	int uio_num;
>>> -	unsigned long start;
>>> +	char buf[BUFSIZ];
>>> +	uint64_t phys_addr, end_addr, flags;
>>> +	unsigned long base;
>>> +	int i;
>>>
>>>   	if (rte_eal_iopl_init() != 0) {
>>>   		RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for
>PCI
>>> device %s\n", @@ -384,41 +387,66 @@
>>>   		return -1;
>>>   	}
>>>
>>> -	uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 0);
>>> -	if (uio_num < 0)
>>> +	/* open and read addresses of the corresponding resource in sysfs */
>>> +	snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT "/resource",
>>> +		rte_pci_get_sysfs_path(), dev->addr.domain, dev->addr.bus,
>>> +		dev->addr.devid, dev->addr.function);
>>> +	f = fopen(filename, "r");
>>> +	if (f == NULL) {
>>> +		RTE_LOG(ERR, EAL, "%s(): Cannot open sysfs resource: %s\n",
>>> +			__func__, strerror(errno));
>>>   		return -1;
>>> +	}
>>>
>>> -	/* get portio start */
>>> -	snprintf(filename, sizeof(filename),
>>> -		 "%s/portio/port%d/start", dirname, bar);
>>> -	if (eal_parse_sysfs_value(filename, &start) < 0) {
>>> -		RTE_LOG(ERR, EAL, "%s(): cannot parse portio start\n",
>>> -			__func__);
>>> -		return -1;
>>> +	for (i = 0; i < bar + 1; i++) {
>>> +		if (fgets(buf, sizeof(buf), f) == NULL) {
>>> +			RTE_LOG(ERR, EAL, "%s(): Cannot read sysfs
>resource\n", __func__);
>>> +			goto error;
>>> +		}
>>>   	}
>>> -	/* ensure we don't get anything funny here, read/write will cast to
>>> -	 * uin16_t */
>>> -	if (start > UINT16_MAX)
>>> -		return -1;
>>> +	if (pci_parse_one_sysfs_resource(buf, sizeof(buf), &phys_addr,
>>> +		&end_addr, &flags) < 0)
>>> +		goto error;
>>> +
>>> +	if (!(flags & IORESOURCE_IO)) {
>>> +		RTE_LOG(ERR, EAL, "%s(): bar resource other than IO is not
>supported\n", __func__);
>>> +		goto error;
>>> +	}
>>> +	base = (unsigned long)phys_addr;
>>> +	RTE_LOG(INFO, EAL, "%s(): PIO BAR %08lx detected\n", __func__,
>>> +base);
Are you sure INFO level is required? BTW, there is a similar log below.
>>> +
>>> +	if (base > UINT16_MAX)
>>> +		goto error;
>>>
>>>   	/* FIXME only for primary process ? */
>>>   	if (dev->intr_handle.type == RTE_INTR_HANDLE_UNKNOWN) {
>>> +		int uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname),
>0);
>>> +		if (uio_num < 0) {
>>> +			RTE_LOG(ERR, EAL, "cannot open %s: %s\n",
>>> +				dirname, strerror(errno));
>>> +			goto error;
>>> +		}
>>>
>>>   		snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num);
>>>   		dev->intr_handle.fd = open(filename, O_RDWR);
>>>   		if (dev->intr_handle.fd < 0) {
>>>   			RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
>>>   				filename, strerror(errno));
>>> -			return -1;
>>> +			goto error;
>>>   		}
>>>   		dev->intr_handle.type = RTE_INTR_HANDLE_UIO;
>>>   	}
>>>
>>> -	RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%lx\n", start);
>>> +	RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%lx\n", base);
>>>
>>> -	p->base = start;
>>> +	p->base = base;
>>>   	p->len = 0;
>>> +	fclose(f);
>>>   	return 0;
>>> +error:
>>> +	if (f)
>>> +		fclose(f);
>>> +	return -1;
>>>   }
>>>   #else
>>>   int
>>>
>> I think it makes sense to have a common way for both igb_uio and
>> uio_pci_generic to get the PIO base address.
>>
>> With commit message and title fixed, feel free to add my:
>>
>> Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
>Thanks Maxime.
>>
>> Thanks,
>> Maxime
Thanks,
Xueming
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v5 2/3] PCI: support MMIO in rte_pci_ioport_map/unap/read/write
  2020-10-22 15:51   ` [dpdk-dev] [PATCH v5 2/3] PCI: support MMIO in rte_pci_ioport_map/unap/read/write 谢华伟(此时此刻)
  2021-01-12  8:23     ` Maxime Coquelin
@ 2021-01-24 15:22     ` Xueming(Steven) Li
  2021-01-25  3:08       ` 谢华伟(此时此刻)
  2021-01-27 10:40     ` Ferruh Yigit
  2 siblings, 1 reply; 58+ messages in thread
From: Xueming(Steven) Li @ 2021-01-24 15:22 UTC (permalink / raw)
  To: 谢华伟(此时此刻),
	ferruh.yigit
  Cc: dev, maxime.coquelin, anatoly.burakov, david.marchand,
	zhihong.wang, chenbo.xia, grive
Hi Huawei,
Nice work, just some small comments.
>-----Original Message-----
>From: dev <dev-bounces@dpdk.org> On Behalf Of 谢华伟(此时此刻)
>Sent: Thursday, October 22, 2020 11:51 PM
>To: ferruh.yigit@intel.com
>Cc: dev@dpdk.org; maxime.coquelin@redhat.com;
>anatoly.burakov@intel.com; david.marchand@redhat.com;
>zhihong.wang@intel.com; chenbo.xia@intel.com; grive@u256.net; 谢华伟(此
>时此刻) <huawei.xhw@alibaba-inc.com>
>Subject: [dpdk-dev] [PATCH v5 2/3] PCI: support MMIO in
>rte_pci_ioport_map/unap/read/write
>
>From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
>
>If IO BAR, we get PIO address.
>If MMIO BAR, we get mapped virtual address.
>We distinguish PIO and MMIO by their address like how kernel does.
>ioread/write8/16/32 is provided to access PIO/MMIO.
>BTW, for virtio on arch other than x86, BAR flag indicates PIO but is mapped.
>
>Signed-off-by: huawei.xhw <huawei.xhw@alibaba-inc.com>
>---
> drivers/bus/pci/linux/pci.c     |   4 --
> drivers/bus/pci/linux/pci_uio.c | 123 ++++++++++++++++++++++++++-----------
>---
> 2 files changed, 82 insertions(+), 45 deletions(-)
>
>diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c index
>0f38abf..0dc99e9 100644
>--- a/drivers/bus/pci/linux/pci.c
>+++ b/drivers/bus/pci/linux/pci.c
>@@ -715,8 +715,6 @@ int rte_pci_write_config(const struct rte_pci_device
>*device,
> 		break;
> #endif
> 	case RTE_PCI_KDRV_IGB_UIO:
>-		pci_uio_ioport_read(p, data, len, offset);
>-		break;
> 	case RTE_PCI_KDRV_UIO_GENERIC:
> 		pci_uio_ioport_read(p, data, len, offset);
> 		break;
>@@ -736,8 +734,6 @@ int rte_pci_write_config(const struct rte_pci_device
>*device,
> 		break;
> #endif
> 	case RTE_PCI_KDRV_IGB_UIO:
>-		pci_uio_ioport_write(p, data, len, offset);
>-		break;
> 	case RTE_PCI_KDRV_UIO_GENERIC:
> 		pci_uio_ioport_write(p, data, len, offset);
> 		break;
>diff --git a/drivers/bus/pci/linux/pci_uio.c b/drivers/bus/pci/linux/pci_uio.c
>index 01f2a40..c19382f 100644
>--- a/drivers/bus/pci/linux/pci_uio.c
>+++ b/drivers/bus/pci/linux/pci_uio.c
>@@ -379,14 +379,9 @@
> 	char buf[BUFSIZ];
> 	uint64_t phys_addr, end_addr, flags;
> 	unsigned long base;
>+	bool iobar;
> 	int i;
>
>-	if (rte_eal_iopl_init() != 0) {
>-		RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for
>PCI device %s\n",
>-			__func__, dev->name);
>-		return -1;
>-	}
>-
> 	/* open and read addresses of the corresponding resource in sysfs */
> 	snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT "/resource",
> 		rte_pci_get_sysfs_path(), dev->addr.domain, dev->addr.bus,
>@@ -408,15 +403,30 @@
> 		&end_addr, &flags) < 0)
> 		goto error;
>
>-	if (!(flags & IORESOURCE_IO)) {
>-		RTE_LOG(ERR, EAL, "%s(): bar resource other than IO is not
>supported\n", __func__);
>+	if (flags & IORESOURCE_IO) {
>+		iobar = 1;
>+		base = (unsigned long)phys_addr;
>+		RTE_LOG(INFO, EAL, "%s(): PIO BAR %08lx detected\n",
>__func__, base);
>+	} else if (flags & IORESOURCE_MEM) {
>+		iobar = 0;
>+		base = (unsigned long)dev->mem_resource[bar].addr;
>+		RTE_LOG(INFO, EAL, "%s(): MMIO BAR %08lx detected\n",
>__func__, base);
Same here, INFO level seems chatty.
>+	} else {
>+		RTE_LOG(ERR, EAL, "%s(): unknown BAR type\n", __func__);
>+		goto error;
>+	}
>+
>+
>+	if (iobar && rte_eal_iopl_init() != 0) {
>+		RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for
>PCI device %s\n",
>+			__func__, dev->name);
> 		goto error;
> 	}
Same as Maxime's suggestion, please move this block as well.
>-	base = (unsigned long)phys_addr;
>-	RTE_LOG(INFO, EAL, "%s(): PIO BAR %08lx detected\n", __func__,
>base);
>
>-	if (base > UINT16_MAX)
>+	if (iobar && (base > UINT16_MAX)) {
PIO_MAX is defined below; please use it here. UINT16_MAX is used in patch 1/3 as well.
>+		RTE_LOG(ERR, EAL, "%s(): %08lx too large PIO resource\n",
>__func__,
>+base);
> 		goto error;
>+	}
>
> 	/* FIXME only for primary process ? */
> 	if (dev->intr_handle.type == RTE_INTR_HANDLE_UNKNOWN) { @@ -
>517,6 +527,61 @@  }  #endif
>
>+#define PIO_MAX 0x10000
>+static inline uint8_t ioread8(void *addr) {
>+	uint8_t val;
>+
>+	val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
>+		*(volatile uint8_t *)addr :
>+		inb((unsigned long)addr);
>+
>+	return val;
>+}
>+
>+static inline uint16_t ioread16(void *addr) {
>+	uint16_t val;
>+
>+	val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
>+		*(volatile uint16_t *)addr :
>+		inw((unsigned long)addr);
>+
>+	return val;
>+}
>+
>+static inline uint32_t ioread32(void *addr) {
>+	uint32_t val;
>+
>+	val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
>+		*(volatile uint32_t *)addr :
>+		inl((unsigned long)addr);
>+
>+	return val;
>+}
>+
>+static inline void iowrite8(uint8_t val, void *addr) {
>+	(uint64_t)(uintptr_t)addr >= PIO_MAX ?
>+		*(volatile uint8_t *)addr = val :
>+		outb(val, (unsigned long)addr);
>+}
>+
>+static inline void iowrite16(uint16_t val, void *addr) {
>+	(uint64_t)(uintptr_t)addr >= PIO_MAX ?
>+		*(volatile uint16_t *)addr = val :
>+		outw(val, (unsigned long)addr);
>+}
>+
>+static inline void iowrite32(uint32_t val, void *addr) {
>+	(uint64_t)(uintptr_t)addr >= PIO_MAX ?
>+		*(volatile uint32_t *)addr = val :
>+		outl(val, (unsigned long)addr);
>+}
>+
> void
> pci_uio_ioport_read(struct rte_pci_ioport *p,
> 		    void *data, size_t len, off_t offset) @@ -528,25 +593,13
>@@
> 	for (d = data; len > 0; d += size, reg += size, len -= size) {
> 		if (len >= 4) {
> 			size = 4;
>-#if defined(RTE_ARCH_X86)
>-			*(uint32_t *)d = inl(reg);
>-#else
>-			*(uint32_t *)d = *(volatile uint32_t *)reg;
>-#endif
>+			*(uint32_t *)d = ioread32((void *)reg);
> 		} else if (len >= 2) {
> 			size = 2;
>-#if defined(RTE_ARCH_X86)
>-			*(uint16_t *)d = inw(reg);
>-#else
>-			*(uint16_t *)d = *(volatile uint16_t *)reg;
>-#endif
>+			*(uint16_t *)d = ioread16((void *)reg);
> 		} else {
> 			size = 1;
>-#if defined(RTE_ARCH_X86)
>-			*d = inb(reg);
>-#else
>-			*d = *(volatile uint8_t *)reg;
>-#endif
>+			*d = ioread8((void *)reg);
> 		}
> 	}
> }
>@@ -562,25 +615,13 @@
> 	for (s = data; len > 0; s += size, reg += size, len -= size) {
> 		if (len >= 4) {
> 			size = 4;
>-#if defined(RTE_ARCH_X86)
>-			outl_p(*(const uint32_t *)s, reg);
>-#else
>-			*(volatile uint32_t *)reg = *(const uint32_t *)s;
>-#endif
>+			iowrite32(*(const uint32_t *)s, (void *)reg);
> 		} else if (len >= 2) {
> 			size = 2;
>-#if defined(RTE_ARCH_X86)
>-			outw_p(*(const uint16_t *)s, reg);
>-#else
>-			*(volatile uint16_t *)reg = *(const uint16_t *)s;
>-#endif
>+			iowrite16(*(const uint16_t *)s, (void *)reg);
> 		} else {
> 			size = 1;
>-#if defined(RTE_ARCH_X86)
>-			outb_p(*s, reg);
>-#else
>-			*(volatile uint8_t *)reg = *s;
>-#endif
>+			iowrite8(*s, (void *)reg);
> 		}
> 	}
> }
>--
>1.8.3.1
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v5 2/3] PCI: support MMIO in rte_pci_ioport_map/unap/read/write
  2021-01-24 15:22     ` Xueming(Steven) Li
@ 2021-01-25  3:08       ` 谢华伟(此时此刻)
  0 siblings, 0 replies; 58+ messages in thread
From: 谢华伟(此时此刻) @ 2021-01-25  3:08 UTC (permalink / raw)
  To: Xueming(Steven) Li, ferruh.yigit
  Cc: dev, maxime.coquelin, anatoly.burakov, david.marchand,
	zhihong.wang, chenbo.xia, grive
On 2021/1/24 23:22, Xueming(Steven) Li wrote:
>> +	} else if (flags & IORESOURCE_MEM) {
>> +		iobar = 0;
>> +		base = (unsigned long)dev->mem_resource[bar].addr;
>> +		RTE_LOG(INFO, EAL, "%s(): MMIO BAR %08lx detected\n",
>> __func__, base);
> Same here, INFO level seems chatty.
Makes sense. I will remove it.
>
>> +	} else {
>> +		RTE_LOG(ERR, EAL, "%s(): unknown BAR type\n", __func__);
>> +		goto error;
>> +	}
>> +
>> +
>> +	if (iobar && rte_eal_iopl_init() != 0) {
>> +		RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for
>> PCI device %s\n",
>> +			__func__, dev->name);
>> 		goto error;
>> 	}
> Same as Maxime's suggestion, please move this block as well.
Thanks. It is already moved in v6 patch.
>> -	base = (unsigned long)phys_addr;
>> -	RTE_LOG(INFO, EAL, "%s(): PIO BAR %08lx detected\n", __func__,
>> base);
>>
>> -	if (base > UINT16_MAX)
>> +	if (iobar && (base > UINT16_MAX)) {
> PIO_MAX defined below, please use it here. UINT16_MAX used in patch 1/3 as well.
ok.
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v5 3/3] PCI: don't use vfio ioctl call to access PIO resource
  2021-01-22  7:25                 ` 谢华伟(此时此刻)
@ 2021-01-26 10:44                   ` Maxime Coquelin
  2021-01-27 10:32                     ` Ferruh Yigit
  2021-01-26 12:30                   ` 谢华伟(此时此刻)
  1 sibling, 1 reply; 58+ messages in thread
From: Maxime Coquelin @ 2021-01-26 10:44 UTC (permalink / raw)
  To: 谢华伟(此时此刻),
	ferruh.yigit
  Cc: dev, anatoly.burakov, david.marchand, zhihong.wang, chenbo.xia, grive
On 1/22/21 8:25 AM, 谢华伟(此时此刻) wrote:
> 
> On 2021/1/21 23:38, Maxime Coquelin wrote:
>>> Do you mean we apply or abandon patch 3? I am both OK. The first
>>> priority to me is to enable MMIO bar support.
>> OK, so yes, I think we should abandon patch 2 and patch 3.
>> For patch 1, it looks valid to me, but I'll let Ferruh decide.
>>
>> For your device, if my understanding is correct, what we need to do is
>> to support MMIO for legacy devices. Correct?
> yes.
>> If so, the change should be in virtio_pci.c. In vtpci_init(), after
>> modern detection has failed, we should check the the BAR is PIO or MMIO
>> based on the flag. the result can be saved in struct virtio_pci_dev.
>>
>>
>> We would introduce new wrappers like vtpci_legacy_read,
>> vtpci_legacy_write that would either call rte_pci_ioport_read,
>> rte_pci_ioport_read in case of PIO, or rte_read32, rte_write32 in case
>> of MMIO.
> 
> There are two choices.
> 
> 1, apply patch 2.
> 
>     IO/MMIO port are mapped and accessed using the same API. Kernel is
> doing in the same way like the following.
> 
>             io_addr = pci_iomap
> 
>                 get PIO directly or ioremap
> 
>             iowrite16/32(val, io_addr + offset)
> 
> I think applying patch 2 is a correct choice. It is a fix. Driver had
> better not know if bar is PIO or MMIO.  ioport in ioport_xx API means
> IO, not PIO.
> 
> Btw, it only affects virtio PMD,  not that intrusive.
> 
>  2, virtio specific change to enable MMIO support.
> 
> Comparing with choice 1, i feels it is not that clean and pretty.
OK, that makes sense. I am OK with keeping patch 2, but would like
Ferruh's ACK.
Could you please post v6?
Thanks,
Maxime
>>
>> It is not too late for this release, as the change will not be that
>> intrusive. But if you prepare such patch, please base it on top of my
>> virtio rework series; To make it easier to you, I added it to the dpdk-
>> next-virtio tree:
>> https://git.dpdk.org/next/dpdk-next-virtio/log/?h=virtio_pmd_rework_v2
>>
>> Thanks,
>> Maxime
>>
> 
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v5 3/3] PCI: don't use vfio ioctl call to access PIO resource
  2021-01-22  7:25                 ` 谢华伟(此时此刻)
  2021-01-26 10:44                   ` Maxime Coquelin
@ 2021-01-26 12:30                   ` 谢华伟(此时此刻)
  2021-01-26 12:35                     ` Maxime Coquelin
  1 sibling, 1 reply; 58+ messages in thread
From: 谢华伟(此时此刻) @ 2021-01-26 12:30 UTC (permalink / raw)
  To: Maxime Coquelin, ferruh.yigit
  Cc: dev, anatoly.burakov, david.marchand, zhihong.wang, chenbo.xia,
	grive, Xueming(Steven) Li
On 2021/1/22 15:25, chris wrote:
>
> On 2021/1/21 23:38, Maxime Coquelin wrote:
>>> Do you mean we apply or abandon patch 3? I am both OK. The first
>>> priority to me is to enable MMIO bar support.
>> OK, so yes, I think we should abandon patch 2 and patch 3.
>> For patch 1, it looks valid to me, but I'll let Ferruh decide.
>>
>> For your device, if my understanding is correct, what we need to do is
>> to support MMIO for legacy devices. Correct?
> yes.
>> If so, the change should be in virtio_pci.c. In vtpci_init(), after
>> modern detection has failed, we should check the the BAR is PIO or MMIO
>> based on the flag. the result can be saved in struct virtio_pci_dev.
>>
>>
>> We would introduce new wrappers like vtpci_legacy_read,
>> vtpci_legacy_write that would either call rte_pci_ioport_read,
>> rte_pci_ioport_read in case of PIO, or rte_read32, rte_write32 in case
>> of MMIO.
>
> There are two choices.
>
> 1, apply patch 2.
>
>     IO/MMIO port are mapped and accessed using the same API. Kernel is 
> doing in the same way like the following.
>
>             io_addr = pci_iomap
>
>                 get PIO directly or ioremap
>
>             iowrite16/32(val, io_addr + offset)
>
> I think applying patch 2 is a correct choice. It is a fix. Driver had 
> better not know if bar is PIO or MMIO.  ioport in ioport_xx API means 
> IO, not PIO.
>
> Btw, it only affects virtio PMD,  not that intrusive.
>
>  2, virtio specific change to enable MMIO support.
>
> Comparing with choice 1, i feels it is not that clean and pretty.
>
>>
>> It is not too late for this release, as the change will not be that
>> intrusive. But if you prepare such patch, please base it on top of my
>> virtio rework series; To make it easier to you, I added it to the dpdk-
>> next-virtio tree:
>> https://git.dpdk.org/next/dpdk-next-virtio/log/?h=virtio_pmd_rework_v2
>>
Hi Maxime:
Decision on patch 2?
I still think current patch 2 is cleaner.
Thanks,  huawei
>> Maxime
>>
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v5 3/3] PCI: don't use vfio ioctl call to access PIO resource
  2021-01-26 12:30                   ` 谢华伟(此时此刻)
@ 2021-01-26 12:35                     ` Maxime Coquelin
  2021-01-26 14:24                       ` 谢华伟(此时此刻)
  0 siblings, 1 reply; 58+ messages in thread
From: Maxime Coquelin @ 2021-01-26 12:35 UTC (permalink / raw)
  To: 谢华伟(此时此刻),
	ferruh.yigit
  Cc: dev, anatoly.burakov, david.marchand, zhihong.wang, chenbo.xia,
	grive, Xueming(Steven) Li
On 1/26/21 1:30 PM, 谢华伟(此时此刻) wrote:
> 
> On 2021/1/22 15:25, chris wrote:
>>
>> On 2021/1/21 23:38, Maxime Coquelin wrote:
>>>> Do you mean we apply or abandon patch 3? I am both OK. The first
>>>> priority to me is to enable MMIO bar support.
>>> OK, so yes, I think we should abandon patch 2 and patch 3.
>>> For patch 1, it looks valid to me, but I'll let Ferruh decide.
>>>
>>> For your device, if my understanding is correct, what we need to do is
>>> to support MMIO for legacy devices. Correct?
>> yes.
>>> If so, the change should be in virtio_pci.c. In vtpci_init(), after
>>> modern detection has failed, we should check the the BAR is PIO or MMIO
>>> based on the flag. the result can be saved in struct virtio_pci_dev.
>>>
>>>
>>> We would introduce new wrappers like vtpci_legacy_read,
>>> vtpci_legacy_write that would either call rte_pci_ioport_read,
>>> rte_pci_ioport_read in case of PIO, or rte_read32, rte_write32 in case
>>> of MMIO.
>>
>> There are two choices.
>>
>> 1, apply patch 2.
>>
>>     IO/MMIO port are mapped and accessed using the same API. Kernel is
>> doing in the same way like the following.
>>
>>             io_addr = pci_iomap
>>
>>                 get PIO directly or ioremap
>>
>>             iowrite16/32(val, io_addr + offset)
>>
>> I think applying patch 2 is a correct choice. It is a fix. Driver had
>> better not know if bar is PIO or MMIO.  ioport in ioport_xx API means
>> IO, not PIO.
>>
>> Btw, it only affects virtio PMD,  not that intrusive.
>>
>>  2, virtio specific change to enable MMIO support.
>>
>> Comparing with choice 1, i feels it is not that clean and pretty.
>>
>>>
>>> It is not too late for this release, as the change will not be that
>>> intrusive. But if you prepare such patch, please base it on top of my
>>> virtio rework series; To make it easier to you, I added it to the dpdk-
>>> next-virtio tree:
>>> https://git.dpdk.org/next/dpdk-next-virtio/log/?h=virtio_pmd_rework_v2
>>>
> Hi Maxime:
> 
> Decision on patch 2?
> 
> I still think current patch 2 is cleaner.
Hi,
I actually replied one hour ago:
"
OK, that makes sense. I am OK with keeping patch 2, but would like
Ferruh's ACK.
Could you please post v6?
"
Thanks,
Maxime
> Thanks,  huawei
> 
> 
>>> Maxime
>>>
> 
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v5 3/3] PCI: don't use vfio ioctl call to access PIO resource
  2021-01-26 12:35                     ` Maxime Coquelin
@ 2021-01-26 14:24                       ` 谢华伟(此时此刻)
  0 siblings, 0 replies; 58+ messages in thread
From: 谢华伟(此时此刻) @ 2021-01-26 14:24 UTC (permalink / raw)
  To: Maxime Coquelin, ferruh.yigit
  Cc: dev, anatoly.burakov, david.marchand, zhihong.wang, chenbo.xia,
	grive, Xueming(Steven) Li
On 2021/1/26 20:35, Maxime Coquelin wrote:
>
> On 1/26/21 1:30 PM, 谢华伟(此时此刻) wrote:
>> On 2021/1/22 15:25, chris wrote:
>>> On 2021/1/21 23:38, Maxime Coquelin wrote:
>>>>> Do you mean we apply or abandon patch 3? I am both OK. The first
>>>>> priority to me is to enable MMIO bar support.
>>>> OK, so yes, I think we should abandon patch 2 and patch 3.
>>>> For patch 1, it looks valid to me, but I'll let Ferruh decide.
>>>>
>>>> For your device, if my understanding is correct, what we need to do is
>>>> to support MMIO for legacy devices. Correct?
>>> yes.
>>>> If so, the change should be in virtio_pci.c. In vtpci_init(), after
>>>> modern detection has failed, we should check the the BAR is PIO or MMIO
>>>> based on the flag. the result can be saved in struct virtio_pci_dev.
>>>>
>>>>
>>>> We would introduce new wrappers like vtpci_legacy_read,
>>>> vtpci_legacy_write that would either call rte_pci_ioport_read,
>>>> rte_pci_ioport_read in case of PIO, or rte_read32, rte_write32 in case
>>>> of MMIO.
>>> There are two choices.
>>>
>>> 1, apply patch 2.
>>>
>>>      IO/MMIO port are mapped and accessed using the same API. Kernel is
>>> doing in the same way like the following.
>>>
>>>              io_addr = pci_iomap
>>>
>>>                  get PIO directly or ioremap
>>>
>>>              iowrite16/32(val, io_addr + offset)
>>>
>>> I think applying patch 2 is a correct choice. It is a fix. Driver had
>>> better not know if bar is PIO or MMIO.  ioport in ioport_xx API means
>>> IO, not PIO.
>>>
>>> Btw, it only affects virtio PMD,  not that intrusive.
>>>
>>>   2, virtio specific change to enable MMIO support.
>>>
>>> Comparing with choice 1, i feels it is not that clean and pretty.
>>>
>>>> It is not too late for this release, as the change will not be that
>>>> intrusive. But if you prepare such patch, please base it on top of my
>>>> virtio rework series; To make it easier to you, I added it to the dpdk-
>>>> next-virtio tree:
>>>> https://git.dpdk.org/next/dpdk-next-virtio/log/?h=virtio_pmd_rework_v2
>>>>
>> Hi Maxime:
>>
>> Decision on patch 2?
>>
>> I still think current patch 2 is cleaner.
> Hi,
>
> I actually replied one hour ago:
> "
> OK, that makes sense. I am OK with keeping patch 2, but would like
> Ferruh's ACK.
>
> Could you please post v6?
> "
Sorry, I missed it. I will do it.
>
> Thanks,
> Maxime
>
>
>> Thanks,  huawei
>>
>>
>>>> Maxime
>>>>
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v5 3/3] PCI: don't use vfio ioctl call to access PIO resource
  2021-01-26 10:44                   ` Maxime Coquelin
@ 2021-01-27 10:32                     ` Ferruh Yigit
  2021-01-27 12:17                       ` Maxime Coquelin
  2021-01-27 14:43                       ` 谢华伟(此时此刻)
  0 siblings, 2 replies; 58+ messages in thread
From: Ferruh Yigit @ 2021-01-27 10:32 UTC (permalink / raw)
  To: Maxime Coquelin,
	谢华伟(此时此刻)
  Cc: dev, anatoly.burakov, david.marchand, zhihong.wang, chenbo.xia, grive
On 1/26/2021 10:44 AM, Maxime Coquelin wrote:
> 
> 
> On 1/22/21 8:25 AM, 谢华伟(此时此刻) wrote:
>>
>> On 2021/1/21 23:38, Maxime Coquelin wrote:
>>>> Do you mean we apply or abandon patch 3? I am both OK. The first
>>>> priority to me is to enable MMIO bar support.
>>> OK, so yes, I think we should abandon patch 2 and patch 3.
>>> For patch 1, it looks valid to me, but I'll let Ferruh decide.
>>>
>>> For your device, if my understanding is correct, what we need to do is
>>> to support MMIO for legacy devices. Correct?
>> yes.
>>> If so, the change should be in virtio_pci.c. In vtpci_init(), after
>>> modern detection has failed, we should check the the BAR is PIO or MMIO
>>> based on the flag. the result can be saved in struct virtio_pci_dev.
>>>
>>>
>>> We would introduce new wrappers like vtpci_legacy_read,
>>> vtpci_legacy_write that would either call rte_pci_ioport_read,
>>> rte_pci_ioport_read in case of PIO, or rte_read32, rte_write32 in case
>>> of MMIO.
>>
>> There are two choices.
>>
>> 1, apply patch 2.
>>
>>      IO/MMIO port are mapped and accessed using the same API. Kernel is
>> doing in the same way like the following.
>>
>>              io_addr = pci_iomap
>>
>>                  get PIO directly or ioremap
>>
>>              iowrite16/32(val, io_addr + offset)
>>
>> I think applying patch 2 is a correct choice. It is a fix. Driver had
>> better not know if bar is PIO or MMIO.  ioport in ioport_xx API means
>> IO, not PIO.
>>
>> Btw, it only affects virtio PMD,  not that intrusive.
>>
>>   2, virtio specific change to enable MMIO support.
>>
>> Comparing with choice 1, i feels it is not that clean and pretty.
> 
> OK, that makes sense. I am OK with keeping patch 2, but would like
> Ferruh's ACK.
> 
I was waiting for clarification on whether this can be solved in virtio. That now
seems clarified, and the decision is to go with this patch, so I am OK to proceed
with patch 1 & 2.
But the first patch changes how the PIO address is obtained: it changes the Linux
interface used to get the PIO address.
And as far as I can see, the second patch requires this new interface to be able
to access the MEM resources.
I have a concern that this interface change may cause issues with various
distros, kernel versions, etc., and I would prefer that it goes through a full
-rc1 validation cycle.
Huawei, I am aware the patch has been around for a while, but to play it safe I
suggest considering it for early in the next release, so it can be tested enough,
instead of getting it in for -rc2/3 of this release.
Thanks,
ferruh
> Could you please post v6?
> 
> Thanks,
> Maxime
> 
>>>
>>> It is not too late for this release, as the change will not be that
>>> intrusive. But if you prepare such patch, please base it on top of my
>>> virtio rework series; To make it easier to you, I added it to the dpdk-
>>> next-virtio tree:
>>> https://git.dpdk.org/next/dpdk-next-virtio/log/?h=virtio_pmd_rework_v2
>>>
>>> Thanks,
>>> Maxime
>>>
>>
> 
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v5 2/3] PCI: support MMIO in rte_pci_ioport_map/unap/read/write
  2020-10-22 15:51   ` [dpdk-dev] [PATCH v5 2/3] PCI: support MMIO in rte_pci_ioport_map/unap/read/write 谢华伟(此时此刻)
  2021-01-12  8:23     ` Maxime Coquelin
  2021-01-24 15:22     ` Xueming(Steven) Li
@ 2021-01-27 10:40     ` Ferruh Yigit
  2021-01-27 15:34       ` 谢华伟(此时此刻)
  2 siblings, 1 reply; 58+ messages in thread
From: Ferruh Yigit @ 2021-01-27 10:40 UTC (permalink / raw)
  To: 谢华伟(此时此刻)
  Cc: dev, maxime.coquelin, anatoly.burakov, david.marchand,
	zhihong.wang, chenbo.xia, grive
On 10/22/2020 4:51 PM, 谢华伟(此时此刻) wrote:
> From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
> 
> If IO BAR, we get PIO address.
> If MMIO BAR, we get mapped virtual address.
> We distinguish PIO and MMIO by their address like how kernel does.
> ioread/write8/16/32 is provided to access PIO/MMIO.
> BTW, for virtio on arch other than x86, BAR flag indicates PIO but is mapped.
> 
> Signed-off-by: huawei.xhw <huawei.xhw@alibaba-inc.com>
<...>
> @@ -408,15 +403,30 @@
>   		&end_addr, &flags) < 0)
>   		goto error;
>   
> -	if (!(flags & IORESOURCE_IO)) {
> -		RTE_LOG(ERR, EAL, "%s(): bar resource other than IO is not supported\n", __func__);
> +	if (flags & IORESOURCE_IO) {
> +		iobar = 1;
> +		base = (unsigned long)phys_addr;
> +		RTE_LOG(INFO, EAL, "%s(): PIO BAR %08lx detected\n", __func__, base);
> +	} else if (flags & IORESOURCE_MEM) {
> +		iobar = 0;
> +		base = (unsigned long)dev->mem_resource[bar].addr;
Hi Huawei,
At this stage, to have a valid 'addr' the BAR should already be mmap'ed. Can you
please provide the call stack where it is set/mmap'ed, to confirm it will always
be valid at this point?
Thanks,
ferruh
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v5 3/3] PCI: don't use vfio ioctl call to access PIO resource
  2021-01-27 10:32                     ` Ferruh Yigit
@ 2021-01-27 12:17                       ` Maxime Coquelin
  2021-01-27 14:43                       ` 谢华伟(此时此刻)
  1 sibling, 0 replies; 58+ messages in thread
From: Maxime Coquelin @ 2021-01-27 12:17 UTC (permalink / raw)
  To: Ferruh Yigit,
	谢华伟(此时此刻)
  Cc: dev, anatoly.burakov, david.marchand, zhihong.wang, chenbo.xia, grive
On 1/27/21 11:32 AM, Ferruh Yigit wrote:
> On 1/26/2021 10:44 AM, Maxime Coquelin wrote:
>>
>>
>> On 1/22/21 8:25 AM, 谢华伟(此时此刻) wrote:
>>>
>>> On 2021/1/21 23:38, Maxime Coquelin wrote:
>>>>> Do you mean we apply or abandon patch 3? I am both OK. The first
>>>>> priority to me is to enable MMIO bar support.
>>>> OK, so yes, I think we should abandon patch 2 and patch 3.
>>>> For patch 1, it looks valid to me, but I'll let Ferruh decide.
>>>>
>>>> For your device, if my understanding is correct, what we need to do is
>>>> to support MMIO for legacy devices. Correct?
>>> yes.
>>>> If so, the change should be in virtio_pci.c. In vtpci_init(), after
>>>> modern detection has failed, we should check the the BAR is PIO or MMIO
>>>> based on the flag. the result can be saved in struct virtio_pci_dev.
>>>>
>>>>
>>>> We would introduce new wrappers like vtpci_legacy_read,
>>>> vtpci_legacy_write that would either call rte_pci_ioport_read,
>>>> rte_pci_ioport_read in case of PIO, or rte_read32, rte_write32 in case
>>>> of MMIO.
>>>
>>> There are two choices.
>>>
>>> 1, apply patch 2.
>>>
>>>      IO/MMIO port are mapped and accessed using the same API. Kernel is
>>> doing in the same way like the following.
>>>
>>>              io_addr = pci_iomap
>>>
>>>                  get PIO directly or ioremap
>>>
>>>              iowrite16/32(val, io_addr + offset)
>>>
>>> I think applying patch 2 is a correct choice. It is a fix. Driver had
>>> better not know if bar is PIO or MMIO.  ioport in ioport_xx API means
>>> IO, not PIO.
>>>
>>> Btw, it only affects virtio PMD,  not that intrusive.
>>>
>>>   2, virtio specific change to enable MMIO support.
>>>
>>> Comparing with choice 1, i feels it is not that clean and pretty.
>>
>> OK, that makes sense. I am OK with keeping patch 2, but would like
>> Ferruh's ACK.
>>
> 
> I was waiting for clarification if this can be solved in virtio, which
> seems clarified and decided to go with this patch, I am OK to proceed
> with patch 1 & 2.
> 
> But first patch changes how PIO address get, it changes the Linux
> interface used to get the PIO.
> And as far as I can see second patch requires this new interface to be
> able to access the MEM resources.
> 
> I have a concern that this interface change may cause issues with
> various distros, kernel versions etc.. And prefer it goes through a full
> -rc1 validation cycle.
While I think the risk for patch 2 is close to zero, I understand your
concern on patch 1 (especially with the upcoming holidays in China,
which will have an impact on QE capacity).
Huawei, do you think patch 2 can be slightly modified to be applied
alone, without patch 1? If possible, we may be able to pick patch2 for
this release and postpone patch 1 to v21.05?
> Huawei, I am aware the patch is around for a while but to play safe, I
> suggest considering it for early next release, so it can be tested
> enough, instead of getting if for -rc2/3 in this release.
> 
> Thanks,
> ferruh
> 
> 
>> Could you please post v6?
>>
>> Thanks,
>> Maxime
>>
>>>>
>>>> It is not too late for this release, as the change will not be that
>>>> intrusive. But if you prepare such patch, please base it on top of my
>>>> virtio rework series; To make it easier to you, I added it to the dpdk-
>>>> next-virtio tree:
>>>> https://git.dpdk.org/next/dpdk-next-virtio/log/?h=virtio_pmd_rework_v2
>>>>
>>>> Thanks,
>>>> Maxime
>>>>
>>>
>>
> 
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v5 3/3] PCI: don't use vfio ioctl call to access PIO resource
  2021-01-27 10:32                     ` Ferruh Yigit
  2021-01-27 12:17                       ` Maxime Coquelin
@ 2021-01-27 14:43                       ` 谢华伟(此时此刻)
  2021-01-27 16:45                         ` Ferruh Yigit
  1 sibling, 1 reply; 58+ messages in thread
From: 谢华伟(此时此刻) @ 2021-01-27 14:43 UTC (permalink / raw)
  To: Ferruh Yigit, Maxime Coquelin
  Cc: dev, anatoly.burakov, david.marchand, chenbo.xia, grive,
	Xueming(Steven) Li
On 2021/1/27 18:32, Ferruh Yigit wrote:
> I was waiting for clarification if this can be solved in virtio, which 
> seems clarified and decided to go with this patch, I am OK to proceed 
> with patch 1 & 2.
>
> But first patch changes how PIO address get, it changes the Linux 
> interface used to get the PIO.
> And as far as I can see second patch requires this new interface to be 
> able to access the MEM resources.
>
> I have a concern that this interface change may cause issues with 
> various distros, kernel versions etc.. And prefer it goes through a 
> full -rc1 validation cycle.
>
> Huawei, I am aware the patch is around for a while but to play safe, I 
> suggest considering it for early next release, so it can be tested 
> enough, instead of getting if for -rc2/3 in this release.
>
> Thanks,
> ferruh
>
Hi Ferruh and Maxime:
The igb_uio kernel driver gets the resource through pci_resource_start, i.e.,
(dev)->resource[(bar)].start
uio_pci_generic and the generic way in my patch 1 get the resource through
the same interface:
         the PCI device driver exports the BAR resource attributes to userspace
through pci_dev->resource (check resource_show in the kernel's
drivers/pci/pci-sysfs.c)
Architectures other than x86 use the same interface in their pci_uio_ioport_map.
So patch 1 is the most generic way and shouldn't break things.
/proc/ioports should be fully dropped.
Using /proc/ioports was partly my fault at the very beginning. It has caused
so much mess.
Could you please confirm this?
Thanks huawei
>> Could you please post v6?
>>
>> Thanks,
>> Maxime
>>
>>>>
>>>> It is not too late for this release, as the change will not be that
>>>> intrusive. But if you prepare such patch, please base it on top of my
>>>> virtio rework series; To make it easier to you, I added it to the 
>>>> dpdk-
>>>> next-virtio tree:
>>>> https://git.dpdk.org/next/dpdk-next-virtio/log/?h=virtio_pmd_rework_v2
>>>>
>>>> Thanks,
>>>> Maxime
>>>>
>>>
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v5 2/3] PCI: support MMIO in rte_pci_ioport_map/unap/read/write
  2021-01-27 10:40     ` Ferruh Yigit
@ 2021-01-27 15:34       ` 谢华伟(此时此刻)
  2021-01-27 16:45         ` Ferruh Yigit
  0 siblings, 1 reply; 58+ messages in thread
From: 谢华伟(此时此刻) @ 2021-01-27 15:34 UTC (permalink / raw)
  To: Ferruh Yigit
  Cc: dev, maxime.coquelin, anatoly.burakov, david.marchand,
	zhihong.wang, chenbo.xia, grive
On 2021/1/27 18:40, Ferruh Yigit wrote:
> On 10/22/2020 4:51 PM, 谢华伟(此时此刻) wrote:
>> From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
>>
>> If IO BAR, we get PIO address.
>> If MMIO BAR, we get mapped virtual address.
>> We distinguish PIO and MMIO by their address like how kernel does.
>> ioread/write8/16/32 is provided to access PIO/MMIO.
>> BTW, for virtio on arch other than x86, BAR flag indicates PIO but is 
>> mapped.
>>
>> Signed-off-by: huawei.xhw <huawei.xhw@alibaba-inc.com>
>
> <...>
>
>> @@ -408,15 +403,30 @@
>>           &end_addr, &flags) < 0)
>>           goto error;
>>   -    if (!(flags & IORESOURCE_IO)) {
>> -        RTE_LOG(ERR, EAL, "%s(): bar resource other than IO is not 
>> supported\n", __func__);
>> +    if (flags & IORESOURCE_IO) {
>> +        iobar = 1;
>> +        base = (unsigned long)phys_addr;
>> +        RTE_LOG(INFO, EAL, "%s(): PIO BAR %08lx detected\n", 
>> __func__, base);
>> +    } else if (flags & IORESOURCE_MEM) {
>> +        iobar = 0;
>> +        base = (unsigned long)dev->mem_resource[bar].addr;
>
> Hi Huawei,
>
> At this stage, to have a valid 'addr' it should be already mmap'ed, 
> can you please provide the call stack when it is set/mmaped, to 
> confirm it will be always valid at this point?
>
> Thanks,
> ferruh
#0  pci_uio_map_resource_by_index (dev=0x420c700, res_idx=0, 
uio_res=0x1003b19c0, map_idx=0) at ../drivers/bus/pci/linux/pci_uio.c:286
#1  0x000000000095f047 in pci_uio_map_resource (dev=0x420c700) at 
../drivers/bus/pci/pci_common_uio.c:112
#2  0x000000000095f645 in rte_pci_map_device (dev=0x420c700) at 
../drivers/bus/pci/linux/pci.c:81
#3  0x000000000174b5b9 in virtio_read_caps (dev=0x420c700, 
hw=0x1003b2d80) at ../drivers/net/virtio/virtio_pci.c:574
#4  0x000000000174baf9 in vtpci_init (dev=0x420c700, hw=0x1003b2d80) at 
../drivers/net/virtio/virtio_pci.c:697
#5  0x0000000001743c84 in eth_virtio_dev_init (eth_dev=0x3461e40 
<rte_eth_devices>) at ../drivers/net/virtio/virtio_ethdev.c:1954
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v5 2/3] PCI: support MMIO in rte_pci_ioport_map/unap/read/write
  2021-01-27 15:34       ` 谢华伟(此时此刻)
@ 2021-01-27 16:45         ` Ferruh Yigit
  0 siblings, 0 replies; 58+ messages in thread
From: Ferruh Yigit @ 2021-01-27 16:45 UTC (permalink / raw)
  To: 谢华伟(此时此刻)
  Cc: dev, maxime.coquelin, anatoly.burakov, david.marchand,
	zhihong.wang, chenbo.xia, grive
On 1/27/2021 3:34 PM, 谢华伟(此时此刻) wrote:
> 
> On 2021/1/27 18:40, Ferruh Yigit wrote:
>> On 10/22/2020 4:51 PM, 谢华伟(此时此刻) wrote:
>>> From: "huawei.xhw" <huawei.xhw@alibaba-inc.com>
>>>
>>> If IO BAR, we get PIO address.
>>> If MMIO BAR, we get mapped virtual address.
>>> We distinguish PIO and MMIO by their address like how kernel does.
>>> ioread/write8/16/32 is provided to access PIO/MMIO.
>>> BTW, for virtio on arch other than x86, BAR flag indicates PIO but is mapped.
>>>
>>> Signed-off-by: huawei.xhw <huawei.xhw@alibaba-inc.com>
>>
>> <...>
>>
>>> @@ -408,15 +403,30 @@
>>>           &end_addr, &flags) < 0)
>>>           goto error;
>>>   -    if (!(flags & IORESOURCE_IO)) {
>>> -        RTE_LOG(ERR, EAL, "%s(): bar resource other than IO is not 
>>> supported\n", __func__);
>>> +    if (flags & IORESOURCE_IO) {
>>> +        iobar = 1;
>>> +        base = (unsigned long)phys_addr;
>>> +        RTE_LOG(INFO, EAL, "%s(): PIO BAR %08lx detected\n", __func__, base);
>>> +    } else if (flags & IORESOURCE_MEM) {
>>> +        iobar = 0;
>>> +        base = (unsigned long)dev->mem_resource[bar].addr;
>>
>> Hi Huawei,
>>
>> At this stage, to have a valid 'addr' it should be already mmap'ed, can you 
>> please provide the call stack when it is set/mmaped, to confirm it will be 
>> always valid at this point?
>>
>> Thanks,
>> ferruh
> 
> #0  pci_uio_map_resource_by_index (dev=0x420c700, res_idx=0, 
> uio_res=0x1003b19c0, map_idx=0) at ../drivers/bus/pci/linux/pci_uio.c:286
> #1  0x000000000095f047 in pci_uio_map_resource (dev=0x420c700) at 
> ../drivers/bus/pci/pci_common_uio.c:112
> #2  0x000000000095f645 in rte_pci_map_device (dev=0x420c700) at 
> ../drivers/bus/pci/linux/pci.c:81
> #3  0x000000000174b5b9 in virtio_read_caps (dev=0x420c700, hw=0x1003b2d80) at 
> ../drivers/net/virtio/virtio_pci.c:574
> #4  0x000000000174baf9 in vtpci_init (dev=0x420c700, hw=0x1003b2d80) at 
> ../drivers/net/virtio/virtio_pci.c:697
> #5  0x0000000001743c84 in eth_virtio_dev_init (eth_dev=0x3461e40 
> <rte_eth_devices>) at ../drivers/net/virtio/virtio_ethdev.c:1954
> 
> 
Thanks. This looks good.
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v5 3/3] PCI: don't use vfio ioctl call to access PIO resource
  2021-01-27 14:43                       ` 谢华伟(此时此刻)
@ 2021-01-27 16:45                         ` Ferruh Yigit
  2021-01-28 13:43                           ` 谢华伟(此时此刻)
  0 siblings, 1 reply; 58+ messages in thread
From: Ferruh Yigit @ 2021-01-27 16:45 UTC (permalink / raw)
  To: 谢华伟(此时此刻),
	Maxime Coquelin
  Cc: dev, anatoly.burakov, david.marchand, chenbo.xia, grive,
	Xueming(Steven) Li
On 1/27/2021 2:43 PM, 谢华伟(此时此刻) wrote:
> 
> On 2021/1/27 18:32, Ferruh Yigit wrote:
>> I was waiting for clarification if this can be solved in virtio, which seems 
>> clarified and decided to go with this patch, I am OK to proceed with patch 1 & 2.
>>
>> But first patch changes how PIO address get, it changes the Linux interface 
>> used to get the PIO.
>> And as far as I can see second patch requires this new interface to be able to 
>> access the MEM resources.
>>
>> I have a concern that this interface change may cause issues with various 
>> distros, kernel versions etc.. And prefer it goes through a full -rc1 
>> validation cycle.
>>
>> Huawei, I am aware the patch is around for a while but to play safe, I suggest 
>> considering it for early next release, so it can be tested enough, instead of 
>> getting if for -rc2/3 in this release.
>>
>> Thanks,
>> ferruh
>>
> Hi Ferruh and Maxime:
> 
> igb_uio kernel driver gets resource through pci_resource_start, i.e, 
> (dev)->resource[(bar)].start
> 
> uio_pci_generic and the generic way in my patch 1 gets resource through the same 
> interface:
> 
>          pci dev driver exports to userspace the bar resource attributes through 
> pci_dev->resource (check resource_show in kernel's drivers/pci/pci-sysfs.c)
> 
> Other arch than x86 uses the same interface in their pci_uio_ioport_map.
> 
> So patch 1 is the most generic way and shouldn't break things. /proc/ioports 
> should be fully dropped.
> 
> Using /proc/ioport is partly my fault at the very beginning. It causes so much 
> mess.
> 
> Could you please confirm this?
> 
Hi Huawei,
I confirm that interface is already in use, 'pci_parse_sysfs_resource()' does 
similar parsing.
Most probably it is safe, as you and Maxime said, and I am not trying to be 
difficult, just extra cautious here.
Would it cause too much trouble to consider the patch for early next release? This 
gives more time for testing after the patch is merged.
Thanks,
ferruh
> Thanks huawei
> 
>>> Could you please post v6?
>>>
>>> Thanks,
>>> Maxime
>>>
>>>>>
>>>>> It is not too late for this release, as the change will not be that
>>>>> intrusive. But if you prepare such patch, please base it on top of my
>>>>> virtio rework series; To make it easier to you, I added it to the dpdk-
>>>>> next-virtio tree:
>>>>> https://git.dpdk.org/next/dpdk-next-virtio/log/?h=virtio_pmd_rework_v2
>>>>>
>>>>> Thanks,
>>>>> Maxime
>>>>>
>>>>
^ permalink raw reply	[flat|nested] 58+ messages in thread
* Re: [dpdk-dev] [PATCH v5 3/3] PCI: don't use vfio ioctl call to access PIO resource
  2021-01-27 16:45                         ` Ferruh Yigit
@ 2021-01-28 13:43                           ` 谢华伟(此时此刻)
  0 siblings, 0 replies; 58+ messages in thread
From: 谢华伟(此时此刻) @ 2021-01-28 13:43 UTC (permalink / raw)
  To: Ferruh Yigit, Maxime Coquelin
  Cc: dev, anatoly.burakov, david.marchand, chenbo.xia, grive,
	Xueming(Steven) Li
On 2021/1/28 0:45, Ferruh Yigit wrote:
> On 1/27/2021 2:43 PM, 谢华伟(此时此刻) wrote:
>>
>> On 2021/1/27 18:32, Ferruh Yigit wrote:
>>> I was waiting for clarification if this can be solved in virtio, 
>>> which seems clarified and decided to go with this patch, I am OK to 
>>> proceed with patch 1 & 2.
>>>
>>> But first patch changes how PIO address get, it changes the Linux 
>>> interface used to get the PIO.
>>> And as far as I can see second patch requires this new interface to 
>>> be able to access the MEM resources.
>>>
>>> I have a concern that this interface change may cause issues with 
>>> various distros, kernel versions etc.. And prefer it goes through a 
>>> full -rc1 validation cycle.
>>>
>>> Huawei, I am aware the patch is around for a while but to play safe, 
>>> I suggest considering it for early next release, so it can be tested 
>>> enough, instead of getting if for -rc2/3 in this release.
>>>
>>> Thanks,
>>> ferruh
>>>
>> Hi Ferruh and Maxime:
>>
>> igb_uio kernel driver gets resource through pci_resource_start, i.e, 
>> (dev)->resource[(bar)].start
>>
>> uio_pci_generic and the generic way in my patch 1 gets resource 
>> through the same interface:
>>
>>          pci dev driver exports to userspace the bar resource 
>> attributes through pci_dev->resource (check resource_show in kernel's 
>> drivers/pci/pci-sysfs.c)
>>
>> Other arch than x86 uses the same interface in their pci_uio_ioport_map.
>>
>> So patch 1 is the most generic way and shouldn't break things. 
>> /proc/ioports should be fully dropped.
>>
>> Using /proc/ioport is partly my fault at the very beginning. It 
>> causes so much mess.
>>
>> Could you please confirm this?
>>
>
> Hi Huawei,
>
> I confirm that interface is already in use, 
> 'pci_parse_sysfs_resource()' does similar parsing.
>
> Most probably it is safe as you and Maxime said, and I am not trying 
> to be difficult but extra conscious here.
>
> Will it cause too much trouble to consider the patch early next 
> release? This gives more time and testing after the patch merged.
>
> Thanks,
> ferruh
Hi Ferruh:
If it is deferred to early next release, what would the schedule be, early next February?
In summary, patch 1 is simple and straightforward. It just doesn't use 
/proc/ioports or the resource attribute created by igb_uio, and instead 
uses the standard resource attribute under /sys/pci/.
As I explained, the resource attribute created by igb_uio is exactly the 
same thing as the standard resource attribute under /sys/pci/.
Patch 1 cleans up the mess. Patch 2 fixes a wrong assumption (that the BAR is always an IO BAR).
Customers have been pushing us for quite a long time. Besides, at least 
in China, virtio in a VM with an MMIO BAR is the de-facto implementation. It 
causes quite a lot of trouble not only for cloud users, but also for 
ourselves when we run DPDK.
>
>> Thanks huawei
>>
>>>> Could you please post v6?
>>>>
>>>> Thanks,
>>>> Maxime
>>>>
>>>>>>
>>>>>> It is not too late for this release, as the change will not be that
>>>>>> intrusive. But if you prepare such patch, please base it on top 
>>>>>> of my
>>>>>> virtio rework series; To make it easier to you, I added it to the 
>>>>>> dpdk-
>>>>>> next-virtio tree:
>>>>>> https://git.dpdk.org/next/dpdk-next-virtio/log/?h=virtio_pmd_rework_v2 
>>>>>>
>>>>>>
>>>>>> Thanks,
>>>>>> Maxime
>>>>>>
>>>>>
^ permalink raw reply	[flat|nested] 58+ messages in thread
end of thread, other threads:[~2021-01-28 13:44 UTC | newest]
Thread overview: 58+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-09-30 14:59 [dpdk-dev] [PATCH v2] pci: support both PIO and MMIO BAR for legacy virtio on x86 谢华伟(此时此刻)
2020-10-01 10:22 ` Burakov, Anatoly
2020-10-02  5:44   ` 谢华伟(此时此刻)
2020-10-09  8:36 ` [dpdk-dev] [PATCH v3] " 谢华伟(此时此刻)
2020-10-13  8:41 ` [dpdk-dev] [PATCH v4] support both PIO and MMIO bar for virtio pci device 谢华伟(此时此刻)
2020-10-13  8:41   ` [dpdk-dev] [PATCH v4] pci: support both PIO and MMIO BAR for legacy virtio on x86 谢华伟(此时此刻)
2020-10-13 12:34     ` 谢华伟(此时此刻)
2020-10-21  8:46     ` 谢华伟(此时此刻)
2020-10-21 11:49     ` Ferruh Yigit
2020-10-21 12:32       ` 谢华伟(此时此刻)
2020-10-21 17:24         ` Ferruh Yigit
2020-10-22  9:15           ` 谢华伟(此时此刻)
2020-10-22  9:44             ` Ferruh Yigit
2020-10-22  9:57               ` 谢华伟(此时此刻)
2020-10-22 15:51 ` [dpdk-dev] [PATCH v5 0/3] support both PIO and MMIO BAR for virtio PMD 谢华伟(此时此刻)
2020-10-22 15:51   ` [dpdk-dev] [PATCH v5 1/3] PCI: use PCI standard sysfs entry to get PIO address 谢华伟(此时此刻)
2021-01-12  8:07     ` Maxime Coquelin
2021-01-14 18:23       ` 谢华伟(此时此刻)
2021-01-24 15:10         ` Xueming(Steven) Li
2020-10-22 15:51   ` [dpdk-dev] [PATCH v5 2/3] PCI: support MMIO in rte_pci_ioport_map/unap/read/write 谢华伟(此时此刻)
2021-01-12  8:23     ` Maxime Coquelin
2021-01-21  6:30       ` 谢华伟(此时此刻)
2021-01-24 15:22     ` Xueming(Steven) Li
2021-01-25  3:08       ` 谢华伟(此时此刻)
2021-01-27 10:40     ` Ferruh Yigit
2021-01-27 15:34       ` 谢华伟(此时此刻)
2021-01-27 16:45         ` Ferruh Yigit
2020-10-22 15:51   ` [dpdk-dev] [PATCH v5 3/3] PCI: don't use vfio ioctl call to access PIO resource 谢华伟(此时此刻)
2021-01-12  9:37     ` Maxime Coquelin
2021-01-12 16:58       ` Maxime Coquelin
2021-01-20 14:54         ` 谢华伟(此时此刻)
2021-01-21  8:29           ` Maxime Coquelin
2021-01-21 14:57             ` 谢华伟(此时此刻)
2021-01-21 15:00               ` 谢华伟(此时此刻)
2021-01-21 15:38               ` Maxime Coquelin
2021-01-22  7:25                 ` 谢华伟(此时此刻)
2021-01-26 10:44                   ` Maxime Coquelin
2021-01-27 10:32                     ` Ferruh Yigit
2021-01-27 12:17                       ` Maxime Coquelin
2021-01-27 14:43                       ` 谢华伟(此时此刻)
2021-01-27 16:45                         ` Ferruh Yigit
2021-01-28 13:43                           ` 谢华伟(此时此刻)
2021-01-26 12:30                   ` 谢华伟(此时此刻)
2021-01-26 12:35                     ` Maxime Coquelin
2021-01-26 14:24                       ` 谢华伟(此时此刻)
2020-10-27  8:50   ` [dpdk-dev] [PATCH v5 0/3] support both PIO and MMIO BAR for virtio PMD 谢华伟(此时此刻)
2020-10-28  3:48     ` 谢华伟(此时此刻)
2020-11-02 11:56   ` 谢华伟(此时此刻)
2020-11-10 12:35   ` 谢华伟(此时此刻)
2020-11-10 12:42     ` David Marchand
2020-11-12 13:35       ` 谢华伟(此时此刻)
2020-12-14 14:24       ` 谢华伟(此时此刻)
2020-12-16  7:54         ` Maxime Coquelin
2021-01-12 17:37   ` Maxime Coquelin
2021-01-14 18:19     ` 谢华伟(此时此刻)
2021-01-21  4:12     ` 谢华伟(此时此刻)
2021-01-21  8:47       ` Maxime Coquelin
2021-01-21 13:51         ` 谢华伟(此时此刻)
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).