DPDK patches and discussions
 help / color / mirror / Atom feed
* Re: [dpdk-dev] [PATCH v2 01/16] Separate igb_uio mapping into a separate file
@ 2014-05-27  2:25 Xu, HuilongX
  0 siblings, 0 replies; 3+ messages in thread
From: Xu, HuilongX @ 2014-05-27  2:25 UTC (permalink / raw)
  To: dev, Burakov, Anatoly

In order to keep the code cleaner when multiple drivers are supported,
the IGB_UIO mapping code has been moved into its own file.

Signed-off-by: Anatoly Burakov <anatoly.burakov at intel.com>
Tested-by: HuilongX Xu <huilongx.xu@intel.com>
Compile pass
     >>Compile OS: FC20 x86_64
     >>Kernel version: 3.13.6-200
     >>GCC version: 4.8.2
     >>Server: Crownpass

---
lib/librte_eal/linuxapp/eal/Makefile               |    1 +
lib/librte_eal/linuxapp/eal/eal_pci.c              |  424 +-------------------
lib/librte_eal/linuxapp/eal/eal_pci_uio.c          |  403 +++++++++++++++++++
lib/librte_eal/linuxapp/eal/include/eal_pci_init.h |   65 +++
4 files changed, 478 insertions(+), 415 deletions(-)
create mode 100644 lib/librte_eal/linuxapp/eal/eal_pci_uio.c
create mode 100644 lib/librte_eal/linuxapp/eal/include/eal_pci_init.h

diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
index b00e3ec..527fa2a 100644
--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -57,6 +57,7 @@ endif
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_thread.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_log.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_pci.c
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_pci_uio.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_debug.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_lcore.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_timer.c
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
index ac2c1fe..cd5b797 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -31,82 +31,31 @@
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
-#include <ctype.h>
-#include <stdio.h>
-#include <stdlib.h>
#include <string.h>
-#include <stdarg.h>
-#include <unistd.h>
-#include <inttypes.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <stdarg.h>
-#include <errno.h>
#include <dirent.h>
-#include <limits.h>
-#include <sys/queue.h>
#include <sys/mman.h>
-#include <sys/ioctl.h>
-#include <rte_interrupts.h>
#include <rte_log.h>
#include <rte_pci.h>
-#include <rte_common.h>
-#include <rte_launch.h>
-#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_tailq.h>
-#include <rte_eal.h>
#include <rte_eal_memconfig.h>
-#include <rte_per_lcore.h>
-#include <rte_lcore.h>
-#include <rte_malloc.h>
-#include <rte_string_fns.h>
-#include <rte_debug.h>
#include <rte_devargs.h>
 #include "rte_pci_dev_ids.h"
#include "eal_filesystem.h"
#include "eal_private.h"
+#include "eal_pci_init.h"
 /**
  * @file
  * PCI probing under linux
  *
  * This code is used to simulate a PCI probe by parsing information in
- * sysfs. Moreover, when a registered driver matches a device, the
- * kernel driver currently using it is unloaded and replaced by
- * igb_uio module, which is a very minimal userland driver for Intel
- * network card, only providing access to PCI BAR to applications, and
- * enabling bus master.
+ * sysfs. When a registered device matches a driver, it is then initialized
+ * with either VFIO or IGB_UIO driver (or doesn't initialize), whichever
+ * driver the device is bound to.
  */
-struct uio_map {
-              void *addr;
-              uint64_t offset;
-              uint64_t size;
-              uint64_t phaddr;
-};
-
-/*
- * For multi-process we need to reproduce all PCI mappings in secondary
- * processes, so save them in a tailq.
- */
-struct uio_resource {
-              TAILQ_ENTRY(uio_resource) next;
-
-              struct rte_pci_addr pci_addr;
-              char path[PATH_MAX];
-              size_t nb_maps;
-              struct uio_map maps[PCI_MAX_RESOURCE];
-};
-
-TAILQ_HEAD(uio_res_list, uio_resource);
-
-static struct uio_res_list *uio_res_list = NULL;
-static int pci_parse_sysfs_value(const char *filename, uint64_t *val);
-
/* unbind kernel driver for this device */
static int
pci_unbind_kernel_driver(struct rte_pci_device *dev)
@@ -147,31 +96,19 @@ error:
}
 /* map a particular resource from a file */
-static void *
-pci_map_resource(void *requested_addr, const char *devname, off_t offset,
+void *
+pci_map_resource(void *requested_addr, int fd, off_t offset,
                                size_t size)
{
-              int fd;
               void *mapaddr;
-              /*
-              * open devname, to mmap it
-              */
-              fd = open(devname, O_RDWR);
-              if (fd < 0) {
-                              RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
-                                              devname, strerror(errno));
-                              goto fail;
-              }
-
               /* Map the PCI memory resource of device */
               mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE,
                                               MAP_SHARED, fd, offset);
-              close(fd);
               if (mapaddr == MAP_FAILED ||
                                               (requested_addr != NULL && mapaddr != requested_addr)) {
-                              RTE_LOG(ERR, EAL, "%s(): cannot mmap(%s(%d), %p, 0x%lx, 0x%lx):"
-                                              " %s (%p)\n", __func__, devname, fd, requested_addr,
+                             RTE_LOG(ERR, EAL, "%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx):"
+                                             " %s (%p)\n", __func__, fd, requested_addr,
                                               (unsigned long)size, (unsigned long)offset,
                                               strerror(errno), mapaddr);
                               goto fail;
@@ -185,314 +122,6 @@ fail:
               return NULL;
}
-#define OFF_MAX              ((uint64_t)(off_t)-1)
-static ssize_t
-pci_uio_get_mappings(const char *devname, struct uio_map maps[], size_t nb_maps)
-{
-              size_t i;
-              char dirname[PATH_MAX];
-              char filename[PATH_MAX];
-              uint64_t offset, size;
-
-              for (i = 0; i != nb_maps; i++) {
-
-                              /* check if map directory exists */
-                              rte_snprintf(dirname, sizeof(dirname),
-                                              "%s/maps/map%u", devname, i);
-
-                              if (access(dirname, F_OK) != 0)
-                                              break;
-
-                              /* get mapping offset */
-                              rte_snprintf(filename, sizeof(filename),
-                                              "%s/offset", dirname);
-                              if (pci_parse_sysfs_value(filename, &offset) < 0) {
-                                              RTE_LOG(ERR, EAL,
-                                                              "%s(): cannot parse offset of %s\n",
-                                                              __func__, dirname);
-                                              return (-1);
-                              }
-
-                              /* get mapping size */
-                              rte_snprintf(filename, sizeof(filename),
-                                              "%s/size", dirname);
-                              if (pci_parse_sysfs_value(filename, &size) < 0) {
-                                              RTE_LOG(ERR, EAL,
-                                                              "%s(): cannot parse size of %s\n",
-                                                              __func__, dirname);
-                                              return (-1);
-                              }
-
-                              /* get mapping physical address */
-                              rte_snprintf(filename, sizeof(filename),
-                                              "%s/addr", dirname);
-                              if (pci_parse_sysfs_value(filename, &maps[i].phaddr) < 0) {
-                                              RTE_LOG(ERR, EAL,
-                                                              "%s(): cannot parse addr of %s\n",
-                                                              __func__, dirname);
-                                              return (-1);
-                              }
-
-                              if ((offset > OFF_MAX) || (size > SIZE_MAX)) {
-                                              RTE_LOG(ERR, EAL,
-                                                              "%s(): offset/size exceed system max value\n",
-                                                              __func__);
-                                              return (-1);
-                              }
-
-                              maps[i].offset = offset;
-                              maps[i].size = size;
-        }
-              return (i);
-}
-
-static int
-pci_uio_map_secondary(struct rte_pci_device *dev)
-{
-        size_t i;
-        struct uio_resource *uio_res;
-
-              TAILQ_FOREACH(uio_res, uio_res_list, next) {
-
-                              /* skip this element if it doesn't match our PCI address */
-                              if (memcmp(&uio_res->pci_addr, &dev->addr, sizeof(dev->addr)))
-                                              continue;
-
-                              for (i = 0; i != uio_res->nb_maps; i++) {
-                                              if (pci_map_resource(uio_res->maps[i].addr,
-                                                                                   uio_res->path,
-                                                                                   (off_t)uio_res->maps[i].offset,
-                                                                                   (size_t)uio_res->maps[i].size)
-                                                  != uio_res->maps[i].addr) {
-                                                              RTE_LOG(ERR, EAL,
-                                                                              "Cannot mmap device resource\n");
-                                                              return (-1);
-                                              }
-                              }
-                              return (0);
-              }
-
-              RTE_LOG(ERR, EAL, "Cannot find resource for device\n");
-              return -1;
-}
-
-static int pci_mknod_uio_dev(const char *sysfs_uio_path, unsigned uio_num)
-{
-              FILE *f;
-              char filename[PATH_MAX];
-              int ret;
-              unsigned major, minor;
-              dev_t dev;
-
-              /* get the name of the sysfs file that contains the major and minor
-              * of the uio device and read its content */
-              rte_snprintf(filename, sizeof(filename), "%s/dev", sysfs_uio_path);
-
-              f = fopen(filename, "r");
-              if (f == NULL) {
-                              RTE_LOG(ERR, EAL, "%s(): cannot open sysfs to get major:minor\n",
-                                              __func__);
-                              return -1;
-              }
-
-              ret = fscanf(f, "%d:%d", &major, &minor);
-              if (ret != 2) {
-                              RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs to get major:minor\n",
-                                              __func__);
-                              fclose(f);
-                              return -1;
-              }
-              fclose(f);
-
-              /* create the char device "mknod /dev/uioX c major minor" */
-              rte_snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num);
-              dev = makedev(major, minor);
-              ret = mknod(filename, S_IFCHR | S_IRUSR | S_IWUSR, dev);
-              if (f == NULL) {
-                              RTE_LOG(ERR, EAL, "%s(): mknod() failed %s\n",
-                                              __func__, strerror(errno));
-                              return -1;
-              }
-
-              return ret;
-}
-
-/*
- * Return the uioX char device used for a pci device. On success, return
- * the UIO number and fill dstbuf string with the path of the device in
- * sysfs. On error, return a negative value. In this case dstbuf is
- * invalid.
- */
-static int pci_get_uio_dev(struct rte_pci_device *dev, char *dstbuf,
-                                                 unsigned int buflen)
-{
-              struct rte_pci_addr *loc = &dev->addr;
-              unsigned int uio_num;
-              struct dirent *e;
-              DIR *dir;
-              char dirname[PATH_MAX];
-
-              /* depending on kernel version, uio can be located in uio/uioX
-              * or uio:uioX */
-
-              rte_snprintf(dirname, sizeof(dirname),
-                       SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/uio",
-                       loc->domain, loc->bus, loc->devid, loc->function);
-
-              dir = opendir(dirname);
-              if (dir == NULL) {
-                              /* retry with the parent directory */
-                              rte_snprintf(dirname, sizeof(dirname),
-                                       SYSFS_PCI_DEVICES "/" PCI_PRI_FMT,
-                                       loc->domain, loc->bus, loc->devid, loc->function);
-                              dir = opendir(dirname);
-
-                              if (dir == NULL) {
-                                              RTE_LOG(ERR, EAL, "Cannot opendir %s\n", dirname);
-                                              return -1;
-                              }
-              }
-
-              /* take the first file starting with "uio" */
-              while ((e = readdir(dir)) != NULL) {
-                              /* format could be uio%d ...*/
-                              int shortprefix_len = sizeof("uio") - 1;
-                              /* ... or uio:uio%d */
-                              int longprefix_len = sizeof("uio:uio") - 1;
-                              char *endptr;
-
-                              if (strncmp(e->d_name, "uio", 3) != 0)
-                                              continue;
-
-                              /* first try uio%d */
-                              errno = 0;
-                              uio_num = strtoull(e->d_name + shortprefix_len, &endptr, 10);
-                              if (errno == 0 && endptr != (e->d_name + shortprefix_len)) {
-                                              rte_snprintf(dstbuf, buflen, "%s/uio%u", dirname, uio_num);
-                                              break;
-                              }
-
-                              /* then try uio:uio%d */
-                              errno = 0;
-                              uio_num = strtoull(e->d_name + longprefix_len, &endptr, 10);
-                              if (errno == 0 && endptr != (e->d_name + longprefix_len)) {
-                                              rte_snprintf(dstbuf, buflen, "%s/uio:uio%u", dirname, uio_num);
-                                              break;
-                              }
-              }
-              closedir(dir);
-
-              /* No uio resource found */
-              if (e == NULL)
-                              return -1;
-
-              /* create uio device if we've been asked to */
-              if (internal_config.create_uio_dev && pci_mknod_uio_dev(dstbuf, uio_num) < 0)
-                              RTE_LOG(WARNING, EAL, "Cannot create /dev/uio%u\n", uio_num);
-
-              return uio_num;
-}
-
-/* map the PCI resource of a PCI device in virtual memory */
-static int
-pci_uio_map_resource(struct rte_pci_device *dev)
-{
-              int i, j;
-              char dirname[PATH_MAX];
-              char devname[PATH_MAX]; /* contains the /dev/uioX */
-              void *mapaddr;
-              int uio_num;
-              uint64_t phaddr;
-              uint64_t offset;
-              uint64_t pagesz;
-              ssize_t nb_maps;
-              struct rte_pci_addr *loc = &dev->addr;
-              struct uio_resource *uio_res;
-              struct uio_map *maps;
-
-              dev->intr_handle.fd = -1;
-              dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
-
-              /* secondary processes - use already recorded details */
-              if (rte_eal_process_type() != RTE_PROC_PRIMARY)
-                              return (pci_uio_map_secondary(dev));
-
-              /* find uio resource */
-              uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname));
-              if (uio_num < 0) {
-                              RTE_LOG(WARNING, EAL, "  "PCI_PRI_FMT" not managed by UIO driver, "
-                                                              "skipping\n", loc->domain, loc->bus, loc->devid, loc->function);
-                              return -1;
-              }
-              rte_snprintf(devname, sizeof(devname), "/dev/uio%u", uio_num);
-
-              /* save fd if in primary process */
-              dev->intr_handle.fd = open(devname, O_RDWR);
-              if (dev->intr_handle.fd < 0) {
-                              RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
-                                              devname, strerror(errno));
-                              return -1;
-              }
-              dev->intr_handle.type = RTE_INTR_HANDLE_UIO;
-
-              /* allocate the mapping details for secondary processes*/
-              if ((uio_res = rte_zmalloc("UIO_RES", sizeof (*uio_res), 0)) == NULL) {
-                              RTE_LOG(ERR, EAL,
-                                              "%s(): cannot store uio mmap details\n", __func__);
-                              return (-1);
-              }
-
-              rte_snprintf(uio_res->path, sizeof(uio_res->path), "%s", devname);
-              memcpy(&uio_res->pci_addr, &dev->addr, sizeof(uio_res->pci_addr));
-
-              /* collect info about device mappings */
-              nb_maps = pci_uio_get_mappings(dirname, uio_res->maps,
-                                                                     RTE_DIM(uio_res->maps));
-              if (nb_maps < 0) {
-                              rte_free(uio_res);
-                              return (nb_maps);
-              }
-
-              uio_res->nb_maps = nb_maps;
-
-              /* Map all BARs */
-              pagesz = sysconf(_SC_PAGESIZE);
-
-              maps = uio_res->maps;
-              for (i = 0; i != PCI_MAX_RESOURCE; i++) {
-
-                              /* skip empty BAR */
-                              if ((phaddr = dev->mem_resource[i].phys_addr) == 0)
-                                              continue;
-
-                              for (j = 0; j != nb_maps && (phaddr != maps[j].phaddr ||
-                                                              dev->mem_resource[i].len != maps[j].size);
-                                                              j++)
-                                              ;
-
-                              /* if matching map is found, then use it */
-                              if (j != nb_maps) {
-                                              offset = j * pagesz;
-                                              if (maps[j].addr != NULL ||
-                                                  (mapaddr = pci_map_resource(NULL, devname,
-                                                                                                              (off_t)offset,
-                                                                                                              (size_t)maps[j].size)
-                                                  ) == NULL) {
-                                                              rte_free(uio_res);
-                                                              return (-1);
-                                              }
-
-                                              maps[j].addr = mapaddr;
-                                              maps[j].offset = offset;
-                                              dev->mem_resource[i].addr = mapaddr;
-                              }
-              }
-
-              TAILQ_INSERT_TAIL(uio_res_list, uio_res, next);
-
-              return (0);
-}
-
/* parse the "resource" sysfs file */
#define IORESOURCE_MEM  0x00000200
@@ -556,41 +185,6 @@ error:
               return -1;
}
-/*
- * parse a sysfs file containing one integer value
- * different to the eal version, as it needs to work with 64-bit values
- */
-static int
-pci_parse_sysfs_value(const char *filename, uint64_t *val)
-{
-        FILE *f;
-        char buf[BUFSIZ];
-        char *end = NULL;
-
-        f = fopen(filename, "r");
-        if (f == NULL) {
-                RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n",
-                        __func__, filename);
-                return -1;
-        }
-
-        if (fgets(buf, sizeof(buf), f) == NULL) {
-                RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n",
-                        __func__, filename);
-                fclose(f);
-                return -1;
-        }
-        *val = strtoull(buf, &end, 0);
-        if ((buf[0] == '\0') || (end == NULL) || (*end != '\n')) {
-                RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs value %s\n",
-                                __func__, filename);
-                fclose(f);
-                return -1;
-        }
-        fclose(f);
-        return 0;
-}
-
/* Compare two PCI device addresses. */
static int
pci_addr_comparison(struct rte_pci_addr *addr, struct rte_pci_addr *addr2)
@@ -866,7 +460,7 @@ rte_eal_pci_init(void)
{
               TAILQ_INIT(&pci_driver_list);
               TAILQ_INIT(&pci_device_list);
-              uio_res_list = RTE_TAILQ_RESERVE_BY_IDX(RTE_TAILQ_PCI, uio_res_list);
+             pci_res_list = RTE_TAILQ_RESERVE_BY_IDX(RTE_TAILQ_PCI, mapped_pci_res_list);
                /* for debug purposes, PCI can be disabled */
               if (internal_config.no_pci)
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
new file mode 100644
index 0000000..f29fee5
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
@@ -0,0 +1,403 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <sys/stat.h>
+
+#include <rte_log.h>
+#include <rte_pci.h>
+#include <rte_common.h>
+#include <rte_malloc.h>
+#include <rte_tailq.h>
+
+#include "rte_pci_dev_ids.h"
+#include "eal_filesystem.h"
+#include "eal_pci_init.h"
+
+static int pci_parse_sysfs_value(const char *filename, uint64_t *val);
+
+#define OFF_MAX              ((uint64_t)(off_t)-1)
+/*
+ * Read the mapping table found under <devname>/maps in sysfs.
+ *
+ * Walks <devname>/maps/map0, map1, ... and stops at the first map directory
+ * that does not exist, or once nb_maps entries have been read. For every map
+ * the "offset", "size" and "addr" files are parsed and stored in
+ * maps[i].offset, maps[i].size and maps[i].phaddr.
+ *
+ * Returns the number of maps read, or -1 if one of the sysfs values cannot
+ * be parsed or is rejected by the range check.
+ */
+static ssize_t
+pci_uio_get_mappings(const char *devname, struct pci_map maps[], size_t nb_maps)
+{
+	size_t i;
+	char dirname[PATH_MAX];
+	char filename[PATH_MAX];
+	uint64_t offset, size;
+
+	for (i = 0; i != nb_maps; i++) {
+
+		/*
+		 * check if map directory exists
+		 * (i is a size_t, so it is formatted with %zu rather than %u)
+		 */
+		rte_snprintf(dirname, sizeof(dirname), "%s/maps/map%zu", devname, i);
+
+		if (access(dirname, F_OK) != 0)
+			break;
+
+		/* get mapping offset */
+		rte_snprintf(filename, sizeof(filename), "%s/offset", dirname);
+		if (pci_parse_sysfs_value(filename, &offset) < 0) {
+			RTE_LOG(ERR, EAL,
+				"%s(): cannot parse offset of %s\n", __func__, dirname);
+			return (-1);
+		}
+
+		/* get mapping size */
+		rte_snprintf(filename, sizeof(filename), "%s/size", dirname);
+		if (pci_parse_sysfs_value(filename, &size) < 0) {
+			RTE_LOG(ERR, EAL,
+				"%s(): cannot parse size of %s\n", __func__, dirname);
+			return (-1);
+		}
+
+		/* get mapping physical address */
+		rte_snprintf(filename, sizeof(filename), "%s/addr", dirname);
+		if (pci_parse_sysfs_value(filename, &maps[i].phaddr) < 0) {
+			RTE_LOG(ERR, EAL,
+				"%s(): cannot parse addr of %s\n", __func__, dirname);
+			return (-1);
+		}
+
+		/*
+		 * Reject values that cannot be represented as off_t / size_t.
+		 * NOTE(review): OFF_MAX is defined as (uint64_t)(off_t)-1, which
+		 * casts -1 and so looks like an all-ones value; if so, the offset
+		 * half of this check can never fire - confirm and fix the macro.
+		 */
+		if ((offset > OFF_MAX) || (size > SIZE_MAX)) {
+			RTE_LOG(ERR, EAL,
+				"%s(): offset/size exceed system max value\n", __func__);
+			return (-1);
+		}
+
+		maps[i].offset = offset;
+		maps[i].size = size;
+	}
+
+	return (i);
+}
+
+/*
+ * Map the resources of a device using the details saved in pci_res_list.
+ *
+ * Looks up the list entry whose PCI address equals dev->addr and maps each
+ * of its saved regions at the address stored for that region.
+ *
+ * Returns 0 once every region of the matching entry is mapped, -1 if the
+ * device file cannot be opened, a region does not map at its saved address,
+ * or no entry matches the device.
+ */
+static int
+pci_uio_map_secondary(struct rte_pci_device *dev)
+{
+	struct mapped_pci_resource *res;
+	void *mapped;
+	int uio_fd;
+	int idx;
+
+	TAILQ_FOREACH(res, pci_res_list, next) {
+
+		/* only the entry recorded for our PCI address is of interest */
+		if (memcmp(&res->pci_addr, &dev->addr, sizeof(dev->addr)) != 0)
+			continue;
+
+		idx = 0;
+		while (idx != res->nb_maps) {
+			/* the device file is opened anew for every region */
+			uio_fd = open(res->path, O_RDWR);
+			if (uio_fd < 0) {
+				RTE_LOG(ERR, EAL,
+					"Cannot open %s: %s\n", res->path, strerror(errno));
+				return -1;
+			}
+
+			/* the region has to land exactly on the saved address */
+			mapped = pci_map_resource(res->maps[idx].addr, uio_fd,
+					(off_t) res->maps[idx].offset,
+					(size_t) res->maps[idx].size);
+			if (mapped != res->maps[idx].addr) {
+				RTE_LOG(ERR, EAL, "Cannot mmap device resource\n");
+				close(uio_fd);
+				return (-1);
+			}
+
+			/* this process has no further use for the descriptor */
+			close(uio_fd);
+			idx++;
+		}
+		return (0);
+	}
+
+	RTE_LOG(ERR, EAL, "Cannot find resource for device\n");
+	return -1;
+}
+
+/*
+ * Create the /dev/uio<uio_num> character device node.
+ *
+ * The major:minor pair is read from <sysfs_uio_path>/dev and handed to
+ * mknod(); the node is created with read/write permission for the owner.
+ *
+ * Returns 0 on success, -1 if the sysfs file cannot be opened or parsed, or
+ * if mknod() fails.
+ */
+static int
+pci_mknod_uio_dev(const char *sysfs_uio_path, unsigned uio_num)
+{
+	FILE *f;
+	char filename[PATH_MAX];
+	int ret;
+	unsigned major, minor;
+	dev_t dev;
+
+	/* get the name of the sysfs file that contains the major and minor
+	 * of the uio device and read its content */
+	rte_snprintf(filename, sizeof(filename), "%s/dev", sysfs_uio_path);
+
+	f = fopen(filename, "r");
+	if (f == NULL) {
+		RTE_LOG(ERR, EAL,
+			"%s(): cannot open sysfs to get major:minor\n", __func__);
+		return -1;
+	}
+
+	/* major and minor are unsigned, so they are scanned with %u */
+	ret = fscanf(f, "%u:%u", &major, &minor);
+	if (ret != 2) {
+		RTE_LOG(ERR, EAL,
+			"%s(): cannot parse sysfs to get major:minor\n", __func__);
+		fclose(f);
+		return -1;
+	}
+	fclose(f);
+
+	/* create the char device "mknod /dev/uioX c major minor" */
+	rte_snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num);
+	dev = makedev(major, minor);
+	ret = mknod(filename, S_IFCHR | S_IRUSR | S_IWUSR, dev);
+	/* check mknod()'s own result; f is already closed at this point and
+	 * says nothing about whether the node was created */
+	if (ret < 0) {
+		RTE_LOG(ERR, EAL,
+			"%s(): mknod() failed %s\n", __func__, strerror(errno));
+		return -1;
+	}
+
+	return ret;
+}
+
+/*
+ * Locate the uioX char device bound to a PCI device.
+ *
+ * Depending on the kernel version, the sysfs entry is either
+ * <pci dev>/uio/uioX or <pci dev>/uio:uioX, so the "uio" subdirectory is
+ * tried first and the PCI device directory itself is scanned as a fallback.
+ *
+ * On success, return the UIO number and fill dstbuf (at most buflen bytes)
+ * with the path of the device in sysfs. On error, return a negative value;
+ * in this case dstbuf is invalid.
+ *
+ * If internal_config.create_uio_dev is set, the /dev/uioX node is created
+ * as well; a failure to create it is only logged as a warning.
+ */
+static int
+pci_get_uio_dev(struct rte_pci_device *dev, char *dstbuf,
+		unsigned int buflen) {
+	/* entry names look like uio%d ... */
+	static const char shortprefix[] = "uio";
+	/* ... or uio:uio%d */
+	static const char longprefix[] = "uio:uio";
+	const size_t shortprefix_len = sizeof(shortprefix) - 1;
+	const size_t longprefix_len = sizeof(longprefix) - 1;
+	struct rte_pci_addr *loc = &dev->addr;
+	unsigned int uio_num = 0;
+	int found = 0;
+	struct dirent *e;
+	DIR *dir;
+	char dirname[PATH_MAX];
+
+	/* depending on kernel version, uio can be located in uio/uioX
+	 * or uio:uioX */
+
+	rte_snprintf(dirname, sizeof(dirname),
+			SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/uio", loc->domain, loc->bus,
+			loc->devid, loc->function);
+
+	dir = opendir(dirname);
+	if (dir == NULL) {
+		/* retry with the parent directory */
+		rte_snprintf(dirname, sizeof(dirname),
+				SYSFS_PCI_DEVICES "/" PCI_PRI_FMT, loc->domain, loc->bus,
+				loc->devid, loc->function);
+		dir = opendir(dirname);
+
+		if (dir == NULL) {
+			RTE_LOG(ERR, EAL, "Cannot opendir %s\n", dirname);
+			return -1;
+		}
+	}
+
+	/* take the first file starting with "uio" */
+	while ((e = readdir(dir)) != NULL) {
+		const char *digits;
+		char *endptr;
+
+		if (strncmp(e->d_name, shortprefix, shortprefix_len) != 0)
+			continue;
+
+		/* first try uio%d */
+		digits = e->d_name + shortprefix_len;
+		errno = 0;
+		uio_num = strtoull(digits, &endptr, 10);
+		if (errno == 0 && endptr != digits) {
+			rte_snprintf(dstbuf, buflen, "%s/uio%u", dirname, uio_num);
+			found = 1;
+			break;
+		}
+
+		/*
+		 * then try uio:uio%d, but only when the name really carries
+		 * that prefix: for a shorter name the digits pointer would lie
+		 * beyond the terminating NUL of d_name.
+		 */
+		if (strncmp(e->d_name, longprefix, longprefix_len) != 0)
+			continue;
+
+		digits = e->d_name + longprefix_len;
+		errno = 0;
+		uio_num = strtoull(digits, &endptr, 10);
+		if (errno == 0 && endptr != digits) {
+			rte_snprintf(dstbuf, buflen, "%s/uio:uio%u", dirname, uio_num);
+			found = 1;
+			break;
+		}
+	}
+	/* the dirent pointer must not be used once the stream is closed,
+	 * hence the separate found flag */
+	closedir(dir);
+
+	/* No uio resource found */
+	if (!found)
+		return -1;
+
+	/* create uio device if we've been asked to */
+	if (internal_config.create_uio_dev
+			&& pci_mknod_uio_dev(dstbuf, uio_num) < 0)
+		RTE_LOG(WARNING, EAL, "Cannot create /dev/uio%u\n", uio_num);
+
+	return uio_num;
+}
+
+/*
+ * Map the PCI resources (BARs) of a PCI device into virtual memory
+ * through its UIO device.
+ *
+ * Secondary processes do not create mappings here; they are handed over to
+ * pci_uio_map_secondary(). In the primary process the function:
+ *   - finds the uio device of the PCI device and opens /dev/uioX, keeping
+ *     the descriptor in dev->intr_handle.fd,
+ *   - collects the mappings exported for the uio device,
+ *   - maps every non-empty BAR whose physical address and length match one
+ *     of those mappings and stores the address in dev->mem_resource[],
+ *   - appends the mapping details to pci_res_list.
+ *
+ * Return 0 on success and a negative value on failure. On failure the
+ * interrupt handle is closed and reset (fd = -1, type unknown) so that the
+ * /dev/uioX descriptor is not leaked.
+ */
+int
+pci_uio_map_resource(struct rte_pci_device *dev) {
+	int i, j;
+	int ret = -1;
+	char dirname[PATH_MAX];
+	char devname[PATH_MAX]; /* contains the /dev/uioX */
+	void *mapaddr;
+	int uio_num;
+	uint64_t phaddr;
+	uint64_t offset;
+	uint64_t pagesz;
+	ssize_t nb_maps;
+	struct rte_pci_addr *loc = &dev->addr;
+	struct mapped_pci_resource *uio_res;
+	struct pci_map *maps;
+
+	dev->intr_handle.fd = -1;
+	dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+
+	/* secondary processes - use already recorded details */
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+		return (pci_uio_map_secondary(dev));
+
+	/* find uio resource */
+	uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname));
+	if (uio_num < 0) {
+		RTE_LOG(WARNING, EAL, "  "PCI_PRI_FMT" not managed by UIO driver, "
+		"skipping\n", loc->domain, loc->bus, loc->devid, loc->function);
+		return -1;
+	}
+	rte_snprintf(devname, sizeof(devname), "/dev/uio%u", uio_num);
+
+	/* save fd if in primary process */
+	dev->intr_handle.fd = open(devname, O_RDWR);
+	if (dev->intr_handle.fd < 0) {
+		RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", devname, strerror(errno));
+		return -1;
+	}
+	dev->intr_handle.type = RTE_INTR_HANDLE_UIO;
+
+	/* allocate the mapping details for secondary processes*/
+	if ((uio_res = rte_zmalloc("UIO_RES", sizeof(*uio_res), 0)) == NULL) {
+		RTE_LOG(ERR, EAL, "%s(): cannot store uio mmap details\n", __func__);
+		goto fail_close;
+	}
+
+	rte_snprintf(uio_res->path, sizeof(uio_res->path), "%s", devname);
+	memcpy(&uio_res->pci_addr, &dev->addr, sizeof(uio_res->pci_addr));
+
+	/* collect info about device mappings */
+	nb_maps = pci_uio_get_mappings(dirname, uio_res->maps,
+			RTE_DIM(uio_res->maps));
+	if (nb_maps < 0) {
+		/* propagate the error code of the helper */
+		ret = nb_maps;
+		goto fail_free;
+	}
+
+	uio_res->nb_maps = nb_maps;
+
+	/* Map all BARs */
+	pagesz = sysconf(_SC_PAGESIZE);
+
+	maps = uio_res->maps;
+	for (i = 0; i != PCI_MAX_RESOURCE; i++) {
+		int fd;
+
+		/* skip empty BAR */
+		if ((phaddr = dev->mem_resource[i].phys_addr) == 0)
+			continue;
+
+		/* look for the uio mapping describing this BAR */
+		for (j = 0; j != nb_maps; j++) {
+			if (phaddr == maps[j].phaddr
+					&& dev->mem_resource[i].len == maps[j].size)
+				break;
+		}
+
+		/* BARs without a matching map are left unmapped */
+		if (j == nb_maps)
+			continue;
+
+		/* mapping number j is selected by an offset of j pages */
+		offset = j * pagesz;
+
+		/*
+		 * open devname, to mmap it
+		 */
+		fd = open(uio_res->path, O_RDWR);
+		if (fd < 0) {
+			RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
+				uio_res->path, strerror(errno));
+			goto fail_free;
+		}
+
+		/* a map that already has an address is treated as an error */
+		if (maps[j].addr != NULL
+				|| (mapaddr = pci_map_resource(NULL, fd,
+						(off_t) offset, (size_t) maps[j].size)) == NULL) {
+			close(fd);
+			goto fail_free;
+		}
+		close(fd);
+
+		maps[j].addr = mapaddr;
+		maps[j].offset = offset;
+		dev->mem_resource[i].addr = mapaddr;
+	}
+
+	TAILQ_INSERT_TAIL(pci_res_list, uio_res, next);
+
+	return (0);
+
+fail_free:
+	rte_free(uio_res);
+fail_close:
+	/* do not leak the /dev/uioX descriptor opened above */
+	close(dev->intr_handle.fd);
+	dev->intr_handle.fd = -1;
+	dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+	return ret;
+}
+
+/*
+ * Read a single integer from a sysfs file.
+ *
+ * Unlike the generic EAL helper, this one has to cope with 64-bit values.
+ * The first line of the file is converted with strtoull() (base
+ * auto-detected) and must be terminated by a newline.
+ *
+ * Return 0 on success with the value stored in *val, -1 on failure.
+ * Note that *val is written as soon as a line could be read, even if the
+ * line is later rejected.
+ */
+static int
+pci_parse_sysfs_value(const char *filename, uint64_t *val) {
+	char line[BUFSIZ];
+	char *stop = NULL;
+	int rc = -1;
+	FILE *fp = fopen(filename, "r");
+
+	if (fp == NULL) {
+		RTE_LOG(ERR, EAL,
+				"%s(): cannot open sysfs value %s\n", __func__, filename);
+		return -1;
+	}
+
+	if (fgets(line, sizeof(line), fp) == NULL) {
+		RTE_LOG(ERR, EAL,
+				"%s(): cannot read sysfs value %s\n", __func__, filename);
+	} else {
+		*val = strtoull(line, &stop, 0);
+		if ((line[0] != '\0') && (stop != NULL) && (*stop == '\n'))
+			rc = 0;
+		else
+			RTE_LOG(ERR, EAL,
+					"%s(): cannot parse sysfs value %s\n", __func__, filename);
+	}
+
+	/* single exit point: the stream is closed on every path */
+	fclose(fp);
+	return rc;
+}
diff --git a/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h b/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h
new file mode 100644
index 0000000..699e80d
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h
@@ -0,0 +1,65 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef EAL_PCI_INIT_H_
+#define EAL_PCI_INIT_H_
+
+/*
+ * Description of one mapping of a UIO device.
+ */
+struct pci_map {
+             /* virtual address of the mapping; pci_uio_map_resource() expects
+              * it to be NULL before it maps the region */
+             void *addr;
+             /* offset handed to pci_map_resource() for this mapping */
+             uint64_t offset;
+             /* length of the mapping, compared against the BAR length */
+             uint64_t size;
+             /* physical address, compared against the BAR physical address */
+             uint64_t phaddr;
+};
+
+/*
+ * For multi-process we need to reproduce all PCI mappings in secondary
+ * processes, so save them in a tailq.
+ *
+ * One entry is recorded per PCI device mapped by the primary process.
+ */
+struct mapped_pci_resource {
+             /* tailq linkage */
+             TAILQ_ENTRY(mapped_pci_resource) next;
+
+             /* copy of the PCI address of the mapped device */
+             struct rte_pci_addr pci_addr;
+             /* path of the device node that was opened for mapping (/dev/uioX) */
+             char path[PATH_MAX];
+             /* number of mappings reported for the device */
+             int nb_maps;
+             /* per-mapping details, see struct pci_map */
+             struct pci_map maps[PCI_MAX_RESOURCE];
+};
+
+/* list of mapped PCI resources; pci_uio_map_resource() appends to it */
+TAILQ_HEAD(mapped_pci_res_list, mapped_pci_resource);
+/*
+ * NOTE(review): this is declared without "extern", so every file including
+ * this header carries its own (tentative) definition of the pointer. That
+ * fails to link when common symbols are disabled (-fno-common) - consider
+ * "extern" here plus a single definition in one .c file; confirm against
+ * the rest of the patch.
+ */
+struct mapped_pci_res_list *pci_res_list;
+
+/*
+ * Map size bytes of fd starting at offset. The UIO code passes NULL as
+ * requested_addr and treats a NULL return value as failure.
+ */
+void * pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size);
+
+/*
+ * Map the resources of a PCI device through IGB_UIO.
+ * Returns 0 on success, a negative value on failure.
+ */
+int pci_uio_map_resource(struct rte_pci_device *dev);
+
+#endif /* EAL_PCI_INIT_H_ */
--
1.7.0.7

^ permalink raw reply	[flat|nested] 3+ messages in thread
* [dpdk-dev] [PATCH 00/16] [RFC] [VFIO] Add VFIO support to DPDK
@ 2014-05-01 11:05 Burakov, Anatoly
  2014-05-19 15:51 ` [dpdk-dev] [PATCH v2 01/16] Separate igb_uio mapping into a separate file Anatoly Burakov
  0 siblings, 1 reply; 3+ messages in thread
From: Burakov, Anatoly @ 2014-05-01 11:05 UTC (permalink / raw)
  To: dev

This patchset adds support for using VFIO instead of IGB_UIO to
map the device BARs.

VFIO is a kernel 3.6+ driver allowing secure DMA from userspace
by means of using IOMMU instead of working directly with physical
memory like igb_uio does.

Short summary:
* Adding support for VFIO in EAL PCI code
* Adding new command-line parameter for VFIO interrupt type
* Adding support for VFIO in setup.sh
* Renaming igb_uio_bind to dpdk_nic_bind and adding support for
  VFIO there
* Removing PCI ID list from igb_uio, effectively making it another
  generic PCI driver similar to pci_stub, vfio-pci et al
* Adding autotest for VFIO interrupt types
* Making igb_uio and VFIO compilation optional

I'm submitting this as an RFC because this patch is based off
current dpdk.org branch with David Marchand's RTE_EAL_UNBIND_PORTS
patchset. IOW, this will *not* apply to the dpdk.org tree *unless* you
also apply David's patches beforehand.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>

Anatoly Burakov (16):
  Separate igb_uio mapping into a separate file
  Distinguish between legitimate failures and non-fatal errors
  Rename RTE_PCI_DRV_NEED_IGB_UIO to RTE_PCI_DRV_NEED_MAPPING
  Make igb_uio compilation optional
  Moved interrupt type out of igb_uio
  Add support for VFIO in Linuxapp targets
  Add support for VFIO interrupts, add VFIO header
  Add support for mapping devices through VFIO.
  Enable VFIO device binding
  Added support for selecting VFIO interrupt type from EAL command-line
  Make --no-huge use mmap instead of malloc.
  Adding unit tests for VFIO EAL command-line parameter
  Removed PCI ID table from igb_uio
  Renamed igb_uio_bind to dpdk_nic_bind
  Added support for VFIO drivers in dpdk_nic_bind.py
  Adding support for VFIO to setup.sh

 app/test/test_eal_flags.c                          |  24 +
 app/test/test_pci.c                                |   4 +-
 config/defconfig_i686-default-linuxapp-gcc         |   2 +
 config/defconfig_i686-default-linuxapp-icc         |   2 +
 config/defconfig_x86_64-default-linuxapp-gcc       |   2 +
 config/defconfig_x86_64-default-linuxapp-icc       |   2 +
 lib/librte_eal/bsdapp/eal/eal_pci.c                |   2 +-
 lib/librte_eal/common/Makefile                     |   1 +
 lib/librte_eal/common/eal_common_pci.c             |  17 +-
 lib/librte_eal/common/include/rte_pci.h            |   7 +-
 .../common/include/rte_pci_dev_feature_defs.h      |  46 ++
 .../common/include/rte_pci_dev_features.h          |  42 ++
 lib/librte_eal/linuxapp/Makefile                   |   2 +
 lib/librte_eal/linuxapp/eal/Makefile               |   6 +-
 lib/librte_eal/linuxapp/eal/eal.c                  |  35 +
 lib/librte_eal/linuxapp/eal/eal_interrupts.c       | 203 +++++-
 lib/librte_eal/linuxapp/eal/eal_memory.c           |   8 +-
 lib/librte_eal/linuxapp/eal/eal_pci.c              | 480 ++------------
 lib/librte_eal/linuxapp/eal/eal_pci_uio.c          | 416 ++++++++++++
 lib/librte_eal/linuxapp/eal/eal_pci_vfio.c         | 709 +++++++++++++++++++++
 lib/librte_eal/linuxapp/eal/eal_pci_vfio_socket.c  | 367 +++++++++++
 .../linuxapp/eal/include/eal_internal_cfg.h        |   3 +
 lib/librte_eal/linuxapp/eal/include/eal_pci_init.h | 120 ++++
 lib/librte_eal/linuxapp/eal/include/eal_vfio.h     |  55 ++
 .../linuxapp/eal/include/exec-env/rte_interrupts.h |   7 +-
 lib/librte_eal/linuxapp/igb_uio/igb_uio.c          |  70 +-
 lib/librte_pmd_e1000/em_ethdev.c                   |   2 +-
 lib/librte_pmd_e1000/igb_ethdev.c                  |   4 +-
 lib/librte_pmd_ixgbe/ixgbe_ethdev.c                |   4 +-
 lib/librte_pmd_vmxnet3/vmxnet3_ethdev.c            |   2 +-
 tools/dpdk_nic_bind.py                             | 500 +++++++++++++++
 tools/igb_uio_bind.py                              | 485 --------------
 tools/setup.sh                                     | 168 ++++-
 33 files changed, 2797 insertions(+), 1000 deletions(-)
 create mode 100644 lib/librte_eal/common/include/rte_pci_dev_feature_defs.h
 create mode 100644 lib/librte_eal/common/include/rte_pci_dev_features.h
 create mode 100644 lib/librte_eal/linuxapp/eal/eal_pci_uio.c
 create mode 100644 lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
 create mode 100644 lib/librte_eal/linuxapp/eal/eal_pci_vfio_socket.c
 create mode 100644 lib/librte_eal/linuxapp/eal/include/eal_pci_init.h
 create mode 100644 lib/librte_eal/linuxapp/eal/include/eal_vfio.h
 create mode 100755 tools/dpdk_nic_bind.py
 delete mode 100755 tools/igb_uio_bind.py

-- 
1.8.1.4

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2014-05-27  2:25 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-05-27  2:25 [dpdk-dev] [PATCH v2 01/16] Separate igb_uio mapping into a separate file Xu, HuilongX
  -- strict thread matches above, loose matches on Subject: below --
2014-05-01 11:05 [dpdk-dev] [PATCH 00/16] [RFC] [VFIO] Add VFIO support to DPDK Burakov, Anatoly
2014-05-19 15:51 ` [dpdk-dev] [PATCH v2 01/16] Separate igb_uio mapping into a separate file Anatoly Burakov
2014-05-21 12:42   ` Thomas Monjalon

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).