From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-oi0-f45.google.com (mail-oi0-f45.google.com [209.85.218.45]) by dpdk.org (Postfix) with ESMTP id 19F1E9AB6 for ; Mon, 23 Feb 2015 16:24:32 +0100 (CET) Received: by mail-oi0-f45.google.com with SMTP id i138so13971463oig.4 for ; Mon, 23 Feb 2015 07:24:31 -0800 (PST) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20130820; h=x-gm-message-state:mime-version:in-reply-to:references:date :message-id:subject:from:to:cc:content-type; bh=/Ne8EnYQLiOSRKt2MrDQzUccbqYJ1xupNC4nrxFvC9U=; b=VGHxj4Wmv9hPXDnYRJImLxTNxHunugV0UAiRu0OEBXk1xAWH/PJRFpvrJHDXDkIbvZ ss/EqNjNT/wLZCApWbcpuwGnuJyercHDmX74MAZX147dY+HExOObKnuHyaTnmDoN5rWo aYEAWtA3FE++6P7df4XHNytt2yok4nVt+jWqJCWOG7KSNjb/dK1P8jVPUmsyXcsZlRvB ZsCH3X5VHGxyehbg6eEe5NyNkruDccdncp8fGKM41ABSwxShANkAZWy1snPuIS/gDKhQ qvq8+T9Vh19NHiqYB4uO0CHCDJh94ROZ1TVVKcD1a5B5x/+4bIkAZIpbUER+oC83U3Iu 4vmw== X-Gm-Message-State: ALoCoQnLqkCPo2lICgfAFDkMrjdntNFWS/ZAFwOjYS+Jd013WYe9LctdNFUssdNGulqxAQg2h2xt MIME-Version: 1.0 X-Received: by 10.60.42.42 with SMTP id k10mr4756235oel.15.1424705071490; Mon, 23 Feb 2015 07:24:31 -0800 (PST) Received: by 10.76.133.162 with HTTP; Mon, 23 Feb 2015 07:24:31 -0800 (PST) In-Reply-To: <1424451557-27419-2-git-send-email-bruce.richardson@intel.com> References: <1424365731-32228-1-git-send-email-danny.zhou@intel.com> <1424451557-27419-1-git-send-email-bruce.richardson@intel.com> <1424451557-27419-2-git-send-email-bruce.richardson@intel.com> Date: Mon, 23 Feb 2015 16:24:31 +0100 Message-ID: From: David Marchand To: Bruce Richardson Content-Type: text/plain; charset=UTF-8 X-Content-Filtered-By: Mailman/MimeDel 2.1.15 Cc: "dev@dpdk.org" Subject: Re: [dpdk-dev] [PATCH v3 1/3] eal: enable uio_pci_generic support X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 23 Feb 2015 15:24:32 -0000 Hello, Ok this is coming too late, but anyway, my comments. On Fri, Feb 20, 2015 at 5:59 PM, Bruce Richardson < bruce.richardson@intel.com> wrote: [snip] diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c > b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c > index 54cce08..2b16fcb 100644 > --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c > +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c > @@ -36,6 +36,7 @@ > #include > #include > #include > +#include > > #include > #include > @@ -47,71 +48,73 @@ > #include "eal_filesystem.h" > #include "eal_pci_init.h" > > -static int pci_parse_sysfs_value(const char *filename, uint64_t *val); > - > void *pci_map_addr = NULL; > > > #define OFF_MAX ((uint64_t)(off_t)-1) > static int > -pci_uio_get_mappings(const char *devname, struct pci_map maps[], int > nb_maps) > +pci_uio_get_mappings(struct rte_pci_device *dev, > + struct pci_map maps[], int nb_maps) > { > - int i; > - char dirname[PATH_MAX]; > + struct rte_pci_addr *loc = &dev->addr; > + int i = 0; > char filename[PATH_MAX]; > - uint64_t offset, size; > + unsigned long long start_addr, end_addr, flags; > + FILE *f; > > - for (i = 0; i != nb_maps; i++) { > + snprintf(filename, sizeof(filename), > + SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/resource", > + loc->domain, loc->bus, loc->devid, loc->function); > > - /* check if map directory exists */ > - snprintf(dirname, sizeof(dirname), > - "%s/maps/map%u", devname, i); > - > - if (access(dirname, F_OK) != 0) > - break; > + f = fopen(filename, "r"); > + if (f == NULL) { > + RTE_LOG(ERR, EAL, > + "%s(): cannot open sysfs %s\n", > + __func__, filename); > + return -1; > + } > > - /* get mapping offset */ > - snprintf(filename, sizeof(filename), > - "%s/offset", dirname); > - if (pci_parse_sysfs_value(filename, &offset) < 0) { > - RTE_LOG(ERR, EAL, > - "%s(): cannot parse offset of %s\n", > - __func__, dirname); > - return -1; > + while (fscanf(f, "%llx %llx %llx", &start_addr, > + &end_addr, &flags) == 3 && i < nb_maps) { > + if (flags & IORESOURCE_MEM) { > + maps[i].offset = 0x0; > + maps[i].size = end_addr - start_addr + 1; > + maps[i].phaddr = start_addr; > + i++; > } > + } > + fclose(f); > > - /* get mapping size */ > - snprintf(filename, sizeof(filename), > - "%s/size", dirname); > - if (pci_parse_sysfs_value(filename, &size) < 0) { > - RTE_LOG(ERR, EAL, > - "%s(): cannot parse size of %s\n", > - __func__, dirname); > - return -1; > - } > + return i; > +} > > - /* get mapping physical address */ > - snprintf(filename, sizeof(filename), > - "%s/addr", dirname); > - if (pci_parse_sysfs_value(filename, &maps[i].phaddr) < 0) { > - RTE_LOG(ERR, EAL, > - "%s(): cannot parse addr of %s\n", > - __func__, dirname); > - return -1; > - } > This function ends up reinventing the wheel from eal_pci.c plus it adds some new array with mappings in them but not indexed the same way as eal_pci.c see comments at the end of this mail. > +static int > +pci_uio_set_bus_master(int dev_fd) > +{ > + uint16_t reg; > + int ret; > > - if ((offset > OFF_MAX) || (size > SIZE_MAX)) { > - RTE_LOG(ERR, EAL, > - "%s(): offset/size exceed system max > value\n", > - __func__); > - return -1; > - } > + ret = pread(dev_fd, ®, sizeof(reg), PCI_COMMAND); > + if (ret != sizeof(reg)) { > + RTE_LOG(ERR, EAL, > + "Cannot read command from PCI config space!\n"); > + return -1; > + } > + > + /* return if bus mastering is already on */ > + if (reg & PCI_COMMAND_MASTER) > + return 0; > + > + reg |= PCI_COMMAND_MASTER; > > - maps[i].offset = offset; > - maps[i].size = size; > + ret = pwrite(dev_fd, ®, sizeof(reg), PCI_COMMAND); > + if (ret != sizeof(reg)) { > + RTE_LOG(ERR, EAL, > + "Cannot write command to PCI config space!\n"); > + return -1; > } > > - return i; > + return 0; > } > It would have been the good time to have a generic api to access pci config space. Something like what Stephen proposed. > static int > @@ -127,6 +130,10 @@ pci_uio_map_secondary(struct rte_pci_device *dev) > continue; > > for (i = 0; i != uio_res->nb_maps; i++) { > + /* ignore mappings unused in primary process */ > + if (uio_res->maps[i].addr == NULL) > + continue; > + /* > * open devname, to mmap it > */ > @@ -282,6 +289,7 @@ pci_uio_map_resource(struct rte_pci_device *dev) > { > int i, j; > char dirname[PATH_MAX]; > + char cfgname[PATH_MAX]; > char devname[PATH_MAX]; /* contains the /dev/uioX */ > void *mapaddr; > int uio_num; > @@ -294,6 +302,7 @@ pci_uio_map_resource(struct rte_pci_device *dev) > struct pci_map *maps; > > dev->intr_handle.fd = -1; > + dev->intr_handle.uio_cfg_fd = -1; > dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; > > /* secondary processes - use already recorded details */ > @@ -318,6 +327,28 @@ pci_uio_map_resource(struct rte_pci_device *dev) > } > dev->intr_handle.type = RTE_INTR_HANDLE_UIO; > > + snprintf(cfgname, sizeof(cfgname), > + "/sys/class/uio/uio%u/device/config", uio_num); > + dev->intr_handle.uio_cfg_fd = open(cfgname, O_RDWR); > + if (dev->intr_handle.uio_cfg_fd < 0) { > + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", > + cfgname, strerror(errno)); > + return -1; > + } > + > + /* update devname for mmap */ > + snprintf(devname, sizeof(devname), > + SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/resource%d", > + loc->domain, loc->bus, loc->devid, loc->function, 0); > Why bother with a %d if you hardcode 0 ? More importantly, since you hardcode "devname" to /sys/.../resource0, then the mmap code will use a fd on this file. I really am skeptical on the fact that it can work for devices that have no bar0. Then, how is this supposed to work ? You rely on sysfs mmap code for pci resources. Is this really equivalent to uio mmap operations ? > + > + /* set bus master that is not done by uio_pci_generic */ > + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { > + if (pci_uio_set_bus_master(dev->intr_handle.uio_cfg_fd)) { > + RTE_LOG(ERR, EAL, "Cannot set up bus > mastering!\n"); > + return -1; > + } > + } > + > You are already running in a primary process, this check is useless. > /* allocate the mapping details for secondary processes*/ > uio_res = rte_zmalloc("UIO_RES", sizeof(*uio_res), 0); > if (uio_res == NULL) { > @@ -330,13 +361,12 @@ pci_uio_map_resource(struct rte_pci_device *dev) > memcpy(&uio_res->pci_addr, &dev->addr, sizeof(uio_res->pci_addr)); > > /* collect info about device mappings */ > - nb_maps = pci_uio_get_mappings(dirname, uio_res->maps, > - RTE_DIM(uio_res->maps)); > + nb_maps = pci_uio_get_mappings(dev, uio_res->maps, > + RTE_DIM(uio_res->maps)); > if (nb_maps < 0) { > rte_free(uio_res); > return nb_maps; > } > - > uio_res->nb_maps = nb_maps; > > /* Map all BARs */ > Ok then after this, we use this temp array uio_res->maps and we loop all over the pci resources. Why do we have all these loops ? I could see no point before, and with this change, it is bothering me again. Won't it be easier to loop on the pci resources discovered by eal_pci.c before this function is called ? eal_pci.c is responsible for discovering pci devices, prepare those devices (filling mem_resource[] for example), then eal_pci_uio.c / eal_pci_vfio.c do the "mapping" stuff. So uio / vfio must not overwrite what has already been set before. -- David Marchand