From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-pd0-f180.google.com (mail-pd0-f180.google.com [209.85.192.180]) by dpdk.org (Postfix) with ESMTP id 1EA039A87 for ; Tue, 21 Apr 2015 19:32:45 +0200 (CEST) Received: by pdbqa5 with SMTP id qa5so247897424pdb.1 for ; Tue, 21 Apr 2015 10:32:44 -0700 (PDT) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20130820; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references; bh=4J7rT9fo9VgV4PoX2ajEbsrSTI0R5G9wrr0RlaqHB50=; b=dIiHYoHpWsiCSclN111x1QKCBzVha+VqokLHAZBJTbh+7Yr0t2W1dHdTyXMaVAbFUQ mZfTyiZVRjTlZuX5cfs4+TAKvj7u0z1MiMQEuDHMZqUS1oA5ZjedKhafuNpbX4HSBbRs XhiakGyPc2Atkfyjw7lDsLromg6KNkhnm484eO2jVKpiIn4aldwNr/pVweJU5iBZLG2F +ISzoqaS9JUt0pLwf/+QzzR1HFJc9dsGjUriNtU8ZgPTeJ+xRpVnIWCFJucsuFX61ol6 /quMCEfWBo4Qo1p/dLl2qtZWuQf5ZP70zTLKRUakavyN/vtRcy68YWRW+QgvEIElQSSl B83A== X-Gm-Message-State: ALoCoQm5T/ZbrMW2kTQhwtpeYv+szM7gOeBdd6K/sXsGfsCelIgCUDIK7JQNhgIhiYLDrT6JYTH1 X-Received: by 10.68.206.8 with SMTP id lk8mr39739213pbc.13.1429637564485; Tue, 21 Apr 2015 10:32:44 -0700 (PDT) Received: from urahara.home.lan (static-50-53-82-155.bvtn.or.frontiernet.net. 
[50.53.82.155]) by mx.google.com with ESMTPSA id qz3sm2674040pab.13.2015.04.21.10.32.43 (version=TLSv1.2 cipher=ECDHE-RSA-AES128-SHA bits=128/128); Tue, 21 Apr 2015 10:32:43 -0700 (PDT) From: Stephen Hemminger To: alexmay@microsoft.com Date: Tue, 21 Apr 2015 10:32:40 -0700 Message-Id: <1429637564-5656-4-git-send-email-stephen@networkplumber.org> X-Mailer: git-send-email 2.1.4 In-Reply-To: <1429637564-5656-1-git-send-email-stephen@networkplumber.org> References: <1429637564-5656-1-git-send-email-stephen@networkplumber.org> Cc: dev@dpdk.org Subject: [dpdk-dev] [PATCH v4 3/7] hv: add basic vmbus support X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 21 Apr 2015 17:32:45 -0000 The Hyper-V device driver forces the base EAL code to change to support multiple bus types. This is done by changing the pci_device in the ether driver to a generic union. As much as possible this is done in a backwards source-compatible way. It will break the ABI for device drivers. 
Signed-off-by: Stephen Hemminger --- lib/librte_eal/common/Makefile | 2 +- lib/librte_eal/common/eal_common_options.c | 5 + lib/librte_eal/common/eal_internal_cfg.h | 1 + lib/librte_eal/common/eal_options.h | 2 + lib/librte_eal/common/eal_private.h | 10 + lib/librte_eal/common/include/rte_vmbus.h | 159 +++++++ lib/librte_eal/linuxapp/eal/Makefile | 3 + lib/librte_eal/linuxapp/eal/eal.c | 11 + lib/librte_eal/linuxapp/eal/eal_vmbus.c | 641 +++++++++++++++++++++++++++++ lib/librte_ether/rte_ethdev.c | 128 +++++- lib/librte_ether/rte_ethdev.h | 15 +- 11 files changed, 968 insertions(+), 9 deletions(-) create mode 100644 lib/librte_eal/common/include/rte_vmbus.h create mode 100644 lib/librte_eal/linuxapp/eal/eal_vmbus.c diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile index 3ea3bbf..202485e 100644 --- a/lib/librte_eal/common/Makefile +++ b/lib/librte_eal/common/Makefile @@ -33,7 +33,7 @@ include $(RTE_SDK)/mk/rte.vars.mk INC := rte_branch_prediction.h rte_common.h INC += rte_debug.h rte_eal.h rte_errno.h rte_launch.h rte_lcore.h -INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h +INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h rte_vmbus.h INC += rte_pci_dev_ids.h rte_per_lcore.h rte_random.h INC += rte_rwlock.h rte_tailq.h rte_interrupts.h rte_alarm.h INC += rte_string_fns.h rte_version.h diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c index 8fcb1ab..76a3394 100644 --- a/lib/librte_eal/common/eal_common_options.c +++ b/lib/librte_eal/common/eal_common_options.c @@ -80,6 +80,7 @@ eal_long_options[] = { {OPT_NO_HPET, 0, NULL, OPT_NO_HPET_NUM }, {OPT_NO_HUGE, 0, NULL, OPT_NO_HUGE_NUM }, {OPT_NO_PCI, 0, NULL, OPT_NO_PCI_NUM }, + {OPT_NO_VMBUS, 0, NULL, OPT_NO_VMBUS_NUM }, {OPT_NO_SHCONF, 0, NULL, OPT_NO_SHCONF_NUM }, {OPT_PCI_BLACKLIST, 1, NULL, OPT_PCI_BLACKLIST_NUM }, {OPT_PCI_WHITELIST, 1, NULL, OPT_PCI_WHITELIST_NUM }, @@ -726,6 +727,10 @@ eal_parse_common_option(int opt, const 
char *optarg, conf->no_pci = 1; break; + case OPT_NO_VMBUS_NUM: + conf->no_vmbus = 1; + break; + case OPT_NO_HPET_NUM: conf->no_hpet = 1; break; diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h index e2ecb0d..0e7de34 100644 --- a/lib/librte_eal/common/eal_internal_cfg.h +++ b/lib/librte_eal/common/eal_internal_cfg.h @@ -66,6 +66,7 @@ struct internal_config { volatile unsigned no_hugetlbfs; /**< true to disable hugetlbfs */ volatile unsigned xen_dom0_support; /**< support app running on Xen Dom0*/ volatile unsigned no_pci; /**< true to disable PCI */ + volatile unsigned no_vmbus; /**< true to disable VMBUS */ volatile unsigned no_hpet; /**< true to disable HPET */ volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping * instead of native TSC */ diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h index f6714d9..54f03dc 100644 --- a/lib/librte_eal/common/eal_options.h +++ b/lib/librte_eal/common/eal_options.h @@ -67,6 +67,8 @@ enum { OPT_NO_HUGE_NUM, #define OPT_NO_PCI "no-pci" OPT_NO_PCI_NUM, +#define OPT_NO_VMBUS "no-vmbus" + OPT_NO_VMBUS_NUM, #define OPT_NO_SHCONF "no-shconf" OPT_NO_SHCONF_NUM, #define OPT_SOCKET_MEM "socket-mem" diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h index 4acf5a0..039e9f3 100644 --- a/lib/librte_eal/common/eal_private.h +++ b/lib/librte_eal/common/eal_private.h @@ -180,6 +180,16 @@ int rte_eal_pci_close_one_driver(struct rte_pci_driver *dr, struct rte_pci_device *dev); /** + * VMBUS related functions and structures + */ +int rte_eal_vmbus_init(void); + +struct rte_vmbus_driver; +struct rte_vmbus_device; + +int rte_eal_vmbus_probe_one_driver(struct rte_vmbus_driver *dr, + struct rte_vmbus_device *dev); +/** * Init tail queues for non-EAL library structures. This is to allow * the rings, mempools, etc. 
lists to be shared among multiple processes * diff --git a/lib/librte_eal/common/include/rte_vmbus.h b/lib/librte_eal/common/include/rte_vmbus.h new file mode 100644 index 0000000..e632572 --- /dev/null +++ b/lib/librte_eal/common/include/rte_vmbus.h @@ -0,0 +1,159 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * Copyright(c) 2013-2015 Brocade Communications Systems, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifndef _RTE_VMBUS_H_ +#define _RTE_VMBUS_H_ + +/** + * @file + * + * RTE VMBUS Interface + */ + +#include + +/** Pathname of VMBUS devices directory. */ +#define SYSFS_VMBUS_DEVICES "/sys/bus/vmbus/devices" + +/** Formatting string for VMBUS device identifier: Ex: vmbus_0_9 */ +#define VMBUS_PRI_FMT "vmbus_0_%u" + +#define VMBUS_ID_ANY 0xFFFF + +#define VMBUS_NETWORK_DEVICE "{f8615163-df3e-46c5-913f-f2d2f965ed0e}" + +/** Maximum number of VMBUS resources. */ +#define VMBUS_MAX_RESOURCE 7 + +/** + * A structure describing an ID for a VMBUS driver. Each driver provides a + * table of these IDs for each device that it supports. + */ +struct rte_vmbus_id { + uint16_t device_id; /**< VMBUS Device ID */ + uint16_t sysfs_num; /**< vmbus_0_X */ +}; + +/** + * A structure describing a VMBUS memory resource. + */ +struct rte_vmbus_resource { + uint64_t phys_addr; /**< Physical address, 0 if no resource. */ + uint64_t len; /**< Length of the resource. */ + void *addr; /**< Virtual address, NULL when not mapped. */ +}; + +/** + * A structure describing a VMBUS device. + */ +struct rte_vmbus_device { + TAILQ_ENTRY(rte_vmbus_device) next; /**< Next probed VMBUS device. */ + struct rte_vmbus_id id; /**< VMBUS ID. */ + const struct rte_vmbus_driver *driver; /**< Associated driver */ + int numa_node; /**< NUMA node connection */ + unsigned int blacklisted:1; /**< Device is blacklisted */ + struct rte_vmbus_resource mem_resource[VMBUS_MAX_RESOURCE]; /**< VMBUS Memory Resource */ + uint32_t vmbus_monitor_id; /**< VMBus monitor ID for device */ + int uio_fd; /** UIO device file descriptor */ +}; + +/** Macro used to help building up tables of device IDs */ +#define RTE_VMBUS_DEVICE(dev) \ + .device_id = (dev) + +struct rte_vmbus_driver; + +/** + * Initialisation function for the driver called during VMBUS probing. 
+ */ +typedef int (vmbus_devinit_t)(struct rte_vmbus_driver *, struct rte_vmbus_device *); + +/** + * Uninitialisation function for the driver called during hotplugging. + */ +typedef int (vmbus_devuninit_t)(struct rte_vmbus_device *); + +/** + * A structure describing a VMBUS driver. + */ +struct rte_vmbus_driver { + TAILQ_ENTRY(rte_vmbus_driver) next; /**< Next in list. */ + const char *name; /**< Driver name. */ + vmbus_devinit_t *devinit; /**< Device init. function. */ + vmbus_devuninit_t *devuninit; /**< Device uninit function. */ + const struct rte_vmbus_id *id_table; /**< ID table, terminated by a VMBUS_ID_ANY entry. */ + uint32_t drv_flags; /**< Flags controlling handling of device. */ + const char *module_name; /**< Associated kernel module */ +}; + +/** + * Probe the VMBUS device for registered drivers. + * + * Scan the content of the vmbus, and call the probe() function for + * all registered drivers that have a matching entry in its id_table + * for discovered devices. + * + * @return + * - 0 on success. + * - Negative on error. + */ +int rte_eal_vmbus_probe(void); + +/** + * Dump the content of the vmbus. + */ +void rte_eal_vmbus_dump(void); + +/** + * Register a VMBUS driver. + * + * @param driver + * A pointer to a rte_vmbus_driver structure describing the driver + * to be registered. + */ +void rte_eal_vmbus_register(struct rte_vmbus_driver *driver); + +/** + * Unregister a VMBUS driver. + * + * @param driver + * A pointer to a rte_vmbus_driver structure describing the driver + * to be unregistered. 
+ */ +void rte_eal_vmbus_unregister(struct rte_vmbus_driver *driver); + +int vmbus_uio_map_resource(struct rte_vmbus_device *dev); + +#endif /* _RTE_VMBUS_H_ */ diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile index 01f7b70..acd5127 100644 --- a/lib/librte_eal/linuxapp/eal/Makefile +++ b/lib/librte_eal/linuxapp/eal/Makefile @@ -74,6 +74,9 @@ SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_alarm.c ifeq ($(CONFIG_RTE_LIBRTE_IVSHMEM),y) SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_ivshmem.c endif +ifeq ($(CONFIG_RTE_LIBRTE_HV_PMD),y) +SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_vmbus.c +endif # from common dir SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_memzone.c diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c index bd770cf..86d0e31 100644 --- a/lib/librte_eal/linuxapp/eal/eal.c +++ b/lib/librte_eal/linuxapp/eal/eal.c @@ -70,6 +70,7 @@ #include #include #include +#include #include #include #include @@ -796,6 +797,11 @@ rte_eal_init(int argc, char **argv) rte_eal_mcfg_complete(); +#ifdef RTE_LIBRTE_HV_PMD + if (rte_eal_vmbus_init() < 0) + RTE_LOG(ERR, EAL, "Cannot init VMBUS\n"); +#endif + TAILQ_FOREACH(solib, &solib_list, next) { RTE_LOG(INFO, EAL, "open shared lib %s\n", solib->name); solib->lib_handle = dlopen(solib->name, RTLD_NOW); @@ -845,6 +851,11 @@ rte_eal_init(int argc, char **argv) if (rte_eal_pci_probe()) rte_panic("Cannot probe PCI\n"); +#ifdef RTE_LIBRTE_HV_PMD + if (rte_eal_vmbus_probe() < 0) + rte_panic("Cannot probe VMBUS\n"); +#endif + return fctret; } diff --git a/lib/librte_eal/linuxapp/eal/eal_vmbus.c b/lib/librte_eal/linuxapp/eal/eal_vmbus.c new file mode 100644 index 0000000..165edd6 --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/eal_vmbus.c @@ -0,0 +1,641 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * Copyright(c) 2013-2015 Brocade Communications Systems, Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "eal_filesystem.h" +#include "eal_private.h" + +#define PROC_MODULES "/proc/modules" +#define VMBUS_DRV_PATH "/sys/bus/vmbus/drivers/%s" + +TAILQ_HEAD(vmbus_device_list, rte_vmbus_device); /**< VMBUS devices in D-linked Q. */ +TAILQ_HEAD(vmbus_driver_list, rte_vmbus_driver); /**< VMBUS drivers in D-linked Q. 
*/ + +static struct vmbus_driver_list vmbus_driver_list = + TAILQ_HEAD_INITIALIZER(vmbus_driver_list); +static struct vmbus_device_list vmbus_device_list = + TAILQ_HEAD_INITIALIZER(vmbus_device_list); + +struct uio_map { + void *addr; + uint64_t offset; + uint64_t size; + uint64_t phaddr; +}; + +/* + * For multi-process we need to reproduce all vmbus mappings in secondary + * processes, so save them in a tailq. + */ +struct uio_resource { + TAILQ_ENTRY(uio_resource) next; + + struct rte_vmbus_id vmbus_addr; + char path[PATH_MAX]; + size_t nb_maps; + struct uio_map maps[VMBUS_MAX_RESOURCE]; +}; + +/* + * parse a sysfs file containing one integer value + * different to the eal version, as it needs to work with 64-bit values + */ +static int +vmbus_parse_sysfs_value(const char *filename, uint64_t *val) +{ + FILE *f; + char buf[BUFSIZ]; + char *end = NULL; + + f = fopen(filename, "r"); + if (f == NULL) { + RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n", + __func__, filename); + return -1; + } + + if (fgets(buf, sizeof(buf), f) == NULL) { + RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n", + __func__, filename); + fclose(f); + return -1; + } + *val = strtoull(buf, &end, 0); + if ((buf[0] == '\0') || (end == NULL) || (*end != '\n')) { + RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs value %s\n", + __func__, filename); + fclose(f); + return -1; + } + fclose(f); + return 0; +} + +#define OFF_MAX ((uint64_t)(off_t)-1) +static ssize_t +vmbus_uio_get_mappings(const char *devname, struct uio_map maps[], size_t nb_maps) +{ + size_t i; + char dirname[PATH_MAX]; + char filename[PATH_MAX]; + uint64_t offset, size; + + for (i = 0; i != nb_maps; i++) { + + /* check if map directory exists */ + snprintf(dirname, sizeof(dirname), + "%s/maps/map%zu", devname, i); + + RTE_LOG(DEBUG, EAL, "Scanning maps in %s\n", (char *)dirname); + + if (access(dirname, F_OK) != 0) + break; + + /* get mapping offset */ + snprintf(filename, sizeof(filename), + "%s/offset", dirname); + if 
(vmbus_parse_sysfs_value(filename, &offset) < 0) { + RTE_LOG(ERR, EAL, + "%s(): cannot parse offset of %s\n", + __func__, dirname); + return -1; + } + + /* get mapping size */ + snprintf(filename, sizeof(filename), + "%s/size", dirname); + if (vmbus_parse_sysfs_value(filename, &size) < 0) { + RTE_LOG(ERR, EAL, + "%s(): cannot parse size of %s\n", + __func__, dirname); + return -1; + } + + /* get mapping physical address */ + snprintf(filename, sizeof(filename), + "%s/addr", dirname); + if (vmbus_parse_sysfs_value(filename, &maps[i].phaddr) < 0) { + RTE_LOG(ERR, EAL, + "%s(): cannot parse addr of %s\n", + __func__, dirname); + return -1; + } + + if ((offset > OFF_MAX) || (size > SIZE_MAX)) { + RTE_LOG(ERR, EAL, + "%s(): offset/size exceed system max value\n", + __func__); + return -1; + } + + maps[i].offset = offset; + maps[i].size = size; + } + return i; +} + +/* maximum time to wait that /dev/uioX appears */ +#define UIO_DEV_WAIT_TIMEOUT 3 /* seconds */ + +/* map a particular resource from a file */ +static void * +vmbus_map_resource(struct rte_vmbus_device *dev, void *requested_addr, + const char *devname, off_t offset, size_t size) +{ + int fd; + void *mapaddr; + + if (dev->uio_fd <= 0) + fd = open(devname, O_RDWR); + else + fd = dev->uio_fd; + + if (fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", + devname, strerror(errno)); + goto fail; + } + + dev->uio_fd = fd; + /* Map the memory resource of device */ + mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, offset); + if (mapaddr == MAP_FAILED || + (requested_addr != NULL && mapaddr != requested_addr)) { + RTE_LOG(ERR, EAL, + "%s(): cannot mmap(%s(%d), %p, 0x%lx, 0x%lx):" + " %s (%p)\n", __func__, devname, fd, requested_addr, + (unsigned long)size, (unsigned long)offset, + strerror(errno), mapaddr); + close(fd); + goto fail; + } + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + close(fd); + + RTE_LOG(DEBUG, EAL, " VMBUS memory mapped at %p\n", mapaddr); + + return mapaddr; + 
+fail: + return NULL; +} + +/* map the resources of a vmbus device in virtual memory */ +int +vmbus_uio_map_resource(struct rte_vmbus_device *dev) +{ + int i; + struct dirent *e; + DIR *dir; + char dirname[PATH_MAX]; + char dirname2[PATH_MAX]; + char devname[PATH_MAX]; /* contains the /dev/uioX */ + void *mapaddr; + unsigned uio_num; + uint64_t phaddr; + uint64_t offset; + uint64_t pagesz; + ssize_t nb_maps; + struct rte_vmbus_id *loc = &dev->id; + struct uio_resource *uio_res; + struct uio_map *maps; + + /* depending on kernel version, uio can be located in uio/uioX + * or uio:uioX */ + snprintf(dirname, sizeof(dirname), + "/sys/bus/vmbus/devices/" VMBUS_PRI_FMT "/uio", loc->sysfs_num); + + dir = opendir(dirname); + if (dir == NULL) { + /* retry with the parent directory */ + snprintf(dirname, sizeof(dirname), + "/sys/bus/vmbus/devices/" VMBUS_PRI_FMT, loc->sysfs_num); + dir = opendir(dirname); + + if (dir == NULL) { + RTE_LOG(ERR, EAL, "Cannot opendir %s\n", dirname); + return -1; + } + } + + /* take the first file starting with "uio" */ + while ((e = readdir(dir)) != NULL) { + /* format could be uio%d ...*/ + int shortprefix_len = sizeof("uio") - 1; + /* ... 
or uio:uio%d */ + int longprefix_len = sizeof("uio:uio") - 1; + char *endptr; + + if (strncmp(e->d_name, "uio", 3) != 0) + continue; + + /* first try uio%d */ + errno = 0; + uio_num = strtoull(e->d_name + shortprefix_len, &endptr, 10); + if (errno == 0 && endptr != e->d_name) { + snprintf(dirname2, sizeof(dirname2), + "%s/uio%u", dirname, uio_num); + break; + } + + /* then try uio:uio%d */ + errno = 0; + uio_num = strtoull(e->d_name + longprefix_len, &endptr, 10); + if (errno == 0 && endptr != e->d_name) { + snprintf(dirname2, sizeof(dirname2), + "%s/uio:uio%u", dirname, uio_num); + break; + } + } + closedir(dir); + + /* No uio resource found */ + if (e == NULL) { + RTE_LOG(WARNING, EAL, + VMBUS_PRI_FMT" not managed by UIO driver, skipping\n", + loc->sysfs_num); + return -1; + } + + /* allocate the mapping details for secondary processes*/ + uio_res = rte_zmalloc("UIO_RES", sizeof(*uio_res), 0); + if (uio_res == NULL) { + RTE_LOG(ERR, EAL, + "%s(): cannot store uio mmap details\n", __func__); + return -1; + } + + snprintf(devname, sizeof(devname), "/dev/uio%u", uio_num); + snprintf(uio_res->path, sizeof(uio_res->path), "%s", devname); + memcpy(&uio_res->vmbus_addr, &dev->id, sizeof(uio_res->vmbus_addr)); + + /* collect info about device mappings */ + nb_maps = vmbus_uio_get_mappings(dirname2, uio_res->maps, + sizeof(uio_res->maps) / sizeof(uio_res->maps[0])); + if (nb_maps < 0) + return nb_maps; + + RTE_LOG(DEBUG, EAL, "Found %d memory maps for device "VMBUS_PRI_FMT"\n", + (int)nb_maps, loc->sysfs_num); + + uio_res->nb_maps = nb_maps; + + pagesz = sysconf(_SC_PAGESIZE); + + maps = uio_res->maps; + for (i = 0; i != VMBUS_MAX_RESOURCE; i++) { + phaddr = maps[i].phaddr; + if (phaddr == 0) + continue; + + RTE_LOG(DEBUG, EAL, " mem_map%d: addr=0x%lx len = %lu\n", + i, + maps[i].phaddr, + maps[i].size); + + if (i != nb_maps) { + offset = i * pagesz; + mapaddr = vmbus_map_resource(dev, NULL, devname, (off_t)offset, + (size_t)maps[i].size); + if (mapaddr == NULL) + return 
-1; + + /* Important: offset for mapping can be non-zero, pad the addr */ + mapaddr = ((char *)mapaddr + maps[i].offset); + maps[i].addr = mapaddr; + maps[i].offset = offset; + dev->mem_resource[i].addr = mapaddr; + dev->mem_resource[i].phys_addr = phaddr; + dev->mem_resource[i].len = maps[i].size; + } + } + + return 0; +} + +/* Compare two VMBUS device addresses. */ +static int +vmbus_compare(struct rte_vmbus_id *id, struct rte_vmbus_id *id2) +{ + return id->device_id > id2->device_id; +} + +/* Scan one vmbus sysfs entry, and fill the devices list from it. */ +static int +vmbus_scan_one(const char *name) +{ + char filename[PATH_MAX]; + char buf[BUFSIZ]; + char dirname[PATH_MAX]; + unsigned long tmp; + unsigned int sysfs_num; + struct rte_vmbus_device *dev; + FILE *f; + + dev = rte_zmalloc("vmbus_device", sizeof(*dev), 0); + if (dev == NULL) + return -1; + + snprintf(dirname, sizeof(dirname), "%s/%s", + SYSFS_VMBUS_DEVICES, name); + + /* parse directory name in sysfs. this does not always reflect + * the device id read below. 
+ */ + if (sscanf(name, VMBUS_PRI_FMT, &sysfs_num) != 1) { + RTE_LOG(ERR, EAL, "Unable to parse vmbus sysfs name\n"); + rte_free(dev); + return -1; + } + dev->id.sysfs_num = sysfs_num; + + /* get device id */ + snprintf(filename, sizeof(filename), "%s/id", dirname); + if (eal_parse_sysfs_value(filename, &tmp) < 0) { + rte_free(dev); + return -1; + } + dev->id.device_id = (uint16_t)tmp; + + /* get monitor id */ + snprintf(filename, sizeof(filename), "%s/monitor_id", dirname); + if (eal_parse_sysfs_value(filename, &tmp) < 0) { + rte_free(dev); + return -1; + } + dev->vmbus_monitor_id = tmp; + + /* compare class_id of device with {f8615163-df3e-46c5-913ff2d2f965ed0e} */ + snprintf(filename, sizeof(filename), "%s/class_id", dirname); + f = fopen(filename, "r"); + if (f == NULL) { + RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n", + __func__, filename); + rte_free(dev); + return -1; + } + if (fgets(buf, sizeof(buf), f) == NULL) { + RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n", + __func__, filename); + fclose(f); + rte_free(dev); + return -1; + } + fclose(f); + + if (strncmp(buf, VMBUS_NETWORK_DEVICE, strlen(VMBUS_NETWORK_DEVICE))) { + RTE_LOG(DEBUG, EAL, "%s(): skip vmbus_0_%u with class_id = %s", + __func__, dev->id.sysfs_num, buf); + rte_free(dev); + return 0; + } + + /* device is valid, add in list (sorted) */ + RTE_LOG(DEBUG, EAL, "Adding vmbus device %d\n", dev->id.device_id); + if (!TAILQ_EMPTY(&vmbus_device_list)) { + struct rte_vmbus_device *dev2 = NULL; + + TAILQ_FOREACH(dev2, &vmbus_device_list, next) { + if (vmbus_compare(&dev->id, &dev2->id)) + continue; + + TAILQ_INSERT_BEFORE(dev2, dev, next); + return 0; + } + } + + TAILQ_INSERT_TAIL(&vmbus_device_list, dev, next); + + return 0; +} + +static int +check_vmbus_device(const char *buf, int bufsize) +{ + char *n, *buf_copy, *endp; + unsigned long err; + + /* the format is 'vmbus_0_%d' */ + n = strrchr(buf, '_'); + if (n == NULL) + return -1; + n++; + buf_copy = strndup(n, bufsize); + if 
(buf_copy == NULL) { + RTE_LOG(ERR, EAL, "%s(): failed to strndup: %s\n", + __func__, strerror(errno)); + return -1; + } + + err = strtoul(buf_copy, &endp, 10); + free(buf_copy); + + if (*endp != '\0' || (err == ULONG_MAX && errno == ERANGE)) { + RTE_LOG(ERR, EAL, "%s(): can't parse devid: %s\n", + __func__, strerror(errno)); + return -1; + } + + return 0; +} + +/* + * Scan the content of the vmbus, and the devices in the devices list + */ +static int +vmbus_scan(void) +{ + struct dirent *e; + DIR *dir; + + dir = opendir(SYSFS_VMBUS_DEVICES); + if (dir == NULL) { + if (errno == ENOENT) + return 0; + + RTE_LOG(ERR, EAL, "%s(): opendir failed: %s\n", + __func__, strerror(errno)); + return -1; + } + + while ((e = readdir(dir)) != NULL) { + if (e->d_name[0] == '.') + continue; + + if (check_vmbus_device(e->d_name, sizeof(e->d_name))) + continue; + + if (vmbus_scan_one(e->d_name) < 0) + goto error; + } + closedir(dir); + return 0; + + error: + closedir(dir); + return -1; +} + +/* Init the VMBUS EAL subsystem */ +int rte_eal_vmbus_init(void) +{ + /* VMBUS can be disabled */ + if (internal_config.no_vmbus) + return 0; + + if (vmbus_scan() < 0) { + RTE_LOG(ERR, EAL, "%s(): Cannot scan vmbus\n", __func__); + return -1; + } + return 0; +} + +/* Below is PROBE part of eal_vmbus library */ + +/* + * If device ID match, call the devinit() function of the driver. 
+ */ +int +rte_eal_vmbus_probe_one_driver(struct rte_vmbus_driver *dr, + struct rte_vmbus_device *dev) +{ + const struct rte_vmbus_id *id_table; + + for (id_table = dr->id_table; id_table->device_id != VMBUS_ID_ANY; id_table++) { + const struct rte_vmbus_id *loc = &dev->id; + + RTE_LOG(DEBUG, EAL, "VMBUS device "VMBUS_PRI_FMT"\n", + loc->sysfs_num); + RTE_LOG(DEBUG, EAL, " probe driver: %s\n", dr->name); + + /* no initialization when blacklisted, return without error */ + if (dev->blacklisted) { + RTE_LOG(DEBUG, EAL, " Device is blacklisted, not initializing\n"); + return 0; + } + + /* map the resources */ + if (vmbus_uio_map_resource(dev) < 0) + return -1; + + /* reference driver structure */ + dev->driver = dr; + + /* call the driver devinit() function */ + return dr->devinit(dr, dev); + } + + /* return positive value if driver is not found */ + return 1; +} + +/* + * call the devinit() function of all + * registered drivers for the vmbus device. Return -1 if no driver is + * found for this class of vmbus device. + * The present assumption is that we have drivers only for vmbus network + * devices. That's why we don't check driver's id_table now. + */ +static int +vmbus_probe_all_drivers(struct rte_vmbus_device *dev) +{ + struct rte_vmbus_driver *dr = NULL; + int ret; + + TAILQ_FOREACH(dr, &vmbus_driver_list, next) { + ret = rte_eal_vmbus_probe_one_driver(dr, dev); + if (ret < 0) { + /* negative value is an error */ + RTE_LOG(ERR, EAL, "Failed to probe driver %s\n", dr->name); + break; + } + if (ret > 0) { + /* positive value means driver not found */ + RTE_LOG(DEBUG, EAL, "Driver %s not found", dr->name); + continue; + } + + RTE_LOG(DEBUG, EAL, "OK. Driver was found and probed.\n"); + return 0; + } + return -1; +} + + +/* + * Scan the vmbus, and call the devinit() function for + * all registered drivers that have a matching entry in its id_table + * for discovered devices. 
+ */ +int +rte_eal_vmbus_probe(void) +{ + struct rte_vmbus_device *dev = NULL; + + TAILQ_FOREACH(dev, &vmbus_device_list, next) { + RTE_LOG(DEBUG, EAL, "Probing driver for device %d ...\n", + dev->id.device_id); + vmbus_probe_all_drivers(dev); + } + return 0; +} + +/* register vmbus driver */ +void +rte_eal_vmbus_register(struct rte_vmbus_driver *driver) +{ + TAILQ_INSERT_TAIL(&vmbus_driver_list, driver, next); +} + +/* unregister vmbus driver */ +void +rte_eal_vmbus_unregister(struct rte_vmbus_driver *driver) +{ + TAILQ_REMOVE(&vmbus_driver_list, driver, next); +} + diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c index 9577d17..9093966 100644 --- a/lib/librte_ether/rte_ethdev.c +++ b/lib/librte_ether/rte_ethdev.c @@ -379,6 +379,98 @@ rte_eth_dev_uninit(struct rte_pci_device *pci_dev) return 0; } +#ifdef RTE_LIBRTE_HV_PMD +static int +rte_vmbus_dev_init(struct rte_vmbus_driver *vmbus_drv, + struct rte_vmbus_device *vmbus_dev) +{ + struct eth_driver *eth_drv = (struct eth_driver *)vmbus_drv; + struct rte_eth_dev *eth_dev; + char ethdev_name[RTE_ETH_NAME_MAX_LEN]; + int diag; + + snprintf(ethdev_name, RTE_ETH_NAME_MAX_LEN, "%u_%u", + vmbus_dev->id.device_id, vmbus_dev->id.sysfs_num); + + eth_dev = rte_eth_dev_allocate(ethdev_name, RTE_ETH_DEV_PCI); + if (eth_dev == NULL) + return -ENOMEM; + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + eth_dev->data->dev_private = rte_zmalloc("ethdev private structure", + eth_drv->dev_private_size, + RTE_CACHE_LINE_SIZE); + if (eth_dev->data->dev_private == NULL) + rte_panic("Cannot allocate memzone for private port data\n"); + } + eth_dev->vmbus_dev = vmbus_dev; + eth_dev->driver = eth_drv; + eth_dev->data->rx_mbuf_alloc_failed = 0; + + /* init user callbacks */ + TAILQ_INIT(&(eth_dev->link_intr_cbs)); + + /* + * Set the default maximum frame size. 
+ */ + eth_dev->data->mtu = ETHER_MTU; + + /* Invoke PMD device initialization function */ + diag = (*eth_drv->eth_dev_init)(eth_dev); + if (diag == 0) + return 0; + + PMD_DEBUG_TRACE("driver %s: eth_dev_init(device_id=0x%x)" + " failed\n", vmbus_drv->name, + (unsigned) vmbus_dev->id.device_id); + if (rte_eal_process_type() == RTE_PROC_PRIMARY) + rte_free(eth_dev->data->dev_private); + nb_ports--; + return diag; +} + + +static int +rte_vmbus_dev_uninit(struct rte_vmbus_device *vmbus_dev) +{ + const struct eth_driver *eth_drv; + struct rte_eth_dev *eth_dev; + char ethdev_name[RTE_ETH_NAME_MAX_LEN]; + int ret; + + if (vmbus_dev == NULL) + return -EINVAL; + + snprintf(ethdev_name, RTE_ETH_NAME_MAX_LEN, "%u_%u", + vmbus_dev->id.device_id, vmbus_dev->id.sysfs_num); + + eth_dev = rte_eth_dev_allocated(ethdev_name); + if (eth_dev == NULL) + return -ENODEV; + + eth_drv = (const struct eth_driver *)vmbus_dev->driver; + + /* Invoke PMD device uninit function */ + if (*eth_drv->eth_dev_uninit) { + ret = (*eth_drv->eth_dev_uninit)(eth_dev); + if (ret) + return ret; + } + + /* free ether device */ + rte_eth_dev_release_port(eth_dev); + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) + rte_free(eth_dev->data->dev_private); + + eth_dev->pci_dev = NULL; + eth_dev->driver = NULL; + eth_dev->data = NULL; + + return 0; +} +#endif + /** * Register an Ethernet [Poll Mode] driver. 
* @@ -396,9 +488,22 @@ rte_eth_dev_uninit(struct rte_pci_device *pci_dev) void rte_eth_driver_register(struct eth_driver *eth_drv) { - eth_drv->pci_drv.devinit = rte_eth_dev_init; - eth_drv->pci_drv.devuninit = rte_eth_dev_uninit; - rte_eal_pci_register(&eth_drv->pci_drv); + switch (eth_drv->bus_type) { + case RTE_BUS_PCI: + eth_drv->pci_drv.devinit = rte_eth_dev_init; + eth_drv->pci_drv.devuninit = rte_eth_dev_uninit; + rte_eal_pci_register(&eth_drv->pci_drv); + break; +#ifdef RTE_LIBRTE_HV_PMD + case RTE_BUS_VMBUS: + eth_drv->vmbus_drv.devinit = rte_vmbus_dev_init; + eth_drv->vmbus_drv.devuninit = rte_vmbus_dev_uninit; + rte_eal_vmbus_register(&eth_drv->vmbus_drv); + break; +#endif + default: + rte_panic("unknown bus type %u\n", eth_drv->bus_type); + } } static int @@ -1351,6 +1456,9 @@ rte_eth_has_link_state(uint8_t port_id) } dev = &rte_eth_devices[port_id]; + if (dev->driver->bus_type != RTE_BUS_PCI) + return 0; + return (dev->pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) != 0; } @@ -1901,9 +2009,17 @@ rte_eth_dev_info_get(uint8_t port_id, struct rte_eth_dev_info *dev_info) FUNC_PTR_OR_RET(*dev->dev_ops->dev_infos_get); (*dev->dev_ops->dev_infos_get)(dev, dev_info); - dev_info->pci_dev = dev->pci_dev; - if (dev->driver) - dev_info->driver_name = dev->driver->pci_drv.name; + + if (dev->driver) { + switch (dev->driver->bus_type) { + case RTE_BUS_PCI: + dev_info->driver_name = dev->driver->pci_drv.name; + dev_info->pci_dev = dev->pci_dev; + break; + case RTE_BUS_VMBUS: + dev_info->driver_name = dev->driver->vmbus_drv.name; + } + } } void diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h index 991023b..9e08f3e 100644 --- a/lib/librte_ether/rte_ethdev.h +++ b/lib/librte_ether/rte_ethdev.h @@ -178,6 +178,7 @@ extern "C" { #include #include #include +#include #include #include #include @@ -1477,7 +1478,10 @@ struct rte_eth_dev { struct rte_eth_dev_data *data; /**< Pointer to device data */ const struct eth_driver *driver;/**< Driver for this 
device */ const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */ - struct rte_pci_device *pci_dev; /**< PCI info. supplied by probing */ + union { + struct rte_pci_device *pci_dev; /**< PCI info. supplied by probing */ + struct rte_vmbus_device *vmbus_dev; /**< VMBUS info. supplied by probing */ + }; /** User application callbacks for NIC interrupts */ struct rte_eth_dev_cb_list link_intr_cbs; /** @@ -1696,7 +1700,14 @@ typedef int (*eth_dev_uninit_t)(struct rte_eth_dev *eth_dev); * - The size of the private data to allocate for each matching device. */ struct eth_driver { - struct rte_pci_driver pci_drv; /**< The PMD is also a PCI driver. */ + union { + struct rte_pci_driver pci_drv; /**< The PMD is also a PCI driver. */ + struct rte_vmbus_driver vmbus_drv;/**< The PMD is also a VMBUS drv. */ + }; + enum { + RTE_BUS_PCI=0, + RTE_BUS_VMBUS + } bus_type; /**< Device bus type. */ eth_dev_init_t eth_dev_init; /**< Device init function. */ eth_dev_uninit_t eth_dev_uninit; /**< Device uninit function. */ unsigned int dev_private_size; /**< Size of device private data. */ -- 2.1.4