From: Tiwei Bie <tiwei.bie@intel.com>
To: maxime.coquelin@redhat.com, zhihong.wang@intel.com, anatoly.burakov@intel.com, dev@dpdk.org
Date: Tue, 28 Aug 2018 15:53:27 +0800
Message-Id: <20180828075327.18842-1-tiwei.bie@intel.com>
X-Mailer: git-send-email 2.18.0
Subject: [dpdk-dev] [RFC] net/virtio-user: avoid parsing process mappings

Recently, some memory APIs were introduced to allow users to get
the file descriptor for each memory segment. We can leverage those
APIs to get rid of the /proc magic in memory table preparation in
the vhost-user backend.

Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
---
This patch depends on the below patch set:
https://patches.dpdk.org/project/dpdk/list/?series=1040

 drivers/net/virtio/virtio_user/vhost_user.c   | 214 ++++++++----------
 .../net/virtio/virtio_user/virtio_user_dev.c  |  19 ++
 2 files changed, 112 insertions(+), 121 deletions(-)

diff --git a/drivers/net/virtio/virtio_user/vhost_user.c b/drivers/net/virtio/virtio_user/vhost_user.c
index ef6e43df8..770cfdc9a 100644
--- a/drivers/net/virtio/virtio_user/vhost_user.c
+++ b/drivers/net/virtio/virtio_user/vhost_user.c
@@ -11,6 +11,9 @@
 #include
 #include
 
+#include
+#include
+
 #include "vhost.h"
 #include "virtio_user_dev.h"
 
@@ -121,133 +124,106 @@ vhost_user_read(int fd, struct vhost_user_msg *msg)
 	return -1;
 }
 
-struct hugepage_file_info {
-	uint64_t addr;            /**< virtual addr */
-	size_t   size;            /**< the file size */
-	char     path[PATH_MAX];  /**< path to backing file */
+struct walk_arg {
+	struct vhost_memory *vm;
+	int *fds;
+	int region_nr;
 };
 
-/* Two possible options:
- * 1. Match HUGEPAGE_INFO_FMT to find the file storing struct hugepage_file
- *    array. This is simple but cannot be used in secondary process because
- *    secondary process will close and munmap that file.
- * 2. Match HUGEFILE_FMT to find hugepage files directly.
- *
- * We choose option 2.
- */
 static int
-get_hugepage_file_info(struct hugepage_file_info huges[], int max)
+update_memory_region(const struct rte_memseg_list *msl __rte_unused,
+		const struct rte_memseg *ms, void *arg)
 {
-	int idx, k, exist;
-	FILE *f;
-	char buf[BUFSIZ], *tmp, *tail;
-	char *str_underline, *str_start;
-	int huge_index;
-	uint64_t v_start, v_end;
-	struct stat stats;
-
-	f = fopen("/proc/self/maps", "r");
-	if (!f) {
-		PMD_DRV_LOG(ERR, "cannot open /proc/self/maps");
-		return -1;
-	}
-
-	idx = 0;
-	while (fgets(buf, sizeof(buf), f) != NULL) {
-		if (sscanf(buf, "%" PRIx64 "-%" PRIx64, &v_start, &v_end) < 2) {
-			PMD_DRV_LOG(ERR, "Failed to parse address");
-			goto error;
-		}
-
-		tmp = strchr(buf, ' ') + 1;	/** skip address */
-		tmp = strchr(tmp, ' ') + 1;	/** skip perm */
-		tmp = strchr(tmp, ' ') + 1;	/** skip offset */
-		tmp = strchr(tmp, ' ') + 1;	/** skip dev */
-		tmp = strchr(tmp, ' ') + 1;	/** skip inode */
-		while (*tmp == ' ')		/** skip spaces */
-			tmp++;
-		tail = strrchr(tmp, '\n');	/** remove newline if exists */
-		if (tail)
-			*tail = '\0';
-
-		/* Match HUGEFILE_FMT, aka "%s/%smap_%d",
-		 * which is defined in eal_filesystem.h
-		 */
-		str_underline = strrchr(tmp, '_');
-		if (!str_underline)
-			continue;
-
-		str_start = str_underline - strlen("map");
-		if (str_start < tmp)
-			continue;
-
-		if (sscanf(str_start, "map_%d", &huge_index) != 1)
-			continue;
-
-		/* skip duplicated file which is mapped to different regions */
-		for (k = 0, exist = -1; k < idx; ++k) {
-			if (!strcmp(huges[k].path, tmp)) {
-				exist = k;
-				break;
-			}
-		}
-		if (exist >= 0)
-			continue;
-
-		if (idx >= max) {
-			PMD_DRV_LOG(ERR, "Exceed maximum of %d", max);
-			goto error;
-		}
-
-		huges[idx].addr = v_start;
-		huges[idx].size = v_end - v_start; /* To be corrected later */
-		snprintf(huges[idx].path, PATH_MAX, "%s", tmp);
-		idx++;
-	}
-
-	/* correct the size for files who have many regions */
-	for (k = 0; k < idx; ++k) {
-		if (stat(huges[k].path, &stats) < 0) {
-			PMD_DRV_LOG(ERR, "Failed to stat %s, %s\n",
-				    huges[k].path, strerror(errno));
-			continue;
-		}
-		huges[k].size = stats.st_size;
-		PMD_DRV_LOG(INFO, "file %s, size %zx\n",
-			    huges[k].path, huges[k].size);
-	}
-
-	fclose(f);
-	return idx;
-
-error:
-	fclose(f);
-	return -1;
-}
-
-static int
-prepare_vhost_memory_user(struct vhost_user_msg *msg, int fds[])
-{
-	int i, num;
-	struct hugepage_file_info huges[VHOST_MEMORY_MAX_NREGIONS];
+	struct walk_arg *wa = arg;
 	struct vhost_memory_region *mr;
+	uint64_t start_addr, end_addr, offset;
+	int i, fd;
 
-	num = get_hugepage_file_info(huges, VHOST_MEMORY_MAX_NREGIONS);
-	if (num < 0) {
-		PMD_INIT_LOG(ERR, "Failed to prepare memory for vhost-user");
+	fd = rte_memseg_get_fd_thread_unsafe(ms);
+	if (fd < 0) {
+		PMD_DRV_LOG(ERR, "Failed to get fd, ms=%p", ms);
 		return -1;
 	}
 
-	for (i = 0; i < num; ++i) {
-		mr = &msg->payload.memory.regions[i];
-		mr->guest_phys_addr = huges[i].addr; /* use vaddr! */
-		mr->userspace_addr = huges[i].addr;
-		mr->memory_size = huges[i].size;
-		mr->mmap_offset = 0;
-		fds[i] = open(huges[i].path, O_RDWR);
+	start_addr = (uint64_t)(uintptr_t)ms->addr;
+	end_addr = start_addr + ms->len;
+
+	/*
+	 * XXX workaround!
+	 *
+	 * For --single-file-segments case, offset should be:
+	 * offset = rte_fbarray_find_idx(&msl->memseg_arr, ms) * msl->page_sz;
+	 *
+	 * But it's not true for non-single-file-segments case.
+	 *
+	 * This is a temporary workaround which assumes the file will
+	 * also be mapped from the beginning in --single-file-segments
+	 * case. It should be replaced when we have a memory API to
+	 * get the offset.
+	 */
+	offset = 0;
+
+	for (i = 0; i < wa->region_nr; i++) {
+		if (wa->fds[i] != fd)
+			continue;
+
+		mr = &wa->vm->regions[i];
+
+		if (mr->userspace_addr + mr->memory_size < end_addr)
+			mr->memory_size = end_addr - mr->userspace_addr;
+
+		if (mr->userspace_addr > start_addr) {
+			mr->userspace_addr = start_addr;
+			mr->guest_phys_addr = start_addr;
+		}
+
+		if (mr->mmap_offset > offset)
+			mr->mmap_offset = offset;
+
+		PMD_DRV_LOG(DEBUG, "index=%d fd=%d offset=0x%" PRIx64
+			" addr=0x%" PRIx64 " len=%" PRIu64, i, fd,
+			mr->mmap_offset, mr->userspace_addr,
+			mr->memory_size);
+
+		return 0;
+	}
+
+	if (i >= VHOST_MEMORY_MAX_NREGIONS) {
+		PMD_DRV_LOG(ERR, "Too many memory regions");
+		return -1;
 	}
 
-	msg->payload.memory.nregions = num;
+	mr = &wa->vm->regions[i];
+	wa->fds[i] = fd;
+
+	mr->guest_phys_addr = start_addr;
+	mr->userspace_addr = start_addr;
+	mr->memory_size = ms->len;
+	mr->mmap_offset = offset;
+
+	PMD_DRV_LOG(DEBUG, "index=%d fd=%d offset=0x%" PRIx64
+		" addr=0x%" PRIx64 " len=%" PRIu64, i, fd,
+		mr->mmap_offset, mr->userspace_addr,
+		mr->memory_size);
+
+	wa->region_nr++;
+
+	return 0;
+}
+
+static int
+prepare_vhost_memory_user(struct vhost_user_msg *msg, int fds[])
+{
+	struct walk_arg wa;
+
+	wa.region_nr = 0;
+	wa.vm = &msg->payload.memory;
+	wa.fds = fds;
+
+	if (rte_memseg_walk_thread_unsafe(update_memory_region, &wa) < 0)
+		return -1;
+
+	msg->payload.memory.nregions = wa.region_nr;
 	msg->payload.memory.padding = 0;
 
 	return 0;
@@ -280,7 +256,7 @@ vhost_user_sock(struct virtio_user_dev *dev,
 	int need_reply = 0;
 	int fds[VHOST_MEMORY_MAX_NREGIONS];
 	int fd_num = 0;
-	int i, len;
+	int len;
 	int vhostfd = dev->vhostfd;
 
 	RTE_SET_USED(m);
@@ -364,10 +340,6 @@ vhost_user_sock(struct virtio_user_dev *dev,
 		return -1;
 	}
 
-	if (req == VHOST_USER_SET_MEM_TABLE)
-		for (i = 0; i < fd_num; ++i)
-			close(fds[i]);
-
 	if (need_reply) {
 		if (vhost_user_read(vhostfd, &msg) < 0) {
 			PMD_DRV_LOG(ERR, "Received msg failed: %s",
diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c
index 7df600b02..869e96f87 100644
--- a/drivers/net/virtio/virtio_user/virtio_user_dev.c
+++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c
@@ -13,6 +13,8 @@
 #include
 #include
 
+#include
+
 #include "vhost.h"
 #include "virtio_user_dev.h"
 #include "../virtio_ethdev.h"
@@ -109,9 +111,24 @@ is_vhost_user_by_type(const char *path)
 int
 virtio_user_start_device(struct virtio_user_dev *dev)
 {
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
 	uint64_t features;
 	int ret;
 
+	/*
+	 * XXX workaround!
+	 *
+	 * We need to make sure that the locks will be
+	 * taken in the correct order to avoid deadlocks.
+	 *
+	 * Before releasing this lock, this thread should
+	 * not trigger any memory hotplug events.
+	 *
+	 * This is a temporary workaround, and should be
+	 * replaced when we get proper supports from the
+	 * memory subsystem in the future.
+	 */
+	rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
 	pthread_mutex_lock(&dev->mutex);
 
 	if (is_vhost_user_by_type(dev->path) && dev->vhostfd < 0)
@@ -152,10 +169,12 @@ virtio_user_start_device(struct virtio_user_dev *dev)
 
 	dev->started = true;
 	pthread_mutex_unlock(&dev->mutex);
+	rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
 
 	return 0;
 error:
 	pthread_mutex_unlock(&dev->mutex);
+	rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
 	/* TODO: free resource here or caller to check */
 	return -1;
 }
-- 
2.18.0
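
For readers unfamiliar with the memseg APIs this RFC builds on, below is a minimal
standalone sketch (not part of the patch) of how rte_memseg_walk() together with
rte_memseg_get_fd_thread_unsafe() can enumerate memory segments and their backing
file descriptors, which is the same information update_memory_region() feeds into
the vhost memory table. It assumes the dependent series above, which adds the fd
getters; the names dump_segment and count are illustrative only. The patch itself
uses the _thread_unsafe walk variant instead, because virtio_user_start_device()
already takes memory_hotplug_lock around the whole setup.

#include <stdio.h>

#include <rte_common.h>
#include <rte_eal.h>
#include <rte_memory.h>

/* Callback invoked once per memory segment by the memseg walk. */
static int
dump_segment(const struct rte_memseg_list *msl __rte_unused,
	     const struct rte_memseg *ms, void *arg)
{
	int *count = arg;
	/* Safe here: rte_memseg_walk() holds the memory hotplug lock while
	 * the callback runs. A return value of -1 means no fd is available
	 * for this segment (e.g. anonymous memory).
	 */
	int fd = rte_memseg_get_fd_thread_unsafe(ms);

	printf("seg %d: va=%p len=%zu fd=%d\n",
	       (*count)++, ms->addr, ms->len, fd);

	return 0; /* a non-zero return would stop the walk */
}

int
main(int argc, char **argv)
{
	int count = 0;

	if (rte_eal_init(argc, argv) < 0)
		return -1;

	if (rte_memseg_walk(dump_segment, &count) < 0)
		printf("memseg walk failed\n");

	return rte_eal_cleanup();
}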