DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [RFC] net/virtio-user: avoid parsing process mappings
@ 2018-08-28  7:53 Tiwei Bie
  2018-08-28  8:12 ` Burakov, Anatoly
  0 siblings, 1 reply; 4+ messages in thread
From: Tiwei Bie @ 2018-08-28  7:53 UTC (permalink / raw)
  To: maxime.coquelin, zhihong.wang, anatoly.burakov, dev

Recently some memory APIs were introduced to allow users
to get the file descriptors for each memory segments. We
can leverage those APIs to get rid of the /proc magic on
memory table preparation in vhost-user backend.

Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
---
This patch depends on below patch set:
https://patches.dpdk.org/project/dpdk/list/?series=1040

 drivers/net/virtio/virtio_user/vhost_user.c   | 214 ++++++++----------
 .../net/virtio/virtio_user/virtio_user_dev.c  |  19 ++
 2 files changed, 112 insertions(+), 121 deletions(-)

diff --git a/drivers/net/virtio/virtio_user/vhost_user.c b/drivers/net/virtio/virtio_user/vhost_user.c
index ef6e43df8..770cfdc9a 100644
--- a/drivers/net/virtio/virtio_user/vhost_user.c
+++ b/drivers/net/virtio/virtio_user/vhost_user.c
@@ -11,6 +11,9 @@
 #include <string.h>
 #include <errno.h>
 
+#include <rte_fbarray.h>
+#include <rte_eal_memconfig.h>
+
 #include "vhost.h"
 #include "virtio_user_dev.h"
 
@@ -121,133 +124,106 @@ vhost_user_read(int fd, struct vhost_user_msg *msg)
 	return -1;
 }
 
-struct hugepage_file_info {
-	uint64_t addr;            /**< virtual addr */
-	size_t   size;            /**< the file size */
-	char     path[PATH_MAX];  /**< path to backing file */
+struct walk_arg {
+	struct vhost_memory *vm;
+	int *fds;
+	int region_nr;
 };
 
-/* Two possible options:
- * 1. Match HUGEPAGE_INFO_FMT to find the file storing struct hugepage_file
- * array. This is simple but cannot be used in secondary process because
- * secondary process will close and munmap that file.
- * 2. Match HUGEFILE_FMT to find hugepage files directly.
- *
- * We choose option 2.
- */
 static int
-get_hugepage_file_info(struct hugepage_file_info huges[], int max)
+update_memory_region(const struct rte_memseg_list *msl __rte_unused,
+		const struct rte_memseg *ms, void *arg)
 {
-	int idx, k, exist;
-	FILE *f;
-	char buf[BUFSIZ], *tmp, *tail;
-	char *str_underline, *str_start;
-	int huge_index;
-	uint64_t v_start, v_end;
-	struct stat stats;
-
-	f = fopen("/proc/self/maps", "r");
-	if (!f) {
-		PMD_DRV_LOG(ERR, "cannot open /proc/self/maps");
-		return -1;
-	}
-
-	idx = 0;
-	while (fgets(buf, sizeof(buf), f) != NULL) {
-		if (sscanf(buf, "%" PRIx64 "-%" PRIx64, &v_start, &v_end) < 2) {
-			PMD_DRV_LOG(ERR, "Failed to parse address");
-			goto error;
-		}
-
-		tmp = strchr(buf, ' ') + 1; /** skip address */
-		tmp = strchr(tmp, ' ') + 1; /** skip perm */
-		tmp = strchr(tmp, ' ') + 1; /** skip offset */
-		tmp = strchr(tmp, ' ') + 1; /** skip dev */
-		tmp = strchr(tmp, ' ') + 1; /** skip inode */
-		while (*tmp == ' ')         /** skip spaces */
-			tmp++;
-		tail = strrchr(tmp, '\n');  /** remove newline if exists */
-		if (tail)
-			*tail = '\0';
-
-		/* Match HUGEFILE_FMT, aka "%s/%smap_%d",
-		 * which is defined in eal_filesystem.h
-		 */
-		str_underline = strrchr(tmp, '_');
-		if (!str_underline)
-			continue;
-
-		str_start = str_underline - strlen("map");
-		if (str_start < tmp)
-			continue;
-
-		if (sscanf(str_start, "map_%d", &huge_index) != 1)
-			continue;
-
-		/* skip duplicated file which is mapped to different regions */
-		for (k = 0, exist = -1; k < idx; ++k) {
-			if (!strcmp(huges[k].path, tmp)) {
-				exist = k;
-				break;
-			}
-		}
-		if (exist >= 0)
-			continue;
-
-		if (idx >= max) {
-			PMD_DRV_LOG(ERR, "Exceed maximum of %d", max);
-			goto error;
-		}
-
-		huges[idx].addr = v_start;
-		huges[idx].size = v_end - v_start; /* To be corrected later */
-		snprintf(huges[idx].path, PATH_MAX, "%s", tmp);
-		idx++;
-	}
-
-	/* correct the size for files who have many regions */
-	for (k = 0; k < idx; ++k) {
-		if (stat(huges[k].path, &stats) < 0) {
-			PMD_DRV_LOG(ERR, "Failed to stat %s, %s\n",
-				    huges[k].path, strerror(errno));
-			continue;
-		}
-		huges[k].size = stats.st_size;
-		PMD_DRV_LOG(INFO, "file %s, size %zx\n",
-			    huges[k].path, huges[k].size);
-	}
-
-	fclose(f);
-	return idx;
-
-error:
-	fclose(f);
-	return -1;
-}
-
-static int
-prepare_vhost_memory_user(struct vhost_user_msg *msg, int fds[])
-{
-	int i, num;
-	struct hugepage_file_info huges[VHOST_MEMORY_MAX_NREGIONS];
+	struct walk_arg *wa = arg;
 	struct vhost_memory_region *mr;
+	uint64_t start_addr, end_addr, offset;
+	int i, fd;
 
-	num = get_hugepage_file_info(huges, VHOST_MEMORY_MAX_NREGIONS);
-	if (num < 0) {
-		PMD_INIT_LOG(ERR, "Failed to prepare memory for vhost-user");
+	fd = rte_memseg_get_fd_thread_unsafe(ms);
+	if (fd < 0) {
+		PMD_DRV_LOG(ERR, "Failed to get fd, ms=%p", ms);
 		return -1;
 	}
 
-	for (i = 0; i < num; ++i) {
-		mr = &msg->payload.memory.regions[i];
-		mr->guest_phys_addr = huges[i].addr; /* use vaddr! */
-		mr->userspace_addr = huges[i].addr;
-		mr->memory_size = huges[i].size;
-		mr->mmap_offset = 0;
-		fds[i] = open(huges[i].path, O_RDWR);
+	start_addr = (uint64_t)(uintptr_t)ms->addr;
+	end_addr = start_addr + ms->len;
+
+	/*
+	 * XXX workaround!
+	 *
+	 * For --single-file-segments case, offset should be:
+	 * offset = rte_fbarray_find_idx(&msl->memseg_arr, ms) * msl->page_sz;
+	 *
+	 * But it's not true for non-single-file-segments case.
+	 *
+	 * This is a temporary workaround which assumes the file will
+	 * also be mapped from the beginning in --single-file-segments
+	 * case. It should be replaced when we have a memory API to
+	 * get the offset.
+	 */
+	offset = 0;
+
+	for (i = 0; i < wa->region_nr; i++) {
+		if (wa->fds[i] != fd)
+			continue;
+
+		mr = &wa->vm->regions[i];
+
+		if (mr->userspace_addr + mr->memory_size < end_addr)
+			mr->memory_size = end_addr - mr->userspace_addr;
+
+		if (mr->userspace_addr > start_addr) {
+			mr->userspace_addr = start_addr;
+			mr->guest_phys_addr = start_addr;
+		}
+
+		if (mr->mmap_offset > offset)
+			mr->mmap_offset = offset;
+
+		PMD_DRV_LOG(DEBUG, "index=%d fd=%d offset=0x%" PRIx64
+			" addr=0x%" PRIx64 " len=%" PRIu64, i, fd,
+			mr->mmap_offset, mr->userspace_addr,
+			mr->memory_size);
+
+		return 0;
+	}
+
+	if (i >= VHOST_MEMORY_MAX_NREGIONS) {
+		PMD_DRV_LOG(ERR, "Too many memory regions");
+		return -1;
 	}
 
-	msg->payload.memory.nregions = num;
+	mr = &wa->vm->regions[i];
+	wa->fds[i] = fd;
+
+	mr->guest_phys_addr = start_addr;
+	mr->userspace_addr = start_addr;
+	mr->memory_size = ms->len;
+	mr->mmap_offset = offset;
+
+	PMD_DRV_LOG(DEBUG, "index=%d fd=%d offset=0x%" PRIx64
+		" addr=0x%" PRIx64 " len=%" PRIu64, i, fd,
+		mr->mmap_offset, mr->userspace_addr,
+		mr->memory_size);
+
+	wa->region_nr++;
+
+	return 0;
+}
+
+static int
+prepare_vhost_memory_user(struct vhost_user_msg *msg, int fds[])
+{
+	struct walk_arg wa;
+
+	wa.region_nr = 0;
+	wa.vm = &msg->payload.memory;
+	wa.fds = fds;
+
+	if (rte_memseg_walk_thread_unsafe(update_memory_region, &wa) < 0)
+		return -1;
+
+	msg->payload.memory.nregions = wa.region_nr;
 	msg->payload.memory.padding = 0;
 
 	return 0;
@@ -280,7 +256,7 @@ vhost_user_sock(struct virtio_user_dev *dev,
 	int need_reply = 0;
 	int fds[VHOST_MEMORY_MAX_NREGIONS];
 	int fd_num = 0;
-	int i, len;
+	int len;
 	int vhostfd = dev->vhostfd;
 
 	RTE_SET_USED(m);
@@ -364,10 +340,6 @@ vhost_user_sock(struct virtio_user_dev *dev,
 		return -1;
 	}
 
-	if (req == VHOST_USER_SET_MEM_TABLE)
-		for (i = 0; i < fd_num; ++i)
-			close(fds[i]);
-
 	if (need_reply) {
 		if (vhost_user_read(vhostfd, &msg) < 0) {
 			PMD_DRV_LOG(ERR, "Received msg failed: %s",
diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c
index 7df600b02..869e96f87 100644
--- a/drivers/net/virtio/virtio_user/virtio_user_dev.c
+++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c
@@ -13,6 +13,8 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 
+#include <rte_eal_memconfig.h>
+
 #include "vhost.h"
 #include "virtio_user_dev.h"
 #include "../virtio_ethdev.h"
@@ -109,9 +111,24 @@ is_vhost_user_by_type(const char *path)
 int
 virtio_user_start_device(struct virtio_user_dev *dev)
 {
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
 	uint64_t features;
 	int ret;
 
+	/*
+	 * XXX workaround!
+	 *
+	 * We need to make sure that the locks will be
+	 * taken in the correct order to avoid deadlocks.
+	 *
+	 * Before releasing this lock, this thread should
+	 * not trigger any memory hotplug events.
+	 *
+	 * This is a temporary workaround, and should be
+	 * replaced when we get proper supports from the
+	 * memory subsystem in the future.
+	 */
+	rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
 	pthread_mutex_lock(&dev->mutex);
 
 	if (is_vhost_user_by_type(dev->path) && dev->vhostfd < 0)
@@ -152,10 +169,12 @@ virtio_user_start_device(struct virtio_user_dev *dev)
 
 	dev->started = true;
 	pthread_mutex_unlock(&dev->mutex);
+	rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
 
 	return 0;
 error:
 	pthread_mutex_unlock(&dev->mutex);
+	rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
 	/* TODO: free resource here or caller to check */
 	return -1;
 }
-- 
2.18.0

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [dpdk-dev] [RFC] net/virtio-user: avoid parsing process mappings
  2018-08-28  7:53 [dpdk-dev] [RFC] net/virtio-user: avoid parsing process mappings Tiwei Bie
@ 2018-08-28  8:12 ` Burakov, Anatoly
  2018-08-28  8:42   ` Tiwei Bie
  0 siblings, 1 reply; 4+ messages in thread
From: Burakov, Anatoly @ 2018-08-28  8:12 UTC (permalink / raw)
  To: Tiwei Bie, maxime.coquelin, zhihong.wang, dev

On 28-Aug-18 8:53 AM, Tiwei Bie wrote:
> Recently some memory APIs were introduced to allow users
> to get the file descriptors for each memory segments. We
> can leverage those APIs to get rid of the /proc magic on
> memory table preparation in vhost-user backend.
> 
> Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
> ---
> This patch depends on below patch set:
> https://patches.dpdk.org/project/dpdk/list/?series=1040

Great to see this done so quickly!

> 
>   drivers/net/virtio/virtio_user/vhost_user.c   | 214 ++++++++----------
>   .../net/virtio/virtio_user/virtio_user_dev.c  |  19 ++
>   2 files changed, 112 insertions(+), 121 deletions(-)
> 
> diff --git a/drivers/net/virtio/virtio_user/vhost_user.c b/drivers/net/virtio/virtio_user/vhost_user.c
> index ef6e43df8..770cfdc9a 100644
> --- a/drivers/net/virtio/virtio_user/vhost_user.c
> +++ b/drivers/net/virtio/virtio_user/vhost_user.c
> @@ -11,6 +11,9 @@
>   #include <string.h>
>   #include <errno.h>
>   
> +#include <rte_fbarray.h>
> +#include <rte_eal_memconfig.h>
> +
>   #include "vhost.h"
>   #include "virtio_user_dev.h"
>   
> @@ -121,133 +124,106 @@ vhost_user_read(int fd, struct vhost_user_msg *msg)
>   	return -1;
>   }
>   
> -struct hugepage_file_info {
> -	uint64_t addr;            /**< virtual addr */
> -	size_t   size;            /**< the file size */
> -	char     path[PATH_MAX];  /**< path to backing file */
> +struct walk_arg {
> +	struct vhost_memory *vm;
> +	int *fds;
> +	int region_nr;
>   };
>   
> -/* Two possible options:
> - * 1. Match HUGEPAGE_INFO_FMT to find the file storing struct hugepage_file
> - * array. This is simple but cannot be used in secondary process because
> - * secondary process will close and munmap that file.
> - * 2. Match HUGEFILE_FMT to find hugepage files directly.
> - *
> - * We choose option 2.
> - */
>   static int
> -get_hugepage_file_info(struct hugepage_file_info huges[], int max)
> +update_memory_region(const struct rte_memseg_list *msl __rte_unused,
> +		const struct rte_memseg *ms, void *arg)
>   {
> -	int idx, k, exist;
> -	FILE *f;
> -	char buf[BUFSIZ], *tmp, *tail;
> -	char *str_underline, *str_start;
> -	int huge_index;
> -	uint64_t v_start, v_end;
> -	struct stat stats;
> -
> -	f = fopen("/proc/self/maps", "r");
> -	if (!f) {
> -		PMD_DRV_LOG(ERR, "cannot open /proc/self/maps");
> -		return -1;
> -	}
> -
> -	idx = 0;
> -	while (fgets(buf, sizeof(buf), f) != NULL) {
> -		if (sscanf(buf, "%" PRIx64 "-%" PRIx64, &v_start, &v_end) < 2) {
> -			PMD_DRV_LOG(ERR, "Failed to parse address");
> -			goto error;
> -		}
> -
> -		tmp = strchr(buf, ' ') + 1; /** skip address */
> -		tmp = strchr(tmp, ' ') + 1; /** skip perm */
> -		tmp = strchr(tmp, ' ') + 1; /** skip offset */
> -		tmp = strchr(tmp, ' ') + 1; /** skip dev */
> -		tmp = strchr(tmp, ' ') + 1; /** skip inode */
> -		while (*tmp == ' ')         /** skip spaces */
> -			tmp++;
> -		tail = strrchr(tmp, '\n');  /** remove newline if exists */
> -		if (tail)
> -			*tail = '\0';
> -
> -		/* Match HUGEFILE_FMT, aka "%s/%smap_%d",
> -		 * which is defined in eal_filesystem.h
> -		 */
> -		str_underline = strrchr(tmp, '_');
> -		if (!str_underline)
> -			continue;
> -
> -		str_start = str_underline - strlen("map");
> -		if (str_start < tmp)
> -			continue;
> -
> -		if (sscanf(str_start, "map_%d", &huge_index) != 1)
> -			continue;
> -
> -		/* skip duplicated file which is mapped to different regions */
> -		for (k = 0, exist = -1; k < idx; ++k) {
> -			if (!strcmp(huges[k].path, tmp)) {
> -				exist = k;
> -				break;
> -			}
> -		}
> -		if (exist >= 0)
> -			continue;
> -
> -		if (idx >= max) {
> -			PMD_DRV_LOG(ERR, "Exceed maximum of %d", max);
> -			goto error;
> -		}
> -
> -		huges[idx].addr = v_start;
> -		huges[idx].size = v_end - v_start; /* To be corrected later */
> -		snprintf(huges[idx].path, PATH_MAX, "%s", tmp);
> -		idx++;
> -	}
> -
> -	/* correct the size for files who have many regions */
> -	for (k = 0; k < idx; ++k) {
> -		if (stat(huges[k].path, &stats) < 0) {
> -			PMD_DRV_LOG(ERR, "Failed to stat %s, %s\n",
> -				    huges[k].path, strerror(errno));
> -			continue;
> -		}
> -		huges[k].size = stats.st_size;
> -		PMD_DRV_LOG(INFO, "file %s, size %zx\n",
> -			    huges[k].path, huges[k].size);
> -	}
> -
> -	fclose(f);
> -	return idx;
> -
> -error:
> -	fclose(f);
> -	return -1;
> -}
> -
> -static int
> -prepare_vhost_memory_user(struct vhost_user_msg *msg, int fds[])
> -{
> -	int i, num;
> -	struct hugepage_file_info huges[VHOST_MEMORY_MAX_NREGIONS];
> +	struct walk_arg *wa = arg;
>   	struct vhost_memory_region *mr;
> +	uint64_t start_addr, end_addr, offset;
> +	int i, fd;
>   
> -	num = get_hugepage_file_info(huges, VHOST_MEMORY_MAX_NREGIONS);
> -	if (num < 0) {
> -		PMD_INIT_LOG(ERR, "Failed to prepare memory for vhost-user");
> +	fd = rte_memseg_get_fd_thread_unsafe(ms);
> +	if (fd < 0) {
> +		PMD_DRV_LOG(ERR, "Failed to get fd, ms=%p", ms);
>   		return -1;
>   	}
>   
> -	for (i = 0; i < num; ++i) {
> -		mr = &msg->payload.memory.regions[i];
> -		mr->guest_phys_addr = huges[i].addr; /* use vaddr! */
> -		mr->userspace_addr = huges[i].addr;
> -		mr->memory_size = huges[i].size;
> -		mr->mmap_offset = 0;
> -		fds[i] = open(huges[i].path, O_RDWR);
> +	start_addr = (uint64_t)(uintptr_t)ms->addr;
> +	end_addr = start_addr + ms->len;
> +
> +	/*
> +	 * XXX workaround!
> +	 *
> +	 * For --single-file-segments case, offset should be:
> +	 * offset = rte_fbarray_find_idx(&msl->memseg_arr, ms) * msl->page_sz;
> +	 *
> +	 * But it's not true for non-single-file-segments case.
> +	 *
> +	 * This is a temporary workaround which assumes the file will
> +	 * also be mapped from the beginning in --single-file-segments
> +	 * case. It should be replaced when we have a memory API to
> +	 * get the offset.

Yes, this is an unfortunate consequence of having split personalities in 
memory subsystem. A good solution would be to just deprecate 
non-single-file segments mode and always use it, but we cannot do that 
because we support kernel versions that do not support fallocate() on 
hugetlbfs (and it still leaves legacy mem mode, which effectively 
behaves like non-single-file segments as far as virtio is concerned).

How about we add this API in v2 for seg fd patchset?

> +	 */
> +	offset = 0;
> +
> +	for (i = 0; i < wa->region_nr; i++) {
> +		if (wa->fds[i] != fd)
> +			continue;
> +
> +		mr = &wa->vm->regions[i];
> +
> +		if (mr->userspace_addr + mr->memory_size < end_addr)
> +			mr->memory_size = end_addr - mr->userspace_addr;
> +
> +		if (mr->userspace_addr > start_addr) {
> +			mr->userspace_addr = start_addr;
> +			mr->guest_phys_addr = start_addr;
> +		}
> +
> +		if (mr->mmap_offset > offset)
> +			mr->mmap_offset = offset;
> +
> +		PMD_DRV_LOG(DEBUG, "index=%d fd=%d offset=0x%" PRIx64
> +			" addr=0x%" PRIx64 " len=%" PRIu64, i, fd,
> +			mr->mmap_offset, mr->userspace_addr,
> +			mr->memory_size);
> +
> +		return 0;
> +	}
> +
> +	if (i >= VHOST_MEMORY_MAX_NREGIONS) {
> +		PMD_DRV_LOG(ERR, "Too many memory regions");
> +		return -1;
>   	}
>   
> -	msg->payload.memory.nregions = num;
> +	mr = &wa->vm->regions[i];
> +	wa->fds[i] = fd;
> +
> +	mr->guest_phys_addr = start_addr;
> +	mr->userspace_addr = start_addr;
> +	mr->memory_size = ms->len;
> +	mr->mmap_offset = offset;
> +
> +	PMD_DRV_LOG(DEBUG, "index=%d fd=%d offset=0x%" PRIx64
> +		" addr=0x%" PRIx64 " len=%" PRIu64, i, fd,
> +		mr->mmap_offset, mr->userspace_addr,
> +		mr->memory_size);
> +
> +	wa->region_nr++;
> +
> +	return 0;
> +}
> +
> +static int
> +prepare_vhost_memory_user(struct vhost_user_msg *msg, int fds[])
> +{
> +	struct walk_arg wa;
> +
> +	wa.region_nr = 0;
> +	wa.vm = &msg->payload.memory;
> +	wa.fds = fds;
> +
> +	if (rte_memseg_walk_thread_unsafe(update_memory_region, &wa) < 0)
> +		return -1;
> +
> +	msg->payload.memory.nregions = wa.region_nr;
>   	msg->payload.memory.padding = 0;
>   
>   	return 0;
> @@ -280,7 +256,7 @@ vhost_user_sock(struct virtio_user_dev *dev,
>   	int need_reply = 0;
>   	int fds[VHOST_MEMORY_MAX_NREGIONS];
>   	int fd_num = 0;
> -	int i, len;
> +	int len;
>   	int vhostfd = dev->vhostfd;
>   
>   	RTE_SET_USED(m);
> @@ -364,10 +340,6 @@ vhost_user_sock(struct virtio_user_dev *dev,
>   		return -1;
>   	}
>   
> -	if (req == VHOST_USER_SET_MEM_TABLE)
> -		for (i = 0; i < fd_num; ++i)
> -			close(fds[i]);
> -
>   	if (need_reply) {
>   		if (vhost_user_read(vhostfd, &msg) < 0) {
>   			PMD_DRV_LOG(ERR, "Received msg failed: %s",
> diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c
> index 7df600b02..869e96f87 100644
> --- a/drivers/net/virtio/virtio_user/virtio_user_dev.c
> +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c
> @@ -13,6 +13,8 @@
>   #include <sys/types.h>
>   #include <sys/stat.h>
>   
> +#include <rte_eal_memconfig.h>
> +
>   #include "vhost.h"
>   #include "virtio_user_dev.h"
>   #include "../virtio_ethdev.h"
> @@ -109,9 +111,24 @@ is_vhost_user_by_type(const char *path)
>   int
>   virtio_user_start_device(struct virtio_user_dev *dev)
>   {
> +	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
>   	uint64_t features;
>   	int ret;
>   
> +	/*
> +	 * XXX workaround!
> +	 *
> +	 * We need to make sure that the locks will be
> +	 * taken in the correct order to avoid deadlocks.
> +	 *
> +	 * Before releasing this lock, this thread should
> +	 * not trigger any memory hotplug events.
> +	 *
> +	 * This is a temporary workaround, and should be
> +	 * replaced when we get proper supports from the
> +	 * memory subsystem in the future.
> +	 */
> +	rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);

This feels like it should be a separate patch, because it fixes the 
issue that exists independently of whether we have segment fd API or not.

>   	pthread_mutex_lock(&dev->mutex);
>   
>   	if (is_vhost_user_by_type(dev->path) && dev->vhostfd < 0)
> @@ -152,10 +169,12 @@ virtio_user_start_device(struct virtio_user_dev *dev)
>   
>   	dev->started = true;
>   	pthread_mutex_unlock(&dev->mutex);
> +	rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
>   
>   	return 0;
>   error:
>   	pthread_mutex_unlock(&dev->mutex);
> +	rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
>   	/* TODO: free resource here or caller to check */
>   	return -1;
>   }
> 


-- 
Thanks,
Anatoly

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [dpdk-dev] [RFC] net/virtio-user: avoid parsing process mappings
  2018-08-28  8:12 ` Burakov, Anatoly
@ 2018-08-28  8:42   ` Tiwei Bie
  2018-08-28 12:56     ` Burakov, Anatoly
  0 siblings, 1 reply; 4+ messages in thread
From: Tiwei Bie @ 2018-08-28  8:42 UTC (permalink / raw)
  To: Burakov, Anatoly; +Cc: maxime.coquelin, zhihong.wang, dev

On Tue, Aug 28, 2018 at 09:12:38AM +0100, Burakov, Anatoly wrote:
> On 28-Aug-18 8:53 AM, Tiwei Bie wrote:
[...]
> >   
> > -	for (i = 0; i < num; ++i) {
> > -		mr = &msg->payload.memory.regions[i];
> > -		mr->guest_phys_addr = huges[i].addr; /* use vaddr! */
> > -		mr->userspace_addr = huges[i].addr;
> > -		mr->memory_size = huges[i].size;
> > -		mr->mmap_offset = 0;
> > -		fds[i] = open(huges[i].path, O_RDWR);
> > +	start_addr = (uint64_t)(uintptr_t)ms->addr;
> > +	end_addr = start_addr + ms->len;
> > +
> > +	/*
> > +	 * XXX workaround!
> > +	 *
> > +	 * For --single-file-segments case, offset should be:
> > +	 * offset = rte_fbarray_find_idx(&msl->memseg_arr, ms) * msl->page_sz;
> > +	 *
> > +	 * But it's not true for non-single-file-segments case.
> > +	 *
> > +	 * This is a temporary workaround which assumes the file will
> > +	 * also be mapped from the beginning in --single-file-segments
> > +	 * case. It should be replaced when we have a memory API to
> > +	 * get the offset.
> 
> Yes, this is an unfortunate consequence of having split personalities in 
> memory subsystem. A good solution would be to just deprecate 
> non-single-file segments mode and always use it, but we cannot do that 
> because we support kernel versions that do not support fallocate() on 
> hugetlbfs (and it still leaves legacy mem mode, which effectively 
> behaves like non-single-file segments as far as virtio is concerned).
> 
> How about we add this API in v2 for seg fd patchset?

Sure, thanks!

Besides, I saw another minor issue. When --legacy-mem
and --single-file-segments are both specified, the EAL
init doesn't fail, but fd can't be got successfully.

> 
> > +	 */
> > +	offset = 0;
> > +
[...]
> > @@ -109,9 +111,24 @@ is_vhost_user_by_type(const char *path)
> >   int
> >   virtio_user_start_device(struct virtio_user_dev *dev)
> >   {
> > +	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
> >   	uint64_t features;
> >   	int ret;
> >   
> > +	/*
> > +	 * XXX workaround!
> > +	 *
> > +	 * We need to make sure that the locks will be
> > +	 * taken in the correct order to avoid deadlocks.
> > +	 *
> > +	 * Before releasing this lock, this thread should
> > +	 * not trigger any memory hotplug events.
> > +	 *
> > +	 * This is a temporary workaround, and should be
> > +	 * replaced when we get proper supports from the
> > +	 * memory subsystem in the future.
> > +	 */
> > +	rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
> 
> This feels like it should be a separate patch, because it fixes the 
> issue that exists independently of whether we have segment fd API or not.

Yeah, I'll put it in a separate patch.

> 
> >   	pthread_mutex_lock(&dev->mutex);
> >   
> >   	if (is_vhost_user_by_type(dev->path) && dev->vhostfd < 0)
> > @@ -152,10 +169,12 @@ virtio_user_start_device(struct virtio_user_dev *dev)
> >   
> >   	dev->started = true;
> >   	pthread_mutex_unlock(&dev->mutex);
> > +	rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
> >   
> >   	return 0;
> >   error:
> >   	pthread_mutex_unlock(&dev->mutex);
> > +	rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
> >   	/* TODO: free resource here or caller to check */
> >   	return -1;
> >   }
> > 
> 
> 
> -- 
> Thanks,
> Anatoly

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [dpdk-dev] [RFC] net/virtio-user: avoid parsing process mappings
  2018-08-28  8:42   ` Tiwei Bie
@ 2018-08-28 12:56     ` Burakov, Anatoly
  0 siblings, 0 replies; 4+ messages in thread
From: Burakov, Anatoly @ 2018-08-28 12:56 UTC (permalink / raw)
  To: Tiwei Bie; +Cc: maxime.coquelin, zhihong.wang, dev

On 28-Aug-18 9:42 AM, Tiwei Bie wrote:
> On Tue, Aug 28, 2018 at 09:12:38AM +0100, Burakov, Anatoly wrote:
>> On 28-Aug-18 8:53 AM, Tiwei Bie wrote:
> [...]
>>>    
>>> -	for (i = 0; i < num; ++i) {
>>> -		mr = &msg->payload.memory.regions[i];
>>> -		mr->guest_phys_addr = huges[i].addr; /* use vaddr! */
>>> -		mr->userspace_addr = huges[i].addr;
>>> -		mr->memory_size = huges[i].size;
>>> -		mr->mmap_offset = 0;
>>> -		fds[i] = open(huges[i].path, O_RDWR);
>>> +	start_addr = (uint64_t)(uintptr_t)ms->addr;
>>> +	end_addr = start_addr + ms->len;
>>> +
>>> +	/*
>>> +	 * XXX workaround!
>>> +	 *
>>> +	 * For --single-file-segments case, offset should be:
>>> +	 * offset = rte_fbarray_find_idx(&msl->memseg_arr, ms) * msl->page_sz;
>>> +	 *
>>> +	 * But it's not true for non-single-file-segments case.
>>> +	 *
>>> +	 * This is a temporary workaround which assumes the file will
>>> +	 * also be mapped from the beginning in --single-file-segments
>>> +	 * case. It should be replaced when we have a memory API to
>>> +	 * get the offset.
>>
>> Yes, this is an unfortunate consequence of having split personalities in
>> memory subsystem. A good solution would be to just deprecate
>> non-single-file segments mode and always use it, but we cannot do that
>> because we support kernel versions that do not support fallocate() on
>> hugetlbfs (and it still leaves legacy mem mode, which effectively
>> behaves like non-single-file segments as far as virtio is concerned).
>>
>> How about we add this API in v2 for seg fd patchset?
> 
> Sure, thanks!
> 
> Besides, I saw another minor issue. When --legacy-mem
> and --single-file-segments are both specified, the EAL
> init doesn't fail, but fd can't be got successfully.
> 

I'll have to look into this, because as far as i recall, i've submitted 
a patch specifically prohibiting legacy mem combined with single file 
segments. I'll double check though. Thanks for testing!

-- 
Thanks,
Anatoly

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2018-08-28 12:56 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-08-28  7:53 [dpdk-dev] [RFC] net/virtio-user: avoid parsing process mappings Tiwei Bie
2018-08-28  8:12 ` Burakov, Anatoly
2018-08-28  8:42   ` Tiwei Bie
2018-08-28 12:56     ` Burakov, Anatoly

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).