From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga07.intel.com (mga07.intel.com [134.134.136.100]) by dpdk.org (Postfix) with ESMTP id 0E3592C30 for ; Tue, 26 Feb 2019 18:13:13 +0100 (CET) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga007.fm.intel.com ([10.253.24.52]) by orsmga105.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 26 Feb 2019 09:13:12 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.58,416,1544515200"; d="scan'208";a="125379795" Received: from irvmail001.ir.intel.com ([163.33.26.43]) by fmsmga007.fm.intel.com with ESMTP; 26 Feb 2019 09:13:12 -0800 Received: from sivswdev05.ir.intel.com (sivswdev05.ir.intel.com [10.243.17.64]) by irvmail001.ir.intel.com (8.14.3/8.13.6/MailSET/Hub) with ESMTP id x1QHDB6j032008 for ; Tue, 26 Feb 2019 17:13:11 GMT Received: from sivswdev05.ir.intel.com (localhost [127.0.0.1]) by sivswdev05.ir.intel.com with ESMTP id x1QHDBQ8005720 for ; Tue, 26 Feb 2019 17:13:11 GMT Received: (from aburakov@localhost) by sivswdev05.ir.intel.com with LOCAL id x1QHDBcX005522 for dev@dpdk.org; Tue, 26 Feb 2019 17:13:11 GMT From: Anatoly Burakov To: dev@dpdk.org Date: Tue, 26 Feb 2019 17:13:11 +0000 Message-Id: X-Mailer: git-send-email 1.7.0.7 Subject: [dpdk-dev] [PATCH] fbarray: add internal tailq for mapped areas X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 26 Feb 2019 17:13:14 -0000 Currently, there are numerous reliability issues with fbarray, such as: - There is no way to prevent attaching to overlapping memory areas - There is no way to prevent double-detach - Failed destroy leaves fbarray in an invalid state (fbarray itself is valid, but its backing memory area is already detached) In addition, on FreeBSD, doing mmap() on a file descriptor does not keep the lock, so we also need to store the fd in order to keep the lock. This patch improves upon fbarray to address both of these issues by adding an internal tailq to track allocated areas and their respective file descriptors. Signed-off-by: Anatoly Burakov --- lib/librte_eal/common/eal_common_fbarray.c | 212 +++++++++++++++++---- 1 file changed, 179 insertions(+), 33 deletions(-) diff --git a/lib/librte_eal/common/eal_common_fbarray.c b/lib/librte_eal/common/eal_common_fbarray.c index ea0735cb8..819d097bf 100644 --- a/lib/librte_eal/common/eal_common_fbarray.c +++ b/lib/librte_eal/common/eal_common_fbarray.c @@ -28,6 +28,22 @@ #define MASK_LEN_TO_MOD(x) ((x) - RTE_ALIGN_FLOOR(x, MASK_ALIGN)) #define MASK_GET_IDX(idx, mod) ((idx << MASK_SHIFT) + mod) +/* + * We use this to keep track of created/attached memory areas to prevent user + * errors in API usage. + */ +struct mem_area { + TAILQ_ENTRY(mem_area) next; + void *addr; + size_t len; + int fd; +}; +TAILQ_HEAD(mem_area_head, mem_area); +/* local per-process tailq */ +static struct mem_area_head mem_area_tailq = + TAILQ_HEAD_INITIALIZER(mem_area_tailq); +static rte_spinlock_t mem_area_lock = RTE_SPINLOCK_INITIALIZER; + /* * This is a mask that is always stored at the end of array, to provide fast * way of finding free/used spots without looping through each element. @@ -87,6 +103,22 @@ resize_and_map(int fd, void *addr, size_t len) return 0; } +static int +overlap(const struct mem_area *ma, const void *start, size_t len) +{ + const void *end = RTE_PTR_ADD(start, len); + const void *ma_start = ma->addr; + const void *ma_end = RTE_PTR_ADD(ma->addr, ma->len); + + /* start overlap? */ + if (start >= ma_start && start < ma_end) + return 1; + /* end overlap? */ + if (end >= ma_start && end < ma_end) + return 1; + return 0; +} + static int find_next_n(const struct rte_fbarray *arr, unsigned int start, unsigned int n, bool used) @@ -684,6 +716,7 @@ rte_fbarray_init(struct rte_fbarray *arr, const char *name, unsigned int len, size_t page_sz, mmap_len; char path[PATH_MAX]; struct used_mask *msk; + struct mem_area *ma = NULL; void *data = NULL; int fd = -1; @@ -695,6 +728,13 @@ rte_fbarray_init(struct rte_fbarray *arr, const char *name, unsigned int len, if (fully_validate(name, elt_sz, len)) return -1; + /* allocate mem area before doing anything */ + ma = malloc(sizeof(*ma)); + if (ma == NULL) { + rte_errno = ENOMEM; + return -1; + } + page_sz = sysconf(_SC_PAGESIZE); if (page_sz == (size_t)-1) goto fail; @@ -706,10 +746,14 @@ rte_fbarray_init(struct rte_fbarray *arr, const char *name, unsigned int len, if (data == NULL) goto fail; + rte_spinlock_lock(&mem_area_lock); + + fd = -1; + if (internal_config.no_shconf) { /* remap virtual area as writable */ void *new_data = mmap(data, mmap_len, PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, fd, 0); if (new_data == MAP_FAILED) { RTE_LOG(DEBUG, EAL, "%s(): couldn't remap anonymous memory: %s\n", __func__, strerror(errno)); @@ -748,10 +792,13 @@ rte_fbarray_init(struct rte_fbarray *arr, const char *name, unsigned int len, if (resize_and_map(fd, data, mmap_len)) goto fail; - - /* we've mmap'ed the file, we can now close the fd */ - close(fd); } + ma->addr = data; + ma->len = mmap_len; + ma->fd = fd; + + /* do not close fd - keep it until detach/destroy */ + TAILQ_INSERT_TAIL(&mem_area_tailq, ma, next); /* initialize the data */ memset(data, 0, mmap_len); @@ -768,18 +815,24 @@ rte_fbarray_init(struct rte_fbarray *arr, const char *name, unsigned int len, rte_rwlock_init(&arr->rwlock); + rte_spinlock_unlock(&mem_area_lock); + return 0; fail: if (data) munmap(data, mmap_len); if (fd >= 0) close(fd); + free(ma); + + rte_spinlock_unlock(&mem_area_lock); return -1; } int __rte_experimental rte_fbarray_attach(struct rte_fbarray *arr) { + struct mem_area *ma = NULL, *tmp = NULL; size_t page_sz, mmap_len; char path[PATH_MAX]; void *data = NULL; @@ -799,12 +852,30 @@ rte_fbarray_attach(struct rte_fbarray *arr) if (fully_validate(arr->name, arr->elt_sz, arr->len)) return -1; + ma = malloc(sizeof(*ma)); + if (ma == NULL) { + rte_errno = ENOMEM; + return -1; + } + page_sz = sysconf(_SC_PAGESIZE); if (page_sz == (size_t)-1) goto fail; mmap_len = calc_data_size(page_sz, arr->elt_sz, arr->len); + /* check the tailq - maybe user has already mapped this address space */ + rte_spinlock_lock(&mem_area_lock); + + TAILQ_FOREACH(tmp, &mem_area_tailq, next) { + if (overlap(tmp, arr->data, mmap_len)) { + rte_errno = EEXIST; + goto fail; + } + } + + /* we know this memory area is unique, so proceed */ + data = eal_get_virtual_area(arr->data, &mmap_len, page_sz, 0, 0); if (data == NULL) goto fail; @@ -826,7 +897,12 @@ rte_fbarray_attach(struct rte_fbarray *arr) if (resize_and_map(fd, data, mmap_len)) goto fail; - close(fd); + /* store our new memory area */ + ma->addr = data; + ma->fd = fd; /* keep fd until detach/destroy */ + ma->len = mmap_len; + + TAILQ_INSERT_TAIL(&mem_area_tailq, ma, next); /* we're done */ @@ -836,12 +912,17 @@ rte_fbarray_attach(struct rte_fbarray *arr) munmap(data, mmap_len); if (fd >= 0) close(fd); + free(ma); return -1; } int __rte_experimental rte_fbarray_detach(struct rte_fbarray *arr) { + struct mem_area *tmp = NULL; + size_t mmap_len; + int ret = -1; + if (arr == NULL) { rte_errno = EINVAL; return -1; @@ -860,49 +941,114 @@ rte_fbarray_detach(struct rte_fbarray *arr) if (page_sz == (size_t)-1) return -1; - /* this may already be unmapped (e.g. repeated call from previously - * failed destroy(), but this is on user, we can't (easily) know if this - * is still mapped. - */ - munmap(arr->data, calc_data_size(page_sz, arr->elt_sz, arr->len)); + mmap_len = calc_data_size(page_sz, arr->elt_sz, arr->len); - return 0; + /* does this area exist? */ + rte_spinlock_lock(&mem_area_lock); + + TAILQ_FOREACH(tmp, &mem_area_tailq, next) { + if (tmp->addr == arr->data && tmp->len == mmap_len) + break; + } + if (tmp == NULL) { + rte_errno = ENOENT; + ret = -1; + goto out; + } + + munmap(arr->data, mmap_len); + + /* area is unmapped, close fd and remove the tailq entry */ + if (tmp->fd >= 0) + close(tmp->fd); + TAILQ_REMOVE(&mem_area_tailq, tmp, next); + free(tmp); + + ret = 0; +out: + rte_spinlock_unlock(&mem_area_lock); + return ret; } int __rte_experimental rte_fbarray_destroy(struct rte_fbarray *arr) { + struct mem_area *tmp = NULL; + size_t mmap_len; int fd, ret; char path[PATH_MAX]; - ret = rte_fbarray_detach(arr); - if (ret) - return ret; + if (arr == NULL) { + rte_errno = EINVAL; + return -1; + } + /* + * we don't need to synchronize detach as two values we need (element + * size and total capacity) are constant for the duration of life of + * the array, so the parts we care about will not race. if the user is + * detaching while doing something else in the same process, we can't + * really do anything about it, things will blow up either way. + */ + + size_t page_sz = sysconf(_SC_PAGESIZE); + + if (page_sz == (size_t)-1) + return -1; + + mmap_len = calc_data_size(page_sz, arr->elt_sz, arr->len); + + /* does this area exist? */ + rte_spinlock_lock(&mem_area_lock); + + TAILQ_FOREACH(tmp, &mem_area_tailq, next) { + if (tmp->addr == arr->data && tmp->len == mmap_len) + break; + } + if (tmp == NULL) { + rte_errno = ENOENT; + ret = -1; + goto out; + } /* with no shconf, there were never any files to begin with */ - if (internal_config.no_shconf) - return 0; + if (!internal_config.no_shconf) { + /* + * attempt to get an exclusive lock on the file, to ensure it + * has been detached by all other processes + */ + fd = tmp->fd; + if (flock(fd, LOCK_EX | LOCK_NB)) { + RTE_LOG(DEBUG, EAL, "Cannot destroy fbarray - another process is using it\n"); + rte_errno = EBUSY; + ret = -1; + goto out; + } - /* try deleting the file */ - eal_get_fbarray_path(path, sizeof(path), arr->name); + /* we're OK to destroy the file */ + eal_get_fbarray_path(path, sizeof(path), arr->name); + if (unlink(path)) { + RTE_LOG(DEBUG, EAL, "Cannot unlink fbarray: %s\n", + strerror(errno)); + rte_errno = errno; + /* + * we're still holding an exclusive lock, so drop it to + * shared. + */ + flock(fd, LOCK_SH | LOCK_NB); - fd = open(path, O_RDONLY); - if (fd < 0) { - RTE_LOG(ERR, EAL, "Could not open fbarray file: %s\n", - strerror(errno)); - return -1; + ret = -1; + goto out; + } + close(fd); } - if (flock(fd, LOCK_EX | LOCK_NB)) { - RTE_LOG(DEBUG, EAL, "Cannot destroy fbarray - another process is using it\n"); - rte_errno = EBUSY; - ret = -1; - } else { - ret = 0; - unlink(path); - memset(arr, 0, sizeof(*arr)); - } - close(fd); + munmap(arr->data, mmap_len); + /* area is unmapped, remove the tailq entry */ + TAILQ_REMOVE(&mem_area_tailq, tmp, next); + free(tmp); + ret = 0; +out: + rte_spinlock_unlock(&mem_area_lock); return ret; } -- 2.17.1