From: Anatoly Burakov <anatoly.burakov@intel.com>
To: dev@dpdk.org
Subject: [dpdk-dev] [RFC] mem: add atomic lookup-and-reserve/free memzone API
Date: Tue, 5 May 2020 14:24:07 +0000 [thread overview]
Message-ID: <b0ef92d3be578c1dbcc6fd61a12dd943decaa15c.1588688636.git.anatoly.burakov@intel.com> (raw)
Currently, in order to perform a memzone lookup and create/free
the memzone, the user has to call two APIs, creating a race
condition. This is particularly destructive for the memzone_free call
because the reference provided to memzone_free at the time of the call
may be stale or refer to a different memzone altogether.
Fix this race condition by adding an API to perform lookup and
create/free memzone in one go.
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
lib/librte_eal/common/eal_common_memzone.c | 125 ++++++++---
lib/librte_eal/include/rte_memzone.h | 235 +++++++++++++++++++++
lib/librte_eal/rte_eal_version.map | 4 +
3 files changed, 340 insertions(+), 24 deletions(-)
diff --git a/lib/librte_eal/common/eal_common_memzone.c b/lib/librte_eal/common/eal_common_memzone.c
index 7c21aa921e..38dc995a39 100644
--- a/lib/librte_eal/common/eal_common_memzone.c
+++ b/lib/librte_eal/common/eal_common_memzone.c
@@ -189,7 +189,8 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
static const struct rte_memzone *
rte_memzone_reserve_thread_safe(const char *name, size_t len, int socket_id,
- unsigned int flags, unsigned int align, unsigned int bound)
+ unsigned int flags, unsigned int align, unsigned int bound,
+ bool lookup)
{
struct rte_mem_config *mcfg;
const struct rte_memzone *mz = NULL;
@@ -199,11 +200,17 @@ rte_memzone_reserve_thread_safe(const char *name, size_t len, int socket_id,
rte_rwlock_write_lock(&mcfg->mlock);
- mz = memzone_reserve_aligned_thread_unsafe(
- name, len, socket_id, flags, align, bound);
+ if (lookup) {
+ mz = memzone_lookup_thread_unsafe(name);
+ rte_eal_trace_memzone_lookup(name, mz);
+ }
+ if (mz == NULL) {
+ mz = memzone_reserve_aligned_thread_unsafe(
+ name, len, socket_id, flags, align, bound);
- rte_eal_trace_memzone_reserve(name, len, socket_id, flags, align,
- bound, mz);
+ rte_eal_trace_memzone_reserve(name, len, socket_id, flags,
+ align, bound, mz);
+ }
rte_rwlock_write_unlock(&mcfg->mlock);
@@ -220,7 +227,7 @@ rte_memzone_reserve_bounded(const char *name, size_t len, int socket_id,
unsigned flags, unsigned align, unsigned bound)
{
return rte_memzone_reserve_thread_safe(name, len, socket_id, flags,
- align, bound);
+ align, bound, false);
}
/*
@@ -232,7 +239,7 @@ rte_memzone_reserve_aligned(const char *name, size_t len, int socket_id,
unsigned flags, unsigned align)
{
return rte_memzone_reserve_thread_safe(name, len, socket_id, flags,
- align, 0);
+ align, 0, false);
}
/*
@@ -244,49 +251,119 @@ rte_memzone_reserve(const char *name, size_t len, int socket_id,
unsigned flags)
{
return rte_memzone_reserve_thread_safe(name, len, socket_id,
- flags, RTE_CACHE_LINE_SIZE, 0);
+ flags, RTE_CACHE_LINE_SIZE, 0, false);
}
-int
-rte_memzone_free(const struct rte_memzone *mz)
+/*
+ * Return a pointer to a correctly filled memzone descriptor (with a
+ * specified alignment and boundary). If the allocation cannot be done,
+ * return NULL.
+ */
+const struct rte_memzone *
+rte_memzone_lookup_reserve_bounded(const char *name, size_t len, int socket_id,
+ unsigned int flags, unsigned int align, unsigned int bound)
{
- char name[RTE_MEMZONE_NAMESIZE];
+ return rte_memzone_reserve_thread_safe(name, len, socket_id,
+ flags, align, bound, true);
+}
+
+/*
+ * Return a pointer to a correctly filled memzone descriptor (with a
+ * specified alignment). If the allocation cannot be done, return NULL.
+ */
+const struct rte_memzone *
+rte_memzone_lookup_reserve_aligned(const char *name, size_t len, int socket_id,
+ unsigned int flags, unsigned int align)
+{
+ return rte_memzone_reserve_thread_safe(name, len, socket_id,
+ flags, align, 0, true);
+}
+
+/*
+ * Return existing memzone, or create a new one if it doesn't exist.
+ */
+const struct rte_memzone *
+rte_memzone_lookup_reserve(const char *name, size_t len, int socket_id,
+ unsigned int flags)
+{
+ return rte_memzone_reserve_thread_safe(name, len, socket_id,
+ flags, RTE_CACHE_LINE_SIZE, 0, true);
+}
+
+static int
+rte_memzone_free_thread_unsafe(const struct rte_memzone *mz)
+{
+ struct rte_memzone *found_mz;
struct rte_mem_config *mcfg;
+ char name[RTE_MEMZONE_NAMESIZE];
struct rte_fbarray *arr;
- struct rte_memzone *found_mz;
+ void *addr;
+ unsigned int idx;
int ret = 0;
- void *addr = NULL;
- unsigned idx;
-
- if (mz == NULL)
- return -EINVAL;
- rte_strlcpy(name, mz->name, RTE_MEMZONE_NAMESIZE);
mcfg = rte_eal_get_configuration()->mem_config;
arr = &mcfg->memzones;
-
- rte_rwlock_write_lock(&mcfg->mlock);
-
idx = rte_fbarray_find_idx(arr, mz);
found_mz = rte_fbarray_get(arr, idx);
+ rte_strlcpy(name, mz->name, RTE_MEMZONE_NAMESIZE);
if (found_mz == NULL) {
+ addr = NULL;
ret = -EINVAL;
} else if (found_mz->addr == NULL) {
RTE_LOG(ERR, EAL, "Memzone is not allocated\n");
+ addr = NULL;
ret = -EINVAL;
} else {
addr = found_mz->addr;
+
memset(found_mz, 0, sizeof(*found_mz));
rte_fbarray_set_free(arr, idx);
+
+ rte_free(addr);
}
+ rte_eal_trace_memzone_free(name, addr, ret);
+
+ return ret;
+}
+
+int
+rte_memzone_free(const struct rte_memzone *mz)
+{
+ struct rte_mem_config *mcfg;
+ int ret;
+
+ if (mz == NULL)
+ return -EINVAL;
+ mcfg = rte_eal_get_configuration()->mem_config;
+
+ rte_rwlock_write_lock(&mcfg->mlock);
+ ret = rte_memzone_free_thread_unsafe(mz);
rte_rwlock_write_unlock(&mcfg->mlock);
- if (addr != NULL)
- rte_free(addr);
+ return ret;
+}
- rte_eal_trace_memzone_free(name, addr, ret);
+int
+rte_memzone_lookup_free(const char *name)
+{
+ const struct rte_memzone *memzone;
+ struct rte_mem_config *mcfg;
+ int ret;
+
+ mcfg = rte_eal_get_configuration()->mem_config;
+
+ /* Write lock is required: the free path below mutates shared state
+  * (fbarray slot release and heap free), so a read lock would allow
+  * two lookup_free callers to race and double-free the same memzone.
+  */
+ rte_rwlock_write_lock(&mcfg->mlock);
+
+ memzone = memzone_lookup_thread_unsafe(name);
+ rte_eal_trace_memzone_lookup(name, memzone);
+ if (memzone != NULL)
+ ret = rte_memzone_free_thread_unsafe(memzone);
+ else
+ ret = -ENOENT;
+
+ rte_rwlock_write_unlock(&mcfg->mlock);
return ret;
}
diff --git a/lib/librte_eal/include/rte_memzone.h b/lib/librte_eal/include/rte_memzone.h
index 091c9522f7..824dae7df9 100644
--- a/lib/librte_eal/include/rte_memzone.h
+++ b/lib/librte_eal/include/rte_memzone.h
@@ -270,6 +270,224 @@ const struct rte_memzone *rte_memzone_reserve_bounded(const char *name,
size_t len, int socket_id,
unsigned flags, unsigned align, unsigned bound);
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Reserve a portion of physical memory if it doesn't exist. Returns a
+ * pointer to a correctly filled memzone descriptor (pre-existing if the
+ * memzone already exists). If the allocation cannot be done, return NULL.
+ *
+ * @note If memzone with a given name already exists, it will be returned
+ * regardless of whether it matches the requirements specified for allocation.
+ * It is the responsibility of the user to ensure that two different memzones
+ * with identical names are not attempted to be created.
+ *
+ * @note Reserving memzones with len set to 0 will only attempt to allocate
+ * memzones from memory that is already available. It will not trigger any
+ * new allocations.
+ *
+ * @note: When reserving memzones with len set to 0, it is preferable to also
+ * set a valid socket_id. Setting socket_id to SOCKET_ID_ANY is supported, but
+ * will likely not yield expected results. Specifically, the resulting memzone
+ * may not necessarily be the biggest memzone available, but rather biggest
+ * memzone available on socket id corresponding to an lcore from which
+ * reservation was called.
+ *
+ * @param name
+ * The name of the memzone. If the memzone with this name already exists, the
+ * function will return existing memzone instead of allocating a new one.
+ * @param len
+ * The size of the memory to be reserved. If it
+ * is 0, the biggest contiguous zone will be reserved.
+ * @param socket_id
+ * The socket identifier in the case of
+ * NUMA. The value can be SOCKET_ID_ANY if there is no NUMA
+ * constraint for the reserved zone.
+ * @param flags
+ * The flags parameter is used to request memzones to be
+ * taken from specifically sized hugepages.
+ * - RTE_MEMZONE_2MB - Reserved from 2MB pages
+ * - RTE_MEMZONE_1GB - Reserved from 1GB pages
+ * - RTE_MEMZONE_16MB - Reserved from 16MB pages
+ * - RTE_MEMZONE_16GB - Reserved from 16GB pages
+ * - RTE_MEMZONE_256KB - Reserved from 256KB pages
+ * - RTE_MEMZONE_256MB - Reserved from 256MB pages
+ * - RTE_MEMZONE_512MB - Reserved from 512MB pages
+ * - RTE_MEMZONE_4GB - Reserved from 4GB pages
+ * - RTE_MEMZONE_SIZE_HINT_ONLY - Allow alternative page size to be used if
+ * the requested page size is unavailable.
+ * If this flag is not set, the function
+ * will return error on an unavailable size
+ * request.
+ * - RTE_MEMZONE_IOVA_CONTIG - Ensure reserved memzone is IOVA-contiguous.
+ * This option should be used when allocating
+ * memory intended for hardware rings etc.
+ * @return
+ * A pointer to a correctly-filled read-only memzone descriptor, or NULL
+ * on error.
+ * On error case, rte_errno will be set appropriately:
+ * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure
+ * - ENOSPC - the maximum number of memzones has already been allocated
+ * - ENOMEM - no appropriate memory area found in which to create memzone
+ * - EINVAL - invalid parameters
+ */
+__rte_experimental
+const struct rte_memzone *rte_memzone_lookup_reserve(const char *name,
+ size_t len, int socket_id, unsigned int flags);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Reserve a portion of physical memory if it doesn't exist, with alignment
+ * on a specified boundary. Returns a pointer to a correctly filled memzone
+ * descriptor (pre-existing if a memzone with this name already exists).
+ * If the allocation cannot be done or if the alignment is not a power
+ * of 2, returns NULL.
+ *
+ * @note If memzone with a given name already exists, it will be returned
+ * regardless of whether it matches the requirements specified for allocation.
+ * It is the responsibility of the user to ensure that two different memzones
+ * with identical names are not attempted to be created.
+ *
+ * @note Reserving memzones with len set to 0 will only attempt to allocate
+ * memzones from memory that is already available. It will not trigger any
+ * new allocations.
+ *
+ * @note: When reserving memzones with len set to 0, it is preferable to also
+ * set a valid socket_id. Setting socket_id to SOCKET_ID_ANY is supported, but
+ * will likely not yield expected results. Specifically, the resulting memzone
+ * may not necessarily be the biggest memzone available, but rather biggest
+ * memzone available on socket id corresponding to an lcore from which
+ * reservation was called.
+ *
+ * @param name
+ * The name of the memzone. If the memzone with this name already exists, the
+ * function will return existing memzone instead of allocating a new one.
+ * @param len
+ * The size of the memory to be reserved. If it
+ * is 0, the biggest contiguous zone will be reserved.
+ * @param socket_id
+ * The socket identifier in the case of
+ * NUMA. The value can be SOCKET_ID_ANY if there is no NUMA
+ * constraint for the reserved zone.
+ * @param flags
+ * The flags parameter is used to request memzones to be
+ * taken from specifically sized hugepages.
+ * - RTE_MEMZONE_2MB - Reserved from 2MB pages
+ * - RTE_MEMZONE_1GB - Reserved from 1GB pages
+ * - RTE_MEMZONE_16MB - Reserved from 16MB pages
+ * - RTE_MEMZONE_16GB - Reserved from 16GB pages
+ * - RTE_MEMZONE_256KB - Reserved from 256KB pages
+ * - RTE_MEMZONE_256MB - Reserved from 256MB pages
+ * - RTE_MEMZONE_512MB - Reserved from 512MB pages
+ * - RTE_MEMZONE_4GB - Reserved from 4GB pages
+ * - RTE_MEMZONE_SIZE_HINT_ONLY - Allow alternative page size to be used if
+ * the requested page size is unavailable.
+ * If this flag is not set, the function
+ * will return error on an unavailable size
+ * request.
+ * - RTE_MEMZONE_IOVA_CONTIG - Ensure reserved memzone is IOVA-contiguous.
+ * This option should be used when allocating
+ * memory intended for hardware rings etc.
+ * @param align
+ * Alignment for resulting memzone. Must be a power of 2.
+ * @return
+ * A pointer to a correctly-filled read-only memzone descriptor, or NULL
+ * on error.
+ * On error case, rte_errno will be set appropriately:
+ * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure
+ * - ENOSPC - the maximum number of memzones has already been allocated
+ * - ENOMEM - no appropriate memory area found in which to create memzone
+ * - EINVAL - invalid parameters
+ */
+__rte_experimental
+const struct rte_memzone *rte_memzone_lookup_reserve_aligned(const char *name,
+ size_t len, int socket_id, unsigned int flags,
+ unsigned int align);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Reserve a portion of physical memory if it doesn't exist, with specified
+ * alignment and boundary.
+ *
+ * This function reserves some memory with specified alignment and
+ * boundary, and returns a pointer to a correctly filled memzone
+ * descriptor. If memory already exists, return pointer to pre-existing
+ * memzone descriptor. If the allocation cannot be done or if the alignment
+ * or boundary are not a power of 2, returns NULL.
+ * Memory buffer is reserved in a way that it won't cross the specified
+ * boundary. That implies that the requested length should be less than
+ * or equal to the boundary.
+ *
+ * @note If memzone with a given name already exists, it will be returned
+ * regardless of whether it matches the requirements specified for allocation.
+ * It is the responsibility of the user to ensure that two different memzones
+ * with identical names are not attempted to be created.
+ *
+ * @note Reserving memzones with len set to 0 will only attempt to allocate
+ * memzones from memory that is already available. It will not trigger any
+ * new allocations.
+ *
+ * @note: When reserving memzones with len set to 0, it is preferable to also
+ * set a valid socket_id. Setting socket_id to SOCKET_ID_ANY is supported, but
+ * will likely not yield expected results. Specifically, the resulting memzone
+ * may not necessarily be the biggest memzone available, but rather biggest
+ * memzone available on socket id corresponding to an lcore from which
+ * reservation was called.
+ *
+ * @param name
+ * The name of the memzone. If the memzone with this name already exists, the
+ * function will return existing memzone instead of allocating a new one.
+ * @param len
+ * The size of the memory to be reserved. If it
+ * is 0, the biggest contiguous zone will be reserved.
+ * @param socket_id
+ * The socket identifier in the case of
+ * NUMA. The value can be SOCKET_ID_ANY if there is no NUMA
+ * constraint for the reserved zone.
+ * @param flags
+ * The flags parameter is used to request memzones to be
+ * taken from specifically sized hugepages.
+ * - RTE_MEMZONE_2MB - Reserved from 2MB pages
+ * - RTE_MEMZONE_1GB - Reserved from 1GB pages
+ * - RTE_MEMZONE_16MB - Reserved from 16MB pages
+ * - RTE_MEMZONE_16GB - Reserved from 16GB pages
+ * - RTE_MEMZONE_256KB - Reserved from 256KB pages
+ * - RTE_MEMZONE_256MB - Reserved from 256MB pages
+ * - RTE_MEMZONE_512MB - Reserved from 512MB pages
+ * - RTE_MEMZONE_4GB - Reserved from 4GB pages
+ * - RTE_MEMZONE_SIZE_HINT_ONLY - Allow alternative page size to be used if
+ * the requested page size is unavailable.
+ * If this flag is not set, the function
+ * will return error on an unavailable size
+ * request.
+ * - RTE_MEMZONE_IOVA_CONTIG - Ensure reserved memzone is IOVA-contiguous.
+ * This option should be used when allocating
+ * memory intended for hardware rings etc.
+ * @param align
+ * Alignment for resulting memzone. Must be a power of 2.
+ * @param bound
+ * Boundary for resulting memzone. Must be a power of 2 or zero.
+ * Zero value implies no boundary condition.
+ * @return
+ * A pointer to a correctly-filled read-only memzone descriptor, or NULL
+ * on error.
+ * On error case, rte_errno will be set appropriately:
+ * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure
+ * - ENOSPC - the maximum number of memzones has already been allocated
+ * - ENOMEM - no appropriate memory area found in which to create memzone
+ * - EINVAL - invalid parameters
+ */
+__rte_experimental
+const struct rte_memzone *rte_memzone_lookup_reserve_bounded(const char *name,
+ size_t len, int socket_id,
+ unsigned int flags, unsigned int align,
+ unsigned int bound);
+
/**
* Free a memzone.
*
@@ -281,6 +499,23 @@ const struct rte_memzone *rte_memzone_reserve_bounded(const char *name,
*/
int rte_memzone_free(const struct rte_memzone *mz);
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Lookup and free a memzone if it exists.
+ *
+ * @param name
+ * The name of the memzone to lookup and free.
+ * @return
+ * 0 on success (the memzone was found and freed).
+ * On error, a negative errno-style value is returned directly:
+ * - -ENOENT - no memzone with the given name exists
+ * - -EINVAL - the memzone was found but is not allocated
+ */
+__rte_experimental
+int rte_memzone_lookup_free(const char *name);
+
/**
* Lookup for a memzone.
*
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index 6088e7f6c3..c394dc22bc 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -374,6 +374,10 @@ EXPERIMENTAL {
per_lcore_trace_mem;
per_lcore_trace_point_sz;
rte_log_can_log;
+ rte_memzone_lookup_reserve;
+ rte_memzone_lookup_reserve_aligned;
+ rte_memzone_lookup_reserve_bounded;
+ rte_memzone_lookup_free;
rte_thread_getname;
rte_trace_dump;
rte_trace_is_enabled;
--
2.17.1
next reply other threads:[~2020-05-05 14:24 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-05-05 14:24 Anatoly Burakov [this message]
2020-05-05 15:01 ` Bruce Richardson
2020-05-06 9:35 ` Burakov, Anatoly
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=b0ef92d3be578c1dbcc6fd61a12dd943decaa15c.1588688636.git.anatoly.burakov@intel.com \
--to=anatoly.burakov@intel.com \
--cc=dev@dpdk.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).