DPDK patches and discussions
 help / color / mirror / Atom feed
From: Anatoly Burakov <anatoly.burakov@intel.com>
To: dev@dpdk.org
Subject: [dpdk-dev] [RFC] mem: add atomic lookup-and-reserve/free memzone API
Date: Tue,  5 May 2020 14:24:07 +0000	[thread overview]
Message-ID: <b0ef92d3be578c1dbcc6fd61a12dd943decaa15c.1588688636.git.anatoly.burakov@intel.com> (raw)

Currently, in order to perform a memzone lookup and create/free
the memzone, the user has to call two API's, creating a race
condition. This is particularly destructive for memzone_free call
because the reference provided to memzone_free at the time of call
may be stale or refer to a different memzone altogether.

Fix this race condition by adding an API to perform lookup and
create/free memzone in one go.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 lib/librte_eal/common/eal_common_memzone.c | 125 ++++++++---
 lib/librte_eal/include/rte_memzone.h       | 235 +++++++++++++++++++++
 lib/librte_eal/rte_eal_version.map         |   4 +
 3 files changed, 340 insertions(+), 24 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_memzone.c b/lib/librte_eal/common/eal_common_memzone.c
index 7c21aa921e..38dc995a39 100644
--- a/lib/librte_eal/common/eal_common_memzone.c
+++ b/lib/librte_eal/common/eal_common_memzone.c
@@ -189,7 +189,8 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
 
 static const struct rte_memzone *
 rte_memzone_reserve_thread_safe(const char *name, size_t len, int socket_id,
-		unsigned int flags, unsigned int align, unsigned int bound)
+		unsigned int flags, unsigned int align, unsigned int bound,
+		bool lookup)
 {
 	struct rte_mem_config *mcfg;
 	const struct rte_memzone *mz = NULL;
@@ -199,11 +200,17 @@ rte_memzone_reserve_thread_safe(const char *name, size_t len, int socket_id,
 
 	rte_rwlock_write_lock(&mcfg->mlock);
 
-	mz = memzone_reserve_aligned_thread_unsafe(
-		name, len, socket_id, flags, align, bound);
+	if (lookup) {
+		mz = memzone_lookup_thread_unsafe(name);
+		rte_eal_trace_memzone_lookup(name, mz);
+	}
+	if (mz == NULL) {
+		mz = memzone_reserve_aligned_thread_unsafe(
+			name, len, socket_id, flags, align, bound);
 
-	rte_eal_trace_memzone_reserve(name, len, socket_id, flags, align,
-		bound, mz);
+		rte_eal_trace_memzone_reserve(name, len, socket_id, flags,
+				align, bound, mz);
+	}
 
 	rte_rwlock_write_unlock(&mcfg->mlock);
 
@@ -220,7 +227,7 @@ rte_memzone_reserve_bounded(const char *name, size_t len, int socket_id,
 			    unsigned flags, unsigned align, unsigned bound)
 {
 	return rte_memzone_reserve_thread_safe(name, len, socket_id, flags,
-					       align, bound);
+			align, bound, false);
 }
 
 /*
@@ -232,7 +239,7 @@ rte_memzone_reserve_aligned(const char *name, size_t len, int socket_id,
 			    unsigned flags, unsigned align)
 {
 	return rte_memzone_reserve_thread_safe(name, len, socket_id, flags,
-					       align, 0);
+			align, 0, false);
 }
 
 /*
@@ -244,49 +251,119 @@ rte_memzone_reserve(const char *name, size_t len, int socket_id,
 		    unsigned flags)
 {
 	return rte_memzone_reserve_thread_safe(name, len, socket_id,
-					       flags, RTE_CACHE_LINE_SIZE, 0);
+			flags, RTE_CACHE_LINE_SIZE, 0, false);
 }
 
-int
-rte_memzone_free(const struct rte_memzone *mz)
+/*
+ * Return a pointer to a correctly filled memzone descriptor (with a
+ * specified alignment and boundary). If the allocation cannot be done,
+ * return NULL.
+ */
+const struct rte_memzone *
+rte_memzone_lookup_reserve_bounded(const char *name, size_t len, int socket_id,
+		unsigned int flags, unsigned int align, unsigned int bound)
 {
-	char name[RTE_MEMZONE_NAMESIZE];
+	return rte_memzone_reserve_thread_safe(name, len, socket_id,
+			flags, align, bound, true);
+}
+
+/*
+ * Return a pointer to a correctly filled memzone descriptor (with a
+ * specified alignment). If the allocation cannot be done, return NULL.
+ */
+const struct rte_memzone *
+rte_memzone_lookup_reserve_aligned(const char *name, size_t len, int socket_id,
+		unsigned int flags, unsigned int align)
+{
+	return rte_memzone_reserve_thread_safe(name, len, socket_id,
+			flags, align, 0, true);
+}
+
+/*
+ * Return existing memzone, or create a new one if it doesn't exist.
+ */
+const struct rte_memzone *
+rte_memzone_lookup_reserve(const char *name, size_t len, int socket_id,
+		unsigned int flags)
+{
+	return rte_memzone_reserve_thread_safe(name, len, socket_id,
+			flags, RTE_CACHE_LINE_SIZE, 0, true);
+}
+
+static int
+rte_memzone_free_thread_unsafe(const struct rte_memzone *mz)
+{
+	struct rte_memzone *found_mz;
 	struct rte_mem_config *mcfg;
+	char name[RTE_MEMZONE_NAMESIZE];
 	struct rte_fbarray *arr;
-	struct rte_memzone *found_mz;
+	void *addr;
+	unsigned int idx;
 	int ret = 0;
-	void *addr = NULL;
-	unsigned idx;
-
-	if (mz == NULL)
-		return -EINVAL;
 
-	rte_strlcpy(name, mz->name, RTE_MEMZONE_NAMESIZE);
 	mcfg = rte_eal_get_configuration()->mem_config;
 	arr = &mcfg->memzones;
-
-	rte_rwlock_write_lock(&mcfg->mlock);
-
 	idx = rte_fbarray_find_idx(arr, mz);
 	found_mz = rte_fbarray_get(arr, idx);
+	rte_strlcpy(name, mz->name, RTE_MEMZONE_NAMESIZE);
 
 	if (found_mz == NULL) {
+		addr = NULL;
 		ret = -EINVAL;
 	} else if (found_mz->addr == NULL) {
 		RTE_LOG(ERR, EAL, "Memzone is not allocated\n");
+		addr = NULL;
 		ret = -EINVAL;
 	} else {
 		addr = found_mz->addr;
+
 		memset(found_mz, 0, sizeof(*found_mz));
 		rte_fbarray_set_free(arr, idx);
+
+		rte_free(addr);
 	}
+	rte_eal_trace_memzone_free(name, addr, ret);
+
+	return ret;
+}
+
+int
+rte_memzone_free(const struct rte_memzone *mz)
+{
+	struct rte_mem_config *mcfg;
+	int ret;
+
+	if (mz == NULL)
+		return -EINVAL;
 
+	mcfg = rte_eal_get_configuration()->mem_config;
+
+	rte_rwlock_write_lock(&mcfg->mlock);
+	ret = rte_memzone_free_thread_unsafe(mz);
 	rte_rwlock_write_unlock(&mcfg->mlock);
 
-	if (addr != NULL)
-		rte_free(addr);
+	return ret;
+}
 
-	rte_eal_trace_memzone_free(name, addr, ret);
+int
+rte_memzone_lookup_free(const char *name)
+{
+	const struct rte_memzone *memzone;
+	struct rte_mem_config *mcfg;
+	int ret;
+
+	mcfg = rte_eal_get_configuration()->mem_config;
+
+	rte_rwlock_read_lock(&mcfg->mlock);
+
+	memzone = memzone_lookup_thread_unsafe(name);
+	rte_eal_trace_memzone_lookup(name, memzone);
+	if (memzone != NULL)
+		ret = rte_memzone_free_thread_unsafe(memzone);
+	else
+		ret = -ENOENT;
+
+	rte_rwlock_read_unlock(&mcfg->mlock);
 	return ret;
 }
 
diff --git a/lib/librte_eal/include/rte_memzone.h b/lib/librte_eal/include/rte_memzone.h
index 091c9522f7..824dae7df9 100644
--- a/lib/librte_eal/include/rte_memzone.h
+++ b/lib/librte_eal/include/rte_memzone.h
@@ -270,6 +270,224 @@ const struct rte_memzone *rte_memzone_reserve_bounded(const char *name,
 			size_t len, int socket_id,
 			unsigned flags, unsigned align, unsigned bound);
 
+/**
+ * Reserve a portion of physical memory if it doesn't exist.
+ *
+ * This function reserves some memory and returns a pointer to a
+ * correctly filled memzone descriptor. If the memory already exists, return
+ * a pointer to pre-existing memzone descriptor. If the allocation cannot be
+ * done, return NULL.
+ *
+ * @note If memzone with a given name already exists, it will be returned
+ *   regardless of whether it matches the requirements specified for allocation.
+ *   It is the responsibility of the user to ensure that two different memzones
+ *   with identical names are not attempted to be created.
+ *
+ * @note Reserving memzones with len set to 0 will only attempt to allocate
+ *   memzones from memory that is already available. It will not trigger any
+ *   new allocations.
+ *
+ * @note: When reserving memzones with len set to 0, it is preferable to also
+ *   set a valid socket_id. Setting socket_id to SOCKET_ID_ANY is supported, but
+ *   will likely not yield expected results. Specifically, the resulting memzone
+ *   may not necessarily be the biggest memzone available, but rather biggest
+ *   memzone available on socket id corresponding to an lcore from which
+ *   reservation was called.
+ *
+ * @param name
+ *   The name of the memzone. If the memzone with this name already exists, the
+ *   function will return existing memzone instead of allocating a new one.
+ * @param len
+ *   The size of the memory to be reserved. If it
+ *   is 0, the biggest contiguous zone will be reserved.
+ * @param socket_id
+ *   The socket identifier in the case of
+ *   NUMA. The value can be SOCKET_ID_ANY if there is no NUMA
+ *   constraint for the reserved zone.
+ * @param flags
+ *   The flags parameter is used to request memzones to be
+ *   taken from specifically sized hugepages.
+ *   - RTE_MEMZONE_2MB - Reserved from 2MB pages
+ *   - RTE_MEMZONE_1GB - Reserved from 1GB pages
+ *   - RTE_MEMZONE_16MB - Reserved from 16MB pages
+ *   - RTE_MEMZONE_16GB - Reserved from 16GB pages
+ *   - RTE_MEMZONE_256KB - Reserved from 256KB pages
+ *   - RTE_MEMZONE_256MB - Reserved from 256MB pages
+ *   - RTE_MEMZONE_512MB - Reserved from 512MB pages
+ *   - RTE_MEMZONE_4GB - Reserved from 4GB pages
+ *   - RTE_MEMZONE_SIZE_HINT_ONLY - Allow alternative page size to be used if
+ *                                  the requested page size is unavailable.
+ *                                  If this flag is not set, the function
+ *                                  will return error on an unavailable size
+ *                                  request.
+ *   - RTE_MEMZONE_IOVA_CONTIG - Ensure reserved memzone is IOVA-contiguous.
+ *                               This option should be used when allocating
+ *                               memory intended for hardware rings etc.
+ * @return
+ *   A pointer to a correctly-filled read-only memzone descriptor, or NULL
+ *   on error.
+ *   On error case, rte_errno will be set appropriately:
+ *    - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure
+ *    - ENOSPC - the maximum number of memzones has already been allocated
+ *    - ENOMEM - no appropriate memory area found in which to create memzone
+ *    - EINVAL - invalid parameters
+ */
+__rte_experimental
+const struct rte_memzone *rte_memzone_lookup_reserve(const char *name,
+		size_t len, int socket_id, unsigned int flags);
+
+/**
+ * Reserve a portion of physical memory if it doesn't exist, with alignment on
+ * a specified boundary.
+ *
+ * This function reserves some memory with alignment on a specified
+ * boundary, and returns a pointer to a correctly filled memzone
+ * descriptor. If memory already exists, return pointer to pre-existing
+ * memzone descriptor. If the allocation cannot be done or if the alignment
+ * is not a power of 2, returns NULL.
+ *
+ * @note If memzone with a given name already exists, it will be returned
+ *   regardless of whether it matches the requirements specified for allocation.
+ *   It is the responsibility of the user to ensure that two different memzones
+ *   with identical names are not attempted to be created.
+ *
+ * @note Reserving memzones with len set to 0 will only attempt to allocate
+ *   memzones from memory that is already available. It will not trigger any
+ *   new allocations.
+ *
+ * @note: When reserving memzones with len set to 0, it is preferable to also
+ *   set a valid socket_id. Setting socket_id to SOCKET_ID_ANY is supported, but
+ *   will likely not yield expected results. Specifically, the resulting memzone
+ *   may not necessarily be the biggest memzone available, but rather biggest
+ *   memzone available on socket id corresponding to an lcore from which
+ *   reservation was called.
+ *
+ * @param name
+ *   The name of the memzone. If the memzone with this name already exists, the
+ *   function will return existing memzone instead of allocating a new one.
+ * @param len
+ *   The size of the memory to be reserved. If it
+ *   is 0, the biggest contiguous zone will be reserved.
+ * @param socket_id
+ *   The socket identifier in the case of
+ *   NUMA. The value can be SOCKET_ID_ANY if there is no NUMA
+ *   constraint for the reserved zone.
+ * @param flags
+ *   The flags parameter is used to request memzones to be
+ *   taken from specifically sized hugepages.
+ *   - RTE_MEMZONE_2MB - Reserved from 2MB pages
+ *   - RTE_MEMZONE_1GB - Reserved from 1GB pages
+ *   - RTE_MEMZONE_16MB - Reserved from 16MB pages
+ *   - RTE_MEMZONE_16GB - Reserved from 16GB pages
+ *   - RTE_MEMZONE_256KB - Reserved from 256KB pages
+ *   - RTE_MEMZONE_256MB - Reserved from 256MB pages
+ *   - RTE_MEMZONE_512MB - Reserved from 512MB pages
+ *   - RTE_MEMZONE_4GB - Reserved from 4GB pages
+ *   - RTE_MEMZONE_SIZE_HINT_ONLY - Allow alternative page size to be used if
+ *                                  the requested page size is unavailable.
+ *                                  If this flag is not set, the function
+ *                                  will return error on an unavailable size
+ *                                  request.
+ *   - RTE_MEMZONE_IOVA_CONTIG - Ensure reserved memzone is IOVA-contiguous.
+ *                               This option should be used when allocating
+ *                               memory intended for hardware rings etc.
+ * @param align
+ *   Alignment for resulting memzone. Must be a power of 2.
+ * @return
+ *   A pointer to a correctly-filled read-only memzone descriptor, or NULL
+ *   on error.
+ *   On error case, rte_errno will be set appropriately:
+ *    - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure
+ *    - ENOSPC - the maximum number of memzones has already been allocated
+ *    - ENOMEM - no appropriate memory area found in which to create memzone
+ *    - EINVAL - invalid parameters
+ */
+__rte_experimental
+const struct rte_memzone *rte_memzone_lookup_reserve_aligned(const char *name,
+		size_t len, int socket_id, unsigned int flags,
+		unsigned int align);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Reserve a portion of physical memory if it doesn't exist, with specified
+ * alignment and boundary.
+ *
+ * This function reserves some memory with specified alignment and
+ * boundary, and returns a pointer to a correctly filled memzone
+ * descriptor. If memory already exists, return pointer to pre-existing
+ * memzone descriptor. If the allocation cannot be done or if the alignment
+ * or boundary are not a power of 2, returns NULL.
+ * Memory buffer is reserved in a way, that it wouldn't cross specified
+ * boundary. That implies that requested length should be less or equal
+ * then boundary.
+ *
+ * @note If memzone with a given name already exists, it will be returned
+ *   regardless of whether it matches the requirements specified for allocation.
+ *   It is the responsibility of the user to ensure that two different memzones
+ *   with identical names are not attempted to be created.
+ *
+ * @note Reserving memzones with len set to 0 will only attempt to allocate
+ *   memzones from memory that is already available. It will not trigger any
+ *   new allocations.
+ *
+ * @note: When reserving memzones with len set to 0, it is preferable to also
+ *   set a valid socket_id. Setting socket_id to SOCKET_ID_ANY is supported, but
+ *   will likely not yield expected results. Specifically, the resulting memzone
+ *   may not necessarily be the biggest memzone available, but rather biggest
+ *   memzone available on socket id corresponding to an lcore from which
+ *   reservation was called.
+ *
+ * @param name
+ *   The name of the memzone. If the memzone with this name already exists, the
+ *   function will return existing memzone instead of allocating a new one.
+ * @param len
+ *   The size of the memory to be reserved. If it
+ *   is 0, the biggest contiguous zone will be reserved.
+ * @param socket_id
+ *   The socket identifier in the case of
+ *   NUMA. The value can be SOCKET_ID_ANY if there is no NUMA
+ *   constraint for the reserved zone.
+ * @param flags
+ *   The flags parameter is used to request memzones to be
+ *   taken from specifically sized hugepages.
+ *   - RTE_MEMZONE_2MB - Reserved from 2MB pages
+ *   - RTE_MEMZONE_1GB - Reserved from 1GB pages
+ *   - RTE_MEMZONE_16MB - Reserved from 16MB pages
+ *   - RTE_MEMZONE_16GB - Reserved from 16GB pages
+ *   - RTE_MEMZONE_256KB - Reserved from 256KB pages
+ *   - RTE_MEMZONE_256MB - Reserved from 256MB pages
+ *   - RTE_MEMZONE_512MB - Reserved from 512MB pages
+ *   - RTE_MEMZONE_4GB - Reserved from 4GB pages
+ *   - RTE_MEMZONE_SIZE_HINT_ONLY - Allow alternative page size to be used if
+ *                                  the requested page size is unavailable.
+ *                                  If this flag is not set, the function
+ *                                  will return error on an unavailable size
+ *                                  request.
+ *   - RTE_MEMZONE_IOVA_CONTIG - Ensure reserved memzone is IOVA-contiguous.
+ *                               This option should be used when allocating
+ *                               memory intended for hardware rings etc.
+ * @param align
+ *   Alignment for resulting memzone. Must be a power of 2.
+ * @param bound
+ *   Boundary for resulting memzone. Must be a power of 2 or zero.
+ *   Zero value implies no boundary condition.
+ * @return
+ *   A pointer to a correctly-filled read-only memzone descriptor, or NULL
+ *   on error.
+ *   On error case, rte_errno will be set appropriately:
+ *    - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure
+ *    - ENOSPC - the maximum number of memzones has already been allocated
+ *    - ENOMEM - no appropriate memory area found in which to create memzone
+ *    - EINVAL - invalid parameters
+ */
+__rte_experimental
+const struct rte_memzone *rte_memzone_lookup_reserve_bounded(const char *name,
+			size_t len, int socket_id,
+			unsigned int flags, unsigned int align,
+			unsigned int bound);
+
 /**
  * Free a memzone.
  *
@@ -281,6 +499,23 @@ const struct rte_memzone *rte_memzone_reserve_bounded(const char *name,
  */
 int rte_memzone_free(const struct rte_memzone *mz);
 
+/**
+ * Lookup and free a memzone if it exists.
+ *
+ * @param name
+ *   The name of the memzone to lookup and free.
+ * @return
+ *   A pointer to a correctly-filled read-only memzone descriptor, or NULL
+ *   on error.
+ *   On error case, rte_errno will be set appropriately:
+ *    - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure
+ *    - ENOSPC - the maximum number of memzones has already been allocated
+ *    - ENOMEM - no appropriate memory area found in which to create memzone
+ *    - EINVAL - invalid parameters
+ */
+__rte_experimental
+int rte_memzone_lookup_free(const char *name);
+
 /**
  * Lookup for a memzone.
  *
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index 6088e7f6c3..c394dc22bc 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -374,6 +374,10 @@ EXPERIMENTAL {
 	per_lcore_trace_mem;
 	per_lcore_trace_point_sz;
 	rte_log_can_log;
+	rte_memzone_lookup_reserve;
+	rte_memzone_lookup_reserve_aligned;
+	rte_memzone_lookup_reserve_bounded;
+	rte_memzone_lookup_free;
 	rte_thread_getname;
 	rte_trace_dump;
 	rte_trace_is_enabled;
-- 
2.17.1

             reply	other threads:[~2020-05-05 14:24 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-05-05 14:24 Anatoly Burakov [this message]
2020-05-05 15:01 ` Bruce Richardson
2020-05-06  9:35   ` Burakov, Anatoly

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=b0ef92d3be578c1dbcc6fd61a12dd943decaa15c.1588688636.git.anatoly.burakov@intel.com \
    --to=anatoly.burakov@intel.com \
    --cc=dev@dpdk.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).