DPDK patches and discussions
 help / color / mirror / Atom feed
From: leeopop <dlrmsghd@gmail.com>
To: dev@dpdk.org
Subject: [dpdk-dev] [PATCH 1/2] eal/persistent: new library to hold memory region after program exit
Date: Mon,  6 Jul 2015 22:28:16 +0900	[thread overview]
Message-ID: <1436189297-7780-2-git-send-email-dlrmsghd@gmail.com> (raw)
In-Reply-To: <1436189297-7780-1-git-send-email-dlrmsghd@gmail.com>

Some NICs use host memory region as their scratch area.
When a DPDK user application terminates, all of its memory regions are lost
or re-initialized (memzone), which causes HW faults.
This library maintains shared memory regions that are persistent across
multiple executions and terminations of user-level applications.
It also manages physically contiguous memory regions.

Signed-off-by: leeopop <dlrmsghd@gmail.com>
---
 drivers/net/mlnx_uio/LICENSE                       |  30 ++++
 lib/Makefile                                       |   1 +
 lib/librte_eal/common/Makefile                     |   3 +
 lib/librte_eal/common/include/rte_pci.h            |   1 +
 lib/librte_eal/common/include/rte_persistent_mem.h |  26 +++
 lib/librte_eal/linuxapp/eal/Makefile               |   6 +
 lib/librte_eal/linuxapp/eal/eal.c                  |   9 +
 lib/librte_eal/linuxapp/eal/eal_persistent_mem.c   | 148 +++++++++++++++
 .../eal/include/exec-env/rte_persistent_mem.h      |  15 ++
 lib/librte_eal/linuxapp/eal/rte_eal_version.map    |   2 +
 lib/librte_persistent/Makefile                     |  55 ++++++
 lib/librte_persistent/rte_persistent.c             | 198 +++++++++++++++++++++
 lib/librte_persistent/rte_persistent.h             |  20 +++
 lib/librte_persistent/rte_persistent_version.map   |  11 ++
 14 files changed, 525 insertions(+)
 create mode 100644 drivers/net/mlnx_uio/LICENSE
 create mode 100644 lib/librte_eal/common/include/rte_persistent_mem.h
 create mode 100644 lib/librte_eal/linuxapp/eal/eal_persistent_mem.c
 create mode 100644 lib/librte_eal/linuxapp/eal/include/exec-env/rte_persistent_mem.h
 create mode 100644 lib/librte_persistent/Makefile
 create mode 100644 lib/librte_persistent/rte_persistent.c
 create mode 100644 lib/librte_persistent/rte_persistent.h
 create mode 100644 lib/librte_persistent/rte_persistent_version.map

diff --git a/drivers/net/mlnx_uio/LICENSE b/drivers/net/mlnx_uio/LICENSE
new file mode 100644
index 0000000..7ef5b4b
--- /dev/null
+++ b/drivers/net/mlnx_uio/LICENSE
@@ -0,0 +1,30 @@
+* Source code in kernel/ directory follows GPLv2 license.
+
+
+Copyright (c) 2015, Keunhong Lee
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name of bsd nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/lib/Makefile b/lib/Makefile
index 5f480f9..7a491d3 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -57,6 +57,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_PORT) += librte_port
 DIRS-$(CONFIG_RTE_LIBRTE_TABLE) += librte_table
 DIRS-$(CONFIG_RTE_LIBRTE_PIPELINE) += librte_pipeline
 DIRS-$(CONFIG_RTE_LIBRTE_REORDER) += librte_reorder
+DIRS-$(CONFIG_RTE_LIBRTE_PERSISTENT) += librte_persistent
 
 ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
 DIRS-$(CONFIG_RTE_LIBRTE_KNI) += librte_kni
diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
index 38772d4..ce4b0a7 100644
--- a/lib/librte_eal/common/Makefile
+++ b/lib/librte_eal/common/Makefile
@@ -40,6 +40,9 @@ INC += rte_string_fns.h rte_version.h
 INC += rte_eal_memconfig.h rte_malloc_heap.h
 INC += rte_hexdump.h rte_devargs.h rte_dev.h
 INC += rte_pci_dev_feature_defs.h rte_pci_dev_features.h
+ifeq ($(CONFIG_RTE_EAL_PERSISTENT_MEM),y)
+INC += rte_persistent_mem.h
+endif
 
 ifeq ($(CONFIG_RTE_INSECURE_FUNCTION_WARNING),y)
 INC += rte_warnings.h
diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h
index 7801fa0..a323e74 100644
--- a/lib/librte_eal/common/include/rte_pci.h
+++ b/lib/librte_eal/common/include/rte_pci.h
@@ -207,6 +207,7 @@ struct rte_pci_driver {
 	pci_devuninit_t *devuninit;             /**< Device uninit function. */
 	const struct rte_pci_id *id_table;	/**< ID table, NULL terminated. */
 	uint32_t drv_flags;                     /**< Flags contolling handling of device. */
+	void* priv; /**< Private data. */
 };
 
 /** Device needs PCI BAR mapping (done with either IGB_UIO or VFIO) */
diff --git a/lib/librte_eal/common/include/rte_persistent_mem.h b/lib/librte_eal/common/include/rte_persistent_mem.h
new file mode 100644
index 0000000..3a8ff23
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_persistent_mem.h
@@ -0,0 +1,26 @@
+/*
+ * rte_persistent_mem.h
+ *
+ *  Created on: Jun 22, 2015
+ *      Author: leeopop
+ */
+
+#ifndef LIBRTE_EAL_COMMON_INCLUDE_RTE_PERSISTENT_MEM_H_
+#define LIBRTE_EAL_COMMON_INCLUDE_RTE_PERSISTENT_MEM_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <exec-env/rte_persistent_mem.h>
+
+int rte_persistent_memory_init(void); /**< Reserve the persistent shared zones; 0 on success, -1 if a zone cannot be reserved. */
+int rte_persistent_memory_num_numa(void); /**< NUMA node count recorded by rte_persistent_memory_init(). */
+
+extern void* persistent_allocated_memory[RTE_MAX_NUMA_NODES][RTE_EAL_PERSISTENT_MEM_COUNT]; /**< Zone base addresses as [node][index within node]; 0 where no zone is stored. */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* LIBRTE_EAL_COMMON_INCLUDE_RTE_PERSISTENT_MEM_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
index e99d7a3..139b608 100644
--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -74,6 +74,9 @@ SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_alarm.c
 ifeq ($(CONFIG_RTE_LIBRTE_IVSHMEM),y)
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_ivshmem.c
 endif
+ifeq ($(CONFIG_RTE_EAL_PERSISTENT_MEM),y)
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_persistent_mem.c
+endif
 
 # from common dir
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_memzone.c
@@ -112,6 +115,9 @@ CFLAGS_eal_thread.o += -Wno-return-type
 endif
 
 INC := rte_interrupts.h rte_kni_common.h rte_dom0_common.h
+ifeq ($(CONFIG_RTE_EAL_PERSISTENT_MEM),y)
+INC += rte_persistent_mem.h
+endif
 
 SYMLINK-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP)-include/exec-env := \
 	$(addprefix include/exec-env/,$(INC))
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index 8809f57..b3f05a8 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -77,6 +77,10 @@
 #include <malloc_heap.h>
 #include <rte_eth_ring.h>
 
+#ifdef RTE_EAL_PERSISTENT_MEM
+#include <rte_persistent_mem.h>
+#endif
+
 #include "eal_private.h"
 #include "eal_thread.h"
 #include "eal_internal_cfg.h"
@@ -759,6 +763,11 @@ rte_eal_init(int argc, char **argv)
 	if (fctret < 0)
 		exit(1);
 
+#ifdef RTE_EAL_PERSISTENT_MEM
+	if (rte_persistent_memory_init() < 0)
+		rte_panic("Cannot init persistent memory\n");
+#endif
+
 	if (internal_config.no_hugetlbfs == 0 &&
 			internal_config.process_type != RTE_PROC_SECONDARY &&
 			internal_config.xen_dom0_support == 0 &&
diff --git a/lib/librte_eal/linuxapp/eal/eal_persistent_mem.c b/lib/librte_eal/linuxapp/eal/eal_persistent_mem.c
new file mode 100644
index 0000000..f72c148
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_persistent_mem.c
@@ -0,0 +1,148 @@
+/*
+ * eal_persistent_mem.c
+ *
+ *  Created on: Jun 22, 2015
+ *      Author: leeopop
+ */
+
+
+/*
+ * dma_memory.c
+ *
+ *  Created on: Oct 4, 2014
+ *      Author: leeopop
+ */
+
+
+#include <rte_persistent_mem.h>
+
+#include <sys/io.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <assert.h>
+#include <unistd.h>
+#include <numa.h>
+#include <numaif.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#include <rte_log.h>
+#include <rte_eal.h>
+#include <rte_memory.h>
+#include <rte_common.h>
+#include <rte_atomic.h>
+
+#define SHM_SIZE RTE_EAL_PERSISTENT_MEM_UNIT
+#define SHM_COUNT RTE_EAL_PERSISTENT_MEM_COUNT
+
+#define SHM_KEY_BASE (0x861591B)
+#define SHM_KEY ((SHM_KEY_BASE / SHM_COUNT)*SHM_COUNT)
+
+/*
+ * Create or re-attach the System V shared memory segment number `subindex`
+ * and return the address it is mapped at.
+ *
+ * The segment key is SHM_KEY_BASE + subindex.  A segment created by this call
+ * is zero-filled; one that already exists (left behind by an earlier run) is
+ * attached with its contents untouched.  When socket_id is not SOCKET_ID_ANY
+ * the mapping is bound to that NUMA node with mbind(); a failure there is only
+ * logged.  Every byte is then read once so that all pages are faulted in, and
+ * the segment is locked in memory with SHM_LOCK.
+ *
+ * Returns the mapped address, or NULL when the index is out of range or the
+ * segment cannot be obtained, attached or locked.
+ */
+static void* reserve_shared_zone(int subindex, uint32_t len, int socket_id)
+{
+	if(subindex < 0 || subindex >= SHM_COUNT)
+	{
+		RTE_LOG(ERR, EAL, "Invalid shared zone index %d\n", subindex);
+		return NULL;
+	}
+	uint32_t shared_key = SHM_KEY_BASE + subindex;
+
+	int shmget_flag = IPC_CREAT | SHM_R | SHM_W; // | SHM_LOCKED;
+	if((len / RTE_PGSIZE_4K) > 1)
+	{
+		shmget_flag |= SHM_HUGETLB;
+	}
+
+	/* Ask for exclusive creation first so we know whether the zone is new. */
+	int clear = 1;
+	int shmid = shmget(shared_key, len, shmget_flag | IPC_EXCL);
+	if(shmid < 0)
+	{
+		//Reuse existing
+		shmid = shmget(shared_key, len, shmget_flag);
+		if(shmid < 0)
+		{
+			RTE_LOG(ERR, EAL, "shmget failed for shared zone %d\n", subindex);
+			return NULL;
+		}
+		clear = 0;
+	}
+
+	/* shmat() signals failure with (void *)-1, never with NULL. */
+	void* addr = shmat(shmid, 0, SHM_RND);
+	if(addr == (void*)-1)
+	{
+		RTE_LOG(ERR, EAL, "shmat failed for shared zone %d\n", subindex);
+		return NULL;
+	}
+
+	if(socket_id != SOCKET_ID_ANY)
+	{
+		struct bitmask * mask = numa_bitmask_alloc(RTE_MAX_NUMA_NODES);
+		mask = numa_bitmask_clearall(mask);
+		mask = numa_bitmask_setbit(mask, socket_id);
+		long ret = mbind(addr, len, MPOL_BIND,
+				mask->maskp, RTE_MAX_NUMA_NODES,
+				MPOL_MF_MOVE_ALL | MPOL_MF_STRICT);
+		if(ret < 0)
+		{
+			RTE_LOG(WARNING, EAL, "Cannot mbind memory. Are you running with root?\n");
+		}
+		numa_bitmask_free(mask);
+	}
+	rte_mb();
+
+	if(clear)
+	{
+		memset(addr, 0, len);
+	}
+
+	/* Read every byte so each page is resident before the segment is locked. */
+	size_t size;
+	volatile uint8_t reader = 0; //this prevents from being optimized out
+	volatile uint8_t* readp = (uint8_t*)addr;
+	for(size = 0; size < len; size++)
+	{
+		reader += *readp;
+		readp++;
+	}
+
+	rte_mb();
+	if(shmctl(shmid, SHM_LOCK, 0) != 0)
+	{
+		RTE_LOG(ERR, EAL, "Cannot lock shared zone %d in memory\n", subindex);
+		shmdt(addr);
+		return NULL;
+	}
+	return addr;
+}
+
+void* persistent_allocated_memory[RTE_MAX_NUMA_NODES][SHM_COUNT];
+
+static int numa_count = 0;
+
+/* Report the NUMA node count stored in numa_count (0 until it is set by init). */
+int rte_persistent_memory_num_numa(void)
+{
+	const int detected_nodes = numa_count;
+
+	return detected_nodes;
+}
+
+/*
+ * Reserve the persistent shared zones for every NUMA node.
+ *
+ * The SHM_COUNT zones are divided evenly between the configured NUMA nodes
+ * (at least one, at most RTE_MAX_NUMA_NODES).  The zones of node n use the
+ * global indices starting at (SHM_COUNT / nodes) * n and their addresses are
+ * stored in persistent_allocated_memory[n][0 .. SHM_COUNT / nodes - 1]; every
+ * other entry is left at 0.  With a single node the zones are not bound to
+ * any socket.
+ *
+ * Returns 0 on success, -1 as soon as one zone cannot be reserved.
+ */
+int rte_persistent_memory_init(void)
+{
+	assert(SHM_SIZE == RTE_PGSIZE_2M); //XXX considering only 2MB pages.
+	int num_numa = numa_num_configured_nodes();
+	if(num_numa < 1)
+		num_numa = 1;
+	/* persistent_allocated_memory only has RTE_MAX_NUMA_NODES rows. */
+	if(num_numa > RTE_MAX_NUMA_NODES)
+	{
+		RTE_LOG(WARNING, EAL, "%d NUMA nodes found, using only the first %d\n",
+				num_numa, RTE_MAX_NUMA_NODES);
+		num_numa = RTE_MAX_NUMA_NODES;
+	}
+	numa_count = num_numa;
+
+	int node;
+	int k;
+	for(node = 0; node < RTE_MAX_NUMA_NODES; node++)
+		for(k=0; k<SHM_COUNT; k++)
+			persistent_allocated_memory[node][k] = 0;
+
+	const int zones_per_node = SHM_COUNT/num_numa;
+	for(node = 0; node < num_numa; node++)
+	{
+		int cur_socket = num_numa > 1 ? node : SOCKET_ID_ANY;
+		for(k=0; k<zones_per_node; k++)
+		{
+			int zone_index = (zones_per_node*node + k);
+			persistent_allocated_memory[node][k] = reserve_shared_zone(zone_index,
+					SHM_SIZE, cur_socket);
+			if(persistent_allocated_memory[node][k] == 0)
+			{
+				RTE_LOG(ERR, EAL, "Cannot allocate shared zone index %d. "
+						"node: %d, local index: %d\n", zone_index, node, k);
+				return -1;
+			}
+		}
+		/* Cast explicitly: uint64_t is not unsigned long on every target. */
+		RTE_LOG(INFO, EAL, "Initialized %llu bytes shared zone on socket %d.\n",
+				(unsigned long long)zones_per_node * (unsigned long long)(SHM_SIZE),
+				cur_socket);
+	}
+	return 0;
+}
diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_persistent_mem.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_persistent_mem.h
new file mode 100644
index 0000000..4038cd5
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_persistent_mem.h
@@ -0,0 +1,15 @@
+/*
+ * rte_persistent_mem.h
+ *
+ *  Created on: Jun 22, 2015
+ *      Author: leeopop
+ */
+
+#ifndef LIBRTE_EAL_LINUXAPP_EAL_INCLUDE_EXEC_ENV_RTE_PERSISTENT_MEM_H_
+#define LIBRTE_EAL_LINUXAPP_EAL_INCLUDE_EXEC_ENV_RTE_PERSISTENT_MEM_H_
+
+#ifndef LIBRTE_EAL_COMMON_INCLUDE_RTE_PERSISTENT_MEM_H_
+#error "don't include this file directly, please include generic <rte_persistent_mem.h>"
+#endif
+
+#endif /* LIBRTE_EAL_LINUXAPP_EAL_INCLUDE_EXEC_ENV_RTE_PERSISTENT_MEM_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
index 7e850a9..4382a01 100644
--- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
@@ -95,6 +95,8 @@ DPDK_2.0 {
 	rte_xen_dom0_memory_attach;
 	rte_xen_dom0_memory_init;
 	test_mp_secondary;
+	rte_persistent_memory_init;
+	rte_persistent_memory_num_numa;
 
 	local: *;
 };
diff --git a/lib/librte_persistent/Makefile b/lib/librte_persistent/Makefile
new file mode 100644
index 0000000..a233d95
--- /dev/null
+++ b/lib/librte_persistent/Makefile
@@ -0,0 +1,55 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_persistent.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
+
+EXPORT_MAP := rte_persistent_version.map
+
+LIBABIVER := 1
+
+# all source are stored in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_PERSISTENT) := rte_persistent.c
+
+# install this header file
+SYMLINK-$(CONFIG_RTE_LIBRTE_PERSISTENT)-include := rte_persistent.h
+
+# this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PERSISTENT) += lib/librte_hash
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PERSISTENT) += lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PERSISTENT) += lib/librte_eal
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_persistent/rte_persistent.c b/lib/librte_persistent/rte_persistent.c
new file mode 100644
index 0000000..a21f9dc
--- /dev/null
+++ b/lib/librte_persistent/rte_persistent.c
@@ -0,0 +1,198 @@
+/*
+ * rte_persistent.c
+ *
+ *  Created on: Jun 23, 2015
+ *      Author: leeopop
+ */
+
+#include <rte_persistent_mem.h>
+#include <rte_persistent.h>
+#include <rte_hash.h>
+#include <rte_memory.h>
+
+#include <memory.h>
+#include <string.h>
+#include <rte_common.h>
+#include <rte_random.h>
+#include <rte_log.h>
+#include <assert.h>
+
+#define ALLOC_UNIT RTE_PGSIZE_4K
+#define MAX_CONT_MEMORY RTE_EAL_PERSISTENT_MEM_UNIT
+#define MAX_ALLOC_COUNT (RTE_EAL_PERSISTENT_MEM_COUNT*(RTE_EAL_PERSISTENT_MEM_UNIT/ALLOC_UNIT))
+#define SEGMENT_COUNT (RTE_EAL_PERSISTENT_MEM_COUNT)
+#define SUBSEGMENT_COUNT (RTE_EAL_PERSISTENT_MEM_UNIT/ALLOC_UNIT)
+
+static struct rte_hash* allocated_segments = 0;
+
+struct alloc_info /* one record per live allocation, stored at the slot index its hash key was given */
+{
+	void* addr; //0 if not allocated
+	phys_addr_t hw_addr; // physical address looked up when the block was allocated
+	int seg_index; // row of alloc_array (segment) the block lives in
+	int sub_index; // first ALLOC_UNIT page of the block inside that segment
+	int seg_count; // length of the block in ALLOC_UNIT pages
+};
+
+struct alloc_info info_array[MAX_ALLOC_COUNT];
+char alloc_array[SEGMENT_COUNT][SUBSEGMENT_COUNT+1];
+
+#define ALLOCATED 'a'
+#define FREE 'f'
+
+static int __initialized = 0;
+
+/*
+ * One-time set-up of the allocator's bookkeeping: the hash table that maps a
+ * buffer address to its info_array slot, the info_array itself, and the
+ * per-segment page maps (every page FREE, each row NUL-terminated so it can
+ * be searched with strstr()).
+ *
+ * Calling it again after a successful call does nothing.
+ *
+ * Returns 0 on success, -1 if the hash table cannot be created; in that case
+ * the library stays uninitialised and the call may be retried.
+ */
+int rte_persistent_init(void)
+{
+	if(__initialized)
+		return 0;
+
+	struct rte_hash_parameters hash_param =
+	{
+			.name = "Persistent memory segments",
+			.entries = MAX_ALLOC_COUNT,
+			.bucket_entries = RTE_HASH_BUCKET_ENTRIES_MAX,
+			.key_len = sizeof(void*),
+			.hash_func = 0, //DEFAULT_HASH_FUNC,
+			.hash_func_init_val = 0,
+			.socket_id = SOCKET_ID_ANY,
+	};
+	allocated_segments = rte_hash_create(&hash_param);
+	if(allocated_segments == NULL)
+	{
+		RTE_LOG(ERR, EAL, "Cannot create persistent memory hash table\n");
+		return -1;
+	}
+
+	memset(info_array, 0, sizeof(info_array));
+	memset(alloc_array, (int)FREE, sizeof(alloc_array));
+
+	int k;
+	for(k=0; k<SEGMENT_COUNT; k++)
+		alloc_array[k][SUBSEGMENT_COUNT] = 0;
+
+	__initialized = 1;
+	return 0;
+}
+
+/* First global segment index that belongs to node `numa` when the segments are split across `total_numa` nodes. */
+static int global_to_local_start(int total_numa, int numa)
+{
+	const int segments_per_node = RTE_EAL_PERSISTENT_MEM_COUNT/total_numa;
+
+	return segments_per_node*numa;
+}
+
+/* Number of segments each node owns when the segments are split across `total_numa` nodes. */
+static int global_to_local_range(int total_numa)
+{
+	const int segments_per_node = RTE_EAL_PERSISTENT_MEM_COUNT/total_numa;
+
+	return segments_per_node;
+}
+
+/*
+ * Allocate `size` bytes of zeroed, physically contiguous persistent memory.
+ *
+ * The request is rounded up to whole ALLOC_UNIT pages and served from a
+ * single segment of the chosen socket by a first-fit search over that
+ * segment's page map (a string of FREE/ALLOCATED characters).  Passing
+ * SOCKET_ID_ANY picks one of the available sockets at random.
+ *
+ * Returns the buffer, or NULL when the library is not initialised, size is 0
+ * or larger than one segment (MAX_CONT_MEMORY), the socket is out of range,
+ * no run of free pages is long enough, the pages found are not physically
+ * contiguous, or the allocation cannot be recorded.
+ */
+void* rte_persistent_alloc(size_t size, int socket)
+{
+	int num_numa = rte_persistent_memory_num_numa();
+	if(allocated_segments == 0 || num_numa <= 0)
+	{
+		RTE_LOG(ERR, EAL, "Persistent memory is not initialized\n");
+		return NULL;
+	}
+	/* A request longer than one segment would overrun find_str below. */
+	if(size == 0 || size > MAX_CONT_MEMORY)
+	{
+		RTE_LOG(ERR, EAL, "Invalid persistent memory size: %zu\n", size);
+		return NULL;
+	}
+	if(socket == SOCKET_ID_ANY)
+	{
+		socket = rte_rand() % num_numa;
+	}
+	if(socket < 0 || socket >= num_numa || socket >= RTE_MAX_NUMA_NODES)
+	{
+		RTE_LOG(ERR, EAL, "Invalid persistent memory socket: %d\n", socket);
+		return NULL;
+	}
+
+	int l_start = global_to_local_start(num_numa, socket);
+	int l_range = global_to_local_range(num_numa);
+	int num_page = (int)((size + ALLOC_UNIT - 1) / ALLOC_UNIT);
+
+	/* Search pattern: num_page consecutive FREE markers. */
+	char find_str[SUBSEGMENT_COUNT+1];
+	memset(find_str, (int)FREE, num_page);
+	find_str[num_page] = 0;
+
+	int k;
+	for(k=l_start; k<(l_start + l_range); k++)
+	{
+		char* start = alloc_array[k];
+		char* found = strstr(start, find_str);
+		if(!found)
+			continue;
+
+		/*
+		 * alloc_array is indexed by global segment number, while
+		 * persistent_allocated_memory is indexed per socket.
+		 */
+		void* base = persistent_allocated_memory[socket][k - l_start];
+		if(base == 0)
+			continue;
+
+		int offset = found - start;
+		void* buffer = RTE_PTR_ADD(base, ALLOC_UNIT*offset);
+		memset(buffer, 0, num_page*ALLOC_UNIT);
+
+		/* Every page must directly follow the previous one in physical memory. */
+		phys_addr_t hw = rte_mem_virt2phy(buffer);
+		int contiguous = (hw != RTE_BAD_PHYS_ADDR);
+		int j;
+		for(j=1; contiguous && j<num_page; j++)
+		{
+			size_t shift = (size_t)ALLOC_UNIT * j;
+			phys_addr_t cur_hw = rte_mem_virt2phy(RTE_PTR_ADD(buffer, shift));
+			if(cur_hw != hw + shift)
+				contiguous = 0;
+		}
+		if(!contiguous)
+		{
+			RTE_LOG(ERR, EAL, "Hugepage is not contiguous, buffer: %p, pages: %d\n",
+					buffer, num_page);
+			return NULL;
+		}
+
+		int index = rte_hash_add_key(allocated_segments, &buffer);
+		if(index < 0)
+		{
+			RTE_LOG(ERR, EAL, "Cannot record persistent allocation at %p\n", buffer);
+			return NULL;
+		}
+		assert(info_array[index].addr == 0);
+		info_array[index].addr = buffer;
+		info_array[index].hw_addr = hw;
+		info_array[index].seg_count = num_page;
+		info_array[index].seg_index = k;
+		info_array[index].sub_index = offset;
+
+		/* Only now claim the pages, so the failure paths above leave no state behind. */
+		memset(found, (int)ALLOCATED, num_page);
+		return buffer;
+	}
+
+	RTE_LOG(ERR, EAL, "Cannot allocate persistent memory, size: %zu, socket: %d\n", size, socket);
+	return NULL;
+}
+
+/*
+ * Return the physical address that was recorded for a buffer when
+ * rte_persistent_alloc() handed it out.
+ *
+ * Returns 0 when addr is NULL, the library is not initialised, or addr is
+ * not the start of a live allocation.
+ */
+phys_addr_t rte_persistent_hw_addr(const void* addr)
+{
+	if(addr == 0 || allocated_segments == 0)
+		return 0;
+	int index = rte_hash_lookup(allocated_segments, (const void*)&addr);
+	/* A negative index is an error code and must never reach info_array. */
+	if(index < 0 || info_array[index].addr != addr)
+	{
+		RTE_LOG(ERR, EAL, "%p is not a persistent memory allocation\n", addr);
+		return 0;
+	}
+	return info_array[index].hw_addr;
+}
+
+/*
+ * Return the size in bytes of the allocation that starts at addr, i.e. the
+ * requested size rounded up to whole ALLOC_UNIT pages.
+ *
+ * Returns 0 when addr is NULL, the library is not initialised, or addr is
+ * not the start of a live allocation.
+ */
+size_t rte_persistent_mem_length(const void* addr)
+{
+	if(addr == 0 || allocated_segments == 0)
+		return 0;
+	int index = rte_hash_lookup(allocated_segments, (const void*)&addr);
+	/* A negative index is an error code and must never reach info_array. */
+	if(index < 0 || info_array[index].addr != addr)
+	{
+		RTE_LOG(ERR, EAL, "%p is not a persistent memory allocation\n", addr);
+		return 0;
+	}
+	return info_array[index].seg_count * ALLOC_UNIT;
+}
+
+/*
+ * Release a buffer obtained from rte_persistent_alloc(): remove its hash
+ * entry, clear its info_array record and mark its pages FREE again.
+ *
+ * A NULL pointer is ignored.  An address that is not the start of a live
+ * allocation is logged and otherwise ignored.
+ */
+void rte_persistent_free(void* addr)
+{
+	if(addr == 0 || allocated_segments == 0)
+		return;
+	int index = rte_hash_lookup(allocated_segments, (const void*)&addr);
+	/* A negative index is an error code and must never reach info_array. */
+	if(index < 0 || info_array[index].addr != addr)
+	{
+		RTE_LOG(ERR, EAL, "%p is not a persistent memory allocation\n", addr);
+		return;
+	}
+
+	struct alloc_info* info = &info_array[index];
+	char* pages = &alloc_array[info->seg_index][info->sub_index];
+	int len = info->seg_count;
+
+	rte_hash_del_key(allocated_segments, (const void*)&addr);
+
+	/* Zeroing the record sets addr back to 0, which marks the slot unused. */
+	memset(info, 0, sizeof(*info));
+	memset(pages, (int)FREE, len);
+}
diff --git a/lib/librte_persistent/rte_persistent.h b/lib/librte_persistent/rte_persistent.h
new file mode 100644
index 0000000..b59bd86
--- /dev/null
+++ b/lib/librte_persistent/rte_persistent.h
@@ -0,0 +1,20 @@
+/*
+ * rte_persistent.h
+ *
+ *  Created on: Jun 23, 2015
+ *      Author: leeopop
+ */
+
+#ifndef LIBRTE_PERSISTENT_RTE_PERSISTENT_H_
+#define LIBRTE_PERSISTENT_RTE_PERSISTENT_H_
+
+#include <rte_common.h>
+#include <rte_memory.h>
+
+int rte_persistent_init(void); /**< Set up the allocator's hash table and page maps; only the first call does any work. */
+void* rte_persistent_alloc(size_t size, int socket); /**< Allocate size bytes, rounded up to 4K pages and zeroed, on socket (or SOCKET_ID_ANY); 0 if nothing fits. */
+phys_addr_t rte_persistent_hw_addr(const void* addr); /**< Physical address recorded for an allocated buffer; 0 when addr is 0. */
+void rte_persistent_free(void* addr); /**< Give a buffer from rte_persistent_alloc() back to the allocator. */
+size_t rte_persistent_mem_length(const void* addr); /**< Size of the allocation at addr in bytes, a multiple of the 4K page size. */
+
+#endif /* LIBRTE_PERSISTENT_RTE_PERSISTENT_H_ */
diff --git a/lib/librte_persistent/rte_persistent_version.map b/lib/librte_persistent/rte_persistent_version.map
new file mode 100644
index 0000000..f81d505
--- /dev/null
+++ b/lib/librte_persistent/rte_persistent_version.map
@@ -0,0 +1,11 @@
+DPDK_2.0 {
+	global:
+
+	rte_persistent_init;
+	rte_persistent_alloc;
+	rte_persistent_hw_addr;
+	rte_persistent_free;
+	rte_persistent_mem_length;
+
+	local: *;
+};
-- 
2.1.4

  reply	other threads:[~2015-07-06 13:28 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-07-06 13:28 [dpdk-dev] [PATCH 0/2] Native uio-based PMD for Mellanox ConnectX-3 devices leeopop
2015-07-06 13:28 ` leeopop [this message]
2015-07-06 14:34   ` [dpdk-dev] [PATCH 1/2] eal/persistent: new library to hold memory region after program exit Avi Kivity
2015-07-06 14:41     ` Thomas Monjalon
2015-07-06 19:19   ` Stephen Hemminger
2015-07-06 13:28 ` [dpdk-dev] [PATCH 2/2] mlnx_uio: new poll mode driver leeopop
2015-07-06 14:17 ` [dpdk-dev] [PATCH 0/2] Native uio-based PMD for Mellanox ConnectX-3 devices Thomas Monjalon
2015-07-06 15:57   ` Keunhong Lee
2015-07-06 16:14     ` Thomas Monjalon
2015-07-06 17:55       ` Keunhong Lee
2015-07-07  6:50         ` Olga Shern
2015-07-07  7:02           ` Pavel Odintsov
2015-07-07  9:18             ` Olga Shern

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1436189297-7780-2-git-send-email-dlrmsghd@gmail.com \
    --to=dlrmsghd@gmail.com \
    --cc=dev@dpdk.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).