From: Tetsuya Mukawa <mukawa@igel.co.jp>
To: dev@dpdk.org
Cc: nakajima.yoshihiro@lab.ntt.co.jp, mst@redhat.com
Subject: [dpdk-dev] [PATCH v1 1/2] EAL: Add new EAL "--contig-mem" option
Date: Wed, 16 Dec 2015 17:37:28 +0900	[thread overview]
Message-ID: <1450255049-2263-2-git-send-email-mukawa@igel.co.jp> (raw)
In-Reply-To: <1450255049-2263-1-git-send-email-mukawa@igel.co.jp>

This option makes EAL allocate its memory as one physically contiguous
region and provide a single file descriptor for it.
For now, this memory is intended to be used by the virtio-net PMD
running on a host or in a container.

DPDK already has the "RTE_EAL_SINGLE_FILE_SEGMENTS" compile-time option.
It creates one file descriptor for each contiguous memory region. But
even with this option, DPDK may still allocate memory that consists of
multiple contiguous regions, and therefore multiple file descriptors.

This patch adds the "--contig-mem" option. It is only valid when
"RTE_EAL_SINGLE_FILE_SEGMENTS" is enabled. When the option is
specified, EAL memory will consist of exactly one contiguous region.
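
For example, assuming a build with CONFIG_RTE_EAL_SINGLE_FILE_SEGMENTS=y
(the compile-time switch that defines RTE_EAL_SINGLE_FILE_SEGMENTS), an
application could request 1024 MB of EAL memory as a single contiguous
region like this (the application and its non-EAL arguments are only
placeholders):

  ./testpmd -c 0x3 -n 4 -m 1024 --contig-mem -- -i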

To implement this option, the EAL memory code is changed as follows
(a simplified sketch of the new sizing logic is shown after the list).
 - In calc_num_pages_per_socket(), EAL checks whether memory of the
   requested size can be allocated as one contiguous region.
 - In unmap_unneeded_hugepages(), EAL unmaps groups of pages that are
   too small to hold the requested contiguous region.
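
In outline, the new sizing decision works like the sketch below. This is
not part of the patch: pick_contig_socket is a made-up helper name, and
the real logic lives inline in calc_num_pages_per_socket(). It assumes,
as in the diff, that each hugepage_file entry describes one physically
contiguous run of "repeated" pages of "size" bytes.

/* Sketch only: choose sockets that can satisfy the request with one run */
static int
pick_contig_socket(uint64_t request, const struct hugepage_file *tbl,
		unsigned nr_files, uint64_t *memory)
{
	uint64_t max_contig[RTE_MAX_NUMA_NODES] = { 0 };
	uint64_t max_overall = 0;
	uint64_t run;
	unsigned i, socket;

	/* largest single contiguous run available on each socket */
	for (i = 0; i < nr_files; i++) {
		socket = tbl[i].socket_id;
		run = tbl[i].size * tbl[i].repeated;
		if (run > max_contig[socket])
			max_contig[socket] = run;
		if (run > max_overall)
			max_overall = run;
	}

	/* fail if no single run can hold the whole request */
	if (max_overall < request)
		return -1;

	/* every socket whose largest run fits gets the full request;
	 * the existing per-socket loop then allocates from one of them */
	for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++)
		if (request <= max_contig[socket])
			memory[socket] = request;

	return 0;
}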

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 lib/librte_eal/common/eal_common_options.c |  7 +++
 lib/librte_eal/common/eal_internal_cfg.h   |  1 +
 lib/librte_eal/common/eal_options.h        |  2 +
 lib/librte_eal/linuxapp/eal/eal_memory.c   | 77 ++++++++++++++++++++++++++++--
 4 files changed, 82 insertions(+), 5 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index 29942ea..55d537e 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -95,6 +95,7 @@ eal_long_options[] = {
 	{OPT_VFIO_INTR,         1, NULL, OPT_VFIO_INTR_NUM        },
 	{OPT_VMWARE_TSC_MAP,    0, NULL, OPT_VMWARE_TSC_MAP_NUM   },
 	{OPT_XEN_DOM0,          0, NULL, OPT_XEN_DOM0_NUM         },
+	{OPT_CONTIG_MEM,        0, NULL, OPT_CONTIG_MEM_NUM       },
 	{0,                     0, NULL, 0                        }
 };
 
@@ -854,6 +855,12 @@ eal_parse_common_option(int opt, const char *optarg,
 		conf->process_type = eal_parse_proc_type(optarg);
 		break;
 
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+	case OPT_CONTIG_MEM_NUM:
+		conf->contig_mem = 1;
+		break;
+#endif
+
 	case OPT_MASTER_LCORE_NUM:
 		if (eal_parse_master_lcore(optarg) < 0) {
 			RTE_LOG(ERR, EAL, "invalid parameter for --"
diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
index 5f1367e..c02220d 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -66,6 +66,7 @@ struct internal_config {
 	volatile unsigned no_hugetlbfs;   /**< true to disable hugetlbfs */
 	unsigned hugepage_unlink;         /**< true to unlink backing files */
 	volatile unsigned xen_dom0_support; /**< support app running on Xen Dom0*/
+	volatile unsigned contig_mem;     /**< true to create contiguous eal memory */
 	volatile unsigned no_pci;         /**< true to disable PCI */
 	volatile unsigned no_hpet;        /**< true to disable HPET */
 	volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping
diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
index a881c62..a58e371 100644
--- a/lib/librte_eal/common/eal_options.h
+++ b/lib/librte_eal/common/eal_options.h
@@ -55,6 +55,8 @@ enum {
 	OPT_HUGE_DIR_NUM,
 #define OPT_HUGE_UNLINK       "huge-unlink"
 	OPT_HUGE_UNLINK_NUM,
+#define OPT_CONTIG_MEM        "contig-mem"
+	OPT_CONTIG_MEM_NUM,
 #define OPT_LCORES            "lcores"
 	OPT_LCORES_NUM,
 #define OPT_LOG_LEVEL         "log-level"
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index 846fd31..63e5296 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -851,9 +851,21 @@ unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl,
 				/* find a page that matches the criteria */
 				if ((hp->size == hpi[size].hugepage_sz) &&
 						(hp->socket_id == (int) socket)) {
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+					int nr_pg_left = hpi[size].num_pages[socket] - pages_found;
 
+					/*
+					 * If contig_mem is enabled and this group of pages
+					 * is smaller than the space still required, unmap it.
+					 * Also, if we skipped enough pages, unmap the rest.
+					 */
+					if ((pages_found == hpi[size].num_pages[socket]) ||
+							((internal_config.contig_mem) &&
+							(hp->repeated < nr_pg_left))) {
+#else
 					/* if we skipped enough pages, unmap the rest */
 					if (pages_found == hpi[size].num_pages[socket]) {
+#endif
 						uint64_t unmap_len;
 
 #ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
@@ -875,9 +887,6 @@ unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl,
 #ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
 					/* else, check how much do we need to map */
 					else {
-						int nr_pg_left =
-								hpi[size].num_pages[socket] - pages_found;
-
 						/* if we need enough memory to fit into the segment */
 						if (hp->repeated <= nr_pg_left) {
 							pages_found += hp->repeated;
@@ -949,7 +958,9 @@ static int
 calc_num_pages_per_socket(uint64_t * memory,
 		struct hugepage_info *hp_info,
 		struct hugepage_info *hp_used,
-		unsigned num_hp_info)
+		unsigned num_hp_info,
+		struct hugepage_file *hugepg_tbl __rte_unused,
+		unsigned nr_hugefiles __rte_unused)
 {
 	unsigned socket, j, i = 0;
 	unsigned requested, available;
@@ -960,6 +971,46 @@ calc_num_pages_per_socket(uint64_t * memory,
 	if (num_hp_info == 0)
 		return -1;
 
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+	/*
+	 * If contiguous memory is required, but specific memory amounts
+	 * per socket weren't requested
+	 */
+	if ((internal_config.force_sockets == 0)
+			&& (internal_config.contig_mem == 1)) {
+		size_t max_contig_memory_per_socket[RTE_MAX_NUMA_NODES];
+		size_t total_size, max_contig_memory = 0;
+
+		memset(max_contig_memory_per_socket, 0,
+				sizeof(max_contig_memory_per_socket));
+
+		/* Calculate maximum contiguous memory size */
+		for (i = 0; i < nr_hugefiles; i++) {
+			socket = hugepg_tbl[i].socket_id;
+
+			max_contig_memory_per_socket[socket] =
+				RTE_MAX(max_contig_memory_per_socket[socket],
+				(hugepg_tbl[i].size * hugepg_tbl[i].repeated));
+			max_contig_memory = RTE_MAX(max_contig_memory,
+				max_contig_memory_per_socket[socket]);
+		}
+
+		total_size = internal_config.memory;
+
+		/* If there is not enough contiguous memory */
+		if (max_contig_memory < total_mem) {
+			/* Leave the shortfall in total_mem so the warning reports what was found */
+			total_mem -= max_contig_memory;
+			goto out;
+		}
+
+		/* Find suitable contiguous memory */
+		for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) {
+			if (total_size <= max_contig_memory_per_socket[socket])
+				memory[socket] = total_size;
+		}
+	} else
+#endif
 	/* if specific memory amounts per socket weren't requested */
 	if (internal_config.force_sockets == 0) {
 		int cpu_per_socket[RTE_MAX_NUMA_NODES];
@@ -1009,6 +1060,18 @@ calc_num_pages_per_socket(uint64_t * memory,
 	for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_mem != 0; socket++) {
 		/* skips if the memory on specific socket wasn't requested */
 		for (i = 0; i < num_hp_info && memory[socket] != 0; i++){
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+			if (internal_config.contig_mem) {
+				size_t memory_size;
+
+				memory_size = hp_info[i].num_pages[socket] *
+							hp_info[i].hugepage_sz;
+				/* Skip this page size if it cannot cover the request */
+				if (memory[socket] > memory_size)
+					continue;
+			}
+#endif
+
 			hp_used[i].hugedir = hp_info[i].hugedir;
 			hp_used[i].num_pages[socket] = RTE_MIN(
 					memory[socket] / hp_info[i].hugepage_sz,
@@ -1064,6 +1127,9 @@ calc_num_pages_per_socket(uint64_t * memory,
 		}
 	}
 
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+out:
+#endif
 	/* if we didn't satisfy total memory requirements */
 	if (total_mem > 0) {
 		requested = (unsigned) (internal_config.memory / 0x100000);
@@ -1268,7 +1334,8 @@ rte_eal_hugepage_init(void)
 	/* calculate final number of pages */
 	nr_hugepages = calc_num_pages_per_socket(memory,
 			internal_config.hugepage_info, used_hp,
-			internal_config.num_hugepage_sizes);
+			internal_config.num_hugepage_sizes,
+			tmp_hp, nr_hugefiles);
 
 	/* error if not enough memory available */
 	if (nr_hugepages < 0)
-- 
2.1.4


Thread overview: 22+ messages
2015-11-19 10:57 [dpdk-dev] [RFC PATCH 0/2] Virtio-net PMD Extension to work on host Tetsuya Mukawa
2015-11-19 10:57 ` [dpdk-dev] [RFC PATCH 1/2] EAL: Add new EAL "--shm" option Tetsuya Mukawa
2015-12-16  8:37   ` [dpdk-dev] [PATCH v1 0/2] Virtio-net PMD Extension to work on host Tetsuya Mukawa
2015-12-16  8:37     ` Tetsuya Mukawa [this message]
2015-12-16  8:37     ` [dpdk-dev] [PATCH v1 2/2] virtio: Extend virtio-net PMD to support container environment Tetsuya Mukawa
2015-12-28 11:57       ` Pavel Fedin
2016-01-06  3:57         ` Tetsuya Mukawa
2016-01-06  5:56           ` Tan, Jianfeng
2016-01-06  7:27             ` Tetsuya Mukawa
2015-12-24 14:05     ` [dpdk-dev] [PATCH v1 0/2] Virtio-net PMD Extension to work on host Tan, Jianfeng
2015-12-28 11:06       ` Tetsuya Mukawa
2016-01-06  3:57         ` Tetsuya Mukawa
2016-01-06  5:42           ` Tan, Jianfeng
2016-01-06  7:35             ` Tetsuya Mukawa
2016-01-11  5:31               ` Tan, Jianfeng
2015-11-19 10:57 ` [dpdk-dev] [RFC PATCH 2/2] virtio: Extend virtio-net PMD to support container environment Tetsuya Mukawa
2015-11-19 18:16 ` [dpdk-dev] [RFC PATCH 0/2] Virtio-net PMD Extension to work on host Rich Lane
2015-11-20  2:00   ` Xie, Huawei
2015-11-20  2:35     ` Tetsuya Mukawa
2015-11-20  2:53       ` Tetsuya Mukawa
2015-12-28  5:15 ` Qiu, Michael
2015-12-28 11:06   ` Tetsuya Mukawa
