From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga04.intel.com (mga04.intel.com [192.55.52.120]) by dpdk.org (Postfix) with ESMTP id A14B6C370 for ; Fri, 5 Feb 2016 19:20:59 +0100 (CET) Received: from fmsmga003.fm.intel.com ([10.253.24.29]) by fmsmga104.fm.intel.com with ESMTP; 05 Feb 2016 10:20:59 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.22,401,1449561600"; d="scan'208";a="647822653" Received: from dpdk06.sh.intel.com ([10.239.128.225]) by FMSMGA003.fm.intel.com with ESMTP; 05 Feb 2016 10:20:57 -0800 From: Jianfeng Tan To: dev@dpdk.org Date: Fri, 5 Feb 2016 19:20:24 +0800 Message-Id: <1454671228-33284-2-git-send-email-jianfeng.tan@intel.com> X-Mailer: git-send-email 2.1.4 In-Reply-To: <1454671228-33284-1-git-send-email-jianfeng.tan@intel.com> References: <1446748276-132087-1-git-send-email-jianfeng.tan@intel.com> <1454671228-33284-1-git-send-email-jianfeng.tan@intel.com> Cc: nakajima.yoshihiro@lab.ntt.co.jp, mst@redhat.com, ann.zhuangyanying@huawei.com Subject: [dpdk-dev] [PATCH v2 1/5] mem: add --single-file to create single mem-backed file X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 05 Feb 2016 18:21:00 -0000 Originally, there are two drawbacks to using hugepages: a. root privilege is needed to access /proc/self/pagemap, which is a prerequisite for allocating physically contiguous memsegs; b. possibly too many hugepage files are created, especially when 2M hugepages are used. Virtual devices don't care about the physical contiguity of allocated hugepages at all. Option --single-file provides a way to allocate all hugepages into a single mem-backed file. Known issues: a. the single-file option relies on the kernel to allocate NUMA-affine memory. b. possible ABI break: originally, --no-huge used anonymous memory instead of a file-backed mapping to create memory. 
Signed-off-by: Huawei Xie Signed-off-by: Jianfeng Tan --- lib/librte_eal/common/eal_common_options.c | 17 ++++++++++ lib/librte_eal/common/eal_internal_cfg.h | 1 + lib/librte_eal/common/eal_options.h | 2 ++ lib/librte_eal/linuxapp/eal/eal.c | 4 +-- lib/librte_eal/linuxapp/eal/eal_memory.c | 50 +++++++++++++++++++++++++----- 5 files changed, 64 insertions(+), 10 deletions(-) diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c index 29942ea..65bccbd 100644 --- a/lib/librte_eal/common/eal_common_options.c +++ b/lib/librte_eal/common/eal_common_options.c @@ -95,6 +95,7 @@ eal_long_options[] = { {OPT_VFIO_INTR, 1, NULL, OPT_VFIO_INTR_NUM }, {OPT_VMWARE_TSC_MAP, 0, NULL, OPT_VMWARE_TSC_MAP_NUM }, {OPT_XEN_DOM0, 0, NULL, OPT_XEN_DOM0_NUM }, + {OPT_SINGLE_FILE, 0, NULL, OPT_SINGLE_FILE_NUM }, {0, 0, NULL, 0 } }; @@ -897,6 +898,10 @@ eal_parse_common_option(int opt, const char *optarg, } break; + case OPT_SINGLE_FILE_NUM: + conf->single_file = 1; + break; + /* don't know what to do, leave this to caller */ default: return 1; @@ -956,6 +961,16 @@ eal_check_common_options(struct internal_config *internal_cfg) "be specified together with --"OPT_NO_HUGE"\n"); return -1; } + if (internal_cfg->single_file && internal_cfg->force_sockets == 1) { + RTE_LOG(ERR, EAL, "Option --"OPT_SINGLE_FILE" cannot " + "be specified together with --"OPT_SOCKET_MEM"\n"); + return -1; + } + if (internal_cfg->single_file && internal_cfg->hugepage_unlink) { + RTE_LOG(ERR, EAL, "Option --"OPT_HUGE_UNLINK" cannot " + "be specified together with --"OPT_SINGLE_FILE"\n"); + return -1; + } if (internal_cfg->no_hugetlbfs && internal_cfg->hugepage_unlink) { RTE_LOG(ERR, EAL, "Option --"OPT_HUGE_UNLINK" cannot " @@ -994,6 +1009,8 @@ eal_common_usage(void) " -n CHANNELS Number of memory channels\n" " -m MB Memory to allocate (see also --"OPT_SOCKET_MEM")\n" " -r RANKS Force number of memory ranks (don't detect)\n" + " --"OPT_SINGLE_FILE" Create just single file 
for shared memory, and \n" + " do not promise physical contiguity of memseg\n" " -b, --"OPT_PCI_BLACKLIST" Add a PCI device in black list.\n" " Prevent EAL from using this PCI device. The argument\n" " format is .\n" diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h index 5f1367e..9117ed9 100644 --- a/lib/librte_eal/common/eal_internal_cfg.h +++ b/lib/librte_eal/common/eal_internal_cfg.h @@ -61,6 +61,7 @@ struct hugepage_info { */ struct internal_config { volatile size_t memory; /**< amount of asked memory */ + volatile unsigned single_file; /**< mmap all hugepages in single file */ volatile unsigned force_nchannel; /**< force number of channels */ volatile unsigned force_nrank; /**< force number of ranks */ volatile unsigned no_hugetlbfs; /**< true to disable hugetlbfs */ diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h index a881c62..e5da14a 100644 --- a/lib/librte_eal/common/eal_options.h +++ b/lib/librte_eal/common/eal_options.h @@ -83,6 +83,8 @@ enum { OPT_VMWARE_TSC_MAP_NUM, #define OPT_XEN_DOM0 "xen-dom0" OPT_XEN_DOM0_NUM, +#define OPT_SINGLE_FILE "single-file" + OPT_SINGLE_FILE_NUM, OPT_LONG_MAX_NUM }; diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c index 635ec36..2bc84f7 100644 --- a/lib/librte_eal/linuxapp/eal/eal.c +++ b/lib/librte_eal/linuxapp/eal/eal.c @@ -790,6 +790,8 @@ rte_eal_init(int argc, char **argv) rte_panic("Cannot init IVSHMEM\n"); #endif + eal_thread_init_master(rte_config.master_lcore); + if (rte_eal_memory_init() < 0) rte_panic("Cannot init memory\n"); @@ -823,8 +825,6 @@ rte_eal_init(int argc, char **argv) if (eal_plugins_init() < 0) rte_panic("Cannot init plugins\n"); - eal_thread_init_master(rte_config.master_lcore); - ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN); RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%x;cpuset=[%s%s])\n", diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c 
b/lib/librte_eal/linuxapp/eal/eal_memory.c index 6008533..68ef49a 100644 --- a/lib/librte_eal/linuxapp/eal/eal_memory.c +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c @@ -1102,20 +1102,54 @@ rte_eal_hugepage_init(void) /* get pointer to global configuration */ mcfg = rte_eal_get_configuration()->mem_config; - /* hugetlbfs can be disabled */ - if (internal_config.no_hugetlbfs) { - addr = mmap(NULL, internal_config.memory, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); + /* when hugetlbfs is disabled or single-file option is specified */ + if (internal_config.no_hugetlbfs || internal_config.single_file) { + int fd; + uint64_t pagesize; + unsigned socket_id = rte_socket_id(); + char filepath[MAX_HUGEPAGE_PATH]; + + if (internal_config.no_hugetlbfs) { + eal_get_hugefile_path(filepath, sizeof(filepath), + "/dev/shm", 0); + pagesize = RTE_PGSIZE_4K; + } else { + struct hugepage_info *hpi; + + hpi = &internal_config.hugepage_info[0]; + eal_get_hugefile_path(filepath, sizeof(filepath), + hpi->hugedir, 0); + pagesize = hpi->hugepage_sz; + } + fd = open(filepath, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR); + if (fd < 0) { + RTE_LOG(ERR, EAL, "%s: open %s failed: %s\n", + __func__, filepath, strerror(errno)); + return -1; + } + + if (ftruncate(fd, internal_config.memory) < 0) { + RTE_LOG(ERR, EAL, "ftuncate %s failed: %s\n", + filepath, strerror(errno)); + return -1; + } + + addr = mmap(NULL, internal_config.memory, + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, fd, 0); if (addr == MAP_FAILED) { - RTE_LOG(ERR, EAL, "%s: mmap() failed: %s\n", __func__, - strerror(errno)); + RTE_LOG(ERR, EAL, "%s: mmap() failed: %s\n", + __func__, strerror(errno)); return -1; } mcfg->memseg[0].phys_addr = (phys_addr_t)(uintptr_t)addr; mcfg->memseg[0].addr = addr; - mcfg->memseg[0].hugepage_sz = RTE_PGSIZE_4K; + mcfg->memseg[0].hugepage_sz = pagesize; mcfg->memseg[0].len = internal_config.memory; - mcfg->memseg[0].socket_id = 0; + mcfg->memseg[0].socket_id = socket_id; + + 
close(fd); + return 0; } -- 2.1.4