From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga14.intel.com (mga14.intel.com [192.55.52.115]) by dpdk.org (Postfix) with ESMTP id DB1085686 for ; Sun, 10 Jan 2016 19:43:17 +0100 (CET) Received: from fmsmga004.fm.intel.com ([10.253.24.48]) by fmsmga103.fm.intel.com with ESMTP; 10 Jan 2016 10:43:17 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.20,548,1444719600"; d="scan'208";a="26927873" Received: from dpdk06.sh.intel.com ([10.239.128.225]) by fmsmga004.fm.intel.com with ESMTP; 10 Jan 2016 10:43:15 -0800 From: Jianfeng Tan To: dev@dpdk.org Date: Sun, 10 Jan 2016 19:42:59 +0800 Message-Id: <1452426182-86851-2-git-send-email-jianfeng.tan@intel.com> X-Mailer: git-send-email 2.1.4 In-Reply-To: <1452426182-86851-1-git-send-email-jianfeng.tan@intel.com> References: <1446748276-132087-1-git-send-email-jianfeng.tan@intel.com> <1452426182-86851-1-git-send-email-jianfeng.tan@intel.com> Cc: nakajima.yoshihiro@lab.ntt.co.jp, mst@redhat.com, ann.zhuangyanying@huawei.com Subject: [dpdk-dev] [PATCH 1/4] mem: add --single-file to create single mem-backed file X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sun, 10 Jan 2016 18:43:18 -0000 Originally, there are two drawbacks to using hugepages: a. root privilege is needed to access /proc/self/pagemap, which is a prerequisite for allocating physically contiguous memsegs; b. possibly too many hugepage files are created, especially when 2M hugepages are used. Virtual devices do not care about the physical contiguity of allocated hugepages at all. Option --single-file provides a way to allocate all hugepages into a single mem-backed file. Known issues: a. the single-file option relies on the kernel to allocate NUMA-affine memory. b. possible ABI break: originally, --no-huge used anonymous memory instead of a file-backed mapping to create memory. 
Signed-off-by: Huawei Xie Signed-off-by: Jianfeng Tan --- lib/librte_eal/common/eal_common_options.c | 17 +++++++++++ lib/librte_eal/common/eal_internal_cfg.h | 1 + lib/librte_eal/common/eal_options.h | 2 ++ lib/librte_eal/linuxapp/eal/eal_memory.c | 45 ++++++++++++++++++++++++++---- 4 files changed, 60 insertions(+), 5 deletions(-) diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c index 29942ea..65bccbd 100644 --- a/lib/librte_eal/common/eal_common_options.c +++ b/lib/librte_eal/common/eal_common_options.c @@ -95,6 +95,7 @@ eal_long_options[] = { {OPT_VFIO_INTR, 1, NULL, OPT_VFIO_INTR_NUM }, {OPT_VMWARE_TSC_MAP, 0, NULL, OPT_VMWARE_TSC_MAP_NUM }, {OPT_XEN_DOM0, 0, NULL, OPT_XEN_DOM0_NUM }, + {OPT_SINGLE_FILE, 0, NULL, OPT_SINGLE_FILE_NUM }, {0, 0, NULL, 0 } }; @@ -897,6 +898,10 @@ eal_parse_common_option(int opt, const char *optarg, } break; + case OPT_SINGLE_FILE_NUM: + conf->single_file = 1; + break; + /* don't know what to do, leave this to caller */ default: return 1; @@ -956,6 +961,16 @@ eal_check_common_options(struct internal_config *internal_cfg) "be specified together with --"OPT_NO_HUGE"\n"); return -1; } + if (internal_cfg->single_file && internal_cfg->force_sockets == 1) { + RTE_LOG(ERR, EAL, "Option --"OPT_SINGLE_FILE" cannot " + "be specified together with --"OPT_SOCKET_MEM"\n"); + return -1; + } + if (internal_cfg->single_file && internal_cfg->hugepage_unlink) { + RTE_LOG(ERR, EAL, "Option --"OPT_HUGE_UNLINK" cannot " + "be specified together with --"OPT_SINGLE_FILE"\n"); + return -1; + } if (internal_cfg->no_hugetlbfs && internal_cfg->hugepage_unlink) { RTE_LOG(ERR, EAL, "Option --"OPT_HUGE_UNLINK" cannot " @@ -994,6 +1009,8 @@ eal_common_usage(void) " -n CHANNELS Number of memory channels\n" " -m MB Memory to allocate (see also --"OPT_SOCKET_MEM")\n" " -r RANKS Force number of memory ranks (don't detect)\n" + " --"OPT_SINGLE_FILE" Create just single file for shared memory, and \n" + " do not 
promise physical contiguity of memseg\n" " -b, --"OPT_PCI_BLACKLIST" Add a PCI device in black list.\n" " Prevent EAL from using this PCI device. The argument\n" " format is .\n" diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h index 5f1367e..9117ed9 100644 --- a/lib/librte_eal/common/eal_internal_cfg.h +++ b/lib/librte_eal/common/eal_internal_cfg.h @@ -61,6 +61,7 @@ struct hugepage_info { */ struct internal_config { volatile size_t memory; /**< amount of asked memory */ + volatile unsigned single_file; /**< mmap all hugepages in single file */ volatile unsigned force_nchannel; /**< force number of channels */ volatile unsigned force_nrank; /**< force number of ranks */ volatile unsigned no_hugetlbfs; /**< true to disable hugetlbfs */ diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h index a881c62..e5da14a 100644 --- a/lib/librte_eal/common/eal_options.h +++ b/lib/librte_eal/common/eal_options.h @@ -83,6 +83,8 @@ enum { OPT_VMWARE_TSC_MAP_NUM, #define OPT_XEN_DOM0 "xen-dom0" OPT_XEN_DOM0_NUM, +#define OPT_SINGLE_FILE "single-file" + OPT_SINGLE_FILE_NUM, OPT_LONG_MAX_NUM }; diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c index 846fd31..2bb1163 100644 --- a/lib/librte_eal/linuxapp/eal/eal_memory.c +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c @@ -80,6 +80,10 @@ #include #include #include +#include +#include +#include +#include #include #include @@ -92,6 +96,9 @@ #include #include +#define _GNU_SOURCE +#include + #include "eal_private.h" #include "eal_internal_cfg.h" #include "eal_filesystem.h" @@ -768,6 +775,7 @@ create_shared_memory(const char *filename, const size_t mem_size) } retval = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); close(fd); + return retval; } @@ -1110,10 +1118,34 @@ rte_eal_hugepage_init(void) /* get pointer to global configuration */ mcfg = rte_eal_get_configuration()->mem_config; - /* 
hugetlbfs can be disabled */ - if (internal_config.no_hugetlbfs) { + /* when hugetlbfs is disabled or single-file option is specified */ + if (internal_config.no_hugetlbfs || internal_config.single_file) { + int fd; + uint64_t pagesize; + unsigned socket_id; + char filepath[MAX_HUGEPAGE_PATH]; + + syscall(SYS_getcpu, NULL, &socket_id, NULL); + + if (internal_config.no_hugetlbfs) { + eal_get_hugefile_path(filepath, sizeof(filepath), + "/dev/shm", 0); + pagesize = RTE_PGSIZE_4K; + } else { + struct hugepage_info *hpi = &internal_config.hugepage_info[0]; + eal_get_hugefile_path(filepath, sizeof(filepath), + hpi->hugedir, 0); + pagesize = hpi->hugepage_sz; + } + fd = open(filepath, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR); + if (fd < 0) { + RTE_LOG(ERR, EAL, "%s: open %s failed: %s\n", __func__, + filepath, strerror(errno)); + return -1; + } + addr = mmap(NULL, internal_config.memory, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); + MAP_SHARED | MAP_POPULATE, fd, 0); if (addr == MAP_FAILED) { RTE_LOG(ERR, EAL, "%s: mmap() failed: %s\n", __func__, strerror(errno)); @@ -1121,9 +1153,12 @@ rte_eal_hugepage_init(void) } mcfg->memseg[0].phys_addr = (phys_addr_t)(uintptr_t)addr; mcfg->memseg[0].addr = addr; - mcfg->memseg[0].hugepage_sz = RTE_PGSIZE_4K; + mcfg->memseg[0].hugepage_sz = pagesize; mcfg->memseg[0].len = internal_config.memory; - mcfg->memseg[0].socket_id = 0; + mcfg->memseg[0].socket_id = socket_id; + + close(fd); + return 0; } -- 2.1.4