* [dpdk-dev] [PATCH] mem: fix how to calculate space left in a hugetlbfs @ 2015-11-12 0:17 Jianfeng Tan 2015-11-12 2:10 ` [dpdk-dev] [PATCH v2] mem: " Jianfeng Tan ` (3 more replies) 0 siblings, 4 replies; 10+ messages in thread From: Jianfeng Tan @ 2015-11-12 0:17 UTC (permalink / raw) To: dev This patch enables calculating space left in a hugetlbfs. There are three sources to get the information: 1. from sysfs; 2. from option size specified when mount; 3. use statfs. We should use the minimum one of these three sizes. Signed-off-by: Jianfeng Tan <jianfeng.tan@intel.com> --- lib/librte_eal/linuxapp/eal/eal_hugepage_info.c | 85 ++++++++++++++++++++++++- 1 file changed, 84 insertions(+), 1 deletion(-) diff --git a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c index 18858e2..6db8c33 100644 --- a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c +++ b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c @@ -44,6 +44,8 @@ #include <unistd.h> #include <errno.h> #include <sys/queue.h> +#include <sys/vfs.h> +#include <mntent.h> #include <rte_memory.h> #include <rte_memzone.h> @@ -189,6 +191,70 @@ get_hugepage_dir(uint64_t hugepage_sz) return retval; } +/* Caller to make sure this mnt_dir exist + */ +static uint64_t +get_hugetlbfs_mount_size(const char *mnt_dir) +{ + char *start, *end, *opt_size; + struct mntent *ent; + uint64_t size; + FILE *f; + int len; + + f = setmntent("/proc/mounts", "r"); + if (f == NULL) { + RTE_LOG(ERR, EAL, "setmntent() error: %s\n", + strerror(errno)); + return 0; + } + while (NULL != (ent = getmntent(f))) { + if (!strcmp(ent->mnt_dir, mnt_dir)) + break; + } + + start = hasmntopt(ent, "size"); + if (start == NULL) { + RTE_LOG(DEBUG, EAL, "option size not specified for %s\n", + mnt_dir); + size = 0; + goto end; + } + start += strlen("size="); + end = strstr(start, ","); + if (end != NULL) + len = end - start; + else + len = strlen(start); + opt_size = strndup(start, len); + size = 
rte_str_to_size(opt_size); + free(opt_size); + +end: + endmntent(f); + return size; +} + +/* Caller to make sure this mount has option size + * so that statfs is not zero. + */ +static uint64_t +get_hugetlbfs_free_size(const char *mnt_dir) +{ + int r; + struct statfs stats; + + r = statfs(mnt_dir, &stats); + if (r != 0) { + RTE_LOG(ERR, EAL, "statfs() error: %s\n", + strerror(errno)); + return 0; + } + + return stats.f_bfree * stats.f_bsize; +} + + /* * Clear the hugepage directory of whatever hugepage files * there are. Checks if the file is locked (i.e. @@ -329,9 +395,26 @@ eal_hugepage_info_init(void) if (clear_hugedir(hpi->hugedir) == -1) break; + /* there are three souces of how much space left in a + * hugetlbfs dir. + */ + uint64_t sz_left, sz_sysfs, sz_option, sz_statfs; + + sz_sysfs = get_num_hugepages(dirent->d_name) * + hpi->hugepage_sz; + sz_left = sz_sysfs; + sz_option = get_hugetlbfs_mount_size(hpi->hugedir); + if (sz_option) { + sz_statfs = get_hugetlbfs_free_size(hpi->hugedir); + sz_left = RTE_MIN(sz_sysfs, sz_statfs); + RTE_LOG(INFO, "sz_sysfs: %"PRIu64", sz_option: " + "%"PRIu64", sz_statfs: %"PRIu64"\n", + sz_sysfs, sz_option, sz_statfs); + } + /* for now, put all pages into socket 0, * later they will be sorted */ - hpi->num_pages[0] = get_num_hugepages(dirent->d_name); + hpi->num_pages[0] = sz_left / hpi->hugepage_sz; #ifndef RTE_ARCH_64 /* for 32-bit systems, limit number of hugepages to -- 2.1.4 ^ permalink raw reply [flat|nested] 10+ messages in thread
* [dpdk-dev] [PATCH v2] mem: calculate space left in a hugetlbfs 2015-11-12 0:17 [dpdk-dev] [PATCH] mem: fix how to calculate space left in a hugetlbfs Jianfeng Tan @ 2015-11-12 2:10 ` Jianfeng Tan 2015-11-12 13:14 ` Sergio Gonzalez Monroy 2015-11-12 7:48 ` [dpdk-dev] [PATCH] mem: fix how to " De Lara Guarch, Pablo ` (2 subsequent siblings) 3 siblings, 1 reply; 10+ messages in thread From: Jianfeng Tan @ 2015-11-12 2:10 UTC (permalink / raw) To: dev This patch enables calculating space left in a hugetlbfs. There are three sources to get the information: 1. from sysfs; 2. from option size specified when mount; 3. use statfs. We should use the minimum one of these three sizes. Signed-off-by: Jianfeng Tan <jianfeng.tan@intel.com> --- Changes in v2: - reword title - fix compiler error of v1 lib/librte_eal/linuxapp/eal/eal_hugepage_info.c | 85 ++++++++++++++++++++++++- 1 file changed, 84 insertions(+), 1 deletion(-) diff --git a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c index 18858e2..8305a58 100644 --- a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c +++ b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c @@ -44,6 +44,8 @@ #include <unistd.h> #include <errno.h> #include <sys/queue.h> +#include <sys/vfs.h> +#include <mntent.h> #include <rte_memory.h> #include <rte_memzone.h> @@ -189,6 +191,70 @@ get_hugepage_dir(uint64_t hugepage_sz) return retval; } +/* Caller to make sure this mnt_dir exist + */ +static uint64_t +get_hugetlbfs_mount_size(const char *mnt_dir) +{ + char *start, *end, *opt_size; + struct mntent *ent; + uint64_t size; + FILE *f; + int len; + + f = setmntent("/proc/mounts", "r"); + if (f == NULL) { + RTE_LOG(ERR, EAL, "setmntent() error: %s\n", + strerror(errno)); + return 0; + } + while (NULL != (ent = getmntent(f))) { + if (!strcmp(ent->mnt_dir, mnt_dir)) + break; + } + + start = hasmntopt(ent, "size"); + if (start == NULL) { + RTE_LOG(DEBUG, EAL, "option size not specified for %s\n", + mnt_dir); + 
size = 0; + goto end; + } + start += strlen("size="); + end = strstr(start, ","); + if (end != NULL) + len = end - start; + else + len = strlen(start); + opt_size = strndup(start, len); + size = rte_str_to_size(opt_size); + free(opt_size); + +end: + endmntent(f); + return size; +} + +/* Caller to make sure this mount has option size + * so that statfs is not zero. + */ +static uint64_t +get_hugetlbfs_free_size(const char *mnt_dir) +{ + int r; + struct statfs stats; + + r = statfs(mnt_dir, &stats); + if (r != 0) { + RTE_LOG(ERR, EAL, "statfs() error: %s\n", + strerror(errno)); + return 0; + } + + return stats.f_bfree * stats.f_bsize; +} + + /* * Clear the hugepage directory of whatever hugepage files * there are. Checks if the file is locked (i.e. @@ -329,9 +395,26 @@ eal_hugepage_info_init(void) if (clear_hugedir(hpi->hugedir) == -1) break; + /* there are three souces of how much space left in a + * hugetlbfs dir. + */ + uint64_t sz_left, sz_sysfs, sz_option, sz_statfs; + + sz_sysfs = get_num_hugepages(dirent->d_name) * + hpi->hugepage_sz; + sz_left = sz_sysfs; + sz_option = get_hugetlbfs_mount_size(hpi->hugedir); + if (sz_option) { + sz_statfs = get_hugetlbfs_free_size(hpi->hugedir); + sz_left = RTE_MIN(sz_sysfs, sz_statfs); + RTE_LOG(INFO, EAL, "sz_sysfs: %"PRIu64", sz_option: " + "%"PRIu64", sz_statfs: %"PRIu64"\n", + sz_sysfs, sz_option, sz_statfs); + } + /* for now, put all pages into socket 0, * later they will be sorted */ - hpi->num_pages[0] = get_num_hugepages(dirent->d_name); + hpi->num_pages[0] = sz_left / hpi->hugepage_sz; #ifndef RTE_ARCH_64 /* for 32-bit systems, limit number of hugepages to -- 2.1.4 ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [dpdk-dev] [PATCH v2] mem: calculate space left in a hugetlbfs 2015-11-12 2:10 ` [dpdk-dev] [PATCH v2] mem: " Jianfeng Tan @ 2015-11-12 13:14 ` Sergio Gonzalez Monroy 0 siblings, 0 replies; 10+ messages in thread From: Sergio Gonzalez Monroy @ 2015-11-12 13:14 UTC (permalink / raw) To: Jianfeng Tan, dev Hi, On 12/11/2015 02:10, Jianfeng Tan wrote: > This patch enables calculating space left in a hugetlbfs. > There are three sources to get the information: 1. from > sysfs; 2. from option size specified when mount; 3. use > statfs. We should use the minimum one of these three sizes. We could improve the message by stating the current issue (when the hugetlbfs mount specifies size= option), then how the patch deals with the problem and also outstanding issues. > Signed-off-by: Jianfeng Tan <jianfeng.tan@intel.com> > --- > Changes in v2: > - reword title > - fix compiler error of v1 > > lib/librte_eal/linuxapp/eal/eal_hugepage_info.c | 85 ++++++++++++++++++++++++- > 1 file changed, 84 insertions(+), 1 deletion(-) > > diff --git a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c > index 18858e2..8305a58 100644 > --- a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c > +++ b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c > @@ -44,6 +44,8 @@ > #include <unistd.h> > #include <errno.h> > #include <sys/queue.h> > +#include <sys/vfs.h> > +#include <mntent.h> > > #include <rte_memory.h> > #include <rte_memzone.h> > @@ -189,6 +191,70 @@ get_hugepage_dir(uint64_t hugepage_sz) > return retval; > } > > +/* Caller to make sure this mnt_dir exist > + */ > +static uint64_t > +get_hugetlbfs_mount_size(const char *mnt_dir) > +{ > + char *start, *end, *opt_size; > + struct mntent *ent; > + uint64_t size; > + FILE *f; > + int len; > + > + f = setmntent("/proc/mounts", "r"); > + if (f == NULL) { > + RTE_LOG(ERR, EAL, "setmntent() error: %s\n", > + strerror(errno)); > + return 0; > + } > + while (NULL != (ent = getmntent(f))) { > + if 
(!strcmp(ent->mnt_dir, mnt_dir)) > + break; > + } > + > + start = hasmntopt(ent, "size"); > + if (start == NULL) { > + RTE_LOG(DEBUG, EAL, "option size not specified for %s\n", > + mnt_dir); > + size = 0; > + goto end; > + } > + start += strlen("size="); > + end = strstr(start, ","); > + if (end != NULL) > + len = end - start; > + else > + len = strlen(start); > + opt_size = strndup(start, len); > + size = rte_str_to_size(opt_size); > + free(opt_size); > + > +end: > + endmntent(f); > + return size; > +} > + The function above is very similar to get_hugepage_dir, ie. open and parse /proc/mounts. I think it would be better to have a more generic function that retrieves all needed info from /proc/mounts. > +/* Caller to make sure this mount has option size > + * so that statfs is not zero. > + */ > +static uint64_t > +get_hugetlbfs_free_size(const char *mnt_dir) > +{ > + int r; > + struct statfs stats; > + > + r = statfs(mnt_dir, &stats); > + if (r != 0) { > + RTE_LOG(ERR, EAL, "statfs() error: %s\n", > + strerror(errno)); > + return 0; > + } > + > + return stats.f_bfree * stats.f_bsize; > +} > + > + > /* > * Clear the hugepage directory of whatever hugepage files > * there are. Checks if the file is locked (i.e. > @@ -329,9 +395,26 @@ eal_hugepage_info_init(void) > if (clear_hugedir(hpi->hugedir) == -1) > break; > > + /* there are three souces of how much space left in a > + * hugetlbfs dir. 
> + */ > + uint64_t sz_left, sz_sysfs, sz_option, sz_statfs; > + > + sz_sysfs = get_num_hugepages(dirent->d_name) * > + hpi->hugepage_sz; > + sz_left = sz_sysfs; > + sz_option = get_hugetlbfs_mount_size(hpi->hugedir); > + if (sz_option) { > + sz_statfs = get_hugetlbfs_free_size(hpi->hugedir); > + sz_left = RTE_MIN(sz_sysfs, sz_statfs); > + RTE_LOG(INFO, EAL, "sz_sysfs: %"PRIu64", sz_option: " > + "%"PRIu64", sz_statfs: %"PRIu64"\n", > + sz_sysfs, sz_option, sz_statfs); > + } > + > /* for now, put all pages into socket 0, > * later they will be sorted */ > - hpi->num_pages[0] = get_num_hugepages(dirent->d_name); > + hpi->num_pages[0] = sz_left / hpi->hugepage_sz; > > #ifndef RTE_ARCH_64 > /* for 32-bit systems, limit number of hugepages to A couple more things: - Update release-notes and/or relevant doc about improved detection of free hugepages - Update the status of previous/old patches in patchwork Sergio ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [dpdk-dev] [PATCH] mem: fix how to calculate space left in a hugetlbfs 2015-11-12 0:17 [dpdk-dev] [PATCH] mem: fix how to calculate space left in a hugetlbfs Jianfeng Tan 2015-11-12 2:10 ` [dpdk-dev] [PATCH v2] mem: " Jianfeng Tan @ 2015-11-12 7:48 ` De Lara Guarch, Pablo 2015-11-12 1:57 ` [dpdk-dev] [PATCH v2] mem: " Jianfeng Tan 2015-11-12 17:38 ` [dpdk-dev] [PATCH] mem: fix how to " Stephen Hemminger 2015-11-18 2:42 ` [dpdk-dev] [PATCH v3] mem: " Jianfeng Tan 3 siblings, 1 reply; 10+ messages in thread From: De Lara Guarch, Pablo @ 2015-11-12 7:48 UTC (permalink / raw) To: Tan, Jianfeng, dev Hi Jianfeng, > -----Original Message----- > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Jianfeng Tan > Sent: Thursday, November 12, 2015 12:18 AM > To: dev@dpdk.org > Subject: [dpdk-dev] [PATCH] mem: fix how to calculate space left in a > hugetlbfs > > This patch enables calculating space left in a hugetlbfs. > There are three sources to get the information: 1. from > sysfs; 2. from option size specified when mount; 3. use > statfs. We should use the minimum one of these three sizes. > > Signed-off-by: Jianfeng Tan <jianfeng.tan@intel.com> You should reword the title of the patch, as this does not look like a fix. ^ permalink raw reply [flat|nested] 10+ messages in thread
* [dpdk-dev] [PATCH v2] mem: calculate space left in a hugetlbfs 2015-11-12 7:48 ` [dpdk-dev] [PATCH] mem: fix how to " De Lara Guarch, Pablo @ 2015-11-12 1:57 ` Jianfeng Tan 0 siblings, 0 replies; 10+ messages in thread From: Jianfeng Tan @ 2015-11-12 1:57 UTC (permalink / raw) To: dev This patch enables calculating space left in a hugetlbfs. There are three sources to get the information: 1. from sysfs; 2. from option size specified when mount; 3. use statfs. We should use the minimum one of these three sizes. Signed-off-by: Jianfeng Tan <jianfeng.tan@intel.com> --- lib/librte_eal/linuxapp/eal/eal_hugepage_info.c | 85 ++++++++++++++++++++++++- 1 file changed, 84 insertions(+), 1 deletion(-) diff --git a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c index 18858e2..8305a58 100644 --- a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c +++ b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c @@ -44,6 +44,8 @@ #include <unistd.h> #include <errno.h> #include <sys/queue.h> +#include <sys/vfs.h> +#include <mntent.h> #include <rte_memory.h> #include <rte_memzone.h> @@ -189,6 +191,70 @@ get_hugepage_dir(uint64_t hugepage_sz) return retval; } +/* Caller to make sure this mnt_dir exist + */ +static uint64_t +get_hugetlbfs_mount_size(const char *mnt_dir) +{ + char *start, *end, *opt_size; + struct mntent *ent; + uint64_t size; + FILE *f; + int len; + + f = setmntent("/proc/mounts", "r"); + if (f == NULL) { + RTE_LOG(ERR, EAL, "setmntent() error: %s\n", + strerror(errno)); + return 0; + } + while (NULL != (ent = getmntent(f))) { + if (!strcmp(ent->mnt_dir, mnt_dir)) + break; + } + + start = hasmntopt(ent, "size"); + if (start == NULL) { + RTE_LOG(DEBUG, EAL, "option size not specified for %s\n", + mnt_dir); + size = 0; + goto end; + } + start += strlen("size="); + end = strstr(start, ","); + if (end != NULL) + len = end - start; + else + len = strlen(start); + opt_size = strndup(start, len); + size = rte_str_to_size(opt_size); + 
free(opt_size); + +end: + endmntent(f); + return size; +} + +/* Caller to make sure this mount has option size + * so that statfs is not zero. + */ +static uint64_t +get_hugetlbfs_free_size(const char *mnt_dir) +{ + int r; + struct statfs stats; + + r = statfs(mnt_dir, &stats); + if (r != 0) { + RTE_LOG(ERR, EAL, "statfs() error: %s\n", + strerror(errno)); + return 0; + } + + return stats.f_bfree * stats.f_bsize; +} + + /* * Clear the hugepage directory of whatever hugepage files * there are. Checks if the file is locked (i.e. @@ -329,9 +395,26 @@ eal_hugepage_info_init(void) if (clear_hugedir(hpi->hugedir) == -1) break; + /* there are three souces of how much space left in a + * hugetlbfs dir. + */ + uint64_t sz_left, sz_sysfs, sz_option, sz_statfs; + + sz_sysfs = get_num_hugepages(dirent->d_name) * + hpi->hugepage_sz; + sz_left = sz_sysfs; + sz_option = get_hugetlbfs_mount_size(hpi->hugedir); + if (sz_option) { + sz_statfs = get_hugetlbfs_free_size(hpi->hugedir); + sz_left = RTE_MIN(sz_sysfs, sz_statfs); + RTE_LOG(INFO, EAL, "sz_sysfs: %"PRIu64", sz_option: " + "%"PRIu64", sz_statfs: %"PRIu64"\n", + sz_sysfs, sz_option, sz_statfs); + } + /* for now, put all pages into socket 0, * later they will be sorted */ - hpi->num_pages[0] = get_num_hugepages(dirent->d_name); + hpi->num_pages[0] = sz_left / hpi->hugepage_sz; #ifndef RTE_ARCH_64 /* for 32-bit systems, limit number of hugepages to -- 2.1.4 ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [dpdk-dev] [PATCH] mem: fix how to calculate space left in a hugetlbfs 2015-11-12 0:17 [dpdk-dev] [PATCH] mem: fix how to calculate space left in a hugetlbfs Jianfeng Tan 2015-11-12 2:10 ` [dpdk-dev] [PATCH v2] mem: " Jianfeng Tan 2015-11-12 7:48 ` [dpdk-dev] [PATCH] mem: fix how to " De Lara Guarch, Pablo @ 2015-11-12 17:38 ` Stephen Hemminger 2015-11-12 17:49 ` Thomas Monjalon 2015-11-18 2:42 ` [dpdk-dev] [PATCH v3] mem: " Jianfeng Tan 3 siblings, 1 reply; 10+ messages in thread From: Stephen Hemminger @ 2015-11-12 17:38 UTC (permalink / raw) To: Jianfeng Tan; +Cc: dev On Thu, 12 Nov 2015 08:17:57 +0800 Jianfeng Tan <jianfeng.tan@intel.com> wrote: > This patch enables calculating space left in a hugetlbfs. > There are three sources to get the information: 1. from > sysfs; 2. from option size specified when mount; 3. use > statfs. We should use the minimum one of these three sizes. > > Signed-off-by: Jianfeng Tan <jianfeng.tan@intel.com> Thanks, the hugetlbfs usage up until now has been rather brute force. I wonder if long term it might be better to defer all this stuff to another library like libhugetlbfs. https://github.com/libhugetlbfs/libhugetlbfs Especially when dealing with other architectures it might provide some nice abstraction. ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [dpdk-dev] [PATCH] mem: fix how to calculate space left in a hugetlbfs 2015-11-12 17:38 ` [dpdk-dev] [PATCH] mem: fix how to " Stephen Hemminger @ 2015-11-12 17:49 ` Thomas Monjalon 0 siblings, 0 replies; 10+ messages in thread From: Thomas Monjalon @ 2015-11-12 17:49 UTC (permalink / raw) To: Stephen Hemminger; +Cc: dev 2015-11-12 09:38, Stephen Hemminger: > On Thu, 12 Nov 2015 08:17:57 +0800 > Jianfeng Tan <jianfeng.tan@intel.com> wrote: > > > This patch enables calculating space left in a hugetlbfs. > > There are three sources to get the information: 1. from > > sysfs; 2. from option size specified when mount; 3. use > > statfs. We should use the minimum one of these three sizes. > > > > Signed-off-by: Jianfeng Tan <jianfeng.tan@intel.com> > > Thanks, the hugetlbfs usage up until now has been rather brute force. > I wonder if long term it might be better to defer all this stuff > to another library like libhugetlbfs. > https://github.com/libhugetlbfs/libhugetlbfs > > Especially when dealing with other architectures it might provide > some nice abstraction. Maybe, maybe not :) Sergio already looked at it: http://dpdk.org/ml/archives/dev/2015-July/022080.html ^ permalink raw reply [flat|nested] 10+ messages in thread
* [dpdk-dev] [PATCH v3] mem: calculate space left in a hugetlbfs 2015-11-12 0:17 [dpdk-dev] [PATCH] mem: fix how to calculate space left in a hugetlbfs Jianfeng Tan ` (2 preceding siblings ...) 2015-11-12 17:38 ` [dpdk-dev] [PATCH] mem: fix how to " Stephen Hemminger @ 2015-11-18 2:42 ` Jianfeng Tan 2015-11-18 10:28 ` Sergio Gonzalez Monroy 2015-12-21 8:34 ` Qiu, Michael 3 siblings, 2 replies; 10+ messages in thread From: Jianfeng Tan @ 2015-11-18 2:42 UTC (permalink / raw) To: dev Currently DPDK does not respect the quota of a hugetlbfs mount. It will fail to init the EAL because it tries to map the number of free hugepages in the system rather than using the number specified in the quota for that mount. To solve this issue, we take the quota into consideration when calculating the number of hugepages to map. We use either the number specified in the quota, or number of available hugepages, whichever is lower. There are possible race conditions when multiple applications allocate hugepages in different hugetlbfs mounts of the same size, so the suggested system would have a pool with enough hugepages for all hugetlbfs mount quotas. There is, however, still an open issue with CONFIG_RTE_EAL_SINGLE_FILE_SEGMENTS. When this option is enabled (IVSHMEM target does this by default), having hugetlbfs mounts with quota will fail to remap hugepages because it relies on having mapped all free hugepages in the system.
Signed-off-by: Jianfeng Tan <jianfeng.tan@intel.com> --- v3 changes: - commit msg rework - add hpi->quota to record quota of each hugetlbfs - get_hugepage_dir -> get_hugepage_mnt_info to fill hugedir and quota - add info in release note v2 changes: - reword title - fix compiler error of v1 doc/guides/rel_notes/release_2_2.rst | 5 + lib/librte_eal/common/eal_internal_cfg.h | 1 + lib/librte_eal/linuxapp/eal/eal_hugepage_info.c | 145 +++++++++++++++--------- 3 files changed, 98 insertions(+), 53 deletions(-) diff --git a/doc/guides/rel_notes/release_2_2.rst b/doc/guides/rel_notes/release_2_2.rst index 0781ae6..5b8777a 100644 --- a/doc/guides/rel_notes/release_2_2.rst +++ b/doc/guides/rel_notes/release_2_2.rst @@ -102,6 +102,11 @@ New Features * **Added port hotplug support to xenvirt.** +* **Added support of taking mount quota into account.** + + Take the quota into consideration when calculating the number of hugepages + to map. We use either the number specified in the quota, or number of + available hugepages, whichever is lower. 
Resolved Issues --------------- diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h index 5f1367e..38ca410 100644 --- a/lib/librte_eal/common/eal_internal_cfg.h +++ b/lib/librte_eal/common/eal_internal_cfg.h @@ -50,6 +50,7 @@ */ struct hugepage_info { uint64_t hugepage_sz; /**< size of a huge page */ + uint64_t quota; /**< quota of a hugetlbfs */ const char *hugedir; /**< dir where hugetlbfs is mounted */ uint32_t num_pages[RTE_MAX_NUMA_NODES]; /**< number of hugepages of that size on each socket */ diff --git a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c index 18858e2..612d87d 100644 --- a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c +++ b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c @@ -44,6 +44,8 @@ #include <unistd.h> #include <errno.h> #include <sys/queue.h> +#include <sys/vfs.h> +#include <mntent.h> #include <rte_memory.h> #include <rte_memzone.h> @@ -124,71 +126,90 @@ get_default_hp_size(void) return size; } -static const char * -get_hugepage_dir(uint64_t hugepage_sz) +static void +get_hugetlbfs_mnt_info(struct hugepage_info *hpi) { - enum proc_mount_fieldnames { - DEVICE = 0, - MOUNTPT, - FSTYPE, - OPTIONS, - _FIELDNAME_MAX - }; + FILE *f; + struct mntent *ent; + char *str_size; + char *str_pagesz; + uint64_t pagesz; + + static const char *proc_mounts = "/proc/mounts"; + static const char *hugetlbfs_str = "hugetlbfs"; + static const char *opt_pagesize = "pagesize"; + static const size_t opt_pagesize_len = sizeof("pagesize") - 1; + static const char *opt_size = "size"; + static const size_t opt_size_len = sizeof("size") - 1; static uint64_t default_size = 0; - const char proc_mounts[] = "/proc/mounts"; - const char hugetlbfs_str[] = "hugetlbfs"; - const size_t htlbfs_str_len = sizeof(hugetlbfs_str) - 1; - const char pagesize_opt[] = "pagesize="; - const size_t pagesize_opt_len = sizeof(pagesize_opt) - 1; - const char split_tok = ' '; - char 
*splitstr[_FIELDNAME_MAX]; - char buf[BUFSIZ]; - char *retval = NULL; - - FILE *fd = fopen(proc_mounts, "r"); - if (fd == NULL) - rte_panic("Cannot open %s\n", proc_mounts); if (default_size == 0) default_size = get_default_hp_size(); - while (fgets(buf, sizeof(buf), fd)){ - if (rte_strsplit(buf, sizeof(buf), splitstr, _FIELDNAME_MAX, - split_tok) != _FIELDNAME_MAX) { - RTE_LOG(ERR, EAL, "Error parsing %s\n", proc_mounts); - break; /* return NULL */ - } + f = setmntent(proc_mounts, "r"); + if (f == NULL) + rte_panic("Cannot open %s\n", proc_mounts); + + while (NULL != (ent = getmntent(f))) { + + if (strcmp(ent->mnt_type, hugetlbfs_str) != 0) + continue; /* we have a specified --huge-dir option, only examine that dir */ if (internal_config.hugepage_dir != NULL && - strcmp(splitstr[MOUNTPT], internal_config.hugepage_dir) != 0) + strcmp(ent->mnt_dir, internal_config.hugepage_dir) != 0) continue; - if (strncmp(splitstr[FSTYPE], hugetlbfs_str, htlbfs_str_len) == 0){ - const char *pagesz_str = strstr(splitstr[OPTIONS], pagesize_opt); - - /* if no explicit page size, the default page size is compared */ - if (pagesz_str == NULL){ - if (hugepage_sz == default_size){ - retval = strdup(splitstr[MOUNTPT]); - break; - } - } - /* there is an explicit page size, so check it */ - else { - uint64_t pagesz = rte_str_to_size(&pagesz_str[pagesize_opt_len]); - if (pagesz == hugepage_sz) { - retval = strdup(splitstr[MOUNTPT]); - break; - } - } - } /* end if strncmp hugetlbfs */ - } /* end while fgets */ + str_pagesz = hasmntopt(ent, opt_pagesize); + /* if no explicit page size, the default page size is compared */ + if (!str_pagesz) + pagesz = default_size; + /* there is an explicit page size, so check it */ + else + pagesz = rte_str_to_size(&str_pagesz[opt_pagesize_len + 1]); - fclose(fd); - return retval; + if (pagesz == hpi->hugepage_sz) + break; + } + + if (ent == NULL) { + hpi->hugedir = NULL; + goto end; + } + + hpi->hugedir = strdup(ent->mnt_dir); + + str_size = hasmntopt(ent, 
opt_size); + if (str_size == NULL) { + RTE_LOG(DEBUG, EAL, "size not specified for %s\n", + hpi->hugedir); + hpi->quota = 0; + goto end; + } + hpi->quota = rte_str_to_size(&str_size[opt_size_len + 1]); + +end: + endmntent(f); } +/* Caller to make sure this mount has option size + * so that statistics from statfs is valid. + */ +static uint32_t +get_hugetlbfs_free_pages(const char *mnt_dir) +{ + int r; + struct statfs stats; + + r = statfs(mnt_dir, &stats); + if (r != 0) + rte_panic("statfs() %s error: %s\n", + mnt_dir, strerror(errno)); + + return (uint32_t)stats.f_bfree; +} + + /* * Clear the hugepage directory of whatever hugepage files * there are. Checks if the file is locked (i.e. @@ -300,7 +321,8 @@ eal_hugepage_info_init(void) hpi = &internal_config.hugepage_info[num_sizes]; hpi->hugepage_sz = rte_str_to_size(&dirent->d_name[dirent_start_len]); - hpi->hugedir = get_hugepage_dir(hpi->hugepage_sz); + + get_hugetlbfs_mnt_info(hpi); /* first, check if we have a mountpoint */ if (hpi->hugedir == NULL) { @@ -329,9 +351,26 @@ eal_hugepage_info_init(void) if (clear_hugedir(hpi->hugedir) == -1) break; + uint32_t num_left, num_statfs; + num_left = get_num_hugepages(dirent->d_name); + if (hpi->quota) { + /* when option size is specified, calculate free + * pages left in this hugetlbfs using statfs. + */ + num_statfs = get_hugetlbfs_free_pages(hpi->hugedir); + RTE_LOG(DEBUG, EAL, + "%u free hugepages from a quota of 0x%" PRIx64 + ", of size 0x%" PRIx64 " mounted at %s\n", + num_statfs, + hpi->quota, + hpi->hugepage_sz, + hpi->hugedir); + num_left = RTE_MIN(num_left, num_statfs); + } + /* for now, put all pages into socket 0, * later they will be sorted */ - hpi->num_pages[0] = get_num_hugepages(dirent->d_name); + hpi->num_pages[0] = num_left; #ifndef RTE_ARCH_64 /* for 32-bit systems, limit number of hugepages to -- 2.1.4 ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [dpdk-dev] [PATCH v3] mem: calculate space left in a hugetlbfs 2015-11-18 2:42 ` [dpdk-dev] [PATCH v3] mem: " Jianfeng Tan @ 2015-11-18 10:28 ` Sergio Gonzalez Monroy 2015-12-21 8:34 ` Qiu, Michael 1 sibling, 0 replies; 10+ messages in thread From: Sergio Gonzalez Monroy @ 2015-11-18 10:28 UTC (permalink / raw) To: Jianfeng Tan; +Cc: dev On 18/11/2015 02:42, Jianfeng Tan wrote: > Currently DPDK does not respect the quota of a hugetlbfs mount. > It will fail to init the EAL because it tries to map the number of > free hugepages in the system rather than using the number specified > in the quota for that mount. > > To solve this issue, we take the quota into consideration when > calculating the number of hugepages to map. We use either the number > specified in the quota, or number of available hugepages, whichever > is lower. > > There are possible race conditions when multiple applications > allocate hugepages in different hugetlbfs mounts of the same size, > so the suggested system would have a pool with enough hugepages for > all hugetlbfs mount quotas. > > There is, however, still an open issue with > CONFIG_RTE_EAL_SINGLE_FILE_SEGMENTS. When this option is enabled > (IVSHMEM target does this by default), having hugetlbfs mounts with > quota will fail to remap hugepages because it relies on having > mapped all free hugepages in the system.
> > Signed-off-by: Jianfeng Tan <jianfeng.tan@intel.com> > --- > v3 changes: > - commit msg rework > - add hpi->quota to record quota of each hugetlbfs > - get_hugepage_dir -> get_hugepage_mnt_info to fill hugedir and quota > - add info in release note > > v2 changes: > - reword title > - fix compiler error of v1 > > doc/guides/rel_notes/release_2_2.rst | 5 + > lib/librte_eal/common/eal_internal_cfg.h | 1 + > lib/librte_eal/linuxapp/eal/eal_hugepage_info.c | 145 +++++++++++++++--------- > 3 files changed, 98 insertions(+), 53 deletions(-) > > diff --git a/doc/guides/rel_notes/release_2_2.rst b/doc/guides/rel_notes/release_2_2.rst > index 0781ae6..5b8777a 100644 > --- a/doc/guides/rel_notes/release_2_2.rst > +++ b/doc/guides/rel_notes/release_2_2.rst > @@ -102,6 +102,11 @@ New Features > > * **Added port hotplug support to xenvirt.** > > +* **Added support of taking mount quota into account.** > + > + Take the quota into consideration when calculating the number of hugepages > + to map. We use either the number specified in the quota, or number of > + available hugepages, whichever is lower. 
> > Resolved Issues > --------------- > diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h > index 5f1367e..38ca410 100644 > --- a/lib/librte_eal/common/eal_internal_cfg.h > +++ b/lib/librte_eal/common/eal_internal_cfg.h > @@ -50,6 +50,7 @@ > */ > struct hugepage_info { > uint64_t hugepage_sz; /**< size of a huge page */ > + uint64_t quota; /**< quota of a hugetlbfs */ > const char *hugedir; /**< dir where hugetlbfs is mounted */ > uint32_t num_pages[RTE_MAX_NUMA_NODES]; > /**< number of hugepages of that size on each socket */ > diff --git a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c > index 18858e2..612d87d 100644 > --- a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c > +++ b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c > @@ -44,6 +44,8 @@ > #include <unistd.h> > #include <errno.h> > #include <sys/queue.h> > +#include <sys/vfs.h> > +#include <mntent.h> > > #include <rte_memory.h> > #include <rte_memzone.h> > @@ -124,71 +126,90 @@ get_default_hp_size(void) > return size; > } > > -static const char * > -get_hugepage_dir(uint64_t hugepage_sz) > +static void > +get_hugetlbfs_mnt_info(struct hugepage_info *hpi) > { > - enum proc_mount_fieldnames { > - DEVICE = 0, > - MOUNTPT, > - FSTYPE, > - OPTIONS, > - _FIELDNAME_MAX > - }; > + FILE *f; > + struct mntent *ent; > + char *str_size; > + char *str_pagesz; > + uint64_t pagesz; > + > + static const char *proc_mounts = "/proc/mounts"; > + static const char *hugetlbfs_str = "hugetlbfs"; > + static const char *opt_pagesize = "pagesize"; > + static const size_t opt_pagesize_len = sizeof("pagesize") - 1; > + static const char *opt_size = "size"; > + static const size_t opt_size_len = sizeof("size") - 1; > static uint64_t default_size = 0; > - const char proc_mounts[] = "/proc/mounts"; > - const char hugetlbfs_str[] = "hugetlbfs"; > - const size_t htlbfs_str_len = sizeof(hugetlbfs_str) - 1; > - const char pagesize_opt[] = 
"pagesize="; > - const size_t pagesize_opt_len = sizeof(pagesize_opt) - 1; > - const char split_tok = ' '; > - char *splitstr[_FIELDNAME_MAX]; > - char buf[BUFSIZ]; > - char *retval = NULL; > - > - FILE *fd = fopen(proc_mounts, "r"); > - if (fd == NULL) > - rte_panic("Cannot open %s\n", proc_mounts); > > if (default_size == 0) > default_size = get_default_hp_size(); > > - while (fgets(buf, sizeof(buf), fd)){ > - if (rte_strsplit(buf, sizeof(buf), splitstr, _FIELDNAME_MAX, > - split_tok) != _FIELDNAME_MAX) { > - RTE_LOG(ERR, EAL, "Error parsing %s\n", proc_mounts); > - break; /* return NULL */ > - } > + f = setmntent(proc_mounts, "r"); > + if (f == NULL) > + rte_panic("Cannot open %s\n", proc_mounts); > + > + while (NULL != (ent = getmntent(f))) { > + > + if (strcmp(ent->mnt_type, hugetlbfs_str) != 0) > + continue; > > /* we have a specified --huge-dir option, only examine that dir */ > if (internal_config.hugepage_dir != NULL && > - strcmp(splitstr[MOUNTPT], internal_config.hugepage_dir) != 0) > + strcmp(ent->mnt_dir, internal_config.hugepage_dir) != 0) > continue; > > - if (strncmp(splitstr[FSTYPE], hugetlbfs_str, htlbfs_str_len) == 0){ > - const char *pagesz_str = strstr(splitstr[OPTIONS], pagesize_opt); > - > - /* if no explicit page size, the default page size is compared */ > - if (pagesz_str == NULL){ > - if (hugepage_sz == default_size){ > - retval = strdup(splitstr[MOUNTPT]); > - break; > - } > - } > - /* there is an explicit page size, so check it */ > - else { > - uint64_t pagesz = rte_str_to_size(&pagesz_str[pagesize_opt_len]); > - if (pagesz == hugepage_sz) { > - retval = strdup(splitstr[MOUNTPT]); > - break; > - } > - } > - } /* end if strncmp hugetlbfs */ > - } /* end while fgets */ > + str_pagesz = hasmntopt(ent, opt_pagesize); > + /* if no explicit page size, the default page size is compared */ > + if (!str_pagesz) > + pagesz = default_size; > + /* there is an explicit page size, so check it */ > + else > + pagesz = 
rte_str_to_size(&str_pagesz[opt_pagesize_len + 1]); > > - fclose(fd); > - return retval; > + if (pagesz == hpi->hugepage_sz) > + break; > + } > + > + if (ent == NULL) { > + hpi->hugedir = NULL; > + goto end; > + } > + > + hpi->hugedir = strdup(ent->mnt_dir); > + > + str_size = hasmntopt(ent, opt_size); > + if (str_size == NULL) { > + RTE_LOG(DEBUG, EAL, "size not specified for %s\n", > + hpi->hugedir); > + hpi->quota = 0; > + goto end; > + } > + hpi->quota = rte_str_to_size(&str_size[opt_size_len + 1]); > + > +end: > + endmntent(f); > } > > +/* Caller to make sure this mount has option size > + * so that statistics from statfs is valid. > + */ > +static uint32_t > +get_hugetlbfs_free_pages(const char *mnt_dir) > +{ > + int r; > + struct statfs stats; > + > + r = statfs(mnt_dir, &stats); > + if (r != 0) > + rte_panic("statfs() %s error: %s\n", > + mnt_dir, strerror(errno)); > + > + return (uint32_t)stats.f_bfree; > +} > + > + > /* > * Clear the hugepage directory of whatever hugepage files > * there are. Checks if the file is locked (i.e. > @@ -300,7 +321,8 @@ eal_hugepage_info_init(void) > hpi = &internal_config.hugepage_info[num_sizes]; > hpi->hugepage_sz = > rte_str_to_size(&dirent->d_name[dirent_start_len]); > - hpi->hugedir = get_hugepage_dir(hpi->hugepage_sz); > + > + get_hugetlbfs_mnt_info(hpi); > > /* first, check if we have a mountpoint */ > if (hpi->hugedir == NULL) { > @@ -329,9 +351,26 @@ eal_hugepage_info_init(void) > if (clear_hugedir(hpi->hugedir) == -1) > break; > > + uint32_t num_left, num_statfs; > + num_left = get_num_hugepages(dirent->d_name); > + if (hpi->quota) { > + /* when option size is specified, calculate free > + * pages left in this hugetlbfs using statfs. 
> + */ > + num_statfs = get_hugetlbfs_free_pages(hpi->hugedir); > + RTE_LOG(DEBUG, EAL, > + "%u free hugepages from a quota of 0x%" PRIx64 > + ", of size 0x%" PRIx64 " mounted at %s\n", > + num_statfs, > + hpi->quota, > + hpi->hugepage_sz, > + hpi->hugedir); > + num_left = RTE_MIN(num_left, num_statfs); > + } > + > /* for now, put all pages into socket 0, > * later they will be sorted */ > - hpi->num_pages[0] = get_num_hugepages(dirent->d_name); > + hpi->num_pages[0] = num_left; > > #ifndef RTE_ARCH_64 > /* for 32-bit systems, limit number of hugepages to Acked-by: Sergio Gonzalez Monroy <sergio.gonzalez.monroy@intel.com> ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [dpdk-dev] [PATCH v3] mem: calculate space left in a hugetlbfs 2015-11-18 2:42 ` [dpdk-dev] [PATCH v3] mem: " Jianfeng Tan 2015-11-18 10:28 ` Sergio Gonzalez Monroy @ 2015-12-21 8:34 ` Qiu, Michael 1 sibling, 0 replies; 10+ messages in thread From: Qiu, Michael @ 2015-12-21 8:34 UTC (permalink / raw) To: Tan, Jianfeng, dev On 2015/11/18 17:42, Jianfeng Tan wrote: > Currently DPDK does not respect the quota of a hugetlbfs mount. > It will fail to init the EAL because it tries to map the number of > free hugepages in the system rather than using the number specified > in the quota for that mount. > > To solve this issue, we take the quota into consideration when > calculating the number of hugepages to map. We use either the number > specified in the quota, or the number of available hugepages, whichever > is lower. > > There are possible race conditions when multiple applications > allocate hugepages in different hugetlbfs mounts of the same size, > so the suggested system would have a pool with enough hugepages for > all hugetlbfs mount quotas. > > There is, however, still an open issue with > CONFIG_RTE_EAL_SINGLE_FILE_SEGMENTS. When this option is enabled > (IVSHMEM target does this by default), having hugetlbfs mounts with > quota will fail to remap hugepages because it relies on having > mapped all free hugepages in the system. > > Signed-off-by: Jianfeng Tan <jianfeng.tan@intel.com> > Acked-by: Michael Qiu <michael.qiu@intel.com> ^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2015-12-21 8:34 UTC | newest] Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2015-11-12 0:17 [dpdk-dev] [PATCH] mem: fix how to calculate space left in a hugetlbfs Jianfeng Tan 2015-11-12 2:10 ` [dpdk-dev] [PATCH v2] mem: " Jianfeng Tan 2015-11-12 13:14 ` Sergio Gonzalez Monroy 2015-11-12 7:48 ` [dpdk-dev] [PATCH] mem: fix how to " De Lara Guarch, Pablo 2015-11-12 1:57 ` [dpdk-dev] [PATCH v2] mem: " Jianfeng Tan 2015-11-12 17:38 ` [dpdk-dev] [PATCH] mem: fix how to " Stephen Hemminger 2015-11-12 17:49 ` Thomas Monjalon 2015-11-18 2:42 ` [dpdk-dev] [PATCH v3] mem: " Jianfeng Tan 2015-11-18 10:28 ` Sergio Gonzalez Monroy 2015-12-21 8:34 ` Qiu, Michael
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).