DPDK patches and discussions
 help / color / mirror / Atom feed
From: Sergio Gonzalez Monroy <sergio.gonzalez.monroy@intel.com>
To: Jianfeng Tan <jianfeng.tan@intel.com>
Cc: dev@dpdk.org
Subject: Re: [dpdk-dev] [PATCH v3] mem: calculate space left in a hugetlbfs
Date: Wed, 18 Nov 2015 10:28:43 +0000	[thread overview]
Message-ID: <564C52DB.9000306@intel.com> (raw)
In-Reply-To: <1447814537-133466-1-git-send-email-jianfeng.tan@intel.com>

On 18/11/2015 02:42, Jianfeng Tan wrote:
> Currently DPDK does not respect the quota of a hugetlbfs mount.
> It will fail to init the EAL because it tries to map the number of
> free hugepages in the system rather than using the number specified
> in the quota for that mount.
>
> To solve this issue, we take the quota into consideration when
> calculating the number of hugepages to map.  We use either the number
> specified in the quota, or number of available hugepages, whichever
> is lower.
>
> There are possible race conditions when multiple applications
> allocate hugepages in different hugetlbfs mounts of the same size,
> so the suggested system would have a pool with enough hugepages for
> all hugetlbfs mount quotas.
>
> There is, however, still an open issue with
> CONFIG_RTE_EAL_SINGLE_FILE_SEGMENTS. When this option is enabled
> (IVSHMEM target does this by default), having hugetlbfs mounts with
> quota will fail to remap hugepages because it relies on having
> mapped all free hugepages in the system.
>
> Signed-off-by: Jianfeng Tan <jianfeng.tan@intel.com>
> ---
> v3 changes:
>   - commit msg rework
>   - add hpi->quota to record quota of each hugetlbfs
>   - get_hugepage_dir -> get_hugetlbfs_mnt_info to fill hugedir and quota
>   - add info in release note
>
> v2 changes:
>   - reword title
>   - fix compiler error of v1
>
>   doc/guides/rel_notes/release_2_2.rst            |   5 +
>   lib/librte_eal/common/eal_internal_cfg.h        |   1 +
>   lib/librte_eal/linuxapp/eal/eal_hugepage_info.c | 145 +++++++++++++++---------
>   3 files changed, 98 insertions(+), 53 deletions(-)
>
> diff --git a/doc/guides/rel_notes/release_2_2.rst b/doc/guides/rel_notes/release_2_2.rst
> index 0781ae6..5b8777a 100644
> --- a/doc/guides/rel_notes/release_2_2.rst
> +++ b/doc/guides/rel_notes/release_2_2.rst
> @@ -102,6 +102,11 @@ New Features
>   
>   * **Added port hotplug support to xenvirt.**
>   
> +* **Added support of taking mount quota into account.**
> +
> +  Take the quota into consideration when calculating the number of hugepages
> +  to map. We use either the number specified in the quota, or number of
> +  available hugepages, whichever is lower.
>   
>   Resolved Issues
>   ---------------
> diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
> index 5f1367e..38ca410 100644
> --- a/lib/librte_eal/common/eal_internal_cfg.h
> +++ b/lib/librte_eal/common/eal_internal_cfg.h
> @@ -50,6 +50,7 @@
>    */
>   struct hugepage_info {
>   	uint64_t hugepage_sz;   /**< size of a huge page */
> +	uint64_t quota;   /**< quota of a hugetlbfs */
>   	const char *hugedir;    /**< dir where hugetlbfs is mounted */
>   	uint32_t num_pages[RTE_MAX_NUMA_NODES];
>   				/**< number of hugepages of that size on each socket */
> diff --git a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c
> index 18858e2..612d87d 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c
> @@ -44,6 +44,8 @@
>   #include <unistd.h>
>   #include <errno.h>
>   #include <sys/queue.h>
> +#include <sys/vfs.h>
> +#include <mntent.h>
>   
>   #include <rte_memory.h>
>   #include <rte_memzone.h>
> @@ -124,71 +126,90 @@ get_default_hp_size(void)
>   	return size;
>   }
>   
> -static const char *
> -get_hugepage_dir(uint64_t hugepage_sz)
> +static void
> +get_hugetlbfs_mnt_info(struct hugepage_info *hpi)
>   {
> -	enum proc_mount_fieldnames {
> -		DEVICE = 0,
> -		MOUNTPT,
> -		FSTYPE,
> -		OPTIONS,
> -		_FIELDNAME_MAX
> -	};
> +	FILE *f;
> +	struct mntent *ent;
> +	char *str_size;
> +	char *str_pagesz;
> +	uint64_t pagesz;
> +
> +	static const char *proc_mounts = "/proc/mounts";
> +	static const char *hugetlbfs_str = "hugetlbfs";
> +	static const char *opt_pagesize = "pagesize";
> +	static const size_t opt_pagesize_len = sizeof("pagesize") - 1;
> +	static const char *opt_size = "size";
> +	static const size_t opt_size_len = sizeof("size") - 1;
>   	static uint64_t default_size = 0;
> -	const char proc_mounts[] = "/proc/mounts";
> -	const char hugetlbfs_str[] = "hugetlbfs";
> -	const size_t htlbfs_str_len = sizeof(hugetlbfs_str) - 1;
> -	const char pagesize_opt[] = "pagesize=";
> -	const size_t pagesize_opt_len = sizeof(pagesize_opt) - 1;
> -	const char split_tok = ' ';
> -	char *splitstr[_FIELDNAME_MAX];
> -	char buf[BUFSIZ];
> -	char *retval = NULL;
> -
> -	FILE *fd = fopen(proc_mounts, "r");
> -	if (fd == NULL)
> -		rte_panic("Cannot open %s\n", proc_mounts);
>   
>   	if (default_size == 0)
>   		default_size = get_default_hp_size();
>   
> -	while (fgets(buf, sizeof(buf), fd)){
> -		if (rte_strsplit(buf, sizeof(buf), splitstr, _FIELDNAME_MAX,
> -				split_tok) != _FIELDNAME_MAX) {
> -			RTE_LOG(ERR, EAL, "Error parsing %s\n", proc_mounts);
> -			break; /* return NULL */
> -		}
> +	f = setmntent(proc_mounts, "r");
> +	if (f == NULL)
> +		rte_panic("Cannot open %s\n", proc_mounts);
> +
> +	while (NULL != (ent = getmntent(f))) {
> +
> +		if (strcmp(ent->mnt_type, hugetlbfs_str) != 0)
> +			continue;
>   
>   		/* we have a specified --huge-dir option, only examine that dir */
>   		if (internal_config.hugepage_dir != NULL &&
> -				strcmp(splitstr[MOUNTPT], internal_config.hugepage_dir) != 0)
> +				strcmp(ent->mnt_dir, internal_config.hugepage_dir) != 0)
>   			continue;
>   
> -		if (strncmp(splitstr[FSTYPE], hugetlbfs_str, htlbfs_str_len) == 0){
> -			const char *pagesz_str = strstr(splitstr[OPTIONS], pagesize_opt);
> -
> -			/* if no explicit page size, the default page size is compared */
> -			if (pagesz_str == NULL){
> -				if (hugepage_sz == default_size){
> -					retval = strdup(splitstr[MOUNTPT]);
> -					break;
> -				}
> -			}
> -			/* there is an explicit page size, so check it */
> -			else {
> -				uint64_t pagesz = rte_str_to_size(&pagesz_str[pagesize_opt_len]);
> -				if (pagesz == hugepage_sz) {
> -					retval = strdup(splitstr[MOUNTPT]);
> -					break;
> -				}
> -			}
> -		} /* end if strncmp hugetlbfs */
> -	} /* end while fgets */
> +		str_pagesz = hasmntopt(ent, opt_pagesize);
> +		/* if no explicit page size, the default page size is compared */
> +		if (!str_pagesz)
> +			pagesz = default_size;
> +		/* there is an explicit page size, so check it */
> +		else
> +			pagesz = rte_str_to_size(&str_pagesz[opt_pagesize_len + 1]);
>   
> -	fclose(fd);
> -	return retval;
> +		if (pagesz == hpi->hugepage_sz)
> +			break;
> +	}
> +
> +	if (ent == NULL) {
> +		hpi->hugedir = NULL;
> +		goto end;
> +	}
> +
> +	hpi->hugedir = strdup(ent->mnt_dir);
> +
> +	str_size = hasmntopt(ent, opt_size);
> +	if (str_size == NULL) {
> +		RTE_LOG(DEBUG, EAL, "size not specified for %s\n",
> +			hpi->hugedir);
> +		hpi->quota = 0;
> +		goto end;
> +	}
> +	hpi->quota = rte_str_to_size(&str_size[opt_size_len + 1]);
> +
> +end:
> +	endmntent(f);
>   }
>   
> +/* The caller must make sure this mount has the size option set,
> + * so that the statistics from statfs are valid.
> + */
> +static uint32_t
> +get_hugetlbfs_free_pages(const char *mnt_dir)
> +{
> +	int r;
> +	struct statfs stats;
> +
> +	r = statfs(mnt_dir, &stats);
> +	if (r != 0)
> +		rte_panic("statfs() %s error: %s\n",
> +				mnt_dir, strerror(errno));
> +
> +	return (uint32_t)stats.f_bfree;
> +}
> +
> +
>   /*
>    * Clear the hugepage directory of whatever hugepage files
>    * there are. Checks if the file is locked (i.e.
> @@ -300,7 +321,8 @@ eal_hugepage_info_init(void)
>   		hpi = &internal_config.hugepage_info[num_sizes];
>   		hpi->hugepage_sz =
>   			rte_str_to_size(&dirent->d_name[dirent_start_len]);
> -		hpi->hugedir = get_hugepage_dir(hpi->hugepage_sz);
> +
> +		get_hugetlbfs_mnt_info(hpi);
>   
>   		/* first, check if we have a mountpoint */
>   		if (hpi->hugedir == NULL) {
> @@ -329,9 +351,26 @@ eal_hugepage_info_init(void)
>   		if (clear_hugedir(hpi->hugedir) == -1)
>   			break;
>   
> +		uint32_t num_left, num_statfs;
> +		num_left = get_num_hugepages(dirent->d_name);
> +		if (hpi->quota) {
> +			/* when option size is specified, calculate free
> +			 * pages left in this hugetlbfs using statfs.
> +			 */
> +			num_statfs = get_hugetlbfs_free_pages(hpi->hugedir);
> +			RTE_LOG(DEBUG, EAL,
> +					"%u free hugepages from a quota of 0x%" PRIx64
> +					", of size 0x%" PRIx64 " mounted at %s\n",
> +					num_statfs,
> +					hpi->quota,
> +					hpi->hugepage_sz,
> +					hpi->hugedir);
> +			num_left = RTE_MIN(num_left, num_statfs);
> +		}
> +
>   		/* for now, put all pages into socket 0,
>   		 * later they will be sorted */
> -		hpi->num_pages[0] = get_num_hugepages(dirent->d_name);
> +		hpi->num_pages[0] = num_left;
>   
>   #ifndef RTE_ARCH_64
>   		/* for 32-bit systems, limit number of hugepages to
Acked-by: Sergio Gonzalez Monroy <sergio.gonzalez.monroy@intel.com>

  reply	other threads:[~2015-11-18 10:28 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-11-12  0:17 [dpdk-dev] [PATCH] mem: fix how to " Jianfeng Tan
2015-11-12  2:10 ` [dpdk-dev] [PATCH v2] mem: " Jianfeng Tan
2015-11-12 13:14   ` Sergio Gonzalez Monroy
2015-11-12  7:48 ` [dpdk-dev] [PATCH] mem: fix how to " De Lara Guarch, Pablo
2015-11-12  1:57   ` [dpdk-dev] [PATCH v2] mem: " Jianfeng Tan
2015-11-12 17:38 ` [dpdk-dev] [PATCH] mem: fix how to " Stephen Hemminger
2015-11-12 17:49   ` Thomas Monjalon
2015-11-18  2:42 ` [dpdk-dev] [PATCH v3] mem: " Jianfeng Tan
2015-11-18 10:28   ` Sergio Gonzalez Monroy [this message]
2015-12-21  8:34   ` Qiu, Michael

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=564C52DB.9000306@intel.com \
    --to=sergio.gonzalez.monroy@intel.com \
    --cc=dev@dpdk.org \
    --cc=jianfeng.tan@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).