From: Shahaf Shuler <shahafs@mellanox.com>
To: Anatoly Burakov <anatoly.burakov@intel.com>,
	"dev@dpdk.org" <dev@dpdk.org>
Cc: "arybchenko@solarflare.com" <arybchenko@solarflare.com>
Subject: Re: [dpdk-dev] [PATCH] eal: check if hugedir write lock is already being held
Date: Mon, 30 Apr 2018 12:48:29 +0000
Message-ID: <DB7PR05MB442656D10AEEF11EB97558B3C3820@DB7PR05MB4426.eurprd05.prod.outlook.com>
In-Reply-To: <00fe124e2057fe6c5596fb0a24bdcce9b36c3b90.1525082912.git.anatoly.burakov@intel.com>

Monday, April 30, 2018 1:38 PM, Anatoly Burakov:
> Cc: arybchenko@solarflare.com; anatoly.burakov@intel.com
> Subject: [dpdk-dev] [PATCH] eal: check if hugedir write lock is already being held
> 
> At hugepage info initialization, EAL takes out a write lock on hugetlbfs
> directories, and drops it after the memory init is finished. However, in
> non-legacy mode, if "-m" or "--socket-mem" switches are passed, this leads
> to a deadlock because EAL tries to allocate pages (and thus take out a
> write lock on hugedir) while still holding a separate hugedir write lock
> in EAL.
> 
> Fix it by checking if the write lock in hugepage info is active, and not
> trying to lock the directory if the hugedir fd is valid.
> 
> Fixes: 1a7dc2252f28 ("mem: revert to using flock and add per-segment lockfiles")
> Cc: anatoly.burakov@intel.com
> 
> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
> ---
>  lib/librte_eal/linuxapp/eal/eal_memalloc.c | 71 ++++++++++++++++++------------
>  1 file changed, 42 insertions(+), 29 deletions(-)
> 
> diff --git a/lib/librte_eal/linuxapp/eal/eal_memalloc.c b/lib/librte_eal/linuxapp/eal/eal_memalloc.c
> index 00d7886..360d8f7 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_memalloc.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_memalloc.c
> @@ -666,7 +666,7 @@ alloc_seg_walk(const struct rte_memseg_list *msl, void *arg)
>  	struct alloc_walk_param *wa = arg;
>  	struct rte_memseg_list *cur_msl;
>  	size_t page_sz;
> -	int cur_idx, start_idx, j, dir_fd;
> +	int cur_idx, start_idx, j, dir_fd = -1;
>  	unsigned int msl_idx, need, i;
> 
>  	if (msl->page_sz != wa->page_sz)
> @@ -691,19 +691,24 @@ alloc_seg_walk(const struct rte_memseg_list *msl, void *arg)
>  	 * because file creation and locking operations are not atomic,
>  	 * and we might be the first or the last ones to use a particular page,
>  	 * so we need to ensure atomicity of every operation.
> +	 *
> +	 * during init, we already hold a write lock, so don't try to take out
> +	 * another one.
>  	 */
> -	dir_fd = open(wa->hi->hugedir, O_RDONLY);
> -	if (dir_fd < 0) {
> -		RTE_LOG(ERR, EAL, "%s(): Cannot open '%s': %s\n", __func__,
> -			wa->hi->hugedir, strerror(errno));
> -		return -1;
> -	}
> -	/* blocking writelock */
> -	if (flock(dir_fd, LOCK_EX)) {
> -		RTE_LOG(ERR, EAL, "%s(): Cannot lock '%s': %s\n", __func__,
> -			wa->hi->hugedir, strerror(errno));
> -		close(dir_fd);
> -		return -1;
> +	if (wa->hi->lock_descriptor == -1) {
> +		dir_fd = open(wa->hi->hugedir, O_RDONLY);
> +		if (dir_fd < 0) {
> +			RTE_LOG(ERR, EAL, "%s(): Cannot open '%s': %s\n",
> +				__func__, wa->hi->hugedir, strerror(errno));
> +			return -1;
> +		}
> +		/* blocking writelock */
> +		if (flock(dir_fd, LOCK_EX)) {
> +			RTE_LOG(ERR, EAL, "%s(): Cannot lock '%s': %s\n",
> +				__func__, wa->hi->hugedir, strerror(errno));
> +			close(dir_fd);
> +			return -1;
> +		}
>  	}
> 
>  	for (i = 0; i < need; i++, cur_idx++) {
> @@ -742,7 +747,8 @@ alloc_seg_walk(const struct rte_memseg_list *msl, void *arg)
>  			if (wa->ms)
>  				memset(wa->ms, 0, sizeof(*wa->ms) * wa->n_segs);
> 
> -			close(dir_fd);
> +			if (dir_fd >= 0)
> +				close(dir_fd);
>  			return -1;
>  		}
>  		if (wa->ms)
> @@ -754,7 +760,8 @@ alloc_seg_walk(const struct rte_memseg_list *msl, void *arg)
>  	wa->segs_allocated = i;
>  	if (i > 0)
>  		cur_msl->version++;
> -	close(dir_fd);
> +	if (dir_fd >= 0)
> +		close(dir_fd);
>  	return 1;
>  }
> 
> @@ -769,7 +776,7 @@ free_seg_walk(const struct rte_memseg_list *msl, void *arg)
>  	struct rte_memseg_list *found_msl;
>  	struct free_walk_param *wa = arg;
>  	uintptr_t start_addr, end_addr;
> -	int msl_idx, seg_idx, ret, dir_fd;
> +	int msl_idx, seg_idx, ret, dir_fd = -1;
> 
>  	start_addr = (uintptr_t) msl->base_va;
>  	end_addr = start_addr + msl->memseg_arr.len * (size_t)msl->page_sz;
> @@ -788,19 +795,24 @@ free_seg_walk(const struct rte_memseg_list *msl, void *arg)
>  	 * because file creation and locking operations are not atomic,
>  	 * and we might be the first or the last ones to use a particular page,
>  	 * so we need to ensure atomicity of every operation.
> +	 *
> +	 * during init, we already hold a write lock, so don't try to take out
> +	 * another one.
>  	 */
> -	dir_fd = open(wa->hi->hugedir, O_RDONLY);
> -	if (dir_fd < 0) {
> -		RTE_LOG(ERR, EAL, "%s(): Cannot open '%s': %s\n", __func__,
> -			wa->hi->hugedir, strerror(errno));
> -		return -1;
> -	}
> -	/* blocking writelock */
> -	if (flock(dir_fd, LOCK_EX)) {
> -		RTE_LOG(ERR, EAL, "%s(): Cannot lock '%s': %s\n", __func__,
> -			wa->hi->hugedir, strerror(errno));
> -		close(dir_fd);
> -		return -1;
> +	if (wa->hi->lock_descriptor == -1) {
> +		dir_fd = open(wa->hi->hugedir, O_RDONLY);
> +		if (dir_fd < 0) {
> +			RTE_LOG(ERR, EAL, "%s(): Cannot open '%s': %s\n",
> +				__func__, wa->hi->hugedir, strerror(errno));
> +			return -1;
> +		}
> +		/* blocking writelock */
> +		if (flock(dir_fd, LOCK_EX)) {
> +			RTE_LOG(ERR, EAL, "%s(): Cannot lock '%s': %s\n",
> +				__func__, wa->hi->hugedir, strerror(errno));
> +			close(dir_fd);
> +			return -1;
> +		}
>  	}
> 
>  	found_msl->version++;
> @@ -809,7 +821,8 @@ free_seg_walk(const struct rte_memseg_list *msl, void *arg)
> 
>  	ret = free_seg(wa->ms, wa->hi, msl_idx, seg_idx);
> 
> -	close(dir_fd);
> +	if (dir_fd >= 0)
> +		close(dir_fd);
> 
>  	if (ret < 0)
>  		return -1;

Tested-By: Shahaf Shuler <shahafs@mellanox.com> 
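
For anyone skimming the archive later, the fix boils down to the guard pattern
below. This is only a minimal C sketch, not the actual EAL code: the helper
names and the stderr logging are illustrative, while "hugedir" and
"lock_descriptor" follow the names used in the patch above.

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/file.h>
#include <unistd.h>

/*
 * Take a blocking write lock on the hugetlbfs directory only if init does
 * not already hold one (lock_descriptor == -1 means no lock is held yet).
 * On success, *out_fd is the fd this caller must close later, or -1 if no
 * new lock was taken. Returns 0 on success, -1 on error.
 */
static int
lock_hugedir_if_needed(const char *hugedir, int lock_descriptor, int *out_fd)
{
	int dir_fd;

	*out_fd = -1;
	if (lock_descriptor != -1)
		return 0; /* init already holds a write lock on hugedir */

	dir_fd = open(hugedir, O_RDONLY);
	if (dir_fd < 0) {
		fprintf(stderr, "Cannot open '%s': %s\n",
			hugedir, strerror(errno));
		return -1;
	}
	/* blocking writelock, as in the patch */
	if (flock(dir_fd, LOCK_EX) != 0) {
		fprintf(stderr, "Cannot lock '%s': %s\n",
			hugedir, strerror(errno));
		close(dir_fd);
		return -1;
	}
	*out_fd = dir_fd;
	return 0;
}

/* Release the lock only if this walk actually took it. */
static void
unlock_hugedir_if_taken(int dir_fd)
{
	if (dir_fd >= 0)
		close(dir_fd); /* closing the fd drops the flock */
}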

> --
> 2.7.4
