DPDK patches and discussions
 help / color / mirror / Atom feed
From: "Burakov, Anatoly" <anatoly.burakov@intel.com>
To: Fengnan Chang <changfengnan@bytedance.com>, <dev@dpdk.org>
Cc: Lin Li <lilintjpu@bytedance.com>
Subject: Re: [PATCH v2] eal: fix eal init may failed when too much continuous memsegs under legacy mode
Date: Mon, 22 May 2023 14:28:15 +0100	[thread overview]
Message-ID: <e4bea695-086b-12ca-7123-a66cb873cc1c@intel.com> (raw)
In-Reply-To: <20230522124107.99877-1-changfengnan@bytedance.com>

On 5/22/2023 1:41 PM, Fengnan Chang wrote:
> Under legacy mode, if the number of contiguous memsegs is greater
> than RTE_MAX_MEMSEG_PER_LIST, eal init will fail even though
> another memseg list is empty, because only one memseg list is
> checked in remap_needed_hugepages.
> Fix this by adding an argument that indicates how many pages were
> mapped in remap_segment; remap_segment tries to map as many pages as
> it can, and if the count exceeds its capacity, remap_needed_hugepages
> will continue to map the remaining pages.
> 
> For example:
> hugepage configure:
> cat /sys/devices/system/node/node*/hugepages/hugepages-2048kB/nr_hugepages
> 10241
> 10239
> 
> startup log:
> EAL: Detected memory type: socket_id:0 hugepage_sz:2097152
> EAL: Detected memory type: socket_id:1 hugepage_sz:2097152
> EAL: Creating 4 segment lists: n_segs:8192 socket_id:0 hugepage_sz:2097152
> EAL: Creating 4 segment lists: n_segs:8192 socket_id:1 hugepage_sz:2097152
> EAL: Requesting 13370 pages of size 2MB from socket 0
> EAL: Requesting 7110 pages of size 2MB from socket 1
> EAL: Attempting to map 14220M on socket 1
> EAL: Allocated 14220M on socket 1
> EAL: Attempting to map 26740M on socket 0
> EAL: Could not find space for memseg. Please increase 32768 and/or 65536 in
> configuration.
> EAL: Couldn't remap hugepage files into memseg lists
> EAL: FATAL: Cannot init memory
> EAL: Cannot init memory
> 
> Signed-off-by: Fengnan Chang <changfengnan@bytedance.com>
> Signed-off-by: Lin Li <lilintjpu@bytedance.com>
> ---
>   lib/eal/linux/eal_memory.c | 33 +++++++++++++++++++++------------
>   1 file changed, 21 insertions(+), 12 deletions(-)
> 
> diff --git a/lib/eal/linux/eal_memory.c b/lib/eal/linux/eal_memory.c
> index 60fc8cc6ca..b2e6453fbe 100644
> --- a/lib/eal/linux/eal_memory.c
> +++ b/lib/eal/linux/eal_memory.c
> @@ -657,12 +657,12 @@ unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl,
>   }
>   
>   static int
> -remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end)
> +remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end, int *mapped_seg_len)
>   {
>   	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
>   	struct rte_memseg_list *msl;
>   	struct rte_fbarray *arr;
> -	int cur_page, seg_len;
> +	int cur_page, seg_len, cur_len;
>   	unsigned int msl_idx;
>   	int ms_idx;
>   	uint64_t page_sz;
> @@ -692,8 +692,9 @@ remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end)
>   
>   		/* leave space for a hole if array is not empty */
>   		empty = arr->count == 0;
> +		cur_len = RTE_MIN((unsigned int)seg_len, arr->len - arr->count - (empty ? 0 : 1));
>   		ms_idx = rte_fbarray_find_next_n_free(arr, 0,
> -				seg_len + (empty ? 0 : 1));
> +				cur_len + (empty ? 0 : 1));
>   
>   		/* memseg list is full? */
>   		if (ms_idx < 0)
> @@ -704,12 +705,12 @@ remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end)
>   		 */
>   		if (!empty)
>   			ms_idx++;
> +		*mapped_seg_len = cur_len;
>   		break;
>   	}
>   	if (msl_idx == RTE_MAX_MEMSEG_LISTS) {
> -		RTE_LOG(ERR, EAL, "Could not find space for memseg. Please increase %s and/or %s in configuration.\n",
> -				RTE_STR(RTE_MAX_MEMSEG_PER_TYPE),
> -				RTE_STR(RTE_MAX_MEM_MB_PER_TYPE));
> +		RTE_LOG(ERR, EAL, "Could not find space for memseg. Please increase RTE_MAX_MEMSEG_PER_LIST "
> +				"RTE_MAX_MEMSEG_PER_TYPE and/or RTE_MAX_MEM_MB_PER_TYPE in configuration.\n");
>   		return -1;
>   	}
>   
> @@ -725,6 +726,8 @@ remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end)
>   		void *addr;
>   		int fd;
>   
> +		if (cur_page - seg_start == *mapped_seg_len)
> +			break;
>   		fd = open(hfile->filepath, O_RDWR);
>   		if (fd < 0) {
>   			RTE_LOG(ERR, EAL, "Could not open '%s': %s\n",
> @@ -986,7 +989,7 @@ prealloc_segments(struct hugepage_file *hugepages, int n_pages)
>   static int
>   remap_needed_hugepages(struct hugepage_file *hugepages, int n_pages)
>   {
> -	int cur_page, seg_start_page, new_memseg, ret;
> +	int cur_page, seg_start_page, new_memseg, ret, mapped_seg_len = 0;
>   
>   	seg_start_page = 0;
>   	for (cur_page = 0; cur_page < n_pages; cur_page++) {
> @@ -1023,21 +1026,27 @@ remap_needed_hugepages(struct hugepage_file *hugepages, int n_pages)
>   			/* if this isn't the first time, remap segment */
>   			if (cur_page != 0) {
>   				ret = remap_segment(hugepages, seg_start_page,
> -						cur_page);
> +						cur_page, &mapped_seg_len);
>   				if (ret != 0)
>   					return -1;
>   			}
> +			cur_page = seg_start_page + mapped_seg_len;
>   			/* remember where we started */
>   			seg_start_page = cur_page;
> +			mapped_seg_len = 0;
>   		}
>   		/* continuation of previous memseg */
>   	}
>   	/* we were stopped, but we didn't remap the last segment, do it now */
>   	if (cur_page != 0) {
> -		ret = remap_segment(hugepages, seg_start_page,
> -				cur_page);
> -		if (ret != 0)
> -			return -1;
> +		while (seg_start_page < n_pages) {
> +			ret = remap_segment(hugepages, seg_start_page,
> +					cur_page, &mapped_seg_len);
> +			if (ret != 0)
> +				return -1;
> +			seg_start_page = seg_start_page + mapped_seg_len;
> +			mapped_seg_len = 0;
> +		}
>   	}
>   	return 0;
>   }

This works, but I feel like it's overcomplicated - the same logic you're 
trying to use can just be implemented using `find_biggest_free()` + 
`find_contig_free()` and returning `seg_len` from `remap_segment()`?

Something like the following:

---

diff --git a/lib/eal/linux/eal_memory.c b/lib/eal/linux/eal_memory.c
index 60fc8cc6ca..08acc787fc 100644
--- a/lib/eal/linux/eal_memory.c
+++ b/lib/eal/linux/eal_memory.c
@@ -681,6 +681,7 @@ remap_segment(struct hugepage_file *hugepages, int 
seg_start, int seg_end)

  	/* find free space in memseg lists */
  	for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {
+		int free_len;
  		bool empty;
  		msl = &mcfg->memsegs[msl_idx];
  		arr = &msl->memseg_arr;
@@ -692,18 +693,27 @@ remap_segment(struct hugepage_file *hugepages, int 
seg_start, int seg_end)

  		/* leave space for a hole if array is not empty */
  		empty = arr->count == 0;
-		ms_idx = rte_fbarray_find_next_n_free(arr, 0,
-				seg_len + (empty ? 0 : 1));

-		/* memseg list is full? */
-		if (ms_idx < 0)
-			continue;
+		/* find start of the biggest contiguous block and its size */
+		ms_idx = rte_fbarray_find_biggest_free(arr, 0);
+		free_len = rte_fbarray_find_contig_free(arr, ms_idx);

  		/* leave some space between memsegs, they are not IOVA
  		 * contiguous, so they shouldn't be VA contiguous either.
  		 */
-		if (!empty)
+		if (!empty) {
  			ms_idx++;
+			free_len--;
+		}
+
+		/* memseg list is full? */
+		if (free_len < 1)
+			continue;
+
+		/* we might not get all of the space we wanted */
+		free_len = RTE_MIN(seg_len, free_len);
+		seg_end = seg_start + free_len;
+		seg_len = seg_end - seg_start;
  		break;
  	}
  	if (msl_idx == RTE_MAX_MEMSEG_LISTS) {
@@ -787,7 +797,7 @@ remap_segment(struct hugepage_file *hugepages, int 
seg_start, int seg_end)
  	}
  	RTE_LOG(DEBUG, EAL, "Allocated %" PRIu64 "M on socket %i\n",
  			(seg_len * page_sz) >> 20, socket_id);
-	return 0;
+	return seg_len;
  }

  static uint64_t
@@ -1022,10 +1032,17 @@ remap_needed_hugepages(struct hugepage_file 
*hugepages, int n_pages)
  		if (new_memseg) {
  			/* if this isn't the first time, remap segment */
  			if (cur_page != 0) {
-				ret = remap_segment(hugepages, seg_start_page,
-						cur_page);
-				if (ret != 0)
-					return -1;
+				int n_remapped = 0;
+				int n_needed = cur_page - seg_start_page;
+
+				while (n_remapped < n_needed) {
+					ret = remap_segment(hugepages,
+							seg_start_page,
+							cur_page);
+					if (ret < 0)
+						return -1;
+					n_remapped += ret;
+				}
  			}
  			/* remember where we started */
  			seg_start_page = cur_page;
@@ -1034,10 +1051,16 @@ remap_needed_hugepages(struct hugepage_file 
*hugepages, int n_pages)
  	}
  	/* we were stopped, but we didn't remap the last segment, do it now */
  	if (cur_page != 0) {
-		ret = remap_segment(hugepages, seg_start_page,
-				cur_page);
-		if (ret != 0)
-			return -1;
+		int n_remapped = 0;
+		int n_needed = cur_page - seg_start_page;
+
+		while (n_remapped < n_needed) {
+			ret = remap_segment(
+					hugepages, seg_start_page, cur_page);
+			if (ret < 0)
+				return -1;
+			n_remapped += ret;
+		}
  	}
  	return 0;
  }

---

This should do the trick? Also, this probably needs to be duplicated for 
Windows and FreeBSD init as well, because AFAIK they follow the legacy 
mem init logic.

-- 
Thanks,
Anatoly


  reply	other threads:[~2023-05-22 13:28 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-05-22 12:41 Fengnan Chang
2023-05-22 13:28 ` Burakov, Anatoly [this message]
2023-05-25 12:49   ` [External] " Fengnan Chang
2023-05-25 14:26     ` Burakov, Anatoly

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=e4bea695-086b-12ca-7123-a66cb873cc1c@intel.com \
    --to=anatoly.burakov@intel.com \
    --cc=changfengnan@bytedance.com \
    --cc=dev@dpdk.org \
    --cc=lilintjpu@bytedance.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).