DPDK patches and discussions
 help / color / mirror / Atom feed
* [PATCH] eal: speed up dpdk init time
@ 2024-05-28  6:12 Fengnan Chang
  2024-05-29 21:51 ` Stephen Hemminger
  0 siblings, 1 reply; 2+ messages in thread
From: Fengnan Chang @ 2024-05-28  6:12 UTC (permalink / raw)
  To: anatoly.burakov, dev, xuemingl; +Cc: Fengnan Chang

If the system has a large number of huge pages, memory init takes a
long time in legacy-mem mode. For example, with 120G of memory in
2MB hugepages, env init takes 43s. Almost half of that time is spent
in find_numasocket. Since the addresses in /proc/self/numa_maps are
ordered, we can sort hugepg_tbl by orig_va first and then simply read
numa_maps line by line to find the socket of each page. In my test,
the time spent was reduced to 19s.

Signed-off-by: Fengnan Chang <changfengnan@bytedance.com>
---
 lib/eal/linux/eal_memory.c | 115 +++++++++++++++++++++++--------------
 1 file changed, 72 insertions(+), 43 deletions(-)

diff --git a/lib/eal/linux/eal_memory.c b/lib/eal/linux/eal_memory.c
index 45879ca743..28cc136ac0 100644
--- a/lib/eal/linux/eal_memory.c
+++ b/lib/eal/linux/eal_memory.c
@@ -414,7 +414,7 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi,
 static int
 find_numasocket(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
 {
-	int socket_id;
+	int socket_id = -1;
 	char *end, *nodestr;
 	unsigned i, hp_count = 0;
 	uint64_t virt_addr;
@@ -432,54 +432,61 @@ find_numasocket(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
 	snprintf(hugedir_str, sizeof(hugedir_str),
 			"%s/%s", hpi->hugedir, eal_get_hugefile_prefix());
 
-	/* parse numa map */
-	while (fgets(buf, sizeof(buf), f) != NULL) {
-
-		/* ignore non huge page */
-		if (strstr(buf, " huge ") == NULL &&
+	/* if we find this page in our mappings, set socket_id */
+	for (i = 0; i < hpi->num_pages[0]; i++) {
+		void *va = NULL;
+		/* parse numa map */
+		while (fgets(buf, sizeof(buf), f) != NULL) {
+			if (strstr(buf, " huge ") == NULL &&
 				strstr(buf, hugedir_str) == NULL)
-			continue;
-
-		/* get zone addr */
-		virt_addr = strtoull(buf, &end, 16);
-		if (virt_addr == 0 || end == buf) {
-			EAL_LOG(ERR, "%s(): error in numa_maps parsing", __func__);
-			goto error;
-		}
+				continue;
+			/* get zone addr */
+			virt_addr = strtoull(buf, &end, 16);
+			if (virt_addr == 0 || end == buf) {
+				EAL_LOG(ERR, "error in numa_maps parsing");
+				goto error;
+			}
 
-		/* get node id (socket id) */
-		nodestr = strstr(buf, " N");
-		if (nodestr == NULL) {
-			EAL_LOG(ERR, "%s(): error in numa_maps parsing", __func__);
-			goto error;
-		}
-		nodestr += 2;
-		end = strstr(nodestr, "=");
-		if (end == NULL) {
-			EAL_LOG(ERR, "%s(): error in numa_maps parsing", __func__);
-			goto error;
-		}
-		end[0] = '\0';
-		end = NULL;
+			/* get node id (socket id) */
+			nodestr = strstr(buf, " N");
+			if (nodestr == NULL) {
+				EAL_LOG(ERR, "error in numa_maps parsing");
+				goto error;
+			}
+			nodestr += 2;
+			end = strstr(nodestr, "=");
+			if (end == NULL) {
+				EAL_LOG(ERR, "error in numa_maps parsing");
+				goto error;
+			}
+			end[0] = '\0';
+			end = NULL;
 
-		socket_id = strtoul(nodestr, &end, 0);
-		if ((nodestr[0] == '\0') || (end == NULL) || (*end != '\0')) {
-			EAL_LOG(ERR, "%s(): error in numa_maps parsing", __func__);
-			goto error;
+			socket_id = strtoul(nodestr, &end, 0);
+			if ((nodestr[0] == '\0') || (end == NULL) || (*end != '\0')) {
+				EAL_LOG(ERR, "error in numa_maps parsing");
+				goto error;
+			}
+			va = (void *)(unsigned long)virt_addr;
+			if (hugepg_tbl[i].orig_va != va) {
+				EAL_LOG(DEBUG, "search %p not seq, let's start from begin",
+					hugepg_tbl[i].orig_va);
+				fseek(f, 0, SEEK_SET);
+			} else {
+				break;
+			}
 		}
-
-		/* if we find this page in our mappings, set socket_id */
-		for (i = 0; i < hpi->num_pages[0]; i++) {
-			void *va = (void *)(unsigned long)virt_addr;
-			if (hugepg_tbl[i].orig_va == va) {
-				hugepg_tbl[i].socket_id = socket_id;
-				hp_count++;
+		if (hugepg_tbl[i].orig_va == va) {
+			hugepg_tbl[i].socket_id = socket_id;
+			hp_count++;
 #ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
-				EAL_LOG(DEBUG,
-					"Hugepage %s is on socket %d",
-					hugepg_tbl[i].filepath, socket_id);
+			EAL_LOG(DEBUG,
+				"Hugepage %s is on socket %d",
+				hugepg_tbl[i].filepath, socket_id);
 #endif
-			}
+		} else {
+			EAL_LOG(ERR,
+				"shoudn't happen %p", hugepg_tbl[i].orig_va);
 		}
 	}
 
@@ -494,6 +501,25 @@ find_numasocket(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
 	return -1;
 }
 
+static int
+cmp_orig_va(const void *a, const void *b)
+{
+#ifndef RTE_ARCH_PPC_64
+	const struct hugepage_file *p1 = a;
+	const struct hugepage_file *p2 = b;
+#else
+	/* PowerPC needs memory sorted in reverse order from x86 */
+	const struct hugepage_file *p1 = b;
+	const struct hugepage_file *p2 = a;
+#endif
+	if (p1->orig_va < p2->orig_va)
+		return -1;
+	else if (p1->orig_va > p2->orig_va)
+		return 1;
+	else
+		return 0;
+}
+
 static int
 cmp_physaddr(const void *a, const void *b)
 {
@@ -1324,6 +1350,9 @@ eal_legacy_hugepage_init(void)
 			}
 		}
 
+		qsort(&tmp_hp[hp_offset], hpi->num_pages[0],
+		      sizeof(struct hugepage_file), cmp_orig_va);
+
 		if (find_numasocket(&tmp_hp[hp_offset], hpi) < 0){
 			EAL_LOG(DEBUG, "Failed to find NUMA socket for %u MB pages",
 					(unsigned)(hpi->hugepage_sz / 0x100000));
-- 
2.37.1 (Apple Git-137.1)


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] eal: speed up dpdk init time
  2024-05-28  6:12 [PATCH] eal: speed up dpdk init time Fengnan Chang
@ 2024-05-29 21:51 ` Stephen Hemminger
  0 siblings, 0 replies; 2+ messages in thread
From: Stephen Hemminger @ 2024-05-29 21:51 UTC (permalink / raw)
  To: Fengnan Chang; +Cc: anatoly.burakov, dev, xuemingl

On Tue, 28 May 2024 14:12:59 +0800
Fengnan Chang <changfengnan@bytedance.com> wrote:

> If we have a lot of huge pages in system, the memory init will
> cost long time in legacy-mem mode. For example, we have 120G memory
> in unit of 2MB hugepage, the env init will cost 43s. Almost half
> of time spent on find_numasocket, since the address in
> /proc/self/numa_maps is orderd, we can sort hugepg_tbl by orig_va
> first and then just read numa_maps line by line is enough to find
> socket. In my test, spent time reduced to 19s.
> 
> Signed-off-by: Fengnan Chang <changfengnan@bytedance.com>
> ---

Good speed up, but you could do much better if the code only read
/proc/self/numa_maps once and constructed an internal table.
Could use a hash or tree to store the relatively small table.

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2024-05-29 21:51 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-05-28  6:12 [PATCH] eal: speed up dpdk init time Fengnan Chang
2024-05-29 21:51 ` Stephen Hemminger

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).