DPDK patches and discussions
 help / color / mirror / Atom feed
* [PATCH] eal: allow worker lcore stacks to be allocated from hugepage memory
@ 2022-05-02 14:10 Don Wallwork
  2022-05-03  6:10 ` Morten Brørup
                   ` (6 more replies)
  0 siblings, 7 replies; 25+ messages in thread
From: Don Wallwork @ 2022-05-02 14:10 UTC (permalink / raw)
  To: dev
  Cc: donw, stephen, mb, anatoly.burakov, dmitry.kozliuk,
	bruce.richardson, Honnappa.Nagarahalli, nd

Add support for using hugepages for worker lcore stack memory.  The
intent is to improve performance by reducing stack memory related TLB
misses and also by using memory local to the NUMA node of each lcore.

EAL option '--huge-worker-stack[=stack-size-kbytes]' is added to allow
the feature to be enabled at runtime.  If the size is not specified,
the system pthread stack size will be used.

Signed-off-by: Don Wallwork <donw@xsightlabs.com>
---
 lib/eal/common/eal_common_options.c | 31 ++++++++++++++
 lib/eal/common/eal_internal_cfg.h   |  4 ++
 lib/eal/common/eal_options.h        |  2 +
 lib/eal/linux/eal.c                 | 65 ++++++++++++++++++++++++++++-
 4 files changed, 100 insertions(+), 2 deletions(-)

diff --git a/lib/eal/common/eal_common_options.c b/lib/eal/common/eal_common_options.c
index f247a42455..be9db9ee37 100644
--- a/lib/eal/common/eal_common_options.c
+++ b/lib/eal/common/eal_common_options.c
@@ -103,6 +103,7 @@ eal_long_options[] = {
 	{OPT_TELEMETRY,         0, NULL, OPT_TELEMETRY_NUM        },
 	{OPT_NO_TELEMETRY,      0, NULL, OPT_NO_TELEMETRY_NUM     },
 	{OPT_FORCE_MAX_SIMD_BITWIDTH, 1, NULL, OPT_FORCE_MAX_SIMD_BITWIDTH_NUM},
+	{OPT_HUGE_WORKER_STACK, 2, NULL, OPT_HUGE_WORKER_STACK_NUM     },
 
 	{0,                     0, NULL, 0                        }
 };
@@ -1618,6 +1619,22 @@ eal_parse_huge_unlink(const char *arg, struct hugepage_file_discipline *out)
 	return -1;
 }
 
+static int
+eal_parse_huge_worker_stack(const char *arg, size_t *huge_worker_stack_size)
+{
+	size_t worker_stack_size;
+	if (arg == NULL) {
+		*huge_worker_stack_size = USE_OS_STACK_SIZE;
+		return 0;
+	}
+	worker_stack_size = atoi(arg);
+	if (worker_stack_size == 0)
+		return -1;
+
+	*huge_worker_stack_size = worker_stack_size * 1024;
+	return 0;
+}
+
 int
 eal_parse_common_option(int opt, const char *optarg,
 			struct internal_config *conf)
@@ -1921,6 +1938,15 @@ eal_parse_common_option(int opt, const char *optarg,
 		}
 		break;
 
+	case OPT_HUGE_WORKER_STACK_NUM:
+		if (eal_parse_huge_worker_stack(optarg,
+						&conf->huge_worker_stack_size) < 0) {
+			RTE_LOG(ERR, EAL, "invalid parameter for --"
+				OPT_HUGE_WORKER_STACK"\n");
+			return -1;
+		}
+		break;
+
 	/* don't know what to do, leave this to caller */
 	default:
 		return 1;
@@ -2235,5 +2261,10 @@ eal_common_usage(void)
 	       "  --"OPT_NO_PCI"            Disable PCI\n"
 	       "  --"OPT_NO_HPET"           Disable HPET\n"
 	       "  --"OPT_NO_SHCONF"         No shared config (mmap'd files)\n"
+	       "  --"OPT_HUGE_WORKER_STACK"[=size]\n"
+	       "                      Allocate worker thread stacks from\n"
+	       "                      hugepage memory.  Size is in units of\n"
+	       "                      kbytes and defaults to system thread\n"
+	       "                      stack size if not specified.\n"
 	       "\n", RTE_MAX_LCORE);
 }
diff --git a/lib/eal/common/eal_internal_cfg.h b/lib/eal/common/eal_internal_cfg.h
index b71faadd18..6a43c872fc 100644
--- a/lib/eal/common/eal_internal_cfg.h
+++ b/lib/eal/common/eal_internal_cfg.h
@@ -48,6 +48,9 @@ struct hugepage_file_discipline {
 	bool unlink_existing;
 };
 
+/** Worker hugepage stack size should default to OS value. */
+#define USE_OS_STACK_SIZE ((size_t)~0)
+
 /**
  * internal configuration
  */
@@ -102,6 +105,7 @@ struct internal_config {
 	unsigned int no_telemetry; /**< true to disable Telemetry */
 	struct simd_bitwidth max_simd_bitwidth;
 	/**< max simd bitwidth path to use */
+	size_t huge_worker_stack_size; /**< worker thread stack size in bytes */
 };
 
 void eal_reset_internal_config(struct internal_config *internal_cfg);
diff --git a/lib/eal/common/eal_options.h b/lib/eal/common/eal_options.h
index 8e4f7202a2..3cc9cb6412 100644
--- a/lib/eal/common/eal_options.h
+++ b/lib/eal/common/eal_options.h
@@ -87,6 +87,8 @@ enum {
 	OPT_NO_TELEMETRY_NUM,
 #define OPT_FORCE_MAX_SIMD_BITWIDTH  "force-max-simd-bitwidth"
 	OPT_FORCE_MAX_SIMD_BITWIDTH_NUM,
+#define OPT_HUGE_WORKER_STACK  "huge-worker-stack"
+	OPT_HUGE_WORKER_STACK_NUM,
 
 	OPT_LONG_MAX_NUM
 };
diff --git a/lib/eal/linux/eal.c b/lib/eal/linux/eal.c
index 1ef263434a..e8c872ef7b 100644
--- a/lib/eal/linux/eal.c
+++ b/lib/eal/linux/eal.c
@@ -1144,8 +1144,69 @@ rte_eal_init(int argc, char **argv)
 		lcore_config[i].state = WAIT;
 
 		/* create a thread for each lcore */
-		ret = pthread_create(&lcore_config[i].thread_id, NULL,
-				     eal_thread_loop, (void *)(uintptr_t)i);
+		if (internal_conf->huge_worker_stack_size == 0) {
+			ret = pthread_create(&lcore_config[i].thread_id, NULL,
+					     eal_thread_loop,
+					     (void *)(uintptr_t)i);
+		} else {
+			/* Allocate NUMA aware stack memory and set
+			 * pthread attributes
+			 */
+			pthread_attr_t attr;
+			size_t stack_size;
+			void *stack_ptr;
+
+			if (pthread_attr_init(&attr) != 0) {
+				rte_eal_init_alert("Cannot init pthread "
+						   "attributes");
+				rte_errno = EFAULT;
+				return -1;
+			}
+			if (internal_conf->huge_worker_stack_size ==
+			    USE_OS_STACK_SIZE) {
+				if (pthread_attr_getstacksize(&attr,
+							      &stack_size) != 0) {
+					rte_errno = EFAULT;
+					return -1;
+				}
+			} else {
+				stack_size =
+					internal_conf->huge_worker_stack_size;
+			}
+			stack_ptr =
+				rte_zmalloc_socket("lcore_stack",
+						   stack_size,
+						   stack_size,
+						   rte_lcore_to_socket_id(i));
+
+			if (stack_ptr == NULL) {
+				rte_eal_init_alert("Cannot allocate stack "
+						   "memory for worker lcore");
+				rte_errno = ENOMEM;
+				return -1;
+			}
+
+			if (pthread_attr_setstack(&attr,
+						  stack_ptr,
+						  stack_size) != 0) {
+				rte_eal_init_alert("Cannot set pthread "
+						   "stack attributes");
+				rte_errno = EFAULT;
+				return -1;
+			}
+
+			/* create a thread for each lcore */
+			ret = pthread_create(&lcore_config[i].thread_id, &attr,
+					     eal_thread_loop,
+					     (void *)(uintptr_t)i);
+
+			if (pthread_attr_destroy(&attr) != 0) {
+				rte_eal_init_alert("Cannot destroy pthread "
+						   "attributes");
+				rte_errno = EFAULT;
+				return -1;
+			}
+		}
 		if (ret != 0)
 			rte_panic("Cannot create thread\n");
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* RE: [PATCH] eal: allow worker lcore stacks to be allocated from hugepage memory
  2022-05-02 14:10 [PATCH] eal: allow worker lcore stacks to be allocated from hugepage memory Don Wallwork
@ 2022-05-03  6:10 ` Morten Brørup
  2022-05-03 13:08 ` Wang, Haiyue
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 25+ messages in thread
From: Morten Brørup @ 2022-05-03  6:10 UTC (permalink / raw)
  To: Don Wallwork, dev
  Cc: stephen, anatoly.burakov, dmitry.kozliuk, bruce.richardson,
	Honnappa.Nagarahalli, nd

> From: Don Wallwork [mailto:donw@xsightlabs.com]
> Sent: Monday, 2 May 2022 16.11
> 
> Add support for using hugepages for worker lcore stack memory.  The
> intent is to improve performance by reducing stack memory related TLB
> misses and also by using memory local to the NUMA node of each lcore.
> 
> EAL option '--huge-worker-stack [stack-size-kbytes]' is added to allow
> the feature to be enabled at runtime.  If the size is not specified,
> the system pthread stack size will be used.
> 
> Signed-off-by: Don Wallwork <donw@xsightlabs.com>
> ---
>  lib/eal/common/eal_common_options.c | 31 ++++++++++++++
>  lib/eal/common/eal_internal_cfg.h   |  4 ++
>  lib/eal/common/eal_options.h        |  2 +
>  lib/eal/linux/eal.c                 | 65 ++++++++++++++++++++++++++++-
>  4 files changed, 100 insertions(+), 2 deletions(-)
> 

Acked-by: Morten Brørup <mb@smartsharesystems.com>


^ permalink raw reply	[flat|nested] 25+ messages in thread

* RE: [PATCH] eal: allow worker lcore stacks to be allocated from hugepage memory
  2022-05-02 14:10 [PATCH] eal: allow worker lcore stacks to be allocated from hugepage memory Don Wallwork
  2022-05-03  6:10 ` Morten Brørup
@ 2022-05-03 13:08 ` Wang, Haiyue
  2022-05-03 19:46   ` Don Wallwork
  2022-05-13 17:58 ` [PATCH v2] " Don Wallwork
                   ` (4 subsequent siblings)
  6 siblings, 1 reply; 25+ messages in thread
From: Wang, Haiyue @ 2022-05-03 13:08 UTC (permalink / raw)
  To: Don Wallwork, dev
  Cc: stephen, mb, Burakov, Anatoly, dmitry.kozliuk, Richardson, Bruce,
	Honnappa.Nagarahalli, nd

> -----Original Message-----
> From: Don Wallwork <donw@xsightlabs.com>
> Sent: Monday, May 2, 2022 22:11
> To: dev@dpdk.org
> Cc: donw@xsightlabs.com; stephen@networkplumber.org; mb@smartsharesystems.com; Burakov, Anatoly
> <anatoly.burakov@intel.com>; dmitry.kozliuk@gmail.com; Richardson, Bruce <bruce.richardson@intel.com>;
> Honnappa.Nagarahalli@arm.com; nd@arm.com
> Subject: [PATCH] eal: allow worker lcore stacks to be allocated from hugepage memory
> 
> Add support for using hugepages for worker lcore stack memory.  The
> intent is to improve performance by reducing stack memory related TLB
> misses and also by using memory local to the NUMA node of each lcore.
> 
> EAL option '--huge-worker-stack [stack-size-kbytes]' is added to allow
> the feature to be enabled at runtime.  If the size is not specified,
> the system pthread stack size will be used.
> 
> Signed-off-by: Don Wallwork <donw@xsightlabs.com>
> ---
>  lib/eal/common/eal_common_options.c | 31 ++++++++++++++
>  lib/eal/common/eal_internal_cfg.h   |  4 ++
>  lib/eal/common/eal_options.h        |  2 +
>  lib/eal/linux/eal.c                 | 65 ++++++++++++++++++++++++++++-
>  4 files changed, 100 insertions(+), 2 deletions(-)
> 
> diff --git a/lib/eal/common/eal_common_options.c b/lib/eal/common/eal_common_options.c
> index f247a42455..be9db9ee37 100644
> --- a/lib/eal/common/eal_common_options.c
> +++ b/lib/eal/common/eal_common_options.c
> @@ -103,6 +103,7 @@ eal_long_options[] = {
>  	{OPT_TELEMETRY,         0, NULL, OPT_TELEMETRY_NUM        },
>  	{OPT_NO_TELEMETRY,      0, NULL, OPT_NO_TELEMETRY_NUM     },
>  	{OPT_FORCE_MAX_SIMD_BITWIDTH, 1, NULL, OPT_FORCE_MAX_SIMD_BITWIDTH_NUM},
> +	{OPT_HUGE_WORKER_STACK, 2, NULL, OPT_HUGE_WORKER_STACK_NUM     },
> 
>  	{0,                     0, NULL, 0                        }
>  };
> @@ -1618,6 +1619,22 @@ eal_parse_huge_unlink(const char *arg, struct hugepage_file_discipline *out)
>  	return -1;
>  }
> 
> +static int
> +eal_parse_huge_worker_stack(const char *arg, size_t *huge_worker_stack_size)
> +{
> +	size_t worker_stack_size;
> +	if (arg == NULL) {
> +		*huge_worker_stack_size = USE_OS_STACK_SIZE;
> +		return 0;
> +	}
> +	worker_stack_size = atoi(arg);
> +	if (worker_stack_size == 0)
> +		return -1;

Should we also check for "worker_stack_size * 1024 < PTHREAD_STACK_MIN"?

> +
> +	*huge_worker_stack_size = worker_stack_size * 1024;
> +	return 0;
> +}
> +


> --
> 2.17.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH] eal: allow worker lcore stacks to be allocated from hugepage memory
  2022-05-03 13:08 ` Wang, Haiyue
@ 2022-05-03 19:46   ` Don Wallwork
  2022-05-04  3:08     ` Wang, Haiyue
  0 siblings, 1 reply; 25+ messages in thread
From: Don Wallwork @ 2022-05-03 19:46 UTC (permalink / raw)
  To: Wang, Haiyue, dev
  Cc: stephen, mb, Burakov, Anatoly, dmitry.kozliuk, Richardson, Bruce,
	Honnappa.Nagarahalli, nd

On 5/3/2022 9:08 AM, Wang, Haiyue wrote:
>> -----Original Message-----
>> From: Don Wallwork <donw@xsightlabs.com>
>> Sent: Monday, May 2, 2022 22:11
>> To: dev@dpdk.org
>> Cc: donw@xsightlabs.com; stephen@networkplumber.org; mb@smartsharesystems.com; Burakov, Anatoly
>> <anatoly.burakov@intel.com>; dmitry.kozliuk@gmail.com; Richardson, Bruce <bruce.richardson@intel.com>;
>> Honnappa.Nagarahalli@arm.com; nd@arm.com
>> Subject: [PATCH] eal: allow worker lcore stacks to be allocated from hugepage memory
>>
>> Add support for using hugepages for worker lcore stack memory.  The
>> intent is to improve performance by reducing stack memory related TLB
>> misses and also by using memory local to the NUMA node of each lcore.
>>
>> EAL option '--huge-worker-stack [stack-size-kbytes]' is added to allow
>> the feature to be enabled at runtime.  If the size is not specified,
>> the system pthread stack size will be used.
>>
>> Signed-off-by: Don Wallwork <donw@xsightlabs.com>
>> ---
>>   lib/eal/common/eal_common_options.c | 31 ++++++++++++++
>>   lib/eal/common/eal_internal_cfg.h   |  4 ++
>>   lib/eal/common/eal_options.h        |  2 +
>>   lib/eal/linux/eal.c                 | 65 ++++++++++++++++++++++++++++-
>>   4 files changed, 100 insertions(+), 2 deletions(-)
>>
>> diff --git a/lib/eal/common/eal_common_options.c b/lib/eal/common/eal_common_options.c
>> index f247a42455..be9db9ee37 100644
>> --- a/lib/eal/common/eal_common_options.c
>> +++ b/lib/eal/common/eal_common_options.c
>> @@ -103,6 +103,7 @@ eal_long_options[] = {
>>   	{OPT_TELEMETRY,         0, NULL, OPT_TELEMETRY_NUM        },
>>   	{OPT_NO_TELEMETRY,      0, NULL, OPT_NO_TELEMETRY_NUM     },
>>   	{OPT_FORCE_MAX_SIMD_BITWIDTH, 1, NULL, OPT_FORCE_MAX_SIMD_BITWIDTH_NUM},
>> +	{OPT_HUGE_WORKER_STACK, 2, NULL, OPT_HUGE_WORKER_STACK_NUM     },
>>
>>   	{0,                     0, NULL, 0                        }
>>   };
>> @@ -1618,6 +1619,22 @@ eal_parse_huge_unlink(const char *arg, struct hugepage_file_discipline *out)
>>   	return -1;
>>   }
>>
>> +static int
>> +eal_parse_huge_worker_stack(const char *arg, size_t *huge_worker_stack_size)
>> +{
>> +	size_t worker_stack_size;
>> +	if (arg == NULL) {
>> +		*huge_worker_stack_size = USE_OS_STACK_SIZE;
>> +		return 0;
>> +	}
>> +	worker_stack_size = atoi(arg);
>> +	if (worker_stack_size == 0)
>> +		return -1;
> Should we also to check "worker_stack_size *1024  < PTHREAD_STACK_MIN" ?
This may be too restrictive in certain environments.  For example, 
memory constrained platforms may require a smaller worker stack size 
than this limit would allow.
>> +
>> +	*huge_worker_stack_size = worker_stack_size * 1024;
>> +	return 0;
>> +}
>> +
>
>> --
>> 2.17.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* RE: [PATCH] eal: allow worker lcore stacks to be allocated from hugepage memory
  2022-05-03 19:46   ` Don Wallwork
@ 2022-05-04  3:08     ` Wang, Haiyue
  0 siblings, 0 replies; 25+ messages in thread
From: Wang, Haiyue @ 2022-05-04  3:08 UTC (permalink / raw)
  To: Don Wallwork, dev
  Cc: stephen, mb, Burakov, Anatoly, dmitry.kozliuk, Richardson, Bruce,
	Honnappa.Nagarahalli, nd

> -----Original Message-----
> From: Don Wallwork <donw@xsightlabs.com>
> Sent: Wednesday, May 4, 2022 03:47
> To: Wang, Haiyue <haiyue.wang@intel.com>; dev@dpdk.org
> Cc: stephen@networkplumber.org; mb@smartsharesystems.com; Burakov, Anatoly <anatoly.burakov@intel.com>;
> dmitry.kozliuk@gmail.com; Richardson, Bruce <bruce.richardson@intel.com>; Honnappa.Nagarahalli@arm.com;
> nd@arm.com
> Subject: Re: [PATCH] eal: allow worker lcore stacks to be allocated from hugepage memory
> 
> On 5/3/2022 9:08 AM, Wang, Haiyue wrote:
> >> -----Original Message-----
> >> From: Don Wallwork <donw@xsightlabs.com>
> >> Sent: Monday, May 2, 2022 22:11
> >> To: dev@dpdk.org
> >> Cc: donw@xsightlabs.com; stephen@networkplumber.org; mb@smartsharesystems.com; Burakov, Anatoly
> >> <anatoly.burakov@intel.com>; dmitry.kozliuk@gmail.com; Richardson, Bruce
> <bruce.richardson@intel.com>;
> >> Honnappa.Nagarahalli@arm.com; nd@arm.com
> >> Subject: [PATCH] eal: allow worker lcore stacks to be allocated from hugepage memory
> >>
> >> Add support for using hugepages for worker lcore stack memory.  The
> >> intent is to improve performance by reducing stack memory related TLB
> >> misses and also by using memory local to the NUMA node of each lcore.
> >>
> >> EAL option '--huge-worker-stack [stack-size-kbytes]' is added to allow
> >> the feature to be enabled at runtime.  If the size is not specified,
> >> the system pthread stack size will be used.
> >>
> >> Signed-off-by: Don Wallwork <donw@xsightlabs.com>
> >> ---
> >>   lib/eal/common/eal_common_options.c | 31 ++++++++++++++
> >>   lib/eal/common/eal_internal_cfg.h   |  4 ++
> >>   lib/eal/common/eal_options.h        |  2 +
> >>   lib/eal/linux/eal.c                 | 65 ++++++++++++++++++++++++++++-
> >>   4 files changed, 100 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/lib/eal/common/eal_common_options.c b/lib/eal/common/eal_common_options.c
> >> index f247a42455..be9db9ee37 100644
> >> --- a/lib/eal/common/eal_common_options.c
> >> +++ b/lib/eal/common/eal_common_options.c
> >> @@ -103,6 +103,7 @@ eal_long_options[] = {
> >>   	{OPT_TELEMETRY,         0, NULL, OPT_TELEMETRY_NUM        },
> >>   	{OPT_NO_TELEMETRY,      0, NULL, OPT_NO_TELEMETRY_NUM     },
> >>   	{OPT_FORCE_MAX_SIMD_BITWIDTH, 1, NULL, OPT_FORCE_MAX_SIMD_BITWIDTH_NUM},
> >> +	{OPT_HUGE_WORKER_STACK, 2, NULL, OPT_HUGE_WORKER_STACK_NUM     },
> >>
> >>   	{0,                     0, NULL, 0                        }
> >>   };
> >> @@ -1618,6 +1619,22 @@ eal_parse_huge_unlink(const char *arg, struct hugepage_file_discipline *out)
> >>   	return -1;
> >>   }
> >>
> >> +static int
> >> +eal_parse_huge_worker_stack(const char *arg, size_t *huge_worker_stack_size)
> >> +{
> >> +	size_t worker_stack_size;
> >> +	if (arg == NULL) {
> >> +		*huge_worker_stack_size = USE_OS_STACK_SIZE;
> >> +		return 0;
> >> +	}
> >> +	worker_stack_size = atoi(arg);
> >> +	if (worker_stack_size == 0)
> >> +		return -1;
> > Should we also to check "worker_stack_size *1024  < PTHREAD_STACK_MIN" ?
> This may be too restrictive in certain environments.  For example,
> memory constrained platforms may require a smaller worker stack size
> than this limit would allow.

Understood, thanks.

> >> +
> >> +	*huge_worker_stack_size = worker_stack_size * 1024;
> >> +	return 0;
> >> +}
> >> +
> >
> >> --
> >> 2.17.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH v2] eal: allow worker lcore stacks to be allocated from hugepage memory
  2022-05-02 14:10 [PATCH] eal: allow worker lcore stacks to be allocated from hugepage memory Don Wallwork
  2022-05-03  6:10 ` Morten Brørup
  2022-05-03 13:08 ` Wang, Haiyue
@ 2022-05-13 17:58 ` Don Wallwork
  2022-05-13 21:38   ` Stephen Hemminger
                     ` (2 more replies)
  2022-05-16 19:50 ` [PATCH v3] " Don Wallwork
                   ` (3 subsequent siblings)
  6 siblings, 3 replies; 25+ messages in thread
From: Don Wallwork @ 2022-05-13 17:58 UTC (permalink / raw)
  To: dev
  Cc: donw, stephen, mb, anatoly.burakov, dmitry.kozliuk,
	bruce.richardson, Honnappa.Nagarahalli, nd, haiyue.wang

Add support for using hugepages for worker lcore stack memory.  The
intent is to improve performance by reducing stack memory related TLB
misses and also by using memory local to the NUMA node of each lcore.

EAL option '--huge-worker-stack[=stack-size-in-KiB]' is added to allow
the feature to be enabled at runtime.  If the size is not specified,
the system pthread stack size will be used.

Signed-off-by: Don Wallwork <donw@xsightlabs.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
---
 doc/guides/linux_gsg/eal_args.include.rst     |  6 ++
 .../prog_guide/env_abstraction_layer.rst      | 21 ++++++
 lib/eal/common/eal_common_options.c           | 28 ++++++++
 lib/eal/common/eal_internal_cfg.h             |  4 ++
 lib/eal/common/eal_options.h                  |  2 +
 lib/eal/linux/eal.c                           | 65 ++++++++++++++++++-
 6 files changed, 124 insertions(+), 2 deletions(-)

diff --git a/doc/guides/linux_gsg/eal_args.include.rst b/doc/guides/linux_gsg/eal_args.include.rst
index 3549a0cf56..d189109a55 100644
--- a/doc/guides/linux_gsg/eal_args.include.rst
+++ b/doc/guides/linux_gsg/eal_args.include.rst
@@ -116,6 +116,12 @@ Memory-related options
 
     Force IOVA mode to a specific value.
 
+*   ``--huge-worker-stack[=size]``
+
+    Allocate worker stack memory from hugepage memory.  Stack size defaults
+    to system pthread stack size unless the optional size (in kbytes) is
+    specified.
+
 Debugging options
 ~~~~~~~~~~~~~~~~~
 
diff --git a/doc/guides/prog_guide/env_abstraction_layer.rst b/doc/guides/prog_guide/env_abstraction_layer.rst
index 5f0748fba1..e74516f0cf 100644
--- a/doc/guides/prog_guide/env_abstraction_layer.rst
+++ b/doc/guides/prog_guide/env_abstraction_layer.rst
@@ -329,6 +329,27 @@ Another option is to use bigger page sizes. Since fewer pages are required to
 cover the same memory area, fewer file descriptors will be stored internally
 by EAL.
 
+.. _huge-worker-stack:
+
+Hugepage Worker Stacks
+^^^^^^^^^^^^^^^^^^^^^^
+
+When the ``--huge-worker-stack[=size]`` EAL option is specified, worker
+thread stacks are allocated from hugepage memory local to the NUMA node
+of the thread. Worker stack size defaults to system pthread stack size
+if the optional size parameter is not specified.
+
+.. warning::
+    Stacks allocated from hugepage memory are not protected by guard
+    pages. Worker stacks must be sufficiently sized to prevent stack
+    overflow when this option is used.
+
+    As with normal thread stacks, hugepage worker thread stack size is
+    fixed and is not dynamically resized. Therefore, an application that
+    is free of stack page faults under a given load should be safe with
+    hugepage worker thread stacks given the same thread stack size and
+    loading conditions.
+
 Support for Externally Allocated Memory
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/lib/eal/common/eal_common_options.c b/lib/eal/common/eal_common_options.c
index f247a42455..7fc5e10928 100644
--- a/lib/eal/common/eal_common_options.c
+++ b/lib/eal/common/eal_common_options.c
@@ -103,6 +103,7 @@ eal_long_options[] = {
 	{OPT_TELEMETRY,         0, NULL, OPT_TELEMETRY_NUM        },
 	{OPT_NO_TELEMETRY,      0, NULL, OPT_NO_TELEMETRY_NUM     },
 	{OPT_FORCE_MAX_SIMD_BITWIDTH, 1, NULL, OPT_FORCE_MAX_SIMD_BITWIDTH_NUM},
+	{OPT_HUGE_WORKER_STACK, 2, NULL, OPT_HUGE_WORKER_STACK_NUM     },
 
 	{0,                     0, NULL, 0                        }
 };
@@ -1618,6 +1619,22 @@ eal_parse_huge_unlink(const char *arg, struct hugepage_file_discipline *out)
 	return -1;
 }
 
+static int
+eal_parse_huge_worker_stack(const char *arg, size_t *huge_worker_stack_size)
+{
+	size_t worker_stack_size;
+	if (arg == NULL) {
+		*huge_worker_stack_size = USE_OS_STACK_SIZE;
+		return 0;
+	}
+	worker_stack_size = atoi(arg);
+	if (worker_stack_size == 0)
+		return -1;
+
+	*huge_worker_stack_size = worker_stack_size * 1024;
+	return 0;
+}
+
 int
 eal_parse_common_option(int opt, const char *optarg,
 			struct internal_config *conf)
@@ -1921,6 +1938,17 @@ eal_parse_common_option(int opt, const char *optarg,
 		}
 		break;
 
+#ifndef RTE_EXEC_ENV_WINDOWS
+	case OPT_HUGE_WORKER_STACK_NUM:
+		if (eal_parse_huge_worker_stack(optarg,
+						&conf->huge_worker_stack_size) < 0) {
+			RTE_LOG(ERR, EAL, "invalid parameter for --"
+				OPT_HUGE_WORKER_STACK"\n");
+			return -1;
+		}
+		break;
+#endif /* !RTE_EXEC_ENV_WINDOWS */
+
 	/* don't know what to do, leave this to caller */
 	default:
 		return 1;
diff --git a/lib/eal/common/eal_internal_cfg.h b/lib/eal/common/eal_internal_cfg.h
index b71faadd18..8ac710da02 100644
--- a/lib/eal/common/eal_internal_cfg.h
+++ b/lib/eal/common/eal_internal_cfg.h
@@ -48,6 +48,9 @@ struct hugepage_file_discipline {
 	bool unlink_existing;
 };
 
+/** Worker hugepage stack size should default to OS value. */
+#define USE_OS_STACK_SIZE ((size_t)~0)
+
 /**
  * internal configuration
  */
@@ -102,6 +105,7 @@ struct internal_config {
 	unsigned int no_telemetry; /**< true to disable Telemetry */
 	struct simd_bitwidth max_simd_bitwidth;
 	/**< max simd bitwidth path to use */
+	size_t huge_worker_stack_size; /**< worker thread stack size in bytes */
 };
 
 void eal_reset_internal_config(struct internal_config *internal_cfg);
diff --git a/lib/eal/common/eal_options.h b/lib/eal/common/eal_options.h
index 8e4f7202a2..3cc9cb6412 100644
--- a/lib/eal/common/eal_options.h
+++ b/lib/eal/common/eal_options.h
@@ -87,6 +87,8 @@ enum {
 	OPT_NO_TELEMETRY_NUM,
 #define OPT_FORCE_MAX_SIMD_BITWIDTH  "force-max-simd-bitwidth"
 	OPT_FORCE_MAX_SIMD_BITWIDTH_NUM,
+#define OPT_HUGE_WORKER_STACK  "huge-worker-stack"
+	OPT_HUGE_WORKER_STACK_NUM,
 
 	OPT_LONG_MAX_NUM
 };
diff --git a/lib/eal/linux/eal.c b/lib/eal/linux/eal.c
index 1ef263434a..e8c872ef7b 100644
--- a/lib/eal/linux/eal.c
+++ b/lib/eal/linux/eal.c
@@ -1144,8 +1144,69 @@ rte_eal_init(int argc, char **argv)
 		lcore_config[i].state = WAIT;
 
 		/* create a thread for each lcore */
-		ret = pthread_create(&lcore_config[i].thread_id, NULL,
-				     eal_thread_loop, (void *)(uintptr_t)i);
+		if (internal_conf->huge_worker_stack_size == 0) {
+			ret = pthread_create(&lcore_config[i].thread_id, NULL,
+					     eal_thread_loop,
+					     (void *)(uintptr_t)i);
+		} else {
+			/* Allocate NUMA aware stack memory and set
+			 * pthread attributes
+			 */
+			pthread_attr_t attr;
+			size_t stack_size;
+			void *stack_ptr;
+
+			if (pthread_attr_init(&attr) != 0) {
+				rte_eal_init_alert("Cannot init pthread "
+						   "attributes");
+				rte_errno = EFAULT;
+				return -1;
+			}
+			if (internal_conf->huge_worker_stack_size ==
+			    USE_OS_STACK_SIZE) {
+				if (pthread_attr_getstacksize(&attr,
+							      &stack_size) != 0) {
+					rte_errno = EFAULT;
+					return -1;
+				}
+			} else {
+				stack_size =
+					internal_conf->huge_worker_stack_size;
+			}
+			stack_ptr =
+				rte_zmalloc_socket("lcore_stack",
+						   stack_size,
+						   stack_size,
+						   rte_lcore_to_socket_id(i));
+
+			if (stack_ptr == NULL) {
+				rte_eal_init_alert("Cannot allocate stack "
+						   "memory for worker lcore");
+				rte_errno = ENOMEM;
+				return -1;
+			}
+
+			if (pthread_attr_setstack(&attr,
+						  stack_ptr,
+						  stack_size) != 0) {
+				rte_eal_init_alert("Cannot set pthread "
+						   "stack attributes");
+				rte_errno = EFAULT;
+				return -1;
+			}
+
+			/* create a thread for each lcore */
+			ret = pthread_create(&lcore_config[i].thread_id, &attr,
+					     eal_thread_loop,
+					     (void *)(uintptr_t)i);
+
+			if (pthread_attr_destroy(&attr) != 0) {
+				rte_eal_init_alert("Cannot destroy pthread "
+						   "attributes");
+				rte_errno = EFAULT;
+				return -1;
+			}
+		}
 		if (ret != 0)
 			rte_panic("Cannot create thread\n");
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH v2] eal: allow worker lcore stacks to be allocated from hugepage memory
  2022-05-13 17:58 ` [PATCH v2] " Don Wallwork
@ 2022-05-13 21:38   ` Stephen Hemminger
  2022-05-16 19:43     ` Don Wallwork
  2022-05-13 21:41   ` Stephen Hemminger
  2022-05-14  3:31   ` fengchengwen
  2 siblings, 1 reply; 25+ messages in thread
From: Stephen Hemminger @ 2022-05-13 21:38 UTC (permalink / raw)
  To: Don Wallwork
  Cc: dev, mb, anatoly.burakov, dmitry.kozliuk, bruce.richardson,
	Honnappa.Nagarahalli, nd, haiyue.wang

On Fri, 13 May 2022 13:58:22 -0400
Don Wallwork <donw@xsightlabs.com> wrote:

> +static int
> +eal_parse_huge_worker_stack(const char *arg, size_t *huge_worker_stack_size)
> +{
> +	size_t worker_stack_size;
> +	if (arg == NULL) {
> +		*huge_worker_stack_size = USE_OS_STACK_SIZE;
> +		return 0;
> +	}
> +	worker_stack_size = atoi(arg);
> +	if (worker_stack_size == 0)
> +		return -1;
> +

Since worker_stack_size is size_t you are better off using something
like strtoul() and checking for the additional errors it can report.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH v2] eal: allow worker lcore stacks to be allocated from hugepage memory
  2022-05-13 17:58 ` [PATCH v2] " Don Wallwork
  2022-05-13 21:38   ` Stephen Hemminger
@ 2022-05-13 21:41   ` Stephen Hemminger
  2022-05-14  3:31   ` fengchengwen
  2 siblings, 0 replies; 25+ messages in thread
From: Stephen Hemminger @ 2022-05-13 21:41 UTC (permalink / raw)
  To: Don Wallwork
  Cc: dev, mb, anatoly.burakov, dmitry.kozliuk, bruce.richardson,
	Honnappa.Nagarahalli, nd, haiyue.wang

On Fri, 13 May 2022 13:58:22 -0400
Don Wallwork <donw@xsightlabs.com> wrote:

> +		if (internal_conf->huge_worker_stack_size == 0) {
> +			ret = pthread_create(&lcore_config[i].thread_id, NULL,
> +					     eal_thread_loop,
> +					     (void *)(uintptr_t)i);
> +		} else {
> +			/* Allocate NUMA aware stack memory and set
> +			 * pthread attributes
> +			 */
> +			pthread_attr_t attr;
> +			size_t stack_size;
> +			void *stack_ptr;
> +
> +			if (pthread_attr_init(&attr) != 0) {
> +				rte_eal_init_alert("Cannot init pthread "
> +						   "attributes");
> +				rte_errno = EFAULT;
> +				return -1;
> +			}
> +			if (internal_conf->huge_worker_stack_size ==
> +			    USE_OS_STACK_SIZE) {
> +				if (pthread_attr_getstacksize(&attr,
> +							      &stack_size) != 0) {
> +					rte_errno = EFAULT;
> +					return -1;
> +				}
> +			} else {
> +				stack_size =
> +					internal_conf->huge_worker_stack_size;
> +			}
> +			stack_ptr =
> +				rte_zmalloc_socket("lcore_stack",
> +						   stack_size,
> +						   stack_size,
> +						   rte_lcore_to_socket_id(i));
> +
> +			if (stack_ptr == NULL) {
> +				rte_eal_init_alert("Cannot allocate stack "
> +						   "memory for worker lcore");
> +				rte_errno = ENOMEM;
> +				return -1;
> +			}
> +
> +			if (pthread_attr_setstack(&attr,
> +						  stack_ptr,
> +						  stack_size) != 0) {
> +				rte_eal_init_alert("Cannot set pthread "
> +						   "stack attributes");
> +				rte_errno = EFAULT;
> +				return -1;
> +			}
> +
> +			/* create a thread for each lcore */
> +			ret = pthread_create(&lcore_config[i].thread_id, &attr,
> +					     eal_thread_loop,
> +					     (void *)(uintptr_t)i);
> +
> +			if (pthread_attr_destroy(&attr) != 0) {
> +				rte_eal_init_alert("Cannot destroy pthread "
> +						   "attributes");
> +				rte_errno = EFAULT;
> +				return -1;
> +			}

The indentation is getting kind of deep here, and to me that indicates
a good place to split this into a helper function?

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH v2] eal: allow worker lcore stacks to be allocated from hugepage memory
  2022-05-13 17:58 ` [PATCH v2] " Don Wallwork
  2022-05-13 21:38   ` Stephen Hemminger
  2022-05-13 21:41   ` Stephen Hemminger
@ 2022-05-14  3:31   ` fengchengwen
  2022-05-16 19:47     ` Don Wallwork
  2 siblings, 1 reply; 25+ messages in thread
From: fengchengwen @ 2022-05-14  3:31 UTC (permalink / raw)
  To: Don Wallwork, dev
  Cc: stephen, mb, anatoly.burakov, dmitry.kozliuk, bruce.richardson,
	Honnappa.Nagarahalli, nd, haiyue.wang

On 2022/5/14 1:58, Don Wallwork wrote:
> Add support for using hugepages for worker lcore stack memory.  The
> intent is to improve performance by reducing stack memory related TLB
> misses and also by using memory local to the NUMA node of each lcore.
> 
> EAL option '--huge-worker-stack [stack-size-in-KiB]' is added to allow
> the feature to be enabled at runtime.  If the size is not specified,
> the system pthread stack size will be used.
> 
> Signed-off-by: Don Wallwork <donw@xsightlabs.com>
> Acked-by: Morten Brørup <mb@smartsharesystems.com>
> ---
>  doc/guides/linux_gsg/eal_args.include.rst     |  6 ++
>  .../prog_guide/env_abstraction_layer.rst      | 21 ++++++
>  lib/eal/common/eal_common_options.c           | 28 ++++++++
>  lib/eal/common/eal_internal_cfg.h             |  4 ++
>  lib/eal/common/eal_options.h                  |  2 +
>  lib/eal/linux/eal.c                           | 65 ++++++++++++++++++-
>  6 files changed, 124 insertions(+), 2 deletions(-)
> 
> diff --git a/doc/guides/linux_gsg/eal_args.include.rst b/doc/guides/linux_gsg/eal_args.include.rst
> index 3549a0cf56..d189109a55 100644
> --- a/doc/guides/linux_gsg/eal_args.include.rst
> +++ b/doc/guides/linux_gsg/eal_args.include.rst
> @@ -116,6 +116,12 @@ Memory-related options
>  
>      Force IOVA mode to a specific value.
>  
> +*   ``--huge-worker-stack[=size]``
> +
> +    Allocate worker stack memory from hugepage memory.  Stack size defaults

Two consecutive spaces before 'Stack'?

> +    to system pthread stack size unless the optional size (in kbytes) is
> +    specified.
> +
>  Debugging options
>  ~~~~~~~~~~~~~~~~~
>  
> diff --git a/doc/guides/prog_guide/env_abstraction_layer.rst b/doc/guides/prog_guide/env_abstraction_layer.rst
> index 5f0748fba1..e74516f0cf 100644
> --- a/doc/guides/prog_guide/env_abstraction_layer.rst
> +++ b/doc/guides/prog_guide/env_abstraction_layer.rst
> @@ -329,6 +329,27 @@ Another option is to use bigger page sizes. Since fewer pages are required to
>  cover the same memory area, fewer file descriptors will be stored internally
>  by EAL.
>  
> +.. _huge-worker-stack:
> +
> +Hugepage Worker Stacks
> +^^^^^^^^^^^^^^^^^^^^^^
> +
> +When the ``--huge-worker-stack[=size]`` EAL option is specified, worker
> +thread stacks are allocated from hugepage memory local to the NUMA node
> +of the thread. Worker stack size defaults to system pthread stack size
> +if the optional size parameter is not specified.
> +
> +.. warning::
> +    Stacks allocated from hugepage memory are not protected by guard
> +    pages. Worker stacks must be sufficiently sized to prevent stack
> +    overflow when this option is used.
> +
> +    As with normal thread stacks, hugepage worker thread stack size is
> +    fixed and is not dynamically resized. Therefore, an application that
> +    is free of stack page faults under a given load should be safe with
> +    hugepage worker thread stacks given the same thread stack size and
> +    loading conditions.
> +
>  Support for Externally Allocated Memory
>  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>  
> diff --git a/lib/eal/common/eal_common_options.c b/lib/eal/common/eal_common_options.c
> index f247a42455..7fc5e10928 100644
> --- a/lib/eal/common/eal_common_options.c
> +++ b/lib/eal/common/eal_common_options.c
> @@ -103,6 +103,7 @@ eal_long_options[] = {
>  	{OPT_TELEMETRY,         0, NULL, OPT_TELEMETRY_NUM        },
>  	{OPT_NO_TELEMETRY,      0, NULL, OPT_NO_TELEMETRY_NUM     },
>  	{OPT_FORCE_MAX_SIMD_BITWIDTH, 1, NULL, OPT_FORCE_MAX_SIMD_BITWIDTH_NUM},
> +	{OPT_HUGE_WORKER_STACK, 2, NULL, OPT_HUGE_WORKER_STACK_NUM     },
>  
>  	{0,                     0, NULL, 0                        }
>  };
> @@ -1618,6 +1619,22 @@ eal_parse_huge_unlink(const char *arg, struct hugepage_file_discipline *out)
>  	return -1;
>  }
>  
> +static int
> +eal_parse_huge_worker_stack(const char *arg, size_t *huge_worker_stack_size)
> +{
> +	size_t worker_stack_size;
> +	if (arg == NULL) {

Also consider the case arg[0] == '\0', maybe: if (arg == NULL || arg[0] == '\0')

> +		*huge_worker_stack_size = USE_OS_STACK_SIZE;
> +		return 0;
> +	}
> +	worker_stack_size = atoi(arg);

Suggest using strtoul() because atoi() does not detect errors.
Also suggest checking for conversion errors.

Suggest referring to eal_parse_simd_bitwidth() as an example.

> +	if (worker_stack_size == 0)
> +		return -1;
> +
> +	*huge_worker_stack_size = worker_stack_size * 1024;

Should overflow be considered when multiplying by 1024?

> +	return 0;
> +}
> +
>  int
>  eal_parse_common_option(int opt, const char *optarg,
>  			struct internal_config *conf)
> @@ -1921,6 +1938,17 @@ eal_parse_common_option(int opt, const char *optarg,
>  		}
>  		break;
>  
> +#ifndef RTE_EXEC_ENV_WINDOWS
> +	case OPT_HUGE_WORKER_STACK_NUM:
> +		if (eal_parse_huge_worker_stack(optarg,
> +						&conf->huge_worker_stack_size) < 0) {
> +			RTE_LOG(ERR, EAL, "invalid parameter for --"
> +				OPT_HUGE_WORKER_STACK"\n");
> +			return -1;
> +		}
> +		break;
> +#endif /* !RTE_EXEC_ENV_WINDOWS */
> +
>  	/* don't know what to do, leave this to caller */
>  	default:
>  		return 1;
> diff --git a/lib/eal/common/eal_internal_cfg.h b/lib/eal/common/eal_internal_cfg.h
> index b71faadd18..8ac710da02 100644
> --- a/lib/eal/common/eal_internal_cfg.h
> +++ b/lib/eal/common/eal_internal_cfg.h
> @@ -48,6 +48,9 @@ struct hugepage_file_discipline {
>  	bool unlink_existing;
>  };
>  
> +/** Worker hugepage stack size should default to OS value. */
> +#define USE_OS_STACK_SIZE ((size_t)~0)

USE is a verb; suggest HUGE_WORKER_STACK_DEFAULT_SIZE or HUGE_WORKER_STACK_DEFAULT_OS_SIZE instead.

> +
>  /**
>   * internal configuration
>   */
> @@ -102,6 +105,7 @@ struct internal_config {
>  	unsigned int no_telemetry; /**< true to disable Telemetry */
>  	struct simd_bitwidth max_simd_bitwidth;
>  	/**< max simd bitwidth path to use */
> +	size_t huge_worker_stack_size; /**< worker thread stack size in KiB */

huge_worker_stack_size has already been multiplied by 1024, so its unit is bytes, not KiB.

>  };
>  
>  void eal_reset_internal_config(struct internal_config *internal_cfg);
> diff --git a/lib/eal/common/eal_options.h b/lib/eal/common/eal_options.h
> index 8e4f7202a2..3cc9cb6412 100644
> --- a/lib/eal/common/eal_options.h
> +++ b/lib/eal/common/eal_options.h
> @@ -87,6 +87,8 @@ enum {
>  	OPT_NO_TELEMETRY_NUM,
>  #define OPT_FORCE_MAX_SIMD_BITWIDTH  "force-max-simd-bitwidth"
>  	OPT_FORCE_MAX_SIMD_BITWIDTH_NUM,
> +#define OPT_HUGE_WORKER_STACK  "huge-worker-stack"
> +	OPT_HUGE_WORKER_STACK_NUM,
>  
>  	OPT_LONG_MAX_NUM
>  };
> diff --git a/lib/eal/linux/eal.c b/lib/eal/linux/eal.c
> index 1ef263434a..e8c872ef7b 100644
> --- a/lib/eal/linux/eal.c
> +++ b/lib/eal/linux/eal.c
> @@ -1144,8 +1144,69 @@ rte_eal_init(int argc, char **argv)
>  		lcore_config[i].state = WAIT;
>  
>  		/* create a thread for each lcore */
> -		ret = pthread_create(&lcore_config[i].thread_id, NULL,
> -				     eal_thread_loop, (void *)(uintptr_t)i);
> +		if (internal_conf->huge_worker_stack_size == 0) {
> +			ret = pthread_create(&lcore_config[i].thread_id, NULL,
> +					     eal_thread_loop,
> +					     (void *)(uintptr_t)i);
> +		} else {
> +			/* Allocate NUMA aware stack memory and set
> +			 * pthread attributes
> +			 */
> +			pthread_attr_t attr;
> +			size_t stack_size;
> +			void *stack_ptr;
> +
> +			if (pthread_attr_init(&attr) != 0) {
> +				rte_eal_init_alert("Cannot init pthread "
> +						   "attributes");
> +				rte_errno = EFAULT;
> +				return -1;
> +			}
> +			if (internal_conf->huge_worker_stack_size ==
> +			    USE_OS_STACK_SIZE) {
> +				if (pthread_attr_getstacksize(&attr,
> +							      &stack_size) != 0) {
> +					rte_errno = EFAULT;
> +					return -1;
> +				}
> +			} else {
> +				stack_size =
> +					internal_conf->huge_worker_stack_size;
> +			}
> +			stack_ptr =
> +				rte_zmalloc_socket("lcore_stack",
> +						   stack_size,
> +						   stack_size,
> +						   rte_lcore_to_socket_id(i));
> +
> +			if (stack_ptr == NULL) {
> +				rte_eal_init_alert("Cannot allocate stack "
> +						   "memory for worker lcore");
> +				rte_errno = ENOMEM;
> +				return -1;
> +			}
> +
> +			if (pthread_attr_setstack(&attr,
> +						  stack_ptr,
> +						  stack_size) != 0) {
> +				rte_eal_init_alert("Cannot set pthread "
> +						   "stack attributes");
> +				rte_errno = EFAULT;
> +				return -1;
> +			}
> +
> +			/* create a thread for each lcore */
> +			ret = pthread_create(&lcore_config[i].thread_id, &attr,
> +					     eal_thread_loop,
> +					     (void *)(uintptr_t)i);
> +
> +			if (pthread_attr_destroy(&attr) != 0) {
> +				rte_eal_init_alert("Cannot destroy pthread "
> +						   "attributes");
> +				rte_errno = EFAULT;
> +				return -1;
> +			}
> +		}
>  		if (ret != 0)
>  			rte_panic("Cannot create thread\n");

It's recommended that this logic be split out into an independent function.

>  
> 

Also, this patch seems to support only Linux; what about FreeBSD/Windows?


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH v2] eal: allow worker lcore stacks to be allocated from hugepage memory
  2022-05-13 21:38   ` Stephen Hemminger
@ 2022-05-16 19:43     ` Don Wallwork
  0 siblings, 0 replies; 25+ messages in thread
From: Don Wallwork @ 2022-05-16 19:43 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: dev, mb, anatoly.burakov, dmitry.kozliuk, bruce.richardson,
	Honnappa.Nagarahalli, nd, haiyue.wang



On 5/13/2022 5:38 PM, Stephen Hemminger wrote:
> On Fri, 13 May 2022 13:58:22 -0400
> Don Wallwork <donw@xsightlabs.com> wrote:
>
>> +static int
>> +eal_parse_huge_worker_stack(const char *arg, size_t *huge_worker_stack_size)
>> +{
>> +	size_t worker_stack_size;
>> +	if (arg == NULL) {
>> +		*huge_worker_stack_size = USE_OS_STACK_SIZE;
>> +		return 0;
>> +	}
>> +	worker_stack_size = atoi(arg);
>> +	if (worker_stack_size == 0)
>> +		return -1;
>> +
> Since worker_stack_size is size_t you are better off using something
> like strtoul() and check for more errors from that
This and your other comment are addressed in the v3 patch.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH v2] eal: allow worker lcore stacks to be allocated from hugepage memory
  2022-05-14  3:31   ` fengchengwen
@ 2022-05-16 19:47     ` Don Wallwork
  2022-05-17  6:28       ` Morten Brørup
  0 siblings, 1 reply; 25+ messages in thread
From: Don Wallwork @ 2022-05-16 19:47 UTC (permalink / raw)
  To: fengchengwen, dev
  Cc: stephen, mb, anatoly.burakov, dmitry.kozliuk, bruce.richardson,
	Honnappa.Nagarahalli, nd, haiyue.wang

On 5/13/2022 11:31 PM, fengchengwen wrote:
> Also, this patch seem only adapt linux, what about freebsd/windows? 

The intent was to add support for this optimization for Linux only
initially.  Support for other OSes can be added later.  I currently
don't have a means to test the other environments.

All of your other comments are addressed by the v3 patch.


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH v3] eal: allow worker lcore stacks to be allocated from hugepage memory
  2022-05-02 14:10 [PATCH] eal: allow worker lcore stacks to be allocated from hugepage memory Don Wallwork
                   ` (2 preceding siblings ...)
  2022-05-13 17:58 ` [PATCH v2] " Don Wallwork
@ 2022-05-16 19:50 ` Don Wallwork
  2022-05-16 20:28   ` Stephen Hemminger
  2022-05-17 15:31 ` [PATCH v4] " Don Wallwork
                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 25+ messages in thread
From: Don Wallwork @ 2022-05-16 19:50 UTC (permalink / raw)
  To: dev
  Cc: donw, stephen, fengchengwen, mb, anatoly.burakov, dmitry.kozliuk,
	bruce.richardson, Honnappa.Nagarahalli, nd, haiyue.wang

Add support for using hugepages for worker lcore stack memory.  The
intent is to improve performance by reducing stack memory related TLB
misses and also by using memory local to the NUMA node of each lcore.

EAL option '--huge-worker-stack [stack-size-in-kbytes]' is added to allow
the feature to be enabled at runtime.  If the size is not specified,
the system pthread stack size will be used.

Signed-off-by: Don Wallwork <donw@xsightlabs.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
---
 doc/guides/linux_gsg/eal_args.include.rst     |  6 ++
 .../prog_guide/env_abstraction_layer.rst      | 21 +++++++
 lib/eal/common/eal_common_options.c           | 36 +++++++++++
 lib/eal/common/eal_internal_cfg.h             |  4 ++
 lib/eal/common/eal_options.h                  |  2 +
 lib/eal/linux/eal.c                           | 61 ++++++++++++++++++-
 6 files changed, 128 insertions(+), 2 deletions(-)

diff --git a/doc/guides/linux_gsg/eal_args.include.rst b/doc/guides/linux_gsg/eal_args.include.rst
index 3549a0cf56..9cfbf7de84 100644
--- a/doc/guides/linux_gsg/eal_args.include.rst
+++ b/doc/guides/linux_gsg/eal_args.include.rst
@@ -116,6 +116,12 @@ Memory-related options
 
     Force IOVA mode to a specific value.
 
+*   ``--huge-worker-stack[=size]``
+
+    Allocate worker stack memory from hugepage memory. Stack size defaults
+    to system pthread stack size unless the optional size (in kbytes) is
+    specified.
+
 Debugging options
 ~~~~~~~~~~~~~~~~~
 
diff --git a/doc/guides/prog_guide/env_abstraction_layer.rst b/doc/guides/prog_guide/env_abstraction_layer.rst
index 5f0748fba1..e74516f0cf 100644
--- a/doc/guides/prog_guide/env_abstraction_layer.rst
+++ b/doc/guides/prog_guide/env_abstraction_layer.rst
@@ -329,6 +329,27 @@ Another option is to use bigger page sizes. Since fewer pages are required to
 cover the same memory area, fewer file descriptors will be stored internally
 by EAL.
 
+.. _huge-worker-stack:
+
+Hugepage Worker Stacks
+^^^^^^^^^^^^^^^^^^^^^^
+
+When the ``--huge-worker-stack[=size]`` EAL option is specified, worker
+thread stacks are allocated from hugepage memory local to the NUMA node
+of the thread. Worker stack size defaults to system pthread stack size
+if the optional size parameter is not specified.
+
+.. warning::
+    Stacks allocated from hugepage memory are not protected by guard
+    pages. Worker stacks must be sufficiently sized to prevent stack
+    overflow when this option is used.
+
+    As with normal thread stacks, hugepage worker thread stack size is
+    fixed and is not dynamically resized. Therefore, an application that
+    is free of stack page faults under a given load should be safe with
+    hugepage worker thread stacks given the same thread stack size and
+    loading conditions.
+
 Support for Externally Allocated Memory
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/lib/eal/common/eal_common_options.c b/lib/eal/common/eal_common_options.c
index f247a42455..b0d429ed34 100644
--- a/lib/eal/common/eal_common_options.c
+++ b/lib/eal/common/eal_common_options.c
@@ -103,6 +103,7 @@ eal_long_options[] = {
 	{OPT_TELEMETRY,         0, NULL, OPT_TELEMETRY_NUM        },
 	{OPT_NO_TELEMETRY,      0, NULL, OPT_NO_TELEMETRY_NUM     },
 	{OPT_FORCE_MAX_SIMD_BITWIDTH, 1, NULL, OPT_FORCE_MAX_SIMD_BITWIDTH_NUM},
+	{OPT_HUGE_WORKER_STACK, 2, NULL, OPT_HUGE_WORKER_STACK_NUM     },
 
 	{0,                     0, NULL, 0                        }
 };
@@ -1618,6 +1619,25 @@ eal_parse_huge_unlink(const char *arg, struct hugepage_file_discipline *out)
 	return -1;
 }
 
+static int
+eal_parse_huge_worker_stack(const char *arg, size_t *huge_worker_stack_size)
+{
+	size_t worker_stack_size;
+	char *end;
+	if (arg == NULL || arg[0] == '\0') {
+		*huge_worker_stack_size = WORKER_STACK_SIZE_FROM_OS;
+		return 0;
+	}
+	errno = 0;
+	worker_stack_size = strtoul(arg, &end, 10);
+	if (errno || end == NULL || worker_stack_size == 0 ||
+	    worker_stack_size >= (size_t)-1 / 1024)
+		return -1;
+
+	*huge_worker_stack_size = worker_stack_size * 1024;
+	return 0;
+}
+
 int
 eal_parse_common_option(int opt, const char *optarg,
 			struct internal_config *conf)
@@ -1921,6 +1941,17 @@ eal_parse_common_option(int opt, const char *optarg,
 		}
 		break;
 
+#ifndef RTE_EXEC_ENV_WINDOWS
+	case OPT_HUGE_WORKER_STACK_NUM:
+		if (eal_parse_huge_worker_stack(optarg,
+						&conf->huge_worker_stack_size) < 0) {
+			RTE_LOG(ERR, EAL, "invalid parameter for --"
+				OPT_HUGE_WORKER_STACK"\n");
+			return -1;
+		}
+		break;
+#endif /* !RTE_EXEC_ENV_WINDOWS */
+
 	/* don't know what to do, leave this to caller */
 	default:
 		return 1;
@@ -2235,5 +2266,10 @@ eal_common_usage(void)
 	       "  --"OPT_NO_PCI"            Disable PCI\n"
 	       "  --"OPT_NO_HPET"           Disable HPET\n"
 	       "  --"OPT_NO_SHCONF"         No shared config (mmap'd files)\n"
+	       "  --"OPT_HUGE_WORKER_STACK"[=size]\n"
+	       "                      Allocate worker thread stacks from\n"
+	       "                      hugepage memory. Size is in units of\n"
+	       "                      kbytes and defaults to system thread\n"
+	       "                      stack size if not specified.\n"
 	       "\n", RTE_MAX_LCORE);
 }
diff --git a/lib/eal/common/eal_internal_cfg.h b/lib/eal/common/eal_internal_cfg.h
index b71faadd18..5e154967e4 100644
--- a/lib/eal/common/eal_internal_cfg.h
+++ b/lib/eal/common/eal_internal_cfg.h
@@ -48,6 +48,9 @@ struct hugepage_file_discipline {
 	bool unlink_existing;
 };
 
+/** Worker hugepage stack size should default to OS value. */
+#define WORKER_STACK_SIZE_FROM_OS ((size_t)~0)
+
 /**
  * internal configuration
  */
@@ -102,6 +105,7 @@ struct internal_config {
 	unsigned int no_telemetry; /**< true to disable Telemetry */
 	struct simd_bitwidth max_simd_bitwidth;
 	/**< max simd bitwidth path to use */
+	size_t huge_worker_stack_size; /**< worker thread stack size */
 };
 
 void eal_reset_internal_config(struct internal_config *internal_cfg);
diff --git a/lib/eal/common/eal_options.h b/lib/eal/common/eal_options.h
index 8e4f7202a2..3cc9cb6412 100644
--- a/lib/eal/common/eal_options.h
+++ b/lib/eal/common/eal_options.h
@@ -87,6 +87,8 @@ enum {
 	OPT_NO_TELEMETRY_NUM,
 #define OPT_FORCE_MAX_SIMD_BITWIDTH  "force-max-simd-bitwidth"
 	OPT_FORCE_MAX_SIMD_BITWIDTH_NUM,
+#define OPT_HUGE_WORKER_STACK  "huge-worker-stack"
+	OPT_HUGE_WORKER_STACK_NUM,
 
 	OPT_LONG_MAX_NUM
 };
diff --git a/lib/eal/linux/eal.c b/lib/eal/linux/eal.c
index 1ef263434a..2bee66577e 100644
--- a/lib/eal/linux/eal.c
+++ b/lib/eal/linux/eal.c
@@ -857,6 +857,64 @@ is_iommu_enabled(void)
 	return n > 2;
 }
 
+static int
+eal_worker_thread_create(struct internal_config *internal_conf,
+			 int lcore_id)
+{
+	pthread_attr_t attr;
+	size_t stack_size;
+	void *stack_ptr;
+	int ret;
+
+	if (internal_conf->huge_worker_stack_size == 0)
+		return pthread_create(&lcore_config[lcore_id].thread_id,
+				      NULL,
+				      eal_thread_loop,
+				      (void *)(uintptr_t)lcore_id);
+
+	/* Allocate NUMA aware stack memory and set pthread attributes */
+	if (pthread_attr_init(&attr) != 0) {
+		rte_eal_init_alert("Cannot init pthread attributes");
+		rte_errno = EFAULT;
+		return -1;
+	}
+	if (internal_conf->huge_worker_stack_size == WORKER_STACK_SIZE_FROM_OS) {
+		if (pthread_attr_getstacksize(&attr, &stack_size) != 0) {
+			rte_errno = EFAULT;
+			return -1;
+		}
+	} else {
+		stack_size = internal_conf->huge_worker_stack_size;
+	}
+	stack_ptr = rte_zmalloc_socket("lcore_stack",
+				       stack_size,
+				       stack_size,
+				       rte_lcore_to_socket_id(lcore_id));
+
+	if (stack_ptr == NULL) {
+		rte_eal_init_alert("Cannot allocate worker lcore stack memory");
+		rte_errno = ENOMEM;
+		return -1;
+	}
+
+	if (pthread_attr_setstack(&attr, stack_ptr, stack_size) != 0) {
+		rte_eal_init_alert("Cannot set pthread stack attributes");
+		rte_errno = EFAULT;
+		return -1;
+	}
+
+	ret = pthread_create(&lcore_config[lcore_id].thread_id, &attr,
+			     eal_thread_loop,
+			     (void *)(uintptr_t)lcore_id);
+
+	if (pthread_attr_destroy(&attr) != 0) {
+		rte_eal_init_alert("Cannot destroy pthread attributes");
+		rte_errno = EFAULT;
+		return -1;
+	}
+	return ret;
+}
+
 /* Launch threads, called at application init(). */
 int
 rte_eal_init(int argc, char **argv)
@@ -1144,8 +1202,7 @@ rte_eal_init(int argc, char **argv)
 		lcore_config[i].state = WAIT;
 
 		/* create a thread for each lcore */
-		ret = pthread_create(&lcore_config[i].thread_id, NULL,
-				     eal_thread_loop, (void *)(uintptr_t)i);
+		ret = eal_worker_thread_create(internal_conf, i);
 		if (ret != 0)
 			rte_panic("Cannot create thread\n");
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH v3] eal: allow worker lcore stacks to be allocated from hugepage memory
  2022-05-16 19:50 ` [PATCH v3] " Don Wallwork
@ 2022-05-16 20:28   ` Stephen Hemminger
  2022-05-16 20:29     ` Don Wallwork
  0 siblings, 1 reply; 25+ messages in thread
From: Stephen Hemminger @ 2022-05-16 20:28 UTC (permalink / raw)
  To: Don Wallwork
  Cc: dev, fengchengwen, mb, anatoly.burakov, dmitry.kozliuk,
	bruce.richardson, Honnappa.Nagarahalli, nd, haiyue.wang

On Mon, 16 May 2022 15:50:04 -0400
Don Wallwork <donw@xsightlabs.com> wrote:


> +static int
> +eal_parse_huge_worker_stack(const char *arg, size_t *huge_worker_stack_size)
> +{
> +	size_t worker_stack_size;
> +	char *end;
> +	if (arg == NULL || arg[0] == '\0') {
> +		*huge_worker_stack_size = WORKER_STACK_SIZE_FROM_OS;
> +		return 0;
> +	}

Looks good. Minor nit if you do another version.
Please put blank line after declarations.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH v3] eal: allow worker lcore stacks to be allocated from hugepage memory
  2022-05-16 20:28   ` Stephen Hemminger
@ 2022-05-16 20:29     ` Don Wallwork
  0 siblings, 0 replies; 25+ messages in thread
From: Don Wallwork @ 2022-05-16 20:29 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: dev, fengchengwen, mb, anatoly.burakov, dmitry.kozliuk,
	bruce.richardson, Honnappa.Nagarahalli, nd, haiyue.wang

On 5/16/2022 4:28 PM, Stephen Hemminger wrote:
> On Mon, 16 May 2022 15:50:04 -0400
> Don Wallwork <donw@xsightlabs.com> wrote:
>
>
>> +static int
>> +eal_parse_huge_worker_stack(const char *arg, size_t *huge_worker_stack_size)
>> +{
>> +	size_t worker_stack_size;
>> +	char *end;
>> +	if (arg == NULL || arg[0] == '\0') {
>> +		*huge_worker_stack_size = WORKER_STACK_SIZE_FROM_OS;
>> +		return 0;
>> +	}
> Looks good. Minor nit if you do another version.
> Please put blank line after declarations.
Thanks.  Hopefully another version won't be required, but if so I'll 
make that change.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* RE: [PATCH v2] eal: allow worker lcore stacks to be allocated from hugepage memory
  2022-05-16 19:47     ` Don Wallwork
@ 2022-05-17  6:28       ` Morten Brørup
  0 siblings, 0 replies; 25+ messages in thread
From: Morten Brørup @ 2022-05-17  6:28 UTC (permalink / raw)
  To: Don Wallwork, fengchengwen, dev
  Cc: stephen, anatoly.burakov, dmitry.kozliuk, bruce.richardson,
	Honnappa.Nagarahalli, nd, haiyue.wang

> From: Don Wallwork [mailto:donw@xsightlabs.com]
> Sent: Monday, 16 May 2022 21.48
> 
> On 5/13/2022 11:31 PM, fengchengwen wrote:
> > Also, this patch seem only adapt linux, what about freebsd/windows?
> 
> The intent was to add this support for this optimization for Linux only
> initially.  Support for other OSes can be added later.  I currently
> don't have a means to test the other environments.

I agree with this approach.

And it should suffice if the EAL aborts with an error message if the OPT_HUGE_WORKER_STACK parameter is used in a non-supported environment.


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH v4] eal: allow worker lcore stacks to be allocated from hugepage memory
  2022-05-02 14:10 [PATCH] eal: allow worker lcore stacks to be allocated from hugepage memory Don Wallwork
                   ` (3 preceding siblings ...)
  2022-05-16 19:50 ` [PATCH v3] " Don Wallwork
@ 2022-05-17 15:31 ` Don Wallwork
  2022-05-17 15:56   ` Stephen Hemminger
                     ` (3 more replies)
  2022-05-24 19:46 ` [PATCH v5] " Don Wallwork
  2022-05-24 19:51 ` [PATCH v6] " Don Wallwork
  6 siblings, 4 replies; 25+ messages in thread
From: Don Wallwork @ 2022-05-17 15:31 UTC (permalink / raw)
  To: dev
  Cc: donw, stephen, fengchengwen, mb, anatoly.burakov, dmitry.kozliuk,
	bruce.richardson, Honnappa.Nagarahalli, nd, haiyue.wang

Add support for using hugepages for worker lcore stack memory.  The
intent is to improve performance by reducing stack memory related TLB
misses and also by using memory local to the NUMA node of each lcore.

EAL option '--huge-worker-stack [stack-size-in-kbytes]' is added to allow
the feature to be enabled at runtime.  If the size is not specified,
the system pthread stack size will be used.

Signed-off-by: Don Wallwork <donw@xsightlabs.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
---
 doc/guides/linux_gsg/eal_args.include.rst     |  6 ++
 .../prog_guide/env_abstraction_layer.rst      | 21 +++++++
 lib/eal/common/eal_common_options.c           | 41 +++++++++++++
 lib/eal/common/eal_internal_cfg.h             |  4 ++
 lib/eal/common/eal_options.h                  |  2 +
 lib/eal/linux/eal.c                           | 61 ++++++++++++++++++-
 6 files changed, 133 insertions(+), 2 deletions(-)

diff --git a/doc/guides/linux_gsg/eal_args.include.rst b/doc/guides/linux_gsg/eal_args.include.rst
index 3549a0cf56..9cfbf7de84 100644
--- a/doc/guides/linux_gsg/eal_args.include.rst
+++ b/doc/guides/linux_gsg/eal_args.include.rst
@@ -116,6 +116,12 @@ Memory-related options
 
     Force IOVA mode to a specific value.
 
+*   ``--huge-worker-stack[=size]``
+
+    Allocate worker stack memory from hugepage memory. Stack size defaults
+    to system pthread stack size unless the optional size (in kbytes) is
+    specified.
+
 Debugging options
 ~~~~~~~~~~~~~~~~~
 
diff --git a/doc/guides/prog_guide/env_abstraction_layer.rst b/doc/guides/prog_guide/env_abstraction_layer.rst
index 5f0748fba1..e74516f0cf 100644
--- a/doc/guides/prog_guide/env_abstraction_layer.rst
+++ b/doc/guides/prog_guide/env_abstraction_layer.rst
@@ -329,6 +329,27 @@ Another option is to use bigger page sizes. Since fewer pages are required to
 cover the same memory area, fewer file descriptors will be stored internally
 by EAL.
 
+.. _huge-worker-stack:
+
+Hugepage Worker Stacks
+^^^^^^^^^^^^^^^^^^^^^^
+
+When the ``--huge-worker-stack[=size]`` EAL option is specified, worker
+thread stacks are allocated from hugepage memory local to the NUMA node
+of the thread. Worker stack size defaults to system pthread stack size
+if the optional size parameter is not specified.
+
+.. warning::
+    Stacks allocated from hugepage memory are not protected by guard
+    pages. Worker stacks must be sufficiently sized to prevent stack
+    overflow when this option is used.
+
+    As with normal thread stacks, hugepage worker thread stack size is
+    fixed and is not dynamically resized. Therefore, an application that
+    is free of stack page faults under a given load should be safe with
+    hugepage worker thread stacks given the same thread stack size and
+    loading conditions.
+
 Support for Externally Allocated Memory
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/lib/eal/common/eal_common_options.c b/lib/eal/common/eal_common_options.c
index f247a42455..370801f19b 100644
--- a/lib/eal/common/eal_common_options.c
+++ b/lib/eal/common/eal_common_options.c
@@ -103,6 +103,7 @@ eal_long_options[] = {
 	{OPT_TELEMETRY,         0, NULL, OPT_TELEMETRY_NUM        },
 	{OPT_NO_TELEMETRY,      0, NULL, OPT_NO_TELEMETRY_NUM     },
 	{OPT_FORCE_MAX_SIMD_BITWIDTH, 1, NULL, OPT_FORCE_MAX_SIMD_BITWIDTH_NUM},
+	{OPT_HUGE_WORKER_STACK, 2, NULL, OPT_HUGE_WORKER_STACK_NUM     },
 
 	{0,                     0, NULL, 0                        }
 };
@@ -1618,6 +1619,28 @@ eal_parse_huge_unlink(const char *arg, struct hugepage_file_discipline *out)
 	return -1;
 }
 
+#ifndef RTE_EXEC_ENV_WINDOWS
+static int
+eal_parse_huge_worker_stack(const char *arg, size_t *huge_worker_stack_size)
+{
+	size_t worker_stack_size;
+	char *end;
+
+	if (arg == NULL || arg[0] == '\0') {
+		*huge_worker_stack_size = WORKER_STACK_SIZE_FROM_OS;
+		return 0;
+	}
+	errno = 0;
+	worker_stack_size = strtoul(arg, &end, 10);
+	if (errno || end == NULL || worker_stack_size == 0 ||
+	    worker_stack_size >= (size_t)-1 / 1024)
+		return -1;
+
+	*huge_worker_stack_size = worker_stack_size * 1024;
+	return 0;
+}
+#endif
+
 int
 eal_parse_common_option(int opt, const char *optarg,
 			struct internal_config *conf)
@@ -1921,6 +1944,17 @@ eal_parse_common_option(int opt, const char *optarg,
 		}
 		break;
 
+#ifndef RTE_EXEC_ENV_WINDOWS
+	case OPT_HUGE_WORKER_STACK_NUM:
+		if (eal_parse_huge_worker_stack(optarg,
+						&conf->huge_worker_stack_size) < 0) {
+			RTE_LOG(ERR, EAL, "invalid parameter for --"
+				OPT_HUGE_WORKER_STACK"\n");
+			return -1;
+		}
+		break;
+#endif /* !RTE_EXEC_ENV_WINDOWS */
+
 	/* don't know what to do, leave this to caller */
 	default:
 		return 1;
@@ -2235,5 +2269,12 @@ eal_common_usage(void)
 	       "  --"OPT_NO_PCI"            Disable PCI\n"
 	       "  --"OPT_NO_HPET"           Disable HPET\n"
 	       "  --"OPT_NO_SHCONF"         No shared config (mmap'd files)\n"
+#ifndef RTE_EXEC_ENV_WINDOWS
+	       "  --"OPT_HUGE_WORKER_STACK"[=size]\n"
+	       "                      Allocate worker thread stacks from\n"
+	       "                      hugepage memory. Size is in units of\n"
+	       "                      kbytes and defaults to system thread\n"
+	       "                      stack size if not specified.\n"
+#endif
 	       "\n", RTE_MAX_LCORE);
 }
diff --git a/lib/eal/common/eal_internal_cfg.h b/lib/eal/common/eal_internal_cfg.h
index b71faadd18..5e154967e4 100644
--- a/lib/eal/common/eal_internal_cfg.h
+++ b/lib/eal/common/eal_internal_cfg.h
@@ -48,6 +48,9 @@ struct hugepage_file_discipline {
 	bool unlink_existing;
 };
 
+/** Worker hugepage stack size should default to OS value. */
+#define WORKER_STACK_SIZE_FROM_OS ((size_t)~0)
+
 /**
  * internal configuration
  */
@@ -102,6 +105,7 @@ struct internal_config {
 	unsigned int no_telemetry; /**< true to disable Telemetry */
 	struct simd_bitwidth max_simd_bitwidth;
 	/**< max simd bitwidth path to use */
+	size_t huge_worker_stack_size; /**< worker thread stack size */
 };
 
 void eal_reset_internal_config(struct internal_config *internal_cfg);
diff --git a/lib/eal/common/eal_options.h b/lib/eal/common/eal_options.h
index 8e4f7202a2..3cc9cb6412 100644
--- a/lib/eal/common/eal_options.h
+++ b/lib/eal/common/eal_options.h
@@ -87,6 +87,8 @@ enum {
 	OPT_NO_TELEMETRY_NUM,
 #define OPT_FORCE_MAX_SIMD_BITWIDTH  "force-max-simd-bitwidth"
 	OPT_FORCE_MAX_SIMD_BITWIDTH_NUM,
+#define OPT_HUGE_WORKER_STACK  "huge-worker-stack"
+	OPT_HUGE_WORKER_STACK_NUM,
 
 	OPT_LONG_MAX_NUM
 };
diff --git a/lib/eal/linux/eal.c b/lib/eal/linux/eal.c
index 1ef263434a..2bee66577e 100644
--- a/lib/eal/linux/eal.c
+++ b/lib/eal/linux/eal.c
@@ -857,6 +857,64 @@ is_iommu_enabled(void)
 	return n > 2;
 }
 
+static int
+eal_worker_thread_create(struct internal_config *internal_conf,
+			 int lcore_id)
+{
+	pthread_attr_t attr;
+	size_t stack_size;
+	void *stack_ptr;
+	int ret;
+
+	if (internal_conf->huge_worker_stack_size == 0)
+		return pthread_create(&lcore_config[lcore_id].thread_id,
+				      NULL,
+				      eal_thread_loop,
+				      (void *)(uintptr_t)lcore_id);
+
+	/* Allocate NUMA aware stack memory and set pthread attributes */
+	if (pthread_attr_init(&attr) != 0) {
+		rte_eal_init_alert("Cannot init pthread attributes");
+		rte_errno = EFAULT;
+		return -1;
+	}
+	if (internal_conf->huge_worker_stack_size == WORKER_STACK_SIZE_FROM_OS) {
+		if (pthread_attr_getstacksize(&attr, &stack_size) != 0) {
+			rte_errno = EFAULT;
+			return -1;
+		}
+	} else {
+		stack_size = internal_conf->huge_worker_stack_size;
+	}
+	stack_ptr = rte_zmalloc_socket("lcore_stack",
+				       stack_size,
+				       stack_size,
+				       rte_lcore_to_socket_id(lcore_id));
+
+	if (stack_ptr == NULL) {
+		rte_eal_init_alert("Cannot allocate worker lcore stack memory");
+		rte_errno = ENOMEM;
+		return -1;
+	}
+
+	if (pthread_attr_setstack(&attr, stack_ptr, stack_size) != 0) {
+		rte_eal_init_alert("Cannot set pthread stack attributes");
+		rte_errno = EFAULT;
+		return -1;
+	}
+
+	ret = pthread_create(&lcore_config[lcore_id].thread_id, &attr,
+			     eal_thread_loop,
+			     (void *)(uintptr_t)lcore_id);
+
+	if (pthread_attr_destroy(&attr) != 0) {
+		rte_eal_init_alert("Cannot destroy pthread attributes");
+		rte_errno = EFAULT;
+		return -1;
+	}
+	return ret;
+}
+
 /* Launch threads, called at application init(). */
 int
 rte_eal_init(int argc, char **argv)
@@ -1144,8 +1202,7 @@ rte_eal_init(int argc, char **argv)
 		lcore_config[i].state = WAIT;
 
 		/* create a thread for each lcore */
-		ret = pthread_create(&lcore_config[i].thread_id, NULL,
-				     eal_thread_loop, (void *)(uintptr_t)i);
+		ret = eal_worker_thread_create(internal_conf, i);
 		if (ret != 0)
 			rte_panic("Cannot create thread\n");
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH v4] eal: allow worker lcore stacks to be allocated from hugepage memory
  2022-05-17 15:31 ` [PATCH v4] " Don Wallwork
@ 2022-05-17 15:56   ` Stephen Hemminger
  2022-05-18 14:10     ` Don Wallwork
  2022-05-20  8:30   ` fengchengwen
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 25+ messages in thread
From: Stephen Hemminger @ 2022-05-17 15:56 UTC (permalink / raw)
  To: Don Wallwork
  Cc: dev, fengchengwen, mb, anatoly.burakov, dmitry.kozliuk,
	bruce.richardson, Honnappa.Nagarahalli, nd, haiyue.wang

On Tue, 17 May 2022 11:31:36 -0400
Don Wallwork <donw@xsightlabs.com> wrote:

> Add support for using hugepages for worker lcore stack memory.  The
> intent is to improve performance by reducing stack memory related TLB
> misses and also by using memory local to the NUMA node of each lcore.
> 
> EAL option '--huge-worker-stack [stack-size-in-kbytes]' is added to allow
> the feature to be enabled at runtime.  If the size is not specified,
> the system pthread stack size will be used.
> 
> Signed-off-by: Don Wallwork <donw@xsightlabs.com>
> Acked-by: Morten Brørup <mb@smartsharesystems.com>
> ---

This looks great, just thinking a little more about what the impact
of using it would be.

Since the memory region for the stack is never freed, it will cause
complaints from address sanitizer and maybe from valgrind.

One way to work around that would be to use the lower-level allocation
routine to get the memory segments. This would make stacks a multiple
of the page size, which would not be a bad idea anyway.
Plus you could use eal_memalloc_seg_bulk.


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH v4] eal: allow worker lcore stacks to be allocated from hugepage memory
  2022-05-17 15:56   ` Stephen Hemminger
@ 2022-05-18 14:10     ` Don Wallwork
  0 siblings, 0 replies; 25+ messages in thread
From: Don Wallwork @ 2022-05-18 14:10 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: dev, fengchengwen, mb, anatoly.burakov, dmitry.kozliuk,
	bruce.richardson, Honnappa.Nagarahalli, nd, haiyue.wang

On 5/17/2022 11:56 AM, Stephen Hemminger wrote:
> On Tue, 17 May 2022 11:31:36 -0400
> Don Wallwork <donw@xsightlabs.com> wrote:
>
>> Add support for using hugepages for worker lcore stack memory.  The
>> intent is to improve performance by reducing stack memory related TLB
>> misses and also by using memory local to the NUMA node of each lcore.
>>
>> EAL option '--huge-worker-stack [stack-size-in-kbytes]' is added to allow
>> the feature to be enabled at runtime.  If the size is not specified,
>> the system pthread stack size will be used.
>>
>> Signed-off-by: Don Wallwork <donw@xsightlabs.com>
>> Acked-by: Morten Brørup <mb@smartsharesystems.com>
>> ---
> This looks great, just thinking a little more about what the impact
> of using it would be.
>
> Since the memory region for the stack is never freed, it will cause
> complaints from address sanitizer and maybe from valgrind.
>
> One way to workaround that would be to use the lower level allocation
> routine to get the memory segments. This would make stacks a multiple
> of page size which would not be bad idea anyway.
> Plus you could use eal_memalloc_seg_bulk.
>
The problem with using this API is that it requires allocating page-sized
stacks, which would be undesirable in memory-constrained environments or
when the huge page size is 1GB.

We looked for a place to free this memory, but could not find any place
in DPDK where the worker threads are canceled.  Obviously the worker
threads have to be stopped before we can free this memory.


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH v4] eal: allow worker lcore stacks to be allocated from hugepage memory
  2022-05-17 15:31 ` [PATCH v4] " Don Wallwork
  2022-05-17 15:56   ` Stephen Hemminger
@ 2022-05-20  8:30   ` fengchengwen
  2022-05-23 22:35   ` Kathleen Capella
  2022-05-24 14:40   ` Burakov, Anatoly
  3 siblings, 0 replies; 25+ messages in thread
From: fengchengwen @ 2022-05-20  8:30 UTC (permalink / raw)
  To: Don Wallwork, dev
  Cc: stephen, mb, anatoly.burakov, dmitry.kozliuk, bruce.richardson,
	Honnappa.Nagarahalli, nd, haiyue.wang

Acked-by: Chengwen Feng <fengchengwen@huawei.com>

On 2022/5/17 23:31, Don Wallwork wrote:
> Add support for using hugepages for worker lcore stack memory.  The
> intent is to improve performance by reducing stack memory related TLB
> misses and also by using memory local to the NUMA node of each lcore.
> 
> EAL option '--huge-worker-stack [stack-size-in-kbytes]' is added to allow
> the feature to be enabled at runtime.  If the size is not specified,
> the system pthread stack size will be used.
> 
> Signed-off-by: Don Wallwork <donw@xsightlabs.com>
> Acked-by: Morten Brørup <mb@smartsharesystems.com>
> ---

snip


^ permalink raw reply	[flat|nested] 25+ messages in thread

* RE: [PATCH v4] eal: allow worker lcore stacks to be allocated from hugepage memory
  2022-05-17 15:31 ` [PATCH v4] " Don Wallwork
  2022-05-17 15:56   ` Stephen Hemminger
  2022-05-20  8:30   ` fengchengwen
@ 2022-05-23 22:35   ` Kathleen Capella
  2022-05-24 13:48     ` Don Wallwork
  2022-05-24 14:40   ` Burakov, Anatoly
  3 siblings, 1 reply; 25+ messages in thread
From: Kathleen Capella @ 2022-05-23 22:35 UTC (permalink / raw)
  To: Don Wallwork, dev
  Cc: stephen, fengchengwen, mb, anatoly.burakov, dmitry.kozliuk,
	bruce.richardson, Honnappa Nagarahalli, nd, haiyue.wang

In this section of the code:

stack_ptr = rte_zmalloc_socket("lcore_stack",
				       stack_size,
				       stack_size,
				       rte_lcore_to_socket_id(lcore_id));

the stack memory is aligned to stack_size. According to the implementation of rte_zmalloc_socket, the alignment must be a power of two. If the user specifies a size in KB that is not a power of two, the allocation will fail with the generic error message "EAL: Cannot allocate worker lcore stack memory". A check for this case with a more descriptive error message, plus a note in the documentation, would be good to include.

> -----Original Message-----
> From: Don Wallwork <donw@xsightlabs.com>
> Sent: Tuesday, May 17, 2022 10:32 AM
> To: dev@dpdk.org
> Cc: donw@xsightlabs.com; stephen@networkplumber.org;
> fengchengwen@huawei.com; mb@smartsharesystems.com;
> anatoly.burakov@intel.com; dmitry.kozliuk@gmail.com;
> bruce.richardson@intel.com; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; nd <nd@arm.com>;
> haiyue.wang@intel.com
> Subject: [PATCH v4] eal: allow worker lcore stacks to be allocated from
> hugepage memory
> 
> Add support for using hugepages for worker lcore stack memory.  The intent
> is to improve performance by reducing stack memory related TLB misses and
> also by using memory local to the NUMA node of each lcore.
> 
> EAL option '--huge-worker-stack [stack-size-in-kbytes]' is added to allow the
> feature to be enabled at runtime.  If the size is not specified, the system
> pthread stack size will be used.
> 
> Signed-off-by: Don Wallwork <donw@xsightlabs.com>
> Acked-by: Morten Brørup <mb@smartsharesystems.com>
> ---
>  doc/guides/linux_gsg/eal_args.include.rst     |  6 ++
>  .../prog_guide/env_abstraction_layer.rst      | 21 +++++++
>  lib/eal/common/eal_common_options.c           | 41 +++++++++++++
>  lib/eal/common/eal_internal_cfg.h             |  4 ++
>  lib/eal/common/eal_options.h                  |  2 +
>  lib/eal/linux/eal.c                           | 61 ++++++++++++++++++-
>  6 files changed, 133 insertions(+), 2 deletions(-)
> 
> diff --git a/doc/guides/linux_gsg/eal_args.include.rst
> b/doc/guides/linux_gsg/eal_args.include.rst
> index 3549a0cf56..9cfbf7de84 100644
> --- a/doc/guides/linux_gsg/eal_args.include.rst
> +++ b/doc/guides/linux_gsg/eal_args.include.rst
> @@ -116,6 +116,12 @@ Memory-related options
> 
>      Force IOVA mode to a specific value.
> 
> +*   ``--huge-worker-stack[=size]``
> +
> +    Allocate worker stack memory from hugepage memory. Stack size
> defaults
> +    to system pthread stack size unless the optional size (in kbytes) is
> +    specified.
> +
>  Debugging options
>  ~~~~~~~~~~~~~~~~~
> 
> diff --git a/doc/guides/prog_guide/env_abstraction_layer.rst
> b/doc/guides/prog_guide/env_abstraction_layer.rst
> index 5f0748fba1..e74516f0cf 100644
> --- a/doc/guides/prog_guide/env_abstraction_layer.rst
> +++ b/doc/guides/prog_guide/env_abstraction_layer.rst
> @@ -329,6 +329,27 @@ Another option is to use bigger page sizes. Since
> fewer pages are required to  cover the same memory area, fewer file
> descriptors will be stored internally  by EAL.
> 
> +.. _huge-worker-stack:
> +
> +Hugepage Worker Stacks
> +^^^^^^^^^^^^^^^^^^^^^^
> +
> +When the ``--huge-worker-stack[=size]`` EAL option is specified, worker
> +thread stacks are allocated from hugepage memory local to the NUMA node
> +of the thread. Worker stack size defaults to system pthread stack size
> +if the optional size parameter is not specified.
> +
> +.. warning::
> +    Stacks allocated from hugepage memory are not protected by guard
> +    pages. Worker stacks must be sufficiently sized to prevent stack
> +    overflow when this option is used.
> +
> +    As with normal thread stacks, hugepage worker thread stack size is
> +    fixed and is not dynamically resized. Therefore, an application that
> +    is free of stack page faults under a given load should be safe with
> +    hugepage worker thread stacks given the same thread stack size and
> +    loading conditions.
> +
>  Support for Externally Allocated Memory
> ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> 
> diff --git a/lib/eal/common/eal_common_options.c
> b/lib/eal/common/eal_common_options.c
> index f247a42455..370801f19b 100644
> --- a/lib/eal/common/eal_common_options.c
> +++ b/lib/eal/common/eal_common_options.c
> @@ -103,6 +103,7 @@ eal_long_options[] = {
>  	{OPT_TELEMETRY,         0, NULL, OPT_TELEMETRY_NUM        },
>  	{OPT_NO_TELEMETRY,      0, NULL, OPT_NO_TELEMETRY_NUM     },
>  	{OPT_FORCE_MAX_SIMD_BITWIDTH, 1, NULL,
> OPT_FORCE_MAX_SIMD_BITWIDTH_NUM},
> +	{OPT_HUGE_WORKER_STACK, 2, NULL,
> OPT_HUGE_WORKER_STACK_NUM     },
> 
>  	{0,                     0, NULL, 0                        }
>  };
> @@ -1618,6 +1619,28 @@ eal_parse_huge_unlink(const char *arg, struct
> hugepage_file_discipline *out)
>  	return -1;
>  }
> 
> +#ifndef RTE_EXEC_ENV_WINDOWS
> +static int
> +eal_parse_huge_worker_stack(const char *arg, size_t
> +*huge_worker_stack_size) {
> +	size_t worker_stack_size;
> +	char *end;
> +
> +	if (arg == NULL || arg[0] == '\0') {
> +		*huge_worker_stack_size = WORKER_STACK_SIZE_FROM_OS;
> +		return 0;
> +	}
> +	errno = 0;
> +	worker_stack_size = strtoul(arg, &end, 10);
> +	if (errno || end == NULL || worker_stack_size == 0 ||
> +	    worker_stack_size >= (size_t)-1 / 1024)
> +		return -1;
> +
> +	*huge_worker_stack_size = worker_stack_size * 1024;
> +	return 0;
> +}
> +#endif
> +
>  int
>  eal_parse_common_option(int opt, const char *optarg,
>  			struct internal_config *conf)
> @@ -1921,6 +1944,17 @@ eal_parse_common_option(int opt, const char
> *optarg,
>  		}
>  		break;
> 
> +#ifndef RTE_EXEC_ENV_WINDOWS
> +	case OPT_HUGE_WORKER_STACK_NUM:
> +		if (eal_parse_huge_worker_stack(optarg,
> +						&conf-
> >huge_worker_stack_size) < 0) {
> +			RTE_LOG(ERR, EAL, "invalid parameter for --"
> +				OPT_HUGE_WORKER_STACK"\n");
> +			return -1;
> +		}
> +		break;
> +#endif /* !RTE_EXEC_ENV_WINDOWS */
> +
>  	/* don't know what to do, leave this to caller */
>  	default:
>  		return 1;
> @@ -2235,5 +2269,12 @@ eal_common_usage(void)
>  	       "  --"OPT_NO_PCI"            Disable PCI\n"
>  	       "  --"OPT_NO_HPET"           Disable HPET\n"
>  	       "  --"OPT_NO_SHCONF"         No shared config (mmap'd files)\n"
> +#ifndef RTE_EXEC_ENV_WINDOWS
> +	       "  --"OPT_HUGE_WORKER_STACK"[=size]\n"
> +	       "                      Allocate worker thread stacks from\n"
> +	       "                      hugepage memory. Size is in units of\n"
> +	       "                      kbytes and defaults to system thread\n"
> +	       "                      stack size if not specified.\n"
> +#endif
>  	       "\n", RTE_MAX_LCORE);
>  }
> diff --git a/lib/eal/common/eal_internal_cfg.h
> b/lib/eal/common/eal_internal_cfg.h
> index b71faadd18..5e154967e4 100644
> --- a/lib/eal/common/eal_internal_cfg.h
> +++ b/lib/eal/common/eal_internal_cfg.h
> @@ -48,6 +48,9 @@ struct hugepage_file_discipline {
>  	bool unlink_existing;
>  };
> 
> +/** Worker hugepage stack size should default to OS value. */ #define
> +WORKER_STACK_SIZE_FROM_OS ((size_t)~0)
> +
>  /**
>   * internal configuration
>   */
> @@ -102,6 +105,7 @@ struct internal_config {
>  	unsigned int no_telemetry; /**< true to disable Telemetry */
>  	struct simd_bitwidth max_simd_bitwidth;
>  	/**< max simd bitwidth path to use */
> +	size_t huge_worker_stack_size; /**< worker thread stack size */
>  };
> 
>  void eal_reset_internal_config(struct internal_config *internal_cfg); diff --git
> a/lib/eal/common/eal_options.h b/lib/eal/common/eal_options.h index
> 8e4f7202a2..3cc9cb6412 100644
> --- a/lib/eal/common/eal_options.h
> +++ b/lib/eal/common/eal_options.h
> @@ -87,6 +87,8 @@ enum {
>  	OPT_NO_TELEMETRY_NUM,
>  #define OPT_FORCE_MAX_SIMD_BITWIDTH  "force-max-simd-bitwidth"
>  	OPT_FORCE_MAX_SIMD_BITWIDTH_NUM,
> +#define OPT_HUGE_WORKER_STACK  "huge-worker-stack"
> +	OPT_HUGE_WORKER_STACK_NUM,
> 
>  	OPT_LONG_MAX_NUM
>  };
> diff --git a/lib/eal/linux/eal.c b/lib/eal/linux/eal.c index
> 1ef263434a..2bee66577e 100644
> --- a/lib/eal/linux/eal.c
> +++ b/lib/eal/linux/eal.c
> @@ -857,6 +857,64 @@ is_iommu_enabled(void)
>  	return n > 2;
>  }
> 
> +static int
> +eal_worker_thread_create(struct internal_config *internal_conf,
> +			 int lcore_id)
> +{
> +	pthread_attr_t attr;
> +	size_t stack_size;
> +	void *stack_ptr;
> +	int ret;
> +
> +	if (internal_conf->huge_worker_stack_size == 0)
> +		return pthread_create(&lcore_config[lcore_id].thread_id,
> +				      NULL,
> +				      eal_thread_loop,
> +				      (void *)(uintptr_t)lcore_id);
> +
> +	/* Allocate NUMA aware stack memory and set pthread attributes */
> +	if (pthread_attr_init(&attr) != 0) {
> +		rte_eal_init_alert("Cannot init pthread attributes");
> +		rte_errno = EFAULT;
> +		return -1;
> +	}
> +	if (internal_conf->huge_worker_stack_size ==
> WORKER_STACK_SIZE_FROM_OS) {
> +		if (pthread_attr_getstacksize(&attr, &stack_size) != 0) {
> +			rte_errno = EFAULT;
> +			return -1;
> +		}
> +	} else {
> +		stack_size = internal_conf->huge_worker_stack_size;
> +	}
> +	stack_ptr = rte_zmalloc_socket("lcore_stack",
> +				       stack_size,
> +				       stack_size,
> +				       rte_lcore_to_socket_id(lcore_id));
> +
> +	if (stack_ptr == NULL) {
> +		rte_eal_init_alert("Cannot allocate worker lcore stack
> memory");
> +		rte_errno = ENOMEM;
> +		return -1;
> +	}
> +
> +	if (pthread_attr_setstack(&attr, stack_ptr, stack_size) != 0) {
> +		rte_eal_init_alert("Cannot set pthread stack attributes");
> +		rte_errno = EFAULT;
> +		return -1;
> +	}
> +
> +	ret = pthread_create(&lcore_config[lcore_id].thread_id, &attr,
> +			     eal_thread_loop,
> +			     (void *)(uintptr_t)lcore_id);
> +
> +	if (pthread_attr_destroy(&attr) != 0) {
> +		rte_eal_init_alert("Cannot destroy pthread attributes");
> +		rte_errno = EFAULT;
> +		return -1;
> +	}
> +	return ret;
> +}
> +
>  /* Launch threads, called at application init(). */  int  rte_eal_init(int argc,
> char **argv) @@ -1144,8 +1202,7 @@ rte_eal_init(int argc, char **argv)
>  		lcore_config[i].state = WAIT;
> 
>  		/* create a thread for each lcore */
> -		ret = pthread_create(&lcore_config[i].thread_id, NULL,
> -				     eal_thread_loop, (void *)(uintptr_t)i);
> +		ret = eal_worker_thread_create(internal_conf, i);
>  		if (ret != 0)
>  			rte_panic("Cannot create thread\n");
> 
> --
> 2.17.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH v4] eal: allow worker lcore stacks to be allocated from hugepage memory
  2022-05-23 22:35   ` Kathleen Capella
@ 2022-05-24 13:48     ` Don Wallwork
  0 siblings, 0 replies; 25+ messages in thread
From: Don Wallwork @ 2022-05-24 13:48 UTC (permalink / raw)
  To: Kathleen Capella, dev
  Cc: stephen, fengchengwen, mb, anatoly.burakov, dmitry.kozliuk,
	bruce.richardson, Honnappa Nagarahalli, nd, haiyue.wang

On 5/23/2022 6:35 PM, Kathleen Capella wrote:
> In this section of the code:
>
> stack_ptr = rte_zmalloc_socket("lcore_stack",
> 				       stack_size,
> 				       stack_size,
> 				       rte_lcore_to_socket_id(lcore_id));
>
> stack memory is aligned to the stack_size. According to the implementation of rte_zmalloc_socket, the alignment must be a power of two. If the user inputs a number of KBs that is not a power of two, this will fail with a generic error message of " EAL: Cannot allocate worker lcore stack memory." A check for this occurrence with a more descriptive error message and a note in the documentation would be good to include.
Good point.  Alignment to stack size is not necessary.  I'll post a new 
version that only requires cache line alignment.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH v4] eal: allow worker lcore stacks to be allocated from hugepage memory
  2022-05-17 15:31 ` [PATCH v4] " Don Wallwork
                     ` (2 preceding siblings ...)
  2022-05-23 22:35   ` Kathleen Capella
@ 2022-05-24 14:40   ` Burakov, Anatoly
  2022-05-24 19:38     ` Don Wallwork
  3 siblings, 1 reply; 25+ messages in thread
From: Burakov, Anatoly @ 2022-05-24 14:40 UTC (permalink / raw)
  To: Don Wallwork, dev
  Cc: stephen, fengchengwen, mb, dmitry.kozliuk, bruce.richardson,
	Honnappa.Nagarahalli, nd, haiyue.wang

On 17-May-22 4:31 PM, Don Wallwork wrote:
> Add support for using hugepages for worker lcore stack memory.  The
> intent is to improve performance by reducing stack memory related TLB
> misses and also by using memory local to the NUMA node of each lcore.
> 
> EAL option '--huge-worker-stack [stack-size-in-kbytes]' is added to allow
> the feature to be enabled at runtime.  If the size is not specified,
> the system pthread stack size will be used.
> 
> Signed-off-by: Don Wallwork <donw@xsightlabs.com>
> Acked-by: Morten Brørup <mb@smartsharesystems.com>
> ---

<snip>

> +++ b/lib/eal/common/eal_common_options.c
> @@ -103,6 +103,7 @@ eal_long_options[] = {
>   	{OPT_TELEMETRY,         0, NULL, OPT_TELEMETRY_NUM        },
>   	{OPT_NO_TELEMETRY,      0, NULL, OPT_NO_TELEMETRY_NUM     },
>   	{OPT_FORCE_MAX_SIMD_BITWIDTH, 1, NULL, OPT_FORCE_MAX_SIMD_BITWIDTH_NUM},
> +	{OPT_HUGE_WORKER_STACK, 2, NULL, OPT_HUGE_WORKER_STACK_NUM     },
>   
>   	{0,                     0, NULL, 0                        }
>   };
> @@ -1618,6 +1619,28 @@ eal_parse_huge_unlink(const char *arg, struct hugepage_file_discipline *out)
>   	return -1;
>   }
>   
> +#ifndef RTE_EXEC_ENV_WINDOWS

Why the #ifdef-ery? This is common code, I think we can just leave it 
there? You could just add a check for `huge_worker_stack_size` in 
Windows EAL to guard against using this setting for Windows, but 
otherwise I see no need for an #ifdef here.

-- 
Thanks,
Anatoly

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH v4] eal: allow worker lcore stacks to be allocated from hugepage memory
  2022-05-24 14:40   ` Burakov, Anatoly
@ 2022-05-24 19:38     ` Don Wallwork
  0 siblings, 0 replies; 25+ messages in thread
From: Don Wallwork @ 2022-05-24 19:38 UTC (permalink / raw)
  To: Burakov, Anatoly, dev
  Cc: stephen, fengchengwen, mb, dmitry.kozliuk, bruce.richardson,
	Honnappa.Nagarahalli, nd, haiyue.wang



On 5/24/2022 10:40 AM, Burakov, Anatoly wrote:
> On 17-May-22 4:31 PM, Don Wallwork wrote:
>> Add support for using hugepages for worker lcore stack memory.  The
>> intent is to improve performance by reducing stack memory related TLB
>> misses and also by using memory local to the NUMA node of each lcore.
>>
>> EAL option '--huge-worker-stack [stack-size-in-kbytes]' is added to 
>> allow
>> the feature to be enabled at runtime.  If the size is not specified,
>> the system pthread stack size will be used.
>>
>> Signed-off-by: Don Wallwork <donw@xsightlabs.com>
>> Acked-by: Morten Brørup <mb@smartsharesystems.com>
>> ---
>
> <snip>
>
>> +++ b/lib/eal/common/eal_common_options.c
>> @@ -103,6 +103,7 @@ eal_long_options[] = {
>>       {OPT_TELEMETRY,         0, NULL, OPT_TELEMETRY_NUM },
>>       {OPT_NO_TELEMETRY,      0, NULL, OPT_NO_TELEMETRY_NUM },
>>       {OPT_FORCE_MAX_SIMD_BITWIDTH, 1, NULL, 
>> OPT_FORCE_MAX_SIMD_BITWIDTH_NUM},
>> +    {OPT_HUGE_WORKER_STACK, 2, NULL, OPT_HUGE_WORKER_STACK_NUM     },
>>         {0,                     0, NULL, 0                        }
>>   };
>> @@ -1618,6 +1619,28 @@ eal_parse_huge_unlink(const char *arg, struct 
>> hugepage_file_discipline *out)
>>       return -1;
>>   }
>>   +#ifndef RTE_EXEC_ENV_WINDOWS
>
> Why the #ifdef-ery? This is common code, I think we can just leave it 
> there? You could just add a check for `huge_worker_stack_size` in 
> Windows EAL to guard against using this setting for Windows, but 
> otherwise I see no need for an #ifdef here.
>

Was trying to follow the convention used in other cases, but I will post 
a new version that eliminates the ifdefs and checks 
huge_worker_stack_size in FreeBSD and Windows EAL.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH v5] eal: allow worker lcore stacks to be allocated from hugepage memory
  2022-05-02 14:10 [PATCH] eal: allow worker lcore stacks to be allocated from hugepage memory Don Wallwork
                   ` (4 preceding siblings ...)
  2022-05-17 15:31 ` [PATCH v4] " Don Wallwork
@ 2022-05-24 19:46 ` Don Wallwork
  2022-05-24 19:51 ` [PATCH v6] " Don Wallwork
  6 siblings, 0 replies; 25+ messages in thread
From: Don Wallwork @ 2022-05-24 19:46 UTC (permalink / raw)
  To: dev
  Cc: donw, stephen, fengchengwen, mb, anatoly.burakov, dmitry.kozliuk,
	bruce.richardson, Honnappa.Nagarahalli, nd, haiyue.wang,
	Kathleen.Capella

Add support for using hugepages for worker lcore stack memory.  The
intent is to improve performance by reducing stack memory related TLB
misses and also by using memory local to the NUMA node of each lcore.

EAL option '--huge-worker-stack[=stack-size-in-kbytes]' is added to allow
the feature to be enabled at runtime.  If the size is not specified,
the system pthread stack size will be used.

Signed-off-by: Don Wallwork <donw@xsightlabs.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Chengwen Feng <fengchengwen@huawei.com>
---
 doc/guides/linux_gsg/eal_args.include.rst     |  6 ++
 .../prog_guide/env_abstraction_layer.rst      | 21 +++++++
 lib/eal/common/eal_common_options.c           | 41 +++++++++++++
 lib/eal/common/eal_internal_cfg.h             |  4 ++
 lib/eal/common/eal_options.h                  |  2 +
 lib/eal/linux/eal.c                           | 61 ++++++++++++++++++-
 6 files changed, 133 insertions(+), 2 deletions(-)

diff --git a/doc/guides/linux_gsg/eal_args.include.rst b/doc/guides/linux_gsg/eal_args.include.rst
index 3549a0cf56..9cfbf7de84 100644
--- a/doc/guides/linux_gsg/eal_args.include.rst
+++ b/doc/guides/linux_gsg/eal_args.include.rst
@@ -116,6 +116,12 @@ Memory-related options
 
     Force IOVA mode to a specific value.
 
+*   ``--huge-worker-stack[=size]``
+
+    Allocate worker stack memory from hugepage memory. Stack size defaults
+    to system pthread stack size unless the optional size (in kbytes) is
+    specified.
+
 Debugging options
 ~~~~~~~~~~~~~~~~~
 
diff --git a/doc/guides/prog_guide/env_abstraction_layer.rst b/doc/guides/prog_guide/env_abstraction_layer.rst
index 5f0748fba1..e74516f0cf 100644
--- a/doc/guides/prog_guide/env_abstraction_layer.rst
+++ b/doc/guides/prog_guide/env_abstraction_layer.rst
@@ -329,6 +329,27 @@ Another option is to use bigger page sizes. Since fewer pages are required to
 cover the same memory area, fewer file descriptors will be stored internally
 by EAL.
 
+.. _huge-worker-stack:
+
+Hugepage Worker Stacks
+^^^^^^^^^^^^^^^^^^^^^^
+
+When the ``--huge-worker-stack[=size]`` EAL option is specified, worker
+thread stacks are allocated from hugepage memory local to the NUMA node
+of the thread. Worker stack size defaults to system pthread stack size
+if the optional size parameter is not specified.
+
+.. warning::
+    Stacks allocated from hugepage memory are not protected by guard
+    pages. Worker stacks must be sufficiently sized to prevent stack
+    overflow when this option is used.
+
+    As with normal thread stacks, hugepage worker thread stack size is
+    fixed and is not dynamically resized. Therefore, an application that
+    is free of stack page faults under a given load should be safe with
+    hugepage worker thread stacks given the same thread stack size and
+    loading conditions.
+
 Support for Externally Allocated Memory
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/lib/eal/common/eal_common_options.c b/lib/eal/common/eal_common_options.c
index f247a42455..370801f19b 100644
--- a/lib/eal/common/eal_common_options.c
+++ b/lib/eal/common/eal_common_options.c
@@ -103,6 +103,7 @@ eal_long_options[] = {
 	{OPT_TELEMETRY,         0, NULL, OPT_TELEMETRY_NUM        },
 	{OPT_NO_TELEMETRY,      0, NULL, OPT_NO_TELEMETRY_NUM     },
 	{OPT_FORCE_MAX_SIMD_BITWIDTH, 1, NULL, OPT_FORCE_MAX_SIMD_BITWIDTH_NUM},
+	{OPT_HUGE_WORKER_STACK, 2, NULL, OPT_HUGE_WORKER_STACK_NUM     },
 
 	{0,                     0, NULL, 0                        }
 };
@@ -1618,6 +1619,28 @@ eal_parse_huge_unlink(const char *arg, struct hugepage_file_discipline *out)
 	return -1;
 }
 
+#ifndef RTE_EXEC_ENV_WINDOWS
+static int
+eal_parse_huge_worker_stack(const char *arg, size_t *huge_worker_stack_size)
+{
+	size_t worker_stack_size;
+	char *end;
+
+	if (arg == NULL || arg[0] == '\0') {
+		*huge_worker_stack_size = WORKER_STACK_SIZE_FROM_OS;
+		return 0;
+	}
+	errno = 0;
+	worker_stack_size = strtoul(arg, &end, 10);
+	if (errno || end == NULL || worker_stack_size == 0 ||
+	    worker_stack_size >= (size_t)-1 / 1024)
+		return -1;
+
+	*huge_worker_stack_size = worker_stack_size * 1024;
+	return 0;
+}
+#endif
+
 int
 eal_parse_common_option(int opt, const char *optarg,
 			struct internal_config *conf)
@@ -1921,6 +1944,17 @@ eal_parse_common_option(int opt, const char *optarg,
 		}
 		break;
 
+#ifndef RTE_EXEC_ENV_WINDOWS
+	case OPT_HUGE_WORKER_STACK_NUM:
+		if (eal_parse_huge_worker_stack(optarg,
+						&conf->huge_worker_stack_size) < 0) {
+			RTE_LOG(ERR, EAL, "invalid parameter for --"
+				OPT_HUGE_WORKER_STACK"\n");
+			return -1;
+		}
+		break;
+#endif /* !RTE_EXEC_ENV_WINDOWS */
+
 	/* don't know what to do, leave this to caller */
 	default:
 		return 1;
@@ -2235,5 +2269,12 @@ eal_common_usage(void)
 	       "  --"OPT_NO_PCI"            Disable PCI\n"
 	       "  --"OPT_NO_HPET"           Disable HPET\n"
 	       "  --"OPT_NO_SHCONF"         No shared config (mmap'd files)\n"
+#ifndef RTE_EXEC_ENV_WINDOWS
+	       "  --"OPT_HUGE_WORKER_STACK"[=size]\n"
+	       "                      Allocate worker thread stacks from\n"
+	       "                      hugepage memory. Size is in units of\n"
+	       "                      kbytes and defaults to system thread\n"
+	       "                      stack size if not specified.\n"
+#endif
 	       "\n", RTE_MAX_LCORE);
 }
diff --git a/lib/eal/common/eal_internal_cfg.h b/lib/eal/common/eal_internal_cfg.h
index b71faadd18..5e154967e4 100644
--- a/lib/eal/common/eal_internal_cfg.h
+++ b/lib/eal/common/eal_internal_cfg.h
@@ -48,6 +48,9 @@ struct hugepage_file_discipline {
 	bool unlink_existing;
 };
 
+/** Worker hugepage stack size should default to OS value. */
+#define WORKER_STACK_SIZE_FROM_OS ((size_t)~0)
+
 /**
  * internal configuration
  */
@@ -102,6 +105,7 @@ struct internal_config {
 	unsigned int no_telemetry; /**< true to disable Telemetry */
 	struct simd_bitwidth max_simd_bitwidth;
 	/**< max simd bitwidth path to use */
+	size_t huge_worker_stack_size; /**< worker thread stack size */
 };
 
 void eal_reset_internal_config(struct internal_config *internal_cfg);
diff --git a/lib/eal/common/eal_options.h b/lib/eal/common/eal_options.h
index 8e4f7202a2..3cc9cb6412 100644
--- a/lib/eal/common/eal_options.h
+++ b/lib/eal/common/eal_options.h
@@ -87,6 +87,8 @@ enum {
 	OPT_NO_TELEMETRY_NUM,
 #define OPT_FORCE_MAX_SIMD_BITWIDTH  "force-max-simd-bitwidth"
 	OPT_FORCE_MAX_SIMD_BITWIDTH_NUM,
+#define OPT_HUGE_WORKER_STACK  "huge-worker-stack"
+	OPT_HUGE_WORKER_STACK_NUM,
 
 	OPT_LONG_MAX_NUM
 };
diff --git a/lib/eal/linux/eal.c b/lib/eal/linux/eal.c
index 1ef263434a..2bee66577e 100644
--- a/lib/eal/linux/eal.c
+++ b/lib/eal/linux/eal.c
@@ -857,6 +857,64 @@ is_iommu_enabled(void)
 	return n > 2;
 }
 
+static int
+eal_worker_thread_create(struct internal_config *internal_conf,
+			 int lcore_id)
+{
+	pthread_attr_t attr;
+	size_t stack_size;
+	void *stack_ptr;
+	int ret;
+
+	if (internal_conf->huge_worker_stack_size == 0)
+		return pthread_create(&lcore_config[lcore_id].thread_id,
+				      NULL,
+				      eal_thread_loop,
+				      (void *)(uintptr_t)lcore_id);
+
+	/* Allocate NUMA aware stack memory and set pthread attributes */
+	if (pthread_attr_init(&attr) != 0) {
+		rte_eal_init_alert("Cannot init pthread attributes");
+		rte_errno = EFAULT;
+		return -1;
+	}
+	if (internal_conf->huge_worker_stack_size == WORKER_STACK_SIZE_FROM_OS) {
+		if (pthread_attr_getstacksize(&attr, &stack_size) != 0) {
+			rte_errno = EFAULT;
+			return -1;
+		}
+	} else {
+		stack_size = internal_conf->huge_worker_stack_size;
+	}
+	stack_ptr = rte_zmalloc_socket("lcore_stack",
+				       stack_size,
+				       stack_size,
+				       rte_lcore_to_socket_id(lcore_id));
+
+	if (stack_ptr == NULL) {
+		rte_eal_init_alert("Cannot allocate worker lcore stack memory");
+		rte_errno = ENOMEM;
+		return -1;
+	}
+
+	if (pthread_attr_setstack(&attr, stack_ptr, stack_size) != 0) {
+		rte_eal_init_alert("Cannot set pthread stack attributes");
+		rte_errno = EFAULT;
+		return -1;
+	}
+
+	ret = pthread_create(&lcore_config[lcore_id].thread_id, &attr,
+			     eal_thread_loop,
+			     (void *)(uintptr_t)lcore_id);
+
+	if (pthread_attr_destroy(&attr) != 0) {
+		rte_eal_init_alert("Cannot destroy pthread attributes");
+		rte_errno = EFAULT;
+		return -1;
+	}
+	return ret;
+}
+
 /* Launch threads, called at application init(). */
 int
 rte_eal_init(int argc, char **argv)
@@ -1144,8 +1202,7 @@ rte_eal_init(int argc, char **argv)
 		lcore_config[i].state = WAIT;
 
 		/* create a thread for each lcore */
-		ret = pthread_create(&lcore_config[i].thread_id, NULL,
-				     eal_thread_loop, (void *)(uintptr_t)i);
+		ret = eal_worker_thread_create(internal_conf, i);
 		if (ret != 0)
 			rte_panic("Cannot create thread\n");
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH v6] eal: allow worker lcore stacks to be allocated from hugepage memory
  2022-05-02 14:10 [PATCH] eal: allow worker lcore stacks to be allocated from hugepage memory Don Wallwork
                   ` (5 preceding siblings ...)
  2022-05-24 19:46 ` [PATCH v5] " Don Wallwork
@ 2022-05-24 19:51 ` Don Wallwork
  6 siblings, 0 replies; 25+ messages in thread
From: Don Wallwork @ 2022-05-24 19:51 UTC (permalink / raw)
  To: dev
  Cc: donw, stephen, fengchengwen, mb, anatoly.burakov, dmitry.kozliuk,
	bruce.richardson, Honnappa.Nagarahalli, nd, haiyue.wang,
	Kathleen.Capella

Add support for using hugepages for worker lcore stack memory.  The
intent is to improve performance by reducing stack memory related TLB
misses and also by using memory local to the NUMA node of each lcore.

EAL option '--huge-worker-stack[=stack-size-in-kbytes]' is added to allow
the feature to be enabled at runtime.  If the size is not specified,
the system pthread stack size will be used.

Signed-off-by: Don Wallwork <donw@xsightlabs.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Chengwen Feng <fengchengwen@huawei.com>
---
 doc/guides/linux_gsg/eal_args.include.rst     |  6 ++
 .../prog_guide/env_abstraction_layer.rst      | 21 +++++++
 lib/eal/common/eal_common_options.c           | 35 +++++++++++
 lib/eal/common/eal_internal_cfg.h             |  4 ++
 lib/eal/common/eal_options.h                  |  2 +
 lib/eal/freebsd/eal.c                         |  6 ++
 lib/eal/linux/eal.c                           | 61 ++++++++++++++++++-
 lib/eal/windows/eal.c                         |  6 ++
 8 files changed, 139 insertions(+), 2 deletions(-)

diff --git a/doc/guides/linux_gsg/eal_args.include.rst b/doc/guides/linux_gsg/eal_args.include.rst
index 3549a0cf56..9cfbf7de84 100644
--- a/doc/guides/linux_gsg/eal_args.include.rst
+++ b/doc/guides/linux_gsg/eal_args.include.rst
@@ -116,6 +116,12 @@ Memory-related options
 
     Force IOVA mode to a specific value.
 
+*   ``--huge-worker-stack[=size]``
+
+    Allocate worker stack memory from hugepage memory. Stack size defaults
+    to system pthread stack size unless the optional size (in kbytes) is
+    specified.
+
 Debugging options
 ~~~~~~~~~~~~~~~~~
 
diff --git a/doc/guides/prog_guide/env_abstraction_layer.rst b/doc/guides/prog_guide/env_abstraction_layer.rst
index 5f0748fba1..e74516f0cf 100644
--- a/doc/guides/prog_guide/env_abstraction_layer.rst
+++ b/doc/guides/prog_guide/env_abstraction_layer.rst
@@ -329,6 +329,27 @@ Another option is to use bigger page sizes. Since fewer pages are required to
 cover the same memory area, fewer file descriptors will be stored internally
 by EAL.
 
+.. _huge-worker-stack:
+
+Hugepage Worker Stacks
+^^^^^^^^^^^^^^^^^^^^^^
+
+When the ``--huge-worker-stack[=size]`` EAL option is specified, worker
+thread stacks are allocated from hugepage memory local to the NUMA node
+of the thread. Worker stack size defaults to system pthread stack size
+if the optional size parameter is not specified.
+
+.. warning::
+    Stacks allocated from hugepage memory are not protected by guard
+    pages. Worker stacks must be sufficiently sized to prevent stack
+    overflow when this option is used.
+
+    As with normal thread stacks, hugepage worker thread stack size is
+    fixed and is not dynamically resized. Therefore, an application that
+    is free of stack page faults under a given load should be safe with
+    hugepage worker thread stacks given the same thread stack size and
+    loading conditions.
+
 Support for Externally Allocated Memory
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/lib/eal/common/eal_common_options.c b/lib/eal/common/eal_common_options.c
index f247a42455..02e59051e8 100644
--- a/lib/eal/common/eal_common_options.c
+++ b/lib/eal/common/eal_common_options.c
@@ -103,6 +103,7 @@ eal_long_options[] = {
 	{OPT_TELEMETRY,         0, NULL, OPT_TELEMETRY_NUM        },
 	{OPT_NO_TELEMETRY,      0, NULL, OPT_NO_TELEMETRY_NUM     },
 	{OPT_FORCE_MAX_SIMD_BITWIDTH, 1, NULL, OPT_FORCE_MAX_SIMD_BITWIDTH_NUM},
+	{OPT_HUGE_WORKER_STACK, 2, NULL, OPT_HUGE_WORKER_STACK_NUM     },
 
 	{0,                     0, NULL, 0                        }
 };
@@ -1618,6 +1619,26 @@ eal_parse_huge_unlink(const char *arg, struct hugepage_file_discipline *out)
 	return -1;
 }
 
+/* Parse optional stack size (kbytes) into bytes; reject malformed values. */
+static int
+eal_parse_huge_worker_stack(const char *arg, size_t *huge_worker_stack_size)
+{
+	size_t worker_stack_size;
+	char *end;
+
+	if (arg == NULL || arg[0] == '\0') { /* no size: use OS default */
+		*huge_worker_stack_size = WORKER_STACK_SIZE_FROM_OS;
+		return 0;
+	}
+	errno = 0;
+	worker_stack_size = strtoul(arg, &end, 10);
+	if (errno || end == arg || *end != '\0' || worker_stack_size == 0 ||
+	    worker_stack_size >= (size_t)-1 / 1024)
+		return -1; /* not a number, trailing text, zero, or overflow */
+	*huge_worker_stack_size = worker_stack_size * 1024;
+	return 0;
+}
+
 int
 eal_parse_common_option(int opt, const char *optarg,
 			struct internal_config *conf)
@@ -1921,6 +1942,15 @@ eal_parse_common_option(int opt, const char *optarg,
 		}
 		break;
 
+	case OPT_HUGE_WORKER_STACK_NUM:
+		if (eal_parse_huge_worker_stack(optarg,
+						&conf->huge_worker_stack_size) < 0) {
+			RTE_LOG(ERR, EAL, "invalid parameter for --"
+				OPT_HUGE_WORKER_STACK"\n");
+			return -1;
+		}
+		break;
+
 	/* don't know what to do, leave this to caller */
 	default:
 		return 1;
@@ -2235,5 +2265,10 @@ eal_common_usage(void)
 	       "  --"OPT_NO_PCI"            Disable PCI\n"
 	       "  --"OPT_NO_HPET"           Disable HPET\n"
 	       "  --"OPT_NO_SHCONF"         No shared config (mmap'd files)\n"
+	       "  --"OPT_HUGE_WORKER_STACK"[=size]\n"
+	       "                      Allocate worker thread stacks from\n"
+	       "                      hugepage memory. Size is in units of\n"
+	       "                      kbytes and defaults to system thread\n"
+	       "                      stack size if not specified.\n"
 	       "\n", RTE_MAX_LCORE);
 }
diff --git a/lib/eal/common/eal_internal_cfg.h b/lib/eal/common/eal_internal_cfg.h
index b71faadd18..5e154967e4 100644
--- a/lib/eal/common/eal_internal_cfg.h
+++ b/lib/eal/common/eal_internal_cfg.h
@@ -48,6 +48,9 @@ struct hugepage_file_discipline {
 	bool unlink_existing;
 };
 
+/** Worker hugepage stack size should default to OS value. */
+#define WORKER_STACK_SIZE_FROM_OS ((size_t)~0)
+
 /**
  * internal configuration
  */
@@ -102,6 +105,7 @@ struct internal_config {
 	unsigned int no_telemetry; /**< true to disable Telemetry */
 	struct simd_bitwidth max_simd_bitwidth;
 	/**< max simd bitwidth path to use */
+	size_t huge_worker_stack_size; /**< worker thread stack size */
 };
 
 void eal_reset_internal_config(struct internal_config *internal_cfg);
diff --git a/lib/eal/common/eal_options.h b/lib/eal/common/eal_options.h
index 8e4f7202a2..3cc9cb6412 100644
--- a/lib/eal/common/eal_options.h
+++ b/lib/eal/common/eal_options.h
@@ -87,6 +87,8 @@ enum {
 	OPT_NO_TELEMETRY_NUM,
 #define OPT_FORCE_MAX_SIMD_BITWIDTH  "force-max-simd-bitwidth"
 	OPT_FORCE_MAX_SIMD_BITWIDTH_NUM,
+#define OPT_HUGE_WORKER_STACK  "huge-worker-stack"
+	OPT_HUGE_WORKER_STACK_NUM,
 
 	OPT_LONG_MAX_NUM
 };
diff --git a/lib/eal/freebsd/eal.c b/lib/eal/freebsd/eal.c
index a6b20960f2..7368956649 100644
--- a/lib/eal/freebsd/eal.c
+++ b/lib/eal/freebsd/eal.c
@@ -795,6 +795,12 @@ rte_eal_init(int argc, char **argv)
 		config->main_lcore, (uintptr_t)pthread_self(), cpuset,
 		ret == 0 ? "" : "...");
 
+	if (internal_conf->huge_worker_stack_size != 0) {
+		rte_eal_init_alert("Hugepage worker stacks not supported");
+		rte_errno = ENOTSUP;
+		return -1;
+	}
+
 	RTE_LCORE_FOREACH_WORKER(i) {
 
 		/*
diff --git a/lib/eal/linux/eal.c b/lib/eal/linux/eal.c
index 1ef263434a..d28a0fdb78 100644
--- a/lib/eal/linux/eal.c
+++ b/lib/eal/linux/eal.c
@@ -857,6 +857,64 @@ is_iommu_enabled(void)
 	return n > 2;
 }
 
+/*
+ * Create the worker thread for lcore_id. When huge_worker_stack_size is
+ * non-zero, its stack is allocated with rte_zmalloc_socket() on the socket
+ * of that lcore. On failure the pthread attr and the stack are released.
+ */
+static int
+eal_worker_thread_create(struct internal_config *internal_conf,
+			 int lcore_id)
+{
+	size_t stack_size = internal_conf->huge_worker_stack_size;
+	void *stack_ptr = NULL;
+	pthread_attr_t attr;
+	int ret = -1;
+
+	if (stack_size == 0)
+		return pthread_create(&lcore_config[lcore_id].thread_id,
+				      NULL, eal_thread_loop,
+				      (void *)(uintptr_t)lcore_id);
+
+	if (pthread_attr_init(&attr) != 0) {
+		rte_eal_init_alert("Cannot init pthread attributes");
+		rte_errno = EFAULT;
+		return -1;
+	}
+	/* WORKER_STACK_SIZE_FROM_OS: use the default attribute stack size */
+	if (stack_size == WORKER_STACK_SIZE_FROM_OS &&
+	    pthread_attr_getstacksize(&attr, &stack_size) != 0) {
+		rte_eal_init_alert("Cannot get pthread stack size");
+		rte_errno = EFAULT;
+		goto out;
+	}
+	stack_ptr = rte_zmalloc_socket("lcore_stack", stack_size,
+				       RTE_CACHE_LINE_SIZE,
+				       rte_lcore_to_socket_id(lcore_id));
+	if (stack_ptr == NULL) {
+		rte_eal_init_alert("Cannot allocate worker lcore stack memory");
+		rte_errno = ENOMEM;
+		goto out;
+	}
+	if (pthread_attr_setstack(&attr, stack_ptr, stack_size) != 0) {
+		rte_eal_init_alert("Cannot set pthread stack attributes");
+		rte_errno = EFAULT;
+		goto out;
+	}
+	ret = pthread_create(&lcore_config[lcore_id].thread_id, &attr,
+			     eal_thread_loop, (void *)(uintptr_t)lcore_id);
+out:
+	/* the stack is only handed over once the thread was created */
+	if (ret != 0)
+		rte_free(stack_ptr);
+	if (pthread_attr_destroy(&attr) != 0) {
+		rte_eal_init_alert("Cannot destroy pthread attributes");
+		rte_errno = EFAULT;
+		return -1;
+	}
+	return ret;
+}
+
 /* Launch threads, called at application init(). */
 int
 rte_eal_init(int argc, char **argv)
@@ -1144,8 +1202,7 @@ rte_eal_init(int argc, char **argv)
 		lcore_config[i].state = WAIT;
 
 		/* create a thread for each lcore */
-		ret = pthread_create(&lcore_config[i].thread_id, NULL,
-				     eal_thread_loop, (void *)(uintptr_t)i);
+		ret = eal_worker_thread_create(internal_conf, i);
 		if (ret != 0)
 			rte_panic("Cannot create thread\n");
 
diff --git a/lib/eal/windows/eal.c b/lib/eal/windows/eal.c
index 122de2a319..5cd4a45872 100644
--- a/lib/eal/windows/eal.c
+++ b/lib/eal/windows/eal.c
@@ -416,6 +416,12 @@ rte_eal_init(int argc, char **argv)
 		config->main_lcore, (uintptr_t)pthread_self(), cpuset,
 		ret == 0 ? "" : "...");
 
+	if (internal_conf->huge_worker_stack_size != 0) {
+		rte_eal_init_alert("Hugepage worker stacks not supported");
+		rte_errno = ENOTSUP;
+		return -1;
+	}
+
 	RTE_LCORE_FOREACH_WORKER(i) {
 
 		/*
-- 
2.17.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2022-05-24 19:51 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-02 14:10 [PATCH] eal: allow worker lcore stacks to be allocated from hugepage memory Don Wallwork
2022-05-03  6:10 ` Morten Brørup
2022-05-03 13:08 ` Wang, Haiyue
2022-05-03 19:46   ` Don Wallwork
2022-05-04  3:08     ` Wang, Haiyue
2022-05-13 17:58 ` [PATCH v2] " Don Wallwork
2022-05-13 21:38   ` Stephen Hemminger
2022-05-16 19:43     ` Don Wallwork
2022-05-13 21:41   ` Stephen Hemminger
2022-05-14  3:31   ` fengchengwen
2022-05-16 19:47     ` Don Wallwork
2022-05-17  6:28       ` Morten Brørup
2022-05-16 19:50 ` [PATCH v3] " Don Wallwork
2022-05-16 20:28   ` Stephen Hemminger
2022-05-16 20:29     ` Don Wallwork
2022-05-17 15:31 ` [PATCH v4] " Don Wallwork
2022-05-17 15:56   ` Stephen Hemminger
2022-05-18 14:10     ` Don Wallwork
2022-05-20  8:30   ` fengchengwen
2022-05-23 22:35   ` Kathleen Capella
2022-05-24 13:48     ` Don Wallwork
2022-05-24 14:40   ` Burakov, Anatoly
2022-05-24 19:38     ` Don Wallwork
2022-05-24 19:46 ` [PATCH v5] " Don Wallwork
2022-05-24 19:51 ` [PATCH v6] " Don Wallwork

DPDK patches and discussions

This inbox may be cloned and mirrored by anyone:

	git clone --mirror http://inbox.dpdk.org/dev/0 dev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 dev dev/ http://inbox.dpdk.org/dev \
		dev@dpdk.org
	public-inbox-index dev

Example config snippet for mirrors.
Newsgroup available over NNTP:
	nntp://inbox.dpdk.org/inbox.dpdk.dev


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git