DPDK patches and discussions
 help / color / mirror / Atom feed
* [PATCH] kernel/freebsd: Allow contigmem allocation in NUMA domains
@ 2025-05-06 17:48 Jake Freeland
  2025-06-06 11:02 ` Bruce Richardson
  0 siblings, 1 reply; 2+ messages in thread
From: Jake Freeland @ 2025-05-06 17:48 UTC (permalink / raw)
  To: Bruce Richardson; +Cc: Jake Freeland, dev

Allow the user to specify a list of comma separated NUMA domains in
`hw.contigmem.domains` to allocate memory from. All contigmem will be
allocated in an interleaved fashion across the specified domains using
the DOMAINSET_POLICY_INTERLEAVE policy from domainset(9).

If `hw.contigmem.domains` is unset, memory will be interleaved across
all NUMA domains on the system.

Signed-off-by: Jake Freeland <jfree@FreeBSD.org>
---
 kernel/freebsd/contigmem/contigmem.c | 66 +++++++++++++++++++++++++++-
 1 file changed, 64 insertions(+), 2 deletions(-)

diff --git a/kernel/freebsd/contigmem/contigmem.c b/kernel/freebsd/contigmem/contigmem.c
index 591e46dded..01234567df 100644
--- a/kernel/freebsd/contigmem/contigmem.c
+++ b/kernel/freebsd/contigmem/contigmem.c
@@ -9,6 +9,8 @@ __FBSDID("$FreeBSD$");
 #include <sys/bio.h>
 #include <sys/bus.h>
 #include <sys/conf.h>
+#include <sys/ctype.h>
+#include <sys/domainset.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
@@ -53,6 +55,12 @@ static int              contigmem_num_buffers = RTE_CONTIGMEM_DEFAULT_NUM_BUFS;
 static int64_t          contigmem_buffer_size = RTE_CONTIGMEM_DEFAULT_BUF_SIZE;
 static bool             contigmem_coredump_enable;

+static char		contigmem_domainstr[DOMAINSET_SETSIZE * 2];
+static struct domainset contigmem_domainset = {
+	.ds_policy = DOMAINSET_POLICY_INTERLEAVE,
+	.ds_prefer = -1,
+};
+
 static eventhandler_tag contigmem_eh_tag;
 static struct contigmem_buffer contigmem_buffers[RTE_CONTIGMEM_MAX_NUM_BUFS];
 static struct cdev     *contigmem_cdev = NULL;
@@ -61,6 +69,8 @@ static int              contigmem_refcnt;
 TUNABLE_INT("hw.contigmem.num_buffers", &contigmem_num_buffers);
 TUNABLE_QUAD("hw.contigmem.buffer_size", &contigmem_buffer_size);
 TUNABLE_BOOL("hw.contigmem.coredump_enable", &contigmem_coredump_enable);
+TUNABLE_STR("hw.contigmem.domains", contigmem_domainstr,
+	sizeof(contigmem_domainstr));

 static SYSCTL_NODE(_hw, OID_AUTO, contigmem, CTLFLAG_RD, 0, "contigmem");

@@ -75,6 +85,8 @@ SYSCTL_BOOL(_hw_contigmem, OID_AUTO, coredump_enable, CTLFLAG_RD,

 static SYSCTL_NODE(_hw_contigmem, OID_AUTO, physaddr, CTLFLAG_RD, 0,
 	"physaddr");
+static SYSCTL_NODE(_hw_contigmem, OID_AUTO, domain, CTLFLAG_RD, 0,
+	"domain");

 MALLOC_DEFINE(M_CONTIGMEM, "contigmem", "contigmem(4) allocations");

@@ -114,9 +126,50 @@ static struct cdevsw contigmem_ops = {
 	.d_close        = contigmem_close,
 };

+static void
+contigmem_domains_parse(void)
+{
+	domainid_t did;
+	char *dstr;
+
+	int i = 0;
+	char name[12];
+
+	/*
+	 * If hw.contigmem.domains is not set, then evenly
+	 * distribute memory across all domains.
+	 */
+	if (*contigmem_domainstr == '\0') {
+		DOMAINSET_COPY(&all_domains, &contigmem_domainset.ds_mask);
+		return;
+	}
+
+	dstr = contigmem_domainstr;
+	while (*dstr != '\0') {
+		if (!isdigit(*dstr)) {
+			++dstr;
+			continue;
+		}
+		did = strtoul(dstr, &dstr, 0);
+		DOMAINSET_SET(did, &contigmem_domainset.ds_mask);
+
+		snprintf(name, sizeof(name), "%d", i++);
+		/* We should be adding ds_mask as a bitset. */
+		SYSCTL_ADD_INT(NULL,
+			&SYSCTL_NODE_CHILDREN(_hw_contigmem, domain), OID_AUTO,
+			name, CTLTYPE_INT | CTLFLAG_RD, SYSCTL_NULL_INT_PTR,
+			did, "Contiguous memory NUMA domain");
+	}
+	SYSCTL_ADD_INT(NULL,
+		&SYSCTL_NODE_CHILDREN(_hw, contigmem), OID_AUTO,
+		"num_domains", CTLTYPE_INT | CTLFLAG_RD, SYSCTL_NULL_INT_PTR,
+		i, "Number of contiguous memory NUMA domains");
+}
+
 static int
 contigmem_load(void)
 {
+	struct domainset *ds;
 	char index_string[8], description[32];
 	int  i, error = 0;
 	void *addr;
@@ -136,9 +189,18 @@ contigmem_load(void)
 		goto error;
 	}

+	contigmem_domains_parse();
+	ds = domainset_create(&contigmem_domainset);
+	if (ds == NULL) {
+		printf("invalid domain string: %s\n", contigmem_domainstr);
+		error = EINVAL;
+		goto error;
+	}
+
 	for (i = 0; i < contigmem_num_buffers; i++) {
-		addr = contigmalloc(contigmem_buffer_size, M_CONTIGMEM, M_ZERO,
-			0, BUS_SPACE_MAXADDR, contigmem_buffer_size, 0);
+		addr = contigmalloc_domainset(contigmem_buffer_size,
+			M_CONTIGMEM, ds, M_ZERO, 0, BUS_SPACE_MAXADDR,
+			contigmem_buffer_size, 0);
 		if (addr == NULL) {
 			printf("contigmalloc failed for buffer %d\n", i);
 			error = ENOMEM;
--
2.47.2


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] kernel/freebsd: Allow contigmem allocation in NUMA domains
  2025-05-06 17:48 [PATCH] kernel/freebsd: Allow contigmem allocation in NUMA domains Jake Freeland
@ 2025-06-06 11:02 ` Bruce Richardson
  0 siblings, 0 replies; 2+ messages in thread
From: Bruce Richardson @ 2025-06-06 11:02 UTC (permalink / raw)
  To: Jake Freeland; +Cc: dev

On Tue, May 06, 2025 at 12:48:38PM -0500, Jake Freeland wrote:
> Allow the user to specify a list of comma separated NUMA domains in
> `hw.contigmem.domains` to allocate memory from. All contigmem will be
> allocated in an interleaved fashion across the specified domains using
> the DOMAINSET_POLICY_INTERLEAVE policy from domainset(9).
> 
> If `hw.contigmem.domains` is unset, memory will be interleaved across
> all NUMA domains on the system.
> 
> Signed-off-by: Jake Freeland <jfree@FreeBSD.org>

Thanks for the patch.

I think it could do with some documentation including some usage examples,
for the user to know what goes in the hw.contigmem.domains parameter. I'm a
little unclear myself on how this new setting should be used.

My own thoughts on the general problem is that the memory allocation per
NUMA node should be specified in a similar pattern to that done when using
the DPDK socket-mem parameter. So, for example, to allocate 1G on each of
two numa nodes, one should provide the value "1024,1024", or to allocate
only on NUMA node 1, "0,1024". Something similar would be nice to have for
contigmem specification. [Using a string rather than numeric type, would
also have the advantage that we could further improve usability by allowing
"k,M,G" suffixes on the numeric values.]

What do you think?

/Bruce

> ---
>  kernel/freebsd/contigmem/contigmem.c | 66 +++++++++++++++++++++++++++-
>  1 file changed, 64 insertions(+), 2 deletions(-)
> 
> diff --git a/kernel/freebsd/contigmem/contigmem.c b/kernel/freebsd/contigmem/contigmem.c
> index 591e46dded..01234567df 100644
> --- a/kernel/freebsd/contigmem/contigmem.c
> +++ b/kernel/freebsd/contigmem/contigmem.c
> @@ -9,6 +9,8 @@ __FBSDID("$FreeBSD$");
>  #include <sys/bio.h>
>  #include <sys/bus.h>
>  #include <sys/conf.h>
> +#include <sys/ctype.h>
> +#include <sys/domainset.h>
>  #include <sys/kernel.h>
>  #include <sys/malloc.h>
>  #include <sys/module.h>
> @@ -53,6 +55,12 @@ static int              contigmem_num_buffers = RTE_CONTIGMEM_DEFAULT_NUM_BUFS;
>  static int64_t          contigmem_buffer_size = RTE_CONTIGMEM_DEFAULT_BUF_SIZE;
>  static bool             contigmem_coredump_enable;
> 
> +static char		contigmem_domainstr[DOMAINSET_SETSIZE * 2];
> +static struct domainset contigmem_domainset = {
> +	.ds_policy = DOMAINSET_POLICY_INTERLEAVE,
> +	.ds_prefer = -1,
> +};
> +
>  static eventhandler_tag contigmem_eh_tag;
>  static struct contigmem_buffer contigmem_buffers[RTE_CONTIGMEM_MAX_NUM_BUFS];
>  static struct cdev     *contigmem_cdev = NULL;
> @@ -61,6 +69,8 @@ static int              contigmem_refcnt;
>  TUNABLE_INT("hw.contigmem.num_buffers", &contigmem_num_buffers);
>  TUNABLE_QUAD("hw.contigmem.buffer_size", &contigmem_buffer_size);
>  TUNABLE_BOOL("hw.contigmem.coredump_enable", &contigmem_coredump_enable);
> +TUNABLE_STR("hw.contigmem.domains", contigmem_domainstr,
> +	sizeof(contigmem_domainstr));
> 
>  static SYSCTL_NODE(_hw, OID_AUTO, contigmem, CTLFLAG_RD, 0, "contigmem");
> 
> @@ -75,6 +85,8 @@ SYSCTL_BOOL(_hw_contigmem, OID_AUTO, coredump_enable, CTLFLAG_RD,
> 
>  static SYSCTL_NODE(_hw_contigmem, OID_AUTO, physaddr, CTLFLAG_RD, 0,
>  	"physaddr");
> +static SYSCTL_NODE(_hw_contigmem, OID_AUTO, domain, CTLFLAG_RD, 0,
> +	"domain");
> 
>  MALLOC_DEFINE(M_CONTIGMEM, "contigmem", "contigmem(4) allocations");
> 
> @@ -114,9 +126,50 @@ static struct cdevsw contigmem_ops = {
>  	.d_close        = contigmem_close,
>  };
> 
> +static void
> +contigmem_domains_parse(void)
> +{
> +	domainid_t did;
> +	char *dstr;
> +
> +	int i = 0;
> +	char name[12];
> +
> +	/*
> +	 * If hw.contigmem.domains is not set, then evenly
> +	 * distribute memory across all domains.
> +	 */
> +	if (*contigmem_domainstr == '\0') {
> +		DOMAINSET_COPY(&all_domains, &contigmem_domainset.ds_mask);
> +		return;
> +	}
> +
> +	dstr = contigmem_domainstr;
> +	while (*dstr != '\0') {
> +		if (!isdigit(*dstr)) {
> +			++dstr;
> +			continue;
> +		}
> +		did = strtoul(dstr, &dstr, 0);
> +		DOMAINSET_SET(did, &contigmem_domainset.ds_mask);
> +
> +		snprintf(name, sizeof(name), "%d", i++);
> +		/* We should be adding ds_mask as a bitset. */
> +		SYSCTL_ADD_INT(NULL,
> +			&SYSCTL_NODE_CHILDREN(_hw_contigmem, domain), OID_AUTO,
> +			name, CTLTYPE_INT | CTLFLAG_RD, SYSCTL_NULL_INT_PTR,
> +			did, "Contiguous memory NUMA domain");
> +	}
> +	SYSCTL_ADD_INT(NULL,
> +		&SYSCTL_NODE_CHILDREN(_hw, contigmem), OID_AUTO,
> +		"num_domains", CTLTYPE_INT | CTLFLAG_RD, SYSCTL_NULL_INT_PTR,
> +		i, "Number of contiguous memory NUMA domains");
> +}
> +
>  static int
>  contigmem_load(void)
>  {
> +	struct domainset *ds;
>  	char index_string[8], description[32];
>  	int  i, error = 0;
>  	void *addr;
> @@ -136,9 +189,18 @@ contigmem_load(void)
>  		goto error;
>  	}
> 
> +	contigmem_domains_parse();
> +	ds = domainset_create(&contigmem_domainset);
> +	if (ds == NULL) {
> +		printf("invalid domain string: %s\n", contigmem_domainstr);
> +		error = EINVAL;
> +		goto error;
> +	}
> +
>  	for (i = 0; i < contigmem_num_buffers; i++) {
> -		addr = contigmalloc(contigmem_buffer_size, M_CONTIGMEM, M_ZERO,
> -			0, BUS_SPACE_MAXADDR, contigmem_buffer_size, 0);
> +		addr = contigmalloc_domainset(contigmem_buffer_size,
> +			M_CONTIGMEM, ds, M_ZERO, 0, BUS_SPACE_MAXADDR,
> +			contigmem_buffer_size, 0);
>  		if (addr == NULL) {
>  			printf("contigmalloc failed for buffer %d\n", i);
>  			error = ENOMEM;
> --
> 2.47.2
> 

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2025-06-06 11:02 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-05-06 17:48 [PATCH] kernel/freebsd: Allow contigmem allocation in NUMA domains Jake Freeland
2025-06-06 11:02 ` Bruce Richardson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).