patches for DPDK stable branches
 help / color / mirror / Atom feed
From: Yuanhan Liu <yliu@fridaylinux.org>
To: Tiwei Bie <tiwei.bie@intel.com>
Cc: Bruce Richardson <bruce.richardson@intel.com>,
	dpdk stable <stable@dpdk.org>
Subject: [dpdk-stable] patch 'contigmem: do not zero pages during each mmap' has been queued to LTS release 16.11.3
Date: Fri, 14 Jul 2017 18:33:50 +0800	[thread overview]
Message-ID: <1500028450-25989-26-git-send-email-yliu@fridaylinux.org> (raw)
In-Reply-To: <1500028450-25989-1-git-send-email-yliu@fridaylinux.org>

Hi,

FYI, your patch has been queued to LTS release 16.11.3

Note that it hasn't been pushed to http://dpdk.org/browse/dpdk-stable yet.
It will be pushed if I get no objections before 07/19/17 (July 19, 2017),
so please shout if you have any objections.

Thanks.

	--yliu

---
From 156fe3d7888a680342a6c3345284fa3a4a10b82b Mon Sep 17 00:00:00 2001
From: Tiwei Bie <tiwei.bie@intel.com>
Date: Sun, 4 Jun 2017 13:53:24 +0800
Subject: [PATCH] contigmem: do not zero pages during each mmap

[ upstream commit 190ce8645e1ebeb0733f9dbc7fa854b50d1dac2e ]

Don't zero the pages during each mmap. Instead, only zero the pages
when they are not already mmapped. Otherwise, the multi-process
support will be broken, as the pages will be zeroed when secondary
processes map the memory. Besides, track the open and mmap operations
on the cdev, and prevent the module from being unloaded when it is
still in use.

Fixes: 82f931805506 ("contigmem: zero all pages during mmap")

Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
Acked-by: Bruce Richardson <bruce.richardson@intel.com>
---
 lib/librte_eal/bsdapp/contigmem/contigmem.c | 186 ++++++++++++++++++++++++----
 1 file changed, 160 insertions(+), 26 deletions(-)

diff --git a/lib/librte_eal/bsdapp/contigmem/contigmem.c b/lib/librte_eal/bsdapp/contigmem/contigmem.c
index 03e3e8d..e8fb908 100644
--- a/lib/librte_eal/bsdapp/contigmem/contigmem.c
+++ b/lib/librte_eal/bsdapp/contigmem/contigmem.c
@@ -50,24 +50,37 @@ __FBSDID("$FreeBSD$");
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
+#include <vm/vm_param.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pager.h>
+#include <vm/vm_phys.h>
+
+struct contigmem_buffer {
+	void           *addr;
+	int             refcnt;
+	struct mtx      mtx;
+};
+
+struct contigmem_vm_handle {
+	int             buffer_index;
+};
 
 static int              contigmem_load(void);
 static int              contigmem_unload(void);
 static int              contigmem_physaddr(SYSCTL_HANDLER_ARGS);
 
-static d_mmap_t         contigmem_mmap;
 static d_mmap_single_t  contigmem_mmap_single;
 static d_open_t         contigmem_open;
+static d_close_t        contigmem_close;
 
 static int              contigmem_num_buffers = RTE_CONTIGMEM_DEFAULT_NUM_BUFS;
 static int64_t          contigmem_buffer_size = RTE_CONTIGMEM_DEFAULT_BUF_SIZE;
 
 static eventhandler_tag contigmem_eh_tag;
-static void            *contigmem_buffers[RTE_CONTIGMEM_MAX_NUM_BUFS];
+static struct contigmem_buffer contigmem_buffers[RTE_CONTIGMEM_MAX_NUM_BUFS];
 static struct cdev     *contigmem_cdev = NULL;
+static int              contigmem_refcnt;
 
 TUNABLE_INT("hw.contigmem.num_buffers", &contigmem_num_buffers);
 TUNABLE_QUAD("hw.contigmem.buffer_size", &contigmem_buffer_size);
@@ -78,6 +91,8 @@ SYSCTL_INT(_hw_contigmem, OID_AUTO, num_buffers, CTLFLAG_RD,
 	&contigmem_num_buffers, 0, "Number of contigmem buffers allocated");
 SYSCTL_QUAD(_hw_contigmem, OID_AUTO, buffer_size, CTLFLAG_RD,
 	&contigmem_buffer_size, 0, "Size of each contiguous buffer");
+SYSCTL_INT(_hw_contigmem, OID_AUTO, num_references, CTLFLAG_RD,
+	&contigmem_refcnt, 0, "Number of references to contigmem");
 
 static SYSCTL_NODE(_hw_contigmem, OID_AUTO, physaddr, CTLFLAG_RD, 0,
 	"physaddr");
@@ -114,9 +129,10 @@ MODULE_VERSION(contigmem, 1);
 static struct cdevsw contigmem_ops = {
 	.d_name         = "contigmem",
 	.d_version      = D_VERSION,
-	.d_mmap         = contigmem_mmap,
+	.d_flags        = D_TRACKCLOSE,
 	.d_mmap_single  = contigmem_mmap_single,
 	.d_open         = contigmem_open,
+	.d_close        = contigmem_close,
 };
 
 static int
@@ -124,6 +140,7 @@ contigmem_load()
 {
 	char index_string[8], description[32];
 	int  i, error = 0;
+	void *addr;
 
 	if (contigmem_num_buffers > RTE_CONTIGMEM_MAX_NUM_BUFS) {
 		printf("%d buffers requested is greater than %d allowed\n",
@@ -141,18 +158,20 @@ contigmem_load()
 	}
 
 	for (i = 0; i < contigmem_num_buffers; i++) {
-		contigmem_buffers[i] =
-				contigmalloc(contigmem_buffer_size, M_CONTIGMEM, M_ZERO, 0,
-			BUS_SPACE_MAXADDR, contigmem_buffer_size, 0);
-
-		if (contigmem_buffers[i] == NULL) {
+		addr = contigmalloc(contigmem_buffer_size, M_CONTIGMEM, M_ZERO,
+			0, BUS_SPACE_MAXADDR, contigmem_buffer_size, 0);
+		if (addr == NULL) {
 			printf("contigmalloc failed for buffer %d\n", i);
 			error = ENOMEM;
 			goto error;
 		}
 
-		printf("%2u: virt=%p phys=%p\n", i, contigmem_buffers[i],
-				(void *)pmap_kextract((vm_offset_t)contigmem_buffers[i]));
+		printf("%2u: virt=%p phys=%p\n", i, addr,
+			(void *)pmap_kextract((vm_offset_t)addr));
+
+		mtx_init(&contigmem_buffers[i].mtx, "contigmem", NULL, MTX_DEF);
+		contigmem_buffers[i].addr = addr;
+		contigmem_buffers[i].refcnt = 0;
 
 		snprintf(index_string, sizeof(index_string), "%d", i);
 		snprintf(description, sizeof(description),
@@ -170,10 +189,13 @@ contigmem_load()
 	return 0;
 
 error:
-	for (i = 0; i < contigmem_num_buffers; i++)
-		if (contigmem_buffers[i] != NULL)
-			contigfree(contigmem_buffers[i], contigmem_buffer_size,
-					M_CONTIGMEM);
+	for (i = 0; i < contigmem_num_buffers; i++) {
+		if (contigmem_buffers[i].addr != NULL)
+			contigfree(contigmem_buffers[i].addr,
+				contigmem_buffer_size, M_CONTIGMEM);
+		if (mtx_initialized(&contigmem_buffers[i].mtx))
+			mtx_destroy(&contigmem_buffers[i].mtx);
+	}
 
 	return error;
 }
@@ -183,16 +205,22 @@ contigmem_unload()
 {
 	int i;
 
+	if (contigmem_refcnt > 0)
+		return EBUSY;
+
 	if (contigmem_cdev != NULL)
 		destroy_dev(contigmem_cdev);
 
 	if (contigmem_eh_tag != NULL)
 		EVENTHANDLER_DEREGISTER(process_exit, contigmem_eh_tag);
 
-	for (i = 0; i < RTE_CONTIGMEM_MAX_NUM_BUFS; i++)
-		if (contigmem_buffers[i] != NULL)
-			contigfree(contigmem_buffers[i], contigmem_buffer_size,
-					M_CONTIGMEM);
+	for (i = 0; i < RTE_CONTIGMEM_MAX_NUM_BUFS; i++) {
+		if (contigmem_buffers[i].addr != NULL)
+			contigfree(contigmem_buffers[i].addr,
+				contigmem_buffer_size, M_CONTIGMEM);
+		if (mtx_initialized(&contigmem_buffers[i].mtx))
+			mtx_destroy(&contigmem_buffers[i].mtx);
+	}
 
 	return 0;
 }
@@ -203,7 +231,7 @@ contigmem_physaddr(SYSCTL_HANDLER_ARGS)
 	uint64_t	physaddr;
 	int		index = (int)(uintptr_t)arg1;
 
-	physaddr = (uint64_t)vtophys(contigmem_buffers[index]);
+	physaddr = (uint64_t)vtophys(contigmem_buffers[index].addr);
 	return sysctl_handle_64(oidp, &physaddr, 0, req);
 }
 
@@ -211,22 +239,121 @@ static int
 contigmem_open(struct cdev *cdev, int fflags, int devtype,
 		struct thread *td)
 {
+
+	atomic_add_int(&contigmem_refcnt, 1);
+
+	return 0;
+}
+
+static int
+contigmem_close(struct cdev *cdev, int fflags, int devtype,
+		struct thread *td)
+{
+
+	atomic_subtract_int(&contigmem_refcnt, 1);
+
 	return 0;
 }
 
 static int
-contigmem_mmap(struct cdev *cdev, vm_ooffset_t offset, vm_paddr_t *paddr,
-		int prot, vm_memattr_t *memattr)
+contigmem_cdev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
+		vm_ooffset_t foff, struct ucred *cred, u_short *color)
 {
+	struct contigmem_vm_handle *vmh = handle;
+	struct contigmem_buffer *buf;
+
+	buf = &contigmem_buffers[vmh->buffer_index];
+
+	atomic_add_int(&contigmem_refcnt, 1);
+
+	mtx_lock(&buf->mtx);
+	if (buf->refcnt == 0)
+		memset(buf->addr, 0, contigmem_buffer_size);
+	buf->refcnt++;
+	mtx_unlock(&buf->mtx);
 
-	*paddr = offset;
 	return 0;
 }
 
+static void
+contigmem_cdev_pager_dtor(void *handle)
+{
+	struct contigmem_vm_handle *vmh = handle;
+	struct contigmem_buffer *buf;
+
+	buf = &contigmem_buffers[vmh->buffer_index];
+
+	mtx_lock(&buf->mtx);
+	buf->refcnt--;
+	mtx_unlock(&buf->mtx);
+
+	free(vmh, M_CONTIGMEM);
+
+	atomic_subtract_int(&contigmem_refcnt, 1);
+}
+
+static int
+contigmem_cdev_pager_fault(vm_object_t object, vm_ooffset_t offset, int prot,
+		vm_page_t *mres)
+{
+	vm_paddr_t paddr;
+	vm_page_t m_paddr, page;
+	vm_memattr_t memattr, memattr1;
+
+	memattr = object->memattr;
+
+	VM_OBJECT_WUNLOCK(object);
+
+	paddr = offset;
+
+	m_paddr = vm_phys_paddr_to_vm_page(paddr);
+	if (m_paddr != NULL) {
+		memattr1 = pmap_page_get_memattr(m_paddr);
+		if (memattr1 != memattr)
+			memattr = memattr1;
+	}
+
+	if (((*mres)->flags & PG_FICTITIOUS) != 0) {
+		/*
+		 * If the passed in result page is a fake page, update it with
+		 * the new physical address.
+		 */
+		page = *mres;
+		VM_OBJECT_WLOCK(object);
+		vm_page_updatefake(page, paddr, memattr);
+	} else {
+		vm_page_t mret;
+		/*
+		 * Replace the passed in reqpage page with our own fake page and
+		 * free up the original page.
+		 */
+		page = vm_page_getfake(paddr, memattr);
+		VM_OBJECT_WLOCK(object);
+		mret = vm_page_replace(page, object, (*mres)->pindex);
+		KASSERT(mret == *mres,
+		    ("invalid page replacement, old=%p, ret=%p", *mres, mret));
+		vm_page_lock(mret);
+		vm_page_free(mret);
+		vm_page_unlock(mret);
+		*mres = page;
+	}
+
+	page->valid = VM_PAGE_BITS_ALL;
+
+	return VM_PAGER_OK;
+}
+
+static struct cdev_pager_ops contigmem_cdev_pager_ops = {
+	.cdev_pg_ctor = contigmem_cdev_pager_ctor,
+	.cdev_pg_dtor = contigmem_cdev_pager_dtor,
+	.cdev_pg_fault = contigmem_cdev_pager_fault,
+};
+
 static int
 contigmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size,
 		struct vm_object **obj, int nprot)
 {
+	struct contigmem_vm_handle *vmh;
 	uint64_t buffer_index;
 
 	/*
@@ -238,10 +365,17 @@ contigmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size,
 	if (buffer_index >= contigmem_num_buffers)
 		return EINVAL;
 
-	memset(contigmem_buffers[buffer_index], 0, contigmem_buffer_size);
-	*offset = (vm_ooffset_t)vtophys(contigmem_buffers[buffer_index]);
-	*obj = vm_pager_allocate(OBJT_DEVICE, cdev, size, nprot, *offset,
-			curthread->td_ucred);
+	if (size > contigmem_buffer_size)
+		return EINVAL;
+
+	vmh = malloc(sizeof(*vmh), M_CONTIGMEM, M_NOWAIT | M_ZERO);
+	if (vmh == NULL)
+		return ENOMEM;
+	vmh->buffer_index = buffer_index;
+
+	*offset = (vm_ooffset_t)vtophys(contigmem_buffers[buffer_index].addr);
+	*obj = cdev_pager_allocate(vmh, OBJT_DEVICE, &contigmem_cdev_pager_ops,
+			size, nprot, *offset, curthread->td_ucred);
 
 	return 0;
 }
-- 
2.7.4

  parent reply	other threads:[~2017-07-14 10:36 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-07-14 10:33 [dpdk-stable] patch 'net/i40e: add return value checks' " Yuanhan Liu
2017-07-14 10:33 ` [dpdk-stable] patch 'net/i40e/base: fix Tx error stats on VF' " Yuanhan Liu
2017-07-14 10:33 ` [dpdk-stable] patch 'net/mlx5: fix completion buffer size' " Yuanhan Liu
2017-07-14 10:33 ` [dpdk-stable] patch 'net/igb: fix add/delete of flex filters' " Yuanhan Liu
2017-07-14 10:33 ` [dpdk-stable] patch 'net/cxgbe: fix port statistics' " Yuanhan Liu
2017-07-14 10:33 ` [dpdk-stable] patch 'net/mlx5: fix exception handling' " Yuanhan Liu
2017-07-14 10:33 ` [dpdk-stable] patch 'net/mlx5: fix redundant free of Tx buffer' " Yuanhan Liu
2017-07-14 10:33 ` [dpdk-stable] patch 'net/igb: fix checksum valid flags' " Yuanhan Liu
2017-07-14 10:33 ` [dpdk-stable] patch 'net/i40e: exclude internal packet's byte count' " Yuanhan Liu
2017-07-14 10:33 ` [dpdk-stable] patch 'net/i40e: fix VF statistics' " Yuanhan Liu
2017-07-14 10:33 ` [dpdk-stable] patch 'net/bnxt: fix reporting of link status' " Yuanhan Liu
2017-07-14 10:33 ` [dpdk-stable] patch 'kni: fix build with gcc 7.1' " Yuanhan Liu
2017-07-14 10:33 ` [dpdk-stable] patch 'net/enic: " Yuanhan Liu
2017-07-14 10:33 ` [dpdk-stable] patch 'net/mlx5: " Yuanhan Liu
2017-07-14 10:33 ` [dpdk-stable] patch 'vhost: fix guest pages memory leak' " Yuanhan Liu
2017-07-14 10:33 ` [dpdk-stable] patch 'net/virtio: zero the whole memory zone' " Yuanhan Liu
2017-07-14 10:33 ` [dpdk-stable] patch 'lpm: fix index of tbl8' " Yuanhan Liu
2017-07-14 10:33 ` [dpdk-stable] patch 'ip_frag: free mbufs on reassembly table destroy' " Yuanhan Liu
2017-07-14 10:33 ` [dpdk-stable] patch 'mem: fix malloc element resize with padding' " Yuanhan Liu
2017-07-14 10:33 ` [dpdk-stable] patch 'examples/l2fwd-crypto: fix option parsing' " Yuanhan Liu
2017-07-14 10:33 ` [dpdk-stable] patch 'examples/l2fwd-crypto: fix application help' " Yuanhan Liu
2017-07-14 10:33 ` [dpdk-stable] patch 'cryptodev: fix device stop function' " Yuanhan Liu
2017-07-14 10:33 ` [dpdk-stable] patch 'test/crypto: fix overflow' " Yuanhan Liu
2017-07-14 10:33 ` [dpdk-stable] patch 'mbuf: fix debug checks for headroom and tailroom' " Yuanhan Liu
2017-07-14 10:33 ` [dpdk-stable] patch 'contigmem: free allocated memory on error' " Yuanhan Liu
2017-07-14 10:33 ` Yuanhan Liu [this message]
2017-07-14 10:33 ` [dpdk-stable] patch 'eal: fix config file path when checking process' " Yuanhan Liu
2017-07-14 10:33 ` [dpdk-stable] patch 'net/igb: fix flex filter length' " Yuanhan Liu
2017-07-14 10:33 ` [dpdk-stable] patch 'net/fm10k: initialize link status in device start' " Yuanhan Liu
2017-07-14 10:33 ` [dpdk-stable] patch 'net/bonding: fix when NTT flag updated' " Yuanhan Liu
2017-07-14 10:33 ` [dpdk-stable] patch 'net/vmxnet3: fix receive queue memory leak' " Yuanhan Liu
2017-07-14 10:33 ` [dpdk-stable] patch 'net/i40e: fix division by 0' " Yuanhan Liu
2017-07-14 10:33 ` [dpdk-stable] patch 'net/bnxt: fix get link config' " Yuanhan Liu
2017-07-14 10:33 ` [dpdk-stable] patch 'net/bnxt: fix autoneg on 10GBase-T links' " Yuanhan Liu
2017-07-14 10:34 ` [dpdk-stable] patch 'net/bnxt: fix set link config' " Yuanhan Liu
2017-07-14 10:34 ` [dpdk-stable] patch 'net/mlx4: fix mbuf poisoning in debug code' " Yuanhan Liu
2017-07-14 10:34 ` [dpdk-stable] patch 'test/bonding: fix mode 4 names' " Yuanhan Liu
2017-07-14 10:34 ` [dpdk-stable] patch 'vhost: fix checking of device features' " Yuanhan Liu
2017-07-14 10:34 ` [dpdk-stable] patch 'net/virtio-user: fix crash when detaching device' " Yuanhan Liu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1500028450-25989-26-git-send-email-yliu@fridaylinux.org \
    --to=yliu@fridaylinux.org \
    --cc=bruce.richardson@intel.com \
    --cc=stable@dpdk.org \
    --cc=tiwei.bie@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).