DPDK patches and discussions
 help / color / mirror / Atom feed
From: Maxime Coquelin <maxime.coquelin@redhat.com>
To: dev@dpdk.org, Yuanhan Liu <yliu@fridaylinux.org>
Cc: mst@redhat.com, vkaplans@redhat.com, jasowang@redhat.com,
	jfreiman@redhat.com, Maxime Coquelin <maxime.coquelin@redhat.com>
Subject: [dpdk-dev] [RFC 06/19] vhost: add iotlb helper functions
Date: Tue,  4 Jul 2017 11:49:09 +0200	[thread overview]
Message-ID: <20170704094922.11405-7-maxime.coquelin@redhat.com> (raw)
In-Reply-To: <20170704094922.11405-1-maxime.coquelin@redhat.com>

Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/librte_vhost/Makefile |   4 +-
 lib/librte_vhost/iotlb.c  | 235 ++++++++++++++++++++++++++++++++++++++++++++++
 lib/librte_vhost/iotlb.h  |  47 ++++++++++
 lib/librte_vhost/vhost.c  |   1 +
 lib/librte_vhost/vhost.h  |   5 +
 5 files changed, 290 insertions(+), 2 deletions(-)
 create mode 100644 lib/librte_vhost/iotlb.c
 create mode 100644 lib/librte_vhost/iotlb.h

diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index 4a116fe..e1084ab 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -47,8 +47,8 @@ LDLIBS += -lnuma
 endif
 
 # all source are stored in SRCS-y
-SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := fd_man.c socket.c vhost.c vhost_user.c \
-				   virtio_net.c
+SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := fd_man.c iotlb.c socket.c vhost.c \
+					vhost_user.c virtio_net.c
 
 # install includes
 SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_vhost.h
diff --git a/lib/librte_vhost/iotlb.c b/lib/librte_vhost/iotlb.c
new file mode 100644
index 0000000..02457fa
--- /dev/null
+++ b/lib/librte_vhost/iotlb.c
@@ -0,0 +1,235 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) 2017 Red Hat, Inc.
+ *   Copyright (c) 2017 Maxime Coquelin <maxime.coquelin@redhat.com>
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef RTE_LIBRTE_VHOST_NUMA
+#include <numaif.h>
+#endif
+
+#include <rte_tailq.h>
+
+#include "iotlb.h"
+#include "vhost.h"
+
+/*
+ * One cached IOTLB translation. Entries live on the per-virtqueue
+ * vq->iotlb_list, which the insert helper keeps sorted by ascending iova.
+ */
+struct vhost_iotlb_entry {
+	TAILQ_ENTRY(vhost_iotlb_entry) next;	/* linkage in vq->iotlb_list */
+
+	uint64_t iova;	/* start of the translated range; list sort key */
+	uint64_t uaddr;	/* address lookups return for iova */
+	uint64_t size;	/* length of the range (presumably bytes) */
+	uint8_t perm;	/* permission bits; a lookup needs all its bits set here */
+};
+
+/*
+ * Element count passed to rte_mempool_create() for each virtqueue's
+ * IOTLB entry pool, i.e. the maximum number of cached translations.
+ * ToDo: refine cache size
+ */
+#define IOTLB_CACHE_SIZE 1024
+
+/*
+ * Cache a new translation for the given virtqueue.
+ *
+ * The entry list is kept sorted by ascending iova. When an entry with the
+ * same iova is already present, the cached one is kept and the new one is
+ * returned to the pool. If the entry pool is exhausted, the whole cache is
+ * flushed once and the allocation retried; on a second failure nothing is
+ * inserted.
+ *
+ * ToDo: Coalesce contiguous entries?
+ */
+void vhost_user_iotlb_insert(struct vhost_virtqueue *vq, uint64_t iova,
+				uint64_t uaddr, uint64_t size, uint8_t perm)
+{
+	struct vhost_iotlb_entry *entry, *cur;
+
+	if (rte_mempool_get(vq->iotlb_pool, (void **)&entry)) {
+		RTE_LOG(ERR, VHOST_CONFIG, "IOTLB pool empty, invalidate cache\n");
+		/* Takes iotlb_lock itself, so it must run before we lock below */
+		vhost_user_iotlb_remove_all(vq);
+		if (rte_mempool_get(vq->iotlb_pool, (void **)&entry)) {
+			RTE_LOG(ERR, VHOST_CONFIG, "IOTLB pool still empty, failure\n");
+			return;
+		}
+	}
+
+	entry->iova = iova;
+	entry->uaddr = uaddr;
+	entry->size = size;
+	entry->perm = perm;
+
+	rte_rwlock_write_lock(&vq->iotlb_lock);
+
+	/* Stop at the first cached entry whose iova is not below the new one */
+	TAILQ_FOREACH(cur, &vq->iotlb_list, next) {
+		if (cur->iova < iova)
+			continue;
+
+		if (cur->iova == iova) {
+			/*
+			 * IIUC, entries must be invalidated before being
+			 * updated. So if iova already in list, assume
+			 * identical and discard the new entry.
+			 */
+			rte_mempool_put(vq->iotlb_pool, entry);
+		} else {
+			TAILQ_INSERT_BEFORE(cur, entry, next);
+		}
+		break;
+	}
+
+	/* Cursor ran off the end: every cached iova is below the new one */
+	if (!cur)
+		TAILQ_INSERT_TAIL(&vq->iotlb_list, entry, next);
+
+	rte_rwlock_write_unlock(&vq->iotlb_lock);
+}
+
+/*
+ * Invalidate every cached entry that overlaps [iova, iova + size).
+ *
+ * A zero size is a no-op. Removed entries are unlinked from the list and
+ * returned to the virtqueue's entry pool. The write lock is held for the
+ * whole walk.
+ */
+void vhost_user_iotlb_remove(struct vhost_virtqueue *vq,
+					uint64_t iova, uint64_t size)
+{
+	struct vhost_iotlb_entry *node, *temp_node;
+	uint64_t last;
+
+	if (unlikely(!size))
+		return;
+
+	/*
+	 * Compare against the inclusive last address of the range. Using
+	 * "iova + size" directly wraps to a small value when the range reaches
+	 * past the top of the 64-bit space, which made the sorted-list early
+	 * exit below trigger on the first node and invalidate nothing.
+	 */
+	last = iova + (size - 1);
+	if (unlikely(last < iova))
+		last = UINT64_MAX;
+
+	rte_rwlock_write_lock(&vq->iotlb_lock);
+
+	TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
+		/* Sorted list: entries starting past 'last' cannot overlap */
+		if (unlikely(node->iova > last))
+			break;
+
+		/* node starts within range; overlaps if it ends after iova */
+		if (iova < node->iova + node->size) {
+			TAILQ_REMOVE(&vq->iotlb_list, node, next);
+			rte_mempool_put(vq->iotlb_pool, node);
+		}
+	}
+
+	rte_rwlock_write_unlock(&vq->iotlb_lock);
+}
+
+/*
+ * Flush the virtqueue's IOTLB cache: unlink every entry and hand it back
+ * to the entry pool, all under the write lock.
+ */
+void vhost_user_iotlb_remove_all(struct vhost_virtqueue *vq)
+{
+	struct vhost_iotlb_entry *head;
+
+	rte_rwlock_write_lock(&vq->iotlb_lock);
+
+	/* Pop from the front until the list is empty */
+	while ((head = TAILQ_FIRST(&vq->iotlb_list)) != NULL) {
+		TAILQ_REMOVE(&vq->iotlb_list, head, next);
+		rte_mempool_put(vq->iotlb_pool, head);
+	}
+
+	rte_rwlock_write_unlock(&vq->iotlb_lock);
+}
+
+/*
+ * Look up iova in the virtqueue's IOTLB cache.
+ *
+ * @vq:   virtqueue whose cache is searched (read lock taken internally)
+ * @iova: address to translate
+ * @size: in: requested length; out: lowered to the length actually
+ *        covered by cached entries when that is smaller (0 on a miss)
+ * @perm: permission bits every entry used must grant
+ *
+ * Returns the address matching iova, taken from the first covering entry,
+ * or 0 when nothing covers iova, when *size is 0 on entry, or when an
+ * entry on the way lacks one of the requested permission bits.
+ *
+ * The walk continues across consecutive entries as long as each one starts
+ * exactly where the previous one ended in IOVA space.
+ * NOTE(review): contiguity of uaddr across those entries is not checked
+ * here; callers presumably must not assume it -- confirm.
+ * NOTE(review): on a permission failure the return value is 0 but *size
+ * may still reflect entries accumulated before the failing one.
+ */
+uint64_t vhost_user_iotlb_find(struct vhost_virtqueue *vq, uint64_t iova,
+						uint64_t *size, uint8_t perm)
+{
+	struct vhost_iotlb_entry *node;
+	uint64_t offset, vva = 0, mapped = 0;
+
+	if (unlikely(!*size))
+		goto out;
+
+	rte_rwlock_read_lock(&vq->iotlb_lock);
+
+	TAILQ_FOREACH(node, &vq->iotlb_list, next) {
+		/* List sorted by iova: a gap before this node ends the search */
+		if (unlikely(iova < node->iova))
+			break;
+
+		/* Node lies entirely below iova, keep looking */
+		if (iova >= node->iova + node->size)
+			continue;
+
+		if (unlikely((perm & node->perm) != perm)) {
+			vva = 0;
+			break;
+		}
+
+		offset = iova - node->iova;
+		/* Only the first covering entry defines the returned address */
+		if (!vva)
+			vva = node->uaddr + offset;
+
+		mapped += node->size - offset;
+		/* Next entry must start right where this one ends */
+		iova = node->iova + node->size;
+
+		if (mapped >= *size)
+			break;
+	}
+
+	rte_rwlock_read_unlock(&vq->iotlb_lock);
+
+out:
+	/* Only part of the requested chunk is mapped */
+	if (unlikely(mapped < *size))
+		*size = mapped;
+
+	return vva;
+}
+
+/*
+ * Set up the IOTLB cache of virtqueue vq_index of the given device.
+ *
+ * If the virtqueue already owns an entry pool, the cache is simply
+ * flushed. Otherwise the lock and list are initialized and a mempool named
+ * "iotlb_cache_<vid>_<vq_index>" holding IOTLB_CACHE_SIZE entries is
+ * created, replacing any existing pool registered under that name.
+ *
+ * Returns 0 on success, -1 if the mempool could not be created.
+ */
+int vhost_user_iotlb_init(struct virtio_net *dev, int vq_index)
+{
+	struct vhost_virtqueue *vq = dev->virtqueue[vq_index];
+	char pool_name[RTE_MEMPOOL_NAMESIZE];
+	int socket = 0;
+
+	if (vq->iotlb_pool) {
+		/* Cache already set up: emptying it is all that is needed */
+		vhost_user_iotlb_remove_all(vq);
+		return 0;
+	}
+
+#ifdef RTE_LIBRTE_VHOST_NUMA
+	/* Ask for the node backing vq; fall back to socket 0 on failure */
+	if (get_mempolicy(&socket, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR))
+		socket = 0;
+#endif
+
+	rte_rwlock_init(&vq->iotlb_lock);
+
+	TAILQ_INIT(&vq->iotlb_list);
+
+	snprintf(pool_name, sizeof(pool_name), "iotlb_cache_%d_%d",
+			dev->vid, vq_index);
+
+	/*
+	 * Release any pool still registered under this name before
+	 * recreating it; rte_mempool_free() ignores a NULL pointer.
+	 */
+	vq->iotlb_pool = rte_mempool_lookup(pool_name);
+	rte_mempool_free(vq->iotlb_pool);
+
+	vq->iotlb_pool = rte_mempool_create(pool_name,
+			IOTLB_CACHE_SIZE, sizeof(struct vhost_iotlb_entry), 0,
+			0, 0, NULL, NULL, NULL, socket,
+			MEMPOOL_F_SC_GET |
+			MEMPOOL_F_SP_PUT |
+			MEMPOOL_F_NO_CACHE_ALIGN);
+	if (vq->iotlb_pool)
+		return 0;
+
+	RTE_LOG(ERR, VHOST_CONFIG,
+			"Failed to create IOTLB cache pool (%s)\n",
+			pool_name);
+	return -1;
+}
+
diff --git a/lib/librte_vhost/iotlb.h b/lib/librte_vhost/iotlb.h
new file mode 100644
index 0000000..68a43ec
--- /dev/null
+++ b/lib/librte_vhost/iotlb.h
@@ -0,0 +1,47 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) 2017 Red Hat, Inc.
+ *   Copyright (c) 2017 Maxime Coquelin <maxime.coquelin@redhat.com>
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _VHOST_IOTLB_H_
+#define _VHOST_IOTLB_H_
+
+#include "vhost.h"
+/*
+ * Cache a translation for vq, keeping the entry list sorted by iova.
+ * An entry whose iova is already cached is not replaced. If no free entry
+ * is available the whole cache is flushed once before retrying.
+ */
+void vhost_user_iotlb_insert(struct vhost_virtqueue *vq, uint64_t iova,
+					uint64_t uaddr, uint64_t size,
+					uint8_t perm);
+/* Drop the cached entries overlapping [iova, iova + size); no-op if !size. */
+void vhost_user_iotlb_remove(struct vhost_virtqueue *vq,
+					uint64_t iova, uint64_t size);
+/* Drop every cached entry of vq and return them to its entry pool. */
+void vhost_user_iotlb_remove_all(struct vhost_virtqueue *vq);
+/*
+ * Translate iova using vq's cache. Returns the matching address, or 0 on
+ * a miss or when a needed entry lacks one of the perm bits. *size is
+ * lowered to the length actually covered when less than requested.
+ */
+uint64_t vhost_user_iotlb_find(struct vhost_virtqueue *vq, uint64_t iova,
+					uint64_t *size, uint8_t perm);
+/*
+ * Initialize (or, if already initialized, flush) the IOTLB cache of
+ * dev->virtqueue[vq_index]. Returns 0 on success, -1 on failure.
+ */
+int vhost_user_iotlb_init(struct virtio_net *dev, int vq_index);
+
+#endif /* _VHOST_IOTLB_H_ */
diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 2a4bc91..5ca4de4 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -182,6 +182,7 @@ free_device(struct virtio_net *dev)
 		vq = dev->virtqueue[i];
 
 		rte_free(vq->shadow_used_ring);
+		rte_mempool_free(vq->iotlb_pool);
 
 		rte_free(vq);
 	}
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 5c9d931..7816a92 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -45,6 +45,7 @@
 
 #include <rte_log.h>
 #include <rte_ether.h>
+#include <rte_rwlock.h>
 
 #include "rte_vhost.h"
 
@@ -114,6 +115,10 @@ struct vhost_virtqueue {
 
 	struct vring_used_elem  *shadow_used_ring;
 	uint16_t                shadow_used_idx;
+
+	rte_rwlock_t	iotlb_lock;
+	struct rte_mempool *iotlb_pool;
+	TAILQ_HEAD(, vhost_iotlb_entry) iotlb_list;
 } __rte_cache_aligned;
 
 /* Old kernels have no such macros defined */
-- 
2.9.4

  parent reply	other threads:[~2017-07-04  9:49 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-07-04  9:49 [dpdk-dev] [RFC 00/19] Vhost-user: Implement device IOTLB support Maxime Coquelin
2017-07-04  9:49 ` [dpdk-dev] [RFC 01/19] vhost: protect virtio_net device struct Maxime Coquelin
2017-07-05 10:07   ` Jens Freimann
2017-07-07  7:31     ` Maxime Coquelin
2017-07-04  9:49 ` [dpdk-dev] [RFC 02/19] Revert "vhost: workaround MQ fails to startup" Maxime Coquelin
2017-07-04  9:49 ` [dpdk-dev] [RFC 03/19] vhost: prepare send_vhost_message() to slave requests Maxime Coquelin
2017-07-04  9:49 ` [dpdk-dev] [RFC 04/19] vhost: add support to slave requests channel Maxime Coquelin
2017-07-04  9:49 ` [dpdk-dev] [RFC 05/19] vhost: declare missing IOMMU-related definitions for old kernels Maxime Coquelin
2017-07-04  9:49 ` Maxime Coquelin [this message]
2017-07-04  9:49 ` [dpdk-dev] [RFC 07/19] vhost-user: add support to IOTLB miss slave requests Maxime Coquelin
2017-07-04  9:49 ` [dpdk-dev] [RFC 08/19] vhost: initialize vrings IOTLB caches Maxime Coquelin
2017-07-04  9:49 ` [dpdk-dev] [RFC 09/19] vhost: implement IOTLB events notification mechanism Maxime Coquelin
2017-07-04  9:49 ` [dpdk-dev] [RFC 10/19] vhost-user: handle IOTLB update and invalidate requests Maxime Coquelin
2017-07-04  9:49 ` [dpdk-dev] [RFC 11/19] vhost: introduce guest IOVA to backend VA helper Maxime Coquelin
2017-07-04  9:49 ` [dpdk-dev] [RFC 12/19] vhost: use the guest IOVA to host " Maxime Coquelin
2017-07-04  9:49 ` [dpdk-dev] [RFC 13/19] vhost: enable rings at the right time Maxime Coquelin
2017-07-04  9:49 ` [dpdk-dev] [RFC 14/19] vhost: don't dereference invalid dev pointer after its reallocation Maxime Coquelin
2017-07-04  9:49 ` [dpdk-dev] [RFC 15/19] vhost: postpone rings adresses translation Maxime Coquelin
2017-07-04  9:49 ` [dpdk-dev] [RFC 16/19] vhost-user: translate ring addresses when IOMMU enabled Maxime Coquelin
2017-07-04  9:49 ` [dpdk-dev] [RFC 17/19] vhost-user: iommu: postpone device creation until ring are mapped Maxime Coquelin
2017-07-04  9:49 ` [dpdk-dev] [RFC 18/19] vhost: iommu: Invalidate vring in case of matching IOTLB invalidate Maxime Coquelin
2017-07-04  9:49 ` [dpdk-dev] [RFC 19/19] vhost: enable IOMMU support Maxime Coquelin
2017-08-31  9:10 ` [dpdk-dev] [RFC 00/19] Vhost-user: Implement device IOTLB support Maxime Coquelin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170704094922.11405-7-maxime.coquelin@redhat.com \
    --to=maxime.coquelin@redhat.com \
    --cc=dev@dpdk.org \
    --cc=jasowang@redhat.com \
    --cc=jfreiman@redhat.com \
    --cc=mst@redhat.com \
    --cc=vkaplans@redhat.com \
    --cc=yliu@fridaylinux.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).