DPDK patches and discussions
 help / color / mirror / Atom feed
From: Xiao Wang <xiao.w.wang@intel.com>
To: tiwei.bie@intel.com, maxime.coquelin@redhat.com
Cc: alejandro.lucero@netronome.com, dev@dpdk.org,
	zhihong.wang@intel.com, xiaolong.ye@intel.com,
	Xiao Wang <xiao.w.wang@intel.com>
Subject: [dpdk-dev] [PATCH v4 03/10] vhost: provide helpers for virtio ring relay
Date: Sat, 15 Dec 2018 05:16:05 +0800	[thread overview]
Message-ID: <20181214211612.167681-4-xiao.w.wang@intel.com> (raw)
In-Reply-To: <20181214211612.167681-1-xiao.w.wang@intel.com>

This patch provides two helpers that let a vDPA device driver relay
entries between the guest virtio ring and a mediate virtio ring.

The available ring relay synchronizes the available entries and
validates the descriptors along the way.

The used ring relay synchronizes the used entries from the mediate ring
to the guest ring and performs dirty page logging for live migration.

The next patch will leverage these two helpers.

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
---
 lib/librte_vhost/rte_vdpa.h            |  39 +++++++
 lib/librte_vhost/rte_vhost_version.map |   2 +
 lib/librte_vhost/vdpa.c                | 194 +++++++++++++++++++++++++++++++++
 lib/librte_vhost/vhost.h               |  40 +++++++
 lib/librte_vhost/virtio_net.c          |  39 -------
 5 files changed, 275 insertions(+), 39 deletions(-)

diff --git a/lib/librte_vhost/rte_vdpa.h b/lib/librte_vhost/rte_vdpa.h
index fff657391..02b8d14ed 100644
--- a/lib/librte_vhost/rte_vdpa.h
+++ b/lib/librte_vhost/rte_vdpa.h
@@ -173,4 +173,43 @@ rte_vdpa_get_device_num(void);
  */
 int __rte_experimental
 rte_vhost_host_notifier_ctrl(int vid, bool enable);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Synchronize the available ring from the guest to the mediate ring and
+ * validate each descriptor to protect against a malicious guest driver.
+ *
+ * @param vid
+ *  vhost device id
+ * @param qid
+ *  vhost queue id
+ * @param vring_m
+ *  pointer to the mediate virtio ring (used as a struct vring)
+ * @return
+ *  number of synced available entries on success, -1 on failure
+ */
+int __rte_experimental
+rte_vdpa_relay_vring_avail(int vid, uint16_t qid, void *vring_m);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Synchronize the used ring from the mediate ring to the guest and log
+ * dirty pages for each writable buffer. Logging of the used ring itself
+ * is not covered; the caller should handle it before the device stops.
+ *
+ * @param vid
+ *  vhost device id
+ * @param qid
+ *  vhost queue id
+ * @param vring_m
+ *  pointer to the mediate virtio ring (used as a struct vring)
+ * @return
+ *  number of synced used entries on success, -1 on failure
+ */
+int __rte_experimental
+rte_vdpa_relay_vring_used(int vid, uint16_t qid, void *vring_m);
 #endif /* _RTE_VDPA_H_ */
diff --git a/lib/librte_vhost/rte_vhost_version.map b/lib/librte_vhost/rte_vhost_version.map
index 22302e972..dd3b4c1cb 100644
--- a/lib/librte_vhost/rte_vhost_version.map
+++ b/lib/librte_vhost/rte_vhost_version.map
@@ -84,4 +84,6 @@ EXPERIMENTAL {
 	rte_vhost_crypto_set_zero_copy;
 	rte_vhost_va_from_guest_pa;
 	rte_vhost_host_notifier_ctrl;
+	rte_vdpa_relay_vring_avail;
+	rte_vdpa_relay_vring_used;
 };
diff --git a/lib/librte_vhost/vdpa.c b/lib/librte_vhost/vdpa.c
index e7d849ee0..dcf6c3b8e 100644
--- a/lib/librte_vhost/vdpa.c
+++ b/lib/librte_vhost/vdpa.c
@@ -122,3 +122,197 @@ rte_vdpa_get_device_num(void)
 {
 	return vdpa_device_num;
 }
+
+/* Return true unless all of [desc_iova, desc_iova + desc_len) translates. */
+static bool
+invalid_desc_check(struct virtio_net *dev, struct vhost_virtqueue *vq,
+		uint64_t desc_iova, uint64_t desc_len, uint8_t perm)
+{
+	uint64_t desc_addr, desc_chunk_len;
+
+	while (desc_len) {
+		desc_chunk_len = desc_len;
+		desc_addr = vhost_iova_to_vva(dev, vq, desc_iova,
+				&desc_chunk_len, perm);
+
+		/* a zero-length chunk would never make progress */
+		if (!desc_addr || !desc_chunk_len)
+			return true;
+
+		desc_len -= desc_chunk_len;
+		desc_iova += desc_chunk_len;
+	}
+
+	return false;
+}
+
+/* Copy new guest avail entries to the mediate ring, validating each chain. */
+int
+rte_vdpa_relay_vring_avail(int vid, uint16_t qid, void *vring_m)
+{
+	struct virtio_net *dev = get_device(vid);
+	uint16_t idx, idx_m, desc_id;
+	struct vring_desc desc;
+	struct vhost_virtqueue *vq;
+	struct vring_desc *desc_ring, *idesc = NULL;
+	struct vring *s_vring;
+	uint64_t dlen;
+	int ret;
+	uint8_t perm;
+
+	if (!dev || !vring_m)
+		return -1;
+
+	if (qid >= dev->nr_vring)
+		return -1;
+
+	if (vq_is_packed(dev))
+		return -1;
+
+	s_vring = (struct vring *)vring_m;
+	vq = dev->virtqueue[qid];
+	idx = vq->avail->idx;
+	idx_m = s_vring->avail->idx;
+	ret = (uint16_t)(idx - idx_m);
+
+	while (idx_m != idx) {
+		/* copy the avail entry (head desc id) to the mediate ring */
+		desc_id = vq->avail->ring[idx_m & (vq->size - 1)];
+		s_vring->avail->ring[idx_m & (vq->size - 1)] = desc_id;
+		desc_ring = vq->desc;
+
+		if (vq->desc[desc_id].flags & VRING_DESC_F_INDIRECT) {
+			dlen = vq->desc[desc_id].len;
+			desc_ring = (struct vring_desc *)(uintptr_t)
+				vhost_iova_to_vva(dev, vq,
+						vq->desc[desc_id].addr, &dlen,
+						VHOST_ACCESS_RO);
+			if (unlikely(!desc_ring))
+				return -1;
+
+			/* table only partly translated: use a linear copy */
+			if (unlikely(dlen < vq->desc[desc_id].len)) {
+				idesc = alloc_copy_ind_table(dev, vq,
+						vq->desc[desc_id].addr,
+						vq->desc[desc_id].len);
+				if (unlikely(!idesc))
+					return -1;
+				desc_ring = idesc;
+			}
+
+			desc_id = 0;
+		}
+
+		/* check that every buffer of the chain lies in guest memory */
+		do {
+			desc = desc_ring[desc_id];
+			perm = desc.flags & VRING_DESC_F_WRITE ?
+				VHOST_ACCESS_WO : VHOST_ACCESS_RO;
+			if (invalid_desc_check(dev, vq, desc.addr, desc.len,
+						perm)) {
+				if (unlikely(idesc))
+					free_ind_table(idesc);
+				return -1;
+			}
+			desc_id = desc.next;
+		} while (desc.flags & VRING_DESC_F_NEXT);
+
+		if (unlikely(idesc)) {
+			free_ind_table(idesc);
+			idesc = NULL;
+		}
+
+		idx_m++;
+	}
+
+	rte_smp_wmb();
+	s_vring->avail->idx = idx;
+
+	if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
+		vhost_avail_event(vq) = idx;
+
+	return ret;
+}
+
+/* Copy new used entries to the guest ring and log DMA-written buffers. */
+int
+rte_vdpa_relay_vring_used(int vid, uint16_t qid, void *vring_m)
+{
+	struct virtio_net *dev = get_device(vid);
+	uint16_t idx, idx_m, desc_id;
+	struct vhost_virtqueue *vq;
+	struct vring_desc desc;
+	struct vring_desc *desc_ring, *idesc = NULL;
+	struct vring *s_vring;
+	uint64_t dlen;
+	int ret;
+
+	if (!dev || !vring_m)
+		return -1;
+
+	if (qid >= dev->nr_vring)
+		return -1;
+
+	if (vq_is_packed(dev))
+		return -1;
+
+	s_vring = (struct vring *)vring_m;
+	vq = dev->virtqueue[qid];
+	idx = vq->used->idx;
+	idx_m = s_vring->used->idx;
+	ret = (uint16_t)(idx_m - idx);
+
+	while (idx != idx_m) {
+		/* copy used entry, used ring logging is not covered here */
+		vq->used->ring[idx & (vq->size - 1)] =
+			s_vring->used->ring[idx & (vq->size - 1)];
+
+		desc_id = vq->used->ring[idx & (vq->size - 1)].id;
+		desc_ring = vq->desc;
+
+		if (vq->desc[desc_id].flags & VRING_DESC_F_INDIRECT) {
+			dlen = vq->desc[desc_id].len;
+			desc_ring = (struct vring_desc *)(uintptr_t)
+				vhost_iova_to_vva(dev, vq,
+						vq->desc[desc_id].addr, &dlen,
+						VHOST_ACCESS_RO);
+			if (unlikely(!desc_ring))
+				return -1;
+
+			/* table only partly translated: use a linear copy */
+			if (unlikely(dlen < vq->desc[desc_id].len)) {
+				idesc = alloc_copy_ind_table(dev, vq,
+						vq->desc[desc_id].addr,
+						vq->desc[desc_id].len);
+				if (unlikely(!idesc))
+					return -1;
+				desc_ring = idesc;
+			}
+
+			desc_id = 0;
+		}
+
+		/* dirty page logging for DMA writeable buffer */
+		do {
+			desc = desc_ring[desc_id];
+			if (desc.flags & VRING_DESC_F_WRITE)
+				vhost_log_write(dev, desc.addr, desc.len);
+			desc_id = desc.next;
+		} while (desc.flags & VRING_DESC_F_NEXT);
+
+		if (unlikely(idesc)) {
+			free_ind_table(idesc);
+			idesc = NULL;
+		}
+
+		idx++;
+	}
+
+	rte_smp_wmb();
+	vq->used->idx = idx_m;
+
+	if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
+		vring_used_event(s_vring) = idx_m;
+
+	return ret;
+}
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index d5bab4803..3b3265c4b 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -18,6 +18,7 @@
 #include <rte_log.h>
 #include <rte_ether.h>
 #include <rte_rwlock.h>
+#include <rte_malloc.h>
 
 #include "rte_vhost.h"
 #include "rte_vdpa.h"
@@ -754,4 +755,43 @@ vhost_vring_call_packed(struct virtio_net *dev, struct vhost_virtqueue *vq)
 		eventfd_write(vq->callfd, (eventfd_t)1);
 }
 
+/* Return an rte_malloc'ed linear copy of an indirect table, NULL on error. */
+static __rte_always_inline void *
+alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
+		uint64_t desc_addr, uint64_t desc_len)
+{
+	void *idesc;
+	uint64_t src, dst, len, remain = desc_len;
+
+	idesc = rte_malloc(__func__, desc_len, 0);
+	if (unlikely(!idesc))
+		return NULL;
+
+	dst = (uint64_t)(uintptr_t)idesc;
+
+	while (remain) {
+		len = remain;
+		src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
+				VHOST_ACCESS_RO);
+		if (unlikely(!src || !len)) {
+			rte_free(idesc);
+			return NULL;
+		}
+
+		rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len);
+
+		remain -= len;
+		dst += len;
+		desc_addr += len;
+	}
+
+	return idesc;
+}
+
+static __rte_always_inline void
+free_ind_table(void *idesc)
+{
+	rte_free(idesc);
+}
+
 #endif /* _VHOST_NET_CDEV_H_ */
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 5e1a1a727..8c657a101 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -37,45 +37,6 @@ is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t nr_vring)
 	return (is_tx ^ (idx & 1)) == 0 && idx < nr_vring;
 }
 
-static __rte_always_inline void *
-alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
-		uint64_t desc_addr, uint64_t desc_len)
-{
-	void *idesc;
-	uint64_t src, dst;
-	uint64_t len, remain = desc_len;
-
-	idesc = rte_malloc(__func__, desc_len, 0);
-	if (unlikely(!idesc))
-		return 0;
-
-	dst = (uint64_t)(uintptr_t)idesc;
-
-	while (remain) {
-		len = remain;
-		src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
-				VHOST_ACCESS_RO);
-		if (unlikely(!src || !len)) {
-			rte_free(idesc);
-			return 0;
-		}
-
-		rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len);
-
-		remain -= len;
-		dst += len;
-		desc_addr += len;
-	}
-
-	return idesc;
-}
-
-static __rte_always_inline void
-free_ind_table(void *idesc)
-{
-	rte_free(idesc);
-}
-
 static __rte_always_inline void
 do_flush_shadow_used_ring_split(struct virtio_net *dev,
 			struct vhost_virtqueue *vq,
-- 
2.15.1

  parent reply	other threads:[~2018-12-14 21:26 UTC|newest]

Thread overview: 86+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-11-28  9:45 [dpdk-dev] [PATCH 0/9] support SW assisted VDPA live migration Xiao Wang
2018-11-28  9:45 ` [dpdk-dev] [PATCH 1/9] vhost: provide helper for host notifier ctrl Xiao Wang
2018-11-28  9:46 ` [dpdk-dev] [PATCH 2/9] vhost: provide helpers for virtio ring relay Xiao Wang
2018-12-04  6:22   ` Tiwei Bie
2018-12-12  6:51     ` Wang, Xiao W
2018-12-13  1:10   ` [dpdk-dev] [PATCH v2 0/9] support SW assisted VDPA live migration Xiao Wang
2018-12-13  1:10     ` [dpdk-dev] [PATCH v2 1/9] vhost: provide helper for host notifier ctrl Xiao Wang
2018-12-13  1:10     ` [dpdk-dev] [PATCH v2 2/9] vhost: provide helpers for virtio ring relay Xiao Wang
2018-12-13 10:09       ` [dpdk-dev] [PATCH v3 0/9] support SW assisted VDPA live migration Xiao Wang
2018-12-13 10:09         ` [dpdk-dev] [PATCH v3 1/9] vhost: provide helper for host notifier ctrl Xiao Wang
2018-12-14 13:33           ` Maxime Coquelin
2018-12-14 19:05             ` Wang, Xiao W
2018-12-14 21:16           ` [dpdk-dev] [PATCH v4 00/10] support SW assisted VDPA live migration Xiao Wang
2018-12-14 21:16             ` [dpdk-dev] [PATCH v4 01/10] vhost: remove unused internal API Xiao Wang
2018-12-16  8:58               ` Maxime Coquelin
2018-12-14 21:16             ` [dpdk-dev] [PATCH v4 02/10] vhost: provide helper for host notifier ctrl Xiao Wang
2018-12-16  9:00               ` Maxime Coquelin
2018-12-14 21:16             ` Xiao Wang [this message]
2018-12-16  9:10               ` [dpdk-dev] [PATCH v4 03/10] vhost: provide helpers for virtio ring relay Maxime Coquelin
2018-12-17  8:51                 ` Wang, Xiao W
2018-12-17 11:02                   ` Maxime Coquelin
2018-12-17 14:41                     ` Wang, Xiao W
2018-12-17 19:00                       ` Maxime Coquelin
2018-12-18  8:27                         ` Wang, Xiao W
2018-12-18  8:44                           ` Thomas Monjalon
2018-12-18  8:01               ` [dpdk-dev] [PATCH v5 00/10] support SW assisted VDPA live migration Xiao Wang
2018-12-18  8:01                 ` [dpdk-dev] [PATCH v5 01/10] vhost: remove unused internal API Xiao Wang
2018-12-18  8:01                 ` [dpdk-dev] [PATCH v5 02/10] vhost: provide helper for host notifier ctrl Xiao Wang
2018-12-18 15:37                   ` Ferruh Yigit
2018-12-18  8:02                 ` [dpdk-dev] [PATCH v5 03/10] vhost: provide helpers for virtio ring relay Xiao Wang
2018-12-18  8:02                 ` [dpdk-dev] [PATCH v5 04/10] net/ifc: dump debug message for error Xiao Wang
2018-12-18  8:02                 ` [dpdk-dev] [PATCH v5 05/10] net/ifc: store only registered device instance Xiao Wang
2018-12-18  8:02                 ` [dpdk-dev] [PATCH v5 06/10] net/ifc: detect if VDPA mode is specified Xiao Wang
2018-12-18  8:02                 ` [dpdk-dev] [PATCH v5 07/10] net/ifc: add devarg for LM mode Xiao Wang
2018-12-18 11:23                   ` Maxime Coquelin
2018-12-18  8:02                 ` [dpdk-dev] [PATCH v5 08/10] net/ifc: use lib API for used ring logging Xiao Wang
2018-12-18  8:02                 ` [dpdk-dev] [PATCH v5 09/10] net/ifc: support SW assisted VDPA live migration Xiao Wang
2018-12-18 11:33                   ` Maxime Coquelin
2018-12-18  8:02                 ` [dpdk-dev] [PATCH v5 10/10] doc: update ifc NIC document Xiao Wang
2018-12-18 11:35                   ` Maxime Coquelin
2018-12-14 21:16             ` [dpdk-dev] [PATCH v4 04/10] net/ifc: dump debug message for error Xiao Wang
2018-12-16  9:11               ` Maxime Coquelin
2018-12-14 21:16             ` [dpdk-dev] [PATCH v4 05/10] net/ifc: store only registered device instance Xiao Wang
2018-12-16  9:12               ` Maxime Coquelin
2018-12-14 21:16             ` [dpdk-dev] [PATCH v4 06/10] net/ifc: detect if VDPA mode is specified Xiao Wang
2018-12-16  9:17               ` Maxime Coquelin
2018-12-17  8:54                 ` Wang, Xiao W
2018-12-14 21:16             ` [dpdk-dev] [PATCH v4 07/10] net/ifc: add devarg for LM mode Xiao Wang
2018-12-16  9:21               ` Maxime Coquelin
2018-12-17  9:00                 ` Wang, Xiao W
2018-12-14 21:16             ` [dpdk-dev] [PATCH v4 08/10] net/ifc: use lib API for used ring logging Xiao Wang
2018-12-16  9:24               ` Maxime Coquelin
2018-12-14 21:16             ` [dpdk-dev] [PATCH v4 09/10] net/ifc: support SW assisted VDPA live migration Xiao Wang
2018-12-16  9:35               ` Maxime Coquelin
2018-12-17  9:12                 ` Wang, Xiao W
2018-12-17 11:08                   ` Maxime Coquelin
2018-12-14 21:16             ` [dpdk-dev] [PATCH v4 10/10] doc: update ifc NIC document Xiao Wang
2018-12-16  9:36               ` Maxime Coquelin
2018-12-17  9:15                 ` Wang, Xiao W
2018-12-18 14:01             ` [dpdk-dev] [PATCH v4 00/10] support SW assisted VDPA live migration Maxime Coquelin
2018-12-13 10:09         ` [dpdk-dev] [PATCH v3 2/9] vhost: provide helpers for virtio ring relay Xiao Wang
2018-12-13 10:09         ` [dpdk-dev] [PATCH v3 3/9] net/ifc: dump debug message for error Xiao Wang
2018-12-13 10:09         ` [dpdk-dev] [PATCH v3 4/9] net/ifc: store only registered device instance Xiao Wang
2018-12-13 10:09         ` [dpdk-dev] [PATCH v3 5/9] net/ifc: detect if VDPA mode is specified Xiao Wang
2018-12-13 10:09         ` [dpdk-dev] [PATCH v3 6/9] net/ifc: add devarg for LM mode Xiao Wang
2018-12-13 10:09         ` [dpdk-dev] [PATCH v3 7/9] net/ifc: use lib API for used ring logging Xiao Wang
2018-12-13 10:09         ` [dpdk-dev] [PATCH v3 8/9] net/ifc: support SW assisted VDPA live migration Xiao Wang
2018-12-13 10:09         ` [dpdk-dev] [PATCH v3 9/9] doc: update ifc NIC document Xiao Wang
2018-12-13  1:10     ` [dpdk-dev] [PATCH v2 3/9] net/ifc: dump debug message for error Xiao Wang
2018-12-13  1:10     ` [dpdk-dev] [PATCH v2 4/9] net/ifc: store only registered device instance Xiao Wang
2018-12-13  1:10     ` [dpdk-dev] [PATCH v2 5/9] net/ifc: detect if VDPA mode is specified Xiao Wang
2018-12-13  1:10     ` [dpdk-dev] [PATCH v2 6/9] net/ifc: add devarg for LM mode Xiao Wang
2018-12-13  1:10     ` [dpdk-dev] [PATCH v2 7/9] net/ifc: use lib API for used ring logging Xiao Wang
2018-12-13  1:10     ` [dpdk-dev] [PATCH v2 8/9] net/ifc: support SW assisted VDPA live migration Xiao Wang
2018-12-13  1:10     ` [dpdk-dev] [PATCH v2 9/9] doc: update ifc NIC document Xiao Wang
2018-11-28  9:46 ` [dpdk-dev] [PATCH 3/9] net/ifc: dump debug message for error Xiao Wang
2018-11-28  9:46 ` [dpdk-dev] [PATCH 4/9] net/ifc: store only registered device instance Xiao Wang
2018-11-28  9:46 ` [dpdk-dev] [PATCH 5/9] net/ifc: detect if VDPA mode is specified Xiao Wang
2018-11-28  9:46 ` [dpdk-dev] [PATCH 6/9] net/ifc: add devarg for LM mode Xiao Wang
2018-12-04  6:31   ` Tiwei Bie
2018-12-12  6:53     ` Wang, Xiao W
2018-12-12 10:15   ` Alejandro Lucero
2018-12-12 10:23     ` Wang, Xiao W
2018-11-28  9:46 ` [dpdk-dev] [PATCH 7/9] net/ifc: use lib API for used ring logging Xiao Wang
2018-11-28  9:46 ` [dpdk-dev] [PATCH 8/9] net/ifc: support SW assisted VDPA live migration Xiao Wang
2018-11-28  9:46 ` [dpdk-dev] [PATCH 9/9] doc: update ifc NIC document Xiao Wang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181214211612.167681-4-xiao.w.wang@intel.com \
    --to=xiao.w.wang@intel.com \
    --cc=alejandro.lucero@netronome.com \
    --cc=dev@dpdk.org \
    --cc=maxime.coquelin@redhat.com \
    --cc=tiwei.bie@intel.com \
    --cc=xiaolong.ye@intel.com \
    --cc=zhihong.wang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).