From: Maxime Coquelin <maxime.coquelin@redhat.com>
To: stable@dpdk.org
Cc: Maxime Coquelin <maxime.coquelin@redhat.com>
Subject: [dpdk-stable] [PATCH v17.08 4/9] vhost: add support for non-contiguous indirect descs tables
Date: Mon, 23 Apr 2018 17:59:51 +0200 [thread overview]
Message-ID: <20180423155956.21401-5-maxime.coquelin@redhat.com> (raw)
In-Reply-To: <20180423155956.21401-1-maxime.coquelin@redhat.com>
This patch adds support for non-contiguous indirect descriptor
tables in VA space.
When such non-contiguity is detected, which is unlikely, a table is
allocated and the non-contiguous content is copied into it.
This issue has been assigned CVE-2018-1059.
Reported-by: Yongji Xie <xieyongji@baidu.com>
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
lib/librte_vhost/virtio_net.c | 207 ++++++++++++++++++++++++++++++++++++------
1 file changed, 179 insertions(+), 28 deletions(-)
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index d7af0effe..22fcbb3fb 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -44,6 +44,7 @@
#include <rte_udp.h>
#include <rte_sctp.h>
#include <rte_arp.h>
+#include <rte_malloc.h>
#include "vhost.h"
@@ -55,6 +56,44 @@ is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t nr_vring)
return (is_tx ^ (idx & 1)) == 0 && idx < nr_vring;
}
+static __rte_always_inline struct vring_desc *
+alloc_copy_ind_table(struct virtio_net *dev, struct vring_desc *desc)
+{
+ struct vring_desc *idesc;
+ uint64_t src, dst;
+ uint64_t len, remain = desc->len;
+ uint64_t desc_addr = desc->addr;
+
+ idesc = rte_malloc(__func__, desc->len, 0);
+ if (unlikely(!idesc))
+ return 0;
+
+ dst = (uint64_t)(uintptr_t)idesc;
+
+ while (remain) {
+ len = remain;
+ src = rte_vhost_va_from_guest_pa(dev->mem, desc_addr, &len);
+ if (unlikely(!src || !len)) {
+ rte_free(idesc);
+ return 0;
+ }
+
+ rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len);
+
+ remain -= len;
+ dst += len;
+ desc_addr += len;
+ }
+
+ return idesc;
+}
+
+static __rte_always_inline void
+free_ind_table(struct vring_desc *idesc)
+{
+ rte_free(idesc);
+}
+
static __rte_always_inline void
do_flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq,
uint16_t to, uint16_t from, uint16_t size)
@@ -305,6 +344,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
rte_prefetch0(&vq->desc[desc_indexes[0]]);
for (i = 0; i < count; i++) {
+ struct vring_desc *idesc = NULL;
uint16_t desc_idx = desc_indexes[i];
int err;
@@ -314,12 +354,23 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
rte_vhost_va_from_guest_pa(dev->mem,
vq->desc[desc_idx].addr,
&dlen);
- if (unlikely(!descs ||
- dlen != vq->desc[desc_idx].len)) {
+ if (unlikely(!descs)) {
count = i;
break;
}
+ if (unlikely(dlen < vq->desc[desc_idx].len)) {
+ /*
+ * The indirect desc table is not contiguous
+ * in process VA space, we have to copy it.
+ */
+ idesc = alloc_copy_ind_table(dev, &vq->desc[desc_idx]);
+ if (unlikely(!idesc))
+ break;
+
+ descs = idesc;
+ }
+
desc_idx = 0;
sz = vq->desc[desc_idx].len / sizeof(*descs);
} else {
@@ -338,6 +389,9 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
if (i + 1 < count)
rte_prefetch0(&vq->desc[desc_indexes[i+1]]);
+
+ if (unlikely(!!idesc))
+ free_ind_table(idesc);
}
rte_smp_wmb();
@@ -369,6 +423,7 @@ fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
uint32_t len = 0;
uint64_t dlen;
struct vring_desc *descs = vq->desc;
+ struct vring_desc *idesc = NULL;
*desc_chain_head = idx;
@@ -378,15 +433,29 @@ fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
rte_vhost_va_from_guest_pa(dev->mem,
vq->desc[idx].addr,
&dlen);
- if (unlikely(!descs || dlen != vq->desc[idx].len))
+ if (unlikely(!descs))
return -1;
+ if (unlikely(dlen < vq->desc[idx].len)) {
+ /*
+ * The indirect desc table is not contiguous
+ * in process VA space, we have to copy it.
+ */
+ idesc = alloc_copy_ind_table(dev, &vq->desc[idx]);
+ if (unlikely(!idesc))
+ return -1;
+
+ descs = idesc;
+ }
+
idx = 0;
}
while (1) {
- if (unlikely(vec_id >= BUF_VECTOR_MAX || idx >= vq->size))
+ if (unlikely(vec_id >= BUF_VECTOR_MAX || idx >= vq->size)) {
+ free_ind_table(idesc);
return -1;
+ }
len += descs[idx].len;
buf_vec[vec_id].buf_addr = descs[idx].addr;
@@ -403,6 +472,9 @@ fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
*desc_chain_len = len;
*vec_idx = vec_id;
+ if (unlikely(!!idesc))
+ free_ind_table(idesc);
+
return 0;
}
@@ -794,12 +866,13 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
struct rte_mempool *mbuf_pool)
{
struct vring_desc *desc;
- uint64_t desc_addr;
+ uint64_t desc_addr, desc_gaddr;
uint32_t desc_avail, desc_offset;
uint32_t mbuf_avail, mbuf_offset;
uint32_t cpy_len;
- uint64_t dlen;
+ uint64_t desc_chunck_len;
struct rte_mbuf *cur = m, *prev = m;
+ struct virtio_net_hdr tmp_hdr;
struct virtio_net_hdr *hdr = NULL;
/* A counter to avoid desc dead loop chain */
uint32_t nr_desc = 1;
@@ -809,16 +882,46 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
(desc->flags & VRING_DESC_F_INDIRECT))
return -1;
- dlen = desc->len;
+ desc_chunck_len = desc->len;
+ desc_gaddr = desc->addr;
desc_addr = rte_vhost_va_from_guest_pa(dev->mem,
- desc->addr,
- &dlen);
- if (unlikely(!desc_addr || dlen != desc->len))
+ desc_gaddr,
+ &desc_chunck_len);
+ if (unlikely(!desc_addr))
return -1;
if (virtio_net_with_host_offload(dev)) {
- hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
- rte_prefetch0(hdr);
+ if (unlikely(desc_chunck_len < sizeof(struct virtio_net_hdr))) {
+ uint64_t len = desc_chunck_len;
+ uint64_t remain = sizeof(struct virtio_net_hdr);
+ uint64_t src = desc_addr;
+ uint64_t dst = (uint64_t)(uintptr_t)&tmp_hdr;
+ uint64_t guest_addr = desc_gaddr;
+
+ /*
+ * No luck, the virtio-net header doesn't fit
+ * in a contiguous virtual area.
+ */
+ while (remain) {
+ len = remain;
+ src = rte_vhost_va_from_guest_pa(dev->mem,
+ guest_addr, &len);
+ if (unlikely(!src || !len))
+ return -1;
+
+ rte_memcpy((void *)(uintptr_t)dst,
+ (void *)(uintptr_t)src, len);
+
+ guest_addr += len;
+ remain -= len;
+ dst += len;
+ }
+
+ hdr = &tmp_hdr;
+ } else {
+ hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
+ rte_prefetch0(hdr);
+ }
}
/*
@@ -832,11 +935,12 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
if (unlikely(desc->flags & VRING_DESC_F_INDIRECT))
return -1;
- dlen = desc->len;
+ desc_chunck_len = desc->len;
+ desc_gaddr = desc->addr;
desc_addr = rte_vhost_va_from_guest_pa(dev->mem,
- desc->addr,
- &dlen);
- if (unlikely(!desc_addr || dlen != desc->len))
+ desc_gaddr,
+ &desc_chunck_len);
+ if (unlikely(!desc_addr))
return -1;
desc_offset = 0;
@@ -844,19 +948,34 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
nr_desc += 1;
} else {
desc_avail = desc->len - dev->vhost_hlen;
- desc_offset = dev->vhost_hlen;
+
+ if (unlikely(desc_chunck_len < dev->vhost_hlen)) {
+ desc_chunck_len = desc_avail;
+ desc_gaddr += dev->vhost_hlen;
+ desc_addr = rte_vhost_va_from_guest_pa(dev->mem,
+ desc_gaddr,
+ &desc_chunck_len);
+ if (unlikely(!desc_addr))
+ return -1;
+
+ desc_offset = 0;
+ } else {
+ desc_offset = dev->vhost_hlen;
+ desc_chunck_len -= dev->vhost_hlen;
+ }
}
rte_prefetch0((void *)(uintptr_t)(desc_addr + desc_offset));
- PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), desc_avail, 0);
+ PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
+ desc_chunck_len, 0);
mbuf_offset = 0;
mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
while (1) {
uint64_t hpa;
- cpy_len = RTE_MIN(desc_avail, mbuf_avail);
+ cpy_len = RTE_MIN(desc_chunck_len, mbuf_avail);
/*
* A desc buf might across two host physical pages that are
@@ -864,7 +983,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
* will be copied even though zero copy is enabled.
*/
if (unlikely(dev->dequeue_zero_copy && (hpa = gpa_to_hpa(dev,
- desc->addr + desc_offset, cpy_len)))) {
+ desc_gaddr + desc_offset, cpy_len)))) {
cur->data_len = cpy_len;
cur->data_off = 0;
cur->buf_addr = (void *)(uintptr_t)desc_addr;
@@ -885,6 +1004,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
mbuf_avail -= cpy_len;
mbuf_offset += cpy_len;
desc_avail -= cpy_len;
+ desc_chunck_len -= cpy_len;
desc_offset += cpy_len;
/* This desc reaches to its end, get the next one */
@@ -899,11 +1019,12 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
if (unlikely(desc->flags & VRING_DESC_F_INDIRECT))
return -1;
- dlen = desc->len;
+ desc_chunck_len = desc->len;
+ desc_gaddr = desc->addr;
desc_addr = rte_vhost_va_from_guest_pa(dev->mem,
- desc->addr,
- &dlen);
- if (unlikely(!desc_addr || dlen != desc->len))
+ desc_gaddr,
+ &desc_chunck_len);
+ if (unlikely(!desc_addr))
return -1;
rte_prefetch0((void *)(uintptr_t)desc_addr);
@@ -911,7 +1032,20 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
desc_offset = 0;
desc_avail = desc->len;
- PRINT_PACKET(dev, (uintptr_t)desc_addr, desc->len, 0);
+ PRINT_PACKET(dev, (uintptr_t)desc_addr,
+ desc_chunck_len, 0);
+ } else if (unlikely(desc_chunck_len == 0)) {
+ desc_chunck_len = desc_avail;
+ desc_gaddr += desc_offset;
+ desc_addr = rte_vhost_va_from_guest_pa(dev->mem,
+ desc_gaddr,
+ &desc_chunck_len);
+ if (unlikely(!desc_addr))
+ return -1;
+ desc_offset = 0;
+
+ PRINT_PACKET(dev, (uintptr_t)desc_addr,
+ desc_chunck_len, 0);
}
/*
@@ -1139,7 +1273,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
/* Prefetch descriptor index. */
rte_prefetch0(&vq->desc[desc_indexes[0]]);
for (i = 0; i < count; i++) {
- struct vring_desc *desc;
+ struct vring_desc *desc, *idesc = NULL;
uint16_t sz, idx;
uint64_t dlen;
int err;
@@ -1153,10 +1287,21 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
rte_vhost_va_from_guest_pa(dev->mem,
vq->desc[desc_indexes[i]].addr,
&dlen);
- if (unlikely(!desc ||
- dlen != vq->desc[desc_indexes[i]].len))
+ if (unlikely(!desc))
break;
+ if (unlikely(dlen < vq->desc[desc_indexes[i]].len)) {
+ /*
+ * The indirect desc table is not contiguous
+ * in process VA space, we have to copy it.
+ */
+ idesc = alloc_copy_ind_table(dev, &vq->desc[desc_indexes[i]]);
+ if (unlikely(!idesc))
+ break;
+
+ desc = idesc;
+ }
+
rte_prefetch0(desc);
sz = vq->desc[desc_indexes[i]].len / sizeof(*desc);
idx = 0;
@@ -1170,12 +1315,14 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
if (unlikely(pkts[i] == NULL)) {
RTE_LOG(ERR, VHOST_DATA,
"Failed to allocate memory for mbuf.\n");
+ free_ind_table(idesc);
break;
}
err = copy_desc_to_mbuf(dev, desc, sz, pkts[i], idx, mbuf_pool);
if (unlikely(err)) {
rte_pktmbuf_free(pkts[i]);
+ free_ind_table(idesc);
break;
}
@@ -1185,6 +1332,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
zmbuf = get_zmbuf(vq);
if (!zmbuf) {
rte_pktmbuf_free(pkts[i]);
+ free_ind_table(idesc);
break;
}
zmbuf->mbuf = pkts[i];
@@ -1201,6 +1349,9 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
vq->nr_zmbuf += 1;
TAILQ_INSERT_TAIL(&vq->zmbuf_list, zmbuf, next);
}
+
+ if (unlikely(!!idesc))
+ free_ind_table(idesc);
}
vq->last_avail_idx += i;
--
2.14.3
next prev parent reply other threads:[~2018-04-23 16:00 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-04-23 15:59 [dpdk-stable] [PATCH v17.08 0/9] Vhost: CVE-2018-1059 fixes Maxime Coquelin
2018-04-23 15:59 ` [dpdk-stable] [PATCH v17.08 1/9] vhost: introduce safe API for GPA translation Maxime Coquelin
2018-04-23 15:59 ` [dpdk-stable] [PATCH v17.08 2/9] vhost: check all range is mapped when translating GPAs Maxime Coquelin
2018-04-23 15:59 ` [dpdk-stable] [PATCH v17.08 3/9] vhost: ensure all range is mapped when translating QVAs Maxime Coquelin
2018-04-23 15:59 ` Maxime Coquelin [this message]
2018-04-23 15:59 ` [dpdk-stable] [PATCH v17.08 5/9] vhost: handle virtually non-contiguous buffers in Rx Maxime Coquelin
2018-04-23 15:59 ` [dpdk-stable] [PATCH v17.08 6/9] vhost: handle virtually non-contiguous buffers in Rx-mrg Maxime Coquelin
2018-04-23 15:59 ` [dpdk-stable] [PATCH v17.08 7/9] examples/vhost: move to safe GPA translation API Maxime Coquelin
2018-04-23 15:59 ` [dpdk-stable] [PATCH v17.08 8/9] examples/vhost_scsi: " Maxime Coquelin
2018-04-23 15:59 ` [dpdk-stable] [PATCH v17.08 9/9] vhost: deprecate unsafe " Maxime Coquelin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180423155956.21401-5-maxime.coquelin@redhat.com \
--to=maxime.coquelin@redhat.com \
--cc=stable@dpdk.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).