From: Maxime Coquelin <maxime.coquelin@redhat.com>
To: dev@dpdk.org, tiwei.bie@intel.com, david.marchand@redhat.com,
jfreimann@redhat.com, bruce.richardson@intel.com,
zhihong.wang@intel.com, konstantin.ananyev@intel.com,
mattias.ronnblom@ericsson.com
Cc: Maxime Coquelin <maxime.coquelin@redhat.com>
Subject: [dpdk-dev] [PATCH v3 3/5] vhost: do not inline unlikely fragmented buffers code
Date: Wed, 29 May 2019 15:04:18 +0200 [thread overview]
Message-ID: <20190529130420.6428-4-maxime.coquelin@redhat.com> (raw)
In-Reply-To: <20190529130420.6428-1-maxime.coquelin@redhat.com>
Handling of fragmented virtio-net headers and indirect descriptor
tables was implemented to fix CVE-2018-1059. It should never
happen with healthy guests and so is already considered an
unlikely code path.
This patch moves these bits into non-inline dedicated functions
to reduce the I-cache pressure.
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Reviewed-by: Tiwei Bie <tiwei.bie@intel.com>
---
lib/librte_vhost/vdpa.c | 2 +-
lib/librte_vhost/vhost.c | 33 +++++++++++
lib/librte_vhost/vhost.h | 36 +-----------
lib/librte_vhost/virtio_net.c | 100 +++++++++++++++++++---------------
4 files changed, 93 insertions(+), 78 deletions(-)
diff --git a/lib/librte_vhost/vdpa.c b/lib/librte_vhost/vdpa.c
index e915488432..24a6698e91 100644
--- a/lib/librte_vhost/vdpa.c
+++ b/lib/librte_vhost/vdpa.c
@@ -181,7 +181,7 @@ rte_vdpa_relay_vring_used(int vid, uint16_t qid, void *vring_m)
return -1;
if (unlikely(dlen < vq->desc[desc_id].len)) {
- idesc = alloc_copy_ind_table(dev, vq,
+ idesc = vhost_alloc_copy_ind_table(dev, vq,
vq->desc[desc_id].addr,
vq->desc[desc_id].len);
if (unlikely(!idesc))
diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 7d427b60a5..981837b5dd 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -200,6 +200,39 @@ __vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq,
}
}
+void *
+vhost_alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ uint64_t desc_addr, uint64_t desc_len)
+{
+ void *idesc;
+ uint64_t src, dst;
+ uint64_t len, remain = desc_len;
+
+ idesc = rte_malloc(__func__, desc_len, 0);
+ if (unlikely(!idesc))
+ return NULL;
+
+ dst = (uint64_t)(uintptr_t)idesc;
+
+ while (remain) {
+ len = remain;
+ src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
+ VHOST_ACCESS_RO);
+ if (unlikely(!src || !len)) {
+ rte_free(idesc);
+ return NULL;
+ }
+
+ rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len);
+
+ remain -= len;
+ dst += len;
+ desc_addr += len;
+ }
+
+ return idesc;
+}
+
void
cleanup_vq(struct vhost_virtqueue *vq, int destroy)
{
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 3ab7b4950f..691f535530 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -488,6 +488,9 @@ void vhost_backend_cleanup(struct virtio_net *dev);
uint64_t __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
uint64_t iova, uint64_t *len, uint8_t perm);
+void *vhost_alloc_copy_ind_table(struct virtio_net *dev,
+ struct vhost_virtqueue *vq,
+ uint64_t desc_addr, uint64_t desc_len);
int vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq);
void vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue *vq);
@@ -601,39 +604,6 @@ vhost_vring_call_packed(struct virtio_net *dev, struct vhost_virtqueue *vq)
eventfd_write(vq->callfd, (eventfd_t)1);
}
-static __rte_always_inline void *
-alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
- uint64_t desc_addr, uint64_t desc_len)
-{
- void *idesc;
- uint64_t src, dst;
- uint64_t len, remain = desc_len;
-
- idesc = rte_malloc(__func__, desc_len, 0);
- if (unlikely(!idesc))
- return 0;
-
- dst = (uint64_t)(uintptr_t)idesc;
-
- while (remain) {
- len = remain;
- src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
- VHOST_ACCESS_RO);
- if (unlikely(!src || !len)) {
- rte_free(idesc);
- return 0;
- }
-
- rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len);
-
- remain -= len;
- dst += len;
- desc_addr += len;
- }
-
- return idesc;
-}
-
static __rte_always_inline void
free_ind_table(void *idesc)
{
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 8aeb180016..4564e9bcc9 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -337,7 +337,7 @@ fill_vec_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
* The indirect desc table is not contiguous
* in process VA space, we have to copy it.
*/
- idesc = alloc_copy_ind_table(dev, vq,
+ idesc = vhost_alloc_copy_ind_table(dev, vq,
vq->desc[idx].addr, vq->desc[idx].len);
if (unlikely(!idesc))
return -1;
@@ -454,7 +454,8 @@ fill_vec_buf_packed_indirect(struct virtio_net *dev,
* The indirect desc table is not contiguous
* in process VA space, we have to copy it.
*/
- idescs = alloc_copy_ind_table(dev, vq, desc->addr, desc->len);
+ idescs = vhost_alloc_copy_ind_table(dev,
+ vq, desc->addr, desc->len);
if (unlikely(!idescs))
return -1;
@@ -610,6 +611,36 @@ reserve_avail_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
return 0;
}
+static __rte_noinline void
+copy_vnet_hdr_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ struct buf_vector *buf_vec,
+ struct virtio_net_hdr_mrg_rxbuf *hdr)
+{
+ uint64_t len;
+ uint64_t remain = dev->vhost_hlen;
+ uint64_t src = (uint64_t)(uintptr_t)hdr, dst;
+ uint64_t iova = buf_vec->buf_iova;
+
+ while (remain) {
+ len = RTE_MIN(remain,
+ buf_vec->buf_len);
+ dst = buf_vec->buf_addr;
+ rte_memcpy((void *)(uintptr_t)dst,
+ (void *)(uintptr_t)src,
+ len);
+
+ PRINT_PACKET(dev, (uintptr_t)dst,
+ (uint32_t)len, 0);
+ vhost_log_cache_write(dev, vq,
+ iova, len);
+
+ remain -= len;
+ iova += len;
+ src += len;
+ buf_vec++;
+ }
+}
+
static __rte_always_inline int
copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
struct rte_mbuf *m, struct buf_vector *buf_vec,
@@ -703,30 +734,7 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
num_buffers);
if (unlikely(hdr == &tmp_hdr)) {
- uint64_t len;
- uint64_t remain = dev->vhost_hlen;
- uint64_t src = (uint64_t)(uintptr_t)hdr, dst;
- uint64_t iova = buf_vec[0].buf_iova;
- uint16_t hdr_vec_idx = 0;
-
- while (remain) {
- len = RTE_MIN(remain,
- buf_vec[hdr_vec_idx].buf_len);
- dst = buf_vec[hdr_vec_idx].buf_addr;
- rte_memcpy((void *)(uintptr_t)dst,
- (void *)(uintptr_t)src,
- len);
-
- PRINT_PACKET(dev, (uintptr_t)dst,
- (uint32_t)len, 0);
- vhost_log_cache_write(dev, vq,
- iova, len);
-
- remain -= len;
- iova += len;
- src += len;
- hdr_vec_idx++;
- }
+ copy_vnet_hdr_to_desc(dev, vq, buf_vec, hdr);
} else {
PRINT_PACKET(dev, (uintptr_t)hdr_addr,
dev->vhost_hlen, 0);
@@ -1063,6 +1071,27 @@ vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m)
}
}
+static __rte_noinline void
+copy_vnet_hdr_from_desc(struct virtio_net_hdr *hdr,
+ struct buf_vector *buf_vec)
+{
+ uint64_t len;
+ uint64_t remain = sizeof(struct virtio_net_hdr);
+ uint64_t src;
+ uint64_t dst = (uint64_t)(uintptr_t)hdr;
+
+ while (remain) {
+ len = RTE_MIN(remain, buf_vec->buf_len);
+ src = buf_vec->buf_addr;
+ rte_memcpy((void *)(uintptr_t)dst,
+ (void *)(uintptr_t)src, len);
+
+ remain -= len;
+ dst += len;
+ buf_vec++;
+ }
+}
+
static __rte_always_inline int
copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
struct buf_vector *buf_vec, uint16_t nr_vec,
@@ -1094,28 +1123,11 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
if (virtio_net_with_host_offload(dev)) {
if (unlikely(buf_len < sizeof(struct virtio_net_hdr))) {
- uint64_t len;
- uint64_t remain = sizeof(struct virtio_net_hdr);
- uint64_t src;
- uint64_t dst = (uint64_t)(uintptr_t)&tmp_hdr;
- uint16_t hdr_vec_idx = 0;
-
/*
* No luck, the virtio-net header doesn't fit
* in a contiguous virtual area.
*/
- while (remain) {
- len = RTE_MIN(remain,
- buf_vec[hdr_vec_idx].buf_len);
- src = buf_vec[hdr_vec_idx].buf_addr;
- rte_memcpy((void *)(uintptr_t)dst,
- (void *)(uintptr_t)src, len);
-
- remain -= len;
- dst += len;
- hdr_vec_idx++;
- }
-
+ copy_vnet_hdr_from_desc(&tmp_hdr, buf_vec);
hdr = &tmp_hdr;
} else {
hdr = (struct virtio_net_hdr *)((uintptr_t)buf_addr);
--
2.21.0
next prev parent reply other threads:[~2019-05-29 13:05 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-05-29 13:04 [dpdk-dev] [PATCH v3 0/5] vhost: I-cache pressure optimizations Maxime Coquelin
2019-05-29 13:04 ` [dpdk-dev] [PATCH v3 1/5] vhost: un-inline dirty pages logging functions Maxime Coquelin
2019-05-29 13:04 ` [dpdk-dev] [PATCH v3 2/5] vhost: do not inline packed and split functions Maxime Coquelin
2019-05-29 13:04 ` Maxime Coquelin [this message]
2019-05-29 13:04 ` [dpdk-dev] [PATCH v3 4/5] vhost: simplify descriptor's buffer prefetching Maxime Coquelin
2019-05-29 13:04 ` [dpdk-dev] [PATCH v3 5/5] eal/x86: force inlining of all memcpy and mov helpers Maxime Coquelin
2019-06-05 12:53 ` Bruce Richardson
2019-06-06 9:33 ` Maxime Coquelin
2019-06-05 12:32 ` [dpdk-dev] [PATCH v3 0/5] vhost: I-cache pressure optimizations Maxime Coquelin
2019-06-05 12:52 ` Bruce Richardson
2019-06-05 13:00 ` Maxime Coquelin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190529130420.6428-4-maxime.coquelin@redhat.com \
--to=maxime.coquelin@redhat.com \
--cc=bruce.richardson@intel.com \
--cc=david.marchand@redhat.com \
--cc=dev@dpdk.org \
--cc=jfreimann@redhat.com \
--cc=konstantin.ananyev@intel.com \
--cc=mattias.ronnblom@ericsson.com \
--cc=tiwei.bie@intel.com \
--cc=zhihong.wang@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).