From mboxrd@z Thu Jan 1 00:00:00 1970
From: Maxime Coquelin <maxime.coquelin@redhat.com>
To: dev@dpdk.org, yliu@fridaylinux.org, jfreiman@redhat.com, tiwei.bie@intel.com
Cc: mst@redhat.com, vkaplans@redhat.com, jasowang@redhat.com,
 Maxime Coquelin <maxime.coquelin@redhat.com>
Date: Thu, 31 Aug 2017 11:50:05 +0200
Message-Id: <20170831095023.21037-4-maxime.coquelin@redhat.com>
In-Reply-To: <20170831095023.21037-1-maxime.coquelin@redhat.com>
References: <20170831095023.21037-1-maxime.coquelin@redhat.com>
Subject: [dpdk-dev] [PATCH 03/21] vhost: protect virtio_net device struct

The virtio_net device struct might be accessed while it is being
reallocated, in the case of NUMA reallocation. This case might be
theoretical, but the protection will be needed anyway to guard the
vring pages against invalidation.

The virtio_net devices are now protected with a readers/writers lock,
so that before reallocating a device, it is ensured that no processing
thread is still referencing it.
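As an illustration, here is a minimal sketch of the locking pattern the
new API imposes on callers (rte_vhost_example_api() is a hypothetical
function used only to demonstrate the rules; it is not part of this
patch):

    int
    rte_vhost_example_api(int vid)
    {
            /* Takes vhost_devices[vid].lock for reading. */
            struct virtio_net *dev = get_device(vid);

            if (!dev)
                    return -1; /* get_device() already dropped the lock. */

            /*
             * dev can safely be dereferenced here: realloc_device()
             * takes the same lock for writing, so the struct cannot be
             * freed or reallocated while the read lock is held.
             */

            put_device(vid); /* Releases the read lock. */

            return 0;
    }

Every exit path taken after a successful get_device() must call
put_device(), which is why several APIs below are converted to a
single "out:" label that releases the lock.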
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/librte_vhost/vhost.c      | 237 +++++++++++++++++++++++++++++++++++-------
 lib/librte_vhost/vhost.h      |   3 +-
 lib/librte_vhost/vhost_user.c |  73 +++++--------
 lib/librte_vhost/virtio_net.c |  17 ++-
 4 files changed, 240 insertions(+), 90 deletions(-)

diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 0b6aa1cc4..429983858 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -46,16 +46,25 @@
 #include <rte_string_fns.h>
 #include <rte_memory.h>
 #include <rte_malloc.h>
+#include <rte_rwlock.h>
 #include <rte_vhost.h>
 
 #include "vhost.h"
 
-struct virtio_net *vhost_devices[MAX_VHOST_DEVICE];
+struct vhost_device {
+	struct virtio_net *dev;
+	rte_rwlock_t lock;
+};
 
-struct virtio_net *
-get_device(int vid)
+/* Declared as static so that .lock is initialized */
+static struct vhost_device vhost_devices[MAX_VHOST_DEVICE];
+
+static inline struct virtio_net *
+__get_device(int vid)
 {
-	struct virtio_net *dev = vhost_devices[vid];
+	struct virtio_net *dev;
+
+	dev = vhost_devices[vid].dev;
 
 	if (unlikely(!dev)) {
 		RTE_LOG(ERR, VHOST_CONFIG,
@@ -65,6 +74,83 @@ get_device(int vid)
 	return dev;
 }
 
+struct virtio_net *
+get_device(int vid)
+{
+	struct virtio_net *dev;
+
+	rte_rwlock_read_lock(&vhost_devices[vid].lock);
+
+	dev = __get_device(vid);
+	if (unlikely(!dev))
+		rte_rwlock_read_unlock(&vhost_devices[vid].lock);
+
+	return dev;
+}
+
+void
+put_device(int vid)
+{
+	rte_rwlock_read_unlock(&vhost_devices[vid].lock);
+}
+
+static struct virtio_net *
+get_device_wr(int vid)
+{
+	struct virtio_net *dev;
+
+	rte_rwlock_write_lock(&vhost_devices[vid].lock);
+
+	dev = __get_device(vid);
+	if (unlikely(!dev))
+		rte_rwlock_write_unlock(&vhost_devices[vid].lock);
+
+	return dev;
+}
+
+static void
+put_device_wr(int vid)
+{
+	rte_rwlock_write_unlock(&vhost_devices[vid].lock);
+}
+
+int
+realloc_device(int vid, int vq_index, int node)
+{
+	struct virtio_net *dev, *old_dev;
+	struct vhost_virtqueue *vq;
+
+	dev = rte_malloc_socket(NULL, sizeof(*dev), 0, node);
+	if (!dev)
+		return -1;
+
+	vq = rte_malloc_socket(NULL, sizeof(*vq), 0, node);
+	if (!vq) {
+		rte_free(dev);
+		return -1;
+	}
+
+	old_dev = get_device_wr(vid);
+	if (!old_dev) {
+		rte_free(vq);
+		rte_free(dev);
+		return -1;
+	}
+
+	memcpy(dev, old_dev, sizeof(*dev));
+	memcpy(vq, old_dev->virtqueue[vq_index], sizeof(*vq));
+	dev->virtqueue[vq_index] = vq;
+
+	rte_free(old_dev->virtqueue[vq_index]);
+	rte_free(old_dev);
+
+	vhost_devices[vid].dev = dev;
+
+	put_device_wr(vid);
+
+	return 0;
+}
+
 static void
 cleanup_vq(struct vhost_virtqueue *vq, int destroy)
 {
@@ -195,7 +281,7 @@ vhost_new_device(void)
 	}
 
 	for (i = 0; i < MAX_VHOST_DEVICE; i++) {
-		if (vhost_devices[i] == NULL)
+		if (vhost_devices[i].dev == NULL)
 			break;
 	}
 	if (i == MAX_VHOST_DEVICE) {
@@ -205,8 +291,10 @@ vhost_new_device(void)
 		return -1;
 	}
 
-	vhost_devices[i] = dev;
+	rte_rwlock_write_lock(&vhost_devices[i].lock);
+	vhost_devices[i].dev = dev;
 	dev->vid = i;
+	rte_rwlock_write_unlock(&vhost_devices[i].lock);
 
 	return i;
 }
@@ -228,10 +316,15 @@ vhost_destroy_device(int vid)
 		dev->notify_ops->destroy_device(vid);
 	}
 
+	put_device(vid);
+	dev = get_device_wr(vid);
+
 	cleanup_device(dev, 1);
 	free_device(dev);
 
-	vhost_devices[vid] = NULL;
+	vhost_devices[vid].dev = NULL;
+
+	put_device_wr(vid);
 }
 
 void
@@ -249,6 +342,8 @@ vhost_set_ifname(int vid, const char *if_name, unsigned int if_len)
 
 	strncpy(dev->ifname, if_name, len);
 	dev->ifname[sizeof(dev->ifname) - 1] = '\0';
+
+	put_device(vid);
 }
 
 void
@@ -260,25 +355,35 @@ vhost_enable_dequeue_zero_copy(int vid)
 		return;
 
 	dev->dequeue_zero_copy = 1;
+
+	put_device(vid);
 }
 
 int
 rte_vhost_get_mtu(int vid, uint16_t *mtu)
 {
 	struct virtio_net *dev = get_device(vid);
+	int ret = 0;
 
 	if (!dev)
 		return -ENODEV;
 
-	if (!(dev->flags & VIRTIO_DEV_READY))
-		return -EAGAIN;
+	if (!(dev->flags & VIRTIO_DEV_READY)) {
+		ret = -EAGAIN;
+		goto out_put;
+	}
 
-	if (!(dev->features & (1ULL << VIRTIO_NET_F_MTU)))
-		return -ENOTSUP;
+	if (!(dev->features & (1ULL << VIRTIO_NET_F_MTU))) {
+		ret = -ENOTSUP;
+		goto out_put;
+	}
 
 	*mtu = dev->mtu;
 
-	return 0;
+out_put:
+	put_device(vid);
+
+	return ret;
 }
 
 int
@@ -298,9 +403,11 @@ rte_vhost_get_numa_node(int vid)
 		RTE_LOG(ERR, VHOST_CONFIG,
 			"(%d) failed to query numa node: %s\n",
 			vid, rte_strerror(errno));
-		return -1;
+		numa_node = -1;
 	}
 
+	put_device(vid);
+
 	return numa_node;
 #else
 	RTE_SET_USED(vid);
@@ -312,22 +419,32 @@ uint32_t
 rte_vhost_get_queue_num(int vid)
 {
 	struct virtio_net *dev = get_device(vid);
+	uint32_t queue_num;
 
 	if (dev == NULL)
 		return 0;
 
-	return dev->nr_vring / 2;
+	queue_num = dev->nr_vring / 2;
+
+	put_device(vid);
+
+	return queue_num;
 }
 
 uint16_t
 rte_vhost_get_vring_num(int vid)
 {
 	struct virtio_net *dev = get_device(vid);
+	uint16_t vring_num;
 
 	if (dev == NULL)
 		return 0;
 
-	return dev->nr_vring;
+	vring_num = dev->nr_vring;
+
+	put_device(vid);
+
+	return vring_num;
 }
 
 int
@@ -343,6 +460,8 @@ rte_vhost_get_ifname(int vid, char *buf, size_t len)
 	strncpy(buf, dev->ifname, len);
 	buf[len - 1] = '\0';
 
+	put_device(vid);
+
 	return 0;
 }
 
@@ -356,6 +475,9 @@ rte_vhost_get_negotiated_features(int vid, uint64_t *features)
 		return -1;
 
 	*features = dev->features;
+
+	put_device(vid);
+
 	return 0;
 }
 
@@ -365,6 +487,7 @@ rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem)
 	struct virtio_net *dev;
 	struct rte_vhost_memory *m;
 	size_t size;
+	int ret = 0;
 
 	dev = get_device(vid);
 	if (!dev)
@@ -372,14 +495,19 @@ rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem)
 
 	size = dev->mem->nregions * sizeof(struct rte_vhost_mem_region);
 	m = malloc(sizeof(struct rte_vhost_memory) + size);
-	if (!m)
-		return -1;
+	if (!m) {
+		ret = -1;
+		goto out;
+	}
 
 	m->nregions = dev->mem->nregions;
 	memcpy(m->regions, dev->mem->regions, size);
 	*mem = m;
 
-	return 0;
+out:
+	put_device(vid);
+
+	return ret;
 }
 
 int
@@ -388,17 +516,22 @@ rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx,
 {
 	struct virtio_net *dev;
 	struct vhost_virtqueue *vq;
+	int ret = 0;
 
 	dev = get_device(vid);
 	if (!dev)
 		return -1;
 
-	if (vring_idx >= VHOST_MAX_VRING)
-		return -1;
+	if (vring_idx >= VHOST_MAX_VRING) {
+		ret = -1;
+		goto out;
+	}
 
 	vq = dev->virtqueue[vring_idx];
-	if (!vq)
-		return -1;
+	if (!vq) {
+		ret = -1;
+		goto out;
+	}
 
 	vring->desc = vq->desc;
 	vring->avail = vq->avail;
@@ -409,7 +542,10 @@ rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx,
 	vring->kickfd = vq->kickfd;
 	vring->size = vq->size;
 
-	return 0;
+out:
+	put_device(vid);
+
+	return ret;
 }
 
 uint16_t
@@ -417,6 +553,7 @@ rte_vhost_avail_entries(int vid, uint16_t queue_id)
 {
 	struct virtio_net *dev;
 	struct vhost_virtqueue *vq;
+	uint16_t avail_entries = 0;
 
 	dev = get_device(vid);
 	if (!dev)
@@ -424,15 +561,23 @@ rte_vhost_avail_entries(int vid, uint16_t queue_id)
 
 	vq = dev->virtqueue[queue_id];
 	if (!vq->enabled)
-		return 0;
+		goto out;
 
-	return *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx;
+
+	avail_entries = *(volatile uint16_t *)&vq->avail->idx;
+	avail_entries -= vq->last_used_idx;
+
+out:
+	put_device(vid);
+
+	return avail_entries;
 }
 
 int
 rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
 {
 	struct virtio_net *dev = get_device(vid);
+	int ret = 0;
 
 	if (dev == NULL)
 		return -1;
 
@@ -440,11 +585,16 @@ rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
 	if (enable) {
 		RTE_LOG(ERR, VHOST_CONFIG,
 			"guest notification isn't supported.\n");
-		return -1;
+		ret = -1;
+		goto out;
 	}
 
 	dev->virtqueue[queue_id]->used->flags = VRING_USED_F_NO_NOTIFY;
-	return 0;
+
+out:
+	put_device(vid);
+
+	return ret;
 }
 
 void
@@ -456,6 +606,8 @@ rte_vhost_log_write(int vid, uint64_t addr, uint64_t len)
 		return;
 
 	vhost_log_write(dev, addr, len);
+
+	put_device(vid);
 }
 
 void
@@ -470,12 +622,15 @@ rte_vhost_log_used_vring(int vid, uint16_t vring_idx,
 		return;
 
 	if (vring_idx >= VHOST_MAX_VRING)
-		return;
+		goto out;
 	vq = dev->virtqueue[vring_idx];
 	if (!vq)
-		return;
+		goto out;
 
 	vhost_log_used_vring(dev, vq, offset, len);
+
+out:
+	put_device(vid);
 }
 
 uint32_t
@@ -483,6 +638,7 @@ rte_vhost_rx_queue_count(int vid, uint16_t qid)
 {
 	struct virtio_net *dev;
 	struct vhost_virtqueue *vq;
+	uint32_t queue_count;
 
 	dev = get_device(vid);
 	if (dev == NULL)
@@ -491,15 +647,26 @@ rte_vhost_rx_queue_count(int vid, uint16_t qid)
 	if (unlikely(qid >= dev->nr_vring || (qid & 1) == 0)) {
 		RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
 			dev->vid, __func__, qid);
-		return 0;
+		queue_count = 0;
+		goto out;
 	}
 
 	vq = dev->virtqueue[qid];
-	if (vq == NULL)
-		return 0;
+	if (vq == NULL) {
+		queue_count = 0;
+		goto out;
+	}
 
-	if (unlikely(vq->enabled == 0 || vq->avail == NULL))
-		return 0;
+	if (unlikely(vq->enabled == 0 || vq->avail == NULL)) {
+		queue_count = 0;
+		goto out;
+	}
+
+	queue_count = *((volatile uint16_t *)&vq->avail->idx);
+	queue_count -= vq->last_avail_idx;
+
+out:
+	put_device(vid);
 
-	return *((volatile uint16_t *)&vq->avail->idx) - vq->last_avail_idx;
+	return queue_count;
 }
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 0f294f395..18ad69c85 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -269,7 +269,6 @@ vhost_log_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq,
 
 extern uint64_t VHOST_FEATURES;
 #define MAX_VHOST_DEVICE 1024
-extern struct virtio_net *vhost_devices[MAX_VHOST_DEVICE];
 
 /* Convert guest physical address to host physical address */
 static __rte_always_inline phys_addr_t
@@ -292,6 +291,8 @@ gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size)
 }
 
 struct virtio_net *get_device(int vid);
+void put_device(int vid);
+int realloc_device(int vid, int vq_index, int node);
 
 int vhost_new_device(void);
 void cleanup_device(struct virtio_net *dev, int destroy);
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index ad2e8d380..5b3b8812a 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -241,62 +241,31 @@ vhost_user_set_vring_num(struct virtio_net *dev,
 static struct virtio_net*
 numa_realloc(struct virtio_net *dev, int index)
 {
-	int oldnode, newnode;
-	struct virtio_net *old_dev;
-	struct vhost_virtqueue *old_vq, *vq;
-	int ret;
+	int oldnode, newnode, vid, ret;
 
-	old_dev = dev;
-	vq = old_vq = dev->virtqueue[index];
+	vid = dev->vid;
 
-	ret = get_mempolicy(&newnode, NULL, 0, old_vq->desc,
+	ret = get_mempolicy(&newnode, NULL, 0, dev->virtqueue[index]->desc,
 			MPOL_F_NODE | MPOL_F_ADDR);
 
 	/* check if we need to reallocate vq */
-	ret |= get_mempolicy(&oldnode, NULL, 0, old_vq,
+	ret |= get_mempolicy(&oldnode, NULL, 0, dev->virtqueue[index],
 			MPOL_F_NODE | MPOL_F_ADDR);
 	if (ret) {
 		RTE_LOG(ERR, VHOST_CONFIG,
 			"Unable to get vq numa information.\n");
 		return dev;
 	}
-	if (oldnode != newnode) {
-		RTE_LOG(INFO, VHOST_CONFIG,
-			"reallocate vq from %d to %d node\n",
-			oldnode, newnode);
-		vq = rte_malloc_socket(NULL, sizeof(*vq), 0, newnode);
-		if (!vq)
-			return dev;
-
-		memcpy(vq, old_vq, sizeof(*vq));
-		rte_free(old_vq);
-	}
 
-	/* check if we need to reallocate dev */
-	ret = get_mempolicy(&oldnode, NULL, 0, old_dev,
-			MPOL_F_NODE | MPOL_F_ADDR);
-	if (ret) {
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"Unable to get dev numa information.\n");
-		goto out;
-	}
 	if (oldnode != newnode) {
 		RTE_LOG(INFO, VHOST_CONFIG,
-			"reallocate dev from %d to %d node\n",
-			oldnode, newnode);
-		dev = rte_malloc_socket(NULL, sizeof(*dev), 0, newnode);
-		if (!dev) {
-			dev = old_dev;
-			goto out;
-		}
-
-		memcpy(dev, old_dev, sizeof(*dev));
-		rte_free(old_dev);
+			"reallocate vq from %d to %d node\n", oldnode, newnode);
+		put_device(vid);
+		if (realloc_device(vid, index, newnode))
+			RTE_LOG(ERR, VHOST_CONFIG, "Failed to realloc device\n");
+		dev = get_device(vid);
 	}
 
-out:
-	dev->virtqueue[index] = vq;
-	vhost_devices[dev->vid] = dev;
-
 	return dev;
 }
 #else
@@ -336,9 +305,10 @@ qva_to_vva(struct virtio_net *dev, uint64_t qva)
  * This function then converts these to our address space.
  */
 static int
-vhost_user_set_vring_addr(struct virtio_net *dev, VhostUserMsg *msg)
+vhost_user_set_vring_addr(struct virtio_net **pdev, VhostUserMsg *msg)
 {
 	struct vhost_virtqueue *vq;
+	struct virtio_net *dev = *pdev;
 
 	if (dev->mem == NULL)
 		return -1;
@@ -356,7 +326,7 @@ vhost_user_set_vring_addr(struct virtio_net *dev, VhostUserMsg *msg)
 		return -1;
 	}
 
-	dev = numa_realloc(dev, msg->payload.addr.index);
+	*pdev = dev = numa_realloc(dev, msg->payload.addr.index);
 	vq = dev->virtqueue[msg->payload.addr.index];
 
 	vq->avail = (struct vring_avail *)(uintptr_t)qva_to_vva(dev,
@@ -966,7 +936,7 @@ vhost_user_msg_handler(int vid, int fd)
 {
 	struct virtio_net *dev;
 	struct VhostUserMsg msg;
-	int ret;
+	int ret = 0;
 
 	dev = get_device(vid);
 	if (dev == NULL)
@@ -978,7 +948,8 @@ vhost_user_msg_handler(int vid, int fd)
 			RTE_LOG(ERR, VHOST_CONFIG,
 				"failed to get callback ops for driver %s\n",
 				dev->ifname);
-			return -1;
+			ret = -1;
+			goto out;
 		}
 	}
 
@@ -994,10 +965,10 @@ vhost_user_msg_handler(int vid, int fd)
 		RTE_LOG(ERR, VHOST_CONFIG,
 			"vhost read incorrect message\n");
 
-		return -1;
+		ret = -1;
+		goto out;
 	}
 
-	ret = 0;
 	RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
 		vhost_message_str[msg.request]);
 
@@ -1005,7 +976,8 @@ vhost_user_msg_handler(int vid, int fd)
 	if (ret < 0) {
 		RTE_LOG(ERR, VHOST_CONFIG,
 			"failed to alloc queue\n");
-		return -1;
+		ret = -1;
+		goto out;
 	}
 
 	switch (msg.request) {
@@ -1054,7 +1026,7 @@ vhost_user_msg_handler(int vid, int fd)
 		vhost_user_set_vring_num(dev, &msg);
 		break;
 	case VHOST_USER_SET_VRING_ADDR:
-		vhost_user_set_vring_addr(dev, &msg);
+		vhost_user_set_vring_addr(&dev, &msg);
 		break;
 	case VHOST_USER_SET_VRING_BASE:
 		vhost_user_set_vring_base(dev, &msg);
@@ -1122,5 +1094,8 @@ vhost_user_msg_handler(int vid, int fd)
 		}
 	}
 
-	return 0;
+out:
+	put_device(vid);
+
+	return ret;
 }
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index b889aa0b7..04255dc85 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -598,14 +598,19 @@ rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
 	struct rte_mbuf **pkts, uint16_t count)
 {
 	struct virtio_net *dev = get_device(vid);
+	int ret = 0;
 
 	if (!dev)
 		return 0;
 
 	if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF))
-		return virtio_dev_merge_rx(dev, queue_id, pkts, count);
+		ret = virtio_dev_merge_rx(dev, queue_id, pkts, count);
 	else
-		return virtio_dev_rx(dev, queue_id, pkts, count);
+		ret = virtio_dev_rx(dev, queue_id, pkts, count);
+
+	put_device(vid);
+
+	return ret;
 }
 
 static inline bool
@@ -1006,12 +1011,12 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 	if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->nr_vring))) {
 		RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
 			dev->vid, __func__, queue_id);
-		return 0;
+		goto out;
 	}
 
 	vq = dev->virtqueue[queue_id];
 	if (unlikely(vq->enabled == 0))
-		return 0;
+		goto out;
 
 	if (unlikely(dev->dequeue_zero_copy)) {
 		struct zcopy_mbuf *zmbuf, *next;
@@ -1061,7 +1066,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 		if (rarp_mbuf == NULL) {
 			RTE_LOG(ERR, VHOST_DATA,
 				"Failed to allocate memory for mbuf.\n");
-			return 0;
+			goto out;
 		}
 
 		if (make_rarp_packet(rarp_mbuf, &dev->mac)) {
@@ -1180,5 +1185,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 		i += 1;
 	}
 
+	put_device(vid);
+
 	return i;
 }
-- 
2.13.3