* [PATCH 1/2] vhost: keep a reference to virtqueue index @ 2022-07-22 13:53 David Marchand 2022-07-22 13:53 ` [PATCH 2/2] vhost: stop using mempool for IOTLB David Marchand ` (4 more replies) 0 siblings, 5 replies; 25+ messages in thread From: David Marchand @ 2022-07-22 13:53 UTC (permalink / raw) To: dev; +Cc: Maxime Coquelin, Chenbo Xia Having a back reference to the index of the vq in the dev->virtqueue[] array makes it possible to unify the internal API with only passing dev and vq. It also allows displaying the vq index in log messages. Remove virtqueue index checks where unneeded (like in static helpers called from a loop on all available virtqueues). Move virtqueue index validity checks as early as possible. Signed-off-by: David Marchand <david.marchand@redhat.com> --- lib/vhost/iotlb.c | 5 +-- lib/vhost/iotlb.h | 2 +- lib/vhost/vhost.c | 74 ++++++++++++++---------------- lib/vhost/vhost.h | 3 ++ lib/vhost/vhost_user.c | 58 +++++++++++++--------------- lib/vhost/virtio_net.c | 86 +++++++++++++++++++----------------------- 6 files changed, 98 insertions(+), 130 deletions(-) diff --git a/lib/vhost/iotlb.c b/lib/vhost/iotlb.c index 35b4193606..dd35338ec0 100644 --- a/lib/vhost/iotlb.c +++ b/lib/vhost/iotlb.c @@ -293,10 +293,9 @@ vhost_user_iotlb_flush_all(struct vhost_virtqueue *vq) } int -vhost_user_iotlb_init(struct virtio_net *dev, int vq_index) +vhost_user_iotlb_init(struct virtio_net *dev, struct vhost_virtqueue *vq) { char pool_name[RTE_MEMPOOL_NAMESIZE]; - struct vhost_virtqueue *vq = dev->virtqueue[vq_index]; int socket = 0; if (vq->iotlb_pool) { @@ -319,7 +318,7 @@ vhost_user_iotlb_init(struct virtio_net *dev, int vq_index) TAILQ_INIT(&vq->iotlb_pending_list); snprintf(pool_name, sizeof(pool_name), "iotlb_%u_%d_%d", - getpid(), dev->vid, vq_index); + getpid(), dev->vid, vq->index); VHOST_LOG_CONFIG(dev->ifname, DEBUG, "IOTLB cache name: %s\n", pool_name); /* If already created, free it and recreate */ diff --git a/lib/vhost/iotlb.h 
b/lib/vhost/iotlb.h index 8d0ff7473b..738e31e7b9 100644 --- a/lib/vhost/iotlb.h +++ b/lib/vhost/iotlb.h @@ -47,6 +47,6 @@ void vhost_user_iotlb_pending_insert(struct virtio_net *dev, struct vhost_virtqu void vhost_user_iotlb_pending_remove(struct vhost_virtqueue *vq, uint64_t iova, uint64_t size, uint8_t perm); void vhost_user_iotlb_flush_all(struct vhost_virtqueue *vq); -int vhost_user_iotlb_init(struct virtio_net *dev, int vq_index); +int vhost_user_iotlb_init(struct virtio_net *dev, struct vhost_virtqueue *vq); #endif /* _VHOST_IOTLB_H_ */ diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c index 60cb05a0ff..97bae0de91 100644 --- a/lib/vhost/vhost.c +++ b/lib/vhost/vhost.c @@ -575,23 +575,10 @@ vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue *vq) } static void -init_vring_queue(struct virtio_net *dev, uint32_t vring_idx) +init_vring_queue(struct virtio_net *dev, struct vhost_virtqueue *vq) { - struct vhost_virtqueue *vq; int numa_node = SOCKET_ID_ANY; - if (vring_idx >= VHOST_MAX_VRING) { - VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to init vring, out of bound (%d)\n", - vring_idx); - return; - } - - vq = dev->virtqueue[vring_idx]; - if (!vq) { - VHOST_LOG_CONFIG(dev->ifname, ERR, "virtqueue not allocated (%d)\n", vring_idx); - return; - } - memset(vq, 0, sizeof(struct vhost_virtqueue)); vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD; @@ -607,32 +594,20 @@ init_vring_queue(struct virtio_net *dev, uint32_t vring_idx) #endif vq->numa_node = numa_node; - vhost_user_iotlb_init(dev, vring_idx); + vhost_user_iotlb_init(dev, vq); } static void -reset_vring_queue(struct virtio_net *dev, uint32_t vring_idx) +reset_vring_queue(struct virtio_net *dev, struct vhost_virtqueue *vq) { - struct vhost_virtqueue *vq; + uint32_t vring_idx; int callfd; - if (vring_idx >= VHOST_MAX_VRING) { - VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to reset vring, out of bound (%d)\n", - vring_idx); - return; - } - - vq = dev->virtqueue[vring_idx]; - if (!vq) { - 
VHOST_LOG_CONFIG(dev->ifname, ERR, - "failed to reset vring, virtqueue not allocated (%d)\n", - vring_idx); - return; - } - callfd = vq->callfd; - init_vring_queue(dev, vring_idx); + vring_idx = vq->index; + init_vring_queue(dev, vq); vq->callfd = callfd; + vq->index = vring_idx; } int @@ -655,8 +630,9 @@ alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx) } dev->virtqueue[i] = vq; - init_vring_queue(dev, i); + init_vring_queue(dev, vq); rte_spinlock_init(&vq->access_lock); + vq->index = vring_idx; vq->avail_wrap_counter = 1; vq->used_wrap_counter = 1; vq->signalled_used_valid = false; @@ -681,8 +657,16 @@ reset_device(struct virtio_net *dev) dev->protocol_features = 0; dev->flags &= VIRTIO_DEV_BUILTIN_VIRTIO_NET; - for (i = 0; i < dev->nr_vring; i++) - reset_vring_queue(dev, i); + for (i = 0; i < dev->nr_vring; i++) { + struct vhost_virtqueue *vq = dev->virtqueue[i]; + + if (!vq) { + VHOST_LOG_CONFIG(dev->ifname, ERR, + "failed to reset vring, virtqueue not allocated (%d)\n", i); + continue; + } + reset_vring_queue(dev, vq); + } } /* @@ -1661,17 +1645,15 @@ rte_vhost_extern_callback_register(int vid, } static __rte_always_inline int -async_channel_register(int vid, uint16_t queue_id) +async_channel_register(struct virtio_net *dev, struct vhost_virtqueue *vq) { - struct virtio_net *dev = get_device(vid); - struct vhost_virtqueue *vq = dev->virtqueue[queue_id]; struct vhost_async *async; int node = vq->numa_node; if (unlikely(vq->async)) { VHOST_LOG_CONFIG(dev->ifname, ERR, "async register failed: already registered (qid: %d)\n", - queue_id); + vq->index); return -1; } @@ -1679,7 +1661,7 @@ async_channel_register(int vid, uint16_t queue_id) if (!async) { VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to allocate async metadata (qid: %d)\n", - queue_id); + vq->index); return -1; } @@ -1688,7 +1670,7 @@ async_channel_register(int vid, uint16_t queue_id) if (!async->pkts_info) { VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to allocate async_pkts_info (qid: %d)\n", 
- queue_id); + vq->index); goto out_free_async; } @@ -1697,7 +1679,7 @@ async_channel_register(int vid, uint16_t queue_id) if (!async->pkts_cmpl_flag) { VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to allocate async pkts_cmpl_flag (qid: %d)\n", - queue_id); + vq->index); goto out_free_async; } @@ -1708,7 +1690,7 @@ async_channel_register(int vid, uint16_t queue_id) if (!async->buffers_packed) { VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to allocate async buffers (qid: %d)\n", - queue_id); + vq->index); goto out_free_inflight; } } else { @@ -1718,7 +1700,7 @@ async_channel_register(int vid, uint16_t queue_id) if (!async->descs_split) { VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to allocate async descs (qid: %d)\n", - queue_id); + vq->index); goto out_free_inflight; } } @@ -1753,7 +1735,7 @@ rte_vhost_async_channel_register(int vid, uint16_t queue_id) return -1; rte_spinlock_lock(&vq->access_lock); - ret = async_channel_register(vid, queue_id); + ret = async_channel_register(dev, vq); rte_spinlock_unlock(&vq->access_lock); return ret; @@ -1782,7 +1764,7 @@ rte_vhost_async_channel_register_thread_unsafe(int vid, uint16_t queue_id) return -1; } - return async_channel_register(vid, queue_id); + return async_channel_register(dev, vq); } int diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h index 40fac3b7c6..c6260b54cc 100644 --- a/lib/vhost/vhost.h +++ b/lib/vhost/vhost.h @@ -309,6 +309,9 @@ struct vhost_virtqueue { /* Currently unused as polling mode is enabled */ int kickfd; + /* Index of this vq in dev->virtqueue[] */ + uint32_t index; + /* inflight share memory info */ union { struct rte_vhost_inflight_info_split *inflight_split; diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index 4ad28bac45..73e69450fd 100644 --- a/lib/vhost/vhost_user.c +++ b/lib/vhost/vhost_user.c @@ -240,22 +240,20 @@ vhost_backend_cleanup(struct virtio_net *dev) } static void -vhost_user_notify_queue_state(struct virtio_net *dev, uint16_t index, - int enable) 
+vhost_user_notify_queue_state(struct virtio_net *dev, struct vhost_virtqueue *vq, + int enable) { struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev; - struct vhost_virtqueue *vq = dev->virtqueue[index]; /* Configure guest notifications on enable */ if (enable && vq->notif_enable != VIRTIO_UNINITIALIZED_NOTIF) vhost_enable_guest_notification(dev, vq, vq->notif_enable); if (vdpa_dev && vdpa_dev->ops->set_vring_state) - vdpa_dev->ops->set_vring_state(dev->vid, index, enable); + vdpa_dev->ops->set_vring_state(dev->vid, vq->index, enable); if (dev->notify_ops->vring_state_changed) - dev->notify_ops->vring_state_changed(dev->vid, - index, enable); + dev->notify_ops->vring_state_changed(dev->vid, vq->index, enable); } /* @@ -493,12 +491,11 @@ vhost_user_set_vring_num(struct virtio_net **pdev, * make them on the same numa node as the memory of vring descriptor. */ #ifdef RTE_LIBRTE_VHOST_NUMA -static struct virtio_net* -numa_realloc(struct virtio_net *dev, int index) +static struct virtio_net * +numa_realloc(struct virtio_net *dev, struct vhost_virtqueue *vq) { int node, dev_node; struct virtio_net *old_dev; - struct vhost_virtqueue *vq; struct batch_copy_elem *bce; struct guest_page *gp; struct rte_vhost_memory *mem; @@ -506,7 +503,6 @@ numa_realloc(struct virtio_net *dev, int index) int ret; old_dev = dev; - vq = dev->virtqueue[index]; /* * If VQ is ready, it is too late to reallocate, it certainly already @@ -519,7 +515,7 @@ numa_realloc(struct virtio_net *dev, int index) if (ret) { VHOST_LOG_CONFIG(dev->ifname, ERR, "unable to get virtqueue %d numa information.\n", - index); + vq->index); return dev; } @@ -530,14 +526,14 @@ numa_realloc(struct virtio_net *dev, int index) if (!vq) { VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to realloc virtqueue %d on node %d\n", - index, node); + vq->index, node); return dev; } - if (vq != dev->virtqueue[index]) { + if (vq != dev->virtqueue[vq->index]) { VHOST_LOG_CONFIG(dev->ifname, INFO, "reallocated virtqueue on node %d\n", node); - 
dev->virtqueue[index] = vq; - vhost_user_iotlb_init(dev, index); + dev->virtqueue[vq->index] = vq; + vhost_user_iotlb_init(dev, vq); } if (vq_is_packed(dev)) { @@ -665,8 +661,8 @@ numa_realloc(struct virtio_net *dev, int index) return dev; } #else -static struct virtio_net* -numa_realloc(struct virtio_net *dev, int index __rte_unused) +static struct virtio_net * +numa_realloc(struct virtio_net *dev, struct vhost_virtqueue *vq __rte_unused) { return dev; } @@ -739,9 +735,8 @@ log_addr_to_gpa(struct virtio_net *dev, struct vhost_virtqueue *vq) } static struct virtio_net * -translate_ring_addresses(struct virtio_net *dev, int vq_index) +translate_ring_addresses(struct virtio_net *dev, struct vhost_virtqueue *vq) { - struct vhost_virtqueue *vq = dev->virtqueue[vq_index]; struct vhost_vring_addr *addr = &vq->ring_addrs; uint64_t len, expected_len; @@ -765,8 +760,8 @@ translate_ring_addresses(struct virtio_net *dev, int vq_index) return dev; } - dev = numa_realloc(dev, vq_index); - vq = dev->virtqueue[vq_index]; + dev = numa_realloc(dev, vq); + vq = dev->virtqueue[vq->index]; addr = &vq->ring_addrs; len = sizeof(struct vring_packed_desc_event); @@ -807,8 +802,8 @@ translate_ring_addresses(struct virtio_net *dev, int vq_index) return dev; } - dev = numa_realloc(dev, vq_index); - vq = dev->virtqueue[vq_index]; + dev = numa_realloc(dev, vq); + vq = dev->virtqueue[vq->index]; addr = &vq->ring_addrs; len = sizeof(struct vring_avail) + sizeof(uint16_t) * vq->size; @@ -887,7 +882,7 @@ vhost_user_set_vring_addr(struct virtio_net **pdev, if ((vq->enabled && (dev->features & (1ULL << VHOST_USER_F_PROTOCOL_FEATURES))) || access_ok) { - dev = translate_ring_addresses(dev, ctx->msg.payload.addr.index); + dev = translate_ring_addresses(dev, vq); if (!dev) return RTE_VHOST_MSG_RESULT_ERR; @@ -1396,7 +1391,7 @@ vhost_user_set_mem_table(struct virtio_net **pdev, */ vring_invalidate(dev, vq); - dev = translate_ring_addresses(dev, i); + dev = translate_ring_addresses(dev, vq); if (!dev) { 
dev = *pdev; goto free_mem_table; @@ -1781,7 +1776,7 @@ vhost_user_set_vring_call(struct virtio_net **pdev, if (vq->ready) { vq->ready = false; - vhost_user_notify_queue_state(dev, file.index, 0); + vhost_user_notify_queue_state(dev, vq, 0); } if (vq->callfd >= 0) @@ -2029,7 +2024,8 @@ vhost_user_set_vring_kick(struct virtio_net **pdev, file.index, file.fd); /* Interpret ring addresses only when ring is started. */ - dev = translate_ring_addresses(dev, file.index); + vq = dev->virtqueue[file.index]; + dev = translate_ring_addresses(dev, vq); if (!dev) { if (file.fd != VIRTIO_INVALID_EVENTFD) close(file.fd); @@ -2039,8 +2035,6 @@ vhost_user_set_vring_kick(struct virtio_net **pdev, *pdev = dev; - vq = dev->virtqueue[file.index]; - /* * When VHOST_USER_F_PROTOCOL_FEATURES is not negotiated, * the ring starts already enabled. Otherwise, it is enabled via @@ -2052,7 +2046,7 @@ vhost_user_set_vring_kick(struct virtio_net **pdev, if (vq->ready) { vq->ready = false; - vhost_user_notify_queue_state(dev, file.index, 0); + vhost_user_notify_queue_state(dev, vq, 0); } if (vq->kickfd >= 0) @@ -2595,7 +2589,7 @@ vhost_user_iotlb_msg(struct virtio_net **pdev, if (is_vring_iotlb(dev, vq, imsg)) { rte_spinlock_lock(&vq->access_lock); - *pdev = dev = translate_ring_addresses(dev, i); + *pdev = dev = translate_ring_addresses(dev, vq); rte_spinlock_unlock(&vq->access_lock); } } @@ -3159,7 +3153,7 @@ vhost_user_msg_handler(int vid, int fd) if (cur_ready != (vq && vq->ready)) { vq->ready = cur_ready; - vhost_user_notify_queue_state(dev, i, cur_ready); + vhost_user_notify_queue_state(dev, vq, cur_ready); } } diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c index 35fa4670fd..467dfb203f 100644 --- a/lib/vhost/virtio_net.c +++ b/lib/vhost/virtio_net.c @@ -1555,22 +1555,12 @@ virtio_dev_rx_packed(struct virtio_net *dev, } static __rte_always_inline uint32_t -virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, +virtio_dev_rx(struct virtio_net *dev, struct vhost_virtqueue *vq, 
struct rte_mbuf **pkts, uint32_t count) { - struct vhost_virtqueue *vq; uint32_t nb_tx = 0; VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__); - if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { - VHOST_LOG_DATA(dev->ifname, ERR, - "%s: invalid virtqueue idx %d.\n", - __func__, queue_id); - return 0; - } - - vq = dev->virtqueue[queue_id]; - rte_spinlock_lock(&vq->access_lock); if (unlikely(!vq->enabled)) @@ -1620,7 +1610,14 @@ rte_vhost_enqueue_burst(int vid, uint16_t queue_id, return 0; } - return virtio_dev_rx(dev, queue_id, pkts, count); + if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { + VHOST_LOG_DATA(dev->ifname, ERR, + "%s: invalid virtqueue idx %d.\n", + __func__, queue_id); + return 0; + } + + return virtio_dev_rx(dev, dev->virtqueue[queue_id], pkts, count); } static __rte_always_inline uint16_t @@ -1669,8 +1666,7 @@ store_dma_desc_info_packed(struct vring_used_elem_packed *s_ring, static __rte_noinline uint32_t virtio_dev_rx_async_submit_split(struct virtio_net *dev, struct vhost_virtqueue *vq, - uint16_t queue_id, struct rte_mbuf **pkts, uint32_t count, - int16_t dma_id, uint16_t vchan_id) + struct rte_mbuf **pkts, uint32_t count, int16_t dma_id, uint16_t vchan_id) { struct buf_vector buf_vec[BUF_VECTOR_MAX]; uint32_t pkt_idx = 0; @@ -1732,7 +1728,7 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev, struct vhost_virtqueue VHOST_LOG_DATA(dev->ifname, DEBUG, "%s: failed to transfer %u packets for queue %u.\n", - __func__, pkt_err, queue_id); + __func__, pkt_err, vq->index); /* update number of completed packets */ pkt_idx = n_xfer; @@ -1878,8 +1874,7 @@ dma_error_handler_packed(struct vhost_virtqueue *vq, uint16_t slot_idx, static __rte_noinline uint32_t virtio_dev_rx_async_submit_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, - uint16_t queue_id, struct rte_mbuf **pkts, uint32_t count, - int16_t dma_id, uint16_t vchan_id) + struct rte_mbuf **pkts, uint32_t count, int16_t dma_id, uint16_t 
vchan_id) { uint32_t pkt_idx = 0; uint32_t remained = count; @@ -1924,7 +1919,7 @@ virtio_dev_rx_async_submit_packed(struct virtio_net *dev, struct vhost_virtqueue if (unlikely(pkt_err)) { VHOST_LOG_DATA(dev->ifname, DEBUG, "%s: failed to transfer %u packets for queue %u.\n", - __func__, pkt_err, queue_id); + __func__, pkt_err, vq->index); dma_error_handler_packed(vq, slot_idx, pkt_err, &pkt_idx); } @@ -2045,11 +2040,9 @@ write_back_completed_descs_packed(struct vhost_virtqueue *vq, } static __rte_always_inline uint16_t -vhost_poll_enqueue_completed(struct virtio_net *dev, uint16_t queue_id, - struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, - uint16_t vchan_id) +vhost_poll_enqueue_completed(struct virtio_net *dev, struct vhost_virtqueue *vq, + struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, uint16_t vchan_id) { - struct vhost_virtqueue *vq = dev->virtqueue[queue_id]; struct vhost_async *async = vq->async; struct async_inflight_info *pkts_info = async->pkts_info; uint16_t nr_cpl_pkts = 0; @@ -2156,7 +2149,7 @@ rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id, goto out; } - n_pkts_cpl = vhost_poll_enqueue_completed(dev, queue_id, pkts, count, dma_id, vchan_id); + n_pkts_cpl = vhost_poll_enqueue_completed(dev, vq, pkts, count, dma_id, vchan_id); vhost_queue_stats_update(dev, vq, pkts, n_pkts_cpl); vq->stats.inflight_completed += n_pkts_cpl; @@ -2216,12 +2209,11 @@ rte_vhost_clear_queue_thread_unsafe(int vid, uint16_t queue_id, } if ((queue_id & 1) == 0) - n_pkts_cpl = vhost_poll_enqueue_completed(dev, queue_id, - pkts, count, dma_id, vchan_id); - else { + n_pkts_cpl = vhost_poll_enqueue_completed(dev, vq, pkts, count, + dma_id, vchan_id); + else n_pkts_cpl = async_poll_dequeue_completed(dev, vq, pkts, count, - dma_id, vchan_id, dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS); - } + dma_id, vchan_id, dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS); vhost_queue_stats_update(dev, vq, pkts, n_pkts_cpl); vq->stats.inflight_completed += n_pkts_cpl; @@ -2275,12 
+2267,11 @@ rte_vhost_clear_queue(int vid, uint16_t queue_id, struct rte_mbuf **pkts, } if ((queue_id & 1) == 0) - n_pkts_cpl = vhost_poll_enqueue_completed(dev, queue_id, - pkts, count, dma_id, vchan_id); - else { + n_pkts_cpl = vhost_poll_enqueue_completed(dev, vq, pkts, count, + dma_id, vchan_id); + else n_pkts_cpl = async_poll_dequeue_completed(dev, vq, pkts, count, - dma_id, vchan_id, dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS); - } + dma_id, vchan_id, dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS); vhost_queue_stats_update(dev, vq, pkts, n_pkts_cpl); vq->stats.inflight_completed += n_pkts_cpl; @@ -2292,19 +2283,12 @@ rte_vhost_clear_queue(int vid, uint16_t queue_id, struct rte_mbuf **pkts, } static __rte_always_inline uint32_t -virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id, +virtio_dev_rx_async_submit(struct virtio_net *dev, struct vhost_virtqueue *vq, struct rte_mbuf **pkts, uint32_t count, int16_t dma_id, uint16_t vchan_id) { - struct vhost_virtqueue *vq; uint32_t nb_tx = 0; VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__); - if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { - VHOST_LOG_DATA(dev->ifname, ERR, - "%s: invalid virtqueue idx %d.\n", - __func__, queue_id); - return 0; - } if (unlikely(!dma_copy_track[dma_id].vchans || !dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) { @@ -2314,8 +2298,6 @@ virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id, return 0; } - vq = dev->virtqueue[queue_id]; - rte_spinlock_lock(&vq->access_lock); if (unlikely(!vq->enabled || !vq->async)) @@ -2333,11 +2315,11 @@ virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id, goto out; if (vq_is_packed(dev)) - nb_tx = virtio_dev_rx_async_submit_packed(dev, vq, queue_id, - pkts, count, dma_id, vchan_id); + nb_tx = virtio_dev_rx_async_submit_packed(dev, vq, pkts, count, + dma_id, vchan_id); else - nb_tx = virtio_dev_rx_async_submit_split(dev, vq, queue_id, - pkts, count, dma_id, vchan_id); + nb_tx 
= virtio_dev_rx_async_submit_split(dev, vq, pkts, count, + dma_id, vchan_id); vq->stats.inflight_submitted += nb_tx; @@ -2368,7 +2350,15 @@ rte_vhost_submit_enqueue_burst(int vid, uint16_t queue_id, return 0; } - return virtio_dev_rx_async_submit(dev, queue_id, pkts, count, dma_id, vchan_id); + if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { + VHOST_LOG_DATA(dev->ifname, ERR, + "%s: invalid virtqueue idx %d.\n", + __func__, queue_id); + return 0; + } + + return virtio_dev_rx_async_submit(dev, dev->virtqueue[queue_id], pkts, count, + dma_id, vchan_id); } static inline bool -- 2.36.1 ^ permalink raw reply [flat|nested] 25+ messages in thread
* [PATCH 2/2] vhost: stop using mempool for IOTLB 2022-07-22 13:53 [PATCH 1/2] vhost: keep a reference to virtqueue index David Marchand @ 2022-07-22 13:53 ` David Marchand 2022-07-22 14:00 ` [PATCH 1/2] vhost: keep a reference to virtqueue index David Marchand ` (3 subsequent siblings) 4 siblings, 0 replies; 25+ messages in thread From: David Marchand @ 2022-07-22 13:53 UTC (permalink / raw) To: dev; +Cc: Maxime Coquelin, Chenbo Xia A mempool consumes 3 memzones (with the default ring mempool driver). Default DPDK configuration allows RTE_MAX_MEMZONE (2560) memzones. Assuming there are no other memzones (well, there are always other memzones in a DPDK application), that means that we can have a maximum of 853 mempools. The IOTLB so far was requesting a mempool per vq, which means that at the maximum, the vhost library can request mempools for 426 qps. This limit was recently reached on big systems with a lot of virtio ports (and multiqueue in use). While the limit on mempool count could be something we need to look at the DPDK project level, there is no reason to use mempools with the IOTLB code. The IOTLB cache entries do not need to be DMA-able and are only used by the current process (in multiprocess context). Getting/putting objects from/in the mempool is always associated with some other locks, so some level of contention is already present. We can convert to a malloc'd pool with objects put in a free list protected by a spinlock. 
Signed-off-by: David Marchand <david.marchand@redhat.com> --- lib/vhost/iotlb.c | 102 ++++++++++++++++++++++++++++------------------ lib/vhost/iotlb.h | 1 + lib/vhost/vhost.c | 2 +- lib/vhost/vhost.h | 4 +- 4 files changed, 67 insertions(+), 42 deletions(-) diff --git a/lib/vhost/iotlb.c b/lib/vhost/iotlb.c index dd35338ec0..2a78929e78 100644 --- a/lib/vhost/iotlb.c +++ b/lib/vhost/iotlb.c @@ -13,6 +13,7 @@ struct vhost_iotlb_entry { TAILQ_ENTRY(vhost_iotlb_entry) next; + SLIST_ENTRY(vhost_iotlb_entry) next_free; uint64_t iova; uint64_t uaddr; @@ -22,6 +23,28 @@ struct vhost_iotlb_entry { #define IOTLB_CACHE_SIZE 2048 +static struct vhost_iotlb_entry * +vhost_user_iotlb_pool_get(struct vhost_virtqueue *vq) +{ + struct vhost_iotlb_entry *node; + + rte_spinlock_lock(&vq->iotlb_free_lock); + node = SLIST_FIRST(&vq->iotlb_free_list); + if (node != NULL) + SLIST_REMOVE_HEAD(&vq->iotlb_free_list, next_free); + rte_spinlock_unlock(&vq->iotlb_free_lock); + return node; +} + +static void +vhost_user_iotlb_pool_put(struct vhost_virtqueue *vq, + struct vhost_iotlb_entry *node) +{ + rte_spinlock_lock(&vq->iotlb_free_lock); + SLIST_INSERT_HEAD(&vq->iotlb_free_list, node, next_free); + rte_spinlock_unlock(&vq->iotlb_free_lock); +} + static void vhost_user_iotlb_cache_random_evict(struct vhost_virtqueue *vq); @@ -34,7 +57,7 @@ vhost_user_iotlb_pending_remove_all(struct vhost_virtqueue *vq) RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_pending_list, next, temp_node) { TAILQ_REMOVE(&vq->iotlb_pending_list, node, next); - rte_mempool_put(vq->iotlb_pool, node); + vhost_user_iotlb_pool_put(vq, node); } rte_rwlock_write_unlock(&vq->iotlb_pending_lock); @@ -66,22 +89,21 @@ vhost_user_iotlb_pending_insert(struct virtio_net *dev, struct vhost_virtqueue * uint64_t iova, uint8_t perm) { struct vhost_iotlb_entry *node; - int ret; - ret = rte_mempool_get(vq->iotlb_pool, (void **)&node); - if (ret) { + node = vhost_user_iotlb_pool_get(vq); + if (node == NULL) { VHOST_LOG_CONFIG(dev->ifname, DEBUG, - 
"IOTLB pool %s empty, clear entries for pending insertion\n", - vq->iotlb_pool->name); + "IOTLB pool for vq %"PRIu32" empty, clear entries for pending insertion\n", + vq->index); if (!TAILQ_EMPTY(&vq->iotlb_pending_list)) vhost_user_iotlb_pending_remove_all(vq); else vhost_user_iotlb_cache_random_evict(vq); - ret = rte_mempool_get(vq->iotlb_pool, (void **)&node); - if (ret) { + node = vhost_user_iotlb_pool_get(vq); + if (node == NULL) { VHOST_LOG_CONFIG(dev->ifname, ERR, - "IOTLB pool %s still empty, pending insertion failure\n", - vq->iotlb_pool->name); + "IOTLB pool vq %"PRIu32" still empty, pending insertion failure\n", + vq->index); return; } } @@ -113,7 +135,7 @@ vhost_user_iotlb_pending_remove(struct vhost_virtqueue *vq, if ((node->perm & perm) != node->perm) continue; TAILQ_REMOVE(&vq->iotlb_pending_list, node, next); - rte_mempool_put(vq->iotlb_pool, node); + vhost_user_iotlb_pool_put(vq, node); } rte_rwlock_write_unlock(&vq->iotlb_pending_lock); @@ -128,7 +150,7 @@ vhost_user_iotlb_cache_remove_all(struct vhost_virtqueue *vq) RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) { TAILQ_REMOVE(&vq->iotlb_list, node, next); - rte_mempool_put(vq->iotlb_pool, node); + vhost_user_iotlb_pool_put(vq, node); } vq->iotlb_cache_nr = 0; @@ -149,7 +171,7 @@ vhost_user_iotlb_cache_random_evict(struct vhost_virtqueue *vq) RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) { if (!entry_idx) { TAILQ_REMOVE(&vq->iotlb_list, node, next); - rte_mempool_put(vq->iotlb_pool, node); + vhost_user_iotlb_pool_put(vq, node); vq->iotlb_cache_nr--; break; } @@ -165,22 +187,21 @@ vhost_user_iotlb_cache_insert(struct virtio_net *dev, struct vhost_virtqueue *vq uint64_t size, uint8_t perm) { struct vhost_iotlb_entry *node, *new_node; - int ret; - ret = rte_mempool_get(vq->iotlb_pool, (void **)&new_node); - if (ret) { + new_node = vhost_user_iotlb_pool_get(vq); + if (new_node == NULL) { VHOST_LOG_CONFIG(dev->ifname, DEBUG, - "IOTLB pool %s empty, clear entries for 
cache insertion\n", - vq->iotlb_pool->name); + "IOTLB pool vq %"PRIu32" empty, clear entries for cache insertion\n", + vq->index); if (!TAILQ_EMPTY(&vq->iotlb_list)) vhost_user_iotlb_cache_random_evict(vq); else vhost_user_iotlb_pending_remove_all(vq); - ret = rte_mempool_get(vq->iotlb_pool, (void **)&new_node); - if (ret) { + new_node = vhost_user_iotlb_pool_get(vq); + if (new_node == NULL) { VHOST_LOG_CONFIG(dev->ifname, ERR, - "IOTLB pool %s still empty, cache insertion failed\n", - vq->iotlb_pool->name); + "IOTLB pool vq %"PRIu32" still empty, cache insertion failed\n", + vq->index); return; } } @@ -198,7 +219,7 @@ vhost_user_iotlb_cache_insert(struct virtio_net *dev, struct vhost_virtqueue *vq * So if iova already in list, assume identical. */ if (node->iova == new_node->iova) { - rte_mempool_put(vq->iotlb_pool, new_node); + vhost_user_iotlb_pool_put(vq, new_node); goto unlock; } else if (node->iova > new_node->iova) { TAILQ_INSERT_BEFORE(node, new_node, next); @@ -235,7 +256,7 @@ vhost_user_iotlb_cache_remove(struct vhost_virtqueue *vq, if (iova < node->iova + node->size) { TAILQ_REMOVE(&vq->iotlb_list, node, next); - rte_mempool_put(vq->iotlb_pool, node); + vhost_user_iotlb_pool_put(vq, node); vq->iotlb_cache_nr--; } } @@ -295,7 +316,7 @@ vhost_user_iotlb_flush_all(struct vhost_virtqueue *vq) int vhost_user_iotlb_init(struct virtio_net *dev, struct vhost_virtqueue *vq) { - char pool_name[RTE_MEMPOOL_NAMESIZE]; + unsigned int i; int socket = 0; if (vq->iotlb_pool) { @@ -304,6 +325,7 @@ vhost_user_iotlb_init(struct virtio_net *dev, struct vhost_virtqueue *vq) * just drop all cached and pending entries. 
*/ vhost_user_iotlb_flush_all(vq); + rte_free(vq->iotlb_pool); } #ifdef RTE_LIBRTE_VHOST_NUMA @@ -311,32 +333,32 @@ vhost_user_iotlb_init(struct virtio_net *dev, struct vhost_virtqueue *vq) socket = 0; #endif + rte_spinlock_init(&vq->iotlb_free_lock); rte_rwlock_init(&vq->iotlb_lock); rte_rwlock_init(&vq->iotlb_pending_lock); + SLIST_INIT(&vq->iotlb_free_list); TAILQ_INIT(&vq->iotlb_list); TAILQ_INIT(&vq->iotlb_pending_list); - snprintf(pool_name, sizeof(pool_name), "iotlb_%u_%d_%d", - getpid(), dev->vid, vq->index); - VHOST_LOG_CONFIG(dev->ifname, DEBUG, "IOTLB cache name: %s\n", pool_name); - - /* If already created, free it and recreate */ - vq->iotlb_pool = rte_mempool_lookup(pool_name); - rte_mempool_free(vq->iotlb_pool); - - vq->iotlb_pool = rte_mempool_create(pool_name, - IOTLB_CACHE_SIZE, sizeof(struct vhost_iotlb_entry), 0, - 0, 0, NULL, NULL, NULL, socket, - RTE_MEMPOOL_F_NO_CACHE_ALIGN | - RTE_MEMPOOL_F_SP_PUT); + vq->iotlb_pool = rte_calloc_socket("iotlb", IOTLB_CACHE_SIZE, + sizeof(struct vhost_iotlb_entry), 0, socket); if (!vq->iotlb_pool) { - VHOST_LOG_CONFIG(dev->ifname, ERR, "Failed to create IOTLB cache pool %s\n", - pool_name); + VHOST_LOG_CONFIG(dev->ifname, ERR, + "Failed to create IOTLB cache pool for vq %"PRIu32"\n", + vq->index); return -1; } + for (i = 0; i < IOTLB_CACHE_SIZE; i++) + vhost_user_iotlb_pool_put(vq, &vq->iotlb_pool[i]); vq->iotlb_cache_nr = 0; return 0; } + +void +vhost_user_iotlb_destroy(struct vhost_virtqueue *vq) +{ + rte_free(vq->iotlb_pool); +} diff --git a/lib/vhost/iotlb.h b/lib/vhost/iotlb.h index 738e31e7b9..e27ebebcf5 100644 --- a/lib/vhost/iotlb.h +++ b/lib/vhost/iotlb.h @@ -48,5 +48,6 @@ void vhost_user_iotlb_pending_remove(struct vhost_virtqueue *vq, uint64_t iova, uint64_t size, uint8_t perm); void vhost_user_iotlb_flush_all(struct vhost_virtqueue *vq); int vhost_user_iotlb_init(struct virtio_net *dev, struct vhost_virtqueue *vq); +void vhost_user_iotlb_destroy(struct vhost_virtqueue *vq); #endif /* 
_VHOST_IOTLB_H_ */ diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c index 97bae0de91..0dfebace58 100644 --- a/lib/vhost/vhost.c +++ b/lib/vhost/vhost.c @@ -394,7 +394,7 @@ free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq) vhost_free_async_mem(vq); rte_free(vq->batch_copy_elems); - rte_mempool_free(vq->iotlb_pool); + vhost_user_iotlb_destroy(vq); rte_free(vq->log_cache); rte_free(vq); } diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h index c6260b54cc..782d916ae0 100644 --- a/lib/vhost/vhost.h +++ b/lib/vhost/vhost.h @@ -299,10 +299,12 @@ struct vhost_virtqueue { rte_rwlock_t iotlb_lock; rte_rwlock_t iotlb_pending_lock; - struct rte_mempool *iotlb_pool; + struct vhost_iotlb_entry *iotlb_pool; TAILQ_HEAD(, vhost_iotlb_entry) iotlb_list; TAILQ_HEAD(, vhost_iotlb_entry) iotlb_pending_list; int iotlb_cache_nr; + rte_spinlock_t iotlb_free_lock; + SLIST_HEAD(, vhost_iotlb_entry) iotlb_free_list; /* Used to notify the guest (trigger interrupt) */ int callfd; -- 2.36.1 ^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 1/2] vhost: keep a reference to virtqueue index 2022-07-22 13:53 [PATCH 1/2] vhost: keep a reference to virtqueue index David Marchand 2022-07-22 13:53 ` [PATCH 2/2] vhost: stop using mempool for IOTLB David Marchand @ 2022-07-22 14:00 ` David Marchand 2022-07-22 15:13 ` David Marchand ` (2 subsequent siblings) 4 siblings, 0 replies; 25+ messages in thread From: David Marchand @ 2022-07-22 14:00 UTC (permalink / raw) To: dev, Maxime Coquelin; +Cc: Chenbo Xia On Fri, Jul 22, 2022 at 3:54 PM David Marchand <david.marchand@redhat.com> wrote: > > Having a back reference to the index of the vq in the dev->virtqueue[] > array makes it possible to unify the internal API with only passing dev > and vq. > It also allows displaying the vq index in log messages. Fwiw, this conflicts with the lock annotations I intend to finish for 22.11. -- David Marchand ^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 1/2] vhost: keep a reference to virtqueue index 2022-07-22 13:53 [PATCH 1/2] vhost: keep a reference to virtqueue index David Marchand 2022-07-22 13:53 ` [PATCH 2/2] vhost: stop using mempool for IOTLB David Marchand 2022-07-22 14:00 ` [PATCH 1/2] vhost: keep a reference to virtqueue index David Marchand @ 2022-07-22 15:13 ` David Marchand 2022-07-25 7:11 ` [PATCH v2 " David Marchand 2022-07-25 20:32 ` [PATCH v3 0/4] vHost IOTLB cache rework David Marchand 4 siblings, 0 replies; 25+ messages in thread From: David Marchand @ 2022-07-22 15:13 UTC (permalink / raw) To: dev; +Cc: Maxime Coquelin, Chenbo Xia On Fri, Jul 22, 2022 at 3:54 PM David Marchand <david.marchand@redhat.com> wrote: > @@ -607,32 +594,20 @@ init_vring_queue(struct virtio_net *dev, uint32_t vring_idx) > #endif > vq->numa_node = numa_node; > > - vhost_user_iotlb_init(dev, vring_idx); > + vhost_user_iotlb_init(dev, vq); Erm, it won't fly if the index is cleared at this point... I'll send a v2 next week. > } > -- David Marchand ^ permalink raw reply [flat|nested] 25+ messages in thread
* [PATCH v2 1/2] vhost: keep a reference to virtqueue index 2022-07-22 13:53 [PATCH 1/2] vhost: keep a reference to virtqueue index David Marchand ` (2 preceding siblings ...) 2022-07-22 15:13 ` David Marchand @ 2022-07-25 7:11 ` David Marchand 2022-07-25 7:11 ` [PATCH v2 2/2] vhost: stop using mempool for IOTLB cache David Marchand 2022-07-25 20:32 ` [PATCH v3 0/4] vHost IOTLB cache rework David Marchand 4 siblings, 1 reply; 25+ messages in thread From: David Marchand @ 2022-07-25 7:11 UTC (permalink / raw) To: dev; +Cc: Maxime Coquelin, Chenbo Xia Having a back reference to the index of the vq in the dev->virtqueue[] array makes it possible to unify the internal API, with only passing dev and vq. It also allows displaying the vq index in log messages. Remove virtqueue index checks where unneeded (like in static helpers called from a loop on all available virtqueue). Move virtqueue index validity checks the sooner possible. Signed-off-by: David Marchand <david.marchand@redhat.com> --- Changes since v1: - fix vq init (that's what happens when only recompiling the vhost library and not relinking testpmd...), --- lib/vhost/iotlb.c | 5 +-- lib/vhost/iotlb.h | 2 +- lib/vhost/vhost.c | 72 +++++++++++++---------------------- lib/vhost/vhost.h | 3 ++ lib/vhost/vhost_user.c | 58 +++++++++++++--------------- lib/vhost/virtio_net.c | 86 +++++++++++++++++++----------------------- 6 files changed, 96 insertions(+), 130 deletions(-) diff --git a/lib/vhost/iotlb.c b/lib/vhost/iotlb.c index 35b4193606..dd35338ec0 100644 --- a/lib/vhost/iotlb.c +++ b/lib/vhost/iotlb.c @@ -293,10 +293,9 @@ vhost_user_iotlb_flush_all(struct vhost_virtqueue *vq) } int -vhost_user_iotlb_init(struct virtio_net *dev, int vq_index) +vhost_user_iotlb_init(struct virtio_net *dev, struct vhost_virtqueue *vq) { char pool_name[RTE_MEMPOOL_NAMESIZE]; - struct vhost_virtqueue *vq = dev->virtqueue[vq_index]; int socket = 0; if (vq->iotlb_pool) { @@ -319,7 +318,7 @@ vhost_user_iotlb_init(struct virtio_net *dev, 
int vq_index) TAILQ_INIT(&vq->iotlb_pending_list); snprintf(pool_name, sizeof(pool_name), "iotlb_%u_%d_%d", - getpid(), dev->vid, vq_index); + getpid(), dev->vid, vq->index); VHOST_LOG_CONFIG(dev->ifname, DEBUG, "IOTLB cache name: %s\n", pool_name); /* If already created, free it and recreate */ diff --git a/lib/vhost/iotlb.h b/lib/vhost/iotlb.h index 8d0ff7473b..738e31e7b9 100644 --- a/lib/vhost/iotlb.h +++ b/lib/vhost/iotlb.h @@ -47,6 +47,6 @@ void vhost_user_iotlb_pending_insert(struct virtio_net *dev, struct vhost_virtqu void vhost_user_iotlb_pending_remove(struct vhost_virtqueue *vq, uint64_t iova, uint64_t size, uint8_t perm); void vhost_user_iotlb_flush_all(struct vhost_virtqueue *vq); -int vhost_user_iotlb_init(struct virtio_net *dev, int vq_index); +int vhost_user_iotlb_init(struct virtio_net *dev, struct vhost_virtqueue *vq); #endif /* _VHOST_IOTLB_H_ */ diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c index 60cb05a0ff..1b17233652 100644 --- a/lib/vhost/vhost.c +++ b/lib/vhost/vhost.c @@ -575,25 +575,14 @@ vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue *vq) } static void -init_vring_queue(struct virtio_net *dev, uint32_t vring_idx) +init_vring_queue(struct virtio_net *dev, struct vhost_virtqueue *vq, + uint32_t vring_idx) { - struct vhost_virtqueue *vq; int numa_node = SOCKET_ID_ANY; - if (vring_idx >= VHOST_MAX_VRING) { - VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to init vring, out of bound (%d)\n", - vring_idx); - return; - } - - vq = dev->virtqueue[vring_idx]; - if (!vq) { - VHOST_LOG_CONFIG(dev->ifname, ERR, "virtqueue not allocated (%d)\n", vring_idx); - return; - } - memset(vq, 0, sizeof(struct vhost_virtqueue)); + vq->index = vring_idx; vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD; vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD; vq->notif_enable = VIRTIO_UNINITIALIZED_NOTIF; @@ -607,31 +596,16 @@ init_vring_queue(struct virtio_net *dev, uint32_t vring_idx) #endif vq->numa_node = numa_node; - vhost_user_iotlb_init(dev, vring_idx); + 
vhost_user_iotlb_init(dev, vq); } static void -reset_vring_queue(struct virtio_net *dev, uint32_t vring_idx) +reset_vring_queue(struct virtio_net *dev, struct vhost_virtqueue *vq) { - struct vhost_virtqueue *vq; int callfd; - if (vring_idx >= VHOST_MAX_VRING) { - VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to reset vring, out of bound (%d)\n", - vring_idx); - return; - } - - vq = dev->virtqueue[vring_idx]; - if (!vq) { - VHOST_LOG_CONFIG(dev->ifname, ERR, - "failed to reset vring, virtqueue not allocated (%d)\n", - vring_idx); - return; - } - callfd = vq->callfd; - init_vring_queue(dev, vring_idx); + init_vring_queue(dev, vq, vq->index); vq->callfd = callfd; } @@ -655,7 +629,7 @@ alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx) } dev->virtqueue[i] = vq; - init_vring_queue(dev, i); + init_vring_queue(dev, vq, i); rte_spinlock_init(&vq->access_lock); vq->avail_wrap_counter = 1; vq->used_wrap_counter = 1; @@ -681,8 +655,16 @@ reset_device(struct virtio_net *dev) dev->protocol_features = 0; dev->flags &= VIRTIO_DEV_BUILTIN_VIRTIO_NET; - for (i = 0; i < dev->nr_vring; i++) - reset_vring_queue(dev, i); + for (i = 0; i < dev->nr_vring; i++) { + struct vhost_virtqueue *vq = dev->virtqueue[i]; + + if (!vq) { + VHOST_LOG_CONFIG(dev->ifname, ERR, + "failed to reset vring, virtqueue not allocated (%d)\n", i); + continue; + } + reset_vring_queue(dev, vq); + } } /* @@ -1661,17 +1643,15 @@ rte_vhost_extern_callback_register(int vid, } static __rte_always_inline int -async_channel_register(int vid, uint16_t queue_id) +async_channel_register(struct virtio_net *dev, struct vhost_virtqueue *vq) { - struct virtio_net *dev = get_device(vid); - struct vhost_virtqueue *vq = dev->virtqueue[queue_id]; struct vhost_async *async; int node = vq->numa_node; if (unlikely(vq->async)) { VHOST_LOG_CONFIG(dev->ifname, ERR, "async register failed: already registered (qid: %d)\n", - queue_id); + vq->index); return -1; } @@ -1679,7 +1659,7 @@ async_channel_register(int vid, uint16_t 
queue_id) if (!async) { VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to allocate async metadata (qid: %d)\n", - queue_id); + vq->index); return -1; } @@ -1688,7 +1668,7 @@ async_channel_register(int vid, uint16_t queue_id) if (!async->pkts_info) { VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to allocate async_pkts_info (qid: %d)\n", - queue_id); + vq->index); goto out_free_async; } @@ -1697,7 +1677,7 @@ async_channel_register(int vid, uint16_t queue_id) if (!async->pkts_cmpl_flag) { VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to allocate async pkts_cmpl_flag (qid: %d)\n", - queue_id); + vq->index); goto out_free_async; } @@ -1708,7 +1688,7 @@ async_channel_register(int vid, uint16_t queue_id) if (!async->buffers_packed) { VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to allocate async buffers (qid: %d)\n", - queue_id); + vq->index); goto out_free_inflight; } } else { @@ -1718,7 +1698,7 @@ async_channel_register(int vid, uint16_t queue_id) if (!async->descs_split) { VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to allocate async descs (qid: %d)\n", - queue_id); + vq->index); goto out_free_inflight; } } @@ -1753,7 +1733,7 @@ rte_vhost_async_channel_register(int vid, uint16_t queue_id) return -1; rte_spinlock_lock(&vq->access_lock); - ret = async_channel_register(vid, queue_id); + ret = async_channel_register(dev, vq); rte_spinlock_unlock(&vq->access_lock); return ret; @@ -1782,7 +1762,7 @@ rte_vhost_async_channel_register_thread_unsafe(int vid, uint16_t queue_id) return -1; } - return async_channel_register(vid, queue_id); + return async_channel_register(dev, vq); } int diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h index 40fac3b7c6..c6260b54cc 100644 --- a/lib/vhost/vhost.h +++ b/lib/vhost/vhost.h @@ -309,6 +309,9 @@ struct vhost_virtqueue { /* Currently unused as polling mode is enabled */ int kickfd; + /* Index of this vq in dev->virtqueue[] */ + uint32_t index; + /* inflight share memory info */ union { struct rte_vhost_inflight_info_split *inflight_split; diff --git 
a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index 4ad28bac45..73e69450fd 100644 --- a/lib/vhost/vhost_user.c +++ b/lib/vhost/vhost_user.c @@ -240,22 +240,20 @@ vhost_backend_cleanup(struct virtio_net *dev) } static void -vhost_user_notify_queue_state(struct virtio_net *dev, uint16_t index, - int enable) +vhost_user_notify_queue_state(struct virtio_net *dev, struct vhost_virtqueue *vq, + int enable) { struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev; - struct vhost_virtqueue *vq = dev->virtqueue[index]; /* Configure guest notifications on enable */ if (enable && vq->notif_enable != VIRTIO_UNINITIALIZED_NOTIF) vhost_enable_guest_notification(dev, vq, vq->notif_enable); if (vdpa_dev && vdpa_dev->ops->set_vring_state) - vdpa_dev->ops->set_vring_state(dev->vid, index, enable); + vdpa_dev->ops->set_vring_state(dev->vid, vq->index, enable); if (dev->notify_ops->vring_state_changed) - dev->notify_ops->vring_state_changed(dev->vid, - index, enable); + dev->notify_ops->vring_state_changed(dev->vid, vq->index, enable); } /* @@ -493,12 +491,11 @@ vhost_user_set_vring_num(struct virtio_net **pdev, * make them on the same numa node as the memory of vring descriptor. 
*/ #ifdef RTE_LIBRTE_VHOST_NUMA -static struct virtio_net* -numa_realloc(struct virtio_net *dev, int index) +static struct virtio_net * +numa_realloc(struct virtio_net *dev, struct vhost_virtqueue *vq) { int node, dev_node; struct virtio_net *old_dev; - struct vhost_virtqueue *vq; struct batch_copy_elem *bce; struct guest_page *gp; struct rte_vhost_memory *mem; @@ -506,7 +503,6 @@ numa_realloc(struct virtio_net *dev, int index) int ret; old_dev = dev; - vq = dev->virtqueue[index]; /* * If VQ is ready, it is too late to reallocate, it certainly already @@ -519,7 +515,7 @@ numa_realloc(struct virtio_net *dev, int index) if (ret) { VHOST_LOG_CONFIG(dev->ifname, ERR, "unable to get virtqueue %d numa information.\n", - index); + vq->index); return dev; } @@ -530,14 +526,14 @@ numa_realloc(struct virtio_net *dev, int index) if (!vq) { VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to realloc virtqueue %d on node %d\n", - index, node); + vq->index, node); return dev; } - if (vq != dev->virtqueue[index]) { + if (vq != dev->virtqueue[vq->index]) { VHOST_LOG_CONFIG(dev->ifname, INFO, "reallocated virtqueue on node %d\n", node); - dev->virtqueue[index] = vq; - vhost_user_iotlb_init(dev, index); + dev->virtqueue[vq->index] = vq; + vhost_user_iotlb_init(dev, vq); } if (vq_is_packed(dev)) { @@ -665,8 +661,8 @@ numa_realloc(struct virtio_net *dev, int index) return dev; } #else -static struct virtio_net* -numa_realloc(struct virtio_net *dev, int index __rte_unused) +static struct virtio_net * +numa_realloc(struct virtio_net *dev, struct vhost_virtqueue *vq __rte_unused) { return dev; } @@ -739,9 +735,8 @@ log_addr_to_gpa(struct virtio_net *dev, struct vhost_virtqueue *vq) } static struct virtio_net * -translate_ring_addresses(struct virtio_net *dev, int vq_index) +translate_ring_addresses(struct virtio_net *dev, struct vhost_virtqueue *vq) { - struct vhost_virtqueue *vq = dev->virtqueue[vq_index]; struct vhost_vring_addr *addr = &vq->ring_addrs; uint64_t len, expected_len; @@ -765,8 
+760,8 @@ translate_ring_addresses(struct virtio_net *dev, int vq_index) return dev; } - dev = numa_realloc(dev, vq_index); - vq = dev->virtqueue[vq_index]; + dev = numa_realloc(dev, vq); + vq = dev->virtqueue[vq->index]; addr = &vq->ring_addrs; len = sizeof(struct vring_packed_desc_event); @@ -807,8 +802,8 @@ translate_ring_addresses(struct virtio_net *dev, int vq_index) return dev; } - dev = numa_realloc(dev, vq_index); - vq = dev->virtqueue[vq_index]; + dev = numa_realloc(dev, vq); + vq = dev->virtqueue[vq->index]; addr = &vq->ring_addrs; len = sizeof(struct vring_avail) + sizeof(uint16_t) * vq->size; @@ -887,7 +882,7 @@ vhost_user_set_vring_addr(struct virtio_net **pdev, if ((vq->enabled && (dev->features & (1ULL << VHOST_USER_F_PROTOCOL_FEATURES))) || access_ok) { - dev = translate_ring_addresses(dev, ctx->msg.payload.addr.index); + dev = translate_ring_addresses(dev, vq); if (!dev) return RTE_VHOST_MSG_RESULT_ERR; @@ -1396,7 +1391,7 @@ vhost_user_set_mem_table(struct virtio_net **pdev, */ vring_invalidate(dev, vq); - dev = translate_ring_addresses(dev, i); + dev = translate_ring_addresses(dev, vq); if (!dev) { dev = *pdev; goto free_mem_table; @@ -1781,7 +1776,7 @@ vhost_user_set_vring_call(struct virtio_net **pdev, if (vq->ready) { vq->ready = false; - vhost_user_notify_queue_state(dev, file.index, 0); + vhost_user_notify_queue_state(dev, vq, 0); } if (vq->callfd >= 0) @@ -2029,7 +2024,8 @@ vhost_user_set_vring_kick(struct virtio_net **pdev, file.index, file.fd); /* Interpret ring addresses only when ring is started. */ - dev = translate_ring_addresses(dev, file.index); + vq = dev->virtqueue[file.index]; + dev = translate_ring_addresses(dev, vq); if (!dev) { if (file.fd != VIRTIO_INVALID_EVENTFD) close(file.fd); @@ -2039,8 +2035,6 @@ vhost_user_set_vring_kick(struct virtio_net **pdev, *pdev = dev; - vq = dev->virtqueue[file.index]; - /* * When VHOST_USER_F_PROTOCOL_FEATURES is not negotiated, * the ring starts already enabled. 
Otherwise, it is enabled via @@ -2052,7 +2046,7 @@ vhost_user_set_vring_kick(struct virtio_net **pdev, if (vq->ready) { vq->ready = false; - vhost_user_notify_queue_state(dev, file.index, 0); + vhost_user_notify_queue_state(dev, vq, 0); } if (vq->kickfd >= 0) @@ -2595,7 +2589,7 @@ vhost_user_iotlb_msg(struct virtio_net **pdev, if (is_vring_iotlb(dev, vq, imsg)) { rte_spinlock_lock(&vq->access_lock); - *pdev = dev = translate_ring_addresses(dev, i); + *pdev = dev = translate_ring_addresses(dev, vq); rte_spinlock_unlock(&vq->access_lock); } } @@ -3159,7 +3153,7 @@ vhost_user_msg_handler(int vid, int fd) if (cur_ready != (vq && vq->ready)) { vq->ready = cur_ready; - vhost_user_notify_queue_state(dev, i, cur_ready); + vhost_user_notify_queue_state(dev, vq, cur_ready); } } diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c index 35fa4670fd..467dfb203f 100644 --- a/lib/vhost/virtio_net.c +++ b/lib/vhost/virtio_net.c @@ -1555,22 +1555,12 @@ virtio_dev_rx_packed(struct virtio_net *dev, } static __rte_always_inline uint32_t -virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, +virtio_dev_rx(struct virtio_net *dev, struct vhost_virtqueue *vq, struct rte_mbuf **pkts, uint32_t count) { - struct vhost_virtqueue *vq; uint32_t nb_tx = 0; VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__); - if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { - VHOST_LOG_DATA(dev->ifname, ERR, - "%s: invalid virtqueue idx %d.\n", - __func__, queue_id); - return 0; - } - - vq = dev->virtqueue[queue_id]; - rte_spinlock_lock(&vq->access_lock); if (unlikely(!vq->enabled)) @@ -1620,7 +1610,14 @@ rte_vhost_enqueue_burst(int vid, uint16_t queue_id, return 0; } - return virtio_dev_rx(dev, queue_id, pkts, count); + if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { + VHOST_LOG_DATA(dev->ifname, ERR, + "%s: invalid virtqueue idx %d.\n", + __func__, queue_id); + return 0; + } + + return virtio_dev_rx(dev, dev->virtqueue[queue_id], pkts, count); } static 
__rte_always_inline uint16_t @@ -1669,8 +1666,7 @@ store_dma_desc_info_packed(struct vring_used_elem_packed *s_ring, static __rte_noinline uint32_t virtio_dev_rx_async_submit_split(struct virtio_net *dev, struct vhost_virtqueue *vq, - uint16_t queue_id, struct rte_mbuf **pkts, uint32_t count, - int16_t dma_id, uint16_t vchan_id) + struct rte_mbuf **pkts, uint32_t count, int16_t dma_id, uint16_t vchan_id) { struct buf_vector buf_vec[BUF_VECTOR_MAX]; uint32_t pkt_idx = 0; @@ -1732,7 +1728,7 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev, struct vhost_virtqueue VHOST_LOG_DATA(dev->ifname, DEBUG, "%s: failed to transfer %u packets for queue %u.\n", - __func__, pkt_err, queue_id); + __func__, pkt_err, vq->index); /* update number of completed packets */ pkt_idx = n_xfer; @@ -1878,8 +1874,7 @@ dma_error_handler_packed(struct vhost_virtqueue *vq, uint16_t slot_idx, static __rte_noinline uint32_t virtio_dev_rx_async_submit_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, - uint16_t queue_id, struct rte_mbuf **pkts, uint32_t count, - int16_t dma_id, uint16_t vchan_id) + struct rte_mbuf **pkts, uint32_t count, int16_t dma_id, uint16_t vchan_id) { uint32_t pkt_idx = 0; uint32_t remained = count; @@ -1924,7 +1919,7 @@ virtio_dev_rx_async_submit_packed(struct virtio_net *dev, struct vhost_virtqueue if (unlikely(pkt_err)) { VHOST_LOG_DATA(dev->ifname, DEBUG, "%s: failed to transfer %u packets for queue %u.\n", - __func__, pkt_err, queue_id); + __func__, pkt_err, vq->index); dma_error_handler_packed(vq, slot_idx, pkt_err, &pkt_idx); } @@ -2045,11 +2040,9 @@ write_back_completed_descs_packed(struct vhost_virtqueue *vq, } static __rte_always_inline uint16_t -vhost_poll_enqueue_completed(struct virtio_net *dev, uint16_t queue_id, - struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, - uint16_t vchan_id) +vhost_poll_enqueue_completed(struct virtio_net *dev, struct vhost_virtqueue *vq, + struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, uint16_t 
vchan_id) { - struct vhost_virtqueue *vq = dev->virtqueue[queue_id]; struct vhost_async *async = vq->async; struct async_inflight_info *pkts_info = async->pkts_info; uint16_t nr_cpl_pkts = 0; @@ -2156,7 +2149,7 @@ rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id, goto out; } - n_pkts_cpl = vhost_poll_enqueue_completed(dev, queue_id, pkts, count, dma_id, vchan_id); + n_pkts_cpl = vhost_poll_enqueue_completed(dev, vq, pkts, count, dma_id, vchan_id); vhost_queue_stats_update(dev, vq, pkts, n_pkts_cpl); vq->stats.inflight_completed += n_pkts_cpl; @@ -2216,12 +2209,11 @@ rte_vhost_clear_queue_thread_unsafe(int vid, uint16_t queue_id, } if ((queue_id & 1) == 0) - n_pkts_cpl = vhost_poll_enqueue_completed(dev, queue_id, - pkts, count, dma_id, vchan_id); - else { + n_pkts_cpl = vhost_poll_enqueue_completed(dev, vq, pkts, count, + dma_id, vchan_id); + else n_pkts_cpl = async_poll_dequeue_completed(dev, vq, pkts, count, - dma_id, vchan_id, dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS); - } + dma_id, vchan_id, dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS); vhost_queue_stats_update(dev, vq, pkts, n_pkts_cpl); vq->stats.inflight_completed += n_pkts_cpl; @@ -2275,12 +2267,11 @@ rte_vhost_clear_queue(int vid, uint16_t queue_id, struct rte_mbuf **pkts, } if ((queue_id & 1) == 0) - n_pkts_cpl = vhost_poll_enqueue_completed(dev, queue_id, - pkts, count, dma_id, vchan_id); - else { + n_pkts_cpl = vhost_poll_enqueue_completed(dev, vq, pkts, count, + dma_id, vchan_id); + else n_pkts_cpl = async_poll_dequeue_completed(dev, vq, pkts, count, - dma_id, vchan_id, dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS); - } + dma_id, vchan_id, dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS); vhost_queue_stats_update(dev, vq, pkts, n_pkts_cpl); vq->stats.inflight_completed += n_pkts_cpl; @@ -2292,19 +2283,12 @@ rte_vhost_clear_queue(int vid, uint16_t queue_id, struct rte_mbuf **pkts, } static __rte_always_inline uint32_t -virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id, 
+virtio_dev_rx_async_submit(struct virtio_net *dev, struct vhost_virtqueue *vq, struct rte_mbuf **pkts, uint32_t count, int16_t dma_id, uint16_t vchan_id) { - struct vhost_virtqueue *vq; uint32_t nb_tx = 0; VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__); - if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { - VHOST_LOG_DATA(dev->ifname, ERR, - "%s: invalid virtqueue idx %d.\n", - __func__, queue_id); - return 0; - } if (unlikely(!dma_copy_track[dma_id].vchans || !dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) { @@ -2314,8 +2298,6 @@ virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id, return 0; } - vq = dev->virtqueue[queue_id]; - rte_spinlock_lock(&vq->access_lock); if (unlikely(!vq->enabled || !vq->async)) @@ -2333,11 +2315,11 @@ virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id, goto out; if (vq_is_packed(dev)) - nb_tx = virtio_dev_rx_async_submit_packed(dev, vq, queue_id, - pkts, count, dma_id, vchan_id); + nb_tx = virtio_dev_rx_async_submit_packed(dev, vq, pkts, count, + dma_id, vchan_id); else - nb_tx = virtio_dev_rx_async_submit_split(dev, vq, queue_id, - pkts, count, dma_id, vchan_id); + nb_tx = virtio_dev_rx_async_submit_split(dev, vq, pkts, count, + dma_id, vchan_id); vq->stats.inflight_submitted += nb_tx; @@ -2368,7 +2350,15 @@ rte_vhost_submit_enqueue_burst(int vid, uint16_t queue_id, return 0; } - return virtio_dev_rx_async_submit(dev, queue_id, pkts, count, dma_id, vchan_id); + if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { + VHOST_LOG_DATA(dev->ifname, ERR, + "%s: invalid virtqueue idx %d.\n", + __func__, queue_id); + return 0; + } + + return virtio_dev_rx_async_submit(dev, dev->virtqueue[queue_id], pkts, count, + dma_id, vchan_id); } static inline bool -- 2.36.1 ^ permalink raw reply [flat|nested] 25+ messages in thread
* [PATCH v2 2/2] vhost: stop using mempool for IOTLB cache 2022-07-25 7:11 ` [PATCH v2 " David Marchand @ 2022-07-25 7:11 ` David Marchand 0 siblings, 0 replies; 25+ messages in thread From: David Marchand @ 2022-07-25 7:11 UTC (permalink / raw) To: dev; +Cc: Maxime Coquelin, Chenbo Xia A mempool consumes 3 memzones (with the default ring mempool driver). The default DPDK configuration allows RTE_MAX_MEMZONE (2560) memzones. Assuming there is no other memzones that means that we can have a maximum of 853 mempools. In the vhost library, the IOTLB cache code so far was requesting a mempool per vq, which means that at the maximum, the vhost library could request mempools for 426 qps. This limit was recently reached on big systems with a lot of virtio ports (and multiqueue in use). While the limit on mempool count could be something we fix at the DPDK project level, there is no reason to use mempools for the IOTLB cache: - the IOTLB cache entries do not need to be DMA-able and are only used by the current process (in multiprocess context), - getting/putting objects from/in the mempool is always associated with some other locks, so some level of lock contention is already present, We can convert to a malloc'd pool with objects put in a free list protected by a spinlock. 
Signed-off-by: David Marchand <david.marchand@redhat.com> --- lib/vhost/iotlb.c | 102 ++++++++++++++++++++++++++++------------------ lib/vhost/iotlb.h | 1 + lib/vhost/vhost.c | 2 +- lib/vhost/vhost.h | 4 +- 4 files changed, 67 insertions(+), 42 deletions(-) diff --git a/lib/vhost/iotlb.c b/lib/vhost/iotlb.c index dd35338ec0..2a78929e78 100644 --- a/lib/vhost/iotlb.c +++ b/lib/vhost/iotlb.c @@ -13,6 +13,7 @@ struct vhost_iotlb_entry { TAILQ_ENTRY(vhost_iotlb_entry) next; + SLIST_ENTRY(vhost_iotlb_entry) next_free; uint64_t iova; uint64_t uaddr; @@ -22,6 +23,28 @@ struct vhost_iotlb_entry { #define IOTLB_CACHE_SIZE 2048 +static struct vhost_iotlb_entry * +vhost_user_iotlb_pool_get(struct vhost_virtqueue *vq) +{ + struct vhost_iotlb_entry *node; + + rte_spinlock_lock(&vq->iotlb_free_lock); + node = SLIST_FIRST(&vq->iotlb_free_list); + if (node != NULL) + SLIST_REMOVE_HEAD(&vq->iotlb_free_list, next_free); + rte_spinlock_unlock(&vq->iotlb_free_lock); + return node; +} + +static void +vhost_user_iotlb_pool_put(struct vhost_virtqueue *vq, + struct vhost_iotlb_entry *node) +{ + rte_spinlock_lock(&vq->iotlb_free_lock); + SLIST_INSERT_HEAD(&vq->iotlb_free_list, node, next_free); + rte_spinlock_unlock(&vq->iotlb_free_lock); +} + static void vhost_user_iotlb_cache_random_evict(struct vhost_virtqueue *vq); @@ -34,7 +57,7 @@ vhost_user_iotlb_pending_remove_all(struct vhost_virtqueue *vq) RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_pending_list, next, temp_node) { TAILQ_REMOVE(&vq->iotlb_pending_list, node, next); - rte_mempool_put(vq->iotlb_pool, node); + vhost_user_iotlb_pool_put(vq, node); } rte_rwlock_write_unlock(&vq->iotlb_pending_lock); @@ -66,22 +89,21 @@ vhost_user_iotlb_pending_insert(struct virtio_net *dev, struct vhost_virtqueue * uint64_t iova, uint8_t perm) { struct vhost_iotlb_entry *node; - int ret; - ret = rte_mempool_get(vq->iotlb_pool, (void **)&node); - if (ret) { + node = vhost_user_iotlb_pool_get(vq); + if (node == NULL) { VHOST_LOG_CONFIG(dev->ifname, DEBUG, - 
"IOTLB pool %s empty, clear entries for pending insertion\n", - vq->iotlb_pool->name); + "IOTLB pool for vq %"PRIu32" empty, clear entries for pending insertion\n", + vq->index); if (!TAILQ_EMPTY(&vq->iotlb_pending_list)) vhost_user_iotlb_pending_remove_all(vq); else vhost_user_iotlb_cache_random_evict(vq); - ret = rte_mempool_get(vq->iotlb_pool, (void **)&node); - if (ret) { + node = vhost_user_iotlb_pool_get(vq); + if (node == NULL) { VHOST_LOG_CONFIG(dev->ifname, ERR, - "IOTLB pool %s still empty, pending insertion failure\n", - vq->iotlb_pool->name); + "IOTLB pool vq %"PRIu32" still empty, pending insertion failure\n", + vq->index); return; } } @@ -113,7 +135,7 @@ vhost_user_iotlb_pending_remove(struct vhost_virtqueue *vq, if ((node->perm & perm) != node->perm) continue; TAILQ_REMOVE(&vq->iotlb_pending_list, node, next); - rte_mempool_put(vq->iotlb_pool, node); + vhost_user_iotlb_pool_put(vq, node); } rte_rwlock_write_unlock(&vq->iotlb_pending_lock); @@ -128,7 +150,7 @@ vhost_user_iotlb_cache_remove_all(struct vhost_virtqueue *vq) RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) { TAILQ_REMOVE(&vq->iotlb_list, node, next); - rte_mempool_put(vq->iotlb_pool, node); + vhost_user_iotlb_pool_put(vq, node); } vq->iotlb_cache_nr = 0; @@ -149,7 +171,7 @@ vhost_user_iotlb_cache_random_evict(struct vhost_virtqueue *vq) RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) { if (!entry_idx) { TAILQ_REMOVE(&vq->iotlb_list, node, next); - rte_mempool_put(vq->iotlb_pool, node); + vhost_user_iotlb_pool_put(vq, node); vq->iotlb_cache_nr--; break; } @@ -165,22 +187,21 @@ vhost_user_iotlb_cache_insert(struct virtio_net *dev, struct vhost_virtqueue *vq uint64_t size, uint8_t perm) { struct vhost_iotlb_entry *node, *new_node; - int ret; - ret = rte_mempool_get(vq->iotlb_pool, (void **)&new_node); - if (ret) { + new_node = vhost_user_iotlb_pool_get(vq); + if (new_node == NULL) { VHOST_LOG_CONFIG(dev->ifname, DEBUG, - "IOTLB pool %s empty, clear entries for 
cache insertion\n", - vq->iotlb_pool->name); + "IOTLB pool vq %"PRIu32" empty, clear entries for cache insertion\n", + vq->index); if (!TAILQ_EMPTY(&vq->iotlb_list)) vhost_user_iotlb_cache_random_evict(vq); else vhost_user_iotlb_pending_remove_all(vq); - ret = rte_mempool_get(vq->iotlb_pool, (void **)&new_node); - if (ret) { + new_node = vhost_user_iotlb_pool_get(vq); + if (new_node == NULL) { VHOST_LOG_CONFIG(dev->ifname, ERR, - "IOTLB pool %s still empty, cache insertion failed\n", - vq->iotlb_pool->name); + "IOTLB pool vq %"PRIu32" still empty, cache insertion failed\n", + vq->index); return; } } @@ -198,7 +219,7 @@ vhost_user_iotlb_cache_insert(struct virtio_net *dev, struct vhost_virtqueue *vq * So if iova already in list, assume identical. */ if (node->iova == new_node->iova) { - rte_mempool_put(vq->iotlb_pool, new_node); + vhost_user_iotlb_pool_put(vq, new_node); goto unlock; } else if (node->iova > new_node->iova) { TAILQ_INSERT_BEFORE(node, new_node, next); @@ -235,7 +256,7 @@ vhost_user_iotlb_cache_remove(struct vhost_virtqueue *vq, if (iova < node->iova + node->size) { TAILQ_REMOVE(&vq->iotlb_list, node, next); - rte_mempool_put(vq->iotlb_pool, node); + vhost_user_iotlb_pool_put(vq, node); vq->iotlb_cache_nr--; } } @@ -295,7 +316,7 @@ vhost_user_iotlb_flush_all(struct vhost_virtqueue *vq) int vhost_user_iotlb_init(struct virtio_net *dev, struct vhost_virtqueue *vq) { - char pool_name[RTE_MEMPOOL_NAMESIZE]; + unsigned int i; int socket = 0; if (vq->iotlb_pool) { @@ -304,6 +325,7 @@ vhost_user_iotlb_init(struct virtio_net *dev, struct vhost_virtqueue *vq) * just drop all cached and pending entries. 
*/ vhost_user_iotlb_flush_all(vq); + rte_free(vq->iotlb_pool); } #ifdef RTE_LIBRTE_VHOST_NUMA @@ -311,32 +333,32 @@ vhost_user_iotlb_init(struct virtio_net *dev, struct vhost_virtqueue *vq) socket = 0; #endif + rte_spinlock_init(&vq->iotlb_free_lock); rte_rwlock_init(&vq->iotlb_lock); rte_rwlock_init(&vq->iotlb_pending_lock); + SLIST_INIT(&vq->iotlb_free_list); TAILQ_INIT(&vq->iotlb_list); TAILQ_INIT(&vq->iotlb_pending_list); - snprintf(pool_name, sizeof(pool_name), "iotlb_%u_%d_%d", - getpid(), dev->vid, vq->index); - VHOST_LOG_CONFIG(dev->ifname, DEBUG, "IOTLB cache name: %s\n", pool_name); - - /* If already created, free it and recreate */ - vq->iotlb_pool = rte_mempool_lookup(pool_name); - rte_mempool_free(vq->iotlb_pool); - - vq->iotlb_pool = rte_mempool_create(pool_name, - IOTLB_CACHE_SIZE, sizeof(struct vhost_iotlb_entry), 0, - 0, 0, NULL, NULL, NULL, socket, - RTE_MEMPOOL_F_NO_CACHE_ALIGN | - RTE_MEMPOOL_F_SP_PUT); + vq->iotlb_pool = rte_calloc_socket("iotlb", IOTLB_CACHE_SIZE, + sizeof(struct vhost_iotlb_entry), 0, socket); if (!vq->iotlb_pool) { - VHOST_LOG_CONFIG(dev->ifname, ERR, "Failed to create IOTLB cache pool %s\n", - pool_name); + VHOST_LOG_CONFIG(dev->ifname, ERR, + "Failed to create IOTLB cache pool for vq %"PRIu32"\n", + vq->index); return -1; } + for (i = 0; i < IOTLB_CACHE_SIZE; i++) + vhost_user_iotlb_pool_put(vq, &vq->iotlb_pool[i]); vq->iotlb_cache_nr = 0; return 0; } + +void +vhost_user_iotlb_destroy(struct vhost_virtqueue *vq) +{ + rte_free(vq->iotlb_pool); +} diff --git a/lib/vhost/iotlb.h b/lib/vhost/iotlb.h index 738e31e7b9..e27ebebcf5 100644 --- a/lib/vhost/iotlb.h +++ b/lib/vhost/iotlb.h @@ -48,5 +48,6 @@ void vhost_user_iotlb_pending_remove(struct vhost_virtqueue *vq, uint64_t iova, uint64_t size, uint8_t perm); void vhost_user_iotlb_flush_all(struct vhost_virtqueue *vq); int vhost_user_iotlb_init(struct virtio_net *dev, struct vhost_virtqueue *vq); +void vhost_user_iotlb_destroy(struct vhost_virtqueue *vq); #endif /* 
_VHOST_IOTLB_H_ */ diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c index 1b17233652..aa671f47a3 100644 --- a/lib/vhost/vhost.c +++ b/lib/vhost/vhost.c @@ -394,7 +394,7 @@ free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq) vhost_free_async_mem(vq); rte_free(vq->batch_copy_elems); - rte_mempool_free(vq->iotlb_pool); + vhost_user_iotlb_destroy(vq); rte_free(vq->log_cache); rte_free(vq); } diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h index c6260b54cc..782d916ae0 100644 --- a/lib/vhost/vhost.h +++ b/lib/vhost/vhost.h @@ -299,10 +299,12 @@ struct vhost_virtqueue { rte_rwlock_t iotlb_lock; rte_rwlock_t iotlb_pending_lock; - struct rte_mempool *iotlb_pool; + struct vhost_iotlb_entry *iotlb_pool; TAILQ_HEAD(, vhost_iotlb_entry) iotlb_list; TAILQ_HEAD(, vhost_iotlb_entry) iotlb_pending_list; int iotlb_cache_nr; + rte_spinlock_t iotlb_free_lock; + SLIST_HEAD(, vhost_iotlb_entry) iotlb_free_list; /* Used to notify the guest (trigger interrupt) */ int callfd; -- 2.36.1 ^ permalink raw reply [flat|nested] 25+ messages in thread
* [PATCH v3 0/4] vHost IOTLB cache rework 2022-07-22 13:53 [PATCH 1/2] vhost: keep a reference to virtqueue index David Marchand ` (3 preceding siblings ...) 2022-07-25 7:11 ` [PATCH v2 " David Marchand @ 2022-07-25 20:32 ` David Marchand 2022-07-25 20:32 ` [PATCH v3 1/4] vhost: fix vq use after free on NUMA reallocation David Marchand ` (4 more replies) 4 siblings, 5 replies; 25+ messages in thread From: David Marchand @ 2022-07-25 20:32 UTC (permalink / raw) To: dev This series aim was initially to solve a scalability issue with the IOTLB cache code. But along the way, I tried to make the code a bit more robust by unifying how the device and virtqueue are passed around. -- David Marchand David Marchand (4): vhost: fix vq use after free on numa reallocation vhost: make numa reallocation code more robust vhost: keep a reference to virtqueue index vhost: stop using mempool for IOTLB cache lib/vhost/iotlb.c | 105 +++++++++++++++----------- lib/vhost/iotlb.h | 3 +- lib/vhost/vhost.c | 74 +++++++----------- lib/vhost/vhost.h | 7 +- lib/vhost/vhost_user.c | 167 +++++++++++++++++++---------------------- lib/vhost/virtio_net.c | 86 ++++++++++----------- 6 files changed, 212 insertions(+), 230 deletions(-) -- 2.36.1 ^ permalink raw reply [flat|nested] 25+ messages in thread
* [PATCH v3 1/4] vhost: fix vq use after free on NUMA reallocation 2022-07-25 20:32 ` [PATCH v3 0/4] vHost IOTLB cache rework David Marchand @ 2022-07-25 20:32 ` David Marchand 2022-07-26 7:55 ` Maxime Coquelin 2022-07-25 20:32 ` [PATCH v3 2/4] vhost: make NUMA reallocation code more robust David Marchand ` (3 subsequent siblings) 4 siblings, 1 reply; 25+ messages in thread From: David Marchand @ 2022-07-25 20:32 UTC (permalink / raw) To: dev; +Cc: stable, Maxime Coquelin, Chenbo Xia translate_ring_addresses (via numa_realloc) may change a virtio device and virtio queue. The virtqueue object must be refreshed before accessing the lock. Fixes: 04c27cb673b9 ("vhost: fix unsafe vring addresses modifications") Cc: stable@dpdk.org Signed-off-by: David Marchand <david.marchand@redhat.com> --- lib/vhost/vhost_user.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index 4ad28bac45..91d40e32fc 100644 --- a/lib/vhost/vhost_user.c +++ b/lib/vhost/vhost_user.c @@ -2596,6 +2596,7 @@ vhost_user_iotlb_msg(struct virtio_net **pdev, if (is_vring_iotlb(dev, vq, imsg)) { rte_spinlock_lock(&vq->access_lock); *pdev = dev = translate_ring_addresses(dev, i); + vq = dev->virtqueue[i]; rte_spinlock_unlock(&vq->access_lock); } } -- 2.36.1 ^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH v3 1/4] vhost: fix vq use after free on NUMA reallocation 2022-07-25 20:32 ` [PATCH v3 1/4] vhost: fix vq use after free on NUMA reallocation David Marchand @ 2022-07-26 7:55 ` Maxime Coquelin 2022-09-13 15:02 ` Maxime Coquelin 0 siblings, 1 reply; 25+ messages in thread From: Maxime Coquelin @ 2022-07-26 7:55 UTC (permalink / raw) To: David Marchand, dev; +Cc: stable, Chenbo Xia On 7/25/22 22:32, David Marchand wrote: > translate_ring_addresses (via numa_realloc) may change a virtio device and > virtio queue. > The virtqueue object must be refreshed before accessing the lock. > > Fixes: 04c27cb673b9 ("vhost: fix unsafe vring addresses modifications") > Cc: stable@dpdk.org > > Signed-off-by: David Marchand <david.marchand@redhat.com> > --- > lib/vhost/vhost_user.c | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c > index 4ad28bac45..91d40e32fc 100644 > --- a/lib/vhost/vhost_user.c > +++ b/lib/vhost/vhost_user.c > @@ -2596,6 +2596,7 @@ vhost_user_iotlb_msg(struct virtio_net **pdev, > if (is_vring_iotlb(dev, vq, imsg)) { > rte_spinlock_lock(&vq->access_lock); > *pdev = dev = translate_ring_addresses(dev, i); > + vq = dev->virtqueue[i]; > rte_spinlock_unlock(&vq->access_lock); > } > } Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com> Thanks, Maxime ^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH v3 1/4] vhost: fix vq use after free on NUMA reallocation 2022-07-26 7:55 ` Maxime Coquelin @ 2022-09-13 15:02 ` Maxime Coquelin 2022-09-14 1:05 ` Xia, Chenbo 0 siblings, 1 reply; 25+ messages in thread From: Maxime Coquelin @ 2022-09-13 15:02 UTC (permalink / raw) To: David Marchand, Chenbo Xia, Thomas Monjalon; +Cc: stable, dev Hi, On 7/26/22 09:55, Maxime Coquelin wrote: > > > On 7/25/22 22:32, David Marchand wrote: >> translate_ring_addresses (via numa_realloc) may change a virtio device >> and >> virtio queue. >> The virtqueue object must be refreshed before accessing the lock. >> >> Fixes: 04c27cb673b9 ("vhost: fix unsafe vring addresses modifications") >> Cc: stable@dpdk.org >> >> Signed-off-by: David Marchand <david.marchand@redhat.com> >> --- >> lib/vhost/vhost_user.c | 1 + >> 1 file changed, 1 insertion(+) >> >> diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c >> index 4ad28bac45..91d40e32fc 100644 >> --- a/lib/vhost/vhost_user.c >> +++ b/lib/vhost/vhost_user.c >> @@ -2596,6 +2596,7 @@ vhost_user_iotlb_msg(struct virtio_net **pdev, >> if (is_vring_iotlb(dev, vq, imsg)) { >> rte_spinlock_lock(&vq->access_lock); >> *pdev = dev = translate_ring_addresses(dev, i); >> + vq = dev->virtqueue[i]; >> rte_spinlock_unlock(&vq->access_lock); >> } >> } > > Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com> > > Thanks, > Maxime The bug this patch is fixing is being reproduced downstream. It would be great it gets merged in main branch rapidly so that we can perform the backport. Chenbo, are you planning a pull request for vhost/virtio in the next few days? If not, should the main branch maintainer pick this single patch directly and let the rest of the series more time for reviews? Thanks, Maxime ^ permalink raw reply [flat|nested] 25+ messages in thread
* RE: [PATCH v3 1/4] vhost: fix vq use after free on NUMA reallocation 2022-09-13 15:02 ` Maxime Coquelin @ 2022-09-14 1:05 ` Xia, Chenbo 2022-09-14 7:14 ` Maxime Coquelin 0 siblings, 1 reply; 25+ messages in thread From: Xia, Chenbo @ 2022-09-14 1:05 UTC (permalink / raw) To: Maxime Coquelin, David Marchand, Thomas Monjalon; +Cc: stable, dev Hi Maxime, > -----Original Message----- > From: Maxime Coquelin <maxime.coquelin@redhat.com> > Sent: Tuesday, September 13, 2022 11:03 PM > To: David Marchand <david.marchand@redhat.com>; Xia, Chenbo > <chenbo.xia@intel.com>; Thomas Monjalon <thomas@monjalon.net> > Cc: stable@dpdk.org; dev@dpdk.org > Subject: Re: [PATCH v3 1/4] vhost: fix vq use after free on NUMA > reallocation > > Hi, > > On 7/26/22 09:55, Maxime Coquelin wrote: > > > > > > On 7/25/22 22:32, David Marchand wrote: > >> translate_ring_addresses (via numa_realloc) may change a virtio device > >> and > >> virtio queue. > >> The virtqueue object must be refreshed before accessing the lock. > >> > >> Fixes: 04c27cb673b9 ("vhost: fix unsafe vring addresses modifications") > >> Cc: stable@dpdk.org > >> > >> Signed-off-by: David Marchand <david.marchand@redhat.com> > >> --- > >> lib/vhost/vhost_user.c | 1 + > >> 1 file changed, 1 insertion(+) > >> > >> diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c > >> index 4ad28bac45..91d40e32fc 100644 > >> --- a/lib/vhost/vhost_user.c > >> +++ b/lib/vhost/vhost_user.c > >> @@ -2596,6 +2596,7 @@ vhost_user_iotlb_msg(struct virtio_net **pdev, > >> if (is_vring_iotlb(dev, vq, imsg)) { > >> rte_spinlock_lock(&vq->access_lock); > >> *pdev = dev = translate_ring_addresses(dev, i); > >> + vq = dev->virtqueue[i]; > >> rte_spinlock_unlock(&vq->access_lock); > >> } > >> } > > > > Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com> > > > > Thanks, > > Maxime > > The bug this patch is fixing is being reproduced downstream. > It would be great it gets merged in main branch rapidly so that we can > perform the backport. 
> > Chenbo, are you planning a pull request for vhost/virtio in the next few > days? If not, should the main branch maintainer pick this single patch > directly and let the rest of the series more time for reviews? Based on the status of all patches in the list, I guess PR will not happen this week. So it will be good if David/Thomas can directly pick up this. Thanks, Chenbo > > Thanks, > Maxime ^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH v3 1/4] vhost: fix vq use after free on NUMA reallocation 2022-09-14 1:05 ` Xia, Chenbo @ 2022-09-14 7:14 ` Maxime Coquelin 2022-09-14 9:15 ` Thomas Monjalon 0 siblings, 1 reply; 25+ messages in thread From: Maxime Coquelin @ 2022-09-14 7:14 UTC (permalink / raw) To: Xia, Chenbo, David Marchand, Thomas Monjalon; +Cc: stable, dev Hi Chenbo, On 9/14/22 03:05, Xia, Chenbo wrote: > Hi Maxime, > >> -----Original Message----- >> From: Maxime Coquelin <maxime.coquelin@redhat.com> >> Sent: Tuesday, September 13, 2022 11:03 PM >> To: David Marchand <david.marchand@redhat.com>; Xia, Chenbo >> <chenbo.xia@intel.com>; Thomas Monjalon <thomas@monjalon.net> >> Cc: stable@dpdk.org; dev@dpdk.org >> Subject: Re: [PATCH v3 1/4] vhost: fix vq use after free on NUMA >> reallocation >> >> Hi, >> >> On 7/26/22 09:55, Maxime Coquelin wrote: >>> >>> >>> On 7/25/22 22:32, David Marchand wrote: >>>> translate_ring_addresses (via numa_realloc) may change a virtio device >>>> and >>>> virtio queue. >>>> The virtqueue object must be refreshed before accessing the lock. >>>> >>>> Fixes: 04c27cb673b9 ("vhost: fix unsafe vring addresses modifications") >>>> Cc: stable@dpdk.org >>>> >>>> Signed-off-by: David Marchand <david.marchand@redhat.com> >>>> --- >>>> lib/vhost/vhost_user.c | 1 + >>>> 1 file changed, 1 insertion(+) >>>> >>>> diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c >>>> index 4ad28bac45..91d40e32fc 100644 >>>> --- a/lib/vhost/vhost_user.c >>>> +++ b/lib/vhost/vhost_user.c >>>> @@ -2596,6 +2596,7 @@ vhost_user_iotlb_msg(struct virtio_net **pdev, >>>> if (is_vring_iotlb(dev, vq, imsg)) { >>>> rte_spinlock_lock(&vq->access_lock); >>>> *pdev = dev = translate_ring_addresses(dev, i); >>>> + vq = dev->virtqueue[i]; >>>> rte_spinlock_unlock(&vq->access_lock); >>>> } >>>> } >>> >>> Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com> >>> >>> Thanks, >>> Maxime >> >> The bug this patch is fixing is being reproduced downstream. 
>> It would be great it gets merged in main branch rapidly so that we can >> perform the backport. >> >> Chenbo, are you planning a pull request for vhost/virtio in the next few >> days? If not, should the main branch maintainer pick this single patch >> directly and let the rest of the series more time for reviews? > > Based on the status of all patches in the list, I guess PR will not happen > this week. So it will be good if David/Thomas can directly pick up this. OK, sounds good to me. Thomas/David, is that good on your side? Thanks, Maxime > Thanks, > Chenbo > >> >> Thanks, >> Maxime > ^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH v3 1/4] vhost: fix vq use after free on NUMA reallocation 2022-09-14 7:14 ` Maxime Coquelin @ 2022-09-14 9:15 ` Thomas Monjalon 2022-09-14 9:34 ` Maxime Coquelin 0 siblings, 1 reply; 25+ messages in thread From: Thomas Monjalon @ 2022-09-14 9:15 UTC (permalink / raw) To: Xia, Chenbo, David Marchand, Maxime Coquelin; +Cc: stable, dev 14/09/2022 09:14, Maxime Coquelin: > On 9/14/22 03:05, Xia, Chenbo wrote: > > From: Maxime Coquelin <maxime.coquelin@redhat.com> > >> On 7/26/22 09:55, Maxime Coquelin wrote: > >> The bug this patch is fixing is being reproduced downstream. > >> It would be great it gets merged in main branch rapidly so that we can > >> perform the backport. > >> > >> Chenbo, are you planning a pull request for vhost/virtio in the next few > >> days? If not, should the main branch maintainer pick this single patch > >> directly and let the rest of the series more time for reviews? > > > > Based on the status of all patches in the list, I guess PR will not happen > > this week. So it will be good if David/Thomas can directly pick up this. > > OK, sounds good to me. > > Thomas/David, is that good on your side? Is there a blocker to merge the whole series? ^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH v3 1/4] vhost: fix vq use after free on NUMA reallocation 2022-09-14 9:15 ` Thomas Monjalon @ 2022-09-14 9:34 ` Maxime Coquelin 2022-09-14 9:45 ` Thomas Monjalon 0 siblings, 1 reply; 25+ messages in thread From: Maxime Coquelin @ 2022-09-14 9:34 UTC (permalink / raw) To: Thomas Monjalon, Xia, Chenbo, David Marchand; +Cc: stable, dev On 9/14/22 11:15, Thomas Monjalon wrote: > 14/09/2022 09:14, Maxime Coquelin: >> On 9/14/22 03:05, Xia, Chenbo wrote: >>> From: Maxime Coquelin <maxime.coquelin@redhat.com> >>>> On 7/26/22 09:55, Maxime Coquelin wrote: >>>> The bug this patch is fixing is being reproduced downstream. >>>> It would be great it gets merged in main branch rapidly so that we can >>>> perform the backport. >>>> >>>> Chenbo, are you planning a pull request for vhost/virtio in the next few >>>> days? If not, should the main branch maintainer pick this single patch >>>> directly and let the rest of the series more time for reviews? >>> >>> Based on the status of all patches in the list, I guess PR will not happen >>> this week. So it will be good if David/Thomas can directly pick up this. >> >> OK, sounds good to me. >> >> Thomas/David, is that good on your side? > > Is there a blocker to merge the whole series? No, I reviewed the whole series. But other patches are an enhancement, so it would not hurt to have more reviews. I am fine either way. Thanks, Maxime ^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH v3 1/4] vhost: fix vq use after free on NUMA reallocation 2022-09-14 9:34 ` Maxime Coquelin @ 2022-09-14 9:45 ` Thomas Monjalon 2022-09-14 11:48 ` Maxime Coquelin 0 siblings, 1 reply; 25+ messages in thread From: Thomas Monjalon @ 2022-09-14 9:45 UTC (permalink / raw) To: Xia, Chenbo, David Marchand, Maxime Coquelin; +Cc: stable, dev 14/09/2022 11:34, Maxime Coquelin: > > On 9/14/22 11:15, Thomas Monjalon wrote: > > 14/09/2022 09:14, Maxime Coquelin: > >> On 9/14/22 03:05, Xia, Chenbo wrote: > >>> From: Maxime Coquelin <maxime.coquelin@redhat.com> > >>>> On 7/26/22 09:55, Maxime Coquelin wrote: > >>>> The bug this patch is fixing is being reproduced downstream. > >>>> It would be great it gets merged in main branch rapidly so that we can > >>>> perform the backport. > >>>> > >>>> Chenbo, are you planning a pull request for vhost/virtio in the next few > >>>> days? If not, should the main branch maintainer pick this single patch > >>>> directly and let the rest of the series more time for reviews? > >>> > >>> Based on the status of all patches in the list, I guess PR will not happen > >>> this week. So it will be good if David/Thomas can directly pick up this. > >> > >> OK, sounds good to me. > >> > >> Thomas/David, is that good on your side? > > > > Is there a blocker to merge the whole series? > > No, I reviewed the whole series. > > But other patches are an enhancement, so it would not hurt to have more > reviews. > > I am fine either way. I prefer not breaking series. How urgent is it? Should I merge it today? ^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH v3 1/4] vhost: fix vq use after free on NUMA reallocation 2022-09-14 9:45 ` Thomas Monjalon @ 2022-09-14 11:48 ` Maxime Coquelin 0 siblings, 0 replies; 25+ messages in thread From: Maxime Coquelin @ 2022-09-14 11:48 UTC (permalink / raw) To: Thomas Monjalon, Xia, Chenbo, David Marchand; +Cc: stable, dev On 9/14/22 11:45, Thomas Monjalon wrote: > 14/09/2022 11:34, Maxime Coquelin: >> >> On 9/14/22 11:15, Thomas Monjalon wrote: >>> 14/09/2022 09:14, Maxime Coquelin: >>>> On 9/14/22 03:05, Xia, Chenbo wrote: >>>>> From: Maxime Coquelin <maxime.coquelin@redhat.com> >>>>>> On 7/26/22 09:55, Maxime Coquelin wrote: >>>>>> The bug this patch is fixing is being reproduced downstream. >>>>>> It would be great it gets merged in main branch rapidly so that we can >>>>>> perform the backport. >>>>>> >>>>>> Chenbo, are you planning a pull request for vhost/virtio in the next few >>>>>> days? If not, should the main branch maintainer pick this single patch >>>>>> directly and let the rest of the series more time for reviews? >>>>> >>>>> Based on the status of all patches in the list, I guess PR will not happen >>>>> this week. So it will be good if David/Thomas can directly pick up this. >>>> >>>> OK, sounds good to me. >>>> >>>> Thomas/David, is that good on your side? >>> >>> Is there a blocker to merge the whole series? >> >> No, I reviewed the whole series. >> >> But other patches are an enhancement, so it would not hurt to have more >> reviews. >> >> I am fine either way. > > I prefer not breaking series. > How urgent is it? Should I merge it today? > > It causes failures in some of our QE tests, so it is urgent for us. If you prefer not breaking the series, I am fine if you take it all. Thanks, Maxime ^ permalink raw reply [flat|nested] 25+ messages in thread
* [PATCH v3 2/4] vhost: make NUMA reallocation code more robust 2022-07-25 20:32 ` [PATCH v3 0/4] vHost IOTLB cache rework David Marchand 2022-07-25 20:32 ` [PATCH v3 1/4] vhost: fix vq use after free on NUMA reallocation David Marchand @ 2022-07-25 20:32 ` David Marchand 2022-07-26 8:39 ` Maxime Coquelin 2022-07-25 20:32 ` [PATCH v3 3/4] vhost: keep a reference to virtqueue index David Marchand ` (2 subsequent siblings) 4 siblings, 1 reply; 25+ messages in thread From: David Marchand @ 2022-07-25 20:32 UTC (permalink / raw) To: dev; +Cc: Maxime Coquelin, Chenbo Xia translate_ring_addresses and numa_realloc may change a virtio device and virtio queue. Callers of those helpers must be extra careful and refresh any reference to old data. Change those functions prototype as a way to hint about this issue and always ask for an indirect pointer. Besides, when reallocating the device and queue, the code already made sure it will return a pointer to a valid device. The checks on such returned pointer can be removed. Signed-off-by: David Marchand <david.marchand@redhat.com> --- lib/vhost/vhost_user.c | 144 +++++++++++++++++++---------------------- 1 file changed, 66 insertions(+), 78 deletions(-) diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index 91d40e32fc..46d4a02c1e 100644 --- a/lib/vhost/vhost_user.c +++ b/lib/vhost/vhost_user.c @@ -493,11 +493,11 @@ vhost_user_set_vring_num(struct virtio_net **pdev, * make them on the same numa node as the memory of vring descriptor. 
*/ #ifdef RTE_LIBRTE_VHOST_NUMA -static struct virtio_net* -numa_realloc(struct virtio_net *dev, int index) +static void +numa_realloc(struct virtio_net **pdev, struct vhost_virtqueue **pvq, int index) { int node, dev_node; - struct virtio_net *old_dev; + struct virtio_net *dev; struct vhost_virtqueue *vq; struct batch_copy_elem *bce; struct guest_page *gp; @@ -505,34 +505,35 @@ numa_realloc(struct virtio_net *dev, int index) size_t mem_size; int ret; - old_dev = dev; - vq = dev->virtqueue[index]; + dev = *pdev; + vq = *pvq; /* * If VQ is ready, it is too late to reallocate, it certainly already * happened anyway on VHOST_USER_SET_VRING_ADRR. */ if (vq->ready) - return dev; + return; ret = get_mempolicy(&node, NULL, 0, vq->desc, MPOL_F_NODE | MPOL_F_ADDR); if (ret) { VHOST_LOG_CONFIG(dev->ifname, ERR, "unable to get virtqueue %d numa information.\n", index); - return dev; + return; } if (node == vq->numa_node) goto out_dev_realloc; - vq = rte_realloc_socket(vq, sizeof(*vq), 0, node); + vq = rte_realloc_socket(*pvq, sizeof(**pvq), 0, node); if (!vq) { VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to realloc virtqueue %d on node %d\n", index, node); - return dev; + return; } + *pvq = vq; if (vq != dev->virtqueue[index]) { VHOST_LOG_CONFIG(dev->ifname, INFO, "reallocated virtqueue on node %d\n", node); @@ -549,7 +550,7 @@ numa_realloc(struct virtio_net *dev, int index) VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to realloc shadow packed on node %d\n", node); - return dev; + return; } vq->shadow_used_packed = sup; } else { @@ -561,7 +562,7 @@ numa_realloc(struct virtio_net *dev, int index) VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to realloc shadow split on node %d\n", node); - return dev; + return; } vq->shadow_used_split = sus; } @@ -572,7 +573,7 @@ numa_realloc(struct virtio_net *dev, int index) VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to realloc batch copy elem on node %d\n", node); - return dev; + return; } vq->batch_copy_elems = bce; @@ -584,7 +585,7 @@ 
numa_realloc(struct virtio_net *dev, int index) VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to realloc log cache on node %d\n", node); - return dev; + return; } vq->log_cache = lc; } @@ -597,7 +598,7 @@ numa_realloc(struct virtio_net *dev, int index) VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to realloc resubmit inflight on node %d\n", node); - return dev; + return; } vq->resubmit_inflight = ri; @@ -610,7 +611,7 @@ numa_realloc(struct virtio_net *dev, int index) VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to realloc resubmit list on node %d\n", node); - return dev; + return; } ri->resubmit_list = rd; } @@ -621,22 +622,23 @@ numa_realloc(struct virtio_net *dev, int index) out_dev_realloc: if (dev->flags & VIRTIO_DEV_RUNNING) - return dev; + return; ret = get_mempolicy(&dev_node, NULL, 0, dev, MPOL_F_NODE | MPOL_F_ADDR); if (ret) { VHOST_LOG_CONFIG(dev->ifname, ERR, "unable to get numa information.\n"); - return dev; + return; } if (dev_node == node) - return dev; + return; - dev = rte_realloc_socket(old_dev, sizeof(*dev), 0, node); + dev = rte_realloc_socket(*pdev, sizeof(**pdev), 0, node); if (!dev) { - VHOST_LOG_CONFIG(old_dev->ifname, ERR, "failed to realloc dev on node %d\n", node); - return old_dev; + VHOST_LOG_CONFIG((*pdev)->ifname, ERR, "failed to realloc dev on node %d\n", node); + return; } + *pdev = dev; VHOST_LOG_CONFIG(dev->ifname, INFO, "reallocated device on node %d\n", node); vhost_devices[dev->vid] = dev; @@ -648,7 +650,7 @@ numa_realloc(struct virtio_net *dev, int index) VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to realloc mem table on node %d\n", node); - return dev; + return; } dev->mem = mem; @@ -658,17 +660,17 @@ numa_realloc(struct virtio_net *dev, int index) VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to realloc guest pages on node %d\n", node); - return dev; + return; } dev->guest_pages = gp; - - return dev; } #else -static struct virtio_net* -numa_realloc(struct virtio_net *dev, int index __rte_unused) +static void +numa_realloc(struct 
virtio_net **pdev, struct vhost_virtqueue **pvq, int index) { - return dev; + RTE_SET_USED(pdev); + RTE_SET_USED(pvq); + RTE_SET_USED(index); } #endif @@ -738,88 +740,92 @@ log_addr_to_gpa(struct virtio_net *dev, struct vhost_virtqueue *vq) return log_gpa; } -static struct virtio_net * -translate_ring_addresses(struct virtio_net *dev, int vq_index) +static void +translate_ring_addresses(struct virtio_net **pdev, struct vhost_virtqueue **pvq, + int vq_index) { - struct vhost_virtqueue *vq = dev->virtqueue[vq_index]; - struct vhost_vring_addr *addr = &vq->ring_addrs; + struct vhost_virtqueue *vq; + struct virtio_net *dev; uint64_t len, expected_len; - if (addr->flags & (1 << VHOST_VRING_F_LOG)) { + dev = *pdev; + vq = *pvq; + + if (vq->ring_addrs.flags & (1 << VHOST_VRING_F_LOG)) { vq->log_guest_addr = log_addr_to_gpa(dev, vq); if (vq->log_guest_addr == 0) { VHOST_LOG_CONFIG(dev->ifname, DEBUG, "failed to map log_guest_addr.\n"); - return dev; + return; } } if (vq_is_packed(dev)) { len = sizeof(struct vring_packed_desc) * vq->size; vq->desc_packed = (struct vring_packed_desc *)(uintptr_t) - ring_addr_to_vva(dev, vq, addr->desc_user_addr, &len); + ring_addr_to_vva(dev, vq, vq->ring_addrs.desc_user_addr, &len); if (vq->desc_packed == NULL || len != sizeof(struct vring_packed_desc) * vq->size) { VHOST_LOG_CONFIG(dev->ifname, DEBUG, "failed to map desc_packed ring.\n"); - return dev; + return; } - dev = numa_realloc(dev, vq_index); - vq = dev->virtqueue[vq_index]; - addr = &vq->ring_addrs; + numa_realloc(&dev, &vq, vq_index); + *pdev = dev; + *pvq = vq; len = sizeof(struct vring_packed_desc_event); vq->driver_event = (struct vring_packed_desc_event *) (uintptr_t)ring_addr_to_vva(dev, - vq, addr->avail_user_addr, &len); + vq, vq->ring_addrs.avail_user_addr, &len); if (vq->driver_event == NULL || len != sizeof(struct vring_packed_desc_event)) { VHOST_LOG_CONFIG(dev->ifname, DEBUG, "failed to find driver area address.\n"); - return dev; + return; } len = sizeof(struct 
vring_packed_desc_event); vq->device_event = (struct vring_packed_desc_event *) (uintptr_t)ring_addr_to_vva(dev, - vq, addr->used_user_addr, &len); + vq, vq->ring_addrs.used_user_addr, &len); if (vq->device_event == NULL || len != sizeof(struct vring_packed_desc_event)) { VHOST_LOG_CONFIG(dev->ifname, DEBUG, "failed to find device area address.\n"); - return dev; + return; } vq->access_ok = true; - return dev; + return; } /* The addresses are converted from QEMU virtual to Vhost virtual. */ if (vq->desc && vq->avail && vq->used) - return dev; + return; len = sizeof(struct vring_desc) * vq->size; vq->desc = (struct vring_desc *)(uintptr_t)ring_addr_to_vva(dev, - vq, addr->desc_user_addr, &len); + vq, vq->ring_addrs.desc_user_addr, &len); if (vq->desc == 0 || len != sizeof(struct vring_desc) * vq->size) { VHOST_LOG_CONFIG(dev->ifname, DEBUG, "failed to map desc ring.\n"); - return dev; + return; } - dev = numa_realloc(dev, vq_index); - vq = dev->virtqueue[vq_index]; - addr = &vq->ring_addrs; + numa_realloc(&dev, &vq, vq_index); + *pdev = dev; + *pvq = vq; len = sizeof(struct vring_avail) + sizeof(uint16_t) * vq->size; if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) len += sizeof(uint16_t); expected_len = len; vq->avail = (struct vring_avail *)(uintptr_t)ring_addr_to_vva(dev, - vq, addr->avail_user_addr, &len); + vq, vq->ring_addrs.avail_user_addr, &len); if (vq->avail == 0 || len != expected_len) { VHOST_LOG_CONFIG(dev->ifname, DEBUG, "failed to map avail ring.\n"); - return dev; + return; } len = sizeof(struct vring_used) + @@ -828,10 +834,10 @@ translate_ring_addresses(struct virtio_net *dev, int vq_index) len += sizeof(uint16_t); expected_len = len; vq->used = (struct vring_used *)(uintptr_t)ring_addr_to_vva(dev, - vq, addr->used_user_addr, &len); + vq, vq->ring_addrs.used_user_addr, &len); if (vq->used == 0 || len != expected_len) { VHOST_LOG_CONFIG(dev->ifname, DEBUG, "failed to map used ring.\n"); - return dev; + return; } if (vq->last_used_idx != 
vq->used->idx) { @@ -850,8 +856,6 @@ translate_ring_addresses(struct virtio_net *dev, int vq_index) VHOST_LOG_CONFIG(dev->ifname, DEBUG, "mapped address avail: %p\n", vq->avail); VHOST_LOG_CONFIG(dev->ifname, DEBUG, "mapped address used: %p\n", vq->used); VHOST_LOG_CONFIG(dev->ifname, DEBUG, "log_guest_addr: %" PRIx64 "\n", vq->log_guest_addr); - - return dev; } /* @@ -887,10 +891,7 @@ vhost_user_set_vring_addr(struct virtio_net **pdev, if ((vq->enabled && (dev->features & (1ULL << VHOST_USER_F_PROTOCOL_FEATURES))) || access_ok) { - dev = translate_ring_addresses(dev, ctx->msg.payload.addr.index); - if (!dev) - return RTE_VHOST_MSG_RESULT_ERR; - + translate_ring_addresses(&dev, &vq, ctx->msg.payload.addr.index); *pdev = dev; } @@ -1396,12 +1397,7 @@ vhost_user_set_mem_table(struct virtio_net **pdev, */ vring_invalidate(dev, vq); - dev = translate_ring_addresses(dev, i); - if (!dev) { - dev = *pdev; - goto free_mem_table; - } - + translate_ring_addresses(&dev, &vq, i); *pdev = dev; } } @@ -2029,17 +2025,9 @@ vhost_user_set_vring_kick(struct virtio_net **pdev, file.index, file.fd); /* Interpret ring addresses only when ring is started. */ - dev = translate_ring_addresses(dev, file.index); - if (!dev) { - if (file.fd != VIRTIO_INVALID_EVENTFD) - close(file.fd); - - return RTE_VHOST_MSG_RESULT_ERR; - } - - *pdev = dev; - vq = dev->virtqueue[file.index]; + translate_ring_addresses(&dev, &vq, file.index); + *pdev = dev; /* * When VHOST_USER_F_PROTOCOL_FEATURES is not negotiated, @@ -2595,8 +2583,8 @@ vhost_user_iotlb_msg(struct virtio_net **pdev, if (is_vring_iotlb(dev, vq, imsg)) { rte_spinlock_lock(&vq->access_lock); - *pdev = dev = translate_ring_addresses(dev, i); - vq = dev->virtqueue[i]; + translate_ring_addresses(&dev, &vq, i); + *pdev = dev; rte_spinlock_unlock(&vq->access_lock); } } -- 2.36.1 ^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH v3 2/4] vhost: make NUMA reallocation code more robust 2022-07-25 20:32 ` [PATCH v3 2/4] vhost: make NUMA reallocation code more robust David Marchand @ 2022-07-26 8:39 ` Maxime Coquelin 0 siblings, 0 replies; 25+ messages in thread From: Maxime Coquelin @ 2022-07-26 8:39 UTC (permalink / raw) To: David Marchand, dev; +Cc: Chenbo Xia On 7/25/22 22:32, David Marchand wrote: > translate_ring_addresses and numa_realloc may change a virtio device and > virtio queue. Callers of those helpers must be extra careful and refresh > any reference to old data. > > Change those functions prototype as a way to hint about this issue and > always ask for an indirect pointer. > > Besides, when reallocating the device and queue, the code already made > sure it will return a pointer to a valid device. The checks on such > returned pointer can be removed. > > Signed-off-by: David Marchand <david.marchand@redhat.com> > --- > lib/vhost/vhost_user.c | 144 +++++++++++++++++++---------------------- > 1 file changed, 66 insertions(+), 78 deletions(-) > Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com> Thanks, Maxime ^ permalink raw reply [flat|nested] 25+ messages in thread
* [PATCH v3 3/4] vhost: keep a reference to virtqueue index 2022-07-25 20:32 ` [PATCH v3 0/4] vHost IOTLB cache rework David Marchand 2022-07-25 20:32 ` [PATCH v3 1/4] vhost: fix vq use after free on NUMA reallocation David Marchand 2022-07-25 20:32 ` [PATCH v3 2/4] vhost: make NUMA reallocation code more robust David Marchand @ 2022-07-25 20:32 ` David Marchand 2022-07-26 8:52 ` Maxime Coquelin 2022-07-25 20:32 ` [PATCH v3 4/4] vhost: stop using mempool for IOTLB cache David Marchand 2022-09-15 16:02 ` [PATCH v3 0/4] vHost IOTLB cache rework Thomas Monjalon 4 siblings, 1 reply; 25+ messages in thread From: David Marchand @ 2022-07-25 20:32 UTC (permalink / raw) To: dev; +Cc: Maxime Coquelin, Chenbo Xia Having a back reference to the index of the vq in the dev->virtqueue[] array makes it possible to unify the internal API, with only passing dev and vq. It also allows displaying the vq index in log messages. Remove virtqueue index checks where unneeded (like in static helpers called from a loop on all available virtqueue). Move virtqueue index validity checks the sooner possible. 
Signed-off-by: David Marchand <david.marchand@redhat.com> --- Changes since v2: - rebased on top of cleanup that avoids some use-after-free issues in case of allocation failures or numa realloactions, Changes since v1: - fix vq init (that's what happens when only recompiling the vhost library and not relinking testpmd...), --- lib/vhost/iotlb.c | 5 +-- lib/vhost/iotlb.h | 2 +- lib/vhost/vhost.c | 72 +++++++++++++---------------------- lib/vhost/vhost.h | 3 ++ lib/vhost/vhost_user.c | 46 +++++++++++----------- lib/vhost/virtio_net.c | 86 +++++++++++++++++++----------------------- 6 files changed, 91 insertions(+), 123 deletions(-) diff --git a/lib/vhost/iotlb.c b/lib/vhost/iotlb.c index 35b4193606..dd35338ec0 100644 --- a/lib/vhost/iotlb.c +++ b/lib/vhost/iotlb.c @@ -293,10 +293,9 @@ vhost_user_iotlb_flush_all(struct vhost_virtqueue *vq) } int -vhost_user_iotlb_init(struct virtio_net *dev, int vq_index) +vhost_user_iotlb_init(struct virtio_net *dev, struct vhost_virtqueue *vq) { char pool_name[RTE_MEMPOOL_NAMESIZE]; - struct vhost_virtqueue *vq = dev->virtqueue[vq_index]; int socket = 0; if (vq->iotlb_pool) { @@ -319,7 +318,7 @@ vhost_user_iotlb_init(struct virtio_net *dev, int vq_index) TAILQ_INIT(&vq->iotlb_pending_list); snprintf(pool_name, sizeof(pool_name), "iotlb_%u_%d_%d", - getpid(), dev->vid, vq_index); + getpid(), dev->vid, vq->index); VHOST_LOG_CONFIG(dev->ifname, DEBUG, "IOTLB cache name: %s\n", pool_name); /* If already created, free it and recreate */ diff --git a/lib/vhost/iotlb.h b/lib/vhost/iotlb.h index 8d0ff7473b..738e31e7b9 100644 --- a/lib/vhost/iotlb.h +++ b/lib/vhost/iotlb.h @@ -47,6 +47,6 @@ void vhost_user_iotlb_pending_insert(struct virtio_net *dev, struct vhost_virtqu void vhost_user_iotlb_pending_remove(struct vhost_virtqueue *vq, uint64_t iova, uint64_t size, uint8_t perm); void vhost_user_iotlb_flush_all(struct vhost_virtqueue *vq); -int vhost_user_iotlb_init(struct virtio_net *dev, int vq_index); +int vhost_user_iotlb_init(struct 
virtio_net *dev, struct vhost_virtqueue *vq); #endif /* _VHOST_IOTLB_H_ */ diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c index 60cb05a0ff..1b17233652 100644 --- a/lib/vhost/vhost.c +++ b/lib/vhost/vhost.c @@ -575,25 +575,14 @@ vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue *vq) } static void -init_vring_queue(struct virtio_net *dev, uint32_t vring_idx) +init_vring_queue(struct virtio_net *dev, struct vhost_virtqueue *vq, + uint32_t vring_idx) { - struct vhost_virtqueue *vq; int numa_node = SOCKET_ID_ANY; - if (vring_idx >= VHOST_MAX_VRING) { - VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to init vring, out of bound (%d)\n", - vring_idx); - return; - } - - vq = dev->virtqueue[vring_idx]; - if (!vq) { - VHOST_LOG_CONFIG(dev->ifname, ERR, "virtqueue not allocated (%d)\n", vring_idx); - return; - } - memset(vq, 0, sizeof(struct vhost_virtqueue)); + vq->index = vring_idx; vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD; vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD; vq->notif_enable = VIRTIO_UNINITIALIZED_NOTIF; @@ -607,31 +596,16 @@ init_vring_queue(struct virtio_net *dev, uint32_t vring_idx) #endif vq->numa_node = numa_node; - vhost_user_iotlb_init(dev, vring_idx); + vhost_user_iotlb_init(dev, vq); } static void -reset_vring_queue(struct virtio_net *dev, uint32_t vring_idx) +reset_vring_queue(struct virtio_net *dev, struct vhost_virtqueue *vq) { - struct vhost_virtqueue *vq; int callfd; - if (vring_idx >= VHOST_MAX_VRING) { - VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to reset vring, out of bound (%d)\n", - vring_idx); - return; - } - - vq = dev->virtqueue[vring_idx]; - if (!vq) { - VHOST_LOG_CONFIG(dev->ifname, ERR, - "failed to reset vring, virtqueue not allocated (%d)\n", - vring_idx); - return; - } - callfd = vq->callfd; - init_vring_queue(dev, vring_idx); + init_vring_queue(dev, vq, vq->index); vq->callfd = callfd; } @@ -655,7 +629,7 @@ alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx) } dev->virtqueue[i] = vq; - init_vring_queue(dev, i); + 
init_vring_queue(dev, vq, i); rte_spinlock_init(&vq->access_lock); vq->avail_wrap_counter = 1; vq->used_wrap_counter = 1; @@ -681,8 +655,16 @@ reset_device(struct virtio_net *dev) dev->protocol_features = 0; dev->flags &= VIRTIO_DEV_BUILTIN_VIRTIO_NET; - for (i = 0; i < dev->nr_vring; i++) - reset_vring_queue(dev, i); + for (i = 0; i < dev->nr_vring; i++) { + struct vhost_virtqueue *vq = dev->virtqueue[i]; + + if (!vq) { + VHOST_LOG_CONFIG(dev->ifname, ERR, + "failed to reset vring, virtqueue not allocated (%d)\n", i); + continue; + } + reset_vring_queue(dev, vq); + } } /* @@ -1661,17 +1643,15 @@ rte_vhost_extern_callback_register(int vid, } static __rte_always_inline int -async_channel_register(int vid, uint16_t queue_id) +async_channel_register(struct virtio_net *dev, struct vhost_virtqueue *vq) { - struct virtio_net *dev = get_device(vid); - struct vhost_virtqueue *vq = dev->virtqueue[queue_id]; struct vhost_async *async; int node = vq->numa_node; if (unlikely(vq->async)) { VHOST_LOG_CONFIG(dev->ifname, ERR, "async register failed: already registered (qid: %d)\n", - queue_id); + vq->index); return -1; } @@ -1679,7 +1659,7 @@ async_channel_register(int vid, uint16_t queue_id) if (!async) { VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to allocate async metadata (qid: %d)\n", - queue_id); + vq->index); return -1; } @@ -1688,7 +1668,7 @@ async_channel_register(int vid, uint16_t queue_id) if (!async->pkts_info) { VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to allocate async_pkts_info (qid: %d)\n", - queue_id); + vq->index); goto out_free_async; } @@ -1697,7 +1677,7 @@ async_channel_register(int vid, uint16_t queue_id) if (!async->pkts_cmpl_flag) { VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to allocate async pkts_cmpl_flag (qid: %d)\n", - queue_id); + vq->index); goto out_free_async; } @@ -1708,7 +1688,7 @@ async_channel_register(int vid, uint16_t queue_id) if (!async->buffers_packed) { VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to allocate async buffers (qid: %d)\n", - 
queue_id); + vq->index); goto out_free_inflight; } } else { @@ -1718,7 +1698,7 @@ async_channel_register(int vid, uint16_t queue_id) if (!async->descs_split) { VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to allocate async descs (qid: %d)\n", - queue_id); + vq->index); goto out_free_inflight; } } @@ -1753,7 +1733,7 @@ rte_vhost_async_channel_register(int vid, uint16_t queue_id) return -1; rte_spinlock_lock(&vq->access_lock); - ret = async_channel_register(vid, queue_id); + ret = async_channel_register(dev, vq); rte_spinlock_unlock(&vq->access_lock); return ret; @@ -1782,7 +1762,7 @@ rte_vhost_async_channel_register_thread_unsafe(int vid, uint16_t queue_id) return -1; } - return async_channel_register(vid, queue_id); + return async_channel_register(dev, vq); } int diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h index 40fac3b7c6..c6260b54cc 100644 --- a/lib/vhost/vhost.h +++ b/lib/vhost/vhost.h @@ -309,6 +309,9 @@ struct vhost_virtqueue { /* Currently unused as polling mode is enabled */ int kickfd; + /* Index of this vq in dev->virtqueue[] */ + uint32_t index; + /* inflight share memory info */ union { struct rte_vhost_inflight_info_split *inflight_split; diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index 46d4a02c1e..01820902d8 100644 --- a/lib/vhost/vhost_user.c +++ b/lib/vhost/vhost_user.c @@ -240,22 +240,20 @@ vhost_backend_cleanup(struct virtio_net *dev) } static void -vhost_user_notify_queue_state(struct virtio_net *dev, uint16_t index, - int enable) +vhost_user_notify_queue_state(struct virtio_net *dev, struct vhost_virtqueue *vq, + int enable) { struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev; - struct vhost_virtqueue *vq = dev->virtqueue[index]; /* Configure guest notifications on enable */ if (enable && vq->notif_enable != VIRTIO_UNINITIALIZED_NOTIF) vhost_enable_guest_notification(dev, vq, vq->notif_enable); if (vdpa_dev && vdpa_dev->ops->set_vring_state) - vdpa_dev->ops->set_vring_state(dev->vid, index, enable); + 
vdpa_dev->ops->set_vring_state(dev->vid, vq->index, enable); if (dev->notify_ops->vring_state_changed) - dev->notify_ops->vring_state_changed(dev->vid, - index, enable); + dev->notify_ops->vring_state_changed(dev->vid, vq->index, enable); } /* @@ -494,7 +492,7 @@ vhost_user_set_vring_num(struct virtio_net **pdev, */ #ifdef RTE_LIBRTE_VHOST_NUMA static void -numa_realloc(struct virtio_net **pdev, struct vhost_virtqueue **pvq, int index) +numa_realloc(struct virtio_net **pdev, struct vhost_virtqueue **pvq) { int node, dev_node; struct virtio_net *dev; @@ -519,7 +517,7 @@ numa_realloc(struct virtio_net **pdev, struct vhost_virtqueue **pvq, int index) if (ret) { VHOST_LOG_CONFIG(dev->ifname, ERR, "unable to get virtqueue %d numa information.\n", - index); + vq->index); return; } @@ -530,15 +528,15 @@ numa_realloc(struct virtio_net **pdev, struct vhost_virtqueue **pvq, int index) if (!vq) { VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to realloc virtqueue %d on node %d\n", - index, node); + (*pvq)->index, node); return; } *pvq = vq; - if (vq != dev->virtqueue[index]) { + if (vq != dev->virtqueue[vq->index]) { VHOST_LOG_CONFIG(dev->ifname, INFO, "reallocated virtqueue on node %d\n", node); - dev->virtqueue[index] = vq; - vhost_user_iotlb_init(dev, index); + dev->virtqueue[vq->index] = vq; + vhost_user_iotlb_init(dev, vq); } if (vq_is_packed(dev)) { @@ -666,11 +664,10 @@ numa_realloc(struct virtio_net **pdev, struct vhost_virtqueue **pvq, int index) } #else static void -numa_realloc(struct virtio_net **pdev, struct vhost_virtqueue **pvq, int index) +numa_realloc(struct virtio_net **pdev, struct vhost_virtqueue **pvq) { RTE_SET_USED(pdev); RTE_SET_USED(pvq); - RTE_SET_USED(index); } #endif @@ -741,8 +738,7 @@ log_addr_to_gpa(struct virtio_net *dev, struct vhost_virtqueue *vq) } static void -translate_ring_addresses(struct virtio_net **pdev, struct vhost_virtqueue **pvq, - int vq_index) +translate_ring_addresses(struct virtio_net **pdev, struct vhost_virtqueue **pvq) { struct 
vhost_virtqueue *vq; struct virtio_net *dev; @@ -771,7 +767,7 @@ translate_ring_addresses(struct virtio_net **pdev, struct vhost_virtqueue **pvq, return; } - numa_realloc(&dev, &vq, vq_index); + numa_realloc(&dev, &vq); *pdev = dev; *pvq = vq; @@ -813,7 +809,7 @@ translate_ring_addresses(struct virtio_net **pdev, struct vhost_virtqueue **pvq, return; } - numa_realloc(&dev, &vq, vq_index); + numa_realloc(&dev, &vq); *pdev = dev; *pvq = vq; @@ -891,7 +887,7 @@ vhost_user_set_vring_addr(struct virtio_net **pdev, if ((vq->enabled && (dev->features & (1ULL << VHOST_USER_F_PROTOCOL_FEATURES))) || access_ok) { - translate_ring_addresses(&dev, &vq, ctx->msg.payload.addr.index); + translate_ring_addresses(&dev, &vq); *pdev = dev; } @@ -1397,7 +1393,7 @@ vhost_user_set_mem_table(struct virtio_net **pdev, */ vring_invalidate(dev, vq); - translate_ring_addresses(&dev, &vq, i); + translate_ring_addresses(&dev, &vq); *pdev = dev; } } @@ -1777,7 +1773,7 @@ vhost_user_set_vring_call(struct virtio_net **pdev, if (vq->ready) { vq->ready = false; - vhost_user_notify_queue_state(dev, file.index, 0); + vhost_user_notify_queue_state(dev, vq, 0); } if (vq->callfd >= 0) @@ -2026,7 +2022,7 @@ vhost_user_set_vring_kick(struct virtio_net **pdev, /* Interpret ring addresses only when ring is started. 
*/ vq = dev->virtqueue[file.index]; - translate_ring_addresses(&dev, &vq, file.index); + translate_ring_addresses(&dev, &vq); *pdev = dev; /* @@ -2040,7 +2036,7 @@ vhost_user_set_vring_kick(struct virtio_net **pdev, if (vq->ready) { vq->ready = false; - vhost_user_notify_queue_state(dev, file.index, 0); + vhost_user_notify_queue_state(dev, vq, 0); } if (vq->kickfd >= 0) @@ -2583,7 +2579,7 @@ vhost_user_iotlb_msg(struct virtio_net **pdev, if (is_vring_iotlb(dev, vq, imsg)) { rte_spinlock_lock(&vq->access_lock); - translate_ring_addresses(&dev, &vq, i); + translate_ring_addresses(&dev, &vq); *pdev = dev; rte_spinlock_unlock(&vq->access_lock); } @@ -3148,7 +3144,7 @@ vhost_user_msg_handler(int vid, int fd) if (cur_ready != (vq && vq->ready)) { vq->ready = cur_ready; - vhost_user_notify_queue_state(dev, i, cur_ready); + vhost_user_notify_queue_state(dev, vq, cur_ready); } } diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c index 35fa4670fd..467dfb203f 100644 --- a/lib/vhost/virtio_net.c +++ b/lib/vhost/virtio_net.c @@ -1555,22 +1555,12 @@ virtio_dev_rx_packed(struct virtio_net *dev, } static __rte_always_inline uint32_t -virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, +virtio_dev_rx(struct virtio_net *dev, struct vhost_virtqueue *vq, struct rte_mbuf **pkts, uint32_t count) { - struct vhost_virtqueue *vq; uint32_t nb_tx = 0; VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__); - if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { - VHOST_LOG_DATA(dev->ifname, ERR, - "%s: invalid virtqueue idx %d.\n", - __func__, queue_id); - return 0; - } - - vq = dev->virtqueue[queue_id]; - rte_spinlock_lock(&vq->access_lock); if (unlikely(!vq->enabled)) @@ -1620,7 +1610,14 @@ rte_vhost_enqueue_burst(int vid, uint16_t queue_id, return 0; } - return virtio_dev_rx(dev, queue_id, pkts, count); + if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { + VHOST_LOG_DATA(dev->ifname, ERR, + "%s: invalid virtqueue idx %d.\n", + __func__, 
queue_id); + return 0; + } + + return virtio_dev_rx(dev, dev->virtqueue[queue_id], pkts, count); } static __rte_always_inline uint16_t @@ -1669,8 +1666,7 @@ store_dma_desc_info_packed(struct vring_used_elem_packed *s_ring, static __rte_noinline uint32_t virtio_dev_rx_async_submit_split(struct virtio_net *dev, struct vhost_virtqueue *vq, - uint16_t queue_id, struct rte_mbuf **pkts, uint32_t count, - int16_t dma_id, uint16_t vchan_id) + struct rte_mbuf **pkts, uint32_t count, int16_t dma_id, uint16_t vchan_id) { struct buf_vector buf_vec[BUF_VECTOR_MAX]; uint32_t pkt_idx = 0; @@ -1732,7 +1728,7 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev, struct vhost_virtqueue VHOST_LOG_DATA(dev->ifname, DEBUG, "%s: failed to transfer %u packets for queue %u.\n", - __func__, pkt_err, queue_id); + __func__, pkt_err, vq->index); /* update number of completed packets */ pkt_idx = n_xfer; @@ -1878,8 +1874,7 @@ dma_error_handler_packed(struct vhost_virtqueue *vq, uint16_t slot_idx, static __rte_noinline uint32_t virtio_dev_rx_async_submit_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, - uint16_t queue_id, struct rte_mbuf **pkts, uint32_t count, - int16_t dma_id, uint16_t vchan_id) + struct rte_mbuf **pkts, uint32_t count, int16_t dma_id, uint16_t vchan_id) { uint32_t pkt_idx = 0; uint32_t remained = count; @@ -1924,7 +1919,7 @@ virtio_dev_rx_async_submit_packed(struct virtio_net *dev, struct vhost_virtqueue if (unlikely(pkt_err)) { VHOST_LOG_DATA(dev->ifname, DEBUG, "%s: failed to transfer %u packets for queue %u.\n", - __func__, pkt_err, queue_id); + __func__, pkt_err, vq->index); dma_error_handler_packed(vq, slot_idx, pkt_err, &pkt_idx); } @@ -2045,11 +2040,9 @@ write_back_completed_descs_packed(struct vhost_virtqueue *vq, } static __rte_always_inline uint16_t -vhost_poll_enqueue_completed(struct virtio_net *dev, uint16_t queue_id, - struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, - uint16_t vchan_id) +vhost_poll_enqueue_completed(struct virtio_net 
*dev, struct vhost_virtqueue *vq, + struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, uint16_t vchan_id) { - struct vhost_virtqueue *vq = dev->virtqueue[queue_id]; struct vhost_async *async = vq->async; struct async_inflight_info *pkts_info = async->pkts_info; uint16_t nr_cpl_pkts = 0; @@ -2156,7 +2149,7 @@ rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id, goto out; } - n_pkts_cpl = vhost_poll_enqueue_completed(dev, queue_id, pkts, count, dma_id, vchan_id); + n_pkts_cpl = vhost_poll_enqueue_completed(dev, vq, pkts, count, dma_id, vchan_id); vhost_queue_stats_update(dev, vq, pkts, n_pkts_cpl); vq->stats.inflight_completed += n_pkts_cpl; @@ -2216,12 +2209,11 @@ rte_vhost_clear_queue_thread_unsafe(int vid, uint16_t queue_id, } if ((queue_id & 1) == 0) - n_pkts_cpl = vhost_poll_enqueue_completed(dev, queue_id, - pkts, count, dma_id, vchan_id); - else { + n_pkts_cpl = vhost_poll_enqueue_completed(dev, vq, pkts, count, + dma_id, vchan_id); + else n_pkts_cpl = async_poll_dequeue_completed(dev, vq, pkts, count, - dma_id, vchan_id, dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS); - } + dma_id, vchan_id, dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS); vhost_queue_stats_update(dev, vq, pkts, n_pkts_cpl); vq->stats.inflight_completed += n_pkts_cpl; @@ -2275,12 +2267,11 @@ rte_vhost_clear_queue(int vid, uint16_t queue_id, struct rte_mbuf **pkts, } if ((queue_id & 1) == 0) - n_pkts_cpl = vhost_poll_enqueue_completed(dev, queue_id, - pkts, count, dma_id, vchan_id); - else { + n_pkts_cpl = vhost_poll_enqueue_completed(dev, vq, pkts, count, + dma_id, vchan_id); + else n_pkts_cpl = async_poll_dequeue_completed(dev, vq, pkts, count, - dma_id, vchan_id, dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS); - } + dma_id, vchan_id, dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS); vhost_queue_stats_update(dev, vq, pkts, n_pkts_cpl); vq->stats.inflight_completed += n_pkts_cpl; @@ -2292,19 +2283,12 @@ rte_vhost_clear_queue(int vid, uint16_t queue_id, struct rte_mbuf **pkts, } static __rte_always_inline 
uint32_t -virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id, +virtio_dev_rx_async_submit(struct virtio_net *dev, struct vhost_virtqueue *vq, struct rte_mbuf **pkts, uint32_t count, int16_t dma_id, uint16_t vchan_id) { - struct vhost_virtqueue *vq; uint32_t nb_tx = 0; VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__); - if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { - VHOST_LOG_DATA(dev->ifname, ERR, - "%s: invalid virtqueue idx %d.\n", - __func__, queue_id); - return 0; - } if (unlikely(!dma_copy_track[dma_id].vchans || !dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) { @@ -2314,8 +2298,6 @@ virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id, return 0; } - vq = dev->virtqueue[queue_id]; - rte_spinlock_lock(&vq->access_lock); if (unlikely(!vq->enabled || !vq->async)) @@ -2333,11 +2315,11 @@ virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id, goto out; if (vq_is_packed(dev)) - nb_tx = virtio_dev_rx_async_submit_packed(dev, vq, queue_id, - pkts, count, dma_id, vchan_id); + nb_tx = virtio_dev_rx_async_submit_packed(dev, vq, pkts, count, + dma_id, vchan_id); else - nb_tx = virtio_dev_rx_async_submit_split(dev, vq, queue_id, - pkts, count, dma_id, vchan_id); + nb_tx = virtio_dev_rx_async_submit_split(dev, vq, pkts, count, + dma_id, vchan_id); vq->stats.inflight_submitted += nb_tx; @@ -2368,7 +2350,15 @@ rte_vhost_submit_enqueue_burst(int vid, uint16_t queue_id, return 0; } - return virtio_dev_rx_async_submit(dev, queue_id, pkts, count, dma_id, vchan_id); + if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { + VHOST_LOG_DATA(dev->ifname, ERR, + "%s: invalid virtqueue idx %d.\n", + __func__, queue_id); + return 0; + } + + return virtio_dev_rx_async_submit(dev, dev->virtqueue[queue_id], pkts, count, + dma_id, vchan_id); } static inline bool -- 2.36.1 ^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH v3 3/4] vhost: keep a reference to virtqueue index 2022-07-25 20:32 ` [PATCH v3 3/4] vhost: keep a reference to virtqueue index David Marchand @ 2022-07-26 8:52 ` Maxime Coquelin 2022-07-26 10:00 ` David Marchand 0 siblings, 1 reply; 25+ messages in thread From: Maxime Coquelin @ 2022-07-26 8:52 UTC (permalink / raw) To: David Marchand, dev; +Cc: Chenbo Xia On 7/25/22 22:32, David Marchand wrote: > Having a back reference to the index of the vq in the dev->virtqueue[] > array makes it possible to unify the internal API, with only passing dev > and vq. > It also allows displaying the vq index in log messages. > > Remove virtqueue index checks where unneeded (like in static helpers > called from a loop on all available virtqueue). > Move virtqueue index validity checks the sooner possible. > > Signed-off-by: David Marchand <david.marchand@redhat.com> > --- > Changes since v2: > - rebased on top of cleanup that avoids some use-after-free issues in > case of allocation failures or numa realloactions, > > Changes since v1: > - fix vq init (that's what happens when only recompiling the vhost > library and not relinking testpmd...), > > --- > lib/vhost/iotlb.c | 5 +-- > lib/vhost/iotlb.h | 2 +- > lib/vhost/vhost.c | 72 +++++++++++++---------------------- > lib/vhost/vhost.h | 3 ++ > lib/vhost/vhost_user.c | 46 +++++++++++----------- > lib/vhost/virtio_net.c | 86 +++++++++++++++++++----------------------- > 6 files changed, 91 insertions(+), 123 deletions(-) > ... > diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c > index 35fa4670fd..467dfb203f 100644 > --- a/lib/vhost/virtio_net.c > +++ b/lib/vhost/virtio_net.c ... 
> @@ -2275,12 +2267,11 @@ rte_vhost_clear_queue(int vid, uint16_t queue_id, struct rte_mbuf **pkts, > } > > if ((queue_id & 1) == 0) > - n_pkts_cpl = vhost_poll_enqueue_completed(dev, queue_id, > - pkts, count, dma_id, vchan_id); > - else { > + n_pkts_cpl = vhost_poll_enqueue_completed(dev, vq, pkts, count, > + dma_id, vchan_id); > + else > n_pkts_cpl = async_poll_dequeue_completed(dev, vq, pkts, count, > - dma_id, vchan_id, dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS); > - } > + dma_id, vchan_id, dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS); One of the two functions should be renamed for consistency, but that's not the point of this series. For this patch: Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com> Thanks, Maxime ^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH v3 3/4] vhost: keep a reference to virtqueue index 2022-07-26 8:52 ` Maxime Coquelin @ 2022-07-26 10:00 ` David Marchand 0 siblings, 0 replies; 25+ messages in thread From: David Marchand @ 2022-07-26 10:00 UTC (permalink / raw) To: Maxime Coquelin; +Cc: dev, Chenbo Xia On Tue, Jul 26, 2022 at 10:52 AM Maxime Coquelin <maxime.coquelin@redhat.com> wrote: > > @@ -2275,12 +2267,11 @@ rte_vhost_clear_queue(int vid, uint16_t queue_id, struct rte_mbuf **pkts, > > } > > > > if ((queue_id & 1) == 0) > > - n_pkts_cpl = vhost_poll_enqueue_completed(dev, queue_id, > > - pkts, count, dma_id, vchan_id); > > - else { > > + n_pkts_cpl = vhost_poll_enqueue_completed(dev, vq, pkts, count, > > + dma_id, vchan_id); > > + else > > n_pkts_cpl = async_poll_dequeue_completed(dev, vq, pkts, count, > > - dma_id, vchan_id, dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS); > > - } > > + dma_id, vchan_id, dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS); > > One of the two functions should be renamed for consistency, but that's > not the point of this series. The async prefix makes sense, so renaming vhost_poll_enqueue_completed as async_poll_enqueue_completed seems the way to go. I don't mind sending a separate patch for this. -- David Marchand ^ permalink raw reply [flat|nested] 25+ messages in thread
* [PATCH v3 4/4] vhost: stop using mempool for IOTLB cache 2022-07-25 20:32 ` [PATCH v3 0/4] vHost IOTLB cache rework David Marchand ` (2 preceding siblings ...) 2022-07-25 20:32 ` [PATCH v3 3/4] vhost: keep a reference to virtqueue index David Marchand @ 2022-07-25 20:32 ` David Marchand 2022-07-26 9:26 ` Maxime Coquelin 2022-09-15 16:02 ` [PATCH v3 0/4] vHost IOTLB cache rework Thomas Monjalon 4 siblings, 1 reply; 25+ messages in thread From: David Marchand @ 2022-07-25 20:32 UTC (permalink / raw) To: dev; +Cc: Maxime Coquelin, Chenbo Xia A mempool consumes 3 memzones (with the default ring mempool driver). The default DPDK configuration allows RTE_MAX_MEMZONE (2560) memzones. Assuming there is no other memzones that means that we can have a maximum of 853 mempools. In the vhost library, the IOTLB cache code so far was requesting a mempool per vq, which means that at the maximum, the vhost library could request mempools for 426 qps. This limit was recently reached on big systems with a lot of virtio ports (and multiqueue in use). While the limit on mempool count could be something we fix at the DPDK project level, there is no reason to use mempools for the IOTLB cache: - the IOTLB cache entries do not need to be DMA-able and are only used by the current process (in multiprocess context), - getting/putting objects from/in the mempool is always associated with some other locks, so some level of lock contention is already present, We can convert to a malloc'd pool with objects put in a free list protected by a spinlock. 
Signed-off-by: David Marchand <david.marchand@redhat.com> --- lib/vhost/iotlb.c | 102 ++++++++++++++++++++++++++++------------------ lib/vhost/iotlb.h | 1 + lib/vhost/vhost.c | 2 +- lib/vhost/vhost.h | 4 +- 4 files changed, 67 insertions(+), 42 deletions(-) diff --git a/lib/vhost/iotlb.c b/lib/vhost/iotlb.c index dd35338ec0..2a78929e78 100644 --- a/lib/vhost/iotlb.c +++ b/lib/vhost/iotlb.c @@ -13,6 +13,7 @@ struct vhost_iotlb_entry { TAILQ_ENTRY(vhost_iotlb_entry) next; + SLIST_ENTRY(vhost_iotlb_entry) next_free; uint64_t iova; uint64_t uaddr; @@ -22,6 +23,28 @@ struct vhost_iotlb_entry { #define IOTLB_CACHE_SIZE 2048 +static struct vhost_iotlb_entry * +vhost_user_iotlb_pool_get(struct vhost_virtqueue *vq) +{ + struct vhost_iotlb_entry *node; + + rte_spinlock_lock(&vq->iotlb_free_lock); + node = SLIST_FIRST(&vq->iotlb_free_list); + if (node != NULL) + SLIST_REMOVE_HEAD(&vq->iotlb_free_list, next_free); + rte_spinlock_unlock(&vq->iotlb_free_lock); + return node; +} + +static void +vhost_user_iotlb_pool_put(struct vhost_virtqueue *vq, + struct vhost_iotlb_entry *node) +{ + rte_spinlock_lock(&vq->iotlb_free_lock); + SLIST_INSERT_HEAD(&vq->iotlb_free_list, node, next_free); + rte_spinlock_unlock(&vq->iotlb_free_lock); +} + static void vhost_user_iotlb_cache_random_evict(struct vhost_virtqueue *vq); @@ -34,7 +57,7 @@ vhost_user_iotlb_pending_remove_all(struct vhost_virtqueue *vq) RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_pending_list, next, temp_node) { TAILQ_REMOVE(&vq->iotlb_pending_list, node, next); - rte_mempool_put(vq->iotlb_pool, node); + vhost_user_iotlb_pool_put(vq, node); } rte_rwlock_write_unlock(&vq->iotlb_pending_lock); @@ -66,22 +89,21 @@ vhost_user_iotlb_pending_insert(struct virtio_net *dev, struct vhost_virtqueue * uint64_t iova, uint8_t perm) { struct vhost_iotlb_entry *node; - int ret; - ret = rte_mempool_get(vq->iotlb_pool, (void **)&node); - if (ret) { + node = vhost_user_iotlb_pool_get(vq); + if (node == NULL) { VHOST_LOG_CONFIG(dev->ifname, DEBUG, - 
"IOTLB pool %s empty, clear entries for pending insertion\n", - vq->iotlb_pool->name); + "IOTLB pool for vq %"PRIu32" empty, clear entries for pending insertion\n", + vq->index); if (!TAILQ_EMPTY(&vq->iotlb_pending_list)) vhost_user_iotlb_pending_remove_all(vq); else vhost_user_iotlb_cache_random_evict(vq); - ret = rte_mempool_get(vq->iotlb_pool, (void **)&node); - if (ret) { + node = vhost_user_iotlb_pool_get(vq); + if (node == NULL) { VHOST_LOG_CONFIG(dev->ifname, ERR, - "IOTLB pool %s still empty, pending insertion failure\n", - vq->iotlb_pool->name); + "IOTLB pool vq %"PRIu32" still empty, pending insertion failure\n", + vq->index); return; } } @@ -113,7 +135,7 @@ vhost_user_iotlb_pending_remove(struct vhost_virtqueue *vq, if ((node->perm & perm) != node->perm) continue; TAILQ_REMOVE(&vq->iotlb_pending_list, node, next); - rte_mempool_put(vq->iotlb_pool, node); + vhost_user_iotlb_pool_put(vq, node); } rte_rwlock_write_unlock(&vq->iotlb_pending_lock); @@ -128,7 +150,7 @@ vhost_user_iotlb_cache_remove_all(struct vhost_virtqueue *vq) RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) { TAILQ_REMOVE(&vq->iotlb_list, node, next); - rte_mempool_put(vq->iotlb_pool, node); + vhost_user_iotlb_pool_put(vq, node); } vq->iotlb_cache_nr = 0; @@ -149,7 +171,7 @@ vhost_user_iotlb_cache_random_evict(struct vhost_virtqueue *vq) RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) { if (!entry_idx) { TAILQ_REMOVE(&vq->iotlb_list, node, next); - rte_mempool_put(vq->iotlb_pool, node); + vhost_user_iotlb_pool_put(vq, node); vq->iotlb_cache_nr--; break; } @@ -165,22 +187,21 @@ vhost_user_iotlb_cache_insert(struct virtio_net *dev, struct vhost_virtqueue *vq uint64_t size, uint8_t perm) { struct vhost_iotlb_entry *node, *new_node; - int ret; - ret = rte_mempool_get(vq->iotlb_pool, (void **)&new_node); - if (ret) { + new_node = vhost_user_iotlb_pool_get(vq); + if (new_node == NULL) { VHOST_LOG_CONFIG(dev->ifname, DEBUG, - "IOTLB pool %s empty, clear entries for 
cache insertion\n", - vq->iotlb_pool->name); + "IOTLB pool vq %"PRIu32" empty, clear entries for cache insertion\n", + vq->index); if (!TAILQ_EMPTY(&vq->iotlb_list)) vhost_user_iotlb_cache_random_evict(vq); else vhost_user_iotlb_pending_remove_all(vq); - ret = rte_mempool_get(vq->iotlb_pool, (void **)&new_node); - if (ret) { + new_node = vhost_user_iotlb_pool_get(vq); + if (new_node == NULL) { VHOST_LOG_CONFIG(dev->ifname, ERR, - "IOTLB pool %s still empty, cache insertion failed\n", - vq->iotlb_pool->name); + "IOTLB pool vq %"PRIu32" still empty, cache insertion failed\n", + vq->index); return; } } @@ -198,7 +219,7 @@ vhost_user_iotlb_cache_insert(struct virtio_net *dev, struct vhost_virtqueue *vq * So if iova already in list, assume identical. */ if (node->iova == new_node->iova) { - rte_mempool_put(vq->iotlb_pool, new_node); + vhost_user_iotlb_pool_put(vq, new_node); goto unlock; } else if (node->iova > new_node->iova) { TAILQ_INSERT_BEFORE(node, new_node, next); @@ -235,7 +256,7 @@ vhost_user_iotlb_cache_remove(struct vhost_virtqueue *vq, if (iova < node->iova + node->size) { TAILQ_REMOVE(&vq->iotlb_list, node, next); - rte_mempool_put(vq->iotlb_pool, node); + vhost_user_iotlb_pool_put(vq, node); vq->iotlb_cache_nr--; } } @@ -295,7 +316,7 @@ vhost_user_iotlb_flush_all(struct vhost_virtqueue *vq) int vhost_user_iotlb_init(struct virtio_net *dev, struct vhost_virtqueue *vq) { - char pool_name[RTE_MEMPOOL_NAMESIZE]; + unsigned int i; int socket = 0; if (vq->iotlb_pool) { @@ -304,6 +325,7 @@ vhost_user_iotlb_init(struct virtio_net *dev, struct vhost_virtqueue *vq) * just drop all cached and pending entries. 
*/ vhost_user_iotlb_flush_all(vq); + rte_free(vq->iotlb_pool); } #ifdef RTE_LIBRTE_VHOST_NUMA @@ -311,32 +333,32 @@ vhost_user_iotlb_init(struct virtio_net *dev, struct vhost_virtqueue *vq) socket = 0; #endif + rte_spinlock_init(&vq->iotlb_free_lock); rte_rwlock_init(&vq->iotlb_lock); rte_rwlock_init(&vq->iotlb_pending_lock); + SLIST_INIT(&vq->iotlb_free_list); TAILQ_INIT(&vq->iotlb_list); TAILQ_INIT(&vq->iotlb_pending_list); - snprintf(pool_name, sizeof(pool_name), "iotlb_%u_%d_%d", - getpid(), dev->vid, vq->index); - VHOST_LOG_CONFIG(dev->ifname, DEBUG, "IOTLB cache name: %s\n", pool_name); - - /* If already created, free it and recreate */ - vq->iotlb_pool = rte_mempool_lookup(pool_name); - rte_mempool_free(vq->iotlb_pool); - - vq->iotlb_pool = rte_mempool_create(pool_name, - IOTLB_CACHE_SIZE, sizeof(struct vhost_iotlb_entry), 0, - 0, 0, NULL, NULL, NULL, socket, - RTE_MEMPOOL_F_NO_CACHE_ALIGN | - RTE_MEMPOOL_F_SP_PUT); + vq->iotlb_pool = rte_calloc_socket("iotlb", IOTLB_CACHE_SIZE, + sizeof(struct vhost_iotlb_entry), 0, socket); if (!vq->iotlb_pool) { - VHOST_LOG_CONFIG(dev->ifname, ERR, "Failed to create IOTLB cache pool %s\n", - pool_name); + VHOST_LOG_CONFIG(dev->ifname, ERR, + "Failed to create IOTLB cache pool for vq %"PRIu32"\n", + vq->index); return -1; } + for (i = 0; i < IOTLB_CACHE_SIZE; i++) + vhost_user_iotlb_pool_put(vq, &vq->iotlb_pool[i]); vq->iotlb_cache_nr = 0; return 0; } + +void +vhost_user_iotlb_destroy(struct vhost_virtqueue *vq) +{ + rte_free(vq->iotlb_pool); +} diff --git a/lib/vhost/iotlb.h b/lib/vhost/iotlb.h index 738e31e7b9..e27ebebcf5 100644 --- a/lib/vhost/iotlb.h +++ b/lib/vhost/iotlb.h @@ -48,5 +48,6 @@ void vhost_user_iotlb_pending_remove(struct vhost_virtqueue *vq, uint64_t iova, uint64_t size, uint8_t perm); void vhost_user_iotlb_flush_all(struct vhost_virtqueue *vq); int vhost_user_iotlb_init(struct virtio_net *dev, struct vhost_virtqueue *vq); +void vhost_user_iotlb_destroy(struct vhost_virtqueue *vq); #endif /* 
_VHOST_IOTLB_H_ */ diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c index 1b17233652..aa671f47a3 100644 --- a/lib/vhost/vhost.c +++ b/lib/vhost/vhost.c @@ -394,7 +394,7 @@ free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq) vhost_free_async_mem(vq); rte_free(vq->batch_copy_elems); - rte_mempool_free(vq->iotlb_pool); + vhost_user_iotlb_destroy(vq); rte_free(vq->log_cache); rte_free(vq); } diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h index c6260b54cc..782d916ae0 100644 --- a/lib/vhost/vhost.h +++ b/lib/vhost/vhost.h @@ -299,10 +299,12 @@ struct vhost_virtqueue { rte_rwlock_t iotlb_lock; rte_rwlock_t iotlb_pending_lock; - struct rte_mempool *iotlb_pool; + struct vhost_iotlb_entry *iotlb_pool; TAILQ_HEAD(, vhost_iotlb_entry) iotlb_list; TAILQ_HEAD(, vhost_iotlb_entry) iotlb_pending_list; int iotlb_cache_nr; + rte_spinlock_t iotlb_free_lock; + SLIST_HEAD(, vhost_iotlb_entry) iotlb_free_list; /* Used to notify the guest (trigger interrupt) */ int callfd; -- 2.36.1 ^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH v3 4/4] vhost: stop using mempool for IOTLB cache 2022-07-25 20:32 ` [PATCH v3 4/4] vhost: stop using mempool for IOTLB cache David Marchand @ 2022-07-26 9:26 ` Maxime Coquelin 2023-02-17 7:42 ` Yuan, DukaiX 0 siblings, 1 reply; 25+ messages in thread From: Maxime Coquelin @ 2022-07-26 9:26 UTC (permalink / raw) To: David Marchand, dev; +Cc: Chenbo Xia On 7/25/22 22:32, David Marchand wrote: > A mempool consumes 3 memzones (with the default ring mempool driver). > The default DPDK configuration allows RTE_MAX_MEMZONE (2560) memzones. > > Assuming there is no other memzones that means that we can have a > maximum of 853 mempools. > > In the vhost library, the IOTLB cache code so far was requesting a > mempool per vq, which means that at the maximum, the vhost library > could request mempools for 426 qps. > > This limit was recently reached on big systems with a lot of virtio > ports (and multiqueue in use). > > While the limit on mempool count could be something we fix at the DPDK > project level, there is no reason to use mempools for the IOTLB cache: > - the IOTLB cache entries do not need to be DMA-able and are only used > by the current process (in multiprocess context), > - getting/putting objects from/in the mempool is always associated with > some other locks, so some level of lock contention is already present, > > We can convert to a malloc'd pool with objects put in a free list > protected by a spinlock. > > Signed-off-by: David Marchand <david.marchand@redhat.com> > --- > lib/vhost/iotlb.c | 102 ++++++++++++++++++++++++++++------------------ > lib/vhost/iotlb.h | 1 + > lib/vhost/vhost.c | 2 +- > lib/vhost/vhost.h | 4 +- > 4 files changed, 67 insertions(+), 42 deletions(-) > Thanks for working on this, this is definitely not needed to use mempool for this. Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com> Maxime ^ permalink raw reply [flat|nested] 25+ messages in thread
* RE: [PATCH v3 4/4] vhost: stop using mempool for IOTLB cache 2022-07-26 9:26 ` Maxime Coquelin @ 2023-02-17 7:42 ` Yuan, DukaiX 0 siblings, 0 replies; 25+ messages in thread From: Yuan, DukaiX @ 2023-02-17 7:42 UTC (permalink / raw) To: Maxime Coquelin, David Marchand, dev; +Cc: Xia, Chenbo Hi David, I get an error message when I use qemu-6.2.0 to relaunch dpdk-l3fwd-power with Asan, I need your help to confirm this issue. For more information please refer to https://bugs.dpdk.org/show_bug.cgi?id=1135. Waiting for your reply. Best regards, Dukai,Yuan > -----Original Message----- > From: Maxime Coquelin <maxime.coquelin@redhat.com> > Sent: 2022年7月26日 17:27 > To: David Marchand <david.marchand@redhat.com>; dev@dpdk.org > Cc: Xia, Chenbo <Chenbo.Xia@intel.com> > Subject: Re: [PATCH v3 4/4] vhost: stop using mempool for IOTLB cache > > > > On 7/25/22 22:32, David Marchand wrote: > > A mempool consumes 3 memzones (with the default ring mempool driver). > > The default DPDK configuration allows RTE_MAX_MEMZONE (2560) > memzones. > > > > Assuming there is no other memzones that means that we can have a > > maximum of 853 mempools. > > > > In the vhost library, the IOTLB cache code so far was requesting a > > mempool per vq, which means that at the maximum, the vhost library > > could request mempools for 426 qps. > > > > This limit was recently reached on big systems with a lot of virtio > > ports (and multiqueue in use). > > > > While the limit on mempool count could be something we fix at the DPDK > > project level, there is no reason to use mempools for the IOTLB cache: > > - the IOTLB cache entries do not need to be DMA-able and are only used > > by the current process (in multiprocess context), > > - getting/putting objects from/in the mempool is always associated with > > some other locks, so some level of lock contention is already > > present, > > > > We can convert to a malloc'd pool with objects put in a free list > > protected by a spinlock. 
> > > > Signed-off-by: David Marchand <david.marchand@redhat.com> > > --- > > lib/vhost/iotlb.c | 102 ++++++++++++++++++++++++++++------------------ > > lib/vhost/iotlb.h | 1 + > > lib/vhost/vhost.c | 2 +- > > lib/vhost/vhost.h | 4 +- > > 4 files changed, 67 insertions(+), 42 deletions(-) > > > > Thanks for working on this, this is definitely not needed to use mempool for > this. > > Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com> > > Maxime ^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH v3 0/4] vHost IOTLB cache rework 2022-07-25 20:32 ` [PATCH v3 0/4] vHost IOTLB cache rework David Marchand ` (3 preceding siblings ...) 2022-07-25 20:32 ` [PATCH v3 4/4] vhost: stop using mempool for IOTLB cache David Marchand @ 2022-09-15 16:02 ` Thomas Monjalon 4 siblings, 0 replies; 25+ messages in thread From: Thomas Monjalon @ 2022-09-15 16:02 UTC (permalink / raw) To: David Marchand; +Cc: dev, maxime.coquelin 25/07/2022 22:32, David Marchand: > This series aim was initially to solve a scalability issue with the IOTLB > cache code. But along the way, I tried to make the code a bit more robust > by unifying how the device and virtqueue are passed around. Applied directly in main tree to ease backport, thanks. ^ permalink raw reply [flat|nested] 25+ messages in thread
end of thread, other threads:[~2023-02-17 7:42 UTC | newest] Thread overview: 25+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2022-07-22 13:53 [PATCH 1/2] vhost: keep a reference to virtqueue index David Marchand 2022-07-22 13:53 ` [PATCH 2/2] vhost: stop using mempool for IOTLB David Marchand 2022-07-22 14:00 ` [PATCH 1/2] vhost: keep a reference to virtqueue index David Marchand 2022-07-22 15:13 ` David Marchand 2022-07-25 7:11 ` [PATCH v2 " David Marchand 2022-07-25 7:11 ` [PATCH v2 2/2] vhost: stop using mempool for IOTLB cache David Marchand 2022-07-25 20:32 ` [PATCH v3 0/4] vHost IOTLB cache rework David Marchand 2022-07-25 20:32 ` [PATCH v3 1/4] vhost: fix vq use after free on NUMA reallocation David Marchand 2022-07-26 7:55 ` Maxime Coquelin 2022-09-13 15:02 ` Maxime Coquelin 2022-09-14 1:05 ` Xia, Chenbo 2022-09-14 7:14 ` Maxime Coquelin 2022-09-14 9:15 ` Thomas Monjalon 2022-09-14 9:34 ` Maxime Coquelin 2022-09-14 9:45 ` Thomas Monjalon 2022-09-14 11:48 ` Maxime Coquelin 2022-07-25 20:32 ` [PATCH v3 2/4] vhost: make NUMA reallocation code more robust David Marchand 2022-07-26 8:39 ` Maxime Coquelin 2022-07-25 20:32 ` [PATCH v3 3/4] vhost: keep a reference to virtqueue index David Marchand 2022-07-26 8:52 ` Maxime Coquelin 2022-07-26 10:00 ` David Marchand 2022-07-25 20:32 ` [PATCH v3 4/4] vhost: stop using mempool for IOTLB cache David Marchand 2022-07-26 9:26 ` Maxime Coquelin 2023-02-17 7:42 ` Yuan, DukaiX 2022-09-15 16:02 ` [PATCH v3 0/4] vHost IOTLB cache rework Thomas Monjalon
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).