From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga01.intel.com (mga01.intel.com [192.55.52.88]) by dpdk.org (Postfix) with ESMTP id 9F1CB4C8E for ; Tue, 6 Mar 2018 11:45:08 +0100 (CET) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga007.fm.intel.com ([10.253.24.52]) by fmsmga101.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 06 Mar 2018 02:45:08 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.47,431,1515484800"; d="scan'208";a="22160409" Received: from debian.sh.intel.com ([10.67.104.164]) by fmsmga007.fm.intel.com with ESMTP; 06 Mar 2018 02:45:06 -0800 From: Tiwei Bie To: dev@dpdk.org Cc: maxime.coquelin@redhat.com, jianfeng.tan@intel.com, yliu@fridaylinux.org, zhihong.wang@intel.com, xiao.w.wang@intel.com, cunming.liang@intel.com, dan.daly@intel.com, tiwei.bie@intel.com Date: Tue, 6 Mar 2018 18:43:27 +0800 Message-Id: <20180306104327.14470-4-tiwei.bie@intel.com> X-Mailer: git-send-email 2.11.0 In-Reply-To: <20180306104327.14470-1-tiwei.bie@intel.com> References: <20180306104327.14470-1-tiwei.bie@intel.com> Subject: [dpdk-dev] [PATCH 3/3] vhost: support VFIO based accelerator X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 06 Mar 2018 10:45:09 -0000 This commit adds VFIO based accelerator support to vhost. A new API is provided to ask QEMU to do further setup so that notifications and interrupts can be delivered directly between the driver in the guest and the vDPA device in the host. 
Signed-off-by: Tiwei Bie --- lib/librte_vhost/rte_vhost.h | 28 ++++++ lib/librte_vhost/rte_vhost_version.map | 1 + lib/librte_vhost/vhost_user.c | 166 +++++++++++++++++++++++++++++++++ lib/librte_vhost/vhost_user.h | 9 ++ 4 files changed, 204 insertions(+) diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h index d5589c543..68842e908 100644 --- a/lib/librte_vhost/rte_vhost.h +++ b/lib/librte_vhost/rte_vhost.h @@ -35,6 +35,7 @@ extern "C" { #define RTE_VHOST_USER_PROTOCOL_F_REPLY_ACK 3 #define RTE_VHOST_USER_PROTOCOL_F_NET_MTU 4 #define RTE_VHOST_USER_PROTOCOL_F_SLAVE_REQ 5 +#define RTE_VHOST_USER_PROTOCOL_F_VFIO 8 #define RTE_VHOST_USER_F_PROTOCOL_FEATURES 30 /** @@ -591,6 +592,33 @@ rte_vhost_get_vdpa_eid(int vid); int __rte_experimental rte_vhost_get_vdpa_did(int vid); +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Enable or disable the VFIO based accelerator for vhost-user. + * + * This function is to ask QEMU to do further setup to better + * support the vDPA device at vhost user backend. With this + * setup, the notifications and interrupts will be delivered + * directly between the driver in guest and the vDPA device + * in host if platform supports e.g. EPT and Posted interrupt. + * It's nice to have, and not mandatory. 
+ * + * @param vid + * vhost device ID + * @param enable + * Enable or disable + * + * @return + * 0: success + * -ENODEV: no such vhost device + * -ENOTSUP: device does not support VFIO based accelerator feature + * -EINVAL: there is no accelerator assigned to this vhost device + * -EFAULT: failed to talk with QEMU + */ +int __rte_experimental rte_vhost_vfio_accelerator_ctrl(int vid, int enable); + #ifdef __cplusplus } #endif diff --git a/lib/librte_vhost/rte_vhost_version.map b/lib/librte_vhost/rte_vhost_version.map index 36257e51b..ca970170f 100644 --- a/lib/librte_vhost/rte_vhost_version.map +++ b/lib/librte_vhost/rte_vhost_version.map @@ -72,6 +72,7 @@ EXPERIMENTAL { rte_vhost_set_vring_base; rte_vhost_get_vdpa_eid; rte_vhost_get_vdpa_did; + rte_vhost_vfio_accelerator_ctrl; rte_vdpa_register_engine; rte_vdpa_unregister_engine; rte_vdpa_find_engine_id; diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c index e3a1dfbfb..a65598d80 100644 --- a/lib/librte_vhost/vhost_user.c +++ b/lib/librte_vhost/vhost_user.c @@ -35,6 +35,7 @@ #include #include #include +#include #include "iotlb.h" #include "vhost.h" @@ -1628,6 +1629,27 @@ vhost_user_msg_handler(int vid, int fd) return 0; } +static int process_slave_message_reply(struct virtio_net *dev, + const VhostUserMsg *msg) +{ + VhostUserMsg msg_reply; + + if ((msg->flags & VHOST_USER_NEED_REPLY) == 0) /* no reply was requested, nothing to wait for */ + return 0; + + if (read_vhost_message(dev->slave_req_fd, &msg_reply) < 0) + return -1; + + if (msg_reply.request.slave != msg->request.slave) { + RTE_LOG(ERR, VHOST_CONFIG, + "received unexpected msg type (%u), expected %u\n", + msg_reply.request.slave, msg->request.slave); + return -1; + } + + return msg_reply.payload.u64 ? -1 : 0; /* a non-zero reply payload reports failure; map it to -1 instead of truncating the u64 to int, since callers only test for < 0 */ +} + int vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm) { @@ -1653,3 +1675,147 @@ vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm) return 0; } + +static int vhost_user_slave_set_vring_file(struct virtio_net *dev, + uint32_t request, + struct 
vhost_vring_file *file) +{ + int *fdp = NULL; + size_t fd_num = 0; + int ret; + struct VhostUserMsg msg = { + .request.slave = request, + .flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY, + .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK, + .size = sizeof(msg.payload.u64), + }; + + if (file->fd < 0) + msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK; /* no fd is attached: flag that in the payload */ + else { + fdp = &file->fd; + fd_num = 1; + } + + ret = send_vhost_message(dev->slave_req_fd, &msg, fdp, fd_num); + if (ret < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed to send slave message %u (%d)\n", + request, ret); + return ret; + } + + return process_slave_message_reply(dev, &msg); +} + +static int vhost_user_slave_set_vring_notify_area(struct virtio_net *dev, + int index, int fd, + uint64_t offset, + uint64_t size) +{ + int *fdp = NULL; + size_t fd_num = 0; + int ret; + struct VhostUserMsg msg = { + .request.slave = VHOST_USER_SLAVE_VRING_NOTIFY_AREA_MSG, + .flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY, + .payload.area = { + .u64 = index & VHOST_USER_VRING_IDX_MASK, + .size = size, + .offset = offset, + }, + .size = sizeof(msg.payload.area), + }; + + if (fd < 0) + msg.payload.area.u64 |= VHOST_USER_VRING_NOFD_MASK; /* no fd is attached: flag that in the payload */ + else { + fdp = &fd; + fd_num = 1; + } + + ret = send_vhost_message(dev->slave_req_fd, &msg, fdp, fd_num); + if (ret < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed to set vring notify area (%d)\n", ret); + return ret; + } + + return process_slave_message_reply(dev, &msg); +} + +int __rte_experimental +rte_vhost_vfio_accelerator_ctrl(int vid, int enable) +{ + struct virtio_net *dev = get_device(vid); + int groupfd, devicefd, eid, ret = 0; + struct rte_vdpa_eng_driver *drv; + struct vhost_vring_file file; + uint64_t offset, size; + unsigned int i; + + if (!dev) + return -ENODEV; + + eid = dev->eid; + if (eid < 0) + return -EINVAL; + + if (!(dev->features & (1ULL << VIRTIO_F_VERSION_1)) || + !(dev->features & (1ULL << RTE_VHOST_USER_F_PROTOCOL_FEATURES)) || + !(dev->protocol_features & + 
(1ULL << RTE_VHOST_USER_PROTOCOL_F_VFIO))) + return -ENOTSUP; + + drv = vdpa_engines[eid]->eng_drv; + + RTE_FUNC_PTR_OR_ERR_RET(drv->dev_ops.get_vfio_device_fd, -ENOTSUP); + RTE_FUNC_PTR_OR_ERR_RET(drv->dev_ops.get_vfio_group_fd, -ENOTSUP); + RTE_FUNC_PTR_OR_ERR_RET(drv->dev_ops.get_notify_area, -ENOTSUP); + + devicefd = drv->dev_ops.get_vfio_device_fd(vid); + if (devicefd < 0) + return -ENOTSUP; + + groupfd = drv->dev_ops.get_vfio_group_fd(vid); + if (groupfd < 0) + return -ENOTSUP; + + if (enable) { + for (i = 0; i < dev->nr_vring; i++) { /* nr_vring is the number of vrings, not of queue pairs */ + file.index = i; + file.fd = groupfd; + + if (drv->dev_ops.get_notify_area(vid, i, &offset, + &size) < 0) { + ret = -ENOTSUP; + goto disable; + } + + if (vhost_user_slave_set_vring_file(dev, + VHOST_USER_SLAVE_VRING_VFIO_GROUP_MSG, + &file) < 0) { + ret = -EFAULT; + goto disable; + } + if (vhost_user_slave_set_vring_notify_area(dev, i, + devicefd, offset, size) < 0) { + ret = -EFAULT; + goto disable; + } + } + } else { +disable: /* also reached from the enable path on failure; teardown is best-effort and its errors are ignored */ + for (i = 0; i < dev->nr_vring; i++) { /* nr_vring is the number of vrings, not of queue pairs */ + file.index = i; + file.fd = -1; + vhost_user_slave_set_vring_file(dev, + VHOST_USER_SLAVE_VRING_VFIO_GROUP_MSG, + &file); + vhost_user_slave_set_vring_notify_area(dev, i, -1, + 0, 0); + } + } + + return ret; +} diff --git a/lib/librte_vhost/vhost_user.h b/lib/librte_vhost/vhost_user.h index 066e772dd..c74d288d4 100644 --- a/lib/librte_vhost/vhost_user.h +++ b/lib/librte_vhost/vhost_user.h @@ -52,6 +52,8 @@ typedef enum VhostUserRequest { typedef enum VhostUserSlaveRequest { VHOST_USER_SLAVE_NONE = 0, VHOST_USER_SLAVE_IOTLB_MSG = 1, + VHOST_USER_SLAVE_VRING_VFIO_GROUP_MSG = 3, + VHOST_USER_SLAVE_VRING_NOTIFY_AREA_MSG = 4, VHOST_USER_SLAVE_MAX } VhostUserSlaveRequest; @@ -73,6 +75,12 @@ typedef struct VhostUserLog { uint64_t mmap_offset; } VhostUserLog; +typedef struct VhostUserVringArea { + uint64_t u64; + uint64_t size; + uint64_t offset; +} VhostUserVringArea; + typedef struct VhostUserMsg { union { uint32_t master; /* a VhostUserRequest value */ @@ 
-93,6 +101,7 @@ typedef struct VhostUserMsg { VhostUserMemory memory; VhostUserLog log; struct vhost_iotlb_msg iotlb; + VhostUserVringArea area; } payload; int fds[VHOST_MEMORY_MAX_NREGIONS]; } __attribute((packed)) VhostUserMsg; -- 2.11.0