From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-pd0-f171.google.com (mail-pd0-f171.google.com [209.85.192.171]) by dpdk.org (Postfix) with ESMTP id CCB018075 for ; Tue, 16 Dec 2014 04:05:23 +0100 (CET) Received: by mail-pd0-f171.google.com with SMTP id y13so12997350pdi.2 for ; Mon, 15 Dec 2014 19:05:23 -0800 (PST) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20130820; h=x-gm-message-state:message-id:date:from:user-agent:mime-version:to :cc:subject:references:in-reply-to:content-type :content-transfer-encoding; bh=wF5DcOqKHEfkP0mUpOR9sKzsltQf35tH7DWD6gyo34M=; b=lAkHJnlweCKU34Upu+LtvGK6BXKn2tKkF1iq9F/WkHhNzPcIzNS8OeuLLaneMqv45l DKhNO6ePaBbv46lYB/qHbYjpqxFj1SRdoqoO0TgOmy0YVtctQcqOonVb8R8j21ghXlX8 vU2qe+DMz/mgrrvTYqs0bJdSW1lYiri/+OcxjpDrkIScbw0yblr6j1UZcZ+8nWvKPong /Q7is7jOk5Kf5UqY5WI8rLJq9qIm1YlCIAR00OYF9Kyacdlj+ZdmLE0OyYn908DNf2uY h0W5bHqvrb/zfpypXA5pxLqw9TvODRlZlTIb7NpNlWho4ln7sC+fie5nfeaUWPoJJQae u9Ww== X-Gm-Message-State: ALoCoQlyerGUytIgd4/odf5sWLJKYQ7Q4iVMS+RMqUmyU6iUuQvRKRDxxbq5whfRbcx3HTeX0Jdj X-Received: by 10.68.57.196 with SMTP id k4mr56416972pbq.76.1418699122975; Mon, 15 Dec 2014 19:05:22 -0800 (PST) Received: from [10.16.129.101] (napt.igel.co.jp. 
[219.106.231.132]) by mx.google.com with ESMTPSA id lx9sm6580646pdb.91.2014.12.15.19.05.21 for (version=TLSv1 cipher=ECDHE-RSA-RC4-SHA bits=128/128); Mon, 15 Dec 2014 19:05:22 -0800 (PST) Message-ID: <548FA172.5030604@igel.co.jp> Date: Tue, 16 Dec 2014 12:05:22 +0900 From: Tetsuya Mukawa User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64; rv:24.0) Gecko/20100101 Thunderbird/24.6.0 MIME-Version: 1.0 To: Huawei Xie , dev@dpdk.org References: <1418247477-13920-1-git-send-email-huawei.xie@intel.com> <1418247477-13920-9-git-send-email-huawei.xie@intel.com> In-Reply-To: <1418247477-13920-9-git-send-email-huawei.xie@intel.com> Content-Type: text/plain; charset=ISO-2022-JP Content-Transfer-Encoding: 7bit Cc: haifeng.lin@intel.com Subject: Re: [dpdk-dev] [PATCH RFC v2 08/12] lib/librte_vhost: vhost-user support X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 16 Dec 2014 03:05:24 -0000 (2014/12/11 6:37), Huawei Xie wrote: > vhost-user support > > > Signed-off-by: Huawei Xie > --- > lib/librte_vhost/Makefile | 5 +- > lib/librte_vhost/vhost-net.h | 4 + > lib/librte_vhost/vhost_cuse/virtio-net-cdev.c | 9 + > lib/librte_vhost/vhost_user/vhost-net-user.c | 422 ++++++++++++++++++++++++++ > lib/librte_vhost/vhost_user/vhost-net-user.h | 108 +++++++ > lib/librte_vhost/vhost_user/virtio-net-user.c | 199 ++++++++++++ > lib/librte_vhost/vhost_user/virtio-net-user.h | 48 +++ > lib/librte_vhost/virtio-net.c | 16 +- > lib/librte_vhost/virtio-net.h | 43 +++ > 9 files changed, 842 insertions(+), 12 deletions(-) > create mode 100644 lib/librte_vhost/vhost_user/vhost-net-user.c > create mode 100644 lib/librte_vhost/vhost_user/vhost-net-user.h > create mode 100644 lib/librte_vhost/vhost_user/virtio-net-user.c > create mode 100644 lib/librte_vhost/vhost_user/virtio-net-user.h > create mode 100644 lib/librte_vhost/virtio-net.h > > diff 
--git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile > index e0d0ef6..b2f14a0 100644 > --- a/lib/librte_vhost/Makefile > +++ b/lib/librte_vhost/Makefile > @@ -34,10 +34,11 @@ include $(RTE_SDK)/mk/rte.vars.mk > # library name > LIB = librte_vhost.a > > -CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I vhost_cuse -O3 -D_FILE_OFFSET_BITS=64 -lfuse > +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I vhost_cuse -I vhost_user -O3 -D_FILE_OFFSET_BITS=64 -lfuse > LDFLAGS += -lfuse > # all source are stored in SRCS-y > -SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_cuse/vhost-net-cdev.c vhost_cuse/virtio-net-cdev.c virtio-net.c vhost_rxtx.c > +#SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_cuse/vhost-net-cdev.c vhost_cuse/virtio-net-cdev.c virtio-net.c vhost_rxtx.c > +SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_user/vhost-net-user.c vhost_user/virtio-net-user.c vhost_user/fd_man.c virtio-net.c vhost_rxtx.c > > # install includes > SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h > diff --git a/lib/librte_vhost/vhost-net.h b/lib/librte_vhost/vhost-net.h > index f7e96fd..f9ec40b 100644 > --- a/lib/librte_vhost/vhost-net.h > +++ b/lib/librte_vhost/vhost-net.h > @@ -41,8 +41,12 @@ > > #include > > +#include "rte_virtio_net.h" > + > #define VHOST_MEMORY_MAX_NREGIONS 8 > > +extern struct vhost_net_device_ops const *ops; > + > /* Macros for printing using RTE_LOG */ > #define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1 > #define RTE_LOGTYPE_VHOST_DATA RTE_LOGTYPE_USER1 > diff --git a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c > index edcbc10..8ac3360 100644 > --- a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c > +++ b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c > @@ -268,6 +268,7 @@ cuse_set_mem_table(struct vhost_device_ctx ctx, > struct vhost_memory_region *mem_regions = (void *)(uintptr_t) > ((uint64_t)(uintptr_t)mem_regions_addr + size); > uint64_t base_address = 0, mapped_address, mapped_size; > + struct virtio_dev *dev; > > for 
(idx = 0; idx < nregions; idx++) { > regions[idx].guest_phys_address = > @@ -335,6 +336,14 @@ cuse_set_mem_table(struct vhost_device_ctx ctx, > regions[idx].guest_phys_address; > } > > + dev = get_device(ctx); > + if (dev && dev->mem && dev->mmaped_address) { > + munmap((void *)(uintptr_t)dev->mmaped_address, > + (size_t)dev->mmaped_size); > + free(dev->mem); > + dev->mem = NULL; > + } > + > ops->set_mem_table(ctx, &regions[0], valid_regions); > return 0; > } > diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c > new file mode 100644 > index 0000000..841d7e6 > --- /dev/null > +++ b/lib/librte_vhost/vhost_user/vhost-net-user.c > @@ -0,0 +1,422 @@ > +/*- > + * BSD LICENSE > + * > + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. > + * All rights reserved. > + * > + * Redistribution and use in source and binary forms, with or without > + * modification, are permitted provided that the following conditions > + * are met: > + * > + * * Redistributions of source code must retain the above copyright > + * notice, this list of conditions and the following disclaimer. > + * * Redistributions in binary form must reproduce the above copyright > + * notice, this list of conditions and the following disclaimer in > + * the documentation and/or other materials provided with the > + * distribution. > + * * Neither the name of Intel Corporation nor the names of its > + * contributors may be used to endorse or promote products derived > + * from this software without specific prior written permission. > + * > + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT > + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > + */ > + > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > + > +#include > +#include > + > +#include "fd_man.h" > +#include "vhost-net-user.h" > +#include "vhost-net.h" > +#include "virtio-net-user.h" > + > +static void vserver_new_vq_conn(int fd, uint64_t data); > +static void vserver_message_handler(int fd, uint64_t dat); > +struct vhost_net_device_ops const *ops; > + > +static struct vhost_server *g_vhost_server; > + > +static const char *vhost_message_str[VHOST_USER_MAX] = { > + [VHOST_USER_NONE] = "VHOST_USER_NONE", > + [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES", > + [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES", > + [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER", > + [VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER", > + [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE", > + [VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE", > + [VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD", > + [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM", > + [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR", > + [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE", > + [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE", > + [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK", > + [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL", > + [VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR" > 
+}; > + > +/** > + * Create a unix domain socket, bind to path and listen for connection. > + * @return > + * socket fd or -1 on failure > + */ > +static int > +uds_socket(const char *path) > +{ > + struct sockaddr_un un; > + int sockfd; > + int ret; > + > + if (path == NULL) > + return -1; > + > + sockfd = socket(AF_UNIX, SOCK_STREAM, 0); > + if (sockfd < 0) > + return -1; > + RTE_LOG(INFO, VHOST_CONFIG, "socket created, fd:%d\n", sockfd); > + > + memset(&un, 0, sizeof(un)); > + un.sun_family = AF_UNIX; > + snprintf(un.sun_path, sizeof(un.sun_path), "%s", path); > + ret = bind(sockfd, (struct sockaddr *)&un, sizeof(un)); > + if (ret == -1) > + goto err; > + RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path); > + > + ret = listen(sockfd, 1); > + if (ret == -1) > + goto err; > + > + return sockfd; > + > +err: > + close(sockfd); > + return -1; > +} > + > +/* return bytes# of read on success or negative val on failure. */ > +static int > +read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num) > +{ > + struct iovec iov; > + struct msghdr msgh = { 0 }; > + size_t fdsize = fd_num * sizeof(int); > + char control[CMSG_SPACE(fdsize)]; > + struct cmsghdr *cmsg; > + int ret; > + > + iov.iov_base = buf; > + iov.iov_len = buflen; > + > + msgh.msg_iov = &iov; > + msgh.msg_iovlen = 1; > + msgh.msg_control = control; > + msgh.msg_controllen = sizeof(control); > + > + ret = recvmsg(sockfd, &msgh, 0); > + if (ret <= 0) { > + RTE_LOG(ERR, VHOST_CONFIG, "recvmsg failed\n"); > + return ret; > + } > + > + if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) { > + RTE_LOG(ERR, VHOST_CONFIG, "Truncted msg\n"); > + return -1; > + } > + > + for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL; > + cmsg = CMSG_NXTHDR(&msgh, cmsg)) { > + if ((cmsg->cmsg_level == SOL_SOCKET) && > + (cmsg->cmsg_type == SCM_RIGHTS)) { > + memcpy(fds, CMSG_DATA(cmsg), fdsize); > + break; > + } > + } > + > + return ret; > +} > + > +/* return bytes# of read on success or negative val on failure. 
*/ > +static int > +read_vhost_message(int sockfd, struct VhostUserMsg *msg) > +{ > + int ret; > + > + ret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE, > + msg->fds, VHOST_MEMORY_MAX_NREGIONS); > + if (ret <= 0) > + return ret; > + > + if (msg && msg->size) { > + if (msg->size > sizeof(msg->payload)) { > + RTE_LOG(ERR, VHOST_CONFIG, > + "invalid msg size: %d\n", msg->size); > + return -1; > + } > + ret = read(sockfd, &msg->payload, msg->size); > + if (ret <= 0) > + return ret; > + if (ret != (int)msg->size) { > + RTE_LOG(ERR, VHOST_CONFIG, > + "read control message failed\n"); > + return -1; > + } > + } > + > + return ret; > +} > + > +static int > +send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num) > +{ > + > + struct iovec iov; > + struct msghdr msgh = { 0 }; > + size_t fdsize = fd_num * sizeof(int); > + char control[CMSG_SPACE(fdsize)]; > + struct cmsghdr *cmsg; > + int ret; > + > + iov.iov_base = buf; > + iov.iov_len = buflen; > + > + msgh.msg_iov = &iov; > + msgh.msg_iovlen = 1; > + > + if (fds && fd_num > 0) { > + msgh.msg_control = control; > + msgh.msg_controllen = sizeof(control); > + cmsg = CMSG_FIRSTHDR(&msgh); > + cmsg->cmsg_len = CMSG_LEN(fdsize); > + cmsg->cmsg_level = SOL_SOCKET; > + cmsg->cmsg_type = SCM_RIGHTS; > + memcpy(CMSG_DATA(cmsg), fds, fdsize); > + } else { > + msgh.msg_control = NULL; > + msgh.msg_controllen = 0; > + } > + > + do { > + ret = sendmsg(sockfd, &msgh, 0); > + } while (ret < 0 && errno == EINTR); > + > + if (ret < 0) { > + RTE_LOG(ERR, VHOST_CONFIG, "sendmsg error\n"); > + return ret; > + } > + > + return ret; > +} > + > +static int > +send_vhost_message(int sockfd, struct VhostUserMsg *msg) > +{ > + int ret; > + > + if (!msg) > + return 0; > + > + msg->flags &= ~VHOST_USER_VERSION_MASK; > + msg->flags |= VHOST_USER_VERSION; > + msg->flags |= VHOST_USER_REPLY_MASK; > + > + ret = send_fd_message(sockfd, (char *)msg, > + VHOST_USER_HDR_SIZE + msg->size, NULL, 0); > + > + return ret; > +} > + > 
+/* call back when there is new virtio connection. */ > +static void > +vserver_new_vq_conn(int fd, uint64_t dat) > +{ > + struct vhost_server *vserver = (void *)(uintptr_t)dat; > + int conn_fd; > + uint32_t fh; > + struct vhost_device_ctx vdev_ctx = { 0 }; > + > + conn_fd = accept(fd, NULL, NULL); > + RTE_LOG(INFO, VHOST_CONFIG, > + "new virtio connection is %d\n", conn_fd); > + if (conn_fd < 0) > + return; > + > + fh = ops->new_device(vdev_ctx); > + RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", fh); > + > + fdset_add(&vserver->fdset, > + conn_fd, vserver_message_handler, NULL, fh); > +} > + > +/* callback when there is message on the connfd */ > +static void > +vserver_message_handler(int connfd, uint64_t dat) > +{ > + struct vhost_device_ctx ctx; > + uint32_t fh = (uint32_t)dat; > + struct VhostUserMsg msg; > + uint64_t features; > + int ret; > + > + ctx.fh = fh; > + ret = read_vhost_message(connfd, &msg); > + if (ret < 0) { > + RTE_LOG(ERR, VHOST_CONFIG, > + "vhost read message failed\n"); > + > + /*TODO: cleanup */ > + close(connfd); > + fdset_del(&g_vhost_server->fdset, connfd); > + ops->destroy_device(ctx); > + > + return; > + } else if (ret == 0) { > + RTE_LOG(INFO, VHOST_CONFIG, > + "vhost peer closed\n"); > + > + /*TODO: cleanup */ > + close(connfd); > + fdset_del(&g_vhost_server->fdset, connfd); > + ops->destroy_device(ctx); > + > + return; > + } > + if (msg.request > VHOST_USER_MAX) { > + RTE_LOG(ERR, VHOST_CONFIG, > + "vhost read incorrect message\n"); > + > + /*TODO: cleanup */ > + close(connfd); > + fdset_del(&g_vhost_server->fdset, connfd); > + > + return; > + } > + > + RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n", > + vhost_message_str[msg.request]); > + switch (msg.request) { > + case VHOST_USER_GET_FEATURES: > + ret = ops->get_features(ctx, &features); > + msg.payload.u64 = ret; > + msg.size = sizeof(msg.payload.u64); > + send_vhost_message(connfd, &msg); > + break; > + case VHOST_USER_SET_FEATURES: > + ops->set_features(ctx, 
&features); > + break; > + > + case VHOST_USER_SET_OWNER: > + ops->set_owner(ctx); > + break; > + case VHOST_USER_RESET_OWNER: > + ops->reset_owner(ctx); > + break; > + > + case VHOST_USER_SET_MEM_TABLE: > + user_set_mem_table(ctx, &msg); > + break; > + > + case VHOST_USER_SET_LOG_BASE: > + case VHOST_USER_SET_LOG_FD: > + RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n"); > + break; > + > + case VHOST_USER_SET_VRING_NUM: > + ops->set_vring_num(ctx, &msg.payload.state); > + break; > + case VHOST_USER_SET_VRING_ADDR: > + ops->set_vring_addr(ctx, &msg.payload.addr); > + break; > + case VHOST_USER_SET_VRING_BASE: > + ops->set_vring_base(ctx, &msg.payload.state); > + break; > + > + case VHOST_USER_GET_VRING_BASE: > + ret = user_get_vring_base(ctx, &msg.payload.state); > + msg.size = sizeof(msg.payload.state); > + send_vhost_message(connfd, &msg); > + break; > + > + case VHOST_USER_SET_VRING_KICK: > + user_set_vring_kick(ctx, &msg); > + break; > + case VHOST_USER_SET_VRING_CALL: > + user_set_vring_call(ctx, &msg); > + break; > + > + case VHOST_USER_SET_VRING_ERR: > + RTE_LOG(INFO, VHOST_CONFIG, "not implemented\n"); > + break; > + > + default: > + break; > + > + } > +} > + > + > +/** > + * Creates and initialise the vhost server. 
> + */ > +int > +rte_vhost_driver_register(const char *path) > +{ > + > + struct vhost_server *vserver; > + > + if (g_vhost_server != NULL) > + return -1; > + > + vserver = calloc(sizeof(struct vhost_server), 1); > + if (vserver == NULL) > + return -1; > + > + fdset_init(&vserver->fdset); > + > + unlink(path); > + > + vserver->listenfd = uds_socket(path); > + if (vserver->listenfd < 0) { > + free(vserver); > + return -1; > + } > + vserver->path = path; > + > + fdset_add(&vserver->fdset, vserver->listenfd, > + vserver_new_vq_conn, NULL, > + (uint64_t)(uintptr_t)vserver); > + > + ops = get_virtio_net_callbacks(); > + > + g_vhost_server = vserver; > + > + return 0; > +} > + > + > +int > +rte_vhost_driver_session_start(void) > +{ > + fdset_event_dispatch(&g_vhost_server->fdset); > + return 0; > +} > + > diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.h b/lib/librte_vhost/vhost_user/vhost-net-user.h > new file mode 100644 > index 0000000..c138844 > --- /dev/null > +++ b/lib/librte_vhost/vhost_user/vhost-net-user.h > @@ -0,0 +1,108 @@ > +/*- > + * BSD LICENSE > + * > + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. > + * All rights reserved. > + * > + * Redistribution and use in source and binary forms, with or without > + * modification, are permitted provided that the following conditions > + * are met: > + * > + * * Redistributions of source code must retain the above copyright > + * notice, this list of conditions and the following disclaimer. > + * * Redistributions in binary form must reproduce the above copyright > + * notice, this list of conditions and the following disclaimer in > + * the documentation and/or other materials provided with the > + * distribution. > + * * Neither the name of Intel Corporation nor the names of its > + * contributors may be used to endorse or promote products derived > + * from this software without specific prior written permission. 
> + * > + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > + */ > + > +#ifndef _VHOST_NET_USER_H > +#define _VHOST_NET_USER_H > + > +#include > +#include > + > +#include "fd_man.h" > + > +struct vhost_server { > + const char *path; /**< The path the uds is bind to. */ > + int listenfd; /**< The listener sockfd. */ > + struct fdset fdset; /**< The fd list this vhost server manages. 
*/ > +}; > + > +/* refer to hw/virtio/vhost-user.c */ > + > +#define VHOST_MEMORY_MAX_NREGIONS 8 > + > +typedef enum VhostUserRequest { > + VHOST_USER_NONE = 0, > + VHOST_USER_GET_FEATURES = 1, > + VHOST_USER_SET_FEATURES = 2, > + VHOST_USER_SET_OWNER = 3, > + VHOST_USER_RESET_OWNER = 4, > + VHOST_USER_SET_MEM_TABLE = 5, > + VHOST_USER_SET_LOG_BASE = 6, > + VHOST_USER_SET_LOG_FD = 7, > + VHOST_USER_SET_VRING_NUM = 8, > + VHOST_USER_SET_VRING_ADDR = 9, > + VHOST_USER_SET_VRING_BASE = 10, > + VHOST_USER_GET_VRING_BASE = 11, > + VHOST_USER_SET_VRING_KICK = 12, > + VHOST_USER_SET_VRING_CALL = 13, > + VHOST_USER_SET_VRING_ERR = 14, > + VHOST_USER_MAX > +} VhostUserRequest; > + > +typedef struct VhostUserMemoryRegion { > + uint64_t guest_phys_addr; > + uint64_t memory_size; > + uint64_t userspace_addr; > + uint64_t mmap_offset; > +} VhostUserMemoryRegion; > + > +typedef struct VhostUserMemory { > + uint32_t nregions; > + uint32_t padding; > + VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; > +} VhostUserMemory; > + > +typedef struct VhostUserMsg { > + VhostUserRequest request; > + > +#define VHOST_USER_VERSION_MASK (0x3) > +#define VHOST_USER_REPLY_MASK (0x1 << 2) > + uint32_t flags; > + uint32_t size; /* the following payload size */ > + union { > +#define VHOST_USER_VRING_IDX_MASK (0xff) > +#define VHOST_USER_VRING_NOFD_MASK (0x1<<8) > + uint64_t u64; > + struct vhost_vring_state state; > + struct vhost_vring_addr addr; > + VhostUserMemory memory; > + } payload; > + int fds[VHOST_MEMORY_MAX_NREGIONS]; > +} __attribute((packed)) VhostUserMsg; > + > +#define VHOST_USER_HDR_SIZE (intptr_t)(&((VhostUserMsg *)0)->payload.u64) > + > +/* The version of the protocol we support */ > +#define VHOST_USER_VERSION (0x1) > + > +/*****************************************************************************/ > +#endif > diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c b/lib/librte_vhost/vhost_user/virtio-net-user.c > new file mode 100644 > index 0000000..ad59fcc > 
--- /dev/null > +++ b/lib/librte_vhost/vhost_user/virtio-net-user.c > @@ -0,0 +1,199 @@ > +/*- > + * BSD LICENSE > + * > + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. > + * All rights reserved. > + * > + * Redistribution and use in source and binary forms, with or without > + * modification, are permitted provided that the following conditions > + * are met: > + * > + * * Redistributions of source code must retain the above copyright > + * notice, this list of conditions and the following disclaimer. > + * * Redistributions in binary form must reproduce the above copyright > + * notice, this list of conditions and the following disclaimer in > + * the documentation and/or other materials provided with the > + * distribution. > + * * Neither the name of Intel Corporation nor the names of its > + * contributors may be used to endorse or promote products derived > + * from this software without specific prior written permission. > + * > + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
> + */ > + > +#include > +#include > +#include > +#include > +#include > + > +#include > + > +#include "virtio-net.h" > +#include "virtio-net-user.h" > +#include "vhost-net-user.h" > +#include "vhost-net.h" > + > +int > +user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg) > +{ > + unsigned int idx; > + struct VhostUserMemory memory = pmsg->payload.memory; > + struct virtio_memory_regions regions[VHOST_MEMORY_MAX_NREGIONS]; > + uint64_t mapped_address, base_address = 0; > + > + for (idx = 0; idx < memory.nregions; idx++) { > + if (memory.regions[idx].guest_phys_addr == 0) > + base_address = memory.regions[idx].userspace_addr; > + } > + if (base_address == 0) { > + RTE_LOG(ERR, VHOST_CONFIG, > + "couldn't find the mem region whose GPA is 0.\n"); > + return -1; > + } > + > + for (idx = 0; idx < memory.nregions; idx++) { > + regions[idx].guest_phys_address = > + memory.regions[idx].guest_phys_addr; > + regions[idx].guest_phys_address_end = > + memory.regions[idx].guest_phys_addr + > + memory.regions[idx].memory_size; > + regions[idx].memory_size = memory.regions[idx].memory_size; > + regions[idx].userspace_address = > + memory.regions[idx].userspace_addr; > + > + /* This is ugly */ > + mapped_address = (uint64_t)(uintptr_t)mmap(NULL, > + regions[idx].memory_size + > + memory.regions[idx].mmap_offset, > + PROT_READ | PROT_WRITE, MAP_SHARED, > + pmsg->fds[idx], > + 0); > + RTE_LOG(INFO, VHOST_CONFIG, > + "mapped region %d to %p\n", > + idx, (void *)mapped_address); > + > + if (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) { > + RTE_LOG(ERR, VHOST_CONFIG, > + "mmap qemu guest failed.\n"); > + return -1; > + } > + > + mapped_address += memory.regions[idx].mmap_offset; > + > + regions[idx].address_offset = mapped_address - > + regions[idx].guest_phys_address; > + LOG_DEBUG(VHOST_CONFIG, > + "REGION: %u GPA: %p QEMU VA: %p SIZE (%"PRIu64")\n", > + idx, > + (void *)(uintptr_t)regions[idx].guest_phys_address, > + (void 
*)(uintptr_t)regions[idx].userspace_address, > + regions[idx].memory_size); > + } > + ops->set_mem_table(ctx, regions, memory.nregions); > + return 0; > +} > + > + > +static int > +virtio_is_ready(struct virtio_net *dev) > +{ > + struct vhost_virtqueue *rvq, *tvq; > + > + /* mq support in future.*/ > + rvq = dev->virtqueue[VIRTIO_RXQ]; > + tvq = dev->virtqueue[VIRTIO_TXQ]; > + if (rvq && tvq && rvq->desc && tvq->desc && > + (rvq->kickfd != (eventfd_t)-1) && > + (rvq->callfd != (eventfd_t)-1) && > + (tvq->kickfd != (eventfd_t)-1) && > + (tvq->callfd != (eventfd_t)-1)) { > + RTE_LOG(INFO, VHOST_CONFIG, > + "virtio is now ready for processing.\n"); > + return 1; > + } > + RTE_LOG(INFO, VHOST_CONFIG, > + "virtio isn't ready for processing.\n"); > + return 0; > +} > + > +void > +user_set_vring_call(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg) > +{ > + struct vhost_vring_file file; > + > + file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK; > + file.fd = pmsg->fds[0]; > + RTE_LOG(INFO, VHOST_CONFIG, > + "vring call idx:%d file:%d\n", file.index, file.fd); > + ops->set_vring_call(ctx, &file); > +} > + > + > +/* > + * In vhost-user, when we receive kick message, will test whether virtio > + * device is ready for packet processing. > + */ > +void > +user_set_vring_kick(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg) > +{ > + struct vhost_vring_file file; > + struct virtio_net *dev = get_device(ctx); > + > + file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK; > + file.fd = pmsg->fds[0]; > + RTE_LOG(INFO, VHOST_CONFIG, > + "vring kick idx:%d file:%d\n", file.index, file.fd); > + ops->set_vring_kick(ctx, &file); > + > + if (virtio_is_ready(dev) && > + !(dev->flags & VIRTIO_DEV_RUNNING)) > + notify_ops->new_device(dev); > + > +} > + > +/* > + * when virtio is stopped, qemu will send us the GET_VRING_BASE message. 
> + */ > +int > +user_get_vring_base(struct vhost_device_ctx ctx, > + struct vhost_vring_state *state) > +{ > + struct virtio_net *dev = get_device(ctx); > + > + /* We have to stop the queue (virtio) if it is running. */ > + if (dev->flags & VIRTIO_DEV_RUNNING) > + notify_ops->destroy_device(dev); I have one concern about the finalization of vrings. Can the vhost-backend stop accessing the vring (RX/TX) before replying to this message? QEMU sends this message when the virtio-net device is finalized by the virtio-net driver on the guest. After finalization, the memory used by the vring will be freed by the virtio-net driver, because that memory is allocated by the virtio-net driver. Because of this, I guess the vhost-backend must stop accessing the vring before replying to this message. I am not sure what a good way to stop the access would be. One idea is to add a condition check when rte_vhost_dequeue_burst() and rte_vhost_enqueue_burst() are called. Anyway, we probably need to wait until access has stopped before replying. Thanks, Tetsuya > + > + /* Here we are safe to get the last used index */ > + ops->get_vring_base(ctx, state->index, state); > + > + RTE_LOG(INFO, VHOST_CONFIG, > + "vring base idx:%d file:%d\n", state->index, state->num); > + /* > + * Based on current qemu vhost-user implementation, this message is > + * sent and only sent in vhost_vring_stop. > + * TODO: cleanup the vring, it isn't usable since here. 
> + */ > + if (dev->virtqueue[VIRTIO_RXQ]->callfd) { > + close(dev->virtqueue[VIRTIO_RXQ]->callfd); > + dev->virtqueue[VIRTIO_RXQ]->callfd = (eventfd_t)-1; > + } > + if (dev->virtqueue[VIRTIO_TXQ]->callfd) { > + close(dev->virtqueue[VIRTIO_TXQ]->callfd); > + dev->virtqueue[VIRTIO_TXQ]->callfd = (eventfd_t)-1; > + } > + > + return 0; > + > +} > diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.h b/lib/librte_vhost/vhost_user/virtio-net-user.h > new file mode 100644 > index 0000000..0f6a75a > --- /dev/null > +++ b/lib/librte_vhost/vhost_user/virtio-net-user.h > @@ -0,0 +1,48 @@ > +/*- > + * BSD LICENSE > + * > + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. > + * All rights reserved. > + * > + * Redistribution and use in source and binary forms, with or without > + * modification, are permitted provided that the following conditions > + * are met: > + * > + * * Redistributions of source code must retain the above copyright > + * notice, this list of conditions and the following disclaimer. > + * * Redistributions in binary form must reproduce the above copyright > + * notice, this list of conditions and the following disclaimer in > + * the documentation and/or other materials provided with the > + * distribution. > + * * Neither the name of Intel Corporation nor the names of its > + * contributors may be used to endorse or promote products derived > + * from this software without specific prior written permission. > + * > + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT > + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > + */ > + > +#ifndef _VIRTIO_NET_USER_H > +#define _VIRTIO_NET_USER_H > + > +#include "vhost-net.h" > +#include "vhost-net-user.h" > + > +int user_set_mem_table(struct vhost_device_ctx, struct VhostUserMsg *); > + > +void user_set_vring_call(struct vhost_device_ctx, struct VhostUserMsg *); > + > +void user_set_vring_kick(struct vhost_device_ctx, struct VhostUserMsg *); > + > +int user_get_vring_base(struct vhost_device_ctx, struct vhost_vring_state *); > + > +#endif > diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c > index f81e459..0b49f1b 100644 > --- a/lib/librte_vhost/virtio-net.c > +++ b/lib/librte_vhost/virtio-net.c > @@ -46,6 +46,7 @@ > #include > > #include "vhost-net.h" > +#include "virtio-net.h" > > /* > * Device linked list structure for configuration. > @@ -56,7 +57,7 @@ struct virtio_net_config_ll { > }; > > /* device ops to add/remove device to/from data core. 
*/ > -static struct virtio_net_device_ops const *notify_ops; > +struct virtio_net_device_ops const *notify_ops; > /* root address of the linked list of managed virtio devices */ > static struct virtio_net_config_ll *ll_root; > > @@ -83,8 +84,9 @@ qva_to_vva(struct virtio_net *dev, uint64_t qemu_va) > if ((qemu_va >= region->userspace_address) && > (qemu_va <= region->userspace_address + > region->memory_size)) { > - vhost_va = dev->mem->mapped_address + qemu_va - > - dev->mem->base_address; > + vhost_va = qemu_va + region->guest_phys_address + > + region->address_offset - > + region->userspace_address; > break; > } > } > @@ -114,7 +116,7 @@ get_config_ll_entry(struct vhost_device_ctx ctx) > * Searches the configuration core linked list and > * retrieves the device if it exists. > */ > -static struct virtio_net * > +struct virtio_net * > get_device(struct vhost_device_ctx ctx) > { > struct virtio_net_config_ll *ll_dev; > @@ -450,12 +452,6 @@ set_mem_table(struct vhost_device_ctx ctx, > if (dev == NULL) > return -1; > > - if (dev->mem) { > - munmap((void *)(uintptr_t)dev->mem->mapped_address, > - (size_t)dev->mem->mapped_size); > - free(dev->mem); > - } > - > /* Malloc the memory structure depending on the number of regions. */ > mem = calloc(1, sizeof(struct virtio_memory) + > (sizeof(struct virtio_memory_regions) * nregions)); > diff --git a/lib/librte_vhost/virtio-net.h b/lib/librte_vhost/virtio-net.h > new file mode 100644 > index 0000000..da4ade0 > --- /dev/null > +++ b/lib/librte_vhost/virtio-net.h > @@ -0,0 +1,43 @@ > +/*- > + * BSD LICENSE > + * > + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. > + * All rights reserved. > + * > + * Redistribution and use in source and binary forms, with or without > + * modification, are permitted provided that the following conditions > + * are met: > + * > + * * Redistributions of source code must retain the above copyright > + * notice, this list of conditions and the following disclaimer. 
> + * * Redistributions in binary form must reproduce the above copyright > + * notice, this list of conditions and the following disclaimer in > + * the documentation and/or other materials provided with the > + * distribution. > + * * Neither the name of Intel Corporation nor the names of its > + * contributors may be used to endorse or promote products derived > + * from this software without specific prior written permission. > + * > + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > + */ > + > +#ifndef _VIRTIO_NET_H > +#define _VIRTIO_NET_H > + > +#include "vhost-net.h" > +#include "rte_virtio_net.h" > + > +struct virtio_net_device_ops const *notify_ops; > +struct virtio_net * get_device(struct vhost_device_ctx ctx); > + > +#endif