* [dpdk-dev] [PATCH RFC] lib/librte_vhost: vhost-user
@ 2014-11-15 1:14 Huawei Xie
2014-11-17 6:04 ` Tetsuya Mukawa
` (2 more replies)
0 siblings, 3 replies; 6+ messages in thread
From: Huawei Xie @ 2014-11-15 1:14 UTC (permalink / raw)
To: dev
implement socket server
fd event dispatch mechanism
vhost sock message handling
memory map for each region
VHOST_USER_SET_VRING_KICK_FD as the indicator that vring is available
VHOST_USER_GET_VRING_BASE as the message that vring should be released
The message flow between vhost-user and vhost-cuse is kind of different,
which makes it difficult and complicated for the common virtio-net message handler layer to handle
both cases in new_device/destroy_device/memory map/resource cleanup.
Will only leave the most common message handling in virtio-net, and move the
control logic to the cuse/fuse layer.
Signed-off-by: Huawei Xie <huawei.xie@intel.com>
---
lib/librte_vhost/Makefile | 14 +-
lib/librte_vhost/eventfd_link/eventfd_link.c | 27 +-
lib/librte_vhost/eventfd_link/eventfd_link.h | 48 +-
lib/librte_vhost/libvirt/qemu-wrap.py | 367 ---------------
lib/librte_vhost/rte_virtio_net.h | 106 ++---
lib/librte_vhost/vhost-cuse/vhost-net-cdev.c | 436 ++++++++++++++++++
lib/librte_vhost/vhost-cuse/virtio-net-cdev.c | 314 +++++++++++++
lib/librte_vhost/vhost-cuse/virtio-net-cdev.h | 43 ++
lib/librte_vhost/vhost-net-cdev.c | 389 ----------------
lib/librte_vhost/vhost-net-cdev.h | 113 -----
lib/librte_vhost/vhost-user/fd_man.c | 158 +++++++
lib/librte_vhost/vhost-user/fd_man.h | 31 ++
lib/librte_vhost/vhost-user/vhost-net-user.c | 417 +++++++++++++++++
lib/librte_vhost/vhost-user/vhost-net-user.h | 74 +++
lib/librte_vhost/vhost-user/virtio-net-user.c | 208 +++++++++
lib/librte_vhost/vhost-user/virtio-net-user.h | 11 +
lib/librte_vhost/vhost_rxtx.c | 625 ++++----------------------
lib/librte_vhost/virtio-net.c | 450 ++++---------------
18 files changed, 1939 insertions(+), 1892 deletions(-)
delete mode 100755 lib/librte_vhost/libvirt/qemu-wrap.py
create mode 100644 lib/librte_vhost/vhost-cuse/vhost-net-cdev.c
create mode 100644 lib/librte_vhost/vhost-cuse/virtio-net-cdev.c
create mode 100644 lib/librte_vhost/vhost-cuse/virtio-net-cdev.h
delete mode 100644 lib/librte_vhost/vhost-net-cdev.c
delete mode 100644 lib/librte_vhost/vhost-net-cdev.h
create mode 100644 lib/librte_vhost/vhost-user/fd_man.c
create mode 100644 lib/librte_vhost/vhost-user/fd_man.h
create mode 100644 lib/librte_vhost/vhost-user/vhost-net-user.c
create mode 100644 lib/librte_vhost/vhost-user/vhost-net-user.h
create mode 100644 lib/librte_vhost/vhost-user/virtio-net-user.c
create mode 100644 lib/librte_vhost/vhost-user/virtio-net-user.h
diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index c008d64..cb4e172 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -34,17 +34,19 @@ include $(RTE_SDK)/mk/rte.vars.mk
# library name
LIB = librte_vhost.a
-CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -D_FILE_OFFSET_BITS=64 -lfuse
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I. -I vhost-user -I vhost-cuse -O3 -D_FILE_OFFSET_BITS=64 -lfuse
LDFLAGS += -lfuse
# all source are stored in SRCS-y
-SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost-net-cdev.c virtio-net.c vhost_rxtx.c
+#SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost-cuse/vhost-net-cdev.c vhost-cuse/virtio-net-cdev.c
+
+SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost-user/fd_man.c vhost-user/vhost-net-user.c vhost-user/virtio-net-user.c
+
+SRCS-$(CONFIG_RTE_LIBRTE_VHOST) += virtio-net.c vhost_rxtx.c
# install includes
SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h
-# dependencies
-DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_eal
-DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_ether
-DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_mbuf
+# this lib needs eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_eal lib/librte_mbuf
include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_vhost/eventfd_link/eventfd_link.c b/lib/librte_vhost/eventfd_link/eventfd_link.c
index 7755dd6..4c9b628 100644
--- a/lib/librte_vhost/eventfd_link/eventfd_link.c
+++ b/lib/librte_vhost/eventfd_link/eventfd_link.c
@@ -13,8 +13,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ * along with this program; If not, see <http://www.gnu.org/licenses/>.
* The full GNU General Public License is included in this distribution
* in the file called LICENSE.GPL.
*
@@ -78,8 +77,7 @@ eventfd_link_ioctl(struct file *f, unsigned int ioctl, unsigned long arg)
switch (ioctl) {
case EVENTFD_COPY:
- if (copy_from_user(&eventfd_copy, argp,
- sizeof(struct eventfd_copy)))
+ if (copy_from_user(&eventfd_copy, argp, sizeof(struct eventfd_copy)))
return -EFAULT;
/*
@@ -88,28 +86,28 @@ eventfd_link_ioctl(struct file *f, unsigned int ioctl, unsigned long arg)
task_target =
pid_task(find_vpid(eventfd_copy.target_pid), PIDTYPE_PID);
if (task_target == NULL) {
- pr_debug("Failed to get mem ctx for target pid\n");
+ printk(KERN_DEBUG "Failed to get mem ctx for target pid\n");
return -EFAULT;
}
files = get_files_struct(current);
if (files == NULL) {
- pr_debug("Failed to get files struct\n");
+ printk(KERN_DEBUG "Failed to get files struct\n");
return -EFAULT;
}
rcu_read_lock();
file = fcheck_files(files, eventfd_copy.source_fd);
if (file) {
- if (file->f_mode & FMODE_PATH ||
- !atomic_long_inc_not_zero(&file->f_count))
+ if (file->f_mode & FMODE_PATH
+ || !atomic_long_inc_not_zero(&file->f_count))
file = NULL;
}
rcu_read_unlock();
put_files_struct(files);
if (file == NULL) {
- pr_debug("Failed to get file from source pid\n");
+ printk(KERN_DEBUG "Failed to get file from source pid\n");
return 0;
}
@@ -128,25 +126,26 @@ eventfd_link_ioctl(struct file *f, unsigned int ioctl, unsigned long arg)
files = get_files_struct(task_target);
if (files == NULL) {
- pr_debug("Failed to get files struct\n");
+ printk(KERN_DEBUG "Failed to get files struct\n");
return -EFAULT;
}
rcu_read_lock();
file = fcheck_files(files, eventfd_copy.target_fd);
if (file) {
- if (file->f_mode & FMODE_PATH ||
- !atomic_long_inc_not_zero(&file->f_count))
- file = NULL;
+ if (file->f_mode & FMODE_PATH
+ || !atomic_long_inc_not_zero(&file->f_count))
+ file = NULL;
}
rcu_read_unlock();
put_files_struct(files);
if (file == NULL) {
- pr_debug("Failed to get file from target pid\n");
+ printk(KERN_DEBUG "Failed to get file from target pid\n");
return 0;
}
+
/*
* Install the file struct from the target process into the
* file desciptor of the source process,
diff --git a/lib/librte_vhost/eventfd_link/eventfd_link.h b/lib/librte_vhost/eventfd_link/eventfd_link.h
index ea619ec..38052e2 100644
--- a/lib/librte_vhost/eventfd_link/eventfd_link.h
+++ b/lib/librte_vhost/eventfd_link/eventfd_link.h
@@ -1,7 +1,4 @@
/*-
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
* GPL LICENSE SUMMARY
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
@@ -16,61 +13,28 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ * along with this program; If not, see <http://www.gnu.org/licenses/>.
* The full GNU General Public License is included in this distribution
* in the file called LICENSE.GPL.
*
* Contact Information:
* Intel Corporation
- *
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
#ifndef _EVENTFD_LINK_H_
#define _EVENTFD_LINK_H_
/*
- * ioctl to copy an fd entry in calling process to an fd in a target process
+ * ioctl to copy an fd entry in calling process to an fd in a target process
*/
#define EVENTFD_COPY 1
/*
- * arguements for the EVENTFD_COPY ioctl
+ * arguements for the EVENTFD_COPY ioctl
*/
struct eventfd_copy {
- unsigned target_fd; /* fd in the target pid */
- unsigned source_fd; /* fd in the calling pid */
- pid_t target_pid; /* pid of the target pid */
+ unsigned target_fd; /**< fd in the target pid */
+ unsigned source_fd; /**< fd in the calling pid */
+ pid_t target_pid; /**< pid of the target pid */
};
#endif /* _EVENTFD_LINK_H_ */
diff --git a/lib/librte_vhost/libvirt/qemu-wrap.py b/lib/librte_vhost/libvirt/qemu-wrap.py
deleted file mode 100755
index e2d68a0..0000000
--- a/lib/librte_vhost/libvirt/qemu-wrap.py
+++ /dev/null
@@ -1,367 +0,0 @@
-#!/usr/bin/python
-#/*
-# * BSD LICENSE
-# *
-# * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
-# * All rights reserved.
-# *
-# * Redistribution and use in source and binary forms, with or without
-# * modification, are permitted provided that the following conditions
-# * are met:
-# *
-# * * Redistributions of source code must retain the above copyright
-# * notice, this list of conditions and the following disclaimer.
-# * * Redistributions in binary form must reproduce the above copyright
-# * notice, this list of conditions and the following disclaimer in
-# * the documentation and/or other materials provided with the
-# * distribution.
-# * * Neither the name of Intel Corporation nor the names of its
-# * contributors may be used to endorse or promote products derived
-# * from this software without specific prior written permission.
-# *
-# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-# */
-
-#####################################################################
-# This script is designed to modify the call to the QEMU emulator
-# to support userspace vhost when starting a guest machine through
-# libvirt with vhost enabled. The steps to enable this are as follows
-# and should be run as root:
-#
-# 1. Place this script in a libvirtd's binary search PATH ($PATH)
-# A good location would be in the same directory that the QEMU
-# binary is located
-#
-# 2. Ensure that the script has the same owner/group and file
-# permissions as the QEMU binary
-#
-# 3. Update the VM xml file using "virsh edit VM.xml"
-#
-# 3.a) Set the VM to use the launch script
-#
-# Set the emulator path contained in the
-# <emulator><emulator/> tags
-#
-# e.g replace <emulator>/usr/bin/qemu-kvm<emulator/>
-# with <emulator>/usr/bin/qemu-wrap.py<emulator/>
-#
-# 3.b) Set the VM's device's to use vhost-net offload
-#
-# <interface type="network">
-# <model type="virtio"/>
-# <driver name="vhost"/>
-# <interface/>
-#
-# 4. Enable libvirt to access our userpace device file by adding it to
-# controllers cgroup for libvirtd using the following steps
-#
-# 4.a) In /etc/libvirt/qemu.conf add/edit the following lines:
-# 1) cgroup_controllers = [ ... "devices", ... ]
-# 2) clear_emulator_capabilities = 0
-# 3) user = "root"
-# 4) group = "root"
-# 5) cgroup_device_acl = [
-# "/dev/null", "/dev/full", "/dev/zero",
-# "/dev/random", "/dev/urandom",
-# "/dev/ptmx", "/dev/kvm", "/dev/kqemu",
-# "/dev/rtc", "/dev/hpet", "/dev/net/tun",
-# "/dev/<devbase-name>-<index>",
-# ]
-#
-# 4.b) Disable SELinux or set to permissive mode
-#
-# 4.c) Mount cgroup device controller
-# "mkdir /dev/cgroup"
-# "mount -t cgroup none /dev/cgroup -o devices"
-#
-# 4.d) Set hugetlbfs_mount variable - ( Optional )
-# VMs using userspace vhost must use hugepage backed
-# memory. This can be enabled in the libvirt XML
-# config by adding a memory backing section to the
-# XML config e.g.
-# <memoryBacking>
-# <hugepages/>
-# </memoryBacking>
-# This memory backing section should be added after the
-# <memory> and <currentMemory> sections. This will add
-# flags "-mem-prealloc -mem-path <path>" to the QEMU
-# command line. The hugetlbfs_mount variable can be used
-# to override the default <path> passed through by libvirt.
-#
-# if "-mem-prealloc" or "-mem-path <path>" are not passed
-# through and a vhost device is detected then these options will
-# be automatically added by this script. This script will detect
-# the system hugetlbfs mount point to be used for <path>. The
-# default <path> for this script can be overidden by the
-# hugetlbfs_dir variable in the configuration section of this script.
-#
-#
-# 4.e) Restart the libvirtd system process
-# e.g. on Fedora "systemctl restart libvirtd.service"
-#
-#
-# 4.f) Edit the Configuration Parameters section of this script
-# to point to the correct emulator location and set any
-# addition options
-#
-# The script modifies the libvirtd Qemu call by modifying/adding
-# options based on the configuration parameters below.
-# NOTE:
-# emul_path and us_vhost_path must be set
-# All other parameters are optional
-#####################################################################
-
-
-#############################################
-# Configuration Parameters
-#############################################
-#Path to QEMU binary
-emul_path = "/usr/local/bin/qemu-system-x86_64"
-
-#Path to userspace vhost device file
-# This filename should match the --dev-basename --dev-index parameters of
-# the command used to launch the userspace vhost sample application e.g.
-# if the sample app lauch command is:
-# ./build/vhost-switch ..... --dev-basename usvhost --dev-index 1
-# then this variable should be set to:
-# us_vhost_path = "/dev/usvhost-1"
-us_vhost_path = "/dev/usvhost-1"
-
-#List of additional user defined emulation options. These options will
-#be added to all Qemu calls
-emul_opts_user = []
-
-#List of additional user defined emulation options for vhost only.
-#These options will only be added to vhost enabled guests
-emul_opts_user_vhost = []
-
-#For all VHOST enabled VMs, the VM memory is preallocated from hugetlbfs
-# Set this variable to one to enable this option for all VMs
-use_huge_all = 0
-
-#Instead of autodetecting, override the hugetlbfs directory by setting
-#this variable
-hugetlbfs_dir = ""
-
-#############################################
-
-
-#############################################
-# ****** Do Not Modify Below this Line ******
-#############################################
-
-import sys, os, subprocess
-
-
-#List of open userspace vhost file descriptors
-fd_list = []
-
-#additional virtio device flags when using userspace vhost
-vhost_flags = [ "csum=off",
- "gso=off",
- "guest_tso4=off",
- "guest_tso6=off",
- "guest_ecn=off"
- ]
-
-
-#############################################
-# Find the system hugefile mount point.
-# Note:
-# if multiple hugetlbfs mount points exist
-# then the first one found will be used
-#############################################
-def find_huge_mount():
-
- if (len(hugetlbfs_dir)):
- return hugetlbfs_dir
-
- huge_mount = ""
-
- if (os.access("/proc/mounts", os.F_OK)):
- f = open("/proc/mounts", "r")
- line = f.readline()
- while line:
- line_split = line.split(" ")
- if line_split[2] == 'hugetlbfs':
- huge_mount = line_split[1]
- break
- line = f.readline()
- else:
- print "/proc/mounts not found"
- exit (1)
-
- f.close
- if len(huge_mount) == 0:
- print "Failed to find hugetlbfs mount point"
- exit (1)
-
- return huge_mount
-
-
-#############################################
-# Get a userspace Vhost file descriptor
-#############################################
-def get_vhost_fd():
-
- if (os.access(us_vhost_path, os.F_OK)):
- fd = os.open( us_vhost_path, os.O_RDWR)
- else:
- print ("US-Vhost file %s not found" %us_vhost_path)
- exit (1)
-
- return fd
-
-
-#############################################
-# Check for vhostfd. if found then replace
-# with our own vhost fd and append any vhost
-# flags onto the end
-#############################################
-def modify_netdev_arg(arg):
-
- global fd_list
- vhost_in_use = 0
- s = ''
- new_opts = []
- netdev_opts = arg.split(",")
-
- for opt in netdev_opts:
- #check if vhost is used
- if "vhost" == opt[:5]:
- vhost_in_use = 1
- else:
- new_opts.append(opt)
-
- #if using vhost append vhost options
- if vhost_in_use == 1:
- #append vhost on option
- new_opts.append('vhost=on')
- #append vhostfd ption
- new_fd = get_vhost_fd()
- new_opts.append('vhostfd=' + str(new_fd))
- fd_list.append(new_fd)
-
- #concatenate all options
- for opt in new_opts:
- if len(s) > 0:
- s+=','
-
- s+=opt
-
- return s
-
-
-#############################################
-# Main
-#############################################
-def main():
-
- global fd_list
- global vhost_in_use
- new_args = []
- num_cmd_args = len(sys.argv)
- emul_call = ''
- mem_prealloc_set = 0
- mem_path_set = 0
- num = 0;
-
- #parse the parameters
- while (num < num_cmd_args):
- arg = sys.argv[num]
-
- #Check netdev +1 parameter for vhostfd
- if arg == '-netdev':
- num_vhost_devs = len(fd_list)
- new_args.append(arg)
-
- num+=1
- arg = sys.argv[num]
- mod_arg = modify_netdev_arg(arg)
- new_args.append(mod_arg)
-
- #append vhost flags if this is a vhost device
- # and -device is the next arg
- # i.e -device -opt1,-opt2,...,-opt3,%vhost
- if (num_vhost_devs < len(fd_list)):
- num+=1
- arg = sys.argv[num]
- if arg == '-device':
- new_args.append(arg)
- num+=1
- new_arg = sys.argv[num]
- for flag in vhost_flags:
- new_arg = ''.join([new_arg,',',flag])
- new_args.append(new_arg)
- else:
- new_args.append(arg)
- elif arg == '-mem-prealloc':
- mem_prealloc_set = 1
- new_args.append(arg)
- elif arg == '-mem-path':
- mem_path_set = 1
- new_args.append(arg)
-
- else:
- new_args.append(arg)
-
- num+=1
-
- #Set Qemu binary location
- emul_call+=emul_path
- emul_call+=" "
-
- #Add prealloc mem options if using vhost and not already added
- if ((len(fd_list) > 0) and (mem_prealloc_set == 0)):
- emul_call += "-mem-prealloc "
-
- #Add mempath mem options if using vhost and not already added
- if ((len(fd_list) > 0) and (mem_path_set == 0)):
- #Detect and add hugetlbfs mount point
- mp = find_huge_mount()
- mp = "".join(["-mem-path ", mp])
- emul_call += mp
- emul_call += " "
-
-
- #add user options
- for opt in emul_opts_user:
- emul_call += opt
- emul_call += " "
-
- #Add add user vhost only options
- if len(fd_list) > 0:
- for opt in emul_opts_user_vhost:
- emul_call += opt
- emul_call += " "
-
- #Add updated libvirt options
- iter_args = iter(new_args)
- #skip 1st arg i.e. call to this script
- next(iter_args)
- for arg in iter_args:
- emul_call+=str(arg)
- emul_call+= " "
-
- #Call QEMU
- subprocess.call(emul_call, shell=True)
-
-
- #Close usvhost files
- for fd in fd_list:
- os.close(fd)
-
-
-if __name__ == "__main__":
- main()
-
diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
index 00b1328..7a05dab 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -34,11 +34,6 @@
#ifndef _VIRTIO_NET_H_
#define _VIRTIO_NET_H_
-/**
- * @file
- * Interface to vhost net
- */
-
#include <stdint.h>
#include <linux/virtio_ring.h>
#include <linux/virtio_net.h>
@@ -48,66 +43,38 @@
#include <rte_mempool.h>
#include <rte_mbuf.h>
-/* Used to indicate that the device is running on a data core */
-#define VIRTIO_DEV_RUNNING 1
-
-/* Backend value set by guest. */
-#define VIRTIO_DEV_STOPPED -1
-
+#define VIRTIO_DEV_RUNNING 1 /**< Used to indicate that the device is running on a data core. */
+#define VIRTIO_DEV_STOPPED -1 /**< Backend value set by guest. */
/* Enum for virtqueue management. */
enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
-#define BUF_VECTOR_MAX 256
-
-/**
- * Structure contains buffer address, length and descriptor index
- * from vring to do scatter RX.
- */
-struct buf_vector {
- uint64_t buf_addr;
- uint32_t buf_len;
- uint32_t desc_idx;
-};
-
/**
* Structure contains variables relevant to RX/TX virtqueues.
*/
struct vhost_virtqueue {
- struct vring_desc *desc; /**< Virtqueue descriptor ring. */
- struct vring_avail *avail; /**< Virtqueue available ring. */
- struct vring_used *used; /**< Virtqueue used ring. */
- uint32_t size; /**< Size of descriptor ring. */
- uint32_t backend; /**< Backend value to determine if device should started/stopped. */
- uint16_t vhost_hlen; /**< Vhost header length (varies depending on RX merge buffers. */
- volatile uint16_t last_used_idx; /**< Last index used on the available ring */
- volatile uint16_t last_used_idx_res; /**< Used for multiple devices reserving buffers. */
- eventfd_t callfd; /**< Currently unused as polling mode is enabled. */
- eventfd_t kickfd; /**< Used to notify the guest (trigger interrupt). */
- struct buf_vector buf_vec[BUF_VECTOR_MAX]; /**< for scatter RX. */
-} __rte_cache_aligned;
-
-/**
- * Device structure contains all configuration information relating to the device.
- */
-struct virtio_net {
- struct vhost_virtqueue *virtqueue[VIRTIO_QNUM]; /**< Contains all virtqueue information. */
- struct virtio_memory *mem; /**< QEMU memory and memory region information. */
- uint64_t features; /**< Negotiated feature set. */
- uint64_t device_fh; /**< device identifier. */
- uint32_t flags; /**< Device flags. Only used to check if device is running on data core. */
- void *priv; /**< private context */
+ struct vring_desc *desc; /**< descriptor ring. */
+ struct vring_avail *avail; /**< available ring. */
+ struct vring_used *used; /**< used ring. */
+ uint32_t size; /**< Size of descriptor ring. */
+ uint32_t backend; /**< Backend value to determine if device should be started/stopped. */
+ uint16_t vhost_hlen; /**< Vhost header length (varies depending on RX merge buffers. */
+ volatile uint16_t last_used_idx; /**< Last index used on the available ring. */
+ volatile uint16_t last_used_idx_res; /**< Used for multiple devices reserving buffers. */
+ eventfd_t callfd; /**< Currently unused as polling mode is enabled. */
+ eventfd_t kickfd; /**< Used to notify the guest (trigger interrupt). */
} __rte_cache_aligned;
/**
- * Information relating to memory regions including offsets to addresses in QEMUs memory file.
+ * Information relating to memory regions including offsets to
+ * addresses in QEMUs memory file.
*/
struct virtio_memory_regions {
- uint64_t guest_phys_address; /**< Base guest physical address of region. */
- uint64_t guest_phys_address_end; /**< End guest physical address of region. */
- uint64_t memory_size; /**< Size of region. */
- uint64_t userspace_address; /**< Base userspace address of region. */
- uint64_t address_offset; /**< Offset of region for address translation. */
+ uint64_t guest_phys_address; /**< Base guest physical address of region. */
+ uint64_t guest_phys_address_end; /**< End guest physical address of region. */
+ uint64_t memory_size; /**< Size of region. */
+ uint64_t userspace_address; /**< Base userspace address of region. */
+ uint64_t address_offset; /**< Offset of region for address translation. */
};
@@ -115,21 +82,34 @@ struct virtio_memory_regions {
* Memory structure includes region and mapping information.
*/
struct virtio_memory {
- uint64_t base_address; /**< Base QEMU userspace address of the memory file. */
- uint64_t mapped_address; /**< Mapped address of memory file base in our applications memory space. */
- uint64_t mapped_size; /**< Total size of memory file. */
- uint32_t nregions; /**< Number of memory regions. */
+ uint64_t base_address; /**< Base QEMU userspace address of the memory file. */
+ uint64_t mapped_address; /**< Mapped address of memory file base in our applications memory space. */
+ uint64_t mapped_size; /**< Total size of memory file. */
+ uint32_t nregions; /**< Number of memory regions. */
struct virtio_memory_regions regions[0]; /**< Memory region information. */
};
/**
+ * Device structure contains all configuration information relating to the device.
+ */
+struct virtio_net {
+ struct vhost_virtqueue *virtqueue[VIRTIO_QNUM]; /**< Contains all virtqueue information. */
+ struct virtio_memory *mem; /**< QEMU memory and memory region information. */
+ uint64_t features; /**< Negotiated feature set. */
+ uint64_t device_fh; /**< Device identifier. */
+ uint32_t flags; /**< Device flags. Only used to check if device is running on data core. */
+ void *priv;
+} __rte_cache_aligned;
+
+/**
* Device operations to add/remove device.
*/
struct virtio_net_device_ops {
- int (*new_device)(struct virtio_net *); /**< Add device. */
- void (*destroy_device)(volatile struct virtio_net *); /**< Remove device. */
+ int (*new_device)(struct virtio_net *); /**< Add device. */
+ void (*destroy_device)(struct virtio_net *); /**< Remove device. */
};
+
static inline uint16_t __attribute__((always_inline))
rte_vring_available_entries(struct virtio_net *dev, uint16_t queue_id)
{
@@ -179,7 +159,7 @@ int rte_vhost_driver_register(const char *dev_name);
/* Register callbacks. */
int rte_vhost_driver_callback_register(struct virtio_net_device_ops const * const);
-/* Start vhost driver session blocking loop. */
+
int rte_vhost_driver_session_start(void);
/**
@@ -192,8 +172,8 @@ int rte_vhost_driver_session_start(void);
* @return
* num of packets enqueued
*/
-uint16_t rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
- struct rte_mbuf **pkts, uint16_t count);
+uint32_t rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
+ struct rte_mbuf **pkts, uint32_t count);
/**
* This function gets guest buffers from the virtio device TX virtqueue,
@@ -206,7 +186,7 @@ uint16_t rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
* @return
* num of packets dequeued
*/
-uint16_t rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
- struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count);
+uint32_t rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
+ struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint32_t count);
#endif /* _VIRTIO_NET_H_ */
diff --git a/lib/librte_vhost/vhost-cuse/vhost-net-cdev.c b/lib/librte_vhost/vhost-cuse/vhost-net-cdev.c
new file mode 100644
index 0000000..4671643
--- /dev/null
+++ b/lib/librte_vhost/vhost-cuse/vhost-net-cdev.c
@@ -0,0 +1,436 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <fcntl.h>
+#include <fuse/cuse_lowlevel.h>
+#include <linux/limits.h>
+#include <linux/vhost.h>
+#include <linux/virtio_net.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+
+#include <rte_ethdev.h>
+#include <rte_log.h>
+#include <rte_string_fns.h>
+#include <rte_virtio_net.h>
+
+#include "virtio-net-cdev.h"
+#include "vhost-net.h"
+#include "eventfd_link/eventfd_link.h"
+
+#define FUSE_OPT_DUMMY "\0\0"
+#define FUSE_OPT_FORE "-f\0\0"
+#define FUSE_OPT_NOMULTI "-s\0\0"
+
+static const uint32_t default_major = 231;
+static const uint32_t default_minor = 1;
+static const char cuse_device_name[] = "/dev/cuse";
+static const char default_cdev[] = "vhost-net";
+static const char eventfd_cdev[] = "/dev/eventfd-link";
+
+static struct fuse_session *session;
+struct vhost_net_device_ops const *ops;
+
+/*
+ * Returns vhost_device_ctx from given fuse_req_t. The index is populated later
+ * when the device is added to the device linked list.
+ */
+static struct vhost_device_ctx
+fuse_req_to_vhost_ctx(fuse_req_t req, struct fuse_file_info *fi)
+{
+ struct vhost_device_ctx ctx;
+ struct fuse_ctx const *const req_ctx = fuse_req_ctx(req);
+
+ ctx.pid = req_ctx->pid;
+ ctx.fh = fi->fh;
+
+ return ctx;
+}
+
+/*
+ * When the device is created in QEMU it gets initialised here and
+ * added to the device linked list.
+ */
+static void
+vhost_net_open(fuse_req_t req, struct fuse_file_info *fi)
+{
+ struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
+ int err = 0;
+
+ err = ops->new_device(ctx);
+ if (err == -1) {
+ fuse_reply_err(req, EPERM);
+ return;
+ }
+
+ fi->fh = err;
+
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "(%"PRIu64") Device configuration started\n", fi->fh);
+ fuse_reply_open(req, fi);
+}
+
+/*
+ * When QEMU is shutdown or killed the device gets released.
+ */
+static void
+vhost_net_release(fuse_req_t req, struct fuse_file_info *fi)
+{
+ int err = 0;
+ struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
+
+ ops->destroy_device(ctx);
+ RTE_LOG(INFO, VHOST_CONFIG, "(%"PRIu64") Device released\n", ctx.fh);
+ fuse_reply_err(req, err);
+}
+
+/*
+ * Boilerplate code for CUSE IOCTL
+ * Implicit arguments: ctx, req, result.
+ */
+#define VHOST_IOCTL(func) do { \
+ result = (func)(ctx); \
+ fuse_reply_ioctl(req, result, NULL, 0); \
+} while (0)
+
+/*
+ * Boilerplate IOCTL RETRY
+ * Implicit arguments: req.
+ */
+#define VHOST_IOCTL_RETRY(size_r, size_w) do { \
+ struct iovec iov_r = { arg, (size_r) }; \
+ struct iovec iov_w = { arg, (size_w) }; \
+ fuse_reply_ioctl_retry(req, &iov_r, \
+ (size_r) ? 1 : 0, &iov_w, (size_w) ? 1 : 0);\
+} while (0)
+
+/*
+ * Boilerplate code for CUSE Read IOCTL
+ * Implicit arguments: ctx, req, result, in_bufsz, in_buf.
+ */
+#define VHOST_IOCTL_R(type, var, func) do { \
+ if (!in_bufsz) { \
+ VHOST_IOCTL_RETRY(sizeof(type), 0);\
+ } else { \
+ (var) = *(const type*)in_buf; \
+ result = func(ctx, &(var)); \
+ fuse_reply_ioctl(req, result, NULL, 0);\
+ } \
+} while (0)
+
+/*
+ * Boilerplate code for CUSE Write IOCTL
+ * Implicit arguments: ctx, req, result, out_bufsz.
+ */
+#define VHOST_IOCTL_W(type, var, func) do { \
+ if (!out_bufsz) { \
+ VHOST_IOCTL_RETRY(0, sizeof(type));\
+ } else { \
+ result = (func)(ctx, &(var));\
+ fuse_reply_ioctl(req, result, &(var), sizeof(type));\
+ } \
+} while (0)
+
+/*
+ * Boilerplate code for CUSE Read/Write IOCTL
+ * Implicit arguments: ctx, req, result, in_bufsz, in_buf.
+ */
+#define VHOST_IOCTL_RW(type1, var1, type2, var2, func) do { \
+ if (!in_bufsz) { \
+ VHOST_IOCTL_RETRY(sizeof(type1), sizeof(type2));\
+ } else { \
+ (var1) = *(const type1*) (in_buf); \
+ result = (func)(ctx, (var1), &(var2)); \
+ fuse_reply_ioctl(req, result, &(var2), sizeof(type2));\
+ } \
+} while (0)
+
+/*
+ * This function uses the eventfd_link kernel module to copy an eventfd file
+ * descriptor provided by QEMU in to our process space.
+ *
+ * Returns the local eventfd on success, -1 on failure.  The caller owns
+ * (and must eventually close) the returned descriptor.
+ */
+static int
+eventfd_copy(int target_fd, int target_pid)
+{
+	int eventfd_link, ret;
+	struct eventfd_copy eventfd_copy;
+	int fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+
+	if (fd == -1)
+		return -1;
+
+	/* Open the character device to the kernel module. */
+	/* TODO: check this earlier rather than fail until VM boots! */
+	eventfd_link = open(eventfd_cdev, O_RDWR);
+	if (eventfd_link < 0) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"eventfd_link module is not loaded\n");
+		/* Do not leak the freshly created eventfd. */
+		close(fd);
+		return -1;
+	}
+
+	eventfd_copy.source_fd = fd;
+	eventfd_copy.target_fd = target_fd;
+	eventfd_copy.target_pid = target_pid;
+	/* Call the IOCTL to copy the eventfd. */
+	ret = ioctl(eventfd_link, EVENTFD_COPY, &eventfd_copy);
+	close(eventfd_link);
+
+	if (ret < 0) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"EVENTFD_COPY ioctl failed\n");
+		/* The copy failed, so the eventfd is useless: release it. */
+		close(fd);
+		return -1;
+	}
+
+	return fd;
+}
+
+/*
+ * The IOCTLs are handled using CUSE/FUSE in userspace. Depending on
+ * the type of IOCTL a buffer is requested to read or to write. This
+ * request is handled by FUSE and the buffer is then given to CUSE.
+ */
+static void
+vhost_net_ioctl(fuse_req_t req, int cmd, void *arg,
+		struct fuse_file_info *fi, __rte_unused unsigned flags,
+		const void *in_buf, size_t in_bufsz, size_t out_bufsz)
+{
+	struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
+	struct vhost_vring_file file;
+	struct vhost_vring_state state;
+	struct vhost_vring_addr addr;
+	uint64_t features;
+	uint32_t index;
+	int result = 0;
+
+	switch (cmd) {
+	case VHOST_NET_SET_BACKEND:
+		LOG_DEBUG(VHOST_CONFIG,
+			"(%"PRIu64") IOCTL: VHOST_NET_SET_BACKEND\n", ctx.fh);
+		VHOST_IOCTL_R(struct vhost_vring_file, file, ops->set_backend);
+		break;
+
+	case VHOST_GET_FEATURES:
+		LOG_DEBUG(VHOST_CONFIG,
+			"(%"PRIu64") IOCTL: VHOST_GET_FEATURES\n", ctx.fh);
+		VHOST_IOCTL_W(uint64_t, features, ops->get_features);
+		break;
+
+	case VHOST_SET_FEATURES:
+		LOG_DEBUG(VHOST_CONFIG,
+			"(%"PRIu64") IOCTL: VHOST_SET_FEATURES\n", ctx.fh);
+		VHOST_IOCTL_R(uint64_t, features, ops->set_features);
+		break;
+
+	case VHOST_RESET_OWNER:
+		LOG_DEBUG(VHOST_CONFIG,
+			"(%"PRIu64") IOCTL: VHOST_RESET_OWNER\n", ctx.fh);
+		VHOST_IOCTL(ops->reset_owner);
+		break;
+
+	case VHOST_SET_OWNER:
+		LOG_DEBUG(VHOST_CONFIG,
+			"(%"PRIu64") IOCTL: VHOST_SET_OWNER\n", ctx.fh);
+		VHOST_IOCTL(ops->set_owner);
+		break;
+
+	case VHOST_SET_MEM_TABLE:
+		/*TODO fix race condition.*/
+		LOG_DEBUG(VHOST_CONFIG,
+			"(%"PRIu64") IOCTL: VHOST_SET_MEM_TABLE\n", ctx.fh);
+		static struct vhost_memory mem_temp;
+		switch (in_bufsz) {
+		case 0:
+			VHOST_IOCTL_RETRY(sizeof(struct vhost_memory), 0);
+			break;
+
+		case sizeof(struct vhost_memory):
+			mem_temp = *(const struct vhost_memory *) in_buf;
+
+			if (mem_temp.nregions > 0) {
+				VHOST_IOCTL_RETRY(sizeof(struct vhost_memory) +
+					(sizeof(struct vhost_memory_region) *
+						mem_temp.nregions), 0);
+			} else {
+				result = -1;
+				fuse_reply_ioctl(req, result, NULL, 0);
+			}
+			break;
+
+		default:
+			result = cuse_set_mem_table(ctx, in_buf,
+				mem_temp.nregions);
+			if (result)
+				fuse_reply_err(req, EINVAL);
+			else
+				fuse_reply_ioctl(req, result, NULL, 0);
+		}
+		break;
+
+	case VHOST_SET_VRING_NUM:
+		LOG_DEBUG(VHOST_CONFIG,
+			"(%"PRIu64") IOCTL: VHOST_SET_VRING_NUM\n", ctx.fh);
+		VHOST_IOCTL_R(struct vhost_vring_state, state, ops->set_vring_num);
+		break;
+
+	case VHOST_SET_VRING_BASE:
+		LOG_DEBUG(VHOST_CONFIG,
+			"(%"PRIu64") IOCTL: VHOST_SET_VRING_BASE\n", ctx.fh);
+		VHOST_IOCTL_R(struct vhost_vring_state, state, ops->set_vring_base);
+		break;
+
+	case VHOST_GET_VRING_BASE:
+		LOG_DEBUG(VHOST_CONFIG,
+			"(%"PRIu64") IOCTL: VHOST_GET_VRING_BASE\n", ctx.fh);
+		VHOST_IOCTL_RW(uint32_t, index,
+			struct vhost_vring_state, state, ops->get_vring_base);
+		break;
+
+	case VHOST_SET_VRING_ADDR:
+		LOG_DEBUG(VHOST_CONFIG,
+			"(%"PRIu64") IOCTL: VHOST_SET_VRING_ADDR\n", ctx.fh);
+		VHOST_IOCTL_R(struct vhost_vring_addr, addr, ops->set_vring_addr);
+		break;
+
+	case VHOST_SET_VRING_KICK:
+	case VHOST_SET_VRING_CALL:
+		if (!in_buf) {
+			VHOST_IOCTL_RETRY(sizeof(struct vhost_vring_file), 0);
+		} else {
+			int fd;
+			file = *(const struct vhost_vring_file *)in_buf;
+			LOG_DEBUG(VHOST_CONFIG,
+				"kick/call idx:%d fd:%d\n", file.index, file.fd);
+			fd = eventfd_copy(file.fd, ctx.pid);
+			if (fd < 0) {
+				/* Stop here: do not hand a bogus fd down. */
+				fuse_reply_ioctl(req, -1, NULL, 0);
+				result = -1;
+				break;
+			}
+			file.fd = fd;
+			/*
+			 * KICK must reach set_vring_kick and CALL must reach
+			 * set_vring_call (the RFC had them swapped).
+			 */
+			if (cmd == VHOST_SET_VRING_KICK) {
+				VHOST_IOCTL_R(struct vhost_vring_file, file, ops->set_vring_kick);
+			} else {
+				VHOST_IOCTL_R(struct vhost_vring_file, file, ops->set_vring_call);
+			}
+		}
+		break;
+
+	default:
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"(%"PRIu64") IOCTL: DOES NOT EXIST\n", ctx.fh);
+		result = -1;
+		fuse_reply_ioctl(req, result, NULL, 0);
+	}
+
+	if (result < 0)
+		LOG_DEBUG(VHOST_CONFIG,
+			"(%"PRIu64") IOCTL: FAIL\n", ctx.fh);
+	else
+		LOG_DEBUG(VHOST_CONFIG,
+			"(%"PRIu64") IOCTL: SUCCESS\n", ctx.fh);
+}
+
+/*
+ * Structure handling open, release and ioctl function pointers is populated.
+ */
+static const struct cuse_lowlevel_ops vhost_net_ops = {
+ .open = vhost_net_open,
+ .release = vhost_net_release,
+ .ioctl = vhost_net_ioctl,
+};
+
+/*
+ * cuse_info is populated and used to register the cuse device.
+ * vhost_net_device_ops are also passed when the device is registered in app.
+ */
+int
+rte_vhost_driver_register(const char *dev_name)
+{
+ struct cuse_info cuse_info;
+ char device_name[PATH_MAX] = "";
+ char char_device_name[PATH_MAX] = "";
+ const char *device_argv[] = { device_name };
+
+ char fuse_opt_dummy[] = FUSE_OPT_DUMMY;
+ char fuse_opt_fore[] = FUSE_OPT_FORE;
+ char fuse_opt_nomulti[] = FUSE_OPT_NOMULTI;
+ char *fuse_argv[] = {fuse_opt_dummy, fuse_opt_fore, fuse_opt_nomulti};
+
+ if (access(cuse_device_name, R_OK | W_OK) < 0) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "char device %s can't be accessed, maybe not exist\n",
+ cuse_device_name);
+ return -1;
+ }
+
+ /*
+ * The device name is created. This is passed to QEMU so that it can
+ * register the device with our application.
+ */
+ snprintf(device_name, PATH_MAX, "DEVNAME=%s", dev_name);
+ snprintf(char_device_name, PATH_MAX, "/dev/%s", dev_name);
+
+ /* Check if device already exists. */
+ if (access(char_device_name, F_OK) != -1) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "char device %s already exists\n", char_device_name);
+ return -1;
+ }
+
+ memset(&cuse_info, 0, sizeof(cuse_info));
+ cuse_info.dev_major = default_major;
+ cuse_info.dev_minor = default_minor;
+ cuse_info.dev_info_argc = 1;
+ cuse_info.dev_info_argv = device_argv;
+ cuse_info.flags = CUSE_UNRESTRICTED_IOCTL;
+
+ ops = get_virtio_net_callbacks();
+
+ session = cuse_lowlevel_setup(3, fuse_argv,
+ &cuse_info, &vhost_net_ops, 0, NULL);
+ if (session == NULL)
+ return -1;
+
+ return 0;
+}
+
+/**
+ * The CUSE session is launched allowing the application to receive open,
+ * release and ioctl calls.
+ */
+int
+rte_vhost_driver_session_start(void)
+{
+ fuse_session_loop(session);
+
+ return 0;
+}
diff --git a/lib/librte_vhost/vhost-cuse/virtio-net-cdev.c b/lib/librte_vhost/vhost-cuse/virtio-net-cdev.c
new file mode 100644
index 0000000..5c16aa5
--- /dev/null
+++ b/lib/librte_vhost/vhost-cuse/virtio-net-cdev.c
@@ -0,0 +1,314 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <dirent.h>
+#include <linux/vhost.h>
+#include <linux/virtio_net.h>
+#include <fuse/cuse_lowlevel.h>
+#include <stddef.h>
+#include <string.h>
+#include <stdlib.h>
+#include <sys/eventfd.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include <rte_log.h>
+
+#include "vhost-net.h"
+#include "virtio-net-cdev.h"
+
+extern struct vhost_net_device_ops const *ops;
+
+/* Line size for reading maps file. */
+static const uint32_t BUFSIZE = PATH_MAX;
+
+/* Size of prot char array in procmap. */
+#define PROT_SZ 5
+
+/* Number of elements in procmap struct. */
+#define PROCMAP_SZ 8
+
+/* Structure containing information gathered from maps file. */
+struct procmap {
+ uint64_t va_start; /* Start virtual address in file. */
+ uint64_t len; /* Size of file. */
+ uint64_t pgoff; /* Not used. */
+ uint32_t maj; /* Not used. */
+ uint32_t min; /* Not used. */
+ uint32_t ino; /* Not used. */
+ char prot[PROT_SZ]; /* Not used. */
+ char fname[PATH_MAX]; /* File name. */
+};
+
+/*
+ * Locate the file containing QEMU's memory space and
+ * map it to our address space.
+ *
+ * pid is the QEMU process id; addr is the QEMU userspace virtual address
+ * of the start of guest memory.  On success *mapped_address and
+ * *mapped_size describe the local mapping and 0 is returned; -1 on error.
+ */
+static int
+host_memory_map(pid_t pid, uint64_t addr,
+	uint64_t *mapped_address, uint64_t *mapped_size)
+{
+	struct dirent *dptr = NULL;
+	struct procmap procmap;
+	DIR *dp = NULL;
+	int fd;
+	int i;
+	char memfile[PATH_MAX];
+	char mapfile[PATH_MAX];
+	char procdir[PATH_MAX];
+	char resolved_path[PATH_MAX];
+	FILE *fmap;
+	void *map;
+	uint8_t found = 0;
+	char line[BUFSIZE];
+	char dlm[] = "- :   ";
+	char *str, *sp, *in[PROCMAP_SZ];
+	char *end = NULL;
+
+	/* Path where mem files are located. */
+	snprintf(procdir, PATH_MAX, "/proc/%u/fd/", pid);
+	/* Maps file used to locate mem file. */
+	snprintf(mapfile, PATH_MAX, "/proc/%u/maps", pid);
+
+	fmap = fopen(mapfile, "r");
+	if (fmap == NULL) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Failed to open maps file for pid %d\n", pid);
+		return -1;
+	}
+
+	/* Read through maps file until we find out base_address. */
+	while (fgets(line, BUFSIZE, fmap) != 0) {
+		str = line;
+		errno = 0;
+		/* Split line in to fields. */
+		for (i = 0; i < PROCMAP_SZ; i++) {
+			in[i] = strtok_r(str, &dlm[i], &sp);
+			if ((in[i] == NULL) || (errno != 0)) {
+				fclose(fmap);
+				return -1;
+			}
+			str = NULL;
+		}
+
+		/* Convert/Copy each field as needed. */
+		procmap.va_start = strtoull(in[0], &end, 16);
+		if ((in[0] == '\0') || (end == NULL) || (*end != '\0') ||
+			(errno != 0)) {
+			fclose(fmap);
+			return -1;
+		}
+
+		procmap.len = strtoull(in[1], &end, 16);
+		if ((in[1] == '\0') || (end == NULL) || (*end != '\0') ||
+			(errno != 0)) {
+			fclose(fmap);
+			return -1;
+		}
+
+		procmap.pgoff = strtoull(in[3], &end, 16);
+		if ((in[3] == '\0') || (end == NULL) || (*end != '\0') ||
+			(errno != 0)) {
+			fclose(fmap);
+			return -1;
+		}
+
+		procmap.maj = strtoul(in[4], &end, 16);
+		if ((in[4] == '\0') || (end == NULL) || (*end != '\0') ||
+			(errno != 0)) {
+			fclose(fmap);
+			return -1;
+		}
+
+		procmap.min = strtoul(in[5], &end, 16);
+		if ((in[5] == '\0') || (end == NULL) || (*end != '\0') ||
+			(errno != 0)) {
+			fclose(fmap);
+			return -1;
+		}
+
+		procmap.ino = strtoul(in[6], &end, 16);
+		if ((in[6] == '\0') || (end == NULL) || (*end != '\0') ||
+			(errno != 0)) {
+			fclose(fmap);
+			return -1;
+		}
+
+		memcpy(&procmap.prot, in[2], PROT_SZ);
+		/*
+		 * Copy the file name as a bounded string: in[7] points into
+		 * line[] and is usually much shorter than PATH_MAX, so a
+		 * PATH_MAX-sized memcpy would read past the token.
+		 */
+		snprintf(procmap.fname, PATH_MAX, "%s", in[7]);
+
+		/* procmap.len currently holds the end VA; turn it into a length. */
+		if (procmap.va_start == addr) {
+			procmap.len = procmap.len - procmap.va_start;
+			found = 1;
+			break;
+		}
+	}
+	fclose(fmap);
+
+	if (!found) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Failed to find memory file in pid %d maps file\n", pid);
+		return -1;
+	}
+
+	/* Find the guest memory file among the process fds. */
+	dp = opendir(procdir);
+	if (dp == NULL) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Cannot open pid %d process directory\n",
+			pid);
+		return -1;
+
+	}
+
+	found = 0;
+
+	/* Read the fd directory contents. */
+	while (NULL != (dptr = readdir(dp))) {
+		snprintf(memfile, PATH_MAX, "/proc/%u/fd/%s",
+				pid, dptr->d_name);
+		/*
+		 * Check realpath()'s return value, not the array (which can
+		 * never be NULL).  Entries such as sockets or deleted fds
+		 * fail to resolve; skip them and keep scanning.
+		 */
+		if (realpath(memfile, resolved_path) == NULL)
+			continue;
+		if (strncmp(resolved_path, procmap.fname,
+			strnlen(procmap.fname, PATH_MAX)) == 0) {
+			found = 1;
+			break;
+		}
+	}
+
+	closedir(dp);
+
+	if (found == 0) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Failed to find memory file for pid %d\n",
+			pid);
+		return -1;
+	}
+	/* Open the shared memory file and map the memory into this process. */
+	fd = open(memfile, O_RDWR);
+
+	if (fd == -1) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Failed to open %s for pid %d\n",
+			memfile, pid);
+		return -1;
+	}
+
+	map = mmap(0, (size_t)procmap.len, PROT_READ|PROT_WRITE,
+		MAP_POPULATE|MAP_SHARED, fd, 0);
+	close(fd);
+
+	if (map == MAP_FAILED) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Error mapping the file %s for pid %d\n",
+			memfile, pid);
+		return -1;
+	}
+
+	/* Store the memory address and size in the device data structure */
+	*mapped_address = (uint64_t)(uintptr_t)map;
+	*mapped_size = procmap.len;
+
+	/* Log the size, not the pointer: dereference mapped_size. */
+	LOG_DEBUG(VHOST_CONFIG,
+		"Mem File: %s->%s - Size: %llu - VA: %p\n",
+		memfile, resolved_path,
+		(unsigned long long)*mapped_size, map);
+
+	return 0;
+}
+
+/*
+ * Translate the vhost memory table supplied by QEMU into local
+ * virtio_memory_regions (mapping guest memory into this process) and
+ * hand it to the common layer.  Returns 0 on success, -1 on failure.
+ */
+int
+cuse_set_mem_table(struct vhost_device_ctx ctx, const struct vhost_memory *mem_regions_addr,
+	uint32_t nregions)
+{
+	uint64_t size = offsetof(struct vhost_memory, regions);
+	uint32_t idx;
+	struct virtio_memory_regions regions[8]; /* VHOST_MAX_MEMORY_REGIONS */
+	struct vhost_memory_region *mem_regions = (void *)(uintptr_t)
+		((uint64_t)(uintptr_t)mem_regions_addr + size);
+	uint64_t base_address = 0, mapped_address, mapped_size;
+
+	/*
+	 * nregions comes from the guest-facing ioctl; bound it to the
+	 * fixed-size stack array to prevent a stack buffer overflow.
+	 */
+	if (nregions > sizeof(regions) / sizeof(regions[0])) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Too many memory regions (%u)\n", nregions);
+		return -1;
+	}
+
+	for (idx = 0; idx < nregions; idx++) {
+		regions[idx].guest_phys_address =
+			mem_regions[idx].guest_phys_addr;
+		regions[idx].guest_phys_address_end =
+			regions[idx].guest_phys_address +
+			mem_regions[idx].memory_size;
+		regions[idx].memory_size =
+			mem_regions[idx].memory_size;
+		regions[idx].userspace_address =
+			mem_regions[idx].userspace_addr;
+
+		LOG_DEBUG(VHOST_CONFIG, "REGION: %u - GPA: %p - QEMU VA: %p - SIZE (%"PRIu64")\n",
+			idx,
+			(void *)(uintptr_t)regions[idx].guest_phys_address,
+			(void *)(uintptr_t)regions[idx].userspace_address,
+			regions[idx].memory_size);
+
+		/*set the base address mapping*/
+		if (regions[idx].guest_phys_address == 0x0) {
+			base_address =
+				regions[idx].userspace_address;
+			/* Map VM memory file */
+			if (host_memory_map(ctx.pid, base_address,
+				&mapped_address, &mapped_size) != 0) {
+				return -1;
+			}
+		}
+	}
+
+	/* Check that we have a valid base address. */
+	if (base_address == 0) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Failed to find base address of qemu memory file.\n");
+		return -1;
+	}
+
+	for (idx = 0; idx < nregions; idx++) {
+		regions[idx].address_offset =
+			mapped_address - base_address +
+			regions[idx].userspace_address -
+			regions[idx].guest_phys_address;
+	}
+
+	/* Propagate the common layer's result instead of discarding it. */
+	return ops->set_mem_table(ctx, &regions[0], nregions);
+}
diff --git a/lib/librte_vhost/vhost-cuse/virtio-net-cdev.h b/lib/librte_vhost/vhost-cuse/virtio-net-cdev.h
new file mode 100644
index 0000000..6f98ce8
--- /dev/null
+++ b/lib/librte_vhost/vhost-cuse/virtio-net-cdev.h
@@ -0,0 +1,43 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _VIRTIO_NET_CDEV_H
+#define _VIRTIO_NET_CDEV_H
+#include <stdint.h>
+
+#include "vhost-net.h"
+
+int
+cuse_set_mem_table(struct vhost_device_ctx ctx, const struct vhost_memory *mem_regions_addr,
+ uint32_t nregions);
+
+#endif
diff --git a/lib/librte_vhost/vhost-net-cdev.c b/lib/librte_vhost/vhost-net-cdev.c
deleted file mode 100644
index 57c76cb..0000000
--- a/lib/librte_vhost/vhost-net-cdev.c
+++ /dev/null
@@ -1,389 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <errno.h>
-#include <fuse/cuse_lowlevel.h>
-#include <linux/limits.h>
-#include <linux/vhost.h>
-#include <stdint.h>
-#include <string.h>
-#include <unistd.h>
-
-#include <rte_ethdev.h>
-#include <rte_log.h>
-#include <rte_string_fns.h>
-#include <rte_virtio_net.h>
-
-#include "vhost-net-cdev.h"
-
-#define FUSE_OPT_DUMMY "\0\0"
-#define FUSE_OPT_FORE "-f\0\0"
-#define FUSE_OPT_NOMULTI "-s\0\0"
-
-static const uint32_t default_major = 231;
-static const uint32_t default_minor = 1;
-static const char cuse_device_name[] = "/dev/cuse";
-static const char default_cdev[] = "vhost-net";
-
-static struct fuse_session *session;
-static struct vhost_net_device_ops const *ops;
-
-/*
- * Returns vhost_device_ctx from given fuse_req_t. The index is populated later
- * when the device is added to the device linked list.
- */
-static struct vhost_device_ctx
-fuse_req_to_vhost_ctx(fuse_req_t req, struct fuse_file_info *fi)
-{
- struct vhost_device_ctx ctx;
- struct fuse_ctx const *const req_ctx = fuse_req_ctx(req);
-
- ctx.pid = req_ctx->pid;
- ctx.fh = fi->fh;
-
- return ctx;
-}
-
-/*
- * When the device is created in QEMU it gets initialised here and
- * added to the device linked list.
- */
-static void
-vhost_net_open(fuse_req_t req, struct fuse_file_info *fi)
-{
- struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
- int err = 0;
-
- err = ops->new_device(ctx);
- if (err == -1) {
- fuse_reply_err(req, EPERM);
- return;
- }
-
- fi->fh = err;
-
- RTE_LOG(INFO, VHOST_CONFIG,
- "(%"PRIu64") Device configuration started\n", fi->fh);
- fuse_reply_open(req, fi);
-}
-
-/*
- * When QEMU is shutdown or killed the device gets released.
- */
-static void
-vhost_net_release(fuse_req_t req, struct fuse_file_info *fi)
-{
- int err = 0;
- struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
-
- ops->destroy_device(ctx);
- RTE_LOG(INFO, VHOST_CONFIG, "(%"PRIu64") Device released\n", ctx.fh);
- fuse_reply_err(req, err);
-}
-
-/*
- * Boilerplate code for CUSE IOCTL
- * Implicit arguments: ctx, req, result.
- */
-#define VHOST_IOCTL(func) do { \
- result = (func)(ctx); \
- fuse_reply_ioctl(req, result, NULL, 0); \
-} while (0)
-
-/*
- * Boilerplate IOCTL RETRY
- * Implicit arguments: req.
- */
-#define VHOST_IOCTL_RETRY(size_r, size_w) do { \
- struct iovec iov_r = { arg, (size_r) }; \
- struct iovec iov_w = { arg, (size_w) }; \
- fuse_reply_ioctl_retry(req, &iov_r, \
- (size_r) ? 1 : 0, &iov_w, (size_w) ? 1 : 0);\
-} while (0)
-
-/*
- * Boilerplate code for CUSE Read IOCTL
- * Implicit arguments: ctx, req, result, in_bufsz, in_buf.
- */
-#define VHOST_IOCTL_R(type, var, func) do { \
- if (!in_bufsz) { \
- VHOST_IOCTL_RETRY(sizeof(type), 0);\
- } else { \
- (var) = *(const type*)in_buf; \
- result = func(ctx, &(var)); \
- fuse_reply_ioctl(req, result, NULL, 0);\
- } \
-} while (0)
-
-/*
- * Boilerplate code for CUSE Write IOCTL
- * Implicit arguments: ctx, req, result, out_bufsz.
- */
-#define VHOST_IOCTL_W(type, var, func) do { \
- if (!out_bufsz) { \
- VHOST_IOCTL_RETRY(0, sizeof(type));\
- } else { \
- result = (func)(ctx, &(var));\
- fuse_reply_ioctl(req, result, &(var), sizeof(type));\
- } \
-} while (0)
-
-/*
- * Boilerplate code for CUSE Read/Write IOCTL
- * Implicit arguments: ctx, req, result, in_bufsz, in_buf.
- */
-#define VHOST_IOCTL_RW(type1, var1, type2, var2, func) do { \
- if (!in_bufsz) { \
- VHOST_IOCTL_RETRY(sizeof(type1), sizeof(type2));\
- } else { \
- (var1) = *(const type1*) (in_buf); \
- result = (func)(ctx, (var1), &(var2)); \
- fuse_reply_ioctl(req, result, &(var2), sizeof(type2));\
- } \
-} while (0)
-
-/*
- * The IOCTLs are handled using CUSE/FUSE in userspace. Depending on the type
- * of IOCTL a buffer is requested to read or to write. This request is handled
- * by FUSE and the buffer is then given to CUSE.
- */
-static void
-vhost_net_ioctl(fuse_req_t req, int cmd, void *arg,
- struct fuse_file_info *fi, __rte_unused unsigned flags,
- const void *in_buf, size_t in_bufsz, size_t out_bufsz)
-{
- struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
- struct vhost_vring_file file;
- struct vhost_vring_state state;
- struct vhost_vring_addr addr;
- uint64_t features;
- uint32_t index;
- int result = 0;
-
- switch (cmd) {
- case VHOST_NET_SET_BACKEND:
- LOG_DEBUG(VHOST_CONFIG,
- "(%"PRIu64") IOCTL: VHOST_NET_SET_BACKEND\n", ctx.fh);
- VHOST_IOCTL_R(struct vhost_vring_file, file, ops->set_backend);
- break;
-
- case VHOST_GET_FEATURES:
- LOG_DEBUG(VHOST_CONFIG,
- "(%"PRIu64") IOCTL: VHOST_GET_FEATURES\n", ctx.fh);
- VHOST_IOCTL_W(uint64_t, features, ops->get_features);
- break;
-
- case VHOST_SET_FEATURES:
- LOG_DEBUG(VHOST_CONFIG,
- "(%"PRIu64") IOCTL: VHOST_SET_FEATURES\n", ctx.fh);
- VHOST_IOCTL_R(uint64_t, features, ops->set_features);
- break;
-
- case VHOST_RESET_OWNER:
- LOG_DEBUG(VHOST_CONFIG,
- "(%"PRIu64") IOCTL: VHOST_RESET_OWNER\n", ctx.fh);
- VHOST_IOCTL(ops->reset_owner);
- break;
-
- case VHOST_SET_OWNER:
- LOG_DEBUG(VHOST_CONFIG,
- "(%"PRIu64") IOCTL: VHOST_SET_OWNER\n", ctx.fh);
- VHOST_IOCTL(ops->set_owner);
- break;
-
- case VHOST_SET_MEM_TABLE:
- /*TODO fix race condition.*/
- LOG_DEBUG(VHOST_CONFIG,
- "(%"PRIu64") IOCTL: VHOST_SET_MEM_TABLE\n", ctx.fh);
- static struct vhost_memory mem_temp;
-
- switch (in_bufsz) {
- case 0:
- VHOST_IOCTL_RETRY(sizeof(struct vhost_memory), 0);
- break;
-
- case sizeof(struct vhost_memory):
- mem_temp = *(const struct vhost_memory *) in_buf;
-
- if (mem_temp.nregions > 0) {
- VHOST_IOCTL_RETRY(sizeof(struct vhost_memory) +
- (sizeof(struct vhost_memory_region) *
- mem_temp.nregions), 0);
- } else {
- result = -1;
- fuse_reply_ioctl(req, result, NULL, 0);
- }
- break;
-
- default:
- result = ops->set_mem_table(ctx,
- in_buf, mem_temp.nregions);
- if (result)
- fuse_reply_err(req, EINVAL);
- else
- fuse_reply_ioctl(req, result, NULL, 0);
- }
- break;
-
- case VHOST_SET_VRING_NUM:
- LOG_DEBUG(VHOST_CONFIG,
- "(%"PRIu64") IOCTL: VHOST_SET_VRING_NUM\n", ctx.fh);
- VHOST_IOCTL_R(struct vhost_vring_state, state,
- ops->set_vring_num);
- break;
-
- case VHOST_SET_VRING_BASE:
- LOG_DEBUG(VHOST_CONFIG,
- "(%"PRIu64") IOCTL: VHOST_SET_VRING_BASE\n", ctx.fh);
- VHOST_IOCTL_R(struct vhost_vring_state, state,
- ops->set_vring_base);
- break;
-
- case VHOST_GET_VRING_BASE:
- LOG_DEBUG(VHOST_CONFIG,
- "(%"PRIu64") IOCTL: VHOST_GET_VRING_BASE\n", ctx.fh);
- VHOST_IOCTL_RW(uint32_t, index,
- struct vhost_vring_state, state, ops->get_vring_base);
- break;
-
- case VHOST_SET_VRING_ADDR:
- LOG_DEBUG(VHOST_CONFIG,
- "(%"PRIu64") IOCTL: VHOST_SET_VRING_ADDR\n", ctx.fh);
- VHOST_IOCTL_R(struct vhost_vring_addr, addr,
- ops->set_vring_addr);
- break;
-
- case VHOST_SET_VRING_KICK:
- LOG_DEBUG(VHOST_CONFIG,
- "(%"PRIu64") IOCTL: VHOST_SET_VRING_KICK\n", ctx.fh);
- VHOST_IOCTL_R(struct vhost_vring_file, file,
- ops->set_vring_kick);
- break;
-
- case VHOST_SET_VRING_CALL:
- LOG_DEBUG(VHOST_CONFIG,
- "(%"PRIu64") IOCTL: VHOST_SET_VRING_CALL\n", ctx.fh);
- VHOST_IOCTL_R(struct vhost_vring_file, file,
- ops->set_vring_call);
- break;
-
- default:
- RTE_LOG(ERR, VHOST_CONFIG,
- "(%"PRIu64") IOCTL: DOESN NOT EXIST\n", ctx.fh);
- result = -1;
- fuse_reply_ioctl(req, result, NULL, 0);
- }
-
- if (result < 0)
- LOG_DEBUG(VHOST_CONFIG,
- "(%"PRIu64") IOCTL: FAIL\n", ctx.fh);
- else
- LOG_DEBUG(VHOST_CONFIG,
- "(%"PRIu64") IOCTL: SUCCESS\n", ctx.fh);
-}
-
-/*
- * Structure handling open, release and ioctl function pointers is populated.
- */
-static const struct cuse_lowlevel_ops vhost_net_ops = {
- .open = vhost_net_open,
- .release = vhost_net_release,
- .ioctl = vhost_net_ioctl,
-};
-
-/*
- * cuse_info is populated and used to register the cuse device.
- * vhost_net_device_ops are also passed when the device is registered in app.
- */
-int
-rte_vhost_driver_register(const char *dev_name)
-{
- struct cuse_info cuse_info;
- char device_name[PATH_MAX] = "";
- char char_device_name[PATH_MAX] = "";
- const char *device_argv[] = { device_name };
-
- char fuse_opt_dummy[] = FUSE_OPT_DUMMY;
- char fuse_opt_fore[] = FUSE_OPT_FORE;
- char fuse_opt_nomulti[] = FUSE_OPT_NOMULTI;
- char *fuse_argv[] = {fuse_opt_dummy, fuse_opt_fore, fuse_opt_nomulti};
-
- if (access(cuse_device_name, R_OK | W_OK) < 0) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "char device %s can't be accessed, maybe not exist\n",
- cuse_device_name);
- return -1;
- }
-
- /*
- * The device name is created. This is passed to QEMU so that it can
- * register the device with our application.
- */
- snprintf(device_name, PATH_MAX, "DEVNAME=%s", dev_name);
- snprintf(char_device_name, PATH_MAX, "/dev/%s", dev_name);
-
- /* Check if device already exists. */
- if (access(char_device_name, F_OK) != -1) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "char device %s already exists\n", char_device_name);
- return -1;
- }
-
- memset(&cuse_info, 0, sizeof(cuse_info));
- cuse_info.dev_major = default_major;
- cuse_info.dev_minor = default_minor;
- cuse_info.dev_info_argc = 1;
- cuse_info.dev_info_argv = device_argv;
- cuse_info.flags = CUSE_UNRESTRICTED_IOCTL;
-
- ops = get_virtio_net_callbacks();
-
- session = cuse_lowlevel_setup(3, fuse_argv,
- &cuse_info, &vhost_net_ops, 0, NULL);
- if (session == NULL)
- return -1;
-
- return 0;
-}
-
-/**
- * The CUSE session is launched allowing the application to receive open,
- * release and ioctl calls.
- */
-int
-rte_vhost_driver_session_start(void)
-{
- fuse_session_loop(session);
-
- return 0;
-}
diff --git a/lib/librte_vhost/vhost-net-cdev.h b/lib/librte_vhost/vhost-net-cdev.h
deleted file mode 100644
index 03a5c57..0000000
--- a/lib/librte_vhost/vhost-net-cdev.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _VHOST_NET_CDEV_H_
-#define _VHOST_NET_CDEV_H_
-#include <stdint.h>
-#include <stdio.h>
-#include <sys/types.h>
-#include <unistd.h>
-#include <linux/vhost.h>
-
-#include <rte_log.h>
-
-/* Macros for printing using RTE_LOG */
-#define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
-#define RTE_LOGTYPE_VHOST_DATA RTE_LOGTYPE_USER1
-
-#ifdef RTE_LIBRTE_VHOST_DEBUG
-#define VHOST_MAX_PRINT_BUFF 6072
-#define LOG_LEVEL RTE_LOG_DEBUG
-#define LOG_DEBUG(log_type, fmt, args...) RTE_LOG(DEBUG, log_type, fmt, ##args)
-#define PRINT_PACKET(device, addr, size, header) do { \
- char *pkt_addr = (char *)(addr); \
- unsigned int index; \
- char packet[VHOST_MAX_PRINT_BUFF]; \
- \
- if ((header)) \
- snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%"PRIu64") Header size %d: ", (device->device_fh), (size)); \
- else \
- snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%"PRIu64") Packet size %d: ", (device->device_fh), (size)); \
- for (index = 0; index < (size); index++) { \
- snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), \
- "%02hhx ", pkt_addr[index]); \
- } \
- snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), "\n"); \
- \
- LOG_DEBUG(VHOST_DATA, "%s", packet); \
-} while (0)
-#else
-#define LOG_LEVEL RTE_LOG_INFO
-#define LOG_DEBUG(log_type, fmt, args...) do {} while (0)
-#define PRINT_PACKET(device, addr, size, header) do {} while (0)
-#endif
-
-
-/*
- * Structure used to identify device context.
- */
-struct vhost_device_ctx {
- pid_t pid; /* PID of process calling the IOCTL. */
- uint64_t fh; /* Populated with fi->fh to track the device index. */
-};
-
-/*
- * Structure contains function pointers to be defined in virtio-net.c. These
- * functions are called in CUSE context and are used to configure devices.
- */
-struct vhost_net_device_ops {
- int (*new_device)(struct vhost_device_ctx);
- void (*destroy_device)(struct vhost_device_ctx);
-
- int (*get_features)(struct vhost_device_ctx, uint64_t *);
- int (*set_features)(struct vhost_device_ctx, uint64_t *);
-
- int (*set_mem_table)(struct vhost_device_ctx, const void *, uint32_t);
-
- int (*set_vring_num)(struct vhost_device_ctx, struct vhost_vring_state *);
- int (*set_vring_addr)(struct vhost_device_ctx, struct vhost_vring_addr *);
- int (*set_vring_base)(struct vhost_device_ctx, struct vhost_vring_state *);
- int (*get_vring_base)(struct vhost_device_ctx, uint32_t, struct vhost_vring_state *);
-
- int (*set_vring_kick)(struct vhost_device_ctx, struct vhost_vring_file *);
- int (*set_vring_call)(struct vhost_device_ctx, struct vhost_vring_file *);
-
- int (*set_backend)(struct vhost_device_ctx, struct vhost_vring_file *);
-
- int (*set_owner)(struct vhost_device_ctx);
- int (*reset_owner)(struct vhost_device_ctx);
-};
-
-
-struct vhost_net_device_ops const *get_virtio_net_callbacks(void);
-#endif /* _VHOST_NET_CDEV_H_ */
diff --git a/lib/librte_vhost/vhost-user/fd_man.c b/lib/librte_vhost/vhost-user/fd_man.c
new file mode 100644
index 0000000..c7fd3f2
--- /dev/null
+++ b/lib/librte_vhost/vhost-user/fd_man.c
@@ -0,0 +1,158 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <sys/select.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <rte_log.h>
+
+#include "fd_man.h"
+
+/**
+ * Returns the index in the fdset for a fd.
+ * If fd is -1, it means to search for a free entry.
+ * @return
+ *   Index for the fd, or -1 if fd isn't in the fdset.
+ */
+static int
+fdset_find_fd(struct fdset *pfdset, int fd)
+{
+	int i;
+
+	/* guard against a NULL set so callers need not check */
+	if (pfdset == NULL)
+		return -1;
+
+	for (i = 0; i < pfdset->num && pfdset->fd[i].fd != fd; i++)
+		;
+
+	return i == pfdset->num ? -1 : i;
+}
+
+/* A free slot is any entry whose fd is the empty marker (-1). */
+static int
+fdset_find_free_slot(struct fdset *pfdset)
+{
+	return fdset_find_fd(pfdset, -1);
+}
+
+/* Fill the entry at idx with the fd, its callbacks and context data. */
+static void
+fdset_add_fd(struct fdset *pfdset, int idx, int fd, fd_cb rcb,
+	fd_cb wcb, uint64_t dat)
+{
+	struct fdentry *entry = &pfdset->fd[idx];
+
+	entry->fd  = fd;
+	entry->rcb = rcb;
+	entry->wcb = wcb;
+	entry->dat = dat;
+}
+
+/**
+ * Fill the read/write fdset with the fds in the fdset.
+ * @return
+ *   the maximum fds filled in the read/write fd_set.
+ */
+static int
+fdset_fill(fd_set *rfset, fd_set *wfset, struct fdset *pfdset)
+{
+	int i, maxfds = -1;
+
+	for (i = 0; i < MAX_FDS; i++) {
+		struct fdentry *entry = &pfdset->fd[i];
+		int added = 0;
+
+		/* empty slot */
+		if (entry->fd == -1)
+			continue;
+
+		if (rfset && entry->rcb) {
+			FD_SET(entry->fd, rfset);
+			added = 1;
+		}
+		if (wfset && entry->wcb) {
+			FD_SET(entry->fd, wfset);
+			added = 1;
+		}
+		if (added && entry->fd > maxfds)
+			maxfds = entry->fd;
+	}
+	return maxfds;
+}
+
+/* Initialise the fdset: mark every slot empty and clear its callbacks. */
+void
+fdset_init(struct fdset *pfdset)
+{
+	int i;
+
+	if (pfdset == NULL)
+		return;
+
+	for (i = 0; i < MAX_FDS; i++) {
+		pfdset->fd[i].fd = -1;
+		/* clear stale callbacks in case the set is reused */
+		pfdset->fd[i].rcb = NULL;
+		pfdset->fd[i].wcb = NULL;
+	}
+	pfdset->num = MAX_FDS;
+}
+
+/**
+ * Register the fd in the fdset with its read/write handler and context.
+ * @return
+ *   0 on success, -1 for an invalid fd, -2 when the set is full.
+ */
+int
+fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, uint64_t dat)
+{
+	int slot;
+
+	if (fd == -1)
+		return -1;
+
+	/* Find a free slot in the list. */
+	slot = fdset_find_free_slot(pfdset);
+	if (slot == -1)
+		return -2;
+
+	fdset_add_fd(pfdset, slot, fd, rcb, wcb, dat);
+
+	return 0;
+}
+
+/**
+ * Unregister the fd from the fdset by marking its slot empty.
+ */
+void
+fdset_del(struct fdset *pfdset, int fd)
+{
+	int idx = fdset_find_fd(pfdset, fd);
+
+	if (idx != -1)
+		pfdset->fd[idx].fd = -1;
+}
+
+
+/**
+ * Event loop: select() on all registered fds and dispatch to the
+ * registered read/write callbacks.  Returns when no fd remains in the
+ * set.  fd management runs in one thread.
+ */
+void
+fdset_event_dispatch(struct fdset *pfdset)
+{
+	fd_set rfds, wfds;
+	int i, maxfds, ret;
+	struct fdentry *pfdentry;
+
+	if (pfdset == NULL)
+		return;
+
+	while (1) {
+		FD_ZERO(&rfds);
+		FD_ZERO(&wfds);
+		maxfds = fdset_fill(&rfds, &wfds, pfdset);
+		if (maxfds == -1)
+			return;
+
+		ret = select(maxfds + 1, &rfds, &wfds, NULL, NULL);
+		/* retry on EINTR or spurious wakeup */
+		if (ret <= 0)
+			continue;
+
+		for (i = 0; i < MAX_FDS; i++) {
+			pfdentry = &pfdset->fd[i];
+			/* skip empty slots: FD_ISSET on fd -1 is undefined */
+			if (pfdentry->fd == -1)
+				continue;
+			/* a slot may have only one of the two callbacks */
+			if (pfdentry->rcb && FD_ISSET(pfdentry->fd, &rfds))
+				pfdentry->rcb(pfdentry->fd, pfdentry->dat);
+			if (pfdentry->wcb && FD_ISSET(pfdentry->fd, &wfds))
+				pfdentry->wcb(pfdentry->fd, pfdentry->dat);
+		}
+	}
+}
diff --git a/lib/librte_vhost/vhost-user/fd_man.h b/lib/librte_vhost/vhost-user/fd_man.h
new file mode 100644
index 0000000..57cc81d
--- /dev/null
+++ b/lib/librte_vhost/vhost-user/fd_man.h
@@ -0,0 +1,31 @@
+#ifndef _FD_MAN_H_
+#define _FD_MAN_H_
+#include <stdint.h>
+
+/*
+ * fd_man: a small select()-based fd event dispatcher used by the
+ * vhost-user socket server.  All operations are expected to run in a
+ * single thread (no internal locking).
+ */
+
+/* Maximum number of fds one fdset can track. */
+#define MAX_FDS 1024
+
+/* Callback invoked when an fd becomes readable/writeable; dat is the
+ * opaque context registered with the fd. */
+typedef void (*fd_cb)(int fd, uint64_t dat);
+
+struct fdentry {
+	int fd;		/* -1 indicates this entry is empty */
+	fd_cb rcb;	/* callback when this fd is readable. */
+	fd_cb wcb;	/* callback when this fd is writeable.*/
+	uint64_t dat;	/* fd context */
+};
+
+struct fdset {
+	struct fdentry fd[MAX_FDS];
+	int num;	/* number of slots (set to MAX_FDS by fdset_init) */
+};
+
+
+/* Mark every slot of the set empty. */
+void fdset_init(struct fdset *pfdset);
+
+/* Register fd with its handlers; 0 on success, negative on failure. */
+int fdset_add(struct fdset *pfdset, int fd, fd_cb rcb,
+	fd_cb wcb, uint64_t ctx);
+
+/* Unregister fd from the set. */
+void fdset_del(struct fdset *pfdset, int fd);
+
+/* Run the select() dispatch loop until the set becomes empty. */
+void fdset_event_dispatch(struct fdset *pfdset);
+
+#endif
diff --git a/lib/librte_vhost/vhost-user/vhost-net-user.c b/lib/librte_vhost/vhost-user/vhost-net-user.c
new file mode 100644
index 0000000..34450f4
--- /dev/null
+++ b/lib/librte_vhost/vhost-user/vhost-net-user.c
@@ -0,0 +1,417 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <errno.h>
+
+#include <rte_log.h>
+#include <rte_virtio_net.h>
+
+#include "fd_man.h"
+#include "vhost-net-user.h"
+#include "vhost-net.h"
+#include "virtio-net-user.h"
+
+/* fdset callbacks: new-connection handler and per-connection message handler */
+static void vserver_new_vq_conn(int fd, uint64_t data);
+static void vserver_message_handler(int fd, uint64_t dat);
+/* virtio-net device ops; also referenced from virtio-net-user.c */
+const struct vhost_net_device_ops *ops;
+
+/* the single server instance, created by rte_vhost_driver_register() */
+static struct vhost_server *g_vhost_server;
+
+/* request names for logging, indexed by VhostUserRequest */
+static const char *vhost_message_str[VHOST_USER_MAX] =
+{
+	[VHOST_USER_NONE] = "VHOST_USER_NONE",
+	[VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
+	[VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES",
+	[VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER",
+	[VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER",
+	[VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE",
+	[VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE",
+	[VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD",
+	[VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM",
+	[VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR",
+	[VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE",
+	[VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE",
+	[VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK",
+	[VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL",
+	[VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR"
+};
+
+/**
+ * Create a unix domain stream socket, bind it to path and listen on it.
+ * @return
+ *   listening socket fd or -1 on failure
+ */
+static int
+uds_socket(const char *path)
+{
+	struct sockaddr_un un;
+	int sockfd;
+
+	if (path == NULL)
+		return -1;
+
+	sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (sockfd < 0)
+		return -1;
+	RTE_LOG(INFO, VHOST_CONFIG, "socket created, fd:%d\n", sockfd);
+
+	memset(&un, 0, sizeof(un));
+	un.sun_family = AF_UNIX;
+	snprintf(un.sun_path, sizeof(un.sun_path), "%s", path);
+	if (bind(sockfd, (struct sockaddr *)&un, sizeof(un)) == -1)
+		goto err;
+	RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);
+
+	/* backlog of 1: only one QEMU connects per socket */
+	if (listen(sockfd, 1) == -1)
+		goto err;
+
+	return sockfd;
+
+err:
+	close(sockfd);
+	return -1;
+}
+
+
+/* Receive up to buflen bytes plus up to fd_num SCM_RIGHTS fds from sockfd.
+ * @return bytes# of read; 0 on orderly peer close; <0 on error.
+ */
+static int
+read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
+{
+
+	struct iovec iov;
+	struct msghdr msgh = { 0 };
+	size_t fdsize = fd_num * sizeof(int);
+	char control[CMSG_SPACE(fdsize)];
+	struct cmsghdr *cmsg;
+	int ret;
+
+	iov.iov_base = buf;
+	iov.iov_len = buflen;
+
+	msgh.msg_iov = &iov;
+	msgh.msg_iovlen = 1;
+	msgh.msg_control = control;
+	msgh.msg_controllen = sizeof(control);
+
+	ret = recvmsg(sockfd, &msgh, 0);
+	if (ret <= 0) {
+		/* NOTE(review): ret == 0 is peer close, not a failure —
+		 * consider logging that case at INFO instead. */
+		RTE_LOG(ERR, VHOST_CONFIG, "%s failed\n", __func__);
+		return ret;
+	}
+	/* ret == buflen */
+	if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
+		RTE_LOG(ERR, VHOST_CONFIG, "%s failed\n", __func__);
+		return -1;
+	}
+
+	/* copy out the first SCM_RIGHTS control message, if any */
+	for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
+		cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
+		if ( (cmsg->cmsg_level == SOL_SOCKET) &&
+			(cmsg->cmsg_type == SCM_RIGHTS)) {
+			memcpy(fds, CMSG_DATA(cmsg), fdsize);
+			break;
+		}
+	}
+	/* NOTE(review): if no SCM_RIGHTS arrived, fds[] is left
+	 * untouched — callers must not assume it was filled. */
+	return ret;
+}
+
+/* Read one vhost-user message: fixed header (with ancillary fds),
+ * then the variable-size payload announced in msg->size.
+ * @return bytes read, 0 on peer close, <0 on error.
+ */
+static int
+read_vhost_message(int sockfd, struct VhostUserMsg *msg)
+{
+	int ret;
+
+	ret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE,
+		msg->fds, VHOST_MEMORY_MAX_NREGIONS);
+	if (ret <= 0)
+		return ret;
+
+	if (msg->size) {
+		/* reject payloads larger than the union we read into */
+		if (msg->size > sizeof(msg->payload)) {
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"%s: invalid size:%d\n", __func__, msg->size);
+			return -1;
+		}
+		ret = read(sockfd, &msg->payload, msg->size);
+		if (ret == 0)
+			return 0;
+		if (ret != (int)msg->size) {
+			/* use RTE_LOG for consistency with the rest of
+			 * this file (was a bare printf) */
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"read control message failed\n");
+			return -1;
+		}
+	}
+
+	return ret;
+}
+
+/* Send buflen bytes over sockfd, attaching fd_num fds as SCM_RIGHTS
+ * ancillary data when fds is non-NULL.
+ * @return 0 on success, -1 on sendmsg failure.
+ */
+static int
+send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
+{
+
+	struct iovec iov;
+	struct msghdr msgh = { 0 };
+	size_t fdsize = fd_num * sizeof(int);
+	char control[CMSG_SPACE(fdsize)];
+	struct cmsghdr *cmsg;
+	int ret;
+
+	iov.iov_base = buf;
+	iov.iov_len = buflen;
+	msgh.msg_iov = &iov;
+	msgh.msg_iovlen = 1;
+
+	if (fds && fd_num > 0) {
+		/* attach the fds as a single SCM_RIGHTS control message */
+		msgh.msg_control = control;
+		msgh.msg_controllen = sizeof(control);
+		cmsg = CMSG_FIRSTHDR(&msgh);
+		cmsg->cmsg_len = CMSG_LEN(fdsize);
+		cmsg->cmsg_level = SOL_SOCKET;
+		cmsg->cmsg_type = SCM_RIGHTS;
+		memcpy(CMSG_DATA(cmsg), fds, fdsize);
+	} else {
+		msgh.msg_control = NULL;
+		msgh.msg_controllen = 0;
+	}
+
+	/* retry if interrupted by a signal */
+	do {
+		ret = sendmsg(sockfd, &msgh, 0);
+	} while (ret < 0 && errno == EINTR);
+
+	if (ret < 0) {
+		RTE_LOG(ERR, VHOST_CONFIG, "sendmsg error\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/* Stamp the version and reply flags on msg and send it back (no fds). */
+static int
+send_vhost_message(int sockfd, struct VhostUserMsg *msg)
+{
+	msg->flags &= ~VHOST_USER_VERSION_MASK;
+	msg->flags |= VHOST_USER_VERSION;
+	msg->flags |= VHOST_USER_REPLY_MASK;
+
+	return send_fd_message(sockfd, (char *)msg,
+		VHOST_USER_HDR_SIZE + msg->size, NULL, 0);
+}
+
+/* Callback when there is a new connection on the listen fd: accept it,
+ * create a device for it and register the connection fd for messages. */
+static void
+vserver_new_vq_conn(int fd, uint64_t dat)
+{
+	struct vhost_server *vserver = (void *)(uintptr_t)dat;
+	int conn_fd;
+	int fh;
+	struct vhost_device_ctx vdev_ctx = { 0 };
+
+	conn_fd = accept(fd, NULL, NULL);
+	/* check before logging: don't report a bogus fd on failure */
+	if (conn_fd < 0)
+		return;
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"%s: new connection is %d\n", __func__, conn_fd);
+
+	/* keep the signed return so a failure (-1) is detectable */
+	fh = ops->new_device(vdev_ctx);
+	if (fh < 0) {
+		close(conn_fd);
+		return;
+	}
+	RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", fh);
+
+	if (fdset_add(&vserver->fdset,
+		conn_fd, vserver_message_handler, NULL, fh) < 0) {
+		/* fdset full: don't leak the connection or the device */
+		close(conn_fd);
+		vdev_ctx.fh = fh;
+		ops->destroy_device(vdev_ctx);
+	}
+}
+
+/* Callback when there is a message on the connfd: read it, dispatch to
+ * the matching device op, and reply where the protocol requires it. */
+static void
+vserver_message_handler(int connfd, uint64_t dat)
+{
+	struct vhost_device_ctx ctx;
+	uint32_t fh = (uint32_t)dat;
+	struct VhostUserMsg msg;
+	uint64_t features;
+	int ret;
+
+	ctx.fh = fh;
+	ret = read_vhost_message(connfd, &msg);
+	if (ret < 0) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"vhost read message failed\n");
+
+		/*TODO: cleanup */
+		close(connfd);
+		fdset_del(&g_vhost_server->fdset, connfd);
+		ops->destroy_device(ctx);
+
+		return;
+	} else if (ret == 0) {
+		/*TODO: cleanup */
+		RTE_LOG(INFO, VHOST_CONFIG,
+			"vhost peer closed\n");
+		close(connfd);
+		fdset_del(&g_vhost_server->fdset, connfd);
+		ops->destroy_device(ctx);
+
+		return;
+	}
+	/* >= : request also indexes vhost_message_str[VHOST_USER_MAX],
+	 * so VHOST_USER_MAX itself is already out of range */
+	if (msg.request >= VHOST_USER_MAX) {
+		/*TODO: cleanup */
+		RTE_LOG(INFO, VHOST_CONFIG,
+			"vhost read incorrect message\n");
+		close(connfd);
+		fdset_del(&g_vhost_server->fdset, connfd);
+
+		return;
+	}
+
+	RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
+		vhost_message_str[msg.request]);
+	switch (msg.request) {
+	case VHOST_USER_GET_FEATURES:
+		ret = ops->get_features(ctx, &features);
+		/* reply with the feature bits, not the call status */
+		msg.payload.u64 = features;
+		msg.size = sizeof(msg.payload.u64);
+		send_vhost_message(connfd, &msg);
+		break;
+	case VHOST_USER_SET_FEATURES:
+		/* the negotiated features arrive in the message payload */
+		ops->set_features(ctx, &msg.payload.u64);
+		break;
+
+	case VHOST_USER_SET_OWNER:
+		ops->set_owner(ctx);
+		break;
+	case VHOST_USER_RESET_OWNER:
+		ops->reset_owner(ctx);
+		break;
+
+	case VHOST_USER_SET_MEM_TABLE:
+		user_set_mem_table(ctx, &msg);
+		break;
+
+	case VHOST_USER_SET_LOG_BASE:
+	case VHOST_USER_SET_LOG_FD:
+		RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n");
+		break;
+
+	case VHOST_USER_SET_VRING_NUM:
+		ops->set_vring_num(ctx, &msg.payload.state);
+		break;
+	case VHOST_USER_SET_VRING_ADDR:
+		ops->set_vring_addr(ctx, &msg.payload.addr);
+		break;
+	case VHOST_USER_SET_VRING_BASE:
+		ops->set_vring_base(ctx, &msg.payload.state);
+		break;
+
+	case VHOST_USER_GET_VRING_BASE:
+		/* GET_VRING_BASE requires a reply with the vring state */
+		ret = ops->get_vring_base(ctx, msg.payload.state.index,
+			&msg.payload.state);
+		msg.size = sizeof(msg.payload.state);
+		send_vhost_message(connfd, &msg);
+		break;
+
+	case VHOST_USER_SET_VRING_KICK:
+		user_set_vring_kick(ctx, &msg);
+		break;
+	case VHOST_USER_SET_VRING_CALL:
+		user_set_vring_call(ctx, &msg);
+		break;
+
+	case VHOST_USER_SET_VRING_ERR:
+		RTE_LOG(INFO, VHOST_CONFIG, "not implemented\n");
+		break;
+
+	default:
+		break;
+
+	}
+}
+
+
+/**
+ * Creates and initialise the vhost server.
+ * @return 0 on success, -1 on failure (already registered, allocation
+ *   or socket setup failure).
+ */
+int
+rte_vhost_driver_register(const char *path)
+{
+	struct vhost_server *vserver;
+
+	/* only a single server instance is supported */
+	if (g_vhost_server != NULL)
+		return -1;
+
+	/* calloc(nmemb, size): one zero-initialised vhost_server */
+	vserver = calloc(1, sizeof(struct vhost_server));
+	/*TODO: all allocation is through DPDK memory allocation */
+	if (vserver == NULL)
+		return -1;
+
+	fdset_init(&vserver->fdset);
+
+	/* remove a stale socket file from a previous run, if any */
+	unlink(path);
+
+	vserver->listenfd = uds_socket(path);
+	if (vserver->listenfd < 0) {
+		free(vserver);
+		return -1;
+	}
+	vserver->path = path;
+
+	/* check registration so the listen fd isn't leaked on failure */
+	if (fdset_add(&vserver->fdset, vserver->listenfd,
+		vserver_new_vq_conn, NULL,
+		(uint64_t)(uintptr_t)vserver) < 0) {
+		close(vserver->listenfd);
+		free(vserver);
+		return -1;
+	}
+
+	ops = get_virtio_net_callbacks();
+
+	g_vhost_server = vserver;
+
+	return 0;
+}
+
+
+/* Run the fd event dispatch loop; blocks until every fd is removed.
+ * @return 0 when the loop exits, -1 if the driver was never registered.
+ */
+int
+rte_vhost_driver_session_start(void)
+{
+	/* guard: rte_vhost_driver_register() must have been called */
+	if (g_vhost_server == NULL)
+		return -1;
+
+	fdset_event_dispatch(&g_vhost_server->fdset);
+	return 0;
+}
+
diff --git a/lib/librte_vhost/vhost-user/vhost-net-user.h b/lib/librte_vhost/vhost-user/vhost-net-user.h
new file mode 100644
index 0000000..c9df9fa
--- /dev/null
+++ b/lib/librte_vhost/vhost-user/vhost-net-user.h
@@ -0,0 +1,74 @@
+#ifndef _VHOST_NET_USER_H
+#define _VHOST_NET_USER_H
+#include <stdint.h>
+#include <linux/vhost.h>
+
+#include "fd_man.h"
+
+/* State for one vhost-user unix-domain socket server. */
+struct vhost_server {
+	const char *path; /**< The path the uds is bind to. */
+	int listenfd;     /**< The listener sockfd. */
+	struct fdset fdset; /**< The fd list this vhost server manages. */
+};
+
+/* Message definitions below mirror QEMU's vhost-user protocol. */
+/*********** FROM hw/virtio/vhost-user.c *************************************/
+
+#define VHOST_MEMORY_MAX_NREGIONS 8
+
+typedef enum VhostUserRequest {
+	VHOST_USER_NONE = 0,
+	VHOST_USER_GET_FEATURES = 1,
+	VHOST_USER_SET_FEATURES = 2,
+	VHOST_USER_SET_OWNER = 3,
+	VHOST_USER_RESET_OWNER = 4,
+	VHOST_USER_SET_MEM_TABLE = 5,
+	VHOST_USER_SET_LOG_BASE = 6,
+	VHOST_USER_SET_LOG_FD = 7,
+	VHOST_USER_SET_VRING_NUM = 8,
+	VHOST_USER_SET_VRING_ADDR = 9,
+	VHOST_USER_SET_VRING_BASE = 10,
+	VHOST_USER_GET_VRING_BASE = 11,
+	VHOST_USER_SET_VRING_KICK = 12,
+	VHOST_USER_SET_VRING_CALL = 13,
+	VHOST_USER_SET_VRING_ERR = 14,
+	VHOST_USER_MAX
+} VhostUserRequest;
+
+/* One guest memory region announced by SET_MEM_TABLE. */
+typedef struct VhostUserMemoryRegion {
+	uint64_t guest_phys_addr;
+	uint64_t memory_size;
+	uint64_t userspace_addr;
+	uint64_t mmap_offset;
+} VhostUserMemoryRegion;
+
+typedef struct VhostUserMemory {
+	uint32_t nregions;
+	uint32_t padding;
+	VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
+} VhostUserMemory;
+
+typedef struct VhostUserMsg {
+	VhostUserRequest request;
+
+#define VHOST_USER_VERSION_MASK (0x3)
+#define VHOST_USER_REPLY_MASK (0x1 << 2)
+	uint32_t flags;
+	uint32_t size; /* the following payload size */
+	union {
+#define VHOST_USER_VRING_IDX_MASK (0xff)
+#define VHOST_USER_VRING_NOFD_MASK (0x1<<8)
+		uint64_t u64;
+		struct vhost_vring_state state;
+		struct vhost_vring_addr addr;
+		VhostUserMemory memory;
+	} payload;
+	/* NOTE(review): fds[] is local bookkeeping filled from the
+	 * SCM_RIGHTS control data — presumably not part of the on-wire
+	 * message body; confirm against the protocol spec. */
+	int fds[VHOST_MEMORY_MAX_NREGIONS];
+} __attribute__((packed)) VhostUserMsg;
+
+/* Size of the fixed header: everything before the payload union. */
+#define VHOST_USER_HDR_SIZE (intptr_t)(&((VhostUserMsg *)0)->payload.u64)
+
+/* The version of the protocol we support */
+#define VHOST_USER_VERSION (0x1)
+
+/*****************************************************************************/
+#endif
diff --git a/lib/librte_vhost/vhost-user/virtio-net-user.c b/lib/librte_vhost/vhost-user/virtio-net-user.c
new file mode 100644
index 0000000..f38e6cc
--- /dev/null
+++ b/lib/librte_vhost/vhost-user/virtio-net-user.c
@@ -0,0 +1,208 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+#include <rte_log.h>
+
+#include "virtio-net-user.h"
+#include "vhost-net-user.h"
+#include "vhost-net.h"
+
+extern const struct vhost_net_device_ops *ops;
+
+/*
+ * Two experimental variants of the SET_MEM_TABLE handler follow; the
+ * "#if 0" one assumes QEMU backs all regions with a single fd mapped
+ * at offset 0, the active one maps each region's fd separately.
+ * NOTE(review): pick one and delete the other before merging.
+ */
+#if 0
+int
+user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
+{
+	unsigned int idx;
+	struct VhostUserMemory memory = pmsg->payload.memory;
+	struct virtio_memory_regions regions[VHOST_MEMORY_MAX_NREGIONS];
+	uint64_t mapped_address, base_address = 0, mem_size = 0;
+
+	/* locate the region whose guest physical address is 0 */
+	for (idx = 0; idx < memory.nregions; idx++) {
+		if (memory.regions[idx].guest_phys_addr == 0)
+			base_address = memory.regions[idx].userspace_addr;
+	}
+	if (base_address == 0) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"couldn't find the mem region whose gpa is 0.\n");
+		return -1;
+	}
+
+	/* total span to map: highest region end relative to base */
+	for (idx = 0; idx < memory.nregions; idx++) {
+		uint64_t size = memory.regions[idx].userspace_addr -
+			base_address + memory.regions[idx].memory_size;
+		if (mem_size < size)
+			mem_size = size;
+	}
+
+	/*
+	 * here we assume qemu will map only one file for memory allocation,
+	 * we only use fds[0] with offset 0.
+	 */
+	mapped_address = (uint64_t)(uintptr_t)mmap(NULL, mem_size,
+		PROT_READ | PROT_WRITE, MAP_SHARED, pmsg->fds[0], 0);
+
+	if (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) {
+		RTE_LOG(ERR, VHOST_CONFIG, " mmap qemu guest failed.\n");
+		return -1;
+	}
+
+	for (idx = 0; idx < memory.nregions; idx++) {
+		regions[idx].guest_phys_address =
+			memory.regions[idx].guest_phys_addr;
+		regions[idx].guest_phys_address_end =
+			memory.regions[idx].guest_phys_addr +
+			memory.regions[idx].memory_size;
+		regions[idx].memory_size = memory.regions[idx].memory_size;
+		regions[idx].userspace_address =
+			memory.regions[idx].userspace_addr;
+
+		/* offset translating a guest physical address to the
+		 * host virtual address inside this mapping */
+		regions[idx].address_offset = mapped_address - base_address +
+			regions[idx].userspace_address -
+			regions[idx].guest_phys_address;
+		LOG_DEBUG(VHOST_CONFIG,
+			"REGION: %u - GPA: %p - QEMU VA: %p - SIZE (%"PRIu64")\n",
+			idx,
+			(void *)(uintptr_t)regions[idx].guest_phys_address,
+			(void *)(uintptr_t)regions[idx].userspace_address,
+			regions[idx].memory_size);
+	}
+	ops->set_mem_table(ctx, regions, memory.nregions);
+	return 0;
+}
+
+#else
+
+/* Handle SET_MEM_TABLE: mmap each announced guest memory region and
+ * register the resulting GPA->HVA translation table with the device.
+ * @return 0 on success, -1 on failure.
+ */
+int
+user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
+{
+	unsigned int idx;
+	struct VhostUserMemory memory = pmsg->payload.memory;
+	struct virtio_memory_regions regions[VHOST_MEMORY_MAX_NREGIONS];
+	uint64_t mapped_address, base_address = 0;
+
+	/* locate the region whose guest physical address is 0 */
+	for (idx = 0; idx < memory.nregions; idx++) {
+		if (memory.regions[idx].guest_phys_addr == 0)
+			base_address = memory.regions[idx].userspace_addr;
+	}
+	if (base_address == 0) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"couldn't find the mem region whose gpa is 0.\n");
+		return -1;
+	}
+
+
+	for (idx = 0; idx < memory.nregions; idx++) {
+		regions[idx].guest_phys_address =
+			memory.regions[idx].guest_phys_addr;
+		regions[idx].guest_phys_address_end =
+			memory.regions[idx].guest_phys_addr +
+			memory.regions[idx].memory_size;
+		regions[idx].memory_size = memory.regions[idx].memory_size;
+		regions[idx].userspace_address =
+			memory.regions[idx].userspace_addr;
+/*
+	mapped_address = (uint64_t)(uintptr_t)mmap(NULL,
+		regions[idx].memory_size,
+		PROT_READ | PROT_WRITE, MAP_SHARED,
+		pmsg->fds[idx],
+		memory.regions[idx].mmap_offset);
+*/
+
+/* This is ugly */
+		/* NOTE(review): maps from file offset 0 and then skips
+		 * mmap_offset by pointer arithmetic — presumably because
+		 * mmap_offset may not be page-aligned; confirm, and note
+		 * the unmap path must account for the extra length. */
+		mapped_address = (uint64_t)(uintptr_t)mmap(NULL,
+			regions[idx].memory_size +
+			memory.regions[idx].mmap_offset,
+			PROT_READ | PROT_WRITE, MAP_SHARED,
+			pmsg->fds[idx],
+			0);
+		/* NOTE(review): debug printf left over from development */
+		printf("mapped to %p\n", (void *)mapped_address);
+
+		if (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) {
+			RTE_LOG(ERR, VHOST_CONFIG, " mmap qemu guest failed.\n");
+			return -1;
+		}
+
+/* NOTE(review): commented-out munmap experiments — remove before merge */
+//		printf("ret=%d\n", munmap((void *)mapped_address, (regions[idx].memory_size + memory.regions[idx].mmap_offset + 0x3FFFFFFF) & ~0x3FFFFFFF));
+//		printf("unaligned ret=%d\n", munmap((void *)mapped_address, (regions[idx].memory_size + memory.regions[idx].mmap_offset ) ));
+		mapped_address += memory.regions[idx].mmap_offset;
+
+		/* offset translating a guest physical address to the
+		 * host virtual address inside this mapping */
+		regions[idx].address_offset = mapped_address -
+			regions[idx].guest_phys_address;
+		LOG_DEBUG(VHOST_CONFIG,
+			"REGION: %u - GPA: %p - QEMU VA: %p - SIZE (%"PRIu64")\n",
+			idx,
+			(void *)(uintptr_t)regions[idx].guest_phys_address,
+			(void *)(uintptr_t)regions[idx].userspace_address,
+			regions[idx].memory_size);
+	}
+	ops->set_mem_table(ctx, regions, memory.nregions);
+	return 0;
+}
+
+
+
+
+#endif
+
+
+/* Handle SET_VRING_CALL: install the interrupt (call) fd for a vring.
+ * NOTE(review): VHOST_USER_VRING_NOFD_MASK in payload.u64 is not
+ * checked — when the master sets it, fds[0] is not valid; confirm. */
+void
+user_set_vring_call(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
+{
+	struct vhost_vring_file file;
+
+	/* low byte of u64 carries the vring index */
+	file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+	file.fd = pmsg->fds[0];
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"vring call idx:%d file:%d\n", file.index, file.fd);
+	ops->set_vring_call(ctx, &file);
+}
+
+
+/* Handle SET_VRING_KICK: install the notification (kick) fd for a
+ * vring.  Per the commit message, this message also signals that the
+ * vring is available.
+ * NOTE(review): VHOST_USER_VRING_NOFD_MASK in payload.u64 is not
+ * checked — when the master sets it, fds[0] is not valid; confirm. */
+void
+user_set_vring_kick(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
+{
+	struct vhost_vring_file file;
+
+	/* low byte of u64 carries the vring index */
+	file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+	file.fd = pmsg->fds[0];
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"vring kick idx:%d file:%d\n", file.index, file.fd);
+	ops->set_vring_kick(ctx, &file);
+}
diff --git a/lib/librte_vhost/vhost-user/virtio-net-user.h b/lib/librte_vhost/vhost-user/virtio-net-user.h
new file mode 100644
index 0000000..0969376
--- /dev/null
+++ b/lib/librte_vhost/vhost-user/virtio-net-user.h
@@ -0,0 +1,11 @@
+#ifndef _VIRTIO_NET_USER_H
+#define _VIRTIO_NET_USER_H
+
+#include "vhost-net.h"
+#include "vhost-net-user.h"
+
+/* vhost-user message handlers that need the raw message (payload plus
+ * the fds received as ancillary data), implemented in virtio-net-user.c */
+int user_set_mem_table(struct vhost_device_ctx, struct VhostUserMsg *);
+void user_set_vring_kick(struct vhost_device_ctx, struct VhostUserMsg *);
+void user_set_vring_call(struct vhost_device_ctx, struct VhostUserMsg *);
+
+#endif
diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index ccfd82f..8ff0301 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -38,19 +38,14 @@
#include <rte_memcpy.h>
#include <rte_virtio_net.h>
-#include "vhost-net-cdev.h"
+#include "vhost-net.h"
-#define MAX_PKT_BURST 32
+#define VHOST_MAX_PKT_BURST 64
+#define VHOST_MAX_MRG_PKT_BURST 64
-/**
- * This function adds buffers to the virtio devices RX virtqueue. Buffers can
- * be received from the physical port or from another virtio device. A packet
- * count is returned to indicate the number of packets that are succesfully
- * added to the RX queue. This function works when mergeable is disabled.
- */
-static inline uint32_t __attribute__((always_inline))
-virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
- struct rte_mbuf **pkts, uint32_t count)
+
+uint32_t
+rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id, struct rte_mbuf **pkts, uint32_t count)
{
struct vhost_virtqueue *vq;
struct vring_desc *desc;
@@ -59,26 +54,23 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
uint64_t buff_addr = 0;
uint64_t buff_hdr_addr = 0;
- uint32_t head[MAX_PKT_BURST], packet_len = 0;
+ uint32_t head[VHOST_MAX_PKT_BURST], packet_len = 0;
uint32_t head_idx, packet_success = 0;
+ uint32_t mergeable, mrg_count = 0;
uint16_t avail_idx, res_cur_idx;
uint16_t res_base_idx, res_end_idx;
uint16_t free_entries;
uint8_t success = 0;
- LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_rx()\n", dev->device_fh);
+ LOG_DEBUG(VHOST_DATA, "(%"PRIu64") %s()\n", dev->device_fh, __func__);
if (unlikely(queue_id != VIRTIO_RXQ)) {
LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
return 0;
}
vq = dev->virtqueue[VIRTIO_RXQ];
- count = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count;
-
- /*
- * As many data cores may want access to available buffers,
- * they need to be reserved.
- */
+ count = (count > VHOST_MAX_PKT_BURST) ? VHOST_MAX_PKT_BURST : count;
+ /* As many data cores may want access to available buffers, they need to be reserved. */
do {
res_base_idx = vq->last_used_idx_res;
avail_idx = *((volatile uint16_t *)&vq->avail->idx);
@@ -93,21 +85,25 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
res_end_idx = res_base_idx + count;
/* vq->last_used_idx_res is atomically updated. */
- /* TODO: Allow to disable cmpset if no concurrency in application. */
+ /* TODO: Allow disabling cmpset if there is no concurrency in the application */
success = rte_atomic16_cmpset(&vq->last_used_idx_res,
res_base_idx, res_end_idx);
+ /* If the cmpset failed due to contention, try again. */
} while (unlikely(success == 0));
res_cur_idx = res_base_idx;
LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| End Index %d\n",
- dev->device_fh, res_cur_idx, res_end_idx);
+ dev->device_fh,
+ res_cur_idx, res_end_idx);
/* Prefetch available ring to retrieve indexes. */
rte_prefetch0(&vq->avail->ring[res_cur_idx & (vq->size - 1)]);
+ /* Check if the VIRTIO_NET_F_MRG_RXBUF feature is enabled. */
+ mergeable = dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF);
+
/* Retrieve all of the head indexes first to avoid caching issues. */
for (head_idx = 0; head_idx < count; head_idx++)
- head[head_idx] = vq->avail->ring[(res_cur_idx + head_idx) &
- (vq->size - 1)];
+ head[head_idx] = vq->avail->ring[(res_cur_idx + head_idx) & (vq->size - 1)];
/*Prefetch descriptor index. */
rte_prefetch0(&vq->desc[head[packet_success]]);
@@ -123,46 +119,57 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
/* Prefetch buffer address. */
rte_prefetch0((void *)(uintptr_t)buff_addr);
- /* Copy virtio_hdr to packet and increment buffer address */
- buff_hdr_addr = buff_addr;
- packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;
-
- /*
- * If the descriptors are chained the header and data are
- * placed in separate buffers.
- */
- if (desc->flags & VRING_DESC_F_NEXT) {
- desc->len = vq->vhost_hlen;
- desc = &vq->desc[desc->next];
- /* Buffer address translation. */
- buff_addr = gpa_to_vva(dev, desc->addr);
- desc->len = rte_pktmbuf_data_len(buff);
+ if (mergeable && (mrg_count != 0)) {
+ desc->len = packet_len = rte_pktmbuf_data_len(buff);
} else {
- buff_addr += vq->vhost_hlen;
- desc->len = packet_len;
+ /* Copy virtio_hdr to packet and increment buffer address */
+ buff_hdr_addr = buff_addr;
+ packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;
+
+ /*
+ * If the descriptors are chained the header and data are placed in
+ * separate buffers.
+ */
+ if (desc->flags & VRING_DESC_F_NEXT) {
+ desc->len = vq->vhost_hlen;
+ desc = &vq->desc[desc->next];
+ /* Buffer address translation. */
+ buff_addr = gpa_to_vva(dev, desc->addr);
+ desc->len = rte_pktmbuf_data_len(buff);
+ } else {
+ buff_addr += vq->vhost_hlen;
+ desc->len = packet_len;
+ }
}
+ VHOST_PRINT_PACKET(dev, (uintptr_t)buff_addr, rte_pktmbuf_data_len(buff), 0);
+
/* Update used ring with desc information */
- vq->used->ring[res_cur_idx & (vq->size - 1)].id =
- head[packet_success];
+ vq->used->ring[res_cur_idx & (vq->size - 1)].id = head[packet_success];
vq->used->ring[res_cur_idx & (vq->size - 1)].len = packet_len;
/* Copy mbuf data to buffer */
- /* FIXME for sg mbuf and the case that desc couldn't hold the mbuf data */
- rte_memcpy((void *)(uintptr_t)buff_addr,
- rte_pktmbuf_mtod(buff, const void *),
- rte_pktmbuf_data_len(buff));
- PRINT_PACKET(dev, (uintptr_t)buff_addr,
- rte_pktmbuf_data_len(buff), 0);
+ /* TODO fixme for sg mbuf and the case that desc couldn't hold the mbuf data */
+ rte_memcpy((void *)(uintptr_t)buff_addr, (const void *)buff->pkt.data, rte_pktmbuf_data_len(buff));
res_cur_idx++;
packet_success++;
- rte_memcpy((void *)(uintptr_t)buff_hdr_addr,
- (const void *)&virtio_hdr, vq->vhost_hlen);
-
- PRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1);
-
+ /* If mergeable is disabled then a header is required per buffer. */
+ if (!mergeable) {
+ rte_memcpy((void *)(uintptr_t)buff_hdr_addr, (const void *)&virtio_hdr, vq->vhost_hlen);
+ VHOST_PRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1);
+ } else {
+ mrg_count++;
+ /* Merge buffer can only handle so many buffers at a time. Tell the guest if this limit is reached. */
+ if ((mrg_count == VHOST_MAX_MRG_PKT_BURST) || (res_cur_idx == res_end_idx)) {
+ virtio_hdr.num_buffers = mrg_count;
+ LOG_DEBUG(VHOST_DATA, "(%"PRIu64") RX: Num merge buffers %d\n", dev->device_fh, virtio_hdr.num_buffers);
+ rte_memcpy((void *)(uintptr_t)buff_hdr_addr, (const void *)&virtio_hdr, vq->vhost_hlen);
+ VHOST_PRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1);
+ mrg_count = 0;
+ }
+ }
if (res_cur_idx < res_end_idx) {
/* Prefetch descriptor index. */
rte_prefetch0(&vq->desc[head[packet_success]]);
@@ -184,357 +191,18 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
return count;
}
-static inline uint32_t __attribute__((always_inline))
-copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
- uint16_t res_end_idx, struct rte_mbuf *pkt)
-{
- uint32_t vec_idx = 0;
- uint32_t entry_success = 0;
- struct vhost_virtqueue *vq;
- /* The virtio_hdr is initialised to 0. */
- struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {
- {0, 0, 0, 0, 0, 0}, 0};
- uint16_t cur_idx = res_base_idx;
- uint64_t vb_addr = 0;
- uint64_t vb_hdr_addr = 0;
- uint32_t seg_offset = 0;
- uint32_t vb_offset = 0;
- uint32_t seg_avail;
- uint32_t vb_avail;
- uint32_t cpy_len, entry_len;
-
- if (pkt == NULL)
- return 0;
-
- LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| "
- "End Index %d\n",
- dev->device_fh, cur_idx, res_end_idx);
-
- /*
- * Convert from gpa to vva
- * (guest physical addr -> vhost virtual addr)
- */
- vq = dev->virtqueue[VIRTIO_RXQ];
- vb_addr =
- gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
- vb_hdr_addr = vb_addr;
-
- /* Prefetch buffer address. */
- rte_prefetch0((void *)(uintptr_t)vb_addr);
-
- virtio_hdr.num_buffers = res_end_idx - res_base_idx;
-
- LOG_DEBUG(VHOST_DATA, "(%"PRIu64") RX: Num merge buffers %d\n",
- dev->device_fh, virtio_hdr.num_buffers);
- rte_memcpy((void *)(uintptr_t)vb_hdr_addr,
- (const void *)&virtio_hdr, vq->vhost_hlen);
-
- PRINT_PACKET(dev, (uintptr_t)vb_hdr_addr, vq->vhost_hlen, 1);
-
- seg_avail = rte_pktmbuf_data_len(pkt);
- vb_offset = vq->vhost_hlen;
- vb_avail =
- vq->buf_vec[vec_idx].buf_len - vq->vhost_hlen;
-
- entry_len = vq->vhost_hlen;
-
- if (vb_avail == 0) {
- uint32_t desc_idx =
- vq->buf_vec[vec_idx].desc_idx;
- vq->desc[desc_idx].len = vq->vhost_hlen;
-
- if ((vq->desc[desc_idx].flags
- & VRING_DESC_F_NEXT) == 0) {
- /* Update used ring with desc information */
- vq->used->ring[cur_idx & (vq->size - 1)].id
- = vq->buf_vec[vec_idx].desc_idx;
- vq->used->ring[cur_idx & (vq->size - 1)].len
- = entry_len;
-
- entry_len = 0;
- cur_idx++;
- entry_success++;
- }
-
- vec_idx++;
- vb_addr =
- gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
-
- /* Prefetch buffer address. */
- rte_prefetch0((void *)(uintptr_t)vb_addr);
- vb_offset = 0;
- vb_avail = vq->buf_vec[vec_idx].buf_len;
- }
-
- cpy_len = RTE_MIN(vb_avail, seg_avail);
-
- while (cpy_len > 0) {
- /* Copy mbuf data to vring buffer */
- rte_memcpy((void *)(uintptr_t)(vb_addr + vb_offset),
- (const void *)(rte_pktmbuf_mtod(pkt, char*) + seg_offset),
- cpy_len);
-
- PRINT_PACKET(dev,
- (uintptr_t)(vb_addr + vb_offset),
- cpy_len, 0);
-
- seg_offset += cpy_len;
- vb_offset += cpy_len;
- seg_avail -= cpy_len;
- vb_avail -= cpy_len;
- entry_len += cpy_len;
-
- if (seg_avail != 0) {
- /*
- * The virtio buffer in this vring
- * entry reach to its end.
- * But the segment doesn't complete.
- */
- if ((vq->desc[vq->buf_vec[vec_idx].desc_idx].flags &
- VRING_DESC_F_NEXT) == 0) {
- /* Update used ring with desc information */
- vq->used->ring[cur_idx & (vq->size - 1)].id
- = vq->buf_vec[vec_idx].desc_idx;
- vq->used->ring[cur_idx & (vq->size - 1)].len
- = entry_len;
- entry_len = 0;
- cur_idx++;
- entry_success++;
- }
-
- vec_idx++;
- vb_addr = gpa_to_vva(dev,
- vq->buf_vec[vec_idx].buf_addr);
- vb_offset = 0;
- vb_avail = vq->buf_vec[vec_idx].buf_len;
- cpy_len = RTE_MIN(vb_avail, seg_avail);
- } else {
- /*
- * This current segment complete, need continue to
- * check if the whole packet complete or not.
- */
- pkt = pkt->next;
- if (pkt != NULL) {
- /*
- * There are more segments.
- */
- if (vb_avail == 0) {
- /*
- * This current buffer from vring is
- * used up, need fetch next buffer
- * from buf_vec.
- */
- uint32_t desc_idx =
- vq->buf_vec[vec_idx].desc_idx;
- vq->desc[desc_idx].len = vb_offset;
-
- if ((vq->desc[desc_idx].flags &
- VRING_DESC_F_NEXT) == 0) {
- uint16_t wrapped_idx =
- cur_idx & (vq->size - 1);
- /*
- * Update used ring with the
- * descriptor information
- */
- vq->used->ring[wrapped_idx].id
- = desc_idx;
- vq->used->ring[wrapped_idx].len
- = entry_len;
- entry_success++;
- entry_len = 0;
- cur_idx++;
- }
-
- /* Get next buffer from buf_vec. */
- vec_idx++;
- vb_addr = gpa_to_vva(dev,
- vq->buf_vec[vec_idx].buf_addr);
- vb_avail =
- vq->buf_vec[vec_idx].buf_len;
- vb_offset = 0;
- }
-
- seg_offset = 0;
- seg_avail = rte_pktmbuf_data_len(pkt);
- cpy_len = RTE_MIN(vb_avail, seg_avail);
- } else {
- /*
- * This whole packet completes.
- */
- uint32_t desc_idx =
- vq->buf_vec[vec_idx].desc_idx;
- vq->desc[desc_idx].len = vb_offset;
-
- while (vq->desc[desc_idx].flags &
- VRING_DESC_F_NEXT) {
- desc_idx = vq->desc[desc_idx].next;
- vq->desc[desc_idx].len = 0;
- }
-
- /* Update used ring with desc information */
- vq->used->ring[cur_idx & (vq->size - 1)].id
- = vq->buf_vec[vec_idx].desc_idx;
- vq->used->ring[cur_idx & (vq->size - 1)].len
- = entry_len;
- entry_len = 0;
- cur_idx++;
- entry_success++;
- seg_avail = 0;
- cpy_len = RTE_MIN(vb_avail, seg_avail);
- }
- }
- }
-
- return entry_success;
-}
-
-/*
- * This function works for mergeable RX.
- */
-static inline uint32_t __attribute__((always_inline))
-virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
- struct rte_mbuf **pkts, uint32_t count)
+uint32_t
+rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id, struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint32_t count)
{
- struct vhost_virtqueue *vq;
- uint32_t pkt_idx = 0, entry_success = 0;
- uint16_t avail_idx, res_cur_idx;
- uint16_t res_base_idx, res_end_idx;
- uint8_t success = 0;
-
- LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_merge_rx()\n",
- dev->device_fh);
- if (unlikely(queue_id != VIRTIO_RXQ)) {
- LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
- }
-
- vq = dev->virtqueue[VIRTIO_RXQ];
- count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
-
- if (count == 0)
- return 0;
-
- for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
- uint32_t secure_len = 0;
- uint16_t need_cnt;
- uint32_t vec_idx = 0;
- uint32_t pkt_len = pkts[pkt_idx]->pkt_len + vq->vhost_hlen;
- uint16_t i, id;
-
- do {
- /*
- * As many data cores may want access to available
- * buffers, they need to be reserved.
- */
- res_base_idx = vq->last_used_idx_res;
- res_cur_idx = res_base_idx;
-
- do {
- avail_idx = *((volatile uint16_t *)&vq->avail->idx);
- if (unlikely(res_cur_idx == avail_idx)) {
- LOG_DEBUG(VHOST_DATA,
- "(%"PRIu64") Failed "
- "to get enough desc from "
- "vring\n",
- dev->device_fh);
- return pkt_idx;
- } else {
- uint16_t wrapped_idx =
- (res_cur_idx) & (vq->size - 1);
- uint32_t idx =
- vq->avail->ring[wrapped_idx];
- uint8_t next_desc;
-
- do {
- next_desc = 0;
- secure_len += vq->desc[idx].len;
- if (vq->desc[idx].flags &
- VRING_DESC_F_NEXT) {
- idx = vq->desc[idx].next;
- next_desc = 1;
- }
- } while (next_desc);
-
- res_cur_idx++;
- }
- } while (pkt_len > secure_len);
-
- /* vq->last_used_idx_res is atomically updated. */
- success = rte_atomic16_cmpset(&vq->last_used_idx_res,
- res_base_idx,
- res_cur_idx);
- } while (success == 0);
-
- id = res_base_idx;
- need_cnt = res_cur_idx - res_base_idx;
-
- for (i = 0; i < need_cnt; i++, id++) {
- uint16_t wrapped_idx = id & (vq->size - 1);
- uint32_t idx = vq->avail->ring[wrapped_idx];
- uint8_t next_desc;
- do {
- next_desc = 0;
- vq->buf_vec[vec_idx].buf_addr =
- vq->desc[idx].addr;
- vq->buf_vec[vec_idx].buf_len =
- vq->desc[idx].len;
- vq->buf_vec[vec_idx].desc_idx = idx;
- vec_idx++;
-
- if (vq->desc[idx].flags & VRING_DESC_F_NEXT) {
- idx = vq->desc[idx].next;
- next_desc = 1;
- }
- } while (next_desc);
- }
-
- res_end_idx = res_cur_idx;
-
- entry_success = copy_from_mbuf_to_vring(dev, res_base_idx,
- res_end_idx, pkts[pkt_idx]);
-
- rte_compiler_barrier();
-
- /*
- * Wait until it's our turn to add our buffer
- * to the used ring.
- */
- while (unlikely(vq->last_used_idx != res_base_idx))
- rte_pause();
-
- *(volatile uint16_t *)&vq->used->idx += entry_success;
- vq->last_used_idx = res_end_idx;
-
- /* Kick the guest if necessary. */
- if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
- eventfd_write((int)vq->kickfd, 1);
- }
-
- return count;
-}
-
-uint16_t
-rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
- struct rte_mbuf **pkts, uint16_t count)
-{
- if (unlikely(dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)))
- return virtio_dev_merge_rx(dev, queue_id, pkts, count);
- else
- return virtio_dev_rx(dev, queue_id, pkts, count);
-}
-
-uint16_t
-rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
- struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
-{
- struct rte_mbuf *m, *prev;
+ struct rte_mbuf *mbuf;
struct vhost_virtqueue *vq;
struct vring_desc *desc;
- uint64_t vb_addr = 0;
- uint32_t head[MAX_PKT_BURST];
+ uint64_t buff_addr = 0;
+ uint32_t head[VHOST_MAX_PKT_BURST];
uint32_t used_idx;
uint32_t i;
- uint16_t free_entries, entry_success = 0;
+ uint16_t free_entries, packet_success = 0;
uint16_t avail_idx;
if (unlikely(queue_id != VIRTIO_TXQ)) {
@@ -549,8 +217,8 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
if (vq->last_used_idx == avail_idx)
return 0;
- LOG_DEBUG(VHOST_DATA, "%s (%"PRIu64")\n", __func__,
- dev->device_fh);
+ LOG_DEBUG(VHOST_DATA, "(%"PRIu64") %s(%d->%d)\n",
+ dev->device_fh, __func__, vq->last_used_idx, avail_idx);
/* Prefetch available ring to retrieve head indexes. */
rte_prefetch0(&vq->avail->ring[vq->last_used_idx & (vq->size - 1)]);
@@ -558,173 +226,68 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
/*get the number of free entries in the ring*/
free_entries = (avail_idx - vq->last_used_idx);
- free_entries = RTE_MIN(free_entries, count);
+ if (free_entries > count)
+ free_entries = count;
/* Limit to MAX_PKT_BURST. */
- free_entries = RTE_MIN(free_entries, MAX_PKT_BURST);
+ if (free_entries > VHOST_MAX_PKT_BURST)
+ free_entries = VHOST_MAX_PKT_BURST;
- LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n",
- dev->device_fh, free_entries);
+ LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n", dev->device_fh, free_entries);
/* Retrieve all of the head indexes first to avoid caching issues. */
for (i = 0; i < free_entries; i++)
head[i] = vq->avail->ring[(vq->last_used_idx + i) & (vq->size - 1)];
/* Prefetch descriptor index. */
- rte_prefetch0(&vq->desc[head[entry_success]]);
+ rte_prefetch0(&vq->desc[head[packet_success]]);
rte_prefetch0(&vq->used->ring[vq->last_used_idx & (vq->size - 1)]);
- while (entry_success < free_entries) {
- uint32_t vb_avail, vb_offset;
- uint32_t seg_avail, seg_offset;
- uint32_t cpy_len;
- uint32_t seg_num = 0;
- struct rte_mbuf *cur;
- uint8_t alloc_err = 0;
-
- desc = &vq->desc[head[entry_success]];
+ while (packet_success < free_entries) {
+ desc = &vq->desc[head[packet_success]];
/* Discard first buffer as it is the virtio header */
desc = &vq->desc[desc->next];
/* Buffer address translation. */
- vb_addr = gpa_to_vva(dev, desc->addr);
+ buff_addr = gpa_to_vva(dev, desc->addr);
/* Prefetch buffer address. */
- rte_prefetch0((void *)(uintptr_t)vb_addr);
+ rte_prefetch0((void *)(uintptr_t)buff_addr);
used_idx = vq->last_used_idx & (vq->size - 1);
- if (entry_success < (free_entries - 1)) {
+ if (packet_success < (free_entries - 1)) {
/* Prefetch descriptor index. */
- rte_prefetch0(&vq->desc[head[entry_success+1]]);
+ rte_prefetch0(&vq->desc[head[packet_success+1]]);
rte_prefetch0(&vq->used->ring[(used_idx + 1) & (vq->size - 1)]);
}
/* Update used index buffer information. */
- vq->used->ring[used_idx].id = head[entry_success];
+ vq->used->ring[used_idx].id = head[packet_success];
vq->used->ring[used_idx].len = 0;
- vb_offset = 0;
- vb_avail = desc->len;
- /* Allocate an mbuf and populate the structure. */
- m = rte_pktmbuf_alloc(mbuf_pool);
- if (unlikely(m == NULL)) {
- RTE_LOG(ERR, VHOST_DATA,
- "Failed to allocate memory for mbuf.\n");
- return entry_success;
+ mbuf = rte_pktmbuf_alloc(mbuf_pool);
+ if (unlikely(mbuf == NULL)) {
+ RTE_LOG(ERR, VHOST_DATA, "Failed to allocate memory for mbuf.\n");
+ return packet_success;
}
- seg_offset = 0;
- seg_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
- cpy_len = RTE_MIN(vb_avail, seg_avail);
-
- PRINT_PACKET(dev, (uintptr_t)vb_addr, desc->len, 0);
-
- seg_num++;
- cur = m;
- prev = m;
- while (cpy_len != 0) {
- rte_memcpy((void *)(rte_pktmbuf_mtod(cur, char *) + seg_offset),
- (void *)((uintptr_t)(vb_addr + vb_offset)),
- cpy_len);
-
- seg_offset += cpy_len;
- vb_offset += cpy_len;
- vb_avail -= cpy_len;
- seg_avail -= cpy_len;
-
- if (vb_avail != 0) {
- /*
- * The segment reachs to its end,
- * while the virtio buffer in TX vring has
- * more data to be copied.
- */
- cur->data_len = seg_offset;
- m->pkt_len += seg_offset;
- /* Allocate mbuf and populate the structure. */
- cur = rte_pktmbuf_alloc(mbuf_pool);
- if (unlikely(cur == NULL)) {
- RTE_LOG(ERR, VHOST_DATA, "Failed to "
- "allocate memory for mbuf.\n");
- rte_pktmbuf_free(m);
- alloc_err = 1;
- break;
- }
-
- seg_num++;
- prev->next = cur;
- prev = cur;
- seg_offset = 0;
- seg_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM;
- } else {
- if (desc->flags & VRING_DESC_F_NEXT) {
- /*
- * There are more virtio buffers in
- * same vring entry need to be copied.
- */
- if (seg_avail == 0) {
- /*
- * The current segment hasn't
- * room to accomodate more
- * data.
- */
- cur->data_len = seg_offset;
- m->pkt_len += seg_offset;
- /*
- * Allocate an mbuf and
- * populate the structure.
- */
- cur = rte_pktmbuf_alloc(mbuf_pool);
- if (unlikely(cur == NULL)) {
- RTE_LOG(ERR,
- VHOST_DATA,
- "Failed to "
- "allocate memory "
- "for mbuf\n");
- rte_pktmbuf_free(m);
- alloc_err = 1;
- break;
- }
- seg_num++;
- prev->next = cur;
- prev = cur;
- seg_offset = 0;
- seg_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM;
- }
-
- desc = &vq->desc[desc->next];
-
- /* Buffer address translation. */
- vb_addr = gpa_to_vva(dev, desc->addr);
- /* Prefetch buffer address. */
- rte_prefetch0((void *)(uintptr_t)vb_addr);
- vb_offset = 0;
- vb_avail = desc->len;
-
- PRINT_PACKET(dev, (uintptr_t)vb_addr,
- desc->len, 0);
- } else {
- /* The whole packet completes. */
- cur->data_len = seg_offset;
- m->pkt_len += seg_offset;
- vb_avail = 0;
- }
- }
+ mbuf->pkt.data_len = desc->len;
+ mbuf->pkt.pkt_len = mbuf->pkt.data_len;
- cpy_len = RTE_MIN(vb_avail, seg_avail);
- }
+ rte_memcpy((void *) mbuf->pkt.data,
+ (const void *) buff_addr, mbuf->pkt.data_len);
- if (unlikely(alloc_err == 1))
- break;
+ pkts[packet_success] = mbuf;
- m->nb_segs = seg_num;
+ VHOST_PRINT_PACKET(dev, (uintptr_t)buff_addr, desc->len, 0);
- pkts[entry_success] = m;
vq->last_used_idx++;
- entry_success++;
+ packet_success++;
}
rte_compiler_barrier();
- vq->used->idx += entry_success;
+ vq->used->idx += packet_success;
/* Kick guest if required. */
if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
eventfd_write((int)vq->kickfd, 1);
- return entry_success;
+
+ return packet_success;
}
diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index 852b6d1..516e743 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -31,17 +31,14 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#include <dirent.h>
-#include <fuse/cuse_lowlevel.h>
#include <linux/vhost.h>
#include <linux/virtio_net.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
-#include <sys/eventfd.h>
-#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
+#include <assert.h>
#include <rte_ethdev.h>
#include <rte_log.h>
@@ -49,10 +46,8 @@
#include <rte_memory.h>
#include <rte_virtio_net.h>
-#include "vhost-net-cdev.h"
-#include "eventfd_link/eventfd_link.h"
-
-/*
+#include "vhost-net.h"
+/**
* Device linked list structure for configuration.
*/
struct virtio_net_config_ll {
@@ -60,38 +55,15 @@ struct virtio_net_config_ll {
struct virtio_net_config_ll *next; /* Next dev on linked list.*/
};
-const char eventfd_cdev[] = "/dev/eventfd-link";
-
-/* device ops to add/remove device to/from data core. */
+/* device ops to add/remove device to data core. */
static struct virtio_net_device_ops const *notify_ops;
-/* root address of the linked list of managed virtio devices */
+/* root address of the linked list in the configuration core. */
static struct virtio_net_config_ll *ll_root;
/* Features supported by this lib. */
-#define VHOST_SUPPORTED_FEATURES ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | \
- (1ULL << VIRTIO_NET_F_CTRL_RX))
+#define VHOST_SUPPORTED_FEATURES (1ULL << VIRTIO_NET_F_MRG_RXBUF)
static uint64_t VHOST_FEATURES = VHOST_SUPPORTED_FEATURES;
-/* Line size for reading maps file. */
-static const uint32_t BUFSIZE = PATH_MAX;
-
-/* Size of prot char array in procmap. */
-#define PROT_SZ 5
-
-/* Number of elements in procmap struct. */
-#define PROCMAP_SZ 8
-
-/* Structure containing information gathered from maps file. */
-struct procmap {
- uint64_t va_start; /* Start virtual address in file. */
- uint64_t len; /* Size of file. */
- uint64_t pgoff; /* Not used. */
- uint32_t maj; /* Not used. */
- uint32_t min; /* Not used. */
- uint32_t ino; /* Not used. */
- char prot[PROT_SZ]; /* Not used. */
- char fname[PATH_MAX]; /* File name. */
-};
/*
* Converts QEMU virtual address to Vhost virtual address. This function is
@@ -110,199 +82,15 @@ qva_to_vva(struct virtio_net *dev, uint64_t qemu_va)
if ((qemu_va >= region->userspace_address) &&
(qemu_va <= region->userspace_address +
region->memory_size)) {
- vhost_va = dev->mem->mapped_address + qemu_va -
- dev->mem->base_address;
+ vhost_va = qemu_va + region->guest_phys_address +
+ region->address_offset -
+ region->userspace_address;
break;
}
}
return vhost_va;
}
-/*
- * Locate the file containing QEMU's memory space and
- * map it to our address space.
- */
-static int
-host_memory_map(struct virtio_net *dev, struct virtio_memory *mem,
- pid_t pid, uint64_t addr)
-{
- struct dirent *dptr = NULL;
- struct procmap procmap;
- DIR *dp = NULL;
- int fd;
- int i;
- char memfile[PATH_MAX];
- char mapfile[PATH_MAX];
- char procdir[PATH_MAX];
- char resolved_path[PATH_MAX];
- char *path = NULL;
- FILE *fmap;
- void *map;
- uint8_t found = 0;
- char line[BUFSIZE];
- char dlm[] = "- : ";
- char *str, *sp, *in[PROCMAP_SZ];
- char *end = NULL;
-
- /* Path where mem files are located. */
- snprintf(procdir, PATH_MAX, "/proc/%u/fd/", pid);
- /* Maps file used to locate mem file. */
- snprintf(mapfile, PATH_MAX, "/proc/%u/maps", pid);
-
- fmap = fopen(mapfile, "r");
- if (fmap == NULL) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "(%"PRIu64") Failed to open maps file for pid %d\n",
- dev->device_fh, pid);
- return -1;
- }
-
- /* Read through maps file until we find out base_address. */
- while (fgets(line, BUFSIZE, fmap) != 0) {
- str = line;
- errno = 0;
- /* Split line into fields. */
- for (i = 0; i < PROCMAP_SZ; i++) {
- in[i] = strtok_r(str, &dlm[i], &sp);
- if ((in[i] == NULL) || (errno != 0)) {
- fclose(fmap);
- return -1;
- }
- str = NULL;
- }
-
- /* Convert/Copy each field as needed. */
- procmap.va_start = strtoull(in[0], &end, 16);
- if ((in[0] == '\0') || (end == NULL) || (*end != '\0') ||
- (errno != 0)) {
- fclose(fmap);
- return -1;
- }
-
- procmap.len = strtoull(in[1], &end, 16);
- if ((in[1] == '\0') || (end == NULL) || (*end != '\0') ||
- (errno != 0)) {
- fclose(fmap);
- return -1;
- }
-
- procmap.pgoff = strtoull(in[3], &end, 16);
- if ((in[3] == '\0') || (end == NULL) || (*end != '\0') ||
- (errno != 0)) {
- fclose(fmap);
- return -1;
- }
-
- procmap.maj = strtoul(in[4], &end, 16);
- if ((in[4] == '\0') || (end == NULL) || (*end != '\0') ||
- (errno != 0)) {
- fclose(fmap);
- return -1;
- }
-
- procmap.min = strtoul(in[5], &end, 16);
- if ((in[5] == '\0') || (end == NULL) || (*end != '\0') ||
- (errno != 0)) {
- fclose(fmap);
- return -1;
- }
-
- procmap.ino = strtoul(in[6], &end, 16);
- if ((in[6] == '\0') || (end == NULL) || (*end != '\0') ||
- (errno != 0)) {
- fclose(fmap);
- return -1;
- }
-
- memcpy(&procmap.prot, in[2], PROT_SZ);
- memcpy(&procmap.fname, in[7], PATH_MAX);
-
- if (procmap.va_start == addr) {
- procmap.len = procmap.len - procmap.va_start;
- found = 1;
- break;
- }
- }
- fclose(fmap);
-
- if (!found) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "(%"PRIu64") Failed to find memory file in pid %d maps file\n",
- dev->device_fh, pid);
- return -1;
- }
-
- /* Find the guest memory file among the process fds. */
- dp = opendir(procdir);
- if (dp == NULL) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "(%"PRIu64") Cannot open pid %d process directory\n",
- dev->device_fh, pid);
- return -1;
- }
-
- found = 0;
-
- /* Read the fd directory contents. */
- while (NULL != (dptr = readdir(dp))) {
- snprintf(memfile, PATH_MAX, "/proc/%u/fd/%s",
- pid, dptr->d_name);
- path = realpath(memfile, resolved_path);
- if ((path == NULL) && (strlen(resolved_path) == 0)) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "(%"PRIu64") Failed to resolve fd directory\n",
- dev->device_fh);
- closedir(dp);
- return -1;
- }
- if (strncmp(resolved_path, procmap.fname,
- strnlen(procmap.fname, PATH_MAX)) == 0) {
- found = 1;
- break;
- }
- }
-
- closedir(dp);
-
- if (found == 0) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "(%"PRIu64") Failed to find memory file for pid %d\n",
- dev->device_fh, pid);
- return -1;
- }
- /* Open the shared memory file and map the memory into this process. */
- fd = open(memfile, O_RDWR);
-
- if (fd == -1) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "(%"PRIu64") Failed to open %s for pid %d\n",
- dev->device_fh, memfile, pid);
- return -1;
- }
-
- map = mmap(0, (size_t)procmap.len, PROT_READ|PROT_WRITE,
- MAP_POPULATE|MAP_SHARED, fd, 0);
- close(fd);
-
- if (map == MAP_FAILED) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "(%"PRIu64") Error mapping the file %s for pid %d\n",
- dev->device_fh, memfile, pid);
- return -1;
- }
-
- /* Store the memory address and size in the device data structure */
- mem->mapped_address = (uint64_t)(uintptr_t)map;
- mem->mapped_size = procmap.len;
-
- LOG_DEBUG(VHOST_CONFIG,
- "(%"PRIu64") Mem File: %s->%s - Size: %llu - VA: %p\n",
- dev->device_fh,
- memfile, resolved_path,
- (unsigned long long)mem->mapped_size, map);
-
- return 0;
-}
/*
* Retrieves an entry from the devices configuration linked list.
@@ -376,7 +164,7 @@ add_config_ll_entry(struct virtio_net_config_ll *new_ll_dev)
}
}
-
+/* TODO: use DPDK alloc/free if possible */
/*
* Unmap any memory, close any file descriptors and
* free any memory owned by a device.
@@ -389,16 +177,17 @@ cleanup_device(struct virtio_net *dev)
munmap((void *)(uintptr_t)dev->mem->mapped_address,
(size_t)dev->mem->mapped_size);
free(dev->mem);
+ dev->mem = NULL;
}
/* Close any event notifiers opened by device. */
- if (dev->virtqueue[VIRTIO_RXQ]->callfd)
+ if (dev->virtqueue[VIRTIO_RXQ]->callfd > 0)
close((int)dev->virtqueue[VIRTIO_RXQ]->callfd);
- if (dev->virtqueue[VIRTIO_RXQ]->kickfd)
+ if (dev->virtqueue[VIRTIO_RXQ]->kickfd > 0)
close((int)dev->virtqueue[VIRTIO_RXQ]->kickfd);
- if (dev->virtqueue[VIRTIO_TXQ]->callfd)
+ if (dev->virtqueue[VIRTIO_TXQ]->callfd > 0)
close((int)dev->virtqueue[VIRTIO_TXQ]->callfd);
- if (dev->virtqueue[VIRTIO_TXQ]->kickfd)
+ if (dev->virtqueue[VIRTIO_TXQ]->kickfd > 0)
close((int)dev->virtqueue[VIRTIO_TXQ]->kickfd);
}
@@ -522,8 +311,8 @@ new_device(struct vhost_device_ctx ctx)
}
/*
- * Function is called from the CUSE release function. This function will
- * cleanup the device and remove it from device configuration linked list.
+ * Function is called from the CUSE release function. This function will cleanup
+ * the device and remove it from device configuration linked list.
*/
static void
destroy_device(struct vhost_device_ctx ctx)
@@ -569,6 +358,7 @@ set_owner(struct vhost_device_ctx ctx)
return -1;
return 0;
+ /* TODO: check whether ctx.fh is meaningful here */
}
/*
@@ -651,14 +441,12 @@ set_features(struct vhost_device_ctx ctx, uint64_t *pu)
* This includes storing offsets used to translate buffer addresses.
*/
static int
-set_mem_table(struct vhost_device_ctx ctx, const void *mem_regions_addr,
- uint32_t nregions)
+set_mem_table(struct vhost_device_ctx ctx,
+ const struct virtio_memory_regions *regions, uint32_t nregions)
{
struct virtio_net *dev;
- struct vhost_memory_region *mem_regions;
struct virtio_memory *mem;
- uint64_t size = offsetof(struct vhost_memory, regions);
- uint32_t regionidx, valid_regions;
+ uint32_t regionidx;
dev = get_device(ctx);
if (dev == NULL)
@@ -682,107 +470,24 @@ set_mem_table(struct vhost_device_ctx ctx, const void *mem_regions_addr,
mem->nregions = nregions;
- mem_regions = (void *)(uintptr_t)
- ((uint64_t)(uintptr_t)mem_regions_addr + size);
-
for (regionidx = 0; regionidx < mem->nregions; regionidx++) {
/* Populate the region structure for each region. */
- mem->regions[regionidx].guest_phys_address =
- mem_regions[regionidx].guest_phys_addr;
- mem->regions[regionidx].guest_phys_address_end =
- mem->regions[regionidx].guest_phys_address +
- mem_regions[regionidx].memory_size;
- mem->regions[regionidx].memory_size =
- mem_regions[regionidx].memory_size;
- mem->regions[regionidx].userspace_address =
- mem_regions[regionidx].userspace_addr;
-
- LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") REGION: %u - GPA: %p - QEMU VA: %p - SIZE (%"PRIu64")\n", dev->device_fh,
- regionidx,
- (void *)(uintptr_t)mem->regions[regionidx].guest_phys_address,
- (void *)(uintptr_t)mem->regions[regionidx].userspace_address,
- mem->regions[regionidx].memory_size);
-
- /*set the base address mapping*/
+ mem->regions[regionidx] = regions[regionidx];
if (mem->regions[regionidx].guest_phys_address == 0x0) {
mem->base_address =
mem->regions[regionidx].userspace_address;
- /* Map VM memory file */
- if (host_memory_map(dev, mem, ctx.pid,
- mem->base_address) != 0) {
- free(mem);
- return -1;
- }
+ mem->mapped_address =
+ mem->regions[regionidx].address_offset;
}
}
- /* Check that we have a valid base address. */
- if (mem->base_address == 0) {
- RTE_LOG(ERR, VHOST_CONFIG, "(%"PRIu64") Failed to find base address of qemu memory file.\n", dev->device_fh);
- free(mem);
- return -1;
- }
-
- /*
- * Check if all of our regions have valid mappings.
- * Usually one does not exist in the QEMU memory file.
- */
- valid_regions = mem->nregions;
- for (regionidx = 0; regionidx < mem->nregions; regionidx++) {
- if ((mem->regions[regionidx].userspace_address <
- mem->base_address) ||
- (mem->regions[regionidx].userspace_address >
- (mem->base_address + mem->mapped_size)))
- valid_regions--;
- }
-
- /*
- * If a region does not have a valid mapping,
- * we rebuild our memory struct to contain only valid entries.
- */
- if (valid_regions != mem->nregions) {
- LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") Not all memory regions exist in the QEMU mem file. Re-populating mem structure\n",
- dev->device_fh);
-
- /*
- * Re-populate the memory structure with only valid regions.
- * Invalid regions are over-written with memmove.
- */
- valid_regions = 0;
-
- for (regionidx = mem->nregions; 0 != regionidx--;) {
- if ((mem->regions[regionidx].userspace_address <
- mem->base_address) ||
- (mem->regions[regionidx].userspace_address >
- (mem->base_address + mem->mapped_size))) {
- memmove(&mem->regions[regionidx],
- &mem->regions[regionidx + 1],
- sizeof(struct virtio_memory_regions) *
- valid_regions);
- } else {
- valid_regions++;
- }
- }
- }
- mem->nregions = valid_regions;
+ /* TODO: add back the logic that removes invalid memory regions */
dev->mem = mem;
- /*
- * Calculate the address offset for each region.
- * This offset is used to identify the vhost virtual address
- * corresponding to a QEMU guest physical address.
- */
- for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++) {
- dev->mem->regions[regionidx].address_offset =
- dev->mem->regions[regionidx].userspace_address -
- dev->mem->base_address +
- dev->mem->mapped_address -
- dev->mem->regions[regionidx].guest_phys_address;
-
- }
return 0;
}
+
/*
* Called from CUSE IOCTL: VHOST_SET_VRING_NUM
* The virtio device sends us the size of the descriptor ring.
@@ -896,38 +601,62 @@ get_vring_base(struct vhost_device_ctx ctx, uint32_t index,
/* State->index refers to the queue index. The txq is 1, rxq is 0. */
state->num = dev->virtqueue[state->index]->last_used_idx;
- return 0;
-}
+ if (dev->flags & VIRTIO_DEV_RUNNING) {
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "get_vring_base message is for release\n");
+ notify_ops->destroy_device(dev);
+ /*
+ * sync call.
+	 * when it returns, it means it is removed from the data core.
+ */
+ }
+ /* TODO fix all munmap */
+ if (dev->mem) {
+ munmap((void *)(uintptr_t)dev->mem->mapped_address,
+ (size_t)dev->mem->mapped_size);
+ free(dev->mem);
+ dev->mem = NULL;
+ }
-/*
- * This function uses the eventfd_link kernel module to copy an eventfd file
- * descriptor provided by QEMU in to our process space.
- */
-static int
-eventfd_copy(struct virtio_net *dev, struct eventfd_copy *eventfd_copy)
-{
- int eventfd_link, ret;
- /* Open the character device to the kernel module. */
- eventfd_link = open(eventfd_cdev, O_RDWR);
- if (eventfd_link < 0) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "(%"PRIu64") eventfd_link module is not loaded\n",
- dev->device_fh);
- return -1;
- }
+ if (dev->virtqueue[VIRTIO_RXQ]->callfd > 0)
+ close((int)dev->virtqueue[VIRTIO_RXQ]->callfd);
+ dev->virtqueue[VIRTIO_RXQ]->callfd = -1;
+ if (dev->virtqueue[VIRTIO_TXQ]->callfd > 0)
+ close((int)dev->virtqueue[VIRTIO_TXQ]->callfd);
+ dev->virtqueue[VIRTIO_TXQ]->callfd = -1;
+	/* We don't clean up callfd here as we won't get CALLFD again */
+
+ dev->virtqueue[VIRTIO_RXQ]->desc = NULL;
+ dev->virtqueue[VIRTIO_RXQ]->avail = NULL;
+ dev->virtqueue[VIRTIO_RXQ]->used = NULL;
+ dev->virtqueue[VIRTIO_RXQ]->last_used_idx = 0;
+ dev->virtqueue[VIRTIO_RXQ]->last_used_idx_res = 0;
+
+ dev->virtqueue[VIRTIO_TXQ]->desc = NULL;
+ dev->virtqueue[VIRTIO_TXQ]->avail = NULL;
+ dev->virtqueue[VIRTIO_TXQ]->used = NULL;
+ dev->virtqueue[VIRTIO_TXQ]->last_used_idx = 0;
+ dev->virtqueue[VIRTIO_TXQ]->last_used_idx_res = 0;
- /* Call the IOCTL to copy the eventfd. */
- ret = ioctl(eventfd_link, EVENTFD_COPY, eventfd_copy);
- close(eventfd_link);
- if (ret < 0) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "(%"PRIu64") EVENTFD_COPY ioctl failed\n",
- dev->device_fh);
- return -1;
- }
+ return 0;
+}
+static int
+virtio_is_ready(struct virtio_net *dev, int index)
+{
+ struct vhost_virtqueue *vq1, *vq2;
+ /* mq support in future.*/
+ vq1 = dev->virtqueue[index];
+ vq2 = dev->virtqueue[index ^ 1];
+ if (vq1 && vq2 && vq1->desc && vq2->desc &&
+ (vq1->kickfd > 0) && (vq1->callfd > 0) &&
+ (vq2->kickfd > 0) && (vq2->callfd > 0)) {
+ LOG_DEBUG(VHOST_CONFIG, "virtio is ready for processing.\n");
+ return 1;
+ }
+ LOG_DEBUG(VHOST_CONFIG, "virtio isn't ready for processing.\n");
return 0;
}
@@ -940,7 +669,6 @@ static int
set_vring_call(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
{
struct virtio_net *dev;
- struct eventfd_copy eventfd_kick;
struct vhost_virtqueue *vq;
dev = get_device(ctx);
@@ -953,14 +681,7 @@ set_vring_call(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
if (vq->kickfd)
close((int)vq->kickfd);
- /* Populate the eventfd_copy structure and call eventfd_copy. */
- vq->kickfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
- eventfd_kick.source_fd = vq->kickfd;
- eventfd_kick.target_fd = file->fd;
- eventfd_kick.target_pid = ctx.pid;
-
- if (eventfd_copy(dev, &eventfd_kick))
- return -1;
+ vq->kickfd = file->fd;
return 0;
}
@@ -974,7 +695,6 @@ static int
set_vring_kick(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
{
struct virtio_net *dev;
- struct eventfd_copy eventfd_call;
struct vhost_virtqueue *vq;
dev = get_device(ctx);
@@ -986,16 +706,11 @@ set_vring_kick(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
if (vq->callfd)
close((int)vq->callfd);
+ vq->callfd = file->fd;
- /* Populate the eventfd_copy structure and call eventfd_copy. */
- vq->callfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
- eventfd_call.source_fd = vq->callfd;
- eventfd_call.target_fd = file->fd;
- eventfd_call.target_pid = ctx.pid;
-
- if (eventfd_copy(dev, &eventfd_call))
- return -1;
-
+ if (virtio_is_ready(dev, file->index) &&
+ !(dev->flags & VIRTIO_DEV_RUNNING))
+ notify_ops->new_device(dev);
return 0;
}
@@ -1024,6 +739,7 @@ set_backend(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
* If the device isn't already running and both backend fds are set,
* we add the device.
*/
+ LOG_DEBUG(VHOST_CONFIG, "%s %d\n", __func__, file->fd);
if (!(dev->flags & VIRTIO_DEV_RUNNING)) {
if (((int)dev->virtqueue[VIRTIO_TXQ]->backend != VIRTIO_DEV_STOPPED) &&
((int)dev->virtqueue[VIRTIO_RXQ]->backend != VIRTIO_DEV_STOPPED))
--
1.8.1.4
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [dpdk-dev] [PATCH RFC] lib/librte_vhost: vhost-user
2014-11-15 1:14 [dpdk-dev] [PATCH RFC] lib/librte_vhost: vhost-user Huawei Xie
@ 2014-11-17 6:04 ` Tetsuya Mukawa
2014-11-17 6:11 ` Tetsuya Mukawa
2014-11-17 6:06 ` [dpdk-dev] [RFC PATCH] lib/librte_vhost: cleanup white spaces, tabs and indents Tetsuya Mukawa
2014-11-17 6:07 ` [dpdk-dev] [RFC PATCH 1/2] lib/librte_vhost: change macro name of include guard Tetsuya Mukawa
2 siblings, 1 reply; 6+ messages in thread
From: Tetsuya Mukawa @ 2014-11-17 6:04 UTC (permalink / raw)
To: Huawei Xie, dev
Hi Xie,
(2014/11/15 10:14), Huawei Xie wrote:
> implement socket server
> fd event dispatch mechanism
> vhost sock message handling
> memory map for each region
> VHOST_USER_SET_VRING_KICK_FD as the indicator that vring is available
> VHOST_USER_GET_VRING_BASE as the message that vring should be released
>
> The message flow between vhost-user and vhost-cuse is kindof different,
> which makes virtio-net common message handler layer difficult and complicated to handle
> both cases in new_device/destroy_device/memory map/resource cleanup.
>
> Will only leave the most common messag handling in virtio-net, and move the
> control logic to cuse/fuse layer.
>
>
> Signed-off-by: Huawei Xie <huawei.xie@intel.com>
Great patch!
I guess we can start from this patch to implement vhost-user and
abstraction layer.
I've checked the patch.
1. White space, tab and indent patch.
I will send a patch that cleans up white space, tabs and indentation. Could you
please check it?
It might be difficult to see the difference if your editor doesn't show
spaces or tabs.
2. Some files are based on old code.
At least, the following patch is not included.
- vhost: fix build without unused result
Also vhost_rxtx.c isn't probably based on latest code.
3. Device abstraction layer code
I will send the device abstraction layer code after this email.
Anyway, I guess we need to decide whether or not we still keep the
vhost-cuse code.
4. Multiple devices operation.
For example, when thread1 opens vhost-user device1 and thread2 opens
vhost-user device2,
each thread may want to register its own callbacks.
Current implementation may not allow this.
I guess we need to eliminate global variables in librte_vhost as much as
possible.
Thanks,
Tetsuya
> ---
> lib/librte_vhost/Makefile | 14 +-
> lib/librte_vhost/eventfd_link/eventfd_link.c | 27 +-
> lib/librte_vhost/eventfd_link/eventfd_link.h | 48 +-
> lib/librte_vhost/libvirt/qemu-wrap.py | 367 ---------------
> lib/librte_vhost/rte_virtio_net.h | 106 ++---
> lib/librte_vhost/vhost-cuse/vhost-net-cdev.c | 436 ++++++++++++++++++
> lib/librte_vhost/vhost-cuse/virtio-net-cdev.c | 314 +++++++++++++
> lib/librte_vhost/vhost-cuse/virtio-net-cdev.h | 43 ++
> lib/librte_vhost/vhost-net-cdev.c | 389 ----------------
> lib/librte_vhost/vhost-net-cdev.h | 113 -----
> lib/librte_vhost/vhost-user/fd_man.c | 158 +++++++
> lib/librte_vhost/vhost-user/fd_man.h | 31 ++
> lib/librte_vhost/vhost-user/vhost-net-user.c | 417 +++++++++++++++++
> lib/librte_vhost/vhost-user/vhost-net-user.h | 74 +++
> lib/librte_vhost/vhost-user/virtio-net-user.c | 208 +++++++++
> lib/librte_vhost/vhost-user/virtio-net-user.h | 11 +
> lib/librte_vhost/vhost_rxtx.c | 625 ++++----------------------
> lib/librte_vhost/virtio-net.c | 450 ++++---------------
> 18 files changed, 1939 insertions(+), 1892 deletions(-)
> delete mode 100755 lib/librte_vhost/libvirt/qemu-wrap.py
> create mode 100644 lib/librte_vhost/vhost-cuse/vhost-net-cdev.c
> create mode 100644 lib/librte_vhost/vhost-cuse/virtio-net-cdev.c
> create mode 100644 lib/librte_vhost/vhost-cuse/virtio-net-cdev.h
> delete mode 100644 lib/librte_vhost/vhost-net-cdev.c
> delete mode 100644 lib/librte_vhost/vhost-net-cdev.h
> create mode 100644 lib/librte_vhost/vhost-user/fd_man.c
> create mode 100644 lib/librte_vhost/vhost-user/fd_man.h
> create mode 100644 lib/librte_vhost/vhost-user/vhost-net-user.c
> create mode 100644 lib/librte_vhost/vhost-user/vhost-net-user.h
> create mode 100644 lib/librte_vhost/vhost-user/virtio-net-user.c
> create mode 100644 lib/librte_vhost/vhost-user/virtio-net-user.h
>
> diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
> index c008d64..cb4e172 100644
> --- a/lib/librte_vhost/Makefile
> +++ b/lib/librte_vhost/Makefile
> @@ -34,17 +34,19 @@ include $(RTE_SDK)/mk/rte.vars.mk
> # library name
> LIB = librte_vhost.a
>
> -CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -D_FILE_OFFSET_BITS=64 -lfuse
> +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I. -I vhost-user -I vhost-cuse -O3 -D_FILE_OFFSET_BITS=64 -lfuse
> LDFLAGS += -lfuse
> # all source are stored in SRCS-y
> -SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost-net-cdev.c virtio-net.c vhost_rxtx.c
> +#SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost-cuse/vhost-net-cdev.c vhost-cuse/virtio-net-cdev.c
> +
> +SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost-user/fd_man.c vhost-user/vhost-net-user.c vhost-user/virtio-net-user.c
> +
> +SRCS-$(CONFIG_RTE_LIBRTE_VHOST) += virtio-net.c vhost_rxtx.c
>
> # install includes
> SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h
>
> -# dependencies
> -DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_eal
> -DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_ether
> -DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_mbuf
> +# this lib needs eal
> +DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_eal lib/librte_mbuf
>
> include $(RTE_SDK)/mk/rte.lib.mk
> diff --git a/lib/librte_vhost/eventfd_link/eventfd_link.c b/lib/librte_vhost/eventfd_link/eventfd_link.c
> index 7755dd6..4c9b628 100644
> --- a/lib/librte_vhost/eventfd_link/eventfd_link.c
> +++ b/lib/librte_vhost/eventfd_link/eventfd_link.c
> @@ -13,8 +13,7 @@
> * General Public License for more details.
> *
> * You should have received a copy of the GNU General Public License
> - * along with this program; if not, write to the Free Software
> - * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
> + * along with this program; If not, see <http://www.gnu.org/licenses/>.
> * The full GNU General Public License is included in this distribution
> * in the file called LICENSE.GPL.
> *
> @@ -78,8 +77,7 @@ eventfd_link_ioctl(struct file *f, unsigned int ioctl, unsigned long arg)
>
> switch (ioctl) {
> case EVENTFD_COPY:
> - if (copy_from_user(&eventfd_copy, argp,
> - sizeof(struct eventfd_copy)))
> + if (copy_from_user(&eventfd_copy, argp, sizeof(struct eventfd_copy)))
> return -EFAULT;
>
> /*
> @@ -88,28 +86,28 @@ eventfd_link_ioctl(struct file *f, unsigned int ioctl, unsigned long arg)
> task_target =
> pid_task(find_vpid(eventfd_copy.target_pid), PIDTYPE_PID);
> if (task_target == NULL) {
> - pr_debug("Failed to get mem ctx for target pid\n");
> + printk(KERN_DEBUG "Failed to get mem ctx for target pid\n");
> return -EFAULT;
> }
>
> files = get_files_struct(current);
> if (files == NULL) {
> - pr_debug("Failed to get files struct\n");
> + printk(KERN_DEBUG "Failed to get files struct\n");
> return -EFAULT;
> }
>
> rcu_read_lock();
> file = fcheck_files(files, eventfd_copy.source_fd);
> if (file) {
> - if (file->f_mode & FMODE_PATH ||
> - !atomic_long_inc_not_zero(&file->f_count))
> + if (file->f_mode & FMODE_PATH
> + || !atomic_long_inc_not_zero(&file->f_count))
> file = NULL;
> }
> rcu_read_unlock();
> put_files_struct(files);
>
> if (file == NULL) {
> - pr_debug("Failed to get file from source pid\n");
> + printk(KERN_DEBUG "Failed to get file from source pid\n");
> return 0;
> }
>
> @@ -128,25 +126,26 @@ eventfd_link_ioctl(struct file *f, unsigned int ioctl, unsigned long arg)
>
> files = get_files_struct(task_target);
> if (files == NULL) {
> - pr_debug("Failed to get files struct\n");
> + printk(KERN_DEBUG "Failed to get files struct\n");
> return -EFAULT;
> }
>
> rcu_read_lock();
> file = fcheck_files(files, eventfd_copy.target_fd);
> if (file) {
> - if (file->f_mode & FMODE_PATH ||
> - !atomic_long_inc_not_zero(&file->f_count))
> - file = NULL;
> + if (file->f_mode & FMODE_PATH
> + || !atomic_long_inc_not_zero(&file->f_count))
> + file = NULL;
> }
> rcu_read_unlock();
> put_files_struct(files);
>
> if (file == NULL) {
> - pr_debug("Failed to get file from target pid\n");
> + printk(KERN_DEBUG "Failed to get file from target pid\n");
> return 0;
> }
>
> +
> /*
> * Install the file struct from the target process into the
> * file desciptor of the source process,
> diff --git a/lib/librte_vhost/eventfd_link/eventfd_link.h b/lib/librte_vhost/eventfd_link/eventfd_link.h
> index ea619ec..38052e2 100644
> --- a/lib/librte_vhost/eventfd_link/eventfd_link.h
> +++ b/lib/librte_vhost/eventfd_link/eventfd_link.h
> @@ -1,7 +1,4 @@
> /*-
> - * This file is provided under a dual BSD/GPLv2 license. When using or
> - * redistributing this file, you may do so under either license.
> - *
> * GPL LICENSE SUMMARY
> *
> * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> @@ -16,61 +13,28 @@
> * General Public License for more details.
> *
> * You should have received a copy of the GNU General Public License
> - * along with this program; if not, write to the Free Software
> - * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
> + * along with this program; If not, see <http://www.gnu.org/licenses/>.
> * The full GNU General Public License is included in this distribution
> * in the file called LICENSE.GPL.
> *
> * Contact Information:
> * Intel Corporation
> - *
> - * BSD LICENSE
> - *
> - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> - * All rights reserved.
> - *
> - * Redistribution and use in source and binary forms, with or without
> - * modification, are permitted provided that the following conditions
> - * are met:
> - *
> - * Redistributions of source code must retain the above copyright
> - * notice, this list of conditions and the following disclaimer.
> - * Redistributions in binary form must reproduce the above copyright
> - * notice, this list of conditions and the following disclaimer in
> - * the documentation and/or other materials provided with the
> - * distribution.
> - * Neither the name of Intel Corporation nor the names of its
> - * contributors may be used to endorse or promote products derived
> - * from this software without specific prior written permission.
> - *
> - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> - *
> */
>
> #ifndef _EVENTFD_LINK_H_
> #define _EVENTFD_LINK_H_
>
> /*
> - * ioctl to copy an fd entry in calling process to an fd in a target process
> + * ioctl to copy an fd entry in calling process to an fd in a target process
> */
> #define EVENTFD_COPY 1
>
> /*
> - * arguements for the EVENTFD_COPY ioctl
> + * arguements for the EVENTFD_COPY ioctl
> */
> struct eventfd_copy {
> - unsigned target_fd; /* fd in the target pid */
> - unsigned source_fd; /* fd in the calling pid */
> - pid_t target_pid; /* pid of the target pid */
> + unsigned target_fd; /**< fd in the target pid */
> + unsigned source_fd; /**< fd in the calling pid */
> + pid_t target_pid; /**< pid of the target pid */
> };
> #endif /* _EVENTFD_LINK_H_ */
> diff --git a/lib/librte_vhost/libvirt/qemu-wrap.py b/lib/librte_vhost/libvirt/qemu-wrap.py
> deleted file mode 100755
> index e2d68a0..0000000
> --- a/lib/librte_vhost/libvirt/qemu-wrap.py
> +++ /dev/null
> @@ -1,367 +0,0 @@
> -#!/usr/bin/python
> -#/*
> -# * BSD LICENSE
> -# *
> -# * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> -# * All rights reserved.
> -# *
> -# * Redistribution and use in source and binary forms, with or without
> -# * modification, are permitted provided that the following conditions
> -# * are met:
> -# *
> -# * * Redistributions of source code must retain the above copyright
> -# * notice, this list of conditions and the following disclaimer.
> -# * * Redistributions in binary form must reproduce the above copyright
> -# * notice, this list of conditions and the following disclaimer in
> -# * the documentation and/or other materials provided with the
> -# * distribution.
> -# * * Neither the name of Intel Corporation nor the names of its
> -# * contributors may be used to endorse or promote products derived
> -# * from this software without specific prior written permission.
> -# *
> -# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> -# * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> -# * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> -# * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> -# * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> -# * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> -# * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> -# * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> -# * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> -# * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> -# * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> -# */
> -
> -#####################################################################
> -# This script is designed to modify the call to the QEMU emulator
> -# to support userspace vhost when starting a guest machine through
> -# libvirt with vhost enabled. The steps to enable this are as follows
> -# and should be run as root:
> -#
> -# 1. Place this script in a libvirtd's binary search PATH ($PATH)
> -# A good location would be in the same directory that the QEMU
> -# binary is located
> -#
> -# 2. Ensure that the script has the same owner/group and file
> -# permissions as the QEMU binary
> -#
> -# 3. Update the VM xml file using "virsh edit VM.xml"
> -#
> -# 3.a) Set the VM to use the launch script
> -#
> -# Set the emulator path contained in the
> -# <emulator><emulator/> tags
> -#
> -# e.g replace <emulator>/usr/bin/qemu-kvm<emulator/>
> -# with <emulator>/usr/bin/qemu-wrap.py<emulator/>
> -#
> -# 3.b) Set the VM's device's to use vhost-net offload
> -#
> -# <interface type="network">
> -# <model type="virtio"/>
> -# <driver name="vhost"/>
> -# <interface/>
> -#
> -# 4. Enable libvirt to access our userpace device file by adding it to
> -# controllers cgroup for libvirtd using the following steps
> -#
> -# 4.a) In /etc/libvirt/qemu.conf add/edit the following lines:
> -# 1) cgroup_controllers = [ ... "devices", ... ]
> -# 2) clear_emulator_capabilities = 0
> -# 3) user = "root"
> -# 4) group = "root"
> -# 5) cgroup_device_acl = [
> -# "/dev/null", "/dev/full", "/dev/zero",
> -# "/dev/random", "/dev/urandom",
> -# "/dev/ptmx", "/dev/kvm", "/dev/kqemu",
> -# "/dev/rtc", "/dev/hpet", "/dev/net/tun",
> -# "/dev/<devbase-name>-<index>",
> -# ]
> -#
> -# 4.b) Disable SELinux or set to permissive mode
> -#
> -# 4.c) Mount cgroup device controller
> -# "mkdir /dev/cgroup"
> -# "mount -t cgroup none /dev/cgroup -o devices"
> -#
> -# 4.d) Set hugetlbfs_mount variable - ( Optional )
> -# VMs using userspace vhost must use hugepage backed
> -# memory. This can be enabled in the libvirt XML
> -# config by adding a memory backing section to the
> -# XML config e.g.
> -# <memoryBacking>
> -# <hugepages/>
> -# </memoryBacking>
> -# This memory backing section should be added after the
> -# <memory> and <currentMemory> sections. This will add
> -# flags "-mem-prealloc -mem-path <path>" to the QEMU
> -# command line. The hugetlbfs_mount variable can be used
> -# to override the default <path> passed through by libvirt.
> -#
> -# if "-mem-prealloc" or "-mem-path <path>" are not passed
> -# through and a vhost device is detected then these options will
> -# be automatically added by this script. This script will detect
> -# the system hugetlbfs mount point to be used for <path>. The
> -# default <path> for this script can be overidden by the
> -# hugetlbfs_dir variable in the configuration section of this script.
> -#
> -#
> -# 4.e) Restart the libvirtd system process
> -# e.g. on Fedora "systemctl restart libvirtd.service"
> -#
> -#
> -# 4.f) Edit the Configuration Parameters section of this script
> -# to point to the correct emulator location and set any
> -# addition options
> -#
> -# The script modifies the libvirtd Qemu call by modifying/adding
> -# options based on the configuration parameters below.
> -# NOTE:
> -# emul_path and us_vhost_path must be set
> -# All other parameters are optional
> -#####################################################################
> -
> -
> -#############################################
> -# Configuration Parameters
> -#############################################
> -#Path to QEMU binary
> -emul_path = "/usr/local/bin/qemu-system-x86_64"
> -
> -#Path to userspace vhost device file
> -# This filename should match the --dev-basename --dev-index parameters of
> -# the command used to launch the userspace vhost sample application e.g.
> -# if the sample app lauch command is:
> -# ./build/vhost-switch ..... --dev-basename usvhost --dev-index 1
> -# then this variable should be set to:
> -# us_vhost_path = "/dev/usvhost-1"
> -us_vhost_path = "/dev/usvhost-1"
> -
> -#List of additional user defined emulation options. These options will
> -#be added to all Qemu calls
> -emul_opts_user = []
> -
> -#List of additional user defined emulation options for vhost only.
> -#These options will only be added to vhost enabled guests
> -emul_opts_user_vhost = []
> -
> -#For all VHOST enabled VMs, the VM memory is preallocated from hugetlbfs
> -# Set this variable to one to enable this option for all VMs
> -use_huge_all = 0
> -
> -#Instead of autodetecting, override the hugetlbfs directory by setting
> -#this variable
> -hugetlbfs_dir = ""
> -
> -#############################################
> -
> -
> -#############################################
> -# ****** Do Not Modify Below this Line ******
> -#############################################
> -
> -import sys, os, subprocess
> -
> -
> -#List of open userspace vhost file descriptors
> -fd_list = []
> -
> -#additional virtio device flags when using userspace vhost
> -vhost_flags = [ "csum=off",
> - "gso=off",
> - "guest_tso4=off",
> - "guest_tso6=off",
> - "guest_ecn=off"
> - ]
> -
> -
> -#############################################
> -# Find the system hugefile mount point.
> -# Note:
> -# if multiple hugetlbfs mount points exist
> -# then the first one found will be used
> -#############################################
> -def find_huge_mount():
> -
> - if (len(hugetlbfs_dir)):
> - return hugetlbfs_dir
> -
> - huge_mount = ""
> -
> - if (os.access("/proc/mounts", os.F_OK)):
> - f = open("/proc/mounts", "r")
> - line = f.readline()
> - while line:
> - line_split = line.split(" ")
> - if line_split[2] == 'hugetlbfs':
> - huge_mount = line_split[1]
> - break
> - line = f.readline()
> - else:
> - print "/proc/mounts not found"
> - exit (1)
> -
> - f.close
> - if len(huge_mount) == 0:
> - print "Failed to find hugetlbfs mount point"
> - exit (1)
> -
> - return huge_mount
> -
> -
> -#############################################
> -# Get a userspace Vhost file descriptor
> -#############################################
> -def get_vhost_fd():
> -
> - if (os.access(us_vhost_path, os.F_OK)):
> - fd = os.open( us_vhost_path, os.O_RDWR)
> - else:
> - print ("US-Vhost file %s not found" %us_vhost_path)
> - exit (1)
> -
> - return fd
> -
> -
> -#############################################
> -# Check for vhostfd. if found then replace
> -# with our own vhost fd and append any vhost
> -# flags onto the end
> -#############################################
> -def modify_netdev_arg(arg):
> -
> - global fd_list
> - vhost_in_use = 0
> - s = ''
> - new_opts = []
> - netdev_opts = arg.split(",")
> -
> - for opt in netdev_opts:
> - #check if vhost is used
> - if "vhost" == opt[:5]:
> - vhost_in_use = 1
> - else:
> - new_opts.append(opt)
> -
> - #if using vhost append vhost options
> - if vhost_in_use == 1:
> - #append vhost on option
> - new_opts.append('vhost=on')
> - #append vhostfd ption
> - new_fd = get_vhost_fd()
> - new_opts.append('vhostfd=' + str(new_fd))
> - fd_list.append(new_fd)
> -
> - #concatenate all options
> - for opt in new_opts:
> - if len(s) > 0:
> - s+=','
> -
> - s+=opt
> -
> - return s
> -
> -
> -#############################################
> -# Main
> -#############################################
> -def main():
> -
> - global fd_list
> - global vhost_in_use
> - new_args = []
> - num_cmd_args = len(sys.argv)
> - emul_call = ''
> - mem_prealloc_set = 0
> - mem_path_set = 0
> - num = 0;
> -
> - #parse the parameters
> - while (num < num_cmd_args):
> - arg = sys.argv[num]
> -
> - #Check netdev +1 parameter for vhostfd
> - if arg == '-netdev':
> - num_vhost_devs = len(fd_list)
> - new_args.append(arg)
> -
> - num+=1
> - arg = sys.argv[num]
> - mod_arg = modify_netdev_arg(arg)
> - new_args.append(mod_arg)
> -
> - #append vhost flags if this is a vhost device
> - # and -device is the next arg
> - # i.e -device -opt1,-opt2,...,-opt3,%vhost
> - if (num_vhost_devs < len(fd_list)):
> - num+=1
> - arg = sys.argv[num]
> - if arg == '-device':
> - new_args.append(arg)
> - num+=1
> - new_arg = sys.argv[num]
> - for flag in vhost_flags:
> - new_arg = ''.join([new_arg,',',flag])
> - new_args.append(new_arg)
> - else:
> - new_args.append(arg)
> - elif arg == '-mem-prealloc':
> - mem_prealloc_set = 1
> - new_args.append(arg)
> - elif arg == '-mem-path':
> - mem_path_set = 1
> - new_args.append(arg)
> -
> - else:
> - new_args.append(arg)
> -
> - num+=1
> -
> - #Set Qemu binary location
> - emul_call+=emul_path
> - emul_call+=" "
> -
> - #Add prealloc mem options if using vhost and not already added
> - if ((len(fd_list) > 0) and (mem_prealloc_set == 0)):
> - emul_call += "-mem-prealloc "
> -
> - #Add mempath mem options if using vhost and not already added
> - if ((len(fd_list) > 0) and (mem_path_set == 0)):
> - #Detect and add hugetlbfs mount point
> - mp = find_huge_mount()
> - mp = "".join(["-mem-path ", mp])
> - emul_call += mp
> - emul_call += " "
> -
> -
> - #add user options
> - for opt in emul_opts_user:
> - emul_call += opt
> - emul_call += " "
> -
> - #Add add user vhost only options
> - if len(fd_list) > 0:
> - for opt in emul_opts_user_vhost:
> - emul_call += opt
> - emul_call += " "
> -
> - #Add updated libvirt options
> - iter_args = iter(new_args)
> - #skip 1st arg i.e. call to this script
> - next(iter_args)
> - for arg in iter_args:
> - emul_call+=str(arg)
> - emul_call+= " "
> -
> - #Call QEMU
> - subprocess.call(emul_call, shell=True)
> -
> -
> - #Close usvhost files
> - for fd in fd_list:
> - os.close(fd)
> -
> -
> -if __name__ == "__main__":
> - main()
> -
> diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
> index 00b1328..7a05dab 100644
> --- a/lib/librte_vhost/rte_virtio_net.h
> +++ b/lib/librte_vhost/rte_virtio_net.h
> @@ -34,11 +34,6 @@
> #ifndef _VIRTIO_NET_H_
> #define _VIRTIO_NET_H_
>
> -/**
> - * @file
> - * Interface to vhost net
> - */
> -
> #include <stdint.h>
> #include <linux/virtio_ring.h>
> #include <linux/virtio_net.h>
> @@ -48,66 +43,38 @@
> #include <rte_mempool.h>
> #include <rte_mbuf.h>
>
> -/* Used to indicate that the device is running on a data core */
> -#define VIRTIO_DEV_RUNNING 1
> -
> -/* Backend value set by guest. */
> -#define VIRTIO_DEV_STOPPED -1
> -
> +#define VIRTIO_DEV_RUNNING 1 /**< Used to indicate that the device is running on a data core. */
> +#define VIRTIO_DEV_STOPPED -1 /**< Backend value set by guest. */
>
> /* Enum for virtqueue management. */
> enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
>
> -#define BUF_VECTOR_MAX 256
> -
> -/**
> - * Structure contains buffer address, length and descriptor index
> - * from vring to do scatter RX.
> - */
> -struct buf_vector {
> - uint64_t buf_addr;
> - uint32_t buf_len;
> - uint32_t desc_idx;
> -};
> -
> /**
> * Structure contains variables relevant to RX/TX virtqueues.
> */
> struct vhost_virtqueue {
> - struct vring_desc *desc; /**< Virtqueue descriptor ring. */
> - struct vring_avail *avail; /**< Virtqueue available ring. */
> - struct vring_used *used; /**< Virtqueue used ring. */
> - uint32_t size; /**< Size of descriptor ring. */
> - uint32_t backend; /**< Backend value to determine if device should started/stopped. */
> - uint16_t vhost_hlen; /**< Vhost header length (varies depending on RX merge buffers. */
> - volatile uint16_t last_used_idx; /**< Last index used on the available ring */
> - volatile uint16_t last_used_idx_res; /**< Used for multiple devices reserving buffers. */
> - eventfd_t callfd; /**< Currently unused as polling mode is enabled. */
> - eventfd_t kickfd; /**< Used to notify the guest (trigger interrupt). */
> - struct buf_vector buf_vec[BUF_VECTOR_MAX]; /**< for scatter RX. */
> -} __rte_cache_aligned;
> -
> -/**
> - * Device structure contains all configuration information relating to the device.
> - */
> -struct virtio_net {
> - struct vhost_virtqueue *virtqueue[VIRTIO_QNUM]; /**< Contains all virtqueue information. */
> - struct virtio_memory *mem; /**< QEMU memory and memory region information. */
> - uint64_t features; /**< Negotiated feature set. */
> - uint64_t device_fh; /**< device identifier. */
> - uint32_t flags; /**< Device flags. Only used to check if device is running on data core. */
> - void *priv; /**< private context */
> + struct vring_desc *desc; /**< descriptor ring. */
> + struct vring_avail *avail; /**< available ring. */
> + struct vring_used *used; /**< used ring. */
> + uint32_t size; /**< Size of descriptor ring. */
> + uint32_t backend; /**< Backend value to determine if device should be started/stopped. */
> + uint16_t vhost_hlen; /**< Vhost header length (varies depending on RX merge buffers. */
> + volatile uint16_t last_used_idx; /**< Last index used on the available ring. */
> + volatile uint16_t last_used_idx_res; /**< Used for multiple devices reserving buffers. */
> + eventfd_t callfd; /**< Currently unused as polling mode is enabled. */
> + eventfd_t kickfd; /**< Used to notify the guest (trigger interrupt). */
> } __rte_cache_aligned;
>
> /**
> - * Information relating to memory regions including offsets to addresses in QEMUs memory file.
> + * Information relating to memory regions including offsets to
> + * addresses in QEMUs memory file.
> */
> struct virtio_memory_regions {
> - uint64_t guest_phys_address; /**< Base guest physical address of region. */
> - uint64_t guest_phys_address_end; /**< End guest physical address of region. */
> - uint64_t memory_size; /**< Size of region. */
> - uint64_t userspace_address; /**< Base userspace address of region. */
> - uint64_t address_offset; /**< Offset of region for address translation. */
> + uint64_t guest_phys_address; /**< Base guest physical address of region. */
> + uint64_t guest_phys_address_end; /**< End guest physical address of region. */
> + uint64_t memory_size; /**< Size of region. */
> + uint64_t userspace_address; /**< Base userspace address of region. */
> + uint64_t address_offset; /**< Offset of region for address translation. */
> };
>
>
> @@ -115,21 +82,34 @@ struct virtio_memory_regions {
> * Memory structure includes region and mapping information.
> */
> struct virtio_memory {
> - uint64_t base_address; /**< Base QEMU userspace address of the memory file. */
> - uint64_t mapped_address; /**< Mapped address of memory file base in our applications memory space. */
> - uint64_t mapped_size; /**< Total size of memory file. */
> - uint32_t nregions; /**< Number of memory regions. */
> + uint64_t base_address; /**< Base QEMU userspace address of the memory file. */
> + uint64_t mapped_address; /**< Mapped address of memory file base in our applications memory space. */
> + uint64_t mapped_size; /**< Total size of memory file. */
> + uint32_t nregions; /**< Number of memory regions. */
> struct virtio_memory_regions regions[0]; /**< Memory region information. */
> };
>
> /**
> + * Device structure contains all configuration information relating to the device.
> + */
> +struct virtio_net {
> + struct vhost_virtqueue *virtqueue[VIRTIO_QNUM]; /**< Contains all virtqueue information. */
> + struct virtio_memory *mem; /**< QEMU memory and memory region information. */
> + uint64_t features; /**< Negotiated feature set. */
> + uint64_t device_fh; /**< Device identifier. */
> + uint32_t flags; /**< Device flags. Only used to check if device is running on data core. */
> + void *priv; /**< private context */
> +} __rte_cache_aligned;
> +
> +/**
> * Device operations to add/remove device.
> */
> struct virtio_net_device_ops {
> - int (*new_device)(struct virtio_net *); /**< Add device. */
> - void (*destroy_device)(volatile struct virtio_net *); /**< Remove device. */
> + int (*new_device)(struct virtio_net *); /**< Add device. */
> + void (*destroy_device)(struct virtio_net *); /**< Remove device. */
> };
>
> +
> static inline uint16_t __attribute__((always_inline))
> rte_vring_available_entries(struct virtio_net *dev, uint16_t queue_id)
> {
> @@ -179,7 +159,7 @@ int rte_vhost_driver_register(const char *dev_name);
>
> /* Register callbacks. */
> int rte_vhost_driver_callback_register(struct virtio_net_device_ops const * const);
> -/* Start vhost driver session blocking loop. */
> +
> int rte_vhost_driver_session_start(void);
>
> /**
> @@ -192,8 +172,8 @@ int rte_vhost_driver_session_start(void);
> * @return
> * num of packets enqueued
> */
> -uint16_t rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
> - struct rte_mbuf **pkts, uint16_t count);
> +uint32_t rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
> + struct rte_mbuf **pkts, uint32_t count);
>
> /**
> * This function gets guest buffers from the virtio device TX virtqueue,
> @@ -206,7 +186,7 @@ uint16_t rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
> * @return
> * num of packets dequeued
> */
> -uint16_t rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
> - struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count);
> +uint32_t rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
> + struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint32_t count);
>
> #endif /* _VIRTIO_NET_H_ */
> diff --git a/lib/librte_vhost/vhost-cuse/vhost-net-cdev.c b/lib/librte_vhost/vhost-cuse/vhost-net-cdev.c
> new file mode 100644
> index 0000000..4671643
> --- /dev/null
> +++ b/lib/librte_vhost/vhost-cuse/vhost-net-cdev.c
> @@ -0,0 +1,436 @@
> +/*-
> + * BSD LICENSE
> + *
> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + *
> + * * Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * * Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in
> + * the documentation and/or other materials provided with the
> + * distribution.
> + * * Neither the name of Intel Corporation nor the names of its
> + * contributors may be used to endorse or promote products derived
> + * from this software without specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#include <stdint.h>
> +#include <fuse/cuse_lowlevel.h>
> +#include <linux/limits.h>
> +#include <linux/vhost.h>
> +#include <linux/virtio_net.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include <fcntl.h>
> +#include <sys/eventfd.h>
> +#include <sys/ioctl.h>
> +
> +#include <rte_ethdev.h>
> +#include <rte_log.h>
> +#include <rte_string_fns.h>
> +#include <rte_virtio_net.h>
> +
> +#include "virtio-net-cdev.h"
> +#include "vhost-net.h"
> +#include "eventfd_link/eventfd_link.h"
> +
> +#define FUSE_OPT_DUMMY "\0\0"
> +#define FUSE_OPT_FORE "-f\0\0"
> +#define FUSE_OPT_NOMULTI "-s\0\0"
> +
> +static const uint32_t default_major = 231;
> +static const uint32_t default_minor = 1;
> +static const char cuse_device_name[] = "/dev/cuse";
> +static const char default_cdev[] = "vhost-net";
> +static const char eventfd_cdev[] = "/dev/eventfd-link";
> +
> +static struct fuse_session *session;
> +struct vhost_net_device_ops const *ops;
> +
> +/*
> + * Returns vhost_device_ctx from given fuse_req_t. The index is populated later
> + * when the device is added to the device linked list.
> + */
> +static struct vhost_device_ctx
> +fuse_req_to_vhost_ctx(fuse_req_t req, struct fuse_file_info *fi)
> +{
> + struct vhost_device_ctx ctx;
> + struct fuse_ctx const *const req_ctx = fuse_req_ctx(req);
> +
> + ctx.pid = req_ctx->pid;
> + ctx.fh = fi->fh;
> +
> + return ctx;
> +}
> +
> +/*
> + * When the device is created in QEMU it gets initialised here and
> + * added to the device linked list.
> + */
> +static void
> +vhost_net_open(fuse_req_t req, struct fuse_file_info *fi)
> +{
> + struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
> + int err = 0;
> +
> + err = ops->new_device(ctx);
> + if (err == -1) {
> + fuse_reply_err(req, EPERM);
> + return;
> + }
> +
> + fi->fh = err;
> +
> + RTE_LOG(INFO, VHOST_CONFIG,
> + "(%"PRIu64") Device configuration started\n", fi->fh);
> + fuse_reply_open(req, fi);
> +}
> +
> +/*
> + * When QEMU is shutdown or killed the device gets released.
> + */
> +static void
> +vhost_net_release(fuse_req_t req, struct fuse_file_info *fi)
> +{
> + int err = 0;
> + struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
> +
> + ops->destroy_device(ctx);
> + RTE_LOG(INFO, VHOST_CONFIG, "(%"PRIu64") Device released\n", ctx.fh);
> + fuse_reply_err(req, err);
> +}
> +
> +/*
> + * Boilerplate code for CUSE IOCTL
> + * Implicit arguments: ctx, req, result.
> + */
> +#define VHOST_IOCTL(func) do { \
> + result = (func)(ctx); \
> + fuse_reply_ioctl(req, result, NULL, 0); \
> +} while (0)
> +
> +/*
> + * Boilerplate IOCTL RETRY
> + * Implicit arguments: req.
> + */
> +#define VHOST_IOCTL_RETRY(size_r, size_w) do { \
> + struct iovec iov_r = { arg, (size_r) }; \
> + struct iovec iov_w = { arg, (size_w) }; \
> + fuse_reply_ioctl_retry(req, &iov_r, \
> + (size_r) ? 1 : 0, &iov_w, (size_w) ? 1 : 0);\
> +} while (0)
> +
> +/*
> + * Boilerplate code for CUSE Read IOCTL
> + * Implicit arguments: ctx, req, result, in_bufsz, in_buf.
> + */
> +#define VHOST_IOCTL_R(type, var, func) do { \
> + if (!in_bufsz) { \
> + VHOST_IOCTL_RETRY(sizeof(type), 0);\
> + } else { \
> + (var) = *(const type*)in_buf; \
> + result = func(ctx, &(var)); \
> + fuse_reply_ioctl(req, result, NULL, 0);\
> + } \
> +} while (0)
> +
> +/*
> + * Boilerplate code for CUSE Write IOCTL
> + * Implicit arguments: ctx, req, result, out_bufsz.
> + */
> +#define VHOST_IOCTL_W(type, var, func) do { \
> + if (!out_bufsz) { \
> + VHOST_IOCTL_RETRY(0, sizeof(type));\
> + } else { \
> + result = (func)(ctx, &(var));\
> + fuse_reply_ioctl(req, result, &(var), sizeof(type));\
> + } \
> +} while (0)
> +
> +/*
> + * Boilerplate code for CUSE Read/Write IOCTL
> + * Implicit arguments: ctx, req, result, in_bufsz, in_buf.
> + */
> +#define VHOST_IOCTL_RW(type1, var1, type2, var2, func) do { \
> + if (!in_bufsz) { \
> + VHOST_IOCTL_RETRY(sizeof(type1), sizeof(type2));\
> + } else { \
> + (var1) = *(const type1*) (in_buf); \
> + result = (func)(ctx, (var1), &(var2)); \
> + fuse_reply_ioctl(req, result, &(var2), sizeof(type2));\
> + } \
> +} while (0)
> +
> +/*
> + * This function uses the eventfd_link kernel module to copy an eventfd file
> + * descriptor provided by QEMU in to our process space.
> + */
> +static int
> +eventfd_copy(int target_fd, int target_pid)
> +{
> + int eventfd_link, ret;
> + struct eventfd_copy eventfd_copy;
> + int fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
> +
> + if (fd == -1)
> + return -1;
> +
> + /* Open the character device to the kernel module. */
> + /* TODO: check this earlier rather than fail until VM boots! */
> + eventfd_link = open(eventfd_cdev, O_RDWR);
> + if (eventfd_link < 0) {
> + RTE_LOG(ERR, VHOST_CONFIG,
> + "eventfd_link module is not loaded\n");
> + return -1;
> + }
> +
> + eventfd_copy.source_fd = fd;
> + eventfd_copy.target_fd = target_fd;
> + eventfd_copy.target_pid = target_pid;
> + /* Call the IOCTL to copy the eventfd. */
> + ret = ioctl(eventfd_link, EVENTFD_COPY, &eventfd_copy);
> + close(eventfd_link);
> +
> + if (ret < 0) {
> + RTE_LOG(ERR, VHOST_CONFIG,
> + "EVENTFD_COPY ioctl failed\n");
> + return -1;
> + }
> +
> + return fd;
> +}
> +
> +/*
> + * The IOCTLs are handled using CUSE/FUSE in userspace. Depending on
> + * the type of IOCTL a buffer is requested to read or to write. This
> + * request is handled by FUSE and the buffer is then given to CUSE.
> + */
> +static void
> +vhost_net_ioctl(fuse_req_t req, int cmd, void *arg,
> + struct fuse_file_info *fi, __rte_unused unsigned flags,
> + const void *in_buf, size_t in_bufsz, size_t out_bufsz)
> +{
> + struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
> + struct vhost_vring_file file;
> + struct vhost_vring_state state;
> + struct vhost_vring_addr addr;
> + uint64_t features;
> + uint32_t index;
> + int result = 0;
> +
> + switch (cmd) {
> + case VHOST_NET_SET_BACKEND:
> + LOG_DEBUG(VHOST_CONFIG,
> + "(%"PRIu64") IOCTL: VHOST_NET_SET_BACKEND\n", ctx.fh);
> + VHOST_IOCTL_R(struct vhost_vring_file, file, ops->set_backend);
> + break;
> +
> + case VHOST_GET_FEATURES:
> + LOG_DEBUG(VHOST_CONFIG,
> + "(%"PRIu64") IOCTL: VHOST_GET_FEATURES\n", ctx.fh);
> + VHOST_IOCTL_W(uint64_t, features, ops->get_features);
> + break;
> +
> + case VHOST_SET_FEATURES:
> + LOG_DEBUG(VHOST_CONFIG,
> + "(%"PRIu64") IOCTL: VHOST_SET_FEATURES\n", ctx.fh);
> + VHOST_IOCTL_R(uint64_t, features, ops->set_features);
> + break;
> +
> + case VHOST_RESET_OWNER:
> + LOG_DEBUG(VHOST_CONFIG,
> + "(%"PRIu64") IOCTL: VHOST_RESET_OWNER\n", ctx.fh);
> + VHOST_IOCTL(ops->reset_owner);
> + break;
> +
> + case VHOST_SET_OWNER:
> + LOG_DEBUG(VHOST_CONFIG,
> + "(%"PRIu64") IOCTL: VHOST_SET_OWNER\n", ctx.fh);
> + VHOST_IOCTL(ops->set_owner);
> + break;
> +
> + case VHOST_SET_MEM_TABLE:
> + /*TODO fix race condition.*/
> + LOG_DEBUG(VHOST_CONFIG,
> + "(%"PRIu64") IOCTL: VHOST_SET_MEM_TABLE\n", ctx.fh);
> + static struct vhost_memory mem_temp;
> + switch (in_bufsz) {
> + case 0:
> + VHOST_IOCTL_RETRY(sizeof(struct vhost_memory), 0);
> + break;
> +
> + case sizeof(struct vhost_memory):
> + mem_temp = *(const struct vhost_memory *) in_buf;
> +
> + if (mem_temp.nregions > 0) {
> + VHOST_IOCTL_RETRY(sizeof(struct vhost_memory) +
> + (sizeof(struct vhost_memory_region) *
> + mem_temp.nregions), 0);
> + } else {
> + result = -1;
> + fuse_reply_ioctl(req, result, NULL, 0);
> + }
> + break;
> +
> + default:
> + result = cuse_set_mem_table(ctx, in_buf,
> + mem_temp.nregions);
> + if (result)
> + fuse_reply_err(req, EINVAL);
> + else
> + fuse_reply_ioctl(req, result, NULL, 0);
> + }
> + break;
> +
> + case VHOST_SET_VRING_NUM:
> + LOG_DEBUG(VHOST_CONFIG,
> + "(%"PRIu64") IOCTL: VHOST_SET_VRING_NUM\n", ctx.fh);
> + VHOST_IOCTL_R(struct vhost_vring_state, state, ops->set_vring_num);
> + break;
> +
> + case VHOST_SET_VRING_BASE:
> + LOG_DEBUG(VHOST_CONFIG,
> + "(%"PRIu64") IOCTL: VHOST_SET_VRING_BASE\n", ctx.fh);
> + VHOST_IOCTL_R(struct vhost_vring_state, state, ops->set_vring_base);
> + break;
> +
> + case VHOST_GET_VRING_BASE:
> + LOG_DEBUG(VHOST_CONFIG,
> + "(%"PRIu64") IOCTL: VHOST_GET_VRING_BASE\n", ctx.fh);
> + VHOST_IOCTL_RW(uint32_t, index,
> + struct vhost_vring_state, state, ops->get_vring_base);
> + break;
> +
> + case VHOST_SET_VRING_ADDR:
> + LOG_DEBUG(VHOST_CONFIG,
> + "(%"PRIu64") IOCTL: VHOST_SET_VRING_ADDR\n", ctx.fh);
> + VHOST_IOCTL_R(struct vhost_vring_addr, addr, ops->set_vring_addr);
> + break;
> +
> + case VHOST_SET_VRING_KICK:
> + case VHOST_SET_VRING_CALL:
> + if (!in_buf) {
> + VHOST_IOCTL_RETRY(sizeof(struct vhost_vring_file), 0);
> + } else {
> + int fd;
> + file = *(const struct vhost_vring_file *)in_buf;
> + LOG_DEBUG(VHOST_CONFIG,
> + "kick/call idx:%d fd:%d\n", file.index, file.fd);
> + if ((fd = eventfd_copy(file.fd, ctx.pid)) < 0) {
> + fuse_reply_ioctl(req, -1, NULL, 0);
> + break;
> + }
> + file.fd = fd;
> + if (cmd == VHOST_SET_VRING_KICK) {
> + VHOST_IOCTL_R(struct vhost_vring_file, file, ops->set_vring_kick);
> + }
> + else {
> + VHOST_IOCTL_R(struct vhost_vring_file, file, ops->set_vring_call);
> + }
> + }
> + break;
> +
> + default:
> + RTE_LOG(ERR, VHOST_CONFIG,
> + "(%"PRIu64") IOCTL: DOES NOT EXIST\n", ctx.fh);
> + result = -1;
> + fuse_reply_ioctl(req, result, NULL, 0);
> + }
> +
> + if (result < 0)
> + LOG_DEBUG(VHOST_CONFIG,
> + "(%"PRIu64") IOCTL: FAIL\n", ctx.fh);
> + else
> + LOG_DEBUG(VHOST_CONFIG,
> + "(%"PRIu64") IOCTL: SUCCESS\n", ctx.fh);
> +}
> +
> +/*
> + * Structure handling open, release and ioctl function pointers is populated.
> + */
> +static const struct cuse_lowlevel_ops vhost_net_ops = {
> + .open = vhost_net_open,
> + .release = vhost_net_release,
> + .ioctl = vhost_net_ioctl,
> +};
> +
> +/*
> + * cuse_info is populated and used to register the cuse device.
> + * vhost_net_device_ops are also passed when the device is registered in app.
> + */
> +int
> +rte_vhost_driver_register(const char *dev_name)
> +{
> + struct cuse_info cuse_info;
> + char device_name[PATH_MAX] = "";
> + char char_device_name[PATH_MAX] = "";
> + const char *device_argv[] = { device_name };
> +
> + char fuse_opt_dummy[] = FUSE_OPT_DUMMY;
> + char fuse_opt_fore[] = FUSE_OPT_FORE;
> + char fuse_opt_nomulti[] = FUSE_OPT_NOMULTI;
> + char *fuse_argv[] = {fuse_opt_dummy, fuse_opt_fore, fuse_opt_nomulti};
> +
> + if (access(cuse_device_name, R_OK | W_OK) < 0) {
> + RTE_LOG(ERR, VHOST_CONFIG,
> + "char device %s can't be accessed, maybe not exist\n",
> + cuse_device_name);
> + return -1;
> + }
> +
> + /*
> + * The device name is created. This is passed to QEMU so that it can
> + * register the device with our application.
> + */
> + snprintf(device_name, PATH_MAX, "DEVNAME=%s", dev_name);
> + snprintf(char_device_name, PATH_MAX, "/dev/%s", dev_name);
> +
> + /* Check if device already exists. */
> + if (access(char_device_name, F_OK) != -1) {
> + RTE_LOG(ERR, VHOST_CONFIG,
> + "char device %s already exists\n", char_device_name);
> + return -1;
> + }
> +
> + memset(&cuse_info, 0, sizeof(cuse_info));
> + cuse_info.dev_major = default_major;
> + cuse_info.dev_minor = default_minor;
> + cuse_info.dev_info_argc = 1;
> + cuse_info.dev_info_argv = device_argv;
> + cuse_info.flags = CUSE_UNRESTRICTED_IOCTL;
> +
> + ops = get_virtio_net_callbacks();
> +
> + session = cuse_lowlevel_setup(3, fuse_argv,
> + &cuse_info, &vhost_net_ops, 0, NULL);
> + if (session == NULL)
> + return -1;
> +
> + return 0;
> +}
> +
> +/**
> + * The CUSE session is launched allowing the application to receive open,
> + * release and ioctl calls.
> + */
> +int
> +rte_vhost_driver_session_start(void)
> +{
> + fuse_session_loop(session);
> +
> + return 0;
> +}
> diff --git a/lib/librte_vhost/vhost-cuse/virtio-net-cdev.c b/lib/librte_vhost/vhost-cuse/virtio-net-cdev.c
> new file mode 100644
> index 0000000..5c16aa5
> --- /dev/null
> +++ b/lib/librte_vhost/vhost-cuse/virtio-net-cdev.c
> @@ -0,0 +1,314 @@
> +/*-
> + * BSD LICENSE
> + *
> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + *
> + * * Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * * Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in
> + * the documentation and/or other materials provided with the
> + * distribution.
> + * * Neither the name of Intel Corporation nor the names of its
> + * contributors may be used to endorse or promote products derived
> + * from this software without specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#include <stdint.h>
> +#include <dirent.h>
> +#include <linux/vhost.h>
> +#include <linux/virtio_net.h>
> +#include <fuse/cuse_lowlevel.h>
> +#include <stddef.h>
> +#include <string.h>
> +#include <stdlib.h>
> +#include <sys/eventfd.h>
> +#include <sys/mman.h>
> +#include <sys/types.h>
> +#include <unistd.h>
> +#include <errno.h>
> +
> +#include <rte_log.h>
> +
> +#include "vhost-net.h"
> +#include "virtio-net-cdev.h"
> +
> +extern struct vhost_net_device_ops const *ops;
> +
> +/* Line size for reading maps file. */
> +static const uint32_t BUFSIZE = PATH_MAX;
> +
> +/* Size of prot char array in procmap. */
> +#define PROT_SZ 5
> +
> +/* Number of elements in procmap struct. */
> +#define PROCMAP_SZ 8
> +
> +/* Structure containing information gathered from maps file. */
> +struct procmap {
> + uint64_t va_start; /* Start virtual address in file. */
> + uint64_t len; /* Size of file. */
> + uint64_t pgoff; /* Not used. */
> + uint32_t maj; /* Not used. */
> + uint32_t min; /* Not used. */
> + uint32_t ino; /* Not used. */
> + char prot[PROT_SZ]; /* Not used. */
> + char fname[PATH_MAX]; /* File name. */
> +};
> +
> +/*
> + * Locate the file containing QEMU's memory space and
> + * map it to our address space.
> + */
> +static int
> +host_memory_map(pid_t pid, uint64_t addr,
> + uint64_t *mapped_address, uint64_t *mapped_size)
> +{
> + struct dirent *dptr = NULL;
> + struct procmap procmap;
> + DIR *dp = NULL;
> + int fd;
> + int i;
> + char memfile[PATH_MAX];
> + char mapfile[PATH_MAX];
> + char procdir[PATH_MAX];
> + char resolved_path[PATH_MAX];
> + FILE *fmap;
> + void *map;
> + uint8_t found = 0;
> + char line[BUFSIZE];
> + char dlm[] = "- : ";
> + char *str, *sp, *in[PROCMAP_SZ];
> + char *end = NULL;
> +
> + /* Path where mem files are located. */
> + snprintf(procdir, PATH_MAX, "/proc/%u/fd/", pid);
> + /* Maps file used to locate mem file. */
> + snprintf(mapfile, PATH_MAX, "/proc/%u/maps", pid);
> +
> + fmap = fopen(mapfile, "r");
> + if (fmap == NULL) {
> + RTE_LOG(ERR, VHOST_CONFIG,
> + "Failed to open maps file for pid %d\n", pid);
> + return -1;
> + }
> +
> + /* Read through maps file until we find out base_address. */
> + while (fgets(line, BUFSIZE, fmap) != 0) {
> + str = line;
> + errno = 0;
> + /* Split line in to fields. */
> + for (i = 0; i < PROCMAP_SZ; i++) {
> + in[i] = strtok_r(str, &dlm[i], &sp);
> + if ((in[i] == NULL) || (errno != 0)) {
> + fclose(fmap);
> + return -1;
> + }
> + str = NULL;
> + }
> +
> + /* Convert/Copy each field as needed. */
> + procmap.va_start = strtoull(in[0], &end, 16);
> + if ((in[0] == '\0') || (end == NULL) || (*end != '\0') ||
> + (errno != 0)) {
> + fclose(fmap);
> + return -1;
> + }
> +
> + procmap.len = strtoull(in[1], &end, 16);
> + if ((in[1] == '\0') || (end == NULL) || (*end != '\0') ||
> + (errno != 0)) {
> + fclose(fmap);
> + return -1;
> + }
> +
> + procmap.pgoff = strtoull(in[3], &end, 16);
> + if ((in[3] == '\0') || (end == NULL) || (*end != '\0') ||
> + (errno != 0)) {
> + fclose(fmap);
> + return -1;
> + }
> +
> + procmap.maj = strtoul(in[4], &end, 16);
> + if ((in[4] == '\0') || (end == NULL) || (*end != '\0') ||
> + (errno != 0)) {
> + fclose(fmap);
> + return -1;
> + }
> +
> + procmap.min = strtoul(in[5], &end, 16);
> + if ((in[5] == '\0') || (end == NULL) || (*end != '\0') ||
> + (errno != 0)) {
> + fclose(fmap);
> + return -1;
> + }
> +
> + procmap.ino = strtoul(in[6], &end, 16);
> + if ((in[6] == '\0') || (end == NULL) || (*end != '\0') ||
> + (errno != 0)) {
> + fclose(fmap);
> + return -1;
> + }
> +
> + memcpy(&procmap.prot, in[2], PROT_SZ);
> + memcpy(&procmap.fname, in[7], PATH_MAX);
> +
> + if (procmap.va_start == addr) {
> + procmap.len = procmap.len - procmap.va_start;
> + found = 1;
> + break;
> + }
> + }
> + fclose(fmap);
> +
> + if (!found) {
> + RTE_LOG(ERR, VHOST_CONFIG,
> + "Failed to find memory file in pid %d maps file\n", pid);
> + return -1;
> + }
> +
> + /* Find the guest memory file among the process fds. */
> + dp = opendir(procdir);
> + if (dp == NULL) {
> + RTE_LOG(ERR, VHOST_CONFIG,
> + "Cannot open pid %d process directory\n",
> + pid);
> + return -1;
> +
> + }
> +
> + found = 0;
> +
> + /* Read the fd directory contents. */
> + while (NULL != (dptr = readdir(dp))) {
> + snprintf(memfile, PATH_MAX, "/proc/%u/fd/%s",
> + pid, dptr->d_name);
> + if (realpath(memfile, resolved_path) == NULL) {
> + RTE_LOG(ERR, VHOST_CONFIG,
> + "Failed to resolve fd directory\n");
> + closedir(dp);
> + return -1;
> + }
> + if (strncmp(resolved_path, procmap.fname,
> + strnlen(procmap.fname, PATH_MAX)) == 0) {
> + found = 1;
> + break;
> + }
> + }
> +
> + closedir(dp);
> +
> + if (found == 0) {
> + RTE_LOG(ERR, VHOST_CONFIG,
> + "Failed to find memory file for pid %d\n",
> + pid);
> + return -1;
> + }
> + /* Open the shared memory file and map the memory into this process. */
> + fd = open(memfile, O_RDWR);
> +
> + if (fd == -1) {
> + RTE_LOG(ERR, VHOST_CONFIG,
> + "Failed to open %s for pid %d\n",
> + memfile, pid);
> + return -1;
> + }
> +
> + map = mmap(0, (size_t)procmap.len, PROT_READ|PROT_WRITE ,
> + MAP_POPULATE|MAP_SHARED, fd, 0);
> + close(fd);
> +
> + if (map == MAP_FAILED) {
> + RTE_LOG(ERR, VHOST_CONFIG,
> + "Error mapping the file %s for pid %d\n",
> + memfile, pid);
> + return -1;
> + }
> +
> + /* Store the memory address and size in the device data structure */
> + *mapped_address = (uint64_t)(uintptr_t)map;
> + *mapped_size = procmap.len;
> +
> + LOG_DEBUG(VHOST_CONFIG,
> + "Mem File: %s->%s - Size: %llu - VA: %p\n",
> + memfile, resolved_path,
> + (unsigned long long)*mapped_size, map);
> +
> + return 0;
> +}
> +
> +int
> +cuse_set_mem_table(struct vhost_device_ctx ctx, const struct vhost_memory *mem_regions_addr,
> + uint32_t nregions)
> +{
> + uint64_t size = offsetof(struct vhost_memory, regions);
> + uint32_t idx;
> + struct virtio_memory_regions regions[8]; /* VHOST_MAX_MEMORY_REGIONS */
> + struct vhost_memory_region *mem_regions = (void *)(uintptr_t)
> + ((uint64_t)(uintptr_t)mem_regions_addr + size);
> + uint64_t base_address = 0, mapped_address, mapped_size;
> +
> + for (idx = 0; idx < nregions; idx++) {
> + regions[idx].guest_phys_address =
> + mem_regions[idx].guest_phys_addr;
> + regions[idx].guest_phys_address_end =
> + regions[idx].guest_phys_address +
> + mem_regions[idx].memory_size;
> + regions[idx].memory_size =
> + mem_regions[idx].memory_size;
> + regions[idx].userspace_address =
> + mem_regions[idx].userspace_addr;
> +
> + LOG_DEBUG(VHOST_CONFIG, "REGION: %u - GPA: %p - QEMU VA: %p - SIZE (%"PRIu64")\n",
> + idx,
> + (void *)(uintptr_t)regions[idx].guest_phys_address,
> + (void *)(uintptr_t)regions[idx].userspace_address,
> + regions[idx].memory_size);
> +
> + /*set the base address mapping*/
> + if (regions[idx].guest_phys_address == 0x0) {
> + base_address =
> + regions[idx].userspace_address;
> + /* Map VM memory file */
> + if (host_memory_map(ctx.pid, base_address,
> + &mapped_address, &mapped_size) != 0) {
> + return -1;
> + }
> + }
> + }
> +
> + /* Check that we have a valid base address. */
> + if (base_address == 0) {
> + RTE_LOG(ERR, VHOST_CONFIG,
> + "Failed to find base address of qemu memory file.\n");
> + return -1;
> + }
> +
> + for (idx = 0; idx < nregions; idx++) {
> + regions[idx].address_offset =
> + mapped_address - base_address +
> + regions[idx].userspace_address -
> + regions[idx].guest_phys_address;
> + }
> +
> + ops->set_mem_table(ctx, &regions[0], nregions);
> + return 0;
> +}
> diff --git a/lib/librte_vhost/vhost-cuse/virtio-net-cdev.h b/lib/librte_vhost/vhost-cuse/virtio-net-cdev.h
> new file mode 100644
> index 0000000..6f98ce8
> --- /dev/null
> +++ b/lib/librte_vhost/vhost-cuse/virtio-net-cdev.h
> @@ -0,0 +1,43 @@
> +/*-
> + * BSD LICENSE
> + *
> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + *
> + * * Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * * Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in
> + * the documentation and/or other materials provided with the
> + * distribution.
> + * * Neither the name of Intel Corporation nor the names of its
> + * contributors may be used to endorse or promote products derived
> + * from this software without specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +#ifndef _VIRTIO_NET_CDEV_H
> +#define _VIRTIO_NET_CDEV_H
> +#include <stdint.h>
> +
> +#include "vhost-net.h"
> +
> +int
> +cuse_set_mem_table(struct vhost_device_ctx ctx, const struct vhost_memory *mem_regions_addr,
> + uint32_t nregions);
> +
> +#endif
> diff --git a/lib/librte_vhost/vhost-net-cdev.c b/lib/librte_vhost/vhost-net-cdev.c
> deleted file mode 100644
> index 57c76cb..0000000
> --- a/lib/librte_vhost/vhost-net-cdev.c
> +++ /dev/null
> @@ -1,389 +0,0 @@
> -/*-
> - * BSD LICENSE
> - *
> - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> - * All rights reserved.
> - *
> - * Redistribution and use in source and binary forms, with or without
> - * modification, are permitted provided that the following conditions
> - * are met:
> - *
> - * * Redistributions of source code must retain the above copyright
> - * notice, this list of conditions and the following disclaimer.
> - * * Redistributions in binary form must reproduce the above copyright
> - * notice, this list of conditions and the following disclaimer in
> - * the documentation and/or other materials provided with the
> - * distribution.
> - * * Neither the name of Intel Corporation nor the names of its
> - * contributors may be used to endorse or promote products derived
> - * from this software without specific prior written permission.
> - *
> - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> - */
> -
> -#include <errno.h>
> -#include <fuse/cuse_lowlevel.h>
> -#include <linux/limits.h>
> -#include <linux/vhost.h>
> -#include <stdint.h>
> -#include <string.h>
> -#include <unistd.h>
> -
> -#include <rte_ethdev.h>
> -#include <rte_log.h>
> -#include <rte_string_fns.h>
> -#include <rte_virtio_net.h>
> -
> -#include "vhost-net-cdev.h"
> -
> -#define FUSE_OPT_DUMMY "\0\0"
> -#define FUSE_OPT_FORE "-f\0\0"
> -#define FUSE_OPT_NOMULTI "-s\0\0"
> -
> -static const uint32_t default_major = 231;
> -static const uint32_t default_minor = 1;
> -static const char cuse_device_name[] = "/dev/cuse";
> -static const char default_cdev[] = "vhost-net";
> -
> -static struct fuse_session *session;
> -static struct vhost_net_device_ops const *ops;
> -
> -/*
> - * Returns vhost_device_ctx from given fuse_req_t. The index is populated later
> - * when the device is added to the device linked list.
> - */
> -static struct vhost_device_ctx
> -fuse_req_to_vhost_ctx(fuse_req_t req, struct fuse_file_info *fi)
> -{
> - struct vhost_device_ctx ctx;
> - struct fuse_ctx const *const req_ctx = fuse_req_ctx(req);
> -
> - ctx.pid = req_ctx->pid;
> - ctx.fh = fi->fh;
> -
> - return ctx;
> -}
> -
> -/*
> - * When the device is created in QEMU it gets initialised here and
> - * added to the device linked list.
> - */
> -static void
> -vhost_net_open(fuse_req_t req, struct fuse_file_info *fi)
> -{
> - struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
> - int err = 0;
> -
> - err = ops->new_device(ctx);
> - if (err == -1) {
> - fuse_reply_err(req, EPERM);
> - return;
> - }
> -
> - fi->fh = err;
> -
> - RTE_LOG(INFO, VHOST_CONFIG,
> - "(%"PRIu64") Device configuration started\n", fi->fh);
> - fuse_reply_open(req, fi);
> -}
> -
> -/*
> - * When QEMU is shutdown or killed the device gets released.
> - */
> -static void
> -vhost_net_release(fuse_req_t req, struct fuse_file_info *fi)
> -{
> - int err = 0;
> - struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
> -
> - ops->destroy_device(ctx);
> - RTE_LOG(INFO, VHOST_CONFIG, "(%"PRIu64") Device released\n", ctx.fh);
> - fuse_reply_err(req, err);
> -}
> -
> -/*
> - * Boilerplate code for CUSE IOCTL
> - * Implicit arguments: ctx, req, result.
> - */
> -#define VHOST_IOCTL(func) do { \
> - result = (func)(ctx); \
> - fuse_reply_ioctl(req, result, NULL, 0); \
> -} while (0)
> -
> -/*
> - * Boilerplate IOCTL RETRY
> - * Implicit arguments: req.
> - */
> -#define VHOST_IOCTL_RETRY(size_r, size_w) do { \
> - struct iovec iov_r = { arg, (size_r) }; \
> - struct iovec iov_w = { arg, (size_w) }; \
> - fuse_reply_ioctl_retry(req, &iov_r, \
> - (size_r) ? 1 : 0, &iov_w, (size_w) ? 1 : 0);\
> -} while (0)
> -
> -/*
> - * Boilerplate code for CUSE Read IOCTL
> - * Implicit arguments: ctx, req, result, in_bufsz, in_buf.
> - */
> -#define VHOST_IOCTL_R(type, var, func) do { \
> - if (!in_bufsz) { \
> - VHOST_IOCTL_RETRY(sizeof(type), 0);\
> - } else { \
> - (var) = *(const type*)in_buf; \
> - result = func(ctx, &(var)); \
> - fuse_reply_ioctl(req, result, NULL, 0);\
> - } \
> -} while (0)
> -
> -/*
> - * Boilerplate code for CUSE Write IOCTL
> - * Implicit arguments: ctx, req, result, out_bufsz.
> - */
> -#define VHOST_IOCTL_W(type, var, func) do { \
> - if (!out_bufsz) { \
> - VHOST_IOCTL_RETRY(0, sizeof(type));\
> - } else { \
> - result = (func)(ctx, &(var));\
> - fuse_reply_ioctl(req, result, &(var), sizeof(type));\
> - } \
> -} while (0)
> -
> -/*
> - * Boilerplate code for CUSE Read/Write IOCTL
> - * Implicit arguments: ctx, req, result, in_bufsz, in_buf.
> - */
> -#define VHOST_IOCTL_RW(type1, var1, type2, var2, func) do { \
> - if (!in_bufsz) { \
> - VHOST_IOCTL_RETRY(sizeof(type1), sizeof(type2));\
> - } else { \
> - (var1) = *(const type1*) (in_buf); \
> - result = (func)(ctx, (var1), &(var2)); \
> - fuse_reply_ioctl(req, result, &(var2), sizeof(type2));\
> - } \
> -} while (0)
> -
> -/*
> - * The IOCTLs are handled using CUSE/FUSE in userspace. Depending on the type
> - * of IOCTL a buffer is requested to read or to write. This request is handled
> - * by FUSE and the buffer is then given to CUSE.
> - */
> -static void
> -vhost_net_ioctl(fuse_req_t req, int cmd, void *arg,
> - struct fuse_file_info *fi, __rte_unused unsigned flags,
> - const void *in_buf, size_t in_bufsz, size_t out_bufsz)
> -{
> - struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
> - struct vhost_vring_file file;
> - struct vhost_vring_state state;
> - struct vhost_vring_addr addr;
> - uint64_t features;
> - uint32_t index;
> - int result = 0;
> -
> - switch (cmd) {
> - case VHOST_NET_SET_BACKEND:
> - LOG_DEBUG(VHOST_CONFIG,
> - "(%"PRIu64") IOCTL: VHOST_NET_SET_BACKEND\n", ctx.fh);
> - VHOST_IOCTL_R(struct vhost_vring_file, file, ops->set_backend);
> - break;
> -
> - case VHOST_GET_FEATURES:
> - LOG_DEBUG(VHOST_CONFIG,
> - "(%"PRIu64") IOCTL: VHOST_GET_FEATURES\n", ctx.fh);
> - VHOST_IOCTL_W(uint64_t, features, ops->get_features);
> - break;
> -
> - case VHOST_SET_FEATURES:
> - LOG_DEBUG(VHOST_CONFIG,
> - "(%"PRIu64") IOCTL: VHOST_SET_FEATURES\n", ctx.fh);
> - VHOST_IOCTL_R(uint64_t, features, ops->set_features);
> - break;
> -
> - case VHOST_RESET_OWNER:
> - LOG_DEBUG(VHOST_CONFIG,
> - "(%"PRIu64") IOCTL: VHOST_RESET_OWNER\n", ctx.fh);
> - VHOST_IOCTL(ops->reset_owner);
> - break;
> -
> - case VHOST_SET_OWNER:
> - LOG_DEBUG(VHOST_CONFIG,
> - "(%"PRIu64") IOCTL: VHOST_SET_OWNER\n", ctx.fh);
> - VHOST_IOCTL(ops->set_owner);
> - break;
> -
> - case VHOST_SET_MEM_TABLE:
> - /*TODO fix race condition.*/
> - LOG_DEBUG(VHOST_CONFIG,
> - "(%"PRIu64") IOCTL: VHOST_SET_MEM_TABLE\n", ctx.fh);
> - static struct vhost_memory mem_temp;
> -
> - switch (in_bufsz) {
> - case 0:
> - VHOST_IOCTL_RETRY(sizeof(struct vhost_memory), 0);
> - break;
> -
> - case sizeof(struct vhost_memory):
> - mem_temp = *(const struct vhost_memory *) in_buf;
> -
> - if (mem_temp.nregions > 0) {
> - VHOST_IOCTL_RETRY(sizeof(struct vhost_memory) +
> - (sizeof(struct vhost_memory_region) *
> - mem_temp.nregions), 0);
> - } else {
> - result = -1;
> - fuse_reply_ioctl(req, result, NULL, 0);
> - }
> - break;
> -
> - default:
> - result = ops->set_mem_table(ctx,
> - in_buf, mem_temp.nregions);
> - if (result)
> - fuse_reply_err(req, EINVAL);
> - else
> - fuse_reply_ioctl(req, result, NULL, 0);
> - }
> - break;
> -
> - case VHOST_SET_VRING_NUM:
> - LOG_DEBUG(VHOST_CONFIG,
> - "(%"PRIu64") IOCTL: VHOST_SET_VRING_NUM\n", ctx.fh);
> - VHOST_IOCTL_R(struct vhost_vring_state, state,
> - ops->set_vring_num);
> - break;
> -
> - case VHOST_SET_VRING_BASE:
> - LOG_DEBUG(VHOST_CONFIG,
> - "(%"PRIu64") IOCTL: VHOST_SET_VRING_BASE\n", ctx.fh);
> - VHOST_IOCTL_R(struct vhost_vring_state, state,
> - ops->set_vring_base);
> - break;
> -
> - case VHOST_GET_VRING_BASE:
> - LOG_DEBUG(VHOST_CONFIG,
> - "(%"PRIu64") IOCTL: VHOST_GET_VRING_BASE\n", ctx.fh);
> - VHOST_IOCTL_RW(uint32_t, index,
> - struct vhost_vring_state, state, ops->get_vring_base);
> - break;
> -
> - case VHOST_SET_VRING_ADDR:
> - LOG_DEBUG(VHOST_CONFIG,
> - "(%"PRIu64") IOCTL: VHOST_SET_VRING_ADDR\n", ctx.fh);
> - VHOST_IOCTL_R(struct vhost_vring_addr, addr,
> - ops->set_vring_addr);
> - break;
> -
> - case VHOST_SET_VRING_KICK:
> - LOG_DEBUG(VHOST_CONFIG,
> - "(%"PRIu64") IOCTL: VHOST_SET_VRING_KICK\n", ctx.fh);
> - VHOST_IOCTL_R(struct vhost_vring_file, file,
> - ops->set_vring_kick);
> - break;
> -
> - case VHOST_SET_VRING_CALL:
> - LOG_DEBUG(VHOST_CONFIG,
> - "(%"PRIu64") IOCTL: VHOST_SET_VRING_CALL\n", ctx.fh);
> - VHOST_IOCTL_R(struct vhost_vring_file, file,
> - ops->set_vring_call);
> - break;
> -
> - default:
> - RTE_LOG(ERR, VHOST_CONFIG,
> - "(%"PRIu64") IOCTL: DOESN NOT EXIST\n", ctx.fh);
> - result = -1;
> - fuse_reply_ioctl(req, result, NULL, 0);
> - }
> -
> - if (result < 0)
> - LOG_DEBUG(VHOST_CONFIG,
> - "(%"PRIu64") IOCTL: FAIL\n", ctx.fh);
> - else
> - LOG_DEBUG(VHOST_CONFIG,
> - "(%"PRIu64") IOCTL: SUCCESS\n", ctx.fh);
> -}
> -
> -/*
> - * Structure handling open, release and ioctl function pointers is populated.
> - */
> -static const struct cuse_lowlevel_ops vhost_net_ops = {
> - .open = vhost_net_open,
> - .release = vhost_net_release,
> - .ioctl = vhost_net_ioctl,
> -};
> -
> -/*
> - * cuse_info is populated and used to register the cuse device.
> - * vhost_net_device_ops are also passed when the device is registered in app.
> - */
> -int
> -rte_vhost_driver_register(const char *dev_name)
> -{
> - struct cuse_info cuse_info;
> - char device_name[PATH_MAX] = "";
> - char char_device_name[PATH_MAX] = "";
> - const char *device_argv[] = { device_name };
> -
> - char fuse_opt_dummy[] = FUSE_OPT_DUMMY;
> - char fuse_opt_fore[] = FUSE_OPT_FORE;
> - char fuse_opt_nomulti[] = FUSE_OPT_NOMULTI;
> - char *fuse_argv[] = {fuse_opt_dummy, fuse_opt_fore, fuse_opt_nomulti};
> -
> - if (access(cuse_device_name, R_OK | W_OK) < 0) {
> - RTE_LOG(ERR, VHOST_CONFIG,
> - "char device %s can't be accessed, maybe not exist\n",
> - cuse_device_name);
> - return -1;
> - }
> -
> - /*
> - * The device name is created. This is passed to QEMU so that it can
> - * register the device with our application.
> - */
> - snprintf(device_name, PATH_MAX, "DEVNAME=%s", dev_name);
> - snprintf(char_device_name, PATH_MAX, "/dev/%s", dev_name);
> -
> - /* Check if device already exists. */
> - if (access(char_device_name, F_OK) != -1) {
> - RTE_LOG(ERR, VHOST_CONFIG,
> - "char device %s already exists\n", char_device_name);
> - return -1;
> - }
> -
> - memset(&cuse_info, 0, sizeof(cuse_info));
> - cuse_info.dev_major = default_major;
> - cuse_info.dev_minor = default_minor;
> - cuse_info.dev_info_argc = 1;
> - cuse_info.dev_info_argv = device_argv;
> - cuse_info.flags = CUSE_UNRESTRICTED_IOCTL;
> -
> - ops = get_virtio_net_callbacks();
> -
> - session = cuse_lowlevel_setup(3, fuse_argv,
> - &cuse_info, &vhost_net_ops, 0, NULL);
> - if (session == NULL)
> - return -1;
> -
> - return 0;
> -}
> -
> -/**
> - * The CUSE session is launched allowing the application to receive open,
> - * release and ioctl calls.
> - */
> -int
> -rte_vhost_driver_session_start(void)
> -{
> - fuse_session_loop(session);
> -
> - return 0;
> -}
> diff --git a/lib/librte_vhost/vhost-net-cdev.h b/lib/librte_vhost/vhost-net-cdev.h
> deleted file mode 100644
> index 03a5c57..0000000
> --- a/lib/librte_vhost/vhost-net-cdev.h
> +++ /dev/null
> @@ -1,113 +0,0 @@
> -/*-
> - * BSD LICENSE
> - *
> - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> - * All rights reserved.
> - *
> - * Redistribution and use in source and binary forms, with or without
> - * modification, are permitted provided that the following conditions
> - * are met:
> - *
> - * * Redistributions of source code must retain the above copyright
> - * notice, this list of conditions and the following disclaimer.
> - * * Redistributions in binary form must reproduce the above copyright
> - * notice, this list of conditions and the following disclaimer in
> - * the documentation and/or other materials provided with the
> - * distribution.
> - * * Neither the name of Intel Corporation nor the names of its
> - * contributors may be used to endorse or promote products derived
> - * from this software without specific prior written permission.
> - *
> - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> - */
> -
> -#ifndef _VHOST_NET_CDEV_H_
> -#define _VHOST_NET_CDEV_H_
> -#include <stdint.h>
> -#include <stdio.h>
> -#include <sys/types.h>
> -#include <unistd.h>
> -#include <linux/vhost.h>
> -
> -#include <rte_log.h>
> -
> -/* Macros for printing using RTE_LOG */
> -#define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
> -#define RTE_LOGTYPE_VHOST_DATA RTE_LOGTYPE_USER1
> -
> -#ifdef RTE_LIBRTE_VHOST_DEBUG
> -#define VHOST_MAX_PRINT_BUFF 6072
> -#define LOG_LEVEL RTE_LOG_DEBUG
> -#define LOG_DEBUG(log_type, fmt, args...) RTE_LOG(DEBUG, log_type, fmt, ##args)
> -#define PRINT_PACKET(device, addr, size, header) do { \
> - char *pkt_addr = (char *)(addr); \
> - unsigned int index; \
> - char packet[VHOST_MAX_PRINT_BUFF]; \
> - \
> - if ((header)) \
> - snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%"PRIu64") Header size %d: ", (device->device_fh), (size)); \
> - else \
> - snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%"PRIu64") Packet size %d: ", (device->device_fh), (size)); \
> - for (index = 0; index < (size); index++) { \
> - snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), \
> - "%02hhx ", pkt_addr[index]); \
> - } \
> - snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), "\n"); \
> - \
> - LOG_DEBUG(VHOST_DATA, "%s", packet); \
> -} while (0)
> -#else
> -#define LOG_LEVEL RTE_LOG_INFO
> -#define LOG_DEBUG(log_type, fmt, args...) do {} while (0)
> -#define PRINT_PACKET(device, addr, size, header) do {} while (0)
> -#endif
> -
> -
> -/*
> - * Structure used to identify device context.
> - */
> -struct vhost_device_ctx {
> - pid_t pid; /* PID of process calling the IOCTL. */
> - uint64_t fh; /* Populated with fi->fh to track the device index. */
> -};
> -
> -/*
> - * Structure contains function pointers to be defined in virtio-net.c. These
> - * functions are called in CUSE context and are used to configure devices.
> - */
> -struct vhost_net_device_ops {
> - int (*new_device)(struct vhost_device_ctx);
> - void (*destroy_device)(struct vhost_device_ctx);
> -
> - int (*get_features)(struct vhost_device_ctx, uint64_t *);
> - int (*set_features)(struct vhost_device_ctx, uint64_t *);
> -
> - int (*set_mem_table)(struct vhost_device_ctx, const void *, uint32_t);
> -
> - int (*set_vring_num)(struct vhost_device_ctx, struct vhost_vring_state *);
> - int (*set_vring_addr)(struct vhost_device_ctx, struct vhost_vring_addr *);
> - int (*set_vring_base)(struct vhost_device_ctx, struct vhost_vring_state *);
> - int (*get_vring_base)(struct vhost_device_ctx, uint32_t, struct vhost_vring_state *);
> -
> - int (*set_vring_kick)(struct vhost_device_ctx, struct vhost_vring_file *);
> - int (*set_vring_call)(struct vhost_device_ctx, struct vhost_vring_file *);
> -
> - int (*set_backend)(struct vhost_device_ctx, struct vhost_vring_file *);
> -
> - int (*set_owner)(struct vhost_device_ctx);
> - int (*reset_owner)(struct vhost_device_ctx);
> -};
> -
> -
> -struct vhost_net_device_ops const *get_virtio_net_callbacks(void);
> -#endif /* _VHOST_NET_CDEV_H_ */
> diff --git a/lib/librte_vhost/vhost-user/fd_man.c b/lib/librte_vhost/vhost-user/fd_man.c
> new file mode 100644
> index 0000000..c7fd3f2
> --- /dev/null
> +++ b/lib/librte_vhost/vhost-user/fd_man.c
> @@ -0,0 +1,158 @@
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <sys/socket.h>
> +#include <sys/select.h>
> +#include <sys/time.h>
> +#include <sys/types.h>
> +#include <unistd.h>
> +
> +#include <rte_log.h>
> +
> +#include "fd_man.h"
> +
> +/**
> + * Returns the index in the fdset for a fd.
> + * If fd is -1, it means to search for a free entry.
> + * @return
> + * Index for the fd, or -1 if fd isn't in the fdset.
> + */
> +static int
> +fdset_find_fd(struct fdset *pfdset, int fd)
> +{
> + int i;
> +
> + for (i = 0; i < pfdset->num && pfdset->fd[i].fd != fd; i++);
> +
> + return i == pfdset->num ? -1 : i;
> +}
> +
/* A free slot is any entry whose fd is the empty marker -1. */
static int
fdset_find_free_slot(struct fdset *pfdset)
{
	return fdset_find_fd(pfdset, -1);
}
> +
/* Populate entry idx of the fdset with fd, its callbacks and context. */
static void
fdset_add_fd(struct fdset *pfdset, int idx, int fd, fd_cb rcb,
	fd_cb wcb, uint64_t dat)
{
	struct fdentry *entry = &pfdset->fd[idx];

	entry->fd = fd;
	entry->rcb = rcb;
	entry->wcb = wcb;
	entry->dat = dat;
}
> +
> +/**
> + * Fill the read/write fdset with the fds in the fdset.
> + * @return
> + * the maximum fds filled in the read/write fd_set.
> + */
/**
 * Fill the read/write fd_sets from the entries registered in the fdset.
 * An entry contributes to a set only when it has the matching callback.
 * @return
 *   the largest fd added to either set, or -1 if none was added.
 */
static int
fdset_fill(fd_set *rfset, fd_set *wfset, struct fdset *pfdset)
{
	int i;
	int maxfds = -1;

	for (i = 0; i < MAX_FDS; i++) {
		struct fdentry *entry = &pfdset->fd[i];
		int interested = 0;

		if (entry->fd == -1)
			continue;

		if (rfset && entry->rcb) {
			FD_SET(entry->fd, rfset);
			interested = 1;
		}
		if (wfset && entry->wcb) {
			FD_SET(entry->fd, wfset);
			interested = 1;
		}
		if (interested && entry->fd > maxfds)
			maxfds = entry->fd;
	}

	return maxfds;
}
> +
> +void
> +fdset_init(struct fdset *pfdset)
> +{
> + int i;
> +
> + for (i = 0; i < MAX_FDS; i++)
> + pfdset->fd[i].fd = -1;
> + pfdset->num = MAX_FDS;
> +
> +}
> +
> +/**
> + * Register the fd in the fdset with its read/write handler and context.
> + */
/**
 * Register fd in the fdset with its read/write handler and context.
 * @return
 *   0 on success, -1 for an invalid fd, -2 when the fdset is full.
 */
int
fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, uint64_t dat)
{
	int slot;

	if (fd == -1)
		return -1;

	slot = fdset_find_free_slot(pfdset);
	if (slot < 0)
		return -2;

	fdset_add_fd(pfdset, slot, fd, rcb, wcb, dat);
	return 0;
}
> +
> +/**
> + * Unregister the fd from the fdset.
> + */
> +void
> +fdset_del(struct fdset *pfdset, int fd)
> +{
> + int i;
> +
> + i = fdset_find_fd(pfdset, fd);
> + if (i != -1) {
> + pfdset->fd[i].fd = -1;
> + }
> +}
> +
> +
/**
 * Infinite select() loop dispatching read/write events to the
 * registered callbacks.  Runs in a single thread, so the fdset is
 * scanned without locking.  Returns once no fds remain registered
 * (the set cannot refill since registration happens in callbacks
 * on this same thread).
 */
void
fdset_event_dispatch(struct fdset *pfdset)
{
	fd_set rfds, wfds;
	int i, maxfds;
	struct fdentry *pfdentry;
	int num = MAX_FDS;

	if (pfdset == NULL)
		return;

	while (1) {
		FD_ZERO(&rfds);
		FD_ZERO(&wfds);
		maxfds = fdset_fill(&rfds, &wfds, pfdset);
		if (maxfds == -1)
			return;

		/* retry on EINTR or other transient select() failures;
		 * the original ignored the return value and could call
		 * FD_ISSET on undefined fd_set contents */
		if (select(maxfds + 1, &rfds, &wfds, NULL, NULL) < 0)
			continue;

		for (i = 0; i < num; i++) {
			pfdentry = &pfdset->fd[i];
			/* FD_ISSET on an fd of -1 is undefined behavior:
			 * skip empty entries before testing membership */
			if (pfdentry->fd < 0)
				continue;
			if (pfdentry->rcb && FD_ISSET(pfdentry->fd, &rfds))
				pfdentry->rcb(pfdentry->fd, pfdentry->dat);
			if (pfdentry->wcb && FD_ISSET(pfdentry->fd, &wfds))
				pfdentry->wcb(pfdentry->fd, pfdentry->dat);
		}
	}
}
> diff --git a/lib/librte_vhost/vhost-user/fd_man.h b/lib/librte_vhost/vhost-user/fd_man.h
> new file mode 100644
> index 0000000..57cc81d
> --- /dev/null
> +++ b/lib/librte_vhost/vhost-user/fd_man.h
> @@ -0,0 +1,31 @@
#ifndef _FD_MAN_H_
#define _FD_MAN_H_
#include <stdint.h>

/* Maximum number of fds one fdset can track. */
#define MAX_FDS 1024

/* Event callback; dat is the opaque context registered with the fd. */
typedef void (*fd_cb)(int fd, uint64_t dat);

struct fdentry {
	int fd;		/* -1 indicates this entry is empty */
	fd_cb rcb;	/* callback when this fd is readable. */
	fd_cb wcb;	/* callback when this fd is writeable.*/
	uint64_t dat;	/* fd context */
};

struct fdset {
	struct fdentry fd[MAX_FDS];
	int num;	/* number of slots scanned (kept at MAX_FDS) */
};


/* Mark all entries of the fdset empty. */
void fdset_init(struct fdset *pfdset);

/* Register fd with callbacks/context: 0 on success, -1 invalid fd,
 * -2 when the fdset is full. */
int fdset_add(struct fdset *pfdset, int fd, fd_cb rcb,
	fd_cb wcb, uint64_t ctx);

/* Unregister fd; no-op when fd is absent. */
void fdset_del(struct fdset *pfdset, int fd);

/* Blocking select() loop; returns when no fds remain registered. */
void fdset_event_dispatch(struct fdset *pfdset);

#endif
> diff --git a/lib/librte_vhost/vhost-user/vhost-net-user.c b/lib/librte_vhost/vhost-user/vhost-net-user.c
> new file mode 100644
> index 0000000..34450f4
> --- /dev/null
> +++ b/lib/librte_vhost/vhost-user/vhost-net-user.c
> @@ -0,0 +1,417 @@
> +/*-
> + * BSD LICENSE
> + *
> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + *
> + * * Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * * Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in
> + * the documentation and/or other materials provided with the
> + * distribution.
> + * * Neither the name of Intel Corporation nor the names of its
> + * contributors may be used to endorse or promote products derived
> + * from this software without specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <limits.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <string.h>
> +#include <sys/types.h>
> +#include <sys/socket.h>
> +#include <sys/un.h>
> +#include <errno.h>
> +
> +#include <rte_log.h>
> +#include <rte_virtio_net.h>
> +
> +#include "fd_man.h"
> +#include "vhost-net-user.h"
> +#include "vhost-net.h"
> +#include "virtio-net-user.h"
> +
> +static void vserver_new_vq_conn(int fd, uint64_t data);
> +static void vserver_message_handler(int fd, uint64_t dat);
> +const struct vhost_net_device_ops *ops;
> +
> +static struct vhost_server *g_vhost_server;
> +
> +static const char *vhost_message_str[VHOST_USER_MAX] =
> +{
> + [VHOST_USER_NONE] = "VHOST_USER_NONE",
> + [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
> + [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES",
> + [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER",
> + [VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER",
> + [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE",
> + [VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE",
> + [VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD",
> + [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM",
> + [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR",
> + [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE",
> + [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE",
> + [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK",
> + [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL",
> + [VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR"
> +};
> +
> +/**
> + * Create a unix domain socket and bind to path.
> + * @return
> + * socket fd or -1 on failure
> + */
/**
 * Create a unix domain stream socket, bind it to path and listen.
 * @return
 *   listening socket fd, or -1 on failure
 */
static int
uds_socket(const char *path)
{
	struct sockaddr_un un;
	int sockfd;
	int ret;

	if (path == NULL)
		return -1;

	sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
	if (sockfd < 0)
		return -1;
	RTE_LOG(INFO, VHOST_CONFIG, "socket created, fd:%d\n", sockfd);

	memset(&un, 0, sizeof(un));
	un.sun_family = AF_UNIX;
	/* reject paths that do not fit in sun_path instead of silently
	 * truncating and binding to the wrong address */
	ret = snprintf(un.sun_path, sizeof(un.sun_path), "%s", path);
	if (ret < 0 || (size_t)ret >= sizeof(un.sun_path)) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"socket path too long: %s\n", path);
		goto err;
	}

	ret = bind(sockfd, (struct sockaddr *)&un, sizeof(un));
	if (ret == -1)
		goto err;
	RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);

	ret = listen(sockfd, 1);
	if (ret == -1)
		goto err;

	return sockfd;

err:
	close(sockfd);
	return -1;
}
> +
> +
> +/* return bytes# of read */
/**
 * Receive a message and any SCM_RIGHTS-passed fds from sockfd.
 * Up to fd_num fds are copied into fds.
 * @return
 *   number of bytes read, 0 on peer close, negative on error.
 */
static int
read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
{
	struct msghdr msgh = { 0 };
	struct iovec iov = { .iov_base = buf, .iov_len = buflen };
	size_t fdsize = fd_num * sizeof(int);
	char control[CMSG_SPACE(fdsize)];
	struct cmsghdr *cmsg;
	int ret;

	msgh.msg_iov = &iov;
	msgh.msg_iovlen = 1;
	msgh.msg_control = control;
	msgh.msg_controllen = sizeof(control);

	ret = recvmsg(sockfd, &msgh, 0);
	if (ret <= 0) {
		RTE_LOG(ERR, VHOST_CONFIG, "%s failed\n", __func__);
		return ret;
	}

	/* a truncated message or control buffer is an error */
	if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
		RTE_LOG(ERR, VHOST_CONFIG, "%s failed\n", __func__);
		return -1;
	}

	/* pick up the passed fds from the first SCM_RIGHTS control block */
	for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
		cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
		if (cmsg->cmsg_level == SOL_SOCKET &&
			cmsg->cmsg_type == SCM_RIGHTS) {
			memcpy(fds, CMSG_DATA(cmsg), fdsize);
			break;
		}
	}

	return ret;
}
> +
/**
 * Read a full vhost-user message: fixed header (plus any passed fds)
 * followed by msg->size payload bytes.
 * @return
 *   bytes read, 0 on peer close, negative on error.
 */
static int
read_vhost_message(int sockfd, struct VhostUserMsg *msg)
{
	int ret;

	ret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE,
		msg->fds, VHOST_MEMORY_MAX_NREGIONS);
	if (ret <= 0)
		return ret;

	if (msg->size) {
		/* size comes from the peer: bound it by the payload union */
		if (msg->size > sizeof(msg->payload)) {
			RTE_LOG(ERR, VHOST_CONFIG,
				"%s: invalid size:%d\n", __func__, msg->size);
			return -1;
		}
		ret = read(sockfd, &msg->payload, msg->size);
		if (ret == 0)
			return 0;
		if (ret != (int)msg->size) {
			/* use RTE_LOG like the rest of this file,
			 * not a bare printf */
			RTE_LOG(ERR, VHOST_CONFIG,
				"read control message failed\n");
			return -1;
		}
	}

	return ret;
}
> +
/**
 * Send buf over sockfd, attaching fd_num fds as SCM_RIGHTS ancillary
 * data when fds is non-NULL.
 * @return
 *   0 on success, -1 on sendmsg failure.
 */
static int
send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
{
	struct msghdr msgh = { 0 };
	struct iovec iov = { .iov_base = buf, .iov_len = buflen };
	size_t fdsize = fd_num * sizeof(int);
	char control[CMSG_SPACE(fdsize)];
	struct cmsghdr *cmsg;
	int ret;

	msgh.msg_iov = &iov;
	msgh.msg_iovlen = 1;

	if (fds == NULL || fd_num <= 0) {
		/* plain message, no ancillary fd payload */
		msgh.msg_control = NULL;
		msgh.msg_controllen = 0;
	} else {
		msgh.msg_control = control;
		msgh.msg_controllen = sizeof(control);
		cmsg = CMSG_FIRSTHDR(&msgh);
		cmsg->cmsg_len = CMSG_LEN(fdsize);
		cmsg->cmsg_level = SOL_SOCKET;
		cmsg->cmsg_type = SCM_RIGHTS;
		memcpy(CMSG_DATA(cmsg), fds, fdsize);
	}

	/* restart when interrupted by a signal */
	do {
		ret = sendmsg(sockfd, &msgh, 0);
	} while (ret < 0 && errno == EINTR);

	if (ret < 0) {
		RTE_LOG(ERR, VHOST_CONFIG, "sendmsg error\n");
		return -1;
	}

	return 0;
}
> +
> +static int
> +send_vhost_message(int sockfd, struct VhostUserMsg *msg)
> +{
> + int ret;
> +
> + msg->flags &= ~VHOST_USER_VERSION_MASK;
> + msg->flags |= VHOST_USER_VERSION;
> + msg->flags |= VHOST_USER_REPLY_MASK;
> +
> + ret = send_fd_message(sockfd, (char *)msg,
> + VHOST_USER_HDR_SIZE + msg->size, NULL, 0);
> +
> + return ret;
> +}
> +
> +/* call back when there is new connection. */
> +static void
> +vserver_new_vq_conn(int fd, uint64_t dat)
> +{
> + struct vhost_server *vserver = (void *)(uintptr_t)dat;
> + int conn_fd;
> + uint32_t fh;
> + struct vhost_device_ctx vdev_ctx = { 0 };
> +
> + conn_fd = accept(fd, NULL, NULL);
> + RTE_LOG(INFO, VHOST_CONFIG,
> + "%s: new connection is %d\n", __func__, conn_fd);
> + if (conn_fd < 0)
> + return;
> +
> + fh = ops->new_device(vdev_ctx);
> + RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", fh);
> +
> + fdset_add(&vserver->fdset,
> + conn_fd, vserver_message_handler, NULL, fh);
> +}
> +
> +/* callback when there is message on the connfd */
> +static void
> +vserver_message_handler(int connfd, uint64_t dat)
> +{
> + struct vhost_device_ctx ctx;
> + uint32_t fh = (uint32_t)dat;
> + struct VhostUserMsg msg;
> + uint64_t features;
> + int ret;
> +
> + ctx.fh = fh;
> + ret = read_vhost_message(connfd, &msg);
> + if (ret < 0) {
> + printf("vhost read message failed\n");
> +
> + /*TODO: cleanup */
> + close(connfd);
> + fdset_del(&g_vhost_server->fdset, connfd);
> + ops->destroy_device(ctx);
> +
> + return;
> + } else if (ret == 0) {
> + /*TODO: cleanup */
> + RTE_LOG(INFO, VHOST_CONFIG,
> + "vhost peer closed\n");
> + close(connfd);
> + fdset_del(&g_vhost_server->fdset, connfd);
> + ops->destroy_device(ctx);
> +
> + return;
> + }
> + if (msg.request > VHOST_USER_MAX) {
> + /*TODO: cleanup */
> + RTE_LOG(INFO, VHOST_CONFIG,
> + "vhost read incorrect message\n");
> + close(connfd);
> + fdset_del(&g_vhost_server->fdset, connfd);
> +
> + return;
> + }
> +
> + RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
> + vhost_message_str[msg.request]);
> + switch (msg.request) {
> + case VHOST_USER_GET_FEATURES:
> + ret = ops->get_features(ctx, &features);
> + msg.payload.u64 = ret;
> + msg.size = sizeof(msg.payload.u64);
> + send_vhost_message(connfd, &msg);
> + break;
> + case VHOST_USER_SET_FEATURES:
> + ops->set_features(ctx, &features);
> + break;
> +
> + case VHOST_USER_SET_OWNER:
> + ops->set_owner(ctx);
> + break;
> + case VHOST_USER_RESET_OWNER:
> + ops->reset_owner(ctx);
> + break;
> +
> + case VHOST_USER_SET_MEM_TABLE:
> + user_set_mem_table(ctx, &msg);
> + break;
> +
> + case VHOST_USER_SET_LOG_BASE:
> + case VHOST_USER_SET_LOG_FD:
> + RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n");
> + break;
> +
> + case VHOST_USER_SET_VRING_NUM:
> + ops->set_vring_num(ctx, &msg.payload.state);
> + break;
> + case VHOST_USER_SET_VRING_ADDR:
> + ops->set_vring_addr(ctx, &msg.payload.addr);
> + break;
> + case VHOST_USER_SET_VRING_BASE:
> + ops->set_vring_base(ctx, &msg.payload.state);
> + break;
> +
> + case VHOST_USER_GET_VRING_BASE:
> + ret = ops->get_vring_base(ctx, msg.payload.state.index,
> + &msg.payload.state);
> + msg.size = sizeof(msg.payload.state);
> + send_vhost_message(connfd, &msg);
> + break;
> +
> + case VHOST_USER_SET_VRING_KICK:
> + user_set_vring_kick(ctx, &msg);
> + break;
> + case VHOST_USER_SET_VRING_CALL:
> + user_set_vring_call(ctx, &msg);
> + break;
> +
> + case VHOST_USER_SET_VRING_ERR:
> + RTE_LOG(INFO, VHOST_CONFIG, "not implemented\n");
> + break;
> +
> + default:
> + break;
> +
> + }
> +}
> +
> +
> +/**
> + * Creates and initialise the vhost server.
> + */
/**
 * Create and initialise the vhost server: allocate the server state,
 * create the listening unix socket at path and register it with the
 * fd event dispatcher.  Only one server instance is supported.
 * @return
 *   0 on success, -1 on failure or if a server already exists.
 */
int
rte_vhost_driver_register(const char *path)
{
	struct vhost_server *vserver;

	if (g_vhost_server != NULL)
		return -1;

	/*TODO: all allocation is through DPDK memory allocation */
	vserver = calloc(1, sizeof(struct vhost_server));
	if (vserver == NULL)
		return -1;

	fdset_init(&vserver->fdset);

	/* remove a stale socket file from a previous run, if any */
	unlink(path);

	vserver->listenfd = uds_socket(path);
	if (vserver->listenfd < 0) {
		free(vserver);
		return -1;
	}
	vserver->path = path;

	/* the original ignored fdset_add failure, leaving a listener
	 * that would never be polled */
	if (fdset_add(&vserver->fdset, vserver->listenfd,
		vserver_new_vq_conn, NULL,
		(uint64_t)(uintptr_t)vserver) < 0) {
		close(vserver->listenfd);
		free(vserver);
		return -1;
	}

	ops = get_virtio_net_callbacks();

	g_vhost_server = vserver;

	return 0;
}
> +
> +
> +int
> +rte_vhost_driver_session_start(void)
> +{
> + fdset_event_dispatch(&g_vhost_server->fdset);
> + return 0;
> +}
> +
> diff --git a/lib/librte_vhost/vhost-user/vhost-net-user.h b/lib/librte_vhost/vhost-user/vhost-net-user.h
> new file mode 100644
> index 0000000..c9df9fa
> --- /dev/null
> +++ b/lib/librte_vhost/vhost-user/vhost-net-user.h
> @@ -0,0 +1,74 @@
> +#ifndef _VHOST_NET_USER_H
> +#define _VHOST_NET_USER_H
> +#include <stdint.h>
> +#include <linux/vhost.h>
> +
> +#include "fd_man.h"
> +
> +struct vhost_server {
> + const char *path; /**< The path the uds is bind to. */
> + int listenfd; /**< The listener sockfd. */
> + struct fdset fdset; /**< The fd list this vhost server manages. */
> +};
> +
> +/*********** FROM hw/virtio/vhost-user.c *************************************/
> +
> +#define VHOST_MEMORY_MAX_NREGIONS 8
> +
> +typedef enum VhostUserRequest {
> + VHOST_USER_NONE = 0,
> + VHOST_USER_GET_FEATURES = 1,
> + VHOST_USER_SET_FEATURES = 2,
> + VHOST_USER_SET_OWNER = 3,
> + VHOST_USER_RESET_OWNER = 4,
> + VHOST_USER_SET_MEM_TABLE = 5,
> + VHOST_USER_SET_LOG_BASE = 6,
> + VHOST_USER_SET_LOG_FD = 7,
> + VHOST_USER_SET_VRING_NUM = 8,
> + VHOST_USER_SET_VRING_ADDR = 9,
> + VHOST_USER_SET_VRING_BASE = 10,
> + VHOST_USER_GET_VRING_BASE = 11,
> + VHOST_USER_SET_VRING_KICK = 12,
> + VHOST_USER_SET_VRING_CALL = 13,
> + VHOST_USER_SET_VRING_ERR = 14,
> + VHOST_USER_MAX
> +} VhostUserRequest;
> +
> +typedef struct VhostUserMemoryRegion {
> + uint64_t guest_phys_addr;
> + uint64_t memory_size;
> + uint64_t userspace_addr;
> + uint64_t mmap_offset;
> +} VhostUserMemoryRegion;
> +
> +typedef struct VhostUserMemory {
> + uint32_t nregions;
> + uint32_t padding;
> + VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
> +} VhostUserMemory;
> +
> +typedef struct VhostUserMsg {
> + VhostUserRequest request;
> +
> +#define VHOST_USER_VERSION_MASK (0x3)
> +#define VHOST_USER_REPLY_MASK (0x1 << 2)
> + uint32_t flags;
> + uint32_t size; /* the following payload size */
> + union {
> +#define VHOST_USER_VRING_IDX_MASK (0xff)
> +#define VHOST_USER_VRING_NOFD_MASK (0x1<<8)
> + uint64_t u64;
> + struct vhost_vring_state state;
> + struct vhost_vring_addr addr;
> + VhostUserMemory memory;
> + } payload;
> + int fds[VHOST_MEMORY_MAX_NREGIONS];
> +} __attribute__((packed)) VhostUserMsg;
> +
> +#define VHOST_USER_HDR_SIZE (intptr_t)(&((VhostUserMsg *)0)->payload.u64)
> +
> +/* The version of the protocol we support */
> +#define VHOST_USER_VERSION (0x1)
> +
> +/*****************************************************************************/
> +#endif
> diff --git a/lib/librte_vhost/vhost-user/virtio-net-user.c b/lib/librte_vhost/vhost-user/virtio-net-user.c
> new file mode 100644
> index 0000000..f38e6cc
> --- /dev/null
> +++ b/lib/librte_vhost/vhost-user/virtio-net-user.c
> @@ -0,0 +1,208 @@
> +/*-
> + * BSD LICENSE
> + *
> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + *
> + * * Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * * Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in
> + * the documentation and/or other materials provided with the
> + * distribution.
> + * * Neither the name of Intel Corporation nor the names of its
> + * contributors may be used to endorse or promote products derived
> + * from this software without specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <sys/mman.h>
> +
> +#include <rte_log.h>
> +
> +#include "virtio-net-user.h"
> +#include "vhost-net-user.h"
> +#include "vhost-net.h"
> +
> +extern const struct vhost_net_device_ops *ops;
> +
> +#if 0
> +int
> +user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
> +{
> + unsigned int idx;
> + struct VhostUserMemory memory = pmsg->payload.memory;
> + struct virtio_memory_regions regions[VHOST_MEMORY_MAX_NREGIONS];
> + uint64_t mapped_address, base_address = 0, mem_size = 0;
> +
> + for (idx = 0; idx < memory.nregions; idx++) {
> + if (memory.regions[idx].guest_phys_addr == 0)
> + base_address = memory.regions[idx].userspace_addr;
> + }
> + if (base_address == 0) {
> + RTE_LOG(ERR, VHOST_CONFIG,
> + "couldn't find the mem region whose gpa is 0.\n");
> + return -1;
> + }
> +
> + for (idx = 0; idx < memory.nregions; idx++) {
> + uint64_t size = memory.regions[idx].userspace_addr -
> + base_address + memory.regions[idx].memory_size;
> + if (mem_size < size)
> + mem_size = size;
> + }
> +
> + /*
> + * here we assume qemu will map only one file for memory allocation,
> + * we only use fds[0] with offset 0.
> + */
> + mapped_address = (uint64_t)(uintptr_t)mmap(NULL, mem_size,
> + PROT_READ | PROT_WRITE, MAP_SHARED, pmsg->fds[0], 0);
> +
> + if (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) {
> + RTE_LOG(ERR, VHOST_CONFIG, " mmap qemu guest failed.\n");
> + return -1;
> + }
> +
> + for (idx = 0; idx < memory.nregions; idx++) {
> + regions[idx].guest_phys_address =
> + memory.regions[idx].guest_phys_addr;
> + regions[idx].guest_phys_address_end =
> + memory.regions[idx].guest_phys_addr +
> + memory.regions[idx].memory_size;
> + regions[idx].memory_size = memory.regions[idx].memory_size;
> + regions[idx].userspace_address =
> + memory.regions[idx].userspace_addr;
> +
> + regions[idx].address_offset = mapped_address - base_address +
> + regions[idx].userspace_address -
> + regions[idx].guest_phys_address;
> + LOG_DEBUG(VHOST_CONFIG,
> + "REGION: %u - GPA: %p - QEMU VA: %p - SIZE (%"PRIu64")\n",
> + idx,
> + (void *)(uintptr_t)regions[idx].guest_phys_address,
> + (void *)(uintptr_t)regions[idx].userspace_address,
> + regions[idx].memory_size);
> + }
> + ops->set_mem_table(ctx, regions, memory.nregions);
> + return 0;
> +}
> +
> +#else
> +
> +int
> +user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
> +{
> + unsigned int idx;
> + struct VhostUserMemory memory = pmsg->payload.memory;
> + struct virtio_memory_regions regions[VHOST_MEMORY_MAX_NREGIONS];
> + uint64_t mapped_address, base_address = 0;
> +
> + for (idx = 0; idx < memory.nregions; idx++) {
> + if (memory.regions[idx].guest_phys_addr == 0)
> + base_address = memory.regions[idx].userspace_addr;
> + }
> + if (base_address == 0) {
> + RTE_LOG(ERR, VHOST_CONFIG,
> + "couldn't find the mem region whose gpa is 0.\n");
> + return -1;
> + }
> +
> +
> + for (idx = 0; idx < memory.nregions; idx++) {
> + regions[idx].guest_phys_address =
> + memory.regions[idx].guest_phys_addr;
> + regions[idx].guest_phys_address_end =
> + memory.regions[idx].guest_phys_addr +
> + memory.regions[idx].memory_size;
> + regions[idx].memory_size = memory.regions[idx].memory_size;
> + regions[idx].userspace_address =
> + memory.regions[idx].userspace_addr;
> +/*
> + mapped_address = (uint64_t)(uintptr_t)mmap(NULL,
> + regions[idx].memory_size,
> + PROT_READ | PROT_WRITE, MAP_SHARED,
> + pmsg->fds[idx],
> + memory.regions[idx].mmap_offset);
> +*/
> +
> +/* Ugly workaround: map size + mmap_offset from file offset 0, then skip mmap_offset below — presumably because mmap_offset may not be page-aligned; TODO map at the aligned offset instead */
> + mapped_address = (uint64_t)(uintptr_t)mmap(NULL,
> + regions[idx].memory_size +
> + memory.regions[idx].mmap_offset,
> + PROT_READ | PROT_WRITE, MAP_SHARED,
> + pmsg->fds[idx],
> + 0);
> + printf("mapped to %p\n", (void *)mapped_address);
> +
> + if (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) {
> + RTE_LOG(ERR, VHOST_CONFIG, " mmap qemu guest failed.\n");
> + return -1;
> + }
> +
> +// printf("ret=%d\n", munmap((void *)mapped_address, (regions[idx].memory_size + memory.regions[idx].mmap_offset + 0x3FFFFFFF) & ~0x3FFFFFFF));
> +// printf("unaligned ret=%d\n", munmap((void *)mapped_address, (regions[idx].memory_size + memory.regions[idx].mmap_offset ) ));
> + mapped_address += memory.regions[idx].mmap_offset;
> +
> + regions[idx].address_offset = mapped_address -
> + regions[idx].guest_phys_address;
> + LOG_DEBUG(VHOST_CONFIG,
> + "REGION: %u - GPA: %p - QEMU VA: %p - SIZE (%"PRIu64")\n",
> + idx,
> + (void *)(uintptr_t)regions[idx].guest_phys_address,
> + (void *)(uintptr_t)regions[idx].userspace_address,
> + regions[idx].memory_size);
> + }
> + ops->set_mem_table(ctx, regions, memory.nregions);
> + return 0;
> +}
> +
> +
> +
> +
> +#endif
> +
> +
> +void
> +user_set_vring_call(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
> +{
> + struct vhost_vring_file file;
> +
> + file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
> + file.fd = pmsg->fds[0];
> + RTE_LOG(INFO, VHOST_CONFIG,
> + "vring call idx:%d file:%d\n", file.index, file.fd);
> + ops->set_vring_call(ctx, &file);
> +}
> +
> +
> +void
> +user_set_vring_kick(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
> +{
> + struct vhost_vring_file file;
> +
> + file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
> + file.fd = pmsg->fds[0];
> + RTE_LOG(INFO, VHOST_CONFIG,
> + "vring kick idx:%d file:%d\n", file.index, file.fd);
> + ops->set_vring_kick(ctx, &file);
> +}
> diff --git a/lib/librte_vhost/vhost-user/virtio-net-user.h b/lib/librte_vhost/vhost-user/virtio-net-user.h
> new file mode 100644
> index 0000000..0969376
> --- /dev/null
> +++ b/lib/librte_vhost/vhost-user/virtio-net-user.h
> @@ -0,0 +1,11 @@
> +#ifndef _VIRTIO_NET_USER_H
> +#define _VIRTIO_NET_USER_H
> +
> +#include "vhost-net.h"
> +#include "vhost-net-user.h"
> +
> +int user_set_mem_table(struct vhost_device_ctx, struct VhostUserMsg *);
> +void user_set_vring_kick(struct vhost_device_ctx, struct VhostUserMsg *);
> +void user_set_vring_call(struct vhost_device_ctx, struct VhostUserMsg *);
> +
> +#endif
> diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
> index ccfd82f..8ff0301 100644
> --- a/lib/librte_vhost/vhost_rxtx.c
> +++ b/lib/librte_vhost/vhost_rxtx.c
> @@ -38,19 +38,14 @@
> #include <rte_memcpy.h>
> #include <rte_virtio_net.h>
>
> -#include "vhost-net-cdev.h"
> +#include "vhost-net.h"
>
> -#define MAX_PKT_BURST 32
> +#define VHOST_MAX_PKT_BURST 64
> +#define VHOST_MAX_MRG_PKT_BURST 64
>
> -/**
> - * This function adds buffers to the virtio devices RX virtqueue. Buffers can
> - * be received from the physical port or from another virtio device. A packet
> - * count is returned to indicate the number of packets that are succesfully
> - * added to the RX queue. This function works when mergeable is disabled.
> - */
> -static inline uint32_t __attribute__((always_inline))
> -virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
> - struct rte_mbuf **pkts, uint32_t count)
> +
> +uint32_t
> +rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id, struct rte_mbuf **pkts, uint32_t count)
> {
> struct vhost_virtqueue *vq;
> struct vring_desc *desc;
> @@ -59,26 +54,23 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
> struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
> uint64_t buff_addr = 0;
> uint64_t buff_hdr_addr = 0;
> - uint32_t head[MAX_PKT_BURST], packet_len = 0;
> + uint32_t head[VHOST_MAX_PKT_BURST], packet_len = 0;
> uint32_t head_idx, packet_success = 0;
> + uint32_t mergeable, mrg_count = 0;
> uint16_t avail_idx, res_cur_idx;
> uint16_t res_base_idx, res_end_idx;
> uint16_t free_entries;
> uint8_t success = 0;
>
> - LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_rx()\n", dev->device_fh);
> + LOG_DEBUG(VHOST_DATA, "(%"PRIu64") %s()\n", dev->device_fh, __func__);
> if (unlikely(queue_id != VIRTIO_RXQ)) {
> LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
> return 0;
> }
>
> vq = dev->virtqueue[VIRTIO_RXQ];
> - count = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count;
> -
> - /*
> - * As many data cores may want access to available buffers,
> - * they need to be reserved.
> - */
> + count = (count > VHOST_MAX_PKT_BURST) ? VHOST_MAX_PKT_BURST : count;
> + /* As many data cores may want access to available buffers, they need to be reserved. */
> do {
> res_base_idx = vq->last_used_idx_res;
> avail_idx = *((volatile uint16_t *)&vq->avail->idx);
> @@ -93,21 +85,25 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>
> res_end_idx = res_base_idx + count;
> /* vq->last_used_idx_res is atomically updated. */
> - /* TODO: Allow to disable cmpset if no concurrency in application. */
> + /* TODO: Allow to disable cmpset if no concurrency in application */
> success = rte_atomic16_cmpset(&vq->last_used_idx_res,
> res_base_idx, res_end_idx);
> + /* If there is contention here and failed, try again. */
> } while (unlikely(success == 0));
> res_cur_idx = res_base_idx;
> LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| End Index %d\n",
> - dev->device_fh, res_cur_idx, res_end_idx);
> + dev->device_fh,
> + res_cur_idx, res_end_idx);
>
> /* Prefetch available ring to retrieve indexes. */
> rte_prefetch0(&vq->avail->ring[res_cur_idx & (vq->size - 1)]);
>
> + /* Check if the VIRTIO_NET_F_MRG_RXBUF feature is enabled. */
> + mergeable = dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF);
> +
> /* Retrieve all of the head indexes first to avoid caching issues. */
> for (head_idx = 0; head_idx < count; head_idx++)
> - head[head_idx] = vq->avail->ring[(res_cur_idx + head_idx) &
> - (vq->size - 1)];
> + head[head_idx] = vq->avail->ring[(res_cur_idx + head_idx) & (vq->size - 1)];
>
> /*Prefetch descriptor index. */
> rte_prefetch0(&vq->desc[head[packet_success]]);
> @@ -123,46 +119,57 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
> /* Prefetch buffer address. */
> rte_prefetch0((void *)(uintptr_t)buff_addr);
>
> - /* Copy virtio_hdr to packet and increment buffer address */
> - buff_hdr_addr = buff_addr;
> - packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;
> -
> - /*
> - * If the descriptors are chained the header and data are
> - * placed in separate buffers.
> - */
> - if (desc->flags & VRING_DESC_F_NEXT) {
> - desc->len = vq->vhost_hlen;
> - desc = &vq->desc[desc->next];
> - /* Buffer address translation. */
> - buff_addr = gpa_to_vva(dev, desc->addr);
> - desc->len = rte_pktmbuf_data_len(buff);
> + if (mergeable && (mrg_count != 0)) {
> + desc->len = packet_len = rte_pktmbuf_data_len(buff);
> } else {
> - buff_addr += vq->vhost_hlen;
> - desc->len = packet_len;
> + /* Copy virtio_hdr to packet and increment buffer address */
> + buff_hdr_addr = buff_addr;
> + packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;
> +
> + /*
> + * If the descriptors are chained the header and data are placed in
> + * separate buffers.
> + */
> + if (desc->flags & VRING_DESC_F_NEXT) {
> + desc->len = vq->vhost_hlen;
> + desc = &vq->desc[desc->next];
> + /* Buffer address translation. */
> + buff_addr = gpa_to_vva(dev, desc->addr);
> + desc->len = rte_pktmbuf_data_len(buff);
> + } else {
> + buff_addr += vq->vhost_hlen;
> + desc->len = packet_len;
> + }
> }
>
> + VHOST_PRINT_PACKET(dev, (uintptr_t)buff_addr, rte_pktmbuf_data_len(buff), 0);
> +
> /* Update used ring with desc information */
> - vq->used->ring[res_cur_idx & (vq->size - 1)].id =
> - head[packet_success];
> + vq->used->ring[res_cur_idx & (vq->size - 1)].id = head[packet_success];
> vq->used->ring[res_cur_idx & (vq->size - 1)].len = packet_len;
>
> /* Copy mbuf data to buffer */
> - /* FIXME for sg mbuf and the case that desc couldn't hold the mbuf data */
> - rte_memcpy((void *)(uintptr_t)buff_addr,
> - rte_pktmbuf_mtod(buff, const void *),
> - rte_pktmbuf_data_len(buff));
> - PRINT_PACKET(dev, (uintptr_t)buff_addr,
> - rte_pktmbuf_data_len(buff), 0);
> + /* TODO: fix for sg mbuf and the case that desc couldn't hold the mbuf data */
> + rte_memcpy((void *)(uintptr_t)buff_addr, (const void *)buff->pkt.data, rte_pktmbuf_data_len(buff));
>
> res_cur_idx++;
> packet_success++;
>
> - rte_memcpy((void *)(uintptr_t)buff_hdr_addr,
> - (const void *)&virtio_hdr, vq->vhost_hlen);
> -
> - PRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1);
> -
> + /* If mergeable is disabled then a header is required per buffer. */
> + if (!mergeable) {
> + rte_memcpy((void *)(uintptr_t)buff_hdr_addr, (const void *)&virtio_hdr, vq->vhost_hlen);
> + VHOST_PRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1);
> + } else {
> + mrg_count++;
> + /* Merge buffer can only handle so many buffers at a time. Tell the guest if this limit is reached. */
> + if ((mrg_count == VHOST_MAX_MRG_PKT_BURST) || (res_cur_idx == res_end_idx)) {
> + virtio_hdr.num_buffers = mrg_count;
> + LOG_DEBUG(VHOST_DATA, "(%"PRIu64") RX: Num merge buffers %d\n", dev->device_fh, virtio_hdr.num_buffers);
> + rte_memcpy((void *)(uintptr_t)buff_hdr_addr, (const void *)&virtio_hdr, vq->vhost_hlen);
> + VHOST_PRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1);
> + mrg_count = 0;
> + }
> + }
> if (res_cur_idx < res_end_idx) {
> /* Prefetch descriptor index. */
> rte_prefetch0(&vq->desc[head[packet_success]]);
> @@ -184,357 +191,18 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
> return count;
> }
>
> -static inline uint32_t __attribute__((always_inline))
> -copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
> - uint16_t res_end_idx, struct rte_mbuf *pkt)
> -{
> - uint32_t vec_idx = 0;
> - uint32_t entry_success = 0;
> - struct vhost_virtqueue *vq;
> - /* The virtio_hdr is initialised to 0. */
> - struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {
> - {0, 0, 0, 0, 0, 0}, 0};
> - uint16_t cur_idx = res_base_idx;
> - uint64_t vb_addr = 0;
> - uint64_t vb_hdr_addr = 0;
> - uint32_t seg_offset = 0;
> - uint32_t vb_offset = 0;
> - uint32_t seg_avail;
> - uint32_t vb_avail;
> - uint32_t cpy_len, entry_len;
> -
> - if (pkt == NULL)
> - return 0;
> -
> - LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| "
> - "End Index %d\n",
> - dev->device_fh, cur_idx, res_end_idx);
> -
> - /*
> - * Convert from gpa to vva
> - * (guest physical addr -> vhost virtual addr)
> - */
> - vq = dev->virtqueue[VIRTIO_RXQ];
> - vb_addr =
> - gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
> - vb_hdr_addr = vb_addr;
> -
> - /* Prefetch buffer address. */
> - rte_prefetch0((void *)(uintptr_t)vb_addr);
> -
> - virtio_hdr.num_buffers = res_end_idx - res_base_idx;
> -
> - LOG_DEBUG(VHOST_DATA, "(%"PRIu64") RX: Num merge buffers %d\n",
> - dev->device_fh, virtio_hdr.num_buffers);
>
> - rte_memcpy((void *)(uintptr_t)vb_hdr_addr,
> - (const void *)&virtio_hdr, vq->vhost_hlen);
> -
> - PRINT_PACKET(dev, (uintptr_t)vb_hdr_addr, vq->vhost_hlen, 1);
> -
> - seg_avail = rte_pktmbuf_data_len(pkt);
> - vb_offset = vq->vhost_hlen;
> - vb_avail =
> - vq->buf_vec[vec_idx].buf_len - vq->vhost_hlen;
> -
> - entry_len = vq->vhost_hlen;
> -
> - if (vb_avail == 0) {
> - uint32_t desc_idx =
> - vq->buf_vec[vec_idx].desc_idx;
> - vq->desc[desc_idx].len = vq->vhost_hlen;
> -
> - if ((vq->desc[desc_idx].flags
> - & VRING_DESC_F_NEXT) == 0) {
> - /* Update used ring with desc information */
> - vq->used->ring[cur_idx & (vq->size - 1)].id
> - = vq->buf_vec[vec_idx].desc_idx;
> - vq->used->ring[cur_idx & (vq->size - 1)].len
> - = entry_len;
> -
> - entry_len = 0;
> - cur_idx++;
> - entry_success++;
> - }
> -
> - vec_idx++;
> - vb_addr =
> - gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
> -
> - /* Prefetch buffer address. */
> - rte_prefetch0((void *)(uintptr_t)vb_addr);
> - vb_offset = 0;
> - vb_avail = vq->buf_vec[vec_idx].buf_len;
> - }
> -
> - cpy_len = RTE_MIN(vb_avail, seg_avail);
> -
> - while (cpy_len > 0) {
> - /* Copy mbuf data to vring buffer */
> - rte_memcpy((void *)(uintptr_t)(vb_addr + vb_offset),
> - (const void *)(rte_pktmbuf_mtod(pkt, char*) + seg_offset),
> - cpy_len);
> -
> - PRINT_PACKET(dev,
> - (uintptr_t)(vb_addr + vb_offset),
> - cpy_len, 0);
> -
> - seg_offset += cpy_len;
> - vb_offset += cpy_len;
> - seg_avail -= cpy_len;
> - vb_avail -= cpy_len;
> - entry_len += cpy_len;
> -
> - if (seg_avail != 0) {
> - /*
> - * The virtio buffer in this vring
> - * entry reach to its end.
> - * But the segment doesn't complete.
> - */
> - if ((vq->desc[vq->buf_vec[vec_idx].desc_idx].flags &
> - VRING_DESC_F_NEXT) == 0) {
> - /* Update used ring with desc information */
> - vq->used->ring[cur_idx & (vq->size - 1)].id
> - = vq->buf_vec[vec_idx].desc_idx;
> - vq->used->ring[cur_idx & (vq->size - 1)].len
> - = entry_len;
> - entry_len = 0;
> - cur_idx++;
> - entry_success++;
> - }
> -
> - vec_idx++;
> - vb_addr = gpa_to_vva(dev,
> - vq->buf_vec[vec_idx].buf_addr);
> - vb_offset = 0;
> - vb_avail = vq->buf_vec[vec_idx].buf_len;
> - cpy_len = RTE_MIN(vb_avail, seg_avail);
> - } else {
> - /*
> - * This current segment complete, need continue to
> - * check if the whole packet complete or not.
> - */
> - pkt = pkt->next;
> - if (pkt != NULL) {
> - /*
> - * There are more segments.
> - */
> - if (vb_avail == 0) {
> - /*
> - * This current buffer from vring is
> - * used up, need fetch next buffer
> - * from buf_vec.
> - */
> - uint32_t desc_idx =
> - vq->buf_vec[vec_idx].desc_idx;
> - vq->desc[desc_idx].len = vb_offset;
> -
> - if ((vq->desc[desc_idx].flags &
> - VRING_DESC_F_NEXT) == 0) {
> - uint16_t wrapped_idx =
> - cur_idx & (vq->size - 1);
> - /*
> - * Update used ring with the
> - * descriptor information
> - */
> - vq->used->ring[wrapped_idx].id
> - = desc_idx;
> - vq->used->ring[wrapped_idx].len
> - = entry_len;
> - entry_success++;
> - entry_len = 0;
> - cur_idx++;
> - }
> -
> - /* Get next buffer from buf_vec. */
> - vec_idx++;
> - vb_addr = gpa_to_vva(dev,
> - vq->buf_vec[vec_idx].buf_addr);
> - vb_avail =
> - vq->buf_vec[vec_idx].buf_len;
> - vb_offset = 0;
> - }
> -
> - seg_offset = 0;
> - seg_avail = rte_pktmbuf_data_len(pkt);
> - cpy_len = RTE_MIN(vb_avail, seg_avail);
> - } else {
> - /*
> - * This whole packet completes.
> - */
> - uint32_t desc_idx =
> - vq->buf_vec[vec_idx].desc_idx;
> - vq->desc[desc_idx].len = vb_offset;
> -
> - while (vq->desc[desc_idx].flags &
> - VRING_DESC_F_NEXT) {
> - desc_idx = vq->desc[desc_idx].next;
> - vq->desc[desc_idx].len = 0;
> - }
> -
> - /* Update used ring with desc information */
> - vq->used->ring[cur_idx & (vq->size - 1)].id
> - = vq->buf_vec[vec_idx].desc_idx;
> - vq->used->ring[cur_idx & (vq->size - 1)].len
> - = entry_len;
> - entry_len = 0;
> - cur_idx++;
> - entry_success++;
> - seg_avail = 0;
> - cpy_len = RTE_MIN(vb_avail, seg_avail);
> - }
> - }
> - }
> -
> - return entry_success;
> -}
> -
> -/*
> - * This function works for mergeable RX.
> - */
> -static inline uint32_t __attribute__((always_inline))
> -virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
> - struct rte_mbuf **pkts, uint32_t count)
> +uint32_t
> +rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id, struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint32_t count)
> {
> - struct vhost_virtqueue *vq;
> - uint32_t pkt_idx = 0, entry_success = 0;
> - uint16_t avail_idx, res_cur_idx;
> - uint16_t res_base_idx, res_end_idx;
> - uint8_t success = 0;
> -
> - LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_merge_rx()\n",
> - dev->device_fh);
> - if (unlikely(queue_id != VIRTIO_RXQ)) {
> - LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
> - }
> -
> - vq = dev->virtqueue[VIRTIO_RXQ];
> - count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
> -
> - if (count == 0)
> - return 0;
> -
> - for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
> - uint32_t secure_len = 0;
> - uint16_t need_cnt;
> - uint32_t vec_idx = 0;
> - uint32_t pkt_len = pkts[pkt_idx]->pkt_len + vq->vhost_hlen;
> - uint16_t i, id;
> -
> - do {
> - /*
> - * As many data cores may want access to available
> - * buffers, they need to be reserved.
> - */
> - res_base_idx = vq->last_used_idx_res;
> - res_cur_idx = res_base_idx;
> -
> - do {
> - avail_idx = *((volatile uint16_t *)&vq->avail->idx);
> - if (unlikely(res_cur_idx == avail_idx)) {
> - LOG_DEBUG(VHOST_DATA,
> - "(%"PRIu64") Failed "
> - "to get enough desc from "
> - "vring\n",
> - dev->device_fh);
> - return pkt_idx;
> - } else {
> - uint16_t wrapped_idx =
> - (res_cur_idx) & (vq->size - 1);
> - uint32_t idx =
> - vq->avail->ring[wrapped_idx];
> - uint8_t next_desc;
> -
> - do {
> - next_desc = 0;
> - secure_len += vq->desc[idx].len;
> - if (vq->desc[idx].flags &
> - VRING_DESC_F_NEXT) {
> - idx = vq->desc[idx].next;
> - next_desc = 1;
> - }
> - } while (next_desc);
> -
> - res_cur_idx++;
> - }
> - } while (pkt_len > secure_len);
> -
> - /* vq->last_used_idx_res is atomically updated. */
> - success = rte_atomic16_cmpset(&vq->last_used_idx_res,
> - res_base_idx,
> - res_cur_idx);
> - } while (success == 0);
> -
> - id = res_base_idx;
> - need_cnt = res_cur_idx - res_base_idx;
> -
> - for (i = 0; i < need_cnt; i++, id++) {
> - uint16_t wrapped_idx = id & (vq->size - 1);
> - uint32_t idx = vq->avail->ring[wrapped_idx];
> - uint8_t next_desc;
> - do {
> - next_desc = 0;
> - vq->buf_vec[vec_idx].buf_addr =
> - vq->desc[idx].addr;
> - vq->buf_vec[vec_idx].buf_len =
> - vq->desc[idx].len;
> - vq->buf_vec[vec_idx].desc_idx = idx;
> - vec_idx++;
> -
> - if (vq->desc[idx].flags & VRING_DESC_F_NEXT) {
> - idx = vq->desc[idx].next;
> - next_desc = 1;
> - }
> - } while (next_desc);
> - }
> -
> - res_end_idx = res_cur_idx;
> -
> - entry_success = copy_from_mbuf_to_vring(dev, res_base_idx,
> - res_end_idx, pkts[pkt_idx]);
> -
> - rte_compiler_barrier();
> -
> - /*
> - * Wait until it's our turn to add our buffer
> - * to the used ring.
> - */
> - while (unlikely(vq->last_used_idx != res_base_idx))
> - rte_pause();
> -
> - *(volatile uint16_t *)&vq->used->idx += entry_success;
> - vq->last_used_idx = res_end_idx;
> -
> - /* Kick the guest if necessary. */
> - if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
> - eventfd_write((int)vq->kickfd, 1);
> - }
> -
> - return count;
> -}
> -
> -uint16_t
> -rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
> - struct rte_mbuf **pkts, uint16_t count)
> -{
> - if (unlikely(dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)))
> - return virtio_dev_merge_rx(dev, queue_id, pkts, count);
> - else
> - return virtio_dev_rx(dev, queue_id, pkts, count);
> -}
> -
> -uint16_t
> -rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
> - struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
> -{
> - struct rte_mbuf *m, *prev;
> + struct rte_mbuf *mbuf;
> struct vhost_virtqueue *vq;
> struct vring_desc *desc;
> - uint64_t vb_addr = 0;
> - uint32_t head[MAX_PKT_BURST];
> + uint64_t buff_addr = 0;
> + uint32_t head[VHOST_MAX_PKT_BURST];
> uint32_t used_idx;
> uint32_t i;
> - uint16_t free_entries, entry_success = 0;
> + uint16_t free_entries, packet_success = 0;
> uint16_t avail_idx;
>
> if (unlikely(queue_id != VIRTIO_TXQ)) {
> @@ -549,8 +217,8 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
> if (vq->last_used_idx == avail_idx)
> return 0;
>
> - LOG_DEBUG(VHOST_DATA, "%s (%"PRIu64")\n", __func__,
> - dev->device_fh);
> + LOG_DEBUG(VHOST_DATA, "(%"PRIu64") %s(%d->%d)\n",
> + dev->device_fh, __func__, vq->last_used_idx, avail_idx);
>
> /* Prefetch available ring to retrieve head indexes. */
> rte_prefetch0(&vq->avail->ring[vq->last_used_idx & (vq->size - 1)]);
> @@ -558,173 +226,68 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
> /*get the number of free entries in the ring*/
> free_entries = (avail_idx - vq->last_used_idx);
>
> - free_entries = RTE_MIN(free_entries, count);
> + if (free_entries > count)
> + free_entries = count;
> /* Limit to MAX_PKT_BURST. */
> - free_entries = RTE_MIN(free_entries, MAX_PKT_BURST);
> + if (free_entries > VHOST_MAX_PKT_BURST)
> + free_entries = VHOST_MAX_PKT_BURST;
>
> - LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n",
> - dev->device_fh, free_entries);
> + LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n", dev->device_fh, free_entries);
> /* Retrieve all of the head indexes first to avoid caching issues. */
> for (i = 0; i < free_entries; i++)
> head[i] = vq->avail->ring[(vq->last_used_idx + i) & (vq->size - 1)];
>
> /* Prefetch descriptor index. */
> - rte_prefetch0(&vq->desc[head[entry_success]]);
> + rte_prefetch0(&vq->desc[head[packet_success]]);
> rte_prefetch0(&vq->used->ring[vq->last_used_idx & (vq->size - 1)]);
>
> - while (entry_success < free_entries) {
> - uint32_t vb_avail, vb_offset;
> - uint32_t seg_avail, seg_offset;
> - uint32_t cpy_len;
> - uint32_t seg_num = 0;
> - struct rte_mbuf *cur;
> - uint8_t alloc_err = 0;
> -
> - desc = &vq->desc[head[entry_success]];
> + while (packet_success < free_entries) {
> + desc = &vq->desc[head[packet_success]];
>
> /* Discard first buffer as it is the virtio header */
> desc = &vq->desc[desc->next];
>
> /* Buffer address translation. */
> - vb_addr = gpa_to_vva(dev, desc->addr);
> + buff_addr = gpa_to_vva(dev, desc->addr);
> /* Prefetch buffer address. */
> - rte_prefetch0((void *)(uintptr_t)vb_addr);
> + rte_prefetch0((void *)(uintptr_t)buff_addr);
>
> used_idx = vq->last_used_idx & (vq->size - 1);
>
> - if (entry_success < (free_entries - 1)) {
> + if (packet_success < (free_entries - 1)) {
> /* Prefetch descriptor index. */
> - rte_prefetch0(&vq->desc[head[entry_success+1]]);
> + rte_prefetch0(&vq->desc[head[packet_success+1]]);
> rte_prefetch0(&vq->used->ring[(used_idx + 1) & (vq->size - 1)]);
> }
>
> /* Update used index buffer information. */
> - vq->used->ring[used_idx].id = head[entry_success];
> + vq->used->ring[used_idx].id = head[packet_success];
> vq->used->ring[used_idx].len = 0;
>
> - vb_offset = 0;
> - vb_avail = desc->len;
> - /* Allocate an mbuf and populate the structure. */
> - m = rte_pktmbuf_alloc(mbuf_pool);
> - if (unlikely(m == NULL)) {
> - RTE_LOG(ERR, VHOST_DATA,
> - "Failed to allocate memory for mbuf.\n");
> - return entry_success;
> + mbuf = rte_pktmbuf_alloc(mbuf_pool);
> + if (unlikely(mbuf == NULL)) {
> + RTE_LOG(ERR, VHOST_DATA, "Failed to allocate memory for mbuf.\n");
> + return packet_success;
> }
> - seg_offset = 0;
> - seg_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
> - cpy_len = RTE_MIN(vb_avail, seg_avail);
> -
> - PRINT_PACKET(dev, (uintptr_t)vb_addr, desc->len, 0);
> -
> - seg_num++;
> - cur = m;
> - prev = m;
> - while (cpy_len != 0) {
> - rte_memcpy((void *)(rte_pktmbuf_mtod(cur, char *) + seg_offset),
> - (void *)((uintptr_t)(vb_addr + vb_offset)),
> - cpy_len);
> -
> - seg_offset += cpy_len;
> - vb_offset += cpy_len;
> - vb_avail -= cpy_len;
> - seg_avail -= cpy_len;
> -
> - if (vb_avail != 0) {
> - /*
> - * The segment reachs to its end,
> - * while the virtio buffer in TX vring has
> - * more data to be copied.
> - */
> - cur->data_len = seg_offset;
> - m->pkt_len += seg_offset;
> - /* Allocate mbuf and populate the structure. */
> - cur = rte_pktmbuf_alloc(mbuf_pool);
> - if (unlikely(cur == NULL)) {
> - RTE_LOG(ERR, VHOST_DATA, "Failed to "
> - "allocate memory for mbuf.\n");
> - rte_pktmbuf_free(m);
> - alloc_err = 1;
> - break;
> - }
> -
> - seg_num++;
> - prev->next = cur;
> - prev = cur;
> - seg_offset = 0;
> - seg_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM;
> - } else {
> - if (desc->flags & VRING_DESC_F_NEXT) {
> - /*
> - * There are more virtio buffers in
> - * same vring entry need to be copied.
> - */
> - if (seg_avail == 0) {
> - /*
> - * The current segment hasn't
> - * room to accomodate more
> - * data.
> - */
> - cur->data_len = seg_offset;
> - m->pkt_len += seg_offset;
> - /*
> - * Allocate an mbuf and
> - * populate the structure.
> - */
> - cur = rte_pktmbuf_alloc(mbuf_pool);
> - if (unlikely(cur == NULL)) {
> - RTE_LOG(ERR,
> - VHOST_DATA,
> - "Failed to "
> - "allocate memory "
> - "for mbuf\n");
> - rte_pktmbuf_free(m);
> - alloc_err = 1;
> - break;
> - }
> - seg_num++;
> - prev->next = cur;
> - prev = cur;
> - seg_offset = 0;
> - seg_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM;
> - }
> -
> - desc = &vq->desc[desc->next];
> -
> - /* Buffer address translation. */
> - vb_addr = gpa_to_vva(dev, desc->addr);
> - /* Prefetch buffer address. */
> - rte_prefetch0((void *)(uintptr_t)vb_addr);
> - vb_offset = 0;
> - vb_avail = desc->len;
> -
> - PRINT_PACKET(dev, (uintptr_t)vb_addr,
> - desc->len, 0);
> - } else {
> - /* The whole packet completes. */
> - cur->data_len = seg_offset;
> - m->pkt_len += seg_offset;
> - vb_avail = 0;
> - }
> - }
> + mbuf->pkt.data_len = desc->len;
> + mbuf->pkt.pkt_len = mbuf->pkt.data_len;
>
> - cpy_len = RTE_MIN(vb_avail, seg_avail);
> - }
> + rte_memcpy((void *) mbuf->pkt.data,
> + (const void *) buff_addr, mbuf->pkt.data_len);
>
> - if (unlikely(alloc_err == 1))
> - break;
> + pkts[packet_success] = mbuf;
>
> - m->nb_segs = seg_num;
> + VHOST_PRINT_PACKET(dev, (uintptr_t)buff_addr, desc->len, 0);
>
> - pkts[entry_success] = m;
> vq->last_used_idx++;
> - entry_success++;
> + packet_success++;
> }
>
> rte_compiler_barrier();
> - vq->used->idx += entry_success;
> + vq->used->idx += packet_success;
> /* Kick guest if required. */
> if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
> eventfd_write((int)vq->kickfd, 1);
> - return entry_success;
> +
> + return packet_success;
> }
> diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
> index 852b6d1..516e743 100644
> --- a/lib/librte_vhost/virtio-net.c
> +++ b/lib/librte_vhost/virtio-net.c
> @@ -31,17 +31,14 @@
> * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> */
>
> -#include <dirent.h>
> -#include <fuse/cuse_lowlevel.h>
> #include <linux/vhost.h>
> #include <linux/virtio_net.h>
> #include <stddef.h>
> #include <stdint.h>
> #include <stdlib.h>
> -#include <sys/eventfd.h>
> -#include <sys/ioctl.h>
> #include <sys/mman.h>
> #include <unistd.h>
> +#include <assert.h>
>
> #include <rte_ethdev.h>
> #include <rte_log.h>
> @@ -49,10 +46,8 @@
> #include <rte_memory.h>
> #include <rte_virtio_net.h>
>
> -#include "vhost-net-cdev.h"
> -#include "eventfd_link/eventfd_link.h"
> -
> -/*
> +#include "vhost-net.h"
> +/**
> * Device linked list structure for configuration.
> */
> struct virtio_net_config_ll {
> @@ -60,38 +55,15 @@ struct virtio_net_config_ll {
> struct virtio_net_config_ll *next; /* Next dev on linked list.*/
> };
>
> -const char eventfd_cdev[] = "/dev/eventfd-link";
> -
> -/* device ops to add/remove device to/from data core. */
> +/* device ops to add/remove device to data core. */
> static struct virtio_net_device_ops const *notify_ops;
> -/* root address of the linked list of managed virtio devices */
> +/* root address of the linked list in the configuration core. */
> static struct virtio_net_config_ll *ll_root;
>
> /* Features supported by this lib. */
> -#define VHOST_SUPPORTED_FEATURES ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | \
> - (1ULL << VIRTIO_NET_F_CTRL_RX))
> +#define VHOST_SUPPORTED_FEATURES (1ULL << VIRTIO_NET_F_MRG_RXBUF)
> static uint64_t VHOST_FEATURES = VHOST_SUPPORTED_FEATURES;
>
> -/* Line size for reading maps file. */
> -static const uint32_t BUFSIZE = PATH_MAX;
> -
> -/* Size of prot char array in procmap. */
> -#define PROT_SZ 5
> -
> -/* Number of elements in procmap struct. */
> -#define PROCMAP_SZ 8
> -
> -/* Structure containing information gathered from maps file. */
> -struct procmap {
> - uint64_t va_start; /* Start virtual address in file. */
> - uint64_t len; /* Size of file. */
> - uint64_t pgoff; /* Not used. */
> - uint32_t maj; /* Not used. */
> - uint32_t min; /* Not used. */
> - uint32_t ino; /* Not used. */
> - char prot[PROT_SZ]; /* Not used. */
> - char fname[PATH_MAX]; /* File name. */
> -};
>
> /*
> * Converts QEMU virtual address to Vhost virtual address. This function is
> @@ -110,199 +82,15 @@ qva_to_vva(struct virtio_net *dev, uint64_t qemu_va)
> if ((qemu_va >= region->userspace_address) &&
> (qemu_va <= region->userspace_address +
> region->memory_size)) {
> - vhost_va = dev->mem->mapped_address + qemu_va -
> - dev->mem->base_address;
> + vhost_va = qemu_va + region->guest_phys_address +
> + region->address_offset -
> + region->userspace_address;
> break;
> }
> }
> return vhost_va;
> }
>
> -/*
> - * Locate the file containing QEMU's memory space and
> - * map it to our address space.
> - */
> -static int
> -host_memory_map(struct virtio_net *dev, struct virtio_memory *mem,
> - pid_t pid, uint64_t addr)
> -{
> - struct dirent *dptr = NULL;
> - struct procmap procmap;
> - DIR *dp = NULL;
> - int fd;
> - int i;
> - char memfile[PATH_MAX];
> - char mapfile[PATH_MAX];
> - char procdir[PATH_MAX];
> - char resolved_path[PATH_MAX];
> - char *path = NULL;
> - FILE *fmap;
> - void *map;
> - uint8_t found = 0;
> - char line[BUFSIZE];
> - char dlm[] = "- : ";
> - char *str, *sp, *in[PROCMAP_SZ];
> - char *end = NULL;
> -
> - /* Path where mem files are located. */
> - snprintf(procdir, PATH_MAX, "/proc/%u/fd/", pid);
> - /* Maps file used to locate mem file. */
> - snprintf(mapfile, PATH_MAX, "/proc/%u/maps", pid);
> -
> - fmap = fopen(mapfile, "r");
> - if (fmap == NULL) {
> - RTE_LOG(ERR, VHOST_CONFIG,
> - "(%"PRIu64") Failed to open maps file for pid %d\n",
> - dev->device_fh, pid);
> - return -1;
> - }
> -
> - /* Read through maps file until we find out base_address. */
> - while (fgets(line, BUFSIZE, fmap) != 0) {
> - str = line;
> - errno = 0;
> - /* Split line into fields. */
> - for (i = 0; i < PROCMAP_SZ; i++) {
> - in[i] = strtok_r(str, &dlm[i], &sp);
> - if ((in[i] == NULL) || (errno != 0)) {
> - fclose(fmap);
> - return -1;
> - }
> - str = NULL;
> - }
> -
> - /* Convert/Copy each field as needed. */
> - procmap.va_start = strtoull(in[0], &end, 16);
> - if ((in[0] == '\0') || (end == NULL) || (*end != '\0') ||
> - (errno != 0)) {
> - fclose(fmap);
> - return -1;
> - }
> -
> - procmap.len = strtoull(in[1], &end, 16);
> - if ((in[1] == '\0') || (end == NULL) || (*end != '\0') ||
> - (errno != 0)) {
> - fclose(fmap);
> - return -1;
> - }
> -
> - procmap.pgoff = strtoull(in[3], &end, 16);
> - if ((in[3] == '\0') || (end == NULL) || (*end != '\0') ||
> - (errno != 0)) {
> - fclose(fmap);
> - return -1;
> - }
> -
> - procmap.maj = strtoul(in[4], &end, 16);
> - if ((in[4] == '\0') || (end == NULL) || (*end != '\0') ||
> - (errno != 0)) {
> - fclose(fmap);
> - return -1;
> - }
> -
> - procmap.min = strtoul(in[5], &end, 16);
> - if ((in[5] == '\0') || (end == NULL) || (*end != '\0') ||
> - (errno != 0)) {
> - fclose(fmap);
> - return -1;
> - }
> -
> - procmap.ino = strtoul(in[6], &end, 16);
> - if ((in[6] == '\0') || (end == NULL) || (*end != '\0') ||
> - (errno != 0)) {
> - fclose(fmap);
> - return -1;
> - }
> -
> - memcpy(&procmap.prot, in[2], PROT_SZ);
> - memcpy(&procmap.fname, in[7], PATH_MAX);
> -
> - if (procmap.va_start == addr) {
> - procmap.len = procmap.len - procmap.va_start;
> - found = 1;
> - break;
> - }
> - }
> - fclose(fmap);
> -
> - if (!found) {
> - RTE_LOG(ERR, VHOST_CONFIG,
> - "(%"PRIu64") Failed to find memory file in pid %d maps file\n",
> - dev->device_fh, pid);
> - return -1;
> - }
> -
> - /* Find the guest memory file among the process fds. */
> - dp = opendir(procdir);
> - if (dp == NULL) {
> - RTE_LOG(ERR, VHOST_CONFIG,
> - "(%"PRIu64") Cannot open pid %d process directory\n",
> - dev->device_fh, pid);
> - return -1;
> - }
> -
> - found = 0;
> -
> - /* Read the fd directory contents. */
> - while (NULL != (dptr = readdir(dp))) {
> - snprintf(memfile, PATH_MAX, "/proc/%u/fd/%s",
> - pid, dptr->d_name);
> - path = realpath(memfile, resolved_path);
> - if ((path == NULL) && (strlen(resolved_path) == 0)) {
> - RTE_LOG(ERR, VHOST_CONFIG,
> - "(%"PRIu64") Failed to resolve fd directory\n",
> - dev->device_fh);
> - closedir(dp);
> - return -1;
> - }
> - if (strncmp(resolved_path, procmap.fname,
> - strnlen(procmap.fname, PATH_MAX)) == 0) {
> - found = 1;
> - break;
> - }
> - }
> -
> - closedir(dp);
> -
> - if (found == 0) {
> - RTE_LOG(ERR, VHOST_CONFIG,
> - "(%"PRIu64") Failed to find memory file for pid %d\n",
> - dev->device_fh, pid);
> - return -1;
> - }
> - /* Open the shared memory file and map the memory into this process. */
> - fd = open(memfile, O_RDWR);
> -
> - if (fd == -1) {
> - RTE_LOG(ERR, VHOST_CONFIG,
> - "(%"PRIu64") Failed to open %s for pid %d\n",
> - dev->device_fh, memfile, pid);
> - return -1;
> - }
> -
> - map = mmap(0, (size_t)procmap.len, PROT_READ|PROT_WRITE,
> - MAP_POPULATE|MAP_SHARED, fd, 0);
> - close(fd);
> -
> - if (map == MAP_FAILED) {
> - RTE_LOG(ERR, VHOST_CONFIG,
> - "(%"PRIu64") Error mapping the file %s for pid %d\n",
> - dev->device_fh, memfile, pid);
> - return -1;
> - }
> -
> - /* Store the memory address and size in the device data structure */
> - mem->mapped_address = (uint64_t)(uintptr_t)map;
> - mem->mapped_size = procmap.len;
> -
> - LOG_DEBUG(VHOST_CONFIG,
> - "(%"PRIu64") Mem File: %s->%s - Size: %llu - VA: %p\n",
> - dev->device_fh,
> - memfile, resolved_path,
> - (unsigned long long)mem->mapped_size, map);
> -
> - return 0;
> -}
>
> /*
> * Retrieves an entry from the devices configuration linked list.
> @@ -376,7 +164,7 @@ add_config_ll_entry(struct virtio_net_config_ll *new_ll_dev)
> }
>
> }
> -
> +/*TODO dpdk alloc/free if possible */
> /*
> * Unmap any memory, close any file descriptors and
> * free any memory owned by a device.
> @@ -389,16 +177,17 @@ cleanup_device(struct virtio_net *dev)
> munmap((void *)(uintptr_t)dev->mem->mapped_address,
> (size_t)dev->mem->mapped_size);
> free(dev->mem);
> + dev->mem = NULL;
> }
>
> /* Close any event notifiers opened by device. */
> - if (dev->virtqueue[VIRTIO_RXQ]->callfd)
> + if (dev->virtqueue[VIRTIO_RXQ]->callfd > 0)
> close((int)dev->virtqueue[VIRTIO_RXQ]->callfd);
> - if (dev->virtqueue[VIRTIO_RXQ]->kickfd)
> + if (dev->virtqueue[VIRTIO_RXQ]->kickfd > 0)
> close((int)dev->virtqueue[VIRTIO_RXQ]->kickfd);
> - if (dev->virtqueue[VIRTIO_TXQ]->callfd)
> + if (dev->virtqueue[VIRTIO_TXQ]->callfd > 0)
> close((int)dev->virtqueue[VIRTIO_TXQ]->callfd);
> - if (dev->virtqueue[VIRTIO_TXQ]->kickfd)
> + if (dev->virtqueue[VIRTIO_TXQ]->kickfd > 0)
> close((int)dev->virtqueue[VIRTIO_TXQ]->kickfd);
> }
>
> @@ -522,8 +311,8 @@ new_device(struct vhost_device_ctx ctx)
> }
>
> /*
> - * Function is called from the CUSE release function. This function will
> - * cleanup the device and remove it from device configuration linked list.
> + * Function is called from the CUSE release function. This function will cleanup
> + * the device and remove it from device configuration linked list.
> */
> static void
> destroy_device(struct vhost_device_ctx ctx)
> @@ -569,6 +358,7 @@ set_owner(struct vhost_device_ctx ctx)
> return -1;
>
> return 0;
> + /* TODO check ctx.fh is meaningful here */
> }
>
> /*
> @@ -651,14 +441,12 @@ set_features(struct vhost_device_ctx ctx, uint64_t *pu)
> * This includes storing offsets used to translate buffer addresses.
> */
> static int
> -set_mem_table(struct vhost_device_ctx ctx, const void *mem_regions_addr,
> - uint32_t nregions)
> +set_mem_table(struct vhost_device_ctx ctx,
> + const struct virtio_memory_regions *regions, uint32_t nregions)
> {
> struct virtio_net *dev;
> - struct vhost_memory_region *mem_regions;
> struct virtio_memory *mem;
> - uint64_t size = offsetof(struct vhost_memory, regions);
> - uint32_t regionidx, valid_regions;
> + uint32_t regionidx;
>
> dev = get_device(ctx);
> if (dev == NULL)
> @@ -682,107 +470,24 @@ set_mem_table(struct vhost_device_ctx ctx, const void *mem_regions_addr,
>
> mem->nregions = nregions;
>
> - mem_regions = (void *)(uintptr_t)
> - ((uint64_t)(uintptr_t)mem_regions_addr + size);
> -
> for (regionidx = 0; regionidx < mem->nregions; regionidx++) {
> /* Populate the region structure for each region. */
> - mem->regions[regionidx].guest_phys_address =
> - mem_regions[regionidx].guest_phys_addr;
> - mem->regions[regionidx].guest_phys_address_end =
> - mem->regions[regionidx].guest_phys_address +
> - mem_regions[regionidx].memory_size;
> - mem->regions[regionidx].memory_size =
> - mem_regions[regionidx].memory_size;
> - mem->regions[regionidx].userspace_address =
> - mem_regions[regionidx].userspace_addr;
> -
> - LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") REGION: %u - GPA: %p - QEMU VA: %p - SIZE (%"PRIu64")\n", dev->device_fh,
> - regionidx,
> - (void *)(uintptr_t)mem->regions[regionidx].guest_phys_address,
> - (void *)(uintptr_t)mem->regions[regionidx].userspace_address,
> - mem->regions[regionidx].memory_size);
> -
> - /*set the base address mapping*/
> + mem->regions[regionidx] = regions[regionidx];
> if (mem->regions[regionidx].guest_phys_address == 0x0) {
> mem->base_address =
> mem->regions[regionidx].userspace_address;
> - /* Map VM memory file */
> - if (host_memory_map(dev, mem, ctx.pid,
> - mem->base_address) != 0) {
> - free(mem);
> - return -1;
> - }
> + mem->mapped_address =
> + mem->regions[regionidx].address_offset;
> }
> }
>
> - /* Check that we have a valid base address. */
> - if (mem->base_address == 0) {
> - RTE_LOG(ERR, VHOST_CONFIG, "(%"PRIu64") Failed to find base address of qemu memory file.\n", dev->device_fh);
> - free(mem);
> - return -1;
> - }
> -
> - /*
> - * Check if all of our regions have valid mappings.
> - * Usually one does not exist in the QEMU memory file.
> - */
> - valid_regions = mem->nregions;
> - for (regionidx = 0; regionidx < mem->nregions; regionidx++) {
> - if ((mem->regions[regionidx].userspace_address <
> - mem->base_address) ||
> - (mem->regions[regionidx].userspace_address >
> - (mem->base_address + mem->mapped_size)))
> - valid_regions--;
> - }
> -
> - /*
> - * If a region does not have a valid mapping,
> - * we rebuild our memory struct to contain only valid entries.
> - */
> - if (valid_regions != mem->nregions) {
> - LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") Not all memory regions exist in the QEMU mem file. Re-populating mem structure\n",
> - dev->device_fh);
> -
> - /*
> - * Re-populate the memory structure with only valid regions.
> - * Invalid regions are over-written with memmove.
> - */
> - valid_regions = 0;
> -
> - for (regionidx = mem->nregions; 0 != regionidx--;) {
> - if ((mem->regions[regionidx].userspace_address <
> - mem->base_address) ||
> - (mem->regions[regionidx].userspace_address >
> - (mem->base_address + mem->mapped_size))) {
> - memmove(&mem->regions[regionidx],
> - &mem->regions[regionidx + 1],
> - sizeof(struct virtio_memory_regions) *
> - valid_regions);
> - } else {
> - valid_regions++;
> - }
> - }
> - }
> - mem->nregions = valid_regions;
> + /* TODO add back the logic that removes invalid memory regions */
> dev->mem = mem;
>
> - /*
> - * Calculate the address offset for each region.
> - * This offset is used to identify the vhost virtual address
> - * corresponding to a QEMU guest physical address.
> - */
> - for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++) {
> - dev->mem->regions[regionidx].address_offset =
> - dev->mem->regions[regionidx].userspace_address -
> - dev->mem->base_address +
> - dev->mem->mapped_address -
> - dev->mem->regions[regionidx].guest_phys_address;
> -
> - }
> return 0;
> }
>
> +
> /*
> * Called from CUSE IOCTL: VHOST_SET_VRING_NUM
> * The virtio device sends us the size of the descriptor ring.
> @@ -896,38 +601,62 @@ get_vring_base(struct vhost_device_ctx ctx, uint32_t index,
> /* State->index refers to the queue index. The txq is 1, rxq is 0. */
> state->num = dev->virtqueue[state->index]->last_used_idx;
>
> - return 0;
> -}
> + if (dev->flags & VIRTIO_DEV_RUNNING) {
> + RTE_LOG(INFO, VHOST_CONFIG,
> + "get_vring_base message is for release\n");
> + notify_ops->destroy_device(dev);
> + /*
> + * sync call.
> + * when it returns, it means it is removed from the data core.
> + */
> + }
> + /* TODO fix all munmap */
> + if (dev->mem) {
> + munmap((void *)(uintptr_t)dev->mem->mapped_address,
> + (size_t)dev->mem->mapped_size);
> + free(dev->mem);
> + dev->mem = NULL;
> + }
>
> -/*
> - * This function uses the eventfd_link kernel module to copy an eventfd file
> - * descriptor provided by QEMU in to our process space.
> - */
> -static int
> -eventfd_copy(struct virtio_net *dev, struct eventfd_copy *eventfd_copy)
> -{
> - int eventfd_link, ret;
>
> - /* Open the character device to the kernel module. */
> - eventfd_link = open(eventfd_cdev, O_RDWR);
> - if (eventfd_link < 0) {
> - RTE_LOG(ERR, VHOST_CONFIG,
> - "(%"PRIu64") eventfd_link module is not loaded\n",
> - dev->device_fh);
> - return -1;
> - }
> + if (dev->virtqueue[VIRTIO_RXQ]->callfd > 0)
> + close((int)dev->virtqueue[VIRTIO_RXQ]->callfd);
> + dev->virtqueue[VIRTIO_RXQ]->callfd = -1;
> + if (dev->virtqueue[VIRTIO_TXQ]->callfd > 0)
> + close((int)dev->virtqueue[VIRTIO_TXQ]->callfd);
> + dev->virtqueue[VIRTIO_TXQ]->callfd = -1;
> + /* We don't clean up callfd here as we won't get CALLFD again */
> +
> + dev->virtqueue[VIRTIO_RXQ]->desc = NULL;
> + dev->virtqueue[VIRTIO_RXQ]->avail = NULL;
> + dev->virtqueue[VIRTIO_RXQ]->used = NULL;
> + dev->virtqueue[VIRTIO_RXQ]->last_used_idx = 0;
> + dev->virtqueue[VIRTIO_RXQ]->last_used_idx_res = 0;
> +
> + dev->virtqueue[VIRTIO_TXQ]->desc = NULL;
> + dev->virtqueue[VIRTIO_TXQ]->avail = NULL;
> + dev->virtqueue[VIRTIO_TXQ]->used = NULL;
> + dev->virtqueue[VIRTIO_TXQ]->last_used_idx = 0;
> + dev->virtqueue[VIRTIO_TXQ]->last_used_idx_res = 0;
>
> - /* Call the IOCTL to copy the eventfd. */
> - ret = ioctl(eventfd_link, EVENTFD_COPY, eventfd_copy);
> - close(eventfd_link);
>
> - if (ret < 0) {
> - RTE_LOG(ERR, VHOST_CONFIG,
> - "(%"PRIu64") EVENTFD_COPY ioctl failed\n",
> - dev->device_fh);
> - return -1;
> - }
> + return 0;
> +}
>
> +static int
> +virtio_is_ready(struct virtio_net *dev, int index)
> +{
> + struct vhost_virtqueue *vq1, *vq2;
> + /* mq support in future.*/
> + vq1 = dev->virtqueue[index];
> + vq2 = dev->virtqueue[index ^ 1];
> + if (vq1 && vq2 && vq1->desc && vq2->desc &&
> + (vq1->kickfd > 0) && (vq1->callfd > 0) &&
> + (vq2->kickfd > 0) && (vq2->callfd > 0)) {
> + LOG_DEBUG(VHOST_CONFIG, "virtio is ready for processing.\n");
> + return 1;
> + }
> + LOG_DEBUG(VHOST_CONFIG, "virtio isn't ready for processing.\n");
> return 0;
> }
>
> @@ -940,7 +669,6 @@ static int
> set_vring_call(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
> {
> struct virtio_net *dev;
> - struct eventfd_copy eventfd_kick;
> struct vhost_virtqueue *vq;
>
> dev = get_device(ctx);
> @@ -953,14 +681,7 @@ set_vring_call(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
> if (vq->kickfd)
> close((int)vq->kickfd);
>
> - /* Populate the eventfd_copy structure and call eventfd_copy. */
> - vq->kickfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
> - eventfd_kick.source_fd = vq->kickfd;
> - eventfd_kick.target_fd = file->fd;
> - eventfd_kick.target_pid = ctx.pid;
> -
> - if (eventfd_copy(dev, &eventfd_kick))
> - return -1;
> + vq->kickfd = file->fd;
>
> return 0;
> }
> @@ -974,7 +695,6 @@ static int
> set_vring_kick(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
> {
> struct virtio_net *dev;
> - struct eventfd_copy eventfd_call;
> struct vhost_virtqueue *vq;
>
> dev = get_device(ctx);
> @@ -986,16 +706,11 @@ set_vring_kick(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
>
> if (vq->callfd)
> close((int)vq->callfd);
> + vq->callfd = file->fd;
>
> - /* Populate the eventfd_copy structure and call eventfd_copy. */
> - vq->callfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
> - eventfd_call.source_fd = vq->callfd;
> - eventfd_call.target_fd = file->fd;
> - eventfd_call.target_pid = ctx.pid;
> -
> - if (eventfd_copy(dev, &eventfd_call))
> - return -1;
> -
> + if (virtio_is_ready(dev, file->index) &&
> + !(dev->flags & VIRTIO_DEV_RUNNING))
> + notify_ops->new_device(dev);
> return 0;
> }
>
> @@ -1024,6 +739,7 @@ set_backend(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
> * If the device isn't already running and both backend fds are set,
> * we add the device.
> */
> + LOG_DEBUG(VHOST_CONFIG, "%s %d\n", __func__, file->fd);
> if (!(dev->flags & VIRTIO_DEV_RUNNING)) {
> if (((int)dev->virtqueue[VIRTIO_TXQ]->backend != VIRTIO_DEV_STOPPED) &&
> ((int)dev->virtqueue[VIRTIO_RXQ]->backend != VIRTIO_DEV_STOPPED))
^ permalink raw reply [flat|nested] 6+ messages in thread
* [dpdk-dev] [RFC PATCH] lib/librte_vhost: cleanup white spaces, tabs and indents
2014-11-15 1:14 [dpdk-dev] [PATCH RFC] lib/librte_vhost: vhost-user Huawei Xie
2014-11-17 6:04 ` Tetsuya Mukawa
@ 2014-11-17 6:06 ` Tetsuya Mukawa
2014-11-17 6:07 ` [dpdk-dev] [RFC PATCH 1/2] lib/librte_vhost: change macro name of include guard Tetsuya Mukawa
2 siblings, 0 replies; 6+ messages in thread
From: Tetsuya Mukawa @ 2014-11-17 6:06 UTC (permalink / raw)
To: dev
---
lib/librte_vhost/rte_virtio_net.h | 4 +--
lib/librte_vhost/vhost-cuse/vhost-net-cdev.c | 4 +--
lib/librte_vhost/vhost-cuse/virtio-net-cdev.c | 8 ++---
lib/librte_vhost/vhost-user/fd_man.c | 13 ++++----
lib/librte_vhost/vhost-user/fd_man.h | 2 +-
lib/librte_vhost/vhost-user/vhost-net-user.c | 37 +++++++++++-----------
lib/librte_vhost/vhost-user/virtio-net-user.c | 44 +++++++++++++--------------
lib/librte_vhost/vhost_rxtx.c | 2 +-
lib/librte_vhost/virtio-net.c | 10 +++---
9 files changed, 61 insertions(+), 63 deletions(-)
diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
index 7a05dab..7d7d001 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -140,12 +140,12 @@ gpa_to_vva(struct virtio_net *dev, uint64_t guest_pa)
}
/**
- * Disable features in feature_mask. Returns 0 on success.
+ * Disable features in feature_mask. Returns 0 on success.
*/
int rte_vhost_feature_disable(uint64_t feature_mask);
/**
- * Enable features in feature_mask. Returns 0 on success.
+ * Enable features in feature_mask. Returns 0 on success.
*/
int rte_vhost_feature_enable(uint64_t feature_mask);
diff --git a/lib/librte_vhost/vhost-cuse/vhost-net-cdev.c b/lib/librte_vhost/vhost-cuse/vhost-net-cdev.c
index 4671643..688ec00 100644
--- a/lib/librte_vhost/vhost-cuse/vhost-net-cdev.c
+++ b/lib/librte_vhost/vhost-cuse/vhost-net-cdev.c
@@ -329,7 +329,7 @@ vhost_net_ioctl(fuse_req_t req, int cmd, void *arg,
} else {
int fd;
file = *(const struct vhost_vring_file *)in_buf;
- LOG_DEBUG(VHOST_CONFIG,
+ LOG_DEBUG(VHOST_CONFIG,
"kick/call idx:%d fd:%d\n", file.index, file.fd);
if ((fd = eventfd_copy(file.fd, ctx.pid)) < 0){
fuse_reply_ioctl(req, -1, NULL, 0);
@@ -338,7 +338,7 @@ vhost_net_ioctl(fuse_req_t req, int cmd, void *arg,
if (cmd == VHOST_SET_VRING_KICK) {
VHOST_IOCTL_R(struct vhost_vring_file, file, ops->set_vring_call);
}
- else {
+ else {
VHOST_IOCTL_R(struct vhost_vring_file, file, ops->set_vring_kick);
}
}
diff --git a/lib/librte_vhost/vhost-cuse/virtio-net-cdev.c b/lib/librte_vhost/vhost-cuse/virtio-net-cdev.c
index 5c16aa5..7381140 100644
--- a/lib/librte_vhost/vhost-cuse/virtio-net-cdev.c
+++ b/lib/librte_vhost/vhost-cuse/virtio-net-cdev.c
@@ -288,7 +288,7 @@ cuse_set_mem_table(struct vhost_device_ctx ctx, const struct vhost_memory *mem_r
base_address =
regions[idx].userspace_address;
/* Map VM memory file */
- if (host_memory_map(ctx.pid, base_address,
+ if (host_memory_map(ctx.pid, base_address,
&mapped_address, &mapped_size) != 0) {
return -1;
}
@@ -297,18 +297,18 @@ cuse_set_mem_table(struct vhost_device_ctx ctx, const struct vhost_memory *mem_r
/* Check that we have a valid base address. */
if (base_address == 0) {
- RTE_LOG(ERR, VHOST_CONFIG,
+ RTE_LOG(ERR, VHOST_CONFIG,
"Failed to find base address of qemu memory file.\n");
return -1;
}
for (idx = 0; idx < nregions; idx++) {
- regions[idx].address_offset =
+ regions[idx].address_offset =
mapped_address - base_address +
regions[idx].userspace_address -
regions[idx].guest_phys_address;
}
-
+
ops->set_mem_table(ctx, ®ions[0], nregions);
return 0;
}
diff --git a/lib/librte_vhost/vhost-user/fd_man.c b/lib/librte_vhost/vhost-user/fd_man.c
index c7fd3f2..cbc656b 100644
--- a/lib/librte_vhost/vhost-user/fd_man.c
+++ b/lib/librte_vhost/vhost-user/fd_man.c
@@ -15,7 +15,7 @@
* Returns the index in the fdset for a fd.
* If fd is -1, it means to search for a free entry.
* @return
- * Index for the fd, or -1 if fd isn't in the fdset.
+ * Index for the fd, or -1 if fd isn't in the fdset.
*/
static int
fdset_find_fd(struct fdset *pfdset, int fd)
@@ -23,8 +23,8 @@ fdset_find_fd(struct fdset *pfdset, int fd)
int i;
for (i = 0; i < pfdset->num && pfdset->fd[i].fd != fd; i++);
-
- return i == pfdset->num ? -1 : i;
+
+ return i == pfdset->num ? -1 : i;
}
static int
@@ -35,7 +35,7 @@ fdset_find_free_slot(struct fdset *pfdset)
}
static void
-fdset_add_fd(struct fdset *pfdset, int idx, int fd, fd_cb rcb,
+fdset_add_fd(struct fdset *pfdset, int idx, int fd, fd_cb rcb,
fd_cb wcb, uint64_t dat)
{
struct fdentry *pfdentry = &pfdset->fd[idx];
@@ -111,7 +111,7 @@ fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, uint64_t dat)
}
/**
- * Unregister the fd from the fdset.
+ * Unregister the fd from the fdset.
*/
void
fdset_del(struct fdset *pfdset, int fd)
@@ -148,11 +148,10 @@ fdset_event_dispatch(struct fdset *pfdset)
for (i = 0; i < num; i++) {
pfdentry = &pfdset->fd[i];
- if (FD_ISSET(pfdentry->fd, &rfds))
+ if (FD_ISSET(pfdentry->fd, &rfds))
pfdentry->rcb(pfdentry->fd, pfdentry->dat);
if (FD_ISSET(pfdentry->fd, &wfds))
pfdentry->wcb(pfdentry->fd, pfdentry->dat);
}
-
}
}
diff --git a/lib/librte_vhost/vhost-user/fd_man.h b/lib/librte_vhost/vhost-user/fd_man.h
index 57cc81d..8df17b4 100644
--- a/lib/librte_vhost/vhost-user/fd_man.h
+++ b/lib/librte_vhost/vhost-user/fd_man.h
@@ -15,7 +15,7 @@ struct fdentry {
struct fdset {
struct fdentry fd[MAX_FDS];
- int num;
+ int num;
};
diff --git a/lib/librte_vhost/vhost-user/vhost-net-user.c b/lib/librte_vhost/vhost-user/vhost-net-user.c
index 34450f4..0b100ba 100644
--- a/lib/librte_vhost/vhost-user/vhost-net-user.c
+++ b/lib/librte_vhost/vhost-user/vhost-net-user.c
@@ -106,7 +106,7 @@ uds_socket(const char *path)
ret = listen(sockfd, 1);
if (ret == -1)
goto err;
-
+
return sockfd;
err:
@@ -129,7 +129,7 @@ read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
iov.iov_base = buf;
iov.iov_len = buflen;
-
+
msgh.msg_iov = &iov;
msgh.msg_iovlen = 1;
msgh.msg_control = control;
@@ -148,7 +148,7 @@ read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
- if ( (cmsg->cmsg_level == SOL_SOCKET) &&
+ if ( (cmsg->cmsg_level == SOL_SOCKET) &&
(cmsg->cmsg_type == SCM_RIGHTS)) {
memcpy(fds, CMSG_DATA(cmsg), fdsize);
break;
@@ -162,14 +162,14 @@ read_vhost_message(int sockfd, struct VhostUserMsg *msg)
{
int ret;
- ret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE,
+ ret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE,
msg->fds, VHOST_MEMORY_MAX_NREGIONS);
if (ret <= 0)
return ret;
if (msg->size) {
if (msg->size > sizeof(msg->payload)) {
- RTE_LOG(ERR, VHOST_CONFIG,
+ RTE_LOG(ERR, VHOST_CONFIG,
"%s: invalid size:%d\n", __func__, msg->size);
return -1;
}
@@ -182,7 +182,7 @@ read_vhost_message(int sockfd, struct VhostUserMsg *msg)
}
}
- return ret;
+ return ret;
}
static int
@@ -200,7 +200,7 @@ send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
iov.iov_len = buflen;
msgh.msg_iov = &iov;
msgh.msg_iovlen = 1;
-
+
if (fds && fd_num > 0) {
msgh.msg_control = control;
msgh.msg_controllen = sizeof(control);
@@ -222,7 +222,7 @@ send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
RTE_LOG(ERR, VHOST_CONFIG, "sendmsg error\n");
return -1;
}
-
+
return 0;
}
@@ -233,15 +233,15 @@ send_vhost_message(int sockfd, struct VhostUserMsg *msg)
msg->flags &= ~VHOST_USER_VERSION_MASK;
msg->flags |= VHOST_USER_VERSION;
- msg->flags |= VHOST_USER_REPLY_MASK;
+ msg->flags |= VHOST_USER_REPLY_MASK;
- ret = send_fd_message(sockfd, (char *)msg,
+ ret = send_fd_message(sockfd, (char *)msg,
VHOST_USER_HDR_SIZE + msg->size, NULL, 0);
-
+
return ret;
}
-/* call back when there is new connection. */
+/* call back when there is new connection. */
static void
vserver_new_vq_conn(int fd, uint64_t dat)
{
@@ -251,7 +251,7 @@ vserver_new_vq_conn(int fd, uint64_t dat)
struct vhost_device_ctx vdev_ctx = { 0 };
conn_fd = accept(fd, NULL, NULL);
- RTE_LOG(INFO, VHOST_CONFIG,
+ RTE_LOG(INFO, VHOST_CONFIG,
"%s: new connection is %d\n", __func__, conn_fd);
if (conn_fd < 0)
return;
@@ -259,8 +259,8 @@ vserver_new_vq_conn(int fd, uint64_t dat)
fh = ops->new_device(vdev_ctx);
RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", fh);
- fdset_add(&vserver->fdset,
- conn_fd, vserver_message_handler, NULL, fh);
+ fdset_add(&vserver->fdset,
+ conn_fd, vserver_message_handler, NULL, fh);
}
/* callback when there is message on the connfd */
@@ -277,7 +277,7 @@ vserver_message_handler(int connfd, uint64_t dat)
ret = read_vhost_message(connfd, &msg);
if (ret < 0) {
printf("vhost read message failed\n");
-
+
/*TODO: cleanup */
close(connfd);
fdset_del(&g_vhost_server->fdset, connfd);
@@ -286,7 +286,7 @@ vserver_message_handler(int connfd, uint64_t dat)
return;
} else if (ret == 0) {
/*TODO: cleanup */
- RTE_LOG(INFO, VHOST_CONFIG,
+ RTE_LOG(INFO, VHOST_CONFIG,
"vhost peer closed\n");
close(connfd);
fdset_del(&g_vhost_server->fdset, connfd);
@@ -296,7 +296,7 @@ vserver_message_handler(int connfd, uint64_t dat)
}
if (msg.request > VHOST_USER_MAX) {
/*TODO: cleanup */
- RTE_LOG(INFO, VHOST_CONFIG,
+ RTE_LOG(INFO, VHOST_CONFIG,
"vhost read incorrect message\n");
close(connfd);
fdset_del(&g_vhost_server->fdset, connfd);
@@ -363,7 +363,6 @@ vserver_message_handler(int connfd, uint64_t dat)
default:
break;
-
}
}
diff --git a/lib/librte_vhost/vhost-user/virtio-net-user.c b/lib/librte_vhost/vhost-user/virtio-net-user.c
index f38e6cc..4103977 100644
--- a/lib/librte_vhost/vhost-user/virtio-net-user.c
+++ b/lib/librte_vhost/vhost-user/virtio-net-user.c
@@ -65,7 +65,7 @@ user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
}
for (idx = 0; idx < memory.nregions; idx++) {
- uint64_t size = memory.regions[idx].userspace_addr -
+ uint64_t size = memory.regions[idx].userspace_addr -
base_address + memory.regions[idx].memory_size;
if (mem_size < size)
mem_size = size;
@@ -75,28 +75,28 @@ user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
* here we assume qemu will map only one file for memory allocation,
* we only use fds[0] with offset 0.
*/
- mapped_address = (uint64_t)(uintptr_t)mmap(NULL, mem_size,
+ mapped_address = (uint64_t)(uintptr_t)mmap(NULL, mem_size,
PROT_READ | PROT_WRITE, MAP_SHARED, pmsg->fds[0], 0);
if (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) {
RTE_LOG(ERR, VHOST_CONFIG, " mmap qemu guest failed.\n");
return -1;
}
-
+
for (idx = 0; idx < memory.nregions; idx++) {
- regions[idx].guest_phys_address =
+ regions[idx].guest_phys_address =
memory.regions[idx].guest_phys_addr;
- regions[idx].guest_phys_address_end =
+ regions[idx].guest_phys_address_end =
memory.regions[idx].guest_phys_addr +
memory.regions[idx].memory_size;
regions[idx].memory_size = memory.regions[idx].memory_size;
- regions[idx].userspace_address =
+ regions[idx].userspace_address =
memory.regions[idx].userspace_addr;
- regions[idx].address_offset = mapped_address - base_address +
+ regions[idx].address_offset = mapped_address - base_address +
regions[idx].userspace_address -
regions[idx].guest_phys_address;
- LOG_DEBUG(VHOST_CONFIG,
+ LOG_DEBUG(VHOST_CONFIG,
"REGION: %u - GPA: %p - QEMU VA: %p - SIZE (%"PRIu64")\n",
idx,
(void *)(uintptr_t)regions[idx].guest_phys_address,
@@ -129,28 +129,28 @@ user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
for (idx = 0; idx < memory.nregions; idx++) {
- regions[idx].guest_phys_address =
+ regions[idx].guest_phys_address =
memory.regions[idx].guest_phys_addr;
- regions[idx].guest_phys_address_end =
+ regions[idx].guest_phys_address_end =
memory.regions[idx].guest_phys_addr +
memory.regions[idx].memory_size;
regions[idx].memory_size = memory.regions[idx].memory_size;
- regions[idx].userspace_address =
+ regions[idx].userspace_address =
memory.regions[idx].userspace_addr;
/*
- mapped_address = (uint64_t)(uintptr_t)mmap(NULL,
- regions[idx].memory_size,
- PROT_READ | PROT_WRITE, MAP_SHARED,
- pmsg->fds[idx],
+ mapped_address = (uint64_t)(uintptr_t)mmap(NULL,
+ regions[idx].memory_size,
+ PROT_READ | PROT_WRITE, MAP_SHARED,
+ pmsg->fds[idx],
memory.regions[idx].mmap_offset);
*/
/* This is ugly */
- mapped_address = (uint64_t)(uintptr_t)mmap(NULL,
+ mapped_address = (uint64_t)(uintptr_t)mmap(NULL,
regions[idx].memory_size +
- memory.regions[idx].mmap_offset,
- PROT_READ | PROT_WRITE, MAP_SHARED,
- pmsg->fds[idx],
+ memory.regions[idx].mmap_offset,
+ PROT_READ | PROT_WRITE, MAP_SHARED,
+ pmsg->fds[idx],
0);
printf("mapped to %p\n", (void *)mapped_address);
@@ -165,7 +165,7 @@ user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
regions[idx].address_offset = mapped_address -
regions[idx].guest_phys_address;
- LOG_DEBUG(VHOST_CONFIG,
+ LOG_DEBUG(VHOST_CONFIG,
"REGION: %u - GPA: %p - QEMU VA: %p - SIZE (%"PRIu64")\n",
idx,
(void *)(uintptr_t)regions[idx].guest_phys_address,
@@ -189,7 +189,7 @@ user_set_vring_call(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
file.fd = pmsg->fds[0];
- RTE_LOG(INFO, VHOST_CONFIG,
+ RTE_LOG(INFO, VHOST_CONFIG,
"vring call idx:%d file:%d\n", file.index, file.fd);
ops->set_vring_call(ctx, &file);
}
@@ -202,7 +202,7 @@ user_set_vring_kick(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
file.fd = pmsg->fds[0];
- RTE_LOG(INFO, VHOST_CONFIG,
+ RTE_LOG(INFO, VHOST_CONFIG,
"vring kick idx:%d file:%d\n", file.index, file.fd);
ops->set_vring_kick(ctx, &file);
}
diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 8ff0301..3a33eb0 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -217,7 +217,7 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id, struct rte_me
if (vq->last_used_idx == avail_idx)
return 0;
- LOG_DEBUG(VHOST_DATA, "(%"PRIu64") %s(%d->%d)\n",
+ LOG_DEBUG(VHOST_DATA, "(%"PRIu64") %s(%d->%d)\n",
dev->device_fh, __func__, vq->last_used_idx, avail_idx);
/* Prefetch available ring to retrieve head indexes. */
diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index 516e743..30661e3 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -82,7 +82,7 @@ qva_to_vva(struct virtio_net *dev, uint64_t qemu_va)
if ((qemu_va >= region->userspace_address) &&
(qemu_va <= region->userspace_address +
region->memory_size)) {
- vhost_va = qemu_va + region->guest_phys_address +
+ vhost_va = qemu_va + region->guest_phys_address +
region->address_offset -
region->userspace_address;
break;
@@ -476,7 +476,7 @@ set_mem_table(struct vhost_device_ctx ctx,
if (mem->regions[regionidx].guest_phys_address == 0x0) {
mem->base_address =
mem->regions[regionidx].userspace_address;
- mem->mapped_address =
+ mem->mapped_address =
mem->regions[regionidx].address_offset;
}
}
@@ -602,7 +602,7 @@ get_vring_base(struct vhost_device_ctx ctx, uint32_t index,
state->num = dev->virtqueue[state->index]->last_used_idx;
if (dev->flags & VIRTIO_DEV_RUNNING) {
- RTE_LOG(INFO, VHOST_CONFIG,
+ RTE_LOG(INFO, VHOST_CONFIG,
"get_vring_base message is for release\n");
notify_ops->destroy_device(dev);
/*
@@ -626,7 +626,7 @@ get_vring_base(struct vhost_device_ctx ctx, uint32_t index,
close((int)dev->virtqueue[VIRTIO_TXQ]->callfd);
dev->virtqueue[VIRTIO_TXQ]->callfd = -1;
/* We don't cleanup callfd here as we willn't get CALLFD again */
-
+
dev->virtqueue[VIRTIO_RXQ]->desc = NULL;
dev->virtqueue[VIRTIO_RXQ]->avail = NULL;
dev->virtqueue[VIRTIO_RXQ]->used = NULL;
@@ -650,7 +650,7 @@ virtio_is_ready(struct virtio_net *dev, int index)
/* mq support in future.*/
vq1 = dev->virtqueue[index];
vq2 = dev->virtqueue[index ^ 1];
- if (vq1 && vq2 && vq1->desc && vq2->desc &&
+ if (vq1 && vq2 && vq1->desc && vq2->desc &&
(vq1->kickfd > 0) && (vq1->callfd > 0) &&
(vq2->kickfd > 0) && (vq2->callfd > 0)) {
LOG_DEBUG(VHOST_CONFIG, "virtio is ready for processing.\n");
--
1.9.1
^ permalink raw reply [flat|nested] 6+ messages in thread
* [dpdk-dev] [RFC PATCH 1/2] lib/librte_vhost: change macro name of include guard.
2014-11-15 1:14 [dpdk-dev] [PATCH RFC] lib/librte_vhost: vhost-user Huawei Xie
2014-11-17 6:04 ` Tetsuya Mukawa
2014-11-17 6:06 ` [dpdk-dev] [RFC PATCH] lib/librte_vhost: cleanup white spaces, tabs and indents Tetsuya Mukawa
@ 2014-11-17 6:07 ` Tetsuya Mukawa
2014-11-17 6:07 ` [dpdk-dev] [RFC PATCH 2/2] lib/librte_vhost: Add device abstraction layer Tetsuya Mukawa
2 siblings, 1 reply; 6+ messages in thread
From: Tetsuya Mukawa @ 2014-11-17 6:07 UTC (permalink / raw)
To: dev
This patch changes the include guard macro name as follows.
- "_VIRTIO_NET_H_" -> "_RTE_VIRTIO_NET_H_"
---
lib/librte_vhost/rte_virtio_net.h | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
index 7d7d001..a09533d 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -31,8 +31,8 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#ifndef _VIRTIO_NET_H_
-#define _VIRTIO_NET_H_
+#ifndef _RTE_VIRTIO_NET_H_
+#define _RTE_VIRTIO_NET_H_
#include <stdint.h>
#include <linux/virtio_ring.h>
@@ -189,4 +189,4 @@ uint32_t rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
uint32_t rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint32_t count);
-#endif /* _VIRTIO_NET_H_ */
+#endif /* _RTE_VIRTIO_NET_H_ */
--
1.9.1
^ permalink raw reply [flat|nested] 6+ messages in thread
* [dpdk-dev] [RFC PATCH 2/2] lib/librte_vhost: Add device abstraction layer
2014-11-17 6:07 ` [dpdk-dev] [RFC PATCH 1/2] lib/librte_vhost: change macro name of include guard Tetsuya Mukawa
@ 2014-11-17 6:07 ` Tetsuya Mukawa
0 siblings, 0 replies; 6+ messages in thread
From: Tetsuya Mukawa @ 2014-11-17 6:07 UTC (permalink / raw)
To: dev
---
lib/librte_vhost/Makefile | 6 +-
lib/librte_vhost/rte_virtio_net.h | 22 ++++-
lib/librte_vhost/vhost-cuse/vhost-net-cdev.c | 6 +-
lib/librte_vhost/vhost-cuse/vhost-net-cdev.h | 40 +++++++++
lib/librte_vhost/vhost-cuse/virtio-net-cdev.c | 1 +
lib/librte_vhost/vhost-net.c | 101 +++++++++++++++++++++++
lib/librte_vhost/vhost-net.h | 114 ++++++++++++++++++++++++++
lib/librte_vhost/vhost-user/vhost-net-user.c | 6 +-
lib/librte_vhost/vhost-user/vhost-net-user.h | 3 +
lib/librte_vhost/vhost-user/virtio-net-user.c | 1 +
10 files changed, 290 insertions(+), 10 deletions(-)
create mode 100644 lib/librte_vhost/vhost-cuse/vhost-net-cdev.h
create mode 100644 lib/librte_vhost/vhost-net.c
create mode 100644 lib/librte_vhost/vhost-net.h
diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index cb4e172..4363a14 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -37,11 +37,11 @@ LIB = librte_vhost.a
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I. -I vhost-user -I vhost-cuse -O3 -D_FILE_OFFSET_BITS=64 -lfuse
LDFLAGS += -lfuse
# all source are stored in SRCS-y
-#SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost-cuse/vhost-net-cdev.c vhost-cuse/virtio-net-cdev.c
+SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost-cuse/vhost-net-cdev.c vhost-cuse/virtio-net-cdev.c
-SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost-user/fd_man.c vhost-user/vhost-net-user.c vhost-user/virtio-net-user.c
+SRCS-$(CONFIG_RTE_LIBRTE_VHOST) += vhost-user/fd_man.c vhost-user/vhost-net-user.c vhost-user/virtio-net-user.c
-SRCS-$(CONFIG_RTE_LIBRTE_VHOST) += virtio-net.c vhost_rxtx.c
+SRCS-$(CONFIG_RTE_LIBRTE_VHOST) += virtio-net.c vhost_rxtx.c vhost-net.c
# install includes
SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h
diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
index a09533d..116c7e9 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -140,6 +140,23 @@ gpa_to_vva(struct virtio_net *dev, uint64_t guest_pa)
}
/**
+ * Enum for vhost driver types.
+ */
+enum rte_vhost_driver_t {
+ VHOST_DRV_CUSE, /* vhost-cuse driver */
+ VHOST_DRV_USER, /* vhost-user driver */
+ VHOST_DRV_NUM /* the number of vhost driver types */
+};
+
+/**
+ * Structure contains information relating vhost driver.
+ */
+struct rte_vhost_driver {
+ enum rte_vhost_driver_t type; /**< driver type. */
+ const char *dev_name; /**< accessing device name. */
+};
+
+/**
* Disable features in feature_mask. Returns 0 on success.
*/
int rte_vhost_feature_disable(uint64_t feature_mask);
@@ -155,12 +172,13 @@ uint64_t rte_vhost_feature_get(void);
int rte_vhost_enable_guest_notification(struct virtio_net *dev, uint16_t queue_id, int enable);
/* Register vhost driver. dev_name could be different for multiple instance support. */
-int rte_vhost_driver_register(const char *dev_name);
+struct rte_vhost_driver *rte_vhost_driver_register(
+ const char *dev_name, enum rte_vhost_driver_t type);
/* Register callbacks. */
int rte_vhost_driver_callback_register(struct virtio_net_device_ops const * const);
-int rte_vhost_driver_session_start(void);
+int rte_vhost_driver_session_start(struct rte_vhost_driver *drv);
/**
* This function adds buffers to the virtio devices RX virtqueue. Buffers can
diff --git a/lib/librte_vhost/vhost-cuse/vhost-net-cdev.c b/lib/librte_vhost/vhost-cuse/vhost-net-cdev.c
index 688ec00..6ea54ee 100644
--- a/lib/librte_vhost/vhost-cuse/vhost-net-cdev.c
+++ b/lib/librte_vhost/vhost-cuse/vhost-net-cdev.c
@@ -47,6 +47,7 @@
#include "virtio-net-cdev.h"
#include "vhost-net.h"
+#include "vhost-net-cdev.h"
#include "eventfd_link/eventfd_link.h"
#define FUSE_OPT_DUMMY "\0\0"
@@ -373,8 +374,9 @@ static const struct cuse_lowlevel_ops vhost_net_ops = {
* vhost_net_device_ops are also passed when the device is registered in app.
*/
int
-rte_vhost_driver_register(const char *dev_name)
+vhost_cuse_driver_register(struct rte_vhost_driver *drv)
{
+ const char *dev_name = drv->dev_name;
struct cuse_info cuse_info;
char device_name[PATH_MAX] = "";
char char_device_name[PATH_MAX] = "";
@@ -428,7 +430,7 @@ rte_vhost_driver_register(const char *dev_name)
* release and ioctl calls.
*/
int
-rte_vhost_driver_session_start(void)
+vhost_cuse_driver_session_start(void)
{
fuse_session_loop(session);
diff --git a/lib/librte_vhost/vhost-cuse/vhost-net-cdev.h b/lib/librte_vhost/vhost-cuse/vhost-net-cdev.h
new file mode 100644
index 0000000..cb094ee
--- /dev/null
+++ b/lib/librte_vhost/vhost-cuse/vhost-net-cdev.h
@@ -0,0 +1,40 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2014 IGEL Co.,Ltd. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of IGEL nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VHOST_NET_CDEV_H
+#define _VHOST_NET_CDEV_H
+
+int vhost_cuse_driver_register(struct rte_vhost_driver *drv);
+int vhost_cuse_driver_session_start(void);
+
+#endif
diff --git a/lib/librte_vhost/vhost-cuse/virtio-net-cdev.c b/lib/librte_vhost/vhost-cuse/virtio-net-cdev.c
index 7381140..42a6b24 100644
--- a/lib/librte_vhost/vhost-cuse/virtio-net-cdev.c
+++ b/lib/librte_vhost/vhost-cuse/virtio-net-cdev.c
@@ -46,6 +46,7 @@
#include <errno.h>
#include <rte_log.h>
+#include <rte_virtio_net.h>
#include "vhost-net.h"
#include "virtio-net-cdev.h"
diff --git a/lib/librte_vhost/vhost-net.c b/lib/librte_vhost/vhost-net.c
new file mode 100644
index 0000000..7a4537d
--- /dev/null
+++ b/lib/librte_vhost/vhost-net.c
@@ -0,0 +1,101 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2014 IGEL Co.,Ltd. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of IGEL nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <errno.h>
+#include <rte_malloc.h>
+#include <rte_virtio_net.h>
+
+#include "vhost-cuse/vhost-net-cdev.h"
+#include "vhost-user/vhost-net-user.h"
+
+/**
+ * This function abstracts cuse and vhost-user driver registration.
+ */
+struct rte_vhost_driver *
+rte_vhost_driver_register(const char *dev_name, enum rte_vhost_driver_t type)
+{
+ int ret;
+ struct rte_vhost_driver *drv;
+
+ drv = rte_zmalloc(dev_name, sizeof(struct rte_vhost_driver),
+ CACHE_LINE_SIZE);
+ if (drv == NULL)
+ return NULL;
+
+ drv->dev_name = dev_name;
+ drv->type = type;
+
+ switch (type) {
+ case VHOST_DRV_CUSE:
+ ret = vhost_cuse_driver_register(drv);
+ if (ret != 0)
+ goto err;
+ break;
+ case VHOST_DRV_USER:
+ ret = vhost_user_driver_register(drv);
+ if (ret != 0)
+ goto err;
+ break;
+ default:
+ break;
+ }
+
+ return drv;
+err:
+ free(drv);
+ return NULL;
+}
+
+/**
+ * The session is launched allowing the application to
+ * receive open, release and ioctl calls.
+ */
+int
+rte_vhost_driver_session_start(struct rte_vhost_driver *drv)
+{
+ if (drv == NULL)
+ return -ENODEV;
+
+ switch (drv->type) {
+ case VHOST_DRV_CUSE:
+ vhost_cuse_driver_session_start();
+ break;
+ case VHOST_DRV_USER:
+ vhost_user_driver_session_start();
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
diff --git a/lib/librte_vhost/vhost-net.h b/lib/librte_vhost/vhost-net.h
new file mode 100644
index 0000000..881a45f
--- /dev/null
+++ b/lib/librte_vhost/vhost-net.h
@@ -0,0 +1,114 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VHOST_NET_H_
+#define _VHOST_NET_H_
+#include <stdint.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <linux/vhost.h>
+
+#include <rte_log.h>
+
+/* Macros for printing using RTE_LOG */
+#define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
+#define RTE_LOGTYPE_VHOST_DATA RTE_LOGTYPE_USER1
+
+#ifdef RTE_LIBRTE_VHOST_DEBUG
+#define VHOST_MAX_PRINT_BUFF 6072
+#define LOG_LEVEL RTE_LOG_DEBUG
+#define LOG_DEBUG(log_type, fmt, args...) RTE_LOG(DEBUG, log_type, fmt, ##args)
+#define VHOST_PRINT_PACKET(device, addr, size, header) do { \
+ char *pkt_addr = (char *)(addr); \
+ unsigned int index; \
+ char packet[VHOST_MAX_PRINT_BUFF]; \
+ \
+ if ((header)) \
+ snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%"PRIu64") Header size %d: ", (device->device_fh), (size)); \
+ else \
+ snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%"PRIu64") Packet size %d: ", (device->device_fh), (size)); \
+ for (index = 0; index < (size); index++) { \
+ snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), \
+ "%02hhx ", pkt_addr[index]); \
+ } \
+ snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), "\n"); \
+ \
+ LOG_DEBUG(VHOST_DATA, "%s", packet); \
+} while (0)
+#else
+#define LOG_LEVEL RTE_LOG_INFO
+#define LOG_DEBUG(log_type, fmt, args...) do {} while (0)
+#define VHOST_PRINT_PACKET(device, addr, size, header) do {} while (0)
+#endif
+
+
+/*
+ * Structure used to identify device context.
+ */
+struct vhost_device_ctx {
+ pid_t pid; /* PID of process calling the IOCTL. */
+ uint64_t fh; /* Populated with fi->fh to track the device index. */
+};
+
+/*
+ * Structure contains function pointers to be defined in virtio-net.c. These
+ * functions are called in CUSE context and are used to configure devices.
+ */
+struct vhost_net_device_ops {
+ int (*new_device)(struct vhost_device_ctx);
+ void (*destroy_device)(struct vhost_device_ctx);
+
+ int (*get_features)(struct vhost_device_ctx, uint64_t *);
+ int (*set_features)(struct vhost_device_ctx, uint64_t *);
+
+ int (*set_mem_table)(struct vhost_device_ctx,
+ const struct virtio_memory_regions *, uint32_t);
+
+ int (*set_vring_num)(struct vhost_device_ctx, struct vhost_vring_state *);
+ int (*set_vring_addr)(struct vhost_device_ctx, struct vhost_vring_addr *);
+ int (*set_vring_base)(struct vhost_device_ctx, struct vhost_vring_state *);
+ int (*get_vring_base)(struct vhost_device_ctx, uint32_t, struct vhost_vring_state *);
+
+ int (*set_vring_kick)(struct vhost_device_ctx, struct vhost_vring_file *);
+ int (*set_vring_call)(struct vhost_device_ctx, struct vhost_vring_file *);
+
+ int (*set_backend)(struct vhost_device_ctx, struct vhost_vring_file *);
+
+ int (*set_owner)(struct vhost_device_ctx);
+ int (*reset_owner)(struct vhost_device_ctx);
+};
+
+
+struct vhost_net_device_ops const *get_virtio_net_callbacks(void);
+#endif /* _VHOST_NET_H_ */
diff --git a/lib/librte_vhost/vhost-user/vhost-net-user.c b/lib/librte_vhost/vhost-user/vhost-net-user.c
index 0b100ba..837f840 100644
--- a/lib/librte_vhost/vhost-user/vhost-net-user.c
+++ b/lib/librte_vhost/vhost-user/vhost-net-user.c
@@ -371,9 +371,9 @@ vserver_message_handler(int connfd, uint64_t dat)
* Creates and initialise the vhost server.
*/
int
-rte_vhost_driver_register(const char *path)
+vhost_user_driver_register(struct rte_vhost_driver *drv)
{
-
+ const char *path = drv->dev_name;
struct vhost_server *vserver;
if (g_vhost_server != NULL)
@@ -408,7 +408,7 @@ rte_vhost_driver_register(const char *path)
int
-rte_vhost_driver_session_start(void)
+vhost_user_driver_session_start(void)
{
fdset_event_dispatch(&g_vhost_server->fdset);
return 0;
diff --git a/lib/librte_vhost/vhost-user/vhost-net-user.h b/lib/librte_vhost/vhost-user/vhost-net-user.h
index c9df9fa..d90c147 100644
--- a/lib/librte_vhost/vhost-user/vhost-net-user.h
+++ b/lib/librte_vhost/vhost-user/vhost-net-user.h
@@ -71,4 +71,7 @@ typedef struct VhostUserMsg {
#define VHOST_USER_VERSION (0x1)
/*****************************************************************************/
+int vhost_user_driver_register(struct rte_vhost_driver *drv);
+int vhost_user_driver_session_start(void);
+
#endif
diff --git a/lib/librte_vhost/vhost-user/virtio-net-user.c b/lib/librte_vhost/vhost-user/virtio-net-user.c
index 4103977..f839219 100644
--- a/lib/librte_vhost/vhost-user/virtio-net-user.c
+++ b/lib/librte_vhost/vhost-user/virtio-net-user.c
@@ -38,6 +38,7 @@
#include <sys/mman.h>
#include <rte_log.h>
+#include <rte_virtio_net.h>
#include "virtio-net-user.h"
#include "vhost-net-user.h"
--
1.9.1
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [dpdk-dev] [PATCH RFC] lib/librte_vhost: vhost-user
2014-11-17 6:04 ` Tetsuya Mukawa
@ 2014-11-17 6:11 ` Tetsuya Mukawa
0 siblings, 0 replies; 6+ messages in thread
From: Tetsuya Mukawa @ 2014-11-17 6:11 UTC (permalink / raw)
To: Huawei Xie, dev
Hi Xie,
(2014/11/17 15:04), Tetsuya Mukawa wrote:
> Hi Xie,
>
>
> (2014/11/15 10:14), Huawei Xie wrote:
>> implement socket server
>> fd event dispatch mechanism
>> vhost sock message handling
>> memory map for each region
>> VHOST_USER_SET_VRING_KICK_FD as the indicator that vring is available
>> VHOST_USER_GET_VRING_BASE as the message that vring should be released
>>
>> The message flow between vhost-user and vhost-cuse is kindof different,
>> which makes virtio-net common message handler layer difficult and complicated to handle
>> both cases in new_device/destroy_device/memory map/resource cleanup.
>>
>> Will only leave the most common messag handling in virtio-net, and move the
>> control logic to cuse/fuse layer.
>>
>>
>> Signed-off-by: Huawei Xie <huawei.xie@intel.com>
> Great patch!
> I guess we can start from this patch to implement vhost-user and
> abstraction layer.
>
> I've checked the patch.
>
> 1. White space, tab and indent patch.
> I will send a patch that cleans up white space, tabs and indents. Could you
> please check it?
> It might be difficult to see the difference if your editor doesn't show
> spaces or tabs.
>
> 2. Some files are based on old code.
> At least, following patch is not included.
> - vhost: fix build without unused result
> Also, vhost_rxtx.c probably isn't based on the latest code.
>
> 3. Device abstraction layer code
> I will send the device abstraction layer code after this email.
> Anyway, I guess we need to decide whether or not we still keep the
> vhost-cuse code.
Additionally, the above patches are based on your RFC patch.
Tetsuya
>
> 4. Multiple devices operation.
> For example, when thread1 opens vhost-user device1 and thread2 opens
> vhost-user device2,
> each thread may want to register its own callbacks.
> Current implementation may not allow this.
> I guess we need to eliminate global variables in librte_vhost as much as
> possible.
>
> Thanks,
> Tetsuya
>
>> ---
>> lib/librte_vhost/Makefile | 14 +-
>> lib/librte_vhost/eventfd_link/eventfd_link.c | 27 +-
>> lib/librte_vhost/eventfd_link/eventfd_link.h | 48 +-
>> lib/librte_vhost/libvirt/qemu-wrap.py | 367 ---------------
>> lib/librte_vhost/rte_virtio_net.h | 106 ++---
>> lib/librte_vhost/vhost-cuse/vhost-net-cdev.c | 436 ++++++++++++++++++
>> lib/librte_vhost/vhost-cuse/virtio-net-cdev.c | 314 +++++++++++++
>> lib/librte_vhost/vhost-cuse/virtio-net-cdev.h | 43 ++
>> lib/librte_vhost/vhost-net-cdev.c | 389 ----------------
>> lib/librte_vhost/vhost-net-cdev.h | 113 -----
>> lib/librte_vhost/vhost-user/fd_man.c | 158 +++++++
>> lib/librte_vhost/vhost-user/fd_man.h | 31 ++
>> lib/librte_vhost/vhost-user/vhost-net-user.c | 417 +++++++++++++++++
>> lib/librte_vhost/vhost-user/vhost-net-user.h | 74 +++
>> lib/librte_vhost/vhost-user/virtio-net-user.c | 208 +++++++++
>> lib/librte_vhost/vhost-user/virtio-net-user.h | 11 +
>> lib/librte_vhost/vhost_rxtx.c | 625 ++++----------------------
>> lib/librte_vhost/virtio-net.c | 450 ++++---------------
>> 18 files changed, 1939 insertions(+), 1892 deletions(-)
>> delete mode 100755 lib/librte_vhost/libvirt/qemu-wrap.py
>> create mode 100644 lib/librte_vhost/vhost-cuse/vhost-net-cdev.c
>> create mode 100644 lib/librte_vhost/vhost-cuse/virtio-net-cdev.c
>> create mode 100644 lib/librte_vhost/vhost-cuse/virtio-net-cdev.h
>> delete mode 100644 lib/librte_vhost/vhost-net-cdev.c
>> delete mode 100644 lib/librte_vhost/vhost-net-cdev.h
>> create mode 100644 lib/librte_vhost/vhost-user/fd_man.c
>> create mode 100644 lib/librte_vhost/vhost-user/fd_man.h
>> create mode 100644 lib/librte_vhost/vhost-user/vhost-net-user.c
>> create mode 100644 lib/librte_vhost/vhost-user/vhost-net-user.h
>> create mode 100644 lib/librte_vhost/vhost-user/virtio-net-user.c
>> create mode 100644 lib/librte_vhost/vhost-user/virtio-net-user.h
>>
>> diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
>> index c008d64..cb4e172 100644
>> --- a/lib/librte_vhost/Makefile
>> +++ b/lib/librte_vhost/Makefile
>> @@ -34,17 +34,19 @@ include $(RTE_SDK)/mk/rte.vars.mk
>> # library name
>> LIB = librte_vhost.a
>>
>> -CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -D_FILE_OFFSET_BITS=64 -lfuse
>> +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I. -I vhost-user -I vhost-cuse -O3 -D_FILE_OFFSET_BITS=64 -lfuse
>> LDFLAGS += -lfuse
>> # all source are stored in SRCS-y
>> -SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost-net-cdev.c virtio-net.c vhost_rxtx.c
>> +#SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost-cuse/vhost-net-cdev.c vhost-cuse/virtio-net-cdev.c
>> +
>> +SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost-user/fd_man.c vhost-user/vhost-net-user.c vhost-user/virtio-net-user.c
>> +
>> +SRCS-$(CONFIG_RTE_LIBRTE_VHOST) += virtio-net.c vhost_rxtx.c
>>
>> # install includes
>> SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h
>>
>> -# dependencies
>> -DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_eal
>> -DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_ether
>> -DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_mbuf
>> +# this lib needs eal
>> +DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_eal lib/librte_mbuf
>>
>> include $(RTE_SDK)/mk/rte.lib.mk
>> diff --git a/lib/librte_vhost/eventfd_link/eventfd_link.c b/lib/librte_vhost/eventfd_link/eventfd_link.c
>> index 7755dd6..4c9b628 100644
>> --- a/lib/librte_vhost/eventfd_link/eventfd_link.c
>> +++ b/lib/librte_vhost/eventfd_link/eventfd_link.c
>> @@ -13,8 +13,7 @@
>> * General Public License for more details.
>> *
>> * You should have received a copy of the GNU General Public License
>> - * along with this program; if not, write to the Free Software
>> - * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
>> + * along with this program; If not, see <http://www.gnu.org/licenses/>.
>> * The full GNU General Public License is included in this distribution
>> * in the file called LICENSE.GPL.
>> *
>> @@ -78,8 +77,7 @@ eventfd_link_ioctl(struct file *f, unsigned int ioctl, unsigned long arg)
>>
>> switch (ioctl) {
>> case EVENTFD_COPY:
>> - if (copy_from_user(&eventfd_copy, argp,
>> - sizeof(struct eventfd_copy)))
>> + if (copy_from_user(&eventfd_copy, argp, sizeof(struct eventfd_copy)))
>> return -EFAULT;
>>
>> /*
>> @@ -88,28 +86,28 @@ eventfd_link_ioctl(struct file *f, unsigned int ioctl, unsigned long arg)
>> task_target =
>> pid_task(find_vpid(eventfd_copy.target_pid), PIDTYPE_PID);
>> if (task_target == NULL) {
>> - pr_debug("Failed to get mem ctx for target pid\n");
>> + printk(KERN_DEBUG "Failed to get mem ctx for target pid\n");
>> return -EFAULT;
>> }
>>
>> files = get_files_struct(current);
>> if (files == NULL) {
>> - pr_debug("Failed to get files struct\n");
>> + printk(KERN_DEBUG "Failed to get files struct\n");
>> return -EFAULT;
>> }
>>
>> rcu_read_lock();
>> file = fcheck_files(files, eventfd_copy.source_fd);
>> if (file) {
>> - if (file->f_mode & FMODE_PATH ||
>> - !atomic_long_inc_not_zero(&file->f_count))
>> + if (file->f_mode & FMODE_PATH
>> + || !atomic_long_inc_not_zero(&file->f_count))
>> file = NULL;
>> }
>> rcu_read_unlock();
>> put_files_struct(files);
>>
>> if (file == NULL) {
>> - pr_debug("Failed to get file from source pid\n");
>> + printk(KERN_DEBUG "Failed to get file from source pid\n");
>> return 0;
>> }
>>
>> @@ -128,25 +126,26 @@ eventfd_link_ioctl(struct file *f, unsigned int ioctl, unsigned long arg)
>>
>> files = get_files_struct(task_target);
>> if (files == NULL) {
>> - pr_debug("Failed to get files struct\n");
>> + printk(KERN_DEBUG "Failed to get files struct\n");
>> return -EFAULT;
>> }
>>
>> rcu_read_lock();
>> file = fcheck_files(files, eventfd_copy.target_fd);
>> if (file) {
>> - if (file->f_mode & FMODE_PATH ||
>> - !atomic_long_inc_not_zero(&file->f_count))
>> - file = NULL;
>> + if (file->f_mode & FMODE_PATH
>> + || !atomic_long_inc_not_zero(&file->f_count))
>> + file = NULL;
>> }
>> rcu_read_unlock();
>> put_files_struct(files);
>>
>> if (file == NULL) {
>> - pr_debug("Failed to get file from target pid\n");
>> + printk(KERN_DEBUG "Failed to get file from target pid\n");
>> return 0;
>> }
>>
>> +
>> /*
>> * Install the file struct from the target process into the
>> * file desciptor of the source process,
>> diff --git a/lib/librte_vhost/eventfd_link/eventfd_link.h b/lib/librte_vhost/eventfd_link/eventfd_link.h
>> index ea619ec..38052e2 100644
>> --- a/lib/librte_vhost/eventfd_link/eventfd_link.h
>> +++ b/lib/librte_vhost/eventfd_link/eventfd_link.h
>> @@ -1,7 +1,4 @@
>> /*-
>> - * This file is provided under a dual BSD/GPLv2 license. When using or
>> - * redistributing this file, you may do so under either license.
>> - *
>> * GPL LICENSE SUMMARY
>> *
>> * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
>> @@ -16,61 +13,28 @@
>> * General Public License for more details.
>> *
>> * You should have received a copy of the GNU General Public License
>> - * along with this program; if not, write to the Free Software
>> - * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
>> + * along with this program; If not, see <http://www.gnu.org/licenses/>.
>> * The full GNU General Public License is included in this distribution
>> * in the file called LICENSE.GPL.
>> *
>> * Contact Information:
>> * Intel Corporation
>> - *
>> - * BSD LICENSE
>> - *
>> - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
>> - * All rights reserved.
>> - *
>> - * Redistribution and use in source and binary forms, with or without
>> - * modification, are permitted provided that the following conditions
>> - * are met:
>> - *
>> - * Redistributions of source code must retain the above copyright
>> - * notice, this list of conditions and the following disclaimer.
>> - * Redistributions in binary form must reproduce the above copyright
>> - * notice, this list of conditions and the following disclaimer in
>> - * the documentation and/or other materials provided with the
>> - * distribution.
>> - * Neither the name of Intel Corporation nor the names of its
>> - * contributors may be used to endorse or promote products derived
>> - * from this software without specific prior written permission.
>> - *
>> - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>> - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>> - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>> - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>> - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>> - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>> - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>> - *
>> */
>>
>> #ifndef _EVENTFD_LINK_H_
>> #define _EVENTFD_LINK_H_
>>
>> /*
>> - * ioctl to copy an fd entry in calling process to an fd in a target process
>> + * ioctl to copy an fd entry in calling process to an fd in a target process
>> */
>> #define EVENTFD_COPY 1
>>
>> /*
>> - * arguements for the EVENTFD_COPY ioctl
>> + * arguments for the EVENTFD_COPY ioctl
>> */
>> struct eventfd_copy {
>> - unsigned target_fd; /* fd in the target pid */
>> - unsigned source_fd; /* fd in the calling pid */
>> - pid_t target_pid; /* pid of the target pid */
>> + unsigned target_fd; /**< fd in the target pid */
>> + unsigned source_fd; /**< fd in the calling pid */
>> + pid_t target_pid; /**< pid of the target process */
>> };
>> #endif /* _EVENTFD_LINK_H_ */
>> diff --git a/lib/librte_vhost/libvirt/qemu-wrap.py b/lib/librte_vhost/libvirt/qemu-wrap.py
>> deleted file mode 100755
>> index e2d68a0..0000000
>> --- a/lib/librte_vhost/libvirt/qemu-wrap.py
>> +++ /dev/null
>> @@ -1,367 +0,0 @@
>> -#!/usr/bin/python
>> -#/*
>> -# * BSD LICENSE
>> -# *
>> -# * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
>> -# * All rights reserved.
>> -# *
>> -# * Redistribution and use in source and binary forms, with or without
>> -# * modification, are permitted provided that the following conditions
>> -# * are met:
>> -# *
>> -# * * Redistributions of source code must retain the above copyright
>> -# * notice, this list of conditions and the following disclaimer.
>> -# * * Redistributions in binary form must reproduce the above copyright
>> -# * notice, this list of conditions and the following disclaimer in
>> -# * the documentation and/or other materials provided with the
>> -# * distribution.
>> -# * * Neither the name of Intel Corporation nor the names of its
>> -# * contributors may be used to endorse or promote products derived
>> -# * from this software without specific prior written permission.
>> -# *
>> -# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>> -# * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> -# * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> -# * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>> -# * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>> -# * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> -# * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>> -# * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>> -# * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> -# * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>> -# * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>> -# */
>> -
>> -#####################################################################
>> -# This script is designed to modify the call to the QEMU emulator
>> -# to support userspace vhost when starting a guest machine through
>> -# libvirt with vhost enabled. The steps to enable this are as follows
>> -# and should be run as root:
>> -#
>> -# 1. Place this script in a libvirtd's binary search PATH ($PATH)
>> -# A good location would be in the same directory that the QEMU
>> -# binary is located
>> -#
>> -# 2. Ensure that the script has the same owner/group and file
>> -# permissions as the QEMU binary
>> -#
>> -# 3. Update the VM xml file using "virsh edit VM.xml"
>> -#
>> -# 3.a) Set the VM to use the launch script
>> -#
>> -# Set the emulator path contained in the
>> -# <emulator><emulator/> tags
>> -#
>> -# e.g replace <emulator>/usr/bin/qemu-kvm<emulator/>
>> -# with <emulator>/usr/bin/qemu-wrap.py<emulator/>
>> -#
>> -# 3.b) Set the VM's device's to use vhost-net offload
>> -#
>> -# <interface type="network">
>> -# <model type="virtio"/>
>> -# <driver name="vhost"/>
>> -# <interface/>
>> -#
>> -# 4. Enable libvirt to access our userpace device file by adding it to
>> -# controllers cgroup for libvirtd using the following steps
>> -#
>> -# 4.a) In /etc/libvirt/qemu.conf add/edit the following lines:
>> -# 1) cgroup_controllers = [ ... "devices", ... ]
>> -# 2) clear_emulator_capabilities = 0
>> -# 3) user = "root"
>> -# 4) group = "root"
>> -# 5) cgroup_device_acl = [
>> -# "/dev/null", "/dev/full", "/dev/zero",
>> -# "/dev/random", "/dev/urandom",
>> -# "/dev/ptmx", "/dev/kvm", "/dev/kqemu",
>> -# "/dev/rtc", "/dev/hpet", "/dev/net/tun",
>> -# "/dev/<devbase-name>-<index>",
>> -# ]
>> -#
>> -# 4.b) Disable SELinux or set to permissive mode
>> -#
>> -# 4.c) Mount cgroup device controller
>> -# "mkdir /dev/cgroup"
>> -# "mount -t cgroup none /dev/cgroup -o devices"
>> -#
>> -# 4.d) Set hugetlbfs_mount variable - ( Optional )
>> -# VMs using userspace vhost must use hugepage backed
>> -# memory. This can be enabled in the libvirt XML
>> -# config by adding a memory backing section to the
>> -# XML config e.g.
>> -# <memoryBacking>
>> -# <hugepages/>
>> -# </memoryBacking>
>> -# This memory backing section should be added after the
>> -# <memory> and <currentMemory> sections. This will add
>> -# flags "-mem-prealloc -mem-path <path>" to the QEMU
>> -# command line. The hugetlbfs_mount variable can be used
>> -# to override the default <path> passed through by libvirt.
>> -#
>> -# if "-mem-prealloc" or "-mem-path <path>" are not passed
>> -# through and a vhost device is detected then these options will
>> -# be automatically added by this script. This script will detect
>> -# the system hugetlbfs mount point to be used for <path>. The
>> -# default <path> for this script can be overidden by the
>> -# hugetlbfs_dir variable in the configuration section of this script.
>> -#
>> -#
>> -# 4.e) Restart the libvirtd system process
>> -# e.g. on Fedora "systemctl restart libvirtd.service"
>> -#
>> -#
>> -# 4.f) Edit the Configuration Parameters section of this script
>> -# to point to the correct emulator location and set any
>> -# addition options
>> -#
>> -# The script modifies the libvirtd Qemu call by modifying/adding
>> -# options based on the configuration parameters below.
>> -# NOTE:
>> -# emul_path and us_vhost_path must be set
>> -# All other parameters are optional
>> -#####################################################################
>> -
>> -
>> -#############################################
>> -# Configuration Parameters
>> -#############################################
>> -#Path to QEMU binary
>> -emul_path = "/usr/local/bin/qemu-system-x86_64"
>> -
>> -#Path to userspace vhost device file
>> -# This filename should match the --dev-basename --dev-index parameters of
>> -# the command used to launch the userspace vhost sample application e.g.
>> -# if the sample app lauch command is:
>> -# ./build/vhost-switch ..... --dev-basename usvhost --dev-index 1
>> -# then this variable should be set to:
>> -# us_vhost_path = "/dev/usvhost-1"
>> -us_vhost_path = "/dev/usvhost-1"
>> -
>> -#List of additional user defined emulation options. These options will
>> -#be added to all Qemu calls
>> -emul_opts_user = []
>> -
>> -#List of additional user defined emulation options for vhost only.
>> -#These options will only be added to vhost enabled guests
>> -emul_opts_user_vhost = []
>> -
>> -#For all VHOST enabled VMs, the VM memory is preallocated from hugetlbfs
>> -# Set this variable to one to enable this option for all VMs
>> -use_huge_all = 0
>> -
>> -#Instead of autodetecting, override the hugetlbfs directory by setting
>> -#this variable
>> -hugetlbfs_dir = ""
>> -
>> -#############################################
>> -
>> -
>> -#############################################
>> -# ****** Do Not Modify Below this Line ******
>> -#############################################
>> -
>> -import sys, os, subprocess
>> -
>> -
>> -#List of open userspace vhost file descriptors
>> -fd_list = []
>> -
>> -#additional virtio device flags when using userspace vhost
>> -vhost_flags = [ "csum=off",
>> - "gso=off",
>> - "guest_tso4=off",
>> - "guest_tso6=off",
>> - "guest_ecn=off"
>> - ]
>> -
>> -
>> -#############################################
>> -# Find the system hugefile mount point.
>> -# Note:
>> -# if multiple hugetlbfs mount points exist
>> -# then the first one found will be used
>> -#############################################
>> -def find_huge_mount():
>> -
>> - if (len(hugetlbfs_dir)):
>> - return hugetlbfs_dir
>> -
>> - huge_mount = ""
>> -
>> - if (os.access("/proc/mounts", os.F_OK)):
>> - f = open("/proc/mounts", "r")
>> - line = f.readline()
>> - while line:
>> - line_split = line.split(" ")
>> - if line_split[2] == 'hugetlbfs':
>> - huge_mount = line_split[1]
>> - break
>> - line = f.readline()
>> - else:
>> - print "/proc/mounts not found"
>> - exit (1)
>> -
>> - f.close
>> - if len(huge_mount) == 0:
>> - print "Failed to find hugetlbfs mount point"
>> - exit (1)
>> -
>> - return huge_mount
>> -
>> -
>> -#############################################
>> -# Get a userspace Vhost file descriptor
>> -#############################################
>> -def get_vhost_fd():
>> -
>> - if (os.access(us_vhost_path, os.F_OK)):
>> - fd = os.open( us_vhost_path, os.O_RDWR)
>> - else:
>> - print ("US-Vhost file %s not found" %us_vhost_path)
>> - exit (1)
>> -
>> - return fd
>> -
>> -
>> -#############################################
>> -# Check for vhostfd. if found then replace
>> -# with our own vhost fd and append any vhost
>> -# flags onto the end
>> -#############################################
>> -def modify_netdev_arg(arg):
>> -
>> - global fd_list
>> - vhost_in_use = 0
>> - s = ''
>> - new_opts = []
>> - netdev_opts = arg.split(",")
>> -
>> - for opt in netdev_opts:
>> - #check if vhost is used
>> - if "vhost" == opt[:5]:
>> - vhost_in_use = 1
>> - else:
>> - new_opts.append(opt)
>> -
>> - #if using vhost append vhost options
>> - if vhost_in_use == 1:
>> - #append vhost on option
>> - new_opts.append('vhost=on')
>> - #append vhostfd ption
>> - new_fd = get_vhost_fd()
>> - new_opts.append('vhostfd=' + str(new_fd))
>> - fd_list.append(new_fd)
>> -
>> - #concatenate all options
>> - for opt in new_opts:
>> - if len(s) > 0:
>> - s+=','
>> -
>> - s+=opt
>> -
>> - return s
>> -
>> -
>> -#############################################
>> -# Main
>> -#############################################
>> -def main():
>> -
>> - global fd_list
>> - global vhost_in_use
>> - new_args = []
>> - num_cmd_args = len(sys.argv)
>> - emul_call = ''
>> - mem_prealloc_set = 0
>> - mem_path_set = 0
>> - num = 0;
>> -
>> - #parse the parameters
>> - while (num < num_cmd_args):
>> - arg = sys.argv[num]
>> -
>> - #Check netdev +1 parameter for vhostfd
>> - if arg == '-netdev':
>> - num_vhost_devs = len(fd_list)
>> - new_args.append(arg)
>> -
>> - num+=1
>> - arg = sys.argv[num]
>> - mod_arg = modify_netdev_arg(arg)
>> - new_args.append(mod_arg)
>> -
>> - #append vhost flags if this is a vhost device
>> - # and -device is the next arg
>> - # i.e -device -opt1,-opt2,...,-opt3,%vhost
>> - if (num_vhost_devs < len(fd_list)):
>> - num+=1
>> - arg = sys.argv[num]
>> - if arg == '-device':
>> - new_args.append(arg)
>> - num+=1
>> - new_arg = sys.argv[num]
>> - for flag in vhost_flags:
>> - new_arg = ''.join([new_arg,',',flag])
>> - new_args.append(new_arg)
>> - else:
>> - new_args.append(arg)
>> - elif arg == '-mem-prealloc':
>> - mem_prealloc_set = 1
>> - new_args.append(arg)
>> - elif arg == '-mem-path':
>> - mem_path_set = 1
>> - new_args.append(arg)
>> -
>> - else:
>> - new_args.append(arg)
>> -
>> - num+=1
>> -
>> - #Set Qemu binary location
>> - emul_call+=emul_path
>> - emul_call+=" "
>> -
>> - #Add prealloc mem options if using vhost and not already added
>> - if ((len(fd_list) > 0) and (mem_prealloc_set == 0)):
>> - emul_call += "-mem-prealloc "
>> -
>> - #Add mempath mem options if using vhost and not already added
>> - if ((len(fd_list) > 0) and (mem_path_set == 0)):
>> - #Detect and add hugetlbfs mount point
>> - mp = find_huge_mount()
>> - mp = "".join(["-mem-path ", mp])
>> - emul_call += mp
>> - emul_call += " "
>> -
>> -
>> - #add user options
>> - for opt in emul_opts_user:
>> - emul_call += opt
>> - emul_call += " "
>> -
>> - #Add add user vhost only options
>> - if len(fd_list) > 0:
>> - for opt in emul_opts_user_vhost:
>> - emul_call += opt
>> - emul_call += " "
>> -
>> - #Add updated libvirt options
>> - iter_args = iter(new_args)
>> - #skip 1st arg i.e. call to this script
>> - next(iter_args)
>> - for arg in iter_args:
>> - emul_call+=str(arg)
>> - emul_call+= " "
>> -
>> - #Call QEMU
>> - subprocess.call(emul_call, shell=True)
>> -
>> -
>> - #Close usvhost files
>> - for fd in fd_list:
>> - os.close(fd)
>> -
>> -
>> -if __name__ == "__main__":
>> - main()
>> -
>> diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
>> index 00b1328..7a05dab 100644
>> --- a/lib/librte_vhost/rte_virtio_net.h
>> +++ b/lib/librte_vhost/rte_virtio_net.h
>> @@ -34,11 +34,6 @@
>> #ifndef _VIRTIO_NET_H_
>> #define _VIRTIO_NET_H_
>>
>> -/**
>> - * @file
>> - * Interface to vhost net
>> - */
>> -
>> #include <stdint.h>
>> #include <linux/virtio_ring.h>
>> #include <linux/virtio_net.h>
>> @@ -48,66 +43,38 @@
>> #include <rte_mempool.h>
>> #include <rte_mbuf.h>
>>
>> -/* Used to indicate that the device is running on a data core */
>> -#define VIRTIO_DEV_RUNNING 1
>> -
>> -/* Backend value set by guest. */
>> -#define VIRTIO_DEV_STOPPED -1
>> -
>> +#define VIRTIO_DEV_RUNNING 1 /**< Used to indicate that the device is running on a data core. */
>> +#define VIRTIO_DEV_STOPPED -1 /**< Backend value set by guest. */
>>
>> /* Enum for virtqueue management. */
>> enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
>>
>> -#define BUF_VECTOR_MAX 256
>> -
>> -/**
>> - * Structure contains buffer address, length and descriptor index
>> - * from vring to do scatter RX.
>> - */
>> -struct buf_vector {
>> - uint64_t buf_addr;
>> - uint32_t buf_len;
>> - uint32_t desc_idx;
>> -};
>> -
>> /**
>> * Structure contains variables relevant to RX/TX virtqueues.
>> */
>> struct vhost_virtqueue {
>> - struct vring_desc *desc; /**< Virtqueue descriptor ring. */
>> - struct vring_avail *avail; /**< Virtqueue available ring. */
>> - struct vring_used *used; /**< Virtqueue used ring. */
>> - uint32_t size; /**< Size of descriptor ring. */
>> - uint32_t backend; /**< Backend value to determine if device should started/stopped. */
>> - uint16_t vhost_hlen; /**< Vhost header length (varies depending on RX merge buffers. */
>> - volatile uint16_t last_used_idx; /**< Last index used on the available ring */
>> - volatile uint16_t last_used_idx_res; /**< Used for multiple devices reserving buffers. */
>> - eventfd_t callfd; /**< Currently unused as polling mode is enabled. */
>> - eventfd_t kickfd; /**< Used to notify the guest (trigger interrupt). */
>> - struct buf_vector buf_vec[BUF_VECTOR_MAX]; /**< for scatter RX. */
>> -} __rte_cache_aligned;
>> -
>> -/**
>> - * Device structure contains all configuration information relating to the device.
>> - */
>> -struct virtio_net {
>> - struct vhost_virtqueue *virtqueue[VIRTIO_QNUM]; /**< Contains all virtqueue information. */
>> - struct virtio_memory *mem; /**< QEMU memory and memory region information. */
>> - uint64_t features; /**< Negotiated feature set. */
>> - uint64_t device_fh; /**< device identifier. */
>> - uint32_t flags; /**< Device flags. Only used to check if device is running on data core. */
>> - void *priv; /**< private context */
>> + struct vring_desc *desc; /**< descriptor ring. */
>> + struct vring_avail *avail; /**< available ring. */
>> + struct vring_used *used; /**< used ring. */
>> + uint32_t size; /**< Size of descriptor ring. */
>> + uint32_t backend; /**< Backend value to determine if device should be started/stopped. */
>> + uint16_t vhost_hlen; /**< Vhost header length (varies depending on RX merge buffers). */
>> + volatile uint16_t last_used_idx; /**< Last index used on the available ring. */
>> + volatile uint16_t last_used_idx_res; /**< Used for multiple devices reserving buffers. */
>> + eventfd_t callfd; /**< Currently unused as polling mode is enabled. */
>> + eventfd_t kickfd; /**< Used to notify the guest (trigger interrupt). */
>> } __rte_cache_aligned;
>>
>> /**
>> - * Information relating to memory regions including offsets to addresses in QEMUs memory file.
>> + * Information relating to memory regions including offsets to
>> + * addresses in QEMUs memory file.
>> */
>> struct virtio_memory_regions {
>> - uint64_t guest_phys_address; /**< Base guest physical address of region. */
>> - uint64_t guest_phys_address_end; /**< End guest physical address of region. */
>> - uint64_t memory_size; /**< Size of region. */
>> - uint64_t userspace_address; /**< Base userspace address of region. */
>> - uint64_t address_offset; /**< Offset of region for address translation. */
>> + uint64_t guest_phys_address; /**< Base guest physical address of region. */
>> + uint64_t guest_phys_address_end; /**< End guest physical address of region. */
>> + uint64_t memory_size; /**< Size of region. */
>> + uint64_t userspace_address; /**< Base userspace address of region. */
>> + uint64_t address_offset; /**< Offset of region for address translation. */
>> };
>>
>>
>> @@ -115,21 +82,34 @@ struct virtio_memory_regions {
>> * Memory structure includes region and mapping information.
>> */
>> struct virtio_memory {
>> - uint64_t base_address; /**< Base QEMU userspace address of the memory file. */
>> - uint64_t mapped_address; /**< Mapped address of memory file base in our applications memory space. */
>> - uint64_t mapped_size; /**< Total size of memory file. */
>> - uint32_t nregions; /**< Number of memory regions. */
>> + uint64_t base_address; /**< Base QEMU userspace address of the memory file. */
>> + uint64_t mapped_address; /**< Mapped address of memory file base in our application's memory space. */
>> + uint64_t mapped_size; /**< Total size of memory file. */
>> + uint32_t nregions; /**< Number of memory regions. */
>> struct virtio_memory_regions regions[0]; /**< Memory region information. */
>> };
>>
>> /**
>> + * Device structure contains all configuration information relating to the device.
>> + */
>> +struct virtio_net {
>> + struct vhost_virtqueue *virtqueue[VIRTIO_QNUM]; /**< Contains all virtqueue information. */
>> + struct virtio_memory *mem; /**< QEMU memory and memory region information. */
>> + uint64_t features; /**< Negotiated feature set. */
>> + uint64_t device_fh; /**< Device identifier. */
>> + uint32_t flags; /**< Device flags. Only used to check if device is running on data core. */
>> + void *priv;
>> +} __rte_cache_aligned;
>> +
>> +/**
>> * Device operations to add/remove device.
>> */
>> struct virtio_net_device_ops {
>> - int (*new_device)(struct virtio_net *); /**< Add device. */
>> - void (*destroy_device)(volatile struct virtio_net *); /**< Remove device. */
>> + int (*new_device)(struct virtio_net *); /**< Add device. */
>> + void (*destroy_device)(struct virtio_net *); /**< Remove device. */
>> };
>>
>> +
>> static inline uint16_t __attribute__((always_inline))
>> rte_vring_available_entries(struct virtio_net *dev, uint16_t queue_id)
>> {
>> @@ -179,7 +159,7 @@ int rte_vhost_driver_register(const char *dev_name);
>>
>> /* Register callbacks. */
>> int rte_vhost_driver_callback_register(struct virtio_net_device_ops const * const);
>> -/* Start vhost driver session blocking loop. */
>> +
>> int rte_vhost_driver_session_start(void);
>>
>> /**
>> @@ -192,8 +172,8 @@ int rte_vhost_driver_session_start(void);
>> * @return
>> * num of packets enqueued
>> */
>> -uint16_t rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
>> - struct rte_mbuf **pkts, uint16_t count);
>> +uint32_t rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
>> + struct rte_mbuf **pkts, uint32_t count);
>>
>> /**
>> * This function gets guest buffers from the virtio device TX virtqueue,
>> @@ -206,7 +186,7 @@ uint16_t rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
>> * @return
>> * num of packets dequeued
>> */
>> -uint16_t rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
>> - struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count);
>> +uint32_t rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
>> + struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint32_t count);
>>
>> #endif /* _VIRTIO_NET_H_ */
>> diff --git a/lib/librte_vhost/vhost-cuse/vhost-net-cdev.c b/lib/librte_vhost/vhost-cuse/vhost-net-cdev.c
>> new file mode 100644
>> index 0000000..4671643
>> --- /dev/null
>> +++ b/lib/librte_vhost/vhost-cuse/vhost-net-cdev.c
>> @@ -0,0 +1,436 @@
>> +/*-
>> + * BSD LICENSE
>> + *
>> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
>> + * All rights reserved.
>> + *
>> + * Redistribution and use in source and binary forms, with or without
>> + * modification, are permitted provided that the following conditions
>> + * are met:
>> + *
>> + * * Redistributions of source code must retain the above copyright
>> + * notice, this list of conditions and the following disclaimer.
>> + * * Redistributions in binary form must reproduce the above copyright
>> + * notice, this list of conditions and the following disclaimer in
>> + * the documentation and/or other materials provided with the
>> + * distribution.
>> + * * Neither the name of Intel Corporation nor the names of its
>> + * contributors may be used to endorse or promote products derived
>> + * from this software without specific prior written permission.
>> + *
>> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>> + */
>> +
>> +#include <stdint.h>
>> +#include <fuse/cuse_lowlevel.h>
>> +#include <linux/limits.h>
>> +#include <linux/vhost.h>
>> +#include <linux/virtio_net.h>
>> +#include <string.h>
>> +#include <unistd.h>
>> +#include <sys/ioctl.h>
>> +
>> +#include <rte_ethdev.h>
>> +#include <rte_log.h>
>> +#include <rte_string_fns.h>
>> +#include <rte_virtio_net.h>
>> +
>> +#include "virtio-net-cdev.h"
>> +#include "vhost-net.h"
>> +#include "eventfd_link/eventfd_link.h"
>> +
>> +#define FUSE_OPT_DUMMY "\0\0"
>> +#define FUSE_OPT_FORE "-f\0\0"
>> +#define FUSE_OPT_NOMULTI "-s\0\0"
>> +
>> +static const uint32_t default_major = 231;
>> +static const uint32_t default_minor = 1;
>> +static const char cuse_device_name[] = "/dev/cuse";
>> +static const char default_cdev[] = "vhost-net";
>> +static const char eventfd_cdev[] = "/dev/eventfd-link";
>> +
>> +static struct fuse_session *session;
>> +const struct vhost_net_device_ops const *ops;
>> +
>> +/*
>> + * Returns vhost_device_ctx from given fuse_req_t. The index is populated later
>> + * when the device is added to the device linked list.
>> + */
>> +static struct vhost_device_ctx
>> +fuse_req_to_vhost_ctx(fuse_req_t req, struct fuse_file_info *fi)
>> +{
>> + struct vhost_device_ctx ctx;
>> + struct fuse_ctx const *const req_ctx = fuse_req_ctx(req);
>> +
>> + ctx.pid = req_ctx->pid;
>> + ctx.fh = fi->fh;
>> +
>> + return ctx;
>> +}
>> +
>> +/*
>> + * When the device is created in QEMU it gets initialised here and
>> + * added to the device linked list.
>> + */
>> +static void
>> +vhost_net_open(fuse_req_t req, struct fuse_file_info *fi)
>> +{
>> + struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
>> + int err = 0;
>> +
>> + err = ops->new_device(ctx);
>> + if (err == -1) {
>> + fuse_reply_err(req, EPERM);
>> + return;
>> + }
>> +
>> + fi->fh = err;
>> +
>> + RTE_LOG(INFO, VHOST_CONFIG,
>> + "(%"PRIu64") Device configuration started\n", fi->fh);
>> + fuse_reply_open(req, fi);
>> +}
>> +
>> +/*
>> + * When QEMU is shutdown or killed the device gets released.
>> + */
>> +static void
>> +vhost_net_release(fuse_req_t req, struct fuse_file_info *fi)
>> +{
>> + int err = 0;
>> + struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
>> +
>> + ops->destroy_device(ctx);
>> + RTE_LOG(INFO, VHOST_CONFIG, "(%"PRIu64") Device released\n", ctx.fh);
>> + fuse_reply_err(req, err);
>> +}
>> +
>> +/*
>> + * Boilerplate code for CUSE IOCTL
>> + * Implicit arguments: ctx, req, result.
>> + */
>> +#define VHOST_IOCTL(func) do { \
>> + result = (func)(ctx); \
>> + fuse_reply_ioctl(req, result, NULL, 0); \
>> +} while (0)
>> +
>> +/*
>> + * Boilerplate IOCTL RETRY
>> + * Implicit arguments: req.
>> + */
>> +#define VHOST_IOCTL_RETRY(size_r, size_w) do { \
>> + struct iovec iov_r = { arg, (size_r) }; \
>> + struct iovec iov_w = { arg, (size_w) }; \
>> + fuse_reply_ioctl_retry(req, &iov_r, \
>> + (size_r) ? 1 : 0, &iov_w, (size_w) ? 1 : 0);\
>> +} while (0)
>> +
>> +/*
>> + * Boilerplate code for CUSE Read IOCTL
>> + * Implicit arguments: ctx, req, result, in_bufsz, in_buf.
>> + */
>> +#define VHOST_IOCTL_R(type, var, func) do { \
>> + if (!in_bufsz) { \
>> + VHOST_IOCTL_RETRY(sizeof(type), 0);\
>> + } else { \
>> + (var) = *(const type*)in_buf; \
>> + result = func(ctx, &(var)); \
>> + fuse_reply_ioctl(req, result, NULL, 0);\
>> + } \
>> +} while (0)
>> +
>> +/*
>> + * Boilerplate code for CUSE Write IOCTL
>> + * Implicit arguments: ctx, req, result, out_bufsz.
>> + */
>> +#define VHOST_IOCTL_W(type, var, func) do { \
>> + if (!out_bufsz) { \
>> + VHOST_IOCTL_RETRY(0, sizeof(type));\
>> + } else { \
>> + result = (func)(ctx, &(var));\
>> + fuse_reply_ioctl(req, result, &(var), sizeof(type));\
>> + } \
>> +} while (0)
>> +
>> +/*
>> + * Boilerplate code for CUSE Read/Write IOCTL
>> + * Implicit arguments: ctx, req, result, in_bufsz, in_buf.
>> + */
>> +#define VHOST_IOCTL_RW(type1, var1, type2, var2, func) do { \
>> + if (!in_bufsz) { \
>> + VHOST_IOCTL_RETRY(sizeof(type1), sizeof(type2));\
>> + } else { \
>> + (var1) = *(const type1*) (in_buf); \
>> + result = (func)(ctx, (var1), &(var2)); \
>> + fuse_reply_ioctl(req, result, &(var2), sizeof(type2));\
>> + } \
>> +} while (0)
>> +
>> +/*
>> + * This function uses the eventfd_link kernel module to copy an eventfd file
>> + * descriptor provided by QEMU in to our process space.
>> + */
>> +static int
>> +eventfd_copy(int target_fd, int target_pid)
>> +{
>> + int eventfd_link, ret;
>> + struct eventfd_copy eventfd_copy;
>> + int fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
>> +
>> + if (fd == -1)
>> + return -1;
>> +
>> + /* Open the character device to the kernel module. */
>> + /* TODO: check this earlier rather than fail until VM boots! */
>> + eventfd_link = open(eventfd_cdev, O_RDWR);
>> + if (eventfd_link < 0) {
>> + RTE_LOG(ERR, VHOST_CONFIG,
>> + "eventfd_link module is not loaded\n");
>> + return -1;
>> + }
>> +
>> + eventfd_copy.source_fd = fd;
>> + eventfd_copy.target_fd = target_fd;
>> + eventfd_copy.target_pid = target_pid;
>> + /* Call the IOCTL to copy the eventfd. */
>> + ret = ioctl(eventfd_link, EVENTFD_COPY, &eventfd_copy);
>> + close(eventfd_link);
>> +
>> + if (ret < 0) {
>> + RTE_LOG(ERR, VHOST_CONFIG,
>> + "EVENTFD_COPY ioctl failed\n");
>> + return -1;
>> + }
>> +
>> + return fd;
>> +}
>> +
>> +/*
>> + * The IOCTLs are handled using CUSE/FUSE in userspace. Depending on
>> + * the type of IOCTL a buffer is requested to read or to write. This
>> + * request is handled by FUSE and the buffer is then given to CUSE.
>> + */
>> +static void
>> +vhost_net_ioctl(fuse_req_t req, int cmd, void *arg,
>> + struct fuse_file_info *fi, __rte_unused unsigned flags,
>> + const void *in_buf, size_t in_bufsz, size_t out_bufsz)
>> +{
>> + struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
>> + struct vhost_vring_file file;
>> + struct vhost_vring_state state;
>> + struct vhost_vring_addr addr;
>> + uint64_t features;
>> + uint32_t index;
>> + int result = 0;
>> +
>> + switch (cmd) {
>> + case VHOST_NET_SET_BACKEND:
>> + LOG_DEBUG(VHOST_CONFIG,
>> + "(%"PRIu64") IOCTL: VHOST_NET_SET_BACKEND\n", ctx.fh);
>> + VHOST_IOCTL_R(struct vhost_vring_file, file, ops->set_backend);
>> + break;
>> +
>> + case VHOST_GET_FEATURES:
>> + LOG_DEBUG(VHOST_CONFIG,
>> + "(%"PRIu64") IOCTL: VHOST_GET_FEATURES\n", ctx.fh);
>> + VHOST_IOCTL_W(uint64_t, features, ops->get_features);
>> + break;
>> +
>> + case VHOST_SET_FEATURES:
>> + LOG_DEBUG(VHOST_CONFIG,
>> + "(%"PRIu64") IOCTL: VHOST_SET_FEATURES\n", ctx.fh);
>> + VHOST_IOCTL_R(uint64_t, features, ops->set_features);
>> + break;
>> +
>> + case VHOST_RESET_OWNER:
>> + LOG_DEBUG(VHOST_CONFIG,
>> + "(%"PRIu64") IOCTL: VHOST_RESET_OWNER\n", ctx.fh);
>> + VHOST_IOCTL(ops->reset_owner);
>> + break;
>> +
>> + case VHOST_SET_OWNER:
>> + LOG_DEBUG(VHOST_CONFIG,
>> + "(%"PRIu64") IOCTL: VHOST_SET_OWNER\n", ctx.fh);
>> + VHOST_IOCTL(ops->set_owner);
>> + break;
>> +
>> + case VHOST_SET_MEM_TABLE:
>> + /*TODO fix race condition.*/
>> + LOG_DEBUG(VHOST_CONFIG,
>> + "(%"PRIu64") IOCTL: VHOST_SET_MEM_TABLE\n", ctx.fh);
>> + static struct vhost_memory mem_temp;
>> + switch (in_bufsz) {
>> + case 0:
>> + VHOST_IOCTL_RETRY(sizeof(struct vhost_memory), 0);
>> + break;
>> +
>> + case sizeof(struct vhost_memory):
>> + mem_temp = *(const struct vhost_memory *) in_buf;
>> +
>> + if (mem_temp.nregions > 0) {
>> + VHOST_IOCTL_RETRY(sizeof(struct vhost_memory) +
>> + (sizeof(struct vhost_memory_region) *
>> + mem_temp.nregions), 0);
>> + } else {
>> + result = -1;
>> + fuse_reply_ioctl(req, result, NULL, 0);
>> + }
>> + break;
>> +
>> + default:
>> + result = cuse_set_mem_table(ctx, in_buf,
>> + mem_temp.nregions);
>> + if (result)
>> + fuse_reply_err(req, EINVAL);
>> + else
>> + fuse_reply_ioctl(req, result, NULL, 0);
>> + }
>> + break;
>> +
>> + case VHOST_SET_VRING_NUM:
>> + LOG_DEBUG(VHOST_CONFIG,
>> + "(%"PRIu64") IOCTL: VHOST_SET_VRING_NUM\n", ctx.fh);
>> + VHOST_IOCTL_R(struct vhost_vring_state, state, ops->set_vring_num);
>> + break;
>> +
>> + case VHOST_SET_VRING_BASE:
>> + LOG_DEBUG(VHOST_CONFIG,
>> + "(%"PRIu64") IOCTL: VHOST_SET_VRING_BASE\n", ctx.fh);
>> + VHOST_IOCTL_R(struct vhost_vring_state, state, ops->set_vring_base);
>> + break;
>> +
>> + case VHOST_GET_VRING_BASE:
>> + LOG_DEBUG(VHOST_CONFIG,
>> + "(%"PRIu64") IOCTL: VHOST_GET_VRING_BASE\n", ctx.fh);
>> + VHOST_IOCTL_RW(uint32_t, index,
>> + struct vhost_vring_state, state, ops->get_vring_base);
>> + break;
>> +
>> + case VHOST_SET_VRING_ADDR:
>> + LOG_DEBUG(VHOST_CONFIG,
>> + "(%"PRIu64") IOCTL: VHOST_SET_VRING_ADDR\n", ctx.fh);
>> + VHOST_IOCTL_R(struct vhost_vring_addr, addr, ops->set_vring_addr);
>> + break;
>> +
>> + case VHOST_SET_VRING_KICK:
>> + case VHOST_SET_VRING_CALL:
>> + if (!in_buf) {
>> + VHOST_IOCTL_RETRY(sizeof(struct vhost_vring_file), 0);
>> + } else {
>> + int fd;
>> + file = *(const struct vhost_vring_file *)in_buf;
>> + LOG_DEBUG(VHOST_CONFIG,
>> + "kick/call idx:%d fd:%d\n", file.index, file.fd);
>> + if ((fd = eventfd_copy(file.fd, ctx.pid)) < 0){
>> + fuse_reply_ioctl(req, -1, NULL, 0);
>> + }
>> + file.fd = fd;
>> + if (cmd == VHOST_SET_VRING_KICK) {
>> + VHOST_IOCTL_R(struct vhost_vring_file, file, ops->set_vring_call);
>> + }
>> + else {
>> + VHOST_IOCTL_R(struct vhost_vring_file, file, ops->set_vring_kick);
>> + }
>> + }
>> + break;
>> +
>> + default:
>> + RTE_LOG(ERR, VHOST_CONFIG,
>> + "(%"PRIu64") IOCTL: DOESN NOT EXIST\n", ctx.fh);
>> + result = -1;
>> + fuse_reply_ioctl(req, result, NULL, 0);
>> + }
>> +
>> + if (result < 0)
>> + LOG_DEBUG(VHOST_CONFIG,
>> + "(%"PRIu64") IOCTL: FAIL\n", ctx.fh);
>> + else
>> + LOG_DEBUG(VHOST_CONFIG,
>> + "(%"PRIu64") IOCTL: SUCCESS\n", ctx.fh);
>> +}
>> +
>> +/*
>> + * Structure handling open, release and ioctl function pointers is populated.
>> + */
>> +static const struct cuse_lowlevel_ops vhost_net_ops = {
>> + .open = vhost_net_open,
>> + .release = vhost_net_release,
>> + .ioctl = vhost_net_ioctl,
>> +};
>> +
>> +/*
>> + * cuse_info is populated and used to register the cuse device.
>> + * vhost_net_device_ops are also passed when the device is registered in app.
>> + */
>> +int
>> +rte_vhost_driver_register(const char *dev_name)
>> +{
>> + struct cuse_info cuse_info;
>> + char device_name[PATH_MAX] = "";
>> + char char_device_name[PATH_MAX] = "";
>> + const char *device_argv[] = { device_name };
>> +
>> + char fuse_opt_dummy[] = FUSE_OPT_DUMMY;
>> + char fuse_opt_fore[] = FUSE_OPT_FORE;
>> + char fuse_opt_nomulti[] = FUSE_OPT_NOMULTI;
>> + char *fuse_argv[] = {fuse_opt_dummy, fuse_opt_fore, fuse_opt_nomulti};
>> +
>> + if (access(cuse_device_name, R_OK | W_OK) < 0) {
>> + RTE_LOG(ERR, VHOST_CONFIG,
>> + "char device %s can't be accessed, maybe not exist\n",
>> + cuse_device_name);
>> + return -1;
>> + }
>> +
>> + /*
>> + * The device name is created. This is passed to QEMU so that it can
>> + * register the device with our application.
>> + */
>> + snprintf(device_name, PATH_MAX, "DEVNAME=%s", dev_name);
>> + snprintf(char_device_name, PATH_MAX, "/dev/%s", dev_name);
>> +
>> + /* Check if device already exists. */
>> + if (access(char_device_name, F_OK) != -1) {
>> + RTE_LOG(ERR, VHOST_CONFIG,
>> + "char device %s already exists\n", char_device_name);
>> + return -1;
>> + }
>> +
>> + memset(&cuse_info, 0, sizeof(cuse_info));
>> + cuse_info.dev_major = default_major;
>> + cuse_info.dev_minor = default_minor;
>> + cuse_info.dev_info_argc = 1;
>> + cuse_info.dev_info_argv = device_argv;
>> + cuse_info.flags = CUSE_UNRESTRICTED_IOCTL;
>> +
>> + ops = get_virtio_net_callbacks();
>> +
>> + session = cuse_lowlevel_setup(3, fuse_argv,
>> + &cuse_info, &vhost_net_ops, 0, NULL);
>> + if (session == NULL)
>> + return -1;
>> +
>> + return 0;
>> +}
>> +
>> +/**
>> + * The CUSE session is launched allowing the application to receive open,
>> + * release and ioctl calls.
>> + */
>> +int
>> +rte_vhost_driver_session_start(void)
>> +{
>> + fuse_session_loop(session);
>> +
>> + return 0;
>> +}
>> diff --git a/lib/librte_vhost/vhost-cuse/virtio-net-cdev.c b/lib/librte_vhost/vhost-cuse/virtio-net-cdev.c
>> new file mode 100644
>> index 0000000..5c16aa5
>> --- /dev/null
>> +++ b/lib/librte_vhost/vhost-cuse/virtio-net-cdev.c
>> @@ -0,0 +1,314 @@
>> +/*-
>> + * BSD LICENSE
>> + *
>> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
>> + * All rights reserved.
>> + *
>> + * Redistribution and use in source and binary forms, with or without
>> + * modification, are permitted provided that the following conditions
>> + * are met:
>> + *
>> + * * Redistributions of source code must retain the above copyright
>> + * notice, this list of conditions and the following disclaimer.
>> + * * Redistributions in binary form must reproduce the above copyright
>> + * notice, this list of conditions and the following disclaimer in
>> + * the documentation and/or other materials provided with the
>> + * distribution.
>> + * * Neither the name of Intel Corporation nor the names of its
>> + * contributors may be used to endorse or promote products derived
>> + * from this software without specific prior written permission.
>> + *
>> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>> + */
>> +
>> +#include <stdint.h>
>> +#include <dirent.h>
>> +#include <linux/vhost.h>
>> +#include <linux/virtio_net.h>
>> +#include <fuse/cuse_lowlevel.h>
>> +#include <stddef.h>
>> +#include <string.h>
>> +#include <stdlib.h>
>> +#include <sys/eventfd.h>
>> +#include <sys/mman.h>
>> +#include <sys/types.h>
>> +#include <unistd.h>
>> +#include <errno.h>
>> +#include <fcntl.h>
>> +
>> +#include <rte_log.h>
>> +
>> +#include "vhost-net.h"
>> +#include "virtio-net-cdev.h"
>> +
>> +extern struct vhost_net_device_ops const *ops;
>> +
>> +/* Line size for reading maps file. */
>> +static const uint32_t BUFSIZE = PATH_MAX;
>> +
>> +/* Size of prot char array in procmap. */
>> +#define PROT_SZ 5
>> +
>> +/* Number of elements in procmap struct. */
>> +#define PROCMAP_SZ 8
>> +
>> +/* Structure containing information gathered from maps file. */
>> +struct procmap {
>> + uint64_t va_start; /* Start virtual address in file. */
>> + uint64_t len; /* Size of file. */
>> + uint64_t pgoff; /* Not used. */
>> + uint32_t maj; /* Not used. */
>> + uint32_t min; /* Not used. */
>> + uint32_t ino; /* Not used. */
>> + char prot[PROT_SZ]; /* Not used. */
>> + char fname[PATH_MAX]; /* File name. */
>> +};
>> +
>> +/*
>> + * Locate the file containing QEMU's memory space and
>> + * map it to our address space.
>> + */
>> +static int
>> +host_memory_map(pid_t pid, uint64_t addr,
>> + uint64_t *mapped_address, uint64_t *mapped_size)
>> +{
>> + struct dirent *dptr = NULL;
>> + struct procmap procmap;
>> + DIR *dp = NULL;
>> + int fd;
>> + int i;
>> + char memfile[PATH_MAX];
>> + char mapfile[PATH_MAX];
>> + char procdir[PATH_MAX];
>> + char resolved_path[PATH_MAX];
>> + FILE *fmap;
>> + void *map;
>> + uint8_t found = 0;
>> + char line[BUFSIZE];
>> + char dlm[] = "- : ";
>> + char *str, *sp, *in[PROCMAP_SZ];
>> + char *end = NULL;
>> +
>> + /* Path where mem files are located. */
>> + snprintf(procdir, PATH_MAX, "/proc/%u/fd/", pid);
>> + /* Maps file used to locate mem file. */
>> + snprintf(mapfile, PATH_MAX, "/proc/%u/maps", pid);
>> +
>> + fmap = fopen(mapfile, "r");
>> + if (fmap == NULL) {
>> + RTE_LOG(ERR, VHOST_CONFIG,
>> + "Failed to open maps file for pid %d\n", pid);
>> + return -1;
>> + }
>> +
>> + /* Read through maps file until we find out base_address. */
>> + while (fgets(line, BUFSIZE, fmap) != 0) {
>> + str = line;
>> + errno = 0;
>> + /* Split line in to fields. */
>> + for (i = 0; i < PROCMAP_SZ; i++) {
>> + in[i] = strtok_r(str, &dlm[i], &sp);
>> + if ((in[i] == NULL) || (errno != 0)) {
>> + fclose(fmap);
>> + return -1;
>> + }
>> + str = NULL;
>> + }
>> +
>> + /* Convert/Copy each field as needed. */
>> + procmap.va_start = strtoull(in[0], &end, 16);
>> + if ((*in[0] == '\0') || (end == NULL) || (*end != '\0') ||
>> + (errno != 0)) {
>> + fclose(fmap);
>> + return -1;
>> + }
>> +
>> + procmap.len = strtoull(in[1], &end, 16);
>> + if ((*in[1] == '\0') || (end == NULL) || (*end != '\0') ||
>> + (errno != 0)) {
>> + fclose(fmap);
>> + return -1;
>> + }
>> +
>> + procmap.pgoff = strtoull(in[3], &end, 16);
>> + if ((*in[3] == '\0') || (end == NULL) || (*end != '\0') ||
>> + (errno != 0)) {
>> + fclose(fmap);
>> + return -1;
>> + }
>> +
>> + procmap.maj = strtoul(in[4], &end, 16);
>> + if ((*in[4] == '\0') || (end == NULL) || (*end != '\0') ||
>> + (errno != 0)) {
>> + fclose(fmap);
>> + return -1;
>> + }
>> +
>> + procmap.min = strtoul(in[5], &end, 16);
>> + if ((*in[5] == '\0') || (end == NULL) || (*end != '\0') ||
>> + (errno != 0)) {
>> + fclose(fmap);
>> + return -1;
>> + }
>> +
>> + procmap.ino = strtoul(in[6], &end, 16);
>> + if ((*in[6] == '\0') || (end == NULL) || (*end != '\0') ||
>> + (errno != 0)) {
>> + fclose(fmap);
>> + return -1;
>> + }
>> +
>> + memcpy(&procmap.prot, in[2], PROT_SZ);
>> + memcpy(&procmap.fname, in[7], PATH_MAX);
>> +
>> + if (procmap.va_start == addr) {
>> + procmap.len = procmap.len - procmap.va_start;
>> + found = 1;
>> + break;
>> + }
>> + }
>> + fclose(fmap);
>> +
>> + if (!found) {
>> + RTE_LOG(ERR, VHOST_CONFIG,
>> + "Failed to find memory file in pid %d maps file\n", pid);
>> + return -1;
>> + }
>> +
>> + /* Find the guest memory file among the process fds. */
>> + dp = opendir(procdir);
>> + if (dp == NULL) {
>> + RTE_LOG(ERR, VHOST_CONFIG,
>> + "Cannot open pid %d process directory\n",
>> + pid);
>> + return -1;
>> +
>> + }
>> +
>> + found = 0;
>> +
>> + /* Read the fd directory contents. */
>> + while (NULL != (dptr = readdir(dp))) {
>> + snprintf(memfile, PATH_MAX, "/proc/%u/fd/%s",
>> + pid, dptr->d_name);
>> + if (realpath(memfile, resolved_path) == NULL) {
>> + RTE_LOG(ERR, VHOST_CONFIG,
>> + "Failed to resolve fd directory\n");
>> + closedir(dp);
>> + return -1;
>> + }
>> + if (strncmp(resolved_path, procmap.fname,
>> + strnlen(procmap.fname, PATH_MAX)) == 0) {
>> + found = 1;
>> + break;
>> + }
>> + }
>> +
>> + closedir(dp);
>> +
>> + if (found == 0) {
>> + RTE_LOG(ERR, VHOST_CONFIG,
>> + "Failed to find memory file for pid %d\n",
>> + pid);
>> + return -1;
>> + }
>> + /* Open the shared memory file and map the memory into this process. */
>> + fd = open(memfile, O_RDWR);
>> +
>> + if (fd == -1) {
>> + RTE_LOG(ERR, VHOST_CONFIG,
>> + "Failed to open %s for pid %d\n",
>> + memfile, pid);
>> + return -1;
>> + }
>> +
>> + map = mmap(0, (size_t)procmap.len, PROT_READ|PROT_WRITE ,
>> + MAP_POPULATE|MAP_SHARED, fd, 0);
>> + close(fd);
>> +
>> + if (map == MAP_FAILED) {
>> + RTE_LOG(ERR, VHOST_CONFIG,
>> + "Error mapping the file %s for pid %d\n",
>> + memfile, pid);
>> + return -1;
>> + }
>> +
>> + /* Store the memory address and size in the device data structure */
>> + *mapped_address = (uint64_t)(uintptr_t)map;
>> + *mapped_size = procmap.len;
>> +
>> + LOG_DEBUG(VHOST_CONFIG,
>> + "Mem File: %s->%s - Size: %llu - VA: %p\n",
>> + memfile, resolved_path,
>> + (unsigned long long)*mapped_size, map);
>> +
>> + return 0;
>> +}
>> +
>> +int
>> +cuse_set_mem_table(struct vhost_device_ctx ctx, const struct vhost_memory *mem_regions_addr,
>> + uint32_t nregions)
>> +{
>> + uint64_t size = offsetof(struct vhost_memory, regions);
>> + uint32_t idx;
>> + struct virtio_memory_regions regions[8]; /* VHOST_MAX_MEMORY_REGIONS */
>> + struct vhost_memory_region *mem_regions = (void *)(uintptr_t)
>> + ((uint64_t)(uintptr_t)mem_regions_addr + size);
>> + uint64_t base_address = 0, mapped_address, mapped_size;
>> +
>> + /* regions[] holds at most VHOST_MAX_MEMORY_REGIONS (8) entries;
>> + * nregions comes from the guest-supplied message, so bound it. */
>> + if (nregions > 8) {
>> + RTE_LOG(ERR, VHOST_CONFIG,
>> + "Too many memory regions: %u (max 8)\n", nregions);
>> + return -1;
>> + }
>> +
>> + for (idx = 0; idx < nregions; idx++) {
>> + regions[idx].guest_phys_address =
>> + mem_regions[idx].guest_phys_addr;
>> + regions[idx].guest_phys_address_end =
>> + regions[idx].guest_phys_address +
>> + mem_regions[idx].memory_size;
>> + regions[idx].memory_size =
>> + mem_regions[idx].memory_size;
>> + regions[idx].userspace_address =
>> + mem_regions[idx].userspace_addr;
>> +
>> + LOG_DEBUG(VHOST_CONFIG, "REGION: %u - GPA: %p - QEMU VA: %p - SIZE (%"PRIu64")\n",
>> + idx,
>> + (void *)(uintptr_t)regions[idx].guest_phys_address,
>> + (void *)(uintptr_t)regions[idx].userspace_address,
>> + regions[idx].memory_size);
>> +
>> + /*set the base address mapping*/
>> + if (regions[idx].guest_phys_address == 0x0) {
>> + base_address =
>> + regions[idx].userspace_address;
>> + /* Map VM memory file */
>> + if (host_memory_map(ctx.pid, base_address,
>> + &mapped_address, &mapped_size) != 0) {
>> + return -1;
>> + }
>> + }
>> + }
>> +
>> + /* Check that we have a valid base address. */
>> + if (base_address == 0) {
>> + RTE_LOG(ERR, VHOST_CONFIG,
>> + "Failed to find base address of qemu memory file.\n");
>> + return -1;
>> + }
>> +
>> + for (idx = 0; idx < nregions; idx++) {
>> + regions[idx].address_offset =
>> + mapped_address - base_address +
>> + regions[idx].userspace_address -
>> + regions[idx].guest_phys_address;
>> + }
>> +
>> + ops->set_mem_table(ctx, &regions[0], nregions);
>> + return 0;
>> +}
>> diff --git a/lib/librte_vhost/vhost-cuse/virtio-net-cdev.h b/lib/librte_vhost/vhost-cuse/virtio-net-cdev.h
>> new file mode 100644
>> index 0000000..6f98ce8
>> --- /dev/null
>> +++ b/lib/librte_vhost/vhost-cuse/virtio-net-cdev.h
>> @@ -0,0 +1,43 @@
>> +/*-
>> + * BSD LICENSE
>> + *
>> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
>> + * All rights reserved.
>> + *
>> + * Redistribution and use in source and binary forms, with or without
>> + * modification, are permitted provided that the following conditions
>> + * are met:
>> + *
>> + * * Redistributions of source code must retain the above copyright
>> + * notice, this list of conditions and the following disclaimer.
>> + * * Redistributions in binary form must reproduce the above copyright
>> + * notice, this list of conditions and the following disclaimer in
>> + * the documentation and/or other materials provided with the
>> + * distribution.
>> + * * Neither the name of Intel Corporation nor the names of its
>> + * contributors may be used to endorse or promote products derived
>> + * from this software without specific prior written permission.
>> + *
>> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>> + */
>> +#ifndef _VIRTIO_NET_CDEV_H
>> +#define _VIRTIO_NET_CDEV_H
>> +#include <stdint.h>
>> +
>> +#include "vhost-net.h"
>> +
>> +int
>> +cuse_set_mem_table(struct vhost_device_ctx ctx, const struct vhost_memory *mem_regions_addr,
>> + uint32_t nregions);
>> +
>> +#endif
>> diff --git a/lib/librte_vhost/vhost-net-cdev.c b/lib/librte_vhost/vhost-net-cdev.c
>> deleted file mode 100644
>> index 57c76cb..0000000
>> --- a/lib/librte_vhost/vhost-net-cdev.c
>> +++ /dev/null
>> @@ -1,389 +0,0 @@
>> -/*-
>> - * BSD LICENSE
>> - *
>> - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
>> - * All rights reserved.
>> - *
>> - * Redistribution and use in source and binary forms, with or without
>> - * modification, are permitted provided that the following conditions
>> - * are met:
>> - *
>> - * * Redistributions of source code must retain the above copyright
>> - * notice, this list of conditions and the following disclaimer.
>> - * * Redistributions in binary form must reproduce the above copyright
>> - * notice, this list of conditions and the following disclaimer in
>> - * the documentation and/or other materials provided with the
>> - * distribution.
>> - * * Neither the name of Intel Corporation nor the names of its
>> - * contributors may be used to endorse or promote products derived
>> - * from this software without specific prior written permission.
>> - *
>> - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>> - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>> - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>> - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>> - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>> - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>> - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>> - */
>> -
>> -#include <errno.h>
>> -#include <fuse/cuse_lowlevel.h>
>> -#include <linux/limits.h>
>> -#include <linux/vhost.h>
>> -#include <stdint.h>
>> -#include <string.h>
>> -#include <unistd.h>
>> -
>> -#include <rte_ethdev.h>
>> -#include <rte_log.h>
>> -#include <rte_string_fns.h>
>> -#include <rte_virtio_net.h>
>> -
>> -#include "vhost-net-cdev.h"
>> -
>> -#define FUSE_OPT_DUMMY "\0\0"
>> -#define FUSE_OPT_FORE "-f\0\0"
>> -#define FUSE_OPT_NOMULTI "-s\0\0"
>> -
>> -static const uint32_t default_major = 231;
>> -static const uint32_t default_minor = 1;
>> -static const char cuse_device_name[] = "/dev/cuse";
>> -static const char default_cdev[] = "vhost-net";
>> -
>> -static struct fuse_session *session;
>> -static struct vhost_net_device_ops const *ops;
>> -
>> -/*
>> - * Returns vhost_device_ctx from given fuse_req_t. The index is populated later
>> - * when the device is added to the device linked list.
>> - */
>> -static struct vhost_device_ctx
>> -fuse_req_to_vhost_ctx(fuse_req_t req, struct fuse_file_info *fi)
>> -{
>> - struct vhost_device_ctx ctx;
>> - struct fuse_ctx const *const req_ctx = fuse_req_ctx(req);
>> -
>> - ctx.pid = req_ctx->pid;
>> - ctx.fh = fi->fh;
>> -
>> - return ctx;
>> -}
>> -
>> -/*
>> - * When the device is created in QEMU it gets initialised here and
>> - * added to the device linked list.
>> - */
>> -static void
>> -vhost_net_open(fuse_req_t req, struct fuse_file_info *fi)
>> -{
>> - struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
>> - int err = 0;
>> -
>> - err = ops->new_device(ctx);
>> - if (err == -1) {
>> - fuse_reply_err(req, EPERM);
>> - return;
>> - }
>> -
>> - fi->fh = err;
>> -
>> - RTE_LOG(INFO, VHOST_CONFIG,
>> - "(%"PRIu64") Device configuration started\n", fi->fh);
>> - fuse_reply_open(req, fi);
>> -}
>> -
>> -/*
>> - * When QEMU is shutdown or killed the device gets released.
>> - */
>> -static void
>> -vhost_net_release(fuse_req_t req, struct fuse_file_info *fi)
>> -{
>> - int err = 0;
>> - struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
>> -
>> - ops->destroy_device(ctx);
>> - RTE_LOG(INFO, VHOST_CONFIG, "(%"PRIu64") Device released\n", ctx.fh);
>> - fuse_reply_err(req, err);
>> -}
>> -
>> -/*
>> - * Boilerplate code for CUSE IOCTL
>> - * Implicit arguments: ctx, req, result.
>> - */
>> -#define VHOST_IOCTL(func) do { \
>> - result = (func)(ctx); \
>> - fuse_reply_ioctl(req, result, NULL, 0); \
>> -} while (0)
>> -
>> -/*
>> - * Boilerplate IOCTL RETRY
>> - * Implicit arguments: req.
>> - */
>> -#define VHOST_IOCTL_RETRY(size_r, size_w) do { \
>> - struct iovec iov_r = { arg, (size_r) }; \
>> - struct iovec iov_w = { arg, (size_w) }; \
>> - fuse_reply_ioctl_retry(req, &iov_r, \
>> - (size_r) ? 1 : 0, &iov_w, (size_w) ? 1 : 0);\
>> -} while (0)
>> -
>> -/*
>> - * Boilerplate code for CUSE Read IOCTL
>> - * Implicit arguments: ctx, req, result, in_bufsz, in_buf.
>> - */
>> -#define VHOST_IOCTL_R(type, var, func) do { \
>> - if (!in_bufsz) { \
>> - VHOST_IOCTL_RETRY(sizeof(type), 0);\
>> - } else { \
>> - (var) = *(const type*)in_buf; \
>> - result = func(ctx, &(var)); \
>> - fuse_reply_ioctl(req, result, NULL, 0);\
>> - } \
>> -} while (0)
>> -
>> -/*
>> - * Boilerplate code for CUSE Write IOCTL
>> - * Implicit arguments: ctx, req, result, out_bufsz.
>> - */
>> -#define VHOST_IOCTL_W(type, var, func) do { \
>> - if (!out_bufsz) { \
>> - VHOST_IOCTL_RETRY(0, sizeof(type));\
>> - } else { \
>> - result = (func)(ctx, &(var));\
>> - fuse_reply_ioctl(req, result, &(var), sizeof(type));\
>> - } \
>> -} while (0)
>> -
>> -/*
>> - * Boilerplate code for CUSE Read/Write IOCTL
>> - * Implicit arguments: ctx, req, result, in_bufsz, in_buf.
>> - */
>> -#define VHOST_IOCTL_RW(type1, var1, type2, var2, func) do { \
>> - if (!in_bufsz) { \
>> - VHOST_IOCTL_RETRY(sizeof(type1), sizeof(type2));\
>> - } else { \
>> - (var1) = *(const type1*) (in_buf); \
>> - result = (func)(ctx, (var1), &(var2)); \
>> - fuse_reply_ioctl(req, result, &(var2), sizeof(type2));\
>> - } \
>> -} while (0)
>> -
>> -/*
>> - * The IOCTLs are handled using CUSE/FUSE in userspace. Depending on the type
>> - * of IOCTL a buffer is requested to read or to write. This request is handled
>> - * by FUSE and the buffer is then given to CUSE.
>> - */
>> -static void
>> -vhost_net_ioctl(fuse_req_t req, int cmd, void *arg,
>> - struct fuse_file_info *fi, __rte_unused unsigned flags,
>> - const void *in_buf, size_t in_bufsz, size_t out_bufsz)
>> -{
>> - struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
>> - struct vhost_vring_file file;
>> - struct vhost_vring_state state;
>> - struct vhost_vring_addr addr;
>> - uint64_t features;
>> - uint32_t index;
>> - int result = 0;
>> -
>> - switch (cmd) {
>> - case VHOST_NET_SET_BACKEND:
>> - LOG_DEBUG(VHOST_CONFIG,
>> - "(%"PRIu64") IOCTL: VHOST_NET_SET_BACKEND\n", ctx.fh);
>> - VHOST_IOCTL_R(struct vhost_vring_file, file, ops->set_backend);
>> - break;
>> -
>> - case VHOST_GET_FEATURES:
>> - LOG_DEBUG(VHOST_CONFIG,
>> - "(%"PRIu64") IOCTL: VHOST_GET_FEATURES\n", ctx.fh);
>> - VHOST_IOCTL_W(uint64_t, features, ops->get_features);
>> - break;
>> -
>> - case VHOST_SET_FEATURES:
>> - LOG_DEBUG(VHOST_CONFIG,
>> - "(%"PRIu64") IOCTL: VHOST_SET_FEATURES\n", ctx.fh);
>> - VHOST_IOCTL_R(uint64_t, features, ops->set_features);
>> - break;
>> -
>> - case VHOST_RESET_OWNER:
>> - LOG_DEBUG(VHOST_CONFIG,
>> - "(%"PRIu64") IOCTL: VHOST_RESET_OWNER\n", ctx.fh);
>> - VHOST_IOCTL(ops->reset_owner);
>> - break;
>> -
>> - case VHOST_SET_OWNER:
>> - LOG_DEBUG(VHOST_CONFIG,
>> - "(%"PRIu64") IOCTL: VHOST_SET_OWNER\n", ctx.fh);
>> - VHOST_IOCTL(ops->set_owner);
>> - break;
>> -
>> - case VHOST_SET_MEM_TABLE:
>> - /*TODO fix race condition.*/
>> - LOG_DEBUG(VHOST_CONFIG,
>> - "(%"PRIu64") IOCTL: VHOST_SET_MEM_TABLE\n", ctx.fh);
>> - static struct vhost_memory mem_temp;
>> -
>> - switch (in_bufsz) {
>> - case 0:
>> - VHOST_IOCTL_RETRY(sizeof(struct vhost_memory), 0);
>> - break;
>> -
>> - case sizeof(struct vhost_memory):
>> - mem_temp = *(const struct vhost_memory *) in_buf;
>> -
>> - if (mem_temp.nregions > 0) {
>> - VHOST_IOCTL_RETRY(sizeof(struct vhost_memory) +
>> - (sizeof(struct vhost_memory_region) *
>> - mem_temp.nregions), 0);
>> - } else {
>> - result = -1;
>> - fuse_reply_ioctl(req, result, NULL, 0);
>> - }
>> - break;
>> -
>> - default:
>> - result = ops->set_mem_table(ctx,
>> - in_buf, mem_temp.nregions);
>> - if (result)
>> - fuse_reply_err(req, EINVAL);
>> - else
>> - fuse_reply_ioctl(req, result, NULL, 0);
>> - }
>> - break;
>> -
>> - case VHOST_SET_VRING_NUM:
>> - LOG_DEBUG(VHOST_CONFIG,
>> - "(%"PRIu64") IOCTL: VHOST_SET_VRING_NUM\n", ctx.fh);
>> - VHOST_IOCTL_R(struct vhost_vring_state, state,
>> - ops->set_vring_num);
>> - break;
>> -
>> - case VHOST_SET_VRING_BASE:
>> - LOG_DEBUG(VHOST_CONFIG,
>> - "(%"PRIu64") IOCTL: VHOST_SET_VRING_BASE\n", ctx.fh);
>> - VHOST_IOCTL_R(struct vhost_vring_state, state,
>> - ops->set_vring_base);
>> - break;
>> -
>> - case VHOST_GET_VRING_BASE:
>> - LOG_DEBUG(VHOST_CONFIG,
>> - "(%"PRIu64") IOCTL: VHOST_GET_VRING_BASE\n", ctx.fh);
>> - VHOST_IOCTL_RW(uint32_t, index,
>> - struct vhost_vring_state, state, ops->get_vring_base);
>> - break;
>> -
>> - case VHOST_SET_VRING_ADDR:
>> - LOG_DEBUG(VHOST_CONFIG,
>> - "(%"PRIu64") IOCTL: VHOST_SET_VRING_ADDR\n", ctx.fh);
>> - VHOST_IOCTL_R(struct vhost_vring_addr, addr,
>> - ops->set_vring_addr);
>> - break;
>> -
>> - case VHOST_SET_VRING_KICK:
>> - LOG_DEBUG(VHOST_CONFIG,
>> - "(%"PRIu64") IOCTL: VHOST_SET_VRING_KICK\n", ctx.fh);
>> - VHOST_IOCTL_R(struct vhost_vring_file, file,
>> - ops->set_vring_kick);
>> - break;
>> -
>> - case VHOST_SET_VRING_CALL:
>> - LOG_DEBUG(VHOST_CONFIG,
>> - "(%"PRIu64") IOCTL: VHOST_SET_VRING_CALL\n", ctx.fh);
>> - VHOST_IOCTL_R(struct vhost_vring_file, file,
>> - ops->set_vring_call);
>> - break;
>> -
>> - default:
>> - RTE_LOG(ERR, VHOST_CONFIG,
>> - "(%"PRIu64") IOCTL: DOESN NOT EXIST\n", ctx.fh);
>> - result = -1;
>> - fuse_reply_ioctl(req, result, NULL, 0);
>> - }
>> -
>> - if (result < 0)
>> - LOG_DEBUG(VHOST_CONFIG,
>> - "(%"PRIu64") IOCTL: FAIL\n", ctx.fh);
>> - else
>> - LOG_DEBUG(VHOST_CONFIG,
>> - "(%"PRIu64") IOCTL: SUCCESS\n", ctx.fh);
>> -}
>> -
>> -/*
>> - * Structure handling open, release and ioctl function pointers is populated.
>> - */
>> -static const struct cuse_lowlevel_ops vhost_net_ops = {
>> - .open = vhost_net_open,
>> - .release = vhost_net_release,
>> - .ioctl = vhost_net_ioctl,
>> -};
>> -
>> -/*
>> - * cuse_info is populated and used to register the cuse device.
>> - * vhost_net_device_ops are also passed when the device is registered in app.
>> - */
>> -int
>> -rte_vhost_driver_register(const char *dev_name)
>> -{
>> - struct cuse_info cuse_info;
>> - char device_name[PATH_MAX] = "";
>> - char char_device_name[PATH_MAX] = "";
>> - const char *device_argv[] = { device_name };
>> -
>> - char fuse_opt_dummy[] = FUSE_OPT_DUMMY;
>> - char fuse_opt_fore[] = FUSE_OPT_FORE;
>> - char fuse_opt_nomulti[] = FUSE_OPT_NOMULTI;
>> - char *fuse_argv[] = {fuse_opt_dummy, fuse_opt_fore, fuse_opt_nomulti};
>> -
>> - if (access(cuse_device_name, R_OK | W_OK) < 0) {
>> - RTE_LOG(ERR, VHOST_CONFIG,
>> - "char device %s can't be accessed, maybe not exist\n",
>> - cuse_device_name);
>> - return -1;
>> - }
>> -
>> - /*
>> - * The device name is created. This is passed to QEMU so that it can
>> - * register the device with our application.
>> - */
>> - snprintf(device_name, PATH_MAX, "DEVNAME=%s", dev_name);
>> - snprintf(char_device_name, PATH_MAX, "/dev/%s", dev_name);
>> -
>> - /* Check if device already exists. */
>> - if (access(char_device_name, F_OK) != -1) {
>> - RTE_LOG(ERR, VHOST_CONFIG,
>> - "char device %s already exists\n", char_device_name);
>> - return -1;
>> - }
>> -
>> - memset(&cuse_info, 0, sizeof(cuse_info));
>> - cuse_info.dev_major = default_major;
>> - cuse_info.dev_minor = default_minor;
>> - cuse_info.dev_info_argc = 1;
>> - cuse_info.dev_info_argv = device_argv;
>> - cuse_info.flags = CUSE_UNRESTRICTED_IOCTL;
>> -
>> - ops = get_virtio_net_callbacks();
>> -
>> - session = cuse_lowlevel_setup(3, fuse_argv,
>> - &cuse_info, &vhost_net_ops, 0, NULL);
>> - if (session == NULL)
>> - return -1;
>> -
>> - return 0;
>> -}
>> -
>> -/**
>> - * The CUSE session is launched allowing the application to receive open,
>> - * release and ioctl calls.
>> - */
>> -int
>> -rte_vhost_driver_session_start(void)
>> -{
>> - fuse_session_loop(session);
>> -
>> - return 0;
>> -}
>> diff --git a/lib/librte_vhost/vhost-net-cdev.h b/lib/librte_vhost/vhost-net-cdev.h
>> deleted file mode 100644
>> index 03a5c57..0000000
>> --- a/lib/librte_vhost/vhost-net-cdev.h
>> +++ /dev/null
>> @@ -1,113 +0,0 @@
>> -/*-
>> - * BSD LICENSE
>> - *
>> - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
>> - * All rights reserved.
>> - *
>> - * Redistribution and use in source and binary forms, with or without
>> - * modification, are permitted provided that the following conditions
>> - * are met:
>> - *
>> - * * Redistributions of source code must retain the above copyright
>> - * notice, this list of conditions and the following disclaimer.
>> - * * Redistributions in binary form must reproduce the above copyright
>> - * notice, this list of conditions and the following disclaimer in
>> - * the documentation and/or other materials provided with the
>> - * distribution.
>> - * * Neither the name of Intel Corporation nor the names of its
>> - * contributors may be used to endorse or promote products derived
>> - * from this software without specific prior written permission.
>> - *
>> - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>> - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>> - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>> - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>> - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>> - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>> - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>> - */
>> -
>> -#ifndef _VHOST_NET_CDEV_H_
>> -#define _VHOST_NET_CDEV_H_
>> -#include <stdint.h>
>> -#include <stdio.h>
>> -#include <sys/types.h>
>> -#include <unistd.h>
>> -#include <linux/vhost.h>
>> -
>> -#include <rte_log.h>
>> -
>> -/* Macros for printing using RTE_LOG */
>> -#define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
>> -#define RTE_LOGTYPE_VHOST_DATA RTE_LOGTYPE_USER1
>> -
>> -#ifdef RTE_LIBRTE_VHOST_DEBUG
>> -#define VHOST_MAX_PRINT_BUFF 6072
>> -#define LOG_LEVEL RTE_LOG_DEBUG
>> -#define LOG_DEBUG(log_type, fmt, args...) RTE_LOG(DEBUG, log_type, fmt, ##args)
>> -#define PRINT_PACKET(device, addr, size, header) do { \
>> - char *pkt_addr = (char *)(addr); \
>> - unsigned int index; \
>> - char packet[VHOST_MAX_PRINT_BUFF]; \
>> - \
>> - if ((header)) \
>> - snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%"PRIu64") Header size %d: ", (device->device_fh), (size)); \
>> - else \
>> - snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%"PRIu64") Packet size %d: ", (device->device_fh), (size)); \
>> - for (index = 0; index < (size); index++) { \
>> - snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), \
>> - "%02hhx ", pkt_addr[index]); \
>> - } \
>> - snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), "\n"); \
>> - \
>> - LOG_DEBUG(VHOST_DATA, "%s", packet); \
>> -} while (0)
>> -#else
>> -#define LOG_LEVEL RTE_LOG_INFO
>> -#define LOG_DEBUG(log_type, fmt, args...) do {} while (0)
>> -#define PRINT_PACKET(device, addr, size, header) do {} while (0)
>> -#endif
>> -
>> -
>> -/*
>> - * Structure used to identify device context.
>> - */
>> -struct vhost_device_ctx {
>> - pid_t pid; /* PID of process calling the IOCTL. */
>> - uint64_t fh; /* Populated with fi->fh to track the device index. */
>> -};
>> -
>> -/*
>> - * Structure contains function pointers to be defined in virtio-net.c. These
>> - * functions are called in CUSE context and are used to configure devices.
>> - */
>> -struct vhost_net_device_ops {
>> - int (*new_device)(struct vhost_device_ctx);
>> - void (*destroy_device)(struct vhost_device_ctx);
>> -
>> - int (*get_features)(struct vhost_device_ctx, uint64_t *);
>> - int (*set_features)(struct vhost_device_ctx, uint64_t *);
>> -
>> - int (*set_mem_table)(struct vhost_device_ctx, const void *, uint32_t);
>> -
>> - int (*set_vring_num)(struct vhost_device_ctx, struct vhost_vring_state *);
>> - int (*set_vring_addr)(struct vhost_device_ctx, struct vhost_vring_addr *);
>> - int (*set_vring_base)(struct vhost_device_ctx, struct vhost_vring_state *);
>> - int (*get_vring_base)(struct vhost_device_ctx, uint32_t, struct vhost_vring_state *);
>> -
>> - int (*set_vring_kick)(struct vhost_device_ctx, struct vhost_vring_file *);
>> - int (*set_vring_call)(struct vhost_device_ctx, struct vhost_vring_file *);
>> -
>> - int (*set_backend)(struct vhost_device_ctx, struct vhost_vring_file *);
>> -
>> - int (*set_owner)(struct vhost_device_ctx);
>> - int (*reset_owner)(struct vhost_device_ctx);
>> -};
>> -
>> -
>> -struct vhost_net_device_ops const *get_virtio_net_callbacks(void);
>> -#endif /* _VHOST_NET_CDEV_H_ */
>> diff --git a/lib/librte_vhost/vhost-user/fd_man.c b/lib/librte_vhost/vhost-user/fd_man.c
>> new file mode 100644
>> index 0000000..c7fd3f2
>> --- /dev/null
>> +++ b/lib/librte_vhost/vhost-user/fd_man.c
>> @@ -0,0 +1,158 @@
>> +#include <stdint.h>
>> +#include <stdio.h>
>> +#include <stdlib.h>
>> +#include <sys/socket.h>
>> +#include <sys/select.h>
>> +#include <sys/time.h>
>> +#include <sys/types.h>
>> +#include <unistd.h>
>> +
>> +#include <rte_log.h>
>> +
>> +#include "fd_man.h"
>> +
>> +/**
>> + * Returns the index in the fdset for a fd.
>> + * If fd is -1, it means to search for a free entry.
>> + * @return
>> + * Index for the fd, or -1 if fd isn't in the fdset.
>> + */
>> +static int
>> +fdset_find_fd(struct fdset *pfdset, int fd)
>> +{
>> + int i;
>> +
>> + for (i = 0; i < pfdset->num && pfdset->fd[i].fd != fd; i++);
>> +
>> + return i == pfdset->num ? -1 : i;
>> +}
>> +
/* Locate an unused entry (fd == -1) in the fdset, or -1 when full. */
static int
fdset_find_free_slot(struct fdset *pfdset)
{
	return fdset_find_fd(pfdset, -1);
}
>> +
>> +static void
>> +fdset_add_fd(struct fdset *pfdset, int idx, int fd, fd_cb rcb,
>> + fd_cb wcb, uint64_t dat)
>> +{
>> + struct fdentry *pfdentry = &pfdset->fd[idx];
>> +
>> + pfdentry->fd = fd;
>> + pfdentry->rcb = rcb;
>> + pfdentry->wcb = wcb;
>> + pfdentry->dat = dat;
>> +}
>> +
>> +/**
>> + * Fill the read/write fdset with the fds in the fdset.
>> + * @return
>> + * the maximum fds filled in the read/write fd_set.
>> + */
>> +static int
>> +fdset_fill(fd_set *rfset, fd_set *wfset, struct fdset *pfdset)
>> +{
>> + struct fdentry *pfdentry;
>> + int i, maxfds = -1;
>> + int num = MAX_FDS;
>> +
>> + for (i = 0; i < num ; i++) {
>> + pfdentry = &pfdset->fd[i];
>> + if (pfdentry->fd != -1) {
>> + int added = 0;
>> + if (pfdentry->rcb && rfset) {
>> + FD_SET(pfdentry->fd, rfset);
>> + added = 1;
>> + }
>> + if (pfdentry->wcb && wfset) {
>> + FD_SET(pfdentry->fd, wfset);
>> + added = 1;
>> + }
>> + if (added)
>> + maxfds = pfdentry->fd < maxfds ?
>> + maxfds : pfdentry->fd;
>> + }
>> + }
>> + return maxfds;
>> +}
>> +
>> +void
>> +fdset_init(struct fdset *pfdset)
>> +{
>> + int i;
>> +
>> + for (i = 0; i < MAX_FDS; i++)
>> + pfdset->fd[i].fd = -1;
>> + pfdset->num = MAX_FDS;
>> +
>> +}
>> +
>> +/**
>> + * Register the fd in the fdset with its read/write handler and context.
>> + */
>> +int
>> +fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, uint64_t dat)
>> +{
>> + int i;
>> +
>> + if (fd == -1)
>> + return -1;
>> +
>> + /* Find a free slot in the list. */
>> + i = fdset_find_free_slot(pfdset);
>> + if (i == -1)
>> + return -2;
>> +
>> + fdset_add_fd(pfdset, i, fd, rcb, wcb, dat);
>> +
>> + return 0;
>> +}
>> +
>> +/**
>> + * Unregister the fd from the fdset.
>> + */
>> +void
>> +fdset_del(struct fdset *pfdset, int fd)
>> +{
>> + int i;
>> +
>> + i = fdset_find_fd(pfdset, fd);
>> + if (i != -1) {
>> + pfdset->fd[i].fd = -1;
>> + }
>> +}
>> +
>> +
>> +void
>> +fdset_event_dispatch(struct fdset *pfdset)
>> +{
>> + fd_set rfds,wfds;
>> + int i, maxfds;
>> + struct fdentry *pfdentry;
>> + int num = MAX_FDS;
>> +
>> + if (pfdset == NULL)
>> + return;
>> + while (1) {
>> + FD_ZERO(&rfds);
>> + FD_ZERO(&wfds);
>> + maxfds = fdset_fill(&rfds, &wfds, pfdset);
>> + /* fd management runs in one thread */
>> + if (maxfds == -1) {
>> + return;
>> + }
>> +
>> + select(maxfds + 1, &rfds, &wfds, NULL, NULL);
>> +
>> + for (i = 0; i < num; i++) {
>> + pfdentry = &pfdset->fd[i];
>> + if (FD_ISSET(pfdentry->fd, &rfds))
>> + pfdentry->rcb(pfdentry->fd, pfdentry->dat);
>> + if (FD_ISSET(pfdentry->fd, &wfds))
>> + pfdentry->wcb(pfdentry->fd, pfdentry->dat);
>> + }
>> +
>> + }
>> +}
>> diff --git a/lib/librte_vhost/vhost-user/fd_man.h b/lib/librte_vhost/vhost-user/fd_man.h
>> new file mode 100644
>> index 0000000..57cc81d
>> --- /dev/null
>> +++ b/lib/librte_vhost/vhost-user/fd_man.h
>> @@ -0,0 +1,31 @@
#ifndef _FD_MAN_H_
#define _FD_MAN_H_
#include <stdint.h>

#define MAX_FDS 1024

/*
 * Callback invoked by the dispatch loop when the registered fd becomes
 * readable or writeable; dat is the context passed to fdset_add().
 */
typedef void (*fd_cb)(int fd, uint64_t dat);

struct fdentry {
	int fd; /* -1 indicates this entry is empty */
	fd_cb rcb; /* callback when this fd is readable. */
	fd_cb wcb; /* callback when this fd is writeable.*/
	uint64_t dat; /* fd context */
};

/* Fixed-capacity table of fd entries, serviced by one dispatch thread. */
struct fdset {
	struct fdentry fd[MAX_FDS];
	int num; /* set to MAX_FDS by fdset_init() */
};


/* Mark all entries empty and initialize num. */
void fdset_init(struct fdset *pfdset);

/* Register fd with its callbacks and context.
 * Returns 0 on success, -1 for fd == -1, -2 when the set is full. */
int fdset_add(struct fdset *pfdset, int fd, fd_cb rcb,
	fd_cb wcb, uint64_t ctx);

/* Unregister fd; a no-op when fd is not present. */
void fdset_del(struct fdset *pfdset, int fd);

/* select()-based event loop; returns when the set becomes empty. */
void fdset_event_dispatch(struct fdset *pfdset);

#endif
>> diff --git a/lib/librte_vhost/vhost-user/vhost-net-user.c b/lib/librte_vhost/vhost-user/vhost-net-user.c
>> new file mode 100644
>> index 0000000..34450f4
>> --- /dev/null
>> +++ b/lib/librte_vhost/vhost-user/vhost-net-user.c
>> @@ -0,0 +1,417 @@
>> +/*-
>> + * BSD LICENSE
>> + *
>> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
>> + * All rights reserved.
>> + *
>> + * Redistribution and use in source and binary forms, with or without
>> + * modification, are permitted provided that the following conditions
>> + * are met:
>> + *
>> + * * Redistributions of source code must retain the above copyright
>> + * notice, this list of conditions and the following disclaimer.
>> + * * Redistributions in binary form must reproduce the above copyright
>> + * notice, this list of conditions and the following disclaimer in
>> + * the documentation and/or other materials provided with the
>> + * distribution.
>> + * * Neither the name of Intel Corporation nor the names of its
>> + * contributors may be used to endorse or promote products derived
>> + * from this software without specific prior written permission.
>> + *
>> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>> + */
>> +
>> +#include <stdint.h>
>> +#include <stdio.h>
>> +#include <limits.h>
>> +#include <stdlib.h>
>> +#include <unistd.h>
>> +#include <string.h>
>> +#include <sys/types.h>
>> +#include <sys/socket.h>
>> +#include <sys/un.h>
>> +#include <errno.h>
>> +
>> +#include <rte_log.h>
>> +#include <rte_virtio_net.h>
>> +
>> +#include "fd_man.h"
>> +#include "vhost-net-user.h"
>> +#include "vhost-net.h"
>> +#include "virtio-net-user.h"
>> +
/* Callback for new connections arriving on the listen socket. */
static void vserver_new_vq_conn(int fd, uint64_t data);
/* Callback for vhost-user messages on an established connection. */
static void vserver_message_handler(int fd, uint64_t dat);
/* Virtio-net device callbacks, filled by get_virtio_net_callbacks(). */
const struct vhost_net_device_ops *ops;

/* Single server instance created by rte_vhost_driver_register(). */
static struct vhost_server *g_vhost_server;

/* Human-readable request names, indexed by VhostUserRequest. */
static const char *vhost_message_str[VHOST_USER_MAX] =
{
	[VHOST_USER_NONE] = "VHOST_USER_NONE",
	[VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
	[VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES",
	[VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER",
	[VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER",
	[VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE",
	[VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE",
	[VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD",
	[VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM",
	[VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR",
	[VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE",
	[VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE",
	[VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK",
	[VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL",
	[VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR"
};
>> +
>> +/**
>> + * Create a unix domain socket and bind to path.
>> + * @return
>> + * socket fd or -1 on failure
>> + */
>> +static int
>> +uds_socket(const char *path)
>> +{
>> + struct sockaddr_un un;
>> + int sockfd;
>> + int ret;
>> +
>> + if (path == NULL)
>> + return -1;
>> +
>> + sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
>> + if (sockfd < 0)
>> + return -1;
>> + RTE_LOG(INFO, VHOST_CONFIG, "socket created, fd:%d\n", sockfd);
>> +
>> + memset(&un, 0, sizeof(un));
>> + un.sun_family = AF_UNIX;
>> + snprintf(un.sun_path, sizeof(un.sun_path), "%s", path);
>> + ret = bind(sockfd, (struct sockaddr *)&un, sizeof(un));
>> + if (ret == -1)
>> + goto err;
>> + RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);
>> +
>> + ret = listen(sockfd, 1);
>> + if (ret == -1)
>> + goto err;
>> +
>> + return sockfd;
>> +
>> +err:
>> + close(sockfd);
>> + return -1;
>> +}
>> +
>> +
/*
 * Receive up to buflen bytes plus any SCM_RIGHTS ancillary file
 * descriptors from sockfd; at most fd_num descriptors are copied
 * into fds.
 *
 * NOTE(review): when no SCM_RIGHTS control message arrives, fds is
 * left untouched -- callers must not assume it was initialized.
 *
 * return bytes# of read, 0 on peer close, negative on error
 */
static int
read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
{

	struct iovec iov;
	struct msghdr msgh = { 0 };
	size_t fdsize = fd_num * sizeof(int);
	char control[CMSG_SPACE(fdsize)];
	struct cmsghdr *cmsg;
	int ret;

	iov.iov_base = buf;
	iov.iov_len = buflen;

	msgh.msg_iov = &iov;
	msgh.msg_iovlen = 1;
	msgh.msg_control = control;
	msgh.msg_controllen = sizeof(control);

	ret = recvmsg(sockfd, &msgh, 0);
	if (ret <= 0) {
		RTE_LOG(ERR, VHOST_CONFIG, "%s failed\n", __func__);
		return ret;
	}
	/* ret == buflen */
	/* A truncated data or control part makes the message unusable. */
	if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
		RTE_LOG(ERR, VHOST_CONFIG, "%s failed\n", __func__);
		return -1;
	}

	/* Scan control messages for the first SCM_RIGHTS block of fds. */
	for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
		cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
		if ( (cmsg->cmsg_level == SOL_SOCKET) &&
			(cmsg->cmsg_type == SCM_RIGHTS)) {
			memcpy(fds, CMSG_DATA(cmsg), fdsize);
			break;
		}
	}
	return ret;
}
>> +
>> +static int
>> +read_vhost_message(int sockfd, struct VhostUserMsg *msg)
>> +{
>> + int ret;
>> +
>> + ret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE,
>> + msg->fds, VHOST_MEMORY_MAX_NREGIONS);
>> + if (ret <= 0)
>> + return ret;
>> +
>> + if (msg->size) {
>> + if (msg->size > sizeof(msg->payload)) {
>> + RTE_LOG(ERR, VHOST_CONFIG,
>> + "%s: invalid size:%d\n", __func__, msg->size);
>> + return -1;
>> + }
>> + ret = read(sockfd, &msg->payload, msg->size);
>> + if (ret == 0)
>> + return 0;
>> + if (ret != (int)msg->size) {
>> + printf("read control message failed\n");
>> + return -1;
>> + }
>> + }
>> +
>> + return ret;
>> +}
>> +
/*
 * Send buflen bytes from buf on sockfd, attaching fd_num descriptors
 * from fds as a single SCM_RIGHTS control message when provided.
 * Retries sendmsg() on EINTR.
 * @return 0 on success, -1 on error.
 */
static int
send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
{

	struct iovec iov;
	struct msghdr msgh = { 0 };
	size_t fdsize = fd_num * sizeof(int);
	char control[CMSG_SPACE(fdsize)];
	struct cmsghdr *cmsg;
	int ret;

	iov.iov_base = buf;
	iov.iov_len = buflen;
	msgh.msg_iov = &iov;
	msgh.msg_iovlen = 1;

	if (fds && fd_num > 0) {
		/* Pack the descriptors into the ancillary data. */
		msgh.msg_control = control;
		msgh.msg_controllen = sizeof(control);
		cmsg = CMSG_FIRSTHDR(&msgh);
		cmsg->cmsg_len = CMSG_LEN(fdsize);
		cmsg->cmsg_level = SOL_SOCKET;
		cmsg->cmsg_type = SCM_RIGHTS;
		memcpy(CMSG_DATA(cmsg), fds, fdsize);
	} else {
		msgh.msg_control = NULL;
		msgh.msg_controllen = 0;
	}

	do {
		ret = sendmsg(sockfd, &msgh, 0);
	} while (ret < 0 && errno == EINTR);

	if (ret < 0) {
		RTE_LOG(ERR, VHOST_CONFIG, "sendmsg error\n");
		return -1;
	}

	return 0;
}
>> +
>> +static int
>> +send_vhost_message(int sockfd, struct VhostUserMsg *msg)
>> +{
>> + int ret;
>> +
>> + msg->flags &= ~VHOST_USER_VERSION_MASK;
>> + msg->flags |= VHOST_USER_VERSION;
>> + msg->flags |= VHOST_USER_REPLY_MASK;
>> +
>> + ret = send_fd_message(sockfd, (char *)msg,
>> + VHOST_USER_HDR_SIZE + msg->size, NULL, 0);
>> +
>> + return ret;
>> +}
>> +
>> +/* call back when there is new connection. */
>> +static void
>> +vserver_new_vq_conn(int fd, uint64_t dat)
>> +{
>> + struct vhost_server *vserver = (void *)(uintptr_t)dat;
>> + int conn_fd;
>> + uint32_t fh;
>> + struct vhost_device_ctx vdev_ctx = { 0 };
>> +
>> + conn_fd = accept(fd, NULL, NULL);
>> + RTE_LOG(INFO, VHOST_CONFIG,
>> + "%s: new connection is %d\n", __func__, conn_fd);
>> + if (conn_fd < 0)
>> + return;
>> +
>> + fh = ops->new_device(vdev_ctx);
>> + RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", fh);
>> +
>> + fdset_add(&vserver->fdset,
>> + conn_fd, vserver_message_handler, NULL, fh);
>> +}
>> +
>> +/* callback when there is message on the connfd */
>> +static void
>> +vserver_message_handler(int connfd, uint64_t dat)
>> +{
>> + struct vhost_device_ctx ctx;
>> + uint32_t fh = (uint32_t)dat;
>> + struct VhostUserMsg msg;
>> + uint64_t features;
>> + int ret;
>> +
>> + ctx.fh = fh;
>> + ret = read_vhost_message(connfd, &msg);
>> + if (ret < 0) {
>> + printf("vhost read message failed\n");
>> +
>> + /*TODO: cleanup */
>> + close(connfd);
>> + fdset_del(&g_vhost_server->fdset, connfd);
>> + ops->destroy_device(ctx);
>> +
>> + return;
>> + } else if (ret == 0) {
>> + /*TODO: cleanup */
>> + RTE_LOG(INFO, VHOST_CONFIG,
>> + "vhost peer closed\n");
>> + close(connfd);
>> + fdset_del(&g_vhost_server->fdset, connfd);
>> + ops->destroy_device(ctx);
>> +
>> + return;
>> + }
>> + if (msg.request > VHOST_USER_MAX) {
>> + /*TODO: cleanup */
>> + RTE_LOG(INFO, VHOST_CONFIG,
>> + "vhost read incorrect message\n");
>> + close(connfd);
>> + fdset_del(&g_vhost_server->fdset, connfd);
>> +
>> + return;
>> + }
>> +
>> + RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
>> + vhost_message_str[msg.request]);
>> + switch (msg.request) {
>> + case VHOST_USER_GET_FEATURES:
>> + ret = ops->get_features(ctx, &features);
>> + msg.payload.u64 = ret;
>> + msg.size = sizeof(msg.payload.u64);
>> + send_vhost_message(connfd, &msg);
>> + break;
>> + case VHOST_USER_SET_FEATURES:
>> + ops->set_features(ctx, &features);
>> + break;
>> +
>> + case VHOST_USER_SET_OWNER:
>> + ops->set_owner(ctx);
>> + break;
>> + case VHOST_USER_RESET_OWNER:
>> + ops->reset_owner(ctx);
>> + break;
>> +
>> + case VHOST_USER_SET_MEM_TABLE:
>> + user_set_mem_table(ctx, &msg);
>> + break;
>> +
>> + case VHOST_USER_SET_LOG_BASE:
>> + case VHOST_USER_SET_LOG_FD:
>> + RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n");
>> + break;
>> +
>> + case VHOST_USER_SET_VRING_NUM:
>> + ops->set_vring_num(ctx, &msg.payload.state);
>> + break;
>> + case VHOST_USER_SET_VRING_ADDR:
>> + ops->set_vring_addr(ctx, &msg.payload.addr);
>> + break;
>> + case VHOST_USER_SET_VRING_BASE:
>> + ops->set_vring_base(ctx, &msg.payload.state);
>> + break;
>> +
>> + case VHOST_USER_GET_VRING_BASE:
>> + ret = ops->get_vring_base(ctx, msg.payload.state.index,
>> + &msg.payload.state);
>> + msg.size = sizeof(msg.payload.state);
>> + send_vhost_message(connfd, &msg);
>> + break;
>> +
>> + case VHOST_USER_SET_VRING_KICK:
>> + user_set_vring_kick(ctx, &msg);
>> + break;
>> + case VHOST_USER_SET_VRING_CALL:
>> + user_set_vring_call(ctx, &msg);
>> + break;
>> +
>> + case VHOST_USER_SET_VRING_ERR:
>> + RTE_LOG(INFO, VHOST_CONFIG, "not implemented\n");
>> + break;
>> +
>> + default:
>> + break;
>> +
>> + }
>> +}
>> +
>> +
>> +/**
>> + * Creates and initialise the vhost server.
>> + */
>> +int
>> +rte_vhost_driver_register(const char *path)
>> +{
>> +
>> + struct vhost_server *vserver;
>> +
>> + if (g_vhost_server != NULL)
>> + return -1;
>> +
>> + vserver = calloc(sizeof(struct vhost_server), 1);
>> + /*TODO: all allocation is through DPDK memory allocation */
>> + if (vserver == NULL)
>> + return -1;
>> +
>> + fdset_init(&vserver->fdset);
>> +
>> + unlink(path);
>> +
>> + vserver->listenfd = uds_socket(path);
>> + if (vserver->listenfd < 0) {
>> + free(vserver);
>> + return -1;
>> + }
>> + vserver->path = path;
>> +
>> + fdset_add(&vserver->fdset, vserver->listenfd,
>> + vserver_new_vq_conn, NULL,
>> + (uint64_t)(uintptr_t)vserver);
>> +
>> + ops = get_virtio_net_callbacks();
>> +
>> + g_vhost_server = vserver;
>> +
>> + return 0;
>> +}
>> +
>> +
>> +int
>> +rte_vhost_driver_session_start(void)
>> +{
>> + fdset_event_dispatch(&g_vhost_server->fdset);
>> + return 0;
>> +}
>> +
>> diff --git a/lib/librte_vhost/vhost-user/vhost-net-user.h b/lib/librte_vhost/vhost-user/vhost-net-user.h
>> new file mode 100644
>> index 0000000..c9df9fa
>> --- /dev/null
>> +++ b/lib/librte_vhost/vhost-user/vhost-net-user.h
>> @@ -0,0 +1,74 @@
>> +#ifndef _VHOST_NET_USER_H
>> +#define _VHOST_NET_USER_H
>> +#include <stdint.h>
>> +#include <linux/vhost.h>
>> +
>> +#include "fd_man.h"
>> +
>> +struct vhost_server {
>> + const char *path; /**< The path the uds is bind to. */
>> + int listenfd; /**< The listener sockfd. */
>> + struct fdset fdset; /**< The fd list this vhost server manages. */
>> +};
>> +
>> +/*********** FROM hw/virtio/vhost-user.c *************************************/
>> +
>> +#define VHOST_MEMORY_MAX_NREGIONS 8
>> +
>> +typedef enum VhostUserRequest {
>> + VHOST_USER_NONE = 0,
>> + VHOST_USER_GET_FEATURES = 1,
>> + VHOST_USER_SET_FEATURES = 2,
>> + VHOST_USER_SET_OWNER = 3,
>> + VHOST_USER_RESET_OWNER = 4,
>> + VHOST_USER_SET_MEM_TABLE = 5,
>> + VHOST_USER_SET_LOG_BASE = 6,
>> + VHOST_USER_SET_LOG_FD = 7,
>> + VHOST_USER_SET_VRING_NUM = 8,
>> + VHOST_USER_SET_VRING_ADDR = 9,
>> + VHOST_USER_SET_VRING_BASE = 10,
>> + VHOST_USER_GET_VRING_BASE = 11,
>> + VHOST_USER_SET_VRING_KICK = 12,
>> + VHOST_USER_SET_VRING_CALL = 13,
>> + VHOST_USER_SET_VRING_ERR = 14,
>> + VHOST_USER_MAX
>> +} VhostUserRequest;
>> +
>> +typedef struct VhostUserMemoryRegion {
>> + uint64_t guest_phys_addr;
>> + uint64_t memory_size;
>> + uint64_t userspace_addr;
>> + uint64_t mmap_offset;
>> +} VhostUserMemoryRegion;
>> +
>> +typedef struct VhostUserMemory {
>> + uint32_t nregions;
>> + uint32_t padding;
>> + VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
>> +} VhostUserMemory;
>> +
>> +typedef struct VhostUserMsg {
>> + VhostUserRequest request;
>> +
>> +#define VHOST_USER_VERSION_MASK (0x3)
>> +#define VHOST_USER_REPLY_MASK (0x1 << 2)
>> + uint32_t flags;
>> + uint32_t size; /* the following payload size */
>> + union {
>> +#define VHOST_USER_VRING_IDX_MASK (0xff)
>> +#define VHOST_USER_VRING_NOFD_MASK (0x1<<8)
>> + uint64_t u64;
>> + struct vhost_vring_state state;
>> + struct vhost_vring_addr addr;
>> + VhostUserMemory memory;
>> + } payload;
>> + int fds[VHOST_MEMORY_MAX_NREGIONS];
>> +} __attribute__((packed)) VhostUserMsg;
>> +
>> +#define VHOST_USER_HDR_SIZE (intptr_t)(&((VhostUserMsg *)0)->payload.u64)
>> +
>> +/* The version of the protocol we support */
>> +#define VHOST_USER_VERSION (0x1)
>> +
>> +/*****************************************************************************/
>> +#endif
>> diff --git a/lib/librte_vhost/vhost-user/virtio-net-user.c b/lib/librte_vhost/vhost-user/virtio-net-user.c
>> new file mode 100644
>> index 0000000..f38e6cc
>> --- /dev/null
>> +++ b/lib/librte_vhost/vhost-user/virtio-net-user.c
>> @@ -0,0 +1,208 @@
>> +/*-
>> + * BSD LICENSE
>> + *
>> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
>> + * All rights reserved.
>> + *
>> + * Redistribution and use in source and binary forms, with or without
>> + * modification, are permitted provided that the following conditions
>> + * are met:
>> + *
>> + * * Redistributions of source code must retain the above copyright
>> + * notice, this list of conditions and the following disclaimer.
>> + * * Redistributions in binary form must reproduce the above copyright
>> + * notice, this list of conditions and the following disclaimer in
>> + * the documentation and/or other materials provided with the
>> + * distribution.
>> + * * Neither the name of Intel Corporation nor the names of its
>> + * contributors may be used to endorse or promote products derived
>> + * from this software without specific prior written permission.
>> + *
>> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>> + */
>> +
>> +#include <stdint.h>
>> +#include <stdio.h>
>> +#include <stdlib.h>
>> +#include <unistd.h>
>> +#include <sys/mman.h>
>> +
>> +#include <rte_log.h>
>> +
>> +#include "virtio-net-user.h"
>> +#include "vhost-net-user.h"
>> +#include "vhost-net.h"
>> +
>> +extern const struct vhost_net_device_ops *ops;
>> +
>> +#if 0
/*
 * NOTE: disabled (#if 0) alternative that maps guest memory with a
 * single mmap() of fds[0] from offset 0, assuming qemu backs every
 * region with one file. The active variant in the #else branch maps
 * each region's fd separately.
 */
int
user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
{
	unsigned int idx;
	struct VhostUserMemory memory = pmsg->payload.memory;
	struct virtio_memory_regions regions[VHOST_MEMORY_MAX_NREGIONS];
	uint64_t mapped_address, base_address = 0, mem_size = 0;

	/* The region with guest physical address 0 anchors the layout. */
	for (idx = 0; idx < memory.nregions; idx++) {
		if (memory.regions[idx].guest_phys_addr == 0)
			base_address = memory.regions[idx].userspace_addr;
	}
	if (base_address == 0) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"couldn't find the mem region whose gpa is 0.\n");
		return -1;
	}

	/* Total span to map: the highest region end relative to base. */
	for (idx = 0; idx < memory.nregions; idx++) {
		uint64_t size = memory.regions[idx].userspace_addr -
			base_address + memory.regions[idx].memory_size;
		if (mem_size < size)
			mem_size = size;
	}

	/*
	 * here we assume qemu will map only one file for memory allocation,
	 * we only use fds[0] with offset 0.
	 */
	mapped_address = (uint64_t)(uintptr_t)mmap(NULL, mem_size,
		PROT_READ | PROT_WRITE, MAP_SHARED, pmsg->fds[0], 0);

	if (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) {
		RTE_LOG(ERR, VHOST_CONFIG, " mmap qemu guest failed.\n");
		return -1;
	}

	for (idx = 0; idx < memory.nregions; idx++) {
		regions[idx].guest_phys_address =
			memory.regions[idx].guest_phys_addr;
		regions[idx].guest_phys_address_end =
			memory.regions[idx].guest_phys_addr +
			memory.regions[idx].memory_size;
		regions[idx].memory_size = memory.regions[idx].memory_size;
		regions[idx].userspace_address =
			memory.regions[idx].userspace_addr;

		/* Per-region GPA -> host VA translation offset. */
		regions[idx].address_offset = mapped_address - base_address +
			regions[idx].userspace_address -
			regions[idx].guest_phys_address;
		LOG_DEBUG(VHOST_CONFIG,
			"REGION: %u - GPA: %p - QEMU VA: %p - SIZE (%"PRIu64")\n",
			idx,
			(void *)(uintptr_t)regions[idx].guest_phys_address,
			(void *)(uintptr_t)regions[idx].userspace_address,
			regions[idx].memory_size);
	}
	ops->set_mem_table(ctx, regions, memory.nregions);
	return 0;
}
>> +
>> +#else
>> +
>> +int
>> +user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
>> +{
>> + unsigned int idx;
>> + struct VhostUserMemory memory = pmsg->payload.memory;
>> + struct virtio_memory_regions regions[VHOST_MEMORY_MAX_NREGIONS];
>> + uint64_t mapped_address, base_address = 0;
>> +
>> + for (idx = 0; idx < memory.nregions; idx++) {
>> + if (memory.regions[idx].guest_phys_addr == 0)
>> + base_address = memory.regions[idx].userspace_addr;
>> + }
>> + if (base_address == 0) {
>> + RTE_LOG(ERR, VHOST_CONFIG,
>> + "couldn't find the mem region whose gpa is 0.\n");
>> + return -1;
>> + }
>> +
>> +
>> + for (idx = 0; idx < memory.nregions; idx++) {
>> + regions[idx].guest_phys_address =
>> + memory.regions[idx].guest_phys_addr;
>> + regions[idx].guest_phys_address_end =
>> + memory.regions[idx].guest_phys_addr +
>> + memory.regions[idx].memory_size;
>> + regions[idx].memory_size = memory.regions[idx].memory_size;
>> + regions[idx].userspace_address =
>> + memory.regions[idx].userspace_addr;
>> +/*
>> + mapped_address = (uint64_t)(uintptr_t)mmap(NULL,
>> + regions[idx].memory_size,
>> + PROT_READ | PROT_WRITE, MAP_SHARED,
>> + pmsg->fds[idx],
>> + memory.regions[idx].mmap_offset);
>> +*/
>> +
>> +/* This is ugly */
>> + mapped_address = (uint64_t)(uintptr_t)mmap(NULL,
>> + regions[idx].memory_size +
>> + memory.regions[idx].mmap_offset,
>> + PROT_READ | PROT_WRITE, MAP_SHARED,
>> + pmsg->fds[idx],
>> + 0);
>> + printf("mapped to %p\n", (void *)mapped_address);
>> +
>> + if (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) {
>> + RTE_LOG(ERR, VHOST_CONFIG, " mmap qemu guest failed.\n");
>> + return -1;
>> + }
>> +
>> +// printf("ret=%d\n", munmap((void *)mapped_address, (regions[idx].memory_size + memory.regions[idx].mmap_offset + 0x3FFFFFFF) & ~0x3FFFFFFF));
>> +// printf("unaligned ret=%d\n", munmap((void *)mapped_address, (regions[idx].memory_size + memory.regions[idx].mmap_offset ) ));
>> + mapped_address += memory.regions[idx].mmap_offset;
>> +
>> + regions[idx].address_offset = mapped_address -
>> + regions[idx].guest_phys_address;
>> + LOG_DEBUG(VHOST_CONFIG,
>> + "REGION: %u - GPA: %p - QEMU VA: %p - SIZE (%"PRIu64")\n",
>> + idx,
>> + (void *)(uintptr_t)regions[idx].guest_phys_address,
>> + (void *)(uintptr_t)regions[idx].userspace_address,
>> + regions[idx].memory_size);
>> + }
>> + ops->set_mem_table(ctx, regions, memory.nregions);
>> + return 0;
>> +}
>> +
>> +
>> +
>> +
>> +#endif
>> +
>> +
>> +void
>> +user_set_vring_call(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
>> +{
>> + struct vhost_vring_file file;
>> +
>> + file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
>> + file.fd = pmsg->fds[0];
>> + RTE_LOG(INFO, VHOST_CONFIG,
>> + "vring call idx:%d file:%d\n", file.index, file.fd);
>> + ops->set_vring_call(ctx, &file);
>> +}
>> +
>> +
>> +void
>> +user_set_vring_kick(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
>> +{
>> + struct vhost_vring_file file;
>> +
>> + file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
>> + file.fd = pmsg->fds[0];
>> + RTE_LOG(INFO, VHOST_CONFIG,
>> + "vring kick idx:%d file:%d\n", file.index, file.fd);
>> + ops->set_vring_kick(ctx, &file);
>> +}
>> diff --git a/lib/librte_vhost/vhost-user/virtio-net-user.h b/lib/librte_vhost/vhost-user/virtio-net-user.h
>> new file mode 100644
>> index 0000000..0969376
>> --- /dev/null
>> +++ b/lib/librte_vhost/vhost-user/virtio-net-user.h
>> @@ -0,0 +1,11 @@
#ifndef _VIRTIO_NET_USER_H
#define _VIRTIO_NET_USER_H

#include "vhost-net.h"
#include "vhost-net-user.h"

/* Map the regions from a SET_MEM_TABLE message; 0 on success, -1 on error. */
int user_set_mem_table(struct vhost_device_ctx, struct VhostUserMsg *);
/* Forward the kickfd/callfd carried in the message to the device. */
void user_set_vring_kick(struct vhost_device_ctx, struct VhostUserMsg *);
void user_set_vring_call(struct vhost_device_ctx, struct VhostUserMsg *);

#endif
>> diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
>> index ccfd82f..8ff0301 100644
>> --- a/lib/librte_vhost/vhost_rxtx.c
>> +++ b/lib/librte_vhost/vhost_rxtx.c
>> @@ -38,19 +38,14 @@
>> #include <rte_memcpy.h>
>> #include <rte_virtio_net.h>
>>
>> -#include "vhost-net-cdev.h"
>> +#include "vhost-net.h"
>>
>> -#define MAX_PKT_BURST 32
>> +#define VHOST_MAX_PKT_BURST 64
>> +#define VHOST_MAX_MRG_PKT_BURST 64
>>
>> -/**
>> - * This function adds buffers to the virtio devices RX virtqueue. Buffers can
>> - * be received from the physical port or from another virtio device. A packet
>> - * count is returned to indicate the number of packets that are succesfully
>> - * added to the RX queue. This function works when mergeable is disabled.
>> - */
>> -static inline uint32_t __attribute__((always_inline))
>> -virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>> - struct rte_mbuf **pkts, uint32_t count)
>> +
>> +uint32_t
>> +rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id, struct rte_mbuf **pkts, uint32_t count)
>> {
>> struct vhost_virtqueue *vq;
>> struct vring_desc *desc;
>> @@ -59,26 +54,23 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>> struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
>> uint64_t buff_addr = 0;
>> uint64_t buff_hdr_addr = 0;
>> - uint32_t head[MAX_PKT_BURST], packet_len = 0;
>> + uint32_t head[VHOST_MAX_PKT_BURST], packet_len = 0;
>> uint32_t head_idx, packet_success = 0;
>> + uint32_t mergeable, mrg_count = 0;
>> uint16_t avail_idx, res_cur_idx;
>> uint16_t res_base_idx, res_end_idx;
>> uint16_t free_entries;
>> uint8_t success = 0;
>>
>> - LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_rx()\n", dev->device_fh);
>> + LOG_DEBUG(VHOST_DATA, "(%"PRIu64") %s()\n", dev->device_fh, __func__);
>> if (unlikely(queue_id != VIRTIO_RXQ)) {
>> LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
>> return 0;
>> }
>>
>> vq = dev->virtqueue[VIRTIO_RXQ];
>> - count = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count;
>> -
>> - /*
>> - * As many data cores may want access to available buffers,
>> - * they need to be reserved.
>> - */
>> + count = (count > VHOST_MAX_PKT_BURST) ? VHOST_MAX_PKT_BURST : count;
>> + /* As many data cores may want access to available buffers, they need to be reserved. */
>> do {
>> res_base_idx = vq->last_used_idx_res;
>> avail_idx = *((volatile uint16_t *)&vq->avail->idx);
>> @@ -93,21 +85,25 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>>
>> res_end_idx = res_base_idx + count;
>> /* vq->last_used_idx_res is atomically updated. */
>> - /* TODO: Allow to disable cmpset if no concurrency in application. */
>> + /* TODO: Allow to disable cmpset if no concurrency in application */
>> success = rte_atomic16_cmpset(&vq->last_used_idx_res,
>> res_base_idx, res_end_idx);
>> + /* If there is contention here and failed, try again. */
>> } while (unlikely(success == 0));
>> res_cur_idx = res_base_idx;
>> LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| End Index %d\n",
>> - dev->device_fh, res_cur_idx, res_end_idx);
>> + dev->device_fh,
>> + res_cur_idx, res_end_idx);
>>
>> /* Prefetch available ring to retrieve indexes. */
>> rte_prefetch0(&vq->avail->ring[res_cur_idx & (vq->size - 1)]);
>>
>> + /* Check if the VIRTIO_NET_F_MRG_RXBUF feature is enabled. */
>> + mergeable = dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF);
>> +
>> /* Retrieve all of the head indexes first to avoid caching issues. */
>> for (head_idx = 0; head_idx < count; head_idx++)
>> - head[head_idx] = vq->avail->ring[(res_cur_idx + head_idx) &
>> - (vq->size - 1)];
>> + head[head_idx] = vq->avail->ring[(res_cur_idx + head_idx) & (vq->size - 1)];
>>
>> /*Prefetch descriptor index. */
>> rte_prefetch0(&vq->desc[head[packet_success]]);
>> @@ -123,46 +119,57 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>> /* Prefetch buffer address. */
>> rte_prefetch0((void *)(uintptr_t)buff_addr);
>>
>> - /* Copy virtio_hdr to packet and increment buffer address */
>> - buff_hdr_addr = buff_addr;
>> - packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;
>> -
>> - /*
>> - * If the descriptors are chained the header and data are
>> - * placed in separate buffers.
>> - */
>> - if (desc->flags & VRING_DESC_F_NEXT) {
>> - desc->len = vq->vhost_hlen;
>> - desc = &vq->desc[desc->next];
>> - /* Buffer address translation. */
>> - buff_addr = gpa_to_vva(dev, desc->addr);
>> - desc->len = rte_pktmbuf_data_len(buff);
>> + if (mergeable && (mrg_count != 0)) {
>> + desc->len = packet_len = rte_pktmbuf_data_len(buff);
>> } else {
>> - buff_addr += vq->vhost_hlen;
>> - desc->len = packet_len;
>> + /* Copy virtio_hdr to packet and increment buffer address */
>> + buff_hdr_addr = buff_addr;
>> + packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;
>> +
>> + /*
>> + * If the descriptors are chained the header and data are placed in
>> + * separate buffers.
>> + */
>> + if (desc->flags & VRING_DESC_F_NEXT) {
>> + desc->len = vq->vhost_hlen;
>> + desc = &vq->desc[desc->next];
>> + /* Buffer address translation. */
>> + buff_addr = gpa_to_vva(dev, desc->addr);
>> + desc->len = rte_pktmbuf_data_len(buff);
>> + } else {
>> + buff_addr += vq->vhost_hlen;
>> + desc->len = packet_len;
>> + }
>> }
>>
>> + VHOST_PRINT_PACKET(dev, (uintptr_t)buff_addr, rte_pktmbuf_data_len(buff), 0);
>> +
>> /* Update used ring with desc information */
>> - vq->used->ring[res_cur_idx & (vq->size - 1)].id =
>> - head[packet_success];
>> + vq->used->ring[res_cur_idx & (vq->size - 1)].id = head[packet_success];
>> vq->used->ring[res_cur_idx & (vq->size - 1)].len = packet_len;
>>
>> /* Copy mbuf data to buffer */
>> - /* FIXME for sg mbuf and the case that desc couldn't hold the mbuf data */
>> - rte_memcpy((void *)(uintptr_t)buff_addr,
>> - rte_pktmbuf_mtod(buff, const void *),
>> - rte_pktmbuf_data_len(buff));
>> - PRINT_PACKET(dev, (uintptr_t)buff_addr,
>> - rte_pktmbuf_data_len(buff), 0);
>> +		/* TODO: fix for scatter-gather mbufs and the case where the desc cannot hold all the mbuf data */
>> + rte_memcpy((void *)(uintptr_t)buff_addr, (const void *)buff->pkt.data, rte_pktmbuf_data_len(buff));
>>
>> res_cur_idx++;
>> packet_success++;
>>
>> - rte_memcpy((void *)(uintptr_t)buff_hdr_addr,
>> - (const void *)&virtio_hdr, vq->vhost_hlen);
>> -
>> - PRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1);
>> -
>> + /* If mergeable is disabled then a header is required per buffer. */
>> + if (!mergeable) {
>> + rte_memcpy((void *)(uintptr_t)buff_hdr_addr, (const void *)&virtio_hdr, vq->vhost_hlen);
>> + VHOST_PRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1);
>> + } else {
>> + mrg_count++;
>> + /* Merge buffer can only handle so many buffers at a time. Tell the guest if this limit is reached. */
>> + if ((mrg_count == VHOST_MAX_MRG_PKT_BURST) || (res_cur_idx == res_end_idx)) {
>> + virtio_hdr.num_buffers = mrg_count;
>> + LOG_DEBUG(VHOST_DATA, "(%"PRIu64") RX: Num merge buffers %d\n", dev->device_fh, virtio_hdr.num_buffers);
>> + rte_memcpy((void *)(uintptr_t)buff_hdr_addr, (const void *)&virtio_hdr, vq->vhost_hlen);
>> + VHOST_PRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1);
>> + mrg_count = 0;
>> + }
>> + }
>> if (res_cur_idx < res_end_idx) {
>> /* Prefetch descriptor index. */
>> rte_prefetch0(&vq->desc[head[packet_success]]);
>> @@ -184,357 +191,18 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>> return count;
>> }
>>
>> -static inline uint32_t __attribute__((always_inline))
>> -copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
>> - uint16_t res_end_idx, struct rte_mbuf *pkt)
>> -{
>> - uint32_t vec_idx = 0;
>> - uint32_t entry_success = 0;
>> - struct vhost_virtqueue *vq;
>> - /* The virtio_hdr is initialised to 0. */
>> - struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {
>> - {0, 0, 0, 0, 0, 0}, 0};
>> - uint16_t cur_idx = res_base_idx;
>> - uint64_t vb_addr = 0;
>> - uint64_t vb_hdr_addr = 0;
>> - uint32_t seg_offset = 0;
>> - uint32_t vb_offset = 0;
>> - uint32_t seg_avail;
>> - uint32_t vb_avail;
>> - uint32_t cpy_len, entry_len;
>> -
>> - if (pkt == NULL)
>> - return 0;
>> -
>> - LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| "
>> - "End Index %d\n",
>> - dev->device_fh, cur_idx, res_end_idx);
>> -
>> - /*
>> - * Convert from gpa to vva
>> - * (guest physical addr -> vhost virtual addr)
>> - */
>> - vq = dev->virtqueue[VIRTIO_RXQ];
>> - vb_addr =
>> - gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
>> - vb_hdr_addr = vb_addr;
>> -
>> - /* Prefetch buffer address. */
>> - rte_prefetch0((void *)(uintptr_t)vb_addr);
>> -
>> - virtio_hdr.num_buffers = res_end_idx - res_base_idx;
>> -
>> - LOG_DEBUG(VHOST_DATA, "(%"PRIu64") RX: Num merge buffers %d\n",
>> - dev->device_fh, virtio_hdr.num_buffers);
>>
>> - rte_memcpy((void *)(uintptr_t)vb_hdr_addr,
>> - (const void *)&virtio_hdr, vq->vhost_hlen);
>> -
>> - PRINT_PACKET(dev, (uintptr_t)vb_hdr_addr, vq->vhost_hlen, 1);
>> -
>> - seg_avail = rte_pktmbuf_data_len(pkt);
>> - vb_offset = vq->vhost_hlen;
>> - vb_avail =
>> - vq->buf_vec[vec_idx].buf_len - vq->vhost_hlen;
>> -
>> - entry_len = vq->vhost_hlen;
>> -
>> - if (vb_avail == 0) {
>> - uint32_t desc_idx =
>> - vq->buf_vec[vec_idx].desc_idx;
>> - vq->desc[desc_idx].len = vq->vhost_hlen;
>> -
>> - if ((vq->desc[desc_idx].flags
>> - & VRING_DESC_F_NEXT) == 0) {
>> - /* Update used ring with desc information */
>> - vq->used->ring[cur_idx & (vq->size - 1)].id
>> - = vq->buf_vec[vec_idx].desc_idx;
>> - vq->used->ring[cur_idx & (vq->size - 1)].len
>> - = entry_len;
>> -
>> - entry_len = 0;
>> - cur_idx++;
>> - entry_success++;
>> - }
>> -
>> - vec_idx++;
>> - vb_addr =
>> - gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
>> -
>> - /* Prefetch buffer address. */
>> - rte_prefetch0((void *)(uintptr_t)vb_addr);
>> - vb_offset = 0;
>> - vb_avail = vq->buf_vec[vec_idx].buf_len;
>> - }
>> -
>> - cpy_len = RTE_MIN(vb_avail, seg_avail);
>> -
>> - while (cpy_len > 0) {
>> - /* Copy mbuf data to vring buffer */
>> - rte_memcpy((void *)(uintptr_t)(vb_addr + vb_offset),
>> - (const void *)(rte_pktmbuf_mtod(pkt, char*) + seg_offset),
>> - cpy_len);
>> -
>> - PRINT_PACKET(dev,
>> - (uintptr_t)(vb_addr + vb_offset),
>> - cpy_len, 0);
>> -
>> - seg_offset += cpy_len;
>> - vb_offset += cpy_len;
>> - seg_avail -= cpy_len;
>> - vb_avail -= cpy_len;
>> - entry_len += cpy_len;
>> -
>> - if (seg_avail != 0) {
>> - /*
>> - * The virtio buffer in this vring
>> - * entry reach to its end.
>> - * But the segment doesn't complete.
>> - */
>> - if ((vq->desc[vq->buf_vec[vec_idx].desc_idx].flags &
>> - VRING_DESC_F_NEXT) == 0) {
>> - /* Update used ring with desc information */
>> - vq->used->ring[cur_idx & (vq->size - 1)].id
>> - = vq->buf_vec[vec_idx].desc_idx;
>> - vq->used->ring[cur_idx & (vq->size - 1)].len
>> - = entry_len;
>> - entry_len = 0;
>> - cur_idx++;
>> - entry_success++;
>> - }
>> -
>> - vec_idx++;
>> - vb_addr = gpa_to_vva(dev,
>> - vq->buf_vec[vec_idx].buf_addr);
>> - vb_offset = 0;
>> - vb_avail = vq->buf_vec[vec_idx].buf_len;
>> - cpy_len = RTE_MIN(vb_avail, seg_avail);
>> - } else {
>> - /*
>> - * This current segment complete, need continue to
>> - * check if the whole packet complete or not.
>> - */
>> - pkt = pkt->next;
>> - if (pkt != NULL) {
>> - /*
>> - * There are more segments.
>> - */
>> - if (vb_avail == 0) {
>> - /*
>> - * This current buffer from vring is
>> - * used up, need fetch next buffer
>> - * from buf_vec.
>> - */
>> - uint32_t desc_idx =
>> - vq->buf_vec[vec_idx].desc_idx;
>> - vq->desc[desc_idx].len = vb_offset;
>> -
>> - if ((vq->desc[desc_idx].flags &
>> - VRING_DESC_F_NEXT) == 0) {
>> - uint16_t wrapped_idx =
>> - cur_idx & (vq->size - 1);
>> - /*
>> - * Update used ring with the
>> - * descriptor information
>> - */
>> - vq->used->ring[wrapped_idx].id
>> - = desc_idx;
>> - vq->used->ring[wrapped_idx].len
>> - = entry_len;
>> - entry_success++;
>> - entry_len = 0;
>> - cur_idx++;
>> - }
>> -
>> - /* Get next buffer from buf_vec. */
>> - vec_idx++;
>> - vb_addr = gpa_to_vva(dev,
>> - vq->buf_vec[vec_idx].buf_addr);
>> - vb_avail =
>> - vq->buf_vec[vec_idx].buf_len;
>> - vb_offset = 0;
>> - }
>> -
>> - seg_offset = 0;
>> - seg_avail = rte_pktmbuf_data_len(pkt);
>> - cpy_len = RTE_MIN(vb_avail, seg_avail);
>> - } else {
>> - /*
>> - * This whole packet completes.
>> - */
>> - uint32_t desc_idx =
>> - vq->buf_vec[vec_idx].desc_idx;
>> - vq->desc[desc_idx].len = vb_offset;
>> -
>> - while (vq->desc[desc_idx].flags &
>> - VRING_DESC_F_NEXT) {
>> - desc_idx = vq->desc[desc_idx].next;
>> - vq->desc[desc_idx].len = 0;
>> - }
>> -
>> - /* Update used ring with desc information */
>> - vq->used->ring[cur_idx & (vq->size - 1)].id
>> - = vq->buf_vec[vec_idx].desc_idx;
>> - vq->used->ring[cur_idx & (vq->size - 1)].len
>> - = entry_len;
>> - entry_len = 0;
>> - cur_idx++;
>> - entry_success++;
>> - seg_avail = 0;
>> - cpy_len = RTE_MIN(vb_avail, seg_avail);
>> - }
>> - }
>> - }
>> -
>> - return entry_success;
>> -}
>> -
>> -/*
>> - * This function works for mergeable RX.
>> - */
>> -static inline uint32_t __attribute__((always_inline))
>> -virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
>> - struct rte_mbuf **pkts, uint32_t count)
>> +uint32_t
>> +rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id, struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint32_t count)
>> {
>> - struct vhost_virtqueue *vq;
>> - uint32_t pkt_idx = 0, entry_success = 0;
>> - uint16_t avail_idx, res_cur_idx;
>> - uint16_t res_base_idx, res_end_idx;
>> - uint8_t success = 0;
>> -
>> - LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_merge_rx()\n",
>> - dev->device_fh);
>> - if (unlikely(queue_id != VIRTIO_RXQ)) {
>> - LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
>> - }
>> -
>> - vq = dev->virtqueue[VIRTIO_RXQ];
>> - count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
>> -
>> - if (count == 0)
>> - return 0;
>> -
>> - for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
>> - uint32_t secure_len = 0;
>> - uint16_t need_cnt;
>> - uint32_t vec_idx = 0;
>> - uint32_t pkt_len = pkts[pkt_idx]->pkt_len + vq->vhost_hlen;
>> - uint16_t i, id;
>> -
>> - do {
>> - /*
>> - * As many data cores may want access to available
>> - * buffers, they need to be reserved.
>> - */
>> - res_base_idx = vq->last_used_idx_res;
>> - res_cur_idx = res_base_idx;
>> -
>> - do {
>> - avail_idx = *((volatile uint16_t *)&vq->avail->idx);
>> - if (unlikely(res_cur_idx == avail_idx)) {
>> - LOG_DEBUG(VHOST_DATA,
>> - "(%"PRIu64") Failed "
>> - "to get enough desc from "
>> - "vring\n",
>> - dev->device_fh);
>> - return pkt_idx;
>> - } else {
>> - uint16_t wrapped_idx =
>> - (res_cur_idx) & (vq->size - 1);
>> - uint32_t idx =
>> - vq->avail->ring[wrapped_idx];
>> - uint8_t next_desc;
>> -
>> - do {
>> - next_desc = 0;
>> - secure_len += vq->desc[idx].len;
>> - if (vq->desc[idx].flags &
>> - VRING_DESC_F_NEXT) {
>> - idx = vq->desc[idx].next;
>> - next_desc = 1;
>> - }
>> - } while (next_desc);
>> -
>> - res_cur_idx++;
>> - }
>> - } while (pkt_len > secure_len);
>> -
>> - /* vq->last_used_idx_res is atomically updated. */
>> - success = rte_atomic16_cmpset(&vq->last_used_idx_res,
>> - res_base_idx,
>> - res_cur_idx);
>> - } while (success == 0);
>> -
>> - id = res_base_idx;
>> - need_cnt = res_cur_idx - res_base_idx;
>> -
>> - for (i = 0; i < need_cnt; i++, id++) {
>> - uint16_t wrapped_idx = id & (vq->size - 1);
>> - uint32_t idx = vq->avail->ring[wrapped_idx];
>> - uint8_t next_desc;
>> - do {
>> - next_desc = 0;
>> - vq->buf_vec[vec_idx].buf_addr =
>> - vq->desc[idx].addr;
>> - vq->buf_vec[vec_idx].buf_len =
>> - vq->desc[idx].len;
>> - vq->buf_vec[vec_idx].desc_idx = idx;
>> - vec_idx++;
>> -
>> - if (vq->desc[idx].flags & VRING_DESC_F_NEXT) {
>> - idx = vq->desc[idx].next;
>> - next_desc = 1;
>> - }
>> - } while (next_desc);
>> - }
>> -
>> - res_end_idx = res_cur_idx;
>> -
>> - entry_success = copy_from_mbuf_to_vring(dev, res_base_idx,
>> - res_end_idx, pkts[pkt_idx]);
>> -
>> - rte_compiler_barrier();
>> -
>> - /*
>> - * Wait until it's our turn to add our buffer
>> - * to the used ring.
>> - */
>> - while (unlikely(vq->last_used_idx != res_base_idx))
>> - rte_pause();
>> -
>> - *(volatile uint16_t *)&vq->used->idx += entry_success;
>> - vq->last_used_idx = res_end_idx;
>> -
>> - /* Kick the guest if necessary. */
>> - if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
>> - eventfd_write((int)vq->kickfd, 1);
>> - }
>> -
>> - return count;
>> -}
>> -
>> -uint16_t
>> -rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
>> - struct rte_mbuf **pkts, uint16_t count)
>> -{
>> - if (unlikely(dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)))
>> - return virtio_dev_merge_rx(dev, queue_id, pkts, count);
>> - else
>> - return virtio_dev_rx(dev, queue_id, pkts, count);
>> -}
>> -
>> -uint16_t
>> -rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
>> - struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
>> -{
>> - struct rte_mbuf *m, *prev;
>> + struct rte_mbuf *mbuf;
>> struct vhost_virtqueue *vq;
>> struct vring_desc *desc;
>> - uint64_t vb_addr = 0;
>> - uint32_t head[MAX_PKT_BURST];
>> + uint64_t buff_addr = 0;
>> + uint32_t head[VHOST_MAX_PKT_BURST];
>> uint32_t used_idx;
>> uint32_t i;
>> - uint16_t free_entries, entry_success = 0;
>> + uint16_t free_entries, packet_success = 0;
>> uint16_t avail_idx;
>>
>> if (unlikely(queue_id != VIRTIO_TXQ)) {
>> @@ -549,8 +217,8 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
>> if (vq->last_used_idx == avail_idx)
>> return 0;
>>
>> - LOG_DEBUG(VHOST_DATA, "%s (%"PRIu64")\n", __func__,
>> - dev->device_fh);
>> + LOG_DEBUG(VHOST_DATA, "(%"PRIu64") %s(%d->%d)\n",
>> + dev->device_fh, __func__, vq->last_used_idx, avail_idx);
>>
>> /* Prefetch available ring to retrieve head indexes. */
>> rte_prefetch0(&vq->avail->ring[vq->last_used_idx & (vq->size - 1)]);
>> @@ -558,173 +226,68 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
>> /*get the number of free entries in the ring*/
>> free_entries = (avail_idx - vq->last_used_idx);
>>
>> - free_entries = RTE_MIN(free_entries, count);
>> + if (free_entries > count)
>> + free_entries = count;
>> /* Limit to MAX_PKT_BURST. */
>> - free_entries = RTE_MIN(free_entries, MAX_PKT_BURST);
>> + if (free_entries > VHOST_MAX_PKT_BURST)
>> + free_entries = VHOST_MAX_PKT_BURST;
>>
>> - LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n",
>> - dev->device_fh, free_entries);
>> + LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n", dev->device_fh, free_entries);
>> /* Retrieve all of the head indexes first to avoid caching issues. */
>> for (i = 0; i < free_entries; i++)
>> head[i] = vq->avail->ring[(vq->last_used_idx + i) & (vq->size - 1)];
>>
>> /* Prefetch descriptor index. */
>> - rte_prefetch0(&vq->desc[head[entry_success]]);
>> + rte_prefetch0(&vq->desc[head[packet_success]]);
>> rte_prefetch0(&vq->used->ring[vq->last_used_idx & (vq->size - 1)]);
>>
>> - while (entry_success < free_entries) {
>> - uint32_t vb_avail, vb_offset;
>> - uint32_t seg_avail, seg_offset;
>> - uint32_t cpy_len;
>> - uint32_t seg_num = 0;
>> - struct rte_mbuf *cur;
>> - uint8_t alloc_err = 0;
>> -
>> - desc = &vq->desc[head[entry_success]];
>> + while (packet_success < free_entries) {
>> + desc = &vq->desc[head[packet_success]];
>>
>> /* Discard first buffer as it is the virtio header */
>> desc = &vq->desc[desc->next];
>>
>> /* Buffer address translation. */
>> - vb_addr = gpa_to_vva(dev, desc->addr);
>> + buff_addr = gpa_to_vva(dev, desc->addr);
>> /* Prefetch buffer address. */
>> - rte_prefetch0((void *)(uintptr_t)vb_addr);
>> + rte_prefetch0((void *)(uintptr_t)buff_addr);
>>
>> used_idx = vq->last_used_idx & (vq->size - 1);
>>
>> - if (entry_success < (free_entries - 1)) {
>> + if (packet_success < (free_entries - 1)) {
>> /* Prefetch descriptor index. */
>> - rte_prefetch0(&vq->desc[head[entry_success+1]]);
>> + rte_prefetch0(&vq->desc[head[packet_success+1]]);
>> rte_prefetch0(&vq->used->ring[(used_idx + 1) & (vq->size - 1)]);
>> }
>>
>> /* Update used index buffer information. */
>> - vq->used->ring[used_idx].id = head[entry_success];
>> + vq->used->ring[used_idx].id = head[packet_success];
>> vq->used->ring[used_idx].len = 0;
>>
>> - vb_offset = 0;
>> - vb_avail = desc->len;
>> - /* Allocate an mbuf and populate the structure. */
>> - m = rte_pktmbuf_alloc(mbuf_pool);
>> - if (unlikely(m == NULL)) {
>> - RTE_LOG(ERR, VHOST_DATA,
>> - "Failed to allocate memory for mbuf.\n");
>> - return entry_success;
>> + mbuf = rte_pktmbuf_alloc(mbuf_pool);
>> + if (unlikely(mbuf == NULL)) {
>> + RTE_LOG(ERR, VHOST_DATA, "Failed to allocate memory for mbuf.\n");
>> + return packet_success;
>> }
>> - seg_offset = 0;
>> - seg_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
>> - cpy_len = RTE_MIN(vb_avail, seg_avail);
>> -
>> - PRINT_PACKET(dev, (uintptr_t)vb_addr, desc->len, 0);
>> -
>> - seg_num++;
>> - cur = m;
>> - prev = m;
>> - while (cpy_len != 0) {
>> - rte_memcpy((void *)(rte_pktmbuf_mtod(cur, char *) + seg_offset),
>> - (void *)((uintptr_t)(vb_addr + vb_offset)),
>> - cpy_len);
>> -
>> - seg_offset += cpy_len;
>> - vb_offset += cpy_len;
>> - vb_avail -= cpy_len;
>> - seg_avail -= cpy_len;
>> -
>> - if (vb_avail != 0) {
>> - /*
>> - * The segment reachs to its end,
>> - * while the virtio buffer in TX vring has
>> - * more data to be copied.
>> - */
>> - cur->data_len = seg_offset;
>> - m->pkt_len += seg_offset;
>> - /* Allocate mbuf and populate the structure. */
>> - cur = rte_pktmbuf_alloc(mbuf_pool);
>> - if (unlikely(cur == NULL)) {
>> - RTE_LOG(ERR, VHOST_DATA, "Failed to "
>> - "allocate memory for mbuf.\n");
>> - rte_pktmbuf_free(m);
>> - alloc_err = 1;
>> - break;
>> - }
>> -
>> - seg_num++;
>> - prev->next = cur;
>> - prev = cur;
>> - seg_offset = 0;
>> - seg_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM;
>> - } else {
>> - if (desc->flags & VRING_DESC_F_NEXT) {
>> - /*
>> - * There are more virtio buffers in
>> - * same vring entry need to be copied.
>> - */
>> - if (seg_avail == 0) {
>> - /*
>> - * The current segment hasn't
>> - * room to accomodate more
>> - * data.
>> - */
>> - cur->data_len = seg_offset;
>> - m->pkt_len += seg_offset;
>> - /*
>> - * Allocate an mbuf and
>> - * populate the structure.
>> - */
>> - cur = rte_pktmbuf_alloc(mbuf_pool);
>> - if (unlikely(cur == NULL)) {
>> - RTE_LOG(ERR,
>> - VHOST_DATA,
>> - "Failed to "
>> - "allocate memory "
>> - "for mbuf\n");
>> - rte_pktmbuf_free(m);
>> - alloc_err = 1;
>> - break;
>> - }
>> - seg_num++;
>> - prev->next = cur;
>> - prev = cur;
>> - seg_offset = 0;
>> - seg_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM;
>> - }
>> -
>> - desc = &vq->desc[desc->next];
>> -
>> - /* Buffer address translation. */
>> - vb_addr = gpa_to_vva(dev, desc->addr);
>> - /* Prefetch buffer address. */
>> - rte_prefetch0((void *)(uintptr_t)vb_addr);
>> - vb_offset = 0;
>> - vb_avail = desc->len;
>> -
>> - PRINT_PACKET(dev, (uintptr_t)vb_addr,
>> - desc->len, 0);
>> - } else {
>> - /* The whole packet completes. */
>> - cur->data_len = seg_offset;
>> - m->pkt_len += seg_offset;
>> - vb_avail = 0;
>> - }
>> - }
>> + mbuf->pkt.data_len = desc->len;
>> + mbuf->pkt.pkt_len = mbuf->pkt.data_len;
>>
>> - cpy_len = RTE_MIN(vb_avail, seg_avail);
>> - }
>> + rte_memcpy((void *) mbuf->pkt.data,
>> + (const void *) buff_addr, mbuf->pkt.data_len);
>>
>> - if (unlikely(alloc_err == 1))
>> - break;
>> + pkts[packet_success] = mbuf;
>>
>> - m->nb_segs = seg_num;
>> + VHOST_PRINT_PACKET(dev, (uintptr_t)buff_addr, desc->len, 0);
>>
>> - pkts[entry_success] = m;
>> vq->last_used_idx++;
>> - entry_success++;
>> + packet_success++;
>> }
>>
>> rte_compiler_barrier();
>> - vq->used->idx += entry_success;
>> + vq->used->idx += packet_success;
>> /* Kick guest if required. */
>> if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
>> eventfd_write((int)vq->kickfd, 1);
>> - return entry_success;
>> +
>> + return packet_success;
>> }
>> diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
>> index 852b6d1..516e743 100644
>> --- a/lib/librte_vhost/virtio-net.c
>> +++ b/lib/librte_vhost/virtio-net.c
>> @@ -31,17 +31,14 @@
>> * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>> */
>>
>> -#include <dirent.h>
>> -#include <fuse/cuse_lowlevel.h>
>> #include <linux/vhost.h>
>> #include <linux/virtio_net.h>
>> #include <stddef.h>
>> #include <stdint.h>
>> #include <stdlib.h>
>> -#include <sys/eventfd.h>
>> -#include <sys/ioctl.h>
>> #include <sys/mman.h>
>> #include <unistd.h>
>> +#include <assert.h>
>>
>> #include <rte_ethdev.h>
>> #include <rte_log.h>
>> @@ -49,10 +46,8 @@
>> #include <rte_memory.h>
>> #include <rte_virtio_net.h>
>>
>> -#include "vhost-net-cdev.h"
>> -#include "eventfd_link/eventfd_link.h"
>> -
>> -/*
>> +#include "vhost-net.h"
>> +/**
>> * Device linked list structure for configuration.
>> */
>> struct virtio_net_config_ll {
>> @@ -60,38 +55,15 @@ struct virtio_net_config_ll {
>> struct virtio_net_config_ll *next; /* Next dev on linked list.*/
>> };
>>
>> -const char eventfd_cdev[] = "/dev/eventfd-link";
>> -
>> -/* device ops to add/remove device to/from data core. */
>> +/* Device ops to add/remove a device to/from the data core. */
>> static struct virtio_net_device_ops const *notify_ops;
>> -/* root address of the linked list of managed virtio devices */
>> +/* root address of the linked list in the configuration core. */
>> static struct virtio_net_config_ll *ll_root;
>>
>> /* Features supported by this lib. */
>> -#define VHOST_SUPPORTED_FEATURES ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | \
>> - (1ULL << VIRTIO_NET_F_CTRL_RX))
>> +#define VHOST_SUPPORTED_FEATURES (1ULL << VIRTIO_NET_F_MRG_RXBUF)
>> static uint64_t VHOST_FEATURES = VHOST_SUPPORTED_FEATURES;
>>
>> -/* Line size for reading maps file. */
>> -static const uint32_t BUFSIZE = PATH_MAX;
>> -
>> -/* Size of prot char array in procmap. */
>> -#define PROT_SZ 5
>> -
>> -/* Number of elements in procmap struct. */
>> -#define PROCMAP_SZ 8
>> -
>> -/* Structure containing information gathered from maps file. */
>> -struct procmap {
>> - uint64_t va_start; /* Start virtual address in file. */
>> - uint64_t len; /* Size of file. */
>> - uint64_t pgoff; /* Not used. */
>> - uint32_t maj; /* Not used. */
>> - uint32_t min; /* Not used. */
>> - uint32_t ino; /* Not used. */
>> - char prot[PROT_SZ]; /* Not used. */
>> - char fname[PATH_MAX]; /* File name. */
>> -};
>>
>> /*
>> * Converts QEMU virtual address to Vhost virtual address. This function is
>> @@ -110,199 +82,15 @@ qva_to_vva(struct virtio_net *dev, uint64_t qemu_va)
>> if ((qemu_va >= region->userspace_address) &&
>> (qemu_va <= region->userspace_address +
>> region->memory_size)) {
>> - vhost_va = dev->mem->mapped_address + qemu_va -
>> - dev->mem->base_address;
>> + vhost_va = qemu_va + region->guest_phys_address +
>> + region->address_offset -
>> + region->userspace_address;
>> break;
>> }
>> }
>> return vhost_va;
>> }
>>
>> -/*
>> - * Locate the file containing QEMU's memory space and
>> - * map it to our address space.
>> - */
>> -static int
>> -host_memory_map(struct virtio_net *dev, struct virtio_memory *mem,
>> - pid_t pid, uint64_t addr)
>> -{
>> - struct dirent *dptr = NULL;
>> - struct procmap procmap;
>> - DIR *dp = NULL;
>> - int fd;
>> - int i;
>> - char memfile[PATH_MAX];
>> - char mapfile[PATH_MAX];
>> - char procdir[PATH_MAX];
>> - char resolved_path[PATH_MAX];
>> - char *path = NULL;
>> - FILE *fmap;
>> - void *map;
>> - uint8_t found = 0;
>> - char line[BUFSIZE];
>> - char dlm[] = "- : ";
>> - char *str, *sp, *in[PROCMAP_SZ];
>> - char *end = NULL;
>> -
>> - /* Path where mem files are located. */
>> - snprintf(procdir, PATH_MAX, "/proc/%u/fd/", pid);
>> - /* Maps file used to locate mem file. */
>> - snprintf(mapfile, PATH_MAX, "/proc/%u/maps", pid);
>> -
>> - fmap = fopen(mapfile, "r");
>> - if (fmap == NULL) {
>> - RTE_LOG(ERR, VHOST_CONFIG,
>> - "(%"PRIu64") Failed to open maps file for pid %d\n",
>> - dev->device_fh, pid);
>> - return -1;
>> - }
>> -
>> - /* Read through maps file until we find out base_address. */
>> - while (fgets(line, BUFSIZE, fmap) != 0) {
>> - str = line;
>> - errno = 0;
>> - /* Split line into fields. */
>> - for (i = 0; i < PROCMAP_SZ; i++) {
>> - in[i] = strtok_r(str, &dlm[i], &sp);
>> - if ((in[i] == NULL) || (errno != 0)) {
>> - fclose(fmap);
>> - return -1;
>> - }
>> - str = NULL;
>> - }
>> -
>> - /* Convert/Copy each field as needed. */
>> - procmap.va_start = strtoull(in[0], &end, 16);
>> - if ((in[0] == '\0') || (end == NULL) || (*end != '\0') ||
>> - (errno != 0)) {
>> - fclose(fmap);
>> - return -1;
>> - }
>> -
>> - procmap.len = strtoull(in[1], &end, 16);
>> - if ((in[1] == '\0') || (end == NULL) || (*end != '\0') ||
>> - (errno != 0)) {
>> - fclose(fmap);
>> - return -1;
>> - }
>> -
>> - procmap.pgoff = strtoull(in[3], &end, 16);
>> - if ((in[3] == '\0') || (end == NULL) || (*end != '\0') ||
>> - (errno != 0)) {
>> - fclose(fmap);
>> - return -1;
>> - }
>> -
>> - procmap.maj = strtoul(in[4], &end, 16);
>> - if ((in[4] == '\0') || (end == NULL) || (*end != '\0') ||
>> - (errno != 0)) {
>> - fclose(fmap);
>> - return -1;
>> - }
>> -
>> - procmap.min = strtoul(in[5], &end, 16);
>> - if ((in[5] == '\0') || (end == NULL) || (*end != '\0') ||
>> - (errno != 0)) {
>> - fclose(fmap);
>> - return -1;
>> - }
>> -
>> - procmap.ino = strtoul(in[6], &end, 16);
>> - if ((in[6] == '\0') || (end == NULL) || (*end != '\0') ||
>> - (errno != 0)) {
>> - fclose(fmap);
>> - return -1;
>> - }
>> -
>> - memcpy(&procmap.prot, in[2], PROT_SZ);
>> - memcpy(&procmap.fname, in[7], PATH_MAX);
>> -
>> - if (procmap.va_start == addr) {
>> - procmap.len = procmap.len - procmap.va_start;
>> - found = 1;
>> - break;
>> - }
>> - }
>> - fclose(fmap);
>> -
>> - if (!found) {
>> - RTE_LOG(ERR, VHOST_CONFIG,
>> - "(%"PRIu64") Failed to find memory file in pid %d maps file\n",
>> - dev->device_fh, pid);
>> - return -1;
>> - }
>> -
>> - /* Find the guest memory file among the process fds. */
>> - dp = opendir(procdir);
>> - if (dp == NULL) {
>> - RTE_LOG(ERR, VHOST_CONFIG,
>> - "(%"PRIu64") Cannot open pid %d process directory\n",
>> - dev->device_fh, pid);
>> - return -1;
>> - }
>> -
>> - found = 0;
>> -
>> - /* Read the fd directory contents. */
>> - while (NULL != (dptr = readdir(dp))) {
>> - snprintf(memfile, PATH_MAX, "/proc/%u/fd/%s",
>> - pid, dptr->d_name);
>> - path = realpath(memfile, resolved_path);
>> - if ((path == NULL) && (strlen(resolved_path) == 0)) {
>> - RTE_LOG(ERR, VHOST_CONFIG,
>> - "(%"PRIu64") Failed to resolve fd directory\n",
>> - dev->device_fh);
>> - closedir(dp);
>> - return -1;
>> - }
>> - if (strncmp(resolved_path, procmap.fname,
>> - strnlen(procmap.fname, PATH_MAX)) == 0) {
>> - found = 1;
>> - break;
>> - }
>> - }
>> -
>> - closedir(dp);
>> -
>> - if (found == 0) {
>> - RTE_LOG(ERR, VHOST_CONFIG,
>> - "(%"PRIu64") Failed to find memory file for pid %d\n",
>> - dev->device_fh, pid);
>> - return -1;
>> - }
>> - /* Open the shared memory file and map the memory into this process. */
>> - fd = open(memfile, O_RDWR);
>> -
>> - if (fd == -1) {
>> - RTE_LOG(ERR, VHOST_CONFIG,
>> - "(%"PRIu64") Failed to open %s for pid %d\n",
>> - dev->device_fh, memfile, pid);
>> - return -1;
>> - }
>> -
>> - map = mmap(0, (size_t)procmap.len, PROT_READ|PROT_WRITE,
>> - MAP_POPULATE|MAP_SHARED, fd, 0);
>> - close(fd);
>> -
>> - if (map == MAP_FAILED) {
>> - RTE_LOG(ERR, VHOST_CONFIG,
>> - "(%"PRIu64") Error mapping the file %s for pid %d\n",
>> - dev->device_fh, memfile, pid);
>> - return -1;
>> - }
>> -
>> - /* Store the memory address and size in the device data structure */
>> - mem->mapped_address = (uint64_t)(uintptr_t)map;
>> - mem->mapped_size = procmap.len;
>> -
>> - LOG_DEBUG(VHOST_CONFIG,
>> - "(%"PRIu64") Mem File: %s->%s - Size: %llu - VA: %p\n",
>> - dev->device_fh,
>> - memfile, resolved_path,
>> - (unsigned long long)mem->mapped_size, map);
>> -
>> - return 0;
>> -}
>>
>> /*
>> * Retrieves an entry from the devices configuration linked list.
>> @@ -376,7 +164,7 @@ add_config_ll_entry(struct virtio_net_config_ll *new_ll_dev)
>> }
>>
>> }
>> -
>> +/* TODO: use DPDK allocation/free routines here if possible. */
>> /*
>> * Unmap any memory, close any file descriptors and
>> * free any memory owned by a device.
>> @@ -389,16 +177,17 @@ cleanup_device(struct virtio_net *dev)
>> munmap((void *)(uintptr_t)dev->mem->mapped_address,
>> (size_t)dev->mem->mapped_size);
>> free(dev->mem);
>> + dev->mem = NULL;
>> }
>>
>> /* Close any event notifiers opened by device. */
>> - if (dev->virtqueue[VIRTIO_RXQ]->callfd)
>> + if (dev->virtqueue[VIRTIO_RXQ]->callfd > 0)
>> close((int)dev->virtqueue[VIRTIO_RXQ]->callfd);
>> - if (dev->virtqueue[VIRTIO_RXQ]->kickfd)
>> + if (dev->virtqueue[VIRTIO_RXQ]->kickfd > 0)
>> close((int)dev->virtqueue[VIRTIO_RXQ]->kickfd);
>> - if (dev->virtqueue[VIRTIO_TXQ]->callfd)
>> + if (dev->virtqueue[VIRTIO_TXQ]->callfd > 0)
>> close((int)dev->virtqueue[VIRTIO_TXQ]->callfd);
>> - if (dev->virtqueue[VIRTIO_TXQ]->kickfd)
>> + if (dev->virtqueue[VIRTIO_TXQ]->kickfd > 0)
>> close((int)dev->virtqueue[VIRTIO_TXQ]->kickfd);
>> }
>>
>> @@ -522,8 +311,8 @@ new_device(struct vhost_device_ctx ctx)
>> }
>>
>> /*
>> - * Function is called from the CUSE release function. This function will
>> - * cleanup the device and remove it from device configuration linked list.
>> + * Function is called from the CUSE release function. This function will cleanup
>> + * the device and remove it from device configuration linked list.
>> */
>> static void
>> destroy_device(struct vhost_device_ctx ctx)
>> @@ -569,6 +358,7 @@ set_owner(struct vhost_device_ctx ctx)
>> return -1;
>>
>> return 0;
>> +	/* TODO: check whether ctx.fh is meaningful here */
>> }
>>
>> /*
>> @@ -651,14 +441,12 @@ set_features(struct vhost_device_ctx ctx, uint64_t *pu)
>> * This includes storing offsets used to translate buffer addresses.
>> */
>> static int
>> -set_mem_table(struct vhost_device_ctx ctx, const void *mem_regions_addr,
>> - uint32_t nregions)
>> +set_mem_table(struct vhost_device_ctx ctx,
>> + const struct virtio_memory_regions *regions, uint32_t nregions)
>> {
>> struct virtio_net *dev;
>> - struct vhost_memory_region *mem_regions;
>> struct virtio_memory *mem;
>> - uint64_t size = offsetof(struct vhost_memory, regions);
>> - uint32_t regionidx, valid_regions;
>> + uint32_t regionidx;
>>
>> dev = get_device(ctx);
>> if (dev == NULL)
>> @@ -682,107 +470,24 @@ set_mem_table(struct vhost_device_ctx ctx, const void *mem_regions_addr,
>>
>> mem->nregions = nregions;
>>
>> - mem_regions = (void *)(uintptr_t)
>> - ((uint64_t)(uintptr_t)mem_regions_addr + size);
>> -
>> for (regionidx = 0; regionidx < mem->nregions; regionidx++) {
>> /* Populate the region structure for each region. */
>> - mem->regions[regionidx].guest_phys_address =
>> - mem_regions[regionidx].guest_phys_addr;
>> - mem->regions[regionidx].guest_phys_address_end =
>> - mem->regions[regionidx].guest_phys_address +
>> - mem_regions[regionidx].memory_size;
>> - mem->regions[regionidx].memory_size =
>> - mem_regions[regionidx].memory_size;
>> - mem->regions[regionidx].userspace_address =
>> - mem_regions[regionidx].userspace_addr;
>> -
>> - LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") REGION: %u - GPA: %p - QEMU VA: %p - SIZE (%"PRIu64")\n", dev->device_fh,
>> - regionidx,
>> - (void *)(uintptr_t)mem->regions[regionidx].guest_phys_address,
>> - (void *)(uintptr_t)mem->regions[regionidx].userspace_address,
>> - mem->regions[regionidx].memory_size);
>> -
>> - /*set the base address mapping*/
>> + mem->regions[regionidx] = regions[regionidx];
>> if (mem->regions[regionidx].guest_phys_address == 0x0) {
>> mem->base_address =
>> mem->regions[regionidx].userspace_address;
>> - /* Map VM memory file */
>> - if (host_memory_map(dev, mem, ctx.pid,
>> - mem->base_address) != 0) {
>> - free(mem);
>> - return -1;
>> - }
>> + mem->mapped_address =
>> + mem->regions[regionidx].address_offset;
>> }
>> }
>>
>> - /* Check that we have a valid base address. */
>> - if (mem->base_address == 0) {
>> - RTE_LOG(ERR, VHOST_CONFIG, "(%"PRIu64") Failed to find base address of qemu memory file.\n", dev->device_fh);
>> - free(mem);
>> - return -1;
>> - }
>> -
>> - /*
>> - * Check if all of our regions have valid mappings.
>> - * Usually one does not exist in the QEMU memory file.
>> - */
>> - valid_regions = mem->nregions;
>> - for (regionidx = 0; regionidx < mem->nregions; regionidx++) {
>> - if ((mem->regions[regionidx].userspace_address <
>> - mem->base_address) ||
>> - (mem->regions[regionidx].userspace_address >
>> - (mem->base_address + mem->mapped_size)))
>> - valid_regions--;
>> - }
>> -
>> - /*
>> - * If a region does not have a valid mapping,
>> - * we rebuild our memory struct to contain only valid entries.
>> - */
>> - if (valid_regions != mem->nregions) {
>> - LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") Not all memory regions exist in the QEMU mem file. Re-populating mem structure\n",
>> - dev->device_fh);
>> -
>> - /*
>> - * Re-populate the memory structure with only valid regions.
>> - * Invalid regions are over-written with memmove.
>> - */
>> - valid_regions = 0;
>> -
>> - for (regionidx = mem->nregions; 0 != regionidx--;) {
>> - if ((mem->regions[regionidx].userspace_address <
>> - mem->base_address) ||
>> - (mem->regions[regionidx].userspace_address >
>> - (mem->base_address + mem->mapped_size))) {
>> - memmove(&mem->regions[regionidx],
>> - &mem->regions[regionidx + 1],
>> - sizeof(struct virtio_memory_regions) *
>> - valid_regions);
>> - } else {
>> - valid_regions++;
>> - }
>> - }
>> - }
>> - mem->nregions = valid_regions;
>> + /* TODO: add back the logic that removes invalid memory regions */
>> dev->mem = mem;
>>
>> - /*
>> - * Calculate the address offset for each region.
>> - * This offset is used to identify the vhost virtual address
>> - * corresponding to a QEMU guest physical address.
>> - */
>> - for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++) {
>> - dev->mem->regions[regionidx].address_offset =
>> - dev->mem->regions[regionidx].userspace_address -
>> - dev->mem->base_address +
>> - dev->mem->mapped_address -
>> - dev->mem->regions[regionidx].guest_phys_address;
>> -
>> - }
>> return 0;
>> }
>>
>> +
>> /*
>> * Called from CUSE IOCTL: VHOST_SET_VRING_NUM
>> * The virtio device sends us the size of the descriptor ring.
>> @@ -896,38 +601,62 @@ get_vring_base(struct vhost_device_ctx ctx, uint32_t index,
>> /* State->index refers to the queue index. The txq is 1, rxq is 0. */
>> state->num = dev->virtqueue[state->index]->last_used_idx;
>>
>> - return 0;
>> -}
>> + if (dev->flags & VIRTIO_DEV_RUNNING) {
>> + RTE_LOG(INFO, VHOST_CONFIG,
>> + "get_vring_base message is for release\n");
>> + notify_ops->destroy_device(dev);
>> + /*
>> + * sync call.
>> + * when it returns, it means it is removed from the data core.
>> + */
>> + }
>> + /* TODO fix all munmap */
>> + if (dev->mem) {
>> + munmap((void *)(uintptr_t)dev->mem->mapped_address,
>> + (size_t)dev->mem->mapped_size);
>> + free(dev->mem);
>> + dev->mem = NULL;
>> + }
>>
>> -/*
>> - * This function uses the eventfd_link kernel module to copy an eventfd file
>> - * descriptor provided by QEMU in to our process space.
>> - */
>> -static int
>> -eventfd_copy(struct virtio_net *dev, struct eventfd_copy *eventfd_copy)
>> -{
>> - int eventfd_link, ret;
>>
>> - /* Open the character device to the kernel module. */
>> - eventfd_link = open(eventfd_cdev, O_RDWR);
>> - if (eventfd_link < 0) {
>> - RTE_LOG(ERR, VHOST_CONFIG,
>> - "(%"PRIu64") eventfd_link module is not loaded\n",
>> - dev->device_fh);
>> - return -1;
>> - }
>> + if (dev->virtqueue[VIRTIO_RXQ]->callfd > 0)
>> + close((int)dev->virtqueue[VIRTIO_RXQ]->callfd);
>> + dev->virtqueue[VIRTIO_RXQ]->callfd = -1;
>> + if (dev->virtqueue[VIRTIO_TXQ]->callfd > 0)
>> + close((int)dev->virtqueue[VIRTIO_TXQ]->callfd);
>> + dev->virtqueue[VIRTIO_TXQ]->callfd = -1;
>> + /* We don't clean up callfd here as we won't get CALLFD again */
>> +
>> + dev->virtqueue[VIRTIO_RXQ]->desc = NULL;
>> + dev->virtqueue[VIRTIO_RXQ]->avail = NULL;
>> + dev->virtqueue[VIRTIO_RXQ]->used = NULL;
>> + dev->virtqueue[VIRTIO_RXQ]->last_used_idx = 0;
>> + dev->virtqueue[VIRTIO_RXQ]->last_used_idx_res = 0;
>> +
>> + dev->virtqueue[VIRTIO_TXQ]->desc = NULL;
>> + dev->virtqueue[VIRTIO_TXQ]->avail = NULL;
>> + dev->virtqueue[VIRTIO_TXQ]->used = NULL;
>> + dev->virtqueue[VIRTIO_TXQ]->last_used_idx = 0;
>> + dev->virtqueue[VIRTIO_TXQ]->last_used_idx_res = 0;
>>
>> - /* Call the IOCTL to copy the eventfd. */
>> - ret = ioctl(eventfd_link, EVENTFD_COPY, eventfd_copy);
>> - close(eventfd_link);
>>
>> - if (ret < 0) {
>> - RTE_LOG(ERR, VHOST_CONFIG,
>> - "(%"PRIu64") EVENTFD_COPY ioctl failed\n",
>> - dev->device_fh);
>> - return -1;
>> - }
>> + return 0;
>> +}
>>
>> +static int
>> +virtio_is_ready(struct virtio_net *dev, int index)
>> +{
>> + struct vhost_virtqueue *vq1, *vq2;
>> + /* mq support in future.*/
>> + vq1 = dev->virtqueue[index];
>> + vq2 = dev->virtqueue[index ^ 1];
>> + if (vq1 && vq2 && vq1->desc && vq2->desc &&
>> + (vq1->kickfd > 0) && (vq1->callfd > 0) &&
>> + (vq2->kickfd > 0) && (vq2->callfd > 0)) {
>> + LOG_DEBUG(VHOST_CONFIG, "virtio is ready for processing.\n");
>> + return 1;
>> + }
>> + LOG_DEBUG(VHOST_CONFIG, "virtio isn't ready for processing.\n");
>> return 0;
>> }
>>
>> @@ -940,7 +669,6 @@ static int
>> set_vring_call(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
>> {
>> struct virtio_net *dev;
>> - struct eventfd_copy eventfd_kick;
>> struct vhost_virtqueue *vq;
>>
>> dev = get_device(ctx);
>> @@ -953,14 +681,7 @@ set_vring_call(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
>> if (vq->kickfd)
>> close((int)vq->kickfd);
>>
>> - /* Populate the eventfd_copy structure and call eventfd_copy. */
>> - vq->kickfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
>> - eventfd_kick.source_fd = vq->kickfd;
>> - eventfd_kick.target_fd = file->fd;
>> - eventfd_kick.target_pid = ctx.pid;
>> -
>> - if (eventfd_copy(dev, &eventfd_kick))
>> - return -1;
>> + vq->kickfd = file->fd;
>>
>> return 0;
>> }
>> @@ -974,7 +695,6 @@ static int
>> set_vring_kick(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
>> {
>> struct virtio_net *dev;
>> - struct eventfd_copy eventfd_call;
>> struct vhost_virtqueue *vq;
>>
>> dev = get_device(ctx);
>> @@ -986,16 +706,11 @@ set_vring_kick(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
>>
>> if (vq->callfd)
>> close((int)vq->callfd);
>> + vq->callfd = file->fd;
>>
>> - /* Populate the eventfd_copy structure and call eventfd_copy. */
>> - vq->callfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
>> - eventfd_call.source_fd = vq->callfd;
>> - eventfd_call.target_fd = file->fd;
>> - eventfd_call.target_pid = ctx.pid;
>> -
>> - if (eventfd_copy(dev, &eventfd_call))
>> - return -1;
>> -
>> + if (virtio_is_ready(dev, file->index) &&
>> + !(dev->flags & VIRTIO_DEV_RUNNING))
>> + notify_ops->new_device(dev);
>> return 0;
>> }
>>
>> @@ -1024,6 +739,7 @@ set_backend(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
>> * If the device isn't already running and both backend fds are set,
>> * we add the device.
>> */
>> + LOG_DEBUG(VHOST_CONFIG, "%s %d\n", __func__, file->fd);
>> if (!(dev->flags & VIRTIO_DEV_RUNNING)) {
>> if (((int)dev->virtqueue[VIRTIO_TXQ]->backend != VIRTIO_DEV_STOPPED) &&
>> ((int)dev->virtqueue[VIRTIO_RXQ]->backend != VIRTIO_DEV_STOPPED))
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2014-11-17 6:01 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-11-15 1:14 [dpdk-dev] [PATCH RFC] lib/librte_vhost: vhost-user Huawei Xie
2014-11-17 6:04 ` Tetsuya Mukawa
2014-11-17 6:11 ` Tetsuya Mukawa
2014-11-17 6:06 ` [dpdk-dev] [RFC PATCH] lib/librte_vhost: cleanup white spaces, tabs and indents Tetsuya Mukawa
2014-11-17 6:07 ` [dpdk-dev] [RFC PATCH 1/2] lib/librte_vhost: change macro name of include guard Tetsuya Mukawa
2014-11-17 6:07 ` [dpdk-dev] [RFC PATCH 2/2] lib/librte_vhost: Add device abstraction layer Tetsuya Mukawa
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).