* [dpdk-dev] [PATCH RFC 4/4] examples/vdpa: add virtio-net PCI device driver
From: Xiao Wang @ 2017-12-29 18:05 UTC (permalink / raw)
To: dev
Cc: zhihong.wang, jianfeng.tan, tiwei.bie, maxime.coquelin, yliu,
cunming.liang, dan.daly, remy.horton, mohammad.abdul.awal,
Xiao Wang
This sample shows an instance of a vDPA device driver built on the vDPA
lib. The driver uses a standard virtio-net PCI device as the vDPA device,
which can then serve as the backend for a virtio-net PCI device in a
nested VM.
The key driver ops implemented are (registration outline after the list):
* vdpa_virtio_eng_init
Prepare a resource pool to be used as vDPA devices for an engine.
* vdpa_virtio_eng_uninit
Reset the vDPA resource pool of an engine.
* vdpa_virtio_dev_init
Allocate a device for the corresponding vhost socket.
* vdpa_virtio_dev_uninit
Free a previously allocated device.
* vdpa_virtio_dev_conf
With the guest virtio information recorded in the virtio_net structure,
the driver configures the device and the IOMMU to set up the vhost
datapath, including vring configuration, VFIO interrupt setup and kick
relay.
* vdpa_virtio_dev_close
Undo the configuration done in dev_conf.
* device capability reporting, e.g. queue number, features.
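These ops are hooked into an engine driver structure and registered with
the vDPA lib; in outline (the full definition is at the end of
vdpa_virtio_net.c below):
struct rte_vdpa_eng_driver vdpa_virtio_net_driver = {
	.name = "vdpa_virtio_net",
	.eng_ops = {
		.eng_init = vdpa_virtio_eng_init,
		.eng_uninit = vdpa_virtio_eng_uninit,
	},
	.dev_ops = {
		.dev_conf = vdpa_virtio_dev_conf,
		.dev_close = vdpa_virtio_dev_close,
	},
};
RTE_VDPA_REGISTER_DRIVER(vdpa_virtio_net, vdpa_virtio_net_driver);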
Below are setup steps for your reference:
1. Make sure your kernel vhost module and QEMU support vIOMMU.
- OS: CentOS 7.4
- QEMU: 2.10.1
- Guest OS: CentOS 7.2
- Nested VM OS: CentOS 7.2
2. Enable the VT-x feature for the vCPUs in the VM (nested virtualization):
modprobe kvm_intel nested=1
3. Start a VM with a virtio-net-pci device.
./qemu-2.10.1/x86_64-softmmu/qemu-system-x86_64 -enable-kvm -cpu host \
<snip>
-machine q35 \
-device intel-iommu \
-netdev tap,id=mytap,ifname=vdpa,vhostforce=on \
-device virtio-net-pci,netdev=mytap,mac=00:aa:bb:cc:dd:ee,\
disable-modern=off,disable-legacy=on,iommu_platform=on \
4. Bind vfio-pci to the virtio-net PCI device.
a) log in to the VM;
b) modprobe vfio-pci
c) rmmod vfio_iommu_type1
d) modprobe vfio_iommu_type1 allow_unsafe_interrupts=1
e) ./usertools/dpdk-devbind.py -b vfio-pci 00:03.0
5. Start the vDPA sample.
Apply this patch set on top of DPDK 17.11 and the vDPA RFC patch;
the sample is compiled like the other DPDK samples.
./examples/vdpa/build/vdpa -c 0x6 -n 4 --socket-mem 512 --no-pci -- \
--bdf 0000:00:03.0 --devcnt 1 --engine vdpa_virtio_net \
--iface /tmp/vhost-user- --queue 1
6. Start nested VM
./qemu-2.10.1/x86_64-softmmu/qemu-system-x86_64 -cpu host -enable-kvm \
<snip>
-mem-prealloc \
-chardev socket,id=char0,path=/tmp/vhost-user-0 \
-netdev type=vhost-user,id=vdpa,chardev=char0,vhostforce \
-device virtio-net-pci,netdev=vdpa,mac=00:aa:bb:cc:dd:ee \
7. Log in to the nested VM and verify that the virtio device in the nested
VM can communicate with the tap device on the host, e.g. by assigning IP
addresses to both interfaces and pinging between them.
Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
---
examples/vdpa/Makefile | 59 ++
examples/vdpa/main.c | 321 ++++++++++
examples/vdpa/vdpa_virtio_net.c | 1274 +++++++++++++++++++++++++++++++++++++++
examples/vdpa/vdpa_virtio_net.h | 144 +++++
4 files changed, 1798 insertions(+)
create mode 100644 examples/vdpa/Makefile
create mode 100644 examples/vdpa/main.c
create mode 100644 examples/vdpa/vdpa_virtio_net.c
create mode 100644 examples/vdpa/vdpa_virtio_net.h
diff --git a/examples/vdpa/Makefile b/examples/vdpa/Makefile
new file mode 100644
index 0000000..6571a05
--- /dev/null
+++ b/examples/vdpa/Makefile
@@ -0,0 +1,59 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+ifneq ($(CONFIG_RTE_EXEC_ENV),"linuxapp")
+$(info This application can only operate in a linuxapp environment, \
+please change the definition of the RTE_TARGET environment variable)
+all:
+else
+
+# binary name
+APP = vdpa
+
+# all source are stored in SRCS-y
+SRCS-y := main.c vdpa_virtio_net.c
+
+CFLAGS += -O2 -D_FILE_OFFSET_BITS=64
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -D_GNU_SOURCE
+
+include $(RTE_SDK)/mk/rte.extapp.mk
+
+endif
diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c
new file mode 100644
index 0000000..3cf6c78
--- /dev/null
+++ b/examples/vdpa/main.c
@@ -0,0 +1,321 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <getopt.h>
+#include <signal.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <rte_cycles.h>
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+#include <rte_vhost.h>
+#include <rte_vdpa.h>
+#include "vdpa_virtio_net.h"
+
+#define MAX_PATH_LEN 128
+#define MAX_VDPA_SAMPLE_PORTS 8
+
+struct vdpa_port {
+ char ifname[MAX_PATH_LEN];
+ int eid;
+ int did;
+ int vid;
+};
+
+struct vdpa_port vports[MAX_VDPA_SAMPLE_PORTS];
+struct rte_vdpa_eng_id dev_id;
+char engine[MAX_PATH_LEN];
+char iface[MAX_PATH_LEN];
+int engid;
+int queue;
+int devcnt;
+
+static int
+get_unsigned(const char *str, int base)
+{
+ unsigned long num;
+ char *end = NULL;
+
+ errno = 0;
+ num = strtoul(str, &end, base);
+ if ((str[0] == '\0') || (end == NULL) || (*end != '\0') || (errno != 0))
+ return -1;
+
+ return num;
+}
+
+static int
+parse_args(int argc, char **argv)
+{
+ static const char *short_option = "";
+ static struct option long_option[] = {
+ {"bdf", required_argument, NULL, 0},
+ {"engine", required_argument, NULL, 0},
+ {"queue", required_argument, NULL, 0},
+ {"devcnt", required_argument, NULL, 0},
+ {"iface", required_argument, NULL, 0},
+ {NULL, 0, 0, 0},
+ };
+ char str[MAX_PATH_LEN];
+ int opt, idx;
+ int num[4];
+ int i, j;
+
+ while ((opt = getopt_long(argc, argv, short_option, long_option, &idx))
+ != EOF) {
+ switch (opt) {
+ case 0:
+ if (strncmp(long_option[idx].name, "bdf",
+ MAX_PATH_LEN) == 0) {
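+ /*
+ * Parse the PCI address (domain:bus:devid.function) by
+ * scanning backwards for the ':' and '.' separators;
+ * num[] ends up holding {domain, bus, devid, function}.
+ */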
+ strcpy(str, optarg);
+ memset(num, 0, 4 * sizeof(num[0]));
+ i = strlen(str) - 1;
+ j = 3;
+ while (i > 0 && j >= 0) {
+ while ((str[i - 1] != ':'
+ && str[i - 1] != '.')
+ && i > 0)
+ i--;
+ num[j--] = get_unsigned(&str[i], 16);
+ i--;
+ if (i >= 0)
+ str[i] = '\0';
+ }
+ dev_id.pci_addr.domain = num[0];
+ dev_id.pci_addr.bus = num[1];
+ dev_id.pci_addr.devid = num[2];
+ dev_id.pci_addr.function = num[3];
+ printf("bdf %04x:%02x:%02x.%02x\n",
+ dev_id.pci_addr.domain,
+ dev_id.pci_addr.bus,
+ dev_id.pci_addr.devid,
+ dev_id.pci_addr.function);
+ } else if (strncmp(long_option[idx].name, "queue",
+ MAX_PATH_LEN) == 0) {
+ queue = get_unsigned(optarg, 10);
+ printf("queue %d\n", queue);
+ } else if (strncmp(long_option[idx].name, "devcnt",
+ MAX_PATH_LEN) == 0) {
+ devcnt = get_unsigned(optarg, 10);
+ printf("devcnt %d\n", devcnt);
+ } else if (strncmp(long_option[idx].name, "engine",
+ MAX_PATH_LEN) == 0) {
+ strcpy(engine, optarg);
+ printf("engine %s\n", engine);
+ } else if (strncmp(long_option[idx].name, "iface",
+ MAX_PATH_LEN) == 0) {
+ strcpy(iface, optarg);
+ printf("iface %s\n", iface);
+ }
+ break;
+ default:
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int
+register_engine(void)
+{
+ if (strlen(engine) <= 0)
+ return -1;
+
+ engid = rte_vdpa_register_engine(engine, &dev_id);
+
+ return engid;
+}
+
+static int
+unregister_engine(void)
+{
+ if (engid < 0)
+ return -1;
+
+ engid = rte_vdpa_unregister_engine(engid);
+
+ return engid;
+}
+
+static int
+init(void)
+{
+ devcnt = MAX_VDPA_SAMPLE_PORTS;
+ engid = -1;
+ queue = 1;
+ memset(&dev_id, 0, sizeof(dev_id));
+ memset(engine, 0, MAX_PATH_LEN * sizeof(engine[0]));
+ memset(iface, 0, MAX_PATH_LEN * sizeof(iface[0]));
+
+ return 0;
+}
+
+static void
+sigint_handler(__rte_unused int signum)
+{
+ exit(0);
+}
+
+static int
+new_device(int vid)
+{
+ char ifname[MAX_PATH_LEN];
+ int i;
+
+ rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
+ for (i = 0; i < MAX_VDPA_SAMPLE_PORTS; i++) {
+ if (strcmp(ifname, vports[i].ifname) == 0) {
+ vports[i].vid = vid;
+ break;
+ }
+ }
+
+ if (i >= MAX_VDPA_SAMPLE_PORTS)
+ return -1;
+
+ rte_vhost_set_vdpa_eid(vid, vports[i].eid);
+ rte_vhost_set_vdpa_did(vid, vports[i].did);
+
+ return 0;
+}
+
+static void
+destroy_device(int vid)
+{
+ int i;
+
+ for (i = 0; i < MAX_VDPA_SAMPLE_PORTS; i++)
+ if (vid == vports[i].vid) {
+ vports[i].vid = -1;
+ break;
+ }
+}
+
+static const struct vhost_device_ops vdpa_devops = {
+ .new_device = new_device,
+ .destroy_device = destroy_device,
+ .vring_state_changed = NULL,
+ .features_changed = NULL,
+ .new_connection = NULL,
+ .destroy_connection = NULL,
+};
+
+int
+main(int argc, char *argv[])
+{
+ char ifname[MAX_PATH_LEN];
+ char ch;
+ int did, ret, i;
+ uint64_t flags = 0;
+
+ signal(SIGINT, sigint_handler);
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "eal init failed\n");
+
+ argc -= ret;
+ argv += ret;
+ ret = init();
+ if (ret)
+ rte_exit(EXIT_FAILURE, "app init failed\n");
+
+ ret = parse_args(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "invalid argument\n");
+
+ ret = register_engine();
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "register engine failed\n");
+
+ if (devcnt > vdpa_virtio_get_device_num(engid))
+ rte_exit(EXIT_FAILURE, "not enough devices in engine\n");
+
+ for (i = 0; i < devcnt; i++) {
+ sprintf(ifname, "%s%d", iface, i);
+ /* for vdpa devices, need to reserve resource via driver */
+ did = vdpa_virtio_dev_init(engid, ifname);
+ vports[i].eid = engid;
+ vports[i].did = did;
+ strcpy(vports[i].ifname, ifname);
+
+ ret = rte_vhost_driver_register(ifname, flags);
+ if (ret != 0)
+ rte_exit(EXIT_FAILURE,
+ "register driver failed: %s\n",
+ ifname);
+
+ ret = rte_vhost_driver_callback_register(ifname, &vdpa_devops);
+ if (ret != 0)
+ rte_exit(EXIT_FAILURE,
+ "register driver ops failed: %s\n",
+ ifname);
+ /* for vdpa devices, need to set capabilities via vhost lib */
+ rte_vhost_driver_set_queue_num(ifname,
+ RTE_MIN(vdpa_virtio_get_queue_num(engid, did),
+ queue));
+ rte_vhost_driver_set_features(ifname,
+ vdpa_virtio_get_features(engid, did));
+ rte_vhost_driver_set_protocol_features(ifname,
+ vdpa_virtio_get_protocol_features(engid, did));
+
+ if (rte_vhost_driver_start(ifname) < 0)
+ rte_exit(EXIT_FAILURE,
+ "start vhost driver failed: %s\n",
+ ifname);
+
+ /* for vdpa devices, need to start device via driver */
+ vdpa_virtio_dev_start(engid, did);
+ }
+
+ printf("enter \'q\' to quit\n");
+ while (scanf("%c", &ch)) {
+ if (ch == 'q')
+ break;
+ while (ch != '\n')
+ scanf("%c", &ch);
+ printf("enter \'q\' to quit\n");
+ }
+
+ /* for vdpa devices, need to free resources via driver */
+ for (i = 0; i < devcnt; i++) {
+ vdpa_virtio_dev_stop(vports[i].eid, vports[i].did);
+ vdpa_virtio_dev_uninit(vports[i].eid, vports[i].did);
+ }
+
+ ret = unregister_engine();
+
+ return ret;
+}
diff --git a/examples/vdpa/vdpa_virtio_net.c b/examples/vdpa/vdpa_virtio_net.c
new file mode 100644
index 0000000..62ab797
--- /dev/null
+++ b/examples/vdpa/vdpa_virtio_net.c
@@ -0,0 +1,1274 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <sys/syscall.h>
+#include <linux/pci_regs.h>
+#include <sys/epoll.h>
+#include <sys/eventfd.h>
+#include <sys/socket.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <rte_log.h>
+#include <rte_ethdev.h>
+#include <rte_io.h>
+#include <rte_malloc.h>
+#include <rte_memzone.h>
+#include <rte_cycles.h>
+#include "vdpa_virtio_net.h"
+
+#define True 1
+#define False 0
+
+#define PCI_CAPABILITY_LIST 0x34
+#define PCI_CAP_ID_VNDR 0x09
+
+/*
+ * Maximum number of virtqueues per device.
+ */
+#define VIRTIO_MAX_VIRTQUEUES 1
+
+/* Common configuration */
+#define VIRTIO_PCI_CAP_COMMON_CFG 1
+/* Notifications */
+#define VIRTIO_PCI_CAP_NOTIFY_CFG 2
+/* ISR Status */
+#define VIRTIO_PCI_CAP_ISR_CFG 3
+/* Device specific configuration */
+#define VIRTIO_PCI_CAP_DEVICE_CFG 4
+/* PCI configuration access */
+#define VIRTIO_PCI_CAP_PCI_CFG 5
+
+/* The feature bitmap for virtio net */
+#define VIRTIO_NET_F_CSUM 0 /* Host handles pkts w/ partial csum */
+#define VIRTIO_NET_F_GUEST_CSUM 1 /* Guest handles pkts w/ partial csum */
+#define VIRTIO_NET_F_MAC 5 /* Host has given MAC address. */
+#define VIRTIO_NET_F_GUEST_TSO4 7 /* Guest can handle TSOv4 in. */
+#define VIRTIO_NET_F_GUEST_TSO6 8 /* Guest can handle TSOv6 in. */
+#define VIRTIO_NET_F_GUEST_ECN 9 /* Guest can handle TSO w/ ECN in. */
+#define VIRTIO_NET_F_GUEST_UFO 10 /* Guest can handle UFO in. */
+#define VIRTIO_NET_F_HOST_TSO4 11 /* Host can handle TSOv4 in. */
+#define VIRTIO_NET_F_HOST_TSO6 12 /* Host can handle TSOv6 in. */
+#define VIRTIO_NET_F_HOST_ECN 13 /* Host can handle TSO w/ ECN in. */
+#define VIRTIO_NET_F_HOST_UFO 14 /* Host can handle UFO in. */
+#define VIRTIO_NET_F_MRG_RXBUF 15 /* Host can merge receive buffers. */
+#define VIRTIO_NET_F_STATUS 16 /* virtio_net_config.status available */
+#define VIRTIO_NET_F_CTRL_VQ 17 /* Control channel available */
+#define VIRTIO_NET_F_CTRL_RX 18 /* Control channel RX mode support */
+#define VIRTIO_NET_F_CTRL_VLAN 19 /* Control channel VLAN filtering */
+#define VIRTIO_NET_F_CTRL_RX_EXTRA 20 /* Extra RX mode control support */
+#define VIRTIO_NET_F_GUEST_ANNOUNCE 21 /* Guest can announce device on the network */
+#define VIRTIO_NET_F_MQ 22 /* Device supports Receive Flow Steering */
+#define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */
+
+#define VIRTIO_F_NOTIFY_ON_EMPTY 24
+
+#define VIRTIO_F_ANY_LAYOUT 27
+
+/* We support indirect buffer descriptors */
+#define VIRTIO_RING_F_INDIRECT_DESC 28
+
+#define VIRTIO_F_VERSION_1 32
+#define VIRTIO_F_IOMMU_PLATFORM 33
+
+/*
+ * Some VirtIO feature bits (currently bits 28 through 31) are
+ * reserved for the transport being used (eg. virtio_ring), the
+ * rest are per-device feature bits.
+ */
+#define VIRTIO_TRANSPORT_F_START 28
+#define VIRTIO_NET_VIRTIO_TRANSPORT_F_END 34
+
+/*
+ * The Guest publishes the used index for which it expects an interrupt
+ * at the end of the avail ring.
+ * The Host publishes the avail index for which it expects a kick
+ * at the end of the used ring.
+ */
+#define VIRTIO_RING_F_EVENT_IDX 29
+
+/* Status byte for driver to report progress. */
+#define VIRTIO_CONFIG_STATUS_RESET 0x00
+#define VIRTIO_CONFIG_STATUS_ACK 0x01
+#define VIRTIO_CONFIG_STATUS_DRIVER 0x02
+#define VIRTIO_CONFIG_STATUS_DRIVER_OK 0x04
+#define VIRTIO_CONFIG_STATUS_FEATURES_OK 0x08
+#define VIRTIO_CONFIG_STATUS_FAILED 0x80
+#define VIRTIO_MSI_NO_VECTOR 0xFFFF
+
+#define VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL)
+#define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \
+ sizeof(int) * (32 + 1))
+#define PAGE_SIZE (sysconf(_SC_PAGESIZE))
+#define PAGE_MASK (~(PAGE_SIZE - 1))
+
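+/*
+ * Per-engine pool of virtio-net PCI devices that can be handed out as vDPA
+ * devices. A slot is reserved by vdpa_virtio_dev_init() (via
+ * virtio_net_alloc_hw_ele) and released by vdpa_virtio_dev_uninit().
+ */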
+static int pool_initiated[MAX_VDPA_ENGINE_NUM] = {0};
+static int total_virtio[MAX_VDPA_ENGINE_NUM] = {0};
+
+struct virtio_net_pci virtio_net_pool[MAX_VDPA_ENGINE_NUM][MAX_VDPA_DEVICE_VIRTIO];
+
+void *pci_find_max_end_va(void);
+int pci_get_kernel_driver_by_path(const char *filename, char *dri_name);
+int pci_parse_sysfs_resource(const char *filename, struct rte_pci_device *dev);
+
+int vfio_get_container_fd(void);
+int vfio_get_group_fd(int iommu_group_no);
+int vfio_get_group_no(const char *sysfs_base,
+ const char *dev_addr, int *iommu_group_no);
+
+int pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table);
+int pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd);
+int pci_vfio_set_bus_master(int dev_fd, bool op);
+
+extern void *pci_map_addr;
+
+static int
+read_pci_dev(struct rte_pci_device *dev)
+{
+ char filename[PATH_MAX];
+ char dev_dir[PATH_MAX];
+ char driver[PATH_MAX];
+ int ret;
+
+ snprintf(dev_dir, sizeof(dev_dir), "%s/" PCI_PRI_FMT,
+ rte_pci_get_sysfs_path(),
+ dev->addr.domain, dev->addr.bus,
+ dev->addr.devid, dev->addr.function);
+ if (access(dev_dir, R_OK) != 0) {
+ DEBUG("\n%s: %s not exist\n", __func__, dev_dir);
+ return -1;
+ }
+
+ /* parse resources */
+ snprintf(filename, sizeof(filename), "%s/resource", dev_dir);
+ if (pci_parse_sysfs_resource(filename, dev) < 0) {
+ DEBUG("%s(): cannot parse resource\n", __func__);
+ return -1;
+ }
+
+ /* parse driver */
+ snprintf(filename, sizeof(filename), "%s/driver", dev_dir);
+ ret = pci_get_kernel_driver_by_path(filename, driver);
+ if (ret < 0) {
+ DEBUG("Fail to get kernel driver\n");
+ return -1;
+ }
+
+ if (ret > 0 || strcmp(driver, "vfio-pci") != 0) {
+ DEBUG("Kernel driver is not vfio-pci\n");
+ return -1;
+ }
+ return 0;
+}
+
+static inline int invalid_port(int eid, int did)
+{
+ if (did < 0 || did >= total_virtio[eid])
+ return 1;
+ return 0;
+}
+
+static int extract_index(char *path)
+{
+ int i, len, device_id;
+ char *str, *end;
+ len = strlen(path);
+ for (i = len - 1; i >= 0; i--) {
+ if (path[i] == '-')
+ break;
+ }
+ str = &path[i+1];
+ errno = 0;
+ device_id = strtoul(str, &end, 10);
+ if ((str[0] == '\0') || (end == NULL) || (*end != '\0') || (errno != 0))
+ return -1;
+
+ return device_id;
+}
+
+static int virtio_net_alloc_hw_ele(int eid, char *args)
+{
+ int device_id;
+
+ DEBUG("\n%s: the vhost socket path %s\n", __func__, args);
+ device_id = extract_index(args);
+ if (device_id < 0 || device_id >= total_virtio[eid]) {
+ DEBUG("\n%s: device_id %d must be within the interval 0 ~ %d\n",
+ __func__, device_id, total_virtio[eid] - 1);
+ return -1;
+ }
+
+ if (True == virtio_net_pool[eid][device_id].used) {
+ DEBUG("\n%s: device_id %d has been taken already\n",
+ __func__, device_id);
+ return -1;
+ }
+
+ virtio_net_pool[eid][device_id].used = True;
+ return device_id;
+}
+
+static int virtio_net_free_hw_ele(int eid, int did)
+{
+ if (invalid_port(eid, did))
+ return -1;
+
+ virtio_net_pool[eid][did].used = False;
+ return 0;
+}
+
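+/*
+ * Kick relay thread: wait on each virtqueue's kickfd with epoll and, for
+ * every kick from the guest, write the queue index to the corresponding
+ * notify register of the virtio-net PCI device.
+ */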
+static void *notify_relay(void *arg)
+{
+ int i, kickfd, epfd, nfds = 0;
+ struct virtio_net *dev = (struct virtio_net *)arg;
+ struct virtio_net_pci *vpci = &virtio_net_pool[dev->eid][dev->did];
+ struct virtio_net_hw *hw = &virtio_net_pool[dev->eid][dev->did].hw;
+ uint32_t n, qid, q_num = dev->nr_vring;
+ struct epoll_event events[q_num];
+ struct epoll_event ev;
+ struct vhost_virtqueue **vq = dev->virtqueue;
+ uint64_t buf;
+ int nbytes;
+
+ epfd = epoll_create(32);
+ vpci->epfd = epfd;
+ for (n = 0; n < dev->nr_vring; n++) {
+ ev.data.u32 = n;
+ ev.events = EPOLLIN | EPOLLPRI;
+ if (epoll_ctl(epfd, EPOLL_CTL_ADD, vq[n]->kickfd, &ev) < 0) {
+ DEBUG("Error epoll add failed, %s\n", strerror(errno));
+ return NULL;
+ }
+ }
+
+ for (;;) {
+ nfds = epoll_wait(epfd, events, q_num, -1);
+ if (nfds < 0) {
+ if (errno == EINTR)
+ continue;
+ DEBUG("epoll_wait return fail\n");
+ return NULL;
+ } else if (nfds == 0)
+ continue;
+
+ for (i = 0; i < nfds; i++) {
+ qid = events[i].data.u32;
+ kickfd = vq[qid]->kickfd;
+
+ do {
+ nbytes = read(kickfd, &buf, 8);
+ if (nbytes < 0) {
+ if (errno == EINTR || errno == EWOULDBLOCK ||
+ errno == EAGAIN)
+ continue;
+ DEBUG("Error reading from kickfd %d: %s\n",
+ kickfd, strerror(errno));
+ } else if (nbytes == 0)
+ DEBUG("Read nothing from kickfd %d\n", kickfd);
+ break;
+ } while (1);
+
+ rte_write16(qid, hw->notify_addr[qid]);
+ }
+ }
+
+ return NULL;
+}
+
+static int setup_notify_relay(struct virtio_net *dev)
+{
+ struct virtio_net_pci *vpci = &virtio_net_pool[dev->eid][dev->did];
+ int ret;
+
+ ret = pthread_create(&vpci->tid, NULL, notify_relay, dev);
+ if (ret != 0) {
+ DEBUG("failed to create notify relay pthread\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int unset_notify_relay(struct virtio_net *dev)
+{
+ struct virtio_net_pci *vpci = &virtio_net_pool[dev->eid][dev->did];
+ void *status;
+ if (vpci->tid) {
+ pthread_cancel(vpci->tid);
+ pthread_join(vpci->tid, &status);
+ DEBUG("\n%s device_id %d, cancel relay tid %lu\n", __func__,
+ dev->did, vpci->tid);
+ }
+ vpci->tid = 0;
+
+ if (vpci->epfd >= 0) {
+ close(vpci->epfd);
+ DEBUG("\n%s close epfd %d\n", __func__, vpci->epfd);
+ }
+ vpci->epfd = -1;
+
+ return 0;
+}
+
+static void *
+get_cap_addr(struct rte_pci_device *dev, struct virtio_net_pci_cap *cap)
+{
+ uint8_t bar = cap->bar;
+ uint32_t length = cap->length;
+ uint32_t offset = cap->offset;
+ uint8_t *base;
+
+ if (bar > 5) {
+ DEBUG("invalid bar: %u", bar);
+ return NULL;
+ }
+
+ if (offset + length < offset) {
+ DEBUG("offset(%u) + length(%u) overflows",
+ offset, length);
+ return NULL;
+ }
+
+ if (offset + length > dev->mem_resource[bar].len) {
+ DEBUG("invalid cap: overflows bar space: %u > %" PRIu64,
+ offset + length, dev->mem_resource[bar].len);
+ return NULL;
+ }
+
+ base = dev->mem_resource[bar].addr;
+ if (base == NULL) {
+ DEBUG("bar %u base addr is NULL", bar);
+ return NULL;
+ }
+
+ return base + offset;
+}
+
+static int vfio_setup_device(const char *sysfs_base, const char *dev_addr,
+ int *vfio_dev_fd, struct vfio_device_info *device_info,
+ struct virtio_net_hw *hw)
+{
+ struct vfio_group_status group_status = {
+ .argsz = sizeof(group_status)
+ };
+ int vfio_group_fd;
+ int iommu_group_no;
+ int ret;
+ struct vfio_config *vfio_cfg;
+
+ vfio_cfg = &(hw->vfio_cfg);
+ vfio_cfg->group_fd = -1;
+ vfio_cfg->group_no = -1;
+ vfio_cfg->vfio_container_fd = vfio_get_container_fd();
+
+ /* check if we have VFIO driver enabled */
+ if (vfio_cfg->vfio_container_fd == -1) {
+ DEBUG("VFIO support could not be initialized\n");
+ return -1;
+ }
+
+ /* get group number */
+ ret = vfio_get_group_no(sysfs_base, dev_addr, &iommu_group_no);
+ if (ret <= 0) {
+ DEBUG("%s not managed by VFIO driver\n", dev_addr);
+ return -1;
+ }
+
+ /* get the actual group fd */
+ vfio_group_fd = vfio_get_group_fd(iommu_group_no);
+ DEBUG("\nget group no %u group fd %u\n", iommu_group_no, vfio_group_fd);
+ if (vfio_group_fd <= 0)
+ return -1;
+
+ /* store group fd */
+ vfio_cfg->group_no = iommu_group_no;
+ vfio_cfg->group_fd = vfio_group_fd;
+
+ /* check if the group is viable */
+ ret = ioctl(vfio_group_fd, VFIO_GROUP_GET_STATUS, &group_status);
+ if (ret) {
+ DEBUG("%s cannot get group status, error %i (%s)\n",
+ dev_addr, errno, strerror(errno));
+ close(vfio_group_fd);
+ return -1;
+ } else if (!(group_status.flags & VFIO_GROUP_FLAGS_VIABLE)) {
+ DEBUG("%s VFIO group is not viable!\n", dev_addr);
+ close(vfio_group_fd);
+ return -1;
+ }
+
+ /* check if group does not have a container yet */
+ if (!(group_status.flags & VFIO_GROUP_FLAGS_CONTAINER_SET)) {
+ /* add group to a container */
+ ret = ioctl(vfio_group_fd, VFIO_GROUP_SET_CONTAINER,
+ &vfio_cfg->vfio_container_fd);
+ if (ret) {
+ DEBUG("%s cannot add VFIO group to container, error %i (%s)\n",
+ dev_addr, errno, strerror(errno));
+ close(vfio_group_fd);
+ return -1;
+ }
+ DEBUG("\nvfio_group_fd %u ---> container_fd %u\n",
+ vfio_group_fd, vfio_cfg->vfio_container_fd);
+ }
+
+ ret = ioctl(vfio_cfg->vfio_container_fd, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU);
+ if (ret) {
+ DEBUG("%s set IOMMU type failed, error %i (%s)\n",
+ dev_addr, errno, strerror(errno));
+ return -1;
+ }
+
+ /* get a file descriptor for the device */
+ *vfio_dev_fd = ioctl(vfio_group_fd, VFIO_GROUP_GET_DEVICE_FD, dev_addr);
+ if (*vfio_dev_fd < 0) {
+ DEBUG("%s not managed by VFIO driver\n", dev_addr);
+ return -1;
+ }
+
+ /* test and setup the device */
+ ret = ioctl(*vfio_dev_fd, VFIO_DEVICE_GET_INFO, device_info);
+ if (ret) {
+ DEBUG("%s cannot get device info, error %i (%s)\n",
+ dev_addr, errno, strerror(errno));
+ close(*vfio_dev_fd);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int
+virtio_net_pci_vfio_map_resource(struct virtio_net_pci *vpci)
+{
+ struct rte_pci_device *pdev = &vpci->pdev;
+ struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
+ char pci_addr[PATH_MAX] = {0};
+ int vfio_dev_fd;
+ struct rte_pci_addr *loc = &pdev->addr;
+ int i, ret, nb_maps;
+
+ uint32_t ioport_bar;
+ struct pci_msix_table msix_table;
+
+ pdev->intr_handle.fd = -1;
+ pdev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+
+ /* store PCI address string */
+ snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
+ loc->domain, loc->bus, loc->devid, loc->function);
+
+ ret = vfio_setup_device(rte_pci_get_sysfs_path(), pci_addr,
+ &vfio_dev_fd, &device_info, &vpci->hw);
+ if (ret)
+ return ret;
+
+ ret = pci_vfio_get_msix_bar(vfio_dev_fd, &msix_table);
+ if (ret < 0) {
+ DEBUG("%s cannot get MSI-X BAR number!\n", pci_addr);
+ close(vfio_dev_fd);
+ return -1;
+ }
+
+ /* get number of regions (up to BAR5) */
+ nb_maps = RTE_MIN((int) device_info.num_regions,
+ VFIO_PCI_BAR5_REGION_INDEX + 1);
+
+ /* map BARs */
+ for (i = 0; i < nb_maps; i++) {
+ struct vfio_region_info reg = { .argsz = sizeof(reg) };
+ void *bar_addr;
+
+ reg.index = i;
+ ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, &reg);
+
+ if (ret) {
+ DEBUG("%s cannot get device region info error %i (%s)\n",
+ pci_addr, errno, strerror(errno));
+ goto fail;
+ }
+
+ ret = pread(vfio_dev_fd, &ioport_bar, sizeof(ioport_bar),
+ VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX)
+ + PCI_BASE_ADDRESS_0 + i * 4);
+
+ if (ret != sizeof(ioport_bar)) {
+ DEBUG("Cannot read command (%x) from config space!\n",
+ PCI_BASE_ADDRESS_0 + i * 4);
+ goto fail;
+ }
+
+ /* check for io port region */
+ if (ioport_bar & PCI_BASE_ADDRESS_SPACE_IO)
+ continue;
+
+ /* skip non-mmapable BARs */
+ if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) == 0)
+ continue;
+
+ if (i == msix_table.bar_index)
+ continue;
+
+ /* try mapping somewhere close to the end of hugepages */
+ if (pci_map_addr == NULL)
+ pci_map_addr = pci_find_max_end_va();
+
+ bar_addr = pci_map_addr;
+ pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size);
+
+ /* reserve the address using an inaccessible mapping */
+ bar_addr = mmap(bar_addr, reg.size, 0, MAP_PRIVATE |
+ MAP_ANONYMOUS, -1, 0);
+ if (bar_addr != MAP_FAILED) {
+ void *map_addr = NULL;
+ if (reg.size) {
+ map_addr = pci_map_resource(bar_addr, vfio_dev_fd,
+ reg.offset, reg.size, MAP_FIXED);
+ }
+
+ if (map_addr == MAP_FAILED || !map_addr) {
+ munmap(bar_addr, reg.size);
+ bar_addr = MAP_FAILED;
+ }
+ }
+
+ if (bar_addr == MAP_FAILED) {
+ DEBUG("%s mapping BAR%i failed: %s\n", pci_addr, i,
+ strerror(errno));
+ goto fail;
+ }
+ pdev->mem_resource[i].addr = bar_addr;
+ }
+
+ if (pci_vfio_setup_interrupts(pdev, vfio_dev_fd) != 0) {
+ DEBUG("%s error setting up interrupts!\n", pci_addr);
+ goto fail;
+ }
+
+ /* set bus mastering for the device */
+ if (pci_vfio_set_bus_master(vfio_dev_fd, true)) {
+ DEBUG("%s cannot set up bus mastering!\n", pci_addr);
+ goto fail;
+ }
+
+ /* Reset the device */
+ ioctl(vfio_dev_fd, VFIO_DEVICE_RESET);
+ vpci->hw.vfio_cfg.vfio_dev_fd = vfio_dev_fd;
+
+ return 0;
+
+fail:
+ close(vfio_dev_fd);
+ return -1;
+}
+
+/* With vfio-pci, map config space to virtio_net_hw. */
+static int
+virtio_net_map_pci(struct virtio_net_pci *vpci)
+{
+ uint8_t pos;
+ struct virtio_net_pci_cap cap;
+ struct rte_pci_device *dev = &vpci->pdev;
+ struct virtio_net_hw *hw = &vpci->hw;
+ int ret;
+
+ if (virtio_net_pci_vfio_map_resource(vpci)) {
+ DEBUG("failed to map pci device!\n");
+ return -1;
+ }
+
+ ret = rte_pci_read_config(dev, &pos, 1, PCI_CAPABILITY_LIST);
+ if (ret < 0) {
+ DEBUG("failed to read pci capability list\n");
+ return -1;
+ }
+
+ while (pos) {
+ ret = rte_pci_read_config(dev, &cap, sizeof(cap), pos);
+ if (ret < 0) {
+ DEBUG("failed to read pci cap at pos: %x", pos);
+ break;
+ }
+
+ if (cap.cap_vndr != PCI_CAP_ID_VNDR)
+ goto next;
+
+ DEBUG("[%2x] cfg type: %u, bar: %u, offset: %04x, len: %u\n",
+ pos, cap.cfg_type, cap.bar, cap.offset, cap.length);
+
+ switch (cap.cfg_type) {
+ case VIRTIO_PCI_CAP_COMMON_CFG:
+ hw->common_cfg = get_cap_addr(dev, &cap);
+ break;
+ case VIRTIO_PCI_CAP_NOTIFY_CFG:
+ rte_pci_read_config(dev, &hw->notify_off_multiplier,
+ 4, pos + sizeof(cap));
+ hw->notify_base = get_cap_addr(dev, &cap);
+ break;
+ case VIRTIO_PCI_CAP_DEVICE_CFG:
+ hw->dev_cfg = get_cap_addr(dev, &cap);
+ break;
+ case VIRTIO_PCI_CAP_ISR_CFG:
+ hw->isr = get_cap_addr(dev, &cap);
+ break;
+ }
+
+next:
+ pos = cap.cap_next;
+ }
+
+ if (hw->common_cfg == NULL || hw->notify_base == NULL ||
+ hw->dev_cfg == NULL || hw->isr == NULL) {
+ DEBUG("no modern virtio pci device found.\n");
+ return -1;
+ }
+
+ DEBUG("capability mapping:\ncommon cfg: %p\ndevice cfg: %p\n"
+ "isr cfg: %p\nnotify base: %p\nmultiplier: %u\n",
+ hw->common_cfg, hw->dev_cfg,
+ hw->isr, hw->notify_base, hw->notify_off_multiplier);
+
+ return 0;
+}
+
+static uint8_t
+virtio_net_get_status(struct virtio_net_hw *hw)
+{
+ return rte_read8(&hw->common_cfg->device_status);
+}
+
+static void
+virtio_net_set_status(struct virtio_net_hw *hw, uint8_t status)
+{
+ rte_write8(status, &hw->common_cfg->device_status);
+}
+
+static void
+virtio_net_vtpci_reset(struct virtio_net_hw *hw)
+{
+ virtio_net_set_status(hw, VIRTIO_CONFIG_STATUS_RESET);
+ /* flush status write */
+ while (virtio_net_get_status(hw))
+ rte_delay_ms(1);
+}
+
+static void
+virtio_net_vtpci_set_status(struct virtio_net_hw *hw, uint8_t status)
+{
+ if (status != VIRTIO_CONFIG_STATUS_RESET)
+ status |= virtio_net_get_status(hw);
+
+ virtio_net_set_status(hw, status);
+ virtio_net_get_status(hw);
+}
+
+static uint64_t
+virtio_net_get_features(struct virtio_net_hw *hw)
+{
+ uint32_t features_lo, features_hi;
+ struct virtio_net_pci_common_cfg *cfg = hw->common_cfg;
+
+ rte_write32(0, &cfg->device_feature_select);
+ features_lo = rte_read32(&cfg->device_feature);
+
+ rte_write32(1, &cfg->device_feature_select);
+ features_hi = rte_read32(&cfg->device_feature);
+
+ return ((uint64_t)features_hi << 32) | features_lo;
+}
+
+static void
+virtio_net_set_features(struct virtio_net_hw *hw, uint64_t features)
+{
+ struct virtio_net_pci_common_cfg *cfg = hw->common_cfg;
+
+ /** vIOMMU to support this virtio device used as vDPA device **/
+ features |= (1ULL << VIRTIO_F_IOMMU_PLATFORM);
+
+ rte_write32(0, &cfg->guest_feature_select);
+ rte_write32(features & ((1ULL << 32) - 1), &cfg->guest_feature);
+
+ rte_write32(1, &cfg->guest_feature_select);
+ rte_write32(features >> 32, &cfg->guest_feature);
+}
+
+static int
+virtio_net_negotiate_features(struct virtio_net_hw *hw, uint64_t req_features)
+{
+ uint64_t host_features;
+
+ DEBUG("%s: qemu and guest negotiated feature: 0x%lx\n",
+ __func__, req_features);
+
+ /* Read device(host) feature bits */
+ host_features = virtio_net_get_features(hw);
+ DEBUG("%s: VIRTIO_NET device supported feature: 0x%lx\n",
+ __func__, host_features);
+
+ hw->req_guest_features = req_features;
+ hw->guest_features = req_features & host_features;
+ virtio_net_set_features(hw, hw->guest_features);
+ DEBUG("%s: VIRTIO_NET device configed feature: 0x%lx\n",
+ __func__, hw->guest_features);
+
+ virtio_net_vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_FEATURES_OK);
+ if (!(virtio_net_get_status(hw) & VIRTIO_CONFIG_STATUS_FEATURES_OK)) {
+ DEBUG("failed to set FEATURES_OK status!\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static inline void
+virtio_net_io_write64_twopart(uint64_t val, uint32_t *lo, uint32_t *hi)
+{
+ rte_write32(val & ((1ULL << 32) - 1), lo);
+ rte_write32(val >> 32, hi);
+}
+
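+/*
+ * Translate a vhost-provided virtual address (QEMU process VA) to the guest
+ * physical address of the same buffer, using the memory regions received in
+ * the vhost-user memory table. The GPA is used as IOVA when programming the
+ * device, matching the DMA mappings set up in virtio_net_pci_dma_map().
+ */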
+static inline uint64_t qva_to_gpa(struct virtio_net *dev, uint64_t qva)
+{
+ struct rte_vhost_mem_region *reg;
+ uint32_t i;
+ uint64_t gpa = 0;
+
+ for (i = 0; i < dev->mem->nregions; i++) {
+ reg = &dev->mem->regions[i];
+
+ if (qva >= reg->host_user_addr &&
+ qva < reg->host_user_addr + reg->size) {
+ gpa = qva - reg->host_user_addr + reg->guest_phys_addr;
+ }
+ }
+
+ if (gpa == 0)
+ rte_panic("failed to get gpa\n");
+
+ return gpa;
+}
+
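+/*
+ * Program each virtqueue of the virtio-net PCI device with the guest's ring
+ * addresses (as GPAs), set the ring size, record the per-queue notify
+ * address for the kick relay thread, and enable the queue.
+ */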
+static int virtio_net_config_queues(struct virtio_net *dev)
+{
+ struct virtio_net_hw *hw = &virtio_net_pool[dev->eid][dev->did].hw;
+ struct virtio_net_pci_common_cfg *cfg = hw->common_cfg;
+ struct vhost_virtqueue *vq;
+ uint64_t desc_addr, avail_addr, used_addr;
+ uint32_t i;
+ uint16_t notify_off;
+
+ for (i = 0; i < dev->nr_vring; i++) {
+ vq = dev->virtqueue[i];
+ desc_addr = qva_to_gpa(dev, (uint64_t)vq->desc);
+ avail_addr = qva_to_gpa(dev, (uint64_t)vq->avail);
+ used_addr = qva_to_gpa(dev, (uint64_t)vq->used);
+
+ rte_write16(i, &cfg->queue_select);
+ virtio_net_io_write64_twopart(desc_addr, &cfg->queue_desc_lo,
+ &cfg->queue_desc_hi);
+ virtio_net_io_write64_twopart(avail_addr, &cfg->queue_avail_lo,
+ &cfg->queue_avail_hi);
+ virtio_net_io_write64_twopart(used_addr, &cfg->queue_used_lo,
+ &cfg->queue_used_hi);
+ rte_write16((uint16_t)vq->size, &cfg->queue_size);
+
+ notify_off = rte_read16(&cfg->queue_notify_off);
+ hw->notify_addr[i] = (void *)((uint8_t *)hw->notify_base +
+ notify_off * hw->notify_off_multiplier);
+ rte_write16(1, &cfg->queue_enable);
+
+ DEBUG("queue %u addresses:\n"
+ "desc_addr: 0x%lx\tavail_addr: 0x%lx\tused_addr: 0x%lx\n"
+ "queue size: %u\t\tnotify addr: %p\tnotify offset: %u\n",
+ i, desc_addr, avail_addr, used_addr,
+ vq->size, hw->notify_addr[i], notify_off);
+ }
+
+ return 0;
+}
+
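+/*
+ * Assign MSI-X vectors: vector 0 for config/link state change, vector i + 1
+ * for virtqueue i. virtio_net_enable_vfio_intr() later binds these vectors
+ * to the guest's callfd eventfds through VFIO, so device interrupts reach
+ * the guest without a software relay.
+ */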
+static int virtio_net_config_irqs(struct virtio_net *dev)
+{
+ uint32_t i;
+ struct virtio_net_hw *hw = &virtio_net_pool[dev->eid][dev->did].hw;
+ struct virtio_net_pci_common_cfg *cfg = hw->common_cfg;
+
+ rte_write16(0, &cfg->msix_config);
+ if (rte_read16(&cfg->msix_config) == VIRTIO_MSI_NO_VECTOR) {
+ DEBUG("For LSC, allocate msix vec failed\n");
+ return -1;
+ }
+
+ for (i = 0; i < dev->nr_vring; i++) {
+ rte_write16(i, &cfg->queue_select);
+ rte_write16(i + 1, &cfg->queue_msix_vector);
+ if (rte_read16(&cfg->queue_msix_vector) == VIRTIO_MSI_NO_VECTOR) {
+ DEBUG("queue id %u, allocate msix vec failed\n", i);
+ return -1;
+ }
+ }
+ DEBUG("\n%s config irqs OK, num of queues %u\n", __func__, i);
+ return 0;
+}
+
+static void virtio_net_stop_queues(struct virtio_net *dev)
+{
+ uint32_t i;
+ struct virtio_net_hw *hw = &virtio_net_pool[dev->eid][dev->did].hw;
+ struct virtio_net_pci_common_cfg *cfg = hw->common_cfg;
+
+ rte_write16(VIRTIO_MSI_NO_VECTOR, &cfg->msix_config);
+ for (i = 0; i < dev->nr_vring; i++) {
+ rte_write16(i, &cfg->queue_select);
+ rte_write16(0, &cfg->queue_enable);
+ rte_write16(VIRTIO_MSI_NO_VECTOR, &cfg->queue_msix_vector);
+ }
+}
+
+static int virtio_net_enable_vfio_intr(struct virtio_net *dev)
+{
+ int ret;
+ uint32_t i, len;
+ char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
+ struct vfio_irq_set *irq_set;
+ int *fd_ptr;
+ struct virtio_net_pci *vpci;
+
+ vpci = &virtio_net_pool[dev->eid][dev->did];
+ len = sizeof(irq_set_buf);
+ irq_set = (struct vfio_irq_set *) irq_set_buf;
+ irq_set->argsz = len;
+ irq_set->count = dev->nr_vring + 1;
+ irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
+ irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
+ irq_set->start = 0;
+ fd_ptr = (int *) &irq_set->data;
+ fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] = vpci->pdev.intr_handle.fd;
+
+ DEBUG("\n%s device_id %d LSC fd %u, vfio_dev_fd %u\n", __func__,
+ dev->did, vpci->pdev.intr_handle.fd,
+ vpci->pdev.intr_handle.vfio_dev_fd);
+ for (i = 0; i < dev->nr_vring; i++)
+ fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = dev->virtqueue[i]->callfd;
+
+ ret = ioctl(vpci->pdev.intr_handle.vfio_dev_fd,
+ VFIO_DEVICE_SET_IRQS, irq_set);
+
+ if (ret) {
+ DEBUG("Error enabling MSI-X interrupts, dev id %u\n", dev->did);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int virtio_net_disable_vfio_intr(struct virtio_net *dev)
+{
+ int len, ret;
+ char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
+ struct vfio_irq_set *irq_set;
+ struct virtio_net_pci *vpci;
+
+ vpci = &virtio_net_pool[dev->eid][dev->did];
+ len = sizeof(irq_set_buf);
+ irq_set = (struct vfio_irq_set *) irq_set_buf;
+ irq_set->argsz = len;
+ irq_set->count = 0;
+ irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
+ irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
+ irq_set->start = 0;
+
+ ret = ioctl(vpci->pdev.intr_handle.vfio_dev_fd,
+ VFIO_DEVICE_SET_IRQS, irq_set);
+ if (ret) {
+ DEBUG("Error disabling MSI-X interrupts, dev id %u\n", dev->did);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int virtio_net_conf_pci(struct virtio_net *dev)
+{
+ struct virtio_net_hw *hw = &virtio_net_pool[dev->eid][dev->did].hw;
+ uint64_t features = dev->features;
+
+ /* Reset the device although not necessary at startup. */
+ virtio_net_vtpci_reset(hw);
+
+ /* Tell the host we've noticed this device. */
+ virtio_net_vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_ACK);
+
+ /* Tell the host we've known how to drive the device. */
+ virtio_net_vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER);
+
+ if (virtio_net_negotiate_features(hw, features) < 0)
+ return -1;
+
+ virtio_net_config_queues(dev);
+ virtio_net_config_irqs(dev);
+ virtio_net_vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER_OK);
+ return 0;
+}
+
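+/*
+ * Set up DMA remapping through the VFIO container: one mapping per guest
+ * memory region with IOVA = guest physical address and VA = the address at
+ * which the region is mapped in this process, so the device can DMA into
+ * guest memory using guest physical addresses.
+ */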
+static int
+virtio_net_pci_dma_map(int vfio_container_fd, struct virtio_net *vdev)
+{
+ uint32_t i, ret;
+ struct rte_vhost_memory *mem = vdev->mem;
+
+ for (i = 0; i < mem->nregions; i++) {
+ struct vfio_iommu_type1_dma_map dma_map;
+ struct rte_vhost_mem_region *reg;
+ reg = &mem->regions[i];
+
+ DEBUG("\n%s device_id %d vfio_container_fd %d, %u th regison, total %u region\n",
+ __func__, vdev->did, vfio_container_fd,
+ i, mem->nregions);
+ DEBUG("region %u host_user_addr 0x%lx, guest_phys_addr 0x%lx, size 0x%0lx\n",
+ i, reg->host_user_addr,
+ reg->guest_phys_addr, reg->size);
+
+ memset(&dma_map, 0, sizeof(dma_map));
+ dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
+ dma_map.vaddr = reg->host_user_addr;
+ dma_map.size = reg->size;
+ dma_map.iova = reg->guest_phys_addr;
+ dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
+
+ ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);
+ if (ret) {
+ DEBUG(" cannot set up DMA remapping, error %i (%s)\n",
+ errno, strerror(errno));
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int
+virtio_net_pci_dma_unmap(int vfio_container_fd, struct virtio_net *vdev)
+{
+ uint32_t i, ret;
+ struct rte_vhost_memory *mem = vdev->mem;
+
+ /* VM start fails */
+ if (mem == NULL)
+ return 0;
+
+ for (i = 0; i < mem->nregions; i++) {
+ struct vfio_iommu_type1_dma_unmap dma_unmap;
+ struct rte_vhost_mem_region *reg;
+ reg = &mem->regions[i];
+
+ DEBUG("region %u host_user_addr 0x%lx, guest_phys_addr 0x%lx, size 0x%0lx\n",
+ i, reg->host_user_addr,
+ reg->guest_phys_addr, reg->size);
+
+ memset(&dma_unmap, 0, sizeof(dma_unmap));
+ dma_unmap.argsz = sizeof(struct vfio_iommu_type1_dma_unmap);
+ dma_unmap.size = reg->size;
+ dma_unmap.iova = reg->guest_phys_addr;
+ dma_unmap.flags = 0;
+
+ ret = ioctl(vfio_container_fd, VFIO_IOMMU_UNMAP_DMA, &dma_unmap);
+
+ if (ret) {
+ DEBUG(" cannot unset DMA remapping, error %i (%s)\n",
+ errno, strerror(errno));
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int disable_device(struct virtio_net *dev)
+{
+ struct virtio_net_hw *hw = &virtio_net_pool[dev->eid][dev->did].hw;
+
+ virtio_net_stop_queues(dev);
+ virtio_net_vtpci_reset(hw);
+
+ return 0;
+}
+
+static int virtio_net_dev_config(struct virtio_net *dev)
+{
+ if (!dev || invalid_port(dev->eid, dev->did)) {
+ DEBUG("Invalid virtio_net struct\n");
+ return -1;
+ }
+
+ int ret;
+ struct rte_pci_device *pdev = &virtio_net_pool[dev->eid][dev->did].pdev;
+ struct virtio_net_hw *hw = &virtio_net_pool[dev->eid][dev->did].hw;
+
+ DEBUG("Try to config device: dev id %u bus %02x device %02x function %0x\n",
+ dev->did, pdev->addr.bus,
+ pdev->addr.devid, pdev->addr.function);
+
+ DEBUG("\ndevice_id %d mapped, set DMAR\n", dev->did);
+ ret = virtio_net_pci_dma_map(hw->vfio_cfg.vfio_container_fd, dev);
+ if (ret) {
+ DEBUG("device_id %u DMA remapping failed, error %i (%s)\n",
+ dev->did, errno, strerror(errno));
+ return -1;
+ }
+
+ DEBUG("\ndevice_id %d DMAR set, config it\n", dev->did);
+ ret = virtio_net_conf_pci(dev);
+ if (ret)
+ return -1;
+
+ ret = virtio_net_enable_vfio_intr(dev);
+ if (ret)
+ return -1;
+
+ ret = setup_notify_relay(dev);
+ if (ret)
+ return -1;
+
+ return 0;
+}
+
+static int virtio_net_dev_close(struct virtio_net *dev)
+{
+ int ret;
+ struct virtio_net_pci *vpci;
+ int vfio_container_fd;
+
+ if (!dev || invalid_port(dev->eid, dev->did)) {
+ DEBUG("Invalid virtio_net struct\n");
+ return -1;
+ }
+
+ vpci = &virtio_net_pool[dev->eid][dev->did];
+ vfio_container_fd = vpci->hw.vfio_cfg.vfio_container_fd;
+
+ disable_device(dev);
+ unset_notify_relay(dev);
+ ret = virtio_net_disable_vfio_intr(dev);
+ if (ret < 0)
+ return -1;
+
+ DEBUG("\n%s: unset DMAR for device_id %d\n", __func__, dev->did);
+ ret = virtio_net_pci_dma_unmap(vfio_container_fd, dev);
+ if (ret) {
+ DEBUG("device_id %u DMA reset DMAR failed, error %i (%s)\n",
+ dev->did, errno, strerror(errno));
+ return -1;
+ }
+
+ return 0;
+}
+
+static int virtio_net_pool_init(int eid)
+{
+ uint32_t i, ret;
+ struct virtio_net_pci *vpci;
+ struct rte_pci_device *pdev;
+ struct rte_pci_addr *eng_addr;
+ char dev_dir[PATH_MAX];
+
+ eng_addr = &(vdpa_engines[eid]->eng_attr.id->pci_addr);
+ snprintf(dev_dir, sizeof(dev_dir), "%s/" PCI_PRI_FMT,
+ rte_pci_get_sysfs_path(),
+ eng_addr->domain, eng_addr->bus,
+ eng_addr->devid, eng_addr->function);
+
+ if (access(dev_dir, R_OK) != 0) {
+ DEBUG("%s: "PCI_PRI_FMT" does not exist\n", __func__,
+ eng_addr->domain, eng_addr->bus,
+ eng_addr->devid, eng_addr->function);
+ return -1;
+ }
+
+ memset((char *)virtio_net_pool[eid], 0, sizeof(virtio_net_pool[0]));
+
+ for (i = 0; i < MAX_VDPA_DEVICE_VIRTIO; i++) {
+ pdev = &virtio_net_pool[eid][i].pdev;
+ pdev->addr.domain = eng_addr->domain;
+ pdev->addr.bus = eng_addr->bus;
+ pdev->addr.devid = eng_addr->devid;
+ pdev->addr.function = eng_addr->function;
+ /*
+ * Assume that the virtio-net-pci devices are listed like
+ * 00:03.0, 00:04.0, 00:05.0, and so on.
+ */
+ pdev->addr.devid += i % 8;
+ if (read_pci_dev(pdev) < 0) {
+ DEBUG("Read PCI device failed, dev id %d\n", i);
+ errno = 0;
+ break;
+ }
+ DEBUG("%s: detected "PCI_PRI_FMT"\n", __func__,
+ pdev->addr.domain, pdev->addr.bus,
+ pdev->addr.devid, pdev->addr.function);
+
+ /* Take control of a device by mapping it with VFIO. */
+ vpci = &virtio_net_pool[eid][i];
+ ret = virtio_net_map_pci(vpci);
+ if (ret) {
+ DEBUG("\npci map to userspace failed\n");
+ break;
+ }
+ }
+ total_virtio[eid] = i;
+
+ if (total_virtio[eid] <= 0) {
+ DEBUG("\n%s: find no virtio devices\n", __func__);
+ return -1;
+ }
+
+ pool_initiated[eid] = 1;
+ return 0;
+}
+
+static int virtio_net_pool_uninit(int eid)
+{
+ int i;
+ struct vfio_config *vfio_cfg;
+
+ for (i = 0; i < total_virtio[eid]; i++) {
+ vfio_cfg = &virtio_net_pool[eid][i].hw.vfio_cfg;
+ close(vfio_cfg->vfio_dev_fd);
+ close(vfio_cfg->group_fd);
+ close(vfio_cfg->vfio_container_fd);
+ }
+
+ total_virtio[eid] = 0;
+ pool_initiated[eid] = 0;
+ memset((char *)virtio_net_pool[eid], 0, sizeof(virtio_net_pool[0]));
+
+ return 0;
+}
+
+static int vdpa_virtio_eng_init(int eid,
+ struct rte_vdpa_eng_id *id __rte_unused)
+{
+ if (!pool_initiated[eid] && virtio_net_pool_init(eid) == 0)
+ return 0;
+ return -1;
+}
+
+static int vdpa_virtio_eng_uninit(int eid)
+{
+ if (pool_initiated[eid] && virtio_net_pool_uninit(eid) == 0)
+ return 0;
+ return -1;
+}
+
+static int vdpa_virtio_dev_conf(int vid)
+{
+ struct virtio_net *dev = get_device(vid);
+
+ return virtio_net_dev_config(dev);
+}
+
+static int vdpa_virtio_dev_close(int vid)
+{
+ struct virtio_net *dev = get_device(vid);
+
+ return virtio_net_dev_close(dev);
+}
+
+int vdpa_virtio_get_device_num(int eid __rte_unused)
+{
+ /* Assume we have MAX_VDPA_DEVICE_VIRTIO virtio_net_pci devices */
+ return MAX_VDPA_DEVICE_VIRTIO;
+}
+
+int vdpa_virtio_get_queue_num(int eid __rte_unused, int did __rte_unused)
+{
+ return MAX_QUEUES_VIRTIO;
+}
+
+#define VDPA_SUPPORTED_FEATURES \
+ ((1ULL << VIRTIO_F_ANY_LAYOUT) | \
+ (1ULL << VIRTIO_F_VERSION_1) | \
+ (1ULL << VIRTIO_NET_F_MRG_RXBUF) | \
+ (1ULL << VHOST_USER_F_PROTOCOL_FEATURES))
+
+uint64_t vdpa_virtio_get_features(int eid __rte_unused, int did __rte_unused)
+{
+ return VDPA_SUPPORTED_FEATURES;
+}
+
+#define VDPA_SUPPORTED_PROTOCOL_FEATURES \
+ ((1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\
+ (1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK))
+
+uint64_t vdpa_virtio_get_protocol_features(int eid __rte_unused,
+ int did __rte_unused)
+{
+ return VDPA_SUPPORTED_PROTOCOL_FEATURES;
+}
+
+int vdpa_virtio_dev_init(int eid, char *args)
+{
+ return virtio_net_alloc_hw_ele(eid, args);
+}
+
+int vdpa_virtio_dev_uninit(int eid, int did)
+{
+ return virtio_net_free_hw_ele(eid, did);
+}
+
+int vdpa_virtio_dev_start(int eid __rte_unused, int did __rte_unused)
+{
+ return 0;
+}
+
+int vdpa_virtio_dev_stop(int eid __rte_unused, int did __rte_unused)
+{
+ return 0;
+}
+
+struct rte_vdpa_eng_driver vdpa_virtio_net_driver = {
+ .name = "vdpa_virtio_net",
+ .eng_ops = {
+ .eng_init = vdpa_virtio_eng_init,
+ .eng_uninit = vdpa_virtio_eng_uninit,
+ },
+ .dev_ops = {
+ .dev_conf = vdpa_virtio_dev_conf,
+ .dev_close = vdpa_virtio_dev_close,
+ .vring_state_set = NULL,
+ .migration_done = NULL,
+ },
+};
+
+RTE_VDPA_REGISTER_DRIVER(vdpa_virtio_net, vdpa_virtio_net_driver);
diff --git a/examples/vdpa/vdpa_virtio_net.h b/examples/vdpa/vdpa_virtio_net.h
new file mode 100644
index 0000000..b0a386a
--- /dev/null
+++ b/examples/vdpa/vdpa_virtio_net.h
@@ -0,0 +1,144 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VDPA_VIRTIO_NET_H_
+#define _VDPA_VIRTIO_NET_H_
+#include <stdint.h>
+#include <stdbool.h>
+#include <pthread.h>
+#include <linux/vfio.h>
+#include <rte_bus_pci.h>
+#include <rte_vdpa.h>
+#include <vhost.h>
+#include <vhost_user.h>
+
+#define MAX_VDPA_DEVICE_VIRTIO 8
+#define MAX_QUEUES_VIRTIO 1
+
+/* This is the PCI capability header: */
+struct virtio_net_pci_cap {
+ uint8_t cap_vndr; /* Generic PCI field: PCI_CAP_ID_VNDR */
+ uint8_t cap_next; /* Generic PCI field: next ptr. */
+ uint8_t cap_len; /* Generic PCI field: capability length */
+ uint8_t cfg_type; /* Identifies the structure. */
+ uint8_t bar; /* Where to find it. */
+ uint8_t padding[3]; /* Pad to full dword. */
+ uint32_t offset; /* Offset within bar. */
+ uint32_t length; /* Length of the structure, in bytes. */
+};
+
+struct virtio_net_pci_notify_cap {
+ struct virtio_net_pci_cap cap;
+ uint32_t notify_off_multiplier; /* Multiplier for queue_notify_off. */
+};
+
+/* Fields in VIRTIO_PCI_CAP_COMMON_CFG: */
+struct virtio_net_pci_common_cfg {
+ /* About the whole device. */
+ uint32_t device_feature_select; /* read-write */
+ uint32_t device_feature; /* read-only */
+ uint32_t guest_feature_select; /* read-write */
+ uint32_t guest_feature; /* read-write */
+ uint16_t msix_config; /* read-write */
+ uint16_t num_queues; /* read-only */
+ uint8_t device_status; /* read-write */
+ uint8_t config_generation; /* read-only */
+
+ /* About a specific virtqueue. */
+ uint16_t queue_select; /* read-write */
+ uint16_t queue_size; /* read-write, power of 2. */
+ uint16_t queue_msix_vector; /* read-write */
+ uint16_t queue_enable; /* read-write */
+ uint16_t queue_notify_off; /* read-only */
+ uint32_t queue_desc_lo; /* read-write */
+ uint32_t queue_desc_hi; /* read-write */
+ uint32_t queue_avail_lo; /* read-write */
+ uint32_t queue_avail_hi; /* read-write */
+ uint32_t queue_used_lo; /* read-write */
+ uint32_t queue_used_hi; /* read-write */
+};
+
+struct virtio_net_net_config {
+ /* The config defining mac address (if VIRTIO_NET_F_MAC) */
+ uint8_t mac[ETHER_ADDR_LEN];
+ /* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */
+ uint16_t status;
+ uint16_t max_virtqueue_pairs;
+} __attribute__((packed));
+
+struct vfio_config {
+ int vfio_dev_fd;
+ int vfio_container_fd;
+ int group_no;
+ int group_fd;
+};
+
+struct virtio_net_hw {
+ struct virtnet_ctl *cvq;
+ uint64_t req_guest_features;
+ uint64_t guest_features;
+ uint32_t notify_off_multiplier;
+ uint8_t *isr;
+ uint16_t *notify_base;
+ struct virtio_net_pci_common_cfg *common_cfg;
+ struct virtio_net_net_config *dev_cfg;
+ uint16_t *notify_addr[MAX_QUEUES_VIRTIO * 2];
+ struct vfio_config vfio_cfg;
+};
+
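+/*
+ * Per-device state: slot usage flag, the mapped PCI device, virtio hw
+ * registers and VFIO config, plus the kick relay thread and its epoll fd.
+ */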
+struct virtio_net_pci {
+ bool used;
+ struct rte_pci_device pdev;
+ struct virtio_net_hw hw;
+ pthread_t tid; /* thread for notify relay */
+ int epfd;
+};
+
+#define RTE_LIBRTE_VHOST_VIRTIO_NET_DEBUG
+#ifdef RTE_LIBRTE_VHOST_VIRTIO_NET_DEBUG
+ #define DEBUG(fmt, ...) printf(fmt, ##__VA_ARGS__)
+#else
+ #define DEBUG(fmt, ...) do {} while (0)
+#endif
+
+int vdpa_virtio_get_device_num(int eid);
+int vdpa_virtio_get_queue_num(int eid, int did);
+uint64_t vdpa_virtio_get_features(int eid, int did);
+uint64_t vdpa_virtio_get_protocol_features(int eid, int did);
+int vdpa_virtio_dev_init(int eid, char *args);
+int vdpa_virtio_dev_uninit(int eid, int did);
+int vdpa_virtio_dev_start(int eid, int did);
+int vdpa_virtio_dev_stop(int eid, int did);
+
+#endif /* _VDPA_VIRTIO_NET_H_ */
--
1.8.3.1