From: Xiao Wang <xiao.w.wang@intel.com>
To: dev@dpdk.org
Cc: jianfeng.tan@intel.com, tiwei.bie@intel.com,
maxime.coquelin@redhat.com, yliu@fridaylinux.org,
cunming.liang@intel.com, dan.daly@intel.com,
zhihong.wang@intel.com, Xiao Wang <xiao.w.wang@intel.com>
Subject: [dpdk-dev] [PATCH 3/3] examples/vdpa: add a new sample for vdpa
Date: Sun, 4 Feb 2018 22:55:42 +0800 [thread overview]
Message-ID: <20180204145542.38345-4-xiao.w.wang@intel.com> (raw)
In-Reply-To: <20180204145542.38345-1-xiao.w.wang@intel.com>
This patch adds a sample which creates a vhost-user socket based on a
vdpa driver. The vdpa driver can help to set up the vhost datapath, so
this app doesn't need to dedicate a worker thread to vhost
enqueue/dequeue operations.
Below are setup steps for your reference:
1. Make sure your kernel vhost module and QEMU support vIOMMU.
- OS: CentOS 7.4
- QEMU: 2.10.1
- Guest OS: CentOS 7.2
- Nested VM OS: CentOS 7.2
2. enable VT-x feature for vCPU in VM.
modprobe kvm_intel nested=1
3. Start a VM with a virtio-net-pci device.
./qemu-2.10.1/x86_64-softmmu/qemu-system-x86_64 -enable-kvm -cpu host \
<snip>
-machine q35 \
-device intel-iommu \
-netdev tap,id=mytap,ifname=vdpa,vhostforce=on \
-device virtio-net-pci,netdev=mytap,mac=00:aa:bb:cc:dd:ee,\
disable-modern=off,disable-legacy=on,iommu_platform=on \
4. Bind VFIO-pci to virtio_net_pci device
a) login to VM;
b) modprobe vfio-pci
c) rmmod vfio_iommu_type1
d) modprobe vfio_iommu_type1 allow_unsafe_interrupts=1
e) ./usertools/dpdk-devbind.py -b vfio-pci 00:03.0
5. Start vdpa sample
./examples/vdpa/build/vdpa -c 0x2 -n 4 --socket-mem 1024 --no-pci \
--vdev "net_vdpa_virtio_pci0,bdf=0000:00:03.0" -- --bdf 0000:00:03.0 \
--iface /tmp/vhost-user- --devcnt 1 --queue 1
6. Start nested VM
./qemu-2.10.1/x86_64-softmmu/qemu-system-x86_64 -cpu host -enable-kvm \
<snip>
-mem-prealloc \
-chardev socket,id=char0,path=/tmp/vhost-user-0 \
-netdev type=vhost-user,id=vdpa,chardev=char0,vhostforce \
-device virtio-net-pci,netdev=vdpa,mac=00:aa:bb:cc:dd:ee \
7. Log in to the nested VM, and verify that the virtio device in the nested VM
can communicate with the tap device on the host.
Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
---
examples/vdpa/Makefile | 32 ++++
examples/vdpa/main.c | 387 +++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 419 insertions(+)
create mode 100644 examples/vdpa/Makefile
create mode 100644 examples/vdpa/main.c
diff --git a/examples/vdpa/Makefile b/examples/vdpa/Makefile
new file mode 100644
index 000000000..42672a2bc
--- /dev/null
+++ b/examples/vdpa/Makefile
@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+ifneq ($(CONFIG_RTE_EXEC_ENV),"linuxapp")
+$(info This application can only operate in a linuxapp environment, \
+please change the definition of the RTE_TARGET environment variable)
+all:
+else
+
+# binary name
+APP = vdpa
+
+# all source are stored in SRCS-y
+SRCS-y := main.c
+
+CFLAGS += -O2 -D_FILE_OFFSET_BITS=64
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -D_GNU_SOURCE
+CFLAGS += -DALLOW_EXPERIMENTAL_API
+
+include $(RTE_SDK)/mk/rte.extapp.mk
+
+endif
diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c
new file mode 100644
index 000000000..1c9143469
--- /dev/null
+++ b/examples/vdpa/main.c
@@ -0,0 +1,387 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <getopt.h>
+#include <signal.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+#include <rte_vhost.h>
+#include <rte_vdpa.h>
+
+#define NUM_MBUFS 8191
+#define MBUF_CACHE_SIZE 250
+
+#define RX_RING_SIZE 128
+#define TX_RING_SIZE 128
+
+#define MAX_PATH_LEN 128
+#define MAX_VDPA_SAMPLE_PORTS 1024
+
+struct vdpa_port {
+ char ifname[MAX_PATH_LEN];
+ int eid;
+ int did;
+ int vid;
+};
+
+struct vdpa_port vports[MAX_VDPA_SAMPLE_PORTS];
+
+struct rte_vdpa_eng_attr attr;
+struct rte_vdpa_eng_addr dev_id;
+char iface[MAX_PATH_LEN];
+int queue;
+int devcnt;
+
+/* display usage */
+static void
+vdpa_usage(const char *prgname)
+{
+ printf("%s [EAL options]"
+ " -- --bdf B:D:F --iface <path> --devcnt ND --queue NQ\n"
+ " --bdf B:D:F, the PCI device used for vdpa\n"
+ " --iface <path>: The path of the socket file\n"
+ " --devcnt ND: number of vhost sockets to be created, default 1\n"
+ " --queue NQ: number of queue pairs to be configured, default 1\n",
+ prgname);
+}
+
+static int
+get_unsigned(const char *str, int base)
+{
+ unsigned long num;
+ char *end = NULL;
+
+ errno = 0;
+ num = strtoul(str, &end, base);
+ if (str[0] == '\0' || end == NULL || *end != '\0' || errno != 0)
+ return -1;
+
+ return num;
+}
+
+static int
+parse_args(int argc, char **argv)
+{
+ static const char *short_option = "";
+ static struct option long_option[] = {
+ {"bdf", required_argument, NULL, 0},
+ {"queue", required_argument, NULL, 0},
+ {"devcnt", required_argument, NULL, 0},
+ {"iface", required_argument, NULL, 0},
+ {NULL, 0, 0, 0},
+ };
+ char str[MAX_PATH_LEN];
+ int opt, idx;
+ int num[4] = {0};
+ int i, j;
+ char *prgname = argv[0];
+
+ while ((opt = getopt_long(argc, argv, short_option, long_option, &idx))
+ != EOF) {
+ switch (opt) {
+ case 0:
+ if (strncmp(long_option[idx].name, "bdf",
+ MAX_PATH_LEN) == 0) {
+ strcpy(str, optarg);
+ i = strlen(str) - 1;
+ j = 3;
+ while (i > 0 && j >= 0) {
+ while ((str[i - 1] != ':'
+ && str[i - 1] != '.')
+ && i > 0)
+ i--;
+ num[j--] = get_unsigned(&str[i], 16);
+ i--;
+ if (i >= 0)
+ str[i] = '\0';
+ }
+ dev_id.pci_addr.domain = num[0];
+ dev_id.pci_addr.bus = num[1];
+ dev_id.pci_addr.devid = num[2];
+ dev_id.pci_addr.function = num[3];
+ printf("bdf %04x:%02x:%02x.%02x\n",
+ dev_id.pci_addr.domain,
+ dev_id.pci_addr.bus,
+ dev_id.pci_addr.devid,
+ dev_id.pci_addr.function);
+ } else if (strncmp(long_option[idx].name, "queue",
+ MAX_PATH_LEN) == 0) {
+ queue = get_unsigned(optarg, 10);
+ printf("queue %d\n", queue);
+ } else if (strncmp(long_option[idx].name, "devcnt",
+ MAX_PATH_LEN) == 0) {
+ devcnt = get_unsigned(optarg, 10);
+ printf("devcnt %d\n", devcnt);
+ } else if (strncmp(long_option[idx].name, "iface",
+ MAX_PATH_LEN) == 0) {
+ strncpy(iface, optarg, MAX_PATH_LEN);
+ printf("iface %s\n", iface);
+ }
+
+ break;
+
+ default:
+ vdpa_usage(prgname);
+ return -1;
+ }
+ }
+
+ if (queue <= 0 || devcnt <= 0 || *iface == '\0') {
+ vdpa_usage(prgname);
+ return -1;
+ }
+
+ return 0;
+}
+
+static void
+data_init(void)
+{
+ devcnt = 1;
+ queue = 1;
+ memset(&dev_id, 0, sizeof(dev_id));
+ memset(iface, 0, MAX_PATH_LEN * sizeof(iface[0]));
+ memset(vports, 0, MAX_VDPA_SAMPLE_PORTS * sizeof(vports[0]));
+
+ return;
+}
+
+static void
+signal_handler(int signum)
+{
+ uint16_t portid, nb_ports;
+
+ if (signum == SIGINT || signum == SIGTERM) {
+ printf("\nSignal %d received, preparing to exit...\n",
+ signum);
+ nb_ports = rte_eth_dev_count();
+ for (portid = 0; portid < nb_ports; portid++) {
+ printf("Closing port %d...\n", portid);
+ rte_eth_dev_stop(portid);
+ rte_eth_dev_close(portid);
+ }
+ exit(0);
+ }
+}
+
+static int
+new_device(int vid)
+{
+ char ifname[MAX_PATH_LEN];
+ int i;
+
+ rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
+ for (i = 0; i < MAX_VDPA_SAMPLE_PORTS; i++) {
+ if (strcmp(ifname, vports[i].ifname) == 0) {
+ printf("\nport %s connected, eid: %d, did %d\n",
+ ifname, vports[i].eid, vports[i].did);
+ vports[i].vid = vid;
+ break;
+ }
+ }
+
+ if (i >= MAX_VDPA_SAMPLE_PORTS)
+ return -1;
+
+ return 0;
+}
+
+static void
+destroy_device(int vid)
+{
+ char ifname[MAX_PATH_LEN];
+ int i;
+
+ rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
+ for (i = 0; i < MAX_VDPA_SAMPLE_PORTS; i++) {
+ if (strcmp(ifname, vports[i].ifname) == 0) {
+ printf("\nport %s disconnected, eid: %d, did %d\n",
+ ifname, vports[i].eid, vports[i].did);
+ vports[i].vid = vid;
+ break;
+ }
+ }
+
+ return;
+}
+
+static const struct vhost_device_ops vdpa_sample_devops = {
+ .new_device = new_device,
+ .destroy_device = destroy_device,
+ .vring_state_changed = NULL,
+ .features_changed = NULL,
+ .new_connection = NULL,
+ .destroy_connection = NULL,
+};
+
+static const struct rte_eth_conf port_conf_default = {
+ .rxmode = {
+ .max_rx_pkt_len = ETHER_MAX_LEN,
+ .ignore_offload_bitfield = 1,
+ },
+};
+
+static inline int
+port_init(uint16_t port, struct rte_mempool *mbuf_pool)
+{
+ uint16_t rx_rings = 1, tx_rings = 1;
+ uint16_t nb_rxd = RX_RING_SIZE;
+ uint16_t nb_txd = TX_RING_SIZE;
+ int retval;
+ uint16_t q;
+ struct rte_eth_dev_info dev_info;
+ struct rte_eth_txconf txconf;
+ struct ether_addr addr;
+
+ if (port >= rte_eth_dev_count())
+ return -1;
+
+ rte_eth_dev_info_get(port, &dev_info);
+
+ /* Configure the Ethernet device. */
+ retval = rte_eth_dev_configure(port, rx_rings, tx_rings,
+ &port_conf_default);
+ if (retval < 0)
+ return retval;
+
+ /* Allocate and set up 1 Rx queue per Ethernet port. */
+ for (q = 0; q < rx_rings; q++) {
+ retval = rte_eth_rx_queue_setup(port, q, nb_rxd,
+ rte_eth_dev_socket_id(port), NULL, mbuf_pool);
+ if (retval < 0)
+ return retval;
+ }
+
+ txconf = dev_info.default_txconf;
+ /* Allocate and set up 1 Tx queue per Ethernet port. */
+ for (q = 0; q < tx_rings; q++) {
+ retval = rte_eth_tx_queue_setup(port, q, nb_txd,
+ rte_eth_dev_socket_id(port), &txconf);
+ if (retval < 0)
+ return retval;
+ }
+
+ /* Start the Ethernet port. */
+ retval = rte_eth_dev_start(port);
+ if (retval < 0)
+ return retval;
+
+ /* Display the port MAC address. */
+ rte_eth_macaddr_get(port, &addr);
+ printf("Port %u MAC: %02" PRIx8 " %02" PRIx8 " %02" PRIx8
+ " %02" PRIx8 " %02" PRIx8 " %02" PRIx8 "\n",
+ port,
+ addr.addr_bytes[0], addr.addr_bytes[1],
+ addr.addr_bytes[2], addr.addr_bytes[3],
+ addr.addr_bytes[4], addr.addr_bytes[5]);
+
+ return 0;
+}
+
+int
+main(int argc, char *argv[])
+{
+ char ifname[MAX_PATH_LEN];
+ uint16_t nb_ports, portid;
+ struct rte_mempool *mbuf_pool;
+ char ch;
+ int i, eid, did;
+ int ret;
+ uint64_t flags = 0;
+
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "eal init failed\n");
+ argc -= ret;
+ argv += ret;
+
+ signal(SIGINT, signal_handler);
+ signal(SIGTERM, signal_handler);
+
+ nb_ports = rte_eth_dev_count();
+ mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", NUM_MBUFS * nb_ports,
+ MBUF_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
+
+ /* Initialize all ports. */
+ for (portid = 0; portid < nb_ports; portid++)
+ if (port_init(portid, mbuf_pool) != 0)
+ rte_exit(EXIT_FAILURE, "Cannot init port %d\n",
+ portid);
+
+ data_init();
+
+ ret = parse_args(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "invalid argument\n");
+
+ eid = rte_vdpa_find_engine_id(&dev_id);
+ if (eid < 0)
+ rte_exit(EXIT_FAILURE, "no vDPA engine found\n");
+
+ printf("\nuse engine %d to create vhost socket\n", eid);
+ rte_vdpa_info_query(eid, &attr);
+ if (devcnt > (int)attr.dev_num)
+ rte_exit(EXIT_FAILURE, "not enough devices in engine\n");
+
+ if (queue > (int)attr.queue_num)
+ rte_exit(EXIT_FAILURE, "not enough queues in engine\n");
+
+ for (i = 0; i < RTE_MIN(MAX_VDPA_SAMPLE_PORTS, devcnt); i++) {
+ snprintf(ifname, sizeof(ifname), "%s%d", iface, i);
+ did = i;
+ vports[i].eid = eid;
+ vports[i].did = did;
+ strcpy(vports[i].ifname, ifname);
+
+ ret = rte_vhost_driver_register(ifname, flags);
+ if (ret != 0)
+ rte_exit(EXIT_FAILURE,
+ "register driver failed: %s\n",
+ ifname);
+
+ ret = rte_vhost_driver_callback_register(ifname,
+ &vdpa_sample_devops);
+ if (ret != 0)
+ rte_exit(EXIT_FAILURE,
+ "register driver ops failed: %s\n",
+ ifname);
+
+ rte_vhost_driver_set_vdpa_eid(ifname, eid);
+ rte_vhost_driver_set_vdpa_did(ifname, did);
+ /*
+ * Configure vhost port with vDPA device's maximum capability.
+ * App has the flexibility to change the features, queue num.
+ */
+ rte_vhost_driver_set_queue_num(ifname, attr.queue_num);
+ rte_vhost_driver_set_features(ifname, attr.features);
+ rte_vhost_driver_set_protocol_features(ifname,
+ attr.protocol_features);
+
+ if (rte_vhost_driver_start(ifname) < 0)
+ rte_exit(EXIT_FAILURE,
+ "start vhost driver failed: %s\n",
+ ifname);
+ }
+
+ printf("enter \'q\' to quit\n");
+ while (scanf("%c", &ch)) {
+ if (ch == 'q')
+ break;
+ while (ch != '\n')
+ scanf("%c", &ch);
+ printf("enter \'q\' to quit\n");
+ }
+
+ for (portid = 0; portid < nb_ports; portid++) {
+ printf("Closing port %d...\n", portid);
+ rte_eth_dev_stop(portid);
+ rte_eth_dev_close(portid);
+ }
+
+ return 0;
+}
--
2.15.1
prev parent reply other threads:[~2018-02-04 14:56 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-02-04 14:55 [dpdk-dev] [PATCH 0/3] add vDPA sample driver Xiao Wang
2018-02-04 14:55 ` [dpdk-dev] [PATCH 1/3] bus/pci: expose API for vDPA Xiao Wang
2018-02-04 14:55 ` [dpdk-dev] [PATCH 2/3] net/vdpa_virtio_pci: introduce vdpa sample driver Xiao Wang
2018-02-06 14:24 ` Maxime Coquelin
2018-02-08 2:23 ` Wang, Xiao W
2018-02-08 9:08 ` Maxime Coquelin
2018-02-12 15:36 ` Wang, Xiao W
2018-02-04 14:55 ` Xiao Wang [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180204145542.38345-4-xiao.w.wang@intel.com \
--to=xiao.w.wang@intel.com \
--cc=cunming.liang@intel.com \
--cc=dan.daly@intel.com \
--cc=dev@dpdk.org \
--cc=jianfeng.tan@intel.com \
--cc=maxime.coquelin@redhat.com \
--cc=tiwei.bie@intel.com \
--cc=yliu@fridaylinux.org \
--cc=zhihong.wang@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).