From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga14.intel.com (mga14.intel.com [192.55.52.115]) by dpdk.org (Postfix) with ESMTP id 48D5FC516 for ; Thu, 18 Jun 2015 03:03:48 +0200 (CEST) Received: from fmsmga002.fm.intel.com ([10.253.24.26]) by fmsmga103.fm.intel.com with ESMTP; 17 Jun 2015 18:03:47 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.13,636,1427785200"; d="scan'208";a="745508979" Received: from shvmail01.sh.intel.com ([10.239.29.42]) by fmsmga002.fm.intel.com with ESMTP; 17 Jun 2015 18:03:46 -0700 Received: from shecgisg004.sh.intel.com (shecgisg004.sh.intel.com [10.239.29.89]) by shvmail01.sh.intel.com with ESMTP id t5I13inn011132; Thu, 18 Jun 2015 09:03:44 +0800 Received: from shecgisg004.sh.intel.com (localhost [127.0.0.1]) by shecgisg004.sh.intel.com (8.13.6/8.13.6/SuSE Linux 0.8) with ESMTP id t5I13fr1005439; Thu, 18 Jun 2015 09:03:44 +0800 Received: (from jijiangl@localhost) by shecgisg004.sh.intel.com (8.13.6/8.13.6/Submit) id t5I13faL005435; Thu, 18 Jun 2015 09:03:41 +0800 From: Jijiang Liu To: dev@dpdk.org Date: Thu, 18 Jun 2015 09:03:28 +0800 Message-Id: <1434589417-5395-2-git-send-email-jijiang.liu@intel.com> X-Mailer: git-send-email 1.7.12.2 In-Reply-To: <1434589417-5395-1-git-send-email-jijiang.liu@intel.com> References: <1434589417-5395-1-git-send-email-jijiang.liu@intel.com> Subject: [dpdk-dev] [PATCH v4 01/10] examples/tep_termination:initialize the VXLAN sample X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 18 Jun 2015 01:03:49 -0000 This sample uses the basic virtio devices management function from the vHost example, which includes virtio device creation, destroying and maintenance. Signed-off-by: Jijiang Liu --- examples/Makefile | 1 + examples/tep_termination/Makefile | 55 ++ examples/tep_termination/main.c | 1130 +++++++++++++++++++++++++++++++++++++ examples/tep_termination/main.h | 123 ++++ 4 files changed, 1309 insertions(+), 0 deletions(-) create mode 100644 examples/tep_termination/Makefile create mode 100644 examples/tep_termination/main.c create mode 100644 examples/tep_termination/main.h diff --git a/examples/Makefile b/examples/Makefile index e659f6f..d157e15 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -73,5 +73,6 @@ DIRS-$(CONFIG_RTE_LIBRTE_XEN_DOM0) += vhost_xen DIRS-y += vmdq DIRS-y += vmdq_dcb DIRS-$(CONFIG_RTE_LIBRTE_POWER) += vm_power_manager +DIRS-y += tep_termination include $(RTE_SDK)/mk/rte.extsubdir.mk diff --git a/examples/tep_termination/Makefile b/examples/tep_termination/Makefile new file mode 100644 index 0000000..75be3ac --- /dev/null +++ b/examples/tep_termination/Makefile @@ -0,0 +1,55 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2015 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overriden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +ifneq ($(CONFIG_RTE_EXEC_ENV),"linuxapp") +$(error This application can only operate in a linuxapp environment, \ +please change the definition of the RTE_TARGET environment variable) +endif + +# binary name +APP = tep_termination + +# all source are stored in SRCS-y +SRCS-y := main.c + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/tep_termination/main.c b/examples/tep_termination/main.c new file mode 100644 index 0000000..ff56077 --- /dev/null +++ b/examples/tep_termination/main.c @@ -0,0 +1,1130 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "main.h" + +/* the maximum number of external ports supported */ +#define MAX_SUP_PORTS 1 + +/** + * Calculate the number of buffers needed per port + */ +#define NUM_MBUFS_PER_PORT ((MAX_QUEUES * RTE_TEST_RX_DESC_DEFAULT) +\ + (nb_switching_cores * MAX_PKT_BURST) +\ + (nb_switching_cores * \ + RTE_TEST_TX_DESC_DEFAULT) +\ + (nb_switching_cores * MBUF_CACHE_SIZE)) + +#define MBUF_CACHE_SIZE 128 +#define MBUF_SIZE (2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM) + +#define MAX_PKT_BURST 32 /* Max burst size for RX/TX */ +#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ + +/* Defines how long we wait between retries on RX */ +#define BURST_RX_WAIT_US 15 + +#define BURST_RX_RETRIES 4 /* Number of retries on RX. */ + +#define JUMBO_FRAME_MAX_SIZE 0x2600 + +/* State of virtio device. */ +#define DEVICE_MAC_LEARNING 0 +#define DEVICE_RX 1 +#define DEVICE_SAFE_REMOVE 2 + +/* Config_core_flag status definitions. */ +#define REQUEST_DEV_REMOVAL 1 +#define ACK_DEV_REMOVAL 0 + +/* Configurable number of RX/TX ring descriptors */ +#define RTE_TEST_RX_DESC_DEFAULT 1024 +#define RTE_TEST_TX_DESC_DEFAULT 512 + +/* Get first 4 bytes in mbuf headroom. */ +#define MBUF_HEADROOM_UINT32(mbuf) (*(uint32_t *)((uint8_t *)(mbuf) \ + + sizeof(struct rte_mbuf))) + +#define INVALID_PORT_ID 0xFF + +/* Size of buffers used for snprintfs. */ +#define MAX_PRINT_BUFF 6072 + +/* Maximum character device basename size. */ +#define MAX_BASENAME_SZ 20 + +/* Maximum long option length for option parsing. */ +#define MAX_LONG_OPT_SZ 64 + +/* Used to compare MAC addresses. */ +#define MAC_ADDR_CMP 0xFFFFFFFFFFFFULL + +#define CMD_LINE_OPT_NB_DEVICES "nb-devices" +#define CMD_LINE_OPT_RX_RETRY "rx-retry" +#define CMD_LINE_OPT_RX_RETRY_DELAY "rx-retry-delay" +#define CMD_LINE_OPT_RX_RETRY_NUM "rx-retry-num" +#define CMD_LINE_OPT_STATS "stats" +#define CMD_LINE_OPT_DEV_BASENAME "dev-basename" + +/* mask of enabled ports */ +static uint32_t enabled_port_mask; + +/*Number of switching cores enabled*/ +static uint32_t nb_switching_cores; + +/* number of devices/queues to support*/ +uint32_t nb_devices; + +/* max ring descriptor, ixgbe, i40e, e1000 all are 4096. */ +#define MAX_RING_DESC 4096 + +struct vpool { + struct rte_mempool *pool; + struct rte_ring *ring; + uint32_t buf_size; +} vpool_array[MAX_QUEUES+MAX_QUEUES]; + +/* Enable stats. */ +uint32_t enable_stats = 0; +/* Enable retries on RX. */ +static uint32_t enable_retry = 1; +/* Specify timeout (in useconds) between retries on RX. */ +static uint32_t burst_rx_delay_time = BURST_RX_WAIT_US; +/* Specify the number of retries on RX. */ +static uint32_t burst_rx_retry_num = BURST_RX_RETRIES; + +/* Character device basename. Can be set by user. */ +static char dev_basename[MAX_BASENAME_SZ] = "vhost-net"; + +static unsigned lcore_ids[RTE_MAX_LCORE]; +uint8_t ports[RTE_MAX_ETHPORTS]; + +static unsigned nb_ports; /**< The number of ports specified in command line */ + +/* ethernet addresses of ports */ +struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; + +/* heads for the main used and free linked lists for the data path. */ +static struct virtio_net_data_ll *ll_root_used; +static struct virtio_net_data_ll *ll_root_free; + +/** + * Array of data core structures containing information on + * individual core linked lists. + */ +static struct lcore_info lcore_info[RTE_MAX_LCORE]; + +/* Used for queueing bursts of TX packets. */ +struct mbuf_table { + unsigned len; + unsigned txq_id; + struct rte_mbuf *m_table[MAX_PKT_BURST]; +}; + +/* TX queue for each data core. */ +struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE]; + +struct device_statistics dev_statistics[MAX_DEVICES]; + +static inline int +validate_nb_devices(uint32_t max_nb_devices) +{ + if (nb_devices > max_nb_devices) { + RTE_LOG(ERR, VHOST_PORT, "invalid number of devices\n"); + return -1; + } + return 0; +} + +/** + * Set character device basename. + */ +static int +us_vhost_parse_basename(const char *q_arg) +{ + /* parse number string */ + if (strlen(q_arg) >= MAX_BASENAME_SZ) + return -1; + else + snprintf((char *)&dev_basename, MAX_BASENAME_SZ, "%s", q_arg); + + return 0; +} + +/** + * Parse the portmask provided at run time. + */ +static int +parse_portmask(const char *portmask) +{ + char *end = NULL; + unsigned long pm; + + /* parse hexadecimal string */ + pm = strtoul(portmask, &end, 16); + if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0')) + return -1; + + if (pm == 0) + return -1; + + return pm; +} + +/** + * Parse num options at run time. + */ +static int +parse_num_opt(const char *q_arg, uint32_t max_valid_value) +{ + char *end = NULL; + unsigned long num; + + /* parse unsigned int string */ + num = strtoul(q_arg, &end, 10); + if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0')) + return -1; + + if (num > max_valid_value) + return -1; + + return num; +} + +/** + * Display usage + */ +static void +tep_termination_usage(const char *prgname) +{ + RTE_LOG(INFO, VHOST_CONFIG, "%s [EAL options] -- -p PORTMASK\n" + " --nb-devices[1-64]: The number of virtIO device\n" + " -p PORTMASK: Set mask for ports to be used by application\n" + " --rx-retry [0|1]: disable/enable(default) retries on rx." + " Enable retry if destintation queue is full\n" + " --rx-retry-delay [0-N]: timeout(in usecond) between retries on RX." + " This makes effect only if retries on rx enabled\n" + " --rx-retry-num [0-N]: the number of retries on rx." + " This makes effect only if retries on rx enabled\n" + " --stats [0-N]: 0: Disable stats, N: Time in seconds to print stats\n" + " --dev-basename: The basename to be used for the character device.\n", + prgname); +} + +/** + * Parse the arguments given in the command line of the application. + */ +static int +tep_termination_parse_args(int argc, char **argv) +{ + int opt, ret; + int option_index; + unsigned i; + const char *prgname = argv[0]; + static struct option long_option[] = { + {CMD_LINE_OPT_NB_DEVICES, required_argument, NULL, 0}, + {CMD_LINE_OPT_RX_RETRY, required_argument, NULL, 0}, + {CMD_LINE_OPT_RX_RETRY_DELAY, required_argument, NULL, 0}, + {CMD_LINE_OPT_RX_RETRY_NUM, required_argument, NULL, 0}, + {CMD_LINE_OPT_STATS, required_argument, NULL, 0}, + {CMD_LINE_OPT_DEV_BASENAME, required_argument, NULL, 0}, + {NULL, 0, 0, 0}, + }; + + /* Parse command line */ + while ((opt = getopt_long(argc, argv, "p:", + long_option, &option_index)) != EOF) { + switch (opt) { + /* Portmask */ + case 'p': + enabled_port_mask = parse_portmask(optarg); + if (enabled_port_mask == 0) { + RTE_LOG(INFO, VHOST_CONFIG, + "Invalid portmask\n"); + tep_termination_usage(prgname); + return -1; + } + break; + case 0: + if (!strncmp(long_option[option_index].name, + CMD_LINE_OPT_NB_DEVICES, + sizeof(CMD_LINE_OPT_NB_DEVICES))) { + ret = parse_num_opt(optarg, MAX_DEVICES); + if (ret == -1) { + RTE_LOG(INFO, VHOST_CONFIG, + "Invalid argument for nb-devices [0-%d]\n", + MAX_DEVICES); + tep_termination_usage(prgname); + return -1; + } else + nb_devices = ret; + } + + /* Enable/disable retries on RX. */ + if (!strncmp(long_option[option_index].name, + CMD_LINE_OPT_RX_RETRY, + sizeof(CMD_LINE_OPT_RX_RETRY))) { + ret = parse_num_opt(optarg, 1); + if (ret == -1) { + RTE_LOG(INFO, VHOST_CONFIG, + "Invalid argument for rx-retry [0|1]\n"); + tep_termination_usage(prgname); + return -1; + } else { + enable_retry = ret; + } + } + + /* Specify the retries delay time (in useconds) on RX.*/ + if (!strncmp(long_option[option_index].name, + CMD_LINE_OPT_RX_RETRY_DELAY, + sizeof(CMD_LINE_OPT_RX_RETRY_DELAY))) { + ret = parse_num_opt(optarg, INT32_MAX); + if (ret == -1) { + RTE_LOG(INFO, VHOST_CONFIG, + "Invalid argument for rx-retry-delay [0-N]\n"); + tep_termination_usage(prgname); + return -1; + } else { + burst_rx_delay_time = ret; + } + } + + /* Specify the retries number on RX. */ + if (!strncmp(long_option[option_index].name, + CMD_LINE_OPT_RX_RETRY_NUM, + sizeof(CMD_LINE_OPT_RX_RETRY_NUM))) { + ret = parse_num_opt(optarg, INT32_MAX); + if (ret == -1) { + RTE_LOG(INFO, VHOST_CONFIG, + "Invalid argument for rx-retry-num [0-N]\n"); + tep_termination_usage(prgname); + return -1; + } else { + burst_rx_retry_num = ret; + } + } + + /* Enable/disable stats. */ + if (!strncmp(long_option[option_index].name, + CMD_LINE_OPT_STATS, + sizeof(CMD_LINE_OPT_STATS))) { + ret = parse_num_opt(optarg, INT32_MAX); + if (ret == -1) { + RTE_LOG(INFO, VHOST_CONFIG, + "Invalid argument for stats [0..N]\n"); + tep_termination_usage(prgname); + return -1; + } else { + enable_stats = ret; + } + } + + /* Set character device basename. */ + if (!strncmp(long_option[option_index].name, + CMD_LINE_OPT_DEV_BASENAME, + sizeof(CMD_LINE_OPT_DEV_BASENAME))) { + if (us_vhost_parse_basename(optarg) == -1) { + RTE_LOG(INFO, VHOST_CONFIG, + "Invalid argument for character " + "device basename (Max %d characters)\n", + MAX_BASENAME_SZ); + tep_termination_usage(prgname); + return -1; + } + } + + break; + + /* Invalid option - print options. */ + default: + tep_termination_usage(prgname); + return -1; + } + } + + for (i = 0; i < RTE_MAX_ETHPORTS; i++) { + if (enabled_port_mask & (1 << i)) + ports[nb_ports++] = (uint8_t)i; + } + + if ((nb_ports == 0) || (nb_ports > MAX_SUP_PORTS)) { + RTE_LOG(INFO, VHOST_PORT, "Current enabled port number is %u," + "but only %u port can be enabled\n", nb_ports, + MAX_SUP_PORTS); + return -1; + } + + return 0; +} + +/** + * Update the global var NB_PORTS and array PORTS + * according to system ports number and return valid ports number + */ +static unsigned +check_ports_num(unsigned max_nb_ports) +{ + unsigned valid_nb_ports = nb_ports; + unsigned portid; + + if (nb_ports > max_nb_ports) { + RTE_LOG(INFO, VHOST_PORT, "\nSpecified port number(%u) " + " exceeds total system port number(%u)\n", + nb_ports, max_nb_ports); + nb_ports = max_nb_ports; + } + + for (portid = 0; portid < nb_ports; portid++) { + if (ports[portid] >= max_nb_ports) { + RTE_LOG(INFO, VHOST_PORT, + "\nSpecified port ID(%u) exceeds max " + " system port ID(%u)\n", + ports[portid], (max_nb_ports - 1)); + ports[portid] = INVALID_PORT_ID; + valid_nb_ports--; + } + } + return valid_nb_ports; +} + +/** + * This function routes the TX packet to the correct interface. This may be a local device + * or the physical port. + */ +static inline void __attribute__((always_inline)) +virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m) +{ + struct mbuf_table *tx_q; + struct rte_mbuf **m_table; + unsigned len, ret; + const uint16_t lcore_id = rte_lcore_id(); + struct virtio_net *dev = vdev->dev; + + LOG_DEBUG(VHOST_DATA, "(%"PRIu64") TX: MAC address is external\n", + dev->device_fh); + + /* Add packet to the port tx queue */ + tx_q = &lcore_tx_queue[lcore_id]; + len = tx_q->len; + + tx_q->m_table[len] = m; + len++; + if (enable_stats) { + dev_statistics[dev->device_fh].tx_total++; + dev_statistics[dev->device_fh].tx++; + } + + if (unlikely(len == MAX_PKT_BURST)) { + m_table = (struct rte_mbuf **)tx_q->m_table; + /* Free any buffers not handled by TX and update + * the port stats. + */ + if (unlikely(ret < len)) { + do { + rte_pktmbuf_free(m_table[ret]); + } while (++ret < len); + } + + len = 0; + } + + tx_q->len = len; + return; +} + +/** + * This function is called by each data core. It handles all + * RX/TX registered with the core. For TX the specific lcore + * linked list is used. For RX, MAC addresses are compared + * with all devices in the main linked list. + */ +static int +switch_worker(__rte_unused void *arg) +{ + struct rte_mempool *mbuf_pool = arg; + struct virtio_net *dev = NULL; + struct vhost_dev *vdev = NULL; + struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; + struct virtio_net_data_ll *dev_ll; + struct mbuf_table *tx_q; + volatile struct lcore_ll_info *lcore_ll; + const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) + / US_PER_S * BURST_TX_DRAIN_US; + uint64_t prev_tsc, diff_tsc, cur_tsc, ret_count = 0; + unsigned ret, i; + const uint16_t lcore_id = rte_lcore_id(); + const uint16_t num_cores = (uint16_t)rte_lcore_count(); + uint16_t rx_count = 0; + uint16_t tx_count; + uint32_t retry = 0; + + RTE_LOG(INFO, VHOST_DATA, "Procesing on Core %u started\n", lcore_id); + lcore_ll = lcore_info[lcore_id].lcore_ll; + prev_tsc = 0; + + tx_q = &lcore_tx_queue[lcore_id]; + for (i = 0; i < num_cores; i++) { + if (lcore_ids[i] == lcore_id) { + tx_q->txq_id = i; + break; + } + } + + while (1) { + cur_tsc = rte_rdtsc(); + /* + * TX burst queue drain + */ + diff_tsc = cur_tsc - prev_tsc; + if (unlikely(diff_tsc > drain_tsc)) { + + if (tx_q->len) { + LOG_DEBUG(VHOST_DATA, "TX queue drained after " + "timeout with burst size %u\n", + tx_q->len); + if (unlikely(ret < tx_q->len)) { + do { + rte_pktmbuf_free(tx_q->m_table[ret]); + } while (++ret < tx_q->len); + } + + tx_q->len = 0; + } + + prev_tsc = cur_tsc; + + } + + rte_prefetch0(lcore_ll->ll_root_used); + + /** + * Inform the configuration core that we have exited + * the linked list and that no devices are + * in use if requested. + */ + if (lcore_ll->dev_removal_flag == REQUEST_DEV_REMOVAL) + lcore_ll->dev_removal_flag = ACK_DEV_REMOVAL; + + /* + * Process devices + */ + dev_ll = lcore_ll->ll_root_used; + + while (dev_ll != NULL) { + vdev = dev_ll->vdev; + dev = vdev->dev; + + if (unlikely(vdev->remove)) { + dev_ll = dev_ll->next; + vdev->ready = DEVICE_SAFE_REMOVE; + continue; + } + if (likely(vdev->ready == DEVICE_RX)) { + /* Handle guest RX */ + rx_count = rte_eth_rx_burst(ports[0], + vdev->rx_q, pkts_burst, MAX_PKT_BURST); + + if (rx_count) { + /* + * Retry is enabled and the queue is + * full then we wait and retry to + * avoid packet loss. Here MAX_PKT_BURST + * must be less than virtio queue size + */ + if (enable_retry && unlikely(rx_count > + rte_vring_available_entries(dev, VIRTIO_RXQ))) { + for (retry = 0; retry < burst_rx_retry_num; + retry++) { + rte_delay_us(burst_rx_delay_time); + if (rx_count <= rte_vring_available_entries(dev, VIRTIO_RXQ)) + break; + } + } + + if (enable_stats) { + rte_atomic64_add( + &dev_statistics[dev->device_fh].rx_total_atomic, + rx_count); + rte_atomic64_add( + &dev_statistics[dev->device_fh].rx_atomic, ret_count); + } + while (likely(rx_count)) { + rx_count--; + rte_pktmbuf_free(pkts_burst[rx_count]); + } + + } + } + + if (likely(!vdev->remove)) { + /* Handle guest TX*/ + tx_count = rte_vhost_dequeue_burst(dev, + VIRTIO_TXQ, mbuf_pool, + pkts_burst, MAX_PKT_BURST); + /* If this is the first received packet we need to learn the MAC */ + if (unlikely(vdev->ready == DEVICE_MAC_LEARNING) && tx_count) { + if (vdev->remove) { + while (tx_count) + rte_pktmbuf_free(pkts_burst[--tx_count]); + } + } + while (tx_count) + virtio_tx_route(vdev, pkts_burst[--tx_count]); + } + + /* move to the next device in the list */ + dev_ll = dev_ll->next; + } + } + + return 0; +} + +/** + * Add an entry to a used linked list. A free entry must first be found + * in the free linked list using get_data_ll_free_entry(); + */ +static void +add_data_ll_entry(struct virtio_net_data_ll **ll_root_addr, + struct virtio_net_data_ll *ll_dev) +{ + struct virtio_net_data_ll *ll = *ll_root_addr; + + /* Set next as NULL and use a compiler barrier to avoid reordering. */ + ll_dev->next = NULL; + rte_compiler_barrier(); + + /* If ll == NULL then this is the first device. */ + if (ll) { + /* Increment to the tail of the linked list. */ + while (ll->next != NULL) + ll = ll->next; + + ll->next = ll_dev; + } else { + *ll_root_addr = ll_dev; + } +} + +/** + * Remove an entry from a used linked list. The entry must then be added to + * the free linked list using put_data_ll_free_entry(). + */ +static void +rm_data_ll_entry(struct virtio_net_data_ll **ll_root_addr, + struct virtio_net_data_ll *ll_dev, + struct virtio_net_data_ll *ll_dev_last) +{ + struct virtio_net_data_ll *ll = *ll_root_addr; + + if (unlikely((ll == NULL) || (ll_dev == NULL))) + return; + + if (ll_dev == ll) + *ll_root_addr = ll_dev->next; + else + if (likely(ll_dev_last != NULL)) + ll_dev_last->next = ll_dev->next; + else + RTE_LOG(ERR, VHOST_CONFIG, + "Remove entry form ll failed.\n"); +} + +/** + * Find and return an entry from the free linked list. + */ +static struct virtio_net_data_ll * +get_data_ll_free_entry(struct virtio_net_data_ll **ll_root_addr) +{ + struct virtio_net_data_ll *ll_free = *ll_root_addr; + struct virtio_net_data_ll *ll_dev; + + if (ll_free == NULL) + return NULL; + + ll_dev = ll_free; + *ll_root_addr = ll_free->next; + + return ll_dev; +} + +/** + * Place an entry back on to the free linked list. + */ +static void +put_data_ll_free_entry(struct virtio_net_data_ll **ll_root_addr, + struct virtio_net_data_ll *ll_dev) +{ + struct virtio_net_data_ll *ll_free = *ll_root_addr; + + if (ll_dev == NULL) + return; + + ll_dev->next = ll_free; + *ll_root_addr = ll_dev; +} + +/** + * Creates a linked list of a given size. + */ +static struct virtio_net_data_ll * +alloc_data_ll(uint32_t size) +{ + struct virtio_net_data_ll *ll_new; + uint32_t i; + + /* Malloc and then chain the linked list. */ + ll_new = malloc(size * sizeof(struct virtio_net_data_ll)); + if (ll_new == NULL) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed to allocate memory for ll_new.\n"); + return NULL; + } + + for (i = 0; i < size - 1; i++) { + ll_new[i].vdev = NULL; + ll_new[i].next = &ll_new[i+1]; + } + ll_new[i].next = NULL; + + return ll_new; +} + +/** + * Create the main linked list along with each individual cores + * linked list. A used and a free list are created to manage entries. + */ +static int +init_data_ll(void) +{ + int lcore; + + RTE_LCORE_FOREACH_SLAVE(lcore) { + lcore_info[lcore].lcore_ll = + malloc(sizeof(struct lcore_ll_info)); + if (lcore_info[lcore].lcore_ll == NULL) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed to allocate memory for lcore_ll.\n"); + return -1; + } + + lcore_info[lcore].lcore_ll->device_num = 0; + lcore_info[lcore].lcore_ll->dev_removal_flag = ACK_DEV_REMOVAL; + lcore_info[lcore].lcore_ll->ll_root_used = NULL; + if (nb_devices % nb_switching_cores) + lcore_info[lcore].lcore_ll->ll_root_free = + alloc_data_ll((nb_devices / nb_switching_cores) + + 1); + else + lcore_info[lcore].lcore_ll->ll_root_free = + alloc_data_ll(nb_devices / nb_switching_cores); + } + + /* Allocate devices up to a maximum of MAX_DEVICES. */ + ll_root_free = alloc_data_ll(MIN((nb_devices), MAX_DEVICES)); + + return 0; +} + +/** + * Remove a device from the specific data core linked list and + * from the main linked list. Synchonization occurs through the use + * of the lcore dev_removal_flag. Device is made volatile here + * to avoid re-ordering of dev->remove=1 which can cause an infinite + * loop in the rte_pause loop. + */ +static void +destroy_device(volatile struct virtio_net *dev) +{ + struct virtio_net_data_ll *ll_lcore_dev_cur; + struct virtio_net_data_ll *ll_main_dev_cur; + struct virtio_net_data_ll *ll_lcore_dev_last = NULL; + struct virtio_net_data_ll *ll_main_dev_last = NULL; + struct vhost_dev *vdev; + int lcore; + + dev->flags &= ~VIRTIO_DEV_RUNNING; + + vdev = (struct vhost_dev *)dev->priv; + + /* set the remove flag. */ + vdev->remove = 1; + while (vdev->ready != DEVICE_SAFE_REMOVE) + rte_pause(); + + /* Search for entry to be removed from lcore ll */ + ll_lcore_dev_cur = lcore_info[vdev->coreid].lcore_ll->ll_root_used; + while (ll_lcore_dev_cur != NULL) { + if (ll_lcore_dev_cur->vdev == vdev) { + break; + } else { + ll_lcore_dev_last = ll_lcore_dev_cur; + ll_lcore_dev_cur = ll_lcore_dev_cur->next; + } + } + + if (ll_lcore_dev_cur == NULL) { + RTE_LOG(ERR, VHOST_CONFIG, + "(%"PRIu64") Failed to find the dev to be destroy.\n", + dev->device_fh); + return; + } + + /* Search for entry to be removed from main ll */ + ll_main_dev_cur = ll_root_used; + ll_main_dev_last = NULL; + while (ll_main_dev_cur != NULL) { + if (ll_main_dev_cur->vdev == vdev) { + break; + } else { + ll_main_dev_last = ll_main_dev_cur; + ll_main_dev_cur = ll_main_dev_cur->next; + } + } + + /* Remove entries from the lcore and main ll. */ + rm_data_ll_entry(&lcore_info[vdev->coreid].lcore_ll->ll_root_used, + ll_lcore_dev_cur, ll_lcore_dev_last); + rm_data_ll_entry(&ll_root_used, ll_main_dev_cur, ll_main_dev_last); + + /* Set the dev_removal_flag on each lcore. */ + RTE_LCORE_FOREACH_SLAVE(lcore) { + lcore_info[lcore].lcore_ll->dev_removal_flag = + REQUEST_DEV_REMOVAL; + } + + /* + * Once each core has set the dev_removal_flag to + * ACK_DEV_REMOVAL we can be sure that they can no longer access + * the device removed from the linked lists and that the devices + * are no longer in use. + */ + RTE_LCORE_FOREACH_SLAVE(lcore) { + while (lcore_info[lcore].lcore_ll->dev_removal_flag + != ACK_DEV_REMOVAL) + rte_pause(); + } + + /* Add the entries back to the lcore and main free ll.*/ + put_data_ll_free_entry(&lcore_info[vdev->coreid].lcore_ll->ll_root_free, + ll_lcore_dev_cur); + put_data_ll_free_entry(&ll_root_free, ll_main_dev_cur); + + /* Decrement number of device on the lcore. */ + lcore_info[vdev->coreid].lcore_ll->device_num--; + + RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") Device has been removed " + "from data core\n", dev->device_fh); + + rte_free(vdev); + +} + +/** + * A new device is added to a data core. First the device is added + * to the main linked list and the allocated to a specific data core. + */ +static int +new_device(struct virtio_net *dev) +{ + struct virtio_net_data_ll *ll_dev; + int lcore, core_add = 0; + uint32_t device_num_min = nb_devices; + struct vhost_dev *vdev; + + vdev = rte_zmalloc("vhost device", sizeof(*vdev), RTE_CACHE_LINE_SIZE); + if (vdev == NULL) { + RTE_LOG(INFO, VHOST_DATA, + "(%"PRIu64") Couldn't allocate memory for vhost dev\n", + dev->device_fh); + return -1; + } + vdev->dev = dev; + dev->priv = vdev; + /* Add device to main ll */ + ll_dev = get_data_ll_free_entry(&ll_root_free); + if (ll_dev == NULL) { + RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") No free entry found in" + " linked list Device limit of %d devices per core" + " has been reached\n", dev->device_fh, nb_devices); + if (vdev->regions_hpa) + rte_free(vdev->regions_hpa); + rte_free(vdev); + return -1; + } + ll_dev->vdev = vdev; + add_data_ll_entry(&ll_root_used, ll_dev); + vdev->rx_q = dev->device_fh; + + /* reset ready flag */ + vdev->ready = DEVICE_MAC_LEARNING; + vdev->remove = 0; + + /* Find a suitable lcore to add the device. */ + RTE_LCORE_FOREACH_SLAVE(lcore) { + if (lcore_info[lcore].lcore_ll->device_num < device_num_min) { + device_num_min = lcore_info[lcore].lcore_ll->device_num; + core_add = lcore; + } + } + /* Add device to lcore ll */ + ll_dev = get_data_ll_free_entry(&lcore_info[core_add].lcore_ll->ll_root_free); + if (ll_dev == NULL) { + RTE_LOG(INFO, VHOST_DATA, + "(%"PRIu64") Failed to add device to data core\n", + dev->device_fh); + vdev->ready = DEVICE_SAFE_REMOVE; + destroy_device(dev); + rte_free(vdev->regions_hpa); + rte_free(vdev); + return -1; + } + ll_dev->vdev = vdev; + vdev->coreid = core_add; + + add_data_ll_entry(&lcore_info[vdev->coreid].lcore_ll->ll_root_used, + ll_dev); + + /* Initialize device stats */ + memset(&dev_statistics[dev->device_fh], 0, + sizeof(struct device_statistics)); + + /* Disable notifications. */ + rte_vhost_enable_guest_notification(dev, VIRTIO_RXQ, 0); + rte_vhost_enable_guest_notification(dev, VIRTIO_TXQ, 0); + lcore_info[vdev->coreid].lcore_ll->device_num++; + dev->flags |= VIRTIO_DEV_RUNNING; + + RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") Device has been added to data core %d\n", + dev->device_fh, vdev->coreid); + + return 0; +} + +/** + * These callback allow devices to be added to the data core when configuration + * has been fully complete. + */ +static const struct virtio_net_device_ops virtio_net_device_ops = { + .new_device = new_device, + .destroy_device = destroy_device, +}; + +/** + * This is a thread will wake up after a period to print stats if the user has + * enabled them. + */ +static void +print_stats(void) +{ + struct virtio_net_data_ll *dev_ll; + uint64_t tx_dropped, rx_dropped; + uint64_t tx, tx_total, rx, rx_total; + uint32_t device_fh; + const char clr[] = { 27, '[', '2', 'J', '\0' }; + const char top_left[] = { 27, '[', '1', ';', '1', 'H', '\0' }; + + while (1) { + sleep(enable_stats); + + /* Clear screen and move to top left */ + printf("%s%s", clr, top_left); + + printf("\nDevice statistics ================================"); + + dev_ll = ll_root_used; + while (dev_ll != NULL) { + device_fh = (uint32_t)dev_ll->vdev->dev->device_fh; + tx_total = dev_statistics[device_fh].tx_total; + tx = dev_statistics[device_fh].tx; + tx_dropped = tx_total - tx; + + rx_total = rte_atomic64_read( + &dev_statistics[device_fh].rx_total_atomic); + rx = rte_atomic64_read( + &dev_statistics[device_fh].rx_atomic); + rx_dropped = rx_total - rx; + + printf("\nStatistics for device %"PRIu32" ----------" + "\nTX total: %"PRIu64"" + "\nTX dropped: %"PRIu64"" + "\nTX successful: %"PRIu64"" + "\nRX total: %"PRIu64"" + "\nRX dropped: %"PRIu64"" + "\nRX successful: %"PRIu64"", + device_fh, + tx_total, + tx_dropped, + tx, + rx_total, + rx_dropped, + rx); + + dev_ll = dev_ll->next; + } + printf("\n================================================\n"); + } +} + +/** + * Main function, does initialisation and calls the per-lcore functions. The CUSE + * device is also registered here to handle the IOCTLs. + */ +int +main(int argc, char *argv[]) +{ + struct rte_mempool *mbuf_pool = NULL; + unsigned lcore_id, core_id = 0; + unsigned nb_ports, valid_nb_ports; + int ret; + uint8_t portid; + uint16_t queue_id; + static pthread_t tid; + + /* init EAL */ + ret = rte_eal_init(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Error with EAL initialization\n"); + argc -= ret; + argv += ret; + + /* parse app arguments */ + ret = tep_termination_parse_args(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid argument\n"); + + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) + if (rte_lcore_is_enabled(lcore_id)) + lcore_ids[core_id++] = lcore_id; + + /* set the number of swithcing cores available */ + nb_switching_cores = rte_lcore_count()-1; + + /* Get the number of physical ports. */ + nb_ports = rte_eth_dev_count(); + if (nb_ports > RTE_MAX_ETHPORTS) + nb_ports = RTE_MAX_ETHPORTS; + + /* + * Update the global var NB_PORTS and global array PORTS + * and get value of var VALID_NB_PORTS according to system ports number + */ + valid_nb_ports = check_ports_num(nb_ports); + + if ((valid_nb_ports == 0) || (valid_nb_ports > MAX_SUP_PORTS)) { + rte_exit(EXIT_FAILURE, "Current enabled port number is %u," + "but only %u port can be enabled\n", nb_ports, + MAX_SUP_PORTS); + } + + /* Create the mbuf pool. */ + mbuf_pool = rte_mempool_create( + "MBUF_POOL", + NUM_MBUFS_PER_PORT + * valid_nb_ports, + MBUF_SIZE, MBUF_CACHE_SIZE, + sizeof(struct rte_pktmbuf_pool_private), + rte_pktmbuf_pool_init, NULL, + rte_pktmbuf_init, NULL, + rte_socket_id(), 0); + if (mbuf_pool == NULL) + rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n"); + + for (queue_id = 0; queue_id < MAX_QUEUES + 1; queue_id++) + vpool_array[queue_id].pool = mbuf_pool; + + /* Set log level. */ + rte_set_log_level(LOG_LEVEL); + + /* initialize all ports */ + for (portid = 0; portid < nb_ports; portid++) { + /* skip ports that are not enabled */ + if ((enabled_port_mask & (1 << portid)) == 0) { + RTE_LOG(INFO, VHOST_PORT, + "Skipping disabled port %d\n", portid); + continue; + } + } + + /* Initialise all linked lists. */ + if (init_data_ll() == -1) + rte_exit(EXIT_FAILURE, "Failed to initialize linked list\n"); + + /* Initialize device stats */ + memset(&dev_statistics, 0, sizeof(dev_statistics)); + + /* Enable stats if the user option is set. */ + if (enable_stats) + pthread_create(&tid, NULL, (void *)print_stats, NULL); + + /* Launch all data cores. */ + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + rte_eal_remote_launch(switch_worker, + mbuf_pool, lcore_id); + } + + rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_MRG_RXBUF); + + /* Register CUSE device to handle IOCTLs. */ + ret = rte_vhost_driver_register((char *)&dev_basename); + if (ret != 0) + rte_exit(EXIT_FAILURE, "CUSE device setup failure.\n"); + + rte_vhost_driver_callback_register(&virtio_net_device_ops); + + /* Start CUSE session. */ + rte_vhost_driver_session_start(); + + return 0; +} diff --git a/examples/tep_termination/main.h b/examples/tep_termination/main.h new file mode 100644 index 0000000..ef843c1 --- /dev/null +++ b/examples/tep_termination/main.h @@ -0,0 +1,123 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MAIN_H_ +#define _MAIN_H_ + +#ifdef DEBUG +#define LOG_LEVEL RTE_LOG_DEBUG +#define LOG_DEBUG(log_type, fmt, args...) RTE_LOG(DEBUG, log_type, fmt, ##args) +#else +#define LOG_LEVEL RTE_LOG_INFO +#define LOG_DEBUG(log_type, fmt, args...) do {} while (0) +#endif + +/* Macros for printing using RTE_LOG */ +#define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1 +#define RTE_LOGTYPE_VHOST_DATA RTE_LOGTYPE_USER2 +#define RTE_LOGTYPE_VHOST_PORT RTE_LOGTYPE_USER3 + +/* State of virtio device. */ +#define DEVICE_MAC_LEARNING 0 +#define DEVICE_RX 1 +#define DEVICE_SAFE_REMOVE 2 + +#define MAX_QUEUES 512 + +/* Max burst size for RX/TX */ +#define MAX_PKT_BURST 32 + +/* Max number of devices. Limited by the application. */ +#define MAX_DEVICES 64 + +/* Per-device statistics struct */ +struct device_statistics { + uint64_t tx_total; + rte_atomic64_t rx_total_atomic; + uint64_t rx_total; + uint64_t tx; + rte_atomic64_t rx_atomic; +} __rte_cache_aligned; + +/** + * Device linked list structure for data path. + */ +struct vhost_dev { + /**< Pointer to device created by vhost lib. */ + struct virtio_net *dev; + /**< Number of memory regions for gpa to hpa translation. */ + uint32_t nregions_hpa; + /**< Memory region information for gpa to hpa translation. */ + struct virtio_memory_regions_hpa *regions_hpa; + /**< Device MAC address (Obtained on first TX packet). */ + struct ether_addr mac_address; + /**< RX queue number. */ + uint16_t rx_q; + /**< Data core that the device is added to. */ + uint16_t coreid; + /**< A device is set as ready if the MAC address has been set. */ + volatile uint8_t ready; + /**< Device is marked for removal from the data core. */ + volatile uint8_t remove; +} __rte_cache_aligned; + +/** + * Structure containing data core specific information. + */ +struct lcore_ll_info { + /**< Pointer to head in free linked list. */ + struct virtio_net_data_ll *ll_root_free; + /**< Pointer to head of used linked list. */ + struct virtio_net_data_ll *ll_root_used; + /**< Number of devices on lcore. */ + uint32_t device_num; + /**< Flag to synchronize device removal. */ + volatile uint8_t dev_removal_flag; +}; + +struct lcore_info { + /**< Pointer to data core specific lcore_ll_info struct */ + struct lcore_ll_info *lcore_ll; +}; + +struct virtio_net_data_ll { + /**< Pointer to device created by configuration core. */ + struct vhost_dev *vdev; + /**< Pointer to next device in linked list. */ + struct virtio_net_data_ll *next; +}; + +uint32_t +virtio_dev_rx(struct virtio_net *dev, struct rte_mbuf **pkts, uint32_t count); + +#endif /* _MAIN_H_ */ -- 1.7.7.6