From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga03.intel.com (mga03.intel.com [143.182.124.21]) by dpdk.org (Postfix) with ESMTP id AEA98593A for ; Fri, 23 May 2014 09:59:29 +0200 (CEST) Received: from azsmga001.ch.intel.com ([10.2.17.19]) by azsmga101.ch.intel.com with ESMTP; 23 May 2014 00:59:38 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="4.98,892,1392192000"; d="scan'208";a="435946634" Received: from shilc102.sh.intel.com ([10.239.39.44]) by azsmga001.ch.intel.com with ESMTP; 23 May 2014 00:59:36 -0700 Received: from shecgisg004.sh.intel.com (shecgisg004.sh.intel.com [10.239.29.89]) by shilc102.sh.intel.com (8.13.6/8.13.6/SuSE Linux 0.8) with ESMTP id s4N7xWBj026365; Fri, 23 May 2014 15:59:34 +0800 Received: from shecgisg004.sh.intel.com (localhost [127.0.0.1]) by shecgisg004.sh.intel.com (8.13.6/8.13.6/SuSE Linux 0.8) with ESMTP id s4N7xUFn016289; Fri, 23 May 2014 15:59:32 +0800 Received: (from couyang@localhost) by shecgisg004.sh.intel.com (8.13.6/8.13.6/Submit) id s4N7xU3E016285; Fri, 23 May 2014 15:59:30 +0800 From: Ouyang Changchun To: dev@dpdk.org Date: Fri, 23 May 2014 15:59:27 +0800 Message-Id: <1400831967-16188-1-git-send-email-changchun.ouyang@intel.com> X-Mailer: git-send-email 1.7.0.7 Subject: [dpdk-dev] [PATCH v2] virtio: Support multiple queues feature in DPDK based virtio-net frontend X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 23 May 2014 07:59:31 -0000 This patch adds support for the multiple queues feature in the DPDK-based virtio-net frontend. 
It firstly gets max queue number of virtio-net from virtio PCI configuration and then send command to negotiate the queue number with backend; When receiving and transmitting packets, it negotiates multiple virtio-net queues which serve RX/TX; To utilize this feature, the backend also need support multiple queues feature and enable it. It also fixes some patch style issues. Signed-off-by: Ouyang Changchun --- lib/librte_pmd_virtio/virtio_ethdev.c | 326 ++++++++++++++++++++++++++++------ lib/librte_pmd_virtio/virtio_ethdev.h | 10 +- lib/librte_pmd_virtio/virtio_pci.h | 4 +- lib/librte_pmd_virtio/virtio_rxtx.c | 72 ++++++-- lib/librte_pmd_virtio/virtqueue.h | 60 +++++-- 5 files changed, 384 insertions(+), 88 deletions(-) diff --git a/lib/librte_pmd_virtio/virtio_ethdev.c b/lib/librte_pmd_virtio/virtio_ethdev.c index 49e236b..79693f4 100644 --- a/lib/librte_pmd_virtio/virtio_ethdev.c +++ b/lib/librte_pmd_virtio/virtio_ethdev.c @@ -81,6 +81,9 @@ static void virtio_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats * static void virtio_dev_stats_reset(struct rte_eth_dev *dev); static void virtio_dev_free_mbufs(struct rte_eth_dev *dev); +static int virtio_dev_queue_stats_mapping_set(__rte_unused struct rte_eth_dev *eth_dev, +__rte_unused uint16_t queue_id, __rte_unused uint8_t stat_idx, __rte_unused uint8_t is_rx); + /* * The set of PCI devices this driver supports */ @@ -92,6 +95,130 @@ static struct rte_pci_id pci_id_virtio_map[] = { { .vendor_id = 0, /* sentinel */ }, }; +static int +virtio_send_command(struct virtqueue* vq, struct virtio_pmd_ctrl* ctrl, + int* dlen, int pkt_num) +{ + uint32_t head = vq->vq_desc_head_idx, i; + int k, sum = 0; + virtio_net_ctrl_ack status = ~0; + struct virtio_pmd_ctrl result; + + ctrl->status = status; + + if (!vq->hw->cvq) { + PMD_INIT_LOG(ERR, "%s(): Control queue is " + "not supported by this device.\n", __func__); + return -1; + } + + PMD_INIT_LOG(DEBUG, "vq->vq_desc_head_idx = %d, status = %d, vq->hw->cvq = %p \n" + "vq 
= %p \n", vq->vq_desc_head_idx, status, vq->hw->cvq, vq); + + if ((vq->vq_free_cnt < ((uint32_t)pkt_num + 2)) || (pkt_num < 1)) { + return -1; + } + + memcpy(vq->virtio_net_hdr_mz->addr, ctrl, sizeof(struct virtio_pmd_ctrl)); + + /* + * Format is enforced in qemu code: + * One TX packet for header; + * At least one TX packet per argument; + * One RX packet for ACK. + */ + vq->vq_ring.desc[head].flags = VRING_DESC_F_NEXT; + vq->vq_ring.desc[head].addr = vq->virtio_net_hdr_mz->phys_addr; + vq->vq_ring.desc[head].len = sizeof(struct virtio_net_ctrl_hdr); + vq->vq_free_cnt--; + i = vq->vq_ring.desc[head].next; + + for (k = 0; k < pkt_num; k++) { + vq->vq_ring.desc[i].flags = VRING_DESC_F_NEXT; + vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mz->phys_addr + + sizeof(struct virtio_net_ctrl_hdr) + sizeof(ctrl->status) + sizeof(uint8_t)*sum; + vq->vq_ring.desc[i].len = dlen[k]; + sum += dlen[k]; + vq->vq_free_cnt--; + i = vq->vq_ring.desc[i].next; + } + + vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE; + vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mz->phys_addr + sizeof(struct virtio_net_ctrl_hdr); + vq->vq_ring.desc[i].len = sizeof(ctrl->status); + vq->vq_free_cnt--; + + vq->vq_desc_head_idx = vq->vq_ring.desc[i].next; + + vq_update_avail_ring(vq, head); + vq_update_avail_idx(vq); + + PMD_INIT_LOG(DEBUG, "vq->vq_queue_index = %d \n", vq->vq_queue_index); + + virtqueue_notify(vq); + + while (vq->vq_used_cons_idx == vq->vq_ring.used->idx) { + usleep(100); + } + + while (vq->vq_used_cons_idx != vq->vq_ring.used->idx) { + uint32_t idx, desc_idx, used_idx; + struct vring_used_elem *uep; + + rmb(); + + used_idx = (uint32_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1)); + uep = &vq->vq_ring.used->ring[used_idx]; + idx = (uint32_t) uep->id; + desc_idx = idx; + + while (vq->vq_ring.desc[desc_idx].flags & VRING_DESC_F_NEXT) { + desc_idx = vq->vq_ring.desc[desc_idx].next; + vq->vq_free_cnt++; + } + + vq->vq_ring.desc[desc_idx].next = vq->vq_desc_head_idx; + vq->vq_desc_head_idx = 
idx; + + vq->vq_used_cons_idx++; + vq->vq_free_cnt++; + } + + PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\nvq->vq_desc_head_idx=%d\n", + vq->vq_free_cnt, vq->vq_desc_head_idx); + + memcpy(&result, vq->virtio_net_hdr_mz->addr, sizeof(struct virtio_pmd_ctrl)); + + return result.status; +} + +static int +virtio_set_multiple_queues(struct rte_eth_dev *dev, uint16_t nb_queues) +{ + struct virtio_hw *hw = VIRTIO_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct virtio_pmd_ctrl ctrl; + int dlen[1]; + int ret; + + ctrl.hdr.class = VIRTIO_NET_CTRL_MQ; + ctrl.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET; + memcpy(ctrl.data, &nb_queues, sizeof(uint16_t)); + + PMD_INIT_LOG(DEBUG, "ctrl.data=%d\n", *(int*)ctrl.data); + + dlen[0] = sizeof(uint16_t); + + ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1); + + if (ret) { + PMD_INIT_LOG(ERR, "Multiqueue configured but send command " + "failed, this is too late now...\n"); + return -EINVAL; + } + + return 0; +} + int virtio_dev_queue_setup(struct rte_eth_dev *dev, int queue_type, uint16_t queue_idx, @@ -134,7 +261,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, if (queue_type == VTNET_RQ) { rte_snprintf(vq_name, sizeof(vq_name), "port%d_rvq%d", - dev->data->port_id, queue_idx); + dev->data->port_id, queue_idx); vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) + vq_size * sizeof(struct vq_desc_extra), CACHE_LINE_SIZE); memcpy(vq->vq_name, vq_name, sizeof(vq->vq_name)); @@ -146,15 +273,16 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, memcpy(vq->vq_name, vq_name, sizeof(vq->vq_name)); } else if(queue_type == VTNET_CQ) { rte_snprintf(vq_name, sizeof(vq_name), "port%d_cvq", - dev->data->port_id); - vq = rte_zmalloc(vq_name, sizeof(struct virtqueue), - CACHE_LINE_SIZE); + dev->data->port_id); + vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) + + vq_size * sizeof(struct vq_desc_extra), CACHE_LINE_SIZE); memcpy(vq->vq_name, vq_name, sizeof(vq->vq_name)); } if (vq == NULL) { PMD_INIT_LOG(ERR, "%s: Can not allocate 
virtqueue\n", __func__); return (-ENOMEM); } + vq->hw = hw; vq->port_id = dev->data->port_id; vq->queue_id = queue_idx; @@ -171,11 +299,12 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, PMD_INIT_LOG(DEBUG, "vring_size: %d, rounded_vring_size: %d\n", size, vq->vq_ring_size); mz = rte_memzone_reserve_aligned(vq_name, vq->vq_ring_size, - socket_id, 0, VIRTIO_PCI_VRING_ALIGN); + socket_id, 0, VIRTIO_PCI_VRING_ALIGN); if (mz == NULL) { rte_free(vq); return (-ENOMEM); } + /* * Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit, * and only accepts 32 bit page frame number. @@ -186,6 +315,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, rte_free(vq); return (-ENOMEM); } + memset(mz->addr, 0, sizeof(mz->len)); vq->mz = mz; vq->vq_ring_mem = mz->phys_addr; @@ -197,8 +327,8 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, if (queue_type == VTNET_TQ) { /* - * For each xmit packet, allocate a virtio_net_hdr - */ + * For each xmit packet, allocate a virtio_net_hdr + */ rte_snprintf(vq_name, sizeof(vq_name), "port%d_tvq%d_hdrzone", dev->data->port_id, queue_idx); vq->virtio_net_hdr_mz = rte_memzone_reserve_aligned(vq_name, @@ -235,8 +365,8 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, } static int -virtio_dev_cq_queue_setup(struct rte_eth_dev *dev, - unsigned int socket_id) +virtio_dev_cq_queue_setup(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx, + uint32_t socket_id) { struct virtqueue *vq; uint16_t nb_desc = 0; @@ -245,8 +375,9 @@ virtio_dev_cq_queue_setup(struct rte_eth_dev *dev, VIRTIO_DEV_PRIVATE_TO_HW(dev->data->dev_private); PMD_INIT_FUNC_TRACE(); - ret = virtio_dev_queue_setup(dev, VTNET_CQ, 0, VTNET_SQ_CQ_QUEUE_IDX, - nb_desc, socket_id, &vq); + ret = virtio_dev_queue_setup(dev, VTNET_CQ, VTNET_SQ_CQ_QUEUE_IDX, + vtpci_queue_idx, nb_desc, socket_id, &vq); + if (ret < 0) { PMD_INIT_LOG(ERR, "control vq initialization failed\n"); return ret; @@ -264,26 +395,26 @@ virtio_dev_close(struct rte_eth_dev *dev) virtio_dev_stop(dev); } - /* 
* dev_ops for virtio, bare necessities for basic operation */ static struct eth_dev_ops virtio_eth_dev_ops = { - .dev_configure = virtio_dev_configure, - .dev_start = virtio_dev_start, - .dev_stop = virtio_dev_stop, - .dev_close = virtio_dev_close, - - .dev_infos_get = virtio_dev_info_get, - .stats_get = virtio_dev_stats_get, - .stats_reset = virtio_dev_stats_reset, - .link_update = virtio_dev_link_update, - .mac_addr_add = NULL, - .mac_addr_remove = NULL, - .rx_queue_setup = virtio_dev_rx_queue_setup, - .rx_queue_release = virtio_dev_rx_queue_release, /* meaningfull only to multiple queue */ - .tx_queue_setup = virtio_dev_tx_queue_setup, - .tx_queue_release = virtio_dev_tx_queue_release /* meaningfull only to multiple queue */ + .dev_configure = virtio_dev_configure, + .dev_start = virtio_dev_start, + .dev_stop = virtio_dev_stop, + .dev_close = virtio_dev_close, + + .dev_infos_get = virtio_dev_info_get, + .stats_get = virtio_dev_stats_get, + .stats_reset = virtio_dev_stats_reset, + .link_update = virtio_dev_link_update, + .mac_addr_add = NULL, + .mac_addr_remove = NULL, + .rx_queue_setup = virtio_dev_rx_queue_setup, + .rx_queue_release = virtio_dev_rx_queue_release, /* meaningfull only to multiple queue */ + .tx_queue_setup = virtio_dev_tx_queue_setup, + .tx_queue_release = virtio_dev_tx_queue_release, /* meaningfull only to multiple queue */ + .queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set /* collect stats per queue */ }; static inline int @@ -370,7 +501,7 @@ static void virtio_negotiate_features(struct virtio_hw *hw) { uint32_t guest_features, mask; - mask = VIRTIO_NET_F_CTRL_VQ | VIRTIO_NET_F_CTRL_RX | VIRTIO_NET_F_CTRL_VLAN; + mask = VIRTIO_NET_F_CTRL_RX | VIRTIO_NET_F_CTRL_VLAN; mask |= VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM ; /* TSO and LRO are only available when their corresponding @@ -388,12 +519,15 @@ virtio_negotiate_features(struct virtio_hw *hw) /* Prepare guest_features: feature that driver wants to support */ guest_features = 
VTNET_FEATURES & ~mask; + PMD_INIT_LOG(DEBUG, "guest_features before negotiate = %x\n", guest_features); /* Read device(host) feature bits */ hw->host_features = VIRTIO_READ_REG_4(hw, VIRTIO_PCI_HOST_FEATURES); + PMD_INIT_LOG(DEBUG, "host_features before negotiate = %x\n", hw->host_features); /* Negotiate features: Subset of device feature bits are written back (guest feature bits) */ hw->guest_features = vtpci_negotiate_features(hw, guest_features); + PMD_INIT_LOG(DEBUG, "features after negotiate = %x\n", hw->guest_features); } #ifdef RTE_EXEC_ENV_LINUXAPP @@ -501,9 +635,13 @@ static int eth_virtio_dev_init(__rte_unused struct eth_driver *eth_drv, struct rte_eth_dev *eth_dev) { + struct virtio_net_config *config; + struct virtio_net_config local_config; + uint32_t offset_conf = sizeof(config->mac); struct rte_pci_device *pci_dev; struct virtio_hw *hw = VIRTIO_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + if (RTE_PKTMBUF_HEADROOM < sizeof(struct virtio_net_hdr) ) { PMD_INIT_LOG(ERR, "MBUF HEADROOM should be enough to hold virtio net hdr\n"); @@ -561,9 +699,6 @@ eth_virtio_dev_init(__rte_unused struct eth_driver *eth_drv, #endif hw->io_base = (uint32_t)(uintptr_t)pci_dev->mem_resource[0].addr; - hw->max_rx_queues = VIRTIO_MAX_RX_QUEUES; - hw->max_tx_queues = VIRTIO_MAX_TX_QUEUES; - /* Reset the device although not necessary at startup */ vtpci_reset(hw); @@ -573,6 +708,7 @@ eth_virtio_dev_init(__rte_unused struct eth_driver *eth_drv, /* Tell the host we've known how to drive the device. 
*/ vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER); virtio_negotiate_features(hw); + /* Setting up rx_header size for the device */ if(vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf); @@ -587,6 +723,7 @@ eth_virtio_dev_init(__rte_unused struct eth_driver *eth_drv, ETHER_ADDR_LEN); return (-ENOMEM); } + /* Copy the permanent MAC address to: virtio_hw */ virtio_get_hwaddr(hw); ether_addr_copy((struct ether_addr *) hw->mac_addr, @@ -594,9 +731,46 @@ eth_virtio_dev_init(__rte_unused struct eth_driver *eth_drv, PMD_INIT_LOG(DEBUG, "PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X\n", hw->mac_addr[0], hw->mac_addr[1],hw->mac_addr[2], hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]); - if(vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ)) - virtio_dev_cq_queue_setup(eth_dev, SOCKET_ID_ANY); + if(vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ)) { + config = &local_config; + + if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) { + offset_conf += sizeof(config->status); + } else { + PMD_INIT_LOG(DEBUG, "VIRTIO_NET_F_STATUS is not supported\n"); + config->status = 0; + } + + if (vtpci_with_feature(hw, VIRTIO_NET_F_MQ)) { + offset_conf += sizeof(config->max_virtqueue_pairs); + } else { + PMD_INIT_LOG(DEBUG, "VIRTIO_NET_F_MQ is not supported!!!\n"); + config->max_virtqueue_pairs = 1; + } + + vtpci_read_dev_config(hw, 0, (uint8_t*)config, offset_conf); + + hw->max_rx_queues = (VIRTIO_MAX_RX_QUEUES < config->max_virtqueue_pairs) ? + VIRTIO_MAX_RX_QUEUES : config->max_virtqueue_pairs; + hw->max_tx_queues = (VIRTIO_MAX_TX_QUEUES < config->max_virtqueue_pairs) ? 
+ VIRTIO_MAX_TX_QUEUES : config->max_virtqueue_pairs; + + virtio_dev_cq_queue_setup(eth_dev,config->max_virtqueue_pairs * 2,SOCKET_ID_ANY); + PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=%d\n", config->max_virtqueue_pairs); + PMD_INIT_LOG(DEBUG, "config->status=%d\n", config->status); + PMD_INIT_LOG(DEBUG, "PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X\n", config->mac[0], + config->mac[1],config->mac[2], config->mac[3], config->mac[4], config->mac[5]); + } else { + hw->max_rx_queues = 1; + hw->max_tx_queues = 1; + } + + eth_dev->data->nb_rx_queues = hw->max_rx_queues; + eth_dev->data->nb_tx_queues = hw->max_tx_queues; + + PMD_INIT_LOG(DEBUG, "hw->max_rx_queues=%d hw->max_tx_queues=%d\n", + hw->max_rx_queues, hw->max_tx_queues); PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x", eth_dev->data->port_id, pci_dev->id.vendor_id, pci_dev->id.device_id); @@ -607,6 +781,7 @@ static struct eth_driver rte_virtio_pmd = { { .name = "rte_virtio_pmd", .id_table = pci_id_virtio_map, + .drv_flags = RTE_PCI_DRV_NEED_IGB_UIO, }, .eth_dev_init = eth_virtio_dev_init, .dev_private_size = sizeof(struct virtio_adapter), @@ -652,6 +827,7 @@ virtio_dev_configure(__rte_unused struct rte_eth_dev *dev) static int virtio_dev_start(struct rte_eth_dev *dev) { + uint16_t nb_queues, i; uint16_t status; struct virtio_hw *hw = VIRTIO_DEV_PRIVATE_TO_HW(dev->data->dev_private); @@ -664,12 +840,13 @@ virtio_dev_start(struct rte_eth_dev *dev) hw->adapter_stopped = 0; + virtio_dev_cq_start(dev); + /* Do final configuration before rx/tx engine starts */ virtio_dev_rxtx_start(dev); /* Check VIRTIO_NET_F_STATUS for link status*/ if(vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) { - vtpci_read_dev_config(hw, offsetof(struct virtio_net_config, status), &status, sizeof(status)); @@ -686,34 +863,63 @@ virtio_dev_start(struct rte_eth_dev *dev) *Otherwise the tap backend might already stop its queue due to fullness. 
*vhost backend will have no chance to be waked up */ - virtqueue_notify(dev->data->rx_queues[0]); + nb_queues = dev->data->nb_rx_queues; + if (nb_queues > 1) { + if (virtio_set_multiple_queues(dev, nb_queues) != 0) + return -EINVAL; + } + + PMD_INIT_LOG(DEBUG, "nb_queues=%d\n",nb_queues); + + for(i = 0; i < nb_queues; i++){ + virtqueue_notify(dev->data->rx_queues[i]); + } + PMD_INIT_LOG(DEBUG, "Notified backend at initialization\n"); + + for( i = 0; i < dev->data->nb_rx_queues; i++){ + VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]); + } + + for(i = 0; i < dev->data->nb_tx_queues; i++){ + VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]); + } + return (0); } static void virtio_dev_free_mbufs(struct rte_eth_dev *dev) { struct rte_mbuf * buf; - int i = 0; - PMD_INIT_LOG(DEBUG, "Before freeing rxq used and unused buf \n"); - VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[0]); - while( (buf =(struct rte_mbuf *)virtqueue_detatch_unused(dev->data->rx_queues[0])) != NULL) { - rte_pktmbuf_free_seg(buf); - i++; - } - PMD_INIT_LOG(DEBUG, "free %d mbufs\n", i); - PMD_INIT_LOG(DEBUG, "After freeing rxq used and unused buf\n"); - VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[0]); - PMD_INIT_LOG(DEBUG, "Before freeing txq used and unused bufs\n"); - VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[0]); - i = 0; - while( (buf = (struct rte_mbuf *)virtqueue_detatch_unused(dev->data->tx_queues[0])) != NULL) { - rte_pktmbuf_free_seg(buf); - i++; - } - PMD_INIT_LOG(DEBUG, "free %d mbufs\n", i); - PMD_INIT_LOG(DEBUG, "After freeing txq used and unused buf\n"); - VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[0]); + int i, mbuf_num = 0; + for( i = 0; i < dev->data->nb_rx_queues; i++) { + PMD_INIT_LOG(DEBUG, "Before freeing rxq[%d] used and unused buf \n", i); + VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]); + + while( (buf =(struct rte_mbuf *)virtqueue_detatch_unused(dev->data->rx_queues[i])) != NULL) { + 
rte_pktmbuf_free_seg(buf); + mbuf_num ++; + } + + PMD_INIT_LOG(DEBUG, "free %d mbufs\n", mbuf_num); + PMD_INIT_LOG(DEBUG, "After freeing rxq[%d] used and unused buf\n", i); + VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]); + } + + for( i = 0; i < dev->data->nb_tx_queues; i++) { + PMD_INIT_LOG(DEBUG, "Before freeing txq[%d] used and unused bufs\n", i); + VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]); + + mbuf_num = 0; + while( (buf = (struct rte_mbuf *)virtqueue_detatch_unused(dev->data->tx_queues[i])) != NULL) { + rte_pktmbuf_free_seg(buf); + mbuf_num ++; + } + + PMD_INIT_LOG(DEBUG, "free %d mbufs\n", mbuf_num); + PMD_INIT_LOG(DEBUG, "After freeing txq[%d] used and unused buf\n", i); + VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]); + } } /* @@ -776,6 +982,16 @@ virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) dev_info->max_mac_addrs = VIRTIO_MAX_MAC_ADDRS; } +/* + * It enables testpmd to collect per queue stats. + */ +static int +virtio_dev_queue_stats_mapping_set(__rte_unused struct rte_eth_dev *eth_dev, +__rte_unused uint16_t queue_id, __rte_unused uint8_t stat_idx, __rte_unused uint8_t is_rx) +{ + return 0; +} + static struct rte_driver rte_virtio_driver = { .type = PMD_PDEV, .init = rte_virtio_pmd_init, diff --git a/lib/librte_pmd_virtio/virtio_ethdev.h b/lib/librte_pmd_virtio/virtio_ethdev.h index 80c9d63..afb77c0 100644 --- a/lib/librte_pmd_virtio/virtio_ethdev.h +++ b/lib/librte_pmd_virtio/virtio_ethdev.h @@ -49,8 +49,8 @@ #define PAGE_SIZE 4096 #endif -#define VIRTIO_MAX_RX_QUEUES 1 -#define VIRTIO_MAX_TX_QUEUES 1 +#define VIRTIO_MAX_RX_QUEUES 128 +#define VIRTIO_MAX_TX_QUEUES 128 #define VIRTIO_MAX_MAC_ADDRS 1 #define VIRTIO_MIN_RX_BUFSIZE 64 #define VIRTIO_MAX_RX_PKTLEN 1518 @@ -59,6 +59,7 @@ #define VTNET_FEATURES \ (VIRTIO_NET_F_MAC | \ VIRTIO_NET_F_STATUS | \ + VIRTIO_NET_F_MQ | \ VIRTIO_NET_F_CTRL_VQ | \ VIRTIO_NET_F_CTRL_RX | \ VIRTIO_NET_F_CTRL_VLAN | \ @@ -74,6 +75,11 @@ 
VIRTIO_RING_F_INDIRECT_DESC) /* + * CQ function prototype + */ +void virtio_dev_cq_start(struct rte_eth_dev *dev); + +/* * RX/TX function prototypes */ void virtio_dev_rxtx_start(struct rte_eth_dev *dev); diff --git a/lib/librte_pmd_virtio/virtio_pci.h b/lib/librte_pmd_virtio/virtio_pci.h index f163877..c37a32c 100644 --- a/lib/librte_pmd_virtio/virtio_pci.h +++ b/lib/librte_pmd_virtio/virtio_pci.h @@ -192,7 +192,9 @@ struct virtio_net_config { uint8_t mac[ETHER_ADDR_LEN]; /* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */ uint16_t status; -}; + uint16_t max_virtqueue_pairs; +}__attribute__((packed)); + /* Value indicated in device config */ #define VIRTIO_PCI_FLAG_MSIX 0x0020 /* diff --git a/lib/librte_pmd_virtio/virtio_rxtx.c b/lib/librte_pmd_virtio/virtio_rxtx.c index 2dd2102..e1153ab 100644 --- a/lib/librte_pmd_virtio/virtio_rxtx.c +++ b/lib/librte_pmd_virtio/virtio_rxtx.c @@ -82,14 +82,14 @@ virtio_dev_vring_start(struct rte_eth_dev *dev, struct virtqueue *vq, int queue_ PMD_INIT_FUNC_TRACE(); /* - * Reinitialise since virtio port might have been stopped and restarted - */ + * Reinitialise since virtio port might have been stopped and restarted + */ memset(vq->vq_ring_virt_mem, 0, vq->vq_ring_size); vring_init(vr, size, ring_mem, vq->vq_alignment); vq->vq_used_cons_idx = 0; vq->vq_desc_head_idx = 0; vq->vq_avail_idx = 0; - vq->vq_desc_tail_idx = vq->vq_nentries - 1; + vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1); vq->vq_free_cnt = vq->vq_nentries; memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries); @@ -110,37 +110,56 @@ virtio_dev_vring_start(struct rte_eth_dev *dev, struct virtqueue *vq, int queue_ /* Only rx virtqueue needs mbufs to be allocated at initialization */ if (queue_type == VTNET_RQ) { if (vq->mpool == NULL) - rte_exit(EXIT_FAILURE, "Cannot allocate initial mbufs for rx virtqueue\n"); - /* Allocate blank mbufs for the each rx descriptor */ + rte_exit(EXIT_FAILURE, "Cannot allocate initial mbufs for rx 
virtqueue\n"); + + /* Allocate blank mbufs for the each rx descriptor */ nbufs = 0; error = ENOSPC; while (!virtqueue_full(vq)) { m = rte_rxmbuf_alloc(vq->mpool); if (m == NULL) break; + /****************************************** * Enqueue allocated buffers * *******************************************/ error = virtqueue_enqueue_recv_refill(vq, m); + if (error) { rte_pktmbuf_free_seg(m); break; } nbufs++; } + vq_update_avail_idx(vq); + PMD_INIT_LOG(DEBUG, "Allocated %d bufs\n", nbufs); - VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_SEL, VTNET_SQ_RQ_QUEUE_IDX); + + VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_SEL, vq->vq_queue_index); + VIRTIO_WRITE_REG_4(vq->hw, VIRTIO_PCI_QUEUE_PFN, + vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT); + } else if(queue_type == VTNET_TQ) { + VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_SEL, vq->vq_queue_index); VIRTIO_WRITE_REG_4(vq->hw, VIRTIO_PCI_QUEUE_PFN, vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT); } else { - VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_SEL, VTNET_SQ_TQ_QUEUE_IDX); + VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_SEL, vq->vq_queue_index); VIRTIO_WRITE_REG_4(vq->hw, VIRTIO_PCI_QUEUE_PFN, vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT); } } void +virtio_dev_cq_start(struct rte_eth_dev *dev) +{ + struct virtio_hw *hw = VIRTIO_DEV_PRIVATE_TO_HW(dev->data->dev_private); + + virtio_dev_vring_start(dev, hw->cvq, VTNET_CQ); + VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq); +} + +void virtio_dev_rxtx_start(struct rte_eth_dev *dev) { /* @@ -150,15 +169,20 @@ virtio_dev_rxtx_start(struct rte_eth_dev *dev) * - Allocate blank mbufs for the each rx descriptor * */ + int i; PMD_INIT_FUNC_TRACE(); - /* Start rx vring: by default we have 1 rx virtqueue. */ - virtio_dev_vring_start(dev, dev->data->rx_queues[0], VTNET_RQ); - VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[0]); + /* Start rx vring. 
*/ + for( i = 0; i < dev->data->nb_rx_queues; i++){ + virtio_dev_vring_start(dev, dev->data->rx_queues[i], VTNET_RQ); + VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]); + } - /* Start tx vring: by default we have 1 tx virtqueue. */ - virtio_dev_vring_start(dev, dev->data->tx_queues[0], VTNET_TQ); - VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[0]); + /* Start tx vring. */ + for(i = 0; i < dev->data->nb_tx_queues; i++){ + virtio_dev_vring_start(dev, dev->data->tx_queues[i], VTNET_TQ); + VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]); + } } int @@ -169,7 +193,7 @@ virtio_dev_rx_queue_setup(struct rte_eth_dev *dev, __rte_unused const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp) { - uint8_t vtpci_queue_idx = VTNET_SQ_RQ_QUEUE_IDX; + uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX; struct virtqueue *vq; int ret; @@ -180,6 +204,7 @@ virtio_dev_rx_queue_setup(struct rte_eth_dev *dev, PMD_INIT_LOG(ERR, "tvq initialization failed\n"); return ret; } + /* Create mempool for rx mbuf allocation */ vq->mpool = mp; @@ -201,7 +226,7 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev, unsigned int socket_id, __rte_unused const struct rte_eth_txconf *tx_conf) { - uint8_t vtpci_queue_idx = VTNET_SQ_TQ_QUEUE_IDX; + uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX; struct virtqueue *vq; int ret; @@ -254,12 +279,16 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) num = (uint16_t)(likely(num <= VIRTIO_MBUF_BURST_SZ) ? 
num : VIRTIO_MBUF_BURST_SZ); if (likely(num > DESC_PER_CACHELINE)) num = num - ((rxvq->vq_used_cons_idx + num) % DESC_PER_CACHELINE); + if(num == 0) return 0; + num = virtqueue_dequeue_burst_rx(rxvq, rcv_pkts, len, num); PMD_RX_LOG(DEBUG, "used:%d dequeue:%d\n", nb_used, num); for (i = 0; i < num ; i ++) { rxm = rcv_pkts[i]; + PMD_RX_LOG(DEBUG, "packet len:%d\n", len[i]); + if (unlikely(len[i] < (uint32_t)hw->vtnet_hdr_size + ETHER_HDR_LEN)) { PMD_RX_LOG(ERR, "Packet drop\n"); nb_enqueued++; @@ -267,17 +296,23 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) hw->eth_stats.ierrors++; continue; } + rxm->pkt.in_port = rxvq->port_id; rxm->pkt.data = (char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM; rxm->pkt.nb_segs = 1; rxm->pkt.next = NULL; rxm->pkt.pkt_len = (uint32_t)(len[i] - sizeof(struct virtio_net_hdr)); rxm->pkt.data_len = (uint16_t)(len[i] - sizeof(struct virtio_net_hdr)); + VIRTIO_DUMP_PACKET(rxm, rxm->pkt.data_len); + rx_pkts[nb_rx++] = rxm; hw->eth_stats.ibytes += len[i] - sizeof(struct virtio_net_hdr); + hw->eth_stats.q_ibytes[rxvq->queue_id] += len[i] - sizeof(struct virtio_net_hdr); } + hw->eth_stats.ipackets += nb_rx; + hw->eth_stats.q_ipackets[rxvq->queue_id] += nb_rx; /* Allocate new mbuf for the used descriptor */ error = ENOSPC; @@ -300,8 +335,9 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) PMD_RX_LOG(DEBUG, "Notified\n"); } } + vq_update_avail_idx(rxvq); - + return (nb_rx); } @@ -332,6 +368,7 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) virtqueue_dequeue_pkt_tx(txvq); num--; } + if(!virtqueue_full(txvq)) { txm = tx_pkts[nb_tx]; /* Enqueue Packet buffers */ @@ -347,6 +384,7 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) } nb_tx++; hw->eth_stats.obytes += txm->pkt.data_len; + hw->eth_stats.q_obytes[txvq->queue_id] += txm->pkt.data_len; } else { PMD_TX_LOG(ERR, "No free tx descriptors to transmit\n"); break; @@ -355,10 
+393,12 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) vq_update_avail_idx(txvq); hw->eth_stats.opackets += nb_tx; + hw->eth_stats.q_opackets[txvq->queue_id] += nb_tx; if(unlikely(virtqueue_kick_prepare(txvq))) { virtqueue_notify(txvq); PMD_TX_LOG(DEBUG, "Notified backend after xmit\n"); } + return (nb_tx); } diff --git a/lib/librte_pmd_virtio/virtqueue.h b/lib/librte_pmd_virtio/virtqueue.h index b67c223..a21fef7 100644 --- a/lib/librte_pmd_virtio/virtqueue.h +++ b/lib/librte_pmd_virtio/virtqueue.h @@ -103,6 +103,24 @@ enum { VTNET_RQ = 0, VTNET_TQ = 1, VTNET_CQ = 2 }; #define VIRTIO_NET_CTRL_VLAN_ADD 0 #define VIRTIO_NET_CTRL_VLAN_DEL 1 +struct virtio_net_ctrl_hdr { + uint8_t class; + uint8_t cmd; +} __attribute__((packed)); + +typedef uint8_t virtio_net_ctrl_ack; + +#define VIRTIO_NET_OK 0 +#define VIRTIO_NET_ERR 1 + +#define VIRTIO_MAX_CTRL_DATA 128 + +struct virtio_pmd_ctrl { + struct virtio_net_ctrl_hdr hdr; + virtio_net_ctrl_ack status; + uint8_t data[VIRTIO_MAX_CTRL_DATA]; +}; + struct virtqueue { char vq_name[VIRTQUEUE_MAX_NAME_SZ]; struct virtio_hw *hw; /**< virtio_hw structure pointer. */ @@ -142,6 +160,15 @@ struct virtqueue { } vq_descx[0]; }; +/* If multiqueue is provided by host, then we suppport it. */ +#ifndef VIRTIO_NET_F_MQ +#define VIRTIO_NET_F_MQ 0x400000 /* Device supports Receive Flow Steering */ +#define VIRTIO_NET_CTRL_MQ 4 +#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET 0 +#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN 1 +#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX 0x8000 +#endif + /** * This is the first element of the scatter-gather list. If you don't * specify GSO or CSUM features, you can simply ignore the header. @@ -204,9 +231,10 @@ vq_update_avail_ring(struct virtqueue *vq, uint16_t desc_idx) uint16_t avail_idx; /* * Place the head of the descriptor chain into the next slot and make - * it usable to the host. 
We wait to inform the host until after the burst - * is complete to avoid cache alignment issues with descriptors. This - * also helps to avoid any contention on the available index. + * it usable to the host. The chain is made available now rather than + * deferring to virtqueue_notify() in the hopes that if the host is + * currently running on another CPU, we can keep it processing the new + * descriptor. */ avail_idx = (uint16_t)(vq->vq_avail_idx & (vq->vq_nentries - 1)); vq->vq_ring.avail->ring[avail_idx] = desc_idx; @@ -242,7 +270,7 @@ vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx) vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs); if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) { while (dp->flags & VRING_DESC_F_NEXT) { - desc_idx_last = dp->next; + desc_idx_last = dp->next; dp = &vq->vq_ring.desc[dp->next]; } } @@ -259,6 +287,7 @@ vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx) dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx]; dp_tail->next = desc_idx; } + vq->vq_desc_tail_idx = desc_idx_last; dp->next = VQ_RING_DESC_CHAIN_END; } @@ -294,7 +323,7 @@ virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie) idx = start_dp[idx].next; vq->vq_desc_head_idx = idx; if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END) - vq->vq_desc_tail_idx = idx; + vq->vq_desc_tail_idx = idx; vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed); vq_update_avail_ring(vq, head_idx); @@ -335,7 +364,7 @@ virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie) idx = start_dp[idx].next; txvq->vq_desc_head_idx = idx; if (txvq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END) - txvq->vq_desc_tail_idx = idx; + txvq->vq_desc_tail_idx = idx; txvq->vq_free_cnt = (uint16_t)(txvq->vq_free_cnt - needed); vq_update_avail_ring(txvq, head_idx); @@ -357,11 +386,13 @@ virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts, uint desc_idx = (uint16_t) uep->id; len[i] = uep->len; cookie = (struct rte_mbuf 
*)vq->vq_descx[desc_idx].cookie; + if (unlikely(cookie == NULL)) { PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u\n", vq->vq_used_cons_idx); break; } + rte_prefetch0(cookie); rte_packet_prefetch(cookie->pkt.data); rx_pkts[i] = cookie; @@ -369,22 +400,23 @@ virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts, uint vq_ring_free_chain(vq, desc_idx); vq->vq_descx[desc_idx].cookie = NULL; } + return (i); } static inline uint16_t __attribute__((always_inline)) virtqueue_dequeue_pkt_tx(struct virtqueue *vq) { - struct vring_used_elem *uep; - uint16_t used_idx, desc_idx; + struct vring_used_elem *uep; + uint16_t used_idx, desc_idx; - used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1)); - uep = &vq->vq_ring.used->ring[used_idx]; - desc_idx = (uint16_t) uep->id; - vq->vq_used_cons_idx++; - vq_ring_free_chain(vq, desc_idx); + used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1)); + uep = &vq->vq_ring.used->ring[used_idx]; + desc_idx = (uint16_t) uep->id; + vq->vq_used_cons_idx++; + vq_ring_free_chain(vq, desc_idx); - return 0; + return 0; } #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP -- 1.9.0