From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga02.intel.com (mga02.intel.com [134.134.136.20]) by dpdk.org (Postfix) with ESMTP id A9CF78DAA for ; Fri, 6 Nov 2015 02:31:37 +0100 (CET) Received: from orsmga001.jf.intel.com ([10.7.209.18]) by orsmga101.jf.intel.com with ESMTP; 05 Nov 2015 17:31:38 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.20,249,1444719600"; d="scan'208";a="812931204" Received: from tan-s2600cw.sh.intel.com ([10.239.128.225]) by orsmga001.jf.intel.com with ESMTP; 05 Nov 2015 17:31:35 -0800 From: Jianfeng Tan To: dev@dpdk.org Date: Fri, 6 Nov 2015 02:31:13 +0800 Message-Id: <1446748276-132087-3-git-send-email-jianfeng.tan@intel.com> X-Mailer: git-send-email 2.1.4 In-Reply-To: <1446748276-132087-1-git-send-email-jianfeng.tan@intel.com> References: <1446748276-132087-1-git-send-email-jianfeng.tan@intel.com> Cc: nakajima.yoshihiro@lab.ntt.co.jp, zhbzg@huawei.com, mst@redhat.com, gaoxiaoqiu@huawei.com, oscar.zhangbo@huawei.com, ann.zhuangyanying@huawei.com, zhoujingbin@huawei.com, guohongzhen@huawei.com Subject: [dpdk-dev] [RFC 2/5] virtio/container: add a new virtual device named eth_cvio X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 06 Nov 2015 01:31:38 -0000 Add a new virtual device named eth_cvio; it can be used just like eth_ring, eth_null, etc. Configurable parameters include the number of rx, tx and cq queues, the path of the vhost unix socket, and the queue size. The major difference from virtio for a VM is that here we use virtual addresses instead of physical addresses for vhost to calculate relative addresses. 
Signed-off-by: Huawei Xie Signed-off-by: Jianfeng Tan --- drivers/net/virtio/virtio_ethdev.c | 319 +++++++++++++++++++++++++++++-------- drivers/net/virtio/virtio_ethdev.h | 16 ++ drivers/net/virtio/virtqueue.h | 9 +- 3 files changed, 275 insertions(+), 69 deletions(-) diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index 6001108..b5e2126 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -56,6 +56,7 @@ #include #include #include +#include #include "virtio_ethdev.h" #include "virtio_pci.h" @@ -63,7 +64,6 @@ #include "virtqueue.h" #include "virtio_rxtx.h" - static int eth_virtio_dev_init(struct rte_eth_dev *eth_dev); static int eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev); static int virtio_dev_configure(struct rte_eth_dev *dev); @@ -164,8 +164,7 @@ virtio_send_command(struct virtqueue *vq, struct virtio_pmd_ctrl *ctrl, if ((vq->vq_free_cnt < ((uint32_t)pkt_num + 2)) || (pkt_num < 1)) return -1; - memcpy(vq->virtio_net_hdr_mz->addr, ctrl, - sizeof(struct virtio_pmd_ctrl)); + memcpy(vq->virtio_net_hdr_vaddr, ctrl, sizeof(struct virtio_pmd_ctrl)); /* * Format is enforced in qemu code: @@ -174,14 +173,14 @@ virtio_send_command(struct virtqueue *vq, struct virtio_pmd_ctrl *ctrl, * One RX packet for ACK. 
*/ vq->vq_ring.desc[head].flags = VRING_DESC_F_NEXT; - vq->vq_ring.desc[head].addr = vq->virtio_net_hdr_mz->phys_addr; + vq->vq_ring.desc[head].addr = vq->virtio_net_hdr_mem; vq->vq_ring.desc[head].len = sizeof(struct virtio_net_ctrl_hdr); vq->vq_free_cnt--; i = vq->vq_ring.desc[head].next; for (k = 0; k < pkt_num; k++) { vq->vq_ring.desc[i].flags = VRING_DESC_F_NEXT; - vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mz->phys_addr + vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mem + sizeof(struct virtio_net_ctrl_hdr) + sizeof(ctrl->status) + sizeof(uint8_t)*sum; vq->vq_ring.desc[i].len = dlen[k]; @@ -191,7 +190,7 @@ virtio_send_command(struct virtqueue *vq, struct virtio_pmd_ctrl *ctrl, } vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE; - vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mz->phys_addr + vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mem + sizeof(struct virtio_net_ctrl_hdr); vq->vq_ring.desc[i].len = sizeof(ctrl->status); vq->vq_free_cnt--; @@ -236,7 +235,7 @@ virtio_send_command(struct virtqueue *vq, struct virtio_pmd_ctrl *ctrl, PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\nvq->vq_desc_head_idx=%d", vq->vq_free_cnt, vq->vq_desc_head_idx); - memcpy(&result, vq->virtio_net_hdr_mz->addr, + memcpy(&result, vq->virtio_net_hdr_vaddr, sizeof(struct virtio_pmd_ctrl)); return result.status; @@ -374,66 +373,79 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, } } - /* - * Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit, - * and only accepts 32 bit page frame number. - * Check if the allocated physical memory exceeds 16TB. 
- */ - if ((mz->phys_addr + vq->vq_ring_size - 1) >> (VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) { - PMD_INIT_LOG(ERR, "vring address shouldn't be above 16TB!"); - rte_free(vq); - return -ENOMEM; - } - memset(mz->addr, 0, sizeof(mz->len)); vq->mz = mz; - vq->vq_ring_mem = mz->phys_addr; vq->vq_ring_virt_mem = mz->addr; - PMD_INIT_LOG(DEBUG, "vq->vq_ring_mem: 0x%"PRIx64, (uint64_t)mz->phys_addr); - PMD_INIT_LOG(DEBUG, "vq->vq_ring_virt_mem: 0x%"PRIx64, (uint64_t)(uintptr_t)mz->addr); + + if (dev->dev_type == RTE_ETH_DEV_PCI) { + vq->vq_ring_mem = mz->phys_addr; + + /* Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit, + * and only accepts 32 bit page frame number. + * Check if the allocated physical memory exceeds 16TB. + */ + uint64_t last_physaddr = vq->vq_ring_mem + vq->vq_ring_size - 1; + if (last_physaddr >> (VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) { + PMD_INIT_LOG(ERR, "vring address shouldn't be above 16TB!"); + rte_free(vq); + return -ENOMEM; + } + } +#ifdef RTE_VIRTIO_VDEV + else { /* RTE_ETH_DEV_VIRTUAL */ + /* Use virtual addr to fill!!! 
*/ + vq->vq_ring_mem = (phys_addr_t)mz->addr; + + /* TODO: check last_physaddr */ + } +#endif + + PMD_INIT_LOG(DEBUG, "vq->vq_ring_mem: 0x%"PRIx64, + (uint64_t)vq->vq_ring_mem); + PMD_INIT_LOG(DEBUG, "vq->vq_ring_virt_mem: 0x%"PRIx64, + (uint64_t)(uintptr_t)vq->vq_ring_virt_mem); vq->virtio_net_hdr_mz = NULL; vq->virtio_net_hdr_mem = 0; + uint64_t hdr_size = 0; if (queue_type == VTNET_TQ) { /* * For each xmit packet, allocate a virtio_net_hdr */ snprintf(vq_name, sizeof(vq_name), "port%d_tvq%d_hdrzone", - dev->data->port_id, queue_idx); - vq->virtio_net_hdr_mz = rte_memzone_reserve_aligned(vq_name, - vq_size * hw->vtnet_hdr_size, - socket_id, 0, RTE_CACHE_LINE_SIZE); - if (vq->virtio_net_hdr_mz == NULL) { - if (rte_errno == EEXIST) - vq->virtio_net_hdr_mz = - rte_memzone_lookup(vq_name); - if (vq->virtio_net_hdr_mz == NULL) { - rte_free(vq); - return -ENOMEM; - } - } - vq->virtio_net_hdr_mem = - vq->virtio_net_hdr_mz->phys_addr; - memset(vq->virtio_net_hdr_mz->addr, 0, - vq_size * hw->vtnet_hdr_size); + dev->data->port_id, queue_idx); + hdr_size = vq_size * hw->vtnet_hdr_size; } else if (queue_type == VTNET_CQ) { - /* Allocate a page for control vq command, data and status */ snprintf(vq_name, sizeof(vq_name), "port%d_cvq_hdrzone", - dev->data->port_id); - vq->virtio_net_hdr_mz = rte_memzone_reserve_aligned(vq_name, - PAGE_SIZE, socket_id, 0, RTE_CACHE_LINE_SIZE); - if (vq->virtio_net_hdr_mz == NULL) { + dev->data->port_id); + /* Allocate a page for control vq command, data and status */ + hdr_size = PAGE_SIZE; + } + + if (hdr_size) { /* queue_type is VTNET_TQ or VTNET_CQ */ + mz = rte_memzone_reserve_aligned(vq_name, + hdr_size, socket_id, 0, RTE_CACHE_LINE_SIZE); + if (mz == NULL) { if (rte_errno == EEXIST) - vq->virtio_net_hdr_mz = - rte_memzone_lookup(vq_name); - if (vq->virtio_net_hdr_mz == NULL) { + mz = rte_memzone_lookup(vq_name); + if (mz == NULL) { rte_free(vq); return -ENOMEM; } } - vq->virtio_net_hdr_mem = - vq->virtio_net_hdr_mz->phys_addr; - 
memset(vq->virtio_net_hdr_mz->addr, 0, PAGE_SIZE); + vq->virtio_net_hdr_mz = mz; + vq->virtio_net_hdr_vaddr = mz->addr; + memset(vq->virtio_net_hdr_vaddr, 0, hdr_size); + + if (dev->dev_type == RTE_ETH_DEV_PCI) { + vq->virtio_net_hdr_mem = mz->phys_addr; + } +#ifdef RTE_VIRTIO_VDEV + else { + /* Use vaddr!!! */ + vq->virtio_net_hdr_mem = (phys_addr_t)mz->addr; + } +#endif } /* @@ -491,8 +503,10 @@ virtio_dev_close(struct rte_eth_dev *dev) PMD_INIT_LOG(DEBUG, "virtio_dev_close"); /* reset the NIC */ - if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) - vtpci_irq_config(hw, VIRTIO_MSI_NO_VECTOR); + if (dev->dev_type == RTE_ETH_DEV_PCI) { + if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) + vtpci_irq_config(hw, VIRTIO_MSI_NO_VECTOR); + } vtpci_reset(hw); hw->started = 0; virtio_dev_free_mbufs(dev); @@ -1288,11 +1302,18 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev) } pci_dev = eth_dev->pci_dev; - if (virtio_resource_init(pci_dev) < 0) - return -1; - - hw->use_msix = virtio_has_msix(&pci_dev->addr); - hw->io_base = (uint32_t)(uintptr_t)pci_dev->mem_resource[0].addr; + if (eth_dev->dev_type == RTE_ETH_DEV_PCI) { + if (virtio_resource_init(pci_dev) < 0) + return -1; + hw->use_msix = virtio_has_msix(&pci_dev->addr); + hw->io_base = (uint32_t)(uintptr_t)pci_dev->mem_resource[0].addr; + } +#ifdef RTE_VIRTIO_VDEV + else { + hw->use_msix = 0; + hw->io_base = 0; + } +#endif /* Reset the device although not necessary at startup */ vtpci_reset(hw); @@ -1305,8 +1326,10 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev) virtio_negotiate_features(hw); /* If host does not support status then disable LSC */ - if (!vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) - pci_dev->driver->drv_flags &= ~RTE_PCI_DRV_INTR_LSC; + if (eth_dev->dev_type == RTE_ETH_DEV_PCI) { + if (!vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) + pci_dev->driver->drv_flags &= ~RTE_PCI_DRV_INTR_LSC; + } rx_func_get(eth_dev); @@ -1385,12 +1408,12 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev) 
PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x", eth_dev->data->port_id, pci_dev->id.vendor_id, pci_dev->id.device_id); - - /* Setup interrupt callback */ - if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) - rte_intr_callback_register(&pci_dev->intr_handle, - virtio_interrupt_handler, eth_dev); - + /* Setup interrupt callback */ + if (eth_dev->dev_type == RTE_ETH_DEV_PCI) { + if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) + rte_intr_callback_register(&pci_dev->intr_handle, + virtio_interrupt_handler, eth_dev); + } virtio_dev_cq_start(eth_dev); return 0; @@ -1423,10 +1446,12 @@ eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev) eth_dev->data->mac_addrs = NULL; /* reset interrupt callback */ - if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) - rte_intr_callback_unregister(&pci_dev->intr_handle, - virtio_interrupt_handler, - eth_dev); + if (eth_dev->dev_type == RTE_ETH_DEV_PCI) { + if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) + rte_intr_callback_unregister(&pci_dev->intr_handle, + virtio_interrupt_handler, + eth_dev); + } PMD_INIT_LOG(DEBUG, "dev_uninit completed"); @@ -1481,6 +1506,11 @@ virtio_dev_configure(struct rte_eth_dev *dev) return (-EINVAL); } +#ifdef RTE_VIRTIO_VDEV + if (dev->dev_type == RTE_ETH_DEV_VIRTUAL) + return 0; +#endif + hw->vlan_strip = rxmode->hw_vlan_strip; if (rxmode->hw_vlan_filter @@ -1688,3 +1718,156 @@ static struct rte_driver rte_virtio_driver = { }; PMD_REGISTER_DRIVER(rte_virtio_driver); + +#ifdef RTE_VIRTIO_VDEV + +#define ETH_CVIO_ARG_RX_NUM "rx" +#define ETH_CVIO_ARG_TX_NUM "tx" +#define ETH_CVIO_ARG_CQ_NUM "cq" +#define ETH_CVIO_ARG_SK_PATH "path" +#define ETH_CVIO_ARG_QUEUE_SIZE "queue_num" +/*TODO: specify mac addr */ +static const char *valid_args[] = { + ETH_CVIO_ARG_RX_NUM, + ETH_CVIO_ARG_TX_NUM, + ETH_CVIO_ARG_CQ_NUM, + ETH_CVIO_ARG_SK_PATH, + ETH_CVIO_ARG_QUEUE_SIZE, + NULL +}; + +static int +get_string_arg(const char *key __rte_unused, + const char *value, void *extra_args) +{ + if 
((value == NULL) || (extra_args == NULL)) + return -EINVAL; + + strcpy(extra_args, value); + + return 0; +} + +static int +get_integer_arg(const char *key __rte_unused, + const char *value, void *extra_args) +{ + uint64_t *p_u64 = extra_args; + + if ((value == NULL) || (extra_args == NULL)) + return -EINVAL; + + *p_u64 = (uint64_t)strtoull(value, NULL, 0); + + return 0; +} + +static struct rte_eth_dev * +cvio_eth_dev_alloc(const char *name) +{ + struct rte_eth_dev *eth_dev; + struct rte_eth_dev_data *data; + struct rte_pci_device *pci_dev; + struct virtio_hw *hw; + + eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL); + if (eth_dev == NULL) + rte_panic("cannot alloc rte_eth_dev\n"); + + data = eth_dev->data; + + pci_dev = rte_zmalloc(NULL, sizeof(*pci_dev), 0); + if (!pci_dev) + rte_panic("cannot alloc pci_dev\n"); + hw = rte_zmalloc(NULL, sizeof(*hw), 0); + if (!hw) + rte_panic("malloc virtio_hw failed\n"); + + data->dev_private = hw; + pci_dev->numa_node = SOCKET_ID_ANY; + /* TODO: should remove pci_dev after Bernard Iremonger's patch applied */ + eth_dev->pci_dev = pci_dev; + /* will be used in virtio_dev_info_get() */ + eth_dev->driver = &rte_virtio_pmd; + /* TAILQ_INIT(&(eth_dev->link_intr_cbs)); */ + return eth_dev; +} + +/* + * Dev initialization routine. + * Invoked once for each virtio vdev at EAL init time, + * See rte_eal_dev_init(). + * Returns 0 on success. 
+ */ +static int +rte_cvio_pmd_devinit(const char *name, const char *params) +{ + struct rte_kvargs *kvlist = NULL; + struct rte_eth_dev *eth_dev = NULL; + uint64_t nb_rx = 1, nb_tx = 1, nb_cq = 0, queue_num = 256; + char sock_path[256]; + + if (params == NULL || params[0] == '\0') { + rte_panic("param is null\n"); + } + + kvlist = rte_kvargs_parse(params, valid_args); + if (!kvlist) + rte_panic("error when parsing param\n"); + + if (rte_kvargs_count(kvlist, ETH_CVIO_ARG_SK_PATH) == 1) { + rte_kvargs_process(kvlist, ETH_CVIO_ARG_SK_PATH, + &get_string_arg, sock_path); + } else { + rte_panic("no arg: %s\n", ETH_CVIO_ARG_SK_PATH); + } + + if (rte_kvargs_count(kvlist, ETH_CVIO_ARG_QUEUE_SIZE) == 1) { + rte_kvargs_process(kvlist, ETH_CVIO_ARG_QUEUE_SIZE, + &get_integer_arg, &queue_num); + } + + if (rte_kvargs_count(kvlist, ETH_CVIO_ARG_RX_NUM) == 1) { + rte_kvargs_process(kvlist, ETH_CVIO_ARG_RX_NUM, + &get_integer_arg, &nb_rx); + } + + if (rte_kvargs_count(kvlist, ETH_CVIO_ARG_TX_NUM) == 1) { + rte_kvargs_process(kvlist, ETH_CVIO_ARG_TX_NUM, + &get_integer_arg, &nb_tx); + } + + if (rte_kvargs_count(kvlist, ETH_CVIO_ARG_CQ_NUM) == 1) { + rte_kvargs_process(kvlist, ETH_CVIO_ARG_CQ_NUM, + &get_integer_arg, &nb_cq); + } + + eth_dev = cvio_eth_dev_alloc(name); + + virtio_vdev_init(eth_dev->data, sock_path, + nb_rx, nb_tx, nb_cq, queue_num); + + /* originally, this will be called in rte_eal_pci_probe() */ + eth_virtio_dev_init(eth_dev); + + return 0; +} + +static int +rte_cvio_pmd_devuninit(const char *name) +{ + /* TODO: if it's last one, memory init, free memory */ + rte_panic("%s", name); + return 0; +} + +static struct rte_driver rte_cvio_driver = { + .name = "eth_cvio", + .type = PMD_VDEV, + .init = rte_cvio_pmd_devinit, + .uninit = rte_cvio_pmd_devuninit, +}; + +PMD_REGISTER_DRIVER(rte_cvio_driver); + +#endif diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h index ae2d47d..25613ac 100644 --- a/drivers/net/virtio/virtio_ethdev.h +++ 
b/drivers/net/virtio/virtio_ethdev.h @@ -56,6 +56,17 @@ #define VIRTIO_MAX_RX_PKTLEN 9728 /* Features desired/implemented by this driver. */ +#ifdef RTE_VIRTIO_VDEV +/* use random mac addr for now */ +/* control queue not available for now */ +#define VIRTIO_PMD_GUEST_FEATURES \ + (1u << VIRTIO_NET_F_STATUS | \ + 1u << VIRTIO_NET_F_MQ | \ + 1u << VIRTIO_NET_F_CTRL_MAC_ADDR | \ + 1u << VIRTIO_NET_F_CTRL_RX | \ + 1u << VIRTIO_NET_F_CTRL_VLAN | \ + 1u << VIRTIO_NET_F_MRG_RXBUF) +#else #define VIRTIO_PMD_GUEST_FEATURES \ (1u << VIRTIO_NET_F_MAC | \ 1u << VIRTIO_NET_F_STATUS | \ @@ -65,6 +76,7 @@ 1u << VIRTIO_NET_F_CTRL_RX | \ 1u << VIRTIO_NET_F_CTRL_VLAN | \ 1u << VIRTIO_NET_F_MRG_RXBUF) +#endif /* * CQ function prototype @@ -122,5 +134,9 @@ uint16_t virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, #define VTNET_LRO_FEATURES (VIRTIO_NET_F_GUEST_TSO4 | \ VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_ECN) +#ifdef RTE_VIRTIO_VDEV +int virtio_vdev_init(struct rte_eth_dev_data *data, const char *path, + int nb_rx, int nb_tx, int nb_cq, int queue_num); +#endif #endif /* _VIRTIO_ETHDEV_H_ */ diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h index 689c321..7eb4187 100644 --- a/drivers/net/virtio/virtqueue.h +++ b/drivers/net/virtio/virtqueue.h @@ -68,8 +68,13 @@ struct rte_mbuf; #define VIRTQUEUE_MAX_NAME_SZ 32 +#ifdef RTE_VIRTIO_VDEV +#define RTE_MBUF_DATA_DMA_ADDR(mb) \ + ((uint64_t)(mb)->buf_addr + (mb)->data_off) +#else #define RTE_MBUF_DATA_DMA_ADDR(mb) \ (uint64_t) ((mb)->buf_physaddr + (mb)->data_off) +#endif /* RTE_VIRTIO_VDEV */ #define VTNET_SQ_RQ_QUEUE_IDX 0 #define VTNET_SQ_TQ_QUEUE_IDX 1 @@ -169,7 +174,8 @@ struct virtqueue { void *vq_ring_virt_mem; /**< linear address of vring*/ unsigned int vq_ring_size; - phys_addr_t vq_ring_mem; /**< physical address of vring */ + phys_addr_t vq_ring_mem; /**< physical address of vring for non-vdev, + virtual addr of vring for vdev*/ struct vring vq_ring; /**< vring keeping desc, 
used and avail */ uint16_t vq_free_cnt; /**< num of desc available */ @@ -190,6 +196,7 @@ struct virtqueue { uint16_t vq_avail_idx; uint64_t mbuf_initializer; /**< value to init mbufs. */ phys_addr_t virtio_net_hdr_mem; /**< hdr for each xmit packet */ + void *virtio_net_hdr_vaddr; /**< virtual address of the hdr memzone */ struct rte_mbuf **sw_ring; /**< RX software ring. */ /* dummy mbuf, for wraparound when processing RX ring. */ -- 2.1.4