From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-pf0-f176.google.com (mail-pf0-f176.google.com [209.85.192.176]) by dpdk.org (Postfix) with ESMTP id C1C6A2B96 for ; Fri, 4 Mar 2016 19:19:09 +0100 (CET) Received: by mail-pf0-f176.google.com with SMTP id 63so39170582pfe.3 for ; Fri, 04 Mar 2016 10:19:09 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=networkplumber-org.20150623.gappssmtp.com; s=20150623; h=from:to:cc:subject:date:message-id:in-reply-to:references; bh=bF3VLXEk25L3r7zaV8ENF4KK48K9g5ZwXIyz+jYA+BY=; b=0Men+j33vYAWrV1cgZiMqsSms8qWfYPucOHdhHClmadCMUDA73wTIpLwI/otLBEuJ4 LEnYWQc4a1AZasfkJTwXSnYSqZzusSGVLQCK5EpK6yogzrd38O2Vf6SCIc2nXAxq5YIu eQQCfeEbKN4ZMEf5aKyRm+jfJ469ht6jjX7kJdsHND8hESxVdLuC4DzemvrlC0ggxhVf 50j9Qrpu1WAnOxR/aUZSPKIEEVBTUstcFxGWFZqLxPhECSqSt5nfXcgFldQE9osKuQqK NvtQqFwGjNN/LRyM+WImDgJgK45K4fHC1WHjGA1xvY7TC0Gm8AOsqMfrIvjXC8CUTs8e fw7A== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20130820; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references; bh=bF3VLXEk25L3r7zaV8ENF4KK48K9g5ZwXIyz+jYA+BY=; b=fxpVK0qN0jcKTxFs0+uIEi2WbmKzZsZ52MP7zQ8AsHPi8kF1MfPEvGQ2gvxvPQtEC0 0vCbth3uIxs8gejkFycdDWhGUwi3djQnjHeeab5LXZxaNU6mG2rKucGwT2TgMU2EuB6l /fHHpBscjxnPn2LuZJL7k6s3qdjKx3RF0t5iInz9AHoqyuCMn0Q2G+1y/mlRBQ560qju no6tgR+evYWW4Ep/xDnI2HQZzzpKetOJ5qo2Wx7U6TjlPoYjvfrGCUG8EbRyLyNOyMQ9 xzoTDlSkrHfZnxLOREBLFqQPL3yDGq9TEu/5JJ8dRoTqi0CiJdflzU8vm1e/wH8IsM1w kg/Q== X-Gm-Message-State: AD7BkJLDbEq4x0/M3VXC4Sj+r9wr0an94CAEoVybSBp8JWCBOMBppexb27jf38jvxEvEuQ== X-Received: by 10.98.86.142 with SMTP id h14mr13984929pfj.78.1457115549208; Fri, 04 Mar 2016 10:19:09 -0800 (PST) Received: from xeon-e3.home.lan (static-50-53-82-155.bvtn.or.frontiernet.net. 
[50.53.82.155]) by smtp.gmail.com with ESMTPSA id qy7sm7040857pab.34.2016.03.04.10.19.07 (version=TLS1_2 cipher=ECDHE-RSA-AES128-SHA bits=128/128); Fri, 04 Mar 2016 10:19:08 -0800 (PST) From: Stephen Hemminger To: dev@dpdk.org Date: Fri, 4 Mar 2016 10:19:19 -0800 Message-Id: <1457115561-31186-2-git-send-email-stephen@networkplumber.org> X-Mailer: git-send-email 2.1.4 In-Reply-To: <1457115561-31186-1-git-send-email-stephen@networkplumber.org> References: <1457115561-31186-1-git-send-email-stephen@networkplumber.org> Subject: [dpdk-dev] [PATCH 1/3] virtio: use indirect ring elements X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 04 Mar 2016 18:19:10 -0000 The virtio ring in QEMU/KVM is usually limited to 256 entries, and the normal way that the virtio driver queued mbufs required nsegs + 1 ring elements. By using the indirect ring element feature, if available, each packet takes only one ring slot, even for multi-segment packets. 
Signed-off-by: Stephen Hemminger --- drivers/net/virtio/virtio_ethdev.c | 44 +++++++++++++++++------- drivers/net/virtio/virtio_rxtx.c | 69 +++++++++++++++++++++++++++++--------- drivers/net/virtio/virtqueue.h | 19 +++++++++++ 3 files changed, 104 insertions(+), 28 deletions(-) diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index caa970c..95c2203 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -387,27 +387,47 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, vq->virtio_net_hdr_mem = 0; if (queue_type == VTNET_TQ) { + const struct rte_memzone *hdr_mz; + struct virtio_tx_region *txr; + unsigned int i; + /* * For each xmit packet, allocate a virtio_net_hdr + * and indirect ring elements */ snprintf(vq_name, sizeof(vq_name), "port%d_tvq%d_hdrzone", - dev->data->port_id, queue_idx); - vq->virtio_net_hdr_mz = rte_memzone_reserve_aligned(vq_name, - vq_size * hw->vtnet_hdr_size, - socket_id, 0, RTE_CACHE_LINE_SIZE); - if (vq->virtio_net_hdr_mz == NULL) { + dev->data->port_id, queue_idx); + hdr_mz = rte_memzone_reserve_aligned(vq_name, + vq_size * sizeof(*txr), + socket_id, 0, + RTE_CACHE_LINE_SIZE); + if (hdr_mz == NULL) { if (rte_errno == EEXIST) - vq->virtio_net_hdr_mz = - rte_memzone_lookup(vq_name); - if (vq->virtio_net_hdr_mz == NULL) { + hdr_mz = rte_memzone_lookup(vq_name); + if (hdr_mz == NULL) { rte_free(vq); return -ENOMEM; } } - vq->virtio_net_hdr_mem = - vq->virtio_net_hdr_mz->phys_addr; - memset(vq->virtio_net_hdr_mz->addr, 0, - vq_size * hw->vtnet_hdr_size); + vq->virtio_net_hdr_mz = hdr_mz; + vq->virtio_net_hdr_mem = hdr_mz->phys_addr; + + txr = hdr_mz->addr; + memset(txr, 0, vq_size * sizeof(*txr)); + for (i = 0; i < vq_size; i++) { + struct vring_desc *start_dp = txr[i].tx_indir; + + vring_desc_init(start_dp, RTE_DIM(txr[i].tx_indir)); + + /* first indirect descriptor is always the tx header */ + start_dp->addr = vq->virtio_net_hdr_mem + + i * sizeof(*txr) + + 
offsetof(struct virtio_tx_region, tx_hdr); + + start_dp->len = vq->hw->vtnet_hdr_size; + start_dp->flags = VRING_DESC_F_NEXT; + } + } else if (queue_type == VTNET_CQ) { /* Allocate a page for control vq command, data and status */ snprintf(vq_name, sizeof(vq_name), "port%d_cvq_hdrzone", diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c index e96352c..5fe3eec 100644 --- a/drivers/net/virtio/virtio_rxtx.c +++ b/drivers/net/virtio/virtio_rxtx.c @@ -209,14 +209,15 @@ virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie) } static int -virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie) +virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie, + int use_indirect) { struct vq_desc_extra *dxp; struct vring_desc *start_dp; uint16_t seg_num = cookie->nb_segs; - uint16_t needed = 1 + seg_num; + uint16_t needed = use_indirect ? 1 : 1 + seg_num; uint16_t head_idx, idx; - size_t head_size = txvq->hw->vtnet_hdr_size; + unsigned long offs; if (unlikely(txvq->vq_free_cnt == 0)) return -ENOSPC; @@ -232,10 +233,37 @@ virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie) dxp->ndescs = needed; start_dp = txvq->vq_ring.desc; - start_dp[idx].addr = - txvq->virtio_net_hdr_mem + idx * head_size; - start_dp[idx].len = head_size; - start_dp[idx].flags = VRING_DESC_F_NEXT; + + if (use_indirect) { + /* setup tx ring slot to point to indirect + * descriptor list stored in reserved region. 
+ * + * the first slot in indirect ring is already preset + * to point to the header in reserved region + */ + struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr; + + offs = idx * sizeof(struct virtio_tx_region) + + offsetof(struct virtio_tx_region, tx_indir); + + start_dp[idx].addr = txvq->virtio_net_hdr_mem + offs; + start_dp[idx].len = (seg_num + 1) * sizeof(struct vring_desc); + start_dp[idx].flags = VRING_DESC_F_INDIRECT; + + /* loop below will fill in rest of the indirect elements */ + start_dp = txr[idx].tx_indir; + idx = 0; + } else { + /* setup first tx ring slot to point to header + * stored in reserved region. + */ + offs = idx * sizeof(struct virtio_tx_region) + + offsetof(struct virtio_tx_region, tx_hdr); + + start_dp[idx].addr = txvq->virtio_net_hdr_mem + offs; + start_dp[idx].len = txvq->hw->vtnet_hdr_size; + start_dp[idx].flags = VRING_DESC_F_NEXT; + } for (; ((seg_num > 0) && (cookie != NULL)); seg_num--) { idx = start_dp[idx].next; @@ -246,7 +274,12 @@ virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie) } start_dp[idx].flags &= ~VRING_DESC_F_NEXT; - idx = start_dp[idx].next; + + if (use_indirect) + idx = txvq->vq_ring.desc[head_idx].next; + else + idx = start_dp[idx].next; + txvq->vq_desc_head_idx = idx; if (txvq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END) txvq->vq_desc_tail_idx = idx; @@ -289,10 +322,7 @@ virtio_dev_vring_start(struct virtqueue *vq, int queue_type) vq->vq_free_cnt = vq->vq_nentries; memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries); - /* Chain all the descriptors in the ring with an END */ - for (i = 0; i < size - 1; i++) - vr->desc[i].next = (uint16_t)(i + 1); - vr->desc[i].next = VQ_RING_DESC_CHAIN_END; + vring_desc_init(vr->desc, size); /* * Disable device(host) interrupting guest @@ -848,8 +878,15 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) { struct rte_mbuf *txm = tx_pkts[nb_tx]; - /* Need one 
more descriptor for virtio header. */ - int need = txm->nb_segs - txvq->vq_free_cnt + 1; + int use_indirect, slots, need; + + use_indirect = vtpci_with_feature(txvq->hw, + VIRTIO_RING_F_INDIRECT_DESC) + && (txm->nb_segs < VIRTIO_MAX_TX_INDIRECT); + + /* How many main ring entries are needed to this Tx? */ + slots = use_indirect ? 1 : 1 + txm->nb_segs; + need = slots - txvq->vq_free_cnt; /* Positive value indicates it need free vring descriptors */ if (unlikely(need > 0)) { @@ -858,7 +895,7 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) need = RTE_MIN(need, (int)nb_used); virtio_xmit_cleanup(txvq, need); - need = txm->nb_segs - txvq->vq_free_cnt + 1; + need = slots - txvq->vq_free_cnt; if (unlikely(need > 0)) { PMD_TX_LOG(ERR, "No free tx descriptors to transmit"); @@ -876,7 +913,7 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) } /* Enqueue Packet buffers */ - error = virtqueue_enqueue_xmit(txvq, txm); + error = virtqueue_enqueue_xmit(txvq, txm, use_indirect); if (unlikely(error)) { if (error == ENOSPC) PMD_TX_LOG(ERR, "virtqueue_enqueue Free count = 0"); diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h index 68e0b4b..4e9239e 100644 --- a/drivers/net/virtio/virtqueue.h +++ b/drivers/net/virtio/virtqueue.h @@ -243,6 +243,25 @@ struct virtio_net_hdr_mrg_rxbuf { uint16_t num_buffers; /**< Number of merged rx buffers */ }; +/* Region reserved to allow for transmit header and indirect ring */ +#define VIRTIO_MAX_TX_INDIRECT 8 +struct virtio_tx_region { + struct virtio_net_hdr_mrg_rxbuf tx_hdr; + struct vring_desc tx_indir[VIRTIO_MAX_TX_INDIRECT] + __attribute__((__aligned__(16))); +}; + +/* Chain all the descriptors in the ring with an END */ +static inline void +vring_desc_init(struct vring_desc *dp, uint16_t n) +{ + uint16_t i; + + for (i = 0; i < n - 1; i++) + dp[i].next = (uint16_t)(i + 1); + dp[i].next = VQ_RING_DESC_CHAIN_END; +} + /** * Tell the backend not to interrupt 
us. */ -- 2.1.4