From: Stephen Hemminger <stephen@networkplumber.org>
To: dev@dpdk.org
Subject: [dpdk-dev] [PATCH 1/3] virtio: use indirect ring elements
Date: Fri, 4 Mar 2016 10:19:19 -0800 [thread overview]
Message-ID: <1457115561-31186-2-git-send-email-stephen@networkplumber.org> (raw)
In-Reply-To: <1457115561-31186-1-git-send-email-stephen@networkplumber.org>
The virtio ring in QEMU/KVM is usually limited to 256 entries,
and the way the virtio driver previously queued mbufs required
nsegs + 1 ring elements per packet. By using the indirect ring element
feature when the host offers it, each packet takes only one ring slot,
even for multi-segment packets (as long as the segments plus the header
fit in the VIRTIO_MAX_TX_INDIRECT-entry indirect table).
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
drivers/net/virtio/virtio_ethdev.c | 44 +++++++++++++++++-------
drivers/net/virtio/virtio_rxtx.c | 69 +++++++++++++++++++++++++++++---------
drivers/net/virtio/virtqueue.h | 19 +++++++++++
3 files changed, 104 insertions(+), 28 deletions(-)
diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index caa970c..95c2203 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -387,27 +387,47 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
vq->virtio_net_hdr_mem = 0;
if (queue_type == VTNET_TQ) {
+ const struct rte_memzone *hdr_mz;
+ struct virtio_tx_region *txr;
+ unsigned int i;
+
/*
* For each xmit packet, allocate a virtio_net_hdr
+ * and indirect ring elements
*/
snprintf(vq_name, sizeof(vq_name), "port%d_tvq%d_hdrzone",
- dev->data->port_id, queue_idx);
- vq->virtio_net_hdr_mz = rte_memzone_reserve_aligned(vq_name,
- vq_size * hw->vtnet_hdr_size,
- socket_id, 0, RTE_CACHE_LINE_SIZE);
- if (vq->virtio_net_hdr_mz == NULL) {
+ dev->data->port_id, queue_idx);
+ hdr_mz = rte_memzone_reserve_aligned(vq_name,
+ vq_size * sizeof(*txr),
+ socket_id, 0,
+ RTE_CACHE_LINE_SIZE);
+ if (hdr_mz == NULL) {
if (rte_errno == EEXIST)
- vq->virtio_net_hdr_mz =
- rte_memzone_lookup(vq_name);
- if (vq->virtio_net_hdr_mz == NULL) {
+ hdr_mz = rte_memzone_lookup(vq_name);
+ if (hdr_mz == NULL) {
rte_free(vq);
return -ENOMEM;
}
}
- vq->virtio_net_hdr_mem =
- vq->virtio_net_hdr_mz->phys_addr;
- memset(vq->virtio_net_hdr_mz->addr, 0,
- vq_size * hw->vtnet_hdr_size);
+ vq->virtio_net_hdr_mz = hdr_mz;
+ vq->virtio_net_hdr_mem = hdr_mz->phys_addr;
+
+ txr = hdr_mz->addr;
+ memset(txr, 0, vq_size * sizeof(*txr));
+ for (i = 0; i < vq_size; i++) {
+ struct vring_desc *start_dp = txr[i].tx_indir;
+
+ vring_desc_init(start_dp, RTE_DIM(txr[i].tx_indir));
+
+ /* first indirect descriptor is always the tx header */
+ start_dp->addr = vq->virtio_net_hdr_mem
+ + i * sizeof(*txr)
+ + offsetof(struct virtio_tx_region, tx_hdr);
+
+ start_dp->len = vq->hw->vtnet_hdr_size;
+ start_dp->flags = VRING_DESC_F_NEXT;
+ }
+
} else if (queue_type == VTNET_CQ) {
/* Allocate a page for control vq command, data and status */
snprintf(vq_name, sizeof(vq_name), "port%d_cvq_hdrzone",
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index e96352c..5fe3eec 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -209,14 +209,15 @@ virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
}
static int
-virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie)
+virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie,
+ int use_indirect)
{
struct vq_desc_extra *dxp;
struct vring_desc *start_dp;
uint16_t seg_num = cookie->nb_segs;
- uint16_t needed = 1 + seg_num;
+ uint16_t needed = use_indirect ? 1 : 1 + seg_num;
uint16_t head_idx, idx;
- size_t head_size = txvq->hw->vtnet_hdr_size;
+ unsigned long offs;
if (unlikely(txvq->vq_free_cnt == 0))
return -ENOSPC;
@@ -232,10 +233,37 @@ virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie)
dxp->ndescs = needed;
start_dp = txvq->vq_ring.desc;
- start_dp[idx].addr =
- txvq->virtio_net_hdr_mem + idx * head_size;
- start_dp[idx].len = head_size;
- start_dp[idx].flags = VRING_DESC_F_NEXT;
+
+ if (use_indirect) {
+ /* setup tx ring slot to point to indirect
+ * descriptor list stored in reserved region.
+ *
+ * the first slot in indirect ring is already preset
+ * to point to the header in reserved region
+ */
+ struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
+
+ offs = idx * sizeof(struct virtio_tx_region)
+ + offsetof(struct virtio_tx_region, tx_indir);
+
+ start_dp[idx].addr = txvq->virtio_net_hdr_mem + offs;
+ start_dp[idx].len = (seg_num + 1) * sizeof(struct vring_desc);
+ start_dp[idx].flags = VRING_DESC_F_INDIRECT;
+
+ /* loop below will fill in rest of the indirect elements */
+ start_dp = txr[idx].tx_indir;
+ idx = 0;
+ } else {
+ /* setup first tx ring slot to point to header
+ * stored in reserved region.
+ */
+ offs = idx * sizeof(struct virtio_tx_region)
+ + offsetof(struct virtio_tx_region, tx_hdr);
+
+ start_dp[idx].addr = txvq->virtio_net_hdr_mem + offs;
+ start_dp[idx].len = txvq->hw->vtnet_hdr_size;
+ start_dp[idx].flags = VRING_DESC_F_NEXT;
+ }
for (; ((seg_num > 0) && (cookie != NULL)); seg_num--) {
idx = start_dp[idx].next;
@@ -246,7 +274,12 @@ virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie)
}
start_dp[idx].flags &= ~VRING_DESC_F_NEXT;
- idx = start_dp[idx].next;
+
+ if (use_indirect)
+ idx = txvq->vq_ring.desc[head_idx].next;
+ else
+ idx = start_dp[idx].next;
+
txvq->vq_desc_head_idx = idx;
if (txvq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
txvq->vq_desc_tail_idx = idx;
@@ -289,10 +322,7 @@ virtio_dev_vring_start(struct virtqueue *vq, int queue_type)
vq->vq_free_cnt = vq->vq_nentries;
memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);
- /* Chain all the descriptors in the ring with an END */
- for (i = 0; i < size - 1; i++)
- vr->desc[i].next = (uint16_t)(i + 1);
- vr->desc[i].next = VQ_RING_DESC_CHAIN_END;
+ vring_desc_init(vr->desc, size);
/*
* Disable device(host) interrupting guest
@@ -848,8 +878,15 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
struct rte_mbuf *txm = tx_pkts[nb_tx];
- /* Need one more descriptor for virtio header. */
- int need = txm->nb_segs - txvq->vq_free_cnt + 1;
+ int use_indirect, slots, need;
+
+ use_indirect = vtpci_with_feature(txvq->hw,
+ VIRTIO_RING_F_INDIRECT_DESC)
+ && (txm->nb_segs < VIRTIO_MAX_TX_INDIRECT);
+
+ /* How many main ring entries are needed for this Tx? */
+ slots = use_indirect ? 1 : 1 + txm->nb_segs;
+ need = slots - txvq->vq_free_cnt;
/* Positive value indicates it need free vring descriptors */
if (unlikely(need > 0)) {
@@ -858,7 +895,7 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
need = RTE_MIN(need, (int)nb_used);
virtio_xmit_cleanup(txvq, need);
- need = txm->nb_segs - txvq->vq_free_cnt + 1;
+ need = slots - txvq->vq_free_cnt;
if (unlikely(need > 0)) {
PMD_TX_LOG(ERR,
"No free tx descriptors to transmit");
@@ -876,7 +913,7 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
}
/* Enqueue Packet buffers */
- error = virtqueue_enqueue_xmit(txvq, txm);
+ error = virtqueue_enqueue_xmit(txvq, txm, use_indirect);
if (unlikely(error)) {
if (error == ENOSPC)
PMD_TX_LOG(ERR, "virtqueue_enqueue Free count = 0");
diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index 68e0b4b..4e9239e 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -243,6 +243,25 @@ struct virtio_net_hdr_mrg_rxbuf {
uint16_t num_buffers; /**< Number of merged rx buffers */
};
+/* Region reserved per Tx ring entry: a transmit header plus an indirect descriptor table */
+#define VIRTIO_MAX_TX_INDIRECT 8
+struct virtio_tx_region {
+ struct virtio_net_hdr_mrg_rxbuf tx_hdr; /* virtio-net header for the packet in this slot */
+ struct vring_desc tx_indir[VIRTIO_MAX_TX_INDIRECT]
+ __attribute__((__aligned__(16))); /* Tx queue setup presets entry 0 to describe tx_hdr */
+};
+
+/* Chain dp[0..n-1] through their next fields and end the chain with VQ_RING_DESC_CHAIN_END; n must be >= 1 */
+static inline void
+vring_desc_init(struct vring_desc *dp, uint16_t n)
+{
+ uint16_t i;
+
+ for (i = 0; i < n - 1; i++)
+ dp[i].next = (uint16_t)(i + 1);
+ dp[i].next = VQ_RING_DESC_CHAIN_END; /* i == n - 1 here: the last entry terminates the chain */
+}
+
/**
* Tell the backend not to interrupt us.
*/
--
2.1.4
next prev parent reply other threads:[~2016-03-04 18:19 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-03-04 18:19 [dpdk-dev] [PATCH 0/3 v3] virtio: Tx performance improvements Stephen Hemminger
2016-03-04 18:19 ` Stephen Hemminger [this message]
2016-03-04 18:19 ` [dpdk-dev] [PATCH 2/3] virtio: use any layout on transmit Stephen Hemminger
2016-03-04 18:19 ` [dpdk-dev] [PATCH 3/3] virtio: optimize transmit enqueue Stephen Hemminger
2016-03-14 10:56 ` [dpdk-dev] [PATCH 0/3 v3] virtio: Tx performance improvements Bruce Richardson
2016-03-14 12:24 ` Yuanhan Liu
2016-03-16 8:25 ` Xie, Huawei
2016-03-16 17:40 ` Bruce Richardson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1457115561-31186-2-git-send-email-stephen@networkplumber.org \
--to=stephen@networkplumber.org \
--cc=dev@dpdk.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).