DPDK patches and discussions
* [dpdk-dev] [RFC PATCH] virtio: virtio ring layout optimization and vectorization rx
@ 2015-09-17 16:17 Huawei Xie
  2015-09-22  8:37 ` Thomas Monjalon
  2015-09-24 18:03 ` [dpdk-dev] [RFC PATCH v2] virtio ring layout optimization and vectorization Huawei Xie
  0 siblings, 2 replies; 6+ messages in thread
From: Huawei Xie @ 2015-09-17 16:17 UTC (permalink / raw)
  To: dev

This single patch is meant to help people get familiar with the optimization and to collect feedback.
It isn't split into smaller patches because the change is straightforward.
The cleanups haven't been finished yet.
The description and illustration of the idea are in a previous mail titled "virtio optimization idea".
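
In short: the avail ring is initialized once so that entry i always points
to descriptor i (on TX, each avail entry points to a fixed virtio_net_hdr
descriptor chained to its data descriptor), and it is never written again;
only avail->idx moves. A rough sketch of the RX half, condensed from the
hunks below:

	/* fixed avail ring: set up once at vring start, never updated again */
	for (i = 0; i < vq->vq_nentries; i++) {
		vq->vq_ring.avail->ring[i] = i;
		vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE;
	}
	/* with descriptor id == ring index, the used ring and the parallel
	 * sw_ring of mbufs can be walked as plain arrays, which is what
	 * enables the SSE RX loop in virtio_rxtx_simple.c */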

---
 config/common_linuxapp                  |   1 +
 drivers/net/virtio/Makefile             |   2 +-
 drivers/net/virtio/virtio_ethdev.c      |  16 ++
 drivers/net/virtio/virtio_ethdev.h      |   5 +
 drivers/net/virtio/virtio_rxtx.c        |  74 +++++-
 drivers/net/virtio/virtio_rxtx.h        |  41 ++++
 drivers/net/virtio/virtio_rxtx_simple.c | 383 ++++++++++++++++++++++++++++++++
 drivers/net/virtio/virtqueue.h          |   4 +
 8 files changed, 522 insertions(+), 4 deletions(-)
 create mode 100644 drivers/net/virtio/virtio_rxtx.h
 create mode 100644 drivers/net/virtio/virtio_rxtx_simple.c

diff --git a/config/common_linuxapp b/config/common_linuxapp
index 0de43d5..02b80cd 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -241,6 +241,7 @@ CONFIG_RTE_LIBRTE_ENIC_DEBUG=n
 # Compile burst-oriented VIRTIO PMD driver
 #
 CONFIG_RTE_LIBRTE_VIRTIO_PMD=y
+CONFIG_RTE_VIRTIO_SIMPLE=y
 CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_INIT=n
 CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_RX=n
 CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_TX=n
diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile
index 930b60f..89a8a37 100644
--- a/drivers/net/virtio/Makefile
+++ b/drivers/net/virtio/Makefile
@@ -50,7 +50,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtqueue.c
 SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_pci.c
 SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx.c
 SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_ethdev.c
-
+SRCS-$(CONFIG_RTE_VIRTIO_SIMPLE)     += virtio_rxtx_simple.c
 
 # this lib depends upon:
 DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_eal lib/librte_ether
diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index 465d3cd..7b24b96 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -61,6 +61,7 @@
 #include "virtio_pci.h"
 #include "virtio_logs.h"
 #include "virtqueue.h"
+#include "virtio_rxtx.h"
 
 
 static int eth_virtio_dev_init(struct rte_eth_dev *eth_dev);
@@ -291,6 +292,9 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
 			dev->data->port_id, queue_idx);
 		vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) +
 			vq_size * sizeof(struct vq_desc_extra), RTE_CACHE_LINE_SIZE);
+		vq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
+			(RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) * sizeof(vq->sw_ring[0]),
+			RTE_CACHE_LINE_SIZE, socket_id);
 	} else if (queue_type == VTNET_TQ) {
 		snprintf(vq_name, sizeof(vq_name), "port%d_tvq%d",
 			dev->data->port_id, queue_idx);
@@ -307,6 +311,10 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
 		PMD_INIT_LOG(ERR, "%s: Can not allocate virtqueue", __func__);
 		return (-ENOMEM);
 	}
+	if (queue_type == VTNET_RQ && vq->sw_ring == NULL) {
+		PMD_INIT_LOG(ERR, "%s: Can not allocate soft ring", __func__);
+		return -ENOMEM;
+	}
 
 	vq->hw = hw;
 	vq->port_id = dev->data->port_id;
@@ -1150,6 +1158,14 @@ rx_func_get(struct rte_eth_dev *eth_dev)
 		eth_dev->rx_pkt_burst = &virtio_recv_mergeable_pkts;
 	else
 		eth_dev->rx_pkt_burst = &virtio_recv_pkts;
+
+#ifdef RTE_VIRTIO_SIMPLE
+	if (!vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
+		printf("use simple rxtx\n");
+		eth_dev->rx_pkt_burst = &virtio_recv_pkts_simple;
+		eth_dev->tx_pkt_burst = &virtio_xmit_pkts_simple;
+	}
+#endif
 }
 
 /*
diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h
index 9026d42..e45e863 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -108,6 +108,11 @@ uint16_t virtio_recv_mergeable_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 uint16_t virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		uint16_t nb_pkts);
 
+uint16_t virtio_recv_pkts_simple(void *rx_queue, struct rte_mbuf **rx_pkts,
+                uint16_t nb_pkts);
+
+uint16_t virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
+                uint16_t nb_pkts);
 
 /*
  * The VIRTIO_NET_F_GUEST_TSO[46] features permit the host to send us
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index c5b53bb..0ba1ea1 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -54,6 +54,7 @@
 #include "virtio_logs.h"
 #include "virtio_ethdev.h"
 #include "virtqueue.h"
+#include "virtio_rxtx.h"
 
 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
@@ -105,7 +106,7 @@ virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
 	uint16_t i;
 
 	/*  Caller does the check */
-	for (i = 0; i < num ; i++) {
+	for (i = 0; i < num; i++) {
 		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
 		uep = &vq->vq_ring.used->ring[used_idx];
 		desc_idx = (uint16_t) uep->id;
@@ -298,6 +299,20 @@ virtio_dev_vring_start(struct virtqueue *vq, int queue_type)
 		/* Allocate blank mbufs for the each rx descriptor */
 		nbufs = 0;
 		error = ENOSPC;
+
+#ifdef RTE_VIRTIO_SIMPLE
+		printf("\n\n%s(): ok, initialised rx avail ring, and i will never update it again\n", __func__);
+		for (i = 0; i < vq->vq_nentries; i++) {
+			vq->vq_ring.avail->ring[i] = i;
+			vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE;
+		}
+		printf("%s(): free_cnt=%d\n", __func__, vq->vq_free_cnt);
+		memset(&vq->fake_mbuf, 0, sizeof(vq->fake_mbuf));
+		for (i = 0; i < RTE_PMD_VIRTIO_RX_MAX_BURST; i++)
+			vq->sw_ring[vq->vq_nentries + i] = &vq->fake_mbuf;
+
+#endif
+
 		while (!virtqueue_full(vq)) {
 			m = rte_rxmbuf_alloc(vq->mpool);
 			if (m == NULL)
@@ -306,8 +321,11 @@ virtio_dev_vring_start(struct virtqueue *vq, int queue_type)
 			/******************************************
 			*         Enqueue allocated buffers        *
 			*******************************************/
+#ifdef RTE_VIRTIO_SIMPLE
+			error = virtqueue_enqueue_recv_refill_simple(vq, m);
+#else
 			error = virtqueue_enqueue_recv_refill(vq, m);
-
+#endif
 			if (error) {
 				rte_pktmbuf_free(m);
 				break;
@@ -317,6 +335,10 @@ virtio_dev_vring_start(struct virtqueue *vq, int queue_type)
 
 		vq_update_avail_idx(vq);
 
+#ifdef RTE_VIRTIO_SIMPLE
+		printf("%s(): free_cnt=%d avail->idx=%d\n", __func__,
+			vq->vq_free_cnt, vq->vq_ring.avail->idx);
+#endif
 		PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);
 
 		VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_SEL,
@@ -324,6 +346,24 @@ virtio_dev_vring_start(struct virtqueue *vq, int queue_type)
 		VIRTIO_WRITE_REG_4(vq->hw, VIRTIO_PCI_QUEUE_PFN,
 			vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
 	} else if (queue_type == VTNET_TQ) {
+#ifdef RTE_VIRTIO_SIMPLE
+		int mid_idx  = vq->vq_nentries >> 1;
+		printf("%s(): ok, initialised tx avail ring, and i will never update it again, mid_idx=%d\n", __func__, mid_idx);
+		for (i = 0; i < mid_idx; i++) {
+			vq->vq_ring.avail->ring[i] = i + mid_idx;
+			vq->vq_ring.desc[i + mid_idx].next = i;
+			vq->vq_ring.desc[i + mid_idx].addr =
+				vq->virtio_net_hdr_mem +
+					mid_idx * vq->hw->vtnet_hdr_size;
+			vq->vq_ring.desc[i + mid_idx].len =
+				vq->hw->vtnet_hdr_size;
+			vq->vq_ring.desc[i + mid_idx].flags = VRING_DESC_F_NEXT;
+			vq->vq_ring.desc[i].flags = 0;
+		}
+		for (i = mid_idx; i < vq->vq_nentries; i++)
+			vq->vq_ring.avail->ring[i] = i;
+#endif
+
 		VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_SEL,
 			vq->vq_queue_index);
 		VIRTIO_WRITE_REG_4(vq->hw, VIRTIO_PCI_QUEUE_PFN,
@@ -398,6 +438,9 @@ virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
 	vq->mpool = mp;
 
 	dev->data->rx_queues[queue_idx] = vq;
+
+	virtio_rxq_vec_setup(vq);
+
 	return 0;
 }
 
@@ -502,7 +545,8 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 	virtio_rmb();
 
 	num = (uint16_t)(likely(nb_used <= nb_pkts) ? nb_used : nb_pkts);
-	num = (uint16_t)(likely(num <= VIRTIO_MBUF_BURST_SZ) ? num : VIRTIO_MBUF_BURST_SZ);
+	num = (uint16_t)(likely(num <= VIRTIO_MBUF_BURST_SZ) ?
+		num : VIRTIO_MBUF_BURST_SZ);
 	if (likely(num > DESC_PER_CACHELINE))
 		num = num - ((rxvq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
 
@@ -825,3 +869,27 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 
 	return nb_tx;
 }
+
+uint16_t __attribute__((weak))
+virtio_recv_pkts_simple(
+	void __rte_unused *rx_queue,
+	struct rte_mbuf __rte_unused **rx_pkts,
+	uint16_t __rte_unused nb_pkts)
+{
+	return 0;
+}
+
+uint16_t __attribute__((weak))
+virtio_xmit_pkts_simple(
+	void __rte_unused *tx_queue,
+	struct rte_mbuf __rte_unused **tx_pkts,
+	uint16_t __rte_unused nb_pkts)
+{
+	return 0;
+}
+
+int __attribute__((weak))
+virtio_rxq_vec_setup(struct virtqueue __rte_unused *rxq)
+{
+	return -1;
+}
diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
new file mode 100644
index 0000000..8085dd3
--- /dev/null
+++ b/drivers/net/virtio/virtio_rxtx.h
@@ -0,0 +1,41 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define RTE_PMD_VIRTIO_RX_MAX_BURST 64
+int virtio_rxq_vec_setup(struct virtqueue *rxq);
+
+#ifdef RTE_VIRTIO_SIMPLE
+int virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq,
+	struct rte_mbuf *m);
+#endif
+
diff --git a/drivers/net/virtio/virtio_rxtx_simple.c b/drivers/net/virtio/virtio_rxtx_simple.c
new file mode 100644
index 0000000..4b99060
--- /dev/null
+++ b/drivers/net/virtio/virtio_rxtx_simple.c
@@ -0,0 +1,383 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include <tmmintrin.h>
+
+#include <rte_cycles.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_branch_prediction.h>
+#include <rte_mempool.h>
+#include <rte_malloc.h>
+#include <rte_mbuf.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_prefetch.h>
+#include <rte_string_fns.h>
+#include <rte_errno.h>
+#include <rte_byteorder.h>
+
+#include "virtio_logs.h"
+#include "virtio_ethdev.h"
+#include "virtqueue.h"
+#include "virtio_rxtx.h"
+
+#define RTE_VIRTIO_VPMD_RX_BURST 32
+#define RTE_VIRTIO_DESC_PER_LOOP 8
+#define RTE_VIRTIO_VPMD_RX_REARM_THRESH RTE_VIRTIO_VPMD_RX_BURST
+
+int __attribute__((cold))
+virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq,
+	struct rte_mbuf *cookie)
+{
+	struct vq_desc_extra *dxp;
+	struct vring_desc *start_dp;
+	uint16_t desc_idx;
+
+	desc_idx = vq->vq_avail_idx & (vq->vq_nentries - 1);
+	dxp = &vq->vq_descx[desc_idx];
+	dxp->cookie = (void *)cookie;
+	vq->sw_ring[desc_idx] = cookie;
+
+	start_dp = vq->vq_ring.desc;
+	start_dp[desc_idx].addr = (uint64_t)((uintptr_t)cookie->buf_physaddr +
+		RTE_PKTMBUF_HEADROOM - sizeof(struct virtio_net_hdr));
+	start_dp[desc_idx].len = cookie->buf_len -
+		RTE_PKTMBUF_HEADROOM + sizeof(struct virtio_net_hdr);
+
+	vq->vq_free_cnt--;
+	vq->vq_avail_idx++;
+
+	return 0;
+}
+
+static void
+virtio_rxq_rearm_vec(struct virtqueue *rxvq)
+{
+	int i;
+	uint16_t desc_idx;
+	struct rte_mbuf **sw_ring;
+	struct vring_desc *start_dp;
+	int ret;
+
+	desc_idx = rxvq->vq_avail_idx & (rxvq->vq_nentries - 1);
+	sw_ring = &rxvq->sw_ring[desc_idx];
+	start_dp = &rxvq->vq_ring.desc[desc_idx];
+
+	ret = rte_mempool_get_bulk(rxvq->mpool, (void **)sw_ring,
+		RTE_VIRTIO_VPMD_RX_REARM_THRESH);
+	if (unlikely(ret)) {
+		printf("error? ret=%d count=%d\n", ret,
+			rte_mempool_count(rxvq->mpool));
+		return;
+	}
+
+	for (i = 0; i < RTE_VIRTIO_VPMD_RX_REARM_THRESH; i++) {
+		uintptr_t p;
+		p = (uintptr_t)&sw_ring[i]->rearm_data;
+		*(uint64_t *)p = rxvq->mbuf_initializer;
+		start_dp[i].addr =
+			(uint64_t)((uintptr_t)sw_ring[i]->buf_physaddr +
+			RTE_PKTMBUF_HEADROOM - sizeof(struct virtio_net_hdr));
+		start_dp[i].len = sw_ring[i]->buf_len -
+			RTE_PKTMBUF_HEADROOM + sizeof(struct virtio_net_hdr);
+	}
+
+	rxvq->vq_avail_idx += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
+	rxvq->vq_free_cnt -= RTE_VIRTIO_VPMD_RX_REARM_THRESH;
+	vq_update_avail_idx(rxvq);
+}
+
+uint16_t
+virtio_recv_pkts_simple(void *rx_queue, struct rte_mbuf **rx_pkts,
+	uint16_t nb_pkts)
+{
+	struct virtqueue *rxvq = rx_queue;
+	uint16_t nb_used;
+
+	uint16_t desc_idx;
+	struct vring_used_elem *rused;
+	struct rte_mbuf **sw_ring;
+	struct rte_mbuf **sw_ring_end;
+	uint32_t nb_pkts_received;
+
+	__m128i shuf_msk1, shuf_msk2, len_adjust;
+
+	shuf_msk1 = _mm_set_epi8(
+		0xFF, 0xFF, 0xFF, 0xFF,	/* RSS */
+		0xFF, 0xFF, 0xFF, 0xFF,	/* vlan tci, reserved */
+		0xFF, 0xFF, 5, 4,	/* pkt len */
+		5, 4,			/* dat len */
+		0xFF, 0xFF		/* packet type */
+
+	);
+
+	shuf_msk2 = _mm_set_epi8(
+		0xFF, 0xFF, 0xFF, 0xFF,	/* RSS */
+		0xFF, 0xFF, 0xFF, 0xFF,	/* vlan, macip, reserved */
+		0xFF, 0xFF, 13, 12,	/* pkt len */
+		13, 12,			/* dat len */
+		0xFF, 0xFF		/* packet type */
+	);
+
+	len_adjust = _mm_set_epi16(
+		0, 0, 0, 0,
+		0,
+		(uint16_t) -sizeof(struct virtio_net_hdr),
+		(uint16_t) -sizeof(struct virtio_net_hdr),
+		0);
+
+	if (unlikely(nb_pkts < RTE_VIRTIO_VPMD_RX_BURST))
+		return 0;
+
+	nb_used = *(volatile uint16_t *)&rxvq->vq_ring.used->idx -
+		rxvq->vq_used_cons_idx;
+
+	rte_compiler_barrier();
+
+	desc_idx = (uint16_t)(rxvq->vq_used_cons_idx & (rxvq->vq_nentries - 1));
+	rused = &rxvq->vq_ring.used->ring[desc_idx];
+	sw_ring  = &rxvq->sw_ring[desc_idx];
+	sw_ring_end = &rxvq->sw_ring[rxvq->vq_nentries];
+
+	_mm_prefetch((const void *)rused, _MM_HINT_T0);
+
+	if (rxvq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
+		virtio_rxq_rearm_vec(rxvq);
+		if (unlikely(virtqueue_kick_prepare(rxvq)))
+			virtqueue_notify(rxvq);
+	}
+
+	for (nb_pkts_received = 0;
+		nb_pkts_received < RTE_VIRTIO_VPMD_RX_BURST;) {
+		__m128i desc[RTE_VIRTIO_DESC_PER_LOOP / 2];
+		__m128i mbp[RTE_VIRTIO_DESC_PER_LOOP / 2];
+		__m128i pkt_mb[RTE_VIRTIO_DESC_PER_LOOP];
+
+		mbp[3] = _mm_loadu_si128((__m128i *)(sw_ring + 6));
+		desc[3] = _mm_loadu_si128((__m128i *)(rused + 6));
+		_mm_storeu_si128((__m128i *)&rx_pkts[6], mbp[3]);
+
+		mbp[2] = _mm_loadu_si128((__m128i *)(sw_ring + 4));
+		desc[2] = _mm_loadu_si128((__m128i *)(rused + 4));
+		_mm_storeu_si128((__m128i *)&rx_pkts[4], mbp[2]);
+
+		mbp[1] = _mm_loadu_si128((__m128i *)(sw_ring + 2));
+		desc[1] = _mm_loadu_si128((__m128i *)(rused + 2));
+		_mm_storeu_si128((__m128i *)&rx_pkts[2], mbp[1]);
+
+		mbp[0] = _mm_loadu_si128((__m128i *)(sw_ring + 0));
+		desc[0] = _mm_loadu_si128((__m128i *)(rused + 0));
+		_mm_storeu_si128((__m128i *)&rx_pkts[0], mbp[0]);
+
+		pkt_mb[7] = _mm_shuffle_epi8(desc[3], shuf_msk1);
+		pkt_mb[6] = _mm_shuffle_epi8(desc[3], shuf_msk2);
+		pkt_mb[7] = _mm_add_epi16(pkt_mb[7], len_adjust);
+		pkt_mb[6] = _mm_add_epi16(pkt_mb[6], len_adjust);
+		_mm_storeu_si128((void *)&rx_pkts[7]->rx_descriptor_fields1,
+			pkt_mb[7]);
+		_mm_storeu_si128((void *)&rx_pkts[6]->rx_descriptor_fields1,
+			pkt_mb[6]);
+
+		pkt_mb[5] = _mm_shuffle_epi8(desc[2], shuf_msk2);
+		pkt_mb[4] = _mm_shuffle_epi8(desc[2], shuf_msk1);
+		pkt_mb[5] = _mm_add_epi16(pkt_mb[5], len_adjust);
+		pkt_mb[4] = _mm_add_epi16(pkt_mb[4], len_adjust);
+		_mm_storeu_si128((void *)&rx_pkts[5]->rx_descriptor_fields1,
+			pkt_mb[5]);
+		_mm_storeu_si128((void *)&rx_pkts[4]->rx_descriptor_fields1,
+			pkt_mb[4]);
+
+		pkt_mb[3] = _mm_shuffle_epi8(desc[1], shuf_msk2);
+		pkt_mb[2] = _mm_shuffle_epi8(desc[1], shuf_msk1);
+		pkt_mb[3] = _mm_add_epi16(pkt_mb[3], len_adjust);
+		pkt_mb[2] = _mm_add_epi16(pkt_mb[2], len_adjust);
+		_mm_storeu_si128((void *)&rx_pkts[3]->rx_descriptor_fields1,
+			pkt_mb[3]);
+		_mm_storeu_si128((void *)&rx_pkts[2]->rx_descriptor_fields1,
+			pkt_mb[2]);
+
+		pkt_mb[1] = _mm_shuffle_epi8(desc[0], shuf_msk2);
+		pkt_mb[0] = _mm_shuffle_epi8(desc[0], shuf_msk1);
+		pkt_mb[1] = _mm_add_epi16(pkt_mb[1], len_adjust);
+		pkt_mb[0] = _mm_add_epi16(pkt_mb[0], len_adjust);
+		_mm_storeu_si128((void *)&rx_pkts[1]->rx_descriptor_fields1,
+			pkt_mb[1]);
+		_mm_storeu_si128((void *)&rx_pkts[0]->rx_descriptor_fields1,
+			pkt_mb[0]);
+
+		if (unlikely(nb_used <= RTE_VIRTIO_DESC_PER_LOOP)) {
+			if (sw_ring + nb_used <= sw_ring_end)
+				nb_pkts_received += nb_used;
+			else
+				nb_pkts_received += sw_ring_end - sw_ring;
+			break;
+		} else {
+			if (unlikely(sw_ring + RTE_VIRTIO_DESC_PER_LOOP >=
+				sw_ring_end)) {
+				nb_pkts_received += sw_ring_end - sw_ring;
+				break;
+			} else {
+				nb_pkts_received += RTE_VIRTIO_DESC_PER_LOOP;
+
+				rx_pkts += RTE_VIRTIO_DESC_PER_LOOP;
+				sw_ring += RTE_VIRTIO_DESC_PER_LOOP;
+				rused   += RTE_VIRTIO_DESC_PER_LOOP;
+				nb_used -= RTE_VIRTIO_DESC_PER_LOOP;
+			}
+		}
+	}
+	rxvq->vq_used_cons_idx += nb_pkts_received;
+	rxvq->vq_free_cnt += nb_pkts_received;
+	rxvq->packets += nb_pkts_received;
+	return nb_pkts_received;
+}
+
+#define VIRTIO_TX_FREE_THRESH 32
+#define VIRTIO_TX_MAX_FREE_BUF_SZ 32
+#define VIRTIO_TX_FREE_NR 32
+/* vq->tx_free_cnt could mean num of free slots so we could avoid shift */
+static inline void __attribute__((always_inline))
+virtio_xmit_cleanup(struct virtqueue *vq)
+{
+	uint16_t i, desc_idx;
+	int nb_free = 0;
+	struct rte_mbuf *m, *free[VIRTIO_TX_MAX_FREE_BUF_SZ];
+
+	desc_idx = (uint16_t)(vq->vq_used_cons_idx &
+		((vq->vq_nentries >> 1) - 1));
+	free[0] = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie;
+	nb_free = 1;
+
+	for (i = 1; i < VIRTIO_TX_FREE_NR; i++) {
+		m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie;
+		if (likely(m->pool == free[0]->pool))
+			free[nb_free++] = m;
+		else {
+			rte_mempool_put_bulk(free[0]->pool, (void **)free,
+				nb_free);
+			free[0] = m;
+			nb_free = 1;
+		}
+	}
+
+	rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
+	vq->vq_used_cons_idx += VIRTIO_TX_FREE_NR;
+	vq->vq_free_cnt += (VIRTIO_TX_FREE_NR << 1);
+
+	return;
+}
+
+uint16_t
+virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
+	uint16_t nb_pkts)
+{
+	struct virtqueue *txvq = tx_queue;
+	uint16_t nb_used;
+	uint16_t desc_idx;
+	struct vring_desc *start_dp;
+	uint16_t n, nb_commit;
+	int i;
+	uint16_t desc_idx_max = (txvq->vq_nentries >> 1) - 1;
+
+	nb_used = VIRTQUEUE_NUSED(txvq);
+	rte_compiler_barrier();
+
+	nb_commit = nb_pkts = RTE_MIN((txvq->vq_free_cnt >> 1), nb_pkts);
+	desc_idx = (uint16_t) (txvq->vq_avail_idx & desc_idx_max);
+	start_dp = txvq->vq_ring.desc;
+	n = (uint16_t) (desc_idx_max + 1 - desc_idx);
+
+	if (nb_used >= VIRTIO_TX_FREE_THRESH)
+		virtio_xmit_cleanup(tx_queue);
+
+	if (nb_commit >= n) {
+		for (i = 0; i < n; i++)
+			txvq->vq_descx[desc_idx + i].cookie = tx_pkts[i];
+		for (i = 0; i < n; i++) {
+			start_dp[desc_idx].addr =
+				RTE_MBUF_DATA_DMA_ADDR(*tx_pkts);
+			start_dp[desc_idx].len = (*tx_pkts)->pkt_len;
+			tx_pkts++;
+			desc_idx++;
+		}
+		nb_commit -= n;
+		desc_idx = 0;
+	}
+	for (i = 0; i < nb_commit; i++)
+		txvq->vq_descx[desc_idx + i].cookie = tx_pkts[i];
+	for (i = 0; i < nb_commit; i++) {
+		start_dp[desc_idx].addr = RTE_MBUF_DATA_DMA_ADDR(*tx_pkts);
+		start_dp[desc_idx].len = (*tx_pkts)->pkt_len;
+		tx_pkts++;
+		desc_idx++;
+	}
+
+	rte_compiler_barrier();
+
+	txvq->vq_free_cnt -= (uint16_t)(nb_pkts << 1);
+	txvq->vq_avail_idx += nb_pkts;
+	txvq->vq_ring.avail->idx = txvq->vq_avail_idx;
+
+	if (likely(nb_pkts)) {
+		if (unlikely(virtqueue_kick_prepare(txvq)))
+			virtqueue_notify(txvq);
+	}
+
+	return nb_pkts;
+}
+
+int __attribute__((cold))
+virtio_rxq_vec_setup(struct virtqueue *rxq)
+{
+	uintptr_t p;
+	struct rte_mbuf mb_def = { .buf_addr = 0 }; /* zeroed mbuf */
+
+	mb_def.nb_segs = 1;
+	mb_def.data_off = RTE_PKTMBUF_HEADROOM;
+	mb_def.port = rxq->port_id;
+	rte_mbuf_refcnt_set(&mb_def, 1);
+
+	/* prevent compiler reordering: rearm_data covers previous fields */
+	rte_compiler_barrier();
+	p = (uintptr_t)&mb_def.rearm_data;
+	rxq->mbuf_initializer = *(uint64_t *)p;
+
+	return 0;
+}
diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index 7789411..4948228 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -188,6 +188,7 @@ struct virtqueue {
 	 */
 	uint16_t vq_used_cons_idx;
 	uint16_t vq_avail_idx;
+	uint64_t mbuf_initializer; /**< value to init mbufs. */
 	phys_addr_t virtio_net_hdr_mem; /**< hdr for each xmit packet */
 
 	/* Statistics */
@@ -195,6 +196,9 @@ struct virtqueue {
 	uint64_t	bytes;
 	uint64_t	errors;
 
+	struct rte_mbuf **sw_ring; /**< RX software ring. */
+	struct rte_mbuf fake_mbuf; /**< dummy mbuf for RX ring wraparound. */
+
 	struct vq_desc_extra {
 		void              *cookie;
 		uint16_t          ndescs;
-- 
1.9.3


* Re: [dpdk-dev] [RFC PATCH] virtio: virtio ring layout optimization and vectorization rx
  2015-09-17 16:17 [dpdk-dev] [RFC PATCH] virtio: virtio ring layout optimization and vectorization rx Huawei Xie
@ 2015-09-22  8:37 ` Thomas Monjalon
  2015-09-22  9:29   ` Xie, Huawei
  2015-09-24 18:03 ` [dpdk-dev] [RFC PATCH v2] virtio ring layout optimization and vectorization Huawei Xie
  1 sibling, 1 reply; 6+ messages in thread
From: Thomas Monjalon @ 2015-09-22  8:37 UTC (permalink / raw)
  To: Huawei Xie; +Cc: dev

2015-09-18 00:17, Huawei Xie:
> --- a/config/common_linuxapp
> +++ b/config/common_linuxapp
> @@ -241,6 +241,7 @@ CONFIG_RTE_LIBRTE_ENIC_DEBUG=n
>  # Compile burst-oriented VIRTIO PMD driver
>  #
>  CONFIG_RTE_LIBRTE_VIRTIO_PMD=y
> +CONFIG_RTE_VIRTIO_SIMPLE=y
>  CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_INIT=n
>  CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_RX=n
>  CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_TX=n

-1

> @@ -1150,6 +1158,14 @@ rx_func_get(struct rte_eth_dev *eth_dev)
>  		eth_dev->rx_pkt_burst = &virtio_recv_mergeable_pkts;
>  	else
>  		eth_dev->rx_pkt_burst = &virtio_recv_pkts;
> +
> +#ifdef RTE_VIRTIO_SIMPLE
> +	if (!vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
> +		printf("use simple rxtx\n");
> +		eth_dev->rx_pkt_burst = &virtio_recv_pkts_simple;
> +		eth_dev->tx_pkt_burst = &virtio_xmit_pkts_simple;
> +	}
> +#endif

I thought it was clear we must avoid #ifdef for configuration.
Please check how to add a runtime flag.
Or better: keep only one path which works everywhere and is well optimized.
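
For illustration, the runtime selection could look something like this in
rx_func_get() (rough sketch only; the use_simple_rxtx flag is hypothetical
and would have to be added, e.g. to struct virtio_hw):

	/* decided per device at init time, no build-time #ifdef */
	if (hw->use_simple_rxtx &&
	    !vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
		eth_dev->rx_pkt_burst = &virtio_recv_pkts_simple;
		eth_dev->tx_pkt_burst = &virtio_xmit_pkts_simple;
	}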


* Re: [dpdk-dev] [RFC PATCH] virtio: virtio ring layout optimization and vectorization rx
  2015-09-22  8:37 ` Thomas Monjalon
@ 2015-09-22  9:29   ` Xie, Huawei
  2015-09-22 13:37     ` Thomas Monjalon
  0 siblings, 1 reply; 6+ messages in thread
From: Xie, Huawei @ 2015-09-22  9:29 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: dev

On 9/22/2015 4:38 PM, Thomas Monjalon wrote:
> 2015-09-18 00:17, Huawei Xie:
>> --- a/config/common_linuxapp
>> +++ b/config/common_linuxapp
>> @@ -241,6 +241,7 @@ CONFIG_RTE_LIBRTE_ENIC_DEBUG=n
>>  # Compile burst-oriented VIRTIO PMD driver
>>  #
>>  CONFIG_RTE_LIBRTE_VIRTIO_PMD=y
>> +CONFIG_RTE_VIRTIO_SIMPLE=y
>>  CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_INIT=n
>>  CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_RX=n
>>  CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_TX=n
> -1
>
>> @@ -1150,6 +1158,14 @@ rx_func_get(struct rte_eth_dev *eth_dev)
>>  		eth_dev->rx_pkt_burst = &virtio_recv_mergeable_pkts;
>>  	else
>>  		eth_dev->rx_pkt_burst = &virtio_recv_pkts;
>> +
>> +#ifdef RTE_VIRTIO_SIMPLE
>> +	if (!vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
>> +		printf("use simple rxtx\n");
>> +		eth_dev->rx_pkt_burst = &virtio_recv_pkts_simple;
>> +		eth_dev->tx_pkt_burst = &virtio_xmit_pkts_simple;
>> +	}
>> +#endif
> I thought it was clear we must avoid #ifdef for configuration.
> Please check how to add a runtime flag.
> Or better: keep only one path which works everywhere and is well optimized.
>
>
Thomas, I had this in mind while developing this feature, and I planned
to discuss it with you. In theory, the best solution would be to use a
runtime flag, and the result would be that we normally always choose the
optimized path.
The real-world situation, in my mind, is that this feature isn't mature
enough (of course, this doesn't mean it has known issues). One good
solution in the kernel community is experimental features.
Could we also have an experimental/development tag? I would like to mark
this as an experimental/development feature temporarily, turn it off by
default, and continue to improve it.
The advantage is that we would have it in-tree, and every developer could
pull it and help improve it. Also, it is the base for continuous vhost
optimization: we should have a performance-optimized driver for the
device-side optimization.
CCed some developers to get their opinions.
Thoughts?


* Re: [dpdk-dev] [RFC PATCH] virtio: virtio ring layout optimization and vectorization rx
  2015-09-22  9:29   ` Xie, Huawei
@ 2015-09-22 13:37     ` Thomas Monjalon
  2015-09-22 22:55       ` Stephen Hemminger
  0 siblings, 1 reply; 6+ messages in thread
From: Thomas Monjalon @ 2015-09-22 13:37 UTC (permalink / raw)
  To: Xie, Huawei; +Cc: dev

2015-09-22 09:29, Xie, Huawei:
> On 9/22/2015 4:38 PM, Thomas Monjalon wrote:
> > I thought it was clear we must avoid #ifdef for configuration.
> > Please check how to add a runtime flag.
> > Or better: keep only one path which works everywhere and is well optimized.
> 
> Thomas, I had this in mind while developing this feature, and I planned
> to discuss it with you. In theory, the best solution would be to use a
> runtime flag, and the result would be that we normally always choose the
> optimized path.
> The real-world situation, in my mind, is that this feature isn't mature
> enough (of course, this doesn't mean it has known issues). One good
> solution in the kernel community is experimental features.
> Could we also have an experimental/development tag? I would like to mark
> this as an experimental/development feature temporarily, turn it off by
> default, and continue to improve it.

I think it is a rework, not an experimental feature.

> The advantage is that we would have it in-tree, and every developer could
> pull it and help improve it. Also, it is the base for continuous vhost
> optimization: we should have a performance-optimized driver for the
> device-side optimization.

If there are acceptance problems, they can be solved on the list.
If there are bugs after integration, they can be fixed before the release,
as there are validation and non-regression tests.

If the new Rx/Tx code can replace the old one, please replace it.
If the use cases are different, please use a runtime flag.
If it is not ready, please do more tests and re-submit later.

By the way, thanks for your work.


* Re: [dpdk-dev] [RFC PATCH] virtio: virtio ring layout optimization and vectorization rx
  2015-09-22 13:37     ` Thomas Monjalon
@ 2015-09-22 22:55       ` Stephen Hemminger
  0 siblings, 0 replies; 6+ messages in thread
From: Stephen Hemminger @ 2015-09-22 22:55 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: dev

On Tue, 22 Sep 2015 15:37:13 +0200
Thomas Monjalon <thomas.monjalon@6wind.com> wrote:

> 2015-09-22 09:29, Xie, Huawei:
> > On 9/22/2015 4:38 PM, Thomas Monjalon wrote:
> > > I thought it was clear we must avoid #ifdef for configuration.
> > > Please check how to add a runtime flag.
> > > Or better: keep only one path which works everywhere and is well optimized.
> > 
> > Thomas, I had this in mind while developing this feature, and I planned
> > to discuss it with you. In theory, the best solution would be to use a
> > runtime flag, and the result would be that we normally always choose the
> > optimized path.
> > The real-world situation, in my mind, is that this feature isn't mature
> > enough (of course, this doesn't mean it has known issues). One good
> > solution in the kernel community is experimental features.
> > Could we also have an experimental/development tag? I would like to mark
> > this as an experimental/development feature temporarily, turn it off by
> > default, and continue to improve it.
> 
> I think it is a rework, not an experimental feature.
> 
> > The advantage is that we would have it in-tree, and every developer could
> > pull it and help improve it. Also, it is the base for continuous vhost
> > optimization: we should have a performance-optimized driver for the
> > device-side optimization.
> 
> If there are acceptance problems, they can be solved on the list.
> If there are bugs after integration, they can be fixed before the release,
> as there are validation and non-regression tests.
>
> If the new Rx/Tx code can replace the old one, please replace it.
> If the use cases are different, please use a runtime flag.
> If it is not ready, please do more tests and re-submit later.
> 
> By the way, thanks for your work.

Agree with Thomas.
Any feature should always be on, and negotiated via the existing virtio
feature bits.
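
In other words, the datapath choice should follow purely from what was
negotiated, e.g. in rx_func_get() (sketch, reusing the functions from
this patch):

	/* no config option: the vectorized path is picked whenever the
	 * negotiated features allow it (no mergeable RX buffers) */
	if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF))
		eth_dev->rx_pkt_burst = &virtio_recv_mergeable_pkts;
	else
		eth_dev->rx_pkt_burst = &virtio_recv_pkts_simple;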


* [dpdk-dev] [RFC PATCH v2] virtio ring layout optimization and vectorization.
  2015-09-17 16:17 [dpdk-dev] [RFC PATCH] virtio: virtio ring layout optimization and vectorization rx Huawei Xie
  2015-09-22  8:37 ` Thomas Monjalon
@ 2015-09-24 18:03 ` Huawei Xie
  1 sibling, 0 replies; 6+ messages in thread
From: Huawei Xie @ 2015-09-24 18:03 UTC (permalink / raw)
  To: dev

This single patch is meant to help people get familiar with the optimization and to collect feedback.
It isn't split into smaller patches because the change is straightforward.
The cleanups haven't been finished yet.
The description and illustration of the idea are in a previous mail titled "virtio optimization idea".

v2 changes:
adapt to the new rte_mbuf format
fix the pkt_len issue in pkt_mb[6] and pkt_mb[7]
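
For reviewers of the SSE path: per used-ring entry, the shuffle plus
len_adjust pair computes roughly the following scalar equivalent
(illustration only, not part of the patch):

	/* struct vring_used_elem { uint32_t id; uint32_t len; };
	 * len includes the virtio_net_hdr, which the mbuf length
	 * fields must not count */
	uint16_t len = (uint16_t)(rused[i].len - sizeof(struct virtio_net_hdr));
	rx_pkts[i]->pkt_len  = len;
	rx_pkts[i]->data_len = len;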

Signed-off-by: Huawei Xie <huawei.xie@intel.com>
---
 config/common_linuxapp                  |   1 +
 drivers/net/virtio/Makefile             |   2 +-
 drivers/net/virtio/virtio_ethdev.c      |  19 ++
 drivers/net/virtio/virtio_ethdev.h      |   5 +
 drivers/net/virtio/virtio_rxtx.c        |  62 ++++-
 drivers/net/virtio/virtio_rxtx.h        |  41 ++++
 drivers/net/virtio/virtio_rxtx_simple.c | 386 ++++++++++++++++++++++++++++++++
 drivers/net/virtio/virtqueue.h          |   5 +
 8 files changed, 519 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/virtio/virtio_rxtx.h
 create mode 100644 drivers/net/virtio/virtio_rxtx_simple.c

diff --git a/config/common_linuxapp b/config/common_linuxapp
index 0de43d5..b597617 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -241,6 +241,7 @@ CONFIG_RTE_LIBRTE_ENIC_DEBUG=n
 # Compile burst-oriented VIRTIO PMD driver
 #
 CONFIG_RTE_LIBRTE_VIRTIO_PMD=y
+CONFIG_RTE_VIRTIO_SIMPLE=n
 CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_INIT=n
 CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_RX=n
 CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_TX=n
diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile
index 930b60f..89a8a37 100644
--- a/drivers/net/virtio/Makefile
+++ b/drivers/net/virtio/Makefile
@@ -50,7 +50,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtqueue.c
 SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_pci.c
 SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx.c
 SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_ethdev.c
-
+SRCS-$(CONFIG_RTE_VIRTIO_SIMPLE)     += virtio_rxtx_simple.c
 
 # this lib depends upon:
 DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_eal lib/librte_ether
diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index 465d3cd..9cd8a8b 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -61,6 +61,7 @@
 #include "virtio_pci.h"
 #include "virtio_logs.h"
 #include "virtqueue.h"
+#include "virtio_rxtx.h"
 
 
 static int eth_virtio_dev_init(struct rte_eth_dev *eth_dev);
@@ -246,6 +247,9 @@ virtio_dev_queue_release(struct virtqueue *vq) {
 		VIRTIO_WRITE_REG_2(hw, VIRTIO_PCI_QUEUE_SEL, vq->queue_id);
 		VIRTIO_WRITE_REG_4(hw, VIRTIO_PCI_QUEUE_PFN, 0);
 
+		if (vq->sw_ring)
+			rte_free(vq->sw_ring);
+
 		rte_free(vq);
 		vq = NULL;
 	}
@@ -291,6 +295,9 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
 			dev->data->port_id, queue_idx);
 		vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) +
 			vq_size * sizeof(struct vq_desc_extra), RTE_CACHE_LINE_SIZE);
+		vq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
+			(RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) * sizeof(vq->sw_ring[0]),
+			RTE_CACHE_LINE_SIZE, socket_id);
 	} else if (queue_type == VTNET_TQ) {
 		snprintf(vq_name, sizeof(vq_name), "port%d_tvq%d",
 			dev->data->port_id, queue_idx);
@@ -307,6 +314,10 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
 		PMD_INIT_LOG(ERR, "%s: Can not allocate virtqueue", __func__);
 		return (-ENOMEM);
 	}
+	if (queue_type == VTNET_RQ && vq->sw_ring == NULL) {
+		PMD_INIT_LOG(ERR, "%s: Can not allocate RX soft ring", __func__);
+		return -ENOMEM;
+	}
 
 	vq->hw = hw;
 	vq->port_id = dev->data->port_id;
@@ -1150,6 +1161,14 @@ rx_func_get(struct rte_eth_dev *eth_dev)
 		eth_dev->rx_pkt_burst = &virtio_recv_mergeable_pkts;
 	else
 		eth_dev->rx_pkt_burst = &virtio_recv_pkts;
+
+#ifdef RTE_VIRTIO_SIMPLE
+	if (!vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
+		printf("use simple rxtx\n");
+		eth_dev->rx_pkt_burst = &virtio_recv_pkts_vec;
+		eth_dev->tx_pkt_burst = &virtio_xmit_pkts_simple;
+	}
+#endif
 }
 
 /*
diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h
index 9026d42..fa11afa 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -108,6 +108,11 @@ uint16_t virtio_recv_mergeable_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 uint16_t virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		uint16_t nb_pkts);
 
+uint16_t virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+                uint16_t nb_pkts);
+
+uint16_t virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
+                uint16_t nb_pkts);
 
 /*
  * The VIRTIO_NET_F_GUEST_TSO[46] features permit the host to send us
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index c5b53bb..0c729b3 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -54,6 +54,7 @@
 #include "virtio_logs.h"
 #include "virtio_ethdev.h"
 #include "virtqueue.h"
+#include "virtio_rxtx.h"
 
 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
@@ -298,6 +299,18 @@ virtio_dev_vring_start(struct virtqueue *vq, int queue_type)
 		/* Allocate blank mbufs for the each rx descriptor */
 		nbufs = 0;
 		error = ENOSPC;
+
+#ifdef RTE_VIRTIO_SIMPLE
+		for (i = 0; i < vq->vq_nentries; i++) {
+			vq->vq_ring.avail->ring[i] = i;
+			vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE;
+		}
+
+		memset(&vq->fake_mbuf, 0, sizeof(vq->fake_mbuf));
+		for (i = 0; i < RTE_PMD_VIRTIO_RX_MAX_BURST; i++)
+			vq->sw_ring[vq->vq_nentries + i] = &vq->fake_mbuf;
+#endif
+
 		while (!virtqueue_full(vq)) {
 			m = rte_rxmbuf_alloc(vq->mpool);
 			if (m == NULL)
@@ -306,8 +319,11 @@ virtio_dev_vring_start(struct virtqueue *vq, int queue_type)
 			/******************************************
 			*         Enqueue allocated buffers        *
 			*******************************************/
+#ifdef RTE_VIRTIO_SIMPLE
+			error = virtqueue_enqueue_recv_refill_simple(vq, m);
+#else
 			error = virtqueue_enqueue_recv_refill(vq, m);
-
+#endif
 			if (error) {
 				rte_pktmbuf_free(m);
 				break;
@@ -324,6 +340,23 @@ virtio_dev_vring_start(struct virtqueue *vq, int queue_type)
 		VIRTIO_WRITE_REG_4(vq->hw, VIRTIO_PCI_QUEUE_PFN,
 			vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
 	} else if (queue_type == VTNET_TQ) {
+#ifdef RTE_VIRTIO_SIMPLE
+		int mid_idx  = vq->vq_nentries >> 1;
+		for (i = 0; i < mid_idx; i++) {
+			vq->vq_ring.avail->ring[i] = i + mid_idx;
+			vq->vq_ring.desc[i + mid_idx].next = i;
+			vq->vq_ring.desc[i + mid_idx].addr =
+				vq->virtio_net_hdr_mem +
+					mid_idx * vq->hw->vtnet_hdr_size;
+			vq->vq_ring.desc[i + mid_idx].len =
+				vq->hw->vtnet_hdr_size;
+			vq->vq_ring.desc[i + mid_idx].flags = VRING_DESC_F_NEXT;
+			vq->vq_ring.desc[i].flags = 0;
+		}
+		for (i = mid_idx; i < vq->vq_nentries; i++)
+			vq->vq_ring.avail->ring[i] = i;
+#endif
+
 		VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_SEL,
 			vq->vq_queue_index);
 		VIRTIO_WRITE_REG_4(vq->hw, VIRTIO_PCI_QUEUE_PFN,
@@ -398,6 +431,9 @@ virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
 	vq->mpool = mp;
 
 	dev->data->rx_queues[queue_idx] = vq;
+
+	virtio_rxq_vec_setup(vq);
+
 	return 0;
 }
 
@@ -825,3 +861,27 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 
 	return nb_tx;
 }
+
+uint16_t __attribute__((weak))
+virtio_recv_pkts_vec(
+	void __rte_unused *rx_queue,
+	struct rte_mbuf __rte_unused **rx_pkts,
+	uint16_t __rte_unused nb_pkts)
+{
+	return 0;
+}
+
+uint16_t __attribute__((weak))
+virtio_xmit_pkts_simple(
+	void __rte_unused *tx_queue,
+	struct rte_mbuf __rte_unused **tx_pkts,
+	uint16_t __rte_unused nb_pkts)
+{
+	return 0;
+}
+
+int __attribute__((weak))
+virtio_rxq_vec_setup(struct virtqueue __rte_unused *rxq)
+{
+	return -1;
+}
diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
new file mode 100644
index 0000000..8bd8260
--- /dev/null
+++ b/drivers/net/virtio/virtio_rxtx.h
@@ -0,0 +1,41 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define RTE_PMD_VIRTIO_RX_MAX_BURST 64
+
+int virtio_rxq_vec_setup(struct virtqueue *rxq);
+
+#ifdef RTE_VIRTIO_SIMPLE
+int virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq,
+	struct rte_mbuf *m);
+#endif
diff --git a/drivers/net/virtio/virtio_rxtx_simple.c b/drivers/net/virtio/virtio_rxtx_simple.c
new file mode 100644
index 0000000..4b45594
--- /dev/null
+++ b/drivers/net/virtio/virtio_rxtx_simple.c
@@ -0,0 +1,386 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include <tmmintrin.h>
+
+#include <rte_cycles.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_branch_prediction.h>
+#include <rte_mempool.h>
+#include <rte_malloc.h>
+#include <rte_mbuf.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_prefetch.h>
+#include <rte_string_fns.h>
+#include <rte_errno.h>
+#include <rte_byteorder.h>
+
+#include "virtio_logs.h"
+#include "virtio_ethdev.h"
+#include "virtqueue.h"
+#include "virtio_rxtx.h"
+
+#define RTE_VIRTIO_VPMD_RX_BURST 32
+#define RTE_VIRTIO_DESC_PER_LOOP 8
+#define RTE_VIRTIO_VPMD_RX_REARM_THRESH RTE_VIRTIO_VPMD_RX_BURST
+
+int __attribute__((cold))
+virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq,
+	struct rte_mbuf *cookie)
+{
+	struct vq_desc_extra *dxp;
+	struct vring_desc *start_dp;
+	uint16_t desc_idx;
+
+	desc_idx = vq->vq_avail_idx & (vq->vq_nentries - 1);
+	dxp = &vq->vq_descx[desc_idx];
+	dxp->cookie = (void *)cookie;
+	vq->sw_ring[desc_idx] = cookie;
+
+	start_dp = vq->vq_ring.desc;
+	start_dp[desc_idx].addr = (uint64_t)((uintptr_t)cookie->buf_physaddr +
+		RTE_PKTMBUF_HEADROOM - sizeof(struct virtio_net_hdr));
+	start_dp[desc_idx].len = cookie->buf_len -
+		RTE_PKTMBUF_HEADROOM + sizeof(struct virtio_net_hdr);
+
+	vq->vq_free_cnt--;
+	vq->vq_avail_idx++;
+
+	return 0;
+}
+
+static void
+virtio_rxq_rearm_vec(struct virtqueue *rxvq)
+{
+	int i;
+	uint16_t desc_idx;
+	struct rte_mbuf **sw_ring;
+	struct vring_desc *start_dp;
+	int ret;
+
+	desc_idx = rxvq->vq_avail_idx & (rxvq->vq_nentries - 1);
+	sw_ring = &rxvq->sw_ring[desc_idx];
+	start_dp = &rxvq->vq_ring.desc[desc_idx];
+
+	ret = rte_mempool_get_bulk(rxvq->mpool, (void **)sw_ring,
+		RTE_VIRTIO_VPMD_RX_REARM_THRESH);
+	if (unlikely(ret))
+		return;
+
+	for (i = 0; i < RTE_VIRTIO_VPMD_RX_REARM_THRESH; i++) {
+		uintptr_t p;
+		p = (uintptr_t)&sw_ring[i]->rearm_data;
+		*(uint64_t *)p = rxvq->mbuf_initializer;
+		start_dp[i].addr =
+			(uint64_t)((uintptr_t)sw_ring[i]->buf_physaddr +
+			RTE_PKTMBUF_HEADROOM - sizeof(struct virtio_net_hdr));
+		start_dp[i].len = sw_ring[i]->buf_len -
+			RTE_PKTMBUF_HEADROOM + sizeof(struct virtio_net_hdr);
+	}
+
+	rxvq->vq_avail_idx += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
+	rxvq->vq_free_cnt -= RTE_VIRTIO_VPMD_RX_REARM_THRESH;
+	vq_update_avail_idx(rxvq);
+}
+
+uint16_t
+virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+	uint16_t nb_pkts)
+{
+	struct virtqueue *rxvq = rx_queue;
+	uint16_t nb_used;
+
+	uint16_t desc_idx;
+	struct vring_used_elem *rused;
+	struct rte_mbuf **sw_ring;
+	struct rte_mbuf **sw_ring_end;
+	uint16_t nb_pkts_received;
+
+	__m128i shuf_msk1, shuf_msk2, len_adjust;
+
+	shuf_msk1 = _mm_set_epi8(
+		0xFF, 0xFF, 0xFF, 0xFF,
+		0xFF, 0xFF,		/* vlan tci */
+		5, 4,			/* dat len */
+		0xFF, 0xFF, 5, 4,	/* pkt len */
+		0xFF, 0xFF, 0xFF, 0xFF	/* packet type */
+
+	);
+
+	shuf_msk2 = _mm_set_epi8(
+		0xFF, 0xFF, 0xFF, 0xFF,
+		0xFF, 0xFF,		/* vlan tci */
+		13, 12,			/* dat len */
+		0xFF, 0xFF, 13, 12,	/* pkt len */
+		0xFF, 0xFF, 0xFF, 0xFF	/* packet type */
+	);
+
+	len_adjust = _mm_set_epi16(
+		0, 0,
+		0,
+		(uint16_t) -sizeof(struct virtio_net_hdr),
+		0, (uint16_t) -sizeof(struct virtio_net_hdr),
+		0, 0);
+
+	if (unlikely(nb_pkts < RTE_VIRTIO_VPMD_RX_BURST))
+		return 0;
+
+	nb_used = *(volatile uint16_t *)&rxvq->vq_ring.used->idx -
+		rxvq->vq_used_cons_idx;
+
+	rte_compiler_barrier();
+
+	if (unlikely(nb_used == 0))
+		return 0;
+
+	nb_used = likely(nb_used < RTE_VIRTIO_VPMD_RX_BURST) ?
+			nb_used : RTE_VIRTIO_VPMD_RX_BURST;
+
+	desc_idx = (uint16_t)(rxvq->vq_used_cons_idx & (rxvq->vq_nentries - 1));
+	rused = &rxvq->vq_ring.used->ring[desc_idx];
+	sw_ring  = &rxvq->sw_ring[desc_idx];
+	sw_ring_end = &rxvq->sw_ring[rxvq->vq_nentries];
+
+	_mm_prefetch((const void *)rused, _MM_HINT_T0);
+
+	if (rxvq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
+		virtio_rxq_rearm_vec(rxvq);
+		if (unlikely(virtqueue_kick_prepare(rxvq)))
+			virtqueue_notify(rxvq);
+	}
+
+	for (nb_pkts_received = 0;
+		nb_pkts_received < RTE_VIRTIO_VPMD_RX_BURST;) {
+		__m128i desc[RTE_VIRTIO_DESC_PER_LOOP / 2];
+		__m128i mbp[RTE_VIRTIO_DESC_PER_LOOP / 2];
+		__m128i pkt_mb[RTE_VIRTIO_DESC_PER_LOOP];
+
+		mbp[0] = _mm_loadu_si128((__m128i *)(sw_ring + 0));
+		desc[0] = _mm_loadu_si128((__m128i *)(rused + 0));
+		_mm_storeu_si128((__m128i *)&rx_pkts[0], mbp[0]);
+
+		mbp[1] = _mm_loadu_si128((__m128i *)(sw_ring + 2));
+		desc[1] = _mm_loadu_si128((__m128i *)(rused + 2));
+		_mm_storeu_si128((__m128i *)&rx_pkts[2], mbp[1]);
+
+		mbp[2] = _mm_loadu_si128((__m128i *)(sw_ring + 4));
+		desc[2] = _mm_loadu_si128((__m128i *)(rused + 4));
+		_mm_storeu_si128((__m128i *)&rx_pkts[4], mbp[2]);
+
+		mbp[3] = _mm_loadu_si128((__m128i *)(sw_ring + 6));
+		desc[3] = _mm_loadu_si128((__m128i *)(rused + 6));
+		_mm_storeu_si128((__m128i *)&rx_pkts[6], mbp[3]);
+
+		pkt_mb[1] = _mm_shuffle_epi8(desc[0], shuf_msk2);
+		pkt_mb[0] = _mm_shuffle_epi8(desc[0], shuf_msk1);
+		pkt_mb[1] = _mm_add_epi16(pkt_mb[1], len_adjust);
+		pkt_mb[0] = _mm_add_epi16(pkt_mb[0], len_adjust);
+		_mm_storeu_si128((void *)&rx_pkts[1]->rx_descriptor_fields1,
+			pkt_mb[1]);
+		_mm_storeu_si128((void *)&rx_pkts[0]->rx_descriptor_fields1,
+			pkt_mb[0]);
+
+		pkt_mb[3] = _mm_shuffle_epi8(desc[1], shuf_msk2);
+		pkt_mb[2] = _mm_shuffle_epi8(desc[1], shuf_msk1);
+		pkt_mb[3] = _mm_add_epi16(pkt_mb[3], len_adjust);
+		pkt_mb[2] = _mm_add_epi16(pkt_mb[2], len_adjust);
+		_mm_storeu_si128((void *)&rx_pkts[3]->rx_descriptor_fields1,
+			pkt_mb[3]);
+		_mm_storeu_si128((void *)&rx_pkts[2]->rx_descriptor_fields1,
+			pkt_mb[2]);
+
+		pkt_mb[5] = _mm_shuffle_epi8(desc[2], shuf_msk2);
+		pkt_mb[4] = _mm_shuffle_epi8(desc[2], shuf_msk1);
+		pkt_mb[5] = _mm_add_epi16(pkt_mb[5], len_adjust);
+		pkt_mb[4] = _mm_add_epi16(pkt_mb[4], len_adjust);
+		_mm_storeu_si128((void *)&rx_pkts[5]->rx_descriptor_fields1,
+			pkt_mb[5]);
+		_mm_storeu_si128((void *)&rx_pkts[4]->rx_descriptor_fields1,
+			pkt_mb[4]);
+
+		pkt_mb[7] = _mm_shuffle_epi8(desc[3], shuf_msk2);
+		pkt_mb[6] = _mm_shuffle_epi8(desc[3], shuf_msk1);
+		pkt_mb[7] = _mm_add_epi16(pkt_mb[7], len_adjust);
+		pkt_mb[6] = _mm_add_epi16(pkt_mb[6], len_adjust);
+		_mm_storeu_si128((void *)&rx_pkts[7]->rx_descriptor_fields1,
+			pkt_mb[7]);
+		_mm_storeu_si128((void *)&rx_pkts[6]->rx_descriptor_fields1,
+			pkt_mb[6]);
+
+		if (unlikely(nb_used <= RTE_VIRTIO_DESC_PER_LOOP)) {
+			if (sw_ring + nb_used <= sw_ring_end)
+				nb_pkts_received += nb_used;
+			else
+				nb_pkts_received += sw_ring_end - sw_ring;
+			break;
+		} else {
+			if (unlikely(sw_ring + RTE_VIRTIO_DESC_PER_LOOP >=
+				sw_ring_end)) {
+				nb_pkts_received += sw_ring_end - sw_ring;
+				break;
+			} else {
+				nb_pkts_received += RTE_VIRTIO_DESC_PER_LOOP;
+
+				rx_pkts += RTE_VIRTIO_DESC_PER_LOOP;
+				sw_ring += RTE_VIRTIO_DESC_PER_LOOP;
+				rused   += RTE_VIRTIO_DESC_PER_LOOP;
+				nb_used -= RTE_VIRTIO_DESC_PER_LOOP;
+			}
+		}
+	}
+	rxvq->vq_used_cons_idx += nb_pkts_received;
+	rxvq->vq_free_cnt += nb_pkts_received;
+	rxvq->packets += nb_pkts_received;
+	return nb_pkts_received;
+}
+
+#define VIRTIO_TX_FREE_THRESH 32
+#define VIRTIO_TX_MAX_FREE_BUF_SZ 32
+#define VIRTIO_TX_FREE_NR 32
+/* vq->tx_free_cnt could mean num of free slots so we could avoid shift */
+static inline void __attribute__((always_inline))
+virtio_xmit_cleanup(struct virtqueue *vq)
+{
+	uint16_t i, desc_idx;
+	int nb_free = 0;
+	struct rte_mbuf *m, *free[VIRTIO_TX_MAX_FREE_BUF_SZ];
+
+	desc_idx = (uint16_t)(vq->vq_used_cons_idx &
+		((vq->vq_nentries >> 1) - 1));
+	free[0] = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie;
+	nb_free = 1;
+
+	for (i = 1; i < VIRTIO_TX_FREE_NR; i++) {
+		m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie;
+		if (likely(m->pool == free[0]->pool))
+			free[nb_free++] = m;
+		else {
+			rte_mempool_put_bulk(free[0]->pool, (void **)free,
+				nb_free);
+			free[0] = m;
+			nb_free = 1;
+		}
+	}
+
+	rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
+	vq->vq_used_cons_idx += VIRTIO_TX_FREE_NR;
+	vq->vq_free_cnt += (VIRTIO_TX_FREE_NR << 1);
+
+	return;
+}
+
+uint16_t
+virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
+	uint16_t nb_pkts)
+{
+	struct virtqueue *txvq = tx_queue;
+	uint16_t nb_used;
+	uint16_t desc_idx;
+	struct vring_desc *start_dp;
+	uint16_t n, nb_commit;
+	int i;
+	uint16_t desc_idx_max = (txvq->vq_nentries >> 1) - 1;
+
+	nb_used = VIRTQUEUE_NUSED(txvq);
+	rte_compiler_barrier();
+
+	nb_commit = nb_pkts = RTE_MIN((txvq->vq_free_cnt >> 1), nb_pkts);
+	desc_idx = (uint16_t) (txvq->vq_avail_idx & desc_idx_max);
+	start_dp = txvq->vq_ring.desc;
+	n = (uint16_t) (desc_idx_max + 1 - desc_idx);
+
+	if (nb_used >= VIRTIO_TX_FREE_THRESH)
+		virtio_xmit_cleanup(tx_queue);
+
+	if (nb_commit >= n) {
+		for (i = 0; i < n; i++)
+			txvq->vq_descx[desc_idx + i].cookie = tx_pkts[i];
+		for (i = 0; i < n; i++) {
+			start_dp[desc_idx].addr =
+				RTE_MBUF_DATA_DMA_ADDR(*tx_pkts);
+			start_dp[desc_idx].len = (*tx_pkts)->pkt_len;
+			tx_pkts++;
+			desc_idx++;
+		}
+		nb_commit -= n;
+		desc_idx = 0;
+	}
+	for (i = 0; i < nb_commit; i++)
+		txvq->vq_descx[desc_idx + i].cookie = tx_pkts[i];
+	for (i = 0; i < nb_commit; i++) {
+		start_dp[desc_idx].addr = RTE_MBUF_DATA_DMA_ADDR(*tx_pkts);
+		start_dp[desc_idx].len = (*tx_pkts)->pkt_len;
+		tx_pkts++;
+		desc_idx++;
+	}
+
+	rte_compiler_barrier();
+
+	txvq->vq_free_cnt -= (uint16_t)(nb_pkts << 1);
+	txvq->vq_avail_idx += nb_pkts;
+	txvq->vq_ring.avail->idx = txvq->vq_avail_idx;
+
+	if (likely(nb_pkts)) {
+		if (unlikely(virtqueue_kick_prepare(txvq)))
+			virtqueue_notify(txvq);
+	}
+
+	return nb_pkts;
+}
+
+int __attribute__((cold))
+virtio_rxq_vec_setup(struct virtqueue *rxq)
+{
+	uintptr_t p;
+	struct rte_mbuf mb_def = { .buf_addr = 0 }; /* zeroed mbuf */
+
+	mb_def.nb_segs = 1;
+	mb_def.data_off = RTE_PKTMBUF_HEADROOM;
+	mb_def.port = rxq->port_id;
+	rte_mbuf_refcnt_set(&mb_def, 1);
+
+	/* prevent compiler reordering: rearm_data covers previous fields */
+	rte_compiler_barrier();
+	p = (uintptr_t)&mb_def.rearm_data;
+	rxq->mbuf_initializer = *(uint64_t *)p;
+
+	return 0;
+}
diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index 7789411..22b1f87 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -188,6 +188,7 @@ struct virtqueue {
 	 */
 	uint16_t vq_used_cons_idx;
 	uint16_t vq_avail_idx;
+	uint64_t mbuf_initializer; /**< value to init mbufs. */
 	phys_addr_t virtio_net_hdr_mem; /**< hdr for each xmit packet */
 
 	/* Statistics */
@@ -195,6 +196,10 @@ struct virtqueue {
 	uint64_t	bytes;
 	uint64_t	errors;
 
+	struct rte_mbuf **sw_ring; /**< RX software ring. */
+	/* dummy mbuf, for wraparound when processing RX ring. */
+	struct rte_mbuf fake_mbuf;
+
 	struct vq_desc_extra {
 		void              *cookie;
 		uint16_t          ndescs;
-- 
1.9.3

