[dpdk-dev] [PATCH] vhost: remove internal lockless enqueue

DPDK patches and discussions
 help / color / mirror / Atom feed

From: Huawei Xie <huawei.xie@intel.com>
To: dev@dpdk.org
Cc: yuanhan.liu@intel.com, jianfeng.tan@intel.com, mukawa@igel.co.jp,
	kevin.traynor@intel.com, haifeng.lin@huawei.com,
	Huawei Xie <huawei.xie@intel.com>
Subject: [dpdk-dev] [PATCH] vhost: remove internal lockless enqueue
Date: Mon, 13 Jun 2016 19:52:12 +0800	[thread overview]
Message-ID: <1465818732-116302-1-git-send-email-huawei.xie@intel.com> (raw)

No other DPDK PMD supports concurrently receiving or sending
packets on the same queue. The upper application should deal with
this, normally through queue and core bindings.

For historical reasons, vhost internally supports concurrent lockless
enqueuing of packets to the same virtio queue through a costly cmpset operation.
This patch removes this internal lockless implementation, which should improve
performance a bit.

Luckily DPDK OVS doesn't rely on this behavior.

Signed-off-by: Huawei Xie <huawei.xie@intel.com>
---
 doc/guides/rel_notes/release_16_07.rst |   3 +
 lib/librte_vhost/rte_virtio_net.h      |   3 +-
 lib/librte_vhost/vhost_rxtx.c          | 106 +++++++--------------------------
 lib/librte_vhost/virtio-net.c          |   1 -
 4 files changed, 24 insertions(+), 89 deletions(-)

diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst
index 30e78d4..e96250f 100644
--- a/doc/guides/rel_notes/release_16_07.rst
+++ b/doc/guides/rel_notes/release_16_07.rst
@@ -112,6 +112,9 @@ API Changes
    * Add a short 1-2 sentence description of the API change. Use fixed width
      quotes for ``rte_function_names`` or ``rte_struct_names``. Use the past tense.
 
+* The function ``rte_vhost_enqueue_burst`` no longer supports concurrent enqueuing
+  of packets to the same queue.
+
 * The following counters are removed from ``rte_eth_stats`` structure:
   ibadcrc, ibadlen, imcasts, fdirmatch, fdirmiss,
   tx_pause_xon, rx_pause_xon, tx_pause_xoff, rx_pause_xoff.
diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
index 600b20b..dbba24e 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -88,7 +88,6 @@ struct vhost_virtqueue {
 	uint32_t		backend;		/**< Backend value to determine if device should started/stopped. */
 	uint16_t		vhost_hlen;		/**< Vhost header length (varies depending on RX merge buffers. */
 	volatile uint16_t	last_used_idx;		/**< Last index used on the available ring */
-	volatile uint16_t	last_used_idx_res;	/**< Used for multiple devices reserving buffers. */
 #define VIRTIO_INVALID_EVENTFD		(-1)
 #define VIRTIO_UNINITIALIZED_EVENTFD	(-2)
 	int			callfd;			/**< Used to notify the guest (trigger interrupt). */
@@ -192,7 +191,7 @@ rte_vring_available_entries(struct virtio_net *dev, uint16_t queue_id)
 	if (!vq->enabled)
 		return 0;
 
-	return *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx_res;
+	return *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx;
 }
 
 /**
diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 750821a..3fa75cb 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -205,49 +205,6 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	return 0;
 }
 
-/*
- * As many data cores may want to access available buffers
- * they need to be reserved.
- */
-static inline uint32_t
-reserve_avail_buf(struct vhost_virtqueue *vq, uint32_t count,
-		  uint16_t *start, uint16_t *end)
-{
-	uint16_t res_start_idx;
-	uint16_t res_end_idx;
-	uint16_t avail_idx;
-	uint16_t free_entries;
-	int success;
-
-	count = RTE_MIN(count, (uint32_t)MAX_PKT_BURST);
-
-again:
-	res_start_idx = vq->last_used_idx_res;
-	avail_idx = *((volatile uint16_t *)&vq->avail->idx);
-
-	free_entries = avail_idx - res_start_idx;
-	count = RTE_MIN(count, free_entries);
-	if (count == 0)
-		return 0;
-
-	res_end_idx = res_start_idx + count;
-
-	/*
-	 * update vq->last_used_idx_res atomically; try again if failed.
-	 *
-	 * TODO: Allow to disable cmpset if no concurrency in application.
-	 */
-	success = rte_atomic16_cmpset(&vq->last_used_idx_res,
-				      res_start_idx, res_end_idx);
-	if (unlikely(!success))
-		goto again;
-
-	*start = res_start_idx;
-	*end   = res_end_idx;
-
-	return count;
-}
-
 /**
  * This function adds buffers to the virtio devices RX virtqueue. Buffers can
  * be received from the physical port or from another virtio device. A packet
@@ -260,7 +217,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	      struct rte_mbuf **pkts, uint32_t count)
 {
 	struct vhost_virtqueue *vq;
-	uint16_t res_start_idx, res_end_idx;
+	uint16_t avail_idx, free_entries, res_start_idx;
 	uint16_t desc_indexes[MAX_PKT_BURST];
 	uint32_t i;
 
@@ -276,13 +233,19 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	if (unlikely(vq->enabled == 0))
 		return 0;
 
-	count = reserve_avail_buf(vq, count, &res_start_idx, &res_end_idx);
+
+	count = RTE_MIN(count, (uint32_t)MAX_PKT_BURST);
+	avail_idx = *((volatile uint16_t *)&vq->avail->idx);
+	res_start_idx = vq->last_used_idx;
+	free_entries = avail_idx - res_start_idx;
+	count = RTE_MIN(count, free_entries);
+
 	if (count == 0)
 		return 0;
 
 	LOG_DEBUG(VHOST_DATA,
 		"(%"PRIu64") res_start_idx %d| res_end_idx Index %d\n",
-		dev->device_fh, res_start_idx, res_end_idx);
+		dev->device_fh, res_start_idx, res_start_idx + count);
 
 	/* Retrieve all of the desc indexes first to avoid caching issues. */
 	rte_prefetch0(&vq->avail->ring[res_start_idx & (vq->size - 1)]);
@@ -315,12 +278,8 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 
 	rte_smp_wmb();
 
-	/* Wait until it's our turn to add our buffer to the used ring. */
-	while (unlikely(vq->last_used_idx != res_start_idx))
-		rte_pause();
-
 	*(volatile uint16_t *)&vq->used->idx += count;
-	vq->last_used_idx = res_end_idx;
+	vq->last_used_idx += count;
 	vhost_log_used_vring(dev, vq,
 		offsetof(struct vring_used, idx),
 		sizeof(vq->used->idx));
@@ -366,29 +325,20 @@ fill_vec_buf(struct vhost_virtqueue *vq, uint32_t avail_idx,
 }
 
 /*
- * As many data cores may want to access available buffers concurrently,
- * they need to be reserved.
- *
  * Returns -1 on fail, 0 on success
  */
 static inline int
 reserve_avail_buf_mergeable(struct vhost_virtqueue *vq, uint32_t size,
-			    uint16_t *start, uint16_t *end)
+			    uint16_t *end)
 {
-	uint16_t res_start_idx;
 	uint16_t res_cur_idx;
 	uint16_t avail_idx;
-	uint32_t allocated;
-	uint32_t vec_idx;
-	uint16_t tries;
+	uint32_t allocated = 0;
+	uint32_t vec_idx = 0;
+	uint16_t tries = 0;
 
-again:
-	res_start_idx = vq->last_used_idx_res;
-	res_cur_idx  = res_start_idx;
+	res_cur_idx  = vq->last_used_idx;
 
-	allocated = 0;
-	vec_idx   = 0;
-	tries     = 0;
 	while (1) {
 		avail_idx = *((volatile uint16_t *)&vq->avail->idx);
 		if (unlikely(res_cur_idx == avail_idx))
@@ -413,26 +363,17 @@ again:
 			return -1;
 	}
 
-	/*
-	 * update vq->last_used_idx_res atomically.
-	 * retry again if failed.
-	 */
-	if (rte_atomic16_cmpset(&vq->last_used_idx_res,
-				res_start_idx, res_cur_idx) == 0)
-		goto again;
-
-	*start = res_start_idx;
 	*end   = res_cur_idx;
 	return 0;
 }
 
 static inline uint32_t __attribute__((always_inline))
 copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
-			    uint16_t res_start_idx, uint16_t res_end_idx,
-			    struct rte_mbuf *m)
+			    uint16_t res_end_idx, struct rte_mbuf *m)
 {
 	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
 	uint32_t vec_idx = 0;
+	uint16_t res_start_idx = vq->last_used_idx;
 	uint16_t cur_idx = res_start_idx;
 	uint64_t desc_addr;
 	uint32_t mbuf_offset, mbuf_avail;
@@ -531,7 +472,7 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 {
 	struct vhost_virtqueue *vq;
 	uint32_t pkt_idx = 0, nr_used = 0;
-	uint16_t start, end;
+	uint16_t end;
 
 	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_merge_rx()\n",
 		dev->device_fh);
@@ -554,24 +495,17 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 		uint32_t pkt_len = pkts[pkt_idx]->pkt_len + vq->vhost_hlen;
 
 		if (unlikely(reserve_avail_buf_mergeable(vq, pkt_len,
-							 &start, &end) < 0)) {
+							 &end) < 0)) {
 			LOG_DEBUG(VHOST_DATA,
 				"(%" PRIu64 ") Failed to get enough desc from vring\n",
 				dev->device_fh);
 			break;
 		}
 
-		nr_used = copy_mbuf_to_desc_mergeable(dev, vq, start, end,
+		nr_used = copy_mbuf_to_desc_mergeable(dev, vq, end,
 						      pkts[pkt_idx]);
 		rte_smp_wmb();
 
-		/*
-		 * Wait until it's our turn to add our buffer
-		 * to the used ring.
-		 */
-		while (unlikely(vq->last_used_idx != start))
-			rte_pause();
-
 		*(volatile uint16_t *)&vq->used->idx += nr_used;
 		vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
 			sizeof(vq->used->idx));
diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index f4695af..8912c89 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -601,7 +601,6 @@ vhost_set_vring_base(struct vhost_device_ctx ctx,
 
 	/* State->index refers to the queue index. The txq is 1, rxq is 0. */
 	dev->virtqueue[state->index]->last_used_idx = state->num;
-	dev->virtqueue[state->index]->last_used_idx_res = state->num;
 
 	return 0;
 }
-- 
1.8.1.4

next             reply	other threads:[~2016-06-14  3:47 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-06-13 11:52 Huawei Xie [this message]
2016-06-14 14:07 ` Yuanhan Liu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1465818732-116302-1-git-send-email-huawei.xie@intel.com \
    --to=huawei.xie@intel.com \
    --cc=dev@dpdk.org \
    --cc=haifeng.lin@huawei.com \
    --cc=jianfeng.tan@intel.com \
    --cc=kevin.traynor@intel.com \
    --cc=mukawa@igel.co.jp \
    --cc=yuanhan.liu@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).