From: Yuanhan Liu <yuanhan.liu@linux.intel.com>
To: dev@dpdk.org
Cc: huawei.xie@intel.com, Yuanhan Liu, "Michael S. Tsirkin"
Date: Mon, 2 May 2016 17:46:16 -0700
Message-Id: <1462236378-7604-2-git-send-email-yuanhan.liu@linux.intel.com>
X-Mailer: git-send-email 1.9.0
In-Reply-To: <1462236378-7604-1-git-send-email-yuanhan.liu@linux.intel.com>
References: <1462236378-7604-1-git-send-email-yuanhan.liu@linux.intel.com>
Subject: [dpdk-dev] [PATCH 1/3] vhost: pre update used ring for Tx and Rx

Pre-update the used ring in batch, for both Tx and Rx, at the stage
where all avail desc indexes are fetched. This reduces some cache
misses and hence increases performance a bit.

Pre-updating is feasible because the guest driver will not start
processing those entries until we update "used->idx". (I'm not 100%
certain I haven't missed anything, though.)

Cc: Michael S. Tsirkin
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
---
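A minimal, self-contained model of the ordering this relies on is
sketched below. It is not the vhost code itself: the types, the names
and the C11 atomics (standing in for rte_smp_wmb()/rte_smp_rmb()) are
illustrative only, and wrap-around of the 16-bit index is ignored for
brevity.

    /*
     * Model: host pre-writes used ring entries in batch, then publishes
     * them by advancing the index; the guest never looks at a slot
     * until the index moves past it.
     */
    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    #define RING_SIZE 8                 /* power of two, like vq->size */

    struct used_elem { uint32_t id; uint32_t len; };

    struct used_ring {
            struct used_elem ring[RING_SIZE];
            _Atomic uint16_t idx;       /* guest consumes entries below idx */
    };

    /*
     * Host side: pre-write all 'count' entries while walking the avail
     * ring, then publish only the 'done' entries actually completed.
     * Writing a slot at or beyond 'idx' early is safe: the guest will
     * not read it until 'idx' is advanced past it.
     */
    static void
    host_enqueue(struct used_ring *u, uint16_t start, uint16_t count,
                 uint16_t done)
    {
            for (uint16_t i = 0; i < count; i++) {
                    uint16_t slot = (start + i) & (RING_SIZE - 1);

                    u->ring[slot].id  = 100 + i;  /* stand-in desc index */
                    u->ring[slot].len = 0;
            }
            /* Release store orders the entry writes before the index
             * update, like the rte_smp_wmb() before "used->idx += i". */
            atomic_store_explicit(&u->idx, start + done,
                                  memory_order_release);
    }

    int main(void)
    {
            struct used_ring u = { .idx = 0 };

            /* Pre-write 3 entries but publish only 2, as the dequeue
             * path does when an mbuf allocation or copy fails midway. */
            host_enqueue(&u, 0, 3, 2);

            /* Guest side: acquire-load the index, then read entries.
             * The third, unpublished entry stays invisible and simply
             * gets overwritten on the next round. */
            uint16_t idx = atomic_load_explicit(&u.idx,
                                                memory_order_acquire);
            for (uint16_t i = 0; i < idx; i++)
                    printf("used[%u]: id=%u len=%u\n",
                           i, u.ring[i].id, u.ring[i].len);
            return 0;
    }

The error paths in the patch keep the same invariant: on Rx, a failed
copy only rewrites the pre-written len (down to the header size) before
"used->idx" is bumped, while on dequeue both "used->idx" and
last_used_idx advance by the number of packets actually dequeued, so
pre-written entries past that point are never exposed to the guest.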
 lib/librte_vhost/vhost_rxtx.c | 58 +++++++++++++++++++++----------------------
 1 file changed, 28 insertions(+), 30 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index c9cd1c5..2c3b810 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -137,7 +137,7 @@ copy_virtio_net_hdr(struct virtio_net *dev, uint64_t desc_addr,
 
 static inline int __attribute__((always_inline))
 copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
-		  struct rte_mbuf *m, uint16_t desc_idx, uint32_t *copied)
+		  struct rte_mbuf *m, uint16_t desc_idx)
 {
 	uint32_t desc_avail, desc_offset;
 	uint32_t mbuf_avail, mbuf_offset;
@@ -161,7 +161,6 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	desc_offset = dev->vhost_hlen;
 	desc_avail  = desc->len - dev->vhost_hlen;
 
-	*copied = rte_pktmbuf_pkt_len(m);
 	mbuf_avail  = rte_pktmbuf_data_len(m);
 	mbuf_offset = 0;
 	while (mbuf_avail != 0 || m->next != NULL) {
@@ -262,6 +261,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	struct vhost_virtqueue *vq;
 	uint16_t res_start_idx, res_end_idx;
 	uint16_t desc_indexes[MAX_PKT_BURST];
+	uint16_t used_idx;
 	uint32_t i;
 
 	LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
@@ -285,27 +285,29 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	/* Retrieve all of the desc indexes first to avoid caching issues. */
 	rte_prefetch0(&vq->avail->ring[res_start_idx & (vq->size - 1)]);
 	for (i = 0; i < count; i++) {
-		desc_indexes[i] = vq->avail->ring[(res_start_idx + i) &
-						  (vq->size - 1)];
+		used_idx = (res_start_idx + i) & (vq->size - 1);
+		desc_indexes[i] = vq->avail->ring[used_idx];
+		vq->used->ring[used_idx].id = desc_indexes[i];
+		vq->used->ring[used_idx].len = pkts[i]->pkt_len +
+					       dev->vhost_hlen;
+		vhost_log_used_vring(dev, vq,
+			offsetof(struct vring_used, ring[used_idx]),
+			sizeof(vq->used->ring[used_idx]));
 	}
 
 	rte_prefetch0(&vq->desc[desc_indexes[0]]);
 	for (i = 0; i < count; i++) {
 		uint16_t desc_idx = desc_indexes[i];
-		uint16_t used_idx = (res_start_idx + i) & (vq->size - 1);
-		uint32_t copied;
 		int err;
 
-		err = copy_mbuf_to_desc(dev, vq, pkts[i], desc_idx, &copied);
-
-		vq->used->ring[used_idx].id = desc_idx;
-		if (unlikely(err))
+		err = copy_mbuf_to_desc(dev, vq, pkts[i], desc_idx);
+		if (unlikely(err)) {
+			used_idx = (res_start_idx + i) & (vq->size - 1);
 			vq->used->ring[used_idx].len = dev->vhost_hlen;
-		else
-			vq->used->ring[used_idx].len = copied + dev->vhost_hlen;
-		vhost_log_used_vring(dev, vq,
-			offsetof(struct vring_used, ring[used_idx]),
-			sizeof(vq->used->ring[used_idx]));
+			vhost_log_used_vring(dev, vq,
+				offsetof(struct vring_used, ring[used_idx]),
+				sizeof(vq->used->ring[used_idx]));
+		}
 
 		if (i + 1 < count)
 			rte_prefetch0(&vq->desc[desc_indexes[i+1]]);
@@ -879,6 +881,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 	/* Prefetch available ring to retrieve head indexes. */
 	used_idx = vq->last_used_idx & (vq->size - 1);
 	rte_prefetch0(&vq->avail->ring[used_idx]);
+	rte_prefetch0(&vq->used->ring[used_idx]);
 
 	count = RTE_MIN(count, MAX_PKT_BURST);
 	count = RTE_MIN(count, free_entries);
@@ -887,22 +890,23 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 
 	/* Retrieve all of the head indexes first to avoid caching issues. */
 	for (i = 0; i < count; i++) {
-		desc_indexes[i] = vq->avail->ring[(vq->last_used_idx + i) &
-					(vq->size - 1)];
+		used_idx = (vq->last_used_idx + i) & (vq->size - 1);
+		desc_indexes[i] = vq->avail->ring[used_idx];
+
+		vq->used->ring[used_idx].id  = desc_indexes[i];
+		vq->used->ring[used_idx].len = 0;
+		vhost_log_used_vring(dev, vq,
+				offsetof(struct vring_used, ring[used_idx]),
+				sizeof(vq->used->ring[used_idx]));
 	}
 
 	/* Prefetch descriptor index. */
 	rte_prefetch0(&vq->desc[desc_indexes[0]]);
-	rte_prefetch0(&vq->used->ring[vq->last_used_idx & (vq->size - 1)]);
-
 	for (i = 0; i < count; i++) {
 		int err;
 
-		if (likely(i + 1 < count)) {
+		if (likely(i + 1 < count))
 			rte_prefetch0(&vq->desc[desc_indexes[i + 1]]);
-			rte_prefetch0(&vq->used->ring[(used_idx + 1) &
-						      (vq->size - 1)]);
-		}
 
 		pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
 		if (unlikely(pkts[i] == NULL)) {
@@ -916,18 +920,12 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 			rte_pktmbuf_free(pkts[i]);
 			break;
 		}
-
-		used_idx = vq->last_used_idx++ & (vq->size - 1);
-		vq->used->ring[used_idx].id = desc_indexes[i];
-		vq->used->ring[used_idx].len = 0;
-		vhost_log_used_vring(dev, vq,
-				offsetof(struct vring_used, ring[used_idx]),
-				sizeof(vq->used->ring[used_idx]));
 	}
 
 	rte_smp_wmb();
 	rte_smp_rmb();
 	vq->used->idx += i;
+	vq->last_used_idx += i;
 	vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
 			sizeof(vq->used->idx));
 
-- 
1.9.0