Date: Tue, 4 Dec 2018 14:22:41 +0800
From: Tiwei Bie
To: Xiao Wang
Cc: maxime.coquelin@redhat.com, dev@dpdk.org, zhihong.wang@intel.com,
 xiaolong.ye@intel.com
Message-ID: <20181204062241.GA19852@btwcube1>
References: <20181128094607.106173-1-xiao.w.wang@intel.com>
 <20181128094607.106173-3-xiao.w.wang@intel.com>
In-Reply-To: <20181128094607.106173-3-xiao.w.wang@intel.com>
Subject: Re: [dpdk-dev] [PATCH 2/9] vhost: provide helpers for virtio ring relay

On Wed, Nov 28, 2018 at 05:46:00PM +0800, Xiao Wang wrote:
[...]
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice
> + *
> + * Synchronize the available ring from guest to mediate ring, help to
> + * check desc validity to protect against malicious guest driver.
> + *
> + * @param vid
> + *  vhost device id
> + * @param qid
> + *  vhost queue id
> + * @param m_vring
> + *  mediate virtio ring pointer
> + * @return
> + *  number of synced available entries on success, -1 on failure
> + */
> +int __rte_experimental
> +rte_vdpa_relay_avail_ring(int vid, int qid, struct vring *m_vring);
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice
> + *
> + * Synchronize the used ring from mediate ring to guest, log dirty
> + * page for each Rx buffer used.
> + *
> + * @param vid
> + *  vhost device id
> + * @param qid
> + *  vhost queue id
> + * @param m_vring
> + *  mediate virtio ring pointer
> + * @return
> + *  number of synced used entries on success, -1 on failure
> + */
> +int __rte_experimental
> +rte_vdpa_relay_used_ring(int vid, int qid, struct vring *m_vring);

The above APIs are split ring specific. We also need to take the
packed ring into consideration.

> #endif /* _RTE_VDPA_H_ */
[...]
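With the packed ring there is no avail index to relay: descriptor
availability is encoded in the AVAIL/USED flag bits of each descriptor
together with a wrap counter, so either separate helpers or a
layout-aware variant of these APIs will be needed. A rough, untested
sketch of the availability check (names follow the virtio 1.1 spec;
the exact struct/flag definitions in DPDK may differ):

	static inline bool
	desc_is_avail(const struct vring_packed_desc *desc, bool wrap_counter)
	{
		uint16_t flags = desc->flags;

		/* A packed descriptor is available when its AVAIL flag
		 * matches the wrap counter and its USED flag does not. */
		return wrap_counter == !!(flags & VRING_DESC_F_AVAIL) &&
			wrap_counter != !!(flags & VRING_DESC_F_USED);
	}
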
> diff --git a/lib/librte_vhost/vdpa.c b/lib/librte_vhost/vdpa.c
> index e7d849ee0..e41117776 100644
> --- a/lib/librte_vhost/vdpa.c
> +++ b/lib/librte_vhost/vdpa.c
> @@ -122,3 +122,176 @@ rte_vdpa_get_device_num(void)
>  {
>  	return vdpa_device_num;
>  }
> +
> +static int
> +invalid_desc_check(struct virtio_net *dev, struct vhost_virtqueue *vq,
> +		uint64_t desc_iova, uint64_t desc_len, uint8_t perm)
> +{
> +	uint64_t desc_addr, desc_chunck_len;
> +
> +	while (desc_len) {
> +		desc_chunck_len = desc_len;
> +		desc_addr = vhost_iova_to_vva(dev, vq,
> +				desc_iova,
> +				&desc_chunck_len,
> +				perm);
> +
> +		if (!desc_addr)
> +			return -1;
> +
> +		desc_len -= desc_chunck_len;
> +		desc_iova += desc_chunck_len;
> +	}
> +
> +	return 0;
> +}
> +
> +int
> +rte_vdpa_relay_avail_ring(int vid, int qid, struct vring *m_vring)
> +{
> +	struct virtio_net *dev = get_device(vid);
> +	uint16_t idx, idx_m, desc_id;
> +	struct vring_desc desc;
> +	struct vhost_virtqueue *vq;
> +	struct vring_desc *desc_ring;
> +	struct vring_desc *idesc = NULL;
> +	uint64_t dlen;
> +	int ret;
> +
> +	if (!dev)
> +		return -1;
> +
> +	vq = dev->virtqueue[qid];

It would be better to also validate qid.

> +	idx = vq->avail->idx;
> +	idx_m = m_vring->avail->idx;
> +	ret = idx - idx_m;

(idx - idx_m) needs to be cast to uint16_t, otherwise the result is
wrong once the free-running 16-bit indexes wrap around.

> +
> +	while (idx_m != idx) {
> +		/* avail entry copy */
> +		desc_id = vq->avail->ring[idx_m % vq->size];

idx_m & (vq->size - 1) should be faster.

> +		m_vring->avail->ring[idx_m % vq->size] = desc_id;
> +		desc_ring = vq->desc;
> +
> +		if (vq->desc[desc_id].flags & VRING_DESC_F_INDIRECT) {
> +			dlen = vq->desc[desc_id].len;
> +			desc_ring = (struct vring_desc *)(uintptr_t)
> +			vhost_iova_to_vva(dev, vq, vq->desc[desc_id].addr,

The indentation needs to be fixed.

> +					&dlen,
> +					VHOST_ACCESS_RO);
> +			if (unlikely(!desc_ring))
> +				return -1;
> +
> +			if (unlikely(dlen < vq->desc[idx].len)) {
> +				idesc = alloc_copy_ind_table(dev, vq,
> +					vq->desc[idx].addr, vq->desc[idx].len);
> +				if (unlikely(!idesc))
> +					return -1;
> +
> +				desc_ring = idesc;
> +			}
> +
> +			desc_id = 0;
> +		}
> +
> +		/* check if the buf addr is within the guest memory */
> +		do {
> +			desc = desc_ring[desc_id];
> +			if (invalid_desc_check(dev, vq, desc.addr, desc.len,
> +					VHOST_ACCESS_RW))

The return value should be checked with < 0; otherwise
invalid_desc_check() should return bool instead. Also, we may just
have RO access here (descriptors without VRING_DESC_F_WRITE are only
read by the device).

> +				return -1;

The memory allocated for idesc, if any, will leak in this case.

> +			desc_id = desc.next;
> +		} while (desc.flags & VRING_DESC_F_NEXT);
> +
> +		if (unlikely(!!idesc)) {

The !! isn't needed.

> +			free_ind_table(idesc);
> +			idesc = NULL;
> +		}
> +
> +		idx_m++;
> +	}
> +

A write barrier is needed here, so that the copied avail entries are
guaranteed to be visible before the index update.

> +	m_vring->avail->idx = idx;
> +
> +	if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
> +		vhost_avail_event(vq) = vq->avail->idx;

idx should be used instead of vq->avail->idx, which may have already
been changed by the driver.

> +
> +	return ret;
> +}
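Putting the above together, the wrap-safe arithmetic, the masking and
the missing barrier could look roughly like this (untested sketch, not
a drop-in fix):

	idx = vq->avail->idx;
	idx_m = m_vring->avail->idx;
	/* free-running 16-bit indexes: do the math in uint16_t so it
	 * stays correct across wrap-around */
	ret = (uint16_t)(idx - idx_m);

	while (idx_m != idx) {
		/* split ring sizes are powers of two, so the mask is
		 * equivalent to (and cheaper than) % vq->size */
		desc_id = vq->avail->ring[idx_m & (vq->size - 1)];
		m_vring->avail->ring[idx_m & (vq->size - 1)] = desc_id;
		/* ... descriptor validation as above ... */
		idx_m++;
	}

	rte_smp_wmb(); /* entries must be visible before the index update */
	m_vring->avail->idx = idx;

	if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
		vhost_avail_event(vq) = idx; /* not vq->avail->idx */
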
> +
> +int
> +rte_vdpa_relay_used_ring(int vid, int qid, struct vring *m_vring)
> +{
> +	struct virtio_net *dev = get_device(vid);
> +	uint16_t idx, idx_m, desc_id;
> +	struct vhost_virtqueue *vq;
> +	struct vring_desc desc;
> +	struct vring_desc *desc_ring;
> +	struct vring_desc *idesc = NULL;
> +	uint64_t dlen;
> +	int ret;
> +
> +	if (!dev)
> +		return -1;
> +
> +	vq = dev->virtqueue[qid];

Same here, it would be better to also validate qid.

> +	idx = vq->used->idx;
> +	idx_m = m_vring->used->idx;
> +	ret = idx_m - idx;

Likewise, (idx_m - idx) needs to be cast to uint16_t.

> +
> +	while (idx != idx_m) {
> +		/* copy used entry, used ring logging is not covered here */

The used ring logging has actually been covered here, by the following
call to vhost_log_used_vring() after the used ring is changed, so this
comment is misleading.

> +		vq->used->ring[idx % vq->size] =
> +			m_vring->used->ring[idx % vq->size];

idx & (vq->size - 1) should be faster.

> +
> +		/* dirty page logging for used ring */
> +		vhost_log_used_vring(dev, vq,
> +			offsetof(struct vring_used, ring[idx % vq->size]),
> +			sizeof(struct vring_used_elem));
> +
> +		desc_id = vq->used->ring[idx % vq->size].id;
> +		desc_ring = vq->desc;
> +
> +		if (vq->desc[desc_id].flags & VRING_DESC_F_INDIRECT) {
> +			dlen = vq->desc[desc_id].len;
> +			desc_ring = (struct vring_desc *)(uintptr_t)
> +			vhost_iova_to_vva(dev, vq, vq->desc[desc_id].addr,

Same here, the indentation needs to be fixed.

> +					&dlen,
> +					VHOST_ACCESS_RO);
> +			if (unlikely(!desc_ring))
> +				return -1;
> +
> +			if (unlikely(dlen < vq->desc[idx].len)) {
> +				idesc = alloc_copy_ind_table(dev, vq,
> +					vq->desc[idx].addr, vq->desc[idx].len);
> +				if (unlikely(!idesc))
> +					return -1;
> +
> +				desc_ring = idesc;
> +			}
> +
> +			desc_id = 0;
> +		}
> +
> +		/* dirty page logging for Rx buffer */

Rx is a net concept; this API isn't net specific, so better to avoid
the "Rx buffer" wording here (and in the API doc above).

> +		do {
> +			desc = desc_ring[desc_id];
> +			if (desc.flags & VRING_DESC_F_WRITE)
> +				vhost_log_write(dev, desc.addr, desc.len);
> +			desc_id = desc.next;
> +		} while (desc.flags & VRING_DESC_F_NEXT);
> +
> +		if (unlikely(!!idesc)) {

The !! isn't needed here either.

> +			free_ind_table(idesc);
> +			idesc = NULL;
> +		}
> +
> +		idx++;
> +	}
> +

A write barrier is needed here as well, before the used index update.

> +	vq->used->idx = idx_m;
> +
> +	if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
> +		vring_used_event(m_vring) = m_vring->used->idx;
> +
> +	return ret;
> +}
[...]
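For the used side, the same pattern applies; roughly (untested sketch,
not a drop-in fix):

	idx = vq->used->idx;
	idx_m = m_vring->used->idx;
	/* uint16_t math, as on the avail side */
	ret = (uint16_t)(idx_m - idx);

	while (idx != idx_m) {
		vq->used->ring[idx & (vq->size - 1)] =
			m_vring->used->ring[idx & (vq->size - 1)];
		/* ... used ring logging and buffer logging as above ... */
		idx++;
	}

	rte_smp_wmb(); /* copied used entries before the index update */
	vq->used->idx = idx_m;
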