From: Tiwei Bie <tiwei.bie@intel.com>
To: JinYu <jin.yu@intel.com>
Cc: dev@dpdk.org, changpeng.liu@intel.com, zhihong.wang@intel.com,
maxime.coquelin@redhat.com, Lin Li <lilin24@baidu.com>,
Xun Ni <nixun@baidu.com>, Yu Zhang <zhangyu31@baidu.com>
Subject: Re: [dpdk-dev] [PATCH v5 1/2] vhost: support inflight share memory protocol feature
Date: Mon, 26 Aug 2019 16:28:11 +0800 [thread overview]
Message-ID: <20190826082811.GA30331@___> (raw)
In-Reply-To: <20190806182500.22320-2-jin.yu@intel.com>
On Wed, Aug 07, 2019 at 02:24:59AM +0800, JinYu wrote:
> This patch introduces two new messages VHOST_USER_GET_INFLIGHT_FD
> and VHOST_USER_SET_INFLIGHT_FD to support transferring a shared
> buffer between qemu and backend.
>
> Firstly, qemu uses VHOST_USER_GET_INFLIGHT_FD to get the
> shared buffer from backend. Then qemu should send it back
> through VHOST_USER_SET_INFLIGHT_FD each time we start vhost-user.
>
> This shared buffer is used to process inflight I/O when backend
> reconnect.
>
> Signed-off-by: Lin Li <lilin24@baidu.com>
> Signed-off-by: Xun Ni <nixun@baidu.com>
> Signed-off-by: Yu Zhang <zhangyu31@baidu.com>
> Signed-off-by: JinYu <jin.yu@intel.com>
s/JinYu/Jin Yu/
> ---
> v1 - specify the APIs are split-ring only
> v2 - fix APIs and judge split or packed
> v3 - Add rte_vhost_ prefix and fix one issue.
> v4 - add the packed ring support
> v5 - revise get_vring_base func depend on Tiwei's suggestion
> ---
> lib/librte_vhost/rte_vhost.h | 255 ++++++++++++++-
> lib/librte_vhost/rte_vhost_version.map | 12 +
> lib/librte_vhost/vhost.c | 396 +++++++++++++++++++++-
> lib/librte_vhost/vhost.h | 61 ++--
> lib/librte_vhost/vhost_user.c | 437 ++++++++++++++++++++++++-
> lib/librte_vhost/vhost_user.h | 13 +-
> 6 files changed, 1128 insertions(+), 46 deletions(-)
This patch is too big, please divide it into small patches.
E.g. rte_vhost_vq_is_packed() can be introduced in a separate patch.
>
> diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h
> index 0226b3eff..3f01429b1 100644
> --- a/lib/librte_vhost/rte_vhost.h
> +++ b/lib/librte_vhost/rte_vhost.h
> @@ -11,6 +11,7 @@
> */
>
> #include <stdint.h>
> +#include <stdbool.h>
> #include <sys/eventfd.h>
>
> #include <rte_memory.h>
> @@ -71,6 +72,10 @@ extern "C" {
> #define VHOST_USER_PROTOCOL_F_HOST_NOTIFIER 11
> #endif
>
> +#ifndef VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD
> +#define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD 12
> +#endif
> +
> /** Indicate whether protocol features negotiation is supported. */
> #ifndef VHOST_USER_F_PROTOCOL_FEATURES
> #define VHOST_USER_F_PROTOCOL_FEATURES 30
> @@ -98,10 +103,92 @@ struct rte_vhost_memory {
> struct rte_vhost_mem_region regions[];
> };
>
> +struct inflight_desc_packed {
> + uint8_t inflight;
> + uint8_t padding;
> + uint16_t next;
> + uint16_t last;
> + uint16_t num;
> + uint64_t counter;
> + uint16_t id;
> + uint16_t flags;
> + uint32_t len;
> + uint64_t addr;
> +};
Why struct inflight_desc_split doesn't have to be part of
vhost API but struct inflight_desc_packed has to be?
> +
> +struct inflight_info_packed {
> + uint64_t features;
> + uint16_t version;
> + uint16_t desc_num;
> + uint16_t free_head;
> + uint16_t old_free_head;
> + uint16_t used_idx;
> + uint16_t old_used_idx;
> + uint8_t used_wrap_counter;
> + uint8_t old_used_wrap_counter;
> + uint8_t padding[7];
> + struct inflight_desc_packed desc[0];
> +};
> +
> +struct rte_vhost_resubmit_desc {
> + uint16_t index;
> + uint64_t counter;
> +};
> +
> +struct rte_vhost_resubmit_info {
> + struct rte_vhost_resubmit_desc *resubmit_list;
> + uint16_t resubmit_num;
> +};
> +
> +struct rte_vhost_ring_inflight {
> + union {
> + struct inflight_info_split *inflight_split;
struct inflight_info_split is used but not declared.
> + struct inflight_info_packed *inflight_packed;
> + };
> +
> + struct rte_vhost_resubmit_info *resubmit_inflight;
> +};
> +
> +/*
> + * Declare below packed ring defines unconditionally
> + * as Kernel header might use different names.
> + */
> +#ifndef VIRTIO_F_RING_PACKED
> +#define VIRTIO_F_RING_PACKED 34
> +
> +#define VRING_DESC_F_AVAIL (1ULL << 7)
> +#define VRING_DESC_F_USED (1ULL << 15)
You shouldn't put above macros under VIRTIO_F_RING_PACKED as
kernel doesn't define them. And the build will be broken when
kernel defines VIRTIO_F_RING_PACKED.
Besides, it seems not a good idea to make them parts of vhost
API.
> +
> +struct vring_packed_desc {
> + uint64_t addr;
> + uint32_t len;
> + uint16_t id;
> + uint16_t flags;
> +};
> +
> +#define VRING_EVENT_F_ENABLE 0x0
> +#define VRING_EVENT_F_DISABLE 0x1
> +#define VRING_EVENT_F_DESC 0x2
> +struct vring_packed_desc_event {
> + uint16_t off_wrap;
> + uint16_t flags;
> +};
You just need to declare instead of defining
vring_packed_desc/vring_packed_desc_event in vhost header.
> +#endif
> +
> struct rte_vhost_vring {
> - struct vring_desc *desc;
> - struct vring_avail *avail;
> - struct vring_used *used;
> + union {
> + struct vring_desc *desc;
> + struct vring_packed_desc *desc_packed;
> + };
> + union {
> + struct vring_avail *avail;
> + struct vring_packed_desc_event *driver_event;
> + };
> + union {
> + struct vring_used *used;
> + struct vring_packed_desc_event *device_event;
> + };
> +
> uint64_t log_guest_addr;
>
> /** Deprecated, use rte_vhost_vring_call() instead. */
> @@ -603,6 +690,33 @@ uint16_t rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
> */
> int rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem);
>
> +/**
> + * Get vq is packed
> + *
> + * @param vid
> + * vhost device ID
> + * @return
> + * 0 on success, -1 on failure
Return value should be true or false.
> + */
> +int __rte_experimental
> +rte_vhost_vq_is_packed(int vid);
> +
> +/**
> + * Get guest inflight vring info, including inflight ring and resubmit list.
> + *
> + * @param vid
> + * vhost device ID
> + * @param vring_idx
> + * vring index
> + * @param vring
> + * the structure to hold the requested inflight vring info
> + * @return
> + * 0 on success, -1 on failure
> + */
> +int __rte_experimental
You should put the `__rte_experimental` tag at the beginning.
See this for more details:
http://git.dpdk.org/dpdk/commit/lib/librte_vhost?id=18218713bf4248c4c6b97a12231e7d59b8a86865
> +rte_vhost_get_vhost_ring_inflight(int vid, uint16_t vring_idx,
> + struct rte_vhost_ring_inflight *vring);
> +
> /**
> * Get guest vring info, including the vring address, vring size, etc.
> *
> @@ -616,7 +730,7 @@ int rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem);
> * 0 on success, -1 on failure
> */
> int rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx,
> - struct rte_vhost_vring *vring);
> + struct rte_vhost_vring *vring);
>
> /**
> * Notify the guest that used descriptors have been added to the vring. This
> @@ -631,6 +745,112 @@ int rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx,
> */
> int rte_vhost_vring_call(int vid, uint16_t vring_idx);
>
> +/**
> + * set split inflight descriptor.
Please capitalize the first letter consistently.
> + *
> + * This function save descriptors that has been comsumed in available
> + * ring
> + *
> + * @param vid
> + * vhost device ID
> + * @param vring_idx
> + * vring index
> + * @param idx
> + * inflight entry index
> + * @return
> + * 0 on success, -1 on failure
> + */
> +int __rte_experimental
> +rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx,
> + uint16_t idx);
> +
> +/**
> + * set packed inflight descriptor and get corresponding inflight entry
> + *
> + * This function save descriptors that has been comsumed
> + *
> + * @param vid
> + * vhost device ID
> + * @param vring_idx
> + * vring index
> + * @param idx
> + * inflight entry index
> + * @return
> + * 0 on success, -1 on failure
> + */
> +int __rte_experimental
> +rte_vhost_set_inflight_desc_packed(int vid, uint16_t vring_idx,
> + uint16_t head, uint16_t last, uint16_t *inflight_entry);
> +
> +/**
> + * save the head of list that the last batch of used descriptors.
> + *
> + * @param vid
> + * vhost device ID
> + * @param vring_idx
> + * vring index
> + * @param idx
> + * descriptor entry index
> + * @return
> + * 0 on success, -1 on failure
> + */
> +int __rte_experimental
> +rte_vhost_set_last_inflight_io_split(int vid,
> + uint16_t vring_idx, uint16_t idx);
> +
> +/**
> + * update the inflight free_head, used_idx and used_wrap_counter.
> + *
> + * This function will update status first before updating descriptors
> + * to used
> + *
> + * @param vid
> + * vhost device ID
> + * @param vring_idx
> + * vring index
> + * @param idx
> + * inflight entry index
> + * @return
> + * 0 on success, -1 on failure
> + */
> +int __rte_experimental
> +rte_vhost_set_last_inflight_io_packed(int vid,
> + uint16_t vring_idx, uint16_t head);
> +
> +/**
> + * clear the split inflight status.
> + *
> + * @param vid
> + * vhost device ID
> + * @param vring_idx
> + * vring index
> + * @param last_used_idx
> + * last used idx of used ring
> + * @param idx
> + * inflight entry index
> + * @return
> + * 0 on success, -1 on failure
> + */
> +int __rte_experimental
> +rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx,
> + uint16_t last_used_idx, uint16_t idx);
> +
> +/**
> + * clear the packed inflight status.
> + *
> + * @param vid
> + * vhost device ID
> + * @param vring_idx
> + * vring index
> + * @param head
> + * inflight entry index
> + * @return
> + * 0 on success, -1 on failure
> + */
> +int __rte_experimental
> +rte_vhost_clr_inflight_desc_packed(int vid, uint16_t vring_idx,
> + uint16_t head);
> +
> /**
> * Get vhost RX queue avail count.
> *
> @@ -656,7 +876,8 @@ uint32_t rte_vhost_rx_queue_count(int vid, uint16_t qid);
> * 0 on success, -1 on failure
> */
> int __rte_experimental
> -rte_vhost_get_log_base(int vid, uint64_t *log_base, uint64_t *log_size);
> +rte_vhost_get_log_base(int vid, uint64_t *log_base,
> + uint64_t *log_size);
This change isn't necessary.
>
> /**
> * Get last_avail/used_idx of the vhost virtqueue
> @@ -676,6 +897,28 @@ int __rte_experimental
> rte_vhost_get_vring_base(int vid, uint16_t queue_id,
> uint16_t *last_avail_idx, uint16_t *last_used_idx);
>
> +/**
> + * Get last_avail/last_used of the vhost virtqueue
> + *
> + * This function is designed for the reconnection and it's specific for
> + * the packed ring as we can get the two parameters from the inflight
> + * queueregion
> + *
> + * @param vid
> + * vhost device ID
> + * @param queue_id
> + * vhost queue index
> + * @param last_avail_idx
> + * vhost last_avail_idx to get
> + * @param last_used_idx
> + * vhost last_used_idx to get
> + * @return
> + * 0 on success, -1 on failure
> + */
> +int __rte_experimental
> +rte_vhost_get_vring_base_from_inflight(int vid,
> + uint16_t queue_id, uint16_t *last_avail_idx, uint16_t *last_used_idx);
> +
> /**
> * Set last_avail/used_idx of the vhost virtqueue
> *
> @@ -692,7 +935,7 @@ rte_vhost_get_vring_base(int vid, uint16_t queue_id,
> */
> int __rte_experimental
> rte_vhost_set_vring_base(int vid, uint16_t queue_id,
> - uint16_t last_avail_idx, uint16_t last_used_idx);
> + uint16_t last_avail_idx, uint16_t last_used_idx);
>
> /**
> * Register external message handling callbacks
> diff --git a/lib/librte_vhost/rte_vhost_version.map b/lib/librte_vhost/rte_vhost_version.map
> index 5f1d4a75c..99f1134ea 100644
> --- a/lib/librte_vhost/rte_vhost_version.map
> +++ b/lib/librte_vhost/rte_vhost_version.map
> @@ -87,4 +87,16 @@ EXPERIMENTAL {
> rte_vdpa_relay_vring_used;
> rte_vhost_extern_callback_register;
> rte_vhost_driver_set_protocol_features;
> + rte_vhost_set_inflight_desc_split;
> + rte_vhost_clr_inflight_desc_split;
> + rte_vhost_set_last_inflight_io_split;
> + rte_vhost_get_vhost_ring_inflight;
> + rte_vhost_vq_is_packed;
> + rte_vhost_set_inflight_desc_packed;
> + rte_vhost_clr_inflight_desc_packed;
> + rte_vhost_set_last_inflight_io_packed;
> + rte_vhost_get_vring_base_counter;
> + rte_vhost_get_vring_base_from_inflight;
> + rte_vhost_get_vring_base_counter_from_inflight;
> + rte_vhost_set_vring_base_counter;
> };
> @@ -939,13 +1270,48 @@ int rte_vhost_get_log_base(int vid, uint64_t *log_base,
> int rte_vhost_get_vring_base(int vid, uint16_t queue_id,
> uint16_t *last_avail_idx, uint16_t *last_used_idx)
> {
> + struct vhost_virtqueue *vq;
> struct virtio_net *dev = get_device(vid);
>
> if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL)
> return -1;
>
> - *last_avail_idx = dev->virtqueue[queue_id]->last_avail_idx;
> - *last_used_idx = dev->virtqueue[queue_id]->last_used_idx;
> + vq = dev->virtqueue[queue_id];
> + if (!vq)
> + return -1;
> +
> + if (vq_is_packed(dev)) {
> + *last_avail_idx = (vq->avail_wrap_counter << 15) |
> + vq->last_avail_idx;
> + *last_used_idx = (vq->used_wrap_counter << 15) |
> + vq->last_used_idx;
> + } else {
> + *last_avail_idx = vq->last_avail_idx;
> + *last_used_idx = vq->last_used_idx;
> + }
This should be a fix. Without this change, the user of this API
can't use packed ring properly.
Please do this in a separate patch, add a fixes line and Cc stable.
> +
> + return 0;
> +}
> +
> +int rte_vhost_get_vring_base_from_inflight(int vid,
> + uint16_t queue_id, uint16_t *last_avail_idx, uint16_t *last_used_idx)
> +{
> + struct inflight_info_packed *inflight_info;
> + struct virtio_net *dev = get_device(vid);
> +
> + if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL)
> + return -1;
> +
> + if (!vq_is_packed(dev))
> + return -1;
> +
> + inflight_info = dev->virtqueue[queue_id]->inflight_packed;
> + if (!inflight_info)
> + return -1;
> +
> + *last_avail_idx = (inflight_info->old_used_wrap_counter << 15) |
> + inflight_info->old_used_idx;
> + *last_used_idx = *last_avail_idx;
>
> return 0;
> }
> @@ -953,13 +1319,25 @@ int rte_vhost_get_vring_base(int vid, uint16_t queue_id,
> int rte_vhost_set_vring_base(int vid, uint16_t queue_id,
> uint16_t last_avail_idx, uint16_t last_used_idx)
> {
> + struct vhost_virtqueue *vq;
> struct virtio_net *dev = get_device(vid);
>
> if (!dev)
> return -1;
>
> - dev->virtqueue[queue_id]->last_avail_idx = last_avail_idx;
> - dev->virtqueue[queue_id]->last_used_idx = last_used_idx;
> + vq = dev->virtqueue[queue_id];
> + if (!vq)
> + return -1;
> +
> + if (vq_is_packed(dev)) {
> + vq->last_avail_idx = last_avail_idx & 0x7fff;
> + vq->avail_wrap_counter = !!(last_avail_idx & (1 << 15));
> + vq->last_used_idx = last_used_idx & 0x7fff;
> + vq->used_wrap_counter = !!(last_used_idx & (1 << 15));
> + } else {
> + vq->last_avail_idx = last_avail_idx;
> + vq->last_used_idx = last_used_idx;
> + }
Ditto.
>
> return 0;
> }
> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> index 884befa85..e9d0b983d 100644
> --- a/lib/librte_vhost/vhost.h
> +++ b/lib/librte_vhost/vhost.h
> @@ -88,6 +88,22 @@ struct vring_used_elem_packed {
> uint32_t count;
> };
>
> +struct inflight_desc_split {
> + uint8_t inflight;
> + uint8_t padding[5];
> + uint16_t next;
> + uint64_t counter;
> +};
> +
> +struct inflight_info_split {
> + uint64_t features;
> + uint16_t version;
> + uint16_t desc_num;
> + uint16_t last_inflight_io;
> + uint16_t used_idx;
> + struct inflight_desc_split desc[0];
> +};
> +
next prev parent reply other threads:[~2019-08-26 8:30 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <20190731204050.40633>
2019-08-06 18:24 ` [dpdk-dev] [PATCH v5 0/2] " JinYu
2019-08-06 18:24 ` [dpdk-dev] [PATCH v5 1/2] " JinYu
2019-08-26 8:28 ` Tiwei Bie [this message]
2019-08-06 18:25 ` [dpdk-dev] [PATCH v5 2/2] vhost: add vhost-user-blk example which support inflight JinYu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190826082811.GA30331@___ \
--to=tiwei.bie@intel.com \
--cc=changpeng.liu@intel.com \
--cc=dev@dpdk.org \
--cc=jin.yu@intel.com \
--cc=lilin24@baidu.com \
--cc=maxime.coquelin@redhat.com \
--cc=nixun@baidu.com \
--cc=zhangyu31@baidu.com \
--cc=zhihong.wang@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).