From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <chuckylinchuckylin@gmail.com>
Received: from mail-oi1-f193.google.com (mail-oi1-f193.google.com
 [209.85.167.193]) by dpdk.org (Postfix) with ESMTP id 128B21B57C
 for <dev@dpdk.org>; Thu, 25 Apr 2019 12:56:17 +0200 (CEST)
Received: by mail-oi1-f193.google.com with SMTP id v7so16793473oie.8
 for <dev@dpdk.org>; Thu, 25 Apr 2019 03:56:16 -0700 (PDT)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20161025;
 h=mime-version:references:in-reply-to:from:date:message-id:subject:to
 :cc:content-transfer-encoding;
 bh=aaEP2RFtR80K0wLi1XoRaEyg4MGocFOvQldMRI+q+IE=;
 b=LbooB8tE2cZgINtzdMLd/WPfMaHPq04ozy0/GcoGIwNvDVSPj00/KNbF/85kNBWnYq
 /CPQPyVFlt2DqGuGTJiouXobES1TZr+iHdJVahcV86b1PlTtAgfriDi3EeKGhzuyzn0H
 Rk8YoqvsmhRyygIHcoJ177V9OOXD77tSyTBwQk3xd/038wuxez0u7ORLP/y8XeDTAm0y
 bbBa6p2HD+RA98rWEN2Cq5yEnDQYDuIuwbbnte8TD1u5LeRtxsn7AatxIQUb5d0O8GfN
 aViaiQ58lgbnxQL6F7C/yN8X0qDA2onB/tI4bzKtJgP6YNmTkyB3IKgRFXueq/TJX9I6
 wYmA==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
 d=1e100.net; s=20161025;
 h=x-gm-message-state:mime-version:references:in-reply-to:from:date
 :message-id:subject:to:cc:content-transfer-encoding;
 bh=aaEP2RFtR80K0wLi1XoRaEyg4MGocFOvQldMRI+q+IE=;
 b=fIOZgWuuFdzUcNBLpLgiQp4xPwXmLdpwu2eihs5OwV7eZojkYqA2htSrGkJKhpVdI2
 DFgceR/7qQ0I+5eExL6FHOdmPvli6l4lVpLyoFNlfGmrNEshexvX1dBF5jYdlv5sMuFk
 tGEOOIJZwA/ti00umNpAvcIJeeeQz4zeKqAgrM0p0qxkvVLchg2TOuxCWcaEikrV+ob+
 eg7fCsU1wffvhUWOXK+phA/rxo+GDQcFZBcWVYPfLze2kkHIQeoQlClOR32Uuflr+rpO
 +c1tRkYYptqtQW1YG9yEh/tT3rEmts2brTctS31OQcYDAMmguPgT7PkGFdqp/X6Cmwbs
 1uSQ==
X-Gm-Message-State: APjAAAUsybH2ESWv7yAt8Rah5H8nulVLXs3TuJ84qHwUJO8ibO2Cf/IO
 airJ08Fx6Sj6YQqDcrJ5p29wQjZhNiwdNUkGux0=
X-Google-Smtp-Source: APXvYqyDsQSQmAMyhLPGmIATbVAzA82N9Tz8sX6J/znS5QrfJGtnUDijTMDrJHuHExSCB0WGKYHwz8BcOoffI0Zo008=
X-Received: by 2002:aca:e008:: with SMTP id x8mr2872426oig.106.1556189776123; 
 Thu, 25 Apr 2019 03:56:16 -0700 (PDT)
MIME-Version: 1.0
References: <1556179018-7865-1-git-send-email-chuckylinchuckylin@gmail.com>
In-Reply-To: <1556179018-7865-1-git-send-email-chuckylinchuckylin@gmail.com>
From: lin li <chuckylinchuckylin@gmail.com>
Date: Thu, 25 Apr 2019 18:56:04 +0800
Message-ID: <CAF+hgq3gToAOB6+S8PxN19HgfBnAdjy=NCNPYZqGa6OmYfS4Zg@mail.gmail.com>
To: tiwei.bie@intel.com, maxime.coquelin@redhat.com, zhihong.wang@intel.com
Cc: dev@dpdk.org, dariusz.stojaczyk@intel.com, changpeng.liu@intel.com, 
 james.r.harris@intel.com, lilin24 <lilin24@baidu.com>, Ni Xun <nixun@baidu.com>,
 Zhang Yu <zhangyu31@baidu.com>
Content-Type: text/plain; charset="UTF-8"
Content-Transfer-Encoding: quoted-printable
Subject: Re: [dpdk-dev] [PATCH] [resend] vhost: support inflight share
	memory protocol feature
X-BeenThere: dev@dpdk.org
X-Mailman-Version: 2.1.15
Precedence: list
List-Id: DPDK patches and discussions <dev.dpdk.org>
List-Unsubscribe: <https://mails.dpdk.org/options/dev>,
 <mailto:dev-request@dpdk.org?subject=unsubscribe>
List-Archive: <http://mails.dpdk.org/archives/dev/>
List-Post: <mailto:dev@dpdk.org>
List-Help: <mailto:dev-request@dpdk.org?subject=help>
List-Subscribe: <https://mails.dpdk.org/listinfo/dev>,
 <mailto:dev-request@dpdk.org?subject=subscribe>
X-List-Received-Date: Thu, 25 Apr 2019 10:56:17 -0000

Li Lin <chuckylinchuckylin@gmail.com> =E4=BA=8E2019=E5=B9=B44=E6=9C=8825=E6=
=97=A5=E5=91=A8=E5=9B=9B =E4=B8=8B=E5=8D=883:57=E5=86=99=E9=81=93=EF=BC=9A
>
> From: lilin24 <lilin24@baidu.com>
>
> This patch introduces two new messages VHOST_USER_GET_INFLIGHT_FD
> and VHOST_USER_SET_INFLIGHT_FD to support transferring a shared
> buffer between qemu and backend.
>
> Firstly, qemu uses VHOST_USER_GET_INFLIGHT_FD to get the
> shared buffer from backend. Then qemu should send it back
> through VHOST_USER_SET_INFLIGHT_FD each time we start vhost-user.
>
> This shared buffer is used to process inflight I/O when the backend
> reconnects.
>
> Signed-off-by: lilin24 <lilin24@baidu.com>
> Signed-off-by: Ni Xun <nixun@baidu.com>
> Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
> ---
>  lib/librte_vhost/rte_vhost.h  |  19 ++++
>  lib/librte_vhost/vhost.c      |  10 ++
>  lib/librte_vhost/vhost.h      |  12 ++
>  lib/librte_vhost/vhost_user.c | 248 ++++++++++++++++++++++++++++++++++++=
++++++
>  lib/librte_vhost/vhost_user.h |  16 ++-
>  5 files changed, 303 insertions(+), 2 deletions(-)
>
> diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h
> index d2c0c93f4..9993b7ce5 100644
> --- a/lib/librte_vhost/rte_vhost.h
> +++ b/lib/librte_vhost/rte_vhost.h
> @@ -71,6 +71,10 @@ extern "C" {
>  #define VHOST_USER_PROTOCOL_F_HOST_NOTIFIER 11
>  #endif
>
> +#ifndef VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD
> +#define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD 12
> +#endif
> +
>  /** Indicate whether protocol features negotiation is supported. */
>  #ifndef VHOST_USER_F_PROTOCOL_FEATURES
>  #define VHOST_USER_F_PROTOCOL_FEATURES 30
> @@ -98,12 +102,27 @@ struct rte_vhost_memory {
>         struct rte_vhost_mem_region regions[];
>  };
>
> +typedef struct VhostUserInflightEntry {
> +       uint8_t inflight;
> +} VhostUserInflightEntry;
> +
> +typedef struct VhostInflightInfo {
> +       uint16_t version;
> +       uint16_t last_inflight_io;
> +       uint16_t used_idx;
> +       VhostUserInflightEntry desc[0];
> +} VhostInflightInfo;
> +
>  struct rte_vhost_vring {
>         struct vring_desc       *desc;
>         struct vring_avail      *avail;
>         struct vring_used       *used;
>         uint64_t                log_guest_addr;
>
> +       VhostInflightInfo       *inflight;
> +       uint16_t                *inflight_reqs;
> +       uint16_t                inflight_cnt;
> +
>         /** Deprecated, use rte_vhost_vring_call() instead. */
>         int                     callfd;
>
> diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
> index 163f4595e..e7d9eabec 100644
> --- a/lib/librte_vhost/vhost.c
> +++ b/lib/librte_vhost/vhost.c
> @@ -76,6 +76,8 @@ cleanup_vq(struct vhost_virtqueue *vq, int destroy)
>                 close(vq->callfd);
>         if (vq->kickfd >=3D 0)
>                 close(vq->kickfd);
> +       if (vq->inflight)
> +               vq->inflight =3D NULL;
>  }
>
>  /*
> @@ -589,6 +591,14 @@ rte_vhost_get_vhost_vring(int vid, uint16_t vring_id=
x,
>         vring->kickfd  =3D vq->kickfd;
>         vring->size    =3D vq->size;
>
> +       vring->inflight =3D vq->inflight;
> +
> +       vring->inflight_reqs =3D vq->inflight_reqs;
> +       vq->inflight_reqs =3D NULL;
> +
> +       vring->inflight_cnt =3D vq->inflight_cnt;
> +       vq->inflight_cnt =3D 0;
> +
>         return 0;
>  }
>
> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> index e9138dfab..2ab9d6892 100644
> --- a/lib/librte_vhost/vhost.h
> +++ b/lib/librte_vhost/vhost.h
> @@ -128,6 +128,11 @@ struct vhost_virtqueue {
>         /* Physical address of used ring, for logging */
>         uint64_t                log_guest_addr;
>
> +       /* Inflight share memory info */
> +       VhostInflightInfo       *inflight;
> +       uint16_t                *inflight_reqs;
> +       uint16_t                inflight_cnt;
> +
>         uint16_t                nr_zmbuf;
>         uint16_t                zmbuf_size;
>         uint16_t                last_zmbuf_idx;
> @@ -286,6 +291,12 @@ struct guest_page {
>         uint64_t size;
>  };
>
> +typedef struct VuDevInflightInfo {
> +       int fd;
> +       void *addr;
> +       uint64_t size;
> +} VuDevInflightInfo;
> +
>  /**
>   * Device structure contains all configuration information relating
>   * to the device.
> @@ -303,6 +314,7 @@ struct virtio_net {
>         uint32_t                nr_vring;
>         int                     dequeue_zero_copy;
>         struct vhost_virtqueue  *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2];
> +       VuDevInflightInfo inflight_info;
>  #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ)
>         char                    ifname[IF_NAME_SZ];
>         uint64_t                log_size;
> diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.=
c
> index c9e29ece8..c4b64137c 100644
> --- a/lib/librte_vhost/vhost_user.c
> +++ b/lib/librte_vhost/vhost_user.c
> @@ -31,6 +31,8 @@
>  #include <sys/stat.h>
>  #include <sys/syscall.h>
>  #include <assert.h>
> +#include <sys/syscall.h>
> +#include <asm/unistd.h>
>  #ifdef RTE_LIBRTE_VHOST_NUMA
>  #include <numaif.h>
>  #endif
> @@ -49,6 +51,14 @@
>  #define VIRTIO_MIN_MTU 68
>  #define VIRTIO_MAX_MTU 65535
>
> +#define INFLIGHT_ALIGNMENT 64
> +#define INFLIGHT_VERSION 0xabcd
> +
> +#define CLOEXEC 0x0001U
> +
> +#define ALIGN_DOWN(n, m) ((n) / (m) * (m))
> +#define ALIGN_UP(n, m) ALIGN_DOWN((n) + (m) - 1, (m))
> +
>  static const char *vhost_message_str[VHOST_USER_MAX] =3D {
>         [VHOST_USER_NONE] =3D "VHOST_USER_NONE",
>         [VHOST_USER_GET_FEATURES] =3D "VHOST_USER_GET_FEATURES",
> @@ -78,6 +88,8 @@ static const char *vhost_message_str[VHOST_USER_MAX] =
=3D {
>         [VHOST_USER_POSTCOPY_ADVISE]  =3D "VHOST_USER_POSTCOPY_ADVISE",
>         [VHOST_USER_POSTCOPY_LISTEN]  =3D "VHOST_USER_POSTCOPY_LISTEN",
>         [VHOST_USER_POSTCOPY_END]  =3D "VHOST_USER_POSTCOPY_END",
> +       [VHOST_USER_GET_INFLIGHT_FD] =3D "VHOST_USER_GET_INFLIGHT_FD",
> +       [VHOST_USER_SET_INFLIGHT_FD] =3D "VHOST_USER_SET_INFLIGHT_FD",
>  };
>
>  static int send_vhost_reply(int sockfd, struct VhostUserMsg *msg);
> @@ -160,6 +172,16 @@ vhost_backend_cleanup(struct virtio_net *dev)
>                 dev->log_addr =3D 0;
>         }
>
> +       if (dev->inflight_info.addr) {
> +               munmap(dev->inflight_info.addr, dev->inflight_info.size);
> +               dev->inflight_info.addr =3D NULL;
> +       }
> +
> +       if (dev->inflight_info.fd > 0) {
> +               close(dev->inflight_info.fd);
> +               dev->inflight_info.fd =3D -1;
> +       }
> +
>         if (dev->slave_req_fd >=3D 0) {
>                 close(dev->slave_req_fd);
>                 dev->slave_req_fd =3D -1;
> @@ -1165,6 +1187,184 @@ virtio_is_ready(struct virtio_net *dev)
>         return 1;
>  }
>
> +static int mem_create(const char *name, unsigned int flags)
> +{
> +#ifdef __NR_memfd_create
> +       return syscall(__NR_memfd_create, name, flags);
> +#else
> +       return -1;
> +#endif
> +}
> +
> +void *inflight_mem_alloc(const char *name, size_t size, int *fd)
> +{
> +       void *ptr;
> +       int mfd =3D -1;
> +       char fname[20] =3D "/tmp/memfd-XXXXXX";
> +
> +       *fd =3D -1;
> +       mfd =3D mem_create(name, CLOEXEC);
> +       if (mfd !=3D -1) {
> +               if (ftruncate(mfd, size) =3D=3D -1) {
> +                       RTE_LOG(ERR, VHOST_CONFIG,
> +                                       "ftruncate fail for alloc infligh=
t buffer\n");
> +                       close(mfd);
> +                       return NULL;
> +               }
> +       } else {
> +               mfd =3D mkstemp(fname);
> +               unlink(fname);
> +
> +               if (mfd =3D=3D -1) {
> +                       RTE_LOG(ERR, VHOST_CONFIG,
> +                                       "mkstemp fail for alloc inflight =
buffer\n");
> +                       return NULL;
> +               }
> +
> +               if (ftruncate(mfd, size) =3D=3D -1) {
> +                       RTE_LOG(ERR, VHOST_CONFIG,
> +                                       "ftruncate fail for alloc infligh=
t buffer\n");
> +                       close(mfd);
> +                       return NULL;
> +               }
> +       }
> +
> +       ptr =3D mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, mfd, 0)=
;
> +       if (ptr =3D=3D MAP_FAILED) {
> +               RTE_LOG(ERR, VHOST_CONFIG,
> +                               "mmap fail for alloc inflight buffer\n");
> +               close(mfd);
> +               return NULL;
> +       }
> +
> +       *fd =3D mfd;
> +       return ptr;
> +}
> +
> +static uint32_t get_pervq_shm_size(uint16_t queue_size)
> +{
> +       return ALIGN_UP(sizeof(VhostUserInflightEntry) * queue_size +
> +               sizeof(uint16_t) * 3, INFLIGHT_ALIGNMENT);
> +}
> +
> +static int
> +vhost_user_get_inflight_fd(struct virtio_net **pdev, VhostUserMsg *msg,
> +               int main_fd __rte_unused)
> +{
> +       int fd;
> +       uint64_t mmap_size;
> +       void *addr;
> +       uint16_t num_queues, queue_size;
> +       struct virtio_net *dev =3D *pdev;
> +
> +       if (msg->size !=3D sizeof(msg->payload.inflight)) {
> +               RTE_LOG(ERR, VHOST_CONFIG,
> +                       "Invalid get_inflight_fd message size is %d",
> +                       msg->size);
> +               msg->payload.inflight.mmap_size =3D 0;
> +               return 0;
> +       }
> +
> +       num_queues =3D msg->payload.inflight.num_queues;
> +       queue_size =3D msg->payload.inflight.queue_size;
> +
> +       RTE_LOG(INFO, VHOST_CONFIG, "get_inflight_fd num_queues: %u\n",
> +                       msg->payload.inflight.num_queues);
> +       RTE_LOG(INFO, VHOST_CONFIG, "get_inflight_fd queue_size: %u\n",
> +                       msg->payload.inflight.queue_size);
> +       mmap_size =3D num_queues * get_pervq_shm_size(queue_size);
> +
> +       addr =3D inflight_mem_alloc("vhost-inflight", mmap_size, &fd);
> +       if (!addr) {
> +               RTE_LOG(ERR, VHOST_CONFIG, "Failed to alloc vhost infligh=
t area");
> +                       msg->payload.inflight.mmap_size =3D 0;
> +               return 0;
> +       }
> +       memset(addr, 0, mmap_size);
> +
> +       dev->inflight_info.addr =3D addr;
> +       dev->inflight_info.size =3D msg->payload.inflight.mmap_size =3D m=
map_size;
> +       dev->inflight_info.fd =3D msg->fds[0] =3D fd;
> +       msg->payload.inflight.mmap_offset =3D 0;
> +       msg->fd_num =3D 1;
> +
> +       RTE_LOG(INFO, VHOST_CONFIG,
> +                       "send inflight mmap_size: %lu\n",
> +                       msg->payload.inflight.mmap_size);
> +       RTE_LOG(INFO, VHOST_CONFIG,
> +                       "send inflight mmap_offset: %lu\n",
> +                       msg->payload.inflight.mmap_offset);
> +       RTE_LOG(INFO, VHOST_CONFIG,
> +                       "send inflight fd: %d\n", msg->fds[0]);
> +
> +       return RTE_VHOST_MSG_RESULT_REPLY;
> +}
> +
> +static int
> +vhost_user_set_inflight_fd(struct virtio_net **pdev, VhostUserMsg *msg,
> +               int main_fd __rte_unused)
> +{
> +       int fd, i;
> +       uint64_t mmap_size, mmap_offset;
> +       uint16_t num_queues, queue_size;
> +       uint32_t pervq_inflight_size;
> +       void *rc;
> +       struct vhost_virtqueue *vq;
> +       struct virtio_net *dev =3D *pdev;
> +
> +       fd =3D msg->fds[0];
> +       if (msg->size !=3D sizeof(msg->payload.inflight) || fd < 0) {
> +               RTE_LOG(ERR, VHOST_CONFIG, "Invalid set_inflight_fd messa=
ge size is %d,fd is %d\n",
> +                       msg->size, fd);
> +               return -1;
> +       }
> +
> +       mmap_size =3D msg->payload.inflight.mmap_size;
> +       mmap_offset =3D msg->payload.inflight.mmap_offset;
> +       num_queues =3D msg->payload.inflight.num_queues;
> +       queue_size =3D msg->payload.inflight.queue_size;
> +       pervq_inflight_size =3D get_pervq_shm_size(queue_size);
> +
> +       RTE_LOG(INFO, VHOST_CONFIG,
> +               "set_inflight_fd mmap_size: %lu\n", mmap_size);
> +       RTE_LOG(INFO, VHOST_CONFIG,
> +               "set_inflight_fd mmap_offset: %lu\n", mmap_offset);
> +       RTE_LOG(INFO, VHOST_CONFIG,
> +               "set_inflight_fd num_queues: %u\n", num_queues);
> +       RTE_LOG(INFO, VHOST_CONFIG,
> +               "set_inflight_fd queue_size: %u\n", queue_size);
> +       RTE_LOG(INFO, VHOST_CONFIG,
> +               "set_inflight_fd fd: %d\n", fd);
> +       RTE_LOG(INFO, VHOST_CONFIG,
> +               "set_inflight_fd pervq_inflight_size: %d\n",
> +               pervq_inflight_size);
> +
> +       if (dev->inflight_info.addr)
> +               munmap(dev->inflight_info.addr, dev->inflight_info.size);
> +
> +       rc =3D mmap(0, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
> +                       fd, mmap_offset);
> +       if (rc =3D=3D MAP_FAILED) {
> +               RTE_LOG(ERR, VHOST_CONFIG, "failed to mmap share memory.\=
n");
> +               return -1;
> +       }
> +
> +       if (dev->inflight_info.fd)
> +               close(dev->inflight_info.fd);
> +
> +       dev->inflight_info.fd =3D fd;
> +       dev->inflight_info.addr =3D rc;
> +       dev->inflight_info.size =3D mmap_size;
> +
> +       for (i =3D 0; i < num_queues; i++) {
> +               vq =3D dev->virtqueue[i];
> +               vq->inflight =3D (VhostInflightInfo *)rc;
> +               rc =3D (void *)((char *)rc + pervq_inflight_size);
> +       }
> +
> +       return RTE_VHOST_MSG_RESULT_OK;
> +}
> +
>  static int
>  vhost_user_set_vring_call(struct virtio_net **pdev, struct VhostUserMsg =
*msg,
>                         int main_fd __rte_unused)
> @@ -1202,6 +1402,47 @@ static int vhost_user_set_vring_err(struct virtio_=
net **pdev __rte_unused,
>  }
>
>  static int
> +vhost_check_queue_inflights(struct vhost_virtqueue *vq)
> +{
> +       struct vring_used *used =3D vq->used;
> +       uint16_t i =3D 0;
> +
> +       if ((!vq->inflight))
> +               return RTE_VHOST_MSG_RESULT_ERR;
> +
> +       if (!vq->inflight->version) {
> +               vq->inflight->version =3D INFLIGHT_VERSION;
> +               return RTE_VHOST_MSG_RESULT_OK;
> +       }
> +
> +       vq->inflight_reqs =3D calloc(vq->size, sizeof(uint16_t));
> +       if (!vq->inflight_reqs) {
> +               RTE_LOG(ERR, VHOST_CONFIG, "Failed to allocate memory for=
 inflight reqs.\n");
> +               return RTE_VHOST_MSG_RESULT_ERR;
> +       }
> +
> +       if (vq->inflight->used_idx !=3D used->idx) {
> +               vq->inflight->desc[vq->inflight->last_inflight_io].inflig=
ht =3D 0;
> +               rte_compiler_barrier();
> +               vq->inflight->used_idx =3D used->idx;
> +       }
> +
> +       for (i =3D 0; i < vq->size; i++) {
> +               if (vq->inflight->desc[i].inflight =3D=3D 1)
> +                       vq->inflight_reqs[vq->inflight_cnt++] =3D i;
> +       }
> +
> +       if (!vq->inflight_cnt) {
> +               free(vq->inflight_reqs);
> +               vq->inflight_reqs =3D NULL;
> +       }
> +
> +       vq->last_avail_idx +=3D vq->inflight_cnt;
> +
> +       return RTE_VHOST_MSG_RESULT_OK;
> +}
> +
> +static int
>  vhost_user_set_vring_kick(struct virtio_net **pdev, struct VhostUserMsg =
*msg,
>                         int main_fd __rte_unused)
>  {
> @@ -1242,6 +1483,11 @@ vhost_user_set_vring_kick(struct virtio_net **pdev=
, struct VhostUserMsg *msg,
>                 close(vq->kickfd);
>         vq->kickfd =3D file.fd;
>
> +       if (vhost_check_queue_inflights(vq)) {
> +               RTE_LOG(ERR, VHOST_CONFIG, "Failed to inflights for vq: %=
d\n", file.index);
> +               return RTE_VHOST_MSG_RESULT_ERR;
> +       }
> +
>         return RTE_VHOST_MSG_RESULT_OK;
>  }
>
> @@ -1762,6 +2008,8 @@ static vhost_message_handler_t vhost_message_handle=
rs[VHOST_USER_MAX] =3D {
>         [VHOST_USER_POSTCOPY_ADVISE] =3D vhost_user_set_postcopy_advise,
>         [VHOST_USER_POSTCOPY_LISTEN] =3D vhost_user_set_postcopy_listen,
>         [VHOST_USER_POSTCOPY_END] =3D vhost_user_postcopy_end,
> +       [VHOST_USER_GET_INFLIGHT_FD] =3D vhost_user_get_inflight_fd,
> +       [VHOST_USER_SET_INFLIGHT_FD] =3D vhost_user_set_inflight_fd,
>  };
>
>
> diff --git a/lib/librte_vhost/vhost_user.h b/lib/librte_vhost/vhost_user.=
h
> index 2a650fe4b..35f969b1b 100644
> --- a/lib/librte_vhost/vhost_user.h
> +++ b/lib/librte_vhost/vhost_user.h
> @@ -23,7 +23,8 @@
>                                          (1ULL << VHOST_USER_PROTOCOL_F_C=
RYPTO_SESSION) | \
>                                          (1ULL << VHOST_USER_PROTOCOL_F_S=
LAVE_SEND_FD) | \
>                                          (1ULL << VHOST_USER_PROTOCOL_F_H=
OST_NOTIFIER) | \
> -                                        (1ULL << VHOST_USER_PROTOCOL_F_P=
AGEFAULT))
> +                                       (1ULL << VHOST_USER_PROTOCOL_F_PA=
GEFAULT) | \
> +                                       (1ULL << VHOST_USER_PROTOCOL_F_IN=
FLIGHT_SHMFD))
>
>  typedef enum VhostUserRequest {
>         VHOST_USER_NONE =3D 0,
> @@ -54,7 +55,9 @@ typedef enum VhostUserRequest {
>         VHOST_USER_POSTCOPY_ADVISE =3D 28,
>         VHOST_USER_POSTCOPY_LISTEN =3D 29,
>         VHOST_USER_POSTCOPY_END =3D 30,
> -       VHOST_USER_MAX =3D 31
> +       VHOST_USER_GET_INFLIGHT_FD =3D 31,
> +       VHOST_USER_SET_INFLIGHT_FD =3D 32,
> +       VHOST_USER_MAX =3D 33
>  } VhostUserRequest;
>
>  typedef enum VhostUserSlaveRequest {
> @@ -112,6 +115,13 @@ typedef struct VhostUserVringArea {
>         uint64_t offset;
>  } VhostUserVringArea;
>
> +typedef struct VhostUserInflight {
> +       uint64_t mmap_size;
> +       uint64_t mmap_offset;
> +       uint16_t num_queues;
> +       uint16_t queue_size;
> +} VhostUserInflight;
> +
>  typedef struct VhostUserMsg {
>         union {
>                 uint32_t master; /* a VhostUserRequest value */
> @@ -131,6 +141,7 @@ typedef struct VhostUserMsg {
>                 struct vhost_vring_addr addr;
>                 VhostUserMemory memory;
>                 VhostUserLog    log;
> +               VhostUserInflight inflight;
>                 struct vhost_iotlb_msg iotlb;
>                 VhostUserCryptoSessionParam crypto_session;
>                 VhostUserVringArea area;
> @@ -148,6 +159,7 @@ typedef struct VhostUserMsg {
>  /* vhost_user.c */
>  int vhost_user_msg_handler(int vid, int fd);
>  int vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t=
 perm);
> +void *inflight_mem_alloc(const char *name, size_t size, int *fd);
>
>  /* socket.c */
>  int read_fd_message(int sockfd, char *buf, int buflen, int *fds, int max=
_fds,
> --
> 2.11.0
>
This patch still needs some additional functions, so this version will be
abandoned. Later, I will add functions to set and clear inflight entries
and resubmit it as a V2 version.

From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <dev-bounces@dpdk.org>
Received: from dpdk.org (dpdk.org [92.243.14.124])
	by dpdk.space (Postfix) with ESMTP id 74E0FA05D3
	for <public@inbox.dpdk.org>; Thu, 25 Apr 2019 12:56:19 +0200 (CEST)
Received: from [92.243.14.124] (localhost [127.0.0.1])
	by dpdk.org (Postfix) with ESMTP id 419491B57D;
	Thu, 25 Apr 2019 12:56:18 +0200 (CEST)
Received: from mail-oi1-f193.google.com (mail-oi1-f193.google.com
 [209.85.167.193]) by dpdk.org (Postfix) with ESMTP id 128B21B57C
 for <dev@dpdk.org>; Thu, 25 Apr 2019 12:56:17 +0200 (CEST)
Received: by mail-oi1-f193.google.com with SMTP id v7so16793473oie.8
 for <dev@dpdk.org>; Thu, 25 Apr 2019 03:56:16 -0700 (PDT)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20161025;
 h=mime-version:references:in-reply-to:from:date:message-id:subject:to
 :cc:content-transfer-encoding;
 bh=aaEP2RFtR80K0wLi1XoRaEyg4MGocFOvQldMRI+q+IE=;
 b=LbooB8tE2cZgINtzdMLd/WPfMaHPq04ozy0/GcoGIwNvDVSPj00/KNbF/85kNBWnYq
 /CPQPyVFlt2DqGuGTJiouXobES1TZr+iHdJVahcV86b1PlTtAgfriDi3EeKGhzuyzn0H
 Rk8YoqvsmhRyygIHcoJ177V9OOXD77tSyTBwQk3xd/038wuxez0u7ORLP/y8XeDTAm0y
 bbBa6p2HD+RA98rWEN2Cq5yEnDQYDuIuwbbnte8TD1u5LeRtxsn7AatxIQUb5d0O8GfN
 aViaiQ58lgbnxQL6F7C/yN8X0qDA2onB/tI4bzKtJgP6YNmTkyB3IKgRFXueq/TJX9I6
 wYmA==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
 d=1e100.net; s=20161025;
 h=x-gm-message-state:mime-version:references:in-reply-to:from:date
 :message-id:subject:to:cc:content-transfer-encoding;
 bh=aaEP2RFtR80K0wLi1XoRaEyg4MGocFOvQldMRI+q+IE=;
 b=fIOZgWuuFdzUcNBLpLgiQp4xPwXmLdpwu2eihs5OwV7eZojkYqA2htSrGkJKhpVdI2
 DFgceR/7qQ0I+5eExL6FHOdmPvli6l4lVpLyoFNlfGmrNEshexvX1dBF5jYdlv5sMuFk
 tGEOOIJZwA/ti00umNpAvcIJeeeQz4zeKqAgrM0p0qxkvVLchg2TOuxCWcaEikrV+ob+
 eg7fCsU1wffvhUWOXK+phA/rxo+GDQcFZBcWVYPfLze2kkHIQeoQlClOR32Uuflr+rpO
 +c1tRkYYptqtQW1YG9yEh/tT3rEmts2brTctS31OQcYDAMmguPgT7PkGFdqp/X6Cmwbs
 1uSQ==
X-Gm-Message-State: APjAAAUsybH2ESWv7yAt8Rah5H8nulVLXs3TuJ84qHwUJO8ibO2Cf/IO
 airJ08Fx6Sj6YQqDcrJ5p29wQjZhNiwdNUkGux0=
X-Google-Smtp-Source: APXvYqyDsQSQmAMyhLPGmIATbVAzA82N9Tz8sX6J/znS5QrfJGtnUDijTMDrJHuHExSCB0WGKYHwz8BcOoffI0Zo008=
X-Received: by 2002:aca:e008:: with SMTP id x8mr2872426oig.106.1556189776123; 
 Thu, 25 Apr 2019 03:56:16 -0700 (PDT)
MIME-Version: 1.0
References: <1556179018-7865-1-git-send-email-chuckylinchuckylin@gmail.com>
In-Reply-To: <1556179018-7865-1-git-send-email-chuckylinchuckylin@gmail.com>
From: lin li <chuckylinchuckylin@gmail.com>
Date: Thu, 25 Apr 2019 18:56:04 +0800
Message-ID:
 <CAF+hgq3gToAOB6+S8PxN19HgfBnAdjy=NCNPYZqGa6OmYfS4Zg@mail.gmail.com>
To: tiwei.bie@intel.com, maxime.coquelin@redhat.com, zhihong.wang@intel.com
Cc: dev@dpdk.org, dariusz.stojaczyk@intel.com, changpeng.liu@intel.com, 
 james.r.harris@intel.com, lilin24 <lilin24@baidu.com>, Ni Xun <nixun@baidu.com>,
 Zhang Yu <zhangyu31@baidu.com>
Content-Type: text/plain; charset="UTF-8"
Content-Transfer-Encoding: quoted-printable
Subject: Re: [dpdk-dev] [PATCH] [resend] vhost: support inflight share
	memory protocol feature
X-BeenThere: dev@dpdk.org
X-Mailman-Version: 2.1.15
Precedence: list
List-Id: DPDK patches and discussions <dev.dpdk.org>
List-Unsubscribe: <https://mails.dpdk.org/options/dev>,
 <mailto:dev-request@dpdk.org?subject=unsubscribe>
List-Archive: <http://mails.dpdk.org/archives/dev/>
List-Post: <mailto:dev@dpdk.org>
List-Help: <mailto:dev-request@dpdk.org?subject=help>
List-Subscribe: <https://mails.dpdk.org/listinfo/dev>,
 <mailto:dev-request@dpdk.org?subject=subscribe>
Errors-To: dev-bounces@dpdk.org
Sender: "dev" <dev-bounces@dpdk.org>
Message-ID: <20190425105604.AXzwsQSVUTV4vg_UhyCTLUduFZY-kmZWw2Ab_4xWvC8@z>

Li Lin <chuckylinchuckylin@gmail.com> =E4=BA=8E2019=E5=B9=B44=E6=9C=8825=E6=
=97=A5=E5=91=A8=E5=9B=9B =E4=B8=8B=E5=8D=883:57=E5=86=99=E9=81=93=EF=BC=9A
>
> From: lilin24 <lilin24@baidu.com>
>
> This patch introduces two new messages VHOST_USER_GET_INFLIGHT_FD
> and VHOST_USER_SET_INFLIGHT_FD to support transferring a shared
> buffer between qemu and backend.
>
> Firstly, qemu uses VHOST_USER_GET_INFLIGHT_FD to get the
> shared buffer from backend. Then qemu should send it back
> through VHOST_USER_SET_INFLIGHT_FD each time we start vhost-user.
>
> This shared buffer is used to process inflight I/O when the backend
> reconnects.
>
> Signed-off-by: lilin24 <lilin24@baidu.com>
> Signed-off-by: Ni Xun <nixun@baidu.com>
> Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
> ---
>  lib/librte_vhost/rte_vhost.h  |  19 ++++
>  lib/librte_vhost/vhost.c      |  10 ++
>  lib/librte_vhost/vhost.h      |  12 ++
>  lib/librte_vhost/vhost_user.c | 248 ++++++++++++++++++++++++++++++++++++=
++++++
>  lib/librte_vhost/vhost_user.h |  16 ++-
>  5 files changed, 303 insertions(+), 2 deletions(-)
>
> diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h
> index d2c0c93f4..9993b7ce5 100644
> --- a/lib/librte_vhost/rte_vhost.h
> +++ b/lib/librte_vhost/rte_vhost.h
> @@ -71,6 +71,10 @@ extern "C" {
>  #define VHOST_USER_PROTOCOL_F_HOST_NOTIFIER 11
>  #endif
>
> +#ifndef VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD
> +#define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD 12
> +#endif
> +
>  /** Indicate whether protocol features negotiation is supported. */
>  #ifndef VHOST_USER_F_PROTOCOL_FEATURES
>  #define VHOST_USER_F_PROTOCOL_FEATURES 30
> @@ -98,12 +102,27 @@ struct rte_vhost_memory {
>         struct rte_vhost_mem_region regions[];
>  };
>
> +typedef struct VhostUserInflightEntry {
> +       uint8_t inflight;
> +} VhostUserInflightEntry;
> +
> +typedef struct VhostInflightInfo {
> +       uint16_t version;
> +       uint16_t last_inflight_io;
> +       uint16_t used_idx;
> +       VhostUserInflightEntry desc[0];
> +} VhostInflightInfo;
> +
>  struct rte_vhost_vring {
>         struct vring_desc       *desc;
>         struct vring_avail      *avail;
>         struct vring_used       *used;
>         uint64_t                log_guest_addr;
>
> +       VhostInflightInfo       *inflight;
> +       uint16_t                *inflight_reqs;
> +       uint16_t                inflight_cnt;
> +
>         /** Deprecated, use rte_vhost_vring_call() instead. */
>         int                     callfd;
>
> diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
> index 163f4595e..e7d9eabec 100644
> --- a/lib/librte_vhost/vhost.c
> +++ b/lib/librte_vhost/vhost.c
> @@ -76,6 +76,8 @@ cleanup_vq(struct vhost_virtqueue *vq, int destroy)
>                 close(vq->callfd);
>         if (vq->kickfd >=3D 0)
>                 close(vq->kickfd);
> +       if (vq->inflight)
> +               vq->inflight =3D NULL;
>  }
>
>  /*
> @@ -589,6 +591,14 @@ rte_vhost_get_vhost_vring(int vid, uint16_t vring_id=
x,
>         vring->kickfd  =3D vq->kickfd;
>         vring->size    =3D vq->size;
>
> +       vring->inflight =3D vq->inflight;
> +
> +       vring->inflight_reqs =3D vq->inflight_reqs;
> +       vq->inflight_reqs =3D NULL;
> +
> +       vring->inflight_cnt =3D vq->inflight_cnt;
> +       vq->inflight_cnt =3D 0;
> +
>         return 0;
>  }
>
> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> index e9138dfab..2ab9d6892 100644
> --- a/lib/librte_vhost/vhost.h
> +++ b/lib/librte_vhost/vhost.h
> @@ -128,6 +128,11 @@ struct vhost_virtqueue {
>         /* Physical address of used ring, for logging */
>         uint64_t                log_guest_addr;
>
> +       /* Inflight share memory info */
> +       VhostInflightInfo       *inflight;
> +       uint16_t                *inflight_reqs;
> +       uint16_t                inflight_cnt;
> +
>         uint16_t                nr_zmbuf;
>         uint16_t                zmbuf_size;
>         uint16_t                last_zmbuf_idx;
> @@ -286,6 +291,12 @@ struct guest_page {
>         uint64_t size;
>  };
>
> +typedef struct VuDevInflightInfo {
> +       int fd;
> +       void *addr;
> +       uint64_t size;
> +} VuDevInflightInfo;
> +
>  /**
>   * Device structure contains all configuration information relating
>   * to the device.
> @@ -303,6 +314,7 @@ struct virtio_net {
>         uint32_t                nr_vring;
>         int                     dequeue_zero_copy;
>         struct vhost_virtqueue  *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2];
> +       VuDevInflightInfo inflight_info;
>  #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ)
>         char                    ifname[IF_NAME_SZ];
>         uint64_t                log_size;
> diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.=
c
> index c9e29ece8..c4b64137c 100644
> --- a/lib/librte_vhost/vhost_user.c
> +++ b/lib/librte_vhost/vhost_user.c
> @@ -31,6 +31,8 @@
>  #include <sys/stat.h>
>  #include <sys/syscall.h>
>  #include <assert.h>
> +#include <sys/syscall.h>
> +#include <asm/unistd.h>
>  #ifdef RTE_LIBRTE_VHOST_NUMA
>  #include <numaif.h>
>  #endif
> @@ -49,6 +51,14 @@
>  #define VIRTIO_MIN_MTU 68
>  #define VIRTIO_MAX_MTU 65535
>
> +#define INFLIGHT_ALIGNMENT 64
> +#define INFLIGHT_VERSION 0xabcd
> +
> +#define CLOEXEC 0x0001U
> +
> +#define ALIGN_DOWN(n, m) ((n) / (m) * (m))
> +#define ALIGN_UP(n, m) ALIGN_DOWN((n) + (m) - 1, (m))
> +
>  static const char *vhost_message_str[VHOST_USER_MAX] =3D {
>         [VHOST_USER_NONE] =3D "VHOST_USER_NONE",
>         [VHOST_USER_GET_FEATURES] =3D "VHOST_USER_GET_FEATURES",
> @@ -78,6 +88,8 @@ static const char *vhost_message_str[VHOST_USER_MAX] =
=3D {
>         [VHOST_USER_POSTCOPY_ADVISE]  =3D "VHOST_USER_POSTCOPY_ADVISE",
>         [VHOST_USER_POSTCOPY_LISTEN]  =3D "VHOST_USER_POSTCOPY_LISTEN",
>         [VHOST_USER_POSTCOPY_END]  =3D "VHOST_USER_POSTCOPY_END",
> +       [VHOST_USER_GET_INFLIGHT_FD] =3D "VHOST_USER_GET_INFLIGHT_FD",
> +       [VHOST_USER_SET_INFLIGHT_FD] =3D "VHOST_USER_SET_INFLIGHT_FD",
>  };
>
>  static int send_vhost_reply(int sockfd, struct VhostUserMsg *msg);
> @@ -160,6 +172,16 @@ vhost_backend_cleanup(struct virtio_net *dev)
>                 dev->log_addr =3D 0;
>         }
>
> +       if (dev->inflight_info.addr) {
> +               munmap(dev->inflight_info.addr, dev->inflight_info.size);
> +               dev->inflight_info.addr =3D NULL;
> +       }
> +
> +       if (dev->inflight_info.fd > 0) {
> +               close(dev->inflight_info.fd);
> +               dev->inflight_info.fd =3D -1;
> +       }
> +
>         if (dev->slave_req_fd >=3D 0) {
>                 close(dev->slave_req_fd);
>                 dev->slave_req_fd =3D -1;
> @@ -1165,6 +1187,184 @@ virtio_is_ready(struct virtio_net *dev)
>         return 1;
>  }
>
> +static int mem_create(const char *name, unsigned int flags)
> +{
> +#ifdef __NR_memfd_create
> +       return syscall(__NR_memfd_create, name, flags);
> +#else
> +       return -1;
> +#endif
> +}
> +
> +void *inflight_mem_alloc(const char *name, size_t size, int *fd)
> +{
> +       void *ptr;
> +       int mfd =3D -1;
> +       char fname[20] =3D "/tmp/memfd-XXXXXX";
> +
> +       *fd =3D -1;
> +       mfd =3D mem_create(name, CLOEXEC);
> +       if (mfd !=3D -1) {
> +               if (ftruncate(mfd, size) =3D=3D -1) {
> +                       RTE_LOG(ERR, VHOST_CONFIG,
> +                                       "ftruncate fail for alloc infligh=
t buffer\n");
> +                       close(mfd);
> +                       return NULL;
> +               }
> +       } else {
> +               mfd =3D mkstemp(fname);
> +               unlink(fname);
> +
> +               if (mfd =3D=3D -1) {
> +                       RTE_LOG(ERR, VHOST_CONFIG,
> +                                       "mkstemp fail for alloc inflight =
buffer\n");
> +                       return NULL;
> +               }
> +
> +               if (ftruncate(mfd, size) =3D=3D -1) {
> +                       RTE_LOG(ERR, VHOST_CONFIG,
> +                                       "ftruncate fail for alloc infligh=
t buffer\n");
> +                       close(mfd);
> +                       return NULL;
> +               }
> +       }
> +
> +       ptr =3D mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, mfd, 0)=
;
> +       if (ptr =3D=3D MAP_FAILED) {
> +               RTE_LOG(ERR, VHOST_CONFIG,
> +                               "mmap fail for alloc inflight buffer\n");
> +               close(mfd);
> +               return NULL;
> +       }
> +
> +       *fd =3D mfd;
> +       return ptr;
> +}
> +
> +static uint32_t get_pervq_shm_size(uint16_t queue_size)
> +{
> +       return ALIGN_UP(sizeof(VhostUserInflightEntry) * queue_size +
> +               sizeof(uint16_t) * 3, INFLIGHT_ALIGNMENT);
> +}
> +
> +static int
> +vhost_user_get_inflight_fd(struct virtio_net **pdev, VhostUserMsg *msg,
> +               int main_fd __rte_unused)
> +{
> +       int fd;
> +       uint64_t mmap_size;
> +       void *addr;
> +       uint16_t num_queues, queue_size;
> +       struct virtio_net *dev =3D *pdev;
> +
> +       if (msg->size !=3D sizeof(msg->payload.inflight)) {
> +               RTE_LOG(ERR, VHOST_CONFIG,
> +                       "Invalid get_inflight_fd message size is %d",
> +                       msg->size);
> +               msg->payload.inflight.mmap_size =3D 0;
> +               return 0;
> +       }
> +
> +       num_queues =3D msg->payload.inflight.num_queues;
> +       queue_size =3D msg->payload.inflight.queue_size;
> +
> +       RTE_LOG(INFO, VHOST_CONFIG, "get_inflight_fd num_queues: %u\n",
> +                       msg->payload.inflight.num_queues);
> +       RTE_LOG(INFO, VHOST_CONFIG, "get_inflight_fd queue_size: %u\n",
> +                       msg->payload.inflight.queue_size);
> +       mmap_size =3D num_queues * get_pervq_shm_size(queue_size);
> +
> +       addr =3D inflight_mem_alloc("vhost-inflight", mmap_size, &fd);
> +       if (!addr) {
> +               RTE_LOG(ERR, VHOST_CONFIG, "Failed to alloc vhost infligh=
t area");
> +                       msg->payload.inflight.mmap_size =3D 0;
> +               return 0;
> +       }
> +       memset(addr, 0, mmap_size);
> +
> +       dev->inflight_info.addr =3D addr;
> +       dev->inflight_info.size =3D msg->payload.inflight.mmap_size =3D m=
map_size;
> +       dev->inflight_info.fd =3D msg->fds[0] =3D fd;
> +       msg->payload.inflight.mmap_offset =3D 0;
> +       msg->fd_num =3D 1;
> +
> +       RTE_LOG(INFO, VHOST_CONFIG,
> +                       "send inflight mmap_size: %lu\n",
> +                       msg->payload.inflight.mmap_size);
> +       RTE_LOG(INFO, VHOST_CONFIG,
> +                       "send inflight mmap_offset: %lu\n",
> +                       msg->payload.inflight.mmap_offset);
> +       RTE_LOG(INFO, VHOST_CONFIG,
> +                       "send inflight fd: %d\n", msg->fds[0]);
> +
> +       return RTE_VHOST_MSG_RESULT_REPLY;
> +}
> +
> +static int
> +vhost_user_set_inflight_fd(struct virtio_net **pdev, VhostUserMsg *msg,
> +               int main_fd __rte_unused)
> +{
> +       int fd, i;
> +       uint64_t mmap_size, mmap_offset;
> +       uint16_t num_queues, queue_size;
> +       uint32_t pervq_inflight_size;
> +       void *rc;
> +       struct vhost_virtqueue *vq;
> +       struct virtio_net *dev =3D *pdev;
> +
> +       fd =3D msg->fds[0];
> +       if (msg->size !=3D sizeof(msg->payload.inflight) || fd < 0) {
> +               RTE_LOG(ERR, VHOST_CONFIG, "Invalid set_inflight_fd messa=
ge size is %d,fd is %d\n",
> +                       msg->size, fd);
> +               return -1;
> +       }
> +
> +       mmap_size =3D msg->payload.inflight.mmap_size;
> +       mmap_offset =3D msg->payload.inflight.mmap_offset;
> +       num_queues =3D msg->payload.inflight.num_queues;
> +       queue_size =3D msg->payload.inflight.queue_size;
> +       pervq_inflight_size =3D get_pervq_shm_size(queue_size);
> +
> +       RTE_LOG(INFO, VHOST_CONFIG,
> +               "set_inflight_fd mmap_size: %lu\n", mmap_size);
> +       RTE_LOG(INFO, VHOST_CONFIG,
> +               "set_inflight_fd mmap_offset: %lu\n", mmap_offset);
> +       RTE_LOG(INFO, VHOST_CONFIG,
> +               "set_inflight_fd num_queues: %u\n", num_queues);
> +       RTE_LOG(INFO, VHOST_CONFIG,
> +               "set_inflight_fd queue_size: %u\n", queue_size);
> +       RTE_LOG(INFO, VHOST_CONFIG,
> +               "set_inflight_fd fd: %d\n", fd);
> +       RTE_LOG(INFO, VHOST_CONFIG,
> +               "set_inflight_fd pervq_inflight_size: %d\n",
> +               pervq_inflight_size);
> +
> +       if (dev->inflight_info.addr)
> +               munmap(dev->inflight_info.addr, dev->inflight_info.size);
> +
> +       rc =3D mmap(0, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
> +                       fd, mmap_offset);
> +       if (rc =3D=3D MAP_FAILED) {
> +               RTE_LOG(ERR, VHOST_CONFIG, "failed to mmap share memory.\=
n");
> +               return -1;
> +       }
> +
> +       if (dev->inflight_info.fd)
> +               close(dev->inflight_info.fd);
> +
> +       dev->inflight_info.fd =3D fd;
> +       dev->inflight_info.addr =3D rc;
> +       dev->inflight_info.size =3D mmap_size;
> +
> +       for (i =3D 0; i < num_queues; i++) {
> +               vq =3D dev->virtqueue[i];
> +               vq->inflight =3D (VhostInflightInfo *)rc;
> +               rc =3D (void *)((char *)rc + pervq_inflight_size);
> +       }
> +
> +       return RTE_VHOST_MSG_RESULT_OK;
> +}
> +
>  static int
>  vhost_user_set_vring_call(struct virtio_net **pdev, struct VhostUserMsg =
*msg,
>                         int main_fd __rte_unused)
> @@ -1202,6 +1402,47 @@ static int vhost_user_set_vring_err(struct virtio_=
net **pdev __rte_unused,
>  }
>
>  static int
> +vhost_check_queue_inflights(struct vhost_virtqueue *vq)
> +{
> +       struct vring_used *used =3D vq->used;
> +       uint16_t i =3D 0;
> +
> +       if ((!vq->inflight))
> +               return RTE_VHOST_MSG_RESULT_ERR;
> +
> +       if (!vq->inflight->version) {
> +               vq->inflight->version =3D INFLIGHT_VERSION;
> +               return RTE_VHOST_MSG_RESULT_OK;
> +       }
> +
> +       vq->inflight_reqs =3D calloc(vq->size, sizeof(uint16_t));
> +       if (!vq->inflight_reqs) {
> +               RTE_LOG(ERR, VHOST_CONFIG, "Failed to allocate memory for=
 inflight reqs.\n");
> +               return RTE_VHOST_MSG_RESULT_ERR;
> +       }
> +
> +       if (vq->inflight->used_idx !=3D used->idx) {
> +               vq->inflight->desc[vq->inflight->last_inflight_io].inflig=
ht =3D 0;
> +               rte_compiler_barrier();
> +               vq->inflight->used_idx =3D used->idx;
> +       }
> +
> +       for (i =3D 0; i < vq->size; i++) {
> +               if (vq->inflight->desc[i].inflight =3D=3D 1)
> +                       vq->inflight_reqs[vq->inflight_cnt++] =3D i;
> +       }
> +
> +       if (!vq->inflight_cnt) {
> +               free(vq->inflight_reqs);
> +               vq->inflight_reqs =3D NULL;
> +       }
> +
> +       vq->last_avail_idx +=3D vq->inflight_cnt;
> +
> +       return RTE_VHOST_MSG_RESULT_OK;
> +}
> +
> +static int
>  vhost_user_set_vring_kick(struct virtio_net **pdev, struct VhostUserMsg =
*msg,
>                         int main_fd __rte_unused)
>  {
> @@ -1242,6 +1483,11 @@ vhost_user_set_vring_kick(struct virtio_net **pdev=
, struct VhostUserMsg *msg,
>                 close(vq->kickfd);
>         vq->kickfd =3D file.fd;
>
> +       if (vhost_check_queue_inflights(vq)) {
> +               RTE_LOG(ERR, VHOST_CONFIG, "Failed to inflights for vq: %=
d\n", file.index);
> +               return RTE_VHOST_MSG_RESULT_ERR;
> +       }
> +
>         return RTE_VHOST_MSG_RESULT_OK;
>  }
>
> @@ -1762,6 +2008,8 @@ static vhost_message_handler_t vhost_message_handle=
rs[VHOST_USER_MAX] =3D {
>         [VHOST_USER_POSTCOPY_ADVISE] =3D vhost_user_set_postcopy_advise,
>         [VHOST_USER_POSTCOPY_LISTEN] =3D vhost_user_set_postcopy_listen,
>         [VHOST_USER_POSTCOPY_END] =3D vhost_user_postcopy_end,
> +       [VHOST_USER_GET_INFLIGHT_FD] =3D vhost_user_get_inflight_fd,
> +       [VHOST_USER_SET_INFLIGHT_FD] =3D vhost_user_set_inflight_fd,
>  };
>
>
> diff --git a/lib/librte_vhost/vhost_user.h b/lib/librte_vhost/vhost_user.=
h
> index 2a650fe4b..35f969b1b 100644
> --- a/lib/librte_vhost/vhost_user.h
> +++ b/lib/librte_vhost/vhost_user.h
> @@ -23,7 +23,8 @@
>                                          (1ULL << VHOST_USER_PROTOCOL_F_C=
RYPTO_SESSION) | \
>                                          (1ULL << VHOST_USER_PROTOCOL_F_S=
LAVE_SEND_FD) | \
>                                          (1ULL << VHOST_USER_PROTOCOL_F_H=
OST_NOTIFIER) | \
> -                                        (1ULL << VHOST_USER_PROTOCOL_F_P=
AGEFAULT))
> +                                       (1ULL << VHOST_USER_PROTOCOL_F_PA=
GEFAULT) | \
> +                                       (1ULL << VHOST_USER_PROTOCOL_F_IN=
FLIGHT_SHMFD))
>
>  typedef enum VhostUserRequest {
>         VHOST_USER_NONE =3D 0,
> @@ -54,7 +55,9 @@ typedef enum VhostUserRequest {
>         VHOST_USER_POSTCOPY_ADVISE =3D 28,
>         VHOST_USER_POSTCOPY_LISTEN =3D 29,
>         VHOST_USER_POSTCOPY_END =3D 30,
> -       VHOST_USER_MAX =3D 31
> +       VHOST_USER_GET_INFLIGHT_FD =3D 31,
> +       VHOST_USER_SET_INFLIGHT_FD =3D 32,
> +       VHOST_USER_MAX =3D 33
>  } VhostUserRequest;
>
>  typedef enum VhostUserSlaveRequest {
> @@ -112,6 +115,13 @@ typedef struct VhostUserVringArea {
>         uint64_t offset;
>  } VhostUserVringArea;
>
> +typedef struct VhostUserInflight {
> +       uint64_t mmap_size;
> +       uint64_t mmap_offset;
> +       uint16_t num_queues;
> +       uint16_t queue_size;
> +} VhostUserInflight;
> +
>  typedef struct VhostUserMsg {
>         union {
>                 uint32_t master; /* a VhostUserRequest value */
> @@ -131,6 +141,7 @@ typedef struct VhostUserMsg {
>                 struct vhost_vring_addr addr;
>                 VhostUserMemory memory;
>                 VhostUserLog    log;
> +               VhostUserInflight inflight;
>                 struct vhost_iotlb_msg iotlb;
>                 VhostUserCryptoSessionParam crypto_session;
>                 VhostUserVringArea area;
> @@ -148,6 +159,7 @@ typedef struct VhostUserMsg {
>  /* vhost_user.c */
>  int vhost_user_msg_handler(int vid, int fd);
>  int vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t=
 perm);
> +void *inflight_mem_alloc(const char *name, size_t size, int *fd);
>
>  /* socket.c */
>  int read_fd_message(int sockfd, char *buf, int buflen, int *fds, int max=
_fds,
> --
> 2.11.0
>
This patch still needs some additional functions, so this version will
be abandoned. Later, I will add the set and clear inflight entry
functions and resubmit it as the V2 version.