From: Eelco Chaudron <echaudro@redhat.com>
To: maxime.coquelin@redhat.com, chenbo.xia@intel.com,
david.marchand@redhat.com
Cc: dev@dpdk.org
Subject: [PATCH v3 4/4] vhost: add device op to offload the interrupt kick
Date: Wed, 17 May 2023 11:09:13 +0200 [thread overview]
Message-ID: <168431455219.558450.14986601389394385835.stgit@ebuild.local> (raw)
In-Reply-To: <168431450017.558450.16680518469610688737.stgit@ebuild.local>
This patch adds an operation callback which gets called every time the
library wants to call eventfd_write(). This eventfd_write() call could
result in a system call, which could potentially block the PMD thread.
The callback function can decide whether it's ok to handle the
eventfd_write() now or have the newly introduced function,
rte_vhost_notify_guest(), called at a later time.
This can be used by third-party applications, such as OVS, to avoid
making system calls from the PMD threads.
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
---
lib/vhost/meson.build | 2 ++
lib/vhost/rte_vhost.h | 23 +++++++++++++++++-
lib/vhost/socket.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++---
lib/vhost/version.map | 9 +++++++
lib/vhost/vhost.c | 38 ++++++++++++++++++++++++++++++
lib/vhost/vhost.h | 58 ++++++++++++++++++++++++++++++++-------------
6 files changed, 171 insertions(+), 22 deletions(-)
diff --git a/lib/vhost/meson.build b/lib/vhost/meson.build
index 0d1abf6283..05679447db 100644
--- a/lib/vhost/meson.build
+++ b/lib/vhost/meson.build
@@ -38,3 +38,5 @@ driver_sdk_headers = files(
'vdpa_driver.h',
)
deps += ['ethdev', 'cryptodev', 'hash', 'pci', 'dmadev']
+
+use_function_versioning = true
diff --git a/lib/vhost/rte_vhost.h b/lib/vhost/rte_vhost.h
index 58a5d4be92..7a10bc36cf 100644
--- a/lib/vhost/rte_vhost.h
+++ b/lib/vhost/rte_vhost.h
@@ -298,7 +298,13 @@ struct rte_vhost_device_ops {
*/
void (*guest_notified)(int vid);
- void *reserved[1]; /**< Reserved for future extension */
+ /**
+ * If this callback is registered, it is called whenever the library
+ * wants to notify the guest. Return 'true' if the front-end takes over
+ * the notification and will deliver it later by calling
+ * rte_vhost_notify_guest(); return 'false' to have the library do the
+ * eventfd_write() itself. This can be used to remove the "slow"
+ * eventfd_write() syscall from the datapath.
+ */
+ bool (*guest_notify)(int vid, uint16_t queue_id);
};
/**
@@ -433,6 +439,21 @@ void rte_vhost_log_used_vring(int vid, uint16_t vring_idx,
int rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable);
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice.
+ *
+ * Inject the offloaded interrupt into the vhost device's queue. For more
+ * details see the 'guest_notify' vhost device operation.
+ *
+ * @param vid
+ * vhost device ID
+ * @param queue_id
+ * virtio queue index
+ */
+__rte_experimental
+void rte_vhost_notify_guest(int vid, uint16_t queue_id);
+
/**
* Register vhost driver. path could be different for multiple
* instance support.
diff --git a/lib/vhost/socket.c b/lib/vhost/socket.c
index 669c322e12..f2c02075fe 100644
--- a/lib/vhost/socket.c
+++ b/lib/vhost/socket.c
@@ -15,6 +15,7 @@
#include <fcntl.h>
#include <pthread.h>
+#include <rte_function_versioning.h>
#include <rte_log.h>
#include "fd_man.h"
@@ -59,6 +60,7 @@ struct vhost_user_socket {
struct rte_vdpa_device *vdpa_dev;
struct rte_vhost_device_ops const *notify_ops;
+ struct rte_vhost_device_ops *malloc_notify_ops;
};
struct vhost_user_connection {
@@ -846,6 +848,11 @@ vhost_user_socket_mem_free(struct vhost_user_socket *vsocket)
vsocket->path = NULL;
}
+ if (vsocket && vsocket->malloc_notify_ops) {
+ free(vsocket->malloc_notify_ops);
+ vsocket->malloc_notify_ops = NULL;
+ }
+
if (vsocket) {
free(vsocket);
vsocket = NULL;
@@ -1099,21 +1106,69 @@ rte_vhost_driver_unregister(const char *path)
/*
* Register ops so that we can add/remove device to data core.
*/
-int
-rte_vhost_driver_callback_register(const char *path,
- struct rte_vhost_device_ops const * const ops)
+static int
+vhost_driver_callback_register(const char *path,
+ struct rte_vhost_device_ops const * const ops,
+ struct rte_vhost_device_ops *malloc_ops)
{
struct vhost_user_socket *vsocket;
pthread_mutex_lock(&vhost_user.mutex);
vsocket = find_vhost_user_socket(path);
- if (vsocket)
+ if (vsocket) {
vsocket->notify_ops = ops;
+ free(vsocket->malloc_notify_ops);
+ vsocket->malloc_notify_ops = malloc_ops;
+ }
pthread_mutex_unlock(&vhost_user.mutex);
return vsocket ? 0 : -1;
}
+/*
+ * Default (DPDK_24) flavour of rte_vhost_driver_callback_register().
+ *
+ * The caller's ops structure is registered as-is; no private copy is made,
+ * so NULL is passed as the "malloc'ed ops" argument.
+ *
+ * Returns the result of vhost_driver_callback_register().
+ */
+int __vsym
+rte_vhost_driver_callback_register_v24(const char *path,
+		struct rte_vhost_device_ops const * const ops)
+{
+	int rc = vhost_driver_callback_register(path, ops, NULL);
+
+	return rc;
+}
+
+/*
+ * Compatibility (DPDK_23) flavour of rte_vhost_driver_callback_register().
+ *
+ * Callers bound to this symbol were built when the last member of
+ * struct rte_vhost_device_ops was still "reserved", so whatever value it
+ * holds must never be interpreted as a guest_notify callback.
+ *
+ * Returns 0 on success, -1 if the private copy cannot be allocated or if
+ * vhost_driver_callback_register() fails.
+ */
+int __vsym
+rte_vhost_driver_callback_register_v23(const char *path,
+		struct rte_vhost_device_ops const * const ops)
+{
+	struct rte_vhost_device_ops *new_ops;
+	int ret;
+
+	/*
+	 * Nothing to sanitize when there are no ops, or when the former
+	 * "reserved" slot is already NULL: register the caller's structure
+	 * directly.
+	 */
+	if (ops == NULL || ops->guest_notify == NULL)
+		return vhost_driver_callback_register(path, ops, NULL);
+
+	/*
+	 * Although the ops structure is a const structure, we do need to
+	 * override the guest_notify operation. This is because with the
+	 * previous APIs it was "reserved" and if any garbage value was passed,
+	 * it could crash the application. Work on a private copy so the
+	 * caller's structure stays untouched.
+	 */
+	new_ops = malloc(sizeof(*new_ops));
+	if (new_ops == NULL)
+		return -1;
+
+	memcpy(new_ops, ops, sizeof(*new_ops));
+	new_ops->guest_notify = NULL;
+
+	ret = vhost_driver_callback_register(path, new_ops, new_ops);
+	/* Ownership of the copy is only taken on success; avoid leaking it. */
+	if (ret != 0)
+		free(new_ops);
+
+	return ret;
+}
+
+/* Mark the v23 function as the old version, and v24 as the default version. */
+VERSION_SYMBOL(rte_vhost_driver_callback_register, _v23, 23);
+BIND_DEFAULT_SYMBOL(rte_vhost_driver_callback_register, _v24, 24);
+MAP_STATIC_SYMBOL(int rte_vhost_driver_callback_register(const char *path,
+ struct rte_vhost_device_ops const * const ops),
+ rte_vhost_driver_callback_register_v24);
+
struct rte_vhost_device_ops const *
vhost_driver_callback_get(const char *path)
{
diff --git a/lib/vhost/version.map b/lib/vhost/version.map
index d322a4a888..7bcbfd12cf 100644
--- a/lib/vhost/version.map
+++ b/lib/vhost/version.map
@@ -64,6 +64,12 @@ DPDK_23 {
local: *;
};
+DPDK_24 {
+ global:
+
+ rte_vhost_driver_callback_register;
+} DPDK_23;
+
EXPERIMENTAL {
global:
@@ -98,6 +104,9 @@ EXPERIMENTAL {
# added in 22.11
rte_vhost_async_dma_unconfigure;
rte_vhost_vring_call_nonblock;
+
+ # added in 23.07
+ rte_vhost_notify_guest;
};
INTERNAL {
diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c
index 8ff6434c93..79e88f986e 100644
--- a/lib/vhost/vhost.c
+++ b/lib/vhost/vhost.c
@@ -44,6 +44,10 @@ static const struct vhost_vq_stats_name_off vhost_vq_stat_strings[] = {
{"size_1024_1518_packets", offsetof(struct vhost_virtqueue, stats.size_bins[6])},
{"size_1519_max_packets", offsetof(struct vhost_virtqueue, stats.size_bins[7])},
{"guest_notifications", offsetof(struct vhost_virtqueue, stats.guest_notifications)},
+ {"guest_notifications_offloaded", offsetof(struct vhost_virtqueue,
+ stats.guest_notifications_offloaded)},
+ {"guest_notifications_error", offsetof(struct vhost_virtqueue,
+ stats.guest_notifications_error)},
{"iotlb_hits", offsetof(struct vhost_virtqueue, stats.iotlb_hits)},
{"iotlb_misses", offsetof(struct vhost_virtqueue, stats.iotlb_misses)},
{"inflight_submitted", offsetof(struct vhost_virtqueue, stats.inflight_submitted)},
@@ -1467,6 +1471,40 @@ rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
return ret;
}
+/*
+ * Write to the call fd of the given queue, i.e. perform the notification
+ * that a guest_notify callback chose to defer.
+ *
+ * Silently returns when the device or queue does not exist. The eventfd
+ * write is done with the virtqueue access lock held for reading, and only
+ * when the queue has a valid call fd. When statistics are enabled, a
+ * failed write bumps guest_notifications_error and a successful one bumps
+ * guest_notifications; on success the guest_notified callback, if
+ * registered, is invoked as well.
+ */
+void
+rte_vhost_notify_guest(int vid, uint16_t queue_id)
+{
+	struct virtio_net *dev;
+	struct vhost_virtqueue *vq;
+
+	dev = get_device(vid);
+	if (dev == NULL || queue_id >= VHOST_MAX_VRING)
+		return;
+
+	vq = dev->virtqueue[queue_id];
+	if (vq == NULL)
+		return;
+
+	rte_rwlock_read_lock(&vq->access_lock);
+
+	if (vq->callfd < 0)
+		goto out_unlock;
+
+	if (eventfd_write(vq->callfd, (eventfd_t)1) != 0) {
+		if (dev->flags & VIRTIO_DEV_STATS_ENABLED)
+			__atomic_fetch_add(&vq->stats.guest_notifications_error,
+				1, __ATOMIC_RELAXED);
+		goto out_unlock;
+	}
+
+	if (dev->flags & VIRTIO_DEV_STATS_ENABLED)
+		__atomic_fetch_add(&vq->stats.guest_notifications,
+			1, __ATOMIC_RELAXED);
+	if (dev->notify_ops->guest_notified != NULL)
+		dev->notify_ops->guest_notified(dev->vid);
+
+out_unlock:
+	rte_rwlock_read_unlock(&vq->access_lock);
+}
+
void
rte_vhost_log_write(int vid, uint64_t addr, uint64_t len)
{
diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
index 23a4e2b1a7..8ad53e9bb5 100644
--- a/lib/vhost/vhost.h
+++ b/lib/vhost/vhost.h
@@ -141,6 +141,8 @@ struct virtqueue_stats {
uint64_t inflight_completed;
/* Counters below are atomic, and should be incremented as such. */
uint64_t guest_notifications;
+ uint64_t guest_notifications_offloaded;
+ uint64_t guest_notifications_error;
};
/**
@@ -884,6 +886,34 @@ vhost_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old)
return (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old);
}
+/*
+ * Deliver a guest notification ("kick") for the given virtqueue.
+ *
+ * The guest_notify callback, when registered, is asked first; if it
+ * returns true the kick is counted as offloaded and no eventfd_write() is
+ * issued here. Otherwise the call fd is written directly: a failure is
+ * counted as an error, a success is counted as a notification and followed
+ * by the guest_notified callback (if any). Counters are only updated when
+ * VIRTIO_DEV_STATS_ENABLED is set.
+ *
+ * The callers in this header check vq->callfd >= 0 before calling.
+ */
+static __rte_always_inline void
+vhost_vring_inject_irq(struct virtio_net *dev, struct vhost_virtqueue *vq)
+{
+	uint64_t *counter;
+	bool delivered = false;
+
+	if (dev->notify_ops->guest_notify != NULL &&
+	    dev->notify_ops->guest_notify(dev->vid, vq->index)) {
+		counter = &vq->stats.guest_notifications_offloaded;
+	} else if (eventfd_write(vq->callfd, (eventfd_t)1) != 0) {
+		counter = &vq->stats.guest_notifications_error;
+	} else {
+		counter = &vq->stats.guest_notifications;
+		delivered = true;
+	}
+
+	if (dev->flags & VIRTIO_DEV_STATS_ENABLED)
+		__atomic_fetch_add(counter, 1, __ATOMIC_RELAXED);
+
+	if (delivered && dev->notify_ops->guest_notified != NULL)
+		dev->notify_ops->guest_notified(dev->vid);
+}
+
static __rte_always_inline void
vhost_vring_call_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
@@ -906,23 +936,13 @@ vhost_vring_call_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
if ((vhost_need_event(vhost_used_event(vq), new, old) ||
unlikely(!signalled_used_valid)) &&
vq->callfd >= 0) {
- eventfd_write(vq->callfd, (eventfd_t) 1);
- if (dev->flags & VIRTIO_DEV_STATS_ENABLED)
- __atomic_fetch_add(&vq->stats.guest_notifications,
- 1, __ATOMIC_RELAXED);
- if (dev->notify_ops->guest_notified)
- dev->notify_ops->guest_notified(dev->vid);
+ vhost_vring_inject_irq(dev, vq);
}
} else {
/* Kick the guest if necessary. */
if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
&& (vq->callfd >= 0)) {
- eventfd_write(vq->callfd, (eventfd_t)1);
- if (dev->flags & VIRTIO_DEV_STATS_ENABLED)
- __atomic_fetch_add(&vq->stats.guest_notifications,
- 1, __ATOMIC_RELAXED);
- if (dev->notify_ops->guest_notified)
- dev->notify_ops->guest_notified(dev->vid);
+ vhost_vring_inject_irq(dev, vq);
}
}
}
@@ -974,11 +994,8 @@ vhost_vring_call_packed(struct virtio_net *dev, struct vhost_virtqueue *vq)
if (vhost_need_event(off, new, old))
kick = true;
kick:
- if (kick && vq->callfd >= 0) {
- eventfd_write(vq->callfd, (eventfd_t)1);
- if (dev->notify_ops->guest_notified)
- dev->notify_ops->guest_notified(dev->vid);
- }
+ if (kick && vq->callfd >= 0)
+ vhost_vring_inject_irq(dev, vq);
}
static __rte_always_inline void
@@ -1017,4 +1034,11 @@ mbuf_is_consumed(struct rte_mbuf *m)
uint64_t hua_to_alignment(struct rte_vhost_memory *mem, void *ptr);
void mem_set_dump(void *ptr, size_t size, bool enable, uint64_t alignment);
+
+/* Versioned functions */
+int rte_vhost_driver_callback_register_v23(const char *path,
+ struct rte_vhost_device_ops const * const ops);
+int rte_vhost_driver_callback_register_v24(const char *path,
+ struct rte_vhost_device_ops const * const ops);
+
#endif /* _VHOST_NET_CDEV_H_ */
next prev parent reply other threads:[~2023-05-17 9:09 UTC|newest]
Thread overview: 35+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-05-17 9:08 [PATCH v3 0/4] " Eelco Chaudron
2023-05-17 9:08 ` [PATCH v3 1/4] vhost: change vhost_virtqueue access lock to a read/write one Eelco Chaudron
2023-05-17 17:33 ` Maxime Coquelin
2023-05-18 14:46 ` Eelco Chaudron
2023-05-31 6:37 ` Xia, Chenbo
2023-05-31 9:27 ` Maxime Coquelin
2023-05-31 11:13 ` Eelco Chaudron
2023-06-01 1:45 ` Xia, Chenbo
2023-05-17 9:08 ` [PATCH v3 2/4] vhost: make the guest_notifications statistic counter atomic Eelco Chaudron
2023-05-30 12:52 ` Maxime Coquelin
2023-05-31 7:03 ` Xia, Chenbo
2023-05-17 9:09 ` [PATCH v3 3/4] vhost: fix invalid call FD handling Eelco Chaudron
2023-05-30 12:54 ` Maxime Coquelin
2023-05-31 6:12 ` Xia, Chenbo
2023-05-31 9:30 ` Maxime Coquelin
2023-05-17 9:09 ` Eelco Chaudron [this message]
2023-05-30 13:02 ` [PATCH v3 4/4] vhost: add device op to offload the interrupt kick Maxime Coquelin
2023-05-30 13:16 ` Thomas Monjalon
2023-05-30 15:16 ` Maxime Coquelin
2023-05-31 6:19 ` Xia, Chenbo
2023-05-31 9:29 ` Maxime Coquelin
2023-05-31 11:21 ` Eelco Chaudron
2023-06-01 2:18 ` Xia, Chenbo
2023-06-01 8:15 ` Eelco Chaudron
2023-06-01 8:29 ` Maxime Coquelin
2023-06-01 8:49 ` Eelco Chaudron
2023-06-01 8:53 ` Maxime Coquelin
2023-05-31 11:49 ` David Marchand
2023-05-31 12:01 ` David Marchand
2023-05-31 12:48 ` Maxime Coquelin
2023-05-31 13:13 ` Eelco Chaudron
2023-05-31 14:12 ` David Marchand
2023-05-31 14:18 ` Maxime Coquelin
2023-06-01 20:00 ` [PATCH v3 0/4] " Maxime Coquelin
2023-06-02 6:20 ` Eelco Chaudron
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=168431455219.558450.14986601389394385835.stgit@ebuild.local \
--to=echaudro@redhat.com \
--cc=chenbo.xia@intel.com \
--cc=david.marchand@redhat.com \
--cc=dev@dpdk.org \
--cc=maxime.coquelin@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).