[PATCH] vhost: add device op to offload the interrupt kick

DPDK patches and discussions
 help / color / mirror / Atom feed

* [PATCH] vhost: add device op to offload the interrupt kick
@ 2023-03-27 12:51 Eelco Chaudron
  2023-03-27 13:21 ` Maxime Coquelin
  2023-03-27 15:16 ` [EXT] " Gowrishankar Muthukrishnan
  0 siblings, 2 replies; 9+ messages in thread
From: Eelco Chaudron @ 2023-03-27 12:51 UTC (permalink / raw)
  To: maxime.coquelin, chenbo.xia; +Cc: dev

This patch adds an operation callback which gets called every time the
library wants to call eventfd_write(). This eventfd_write() call could
result in a system call, which could potentially block the PMD thread.

The callback function can decide whether it's ok to handle the
eventfd_write() now or have the newly introduced function,
rte_vhost_notify_guest(), called at a later time.

This can be used by 3rd party applications, like OVS, to avoid system
calls being called as part of the PMD threads.

Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
---
 lib/vhost/rte_vhost.h |   10 +++++++++-
 lib/vhost/vhost.c     |   21 +++++++++++++++++++++
 lib/vhost/vhost.h     |   43 ++++++++++++++++++++++++++++---------------
 3 files changed, 58 insertions(+), 16 deletions(-)

diff --git a/lib/vhost/rte_vhost.h b/lib/vhost/rte_vhost.h
index 58a5d4be92..af7a394d0f 100644
--- a/lib/vhost/rte_vhost.h
+++ b/lib/vhost/rte_vhost.h
@@ -298,7 +298,13 @@ struct rte_vhost_device_ops {
 	 */
 	void (*guest_notified)(int vid);
 
-	void *reserved[1]; /**< Reserved for future extension */
+	/**
+	 * If this callback is registered, notification to the guest can
+	 * be handled by the front-end calling rte_vhost_notify_guest().
+	 * If it's not handled, 'false' should be returned. This can be used
+	 * to remove the "slow" eventfd_write() syscall from the datapath.
+	 */
+	bool (*guest_notify)(int vid, uint16_t queue_id);
 };
 
 /**
@@ -433,6 +439,8 @@ void rte_vhost_log_used_vring(int vid, uint16_t vring_idx,
 
 int rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable);
 
+void rte_vhost_notify_guest(int vid, uint16_t queue_id);
+
 /**
  * Register vhost driver. path could be different for multiple
  * instance support.
diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c
index ef37943817..ee090d78ef 100644
--- a/lib/vhost/vhost.c
+++ b/lib/vhost/vhost.c
@@ -1467,6 +1467,27 @@ rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
 	return ret;
 }
 
+void
+rte_vhost_notify_guest(int vid, uint16_t queue_id)
+{
+	struct virtio_net *dev = get_device(vid);
+	struct vhost_virtqueue *vq;
+
+	if (!dev ||  queue_id >= VHOST_MAX_VRING)
+		return;
+
+	vq = dev->virtqueue[queue_id];
+	if (!vq)
+		return;
+
+	rte_spinlock_lock(&vq->access_lock);
+
+	if (vq->callfd >= 0)
+		eventfd_write(vq->callfd, (eventfd_t)1);
+
+	rte_spinlock_unlock(&vq->access_lock);
+}
+
 void
 rte_vhost_log_write(int vid, uint64_t addr, uint64_t len)
 {
diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
index 8fdab13c70..39ad8260a1 100644
--- a/lib/vhost/vhost.h
+++ b/lib/vhost/vhost.h
@@ -883,6 +883,30 @@ vhost_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old)
 	return (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old);
 }
 
+static __rte_always_inline void
+vhost_vring_kick_guest(struct virtio_net *dev, struct vhost_virtqueue *vq)
+{
+	if (dev->notify_ops->guest_notify) {
+		uint16_t qid;
+		for (qid = 0;  qid < dev->nr_vring; qid++) {
+			if (dev->virtqueue[qid] == vq) {
+				if (dev->notify_ops->guest_notify(dev->vid,
+								  qid))
+					goto done;
+				break;
+			}
+		}
+	}
+	eventfd_write(vq->callfd, (eventfd_t) 1);
+
+done:
+	if (dev->flags & VIRTIO_DEV_STATS_ENABLED)
+		vq->stats.guest_notifications++;
+	if (dev->notify_ops->guest_notified)
+		dev->notify_ops->guest_notified(dev->vid);
+}
+
+
 static __rte_always_inline void
 vhost_vring_call_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
 {
@@ -905,21 +929,13 @@ vhost_vring_call_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
 		if ((vhost_need_event(vhost_used_event(vq), new, old) &&
 					(vq->callfd >= 0)) ||
 				unlikely(!signalled_used_valid)) {
-			eventfd_write(vq->callfd, (eventfd_t) 1);
-			if (dev->flags & VIRTIO_DEV_STATS_ENABLED)
-				vq->stats.guest_notifications++;
-			if (dev->notify_ops->guest_notified)
-				dev->notify_ops->guest_notified(dev->vid);
+			vhost_vring_kick_guest(dev, vq);
 		}
 	} else {
 		/* Kick the guest if necessary. */
 		if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
 				&& (vq->callfd >= 0)) {
-			eventfd_write(vq->callfd, (eventfd_t)1);
-			if (dev->flags & VIRTIO_DEV_STATS_ENABLED)
-				vq->stats.guest_notifications++;
-			if (dev->notify_ops->guest_notified)
-				dev->notify_ops->guest_notified(dev->vid);
+			vhost_vring_kick_guest(dev, vq);
 		}
 	}
 }
@@ -971,11 +987,8 @@ vhost_vring_call_packed(struct virtio_net *dev, struct vhost_virtqueue *vq)
 	if (vhost_need_event(off, new, old))
 		kick = true;
 kick:
-	if (kick) {
-		eventfd_write(vq->callfd, (eventfd_t)1);
-		if (dev->notify_ops->guest_notified)
-			dev->notify_ops->guest_notified(dev->vid);
-	}
+	if (kick)
+		vhost_vring_kick_guest(dev, vq);
 }
 
 static __rte_always_inline void


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] vhost: add device op to offload the interrupt kick
  2023-03-27 12:51 [PATCH] vhost: add device op to offload the interrupt kick Eelco Chaudron
@ 2023-03-27 13:21 ` Maxime Coquelin
  2023-03-27 14:10   ` Eelco Chaudron
  2023-03-27 15:16 ` [EXT] " Gowrishankar Muthukrishnan
  1 sibling, 1 reply; 9+ messages in thread
From: Maxime Coquelin @ 2023-03-27 13:21 UTC (permalink / raw)
  To: Eelco Chaudron, chenbo.xia; +Cc: dev, David Marchand

Hi Eelco,

On 3/27/23 14:51, Eelco Chaudron wrote:
> This patch adds an operation callback which gets called every time the
> library wants to call eventfd_write(). This eventfd_write() call could
> result in a system call, which could potentially block the PMD thread.
> 
> The callback function can decide whether it's ok to handle the
> eventfd_write() now or have the newly introduced function,
> rte_vhost_notify_guest(), called at a later time.
> 
> This can be used by 3rd party applications, like OVS, to avoid system
> calls being called as part of the PMD threads.

That's a good idea, please find some comments inline:

> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
> ---
>   lib/vhost/rte_vhost.h |   10 +++++++++-
>   lib/vhost/vhost.c     |   21 +++++++++++++++++++++
>   lib/vhost/vhost.h     |   43 ++++++++++++++++++++++++++++---------------
>   3 files changed, 58 insertions(+), 16 deletions(-)
> 
> diff --git a/lib/vhost/rte_vhost.h b/lib/vhost/rte_vhost.h
> index 58a5d4be92..af7a394d0f 100644
> --- a/lib/vhost/rte_vhost.h
> +++ b/lib/vhost/rte_vhost.h
> @@ -298,7 +298,13 @@ struct rte_vhost_device_ops {
>   	 */
>   	void (*guest_notified)(int vid);
>   
> -	void *reserved[1]; /**< Reserved for future extension */
> +	/**
> +	 * If this callback is registered, notification to the guest can
> +	 * be handled by the front-end calling rte_vhost_notify_guest().
> +	 * If it's not handled, 'false' should be returned. This can be used
> +	 * to remove the "slow" eventfd_write() syscall from the datapath.
> +	 */
> +	bool (*guest_notify)(int vid, uint16_t queue_id);
>   };
>   
>   /**
> @@ -433,6 +439,8 @@ void rte_vhost_log_used_vring(int vid, uint16_t vring_idx,
>   
>   int rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable);
>   
> +void rte_vhost_notify_guest(int vid, uint16_t queue_id);

The new API needs to be tagged as experimental, and also documented. (I
see rte_vhost_enable_guest_notification is not properly documented, so
not a good example!)

> +
>   /**
>    * Register vhost driver. path could be different for multiple
>    * instance support.
> diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c
> index ef37943817..ee090d78ef 100644
> --- a/lib/vhost/vhost.c
> +++ b/lib/vhost/vhost.c
> @@ -1467,6 +1467,27 @@ rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
>   	return ret;
>   }
>   
> +void
> +rte_vhost_notify_guest(int vid, uint16_t queue_id)
> +{
> +	struct virtio_net *dev = get_device(vid);
> +	struct vhost_virtqueue *vq;
> +
> +	if (!dev ||  queue_id >= VHOST_MAX_VRING)
> +		return;
> +
> +	vq = dev->virtqueue[queue_id];
> +	if (!vq)
> +		return;
> +
> +	rte_spinlock_lock(&vq->access_lock);
> +
> +	if (vq->callfd >= 0)
> +		eventfd_write(vq->callfd, (eventfd_t)1);

Maybe we should return an error of callfd is invalid or eventfd_write()
failed.

> +
> +	rte_spinlock_unlock(&vq->access_lock);
> +}
> +
>   void
>   rte_vhost_log_write(int vid, uint64_t addr, uint64_t len)
>   {
> diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
> index 8fdab13c70..39ad8260a1 100644
> --- a/lib/vhost/vhost.h
> +++ b/lib/vhost/vhost.h
> @@ -883,6 +883,30 @@ vhost_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old)
>   	return (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old);
>   }
>   
> +static __rte_always_inline void
> +vhost_vring_kick_guest(struct virtio_net *dev, struct vhost_virtqueue *vq)
> +{
> +	if (dev->notify_ops->guest_notify) {
> +		uint16_t qid;
> +		for (qid = 0;  qid < dev->nr_vring; qid++) {
> +			if (dev->virtqueue[qid] == vq) {
> +				if (dev->notify_ops->guest_notify(dev->vid,
> +								  qid))
> +					goto done;
> +				break;
> +			}
> +		}

Since v22.11, you no more need to iterate through the port's virtqueues,
David introduced index field in vhost_virtuqueue in v22.11 (57e414e3ec29
("vhost: keep a reference to virtqueue index")).


> +	}
> +	eventfd_write(vq->callfd, (eventfd_t) 1);
> +
> +done:
> +	if (dev->flags & VIRTIO_DEV_STATS_ENABLED)
> +		vq->stats.guest_notifications++;
> +	if (dev->notify_ops->guest_notified)
> +		dev->notify_ops->guest_notified(dev->vid);
> +}

FYI, I have done almost the same refactoring in the VDUSE series I will
soon send:

https://gitlab.com/mcoquelin/dpdk-next-virtio/-/commit/7089a8a6d1e89b9db90547eb5b4fef886f24fd5b

My version also introduces a new counter in case the notification
failed. Maybe we could also have a counter in case the notification has
been "offloaded" to the application?

> +
> +
>   static __rte_always_inline void
>   vhost_vring_call_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
>   {
> @@ -905,21 +929,13 @@ vhost_vring_call_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
>   		if ((vhost_need_event(vhost_used_event(vq), new, old) &&
>   					(vq->callfd >= 0)) ||
>   				unlikely(!signalled_used_valid)) {
> -			eventfd_write(vq->callfd, (eventfd_t) 1);
> -			if (dev->flags & VIRTIO_DEV_STATS_ENABLED)
> -				vq->stats.guest_notifications++;
> -			if (dev->notify_ops->guest_notified)
> -				dev->notify_ops->guest_notified(dev->vid);
> +			vhost_vring_kick_guest(dev, vq);
>   		}
>   	} else {
>   		/* Kick the guest if necessary. */
>   		if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
>   				&& (vq->callfd >= 0)) {
> -			eventfd_write(vq->callfd, (eventfd_t)1);
> -			if (dev->flags & VIRTIO_DEV_STATS_ENABLED)
> -				vq->stats.guest_notifications++;
> -			if (dev->notify_ops->guest_notified)
> -				dev->notify_ops->guest_notified(dev->vid);
> +			vhost_vring_kick_guest(dev, vq);
>   		}
>   	}
>   }
> @@ -971,11 +987,8 @@ vhost_vring_call_packed(struct virtio_net *dev, struct vhost_virtqueue *vq)
>   	if (vhost_need_event(off, new, old))
>   		kick = true;
>   kick:
> -	if (kick) {
> -		eventfd_write(vq->callfd, (eventfd_t)1);
> -		if (dev->notify_ops->guest_notified)
> -			dev->notify_ops->guest_notified(dev->vid);
> -	}
> +	if (kick)
> +		vhost_vring_kick_guest(dev, vq);
>   }
>   
>   static __rte_always_inline void
> 


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] vhost: add device op to offload the interrupt kick
  2023-03-27 13:21 ` Maxime Coquelin
@ 2023-03-27 14:10   ` Eelco Chaudron
  0 siblings, 0 replies; 9+ messages in thread
From: Eelco Chaudron @ 2023-03-27 14:10 UTC (permalink / raw)
  To: Maxime Coquelin; +Cc: chenbo.xia, dev, David Marchand



On 27 Mar 2023, at 15:21, Maxime Coquelin wrote:

> Hi Eelco,
>
> On 3/27/23 14:51, Eelco Chaudron wrote:
>> This patch adds an operation callback which gets called every time the
>> library wants to call eventfd_write(). This eventfd_write() call could
>> result in a system call, which could potentially block the PMD thread.
>>
>> The callback function can decide whether it's ok to handle the
>> eventfd_write() now or have the newly introduced function,
>> rte_vhost_notify_guest(), called at a later time.
>>
>> This can be used by 3rd party applications, like OVS, to avoid system
>> calls being called as part of the PMD threads.
>
> That's a good idea, please find some comments inline:

Thanks for the review, see inline.
>
>> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
>> ---
>>   lib/vhost/rte_vhost.h |   10 +++++++++-
>>   lib/vhost/vhost.c     |   21 +++++++++++++++++++++
>>   lib/vhost/vhost.h     |   43 ++++++++++++++++++++++++++++---------------
>>   3 files changed, 58 insertions(+), 16 deletions(-)
>>
>> diff --git a/lib/vhost/rte_vhost.h b/lib/vhost/rte_vhost.h
>> index 58a5d4be92..af7a394d0f 100644
>> --- a/lib/vhost/rte_vhost.h
>> +++ b/lib/vhost/rte_vhost.h
>> @@ -298,7 +298,13 @@ struct rte_vhost_device_ops {
>>   	 */
>>   	void (*guest_notified)(int vid);
>>  -	void *reserved[1]; /**< Reserved for future extension */
>> +	/**
>> +	 * If this callback is registered, notification to the guest can
>> +	 * be handled by the front-end calling rte_vhost_notify_guest().
>> +	 * If it's not handled, 'false' should be returned. This can be used
>> +	 * to remove the "slow" eventfd_write() syscall from the datapath.
>> +	 */
>> +	bool (*guest_notify)(int vid, uint16_t queue_id);
>>   };
>>    /**
>> @@ -433,6 +439,8 @@ void rte_vhost_log_used_vring(int vid, uint16_t vring_idx,
>>    int rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable);
>>  +void rte_vhost_notify_guest(int vid, uint16_t queue_id);
>
> The new API needs to be tagged as experimental, and also documented. (I
> see rte_vhost_enable_guest_notification is not properly documented, so
> not a good example!)

I used exactly that as an example, so thought it should be good ;)
Will add this in the next revision...

>> +
>>   /**
>>    * Register vhost driver. path could be different for multiple
>>    * instance support.
>> diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c
>> index ef37943817..ee090d78ef 100644
>> --- a/lib/vhost/vhost.c
>> +++ b/lib/vhost/vhost.c
>> @@ -1467,6 +1467,27 @@ rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
>>   	return ret;
>>   }
>>  +void
>> +rte_vhost_notify_guest(int vid, uint16_t queue_id)
>> +{
>> +	struct virtio_net *dev = get_device(vid);
>> +	struct vhost_virtqueue *vq;
>> +
>> +	if (!dev ||  queue_id >= VHOST_MAX_VRING)
>> +		return;
>> +
>> +	vq = dev->virtqueue[queue_id];
>> +	if (!vq)
>> +		return;
>> +
>> +	rte_spinlock_lock(&vq->access_lock);
>> +
>> +	if (vq->callfd >= 0)
>> +		eventfd_write(vq->callfd, (eventfd_t)1);
>
> Maybe we should return an error of callfd is invalid or eventfd_write()
> failed.

See below

>> +
>> +	rte_spinlock_unlock(&vq->access_lock);
>> +}
>> +
>>   void
>>   rte_vhost_log_write(int vid, uint64_t addr, uint64_t len)
>>   {
>> diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
>> index 8fdab13c70..39ad8260a1 100644
>> --- a/lib/vhost/vhost.h
>> +++ b/lib/vhost/vhost.h
>> @@ -883,6 +883,30 @@ vhost_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old)
>>   	return (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old);
>>   }
>>  +static __rte_always_inline void
>> +vhost_vring_kick_guest(struct virtio_net *dev, struct vhost_virtqueue *vq)
>> +{
>> +	if (dev->notify_ops->guest_notify) {
>> +		uint16_t qid;
>> +		for (qid = 0;  qid < dev->nr_vring; qid++) {
>> +			if (dev->virtqueue[qid] == vq) {
>> +				if (dev->notify_ops->guest_notify(dev->vid,
>> +								  qid))
>> +					goto done;
>> +				break;
>> +			}
>> +		}
>
> Since v22.11, you no more need to iterate through the port's virtqueues,
> David introduced index field in vhost_virtuqueue in v22.11 (57e414e3ec29
> ("vhost: keep a reference to virtqueue index")).

Thanks will change this, I did most of this a while back and did not re-check.

>> +	}
>> +	eventfd_write(vq->callfd, (eventfd_t) 1);
>> +
>> +done:
>> +	if (dev->flags & VIRTIO_DEV_STATS_ENABLED)
>> +		vq->stats.guest_notifications++;
>> +	if (dev->notify_ops->guest_notified)
>> +		dev->notify_ops->guest_notified(dev->vid);
>> +}
>
> FYI, I have done almost the same refactoring in the VDUSE series I will
> soon send:
>
> https://gitlab.com/mcoquelin/dpdk-next-virtio/-/commit/7089a8a6d1e89b9db90547eb5b4fef886f24fd5b
>
> My version also introduces a new counter in case the notification
> failed. Maybe we could also have a counter in case the notification has
> been "offloaded" to the application?

What would be the best approach!? I can wait to send a new rev until your patch is out.

>> +
>> +
>>   static __rte_always_inline void
>>   vhost_vring_call_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
>>   {
>> @@ -905,21 +929,13 @@ vhost_vring_call_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
>>   		if ((vhost_need_event(vhost_used_event(vq), new, old) &&
>>   					(vq->callfd >= 0)) ||
>>   				unlikely(!signalled_used_valid)) {
>> -			eventfd_write(vq->callfd, (eventfd_t) 1);
>> -			if (dev->flags & VIRTIO_DEV_STATS_ENABLED)
>> -				vq->stats.guest_notifications++;
>> -			if (dev->notify_ops->guest_notified)
>> -				dev->notify_ops->guest_notified(dev->vid);
>> +			vhost_vring_kick_guest(dev, vq);
>>   		}
>>   	} else {
>>   		/* Kick the guest if necessary. */
>>   		if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
>>   				&& (vq->callfd >= 0)) {
>> -			eventfd_write(vq->callfd, (eventfd_t)1);
>> -			if (dev->flags & VIRTIO_DEV_STATS_ENABLED)
>> -				vq->stats.guest_notifications++;
>> -			if (dev->notify_ops->guest_notified)
>> -				dev->notify_ops->guest_notified(dev->vid);
>> +			vhost_vring_kick_guest(dev, vq);
>>   		}
>>   	}
>>   }
>> @@ -971,11 +987,8 @@ vhost_vring_call_packed(struct virtio_net *dev, struct vhost_virtqueue *vq)
>>   	if (vhost_need_event(off, new, old))
>>   		kick = true;
>>   kick:
>> -	if (kick) {
>> -		eventfd_write(vq->callfd, (eventfd_t)1);
>> -		if (dev->notify_ops->guest_notified)
>> -			dev->notify_ops->guest_notified(dev->vid);
>> -	}
>> +	if (kick)
>> +		vhost_vring_kick_guest(dev, vq);
>>   }
>>    static __rte_always_inline void
>>


^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: [EXT] [PATCH] vhost: add device op to offload the interrupt kick
  2023-03-27 12:51 [PATCH] vhost: add device op to offload the interrupt kick Eelco Chaudron
  2023-03-27 13:21 ` Maxime Coquelin
@ 2023-03-27 15:16 ` Gowrishankar Muthukrishnan
  2023-03-27 16:04   ` Eelco Chaudron
  1 sibling, 1 reply; 9+ messages in thread
From: Gowrishankar Muthukrishnan @ 2023-03-27 15:16 UTC (permalink / raw)
  To: Eelco Chaudron, maxime.coquelin, chenbo.xia; +Cc: dev

Hi Eelco,

> +void
> +rte_vhost_notify_guest(int vid, uint16_t queue_id) {
> +	struct virtio_net *dev = get_device(vid);
> +	struct vhost_virtqueue *vq;
> +
> +	if (!dev ||  queue_id >= VHOST_MAX_VRING)
> +		return;
> +
> +	vq = dev->virtqueue[queue_id];
> +	if (!vq)
> +		return;
> +
> +	rte_spinlock_lock(&vq->access_lock);
> +

Is spin lock needed here before system call ?

> +	if (vq->callfd >= 0)
> +		eventfd_write(vq->callfd, (eventfd_t)1);
> +
> +	rte_spinlock_unlock(&vq->access_lock);
> +}
> +

Thanks.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [EXT] [PATCH] vhost: add device op to offload the interrupt kick
  2023-03-27 15:16 ` [EXT] " Gowrishankar Muthukrishnan
@ 2023-03-27 16:04   ` Eelco Chaudron
  2023-03-27 16:35     ` Maxime Coquelin
  0 siblings, 1 reply; 9+ messages in thread
From: Eelco Chaudron @ 2023-03-27 16:04 UTC (permalink / raw)
  To: Gowrishankar Muthukrishnan; +Cc: maxime.coquelin, chenbo.xia, dev



On 27 Mar 2023, at 17:16, Gowrishankar Muthukrishnan wrote:

> Hi Eelco,
>
>> +void
>> +rte_vhost_notify_guest(int vid, uint16_t queue_id) {
>> +	struct virtio_net *dev = get_device(vid);
>> +	struct vhost_virtqueue *vq;
>> +
>> +	if (!dev ||  queue_id >= VHOST_MAX_VRING)
>> +		return;
>> +
>> +	vq = dev->virtqueue[queue_id];
>> +	if (!vq)
>> +		return;
>> +
>> +	rte_spinlock_lock(&vq->access_lock);
>> +
>
> Is spin lock needed here before system call ?

I assumed access_lock is protecting all the following fields in this structure, so I need the lock to read the vq->callfd, however, I can/should move the eventfd_write outside of the lock.

>> +	if (vq->callfd >= 0)
>> +		eventfd_write(vq->callfd, (eventfd_t)1);
>> +
>> +	rte_spinlock_unlock(&vq->access_lock);
>> +}
>> +
>
> Thanks.


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [EXT] [PATCH] vhost: add device op to offload the interrupt kick
  2023-03-27 16:04   ` Eelco Chaudron
@ 2023-03-27 16:35     ` Maxime Coquelin
  2023-03-28 12:14       ` Eelco Chaudron
  2023-04-03 14:51       ` Eelco Chaudron
  0 siblings, 2 replies; 9+ messages in thread
From: Maxime Coquelin @ 2023-03-27 16:35 UTC (permalink / raw)
  To: Eelco Chaudron, Gowrishankar Muthukrishnan; +Cc: chenbo.xia, dev

On 3/27/23 18:04, Eelco Chaudron wrote:
> 
> 
> On 27 Mar 2023, at 17:16, Gowrishankar Muthukrishnan wrote:
> 
>> Hi Eelco,
>>
>>> +void
>>> +rte_vhost_notify_guest(int vid, uint16_t queue_id) {
>>> +	struct virtio_net *dev = get_device(vid);
>>> +	struct vhost_virtqueue *vq;
>>> +
>>> +	if (!dev ||  queue_id >= VHOST_MAX_VRING)
>>> +		return;
>>> +
>>> +	vq = dev->virtqueue[queue_id];
>>> +	if (!vq)
>>> +		return;
>>> +
>>> +	rte_spinlock_lock(&vq->access_lock);
>>> +
>>
>> Is spin lock needed here before system call ?
> 
> I assumed access_lock is protecting all the following fields in this structure, so I need the lock to read the vq->callfd, however, I can/should move the eventfd_write outside of the lock.

The FD might be closed between the check and the call to eventfd_write
though, but I agree this is not optimal to call the eventfd_write under
the spinlock in your case, as you will block the pmd thread if it tries
to enqueue/dequeue packets on this queue, defeating the purpose of this
patch.

Maybe the solution is to change to read-write locks for the access_lock
spinlock. The datapath (rte_vhost_enqueue_burst/rte_vhost_dequeue_burst)
and this API would use the read version, meaning they won't lock each
other, and the control path (lib/vhost/vhost_user.c) will use the write
version.

Does that make sense?

Maxime
> 
>>> +	if (vq->callfd >= 0)
>>> +		eventfd_write(vq->callfd, (eventfd_t)1);
>>> +
>>> +	rte_spinlock_unlock(&vq->access_lock);
>>> +}
>>> +
>>
>> Thanks.
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [EXT] [PATCH] vhost: add device op to offload the interrupt kick
  2023-03-27 16:35     ` Maxime Coquelin
@ 2023-03-28 12:14       ` Eelco Chaudron
  2023-04-03 14:51       ` Eelco Chaudron
  1 sibling, 0 replies; 9+ messages in thread
From: Eelco Chaudron @ 2023-03-28 12:14 UTC (permalink / raw)
  To: Maxime Coquelin; +Cc: Gowrishankar Muthukrishnan, chenbo.xia, dev



On 27 Mar 2023, at 18:35, Maxime Coquelin wrote:

> On 3/27/23 18:04, Eelco Chaudron wrote:
>>
>>
>> On 27 Mar 2023, at 17:16, Gowrishankar Muthukrishnan wrote:
>>
>>> Hi Eelco,
>>>
>>>> +void
>>>> +rte_vhost_notify_guest(int vid, uint16_t queue_id) {
>>>> +	struct virtio_net *dev = get_device(vid);
>>>> +	struct vhost_virtqueue *vq;
>>>> +
>>>> +	if (!dev ||  queue_id >= VHOST_MAX_VRING)
>>>> +		return;
>>>> +
>>>> +	vq = dev->virtqueue[queue_id];
>>>> +	if (!vq)
>>>> +		return;
>>>> +
>>>> +	rte_spinlock_lock(&vq->access_lock);
>>>> +
>>>
>>> Is spin lock needed here before system call ?
>>
>> I assumed access_lock is protecting all the following fields in this structure, so I need the lock to read the vq->callfd, however, I can/should move the eventfd_write outside of the lock.
>
> The FD might be closed between the check and the call to eventfd_write
> though, but I agree this is not optimal to call the eventfd_write under
> the spinlock in your case, as you will block the pmd thread if it tries
> to enqueue/dequeue packets on this queue, defeating the purpose of this
> patch.
>
> Maybe the solution is to change to read-write locks for the access_lock
> spinlock. The datapath (rte_vhost_enqueue_burst/rte_vhost_dequeue_burst)
> and this API would use the read version, meaning they won't lock each
> other, and the control path (lib/vhost/vhost_user.c) will use the write
> version.
>
> Does that make sense?

Yes, this makes sense, let me investigate this and get back.

>>>> +	if (vq->callfd >= 0)
>>>> +		eventfd_write(vq->callfd, (eventfd_t)1);
>>>> +
>>>> +	rte_spinlock_unlock(&vq->access_lock);
>>>> +}
>>>> +
>>>
>>> Thanks.
>>


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [EXT] [PATCH] vhost: add device op to offload the interrupt kick
  2023-03-27 16:35     ` Maxime Coquelin
  2023-03-28 12:14       ` Eelco Chaudron
@ 2023-04-03 14:51       ` Eelco Chaudron
  2023-04-03 15:26         ` Maxime Coquelin
  1 sibling, 1 reply; 9+ messages in thread
From: Eelco Chaudron @ 2023-04-03 14:51 UTC (permalink / raw)
  To: Maxime Coquelin; +Cc: Gowrishankar Muthukrishnan, chenbo.xia, dev



On 27 Mar 2023, at 18:35, Maxime Coquelin wrote:

> On 3/27/23 18:04, Eelco Chaudron wrote:
>>
>>
>> On 27 Mar 2023, at 17:16, Gowrishankar Muthukrishnan wrote:
>>
>>> Hi Eelco,
>>>
>>>> +void
>>>> +rte_vhost_notify_guest(int vid, uint16_t queue_id) {
>>>> +	struct virtio_net *dev = get_device(vid);
>>>> +	struct vhost_virtqueue *vq;
>>>> +
>>>> +	if (!dev ||  queue_id >= VHOST_MAX_VRING)
>>>> +		return;
>>>> +
>>>> +	vq = dev->virtqueue[queue_id];
>>>> +	if (!vq)
>>>> +		return;
>>>> +
>>>> +	rte_spinlock_lock(&vq->access_lock);
>>>> +
>>>
>>> Is spin lock needed here before system call ?
>>
>> I assumed access_lock is protecting all the following fields in this structure, so I need the lock to read the vq->callfd, however, I can/should move the eventfd_write outside of the lock.
>
> The FD might be closed between the check and the call to eventfd_write
> though, but I agree this is not optimal to call the eventfd_write under
> the spinlock in your case, as you will block the pmd thread if it tries
> to enqueue/dequeue packets on this queue, defeating the purpose of this
> patch.
>
> Maybe the solution is to change to read-write locks for the access_lock
> spinlock. The datapath (rte_vhost_enqueue_burst/rte_vhost_dequeue_burst)
> and this API would use the read version, meaning they won't lock each
> other, and the control path (lib/vhost/vhost_user.c) will use the write
> version.
>
> Does that make sense?

Hi Maxime, I prepped a patch, but not the read/write part yet, https://github.com/chaudron/dpdk/commit/d51c93b4ff08b43daed33e3c0fee193a6d039c25#.

I was thinking that even a read/write lock does not work (or maybe we need a combination of taking the read and write lock). As we need to update statistics, which need protection.
For example here you see the split (with just two locks, but the sys call could be called in the read lock):

https://github.com/chaudron/dpdk/blob/d51c93b4ff08b43daed33e3c0fee193a6d039c25/lib/vhost/vhost.c#L1493

The best would be to not have a lock when calling the system call, but that does not seem safe. I do not see a clear solution, guess also as I have some trouble understanding the lock rules around vq’s.

Do you have some more insights to share? I can ping you offline and discuss this.

Cheers,

Eelco

>>
>>>> +	if (vq->callfd >= 0)
>>>> +		eventfd_write(vq->callfd, (eventfd_t)1);
>>>> +
>>>> +	rte_spinlock_unlock(&vq->access_lock);
>>>> +}
>>>> +
>>>
>>> Thanks.
>>


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [EXT] [PATCH] vhost: add device op to offload the interrupt kick
  2023-04-03 14:51       ` Eelco Chaudron
@ 2023-04-03 15:26         ` Maxime Coquelin
  0 siblings, 0 replies; 9+ messages in thread
From: Maxime Coquelin @ 2023-04-03 15:26 UTC (permalink / raw)
  To: Eelco Chaudron; +Cc: Gowrishankar Muthukrishnan, chenbo.xia, dev

Hi Eelco,

On 4/3/23 16:51, Eelco Chaudron wrote:
> 
> 
> On 27 Mar 2023, at 18:35, Maxime Coquelin wrote:
> 
>> On 3/27/23 18:04, Eelco Chaudron wrote:
>>>
>>>
>>> On 27 Mar 2023, at 17:16, Gowrishankar Muthukrishnan wrote:
>>>
>>>> Hi Eelco,
>>>>
>>>>> +void
>>>>> +rte_vhost_notify_guest(int vid, uint16_t queue_id) {
>>>>> +	struct virtio_net *dev = get_device(vid);
>>>>> +	struct vhost_virtqueue *vq;
>>>>> +
>>>>> +	if (!dev ||  queue_id >= VHOST_MAX_VRING)
>>>>> +		return;
>>>>> +
>>>>> +	vq = dev->virtqueue[queue_id];
>>>>> +	if (!vq)
>>>>> +		return;
>>>>> +
>>>>> +	rte_spinlock_lock(&vq->access_lock);
>>>>> +
>>>>
>>>> Is spin lock needed here before system call ?
>>>
>>> I assumed access_lock is protecting all the following fields in this structure, so I need the lock to read the vq->callfd, however, I can/should move the eventfd_write outside of the lock.
>>
>> The FD might be closed between the check and the call to eventfd_write
>> though, but I agree this is not optimal to call the eventfd_write under
>> the spinlock in your case, as you will block the pmd thread if it tries
>> to enqueue/dequeue packets on this queue, defeating the purpose of this
>> patch.
>>
>> Maybe the solution is to change to read-write locks for the access_lock
>> spinlock. The datapath (rte_vhost_enqueue_burst/rte_vhost_dequeue_burst)
>> and this API would use the read version, meaning they won't lock each
>> other, and the control path (lib/vhost/vhost_user.c) will use the write
>> version.
>>
>> Does that make sense?
> 
> Hi Maxime, I prepped a patch, but not the read/write part yet, https://github.com/chaudron/dpdk/commit/d51c93b4ff08b43daed33e3c0fee193a6d039c25#.
> 
> I was thinking that even a read/write lock does not work (or maybe we need a combination of taking the read and write lock). As we need to update statistics, which need protection.
> For example here you see the split (with just two locks, but the sys call could be called in the read lock):
> 
> https://github.com/chaudron/dpdk/blob/d51c93b4ff08b43daed33e3c0fee193a6d039c25/lib/vhost/vhost.c#L1493
> 
> The best would be to not have a lock when calling the system call, but that does not seem safe. I do not see a clear solution, guess also as I have some trouble understanding the lock rules around vq’s.

The lock is indeed required. Maybe we can use read-lock, and use atomic
operations for counters that could be accessed by several threads?

> 
> Do you have some more insights to share? I can ping you offline and discuss this.

Sure, I'll be happy to discuss more about this.

Thanks,
Maxime

> Cheers,
> 
> Eelco
> 
>>>
>>>>> +	if (vq->callfd >= 0)
>>>>> +		eventfd_write(vq->callfd, (eventfd_t)1);
>>>>> +
>>>>> +	rte_spinlock_unlock(&vq->access_lock);
>>>>> +}
>>>>> +
>>>>
>>>> Thanks.
>>>
> 


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2023-04-03 15:26 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-03-27 12:51 [PATCH] vhost: add device op to offload the interrupt kick Eelco Chaudron
2023-03-27 13:21 ` Maxime Coquelin
2023-03-27 14:10   ` Eelco Chaudron
2023-03-27 15:16 ` [EXT] " Gowrishankar Muthukrishnan
2023-03-27 16:04   ` Eelco Chaudron
2023-03-27 16:35     ` Maxime Coquelin
2023-03-28 12:14       ` Eelco Chaudron
2023-04-03 14:51       ` Eelco Chaudron
2023-04-03 15:26         ` Maxime Coquelin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).