DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH v3 0/3] Fix Vhost regressions
@ 2020-07-29  9:19 Maxime Coquelin
  2020-07-29  9:19 ` [dpdk-dev] [PATCH v3 1/3] vhost: fix guest notification setting Maxime Coquelin
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Maxime Coquelin @ 2020-07-29  9:19 UTC (permalink / raw)
  To: dev, matan, chenbo.xia, yong.liu, yinan.wang
  Cc: thomas, ferruh.yigit, david.marchand, Maxime Coquelin

This series aims at fixing the regressions reported by Intel QE.
I managed to reproduce the issues, and this series fixes them.

The first two patches fix the performance regression. They have
been tested by Intel QE, who confirmed the fix.

In this v3, the only change is in the third patch, which aims at
avoiding a race by assuming the application does not modify
the rte_epoll_event once it has requested the enablement of the
corresponding interrupt. I tested this v3 with the l3fwd-power
example.

Thanks to Intel QE team for all the validation work!
Maxime


Maxime Coquelin (3):
  vhost: fix guest notification setting
  net/vhost: fix queue update
  net/vhost: fix interrupt mode

 drivers/net/vhost/rte_eth_vhost.c | 93 +++++++++++++++++++++++--------
 lib/librte_vhost/vhost.c          | 24 ++++++--
 lib/librte_vhost/vhost.h          |  5 ++
 lib/librte_vhost/vhost_user.c     | 11 +++-
 4 files changed, 103 insertions(+), 30 deletions(-)

-- 
2.26.2


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [dpdk-dev] [PATCH v3 1/3] vhost: fix guest notification setting
  2020-07-29  9:19 [dpdk-dev] [PATCH v3 0/3] Fix Vhost regressions Maxime Coquelin
@ 2020-07-29  9:19 ` Maxime Coquelin
  2020-07-29  9:19 ` [dpdk-dev] [PATCH v3 2/3] net/vhost: fix queue update Maxime Coquelin
  2020-07-29  9:20 ` [dpdk-dev] [PATCH v3 3/3] net/vhost: fix interrupt mode Maxime Coquelin
  2 siblings, 0 replies; 9+ messages in thread
From: Maxime Coquelin @ 2020-07-29  9:19 UTC (permalink / raw)
  To: dev, matan, chenbo.xia, yong.liu, yinan.wang
  Cc: thomas, ferruh.yigit, david.marchand, Maxime Coquelin

If rte_vhost_enable_guest_notification is called before
the virtqueue is ready, the configuration is lost.

This patch fixes this by saving the guest notification
enablement value requested by the application, and applying
it before the virtqueue is made ready to the application.

Fixes: 604052ae5395 ("net/vhost: support queue update")

Reported-by: Yinan Wang <yinan.wang@intel.com>
Tested-by: Yinan Wang <yinan.wang@intel.com>
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
---
 lib/librte_vhost/vhost.c      | 24 ++++++++++++++++++++----
 lib/librte_vhost/vhost.h      |  5 +++++
 lib/librte_vhost/vhost_user.c | 11 ++++++++---
 3 files changed, 33 insertions(+), 7 deletions(-)

diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 14b3e253e8..8f20a0818f 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -534,6 +534,7 @@ init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
 
 	vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
 	vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
+	vq->notif_enable = VIRTIO_UNINITIALIZED_NOTIF;
 
 	vhost_user_iotlb_init(dev, vring_idx);
 	/* Backends are set to -1 indicating an inactive device. */
@@ -1311,6 +1312,23 @@ vhost_enable_notify_packed(struct virtio_net *dev,
 	return 0;
 }
 
+int
+vhost_enable_guest_notification(struct virtio_net *dev,
+		struct vhost_virtqueue *vq, int enable)
+{
+	/*
+	 * If the virtqueue is not ready yet, it will be applied
+	 * when it will become ready.
+	 */
+	if (!vq->ready)
+		return 0;
+
+	if (vq_is_packed(dev))
+		return vhost_enable_notify_packed(dev, vq, enable);
+	else
+		return vhost_enable_notify_split(dev, vq, enable);
+}
+
 int
 rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
 {
@@ -1325,10 +1343,8 @@ rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
 
 	rte_spinlock_lock(&vq->access_lock);
 
-	if (vq_is_packed(dev))
-		ret = vhost_enable_notify_packed(dev, vq, enable);
-	else
-		ret = vhost_enable_notify_split(dev, vq, enable);
+	vq->notif_enable = enable;
+	ret = vhost_enable_guest_notification(dev, vq, enable);
 
 	rte_spinlock_unlock(&vq->access_lock);
 
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 0f7212f888..a29c6638e2 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -164,6 +164,9 @@ struct vhost_virtqueue {
 	int			enabled;
 	int			access_ok;
 	int			ready;
+	int			notif_enable;
+#define VIRTIO_UNINITIALIZED_NOTIF	(-1)
+
 	rte_spinlock_t		access_lock;
 
 	/* Used to notify the guest (trigger interrupt) */
@@ -668,6 +671,8 @@ void vhost_enable_dequeue_zero_copy(int vid);
 void vhost_set_builtin_virtio_net(int vid, bool enable);
 void vhost_enable_extbuf(int vid);
 void vhost_enable_linearbuf(int vid);
+int vhost_enable_guest_notification(struct virtio_net *dev,
+		struct vhost_virtqueue *vq, int enable);
 
 struct vhost_device_ops const *vhost_driver_callback_get(const char *path);
 
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index 9ddeae3622..c3c924faec 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -235,6 +235,11 @@ vhost_user_notify_queue_state(struct virtio_net *dev, uint16_t index,
 			      int enable)
 {
 	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
+	struct vhost_virtqueue *vq = dev->virtqueue[index];
+
+	/* Configure guest notifications on enable */
+	if (enable && vq->notif_enable != VIRTIO_UNINITIALIZED_NOTIF)
+		vhost_enable_guest_notification(dev, vq, vq->notif_enable);
 
 	if (vdpa_dev && vdpa_dev->ops->set_vring_state)
 		vdpa_dev->ops->set_vring_state(dev->vid, index, enable);
@@ -1640,8 +1645,8 @@ vhost_user_set_vring_call(struct virtio_net **pdev, struct VhostUserMsg *msg,
 	vq = dev->virtqueue[file.index];
 
 	if (vq->ready) {
-		vhost_user_notify_queue_state(dev, file.index, 0);
 		vq->ready = 0;
+		vhost_user_notify_queue_state(dev, file.index, 0);
 	}
 
 	if (vq->callfd >= 0)
@@ -1903,8 +1908,8 @@ vhost_user_set_vring_kick(struct virtio_net **pdev, struct VhostUserMsg *msg,
 	}
 
 	if (vq->ready) {
-		vhost_user_notify_queue_state(dev, file.index, 0);
 		vq->ready = 0;
+		vhost_user_notify_queue_state(dev, file.index, 0);
 	}
 
 	if (vq->kickfd >= 0)
@@ -2917,8 +2922,8 @@ vhost_user_msg_handler(int vid, int fd)
 		bool cur_ready = vq_is_ready(dev, vq);
 
 		if (cur_ready != (vq && vq->ready)) {
-			vhost_user_notify_queue_state(dev, i, cur_ready);
 			vq->ready = cur_ready;
+			vhost_user_notify_queue_state(dev, i, cur_ready);
 		}
 	}
 
-- 
2.26.2


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [dpdk-dev] [PATCH v3 2/3] net/vhost: fix queue update
  2020-07-29  9:19 [dpdk-dev] [PATCH v3 0/3] Fix Vhost regressions Maxime Coquelin
  2020-07-29  9:19 ` [dpdk-dev] [PATCH v3 1/3] vhost: fix guest notification setting Maxime Coquelin
@ 2020-07-29  9:19 ` Maxime Coquelin
  2020-07-29  9:20 ` [dpdk-dev] [PATCH v3 3/3] net/vhost: fix interrupt mode Maxime Coquelin
  2 siblings, 0 replies; 9+ messages in thread
From: Maxime Coquelin @ 2020-07-29  9:19 UTC (permalink / raw)
  To: dev, matan, chenbo.xia, yong.liu, yinan.wang
  Cc: thomas, ferruh.yigit, david.marchand, Maxime Coquelin

Now that the vhost library saves the guest notifications
enablement value in its virtqueues metadata, it is not
necessary to do it in the vring_state_changed callback.

Another effect of this patch is that it prevents a possible
deadlock in the vhost library.

Fixes: 604052ae5395 ("net/vhost: support queue update")

Reported-by: Yinan Wang <yinan.wang@intel.com>
Tested-by: Yinan Wang <yinan.wang@intel.com>
Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 drivers/net/vhost/rte_eth_vhost.c | 28 +++++++++-------------------
 1 file changed, 9 insertions(+), 19 deletions(-)

diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
index bbf79b2c0e..951929c663 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -94,7 +94,6 @@ struct vhost_queue {
 	struct rte_mempool *mb_pool;
 	uint16_t port;
 	uint16_t virtqueue_id;
-	bool intr_en;
 	struct vhost_stats stats;
 };
 
@@ -547,8 +546,6 @@ eth_rxq_intr_enable(struct rte_eth_dev *dev, uint16_t qid)
 	rte_vhost_enable_guest_notification(vq->vid, (qid << 1) + 1, 1);
 	rte_wmb();
 
-	vq->intr_en = true;
-
 	return ret;
 }
 
@@ -574,8 +571,6 @@ eth_rxq_intr_disable(struct rte_eth_dev *dev, uint16_t qid)
 	rte_vhost_enable_guest_notification(vq->vid, (qid << 1) + 1, 0);
 	rte_wmb();
 
-	vq->intr_en = false;
-
 	return 0;
 }
 
@@ -841,7 +836,6 @@ vring_conf_update(int vid, struct rte_eth_dev *eth_dev, uint16_t vring_id)
 	struct rte_eth_conf *dev_conf = &eth_dev->data->dev_conf;
 	struct pmd_internal *internal = eth_dev->data->dev_private;
 	struct rte_vhost_vring vring;
-	struct vhost_queue *vq;
 	int rx_idx = vring_id % 2 ? (vring_id - 1) >> 1 : -1;
 	int ret = 0;
 
@@ -853,21 +847,17 @@ vring_conf_update(int vid, struct rte_eth_dev *eth_dev, uint16_t vring_id)
 	    rte_atomic32_read(&internal->dev_attached) &&
 	    rte_atomic32_read(&internal->started) &&
 	    dev_conf->intr_conf.rxq) {
-		vq = eth_dev->data->rx_queues[rx_idx];
 		ret = rte_vhost_get_vhost_vring(vid, vring_id, &vring);
-		if (!ret) {
-			if (vring.kickfd !=
-			    eth_dev->intr_handle->efds[rx_idx]) {
-				VHOST_LOG(INFO,
-					  "kickfd for rxq-%d was changed.\n",
-					  rx_idx);
-				eth_dev->intr_handle->efds[rx_idx] =
-								   vring.kickfd;
-			}
+		if (ret) {
+			VHOST_LOG(ERR, "Failed to get vring %d information.\n",
+					vring_id);
+			return ret;
+		}
 
-			rte_vhost_enable_guest_notification(vid, vring_id,
-							    vq->intr_en);
-			rte_wmb();
+		if (vring.kickfd != eth_dev->intr_handle->efds[rx_idx]) {
+			VHOST_LOG(INFO, "kickfd for rxq-%d was changed.\n",
+					  rx_idx);
+			eth_dev->intr_handle->efds[rx_idx] = vring.kickfd;
 		}
 	}
 
-- 
2.26.2


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [dpdk-dev] [PATCH v3 3/3] net/vhost: fix interrupt mode
  2020-07-29  9:19 [dpdk-dev] [PATCH v3 0/3] Fix Vhost regressions Maxime Coquelin
  2020-07-29  9:19 ` [dpdk-dev] [PATCH v3 1/3] vhost: fix guest notification setting Maxime Coquelin
  2020-07-29  9:19 ` [dpdk-dev] [PATCH v3 2/3] net/vhost: fix queue update Maxime Coquelin
@ 2020-07-29  9:20 ` Maxime Coquelin
  2020-07-29 11:27   ` David Marchand
  2020-07-29 12:53   ` Maxime Coquelin
  2 siblings, 2 replies; 9+ messages in thread
From: Maxime Coquelin @ 2020-07-29  9:20 UTC (permalink / raw)
  To: dev, matan, chenbo.xia, yong.liu, yinan.wang
  Cc: thomas, ferruh.yigit, david.marchand, Maxime Coquelin

At .new_device() time, only the first vring pair is
now ready; the other vrings are configured later.

The problem is that when the application sets up and enables
interrupts, only the Rx interrupt of the first queue pair will
be enabled.

This patch fixes the issue by setting the number of
max interrupts to the number of Rx queues that will be
initialized later. Then, as soon as an Rx vring is ready
and its interrupt is enabled by the application, it removes
the corresponding uninitialized epoll event and installs a
new one with the valid FD.

Fixes: 604052ae5395 ("net/vhost: support queue update")

Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 drivers/net/vhost/rte_eth_vhost.c | 75 +++++++++++++++++++++++++++----
 1 file changed, 66 insertions(+), 9 deletions(-)

diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
index 951929c663..237785dd66 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -5,6 +5,7 @@
 #include <unistd.h>
 #include <pthread.h>
 #include <stdbool.h>
+#include <sys/epoll.h>
 
 #include <rte_mbuf.h>
 #include <rte_ethdev_driver.h>
@@ -95,6 +96,8 @@ struct vhost_queue {
 	uint16_t port;
 	uint16_t virtqueue_id;
 	struct vhost_stats stats;
+	int intr_enable;
+	rte_spinlock_t intr_lock;
 };
 
 struct pmd_internal {
@@ -524,6 +527,45 @@ find_internal_resource(char *ifname)
 	return list;
 }
 
+static int
+eth_vhost_update_intr(struct rte_eth_dev *eth_dev, uint16_t rxq_idx)
+{
+	struct rte_intr_handle *handle = eth_dev->intr_handle;
+	struct rte_epoll_event rev;
+	int epfd, ret;
+
+	if (handle->efds[rxq_idx] == handle->elist[rxq_idx].fd)
+		return 0;
+
+	VHOST_LOG(INFO, "kickfd for rxq-%d was changed, updating handler.\n",
+			rxq_idx);
+
+	/*
+	 * First remove invalid epoll event, and then install
+	 * the new one. May be solved with a proper API in the
+	 * future.
+	 */
+	epfd = handle->elist[rxq_idx].epfd;
+	rev = handle->elist[rxq_idx];
+	ret = rte_epoll_ctl(epfd, EPOLL_CTL_DEL, rev.fd,
+			&handle->elist[rxq_idx]);
+	if (ret) {
+		VHOST_LOG(ERR, "Delete epoll event failed.\n");
+		return ret;
+	}
+
+	rev.fd = handle->efds[rxq_idx];
+	handle->elist[rxq_idx] = rev;
+	ret = rte_epoll_ctl(epfd, EPOLL_CTL_ADD, rev.fd,
+			&handle->elist[rxq_idx]);
+	if (ret) {
+		VHOST_LOG(ERR, "Add epoll event failed.\n");
+		return ret;
+	}
+
+	return 0;
+}
+
 static int
 eth_rxq_intr_enable(struct rte_eth_dev *dev, uint16_t qid)
 {
@@ -537,6 +579,11 @@ eth_rxq_intr_enable(struct rte_eth_dev *dev, uint16_t qid)
 		return -1;
 	}
 
+	rte_spinlock_lock(&vq->intr_lock);
+	vq->intr_enable = 1;
+	ret = eth_vhost_update_intr(dev, qid);
+	rte_spinlock_unlock(&vq->intr_lock);
+
 	ret = rte_vhost_get_vhost_vring(vq->vid, (qid << 1) + 1, &vring);
 	if (ret < 0) {
 		VHOST_LOG(ERR, "Failed to get rxq%d's vring\n", qid);
@@ -571,6 +618,8 @@ eth_rxq_intr_disable(struct rte_eth_dev *dev, uint16_t qid)
 	rte_vhost_enable_guest_notification(vq->vid, (qid << 1) + 1, 0);
 	rte_wmb();
 
+	vq->intr_enable = 0;
+
 	return 0;
 }
 
@@ -593,7 +642,6 @@ eth_vhost_install_intr(struct rte_eth_dev *dev)
 {
 	struct rte_vhost_vring vring;
 	struct vhost_queue *vq;
-	int count = 0;
 	int nb_rxq = dev->data->nb_rx_queues;
 	int i;
 	int ret;
@@ -623,6 +671,8 @@ eth_vhost_install_intr(struct rte_eth_dev *dev)
 
 	VHOST_LOG(INFO, "Prepare intr vec\n");
 	for (i = 0; i < nb_rxq; i++) {
+		dev->intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + i;
+		dev->intr_handle->efds[i] = -1;
 		vq = dev->data->rx_queues[i];
 		if (!vq) {
 			VHOST_LOG(INFO, "rxq-%d not setup yet, skip!\n", i);
@@ -641,14 +691,12 @@ eth_vhost_install_intr(struct rte_eth_dev *dev)
 				"rxq-%d's kickfd is invalid, skip!\n", i);
 			continue;
 		}
-		dev->intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + i;
 		dev->intr_handle->efds[i] = vring.kickfd;
-		count++;
 		VHOST_LOG(INFO, "Installed intr vec for rxq-%d\n", i);
 	}
 
-	dev->intr_handle->nb_efd = count;
-	dev->intr_handle->max_intr = count + 1;
+	dev->intr_handle->nb_efd = nb_rxq;
+	dev->intr_handle->max_intr = nb_rxq + 1;
 	dev->intr_handle->type = RTE_INTR_HANDLE_VDEV;
 
 	return 0;
@@ -835,6 +883,7 @@ vring_conf_update(int vid, struct rte_eth_dev *eth_dev, uint16_t vring_id)
 {
 	struct rte_eth_conf *dev_conf = &eth_dev->data->dev_conf;
 	struct pmd_internal *internal = eth_dev->data->dev_private;
+	struct vhost_queue *vq;
 	struct rte_vhost_vring vring;
 	int rx_idx = vring_id % 2 ? (vring_id - 1) >> 1 : -1;
 	int ret = 0;
@@ -853,12 +902,18 @@ vring_conf_update(int vid, struct rte_eth_dev *eth_dev, uint16_t vring_id)
 					vring_id);
 			return ret;
 		}
+		eth_dev->intr_handle->efds[rx_idx] = vring.kickfd;
 
-		if (vring.kickfd != eth_dev->intr_handle->efds[rx_idx]) {
-			VHOST_LOG(INFO, "kickfd for rxq-%d was changed.\n",
-					  rx_idx);
-			eth_dev->intr_handle->efds[rx_idx] = vring.kickfd;
+		vq = eth_dev->data->rx_queues[rx_idx];
+		if (!vq) {
+			VHOST_LOG(ERR, "rxq%d is not setup yet\n", rx_idx);
+			return -1;
 		}
+
+		rte_spinlock_lock(&vq->intr_lock);
+		if (vq->intr_enable)
+			ret = eth_vhost_update_intr(eth_dev, rx_idx);
+		rte_spinlock_unlock(&vq->intr_lock);
 	}
 
 	return ret;
@@ -1152,6 +1207,7 @@ eth_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
 
 	vq->mb_pool = mb_pool;
 	vq->virtqueue_id = rx_queue_id * VIRTIO_QNUM + VIRTIO_TXQ;
+	rte_spinlock_init(&vq->intr_lock);
 	dev->data->rx_queues[rx_queue_id] = vq;
 
 	return 0;
@@ -1173,6 +1229,7 @@ eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
 	}
 
 	vq->virtqueue_id = tx_queue_id * VIRTIO_QNUM + VIRTIO_RXQ;
+	rte_spinlock_init(&vq->intr_lock);
 	dev->data->tx_queues[tx_queue_id] = vq;
 
 	return 0;
-- 
2.26.2


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [dpdk-dev] [PATCH v3 3/3] net/vhost: fix interrupt mode
  2020-07-29  9:20 ` [dpdk-dev] [PATCH v3 3/3] net/vhost: fix interrupt mode Maxime Coquelin
@ 2020-07-29 11:27   ` David Marchand
  2020-07-29 13:19     ` Maxime Coquelin
  2020-07-29 12:53   ` Maxime Coquelin
  1 sibling, 1 reply; 9+ messages in thread
From: David Marchand @ 2020-07-29 11:27 UTC (permalink / raw)
  To: Maxime Coquelin
  Cc: dev, Matan Azrad, Xia, Chenbo, Marvin Liu, Wang, Yinan,
	Thomas Monjalon, Yigit, Ferruh

On Wed, Jul 29, 2020 at 11:20 AM Maxime Coquelin
<maxime.coquelin@redhat.com> wrote:
>
> At .new_device() time, only the first vring pair is
> now ready, other vrings are consfigured later.

configured*

>
> Problem is that when application will setup and enable
> interrupts, only the first queue pair Rx interrupt will
> be enabled.
>
> This patches fixes the issue by setting the number of
> max interrupts to the number of Rx queues that will be
> later initialized. Then, as soon as a Rx vring is ready
> and interrupt enabled by the application, it removes the
> corresponding uninitialized epoll event, and install a

installs*

> new one with the valid FD.
>
> Fixes: 604052ae5395 ("net/vhost: support queue update")
>
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>

It seems a bit of a hack, but I _think_ the patch is good wrt races on
epoll configuration.

We are only touching the vhost pmd, in interrupt mode.
The interrupt mode is not that frequently used (I found no usage in
opensource projects).
The vhost pmd is not used in OVS as it lags behind the vhost library
and has limitations.

So my opinion is that the risk of taking this patch rather than
reverting the changes (which is not trivial iiuc) in the vhost library
is acceptable.


One comment below:

> ---
>  drivers/net/vhost/rte_eth_vhost.c | 75 +++++++++++++++++++++++++++----
>  1 file changed, 66 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
> index 951929c663..237785dd66 100644
> --- a/drivers/net/vhost/rte_eth_vhost.c
> +++ b/drivers/net/vhost/rte_eth_vhost.c
> @@ -5,6 +5,7 @@
>  #include <unistd.h>
>  #include <pthread.h>
>  #include <stdbool.h>
> +#include <sys/epoll.h>
>
>  #include <rte_mbuf.h>
>  #include <rte_ethdev_driver.h>
> @@ -95,6 +96,8 @@ struct vhost_queue {
>         uint16_t port;
>         uint16_t virtqueue_id;
>         struct vhost_stats stats;
> +       int intr_enable;
> +       rte_spinlock_t intr_lock;
>  };
>
>  struct pmd_internal {
> @@ -524,6 +527,45 @@ find_internal_resource(char *ifname)
>         return list;
>  }
>
> +static int
> +eth_vhost_update_intr(struct rte_eth_dev *eth_dev, uint16_t rxq_idx)
> +{
> +       struct rte_intr_handle *handle = eth_dev->intr_handle;
> +       struct rte_epoll_event rev;
> +       int epfd, ret;
> +
> +       if (handle->efds[rxq_idx] == handle->elist[rxq_idx].fd)
> +               return 0;

Feel free to ignore if this situation can not happen.

We are expecting only -1 -> valid fd transitions.
Maybe add an error log if we are in another situation?
This would indicate something quite broken.



> +
> +       VHOST_LOG(INFO, "kickfd for rxq-%d was changed, updating handler.\n",
> +                       rxq_idx);
> +
> +       /*
> +        * First remove invalid epoll event, and then isntall
> +        * the new one. May be solved with a proper API in the
> +        * future.
> +        */
> +       epfd = handle->elist[rxq_idx].epfd;
> +       rev = handle->elist[rxq_idx];
> +       ret = rte_epoll_ctl(epfd, EPOLL_CTL_DEL, rev.fd,
> +                       &handle->elist[rxq_idx]);
> +       if (ret) {
> +               VHOST_LOG(ERR, "Delete epoll event failed.\n");
> +               return ret;
> +       }
> +
> +       rev.fd = handle->efds[rxq_idx];
> +       handle->elist[rxq_idx] = rev;
> +       ret = rte_epoll_ctl(epfd, EPOLL_CTL_ADD, rev.fd,
> +                       &handle->elist[rxq_idx]);
> +       if (ret) {
> +               VHOST_LOG(ERR, "Add epoll event failed.\n");
> +               return ret;
> +       }
> +
> +       return 0;
> +}
> +
>  static int
>  eth_rxq_intr_enable(struct rte_eth_dev *dev, uint16_t qid)
>  {
> @@ -537,6 +579,11 @@ eth_rxq_intr_enable(struct rte_eth_dev *dev, uint16_t qid)
>                 return -1;
>         }
>
> +       rte_spinlock_lock(&vq->intr_lock);
> +       vq->intr_enable = 1;
> +       ret = eth_vhost_update_intr(dev, qid);
> +       rte_spinlock_unlock(&vq->intr_lock);
> +
>         ret = rte_vhost_get_vhost_vring(vq->vid, (qid << 1) + 1, &vring);
>         if (ret < 0) {
>                 VHOST_LOG(ERR, "Failed to get rxq%d's vring\n", qid);
> @@ -571,6 +618,8 @@ eth_rxq_intr_disable(struct rte_eth_dev *dev, uint16_t qid)
>         rte_vhost_enable_guest_notification(vq->vid, (qid << 1) + 1, 0);
>         rte_wmb();
>
> +       vq->intr_enable = 0;
> +
>         return 0;
>  }
>
> @@ -593,7 +642,6 @@ eth_vhost_install_intr(struct rte_eth_dev *dev)
>  {
>         struct rte_vhost_vring vring;
>         struct vhost_queue *vq;
> -       int count = 0;
>         int nb_rxq = dev->data->nb_rx_queues;
>         int i;
>         int ret;
> @@ -623,6 +671,8 @@ eth_vhost_install_intr(struct rte_eth_dev *dev)
>
>         VHOST_LOG(INFO, "Prepare intr vec\n");
>         for (i = 0; i < nb_rxq; i++) {
> +               dev->intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + i;
> +               dev->intr_handle->efds[i] = -1;
>                 vq = dev->data->rx_queues[i];
>                 if (!vq) {
>                         VHOST_LOG(INFO, "rxq-%d not setup yet, skip!\n", i);
> @@ -641,14 +691,12 @@ eth_vhost_install_intr(struct rte_eth_dev *dev)
>                                 "rxq-%d's kickfd is invalid, skip!\n", i);
>                         continue;
>                 }
> -               dev->intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + i;
>                 dev->intr_handle->efds[i] = vring.kickfd;
> -               count++;
>                 VHOST_LOG(INFO, "Installed intr vec for rxq-%d\n", i);
>         }
>
> -       dev->intr_handle->nb_efd = count;
> -       dev->intr_handle->max_intr = count + 1;
> +       dev->intr_handle->nb_efd = nb_rxq;
> +       dev->intr_handle->max_intr = nb_rxq + 1;
>         dev->intr_handle->type = RTE_INTR_HANDLE_VDEV;
>
>         return 0;
> @@ -835,6 +883,7 @@ vring_conf_update(int vid, struct rte_eth_dev *eth_dev, uint16_t vring_id)
>  {
>         struct rte_eth_conf *dev_conf = &eth_dev->data->dev_conf;
>         struct pmd_internal *internal = eth_dev->data->dev_private;
> +       struct vhost_queue *vq;
>         struct rte_vhost_vring vring;
>         int rx_idx = vring_id % 2 ? (vring_id - 1) >> 1 : -1;
>         int ret = 0;
> @@ -853,12 +902,18 @@ vring_conf_update(int vid, struct rte_eth_dev *eth_dev, uint16_t vring_id)
>                                         vring_id);
>                         return ret;
>                 }
> +               eth_dev->intr_handle->efds[rx_idx] = vring.kickfd;
>
> -               if (vring.kickfd != eth_dev->intr_handle->efds[rx_idx]) {
> -                       VHOST_LOG(INFO, "kickfd for rxq-%d was changed.\n",
> -                                         rx_idx);
> -                       eth_dev->intr_handle->efds[rx_idx] = vring.kickfd;
> +               vq = eth_dev->data->rx_queues[rx_idx];
> +               if (!vq) {
> +                       VHOST_LOG(ERR, "rxq%d is not setup yet\n", rx_idx);
> +                       return -1;
>                 }
> +
> +               rte_spinlock_lock(&vq->intr_lock);
> +               if (vq->intr_enable)
> +                       ret = eth_vhost_update_intr(eth_dev, rx_idx);
> +               rte_spinlock_unlock(&vq->intr_lock);
>         }
>
>         return ret;
> @@ -1152,6 +1207,7 @@ eth_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
>
>         vq->mb_pool = mb_pool;
>         vq->virtqueue_id = rx_queue_id * VIRTIO_QNUM + VIRTIO_TXQ;
> +       rte_spinlock_init(&vq->intr_lock);
>         dev->data->rx_queues[rx_queue_id] = vq;
>
>         return 0;
> @@ -1173,6 +1229,7 @@ eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
>         }
>
>         vq->virtqueue_id = tx_queue_id * VIRTIO_QNUM + VIRTIO_RXQ;
> +       rte_spinlock_init(&vq->intr_lock);
>         dev->data->tx_queues[tx_queue_id] = vq;
>
>         return 0;
> --
> 2.26.2
>



--
David Marchand


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [dpdk-dev] [PATCH v3 3/3] net/vhost: fix interrupt mode
  2020-07-29  9:20 ` [dpdk-dev] [PATCH v3 3/3] net/vhost: fix interrupt mode Maxime Coquelin
  2020-07-29 11:27   ` David Marchand
@ 2020-07-29 12:53   ` Maxime Coquelin
  2020-07-29 13:24     ` Xia, Chenbo
  1 sibling, 1 reply; 9+ messages in thread
From: Maxime Coquelin @ 2020-07-29 12:53 UTC (permalink / raw)
  To: dev, matan, chenbo.xia, yong.liu, yinan.wang
  Cc: thomas, ferruh.yigit, david.marchand



On 7/29/20 11:20 AM, Maxime Coquelin wrote:
> At .new_device() time, only the first vring pair is
> now ready, other vrings are consfigured later.
> 
> Problem is that when application will setup and enable
> interrupts, only the first queue pair Rx interrupt will
> be enabled.
> 
> This patches fixes the issue by setting the number of
> max interrupts to the number of Rx queues that will be
> later initialized. Then, as soon as a Rx vring is ready
> and interrupt enabled by the application, it removes the
> corresponding uninitialized epoll event, and install a
> new one with the valid FD.
> 
> Fixes: 604052ae5395 ("net/vhost: support queue update")
> 
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
>  drivers/net/vhost/rte_eth_vhost.c | 75 +++++++++++++++++++++++++++----
>  1 file changed, 66 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
> index 951929c663..237785dd66 100644
> --- a/drivers/net/vhost/rte_eth_vhost.c
> +++ b/drivers/net/vhost/rte_eth_vhost.c
> @@ -5,6 +5,7 @@
>  #include <unistd.h>
>  #include <pthread.h>
>  #include <stdbool.h>
> +#include <sys/epoll.h>
>  
>  #include <rte_mbuf.h>
>  #include <rte_ethdev_driver.h>
> @@ -95,6 +96,8 @@ struct vhost_queue {
>  	uint16_t port;
>  	uint16_t virtqueue_id;
>  	struct vhost_stats stats;
> +	int intr_enable;
> +	rte_spinlock_t intr_lock;
>  };
>  
>  struct pmd_internal {
> @@ -524,6 +527,45 @@ find_internal_resource(char *ifname)
>  	return list;
>  }
>  
> +static int
> +eth_vhost_update_intr(struct rte_eth_dev *eth_dev, uint16_t rxq_idx)
> +{
> +	struct rte_intr_handle *handle = eth_dev->intr_handle;
> +	struct rte_epoll_event rev;
> +	int epfd, ret;
> +

Chenbo reported that we can have a NULL pointer dereference on handle
when using Virtio-user on the other end and quitting.



> +	if (handle->efds[rxq_idx] == handle->elist[rxq_idx].fd)
> +		return 0;
> +
> +	VHOST_LOG(INFO, "kickfd for rxq-%d was changed, updating handler.\n",
> +			rxq_idx);
> +
> +	/*
> +	 * First remove invalid epoll event, and then isntall
> +	 * the new one. May be solved with a proper API in the
> +	 * future.
> +	 */
> +	epfd = handle->elist[rxq_idx].epfd;
> +	rev = handle->elist[rxq_idx];
> +	ret = rte_epoll_ctl(epfd, EPOLL_CTL_DEL, rev.fd,
> +			&handle->elist[rxq_idx]);
> +	if (ret) {
> +		VHOST_LOG(ERR, "Delete epoll event failed.\n");
> +		return ret;
> +	}
> +
> +	rev.fd = handle->efds[rxq_idx];
> +	handle->elist[rxq_idx] = rev;
> +	ret = rte_epoll_ctl(epfd, EPOLL_CTL_ADD, rev.fd,
> +			&handle->elist[rxq_idx]);
> +	if (ret) {
> +		VHOST_LOG(ERR, "Add epoll event failed.\n");
> +		return ret;
> +	}
> +
> +	return 0;
> +}
> +
>  static int
>  eth_rxq_intr_enable(struct rte_eth_dev *dev, uint16_t qid)
>  {
> @@ -537,6 +579,11 @@ eth_rxq_intr_enable(struct rte_eth_dev *dev, uint16_t qid)
>  		return -1;
>  	}
>  
> +	rte_spinlock_lock(&vq->intr_lock);
> +	vq->intr_enable = 1;
> +	ret = eth_vhost_update_intr(dev, qid);
> +	rte_spinlock_unlock(&vq->intr_lock);
> +

I forgot to check the ret value here; I will add the check in v4.

>  	ret = rte_vhost_get_vhost_vring(vq->vid, (qid << 1) + 1, &vring);
>  	if (ret < 0) {
>  		VHOST_LOG(ERR, "Failed to get rxq%d's vring\n", qid);


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [dpdk-dev] [PATCH v3 3/3] net/vhost: fix interrupt mode
  2020-07-29 11:27   ` David Marchand
@ 2020-07-29 13:19     ` Maxime Coquelin
  0 siblings, 0 replies; 9+ messages in thread
From: Maxime Coquelin @ 2020-07-29 13:19 UTC (permalink / raw)
  To: David Marchand
  Cc: dev, Matan Azrad, Xia, Chenbo, Marvin Liu, Wang, Yinan,
	Thomas Monjalon, Yigit, Ferruh



On 7/29/20 1:27 PM, David Marchand wrote:
> On Wed, Jul 29, 2020 at 11:20 AM Maxime Coquelin
> <maxime.coquelin@redhat.com> wrote:
>>
>> At .new_device() time, only the first vring pair is
>> now ready, other vrings are consfigured later.
> 
> configured*
> 
>>
>> Problem is that when application will setup and enable
>> interrupts, only the first queue pair Rx interrupt will
>> be enabled.
>>
>> This patches fixes the issue by setting the number of
>> max interrupts to the number of Rx queues that will be
>> later initialized. Then, as soon as a Rx vring is ready
>> and interrupt enabled by the application, it removes the
>> corresponding uninitialized epoll event, and install a
> 
> installs*
> 
>> new one with the valid FD.
>>
>> Fixes: 604052ae5395 ("net/vhost: support queue update")
>>
>> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> 
> It seems a bit of a hack, but I _think_ the patch is good wrt races on
> epoll configuration.
> 
> We are only touching the vhost pmd, in interrupt mode.
> The interrupt mode is not that frequently used (I found no usage in
> opensource projects).
> The vhost pmd is not used in OVS as it lags behind the vhost library
> and has limitations.
> 
> So my opinion is that the risk of taking this patch rather than
> reverting the changes (which is not trivial iiuc) in the vhost library
> is acceptable.
> 
> 
> One comment below:
> 
>> ---
>>  drivers/net/vhost/rte_eth_vhost.c | 75 +++++++++++++++++++++++++++----
>>  1 file changed, 66 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
>> index 951929c663..237785dd66 100644
>> --- a/drivers/net/vhost/rte_eth_vhost.c
>> +++ b/drivers/net/vhost/rte_eth_vhost.c
>> @@ -5,6 +5,7 @@
>>  #include <unistd.h>
>>  #include <pthread.h>
>>  #include <stdbool.h>
>> +#include <sys/epoll.h>
>>
>>  #include <rte_mbuf.h>
>>  #include <rte_ethdev_driver.h>
>> @@ -95,6 +96,8 @@ struct vhost_queue {
>>         uint16_t port;
>>         uint16_t virtqueue_id;
>>         struct vhost_stats stats;
>> +       int intr_enable;
>> +       rte_spinlock_t intr_lock;
>>  };
>>
>>  struct pmd_internal {
>> @@ -524,6 +527,45 @@ find_internal_resource(char *ifname)
>>         return list;
>>  }
>>
>> +static int
>> +eth_vhost_update_intr(struct rte_eth_dev *eth_dev, uint16_t rxq_idx)
>> +{
>> +       struct rte_intr_handle *handle = eth_dev->intr_handle;
>> +       struct rte_epoll_event rev;
>> +       int epfd, ret;
>> +
>> +       if (handle->efds[rxq_idx] == handle->elist[rxq_idx].fd)
>> +               return 0;
> 
> Feel free to ignore if this situation can not happen.
> 
> We are expecting only -1 -> valid fd transitions.
> Maybe add an error log if we are in another situation?
> This would indicate something quite broken.

That's a very good idea, I will add such a warning in v4.

Thanks,
Maxime

> 
> 
>> +
>> +       VHOST_LOG(INFO, "kickfd for rxq-%d was changed, updating handler.\n",
>> +                       rxq_idx);
>> +
>> +       /*
>> +        * First remove invalid epoll event, and then isntall
>> +        * the new one. May be solved with a proper API in the
>> +        * future.
>> +        */
>> +       epfd = handle->elist[rxq_idx].epfd;
>> +       rev = handle->elist[rxq_idx];
>> +       ret = rte_epoll_ctl(epfd, EPOLL_CTL_DEL, rev.fd,
>> +                       &handle->elist[rxq_idx]);
>> +       if (ret) {
>> +               VHOST_LOG(ERR, "Delete epoll event failed.\n");
>> +               return ret;
>> +       }
>> +
>> +       rev.fd = handle->efds[rxq_idx];
>> +       handle->elist[rxq_idx] = rev;
>> +       ret = rte_epoll_ctl(epfd, EPOLL_CTL_ADD, rev.fd,
>> +                       &handle->elist[rxq_idx]);
>> +       if (ret) {
>> +               VHOST_LOG(ERR, "Add epoll event failed.\n");
>> +               return ret;
>> +       }
>> +
>> +       return 0;
>> +}
>> +
>>  static int
>>  eth_rxq_intr_enable(struct rte_eth_dev *dev, uint16_t qid)
>>  {
>> @@ -537,6 +579,11 @@ eth_rxq_intr_enable(struct rte_eth_dev *dev, uint16_t qid)
>>                 return -1;
>>         }
>>
>> +       rte_spinlock_lock(&vq->intr_lock);
>> +       vq->intr_enable = 1;
>> +       ret = eth_vhost_update_intr(dev, qid);
>> +       rte_spinlock_unlock(&vq->intr_lock);
>> +
>>         ret = rte_vhost_get_vhost_vring(vq->vid, (qid << 1) + 1, &vring);
>>         if (ret < 0) {
>>                 VHOST_LOG(ERR, "Failed to get rxq%d's vring\n", qid);
>> @@ -571,6 +618,8 @@ eth_rxq_intr_disable(struct rte_eth_dev *dev, uint16_t qid)
>>         rte_vhost_enable_guest_notification(vq->vid, (qid << 1) + 1, 0);
>>         rte_wmb();
>>
>> +       vq->intr_enable = 0;
>> +
>>         return 0;
>>  }
>>
>> @@ -593,7 +642,6 @@ eth_vhost_install_intr(struct rte_eth_dev *dev)
>>  {
>>         struct rte_vhost_vring vring;
>>         struct vhost_queue *vq;
>> -       int count = 0;
>>         int nb_rxq = dev->data->nb_rx_queues;
>>         int i;
>>         int ret;
>> @@ -623,6 +671,8 @@ eth_vhost_install_intr(struct rte_eth_dev *dev)
>>
>>         VHOST_LOG(INFO, "Prepare intr vec\n");
>>         for (i = 0; i < nb_rxq; i++) {
>> +               dev->intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + i;
>> +               dev->intr_handle->efds[i] = -1;
>>                 vq = dev->data->rx_queues[i];
>>                 if (!vq) {
>>                         VHOST_LOG(INFO, "rxq-%d not setup yet, skip!\n", i);
>> @@ -641,14 +691,12 @@ eth_vhost_install_intr(struct rte_eth_dev *dev)
>>                                 "rxq-%d's kickfd is invalid, skip!\n", i);
>>                         continue;
>>                 }
>> -               dev->intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + i;
>>                 dev->intr_handle->efds[i] = vring.kickfd;
>> -               count++;
>>                 VHOST_LOG(INFO, "Installed intr vec for rxq-%d\n", i);
>>         }
>>
>> -       dev->intr_handle->nb_efd = count;
>> -       dev->intr_handle->max_intr = count + 1;
>> +       dev->intr_handle->nb_efd = nb_rxq;
>> +       dev->intr_handle->max_intr = nb_rxq + 1;
>>         dev->intr_handle->type = RTE_INTR_HANDLE_VDEV;
>>
>>         return 0;
>> @@ -835,6 +883,7 @@ vring_conf_update(int vid, struct rte_eth_dev *eth_dev, uint16_t vring_id)
>>  {
>>         struct rte_eth_conf *dev_conf = &eth_dev->data->dev_conf;
>>         struct pmd_internal *internal = eth_dev->data->dev_private;
>> +       struct vhost_queue *vq;
>>         struct rte_vhost_vring vring;
>>         int rx_idx = vring_id % 2 ? (vring_id - 1) >> 1 : -1;
>>         int ret = 0;
>> @@ -853,12 +902,18 @@ vring_conf_update(int vid, struct rte_eth_dev *eth_dev, uint16_t vring_id)
>>                                         vring_id);
>>                         return ret;
>>                 }
>> +               eth_dev->intr_handle->efds[rx_idx] = vring.kickfd;
>>
>> -               if (vring.kickfd != eth_dev->intr_handle->efds[rx_idx]) {
>> -                       VHOST_LOG(INFO, "kickfd for rxq-%d was changed.\n",
>> -                                         rx_idx);
>> -                       eth_dev->intr_handle->efds[rx_idx] = vring.kickfd;
>> +               vq = eth_dev->data->rx_queues[rx_idx];
>> +               if (!vq) {
>> +                       VHOST_LOG(ERR, "rxq%d is not setup yet\n", rx_idx);
>> +                       return -1;
>>                 }
>> +
>> +               rte_spinlock_lock(&vq->intr_lock);
>> +               if (vq->intr_enable)
>> +                       ret = eth_vhost_update_intr(eth_dev, rx_idx);
>> +               rte_spinlock_unlock(&vq->intr_lock);
>>         }
>>
>>         return ret;
>> @@ -1152,6 +1207,7 @@ eth_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
>>
>>         vq->mb_pool = mb_pool;
>>         vq->virtqueue_id = rx_queue_id * VIRTIO_QNUM + VIRTIO_TXQ;
>> +       rte_spinlock_init(&vq->intr_lock);
>>         dev->data->rx_queues[rx_queue_id] = vq;
>>
>>         return 0;
>> @@ -1173,6 +1229,7 @@ eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
>>         }
>>
>>         vq->virtqueue_id = tx_queue_id * VIRTIO_QNUM + VIRTIO_RXQ;
>> +       rte_spinlock_init(&vq->intr_lock);
>>         dev->data->tx_queues[tx_queue_id] = vq;
>>
>>         return 0;
>> --
>> 2.26.2
>>
> 
> 
> 
> --
> David Marchand
> 


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [dpdk-dev] [PATCH v3 3/3] net/vhost: fix interrupt mode
  2020-07-29 12:53   ` Maxime Coquelin
@ 2020-07-29 13:24     ` Xia, Chenbo
  2020-07-29 13:27       ` Maxime Coquelin
  0 siblings, 1 reply; 9+ messages in thread
From: Xia, Chenbo @ 2020-07-29 13:24 UTC (permalink / raw)
  To: Maxime Coquelin, dev, matan, Liu, Yong, Wang, Yinan
  Cc: thomas, Yigit, Ferruh, david.marchand

Hi Maxime,

> -----Original Message-----
> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> Sent: Wednesday, July 29, 2020 8:54 PM
> To: dev@dpdk.org; matan@mellanox.com; Xia, Chenbo
> <chenbo.xia@intel.com>; Liu, Yong <yong.liu@intel.com>; Wang, Yinan
> <yinan.wang@intel.com>
> Cc: thomas@monjalon.net; Yigit, Ferruh <ferruh.yigit@intel.com>;
> david.marchand@redhat.com
> Subject: Re: [PATCH v3 3/3] net/vhost: fix interrupt mode
> 
> 
> 
> On 7/29/20 11:20 AM, Maxime Coquelin wrote:
> > At .new_device() time, only the first vring pair is now ready, other
> > vrings are consfigured later.
> >
> > Problem is that when application will setup and enable interrupts,
> > only the first queue pair Rx interrupt will be enabled.
> >
> > This patches fixes the issue by setting the number of max interrupts
> > to the number of Rx queues that will be later initialized. Then, as
> > soon as a Rx vring is ready and interrupt enabled by the application,
> > it removes the corresponding uninitialized epoll event, and install a
> > new one with the valid FD.
> >
> > Fixes: 604052ae5395 ("net/vhost: support queue update")
> >
> > Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> > ---
> >  drivers/net/vhost/rte_eth_vhost.c | 75
> > +++++++++++++++++++++++++++----
> >  1 file changed, 66 insertions(+), 9 deletions(-)
> >
> > diff --git a/drivers/net/vhost/rte_eth_vhost.c
> > b/drivers/net/vhost/rte_eth_vhost.c
> > index 951929c663..237785dd66 100644
> > --- a/drivers/net/vhost/rte_eth_vhost.c
> > +++ b/drivers/net/vhost/rte_eth_vhost.c
> > @@ -5,6 +5,7 @@
> >  #include <unistd.h>
> >  #include <pthread.h>
> >  #include <stdbool.h>
> > +#include <sys/epoll.h>
> >
> >  #include <rte_mbuf.h>
> >  #include <rte_ethdev_driver.h>
> > @@ -95,6 +96,8 @@ struct vhost_queue {
> >  	uint16_t port;
> >  	uint16_t virtqueue_id;
> >  	struct vhost_stats stats;
> > +	int intr_enable;
> > +	rte_spinlock_t intr_lock;
> >  };
> >
> >  struct pmd_internal {
> > @@ -524,6 +527,45 @@ find_internal_resource(char *ifname)
> >  	return list;
> >  }
> >
> > +static int
> > +eth_vhost_update_intr(struct rte_eth_dev *eth_dev, uint16_t rxq_idx)
> > +{
> > +	struct rte_intr_handle *handle = eth_dev->intr_handle;
> > +	struct rte_epoll_event rev;
> > +	int epfd, ret;
> > +
> 
> Chenbo reported that we can have a NULL pointer dereference on handle when
> using Virtio-user on the other end and quitting.

To clarify the root cause: 'destroy_device' calls 'eth_vhost_uninstall_intr'
when the connection is lost, but the l3fwd-power app is still enabling interrupts.

Thanks,
Chenbo

> 
> 
> 
> > +	if (handle->efds[rxq_idx] == handle->elist[rxq_idx].fd)
> > +		return 0;
> > +
> > +	VHOST_LOG(INFO, "kickfd for rxq-%d was changed, updating
> handler.\n",
> > +			rxq_idx);
> > +
> > +	/*
> > +	 * First remove invalid epoll event, and then isntall
> > +	 * the new one. May be solved with a proper API in the
> > +	 * future.
> > +	 */
> > +	epfd = handle->elist[rxq_idx].epfd;
> > +	rev = handle->elist[rxq_idx];
> > +	ret = rte_epoll_ctl(epfd, EPOLL_CTL_DEL, rev.fd,
> > +			&handle->elist[rxq_idx]);
> > +	if (ret) {
> > +		VHOST_LOG(ERR, "Delete epoll event failed.\n");
> > +		return ret;
> > +	}
> > +
> > +	rev.fd = handle->efds[rxq_idx];
> > +	handle->elist[rxq_idx] = rev;
> > +	ret = rte_epoll_ctl(epfd, EPOLL_CTL_ADD, rev.fd,
> > +			&handle->elist[rxq_idx]);
> > +	if (ret) {
> > +		VHOST_LOG(ERR, "Add epoll event failed.\n");
> > +		return ret;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> >  static int
> >  eth_rxq_intr_enable(struct rte_eth_dev *dev, uint16_t qid)  { @@
> > -537,6 +579,11 @@ eth_rxq_intr_enable(struct rte_eth_dev *dev, uint16_t qid)
> >  		return -1;
> >  	}
> >
> > +	rte_spinlock_lock(&vq->intr_lock);
> > +	vq->intr_enable = 1;
> > +	ret = eth_vhost_update_intr(dev, qid);
> > +	rte_spinlock_unlock(&vq->intr_lock);
> > +
> 
> I missed to check ret value here, will add it in v4.
> 
> >  	ret = rte_vhost_get_vhost_vring(vq->vid, (qid << 1) + 1, &vring);
> >  	if (ret < 0) {
> >  		VHOST_LOG(ERR, "Failed to get rxq%d's vring\n", qid);


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [dpdk-dev] [PATCH v3 3/3] net/vhost: fix interrupt mode
  2020-07-29 13:24     ` Xia, Chenbo
@ 2020-07-29 13:27       ` Maxime Coquelin
  0 siblings, 0 replies; 9+ messages in thread
From: Maxime Coquelin @ 2020-07-29 13:27 UTC (permalink / raw)
  To: Xia, Chenbo, dev, matan, Liu, Yong, Wang, Yinan
  Cc: thomas, Yigit, Ferruh, david.marchand



On 7/29/20 3:24 PM, Xia, Chenbo wrote:
> Hi Maxime,
> 
>> -----Original Message-----
>> From: Maxime Coquelin <maxime.coquelin@redhat.com>
>> Sent: Wednesday, July 29, 2020 8:54 PM
>> To: dev@dpdk.org; matan@mellanox.com; Xia, Chenbo
>> <chenbo.xia@intel.com>; Liu, Yong <yong.liu@intel.com>; Wang, Yinan
>> <yinan.wang@intel.com>
>> Cc: thomas@monjalon.net; Yigit, Ferruh <ferruh.yigit@intel.com>;
>> david.marchand@redhat.com
>> Subject: Re: [PATCH v3 3/3] net/vhost: fix interrupt mode
>>
>>
>>
>> On 7/29/20 11:20 AM, Maxime Coquelin wrote:
>>> At .new_device() time, only the first vring pair is now ready, other
>>> vrings are consfigured later.
>>>
>>> Problem is that when application will setup and enable interrupts,
>>> only the first queue pair Rx interrupt will be enabled.
>>>
>>> This patches fixes the issue by setting the number of max interrupts
>>> to the number of Rx queues that will be later initialized. Then, as
>>> soon as a Rx vring is ready and interrupt enabled by the application,
>>> it removes the corresponding uninitialized epoll event, and install a
>>> new one with the valid FD.
>>>
>>> Fixes: 604052ae5395 ("net/vhost: support queue update")
>>>
>>> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
>>> ---
>>>  drivers/net/vhost/rte_eth_vhost.c | 75
>>> +++++++++++++++++++++++++++----
>>>  1 file changed, 66 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/drivers/net/vhost/rte_eth_vhost.c
>>> b/drivers/net/vhost/rte_eth_vhost.c
>>> index 951929c663..237785dd66 100644
>>> --- a/drivers/net/vhost/rte_eth_vhost.c
>>> +++ b/drivers/net/vhost/rte_eth_vhost.c
>>> @@ -5,6 +5,7 @@
>>>  #include <unistd.h>
>>>  #include <pthread.h>
>>>  #include <stdbool.h>
>>> +#include <sys/epoll.h>
>>>
>>>  #include <rte_mbuf.h>
>>>  #include <rte_ethdev_driver.h>
>>> @@ -95,6 +96,8 @@ struct vhost_queue {
>>>  	uint16_t port;
>>>  	uint16_t virtqueue_id;
>>>  	struct vhost_stats stats;
>>> +	int intr_enable;
>>> +	rte_spinlock_t intr_lock;
>>>  };
>>>
>>>  struct pmd_internal {
>>> @@ -524,6 +527,45 @@ find_internal_resource(char *ifname)
>>>  	return list;
>>>  }
>>>
>>> +static int
>>> +eth_vhost_update_intr(struct rte_eth_dev *eth_dev, uint16_t rxq_idx)
>>> +{
>>> +	struct rte_intr_handle *handle = eth_dev->intr_handle;
>>> +	struct rte_epoll_event rev;
>>> +	int epfd, ret;
>>> +
>>
>> Chenbo reported that we can have a NULL pointer dereference on handle when
>> using Virtio-user on the other end and quitting.
> 
> To clarity the root cause, it's because 'destroy_device' calls 'eth_vhost_uninstall_intr'
> when connection lost but l3fwd-power APP are still enabling intr.

Agreed, l3fwd-power will need some rework: even with this fixed, we
get flooded with error logs because the app tries to enable the interrupt
in a loop once the frontend has disconnected. But this is also reproducible
in v20.05.

> Thanks,
> Chenbo
> 
>>
>>
>>
>>> +	if (handle->efds[rxq_idx] == handle->elist[rxq_idx].fd)
>>> +		return 0;
>>> +
>>> +	VHOST_LOG(INFO, "kickfd for rxq-%d was changed, updating
>> handler.\n",
>>> +			rxq_idx);
>>> +
>>> +	/*
>>> +	 * First remove invalid epoll event, and then isntall
>>> +	 * the new one. May be solved with a proper API in the
>>> +	 * future.
>>> +	 */
>>> +	epfd = handle->elist[rxq_idx].epfd;
>>> +	rev = handle->elist[rxq_idx];
>>> +	ret = rte_epoll_ctl(epfd, EPOLL_CTL_DEL, rev.fd,
>>> +			&handle->elist[rxq_idx]);
>>> +	if (ret) {
>>> +		VHOST_LOG(ERR, "Delete epoll event failed.\n");
>>> +		return ret;
>>> +	}
>>> +
>>> +	rev.fd = handle->efds[rxq_idx];
>>> +	handle->elist[rxq_idx] = rev;
>>> +	ret = rte_epoll_ctl(epfd, EPOLL_CTL_ADD, rev.fd,
>>> +			&handle->elist[rxq_idx]);
>>> +	if (ret) {
>>> +		VHOST_LOG(ERR, "Add epoll event failed.\n");
>>> +		return ret;
>>> +	}
>>> +
>>> +	return 0;
>>> +}
>>> +
>>>  static int
>>>  eth_rxq_intr_enable(struct rte_eth_dev *dev, uint16_t qid)  { @@
>>> -537,6 +579,11 @@ eth_rxq_intr_enable(struct rte_eth_dev *dev, uint16_t qid)
>>>  		return -1;
>>>  	}
>>>
>>> +	rte_spinlock_lock(&vq->intr_lock);
>>> +	vq->intr_enable = 1;
>>> +	ret = eth_vhost_update_intr(dev, qid);
>>> +	rte_spinlock_unlock(&vq->intr_lock);
>>> +
>>
>> I missed to check ret value here, will add it in v4.
>>
>>>  	ret = rte_vhost_get_vhost_vring(vq->vid, (qid << 1) + 1, &vring);
>>>  	if (ret < 0) {
>>>  		VHOST_LOG(ERR, "Failed to get rxq%d's vring\n", qid);
> 


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2020-07-29 13:27 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-29  9:19 [dpdk-dev] [PATCH v3 0/3] Fix Vhost regressions Maxime Coquelin
2020-07-29  9:19 ` [dpdk-dev] [PATCH v3 1/3] vhost: fix guest notification setting Maxime Coquelin
2020-07-29  9:19 ` [dpdk-dev] [PATCH v3 2/3] net/vhost: fix queue update Maxime Coquelin
2020-07-29  9:20 ` [dpdk-dev] [PATCH v3 3/3] net/vhost: fix interrupt mode Maxime Coquelin
2020-07-29 11:27   ` David Marchand
2020-07-29 13:19     ` Maxime Coquelin
2020-07-29 12:53   ` Maxime Coquelin
2020-07-29 13:24     ` Xia, Chenbo
2020-07-29 13:27       ` Maxime Coquelin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).