patches for DPDK stable branches
 help / color / mirror / Atom feed
* [PATCH 20.11] net/vhost: fix Rx interrupt
@ 2023-03-22 13:25 David Marchand
  0 siblings, 0 replies; only message in thread
From: David Marchand @ 2023-03-22 13:25 UTC (permalink / raw)
  To: stable, luca.boccassi
  Cc: Chenbo Xia, Dukai Yuan, Maxime Coquelin, Junjie Chen,
	Jianfeng Tan, Hyong Youb Kim, Harman Kalra

[ upstream commit 7995bf4c295464c3a4c3f4ac500f890ab55567e7 ]

In the situation when a port was started while no virtio driver was
connected, Rx interrupts were broken.
They were also broken after a virtio driver reconnects.

There were several issues mixed in:
- this driver was not exposing a fixed file descriptor per Rx queue,
  If a virtio driver was not connected yet, each Rx queue vector was
  pointing at a -1 fd, and an application could interpret this as a lack
  of Rx interrupt support,
- when a virtio driver later (re)connected, this net/vhost driver was
  hacking into the EAL layer epoll fd to remove a old vring kickfd and
  insert the new vring kickfd. This hack constitutes a layer violation
  plus users of rte_eth_dev_rx_intr_ctl_q_get_fd() were not notified of
  this change,
- in the case of reconnection, because the interrupt handle was
  reallocated, a 0 fd was failing to be removed from the EAL layer
  epoll fd, which resulted in never fixing the EAL epoll fd,

To fix Rx interrupts:
- allocating (eth_vhost_install_intr) / releasing
  (eth_vhost_uninstall_intr) the interrupt handle is moved when
  starting / closing the port, while setting / resetting per rxq fd is
  triggered by vhost events via some new helpers (see
  eth_vhost_configure_intr and eth_vhost_unconfigure_intr),
- a "proxy" epoll fd is created per Rx queue at the time the interrupt
  handle is allocated, so applications can start waiting for events on
  those fds, even before a virtio driver initialises,
- when available, vring kickd are populated in the "proxy" epoll fd,

Bugzilla ID: 1135
Fixes: 3f8ff12821e4 ("vhost: support interrupt mode")
Fixes: 3d4cd4be577c ("net/vhost: fix interrupt mode")
Fixes: d61138d4f0e2 ("drivers: remove direct access to interrupt handle")

Signed-off-by: David Marchand <david.marchand@redhat.com>
Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
Tested-by: Dukai Yuan <dukaix.yuan@intel.com>
---
Some notes on this backport.

The main reason for the conflict is the interrupt handle rework from
v21.11.

The l3fwd-power example is the only in-tree example using Rx interrupts.
But trying to directly reproduce bugzilla 1135 is not possible because
commit f0b00d983e38 ("examples/l3fwd-power: support virtio/vhost") is
missing in v20.11.
So for testing purpose, I cherry-picked this commit.

For the record, I tested as a normal user on my laptop.

I started (in this order), the l3fwd-power application, then testpmd.

$ rm -f plop.sock; ./build/examples/dpdk-l3fwd-power -l0,1 --in-memory \
 -a 0000:00:00.0 --vdev net_vhost0,iface=plop.sock,client=1 \
 --log-level=lib.*:debug -- \
 -p 0x1 --interrupt-only --config '(0,0,1)' --parse-ptype 0

$ ./build/app/dpdk-testpmd -l0,2 --log-level=*:debug \
 --single-file-segment --in-memory -a 0000:00:00.0 \
 --vdev net_virtio_user0,path=plop.sock,server=1 -- \
 -i --total-num-mbufs=2048

Then I started / stopped traffic in testpmd.
I closed / restarted testpmd, and then started / stopped traffic.

In all those steps, I checked l3fwd-power application logs which
indicate Rx interrupt delivery / packet reception.

---
 drivers/net/vhost/rte_eth_vhost.c | 312 +++++++++++++-----------------
 1 file changed, 130 insertions(+), 182 deletions(-)

diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
index 5e57e35ebd..44ebcd3b53 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -97,8 +97,9 @@ struct vhost_queue {
 	uint16_t port;
 	uint16_t virtqueue_id;
 	struct vhost_stats stats;
-	int intr_enable;
 	rte_spinlock_t intr_lock;
+	struct epoll_event ev;
+	int kickfd;
 };
 
 struct pmd_internal {
@@ -525,112 +526,68 @@ find_internal_resource(char *ifname)
 	return list;
 }
 
-static int
+static void
 eth_vhost_update_intr(struct rte_eth_dev *eth_dev, uint16_t rxq_idx)
 {
-	struct rte_intr_handle *handle = eth_dev->intr_handle;
-	struct rte_epoll_event rev;
-	int epfd, ret;
-
-	if (!handle)
-		return 0;
+	struct rte_vhost_vring vring;
+	struct vhost_queue *vq;
 
-	if (handle->efds[rxq_idx] == handle->elist[rxq_idx].fd)
-		return 0;
+	vq = eth_dev->data->rx_queues[rxq_idx];
+	if (vq == NULL || vq->vid < 0)
+		return;
 
-	VHOST_LOG(INFO, "kickfd for rxq-%d was changed, updating handler.\n",
-			rxq_idx);
+	if (rte_vhost_get_vhost_vring(vq->vid, (rxq_idx << 1) + 1, &vring) < 0) {
+		VHOST_LOG(DEBUG, "Failed to get rxq-%d's vring, skip!\n", rxq_idx);
+		return;
+	}
 
-	if (handle->elist[rxq_idx].fd != -1)
-		VHOST_LOG(ERR, "Unexpected previous kickfd value (Got %d, expected -1).\n",
-				handle->elist[rxq_idx].fd);
+	rte_spinlock_lock(&vq->intr_lock);
 
-	/*
-	 * First remove invalid epoll event, and then install
-	 * the new one. May be solved with a proper API in the
-	 * future.
-	 */
-	epfd = handle->elist[rxq_idx].epfd;
-	rev = handle->elist[rxq_idx];
-	ret = rte_epoll_ctl(epfd, EPOLL_CTL_DEL, rev.fd,
-			&handle->elist[rxq_idx]);
-	if (ret) {
-		VHOST_LOG(ERR, "Delete epoll event failed.\n");
-		return ret;
+	/* Remove previous kickfd from proxy epoll */
+	if (vq->kickfd >= 0 && vq->kickfd != vring.kickfd) {
+		if (epoll_ctl(vq->ev.data.fd, EPOLL_CTL_DEL, vq->kickfd, &vq->ev) < 0) {
+			VHOST_LOG(DEBUG, "Failed to unregister %d from rxq-%d epoll: %s\n",
+				vq->kickfd, rxq_idx, strerror(errno));
+		} else {
+			VHOST_LOG(DEBUG, "Unregistered %d from rxq-%d epoll\n",
+				vq->kickfd, rxq_idx);
+		}
+		vq->kickfd = -1;
 	}
 
-	rev.fd = handle->efds[rxq_idx];
-	handle->elist[rxq_idx] = rev;
-	ret = rte_epoll_ctl(epfd, EPOLL_CTL_ADD, rev.fd,
-			&handle->elist[rxq_idx]);
-	if (ret) {
-		VHOST_LOG(ERR, "Add epoll event failed.\n");
-		return ret;
+	/* Add new one, if valid */
+	if (vq->kickfd != vring.kickfd && vring.kickfd >= 0) {
+		if (epoll_ctl(vq->ev.data.fd, EPOLL_CTL_ADD, vring.kickfd, &vq->ev) < 0) {
+			VHOST_LOG(ERR, "Failed to register %d in rxq-%d epoll: %s\n",
+				vring.kickfd, rxq_idx, strerror(errno));
+		} else {
+			vq->kickfd = vring.kickfd;
+			VHOST_LOG(DEBUG, "Registered %d in rxq-%d epoll\n",
+				vq->kickfd, rxq_idx);
+		}
 	}
 
-	return 0;
+	rte_spinlock_unlock(&vq->intr_lock);
 }
 
 static int
 eth_rxq_intr_enable(struct rte_eth_dev *dev, uint16_t qid)
 {
-	struct rte_vhost_vring vring;
-	struct vhost_queue *vq;
-	int old_intr_enable, ret = 0;
-
-	vq = dev->data->rx_queues[qid];
-	if (!vq) {
-		VHOST_LOG(ERR, "rxq%d is not setup yet\n", qid);
-		return -1;
-	}
-
-	rte_spinlock_lock(&vq->intr_lock);
-	old_intr_enable = vq->intr_enable;
-	vq->intr_enable = 1;
-	ret = eth_vhost_update_intr(dev, qid);
-	rte_spinlock_unlock(&vq->intr_lock);
-
-	if (ret < 0) {
-		VHOST_LOG(ERR, "Failed to update rxq%d's intr\n", qid);
-		vq->intr_enable = old_intr_enable;
-		return ret;
-	}
+	struct vhost_queue *vq = dev->data->rx_queues[qid];
 
-	ret = rte_vhost_get_vhost_vring(vq->vid, (qid << 1) + 1, &vring);
-	if (ret < 0) {
-		VHOST_LOG(ERR, "Failed to get rxq%d's vring\n", qid);
-		return ret;
-	}
-	VHOST_LOG(INFO, "Enable interrupt for rxq%d\n", qid);
-	rte_vhost_enable_guest_notification(vq->vid, (qid << 1) + 1, 1);
-	rte_wmb();
+	if (vq->vid >= 0)
+		rte_vhost_enable_guest_notification(vq->vid, (qid << 1) + 1, 1);
 
-	return ret;
+	return 0;
 }
 
 static int
 eth_rxq_intr_disable(struct rte_eth_dev *dev, uint16_t qid)
 {
-	struct rte_vhost_vring vring;
-	struct vhost_queue *vq;
-	int ret = 0;
-
-	vq = dev->data->rx_queues[qid];
-	if (!vq) {
-		VHOST_LOG(ERR, "rxq%d is not setup yet\n", qid);
-		return -1;
-	}
-
-	ret = rte_vhost_get_vhost_vring(vq->vid, (qid << 1) + 1, &vring);
-	if (ret < 0) {
-		VHOST_LOG(ERR, "Failed to get rxq%d's vring\n", qid);
-		return ret;
-	}
-	VHOST_LOG(INFO, "Disable interrupt for rxq%d\n", qid);
-	rte_vhost_enable_guest_notification(vq->vid, (qid << 1) + 1, 0);
-	rte_wmb();
+	struct vhost_queue *vq = dev->data->rx_queues[qid];
 
-	vq->intr_enable = 0;
+	if (vq->vid >= 0)
+		rte_vhost_enable_guest_notification(vq->vid, (qid << 1) + 1, 0);
 
 	return 0;
 }
@@ -641,70 +598,64 @@ eth_vhost_uninstall_intr(struct rte_eth_dev *dev)
 	struct rte_intr_handle *intr_handle = dev->intr_handle;
 
 	if (intr_handle) {
+		int i;
+
+		for (i = 0; i < dev->data->nb_rx_queues; i++) {
+			int epoll_fd = intr_handle->efds[i];
+
+			if (epoll_fd >= 0)
+				close(epoll_fd);
+		}
 		if (intr_handle->intr_vec)
 			free(intr_handle->intr_vec);
 		free(intr_handle);
 	}
-
 	dev->intr_handle = NULL;
 }
 
 static int
 eth_vhost_install_intr(struct rte_eth_dev *dev)
 {
-	struct rte_vhost_vring vring;
-	struct vhost_queue *vq;
 	int nb_rxq = dev->data->nb_rx_queues;
-	int i;
-	int ret;
+	struct vhost_queue *vq;
 
-	/* uninstall firstly if we are reconnecting */
-	if (dev->intr_handle)
-		eth_vhost_uninstall_intr(dev);
+	int ret;
+	int i;
 
 	dev->intr_handle = malloc(sizeof(*dev->intr_handle));
 	if (!dev->intr_handle) {
 		VHOST_LOG(ERR, "Fail to allocate intr_handle\n");
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto error;
 	}
 	memset(dev->intr_handle, 0, sizeof(*dev->intr_handle));
 
-	dev->intr_handle->efd_counter_size = sizeof(uint64_t);
-
 	dev->intr_handle->intr_vec =
 		malloc(nb_rxq * sizeof(dev->intr_handle->intr_vec[0]));
 
 	if (!dev->intr_handle->intr_vec) {
 		VHOST_LOG(ERR,
 			"Failed to allocate memory for interrupt vector\n");
-		free(dev->intr_handle);
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto error;
 	}
 
-	VHOST_LOG(INFO, "Prepare intr vec\n");
+	VHOST_LOG(DEBUG, "Prepare intr vec\n");
 	for (i = 0; i < nb_rxq; i++) {
-		dev->intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + i;
-		dev->intr_handle->efds[i] = -1;
-		vq = dev->data->rx_queues[i];
-		if (!vq) {
-			VHOST_LOG(INFO, "rxq-%d not setup yet, skip!\n", i);
-			continue;
-		}
+		int epoll_fd = epoll_create1(0);
 
-		ret = rte_vhost_get_vhost_vring(vq->vid, (i << 1) + 1, &vring);
-		if (ret < 0) {
-			VHOST_LOG(INFO,
-				"Failed to get rxq-%d's vring, skip!\n", i);
-			continue;
+		if (epoll_fd < 0) {
+			VHOST_LOG(ERR, "Failed to create proxy epoll fd for rxq-%d\n", i);
+			ret = -errno;
+			goto error;
 		}
 
-		if (vring.kickfd < 0) {
-			VHOST_LOG(INFO,
-				"rxq-%d's kickfd is invalid, skip!\n", i);
-			continue;
-		}
-		dev->intr_handle->efds[i] = vring.kickfd;
-		VHOST_LOG(INFO, "Installed intr vec for rxq-%d\n", i);
+		dev->intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + i;
+		dev->intr_handle->efds[i] = epoll_fd;
+		vq = dev->data->rx_queues[i];
+		memset(&vq->ev, 0, sizeof(vq->ev));
+		vq->ev.events = EPOLLIN;
+		vq->ev.data.fd = epoll_fd;
 	}
 
 	dev->intr_handle->nb_efd = nb_rxq;
@@ -712,6 +663,50 @@ eth_vhost_install_intr(struct rte_eth_dev *dev)
 	dev->intr_handle->type = RTE_INTR_HANDLE_VDEV;
 
 	return 0;
+
+error:
+	eth_vhost_uninstall_intr(dev);
+	return ret;
+}
+
+static void
+eth_vhost_configure_intr(struct rte_eth_dev *dev)
+{
+	int i;
+
+	VHOST_LOG(DEBUG, "Configure intr vec\n");
+	for (i = 0; i < dev->data->nb_rx_queues; i++)
+		eth_vhost_update_intr(dev, i);
+}
+
+static void
+eth_vhost_unconfigure_intr(struct rte_eth_dev *eth_dev)
+{
+	struct vhost_queue *vq;
+	int i;
+
+	VHOST_LOG(DEBUG, "Unconfigure intr vec\n");
+	for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
+		vq = eth_dev->data->rx_queues[i];
+		if (vq == NULL || vq->vid < 0)
+			continue;
+
+		rte_spinlock_lock(&vq->intr_lock);
+
+		/* Remove previous kickfd from proxy epoll */
+		if (vq->kickfd >= 0) {
+			if (epoll_ctl(vq->ev.data.fd, EPOLL_CTL_DEL, vq->kickfd, &vq->ev) < 0) {
+				VHOST_LOG(DEBUG, "Failed to unregister %d from rxq-%d epoll: %s\n",
+					vq->kickfd, i, strerror(errno));
+			} else {
+				VHOST_LOG(DEBUG, "Unregistered %d from rxq-%d epoll\n",
+					vq->kickfd, i);
+			}
+			vq->kickfd = -1;
+		}
+
+		rte_spinlock_unlock(&vq->intr_lock);
+	}
 }
 
 static void
@@ -815,16 +810,8 @@ new_device(int vid)
 	internal->vid = vid;
 	if (rte_atomic32_read(&internal->started) == 1) {
 		queue_setup(eth_dev, internal);
-
-		if (dev_conf->intr_conf.rxq) {
-			if (eth_vhost_install_intr(eth_dev) < 0) {
-				VHOST_LOG(INFO,
-					"Failed to install interrupt handler.\n");
-					return -1;
-			}
-		}
-	} else {
-		VHOST_LOG(INFO, "RX/TX queues not exist yet\n");
+		if (dev_conf->intr_conf.rxq)
+			eth_vhost_configure_intr(eth_dev);
 	}
 
 	for (i = 0; i < rte_vhost_get_vring_num(vid); i++)
@@ -866,6 +853,7 @@ destroy_device(int vid)
 
 	rte_atomic32_set(&internal->dev_attached, 0);
 	update_queuing_status(eth_dev, true);
+	eth_vhost_unconfigure_intr(eth_dev);
 
 	eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
 
@@ -894,52 +882,10 @@ destroy_device(int vid)
 	rte_spinlock_unlock(&state->lock);
 
 	VHOST_LOG(INFO, "Vhost device %d destroyed\n", vid);
-	eth_vhost_uninstall_intr(eth_dev);
 
 	rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL);
 }
 
-static int
-vring_conf_update(int vid, struct rte_eth_dev *eth_dev, uint16_t vring_id)
-{
-	struct rte_eth_conf *dev_conf = &eth_dev->data->dev_conf;
-	struct pmd_internal *internal = eth_dev->data->dev_private;
-	struct vhost_queue *vq;
-	struct rte_vhost_vring vring;
-	int rx_idx = vring_id % 2 ? (vring_id - 1) >> 1 : -1;
-	int ret = 0;
-
-	/*
-	 * The vring kickfd may be changed after the new device notification.
-	 * Update it when the vring state is updated.
-	 */
-	if (rx_idx >= 0 && rx_idx < eth_dev->data->nb_rx_queues &&
-	    rte_atomic32_read(&internal->dev_attached) &&
-	    rte_atomic32_read(&internal->started) &&
-	    dev_conf->intr_conf.rxq) {
-		ret = rte_vhost_get_vhost_vring(vid, vring_id, &vring);
-		if (ret) {
-			VHOST_LOG(ERR, "Failed to get vring %d information.\n",
-					vring_id);
-			return ret;
-		}
-		eth_dev->intr_handle->efds[rx_idx] = vring.kickfd;
-
-		vq = eth_dev->data->rx_queues[rx_idx];
-		if (!vq) {
-			VHOST_LOG(ERR, "rxq%d is not setup yet\n", rx_idx);
-			return -1;
-		}
-
-		rte_spinlock_lock(&vq->intr_lock);
-		if (vq->intr_enable)
-			ret = eth_vhost_update_intr(eth_dev, rx_idx);
-		rte_spinlock_unlock(&vq->intr_lock);
-	}
-
-	return ret;
-}
-
 static int
 vring_state_changed(int vid, uint16_t vring, int enable)
 {
@@ -959,9 +905,8 @@ vring_state_changed(int vid, uint16_t vring, int enable)
 	/* won't be NULL */
 	state = vring_states[eth_dev->data->port_id];
 
-	if (enable && vring_conf_update(vid, eth_dev, vring))
-		VHOST_LOG(INFO, "Failed to update vring-%d configuration.\n",
-			  (int)vring);
+	if (eth_dev->data->dev_conf.intr_conf.rxq && vring % 2)
+		eth_vhost_update_intr(eth_dev, (vring - 1) >> 1);
 
 	rte_spinlock_lock(&state->lock);
 	if (state->cur[vring] == enable) {
@@ -1146,18 +1091,17 @@ eth_dev_start(struct rte_eth_dev *eth_dev)
 	struct pmd_internal *internal = eth_dev->data->dev_private;
 	struct rte_eth_conf *dev_conf = &eth_dev->data->dev_conf;
 
-	queue_setup(eth_dev, internal);
-
-	if (rte_atomic32_read(&internal->dev_attached) == 1) {
-		if (dev_conf->intr_conf.rxq) {
-			if (eth_vhost_install_intr(eth_dev) < 0) {
-				VHOST_LOG(INFO,
-					"Failed to install interrupt handler.\n");
-					return -1;
-			}
-		}
+	eth_vhost_uninstall_intr(eth_dev);
+	if (dev_conf->intr_conf.rxq && eth_vhost_install_intr(eth_dev) < 0) {
+		VHOST_LOG(ERR, "Failed to install interrupt handler.\n");
+		return -1;
 	}
 
+	queue_setup(eth_dev, internal);
+	if (rte_atomic32_read(&internal->dev_attached) == 1 &&
+			dev_conf->intr_conf.rxq)
+		eth_vhost_configure_intr(eth_dev);
+
 	rte_atomic32_set(&internal->started, 1);
 	update_queuing_status(eth_dev, false);
 
@@ -1212,6 +1156,8 @@ eth_dev_close(struct rte_eth_dev *dev)
 	rte_free(internal->iface_name);
 	rte_free(internal);
 
+	eth_vhost_uninstall_intr(dev);
+
 	dev->data->dev_private = NULL;
 
 	rte_free(vring_states[dev->data->port_id]);
@@ -1239,6 +1185,7 @@ eth_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
 	vq->mb_pool = mb_pool;
 	vq->virtqueue_id = rx_queue_id * VIRTIO_QNUM + VIRTIO_TXQ;
 	rte_spinlock_init(&vq->intr_lock);
+	vq->kickfd = -1;
 	dev->data->rx_queues[rx_queue_id] = vq;
 
 	return 0;
@@ -1261,6 +1208,7 @@ eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
 
 	vq->virtqueue_id = tx_queue_id * VIRTIO_QNUM + VIRTIO_RXQ;
 	rte_spinlock_init(&vq->intr_lock);
+	vq->kickfd = -1;
 	dev->data->tx_queues[tx_queue_id] = vq;
 
 	return 0;
-- 
2.39.2


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-03-22 13:25 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-03-22 13:25 [PATCH 20.11] net/vhost: fix Rx interrupt David Marchand

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).