patches for DPDK stable branches
 help / color / mirror / Atom feed
* [PATCH 1/3] common/mlx5: add Netlink event helpers
       [not found] <20220223164333.3834590-1-dkozlyuk@nvidia.com>
@ 2022-02-23 16:43 ` Dmitry Kozlyuk
  2022-02-23 16:43 ` [PATCH 2/3] net/mlx5: fix link status change detection Dmitry Kozlyuk
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 10+ messages in thread
From: Dmitry Kozlyuk @ 2022-02-23 16:43 UTC (permalink / raw)
  To: dev; +Cc: stable, Viacheslav Ovsiienko, Matan Azrad, Ray Kinsella

Introduce mlx5_nl_read_events() to read Netlink events
(technically, messages) from a socket that was configured
to listen for them via a new mlx5_nl_init() parameter.
Add mlx5_nl_parse_link_status_update() helper
to extract information from link-related events.
This patch is a shared base for later fixes.

Cc: stable@dpdk.org

Signed-off-by: Dmitry Kozlyuk <dkozlyuk@nvidia.com>
Reviewed-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
---
 drivers/common/mlx5/linux/mlx5_common_os.c |   2 +-
 drivers/common/mlx5/linux/mlx5_nl.c        | 102 ++++++++++++++++++++-
 drivers/common/mlx5/linux/mlx5_nl.h        |   8 +-
 drivers/common/mlx5/version.map            |   2 +
 drivers/net/mlx5/linux/mlx5_os.c           |   8 +-
 drivers/net/mlx5/linux/mlx5_vlan_os.c      |   2 +-
 6 files changed, 116 insertions(+), 8 deletions(-)

diff --git a/drivers/common/mlx5/linux/mlx5_common_os.c b/drivers/common/mlx5/linux/mlx5_common_os.c
index 0d3e24e04e..25e09bb55b 100644
--- a/drivers/common/mlx5/linux/mlx5_common_os.c
+++ b/drivers/common/mlx5/linux/mlx5_common_os.c
@@ -487,7 +487,7 @@ mlx5_os_get_ibv_device(const struct rte_pci_addr *addr)
 static int
 mlx5_nl_roce_disable(const char *addr)
 {
-	int nlsk_fd = mlx5_nl_init(NETLINK_GENERIC);
+	int nlsk_fd = mlx5_nl_init(NETLINK_GENERIC, 0);
 	int devlink_id;
 	int enable;
 	int ret;
diff --git a/drivers/common/mlx5/linux/mlx5_nl.c b/drivers/common/mlx5/linux/mlx5_nl.c
index fd4c2d2625..5d04857b38 100644
--- a/drivers/common/mlx5/linux/mlx5_nl.c
+++ b/drivers/common/mlx5/linux/mlx5_nl.c
@@ -185,19 +185,22 @@ uint32_t atomic_sn;
  *
  * @param protocol
  *   Netlink protocol (e.g. NETLINK_ROUTE, NETLINK_RDMA).
+ * @param groups
+ *   Groups to listen (e.g. RTMGRP_LINK), can be 0.
  *
  * @return
  *   A file descriptor on success, a negative errno value otherwise and
  *   rte_errno is set.
  */
 int
-mlx5_nl_init(int protocol)
+mlx5_nl_init(int protocol, int groups)
 {
 	int fd;
 	int buf_size;
 	socklen_t opt_size;
 	struct sockaddr_nl local = {
 		.nl_family = AF_NETLINK,
+		.nl_groups = groups,
 	};
 	int ret;
 
@@ -1862,3 +1865,100 @@ mlx5_nl_enable_roce_set(int nlsk_fd, int family_id, const char *pci_addr,
 	/* Now, need to reload the driver. */
 	return mlx5_nl_driver_reload(nlsk_fd, family_id, pci_addr);
 }
+
+/**
+ * Try to parse a Netlink message as a link status update.
+ *
+ * @param hdr
+ *  Netlink message header.
+ * @param[out] ifindex
+ *  Index of the updated interface.
+ *
+ * @return
+ *  0 on success, negative on failure.
+ */
+int
+mlx5_nl_parse_link_status_update(struct nlmsghdr *hdr, uint32_t *ifindex)
+{
+	struct ifinfomsg *info;
+
+	switch (hdr->nlmsg_type) {
+	case RTM_NEWLINK:
+	case RTM_DELLINK:
+	case RTM_GETLINK:
+	case RTM_SETLINK:
+		info = NLMSG_DATA(hdr);
+		*ifindex = info->ifi_index;
+		return 0;
+	}
+	return -1;
+}
+
+/**
+ * Read pending events from a Netlink socket.
+ *
+ * @param nlsk_fd
+ *  Netlink socket.
+ * @param cb
+ *  Callback invoked for each of the events.
+ * @param cb_arg
+ *  User data for the callback.
+ *
+ * @return
+ *  0 on success, including the case when there are no events.
+ *  Negative on failure and rte_errno is set.
+ */
+int
+mlx5_nl_read_events(int nlsk_fd, mlx5_nl_event_cb *cb, void *cb_arg)
+{
+	char buf[8192];
+	struct sockaddr_nl addr;
+	struct iovec iov = {
+		.iov_base = buf,
+		.iov_len = sizeof(buf),
+	};
+	struct msghdr msg = {
+		.msg_name = &addr,
+		.msg_namelen = sizeof(addr),
+		.msg_iov = &iov,
+		.msg_iovlen = 1,
+	};
+	struct nlmsghdr *hdr;
+	ssize_t size;
+
+	while (1) {
+		size = recvmsg(nlsk_fd, &msg, MSG_DONTWAIT);
+		if (size < 0) {
+			if (errno == EAGAIN)
+				return 0;
+			if (errno == EINTR)
+				continue;
+			DRV_LOG(DEBUG, "Failed to receive netlink message: %s",
+				strerror(errno));
+			rte_errno = errno;
+			return -rte_errno;
+		}
+		hdr = (struct nlmsghdr *)buf;
+		while (size >= (ssize_t)sizeof(*hdr)) {
+			ssize_t msg_len = hdr->nlmsg_len;
+			ssize_t data_len = msg_len - sizeof(*hdr);
+			ssize_t aligned_len;
+
+			if (data_len < 0) {
+				DRV_LOG(DEBUG, "Netlink message too short");
+				rte_errno = EINVAL;
+				return -rte_errno;
+			}
+			aligned_len = NLMSG_ALIGN(msg_len);
+			if (aligned_len > size) {
+				DRV_LOG(DEBUG, "Netlink message too long");
+				rte_errno = EINVAL;
+				return -rte_errno;
+			}
+			cb(hdr, cb_arg);
+			hdr = RTE_PTR_ADD(hdr, aligned_len);
+			size -= aligned_len;
+		}
+	}
+	return 0;
+}
diff --git a/drivers/common/mlx5/linux/mlx5_nl.h b/drivers/common/mlx5/linux/mlx5_nl.h
index 2063c0deeb..0b7552338a 100644
--- a/drivers/common/mlx5/linux/mlx5_nl.h
+++ b/drivers/common/mlx5/linux/mlx5_nl.h
@@ -11,6 +11,7 @@
 
 #include "mlx5_common.h"
 
+typedef void (mlx5_nl_event_cb)(struct nlmsghdr *hdr, void *user_data);
 
 /* VLAN netdev for VLAN workaround. */
 struct mlx5_nl_vlan_dev {
@@ -30,7 +31,7 @@ struct mlx5_nl_vlan_vmwa_context {
 };
 
 __rte_internal
-int mlx5_nl_init(int protocol);
+int mlx5_nl_init(int protocol, int groups);
 __rte_internal
 int mlx5_nl_mac_addr_add(int nlsk_fd, unsigned int iface_idx, uint64_t *mac_own,
 			 struct rte_ether_addr *mac, uint32_t index);
@@ -75,4 +76,9 @@ int mlx5_nl_enable_roce_get(int nlsk_fd, int family_id, const char *pci_addr,
 int mlx5_nl_enable_roce_set(int nlsk_fd, int family_id, const char *pci_addr,
 			    int enable);
 
+__rte_internal
+int mlx5_nl_read_events(int nlsk_fd, mlx5_nl_event_cb *cb, void *cb_arg);
+__rte_internal
+int mlx5_nl_parse_link_status_update(struct nlmsghdr *hdr, uint32_t *ifindex);
+
 #endif /* RTE_PMD_MLX5_NL_H_ */
diff --git a/drivers/common/mlx5/version.map b/drivers/common/mlx5/version.map
index 462b7cea5e..d9b7ccacde 100644
--- a/drivers/common/mlx5/version.map
+++ b/drivers/common/mlx5/version.map
@@ -123,9 +123,11 @@ INTERNAL {
 	mlx5_nl_mac_addr_flush; # WINDOWS_NO_EXPORT
 	mlx5_nl_mac_addr_remove; # WINDOWS_NO_EXPORT
 	mlx5_nl_mac_addr_sync; # WINDOWS_NO_EXPORT
+	mlx5_nl_parse_link_status_update; # WINDOWS_NO_EXPORT
 	mlx5_nl_port_state; # WINDOWS_NO_EXPORT
 	mlx5_nl_portnum; # WINDOWS_NO_EXPORT
 	mlx5_nl_promisc; # WINDOWS_NO_EXPORT
+	mlx5_nl_read_events; # WINDOWS_NO_EXPORT
 	mlx5_nl_switch_info; # WINDOWS_NO_EXPORT
 	mlx5_nl_vf_mac_addr_modify; # WINDOWS_NO_EXPORT
 	mlx5_nl_vlan_vmwa_create; # WINDOWS_NO_EXPORT
diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index bbe05bb837..602473e8f7 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -1086,7 +1086,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 		" old OFED/rdma-core version or firmware configuration");
 #endif
 	config->mpls_en = mpls_en;
-	nl_rdma = mlx5_nl_init(NETLINK_RDMA);
+	nl_rdma = mlx5_nl_init(NETLINK_RDMA, 0);
 	/* Check port status. */
 	if (spawn->phys_port <= UINT8_MAX) {
 		/* Legacy Verbs api only support u8 port number. */
@@ -1133,7 +1133,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 	priv->mtu = RTE_ETHER_MTU;
 	/* Some internal functions rely on Netlink sockets, open them now. */
 	priv->nl_socket_rdma = nl_rdma;
-	priv->nl_socket_route =	mlx5_nl_init(NETLINK_ROUTE);
+	priv->nl_socket_route =	mlx5_nl_init(NETLINK_ROUTE, 0);
 	priv->representor = !!switch_info->representor;
 	priv->master = !!switch_info->master;
 	priv->domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID;
@@ -2130,8 +2130,8 @@ mlx5_os_pci_probe_pf(struct mlx5_common_device *cdev,
 	 * matching ones, gathering into the list.
 	 */
 	struct ibv_device *ibv_match[ret + 1];
-	int nl_route = mlx5_nl_init(NETLINK_ROUTE);
-	int nl_rdma = mlx5_nl_init(NETLINK_RDMA);
+	int nl_route = mlx5_nl_init(NETLINK_ROUTE, 0);
+	int nl_rdma = mlx5_nl_init(NETLINK_RDMA, 0);
 	unsigned int i;
 
 	while (ret-- > 0) {
diff --git a/drivers/net/mlx5/linux/mlx5_vlan_os.c b/drivers/net/mlx5/linux/mlx5_vlan_os.c
index 005904bdfe..7ee2460a23 100644
--- a/drivers/net/mlx5/linux/mlx5_vlan_os.c
+++ b/drivers/net/mlx5/linux/mlx5_vlan_os.c
@@ -136,7 +136,7 @@ mlx5_vlan_vmwa_init(struct rte_eth_dev *dev, uint32_t ifindex)
 		return NULL;
 	}
 	rte_spinlock_init(&vmwa->sl);
-	vmwa->nl_socket = mlx5_nl_init(NETLINK_ROUTE);
+	vmwa->nl_socket = mlx5_nl_init(NETLINK_ROUTE, 0);
 	if (vmwa->nl_socket < 0) {
 		DRV_LOG(WARNING,
 			"Can not create Netlink socket"
-- 
2.25.1


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 2/3] net/mlx5: fix link status change detection
       [not found] <20220223164333.3834590-1-dkozlyuk@nvidia.com>
  2022-02-23 16:43 ` [PATCH 1/3] common/mlx5: add Netlink event helpers Dmitry Kozlyuk
@ 2022-02-23 16:43 ` Dmitry Kozlyuk
  2022-02-23 16:43 ` [PATCH 3/3] net/mlx5: fix initial link status detection Dmitry Kozlyuk
       [not found] ` <20220301121514.41497-1-dkozlyuk@nvidia.com>
  3 siblings, 0 replies; 10+ messages in thread
From: Dmitry Kozlyuk @ 2022-02-23 16:43 UTC (permalink / raw)
  To: dev; +Cc: stable, Viacheslav Ovsiienko, Matan Azrad

Sometimes net/mlx5 devices did not detect link status change to "up".

Each shared device was monitoring IBV_EVENT_PORT_{ACTIVE,ERR}
and queried the link status upon receiving the event.
IBV_EVENT_PORT_ACTIVE is delivered when the logical link status
(UP flag) is set, but the physical link status (RUNNING flag)
may be down at that time, in which case the new link status
would be erroneously considered down.

IBV interface is insufficient for the task.
Monitor interface events using Netlink.

Fixes: 198a3c339a8f ("mlx5: handle link status interrupts")
Cc: stable@dpdk.org

Signed-off-by: Dmitry Kozlyuk <dkozlyuk@nvidia.com>
Reviewed-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
---
Coverity complains about unchecked rte_intr_fd/type_set() return values.
Checking them would be useless, because rte_intr_handle is in a known state.

 drivers/net/mlx5/linux/mlx5_ethdev_os.c | 63 ++++++++++++++++++++-----
 drivers/net/mlx5/linux/mlx5_os.c        | 55 +++++++++++++++++++++
 drivers/net/mlx5/mlx5.c                 |  1 +
 drivers/net/mlx5/mlx5.h                 |  3 ++
 drivers/net/mlx5/mlx5_trigger.c         | 12 ++++-
 5 files changed, 120 insertions(+), 14 deletions(-)

diff --git a/drivers/net/mlx5/linux/mlx5_ethdev_os.c b/drivers/net/mlx5/linux/mlx5_ethdev_os.c
index c19825ee52..8fe73f1adb 100644
--- a/drivers/net/mlx5/linux/mlx5_ethdev_os.c
+++ b/drivers/net/mlx5/linux/mlx5_ethdev_os.c
@@ -38,6 +38,7 @@
 #include <mlx5_devx_cmds.h>
 #include <mlx5_common.h>
 #include <mlx5_malloc.h>
+#include <mlx5_nl.h>
 
 #include "mlx5.h"
 #include "mlx5_rxtx.h"
@@ -760,6 +761,56 @@ mlx5_dev_interrupt_device_fatal(struct mlx5_dev_ctx_shared *sh)
 	}
 }
 
+static void
+mlx5_dev_interrupt_nl_cb(struct nlmsghdr *hdr, void *cb_arg)
+{
+	struct mlx5_dev_ctx_shared *sh = cb_arg;
+	uint32_t i;
+	uint32_t if_index;
+
+	if (mlx5_nl_parse_link_status_update(hdr, &if_index) < 0)
+		return;
+	for (i = 0; i < sh->max_port; i++) {
+		struct mlx5_dev_shared_port *port = &sh->port[i];
+		struct rte_eth_dev *dev;
+		struct mlx5_priv *priv;
+
+		if (port->nl_ih_port_id >= RTE_MAX_ETHPORTS)
+			continue;
+		dev = &rte_eth_devices[port->nl_ih_port_id];
+		/* Probing may initiate an LSC before configuration is done. */
+		if (dev->data->dev_configured &&
+		    !dev->data->dev_conf.intr_conf.lsc)
+			break;
+		priv = dev->data->dev_private;
+		if (priv->if_index == if_index) {
+			/* Block logical LSC events. */
+			uint16_t prev_status = dev->data->dev_link.link_status;
+
+			if (mlx5_link_update(dev, 0) < 0)
+				DRV_LOG(ERR, "Failed to update link status: %s",
+					rte_strerror(rte_errno));
+			else if (prev_status != dev->data->dev_link.link_status)
+				rte_eth_dev_callback_process
+					(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
+			break;
+		}
+	}
+}
+
+void
+mlx5_dev_interrupt_handler_nl(void *arg)
+{
+	struct mlx5_dev_ctx_shared *sh = arg;
+	int nlsk_fd = rte_intr_fd_get(sh->intr_handle_nl);
+
+	if (nlsk_fd < 0)
+		return;
+	if (mlx5_nl_read_events(nlsk_fd, mlx5_dev_interrupt_nl_cb, sh) < 0)
+		DRV_LOG(ERR, "Failed to process Netlink events: %s",
+			rte_strerror(rte_errno));
+}
+
 /**
  * Handle shared asynchronous events the NIC (removal event
  * and link status change). Supports multiport IB device.
@@ -823,18 +874,6 @@ mlx5_dev_interrupt_handler(void *cb_arg)
 		tmp = sh->port[tmp - 1].ih_port_id;
 		dev = &rte_eth_devices[tmp];
 		MLX5_ASSERT(dev);
-		if ((event.event_type == IBV_EVENT_PORT_ACTIVE ||
-		     event.event_type == IBV_EVENT_PORT_ERR) &&
-			dev->data->dev_conf.intr_conf.lsc) {
-			mlx5_glue->ack_async_event(&event);
-			if (mlx5_link_update(dev, 0) == -EAGAIN) {
-				usleep(0);
-				continue;
-			}
-			rte_eth_dev_callback_process
-				(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
-			continue;
-		}
 		DRV_LOG(DEBUG,
 			"port %u cannot handle an unknown event (type %d)",
 			dev->data->port_id, event.event_type);
diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 602473e8f7..86dbc870e3 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -2707,6 +2707,40 @@ mlx5_os_net_cleanup(void)
 	mlx5_pmd_socket_uninit();
 }
 
+static int
+mlx5_os_dev_shared_handler_install_lsc(struct mlx5_dev_ctx_shared *sh)
+{
+	int nlsk_fd, flags, ret;
+
+	nlsk_fd = mlx5_nl_init(NETLINK_ROUTE, RTMGRP_LINK);
+	if (nlsk_fd < 0) {
+		DRV_LOG(ERR, "Failed to create a socket for Netlink events: %s",
+			rte_strerror(rte_errno));
+		return -1;
+	}
+	flags = fcntl(nlsk_fd, F_GETFL);
+	ret = fcntl(nlsk_fd, F_SETFL, flags | O_NONBLOCK);
+	if (ret != 0) {
+		DRV_LOG(ERR, "Failed to make Netlink event socket non-blocking: %s",
+			strerror(errno));
+		rte_errno = errno;
+		goto error;
+	}
+	rte_intr_type_set(sh->intr_handle_nl, RTE_INTR_HANDLE_EXT);
+	rte_intr_fd_set(sh->intr_handle_nl, nlsk_fd);
+	if (rte_intr_callback_register(sh->intr_handle_nl,
+				       mlx5_dev_interrupt_handler_nl,
+				       sh) != 0) {
+		DRV_LOG(ERR, "Failed to register Netlink events interrupt");
+		rte_intr_fd_set(sh->intr_handle_nl, -1);
+		goto error;
+	}
+	return 0;
+error:
+	close(nlsk_fd);
+	return -1;
+}
+
 /**
  * Install shared asynchronous device events handler.
  * This function is implemented to support event sharing
@@ -2744,6 +2778,18 @@ mlx5_os_dev_shared_handler_install(struct mlx5_dev_ctx_shared *sh)
 			rte_intr_fd_set(sh->intr_handle, -1);
 		}
 	}
+	sh->intr_handle_nl = rte_intr_instance_alloc
+						(RTE_INTR_INSTANCE_F_SHARED);
+	if (sh->intr_handle_nl == NULL) {
+		DRV_LOG(ERR, "Fail to allocate intr_handle");
+		rte_errno = ENOMEM;
+		return;
+	}
+	rte_intr_fd_set(sh->intr_handle_nl, -1);
+	if (mlx5_os_dev_shared_handler_install_lsc(sh) < 0) {
+		DRV_LOG(INFO, "Fail to install the shared Netlink event handler.");
+		rte_intr_fd_set(sh->intr_handle_nl, -1);
+	}
 	if (sh->devx) {
 #ifdef HAVE_IBV_DEVX_ASYNC
 		sh->intr_handle_devx =
@@ -2791,10 +2837,19 @@ mlx5_os_dev_shared_handler_install(struct mlx5_dev_ctx_shared *sh)
 void
 mlx5_os_dev_shared_handler_uninstall(struct mlx5_dev_ctx_shared *sh)
 {
+	int nlsk_fd;
+
 	if (rte_intr_fd_get(sh->intr_handle) >= 0)
 		mlx5_intr_callback_unregister(sh->intr_handle,
 					      mlx5_dev_interrupt_handler, sh);
 	rte_intr_instance_free(sh->intr_handle);
+	nlsk_fd = rte_intr_fd_get(sh->intr_handle_nl);
+	if (nlsk_fd >= 0) {
+		mlx5_intr_callback_unregister
+			(sh->intr_handle_nl, mlx5_dev_interrupt_handler_nl, sh);
+		close(nlsk_fd);
+	}
+	rte_intr_instance_free(sh->intr_handle_nl);
 #ifdef HAVE_IBV_DEVX_ASYNC
 	if (rte_intr_fd_get(sh->intr_handle_devx) >= 0)
 		rte_intr_callback_unregister(sh->intr_handle_devx,
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 5571e90677..4b9534c59e 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -1206,6 +1206,7 @@ mlx5_alloc_shared_dev_ctx(const struct mlx5_dev_spawn_data *spawn,
 	for (i = 0; i < sh->max_port; i++) {
 		sh->port[i].ih_port_id = RTE_MAX_ETHPORTS;
 		sh->port[i].devx_ih_port_id = RTE_MAX_ETHPORTS;
+		sh->port[i].nl_ih_port_id = RTE_MAX_ETHPORTS;
 	}
 	if (sh->devx) {
 		sh->td = mlx5_devx_cmd_create_td(sh->cdev->ctx);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 737ad6895c..da9c8f5086 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -605,6 +605,7 @@ struct mlx5_age_info {
 struct mlx5_dev_shared_port {
 	uint32_t ih_port_id;
 	uint32_t devx_ih_port_id;
+	uint32_t nl_ih_port_id;
 	/*
 	 * Interrupt handler port_id. Used by shared interrupt
 	 * handler to find the corresponding rte_eth device
@@ -1203,6 +1204,7 @@ struct mlx5_dev_ctx_shared {
 	/* Shared interrupt handler section. */
 	struct rte_intr_handle *intr_handle; /* Interrupt handler for device. */
 	struct rte_intr_handle *intr_handle_devx; /* DEVX interrupt handler. */
+	struct rte_intr_handle *intr_handle_nl; /* Netlink interrupt handler. */
 	void *devx_comp; /* DEVX async comp obj. */
 	struct mlx5_devx_obj *tis[16]; /* TIS object. */
 	struct mlx5_devx_obj *td; /* Transport domain. */
@@ -1585,6 +1587,7 @@ int mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev,
 			   struct rte_eth_fc_conf *fc_conf);
 void mlx5_dev_interrupt_handler(void *arg);
 void mlx5_dev_interrupt_handler_devx(void *arg);
+void mlx5_dev_interrupt_handler_nl(void *arg);
 int mlx5_set_link_down(struct rte_eth_dev *dev);
 int mlx5_set_link_up(struct rte_eth_dev *dev);
 int mlx5_is_removed(struct rte_eth_dev *dev);
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 3a59237b1a..9fe7731036 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -1205,11 +1205,18 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 		priv->sh->port[priv->dev_port - 1].ih_port_id =
 					(uint32_t)dev->data->port_id;
 	} else {
-		DRV_LOG(INFO, "port %u starts without LSC and RMV interrupts.",
+		DRV_LOG(INFO, "port %u starts without RMV interrupts.",
 			dev->data->port_id);
-		dev->data->dev_conf.intr_conf.lsc = 0;
 		dev->data->dev_conf.intr_conf.rmv = 0;
 	}
+	if (rte_intr_fd_get(priv->sh->intr_handle_nl) >= 0) {
+		priv->sh->port[priv->dev_port - 1].nl_ih_port_id =
+					(uint32_t)dev->data->port_id;
+	} else {
+		DRV_LOG(INFO, "port %u starts without LSC interrupts.",
+			dev->data->port_id);
+		dev->data->dev_conf.intr_conf.lsc = 0;
+	}
 	if (rte_intr_fd_get(priv->sh->intr_handle_devx) >= 0)
 		priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
 					(uint32_t)dev->data->port_id;
@@ -1261,6 +1268,7 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
 	mlx5_rx_intr_vec_disable(dev);
 	priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
 	priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
+	priv->sh->port[priv->dev_port - 1].nl_ih_port_id = RTE_MAX_ETHPORTS;
 	mlx5_txq_stop(dev);
 	mlx5_rxq_stop(dev);
 	if (priv->obj_ops.lb_dummy_queue_release)
-- 
2.25.1


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 3/3] net/mlx5: fix initial link status detection
       [not found] <20220223164333.3834590-1-dkozlyuk@nvidia.com>
  2022-02-23 16:43 ` [PATCH 1/3] common/mlx5: add Netlink event helpers Dmitry Kozlyuk
  2022-02-23 16:43 ` [PATCH 2/3] net/mlx5: fix link status change detection Dmitry Kozlyuk
@ 2022-02-23 16:43 ` Dmitry Kozlyuk
       [not found] ` <20220301121514.41497-1-dkozlyuk@nvidia.com>
  3 siblings, 0 replies; 10+ messages in thread
From: Dmitry Kozlyuk @ 2022-02-23 16:43 UTC (permalink / raw)
  To: dev; +Cc: stable, Viacheslav Ovsiienko, Matan Azrad

Link status change takes time that depends on the HW and the kernel.
It was checked immediately after the change was issued at probing.
If the port had been down before probing, a "down" state may be read,
while the port would be "up" imminently.
After that, DPDK reported the port as "down" mistakenly
and "ifconfig $DEV up" did not trigger an LSC event,
because from the system's perspective the port was "up" already.

Install Netlink event handler at port probe before requesting the port
to come up in order to receive LSC event even if it comes up
between probe and start.

Fixes: b6499434b83e ("net/mlx5: fix link status initialization")
Cc: stable@dpdk.org

Signed-off-by: Dmitry Kozlyuk <dkozlyuk@nvidia.com>
Reviewed-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
---
 drivers/net/mlx5/linux/mlx5_os.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 86dbc870e3..6b1c59e895 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -1645,13 +1645,10 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 	/* Bring Ethernet device up. */
 	DRV_LOG(DEBUG, "port %u forcing Ethernet interface up",
 		eth_dev->data->port_id);
+	/* Watch LSC interrupts between port probe and port start. */
+	priv->sh->port[priv->dev_port - 1].nl_ih_port_id =
+							eth_dev->data->port_id;
 	mlx5_set_link_up(eth_dev);
-	/*
-	 * Even though the interrupt handler is not installed yet,
-	 * interrupts will still trigger on the async_fd from
-	 * Verbs context returned by ibv_open_device().
-	 */
-	mlx5_link_update(eth_dev, 0);
 #ifdef HAVE_MLX5DV_DR_ESWITCH
 	if (!(config->hca_attr.eswitch_manager && config->dv_flow_en &&
 	      (switch_info->representor || switch_info->master)))
-- 
2.25.1


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH v2 1/3] common/mlx5: add Netlink event helpers
       [not found] ` <20220301121514.41497-1-dkozlyuk@nvidia.com>
@ 2022-03-01 12:15   ` Dmitry Kozlyuk
  2022-03-02 15:49     ` Ferruh Yigit
  2022-03-01 12:15   ` [PATCH v2 2/3] net/mlx5: fix link status change detection Dmitry Kozlyuk
  2022-03-01 12:15   ` [PATCH v2 3/3] net/mlx5: fix initial link status detection Dmitry Kozlyuk
  2 siblings, 1 reply; 10+ messages in thread
From: Dmitry Kozlyuk @ 2022-03-01 12:15 UTC (permalink / raw)
  To: dev; +Cc: stable, Viacheslav Ovsiienko, Matan Azrad, Ray Kinsella

Introduce mlx5_nl_read_events() to read Netlink events
(technically, messages) from a socket that was configured
to listen for them via a new mlx5_nl_init() parameter.
Add mlx5_nl_parse_link_status_update() helper
to extract information from link-related events.
This patch is a shared base for later fixes.

Cc: stable@dpdk.org

Signed-off-by: Dmitry Kozlyuk <dkozlyuk@nvidia.com>
Reviewed-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
---
 drivers/common/mlx5/linux/mlx5_common_os.c |   2 +-
 drivers/common/mlx5/linux/mlx5_nl.c        | 102 ++++++++++++++++++++-
 drivers/common/mlx5/linux/mlx5_nl.h        |   8 +-
 drivers/common/mlx5/version.map            |   2 +
 drivers/net/mlx5/linux/mlx5_os.c           |   8 +-
 drivers/net/mlx5/linux/mlx5_vlan_os.c      |   2 +-
 6 files changed, 116 insertions(+), 8 deletions(-)

diff --git a/drivers/common/mlx5/linux/mlx5_common_os.c b/drivers/common/mlx5/linux/mlx5_common_os.c
index a3c25638da..030ceb561f 100644
--- a/drivers/common/mlx5/linux/mlx5_common_os.c
+++ b/drivers/common/mlx5/linux/mlx5_common_os.c
@@ -590,7 +590,7 @@ mlx5_os_get_ibv_device(const struct rte_pci_addr *addr)
 static int
 mlx5_nl_roce_disable(const char *addr)
 {
-	int nlsk_fd = mlx5_nl_init(NETLINK_GENERIC);
+	int nlsk_fd = mlx5_nl_init(NETLINK_GENERIC, 0);
 	int devlink_id;
 	int enable;
 	int ret;
diff --git a/drivers/common/mlx5/linux/mlx5_nl.c b/drivers/common/mlx5/linux/mlx5_nl.c
index fd4c2d2625..5d04857b38 100644
--- a/drivers/common/mlx5/linux/mlx5_nl.c
+++ b/drivers/common/mlx5/linux/mlx5_nl.c
@@ -185,19 +185,22 @@ uint32_t atomic_sn;
  *
  * @param protocol
  *   Netlink protocol (e.g. NETLINK_ROUTE, NETLINK_RDMA).
+ * @param groups
+ *   Groups to listen (e.g. RTMGRP_LINK), can be 0.
  *
  * @return
  *   A file descriptor on success, a negative errno value otherwise and
  *   rte_errno is set.
  */
 int
-mlx5_nl_init(int protocol)
+mlx5_nl_init(int protocol, int groups)
 {
 	int fd;
 	int buf_size;
 	socklen_t opt_size;
 	struct sockaddr_nl local = {
 		.nl_family = AF_NETLINK,
+		.nl_groups = groups,
 	};
 	int ret;
 
@@ -1862,3 +1865,100 @@ mlx5_nl_enable_roce_set(int nlsk_fd, int family_id, const char *pci_addr,
 	/* Now, need to reload the driver. */
 	return mlx5_nl_driver_reload(nlsk_fd, family_id, pci_addr);
 }
+
+/**
+ * Try to parse a Netlink message as a link status update.
+ *
+ * @param hdr
+ *  Netlink message header.
+ * @param[out] ifindex
+ *  Index of the updated interface.
+ *
+ * @return
+ *  0 on success, negative on failure.
+ */
+int
+mlx5_nl_parse_link_status_update(struct nlmsghdr *hdr, uint32_t *ifindex)
+{
+	struct ifinfomsg *info;
+
+	switch (hdr->nlmsg_type) {
+	case RTM_NEWLINK:
+	case RTM_DELLINK:
+	case RTM_GETLINK:
+	case RTM_SETLINK:
+		info = NLMSG_DATA(hdr);
+		*ifindex = info->ifi_index;
+		return 0;
+	}
+	return -1;
+}
+
+/**
+ * Read pending events from a Netlink socket.
+ *
+ * @param nlsk_fd
+ *  Netlink socket.
+ * @param cb
+ *  Callback invoked for each of the events.
+ * @param cb_arg
+ *  User data for the callback.
+ *
+ * @return
+ *  0 on success, including the case when there are no events.
+ *  Negative on failure and rte_errno is set.
+ */
+int
+mlx5_nl_read_events(int nlsk_fd, mlx5_nl_event_cb *cb, void *cb_arg)
+{
+	char buf[8192];
+	struct sockaddr_nl addr;
+	struct iovec iov = {
+		.iov_base = buf,
+		.iov_len = sizeof(buf),
+	};
+	struct msghdr msg = {
+		.msg_name = &addr,
+		.msg_namelen = sizeof(addr),
+		.msg_iov = &iov,
+		.msg_iovlen = 1,
+	};
+	struct nlmsghdr *hdr;
+	ssize_t size;
+
+	while (1) {
+		size = recvmsg(nlsk_fd, &msg, MSG_DONTWAIT);
+		if (size < 0) {
+			if (errno == EAGAIN)
+				return 0;
+			if (errno == EINTR)
+				continue;
+			DRV_LOG(DEBUG, "Failed to receive netlink message: %s",
+				strerror(errno));
+			rte_errno = errno;
+			return -rte_errno;
+		}
+		hdr = (struct nlmsghdr *)buf;
+		while (size >= (ssize_t)sizeof(*hdr)) {
+			ssize_t msg_len = hdr->nlmsg_len;
+			ssize_t data_len = msg_len - sizeof(*hdr);
+			ssize_t aligned_len;
+
+			if (data_len < 0) {
+				DRV_LOG(DEBUG, "Netlink message too short");
+				rte_errno = EINVAL;
+				return -rte_errno;
+			}
+			aligned_len = NLMSG_ALIGN(msg_len);
+			if (aligned_len > size) {
+				DRV_LOG(DEBUG, "Netlink message too long");
+				rte_errno = EINVAL;
+				return -rte_errno;
+			}
+			cb(hdr, cb_arg);
+			hdr = RTE_PTR_ADD(hdr, aligned_len);
+			size -= aligned_len;
+		}
+	}
+	return 0;
+}
diff --git a/drivers/common/mlx5/linux/mlx5_nl.h b/drivers/common/mlx5/linux/mlx5_nl.h
index 2063c0deeb..0b7552338a 100644
--- a/drivers/common/mlx5/linux/mlx5_nl.h
+++ b/drivers/common/mlx5/linux/mlx5_nl.h
@@ -11,6 +11,7 @@
 
 #include "mlx5_common.h"
 
+typedef void (mlx5_nl_event_cb)(struct nlmsghdr *hdr, void *user_data);
 
 /* VLAN netdev for VLAN workaround. */
 struct mlx5_nl_vlan_dev {
@@ -30,7 +31,7 @@ struct mlx5_nl_vlan_vmwa_context {
 };
 
 __rte_internal
-int mlx5_nl_init(int protocol);
+int mlx5_nl_init(int protocol, int groups);
 __rte_internal
 int mlx5_nl_mac_addr_add(int nlsk_fd, unsigned int iface_idx, uint64_t *mac_own,
 			 struct rte_ether_addr *mac, uint32_t index);
@@ -75,4 +76,9 @@ int mlx5_nl_enable_roce_get(int nlsk_fd, int family_id, const char *pci_addr,
 int mlx5_nl_enable_roce_set(int nlsk_fd, int family_id, const char *pci_addr,
 			    int enable);
 
+__rte_internal
+int mlx5_nl_read_events(int nlsk_fd, mlx5_nl_event_cb *cb, void *cb_arg);
+__rte_internal
+int mlx5_nl_parse_link_status_update(struct nlmsghdr *hdr, uint32_t *ifindex);
+
 #endif /* RTE_PMD_MLX5_NL_H_ */
diff --git a/drivers/common/mlx5/version.map b/drivers/common/mlx5/version.map
index cb20a7d893..a23a30a6c0 100644
--- a/drivers/common/mlx5/version.map
+++ b/drivers/common/mlx5/version.map
@@ -127,9 +127,11 @@ INTERNAL {
 	mlx5_nl_mac_addr_flush; # WINDOWS_NO_EXPORT
 	mlx5_nl_mac_addr_remove; # WINDOWS_NO_EXPORT
 	mlx5_nl_mac_addr_sync; # WINDOWS_NO_EXPORT
+	mlx5_nl_parse_link_status_update; # WINDOWS_NO_EXPORT
 	mlx5_nl_port_state; # WINDOWS_NO_EXPORT
 	mlx5_nl_portnum; # WINDOWS_NO_EXPORT
 	mlx5_nl_promisc; # WINDOWS_NO_EXPORT
+	mlx5_nl_read_events; # WINDOWS_NO_EXPORT
 	mlx5_nl_switch_info; # WINDOWS_NO_EXPORT
 	mlx5_nl_vf_mac_addr_modify; # WINDOWS_NO_EXPORT
 	mlx5_nl_vlan_vmwa_create; # WINDOWS_NO_EXPORT
diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index be95095521..17e7144cc9 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -1117,7 +1117,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 	sh = mlx5_alloc_shared_dev_ctx(spawn, mkvlist);
 	if (!sh)
 		return NULL;
-	nl_rdma = mlx5_nl_init(NETLINK_RDMA);
+	nl_rdma = mlx5_nl_init(NETLINK_RDMA, 0);
 	/* Check port status. */
 	if (spawn->phys_port <= UINT8_MAX) {
 		/* Legacy Verbs api only support u8 port number. */
@@ -1180,7 +1180,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 	priv->mtu = RTE_ETHER_MTU;
 	/* Some internal functions rely on Netlink sockets, open them now. */
 	priv->nl_socket_rdma = nl_rdma;
-	priv->nl_socket_route =	mlx5_nl_init(NETLINK_ROUTE);
+	priv->nl_socket_route =	mlx5_nl_init(NETLINK_ROUTE, 0);
 	priv->representor = !!switch_info->representor;
 	priv->master = !!switch_info->master;
 	priv->domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID;
@@ -1927,8 +1927,8 @@ mlx5_os_pci_probe_pf(struct mlx5_common_device *cdev,
 	 * matching ones, gathering into the list.
 	 */
 	struct ibv_device *ibv_match[ret + 1];
-	int nl_route = mlx5_nl_init(NETLINK_ROUTE);
-	int nl_rdma = mlx5_nl_init(NETLINK_RDMA);
+	int nl_route = mlx5_nl_init(NETLINK_ROUTE, 0);
+	int nl_rdma = mlx5_nl_init(NETLINK_RDMA, 0);
 	unsigned int i;
 
 	while (ret-- > 0) {
diff --git a/drivers/net/mlx5/linux/mlx5_vlan_os.c b/drivers/net/mlx5/linux/mlx5_vlan_os.c
index 80ccd5a460..81611a8d3f 100644
--- a/drivers/net/mlx5/linux/mlx5_vlan_os.c
+++ b/drivers/net/mlx5/linux/mlx5_vlan_os.c
@@ -135,7 +135,7 @@ mlx5_vlan_vmwa_init(struct rte_eth_dev *dev, uint32_t ifindex)
 		return NULL;
 	}
 	rte_spinlock_init(&vmwa->sl);
-	vmwa->nl_socket = mlx5_nl_init(NETLINK_ROUTE);
+	vmwa->nl_socket = mlx5_nl_init(NETLINK_ROUTE, 0);
 	if (vmwa->nl_socket < 0) {
 		DRV_LOG(WARNING,
 			"Can not create Netlink socket"
-- 
2.25.1


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH v2 2/3] net/mlx5: fix link status change detection
       [not found] ` <20220301121514.41497-1-dkozlyuk@nvidia.com>
  2022-03-01 12:15   ` [PATCH v2 1/3] common/mlx5: add Netlink event helpers Dmitry Kozlyuk
@ 2022-03-01 12:15   ` Dmitry Kozlyuk
  2022-03-01 12:15   ` [PATCH v2 3/3] net/mlx5: fix initial link status detection Dmitry Kozlyuk
  2 siblings, 0 replies; 10+ messages in thread
From: Dmitry Kozlyuk @ 2022-03-01 12:15 UTC (permalink / raw)
  To: dev; +Cc: stable, Viacheslav Ovsiienko, Matan Azrad

Sometimes net/mlx5 devices did not detect link status change to "up".

Each shared device was monitoring IBV_EVENT_PORT_{ACTIVE,ERR}
and queried the link status upon receiving the event.
IBV_EVENT_PORT_ACTIVE is delivered when the logical link status
(UP flag) is set, but the physical link status (RUNNING flag)
may be down at that time, in which case the new link status
would be erroneously considered down.

IBV interface is insufficient for the task.
Monitor interface events using Netlink.

Fixes: 198a3c339a8f ("mlx5: handle link status interrupts")
Cc: stable@dpdk.org

Signed-off-by: Dmitry Kozlyuk <dkozlyuk@nvidia.com>
Reviewed-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
---
Coverity complains about unchecked rte_intr_fd/type_set() return values.
Checking them would be useless, because rte_intr_handle is in a known state.

 drivers/net/mlx5/linux/mlx5_ethdev_os.c | 63 ++++++++++++++++++++-----
 drivers/net/mlx5/linux/mlx5_os.c        | 55 +++++++++++++++++++++
 drivers/net/mlx5/mlx5.c                 |  1 +
 drivers/net/mlx5/mlx5.h                 |  3 ++
 drivers/net/mlx5/mlx5_trigger.c         | 12 ++++-
 5 files changed, 120 insertions(+), 14 deletions(-)

diff --git a/drivers/net/mlx5/linux/mlx5_ethdev_os.c b/drivers/net/mlx5/linux/mlx5_ethdev_os.c
index c19825ee52..8fe73f1adb 100644
--- a/drivers/net/mlx5/linux/mlx5_ethdev_os.c
+++ b/drivers/net/mlx5/linux/mlx5_ethdev_os.c
@@ -38,6 +38,7 @@
 #include <mlx5_devx_cmds.h>
 #include <mlx5_common.h>
 #include <mlx5_malloc.h>
+#include <mlx5_nl.h>
 
 #include "mlx5.h"
 #include "mlx5_rxtx.h"
@@ -760,6 +761,56 @@ mlx5_dev_interrupt_device_fatal(struct mlx5_dev_ctx_shared *sh)
 	}
 }
 
+static void
+mlx5_dev_interrupt_nl_cb(struct nlmsghdr *hdr, void *cb_arg)
+{
+	struct mlx5_dev_ctx_shared *sh = cb_arg;
+	uint32_t i;
+	uint32_t if_index;
+
+	if (mlx5_nl_parse_link_status_update(hdr, &if_index) < 0)
+		return;
+	for (i = 0; i < sh->max_port; i++) {
+		struct mlx5_dev_shared_port *port = &sh->port[i];
+		struct rte_eth_dev *dev;
+		struct mlx5_priv *priv;
+
+		if (port->nl_ih_port_id >= RTE_MAX_ETHPORTS)
+			continue;
+		dev = &rte_eth_devices[port->nl_ih_port_id];
+		/* Probing may initiate an LSC before configuration is done. */
+		if (dev->data->dev_configured &&
+		    !dev->data->dev_conf.intr_conf.lsc)
+			break;
+		priv = dev->data->dev_private;
+		if (priv->if_index == if_index) {
+			/* Block logical LSC events. */
+			uint16_t prev_status = dev->data->dev_link.link_status;
+
+			if (mlx5_link_update(dev, 0) < 0)
+				DRV_LOG(ERR, "Failed to update link status: %s",
+					rte_strerror(rte_errno));
+			else if (prev_status != dev->data->dev_link.link_status)
+				rte_eth_dev_callback_process
+					(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
+			break;
+		}
+	}
+}
+
+void
+mlx5_dev_interrupt_handler_nl(void *arg)
+{
+	struct mlx5_dev_ctx_shared *sh = arg;
+	int nlsk_fd = rte_intr_fd_get(sh->intr_handle_nl);
+
+	if (nlsk_fd < 0)
+		return;
+	if (mlx5_nl_read_events(nlsk_fd, mlx5_dev_interrupt_nl_cb, sh) < 0)
+		DRV_LOG(ERR, "Failed to process Netlink events: %s",
+			rte_strerror(rte_errno));
+}
+
 /**
  * Handle shared asynchronous events the NIC (removal event
  * and link status change). Supports multiport IB device.
@@ -823,18 +874,6 @@ mlx5_dev_interrupt_handler(void *cb_arg)
 		tmp = sh->port[tmp - 1].ih_port_id;
 		dev = &rte_eth_devices[tmp];
 		MLX5_ASSERT(dev);
-		if ((event.event_type == IBV_EVENT_PORT_ACTIVE ||
-		     event.event_type == IBV_EVENT_PORT_ERR) &&
-			dev->data->dev_conf.intr_conf.lsc) {
-			mlx5_glue->ack_async_event(&event);
-			if (mlx5_link_update(dev, 0) == -EAGAIN) {
-				usleep(0);
-				continue;
-			}
-			rte_eth_dev_callback_process
-				(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
-			continue;
-		}
 		DRV_LOG(DEBUG,
 			"port %u cannot handle an unknown event (type %d)",
 			dev->data->port_id, event.event_type);
diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 17e7144cc9..29a4890d14 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -2495,6 +2495,40 @@ mlx5_os_net_cleanup(void)
 	mlx5_pmd_socket_uninit();
 }
 
+static int
+mlx5_os_dev_shared_handler_install_lsc(struct mlx5_dev_ctx_shared *sh)
+{
+	int nlsk_fd, flags, ret;
+
+	nlsk_fd = mlx5_nl_init(NETLINK_ROUTE, RTMGRP_LINK);
+	if (nlsk_fd < 0) {
+		DRV_LOG(ERR, "Failed to create a socket for Netlink events: %s",
+			rte_strerror(rte_errno));
+		return -1;
+	}
+	flags = fcntl(nlsk_fd, F_GETFL);
+	ret = fcntl(nlsk_fd, F_SETFL, flags | O_NONBLOCK);
+	if (ret != 0) {
+		DRV_LOG(ERR, "Failed to make Netlink event socket non-blocking: %s",
+			strerror(errno));
+		rte_errno = errno;
+		goto error;
+	}
+	rte_intr_type_set(sh->intr_handle_nl, RTE_INTR_HANDLE_EXT);
+	rte_intr_fd_set(sh->intr_handle_nl, nlsk_fd);
+	if (rte_intr_callback_register(sh->intr_handle_nl,
+				       mlx5_dev_interrupt_handler_nl,
+				       sh) != 0) {
+		DRV_LOG(ERR, "Failed to register Netlink events interrupt");
+		rte_intr_fd_set(sh->intr_handle_nl, -1);
+		goto error;
+	}
+	return 0;
+error:
+	close(nlsk_fd);
+	return -1;
+}
+
 /**
  * Install shared asynchronous device events handler.
  * This function is implemented to support event sharing
@@ -2532,6 +2566,18 @@ mlx5_os_dev_shared_handler_install(struct mlx5_dev_ctx_shared *sh)
 			rte_intr_fd_set(sh->intr_handle, -1);
 		}
 	}
+	sh->intr_handle_nl = rte_intr_instance_alloc
+						(RTE_INTR_INSTANCE_F_SHARED);
+	if (sh->intr_handle_nl == NULL) {
+		DRV_LOG(ERR, "Fail to allocate intr_handle");
+		rte_errno = ENOMEM;
+		return;
+	}
+	rte_intr_fd_set(sh->intr_handle_nl, -1);
+	if (mlx5_os_dev_shared_handler_install_lsc(sh) < 0) {
+		DRV_LOG(INFO, "Fail to install the shared Netlink event handler.");
+		rte_intr_fd_set(sh->intr_handle_nl, -1);
+	}
 	if (sh->cdev->config.devx) {
 #ifdef HAVE_IBV_DEVX_ASYNC
 		sh->intr_handle_devx =
@@ -2579,10 +2625,19 @@ mlx5_os_dev_shared_handler_install(struct mlx5_dev_ctx_shared *sh)
 void
 mlx5_os_dev_shared_handler_uninstall(struct mlx5_dev_ctx_shared *sh)
 {
+	int nlsk_fd;
+
 	if (rte_intr_fd_get(sh->intr_handle) >= 0)
 		mlx5_intr_callback_unregister(sh->intr_handle,
 					      mlx5_dev_interrupt_handler, sh);
 	rte_intr_instance_free(sh->intr_handle);
+	nlsk_fd = rte_intr_fd_get(sh->intr_handle_nl);
+	if (nlsk_fd >= 0) {
+		mlx5_intr_callback_unregister
+			(sh->intr_handle_nl, mlx5_dev_interrupt_handler_nl, sh);
+		close(nlsk_fd);
+	}
+	rte_intr_instance_free(sh->intr_handle_nl);
 #ifdef HAVE_IBV_DEVX_ASYNC
 	if (rte_intr_fd_get(sh->intr_handle_devx) >= 0)
 		rte_intr_callback_unregister(sh->intr_handle_devx,
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 74841caaf9..09cd1367db 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -1457,6 +1457,7 @@ mlx5_alloc_shared_dev_ctx(const struct mlx5_dev_spawn_data *spawn,
 	for (i = 0; i < sh->max_port; i++) {
 		sh->port[i].ih_port_id = RTE_MAX_ETHPORTS;
 		sh->port[i].devx_ih_port_id = RTE_MAX_ETHPORTS;
+		sh->port[i].nl_ih_port_id = RTE_MAX_ETHPORTS;
 	}
 	if (sh->cdev->config.devx) {
 		sh->td = mlx5_devx_cmd_create_td(sh->cdev->ctx);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 9a4aff97cb..0f0045a2b5 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -637,6 +637,7 @@ struct mlx5_age_info {
 struct mlx5_dev_shared_port {
 	uint32_t ih_port_id;
 	uint32_t devx_ih_port_id;
+	uint32_t nl_ih_port_id;
 	/*
 	 * Interrupt handler port_id. Used by shared interrupt
 	 * handler to find the corresponding rte_eth device
@@ -1239,6 +1240,7 @@ struct mlx5_dev_ctx_shared {
 	/* Shared interrupt handler section. */
 	struct rte_intr_handle *intr_handle; /* Interrupt handler for device. */
 	struct rte_intr_handle *intr_handle_devx; /* DEVX interrupt handler. */
+	struct rte_intr_handle *intr_handle_nl; /* Netlink interrupt handler. */
 	void *devx_comp; /* DEVX async comp obj. */
 	struct mlx5_devx_obj *tis[16]; /* TIS object. */
 	struct mlx5_devx_obj *td; /* Transport domain. */
@@ -1666,6 +1668,7 @@ int mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev,
 			   struct rte_eth_fc_conf *fc_conf);
 void mlx5_dev_interrupt_handler(void *arg);
 void mlx5_dev_interrupt_handler_devx(void *arg);
+void mlx5_dev_interrupt_handler_nl(void *arg);
 int mlx5_set_link_down(struct rte_eth_dev *dev);
 int mlx5_set_link_up(struct rte_eth_dev *dev);
 int mlx5_is_removed(struct rte_eth_dev *dev);
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index fe8b42c414..c68b32cf14 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -1207,11 +1207,18 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 		priv->sh->port[priv->dev_port - 1].ih_port_id =
 					(uint32_t)dev->data->port_id;
 	} else {
-		DRV_LOG(INFO, "port %u starts without LSC and RMV interrupts.",
+		DRV_LOG(INFO, "port %u starts without RMV interrupts.",
 			dev->data->port_id);
-		dev->data->dev_conf.intr_conf.lsc = 0;
 		dev->data->dev_conf.intr_conf.rmv = 0;
 	}
+	if (rte_intr_fd_get(priv->sh->intr_handle_nl) >= 0) {
+		priv->sh->port[priv->dev_port - 1].nl_ih_port_id =
+					(uint32_t)dev->data->port_id;
+	} else {
+		DRV_LOG(INFO, "port %u starts without LSC interrupts.",
+			dev->data->port_id);
+		dev->data->dev_conf.intr_conf.lsc = 0;
+	}
 	if (rte_intr_fd_get(priv->sh->intr_handle_devx) >= 0)
 		priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
 					(uint32_t)dev->data->port_id;
@@ -1263,6 +1270,7 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
 	mlx5_rx_intr_vec_disable(dev);
 	priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
 	priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
+	priv->sh->port[priv->dev_port - 1].nl_ih_port_id = RTE_MAX_ETHPORTS;
 	mlx5_txq_stop(dev);
 	mlx5_rxq_stop(dev);
 	if (priv->obj_ops.lb_dummy_queue_release)
-- 
2.25.1


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH v2 3/3] net/mlx5: fix initial link status detection
       [not found] ` <20220301121514.41497-1-dkozlyuk@nvidia.com>
  2022-03-01 12:15   ` [PATCH v2 1/3] common/mlx5: add Netlink event helpers Dmitry Kozlyuk
  2022-03-01 12:15   ` [PATCH v2 2/3] net/mlx5: fix link status change detection Dmitry Kozlyuk
@ 2022-03-01 12:15   ` Dmitry Kozlyuk
  2 siblings, 0 replies; 10+ messages in thread
From: Dmitry Kozlyuk @ 2022-03-01 12:15 UTC (permalink / raw)
  To: dev; +Cc: stable, Viacheslav Ovsiienko, Matan Azrad

Link status change takes time that depends on the HW and the kernel.
It was checked immediately after the change was issued at probing.
If the port had been down before probing, a "down" state may be read,
while the port would be "up" imminently.
After that, DPDK reported the port as "down" mistakenly
and "ifconfig $DEV up" did not trigger an LSC event,
because from the system's perspective the port was "up" already.

Install Netlink event handler at port probe before requesting the port
to come up in order to receive LSC event even if it comes up
between probe and start.

Fixes: b6499434b83e ("net/mlx5: fix link status initialization")
Cc: stable@dpdk.org

Signed-off-by: Dmitry Kozlyuk <dkozlyuk@nvidia.com>
Reviewed-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
---
 drivers/net/mlx5/linux/mlx5_os.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 29a4890d14..ff65efb2a2 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -1483,13 +1483,12 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 	/* Bring Ethernet device up. */
 	DRV_LOG(DEBUG, "port %u forcing Ethernet interface up",
 		eth_dev->data->port_id);
-	mlx5_set_link_up(eth_dev);
-	/*
-	 * Even though the interrupt handler is not installed yet,
-	 * interrupts will still trigger on the async_fd from
-	 * Verbs context returned by ibv_open_device().
-	 */
+	/* Read link status in case it is up and there will be no event. */
 	mlx5_link_update(eth_dev, 0);
+	/* Watch LSC interrupts between port probe and port start. */
+	priv->sh->port[priv->dev_port - 1].nl_ih_port_id =
+							eth_dev->data->port_id;
+	mlx5_set_link_up(eth_dev);
 	for (i = 0; i < MLX5_FLOW_TYPE_MAXI; i++) {
 		icfg[i].release_mem_en = !!sh->config.reclaim_mode;
 		if (sh->config.reclaim_mode)
-- 
2.25.1


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 1/3] common/mlx5: add Netlink event helpers
  2022-03-01 12:15   ` [PATCH v2 1/3] common/mlx5: add Netlink event helpers Dmitry Kozlyuk
@ 2022-03-02 15:49     ` Ferruh Yigit
  2022-03-02 15:56       ` Dmitry Kozlyuk
  0 siblings, 1 reply; 10+ messages in thread
From: Ferruh Yigit @ 2022-03-02 15:49 UTC (permalink / raw)
  To: Dmitry Kozlyuk, dev, Kevin Traynor, Luca Boccassi
  Cc: stable, Viacheslav Ovsiienko, Matan Azrad, Ray Kinsella

On 3/1/2022 12:15 PM, Dmitry Kozlyuk wrote:
> Introduce mlx5_nl_read_events() to read Netlink events
> (technically, messages) from a socket that was configured
> to listen for them via a new mlx5_nl_init() parameter.
> Add mlx5_nl_parse_link_status_update() helper
> to extract information from link-related events.
> This patch is a shared base for later fixes.
> 
> Cc: stable@dpdk.org
> 

Hi Dmitry,

For clarification, this patch is not fix, but it is requested
to be backported to be able to backport fixes in this patchset,
right?

This looks OK to me, but cc'ed LTS maintainers in case they
have objection.

> Signed-off-by: Dmitry Kozlyuk <dkozlyuk@nvidia.com>
> Reviewed-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>


<...>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [PATCH v2 1/3] common/mlx5: add Netlink event helpers
  2022-03-02 15:49     ` Ferruh Yigit
@ 2022-03-02 15:56       ` Dmitry Kozlyuk
  2022-03-08 13:48         ` Kevin Traynor
  0 siblings, 1 reply; 10+ messages in thread
From: Dmitry Kozlyuk @ 2022-03-02 15:56 UTC (permalink / raw)
  To: Ferruh Yigit, dev, Kevin Traynor, Luca Boccassi
  Cc: stable, Slava Ovsiienko, Matan Azrad, Ray Kinsella

Hi Ferruh,

> -----Original Message-----
> From: Ferruh Yigit <ferruh.yigit@intel.com>
[...]
> Hi Dmitry,
> 
> For clarification, this patch is not fix, but it is requested
> to be backported to be able to backport fixes in this patchset,
> right?

Yes. 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 1/3] common/mlx5: add Netlink event helpers
  2022-03-02 15:56       ` Dmitry Kozlyuk
@ 2022-03-08 13:48         ` Kevin Traynor
  2022-03-08 15:18           ` Dmitry Kozlyuk
  0 siblings, 1 reply; 10+ messages in thread
From: Kevin Traynor @ 2022-03-08 13:48 UTC (permalink / raw)
  To: Dmitry Kozlyuk, Ferruh Yigit, dev, Luca Boccassi
  Cc: stable, Slava Ovsiienko, Matan Azrad, Ray Kinsella

On 02/03/2022 15:56, Dmitry Kozlyuk wrote:
> Hi Ferruh,
> 
>> -----Original Message-----
>> From: Ferruh Yigit <ferruh.yigit@intel.com>
> [...]
>> Hi Dmitry,
>>
>> For clarification, this patch is not fix, but it is requested
>> to be backported to be able to backport fixes in this patchset,
>> right?
> 
> Yes.

The updated API is internal so that should be ok. I'm ok to take this on 
21.11 as long as you can confirm it's not breaking any user 
compatibility with external sw versions/ABI/API etc from 21.11.0 ?

Assuming that's ok, please send a rebased series for 21.11. I'm not 
comfortable rebasing the series with the amount of changes on dpdk main 
branch to same functions in mlx5_os.c.

P.S. Better to rebase on patch queue [0] to avoid conflicts with other 
backports not pushed to dpdk.org yet.

thanks,
Kevin.

[0] https://github.com/kevintraynor/dpdk-stable.git


^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [PATCH v2 1/3] common/mlx5: add Netlink event helpers
  2022-03-08 13:48         ` Kevin Traynor
@ 2022-03-08 15:18           ` Dmitry Kozlyuk
  0 siblings, 0 replies; 10+ messages in thread
From: Dmitry Kozlyuk @ 2022-03-08 15:18 UTC (permalink / raw)
  To: Kevin Traynor, Ferruh Yigit, dev, Luca Boccassi
  Cc: stable, Slava Ovsiienko, Matan Azrad, Ray Kinsella

Hi Kevin,

> -----Original Message-----
> From: Kevin Traynor <ktraynor@redhat.com>
[...]
> The updated API is internal so that should be ok. I'm ok to take this
> on
> 21.11 as long as you can confirm it's not breaking any user
> compatibility with external sw versions/ABI/API etc from 21.11.0 ?
> 
> Assuming that's ok, please send a rebased series for 21.11. I'm not
> comfortable rebasing the series with the amount of changes on dpdk
> main branch to same functions in mlx5_os.c.

Changes are not breaking any external SW.
Backport sent:
http://inbox.dpdk.org/stable/20220308151044.1012413-1-dkozlyuk@nvidia.com

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2022-03-08 15:18 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20220223164333.3834590-1-dkozlyuk@nvidia.com>
2022-02-23 16:43 ` [PATCH 1/3] common/mlx5: add Netlink event helpers Dmitry Kozlyuk
2022-02-23 16:43 ` [PATCH 2/3] net/mlx5: fix link status change detection Dmitry Kozlyuk
2022-02-23 16:43 ` [PATCH 3/3] net/mlx5: fix initial link status detection Dmitry Kozlyuk
     [not found] ` <20220301121514.41497-1-dkozlyuk@nvidia.com>
2022-03-01 12:15   ` [PATCH v2 1/3] common/mlx5: add Netlink event helpers Dmitry Kozlyuk
2022-03-02 15:49     ` Ferruh Yigit
2022-03-02 15:56       ` Dmitry Kozlyuk
2022-03-08 13:48         ` Kevin Traynor
2022-03-08 15:18           ` Dmitry Kozlyuk
2022-03-01 12:15   ` [PATCH v2 2/3] net/mlx5: fix link status change detection Dmitry Kozlyuk
2022-03-01 12:15   ` [PATCH v2 3/3] net/mlx5: fix initial link status detection Dmitry Kozlyuk

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).