* [PATCH] net/mlx5: fix min and max MTU reporting
@ 2025-07-16 10:25 Dariusz Sosnowski
0 siblings, 0 replies; only message in thread
From: Dariusz Sosnowski @ 2025-07-16 10:25 UTC (permalink / raw)
To: Viacheslav Ovsiienko, Bing Zhao, Ori Kam, Suanming Mou,
Matan Azrad, Didier Pallard, Adrien Mazarguil, Nelio Laranjeiro,
Francesco Santoro
Cc: dev, stable
mlx5 PMD used hardcoded and incorrect values when reporting
maximum MTU and maximum Rx packet length through rte_eth_dev_info_get().
This patch adds support for querying the OS for the minimum and maximum
allowed MTU values. The maximum Rx packet length is then calculated
based on these values.
On Linux, these values are queried through netlink,
using IFLA_MIN_MTU and IFLA_MAX_MTU attributes added in Linux 4.18.
Unfortunately, the Windows API does not expose the minimum and maximum
allowed MTU values. In this case, hardcoded fallback values
(which work on currently supported HW) are used.
Bugzilla ID: 1719
Fixes: e60fbd5b24fc ("mlx5: add device configure/start/stop")
Cc: stable@dpdk.org
Signed-off-by: Dariusz Sosnowski <dsosnowski@nvidia.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
---
drivers/common/mlx5/linux/mlx5_nl.c | 109 ++++++++++++++++++++++
drivers/common/mlx5/linux/mlx5_nl.h | 3 +
drivers/net/mlx5/linux/mlx5_ethdev_os.c | 30 ++++++
drivers/net/mlx5/linux/mlx5_os.c | 2 +
drivers/net/mlx5/mlx5.h | 13 +++
drivers/net/mlx5/mlx5_ethdev.c | 42 ++++++++-
drivers/net/mlx5/windows/mlx5_ethdev_os.c | 28 ++++++
drivers/net/mlx5/windows/mlx5_os.c | 2 +
8 files changed, 228 insertions(+), 1 deletion(-)
diff --git a/drivers/common/mlx5/linux/mlx5_nl.c b/drivers/common/mlx5/linux/mlx5_nl.c
index 86166e92d0..dd69e229e3 100644
--- a/drivers/common/mlx5/linux/mlx5_nl.c
+++ b/drivers/common/mlx5/linux/mlx5_nl.c
@@ -2247,3 +2247,112 @@ mlx5_nl_rdma_monitor_cap_get(int nl, uint8_t *cap)
}
return 0;
}
+
+/* MTU bounds collected by mlx5_nl_get_mtu_bounds_cb() from link attributes. */
+struct mlx5_mtu {
+ /* Value of IFLA_MIN_MTU attribute. Meaningful only if min_mtu_set is true. */
+ uint32_t min_mtu;
+ /* Set when IFLA_MIN_MTU attribute was found in the message. */
+ bool min_mtu_set;
+ /* Value of IFLA_MAX_MTU attribute. Meaningful only if max_mtu_set is true. */
+ uint32_t max_mtu;
+ /* Set when IFLA_MAX_MTU attribute was found in the message. */
+ bool max_mtu_set;
+};
+
+/**
+ * Netlink message callback extracting MTU bounds from link attributes.
+ *
+ * Walks the route attributes which follow the ifinfomsg header and records
+ * IFLA_MIN_MTU and IFLA_MAX_MTU values whenever they are present.
+ * Parsing stops at the first attribute which does not fit in the message,
+ * so a zero-length or truncated attribute can neither stall the loop
+ * nor cause a read past the end of the message.
+ *
+ * @param nh
+ *   Pointer to netlink message header.
+ * @param arg
+ *   Pointer to struct mlx5_mtu which receives parsed values.
+ *
+ * @return
+ *   Always 0. Missing attributes are reported through min_mtu_set and
+ *   max_mtu_set flags left unset.
+ */
+static int
+mlx5_nl_get_mtu_bounds_cb(struct nlmsghdr *nh, void *arg)
+{
+	size_t off = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+	struct mlx5_mtu *out = arg;
+
+	while (off < nh->nlmsg_len) {
+		size_t left = nh->nlmsg_len - off;
+		struct rtattr *ra = RTE_PTR_ADD(nh, off);
+		uint32_t *payload;
+		bool has_u32;
+
+		/* Reject attributes shorter than their header or longer than the message. */
+		if (!RTA_OK(ra, left))
+			break;
+		/* Both attributes of interest carry a 32-bit value. */
+		has_u32 = ra->rta_len >= RTA_LENGTH(sizeof(uint32_t));
+		payload = RTA_DATA(ra);
+
+		switch (ra->rta_type) {
+		case IFLA_MIN_MTU:
+			if (has_u32) {
+				out->min_mtu = *payload;
+				out->min_mtu_set = true;
+			}
+			break;
+		case IFLA_MAX_MTU:
+			if (has_u32) {
+				out->max_mtu = *payload;
+				out->max_mtu_set = true;
+			}
+			break;
+		default:
+			/* Nothing to do for other attributes. */
+			break;
+		}
+		off += RTA_ALIGN(ra->rta_len);
+	}
+
+	return 0;
+}
+
+/**
+ * Query minimum and maximum allowed MTU values for given Linux network interface.
+ *
+ * This function queries the following interface attributes exposed in netlink since Linux 4.18:
+ *
+ * - IFLA_MIN_MTU - minimum allowed MTU
+ * - IFLA_MAX_MTU - maximum allowed MTU
+ *
+ * Netlink reports both attributes as 32-bit values, while the outputs are 16-bit.
+ * A maximum MTU above UINT16_MAX is clamped to UINT16_MAX.
+ * A minimum MTU above UINT16_MAX cannot be represented and is reported as an error.
+ *
+ * @param[in] nl
+ *   Netlink socket of the ROUTE kind (NETLINK_ROUTE).
+ * @param[in] ifindex
+ *   Linux network device index.
+ * @param[out] min_mtu
+ *   Pointer to minimum allowed MTU. Populated only if both minimum and maximum MTU were queried.
+ * @param[out] max_mtu
+ *   Pointer to maximum allowed MTU. Populated only if both minimum and maximum MTU were queried.
+ *
+ * @return
+ *   0 on success, negative on error and rte_errno is set.
+ *
+ *   Known errors:
+ *
+ *   - (-EINVAL) - either @p min_mtu or @p max_mtu is NULL.
+ *   - (-ENOENT) - either minimum or maximum allowed MTU was not found in interface attributes.
+ *   - (-ERANGE) - minimum allowed MTU does not fit in 16 bits.
+ */
+RTE_EXPORT_INTERNAL_SYMBOL(mlx5_nl_get_mtu_bounds)
+int
+mlx5_nl_get_mtu_bounds(int nl, unsigned int ifindex, uint16_t *min_mtu, uint16_t *max_mtu)
+{
+	struct mlx5_mtu out = { 0 };
+	struct {
+		struct nlmsghdr nh;
+		struct ifinfomsg info;
+	} req = {
+		.nh = {
+			.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)),
+			.nlmsg_type = RTM_GETLINK,
+			.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
+		},
+		.info = {
+			.ifi_family = AF_UNSPEC,
+			.ifi_index = ifindex,
+		},
+	};
+	uint32_t sn = MLX5_NL_SN_GENERATE;
+	int ret;
+
+	if (min_mtu == NULL || max_mtu == NULL) {
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+
+	ret = mlx5_nl_send(nl, &req.nh, sn);
+	if (ret < 0)
+		return ret;
+
+	ret = mlx5_nl_recv(nl, sn, mlx5_nl_get_mtu_bounds_cb, &out);
+	if (ret < 0)
+		return ret;
+
+	if (!out.min_mtu_set || !out.max_mtu_set) {
+		rte_errno = ENOENT;
+		return -rte_errno;
+	}
+
+	/* Avoid silent truncation when narrowing 32-bit attributes to 16-bit outputs. */
+	if (out.min_mtu > UINT16_MAX) {
+		rte_errno = ERANGE;
+		return -rte_errno;
+	}
+
+	*min_mtu = (uint16_t)out.min_mtu;
+	*max_mtu = out.max_mtu > UINT16_MAX ? UINT16_MAX : (uint16_t)out.max_mtu;
+
+	/* Report plain success, as documented, instead of the receive helper's value. */
+	return 0;
+}
diff --git a/drivers/common/mlx5/linux/mlx5_nl.h b/drivers/common/mlx5/linux/mlx5_nl.h
index e32080fa63..26923a88fd 100644
--- a/drivers/common/mlx5/linux/mlx5_nl.h
+++ b/drivers/common/mlx5/linux/mlx5_nl.h
@@ -117,4 +117,7 @@ void mlx5_nl_rdma_monitor_info_get(struct nlmsghdr *hdr, struct mlx5_nl_port_inf
__rte_internal
int mlx5_nl_rdma_monitor_cap_get(int nl, uint8_t *cap);
+__rte_internal
+int mlx5_nl_get_mtu_bounds(int nl, unsigned int ifindex, uint16_t *min_mtu, uint16_t *max_mtu);
+
#endif /* RTE_PMD_MLX5_NL_H_ */
diff --git a/drivers/net/mlx5/linux/mlx5_ethdev_os.c b/drivers/net/mlx5/linux/mlx5_ethdev_os.c
index 9daeda5435..a371c2c747 100644
--- a/drivers/net/mlx5/linux/mlx5_ethdev_os.c
+++ b/drivers/net/mlx5/linux/mlx5_ethdev_os.c
@@ -159,6 +159,36 @@ mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr)
return mlx5_ifreq_by_ifname(ifname, req, ifr);
}
+/**
+ * Query the range of MTU values allowed on the device.
+ *
+ * A NETLINK_ROUTE socket is opened for the query and closed before returning.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param[out] min_mtu
+ *   Where to store the minimum allowed MTU.
+ * @param[out] max_mtu
+ *   Where to store the maximum allowed MTU.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_os_get_mtu_bounds(struct rte_eth_dev *dev, uint16_t *min_mtu, uint16_t *max_mtu)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	int nl_fd = mlx5_nl_init(NETLINK_ROUTE, 0);
+	int rc = nl_fd;
+
+	/* Socket creation failure is propagated to the caller as is. */
+	if (nl_fd >= 0) {
+		rc = mlx5_nl_get_mtu_bounds(nl_fd, priv->if_index, min_mtu, max_mtu);
+		close(nl_fd);
+	}
+	return rc;
+}
+
+
/**
* Get device MTU.
*
diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 696a3e12c7..2bc8ca9284 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -1562,6 +1562,8 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
eth_dev->data->mac_addrs = priv->mac;
eth_dev->device = dpdk_dev;
eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
+ /* Fetch minimum and maximum allowed MTU from the device. */
+ mlx5_get_mtu_bounds(eth_dev, &priv->min_mtu, &priv->max_mtu);
/* Configure the first MAC address by default. */
if (mlx5_get_mac(eth_dev, &mac.addr_bytes)) {
DRV_LOG(ERR,
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index c08894cd03..53f0a27445 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -74,6 +74,15 @@
/* Maximal number of field/field parts to map into sample registers .*/
#define MLX5_FLEX_ITEM_MAPPING_NUM 32
+/*
+ * Number of bytes not included in MTU:
+ * lengths of Ethernet header, VLAN header and Ethernet CRC.
+ * Added to the maximum MTU to derive the maximum Rx packet length.
+ */
+#define MLX5_ETH_OVERHEAD (RTE_ETHER_HDR_LEN + RTE_VLAN_HLEN + RTE_ETHER_CRC_LEN)
+
+/* Minimum allowed MTU to be reported whenever PMD cannot query it from OS. */
+#define MLX5_ETH_MIN_MTU (RTE_ETHER_MIN_MTU)
+
+/*
+ * Maximum allowed MTU to be reported whenever PMD cannot query it from OS.
+ * NOTE(review): per the commit message, this fallback works on currently supported HW;
+ * revisit when new HW is added.
+ */
+#define MLX5_ETH_MAX_MTU (9978)
+
enum mlx5_ipool_index {
#if defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_INFINIBAND_VERBS_H)
MLX5_IPOOL_DECAP_ENCAP = 0, /* Pool for encap/decap resource. */
@@ -1981,6 +1990,8 @@ struct mlx5_priv {
unsigned int vlan_filter_n; /* Number of configured VLAN filters. */
/* Device properties. */
uint16_t mtu; /* Configured MTU. */
+ uint16_t min_mtu; /* Minimum MTU allowed on the NIC. */
+ uint16_t max_mtu; /* Maximum MTU allowed on the NIC. */
unsigned int isolated:1; /* Whether isolated mode is enabled. */
unsigned int representor:1; /* Device is a port representor. */
unsigned int master:1; /* Device is a E-Switch master. */
@@ -2333,6 +2344,7 @@ struct mlx5_priv *mlx5_dev_to_eswitch_info(struct rte_eth_dev *dev);
int mlx5_dev_configure_rss_reta(struct rte_eth_dev *dev);
uint64_t mlx5_get_restore_flags(struct rte_eth_dev *dev,
enum rte_eth_dev_operation op);
+void mlx5_get_mtu_bounds(struct rte_eth_dev *dev, uint16_t *min_mtu, uint16_t *max_mtu);
/* mlx5_ethdev_os.c */
@@ -2372,6 +2384,7 @@ int mlx5_os_get_stats_n(struct rte_eth_dev *dev, bool bond_master,
uint16_t *n_stats, uint16_t *n_stats_sec);
void mlx5_os_stats_init(struct rte_eth_dev *dev);
int mlx5_get_flag_dropless_rq(struct rte_eth_dev *dev);
+int mlx5_os_get_mtu_bounds(struct rte_eth_dev *dev, uint16_t *min_mtu, uint16_t *max_mtu);
/* mlx5_mac.c */
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 68d1c1bfa7..7747b0c869 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -360,9 +360,11 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
unsigned int max;
uint16_t max_wqe;
+ info->min_mtu = priv->min_mtu;
+ info->max_mtu = priv->max_mtu;
+ info->max_rx_pktlen = info->max_mtu + MLX5_ETH_OVERHEAD;
/* FIXME: we should ask the device for these values. */
info->min_rx_bufsize = 32;
- info->max_rx_pktlen = 65536;
info->max_lro_pkt_size = MLX5_MAX_LRO_SIZE;
/*
* Since we need one CQ per QP, the limit is the minimum number
@@ -863,3 +865,41 @@ mlx5_get_restore_flags(__rte_unused struct rte_eth_dev *dev,
/* mlx5 PMD does not require any configuration restore. */
return 0;
}
+
+/**
+ * Get minimum and maximum MTU values allowed on the device.
+ *
+ * This function always provides valid MTU bounds.
+ * Whenever the platform-specific query fails or is not supported on the current platform,
+ * MLX5_ETH_MIN_MTU and MLX5_ETH_MAX_MTU are used as fallback values
+ * and rte_errno is cleared.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[out] min_mtu
+ *   Minimum MTU value output buffer. Must not be NULL.
+ * @param[out] max_mtu
+ *   Maximum MTU value output buffer. Must not be NULL.
+ */
+void
+mlx5_get_mtu_bounds(struct rte_eth_dev *dev, uint16_t *min_mtu, uint16_t *max_mtu)
+{
+	int rc;
+
+	MLX5_ASSERT(min_mtu != NULL);
+	MLX5_ASSERT(max_mtu != NULL);
+
+	rc = mlx5_os_get_mtu_bounds(dev, min_mtu, max_mtu);
+	if (rc >= 0)
+		goto report;
+
+	/* Missing platform support is an expected condition, only real failures are logged. */
+	if (rc != -ENOTSUP)
+		DRV_LOG(INFO, "port %u failed to query MTU bounds, using fallback values",
+			dev->data->port_id);
+	*min_mtu = MLX5_ETH_MIN_MTU;
+	*max_mtu = MLX5_ETH_MAX_MTU;
+	/* Callers see no failure from this function, so do not leave a stale rte_errno. */
+	rte_errno = 0;
+
+report:
+	DRV_LOG(INFO, "port %u minimum MTU is %u", dev->data->port_id, *min_mtu);
+	DRV_LOG(INFO, "port %u maximum MTU is %u", dev->data->port_id, *max_mtu);
+}
diff --git a/drivers/net/mlx5/windows/mlx5_ethdev_os.c b/drivers/net/mlx5/windows/mlx5_ethdev_os.c
index 49f750be68..4f43b95a09 100644
--- a/drivers/net/mlx5/windows/mlx5_ethdev_os.c
+++ b/drivers/net/mlx5/windows/mlx5_ethdev_os.c
@@ -71,6 +71,34 @@ mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[MLX5_NAMESIZE])
return 0;
}
+/**
+ * Query the range of MTU values allowed on the device.
+ *
+ * Windows API does not expose minimum and maximum allowed MTU,
+ * so this implementation only reports (-ENOTSUP). This lets platform-independent
+ * code fall back to default values. Output buffers are left untouched.
+ *
+ * @param dev
+ *   Pointer to Ethernet device (unused).
+ * @param[out] min_mtu
+ *   Minimum MTU value output buffer (unused).
+ * @param[out] max_mtu
+ *   Maximum MTU value output buffer (unused).
+ *
+ * @return
+ *   (-ENOTSUP) always, and rte_errno is set to ENOTSUP.
+ */
+int
+mlx5_os_get_mtu_bounds(struct rte_eth_dev *dev, uint16_t *min_mtu, uint16_t *max_mtu)
+{
+	/* None of the arguments is needed to report lack of support. */
+	RTE_SET_USED(max_mtu);
+	RTE_SET_USED(min_mtu);
+	RTE_SET_USED(dev);
+
+	rte_errno = ENOTSUP;
+	return -ENOTSUP;
+}
+
+
/**
* Get device MTU.
*
diff --git a/drivers/net/mlx5/windows/mlx5_os.c b/drivers/net/mlx5/windows/mlx5_os.c
index d583730066..c4e3430bdc 100644
--- a/drivers/net/mlx5/windows/mlx5_os.c
+++ b/drivers/net/mlx5/windows/mlx5_os.c
@@ -477,6 +477,8 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
eth_dev->data->mac_addrs = priv->mac;
eth_dev->device = dpdk_dev;
eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
+ /* Fetch minimum and maximum allowed MTU from the device. */
+ mlx5_get_mtu_bounds(eth_dev, &priv->min_mtu, &priv->max_mtu);
/* Configure the first MAC address by default. */
if (mlx5_get_mac(eth_dev, &mac.addr_bytes)) {
DRV_LOG(ERR,
--
2.39.5
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2025-07-16 10:26 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-07-16 10:25 [PATCH] net/mlx5: fix min and max MTU reporting Dariusz Sosnowski
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).