patches for DPDK stable branches
* [dpdk-stable] [PATCH 19.11 0/6] Cumulative fixes for stable 19.11
@ 2021-08-16 16:29 Bing Zhao
  2021-08-16 16:29 ` [dpdk-stable] [PATCH 19.11 1/6] app/testpmd: fix offloads for newly attached port Bing Zhao
                   ` (7 more replies)
  0 siblings, 8 replies; 11+ messages in thread
From: Bing Zhao @ 2021-08-16 16:29 UTC (permalink / raw)
  To: stable, christian.ehrhardt; +Cc: viacheslavo, matan

This patch set contains 6 fixes backported from upstream to the
19.11 stable branch.

Viacheslav Ovsiienko (6):
  app/testpmd: fix offloads for newly attached port
  common/mlx5: fix compatibility with OFED port query API
  net/mlx5: fix switchdev mode recognition
  net/mlx5: fix RoCE LAG bond device probing
  common/mlx5: use new port query API if available
  net/mlx5: fix multi-segment inline for the first segments

 app/test-pmd/testpmd.c          | 145 ++++++++++++++------------------
 drivers/net/mlx5/Makefile       |  10 +++
 drivers/net/mlx5/meson.build    |   4 +
 drivers/net/mlx5/mlx5.c         |  94 ++++++++++-----------
 drivers/net/mlx5/mlx5_flow_dv.c |   2 +-
 drivers/net/mlx5/mlx5_glue.c    |  57 ++++++++++---
 drivers/net/mlx5/mlx5_glue.h    |  16 +++-
 drivers/net/mlx5/mlx5_rxtx.c    |  27 +++---
 8 files changed, 202 insertions(+), 153 deletions(-)

-- 
2.21.0



* [dpdk-stable] [PATCH 19.11 1/6] app/testpmd: fix offloads for newly attached port
  2021-08-16 16:29 [dpdk-stable] [PATCH 19.11 0/6] Cumulative fixes for stable 19.11 Bing Zhao
@ 2021-08-16 16:29 ` Bing Zhao
  2021-08-16 16:29 ` [dpdk-stable] [PATCH 19.11 2/6] common/mlx5: fix compatibility with OFED port query API Bing Zhao
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 11+ messages in thread
From: Bing Zhao @ 2021-08-16 16:29 UTC (permalink / raw)
  To: stable, christian.ehrhardt
  Cc: viacheslavo, matan, Aman Deep Singh, Xiaoyun Li

From: Viacheslav Ovsiienko <viacheslavo@nvidia.com>

[ upstream commit b6b8a1ebd4dadc82733ce4b0a711da918c386115 ]

For newly attached ports (via the "port attach" command), the
default offload settings configured on the application command
line were not applied, causing port start to fail after the
attach.

For example, if the scatter offload was configured on the command
line and rxpkts was configured for multiple segments, starting the
newly attached port failed because the scatter offload was not
enabled in the new port settings. The missing code to apply the
offloads to the new device and its queues is added.

The new local routine init_config_port_offloads() is introduced,
encapsulating the shared part of the port offload initialization
code.
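
For illustration, a minimal sketch of the failing sequence before
this fix (the PCI addresses and resulting port number are
hypothetical):

  # Rx scatter offload requested on the application command line
  testpmd -w 0000:82:00.0 -- -i --enable-scatter
  testpmd> port attach 0000:82:00.1
  testpmd> port start 1    # failed: scatter offload missing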

Fixes: c9cce42876f5 ("ethdev: remove deprecated attach/detach functions")
Cc: stable@dpdk.org

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
Acked-by: Aman Deep Singh <aman.deep.singh@intel.com>
Acked-by: Xiaoyun Li <xiaoyun.li@intel.com>
Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 app/test-pmd/testpmd.c | 145 ++++++++++++++++++-----------------------
 1 file changed, 65 insertions(+), 80 deletions(-)

diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index 9485953aba..ea25e9a984 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -1289,23 +1289,69 @@ check_nb_hairpinq(queueid_t hairpinq)
 	return 0;
 }
 
+static void
+init_config_port_offloads(portid_t pid, uint32_t socket_id)
+{
+	struct rte_port *port = &ports[pid];
+	uint16_t data_size;
+	int ret;
+	int i;
+
+	port->dev_conf.txmode = tx_mode;
+	port->dev_conf.rxmode = rx_mode;
+
+	ret = eth_dev_info_get_print_err(pid, &port->dev_info);
+	if (ret != 0)
+		rte_exit(EXIT_FAILURE, "rte_eth_dev_info_get() failed\n");
+
+	ret = update_jumbo_frame_offload(pid);
+	if (ret != 0)
+		printf("Updating jumbo frame offload failed for port %u\n",
+			pid);
+
+	if (!(port->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE))
+		port->dev_conf.txmode.offloads &=
+			~DEV_TX_OFFLOAD_MBUF_FAST_FREE;
+
+	/* Apply Rx offloads configuration */
+	for (i = 0; i < port->dev_info.max_rx_queues; i++)
+		port->rx_conf[i].offloads = port->dev_conf.rxmode.offloads;
+	/* Apply Tx offloads configuration */
+	for (i = 0; i < port->dev_info.max_tx_queues; i++)
+		port->tx_conf[i].offloads = port->dev_conf.txmode.offloads;
+
+	/* set flag to initialize port/queue */
+	port->need_reconfig = 1;
+	port->need_reconfig_queues = 1;
+	port->socket_id = socket_id;
+	port->tx_metadata = 0;
+
+	/*
+	 * Check for maximum number of segments per MTU.
+	 * Accordingly update the mbuf data size.
+	 */
+	if (port->dev_info.rx_desc_lim.nb_mtu_seg_max != UINT16_MAX &&
+	    port->dev_info.rx_desc_lim.nb_mtu_seg_max != 0) {
+		data_size = rx_mode.max_rx_pkt_len /
+			port->dev_info.rx_desc_lim.nb_mtu_seg_max;
+
+		if ((data_size + RTE_PKTMBUF_HEADROOM) > mbuf_data_size) {
+			mbuf_data_size = data_size + RTE_PKTMBUF_HEADROOM;
+			TESTPMD_LOG(WARNING, "Configured mbuf size %hu\n",
+				    mbuf_data_size);
+		}
+	}
+}
+
 static void
 init_config(void)
 {
 	portid_t pid;
-	struct rte_port *port;
 	struct rte_mempool *mbp;
 	unsigned int nb_mbuf_per_pool;
 	lcoreid_t  lc_id;
-	uint8_t port_per_socket[RTE_MAX_NUMA_NODES];
 	struct rte_gro_param gro_param;
 	uint32_t gso_types;
-	uint16_t data_size;
-	bool warning = 0;
-	int k;
-	int ret;
-
-	memset(port_per_socket,0,RTE_MAX_NUMA_NODES);
 
 	/* Configuration of logical cores. */
 	fwd_lcores = rte_zmalloc("testpmd: fwd_lcores",
@@ -1327,30 +1373,12 @@ init_config(void)
 	}
 
 	RTE_ETH_FOREACH_DEV(pid) {
-		port = &ports[pid];
-		/* Apply default TxRx configuration for all ports */
-		port->dev_conf.txmode = tx_mode;
-		port->dev_conf.rxmode = rx_mode;
+		uint32_t socket_id;
 
-		ret = eth_dev_info_get_print_err(pid, &port->dev_info);
-		if (ret != 0)
-			rte_exit(EXIT_FAILURE,
-				 "rte_eth_dev_info_get() failed\n");
-
-		ret = update_jumbo_frame_offload(pid);
-		if (ret != 0)
-			printf("Updating jumbo frame offload failed for port %u\n",
-				pid);
-
-		if (!(port->dev_info.tx_offload_capa &
-		      DEV_TX_OFFLOAD_MBUF_FAST_FREE))
-			port->dev_conf.txmode.offloads &=
-				~DEV_TX_OFFLOAD_MBUF_FAST_FREE;
 		if (numa_support) {
-			if (port_numa[pid] != NUMA_NO_CONFIG)
-				port_per_socket[port_numa[pid]]++;
-			else {
-				uint32_t socket_id = rte_eth_dev_socket_id(pid);
+			socket_id = port_numa[pid];
+			if (port_numa[pid] == NUMA_NO_CONFIG) {
+				socket_id = rte_eth_dev_socket_id(pid);
 
 				/*
 				 * if socket_id is invalid,
@@ -1358,45 +1386,15 @@ init_config(void)
 				 */
 				if (check_socket_id(socket_id) < 0)
 					socket_id = socket_ids[0];
-				port_per_socket[socket_id]++;
-			}
-		}
-
-		/* Apply Rx offloads configuration */
-		for (k = 0; k < port->dev_info.max_rx_queues; k++)
-			port->rx_conf[k].offloads =
-				port->dev_conf.rxmode.offloads;
-		/* Apply Tx offloads configuration */
-		for (k = 0; k < port->dev_info.max_tx_queues; k++)
-			port->tx_conf[k].offloads =
-				port->dev_conf.txmode.offloads;
-
-		/* set flag to initialize port/queue */
-		port->need_reconfig = 1;
-		port->need_reconfig_queues = 1;
-		port->tx_metadata = 0;
-
-		/* Check for maximum number of segments per MTU. Accordingly
-		 * update the mbuf data size.
-		 */
-		if (port->dev_info.rx_desc_lim.nb_mtu_seg_max != UINT16_MAX &&
-				port->dev_info.rx_desc_lim.nb_mtu_seg_max != 0) {
-			data_size = rx_mode.max_rx_pkt_len /
-				port->dev_info.rx_desc_lim.nb_mtu_seg_max;
-
-			if ((data_size + RTE_PKTMBUF_HEADROOM) >
-							mbuf_data_size) {
-				mbuf_data_size = data_size +
-						 RTE_PKTMBUF_HEADROOM;
-				warning = 1;
 			}
+		} else {
+			socket_id = (socket_num == UMA_NO_CONFIG) ?
+				    0 : socket_num;
 		}
+		/* Apply default TxRx configuration for all ports */
+		init_config_port_offloads(pid, socket_id);
 	}
 
-	if (warning)
-		TESTPMD_LOG(WARNING, "Configured mbuf size %hu\n",
-			    mbuf_data_size);
-
 	/*
 	 * Create pools of mbuf.
 	 * If NUMA support is disabled, create a single pool of mbuf in
@@ -1479,7 +1477,7 @@ init_config(void)
 #if defined RTE_LIBRTE_PMD_SOFTNIC
 	if (strcmp(cur_fwd_eng->fwd_mode_name, "softnic") == 0) {
 		RTE_ETH_FOREACH_DEV(pid) {
-			port = &ports[pid];
+			struct rte_port *port = &ports[pid];
 			const char *driver = port->dev_info.driver_name;
 
 			if (strcmp(driver, "net_softnic") == 0)
@@ -1494,21 +1492,8 @@ init_config(void)
 void
 reconfig(portid_t new_port_id, unsigned socket_id)
 {
-	struct rte_port *port;
-	int ret;
-
 	/* Reconfiguration of Ethernet ports. */
-	port = &ports[new_port_id];
-
-	ret = eth_dev_info_get_print_err(new_port_id, &port->dev_info);
-	if (ret != 0)
-		return;
-
-	/* set flag to initialize port/queue */
-	port->need_reconfig = 1;
-	port->need_reconfig_queues = 1;
-	port->socket_id = socket_id;
-
+	init_config_port_offloads(new_port_id, socket_id);
 	init_port_config();
 }
 
-- 
2.21.0



* [dpdk-stable] [PATCH 19.11 2/6] common/mlx5: fix compatibility with OFED port query API
  2021-08-16 16:29 [dpdk-stable] [PATCH 19.11 0/6] Cumulative fixes for stable 19.11 Bing Zhao
  2021-08-16 16:29 ` [dpdk-stable] [PATCH 19.11 1/6] app/testpmd: fix offloads for newly attached port Bing Zhao
@ 2021-08-16 16:29 ` Bing Zhao
  2021-08-16 16:29 ` [dpdk-stable] [PATCH 19.11 3/6] net/mlx5: fix switchdev mode recognition Bing Zhao
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 11+ messages in thread
From: Bing Zhao @ 2021-08-16 16:29 UTC (permalink / raw)
  To: stable, christian.ehrhardt; +Cc: viacheslavo, matan

From: Viacheslav Ovsiienko <viacheslavo@nvidia.com>

[ upstream commit 0fd928bbbae5e6f89376f9996d5340a861fc14e3 ]

The compilation flag HAVE_MLX5DV_DR_DEVX_PORT depends on the
presence of the mlx5dv_query_devx_port routine in the rdma-core
library.

The mlx5dv_query_devx_port routine exists only in OFED versions of
the rdma-core library and is planned to be removed and replaced
with the upstream-compatible mlx5dv_query_port.

As mlx5dv_query_devx_port is being removed, all dependencies on
the HAVE_MLX5DV_DR_DEVX_PORT compilation flag are reconsidered.

The new compilation flag HAVE_MLX5DV_DR_CREATE_DEST_IB_PORT is
introduced for backward compatibility with older OFED versions.
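
For context, a sketch of what the build probes are expected to
emit into the generated mlx5_autoconf.h on a new-enough OFED (the
exact generated contents shown here are an assumption):

  #define HAVE_MLX5DV_DR_DEVX_PORT 1
  #define HAVE_MLX5DV_DR_CREATE_DEST_IB_PORT 1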

Fixes: 6cfe84fbe7b1 ("net/mlx5: fix port action for LAG")
Cc: stable@dpdk.org

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
Acked-by: Matan Azrad <matan@nvidia.com>
Signed-off-by: Matan Azrad <matan@nvidia.com>
Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/Makefile       | 5 +++++
 drivers/net/mlx5/meson.build    | 2 ++
 drivers/net/mlx5/mlx5_flow_dv.c | 2 +-
 drivers/net/mlx5/mlx5_glue.c    | 2 +-
 4 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index 605975c245..3719f0f11e 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -178,6 +178,11 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
 		infiniband/mlx5dv.h \
 		func mlx5dv_query_devx_port \
 		$(AUTOCONF_OUTPUT)
+	$Q sh -- '$<' '$@' \
+		HAVE_MLX5DV_DR_CREATE_DEST_IB_PORT \
+		infiniband/mlx5dv.h \
+		func mlx5dv_dr_action_create_dest_ib_port \
+		$(AUTOCONF_OUTPUT)
 	$Q sh -- '$<' '$@' \
 		HAVE_IBV_DEVX_OBJ \
 		infiniband/mlx5dv.h \
diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
index 139056cbe8..3336c58ce5 100644
--- a/drivers/net/mlx5/meson.build
+++ b/drivers/net/mlx5/meson.build
@@ -138,6 +138,8 @@ if build
 		'IBV_WQ_FLAG_RX_END_PADDING' ],
 		[ 'HAVE_MLX5DV_DR_DEVX_PORT', 'infiniband/mlx5dv.h',
 		'mlx5dv_query_devx_port' ],
+		[ 'HAVE_MLX5DV_DR_CREATE_DEST_IB_PORT', 'infiniband/mlx5dv.h',
+		'mlx5dv_dr_action_create_dest_ib_port' ],
 		[ 'HAVE_IBV_DEVX_OBJ', 'infiniband/mlx5dv.h',
 		'mlx5dv_devx_obj_create' ],
 		[ 'HAVE_IBV_FLOW_DEVX_COUNTERS', 'infiniband/mlx5dv.h',
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 42ae1bbe2c..44c47ce8e4 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -7117,7 +7117,7 @@ flow_dv_translate_action_port_id(struct rte_eth_dev *dev,
 					  RTE_FLOW_ERROR_TYPE_ACTION,
 					  NULL,
 					  "No eswitch info was found for port");
-#ifdef HAVE_MLX5DV_DR_DEVX_PORT
+#ifdef HAVE_MLX5DV_DR_CREATE_DEST_IB_PORT
 	/*
 	 * This parameter is transferred to
 	 * mlx5dv_dr_action_create_dest_ib_port().
diff --git a/drivers/net/mlx5/mlx5_glue.c b/drivers/net/mlx5/mlx5_glue.c
index 65b63bd607..1553a9b41c 100644
--- a/drivers/net/mlx5/mlx5_glue.c
+++ b/drivers/net/mlx5/mlx5_glue.c
@@ -393,7 +393,7 @@ mlx5_glue_dr_create_flow_action_dest_flow_tbl(void *tbl)
 static void *
 mlx5_glue_dr_create_flow_action_dest_port(void *domain, uint32_t port)
 {
-#ifdef HAVE_MLX5DV_DR_DEVX_PORT
+#ifdef HAVE_MLX5DV_DR_CREATE_DEST_IB_PORT
 	return mlx5dv_dr_action_create_dest_ib_port(domain, port);
 #else
 #ifdef HAVE_MLX5DV_DR_ESWITCH
-- 
2.21.0



* [dpdk-stable] [PATCH 19.11 3/6] net/mlx5: fix switchdev mode recognition
  2021-08-16 16:29 [dpdk-stable] [PATCH 19.11 0/6] Cumulative fixes for stable 19.11 Bing Zhao
  2021-08-16 16:29 ` [dpdk-stable] [PATCH 19.11 1/6] app/testpmd: fix offloads for newly attached port Bing Zhao
  2021-08-16 16:29 ` [dpdk-stable] [PATCH 19.11 2/6] common/mlx5: fix compatibility with OFED port query API Bing Zhao
@ 2021-08-16 16:29 ` Bing Zhao
  2021-08-16 16:29 ` [dpdk-stable] [PATCH 19.11 4/6] net/mlx5: fix RoCE LAG bond device probing Bing Zhao
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 11+ messages in thread
From: Bing Zhao @ 2021-08-16 16:29 UTC (permalink / raw)
  To: stable, christian.ehrhardt; +Cc: viacheslavo, matan

From: Viacheslav Ovsiienko <viacheslavo@nvidia.com>

[ upstream commit 6b157f3bfa64b734a069772bdbb4a3d3a8d26b83 ]

Newer kernels may add the switch_id attribute to the Netlink
replies, and this caused E-Switch presence to be wrongly
recognized. The single uplink device was erroneously recognized
as master, which extended the match on source vport index to all
installed flows, including the default ones, and added extra hops
in the steering engine, degrading the maximal packet rate.

An extra check is added for the case where the device name has
the new format (implying a new kernel) and only a single device
is present. If this check succeeds, the E-Switch presence is
considered wrongly detected and is overridden.
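
For reference, an illustrative sketch of the kernel
phys_port_name formats involved (formats assumed from the mlx5
port name parser):

  "p0"     - uplink port, new kernel naming scheme
             (parsed as MLX5_PHYS_PORT_NAME_TYPE_UPLINK)
  "pf0vf2" - representor of VF 2 on PF 0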

Fixes: 30a86157f6d5 ("net/mlx5: support PF representor")
Cc: stable@dpdk.org

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
Acked-by: Matan Azrad <matan@nvidia.com>
Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 3208b2eda7..3f14bd5419 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -3364,6 +3364,18 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 			ret = -rte_errno;
 			goto exit;
 		}
+		/*
+		 * New kernels may add the switch_id attribute for the case
+		 * there is no E-Switch and we wrongly recognized the
+		 * only device as master. Override this if there is the
+		 * single device with single port and new device name
+		 * format present.
+		 */
+		if (nd == 1 &&
+		    list[0].info.name_type == MLX5_PHYS_PORT_NAME_TYPE_UPLINK) {
+			list[0].info.master = 0;
+			list[0].info.representor = 0;
+		}
 	}
 	assert(ns);
 	/*
-- 
2.21.0



* [dpdk-stable] [PATCH 19.11 4/6] net/mlx5: fix RoCE LAG bond device probing
  2021-08-16 16:29 [dpdk-stable] [PATCH 19.11 0/6] Cumulative fixes for stable 19.11 Bing Zhao
                   ` (2 preceding siblings ...)
  2021-08-16 16:29 ` [dpdk-stable] [PATCH 19.11 3/6] net/mlx5: fix switchdev mode recognition Bing Zhao
@ 2021-08-16 16:29 ` Bing Zhao
  2021-08-16 16:29 ` [dpdk-stable] [PATCH 19.11 5/6] common/mlx5: use new port query API if available Bing Zhao
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 11+ messages in thread
From: Bing Zhao @ 2021-08-16 16:29 UTC (permalink / raw)
  To: stable, christian.ehrhardt; +Cc: viacheslavo, matan

From: Viacheslav Ovsiienko <viacheslavo@nvidia.com>

[ upstream commit 9f430dd75107d47fb9b67ab756503397b59b8955 ]

A RoCE LAG bond device requires neither E-Switch nor SR-IOV
configuration. This means the RoCE LAG bond device might be
presented as a single-port InfiniBand device.

The mlx5 PMD wrongly recognized a standalone RoCE LAG bond device
as an E-Switch configuration. This triggered calls to the
E-Switch port related API, which failed (over the new OFED kernel
driver, starting from 5.4.1), causing the overall device probe to
fail.

If a single-port InfiniBand bond device is found, the E-Switch
related flags must be cleared, indicating a standalone
configuration.

Also, it is no longer true that a bond device can exist only over
E-Switch configurations (as was claimed for VF LAG bond devices).
The related checks are no longer relevant and are removed.
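
In sketch, the probe decision becomes (variable meanings assumed
from the hunk below: bd is the bond device index, np the number
of ports found on the InfiniBand device):

  bd >= 0 && np == 1  ->  standalone RoCE LAG bond:
                          clear master/representor flags
  bd >= 0 && np > 1   ->  VF LAG over E-Switch:
                          keep the E-Switch flags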

Fixes: 790164ce1d2d ("net/mlx5: check kernel support for VF LAG bonding")
Cc: stable@dpdk.org

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
Acked-by: Matan Azrad <matan@nvidia.com>
Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5.c | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 3f14bd5419..4696a1f2d1 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -3154,19 +3154,6 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 			goto exit;
 		}
 	}
-#ifndef HAVE_MLX5DV_DR_DEVX_PORT
-	if (bd >= 0) {
-		/*
-		 * This may happen if there is VF LAG kernel support and
-		 * application is compiled with older rdma_core library.
-		 */
-		DRV_LOG(ERR,
-			"No kernel/verbs support for VF LAG bonding found.");
-		rte_errno = ENOTSUP;
-		ret = -rte_errno;
-		goto exit;
-	}
-#endif
 	/*
 	 * Now we can determine the maximal
 	 * amount of devices to be spawned.
@@ -3230,6 +3217,15 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 			if (!ret && bd >= 0) {
 				switch (list[ns].info.name_type) {
 				case MLX5_PHYS_PORT_NAME_TYPE_UPLINK:
+					if (np == 1) {
+						/*
+						 * Force standalone bonding
+						 * device for ROCE LAG
+						 * configurations.
+						 */
+						list[ns].info.master = 0;
+						list[ns].info.representor = 0;
+					}
 					if (list[ns].info.port_name == bd)
 						ns++;
 					break;
-- 
2.21.0



* [dpdk-stable] [PATCH 19.11 5/6] common/mlx5: use new port query API if available
  2021-08-16 16:29 [dpdk-stable] [PATCH 19.11 0/6] Cumulative fixes for stable 19.11 Bing Zhao
                   ` (3 preceding siblings ...)
  2021-08-16 16:29 ` [dpdk-stable] [PATCH 19.11 4/6] net/mlx5: fix RoCE LAG bond device probing Bing Zhao
@ 2021-08-16 16:29 ` Bing Zhao
  2021-08-16 16:29 ` [dpdk-stable] [PATCH 19.11 6/6] net/mlx5: fix multi-segment inline for the first segments Bing Zhao
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 11+ messages in thread
From: Bing Zhao @ 2021-08-16 16:29 UTC (permalink / raw)
  To: stable, christian.ehrhardt; +Cc: viacheslavo, matan

From: Viacheslav Ovsiienko <viacheslavo@nvidia.com>

[ upstream commit d0cf77e8c2b64319057f5f629a7a595ce6e8b556 ]

In order to get E-Switch vport identifiers, the mlx5 PMD relies
on two approaches:
  [a] use port query API if it is provided by rdma-core library
  [b] otherwise, deduce vport ids from the related VF index
The latter is not reliable and may not work with newer kernel
drivers and in some configurations (LAG), causing E-Switch
malfunction. Hence, engaging the port query API is highly
desirable.

Depending on the rdma-core version, the port query API is:
  - very old OFED versions have no query API (approach [b])
  - rdma-core OFED < 5.5 provides mlx5dv_query_devx_port,
    HAVE_MLX5DV_DR_DEVX_PORT flag is defined (approach [a])
  - rdma-core OFED >= 5.5 has mlx5dv_query_port, flag
    HAVE_MLX5DV_DR_DEVX_PORT_V35 is defined (approach [a])
  - future OFED versions might remove mlx5dv_query_devx_port
    and HAVE_MLX5DV_DR_DEVX_PORT will not be defined
  - Upstream rdma-core < v35 has no port query API (approach [b])
  - Upstream rdma-core >= v35 has mlx5dv_query_port, flag
    HAVE_MLX5DV_DR_DEVX_PORT_V35 is defined (approach [a])

In order to support the new mlx5dv_query_port routine, the
conditional compilation flag HAVE_MLX5DV_DR_DEVX_PORT_V35
is introduced by this patch. The flag HAVE_MLX5DV_DR_DEVX_PORT
is kept for compatibility with previous rdma-core versions.

Although this patch is not a bugfix (it follows the API variation
introduced in the underlying library), it resolves the
compatibility issue and is highly desirable to port to DPDK LTS.
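
In sketch, callers now use the unified structure regardless of
which underlying API is available (struct, flag, and glue call
names are taken from this patch; the locals are illustrative):

  struct mlx5_port_info info = { .query_flags = 0 };

  if (!mlx5_glue->devx_port_query(ctx, port_num, &info)) {
          /* Flags stay clear when neither rdma-core API exists. */
          if (info.query_flags & MLX5_PORT_QUERY_VPORT)
                  vport_id = info.vport_id;
          if (info.query_flags & MLX5_PORT_QUERY_REG_C0)
                  meta_mask = info.vport_meta_mask;
  }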

Cc: stable@dpdk.org

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
Acked-by: Matan Azrad <matan@nvidia.com>
Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/Makefile    |  5 +++
 drivers/net/mlx5/meson.build |  2 ++
 drivers/net/mlx5/mlx5.c      | 60 ++++++++++++++++--------------------
 drivers/net/mlx5/mlx5_glue.c | 55 +++++++++++++++++++++++++++------
 drivers/net/mlx5/mlx5_glue.h | 16 +++++++++-
 5 files changed, 94 insertions(+), 44 deletions(-)

diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index 3719f0f11e..d62bfd6875 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -178,6 +178,11 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
 		infiniband/mlx5dv.h \
 		func mlx5dv_query_devx_port \
 		$(AUTOCONF_OUTPUT)
+	$Q sh -- '$<' '$@' \
+		HAVE_MLX5DV_DR_DEVX_PORT_V35 \
+		infiniband/mlx5dv.h \
+		func mlx5dv_query_port \
+		$(AUTOCONF_OUTPUT)
 	$Q sh -- '$<' '$@' \
 		HAVE_MLX5DV_DR_CREATE_DEST_IB_PORT \
 		infiniband/mlx5dv.h \
diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
index 3336c58ce5..81126d6a1b 100644
--- a/drivers/net/mlx5/meson.build
+++ b/drivers/net/mlx5/meson.build
@@ -138,6 +138,8 @@ if build
 		'IBV_WQ_FLAG_RX_END_PADDING' ],
 		[ 'HAVE_MLX5DV_DR_DEVX_PORT', 'infiniband/mlx5dv.h',
 		'mlx5dv_query_devx_port' ],
+		[ 'HAVE_MLX5DV_DR_DEVX_PORT_V35', 'infiniband/mlx5dv.h',
+		'mlx5dv_query_port' ],
 		[ 'HAVE_MLX5DV_DR_CREATE_DEST_IB_PORT', 'infiniband/mlx5dv.h',
 		'mlx5dv_dr_action_create_dest_ib_port' ],
 		[ 'HAVE_IBV_DEVX_OBJ', 'infiniband/mlx5dv.h',
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 4696a1f2d1..18fccecf3d 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -2175,9 +2175,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 	int own_domain_id = 0;
 	uint16_t port_id;
 	unsigned int i;
-#ifdef HAVE_MLX5DV_DR_DEVX_PORT
-	struct mlx5dv_devx_port devx_port = { .comp_mask = 0 };
-#endif
+	struct mlx5_port_info vport_info = { .query_flags = 0 };
 
 	/* Determine if this port representor is supposed to be spawned. */
 	if (switch_info->representor && dpdk_dev->devargs) {
@@ -2411,28 +2409,26 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 	priv->vport_meta_tag = 0;
 	priv->vport_meta_mask = 0;
 	priv->pf_bond = spawn->pf_bond;
-#ifdef HAVE_MLX5DV_DR_DEVX_PORT
 	/*
-	 * The DevX port query API is implemented. E-Switch may use
-	 * either vport or reg_c[0] metadata register to match on
-	 * vport index. The engaged part of metadata register is
-	 * defined by mask.
+	 * If we have E-Switch we should determine the vport attributes.
+	 * E-Switch may use either source vport field or reg_c[0] metadata
+	 * register to match on vport index. The engaged part of metadata
+	 * register is defined by mask.
 	 */
 	if (switch_info->representor || switch_info->master) {
-		devx_port.comp_mask = MLX5DV_DEVX_PORT_VPORT |
-				      MLX5DV_DEVX_PORT_MATCH_REG_C_0;
-		err = mlx5_glue->devx_port_query(sh->ctx, spawn->ibv_port,
-						 &devx_port);
+		err = mlx5_glue->devx_port_query(sh->ctx,
+						 spawn->ibv_port,
+						 &vport_info);
 		if (err) {
 			DRV_LOG(WARNING,
 				"can't query devx port %d on device %s",
 				spawn->ibv_port, spawn->ibv_dev->name);
-			devx_port.comp_mask = 0;
+			vport_info.query_flags = 0;
 		}
 	}
-	if (devx_port.comp_mask & MLX5DV_DEVX_PORT_MATCH_REG_C_0) {
-		priv->vport_meta_tag = devx_port.reg_c_0.value;
-		priv->vport_meta_mask = devx_port.reg_c_0.mask;
+	if (vport_info.query_flags & MLX5_PORT_QUERY_REG_C0) {
+		priv->vport_meta_tag = vport_info.vport_meta_tag;
+		priv->vport_meta_mask = vport_info.vport_meta_mask;
 		if (!priv->vport_meta_mask) {
 			DRV_LOG(ERR, "vport zero mask for port %d"
 				     " on bonding device %s",
@@ -2448,8 +2444,8 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 			goto error;
 		}
 	}
-	if (devx_port.comp_mask & MLX5DV_DEVX_PORT_VPORT) {
-		priv->vport_id = devx_port.vport_num;
+	if (vport_info.query_flags & MLX5_PORT_QUERY_VPORT) {
+		priv->vport_id = vport_info.vport_id;
 	} else if (spawn->pf_bond >= 0 &&
 			(switch_info->representor || switch_info->master)) {
 		DRV_LOG(ERR, "can't deduce vport index for port %d"
@@ -2458,25 +2454,21 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 		err = ENOTSUP;
 		goto error;
 	} else {
-		/* Suppose vport index in compatible way. */
+		/*
+		 * Suppose vport index in compatible way. Kernel/rdma_core
+		 * support single E-Switch per PF configurations only and
+		 * vport_id field contains the vport index for associated VF,
+		 * which is deduced from representor port name.
+		 * For example, let's have the IB device port 10, it has
+		 * attached network device eth0, which has port name attribute
+		 * pf0vf2, we can deduce the VF number as 2, and set vport index
+		 * as 3 (2+1). This assigning schema should be changed if the
+		 * multiple E-Switch instances per PF configurations or/and PCI
+		 * subfunctions are added.
+		 */
 		priv->vport_id = switch_info->representor ?
 				 switch_info->port_name + 1 : -1;
 	}
-#else
-	/*
-	 * Kernel/rdma_core support single E-Switch per PF configurations
-	 * only and vport_id field contains the vport index for
-	 * associated VF, which is deduced from representor port name.
-	 * For example, let's have the IB device port 10, it has
-	 * attached network device eth0, which has port name attribute
-	 * pf0vf2, we can deduce the VF number as 2, and set vport index
-	 * as 3 (2+1). This assigning schema should be changed if the
-	 * multiple E-Switch instances per PF configurations or/and PCI
-	 * subfunctions are added.
-	 */
-	priv->vport_id = switch_info->representor ?
-			 switch_info->port_name + 1 : -1;
-#endif
 	/* representor_id field keeps the unmodified VF index. */
 	priv->representor_id = switch_info->representor ?
 			       switch_info->port_name : -1;
diff --git a/drivers/net/mlx5/mlx5_glue.c b/drivers/net/mlx5/mlx5_glue.c
index 1553a9b41c..33f604364a 100644
--- a/drivers/net/mlx5/mlx5_glue.c
+++ b/drivers/net/mlx5/mlx5_glue.c
@@ -1025,17 +1025,54 @@ mlx5_glue_devx_qp_query(struct ibv_qp *qp,
 static int
 mlx5_glue_devx_port_query(struct ibv_context *ctx,
 			  uint32_t port_num,
-			  struct mlx5dv_devx_port *mlx5_devx_port)
-{
+			  struct mlx5_port_info *info)
+{
+	int err = 0;
+
+	info->query_flags = 0;
+#ifdef HAVE_MLX5DV_DR_DEVX_PORT_V35
+	/* The DevX port query API is implemented (rdma-core v35 and above). */
+	struct mlx5_ib_uapi_query_port devx_port;
+
+	memset(&devx_port, 0, sizeof(devx_port));
+	err = mlx5dv_query_port(ctx, port_num, &devx_port);
+	if (err)
+		return err;
+	if (devx_port.flags & MLX5DV_QUERY_PORT_VPORT_REG_C0) {
+		info->vport_meta_tag = devx_port.reg_c0.value;
+		info->vport_meta_mask = devx_port.reg_c0.mask;
+		info->query_flags |= MLX5_PORT_QUERY_REG_C0;
+	}
+	if (devx_port.flags & MLX5DV_QUERY_PORT_VPORT) {
+		info->vport_id = devx_port.vport;
+		info->query_flags |= MLX5_PORT_QUERY_VPORT;
+	}
+#else
 #ifdef HAVE_MLX5DV_DR_DEVX_PORT
-	return mlx5dv_query_devx_port(ctx, port_num, mlx5_devx_port);
+	/* The legacy DevX port query API is implemented (prior v35). */
+	struct mlx5dv_devx_port devx_port = {
+		.comp_mask = MLX5DV_DEVX_PORT_VPORT |
+			     MLX5DV_DEVX_PORT_MATCH_REG_C_0
+	};
+
+	err = mlx5dv_query_devx_port(ctx, port_num, &devx_port);
+	if (err)
+		return err;
+	if (devx_port.comp_mask & MLX5DV_DEVX_PORT_MATCH_REG_C_0) {
+		info->vport_meta_tag = devx_port.reg_c_0.value;
+		info->vport_meta_mask = devx_port.reg_c_0.mask;
+		info->query_flags |= MLX5_PORT_QUERY_REG_C0;
+	}
+	if (devx_port.comp_mask & MLX5DV_DEVX_PORT_VPORT) {
+		info->vport_id = devx_port.vport_num;
+		info->query_flags |= MLX5_PORT_QUERY_VPORT;
+	}
 #else
-	(void)ctx;
-	(void)port_num;
-	(void)mlx5_devx_port;
-	errno = ENOTSUP;
-	return errno;
-#endif
+	RTE_SET_USED(ctx);
+	RTE_SET_USED(port_num);
+#endif /* HAVE_MLX5DV_DR_DEVX_PORT */
+#endif /* HAVE_MLX5DV_DR_DEVX_PORT_V35 */
+	return err;
 }
 
 alignas(RTE_CACHE_LINE_SIZE)
diff --git a/drivers/net/mlx5/mlx5_glue.h b/drivers/net/mlx5/mlx5_glue.h
index 9895e55974..a639ab28b0 100644
--- a/drivers/net/mlx5/mlx5_glue.h
+++ b/drivers/net/mlx5/mlx5_glue.h
@@ -81,6 +81,20 @@ struct mlx5dv_dr_domain;
 struct mlx5dv_devx_port;
 #endif
 
+#ifndef HAVE_MLX5DV_DR_DEVX_PORT_V35
+struct mlx5dv_port;
+#endif
+
+#define MLX5_PORT_QUERY_VPORT (1u << 0)
+#define MLX5_PORT_QUERY_REG_C0 (1u << 1)
+
+struct mlx5_port_info {
+	uint16_t query_flags;
+	uint16_t vport_id; /* Associated VF vport index (if any). */
+	uint32_t vport_meta_tag; /* Used for vport index match over VF LAG. */
+	uint32_t vport_meta_mask; /* Used for vport index field match mask. */
+};
+
 #ifndef HAVE_MLX5_DR_CREATE_ACTION_FLOW_METER
 struct mlx5dv_dr_flow_meter_attr;
 #endif
@@ -255,7 +269,7 @@ struct mlx5_glue {
 			     void *out, size_t outlen);
 	int (*devx_port_query)(struct ibv_context *ctx,
 			       uint32_t port_num,
-			       struct mlx5dv_devx_port *mlx5_devx_port);
+			       struct mlx5_port_info *info);
 };
 
 extern const struct mlx5_glue *mlx5_glue;
-- 
2.21.0



* [dpdk-stable] [PATCH 19.11 6/6] net/mlx5: fix multi-segment inline for the first segments
  2021-08-16 16:29 [dpdk-stable] [PATCH 19.11 0/6] Cumulative fixes for stable 19.11 Bing Zhao
                   ` (4 preceding siblings ...)
  2021-08-16 16:29 ` [dpdk-stable] [PATCH 19.11 5/6] common/mlx5: use new port query API if available Bing Zhao
@ 2021-08-16 16:29 ` Bing Zhao
  2021-08-17 11:55   ` Christian Ehrhardt
  2021-08-17  9:42 ` [dpdk-stable] [PATCH 19.11 0/6] Cumulative fixes for stable 19.11 Christian Ehrhardt
  2021-08-17 13:54 ` [dpdk-stable] [PATCH 19.11 v2] net/mlx5: fix multi-segment inline for the first segments Bing Zhao
  7 siblings, 1 reply; 11+ messages in thread
From: Bing Zhao @ 2021-08-16 16:29 UTC (permalink / raw)
  To: stable, christian.ehrhardt; +Cc: viacheslavo, matan

From: Viacheslav Ovsiienko <viacheslavo@nvidia.com>

[ upstream commit ec837ad0fc7c6df4912cc2706b9cd54b225f4a34 ]

Before the 19.08 release, the Tx burst routines of the mlx5 PMD
inlined data for the first short segments of multi-segment
packets. In release 19.08 the mlx5 Tx datapath was refactored and
this behavior was broken, affecting performance.

For example, the T-Rex traffic generator might use small leading
segments to handle packet headers, and performance degradation
was noticed.

If the first segments of a multi-segment packet are short and the
overall length is below the inline threshold, they should be
inlined into the WQE to restore the performance.
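
A worked example with hypothetical sizes: assume inlen_send = 256
and a packet segmented as 42B (headers) + 60B + 1400B. The short
leading segments can now be accumulated and inlined into the WQE:

  seg0 (42B)   -> inlined          (42  <= 256)
  seg1 (60B)   -> inlined          (102 <= 256)
  seg2 (1400B) -> sent by pointer

Before this fix, the same packet was sent entirely without inline
data.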

Fixes: 18a1c20044c0 ("net/mlx5: implement Tx burst template")
Cc: stable@dpdk.org

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5_rxtx.c | 27 +++++++++++++--------------
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 73dbf68d2b..094e359e55 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -3336,6 +3336,8 @@ mlx5_tx_packet_multi_inline(struct mlx5_txq_data *restrict txq,
 		unsigned int nxlen;
 		uintptr_t start;
 
+		mbuf = loc->mbuf;
+		nxlen = rte_pktmbuf_data_len(mbuf);
 		/*
 		 * Packet length exceeds the allowed inline
 		 * data length, check whether the minimal
@@ -3345,27 +3347,23 @@ mlx5_tx_packet_multi_inline(struct mlx5_txq_data *restrict txq,
 			assert(txq->inlen_mode >= MLX5_ESEG_MIN_INLINE_SIZE);
 			assert(txq->inlen_mode <= txq->inlen_send);
 			inlen = txq->inlen_mode;
-		} else {
-			if (!vlan || txq->vlan_en) {
-				/*
-				 * VLAN insertion will be done inside by HW.
-				 * It is not utmost effective - VLAN flag is
-				 * checked twice, but we should proceed the
-				 * inlining length correctly and take into
-				 * account the VLAN header being inserted.
-				 */
-				return mlx5_tx_packet_multi_send
-							(txq, loc, olx);
-			}
+		} else if (vlan && !txq->vlan_en) {
+			/*
+			 * VLAN insertion is requested and hardware does not
+			 * support the offload, will do with software inline.
+			 */
 			inlen = MLX5_ESEG_MIN_INLINE_SIZE;
+		} else if (mbuf->ol_flags & PKT_TX_DYNF_NOINLINE ||
+			   nxlen > txq->inlen_send) {
+			return mlx5_tx_packet_multi_send(txq, loc, olx);
+		} else {
+			goto do_first;
 		}
 		/*
 		 * Now we know the minimal amount of data is requested
 		 * to inline. Check whether we should inline the buffers
 		 * from the chain beginning to eliminate some mbufs.
 		 */
-		mbuf = loc->mbuf;
-		nxlen = rte_pktmbuf_data_len(mbuf);
 		if (unlikely(nxlen <= txq->inlen_send)) {
 			/* We can inline first mbuf at least. */
 			if (nxlen < inlen) {
@@ -3387,6 +3385,7 @@ mlx5_tx_packet_multi_inline(struct mlx5_txq_data *restrict txq,
 					goto do_align;
 				}
 			}
+do_first:
 			do {
 				inlen = nxlen;
 				mbuf = NEXT(mbuf);
-- 
2.21.0



* Re: [dpdk-stable] [PATCH 19.11 0/6] Cumulative fixes for stable 19.11
  2021-08-16 16:29 [dpdk-stable] [PATCH 19.11 0/6] Cumulative fixes for stable 19.11 Bing Zhao
                   ` (5 preceding siblings ...)
  2021-08-16 16:29 ` [dpdk-stable] [PATCH 19.11 6/6] net/mlx5: fix multi-segment inline for the first segments Bing Zhao
@ 2021-08-17  9:42 ` Christian Ehrhardt
  2021-08-17 13:54 ` [dpdk-stable] [PATCH 19.11 v2] net/mlx5: fix multi-segment inline for the first segments Bing Zhao
  7 siblings, 0 replies; 11+ messages in thread
From: Christian Ehrhardt @ 2021-08-17  9:42 UTC (permalink / raw)
  To: Bing Zhao; +Cc: dpdk stable, Viacheslav Ovsiienko, Matan Azrad

On Mon, Aug 16, 2021 at 6:30 PM Bing Zhao <bingz@nvidia.com> wrote:
>
> This patch set contains 6 fixes backported from upstream to the
> 19.11 stable branch.

Thank you, applied all six of them to the 19.11 WIP stable branch.

> Viacheslav Ovsiienko (6):
>   app/testpmd: fix offloads for newly attached port
>   common/mlx5: fix compatibility with OFED port query API
>   net/mlx5: fix switchdev mode recognition
>   net/mlx5: fix RoCE LAG bond device probing
>   common/mlx5: use new port query API if available
>   net/mlx5: fix multi-segment inline for the first segments
>
>  app/test-pmd/testpmd.c          | 145 ++++++++++++++------------------
>  drivers/net/mlx5/Makefile       |  10 +++
>  drivers/net/mlx5/meson.build    |   4 +
>  drivers/net/mlx5/mlx5.c         |  94 ++++++++++-----------
>  drivers/net/mlx5/mlx5_flow_dv.c |   2 +-
>  drivers/net/mlx5/mlx5_glue.c    |  57 ++++++++++---
>  drivers/net/mlx5/mlx5_glue.h    |  16 +++-
>  drivers/net/mlx5/mlx5_rxtx.c    |  27 +++---
>  8 files changed, 202 insertions(+), 153 deletions(-)
>
> --
> 2.21.0
>


-- 
Christian Ehrhardt
Staff Engineer, Ubuntu Server
Canonical Ltd


* Re: [dpdk-stable] [PATCH 19.11 6/6] net/mlx5: fix multi-segment inline for the first segments
  2021-08-16 16:29 ` [dpdk-stable] [PATCH 19.11 6/6] net/mlx5: fix multi-segment inline for the first segments Bing Zhao
@ 2021-08-17 11:55   ` Christian Ehrhardt
  2021-08-17 13:55     ` Bing Zhao
  0 siblings, 1 reply; 11+ messages in thread
From: Christian Ehrhardt @ 2021-08-17 11:55 UTC (permalink / raw)
  To: Bing Zhao; +Cc: dpdk stable, Viacheslav Ovsiienko, Matan Azrad

On Mon, Aug 16, 2021 at 6:30 PM Bing Zhao <bingz@nvidia.com> wrote:
>
> From: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
>
> [ upstream commit ec837ad0fc7c6df4912cc2706b9cd54b225f4a34 ]

Applying this causes build failures on some platforms.

Looks like:
[  974s] ../drivers/net/mlx5/mlx5_rxtx.c: In function
‘mlx5_tx_packet_multi_inline’:
[  974s] ../drivers/net/mlx5/mlx5_rxtx.c:3356:31: error:
‘PKT_TX_DYNF_NOINLINE’ undeclared (first use in this function)
[  974s]  3356 |   } else if (mbuf->ol_flags & PKT_TX_DYNF_NOINLINE ||
[  974s]       |                               ^~~~~~~~~~~~~~~~~~~~
[  974s] ../drivers/net/mlx5/mlx5_rxtx.c:3356:31: note: each
undeclared identifier is reported only once for each function it
appears in
[  974s] ninja: build stopped: subcommand failed.

And indeed this is the only occurrence

$ grep -Hrn PKT_TX_DYNF_NOINLINE *
drivers/net/mlx5/mlx5_rxtx.c:3356: } else if (mbuf->ol_flags &
PKT_TX_DYNF_NOINLINE ||

Since it only happens on some releases, I'd assume the other
arches/distros just do not build this?
It seems to only affect those building with meson.

For now I've removed this patch again from 19.11 - please have a look
and let me know if you'll provide a refreshed backport.



> Before the 19.08 release, the Tx burst routines of the mlx5 PMD
> inlined data for the first short segments of multi-segment
> packets. In release 19.08 the mlx5 Tx datapath was refactored and
> this behavior was broken, affecting performance.
>
> For example, the T-Rex traffic generator might use small leading
> segments to handle packet headers, and performance degradation
> was noticed.
>
> If the first segments of a multi-segment packet are short and the
> overall length is below the inline threshold, they should be
> inlined into the WQE to restore the performance.
>
> Fixes: 18a1c20044c0 ("net/mlx5: implement Tx burst template")
> Cc: stable@dpdk.org
>
> Signed-off-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
> Signed-off-by: Bing Zhao <bingz@nvidia.com>
> ---
>  drivers/net/mlx5/mlx5_rxtx.c | 27 +++++++++++++--------------
>  1 file changed, 13 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
> index 73dbf68d2b..094e359e55 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.c
> +++ b/drivers/net/mlx5/mlx5_rxtx.c
> @@ -3336,6 +3336,8 @@ mlx5_tx_packet_multi_inline(struct mlx5_txq_data *restrict txq,
>                 unsigned int nxlen;
>                 uintptr_t start;
>
> +               mbuf = loc->mbuf;
> +               nxlen = rte_pktmbuf_data_len(mbuf);
>                 /*
>                  * Packet length exceeds the allowed inline
>                  * data length, check whether the minimal
> @@ -3345,27 +3347,23 @@ mlx5_tx_packet_multi_inline(struct mlx5_txq_data *restrict txq,
>                         assert(txq->inlen_mode >= MLX5_ESEG_MIN_INLINE_SIZE);
>                         assert(txq->inlen_mode <= txq->inlen_send);
>                         inlen = txq->inlen_mode;
> -               } else {
> -                       if (!vlan || txq->vlan_en) {
> -                               /*
> -                                * VLAN insertion will be done inside by HW.
> -                                * It is not utmost effective - VLAN flag is
> -                                * checked twice, but we should proceed the
> -                                * inlining length correctly and take into
> -                                * account the VLAN header being inserted.
> -                                */
> -                               return mlx5_tx_packet_multi_send
> -                                                       (txq, loc, olx);
> -                       }
> +               } else if (vlan && !txq->vlan_en) {
> +                       /*
> +                        * VLAN insertion is requested and hardware does not
> +                        * support the offload, will do with software inline.
> +                        */
>                         inlen = MLX5_ESEG_MIN_INLINE_SIZE;
> +               } else if (mbuf->ol_flags & PKT_TX_DYNF_NOINLINE ||
> +                          nxlen > txq->inlen_send) {
> +                       return mlx5_tx_packet_multi_send(txq, loc, olx);
> +               } else {
> +                       goto do_first;
>                 }
>                 /*
>                  * Now we know the minimal amount of data is requested
>                  * to inline. Check whether we should inline the buffers
>                  * from the chain beginning to eliminate some mbufs.
>                  */
> -               mbuf = loc->mbuf;
> -               nxlen = rte_pktmbuf_data_len(mbuf);
>                 if (unlikely(nxlen <= txq->inlen_send)) {
>                         /* We can inline first mbuf at least. */
>                         if (nxlen < inlen) {
> @@ -3387,6 +3385,7 @@ mlx5_tx_packet_multi_inline(struct mlx5_txq_data *restrict txq,
>                                         goto do_align;
>                                 }
>                         }
> +do_first:
>                         do {
>                                 inlen = nxlen;
>                                 mbuf = NEXT(mbuf);
> --
> 2.21.0
>


-- 
Christian Ehrhardt
Staff Engineer, Ubuntu Server
Canonical Ltd


* [dpdk-stable] [PATCH 19.11 v2] net/mlx5: fix multi-segment inline for the first segments
  2021-08-16 16:29 [dpdk-stable] [PATCH 19.11 0/6] Cumulative fixes for stable 19.11 Bing Zhao
                   ` (6 preceding siblings ...)
  2021-08-17  9:42 ` [dpdk-stable] [PATCH 19.11 0/6] Cumulative fixes for stable 19.11 Christian Ehrhardt
@ 2021-08-17 13:54 ` Bing Zhao
  7 siblings, 0 replies; 11+ messages in thread
From: Bing Zhao @ 2021-08-17 13:54 UTC (permalink / raw)
  To: stable, christian.ehrhardt; +Cc: viacheslavo, matan

From: Viacheslav Ovsiienko <viacheslavo@nvidia.com>

[ upstream commit ec837ad0fc7c6df4912cc2706b9cd54b225f4a34 ]

Before the 19.08 release, the Tx burst routines of the mlx5 PMD
inlined data for the first short segments of multi-segment
packets. In release 19.08 the mlx5 Tx datapath was refactored and
this behavior was broken, affecting performance.

For example, the T-Rex traffic generator might use small leading
segments to handle packet headers, and performance degradation
was noticed.

If the first segments of a multi-segment packet are short and the
overall length is below the inline threshold, they should be
inlined into the WQE to restore the performance.

Fixes: 18a1c20044c0 ("net/mlx5: implement Tx burst template")
Cc: stable@dpdk.org

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5_rxtx.c | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 73dbf68d2b..8a6162ebd6 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -3336,6 +3336,8 @@ mlx5_tx_packet_multi_inline(struct mlx5_txq_data *restrict txq,
 		unsigned int nxlen;
 		uintptr_t start;
 
+		mbuf = loc->mbuf;
+		nxlen = rte_pktmbuf_data_len(mbuf);
 		/*
 		 * Packet length exceeds the allowed inline
 		 * data length, check whether the minimal
@@ -3345,27 +3347,22 @@ mlx5_tx_packet_multi_inline(struct mlx5_txq_data *restrict txq,
 			assert(txq->inlen_mode >= MLX5_ESEG_MIN_INLINE_SIZE);
 			assert(txq->inlen_mode <= txq->inlen_send);
 			inlen = txq->inlen_mode;
-		} else {
-			if (!vlan || txq->vlan_en) {
-				/*
-				 * VLAN insertion will be done inside by HW.
-				 * It is not utmost effective - VLAN flag is
-				 * checked twice, but we should proceed the
-				 * inlining length correctly and take into
-				 * account the VLAN header being inserted.
-				 */
-				return mlx5_tx_packet_multi_send
-							(txq, loc, olx);
-			}
+		} else if (vlan && !txq->vlan_en) {
+			/*
+			 * VLAN insertion is requested and hardware does not
+			 * support the offload, will do with software inline.
+			 */
 			inlen = MLX5_ESEG_MIN_INLINE_SIZE;
+		} else if (nxlen > txq->inlen_send) {
+			return mlx5_tx_packet_multi_send(txq, loc, olx);
+		} else {
+			goto do_first;
 		}
 		/*
 		 * Now we know the minimal amount of data is requested
 		 * to inline. Check whether we should inline the buffers
 		 * from the chain beginning to eliminate some mbufs.
 		 */
-		mbuf = loc->mbuf;
-		nxlen = rte_pktmbuf_data_len(mbuf);
 		if (unlikely(nxlen <= txq->inlen_send)) {
 			/* We can inline first mbuf at least. */
 			if (nxlen < inlen) {
@@ -3387,6 +3384,7 @@ mlx5_tx_packet_multi_inline(struct mlx5_txq_data *restrict txq,
 					goto do_align;
 				}
 			}
+do_first:
 			do {
 				inlen = nxlen;
 				mbuf = NEXT(mbuf);
-- 
2.25.1



* Re: [dpdk-stable] [PATCH 19.11 6/6] net/mlx5: fix multi-segment inline for the first segments
  2021-08-17 11:55   ` Christian Ehrhardt
@ 2021-08-17 13:55     ` Bing Zhao
  0 siblings, 0 replies; 11+ messages in thread
From: Bing Zhao @ 2021-08-17 13:55 UTC (permalink / raw)
  To: Christian Ehrhardt; +Cc: dpdk stable, Slava Ovsiienko, Matan Azrad

Hi Christian,

I talked with Slava; the feature used by this patch is not supported in LTS 19.11, so please discard this commit.
I have sent a v2 as a reply to the original email thread.

Thanks

> -----Original Message-----
> From: Christian Ehrhardt <christian.ehrhardt@canonical.com>
> Sent: Tuesday, August 17, 2021 7:55 PM
> To: Bing Zhao <bingz@nvidia.com>
> Cc: dpdk stable <stable@dpdk.org>; Slava Ovsiienko
> <viacheslavo@nvidia.com>; Matan Azrad <matan@nvidia.com>
> Subject: Re: [PATCH 19.11 6/6] net/mlx5: fix multi-segment inline
> for the first segments
> 
> 
> 
> On Mon, Aug 16, 2021 at 6:30 PM Bing Zhao <bingz@nvidia.com> wrote:
> >
> > From: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
> >
> > [ upstream commit ec837ad0fc7c6df4912cc2706b9cd54b225f4a34 ]
> 
> While not applying this causes build fails on some platforms.
> 
> Looks like:
> [  974s] ../drivers/net/mlx5/mlx5_rxtx.c: In function
> ‘mlx5_tx_packet_multi_inline’:
> [  974s] ../drivers/net/mlx5/mlx5_rxtx.c:3356:31: error:
> ‘PKT_TX_DYNF_NOINLINE’ undeclared (first use in this function)
> [  974s]  3356 |   } else if (mbuf->ol_flags & PKT_TX_DYNF_NOINLINE
> ||
> [  974s]       |                               ^~~~~~~~~~~~~~~~~~~~
> [  974s] ../drivers/net/mlx5/mlx5_rxtx.c:3356:31: note: each
> undeclared identifier is reported only once for each function it
> appears in [  974s] ninja: build stopped: subcommand failed.
> 
> And indeed this is the only occurrence
> 
> $ grep -Hrn PKT_TX_DYNF_NOINLINE *
> drivers/net/mlx5/mlx5_rxtx.c:3356: } else if (mbuf->ol_flags &
> PKT_TX_DYNF_NOINLINE ||
> 
> Since it only happens on some releases I'd assume the other
> arch/distros just do not build this?
> It seems to only affect those building with meson.
> 
> For now I've removed this patch again from 19.11 - please have a
> look and let me know if you'll provide a refreshed backport.
> 
> 
> 
> > Before 19.08 release the Tx burst routines of mlx5 PMD provided
> data
> > inline for the first short segments of the multi-segment packets.
> In
> > the release 19.08 mlx5 Tx datapath was refactored and this
> behavior
> > was broken, affecting the performance.
> >
> > For example, the T-Rex traffic generator might use small leading
> > segments to handle packet headers and performance degradation was
> > noticed.
> >
> > If the first segments of the multi-segment packet are short and
> the
> > overall length is below the inline threshold it should be inline
> into
> > the WQE to fix the performance.
> >
> > Fixes: 18a1c20044c0 ("net/mlx5: implement Tx burst template")
> > Cc: stable@dpdk.org
> >
> > Signed-off-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
> > Signed-off-by: Bing Zhao <bingz@nvidia.com>
> > ---
> >  drivers/net/mlx5/mlx5_rxtx.c | 27 +++++++++++++--------------
> >  1 file changed, 13 insertions(+), 14 deletions(-)
> >
> > diff --git a/drivers/net/mlx5/mlx5_rxtx.c
> > b/drivers/net/mlx5/mlx5_rxtx.c index 73dbf68d2b..094e359e55 100644
> > --- a/drivers/net/mlx5/mlx5_rxtx.c
> > +++ b/drivers/net/mlx5/mlx5_rxtx.c
> > @@ -3336,6 +3336,8 @@ mlx5_tx_packet_multi_inline(struct
> mlx5_txq_data *restrict txq,
> >                 unsigned int nxlen;
> >                 uintptr_t start;
> >
> > +               mbuf = loc->mbuf;
> > +               nxlen = rte_pktmbuf_data_len(mbuf);
> >                 /*
> >                  * Packet length exceeds the allowed inline
> >                  * data length, check whether the minimal @@ -
> 3345,27
> > +3347,23 @@ mlx5_tx_packet_multi_inline(struct mlx5_txq_data
> *restrict txq,
> >                         assert(txq->inlen_mode >=
> MLX5_ESEG_MIN_INLINE_SIZE);
> >                         assert(txq->inlen_mode <= txq->inlen_send);
> >                         inlen = txq->inlen_mode;
> > -               } else {
> > -                       if (!vlan || txq->vlan_en) {
> > -                               /*
> > -                                * VLAN insertion will be done
> inside by HW.
> > -                                * It is not utmost effective -
> VLAN flag is
> > -                                * checked twice, but we should
> proceed the
> > -                                * inlining length correctly and
> take into
> > -                                * account the VLAN header being
> inserted.
> > -                                */
> > -                               return mlx5_tx_packet_multi_send
> > -                                                       (txq, loc,
> olx);
> > -                       }
> > +               } else if (vlan && !txq->vlan_en) {
> > +                       /*
> > +                        * VLAN insertion is requested and
> hardware does not
> > +                        * support the offload, will do with
> software inline.
> > +                        */
> >                         inlen = MLX5_ESEG_MIN_INLINE_SIZE;
> > +               } else if (mbuf->ol_flags & PKT_TX_DYNF_NOINLINE
> ||
> > +                          nxlen > txq->inlen_send) {
> > +                       return mlx5_tx_packet_multi_send(txq, loc,
> olx);
> > +               } else {
> > +                       goto do_first;
> >                 }
> >                 /*
> >                  * Now we know the minimal amount of data is
> requested
> >                  * to inline. Check whether we should inline the
> buffers
> >                  * from the chain beginning to eliminate some
> mbufs.
> >                  */
> > -               mbuf = loc->mbuf;
> > -               nxlen = rte_pktmbuf_data_len(mbuf);
> >                 if (unlikely(nxlen <= txq->inlen_send)) {
> >                         /* We can inline first mbuf at least. */
> >                         if (nxlen < inlen) { @@ -3387,6 +3385,7 @@
> > mlx5_tx_packet_multi_inline(struct mlx5_txq_data *restrict txq,
> >                                         goto do_align;
> >                                 }
> >                         }
> > +do_first:
> >                         do {
> >                                 inlen = nxlen;
> >                                 mbuf = NEXT(mbuf);
> > --
> > 2.21.0
> >
> 
> 
> --
> Christian Ehrhardt
> Staff Engineer, Ubuntu Server
> Canonical Ltd

