DPDK patches and discussions
* [PATCH] net/i40e: add diagnostic support in TX path
@ 2023-12-21 10:13 Mingjin Ye
  2023-12-22 16:17 ` Morten Brørup
  2024-01-04 10:20 ` [PATCH v3] " Mingjin Ye
  0 siblings, 2 replies; 11+ messages in thread
From: Mingjin Ye @ 2023-12-21 10:13 UTC (permalink / raw)
  To: dev; +Cc: qiming.yang, Mingjin Ye, Yuying Zhang, Beilei Xing

The only way to enable diagnostics for the TX path is to modify the
application source code, which makes faults difficult to diagnose.

This patch introduces the devarg option "mbuf_check", whose
parameters select the corresponding diagnostics to enable.

supported cases: mbuf, size, segment, offload, strict.
 1. mbuf: check for corrupted mbuf.
 2. size: check min/max packet length according to the hw spec.
 3. segment: check that the number of mbuf segments does not exceed the hw limit.
 4. offload: check for any unsupported offload flag.
 5. strict: check protocol headers.

parameter format: mbuf_check=[mbuf,<case1>,<case2>]
eg: dpdk-testpmd -a 0000:81:01.0,mbuf_check=[mbuf,size] -- -i
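
The new counters are exposed through the standard ethdev xstats API.
As an illustration only (not part of this patch), an application
could read them roughly as follows:

  #include <stdio.h>
  #include <inttypes.h>
  #include <rte_ethdev.h>

  /* Sketch: print all xstats of a port; with the "mbuf_check" devarg
   * enabled the list will include the mdd_mbuf_error_packets and
   * mdd_pkt_error_packets counters added by this patch. */
  static void
  dump_port_xstats(uint16_t port_id)
  {
  	int n = rte_eth_xstats_get_names(port_id, NULL, 0);

  	if (n <= 0)
  		return;

  	struct rte_eth_xstat_name names[n];
  	struct rte_eth_xstat stats[n];

  	if (rte_eth_xstats_get_names(port_id, names, n) != n ||
  	    rte_eth_xstats_get(port_id, stats, n) != n)
  		return;

  	for (int i = 0; i < n; i++)
  		printf("%s: %" PRIu64 "\n", names[i].name, stats[i].value);
  }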

Signed-off-by: Mingjin Ye <mingjinx.ye@intel.com>
---
 drivers/net/i40e/i40e_ethdev.c | 108 +++-
 drivers/net/i40e/i40e_ethdev.h |  32 ++
 drivers/net/i40e/i40e_rxtx.c   | 916 +++++++++++++++++++++++++++++++++
 drivers/net/i40e/i40e_rxtx.h   |  10 +
 4 files changed, 1064 insertions(+), 2 deletions(-)

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 3ca226156b..d4a2b4b277 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -48,6 +48,7 @@
 #define ETH_I40E_SUPPORT_MULTI_DRIVER	"support-multi-driver"
 #define ETH_I40E_QUEUE_NUM_PER_VF_ARG	"queue-num-per-vf"
 #define ETH_I40E_VF_MSG_CFG		"vf_msg_cfg"
+#define ETH_I40E_MDD_CHECK_ARG       "mbuf_check"
 
 #define I40E_CLEAR_PXE_WAIT_MS     200
 #define I40E_VSI_TSR_QINQ_STRIP		0x4010
@@ -412,6 +413,7 @@ static const char *const valid_keys[] = {
 	ETH_I40E_SUPPORT_MULTI_DRIVER,
 	ETH_I40E_QUEUE_NUM_PER_VF_ARG,
 	ETH_I40E_VF_MSG_CFG,
+	ETH_I40E_MDD_CHECK_ARG,
 	NULL};
 
 static const struct rte_pci_id pci_id_i40e_map[] = {
@@ -545,6 +547,16 @@ static const struct rte_i40e_xstats_name_off rte_i40e_stats_strings[] = {
 #define I40E_NB_ETH_XSTATS (sizeof(rte_i40e_stats_strings) / \
 		sizeof(rte_i40e_stats_strings[0]))
 
+static const struct rte_i40e_xstats_name_off i40e_mdd_strings[] = {
+	{"mdd_mbuf_error_packets", offsetof(struct i40e_mdd_stats,
+		mdd_mbuf_err_count)},
+	{"mdd_pkt_error_packets", offsetof(struct i40e_mdd_stats,
+		mdd_pkt_err_count)},
+};
+
+#define I40E_NB_MDD_XSTATS (sizeof(i40e_mdd_strings) / \
+		sizeof(i40e_mdd_strings[0]))
+
 static const struct rte_i40e_xstats_name_off rte_i40e_hw_port_strings[] = {
 	{"tx_link_down_dropped", offsetof(struct i40e_hw_port_stats,
 		tx_dropped_link_down)},
@@ -1373,10 +1385,58 @@ read_vf_msg_config(__rte_unused const char *key,
 	return 0;
 }
 
+static int
+i40e_parse_mdd_checker(__rte_unused const char *key, const char *value, void *args)
+{
+	char *cur;
+	char *tmp;
+	int str_len;
+	int valid_len;
+
+	int ret = 0;
+	uint64_t *mc_flags = args;
+	char *str2 = strdup(value);
+	if (str2 == NULL)
+		return -1;
+
+	str_len = strlen(str2);
+	if (str2[0] == '[' && str2[str_len - 1] == ']') {
+		if (str_len < 3) {
+			ret = -1;
+			goto mdd_end;
+		}
+		valid_len = str_len - 2;
+		memmove(str2, str2 + 1, valid_len);
+		memset(str2 + valid_len, '\0', 2);
+	}
+	cur = strtok_r(str2, ",", &tmp);
+	while (cur != NULL) {
+		if (!strcmp(cur, "mbuf"))
+			*mc_flags |= I40E_MDD_CHECK_F_TX_MBUF;
+		else if (!strcmp(cur, "size"))
+			*mc_flags |= I40E_MDD_CHECK_F_TX_SIZE;
+		else if (!strcmp(cur, "segment"))
+			*mc_flags |= I40E_MDD_CHECK_F_TX_SEGMENT;
+		else if (!strcmp(cur, "offload"))
+			*mc_flags |= I40E_MDD_CHECK_F_TX_OFFLOAD;
+		else if (!strcmp(cur, "strict"))
+			*mc_flags |= I40E_MDD_CHECK_F_TX_STRICT;
+		else
+			PMD_DRV_LOG(ERR, "Unsupported mdd check type: %s", cur);
+		cur = strtok_r(NULL, ",", &tmp);
+	}
+
+mdd_end:
+	free(str2);
+	return ret;
+}
+
 static int
 i40e_parse_vf_msg_config(struct rte_eth_dev *dev,
 		struct i40e_vf_msg_cfg *msg_cfg)
 {
+	struct i40e_adapter *ad =
+		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 	struct rte_kvargs *kvlist;
 	int kvargs_count;
 	int ret = 0;
@@ -1401,6 +1461,14 @@ i40e_parse_vf_msg_config(struct rte_eth_dev *dev,
 		goto free_end;
 	}
 
+	ret = rte_kvargs_process(kvlist, ETH_I40E_MDD_CHECK_ARG,
+				 &i40e_parse_mdd_checker, &ad->mc_flags);
+	if (ret)
+		goto free_end;
+
+	if (ad->mc_flags)
+		ad->devargs.mbuf_check = 1;
+
 	if (rte_kvargs_process(kvlist, ETH_I40E_VF_MSG_CFG,
 			read_vf_msg_config, msg_cfg) < 0)
 		ret = -EINVAL;
@@ -1433,7 +1501,7 @@ eth_i40e_dev_init(struct rte_eth_dev *dev, void *init_params __rte_unused)
 	dev->rx_pkt_burst = i40e_recv_pkts;
 	dev->tx_pkt_burst = i40e_xmit_pkts;
 	dev->tx_pkt_prepare = i40e_prep_pkts;
-
+	i40e_pkt_burst_init(dev);
 	/* for secondary processes, we don't initialise any further as primary
 	 * has already done this work. Only check we don't need a different
 	 * RX function */
@@ -2324,6 +2392,8 @@ i40e_dev_start(struct rte_eth_dev *dev)
 	struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
 	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 	struct i40e_vsi *main_vsi = pf->main_vsi;
+	struct i40e_adapter *ad =
+		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 	int ret, i;
 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
 	struct rte_intr_handle *intr_handle = pci_dev->intr_handle;
@@ -2483,6 +2553,7 @@ i40e_dev_start(struct rte_eth_dev *dev)
 	max_frame_size = dev->data->mtu ?
 		dev->data->mtu + I40E_ETH_OVERHEAD :
 		I40E_FRAME_SIZE_MAX;
+	ad->max_pkt_len = max_frame_size;
 
 	/* Set the max frame size to HW*/
 	i40e_aq_set_mac_config(hw, max_frame_size, TRUE, false, 0, NULL);
@@ -3508,7 +3579,8 @@ i40e_dev_stats_reset(struct rte_eth_dev *dev)
 static uint32_t
 i40e_xstats_calc_num(void)
 {
-	return I40E_NB_ETH_XSTATS + I40E_NB_HW_PORT_XSTATS +
+	return I40E_NB_ETH_XSTATS + I40E_NB_MDD_XSTATS +
+		I40E_NB_HW_PORT_XSTATS +
 		(I40E_NB_RXQ_PRIO_XSTATS * 8) +
 		(I40E_NB_TXQ_PRIO_XSTATS * 8);
 }
@@ -3533,6 +3605,14 @@ static int i40e_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
 		count++;
 	}
 
+	/* Get stats from i40e_mdd_stats struct */
+	for (i = 0; i < I40E_NB_MDD_XSTATS; i++) {
+		strlcpy(xstats_names[count].name,
+			i40e_mdd_strings[i].name,
+			sizeof(xstats_names[count].name));
+		count++;
+	}
+
 	/* Get individual stats from i40e_hw_port struct */
 	for (i = 0; i < I40E_NB_HW_PORT_XSTATS; i++) {
 		strlcpy(xstats_names[count].name,
@@ -3569,6 +3649,11 @@ i40e_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
 {
 	struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
 	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	struct i40e_adapter *adapter =
+		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+	struct i40e_tx_queue *txq;
+	uint64_t mdd_mbuf_err_count = 0;
+	uint64_t mdd_pkt_err_count = 0;
 	unsigned i, count, prio;
 	struct i40e_hw_port_stats *hw_stats = &pf->stats;
 
@@ -3591,6 +3676,25 @@ i40e_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
 		count++;
 	}
 
+	/* Get stats from i40e_mdd_stats struct */
+	if (adapter->devargs.mbuf_check) {
+		for (i = 0; i < dev->data->nb_tx_queues; i++) {
+			txq = dev->data->tx_queues[i];
+			mdd_mbuf_err_count += txq->mdd_mbuf_err_count;
+			mdd_pkt_err_count += txq->mdd_pkt_err_count;
+		}
+		pf->mdd_stats.mdd_mbuf_err_count = mdd_mbuf_err_count;
+		pf->mdd_stats.mdd_pkt_err_count = mdd_pkt_err_count;
+	}
+
+	for (i = 0; i < I40E_NB_MDD_XSTATS; i++) {
+		xstats[count].value =
+			*(uint64_t *)((char *)&pf->mdd_stats +
+					i40e_mdd_strings[i].offset);
+		xstats[count].id = count;
+		count++;
+	}
+
 	/* Get individual stats from i40e_hw_port struct */
 	for (i = 0; i < I40E_NB_HW_PORT_XSTATS; i++) {
 		xstats[count].value = *(uint64_t *)(((char *)hw_stats) +
diff --git a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
index 1bbe7ad376..64ea49d247 100644
--- a/drivers/net/i40e/i40e_ethdev.h
+++ b/drivers/net/i40e/i40e_ethdev.h
@@ -376,6 +376,11 @@ struct i40e_macvlan_filter {
 	uint16_t vlan_id;
 };
 
+struct i40e_mdd_stats {
+	uint64_t mdd_mbuf_err_count;
+	uint64_t mdd_pkt_err_count;
+};
+
 /*
  * Structure that defines a VSI, associated with a adapter.
  */
@@ -1122,6 +1127,7 @@ struct i40e_pf {
 
 	struct i40e_hw_port_stats stats_offset;
 	struct i40e_hw_port_stats stats;
+	struct i40e_mdd_stats mdd_stats;
 	u64 rx_err1;	/* rxerr1 */
 	u64 rx_err1_offset;
 
@@ -1224,6 +1230,29 @@ struct i40e_vsi_vlan_pvid_info {
 #define I40E_MAX_PKT_TYPE  256
 #define I40E_FLOW_TYPE_MAX 64
 
+
+struct i40e_rx_burst_elem {
+	TAILQ_ENTRY(i40e_rx_burst_elem) next;
+	eth_rx_burst_t rx_pkt_burst;
+};
+
+struct i40e_tx_burst_elem {
+	TAILQ_ENTRY(i40e_tx_burst_elem) next;
+	eth_tx_burst_t tx_pkt_burst;
+};
+
+#define I40E_MDD_CHECK_F_TX_MBUF        (1ULL << 0)
+#define I40E_MDD_CHECK_F_TX_SIZE        (1ULL << 1)
+#define I40E_MDD_CHECK_F_TX_SEGMENT     (1ULL << 2)
+#define I40E_MDD_CHECK_F_TX_OFFLOAD     (1ULL << 3)
+#define I40E_MDD_CHECK_F_TX_STRICT      (1ULL << 4)
+
+/**
+ * Cache devargs parse result.
+ */
+struct i40e_devargs {
+	int mbuf_check;
+};
 /*
  * Structure to store private data for each PF/VF instance.
  */
@@ -1240,6 +1269,9 @@ struct i40e_adapter {
 	bool tx_simple_allowed;
 	bool tx_vec_allowed;
 
+	struct i40e_devargs devargs;
+	uint64_t mc_flags; /* mdd check flags. */
+	uint16_t max_pkt_len; /* Maximum packet length */
 	/* For PTP */
 	struct rte_timecounter systime_tc;
 	struct rte_timecounter rx_tstamp_tc;
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 9aa5facb53..c1193829c4 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -24,6 +24,10 @@
 #include <rte_ip.h>
 #include <rte_net.h>
 #include <rte_vect.h>
+#include <rte_tailq.h>
+#include <rte_vxlan.h>
+#include <rte_gtp.h>
+#include <rte_geneve.h>
 
 #include "i40e_logs.h"
 #include "base/i40e_prototype.h"
@@ -79,6 +83,48 @@
 #define I40E_TX_OFFLOAD_SIMPLE_NOTSUP_MASK \
 		(RTE_MBUF_F_TX_OFFLOAD_MASK ^ I40E_TX_OFFLOAD_SIMPLE_SUP_MASK)
 
+#define GRE_CHECKSUM_PRESENT   0x8000
+#define GRE_KEY_PRESENT                0x2000
+#define GRE_SEQUENCE_PRESENT   0x1000
+#define GRE_EXT_LEN            4
+#define GRE_SUPPORTED_FIELDS	(GRE_CHECKSUM_PRESENT | GRE_KEY_PRESENT |\
+				 GRE_SEQUENCE_PRESENT)
+
+#ifndef IPPROTO_IPIP
+#define IPPROTO_IPIP 4
+#endif
+#ifndef IPPROTO_GRE
+#define IPPROTO_GRE    47
+#endif
+
+static uint16_t vxlan_gpe_udp_port = RTE_VXLAN_GPE_DEFAULT_PORT;
+static uint16_t geneve_udp_port = RTE_GENEVE_DEFAULT_PORT;
+
+struct simple_gre_hdr {
+	uint16_t flags;
+	uint16_t proto;
+} __rte_packed;
+
+/* structure that caches offload info for the current packet */
+struct offload_info {
+	uint16_t ethertype;
+	uint8_t gso_enable;
+	uint16_t l2_len;
+	uint16_t l3_len;
+	uint16_t l4_len;
+	uint8_t l4_proto;
+	uint8_t is_tunnel;
+	uint16_t outer_ethertype;
+	uint16_t outer_l2_len;
+	uint16_t outer_l3_len;
+	uint8_t outer_l4_proto;
+	uint16_t tso_segsz;
+	uint16_t tunnel_tso_segsz;
+	uint32_t pkt_len;
+};
+
+static struct i40e_pkt_burst i40e_rxtx_pkt_burst[RTE_MAX_ETHPORTS];
+
 static int
 i40e_monitor_callback(const uint64_t value,
 		const uint64_t arg[RTE_POWER_MONITOR_OPAQUE_SZ] __rte_unused)
@@ -1536,6 +1582,811 @@ i40e_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 	return nb_tx;
 }
 
+/* Parse an IPv4 header to fill l3_len, l4_len, and l4_proto */
+static inline void
+parse_ipv4(struct rte_ipv4_hdr *ipv4_hdr, struct offload_info *info)
+{
+	struct rte_tcp_hdr *tcp_hdr;
+
+	info->l3_len = rte_ipv4_hdr_len(ipv4_hdr);
+	info->l4_proto = ipv4_hdr->next_proto_id;
+
+	/* only fill l4_len for TCP, it's useful for TSO */
+	if (info->l4_proto == IPPROTO_TCP) {
+		tcp_hdr = (struct rte_tcp_hdr *)
+			((char *)ipv4_hdr + info->l3_len);
+		info->l4_len = (tcp_hdr->data_off & 0xf0) >> 2;
+	} else if (info->l4_proto == IPPROTO_UDP) {
+		info->l4_len = sizeof(struct rte_udp_hdr);
+	} else {
+		info->l4_len = 0;
+	}
+}
+
+/* Parse an IPv6 header to fill l3_len, l4_len, and l4_proto */
+static inline void
+parse_ipv6(struct rte_ipv6_hdr *ipv6_hdr, struct offload_info *info)
+{
+	struct rte_tcp_hdr *tcp_hdr;
+
+	info->l3_len = sizeof(struct rte_ipv6_hdr);
+	info->l4_proto = ipv6_hdr->proto;
+
+	/* only fill l4_len for TCP, it's useful for TSO */
+	if (info->l4_proto == IPPROTO_TCP) {
+		tcp_hdr = (struct rte_tcp_hdr *)
+			((char *)ipv6_hdr + info->l3_len);
+		info->l4_len = (tcp_hdr->data_off & 0xf0) >> 2;
+	} else if (info->l4_proto == IPPROTO_UDP) {
+		info->l4_len = sizeof(struct rte_udp_hdr);
+	} else {
+		info->l4_len = 0;
+	}
+}
+
+/*
+ * Parse an ethernet header to fill the ethertype, l2_len, l3_len and
+ * ipproto. This function is able to recognize IPv4/IPv6 with optional VLAN
+ * headers. The l4_len argument is only set in case of TCP (useful for TSO).
+ */
+static inline void
+parse_ethernet(struct rte_ether_hdr *eth_hdr, struct offload_info *info)
+{
+	struct rte_ipv4_hdr *ipv4_hdr;
+	struct rte_ipv6_hdr *ipv6_hdr;
+	struct rte_vlan_hdr *vlan_hdr;
+
+	info->l2_len = sizeof(struct rte_ether_hdr);
+	info->ethertype = eth_hdr->ether_type;
+
+	while (info->ethertype == rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) ||
+	       info->ethertype == rte_cpu_to_be_16(RTE_ETHER_TYPE_QINQ)) {
+		vlan_hdr = (struct rte_vlan_hdr *)
+			((char *)eth_hdr + info->l2_len);
+		info->l2_len  += sizeof(struct rte_vlan_hdr);
+		info->ethertype = vlan_hdr->eth_proto;
+	}
+
+	switch (info->ethertype) {
+	case RTE_STATIC_BSWAP16(RTE_ETHER_TYPE_IPV4):
+		ipv4_hdr = (struct rte_ipv4_hdr *)
+			((char *)eth_hdr + info->l2_len);
+		parse_ipv4(ipv4_hdr, info);
+		break;
+	case RTE_STATIC_BSWAP16(RTE_ETHER_TYPE_IPV6):
+		ipv6_hdr = (struct rte_ipv6_hdr *)
+			((char *)eth_hdr + info->l2_len);
+		parse_ipv6(ipv6_hdr, info);
+		break;
+	default:
+		info->l4_len = 0;
+		info->l3_len = 0;
+		info->l4_proto = 0;
+		break;
+	}
+}
+
+/* Fill in outer layers length */
+static inline void
+update_tunnel_outer(struct offload_info *info)
+{
+	info->is_tunnel = 1;
+	info->outer_ethertype = info->ethertype;
+	info->outer_l2_len = info->l2_len;
+	info->outer_l3_len = info->l3_len;
+	info->outer_l4_proto = info->l4_proto;
+}
+
+/*
+ * Parse a GTP protocol header.
+ * No optional fields and next extension header type.
+ */
+static inline void
+parse_gtp(struct rte_udp_hdr *udp_hdr,
+	  struct offload_info *info)
+{
+	struct rte_ipv4_hdr *ipv4_hdr;
+	struct rte_ipv6_hdr *ipv6_hdr;
+	struct rte_gtp_hdr *gtp_hdr;
+	uint8_t gtp_len = sizeof(*gtp_hdr);
+	uint8_t ip_ver;
+
+	/* Check UDP destination port. */
+	if (udp_hdr->dst_port != rte_cpu_to_be_16(RTE_GTPC_UDP_PORT) &&
+	    udp_hdr->src_port != rte_cpu_to_be_16(RTE_GTPC_UDP_PORT) &&
+	    udp_hdr->dst_port != rte_cpu_to_be_16(RTE_GTPU_UDP_PORT))
+		return;
+
+	update_tunnel_outer(info);
+	info->l2_len = 0;
+
+	gtp_hdr = (struct rte_gtp_hdr *)((char *)udp_hdr +
+		  sizeof(struct rte_udp_hdr));
+
+	/*
+	 * Check message type. If message type is 0xff, it is
+	 * a GTP data packet. If not, it is a GTP control packet
+	 */
+	if (gtp_hdr->msg_type == 0xff) {
+		ip_ver = *(uint8_t *)((char *)udp_hdr +
+			 sizeof(struct rte_udp_hdr) +
+			 sizeof(struct rte_gtp_hdr));
+		ip_ver = (ip_ver) & 0xf0;
+
+		if (ip_ver == RTE_GTP_TYPE_IPV4) {
+			ipv4_hdr = (struct rte_ipv4_hdr *)((char *)gtp_hdr +
+				   gtp_len);
+			info->ethertype = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
+			parse_ipv4(ipv4_hdr, info);
+		} else if (ip_ver == RTE_GTP_TYPE_IPV6) {
+			ipv6_hdr = (struct rte_ipv6_hdr *)((char *)gtp_hdr +
+				   gtp_len);
+			info->ethertype = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
+			parse_ipv6(ipv6_hdr, info);
+		}
+	} else {
+		info->ethertype = 0;
+		info->l4_len = 0;
+		info->l3_len = 0;
+		info->l4_proto = 0;
+	}
+
+	info->l2_len += RTE_ETHER_GTP_HLEN;
+}
+
+/* Parse a VXLAN header */
+static inline void
+parse_vxlan(struct rte_udp_hdr *udp_hdr,
+	    struct offload_info *info)
+{
+	struct rte_ether_hdr *eth_hdr;
+
+	/* check UDP destination port, RTE_VXLAN_DEFAULT_PORT (4789) is the
+	 * default VXLAN port (rfc7348) or that the Rx offload flag is set
+	 * (i40e only currently)
+	 */
+	if (udp_hdr->dst_port != rte_cpu_to_be_16(RTE_VXLAN_DEFAULT_PORT))
+		return;
+
+	update_tunnel_outer(info);
+
+	eth_hdr = (struct rte_ether_hdr *)((char *)udp_hdr +
+		sizeof(struct rte_udp_hdr) +
+		sizeof(struct rte_vxlan_hdr));
+
+	parse_ethernet(eth_hdr, info);
+	info->l2_len += RTE_ETHER_VXLAN_HLEN; /* add UDP + VXLAN */
+}
+
+/* Parse a VXLAN-GPE header */
+static inline void
+parse_vxlan_gpe(struct rte_udp_hdr *udp_hdr,
+	    struct offload_info *info)
+{
+	struct rte_ether_hdr *eth_hdr;
+	struct rte_ipv4_hdr *ipv4_hdr;
+	struct rte_ipv6_hdr *ipv6_hdr;
+	struct rte_vxlan_gpe_hdr *vxlan_gpe_hdr;
+	uint8_t vxlan_gpe_len = sizeof(*vxlan_gpe_hdr);
+
+	/* Check UDP destination port. */
+	if (udp_hdr->dst_port != rte_cpu_to_be_16(vxlan_gpe_udp_port))
+		return;
+
+	vxlan_gpe_hdr = (struct rte_vxlan_gpe_hdr *)((char *)udp_hdr +
+				sizeof(struct rte_udp_hdr));
+
+	if (!vxlan_gpe_hdr->proto || vxlan_gpe_hdr->proto ==
+	    RTE_VXLAN_GPE_TYPE_IPV4) {
+		update_tunnel_outer(info);
+
+		ipv4_hdr = (struct rte_ipv4_hdr *)((char *)vxlan_gpe_hdr +
+			   vxlan_gpe_len);
+
+		parse_ipv4(ipv4_hdr, info);
+		info->ethertype = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
+		info->l2_len = 0;
+
+	} else if (vxlan_gpe_hdr->proto == RTE_VXLAN_GPE_TYPE_IPV6) {
+		update_tunnel_outer(info);
+
+		ipv6_hdr = (struct rte_ipv6_hdr *)((char *)vxlan_gpe_hdr +
+			   vxlan_gpe_len);
+
+		info->ethertype = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
+		parse_ipv6(ipv6_hdr, info);
+		info->l2_len = 0;
+
+	} else if (vxlan_gpe_hdr->proto == RTE_VXLAN_GPE_TYPE_ETH) {
+		update_tunnel_outer(info);
+
+		eth_hdr = (struct rte_ether_hdr *)((char *)vxlan_gpe_hdr +
+			  vxlan_gpe_len);
+
+		parse_ethernet(eth_hdr, info);
+	} else {
+		return;
+	}
+
+	info->l2_len += RTE_ETHER_VXLAN_GPE_HLEN;
+}
+
+/* Parse a GENEVE header */
+static inline void
+parse_geneve(struct rte_udp_hdr *udp_hdr,
+	    struct offload_info *info)
+{
+	struct rte_ether_hdr *eth_hdr;
+	struct rte_ipv4_hdr *ipv4_hdr;
+	struct rte_ipv6_hdr *ipv6_hdr;
+	struct rte_geneve_hdr *geneve_hdr;
+	uint16_t geneve_len;
+
+	/* Check UDP destination port. */
+	if (udp_hdr->dst_port != rte_cpu_to_be_16(geneve_udp_port))
+		return;
+
+	geneve_hdr = (struct rte_geneve_hdr *)((char *)udp_hdr +
+				sizeof(struct rte_udp_hdr));
+	geneve_len = sizeof(struct rte_geneve_hdr) + geneve_hdr->opt_len * 4;
+	if (!geneve_hdr->proto || geneve_hdr->proto ==
+	    rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) {
+		update_tunnel_outer(info);
+		ipv4_hdr = (struct rte_ipv4_hdr *)((char *)geneve_hdr +
+			   geneve_len);
+		parse_ipv4(ipv4_hdr, info);
+		info->ethertype = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
+		info->l2_len = 0;
+	} else if (geneve_hdr->proto == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6)) {
+		update_tunnel_outer(info);
+		ipv6_hdr = (struct rte_ipv6_hdr *)((char *)geneve_hdr +
+			   geneve_len);
+		info->ethertype = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
+		parse_ipv6(ipv6_hdr, info);
+		info->l2_len = 0;
+
+	} else if (geneve_hdr->proto == rte_cpu_to_be_16(RTE_GENEVE_TYPE_ETH)) {
+		update_tunnel_outer(info);
+		eth_hdr = (struct rte_ether_hdr *)((char *)geneve_hdr +
+			  geneve_len);
+		parse_ethernet(eth_hdr, info);
+	} else {
+		return;
+	}
+
+	info->l2_len +=
+		(sizeof(struct rte_udp_hdr) + sizeof(struct rte_geneve_hdr) +
+		((struct rte_geneve_hdr *)geneve_hdr)->opt_len * 4);
+}
+
+/* Parse a GRE header */
+static inline void
+parse_gre(struct simple_gre_hdr *gre_hdr, struct offload_info *info)
+{
+	struct rte_ether_hdr *eth_hdr;
+	struct rte_ipv4_hdr *ipv4_hdr;
+	struct rte_ipv6_hdr *ipv6_hdr;
+	uint8_t gre_len = 0;
+
+	gre_len += sizeof(struct simple_gre_hdr);
+
+	if (gre_hdr->flags & rte_cpu_to_be_16(GRE_KEY_PRESENT))
+		gre_len += GRE_EXT_LEN;
+	if (gre_hdr->flags & rte_cpu_to_be_16(GRE_SEQUENCE_PRESENT))
+		gre_len += GRE_EXT_LEN;
+	if (gre_hdr->flags & rte_cpu_to_be_16(GRE_CHECKSUM_PRESENT))
+		gre_len += GRE_EXT_LEN;
+
+	if (gre_hdr->proto == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) {
+		update_tunnel_outer(info);
+
+		ipv4_hdr = (struct rte_ipv4_hdr *)((char *)gre_hdr + gre_len);
+
+		parse_ipv4(ipv4_hdr, info);
+		info->ethertype = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
+		info->l2_len = 0;
+
+	} else if (gre_hdr->proto == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6)) {
+		update_tunnel_outer(info);
+
+		ipv6_hdr = (struct rte_ipv6_hdr *)((char *)gre_hdr + gre_len);
+
+		info->ethertype = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
+		parse_ipv6(ipv6_hdr, info);
+		info->l2_len = 0;
+
+	} else if (gre_hdr->proto == rte_cpu_to_be_16(RTE_ETHER_TYPE_TEB)) {
+		update_tunnel_outer(info);
+
+		eth_hdr = (struct rte_ether_hdr *)((char *)gre_hdr + gre_len);
+
+		parse_ethernet(eth_hdr, info);
+	} else {
+		return;
+	}
+
+	info->l2_len += gre_len;
+}
+
+/* Parse an encapsulated IP or IPv6 header */
+static inline void
+parse_encap_ip(void *encap_ip, struct offload_info *info)
+{
+	struct rte_ipv4_hdr *ipv4_hdr = encap_ip;
+	struct rte_ipv6_hdr *ipv6_hdr = encap_ip;
+	uint8_t ip_version;
+
+	ip_version = (ipv4_hdr->version_ihl & 0xf0) >> 4;
+
+	if (ip_version != 4 && ip_version != 6)
+		return;
+
+	info->is_tunnel = 1;
+	info->outer_ethertype = info->ethertype;
+	info->outer_l2_len = info->l2_len;
+	info->outer_l3_len = info->l3_len;
+
+	if (ip_version == 4) {
+		parse_ipv4(ipv4_hdr, info);
+		info->ethertype = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
+	} else {
+		parse_ipv6(ipv6_hdr, info);
+		info->ethertype = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
+	}
+	info->l2_len = 0;
+}
+
+static  inline int
+check_mbuf_len(struct offload_info *info, struct rte_mbuf *m)
+{
+	if (m->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) {
+		if (info->outer_l2_len != m->outer_l2_len) {
+			PMD_DRV_LOG(ERR, "outer_l2_len error in mbuf. Original "
+			"length: %hu, calculated length: %u", m->outer_l2_len,
+			info->outer_l2_len);
+			return -1;
+		}
+		if (info->outer_l3_len != m->outer_l3_len) {
+			PMD_DRV_LOG(ERR, "outer_l3_len error in mbuf. Original "
+			"length: %hu, calculated length: %u", m->outer_l3_len,
+			info->outer_l3_len);
+			return -1;
+		}
+	}
+
+	if (info->l2_len != m->l2_len) {
+		PMD_DRV_LOG(ERR, "l2_len error in mbuf. Original "
+		"length: %hu, calculated length: %u", m->l2_len,
+		info->l2_len);
+		return -1;
+	}
+	if (info->l3_len != m->l3_len) {
+		PMD_DRV_LOG(ERR, "l3_len error in mbuf. Original "
+		"length: %hu, calculated length: %u", m->l3_len,
+		info->l3_len);
+		return -1;
+	}
+	if (info->l4_len != m->l4_len) {
+		PMD_DRV_LOG(ERR, "l4_len error in mbuf. Original "
+		"length: %hu, calculated length: %u", m->l4_len,
+		info->l4_len);
+		return -1;
+	}
+
+	return 0;
+}
+
+static  inline int
+check_ether_type(struct offload_info *info, struct rte_mbuf *m)
+{
+	int ret = 0;
+
+	if (m->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) {
+		if (info->outer_ethertype ==
+			rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) {
+			if (!(m->ol_flags & RTE_MBUF_F_TX_OUTER_IPV4)) {
+				PMD_DRV_LOG(ERR, "Outer ethernet type is ipv4, "
+				"tx offload missing `RTE_MBUF_F_TX_OUTER_IPV4` flag.");
+				ret = -1;
+			}
+			if (m->ol_flags & RTE_MBUF_F_TX_OUTER_IPV6) {
+				PMD_DRV_LOG(ERR, "Outer ethernet type is ipv4, tx "
+				"offload contains wrong `RTE_MBUF_F_TX_OUTER_IPV6` flag");
+				ret = -1;
+			}
+		} else if (info->outer_ethertype ==
+			rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6)) {
+			if (!(m->ol_flags & RTE_MBUF_F_TX_OUTER_IPV6)) {
+				PMD_DRV_LOG(ERR, "Outer ethernet type is ipv6, "
+				"tx offload missing `RTE_MBUF_F_TX_OUTER_IPV6` flag.");
+				ret = -1;
+			}
+			if (m->ol_flags & RTE_MBUF_F_TX_OUTER_IPV4) {
+				PMD_DRV_LOG(ERR, "Outer ethernet type is ipv6, tx "
+				"offload contains wrong `RTE_MBUF_F_TX_OUTER_IPV4` flag");
+				ret = -1;
+			}
+		}
+	}
+
+	if (info->ethertype ==
+		rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) {
+		if (!(m->ol_flags & RTE_MBUF_F_TX_IPV4)) {
+			PMD_DRV_LOG(ERR, "Ethernet type is ipv4, tx offload "
+			"missing `RTE_MBUF_F_TX_IPV4` flag.");
+			ret = -1;
+		}
+		if (m->ol_flags & RTE_MBUF_F_TX_IPV6) {
+			PMD_DRV_LOG(ERR, "Ethernet type is ipv4, tx "
+			"offload contains wrong `RTE_MBUF_F_TX_IPV6` flag");
+			ret = -1;
+		}
+	} else if (info->ethertype ==
+		rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6)) {
+		if (!(m->ol_flags & RTE_MBUF_F_TX_IPV6)) {
+			PMD_DRV_LOG(ERR, "Ethernet type is ipv6, tx offload "
+			"missing `RTE_MBUF_F_TX_IPV6` flag.");
+			ret = -1;
+		}
+		if (m->ol_flags & RTE_MBUF_F_TX_IPV4) {
+			PMD_DRV_LOG(ERR, "Ethernet type is ipv6, tx offload "
+			"contains wrong `RTE_MBUF_F_TX_IPV4` flag");
+			ret = -1;
+		}
+	}
+
+	return ret;
+}
+
+/* Check whether the parameters of mbuf are correct. */
+__rte_unused static  inline int
+i40e_check_mbuf(struct rte_mbuf *m)
+{
+	struct rte_ether_hdr *eth_hdr;
+	void *l3_hdr = NULL; /* can be IPv4 or IPv6 */
+	struct offload_info info = {0};
+	uint64_t ol_flags = m->ol_flags;
+	uint64_t tunnel_type = ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK;
+
+	eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
+	parse_ethernet(eth_hdr, &info);
+	l3_hdr = (char *)eth_hdr + info.l2_len;
+	if (info.l4_proto == IPPROTO_UDP) {
+		struct rte_udp_hdr *udp_hdr;
+
+		udp_hdr = (struct rte_udp_hdr *)
+			((char *)l3_hdr + info.l3_len);
+		parse_gtp(udp_hdr, &info);
+		if (info.is_tunnel) {
+			if (!tunnel_type) {
+				PMD_DRV_LOG(ERR, "gtp tunnel packet, tx offload "
+				"missing `RTE_MBUF_F_TX_TUNNEL_GTP` flag.");
+				return -1;
+			}
+			if (tunnel_type != RTE_MBUF_F_TX_TUNNEL_GTP) {
+				PMD_DRV_LOG(ERR, "gtp tunnel packet, tx offload has wrong "
+				"`%s` flag, correct is `RTE_MBUF_F_TX_TUNNEL_GTP` flag",
+				rte_get_tx_ol_flag_name(tunnel_type));
+				return -1;
+			}
+			goto check_len;
+		}
+		parse_vxlan_gpe(udp_hdr, &info);
+		if (info.is_tunnel) {
+			if (!tunnel_type) {
+				PMD_DRV_LOG(ERR, "vxlan gpe tunnel packet, tx offload "
+				"missing `RTE_MBUF_F_TX_TUNNEL_VXLAN_GPE` flag.");
+				return -1;
+			}
+			if (tunnel_type != RTE_MBUF_F_TX_TUNNEL_VXLAN_GPE) {
+				PMD_DRV_LOG(ERR, "vxlan gpe tunnel packet, tx offload has "
+				"wrong `%s` flag, correct is "
+				"`RTE_MBUF_F_TX_TUNNEL_VXLAN_GPE` flag",
+				rte_get_tx_ol_flag_name(tunnel_type));
+				return -1;
+			}
+			goto check_len;
+		}
+		parse_vxlan(udp_hdr, &info);
+		if (info.is_tunnel) {
+			if (!tunnel_type) {
+				PMD_DRV_LOG(ERR, "vxlan tunnel packet, tx offload "
+				"missing `RTE_MBUF_F_TX_TUNNEL_VXLAN` flag.");
+				return -1;
+			}
+			if (tunnel_type != RTE_MBUF_F_TX_TUNNEL_VXLAN) {
+				PMD_DRV_LOG(ERR, "vxlan tunnel packet, tx offload has "
+				"wrong `%s` flag, correct is "
+				"`RTE_MBUF_F_TX_TUNNEL_VXLAN` flag",
+				rte_get_tx_ol_flag_name(tunnel_type));
+				return -1;
+			}
+			goto check_len;
+		}
+		parse_geneve(udp_hdr, &info);
+		if (info.is_tunnel) {
+			if (!tunnel_type) {
+				PMD_DRV_LOG(ERR, "geneve tunnel packet, tx offload "
+				"missing `RTE_MBUF_F_TX_TUNNEL_GENEVE` flag.");
+				return -1;
+			}
+			if (tunnel_type != RTE_MBUF_F_TX_TUNNEL_GENEVE) {
+				PMD_DRV_LOG(ERR, "geneve tunnel packet, tx offload has "
+				"wrong `%s` flag, correct is "
+				"`RTE_MBUF_F_TX_TUNNEL_GENEVE` flag",
+				rte_get_tx_ol_flag_name(tunnel_type));
+				return -1;
+			}
+			goto check_len;
+		}
+		/* Always keep last. */
+		if (unlikely(RTE_ETH_IS_TUNNEL_PKT(m->packet_type)
+			!= 0)) {
+			PMD_DRV_LOG(ERR, "Unknown tunnel packet. UDP dst port: %hu",
+				udp_hdr->dst_port);
+			return -1;
+		}
+	} else if (info.l4_proto == IPPROTO_GRE) {
+		struct simple_gre_hdr *gre_hdr;
+
+		gre_hdr = (struct simple_gre_hdr *)((char *)l3_hdr +
+			info.l3_len);
+		parse_gre(gre_hdr, &info);
+		if (info.is_tunnel) {
+			if (!tunnel_type) {
+				PMD_DRV_LOG(ERR, "gre tunnel packet, tx offload "
+				"missing `RTE_MBUF_F_TX_TUNNEL_GRE` flag.");
+				return -1;
+			}
+			if (tunnel_type != RTE_MBUF_F_TX_TUNNEL_GRE) {
+				PMD_DRV_LOG(ERR, "gre tunnel packet, tx offload has "
+				"wrong `%s` flag, correct is "
+				"`RTE_MBUF_F_TX_TUNNEL_GRE` flag",
+				rte_get_tx_ol_flag_name(tunnel_type));
+				return -1;
+			}
+			goto check_len;
+		}
+	} else if (info.l4_proto == IPPROTO_IPIP) {
+		void *encap_ip_hdr;
+
+		encap_ip_hdr = (char *)l3_hdr + info.l3_len;
+		parse_encap_ip(encap_ip_hdr, &info);
+		if (info.is_tunnel) {
+			if (!tunnel_type) {
+				PMD_DRV_LOG(ERR, "Ipip tunnel packet, tx offload "
+				"missing `RTE_MBUF_F_TX_TUNNEL_IPIP` flag.");
+				return -1;
+			}
+			if (tunnel_type != RTE_MBUF_F_TX_TUNNEL_IPIP) {
+				PMD_DRV_LOG(ERR, "Ipip tunnel packet, tx offload has "
+				"wrong `%s` flag, correct is "
+				"`RTE_MBUF_F_TX_TUNNEL_IPIP` flag",
+				rte_get_tx_ol_flag_name(tunnel_type));
+				return -1;
+			}
+			goto check_len;
+		}
+	}
+
+check_len:
+	if (check_mbuf_len(&info, m) != 0)
+		return -1;
+
+	return check_ether_type(&info, m);
+}
+
+/* Tx MDD check */
+static uint16_t
+i40e_xmit_pkts_mdd(void *tx_queue, struct rte_mbuf **tx_pkts,
+	      uint16_t nb_pkts)
+{
+	struct i40e_tx_queue *txq = tx_queue;
+	struct rte_mbuf *mb;
+	uint16_t idx;
+	const char *reason = NULL;
+	struct i40e_adapter *adapter = txq->vsi->adapter;
+	uint64_t mdd_mbuf_err_count = 0;
+	uint64_t mdd_pkt_err_count = 0;
+	uint64_t ol_flags;
+
+	for (idx = 0; idx < nb_pkts; idx++) {
+		mb = tx_pkts[idx];
+		ol_flags = mb->ol_flags;
+
+		if ((adapter->mc_flags & I40E_MDD_CHECK_F_TX_MBUF) &&
+			(rte_mbuf_check(mb, 0, &reason) != 0)) {
+			PMD_DRV_LOG(ERR, "INVALID mbuf: %s\n", reason);
+			mdd_mbuf_err_count++;
+			continue;
+		}
+
+		if ((adapter->mc_flags & I40E_MDD_CHECK_F_TX_SIZE) &&
+			(mb->data_len > mb->pkt_len ||
+			mb->data_len < I40E_TX_MIN_PKT_LEN ||
+			mb->data_len > adapter->max_pkt_len)) {
+			PMD_DRV_LOG(ERR, "INVALID mbuf: data_len (%u) is out "
+			"of range, reasonable range (%d - %u)\n", mb->data_len,
+			I40E_TX_MIN_PKT_LEN, adapter->max_pkt_len);
+			mdd_pkt_err_count++;
+			continue;
+		}
+
+		if (adapter->mc_flags & I40E_MDD_CHECK_F_TX_SEGMENT) {
+			if (!(ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
+				/**
+				 * No TSO case: nb_segs and pkt_len must not
+				 * exceed the limits.
+				 */
+				if (mb->nb_segs > I40E_TX_MAX_MTU_SEG) {
+					PMD_DRV_LOG(ERR, "INVALID mbuf: nb_segs (%d) exceeds "
+					"HW limit, maximum allowed value is %d\n", mb->nb_segs,
+					I40E_TX_MAX_MTU_SEG);
+					mdd_pkt_err_count++;
+					continue;
+				}
+				if (mb->pkt_len > I40E_FRAME_SIZE_MAX) {
+					PMD_DRV_LOG(ERR, "INVALID mbuf: pkt_len (%d) exceeds "
+					"HW limit, maximum allowed value is %d\n", mb->pkt_len,
+					I40E_FRAME_SIZE_MAX);
+					mdd_pkt_err_count++;
+					continue;
+				}
+			} else if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
+				/** TSO case: tso_segsz, nb_segs and pkt_len
+				 * must not exceed the limits.
+				 */
+				if (mb->tso_segsz < I40E_MIN_TSO_MSS ||
+					mb->tso_segsz > I40E_MAX_TSO_MSS) {
+					/**
+					 * MSS outside the range are considered malicious
+					 */
+					PMD_DRV_LOG(ERR, "INVALID mbuf: tso_segsz (%u) is out "
+					"of range, reasonable range (%d - %u)\n", mb->tso_segsz,
+					I40E_MIN_TSO_MSS, I40E_MAX_TSO_MSS);
+					mdd_pkt_err_count++;
+					continue;
+				}
+				if (mb->nb_segs >
+					((struct i40e_tx_queue *)tx_queue)->nb_tx_desc) {
+					PMD_DRV_LOG(ERR, "INVALID mbuf: nb_segs out "
+					"of ring length\n");
+					mdd_pkt_err_count++;
+					continue;
+				}
+				if (mb->pkt_len > I40E_TSO_FRAME_SIZE_MAX) {
+					PMD_DRV_LOG(ERR, "INVALID mbuf: pkt_len (%d) exceeds "
+					"HW limit, maximum allowed value is %d\n", mb->pkt_len,
+					I40E_TSO_FRAME_SIZE_MAX);
+					mdd_pkt_err_count++;
+					continue;
+				}
+			}
+		}
+
+		if (adapter->mc_flags & I40E_MDD_CHECK_F_TX_OFFLOAD) {
+			if (ol_flags & I40E_TX_OFFLOAD_NOTSUP_MASK) {
+				PMD_DRV_LOG(ERR, "INVALID mbuf: TX offload "
+				"is not supported\n");
+				mdd_pkt_err_count++;
+				continue;
+			}
+
+			if (rte_validate_tx_offload(mb) != 0) {
+				PMD_DRV_LOG(ERR, "INVALID mbuf: TX offload "
+				"setup error\n");
+				mdd_pkt_err_count++;
+				continue;
+			}
+		}
+
+		if (adapter->mc_flags & I40E_MDD_CHECK_F_TX_STRICT &&
+			i40e_check_mbuf(mb)) {
+			mdd_pkt_err_count++;
+			continue;
+		}
+	}
+
+	if (mdd_mbuf_err_count || mdd_pkt_err_count) {
+		if (mdd_mbuf_err_count)
+			rte_atomic_fetch_add_explicit(&txq->mdd_mbuf_err_count,
+					mdd_mbuf_err_count, rte_memory_order_release);
+		if (mdd_pkt_err_count)
+			rte_atomic_fetch_add_explicit(&txq->mdd_pkt_err_count,
+					mdd_pkt_err_count, rte_memory_order_release);
+		rte_errno = EINVAL;
+		return 0;
+	}
+
+	return idx;
+}
+
+static int
+i40e_tx_pkt_burst_insert(struct rte_eth_dev *dev, eth_tx_burst_t func)
+{
+	struct i40e_tx_burst_elem *elem;
+	struct i40e_pkt_burst *item;
+
+	if (!func) {
+		PMD_DRV_LOG(ERR, "TX functions cannot be NULL");
+		return -1;
+	}
+
+	elem = rte_malloc(NULL, sizeof(*elem), 0);
+	if (!elem) {
+		PMD_DRV_LOG(ERR, "Unable to allocate memory");
+		return -1;
+	}
+
+	item = &i40e_rxtx_pkt_burst[dev->data->port_id];
+	elem->tx_pkt_burst = func;
+	TAILQ_INSERT_TAIL(&item->tx_burst_list, elem, next);
+
+	return 0;
+}
+
+static uint16_t
+i40e_xmit_pkts_chain(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+	struct i40e_tx_queue *txq = tx_queue;
+	struct i40e_adapter *adapter = txq->vsi->adapter;
+	struct i40e_tx_burst_elem *pos;
+	struct i40e_tx_burst_elem *save_next;
+	struct i40e_pkt_burst *item;
+	uint16_t ret = 0;
+
+	item = &i40e_rxtx_pkt_burst[adapter->pf.dev_data->port_id];
+	RTE_TAILQ_FOREACH_SAFE(pos, &item->tx_burst_list, next, save_next) {
+		ret = pos->tx_pkt_burst(tx_queue, tx_pkts, nb_pkts);
+		if (nb_pkts != ret)
+			break;
+	}
+
+	return ret;
+}
+
+/* Choose Tx interceptors. */
+static void
+i40e_set_tx_interceptors(struct rte_eth_dev *dev)
+{
+	struct i40e_adapter *adapter =
+		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+	eth_tx_burst_t tx_pkt_burst;
+	int err;
+	uint16_t mdd_check = adapter->devargs.mbuf_check;
+
+	if (!mdd_check)
+		return;
+
+	/* Replace tx_pkt_burst in struct rte_eth_dev to
+	 * intercept the default TX path. All the work is
+	 * done in i40e_xmit_pkts_chain.
+	 */
+	tx_pkt_burst = dev->tx_pkt_burst;
+	dev->tx_pkt_burst = i40e_xmit_pkts_chain;
+
+	/* Register all interceptors. The registration
+	 * order determines the order of execution.
+	 */
+	if (mdd_check) {
+		err = i40e_tx_pkt_burst_insert(dev, i40e_xmit_pkts_mdd);
+		if (!err)
+			PMD_DRV_LOG(DEBUG, "Register diagnostics Tx callback (port=%d).",
+					    dev->data->port_id);
+		else
+			PMD_DRV_LOG(ERR, "Failed to register diagnostics TX callback (port %d).",
+					    dev->data->port_id);
+	}
+
+	err = i40e_tx_pkt_burst_insert(dev, tx_pkt_burst);
+	if (!err)
+		PMD_DRV_LOG(DEBUG, "Register PMD Tx callback (port=%d).",
+					dev->data->port_id);
+	else
+		PMD_DRV_LOG(ERR, "Failed to register PMD TX callback (port %d).",
+					dev->data->port_id);
+}
+
 /*********************************************************************
  *
  *  TX simple prep functions
@@ -1724,6 +2575,21 @@ i40e_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 	return 0;
 }
 
+static void
+i40e_rx_pkt_burst_cleanup(struct rte_eth_dev *dev)
+{
+	struct i40e_pkt_burst *item;
+	struct i40e_rx_burst_elem *pos;
+	struct i40e_rx_burst_elem *save_next;
+
+	item = &i40e_rxtx_pkt_burst[dev->data->port_id];
+
+	RTE_TAILQ_FOREACH_SAFE(pos, &item->rx_burst_list, next, save_next) {
+		TAILQ_REMOVE(&item->rx_burst_list, pos, next);
+		rte_free(pos);
+	}
+}
+
 int
 i40e_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
@@ -1748,6 +2614,7 @@ i40e_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 			    rx_queue_id);
 		return err;
 	}
+	i40e_rx_pkt_burst_cleanup(dev);
 	i40e_rx_queue_release_mbufs(rxq);
 	i40e_reset_rx_queue(rxq);
 	dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
@@ -1790,6 +2657,21 @@ i40e_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 	return 0;
 }
 
+static void
+i40e_tx_pkt_burst_cleanup(struct rte_eth_dev *dev)
+{
+	struct i40e_pkt_burst *item;
+	struct i40e_tx_burst_elem *pos;
+	struct i40e_tx_burst_elem *save_next;
+
+	item = &i40e_rxtx_pkt_burst[dev->data->port_id];
+
+	RTE_TAILQ_FOREACH_SAFE(pos, &item->tx_burst_list, next, save_next) {
+		TAILQ_REMOVE(&item->tx_burst_list, pos, next);
+		rte_free(pos);
+	}
+}
+
 int
 i40e_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 {
@@ -1815,6 +2697,7 @@ i40e_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 		return err;
 	}
 
+	i40e_tx_pkt_burst_cleanup(dev);
 	i40e_tx_queue_release_mbufs(txq);
 	i40e_reset_tx_queue(txq);
 	dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
@@ -3255,6 +4138,29 @@ get_avx_supported(bool request_avx512)
 }
 #endif /* RTE_ARCH_X86 */
 
+static int __rte_unused
+i40e_rx_pkt_burst_insert(struct rte_eth_dev *dev, eth_rx_burst_t func)
+{
+	struct i40e_rx_burst_elem *elem;
+	struct i40e_pkt_burst *item;
+
+	if (!func) {
+		PMD_DRV_LOG(ERR, "RX functions cannot be NULL");
+		return -1;
+	}
+
+	elem = rte_malloc(NULL, sizeof(*elem), 0);
+	if (!elem) {
+		PMD_DRV_LOG(ERR, "Unable to allocate memory");
+		return -1;
+	}
+
+	item = &i40e_rxtx_pkt_burst[dev->data->port_id];
+	elem->rx_pkt_burst = func;
+	TAILQ_INSERT_TAIL(&item->rx_burst_list, elem, next);
+
+	return 0;
+}
 
 void __rte_cold
 i40e_set_rx_function(struct rte_eth_dev *dev)
@@ -3529,6 +4435,16 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
 		dev->tx_pkt_burst = i40e_xmit_pkts;
 		dev->tx_pkt_prepare = i40e_prep_pkts;
 	}
+	i40e_set_tx_interceptors(dev);
+}
+
+void i40e_pkt_burst_init(struct rte_eth_dev *dev)
+{
+	struct i40e_pkt_burst *item;
+
+	item = &i40e_rxtx_pkt_burst[dev->data->port_id];
+	TAILQ_INIT(&item->rx_burst_list);
+	TAILQ_INIT(&item->tx_burst_list);
 }
 
 static const struct {
diff --git a/drivers/net/i40e/i40e_rxtx.h b/drivers/net/i40e/i40e_rxtx.h
index b191f23e1f..b78940123f 100644
--- a/drivers/net/i40e/i40e_rxtx.h
+++ b/drivers/net/i40e/i40e_rxtx.h
@@ -81,6 +81,11 @@ struct i40e_rx_entry {
 	struct rte_mbuf *mbuf;
 };
 
+struct i40e_pkt_burst {
+	TAILQ_HEAD(rx_pkt_burst_list, i40e_rx_burst_elem) rx_burst_list;
+	TAILQ_HEAD(tx_pkt_burst_list, i40e_tx_burst_elem) tx_burst_list;
+};
+
 /*
  * Structure associated with each RX queue.
  */
@@ -167,6 +172,10 @@ struct i40e_tx_queue {
 	uint16_t tx_next_dd;
 	uint16_t tx_next_rs;
 	bool q_set; /**< indicate if tx queue has been configured */
+
+	uint64_t mdd_mbuf_err_count;
+	uint64_t mdd_pkt_err_count;
+
 	bool tx_deferred_start; /**< don't start this queue in dev start */
 	uint8_t dcb_tc;         /**< Traffic class of tx queue */
 	uint64_t offloads; /**< Tx offload flags of RTE_ETH_TX_OFFLOAD_* */
@@ -255,6 +264,7 @@ void i40e_set_rx_function(struct rte_eth_dev *dev);
 void i40e_set_tx_function_flag(struct rte_eth_dev *dev,
 			       struct i40e_tx_queue *txq);
 void i40e_set_tx_function(struct rte_eth_dev *dev);
+void i40e_pkt_burst_init(struct rte_eth_dev *dev);
 void i40e_set_default_ptype_table(struct rte_eth_dev *dev);
 void i40e_set_default_pctype_table(struct rte_eth_dev *dev);
 uint16_t i40e_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
-- 
2.25.1



* RE: [PATCH] net/i40e: add diagnostic support in TX path
  2023-12-21 10:13 [PATCH] net/i40e: add diagnostic support in TX path Mingjin Ye
@ 2023-12-22 16:17 ` Morten Brørup
  2024-01-04 10:20 ` [PATCH v3] " Mingjin Ye
  1 sibling, 0 replies; 11+ messages in thread
From: Morten Brørup @ 2023-12-22 16:17 UTC (permalink / raw)
  To: Mingjin Ye, dev; +Cc: qiming.yang, Yuying Zhang, Beilei Xing

> From: Mingjin Ye [mailto:mingjinx.ye@intel.com]
> Sent: Thursday, 21 December 2023 11.14
> 
> The only way to enable diagnostics for the TX path is to modify the
> application source code, which makes faults difficult to diagnose.
> 
> This patch introduces the devarg option "mbuf_check", whose
> parameters select the corresponding diagnostics to enable.

Why not use the already existing RTE_LIBRTE_MBUF_DEBUG?

> 
> supported cases: mbuf, size, segment, offload, strict.
>  1. mbuf: check for corrupted mbuf.
>  2. size: check min/max packet length according to the hw spec.
>  3. segment: check that the number of mbuf segments does not exceed the hw limit.
>  4. offload: check for any unsupported offload flag.
>  5. strict: check protocol headers.

These are all good things to test for, when debugging.

The checks should be part of RTE_LIBRTE_MBUF_DEBUG, instead of introducing yet another runtime parameter.

Also, it would be better to add these checks somewhere in the ethdev library instead of in the individual drivers. That would make them available for all drivers.
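
For illustration, ethdev already has a generic hook where such checks
could live without touching each PMD: the Tx callback mechanism. A
minimal sketch using the existing rte_eth_add_tx_callback() API (the
callback body itself is hypothetical, not existing code):

  #include <stdio.h>
  #include <rte_ethdev.h>
  #include <rte_mbuf.h>

  /* Diagnose (but do not drop) bad mbufs in every Tx burst.
   * Requires the RTE_ETHDEV_RXTX_CALLBACKS build option. */
  static uint16_t
  tx_diag_cb(uint16_t port_id __rte_unused, uint16_t queue __rte_unused,
  	   struct rte_mbuf *pkts[], uint16_t nb_pkts, void *arg __rte_unused)
  {
  	const char *reason;
  	uint16_t i;

  	for (i = 0; i < nb_pkts; i++)
  		if (rte_mbuf_check(pkts[i], 1, &reason) != 0)
  			printf("bad mbuf in Tx burst: %s\n", reason);
  	return nb_pkts;
  }

  /* Registered once per queue, e.g. at application init:
   *   rte_eth_add_tx_callback(port_id, queue_id, tx_diag_cb, NULL);
   */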

> 
> parameter format: mbuf_check=[mbuf,<case1>,<case2>]
> eg: dpdk-testpmd -a 0000:81:01.0,mbuf_check=[mbuf,size] -- -i
> 
> Signed-off-by: Mingjin Ye <mingjinx.ye@intel.com>
> ---


* [PATCH v3] net/i40e: add diagnostic support in TX path
  2023-12-21 10:13 [PATCH] net/i40e: add diagnostic support in TX path Mingjin Ye
  2023-12-22 16:17 ` Morten Brørup
@ 2024-01-04 10:20 ` Mingjin Ye
  2024-01-05  9:59   ` [PATCH v4] " Mingjin Ye
  1 sibling, 1 reply; 11+ messages in thread
From: Mingjin Ye @ 2024-01-04 10:20 UTC (permalink / raw)
  To: dev; +Cc: qiming.yang, Mingjin Ye, Yuying Zhang, Beilei Xing

The only way to enable diagnostics for the TX path is to modify the
application source code, which makes faults difficult to diagnose.

This patch introduces the devarg option "mbuf_check", whose
parameters select the corresponding diagnostics to enable.

supported cases: mbuf, size, segment, offload.
 1. mbuf: check for corrupted mbuf.
 2. size: check min/max packet length according to the hw spec.
 3. segment: check that the number of mbuf segments does not exceed the hw limit.
 4. offload: check for any unsupported offload flag.

parameter format: mbuf_check=[mbuf,<case1>,<case2>]
eg: dpdk-testpmd -a 0000:81:01.0,mbuf_check=[mbuf,size] -- -i
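
Once enabled, the result of the checks is reflected in the
tx_mbuf_error_packets xstats counter, which can be inspected from
testpmd, for example:

  testpmd> show port xstats 0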

Signed-off-by: Mingjin Ye <mingjinx.ye@intel.com>
---
v2: remove strict.
---
v3: optimised.
---
 doc/guides/nics/i40e.rst       |  11 +++
 drivers/net/i40e/i40e_ethdev.c | 137 ++++++++++++++++++++++++++++-
 drivers/net/i40e/i40e_ethdev.h |  28 ++++++
 drivers/net/i40e/i40e_rxtx.c   | 153 +++++++++++++++++++++++++++++++--
 drivers/net/i40e/i40e_rxtx.h   |   2 +
 5 files changed, 323 insertions(+), 8 deletions(-)

diff --git a/doc/guides/nics/i40e.rst b/doc/guides/nics/i40e.rst
index 15689ac958..b15b5b61c5 100644
--- a/doc/guides/nics/i40e.rst
+++ b/doc/guides/nics/i40e.rst
@@ -275,6 +275,17 @@ Runtime Configuration
 
   -a 84:00.0,vf_msg_cfg=80@120:180
 
+- ``Support TX diagnostics`` (default ``not enabled``)
+
+  Set the ``devargs`` parameter ``mbuf_check`` to enable TX diagnostics. For example,
+  ``-a 18:01.0,mbuf_check=mbuf`` or ``-a 18:01.0,mbuf_check=[mbuf,size]``.
+  Supported cases:
+
+  *   mbuf: Check for corrupted mbuf.
+  *   size: Check min/max packet length according to the hw spec.
+  *   segment: Check that the number of mbuf segments does not exceed the hw limit.
+  *   offload: Check for any unsupported offload flag.
+
 Vector RX Pre-conditions
 ~~~~~~~~~~~~~~~~~~~~~~~~
 For Vector RX it is assumed that the number of descriptor rings will be a power
diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 3ca226156b..e554bae1ab 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -48,6 +48,7 @@
 #define ETH_I40E_SUPPORT_MULTI_DRIVER	"support-multi-driver"
 #define ETH_I40E_QUEUE_NUM_PER_VF_ARG	"queue-num-per-vf"
 #define ETH_I40E_VF_MSG_CFG		"vf_msg_cfg"
+#define ETH_I40E_MBUF_CHECK_ARG       "mbuf_check"
 
 #define I40E_CLEAR_PXE_WAIT_MS     200
 #define I40E_VSI_TSR_QINQ_STRIP		0x4010
@@ -412,6 +413,7 @@ static const char *const valid_keys[] = {
 	ETH_I40E_SUPPORT_MULTI_DRIVER,
 	ETH_I40E_QUEUE_NUM_PER_VF_ARG,
 	ETH_I40E_VF_MSG_CFG,
+	ETH_I40E_MBUF_CHECK_ARG,
 	NULL};
 
 static const struct rte_pci_id pci_id_i40e_map[] = {
@@ -545,6 +547,14 @@ static const struct rte_i40e_xstats_name_off rte_i40e_stats_strings[] = {
 #define I40E_NB_ETH_XSTATS (sizeof(rte_i40e_stats_strings) / \
 		sizeof(rte_i40e_stats_strings[0]))
 
+static const struct rte_i40e_xstats_name_off i40e_mbuf_strings[] = {
+	{"tx_mbuf_error_packets", offsetof(struct i40e_mbuf_stats,
+		tx_pkt_errors)},
+};
+
+#define I40E_NB_MBUF_XSTATS (sizeof(i40e_mbuf_strings) / \
+		sizeof(i40e_mbuf_strings[0]))
+
 static const struct rte_i40e_xstats_name_off rte_i40e_hw_port_strings[] = {
 	{"tx_link_down_dropped", offsetof(struct i40e_hw_port_stats,
 		tx_dropped_link_down)},
@@ -1373,6 +1383,88 @@ read_vf_msg_config(__rte_unused const char *key,
 	return 0;
 }
 
+static int
+read_mbuf_check_config(__rte_unused const char *key, const char *value, void *args)
+{
+	char *cur;
+	char *tmp;
+	int str_len;
+	int valid_len;
+
+	int ret = 0;
+	uint64_t *mc_flags = args;
+	char *str2 = strdup(value);
+	if (str2 == NULL)
+		return -1;
+
+	str_len = strlen(str2);
+	if (str2[0] == '[' && str2[str_len - 1] == ']') {
+		if (str_len < 3) {
+			ret = -1;
+			goto mdd_end;
+		}
+		valid_len = str_len - 2;
+		memmove(str2, str2 + 1, valid_len);
+		memset(str2 + valid_len, '\0', 2);
+	}
+	cur = strtok_r(str2, ",", &tmp);
+	while (cur != NULL) {
+		if (!strcmp(cur, "mbuf"))
+			*mc_flags |= I40E_MBUF_CHECK_F_TX_MBUF;
+		else if (!strcmp(cur, "size"))
+			*mc_flags |= I40E_MBUF_CHECK_F_TX_SIZE;
+		else if (!strcmp(cur, "segment"))
+			*mc_flags |= I40E_MBUF_CHECK_F_TX_SEGMENT;
+		else if (!strcmp(cur, "offload"))
+			*mc_flags |= I40E_MBUF_CHECK_F_TX_OFFLOAD;
+		else
+			PMD_DRV_LOG(ERR, "Unsupported mdd check type: %s", cur);
+		cur = strtok_r(NULL, ",", &tmp);
+	}
+
+mdd_end:
+	free(str2);
+	return ret;
+}
+
+static int
+i40e_parse_mbuf_check(struct rte_eth_dev *dev)
+{
+	struct i40e_adapter *ad =
+		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+	struct rte_kvargs *kvlist;
+	int kvargs_count;
+	int ret = 0;
+
+	if (!dev->device->devargs)
+		return ret;
+
+	kvlist = rte_kvargs_parse(dev->device->devargs->args, valid_keys);
+	if (!kvlist)
+		return -EINVAL;
+
+	kvargs_count = rte_kvargs_count(kvlist, ETH_I40E_MBUF_CHECK_ARG);
+	if (!kvargs_count)
+		goto free_end;
+
+	if (kvargs_count > 1)
+		PMD_DRV_LOG(WARNING, "More than one argument \"%s\" and only "
+			    "the first invalid or last valid one is used !",
+			    ETH_I40E_MBUF_CHECK_ARG);
+
+	ret = rte_kvargs_process(kvlist, ETH_I40E_MBUF_CHECK_ARG,
+				read_mbuf_check_config, &ad->mc_flags);
+	if (ret)
+		goto free_end;
+
+	if (ad->mc_flags)
+		ad->devargs.mbuf_check = 1;
+
+free_end:
+	rte_kvargs_free(kvlist);
+	return ret;
+}
+
 static int
 i40e_parse_vf_msg_config(struct rte_eth_dev *dev,
 		struct i40e_vf_msg_cfg *msg_cfg)
@@ -1488,6 +1580,7 @@ eth_i40e_dev_init(struct rte_eth_dev *dev, void *init_params __rte_unused)
 	}
 
 	i40e_parse_vf_msg_config(dev, &pf->vf_msg_cfg);
+	i40e_parse_mbuf_check(dev);
 	/* Check if need to support multi-driver */
 	i40e_support_multi_driver(dev);
 
@@ -2324,6 +2417,8 @@ i40e_dev_start(struct rte_eth_dev *dev)
 	struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
 	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 	struct i40e_vsi *main_vsi = pf->main_vsi;
+	struct i40e_adapter *ad =
+		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 	int ret, i;
 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
 	struct rte_intr_handle *intr_handle = pci_dev->intr_handle;
@@ -2483,6 +2578,7 @@ i40e_dev_start(struct rte_eth_dev *dev)
 	max_frame_size = dev->data->mtu ?
 		dev->data->mtu + I40E_ETH_OVERHEAD :
 		I40E_FRAME_SIZE_MAX;
+	ad->max_pkt_len = max_frame_size;
 
 	/* Set the max frame size to HW*/
 	i40e_aq_set_mac_config(hw, max_frame_size, TRUE, false, 0, NULL);
@@ -3502,13 +3598,17 @@ i40e_dev_stats_reset(struct rte_eth_dev *dev)
 	/* read the stats, reading current register values into offset */
 	i40e_read_stats_registers(pf, hw);
 
+	memset(&pf->mbuf_stats, 0,
+		sizeof(struct i40e_mbuf_stats));
+
 	return 0;
 }
 
 static uint32_t
 i40e_xstats_calc_num(void)
 {
-	return I40E_NB_ETH_XSTATS + I40E_NB_HW_PORT_XSTATS +
+	return I40E_NB_ETH_XSTATS + I40E_NB_MBUF_XSTATS +
+		I40E_NB_HW_PORT_XSTATS +
 		(I40E_NB_RXQ_PRIO_XSTATS * 8) +
 		(I40E_NB_TXQ_PRIO_XSTATS * 8);
 }
@@ -3533,6 +3633,14 @@ static int i40e_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
 		count++;
 	}
 
+	/* Get stats from i40e_mbuf_stats struct */
+	for (i = 0; i < I40E_NB_MBUF_XSTATS; i++) {
+		strlcpy(xstats_names[count].name,
+			i40e_mbuf_strings[i].name,
+			sizeof(xstats_names[count].name));
+		count++;
+	}
+
 	/* Get individual stats from i40e_hw_port struct */
 	for (i = 0; i < I40E_NB_HW_PORT_XSTATS; i++) {
 		strlcpy(xstats_names[count].name,
@@ -3563,12 +3671,27 @@ static int i40e_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
 	return count;
 }
 
+static void
+i40e_dev_update_mbuf_stats(struct rte_eth_dev *ethdev,
+		struct i40e_mbuf_stats *mbuf_stats)
+{
+	uint16_t idx;
+	struct i40e_tx_queue *txq;
+
+	/* Recompute from the per-queue counters so repeated
+	 * xstats reads do not accumulate. */
+	mbuf_stats->tx_pkt_errors = 0;
+	for (idx = 0; idx < ethdev->data->nb_tx_queues; idx++) {
+		txq = ethdev->data->tx_queues[idx];
+		mbuf_stats->tx_pkt_errors += txq->mbuf_errors;
+	}
+}
+
 static int
 i40e_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
 		    unsigned n)
 {
 	struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
 	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	struct i40e_adapter *adapter =
+		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 	unsigned i, count, prio;
 	struct i40e_hw_port_stats *hw_stats = &pf->stats;
 
@@ -3583,6 +3706,9 @@ i40e_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
 
 	count = 0;
 
+	if (adapter->devargs.mbuf_check)
+		i40e_dev_update_mbuf_stats(dev, &pf->mbuf_stats);
+
 	/* Get stats from i40e_eth_stats struct */
 	for (i = 0; i < I40E_NB_ETH_XSTATS; i++) {
 		xstats[count].value = *(uint64_t *)(((char *)&hw_stats->eth) +
@@ -3591,6 +3717,15 @@ i40e_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
 		count++;
 	}
 
+	/* Get stats from i40e_mbuf_stats struct */
+	for (i = 0; i < I40E_NB_MBUF_XSTATS; i++) {
+		xstats[count].value =
+			*(uint64_t *)((char *)&pf->mbuf_stats +
+					i40e_mbuf_strings[i].offset);
+		xstats[count].id = count;
+		count++;
+	}
+
 	/* Get individual stats from i40e_hw_port struct */
 	for (i = 0; i < I40E_NB_HW_PORT_XSTATS; i++) {
 		xstats[count].value = *(uint64_t *)(((char *)hw_stats) +
diff --git a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
index 1bbe7ad376..41f9aab6ce 100644
--- a/drivers/net/i40e/i40e_ethdev.h
+++ b/drivers/net/i40e/i40e_ethdev.h
@@ -1108,6 +1108,10 @@ struct i40e_vf_msg_cfg {
 	uint32_t ignore_second;
 };
 
+struct i40e_mbuf_stats {
+	uint64_t tx_pkt_errors;
+};
+
 /*
  * Structure to store private data specific for PF instance.
  */
@@ -1122,6 +1126,7 @@ struct i40e_pf {
 
 	struct i40e_hw_port_stats stats_offset;
 	struct i40e_hw_port_stats stats;
+	struct i40e_mbuf_stats mbuf_stats;
 	u64 rx_err1;	/* rxerr1 */
 	u64 rx_err1_offset;
 
@@ -1224,6 +1229,25 @@ struct i40e_vsi_vlan_pvid_info {
 #define I40E_MAX_PKT_TYPE  256
 #define I40E_FLOW_TYPE_MAX 64
 
+#define I40E_MBUF_CHECK_F_TX_MBUF        (1ULL << 0)
+#define I40E_MBUF_CHECK_F_TX_SIZE        (1ULL << 1)
+#define I40E_MBUF_CHECK_F_TX_SEGMENT     (1ULL << 2)
+#define I40E_MBUF_CHECK_F_TX_OFFLOAD     (1ULL << 3)
+
+enum i40e_tx_burst_type {
+	I40E_TX_DEFAULT,
+	I40E_TX_SIMPLE,
+	I40E_TX_SSE,
+	I40E_TX_AVX2,
+	I40E_TX_AVX512,
+};
+
+/**
+ * Cache devargs parse result.
+ */
+struct i40e_devargs {
+	int mbuf_check;
+};
 /*
  * Structure to store private data for each PF/VF instance.
  */
@@ -1240,6 +1264,10 @@ struct i40e_adapter {
 	bool tx_simple_allowed;
 	bool tx_vec_allowed;
 
+	struct i40e_devargs devargs;
+	uint64_t mc_flags; /* mbuf check flags. */
+	uint16_t max_pkt_len; /* Maximum packet length */
+	enum i40e_tx_burst_type tx_burst_type;
 	/* For PTP */
 	struct rte_timecounter systime_tc;
 	struct rte_timecounter rx_tstamp_tc;
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 9aa5facb53..6fed922253 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -1536,6 +1536,138 @@ i40e_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 	return nb_tx;
 }
 
+static
+const eth_tx_burst_t i40e_tx_pkt_burst_ops[] = {
+	[I40E_TX_DEFAULT] = i40e_xmit_pkts,
+	[I40E_TX_SIMPLE] = i40e_xmit_pkts_simple,
+#ifdef RTE_ARCH_X86
+	[I40E_TX_SSE] = i40e_xmit_pkts_vec,
+	[I40E_TX_AVX2] = i40e_xmit_pkts_vec_avx2,
+#ifdef CC_AVX512_SUPPORT
+	[I40E_TX_AVX512] = i40e_xmit_pkts_vec_avx512,
+#endif
+#endif
+};
+
+/* Tx mbuf check */
+static uint16_t
+i40e_xmit_pkts_check(void *tx_queue, struct rte_mbuf **tx_pkts,
+	      uint16_t nb_pkts)
+{
+	struct i40e_tx_queue *txq = tx_queue;
+	uint16_t idx;
+	uint64_t ol_flags;
+	struct rte_mbuf *mb;
+	bool pkt_error = false;
+	const char *reason = NULL;
+	uint16_t good_pkts = nb_pkts;
+	struct i40e_adapter *adapter = txq->vsi->adapter;
+	enum i40e_tx_burst_type tx_burst_type =
+		txq->vsi->adapter->tx_burst_type;
+
+
+	for (idx = 0; idx < nb_pkts; idx++) {
+		mb = tx_pkts[idx];
+		ol_flags = mb->ol_flags;
+
+		if ((adapter->mc_flags & I40E_MBUF_CHECK_F_TX_MBUF) &&
+			(rte_mbuf_check(mb, 0, &reason) != 0)) {
+			PMD_TX_LOG(ERR, "INVALID mbuf: %s\n", reason);
+			pkt_error = true;
+			break;
+		}
+
+		if ((adapter->mc_flags & I40E_MBUF_CHECK_F_TX_SIZE) &&
+			(mb->data_len > mb->pkt_len ||
+			mb->data_len < I40E_TX_MIN_PKT_LEN ||
+			mb->data_len > adapter->max_pkt_len)) {
+			PMD_TX_LOG(ERR, "INVALID mbuf: data_len (%u) is out "
+			"of range, reasonable range (%d - %u)\n", mb->data_len,
+			I40E_TX_MIN_PKT_LEN, adapter->max_pkt_len);
+			pkt_error = true;
+			break;
+		}
+
+		if (adapter->mc_flags & I40E_MBUF_CHECK_F_TX_SEGMENT) {
+			if (!(ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
+				/**
+				 * No TSO case: nb_segs and pkt_len must not
+				 * exceed the limits.
+				 */
+				if (mb->nb_segs > I40E_TX_MAX_MTU_SEG) {
+					PMD_TX_LOG(ERR, "INVALID mbuf: nb_segs (%d) exceeds "
+					"HW limit, maximum allowed value is %d\n", mb->nb_segs,
+					I40E_TX_MAX_MTU_SEG);
+					pkt_error = true;
+					break;
+				}
+				if (mb->pkt_len > I40E_FRAME_SIZE_MAX) {
+					PMD_TX_LOG(ERR, "INVALID mbuf: pkt_len (%d) exceeds "
+					"HW limit, maximum allowed value is %d\n", mb->pkt_len,
+					I40E_FRAME_SIZE_MAX);
+					pkt_error = true;
+					break;
+				}
+			} else if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
+				/** TSO case: tso_segsz, nb_segs and pkt_len
+				 * must not exceed the limits.
+				 */
+				if (mb->tso_segsz < I40E_MIN_TSO_MSS ||
+					mb->tso_segsz > I40E_MAX_TSO_MSS) {
+					/**
+					 * MSS outside the range are considered malicious
+					 */
+					PMD_TX_LOG(ERR, "INVALID mbuf: tso_segsz (%u) is out "
+					"of range, reasonable range (%d - %u)\n", mb->tso_segsz,
+					I40E_MIN_TSO_MSS, I40E_MAX_TSO_MSS);
+					pkt_error = true;
+					break;
+				}
+				if (mb->nb_segs >
+					((struct i40e_tx_queue *)tx_queue)->nb_tx_desc) {
+					PMD_TX_LOG(ERR, "INVALID mbuf: nb_segs out "
+					"of ring length\n");
+					pkt_error = true;
+					break;
+				}
+				if (mb->pkt_len > I40E_TSO_FRAME_SIZE_MAX) {
+					PMD_TX_LOG(ERR, "INVALID mbuf: pkt_len (%d) exceeds "
+					"HW limit, maximum allowed value is %d\n", mb->pkt_len,
+					I40E_TSO_FRAME_SIZE_MAX);
+					pkt_error = true;
+					break;
+				}
+			}
+		}
+
+		if (adapter->mc_flags & I40E_MBUF_CHECK_F_TX_OFFLOAD) {
+			if (ol_flags & I40E_TX_OFFLOAD_NOTSUP_MASK) {
+				PMD_TX_LOG(ERR, "INVALID mbuf: TX offload "
+				"is not supported\n");
+				pkt_error = true;
+				break;
+			}
+
+			if (rte_validate_tx_offload(mb) != 0) {
+				PMD_TX_LOG(ERR, "INVALID mbuf: TX offload "
+				"setup error\n");
+				pkt_error = true;
+				break;
+			}
+		}
+	}
+
+	if (pkt_error) {
+		txq->mbuf_errors++;
+		good_pkts = idx;
+		if (good_pkts == 0)
+			return 0;
+	}
+
+	return i40e_tx_pkt_burst_ops[tx_burst_type](tx_queue,
+								tx_pkts, good_pkts);
+}
+
 /*********************************************************************
  *
  *  TX simple prep functions
@@ -3467,6 +3599,8 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
 {
 	struct i40e_adapter *ad =
 		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+	enum i40e_tx_burst_type tx_burst_type;
+	int mbuf_check = ad->devargs.mbuf_check;
 	int i;
 
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
@@ -3501,34 +3635,39 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
 #ifdef CC_AVX512_SUPPORT
 				PMD_DRV_LOG(NOTICE, "Using AVX512 Vector Tx (port %d).",
 					    dev->data->port_id);
-				dev->tx_pkt_burst = i40e_xmit_pkts_vec_avx512;
+				tx_burst_type = I40E_TX_AVX512;
 #endif
 			} else {
 				PMD_INIT_LOG(DEBUG, "Using %sVector Tx (port %d).",
 					     ad->tx_use_avx2 ? "avx2 " : "",
 					     dev->data->port_id);
-				dev->tx_pkt_burst = ad->tx_use_avx2 ?
-						    i40e_xmit_pkts_vec_avx2 :
-						    i40e_xmit_pkts_vec;
+				tx_burst_type = ad->tx_use_avx2 ? I40E_TX_AVX2 : I40E_TX_SSE;
 				dev->recycle_tx_mbufs_reuse = i40e_recycle_tx_mbufs_reuse_vec;
 			}
 #else /* RTE_ARCH_X86 */
 			PMD_INIT_LOG(DEBUG, "Using Vector Tx (port %d).",
 				     dev->data->port_id);
-			dev->tx_pkt_burst = i40e_xmit_pkts_vec;
+			tx_burst_type = I40E_TX_SSE;
 			dev->recycle_tx_mbufs_reuse = i40e_recycle_tx_mbufs_reuse_vec;
 #endif /* RTE_ARCH_X86 */
 		} else {
 			PMD_INIT_LOG(DEBUG, "Simple tx finally be used.");
-			dev->tx_pkt_burst = i40e_xmit_pkts_simple;
+			tx_burst_type = I40E_TX_SIMPLE;
 			dev->recycle_tx_mbufs_reuse = i40e_recycle_tx_mbufs_reuse_vec;
 		}
 		dev->tx_pkt_prepare = i40e_simple_prep_pkts;
 	} else {
 		PMD_INIT_LOG(DEBUG, "Xmit tx finally be used.");
-		dev->tx_pkt_burst = i40e_xmit_pkts;
+		tx_burst_type = I40E_TX_DEFAULT;
 		dev->tx_pkt_prepare = i40e_prep_pkts;
 	}
+
+	if (mbuf_check) {
+		ad->tx_burst_type = tx_burst_type;
+		dev->tx_pkt_burst = i40e_xmit_pkts_check;
+	} else {
+		dev->tx_pkt_burst = i40e_tx_pkt_burst_ops[tx_burst_type];
+	}
 }
 
 static const struct {
diff --git a/drivers/net/i40e/i40e_rxtx.h b/drivers/net/i40e/i40e_rxtx.h
index b191f23e1f..818bf9d859 100644
--- a/drivers/net/i40e/i40e_rxtx.h
+++ b/drivers/net/i40e/i40e_rxtx.h
@@ -167,6 +167,8 @@ struct i40e_tx_queue {
 	uint16_t tx_next_dd;
 	uint16_t tx_next_rs;
 	bool q_set; /**< indicate if tx queue has been configured */
+	uint64_t mbuf_errors;
+
 	bool tx_deferred_start; /**< don't start this queue in dev start */
 	uint8_t dcb_tc;         /**< Traffic class of tx queue */
 	uint64_t offloads; /**< Tx offload flags of RTE_ETH_TX_OFFLOAD_* */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH v4] net/i40e: add diagnostic support in TX path
  2024-01-04 10:20 ` [PATCH v3] " Mingjin Ye
@ 2024-01-05  9:59   ` Mingjin Ye
  2024-03-01  9:44     ` [PATCH v5] " Mingjin Ye
  0 siblings, 1 reply; 11+ messages in thread
From: Mingjin Ye @ 2024-01-05  9:59 UTC (permalink / raw)
  To: dev; +Cc: qiming.yang, Mingjin Ye, Yuying Zhang, Beilei Xing

The only way to enable diagnostics for the TX path is to modify the
application source code, which makes it difficult to diagnose faults.

In this patch, the devarg option "mbuf_check" is introduced and the
parameters are configured to enable the corresponding diagnostics.

supported cases: mbuf, size, segment, offload.
 1. mbuf: check for corrupted mbuf.
 2. size: check min/max packet length according to hw spec.
 3. segment: check that the number of mbuf segments does not exceed
    the hw limit.
 4. offload: check for any unsupported offload flag.

parameter format: mbuf_check=[mbuf,<case1>,<case2>]
eg: dpdk-testpmd -a 0000:81:01.0,mbuf_check=[mbuf,size] -- -i
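
For reference, a minimal sketch (not part of this patch) of how an
application might poll the resulting counter through the generic ethdev
xstats API; the helper name get_tx_mbuf_errors is illustrative only:

  #include <rte_ethdev.h>

  /* Return the current "tx_mbuf_error_packets" count for a port,
   * or 0 if the xstat cannot be resolved. */
  static uint64_t
  get_tx_mbuf_errors(uint16_t port_id)
  {
      uint64_t id, value = 0;

      /* Resolve the xstat name to an id once, then fetch its value. */
      if (rte_eth_xstats_get_id_by_name(port_id,
              "tx_mbuf_error_packets", &id) == 0)
          rte_eth_xstats_get_by_id(port_id, &id, &value, 1);
      return value;
  }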

Signed-off-by: Mingjin Ye <mingjinx.ye@intel.com>
---
v2: remove strict.
---
v3: optimised.
---
v4: rebase.
---
 doc/guides/nics/i40e.rst       |  13 +++
 drivers/net/i40e/i40e_ethdev.c | 138 ++++++++++++++++++++++++++++-
 drivers/net/i40e/i40e_ethdev.h |  28 ++++++
 drivers/net/i40e/i40e_rxtx.c   | 153 +++++++++++++++++++++++++++++++--
 drivers/net/i40e/i40e_rxtx.h   |   2 +
 5 files changed, 326 insertions(+), 8 deletions(-)

diff --git a/doc/guides/nics/i40e.rst b/doc/guides/nics/i40e.rst
index 15689ac958..bf1d1e5d60 100644
--- a/doc/guides/nics/i40e.rst
+++ b/doc/guides/nics/i40e.rst
@@ -275,6 +275,19 @@ Runtime Configuration
 
   -a 84:00.0,vf_msg_cfg=80@120:180
 
+- ``Support TX diagnostics`` (default ``not enabled``)
+
+  Set the ``devargs`` parameter ``mbuf_check`` to enable TX diagnostics. For example,
+  ``-a 18:01.0,mbuf_check=<case>`` or ``-a 18:01.0,mbuf_check=[<case1>,<case2>...]``. Also,
+  ``xstats_get`` can be used to get the error counts, which are collected in
+  ``tx_mbuf_error_packets`` xstats. For example, ``testpmd> show port xstats all``.
+  Supported cases:
+
+  *   mbuf: Check for corrupted mbuf.
+  *   size: Check min/max packet length according to hw spec.
+  *   segment: Check that the number of mbuf segments does not exceed the HW limit.
+  *   offload: Check for any unsupported offload flag.
+
 Vector RX Pre-conditions
 ~~~~~~~~~~~~~~~~~~~~~~~~
 For Vector RX it is assumed that the number of descriptor rings will be a power
diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 3ca226156b..f23f80fd16 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -48,6 +48,7 @@
 #define ETH_I40E_SUPPORT_MULTI_DRIVER	"support-multi-driver"
 #define ETH_I40E_QUEUE_NUM_PER_VF_ARG	"queue-num-per-vf"
 #define ETH_I40E_VF_MSG_CFG		"vf_msg_cfg"
+#define ETH_I40E_MBUF_CHECK_ARG       "mbuf_check"
 
 #define I40E_CLEAR_PXE_WAIT_MS     200
 #define I40E_VSI_TSR_QINQ_STRIP		0x4010
@@ -412,6 +413,7 @@ static const char *const valid_keys[] = {
 	ETH_I40E_SUPPORT_MULTI_DRIVER,
 	ETH_I40E_QUEUE_NUM_PER_VF_ARG,
 	ETH_I40E_VF_MSG_CFG,
+	ETH_I40E_MBUF_CHECK_ARG,
 	NULL};
 
 static const struct rte_pci_id pci_id_i40e_map[] = {
@@ -545,6 +547,14 @@ static const struct rte_i40e_xstats_name_off rte_i40e_stats_strings[] = {
 #define I40E_NB_ETH_XSTATS (sizeof(rte_i40e_stats_strings) / \
 		sizeof(rte_i40e_stats_strings[0]))
 
+static const struct rte_i40e_xstats_name_off i40e_mbuf_strings[] = {
+	{"tx_mbuf_error_packets", offsetof(struct i40e_mbuf_stats,
+		tx_pkt_errors)},
+};
+
+#define I40E_NB_MBUF_XSTATS (sizeof(i40e_mbuf_strings) / \
+		sizeof(i40e_mbuf_strings[0]))
+
 static const struct rte_i40e_xstats_name_off rte_i40e_hw_port_strings[] = {
 	{"tx_link_down_dropped", offsetof(struct i40e_hw_port_stats,
 		tx_dropped_link_down)},
@@ -1373,6 +1383,88 @@ read_vf_msg_config(__rte_unused const char *key,
 	return 0;
 }
 
+static int
+read_mbuf_check_config(__rte_unused const char *key, const char *value, void *args)
+{
+	char *cur;
+	char *tmp;
+	int str_len;
+	int valid_len;
+
+	int ret = 0;
+	uint64_t *mc_flags = args;
+	char *str2 = strdup(value);
+	if (str2 == NULL)
+		return -1;
+
+	str_len = strlen(str2);
+	if (str2[0] == '[' && str2[str_len - 1] == ']') {
+		if (str_len < 3) {
+			ret = -1;
+			goto mdd_end;
+		}
+		valid_len = str_len - 2;
+		memmove(str2, str2 + 1, valid_len);
+		memset(str2 + valid_len, '\0', 2);
+	}
+	cur = strtok_r(str2, ",", &tmp);
+	while (cur != NULL) {
+		if (!strcmp(cur, "mbuf"))
+			*mc_flags |= I40E_MBUF_CHECK_F_TX_MBUF;
+		else if (!strcmp(cur, "size"))
+			*mc_flags |= I40E_MBUF_CHECK_F_TX_SIZE;
+		else if (!strcmp(cur, "segment"))
+			*mc_flags |= I40E_MBUF_CHECK_F_TX_SEGMENT;
+		else if (!strcmp(cur, "offload"))
+			*mc_flags |= I40E_MBUF_CHECK_F_TX_OFFLOAD;
+		else
+			PMD_DRV_LOG(ERR, "Unsupported mbuf check type: %s", cur);
+		cur = strtok_r(NULL, ",", &tmp);
+	}
+
+mdd_end:
+	free(str2);
+	return ret;
+}
+
+static int
+i40e_parse_mbuf_check(struct rte_eth_dev *dev)
+{
+	struct i40e_adapter *ad =
+		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+	struct rte_kvargs *kvlist;
+	int kvargs_count;
+	int ret = 0;
+
+	if (!dev->device->devargs)
+		return ret;
+
+	kvlist = rte_kvargs_parse(dev->device->devargs->args, valid_keys);
+	if (!kvlist)
+		return -EINVAL;
+
+	kvargs_count = rte_kvargs_count(kvlist, ETH_I40E_MBUF_CHECK_ARG);
+	if (!kvargs_count)
+		goto free_end;
+
+	if (kvargs_count > 1)
+		PMD_DRV_LOG(WARNING, "More than one \"%s\" argument given; only "
+			    "the first invalid or the last valid one is used!",
+			    ETH_I40E_MBUF_CHECK_ARG);
+
+	ret = rte_kvargs_process(kvlist, ETH_I40E_MBUF_CHECK_ARG,
+				read_mbuf_check_config, &ad->mc_flags);
+	if (ret)
+		goto free_end;
+
+	if (ad->mc_flags)
+		ad->devargs.mbuf_check = 1;
+
+free_end:
+	rte_kvargs_free(kvlist);
+	return ret;
+}
+
 static int
 i40e_parse_vf_msg_config(struct rte_eth_dev *dev,
 		struct i40e_vf_msg_cfg *msg_cfg)
@@ -1488,6 +1580,7 @@ eth_i40e_dev_init(struct rte_eth_dev *dev, void *init_params __rte_unused)
 	}
 
 	i40e_parse_vf_msg_config(dev, &pf->vf_msg_cfg);
+	i40e_parse_mbuf_check(dev);
 	/* Check if need to support multi-driver */
 	i40e_support_multi_driver(dev);
 
@@ -2324,6 +2417,8 @@ i40e_dev_start(struct rte_eth_dev *dev)
 	struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
 	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 	struct i40e_vsi *main_vsi = pf->main_vsi;
+	struct i40e_adapter *ad =
+		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 	int ret, i;
 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
 	struct rte_intr_handle *intr_handle = pci_dev->intr_handle;
@@ -2483,6 +2578,7 @@ i40e_dev_start(struct rte_eth_dev *dev)
 	max_frame_size = dev->data->mtu ?
 		dev->data->mtu + I40E_ETH_OVERHEAD :
 		I40E_FRAME_SIZE_MAX;
+	ad->max_pkt_len = max_frame_size;
 
 	/* Set the max frame size to HW*/
 	i40e_aq_set_mac_config(hw, max_frame_size, TRUE, false, 0, NULL);
@@ -3502,13 +3598,17 @@ i40e_dev_stats_reset(struct rte_eth_dev *dev)
 	/* read the stats, reading current register values into offset */
 	i40e_read_stats_registers(pf, hw);
 
+	memset(&pf->mbuf_stats, 0,
+		sizeof(struct i40e_mbuf_stats));
+
 	return 0;
 }
 
 static uint32_t
 i40e_xstats_calc_num(void)
 {
-	return I40E_NB_ETH_XSTATS + I40E_NB_HW_PORT_XSTATS +
+	return I40E_NB_ETH_XSTATS + I40E_NB_MBUF_XSTATS +
+		I40E_NB_HW_PORT_XSTATS +
 		(I40E_NB_RXQ_PRIO_XSTATS * 8) +
 		(I40E_NB_TXQ_PRIO_XSTATS * 8);
 }
@@ -3533,6 +3633,14 @@ static int i40e_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
 		count++;
 	}
 
+	/* Get stats from i40e_mbuf_stats struct */
+	for (i = 0; i < I40E_NB_MBUF_XSTATS; i++) {
+		strlcpy(xstats_names[count].name,
+			i40e_mbuf_strings[i].name,
+			sizeof(xstats_names[count].name));
+		count++;
+	}
+
 	/* Get individual stats from i40e_hw_port struct */
 	for (i = 0; i < I40E_NB_HW_PORT_XSTATS; i++) {
 		strlcpy(xstats_names[count].name,
@@ -3563,12 +3671,28 @@ static int i40e_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
 	return count;
 }
 
+static void
+i40e_dev_update_mbuf_stats(struct rte_eth_dev *ethdev,
+		struct i40e_mbuf_stats *mbuf_stats)
+{
+	uint16_t idx;
+	struct i40e_tx_queue *txq;
+
+	for (idx = 0; idx < ethdev->data->nb_tx_queues; idx++) {
+		txq = ethdev->data->tx_queues[idx];
+		mbuf_stats->tx_pkt_errors += txq->mbuf_errors;
+	}
+}
+
 static int
 i40e_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
 		    unsigned n)
 {
 	struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
 	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	struct i40e_adapter *adapter =
+		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+	struct i40e_mbuf_stats mbuf_stats = {0};
 	unsigned i, count, prio;
 	struct i40e_hw_port_stats *hw_stats = &pf->stats;
 
@@ -3583,6 +3707,9 @@ i40e_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
 
 	count = 0;
 
+	if (adapter->devargs.mbuf_check)
+		i40e_dev_update_mbuf_stats(dev, &mbuf_stats);
+
 	/* Get stats from i40e_eth_stats struct */
 	for (i = 0; i < I40E_NB_ETH_XSTATS; i++) {
 		xstats[count].value = *(uint64_t *)(((char *)&hw_stats->eth) +
@@ -3591,6 +3718,15 @@ i40e_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
 		count++;
 	}
 
+	/* Get stats from i40e_mbuf_stats struct */
+	for (i = 0; i < I40E_NB_MBUF_XSTATS; i++) {
+		xstats[count].value =
+			*(uint64_t *)((char *)&mbuf_stats +
+					i40e_mbuf_strings[i].offset);
+		xstats[count].id = count;
+		count++;
+	}
+
 	/* Get individual stats from i40e_hw_port struct */
 	for (i = 0; i < I40E_NB_HW_PORT_XSTATS; i++) {
 		xstats[count].value = *(uint64_t *)(((char *)hw_stats) +
diff --git a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
index 1bbe7ad376..41f9aab6ce 100644
--- a/drivers/net/i40e/i40e_ethdev.h
+++ b/drivers/net/i40e/i40e_ethdev.h
@@ -1108,6 +1108,10 @@ struct i40e_vf_msg_cfg {
 	uint32_t ignore_second;
 };
 
+struct i40e_mbuf_stats {
+	uint64_t tx_pkt_errors;
+};
+
 /*
  * Structure to store private data specific for PF instance.
  */
@@ -1122,6 +1126,7 @@ struct i40e_pf {
 
 	struct i40e_hw_port_stats stats_offset;
 	struct i40e_hw_port_stats stats;
+	struct i40e_mbuf_stats mbuf_stats;
 	u64 rx_err1;	/* rxerr1 */
 	u64 rx_err1_offset;
 
@@ -1224,6 +1229,25 @@ struct i40e_vsi_vlan_pvid_info {
 #define I40E_MAX_PKT_TYPE  256
 #define I40E_FLOW_TYPE_MAX 64
 
+#define I40E_MBUF_CHECK_F_TX_MBUF        (1ULL << 0)
+#define I40E_MBUF_CHECK_F_TX_SIZE        (1ULL << 1)
+#define I40E_MBUF_CHECK_F_TX_SEGMENT     (1ULL << 2)
+#define I40E_MBUF_CHECK_F_TX_OFFLOAD     (1ULL << 3)
+
+enum i40e_tx_burst_type {
+	I40E_TX_DEFAULT,
+	I40E_TX_SIMPLE,
+	I40E_TX_SSE,
+	I40E_TX_AVX2,
+	I40E_TX_AVX512,
+};
+
+/**
+ * Cache devargs parse result.
+ */
+struct i40e_devargs {
+	int mbuf_check;
+};
 /*
  * Structure to store private data for each PF/VF instance.
  */
@@ -1240,6 +1264,10 @@ struct i40e_adapter {
 	bool tx_simple_allowed;
 	bool tx_vec_allowed;
 
+	struct i40e_devargs devargs;
+	uint64_t mc_flags; /* mbuf check flags. */
+	uint16_t max_pkt_len; /* Maximum packet length */
+	enum i40e_tx_burst_type tx_burst_type;
 	/* For PTP */
 	struct rte_timecounter systime_tc;
 	struct rte_timecounter rx_tstamp_tc;
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 9aa5facb53..4e5b47cdb9 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -1536,6 +1536,138 @@ i40e_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 	return nb_tx;
 }
 
+static
+const eth_tx_burst_t i40e_tx_pkt_burst_ops[] = {
+	[I40E_TX_DEFAULT] = i40e_xmit_pkts,
+	[I40E_TX_SIMPLE] = i40e_xmit_pkts_simple,
+	[I40E_TX_SSE] = i40e_xmit_pkts_vec,
+#ifdef RTE_ARCH_X86
+	[I40E_TX_AVX2] = i40e_xmit_pkts_vec_avx2,
+#ifdef CC_AVX512_SUPPORT
+	[I40E_TX_AVX512] = i40e_xmit_pkts_vec_avx512,
+#endif
+#endif
+};
+
+/* Tx mbuf check */
+static uint16_t
+i40e_xmit_pkts_check(void *tx_queue, struct rte_mbuf **tx_pkts,
+	      uint16_t nb_pkts)
+{
+	struct i40e_tx_queue *txq = tx_queue;
+	uint16_t idx;
+	uint64_t ol_flags;
+	struct rte_mbuf *mb;
+	bool pkt_error = false;
+	const char *reason = NULL;
+	uint16_t good_pkts = nb_pkts;
+	struct i40e_adapter *adapter = txq->vsi->adapter;
+	enum i40e_tx_burst_type tx_burst_type = adapter->tx_burst_type;
+
+	for (idx = 0; idx < nb_pkts; idx++) {
+		mb = tx_pkts[idx];
+		ol_flags = mb->ol_flags;
+
+		if ((adapter->mc_flags & I40E_MBUF_CHECK_F_TX_MBUF) &&
+			(rte_mbuf_check(mb, 0, &reason) != 0)) {
+			PMD_TX_LOG(ERR, "INVALID mbuf: %s\n", reason);
+			pkt_error = true;
+			break;
+		}
+
+		if ((adapter->mc_flags & I40E_MBUF_CHECK_F_TX_SIZE) &&
+			(mb->data_len > mb->pkt_len ||
+			mb->data_len < I40E_TX_MIN_PKT_LEN ||
+			mb->data_len > adapter->max_pkt_len)) {
+			PMD_TX_LOG(ERR, "INVALID mbuf: data_len (%u) is out "
+			"of range, reasonable range (%d - %u)\n", mb->data_len,
+			I40E_TX_MIN_PKT_LEN, adapter->max_pkt_len);
+			pkt_error = true;
+			break;
+		}
+
+		if (adapter->mc_flags & I40E_MBUF_CHECK_F_TX_SEGMENT) {
+			if (!(ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
+				/**
+				 * Non-TSO case: check that nb_segs and pkt_len
+				 * do not exceed the HW limits.
+				 */
+				if (mb->nb_segs > I40E_TX_MAX_MTU_SEG) {
+					PMD_TX_LOG(ERR, "INVALID mbuf: nb_segs (%d) exceeds "
+					"HW limit, maximum allowed value is %d\n", mb->nb_segs,
+					I40E_TX_MAX_MTU_SEG);
+					pkt_error = true;
+					break;
+				}
+				if (mb->pkt_len > I40E_FRAME_SIZE_MAX) {
+					PMD_TX_LOG(ERR, "INVALID mbuf: pkt_len (%u) exceeds "
+					"HW limit, maximum allowed value is %d\n", mb->pkt_len,
+					I40E_FRAME_SIZE_MAX);
+					pkt_error = true;
+					break;
+				}
+			} else if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
+				/**
+				 * TSO case: check that tso_segsz, nb_segs and
+				 * pkt_len do not exceed the limits.
+				 */
+				if (mb->tso_segsz < I40E_MIN_TSO_MSS ||
+					mb->tso_segsz > I40E_MAX_TSO_MSS) {
+					/**
+					 * An MSS outside this range is considered malicious.
+					 */
+					PMD_TX_LOG(ERR, "INVALID mbuf: tso_segsz (%u) is out "
+					"of range, reasonable range (%d - %u)\n", mb->tso_segsz,
+					I40E_MIN_TSO_MSS, I40E_MAX_TSO_MSS);
+					pkt_error = true;
+					break;
+				}
+				if (mb->nb_segs > txq->nb_tx_desc) {
+					PMD_TX_LOG(ERR, "INVALID mbuf: nb_segs out "
+					"of ring length\n");
+					pkt_error = true;
+					break;
+				}
+				if (mb->pkt_len > I40E_TSO_FRAME_SIZE_MAX) {
+					PMD_TX_LOG(ERR, "INVALID mbuf: pkt_len (%u) exceeds "
+					"HW limit, maximum allowed value is %d\n", mb->pkt_len,
+					I40E_TSO_FRAME_SIZE_MAX);
+					pkt_error = true;
+					break;
+				}
+			}
+		}
+
+		if (adapter->mc_flags & I40E_MBUF_CHECK_F_TX_OFFLOAD) {
+			if (ol_flags & I40E_TX_OFFLOAD_NOTSUP_MASK) {
+				PMD_TX_LOG(ERR, "INVALID mbuf: TX offload "
+				"is not supported\n");
+				pkt_error = true;
+				break;
+			}
+
+			if (!rte_validate_tx_offload(mb)) {
+				PMD_TX_LOG(ERR, "INVALID mbuf: TX offload "
+				"setup error\n");
+				pkt_error = true;
+				break;
+			}
+		}
+	}
+
+	if (pkt_error) {
+		txq->mbuf_errors++;
+		good_pkts = idx;
+		if (good_pkts == 0)
+			return 0;
+	}
+
+	return i40e_tx_pkt_burst_ops[tx_burst_type](tx_queue,
+								tx_pkts, good_pkts);
+}
+
 /*********************************************************************
  *
  *  TX simple prep functions
@@ -3467,6 +3599,8 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
 {
 	struct i40e_adapter *ad =
 		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+	enum i40e_tx_burst_type tx_burst_type = I40E_TX_DEFAULT;
+	int mbuf_check = ad->devargs.mbuf_check;
 	int i;
 
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
@@ -3501,34 +3635,39 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
 #ifdef CC_AVX512_SUPPORT
 				PMD_DRV_LOG(NOTICE, "Using AVX512 Vector Tx (port %d).",
 					    dev->data->port_id);
-				dev->tx_pkt_burst = i40e_xmit_pkts_vec_avx512;
+				tx_burst_type = I40E_TX_AVX512;
 #endif
 			} else {
 				PMD_INIT_LOG(DEBUG, "Using %sVector Tx (port %d).",
 					     ad->tx_use_avx2 ? "avx2 " : "",
 					     dev->data->port_id);
-				dev->tx_pkt_burst = ad->tx_use_avx2 ?
-						    i40e_xmit_pkts_vec_avx2 :
-						    i40e_xmit_pkts_vec;
+				tx_burst_type = ad->tx_use_avx2 ? I40E_TX_AVX2 : I40E_TX_SSE;
 				dev->recycle_tx_mbufs_reuse = i40e_recycle_tx_mbufs_reuse_vec;
 			}
 #else /* RTE_ARCH_X86 */
 			PMD_INIT_LOG(DEBUG, "Using Vector Tx (port %d).",
 				     dev->data->port_id);
-			dev->tx_pkt_burst = i40e_xmit_pkts_vec;
+			tx_burst_type = I40E_TX_SSE;
 			dev->recycle_tx_mbufs_reuse = i40e_recycle_tx_mbufs_reuse_vec;
 #endif /* RTE_ARCH_X86 */
 		} else {
 			PMD_INIT_LOG(DEBUG, "Simple tx finally be used.");
-			dev->tx_pkt_burst = i40e_xmit_pkts_simple;
+			tx_burst_type = I40E_TX_SIMPLE;
 			dev->recycle_tx_mbufs_reuse = i40e_recycle_tx_mbufs_reuse_vec;
 		}
 		dev->tx_pkt_prepare = i40e_simple_prep_pkts;
 	} else {
 		PMD_INIT_LOG(DEBUG, "Xmit tx finally be used.");
-		dev->tx_pkt_burst = i40e_xmit_pkts;
+		tx_burst_type = I40E_TX_DEFAULT;
 		dev->tx_pkt_prepare = i40e_prep_pkts;
 	}
+
+	if (mbuf_check) {
+		ad->tx_burst_type = tx_burst_type;
+		dev->tx_pkt_burst = i40e_xmit_pkts_check;
+	} else {
+		dev->tx_pkt_burst = i40e_tx_pkt_burst_ops[tx_burst_type];
+	}
 }
 
 static const struct {
diff --git a/drivers/net/i40e/i40e_rxtx.h b/drivers/net/i40e/i40e_rxtx.h
index b191f23e1f..818bf9d859 100644
--- a/drivers/net/i40e/i40e_rxtx.h
+++ b/drivers/net/i40e/i40e_rxtx.h
@@ -167,6 +167,8 @@ struct i40e_tx_queue {
 	uint16_t tx_next_dd;
 	uint16_t tx_next_rs;
 	bool q_set; /**< indicate if tx queue has been configured */
+	uint64_t mbuf_errors;
+
 	bool tx_deferred_start; /**< don't start this queue in dev start */
 	uint8_t dcb_tc;         /**< Traffic class of tx queue */
 	uint64_t offloads; /**< Tx offload flags of RTE_ETH_TX_OFFLOAD_* */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH v5] net/i40e: add diagnostic support in TX path
  2024-01-05  9:59   ` [PATCH v4] " Mingjin Ye
@ 2024-03-01  9:44     ` Mingjin Ye
  2024-03-01 10:24       ` Bruce Richardson
  2024-03-04  9:33       ` [PATCH v6] net/i40e: add diagnostic support in Tx path Mingjin Ye
  0 siblings, 2 replies; 11+ messages in thread
From: Mingjin Ye @ 2024-03-01  9:44 UTC (permalink / raw)
  To: dev; +Cc: Mingjin Ye, Yuying Zhang, Beilei Xing

Implemented a Tx wrapper to perform a thorough check on mbufs,
categorizing and counting invalid cases by type for diagnostic
purposes. The count of invalid cases is accessible through xstats_get.

Also, the devarg option "mbuf_check" was introduced to configure the
diagnostic parameters to enable the appropriate diagnostic features.

supported cases: mbuf, size, segment, offload.
 1. mbuf: check for corrupted mbuf.
 2. size: check min/max packet length according to hw spec.
 3. segment: check that the number of mbuf segments does not exceed
    the hw limit.
 4. offload: check for any unsupported offload flag.

parameter format: "mbuf_check=<case>" or "mbuf_check=[<case1>,<case2>]"
eg: dpdk-testpmd -a 0000:81:01.0,mbuf_check=[mbuf,size] -- -i
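
In outline, the approach is: keep a table of the driver's Tx burst
functions indexed by burst type, cache the selected index, and, when
diagnostics are enabled, register a wrapper that validates the burst and
then tail-calls the real function. A stripped-down sketch of that
pattern (validate_burst and selected_type stand in for the driver's
internals and are not real driver symbols):

  #include <stdint.h>
  #include <rte_mbuf.h>

  typedef uint16_t (*tx_burst_t)(void *txq, struct rte_mbuf **pkts,
                                 uint16_t n);

  extern const tx_burst_t tx_ops[];   /* one entry per Tx path */
  extern int selected_type;           /* cached at setup time */
  /* Returns how many leading packets passed all enabled checks. */
  extern uint16_t validate_burst(void *txq, struct rte_mbuf **pkts,
                                 uint16_t n);

  static uint16_t
  xmit_check_wrapper(void *txq, struct rte_mbuf **pkts, uint16_t n)
  {
      uint16_t good = validate_burst(txq, pkts, n);

      /* Transmit only the validated prefix of the burst. */
      return good == 0 ? 0 : tx_ops[selected_type](txq, pkts, good);
  }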

Signed-off-by: Mingjin Ye <mingjinx.ye@intel.com>
---
v2: remove strict.
---
v3: optimised.
---
v4: rebase.
---
v5: fix ci error.
---
 doc/guides/nics/i40e.rst       |  13 +++
 drivers/net/i40e/i40e_ethdev.c | 138 ++++++++++++++++++++++++++++-
 drivers/net/i40e/i40e_ethdev.h |  28 ++++++
 drivers/net/i40e/i40e_rxtx.c   | 153 +++++++++++++++++++++++++++++++--
 drivers/net/i40e/i40e_rxtx.h   |   2 +
 5 files changed, 326 insertions(+), 8 deletions(-)

diff --git a/doc/guides/nics/i40e.rst b/doc/guides/nics/i40e.rst
index 15689ac958..bf1d1e5d60 100644
--- a/doc/guides/nics/i40e.rst
+++ b/doc/guides/nics/i40e.rst
@@ -275,6 +275,19 @@ Runtime Configuration
 
   -a 84:00.0,vf_msg_cfg=80@120:180
 
+- ``Support TX diagnostics`` (default ``not enabled``)
+
+  Set the ``devargs`` parameter ``mbuf_check`` to enable TX diagnostics. For example,
+  ``-a 18:01.0,mbuf_check=<case>`` or ``-a 18:01.0,mbuf_check=[<case1>,<case2>...]``. Also,
+  ``xstats_get`` can be used to get the error counts, which are collected in
+  ``tx_mbuf_error_packets`` xstats. For example, ``testpmd> show port xstats all``.
+  Supported cases:
+
+  *   mbuf: Check for corrupted mbuf.
+  *   size: Check min/max packet length according to hw spec.
+  *   segment: Check that the number of mbuf segments does not exceed the HW limit.
+  *   offload: Check for any unsupported offload flag.
+
 Vector RX Pre-conditions
 ~~~~~~~~~~~~~~~~~~~~~~~~
 For Vector RX it is assumed that the number of descriptor rings will be a power
diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 3ca226156b..f23f80fd16 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -48,6 +48,7 @@
 #define ETH_I40E_SUPPORT_MULTI_DRIVER	"support-multi-driver"
 #define ETH_I40E_QUEUE_NUM_PER_VF_ARG	"queue-num-per-vf"
 #define ETH_I40E_VF_MSG_CFG		"vf_msg_cfg"
+#define ETH_I40E_MBUF_CHECK_ARG       "mbuf_check"
 
 #define I40E_CLEAR_PXE_WAIT_MS     200
 #define I40E_VSI_TSR_QINQ_STRIP		0x4010
@@ -412,6 +413,7 @@ static const char *const valid_keys[] = {
 	ETH_I40E_SUPPORT_MULTI_DRIVER,
 	ETH_I40E_QUEUE_NUM_PER_VF_ARG,
 	ETH_I40E_VF_MSG_CFG,
+	ETH_I40E_MBUF_CHECK_ARG,
 	NULL};
 
 static const struct rte_pci_id pci_id_i40e_map[] = {
@@ -545,6 +547,14 @@ static const struct rte_i40e_xstats_name_off rte_i40e_stats_strings[] = {
 #define I40E_NB_ETH_XSTATS (sizeof(rte_i40e_stats_strings) / \
 		sizeof(rte_i40e_stats_strings[0]))
 
+static const struct rte_i40e_xstats_name_off i40e_mbuf_strings[] = {
+	{"tx_mbuf_error_packets", offsetof(struct i40e_mbuf_stats,
+		tx_pkt_errors)},
+};
+
+#define I40E_NB_MBUF_XSTATS (sizeof(i40e_mbuf_strings) / \
+		sizeof(i40e_mbuf_strings[0]))
+
 static const struct rte_i40e_xstats_name_off rte_i40e_hw_port_strings[] = {
 	{"tx_link_down_dropped", offsetof(struct i40e_hw_port_stats,
 		tx_dropped_link_down)},
@@ -1373,6 +1383,88 @@ read_vf_msg_config(__rte_unused const char *key,
 	return 0;
 }
 
+static int
+read_mbuf_check_config(__rte_unused const char *key, const char *value, void *args)
+{
+	char *cur;
+	char *tmp;
+	int str_len;
+	int valid_len;
+
+	int ret = 0;
+	uint64_t *mc_flags = args;
+	char *str2 = strdup(value);
+	if (str2 == NULL)
+		return -1;
+
+	str_len = strlen(str2);
+	if (str2[0] == '[' && str2[str_len - 1] == ']') {
+		if (str_len < 3) {
+			ret = -1;
+			goto mdd_end;
+		}
+		valid_len = str_len - 2;
+		memmove(str2, str2 + 1, valid_len);
+		memset(str2 + valid_len, '\0', 2);
+	}
+	cur = strtok_r(str2, ",", &tmp);
+	while (cur != NULL) {
+		if (!strcmp(cur, "mbuf"))
+			*mc_flags |= I40E_MBUF_CHECK_F_TX_MBUF;
+		else if (!strcmp(cur, "size"))
+			*mc_flags |= I40E_MBUF_CHECK_F_TX_SIZE;
+		else if (!strcmp(cur, "segment"))
+			*mc_flags |= I40E_MBUF_CHECK_F_TX_SEGMENT;
+		else if (!strcmp(cur, "offload"))
+			*mc_flags |= I40E_MBUF_CHECK_F_TX_OFFLOAD;
+		else
+			PMD_DRV_LOG(ERR, "Unsupported mbuf check type: %s", cur);
+		cur = strtok_r(NULL, ",", &tmp);
+	}
+
+mdd_end:
+	free(str2);
+	return ret;
+}
+
+static int
+i40e_parse_mbuf_check(struct rte_eth_dev *dev)
+{
+	struct i40e_adapter *ad =
+		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+	struct rte_kvargs *kvlist;
+	int kvargs_count;
+	int ret = 0;
+
+	if (!dev->device->devargs)
+		return ret;
+
+	kvlist = rte_kvargs_parse(dev->device->devargs->args, valid_keys);
+	if (!kvlist)
+		return -EINVAL;
+
+	kvargs_count = rte_kvargs_count(kvlist, ETH_I40E_MBUF_CHECK_ARG);
+	if (!kvargs_count)
+		goto free_end;
+
+	if (kvargs_count > 1)
+		PMD_DRV_LOG(WARNING, "More than one \"%s\" argument given; only "
+			    "the first invalid or the last valid one is used!",
+			    ETH_I40E_MBUF_CHECK_ARG);
+
+	ret = rte_kvargs_process(kvlist, ETH_I40E_MBUF_CHECK_ARG,
+				read_mbuf_check_config, &ad->mc_flags);
+	if (ret)
+		goto free_end;
+
+	if (ad->mc_flags)
+		ad->devargs.mbuf_check = 1;
+
+free_end:
+	rte_kvargs_free(kvlist);
+	return ret;
+}
+
 static int
 i40e_parse_vf_msg_config(struct rte_eth_dev *dev,
 		struct i40e_vf_msg_cfg *msg_cfg)
@@ -1488,6 +1580,7 @@ eth_i40e_dev_init(struct rte_eth_dev *dev, void *init_params __rte_unused)
 	}
 
 	i40e_parse_vf_msg_config(dev, &pf->vf_msg_cfg);
+	i40e_parse_mbuf_check(dev);
 	/* Check if need to support multi-driver */
 	i40e_support_multi_driver(dev);
 
@@ -2324,6 +2417,8 @@ i40e_dev_start(struct rte_eth_dev *dev)
 	struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
 	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 	struct i40e_vsi *main_vsi = pf->main_vsi;
+	struct i40e_adapter *ad =
+		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 	int ret, i;
 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
 	struct rte_intr_handle *intr_handle = pci_dev->intr_handle;
@@ -2483,6 +2578,7 @@ i40e_dev_start(struct rte_eth_dev *dev)
 	max_frame_size = dev->data->mtu ?
 		dev->data->mtu + I40E_ETH_OVERHEAD :
 		I40E_FRAME_SIZE_MAX;
+	ad->max_pkt_len = max_frame_size;
 
 	/* Set the max frame size to HW*/
 	i40e_aq_set_mac_config(hw, max_frame_size, TRUE, false, 0, NULL);
@@ -3502,13 +3598,17 @@ i40e_dev_stats_reset(struct rte_eth_dev *dev)
 	/* read the stats, reading current register values into offset */
 	i40e_read_stats_registers(pf, hw);
 
+	memset(&pf->mbuf_stats, 0,
+		sizeof(struct i40e_mbuf_stats));
+
 	return 0;
 }
 
 static uint32_t
 i40e_xstats_calc_num(void)
 {
-	return I40E_NB_ETH_XSTATS + I40E_NB_HW_PORT_XSTATS +
+	return I40E_NB_ETH_XSTATS + I40E_NB_MBUF_XSTATS +
+		I40E_NB_HW_PORT_XSTATS +
 		(I40E_NB_RXQ_PRIO_XSTATS * 8) +
 		(I40E_NB_TXQ_PRIO_XSTATS * 8);
 }
@@ -3533,6 +3633,14 @@ static int i40e_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
 		count++;
 	}
 
+	/* Get stats from i40e_mbuf_stats struct */
+	for (i = 0; i < I40E_NB_MBUF_XSTATS; i++) {
+		strlcpy(xstats_names[count].name,
+			i40e_mbuf_strings[i].name,
+			sizeof(xstats_names[count].name));
+		count++;
+	}
+
 	/* Get individual stats from i40e_hw_port struct */
 	for (i = 0; i < I40E_NB_HW_PORT_XSTATS; i++) {
 		strlcpy(xstats_names[count].name,
@@ -3563,12 +3671,28 @@ static int i40e_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
 	return count;
 }
 
+static void
+i40e_dev_update_mbuf_stats(struct rte_eth_dev *ethdev,
+		struct i40e_mbuf_stats *mbuf_stats)
+{
+	uint16_t idx;
+	struct i40e_tx_queue *txq;
+
+	for (idx = 0; idx < ethdev->data->nb_tx_queues; idx++) {
+		txq = ethdev->data->tx_queues[idx];
+		mbuf_stats->tx_pkt_errors += txq->mbuf_errors;
+	}
+}
+
 static int
 i40e_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
 		    unsigned n)
 {
 	struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
 	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	struct i40e_adapter *adapter =
+		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+	struct i40e_mbuf_stats mbuf_stats = {0};
 	unsigned i, count, prio;
 	struct i40e_hw_port_stats *hw_stats = &pf->stats;
 
@@ -3583,6 +3707,9 @@ i40e_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
 
 	count = 0;
 
+	if (adapter->devargs.mbuf_check)
+		i40e_dev_update_mbuf_stats(dev, &mbuf_stats);
+
 	/* Get stats from i40e_eth_stats struct */
 	for (i = 0; i < I40E_NB_ETH_XSTATS; i++) {
 		xstats[count].value = *(uint64_t *)(((char *)&hw_stats->eth) +
@@ -3591,6 +3718,15 @@ i40e_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
 		count++;
 	}
 
+	/* Get stats from i40e_mbuf_stats struct */
+	for (i = 0; i < I40E_NB_MBUF_XSTATS; i++) {
+		xstats[count].value =
+			*(uint64_t *)((char *)&mbuf_stats +
+					i40e_mbuf_strings[i].offset);
+		xstats[count].id = count;
+		count++;
+	}
+
 	/* Get individual stats from i40e_hw_port struct */
 	for (i = 0; i < I40E_NB_HW_PORT_XSTATS; i++) {
 		xstats[count].value = *(uint64_t *)(((char *)hw_stats) +
diff --git a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
index 1bbe7ad376..41f9aab6ce 100644
--- a/drivers/net/i40e/i40e_ethdev.h
+++ b/drivers/net/i40e/i40e_ethdev.h
@@ -1108,6 +1108,10 @@ struct i40e_vf_msg_cfg {
 	uint32_t ignore_second;
 };
 
+struct i40e_mbuf_stats {
+	uint64_t tx_pkt_errors;
+};
+
 /*
  * Structure to store private data specific for PF instance.
  */
@@ -1122,6 +1126,7 @@ struct i40e_pf {
 
 	struct i40e_hw_port_stats stats_offset;
 	struct i40e_hw_port_stats stats;
+	struct i40e_mbuf_stats mbuf_stats;
 	u64 rx_err1;	/* rxerr1 */
 	u64 rx_err1_offset;
 
@@ -1224,6 +1229,25 @@ struct i40e_vsi_vlan_pvid_info {
 #define I40E_MAX_PKT_TYPE  256
 #define I40E_FLOW_TYPE_MAX 64
 
+#define I40E_MBUF_CHECK_F_TX_MBUF        (1ULL << 0)
+#define I40E_MBUF_CHECK_F_TX_SIZE        (1ULL << 1)
+#define I40E_MBUF_CHECK_F_TX_SEGMENT     (1ULL << 2)
+#define I40E_MBUF_CHECK_F_TX_OFFLOAD     (1ULL << 3)
+
+enum i40e_tx_burst_type {
+	I40E_TX_DEFAULT,
+	I40E_TX_SIMPLE,
+	I40E_TX_SSE,
+	I40E_TX_AVX2,
+	I40E_TX_AVX512,
+};
+
+/**
+ * Cache devargs parse result.
+ */
+struct i40e_devargs {
+	int mbuf_check;
+};
 /*
  * Structure to store private data for each PF/VF instance.
  */
@@ -1240,6 +1264,10 @@ struct i40e_adapter {
 	bool tx_simple_allowed;
 	bool tx_vec_allowed;
 
+	struct i40e_devargs devargs;
+	uint64_t mc_flags; /* mbuf check flags. */
+	uint16_t max_pkt_len; /* Maximum packet length */
+	enum i40e_tx_burst_type tx_burst_type;
 	/* For PTP */
 	struct rte_timecounter systime_tc;
 	struct rte_timecounter rx_tstamp_tc;
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 9aa5facb53..a34fb4d401 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -1536,6 +1536,138 @@ i40e_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 	return nb_tx;
 }
 
+static
+const eth_tx_burst_t i40e_tx_pkt_burst_ops[] = {
+	[I40E_TX_DEFAULT] = i40e_xmit_pkts,
+	[I40E_TX_SIMPLE] = i40e_xmit_pkts_simple,
+	[I40E_TX_SSE] = i40e_xmit_pkts_vec,
+#ifdef RTE_ARCH_X86
+	[I40E_TX_AVX2] = i40e_xmit_pkts_vec_avx2,
+#ifdef CC_AVX512_SUPPORT
+	[I40E_TX_AVX512] = i40e_xmit_pkts_vec_avx512,
+#endif
+#endif
+};
+
+/* Tx mbuf check */
+static uint16_t
+i40e_xmit_pkts_check(void *tx_queue, struct rte_mbuf **tx_pkts,
+	      uint16_t nb_pkts)
+{
+	struct i40e_tx_queue *txq = tx_queue;
+	uint16_t idx;
+	uint64_t ol_flags;
+	struct rte_mbuf *mb;
+	bool pkt_error = false;
+	const char *reason = NULL;
+	uint16_t good_pkts = nb_pkts;
+	struct i40e_adapter *adapter = txq->vsi->adapter;
+	enum i40e_tx_burst_type tx_burst_type = adapter->tx_burst_type;
+
+	for (idx = 0; idx < nb_pkts; idx++) {
+		mb = tx_pkts[idx];
+		ol_flags = mb->ol_flags;
+
+		if ((adapter->mc_flags & I40E_MBUF_CHECK_F_TX_MBUF) &&
+			(rte_mbuf_check(mb, 0, &reason) != 0)) {
+			PMD_TX_LOG(ERR, "INVALID mbuf: %s\n", reason);
+			pkt_error = true;
+			break;
+		}
+
+		if ((adapter->mc_flags & I40E_MBUF_CHECK_F_TX_SIZE) &&
+			(mb->data_len > mb->pkt_len ||
+			mb->data_len < I40E_TX_MIN_PKT_LEN ||
+			mb->data_len > adapter->max_pkt_len)) {
+			PMD_TX_LOG(ERR, "INVALID mbuf: data_len (%u) is out "
+			"of range, reasonable range (%d - %u)\n", mb->data_len,
+			I40E_TX_MIN_PKT_LEN, adapter->max_pkt_len);
+			pkt_error = true;
+			break;
+		}
+
+		if (adapter->mc_flags & I40E_MBUF_CHECK_F_TX_SEGMENT) {
+			if (!(ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
+				/**
+				 * Non-TSO case: check that nb_segs and pkt_len
+				 * do not exceed the HW limits.
+				 */
+				if (mb->nb_segs > I40E_TX_MAX_MTU_SEG) {
+					PMD_TX_LOG(ERR, "INVALID mbuf: nb_segs (%d) exceeds "
+					"HW limit, maximum allowed value is %d\n", mb->nb_segs,
+					I40E_TX_MAX_MTU_SEG);
+					pkt_error = true;
+					break;
+				}
+				if (mb->pkt_len > I40E_FRAME_SIZE_MAX) {
+					PMD_TX_LOG(ERR, "INVALID mbuf: pkt_len (%u) exceeds "
+					"HW limit, maximum allowed value is %d\n", mb->pkt_len,
+					I40E_FRAME_SIZE_MAX);
+					pkt_error = true;
+					break;
+				}
+			} else if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
+				/**
+				 * TSO case: check that tso_segsz, nb_segs and
+				 * pkt_len do not exceed the limits.
+				 */
+				if (mb->tso_segsz < I40E_MIN_TSO_MSS ||
+					mb->tso_segsz > I40E_MAX_TSO_MSS) {
+					/**
+					 * An MSS outside this range is considered malicious.
+					 */
+					PMD_TX_LOG(ERR, "INVALID mbuf: tso_segsz (%u) is out "
+					"of range, reasonable range (%d - %u)\n", mb->tso_segsz,
+					I40E_MIN_TSO_MSS, I40E_MAX_TSO_MSS);
+					pkt_error = true;
+					break;
+				}
+				if (mb->nb_segs > txq->nb_tx_desc) {
+					PMD_TX_LOG(ERR, "INVALID mbuf: nb_segs out "
+					"of ring length\n");
+					pkt_error = true;
+					break;
+				}
+				if (mb->pkt_len > I40E_TSO_FRAME_SIZE_MAX) {
+					PMD_TX_LOG(ERR, "INVALID mbuf: pkt_len (%u) exceeds "
+					"HW limit, maximum allowed value is %d\n", mb->pkt_len,
+					I40E_TSO_FRAME_SIZE_MAX);
+					pkt_error = true;
+					break;
+				}
+			}
+		}
+
+		if (adapter->mc_flags & I40E_MBUF_CHECK_F_TX_OFFLOAD) {
+			if (ol_flags & I40E_TX_OFFLOAD_NOTSUP_MASK) {
+				PMD_TX_LOG(ERR, "INVALID mbuf: TX offload "
+				"is not supported\n");
+				pkt_error = true;
+				break;
+			}
+
+			if (!rte_validate_tx_offload(mb)) {
+				PMD_TX_LOG(ERR, "INVALID mbuf: TX offload "
+				"setup error\n");
+				pkt_error = true;
+				break;
+			}
+		}
+	}
+
+	if (pkt_error) {
+		txq->mbuf_errors++;
+		good_pkts = idx;
+		if (good_pkts == 0)
+			return 0;
+	}
+
+	return i40e_tx_pkt_burst_ops[tx_burst_type](tx_queue,
+								tx_pkts, good_pkts);
+}
+
 /*********************************************************************
  *
  *  TX simple prep functions
@@ -3467,6 +3599,8 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
 {
 	struct i40e_adapter *ad =
 		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+	enum i40e_tx_burst_type tx_burst_type = I40E_TX_DEFAULT;
+	int mbuf_check = ad->devargs.mbuf_check;
 	int i;
 
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
@@ -3501,34 +3635,39 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
 #ifdef CC_AVX512_SUPPORT
 				PMD_DRV_LOG(NOTICE, "Using AVX512 Vector Tx (port %d).",
 					    dev->data->port_id);
-				dev->tx_pkt_burst = i40e_xmit_pkts_vec_avx512;
+				tx_burst_type = I40E_TX_AVX512;
 #endif
 			} else {
 				PMD_INIT_LOG(DEBUG, "Using %sVector Tx (port %d).",
 					     ad->tx_use_avx2 ? "avx2 " : "",
 					     dev->data->port_id);
-				dev->tx_pkt_burst = ad->tx_use_avx2 ?
-						    i40e_xmit_pkts_vec_avx2 :
-						    i40e_xmit_pkts_vec;
+				tx_burst_type = ad->tx_use_avx2 ? I40E_TX_AVX2 : I40E_TX_SSE;
 				dev->recycle_tx_mbufs_reuse = i40e_recycle_tx_mbufs_reuse_vec;
 			}
 #else /* RTE_ARCH_X86 */
 			PMD_INIT_LOG(DEBUG, "Using Vector Tx (port %d).",
 				     dev->data->port_id);
-			dev->tx_pkt_burst = i40e_xmit_pkts_vec;
+			tx_burst_type = I40E_TX_SSE;
 			dev->recycle_tx_mbufs_reuse = i40e_recycle_tx_mbufs_reuse_vec;
 #endif /* RTE_ARCH_X86 */
 		} else {
 			PMD_INIT_LOG(DEBUG, "Simple tx finally be used.");
-			dev->tx_pkt_burst = i40e_xmit_pkts_simple;
+			tx_burst_type = I40E_TX_SIMPLE;
 			dev->recycle_tx_mbufs_reuse = i40e_recycle_tx_mbufs_reuse_vec;
 		}
 		dev->tx_pkt_prepare = i40e_simple_prep_pkts;
 	} else {
 		PMD_INIT_LOG(DEBUG, "Xmit tx finally be used.");
-		dev->tx_pkt_burst = i40e_xmit_pkts;
+		tx_burst_type = I40E_TX_DEFAULT;
 		dev->tx_pkt_prepare = i40e_prep_pkts;
 	}
+
+	if (mbuf_check) {
+		ad->tx_burst_type = tx_burst_type;
+		dev->tx_pkt_burst = i40e_xmit_pkts_check;
+	} else {
+		dev->tx_pkt_burst = i40e_tx_pkt_burst_ops[tx_burst_type];
+	}
 }
 
 static const struct {
diff --git a/drivers/net/i40e/i40e_rxtx.h b/drivers/net/i40e/i40e_rxtx.h
index b191f23e1f..818bf9d859 100644
--- a/drivers/net/i40e/i40e_rxtx.h
+++ b/drivers/net/i40e/i40e_rxtx.h
@@ -167,6 +167,8 @@ struct i40e_tx_queue {
 	uint16_t tx_next_dd;
 	uint16_t tx_next_rs;
 	bool q_set; /**< indicate if tx queue has been configured */
+	uint64_t mbuf_errors;
+
 	bool tx_deferred_start; /**< don't start this queue in dev start */
 	uint8_t dcb_tc;         /**< Traffic class of tx queue */
 	uint64_t offloads; /**< Tx offload flags of RTE_ETH_TX_OFFLOAD_* */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v5] net/i40e: add diagnostic support in TX path
  2024-03-01  9:44     ` [PATCH v5] " Mingjin Ye
@ 2024-03-01 10:24       ` Bruce Richardson
  2024-03-01 16:13         ` Patrick Robb
  2024-03-04  9:33       ` [PATCH v6] net/i40e: add diagnostic support in Tx path Mingjin Ye
  1 sibling, 1 reply; 11+ messages in thread
From: Bruce Richardson @ 2024-03-01 10:24 UTC (permalink / raw)
  To: Mingjin Ye; +Cc: dev, Yuying Zhang, Beilei Xing

On Fri, Mar 01, 2024 at 09:44:21AM +0000, Mingjin Ye wrote:
> Implemented a Tx wrapper to perform a thorough check on mbufs,
> categorizing and counting invalid cases by types for diagnostic
> purposes. The count of invalid cases is accessible through xstats_get.
> 
> Also, the devarg option "mbuf_check" was introduced to configure the
> diagnostic parameters to enable the appropriate diagnostic features.
> 
> supported cases: mbuf, size, segment, offload.
>  1. mbuf: check for corrupted mbuf.
>  2. size: check min/max packet length according to hw spec.
>  3. segment: check number of mbuf segments not exceed hw limitation.
>  4. offload: check any unsupported offload flag.
> 
> parameter format: "mbuf_check=<case>" or "mbuf_check=[<case1>,<case2>]"
> eg: dpdk-testpmd -a 0000:81:01.0,mbuf_check=[mbuf,size] -- -i
> 
> Signed-off-by: Mingjin Ye <mingjinx.ye@intel.com>
> ---
> v2: remove strict.
> ---
> v3: optimised.
> ---
> v4: rebase.
> ---
> v5: fix ci error.
> ---
>  doc/guides/nics/i40e.rst       |  13 +++
>  drivers/net/i40e/i40e_ethdev.c | 138 ++++++++++++++++++++++++++++-
>  drivers/net/i40e/i40e_ethdev.h |  28 ++++++
>  drivers/net/i40e/i40e_rxtx.c   | 153 +++++++++++++++++++++++++++++++--
>  drivers/net/i40e/i40e_rxtx.h   |   2 +
>  5 files changed, 326 insertions(+), 8 deletions(-)
> 
> diff --git a/doc/guides/nics/i40e.rst b/doc/guides/nics/i40e.rst
> index 15689ac958..bf1d1e5d60 100644
> --- a/doc/guides/nics/i40e.rst
> +++ b/doc/guides/nics/i40e.rst
> @@ -275,6 +275,19 @@ Runtime Configuration
>  
>    -a 84:00.0,vf_msg_cfg=80@120:180
>  
> +- ``Support TX diagnostics`` (default ``not enabled``)
> +
> +  Set the ``devargs`` parameter ``mbuf_check`` to enable TX diagnostics. For example,
> +  ``-a 18:01.0,mbuf_check=<case>`` or ``-a 18:01.0,mbuf_check=[<case1>,<case2>...]``. Also,
> +  ``xstats_get`` can be used to get the error counts, which are collected in
> +  ``tx_mbuf_error_packets`` xstats. For example, ``testpmd> show port xstats all``.
> +  Supported cases:
> +
> +  *   mbuf: Check for corrupted mbuf.
> +  *   size: Check min/max packet length according to hw spec.
> +  *   segment: Check number of mbuf segments not exceed hw limitation.
> +  *   offload: Check any unsupported offload flag.
> +

Hi Mingjin,

please see the changes made to the equivalent doc (and commit-log) updates
for iavf when I applied that earlier patch to next-net-intel. This patch
should be updated to match that. Changes were pretty basic, but still
useful, for example, aligning line breaks to punctuation.

Thanks,
/Bruce

PS: This feedback applies to the net/ice patch too.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v5] net/i40e: add diagnostic support in TX path
  2024-03-01 10:24       ` Bruce Richardson
@ 2024-03-01 16:13         ` Patrick Robb
  0 siblings, 0 replies; 11+ messages in thread
From: Patrick Robb @ 2024-03-01 16:13 UTC (permalink / raw)
  To: Bruce Richardson; +Cc: Mingjin Ye, dev, Yuying Zhang, Beilei Xing

The Community CI Testing Lab had an infra failure this morning, and some
patches, including yours, were affected by false failures. The issue is now
resolved and we are rerunning the tests in question for all patches
submitted today.

On Fri, Mar 1, 2024 at 5:25 AM Bruce Richardson <bruce.richardson@intel.com>
wrote:

> On Fri, Mar 01, 2024 at 09:44:21AM +0000, Mingjin Ye wrote:
> > Implemented a Tx wrapper to perform a thorough check on mbufs,
> > categorizing and counting invalid cases by types for diagnostic
> > purposes. The count of invalid cases is accessible through xstats_get.
> >
> > Also, the devarg option "mbuf_check" was introduced to configure the
> > diagnostic parameters to enable the appropriate diagnostic features.
> >
> > supported cases: mbuf, size, segment, offload.
> >  1. mbuf: check for corrupted mbuf.
> >  2. size: check min/max packet length according to hw spec.
> >  3. segment: check number of mbuf segments not exceed hw limitation.
> >  4. offload: check any unsupported offload flag.
> >
> > parameter format: "mbuf_check=<case>" or "mbuf_check=[<case1>,<case2>]"
> > eg: dpdk-testpmd -a 0000:81:01.0,mbuf_check=[mbuf,size] -- -i
> >
> > Signed-off-by: Mingjin Ye <mingjinx.ye@intel.com>
> > ---
> > v2: remove strict.
> > ---
> > v3: optimised.
> > ---
> > v4: rebase.
> > ---
> > v5: fix ci error.
> > ---
> >  doc/guides/nics/i40e.rst       |  13 +++
> >  drivers/net/i40e/i40e_ethdev.c | 138 ++++++++++++++++++++++++++++-
> >  drivers/net/i40e/i40e_ethdev.h |  28 ++++++
> >  drivers/net/i40e/i40e_rxtx.c   | 153 +++++++++++++++++++++++++++++++--
> >  drivers/net/i40e/i40e_rxtx.h   |   2 +
> >  5 files changed, 326 insertions(+), 8 deletions(-)
> >
> > diff --git a/doc/guides/nics/i40e.rst b/doc/guides/nics/i40e.rst
> > index 15689ac958..bf1d1e5d60 100644
> > --- a/doc/guides/nics/i40e.rst
> > +++ b/doc/guides/nics/i40e.rst
> > @@ -275,6 +275,19 @@ Runtime Configuration
> >
> >    -a 84:00.0,vf_msg_cfg=80@120:180
> >
> > +- ``Support TX diagnostics`` (default ``not enabled``)
> > +
> > +  Set the ``devargs`` parameter ``mbuf_check`` to enable TX
> diagnostics. For example,
> > +  ``-a 18:01.0,mbuf_check=<case>`` or ``-a
> 18:01.0,mbuf_check=[<case1>,<case2>...]``. Also,
> > +  ``xstats_get`` can be used to get the error counts, which are
> collected in
> > +  ``tx_mbuf_error_packets`` xstats. For example, ``testpmd> show port
> xstats all``.
> > +  Supported cases:
> > +
> > +  *   mbuf: Check for corrupted mbuf.
> > +  *   size: Check min/max packet length according to hw spec.
> > +  *   segment: Check number of mbuf segments not exceed hw limitation.
> > +  *   offload: Check any unsupported offload flag.
> > +
>
> Hi Mingjin,
>
> please see the changes made to the equivalent doc (and commit-log) updates
> for iavf when I applied that earlier patch to next-net-intel. This patch
> should be updated to match that. Changes were pretty basic, but still
> useful, for example, aligning line breaks to punctuation.
>
> Thanks,
> /Bruce
>
> PS: This feedback applies to the net/ice patch too.
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH v6] net/i40e: add diagnostic support in Tx path
  2024-03-01  9:44     ` [PATCH v5] " Mingjin Ye
  2024-03-01 10:24       ` Bruce Richardson
@ 2024-03-04  9:33       ` Mingjin Ye
  2024-03-04 11:47         ` Bruce Richardson
  2024-03-05 10:17         ` [PATCH v7] " Mingjin Ye
  1 sibling, 2 replies; 11+ messages in thread
From: Mingjin Ye @ 2024-03-04  9:33 UTC (permalink / raw)
  To: dev; +Cc: Mingjin Ye, Yuying Zhang

Implemented a Tx wrapper to perform a thorough check on mbufs,
categorizing and counting invalid cases by type for diagnostic
purposes. The count of invalid cases is accessible through xstats_get.

Also, the devarg option "mbuf_check" was introduced to configure the
diagnostic parameters to enable the appropriate diagnostic features.

supported cases: mbuf, size, segment, offload.
 1. mbuf: Check for corrupted mbuf.
 2. size: Check min/max packet length according to HW spec.
 3. segment: Check that the number of mbuf segments does not exceed HW limits.
 4. offload: Check for use of an unsupported offload flag.

parameter format: "mbuf_check=<case>" or "mbuf_check=[<case1>,<case2>]"
eg: dpdk-testpmd -a 0000:87:00.0,mbuf_check=[mbuf,size] -- -i
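
Since the parser accepts both a bare value and a bracketed list, the
following invocations should be equivalent ways of enabling checks
(single case, bracketed single case, and several cases at once):

  dpdk-testpmd -a 0000:87:00.0,mbuf_check=size -- -i
  dpdk-testpmd -a 0000:87:00.0,mbuf_check=[size] -- -i
  dpdk-testpmd -a 0000:87:00.0,mbuf_check=[mbuf,size,segment,offload] -- -i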

Signed-off-by: Mingjin Ye <mingjinx.ye@intel.com>
---
v2: remove strict.
---
v3: optimised.
---
v4: rebase.
---
v5: fix ci error.
---
v6: Changes the commit log.
---
 doc/guides/nics/i40e.rst       |  14 +++
 drivers/net/i40e/i40e_ethdev.c | 138 ++++++++++++++++++++++++++++-
 drivers/net/i40e/i40e_ethdev.h |  28 ++++++
 drivers/net/i40e/i40e_rxtx.c   | 153 +++++++++++++++++++++++++++++++--
 drivers/net/i40e/i40e_rxtx.h   |   2 +
 5 files changed, 327 insertions(+), 8 deletions(-)

diff --git a/doc/guides/nics/i40e.rst b/doc/guides/nics/i40e.rst
index 15689ac958..91b45e1d40 100644
--- a/doc/guides/nics/i40e.rst
+++ b/doc/guides/nics/i40e.rst
@@ -275,6 +275,20 @@ Runtime Configuration
 
   -a 84:00.0,vf_msg_cfg=80@120:180
 
+- ``Support TX diagnostics`` (default ``not enabled``)
+
+  Set the ``devargs`` parameter ``mbuf_check`` to enable TX diagnostics.
+  For example, ``-a 87:00.0,mbuf_check=<case>`` or ``-a 87:00.0,mbuf_check=[<case1>,<case2>...]``.
+  Thereafter, ``rte_eth_xstats_get()`` can be used to get the error counts,
+  which are collected in ``tx_mbuf_error_packets`` xstats.
+  In testpmd these can be shown via: ``testpmd> show port xstats all``.
+  Supported values for the ``case`` parameter are:
+
+  *   mbuf: Check for corrupted mbuf.
+  *   size: Check min/max packet length according to HW spec.
+  *   segment: Check that the number of mbuf segments does not exceed HW limits.
+  *   offload: Check for use of an unsupported offload flag.
+
 Vector RX Pre-conditions
 ~~~~~~~~~~~~~~~~~~~~~~~~
 For Vector RX it is assumed that the number of descriptor rings will be a power
diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 4d21341382..3e2ddcaa3e 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -48,6 +48,7 @@
 #define ETH_I40E_SUPPORT_MULTI_DRIVER	"support-multi-driver"
 #define ETH_I40E_QUEUE_NUM_PER_VF_ARG	"queue-num-per-vf"
 #define ETH_I40E_VF_MSG_CFG		"vf_msg_cfg"
+#define ETH_I40E_MBUF_CHECK_ARG       "mbuf_check"
 
 #define I40E_CLEAR_PXE_WAIT_MS     200
 #define I40E_VSI_TSR_QINQ_STRIP		0x4010
@@ -412,6 +413,7 @@ static const char *const valid_keys[] = {
 	ETH_I40E_SUPPORT_MULTI_DRIVER,
 	ETH_I40E_QUEUE_NUM_PER_VF_ARG,
 	ETH_I40E_VF_MSG_CFG,
+	ETH_I40E_MBUF_CHECK_ARG,
 	NULL};
 
 static const struct rte_pci_id pci_id_i40e_map[] = {
@@ -545,6 +547,14 @@ static const struct rte_i40e_xstats_name_off rte_i40e_stats_strings[] = {
 #define I40E_NB_ETH_XSTATS (sizeof(rte_i40e_stats_strings) / \
 		sizeof(rte_i40e_stats_strings[0]))
 
+static const struct rte_i40e_xstats_name_off i40e_mbuf_strings[] = {
+	{"tx_mbuf_error_packets", offsetof(struct i40e_mbuf_stats,
+		tx_pkt_errors)},
+};
+
+#define I40E_NB_MBUF_XSTATS (sizeof(i40e_mbuf_strings) / \
+		sizeof(i40e_mbuf_strings[0]))
+
 static const struct rte_i40e_xstats_name_off rte_i40e_hw_port_strings[] = {
 	{"tx_link_down_dropped", offsetof(struct i40e_hw_port_stats,
 		tx_dropped_link_down)},
@@ -1373,6 +1383,88 @@ read_vf_msg_config(__rte_unused const char *key,
 	return 0;
 }
 
+static int
+read_mbuf_check_config(__rte_unused const char *key, const char *value, void *args)
+{
+	char *cur;
+	char *tmp;
+	int str_len;
+	int valid_len;
+
+	int ret = 0;
+	uint64_t *mc_flags = args;
+	char *str2 = strdup(value);
+	if (str2 == NULL)
+		return -1;
+
+	str_len = strlen(str2);
+	if (str2[0] == '[' && str2[str_len - 1] == ']') {
+		if (str_len < 3) {
+			ret = -1;
+			goto mdd_end;
+		}
+		valid_len = str_len - 2;
+		memmove(str2, str2 + 1, valid_len);
+		memset(str2 + valid_len, '\0', 2);
+	}
+	cur = strtok_r(str2, ",", &tmp);
+	while (cur != NULL) {
+		if (!strcmp(cur, "mbuf"))
+			*mc_flags |= I40E_MBUF_CHECK_F_TX_MBUF;
+		else if (!strcmp(cur, "size"))
+			*mc_flags |= I40E_MBUF_CHECK_F_TX_SIZE;
+		else if (!strcmp(cur, "segment"))
+			*mc_flags |= I40E_MBUF_CHECK_F_TX_SEGMENT;
+		else if (!strcmp(cur, "offload"))
+			*mc_flags |= I40E_MBUF_CHECK_F_TX_OFFLOAD;
+		else
+			PMD_DRV_LOG(ERR, "Unsupported mbuf check type: %s", cur);
+		cur = strtok_r(NULL, ",", &tmp);
+	}
+
+mdd_end:
+	free(str2);
+	return ret;
+}
+
+static int
+i40e_parse_mbuf_check(struct rte_eth_dev *dev)
+{
+	struct i40e_adapter *ad =
+		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+	struct rte_kvargs *kvlist;
+	int kvargs_count;
+	int ret = 0;
+
+	if (!dev->device->devargs)
+		return ret;
+
+	kvlist = rte_kvargs_parse(dev->device->devargs->args, valid_keys);
+	if (!kvlist)
+		return -EINVAL;
+
+	kvargs_count = rte_kvargs_count(kvlist, ETH_I40E_MBUF_CHECK_ARG);
+	if (!kvargs_count)
+		goto free_end;
+
+	if (kvargs_count > 1)
+		PMD_DRV_LOG(WARNING, "More than one \"%s\" argument given; only "
+			    "the first invalid or the last valid one is used!",
+			    ETH_I40E_MBUF_CHECK_ARG);
+
+	ret = rte_kvargs_process(kvlist, ETH_I40E_MBUF_CHECK_ARG,
+				read_mbuf_check_config, &ad->mc_flags);
+	if (ret)
+		goto free_end;
+
+	if (ad->mc_flags)
+		ad->devargs.mbuf_check = 1;
+
+free_end:
+	rte_kvargs_free(kvlist);
+	return ret;
+}
+
 static int
 i40e_parse_vf_msg_config(struct rte_eth_dev *dev,
 		struct i40e_vf_msg_cfg *msg_cfg)
@@ -1488,6 +1580,7 @@ eth_i40e_dev_init(struct rte_eth_dev *dev, void *init_params __rte_unused)
 	}
 
 	i40e_parse_vf_msg_config(dev, &pf->vf_msg_cfg);
+	i40e_parse_mbuf_check(dev);
 	/* Check if need to support multi-driver */
 	i40e_support_multi_driver(dev);
 
@@ -2324,6 +2417,8 @@ i40e_dev_start(struct rte_eth_dev *dev)
 	struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
 	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 	struct i40e_vsi *main_vsi = pf->main_vsi;
+	struct i40e_adapter *ad =
+		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 	int ret, i;
 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
 	struct rte_intr_handle *intr_handle = pci_dev->intr_handle;
@@ -2483,6 +2578,7 @@ i40e_dev_start(struct rte_eth_dev *dev)
 	max_frame_size = dev->data->mtu ?
 		dev->data->mtu + I40E_ETH_OVERHEAD :
 		I40E_FRAME_SIZE_MAX;
+	ad->max_pkt_len = max_frame_size;
 
 	/* Set the max frame size to HW*/
 	i40e_aq_set_mac_config(hw, max_frame_size, TRUE, false, 0, NULL);
@@ -3502,13 +3598,17 @@ i40e_dev_stats_reset(struct rte_eth_dev *dev)
 	/* read the stats, reading current register values into offset */
 	i40e_read_stats_registers(pf, hw);
 
+	memset(&pf->mbuf_stats, 0,
+		sizeof(struct i40e_mbuf_stats));
+
 	return 0;
 }
 
 static uint32_t
 i40e_xstats_calc_num(void)
 {
-	return I40E_NB_ETH_XSTATS + I40E_NB_HW_PORT_XSTATS +
+	return I40E_NB_ETH_XSTATS + I40E_NB_MBUF_XSTATS +
+		I40E_NB_HW_PORT_XSTATS +
 		(I40E_NB_RXQ_PRIO_XSTATS * 8) +
 		(I40E_NB_TXQ_PRIO_XSTATS * 8);
 }
@@ -3533,6 +3633,14 @@ static int i40e_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
 		count++;
 	}
 
+	/* Get stats from i40e_mbuf_stats struct */
+	for (i = 0; i < I40E_NB_MBUF_XSTATS; i++) {
+		strlcpy(xstats_names[count].name,
+			i40e_mbuf_strings[i].name,
+			sizeof(xstats_names[count].name));
+		count++;
+	}
+
 	/* Get individual stats from i40e_hw_port struct */
 	for (i = 0; i < I40E_NB_HW_PORT_XSTATS; i++) {
 		strlcpy(xstats_names[count].name,
@@ -3563,12 +3671,28 @@ static int i40e_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
 	return count;
 }
 
+static void
+i40e_dev_update_mbuf_stats(struct rte_eth_dev *ethdev,
+		struct i40e_mbuf_stats *mbuf_stats)
+{
+	uint16_t idx;
+	struct i40e_tx_queue *txq;
+
+	for (idx = 0; idx < ethdev->data->nb_tx_queues; idx++) {
+		txq = ethdev->data->tx_queues[idx];
+		mbuf_stats->tx_pkt_errors += txq->mbuf_errors;
+	}
+}
+
 static int
 i40e_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
 		    unsigned n)
 {
 	struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
 	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	struct i40e_adapter *adapter =
+		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+	struct i40e_mbuf_stats mbuf_stats = {0};
 	unsigned i, count, prio;
 	struct i40e_hw_port_stats *hw_stats = &pf->stats;
 
@@ -3583,6 +3707,9 @@ i40e_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
 
 	count = 0;
 
+	if (adapter->devargs.mbuf_check)
+		i40e_dev_update_mbuf_stats(dev, &mbuf_stats);
+
 	/* Get stats from i40e_eth_stats struct */
 	for (i = 0; i < I40E_NB_ETH_XSTATS; i++) {
 		xstats[count].value = *(uint64_t *)(((char *)&hw_stats->eth) +
@@ -3591,6 +3718,15 @@ i40e_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
 		count++;
 	}
 
+	/* Get stats from i40e_mbuf_stats struct */
+	for (i = 0; i < I40E_NB_MBUF_XSTATS; i++) {
+		xstats[count].value =
+			*(uint64_t *)((char *)&mbuf_stats +
+					i40e_mbuf_strings[i].offset);
+		xstats[count].id = count;
+		count++;
+	}
+
 	/* Get individual stats from i40e_hw_port struct */
 	for (i = 0; i < I40E_NB_HW_PORT_XSTATS; i++) {
 		xstats[count].value = *(uint64_t *)(((char *)hw_stats) +
diff --git a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
index 445e1c0b38..ae3f2f60ac 100644
--- a/drivers/net/i40e/i40e_ethdev.h
+++ b/drivers/net/i40e/i40e_ethdev.h
@@ -1109,6 +1109,10 @@ struct i40e_vf_msg_cfg {
 	uint32_t ignore_second;
 };
 
+struct i40e_mbuf_stats {
+	uint64_t tx_pkt_errors;
+};
+
 /*
  * Structure to store private data specific for PF instance.
  */
@@ -1123,6 +1127,7 @@ struct i40e_pf {
 
 	struct i40e_hw_port_stats stats_offset;
 	struct i40e_hw_port_stats stats;
+	struct i40e_mbuf_stats mbuf_stats;
 	u64 rx_err1;	/* rxerr1 */
 	u64 rx_err1_offset;
 
@@ -1225,6 +1230,25 @@ struct i40e_vsi_vlan_pvid_info {
 #define I40E_MAX_PKT_TYPE  256
 #define I40E_FLOW_TYPE_MAX 64
 
+#define I40E_MBUF_CHECK_F_TX_MBUF        (1ULL << 0)
+#define I40E_MBUF_CHECK_F_TX_SIZE        (1ULL << 1)
+#define I40E_MBUF_CHECK_F_TX_SEGMENT     (1ULL << 2)
+#define I40E_MBUF_CHECK_F_TX_OFFLOAD     (1ULL << 3)
+
+enum i40e_tx_burst_type {
+	I40E_TX_DEFAULT,
+	I40E_TX_SIMPLE,
+	I40E_TX_SSE,
+	I40E_TX_AVX2,
+	I40E_TX_AVX512,
+};
+
+/**
+ * Cache devargs parse result.
+ */
+struct i40e_devargs {
+	int mbuf_check;
+};
 /*
  * Structure to store private data for each PF/VF instance.
  */
@@ -1241,6 +1265,10 @@ struct i40e_adapter {
 	bool tx_simple_allowed;
 	bool tx_vec_allowed;
 
+	struct i40e_devargs devargs;
+	uint64_t mc_flags; /* mbuf check flags. */
+	uint16_t max_pkt_len; /* Maximum packet length */
+	enum i40e_tx_burst_type tx_burst_type;
 	/* For PTP */
 	struct rte_timecounter systime_tc;
 	struct rte_timecounter rx_tstamp_tc;
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index a0bc30d45b..c7c9c945e4 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -1536,6 +1536,138 @@ i40e_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 	return nb_tx;
 }
 
+static
+const eth_tx_burst_t i40e_tx_pkt_burst_ops[] = {
+	[I40E_TX_DEFAULT] = i40e_xmit_pkts,
+	[I40E_TX_SIMPLE] = i40e_xmit_pkts_simple,
+	[I40E_TX_SSE] = i40e_xmit_pkts_vec,
+#ifdef RTE_ARCH_X86
+	[I40E_TX_AVX2] = i40e_xmit_pkts_vec_avx2,
+#ifdef CC_AVX512_SUPPORT
+	[I40E_TX_AVX512] = i40e_xmit_pkts_vec_avx512,
+#endif
+#endif
+};
+
+/* Tx mbuf check */
+static uint16_t
+i40e_xmit_pkts_check(void *tx_queue, struct rte_mbuf **tx_pkts,
+	      uint16_t nb_pkts)
+{
+	struct i40e_tx_queue *txq = tx_queue;
+	uint16_t idx;
+	uint64_t ol_flags;
+	struct rte_mbuf *mb;
+	bool pkt_error = false;
+	const char *reason = NULL;
+	uint16_t good_pkts = nb_pkts;
+	struct i40e_adapter *adapter = txq->vsi->adapter;
+	enum i40e_tx_burst_type tx_burst_type =
+		txq->vsi->adapter->tx_burst_type;
+
+	for (idx = 0; idx < nb_pkts; idx++) {
+		mb = tx_pkts[idx];
+		ol_flags = mb->ol_flags;
+
+		if ((adapter->mc_flags & I40E_MBUF_CHECK_F_TX_MBUF) &&
+			(rte_mbuf_check(mb, 0, &reason) != 0)) {
+			PMD_TX_LOG(ERR, "INVALID mbuf: %s\n", reason);
+			pkt_error = true;
+			break;
+		}
+
+		if ((adapter->mc_flags & I40E_MBUF_CHECK_F_TX_SIZE) &&
+			(mb->data_len > mb->pkt_len ||
+			mb->data_len < I40E_TX_MIN_PKT_LEN ||
+			mb->data_len > adapter->max_pkt_len)) {
+			PMD_TX_LOG(ERR, "INVALID mbuf: data_len (%u) is out "
+			"of range, reasonable range (%d - %u)\n", mb->data_len,
+			I40E_TX_MIN_PKT_LEN, adapter->max_pkt_len);
+			pkt_error = true;
+			break;
+		}
+
+		if (adapter->mc_flags & I40E_MBUF_CHECK_F_TX_SEGMENT) {
+			if (!(ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
+				/**
+				 * No TSO case: nb_segs and pkt_len must not
+				 * exceed the limits.
+				 */
+				if (mb->nb_segs > I40E_TX_MAX_MTU_SEG) {
+					PMD_TX_LOG(ERR, "INVALID mbuf: nb_segs (%d) exceeds "
+					"HW limit, maximum allowed value is %d\n", mb->nb_segs,
+					I40E_TX_MAX_MTU_SEG);
+					pkt_error = true;
+					break;
+				}
+				if (mb->pkt_len > I40E_FRAME_SIZE_MAX) {
+					PMD_TX_LOG(ERR, "INVALID mbuf: pkt_len (%u) exceeds "
+					"HW limit, maximum allowed value is %d\n", mb->pkt_len,
+					I40E_FRAME_SIZE_MAX);
+					pkt_error = true;
+					break;
+				}
+			} else if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
+				/** TSO case: tso_segsz, nb_segs and pkt_len must
+				 * not exceed the limits.
+				 */
+				if (mb->tso_segsz < I40E_MIN_TSO_MSS ||
+					mb->tso_segsz > I40E_MAX_TSO_MSS) {
+					/**
+					 * An MSS outside this range is considered malicious.
+					 */
+					PMD_TX_LOG(ERR, "INVALID mbuf: tso_segsz (%u) is out "
+					"of range, reasonable range (%d - %u)\n", mb->tso_segsz,
+					I40E_MIN_TSO_MSS, I40E_MAX_TSO_MSS);
+					pkt_error = true;
+					break;
+				}
+				if (mb->nb_segs > txq->nb_tx_desc) {
+					PMD_TX_LOG(ERR, "INVALID mbuf: nb_segs out "
+					"of ring length\n");
+					pkt_error = true;
+					break;
+				}
+				if (mb->pkt_len > I40E_TSO_FRAME_SIZE_MAX) {
+					PMD_TX_LOG(ERR, "INVALID mbuf: pkt_len (%u) exceeds "
+					"HW limit, maximum allowed value is %d\n", mb->pkt_len,
+					I40E_TSO_FRAME_SIZE_MAX);
+					pkt_error = true;
+					break;
+				}
+			}
+		}
+
+		if (adapter->mc_flags & I40E_MBUF_CHECK_F_TX_OFFLOAD) {
+			if (ol_flags & I40E_TX_OFFLOAD_NOTSUP_MASK) {
+				PMD_TX_LOG(ERR, "INVALID mbuf: TX offload "
+				"is not supported\n");
+				pkt_error = true;
+				break;
+			}
+
+			if (!rte_validate_tx_offload(mb)) {
+				PMD_TX_LOG(ERR, "INVALID mbuf: TX offload "
+				"setup error\n");
+				pkt_error = true;
+				break;
+			}
+		}
+	}
+
+	if (pkt_error) {
+		txq->mbuf_errors++;
+		good_pkts = idx;
+		if (good_pkts == 0)
+			return 0;
+	}
+
+	return i40e_tx_pkt_burst_ops[tx_burst_type](tx_queue,
+								tx_pkts, good_pkts);
+}
+
 /*********************************************************************
  *
  *  TX simple prep functions
@@ -3468,6 +3600,8 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
 {
 	struct i40e_adapter *ad =
 		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+	enum i40e_tx_burst_type tx_burst_type = I40E_TX_DEFAULT;
+	int mbuf_check = ad->devargs.mbuf_check;
 	int i;
 
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
@@ -3502,34 +3636,39 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
 #ifdef CC_AVX512_SUPPORT
 				PMD_DRV_LOG(NOTICE, "Using AVX512 Vector Tx (port %d).",
 					    dev->data->port_id);
-				dev->tx_pkt_burst = i40e_xmit_pkts_vec_avx512;
+				tx_burst_type = I40E_TX_AVX512;
 #endif
 			} else {
 				PMD_INIT_LOG(DEBUG, "Using %sVector Tx (port %d).",
 					     ad->tx_use_avx2 ? "avx2 " : "",
 					     dev->data->port_id);
-				dev->tx_pkt_burst = ad->tx_use_avx2 ?
-						    i40e_xmit_pkts_vec_avx2 :
-						    i40e_xmit_pkts_vec;
+				tx_burst_type = ad->tx_use_avx2 ? I40E_TX_AVX2 : I40E_TX_SSE;
 				dev->recycle_tx_mbufs_reuse = i40e_recycle_tx_mbufs_reuse_vec;
 			}
 #else /* RTE_ARCH_X86 */
 			PMD_INIT_LOG(DEBUG, "Using Vector Tx (port %d).",
 				     dev->data->port_id);
-			dev->tx_pkt_burst = i40e_xmit_pkts_vec;
+			tx_burst_type = I40E_TX_SSE;
 			dev->recycle_tx_mbufs_reuse = i40e_recycle_tx_mbufs_reuse_vec;
 #endif /* RTE_ARCH_X86 */
 		} else {
 			PMD_INIT_LOG(DEBUG, "Simple tx finally be used.");
-			dev->tx_pkt_burst = i40e_xmit_pkts_simple;
+			tx_burst_type = I40E_TX_SIMPLE;
 			dev->recycle_tx_mbufs_reuse = i40e_recycle_tx_mbufs_reuse_vec;
 		}
 		dev->tx_pkt_prepare = i40e_simple_prep_pkts;
 	} else {
 		PMD_INIT_LOG(DEBUG, "Xmit tx finally be used.");
-		dev->tx_pkt_burst = i40e_xmit_pkts;
+		tx_burst_type = I40E_TX_DEFAULT;
 		dev->tx_pkt_prepare = i40e_prep_pkts;
 	}
+
+	if (mbuf_check) {
+		ad->tx_burst_type = tx_burst_type;
+		dev->tx_pkt_burst = i40e_xmit_pkts_check;
+	} else {
+		dev->tx_pkt_burst = i40e_tx_pkt_burst_ops[tx_burst_type];
+	}
 }
 
 static const struct {
diff --git a/drivers/net/i40e/i40e_rxtx.h b/drivers/net/i40e/i40e_rxtx.h
index 31dd947222..70320cf25e 100644
--- a/drivers/net/i40e/i40e_rxtx.h
+++ b/drivers/net/i40e/i40e_rxtx.h
@@ -162,6 +162,8 @@ struct i40e_tx_queue {
 	uint16_t tx_next_dd;
 	uint16_t tx_next_rs;
 	bool q_set; /**< indicate if tx queue has been configured */
+	uint64_t mbuf_errors;
+
 	bool tx_deferred_start; /**< don't start this queue in dev start */
 	uint8_t dcb_tc;         /**< Traffic class of tx queue */
 	uint64_t offloads; /**< Tx offload flags of RTE_ETH_TX_OFFLOAD_* */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 11+ messages in thread
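
A note on the accounting semantics of i40e_xmit_pkts_check() above:
validation stops at the first failing mbuf, the per-queue mbuf_errors
counter is bumped once per failing burst (not once per bad packet), and the
already-validated prefix of the burst is still forwarded to the real
transmit function. A standalone sketch of that control flow
(packet_is_valid() and real_tx_burst() are hypothetical stand-ins, not
driver symbols):

	uint16_t idx, good_pkts = nb_pkts;
	bool pkt_error = false;

	for (idx = 0; idx < nb_pkts; idx++) {
		if (!packet_is_valid(tx_pkts[idx])) {	/* any enabled check failed */
			pkt_error = true;
			break;
		}
	}

	if (pkt_error) {
		txq->mbuf_errors++;	/* one increment per failing burst */
		good_pkts = idx;	/* transmit only the validated prefix */
		if (good_pkts == 0)
			return 0;
	}

	return real_tx_burst(tx_queue, tx_pkts, good_pkts);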

* Re: [PATCH v6] net/i40e: add diagnostic support in Tx path
  2024-03-04  9:33       ` [PATCH v6] net/i40e: add diagnostic support in Tx path Mingjin Ye
@ 2024-03-04 11:47         ` Bruce Richardson
  2024-03-05 10:17         ` [PATCH v7] " Mingjin Ye
  1 sibling, 0 replies; 11+ messages in thread
From: Bruce Richardson @ 2024-03-04 11:47 UTC (permalink / raw)
  To: Mingjin Ye; +Cc: dev, Yuying Zhang

On Mon, Mar 04, 2024 at 09:33:21AM +0000, Mingjin Ye wrote:
> Implemented a Tx wrapper to perform a thorough check on mbufs,
> categorizing and counting invalid cases by type for diagnostic
> purposes. The count of invalid cases is accessible through xstats_get.
> 
> Also, the devarg option "mbuf_check" was introduced to configure the
> diagnostic parameters to enable the appropriate diagnostic features.
> 
> supported cases: mbuf, size, segment, offload.
>  1. mbuf: Check for corrupted mbuf.
>  2. size: Check min/max packet length according to HW spec.
>  3. segment: Check number of mbuf segments not exceed HW limits.
>  4. offload: Check for use of an unsupported offload flag.
> 
> parameter format: "mbuf_check=<case>" or "mbuf_check=[<case1>,<case2>]"
> eg: dpdk-testpmd -a 0000:87:00.0,mbuf_check=[mbuf,size] -- -i
> 
> Signed-off-by: Mingjin Ye <mingjinx.ye@intel.com>

Review comments inline below, thanks.

This implementation seems more complex than the iavf one that I previously
reviewed and merged. It also includes more changes to the TX path
selection logic, so it would be worthwhile including a note about that in
the commit log.

/Bruce
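
To make that concrete: rather than assigning dev->tx_pkt_burst directly, the
diff below records an enum i40e_tx_burst_type during selection and maps it
through an ops table, so the checking wrapper can forward to whichever path
was chosen. A condensed, standalone sketch of the pattern (simplified names,
not the driver code):

	#include <stdint.h>

	typedef uint16_t (*tx_burst_t)(void *txq, void **pkts, uint16_t n);

	static uint16_t xmit_default(void *txq, void **pkts, uint16_t n)
	{ (void)txq; (void)pkts; return n; }
	static uint16_t xmit_vec(void *txq, void **pkts, uint16_t n)
	{ (void)txq; (void)pkts; return n; }

	enum tx_burst_type { TX_DEFAULT, TX_VEC };

	/* Selection stores an enum; the table maps it back to a function. */
	static const tx_burst_t tx_burst_ops[] = {
		[TX_DEFAULT] = xmit_default,
		[TX_VEC]     = xmit_vec,
	};

	/* Diagnostics wrapper: validate (elided here), then dispatch. */
	uint16_t xmit_check(enum tx_burst_type t, void *txq, void **pkts, uint16_t n)
	{
		return tx_burst_ops[t](txq, pkts, n);
	}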

> ---
> v2: remove strict.
> ---
> v3: optimised.
> ---
> v4: rebase.
> ---
> v5: fix ci error.
> ---
> v6: Changes the commit log.
> ---
>  doc/guides/nics/i40e.rst       |  14 +++
>  drivers/net/i40e/i40e_ethdev.c | 138 ++++++++++++++++++++++++++++-
>  drivers/net/i40e/i40e_ethdev.h |  28 ++++++
>  drivers/net/i40e/i40e_rxtx.c   | 153 +++++++++++++++++++++++++++++++--
>  drivers/net/i40e/i40e_rxtx.h   |   2 +
>  5 files changed, 327 insertions(+), 8 deletions(-)
> 
> diff --git a/doc/guides/nics/i40e.rst b/doc/guides/nics/i40e.rst
> index 15689ac958..91b45e1d40 100644
> --- a/doc/guides/nics/i40e.rst
> +++ b/doc/guides/nics/i40e.rst
> @@ -275,6 +275,20 @@ Runtime Configuration
>  
>    -a 84:00.0,vf_msg_cfg=80@120:180
>  
> +- ``Support TX diagnostics`` (default ``not enabled``)
> +
> +  Set the ``devargs`` parameter ``mbuf_check`` to enable TX diagnostics.
> +  For example, ``-a 87:00.0,mbuf_check=<case>`` or ``-a 87:00.0,mbuf_check=[<case1>,<case2>...]``.
> +  Thereafter, ``rte_eth_xstats_get()`` can be used to get the error counts,
> +  which are collected in ``tx_mbuf_error_packets`` xstats.
> +  In testpmd these can be shown via: ``testpmd> show port xstats all``.
> +  Supported values for the ``case`` parameter are:
> +
> +  *   mbuf: Check for corrupted mbuf.
> +  *   size: Check min/max packet length according to HW spec.
> +  *   segment: Check number of mbuf segments does not exceed HW limits.
> +  *   offload: Check for use of an unsupported offload flag.
> +
>  Vector RX Pre-conditions
>  ~~~~~~~~~~~~~~~~~~~~~~~~
>  For Vector RX it is assumed that the number of descriptor rings will be a power
> diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
> index 4d21341382..3e2ddcaa3e 100644
> --- a/drivers/net/i40e/i40e_ethdev.c
> +++ b/drivers/net/i40e/i40e_ethdev.c
> @@ -48,6 +48,7 @@
>  #define ETH_I40E_SUPPORT_MULTI_DRIVER	"support-multi-driver"
>  #define ETH_I40E_QUEUE_NUM_PER_VF_ARG	"queue-num-per-vf"
>  #define ETH_I40E_VF_MSG_CFG		"vf_msg_cfg"
> +#define ETH_I40E_MBUF_CHECK_ARG       "mbuf_check"
>  
>  #define I40E_CLEAR_PXE_WAIT_MS     200
>  #define I40E_VSI_TSR_QINQ_STRIP		0x4010
> @@ -412,6 +413,7 @@ static const char *const valid_keys[] = {
>  	ETH_I40E_SUPPORT_MULTI_DRIVER,
>  	ETH_I40E_QUEUE_NUM_PER_VF_ARG,
>  	ETH_I40E_VF_MSG_CFG,
> +	ETH_I40E_MBUF_CHECK_ARG,
>  	NULL};
>  
>  static const struct rte_pci_id pci_id_i40e_map[] = {
> @@ -545,6 +547,14 @@ static const struct rte_i40e_xstats_name_off rte_i40e_stats_strings[] = {
>  #define I40E_NB_ETH_XSTATS (sizeof(rte_i40e_stats_strings) / \
>  		sizeof(rte_i40e_stats_strings[0]))
>  
> +static const struct rte_i40e_xstats_name_off i40e_mbuf_strings[] = {
> +	{"tx_mbuf_error_packets", offsetof(struct i40e_mbuf_stats,
> +		tx_pkt_errors)},
> +};
> +
> +#define I40E_NB_MBUF_XSTATS (sizeof(i40e_mbuf_strings) / \
> +		sizeof(i40e_mbuf_strings[0]))
> +
>  static const struct rte_i40e_xstats_name_off rte_i40e_hw_port_strings[] = {
>  	{"tx_link_down_dropped", offsetof(struct i40e_hw_port_stats,
>  		tx_dropped_link_down)},
> @@ -1373,6 +1383,88 @@ read_vf_msg_config(__rte_unused const char *key,
>  	return 0;
>  }
>  
> +static int
> +read_mbuf_check_config(__rte_unused const char *key, const char *value, void *args)
> +{
> +	char *cur;
> +	char *tmp;
> +	int str_len;
> +	int valid_len;
> +
> +	int ret = 0;
> +	uint64_t *mc_flags = args;
> +	char *str2 = strdup(value);
> +	if (str2 == NULL)
> +		return -1;
> +
> +	str_len = strlen(str2);
> +	if (str2[0] == '[' && str2[str_len - 1] == ']') {
> +		if (str_len < 3) {
> +			ret = -1;
> +			goto mdd_end;
> +		}
> +		valid_len = str_len - 2;
> +		memmove(str2, str2 + 1, valid_len);
> +		memset(str2 + valid_len, '\0', 2);
> +	}
> +	cur = strtok_r(str2, ",", &tmp);
> +	while (cur != NULL) {
> +		if (!strcmp(cur, "mbuf"))
> +			*mc_flags |= I40E_MBUF_CHECK_F_TX_MBUF;
> +		else if (!strcmp(cur, "size"))
> +			*mc_flags |= I40E_MBUF_CHECK_F_TX_SIZE;
> +		else if (!strcmp(cur, "segment"))
> +			*mc_flags |= I40E_MBUF_CHECK_F_TX_SEGMENT;
> +		else if (!strcmp(cur, "offload"))
> +			*mc_flags |= I40E_MBUF_CHECK_F_TX_OFFLOAD;
> +		else
> +			PMD_DRV_LOG(ERR, "Unsupported mdd check type: %s", cur);
> +		cur = strtok_r(NULL, ",", &tmp);
> +	}
> +
> +mdd_end:
> +	free(str2);
> +	return ret;
> +}
> +
> +static int
> +i40e_parse_mbuf_check(struct rte_eth_dev *dev)
> +{
> +	struct i40e_adapter *ad =
> +		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
> +	struct rte_kvargs *kvlist;
> +	int kvargs_count;
> +	int ret = 0;
> +
> +	if (!dev->device->devargs)
> +		return ret;
> +
> +	kvlist = rte_kvargs_parse(dev->device->devargs->args, valid_keys);
> +	if (!kvlist)
> +		return -EINVAL;
> +
> +	kvargs_count = rte_kvargs_count(kvlist, ETH_I40E_MBUF_CHECK_ARG);
> +	if (!kvargs_count)
> +		goto free_end;
> +
> +	if (kvargs_count > 1)
> +		PMD_DRV_LOG(WARNING, "More than one argument \"%s\" and only "
> +			    "the first invalid or last valid one is used !",
> +			    ETH_I40E_MBUF_CHECK_ARG);

Don't split error strings across lines, since it means you can't easily
grep for it if you get an error output.
Also, this error message doesn't make sense to me: "first invalid or last valid"?
Can you simplify and clarify it a bit?
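
For reference, the v7 revision later in this thread settles on a single,
unsplit string and treats the duplicate devarg as a hard error:

	PMD_DRV_LOG(ERR, "More than one argument \"%s\"!", ETH_I40E_MBUF_CHECK_ARG);
	ret = -EINVAL;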

> +
> +	ret = rte_kvargs_process(kvlist, ETH_I40E_MBUF_CHECK_ARG,
> +				read_mbuf_check_config, &ad->mc_flags);
> +	if (ret)
> +		goto free_end;
> +
> +	if (ad->mc_flags)
> +		ad->devargs.mbuf_check = 1;
> +
> +free_end:
> +	rte_kvargs_free(kvlist);
> +	return ret;
> +}
> +
>  static int
>  i40e_parse_vf_msg_config(struct rte_eth_dev *dev,
>  		struct i40e_vf_msg_cfg *msg_cfg)
> @@ -1488,6 +1580,7 @@ eth_i40e_dev_init(struct rte_eth_dev *dev, void *init_params __rte_unused)
>  	}
>  
>  	i40e_parse_vf_msg_config(dev, &pf->vf_msg_cfg);
> +	i40e_parse_mbuf_check(dev);
>  	/* Check if need to support multi-driver */
>  	i40e_support_multi_driver(dev);
>  
> @@ -2324,6 +2417,8 @@ i40e_dev_start(struct rte_eth_dev *dev)
>  	struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
>  	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
>  	struct i40e_vsi *main_vsi = pf->main_vsi;
> +	struct i40e_adapter *ad =
> +		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
>  	int ret, i;
>  	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
>  	struct rte_intr_handle *intr_handle = pci_dev->intr_handle;
> @@ -2483,6 +2578,7 @@ i40e_dev_start(struct rte_eth_dev *dev)
>  	max_frame_size = dev->data->mtu ?
>  		dev->data->mtu + I40E_ETH_OVERHEAD :
>  		I40E_FRAME_SIZE_MAX;
> +	ad->max_pkt_len = max_frame_size;
>  
>  	/* Set the max frame size to HW*/
>  	i40e_aq_set_mac_config(hw, max_frame_size, TRUE, false, 0, NULL);
> @@ -3502,13 +3598,17 @@ i40e_dev_stats_reset(struct rte_eth_dev *dev)
>  	/* read the stats, reading current register values into offset */
>  	i40e_read_stats_registers(pf, hw);
>  
> +	memset(&pf->mbuf_stats, 0,
> +		sizeof(struct i40e_mbuf_stats));
> +

This line surely fits in 100 chars, and so shouldn't need to be split?
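
For reference, v7 joins it onto one line:

	memset(&pf->mbuf_stats, 0, sizeof(struct i40e_mbuf_stats));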

>  	return 0;
>  }
>  
>  static uint32_t
>  i40e_xstats_calc_num(void)
>  {
> -	return I40E_NB_ETH_XSTATS + I40E_NB_HW_PORT_XSTATS +
> +	return I40E_NB_ETH_XSTATS + I40E_NB_MBUF_XSTATS +
> +		I40E_NB_HW_PORT_XSTATS +
>  		(I40E_NB_RXQ_PRIO_XSTATS * 8) +
>  		(I40E_NB_TXQ_PRIO_XSTATS * 8);
>  }
> @@ -3533,6 +3633,14 @@ static int i40e_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
>  		count++;
>  	}
>  
> +	/* Get stats from i40e_mbuf_stats struct */
> +	for (i = 0; i < I40E_NB_MBUF_XSTATS; i++) {
> +		strlcpy(xstats_names[count].name,
> +			i40e_mbuf_strings[i].name,
> +			sizeof(xstats_names[count].name));
> +		count++;
> +	}
> +
>  	/* Get individual stats from i40e_hw_port struct */
>  	for (i = 0; i < I40E_NB_HW_PORT_XSTATS; i++) {
>  		strlcpy(xstats_names[count].name,
> @@ -3563,12 +3671,28 @@ static int i40e_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
>  	return count;
>  }
>  
> +static void
> +i40e_dev_update_mbuf_stats(struct rte_eth_dev *ethdev,
> +		struct i40e_mbuf_stats *mbuf_stats)
> +{
> +	uint16_t idx;
> +	struct i40e_tx_queue *txq;
> +
> +	for (idx = 0; idx < ethdev->data->nb_tx_queues; idx++) {
> +		txq = ethdev->data->tx_queues[idx];
> +		mbuf_stats->tx_pkt_errors += txq->mbuf_errors;
> +	}
> +}
> +
>  static int
>  i40e_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
>  		    unsigned n)
>  {
>  	struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
>  	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> +	struct i40e_adapter *adapter =
> +		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
> +	struct i40e_mbuf_stats mbuf_stats = {0};
>  	unsigned i, count, prio;
>  	struct i40e_hw_port_stats *hw_stats = &pf->stats;
>  
> @@ -3583,6 +3707,9 @@ i40e_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
>  
>  	count = 0;
>  
> +	if (adapter->devargs.mbuf_check)
> +		i40e_dev_update_mbuf_stats(dev, &mbuf_stats);
> +
>  	/* Get stats from i40e_eth_stats struct */
>  	for (i = 0; i < I40E_NB_ETH_XSTATS; i++) {
>  		xstats[count].value = *(uint64_t *)(((char *)&hw_stats->eth) +
> @@ -3591,6 +3718,15 @@ i40e_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
>  		count++;
>  	}
>  
> +	/* Get stats from i40e_mbuf_stats struct */
> +	for (i = 0; i < I40E_NB_MBUF_XSTATS; i++) {
> +		xstats[count].value =
> +			*(uint64_t *)((char *)&mbuf_stats +
> +					i40e_mbuf_strings[i].offset);
> +		xstats[count].id = count;
> +		count++;
> +	}
> +
>  	/* Get individual stats from i40e_hw_port struct */
>  	for (i = 0; i < I40E_NB_HW_PORT_XSTATS; i++) {
>  		xstats[count].value = *(uint64_t *)(((char *)hw_stats) +
> diff --git a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
> index 445e1c0b38..ae3f2f60ac 100644
> --- a/drivers/net/i40e/i40e_ethdev.h
> +++ b/drivers/net/i40e/i40e_ethdev.h
> @@ -1109,6 +1109,10 @@ struct i40e_vf_msg_cfg {
>  	uint32_t ignore_second;
>  };
>  
> +struct i40e_mbuf_stats {
> +	uint64_t tx_pkt_errors;
> +};
> +
>  /*
>   * Structure to store private data specific for PF instance.
>   */
> @@ -1123,6 +1127,7 @@ struct i40e_pf {
>  
>  	struct i40e_hw_port_stats stats_offset;
>  	struct i40e_hw_port_stats stats;
> +	struct i40e_mbuf_stats mbuf_stats;
>  	u64 rx_err1;	/* rxerr1 */
>  	u64 rx_err1_offset;
>  
> @@ -1225,6 +1230,25 @@ struct i40e_vsi_vlan_pvid_info {
>  #define I40E_MAX_PKT_TYPE  256
>  #define I40E_FLOW_TYPE_MAX 64
>  
> +#define I40E_MBUF_CHECK_F_TX_MBUF        (1ULL << 0)
> +#define I40E_MBUF_CHECK_F_TX_SIZE        (1ULL << 1)
> +#define I40E_MBUF_CHECK_F_TX_SEGMENT     (1ULL << 2)
> +#define I40E_MBUF_CHECK_F_TX_OFFLOAD     (1ULL << 3)
> +
> +enum i40e_tx_burst_type {
> +	I40E_TX_DEFAULT,
> +	I40E_TX_SIMPLE,
> +	I40E_TX_SSE,
> +	I40E_TX_AVX2,
> +	I40E_TX_AVX512,
> +};
> +
> +/**
> + * Cache devargs parse result.
> + */
> +struct i40e_devargs {
> +	int mbuf_check;
> +};
>  /*
>   * Structure to store private data for each PF/VF instance.
>   */
> @@ -1241,6 +1265,10 @@ struct i40e_adapter {
>  	bool tx_simple_allowed;
>  	bool tx_vec_allowed;
>  
> +	struct i40e_devargs devargs;
> +	uint64_t mc_flags; /* mbuf check flags. */
> +	uint16_t max_pkt_len; /* Maximum packet length */
> +	enum i40e_tx_burst_type tx_burst_type;
>  	/* For PTP */
>  	struct rte_timecounter systime_tc;
>  	struct rte_timecounter rx_tstamp_tc;
> diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
> index a0bc30d45b..c7c9c945e4 100644
> --- a/drivers/net/i40e/i40e_rxtx.c
> +++ b/drivers/net/i40e/i40e_rxtx.c
> @@ -1536,6 +1536,138 @@ i40e_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
>  	return nb_tx;
>  }
>  
> +static
> +const eth_tx_burst_t i40e_tx_pkt_burst_ops[] = {
> +	[I40E_TX_DEFAULT] = i40e_xmit_pkts,
> +	[I40E_TX_SIMPLE] = i40e_xmit_pkts_simple,
> +	[I40E_TX_SSE] = i40e_xmit_pkts_vec,
> +#ifdef RTE_ARCH_X86
> +	[I40E_TX_AVX2] = i40e_xmit_pkts_vec_avx2,
> +#ifdef CC_AVX512_SUPPORT
> +	[I40E_TX_AVX512] = i40e_xmit_pkts_vec_avx512,
> +#endif
> +#endif
> +};
> +
> +/* Tx mbuf check */
> +static uint16_t
> +i40e_xmit_pkts_check(void *tx_queue, struct rte_mbuf **tx_pkts,
> +	      uint16_t nb_pkts)

Strange indentation, mix of tabs and spaces that is neither a double-indent
nor lines up with the opening bracket.
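
For reference, v7 keeps the whole prototype on one line:

	static uint16_t
	i40e_xmit_pkts_check(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)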

> +{
> +	struct i40e_tx_queue *txq = tx_queue;
> +	uint16_t idx;
> +	uint64_t ol_flags;
> +	struct rte_mbuf *mb;
> +	bool pkt_error = false;
> +	const char *reason = NULL;
> +	uint16_t good_pkts = nb_pkts;
> +	struct i40e_adapter *adapter = txq->vsi->adapter;
> +	enum i40e_tx_burst_type tx_burst_type =
> +		txq->vsi->adapter->tx_burst_type;
> +
> +

One blank line should be enough here.

> +	for (idx = 0; idx < nb_pkts; idx++) {
> +		mb = tx_pkts[idx];
> +		ol_flags = mb->ol_flags;
> +
> +		if ((adapter->mc_flags & I40E_MBUF_CHECK_F_TX_MBUF) &&
> +			(rte_mbuf_check(mb, 0, &reason) != 0)) {

This continuation indent lines up with the first line of the block and so
is really unclear. Double indent it or align with opening bracket -
whichever style is already used in this file.
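
For reference, v7 aligns the continuation with the opening bracket:

	if ((adapter->mbuf_check & I40E_MBUF_CHECK_F_TX_MBUF) &&
	    (rte_mbuf_check(mb, 0, &reason) != 0)) {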

> +			PMD_TX_LOG(ERR, "INVALID mbuf: %s\n", reason);
> +			pkt_error = true;
> +			break;
> +		}
> +
> +		if ((adapter->mc_flags & I40E_MBUF_CHECK_F_TX_SIZE) &&
> +			(mb->data_len > mb->pkt_len ||
> +			mb->data_len < I40E_TX_MIN_PKT_LEN ||
> +			mb->data_len > adapter->max_pkt_len)) {

Same issue. Line continuations should never line up with the block body.

> +			PMD_TX_LOG(ERR, "INVALID mbuf: data_len (%u) is out "
> +			"of range, reasonable range (%d - %u)\n", mb->data_len,
> +			I40E_TX_MIN_PKT_LEN, adapter->max_pkt_len);

Don't split error messages. Also indent line continuation.
Comment applies to further error message below too.
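
For reference, the form v7 adopts, with the format string unsplit and the
arguments indented past the block body:

	PMD_TX_LOG(ERR, "INVALID mbuf: data_len (%u) is out of range, reasonable range (%d - %u)\n",
		mb->data_len, I40E_TX_MIN_PKT_LEN, adapter->max_pkt_len);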

> +			pkt_error = true;
> +			break;
> +		}
> +
> +		if (adapter->mc_flags & I40E_MBUF_CHECK_F_TX_SEGMENT) {
> +			if (!(ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
> +				/**
> +				 * No TSO case: nb->segs, pkt_len to not exceed
> +				 * the limites.
> +				 */
> +				if (mb->nb_segs > I40E_TX_MAX_MTU_SEG) {
> +					PMD_TX_LOG(ERR, "INVALID mbuf: nb_segs (%d) exceeds "
> +					"HW limit, maximum allowed value is %d\n", mb->nb_segs,
> +					I40E_TX_MAX_MTU_SEG);
> +					pkt_error = true;
> +					break;
> +				}
> +				if (mb->pkt_len > I40E_FRAME_SIZE_MAX) {
> +					PMD_TX_LOG(ERR, "INVALID mbuf: pkt_len (%d) exceeds "
> +					"HW limit, maximum allowed value is %d\n", mb->nb_segs,
> +					I40E_FRAME_SIZE_MAX);
> +					pkt_error = true;
> +					break;
> +				}
> +			} else if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
> +				/** TSO case: tso_segsz, nb_segs, pkt_len not exceed
> +				 * the limits.
> +				 */
> +				if (mb->tso_segsz < I40E_MIN_TSO_MSS ||
> +					mb->tso_segsz > I40E_MAX_TSO_MSS) {
> +					/**
> +					 * MSS outside the range are considered malicious
> +					 */
> +					PMD_TX_LOG(ERR, "INVALID mbuf: tso_segsz (%u) is out "
> +					"of range, reasonable range (%d - %u)\n", mb->tso_segsz,
> +					I40E_MIN_TSO_MSS, I40E_MAX_TSO_MSS);
> +					pkt_error = true;
> +					break;
> +				}
> +				if (mb->nb_segs >
> +					((struct i40e_tx_queue *)tx_queue)->nb_tx_desc) {
> +					PMD_TX_LOG(ERR, "INVALID mbuf: nb_segs out "
> +					"of ring length\n");
> +					pkt_error = true;
> +					break;
> +				}
> +				if (mb->pkt_len > I40E_TSO_FRAME_SIZE_MAX) {
> +					PMD_TX_LOG(ERR, "INVALID mbuf: pkt_len (%d) exceeds "
> +					"HW limit, maximum allowed value is %d\n", mb->nb_segs,
> +					I40E_TSO_FRAME_SIZE_MAX);
> +					pkt_error = true;
> +					break;
> +				}
> +			}
> +		}
> +
> +		if (adapter->mc_flags & I40E_MBUF_CHECK_F_TX_OFFLOAD) {
> +			if (ol_flags & I40E_TX_OFFLOAD_NOTSUP_MASK) {
> +				PMD_TX_LOG(ERR, "INVALID mbuf: TX offload "
> +				"is not supported\n");
> +				pkt_error = true;
> +				break;
> +			}
> +
> +			if (!rte_validate_tx_offload(mb)) {
> +				PMD_TX_LOG(ERR, "INVALID mbuf: TX offload "
> +				"setup error\n");
> +				pkt_error = true;
> +				break;
> +			}
> +		}
> +	}
> +
> +	if (pkt_error) {
> +		txq->mbuf_errors++;
> +		good_pkts = idx;
> +		if (good_pkts == 0)
> +			return 0;
> +	}
> +
> +	return i40e_tx_pkt_burst_ops[tx_burst_type](tx_queue,
> +								tx_pkts, good_pkts);
> +}
> +
>  /*********************************************************************
>   *
>   *  TX simple prep functions
> @@ -3468,6 +3600,8 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
>  {
>  	struct i40e_adapter *ad =
>  		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
> +	enum i40e_tx_burst_type tx_burst_type = I40E_TX_DEFAULT;
> +	int mbuf_check = ad->devargs.mbuf_check;
>  	int i;
>  
>  	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
> @@ -3502,34 +3636,39 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
>  #ifdef CC_AVX512_SUPPORT
>  				PMD_DRV_LOG(NOTICE, "Using AVX512 Vector Tx (port %d).",
>  					    dev->data->port_id);
> -				dev->tx_pkt_burst = i40e_xmit_pkts_vec_avx512;
> +				tx_burst_type = I40E_TX_AVX512;
>  #endif
>  			} else {
>  				PMD_INIT_LOG(DEBUG, "Using %sVector Tx (port %d).",
>  					     ad->tx_use_avx2 ? "avx2 " : "",
>  					     dev->data->port_id);
> -				dev->tx_pkt_burst = ad->tx_use_avx2 ?
> -						    i40e_xmit_pkts_vec_avx2 :
> -						    i40e_xmit_pkts_vec;
> +				tx_burst_type = ad->tx_use_avx2 ? I40E_TX_AVX2 : I40E_TX_SSE;
>  				dev->recycle_tx_mbufs_reuse = i40e_recycle_tx_mbufs_reuse_vec;
>  			}
>  #else /* RTE_ARCH_X86 */
>  			PMD_INIT_LOG(DEBUG, "Using Vector Tx (port %d).",
>  				     dev->data->port_id);
> -			dev->tx_pkt_burst = i40e_xmit_pkts_vec;
> +			tx_burst_type = I40E_TX_SSE;
>  			dev->recycle_tx_mbufs_reuse = i40e_recycle_tx_mbufs_reuse_vec;
>  #endif /* RTE_ARCH_X86 */
>  		} else {
>  			PMD_INIT_LOG(DEBUG, "Simple tx finally be used.");
> -			dev->tx_pkt_burst = i40e_xmit_pkts_simple;
> +			tx_burst_type = I40E_TX_SIMPLE;
>  			dev->recycle_tx_mbufs_reuse = i40e_recycle_tx_mbufs_reuse_vec;
>  		}
>  		dev->tx_pkt_prepare = i40e_simple_prep_pkts;
>  	} else {
>  		PMD_INIT_LOG(DEBUG, "Xmit tx finally be used.");
> -		dev->tx_pkt_burst = i40e_xmit_pkts;
> +		tx_burst_type = I40E_TX_DEFAULT;
>  		dev->tx_pkt_prepare = i40e_prep_pkts;
>  	}
> +
> +	if (mbuf_check) {
> +		ad->tx_burst_type = tx_burst_type;
> +		dev->tx_pkt_burst = i40e_xmit_pkts_check;
> +	} else {
> +		dev->tx_pkt_burst = i40e_tx_pkt_burst_ops[tx_burst_type];
> +	}
>  }
>  
>  static const struct {
> diff --git a/drivers/net/i40e/i40e_rxtx.h b/drivers/net/i40e/i40e_rxtx.h
> index 31dd947222..70320cf25e 100644
> --- a/drivers/net/i40e/i40e_rxtx.h
> +++ b/drivers/net/i40e/i40e_rxtx.h
> @@ -162,6 +162,8 @@ struct i40e_tx_queue {
>  	uint16_t tx_next_dd;
>  	uint16_t tx_next_rs;
>  	bool q_set; /**< indicate if tx queue has been configured */
> +	uint64_t mbuf_errors;
> +
>  	bool tx_deferred_start; /**< don't start this queue in dev start */
>  	uint8_t dcb_tc;         /**< Traffic class of tx queue */
>  	uint64_t offloads; /**< Tx offload flags of RTE_ETH_TX_OFFLOAD_* */
> -- 
> 2.25.1
> 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH v7]  net/i40e: add diagnostic support in Tx path
  2024-03-04  9:33       ` [PATCH v6] net/i40e: add diagnostic support in Tx path Mingjin Ye
  2024-03-04 11:47         ` Bruce Richardson
@ 2024-03-05 10:17         ` Mingjin Ye
  2024-03-05 12:59           ` Bruce Richardson
  1 sibling, 1 reply; 11+ messages in thread
From: Mingjin Ye @ 2024-03-05 10:17 UTC (permalink / raw)
  To: dev; +Cc: Mingjin Ye, Yuying Zhang

Implemented a Tx wrapper to perform a thorough check on mbufs,
categorizing and counting invalid cases by type for diagnostic
purposes. The count of invalid cases is accessible through xstats_get.

Also, the devarg option "mbuf_check" was introduced to configure the
diagnostic parameters to enable the appropriate diagnostic features.

supported cases: mbuf, size, segment, offload.
 1. mbuf: Check for corrupted mbuf.
 2. size: Check min/max packet length according to HW spec.
 3. segment: Check that the number of mbuf segments does not exceed HW limits.
 4. offload: Check for use of an unsupported offload flag.

parameter format: "mbuf_check=<case>" or "mbuf_check=[<case1>,<case2>]"
eg: dpdk-testpmd -a 0000:87:00.0,mbuf_check=[mbuf,size] -- -i

Signed-off-by: Mingjin Ye <mingjinx.ye@intel.com>
---
v2: remove strict.
---
v3: optimised.
---
v4: rebase.
---
v5: fix ci error.
---
v6: Changes the commit log.
---
v7: Remove unnecessary changes.
---
 doc/guides/nics/i40e.rst       |  13 +++
 drivers/net/i40e/i40e_ethdev.c | 142 ++++++++++++++++++++++++++++++++-
 drivers/net/i40e/i40e_ethdev.h |  14 ++++
 drivers/net/i40e/i40e_rxtx.c   | 112 ++++++++++++++++++++++++++
 drivers/net/i40e/i40e_rxtx.h   |   2 +
 5 files changed, 282 insertions(+), 1 deletion(-)

diff --git a/doc/guides/nics/i40e.rst b/doc/guides/nics/i40e.rst
index 15689ac958..bf1d1e5d60 100644
--- a/doc/guides/nics/i40e.rst
+++ b/doc/guides/nics/i40e.rst
@@ -275,6 +275,19 @@ Runtime Configuration
 
   -a 84:00.0,vf_msg_cfg=80@120:180
 
+- ``Support TX diagnostics`` (default ``not enabled``)
+
+  Set the ``devargs`` parameter ``mbuf_check`` to enable TX diagnostics. For example,
+  ``-a 18:01.0,mbuf_check=<case>`` or ``-a 18:01.0,mbuf_check=[<case1>,<case2>...]``. Also,
+  ``rte_eth_xstats_get()`` can be used to retrieve the error counts, which are collected in
+  the ``tx_mbuf_error_packets`` xstat. For example, ``testpmd> show port xstats all``.
+  Supported cases:
+
+  *   mbuf: Check for corrupted mbuf.
+  *   size: Check min/max packet length according to the HW spec.
+  *   segment: Check that the number of mbuf segments does not exceed HW limits.
+  *   offload: Check for use of an unsupported offload flag.
+
 Vector RX Pre-conditions
 ~~~~~~~~~~~~~~~~~~~~~~~~
 For Vector RX it is assumed that the number of descriptor rings will be a power
diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 4d21341382..84fefcb1f9 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -48,6 +48,7 @@
 #define ETH_I40E_SUPPORT_MULTI_DRIVER	"support-multi-driver"
 #define ETH_I40E_QUEUE_NUM_PER_VF_ARG	"queue-num-per-vf"
 #define ETH_I40E_VF_MSG_CFG		"vf_msg_cfg"
+#define ETH_I40E_MBUF_CHECK_ARG       "mbuf_check"
 
 #define I40E_CLEAR_PXE_WAIT_MS     200
 #define I40E_VSI_TSR_QINQ_STRIP		0x4010
@@ -412,6 +413,7 @@ static const char *const valid_keys[] = {
 	ETH_I40E_SUPPORT_MULTI_DRIVER,
 	ETH_I40E_QUEUE_NUM_PER_VF_ARG,
 	ETH_I40E_VF_MSG_CFG,
+	ETH_I40E_MBUF_CHECK_ARG,
 	NULL};
 
 static const struct rte_pci_id pci_id_i40e_map[] = {
@@ -545,6 +547,14 @@ static const struct rte_i40e_xstats_name_off rte_i40e_stats_strings[] = {
 #define I40E_NB_ETH_XSTATS (sizeof(rte_i40e_stats_strings) / \
 		sizeof(rte_i40e_stats_strings[0]))
 
+static const struct rte_i40e_xstats_name_off i40e_mbuf_strings[] = {
+	{"tx_mbuf_error_packets", offsetof(struct i40e_mbuf_stats,
+		tx_pkt_errors)},
+};
+
+#define I40E_NB_MBUF_XSTATS (sizeof(i40e_mbuf_strings) / \
+		sizeof(i40e_mbuf_strings[0]))
+
 static const struct rte_i40e_xstats_name_off rte_i40e_hw_port_strings[] = {
 	{"tx_link_down_dropped", offsetof(struct i40e_hw_port_stats,
 		tx_dropped_link_down)},
@@ -1373,6 +1383,94 @@ read_vf_msg_config(__rte_unused const char *key,
 	return 0;
 }
 
+static int
+read_mbuf_check_config(__rte_unused const char *key, const char *value, void *args)
+{
+	char *cur;
+	char *tmp;
+	int str_len;
+	int valid_len;
+
+	int ret = 0;
+	uint64_t *mc_flags = args;
+	char *str2 = strdup(value);
+	if (str2 == NULL)
+		return -1;
+
+	str_len = strlen(str2);
+	if (str_len == 0) {
+		ret = -1;
+		goto err_end;
+	}
+
+	/* Try stripping the outer square brackets of the parameter string. */
+	if (str2[0] == '[' && str2[str_len - 1] == ']') {
+		if (str_len < 3) {
+			ret = -1;
+			goto err_end;
+		}
+		valid_len = str_len - 2;
+		memmove(str2, str2 + 1, valid_len);
+		memset(str2 + valid_len, '\0', 2);
+	}
+
+	cur = strtok_r(str2, ",", &tmp);
+	while (cur != NULL) {
+		if (!strcmp(cur, "mbuf"))
+			*mc_flags |= I40E_MBUF_CHECK_F_TX_MBUF;
+		else if (!strcmp(cur, "size"))
+			*mc_flags |= I40E_MBUF_CHECK_F_TX_SIZE;
+		else if (!strcmp(cur, "segment"))
+			*mc_flags |= I40E_MBUF_CHECK_F_TX_SEGMENT;
+		else if (!strcmp(cur, "offload"))
+			*mc_flags |= I40E_MBUF_CHECK_F_TX_OFFLOAD;
+		else
+			PMD_DRV_LOG(ERR, "Unsupported diagnostic type: %s", cur);
+		cur = strtok_r(NULL, ",", &tmp);
+	}
+
+err_end:
+	free(str2);
+	return ret;
+}
+
+static int
+i40e_parse_mbuf_check(struct rte_eth_dev *dev)
+{
+	struct i40e_adapter *ad =
+		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+	struct rte_kvargs *kvlist;
+	int kvargs_count;
+	int ret = 0;
+
+	if (!dev->device->devargs)
+		return ret;
+
+	kvlist = rte_kvargs_parse(dev->device->devargs->args, valid_keys);
+	if (!kvlist)
+		return -EINVAL;
+
+	kvargs_count = rte_kvargs_count(kvlist, ETH_I40E_MBUF_CHECK_ARG);
+	if (!kvargs_count)
+		goto free_end;
+
+	if (kvargs_count > 1) {
+		PMD_DRV_LOG(ERR, "More than one argument \"%s\"!",
+				ETH_I40E_MBUF_CHECK_ARG);
+		ret = -EINVAL;
+		goto free_end;
+	}
+
+	if (rte_kvargs_process(kvlist, ETH_I40E_MBUF_CHECK_ARG,
+			read_mbuf_check_config, &ad->mbuf_check) < 0)
+		ret = -EINVAL;
+
+free_end:
+	rte_kvargs_free(kvlist);
+	return ret;
+}
+
 static int
 i40e_parse_vf_msg_config(struct rte_eth_dev *dev,
 		struct i40e_vf_msg_cfg *msg_cfg)
@@ -1488,6 +1586,7 @@ eth_i40e_dev_init(struct rte_eth_dev *dev, void *init_params __rte_unused)
 	}
 
 	i40e_parse_vf_msg_config(dev, &pf->vf_msg_cfg);
+	i40e_parse_mbuf_check(dev);
 	/* Check if need to support multi-driver */
 	i40e_support_multi_driver(dev);
 
@@ -2324,6 +2423,8 @@ i40e_dev_start(struct rte_eth_dev *dev)
 	struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
 	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 	struct i40e_vsi *main_vsi = pf->main_vsi;
+	struct i40e_adapter *ad =
+		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 	int ret, i;
 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
 	struct rte_intr_handle *intr_handle = pci_dev->intr_handle;
@@ -2483,6 +2584,7 @@ i40e_dev_start(struct rte_eth_dev *dev)
 	max_frame_size = dev->data->mtu ?
 		dev->data->mtu + I40E_ETH_OVERHEAD :
 		I40E_FRAME_SIZE_MAX;
+	ad->max_pkt_len = max_frame_size;
 
 	/* Set the max frame size to HW*/
 	i40e_aq_set_mac_config(hw, max_frame_size, TRUE, false, 0, NULL);
@@ -3502,13 +3604,16 @@ i40e_dev_stats_reset(struct rte_eth_dev *dev)
 	/* read the stats, reading current register values into offset */
 	i40e_read_stats_registers(pf, hw);
 
+	memset(&pf->mbuf_stats, 0, sizeof(struct i40e_mbuf_stats));
+
 	return 0;
 }
 
 static uint32_t
 i40e_xstats_calc_num(void)
 {
-	return I40E_NB_ETH_XSTATS + I40E_NB_HW_PORT_XSTATS +
+	return I40E_NB_ETH_XSTATS + I40E_NB_MBUF_XSTATS +
+		I40E_NB_HW_PORT_XSTATS +
 		(I40E_NB_RXQ_PRIO_XSTATS * 8) +
 		(I40E_NB_TXQ_PRIO_XSTATS * 8);
 }
@@ -3533,6 +3638,14 @@ static int i40e_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
 		count++;
 	}
 
+	/* Get stats from i40e_mbuf_stats struct */
+	for (i = 0; i < I40E_NB_MBUF_XSTATS; i++) {
+		strlcpy(xstats_names[count].name,
+			i40e_mbuf_strings[i].name,
+			sizeof(xstats_names[count].name));
+		count++;
+	}
+
 	/* Get individual stats from i40e_hw_port struct */
 	for (i = 0; i < I40E_NB_HW_PORT_XSTATS; i++) {
 		strlcpy(xstats_names[count].name,
@@ -3563,12 +3676,28 @@ static int i40e_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
 	return count;
 }
 
+static void
+i40e_dev_update_mbuf_stats(struct rte_eth_dev *ethdev,
+		struct i40e_mbuf_stats *mbuf_stats)
+{
+	uint16_t idx;
+	struct i40e_tx_queue *txq;
+
+	for (idx = 0; idx < ethdev->data->nb_tx_queues; idx++) {
+		txq = ethdev->data->tx_queues[idx];
+		mbuf_stats->tx_pkt_errors += txq->mbuf_errors;
+	}
+}
+
 static int
 i40e_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
 		    unsigned n)
 {
 	struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
 	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	struct i40e_adapter *adapter =
+		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+	struct i40e_mbuf_stats mbuf_stats = {0};
 	unsigned i, count, prio;
 	struct i40e_hw_port_stats *hw_stats = &pf->stats;
 
@@ -3583,6 +3712,9 @@ i40e_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
 
 	count = 0;
 
+	if (adapter->mbuf_check)
+		i40e_dev_update_mbuf_stats(dev, &mbuf_stats);
+
 	/* Get stats from i40e_eth_stats struct */
 	for (i = 0; i < I40E_NB_ETH_XSTATS; i++) {
 		xstats[count].value = *(uint64_t *)(((char *)&hw_stats->eth) +
@@ -3591,6 +3723,14 @@ i40e_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
 		count++;
 	}
 
+	/* Get stats from i40e_mbuf_stats struct */
+	for (i = 0; i < I40E_NB_MBUF_XSTATS; i++) {
+		xstats[count].value = *(uint64_t *)((char *)&mbuf_stats +
+			i40e_mbuf_strings[i].offset);
+		xstats[count].id = count;
+		count++;
+	}
+
 	/* Get individual stats from i40e_hw_port struct */
 	for (i = 0; i < I40E_NB_HW_PORT_XSTATS; i++) {
 		xstats[count].value = *(uint64_t *)(((char *)hw_stats) +
diff --git a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
index 445e1c0b38..b9628ca158 100644
--- a/drivers/net/i40e/i40e_ethdev.h
+++ b/drivers/net/i40e/i40e_ethdev.h
@@ -1109,6 +1109,10 @@ struct i40e_vf_msg_cfg {
 	uint32_t ignore_second;
 };
 
+struct i40e_mbuf_stats {
+	uint64_t tx_pkt_errors;
+};
+
 /*
  * Structure to store private data specific for PF instance.
  */
@@ -1123,6 +1127,7 @@ struct i40e_pf {
 
 	struct i40e_hw_port_stats stats_offset;
 	struct i40e_hw_port_stats stats;
+	struct i40e_mbuf_stats mbuf_stats;
 	u64 rx_err1;	/* rxerr1 */
 	u64 rx_err1_offset;
 
@@ -1225,6 +1230,11 @@ struct i40e_vsi_vlan_pvid_info {
 #define I40E_MAX_PKT_TYPE  256
 #define I40E_FLOW_TYPE_MAX 64
 
+#define I40E_MBUF_CHECK_F_TX_MBUF        (1ULL << 0)
+#define I40E_MBUF_CHECK_F_TX_SIZE        (1ULL << 1)
+#define I40E_MBUF_CHECK_F_TX_SEGMENT     (1ULL << 2)
+#define I40E_MBUF_CHECK_F_TX_OFFLOAD     (1ULL << 3)
+
 /*
  * Structure to store private data for each PF/VF instance.
  */
@@ -1241,6 +1251,10 @@ struct i40e_adapter {
 	bool tx_simple_allowed;
 	bool tx_vec_allowed;
 
+	uint64_t mbuf_check; /* mbuf check flags. */
+	uint16_t max_pkt_len; /* Maximum packet length */
+	eth_tx_burst_t tx_pkt_burst;
+
 	/* For PTP */
 	struct rte_timecounter systime_tc;
 	struct rte_timecounter rx_tstamp_tc;
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index a0bc30d45b..a6f864574b 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -1536,6 +1536,112 @@ i40e_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 	return nb_tx;
 }
 
+/* Tx mbuf check */
+static uint16_t
+i40e_xmit_pkts_check(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+	struct i40e_tx_queue *txq = tx_queue;
+	uint16_t idx;
+	uint64_t ol_flags;
+	struct rte_mbuf *mb;
+	bool pkt_error = false;
+	const char *reason = NULL;
+	uint16_t good_pkts = nb_pkts;
+	struct i40e_adapter *adapter = txq->vsi->adapter;
+
+	for (idx = 0; idx < nb_pkts; idx++) {
+		mb = tx_pkts[idx];
+		ol_flags = mb->ol_flags;
+
+		if ((adapter->mbuf_check & I40E_MBUF_CHECK_F_TX_MBUF) &&
+		    (rte_mbuf_check(mb, 0, &reason) != 0)) {
+			PMD_TX_LOG(ERR, "INVALID mbuf: %s\n", reason);
+			pkt_error = true;
+			break;
+		}
+
+		if ((adapter->mbuf_check & I40E_MBUF_CHECK_F_TX_SIZE) &&
+		    (mb->data_len > mb->pkt_len ||
+		    mb->data_len < I40E_TX_MIN_PKT_LEN ||
+		    mb->data_len > adapter->max_pkt_len)) {
+			PMD_TX_LOG(ERR, "INVALID mbuf: data_len (%u) is out of range, reasonable range (%d - %u)\n",
+				mb->data_len, I40E_TX_MIN_PKT_LEN, adapter->max_pkt_len);
+			pkt_error = true;
+			break;
+		}
+
+		if (adapter->mbuf_check & I40E_MBUF_CHECK_F_TX_SEGMENT) {
+			if (!(ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
+				/**
+				 * No TSO case: nb_segs and pkt_len must not
+				 * exceed the limits.
+				 */
+				if (mb->nb_segs > I40E_TX_MAX_MTU_SEG) {
+					PMD_TX_LOG(ERR, "INVALID mbuf: nb_segs (%d) exceeds HW limit, maximum allowed value is %d\n",
+						mb->nb_segs, I40E_TX_MAX_MTU_SEG);
+					pkt_error = true;
+					break;
+				}
+				if (mb->pkt_len > I40E_FRAME_SIZE_MAX) {
+					PMD_TX_LOG(ERR, "INVALID mbuf: pkt_len (%u) exceeds HW limit, maximum allowed value is %d\n",
+						mb->pkt_len, I40E_FRAME_SIZE_MAX);
+					pkt_error = true;
+					break;
+				}
+			} else if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
+				/** TSO case: tso_segsz, nb_segs and pkt_len must
+				 * not exceed the limits.
+				 */
+				if (mb->tso_segsz < I40E_MIN_TSO_MSS ||
+					mb->tso_segsz > I40E_MAX_TSO_MSS) {
+					/**
+					 * An MSS outside this range is considered malicious.
+					 */
+					PMD_TX_LOG(ERR, "INVALID mbuf: tso_segsz (%u) is out of range, reasonable range (%d - %u)\n",
+						mb->tso_segsz, I40E_MIN_TSO_MSS, I40E_MAX_TSO_MSS);
+					pkt_error = true;
+					break;
+				}
+				if (mb->nb_segs > txq->nb_tx_desc) {
+					PMD_TX_LOG(ERR, "INVALID mbuf: nb_segs out of ring length\n");
+					pkt_error = true;
+					break;
+				}
+				if (mb->pkt_len > I40E_TSO_FRAME_SIZE_MAX) {
+					PMD_TX_LOG(ERR, "INVALID mbuf: pkt_len (%u) exceeds HW limit, maximum allowed value is %d\n",
+						mb->pkt_len, I40E_TSO_FRAME_SIZE_MAX);
+					pkt_error = true;
+					break;
+				}
+			}
+		}
+
+		if (adapter->mbuf_check & I40E_MBUF_CHECK_F_TX_OFFLOAD) {
+			if (ol_flags & I40E_TX_OFFLOAD_NOTSUP_MASK) {
+				PMD_TX_LOG(ERR, "INVALID mbuf: TX offload is not supported\n");
+				pkt_error = true;
+				break;
+			}
+
+			if (!rte_validate_tx_offload(mb)) {
+				PMD_TX_LOG(ERR, "INVALID mbuf: TX offload setup error\n");
+				pkt_error = true;
+				break;
+			}
+		}
+	}
+
+	if (pkt_error) {
+		txq->mbuf_errors++;
+		good_pkts = idx;
+		if (good_pkts == 0)
+			return 0;
+	}
+
+	return adapter->tx_pkt_burst(tx_queue, tx_pkts, good_pkts);
+}
+
 /*********************************************************************
  *
  *  TX simple prep functions
@@ -3468,6 +3574,7 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
 {
 	struct i40e_adapter *ad =
 		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+	uint64_t mbuf_check = ad->mbuf_check;
 	int i;
 
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
@@ -3530,6 +3637,11 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
 		dev->tx_pkt_burst = i40e_xmit_pkts;
 		dev->tx_pkt_prepare = i40e_prep_pkts;
 	}
+
+	if (mbuf_check) {
+		ad->tx_pkt_burst = dev->tx_pkt_burst;
+		dev->tx_pkt_burst = i40e_xmit_pkts_check;
+	}
 }
 
 static const struct {
diff --git a/drivers/net/i40e/i40e_rxtx.h b/drivers/net/i40e/i40e_rxtx.h
index 31dd947222..70320cf25e 100644
--- a/drivers/net/i40e/i40e_rxtx.h
+++ b/drivers/net/i40e/i40e_rxtx.h
@@ -162,6 +162,8 @@ struct i40e_tx_queue {
 	uint16_t tx_next_dd;
 	uint16_t tx_next_rs;
 	bool q_set; /**< indicate if tx queue has been configured */
+	uint64_t mbuf_errors;
+
 	bool tx_deferred_start; /**< don't start this queue in dev start */
 	uint8_t dcb_tc;         /**< Traffic class of tx queue */
 	uint64_t offloads; /**< Tx offload flags of RTE_ETH_TX_OFFLOAD_* */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v7]  net/i40e: add diagnostic support in Tx path
  2024-03-05 10:17         ` [PATCH v7] " Mingjin Ye
@ 2024-03-05 12:59           ` Bruce Richardson
  0 siblings, 0 replies; 11+ messages in thread
From: Bruce Richardson @ 2024-03-05 12:59 UTC (permalink / raw)
  To: Mingjin Ye; +Cc: dev, Yuying Zhang

On Tue, Mar 05, 2024 at 10:17:47AM +0000, Mingjin Ye wrote:
> Implemented a Tx wrapper to perform a thorough check on mbufs,
> categorizing and counting invalid cases by type for diagnostic
> purposes. The count of invalid cases is accessible through xstats_get.
> 
> Also, the devarg option "mbuf_check" was introduced to configure the
> diagnostic parameters to enable the appropriate diagnostic features.
> 
> supported cases: mbuf, size, segment, offload.
>  1. mbuf: Check for corrupted mbuf.
>  2. size: Check min/max packet length according to HW spec.
>  3. segment: Check number of mbuf segments not exceed HW limits.
>  4. offload: Check for use of an unsupported offload flag.
> 
> parameter format: "mbuf_check=<case>" or "mbuf_check=[<case1>,<case2>]"
> eg: dpdk-testpmd -a 0000:87:00.0,mbuf_check=[mbuf,size] -- -i
> 
> Signed-off-by: Mingjin Ye <mingjinx.ye@intel.com>
> ---
Acked-by: Bruce Richardson <bruce.richardson@intel.com>

Applied to dpdk-next-net-intel with some indentation and whitespace
cleanup.

Thanks,
/Bruce
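
As a usage note for application writers: once a port is probed with the
mbuf_check devarg, the counter can be read back by name through the generic
xstats API. A minimal, illustrative sketch (not from the thread; error
handling trimmed):

	#include <stdio.h>
	#include <string.h>
	#include <inttypes.h>
	#include <rte_ethdev.h>

	/* Print the Tx diagnostics counter for one port, assuming the port was
	 * probed with e.g. -a 0000:87:00.0,mbuf_check=[mbuf,size].
	 */
	static void
	print_tx_mbuf_errors(uint16_t port_id)
	{
		int n = rte_eth_xstats_get_names(port_id, NULL, 0);
		if (n <= 0)
			return;

		struct rte_eth_xstat_name names[n];
		struct rte_eth_xstat vals[n];

		if (rte_eth_xstats_get_names(port_id, names, n) != n ||
				rte_eth_xstats_get(port_id, vals, n) != n)
			return;

		for (int i = 0; i < n; i++)
			if (strcmp(names[i].name, "tx_mbuf_error_packets") == 0)
				printf("port %u: tx_mbuf_error_packets = %" PRIu64 "\n",
						port_id, vals[i].value);
	}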

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2024-03-05 12:59 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-12-21 10:13 [PATCH] net/i40e: add diagnostic support in TX path Mingjin Ye
2023-12-22 16:17 ` Morten Brørup
2024-01-04 10:20 ` [PATCH v3] " Mingjin Ye
2024-01-05  9:59   ` [PATCH v4] " Mingjin Ye
2024-03-01  9:44     ` [PATCH v5] " Mingjin Ye
2024-03-01 10:24       ` Bruce Richardson
2024-03-01 16:13         ` Patrick Robb
2024-03-04  9:33       ` [PATCH v6] net/i40e: add diagnostic support in Tx path Mingjin Ye
2024-03-04 11:47         ` Bruce Richardson
2024-03-05 10:17         ` [PATCH v7] " Mingjin Ye
2024-03-05 12:59           ` Bruce Richardson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).