patches for DPDK stable branches
 help / color / mirror / Atom feed
* [PATCH] net/mlx5: fix IP tunnel detection for IPIP and IPv6 ENCAP
@ 2025-11-19 10:28 Junfeng Guo
  0 siblings, 0 replies; 3+ messages in thread
From: Junfeng Guo @ 2025-11-19 10:28 UTC (permalink / raw)
  To: stable
  Cc: dsosnowski, viacheslavo, bingz, orika, suanmingm, matan, rasland,
	junfengg

The HWS (Hardware Steering) IP tunnel detection implementation was not
recognizing IPIP tunnel types based on the next protocol value in the
outer IP header as specified by RFC 2003 (IP-in-IP) and RFC 2473
(IPv6 Encapsulation).

The issue was that the code treated all combinations of [IPv4|6] / [IPv4|6]
as IPIP tunnels, setting MLX5_FLOW_LAYER_IPIP flag regardless of the actual
protocol. While this happened to work for simple cases due to the fallback
classification, it violated RFC compliance and could cause issues with
protocol validation.

The fix adds RFC-compliant tunnel detection by:
1. Checking the protocol field (IPPROTO_IPIP=4 or IPPROTO_IPV6=41) in the
   outer IP header to determine tunnel type
2. Adding protocol mismatch validation to ensure outer protocol matches
   inner header type
3. Correctly setting MLX5_FLOW_LAYER_IPV6_ENCAP for IPv6 encapsulation

The expected tunnel patterns are now:
- [ipv4 | ipv6] proto is 4 / ipv4 -> MLX5_FLOW_LAYER_IPIP
- [ipv4 | ipv6] proto is 41 / ipv6 -> MLX5_FLOW_LAYER_IPV6_ENCAP

For cases without protocol specification, fallback classification based on
inner header type is still supported.

Fixes: f66c7c3ab983 ("net/mlx5/hws: recognize IP-in-IP tunnel in definer layer")
Fixes: 4b7044562f59 ("net/mlx5: support IP-in-IP tunnel for all combinations")
Cc: stable@dpdk.org

Signed-off-by: Junfeng Guo <junfengg@nvidia.com>
---
 drivers/net/mlx5/hws/mlx5dr_definer.c |   4 +-
 drivers/net/mlx5/mlx5_flow.c          |   5 -
 drivers/net/mlx5/mlx5_flow_hw.c       | 162 ++++++++++++++++++++++----
 3 files changed, 143 insertions(+), 28 deletions(-)

diff --git a/drivers/net/mlx5/hws/mlx5dr_definer.c b/drivers/net/mlx5/hws/mlx5dr_definer.c
index afa70bf793..03d2794367 100644
--- a/drivers/net/mlx5/hws/mlx5dr_definer.c
+++ b/drivers/net/mlx5/hws/mlx5dr_definer.c
@@ -3381,6 +3381,7 @@ mlx5dr_definer_conv_items_to_hl(struct mlx5dr_context *ctx,
 			if (cd.last_item == RTE_FLOW_ITEM_TYPE_IPV4 ||
 			    cd.last_item == RTE_FLOW_ITEM_TYPE_IPV6) {
 				cd.tunnel = true;
+				/* [IPv4 | IPv6] / IPv4: IPIP */
 				item_flags |= MLX5_FLOW_LAYER_IPIP;
 			}
 			ret = mlx5dr_definer_conv_item_ipv4(&cd, items, i);
@@ -3391,7 +3392,8 @@ mlx5dr_definer_conv_items_to_hl(struct mlx5dr_context *ctx,
 			if (cd.last_item == RTE_FLOW_ITEM_TYPE_IPV4 ||
 			    cd.last_item == RTE_FLOW_ITEM_TYPE_IPV6) {
 				cd.tunnel = true;
-				item_flags |= MLX5_FLOW_LAYER_IPIP;
+				/* [IPv4 | IPv6] / IPv6: IPV6_ENCAP */
+				item_flags |= MLX5_FLOW_LAYER_IPV6_ENCAP;
 			}
 			ret = mlx5dr_definer_conv_item_ipv6(&cd, items, i);
 			item_flags |= cd.tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 2c48f1b01b..0821c93a9a 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -2892,11 +2892,6 @@ mlx5_flow_validate_item_ipv4(const struct rte_eth_dev *dev,
 						  "multiple tunnel "
 						  "not supported");
 	}
-	if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP)
-		return rte_flow_error_set(error, EINVAL,
-					  RTE_FLOW_ERROR_TYPE_ITEM, item,
-					  "wrong tunnel type - IPv6 specified "
-					  "but IPv4 item provided");
 	if (item_flags & l3m)
 		return rte_flow_error_set(error, ENOTSUP,
 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
diff --git a/drivers/net/mlx5/mlx5_flow_hw.c b/drivers/net/mlx5/mlx5_flow_hw.c
index c41b99746f..75ac3bce28 100644
--- a/drivers/net/mlx5/mlx5_flow_hw.c
+++ b/drivers/net/mlx5/mlx5_flow_hw.c
@@ -8384,23 +8384,132 @@ mlx5_hw_validate_item_nsh(struct rte_eth_dev *dev,
 	return mlx5_flow_validate_item_nsh(dev, item, error);
 }
 
-static bool
-mlx5_hw_flow_tunnel_ip_check(uint64_t last_item, uint64_t *item_flags)
+static inline uint8_t
+mlx5_hw_flow_get_next_protocol(const struct rte_flow_item *item)
+{
+	if (!item || !item->spec)
+		return 0xff;
+
+	switch (item->type) {
+	case RTE_FLOW_ITEM_TYPE_IPV4: {
+		const struct rte_flow_item_ipv4 *spec = item->spec;
+		const struct rte_flow_item_ipv4 *mask = item->mask;
+
+		/* If mask is NULL or next_proto_id field in mask is 0,
+		 * then next_protocol in spec should not be read
+		 */
+		if (!mask || mask->hdr.next_proto_id == 0)
+			return 0xff;
+
+		return spec->hdr.next_proto_id & mask->hdr.next_proto_id;
+	}
+	case RTE_FLOW_ITEM_TYPE_IPV6: {
+		const struct rte_flow_item_ipv6 *spec = item->spec;
+		const struct rte_flow_item_ipv6 *mask = item->mask;
+
+		/* If mask is NULL or proto field in mask is 0,
+		 * then proto in spec should not be read
+		 */
+		if (!mask || mask->hdr.proto == 0)
+			return 0xff;
+
+		return spec->hdr.proto & mask->hdr.proto;
+	}
+	case RTE_FLOW_ITEM_TYPE_IPV6_FRAG_EXT: {
+		const struct rte_flow_item_ipv6_frag_ext *spec = item->spec;
+		const struct rte_flow_item_ipv6_frag_ext *mask = item->mask;
+
+		/* If mask is NULL or next_header field in mask is 0,
+		 * then next_header in spec should not be read
+		 */
+		if (!mask || mask->hdr.next_header == 0)
+			return 0xff;
+
+		return spec->hdr.next_header & mask->hdr.next_header;
+	}
+	default:
+		return 0xff;
+	}
+}
+
+static int
+mlx5_hw_flow_tunnel_ip_check(uint64_t last_item,
+			     const struct rte_flow_item *last_l3_item,
+			     const struct rte_flow_item *item,
+			     uint64_t *item_flags,
+			     struct rte_flow_error *error)
 {
-	bool tunnel;
+	uint64_t tunnel_flag = 0;
+	uint8_t outer_protocol;
+
+	/* IP tunnel detection - only single-level tunneling supported */
+	if (last_l3_item && (last_item == MLX5_FLOW_LAYER_OUTER_L3_IPV4 ||
+			     last_item == MLX5_FLOW_LAYER_OUTER_L3_IPV6)) {
+		/*
+		 * Tunnel type determination strategy:
+		 * 1. If previous L3 item has protocol field specified, use it (RFC compliant)
+		 * 2. Otherwise, fall back to inner header type (what's being encapsulated)
+		 */
+		outer_protocol = mlx5_hw_flow_get_next_protocol(last_l3_item);
+
+		if (outer_protocol != 0xff) {
+			/* Proto field specified in outer hdr mask - use RFC-compliant detection */
+			switch (outer_protocol) {
+			case IPPROTO_IPIP:  /* 4 - IP-in-IP */
+				/* Outer header indicates IPv4 payload */
+				if (item->type == RTE_FLOW_ITEM_TYPE_IPV6)
+					return rte_flow_error_set(error, EINVAL,
+						RTE_FLOW_ERROR_TYPE_ITEM, item,
+						"protocol mismatch: outer proto is IPIP but inner is IPv6");
+				tunnel_flag = MLX5_FLOW_LAYER_IPIP;
+				break;
+			case IPPROTO_IPV6:  /* 41 - IPv6-in-IP */
+				/* Outer header indicates IPv6 payload */
+				if (item->type == RTE_FLOW_ITEM_TYPE_IPV4)
+					return rte_flow_error_set(error, EINVAL,
+						RTE_FLOW_ERROR_TYPE_ITEM, item,
+						"protocol mismatch: outer proto is IPV6 but inner is IPv4");
+				tunnel_flag = MLX5_FLOW_LAYER_IPV6_ENCAP;
+				break;
+			default:
+				/* Unknown/unsupported protocol, fall back to inner header type */
+				goto fallback_classification;
+			}
+		} else {
+fallback_classification:
+			/*
+			 * Protocol field not specified or unknown - classify based on
+			 * what is being encapsulated (inner header type)
+			 */
+			if (item->type == RTE_FLOW_ITEM_TYPE_IPV4)
+				tunnel_flag = MLX5_FLOW_LAYER_IPIP;
+			else if (item->type == RTE_FLOW_ITEM_TYPE_IPV6)
+				tunnel_flag = MLX5_FLOW_LAYER_IPV6_ENCAP;
+			else
+				return 0; /* Not an IP item - shouldn't happen, but be defensive */
+		}
+
+		/* Check for unsupported nested tunneling after tunnel is detected */
+		if (*item_flags & MLX5_FLOW_LAYER_TUNNEL)
+			return rte_flow_error_set(error, ENOTSUP,
+						  RTE_FLOW_ERROR_TYPE_ITEM, item,
+						  "multiple tunnel layers not supported");
+
+		*item_flags |= tunnel_flag;
+		return 1; /* Tunnel detected */
+	} else if (last_item == MLX5_FLOW_ITEM_OUTER_IPV6_ROUTING_EXT) {
+		/* Special case: IPv6 routing extension header */
+		/* Check for unsupported nested tunneling */
+		if (*item_flags & MLX5_FLOW_LAYER_TUNNEL)
+			return rte_flow_error_set(error, ENOTSUP,
+						  RTE_FLOW_ERROR_TYPE_ITEM, item,
+						  "multiple tunnel layers not supported");
 
-	if (last_item == MLX5_FLOW_LAYER_OUTER_L3_IPV4 ||
-	    last_item == MLX5_FLOW_LAYER_OUTER_L3_IPV6) {
-		tunnel = true;
-		*item_flags |= MLX5_FLOW_LAYER_IPIP;
-	} else if (last_item == MLX5_FLOW_LAYER_OUTER_L3_IPV6 ||
-		   last_item == MLX5_FLOW_ITEM_OUTER_IPV6_ROUTING_EXT) {
-		tunnel = true;
 		*item_flags |= MLX5_FLOW_LAYER_IPV6_ENCAP;
-	} else {
-		tunnel = false;
+		return 1; /* Tunnel detected */
 	}
-	return tunnel;
+
+	return 0; /* No tunnel */
 }
 
 const struct rte_flow_item_ipv4 hws_nic_ipv4_mask = {
@@ -8475,6 +8584,7 @@ __flow_hw_pattern_validate(struct rte_eth_dev *dev,
 			 struct rte_flow_error *error)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
+	const struct rte_flow_item *last_l3_item = NULL;
 	const struct rte_flow_item *item;
 	const struct rte_flow_item *gtp_item = NULL;
 	const struct rte_flow_item *gre_item = NULL;
@@ -8677,21 +8787,28 @@ __flow_hw_pattern_validate(struct rte_eth_dev *dev,
 				    MLX5_FLOW_LAYER_OUTER_VLAN;
 			break;
 		case RTE_FLOW_ITEM_TYPE_IPV4:
-			tunnel |= mlx5_hw_flow_tunnel_ip_check(last_item,
-							       item_flags);
+			ret = mlx5_hw_flow_tunnel_ip_check(last_item, last_l3_item, item,
+							   item_flags, error);
+			if (ret < 0)
+				return ret;
+			tunnel |= (ret > 0);
 			ret = mlx5_flow_dv_validate_item_ipv4(dev, item,
-							      *item_flags,
-							      last_item, 0,
-							      &hws_nic_ipv4_mask,
-							      error);
+							*item_flags,
+							last_item, 0,
+							&hws_nic_ipv4_mask,
+							error);
 			if (ret)
 				return ret;
 			last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
-				    MLX5_FLOW_LAYER_OUTER_L3_IPV4;
+				MLX5_FLOW_LAYER_OUTER_L3_IPV4;
+			last_l3_item = item;
 			break;
 		case RTE_FLOW_ITEM_TYPE_IPV6:
-			tunnel |= mlx5_hw_flow_tunnel_ip_check(last_item,
-							       item_flags);
+			ret = mlx5_hw_flow_tunnel_ip_check(last_item, last_l3_item, item,
+							item_flags, error);
+			if (ret < 0)
+				return ret;
+			tunnel |= (ret > 0);
 			ret = mlx5_flow_validate_item_ipv6(dev, item,
 							   *item_flags,
 							   last_item, 0,
@@ -8701,6 +8818,7 @@ __flow_hw_pattern_validate(struct rte_eth_dev *dev,
 				return ret;
 			last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
 				    MLX5_FLOW_LAYER_OUTER_L3_IPV6;
+			last_l3_item = item;
 			break;
 		case RTE_FLOW_ITEM_TYPE_UDP:
 			ret = mlx5_flow_validate_item_udp(dev, item,
-- 
2.43.0


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] net/mlx5: fix IP tunnel detection for IPIP and IPv6 ENCAP
  2025-11-19 10:35 Junfeng Guo
@ 2025-11-19 11:12 ` Dariusz Sosnowski
  0 siblings, 0 replies; 3+ messages in thread
From: Dariusz Sosnowski @ 2025-11-19 11:12 UTC (permalink / raw)
  To: Junfeng Guo
  Cc: dev, stable, viacheslavo, bingz, orika, suanmingm, matan, rasland

On Wed, Nov 19, 2025 at 12:35:54PM +0200, Junfeng Guo wrote:
> The HWS (Hardware Steering) IP tunnel detection implementation was not
> recognizing IPIP tunnel types based on the next protocol value in the
> outer IP header as specified by RFC 2003 (IP-in-IP) and RFC 2473
> (IPv6 Encapsulation).
> 
> The issue was that the code treated all combinations of [IPv4|6] / [IPv4|6]
> as IPIP tunnels, setting MLX5_FLOW_LAYER_IPIP flag regardless of the actual
> protocol. While this happened to work for simple cases due to the fallback
> classification, it violated RFC compliance and could cause issues with
> protocol validation.
> 
> The fix adds RFC-compliant tunnel detection by
> 1. Checking the protocol field (IPPROTO_IPIP=4 or IPPROTO_IPV6=41) in the
>    outer IP header to determine tunnel type
> 2. Adding protocol mismatch validation to ensure outer protocol matches
>    inner header type
> 3. Correctly setting MLX5_FLOW_LAYER_IPV6_ENCAP for IPv6 encapsulation
> 
> The expected tunnel patterns are now:
> - [ipv4 | ipv6] proto is 4 / ipv4 -> MLX5_FLOW_LAYER_IPIP
> - [ipv4 | ipv6] proto is 41 / ipv6 -> MLX5_FLOW_LAYER_IPV6_ENCAP
> 
> For cases without protocol specification, fallback classification based on
> inner header type is still supported.
> 
> Fixes: f66c7c3ab983 ("net/mlx5/hws: recognize IP-in-IP tunnel in definer layer")
> Fixes: 4b7044562f59 ("net/mlx5: support IP-in-IP tunnel for all combinations")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Junfeng Guo <junfengg@nvidia.com>

Acked-by: Dariusz Sosnowski <dsosnowski@nvidia.com>

Best regards,
Dariusz Sosnowski

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH] net/mlx5: fix IP tunnel detection for IPIP and IPv6 ENCAP
@ 2025-11-19 10:35 Junfeng Guo
  2025-11-19 11:12 ` Dariusz Sosnowski
  0 siblings, 1 reply; 3+ messages in thread
From: Junfeng Guo @ 2025-11-19 10:35 UTC (permalink / raw)
  To: dev, stable
  Cc: dsosnowski, viacheslavo, bingz, orika, suanmingm, matan, rasland,
	junfengg

The HWS (Hardware Steering) IP tunnel detection implementation was not
recognizing IPIP tunnel types based on the next protocol value in the
outer IP header as specified by RFC 2003 (IP-in-IP) and RFC 2473
(IPv6 Encapsulation).

The issue was that the code treated all combinations of [IPv4|6] / [IPv4|6]
as IPIP tunnels, setting MLX5_FLOW_LAYER_IPIP flag regardless of the actual
protocol. While this happened to work for simple cases due to the fallback
classification, it violated RFC compliance and could cause issues with
protocol validation.

The fix adds RFC-compliant tunnel detection by:
1. Checking the protocol field (IPPROTO_IPIP=4 or IPPROTO_IPV6=41) in the
   outer IP header to determine tunnel type
2. Adding protocol mismatch validation to ensure outer protocol matches
   inner header type
3. Correctly setting MLX5_FLOW_LAYER_IPV6_ENCAP for IPv6 encapsulation

The expected tunnel patterns are now:
- [ipv4 | ipv6] proto is 4 / ipv4 -> MLX5_FLOW_LAYER_IPIP
- [ipv4 | ipv6] proto is 41 / ipv6 -> MLX5_FLOW_LAYER_IPV6_ENCAP

For cases without protocol specification, fallback classification based on
inner header type is still supported.

Fixes: f66c7c3ab983 ("net/mlx5/hws: recognize IP-in-IP tunnel in definer layer")
Fixes: 4b7044562f59 ("net/mlx5: support IP-in-IP tunnel for all combinations")
Cc: stable@dpdk.org

Signed-off-by: Junfeng Guo <junfengg@nvidia.com>
---
 drivers/net/mlx5/hws/mlx5dr_definer.c |   4 +-
 drivers/net/mlx5/mlx5_flow.c          |   5 -
 drivers/net/mlx5/mlx5_flow_hw.c       | 162 ++++++++++++++++++++++----
 3 files changed, 143 insertions(+), 28 deletions(-)

diff --git a/drivers/net/mlx5/hws/mlx5dr_definer.c b/drivers/net/mlx5/hws/mlx5dr_definer.c
index afa70bf793..03d2794367 100644
--- a/drivers/net/mlx5/hws/mlx5dr_definer.c
+++ b/drivers/net/mlx5/hws/mlx5dr_definer.c
@@ -3381,6 +3381,7 @@ mlx5dr_definer_conv_items_to_hl(struct mlx5dr_context *ctx,
 			if (cd.last_item == RTE_FLOW_ITEM_TYPE_IPV4 ||
 			    cd.last_item == RTE_FLOW_ITEM_TYPE_IPV6) {
 				cd.tunnel = true;
+				/* [IPv4 | IPv6] / IPv4: IPIP */
 				item_flags |= MLX5_FLOW_LAYER_IPIP;
 			}
 			ret = mlx5dr_definer_conv_item_ipv4(&cd, items, i);
@@ -3391,7 +3392,8 @@ mlx5dr_definer_conv_items_to_hl(struct mlx5dr_context *ctx,
 			if (cd.last_item == RTE_FLOW_ITEM_TYPE_IPV4 ||
 			    cd.last_item == RTE_FLOW_ITEM_TYPE_IPV6) {
 				cd.tunnel = true;
-				item_flags |= MLX5_FLOW_LAYER_IPIP;
+				/* [IPv4 | IPv6] / IPv6: IPV6_ENCAP */
+				item_flags |= MLX5_FLOW_LAYER_IPV6_ENCAP;
 			}
 			ret = mlx5dr_definer_conv_item_ipv6(&cd, items, i);
 			item_flags |= cd.tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 2c48f1b01b..0821c93a9a 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -2892,11 +2892,6 @@ mlx5_flow_validate_item_ipv4(const struct rte_eth_dev *dev,
 						  "multiple tunnel "
 						  "not supported");
 	}
-	if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP)
-		return rte_flow_error_set(error, EINVAL,
-					  RTE_FLOW_ERROR_TYPE_ITEM, item,
-					  "wrong tunnel type - IPv6 specified "
-					  "but IPv4 item provided");
 	if (item_flags & l3m)
 		return rte_flow_error_set(error, ENOTSUP,
 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
diff --git a/drivers/net/mlx5/mlx5_flow_hw.c b/drivers/net/mlx5/mlx5_flow_hw.c
index c41b99746f..75ac3bce28 100644
--- a/drivers/net/mlx5/mlx5_flow_hw.c
+++ b/drivers/net/mlx5/mlx5_flow_hw.c
@@ -8384,23 +8384,132 @@ mlx5_hw_validate_item_nsh(struct rte_eth_dev *dev,
 	return mlx5_flow_validate_item_nsh(dev, item, error);
 }
 
-static bool
-mlx5_hw_flow_tunnel_ip_check(uint64_t last_item, uint64_t *item_flags)
+static inline uint8_t
+mlx5_hw_flow_get_next_protocol(const struct rte_flow_item *item)
+{
+	if (!item || !item->spec)
+		return 0xff;
+
+	switch (item->type) {
+	case RTE_FLOW_ITEM_TYPE_IPV4: {
+		const struct rte_flow_item_ipv4 *spec = item->spec;
+		const struct rte_flow_item_ipv4 *mask = item->mask;
+
+		/* If mask is NULL or next_proto_id field in mask is 0,
+		 * then next_protocol in spec should not be read
+		 */
+		if (!mask || mask->hdr.next_proto_id == 0)
+			return 0xff;
+
+		return spec->hdr.next_proto_id & mask->hdr.next_proto_id;
+	}
+	case RTE_FLOW_ITEM_TYPE_IPV6: {
+		const struct rte_flow_item_ipv6 *spec = item->spec;
+		const struct rte_flow_item_ipv6 *mask = item->mask;
+
+		/* If mask is NULL or proto field in mask is 0,
+		 * then proto in spec should not be read
+		 */
+		if (!mask || mask->hdr.proto == 0)
+			return 0xff;
+
+		return spec->hdr.proto & mask->hdr.proto;
+	}
+	case RTE_FLOW_ITEM_TYPE_IPV6_FRAG_EXT: {
+		const struct rte_flow_item_ipv6_frag_ext *spec = item->spec;
+		const struct rte_flow_item_ipv6_frag_ext *mask = item->mask;
+
+		/* If mask is NULL or next_header field in mask is 0,
+		 * then next_header in spec should not be read
+		 */
+		if (!mask || mask->hdr.next_header == 0)
+			return 0xff;
+
+		return spec->hdr.next_header & mask->hdr.next_header;
+	}
+	default:
+		return 0xff;
+	}
+}
+
+static int
+mlx5_hw_flow_tunnel_ip_check(uint64_t last_item,
+			     const struct rte_flow_item *last_l3_item,
+			     const struct rte_flow_item *item,
+			     uint64_t *item_flags,
+			     struct rte_flow_error *error)
 {
-	bool tunnel;
+	uint64_t tunnel_flag = 0;
+	uint8_t outer_protocol;
+
+	/* IP tunnel detection - only single-level tunneling supported */
+	if (last_l3_item && (last_item == MLX5_FLOW_LAYER_OUTER_L3_IPV4 ||
+			     last_item == MLX5_FLOW_LAYER_OUTER_L3_IPV6)) {
+		/*
+		 * Tunnel type determination strategy:
+		 * 1. If previous L3 item has protocol field specified, use it (RFC compliant)
+		 * 2. Otherwise, fall back to inner header type (what's being encapsulated)
+		 */
+		outer_protocol = mlx5_hw_flow_get_next_protocol(last_l3_item);
+
+		if (outer_protocol != 0xff) {
+			/* Proto field specified in outer hdr mask - use RFC-compliant detection */
+			switch (outer_protocol) {
+			case IPPROTO_IPIP:  /* 4 - IP-in-IP */
+				/* Outer header indicates IPv4 payload */
+				if (item->type == RTE_FLOW_ITEM_TYPE_IPV6)
+					return rte_flow_error_set(error, EINVAL,
+						RTE_FLOW_ERROR_TYPE_ITEM, item,
+						"protocol mismatch: outer proto is IPIP but inner is IPv6");
+				tunnel_flag = MLX5_FLOW_LAYER_IPIP;
+				break;
+			case IPPROTO_IPV6:  /* 41 - IPv6-in-IP */
+				/* Outer header indicates IPv6 payload */
+				if (item->type == RTE_FLOW_ITEM_TYPE_IPV4)
+					return rte_flow_error_set(error, EINVAL,
+						RTE_FLOW_ERROR_TYPE_ITEM, item,
+						"protocol mismatch: outer proto is IPV6 but inner is IPv4");
+				tunnel_flag = MLX5_FLOW_LAYER_IPV6_ENCAP;
+				break;
+			default:
+				/* Unknown/unsupported protocol, fall back to inner header type */
+				goto fallback_classification;
+			}
+		} else {
+fallback_classification:
+			/*
+			 * Protocol field not specified or unknown - classify based on
+			 * what is being encapsulated (inner header type)
+			 */
+			if (item->type == RTE_FLOW_ITEM_TYPE_IPV4)
+				tunnel_flag = MLX5_FLOW_LAYER_IPIP;
+			else if (item->type == RTE_FLOW_ITEM_TYPE_IPV6)
+				tunnel_flag = MLX5_FLOW_LAYER_IPV6_ENCAP;
+			else
+				return 0; /* Not an IP item - shouldn't happen, but be defensive */
+		}
+
+		/* Check for unsupported nested tunneling after tunnel is detected */
+		if (*item_flags & MLX5_FLOW_LAYER_TUNNEL)
+			return rte_flow_error_set(error, ENOTSUP,
+						  RTE_FLOW_ERROR_TYPE_ITEM, item,
+						  "multiple tunnel layers not supported");
+
+		*item_flags |= tunnel_flag;
+		return 1; /* Tunnel detected */
+	} else if (last_item == MLX5_FLOW_ITEM_OUTER_IPV6_ROUTING_EXT) {
+		/* Special case: IPv6 routing extension header */
+		/* Check for unsupported nested tunneling */
+		if (*item_flags & MLX5_FLOW_LAYER_TUNNEL)
+			return rte_flow_error_set(error, ENOTSUP,
+						  RTE_FLOW_ERROR_TYPE_ITEM, item,
+						  "multiple tunnel layers not supported");
 
-	if (last_item == MLX5_FLOW_LAYER_OUTER_L3_IPV4 ||
-	    last_item == MLX5_FLOW_LAYER_OUTER_L3_IPV6) {
-		tunnel = true;
-		*item_flags |= MLX5_FLOW_LAYER_IPIP;
-	} else if (last_item == MLX5_FLOW_LAYER_OUTER_L3_IPV6 ||
-		   last_item == MLX5_FLOW_ITEM_OUTER_IPV6_ROUTING_EXT) {
-		tunnel = true;
 		*item_flags |= MLX5_FLOW_LAYER_IPV6_ENCAP;
-	} else {
-		tunnel = false;
+		return 1; /* Tunnel detected */
 	}
-	return tunnel;
+
+	return 0; /* No tunnel */
 }
 
 const struct rte_flow_item_ipv4 hws_nic_ipv4_mask = {
@@ -8475,6 +8584,7 @@ __flow_hw_pattern_validate(struct rte_eth_dev *dev,
 			 struct rte_flow_error *error)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
+	const struct rte_flow_item *last_l3_item = NULL;
 	const struct rte_flow_item *item;
 	const struct rte_flow_item *gtp_item = NULL;
 	const struct rte_flow_item *gre_item = NULL;
@@ -8677,21 +8787,28 @@ __flow_hw_pattern_validate(struct rte_eth_dev *dev,
 				    MLX5_FLOW_LAYER_OUTER_VLAN;
 			break;
 		case RTE_FLOW_ITEM_TYPE_IPV4:
-			tunnel |= mlx5_hw_flow_tunnel_ip_check(last_item,
-							       item_flags);
+			ret = mlx5_hw_flow_tunnel_ip_check(last_item, last_l3_item, item,
+							   item_flags, error);
+			if (ret < 0)
+				return ret;
+			tunnel |= (ret > 0);
 			ret = mlx5_flow_dv_validate_item_ipv4(dev, item,
-							      *item_flags,
-							      last_item, 0,
-							      &hws_nic_ipv4_mask,
-							      error);
+							*item_flags,
+							last_item, 0,
+							&hws_nic_ipv4_mask,
+							error);
 			if (ret)
 				return ret;
 			last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
-				    MLX5_FLOW_LAYER_OUTER_L3_IPV4;
+				MLX5_FLOW_LAYER_OUTER_L3_IPV4;
+			last_l3_item = item;
 			break;
 		case RTE_FLOW_ITEM_TYPE_IPV6:
-			tunnel |= mlx5_hw_flow_tunnel_ip_check(last_item,
-							       item_flags);
+			ret = mlx5_hw_flow_tunnel_ip_check(last_item, last_l3_item, item,
+							item_flags, error);
+			if (ret < 0)
+				return ret;
+			tunnel |= (ret > 0);
 			ret = mlx5_flow_validate_item_ipv6(dev, item,
 							   *item_flags,
 							   last_item, 0,
@@ -8701,6 +8818,7 @@ __flow_hw_pattern_validate(struct rte_eth_dev *dev,
 				return ret;
 			last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
 				    MLX5_FLOW_LAYER_OUTER_L3_IPV6;
+			last_l3_item = item;
 			break;
 		case RTE_FLOW_ITEM_TYPE_UDP:
 			ret = mlx5_flow_validate_item_udp(dev, item,
-- 
2.43.0


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2025-11-19 11:12 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-11-19 10:28 [PATCH] net/mlx5: fix IP tunnel detection for IPIP and IPv6 ENCAP Junfeng Guo
2025-11-19 10:35 Junfeng Guo
2025-11-19 11:12 ` Dariusz Sosnowski

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).