patches for DPDK stable branches
From: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
To: shahafs@mellanox.com
Cc: dev@dpdk.org, stable@dpdk.org
Subject: [dpdk-stable] [PATCH 3/5] net/mlx5: add tunnel inner items support on E-Switch
Date: Thu, 27 Dec 2018 15:34:43 +0000	[thread overview]
Message-ID: <1545924885-6215-4-git-send-email-viacheslavo@mellanox.com> (raw)
In-Reply-To: <1545924885-6215-1-git-send-email-viacheslavo@mellanox.com>

This patch updates the translation routine for E-Switch Flows.
Inner tunnel pattern items are now translated into the Netlink
message; support for tunnel inner IP addresses (IPv4 or IPv6),
IP protocol, and TCP and UDP ports is added.
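
Inner items land on the regular flower keys, while the outer tunnel
layers use the ENC_* keys. Schematically, in the spirit of the IPv4
hunk below (tunnel_outer tells whether the current item still belongs
to the outer layers of a tunnel flow):

    mnl_attr_put_u32(nlh, tunnel_outer ?
                     TCA_FLOWER_KEY_ENC_IPV4_SRC :
                     TCA_FLOWER_KEY_IPV4_SRC,
                     spec.ipv4->hdr.src_addr);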

We are going to support Flows that match on outer tunnel items
but do not contain an explicit tunnel decap action (the action
might be drop, redirect or table jump, for example). Hence we
cannot rely on the presence of a tunnel decap action in the list
to decide whether the Flow is a tunnel one; instead, the presence
of a tunnel item is used. Item translation is rebound to the
presence of tunnel items instead of relying on the decap action,
as sketched below.
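
A minimal sketch of that decision, as an illustrative stand-alone
helper (the name flow_has_tunnel_item() is hypothetical; the actual
patch sets MLX5_FLOW_ACTION_VXLAN_DECAP directly while walking the
items in flow_tcf_get_items_size()):

    #include <stdbool.h>
    #include <rte_flow.h>

    /* Hypothetical helper: report whether the pattern contains a
     * tunnel item, so the caller can request the tunnel/decap
     * structures even when the action list carries no explicit
     * VXLAN decap action. */
    static bool
    flow_has_tunnel_item(const struct rte_flow_item items[])
    {
            for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++)
                    if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN)
                            return true;
            return false;
    }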

There is no way to tell the kernel driver the outer address type
(IPv4 or IPv6) other than by specifying the address flower key.
The outer address key is therefore put into the Netlink message
with a zero mask if no corresponding RTE item is specified in
the list, as sketched below.
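
For instance, if no outer IPv4 item is given for a tunnel flow, the
translation still emits the ENC source address key, taking both the
value and the mask from the all-zero empty mask (simplified from the
IPv4 hunk below):

    /* Announce the outer address family to the kernel even without a
     * user-provided outer IPv4 item: zero address with zero mask. */
    mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_ENC_IPV4_SRC,
                     mask.ipv4->hdr.src_addr);
    mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK,
                     mask.ipv4->hdr.src_addr);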

Cc: stable@dpdk.org

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
---
 drivers/net/mlx5/mlx5_flow_tcf.c | 174 ++++++++++++++++++++++++++++-----------
 1 file changed, 125 insertions(+), 49 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
index 5fc50c2..688422d 100644
--- a/drivers/net/mlx5/mlx5_flow_tcf.c
+++ b/drivers/net/mlx5/mlx5_flow_tcf.c
@@ -463,7 +463,9 @@ struct flow_tcf_stats_basic {
 	struct rte_flow_item_tcp tcp;
 	struct rte_flow_item_udp udp;
 	struct rte_flow_item_vxlan vxlan;
-} flow_tcf_mask_empty;
+} flow_tcf_mask_empty = {
+	{0},
+};
 
 /** Supported masks for known item types. */
 static const struct {
@@ -2292,13 +2294,16 @@ struct pedit_parser {
  *   Pointer to the flow attributes.
  * @param[in] items
  *   Pointer to the list of items.
+ * @param[out] action_flags
+ *   Pointer to the detected actions.
  *
  * @return
  *   Maximum size of memory for items.
  */
 static int
 flow_tcf_get_items_size(const struct rte_flow_attr *attr,
-			const struct rte_flow_item items[])
+			const struct rte_flow_item items[],
+			uint64_t *action_flags)
 {
 	int size = 0;
 
@@ -2349,6 +2354,16 @@ struct pedit_parser {
 			break;
 		case RTE_FLOW_ITEM_TYPE_VXLAN:
 			size += SZ_NLATTR_TYPE_OF(uint32_t);
+			/*
+			 * There might be no VXLAN decap action in the action
+			 * list, nonetheless the VXLAN tunnel flow requires
+			 * the decap structure to be correctly applied to
+			 * VXLAN device, set the flag to create the structure.
+			 * Translation routine will not put the decap action
+			 * in the Netlink message if there is no actual action
+			 * in the list.
+			 */
+			*action_flags |= MLX5_FLOW_ACTION_VXLAN_DECAP;
 			break;
 		default:
 			DRV_LOG(WARNING,
@@ -2597,7 +2612,7 @@ struct pedit_parser {
 	struct tcmsg *tcm;
 	uint8_t *sp, *tun = NULL;
 
-	size += flow_tcf_get_items_size(attr, items);
+	size += flow_tcf_get_items_size(attr, items, &action_flags);
 	size += flow_tcf_get_actions_and_size(actions, &action_flags);
 	dev_flow = rte_zmalloc(__func__, size, MNL_ALIGNTO);
 	if (!dev_flow) {
@@ -3001,6 +3016,7 @@ struct pedit_parser {
 	bool vlan_present = 0;
 	bool vlan_eth_type_set = 0;
 	bool ip_proto_set = 0;
+	bool tunnel_outer = 0;
 	struct nlattr *na_flower;
 	struct nlattr *na_flower_act;
 	struct nlattr *na_vlan_id = NULL;
@@ -3014,6 +3030,7 @@ struct pedit_parser {
 		switch (dev_flow->tcf.tunnel->type) {
 		case FLOW_TCF_TUNACT_VXLAN_DECAP:
 			decap.vxlan = dev_flow->tcf.vxlan_decap;
+			tunnel_outer = 1;
 			break;
 		case FLOW_TCF_TUNACT_VXLAN_ENCAP:
 			encap.vxlan = dev_flow->tcf.vxlan_encap;
@@ -3068,7 +3085,7 @@ struct pedit_parser {
 			tcm->tcm_ifindex = ptoi[i].ifindex;
 			break;
 		case RTE_FLOW_ITEM_TYPE_ETH:
-			item_flags |= (item_flags & MLX5_FLOW_LAYER_VXLAN) ?
+			item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ?
 				      MLX5_FLOW_LAYER_INNER_L2 :
 				      MLX5_FLOW_LAYER_OUTER_L2;
 			mask.eth = flow_tcf_item_mask
@@ -3081,12 +3098,11 @@ struct pedit_parser {
 			if (mask.eth == &flow_tcf_mask_empty.eth)
 				break;
 			spec.eth = items->spec;
-			if (decap.vxlan &&
-			    !(item_flags & MLX5_FLOW_LAYER_VXLAN)) {
+			if (tunnel_outer) {
 				DRV_LOG(WARNING,
-					"outer L2 addresses cannot be forced"
-					" for vxlan decapsulation, parameter"
-					" ignored");
+					"outer L2 addresses cannot be"
+					" forced for tunnel outer layers,"
+					" parameter is ignored");
 				break;
 			}
 			if (mask.eth->type) {
@@ -3115,6 +3131,7 @@ struct pedit_parser {
 		case RTE_FLOW_ITEM_TYPE_VLAN:
 			assert(!encap.hdr);
 			assert(!decap.hdr);
+			assert(!tunnel_outer);
 			item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
 			mask.vlan = flow_tcf_item_mask
 				(items, &rte_flow_item_vlan_mask,
@@ -3149,7 +3166,9 @@ struct pedit_parser {
 			assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
 			break;
 		case RTE_FLOW_ITEM_TYPE_IPV4:
-			item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
+			item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ?
+				      MLX5_FLOW_LAYER_INNER_L3_IPV4 :
+				      MLX5_FLOW_LAYER_OUTER_L3_IPV4;
 			mask.ipv4 = flow_tcf_item_mask
 				(items, &rte_flow_item_ipv4_mask,
 				 &flow_tcf_mask_supported.ipv4,
@@ -3158,7 +3177,7 @@ struct pedit_parser {
 				 error);
 			assert(mask.ipv4);
 			spec.ipv4 = items->spec;
-			if (!decap.vxlan) {
+			if (!tunnel_outer) {
 				if (!eth_type_set ||
 				    (!vlan_eth_type_set && vlan_present))
 					mnl_attr_put_u16
@@ -3169,45 +3188,70 @@ struct pedit_parser {
 						 RTE_BE16(ETH_P_IP));
 				eth_type_set = 1;
 				vlan_eth_type_set = 1;
-				if (mask.ipv4 == &flow_tcf_mask_empty.ipv4)
+			}
+			if (!tunnel_outer && mask.ipv4->hdr.next_proto_id) {
+				/*
+				 * No way to set IP protocol for outer tunnel
+				 * layers. Usually it is fixed, for example,
+				 * to UDP for VXLAN/GPE.
+				 */
+				assert(spec.ipv4); /* Mask is not empty. */
+				mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
+						spec.ipv4->hdr.next_proto_id);
+				ip_proto_set = 1;
+			}
+			if (mask.ipv4 == &flow_tcf_mask_empty.ipv4 ||
+			     (!mask.ipv4->hdr.src_addr &&
+			      !mask.ipv4->hdr.dst_addr)) {
+				if (!tunnel_outer)
 					break;
-				if (mask.ipv4->hdr.next_proto_id) {
-					mnl_attr_put_u8
-						(nlh, TCA_FLOWER_KEY_IP_PROTO,
-						 spec.ipv4->hdr.next_proto_id);
-					ip_proto_set = 1;
-				}
-			} else {
-				assert(mask.ipv4 != &flow_tcf_mask_empty.ipv4);
+				/*
+				 * For tunnel outer we must set outer IP key
+				 * anyway, even if the specification/mask is
+				 * empty. There is no other way to tell the
+				 * kernel about the outer layer protocol.
+				 */
+				mnl_attr_put_u32
+					(nlh, TCA_FLOWER_KEY_ENC_IPV4_SRC,
+					 mask.ipv4->hdr.src_addr);
+				mnl_attr_put_u32
+					(nlh, TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK,
+					 mask.ipv4->hdr.src_addr);
+				assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
+				break;
 			}
 			if (mask.ipv4->hdr.src_addr) {
 				mnl_attr_put_u32
-					(nlh, decap.vxlan ?
+					(nlh, tunnel_outer ?
 					 TCA_FLOWER_KEY_ENC_IPV4_SRC :
 					 TCA_FLOWER_KEY_IPV4_SRC,
 					 spec.ipv4->hdr.src_addr);
 				mnl_attr_put_u32
-					(nlh, decap.vxlan ?
+					(nlh, tunnel_outer ?
 					 TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK :
 					 TCA_FLOWER_KEY_IPV4_SRC_MASK,
 					 mask.ipv4->hdr.src_addr);
 			}
 			if (mask.ipv4->hdr.dst_addr) {
 				mnl_attr_put_u32
-					(nlh, decap.vxlan ?
+					(nlh, tunnel_outer ?
 					 TCA_FLOWER_KEY_ENC_IPV4_DST :
 					 TCA_FLOWER_KEY_IPV4_DST,
 					 spec.ipv4->hdr.dst_addr);
 				mnl_attr_put_u32
-					(nlh, decap.vxlan ?
+					(nlh, tunnel_outer ?
 					 TCA_FLOWER_KEY_ENC_IPV4_DST_MASK :
 					 TCA_FLOWER_KEY_IPV4_DST_MASK,
 					 mask.ipv4->hdr.dst_addr);
 			}
 			assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
 			break;
-		case RTE_FLOW_ITEM_TYPE_IPV6:
-			item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
+		case RTE_FLOW_ITEM_TYPE_IPV6: {
+			bool ipv6_src, ipv6_dst;
+
+			item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ?
+				      MLX5_FLOW_LAYER_INNER_L3_IPV6 :
+				      MLX5_FLOW_LAYER_OUTER_L3_IPV6;
 			mask.ipv6 = flow_tcf_item_mask
 				(items, &rte_flow_item_ipv6_mask,
 				 &flow_tcf_mask_supported.ipv6,
@@ -3216,7 +3260,7 @@ struct pedit_parser {
 				 error);
 			assert(mask.ipv6);
 			spec.ipv6 = items->spec;
-			if (!decap.vxlan) {
+			if (!tunnel_outer) {
 				if (!eth_type_set ||
 				    (!vlan_eth_type_set && vlan_present))
 					mnl_attr_put_u16
@@ -3227,36 +3271,62 @@ struct pedit_parser {
 						 RTE_BE16(ETH_P_IPV6));
 				eth_type_set = 1;
 				vlan_eth_type_set = 1;
-				if (mask.ipv6 == &flow_tcf_mask_empty.ipv6)
+			}
+			if (!tunnel_outer && mask.ipv6->hdr.proto) {
+				/*
+				 * No way to set IP protocol for outer tunnel
+				 * layers. Usually it is fixed, for example,
+				 * to UDP for VXLAN/GPE.
+				 */
+				assert(spec.ipv6); /* Mask is not empty. */
+				mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
+						spec.ipv6->hdr.proto);
+				ip_proto_set = 1;
+			}
+			ipv6_dst = !IN6_IS_ADDR_UNSPECIFIED
+						(mask.ipv6->hdr.dst_addr);
+			ipv6_src = !IN6_IS_ADDR_UNSPECIFIED
+						(mask.ipv6->hdr.src_addr);
+			if (mask.ipv6 == &flow_tcf_mask_empty.ipv6 ||
+			     (!ipv6_dst && !ipv6_src)) {
+				if (!tunnel_outer)
 					break;
-				if (mask.ipv6->hdr.proto) {
-					mnl_attr_put_u8
-						(nlh, TCA_FLOWER_KEY_IP_PROTO,
-						 spec.ipv6->hdr.proto);
-					ip_proto_set = 1;
-				}
-			} else {
-				assert(mask.ipv6 != &flow_tcf_mask_empty.ipv6);
+				/*
+				 * For tunnel outer we must set outer IP key
+				 * anyway, even if the specification/mask is
+				 * empty. There is no other way to tell the
+				 * kernel about the outer layer protocol.
+				 */
+				mnl_attr_put(nlh,
+					     TCA_FLOWER_KEY_ENC_IPV6_SRC,
+					     IPV6_ADDR_LEN,
+					     mask.ipv6->hdr.src_addr);
+				mnl_attr_put(nlh,
+					     TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK,
+					     IPV6_ADDR_LEN,
+					     mask.ipv6->hdr.src_addr);
+				assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
+				break;
 			}
-			if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr)) {
-				mnl_attr_put(nlh, decap.vxlan ?
+			if (ipv6_src) {
+				mnl_attr_put(nlh, tunnel_outer ?
 					     TCA_FLOWER_KEY_ENC_IPV6_SRC :
 					     TCA_FLOWER_KEY_IPV6_SRC,
 					     IPV6_ADDR_LEN,
 					     spec.ipv6->hdr.src_addr);
-				mnl_attr_put(nlh, decap.vxlan ?
+				mnl_attr_put(nlh, tunnel_outer ?
 					     TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK :
 					     TCA_FLOWER_KEY_IPV6_SRC_MASK,
 					     IPV6_ADDR_LEN,
 					     mask.ipv6->hdr.src_addr);
 			}
-			if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr)) {
-				mnl_attr_put(nlh, decap.vxlan ?
+			if (ipv6_dst) {
+				mnl_attr_put(nlh, tunnel_outer ?
 					     TCA_FLOWER_KEY_ENC_IPV6_DST :
 					     TCA_FLOWER_KEY_IPV6_DST,
 					     IPV6_ADDR_LEN,
 					     spec.ipv6->hdr.dst_addr);
-				mnl_attr_put(nlh, decap.vxlan ?
+				mnl_attr_put(nlh, tunnel_outer ?
 					     TCA_FLOWER_KEY_ENC_IPV6_DST_MASK :
 					     TCA_FLOWER_KEY_IPV6_DST_MASK,
 					     IPV6_ADDR_LEN,
@@ -3264,8 +3334,11 @@ struct pedit_parser {
 			}
 			assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
 			break;
+		}
 		case RTE_FLOW_ITEM_TYPE_UDP:
-			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
+			item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ?
+				      MLX5_FLOW_LAYER_INNER_L4_UDP :
+				      MLX5_FLOW_LAYER_OUTER_L4_UDP;
 			mask.udp = flow_tcf_item_mask
 				(items, &rte_flow_item_udp_mask,
 				 &flow_tcf_mask_supported.udp,
@@ -3274,7 +3347,7 @@ struct pedit_parser {
 				 error);
 			assert(mask.udp);
 			spec.udp = items->spec;
-			if (!decap.vxlan) {
+			if (!tunnel_outer) {
 				if (!ip_proto_set)
 					mnl_attr_put_u8
 						(nlh, TCA_FLOWER_KEY_IP_PROTO,
@@ -3289,24 +3362,24 @@ struct pedit_parser {
 			}
 			if (mask.udp->hdr.src_port) {
 				mnl_attr_put_u16
-					(nlh, decap.vxlan ?
+					(nlh, tunnel_outer ?
 					 TCA_FLOWER_KEY_ENC_UDP_SRC_PORT :
 					 TCA_FLOWER_KEY_UDP_SRC,
 					 spec.udp->hdr.src_port);
 				mnl_attr_put_u16
-					(nlh, decap.vxlan ?
+					(nlh, tunnel_outer ?
 					 TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK :
 					 TCA_FLOWER_KEY_UDP_SRC_MASK,
 					 mask.udp->hdr.src_port);
 			}
 			if (mask.udp->hdr.dst_port) {
 				mnl_attr_put_u16
-					(nlh, decap.vxlan ?
+					(nlh, tunnel_outer ?
 					 TCA_FLOWER_KEY_ENC_UDP_DST_PORT :
 					 TCA_FLOWER_KEY_UDP_DST,
 					 spec.udp->hdr.dst_port);
 				mnl_attr_put_u16
-					(nlh, decap.vxlan ?
+					(nlh, tunnel_outer ?
 					 TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK :
 					 TCA_FLOWER_KEY_UDP_DST_MASK,
 					 mask.udp->hdr.dst_port);
@@ -3314,7 +3387,9 @@ struct pedit_parser {
 			assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
 			break;
 		case RTE_FLOW_ITEM_TYPE_TCP:
-			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
+			item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ?
+				      MLX5_FLOW_LAYER_INNER_L4_TCP :
+				      MLX5_FLOW_LAYER_OUTER_L4_TCP;
 			mask.tcp = flow_tcf_item_mask
 				(items, &rte_flow_item_tcp_mask,
 				 &flow_tcf_mask_supported.tcp,
@@ -3358,6 +3433,7 @@ struct pedit_parser {
 			break;
 		case RTE_FLOW_ITEM_TYPE_VXLAN:
 			assert(decap.vxlan);
+			tunnel_outer = 0;
 			item_flags |= MLX5_FLOW_LAYER_VXLAN;
 			spec.vxlan = items->spec;
 			mnl_attr_put_u32(nlh,
-- 
1.8.3.1


Thread overview: 11+ messages
2018-12-27 15:34 [dpdk-stable] [PATCH 0/5] net/mlx5: add inner tunnel " Viacheslav Ovsiienko
2018-12-27 15:34 ` [dpdk-stable] [PATCH 1/5] net/mlx5: remove checks for outer tunnel items " Viacheslav Ovsiienko
2018-12-27 15:34 ` [dpdk-stable] [PATCH 2/5] net/mlx5: add tunnel inner items validation " Viacheslav Ovsiienko
2018-12-27 15:34 ` Viacheslav Ovsiienko [this message]
2018-12-27 15:34 ` [dpdk-stable] [PATCH 4/5] net/mlx5: add ethernet type " Viacheslav Ovsiienko
2018-12-27 15:34 ` [dpdk-stable] [PATCH 5/5] net/mlx5: add ethernet type support for tunnels " Viacheslav Ovsiienko
2019-01-13 12:11 ` [dpdk-stable] [dpdk-dev] [PATCH 0/5] net/mlx5: add inner tunnel items support " Shahaf Shuler
2019-01-31 14:52 ` Kevin Traynor
2019-01-31 16:13   ` Slava Ovsiienko
2019-02-06 11:00     ` Kevin Traynor
2019-02-06 11:56       ` Slava Ovsiienko
