From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mellanox.co.il (mail-il-dmz.mellanox.com [193.47.165.129]) by dpdk.org (Postfix) with ESMTP id 3E3401B105 for ; Thu, 27 Dec 2018 16:35:01 +0100 (CET) Received: from Internal Mail-Server by MTLPINE1 (envelope-from viacheslavo@mellanox.com) with ESMTPS (AES256-SHA encrypted); 27 Dec 2018 17:34:56 +0200 Received: from pegasus12.mtr.labs.mlnx. (pegasus12.mtr.labs.mlnx [10.210.17.40]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id wBRFYuV2005767; Thu, 27 Dec 2018 17:34:56 +0200 From: Viacheslav Ovsiienko To: shahafs@mellanox.com Cc: dev@dpdk.org, stable@dpdk.org Date: Thu, 27 Dec 2018 15:34:43 +0000 Message-Id: <1545924885-6215-4-git-send-email-viacheslavo@mellanox.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1545924885-6215-1-git-send-email-viacheslavo@mellanox.com> References: <1545924885-6215-1-git-send-email-viacheslavo@mellanox.com> Subject: [dpdk-dev] [PATCH 3/5] net/mlx5: add tunnel inner items support on E-Switch X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 27 Dec 2018 15:35:02 -0000 This patch updates the translation routine for the E-Switch Flows. Inner tunnel pattern items are translated into Netlink message, support for tunnel inner IP addresses (v4 or v6), IP protocol, and TCP and UDP ports is added. We are going to support Flows matching with outer tunnel items and not containing the explicit tunnel decap action (this one might be drop, redirect or table jump, for exapmle). So we can not rely on presence of tunnel decap action in the list to decide whether the Flow is for tunnel, instead we will use the presence of tunnel item. Item translation is rebound to presence of tunnel items, instead of relying on decap action. There is no way to tell kernel driver the outer address type (IPv4 or IPv6) but specify the address flower key. The outer address key is put on Netlink with zero mask if there is no RTE item is specified in the list. Cc: stable@dpdk.org Signed-off-by: Viacheslav Ovsiienko --- drivers/net/mlx5/mlx5_flow_tcf.c | 174 ++++++++++++++++++++++++++++----------- 1 file changed, 125 insertions(+), 49 deletions(-) diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c index 5fc50c2..688422d 100644 --- a/drivers/net/mlx5/mlx5_flow_tcf.c +++ b/drivers/net/mlx5/mlx5_flow_tcf.c @@ -463,7 +463,9 @@ struct flow_tcf_stats_basic { struct rte_flow_item_tcp tcp; struct rte_flow_item_udp udp; struct rte_flow_item_vxlan vxlan; -} flow_tcf_mask_empty; +} flow_tcf_mask_empty = { + {0}, +}; /** Supported masks for known item types. */ static const struct { @@ -2292,13 +2294,16 @@ struct pedit_parser { * Pointer to the flow attributes. * @param[in] items * Pointer to the list of items. + * @param[out] action_flags + * Pointer to the detected actions. * * @return * Maximum size of memory for items. */ static int flow_tcf_get_items_size(const struct rte_flow_attr *attr, - const struct rte_flow_item items[]) + const struct rte_flow_item items[], + uint64_t *action_flags) { int size = 0; @@ -2349,6 +2354,16 @@ struct pedit_parser { break; case RTE_FLOW_ITEM_TYPE_VXLAN: size += SZ_NLATTR_TYPE_OF(uint32_t); + /* + * There might be no VXLAN decap action in the action + * list, nonetheless the VXLAN tunnel flow requires + * the decap structure to be correctly applied to + * VXLAN device, set the flag to create the structure. + * Translation routine will not put the decap action + * in tne Netlink message if there is no actual action + * in the list. + */ + *action_flags |= MLX5_FLOW_ACTION_VXLAN_DECAP; break; default: DRV_LOG(WARNING, @@ -2597,7 +2612,7 @@ struct pedit_parser { struct tcmsg *tcm; uint8_t *sp, *tun = NULL; - size += flow_tcf_get_items_size(attr, items); + size += flow_tcf_get_items_size(attr, items, &action_flags); size += flow_tcf_get_actions_and_size(actions, &action_flags); dev_flow = rte_zmalloc(__func__, size, MNL_ALIGNTO); if (!dev_flow) { @@ -3001,6 +3016,7 @@ struct pedit_parser { bool vlan_present = 0; bool vlan_eth_type_set = 0; bool ip_proto_set = 0; + bool tunnel_outer = 0; struct nlattr *na_flower; struct nlattr *na_flower_act; struct nlattr *na_vlan_id = NULL; @@ -3014,6 +3030,7 @@ struct pedit_parser { switch (dev_flow->tcf.tunnel->type) { case FLOW_TCF_TUNACT_VXLAN_DECAP: decap.vxlan = dev_flow->tcf.vxlan_decap; + tunnel_outer = 1; break; case FLOW_TCF_TUNACT_VXLAN_ENCAP: encap.vxlan = dev_flow->tcf.vxlan_encap; @@ -3068,7 +3085,7 @@ struct pedit_parser { tcm->tcm_ifindex = ptoi[i].ifindex; break; case RTE_FLOW_ITEM_TYPE_ETH: - item_flags |= (item_flags & MLX5_FLOW_LAYER_VXLAN) ? + item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ? MLX5_FLOW_LAYER_INNER_L2 : MLX5_FLOW_LAYER_OUTER_L2; mask.eth = flow_tcf_item_mask @@ -3081,12 +3098,11 @@ struct pedit_parser { if (mask.eth == &flow_tcf_mask_empty.eth) break; spec.eth = items->spec; - if (decap.vxlan && - !(item_flags & MLX5_FLOW_LAYER_VXLAN)) { + if (tunnel_outer) { DRV_LOG(WARNING, - "outer L2 addresses cannot be forced" - " for vxlan decapsulation, parameter" - " ignored"); + "outer L2 addresses cannot be" + " forced is outer ones for tunnel," + " parameter is ignored"); break; } if (mask.eth->type) { @@ -3115,6 +3131,7 @@ struct pedit_parser { case RTE_FLOW_ITEM_TYPE_VLAN: assert(!encap.hdr); assert(!decap.hdr); + assert(!tunnel_outer); item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN; mask.vlan = flow_tcf_item_mask (items, &rte_flow_item_vlan_mask, @@ -3149,7 +3166,9 @@ struct pedit_parser { assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len); break; case RTE_FLOW_ITEM_TYPE_IPV4: - item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4; + item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ? + MLX5_FLOW_LAYER_INNER_L3_IPV4 : + MLX5_FLOW_LAYER_OUTER_L3_IPV4; mask.ipv4 = flow_tcf_item_mask (items, &rte_flow_item_ipv4_mask, &flow_tcf_mask_supported.ipv4, @@ -3158,7 +3177,7 @@ struct pedit_parser { error); assert(mask.ipv4); spec.ipv4 = items->spec; - if (!decap.vxlan) { + if (!tunnel_outer) { if (!eth_type_set || (!vlan_eth_type_set && vlan_present)) mnl_attr_put_u16 @@ -3169,45 +3188,70 @@ struct pedit_parser { RTE_BE16(ETH_P_IP)); eth_type_set = 1; vlan_eth_type_set = 1; - if (mask.ipv4 == &flow_tcf_mask_empty.ipv4) + } + if (!tunnel_outer && mask.ipv4->hdr.next_proto_id) { + /* + * No way to set IP protocol for outer tunnel + * layers. Usually it is fixed, for example, + * to UDP for VXLAN/GPE. + */ + assert(spec.ipv4); /* Mask is not empty. */ + mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO, + spec.ipv4->hdr.next_proto_id); + ip_proto_set = 1; + } + if (mask.ipv4 == &flow_tcf_mask_empty.ipv4 || + (!mask.ipv4->hdr.src_addr && + !mask.ipv4->hdr.dst_addr)) { + if (!tunnel_outer) break; - if (mask.ipv4->hdr.next_proto_id) { - mnl_attr_put_u8 - (nlh, TCA_FLOWER_KEY_IP_PROTO, - spec.ipv4->hdr.next_proto_id); - ip_proto_set = 1; - } - } else { - assert(mask.ipv4 != &flow_tcf_mask_empty.ipv4); + /* + * For tunnel outer we must set outer IP key + * anyway, even if the specification/mask is + * empty. There is no another way to tell + * kernel about he outer layer protocol. + */ + mnl_attr_put_u32 + (nlh, TCA_FLOWER_KEY_ENC_IPV4_SRC, + mask.ipv4->hdr.src_addr); + mnl_attr_put_u32 + (nlh, TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK, + mask.ipv4->hdr.src_addr); + assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len); + break; } if (mask.ipv4->hdr.src_addr) { mnl_attr_put_u32 - (nlh, decap.vxlan ? + (nlh, tunnel_outer ? TCA_FLOWER_KEY_ENC_IPV4_SRC : TCA_FLOWER_KEY_IPV4_SRC, spec.ipv4->hdr.src_addr); mnl_attr_put_u32 - (nlh, decap.vxlan ? + (nlh, tunnel_outer ? TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK : TCA_FLOWER_KEY_IPV4_SRC_MASK, mask.ipv4->hdr.src_addr); } if (mask.ipv4->hdr.dst_addr) { mnl_attr_put_u32 - (nlh, decap.vxlan ? + (nlh, tunnel_outer ? TCA_FLOWER_KEY_ENC_IPV4_DST : TCA_FLOWER_KEY_IPV4_DST, spec.ipv4->hdr.dst_addr); mnl_attr_put_u32 - (nlh, decap.vxlan ? + (nlh, tunnel_outer ? TCA_FLOWER_KEY_ENC_IPV4_DST_MASK : TCA_FLOWER_KEY_IPV4_DST_MASK, mask.ipv4->hdr.dst_addr); } assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len); break; - case RTE_FLOW_ITEM_TYPE_IPV6: - item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6; + case RTE_FLOW_ITEM_TYPE_IPV6: { + bool ipv6_src, ipv6_dst; + + item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ? + MLX5_FLOW_LAYER_INNER_L3_IPV6 : + MLX5_FLOW_LAYER_OUTER_L3_IPV6; mask.ipv6 = flow_tcf_item_mask (items, &rte_flow_item_ipv6_mask, &flow_tcf_mask_supported.ipv6, @@ -3216,7 +3260,7 @@ struct pedit_parser { error); assert(mask.ipv6); spec.ipv6 = items->spec; - if (!decap.vxlan) { + if (!tunnel_outer) { if (!eth_type_set || (!vlan_eth_type_set && vlan_present)) mnl_attr_put_u16 @@ -3227,36 +3271,62 @@ struct pedit_parser { RTE_BE16(ETH_P_IPV6)); eth_type_set = 1; vlan_eth_type_set = 1; - if (mask.ipv6 == &flow_tcf_mask_empty.ipv6) + } + if (!tunnel_outer && mask.ipv6->hdr.proto) { + /* + * No way to set IP protocol for outer tunnel + * layers. Usually it is fixed, for example, + * to UDP for VXLAN/GPE. + */ + assert(spec.ipv6); /* Mask is not empty. */ + mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO, + spec.ipv6->hdr.proto); + ip_proto_set = 1; + } + ipv6_dst = !IN6_IS_ADDR_UNSPECIFIED + (mask.ipv6->hdr.dst_addr); + ipv6_src = !IN6_IS_ADDR_UNSPECIFIED + (mask.ipv6->hdr.src_addr); + if (mask.ipv6 == &flow_tcf_mask_empty.ipv6 || + (!ipv6_dst && !ipv6_src)) { + if (!tunnel_outer) break; - if (mask.ipv6->hdr.proto) { - mnl_attr_put_u8 - (nlh, TCA_FLOWER_KEY_IP_PROTO, - spec.ipv6->hdr.proto); - ip_proto_set = 1; - } - } else { - assert(mask.ipv6 != &flow_tcf_mask_empty.ipv6); + /* + * For tunnel outer we must set outer IP key + * anyway, even if the specification/mask is + * empty. There is no another way to tell + * kernel about he outer layer protocol. + */ + mnl_attr_put(nlh, + TCA_FLOWER_KEY_ENC_IPV6_SRC, + IPV6_ADDR_LEN, + mask.ipv6->hdr.src_addr); + mnl_attr_put(nlh, + TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK, + IPV6_ADDR_LEN, + mask.ipv6->hdr.src_addr); + assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len); + break; } - if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr)) { - mnl_attr_put(nlh, decap.vxlan ? + if (ipv6_src) { + mnl_attr_put(nlh, tunnel_outer ? TCA_FLOWER_KEY_ENC_IPV6_SRC : TCA_FLOWER_KEY_IPV6_SRC, IPV6_ADDR_LEN, spec.ipv6->hdr.src_addr); - mnl_attr_put(nlh, decap.vxlan ? + mnl_attr_put(nlh, tunnel_outer ? TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK : TCA_FLOWER_KEY_IPV6_SRC_MASK, IPV6_ADDR_LEN, mask.ipv6->hdr.src_addr); } - if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr)) { - mnl_attr_put(nlh, decap.vxlan ? + if (ipv6_dst) { + mnl_attr_put(nlh, tunnel_outer ? TCA_FLOWER_KEY_ENC_IPV6_DST : TCA_FLOWER_KEY_IPV6_DST, IPV6_ADDR_LEN, spec.ipv6->hdr.dst_addr); - mnl_attr_put(nlh, decap.vxlan ? + mnl_attr_put(nlh, tunnel_outer ? TCA_FLOWER_KEY_ENC_IPV6_DST_MASK : TCA_FLOWER_KEY_IPV6_DST_MASK, IPV6_ADDR_LEN, @@ -3264,8 +3334,11 @@ struct pedit_parser { } assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len); break; + } case RTE_FLOW_ITEM_TYPE_UDP: - item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP; + item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ? + MLX5_FLOW_LAYER_INNER_L4_UDP : + MLX5_FLOW_LAYER_OUTER_L4_UDP; mask.udp = flow_tcf_item_mask (items, &rte_flow_item_udp_mask, &flow_tcf_mask_supported.udp, @@ -3274,7 +3347,7 @@ struct pedit_parser { error); assert(mask.udp); spec.udp = items->spec; - if (!decap.vxlan) { + if (!tunnel_outer) { if (!ip_proto_set) mnl_attr_put_u8 (nlh, TCA_FLOWER_KEY_IP_PROTO, @@ -3289,24 +3362,24 @@ struct pedit_parser { } if (mask.udp->hdr.src_port) { mnl_attr_put_u16 - (nlh, decap.vxlan ? + (nlh, tunnel_outer ? TCA_FLOWER_KEY_ENC_UDP_SRC_PORT : TCA_FLOWER_KEY_UDP_SRC, spec.udp->hdr.src_port); mnl_attr_put_u16 - (nlh, decap.vxlan ? + (nlh, tunnel_outer ? TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK : TCA_FLOWER_KEY_UDP_SRC_MASK, mask.udp->hdr.src_port); } if (mask.udp->hdr.dst_port) { mnl_attr_put_u16 - (nlh, decap.vxlan ? + (nlh, tunnel_outer ? TCA_FLOWER_KEY_ENC_UDP_DST_PORT : TCA_FLOWER_KEY_UDP_DST, spec.udp->hdr.dst_port); mnl_attr_put_u16 - (nlh, decap.vxlan ? + (nlh, tunnel_outer ? TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK : TCA_FLOWER_KEY_UDP_DST_MASK, mask.udp->hdr.dst_port); @@ -3314,7 +3387,9 @@ struct pedit_parser { assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len); break; case RTE_FLOW_ITEM_TYPE_TCP: - item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP; + item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ? + MLX5_FLOW_LAYER_INNER_L4_TCP : + MLX5_FLOW_LAYER_OUTER_L4_TCP; mask.tcp = flow_tcf_item_mask (items, &rte_flow_item_tcp_mask, &flow_tcf_mask_supported.tcp, @@ -3358,6 +3433,7 @@ struct pedit_parser { break; case RTE_FLOW_ITEM_TYPE_VXLAN: assert(decap.vxlan); + tunnel_outer = 0; item_flags |= MLX5_FLOW_LAYER_VXLAN; spec.vxlan = items->spec; mnl_attr_put_u32(nlh, -- 1.8.3.1