From: Adrien Mazarguil <adrien.mazarguil@6wind.com>
To: Shahaf Shuler <shahafs@mellanox.com>,
Yongseok Koh <yskoh@mellanox.com>,
Slava Ovsiienko <viacheslavo@mellanox.com>
Cc: dev@dpdk.org
Subject: [dpdk-dev] [PATCH 8/8] net/mlx5: add VXLAN decap support to switch flow rules
Date: Fri, 31 Aug 2018 11:57:42 +0200 [thread overview]
Message-ID: <20180831092038.23051-9-adrien.mazarguil@6wind.com> (raw)
In-Reply-To: <20180831092038.23051-1-adrien.mazarguil@6wind.com>
This provides support for the VXLAN_DECAP action. Outer tunnel properties
are specified as the initial part of the flow rule pattern (up to and
including the VXLAN item), optionally followed by inner traffic properties.
Testpmd examples:
- Creating a flow on port ID 1 performing VXLAN decapsulation and directing
the result to port ID 2 without checking inner properties:
flow create 1 ingress transfer pattern eth src is 66:77:88:99:aa:bb
dst is 00:11:22:33:44:55 / ipv4 src is 2.2.2.2 dst is 1.1.1.1 /
udp src is 4789 dst is 4242 / vxlan vni is 0x112233 / end
actions vxlan_decap / port_id id 2 / end
- Same as above, except that only inner TCP over IPv6 packets with
destination port 42 are let through:
flow create 1 ingress transfer pattern eth src is 66:77:88:99:aa:bb
dst is 00:11:22:33:44:55 / ipv4 src is 2.2.2.2 dst is 1.1.1.1 /
udp src is 4789 dst is 4242 / vxlan vni is 0x112233 /
eth / ipv6 / tcp dst is 42 / end
actions vxlan_decap / port_id id 2 / end
Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
---
drivers/net/mlx5/Makefile | 65 +++++++
drivers/net/mlx5/mlx5_nl_flow.c | 344 ++++++++++++++++++++++++++++++++---
2 files changed, 379 insertions(+), 30 deletions(-)
diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index 1ba4ce612..85672abd6 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -335,6 +335,71 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
enum TCA_FLOWER_KEY_VLAN_ETH_TYPE \
$(AUTOCONF_OUTPUT)
$Q sh -- '$<' '$@' \
+ HAVE_TCA_FLOWER_KEY_ENC_KEY_ID \
+ linux/pkt_cls.h \
+ enum TCA_FLOWER_KEY_ENC_KEY_ID \
+ $(AUTOCONF_OUTPUT)
+ $Q sh -- '$<' '$@' \
+ HAVE_TCA_FLOWER_KEY_ENC_IPV4_SRC \
+ linux/pkt_cls.h \
+ enum TCA_FLOWER_KEY_ENC_IPV4_SRC \
+ $(AUTOCONF_OUTPUT)
+ $Q sh -- '$<' '$@' \
+ HAVE_TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK \
+ linux/pkt_cls.h \
+ enum TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK \
+ $(AUTOCONF_OUTPUT)
+ $Q sh -- '$<' '$@' \
+ HAVE_TCA_FLOWER_KEY_ENC_IPV4_DST \
+ linux/pkt_cls.h \
+ enum TCA_FLOWER_KEY_ENC_IPV4_DST \
+ $(AUTOCONF_OUTPUT)
+ $Q sh -- '$<' '$@' \
+ HAVE_TCA_FLOWER_KEY_ENC_IPV4_DST_MASK \
+ linux/pkt_cls.h \
+ enum TCA_FLOWER_KEY_ENC_IPV4_DST_MASK \
+ $(AUTOCONF_OUTPUT)
+ $Q sh -- '$<' '$@' \
+ HAVE_TCA_FLOWER_KEY_ENC_IPV6_SRC \
+ linux/pkt_cls.h \
+ enum TCA_FLOWER_KEY_ENC_IPV6_SRC \
+ $(AUTOCONF_OUTPUT)
+ $Q sh -- '$<' '$@' \
+ HAVE_TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK \
+ linux/pkt_cls.h \
+ enum TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK \
+ $(AUTOCONF_OUTPUT)
+ $Q sh -- '$<' '$@' \
+ HAVE_TCA_FLOWER_KEY_ENC_IPV6_DST \
+ linux/pkt_cls.h \
+ enum TCA_FLOWER_KEY_ENC_IPV6_DST \
+ $(AUTOCONF_OUTPUT)
+ $Q sh -- '$<' '$@' \
+ HAVE_TCA_FLOWER_KEY_ENC_IPV6_DST_MASK \
+ linux/pkt_cls.h \
+ enum TCA_FLOWER_KEY_ENC_IPV6_DST_MASK \
+ $(AUTOCONF_OUTPUT)
+ $Q sh -- '$<' '$@' \
+ HAVE_TCA_FLOWER_KEY_ENC_UDP_SRC_PORT \
+ linux/pkt_cls.h \
+ enum TCA_FLOWER_KEY_ENC_UDP_SRC_PORT \
+ $(AUTOCONF_OUTPUT)
+ $Q sh -- '$<' '$@' \
+ HAVE_TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK \
+ linux/pkt_cls.h \
+ enum TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK \
+ $(AUTOCONF_OUTPUT)
+ $Q sh -- '$<' '$@' \
+ HAVE_TCA_FLOWER_KEY_ENC_UDP_DST_PORT \
+ linux/pkt_cls.h \
+ enum TCA_FLOWER_KEY_ENC_UDP_DST_PORT \
+ $(AUTOCONF_OUTPUT)
+ $Q sh -- '$<' '$@' \
+ HAVE_TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK \
+ linux/pkt_cls.h \
+ enum TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK \
+ $(AUTOCONF_OUTPUT)
+ $Q sh -- '$<' '$@' \
HAVE_TC_ACT_VLAN \
linux/tc_act/tc_vlan.h \
enum TCA_VLAN_PUSH_VLAN_PRIORITY \
diff --git a/drivers/net/mlx5/mlx5_nl_flow.c b/drivers/net/mlx5/mlx5_nl_flow.c
index 672f92863..12802796a 100644
--- a/drivers/net/mlx5/mlx5_nl_flow.c
+++ b/drivers/net/mlx5/mlx5_nl_flow.c
@@ -201,6 +201,45 @@ struct tc_tunnel_key {
#ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
#define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ENC_KEY_ID
+#define TCA_FLOWER_KEY_ENC_KEY_ID 26
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_SRC
+#define TCA_FLOWER_KEY_ENC_IPV4_SRC 27
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK
+#define TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK 28
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_DST
+#define TCA_FLOWER_KEY_ENC_IPV4_DST 29
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_DST_MASK
+#define TCA_FLOWER_KEY_ENC_IPV4_DST_MASK 30
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_SRC
+#define TCA_FLOWER_KEY_ENC_IPV6_SRC 31
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK
+#define TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK 32
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_DST
+#define TCA_FLOWER_KEY_ENC_IPV6_DST 33
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_DST_MASK
+#define TCA_FLOWER_KEY_ENC_IPV6_DST_MASK 34
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_SRC_PORT
+#define TCA_FLOWER_KEY_ENC_UDP_SRC_PORT 43
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK
+#define TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK 44
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_DST_PORT
+#define TCA_FLOWER_KEY_ENC_UDP_DST_PORT 45
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK
+#define TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK 46
+#endif
#define BIT(b) (1 << (b))
#define BIT_ENCAP(e) BIT(MLX5_NL_FLOW_ENCAP_ ## e)
@@ -278,6 +317,7 @@ struct mlx5_nl_flow_ctx {
struct mlx5_nl_flow {
uint32_t size; /**< Size of this object. */
uint32_t applied:1; /**< Whether rule is currently applied. */
+ uint32_t decap:1; /**< Decapsulate @p encap. */
unsigned int encap_ifindex; /**< Interface to use with @p encap. */
unsigned int *ifindex_src; /**< Source interface. */
unsigned int *ifindex_dst; /**< Destination interface. */
@@ -301,6 +341,11 @@ enum mlx5_nl_flow_trans {
ITEM_TCP,
ITEM_UDP,
ITEM_VXLAN,
+ ITEM_VXLAN_END,
+ ITEM_TUN_ETH,
+ ITEM_TUN_IPV4,
+ ITEM_TUN_IPV6,
+ ITEM_TUN_UDP,
ACTIONS,
ACTION_VOID,
ACTION_PORT_ID,
@@ -339,7 +384,12 @@ static const enum mlx5_nl_flow_trans *const mlx5_nl_flow_trans[] = {
[ITEM_IPV6] = TRANS(ITEM_TCP, ITEM_UDP, PATTERN_COMMON),
[ITEM_TCP] = TRANS(PATTERN_COMMON),
[ITEM_UDP] = TRANS(ITEM_VXLAN, PATTERN_COMMON),
- [ITEM_VXLAN] = TRANS(PATTERN_COMMON),
+ [ITEM_VXLAN] = TRANS(ITEM_TUN_ETH, PATTERN_COMMON),
+ [ITEM_VXLAN_END] = TRANS(ITEM_ETH, PATTERN_COMMON),
+ [ITEM_TUN_ETH] = TRANS(ITEM_TUN_IPV4, ITEM_TUN_IPV6, PATTERN_COMMON),
+ [ITEM_TUN_IPV4] = TRANS(ITEM_TUN_UDP, PATTERN_COMMON),
+ [ITEM_TUN_IPV6] = TRANS(ITEM_TUN_UDP, PATTERN_COMMON),
+ [ITEM_TUN_UDP] = TRANS(ITEM_VXLAN_END, ITEM_VOID, ITEM_PORT_ID),
[ACTIONS] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
[ACTION_VOID] = TRANS(BACK),
[ACTION_PORT_ID] = TRANS(ACTION_VOID, END),
@@ -805,6 +855,7 @@ mlx5_nl_flow_transpose(struct mlx5_nl_flow *nl_flow,
bool vlan_present;
bool vlan_eth_type_set;
bool ip_proto_set;
+ bool vxlan_decap;
struct mlx5_nl_flow_encap encap;
struct nlattr *na_flower;
struct nlattr *na_flower_act;
@@ -819,6 +870,7 @@ mlx5_nl_flow_transpose(struct mlx5_nl_flow *nl_flow,
goto error_nobufs;
nl_flow->size = offsetof(struct mlx5_nl_flow, msg);
nl_flow->applied = 0;
+ nl_flow->decap = 0;
nl_flow->encap_ifindex = 0;
nl_flow->ifindex_src = NULL;
nl_flow->ifindex_dst = NULL;
@@ -833,6 +885,7 @@ mlx5_nl_flow_transpose(struct mlx5_nl_flow *nl_flow,
vlan_present = false;
vlan_eth_type_set = false;
ip_proto_set = false;
+ vxlan_decap = false;
memset(&encap, 0, sizeof(encap));
na_flower = NULL;
na_flower_act = NULL;
@@ -850,6 +903,7 @@ mlx5_nl_flow_transpose(struct mlx5_nl_flow *nl_flow,
const struct rte_flow_item_ipv6 *ipv6;
const struct rte_flow_item_tcp *tcp;
const struct rte_flow_item_udp *udp;
+ const struct rte_flow_item_vxlan *vxlan;
} spec, mask;
union {
const struct rte_flow_action_port_id *port_id;
@@ -943,9 +997,6 @@ mlx5_nl_flow_transpose(struct mlx5_nl_flow *nl_flow,
na_flower = mnl_attr_nest_start_check(buf, size, TCA_OPTIONS);
if (!na_flower)
goto error_nobufs;
- if (!mnl_attr_put_u32_check(buf, size, TCA_FLOWER_FLAGS,
- TCA_CLS_FLAGS_SKIP_SW))
- goto error_nobufs;
break;
case ITEM_VOID:
if (item->type != RTE_FLOW_ITEM_TYPE_VOID)
@@ -1286,16 +1337,215 @@ mlx5_nl_flow_transpose(struct mlx5_nl_flow *nl_flow,
++item;
break;
case ITEM_VXLAN:
+ case ITEM_VXLAN_END:
if (item->type != RTE_FLOW_ITEM_TYPE_VXLAN)
goto trans;
- return rte_flow_error_set
- (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
- "VXLAN header matching is not supported yet");
+ if (vxlan_decap) {
+ /* Done with outer, continue with inner. */
+ ++item;
+ break;
+ }
+ if (encap.mask)
+ return rte_flow_error_set
+ (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
+ item, "no support for stacked encapsulation");
+ mask.vxlan = mlx5_nl_flow_item_mask
+ (item, &rte_flow_item_vxlan_mask,
+ &mlx5_nl_flow_encap_mask_supported.vxlan,
+ &mlx5_nl_flow_mask_empty.vxlan,
+ sizeof(rte_flow_item_vxlan_mask), error);
+ if (!mask.vxlan)
+ return -rte_errno;
+ spec.vxlan = item->spec;
+ /*
+ * No TCA_FLOWER_* to match VXLAN traffic. This can only be
+ * done indirectly through ACTION_VXLAN_DECAP.
+ *
+ * Since tunnel encapsulation information must be collected
+ * from the previous pattern items, the message built so far
+ * must be discarded, inner traffic will be matched by
+ * subsequent pattern items.
+ *
+ * Reset inner context and process pattern again through a
+ * different path.
+ */
+ eth_type_set = false;
+ vlan_present = false;
+ vlan_eth_type_set = false;
+ ip_proto_set = false;
+ nlh = buf;
+ mnl_attr_nest_cancel(nlh, na_flower);
+ na_flower = mnl_attr_nest_start_check(buf, size, TCA_OPTIONS);
+ if (!na_flower)
+ goto error_nobufs;
+ if (memcmp(mask.vxlan->vni, VXLAN_VNI_MASK, 3))
+ return rte_flow_error_set
+ (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
+ mask.vxlan,
+ "VXLAN VNI is either incomplete or missing");
+ if (!mnl_attr_put_u32_check(buf, size,
+ TCA_FLOWER_KEY_ENC_KEY_ID,
+ vxlan_vni_as_be32(spec.vxlan->vni)))
+ goto error_nobufs;
+ encap.vxlan.vni = vxlan_vni_as_be32(spec.vxlan->vni);
+ encap.mask |= BIT_ENCAP(VXLAN_VNI);
+ vxlan_decap = true;
+ item = pattern;
+ break;
+ case ITEM_TUN_ETH:
+ if (item->type != RTE_FLOW_ITEM_TYPE_ETH)
+ goto trans;
+ mask.eth = mlx5_nl_flow_item_mask
+ (item, &rte_flow_item_eth_mask,
+ &mlx5_nl_flow_encap_mask_supported.eth,
+ &mlx5_nl_flow_mask_empty.eth,
+ sizeof(rte_flow_item_eth_mask), error);
+ if (!mask.eth)
+ return -rte_errno;
+ spec.eth = item->spec;
+ if ((!is_zero_ether_addr(&mask.eth->dst) ||
+ !is_zero_ether_addr(&mask.eth->src)) &&
+ nl_flow != (void *)buf_tmp)
+ DRV_LOG(WARNING,
+ "Ethernet source/destination addresses cannot"
+ " be matched along with VXLAN traffic;"
+ " parameters ignored");
+ /* Source and destination are swapped for decap. */
+ if (is_broadcast_ether_addr(&mask.eth->dst)) {
+ encap.eth.src = spec.eth->dst;
+ encap.mask |= BIT_ENCAP(ETH_SRC);
+ }
+ if (is_broadcast_ether_addr(&mask.eth->src)) {
+ encap.eth.dst = spec.eth->src;
+ encap.mask |= BIT_ENCAP(ETH_DST);
+ }
+ ++item;
+ break;
+ case ITEM_TUN_IPV4:
+ if (item->type != RTE_FLOW_ITEM_TYPE_IPV4)
+ goto trans;
+ mask.ipv4 = mlx5_nl_flow_item_mask
+ (item, &rte_flow_item_ipv4_mask,
+ &mlx5_nl_flow_encap_mask_supported.ipv4,
+ &mlx5_nl_flow_mask_empty.ipv4,
+ sizeof(rte_flow_item_ipv4_mask), error);
+ if (!mask.ipv4)
+ return -rte_errno;
+ spec.ipv4 = item->spec;
+ if ((mask.ipv4->hdr.src_addr &&
+ (!mnl_attr_put_u32_check(buf, size,
+ TCA_FLOWER_KEY_ENC_IPV4_SRC,
+ spec.ipv4->hdr.src_addr) ||
+ !mnl_attr_put_u32_check(buf, size,
+ TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK,
+ mask.ipv4->hdr.src_addr))) ||
+ (mask.ipv4->hdr.dst_addr &&
+ (!mnl_attr_put_u32_check(buf, size,
+ TCA_FLOWER_KEY_ENC_IPV4_DST,
+ spec.ipv4->hdr.dst_addr) ||
+ !mnl_attr_put_u32_check(buf, size,
+ TCA_FLOWER_KEY_ENC_IPV4_DST_MASK,
+ mask.ipv4->hdr.dst_addr))))
+ goto error_nobufs;
+ /* Source and destination are swapped for decap. */
+ if (mask.ipv4->hdr.src_addr == IN_ADDR_MASK) {
+ encap.ip.dst.v4.s_addr = spec.ipv4->hdr.src_addr;
+ encap.mask |= BIT_ENCAP(IPV4_DST);
+ }
+ if (mask.ipv4->hdr.dst_addr == IN_ADDR_MASK) {
+ encap.ip.src.v4.s_addr = spec.ipv4->hdr.dst_addr;
+ encap.mask |= BIT_ENCAP(IPV4_SRC);
+ }
+ ++item;
+ break;
+ case ITEM_TUN_IPV6:
+ if (item->type != RTE_FLOW_ITEM_TYPE_IPV6)
+ goto trans;
+ mask.ipv6 = mlx5_nl_flow_item_mask
+ (item, &rte_flow_item_ipv6_mask,
+ &mlx5_nl_flow_encap_mask_supported.ipv6,
+ &mlx5_nl_flow_mask_empty.ipv6,
+ sizeof(rte_flow_item_ipv6_mask), error);
+ if (!mask.ipv6)
+ return -rte_errno;
+ spec.ipv6 = item->spec;
+ if ((!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr) &&
+ (!mnl_attr_put_check(buf, size,
+ TCA_FLOWER_KEY_ENC_IPV6_SRC,
+ sizeof(spec.ipv6->hdr.src_addr),
+ spec.ipv6->hdr.src_addr) ||
+ !mnl_attr_put_check(buf, size,
+ TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK,
+ sizeof(mask.ipv6->hdr.src_addr),
+ mask.ipv6->hdr.src_addr))) ||
+ (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr) &&
+ (!mnl_attr_put_check(buf, size,
+ TCA_FLOWER_KEY_ENC_IPV6_DST,
+ sizeof(spec.ipv6->hdr.dst_addr),
+ spec.ipv6->hdr.dst_addr) ||
+ !mnl_attr_put_check(buf, size,
+ TCA_FLOWER_KEY_ENC_IPV6_DST_MASK,
+ sizeof(mask.ipv6->hdr.dst_addr),
+ mask.ipv6->hdr.dst_addr))))
+ goto error_nobufs;
+ /* Source and destination are swapped for decap. */
+ if (!memcmp(mask.ipv6->hdr.src_addr, IN6_ADDR_MASK, 16)) {
+ encap.ip.dst.v6 =
+ *(struct in6_addr *)&spec.ipv6->hdr.src_addr;
+ encap.mask |= BIT_ENCAP(IPV6_DST);
+ }
+ if (!memcmp(mask.ipv6->hdr.dst_addr, IN6_ADDR_MASK, 16)) {
+ encap.ip.src.v6 =
+ *(struct in6_addr *)&spec.ipv6->hdr.dst_addr;
+ encap.mask |= BIT_ENCAP(IPV6_SRC);
+ }
+ ++item;
+ break;
+ case ITEM_TUN_UDP:
+ if (item->type != RTE_FLOW_ITEM_TYPE_UDP)
+ goto trans;
+ mask.udp = mlx5_nl_flow_item_mask
+ (item, &rte_flow_item_udp_mask,
+ &mlx5_nl_flow_encap_mask_supported.udp,
+ &mlx5_nl_flow_mask_empty.udp,
+ sizeof(rte_flow_item_udp_mask), error);
+ if (!mask.udp)
+ return -rte_errno;
+ spec.udp = item->spec;
+ if ((mask.udp->hdr.src_port &&
+ (!mnl_attr_put_u16_check(buf, size,
+ TCA_FLOWER_KEY_ENC_UDP_SRC_PORT,
+ spec.udp->hdr.src_port) ||
+ !mnl_attr_put_u16_check
+ (buf, size, TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK,
+ mask.udp->hdr.src_port))) ||
+ (mask.udp->hdr.dst_port &&
+ (!mnl_attr_put_u16_check(buf, size,
+ TCA_FLOWER_KEY_ENC_UDP_DST_PORT,
+ spec.udp->hdr.dst_port) ||
+ !mnl_attr_put_u16_check
+ (buf, size, TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK,
+ mask.udp->hdr.dst_port))))
+ goto error_nobufs;
+ /* Source and destination are swapped for decap. */
+ if (mask.udp->hdr.src_port == BE16_MASK) {
+ encap.udp.dst = spec.udp->hdr.src_port;
+ encap.mask |= BIT_ENCAP(UDP_DST);
+ }
+ if (mask.udp->hdr.dst_port == BE16_MASK) {
+ encap.udp.src = spec.udp->hdr.dst_port;
+ encap.mask |= BIT_ENCAP(UDP_SRC);
+ }
+ ++item;
+ break;
case ACTIONS:
if (item->type != RTE_FLOW_ITEM_TYPE_END)
goto trans;
assert(na_flower);
assert(!na_flower_act);
+ if (!mnl_attr_put_u32_check(buf, size, TCA_FLOWER_FLAGS,
+ TCA_CLS_FLAGS_SKIP_SW))
+ goto error_nobufs;
na_flower_act =
mnl_attr_nest_start_check(buf, size, TCA_FLOWER_ACT);
if (!na_flower_act)
@@ -1446,14 +1696,35 @@ mlx5_nl_flow_transpose(struct mlx5_nl_flow *nl_flow,
}
++action;
break;
+ case ACTION_VXLAN_DECAP:
+ if (action->type != RTE_FLOW_ACTION_TYPE_VXLAN_DECAP)
+ goto trans;
+ if (!vxlan_decap)
+ return rte_flow_error_set
+ (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
+ action,
+ "VXLAN decapsulation is only supported after"
+ " matching VXLAN traffic explicitly first");
+ i = TCA_TUNNEL_KEY_ACT_RELEASE;
+ nl_flow->decap = 1;
+ conf.vxlan_encap = NULL;
+ goto vxlan_encap;
case ACTION_VXLAN_ENCAP:
if (action->type != RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP)
goto trans;
+ if (vxlan_decap)
+ return rte_flow_error_set
+ (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
+ action,
+ "cannot combine VXLAN header matching with"
+ " encapsulation");
conf.vxlan_encap = action->conf;
if (mlx5_nl_flow_encap_reap(&encap,
conf.vxlan_encap->definition,
error))
return -rte_errno;
+ i = TCA_TUNNEL_KEY_ACT_SET;
+vxlan_encap:
act_index =
mnl_attr_nest_start_check(buf, size, act_index_cur++);
if (!act_index ||
@@ -1467,10 +1738,11 @@ mlx5_nl_flow_transpose(struct mlx5_nl_flow *nl_flow,
sizeof(struct tc_tunnel_key),
&(struct tc_tunnel_key){
.action = TC_ACT_PIPE,
- .t_action =
- TCA_TUNNEL_KEY_ACT_SET,
+ .t_action = i,
}))
goto error_nobufs;
+ if (!conf.vxlan_encap)
+ goto vxlan_encap_end;
if (encap.mask & BIT_ENCAP(IPV4_SRC) &&
!mnl_attr_put_u32_check
(buf, size, TCA_TUNNEL_KEY_ENC_IPV4_SRC,
@@ -1507,16 +1779,11 @@ mlx5_nl_flow_transpose(struct mlx5_nl_flow *nl_flow,
if (!mnl_attr_put_u32_check
(buf, size, TCA_TUNNEL_KEY_ENC_KEY_ID, encap.vxlan.vni))
goto error_nobufs;
+vxlan_encap_end:
mnl_attr_nest_end(buf, act);
mnl_attr_nest_end(buf, act_index);
++action;
break;
- case ACTION_VXLAN_DECAP:
- if (action->type != RTE_FLOW_ACTION_TYPE_VXLAN_DECAP)
- goto trans;
- return rte_flow_error_set
- (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION, action,
- "VXLAN decap is not supported yet");
case END:
if (item->type != RTE_FLOW_ITEM_TYPE_END ||
action->type != RTE_FLOW_ACTION_TYPE_END)
@@ -1844,15 +2111,26 @@ mlx5_nl_flow_ifindex_vxlan(struct mlx5_nl_flow_ctx *ctx, unsigned int ifindex,
* cannot be worked around by picking a random value here and using
* a different one when creating flow rules later.
*
- * Therefore request a hopefully unique VNI based on the interface
- * index in order to work around EEXIST. VNI will be overridden
- * later on a flow rule basis thanks to IFLA_VXLAN_COLLECT_METADATA.
+ * There is another way to work around EEXIST by assigning a unique
+ * VNI to the VXLAN interface (e.g. by emitting IFLA_VXLAN_ID based
+ * on underlying ifindex), however doing so breaks decap as it
+ * prevents the kernel from matching VNI when looking for a VXLAN
+ * interface in that direction. Note that iproute2 doesn't allow
+ * this combination either.
+ *
+ * Creating non-external VXLAN interfaces with fixed outer
+ * properties was also considered. Problem is that not only it won't
+ * scale to large numbers, it appears that only interfaces with
+ * dynamic properties (external) can be offloaded to hardware.
+ *
+ * Hence the following limitation: as long as VXLAN encap/decap flow
+ * rules exist on a given DPDK port, the local UDP port they rely on
+ * can only be used by flow rules on that port. They will fail with
+ * EEXIST on others.
*/
if (!mnl_attr_put_u16_check(nlh, sizeof(buf), IFLA_VXLAN_PORT,
vxlan_port))
goto exit;
- if (!mnl_attr_put_u32_check(nlh, sizeof(buf), IFLA_VXLAN_ID, ifindex))
- goto exit;
mnl_attr_nest_end(nlh, na_vxlan);
mnl_attr_nest_end(nlh, na_info);
ret = mlx5_nl_flow_chat(ctx, nlh, NULL, NULL);
@@ -2022,8 +2300,9 @@ mlx5_nl_flow_encap_neigh(struct mlx5_nl_flow_ctx *ctx,
goto error_nobufs;
if (encap->mask & BIT_ENCAP(ETH_SRC) && enable)
DRV_LOG(WARNING,
- "Ethernet source address cannot be forced"
- " for VXLAN encap; parameter ignored");
+ "Ethernet source address (encap) or destination"
+ " address (decap) cannot be forced for VXLAN"
+ " encap/decap; parameter ignored");
if (encap->mask & BIT_ENCAP(ETH_DST) &&
!mnl_attr_put_check(nlh, sizeof(buf), NDA_LLADDR,
sizeof(encap->eth.dst), &encap->eth.dst))
@@ -2325,9 +2604,12 @@ mlx5_nl_flow_create(struct mlx5_nl_flow_ctx *ctx, struct mlx5_nl_flow *nl_flow,
{
struct nlmsghdr *nlh = (void *)nl_flow->msg;
struct mlx5_nl_flow_encap *encap =
- nl_flow->encap && nl_flow->ifindex_dst ?
+ nl_flow->encap && nl_flow->ifindex_dst && nl_flow->ifindex_src ?
nl_flow->encap : NULL;
- unsigned int ifindex = encap ? *nl_flow->ifindex_dst : 0;
+ unsigned int *ifindex_target =
+ nl_flow->decap ?
+ nl_flow->ifindex_src : nl_flow->ifindex_dst;
+ unsigned int ifindex = encap ? *ifindex_target : 0;
int ret;
if (nl_flow->applied)
@@ -2339,11 +2621,11 @@ mlx5_nl_flow_create(struct mlx5_nl_flow_ctx *ctx, struct mlx5_nl_flow *nl_flow,
(ctx, encap, ifindex, true, error);
if (!nl_flow->encap_ifindex)
return -rte_errno;
- *nl_flow->ifindex_dst = nl_flow->encap_ifindex;
+ *ifindex_target = nl_flow->encap_ifindex;
}
ret = mlx5_nl_flow_chat(ctx, nlh, NULL, NULL);
if (encap)
- *nl_flow->ifindex_dst = ifindex;
+ *ifindex_target = ifindex;
if (!ret) {
nl_flow->applied = 1;
return 0;
@@ -2378,9 +2660,11 @@ mlx5_nl_flow_destroy(struct mlx5_nl_flow_ctx *ctx, struct mlx5_nl_flow *nl_flow,
{
struct nlmsghdr *nlh = (void *)nl_flow->msg;
struct mlx5_nl_flow_encap *encap =
- nl_flow->encap && nl_flow->ifindex_dst ?
+ nl_flow->encap && nl_flow->ifindex_dst && nl_flow->ifindex_src ?
nl_flow->encap : NULL;
- unsigned int ifindex = encap ? *nl_flow->ifindex_dst : 0;
+ unsigned int *ifindex_target =
+ nl_flow->decap ? nl_flow->ifindex_src : nl_flow->ifindex_dst;
+ unsigned int ifindex = encap ? *ifindex_target : 0;
int err = 0;
int ret;
@@ -2392,11 +2676,11 @@ mlx5_nl_flow_destroy(struct mlx5_nl_flow_ctx *ctx, struct mlx5_nl_flow *nl_flow,
if (!mlx5_nl_flow_encap_ifindex
(ctx, encap, ifindex, false, error))
err = rte_errno;
- *nl_flow->ifindex_dst = nl_flow->encap_ifindex;
+ *ifindex_target = nl_flow->encap_ifindex;
}
ret = mlx5_nl_flow_chat(ctx, nlh, NULL, NULL);
if (encap)
- *nl_flow->ifindex_dst = ifindex;
+ *ifindex_target = ifindex;
nl_flow->applied = 0;
if (err) {
rte_errno = err;
--
2.11.0
prev parent reply other threads:[~2018-08-31 9:57 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-08-31 9:57 [dpdk-dev] [PATCH 0/8] net/mlx5: add switch offload for VXLAN encap/decap Adrien Mazarguil
2018-08-31 9:57 ` [dpdk-dev] [PATCH 1/8] net/mlx5: speed up interface index retrieval for flow rules Adrien Mazarguil
2018-08-31 9:57 ` [dpdk-dev] [PATCH 2/8] net/mlx5: clean up redundant interface name getters Adrien Mazarguil
2018-08-31 9:57 ` [dpdk-dev] [PATCH 3/8] net/mlx5: rename internal function Adrien Mazarguil
2018-08-31 9:57 ` [dpdk-dev] [PATCH 4/8] net/mlx5: enhance TC flow rule send/ack function Adrien Mazarguil
2018-08-31 9:57 ` [dpdk-dev] [PATCH 5/8] net/mlx5: prepare switch flow rule parser for encap offloads Adrien Mazarguil
2018-08-31 9:57 ` [dpdk-dev] [PATCH 6/8] net/mlx5: add convenience macros to switch flow rule engine Adrien Mazarguil
2018-08-31 9:57 ` [dpdk-dev] [PATCH 7/8] net/mlx5: add VXLAN encap support to switch flow rules Adrien Mazarguil
2018-08-31 9:57 ` Adrien Mazarguil [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180831092038.23051-9-adrien.mazarguil@6wind.com \
--to=adrien.mazarguil@6wind.com \
--cc=dev@dpdk.org \
--cc=shahafs@mellanox.com \
--cc=viacheslavo@mellanox.com \
--cc=yskoh@mellanox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).