DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH] net/mlx5: eswitch-IP address UDP/TCP port rewrite
@ 2018-09-25 11:51 Xiaoyu Min
  2018-09-28 23:03 ` Yongseok Koh
  2018-10-10 12:56 ` [dpdk-dev] [PATCH v2] net/mlx5: rewrite IP address UDP/TCP port by E-Switch Jack Min
  0 siblings, 2 replies; 13+ messages in thread
From: Xiaoyu Min @ 2018-09-25 11:51 UTC (permalink / raw)
  To: Shahaf Shuler, Yongseok Koh; +Cc: dev

Offload the following rte_flow actions by inserting accordingly
E-Switch rules via TC Flower driver

 - RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC
 - RTE_FLOW_ACTION_TYPE_SET_IPV4_DST
 - RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC
 - RTE_FLOW_ACTION_TYPE_SET_IPV6_DST
 - RTE_FLOW_ACTION_TYPE_SET_TP_SRC
 - RTE_FLOW_ACTION_TYPE_SET_TP_DST

Signed-off-by: Xiaoyu Min <jackmin@mellanox.com>
---
This patch bases on Rahul Lakkireddy's patchs[1][2] and
Yongseok Koh's patchset [3]

[1] https://patches.dpdk.org/patch/45191/
[2] https://patches.dpdk.org/patch/45192/
[3] https://patches.dpdk.org/project/dpdk/list/?series=1474


 drivers/net/mlx5/Makefile        |   5 +
 drivers/net/mlx5/meson.build     |   2 +
 drivers/net/mlx5/mlx5_flow.h     |   6 +
 drivers/net/mlx5/mlx5_flow_tcf.c | 356 +++++++++++++++++++++++++++++++
 4 files changed, 369 insertions(+)

diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index ca1de9f21..49b95e78e 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -346,6 +346,11 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
 		linux/tc_act/tc_vlan.h \
 		enum TCA_VLAN_PUSH_VLAN_PRIORITY \
 		$(AUTOCONF_OUTPUT)
+	$Q sh -- '$<' '$@' \
+		HAVE_TC_ACT_PEDIT \
+		linux/tc_act/tc_pedit.h \
+		enum TCA_PEDIT_KEY_EX_HDR_TYPE_UDP \
+		$(AUTOCONF_OUTPUT)
 	$Q sh -- '$<' '$@' \
 		HAVE_SUPPORTED_40000baseKR4_Full \
 		/usr/include/linux/ethtool.h \
diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
index fd93ac162..ef6a85101 100644
--- a/drivers/net/mlx5/meson.build
+++ b/drivers/net/mlx5/meson.build
@@ -182,6 +182,8 @@ if build
 		'TCA_FLOWER_KEY_VLAN_ETH_TYPE' ],
 		[ 'HAVE_TC_ACT_VLAN', 'linux/tc_act/tc_vlan.h',
 		'TCA_VLAN_PUSH_VLAN_PRIORITY' ],
+		[ 'HAVE_TC_ACT_PEDIT', 'linux/tc_act/tc_pedit.h',
+		'TCA_PEDIT_KEY_EX_HDR_TYPE_UDP' ],
 		[ 'HAVE_RDMA_NL_NLDEV', 'rdma/rdma_netlink.h',
 		'RDMA_NL_NLDEV' ],
 		[ 'HAVE_RDMA_NLDEV_CMD_GET', 'rdma/rdma_netlink.h',
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 10d700a7f..be182a643 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -87,6 +87,12 @@
 #define MLX5_ACTION_OF_PUSH_VLAN (1u << 8)
 #define MLX5_ACTION_OF_SET_VLAN_VID (1u << 9)
 #define MLX5_ACTION_OF_SET_VLAN_PCP (1u << 10)
+#define MLX5_ACTION_SET_IPV4_SRC (1u << 11)
+#define MLX5_ACTION_SET_IPV4_DST (1u << 12)
+#define MLX5_ACTION_SET_IPV6_SRC (1u << 13)
+#define MLX5_ACTION_SET_IPV6_DST (1u << 14)
+#define MLX5_ACTION_SET_TP_SRC (1u << 15)
+#define MLX5_ACTION_SET_TP_DST (1u << 16)
 
 /* possible L3 layers protocols filtering. */
 #define MLX5_IP_PROTOCOL_TCP 6
diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
index 14376188e..85c92f369 100644
--- a/drivers/net/mlx5/mlx5_flow_tcf.c
+++ b/drivers/net/mlx5/mlx5_flow_tcf.c
@@ -53,6 +53,62 @@ struct tc_vlan {
 
 #endif /* HAVE_TC_ACT_VLAN */
 
+#ifdef HAVE_TC_ACT_PEDIT
+
+#include <linux/tc_act/tc_pedit.h>
+
+#else /* HAVE_TC_ACT_VLAN */
+enum {
+	TCA_PEDIT_UNSPEC,
+	TCA_PEDIT_TM,
+	TCA_PEDIT_PARMS,
+	TCA_PEDIT_PAD,
+	TCA_PEDIT_PARMS_EX,
+	TCA_PEDIT_KEYS_EX,
+	TCA_PEDIT_KEY_EX,
+	__TCA_PEDIT_MAX
+};
+
+enum {
+	TCA_PEDIT_KEY_EX_HTYPE = 1,
+	TCA_PEDIT_KEY_EX_CMD = 2,
+	__TCA_PEDIT_KEY_EX_MAX
+};
+
+enum pedit_header_type {
+	TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
+	TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
+	TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
+	TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
+	TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
+	TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
+	__PEDIT_HDR_TYPE_MAX,
+};
+
+enum pedit_cmd {
+	TCA_PEDIT_KEY_EX_CMD_SET = 0,
+	TCA_PEDIT_KEY_EX_CMD_ADD = 1,
+	__PEDIT_CMD_MAX,
+};
+
+struct tc_pedit_key {
+	__u32           mask;  /* AND */
+	__u32           val;   /*XOR */
+	__u32           off;  /*offset */
+	__u32           at;
+	__u32           offmask;
+	__u32           shift;
+};
+
+struct tc_pedit_sel {
+	tc_gen;
+	unsigned char           nkeys;
+	unsigned char           flags;
+	struct tc_pedit_key     keys[0];
+};
+
+#endif /* HAVE_TC_ACT_VLAN */
+
 /* Normally found in linux/netlink.h. */
 #ifndef NETLINK_CAP_ACK
 #define NETLINK_CAP_ACK 10
@@ -153,6 +209,14 @@ struct tc_vlan {
 #define IPV6_ADDR_LEN 16
 #endif
 
+#ifndef IPV4_ADDR_LEN
+#define IPV4_ADDR_LEN 4
+#endif
+
+#ifndef TP_PORT_LEN
+#define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
+#endif
+
 /** Empty masks for known item types. */
 static const union {
 	struct rte_flow_item_port_id port_id;
@@ -227,6 +291,220 @@ struct flow_tcf_ptoi {
 
 #define MLX5_TCF_FATE_ACTIONS (MLX5_ACTION_DROP | MLX5_ACTION_PORT_ID)
 
+#define IS_MODIFY_ACTION(act_) ({typeof(act_) act = (act_); \
+		((act) == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC || \
+		(act) == RTE_FLOW_ACTION_TYPE_SET_IPV4_DST  || \
+		(act) == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC  || \
+		(act) == RTE_FLOW_ACTION_TYPE_SET_IPV6_DST  || \
+		(act) == RTE_FLOW_ACTION_TYPE_SET_TP_SRC    || \
+		(act) == RTE_FLOW_ACTION_TYPE_SET_TP_DST) ?    \
+		1 : 0; })
+#define MAX_PEDIT_KEYS (128)
+#define SZ_PEDIT_KEY_VAL (4)
+
+struct pedit_key_ex {
+	enum pedit_header_type htype;
+	enum pedit_cmd cmd;
+};
+
+struct pedit_parser {
+	struct tc_pedit_sel sel;
+	struct tc_pedit_key keys[MAX_PEDIT_KEYS];
+	struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
+};
+
+static int
+flow_tcf_calc_pedit_keys(const uint64_t size)
+{
+	int keys = (size / SZ_PEDIT_KEY_VAL) +
+		((size % SZ_PEDIT_KEY_VAL) ? 1 : 0);
+	return keys;
+}
+
+static void
+flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
+				struct pedit_parser *p_parser,
+				uint64_t item_flags)
+{
+	int idx = p_parser->sel.nkeys;
+
+	if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
+		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
+	if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
+		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
+	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
+	p_parser->keys[idx].off =
+		actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_DST ? 2 : 0;
+	p_parser->keys[idx].mask = 0xFFFF0000;
+	p_parser->keys[idx].val = ((const struct rte_flow_action_set_tp *)
+			actions->conf)->port;
+	p_parser->sel.nkeys = (++idx);
+}
+
+static void
+flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
+				 struct pedit_parser *p_parser)
+{
+	int idx = p_parser->sel.nkeys;
+	int keys = flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
+	int off_base =
+		actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ? 8 : 24;
+	const struct rte_flow_action_set_ipv6 *conf =
+		(const struct rte_flow_action_set_ipv6 *)actions->conf;
+
+	for (int i = 0; i < keys; i++, idx++) {
+		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
+		p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
+		p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
+		p_parser->keys[idx].mask = ~UINT32_MAX;
+		memcpy(&p_parser->keys[idx].val,
+			conf->ipv6_addr + i *  SZ_PEDIT_KEY_VAL,
+			SZ_PEDIT_KEY_VAL);
+	}
+	p_parser->sel.nkeys += keys;
+}
+
+static void
+flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
+				 struct pedit_parser *p_parser)
+{
+	int idx = p_parser->sel.nkeys;
+
+	p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
+	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
+	p_parser->keys[idx].off =
+		(actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ? 12 : 16);
+	p_parser->keys[idx].mask = ~UINT32_MAX;
+	p_parser->keys[idx].val =
+		((const struct rte_flow_action_set_ipv4 *)
+		 actions->conf)->ipv4_addr;
+	p_parser->sel.nkeys = (++idx);
+}
+
+static int
+flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
+			      const struct rte_flow_action **actions,
+			      uint64_t item_flags)
+{
+	struct pedit_parser p_parser;
+
+	memset(&p_parser, 0, sizeof(p_parser));
+	mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
+	struct nlattr *na_act_options = mnl_attr_nest_start(nl,
+							    TCA_ACT_OPTIONS);
+	/* all modify header actions should be in one tc-pedit action */
+	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
+		switch ((*actions)->type) {
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
+			flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
+			flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+			flow_tcf_pedit_key_set_tp_port(*actions,
+							&p_parser, item_flags);
+			break;
+		default:
+			goto pedit_mnl_msg_done;
+		}
+	}
+pedit_mnl_msg_done:
+	p_parser.sel.action = TC_ACT_PIPE;
+	mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
+			sizeof(p_parser.sel) +
+			p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
+			&p_parser);
+	struct nlattr *na_pedit_keys = mnl_attr_nest_start(nl,
+					TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
+	for (int i = 0; i < p_parser.sel.nkeys; i++) {
+		struct nlattr *na_pedit_key = mnl_attr_nest_start(nl,
+					TCA_PEDIT_KEY_EX | NLA_F_NESTED);
+		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
+				 p_parser.keys_ex[i].htype);
+		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
+				 p_parser.keys_ex[i].cmd);
+		mnl_attr_nest_end(nl, na_pedit_key);
+	}
+	mnl_attr_nest_end(nl, na_pedit_keys);
+	mnl_attr_nest_end(nl, na_act_options);
+	(*actions)--;
+	return 0;
+}
+
+/**
+ * Calculate max memory size of one TC-pedit actions.
+ * One TC-pedit action can contain set of keys each defining
+ * a rewrite element (rte_flow action)
+ *
+ * @param[in] actions
+ *   actions specification.
+ * @param[inout] action_flags
+ *   actions flags
+ * @param[inout] size
+ *   accumulated size
+ * @return
+ *   Max memory size of one TC-pedit action
+ */
+static int
+flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
+				uint64_t *action_flags)
+{
+	int pedit_size = 0;
+	int keys = 0;
+	uint64_t flags = 0;
+
+	pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
+		      SZ_NLATTR_STRZ_OF("pedit") +
+		      SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
+	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
+		switch ((*actions)->type) {
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
+			keys += flow_tcf_calc_pedit_keys(IPV4_ADDR_LEN);
+			flags |= MLX5_ACTION_SET_IPV4_SRC;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
+			keys += flow_tcf_calc_pedit_keys(IPV4_ADDR_LEN);
+			flags |= MLX5_ACTION_SET_IPV4_DST;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
+			keys += flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
+			flags |= MLX5_ACTION_SET_IPV6_SRC;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
+			keys += flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
+			flags |= MLX5_ACTION_SET_IPV6_DST;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
+			/* TCP is as same as UDP */
+			keys += flow_tcf_calc_pedit_keys(TP_PORT_LEN);
+			flags |= MLX5_ACTION_SET_TP_SRC;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+			/* TCP is as same as UDP */
+			keys += flow_tcf_calc_pedit_keys(TP_PORT_LEN);
+			flags |= MLX5_ACTION_SET_TP_DST;
+			break;
+		default:
+			goto get_pedit_action_size_done;
+		}
+	}
+get_pedit_action_size_done:
+	/* TCA_PEDIT_PARAMS_EX */
+	pedit_size += SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
+			keys * sizeof(struct tc_pedit_key));
+	pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS */
+	pedit_size += keys *
+		/* TCA_PEDIT_KEY_EX + HTYPE + CMD */
+		(SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) + SZ_NLATTR_DATA_OF(2));
+	(*action_flags) |= flags;
+	(*actions)--;
+	return pedit_size;
+}
+
 /**
  * Retrieve mask for pattern item.
  *
@@ -430,6 +708,8 @@ flow_tcf_validate(struct rte_eth_dev *dev,
 			of_set_vlan_vid;
 		const struct rte_flow_action_of_set_vlan_pcp *
 			of_set_vlan_pcp;
+		const struct rte_flow_action_set_ipv4 *set_ipv4;
+		const struct rte_flow_action_set_ipv6 *set_ipv6;
 	} conf;
 	uint32_t item_flags = 0;
 	uint32_t action_flags = 0;
@@ -690,12 +970,64 @@ flow_tcf_validate(struct rte_eth_dev *dev,
 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
 			action_flags |= MLX5_ACTION_OF_SET_VLAN_PCP;
 			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
+			action_flags |= MLX5_ACTION_SET_IPV4_SRC;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
+			action_flags |= MLX5_ACTION_SET_IPV4_DST;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
+			action_flags |= MLX5_ACTION_SET_IPV6_SRC;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
+			action_flags |= MLX5_ACTION_SET_IPV6_DST;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
+			action_flags |= MLX5_ACTION_SET_TP_SRC;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+			action_flags |= MLX5_ACTION_SET_TP_DST;
+			break;
 		default:
 			return rte_flow_error_set(error, ENOTSUP,
 						  RTE_FLOW_ERROR_TYPE_ACTION,
 						  actions,
 						  "action not supported");
 		}
+		if (IS_MODIFY_ACTION(actions->type)) {
+			if (!actions->conf)
+				return rte_flow_error_set(error, ENOTSUP,
+						RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+						actions,
+						"action configuration not set");
+		}
+	}
+	if (action_flags &
+	   (MLX5_ACTION_SET_IPV4_SRC | MLX5_ACTION_SET_IPV4_DST)) {
+		if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
+			return rte_flow_error_set(error, ENOTSUP,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  actions,
+						  "no ipv4 item found in"
+						  " pattern");
+	}
+	if (action_flags &
+	   (MLX5_ACTION_SET_IPV6_SRC | MLX5_ACTION_SET_IPV6_DST)) {
+		if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
+			return rte_flow_error_set(error, ENOTSUP,
+				RTE_FLOW_ERROR_TYPE_ACTION,
+				actions,
+				"no ipv6 item found in pattern");
+	}
+	if (action_flags & (MLX5_ACTION_SET_TP_SRC | MLX5_ACTION_SET_TP_DST)) {
+		if (!(item_flags &
+		     (MLX5_FLOW_LAYER_OUTER_L4_UDP |
+		      MLX5_FLOW_LAYER_OUTER_L4_TCP)))
+			return rte_flow_error_set(error, ENOTSUP,
+						RTE_FLOW_ERROR_TYPE_ACTION,
+						actions,
+						"no TCP/UDP item found in"
+						" pattern");
 	}
 	return 0;
 }
@@ -840,6 +1172,15 @@ flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
 				SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
 				SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
 			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
+		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+			size += flow_tcf_get_pedit_actions_size(&actions,
+								&flags);
+			break;
 		default:
 			DRV_LOG(WARNING,
 				"unsupported action %p type %d,"
@@ -998,6 +1339,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
 	struct nlattr *na_flower_act;
 	struct nlattr *na_vlan_id = NULL;
 	struct nlattr *na_vlan_priority = NULL;
+	uint64_t item_flags = 0;
 
 	claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
 						PTOI_TABLE_SZ_MAX(dev)));
@@ -1189,6 +1531,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
 			}
 			break;
 		case RTE_FLOW_ITEM_TYPE_UDP:
+			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
 			mask.udp = flow_tcf_item_mask
 				(items, &rte_flow_item_udp_mask,
 				 &flow_tcf_mask_supported.udp,
@@ -1218,6 +1561,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
 			}
 			break;
 		case RTE_FLOW_ITEM_TYPE_TCP:
+			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
 			mask.tcp = flow_tcf_item_mask
 				(items, &rte_flow_item_tcp_mask,
 				 &flow_tcf_mask_supported.tcp,
@@ -1368,6 +1712,18 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
 					conf.of_set_vlan_pcp->vlan_pcp;
 			}
 			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
+		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+			na_act_index =
+				mnl_attr_nest_start(nlh, na_act_index_cur++);
+			flow_tcf_create_pedit_mnl_msg(nlh,
+						      &actions, item_flags);
+			mnl_attr_nest_end(nlh, na_act_index);
+			break;
 		default:
 			return rte_flow_error_set(error, ENOTSUP,
 						  RTE_FLOW_ERROR_TYPE_ACTION,
-- 
2.17.1

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [dpdk-dev] [PATCH] net/mlx5: eswitch-IP address UDP/TCP port rewrite
  2018-09-25 11:51 [dpdk-dev] [PATCH] net/mlx5: eswitch-IP address UDP/TCP port rewrite Xiaoyu Min
@ 2018-09-28 23:03 ` Yongseok Koh
  2018-09-30  7:21   ` Xiaoyu Min
  2018-10-10 12:56 ` [dpdk-dev] [PATCH v2] net/mlx5: rewrite IP address UDP/TCP port by E-Switch Jack Min
  1 sibling, 1 reply; 13+ messages in thread
From: Yongseok Koh @ 2018-09-28 23:03 UTC (permalink / raw)
  To: Jack Min; +Cc: Shahaf Shuler, dev

On Tue, Sep 25, 2018 at 07:51:06PM +0800, Xiaoyu Min wrote:
> Offload the following rte_flow actions by inserting accordingly
> E-Switch rules via TC Flower driver
> 
>  - RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC
>  - RTE_FLOW_ACTION_TYPE_SET_IPV4_DST
>  - RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC
>  - RTE_FLOW_ACTION_TYPE_SET_IPV6_DST
>  - RTE_FLOW_ACTION_TYPE_SET_TP_SRC
>  - RTE_FLOW_ACTION_TYPE_SET_TP_DST

Can you put an example command of testpmd for the reference?

> Signed-off-by: Xiaoyu Min <jackmin@mellanox.com>
> ---
> This patch bases on Rahul Lakkireddy's patchs[1][2] and
> Yongseok Koh's patchset [3]
> 
> [1] https://patches.dpdk.org/patch/45191/
> [2] https://patches.dpdk.org/patch/45192/
> [3] https://patches.dpdk.org/project/dpdk/list/?series=1474
> 
> 
>  drivers/net/mlx5/Makefile        |   5 +
>  drivers/net/mlx5/meson.build     |   2 +
>  drivers/net/mlx5/mlx5_flow.h     |   6 +
>  drivers/net/mlx5/mlx5_flow_tcf.c | 356 +++++++++++++++++++++++++++++++
>  4 files changed, 369 insertions(+)
> 
> diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
> index ca1de9f21..49b95e78e 100644
> --- a/drivers/net/mlx5/Makefile
> +++ b/drivers/net/mlx5/Makefile
> @@ -346,6 +346,11 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
>  		linux/tc_act/tc_vlan.h \
>  		enum TCA_VLAN_PUSH_VLAN_PRIORITY \
>  		$(AUTOCONF_OUTPUT)
> +	$Q sh -- '$<' '$@' \
> +		HAVE_TC_ACT_PEDIT \
> +		linux/tc_act/tc_pedit.h \
> +		enum TCA_PEDIT_KEY_EX_HDR_TYPE_UDP \
> +		$(AUTOCONF_OUTPUT)
>  	$Q sh -- '$<' '$@' \
>  		HAVE_SUPPORTED_40000baseKR4_Full \
>  		/usr/include/linux/ethtool.h \
> diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
> index fd93ac162..ef6a85101 100644
> --- a/drivers/net/mlx5/meson.build
> +++ b/drivers/net/mlx5/meson.build
> @@ -182,6 +182,8 @@ if build
>  		'TCA_FLOWER_KEY_VLAN_ETH_TYPE' ],
>  		[ 'HAVE_TC_ACT_VLAN', 'linux/tc_act/tc_vlan.h',
>  		'TCA_VLAN_PUSH_VLAN_PRIORITY' ],
> +		[ 'HAVE_TC_ACT_PEDIT', 'linux/tc_act/tc_pedit.h',
> +		'TCA_PEDIT_KEY_EX_HDR_TYPE_UDP' ],
>  		[ 'HAVE_RDMA_NL_NLDEV', 'rdma/rdma_netlink.h',
>  		'RDMA_NL_NLDEV' ],
>  		[ 'HAVE_RDMA_NLDEV_CMD_GET', 'rdma/rdma_netlink.h',
> diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
> index 10d700a7f..be182a643 100644
> --- a/drivers/net/mlx5/mlx5_flow.h
> +++ b/drivers/net/mlx5/mlx5_flow.h
> @@ -87,6 +87,12 @@
>  #define MLX5_ACTION_OF_PUSH_VLAN (1u << 8)
>  #define MLX5_ACTION_OF_SET_VLAN_VID (1u << 9)
>  #define MLX5_ACTION_OF_SET_VLAN_PCP (1u << 10)
> +#define MLX5_ACTION_SET_IPV4_SRC (1u << 11)
> +#define MLX5_ACTION_SET_IPV4_DST (1u << 12)
> +#define MLX5_ACTION_SET_IPV6_SRC (1u << 13)
> +#define MLX5_ACTION_SET_IPV6_DST (1u << 14)
> +#define MLX5_ACTION_SET_TP_SRC (1u << 15)
> +#define MLX5_ACTION_SET_TP_DST (1u << 16)
>  
>  /* possible L3 layers protocols filtering. */
>  #define MLX5_IP_PROTOCOL_TCP 6
> diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
> index 14376188e..85c92f369 100644
> --- a/drivers/net/mlx5/mlx5_flow_tcf.c
> +++ b/drivers/net/mlx5/mlx5_flow_tcf.c
> @@ -53,6 +53,62 @@ struct tc_vlan {
>  
>  #endif /* HAVE_TC_ACT_VLAN */
>  
> +#ifdef HAVE_TC_ACT_PEDIT
> +
> +#include <linux/tc_act/tc_pedit.h>
> +
> +#else /* HAVE_TC_ACT_VLAN */
> +enum {
> +	TCA_PEDIT_UNSPEC,
> +	TCA_PEDIT_TM,
> +	TCA_PEDIT_PARMS,
> +	TCA_PEDIT_PAD,
> +	TCA_PEDIT_PARMS_EX,
> +	TCA_PEDIT_KEYS_EX,
> +	TCA_PEDIT_KEY_EX,
> +	__TCA_PEDIT_MAX
> +};
> +
> +enum {
> +	TCA_PEDIT_KEY_EX_HTYPE = 1,
> +	TCA_PEDIT_KEY_EX_CMD = 2,
> +	__TCA_PEDIT_KEY_EX_MAX
> +};
> +
> +enum pedit_header_type {
> +	TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
> +	TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
> +	TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
> +	TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
> +	TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
> +	TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
> +	__PEDIT_HDR_TYPE_MAX,
> +};
> +
> +enum pedit_cmd {
> +	TCA_PEDIT_KEY_EX_CMD_SET = 0,
> +	TCA_PEDIT_KEY_EX_CMD_ADD = 1,
> +	__PEDIT_CMD_MAX,
> +};
> +
> +struct tc_pedit_key {
> +	__u32           mask;  /* AND */
> +	__u32           val;   /*XOR */
> +	__u32           off;  /*offset */
> +	__u32           at;
> +	__u32           offmask;
> +	__u32           shift;
> +};
> +
> +struct tc_pedit_sel {
> +	tc_gen;
> +	unsigned char           nkeys;
> +	unsigned char           flags;
> +	struct tc_pedit_key     keys[0];
> +};
> +
> +#endif /* HAVE_TC_ACT_VLAN */
> +
>  /* Normally found in linux/netlink.h. */
>  #ifndef NETLINK_CAP_ACK
>  #define NETLINK_CAP_ACK 10
> @@ -153,6 +209,14 @@ struct tc_vlan {
>  #define IPV6_ADDR_LEN 16
>  #endif
>  
> +#ifndef IPV4_ADDR_LEN
> +#define IPV4_ADDR_LEN 4
> +#endif
> +
> +#ifndef TP_PORT_LEN
> +#define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
> +#endif
> +
>  /** Empty masks for known item types. */
>  static const union {
>  	struct rte_flow_item_port_id port_id;
> @@ -227,6 +291,220 @@ struct flow_tcf_ptoi {
>  
>  #define MLX5_TCF_FATE_ACTIONS (MLX5_ACTION_DROP | MLX5_ACTION_PORT_ID)
>  
> +#define IS_MODIFY_ACTION(act_) ({typeof(act_) act = (act_); \
> +		((act) == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC || \
> +		(act) == RTE_FLOW_ACTION_TYPE_SET_IPV4_DST  || \
> +		(act) == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC  || \
> +		(act) == RTE_FLOW_ACTION_TYPE_SET_IPV6_DST  || \
> +		(act) == RTE_FLOW_ACTION_TYPE_SET_TP_SRC    || \
> +		(act) == RTE_FLOW_ACTION_TYPE_SET_TP_DST) ?    \
> +		1 : 0; })

The reason why you need this complex multi-conditional macro is that
RTE_FLOW_ACTION_TYPE_* isn't a bitmask. But, as this actions will be converted
to MLX5_ACTION_* which is a bitmask, you can use that instead. Then, this
would be enough to be:

#define MLX5_TCF_SET_ACTIONS \
	(MLX5_ACTION_SET_IPV4_SRC | ...)

And in the flow_tcf_validate() below,
	if (action_flags & MLX5_TCF_SET_ACTIONS) {
		...
	}

And please note that I'm going to rename MLX5_ACTION_* to MLX5_FLOW_ACTION_*.
Please refer to "net/mlx5: rename flow macros" in PR #885. You might need to
rebase it again.

> +#define MAX_PEDIT_KEYS (128)
> +#define SZ_PEDIT_KEY_VAL (4)
> +
> +struct pedit_key_ex {
> +	enum pedit_header_type htype;
> +	enum pedit_cmd cmd;
> +};
> +
> +struct pedit_parser {
> +	struct tc_pedit_sel sel;
> +	struct tc_pedit_key keys[MAX_PEDIT_KEYS];
> +	struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
> +};
> +
> +static int
> +flow_tcf_calc_pedit_keys(const uint64_t size)

Please add documentation by comment for every funcs you add.
Refer to the other existing ones for formality.

> +{
> +	int keys = (size / SZ_PEDIT_KEY_VAL) +
> +		((size % SZ_PEDIT_KEY_VAL) ? 1 : 0);

Indentation.

> +	return keys;
> +}
> +
> +static void
> +flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
> +				struct pedit_parser *p_parser,
> +				uint64_t item_flags)
> +{
> +	int idx = p_parser->sel.nkeys;
> +
> +	if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
> +		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
> +	if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
> +		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
> +	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
> +	p_parser->keys[idx].off =
> +		actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_DST ? 2 : 0;

	assert(offsetof(struct tcp_hdr, src_port) ==
	       offsetof(struct udp_hdr, src_port));
	assert(offsetof(struct tcp_hdr, dst_port) ==
	       offsetof(struct udp_hdr, dst_port));
	p_parser->keys[idx].off =
		actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ?
		offsetof(struct tcp_hdr, src_port) :
		offsetof(struct tcp_hdr, dst_port);

assert() is just to be informative.
And how about src first like others below?

> +	p_parser->keys[idx].mask = 0xFFFF0000;
> +	p_parser->keys[idx].val = ((const struct rte_flow_action_set_tp *)
> +			actions->conf)->port;

Assigning 2B to 4B big-endian stroage? Doesn't look consistent with the mask
above - 0xffff0000.

> +	p_parser->sel.nkeys = (++idx);
> +}
> +
> +static void
> +flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
> +				 struct pedit_parser *p_parser)
> +{
> +	int idx = p_parser->sel.nkeys;
> +	int keys = flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
> +	int off_base =
> +		actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ? 8 : 24;

offsetof(struct ipv6_hdr, src_addr) :
offsetof(struct ipv6_hdr, dst_addr);

> +	const struct rte_flow_action_set_ipv6 *conf =
> +		(const struct rte_flow_action_set_ipv6 *)actions->conf;
> +
> +	for (int i = 0; i < keys; i++, idx++) {
> +		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
> +		p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
> +		p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
> +		p_parser->keys[idx].mask = ~UINT32_MAX;
> +		memcpy(&p_parser->keys[idx].val,
> +			conf->ipv6_addr + i *  SZ_PEDIT_KEY_VAL,
> +			SZ_PEDIT_KEY_VAL);
> +	}
> +	p_parser->sel.nkeys += keys;
> +}
> +
> +static void
> +flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,

How about getting rte_flow_action_set_ipv4 instead of rte_flow_action?
Same comment for ipv6 and tp_port.

> +				 struct pedit_parser *p_parser)
> +{
> +	int idx = p_parser->sel.nkeys;
> +
> +	p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
> +	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
> +	p_parser->keys[idx].off =
> +		(actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ? 12 : 16);

offsetof(struct ipv4_hdr, src_addr) :
offsetof(struct ipv4_hdr, dst_addr);

> +	p_parser->keys[idx].mask = ~UINT32_MAX;
> +	p_parser->keys[idx].val =
> +		((const struct rte_flow_action_set_ipv4 *)
> +		 actions->conf)->ipv4_addr;
> +	p_parser->sel.nkeys = (++idx);
> +}
> +
> +static int
> +flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
> +			      const struct rte_flow_action **actions,
> +			      uint64_t item_flags)
> +{
> +	struct pedit_parser p_parser;
> +
> +	memset(&p_parser, 0, sizeof(p_parser));
> +	mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
> +	struct nlattr *na_act_options = mnl_attr_nest_start(nl,
> +							    TCA_ACT_OPTIONS);
> +	/* all modify header actions should be in one tc-pedit action */
> +	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {

It seems that you want to aggregate all the pedit actions and form a single
na attr. But what if rte_flow_action_set_* are not contiguous? E.g.

flow create ... actions set1 / set2 / port_id / set3 / end

Then, it would have two pedit na attrs. Is that okay?
Or, need to think about another way?

Same will happen in flow_tcf_get_pedit_actions_size().

> +		switch ((*actions)->type) {
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> +			flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
> +			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> +			flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
> +			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> +			flow_tcf_pedit_key_set_tp_port(*actions,
> +							&p_parser, item_flags);
> +			break;
> +		default:
> +			goto pedit_mnl_msg_done;
> +		}
> +	}
> +pedit_mnl_msg_done:
> +	p_parser.sel.action = TC_ACT_PIPE;
> +	mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
> +			sizeof(p_parser.sel) +
> +			p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
> +			&p_parser);
> +	struct nlattr *na_pedit_keys = mnl_attr_nest_start(nl,
> +					TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
> +	for (int i = 0; i < p_parser.sel.nkeys; i++) {
> +		struct nlattr *na_pedit_key = mnl_attr_nest_start(nl,
> +					TCA_PEDIT_KEY_EX | NLA_F_NESTED);
> +		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
> +				 p_parser.keys_ex[i].htype);
> +		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
> +				 p_parser.keys_ex[i].cmd);
> +		mnl_attr_nest_end(nl, na_pedit_key);
> +	}
> +	mnl_attr_nest_end(nl, na_pedit_keys);
> +	mnl_attr_nest_end(nl, na_act_options);
> +	(*actions)--;
> +	return 0;
> +}
> +
> +/**
> + * Calculate max memory size of one TC-pedit actions.
> + * One TC-pedit action can contain set of keys each defining
> + * a rewrite element (rte_flow action)
> + *
> + * @param[in] actions
> + *   actions specification.
> + * @param[inout] action_flags
> + *   actions flags
> + * @param[inout] size
> + *   accumulated size
> + * @return
> + *   Max memory size of one TC-pedit action
> + */
> +static int
> +flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
> +				uint64_t *action_flags)
> +{
> +	int pedit_size = 0;
> +	int keys = 0;
> +	uint64_t flags = 0;
> +
> +	pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
> +		      SZ_NLATTR_STRZ_OF("pedit") +
> +		      SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
> +	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
> +		switch ((*actions)->type) {
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> +			keys += flow_tcf_calc_pedit_keys(IPV4_ADDR_LEN);
> +			flags |= MLX5_ACTION_SET_IPV4_SRC;
> +			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> +			keys += flow_tcf_calc_pedit_keys(IPV4_ADDR_LEN);
> +			flags |= MLX5_ACTION_SET_IPV4_DST;
> +			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> +			keys += flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
> +			flags |= MLX5_ACTION_SET_IPV6_SRC;
> +			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> +			keys += flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
> +			flags |= MLX5_ACTION_SET_IPV6_DST;
> +			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> +			/* TCP is as same as UDP */
> +			keys += flow_tcf_calc_pedit_keys(TP_PORT_LEN);
> +			flags |= MLX5_ACTION_SET_TP_SRC;
> +			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> +			/* TCP is as same as UDP */
> +			keys += flow_tcf_calc_pedit_keys(TP_PORT_LEN);
> +			flags |= MLX5_ACTION_SET_TP_DST;
> +			break;
> +		default:
> +			goto get_pedit_action_size_done;
> +		}
> +	}
> +get_pedit_action_size_done:
> +	/* TCA_PEDIT_PARAMS_EX */
> +	pedit_size += SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
> +			keys * sizeof(struct tc_pedit_key));

> +	pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS */
> +	pedit_size += keys *
> +		/* TCA_PEDIT_KEY_EX + HTYPE + CMD */
> +		(SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) + SZ_NLATTR_DATA_OF(2));
> +	(*action_flags) |= flags;
> +	(*actions)--;
> +	return pedit_size;
> +}
> +
>  /**
>   * Retrieve mask for pattern item.
>   *
> @@ -430,6 +708,8 @@ flow_tcf_validate(struct rte_eth_dev *dev,
>  			of_set_vlan_vid;
>  		const struct rte_flow_action_of_set_vlan_pcp *
>  			of_set_vlan_pcp;
> +		const struct rte_flow_action_set_ipv4 *set_ipv4;
> +		const struct rte_flow_action_set_ipv6 *set_ipv6;
>  	} conf;
>  	uint32_t item_flags = 0;
>  	uint32_t action_flags = 0;
> @@ -690,12 +970,64 @@ flow_tcf_validate(struct rte_eth_dev *dev,
>  		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
>  			action_flags |= MLX5_ACTION_OF_SET_VLAN_PCP;
>  			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> +			action_flags |= MLX5_ACTION_SET_IPV4_SRC;
> +			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> +			action_flags |= MLX5_ACTION_SET_IPV4_DST;
> +			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> +			action_flags |= MLX5_ACTION_SET_IPV6_SRC;
> +			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> +			action_flags |= MLX5_ACTION_SET_IPV6_DST;
> +			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> +			action_flags |= MLX5_ACTION_SET_TP_SRC;
> +			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> +			action_flags |= MLX5_ACTION_SET_TP_DST;
> +			break;
>  		default:
>  			return rte_flow_error_set(error, ENOTSUP,
>  						  RTE_FLOW_ERROR_TYPE_ACTION,
>  						  actions,
>  						  "action not supported");
>  		}
> +		if (IS_MODIFY_ACTION(actions->type)) {
> +			if (!actions->conf)
> +				return rte_flow_error_set(error, ENOTSUP,
> +						RTE_FLOW_ERROR_TYPE_ACTION_CONF,
> +						actions,
> +						"action configuration not set");
> +		}
> +	}
> +	if (action_flags &
> +	   (MLX5_ACTION_SET_IPV4_SRC | MLX5_ACTION_SET_IPV4_DST)) {
> +		if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
> +			return rte_flow_error_set(error, ENOTSUP,
> +						  RTE_FLOW_ERROR_TYPE_ACTION,
> +						  actions,
> +						  "no ipv4 item found in"
> +						  " pattern");
> +	}
> +	if (action_flags &
> +	   (MLX5_ACTION_SET_IPV6_SRC | MLX5_ACTION_SET_IPV6_DST)) {
> +		if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
> +			return rte_flow_error_set(error, ENOTSUP,
> +				RTE_FLOW_ERROR_TYPE_ACTION,
> +				actions,
> +				"no ipv6 item found in pattern");
> +	}
> +	if (action_flags & (MLX5_ACTION_SET_TP_SRC | MLX5_ACTION_SET_TP_DST)) {
> +		if (!(item_flags &
> +		     (MLX5_FLOW_LAYER_OUTER_L4_UDP |
> +		      MLX5_FLOW_LAYER_OUTER_L4_TCP)))
> +			return rte_flow_error_set(error, ENOTSUP,
> +						RTE_FLOW_ERROR_TYPE_ACTION,
> +						actions,
> +						"no TCP/UDP item found in"
> +						" pattern");

Isn't this 'set' action compatible with drop action? No point of modifying
packet which will be dropped, isn't it?

>  	}
>  	return 0;
>  }
> @@ -840,6 +1172,15 @@ flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
>  				SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
>  				SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
>  			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> +			size += flow_tcf_get_pedit_actions_size(&actions,
> +								&flags);
> +			break;
>  		default:
>  			DRV_LOG(WARNING,
>  				"unsupported action %p type %d,"
> @@ -998,6 +1339,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
>  	struct nlattr *na_flower_act;
>  	struct nlattr *na_vlan_id = NULL;
>  	struct nlattr *na_vlan_priority = NULL;
> +	uint64_t item_flags = 0;
>  
>  	claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
>  						PTOI_TABLE_SZ_MAX(dev)));
> @@ -1189,6 +1531,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
>  			}
>  			break;
>  		case RTE_FLOW_ITEM_TYPE_UDP:
> +			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;

Let's add the same to the rest of items like flow_tcf_validate().


Thanks,
Yongseok

>  			mask.udp = flow_tcf_item_mask
>  				(items, &rte_flow_item_udp_mask,
>  				 &flow_tcf_mask_supported.udp,
> @@ -1218,6 +1561,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
>  			}
>  			break;
>  		case RTE_FLOW_ITEM_TYPE_TCP:
> +			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
>  			mask.tcp = flow_tcf_item_mask
>  				(items, &rte_flow_item_tcp_mask,
>  				 &flow_tcf_mask_supported.tcp,
> @@ -1368,6 +1712,18 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
>  					conf.of_set_vlan_pcp->vlan_pcp;
>  			}
>  			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> +			na_act_index =
> +				mnl_attr_nest_start(nlh, na_act_index_cur++);
> +			flow_tcf_create_pedit_mnl_msg(nlh,
> +						      &actions, item_flags);
> +			mnl_attr_nest_end(nlh, na_act_index);
> +			break;
>  		default:
>  			return rte_flow_error_set(error, ENOTSUP,
>  						  RTE_FLOW_ERROR_TYPE_ACTION,
> -- 
> 2.17.1
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [dpdk-dev] [PATCH] net/mlx5: eswitch-IP address UDP/TCP port rewrite
  2018-09-28 23:03 ` Yongseok Koh
@ 2018-09-30  7:21   ` Xiaoyu Min
  2018-10-01 20:19     ` Yongseok Koh
  0 siblings, 1 reply; 13+ messages in thread
From: Xiaoyu Min @ 2018-09-30  7:21 UTC (permalink / raw)
  To: Yongseok Koh; +Cc: Shahaf Shuler, dev

On 18-09-29 07:03:33, Yongseok Koh wrote:
> On Tue, Sep 25, 2018 at 07:51:06PM +0800, Xiaoyu Min wrote:
> > Offload the following rte_flow actions by inserting accordingly
> > E-Switch rules via TC Flower driver
> > 
> >  - RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC
> >  - RTE_FLOW_ACTION_TYPE_SET_IPV4_DST
> >  - RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC
> >  - RTE_FLOW_ACTION_TYPE_SET_IPV6_DST
> >  - RTE_FLOW_ACTION_TYPE_SET_TP_SRC
> >  - RTE_FLOW_ACTION_TYPE_SET_TP_DST
> 
> Can you put an example command of testpmd for the reference?
> 
Sure, I'll add.

> > Signed-off-by: Xiaoyu Min <jackmin@mellanox.com>
> > ---
> > This patch bases on Rahul Lakkireddy's patchs[1][2] and
> > Yongseok Koh's patchset [3]
> > 
> > [1] https://patches.dpdk.org/patch/45191/
> > [2] https://patches.dpdk.org/patch/45192/
> > [3] https://patches.dpdk.org/project/dpdk/list/?series=1474
> > 
> > 
> >  drivers/net/mlx5/Makefile        |   5 +
> >  drivers/net/mlx5/meson.build     |   2 +
> >  drivers/net/mlx5/mlx5_flow.h     |   6 +
> >  drivers/net/mlx5/mlx5_flow_tcf.c | 356 +++++++++++++++++++++++++++++++
> >  4 files changed, 369 insertions(+)
> > 
> > diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
> > index ca1de9f21..49b95e78e 100644
> > --- a/drivers/net/mlx5/Makefile
> > +++ b/drivers/net/mlx5/Makefile
> > @@ -346,6 +346,11 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
> >  		linux/tc_act/tc_vlan.h \
> >  		enum TCA_VLAN_PUSH_VLAN_PRIORITY \
> >  		$(AUTOCONF_OUTPUT)
> > +	$Q sh -- '$<' '$@' \
> > +		HAVE_TC_ACT_PEDIT \
> > +		linux/tc_act/tc_pedit.h \
> > +		enum TCA_PEDIT_KEY_EX_HDR_TYPE_UDP \
> > +		$(AUTOCONF_OUTPUT)
> >  	$Q sh -- '$<' '$@' \
> >  		HAVE_SUPPORTED_40000baseKR4_Full \
> >  		/usr/include/linux/ethtool.h \
> > diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
> > index fd93ac162..ef6a85101 100644
> > --- a/drivers/net/mlx5/meson.build
> > +++ b/drivers/net/mlx5/meson.build
> > @@ -182,6 +182,8 @@ if build
> >  		'TCA_FLOWER_KEY_VLAN_ETH_TYPE' ],
> >  		[ 'HAVE_TC_ACT_VLAN', 'linux/tc_act/tc_vlan.h',
> >  		'TCA_VLAN_PUSH_VLAN_PRIORITY' ],
> > +		[ 'HAVE_TC_ACT_PEDIT', 'linux/tc_act/tc_pedit.h',
> > +		'TCA_PEDIT_KEY_EX_HDR_TYPE_UDP' ],
> >  		[ 'HAVE_RDMA_NL_NLDEV', 'rdma/rdma_netlink.h',
> >  		'RDMA_NL_NLDEV' ],
> >  		[ 'HAVE_RDMA_NLDEV_CMD_GET', 'rdma/rdma_netlink.h',
> > diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
> > index 10d700a7f..be182a643 100644
> > --- a/drivers/net/mlx5/mlx5_flow.h
> > +++ b/drivers/net/mlx5/mlx5_flow.h
> > @@ -87,6 +87,12 @@
> >  #define MLX5_ACTION_OF_PUSH_VLAN (1u << 8)
> >  #define MLX5_ACTION_OF_SET_VLAN_VID (1u << 9)
> >  #define MLX5_ACTION_OF_SET_VLAN_PCP (1u << 10)
> > +#define MLX5_ACTION_SET_IPV4_SRC (1u << 11)
> > +#define MLX5_ACTION_SET_IPV4_DST (1u << 12)
> > +#define MLX5_ACTION_SET_IPV6_SRC (1u << 13)
> > +#define MLX5_ACTION_SET_IPV6_DST (1u << 14)
> > +#define MLX5_ACTION_SET_TP_SRC (1u << 15)
> > +#define MLX5_ACTION_SET_TP_DST (1u << 16)
> >  
> >  /* possible L3 layers protocols filtering. */
> >  #define MLX5_IP_PROTOCOL_TCP 6
> > diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
> > index 14376188e..85c92f369 100644
> > --- a/drivers/net/mlx5/mlx5_flow_tcf.c
> > +++ b/drivers/net/mlx5/mlx5_flow_tcf.c
> > @@ -53,6 +53,62 @@ struct tc_vlan {
> >  
> >  #endif /* HAVE_TC_ACT_VLAN */
> >  
> > +#ifdef HAVE_TC_ACT_PEDIT
> > +
> > +#include <linux/tc_act/tc_pedit.h>
> > +
> > +#else /* HAVE_TC_ACT_VLAN */
> > +enum {
> > +	TCA_PEDIT_UNSPEC,
> > +	TCA_PEDIT_TM,
> > +	TCA_PEDIT_PARMS,
> > +	TCA_PEDIT_PAD,
> > +	TCA_PEDIT_PARMS_EX,
> > +	TCA_PEDIT_KEYS_EX,
> > +	TCA_PEDIT_KEY_EX,
> > +	__TCA_PEDIT_MAX
> > +};
> > +
> > +enum {
> > +	TCA_PEDIT_KEY_EX_HTYPE = 1,
> > +	TCA_PEDIT_KEY_EX_CMD = 2,
> > +	__TCA_PEDIT_KEY_EX_MAX
> > +};
> > +
> > +enum pedit_header_type {
> > +	TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
> > +	TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
> > +	TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
> > +	TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
> > +	TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
> > +	TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
> > +	__PEDIT_HDR_TYPE_MAX,
> > +};
> > +
> > +enum pedit_cmd {
> > +	TCA_PEDIT_KEY_EX_CMD_SET = 0,
> > +	TCA_PEDIT_KEY_EX_CMD_ADD = 1,
> > +	__PEDIT_CMD_MAX,
> > +};
> > +
> > +struct tc_pedit_key {
> > +	__u32           mask;  /* AND */
> > +	__u32           val;   /*XOR */
> > +	__u32           off;  /*offset */
> > +	__u32           at;
> > +	__u32           offmask;
> > +	__u32           shift;
> > +};
> > +
> > +struct tc_pedit_sel {
> > +	tc_gen;
> > +	unsigned char           nkeys;
> > +	unsigned char           flags;
> > +	struct tc_pedit_key     keys[0];
> > +};
> > +
> > +#endif /* HAVE_TC_ACT_VLAN */
> > +
> >  /* Normally found in linux/netlink.h. */
> >  #ifndef NETLINK_CAP_ACK
> >  #define NETLINK_CAP_ACK 10
> > @@ -153,6 +209,14 @@ struct tc_vlan {
> >  #define IPV6_ADDR_LEN 16
> >  #endif
> >  
> > +#ifndef IPV4_ADDR_LEN
> > +#define IPV4_ADDR_LEN 4
> > +#endif
> > +
> > +#ifndef TP_PORT_LEN
> > +#define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
> > +#endif
> > +
> >  /** Empty masks for known item types. */
> >  static const union {
> >  	struct rte_flow_item_port_id port_id;
> > @@ -227,6 +291,220 @@ struct flow_tcf_ptoi {
> >  
> >  #define MLX5_TCF_FATE_ACTIONS (MLX5_ACTION_DROP | MLX5_ACTION_PORT_ID)
> >  
> > +#define IS_MODIFY_ACTION(act_) ({typeof(act_) act = (act_); \
> > +		((act) == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC || \
> > +		(act) == RTE_FLOW_ACTION_TYPE_SET_IPV4_DST  || \
> > +		(act) == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC  || \
> > +		(act) == RTE_FLOW_ACTION_TYPE_SET_IPV6_DST  || \
> > +		(act) == RTE_FLOW_ACTION_TYPE_SET_TP_SRC    || \
> > +		(act) == RTE_FLOW_ACTION_TYPE_SET_TP_DST) ?    \
> > +		1 : 0; })
> 
> The reason why you need this complex multi-conditional macro is that
> RTE_FLOW_ACTION_TYPE_* isn't a bitmask. But, as this actions will be converted
> to MLX5_ACTION_* which is a bitmask, you can use that instead. Then, this
> would be enough to be:
> 
> #define MLX5_TCF_SET_ACTIONS \
> 	(MLX5_ACTION_SET_IPV4_SRC | ...)
> 
> And in the flow_tcf_validate() below,
> 	if (action_flags & MLX5_TCF_SET_ACTIONS) {
> 		...
> 	}
> 
Well, I did consider using bitmask but action_flags is an _accumulated_ variable,
records all the actions parsed so far.
But, here, I need to know what is the _current_ action and whether it belongs to modify
actions. If using bitmask, Looks like a new variable (i.e current_action) needed (?)

case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
    current_action = MLX5_ACTION_SET_IPV4_SRC;
    .....

if (current_action & MLX5_TCF_SET_ACTIONS) ...
 ...

action_flags |= current_action;

I feel more code change needed or you think it's worth?

> And please note that I'm going to rename MLX5_ACTION_* to MLX5_FLOW_ACTION_*.
> Please refer to "net/mlx5: rename flow macros" in PR #885. You might need to
> rebase it again.
> 
Sure, I'll rebase on it

> > +#define MAX_PEDIT_KEYS (128)
> > +#define SZ_PEDIT_KEY_VAL (4)
> > +
> > +struct pedit_key_ex {
> > +	enum pedit_header_type htype;
> > +	enum pedit_cmd cmd;
> > +};
> > +
> > +struct pedit_parser {
> > +	struct tc_pedit_sel sel;
> > +	struct tc_pedit_key keys[MAX_PEDIT_KEYS];
> > +	struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
> > +};
> > +
> > +static int
> > +flow_tcf_calc_pedit_keys(const uint64_t size)
> 
> Please add documentation by comment for every funcs you add.
> Refer to the other existing ones for formality.
> 
Sure!
> > +{
> > +	int keys = (size / SZ_PEDIT_KEY_VAL) +
> > +		((size % SZ_PEDIT_KEY_VAL) ? 1 : 0);
> 
> Indentation.
> 
Will fix it
> > +	return keys;
> > +}
> > +
> > +static void
> > +flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
> > +				struct pedit_parser *p_parser,
> > +				uint64_t item_flags)
> > +{
> > +	int idx = p_parser->sel.nkeys;
> > +
> > +	if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
> > +		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
> > +	if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
> > +		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
> > +	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
> > +	p_parser->keys[idx].off =
> > +		actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_DST ? 2 : 0;
> 
> 	assert(offsetof(struct tcp_hdr, src_port) ==
> 	       offsetof(struct udp_hdr, src_port));
> 	assert(offsetof(struct tcp_hdr, dst_port) ==
> 	       offsetof(struct udp_hdr, dst_port));
> 	p_parser->keys[idx].off =
> 		actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ?
> 		offsetof(struct tcp_hdr, src_port) :
> 		offsetof(struct tcp_hdr, dst_port);
> 
> assert() is just to be informative.
> And how about src first like others below?
> 
Yes, I will update above.

> > +	p_parser->keys[idx].mask = 0xFFFF0000;
> > +	p_parser->keys[idx].val = ((const struct rte_flow_action_set_tp *)
> > +			actions->conf)->port;
> 
> Assigning 2B to 4B big-endian stroage? Doesn't look consistent with the mask
> above - 0xffff0000.
> 
So it should be as following ?
p_parser->keys[idx].val = (__u32)((const struct rte_flow_action_set_tp *))
                actions->conf)->port;

> > +	p_parser->sel.nkeys = (++idx);
> > +}
> > +
> > +static void
> > +flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
> > +				 struct pedit_parser *p_parser)
> > +{
> > +	int idx = p_parser->sel.nkeys;
> > +	int keys = flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
> > +	int off_base =
> > +		actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ? 8 : 24;
> 
> offsetof(struct ipv6_hdr, src_addr) :
> offsetof(struct ipv6_hdr, dst_addr);
> 
Got it!
> > +	const struct rte_flow_action_set_ipv6 *conf =
> > +		(const struct rte_flow_action_set_ipv6 *)actions->conf;
> > +
> > +	for (int i = 0; i < keys; i++, idx++) {
> > +		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
> > +		p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
> > +		p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
> > +		p_parser->keys[idx].mask = ~UINT32_MAX;
> > +		memcpy(&p_parser->keys[idx].val,
> > +			conf->ipv6_addr + i *  SZ_PEDIT_KEY_VAL,
> > +			SZ_PEDIT_KEY_VAL);
> > +	}
> > +	p_parser->sel.nkeys += keys;
> > +}
> > +
> > +static void
> > +flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
> 
> How about getting rte_flow_action_set_ipv4 instead of rte_flow_action?
> Same comment for ipv6 and tp_port.
> 
What's the benefit by using rte_flow_action_set_ipv4 and how I know it's
for src or dst address ?
> > +				 struct pedit_parser *p_parser)
> > +{
> > +	int idx = p_parser->sel.nkeys;
> > +
> > +	p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
> > +	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
> > +	p_parser->keys[idx].off =
> > +		(actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ? 12 : 16);
> 
> offsetof(struct ipv4_hdr, src_addr) :
> offsetof(struct ipv4_hdr, dst_addr);
> 
Got it!
> > +	p_parser->keys[idx].mask = ~UINT32_MAX;
> > +	p_parser->keys[idx].val =
> > +		((const struct rte_flow_action_set_ipv4 *)
> > +		 actions->conf)->ipv4_addr;
> > +	p_parser->sel.nkeys = (++idx);
> > +}
> > +
> > +static int
> > +flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
> > +			      const struct rte_flow_action **actions,
> > +			      uint64_t item_flags)
> > +{
> > +	struct pedit_parser p_parser;
> > +
> > +	memset(&p_parser, 0, sizeof(p_parser));
> > +	mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
> > +	struct nlattr *na_act_options = mnl_attr_nest_start(nl,
> > +							    TCA_ACT_OPTIONS);
> > +	/* all modify header actions should be in one tc-pedit action */
> > +	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
> 
> It seems that you want to aggregate all the pedit actions and form a single
> na attr. But what if rte_flow_action_set_* are not contiguous? E.g.
> 
> flow create ... actions set1 / set2 / port_id / set3 / end
> 
> Then, it would have two pedit na attrs. Is that okay?
> Or, need to think about another way?
> 
> Same will happen in flow_tcf_get_pedit_actions_size().
> 
It's OK if we have more than one pedit na attrs.
_BUT_ only last pedit take effect based on my experiment

> > +		switch ((*actions)->type) {
> > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > +			flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
> > +			break;
> > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > +			flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
> > +			break;
> > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > +			flow_tcf_pedit_key_set_tp_port(*actions,
> > +							&p_parser, item_flags);
> > +			break;
> > +		default:
> > +			goto pedit_mnl_msg_done;
> > +		}
> > +	}
> > +pedit_mnl_msg_done:
> > +	p_parser.sel.action = TC_ACT_PIPE;
> > +	mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
> > +			sizeof(p_parser.sel) +
> > +			p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
> > +			&p_parser);
> > +	struct nlattr *na_pedit_keys = mnl_attr_nest_start(nl,
> > +					TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
> > +	for (int i = 0; i < p_parser.sel.nkeys; i++) {
> > +		struct nlattr *na_pedit_key = mnl_attr_nest_start(nl,
> > +					TCA_PEDIT_KEY_EX | NLA_F_NESTED);
> > +		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
> > +				 p_parser.keys_ex[i].htype);
> > +		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
> > +				 p_parser.keys_ex[i].cmd);
> > +		mnl_attr_nest_end(nl, na_pedit_key);
> > +	}
> > +	mnl_attr_nest_end(nl, na_pedit_keys);
> > +	mnl_attr_nest_end(nl, na_act_options);
> > +	(*actions)--;
> > +	return 0;
> > +}
> > +
> > +/**
> > + * Calculate max memory size of one TC-pedit actions.
> > + * One TC-pedit action can contain set of keys each defining
> > + * a rewrite element (rte_flow action)
> > + *
> > + * @param[in] actions
> > + *   actions specification.
> > + * @param[inout] action_flags
> > + *   actions flags
> > + * @param[inout] size
> > + *   accumulated size
> > + * @return
> > + *   Max memory size of one TC-pedit action
> > + */
> > +static int
> > +flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
> > +				uint64_t *action_flags)
> > +{
> > +	int pedit_size = 0;
> > +	int keys = 0;
> > +	uint64_t flags = 0;
> > +
> > +	pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
> > +		      SZ_NLATTR_STRZ_OF("pedit") +
> > +		      SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
> > +	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
> > +		switch ((*actions)->type) {
> > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > +			keys += flow_tcf_calc_pedit_keys(IPV4_ADDR_LEN);
> > +			flags |= MLX5_ACTION_SET_IPV4_SRC;
> > +			break;
> > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > +			keys += flow_tcf_calc_pedit_keys(IPV4_ADDR_LEN);
> > +			flags |= MLX5_ACTION_SET_IPV4_DST;
> > +			break;
> > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > +			keys += flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
> > +			flags |= MLX5_ACTION_SET_IPV6_SRC;
> > +			break;
> > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > +			keys += flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
> > +			flags |= MLX5_ACTION_SET_IPV6_DST;
> > +			break;
> > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > +			/* TCP is as same as UDP */
> > +			keys += flow_tcf_calc_pedit_keys(TP_PORT_LEN);
> > +			flags |= MLX5_ACTION_SET_TP_SRC;
> > +			break;
> > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > +			/* TCP is as same as UDP */
> > +			keys += flow_tcf_calc_pedit_keys(TP_PORT_LEN);
> > +			flags |= MLX5_ACTION_SET_TP_DST;
> > +			break;
> > +		default:
> > +			goto get_pedit_action_size_done;
> > +		}
> > +	}
> > +get_pedit_action_size_done:
> > +	/* TCA_PEDIT_PARAMS_EX */
> > +	pedit_size += SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
> > +			keys * sizeof(struct tc_pedit_key));
> 
> > +	pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS */
> > +	pedit_size += keys *
> > +		/* TCA_PEDIT_KEY_EX + HTYPE + CMD */
> > +		(SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) + SZ_NLATTR_DATA_OF(2));
> > +	(*action_flags) |= flags;
> > +	(*actions)--;
> > +	return pedit_size;
> > +}
> > +
> >  /**
> >   * Retrieve mask for pattern item.
> >   *
> > @@ -430,6 +708,8 @@ flow_tcf_validate(struct rte_eth_dev *dev,
> >  			of_set_vlan_vid;
> >  		const struct rte_flow_action_of_set_vlan_pcp *
> >  			of_set_vlan_pcp;
> > +		const struct rte_flow_action_set_ipv4 *set_ipv4;
> > +		const struct rte_flow_action_set_ipv6 *set_ipv6;
> >  	} conf;
> >  	uint32_t item_flags = 0;
> >  	uint32_t action_flags = 0;
> > @@ -690,12 +970,64 @@ flow_tcf_validate(struct rte_eth_dev *dev,
> >  		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
> >  			action_flags |= MLX5_ACTION_OF_SET_VLAN_PCP;
> >  			break;
> > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > +			action_flags |= MLX5_ACTION_SET_IPV4_SRC;
> > +			break;
> > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > +			action_flags |= MLX5_ACTION_SET_IPV4_DST;
> > +			break;
> > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > +			action_flags |= MLX5_ACTION_SET_IPV6_SRC;
> > +			break;
> > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > +			action_flags |= MLX5_ACTION_SET_IPV6_DST;
> > +			break;
> > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > +			action_flags |= MLX5_ACTION_SET_TP_SRC;
> > +			break;
> > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > +			action_flags |= MLX5_ACTION_SET_TP_DST;
> > +			break;
> >  		default:
> >  			return rte_flow_error_set(error, ENOTSUP,
> >  						  RTE_FLOW_ERROR_TYPE_ACTION,
> >  						  actions,
> >  						  "action not supported");
> >  		}
> > +		if (IS_MODIFY_ACTION(actions->type)) {
> > +			if (!actions->conf)
> > +				return rte_flow_error_set(error, ENOTSUP,
> > +						RTE_FLOW_ERROR_TYPE_ACTION_CONF,
> > +						actions,
> > +						"action configuration not set");
> > +		}
> > +	}
> > +	if (action_flags &
> > +	   (MLX5_ACTION_SET_IPV4_SRC | MLX5_ACTION_SET_IPV4_DST)) {
> > +		if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
> > +			return rte_flow_error_set(error, ENOTSUP,
> > +						  RTE_FLOW_ERROR_TYPE_ACTION,
> > +						  actions,
> > +						  "no ipv4 item found in"
> > +						  " pattern");
> > +	}
> > +	if (action_flags &
> > +	   (MLX5_ACTION_SET_IPV6_SRC | MLX5_ACTION_SET_IPV6_DST)) {
> > +		if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
> > +			return rte_flow_error_set(error, ENOTSUP,
> > +				RTE_FLOW_ERROR_TYPE_ACTION,
> > +				actions,
> > +				"no ipv6 item found in pattern");
> > +	}
> > +	if (action_flags & (MLX5_ACTION_SET_TP_SRC | MLX5_ACTION_SET_TP_DST)) {
> > +		if (!(item_flags &
> > +		     (MLX5_FLOW_LAYER_OUTER_L4_UDP |
> > +		      MLX5_FLOW_LAYER_OUTER_L4_TCP)))
> > +			return rte_flow_error_set(error, ENOTSUP,
> > +						RTE_FLOW_ERROR_TYPE_ACTION,
> > +						actions,
> > +						"no TCP/UDP item found in"
> > +						" pattern");
> 
> Isn't this 'set' action compatible with drop action? No point of modifying
> packet which will be dropped, isn't it?
> 
Yes, you are absolutely right :-)

> >  	}
> >  	return 0;
> >  }
> > @@ -840,6 +1172,15 @@ flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
> >  				SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
> >  				SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
> >  			break;
> > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > +			size += flow_tcf_get_pedit_actions_size(&actions,
> > +								&flags);
> > +			break;
> >  		default:
> >  			DRV_LOG(WARNING,
> >  				"unsupported action %p type %d,"
> > @@ -998,6 +1339,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
> >  	struct nlattr *na_flower_act;
> >  	struct nlattr *na_vlan_id = NULL;
> >  	struct nlattr *na_vlan_priority = NULL;
> > +	uint64_t item_flags = 0;
> >  
> >  	claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
> >  						PTOI_TABLE_SZ_MAX(dev)));
> > @@ -1189,6 +1531,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
> >  			}
> >  			break;
> >  		case RTE_FLOW_ITEM_TYPE_UDP:
> > +			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
> 
> Let's add the same to the rest of items like flow_tcf_validate().
> 
OK!
> 
> Thanks,
> Yongseok
> 
> >  			mask.udp = flow_tcf_item_mask
> >  				(items, &rte_flow_item_udp_mask,
> >  				 &flow_tcf_mask_supported.udp,
> > @@ -1218,6 +1561,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
> >  			}
> >  			break;
> >  		case RTE_FLOW_ITEM_TYPE_TCP:
> > +			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
> >  			mask.tcp = flow_tcf_item_mask
> >  				(items, &rte_flow_item_tcp_mask,
> >  				 &flow_tcf_mask_supported.tcp,
> > @@ -1368,6 +1712,18 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
> >  					conf.of_set_vlan_pcp->vlan_pcp;
> >  			}
> >  			break;
> > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > +			na_act_index =
> > +				mnl_attr_nest_start(nlh, na_act_index_cur++);
> > +			flow_tcf_create_pedit_mnl_msg(nlh,
> > +						      &actions, item_flags);
> > +			mnl_attr_nest_end(nlh, na_act_index);
> > +			break;
> >  		default:
> >  			return rte_flow_error_set(error, ENOTSUP,
> >  						  RTE_FLOW_ERROR_TYPE_ACTION,
> > -- 
> > 2.17.1
> > 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [dpdk-dev] [PATCH] net/mlx5: eswitch-IP address UDP/TCP port rewrite
  2018-09-30  7:21   ` Xiaoyu Min
@ 2018-10-01 20:19     ` Yongseok Koh
  2018-10-08 11:22       ` Xiaoyu Min
  0 siblings, 1 reply; 13+ messages in thread
From: Yongseok Koh @ 2018-10-01 20:19 UTC (permalink / raw)
  To: Jack Min; +Cc: Shahaf Shuler, dev

On Sun, Sep 30, 2018 at 03:21:04PM +0800, Xiaoyu Min wrote:
> On 18-09-29 07:03:33, Yongseok Koh wrote:
> > On Tue, Sep 25, 2018 at 07:51:06PM +0800, Xiaoyu Min wrote:
> > > Offload the following rte_flow actions by inserting accordingly
> > > E-Switch rules via TC Flower driver
> > > 
> > >  - RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC
> > >  - RTE_FLOW_ACTION_TYPE_SET_IPV4_DST
> > >  - RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC
> > >  - RTE_FLOW_ACTION_TYPE_SET_IPV6_DST
> > >  - RTE_FLOW_ACTION_TYPE_SET_TP_SRC
> > >  - RTE_FLOW_ACTION_TYPE_SET_TP_DST
> > 
> > Can you put an example command of testpmd for the reference?
> > 
> Sure, I'll add.
> 
> > > Signed-off-by: Xiaoyu Min <jackmin@mellanox.com>
> > > ---
> > > This patch bases on Rahul Lakkireddy's patchs[1][2] and
> > > Yongseok Koh's patchset [3]
> > > 
> > > [1] https://patches.dpdk.org/patch/45191/
> > > [2] https://patches.dpdk.org/patch/45192/
> > > [3] https://patches.dpdk.org/project/dpdk/list/?series=1474
> > > 
> > > 
> > >  drivers/net/mlx5/Makefile        |   5 +
> > >  drivers/net/mlx5/meson.build     |   2 +
> > >  drivers/net/mlx5/mlx5_flow.h     |   6 +
> > >  drivers/net/mlx5/mlx5_flow_tcf.c | 356 +++++++++++++++++++++++++++++++
> > >  4 files changed, 369 insertions(+)
> > > 
> > > diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
> > > index ca1de9f21..49b95e78e 100644
> > > --- a/drivers/net/mlx5/Makefile
> > > +++ b/drivers/net/mlx5/Makefile
> > > @@ -346,6 +346,11 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
> > >  		linux/tc_act/tc_vlan.h \
> > >  		enum TCA_VLAN_PUSH_VLAN_PRIORITY \
> > >  		$(AUTOCONF_OUTPUT)
> > > +	$Q sh -- '$<' '$@' \
> > > +		HAVE_TC_ACT_PEDIT \
> > > +		linux/tc_act/tc_pedit.h \
> > > +		enum TCA_PEDIT_KEY_EX_HDR_TYPE_UDP \
> > > +		$(AUTOCONF_OUTPUT)
> > >  	$Q sh -- '$<' '$@' \
> > >  		HAVE_SUPPORTED_40000baseKR4_Full \
> > >  		/usr/include/linux/ethtool.h \
> > > diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
> > > index fd93ac162..ef6a85101 100644
> > > --- a/drivers/net/mlx5/meson.build
> > > +++ b/drivers/net/mlx5/meson.build
> > > @@ -182,6 +182,8 @@ if build
> > >  		'TCA_FLOWER_KEY_VLAN_ETH_TYPE' ],
> > >  		[ 'HAVE_TC_ACT_VLAN', 'linux/tc_act/tc_vlan.h',
> > >  		'TCA_VLAN_PUSH_VLAN_PRIORITY' ],
> > > +		[ 'HAVE_TC_ACT_PEDIT', 'linux/tc_act/tc_pedit.h',
> > > +		'TCA_PEDIT_KEY_EX_HDR_TYPE_UDP' ],
> > >  		[ 'HAVE_RDMA_NL_NLDEV', 'rdma/rdma_netlink.h',
> > >  		'RDMA_NL_NLDEV' ],
> > >  		[ 'HAVE_RDMA_NLDEV_CMD_GET', 'rdma/rdma_netlink.h',
> > > diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
> > > index 10d700a7f..be182a643 100644
> > > --- a/drivers/net/mlx5/mlx5_flow.h
> > > +++ b/drivers/net/mlx5/mlx5_flow.h
> > > @@ -87,6 +87,12 @@
> > >  #define MLX5_ACTION_OF_PUSH_VLAN (1u << 8)
> > >  #define MLX5_ACTION_OF_SET_VLAN_VID (1u << 9)
> > >  #define MLX5_ACTION_OF_SET_VLAN_PCP (1u << 10)
> > > +#define MLX5_ACTION_SET_IPV4_SRC (1u << 11)
> > > +#define MLX5_ACTION_SET_IPV4_DST (1u << 12)
> > > +#define MLX5_ACTION_SET_IPV6_SRC (1u << 13)
> > > +#define MLX5_ACTION_SET_IPV6_DST (1u << 14)
> > > +#define MLX5_ACTION_SET_TP_SRC (1u << 15)
> > > +#define MLX5_ACTION_SET_TP_DST (1u << 16)
> > >  
> > >  /* possible L3 layers protocols filtering. */
> > >  #define MLX5_IP_PROTOCOL_TCP 6
> > > diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
> > > index 14376188e..85c92f369 100644
> > > --- a/drivers/net/mlx5/mlx5_flow_tcf.c
> > > +++ b/drivers/net/mlx5/mlx5_flow_tcf.c
> > > @@ -53,6 +53,62 @@ struct tc_vlan {
> > >  
> > >  #endif /* HAVE_TC_ACT_VLAN */
> > >  
> > > +#ifdef HAVE_TC_ACT_PEDIT
> > > +
> > > +#include <linux/tc_act/tc_pedit.h>
> > > +
> > > +#else /* HAVE_TC_ACT_VLAN */
> > > +enum {
> > > +	TCA_PEDIT_UNSPEC,
> > > +	TCA_PEDIT_TM,
> > > +	TCA_PEDIT_PARMS,
> > > +	TCA_PEDIT_PAD,
> > > +	TCA_PEDIT_PARMS_EX,
> > > +	TCA_PEDIT_KEYS_EX,
> > > +	TCA_PEDIT_KEY_EX,
> > > +	__TCA_PEDIT_MAX
> > > +};
> > > +
> > > +enum {
> > > +	TCA_PEDIT_KEY_EX_HTYPE = 1,
> > > +	TCA_PEDIT_KEY_EX_CMD = 2,
> > > +	__TCA_PEDIT_KEY_EX_MAX
> > > +};
> > > +
> > > +enum pedit_header_type {
> > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
> > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
> > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
> > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
> > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
> > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
> > > +	__PEDIT_HDR_TYPE_MAX,
> > > +};
> > > +
> > > +enum pedit_cmd {
> > > +	TCA_PEDIT_KEY_EX_CMD_SET = 0,
> > > +	TCA_PEDIT_KEY_EX_CMD_ADD = 1,
> > > +	__PEDIT_CMD_MAX,
> > > +};
> > > +
> > > +struct tc_pedit_key {
> > > +	__u32           mask;  /* AND */
> > > +	__u32           val;   /*XOR */
> > > +	__u32           off;  /*offset */
> > > +	__u32           at;
> > > +	__u32           offmask;
> > > +	__u32           shift;
> > > +};
> > > +
> > > +struct tc_pedit_sel {
> > > +	tc_gen;
> > > +	unsigned char           nkeys;
> > > +	unsigned char           flags;
> > > +	struct tc_pedit_key     keys[0];
> > > +};
> > > +
> > > +#endif /* HAVE_TC_ACT_VLAN */
> > > +
> > >  /* Normally found in linux/netlink.h. */
> > >  #ifndef NETLINK_CAP_ACK
> > >  #define NETLINK_CAP_ACK 10
> > > @@ -153,6 +209,14 @@ struct tc_vlan {
> > >  #define IPV6_ADDR_LEN 16
> > >  #endif
> > >  
> > > +#ifndef IPV4_ADDR_LEN
> > > +#define IPV4_ADDR_LEN 4
> > > +#endif
> > > +
> > > +#ifndef TP_PORT_LEN
> > > +#define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
> > > +#endif
> > > +
> > >  /** Empty masks for known item types. */
> > >  static const union {
> > >  	struct rte_flow_item_port_id port_id;
> > > @@ -227,6 +291,220 @@ struct flow_tcf_ptoi {
> > >  
> > >  #define MLX5_TCF_FATE_ACTIONS (MLX5_ACTION_DROP | MLX5_ACTION_PORT_ID)
> > >  
> > > +#define IS_MODIFY_ACTION(act_) ({typeof(act_) act = (act_); \
> > > +		((act) == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC || \
> > > +		(act) == RTE_FLOW_ACTION_TYPE_SET_IPV4_DST  || \
> > > +		(act) == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC  || \
> > > +		(act) == RTE_FLOW_ACTION_TYPE_SET_IPV6_DST  || \
> > > +		(act) == RTE_FLOW_ACTION_TYPE_SET_TP_SRC    || \
> > > +		(act) == RTE_FLOW_ACTION_TYPE_SET_TP_DST) ?    \
> > > +		1 : 0; })
> > 
> > The reason why you need this complex multi-conditional macro is that
> > RTE_FLOW_ACTION_TYPE_* isn't a bitmask. But, as this actions will be converted
> > to MLX5_ACTION_* which is a bitmask, you can use that instead. Then, this
> > would be enough to be:
> > 
> > #define MLX5_TCF_SET_ACTIONS \
> > 	(MLX5_ACTION_SET_IPV4_SRC | ...)
> > 
> > And in the flow_tcf_validate() below,
> > 	if (action_flags & MLX5_TCF_SET_ACTIONS) {
> > 		...
> > 	}
> > 
> Well, I did consider using bitmask but action_flags is an _accumulated_ variable,
> records all the actions parsed so far.
> But, here, I need to know what is the _current_ action and whether it belongs to modify
> actions. If using bitmask, Looks like a new variable (i.e current_action) needed (?)
> 
> case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
>     current_action = MLX5_ACTION_SET_IPV4_SRC;
>     .....
> 
> if (current_action & MLX5_TCF_SET_ACTIONS) ...
>  ...
> 
> action_flags |= current_action;
> 
> I feel more code change needed or you think it's worth?

Understood what you meant.
Please see my comment below in the flow_tcf_validate().

> > And please note that I'm going to rename MLX5_ACTION_* to MLX5_FLOW_ACTION_*.
> > Please refer to "net/mlx5: rename flow macros" in PR #885. You might need to
> > rebase it again.
> > 
> Sure, I'll rebase on it
> 
> > > +#define MAX_PEDIT_KEYS (128)
> > > +#define SZ_PEDIT_KEY_VAL (4)
> > > +
> > > +struct pedit_key_ex {
> > > +	enum pedit_header_type htype;
> > > +	enum pedit_cmd cmd;
> > > +};
> > > +
> > > +struct pedit_parser {
> > > +	struct tc_pedit_sel sel;
> > > +	struct tc_pedit_key keys[MAX_PEDIT_KEYS];
> > > +	struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
> > > +};
> > > +
> > > +static int
> > > +flow_tcf_calc_pedit_keys(const uint64_t size)
> > 
> > Please add documentation by comment for every funcs you add.
> > Refer to the other existing ones for formality.
> > 
> Sure!
> > > +{
> > > +	int keys = (size / SZ_PEDIT_KEY_VAL) +
> > > +		((size % SZ_PEDIT_KEY_VAL) ? 1 : 0);
> > 
> > Indentation.
> > 
> Will fix it
> > > +	return keys;
> > > +}
> > > +
> > > +static void
> > > +flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
> > > +				struct pedit_parser *p_parser,
> > > +				uint64_t item_flags)
> > > +{
> > > +	int idx = p_parser->sel.nkeys;
> > > +
> > > +	if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
> > > +		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
> > > +	if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
> > > +		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
> > > +	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
> > > +	p_parser->keys[idx].off =
> > > +		actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_DST ? 2 : 0;
> > 
> > 	assert(offsetof(struct tcp_hdr, src_port) ==
> > 	       offsetof(struct udp_hdr, src_port));
> > 	assert(offsetof(struct tcp_hdr, dst_port) ==
> > 	       offsetof(struct udp_hdr, dst_port));
> > 	p_parser->keys[idx].off =
> > 		actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ?
> > 		offsetof(struct tcp_hdr, src_port) :
> > 		offsetof(struct tcp_hdr, dst_port);
> > 
> > assert() is just to be informative.
> > And how about src first like others below?
> > 
> Yes, I will update above.
> 
> > > +	p_parser->keys[idx].mask = 0xFFFF0000;
> > > +	p_parser->keys[idx].val = ((const struct rte_flow_action_set_tp *)
> > > +			actions->conf)->port;
> > 
> > Assigning 2B to 4B big-endian stroage? Doesn't look consistent with the mask
> > above - 0xffff0000.
> > 
> So it should be as following ?
> p_parser->keys[idx].val = (__u32)((const struct rte_flow_action_set_tp *))
>                 actions->conf)->port;

You can figure it out by actual tests but I think the following would be right.
	p_parser->keys[idx].val =
		rte_cpu_to_be_32(((const struct rte_flow_action_set_tp *)
				  actions->conf)->port);

Please verify it by testing anyway.

> > > +	p_parser->sel.nkeys = (++idx);
> > > +}
> > > +
> > > +static void
> > > +flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
> > > +				 struct pedit_parser *p_parser)
> > > +{
> > > +	int idx = p_parser->sel.nkeys;
> > > +	int keys = flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
> > > +	int off_base =
> > > +		actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ? 8 : 24;
> > 
> > offsetof(struct ipv6_hdr, src_addr) :
> > offsetof(struct ipv6_hdr, dst_addr);
> > 
> Got it!
> > > +	const struct rte_flow_action_set_ipv6 *conf =
> > > +		(const struct rte_flow_action_set_ipv6 *)actions->conf;
> > > +
> > > +	for (int i = 0; i < keys; i++, idx++) {
> > > +		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
> > > +		p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
> > > +		p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
> > > +		p_parser->keys[idx].mask = ~UINT32_MAX;
> > > +		memcpy(&p_parser->keys[idx].val,
> > > +			conf->ipv6_addr + i *  SZ_PEDIT_KEY_VAL,
> > > +			SZ_PEDIT_KEY_VAL);
> > > +	}
> > > +	p_parser->sel.nkeys += keys;
> > > +}
> > > +
> > > +static void
> > > +flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
> > 
> > How about getting rte_flow_action_set_ipv4 instead of rte_flow_action?
> > Same comment for ipv6 and tp_port.
> > 
> What's the benefit by using rte_flow_action_set_ipv4 and how I know it's
> for src or dst address ?

Just to make the function neat but I overlooked that you still need
actions->type. Please disregard my previous comment.

> > > +				 struct pedit_parser *p_parser)
> > > +{
> > > +	int idx = p_parser->sel.nkeys;
> > > +
> > > +	p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
> > > +	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
> > > +	p_parser->keys[idx].off =
> > > +		(actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ? 12 : 16);
> > 
> > offsetof(struct ipv4_hdr, src_addr) :
> > offsetof(struct ipv4_hdr, dst_addr);
> > 
> Got it!
> > > +	p_parser->keys[idx].mask = ~UINT32_MAX;
> > > +	p_parser->keys[idx].val =
> > > +		((const struct rte_flow_action_set_ipv4 *)
> > > +		 actions->conf)->ipv4_addr;
> > > +	p_parser->sel.nkeys = (++idx);
> > > +}
> > > +
> > > +static int
> > > +flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
> > > +			      const struct rte_flow_action **actions,
> > > +			      uint64_t item_flags)
> > > +{
> > > +	struct pedit_parser p_parser;
> > > +
> > > +	memset(&p_parser, 0, sizeof(p_parser));
> > > +	mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
> > > +	struct nlattr *na_act_options = mnl_attr_nest_start(nl,
> > > +							    TCA_ACT_OPTIONS);
> > > +	/* all modify header actions should be in one tc-pedit action */
> > > +	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
> > 
> > It seems that you want to aggregate all the pedit actions and form a single
> > na attr. But what if rte_flow_action_set_* are not contiguous? E.g.
> > 
> > flow create ... actions set1 / set2 / port_id / set3 / end
> > 
> > Then, it would have two pedit na attrs. Is that okay?
> > Or, need to think about another way?
> > 
> > Same will happen in flow_tcf_get_pedit_actions_size().
> > 
> It's OK if we have more than one pedit na attrs.
> _BUT_ only last pedit take effect based on my experiment

Then, shouldn't we give some warning to user in validation? So that user can
have right expectation and reorder the actions as their intention like:
	flow create ... actions set1 / set2 / set3 / port_id / end

Otherwise set1 and set2 will be lost according to your comment.

Or, how about making PMD do the right thing. I mean, even if the set actions are
scattered, PMD can collect it and apply in a single na attr?

> > > +		switch ((*actions)->type) {
> > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > > +			flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
> > > +			break;
> > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > > +			flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
> > > +			break;
> > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > > +			flow_tcf_pedit_key_set_tp_port(*actions,
> > > +							&p_parser, item_flags);
> > > +			break;
> > > +		default:
> > > +			goto pedit_mnl_msg_done;
> > > +		}
> > > +	}
> > > +pedit_mnl_msg_done:
> > > +	p_parser.sel.action = TC_ACT_PIPE;
> > > +	mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
> > > +			sizeof(p_parser.sel) +
> > > +			p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
> > > +			&p_parser);
> > > +	struct nlattr *na_pedit_keys = mnl_attr_nest_start(nl,
> > > +					TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
> > > +	for (int i = 0; i < p_parser.sel.nkeys; i++) {
> > > +		struct nlattr *na_pedit_key = mnl_attr_nest_start(nl,
> > > +					TCA_PEDIT_KEY_EX | NLA_F_NESTED);
> > > +		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
> > > +				 p_parser.keys_ex[i].htype);
> > > +		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
> > > +				 p_parser.keys_ex[i].cmd);
> > > +		mnl_attr_nest_end(nl, na_pedit_key);
> > > +	}
> > > +	mnl_attr_nest_end(nl, na_pedit_keys);
> > > +	mnl_attr_nest_end(nl, na_act_options);
> > > +	(*actions)--;
> > > +	return 0;
> > > +}
> > > +
> > > +/**
> > > + * Calculate max memory size of one TC-pedit actions.
> > > + * One TC-pedit action can contain set of keys each defining
> > > + * a rewrite element (rte_flow action)
> > > + *
> > > + * @param[in] actions
> > > + *   actions specification.
> > > + * @param[inout] action_flags
> > > + *   actions flags
> > > + * @param[inout] size
> > > + *   accumulated size
> > > + * @return
> > > + *   Max memory size of one TC-pedit action
> > > + */
> > > +static int
> > > +flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
> > > +				uint64_t *action_flags)
> > > +{
> > > +	int pedit_size = 0;
> > > +	int keys = 0;
> > > +	uint64_t flags = 0;
> > > +
> > > +	pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
> > > +		      SZ_NLATTR_STRZ_OF("pedit") +
> > > +		      SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
> > > +	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
> > > +		switch ((*actions)->type) {
> > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > > +			keys += flow_tcf_calc_pedit_keys(IPV4_ADDR_LEN);
> > > +			flags |= MLX5_ACTION_SET_IPV4_SRC;
> > > +			break;
> > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > > +			keys += flow_tcf_calc_pedit_keys(IPV4_ADDR_LEN);
> > > +			flags |= MLX5_ACTION_SET_IPV4_DST;
> > > +			break;
> > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > > +			keys += flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
> > > +			flags |= MLX5_ACTION_SET_IPV6_SRC;
> > > +			break;
> > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > > +			keys += flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
> > > +			flags |= MLX5_ACTION_SET_IPV6_DST;
> > > +			break;
> > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > > +			/* TCP is as same as UDP */
> > > +			keys += flow_tcf_calc_pedit_keys(TP_PORT_LEN);
> > > +			flags |= MLX5_ACTION_SET_TP_SRC;
> > > +			break;
> > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > > +			/* TCP is as same as UDP */
> > > +			keys += flow_tcf_calc_pedit_keys(TP_PORT_LEN);
> > > +			flags |= MLX5_ACTION_SET_TP_DST;
> > > +			break;
> > > +		default:
> > > +			goto get_pedit_action_size_done;
> > > +		}
> > > +	}
> > > +get_pedit_action_size_done:
> > > +	/* TCA_PEDIT_PARAMS_EX */
> > > +	pedit_size += SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
> > > +			keys * sizeof(struct tc_pedit_key));
> > 
> > > +	pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS */
> > > +	pedit_size += keys *
> > > +		/* TCA_PEDIT_KEY_EX + HTYPE + CMD */
> > > +		(SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) + SZ_NLATTR_DATA_OF(2));
> > > +	(*action_flags) |= flags;
> > > +	(*actions)--;
> > > +	return pedit_size;
> > > +}
> > > +
> > >  /**
> > >   * Retrieve mask for pattern item.
> > >   *
> > > @@ -430,6 +708,8 @@ flow_tcf_validate(struct rte_eth_dev *dev,
> > >  			of_set_vlan_vid;
> > >  		const struct rte_flow_action_of_set_vlan_pcp *
> > >  			of_set_vlan_pcp;
> > > +		const struct rte_flow_action_set_ipv4 *set_ipv4;
> > > +		const struct rte_flow_action_set_ipv6 *set_ipv6;
> > >  	} conf;
> > >  	uint32_t item_flags = 0;
> > >  	uint32_t action_flags = 0;
> > > @@ -690,12 +970,64 @@ flow_tcf_validate(struct rte_eth_dev *dev,
> > >  		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
> > >  			action_flags |= MLX5_ACTION_OF_SET_VLAN_PCP;
> > >  			break;
> > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > > +			action_flags |= MLX5_ACTION_SET_IPV4_SRC;
> > > +			break;
> > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > > +			action_flags |= MLX5_ACTION_SET_IPV4_DST;
> > > +			break;
> > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > > +			action_flags |= MLX5_ACTION_SET_IPV6_SRC;
> > > +			break;
> > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > > +			action_flags |= MLX5_ACTION_SET_IPV6_DST;
> > > +			break;
> > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > > +			action_flags |= MLX5_ACTION_SET_TP_SRC;
> > > +			break;
> > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > > +			action_flags |= MLX5_ACTION_SET_TP_DST;
> > > +			break;
> > >  		default:
> > >  			return rte_flow_error_set(error, ENOTSUP,
> > >  						  RTE_FLOW_ERROR_TYPE_ACTION,
> > >  						  actions,
> > >  						  "action not supported");
> > >  		}
> > > +		if (IS_MODIFY_ACTION(actions->type)) {

This would be a redundant 'if' as classification is already done above. So, how
about adding a goto label at the end of this code - 'err_no_action_conf:', and
use goto above.  E.g.,

		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
			action_flags |= MLX5_ACTION_SET_TP_DST;
			if (!actions->conf)
				goto err_no_action_conf;
			break;

And if I may, can I ask you to add the same to RTE_FLOW_ACTION_TYPE_PORT_ID?

> > > +			if (!actions->conf)
> > > +				return rte_flow_error_set(error, ENOTSUP,
> > > +						RTE_FLOW_ERROR_TYPE_ACTION_CONF,
> > > +						actions,
> > > +						"action configuration not set");
> > > +		}
> > > +	}
> > > +	if (action_flags &
> > > +	   (MLX5_ACTION_SET_IPV4_SRC | MLX5_ACTION_SET_IPV4_DST)) {
> > > +		if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
> > > +			return rte_flow_error_set(error, ENOTSUP,
> > > +						  RTE_FLOW_ERROR_TYPE_ACTION,
> > > +						  actions,
> > > +						  "no ipv4 item found in"
> > > +						  " pattern");
> > > +	}
> > > +	if (action_flags &
> > > +	   (MLX5_ACTION_SET_IPV6_SRC | MLX5_ACTION_SET_IPV6_DST)) {
> > > +		if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
> > > +			return rte_flow_error_set(error, ENOTSUP,
> > > +				RTE_FLOW_ERROR_TYPE_ACTION,
> > > +				actions,
> > > +				"no ipv6 item found in pattern");
> > > +	}
> > > +	if (action_flags & (MLX5_ACTION_SET_TP_SRC | MLX5_ACTION_SET_TP_DST)) {
> > > +		if (!(item_flags &
> > > +		     (MLX5_FLOW_LAYER_OUTER_L4_UDP |
> > > +		      MLX5_FLOW_LAYER_OUTER_L4_TCP)))
> > > +			return rte_flow_error_set(error, ENOTSUP,
> > > +						RTE_FLOW_ERROR_TYPE_ACTION,
> > > +						actions,
> > > +						"no TCP/UDP item found in"
> > > +						" pattern");

All the errors you added, I think EINVAL would be a better fit?

> > Isn't this 'set' action compatible with drop action? No point of modifying
> > packet which will be dropped, isn't it?
> > 
> Yes, you are absolutely right :-)

I believe you'll add a validation code for that in the next version. :-)

> > >  	}
> > >  	return 0;
> > >  }
> > > @@ -840,6 +1172,15 @@ flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
> > >  				SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
> > >  				SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
> > >  			break;
> > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > > +			size += flow_tcf_get_pedit_actions_size(&actions,
> > > +								&flags);
> > > +			break;
> > >  		default:
> > >  			DRV_LOG(WARNING,
> > >  				"unsupported action %p type %d,"
> > > @@ -998,6 +1339,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
> > >  	struct nlattr *na_flower_act;
> > >  	struct nlattr *na_vlan_id = NULL;
> > >  	struct nlattr *na_vlan_priority = NULL;
> > > +	uint64_t item_flags = 0;
> > >  
> > >  	claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
> > >  						PTOI_TABLE_SZ_MAX(dev)));
> > > @@ -1189,6 +1531,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
> > >  			}
> > >  			break;
> > >  		case RTE_FLOW_ITEM_TYPE_UDP:
> > > +			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
> > 
> > Let's add the same to the rest of items like flow_tcf_validate().
> > 
> OK!
> > 
> > Thanks,
> > Yongseok
> > 
> > >  			mask.udp = flow_tcf_item_mask
> > >  				(items, &rte_flow_item_udp_mask,
> > >  				 &flow_tcf_mask_supported.udp,
> > > @@ -1218,6 +1561,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
> > >  			}
> > >  			break;
> > >  		case RTE_FLOW_ITEM_TYPE_TCP:
> > > +			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
> > >  			mask.tcp = flow_tcf_item_mask
> > >  				(items, &rte_flow_item_tcp_mask,
> > >  				 &flow_tcf_mask_supported.tcp,
> > > @@ -1368,6 +1712,18 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
> > >  					conf.of_set_vlan_pcp->vlan_pcp;
> > >  			}
> > >  			break;
> > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > > +			na_act_index =
> > > +				mnl_attr_nest_start(nlh, na_act_index_cur++);
> > > +			flow_tcf_create_pedit_mnl_msg(nlh,
> > > +						      &actions, item_flags);
> > > +			mnl_attr_nest_end(nlh, na_act_index);
> > > +			break;
> > >  		default:
> > >  			return rte_flow_error_set(error, ENOTSUP,
> > >  						  RTE_FLOW_ERROR_TYPE_ACTION,
> > > -- 
> > > 2.17.1
> > > 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [dpdk-dev] [PATCH] net/mlx5: eswitch-IP address UDP/TCP port rewrite
  2018-10-01 20:19     ` Yongseok Koh
@ 2018-10-08 11:22       ` Xiaoyu Min
  2018-10-08 22:08         ` Yongseok Koh
  0 siblings, 1 reply; 13+ messages in thread
From: Xiaoyu Min @ 2018-10-08 11:22 UTC (permalink / raw)
  To: Yongseok Koh; +Cc: Shahaf Shuler, dev

On 18-10-02 04:19:00, Yongseok Koh wrote:
> On Sun, Sep 30, 2018 at 03:21:04PM +0800, Xiaoyu Min wrote:
> > On 18-09-29 07:03:33, Yongseok Koh wrote:
> > > On Tue, Sep 25, 2018 at 07:51:06PM +0800, Xiaoyu Min wrote:
> > > > Offload the following rte_flow actions by inserting accordingly
> > > > E-Switch rules via TC Flower driver
> > > > 
> > > >  - RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC
> > > >  - RTE_FLOW_ACTION_TYPE_SET_IPV4_DST
> > > >  - RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC
> > > >  - RTE_FLOW_ACTION_TYPE_SET_IPV6_DST
> > > >  - RTE_FLOW_ACTION_TYPE_SET_TP_SRC
> > > >  - RTE_FLOW_ACTION_TYPE_SET_TP_DST
> > > 
> > > Can you put an example command of testpmd for the reference?
> > > 
> > Sure, I'll add.
> > 
> > > > Signed-off-by: Xiaoyu Min <jackmin@mellanox.com>
> > > > ---
> > > > This patch bases on Rahul Lakkireddy's patchs[1][2] and
> > > > Yongseok Koh's patchset [3]
> > > > 
> > > > [1] https://patches.dpdk.org/patch/45191/
> > > > [2] https://patches.dpdk.org/patch/45192/
> > > > [3] https://patches.dpdk.org/project/dpdk/list/?series=1474
> > > > 
> > > > 
> > > >  drivers/net/mlx5/Makefile        |   5 +
> > > >  drivers/net/mlx5/meson.build     |   2 +
> > > >  drivers/net/mlx5/mlx5_flow.h     |   6 +
> > > >  drivers/net/mlx5/mlx5_flow_tcf.c | 356 +++++++++++++++++++++++++++++++
> > > >  4 files changed, 369 insertions(+)
> > > > 
> > > > diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
> > > > index ca1de9f21..49b95e78e 100644
> > > > --- a/drivers/net/mlx5/Makefile
> > > > +++ b/drivers/net/mlx5/Makefile
> > > > @@ -346,6 +346,11 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
> > > >  		linux/tc_act/tc_vlan.h \
> > > >  		enum TCA_VLAN_PUSH_VLAN_PRIORITY \
> > > >  		$(AUTOCONF_OUTPUT)
> > > > +	$Q sh -- '$<' '$@' \
> > > > +		HAVE_TC_ACT_PEDIT \
> > > > +		linux/tc_act/tc_pedit.h \
> > > > +		enum TCA_PEDIT_KEY_EX_HDR_TYPE_UDP \
> > > > +		$(AUTOCONF_OUTPUT)
> > > >  	$Q sh -- '$<' '$@' \
> > > >  		HAVE_SUPPORTED_40000baseKR4_Full \
> > > >  		/usr/include/linux/ethtool.h \
> > > > diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
> > > > index fd93ac162..ef6a85101 100644
> > > > --- a/drivers/net/mlx5/meson.build
> > > > +++ b/drivers/net/mlx5/meson.build
> > > > @@ -182,6 +182,8 @@ if build
> > > >  		'TCA_FLOWER_KEY_VLAN_ETH_TYPE' ],
> > > >  		[ 'HAVE_TC_ACT_VLAN', 'linux/tc_act/tc_vlan.h',
> > > >  		'TCA_VLAN_PUSH_VLAN_PRIORITY' ],
> > > > +		[ 'HAVE_TC_ACT_PEDIT', 'linux/tc_act/tc_pedit.h',
> > > > +		'TCA_PEDIT_KEY_EX_HDR_TYPE_UDP' ],
> > > >  		[ 'HAVE_RDMA_NL_NLDEV', 'rdma/rdma_netlink.h',
> > > >  		'RDMA_NL_NLDEV' ],
> > > >  		[ 'HAVE_RDMA_NLDEV_CMD_GET', 'rdma/rdma_netlink.h',
> > > > diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
> > > > index 10d700a7f..be182a643 100644
> > > > --- a/drivers/net/mlx5/mlx5_flow.h
> > > > +++ b/drivers/net/mlx5/mlx5_flow.h
> > > > @@ -87,6 +87,12 @@
> > > >  #define MLX5_ACTION_OF_PUSH_VLAN (1u << 8)
> > > >  #define MLX5_ACTION_OF_SET_VLAN_VID (1u << 9)
> > > >  #define MLX5_ACTION_OF_SET_VLAN_PCP (1u << 10)
> > > > +#define MLX5_ACTION_SET_IPV4_SRC (1u << 11)
> > > > +#define MLX5_ACTION_SET_IPV4_DST (1u << 12)
> > > > +#define MLX5_ACTION_SET_IPV6_SRC (1u << 13)
> > > > +#define MLX5_ACTION_SET_IPV6_DST (1u << 14)
> > > > +#define MLX5_ACTION_SET_TP_SRC (1u << 15)
> > > > +#define MLX5_ACTION_SET_TP_DST (1u << 16)
> > > >  
> > > >  /* possible L3 layers protocols filtering. */
> > > >  #define MLX5_IP_PROTOCOL_TCP 6
> > > > diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
> > > > index 14376188e..85c92f369 100644
> > > > --- a/drivers/net/mlx5/mlx5_flow_tcf.c
> > > > +++ b/drivers/net/mlx5/mlx5_flow_tcf.c
> > > > @@ -53,6 +53,62 @@ struct tc_vlan {
> > > >  
> > > >  #endif /* HAVE_TC_ACT_VLAN */
> > > >  
> > > > +#ifdef HAVE_TC_ACT_PEDIT
> > > > +
> > > > +#include <linux/tc_act/tc_pedit.h>
> > > > +
> > > > +#else /* HAVE_TC_ACT_VLAN */
> > > > +enum {
> > > > +	TCA_PEDIT_UNSPEC,
> > > > +	TCA_PEDIT_TM,
> > > > +	TCA_PEDIT_PARMS,
> > > > +	TCA_PEDIT_PAD,
> > > > +	TCA_PEDIT_PARMS_EX,
> > > > +	TCA_PEDIT_KEYS_EX,
> > > > +	TCA_PEDIT_KEY_EX,
> > > > +	__TCA_PEDIT_MAX
> > > > +};
> > > > +
> > > > +enum {
> > > > +	TCA_PEDIT_KEY_EX_HTYPE = 1,
> > > > +	TCA_PEDIT_KEY_EX_CMD = 2,
> > > > +	__TCA_PEDIT_KEY_EX_MAX
> > > > +};
> > > > +
> > > > +enum pedit_header_type {
> > > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
> > > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
> > > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
> > > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
> > > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
> > > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
> > > > +	__PEDIT_HDR_TYPE_MAX,
> > > > +};
> > > > +
> > > > +enum pedit_cmd {
> > > > +	TCA_PEDIT_KEY_EX_CMD_SET = 0,
> > > > +	TCA_PEDIT_KEY_EX_CMD_ADD = 1,
> > > > +	__PEDIT_CMD_MAX,
> > > > +};
> > > > +
> > > > +struct tc_pedit_key {
> > > > +	__u32           mask;  /* AND */
> > > > +	__u32           val;   /*XOR */
> > > > +	__u32           off;  /*offset */
> > > > +	__u32           at;
> > > > +	__u32           offmask;
> > > > +	__u32           shift;
> > > > +};
> > > > +
> > > > +struct tc_pedit_sel {
> > > > +	tc_gen;
> > > > +	unsigned char           nkeys;
> > > > +	unsigned char           flags;
> > > > +	struct tc_pedit_key     keys[0];
> > > > +};
> > > > +
> > > > +#endif /* HAVE_TC_ACT_VLAN */
> > > > +
> > > >  /* Normally found in linux/netlink.h. */
> > > >  #ifndef NETLINK_CAP_ACK
> > > >  #define NETLINK_CAP_ACK 10
> > > > @@ -153,6 +209,14 @@ struct tc_vlan {
> > > >  #define IPV6_ADDR_LEN 16
> > > >  #endif
> > > >  
> > > > +#ifndef IPV4_ADDR_LEN
> > > > +#define IPV4_ADDR_LEN 4
> > > > +#endif
> > > > +
> > > > +#ifndef TP_PORT_LEN
> > > > +#define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
> > > > +#endif
> > > > +
> > > >  /** Empty masks for known item types. */
> > > >  static const union {
> > > >  	struct rte_flow_item_port_id port_id;
> > > > @@ -227,6 +291,220 @@ struct flow_tcf_ptoi {
> > > >  
> > > >  #define MLX5_TCF_FATE_ACTIONS (MLX5_ACTION_DROP | MLX5_ACTION_PORT_ID)
> > > >  
> > > > +#define IS_MODIFY_ACTION(act_) ({typeof(act_) act = (act_); \
> > > > +		((act) == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC || \
> > > > +		(act) == RTE_FLOW_ACTION_TYPE_SET_IPV4_DST  || \
> > > > +		(act) == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC  || \
> > > > +		(act) == RTE_FLOW_ACTION_TYPE_SET_IPV6_DST  || \
> > > > +		(act) == RTE_FLOW_ACTION_TYPE_SET_TP_SRC    || \
> > > > +		(act) == RTE_FLOW_ACTION_TYPE_SET_TP_DST) ?    \
> > > > +		1 : 0; })
> > > 
> > > The reason why you need this complex multi-conditional macro is that
> > > RTE_FLOW_ACTION_TYPE_* isn't a bitmask. But, as this actions will be converted
> > > to MLX5_ACTION_* which is a bitmask, you can use that instead. Then, this
> > > would be enough to be:
> > > 
> > > #define MLX5_TCF_SET_ACTIONS \
> > > 	(MLX5_ACTION_SET_IPV4_SRC | ...)
> > > 
> > > And in the flow_tcf_validate() below,
> > > 	if (action_flags & MLX5_TCF_SET_ACTIONS) {
> > > 		...
> > > 	}
> > > 
> > Well, I did consider using bitmask but action_flags is an _accumulated_ variable,
> > records all the actions parsed so far.
> > But, here, I need to know what is the _current_ action and whether it belongs to modify
> > actions. If using bitmask, Looks like a new variable (i.e current_action) needed (?)
> > 
> > case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> >     current_action = MLX5_ACTION_SET_IPV4_SRC;
> >     .....
> > 
> > if (current_action & MLX5_TCF_SET_ACTIONS) ...
> >  ...
> > 
> > action_flags |= current_action;
> > 
> > I feel more code change needed or you think it's worth?
> 
> Understood what you meant.
> Please see my comment below in the flow_tcf_validate().
> 
> > > And please note that I'm going to rename MLX5_ACTION_* to MLX5_FLOW_ACTION_*.
> > > Please refer to "net/mlx5: rename flow macros" in PR #885. You might need to
> > > rebase it again.
> > > 
> > Sure, I'll rebase on it
> > 
> > > > +#define MAX_PEDIT_KEYS (128)
> > > > +#define SZ_PEDIT_KEY_VAL (4)
> > > > +
> > > > +struct pedit_key_ex {
> > > > +	enum pedit_header_type htype;
> > > > +	enum pedit_cmd cmd;
> > > > +};
> > > > +
> > > > +struct pedit_parser {
> > > > +	struct tc_pedit_sel sel;
> > > > +	struct tc_pedit_key keys[MAX_PEDIT_KEYS];
> > > > +	struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
> > > > +};
> > > > +
> > > > +static int
> > > > +flow_tcf_calc_pedit_keys(const uint64_t size)
> > > 
> > > Please add documentation by comment for every funcs you add.
> > > Refer to the other existing ones for formality.
> > > 
> > Sure!
> > > > +{
> > > > +	int keys = (size / SZ_PEDIT_KEY_VAL) +
> > > > +		((size % SZ_PEDIT_KEY_VAL) ? 1 : 0);
> > > 
> > > Indentation.
> > > 
> > Will fix it
> > > > +	return keys;
> > > > +}
> > > > +
> > > > +static void
> > > > +flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
> > > > +				struct pedit_parser *p_parser,
> > > > +				uint64_t item_flags)
> > > > +{
> > > > +	int idx = p_parser->sel.nkeys;
> > > > +
> > > > +	if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
> > > > +		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
> > > > +	if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
> > > > +		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
> > > > +	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
> > > > +	p_parser->keys[idx].off =
> > > > +		actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_DST ? 2 : 0;
> > > 
> > > 	assert(offsetof(struct tcp_hdr, src_port) ==
> > > 	       offsetof(struct udp_hdr, src_port));
> > > 	assert(offsetof(struct tcp_hdr, dst_port) ==
> > > 	       offsetof(struct udp_hdr, dst_port));
> > > 	p_parser->keys[idx].off =
> > > 		actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ?
> > > 		offsetof(struct tcp_hdr, src_port) :
> > > 		offsetof(struct tcp_hdr, dst_port);
> > > 
> > > assert() is just to be informative.
> > > And how about src first like others below?
> > > 
> > Yes, I will update above.
> > 
> > > > +	p_parser->keys[idx].mask = 0xFFFF0000;
> > > > +	p_parser->keys[idx].val = ((const struct rte_flow_action_set_tp *)
> > > > +			actions->conf)->port;
> > > 
> > > Assigning 2B to 4B big-endian stroage? Doesn't look consistent with the mask
> > > above - 0xffff0000.
> > > 
> > So it should be as following ?
> > p_parser->keys[idx].val = (__u32)((const struct rte_flow_action_set_tp *))
> >                 actions->conf)->port;
> 
> You can figure it out by actual tests but I think the following would be right.
> 	p_parser->keys[idx].val =
> 		rte_cpu_to_be_32(((const struct rte_flow_action_set_tp *)
> 				  actions->conf)->port);
> 
> Please verify it by testing anyway.
> 
No, it doesn't work correctly if it's converted to BE.
As my understanding the Netlink message should be expressed in host-byte order (?)

> > > > +	p_parser->sel.nkeys = (++idx);
> > > > +}
> > > > +
> > > > +static void
> > > > +flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
> > > > +				 struct pedit_parser *p_parser)
> > > > +{
> > > > +	int idx = p_parser->sel.nkeys;
> > > > +	int keys = flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
> > > > +	int off_base =
> > > > +		actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ? 8 : 24;
> > > 
> > > offsetof(struct ipv6_hdr, src_addr) :
> > > offsetof(struct ipv6_hdr, dst_addr);
> > > 
> > Got it!
> > > > +	const struct rte_flow_action_set_ipv6 *conf =
> > > > +		(const struct rte_flow_action_set_ipv6 *)actions->conf;
> > > > +
> > > > +	for (int i = 0; i < keys; i++, idx++) {
> > > > +		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
> > > > +		p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
> > > > +		p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
> > > > +		p_parser->keys[idx].mask = ~UINT32_MAX;
> > > > +		memcpy(&p_parser->keys[idx].val,
> > > > +			conf->ipv6_addr + i *  SZ_PEDIT_KEY_VAL,
> > > > +			SZ_PEDIT_KEY_VAL);
> > > > +	}
> > > > +	p_parser->sel.nkeys += keys;
> > > > +}
> > > > +
> > > > +static void
> > > > +flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
> > > 
> > > How about getting rte_flow_action_set_ipv4 instead of rte_flow_action?
> > > Same comment for ipv6 and tp_port.
> > > 
> > What's the benefit by using rte_flow_action_set_ipv4 and how I know it's
> > for src or dst address ?
> 
> Just to make the function neat but I overlooked that you still need
> actions->type. Please disregard my previous comment.
> 
OK~
> > > > +				 struct pedit_parser *p_parser)
> > > > +{
> > > > +	int idx = p_parser->sel.nkeys;
> > > > +
> > > > +	p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
> > > > +	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
> > > > +	p_parser->keys[idx].off =
> > > > +		(actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ? 12 : 16);
> > > 
> > > offsetof(struct ipv4_hdr, src_addr) :
> > > offsetof(struct ipv4_hdr, dst_addr);
> > > 
> > Got it!
> > > > +	p_parser->keys[idx].mask = ~UINT32_MAX;
> > > > +	p_parser->keys[idx].val =
> > > > +		((const struct rte_flow_action_set_ipv4 *)
> > > > +		 actions->conf)->ipv4_addr;
> > > > +	p_parser->sel.nkeys = (++idx);
> > > > +}
> > > > +
> > > > +static int
> > > > +flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
> > > > +			      const struct rte_flow_action **actions,
> > > > +			      uint64_t item_flags)
> > > > +{
> > > > +	struct pedit_parser p_parser;
> > > > +
> > > > +	memset(&p_parser, 0, sizeof(p_parser));
> > > > +	mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
> > > > +	struct nlattr *na_act_options = mnl_attr_nest_start(nl,
> > > > +							    TCA_ACT_OPTIONS);
> > > > +	/* all modify header actions should be in one tc-pedit action */
> > > > +	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
> > > 
> > > It seems that you want to aggregate all the pedit actions and form a single
> > > na attr. But what if rte_flow_action_set_* are not contiguous? E.g.
> > > 
> > > flow create ... actions set1 / set2 / port_id / set3 / end
> > > 
> > > Then, it would have two pedit na attrs. Is that okay?
> > > Or, need to think about another way?
> > > 
> > > Same will happen in flow_tcf_get_pedit_actions_size().
> > > 
> > It's OK if we have more than one pedit na attrs.
> > _BUT_ only last pedit take effect based on my experiment
> 
> Then, shouldn't we give some warning to user in validation? So that user can
> have right expectation and reorder the actions as their intention like:
> 	flow create ... actions set1 / set2 / set3 / port_id / end
> 
> Otherwise set1 and set2 will be lost according to your comment.
> 
I prefer to give error to user in validation because this is simple.
> Or, how about making PMD do the right thing. I mean, even if the set actions are
> scattered, PMD can collect it and apply in a single na attr?
> 
My feeling is the above approach will be (become) complex. It looks like we introduce
new functionality which re-order all actions, something like rss_expand. 
> > > > +		switch ((*actions)->type) {
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > > > +			flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
> > > > +			break;
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > > > +			flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
> > > > +			break;
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > > > +			flow_tcf_pedit_key_set_tp_port(*actions,
> > > > +							&p_parser, item_flags);
> > > > +			break;
> > > > +		default:
> > > > +			goto pedit_mnl_msg_done;
> > > > +		}
> > > > +	}
> > > > +pedit_mnl_msg_done:
> > > > +	p_parser.sel.action = TC_ACT_PIPE;
> > > > +	mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
> > > > +			sizeof(p_parser.sel) +
> > > > +			p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
> > > > +			&p_parser);
> > > > +	struct nlattr *na_pedit_keys = mnl_attr_nest_start(nl,
> > > > +					TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
> > > > +	for (int i = 0; i < p_parser.sel.nkeys; i++) {
> > > > +		struct nlattr *na_pedit_key = mnl_attr_nest_start(nl,
> > > > +					TCA_PEDIT_KEY_EX | NLA_F_NESTED);
> > > > +		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
> > > > +				 p_parser.keys_ex[i].htype);
> > > > +		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
> > > > +				 p_parser.keys_ex[i].cmd);
> > > > +		mnl_attr_nest_end(nl, na_pedit_key);
> > > > +	}
> > > > +	mnl_attr_nest_end(nl, na_pedit_keys);
> > > > +	mnl_attr_nest_end(nl, na_act_options);
> > > > +	(*actions)--;
> > > > +	return 0;
> > > > +}
> > > > +
> > > > +/**
> > > > + * Calculate max memory size of one TC-pedit actions.
> > > > + * One TC-pedit action can contain set of keys each defining
> > > > + * a rewrite element (rte_flow action)
> > > > + *
> > > > + * @param[in] actions
> > > > + *   actions specification.
> > > > + * @param[inout] action_flags
> > > > + *   actions flags
> > > > + * @param[inout] size
> > > > + *   accumulated size
> > > > + * @return
> > > > + *   Max memory size of one TC-pedit action
> > > > + */
> > > > +static int
> > > > +flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
> > > > +				uint64_t *action_flags)
> > > > +{
> > > > +	int pedit_size = 0;
> > > > +	int keys = 0;
> > > > +	uint64_t flags = 0;
> > > > +
> > > > +	pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
> > > > +		      SZ_NLATTR_STRZ_OF("pedit") +
> > > > +		      SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
> > > > +	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
> > > > +		switch ((*actions)->type) {
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > > > +			keys += flow_tcf_calc_pedit_keys(IPV4_ADDR_LEN);
> > > > +			flags |= MLX5_ACTION_SET_IPV4_SRC;
> > > > +			break;
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > > > +			keys += flow_tcf_calc_pedit_keys(IPV4_ADDR_LEN);
> > > > +			flags |= MLX5_ACTION_SET_IPV4_DST;
> > > > +			break;
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > > > +			keys += flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
> > > > +			flags |= MLX5_ACTION_SET_IPV6_SRC;
> > > > +			break;
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > > > +			keys += flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
> > > > +			flags |= MLX5_ACTION_SET_IPV6_DST;
> > > > +			break;
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > > > +			/* TCP is as same as UDP */
> > > > +			keys += flow_tcf_calc_pedit_keys(TP_PORT_LEN);
> > > > +			flags |= MLX5_ACTION_SET_TP_SRC;
> > > > +			break;
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > > > +			/* TCP is as same as UDP */
> > > > +			keys += flow_tcf_calc_pedit_keys(TP_PORT_LEN);
> > > > +			flags |= MLX5_ACTION_SET_TP_DST;
> > > > +			break;
> > > > +		default:
> > > > +			goto get_pedit_action_size_done;
> > > > +		}
> > > > +	}
> > > > +get_pedit_action_size_done:
> > > > +	/* TCA_PEDIT_PARAMS_EX */
> > > > +	pedit_size += SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
> > > > +			keys * sizeof(struct tc_pedit_key));
> > > 
> > > > +	pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS */
> > > > +	pedit_size += keys *
> > > > +		/* TCA_PEDIT_KEY_EX + HTYPE + CMD */
> > > > +		(SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) + SZ_NLATTR_DATA_OF(2));
> > > > +	(*action_flags) |= flags;
> > > > +	(*actions)--;
> > > > +	return pedit_size;
> > > > +}
> > > > +
> > > >  /**
> > > >   * Retrieve mask for pattern item.
> > > >   *
> > > > @@ -430,6 +708,8 @@ flow_tcf_validate(struct rte_eth_dev *dev,
> > > >  			of_set_vlan_vid;
> > > >  		const struct rte_flow_action_of_set_vlan_pcp *
> > > >  			of_set_vlan_pcp;
> > > > +		const struct rte_flow_action_set_ipv4 *set_ipv4;
> > > > +		const struct rte_flow_action_set_ipv6 *set_ipv6;
> > > >  	} conf;
> > > >  	uint32_t item_flags = 0;
> > > >  	uint32_t action_flags = 0;
> > > > @@ -690,12 +970,64 @@ flow_tcf_validate(struct rte_eth_dev *dev,
> > > >  		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
> > > >  			action_flags |= MLX5_ACTION_OF_SET_VLAN_PCP;
> > > >  			break;
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > > > +			action_flags |= MLX5_ACTION_SET_IPV4_SRC;
> > > > +			break;
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > > > +			action_flags |= MLX5_ACTION_SET_IPV4_DST;
> > > > +			break;
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > > > +			action_flags |= MLX5_ACTION_SET_IPV6_SRC;
> > > > +			break;
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > > > +			action_flags |= MLX5_ACTION_SET_IPV6_DST;
> > > > +			break;
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > > > +			action_flags |= MLX5_ACTION_SET_TP_SRC;
> > > > +			break;
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > > > +			action_flags |= MLX5_ACTION_SET_TP_DST;
> > > > +			break;
> > > >  		default:
> > > >  			return rte_flow_error_set(error, ENOTSUP,
> > > >  						  RTE_FLOW_ERROR_TYPE_ACTION,
> > > >  						  actions,
> > > >  						  "action not supported");
> > > >  		}
> > > > +		if (IS_MODIFY_ACTION(actions->type)) {
> 
> This would be a redundant 'if' as classification is already done above. So, how
> about adding a goto label at the end of this code - 'err_no_action_conf:', and
> use goto above.  E.g.,
> 
> 		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> 			action_flags |= MLX5_ACTION_SET_TP_DST;
> 			if (!actions->conf)
> 				goto err_no_action_conf;
> 			break;
> 
In some level I do agree with you it's redundant. But things like this kind of
redundancy is not avoidable. I mean if we use "goto err_no_action_conf", the
"if (!actions->conf) goto err_no_action_conf" has to be repeated in each "case"
which needs to check conf or you think it's acceptable ?
> And if I may, can I ask you to add the same to RTE_FLOW_ACTION_TYPE_PORT_ID?
> 
Yes, I will add.
> > > > +			if (!actions->conf)
> > > > +				return rte_flow_error_set(error, ENOTSUP,
> > > > +						RTE_FLOW_ERROR_TYPE_ACTION_CONF,
> > > > +						actions,
> > > > +						"action configuration not set");
> > > > +		}
> > > > +	}
> > > > +	if (action_flags &
> > > > +	   (MLX5_ACTION_SET_IPV4_SRC | MLX5_ACTION_SET_IPV4_DST)) {
> > > > +		if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
> > > > +			return rte_flow_error_set(error, ENOTSUP,
> > > > +						  RTE_FLOW_ERROR_TYPE_ACTION,
> > > > +						  actions,
> > > > +						  "no ipv4 item found in"
> > > > +						  " pattern");
> > > > +	}
> > > > +	if (action_flags &
> > > > +	   (MLX5_ACTION_SET_IPV6_SRC | MLX5_ACTION_SET_IPV6_DST)) {
> > > > +		if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
> > > > +			return rte_flow_error_set(error, ENOTSUP,
> > > > +				RTE_FLOW_ERROR_TYPE_ACTION,
> > > > +				actions,
> > > > +				"no ipv6 item found in pattern");
> > > > +	}
> > > > +	if (action_flags & (MLX5_ACTION_SET_TP_SRC | MLX5_ACTION_SET_TP_DST)) {
> > > > +		if (!(item_flags &
> > > > +		     (MLX5_FLOW_LAYER_OUTER_L4_UDP |
> > > > +		      MLX5_FLOW_LAYER_OUTER_L4_TCP)))
> > > > +			return rte_flow_error_set(error, ENOTSUP,
> > > > +						RTE_FLOW_ERROR_TYPE_ACTION,
> > > > +						actions,
> > > > +						"no TCP/UDP item found in"
> > > > +						" pattern");
> 
> All the errors you added, I think EINVAL would be a better fit?
> 
Yes, EINVAL should be better.
> > > Isn't this 'set' action compatible with drop action? No point of modifying
> > > packet which will be dropped, isn't it?
> > > 
> > Yes, you are absolutely right :-)
> 
> I believe you'll add a validation code for that in the next version. :-)
> 
Of course ;-)
> > > >  	}
> > > >  	return 0;
> > > >  }
> > > > @@ -840,6 +1172,15 @@ flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
> > > >  				SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
> > > >  				SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
> > > >  			break;
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > > > +			size += flow_tcf_get_pedit_actions_size(&actions,
> > > > +								&flags);
> > > > +			break;
> > > >  		default:
> > > >  			DRV_LOG(WARNING,
> > > >  				"unsupported action %p type %d,"
> > > > @@ -998,6 +1339,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
> > > >  	struct nlattr *na_flower_act;
> > > >  	struct nlattr *na_vlan_id = NULL;
> > > >  	struct nlattr *na_vlan_priority = NULL;
> > > > +	uint64_t item_flags = 0;
> > > >  
> > > >  	claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
> > > >  						PTOI_TABLE_SZ_MAX(dev)));
> > > > @@ -1189,6 +1531,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
> > > >  			}
> > > >  			break;
> > > >  		case RTE_FLOW_ITEM_TYPE_UDP:
> > > > +			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
> > > 
> > > Let's add the same to the rest of items like flow_tcf_validate().
> > > 
> > OK!
> > > 
> > > Thanks,
> > > Yongseok
> > > 
> > > >  			mask.udp = flow_tcf_item_mask
> > > >  				(items, &rte_flow_item_udp_mask,
> > > >  				 &flow_tcf_mask_supported.udp,
> > > > @@ -1218,6 +1561,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
> > > >  			}
> > > >  			break;
> > > >  		case RTE_FLOW_ITEM_TYPE_TCP:
> > > > +			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
> > > >  			mask.tcp = flow_tcf_item_mask
> > > >  				(items, &rte_flow_item_tcp_mask,
> > > >  				 &flow_tcf_mask_supported.tcp,
> > > > @@ -1368,6 +1712,18 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
> > > >  					conf.of_set_vlan_pcp->vlan_pcp;
> > > >  			}
> > > >  			break;
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > > > +			na_act_index =
> > > > +				mnl_attr_nest_start(nlh, na_act_index_cur++);
> > > > +			flow_tcf_create_pedit_mnl_msg(nlh,
> > > > +						      &actions, item_flags);
> > > > +			mnl_attr_nest_end(nlh, na_act_index);
> > > > +			break;
> > > >  		default:
> > > >  			return rte_flow_error_set(error, ENOTSUP,
> > > >  						  RTE_FLOW_ERROR_TYPE_ACTION,
> > > > -- 
> > > > 2.17.1
> > > > 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [dpdk-dev] [PATCH] net/mlx5: eswitch-IP address UDP/TCP port rewrite
  2018-10-08 11:22       ` Xiaoyu Min
@ 2018-10-08 22:08         ` Yongseok Koh
  2018-10-09  7:03           ` Jack Min
  0 siblings, 1 reply; 13+ messages in thread
From: Yongseok Koh @ 2018-10-08 22:08 UTC (permalink / raw)
  To: Jack Min; +Cc: Shahaf Shuler, dev

On Mon, Oct 08, 2018 at 07:22:03PM +0800, Xiaoyu Min wrote:
> On 18-10-02 04:19:00, Yongseok Koh wrote:
> > On Sun, Sep 30, 2018 at 03:21:04PM +0800, Xiaoyu Min wrote:
> > > On 18-09-29 07:03:33, Yongseok Koh wrote:
> > > > On Tue, Sep 25, 2018 at 07:51:06PM +0800, Xiaoyu Min wrote:
> > > > > Offload the following rte_flow actions by inserting accordingly
> > > > > E-Switch rules via TC Flower driver
> > > > > 
> > > > >  - RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC
> > > > >  - RTE_FLOW_ACTION_TYPE_SET_IPV4_DST
> > > > >  - RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC
> > > > >  - RTE_FLOW_ACTION_TYPE_SET_IPV6_DST
> > > > >  - RTE_FLOW_ACTION_TYPE_SET_TP_SRC
> > > > >  - RTE_FLOW_ACTION_TYPE_SET_TP_DST
> > > > 
> > > > Can you put an example command of testpmd for the reference?
> > > > 
> > > Sure, I'll add.
> > > 
> > > > > Signed-off-by: Xiaoyu Min <jackmin@mellanox.com>
> > > > > ---
> > > > > This patch bases on Rahul Lakkireddy's patchs[1][2] and
> > > > > Yongseok Koh's patchset [3]
> > > > > 
> > > > > [1] https://patches.dpdk.org/patch/45191/
> > > > > [2] https://patches.dpdk.org/patch/45192/
> > > > > [3] https://patches.dpdk.org/project/dpdk/list/?series=1474
> > > > > 
> > > > > 
> > > > >  drivers/net/mlx5/Makefile        |   5 +
> > > > >  drivers/net/mlx5/meson.build     |   2 +
> > > > >  drivers/net/mlx5/mlx5_flow.h     |   6 +
> > > > >  drivers/net/mlx5/mlx5_flow_tcf.c | 356 +++++++++++++++++++++++++++++++
> > > > >  4 files changed, 369 insertions(+)
> > > > > 
> > > > > diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
> > > > > index ca1de9f21..49b95e78e 100644
> > > > > --- a/drivers/net/mlx5/Makefile
> > > > > +++ b/drivers/net/mlx5/Makefile
> > > > > @@ -346,6 +346,11 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
> > > > >  		linux/tc_act/tc_vlan.h \
> > > > >  		enum TCA_VLAN_PUSH_VLAN_PRIORITY \
> > > > >  		$(AUTOCONF_OUTPUT)
> > > > > +	$Q sh -- '$<' '$@' \
> > > > > +		HAVE_TC_ACT_PEDIT \
> > > > > +		linux/tc_act/tc_pedit.h \
> > > > > +		enum TCA_PEDIT_KEY_EX_HDR_TYPE_UDP \
> > > > > +		$(AUTOCONF_OUTPUT)
> > > > >  	$Q sh -- '$<' '$@' \
> > > > >  		HAVE_SUPPORTED_40000baseKR4_Full \
> > > > >  		/usr/include/linux/ethtool.h \
> > > > > diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
> > > > > index fd93ac162..ef6a85101 100644
> > > > > --- a/drivers/net/mlx5/meson.build
> > > > > +++ b/drivers/net/mlx5/meson.build
> > > > > @@ -182,6 +182,8 @@ if build
> > > > >  		'TCA_FLOWER_KEY_VLAN_ETH_TYPE' ],
> > > > >  		[ 'HAVE_TC_ACT_VLAN', 'linux/tc_act/tc_vlan.h',
> > > > >  		'TCA_VLAN_PUSH_VLAN_PRIORITY' ],
> > > > > +		[ 'HAVE_TC_ACT_PEDIT', 'linux/tc_act/tc_pedit.h',
> > > > > +		'TCA_PEDIT_KEY_EX_HDR_TYPE_UDP' ],
> > > > >  		[ 'HAVE_RDMA_NL_NLDEV', 'rdma/rdma_netlink.h',
> > > > >  		'RDMA_NL_NLDEV' ],
> > > > >  		[ 'HAVE_RDMA_NLDEV_CMD_GET', 'rdma/rdma_netlink.h',
> > > > > diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
> > > > > index 10d700a7f..be182a643 100644
> > > > > --- a/drivers/net/mlx5/mlx5_flow.h
> > > > > +++ b/drivers/net/mlx5/mlx5_flow.h
> > > > > @@ -87,6 +87,12 @@
> > > > >  #define MLX5_ACTION_OF_PUSH_VLAN (1u << 8)
> > > > >  #define MLX5_ACTION_OF_SET_VLAN_VID (1u << 9)
> > > > >  #define MLX5_ACTION_OF_SET_VLAN_PCP (1u << 10)
> > > > > +#define MLX5_ACTION_SET_IPV4_SRC (1u << 11)
> > > > > +#define MLX5_ACTION_SET_IPV4_DST (1u << 12)
> > > > > +#define MLX5_ACTION_SET_IPV6_SRC (1u << 13)
> > > > > +#define MLX5_ACTION_SET_IPV6_DST (1u << 14)
> > > > > +#define MLX5_ACTION_SET_TP_SRC (1u << 15)
> > > > > +#define MLX5_ACTION_SET_TP_DST (1u << 16)
> > > > >  
> > > > >  /* possible L3 layers protocols filtering. */
> > > > >  #define MLX5_IP_PROTOCOL_TCP 6
> > > > > diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
> > > > > index 14376188e..85c92f369 100644
> > > > > --- a/drivers/net/mlx5/mlx5_flow_tcf.c
> > > > > +++ b/drivers/net/mlx5/mlx5_flow_tcf.c
> > > > > @@ -53,6 +53,62 @@ struct tc_vlan {
> > > > >  
> > > > >  #endif /* HAVE_TC_ACT_VLAN */
> > > > >  
> > > > > +#ifdef HAVE_TC_ACT_PEDIT
> > > > > +
> > > > > +#include <linux/tc_act/tc_pedit.h>
> > > > > +
> > > > > +#else /* HAVE_TC_ACT_VLAN */
> > > > > +enum {
> > > > > +	TCA_PEDIT_UNSPEC,
> > > > > +	TCA_PEDIT_TM,
> > > > > +	TCA_PEDIT_PARMS,
> > > > > +	TCA_PEDIT_PAD,
> > > > > +	TCA_PEDIT_PARMS_EX,
> > > > > +	TCA_PEDIT_KEYS_EX,
> > > > > +	TCA_PEDIT_KEY_EX,
> > > > > +	__TCA_PEDIT_MAX
> > > > > +};
> > > > > +
> > > > > +enum {
> > > > > +	TCA_PEDIT_KEY_EX_HTYPE = 1,
> > > > > +	TCA_PEDIT_KEY_EX_CMD = 2,
> > > > > +	__TCA_PEDIT_KEY_EX_MAX
> > > > > +};
> > > > > +
> > > > > +enum pedit_header_type {
> > > > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
> > > > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
> > > > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
> > > > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
> > > > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
> > > > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
> > > > > +	__PEDIT_HDR_TYPE_MAX,
> > > > > +};
> > > > > +
> > > > > +enum pedit_cmd {
> > > > > +	TCA_PEDIT_KEY_EX_CMD_SET = 0,
> > > > > +	TCA_PEDIT_KEY_EX_CMD_ADD = 1,
> > > > > +	__PEDIT_CMD_MAX,
> > > > > +};
> > > > > +
> > > > > +struct tc_pedit_key {
> > > > > +	__u32           mask;  /* AND */
> > > > > +	__u32           val;   /*XOR */
> > > > > +	__u32           off;  /*offset */
> > > > > +	__u32           at;
> > > > > +	__u32           offmask;
> > > > > +	__u32           shift;
> > > > > +};
> > > > > +
> > > > > +struct tc_pedit_sel {
> > > > > +	tc_gen;
> > > > > +	unsigned char           nkeys;
> > > > > +	unsigned char           flags;
> > > > > +	struct tc_pedit_key     keys[0];
> > > > > +};
> > > > > +
> > > > > +#endif /* HAVE_TC_ACT_VLAN */
> > > > > +
> > > > >  /* Normally found in linux/netlink.h. */
> > > > >  #ifndef NETLINK_CAP_ACK
> > > > >  #define NETLINK_CAP_ACK 10
> > > > > @@ -153,6 +209,14 @@ struct tc_vlan {
> > > > >  #define IPV6_ADDR_LEN 16
> > > > >  #endif
> > > > >  
> > > > > +#ifndef IPV4_ADDR_LEN
> > > > > +#define IPV4_ADDR_LEN 4
> > > > > +#endif
> > > > > +
> > > > > +#ifndef TP_PORT_LEN
> > > > > +#define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
> > > > > +#endif
> > > > > +
> > > > >  /** Empty masks for known item types. */
> > > > >  static const union {
> > > > >  	struct rte_flow_item_port_id port_id;
> > > > > @@ -227,6 +291,220 @@ struct flow_tcf_ptoi {
> > > > >  
> > > > >  #define MLX5_TCF_FATE_ACTIONS (MLX5_ACTION_DROP | MLX5_ACTION_PORT_ID)
> > > > >  
> > > > > +#define IS_MODIFY_ACTION(act_) ({typeof(act_) act = (act_); \
> > > > > +		((act) == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC || \
> > > > > +		(act) == RTE_FLOW_ACTION_TYPE_SET_IPV4_DST  || \
> > > > > +		(act) == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC  || \
> > > > > +		(act) == RTE_FLOW_ACTION_TYPE_SET_IPV6_DST  || \
> > > > > +		(act) == RTE_FLOW_ACTION_TYPE_SET_TP_SRC    || \
> > > > > +		(act) == RTE_FLOW_ACTION_TYPE_SET_TP_DST) ?    \
> > > > > +		1 : 0; })
> > > > 
> > > > The reason why you need this complex multi-conditional macro is that
> > > > RTE_FLOW_ACTION_TYPE_* isn't a bitmask. But, as this actions will be converted
> > > > to MLX5_ACTION_* which is a bitmask, you can use that instead. Then, this
> > > > would be enough to be:
> > > > 
> > > > #define MLX5_TCF_SET_ACTIONS \
> > > > 	(MLX5_ACTION_SET_IPV4_SRC | ...)
> > > > 
> > > > And in the flow_tcf_validate() below,
> > > > 	if (action_flags & MLX5_TCF_SET_ACTIONS) {
> > > > 		...
> > > > 	}
> > > > 
> > > Well, I did consider using bitmask but action_flags is an _accumulated_ variable,
> > > records all the actions parsed so far.
> > > But, here, I need to know what is the _current_ action and whether it belongs to modify
> > > actions. If using bitmask, Looks like a new variable (i.e current_action) needed (?)
> > > 
> > > case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > >     current_action = MLX5_ACTION_SET_IPV4_SRC;
> > >     .....
> > > 
> > > if (current_action & MLX5_TCF_SET_ACTIONS) ...
> > >  ...
> > > 
> > > action_flags |= current_action;
> > > 
> > > I feel more code change needed or you think it's worth?
> > 
> > Understood what you meant.
> > Please see my comment below in the flow_tcf_validate().
> > 
> > > > And please note that I'm going to rename MLX5_ACTION_* to MLX5_FLOW_ACTION_*.
> > > > Please refer to "net/mlx5: rename flow macros" in PR #885. You might need to
> > > > rebase it again.
> > > > 
> > > Sure, I'll rebase on it
> > > 
> > > > > +#define MAX_PEDIT_KEYS (128)
> > > > > +#define SZ_PEDIT_KEY_VAL (4)
> > > > > +
> > > > > +struct pedit_key_ex {
> > > > > +	enum pedit_header_type htype;
> > > > > +	enum pedit_cmd cmd;
> > > > > +};
> > > > > +
> > > > > +struct pedit_parser {
> > > > > +	struct tc_pedit_sel sel;
> > > > > +	struct tc_pedit_key keys[MAX_PEDIT_KEYS];
> > > > > +	struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
> > > > > +};
> > > > > +
> > > > > +static int
> > > > > +flow_tcf_calc_pedit_keys(const uint64_t size)
> > > > 
> > > > Please add documentation by comment for every funcs you add.
> > > > Refer to the other existing ones for formality.
> > > > 
> > > Sure!
> > > > > +{
> > > > > +	int keys = (size / SZ_PEDIT_KEY_VAL) +
> > > > > +		((size % SZ_PEDIT_KEY_VAL) ? 1 : 0);
> > > > 
> > > > Indentation.
> > > > 
> > > Will fix it
> > > > > +	return keys;
> > > > > +}
> > > > > +
> > > > > +static void
> > > > > +flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
> > > > > +				struct pedit_parser *p_parser,
> > > > > +				uint64_t item_flags)
> > > > > +{
> > > > > +	int idx = p_parser->sel.nkeys;
> > > > > +
> > > > > +	if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
> > > > > +		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
> > > > > +	if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
> > > > > +		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
> > > > > +	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
> > > > > +	p_parser->keys[idx].off =
> > > > > +		actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_DST ? 2 : 0;
> > > > 
> > > > 	assert(offsetof(struct tcp_hdr, src_port) ==
> > > > 	       offsetof(struct udp_hdr, src_port));
> > > > 	assert(offsetof(struct tcp_hdr, dst_port) ==
> > > > 	       offsetof(struct udp_hdr, dst_port));
> > > > 	p_parser->keys[idx].off =
> > > > 		actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ?
> > > > 		offsetof(struct tcp_hdr, src_port) :
> > > > 		offsetof(struct tcp_hdr, dst_port);
> > > > 
> > > > assert() is just to be informative.
> > > > And how about src first like others below?
> > > > 
> > > Yes, I will update above.
> > > 
> > > > > +	p_parser->keys[idx].mask = 0xFFFF0000;
> > > > > +	p_parser->keys[idx].val = ((const struct rte_flow_action_set_tp *)
> > > > > +			actions->conf)->port;
> > > > 
> > > > Assigning 2B to 4B big-endian stroage? Doesn't look consistent with the mask
> > > > above - 0xffff0000.
> > > > 
> > > So it should be as following ?
> > > p_parser->keys[idx].val = (__u32)((const struct rte_flow_action_set_tp *))
> > >                 actions->conf)->port;
> > 
> > You can figure it out by actual tests but I think the following would be right.
> > 	p_parser->keys[idx].val =
> > 		rte_cpu_to_be_32(((const struct rte_flow_action_set_tp *)
> > 				  actions->conf)->port);
> > 
> > Please verify it by testing anyway.
> > 
> No, it doesn't work correctly if it's converted to BE.
> As my understanding the Netlink message should be expressed in host-byte order (?)

As far as I know rte_flow takes network-byte order for args and tcf na msg takes
host-byte order. Please refer to the "override_na_vlan_id:" in mlx5_flow_tcf.c,
rte_be_to_cpu_16() is used there.  I'm still confusing why the mask is
0xffff0000 above. Please make sure your code works correctly by multiple test
cases and hopefully I can hear clear explanation what is right and why.

> > > > > +	p_parser->sel.nkeys = (++idx);
> > > > > +}
> > > > > +
> > > > > +static void
> > > > > +flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
> > > > > +				 struct pedit_parser *p_parser)
> > > > > +{
> > > > > +	int idx = p_parser->sel.nkeys;
> > > > > +	int keys = flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
> > > > > +	int off_base =
> > > > > +		actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ? 8 : 24;
> > > > 
> > > > offsetof(struct ipv6_hdr, src_addr) :
> > > > offsetof(struct ipv6_hdr, dst_addr);
> > > > 
> > > Got it!
> > > > > +	const struct rte_flow_action_set_ipv6 *conf =
> > > > > +		(const struct rte_flow_action_set_ipv6 *)actions->conf;
> > > > > +
> > > > > +	for (int i = 0; i < keys; i++, idx++) {
> > > > > +		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
> > > > > +		p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
> > > > > +		p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
> > > > > +		p_parser->keys[idx].mask = ~UINT32_MAX;
> > > > > +		memcpy(&p_parser->keys[idx].val,
> > > > > +			conf->ipv6_addr + i *  SZ_PEDIT_KEY_VAL,
> > > > > +			SZ_PEDIT_KEY_VAL);
> > > > > +	}
> > > > > +	p_parser->sel.nkeys += keys;
> > > > > +}
> > > > > +
> > > > > +static void
> > > > > +flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
> > > > 
> > > > How about getting rte_flow_action_set_ipv4 instead of rte_flow_action?
> > > > Same comment for ipv6 and tp_port.
> > > > 
> > > What's the benefit by using rte_flow_action_set_ipv4 and how I know it's
> > > for src or dst address ?
> > 
> > Just to make the function neat but I overlooked that you still need
> > actions->type. Please disregard my previous comment.
> > 
> OK~
> > > > > +				 struct pedit_parser *p_parser)
> > > > > +{
> > > > > +	int idx = p_parser->sel.nkeys;
> > > > > +
> > > > > +	p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
> > > > > +	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
> > > > > +	p_parser->keys[idx].off =
> > > > > +		(actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ? 12 : 16);
> > > > 
> > > > offsetof(struct ipv4_hdr, src_addr) :
> > > > offsetof(struct ipv4_hdr, dst_addr);
> > > > 
> > > Got it!
> > > > > +	p_parser->keys[idx].mask = ~UINT32_MAX;
> > > > > +	p_parser->keys[idx].val =
> > > > > +		((const struct rte_flow_action_set_ipv4 *)
> > > > > +		 actions->conf)->ipv4_addr;
> > > > > +	p_parser->sel.nkeys = (++idx);
> > > > > +}
> > > > > +
> > > > > +static int
> > > > > +flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
> > > > > +			      const struct rte_flow_action **actions,
> > > > > +			      uint64_t item_flags)
> > > > > +{
> > > > > +	struct pedit_parser p_parser;
> > > > > +
> > > > > +	memset(&p_parser, 0, sizeof(p_parser));
> > > > > +	mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
> > > > > +	struct nlattr *na_act_options = mnl_attr_nest_start(nl,
> > > > > +							    TCA_ACT_OPTIONS);
> > > > > +	/* all modify header actions should be in one tc-pedit action */
> > > > > +	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
> > > > 
> > > > It seems that you want to aggregate all the pedit actions and form a single
> > > > na attr. But what if rte_flow_action_set_* are not contiguous? E.g.
> > > > 
> > > > flow create ... actions set1 / set2 / port_id / set3 / end
> > > > 
> > > > Then, it would have two pedit na attrs. Is that okay?
> > > > Or, need to think about another way?
> > > > 
> > > > Same will happen in flow_tcf_get_pedit_actions_size().
> > > > 
> > > It's OK if we have more than one pedit na attrs.
> > > _BUT_ only last pedit take effect based on my experiment
> > 
> > Then, shouldn't we give some warning to user in validation? So that user can
> > have right expectation and reorder the actions as their intention like:
> > 	flow create ... actions set1 / set2 / set3 / port_id / end
> > 
> > Otherwise set1 and set2 will be lost according to your comment.
> > 
> I prefer to give error to user in validation because this is simple.

Good.

> > Or, how about making PMD do the right thing. I mean, even if the set actions are
> > scattered, PMD can collect it and apply in a single na attr?
> > 
> My feeling is the above approach will be (become) complex. It looks like we introduce
> new functionality which re-order all actions, something like rss_expand. 

+1

> > > > > +		switch ((*actions)->type) {
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > > > > +			flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
> > > > > +			break;
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > > > > +			flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
> > > > > +			break;
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > > > > +			flow_tcf_pedit_key_set_tp_port(*actions,
> > > > > +							&p_parser, item_flags);
> > > > > +			break;
> > > > > +		default:
> > > > > +			goto pedit_mnl_msg_done;
> > > > > +		}
> > > > > +	}
> > > > > +pedit_mnl_msg_done:
> > > > > +	p_parser.sel.action = TC_ACT_PIPE;
> > > > > +	mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
> > > > > +			sizeof(p_parser.sel) +
> > > > > +			p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
> > > > > +			&p_parser);
> > > > > +	struct nlattr *na_pedit_keys = mnl_attr_nest_start(nl,
> > > > > +					TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
> > > > > +	for (int i = 0; i < p_parser.sel.nkeys; i++) {
> > > > > +		struct nlattr *na_pedit_key = mnl_attr_nest_start(nl,
> > > > > +					TCA_PEDIT_KEY_EX | NLA_F_NESTED);
> > > > > +		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
> > > > > +				 p_parser.keys_ex[i].htype);
> > > > > +		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
> > > > > +				 p_parser.keys_ex[i].cmd);
> > > > > +		mnl_attr_nest_end(nl, na_pedit_key);
> > > > > +	}
> > > > > +	mnl_attr_nest_end(nl, na_pedit_keys);
> > > > > +	mnl_attr_nest_end(nl, na_act_options);
> > > > > +	(*actions)--;
> > > > > +	return 0;
> > > > > +}
> > > > > +
> > > > > +/**
> > > > > + * Calculate max memory size of one TC-pedit actions.
> > > > > + * One TC-pedit action can contain set of keys each defining
> > > > > + * a rewrite element (rte_flow action)
> > > > > + *
> > > > > + * @param[in] actions
> > > > > + *   actions specification.
> > > > > + * @param[inout] action_flags
> > > > > + *   actions flags
> > > > > + * @param[inout] size
> > > > > + *   accumulated size
> > > > > + * @return
> > > > > + *   Max memory size of one TC-pedit action
> > > > > + */
> > > > > +static int
> > > > > +flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
> > > > > +				uint64_t *action_flags)
> > > > > +{
> > > > > +	int pedit_size = 0;
> > > > > +	int keys = 0;
> > > > > +	uint64_t flags = 0;
> > > > > +
> > > > > +	pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
> > > > > +		      SZ_NLATTR_STRZ_OF("pedit") +
> > > > > +		      SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
> > > > > +	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
> > > > > +		switch ((*actions)->type) {
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > > > > +			keys += flow_tcf_calc_pedit_keys(IPV4_ADDR_LEN);
> > > > > +			flags |= MLX5_ACTION_SET_IPV4_SRC;
> > > > > +			break;
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > > > > +			keys += flow_tcf_calc_pedit_keys(IPV4_ADDR_LEN);
> > > > > +			flags |= MLX5_ACTION_SET_IPV4_DST;
> > > > > +			break;
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > > > > +			keys += flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
> > > > > +			flags |= MLX5_ACTION_SET_IPV6_SRC;
> > > > > +			break;
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > > > > +			keys += flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
> > > > > +			flags |= MLX5_ACTION_SET_IPV6_DST;
> > > > > +			break;
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > > > > +			/* TCP is as same as UDP */
> > > > > +			keys += flow_tcf_calc_pedit_keys(TP_PORT_LEN);
> > > > > +			flags |= MLX5_ACTION_SET_TP_SRC;
> > > > > +			break;
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > > > > +			/* TCP is as same as UDP */
> > > > > +			keys += flow_tcf_calc_pedit_keys(TP_PORT_LEN);
> > > > > +			flags |= MLX5_ACTION_SET_TP_DST;
> > > > > +			break;
> > > > > +		default:
> > > > > +			goto get_pedit_action_size_done;
> > > > > +		}
> > > > > +	}
> > > > > +get_pedit_action_size_done:
> > > > > +	/* TCA_PEDIT_PARAMS_EX */
> > > > > +	pedit_size += SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
> > > > > +			keys * sizeof(struct tc_pedit_key));
> > > > 
> > > > > +	pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS */
> > > > > +	pedit_size += keys *
> > > > > +		/* TCA_PEDIT_KEY_EX + HTYPE + CMD */
> > > > > +		(SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) + SZ_NLATTR_DATA_OF(2));
> > > > > +	(*action_flags) |= flags;
> > > > > +	(*actions)--;
> > > > > +	return pedit_size;
> > > > > +}
> > > > > +
> > > > >  /**
> > > > >   * Retrieve mask for pattern item.
> > > > >   *
> > > > > @@ -430,6 +708,8 @@ flow_tcf_validate(struct rte_eth_dev *dev,
> > > > >  			of_set_vlan_vid;
> > > > >  		const struct rte_flow_action_of_set_vlan_pcp *
> > > > >  			of_set_vlan_pcp;
> > > > > +		const struct rte_flow_action_set_ipv4 *set_ipv4;
> > > > > +		const struct rte_flow_action_set_ipv6 *set_ipv6;
> > > > >  	} conf;
> > > > >  	uint32_t item_flags = 0;
> > > > >  	uint32_t action_flags = 0;
> > > > > @@ -690,12 +970,64 @@ flow_tcf_validate(struct rte_eth_dev *dev,
> > > > >  		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
> > > > >  			action_flags |= MLX5_ACTION_OF_SET_VLAN_PCP;
> > > > >  			break;
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > > > > +			action_flags |= MLX5_ACTION_SET_IPV4_SRC;
> > > > > +			break;
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > > > > +			action_flags |= MLX5_ACTION_SET_IPV4_DST;
> > > > > +			break;
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > > > > +			action_flags |= MLX5_ACTION_SET_IPV6_SRC;
> > > > > +			break;
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > > > > +			action_flags |= MLX5_ACTION_SET_IPV6_DST;
> > > > > +			break;
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > > > > +			action_flags |= MLX5_ACTION_SET_TP_SRC;
> > > > > +			break;
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > > > > +			action_flags |= MLX5_ACTION_SET_TP_DST;
> > > > > +			break;
> > > > >  		default:
> > > > >  			return rte_flow_error_set(error, ENOTSUP,
> > > > >  						  RTE_FLOW_ERROR_TYPE_ACTION,
> > > > >  						  actions,
> > > > >  						  "action not supported");
> > > > >  		}
> > > > > +		if (IS_MODIFY_ACTION(actions->type)) {
> > 
> > This would be a redundant 'if' as classification is already done above. So, how
> > about adding a goto label at the end of this code - 'err_no_action_conf:', and
> > use goto above.  E.g.,
> > 
> > 		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > 			action_flags |= MLX5_ACTION_SET_TP_DST;
> > 			if (!actions->conf)
> > 				goto err_no_action_conf;
> > 			break;
> > 
> In some level I do agree with you it's redundant. But things like this kind of
> redundancy is not avoidable. I mean if we use "goto err_no_action_conf", the
> "if (!actions->conf) goto err_no_action_conf" has to be repeated in each "case"
> which needs to check conf or you think it's acceptable ?

But in your approach, the redundant check (IS_MODIFY_ACTION) is inside a loop
and it has to be checked even if there's no 'set' action in the action list.

> > And if I may, can I ask you to add the same to RTE_FLOW_ACTION_TYPE_PORT_ID?
> > 
> Yes, I will add.
> > > > > +			if (!actions->conf)
> > > > > +				return rte_flow_error_set(error, ENOTSUP,
> > > > > +						RTE_FLOW_ERROR_TYPE_ACTION_CONF,
> > > > > +						actions,
> > > > > +						"action configuration not set");
> > > > > +		}
> > > > > +	}
> > > > > +	if (action_flags &
> > > > > +	   (MLX5_ACTION_SET_IPV4_SRC | MLX5_ACTION_SET_IPV4_DST)) {
> > > > > +		if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
> > > > > +			return rte_flow_error_set(error, ENOTSUP,
> > > > > +						  RTE_FLOW_ERROR_TYPE_ACTION,
> > > > > +						  actions,
> > > > > +						  "no ipv4 item found in"
> > > > > +						  " pattern");
> > > > > +	}
> > > > > +	if (action_flags &
> > > > > +	   (MLX5_ACTION_SET_IPV6_SRC | MLX5_ACTION_SET_IPV6_DST)) {
> > > > > +		if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
> > > > > +			return rte_flow_error_set(error, ENOTSUP,
> > > > > +				RTE_FLOW_ERROR_TYPE_ACTION,
> > > > > +				actions,
> > > > > +				"no ipv6 item found in pattern");
> > > > > +	}
> > > > > +	if (action_flags & (MLX5_ACTION_SET_TP_SRC | MLX5_ACTION_SET_TP_DST)) {
> > > > > +		if (!(item_flags &
> > > > > +		     (MLX5_FLOW_LAYER_OUTER_L4_UDP |
> > > > > +		      MLX5_FLOW_LAYER_OUTER_L4_TCP)))
> > > > > +			return rte_flow_error_set(error, ENOTSUP,
> > > > > +						RTE_FLOW_ERROR_TYPE_ACTION,
> > > > > +						actions,
> > > > > +						"no TCP/UDP item found in"
> > > > > +						" pattern");
> > 
> > All the errors you added, I think EINVAL would be a better fit?
> > 
> Yes, EINVAL should be better.
> > > > Isn't this 'set' action compatible with drop action? No point of modifying
> > > > packet which will be dropped, isn't it?
> > > > 
> > > Yes, you are absolutely right :-)
> > 
> > I believe you'll add a validation code for that in the next version. :-)
> > 
> Of course ;-)
> > > > >  	}
> > > > >  	return 0;
> > > > >  }
> > > > > @@ -840,6 +1172,15 @@ flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
> > > > >  				SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
> > > > >  				SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
> > > > >  			break;
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > > > > +			size += flow_tcf_get_pedit_actions_size(&actions,
> > > > > +								&flags);
> > > > > +			break;
> > > > >  		default:
> > > > >  			DRV_LOG(WARNING,
> > > > >  				"unsupported action %p type %d,"
> > > > > @@ -998,6 +1339,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
> > > > >  	struct nlattr *na_flower_act;
> > > > >  	struct nlattr *na_vlan_id = NULL;
> > > > >  	struct nlattr *na_vlan_priority = NULL;
> > > > > +	uint64_t item_flags = 0;
> > > > >  
> > > > >  	claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
> > > > >  						PTOI_TABLE_SZ_MAX(dev)));
> > > > > @@ -1189,6 +1531,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
> > > > >  			}
> > > > >  			break;
> > > > >  		case RTE_FLOW_ITEM_TYPE_UDP:
> > > > > +			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
> > > > 
> > > > Let's add the same to the rest of items like flow_tcf_validate().
> > > > 
> > > OK!
> > > > 
> > > > Thanks,
> > > > Yongseok
> > > > 
> > > > >  			mask.udp = flow_tcf_item_mask
> > > > >  				(items, &rte_flow_item_udp_mask,
> > > > >  				 &flow_tcf_mask_supported.udp,
> > > > > @@ -1218,6 +1561,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
> > > > >  			}
> > > > >  			break;
> > > > >  		case RTE_FLOW_ITEM_TYPE_TCP:
> > > > > +			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
> > > > >  			mask.tcp = flow_tcf_item_mask
> > > > >  				(items, &rte_flow_item_tcp_mask,
> > > > >  				 &flow_tcf_mask_supported.tcp,
> > > > > @@ -1368,6 +1712,18 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
> > > > >  					conf.of_set_vlan_pcp->vlan_pcp;
> > > > >  			}
> > > > >  			break;
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > > > > +			na_act_index =
> > > > > +				mnl_attr_nest_start(nlh, na_act_index_cur++);
> > > > > +			flow_tcf_create_pedit_mnl_msg(nlh,
> > > > > +						      &actions, item_flags);
> > > > > +			mnl_attr_nest_end(nlh, na_act_index);
> > > > > +			break;
> > > > >  		default:
> > > > >  			return rte_flow_error_set(error, ENOTSUP,
> > > > >  						  RTE_FLOW_ERROR_TYPE_ACTION,
> > > > > -- 
> > > > > 2.17.1
> > > > > 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [dpdk-dev] [PATCH] net/mlx5: eswitch-IP address UDP/TCP port rewrite
  2018-10-08 22:08         ` Yongseok Koh
@ 2018-10-09  7:03           ` Jack Min
  2018-10-09  8:55             ` Yongseok Koh
  0 siblings, 1 reply; 13+ messages in thread
From: Jack Min @ 2018-10-09  7:03 UTC (permalink / raw)
  To: Yongseok Koh; +Cc: Shahaf Shuler, dev

On 18-10-09 06:08:32, Yongseok Koh wrote:
> On Mon, Oct 08, 2018 at 07:22:03PM +0800, Xiaoyu Min wrote:
> > On 18-10-02 04:19:00, Yongseok Koh wrote:
> > > On Sun, Sep 30, 2018 at 03:21:04PM +0800, Xiaoyu Min wrote:
> > > > On 18-09-29 07:03:33, Yongseok Koh wrote:
> > > > > On Tue, Sep 25, 2018 at 07:51:06PM +0800, Xiaoyu Min wrote:
> > > > > > Offload the following rte_flow actions by inserting accordingly
> > > > > > E-Switch rules via TC Flower driver
> > > > > > 
> > > > > >  - RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC
> > > > > >  - RTE_FLOW_ACTION_TYPE_SET_IPV4_DST
> > > > > >  - RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC
> > > > > >  - RTE_FLOW_ACTION_TYPE_SET_IPV6_DST
> > > > > >  - RTE_FLOW_ACTION_TYPE_SET_TP_SRC
> > > > > >  - RTE_FLOW_ACTION_TYPE_SET_TP_DST
> > > > > 
> > > > > Can you put an example command of testpmd for the reference?
> > > > > 
> > > > Sure, I'll add.
> > > > 
> > > > > > Signed-off-by: Xiaoyu Min <jackmin@mellanox.com>
> > > > > > ---
> > > > > > This patch bases on Rahul Lakkireddy's patchs[1][2] and
> > > > > > Yongseok Koh's patchset [3]
> > > > > > 
> > > > > > [1] https://patches.dpdk.org/patch/45191/
> > > > > > [2] https://patches.dpdk.org/patch/45192/
> > > > > > [3] https://patches.dpdk.org/project/dpdk/list/?series=1474
> > > > > > 
> > > > > > 
> > > > > >  drivers/net/mlx5/Makefile        |   5 +
> > > > > >  drivers/net/mlx5/meson.build     |   2 +
> > > > > >  drivers/net/mlx5/mlx5_flow.h     |   6 +
> > > > > >  drivers/net/mlx5/mlx5_flow_tcf.c | 356 +++++++++++++++++++++++++++++++
> > > > > >  4 files changed, 369 insertions(+)
> > > > > > 
> > > > > > diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
> > > > > > index ca1de9f21..49b95e78e 100644
> > > > > > --- a/drivers/net/mlx5/Makefile
> > > > > > +++ b/drivers/net/mlx5/Makefile
> > > > > > @@ -346,6 +346,11 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
> > > > > >  		linux/tc_act/tc_vlan.h \
> > > > > >  		enum TCA_VLAN_PUSH_VLAN_PRIORITY \
> > > > > >  		$(AUTOCONF_OUTPUT)
> > > > > > +	$Q sh -- '$<' '$@' \
> > > > > > +		HAVE_TC_ACT_PEDIT \
> > > > > > +		linux/tc_act/tc_pedit.h \
> > > > > > +		enum TCA_PEDIT_KEY_EX_HDR_TYPE_UDP \
> > > > > > +		$(AUTOCONF_OUTPUT)
> > > > > >  	$Q sh -- '$<' '$@' \
> > > > > >  		HAVE_SUPPORTED_40000baseKR4_Full \
> > > > > >  		/usr/include/linux/ethtool.h \
> > > > > > diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
> > > > > > index fd93ac162..ef6a85101 100644
> > > > > > --- a/drivers/net/mlx5/meson.build
> > > > > > +++ b/drivers/net/mlx5/meson.build
> > > > > > @@ -182,6 +182,8 @@ if build
> > > > > >  		'TCA_FLOWER_KEY_VLAN_ETH_TYPE' ],
> > > > > >  		[ 'HAVE_TC_ACT_VLAN', 'linux/tc_act/tc_vlan.h',
> > > > > >  		'TCA_VLAN_PUSH_VLAN_PRIORITY' ],
> > > > > > +		[ 'HAVE_TC_ACT_PEDIT', 'linux/tc_act/tc_pedit.h',
> > > > > > +		'TCA_PEDIT_KEY_EX_HDR_TYPE_UDP' ],
> > > > > >  		[ 'HAVE_RDMA_NL_NLDEV', 'rdma/rdma_netlink.h',
> > > > > >  		'RDMA_NL_NLDEV' ],
> > > > > >  		[ 'HAVE_RDMA_NLDEV_CMD_GET', 'rdma/rdma_netlink.h',
> > > > > > diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
> > > > > > index 10d700a7f..be182a643 100644
> > > > > > --- a/drivers/net/mlx5/mlx5_flow.h
> > > > > > +++ b/drivers/net/mlx5/mlx5_flow.h
> > > > > > @@ -87,6 +87,12 @@
> > > > > >  #define MLX5_ACTION_OF_PUSH_VLAN (1u << 8)
> > > > > >  #define MLX5_ACTION_OF_SET_VLAN_VID (1u << 9)
> > > > > >  #define MLX5_ACTION_OF_SET_VLAN_PCP (1u << 10)
> > > > > > +#define MLX5_ACTION_SET_IPV4_SRC (1u << 11)
> > > > > > +#define MLX5_ACTION_SET_IPV4_DST (1u << 12)
> > > > > > +#define MLX5_ACTION_SET_IPV6_SRC (1u << 13)
> > > > > > +#define MLX5_ACTION_SET_IPV6_DST (1u << 14)
> > > > > > +#define MLX5_ACTION_SET_TP_SRC (1u << 15)
> > > > > > +#define MLX5_ACTION_SET_TP_DST (1u << 16)
> > > > > >  
> > > > > >  /* possible L3 layers protocols filtering. */
> > > > > >  #define MLX5_IP_PROTOCOL_TCP 6
> > > > > > diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
> > > > > > index 14376188e..85c92f369 100644
> > > > > > --- a/drivers/net/mlx5/mlx5_flow_tcf.c
> > > > > > +++ b/drivers/net/mlx5/mlx5_flow_tcf.c
> > > > > > @@ -53,6 +53,62 @@ struct tc_vlan {
> > > > > >  
> > > > > >  #endif /* HAVE_TC_ACT_VLAN */
> > > > > >  
> > > > > > +#ifdef HAVE_TC_ACT_PEDIT
> > > > > > +
> > > > > > +#include <linux/tc_act/tc_pedit.h>
> > > > > > +
> > > > > > +#else /* HAVE_TC_ACT_VLAN */
> > > > > > +enum {
> > > > > > +	TCA_PEDIT_UNSPEC,
> > > > > > +	TCA_PEDIT_TM,
> > > > > > +	TCA_PEDIT_PARMS,
> > > > > > +	TCA_PEDIT_PAD,
> > > > > > +	TCA_PEDIT_PARMS_EX,
> > > > > > +	TCA_PEDIT_KEYS_EX,
> > > > > > +	TCA_PEDIT_KEY_EX,
> > > > > > +	__TCA_PEDIT_MAX
> > > > > > +};
> > > > > > +
> > > > > > +enum {
> > > > > > +	TCA_PEDIT_KEY_EX_HTYPE = 1,
> > > > > > +	TCA_PEDIT_KEY_EX_CMD = 2,
> > > > > > +	__TCA_PEDIT_KEY_EX_MAX
> > > > > > +};
> > > > > > +
> > > > > > +enum pedit_header_type {
> > > > > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
> > > > > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
> > > > > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
> > > > > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
> > > > > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
> > > > > > +	TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
> > > > > > +	__PEDIT_HDR_TYPE_MAX,
> > > > > > +};
> > > > > > +
> > > > > > +enum pedit_cmd {
> > > > > > +	TCA_PEDIT_KEY_EX_CMD_SET = 0,
> > > > > > +	TCA_PEDIT_KEY_EX_CMD_ADD = 1,
> > > > > > +	__PEDIT_CMD_MAX,
> > > > > > +};
> > > > > > +
> > > > > > +struct tc_pedit_key {
> > > > > > +	__u32           mask;  /* AND */
> > > > > > +	__u32           val;   /*XOR */
> > > > > > +	__u32           off;  /*offset */
> > > > > > +	__u32           at;
> > > > > > +	__u32           offmask;
> > > > > > +	__u32           shift;
> > > > > > +};
> > > > > > +
> > > > > > +struct tc_pedit_sel {
> > > > > > +	tc_gen;
> > > > > > +	unsigned char           nkeys;
> > > > > > +	unsigned char           flags;
> > > > > > +	struct tc_pedit_key     keys[0];
> > > > > > +};
> > > > > > +
> > > > > > +#endif /* HAVE_TC_ACT_VLAN */
> > > > > > +
> > > > > >  /* Normally found in linux/netlink.h. */
> > > > > >  #ifndef NETLINK_CAP_ACK
> > > > > >  #define NETLINK_CAP_ACK 10
> > > > > > @@ -153,6 +209,14 @@ struct tc_vlan {
> > > > > >  #define IPV6_ADDR_LEN 16
> > > > > >  #endif
> > > > > >  
> > > > > > +#ifndef IPV4_ADDR_LEN
> > > > > > +#define IPV4_ADDR_LEN 4
> > > > > > +#endif
> > > > > > +
> > > > > > +#ifndef TP_PORT_LEN
> > > > > > +#define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
> > > > > > +#endif
> > > > > > +
> > > > > >  /** Empty masks for known item types. */
> > > > > >  static const union {
> > > > > >  	struct rte_flow_item_port_id port_id;
> > > > > > @@ -227,6 +291,220 @@ struct flow_tcf_ptoi {
> > > > > >  
> > > > > >  #define MLX5_TCF_FATE_ACTIONS (MLX5_ACTION_DROP | MLX5_ACTION_PORT_ID)
> > > > > >  
> > > > > > +#define IS_MODIFY_ACTION(act_) ({typeof(act_) act = (act_); \
> > > > > > +		((act) == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC || \
> > > > > > +		(act) == RTE_FLOW_ACTION_TYPE_SET_IPV4_DST  || \
> > > > > > +		(act) == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC  || \
> > > > > > +		(act) == RTE_FLOW_ACTION_TYPE_SET_IPV6_DST  || \
> > > > > > +		(act) == RTE_FLOW_ACTION_TYPE_SET_TP_SRC    || \
> > > > > > +		(act) == RTE_FLOW_ACTION_TYPE_SET_TP_DST) ?    \
> > > > > > +		1 : 0; })
> > > > > 
> > > > > The reason why you need this complex multi-conditional macro is that
> > > > > RTE_FLOW_ACTION_TYPE_* isn't a bitmask. But, as this actions will be converted
> > > > > to MLX5_ACTION_* which is a bitmask, you can use that instead. Then, this
> > > > > would be enough to be:
> > > > > 
> > > > > #define MLX5_TCF_SET_ACTIONS \
> > > > > 	(MLX5_ACTION_SET_IPV4_SRC | ...)
> > > > > 
> > > > > And in the flow_tcf_validate() below,
> > > > > 	if (action_flags & MLX5_TCF_SET_ACTIONS) {
> > > > > 		...
> > > > > 	}
> > > > > 
> > > > Well, I did consider using bitmask but action_flags is an _accumulated_ variable,
> > > > records all the actions parsed so far.
> > > > But, here, I need to know what is the _current_ action and whether it belongs to modify
> > > > actions. If using bitmask, Looks like a new variable (i.e current_action) needed (?)
> > > > 
> > > > case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > > >     current_action = MLX5_ACTION_SET_IPV4_SRC;
> > > >     .....
> > > > 
> > > > if (current_action & MLX5_TCF_SET_ACTIONS) ...
> > > >  ...
> > > > 
> > > > action_flags |= current_action;
> > > > 
> > > > I feel more code change needed or you think it's worth?
> > > 
> > > Understood what you meant.
> > > Please see my comment below in the flow_tcf_validate().
> > > 
> > > > > And please note that I'm going to rename MLX5_ACTION_* to MLX5_FLOW_ACTION_*.
> > > > > Please refer to "net/mlx5: rename flow macros" in PR #885. You might need to
> > > > > rebase it again.
> > > > > 
> > > > Sure, I'll rebase on it
> > > > 
> > > > > > +#define MAX_PEDIT_KEYS (128)
> > > > > > +#define SZ_PEDIT_KEY_VAL (4)
> > > > > > +
> > > > > > +struct pedit_key_ex {
> > > > > > +	enum pedit_header_type htype;
> > > > > > +	enum pedit_cmd cmd;
> > > > > > +};
> > > > > > +
> > > > > > +struct pedit_parser {
> > > > > > +	struct tc_pedit_sel sel;
> > > > > > +	struct tc_pedit_key keys[MAX_PEDIT_KEYS];
> > > > > > +	struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
> > > > > > +};
> > > > > > +
> > > > > > +static int
> > > > > > +flow_tcf_calc_pedit_keys(const uint64_t size)
> > > > > 
> > > > > Please add documentation by comment for every funcs you add.
> > > > > Refer to the other existing ones for formality.
> > > > > 
> > > > Sure!
> > > > > > +{
> > > > > > +	int keys = (size / SZ_PEDIT_KEY_VAL) +
> > > > > > +		((size % SZ_PEDIT_KEY_VAL) ? 1 : 0);
> > > > > 
> > > > > Indentation.
> > > > > 
> > > > Will fix it
> > > > > > +	return keys;
> > > > > > +}
> > > > > > +
> > > > > > +static void
> > > > > > +flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
> > > > > > +				struct pedit_parser *p_parser,
> > > > > > +				uint64_t item_flags)
> > > > > > +{
> > > > > > +	int idx = p_parser->sel.nkeys;
> > > > > > +
> > > > > > +	if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
> > > > > > +		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
> > > > > > +	if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
> > > > > > +		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
> > > > > > +	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
> > > > > > +	p_parser->keys[idx].off =
> > > > > > +		actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_DST ? 2 : 0;
> > > > > 
> > > > > 	assert(offsetof(struct tcp_hdr, src_port) ==
> > > > > 	       offsetof(struct udp_hdr, src_port));
> > > > > 	assert(offsetof(struct tcp_hdr, dst_port) ==
> > > > > 	       offsetof(struct udp_hdr, dst_port));
> > > > > 	p_parser->keys[idx].off =
> > > > > 		actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ?
> > > > > 		offsetof(struct tcp_hdr, src_port) :
> > > > > 		offsetof(struct tcp_hdr, dst_port);
> > > > > 
> > > > > assert() is just to be informative.
> > > > > And how about src first like others below?
> > > > > 
> > > > Yes, I will update above.
> > > > 
> > > > > > +	p_parser->keys[idx].mask = 0xFFFF0000;
> > > > > > +	p_parser->keys[idx].val = ((const struct rte_flow_action_set_tp *)
> > > > > > +			actions->conf)->port;
> > > > > 
> > > > > Assigning 2B to 4B big-endian stroage? Doesn't look consistent with the mask
> > > > > above - 0xffff0000.
> > > > > 
> > > > So it should be as following ?
> > > > p_parser->keys[idx].val = (__u32)((const struct rte_flow_action_set_tp *))
> > > >                 actions->conf)->port;
> > > 
> > > You can figure it out by actual tests but I think the following would be right.
> > > 	p_parser->keys[idx].val =
> > > 		rte_cpu_to_be_32(((const struct rte_flow_action_set_tp *)
> > > 				  actions->conf)->port);
> > > 
> > > Please verify it by testing anyway.
> > > 
> > No, it doesn't work correctly if it's converted to BE.
> > As my understanding the Netlink message should be expressed in host-byte order (?)
> 
> As far as I know rte_flow takes network-byte order for args and tcf na msg takes
> host-byte order. Please refer to the "override_na_vlan_id:" in mlx5_flow_tcf.c,
> rte_be_to_cpu_16() is used there.  I'm still confusing why the mask is
> 0xffff0000 above. Please make sure your code works correctly by multiple test
> cases and hopefully I can hear clear explanation what is right and why.
> 
Hey Koh,
1. yes, rte_flow takes network-byte order. Here the 'port' is already converted to
BE by testpmd.
If the rte_be_to_cpu_16 is used just like "override_na_vlan_id" does the result
is not correct. Things like TC-pedit takes network-byte order.
2. For the mask, honestly, I'm not very clear. The only reference is from [1] and 
my verification by using TC show command.
[1] https://www.netdevconf.org/2.1/slides/apr7/tc-workshop/vadai-tc-workshop-update.pdf

> > > > > > +	p_parser->sel.nkeys = (++idx);
> > > > > > +}
> > > > > > +
> > > > > > +static void
> > > > > > +flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
> > > > > > +				 struct pedit_parser *p_parser)
> > > > > > +{
> > > > > > +	int idx = p_parser->sel.nkeys;
> > > > > > +	int keys = flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
> > > > > > +	int off_base =
> > > > > > +		actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ? 8 : 24;
> > > > > 
> > > > > offsetof(struct ipv6_hdr, src_addr) :
> > > > > offsetof(struct ipv6_hdr, dst_addr);
> > > > > 
> > > > Got it!
> > > > > > +	const struct rte_flow_action_set_ipv6 *conf =
> > > > > > +		(const struct rte_flow_action_set_ipv6 *)actions->conf;
> > > > > > +
> > > > > > +	for (int i = 0; i < keys; i++, idx++) {
> > > > > > +		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
> > > > > > +		p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
> > > > > > +		p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
> > > > > > +		p_parser->keys[idx].mask = ~UINT32_MAX;
> > > > > > +		memcpy(&p_parser->keys[idx].val,
> > > > > > +			conf->ipv6_addr + i *  SZ_PEDIT_KEY_VAL,
> > > > > > +			SZ_PEDIT_KEY_VAL);
> > > > > > +	}
> > > > > > +	p_parser->sel.nkeys += keys;
> > > > > > +}
> > > > > > +
> > > > > > +static void
> > > > > > +flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
> > > > > 
> > > > > How about getting rte_flow_action_set_ipv4 instead of rte_flow_action?
> > > > > Same comment for ipv6 and tp_port.
> > > > > 
> > > > What's the benefit by using rte_flow_action_set_ipv4 and how I know it's
> > > > for src or dst address ?
> > > 
> > > Just to make the function neat but I overlooked that you still need
> > > actions->type. Please disregard my previous comment.
> > > 
> > OK~
> > > > > > +				 struct pedit_parser *p_parser)
> > > > > > +{
> > > > > > +	int idx = p_parser->sel.nkeys;
> > > > > > +
> > > > > > +	p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
> > > > > > +	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
> > > > > > +	p_parser->keys[idx].off =
> > > > > > +		(actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ? 12 : 16);
> > > > > 
> > > > > offsetof(struct ipv4_hdr, src_addr) :
> > > > > offsetof(struct ipv4_hdr, dst_addr);
> > > > > 
> > > > Got it!
> > > > > > +	p_parser->keys[idx].mask = ~UINT32_MAX;
> > > > > > +	p_parser->keys[idx].val =
> > > > > > +		((const struct rte_flow_action_set_ipv4 *)
> > > > > > +		 actions->conf)->ipv4_addr;
> > > > > > +	p_parser->sel.nkeys = (++idx);
> > > > > > +}
> > > > > > +
> > > > > > +static int
> > > > > > +flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
> > > > > > +			      const struct rte_flow_action **actions,
> > > > > > +			      uint64_t item_flags)
> > > > > > +{
> > > > > > +	struct pedit_parser p_parser;
> > > > > > +
> > > > > > +	memset(&p_parser, 0, sizeof(p_parser));
> > > > > > +	mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
> > > > > > +	struct nlattr *na_act_options = mnl_attr_nest_start(nl,
> > > > > > +							    TCA_ACT_OPTIONS);
> > > > > > +	/* all modify header actions should be in one tc-pedit action */
> > > > > > +	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
> > > > > 
> > > > > It seems that you want to aggregate all the pedit actions and form a single
> > > > > na attr. But what if rte_flow_action_set_* are not contiguous? E.g.
> > > > > 
> > > > > flow create ... actions set1 / set2 / port_id / set3 / end
> > > > > 
> > > > > Then, it would have two pedit na attrs. Is that okay?
> > > > > Or, need to think about another way?
> > > > > 
> > > > > Same will happen in flow_tcf_get_pedit_actions_size().
> > > > > 
> > > > It's OK if we have more than one pedit na attrs.
> > > > _BUT_ only last pedit take effect based on my experiment
> > > 
> > > Then, shouldn't we give some warning to user in validation? So that user can
> > > have right expectation and reorder the actions as their intention like:
> > > 	flow create ... actions set1 / set2 / set3 / port_id / end
> > > 
> > > Otherwise set1 and set2 will be lost according to your comment.
> > > 
> > I prefer to give error to user in validation because this is simple.
> 
> Good.
> 
> > > Or, how about making PMD do the right thing. I mean, even if the set actions are
> > > scattered, PMD can collect it and apply in a single na attr?
> > > 
> > My feeling is the above approach will be (become) complex. It looks like we introduce
> > new functionality which re-order all actions, something like rss_expand. 
> 
> +1
> 
> > > > > > +		switch ((*actions)->type) {
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > > > > > +			flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
> > > > > > +			break;
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > > > > > +			flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
> > > > > > +			break;
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > > > > > +			flow_tcf_pedit_key_set_tp_port(*actions,
> > > > > > +							&p_parser, item_flags);
> > > > > > +			break;
> > > > > > +		default:
> > > > > > +			goto pedit_mnl_msg_done;
> > > > > > +		}
> > > > > > +	}
> > > > > > +pedit_mnl_msg_done:
> > > > > > +	p_parser.sel.action = TC_ACT_PIPE;
> > > > > > +	mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
> > > > > > +			sizeof(p_parser.sel) +
> > > > > > +			p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
> > > > > > +			&p_parser);
> > > > > > +	struct nlattr *na_pedit_keys = mnl_attr_nest_start(nl,
> > > > > > +					TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
> > > > > > +	for (int i = 0; i < p_parser.sel.nkeys; i++) {
> > > > > > +		struct nlattr *na_pedit_key = mnl_attr_nest_start(nl,
> > > > > > +					TCA_PEDIT_KEY_EX | NLA_F_NESTED);
> > > > > > +		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
> > > > > > +				 p_parser.keys_ex[i].htype);
> > > > > > +		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
> > > > > > +				 p_parser.keys_ex[i].cmd);
> > > > > > +		mnl_attr_nest_end(nl, na_pedit_key);
> > > > > > +	}
> > > > > > +	mnl_attr_nest_end(nl, na_pedit_keys);
> > > > > > +	mnl_attr_nest_end(nl, na_act_options);
> > > > > > +	(*actions)--;
> > > > > > +	return 0;
> > > > > > +}
> > > > > > +
> > > > > > +/**
> > > > > > + * Calculate max memory size of one TC-pedit actions.
> > > > > > + * One TC-pedit action can contain set of keys each defining
> > > > > > + * a rewrite element (rte_flow action)
> > > > > > + *
> > > > > > + * @param[in] actions
> > > > > > + *   actions specification.
> > > > > > + * @param[inout] action_flags
> > > > > > + *   actions flags
> > > > > > + * @param[inout] size
> > > > > > + *   accumulated size
> > > > > > + * @return
> > > > > > + *   Max memory size of one TC-pedit action
> > > > > > + */
> > > > > > +static int
> > > > > > +flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
> > > > > > +				uint64_t *action_flags)
> > > > > > +{
> > > > > > +	int pedit_size = 0;
> > > > > > +	int keys = 0;
> > > > > > +	uint64_t flags = 0;
> > > > > > +
> > > > > > +	pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
> > > > > > +		      SZ_NLATTR_STRZ_OF("pedit") +
> > > > > > +		      SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
> > > > > > +	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
> > > > > > +		switch ((*actions)->type) {
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > > > > > +			keys += flow_tcf_calc_pedit_keys(IPV4_ADDR_LEN);
> > > > > > +			flags |= MLX5_ACTION_SET_IPV4_SRC;
> > > > > > +			break;
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > > > > > +			keys += flow_tcf_calc_pedit_keys(IPV4_ADDR_LEN);
> > > > > > +			flags |= MLX5_ACTION_SET_IPV4_DST;
> > > > > > +			break;
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > > > > > +			keys += flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
> > > > > > +			flags |= MLX5_ACTION_SET_IPV6_SRC;
> > > > > > +			break;
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > > > > > +			keys += flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
> > > > > > +			flags |= MLX5_ACTION_SET_IPV6_DST;
> > > > > > +			break;
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > > > > > +			/* TCP is as same as UDP */
> > > > > > +			keys += flow_tcf_calc_pedit_keys(TP_PORT_LEN);
> > > > > > +			flags |= MLX5_ACTION_SET_TP_SRC;
> > > > > > +			break;
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > > > > > +			/* TCP is as same as UDP */
> > > > > > +			keys += flow_tcf_calc_pedit_keys(TP_PORT_LEN);
> > > > > > +			flags |= MLX5_ACTION_SET_TP_DST;
> > > > > > +			break;
> > > > > > +		default:
> > > > > > +			goto get_pedit_action_size_done;
> > > > > > +		}
> > > > > > +	}
> > > > > > +get_pedit_action_size_done:
> > > > > > +	/* TCA_PEDIT_PARAMS_EX */
> > > > > > +	pedit_size += SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
> > > > > > +			keys * sizeof(struct tc_pedit_key));
> > > > > 
> > > > > > +	pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS */
> > > > > > +	pedit_size += keys *
> > > > > > +		/* TCA_PEDIT_KEY_EX + HTYPE + CMD */
> > > > > > +		(SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) + SZ_NLATTR_DATA_OF(2));
> > > > > > +	(*action_flags) |= flags;
> > > > > > +	(*actions)--;
> > > > > > +	return pedit_size;
> > > > > > +}
> > > > > > +
> > > > > >  /**
> > > > > >   * Retrieve mask for pattern item.
> > > > > >   *
> > > > > > @@ -430,6 +708,8 @@ flow_tcf_validate(struct rte_eth_dev *dev,
> > > > > >  			of_set_vlan_vid;
> > > > > >  		const struct rte_flow_action_of_set_vlan_pcp *
> > > > > >  			of_set_vlan_pcp;
> > > > > > +		const struct rte_flow_action_set_ipv4 *set_ipv4;
> > > > > > +		const struct rte_flow_action_set_ipv6 *set_ipv6;
> > > > > >  	} conf;
> > > > > >  	uint32_t item_flags = 0;
> > > > > >  	uint32_t action_flags = 0;
> > > > > > @@ -690,12 +970,64 @@ flow_tcf_validate(struct rte_eth_dev *dev,
> > > > > >  		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
> > > > > >  			action_flags |= MLX5_ACTION_OF_SET_VLAN_PCP;
> > > > > >  			break;
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > > > > > +			action_flags |= MLX5_ACTION_SET_IPV4_SRC;
> > > > > > +			break;
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > > > > > +			action_flags |= MLX5_ACTION_SET_IPV4_DST;
> > > > > > +			break;
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > > > > > +			action_flags |= MLX5_ACTION_SET_IPV6_SRC;
> > > > > > +			break;
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > > > > > +			action_flags |= MLX5_ACTION_SET_IPV6_DST;
> > > > > > +			break;
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > > > > > +			action_flags |= MLX5_ACTION_SET_TP_SRC;
> > > > > > +			break;
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > > > > > +			action_flags |= MLX5_ACTION_SET_TP_DST;
> > > > > > +			break;
> > > > > >  		default:
> > > > > >  			return rte_flow_error_set(error, ENOTSUP,
> > > > > >  						  RTE_FLOW_ERROR_TYPE_ACTION,
> > > > > >  						  actions,
> > > > > >  						  "action not supported");
> > > > > >  		}
> > > > > > +		if (IS_MODIFY_ACTION(actions->type)) {
> > > 
> > > This would be a redundant 'if' as classification is already done above. So, how
> > > about adding a goto label at the end of this code - 'err_no_action_conf:', and
> > > use goto above.  E.g.,
> > > 
> > > 		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > > 			action_flags |= MLX5_ACTION_SET_TP_DST;
> > > 			if (!actions->conf)
> > > 				goto err_no_action_conf;
> > > 			break;
> > > 
> > In some level I do agree with you it's redundant. But things like this kind of
> > redundancy is not avoidable. I mean if we use "goto err_no_action_conf", the
> > "if (!actions->conf) goto err_no_action_conf" has to be repeated in each "case"
> > which needs to check conf or you think it's acceptable ?
> 
> But in your approach, the redundant check (IS_MODIFY_ACTION) is inside a loop
> and it has to be checked even if there's no 'set' action in the action list.
> 
Yes, it will check very time. But I doubt it will impact performace significantly.

Another thing bother me is, if taking 'goto' approach, beside the 'if (!actions->conf)...',
at this moment, we also need to add 'if (set action is discontinued)' for
each modification case. I'm not sure if this will keep code clear
considering many repeated code there...

> > > And if I may, can I ask you to add the same to RTE_FLOW_ACTION_TYPE_PORT_ID?
> > > 
> > Yes, I will add.
> > > > > > +			if (!actions->conf)
> > > > > > +				return rte_flow_error_set(error, ENOTSUP,
> > > > > > +						RTE_FLOW_ERROR_TYPE_ACTION_CONF,
> > > > > > +						actions,
> > > > > > +						"action configuration not set");
> > > > > > +		}
> > > > > > +	}
> > > > > > +	if (action_flags &
> > > > > > +	   (MLX5_ACTION_SET_IPV4_SRC | MLX5_ACTION_SET_IPV4_DST)) {
> > > > > > +		if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
> > > > > > +			return rte_flow_error_set(error, ENOTSUP,
> > > > > > +						  RTE_FLOW_ERROR_TYPE_ACTION,
> > > > > > +						  actions,
> > > > > > +						  "no ipv4 item found in"
> > > > > > +						  " pattern");
> > > > > > +	}
> > > > > > +	if (action_flags &
> > > > > > +	   (MLX5_ACTION_SET_IPV6_SRC | MLX5_ACTION_SET_IPV6_DST)) {
> > > > > > +		if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
> > > > > > +			return rte_flow_error_set(error, ENOTSUP,
> > > > > > +				RTE_FLOW_ERROR_TYPE_ACTION,
> > > > > > +				actions,
> > > > > > +				"no ipv6 item found in pattern");
> > > > > > +	}
> > > > > > +	if (action_flags & (MLX5_ACTION_SET_TP_SRC | MLX5_ACTION_SET_TP_DST)) {
> > > > > > +		if (!(item_flags &
> > > > > > +		     (MLX5_FLOW_LAYER_OUTER_L4_UDP |
> > > > > > +		      MLX5_FLOW_LAYER_OUTER_L4_TCP)))
> > > > > > +			return rte_flow_error_set(error, ENOTSUP,
> > > > > > +						RTE_FLOW_ERROR_TYPE_ACTION,
> > > > > > +						actions,
> > > > > > +						"no TCP/UDP item found in"
> > > > > > +						" pattern");
> > > 
> > > All the errors you added, I think EINVAL would be a better fit?
> > > 
> > Yes, EINVAL should be better.
> > > > > Isn't this 'set' action compatible with drop action? No point of modifying
> > > > > packet which will be dropped, isn't it?
> > > > > 
> > > > Yes, you are absolutely right :-)
> > > 
> > > I believe you'll add a validation code for that in the next version. :-)
> > > 
> > Of course ;-)
> > > > > >  	}
> > > > > >  	return 0;
> > > > > >  }
> > > > > > @@ -840,6 +1172,15 @@ flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
> > > > > >  				SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
> > > > > >  				SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
> > > > > >  			break;
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > > > > > +			size += flow_tcf_get_pedit_actions_size(&actions,
> > > > > > +								&flags);
> > > > > > +			break;
> > > > > >  		default:
> > > > > >  			DRV_LOG(WARNING,
> > > > > >  				"unsupported action %p type %d,"
> > > > > > @@ -998,6 +1339,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
> > > > > >  	struct nlattr *na_flower_act;
> > > > > >  	struct nlattr *na_vlan_id = NULL;
> > > > > >  	struct nlattr *na_vlan_priority = NULL;
> > > > > > +	uint64_t item_flags = 0;
> > > > > >  
> > > > > >  	claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
> > > > > >  						PTOI_TABLE_SZ_MAX(dev)));
> > > > > > @@ -1189,6 +1531,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
> > > > > >  			}
> > > > > >  			break;
> > > > > >  		case RTE_FLOW_ITEM_TYPE_UDP:
> > > > > > +			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
> > > > > 
> > > > > Let's add the same to the rest of items like flow_tcf_validate().
> > > > > 
> > > > OK!
> > > > > 
> > > > > Thanks,
> > > > > Yongseok
> > > > > 
> > > > > >  			mask.udp = flow_tcf_item_mask
> > > > > >  				(items, &rte_flow_item_udp_mask,
> > > > > >  				 &flow_tcf_mask_supported.udp,
> > > > > > @@ -1218,6 +1561,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
> > > > > >  			}
> > > > > >  			break;
> > > > > >  		case RTE_FLOW_ITEM_TYPE_TCP:
> > > > > > +			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
> > > > > >  			mask.tcp = flow_tcf_item_mask
> > > > > >  				(items, &rte_flow_item_tcp_mask,
> > > > > >  				 &flow_tcf_mask_supported.tcp,
> > > > > > @@ -1368,6 +1712,18 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
> > > > > >  					conf.of_set_vlan_pcp->vlan_pcp;
> > > > > >  			}
> > > > > >  			break;
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> > > > > > +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> > > > > > +			na_act_index =
> > > > > > +				mnl_attr_nest_start(nlh, na_act_index_cur++);
> > > > > > +			flow_tcf_create_pedit_mnl_msg(nlh,
> > > > > > +						      &actions, item_flags);
> > > > > > +			mnl_attr_nest_end(nlh, na_act_index);
> > > > > > +			break;
> > > > > >  		default:
> > > > > >  			return rte_flow_error_set(error, ENOTSUP,
> > > > > >  						  RTE_FLOW_ERROR_TYPE_ACTION,
> > > > > > -- 
> > > > > > 2.17.1
> > > > > > 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [dpdk-dev] [PATCH] net/mlx5: eswitch-IP address UDP/TCP port rewrite
  2018-10-09  7:03           ` Jack Min
@ 2018-10-09  8:55             ` Yongseok Koh
  0 siblings, 0 replies; 13+ messages in thread
From: Yongseok Koh @ 2018-10-09  8:55 UTC (permalink / raw)
  To: Jack Min; +Cc: Shahaf Shuler, dev


> On Oct 9, 2018, at 12:03 AM, Jack Min <jackmin@mellanox.com> wrote:
> 
> On 18-10-09 06:08:32, Yongseok Koh wrote:
>> On Mon, Oct 08, 2018 at 07:22:03PM +0800, Xiaoyu Min wrote:
>>> On 18-10-02 04:19:00, Yongseok Koh wrote:
>>>> On Sun, Sep 30, 2018 at 03:21:04PM +0800, Xiaoyu Min wrote:
>>>>> On 18-09-29 07:03:33, Yongseok Koh wrote:
>>>>>> On Tue, Sep 25, 2018 at 07:51:06PM +0800, Xiaoyu Min wrote:
[...]
>>>>> 
>>>>>>> +	p_parser->keys[idx].mask = 0xFFFF0000;
>>>>>>> +	p_parser->keys[idx].val = ((const struct rte_flow_action_set_tp *)
>>>>>>> +			actions->conf)->port;
>>>>>> 
>>>>>> Assigning 2B to 4B big-endian stroage? Doesn't look consistent with the mask
>>>>>> above - 0xffff0000.
>>>>>> 
>>>>> So it should be as following ?
>>>>> p_parser->keys[idx].val = (__u32)((const struct rte_flow_action_set_tp *))
>>>>>                actions->conf)->port;
>>>> 
>>>> You can figure it out by actual tests but I think the following would be right.
>>>> 	p_parser->keys[idx].val =
>>>> 		rte_cpu_to_be_32(((const struct rte_flow_action_set_tp *)
>>>> 				  actions->conf)->port);
>>>> 
>>>> Please verify it by testing anyway.
>>>> 
>>> No, it doesn't work correctly if it's converted to BE.
>>> As my understanding the Netlink message should be expressed in host-byte order (?)
>> 
>> As far as I know rte_flow takes network-byte order for args and tcf na msg takes
>> host-byte order. Please refer to the "override_na_vlan_id:" in mlx5_flow_tcf.c,
>> rte_be_to_cpu_16() is used there.  I'm still confusing why the mask is
>> 0xffff0000 above. Please make sure your code works correctly by multiple test
>> cases and hopefully I can hear clear explanation what is right and why.
>> 
> Hey Koh,
> 1. yes, rte_flow takes network-byte order. Here the 'port' is already converted to
> BE by testpmd.
> If the rte_be_to_cpu_16 is used just like "override_na_vlan_id" does the result
> is not correct. Things like TC-pedit takes network-byte order.
> 2. For the mask, honestly, I'm not very clear. The only reference is from [1] and 
> my verification by using TC show command.
> [1] https://www.netdevconf.org/2.1/slides/apr7/tc-workshop/vadai-tc-workshop-update.pdf

Interesting.
Please go ahead with your code because you have already verified it.

>>>>>>> +	p_parser->sel.nkeys = (++idx);
>>>>>>> +}
>>>>>>> +
>>>>>>> +static void
>>>>>>> +flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
>>>>>>> +				 struct pedit_parser *p_parser)
>>>>>>> +{
>>>>>>> +	int idx = p_parser->sel.nkeys;
>>>>>>> +	int keys = flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
>>>>>>> +	int off_base =
>>>>>>> +		actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ? 8 : 24;
>>>>>> 
>>>>>> offsetof(struct ipv6_hdr, src_addr) :
>>>>>> offsetof(struct ipv6_hdr, dst_addr);
>>>>>> 
>>>>> Got it!
>>>>>>> +	const struct rte_flow_action_set_ipv6 *conf =
>>>>>>> +		(const struct rte_flow_action_set_ipv6 *)actions->conf;
>>>>>>> +
>>>>>>> +	for (int i = 0; i < keys; i++, idx++) {
>>>>>>> +		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
>>>>>>> +		p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
>>>>>>> +		p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
>>>>>>> +		p_parser->keys[idx].mask = ~UINT32_MAX;
>>>>>>> +		memcpy(&p_parser->keys[idx].val,
>>>>>>> +			conf->ipv6_addr + i *  SZ_PEDIT_KEY_VAL,
>>>>>>> +			SZ_PEDIT_KEY_VAL);
>>>>>>> +	}
>>>>>>> +	p_parser->sel.nkeys += keys;
>>>>>>> +}
>>>>>>> +
>>>>>>> +static void
>>>>>>> +flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
>>>>>> 
>>>>>> How about getting rte_flow_action_set_ipv4 instead of rte_flow_action?
>>>>>> Same comment for ipv6 and tp_port.
>>>>>> 
>>>>> What's the benefit by using rte_flow_action_set_ipv4 and how I know it's
>>>>> for src or dst address ?
>>>> 
>>>> Just to make the function neat but I overlooked that you still need
>>>> actions->type. Please disregard my previous comment.
>>>> 
>>> OK~
>>>>>>> +				 struct pedit_parser *p_parser)
>>>>>>> +{
>>>>>>> +	int idx = p_parser->sel.nkeys;
>>>>>>> +
>>>>>>> +	p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
>>>>>>> +	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
>>>>>>> +	p_parser->keys[idx].off =
>>>>>>> +		(actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ? 12 : 16);
>>>>>> 
>>>>>> offsetof(struct ipv4_hdr, src_addr) :
>>>>>> offsetof(struct ipv4_hdr, dst_addr);
>>>>>> 
>>>>> Got it!
>>>>>>> +	p_parser->keys[idx].mask = ~UINT32_MAX;
>>>>>>> +	p_parser->keys[idx].val =
>>>>>>> +		((const struct rte_flow_action_set_ipv4 *)
>>>>>>> +		 actions->conf)->ipv4_addr;
>>>>>>> +	p_parser->sel.nkeys = (++idx);
>>>>>>> +}
>>>>>>> +
>>>>>>> +static int
>>>>>>> +flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
>>>>>>> +			      const struct rte_flow_action **actions,
>>>>>>> +			      uint64_t item_flags)
>>>>>>> +{
>>>>>>> +	struct pedit_parser p_parser;
>>>>>>> +
>>>>>>> +	memset(&p_parser, 0, sizeof(p_parser));
>>>>>>> +	mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
>>>>>>> +	struct nlattr *na_act_options = mnl_attr_nest_start(nl,
>>>>>>> +							    TCA_ACT_OPTIONS);
>>>>>>> +	/* all modify header actions should be in one tc-pedit action */
>>>>>>> +	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
>>>>>> 
>>>>>> It seems that you want to aggregate all the pedit actions and form a single
>>>>>> na attr. But what if rte_flow_action_set_* are not contiguous? E.g.
>>>>>> 
>>>>>> flow create ... actions set1 / set2 / port_id / set3 / end
>>>>>> 
>>>>>> Then, it would have two pedit na attrs. Is that okay?
>>>>>> Or, need to think about another way?
>>>>>> 
>>>>>> Same will happen in flow_tcf_get_pedit_actions_size().
>>>>>> 
>>>>> It's OK if we have more than one pedit na attrs.
>>>>> _BUT_ only last pedit take effect based on my experiment
>>>> 
>>>> Then, shouldn't we give some warning to user in validation? So that user can
>>>> have right expectation and reorder the actions as their intention like:
>>>> 	flow create ... actions set1 / set2 / set3 / port_id / end
>>>> 
>>>> Otherwise set1 and set2 will be lost according to your comment.
>>>> 
>>> I prefer to give error to user in validation because this is simple.
>> 
>> Good.
>> 
>>>> Or, how about making PMD do the right thing. I mean, even if the set actions are
>>>> scattered, PMD can collect it and apply in a single na attr?
>>>> 
>>> My feeling is the above approach will be (become) complex. It looks like we introduce
>>> new functionality which re-order all actions, something like rss_expand. 
>> 
>> +1
>> 
>>>>>>> +		switch ((*actions)->type) {
>>>>>>> +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
>>>>>>> +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
>>>>>>> +			flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
>>>>>>> +			break;
>>>>>>> +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
>>>>>>> +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
>>>>>>> +			flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
>>>>>>> +			break;
>>>>>>> +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
>>>>>>> +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
>>>>>>> +			flow_tcf_pedit_key_set_tp_port(*actions,
>>>>>>> +							&p_parser, item_flags);
>>>>>>> +			break;
>>>>>>> +		default:
>>>>>>> +			goto pedit_mnl_msg_done;
>>>>>>> +		}
>>>>>>> +	}
>>>>>>> +pedit_mnl_msg_done:
>>>>>>> +	p_parser.sel.action = TC_ACT_PIPE;
>>>>>>> +	mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
>>>>>>> +			sizeof(p_parser.sel) +
>>>>>>> +			p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
>>>>>>> +			&p_parser);
>>>>>>> +	struct nlattr *na_pedit_keys = mnl_attr_nest_start(nl,
>>>>>>> +					TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
>>>>>>> +	for (int i = 0; i < p_parser.sel.nkeys; i++) {
>>>>>>> +		struct nlattr *na_pedit_key = mnl_attr_nest_start(nl,
>>>>>>> +					TCA_PEDIT_KEY_EX | NLA_F_NESTED);
>>>>>>> +		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
>>>>>>> +				 p_parser.keys_ex[i].htype);
>>>>>>> +		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
>>>>>>> +				 p_parser.keys_ex[i].cmd);
>>>>>>> +		mnl_attr_nest_end(nl, na_pedit_key);
>>>>>>> +	}
>>>>>>> +	mnl_attr_nest_end(nl, na_pedit_keys);
>>>>>>> +	mnl_attr_nest_end(nl, na_act_options);
>>>>>>> +	(*actions)--;
>>>>>>> +	return 0;
>>>>>>> +}
>>>>>>> +
>>>>>>> +/**
>>>>>>> + * Calculate max memory size of one TC-pedit actions.
>>>>>>> + * One TC-pedit action can contain set of keys each defining
>>>>>>> + * a rewrite element (rte_flow action)
>>>>>>> + *
>>>>>>> + * @param[in] actions
>>>>>>> + *   actions specification.
>>>>>>> + * @param[inout] action_flags
>>>>>>> + *   actions flags
>>>>>>> + * @param[inout] size
>>>>>>> + *   accumulated size
>>>>>>> + * @return
>>>>>>> + *   Max memory size of one TC-pedit action
>>>>>>> + */
>>>>>>> +static int
>>>>>>> +flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
>>>>>>> +				uint64_t *action_flags)
>>>>>>> +{
>>>>>>> +	int pedit_size = 0;
>>>>>>> +	int keys = 0;
>>>>>>> +	uint64_t flags = 0;
>>>>>>> +
>>>>>>> +	pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
>>>>>>> +		      SZ_NLATTR_STRZ_OF("pedit") +
>>>>>>> +		      SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
>>>>>>> +	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
>>>>>>> +		switch ((*actions)->type) {
>>>>>>> +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
>>>>>>> +			keys += flow_tcf_calc_pedit_keys(IPV4_ADDR_LEN);
>>>>>>> +			flags |= MLX5_ACTION_SET_IPV4_SRC;
>>>>>>> +			break;
>>>>>>> +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
>>>>>>> +			keys += flow_tcf_calc_pedit_keys(IPV4_ADDR_LEN);
>>>>>>> +			flags |= MLX5_ACTION_SET_IPV4_DST;
>>>>>>> +			break;
>>>>>>> +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
>>>>>>> +			keys += flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
>>>>>>> +			flags |= MLX5_ACTION_SET_IPV6_SRC;
>>>>>>> +			break;
>>>>>>> +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
>>>>>>> +			keys += flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
>>>>>>> +			flags |= MLX5_ACTION_SET_IPV6_DST;
>>>>>>> +			break;
>>>>>>> +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
>>>>>>> +			/* TCP is as same as UDP */
>>>>>>> +			keys += flow_tcf_calc_pedit_keys(TP_PORT_LEN);
>>>>>>> +			flags |= MLX5_ACTION_SET_TP_SRC;
>>>>>>> +			break;
>>>>>>> +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
>>>>>>> +			/* TCP is as same as UDP */
>>>>>>> +			keys += flow_tcf_calc_pedit_keys(TP_PORT_LEN);
>>>>>>> +			flags |= MLX5_ACTION_SET_TP_DST;
>>>>>>> +			break;
>>>>>>> +		default:
>>>>>>> +			goto get_pedit_action_size_done;
>>>>>>> +		}
>>>>>>> +	}
>>>>>>> +get_pedit_action_size_done:
>>>>>>> +	/* TCA_PEDIT_PARAMS_EX */
>>>>>>> +	pedit_size += SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
>>>>>>> +			keys * sizeof(struct tc_pedit_key));
>>>>>> 
>>>>>>> +	pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS */
>>>>>>> +	pedit_size += keys *
>>>>>>> +		/* TCA_PEDIT_KEY_EX + HTYPE + CMD */
>>>>>>> +		(SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) + SZ_NLATTR_DATA_OF(2));
>>>>>>> +	(*action_flags) |= flags;
>>>>>>> +	(*actions)--;
>>>>>>> +	return pedit_size;
>>>>>>> +}
>>>>>>> +
>>>>>>> /**
>>>>>>>  * Retrieve mask for pattern item.
>>>>>>>  *
>>>>>>> @@ -430,6 +708,8 @@ flow_tcf_validate(struct rte_eth_dev *dev,
>>>>>>> 			of_set_vlan_vid;
>>>>>>> 		const struct rte_flow_action_of_set_vlan_pcp *
>>>>>>> 			of_set_vlan_pcp;
>>>>>>> +		const struct rte_flow_action_set_ipv4 *set_ipv4;
>>>>>>> +		const struct rte_flow_action_set_ipv6 *set_ipv6;
>>>>>>> 	} conf;
>>>>>>> 	uint32_t item_flags = 0;
>>>>>>> 	uint32_t action_flags = 0;
>>>>>>> @@ -690,12 +970,64 @@ flow_tcf_validate(struct rte_eth_dev *dev,
>>>>>>> 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
>>>>>>> 			action_flags |= MLX5_ACTION_OF_SET_VLAN_PCP;
>>>>>>> 			break;
>>>>>>> +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
>>>>>>> +			action_flags |= MLX5_ACTION_SET_IPV4_SRC;
>>>>>>> +			break;
>>>>>>> +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
>>>>>>> +			action_flags |= MLX5_ACTION_SET_IPV4_DST;
>>>>>>> +			break;
>>>>>>> +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
>>>>>>> +			action_flags |= MLX5_ACTION_SET_IPV6_SRC;
>>>>>>> +			break;
>>>>>>> +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
>>>>>>> +			action_flags |= MLX5_ACTION_SET_IPV6_DST;
>>>>>>> +			break;
>>>>>>> +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
>>>>>>> +			action_flags |= MLX5_ACTION_SET_TP_SRC;
>>>>>>> +			break;
>>>>>>> +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
>>>>>>> +			action_flags |= MLX5_ACTION_SET_TP_DST;
>>>>>>> +			break;
>>>>>>> 		default:
>>>>>>> 			return rte_flow_error_set(error, ENOTSUP,
>>>>>>> 						  RTE_FLOW_ERROR_TYPE_ACTION,
>>>>>>> 						  actions,
>>>>>>> 						  "action not supported");
>>>>>>> 		}
>>>>>>> +		if (IS_MODIFY_ACTION(actions->type)) {
>>>> 
>>>> This would be a redundant 'if' as classification is already done above. So, how
>>>> about adding a goto label at the end of this code - 'err_no_action_conf:', and
>>>> use goto above.  E.g.,
>>>> 
>>>> 		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
>>>> 			action_flags |= MLX5_ACTION_SET_TP_DST;
>>>> 			if (!actions->conf)
>>>> 				goto err_no_action_conf;
>>>> 			break;
>>>> 
>>> In some level I do agree with you it's redundant. But things like this kind of
>>> redundancy is not avoidable. I mean if we use "goto err_no_action_conf", the
>>> "if (!actions->conf) goto err_no_action_conf" has to be repeated in each "case"
>>> which needs to check conf or you think it's acceptable ?
>> 
>> But in your approach, the redundant check (IS_MODIFY_ACTION) is inside a loop
>> and it has to be checked even if there's no 'set' action in the action list.
>> 
> Yes, it will check very time. But I doubt it will impact performace significantly.

Not a performance issue but I just wanted to make the code modular as much as possible.

> Another thing bother me is, if taking 'goto' approach, beside the 'if (!actions->conf)...',
> at this moment, we also need to add 'if (set action is discontinued)' for
> each modification case. I'm not sure if this will keep code clear
> considering many repeated code there...

And like I asked, this sanity check is also needed for other actions (PORT_ID and VLAN).
That's why I just wanted to have more generic way.

I'm not sure which way is clearer considering all that.

I don't have a strong objection about your current code but please make sure you add the
same sanity check for other existing acitons in a unified way. Also I hope the macro
(IS_MODIFY_ACTION) has better name.


Thanks,
Yongseok

>>>> 
>>>> And if I may, can I ask you to add the same to RTE_FLOW_ACTION_TYPE_PORT_ID?
>>>> 
>>> Yes, I will add.
>>>>>>> +			if (!actions->conf)
>>>>>>> +				return rte_flow_error_set(error, ENOTSUP,
>>>>>>> +						RTE_FLOW_ERROR_TYPE_ACTION_CONF,
>>>>>>> +						actions,
>>>>>>> +						"action configuration not set");
>>>>>>> +		}
>>>>>>> +	}

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [dpdk-dev] [PATCH v2] net/mlx5: rewrite IP address UDP/TCP port by E-Switch
  2018-09-25 11:51 [dpdk-dev] [PATCH] net/mlx5: eswitch-IP address UDP/TCP port rewrite Xiaoyu Min
  2018-09-28 23:03 ` Yongseok Koh
@ 2018-10-10 12:56 ` Jack Min
  2018-10-11  4:45   ` Yongseok Koh
  2018-10-11  9:26   ` [dpdk-dev] [PATCH v3] " Jack Min
  1 sibling, 2 replies; 13+ messages in thread
From: Jack Min @ 2018-10-10 12:56 UTC (permalink / raw)
  To: Shahaf Shuler, Yongseok Koh; +Cc: dev

Offload the following rte_flow actions by inserting accordingly
E-Switch rules via TC Flower driver

 - RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC
 - RTE_FLOW_ACTION_TYPE_SET_IPV4_DST
 - RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC
 - RTE_FLOW_ACTION_TYPE_SET_IPV6_DST
 - RTE_FLOW_ACTION_TYPE_SET_TP_SRC
 - RTE_FLOW_ACTION_TYPE_SET_TP_DST

The example testpmd command is:

    flow create 0 transfer ingress
         pattern eth / ipv4 / udp dst is 7000 / end
	 actions set_ipv4_src ipv4_addr 172.168.0.1 /
	 set_ipv4_dst ipv4_addr 172.168.10.1 /
	 set_tp_dst port 9000 /
	 set_tp_src port 700 /
	 port_id id 1 / end

Signed-off-by: Xiaoyu Min <jackmin@mellanox.com>
---
v2:
 * rebased
 * validate pedit actions with drop action
 * validate successive pedit actions
 * a more generic way of validation
 * added testpmd commands in commit log
 * added comments of new added functions
 * fix some coding style issue

 drivers/net/mlx5/Makefile        |   5 +
 drivers/net/mlx5/meson.build     |   2 +
 drivers/net/mlx5/mlx5_flow.h     |   6 +
 drivers/net/mlx5/mlx5_flow_tcf.c | 468 ++++++++++++++++++++++++++++++-
 4 files changed, 475 insertions(+), 6 deletions(-)

diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index ca1de9f21..49b95e78e 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -346,6 +346,11 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
 		linux/tc_act/tc_vlan.h \
 		enum TCA_VLAN_PUSH_VLAN_PRIORITY \
 		$(AUTOCONF_OUTPUT)
+	$Q sh -- '$<' '$@' \
+		HAVE_TC_ACT_PEDIT \
+		linux/tc_act/tc_pedit.h \
+		enum TCA_PEDIT_KEY_EX_HDR_TYPE_UDP \
+		$(AUTOCONF_OUTPUT)
 	$Q sh -- '$<' '$@' \
 		HAVE_SUPPORTED_40000baseKR4_Full \
 		/usr/include/linux/ethtool.h \
diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
index fd93ac162..ef6a85101 100644
--- a/drivers/net/mlx5/meson.build
+++ b/drivers/net/mlx5/meson.build
@@ -182,6 +182,8 @@ if build
 		'TCA_FLOWER_KEY_VLAN_ETH_TYPE' ],
 		[ 'HAVE_TC_ACT_VLAN', 'linux/tc_act/tc_vlan.h',
 		'TCA_VLAN_PUSH_VLAN_PRIORITY' ],
+		[ 'HAVE_TC_ACT_PEDIT', 'linux/tc_act/tc_pedit.h',
+		'TCA_PEDIT_KEY_EX_HDR_TYPE_UDP' ],
 		[ 'HAVE_RDMA_NL_NLDEV', 'rdma/rdma_netlink.h',
 		'RDMA_NL_NLDEV' ],
 		[ 'HAVE_RDMA_NLDEV_CMD_GET', 'rdma/rdma_netlink.h',
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 12de841e8..cbb8c56c8 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -78,6 +78,12 @@
 #define MLX5_FLOW_ACTION_OF_PUSH_VLAN (1u << 8)
 #define MLX5_FLOW_ACTION_OF_SET_VLAN_VID (1u << 9)
 #define MLX5_FLOW_ACTION_OF_SET_VLAN_PCP (1u << 10)
+#define MLX5_FLOW_ACTION_SET_IPV4_SRC (1u << 11)
+#define MLX5_FLOW_ACTION_SET_IPV4_DST (1u << 12)
+#define MLX5_FLOW_ACTION_SET_IPV6_SRC (1u << 13)
+#define MLX5_FLOW_ACTION_SET_IPV6_DST (1u << 14)
+#define MLX5_FLOW_ACTION_SET_TP_SRC (1u << 15)
+#define MLX5_FLOW_ACTION_SET_TP_DST (1u << 16)
 
 #define MLX5_FLOW_FATE_ACTIONS \
 	(MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_QUEUE | MLX5_FLOW_ACTION_RSS)
diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
index 91f6ef678..65e8a4b3f 100644
--- a/drivers/net/mlx5/mlx5_flow_tcf.c
+++ b/drivers/net/mlx5/mlx5_flow_tcf.c
@@ -53,6 +53,62 @@ struct tc_vlan {
 
 #endif /* HAVE_TC_ACT_VLAN */
 
+#ifdef HAVE_TC_ACT_PEDIT
+
+#include <linux/tc_act/tc_pedit.h>
+
+#else /* HAVE_TC_ACT_VLAN */
+enum {
+	TCA_PEDIT_UNSPEC,
+	TCA_PEDIT_TM,
+	TCA_PEDIT_PARMS,
+	TCA_PEDIT_PAD,
+	TCA_PEDIT_PARMS_EX,
+	TCA_PEDIT_KEYS_EX,
+	TCA_PEDIT_KEY_EX,
+	__TCA_PEDIT_MAX
+};
+
+enum {
+	TCA_PEDIT_KEY_EX_HTYPE = 1,
+	TCA_PEDIT_KEY_EX_CMD = 2,
+	__TCA_PEDIT_KEY_EX_MAX
+};
+
+enum pedit_header_type {
+	TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
+	TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
+	TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
+	TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
+	TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
+	TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
+	__PEDIT_HDR_TYPE_MAX,
+};
+
+enum pedit_cmd {
+	TCA_PEDIT_KEY_EX_CMD_SET = 0,
+	TCA_PEDIT_KEY_EX_CMD_ADD = 1,
+	__PEDIT_CMD_MAX,
+};
+
+struct tc_pedit_key {
+	__u32           mask;  /* AND */
+	__u32           val;   /*XOR */
+	__u32           off;  /*offset */
+	__u32           at;
+	__u32           offmask;
+	__u32           shift;
+};
+
+struct tc_pedit_sel {
+	tc_gen;
+	unsigned char           nkeys;
+	unsigned char           flags;
+	struct tc_pedit_key     keys[0];
+};
+
+#endif /* HAVE_TC_ACT_VLAN */
+
 /* Normally found in linux/netlink.h. */
 #ifndef NETLINK_CAP_ACK
 #define NETLINK_CAP_ACK 10
@@ -153,6 +209,14 @@ struct tc_vlan {
 #define IPV6_ADDR_LEN 16
 #endif
 
+#ifndef IPV4_ADDR_LEN
+#define IPV4_ADDR_LEN 4
+#endif
+
+#ifndef TP_PORT_LEN
+#define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
+#endif
+
 /** Empty masks for known item types. */
 static const union {
 	struct rte_flow_item_port_id port_id;
@@ -230,6 +294,286 @@ struct flow_tcf_ptoi {
 	(MLX5_FLOW_ACTION_OF_POP_VLAN | MLX5_FLOW_ACTION_OF_PUSH_VLAN | \
 	 MLX5_FLOW_ACTION_OF_SET_VLAN_VID | MLX5_FLOW_ACTION_OF_SET_VLAN_PCP)
 
+#define MLX5_TCF_PEDIT_ACTIONS (MLX5_FLOW_ACTION_SET_IPV4_SRC | \
+				MLX5_FLOW_ACTION_SET_IPV4_DST | \
+				MLX5_FLOW_ACTION_SET_IPV6_SRC | \
+				MLX5_FLOW_ACTION_SET_IPV6_DST | \
+				MLX5_FLOW_ACTION_SET_TP_SRC   | \
+				MLX5_FLOW_ACTION_SET_TP_DST)
+
+#define MLX5_TCF_CONFIG_ACTIONS (MLX5_FLOW_ACTION_PORT_ID | \
+				 MLX5_FLOW_ACTION_OF_PUSH_VLAN | \
+				 MLX5_FLOW_ACTION_OF_SET_VLAN_VID | \
+				 MLX5_FLOW_ACTION_OF_SET_VLAN_PCP | \
+				 MLX5_TCF_PEDIT_ACTIONS)
+
+#define MAX_PEDIT_KEYS (128)
+#define SZ_PEDIT_KEY_VAL (4)
+
+struct pedit_key_ex {
+	enum pedit_header_type htype;
+	enum pedit_cmd cmd;
+};
+
+struct pedit_parser {
+	struct tc_pedit_sel sel;
+	struct tc_pedit_key keys[MAX_PEDIT_KEYS];
+	struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
+};
+
+/**
+ * Calculate how many pedit keys for a given
+ * size in byte
+ *
+ * @param[in] size
+ *   size in byte
+ * @return
+ *   number of pedit keys
+ */
+static int
+flow_tcf_calc_pedit_keys(const uint64_t size)
+{
+	int keys = (size / SZ_PEDIT_KEY_VAL) +
+		   ((size % SZ_PEDIT_KEY_VAL) ? 1 : 0);
+	return keys;
+}
+
+/**
+ * Set pedit key of transport (TCP/UDP) port value
+ *
+ * @param[in] actions
+ *   pointer to action specification
+ * @param[in,out] p_parser
+ *   pointer to pedit_parser
+ * @param[in] item_flags
+ *   flags of all items presented
+ */
+static void
+flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
+				struct pedit_parser *p_parser,
+				uint64_t item_flags)
+{
+	int idx = p_parser->sel.nkeys;
+
+	if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
+		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
+	if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
+		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
+	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
+	assert(offsetof(struct tcp_hdr, src_port) ==
+	       offsetof(struct udp_hdr, src_port));
+	assert(offsetof(struct tcp_hdr, dst_port) ==
+	       offsetof(struct udp_hdr, dst_port));
+	p_parser->keys[idx].off =
+		actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ?
+		offsetof(struct tcp_hdr, src_port) :
+		offsetof(struct tcp_hdr, dst_port);
+	p_parser->keys[idx].mask = 0xFFFF0000;
+	p_parser->keys[idx].val =
+		(__u32)((const struct rte_flow_action_set_tp *)
+				actions->conf)->port;
+	p_parser->sel.nkeys = (++idx);
+}
+
+/**
+ * Set pedit key of ipv6 address
+ *
+ * @param[in] actions
+ *   pointer to action specification
+ * @param[in,out] p_parser
+ *   pointer to pedit_parser
+ */
+static void
+flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
+				 struct pedit_parser *p_parser)
+{
+	int idx = p_parser->sel.nkeys;
+	int keys = flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
+	int off_base =
+		actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ?
+		offsetof(struct ipv6_hdr, src_addr) :
+		offsetof(struct ipv6_hdr, dst_addr);
+	const struct rte_flow_action_set_ipv6 *conf =
+		(const struct rte_flow_action_set_ipv6 *)actions->conf;
+
+	for (int i = 0; i < keys; i++, idx++) {
+		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
+		p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
+		p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
+		p_parser->keys[idx].mask = ~UINT32_MAX;
+		memcpy(&p_parser->keys[idx].val,
+			conf->ipv6_addr + i *  SZ_PEDIT_KEY_VAL,
+			SZ_PEDIT_KEY_VAL);
+	}
+	p_parser->sel.nkeys += keys;
+}
+
+/**
+ * Set pedit key of ipv4 address
+ *
+ * @param[in] actions
+ *   pointer to action specification
+ * @param[in,out] p_parser
+ *   pointer to pedit_parser
+ */
+static void
+flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
+				 struct pedit_parser *p_parser)
+{
+	int idx = p_parser->sel.nkeys;
+
+	p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
+	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
+	p_parser->keys[idx].off =
+		actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ?
+		offsetof(struct ipv4_hdr, src_addr) :
+		offsetof(struct ipv4_hdr, dst_addr);
+	p_parser->keys[idx].mask = ~UINT32_MAX;
+	p_parser->keys[idx].val =
+		((const struct rte_flow_action_set_ipv4 *)
+		 actions->conf)->ipv4_addr;
+	p_parser->sel.nkeys = (++idx);
+}
+
+/**
+ * Create the pedit's na attribute in netlink message
+ * on pre-allocate message buffer
+ *
+ * @param[in,out] nl
+ *   pointer to pre-allocated netlink message buffer
+ * @param[in,out] actions
+ *   pointer to pointer of actions specification.
+ * @param[in,out] action_flags
+ *   pointer to actions flags
+ * @param[in] item_flags
+ *   flags of all item presented
+ * @return
+ *   0 on sucess
+ */
+static int
+flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
+			      const struct rte_flow_action **actions,
+			      uint64_t item_flags)
+{
+	struct pedit_parser p_parser;
+
+	memset(&p_parser, 0, sizeof(p_parser));
+	mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
+	struct nlattr *na_act_options = mnl_attr_nest_start(nl,
+							    TCA_ACT_OPTIONS);
+	/* all modify header actions should be in one tc-pedit action */
+	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
+		switch ((*actions)->type) {
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
+			flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
+			flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+			flow_tcf_pedit_key_set_tp_port(*actions,
+							&p_parser, item_flags);
+			break;
+		default:
+			goto pedit_mnl_msg_done;
+		}
+	}
+pedit_mnl_msg_done:
+	p_parser.sel.action = TC_ACT_PIPE;
+	mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
+			sizeof(p_parser.sel) +
+			p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
+			&p_parser);
+	struct nlattr *na_pedit_keys = mnl_attr_nest_start(nl,
+					TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
+	for (int i = 0; i < p_parser.sel.nkeys; i++) {
+		struct nlattr *na_pedit_key = mnl_attr_nest_start(nl,
+					TCA_PEDIT_KEY_EX | NLA_F_NESTED);
+		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
+				 p_parser.keys_ex[i].htype);
+		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
+				 p_parser.keys_ex[i].cmd);
+		mnl_attr_nest_end(nl, na_pedit_key);
+	}
+	mnl_attr_nest_end(nl, na_pedit_keys);
+	mnl_attr_nest_end(nl, na_act_options);
+	(*actions)--;
+	return 0;
+}
+
+/**
+ * Calculate max memory size of one TC-pedit actions.
+ * One TC-pedit action can contain set of keys each defining
+ * a rewrite element (rte_flow action)
+ *
+ * @param[in,out] actions
+ *   actions specification.
+ * @param[in,out] action_flags
+ *   actions flags
+ * @param[in,out] size
+ *   accumulated size
+ * @return
+ *   Max memory size of one TC-pedit action
+ */
+static int
+flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
+				uint64_t *action_flags)
+{
+	int pedit_size = 0;
+	int keys = 0;
+	uint64_t flags = 0;
+
+	pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
+		      SZ_NLATTR_STRZ_OF("pedit") +
+		      SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
+	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
+		switch ((*actions)->type) {
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
+			keys += flow_tcf_calc_pedit_keys(IPV4_ADDR_LEN);
+			flags |= MLX5_FLOW_ACTION_SET_IPV4_SRC;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
+			keys += flow_tcf_calc_pedit_keys(IPV4_ADDR_LEN);
+			flags |= MLX5_FLOW_ACTION_SET_IPV4_DST;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
+			keys += flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
+			flags |= MLX5_FLOW_ACTION_SET_IPV6_SRC;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
+			keys += flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
+			flags |= MLX5_FLOW_ACTION_SET_IPV6_DST;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
+			/* TCP is as same as UDP */
+			keys += flow_tcf_calc_pedit_keys(TP_PORT_LEN);
+			flags |= MLX5_FLOW_ACTION_SET_TP_SRC;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+			/* TCP is as same as UDP */
+			keys += flow_tcf_calc_pedit_keys(TP_PORT_LEN);
+			flags |= MLX5_FLOW_ACTION_SET_TP_DST;
+			break;
+		default:
+			goto get_pedit_action_size_done;
+		}
+	}
+get_pedit_action_size_done:
+	/* TCA_PEDIT_PARAMS_EX */
+	pedit_size += SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
+			keys * sizeof(struct tc_pedit_key));
+	pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS */
+	pedit_size += keys *
+		/* TCA_PEDIT_KEY_EX + HTYPE + CMD */
+		(SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) + SZ_NLATTR_DATA_OF(2));
+	(*action_flags) |= flags;
+	(*actions)--;
+	return pedit_size;
+}
+
 /**
  * Retrieve mask for pattern item.
  *
@@ -433,11 +777,14 @@ flow_tcf_validate(struct rte_eth_dev *dev,
 			of_set_vlan_vid;
 		const struct rte_flow_action_of_set_vlan_pcp *
 			of_set_vlan_pcp;
+		const struct rte_flow_action_set_ipv4 *set_ipv4;
+		const struct rte_flow_action_set_ipv6 *set_ipv6;
 	} conf;
 	uint32_t item_flags = 0;
 	uint32_t action_flags = 0;
 	uint8_t next_protocol = -1;
 	unsigned int tcm_ifindex = 0;
+	uint8_t pedit_validated = 0;
 	struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
 	struct rte_eth_dev *port_id_dev = NULL;
 	bool in_port_id_set;
@@ -648,16 +995,20 @@ flow_tcf_validate(struct rte_eth_dev *dev,
 	}
 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
 		unsigned int i;
+		uint32_t current_action_flag = 0;
 
 		switch (actions->type) {
 		case RTE_FLOW_ACTION_TYPE_VOID:
 			break;
 		case RTE_FLOW_ACTION_TYPE_PORT_ID:
+			current_action_flag = MLX5_FLOW_ACTION_PORT_ID;
 			if (action_flags & MLX5_TCF_FATE_ACTIONS)
 				return rte_flow_error_set
 					(error, EINVAL,
 					 RTE_FLOW_ERROR_TYPE_ACTION, actions,
 					 "can't have multiple fate actions");
+			if (!actions->conf)
+				break;
 			conf.port_id = actions->conf;
 			if (conf.port_id->original)
 				i = 0;
@@ -672,7 +1023,6 @@ flow_tcf_validate(struct rte_eth_dev *dev,
 					 conf.port_id,
 					 "missing data to convert port ID to"
 					 " ifindex");
-			action_flags |= MLX5_FLOW_ACTION_PORT_ID;
 			port_id_dev = &rte_eth_devices[conf.port_id->id];
 			break;
 		case RTE_FLOW_ACTION_TYPE_DROP:
@@ -681,13 +1031,13 @@ flow_tcf_validate(struct rte_eth_dev *dev,
 					(error, EINVAL,
 					 RTE_FLOW_ERROR_TYPE_ACTION, actions,
 					 "can't have multiple fate actions");
-			action_flags |= MLX5_FLOW_ACTION_DROP;
+			current_action_flag = MLX5_FLOW_ACTION_DROP;
 			break;
 		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
-			action_flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
+			current_action_flag = MLX5_FLOW_ACTION_OF_POP_VLAN;
 			break;
 		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
-			action_flags |= MLX5_FLOW_ACTION_OF_PUSH_VLAN;
+			current_action_flag = MLX5_FLOW_ACTION_OF_PUSH_VLAN;
 			break;
 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
 			if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
@@ -696,7 +1046,7 @@ flow_tcf_validate(struct rte_eth_dev *dev,
 					 RTE_FLOW_ERROR_TYPE_ACTION, actions,
 					 "vlan modify is not supported,"
 					 " set action must follow push action");
-			action_flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
+			current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
 			break;
 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
 			if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
@@ -705,7 +1055,25 @@ flow_tcf_validate(struct rte_eth_dev *dev,
 					 RTE_FLOW_ERROR_TYPE_ACTION, actions,
 					 "vlan modify is not supported,"
 					 " set action must follow push action");
-			action_flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
+			current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
+			current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_SRC;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
+			current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_DST;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
+			current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_SRC;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
+			current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_DST;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
+			current_action_flag = MLX5_FLOW_ACTION_SET_TP_SRC;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+			current_action_flag = MLX5_FLOW_ACTION_SET_TP_DST;
 			break;
 		default:
 			return rte_flow_error_set(error, ENOTSUP,
@@ -713,6 +1081,66 @@ flow_tcf_validate(struct rte_eth_dev *dev,
 						  actions,
 						  "action not supported");
 		}
+		if (current_action_flag & MLX5_TCF_CONFIG_ACTIONS) {
+			if (!actions->conf)
+				return rte_flow_error_set(error, EINVAL,
+						RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+						actions,
+						"action configuration not set");
+		}
+		if ((current_action_flag & MLX5_TCF_PEDIT_ACTIONS) &&
+				pedit_validated)
+			return rte_flow_error_set(error, ENOTSUP,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  actions,
+						  "not successive pedit "
+						  "actions");
+		if ((current_action_flag & ~MLX5_TCF_PEDIT_ACTIONS) &&
+		    (action_flags & MLX5_TCF_PEDIT_ACTIONS))
+			pedit_validated = 1;
+		action_flags |= current_action_flag;
+	}
+	if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
+	    (action_flags & MLX5_FLOW_ACTION_DROP))
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ACTION,
+					  actions,
+					  "actions are not compatible");
+	if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
+	    !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ACTION,
+					  actions,
+					  "pedit action must be followed by "
+					  "port_id action");
+	if (action_flags &
+	   (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST)) {
+		if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
+			return rte_flow_error_set(error, EINVAL,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  actions,
+						  "no ipv4 item found in"
+						  " pattern");
+	}
+	if (action_flags &
+	   (MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST)) {
+		if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
+			return rte_flow_error_set(error, EINVAL,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  actions,
+						  "no ipv6 item found in"
+						  " pattern");
+	}
+	if (action_flags &
+	   (MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST)) {
+		if (!(item_flags &
+		     (MLX5_FLOW_LAYER_OUTER_L4_UDP |
+		      MLX5_FLOW_LAYER_OUTER_L4_TCP)))
+			return rte_flow_error_set(error, EINVAL,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  actions,
+						  "no TCP/UDP item found in"
+						  " pattern");
 	}
 	/*
 	 * FW syndrome (0xA9C090):
@@ -884,6 +1312,15 @@ flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
 				SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
 				SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
 			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
+		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+			size += flow_tcf_get_pedit_actions_size(&actions,
+								&flags);
+			break;
 		default:
 			DRV_LOG(WARNING,
 				"unsupported action %p type %d,"
@@ -1042,6 +1479,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
 	struct nlattr *na_flower_act;
 	struct nlattr *na_vlan_id = NULL;
 	struct nlattr *na_vlan_priority = NULL;
+	uint64_t item_flags = 0;
 
 	claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
 						PTOI_TABLE_SZ_MAX(dev)));
@@ -1088,6 +1526,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
 			tcm->tcm_ifindex = ptoi[i].ifindex;
 			break;
 		case RTE_FLOW_ITEM_TYPE_ETH:
+			item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
 			mask.eth = flow_tcf_item_mask
 				(items, &rte_flow_item_eth_mask,
 				 &flow_tcf_mask_supported.eth,
@@ -1121,6 +1560,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
 			}
 			break;
 		case RTE_FLOW_ITEM_TYPE_VLAN:
+			item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
 			mask.vlan = flow_tcf_item_mask
 				(items, &rte_flow_item_vlan_mask,
 				 &flow_tcf_mask_supported.vlan,
@@ -1153,6 +1593,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
 						  RTE_BE16(0x0fff)));
 			break;
 		case RTE_FLOW_ITEM_TYPE_IPV4:
+			item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
 			mask.ipv4 = flow_tcf_item_mask
 				(items, &rte_flow_item_ipv4_mask,
 				 &flow_tcf_mask_supported.ipv4,
@@ -1192,6 +1633,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
 			}
 			break;
 		case RTE_FLOW_ITEM_TYPE_IPV6:
+			item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
 			mask.ipv6 = flow_tcf_item_mask
 				(items, &rte_flow_item_ipv6_mask,
 				 &flow_tcf_mask_supported.ipv6,
@@ -1233,6 +1675,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
 			}
 			break;
 		case RTE_FLOW_ITEM_TYPE_UDP:
+			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
 			mask.udp = flow_tcf_item_mask
 				(items, &rte_flow_item_udp_mask,
 				 &flow_tcf_mask_supported.udp,
@@ -1262,6 +1705,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
 			}
 			break;
 		case RTE_FLOW_ITEM_TYPE_TCP:
+			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
 			mask.tcp = flow_tcf_item_mask
 				(items, &rte_flow_item_tcp_mask,
 				 &flow_tcf_mask_supported.tcp,
@@ -1412,6 +1856,18 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
 					conf.of_set_vlan_pcp->vlan_pcp;
 			}
 			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
+		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+			na_act_index =
+				mnl_attr_nest_start(nlh, na_act_index_cur++);
+			flow_tcf_create_pedit_mnl_msg(nlh,
+						      &actions, item_flags);
+			mnl_attr_nest_end(nlh, na_act_index);
+			break;
 		default:
 			return rte_flow_error_set(error, ENOTSUP,
 						  RTE_FLOW_ERROR_TYPE_ACTION,
-- 
2.17.1

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [dpdk-dev] [PATCH v2] net/mlx5: rewrite IP address UDP/TCP port by E-Switch
  2018-10-10 12:56 ` [dpdk-dev] [PATCH v2] net/mlx5: rewrite IP address UDP/TCP port by E-Switch Jack Min
@ 2018-10-11  4:45   ` Yongseok Koh
  2018-10-11  9:26   ` [dpdk-dev] [PATCH v3] " Jack Min
  1 sibling, 0 replies; 13+ messages in thread
From: Yongseok Koh @ 2018-10-11  4:45 UTC (permalink / raw)
  To: Jack Min; +Cc: Shahaf Shuler, dev

On Wed, Oct 10, 2018 at 05:56:15AM -0700, Jack Min wrote:
> Offload the following rte_flow actions by inserting accordingly
> E-Switch rules via TC Flower driver
> 
>  - RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC
>  - RTE_FLOW_ACTION_TYPE_SET_IPV4_DST
>  - RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC
>  - RTE_FLOW_ACTION_TYPE_SET_IPV6_DST
>  - RTE_FLOW_ACTION_TYPE_SET_TP_SRC
>  - RTE_FLOW_ACTION_TYPE_SET_TP_DST
> 
> The example testpmd command is:
> 
>     flow create 0 transfer ingress
>          pattern eth / ipv4 / udp dst is 7000 / end
> 	 actions set_ipv4_src ipv4_addr 172.168.0.1 /
> 	 set_ipv4_dst ipv4_addr 172.168.10.1 /
> 	 set_tp_dst port 9000 /
> 	 set_tp_src port 700 /
> 	 port_id id 1 / end
> 
> Signed-off-by: Xiaoyu Min <jackmin@mellanox.com>
> ---

A few minor comments below. If you agree, please make changes and submit v3 with
my acked-by tag.

Thanks

> v2:
>  * rebased
>  * validate pedit actions with drop action
>  * validate successive pedit actions
>  * a more generic way of validation
>  * added testpmd commands in commit log
>  * added comments of new added functions
>  * fix some coding style issue
> 
>  drivers/net/mlx5/Makefile        |   5 +
>  drivers/net/mlx5/meson.build     |   2 +
>  drivers/net/mlx5/mlx5_flow.h     |   6 +
>  drivers/net/mlx5/mlx5_flow_tcf.c | 468 ++++++++++++++++++++++++++++++-
>  4 files changed, 475 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
> index ca1de9f21..49b95e78e 100644
> --- a/drivers/net/mlx5/Makefile
> +++ b/drivers/net/mlx5/Makefile
> @@ -346,6 +346,11 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
>  		linux/tc_act/tc_vlan.h \
>  		enum TCA_VLAN_PUSH_VLAN_PRIORITY \
>  		$(AUTOCONF_OUTPUT)
> +	$Q sh -- '$<' '$@' \
> +		HAVE_TC_ACT_PEDIT \
> +		linux/tc_act/tc_pedit.h \
> +		enum TCA_PEDIT_KEY_EX_HDR_TYPE_UDP \
> +		$(AUTOCONF_OUTPUT)
>  	$Q sh -- '$<' '$@' \
>  		HAVE_SUPPORTED_40000baseKR4_Full \
>  		/usr/include/linux/ethtool.h \
> diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
> index fd93ac162..ef6a85101 100644
> --- a/drivers/net/mlx5/meson.build
> +++ b/drivers/net/mlx5/meson.build
> @@ -182,6 +182,8 @@ if build
>  		'TCA_FLOWER_KEY_VLAN_ETH_TYPE' ],
>  		[ 'HAVE_TC_ACT_VLAN', 'linux/tc_act/tc_vlan.h',
>  		'TCA_VLAN_PUSH_VLAN_PRIORITY' ],
> +		[ 'HAVE_TC_ACT_PEDIT', 'linux/tc_act/tc_pedit.h',
> +		'TCA_PEDIT_KEY_EX_HDR_TYPE_UDP' ],
>  		[ 'HAVE_RDMA_NL_NLDEV', 'rdma/rdma_netlink.h',
>  		'RDMA_NL_NLDEV' ],
>  		[ 'HAVE_RDMA_NLDEV_CMD_GET', 'rdma/rdma_netlink.h',
> diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
> index 12de841e8..cbb8c56c8 100644
> --- a/drivers/net/mlx5/mlx5_flow.h
> +++ b/drivers/net/mlx5/mlx5_flow.h
> @@ -78,6 +78,12 @@
>  #define MLX5_FLOW_ACTION_OF_PUSH_VLAN (1u << 8)
>  #define MLX5_FLOW_ACTION_OF_SET_VLAN_VID (1u << 9)
>  #define MLX5_FLOW_ACTION_OF_SET_VLAN_PCP (1u << 10)
> +#define MLX5_FLOW_ACTION_SET_IPV4_SRC (1u << 11)
> +#define MLX5_FLOW_ACTION_SET_IPV4_DST (1u << 12)
> +#define MLX5_FLOW_ACTION_SET_IPV6_SRC (1u << 13)
> +#define MLX5_FLOW_ACTION_SET_IPV6_DST (1u << 14)
> +#define MLX5_FLOW_ACTION_SET_TP_SRC (1u << 15)
> +#define MLX5_FLOW_ACTION_SET_TP_DST (1u << 16)
>  
>  #define MLX5_FLOW_FATE_ACTIONS \
>  	(MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_QUEUE | MLX5_FLOW_ACTION_RSS)
> diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
> index 91f6ef678..65e8a4b3f 100644
> --- a/drivers/net/mlx5/mlx5_flow_tcf.c
> +++ b/drivers/net/mlx5/mlx5_flow_tcf.c
> @@ -53,6 +53,62 @@ struct tc_vlan {
>  
>  #endif /* HAVE_TC_ACT_VLAN */
>  
> +#ifdef HAVE_TC_ACT_PEDIT
> +
> +#include <linux/tc_act/tc_pedit.h>
> +
> +#else /* HAVE_TC_ACT_VLAN */

New line here?

> +enum {
> +	TCA_PEDIT_UNSPEC,
> +	TCA_PEDIT_TM,
> +	TCA_PEDIT_PARMS,
> +	TCA_PEDIT_PAD,
> +	TCA_PEDIT_PARMS_EX,
> +	TCA_PEDIT_KEYS_EX,
> +	TCA_PEDIT_KEY_EX,
> +	__TCA_PEDIT_MAX
> +};
> +
> +enum {
> +	TCA_PEDIT_KEY_EX_HTYPE = 1,
> +	TCA_PEDIT_KEY_EX_CMD = 2,
> +	__TCA_PEDIT_KEY_EX_MAX
> +};
> +
> +enum pedit_header_type {
> +	TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
> +	TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
> +	TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
> +	TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
> +	TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
> +	TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
> +	__PEDIT_HDR_TYPE_MAX,
> +};
> +
> +enum pedit_cmd {
> +	TCA_PEDIT_KEY_EX_CMD_SET = 0,
> +	TCA_PEDIT_KEY_EX_CMD_ADD = 1,
> +	__PEDIT_CMD_MAX,
> +};
> +
> +struct tc_pedit_key {
> +	__u32           mask;  /* AND */
> +	__u32           val;   /*XOR */
> +	__u32           off;  /*offset */
> +	__u32           at;
> +	__u32           offmask;
> +	__u32           shift;
> +};
> +
> +struct tc_pedit_sel {
> +	tc_gen;
> +	unsigned char           nkeys;
> +	unsigned char           flags;
> +	struct tc_pedit_key     keys[0];
> +};
> +
> +#endif /* HAVE_TC_ACT_VLAN */
> +
>  /* Normally found in linux/netlink.h. */
>  #ifndef NETLINK_CAP_ACK
>  #define NETLINK_CAP_ACK 10
> @@ -153,6 +209,14 @@ struct tc_vlan {
>  #define IPV6_ADDR_LEN 16
>  #endif
>  
> +#ifndef IPV4_ADDR_LEN
> +#define IPV4_ADDR_LEN 4
> +#endif
> +
> +#ifndef TP_PORT_LEN
> +#define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
> +#endif
> +
>  /** Empty masks for known item types. */
>  static const union {
>  	struct rte_flow_item_port_id port_id;
> @@ -230,6 +294,286 @@ struct flow_tcf_ptoi {
>  	(MLX5_FLOW_ACTION_OF_POP_VLAN | MLX5_FLOW_ACTION_OF_PUSH_VLAN | \
>  	 MLX5_FLOW_ACTION_OF_SET_VLAN_VID | MLX5_FLOW_ACTION_OF_SET_VLAN_PCP)
>  
> +#define MLX5_TCF_PEDIT_ACTIONS (MLX5_FLOW_ACTION_SET_IPV4_SRC | \
> +				MLX5_FLOW_ACTION_SET_IPV4_DST | \
> +				MLX5_FLOW_ACTION_SET_IPV6_SRC | \
> +				MLX5_FLOW_ACTION_SET_IPV6_DST | \
> +				MLX5_FLOW_ACTION_SET_TP_SRC   | \
> +				MLX5_FLOW_ACTION_SET_TP_DST)
> +

#define MLX5_TCF_PEDIT_ACTIONS \
	(MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST | \
	 MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST | \
	 MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST)

Isn't it better and looks alike with MLX5_TCF_VLAN_ACTIONS above?

> +#define MLX5_TCF_CONFIG_ACTIONS (MLX5_FLOW_ACTION_PORT_ID | \
> +				 MLX5_FLOW_ACTION_OF_PUSH_VLAN | \
> +				 MLX5_FLOW_ACTION_OF_SET_VLAN_VID | \
> +				 MLX5_FLOW_ACTION_OF_SET_VLAN_PCP | \
> +				 MLX5_TCF_PEDIT_ACTIONS)

Same here.

> +
> +#define MAX_PEDIT_KEYS (128)
> +#define SZ_PEDIT_KEY_VAL (4)

Unnecessary parenthesis?

> +
> +struct pedit_key_ex {
> +	enum pedit_header_type htype;
> +	enum pedit_cmd cmd;
> +};
> +
> +struct pedit_parser {
> +	struct tc_pedit_sel sel;
> +	struct tc_pedit_key keys[MAX_PEDIT_KEYS];
> +	struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
> +};
> +
> +/**
> + * Calculate how many pedit keys for a given
> + * size in byte
> + *
> + * @param[in] size
> + *   size in byte
> + * @return
> + *   number of pedit keys
> + */
> +static int
> +flow_tcf_calc_pedit_keys(const uint64_t size)
> +{
> +	int keys = (size / SZ_PEDIT_KEY_VAL) +
> +		   ((size % SZ_PEDIT_KEY_VAL) ? 1 : 0);
> +	return keys;
> +}

Please convert it to a macro as the size is always a builtin constant. It would
be better to be done in compile time.

> +
> +/**
> + * Set pedit key of transport (TCP/UDP) port value
> + *
> + * @param[in] actions
> + *   pointer to action specification
> + * @param[in,out] p_parser
> + *   pointer to pedit_parser
> + * @param[in] item_flags
> + *   flags of all items presented
> + */
> +static void
> +flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
> +				struct pedit_parser *p_parser,
> +				uint64_t item_flags)
> +{
> +	int idx = p_parser->sel.nkeys;
> +
> +	if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
> +		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
> +	if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
> +		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
> +	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
> +	assert(offsetof(struct tcp_hdr, src_port) ==
> +	       offsetof(struct udp_hdr, src_port));
> +	assert(offsetof(struct tcp_hdr, dst_port) ==
> +	       offsetof(struct udp_hdr, dst_port));

I think I was too much paranoid about this. Let's remove this assert() and just
leave a comment that the offset for port number is same for tcp and udp. I don't
think network spec for header definition will be changed ever. :-) Sorry for the
confusion.

> +	p_parser->keys[idx].off =
> +		actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ?
> +		offsetof(struct tcp_hdr, src_port) :
> +		offsetof(struct tcp_hdr, dst_port);
> +	p_parser->keys[idx].mask = 0xFFFF0000;
> +	p_parser->keys[idx].val =
> +		(__u32)((const struct rte_flow_action_set_tp *)
> +				actions->conf)->port;
> +	p_parser->sel.nkeys = (++idx);
> +}
> +
> +/**
> + * Set pedit key of ipv6 address
> + *
> + * @param[in] actions
> + *   pointer to action specification
> + * @param[in,out] p_parser
> + *   pointer to pedit_parser
> + */
> +static void
> +flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
> +				 struct pedit_parser *p_parser)
> +{
> +	int idx = p_parser->sel.nkeys;
> +	int keys = flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
> +	int off_base =
> +		actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ?
> +		offsetof(struct ipv6_hdr, src_addr) :
> +		offsetof(struct ipv6_hdr, dst_addr);
> +	const struct rte_flow_action_set_ipv6 *conf =
> +		(const struct rte_flow_action_set_ipv6 *)actions->conf;
> +
> +	for (int i = 0; i < keys; i++, idx++) {
> +		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
> +		p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
> +		p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
> +		p_parser->keys[idx].mask = ~UINT32_MAX;
> +		memcpy(&p_parser->keys[idx].val,
> +			conf->ipv6_addr + i *  SZ_PEDIT_KEY_VAL,
> +			SZ_PEDIT_KEY_VAL);
> +	}
> +	p_parser->sel.nkeys += keys;
> +}
> +
> +/**
> + * Set pedit key of ipv4 address
> + *
> + * @param[in] actions
> + *   pointer to action specification
> + * @param[in,out] p_parser
> + *   pointer to pedit_parser
> + */
> +static void
> +flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
> +				 struct pedit_parser *p_parser)
> +{
> +	int idx = p_parser->sel.nkeys;
> +
> +	p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
> +	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
> +	p_parser->keys[idx].off =
> +		actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ?
> +		offsetof(struct ipv4_hdr, src_addr) :
> +		offsetof(struct ipv4_hdr, dst_addr);
> +	p_parser->keys[idx].mask = ~UINT32_MAX;
> +	p_parser->keys[idx].val =
> +		((const struct rte_flow_action_set_ipv4 *)
> +		 actions->conf)->ipv4_addr;
> +	p_parser->sel.nkeys = (++idx);
> +}
> +
> +/**
> + * Create the pedit's na attribute in netlink message
> + * on pre-allocate message buffer
> + *
> + * @param[in,out] nl
> + *   pointer to pre-allocated netlink message buffer
> + * @param[in,out] actions
> + *   pointer to pointer of actions specification.
> + * @param[in,out] action_flags
> + *   pointer to actions flags
> + * @param[in] item_flags
> + *   flags of all item presented
> + * @return
> + *   0 on sucess

If it always returns zero (no failure), then why should it have a return value
instead of void? The caller doesn't even check it.

> + */
> +static int
> +flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
> +			      const struct rte_flow_action **actions,
> +			      uint64_t item_flags)
> +{
> +	struct pedit_parser p_parser;
> +
> +	memset(&p_parser, 0, sizeof(p_parser));
> +	mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
> +	struct nlattr *na_act_options = mnl_attr_nest_start(nl,
> +							    TCA_ACT_OPTIONS);

How about,

	struct nlattr *na_act_options =
		mnl_attr_nest_start(nl, TCA_ACT_OPTIONS);

And move it to the declaration block (after p_parser definition).

> +	/* all modify header actions should be in one tc-pedit action */
> +	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
> +		switch ((*actions)->type) {
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> +			flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
> +			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> +			flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
> +			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> +			flow_tcf_pedit_key_set_tp_port(*actions,
> +							&p_parser, item_flags);
> +			break;
> +		default:
> +			goto pedit_mnl_msg_done;
> +		}
> +	}
> +pedit_mnl_msg_done:
> +	p_parser.sel.action = TC_ACT_PIPE;
> +	mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
> +			sizeof(p_parser.sel) +
> +			p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
> +			&p_parser);

Indentation.

> +	struct nlattr *na_pedit_keys = mnl_attr_nest_start(nl,
> +					TCA_PEDIT_KEYS_EX | NLA_F_NESTED);

We don't allow declaring a variable in the middle of a block. You can declare it
in the beginning of a block right after "{". And indentation is wrong.

> +	for (int i = 0; i < p_parser.sel.nkeys; i++) {
> +		struct nlattr *na_pedit_key = mnl_attr_nest_start(nl,
> +					TCA_PEDIT_KEY_EX | NLA_F_NESTED);

Indentation.

		struct nlattr *na_pedit_key =
			mnl_attr_nest_start(nl,
					    TCA_PEDIT_KEY_EX | NLA_F_NESTED);

> +		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
> +				 p_parser.keys_ex[i].htype);
> +		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
> +				 p_parser.keys_ex[i].cmd);
> +		mnl_attr_nest_end(nl, na_pedit_key);
> +	}
> +	mnl_attr_nest_end(nl, na_pedit_keys);
> +	mnl_attr_nest_end(nl, na_act_options);
> +	(*actions)--;
> +	return 0;
> +}
> +
> +/**
> + * Calculate max memory size of one TC-pedit actions.
> + * One TC-pedit action can contain set of keys each defining
> + * a rewrite element (rte_flow action)
> + *
> + * @param[in,out] actions
> + *   actions specification.
> + * @param[in,out] action_flags
> + *   actions flags
> + * @param[in,out] size
> + *   accumulated size
> + * @return
> + *   Max memory size of one TC-pedit action
> + */
> +static int
> +flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
> +				uint64_t *action_flags)
> +{
> +	int pedit_size = 0;
> +	int keys = 0;
> +	uint64_t flags = 0;
> +
> +	pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
> +		      SZ_NLATTR_STRZ_OF("pedit") +
> +		      SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
> +	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
> +		switch ((*actions)->type) {
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> +			keys += flow_tcf_calc_pedit_keys(IPV4_ADDR_LEN);
> +			flags |= MLX5_FLOW_ACTION_SET_IPV4_SRC;
> +			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> +			keys += flow_tcf_calc_pedit_keys(IPV4_ADDR_LEN);
> +			flags |= MLX5_FLOW_ACTION_SET_IPV4_DST;
> +			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> +			keys += flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
> +			flags |= MLX5_FLOW_ACTION_SET_IPV6_SRC;
> +			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> +			keys += flow_tcf_calc_pedit_keys(IPV6_ADDR_LEN);
> +			flags |= MLX5_FLOW_ACTION_SET_IPV6_DST;
> +			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> +			/* TCP is as same as UDP */
> +			keys += flow_tcf_calc_pedit_keys(TP_PORT_LEN);
> +			flags |= MLX5_FLOW_ACTION_SET_TP_SRC;
> +			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> +			/* TCP is as same as UDP */
> +			keys += flow_tcf_calc_pedit_keys(TP_PORT_LEN);
> +			flags |= MLX5_FLOW_ACTION_SET_TP_DST;
> +			break;
> +		default:
> +			goto get_pedit_action_size_done;
> +		}
> +	}
> +get_pedit_action_size_done:
> +	/* TCA_PEDIT_PARAMS_EX */
> +	pedit_size += SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
> +			keys * sizeof(struct tc_pedit_key));

Indentation.

> +	pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS */
> +	pedit_size += keys *
> +		/* TCA_PEDIT_KEY_EX + HTYPE + CMD */
> +		(SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) + SZ_NLATTR_DATA_OF(2));

Indentation.

> +	(*action_flags) |= flags;
> +	(*actions)--;
> +	return pedit_size;
> +}
> +
>  /**
>   * Retrieve mask for pattern item.
>   *
> @@ -433,11 +777,14 @@ flow_tcf_validate(struct rte_eth_dev *dev,
>  			of_set_vlan_vid;
>  		const struct rte_flow_action_of_set_vlan_pcp *
>  			of_set_vlan_pcp;
> +		const struct rte_flow_action_set_ipv4 *set_ipv4;
> +		const struct rte_flow_action_set_ipv6 *set_ipv6;
>  	} conf;
>  	uint32_t item_flags = 0;
>  	uint32_t action_flags = 0;
>  	uint8_t next_protocol = -1;
>  	unsigned int tcm_ifindex = 0;
> +	uint8_t pedit_validated = 0;
>  	struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
>  	struct rte_eth_dev *port_id_dev = NULL;
>  	bool in_port_id_set;
> @@ -648,16 +995,20 @@ flow_tcf_validate(struct rte_eth_dev *dev,
>  	}
>  	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
>  		unsigned int i;
> +		uint32_t current_action_flag = 0;
>  
>  		switch (actions->type) {
>  		case RTE_FLOW_ACTION_TYPE_VOID:
>  			break;
>  		case RTE_FLOW_ACTION_TYPE_PORT_ID:
> +			current_action_flag = MLX5_FLOW_ACTION_PORT_ID;
>  			if (action_flags & MLX5_TCF_FATE_ACTIONS)
>  				return rte_flow_error_set
>  					(error, EINVAL,
>  					 RTE_FLOW_ERROR_TYPE_ACTION, actions,
>  					 "can't have multiple fate actions");
> +			if (!actions->conf)
> +				break;
>  			conf.port_id = actions->conf;
>  			if (conf.port_id->original)
>  				i = 0;
> @@ -672,7 +1023,6 @@ flow_tcf_validate(struct rte_eth_dev *dev,
>  					 conf.port_id,
>  					 "missing data to convert port ID to"
>  					 " ifindex");
> -			action_flags |= MLX5_FLOW_ACTION_PORT_ID;
>  			port_id_dev = &rte_eth_devices[conf.port_id->id];
>  			break;
>  		case RTE_FLOW_ACTION_TYPE_DROP:
> @@ -681,13 +1031,13 @@ flow_tcf_validate(struct rte_eth_dev *dev,
>  					(error, EINVAL,
>  					 RTE_FLOW_ERROR_TYPE_ACTION, actions,
>  					 "can't have multiple fate actions");
> -			action_flags |= MLX5_FLOW_ACTION_DROP;
> +			current_action_flag = MLX5_FLOW_ACTION_DROP;
>  			break;
>  		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
> -			action_flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
> +			current_action_flag = MLX5_FLOW_ACTION_OF_POP_VLAN;
>  			break;
>  		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
> -			action_flags |= MLX5_FLOW_ACTION_OF_PUSH_VLAN;
> +			current_action_flag = MLX5_FLOW_ACTION_OF_PUSH_VLAN;
>  			break;
>  		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
>  			if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
> @@ -696,7 +1046,7 @@ flow_tcf_validate(struct rte_eth_dev *dev,
>  					 RTE_FLOW_ERROR_TYPE_ACTION, actions,
>  					 "vlan modify is not supported,"
>  					 " set action must follow push action");
> -			action_flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
> +			current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
>  			break;
>  		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
>  			if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
> @@ -705,7 +1055,25 @@ flow_tcf_validate(struct rte_eth_dev *dev,
>  					 RTE_FLOW_ERROR_TYPE_ACTION, actions,
>  					 "vlan modify is not supported,"
>  					 " set action must follow push action");
> -			action_flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
> +			current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
> +			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> +			current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_SRC;
> +			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> +			current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_DST;
> +			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> +			current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_SRC;
> +			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> +			current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_DST;
> +			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> +			current_action_flag = MLX5_FLOW_ACTION_SET_TP_SRC;
> +			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> +			current_action_flag = MLX5_FLOW_ACTION_SET_TP_DST;
>  			break;
>  		default:
>  			return rte_flow_error_set(error, ENOTSUP,
> @@ -713,6 +1081,66 @@ flow_tcf_validate(struct rte_eth_dev *dev,
>  						  actions,
>  						  "action not supported");
>  		}
> +		if (current_action_flag & MLX5_TCF_CONFIG_ACTIONS) {
> +			if (!actions->conf)
> +				return rte_flow_error_set(error, EINVAL,
> +						RTE_FLOW_ERROR_TYPE_ACTION_CONF,
> +						actions,
> +						"action configuration not set");
> +		}
> +		if ((current_action_flag & MLX5_TCF_PEDIT_ACTIONS) &&
> +				pedit_validated)
> +			return rte_flow_error_set(error, ENOTSUP,
> +						  RTE_FLOW_ERROR_TYPE_ACTION,
> +						  actions,
> +						  "not successive pedit "
> +						  "actions");

User doesn't understand what the pedit means. It should be "set actions"?
E.g. "set actions should be listed successively"

> +		if ((current_action_flag & ~MLX5_TCF_PEDIT_ACTIONS) &&
> +		    (action_flags & MLX5_TCF_PEDIT_ACTIONS))
> +			pedit_validated = 1;
> +		action_flags |= current_action_flag;
> +	}
> +	if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
> +	    (action_flags & MLX5_FLOW_ACTION_DROP))
> +		return rte_flow_error_set(error, ENOTSUP,
> +					  RTE_FLOW_ERROR_TYPE_ACTION,
> +					  actions,
> +					  "actions are not compatible");

Be more specific. "set action is not compatible with drop action"

> +	if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
> +	    !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
> +		return rte_flow_error_set(error, ENOTSUP,
> +					  RTE_FLOW_ERROR_TYPE_ACTION,
> +					  actions,
> +					  "pedit action must be followed by "
> +					  "port_id action");

No pedit for user.

Thanks,
Yongseok

> +	if (action_flags &
> +	   (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST)) {
> +		if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
> +			return rte_flow_error_set(error, EINVAL,
> +						  RTE_FLOW_ERROR_TYPE_ACTION,
> +						  actions,
> +						  "no ipv4 item found in"
> +						  " pattern");
> +	}
> +	if (action_flags &
> +	   (MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST)) {
> +		if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
> +			return rte_flow_error_set(error, EINVAL,
> +						  RTE_FLOW_ERROR_TYPE_ACTION,
> +						  actions,
> +						  "no ipv6 item found in"
> +						  " pattern");
> +	}
> +	if (action_flags &
> +	   (MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST)) {
> +		if (!(item_flags &
> +		     (MLX5_FLOW_LAYER_OUTER_L4_UDP |
> +		      MLX5_FLOW_LAYER_OUTER_L4_TCP)))
> +			return rte_flow_error_set(error, EINVAL,
> +						  RTE_FLOW_ERROR_TYPE_ACTION,
> +						  actions,
> +						  "no TCP/UDP item found in"
> +						  " pattern");
>  	}
>  	/*
>  	 * FW syndrome (0xA9C090):
> @@ -884,6 +1312,15 @@ flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
>  				SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
>  				SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
>  			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> +			size += flow_tcf_get_pedit_actions_size(&actions,
> +								&flags);
> +			break;
>  		default:
>  			DRV_LOG(WARNING,
>  				"unsupported action %p type %d,"
> @@ -1042,6 +1479,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
>  	struct nlattr *na_flower_act;
>  	struct nlattr *na_vlan_id = NULL;
>  	struct nlattr *na_vlan_priority = NULL;
> +	uint64_t item_flags = 0;
>  
>  	claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
>  						PTOI_TABLE_SZ_MAX(dev)));
> @@ -1088,6 +1526,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
>  			tcm->tcm_ifindex = ptoi[i].ifindex;
>  			break;
>  		case RTE_FLOW_ITEM_TYPE_ETH:
> +			item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
>  			mask.eth = flow_tcf_item_mask
>  				(items, &rte_flow_item_eth_mask,
>  				 &flow_tcf_mask_supported.eth,
> @@ -1121,6 +1560,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
>  			}
>  			break;
>  		case RTE_FLOW_ITEM_TYPE_VLAN:
> +			item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
>  			mask.vlan = flow_tcf_item_mask
>  				(items, &rte_flow_item_vlan_mask,
>  				 &flow_tcf_mask_supported.vlan,
> @@ -1153,6 +1593,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
>  						  RTE_BE16(0x0fff)));
>  			break;
>  		case RTE_FLOW_ITEM_TYPE_IPV4:
> +			item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
>  			mask.ipv4 = flow_tcf_item_mask
>  				(items, &rte_flow_item_ipv4_mask,
>  				 &flow_tcf_mask_supported.ipv4,
> @@ -1192,6 +1633,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
>  			}
>  			break;
>  		case RTE_FLOW_ITEM_TYPE_IPV6:
> +			item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
>  			mask.ipv6 = flow_tcf_item_mask
>  				(items, &rte_flow_item_ipv6_mask,
>  				 &flow_tcf_mask_supported.ipv6,
> @@ -1233,6 +1675,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
>  			}
>  			break;
>  		case RTE_FLOW_ITEM_TYPE_UDP:
> +			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
>  			mask.udp = flow_tcf_item_mask
>  				(items, &rte_flow_item_udp_mask,
>  				 &flow_tcf_mask_supported.udp,
> @@ -1262,6 +1705,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
>  			}
>  			break;
>  		case RTE_FLOW_ITEM_TYPE_TCP:
> +			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
>  			mask.tcp = flow_tcf_item_mask
>  				(items, &rte_flow_item_tcp_mask,
>  				 &flow_tcf_mask_supported.tcp,
> @@ -1412,6 +1856,18 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
>  					conf.of_set_vlan_pcp->vlan_pcp;
>  			}
>  			break;
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> +		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
> +		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
> +		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
> +			na_act_index =
> +				mnl_attr_nest_start(nlh, na_act_index_cur++);
> +			flow_tcf_create_pedit_mnl_msg(nlh,
> +						      &actions, item_flags);
> +			mnl_attr_nest_end(nlh, na_act_index);
> +			break;
>  		default:
>  			return rte_flow_error_set(error, ENOTSUP,
>  						  RTE_FLOW_ERROR_TYPE_ACTION,
> -- 
> 2.17.1
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [dpdk-dev] [PATCH v3] net/mlx5: rewrite IP address UDP/TCP port by E-Switch
  2018-10-10 12:56 ` [dpdk-dev] [PATCH v2] net/mlx5: rewrite IP address UDP/TCP port by E-Switch Jack Min
  2018-10-11  4:45   ` Yongseok Koh
@ 2018-10-11  9:26   ` Jack Min
  2018-10-11 13:22     ` [dpdk-dev] [PATCH v4] " Jack Min
  1 sibling, 1 reply; 13+ messages in thread
From: Jack Min @ 2018-10-11  9:26 UTC (permalink / raw)
  To: Shahaf Shuler, Yongseok Koh; +Cc: dev

Offload the following rte_flow actions by inserting accordingly
E-Switch rules via TC Flower driver

 - RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC
 - RTE_FLOW_ACTION_TYPE_SET_IPV4_DST
 - RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC
 - RTE_FLOW_ACTION_TYPE_SET_IPV6_DST
 - RTE_FLOW_ACTION_TYPE_SET_TP_SRC
 - RTE_FLOW_ACTION_TYPE_SET_TP_DST

The example testpmd command is:

    flow create 0 transfer ingress
         pattern eth / ipv4 / udp dst is 7000 / end
	 actions set_ipv4_src ipv4_addr 172.168.0.1 /
	 set_ipv4_dst ipv4_addr 172.168.10.1 /
	 set_tp_dst port 9000 /
	 set_tp_src port 700 /
	 port_id id 1 / end

Signed-off-by: Xiaoyu Min <jackmin@mellanox.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
v2:
 * rebased
 * validate pedit actions with drop action
 * validate successive pedit actions
 * a more generic way of validation
 * added testpmd commands in commit log
 * added comments of new added functions
 * fix some coding style issue
v3:
 * fix some coding style issue
 * calc pedit keys become macro

 drivers/net/mlx5/Makefile        |   5 +
 drivers/net/mlx5/meson.build     |   2 +
 drivers/net/mlx5/mlx5_flow.h     |   6 +
 drivers/net/mlx5/mlx5_flow_tcf.c | 452 ++++++++++++++++++++++++++++++-
 4 files changed, 459 insertions(+), 6 deletions(-)

diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index ca1de9f21..49b95e78e 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -346,6 +346,11 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
 		linux/tc_act/tc_vlan.h \
 		enum TCA_VLAN_PUSH_VLAN_PRIORITY \
 		$(AUTOCONF_OUTPUT)
+	$Q sh -- '$<' '$@' \
+		HAVE_TC_ACT_PEDIT \
+		linux/tc_act/tc_pedit.h \
+		enum TCA_PEDIT_KEY_EX_HDR_TYPE_UDP \
+		$(AUTOCONF_OUTPUT)
 	$Q sh -- '$<' '$@' \
 		HAVE_SUPPORTED_40000baseKR4_Full \
 		/usr/include/linux/ethtool.h \
diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
index fd93ac162..ef6a85101 100644
--- a/drivers/net/mlx5/meson.build
+++ b/drivers/net/mlx5/meson.build
@@ -182,6 +182,8 @@ if build
 		'TCA_FLOWER_KEY_VLAN_ETH_TYPE' ],
 		[ 'HAVE_TC_ACT_VLAN', 'linux/tc_act/tc_vlan.h',
 		'TCA_VLAN_PUSH_VLAN_PRIORITY' ],
+		[ 'HAVE_TC_ACT_PEDIT', 'linux/tc_act/tc_pedit.h',
+		'TCA_PEDIT_KEY_EX_HDR_TYPE_UDP' ],
 		[ 'HAVE_RDMA_NL_NLDEV', 'rdma/rdma_netlink.h',
 		'RDMA_NL_NLDEV' ],
 		[ 'HAVE_RDMA_NLDEV_CMD_GET', 'rdma/rdma_netlink.h',
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 12de841e8..cbb8c56c8 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -78,6 +78,12 @@
 #define MLX5_FLOW_ACTION_OF_PUSH_VLAN (1u << 8)
 #define MLX5_FLOW_ACTION_OF_SET_VLAN_VID (1u << 9)
 #define MLX5_FLOW_ACTION_OF_SET_VLAN_PCP (1u << 10)
+#define MLX5_FLOW_ACTION_SET_IPV4_SRC (1u << 11)
+#define MLX5_FLOW_ACTION_SET_IPV4_DST (1u << 12)
+#define MLX5_FLOW_ACTION_SET_IPV6_SRC (1u << 13)
+#define MLX5_FLOW_ACTION_SET_IPV6_DST (1u << 14)
+#define MLX5_FLOW_ACTION_SET_TP_SRC (1u << 15)
+#define MLX5_FLOW_ACTION_SET_TP_DST (1u << 16)
 
 #define MLX5_FLOW_FATE_ACTIONS \
 	(MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_QUEUE | MLX5_FLOW_ACTION_RSS)
diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
index 91f6ef678..c9214583a 100644
--- a/drivers/net/mlx5/mlx5_flow_tcf.c
+++ b/drivers/net/mlx5/mlx5_flow_tcf.c
@@ -53,6 +53,63 @@ struct tc_vlan {
 
 #endif /* HAVE_TC_ACT_VLAN */
 
+#ifdef HAVE_TC_ACT_PEDIT
+
+#include <linux/tc_act/tc_pedit.h>
+
+#else /* HAVE_TC_ACT_VLAN */
+
+enum {
+	TCA_PEDIT_UNSPEC,
+	TCA_PEDIT_TM,
+	TCA_PEDIT_PARMS,
+	TCA_PEDIT_PAD,
+	TCA_PEDIT_PARMS_EX,
+	TCA_PEDIT_KEYS_EX,
+	TCA_PEDIT_KEY_EX,
+	__TCA_PEDIT_MAX
+};
+
+enum {
+	TCA_PEDIT_KEY_EX_HTYPE = 1,
+	TCA_PEDIT_KEY_EX_CMD = 2,
+	__TCA_PEDIT_KEY_EX_MAX
+};
+
+enum pedit_header_type {
+	TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
+	TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
+	TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
+	TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
+	TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
+	TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
+	__PEDIT_HDR_TYPE_MAX,
+};
+
+enum pedit_cmd {
+	TCA_PEDIT_KEY_EX_CMD_SET = 0,
+	TCA_PEDIT_KEY_EX_CMD_ADD = 1,
+	__PEDIT_CMD_MAX,
+};
+
+struct tc_pedit_key {
+	__u32           mask;  /* AND */
+	__u32           val;   /*XOR */
+	__u32           off;  /*offset */
+	__u32           at;
+	__u32           offmask;
+	__u32           shift;
+};
+
+struct tc_pedit_sel {
+	tc_gen;
+	unsigned char           nkeys;
+	unsigned char           flags;
+	struct tc_pedit_key     keys[0];
+};
+
+#endif /* HAVE_TC_ACT_VLAN */
+
 /* Normally found in linux/netlink.h. */
 #ifndef NETLINK_CAP_ACK
 #define NETLINK_CAP_ACK 10
@@ -153,6 +210,14 @@ struct tc_vlan {
 #define IPV6_ADDR_LEN 16
 #endif
 
+#ifndef IPV4_ADDR_LEN
+#define IPV4_ADDR_LEN 4
+#endif
+
+#ifndef TP_PORT_LEN
+#define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
+#endif
+
 /** Empty masks for known item types. */
 static const union {
 	struct rte_flow_item_port_id port_id;
@@ -230,6 +295,268 @@ struct flow_tcf_ptoi {
 	(MLX5_FLOW_ACTION_OF_POP_VLAN | MLX5_FLOW_ACTION_OF_PUSH_VLAN | \
 	 MLX5_FLOW_ACTION_OF_SET_VLAN_VID | MLX5_FLOW_ACTION_OF_SET_VLAN_PCP)
 
+#define MLX5_TCF_PEDIT_ACTIONS \
+	(MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST | \
+	 MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST | \
+	 MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST)
+
+#define MLX5_TCF_CONFIG_ACTIONS \
+	(MLX5_FLOW_ACTION_PORT_ID | MLX5_FLOW_ACTION_OF_PUSH_VLAN | \
+	 MLX5_FLOW_ACTION_OF_SET_VLAN_VID | MLX5_FLOW_ACTION_OF_SET_VLAN_PCP | \
+	 MLX5_TCF_PEDIT_ACTIONS)
+
+#define MAX_PEDIT_KEYS 128
+#define SZ_PEDIT_KEY_VAL 4
+
+#define NUM_OF_PEDIT_KEYS(sz) \
+	(((sz) / SZ_PEDIT_KEY_VAL) + (((sz) % SZ_PEDIT_KEY_VAL) ? 1 : 0))
+
+struct pedit_key_ex {
+	enum pedit_header_type htype;
+	enum pedit_cmd cmd;
+};
+
+struct pedit_parser {
+	struct tc_pedit_sel sel;
+	struct tc_pedit_key keys[MAX_PEDIT_KEYS];
+	struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
+};
+
+
+/**
+ * Set pedit key of transport (TCP/UDP) port value
+ *
+ * @param[in] actions
+ *   pointer to action specification
+ * @param[in,out] p_parser
+ *   pointer to pedit_parser
+ * @param[in] item_flags
+ *   flags of all items presented
+ */
+static void
+flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
+				struct pedit_parser *p_parser,
+				uint64_t item_flags)
+{
+	int idx = p_parser->sel.nkeys;
+
+	if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
+		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
+	if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
+		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
+	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
+	/* offset of src/dst port is same for TCP and UDP */
+	p_parser->keys[idx].off =
+		actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ?
+		offsetof(struct tcp_hdr, src_port) :
+		offsetof(struct tcp_hdr, dst_port);
+	p_parser->keys[idx].mask = 0xFFFF0000;
+	p_parser->keys[idx].val =
+		(__u32)((const struct rte_flow_action_set_tp *)
+				actions->conf)->port;
+	p_parser->sel.nkeys = (++idx);
+}
+
+/**
+ * Set pedit key of ipv6 address
+ *
+ * @param[in] actions
+ *   pointer to action specification
+ * @param[in,out] p_parser
+ *   pointer to pedit_parser
+ */
+static void
+flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
+				 struct pedit_parser *p_parser)
+{
+	int idx = p_parser->sel.nkeys;
+	int keys = NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
+	int off_base =
+		actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ?
+		offsetof(struct ipv6_hdr, src_addr) :
+		offsetof(struct ipv6_hdr, dst_addr);
+	const struct rte_flow_action_set_ipv6 *conf =
+		(const struct rte_flow_action_set_ipv6 *)actions->conf;
+
+	for (int i = 0; i < keys; i++, idx++) {
+		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
+		p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
+		p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
+		p_parser->keys[idx].mask = ~UINT32_MAX;
+		memcpy(&p_parser->keys[idx].val,
+			conf->ipv6_addr + i *  SZ_PEDIT_KEY_VAL,
+			SZ_PEDIT_KEY_VAL);
+	}
+	p_parser->sel.nkeys += keys;
+}
+
+/**
+ * Set pedit key of ipv4 address
+ *
+ * @param[in] actions
+ *   pointer to action specification
+ * @param[in,out] p_parser
+ *   pointer to pedit_parser
+ */
+static void
+flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
+				 struct pedit_parser *p_parser)
+{
+	int idx = p_parser->sel.nkeys;
+
+	p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
+	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
+	p_parser->keys[idx].off =
+		actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ?
+		offsetof(struct ipv4_hdr, src_addr) :
+		offsetof(struct ipv4_hdr, dst_addr);
+	p_parser->keys[idx].mask = ~UINT32_MAX;
+	p_parser->keys[idx].val =
+		((const struct rte_flow_action_set_ipv4 *)
+		 actions->conf)->ipv4_addr;
+	p_parser->sel.nkeys = (++idx);
+}
+
+/**
+ * Create the pedit's na attribute in netlink message
+ * on pre-allocate message buffer
+ *
+ * @param[in,out] nl
+ *   pointer to pre-allocated netlink message buffer
+ * @param[in,out] actions
+ *   pointer to pointer of actions specification.
+ * @param[in,out] action_flags
+ *   pointer to actions flags
+ * @param[in] item_flags
+ *   flags of all item presented
+ */
+static void
+flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
+			      const struct rte_flow_action **actions,
+			      uint64_t item_flags)
+{
+	struct pedit_parser p_parser;
+	struct nlattr *na_act_options =
+		mnl_attr_nest_start(nl, TCA_ACT_OPTIONS);
+	struct nlattr *na_pedit_keys;
+
+	memset(&p_parser, 0, sizeof(p_parser));
+	mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
+	/* all modify header actions should be in one tc-pedit action */
+	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
+		switch ((*actions)->type) {
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
+			flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
+			flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+			flow_tcf_pedit_key_set_tp_port(*actions,
+							&p_parser, item_flags);
+			break;
+		default:
+			goto pedit_mnl_msg_done;
+		}
+	}
+pedit_mnl_msg_done:
+	p_parser.sel.action = TC_ACT_PIPE;
+	mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
+		     sizeof(p_parser.sel) +
+		     p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
+		     &p_parser);
+	na_pedit_keys =
+		mnl_attr_nest_start(nl, TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
+	for (int i = 0; i < p_parser.sel.nkeys; i++) {
+		struct nlattr *na_pedit_key =
+			mnl_attr_nest_start(nl,
+					    TCA_PEDIT_KEY_EX | NLA_F_NESTED);
+		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
+				 p_parser.keys_ex[i].htype);
+		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
+				 p_parser.keys_ex[i].cmd);
+		mnl_attr_nest_end(nl, na_pedit_key);
+	}
+	mnl_attr_nest_end(nl, na_pedit_keys);
+	mnl_attr_nest_end(nl, na_act_options);
+	(*actions)--;
+}
+
+/**
+ * Calculate max memory size of one TC-pedit actions.
+ * One TC-pedit action can contain set of keys each defining
+ * a rewrite element (rte_flow action)
+ *
+ * @param[in,out] actions
+ *   actions specification.
+ * @param[in,out] action_flags
+ *   actions flags
+ * @param[in,out] size
+ *   accumulated size
+ * @return
+ *   Max memory size of one TC-pedit action
+ */
+static int
+flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
+				uint64_t *action_flags)
+{
+	int pedit_size = 0;
+	int keys = 0;
+	uint64_t flags = 0;
+
+	pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
+		      SZ_NLATTR_STRZ_OF("pedit") +
+		      SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
+	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
+		switch ((*actions)->type) {
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
+			keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
+			flags |= MLX5_FLOW_ACTION_SET_IPV4_SRC;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
+			keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
+			flags |= MLX5_FLOW_ACTION_SET_IPV4_DST;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
+			keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
+			flags |= MLX5_FLOW_ACTION_SET_IPV6_SRC;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
+			keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
+			flags |= MLX5_FLOW_ACTION_SET_IPV6_DST;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
+			/* TCP is as same as UDP */
+			keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
+			flags |= MLX5_FLOW_ACTION_SET_TP_SRC;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+			/* TCP is as same as UDP */
+			keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
+			flags |= MLX5_FLOW_ACTION_SET_TP_DST;
+			break;
+		default:
+			goto get_pedit_action_size_done;
+		}
+	}
+get_pedit_action_size_done:
+	/* TCA_PEDIT_PARAMS_EX */
+	pedit_size +=
+		SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
+				  keys * sizeof(struct tc_pedit_key));
+	pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS */
+	pedit_size += keys *
+		      /* TCA_PEDIT_KEY_EX + HTYPE + CMD */
+		      (SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) +
+		       SZ_NLATTR_DATA_OF(2));
+	(*action_flags) |= flags;
+	(*actions)--;
+	return pedit_size;
+}
+
 /**
  * Retrieve mask for pattern item.
  *
@@ -433,11 +760,14 @@ flow_tcf_validate(struct rte_eth_dev *dev,
 			of_set_vlan_vid;
 		const struct rte_flow_action_of_set_vlan_pcp *
 			of_set_vlan_pcp;
+		const struct rte_flow_action_set_ipv4 *set_ipv4;
+		const struct rte_flow_action_set_ipv6 *set_ipv6;
 	} conf;
 	uint32_t item_flags = 0;
 	uint32_t action_flags = 0;
 	uint8_t next_protocol = -1;
 	unsigned int tcm_ifindex = 0;
+	uint8_t pedit_validated = 0;
 	struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
 	struct rte_eth_dev *port_id_dev = NULL;
 	bool in_port_id_set;
@@ -648,16 +978,20 @@ flow_tcf_validate(struct rte_eth_dev *dev,
 	}
 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
 		unsigned int i;
+		uint32_t current_action_flag = 0;
 
 		switch (actions->type) {
 		case RTE_FLOW_ACTION_TYPE_VOID:
 			break;
 		case RTE_FLOW_ACTION_TYPE_PORT_ID:
+			current_action_flag = MLX5_FLOW_ACTION_PORT_ID;
 			if (action_flags & MLX5_TCF_FATE_ACTIONS)
 				return rte_flow_error_set
 					(error, EINVAL,
 					 RTE_FLOW_ERROR_TYPE_ACTION, actions,
 					 "can't have multiple fate actions");
+			if (!actions->conf)
+				break;
 			conf.port_id = actions->conf;
 			if (conf.port_id->original)
 				i = 0;
@@ -672,7 +1006,6 @@ flow_tcf_validate(struct rte_eth_dev *dev,
 					 conf.port_id,
 					 "missing data to convert port ID to"
 					 " ifindex");
-			action_flags |= MLX5_FLOW_ACTION_PORT_ID;
 			port_id_dev = &rte_eth_devices[conf.port_id->id];
 			break;
 		case RTE_FLOW_ACTION_TYPE_DROP:
@@ -681,13 +1014,13 @@ flow_tcf_validate(struct rte_eth_dev *dev,
 					(error, EINVAL,
 					 RTE_FLOW_ERROR_TYPE_ACTION, actions,
 					 "can't have multiple fate actions");
-			action_flags |= MLX5_FLOW_ACTION_DROP;
+			current_action_flag = MLX5_FLOW_ACTION_DROP;
 			break;
 		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
-			action_flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
+			current_action_flag = MLX5_FLOW_ACTION_OF_POP_VLAN;
 			break;
 		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
-			action_flags |= MLX5_FLOW_ACTION_OF_PUSH_VLAN;
+			current_action_flag = MLX5_FLOW_ACTION_OF_PUSH_VLAN;
 			break;
 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
 			if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
@@ -696,7 +1029,7 @@ flow_tcf_validate(struct rte_eth_dev *dev,
 					 RTE_FLOW_ERROR_TYPE_ACTION, actions,
 					 "vlan modify is not supported,"
 					 " set action must follow push action");
-			action_flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
+			current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
 			break;
 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
 			if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
@@ -705,7 +1038,25 @@ flow_tcf_validate(struct rte_eth_dev *dev,
 					 RTE_FLOW_ERROR_TYPE_ACTION, actions,
 					 "vlan modify is not supported,"
 					 " set action must follow push action");
-			action_flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
+			current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
+			current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_SRC;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
+			current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_DST;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
+			current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_SRC;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
+			current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_DST;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
+			current_action_flag = MLX5_FLOW_ACTION_SET_TP_SRC;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+			current_action_flag = MLX5_FLOW_ACTION_SET_TP_DST;
 			break;
 		default:
 			return rte_flow_error_set(error, ENOTSUP,
@@ -713,6 +1064,67 @@ flow_tcf_validate(struct rte_eth_dev *dev,
 						  actions,
 						  "action not supported");
 		}
+		if (current_action_flag & MLX5_TCF_CONFIG_ACTIONS) {
+			if (!actions->conf)
+				return rte_flow_error_set(error, EINVAL,
+						RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+						actions,
+						"action configuration not set");
+		}
+		if ((current_action_flag & MLX5_TCF_PEDIT_ACTIONS) &&
+				pedit_validated)
+			return rte_flow_error_set(error, ENOTSUP,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  actions,
+						  "set actions should be "
+						  "listed successively");
+		if ((current_action_flag & ~MLX5_TCF_PEDIT_ACTIONS) &&
+		    (action_flags & MLX5_TCF_PEDIT_ACTIONS))
+			pedit_validated = 1;
+		action_flags |= current_action_flag;
+	}
+	if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
+	    (action_flags & MLX5_FLOW_ACTION_DROP))
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ACTION,
+					  actions,
+					  "set action is not compatible with "
+					  "drop action");
+	if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
+	    !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ACTION,
+					  actions,
+					  "set action must be followed by "
+					  "port_id action");
+	if (action_flags &
+	   (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST)) {
+		if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
+			return rte_flow_error_set(error, EINVAL,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  actions,
+						  "no ipv4 item found in"
+						  " pattern");
+	}
+	if (action_flags &
+	   (MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST)) {
+		if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
+			return rte_flow_error_set(error, EINVAL,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  actions,
+						  "no ipv6 item found in"
+						  " pattern");
+	}
+	if (action_flags &
+	   (MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST)) {
+		if (!(item_flags &
+		     (MLX5_FLOW_LAYER_OUTER_L4_UDP |
+		      MLX5_FLOW_LAYER_OUTER_L4_TCP)))
+			return rte_flow_error_set(error, EINVAL,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  actions,
+						  "no TCP/UDP item found in"
+						  " pattern");
 	}
 	/*
 	 * FW syndrome (0xA9C090):
@@ -884,6 +1296,15 @@ flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
 				SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
 				SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
 			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
+		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+			size += flow_tcf_get_pedit_actions_size(&actions,
+								&flags);
+			break;
 		default:
 			DRV_LOG(WARNING,
 				"unsupported action %p type %d,"
@@ -1042,6 +1463,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
 	struct nlattr *na_flower_act;
 	struct nlattr *na_vlan_id = NULL;
 	struct nlattr *na_vlan_priority = NULL;
+	uint64_t item_flags = 0;
 
 	claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
 						PTOI_TABLE_SZ_MAX(dev)));
@@ -1088,6 +1510,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
 			tcm->tcm_ifindex = ptoi[i].ifindex;
 			break;
 		case RTE_FLOW_ITEM_TYPE_ETH:
+			item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
 			mask.eth = flow_tcf_item_mask
 				(items, &rte_flow_item_eth_mask,
 				 &flow_tcf_mask_supported.eth,
@@ -1121,6 +1544,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
 			}
 			break;
 		case RTE_FLOW_ITEM_TYPE_VLAN:
+			item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
 			mask.vlan = flow_tcf_item_mask
 				(items, &rte_flow_item_vlan_mask,
 				 &flow_tcf_mask_supported.vlan,
@@ -1153,6 +1577,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
 						  RTE_BE16(0x0fff)));
 			break;
 		case RTE_FLOW_ITEM_TYPE_IPV4:
+			item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
 			mask.ipv4 = flow_tcf_item_mask
 				(items, &rte_flow_item_ipv4_mask,
 				 &flow_tcf_mask_supported.ipv4,
@@ -1192,6 +1617,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
 			}
 			break;
 		case RTE_FLOW_ITEM_TYPE_IPV6:
+			item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
 			mask.ipv6 = flow_tcf_item_mask
 				(items, &rte_flow_item_ipv6_mask,
 				 &flow_tcf_mask_supported.ipv6,
@@ -1233,6 +1659,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
 			}
 			break;
 		case RTE_FLOW_ITEM_TYPE_UDP:
+			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
 			mask.udp = flow_tcf_item_mask
 				(items, &rte_flow_item_udp_mask,
 				 &flow_tcf_mask_supported.udp,
@@ -1262,6 +1689,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
 			}
 			break;
 		case RTE_FLOW_ITEM_TYPE_TCP:
+			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
 			mask.tcp = flow_tcf_item_mask
 				(items, &rte_flow_item_tcp_mask,
 				 &flow_tcf_mask_supported.tcp,
@@ -1412,6 +1840,18 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
 					conf.of_set_vlan_pcp->vlan_pcp;
 			}
 			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
+		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+			na_act_index =
+				mnl_attr_nest_start(nlh, na_act_index_cur++);
+			flow_tcf_create_pedit_mnl_msg(nlh,
+						      &actions, item_flags);
+			mnl_attr_nest_end(nlh, na_act_index);
+			break;
 		default:
 			return rte_flow_error_set(error, ENOTSUP,
 						  RTE_FLOW_ERROR_TYPE_ACTION,
-- 
2.17.1

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [dpdk-dev] [PATCH v4] net/mlx5: rewrite IP address UDP/TCP port by E-Switch
  2018-10-11  9:26   ` [dpdk-dev] [PATCH v3] " Jack Min
@ 2018-10-11 13:22     ` Jack Min
  2018-10-11 13:39       ` Shahaf Shuler
  0 siblings, 1 reply; 13+ messages in thread
From: Jack Min @ 2018-10-11 13:22 UTC (permalink / raw)
  To: Shahaf Shuler, Yongseok Koh; +Cc: dev

Offload the following rte_flow actions by inserting accordingly
E-Switch rules via TC Flower driver

 - RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC
 - RTE_FLOW_ACTION_TYPE_SET_IPV4_DST
 - RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC
 - RTE_FLOW_ACTION_TYPE_SET_IPV6_DST
 - RTE_FLOW_ACTION_TYPE_SET_TP_SRC
 - RTE_FLOW_ACTION_TYPE_SET_TP_DST

The example testpmd command is:

    flow create 0 transfer ingress
         pattern eth / ipv4 / udp dst is 7000 / end
	 actions set_ipv4_src ipv4_addr 172.168.0.1 /
	 set_ipv4_dst ipv4_addr 172.168.10.1 /
	 set_tp_dst port 9000 /
	 set_tp_src port 700 /
	 port_id id 1 / end

Signed-off-by: Xiaoyu Min <jackmin@mellanox.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
v2:
 * rebased
 * validate pedit actions with drop action
 * validate successive pedit actions
 * a more generic way of validation
 * added testpmd commands in commit log
 * added comments of new added functions
 * fix some coding style issue
v3:
 * fix some coding style issue
 * calc pedit keys become macro
v4:
 * fix tc-rule create error introduced by v3
 * rebased

 drivers/net/mlx5/Makefile        |   5 +
 drivers/net/mlx5/meson.build     |   2 +
 drivers/net/mlx5/mlx5_flow.h     |   6 +
 drivers/net/mlx5/mlx5_flow_tcf.c | 452 ++++++++++++++++++++++++++++++-
 4 files changed, 459 insertions(+), 6 deletions(-)

diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index ca1de9f21..49b95e78e 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -346,6 +346,11 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
 		linux/tc_act/tc_vlan.h \
 		enum TCA_VLAN_PUSH_VLAN_PRIORITY \
 		$(AUTOCONF_OUTPUT)
+	$Q sh -- '$<' '$@' \
+		HAVE_TC_ACT_PEDIT \
+		linux/tc_act/tc_pedit.h \
+		enum TCA_PEDIT_KEY_EX_HDR_TYPE_UDP \
+		$(AUTOCONF_OUTPUT)
 	$Q sh -- '$<' '$@' \
 		HAVE_SUPPORTED_40000baseKR4_Full \
 		/usr/include/linux/ethtool.h \
diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
index fd93ac162..ef6a85101 100644
--- a/drivers/net/mlx5/meson.build
+++ b/drivers/net/mlx5/meson.build
@@ -182,6 +182,8 @@ if build
 		'TCA_FLOWER_KEY_VLAN_ETH_TYPE' ],
 		[ 'HAVE_TC_ACT_VLAN', 'linux/tc_act/tc_vlan.h',
 		'TCA_VLAN_PUSH_VLAN_PRIORITY' ],
+		[ 'HAVE_TC_ACT_PEDIT', 'linux/tc_act/tc_pedit.h',
+		'TCA_PEDIT_KEY_EX_HDR_TYPE_UDP' ],
 		[ 'HAVE_RDMA_NL_NLDEV', 'rdma/rdma_netlink.h',
 		'RDMA_NL_NLDEV' ],
 		[ 'HAVE_RDMA_NLDEV_CMD_GET', 'rdma/rdma_netlink.h',
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index fee05f0e3..c592828b5 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -78,6 +78,12 @@
 #define MLX5_FLOW_ACTION_OF_PUSH_VLAN (1u << 8)
 #define MLX5_FLOW_ACTION_OF_SET_VLAN_VID (1u << 9)
 #define MLX5_FLOW_ACTION_OF_SET_VLAN_PCP (1u << 10)
+#define MLX5_FLOW_ACTION_SET_IPV4_SRC (1u << 11)
+#define MLX5_FLOW_ACTION_SET_IPV4_DST (1u << 12)
+#define MLX5_FLOW_ACTION_SET_IPV6_SRC (1u << 13)
+#define MLX5_FLOW_ACTION_SET_IPV6_DST (1u << 14)
+#define MLX5_FLOW_ACTION_SET_TP_SRC (1u << 15)
+#define MLX5_FLOW_ACTION_SET_TP_DST (1u << 16)
 
 #define MLX5_FLOW_FATE_ACTIONS \
 	(MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_QUEUE | MLX5_FLOW_ACTION_RSS)
diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
index 91f6ef678..44c899830 100644
--- a/drivers/net/mlx5/mlx5_flow_tcf.c
+++ b/drivers/net/mlx5/mlx5_flow_tcf.c
@@ -53,6 +53,63 @@ struct tc_vlan {
 
 #endif /* HAVE_TC_ACT_VLAN */
 
+#ifdef HAVE_TC_ACT_PEDIT
+
+#include <linux/tc_act/tc_pedit.h>
+
+#else /* HAVE_TC_ACT_VLAN */
+
+enum {
+	TCA_PEDIT_UNSPEC,
+	TCA_PEDIT_TM,
+	TCA_PEDIT_PARMS,
+	TCA_PEDIT_PAD,
+	TCA_PEDIT_PARMS_EX,
+	TCA_PEDIT_KEYS_EX,
+	TCA_PEDIT_KEY_EX,
+	__TCA_PEDIT_MAX
+};
+
+enum {
+	TCA_PEDIT_KEY_EX_HTYPE = 1,
+	TCA_PEDIT_KEY_EX_CMD = 2,
+	__TCA_PEDIT_KEY_EX_MAX
+};
+
+enum pedit_header_type {
+	TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
+	TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
+	TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
+	TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
+	TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
+	TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
+	__PEDIT_HDR_TYPE_MAX,
+};
+
+enum pedit_cmd {
+	TCA_PEDIT_KEY_EX_CMD_SET = 0,
+	TCA_PEDIT_KEY_EX_CMD_ADD = 1,
+	__PEDIT_CMD_MAX,
+};
+
+struct tc_pedit_key {
+	__u32           mask;  /* AND */
+	__u32           val;   /*XOR */
+	__u32           off;  /*offset */
+	__u32           at;
+	__u32           offmask;
+	__u32           shift;
+};
+
+struct tc_pedit_sel {
+	tc_gen;
+	unsigned char           nkeys;
+	unsigned char           flags;
+	struct tc_pedit_key     keys[0];
+};
+
+#endif /* HAVE_TC_ACT_VLAN */
+
 /* Normally found in linux/netlink.h. */
 #ifndef NETLINK_CAP_ACK
 #define NETLINK_CAP_ACK 10
@@ -153,6 +210,14 @@ struct tc_vlan {
 #define IPV6_ADDR_LEN 16
 #endif
 
+#ifndef IPV4_ADDR_LEN
+#define IPV4_ADDR_LEN 4
+#endif
+
+#ifndef TP_PORT_LEN
+#define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
+#endif
+
 /** Empty masks for known item types. */
 static const union {
 	struct rte_flow_item_port_id port_id;
@@ -230,6 +295,268 @@ struct flow_tcf_ptoi {
 	(MLX5_FLOW_ACTION_OF_POP_VLAN | MLX5_FLOW_ACTION_OF_PUSH_VLAN | \
 	 MLX5_FLOW_ACTION_OF_SET_VLAN_VID | MLX5_FLOW_ACTION_OF_SET_VLAN_PCP)
 
+#define MLX5_TCF_PEDIT_ACTIONS \
+	(MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST | \
+	 MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST | \
+	 MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST)
+
+#define MLX5_TCF_CONFIG_ACTIONS \
+	(MLX5_FLOW_ACTION_PORT_ID | MLX5_FLOW_ACTION_OF_PUSH_VLAN | \
+	 MLX5_FLOW_ACTION_OF_SET_VLAN_VID | MLX5_FLOW_ACTION_OF_SET_VLAN_PCP | \
+	 MLX5_TCF_PEDIT_ACTIONS)
+
+#define MAX_PEDIT_KEYS 128
+#define SZ_PEDIT_KEY_VAL 4
+
+#define NUM_OF_PEDIT_KEYS(sz) \
+	(((sz) / SZ_PEDIT_KEY_VAL) + (((sz) % SZ_PEDIT_KEY_VAL) ? 1 : 0))
+
+struct pedit_key_ex {
+	enum pedit_header_type htype;
+	enum pedit_cmd cmd;
+};
+
+struct pedit_parser {
+	struct tc_pedit_sel sel;
+	struct tc_pedit_key keys[MAX_PEDIT_KEYS];
+	struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
+};
+
+
+/**
+ * Set pedit key of transport (TCP/UDP) port value
+ *
+ * @param[in] actions
+ *   pointer to action specification
+ * @param[in,out] p_parser
+ *   pointer to pedit_parser
+ * @param[in] item_flags
+ *   flags of all items presented
+ */
+static void
+flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
+				struct pedit_parser *p_parser,
+				uint64_t item_flags)
+{
+	int idx = p_parser->sel.nkeys;
+
+	if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
+		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
+	if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
+		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
+	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
+	/* offset of src/dst port is same for TCP and UDP */
+	p_parser->keys[idx].off =
+		actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ?
+		offsetof(struct tcp_hdr, src_port) :
+		offsetof(struct tcp_hdr, dst_port);
+	p_parser->keys[idx].mask = 0xFFFF0000;
+	p_parser->keys[idx].val =
+		(__u32)((const struct rte_flow_action_set_tp *)
+				actions->conf)->port;
+	p_parser->sel.nkeys = (++idx);
+}
+
+/**
+ * Set pedit key of ipv6 address
+ *
+ * @param[in] actions
+ *   pointer to action specification
+ * @param[in,out] p_parser
+ *   pointer to pedit_parser
+ */
+static void
+flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
+				 struct pedit_parser *p_parser)
+{
+	int idx = p_parser->sel.nkeys;
+	int keys = NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
+	int off_base =
+		actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ?
+		offsetof(struct ipv6_hdr, src_addr) :
+		offsetof(struct ipv6_hdr, dst_addr);
+	const struct rte_flow_action_set_ipv6 *conf =
+		(const struct rte_flow_action_set_ipv6 *)actions->conf;
+
+	for (int i = 0; i < keys; i++, idx++) {
+		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
+		p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
+		p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
+		p_parser->keys[idx].mask = ~UINT32_MAX;
+		memcpy(&p_parser->keys[idx].val,
+			conf->ipv6_addr + i *  SZ_PEDIT_KEY_VAL,
+			SZ_PEDIT_KEY_VAL);
+	}
+	p_parser->sel.nkeys += keys;
+}
+
+/**
+ * Set pedit key of ipv4 address
+ *
+ * @param[in] actions
+ *   pointer to action specification
+ * @param[in,out] p_parser
+ *   pointer to pedit_parser
+ */
+static void
+flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
+				 struct pedit_parser *p_parser)
+{
+	int idx = p_parser->sel.nkeys;
+
+	p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
+	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
+	p_parser->keys[idx].off =
+		actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ?
+		offsetof(struct ipv4_hdr, src_addr) :
+		offsetof(struct ipv4_hdr, dst_addr);
+	p_parser->keys[idx].mask = ~UINT32_MAX;
+	p_parser->keys[idx].val =
+		((const struct rte_flow_action_set_ipv4 *)
+		 actions->conf)->ipv4_addr;
+	p_parser->sel.nkeys = (++idx);
+}
+
+/**
+ * Create the pedit's na attribute in netlink message
+ * on pre-allocate message buffer
+ *
+ * @param[in,out] nl
+ *   pointer to pre-allocated netlink message buffer
+ * @param[in,out] actions
+ *   pointer to pointer of actions specification.
+ * @param[in,out] action_flags
+ *   pointer to actions flags
+ * @param[in] item_flags
+ *   flags of all item presented
+ */
+static void
+flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
+			      const struct rte_flow_action **actions,
+			      uint64_t item_flags)
+{
+	struct pedit_parser p_parser;
+	struct nlattr *na_act_options;
+	struct nlattr *na_pedit_keys;
+
+	memset(&p_parser, 0, sizeof(p_parser));
+	mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
+	na_act_options = mnl_attr_nest_start(nl, TCA_ACT_OPTIONS);
+	/* all modify header actions should be in one tc-pedit action */
+	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
+		switch ((*actions)->type) {
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
+			flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
+			flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+			flow_tcf_pedit_key_set_tp_port(*actions,
+							&p_parser, item_flags);
+			break;
+		default:
+			goto pedit_mnl_msg_done;
+		}
+	}
+pedit_mnl_msg_done:
+	p_parser.sel.action = TC_ACT_PIPE;
+	mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
+		     sizeof(p_parser.sel) +
+		     p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
+		     &p_parser);
+	na_pedit_keys =
+		mnl_attr_nest_start(nl, TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
+	for (int i = 0; i < p_parser.sel.nkeys; i++) {
+		struct nlattr *na_pedit_key =
+			mnl_attr_nest_start(nl,
+					    TCA_PEDIT_KEY_EX | NLA_F_NESTED);
+		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
+				 p_parser.keys_ex[i].htype);
+		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
+				 p_parser.keys_ex[i].cmd);
+		mnl_attr_nest_end(nl, na_pedit_key);
+	}
+	mnl_attr_nest_end(nl, na_pedit_keys);
+	mnl_attr_nest_end(nl, na_act_options);
+	(*actions)--;
+}
+
+/**
+ * Calculate max memory size of one TC-pedit actions.
+ * One TC-pedit action can contain set of keys each defining
+ * a rewrite element (rte_flow action)
+ *
+ * @param[in,out] actions
+ *   actions specification.
+ * @param[in,out] action_flags
+ *   actions flags
+ * @param[in,out] size
+ *   accumulated size
+ * @return
+ *   Max memory size of one TC-pedit action
+ */
+static int
+flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
+				uint64_t *action_flags)
+{
+	int pedit_size = 0;
+	int keys = 0;
+	uint64_t flags = 0;
+
+	pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
+		      SZ_NLATTR_STRZ_OF("pedit") +
+		      SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
+	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
+		switch ((*actions)->type) {
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
+			keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
+			flags |= MLX5_FLOW_ACTION_SET_IPV4_SRC;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
+			keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
+			flags |= MLX5_FLOW_ACTION_SET_IPV4_DST;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
+			keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
+			flags |= MLX5_FLOW_ACTION_SET_IPV6_SRC;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
+			keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
+			flags |= MLX5_FLOW_ACTION_SET_IPV6_DST;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
+			/* TCP is as same as UDP */
+			keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
+			flags |= MLX5_FLOW_ACTION_SET_TP_SRC;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+			/* TCP is as same as UDP */
+			keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
+			flags |= MLX5_FLOW_ACTION_SET_TP_DST;
+			break;
+		default:
+			goto get_pedit_action_size_done;
+		}
+	}
+get_pedit_action_size_done:
+	/* TCA_PEDIT_PARAMS_EX */
+	pedit_size +=
+		SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
+				  keys * sizeof(struct tc_pedit_key));
+	pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS */
+	pedit_size += keys *
+		      /* TCA_PEDIT_KEY_EX + HTYPE + CMD */
+		      (SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) +
+		       SZ_NLATTR_DATA_OF(2));
+	(*action_flags) |= flags;
+	(*actions)--;
+	return pedit_size;
+}
+
 /**
  * Retrieve mask for pattern item.
  *
@@ -433,11 +760,14 @@ flow_tcf_validate(struct rte_eth_dev *dev,
 			of_set_vlan_vid;
 		const struct rte_flow_action_of_set_vlan_pcp *
 			of_set_vlan_pcp;
+		const struct rte_flow_action_set_ipv4 *set_ipv4;
+		const struct rte_flow_action_set_ipv6 *set_ipv6;
 	} conf;
 	uint32_t item_flags = 0;
 	uint32_t action_flags = 0;
 	uint8_t next_protocol = -1;
 	unsigned int tcm_ifindex = 0;
+	uint8_t pedit_validated = 0;
 	struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
 	struct rte_eth_dev *port_id_dev = NULL;
 	bool in_port_id_set;
@@ -648,16 +978,20 @@ flow_tcf_validate(struct rte_eth_dev *dev,
 	}
 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
 		unsigned int i;
+		uint32_t current_action_flag = 0;
 
 		switch (actions->type) {
 		case RTE_FLOW_ACTION_TYPE_VOID:
 			break;
 		case RTE_FLOW_ACTION_TYPE_PORT_ID:
+			current_action_flag = MLX5_FLOW_ACTION_PORT_ID;
 			if (action_flags & MLX5_TCF_FATE_ACTIONS)
 				return rte_flow_error_set
 					(error, EINVAL,
 					 RTE_FLOW_ERROR_TYPE_ACTION, actions,
 					 "can't have multiple fate actions");
+			if (!actions->conf)
+				break;
 			conf.port_id = actions->conf;
 			if (conf.port_id->original)
 				i = 0;
@@ -672,7 +1006,6 @@ flow_tcf_validate(struct rte_eth_dev *dev,
 					 conf.port_id,
 					 "missing data to convert port ID to"
 					 " ifindex");
-			action_flags |= MLX5_FLOW_ACTION_PORT_ID;
 			port_id_dev = &rte_eth_devices[conf.port_id->id];
 			break;
 		case RTE_FLOW_ACTION_TYPE_DROP:
@@ -681,13 +1014,13 @@ flow_tcf_validate(struct rte_eth_dev *dev,
 					(error, EINVAL,
 					 RTE_FLOW_ERROR_TYPE_ACTION, actions,
 					 "can't have multiple fate actions");
-			action_flags |= MLX5_FLOW_ACTION_DROP;
+			current_action_flag = MLX5_FLOW_ACTION_DROP;
 			break;
 		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
-			action_flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
+			current_action_flag = MLX5_FLOW_ACTION_OF_POP_VLAN;
 			break;
 		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
-			action_flags |= MLX5_FLOW_ACTION_OF_PUSH_VLAN;
+			current_action_flag = MLX5_FLOW_ACTION_OF_PUSH_VLAN;
 			break;
 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
 			if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
@@ -696,7 +1029,7 @@ flow_tcf_validate(struct rte_eth_dev *dev,
 					 RTE_FLOW_ERROR_TYPE_ACTION, actions,
 					 "vlan modify is not supported,"
 					 " set action must follow push action");
-			action_flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
+			current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
 			break;
 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
 			if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
@@ -705,7 +1038,25 @@ flow_tcf_validate(struct rte_eth_dev *dev,
 					 RTE_FLOW_ERROR_TYPE_ACTION, actions,
 					 "vlan modify is not supported,"
 					 " set action must follow push action");
-			action_flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
+			current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
+			current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_SRC;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
+			current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_DST;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
+			current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_SRC;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
+			current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_DST;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
+			current_action_flag = MLX5_FLOW_ACTION_SET_TP_SRC;
+			break;
+		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+			current_action_flag = MLX5_FLOW_ACTION_SET_TP_DST;
 			break;
 		default:
 			return rte_flow_error_set(error, ENOTSUP,
@@ -713,6 +1064,67 @@ flow_tcf_validate(struct rte_eth_dev *dev,
 						  actions,
 						  "action not supported");
 		}
+		if (current_action_flag & MLX5_TCF_CONFIG_ACTIONS) {
+			if (!actions->conf)
+				return rte_flow_error_set(error, EINVAL,
+						RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+						actions,
+						"action configuration not set");
+		}
+		if ((current_action_flag & MLX5_TCF_PEDIT_ACTIONS) &&
+				pedit_validated)
+			return rte_flow_error_set(error, ENOTSUP,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  actions,
+						  "set actions should be "
+						  "listed successively");
+		if ((current_action_flag & ~MLX5_TCF_PEDIT_ACTIONS) &&
+		    (action_flags & MLX5_TCF_PEDIT_ACTIONS))
+			pedit_validated = 1;
+		action_flags |= current_action_flag;
+	}
+	if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
+	    (action_flags & MLX5_FLOW_ACTION_DROP))
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ACTION,
+					  actions,
+					  "set action is not compatible with "
+					  "drop action");
+	if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
+	    !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ACTION,
+					  actions,
+					  "set action must be followed by "
+					  "port_id action");
+	if (action_flags &
+	   (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST)) {
+		if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
+			return rte_flow_error_set(error, EINVAL,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  actions,
+						  "no ipv4 item found in"
+						  " pattern");
+	}
+	if (action_flags &
+	   (MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST)) {
+		if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
+			return rte_flow_error_set(error, EINVAL,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  actions,
+						  "no ipv6 item found in"
+						  " pattern");
+	}
+	if (action_flags &
+	   (MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST)) {
+		if (!(item_flags &
+		     (MLX5_FLOW_LAYER_OUTER_L4_UDP |
+		      MLX5_FLOW_LAYER_OUTER_L4_TCP)))
+			return rte_flow_error_set(error, EINVAL,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  actions,
+						  "no TCP/UDP item found in"
+						  " pattern");
 	}
 	/*
 	 * FW syndrome (0xA9C090):
@@ -884,6 +1296,15 @@ flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
 				SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
 				SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
 			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
+		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+			size += flow_tcf_get_pedit_actions_size(&actions,
+								&flags);
+			break;
 		default:
 			DRV_LOG(WARNING,
 				"unsupported action %p type %d,"
@@ -1042,6 +1463,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
 	struct nlattr *na_flower_act;
 	struct nlattr *na_vlan_id = NULL;
 	struct nlattr *na_vlan_priority = NULL;
+	uint64_t item_flags = 0;
 
 	claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
 						PTOI_TABLE_SZ_MAX(dev)));
@@ -1088,6 +1510,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
 			tcm->tcm_ifindex = ptoi[i].ifindex;
 			break;
 		case RTE_FLOW_ITEM_TYPE_ETH:
+			item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
 			mask.eth = flow_tcf_item_mask
 				(items, &rte_flow_item_eth_mask,
 				 &flow_tcf_mask_supported.eth,
@@ -1121,6 +1544,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
 			}
 			break;
 		case RTE_FLOW_ITEM_TYPE_VLAN:
+			item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
 			mask.vlan = flow_tcf_item_mask
 				(items, &rte_flow_item_vlan_mask,
 				 &flow_tcf_mask_supported.vlan,
@@ -1153,6 +1577,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
 						  RTE_BE16(0x0fff)));
 			break;
 		case RTE_FLOW_ITEM_TYPE_IPV4:
+			item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
 			mask.ipv4 = flow_tcf_item_mask
 				(items, &rte_flow_item_ipv4_mask,
 				 &flow_tcf_mask_supported.ipv4,
@@ -1192,6 +1617,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
 			}
 			break;
 		case RTE_FLOW_ITEM_TYPE_IPV6:
+			item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
 			mask.ipv6 = flow_tcf_item_mask
 				(items, &rte_flow_item_ipv6_mask,
 				 &flow_tcf_mask_supported.ipv6,
@@ -1233,6 +1659,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
 			}
 			break;
 		case RTE_FLOW_ITEM_TYPE_UDP:
+			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
 			mask.udp = flow_tcf_item_mask
 				(items, &rte_flow_item_udp_mask,
 				 &flow_tcf_mask_supported.udp,
@@ -1262,6 +1689,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
 			}
 			break;
 		case RTE_FLOW_ITEM_TYPE_TCP:
+			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
 			mask.tcp = flow_tcf_item_mask
 				(items, &rte_flow_item_tcp_mask,
 				 &flow_tcf_mask_supported.tcp,
@@ -1412,6 +1840,18 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
 					conf.of_set_vlan_pcp->vlan_pcp;
 			}
 			break;
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
+		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
+		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+			na_act_index =
+				mnl_attr_nest_start(nlh, na_act_index_cur++);
+			flow_tcf_create_pedit_mnl_msg(nlh,
+						      &actions, item_flags);
+			mnl_attr_nest_end(nlh, na_act_index);
+			break;
 		default:
 			return rte_flow_error_set(error, ENOTSUP,
 						  RTE_FLOW_ERROR_TYPE_ACTION,
-- 
2.17.1

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [dpdk-dev] [PATCH v4] net/mlx5: rewrite IP address UDP/TCP port by E-Switch
  2018-10-11 13:22     ` [dpdk-dev] [PATCH v4] " Jack Min
@ 2018-10-11 13:39       ` Shahaf Shuler
  0 siblings, 0 replies; 13+ messages in thread
From: Shahaf Shuler @ 2018-10-11 13:39 UTC (permalink / raw)
  To: Jack Min, Yongseok Koh; +Cc: dev

Thursday, October 11, 2018 4:22 PM, Jack Min:
> Subject: [PATCH v4] net/mlx5: rewrite IP address UDP/TCP port by E-Switch
> 
> Offload the following rte_flow actions by inserting accordingly E-Switch rules
> via TC Flower driver
> 
>  - RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC
>  - RTE_FLOW_ACTION_TYPE_SET_IPV4_DST
>  - RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC
>  - RTE_FLOW_ACTION_TYPE_SET_IPV6_DST
>  - RTE_FLOW_ACTION_TYPE_SET_TP_SRC
>  - RTE_FLOW_ACTION_TYPE_SET_TP_DST
> 
> The example testpmd command is:
> 
>     flow create 0 transfer ingress
>          pattern eth / ipv4 / udp dst is 7000 / end
> 	 actions set_ipv4_src ipv4_addr 172.168.0.1 /
> 	 set_ipv4_dst ipv4_addr 172.168.10.1 /
> 	 set_tp_dst port 9000 /
> 	 set_tp_src port 700 /
> 	 port_id id 1 / end
> 
> Signed-off-by: Xiaoyu Min <jackmin@mellanox.com>
> Acked-by: Yongseok Koh <yskoh@mellanox.com>

Applied to next-net-mlx, Thanks. 

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2018-10-11 13:39 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-09-25 11:51 [dpdk-dev] [PATCH] net/mlx5: eswitch-IP address UDP/TCP port rewrite Xiaoyu Min
2018-09-28 23:03 ` Yongseok Koh
2018-09-30  7:21   ` Xiaoyu Min
2018-10-01 20:19     ` Yongseok Koh
2018-10-08 11:22       ` Xiaoyu Min
2018-10-08 22:08         ` Yongseok Koh
2018-10-09  7:03           ` Jack Min
2018-10-09  8:55             ` Yongseok Koh
2018-10-10 12:56 ` [dpdk-dev] [PATCH v2] net/mlx5: rewrite IP address UDP/TCP port by E-Switch Jack Min
2018-10-11  4:45   ` Yongseok Koh
2018-10-11  9:26   ` [dpdk-dev] [PATCH v3] " Jack Min
2018-10-11 13:22     ` [dpdk-dev] [PATCH v4] " Jack Min
2018-10-11 13:39       ` Shahaf Shuler

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).