DPDK patches and discussions
* [dpdk-dev] [PATCH 0/3] migrate Linux TC flower driver to new flow engine
@ 2018-09-19  7:21 Yongseok Koh
  2018-09-19  7:21 ` [dpdk-dev] [PATCH 1/3] net/mlx5: add abstraction for multiple flow drivers Yongseok Koh
                   ` (4 more replies)
  0 siblings, 5 replies; 22+ messages in thread
From: Yongseok Koh @ 2018-09-19  7:21 UTC (permalink / raw)
  To: Shahaf Shuler; +Cc: dev, Yongseok Koh

This patchset migrates the existing E-Switch flow driver onto the new flow
engine. It depends on Ori's new flow engine [1].

[1] http://patches.dpdk.org/project/dpdk/list/?series=1380

Yongseok Koh (3):
  net/mlx5: add abstraction for multiple flow drivers
  net/mlx5: remove Netlink flow driver
  net/mlx5: add Linux TC flower driver for E-Switch flow

 drivers/net/mlx5/Makefile          |    2 +-
 drivers/net/mlx5/mlx5.c            |   12 +-
 drivers/net/mlx5/mlx5.h            |   25 -
 drivers/net/mlx5/mlx5_flow.c       |  352 +++++++-
 drivers/net/mlx5/mlx5_flow.h       |   33 +-
 drivers/net/mlx5/mlx5_flow_dv.c    |   26 +-
 drivers/net/mlx5/mlx5_flow_tcf.c   | 1608 ++++++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_flow_verbs.c |   20 +-
 drivers/net/mlx5/mlx5_nl_flow.c    | 1228 ---------------------------
 9 files changed, 1972 insertions(+), 1334 deletions(-)
 create mode 100644 drivers/net/mlx5/mlx5_flow_tcf.c
 delete mode 100644 drivers/net/mlx5/mlx5_nl_flow.c

-- 
2.11.0


* [dpdk-dev] [PATCH 1/3] net/mlx5: add abstraction for multiple flow drivers
  2018-09-19  7:21 [dpdk-dev] [PATCH 0/3] migrate Linux TC flower driver to new flow engine Yongseok Koh
@ 2018-09-19  7:21 ` Yongseok Koh
  2018-09-19  7:21 ` [dpdk-dev] [PATCH 2/3] net/mlx5: remove Netlink flow driver Yongseok Koh
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 22+ messages in thread
From: Yongseok Koh @ 2018-09-19  7:21 UTC (permalink / raw)
  To: Shahaf Shuler; +Cc: dev, Yongseok Koh

The flow engine has to support multiple driver paths: Verbs/DV for NIC flow
steering and Linux TC flower for E-Switch flow steering. In the future,
another flow driver could be added (devX).
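
For context, a minimal sketch of the dispatch this patch sets up, using the
names introduced below (allocation and error handling elided, so treat it as
an illustration rather than the exact code):

    /* Resolve the driver type once from the flow attributes. */
    flow->drv_type = flow_get_drv_type(dev, attr);
    /* Every stage then dispatches through the same ops table. */
    fops = flow_get_drv_ops(flow->drv_type);
    fops->validate(dev, attr, items, actions, error);
    dev_flow = fops->prepare(attr, items, actions,
                             &item_flags, &action_flags, error);
    fops->translate(dev, dev_flow, attr, items, actions, error);
    fops->apply(dev, flow, error);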

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5.c            |   1 -
 drivers/net/mlx5/mlx5_flow.c       | 348 +++++++++++++++++++++++++++++++++----
 drivers/net/mlx5/mlx5_flow.h       |  17 +-
 drivers/net/mlx5/mlx5_flow_dv.c    |  26 +--
 drivers/net/mlx5/mlx5_flow_verbs.c |  20 +--
 5 files changed, 335 insertions(+), 77 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index d5936091b..02324ef4f 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -1192,7 +1192,6 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 	if (err < 0)
 		goto error;
 	priv->config.flow_prio = err;
-	mlx5_flow_init_driver_ops(eth_dev);
 	/*
 	 * Once the device is added to the list of memory event
 	 * callback, its global MR cache table cannot be expanded
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 677cc7a32..2d3158a6f 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -37,6 +37,23 @@
 extern const struct eth_dev_ops mlx5_dev_ops;
 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
 
+/** Device flow drivers. */
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+extern const struct mlx5_flow_driver_ops mlx5_flow_dv_drv_ops;
+#endif
+extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops;
+
+const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops;
+
+const struct mlx5_flow_driver_ops *flow_drv_ops[] = {
+	[MLX5_FLOW_TYPE_MIN] = &mlx5_flow_null_drv_ops,
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+	[MLX5_FLOW_TYPE_DV] = &mlx5_flow_dv_drv_ops,
+#endif
+	[MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_verbs_drv_ops,
+	[MLX5_FLOW_TYPE_MAX] = &mlx5_flow_null_drv_ops
+};
+
 enum mlx5_expansion {
 	MLX5_EXPANSION_ROOT,
 	MLX5_EXPANSION_ROOT_OUTER,
@@ -282,9 +299,6 @@ static struct mlx5_flow_tunnel_info tunnels_info[] = {
 	},
 };
 
-/* Holds the nic operations that should be used. */
-struct mlx5_flow_driver_ops nic_ops;
-
 /**
  * Discover the maximum number of priority available.
  *
@@ -1511,6 +1525,284 @@ mlx5_flow_validate_item_mpls(const struct rte_flow_item *item __rte_unused,
 				  " update.");
 }
 
+static int
+flow_null_validate(struct rte_eth_dev *dev __rte_unused,
+		   const struct rte_flow_attr *attr __rte_unused,
+		   const struct rte_flow_item items[] __rte_unused,
+		   const struct rte_flow_action actions[] __rte_unused,
+		   struct rte_flow_error *error __rte_unused)
+{
+	rte_errno = ENOTSUP;
+	return -rte_errno;
+}
+
+static struct mlx5_flow *
+flow_null_prepare(const struct rte_flow_attr *attr __rte_unused,
+		  const struct rte_flow_item items[] __rte_unused,
+		  const struct rte_flow_action actions[] __rte_unused,
+		  uint64_t *item_flags __rte_unused,
+		  uint64_t *action_flags __rte_unused,
+		  struct rte_flow_error *error __rte_unused)
+{
+	rte_errno = ENOTSUP;
+	return NULL;
+}
+
+static int
+flow_null_translate(struct rte_eth_dev *dev __rte_unused,
+		    struct mlx5_flow *dev_flow __rte_unused,
+		    const struct rte_flow_attr *attr __rte_unused,
+		    const struct rte_flow_item items[] __rte_unused,
+		    const struct rte_flow_action actions[] __rte_unused,
+		    struct rte_flow_error *error __rte_unused)
+{
+	rte_errno = ENOTSUP;
+	return -rte_errno;
+}
+
+static int
+flow_null_apply(struct rte_eth_dev *dev __rte_unused,
+		struct rte_flow *flow __rte_unused,
+		struct rte_flow_error *error __rte_unused)
+{
+	rte_errno = ENOTSUP;
+	return -rte_errno;
+}
+
+static void
+flow_null_remove(struct rte_eth_dev *dev __rte_unused,
+		 struct rte_flow *flow __rte_unused)
+{
+}
+
+static void
+flow_null_destroy(struct rte_eth_dev *dev __rte_unused,
+		  struct rte_flow *flow __rte_unused)
+{
+}
+
+/* Void driver to protect from null pointer dereference. */
+const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = {
+	.validate = flow_null_validate,
+	.prepare = flow_null_prepare,
+	.translate = flow_null_translate,
+	.apply = flow_null_apply,
+	.remove = flow_null_remove,
+	.destroy = flow_null_destroy,
+};
+
+/**
+ * Select flow driver type according to flow attributes and device
+ * configuration.
+ *
+ * @param[in] dev
+ *   Pointer to the dev structure.
+ * @param[in] attr
+ *   Pointer to the flow attributes.
+ *
+ * @return
+ *   flow driver type if supported, MLX5_FLOW_TYPE_MAX otherwise.
+ */
+static enum mlx5_flow_drv_type
+flow_get_drv_type(struct rte_eth_dev *dev __rte_unused,
+		  const struct rte_flow_attr *attr)
+{
+	struct priv *priv __rte_unused = dev->data->dev_private;
+	enum mlx5_flow_drv_type type = MLX5_FLOW_TYPE_MAX;
+
+	if (!attr->transfer) {
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+		type = priv->config.dv_flow_en ?  MLX5_FLOW_TYPE_DV :
+						  MLX5_FLOW_TYPE_VERBS;
+#else
+		type = MLX5_FLOW_TYPE_VERBS;
+#endif
+	}
+	return type;
+}
+
+#define flow_get_drv_ops(type) flow_drv_ops[type]
+
+/**
+ * Flow driver validation API. This abstracts calling driver specific functions.
+ * The type of flow driver is determined according to flow attributes.
+ *
+ * @param[in] dev
+ *   Pointer to the dev structure.
+ * @param[in] attr
+ *   Pointer to the flow attributes.
+ * @param[in] items
+ *   Pointer to the list of items.
+ * @param[in] actions
+ *   Pointer to the list of actions.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static inline int
+flow_drv_validate(struct rte_eth_dev *dev,
+		  const struct rte_flow_attr *attr,
+		  const struct rte_flow_item items[],
+		  const struct rte_flow_action actions[],
+		  struct rte_flow_error *error)
+{
+	const struct mlx5_flow_driver_ops *fops;
+	enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr);
+
+	fops = flow_get_drv_ops(type);
+	return fops->validate(dev, attr, items, actions, error);
+}
+
+/**
+ * Flow driver preparation API. This abstracts calling driver specific
+ * functions. Parent flow (rte_flow) should have driver type (drv_type). It
+ * calculates the size of memory required for device flow, allocates the memory,
+ * initializes the device flow and returns the pointer.
+ *
+ * @param[in] attr
+ *   Pointer to the flow attributes.
+ * @param[in] items
+ *   Pointer to the list of items.
+ * @param[in] actions
+ *   Pointer to the list of actions.
+ * @param[out] item_flags
+ *   Pointer to bit mask of all items detected.
+ * @param[out] action_flags
+ *   Pointer to bit mask of all actions detected.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   Pointer to device flow on success, otherwise NULL and rte_errno is set.
+ */
+static inline struct mlx5_flow *
+flow_drv_prepare(struct rte_flow *flow,
+		 const struct rte_flow_attr *attr,
+		 const struct rte_flow_item items[],
+		 const struct rte_flow_action actions[],
+		 uint64_t *item_flags,
+		 uint64_t *action_flags,
+		 struct rte_flow_error *error)
+{
+	const struct mlx5_flow_driver_ops *fops;
+	enum mlx5_flow_drv_type type = flow->drv_type;
+
+	assert(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
+	fops = flow_get_drv_ops(type);
+	return fops->prepare(attr, items, actions, item_flags, action_flags,
+			     error);
+}
+
+/**
+ * Flow driver translation API. This abstracts calling driver specific
+ * functions. Parent flow (rte_flow) should have driver type (drv_type). It
+ * translates a generic flow into a driver flow. flow_drv_prepare() must
+ * precede.
+ *
+ * @param[in] dev
+ *   Pointer to the rte dev structure.
+ * @param[in, out] dev_flow
+ *   Pointer to the mlx5 flow.
+ * @param[in] attr
+ *   Pointer to the flow attributes.
+ * @param[in] items
+ *   Pointer to the list of items.
+ * @param[in] actions
+ *   Pointer to the list of actions.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static inline int
+flow_drv_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
+		   const struct rte_flow_attr *attr,
+		   const struct rte_flow_item items[],
+		   const struct rte_flow_action actions[],
+		   struct rte_flow_error *error)
+{
+	const struct mlx5_flow_driver_ops *fops;
+	enum mlx5_flow_drv_type type = dev_flow->flow->drv_type;
+
+	assert(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
+	fops = flow_get_drv_ops(type);
+	return fops->translate(dev, dev_flow, attr, items, actions, error);
+}
+
+/**
+ * Flow driver apply API. This abstracts calling driver specific functions.
+ * Parent flow (rte_flow) should have driver type (drv_type). It applies
+ * translated driver flows to the device. flow_drv_translate() must precede.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device structure.
+ * @param[in, out] flow
+ *   Pointer to flow structure.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static inline int
+flow_drv_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
+	       struct rte_flow_error *error)
+{
+	const struct mlx5_flow_driver_ops *fops;
+	enum mlx5_flow_drv_type type = flow->drv_type;
+
+	assert(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
+	fops = flow_get_drv_ops(type);
+	return fops->apply(dev, flow, error);
+}
+
+/**
+ * Flow driver remove API. This abstracts calling driver specific functions.
+ * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow
+ * on the device. All the resources of the flow should be freed by calling
+ * flow_drv_destroy().
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[in, out] flow
+ *   Pointer to flow structure.
+ */
+static inline void
+flow_drv_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
+{
+	const struct mlx5_flow_driver_ops *fops;
+	enum mlx5_flow_drv_type type = flow->drv_type;
+
+	assert(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
+	fops = flow_get_drv_ops(type);
+	fops->remove(dev, flow);
+}
+
+/**
+ * Flow driver destroy API. This abstracts calling driver specific functions.
+ * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow
+ * on the device and releases the resources of the flow.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[in, out] flow
+ *   Pointer to flow structure.
+ */
+static inline void
+flow_drv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
+{
+	const struct mlx5_flow_driver_ops *fops;
+	enum mlx5_flow_drv_type type = flow->drv_type;
+
+	assert(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
+	fops = flow_get_drv_ops(type);
+	fops->destroy(dev, flow);
+}
+
 /**
  * Validate a flow supported by the NIC.
  *
@@ -1526,7 +1818,7 @@ mlx5_flow_validate(struct rte_eth_dev *dev,
 {
 	int ret;
 
-	ret =  nic_ops.validate(dev, attr, items, actions, error);
+	ret = flow_drv_validate(dev, attr, items, actions, error);
 	if (ret < 0)
 		return ret;
 	return 0;
@@ -1616,7 +1908,7 @@ mlx5_flow_list_create(struct rte_eth_dev *dev,
 	uint32_t i;
 	uint32_t flow_size;
 
-	ret = mlx5_flow_validate(dev, attr, items, actions, error);
+	ret = flow_drv_validate(dev, attr, items, actions, error);
 	if (ret < 0)
 		return NULL;
 	flow_size = sizeof(struct rte_flow);
@@ -1627,6 +1919,9 @@ mlx5_flow_list_create(struct rte_eth_dev *dev,
 	else
 		flow_size += RTE_ALIGN_CEIL(sizeof(uint16_t), sizeof(void *));
 	flow = rte_calloc(__func__, 1, flow_size, 0);
+	flow->drv_type = flow_get_drv_type(dev, attr);
+	assert(flow->drv_type > MLX5_FLOW_TYPE_MIN &&
+	       flow->drv_type < MLX5_FLOW_TYPE_MAX);
 	flow->queue = (void *)(flow + 1);
 	LIST_INIT(&flow->dev_flows);
 	if (rss && rss->types) {
@@ -1644,21 +1939,21 @@ mlx5_flow_list_create(struct rte_eth_dev *dev,
 		buf->entry[0].pattern = (void *)(uintptr_t)items;
 	}
 	for (i = 0; i < buf->entries; ++i) {
-		dev_flow = nic_ops.prepare(attr, buf->entry[i].pattern,
-					   actions, &item_flags,
-					   &action_flags, error);
+		dev_flow = flow_drv_prepare(flow, attr, buf->entry[i].pattern,
+					    actions, &item_flags, &action_flags,
+					    error);
 		if (!dev_flow)
 			goto error;
 		dev_flow->flow = flow;
 		LIST_INSERT_HEAD(&flow->dev_flows, dev_flow, next);
-		ret = nic_ops.translate(dev, dev_flow, attr,
-					buf->entry[i].pattern,
-					actions, error);
+		ret = flow_drv_translate(dev, dev_flow, attr,
+					 buf->entry[i].pattern,
+					 actions, error);
 		if (ret < 0)
 			goto error;
 	}
 	if (dev->data->dev_started) {
-		ret = nic_ops.apply(dev, flow, error);
+		ret = flow_drv_apply(dev, flow, error);
 		if (ret < 0)
 			goto error;
 	}
@@ -1668,7 +1963,7 @@ mlx5_flow_list_create(struct rte_eth_dev *dev,
 error:
 	ret = rte_errno; /* Save rte_errno before cleanup. */
 	assert(flow);
-	nic_ops.destroy(dev, flow);
+	flow_drv_destroy(dev, flow);
 	rte_free(flow);
 	rte_errno = ret; /* Restore rte_errno. */
 	return NULL;
@@ -1706,7 +2001,7 @@ static void
 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
 		       struct rte_flow *flow)
 {
-	nic_ops.destroy(dev, flow);
+	flow_drv_destroy(dev, flow);
 	TAILQ_REMOVE(list, flow, next);
 	/*
 	 * Update RX queue flags only if port is started, otherwise it is
@@ -1750,7 +2045,7 @@ mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
 	struct rte_flow *flow;
 
 	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next)
-		nic_ops.remove(dev, flow);
+		flow_drv_remove(dev, flow);
 	mlx5_flow_rxq_flags_clear(dev);
 }
 
@@ -1773,7 +2068,7 @@ mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
 	int ret = 0;
 
 	TAILQ_FOREACH(flow, list, next) {
-		ret = nic_ops.apply(dev, flow, &error);
+		ret = flow_drv_apply(dev, flow, &error);
 		if (ret < 0)
 			goto error;
 		mlx5_flow_rxq_flags_set(dev, flow);
@@ -2464,24 +2759,3 @@ mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
 	}
 	return 0;
 }
-
-/**
- * Init the driver ops structure.
- *
- * @param dev
- *   Pointer to Ethernet device structure.
- */
-void
-mlx5_flow_init_driver_ops(struct rte_eth_dev *dev)
-{
-	struct priv *priv __rte_unused = dev->data->dev_private;
-
-#ifdef HAVE_IBV_FLOW_DV_SUPPORT
-	if (priv->config.dv_flow_en)
-		mlx5_flow_dv_get_driver_ops(&nic_ops);
-	else
-		mlx5_flow_verbs_get_driver_ops(&nic_ops);
-#else
-	mlx5_flow_verbs_get_driver_ops(&nic_ops);
-#endif
-}
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 53c0eeb56..2bc3bee8c 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -128,6 +128,13 @@
 /* Max number of actions per DV flow. */
 #define MLX5_DV_MAX_NUMBER_OF_ACTIONS 8
 
+enum mlx5_flow_drv_type {
+	MLX5_FLOW_TYPE_MIN,
+	MLX5_FLOW_TYPE_DV,
+	MLX5_FLOW_TYPE_VERBS,
+	MLX5_FLOW_TYPE_MAX,
+};
+
 /* Matcher PRM representation */
 struct mlx5_flow_dv_match_params {
 	size_t size;
@@ -210,7 +217,7 @@ struct mlx5_flow_counter {
 /* Flow structure. */
 struct rte_flow {
 	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
-	struct rte_flow_attr attributes; /**< User flow attribute. */
+	enum mlx5_flow_drv_type drv_type; /**< Driver type. */
 	uint32_t layers;
 	/**< Bit-fields of present layers see MLX5_FLOW_LAYER_*. */
 	struct mlx5_flow_counter *counter; /**< Holds flow counter. */
@@ -314,13 +321,5 @@ int mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
 				      uint64_t item_flags,
 				      struct rte_eth_dev *dev,
 				      struct rte_flow_error *error);
-void mlx5_flow_init_driver_ops(struct rte_eth_dev *dev);
-
-/* mlx5_flow_dv.c */
-void mlx5_flow_dv_get_driver_ops(struct mlx5_flow_driver_ops *flow_ops);
-
-/* mlx5_flow_verbs.c */
-
-void mlx5_flow_verbs_get_driver_ops(struct mlx5_flow_driver_ops *flow_ops);
 
 #endif /* RTE_PMD_MLX5_FLOW_H_ */
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 71af410b2..cf663cdb8 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -1351,23 +1351,13 @@ flow_dv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
 	}
 }
 
-/**
- * Fills the flow_ops with the function pointers.
- *
- * @param[out] flow_ops
- *   Pointer to driver_ops structure.
- */
-void
-mlx5_flow_dv_get_driver_ops(struct mlx5_flow_driver_ops *flow_ops)
-{
-	*flow_ops = (struct mlx5_flow_driver_ops) {
-		.validate = flow_dv_validate,
-		.prepare = flow_dv_prepare,
-		.translate = flow_dv_translate,
-		.apply = flow_dv_apply,
-		.remove = flow_dv_remove,
-		.destroy = flow_dv_destroy,
-	};
-}
+const struct mlx5_flow_driver_ops mlx5_flow_dv_drv_ops = {
+	.validate = flow_dv_validate,
+	.prepare = flow_dv_prepare,
+	.translate = flow_dv_translate,
+	.apply = flow_dv_apply,
+	.remove = flow_dv_remove,
+	.destroy = flow_dv_destroy,
+};
 
 #endif /* HAVE_IBV_FLOW_DV_SUPPORT */
diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c
index f4a264232..05ab5fdad 100644
--- a/drivers/net/mlx5/mlx5_flow_verbs.c
+++ b/drivers/net/mlx5/mlx5_flow_verbs.c
@@ -1638,15 +1638,11 @@ flow_verbs_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
 	return -rte_errno;
 }
 
-void
-mlx5_flow_verbs_get_driver_ops(struct mlx5_flow_driver_ops *flow_ops)
-{
-	*flow_ops = (struct mlx5_flow_driver_ops) {
-		.validate = flow_verbs_validate,
-		.prepare = flow_verbs_prepare,
-		.translate = flow_verbs_translate,
-		.apply = flow_verbs_apply,
-		.remove = flow_verbs_remove,
-		.destroy = flow_verbs_destroy,
-	};
-}
+const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops = {
+	.validate = flow_verbs_validate,
+	.prepare = flow_verbs_prepare,
+	.translate = flow_verbs_translate,
+	.apply = flow_verbs_apply,
+	.remove = flow_verbs_remove,
+	.destroy = flow_verbs_destroy,
+};
-- 
2.11.0


* [dpdk-dev] [PATCH 2/3] net/mlx5: remove Netlink flow driver
  2018-09-19  7:21 [dpdk-dev] [PATCH 0/3] migrate Linux TC flower driver to new flow engine Yongseok Koh
  2018-09-19  7:21 ` [dpdk-dev] [PATCH 1/3] net/mlx5: add abstraction for multiple flow drivers Yongseok Koh
@ 2018-09-19  7:21 ` Yongseok Koh
  2018-09-19  7:21 ` [dpdk-dev] [PATCH 3/3] net/mlx5: add Linux TC flower driver for E-Switch flow Yongseok Koh
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 22+ messages in thread
From: Yongseok Koh @ 2018-09-19  7:21 UTC (permalink / raw)
  To: Shahaf Shuler; +Cc: dev, Yongseok Koh

The Netlink-based E-Switch flow engine will be migrated to the new flow
engine. nl_flow will be renamed to flow_tcf as it goes through the Linux TC
flower interface.
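
For reference, the core of what this driver does (and what the renamed
flow_tcf driver will keep doing) is assembling an RTM_NEWTFILTER message for
the kernel "flower" classifier via libmnl. A minimal sketch follows; the
removed file below uses the size-checked _check variants of these libmnl
calls, and most pattern keys are elided here:

    struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf);
    struct tcmsg *tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
    struct nlattr *na_flower;

    nlh->nlmsg_type = RTM_NEWTFILTER;
    nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
    tcm->tcm_family = AF_UNSPEC;
    tcm->tcm_ifindex = ifindex;
    tcm->tcm_parent = TC_H_MAKE(TC_H_INGRESS, TC_H_MIN_INGRESS);
    tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
                              RTE_BE16(ETH_P_ALL));
    mnl_attr_put_strz(nlh, TCA_KIND, "flower");
    na_flower = mnl_attr_nest_start(nlh, TCA_OPTIONS);
    mnl_attr_put_u32(nlh, TCA_FLOWER_FLAGS, TCA_CLS_FLAGS_SKIP_SW);
    /* ... TCA_FLOWER_KEY_* pattern attributes and TCA_FLOWER_ACT ... */
    mnl_attr_nest_end(nlh, na_flower);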

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/Makefile       |    1 -
 drivers/net/mlx5/mlx5.c         |   32 -
 drivers/net/mlx5/mlx5.h         |   25 -
 drivers/net/mlx5/mlx5_nl_flow.c | 1228 ---------------------------------------
 4 files changed, 1286 deletions(-)
 delete mode 100644 drivers/net/mlx5/mlx5_nl_flow.c

diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index 4243b37ca..9c1044808 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -35,7 +35,6 @@ SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow_dv.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow_verbs.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_socket.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_nl.c
-SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_nl_flow.c
 
 ifeq ($(CONFIG_RTE_LIBRTE_MLX5_DLOPEN_DEPS),y)
 INSTALL-$(CONFIG_RTE_LIBRTE_MLX5_PMD)-lib += $(LIB_GLUE)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 02324ef4f..47cf538e2 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -286,8 +286,6 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 		close(priv->nl_socket_route);
 	if (priv->nl_socket_rdma >= 0)
 		close(priv->nl_socket_rdma);
-	if (priv->mnl_socket)
-		mlx5_nl_flow_socket_destroy(priv->mnl_socket);
 	ret = mlx5_hrxq_ibv_verify(dev);
 	if (ret)
 		DRV_LOG(WARNING, "port %u some hash Rx queue still remain",
@@ -1137,34 +1135,6 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 	claim_zero(mlx5_mac_addr_add(eth_dev, &mac, 0, 0));
 	if (vf && config.vf_nl_en)
 		mlx5_nl_mac_addr_sync(eth_dev);
-	priv->mnl_socket = mlx5_nl_flow_socket_create();
-	if (!priv->mnl_socket) {
-		err = -rte_errno;
-		DRV_LOG(WARNING,
-			"flow rules relying on switch offloads will not be"
-			" supported: cannot open libmnl socket: %s",
-			strerror(rte_errno));
-	} else {
-		struct rte_flow_error error;
-		unsigned int ifindex = mlx5_ifindex(eth_dev);
-
-		if (!ifindex) {
-			err = -rte_errno;
-			error.message =
-				"cannot retrieve network interface index";
-		} else {
-			err = mlx5_nl_flow_init(priv->mnl_socket, ifindex,
-						&error);
-		}
-		if (err) {
-			DRV_LOG(WARNING,
-				"flow rules relying on switch offloads will"
-				" not be supported: %s: %s",
-				error.message, strerror(rte_errno));
-			mlx5_nl_flow_socket_destroy(priv->mnl_socket);
-			priv->mnl_socket = NULL;
-		}
-	}
 	TAILQ_INIT(&priv->flows);
 	TAILQ_INIT(&priv->ctrl_flows);
 	/* Hint libmlx5 to use PMD allocator for data plane resources */
@@ -1217,8 +1187,6 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 			close(priv->nl_socket_route);
 		if (priv->nl_socket_rdma >= 0)
 			close(priv->nl_socket_rdma);
-		if (priv->mnl_socket)
-			mlx5_nl_flow_socket_destroy(priv->mnl_socket);
 		if (own_domain_id)
 			claim_zero(rte_eth_switch_domain_free(priv->domain_id));
 		rte_free(priv);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 006cc8e06..5255a0270 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -157,12 +157,6 @@ struct mlx5_drop {
 	struct mlx5_rxq_ibv *rxq; /* Verbs Rx queue. */
 };
 
-/** DPDK port to network interface index (ifindex) conversion. */
-struct mlx5_nl_flow_ptoi {
-	uint16_t port_id; /**< DPDK port ID. */
-	unsigned int ifindex; /**< Network interface index. */
-};
-
 struct mnl_socket;
 
 struct priv {
@@ -398,23 +392,4 @@ unsigned int mlx5_nl_ifindex(int nl, const char *name);
 int mlx5_nl_switch_info(int nl, unsigned int ifindex,
 			struct mlx5_switch_info *info);
 
-/* mlx5_nl_flow.c */
-
-int mlx5_nl_flow_transpose(void *buf,
-			   size_t size,
-			   const struct mlx5_nl_flow_ptoi *ptoi,
-			   const struct rte_flow_attr *attr,
-			   const struct rte_flow_item *pattern,
-			   const struct rte_flow_action *actions,
-			   struct rte_flow_error *error);
-void mlx5_nl_flow_brand(void *buf, uint32_t handle);
-int mlx5_nl_flow_create(struct mnl_socket *nl, void *buf,
-			struct rte_flow_error *error);
-int mlx5_nl_flow_destroy(struct mnl_socket *nl, void *buf,
-			 struct rte_flow_error *error);
-int mlx5_nl_flow_init(struct mnl_socket *nl, unsigned int ifindex,
-		      struct rte_flow_error *error);
-struct mnl_socket *mlx5_nl_flow_socket_create(void);
-void mlx5_nl_flow_socket_destroy(struct mnl_socket *nl);
-
 #endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_nl_flow.c b/drivers/net/mlx5/mlx5_nl_flow.c
deleted file mode 100644
index beb03c911..000000000
--- a/drivers/net/mlx5/mlx5_nl_flow.c
+++ /dev/null
@@ -1,1228 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright 2018 6WIND S.A.
- * Copyright 2018 Mellanox Technologies, Ltd
- */
-
-#include <assert.h>
-#include <errno.h>
-#include <libmnl/libmnl.h>
-#include <linux/if_ether.h>
-#include <linux/netlink.h>
-#include <linux/pkt_cls.h>
-#include <linux/pkt_sched.h>
-#include <linux/rtnetlink.h>
-#include <linux/tc_act/tc_gact.h>
-#include <linux/tc_act/tc_mirred.h>
-#include <netinet/in.h>
-#include <stdalign.h>
-#include <stdbool.h>
-#include <stddef.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <sys/socket.h>
-
-#include <rte_byteorder.h>
-#include <rte_errno.h>
-#include <rte_ether.h>
-#include <rte_flow.h>
-
-#include "mlx5.h"
-#include "mlx5_autoconf.h"
-
-#ifdef HAVE_TC_ACT_VLAN
-
-#include <linux/tc_act/tc_vlan.h>
-
-#else /* HAVE_TC_ACT_VLAN */
-
-#define TCA_VLAN_ACT_POP 1
-#define TCA_VLAN_ACT_PUSH 2
-#define TCA_VLAN_ACT_MODIFY 3
-#define TCA_VLAN_PARMS 2
-#define TCA_VLAN_PUSH_VLAN_ID 3
-#define TCA_VLAN_PUSH_VLAN_PROTOCOL 4
-#define TCA_VLAN_PAD 5
-#define TCA_VLAN_PUSH_VLAN_PRIORITY 6
-
-struct tc_vlan {
-	tc_gen;
-	int v_action;
-};
-
-#endif /* HAVE_TC_ACT_VLAN */
-
-/* Normally found in linux/netlink.h. */
-#ifndef NETLINK_CAP_ACK
-#define NETLINK_CAP_ACK 10
-#endif
-
-/* Normally found in linux/pkt_sched.h. */
-#ifndef TC_H_MIN_INGRESS
-#define TC_H_MIN_INGRESS 0xfff2u
-#endif
-
-/* Normally found in linux/pkt_cls.h. */
-#ifndef TCA_CLS_FLAGS_SKIP_SW
-#define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
-#endif
-#ifndef HAVE_TCA_FLOWER_ACT
-#define TCA_FLOWER_ACT 3
-#endif
-#ifndef HAVE_TCA_FLOWER_FLAGS
-#define TCA_FLOWER_FLAGS 22
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_ETH_TYPE
-#define TCA_FLOWER_KEY_ETH_TYPE 8
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_ETH_DST
-#define TCA_FLOWER_KEY_ETH_DST 4
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_ETH_DST_MASK
-#define TCA_FLOWER_KEY_ETH_DST_MASK 5
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC
-#define TCA_FLOWER_KEY_ETH_SRC 6
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK
-#define TCA_FLOWER_KEY_ETH_SRC_MASK 7
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_IP_PROTO
-#define TCA_FLOWER_KEY_IP_PROTO 9
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC
-#define TCA_FLOWER_KEY_IPV4_SRC 10
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK
-#define TCA_FLOWER_KEY_IPV4_SRC_MASK 11
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST
-#define TCA_FLOWER_KEY_IPV4_DST 12
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK
-#define TCA_FLOWER_KEY_IPV4_DST_MASK 13
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC
-#define TCA_FLOWER_KEY_IPV6_SRC 14
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK
-#define TCA_FLOWER_KEY_IPV6_SRC_MASK 15
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST
-#define TCA_FLOWER_KEY_IPV6_DST 16
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK
-#define TCA_FLOWER_KEY_IPV6_DST_MASK 17
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC
-#define TCA_FLOWER_KEY_TCP_SRC 18
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK
-#define TCA_FLOWER_KEY_TCP_SRC_MASK 35
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_TCP_DST
-#define TCA_FLOWER_KEY_TCP_DST 19
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_TCP_DST_MASK
-#define TCA_FLOWER_KEY_TCP_DST_MASK 36
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC
-#define TCA_FLOWER_KEY_UDP_SRC 20
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK
-#define TCA_FLOWER_KEY_UDP_SRC_MASK 37
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_UDP_DST
-#define TCA_FLOWER_KEY_UDP_DST 21
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_UDP_DST_MASK
-#define TCA_FLOWER_KEY_UDP_DST_MASK 38
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_VLAN_ID
-#define TCA_FLOWER_KEY_VLAN_ID 23
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_VLAN_PRIO
-#define TCA_FLOWER_KEY_VLAN_PRIO 24
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
-#define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
-#endif
-
-/** Parser state definitions for mlx5_nl_flow_trans[]. */
-enum mlx5_nl_flow_trans {
-	INVALID,
-	BACK,
-	ATTR,
-	PATTERN,
-	ITEM_VOID,
-	ITEM_PORT_ID,
-	ITEM_ETH,
-	ITEM_VLAN,
-	ITEM_IPV4,
-	ITEM_IPV6,
-	ITEM_TCP,
-	ITEM_UDP,
-	ACTIONS,
-	ACTION_VOID,
-	ACTION_PORT_ID,
-	ACTION_DROP,
-	ACTION_OF_POP_VLAN,
-	ACTION_OF_PUSH_VLAN,
-	ACTION_OF_SET_VLAN_VID,
-	ACTION_OF_SET_VLAN_PCP,
-	END,
-};
-
-#define TRANS(...) (const enum mlx5_nl_flow_trans []){ __VA_ARGS__, INVALID, }
-
-#define PATTERN_COMMON \
-	ITEM_VOID, ITEM_PORT_ID, ACTIONS
-#define ACTIONS_COMMON \
-	ACTION_VOID, ACTION_OF_POP_VLAN, ACTION_OF_PUSH_VLAN, \
-	ACTION_OF_SET_VLAN_VID, ACTION_OF_SET_VLAN_PCP
-#define ACTIONS_FATE \
-	ACTION_PORT_ID, ACTION_DROP
-
-/** Parser state transitions used by mlx5_nl_flow_transpose(). */
-static const enum mlx5_nl_flow_trans *const mlx5_nl_flow_trans[] = {
-	[INVALID] = NULL,
-	[BACK] = NULL,
-	[ATTR] = TRANS(PATTERN),
-	[PATTERN] = TRANS(ITEM_ETH, PATTERN_COMMON),
-	[ITEM_VOID] = TRANS(BACK),
-	[ITEM_PORT_ID] = TRANS(BACK),
-	[ITEM_ETH] = TRANS(ITEM_IPV4, ITEM_IPV6, ITEM_VLAN, PATTERN_COMMON),
-	[ITEM_VLAN] = TRANS(ITEM_IPV4, ITEM_IPV6, PATTERN_COMMON),
-	[ITEM_IPV4] = TRANS(ITEM_TCP, ITEM_UDP, PATTERN_COMMON),
-	[ITEM_IPV6] = TRANS(ITEM_TCP, ITEM_UDP, PATTERN_COMMON),
-	[ITEM_TCP] = TRANS(PATTERN_COMMON),
-	[ITEM_UDP] = TRANS(PATTERN_COMMON),
-	[ACTIONS] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
-	[ACTION_VOID] = TRANS(BACK),
-	[ACTION_PORT_ID] = TRANS(ACTION_VOID, END),
-	[ACTION_DROP] = TRANS(ACTION_VOID, END),
-	[ACTION_OF_POP_VLAN] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
-	[ACTION_OF_PUSH_VLAN] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
-	[ACTION_OF_SET_VLAN_VID] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
-	[ACTION_OF_SET_VLAN_PCP] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
-	[END] = NULL,
-};
-
-/** Empty masks for known item types. */
-static const union {
-	struct rte_flow_item_port_id port_id;
-	struct rte_flow_item_eth eth;
-	struct rte_flow_item_vlan vlan;
-	struct rte_flow_item_ipv4 ipv4;
-	struct rte_flow_item_ipv6 ipv6;
-	struct rte_flow_item_tcp tcp;
-	struct rte_flow_item_udp udp;
-} mlx5_nl_flow_mask_empty;
-
-/** Supported masks for known item types. */
-static const struct {
-	struct rte_flow_item_port_id port_id;
-	struct rte_flow_item_eth eth;
-	struct rte_flow_item_vlan vlan;
-	struct rte_flow_item_ipv4 ipv4;
-	struct rte_flow_item_ipv6 ipv6;
-	struct rte_flow_item_tcp tcp;
-	struct rte_flow_item_udp udp;
-} mlx5_nl_flow_mask_supported = {
-	.port_id = {
-		.id = 0xffffffff,
-	},
-	.eth = {
-		.type = RTE_BE16(0xffff),
-		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
-		.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
-	},
-	.vlan = {
-		/* PCP and VID only, no DEI. */
-		.tci = RTE_BE16(0xefff),
-		.inner_type = RTE_BE16(0xffff),
-	},
-	.ipv4.hdr = {
-		.next_proto_id = 0xff,
-		.src_addr = RTE_BE32(0xffffffff),
-		.dst_addr = RTE_BE32(0xffffffff),
-	},
-	.ipv6.hdr = {
-		.proto = 0xff,
-		.src_addr =
-			"\xff\xff\xff\xff\xff\xff\xff\xff"
-			"\xff\xff\xff\xff\xff\xff\xff\xff",
-		.dst_addr =
-			"\xff\xff\xff\xff\xff\xff\xff\xff"
-			"\xff\xff\xff\xff\xff\xff\xff\xff",
-	},
-	.tcp.hdr = {
-		.src_port = RTE_BE16(0xffff),
-		.dst_port = RTE_BE16(0xffff),
-	},
-	.udp.hdr = {
-		.src_port = RTE_BE16(0xffff),
-		.dst_port = RTE_BE16(0xffff),
-	},
-};
-
-/**
- * Retrieve mask for pattern item.
- *
- * This function does basic sanity checks on a pattern item in order to
- * return the most appropriate mask for it.
- *
- * @param[in] item
- *   Item specification.
- * @param[in] mask_default
- *   Default mask for pattern item as specified by the flow API.
- * @param[in] mask_supported
- *   Mask fields supported by the implementation.
- * @param[in] mask_empty
- *   Empty mask to return when there is no specification.
- * @param[out] error
- *   Perform verbose error reporting if not NULL.
- *
- * @return
- *   Either @p item->mask or one of the mask parameters on success, NULL
- *   otherwise and rte_errno is set.
- */
-static const void *
-mlx5_nl_flow_item_mask(const struct rte_flow_item *item,
-		       const void *mask_default,
-		       const void *mask_supported,
-		       const void *mask_empty,
-		       size_t mask_size,
-		       struct rte_flow_error *error)
-{
-	const uint8_t *mask;
-	size_t i;
-
-	/* item->last and item->mask cannot exist without item->spec. */
-	if (!item->spec && (item->mask || item->last)) {
-		rte_flow_error_set
-			(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, item,
-			 "\"mask\" or \"last\" field provided without a"
-			 " corresponding \"spec\"");
-		return NULL;
-	}
-	/* No spec, no mask, no problem. */
-	if (!item->spec)
-		return mask_empty;
-	mask = item->mask ? item->mask : mask_default;
-	assert(mask);
-	/*
-	 * Single-pass check to make sure that:
-	 * - Mask is supported, no bits are set outside mask_supported.
-	 * - Both item->spec and item->last are included in mask.
-	 */
-	for (i = 0; i != mask_size; ++i) {
-		if (!mask[i])
-			continue;
-		if ((mask[i] | ((const uint8_t *)mask_supported)[i]) !=
-		    ((const uint8_t *)mask_supported)[i]) {
-			rte_flow_error_set
-				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
-				 mask, "unsupported field found in \"mask\"");
-			return NULL;
-		}
-		if (item->last &&
-		    (((const uint8_t *)item->spec)[i] & mask[i]) !=
-		    (((const uint8_t *)item->last)[i] & mask[i])) {
-			rte_flow_error_set
-				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_LAST,
-				 item->last,
-				 "range between \"spec\" and \"last\" not"
-				 " comprised in \"mask\"");
-			return NULL;
-		}
-	}
-	return mask;
-}
-
-/**
- * Transpose flow rule description to rtnetlink message.
- *
- * This function transposes a flow rule description to a traffic control
- * (TC) filter creation message ready to be sent over Netlink.
- *
- * Target interface is specified as the first entry of the @p ptoi table.
- * Subsequent entries enable this function to resolve other DPDK port IDs
- * found in the flow rule.
- *
- * @param[out] buf
- *   Output message buffer. May be NULL when @p size is 0.
- * @param size
- *   Size of @p buf. Message may be truncated if not large enough.
- * @param[in] ptoi
- *   DPDK port ID to network interface index translation table. This table
- *   is terminated by an entry with a zero ifindex value.
- * @param[in] attr
- *   Flow rule attributes.
- * @param[in] pattern
- *   Pattern specification.
- * @param[in] actions
- *   Associated actions.
- * @param[out] error
- *   Perform verbose error reporting if not NULL.
- *
- * @return
- *   A positive value representing the exact size of the message in bytes
- *   regardless of the @p size parameter on success, a negative errno value
- *   otherwise and rte_errno is set.
- */
-int
-mlx5_nl_flow_transpose(void *buf,
-		       size_t size,
-		       const struct mlx5_nl_flow_ptoi *ptoi,
-		       const struct rte_flow_attr *attr,
-		       const struct rte_flow_item *pattern,
-		       const struct rte_flow_action *actions,
-		       struct rte_flow_error *error)
-{
-	alignas(struct nlmsghdr)
-	uint8_t buf_tmp[mnl_nlmsg_size(sizeof(struct tcmsg) + 1024)];
-	const struct rte_flow_item *item;
-	const struct rte_flow_action *action;
-	unsigned int n;
-	uint32_t act_index_cur;
-	bool in_port_id_set;
-	bool eth_type_set;
-	bool vlan_present;
-	bool vlan_eth_type_set;
-	bool ip_proto_set;
-	struct nlattr *na_flower;
-	struct nlattr *na_flower_act;
-	struct nlattr *na_vlan_id;
-	struct nlattr *na_vlan_priority;
-	const enum mlx5_nl_flow_trans *trans;
-	const enum mlx5_nl_flow_trans *back;
-
-	if (!size)
-		goto error_nobufs;
-init:
-	item = pattern;
-	action = actions;
-	n = 0;
-	act_index_cur = 0;
-	in_port_id_set = false;
-	eth_type_set = false;
-	vlan_present = false;
-	vlan_eth_type_set = false;
-	ip_proto_set = false;
-	na_flower = NULL;
-	na_flower_act = NULL;
-	na_vlan_id = NULL;
-	na_vlan_priority = NULL;
-	trans = TRANS(ATTR);
-	back = trans;
-trans:
-	switch (trans[n++]) {
-		union {
-			const struct rte_flow_item_port_id *port_id;
-			const struct rte_flow_item_eth *eth;
-			const struct rte_flow_item_vlan *vlan;
-			const struct rte_flow_item_ipv4 *ipv4;
-			const struct rte_flow_item_ipv6 *ipv6;
-			const struct rte_flow_item_tcp *tcp;
-			const struct rte_flow_item_udp *udp;
-		} spec, mask;
-		union {
-			const struct rte_flow_action_port_id *port_id;
-			const struct rte_flow_action_of_push_vlan *of_push_vlan;
-			const struct rte_flow_action_of_set_vlan_vid *
-				of_set_vlan_vid;
-			const struct rte_flow_action_of_set_vlan_pcp *
-				of_set_vlan_pcp;
-		} conf;
-		struct nlmsghdr *nlh;
-		struct tcmsg *tcm;
-		struct nlattr *act_index;
-		struct nlattr *act;
-		unsigned int i;
-
-	case INVALID:
-		if (item->type)
-			return rte_flow_error_set
-				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
-				 item, "unsupported pattern item combination");
-		else if (action->type)
-			return rte_flow_error_set
-				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
-				 action, "unsupported action combination");
-		return rte_flow_error_set
-			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
-			 "flow rule lacks some kind of fate action");
-	case BACK:
-		trans = back;
-		n = 0;
-		goto trans;
-	case ATTR:
-		/*
-		 * Supported attributes: no groups, some priorities and
-		 * ingress only. Don't care about transfer as it is the
-		 * caller's problem.
-		 */
-		if (attr->group)
-			return rte_flow_error_set
-				(error, ENOTSUP,
-				 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
-				 attr, "groups are not supported");
-		if (attr->priority > 0xfffe)
-			return rte_flow_error_set
-				(error, ENOTSUP,
-				 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
-				 attr, "lowest priority level is 0xfffe");
-		if (!attr->ingress)
-			return rte_flow_error_set
-				(error, ENOTSUP,
-				 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
-				 attr, "only ingress is supported");
-		if (attr->egress)
-			return rte_flow_error_set
-				(error, ENOTSUP,
-				 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
-				 attr, "egress is not supported");
-		if (size < mnl_nlmsg_size(sizeof(*tcm)))
-			goto error_nobufs;
-		nlh = mnl_nlmsg_put_header(buf);
-		nlh->nlmsg_type = 0;
-		nlh->nlmsg_flags = 0;
-		nlh->nlmsg_seq = 0;
-		tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
-		tcm->tcm_family = AF_UNSPEC;
-		tcm->tcm_ifindex = ptoi[0].ifindex;
-		/*
-		 * Let kernel pick a handle by default. A predictable handle
-		 * can be set by the caller on the resulting buffer through
-		 * mlx5_nl_flow_brand().
-		 */
-		tcm->tcm_handle = 0;
-		tcm->tcm_parent = TC_H_MAKE(TC_H_INGRESS, TC_H_MIN_INGRESS);
-		/*
-		 * Priority cannot be zero to prevent the kernel from
-		 * picking one automatically.
-		 */
-		tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
-					  RTE_BE16(ETH_P_ALL));
-		break;
-	case PATTERN:
-		if (!mnl_attr_put_strz_check(buf, size, TCA_KIND, "flower"))
-			goto error_nobufs;
-		na_flower = mnl_attr_nest_start_check(buf, size, TCA_OPTIONS);
-		if (!na_flower)
-			goto error_nobufs;
-		if (!mnl_attr_put_u32_check(buf, size, TCA_FLOWER_FLAGS,
-					    TCA_CLS_FLAGS_SKIP_SW))
-			goto error_nobufs;
-		break;
-	case ITEM_VOID:
-		if (item->type != RTE_FLOW_ITEM_TYPE_VOID)
-			goto trans;
-		++item;
-		break;
-	case ITEM_PORT_ID:
-		if (item->type != RTE_FLOW_ITEM_TYPE_PORT_ID)
-			goto trans;
-		mask.port_id = mlx5_nl_flow_item_mask
-			(item, &rte_flow_item_port_id_mask,
-			 &mlx5_nl_flow_mask_supported.port_id,
-			 &mlx5_nl_flow_mask_empty.port_id,
-			 sizeof(mlx5_nl_flow_mask_supported.port_id), error);
-		if (!mask.port_id)
-			return -rte_errno;
-		if (mask.port_id == &mlx5_nl_flow_mask_empty.port_id) {
-			in_port_id_set = 1;
-			++item;
-			break;
-		}
-		spec.port_id = item->spec;
-		if (mask.port_id->id && mask.port_id->id != 0xffffffff)
-			return rte_flow_error_set
-				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
-				 mask.port_id,
-				 "no support for partial mask on"
-				 " \"id\" field");
-		if (!mask.port_id->id)
-			i = 0;
-		else
-			for (i = 0; ptoi[i].ifindex; ++i)
-				if (ptoi[i].port_id == spec.port_id->id)
-					break;
-		if (!ptoi[i].ifindex)
-			return rte_flow_error_set
-				(error, ENODEV, RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
-				 spec.port_id,
-				 "missing data to convert port ID to ifindex");
-		tcm = mnl_nlmsg_get_payload(buf);
-		if (in_port_id_set &&
-		    ptoi[i].ifindex != (unsigned int)tcm->tcm_ifindex)
-			return rte_flow_error_set
-				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
-				 spec.port_id,
-				 "cannot match traffic for several port IDs"
-				 " through a single flow rule");
-		tcm->tcm_ifindex = ptoi[i].ifindex;
-		in_port_id_set = 1;
-		++item;
-		break;
-	case ITEM_ETH:
-		if (item->type != RTE_FLOW_ITEM_TYPE_ETH)
-			goto trans;
-		mask.eth = mlx5_nl_flow_item_mask
-			(item, &rte_flow_item_eth_mask,
-			 &mlx5_nl_flow_mask_supported.eth,
-			 &mlx5_nl_flow_mask_empty.eth,
-			 sizeof(mlx5_nl_flow_mask_supported.eth), error);
-		if (!mask.eth)
-			return -rte_errno;
-		if (mask.eth == &mlx5_nl_flow_mask_empty.eth) {
-			++item;
-			break;
-		}
-		spec.eth = item->spec;
-		if (mask.eth->type && mask.eth->type != RTE_BE16(0xffff))
-			return rte_flow_error_set
-				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
-				 mask.eth,
-				 "no support for partial mask on"
-				 " \"type\" field");
-		if (mask.eth->type) {
-			if (!mnl_attr_put_u16_check(buf, size,
-						    TCA_FLOWER_KEY_ETH_TYPE,
-						    spec.eth->type))
-				goto error_nobufs;
-			eth_type_set = 1;
-		}
-		if ((!is_zero_ether_addr(&mask.eth->dst) &&
-		     (!mnl_attr_put_check(buf, size,
-					  TCA_FLOWER_KEY_ETH_DST,
-					  ETHER_ADDR_LEN,
-					  spec.eth->dst.addr_bytes) ||
-		      !mnl_attr_put_check(buf, size,
-					  TCA_FLOWER_KEY_ETH_DST_MASK,
-					  ETHER_ADDR_LEN,
-					  mask.eth->dst.addr_bytes))) ||
-		    (!is_zero_ether_addr(&mask.eth->src) &&
-		     (!mnl_attr_put_check(buf, size,
-					  TCA_FLOWER_KEY_ETH_SRC,
-					  ETHER_ADDR_LEN,
-					  spec.eth->src.addr_bytes) ||
-		      !mnl_attr_put_check(buf, size,
-					  TCA_FLOWER_KEY_ETH_SRC_MASK,
-					  ETHER_ADDR_LEN,
-					  mask.eth->src.addr_bytes))))
-			goto error_nobufs;
-		++item;
-		break;
-	case ITEM_VLAN:
-		if (item->type != RTE_FLOW_ITEM_TYPE_VLAN)
-			goto trans;
-		mask.vlan = mlx5_nl_flow_item_mask
-			(item, &rte_flow_item_vlan_mask,
-			 &mlx5_nl_flow_mask_supported.vlan,
-			 &mlx5_nl_flow_mask_empty.vlan,
-			 sizeof(mlx5_nl_flow_mask_supported.vlan), error);
-		if (!mask.vlan)
-			return -rte_errno;
-		if (!eth_type_set &&
-		    !mnl_attr_put_u16_check(buf, size,
-					    TCA_FLOWER_KEY_ETH_TYPE,
-					    RTE_BE16(ETH_P_8021Q)))
-			goto error_nobufs;
-		eth_type_set = 1;
-		vlan_present = 1;
-		if (mask.vlan == &mlx5_nl_flow_mask_empty.vlan) {
-			++item;
-			break;
-		}
-		spec.vlan = item->spec;
-		if ((mask.vlan->tci & RTE_BE16(0xe000) &&
-		     (mask.vlan->tci & RTE_BE16(0xe000)) != RTE_BE16(0xe000)) ||
-		    (mask.vlan->tci & RTE_BE16(0x0fff) &&
-		     (mask.vlan->tci & RTE_BE16(0x0fff)) != RTE_BE16(0x0fff)) ||
-		    (mask.vlan->inner_type &&
-		     mask.vlan->inner_type != RTE_BE16(0xffff)))
-			return rte_flow_error_set
-				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
-				 mask.vlan,
-				 "no support for partial masks on"
-				 " \"tci\" (PCP and VID parts) and"
-				 " \"inner_type\" fields");
-		if (mask.vlan->inner_type) {
-			if (!mnl_attr_put_u16_check
-			    (buf, size, TCA_FLOWER_KEY_VLAN_ETH_TYPE,
-			     spec.vlan->inner_type))
-				goto error_nobufs;
-			vlan_eth_type_set = 1;
-		}
-		if ((mask.vlan->tci & RTE_BE16(0xe000) &&
-		     !mnl_attr_put_u8_check
-		     (buf, size, TCA_FLOWER_KEY_VLAN_PRIO,
-		      (rte_be_to_cpu_16(spec.vlan->tci) >> 13) & 0x7)) ||
-		    (mask.vlan->tci & RTE_BE16(0x0fff) &&
-		     !mnl_attr_put_u16_check
-		     (buf, size, TCA_FLOWER_KEY_VLAN_ID,
-		      rte_be_to_cpu_16(spec.vlan->tci & RTE_BE16(0x0fff)))))
-			goto error_nobufs;
-		++item;
-		break;
-	case ITEM_IPV4:
-		if (item->type != RTE_FLOW_ITEM_TYPE_IPV4)
-			goto trans;
-		mask.ipv4 = mlx5_nl_flow_item_mask
-			(item, &rte_flow_item_ipv4_mask,
-			 &mlx5_nl_flow_mask_supported.ipv4,
-			 &mlx5_nl_flow_mask_empty.ipv4,
-			 sizeof(mlx5_nl_flow_mask_supported.ipv4), error);
-		if (!mask.ipv4)
-			return -rte_errno;
-		if ((!eth_type_set || !vlan_eth_type_set) &&
-		    !mnl_attr_put_u16_check(buf, size,
-					    vlan_present ?
-					    TCA_FLOWER_KEY_VLAN_ETH_TYPE :
-					    TCA_FLOWER_KEY_ETH_TYPE,
-					    RTE_BE16(ETH_P_IP)))
-			goto error_nobufs;
-		eth_type_set = 1;
-		vlan_eth_type_set = 1;
-		if (mask.ipv4 == &mlx5_nl_flow_mask_empty.ipv4) {
-			++item;
-			break;
-		}
-		spec.ipv4 = item->spec;
-		if (mask.ipv4->hdr.next_proto_id &&
-		    mask.ipv4->hdr.next_proto_id != 0xff)
-			return rte_flow_error_set
-				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
-				 mask.ipv4,
-				 "no support for partial mask on"
-				 " \"hdr.next_proto_id\" field");
-		if (mask.ipv4->hdr.next_proto_id) {
-			if (!mnl_attr_put_u8_check
-			    (buf, size, TCA_FLOWER_KEY_IP_PROTO,
-			     spec.ipv4->hdr.next_proto_id))
-				goto error_nobufs;
-			ip_proto_set = 1;
-		}
-		if ((mask.ipv4->hdr.src_addr &&
-		     (!mnl_attr_put_u32_check(buf, size,
-					      TCA_FLOWER_KEY_IPV4_SRC,
-					      spec.ipv4->hdr.src_addr) ||
-		      !mnl_attr_put_u32_check(buf, size,
-					      TCA_FLOWER_KEY_IPV4_SRC_MASK,
-					      mask.ipv4->hdr.src_addr))) ||
-		    (mask.ipv4->hdr.dst_addr &&
-		     (!mnl_attr_put_u32_check(buf, size,
-					      TCA_FLOWER_KEY_IPV4_DST,
-					      spec.ipv4->hdr.dst_addr) ||
-		      !mnl_attr_put_u32_check(buf, size,
-					      TCA_FLOWER_KEY_IPV4_DST_MASK,
-					      mask.ipv4->hdr.dst_addr))))
-			goto error_nobufs;
-		++item;
-		break;
-	case ITEM_IPV6:
-		if (item->type != RTE_FLOW_ITEM_TYPE_IPV6)
-			goto trans;
-		mask.ipv6 = mlx5_nl_flow_item_mask
-			(item, &rte_flow_item_ipv6_mask,
-			 &mlx5_nl_flow_mask_supported.ipv6,
-			 &mlx5_nl_flow_mask_empty.ipv6,
-			 sizeof(mlx5_nl_flow_mask_supported.ipv6), error);
-		if (!mask.ipv6)
-			return -rte_errno;
-		if ((!eth_type_set || !vlan_eth_type_set) &&
-		    !mnl_attr_put_u16_check(buf, size,
-					    vlan_present ?
-					    TCA_FLOWER_KEY_VLAN_ETH_TYPE :
-					    TCA_FLOWER_KEY_ETH_TYPE,
-					    RTE_BE16(ETH_P_IPV6)))
-			goto error_nobufs;
-		eth_type_set = 1;
-		vlan_eth_type_set = 1;
-		if (mask.ipv6 == &mlx5_nl_flow_mask_empty.ipv6) {
-			++item;
-			break;
-		}
-		spec.ipv6 = item->spec;
-		if (mask.ipv6->hdr.proto && mask.ipv6->hdr.proto != 0xff)
-			return rte_flow_error_set
-				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
-				 mask.ipv6,
-				 "no support for partial mask on"
-				 " \"hdr.proto\" field");
-		if (mask.ipv6->hdr.proto) {
-			if (!mnl_attr_put_u8_check
-			    (buf, size, TCA_FLOWER_KEY_IP_PROTO,
-			     spec.ipv6->hdr.proto))
-				goto error_nobufs;
-			ip_proto_set = 1;
-		}
-		if ((!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr) &&
-		     (!mnl_attr_put_check(buf, size,
-					  TCA_FLOWER_KEY_IPV6_SRC,
-					  sizeof(spec.ipv6->hdr.src_addr),
-					  spec.ipv6->hdr.src_addr) ||
-		      !mnl_attr_put_check(buf, size,
-					  TCA_FLOWER_KEY_IPV6_SRC_MASK,
-					  sizeof(mask.ipv6->hdr.src_addr),
-					  mask.ipv6->hdr.src_addr))) ||
-		    (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr) &&
-		     (!mnl_attr_put_check(buf, size,
-					  TCA_FLOWER_KEY_IPV6_DST,
-					  sizeof(spec.ipv6->hdr.dst_addr),
-					  spec.ipv6->hdr.dst_addr) ||
-		      !mnl_attr_put_check(buf, size,
-					  TCA_FLOWER_KEY_IPV6_DST_MASK,
-					  sizeof(mask.ipv6->hdr.dst_addr),
-					  mask.ipv6->hdr.dst_addr))))
-			goto error_nobufs;
-		++item;
-		break;
-	case ITEM_TCP:
-		if (item->type != RTE_FLOW_ITEM_TYPE_TCP)
-			goto trans;
-		mask.tcp = mlx5_nl_flow_item_mask
-			(item, &rte_flow_item_tcp_mask,
-			 &mlx5_nl_flow_mask_supported.tcp,
-			 &mlx5_nl_flow_mask_empty.tcp,
-			 sizeof(mlx5_nl_flow_mask_supported.tcp), error);
-		if (!mask.tcp)
-			return -rte_errno;
-		if (!ip_proto_set &&
-		    !mnl_attr_put_u8_check(buf, size,
-					   TCA_FLOWER_KEY_IP_PROTO,
-					   IPPROTO_TCP))
-			goto error_nobufs;
-		if (mask.tcp == &mlx5_nl_flow_mask_empty.tcp) {
-			++item;
-			break;
-		}
-		spec.tcp = item->spec;
-		if ((mask.tcp->hdr.src_port &&
-		     (!mnl_attr_put_u16_check(buf, size,
-					      TCA_FLOWER_KEY_TCP_SRC,
-					      spec.tcp->hdr.src_port) ||
-		      !mnl_attr_put_u16_check(buf, size,
-					      TCA_FLOWER_KEY_TCP_SRC_MASK,
-					      mask.tcp->hdr.src_port))) ||
-		    (mask.tcp->hdr.dst_port &&
-		     (!mnl_attr_put_u16_check(buf, size,
-					      TCA_FLOWER_KEY_TCP_DST,
-					      spec.tcp->hdr.dst_port) ||
-		      !mnl_attr_put_u16_check(buf, size,
-					      TCA_FLOWER_KEY_TCP_DST_MASK,
-					      mask.tcp->hdr.dst_port))))
-			goto error_nobufs;
-		++item;
-		break;
-	case ITEM_UDP:
-		if (item->type != RTE_FLOW_ITEM_TYPE_UDP)
-			goto trans;
-		mask.udp = mlx5_nl_flow_item_mask
-			(item, &rte_flow_item_udp_mask,
-			 &mlx5_nl_flow_mask_supported.udp,
-			 &mlx5_nl_flow_mask_empty.udp,
-			 sizeof(mlx5_nl_flow_mask_supported.udp), error);
-		if (!mask.udp)
-			return -rte_errno;
-		if (!ip_proto_set &&
-		    !mnl_attr_put_u8_check(buf, size,
-					   TCA_FLOWER_KEY_IP_PROTO,
-					   IPPROTO_UDP))
-			goto error_nobufs;
-		if (mask.udp == &mlx5_nl_flow_mask_empty.udp) {
-			++item;
-			break;
-		}
-		spec.udp = item->spec;
-		if ((mask.udp->hdr.src_port &&
-		     (!mnl_attr_put_u16_check(buf, size,
-					      TCA_FLOWER_KEY_UDP_SRC,
-					      spec.udp->hdr.src_port) ||
-		      !mnl_attr_put_u16_check(buf, size,
-					      TCA_FLOWER_KEY_UDP_SRC_MASK,
-					      mask.udp->hdr.src_port))) ||
-		    (mask.udp->hdr.dst_port &&
-		     (!mnl_attr_put_u16_check(buf, size,
-					      TCA_FLOWER_KEY_UDP_DST,
-					      spec.udp->hdr.dst_port) ||
-		      !mnl_attr_put_u16_check(buf, size,
-					      TCA_FLOWER_KEY_UDP_DST_MASK,
-					      mask.udp->hdr.dst_port))))
-			goto error_nobufs;
-		++item;
-		break;
-	case ACTIONS:
-		if (item->type != RTE_FLOW_ITEM_TYPE_END)
-			goto trans;
-		assert(na_flower);
-		assert(!na_flower_act);
-		na_flower_act =
-			mnl_attr_nest_start_check(buf, size, TCA_FLOWER_ACT);
-		if (!na_flower_act)
-			goto error_nobufs;
-		act_index_cur = 1;
-		break;
-	case ACTION_VOID:
-		if (action->type != RTE_FLOW_ACTION_TYPE_VOID)
-			goto trans;
-		++action;
-		break;
-	case ACTION_PORT_ID:
-		if (action->type != RTE_FLOW_ACTION_TYPE_PORT_ID)
-			goto trans;
-		conf.port_id = action->conf;
-		if (conf.port_id->original)
-			i = 0;
-		else
-			for (i = 0; ptoi[i].ifindex; ++i)
-				if (ptoi[i].port_id == conf.port_id->id)
-					break;
-		if (!ptoi[i].ifindex)
-			return rte_flow_error_set
-				(error, ENODEV, RTE_FLOW_ERROR_TYPE_ACTION_CONF,
-				 conf.port_id,
-				 "missing data to convert port ID to ifindex");
-		act_index =
-			mnl_attr_nest_start_check(buf, size, act_index_cur++);
-		if (!act_index ||
-		    !mnl_attr_put_strz_check(buf, size, TCA_ACT_KIND, "mirred"))
-			goto error_nobufs;
-		act = mnl_attr_nest_start_check(buf, size, TCA_ACT_OPTIONS);
-		if (!act)
-			goto error_nobufs;
-		if (!mnl_attr_put_check(buf, size, TCA_MIRRED_PARMS,
-					sizeof(struct tc_mirred),
-					&(struct tc_mirred){
-						.action = TC_ACT_STOLEN,
-						.eaction = TCA_EGRESS_REDIR,
-						.ifindex = ptoi[i].ifindex,
-					}))
-			goto error_nobufs;
-		mnl_attr_nest_end(buf, act);
-		mnl_attr_nest_end(buf, act_index);
-		++action;
-		break;
-	case ACTION_DROP:
-		if (action->type != RTE_FLOW_ACTION_TYPE_DROP)
-			goto trans;
-		act_index =
-			mnl_attr_nest_start_check(buf, size, act_index_cur++);
-		if (!act_index ||
-		    !mnl_attr_put_strz_check(buf, size, TCA_ACT_KIND, "gact"))
-			goto error_nobufs;
-		act = mnl_attr_nest_start_check(buf, size, TCA_ACT_OPTIONS);
-		if (!act)
-			goto error_nobufs;
-		if (!mnl_attr_put_check(buf, size, TCA_GACT_PARMS,
-					sizeof(struct tc_gact),
-					&(struct tc_gact){
-						.action = TC_ACT_SHOT,
-					}))
-			goto error_nobufs;
-		mnl_attr_nest_end(buf, act);
-		mnl_attr_nest_end(buf, act_index);
-		++action;
-		break;
-	case ACTION_OF_POP_VLAN:
-		if (action->type != RTE_FLOW_ACTION_TYPE_OF_POP_VLAN)
-			goto trans;
-		conf.of_push_vlan = NULL;
-		i = TCA_VLAN_ACT_POP;
-		goto action_of_vlan;
-	case ACTION_OF_PUSH_VLAN:
-		if (action->type != RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN)
-			goto trans;
-		conf.of_push_vlan = action->conf;
-		i = TCA_VLAN_ACT_PUSH;
-		goto action_of_vlan;
-	case ACTION_OF_SET_VLAN_VID:
-		if (action->type != RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID)
-			goto trans;
-		conf.of_set_vlan_vid = action->conf;
-		if (na_vlan_id)
-			goto override_na_vlan_id;
-		i = TCA_VLAN_ACT_MODIFY;
-		goto action_of_vlan;
-	case ACTION_OF_SET_VLAN_PCP:
-		if (action->type != RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP)
-			goto trans;
-		conf.of_set_vlan_pcp = action->conf;
-		if (na_vlan_priority)
-			goto override_na_vlan_priority;
-		i = TCA_VLAN_ACT_MODIFY;
-		goto action_of_vlan;
-action_of_vlan:
-		act_index =
-			mnl_attr_nest_start_check(buf, size, act_index_cur++);
-		if (!act_index ||
-		    !mnl_attr_put_strz_check(buf, size, TCA_ACT_KIND, "vlan"))
-			goto error_nobufs;
-		act = mnl_attr_nest_start_check(buf, size, TCA_ACT_OPTIONS);
-		if (!act)
-			goto error_nobufs;
-		if (!mnl_attr_put_check(buf, size, TCA_VLAN_PARMS,
-					sizeof(struct tc_vlan),
-					&(struct tc_vlan){
-						.action = TC_ACT_PIPE,
-						.v_action = i,
-					}))
-			goto error_nobufs;
-		if (i == TCA_VLAN_ACT_POP) {
-			mnl_attr_nest_end(buf, act);
-			mnl_attr_nest_end(buf, act_index);
-			++action;
-			break;
-		}
-		if (i == TCA_VLAN_ACT_PUSH &&
-		    !mnl_attr_put_u16_check(buf, size,
-					    TCA_VLAN_PUSH_VLAN_PROTOCOL,
-					    conf.of_push_vlan->ethertype))
-			goto error_nobufs;
-		na_vlan_id = mnl_nlmsg_get_payload_tail(buf);
-		if (!mnl_attr_put_u16_check(buf, size, TCA_VLAN_PAD, 0))
-			goto error_nobufs;
-		na_vlan_priority = mnl_nlmsg_get_payload_tail(buf);
-		if (!mnl_attr_put_u8_check(buf, size, TCA_VLAN_PAD, 0))
-			goto error_nobufs;
-		mnl_attr_nest_end(buf, act);
-		mnl_attr_nest_end(buf, act_index);
-		if (action->type == RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
-override_na_vlan_id:
-			na_vlan_id->nla_type = TCA_VLAN_PUSH_VLAN_ID;
-			*(uint16_t *)mnl_attr_get_payload(na_vlan_id) =
-				rte_be_to_cpu_16
-				(conf.of_set_vlan_vid->vlan_vid);
-		} else if (action->type ==
-			   RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
-override_na_vlan_priority:
-			na_vlan_priority->nla_type =
-				TCA_VLAN_PUSH_VLAN_PRIORITY;
-			*(uint8_t *)mnl_attr_get_payload(na_vlan_priority) =
-				conf.of_set_vlan_pcp->vlan_pcp;
-		}
-		++action;
-		break;
-	case END:
-		if (item->type != RTE_FLOW_ITEM_TYPE_END ||
-		    action->type != RTE_FLOW_ACTION_TYPE_END)
-			goto trans;
-		if (na_flower_act)
-			mnl_attr_nest_end(buf, na_flower_act);
-		if (na_flower)
-			mnl_attr_nest_end(buf, na_flower);
-		nlh = buf;
-		return nlh->nlmsg_len;
-	}
-	back = trans;
-	trans = mlx5_nl_flow_trans[trans[n - 1]];
-	n = 0;
-	goto trans;
-error_nobufs:
-	if (buf != buf_tmp) {
-		buf = buf_tmp;
-		size = sizeof(buf_tmp);
-		goto init;
-	}
-	return rte_flow_error_set
-		(error, ENOBUFS, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
-		 "generated TC message is too large");
-}
-
-/**
- * Brand rtnetlink buffer with unique handle.
- *
- * This handle should be unique for a given network interface to avoid
- * collisions.
- *
- * @param buf
- *   Flow rule buffer previously initialized by mlx5_nl_flow_transpose().
- * @param handle
- *   Unique 32-bit handle to use.
- */
-void
-mlx5_nl_flow_brand(void *buf, uint32_t handle)
-{
-	struct tcmsg *tcm = mnl_nlmsg_get_payload(buf);
-
-	tcm->tcm_handle = handle;
-}
-
-/**
- * Send Netlink message with acknowledgment.
- *
- * @param nl
- *   Libmnl socket to use.
- * @param nlh
- *   Message to send. This function always raises the NLM_F_ACK flag before
- *   sending.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx5_nl_flow_nl_ack(struct mnl_socket *nl, struct nlmsghdr *nlh)
-{
-	alignas(struct nlmsghdr)
-	uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
-		    nlh->nlmsg_len - sizeof(*nlh)];
-	uint32_t seq = random();
-	int ret;
-
-	nlh->nlmsg_flags |= NLM_F_ACK;
-	nlh->nlmsg_seq = seq;
-	ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
-	if (ret != -1)
-		ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
-	if (ret != -1)
-		ret = mnl_cb_run
-			(ans, ret, seq, mnl_socket_get_portid(nl), NULL, NULL);
-	if (!ret)
-		return 0;
-	rte_errno = errno;
-	return -rte_errno;
-}
-
-/**
- * Create a Netlink flow rule.
- *
- * @param nl
- *   Libmnl socket to use.
- * @param buf
- *   Flow rule buffer previously initialized by mlx5_nl_flow_transpose().
- * @param[out] error
- *   Perform verbose error reporting if not NULL.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-int
-mlx5_nl_flow_create(struct mnl_socket *nl, void *buf,
-		    struct rte_flow_error *error)
-{
-	struct nlmsghdr *nlh = buf;
-
-	nlh->nlmsg_type = RTM_NEWTFILTER;
-	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
-	if (!mlx5_nl_flow_nl_ack(nl, nlh))
-		return 0;
-	return rte_flow_error_set
-		(error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
-		 "netlink: failed to create TC flow rule");
-}
-
-/**
- * Destroy a Netlink flow rule.
- *
- * @param nl
- *   Libmnl socket to use.
- * @param buf
- *   Flow rule buffer previously initialized by mlx5_nl_flow_transpose().
- * @param[out] error
- *   Perform verbose error reporting if not NULL.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-int
-mlx5_nl_flow_destroy(struct mnl_socket *nl, void *buf,
-		     struct rte_flow_error *error)
-{
-	struct nlmsghdr *nlh = buf;
-
-	nlh->nlmsg_type = RTM_DELTFILTER;
-	nlh->nlmsg_flags = NLM_F_REQUEST;
-	if (!mlx5_nl_flow_nl_ack(nl, nlh))
-		return 0;
-	return rte_flow_error_set
-		(error, errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
-		 "netlink: failed to destroy TC flow rule");
-}
-
-/**
- * Initialize ingress qdisc of a given network interface.
- *
- * @param nl
- *   Libmnl socket of the @p NETLINK_ROUTE kind.
- * @param ifindex
- *   Index of network interface to initialize.
- * @param[out] error
- *   Perform verbose error reporting if not NULL.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-int
-mlx5_nl_flow_init(struct mnl_socket *nl, unsigned int ifindex,
-		  struct rte_flow_error *error)
-{
-	struct nlmsghdr *nlh;
-	struct tcmsg *tcm;
-	alignas(struct nlmsghdr)
-	uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];
-
-	/* Destroy existing ingress qdisc and everything attached to it. */
-	nlh = mnl_nlmsg_put_header(buf);
-	nlh->nlmsg_type = RTM_DELQDISC;
-	nlh->nlmsg_flags = NLM_F_REQUEST;
-	tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
-	tcm->tcm_family = AF_UNSPEC;
-	tcm->tcm_ifindex = ifindex;
-	tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
-	tcm->tcm_parent = TC_H_INGRESS;
-	/* Ignore errors when qdisc is already absent. */
-	if (mlx5_nl_flow_nl_ack(nl, nlh) &&
-	    rte_errno != EINVAL && rte_errno != ENOENT)
-		return rte_flow_error_set
-			(error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-			 NULL, "netlink: failed to remove ingress qdisc");
-	/* Create fresh ingress qdisc. */
-	nlh = mnl_nlmsg_put_header(buf);
-	nlh->nlmsg_type = RTM_NEWQDISC;
-	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
-	tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
-	tcm->tcm_family = AF_UNSPEC;
-	tcm->tcm_ifindex = ifindex;
-	tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
-	tcm->tcm_parent = TC_H_INGRESS;
-	mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
-	if (mlx5_nl_flow_nl_ack(nl, nlh))
-		return rte_flow_error_set
-			(error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-			 NULL, "netlink: failed to create ingress qdisc");
-	return 0;
-}
-
-/**
- * Create and configure a libmnl socket for Netlink flow rules.
- *
- * @return
- *   A valid libmnl socket object pointer on success, NULL otherwise and
- *   rte_errno is set.
- */
-struct mnl_socket *
-mlx5_nl_flow_socket_create(void)
-{
-	struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
-
-	if (nl) {
-		mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
-				      sizeof(int));
-		if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
-			return nl;
-	}
-	rte_errno = errno;
-	if (nl)
-		mnl_socket_close(nl);
-	return NULL;
-}
-
-/**
- * Destroy a libmnl socket.
- */
-void
-mlx5_nl_flow_socket_destroy(struct mnl_socket *nl)
-{
-	mnl_socket_close(nl);
-}
-- 
2.11.0

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [dpdk-dev] [PATCH 3/3] net/mlx5: add Linux TC flower driver for E-Switch flow
  2018-09-19  7:21 [dpdk-dev] [PATCH 0/3] migrate Linux TC flower driver to new flow engine Yongseok Koh
  2018-09-19  7:21 ` [dpdk-dev] [PATCH 1/3] net/mlx5: add abstraction for multiple flow drivers Yongseok Koh
  2018-09-19  7:21 ` [dpdk-dev] [PATCH 2/3] net/mlx5: remove Netlink flow driver Yongseok Koh
@ 2018-09-19  7:21 ` Yongseok Koh
  2018-09-24 19:55 ` [dpdk-dev] [PATCH v2 0/3] net/mlx5: migrate Linux TC flower driver to new flow engine Yongseok Koh
  2018-09-24 23:17 ` [dpdk-dev] [PATCH v3 00/11] net/mlx5: add Direct Verbs flow driver support Yongseok Koh
  4 siblings, 0 replies; 22+ messages in thread
From: Yongseok Koh @ 2018-09-19  7:21 UTC (permalink / raw)
  To: Shahaf Shuler; +Cc: dev, Yongseok Koh

Flows having the 'transfer' attribute must be inserted into the E-Switch on
the NIC, and the control path uses the Linux TC flower interface via a
Netlink socket. This patch adds such a flow driver on top of the new flow
engine.
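
For reference, a minimal sketch of the kind of rule this driver offloads,
using the generic rte_flow API (the port IDs and the trivial pattern are
illustrative only, not taken from this patch):

  #include <rte_flow.h>

  /* Redirect every packet received on port 0 to port 1 via the E-Switch. */
  struct rte_flow_attr attr = { .ingress = 1, .transfer = 1 };
  struct rte_flow_item pattern[] = {
          { .type = RTE_FLOW_ITEM_TYPE_ETH },
          { .type = RTE_FLOW_ITEM_TYPE_END },
  };
  struct rte_flow_action_port_id port = { .id = 1 };
  struct rte_flow_action actions[] = {
          { .type = RTE_FLOW_ACTION_TYPE_PORT_ID, .conf = &port },
          { .type = RTE_FLOW_ACTION_TYPE_END },
  };
  struct rte_flow_error err;
  struct rte_flow *f = rte_flow_create(0, &attr, pattern, actions, &err);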

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/Makefile        |    1 +
 drivers/net/mlx5/mlx5.c          |   33 +
 drivers/net/mlx5/mlx5_flow.c     |    6 +-
 drivers/net/mlx5/mlx5_flow.h     |   20 +
 drivers/net/mlx5/mlx5_flow_tcf.c | 1608 ++++++++++++++++++++++++++++++++++++++
 5 files changed, 1667 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/mlx5/mlx5_flow_tcf.c

diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index 9c1044808..ca1de9f21 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -32,6 +32,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rss.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_mr.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow_dv.c
+SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow_tcf.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow_verbs.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_socket.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_nl.c
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 47cf538e2..e3c36710c 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -44,6 +44,7 @@
 #include "mlx5_rxtx.h"
 #include "mlx5_autoconf.h"
 #include "mlx5_defs.h"
+#include "mlx5_flow.h"
 #include "mlx5_glue.h"
 #include "mlx5_mr.h"
 #include "mlx5_flow.h"
@@ -286,6 +287,8 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 		close(priv->nl_socket_route);
 	if (priv->nl_socket_rdma >= 0)
 		close(priv->nl_socket_rdma);
+	if (priv->mnl_socket)
+		mlx5_flow_tcf_socket_destroy(priv->mnl_socket);
 	ret = mlx5_hrxq_ibv_verify(dev);
 	if (ret)
 		DRV_LOG(WARNING, "port %u some hash Rx queue still remain",
@@ -1135,6 +1138,34 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 	claim_zero(mlx5_mac_addr_add(eth_dev, &mac, 0, 0));
 	if (vf && config.vf_nl_en)
 		mlx5_nl_mac_addr_sync(eth_dev);
+	priv->mnl_socket = mlx5_flow_tcf_socket_create();
+	if (!priv->mnl_socket) {
+		err = -rte_errno;
+		DRV_LOG(WARNING,
+			"flow rules relying on switch offloads will not be"
+			" supported: cannot open libmnl socket: %s",
+			strerror(rte_errno));
+	} else {
+		struct rte_flow_error error;
+		unsigned int ifindex = mlx5_ifindex(eth_dev);
+
+		if (!ifindex) {
+			err = -rte_errno;
+			error.message =
+				"cannot retrieve network interface index";
+		} else {
+			err = mlx5_flow_tcf_init(priv->mnl_socket, ifindex,
+						&error);
+		}
+		if (err) {
+			DRV_LOG(WARNING,
+				"flow rules relying on switch offloads will"
+				" not be supported: %s: %s",
+				error.message, strerror(rte_errno));
+			mlx5_flow_tcf_socket_destroy(priv->mnl_socket);
+			priv->mnl_socket = NULL;
+		}
+	}
 	TAILQ_INIT(&priv->flows);
 	TAILQ_INIT(&priv->ctrl_flows);
 	/* Hint libmlx5 to use PMD allocator for data plane resources */
@@ -1187,6 +1218,8 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 			close(priv->nl_socket_route);
 		if (priv->nl_socket_rdma >= 0)
 			close(priv->nl_socket_rdma);
+		if (priv->mnl_socket)
+			mlx5_flow_tcf_socket_destroy(priv->mnl_socket);
 		if (own_domain_id)
 			claim_zero(rte_eth_switch_domain_free(priv->domain_id));
 		rte_free(priv);
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 2d3158a6f..19d445596 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -41,6 +41,7 @@ extern const struct eth_dev_ops mlx5_dev_ops_isolate;
 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
 extern const struct mlx5_flow_driver_ops mlx5_flow_dv_drv_ops;
 #endif
+extern const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops;
 extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops;
 
 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops;
@@ -50,6 +51,7 @@ const struct mlx5_flow_driver_ops *flow_drv_ops[] = {
 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
 	[MLX5_FLOW_TYPE_DV] = &mlx5_flow_dv_drv_ops,
 #endif
+	[MLX5_FLOW_TYPE_TCF] = &mlx5_flow_tcf_drv_ops,
 	[MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_verbs_drv_ops,
 	[MLX5_FLOW_TYPE_MAX] = &mlx5_flow_null_drv_ops
 };
@@ -1610,7 +1612,9 @@ flow_get_drv_type(struct rte_eth_dev *dev __rte_unused,
 	struct priv *priv __rte_unused = dev->data->dev_private;
 	enum mlx5_flow_drv_type type = MLX5_FLOW_TYPE_MAX;
 
-	if (!attr->transfer) {
+	if (attr->transfer) {
+		type = MLX5_FLOW_TYPE_TCF;
+	} else {
 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
 		type = priv->config.dv_flow_en ?  MLX5_FLOW_TYPE_DV :
 						  MLX5_FLOW_TYPE_VERBS;
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 2bc3bee8c..10d700a7f 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -82,6 +82,11 @@
 #define MLX5_ACTION_FLAG (1u << 3)
 #define MLX5_ACTION_MARK (1u << 4)
 #define MLX5_ACTION_COUNT (1u << 5)
+#define MLX5_ACTION_PORT_ID (1u << 6)
+#define MLX5_ACTION_OF_POP_VLAN (1u << 7)
+#define MLX5_ACTION_OF_PUSH_VLAN (1u << 8)
+#define MLX5_ACTION_OF_SET_VLAN_VID (1u << 9)
+#define MLX5_ACTION_OF_SET_VLAN_PCP (1u << 10)
 
 /* possible L3 layers protocols filtering. */
 #define MLX5_IP_PROTOCOL_TCP 6
@@ -131,6 +136,7 @@
 enum mlx5_flow_drv_type {
 	MLX5_FLOW_TYPE_MIN,
 	MLX5_FLOW_TYPE_DV,
+	MLX5_FLOW_TYPE_TCF,
 	MLX5_FLOW_TYPE_VERBS,
 	MLX5_FLOW_TYPE_MAX,
 };
@@ -170,6 +176,12 @@ struct mlx5_flow_dv {
 	int actions_n; /**< number of actions. */
 };
 
+/** Linux TC flower driver for E-Switch flow. */
+struct mlx5_flow_tcf {
+	struct nlmsghdr *nlh;
+	struct tcmsg *tcm;
+};
+
 /* Verbs specification header. */
 struct ibv_spec_header {
 	enum ibv_flow_spec_type type;
@@ -199,6 +211,7 @@ struct mlx5_flow {
 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
 		struct mlx5_flow_dv dv;
 #endif
+		struct mlx5_flow_tcf tcf;
 		struct mlx5_flow_verbs verbs;
 	};
 };
@@ -322,4 +335,11 @@ int mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
 				      struct rte_eth_dev *dev,
 				      struct rte_flow_error *error);
 
+/* mlx5_flow_tcf.c */
+
+int mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
+		       struct rte_flow_error *error);
+struct mnl_socket *mlx5_flow_tcf_socket_create(void);
+void mlx5_flow_tcf_socket_destroy(struct mnl_socket *nl);
+
 #endif /* RTE_PMD_MLX5_FLOW_H_ */
diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
new file mode 100644
index 000000000..14376188e
--- /dev/null
+++ b/drivers/net/mlx5/mlx5_flow_tcf.c
@@ -0,0 +1,1608 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 6WIND S.A.
+ * Copyright 2018 Mellanox Technologies, Ltd
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <libmnl/libmnl.h>
+#include <linux/if_ether.h>
+#include <linux/netlink.h>
+#include <linux/pkt_cls.h>
+#include <linux/pkt_sched.h>
+#include <linux/rtnetlink.h>
+#include <linux/tc_act/tc_gact.h>
+#include <linux/tc_act/tc_mirred.h>
+#include <netinet/in.h>
+#include <stdalign.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+
+#include <rte_byteorder.h>
+#include <rte_errno.h>
+#include <rte_ether.h>
+#include <rte_flow.h>
+#include <rte_malloc.h>
+
+#include "mlx5.h"
+#include "mlx5_flow.h"
+#include "mlx5_autoconf.h"
+
+#ifdef HAVE_TC_ACT_VLAN
+
+#include <linux/tc_act/tc_vlan.h>
+
+#else /* HAVE_TC_ACT_VLAN */
+
+#define TCA_VLAN_ACT_POP 1
+#define TCA_VLAN_ACT_PUSH 2
+#define TCA_VLAN_ACT_MODIFY 3
+#define TCA_VLAN_PARMS 2
+#define TCA_VLAN_PUSH_VLAN_ID 3
+#define TCA_VLAN_PUSH_VLAN_PROTOCOL 4
+#define TCA_VLAN_PAD 5
+#define TCA_VLAN_PUSH_VLAN_PRIORITY 6
+
+struct tc_vlan {
+	tc_gen;
+	int v_action;
+};
+
+#endif /* HAVE_TC_ACT_VLAN */
+
+/* Normally found in linux/netlink.h. */
+#ifndef NETLINK_CAP_ACK
+#define NETLINK_CAP_ACK 10
+#endif
+
+/* Normally found in linux/pkt_sched.h. */
+#ifndef TC_H_MIN_INGRESS
+#define TC_H_MIN_INGRESS 0xfff2u
+#endif
+
+/* Normally found in linux/pkt_cls.h. */
+#ifndef TCA_CLS_FLAGS_SKIP_SW
+#define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
+#endif
+#ifndef HAVE_TCA_FLOWER_ACT
+#define TCA_FLOWER_ACT 3
+#endif
+#ifndef HAVE_TCA_FLOWER_FLAGS
+#define TCA_FLOWER_FLAGS 22
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ETH_TYPE
+#define TCA_FLOWER_KEY_ETH_TYPE 8
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ETH_DST
+#define TCA_FLOWER_KEY_ETH_DST 4
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ETH_DST_MASK
+#define TCA_FLOWER_KEY_ETH_DST_MASK 5
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC
+#define TCA_FLOWER_KEY_ETH_SRC 6
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK
+#define TCA_FLOWER_KEY_ETH_SRC_MASK 7
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_IP_PROTO
+#define TCA_FLOWER_KEY_IP_PROTO 9
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC
+#define TCA_FLOWER_KEY_IPV4_SRC 10
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK
+#define TCA_FLOWER_KEY_IPV4_SRC_MASK 11
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST
+#define TCA_FLOWER_KEY_IPV4_DST 12
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK
+#define TCA_FLOWER_KEY_IPV4_DST_MASK 13
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC
+#define TCA_FLOWER_KEY_IPV6_SRC 14
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK
+#define TCA_FLOWER_KEY_IPV6_SRC_MASK 15
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST
+#define TCA_FLOWER_KEY_IPV6_DST 16
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK
+#define TCA_FLOWER_KEY_IPV6_DST_MASK 17
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC
+#define TCA_FLOWER_KEY_TCP_SRC 18
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK
+#define TCA_FLOWER_KEY_TCP_SRC_MASK 35
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_TCP_DST
+#define TCA_FLOWER_KEY_TCP_DST 19
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_TCP_DST_MASK
+#define TCA_FLOWER_KEY_TCP_DST_MASK 36
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC
+#define TCA_FLOWER_KEY_UDP_SRC 20
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK
+#define TCA_FLOWER_KEY_UDP_SRC_MASK 37
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_UDP_DST
+#define TCA_FLOWER_KEY_UDP_DST 21
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_UDP_DST_MASK
+#define TCA_FLOWER_KEY_UDP_DST_MASK 38
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_VLAN_ID
+#define TCA_FLOWER_KEY_VLAN_ID 23
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_VLAN_PRIO
+#define TCA_FLOWER_KEY_VLAN_PRIO 24
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
+#define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
+#endif
+
+#ifndef IPV6_ADDR_LEN
+#define IPV6_ADDR_LEN 16
+#endif
+
+/** Empty masks for known item types. */
+static const union {
+	struct rte_flow_item_port_id port_id;
+	struct rte_flow_item_eth eth;
+	struct rte_flow_item_vlan vlan;
+	struct rte_flow_item_ipv4 ipv4;
+	struct rte_flow_item_ipv6 ipv6;
+	struct rte_flow_item_tcp tcp;
+	struct rte_flow_item_udp udp;
+} flow_tcf_mask_empty;
+
+/** Supported masks for known item types. */
+static const struct {
+	struct rte_flow_item_port_id port_id;
+	struct rte_flow_item_eth eth;
+	struct rte_flow_item_vlan vlan;
+	struct rte_flow_item_ipv4 ipv4;
+	struct rte_flow_item_ipv6 ipv6;
+	struct rte_flow_item_tcp tcp;
+	struct rte_flow_item_udp udp;
+} flow_tcf_mask_supported = {
+	.port_id = {
+		.id = 0xffffffff,
+	},
+	.eth = {
+		.type = RTE_BE16(0xffff),
+		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+		.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+	},
+	.vlan = {
+		/* PCP and VID only, no DEI. */
+		.tci = RTE_BE16(0xefff),
+		.inner_type = RTE_BE16(0xffff),
+	},
+	.ipv4.hdr = {
+		.next_proto_id = 0xff,
+		.src_addr = RTE_BE32(0xffffffff),
+		.dst_addr = RTE_BE32(0xffffffff),
+	},
+	.ipv6.hdr = {
+		.proto = 0xff,
+		.src_addr =
+			"\xff\xff\xff\xff\xff\xff\xff\xff"
+			"\xff\xff\xff\xff\xff\xff\xff\xff",
+		.dst_addr =
+			"\xff\xff\xff\xff\xff\xff\xff\xff"
+			"\xff\xff\xff\xff\xff\xff\xff\xff",
+	},
+	.tcp.hdr = {
+		.src_port = RTE_BE16(0xffff),
+		.dst_port = RTE_BE16(0xffff),
+	},
+	.udp.hdr = {
+		.src_port = RTE_BE16(0xffff),
+		.dst_port = RTE_BE16(0xffff),
+	},
+};
+
+#define SZ_NLATTR_HDR MNL_ALIGN(sizeof(struct nlattr))
+#define SZ_NLATTR_NEST SZ_NLATTR_HDR
+#define SZ_NLATTR_DATA_OF(len) MNL_ALIGN(SZ_NLATTR_HDR + (len))
+#define SZ_NLATTR_TYPE_OF(typ) SZ_NLATTR_DATA_OF(sizeof(typ))
+#define SZ_NLATTR_STRZ_OF(str) SZ_NLATTR_DATA_OF(strlen(str) + 1)
+
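+/*
+ * Example (illustrative): SZ_NLATTR_TYPE_OF(uint16_t) is the 4-byte
+ * nlattr header plus 2 bytes of payload, rounded up to 8 bytes by the
+ * 4-byte Netlink alignment; a 16-bit key such as TCA_FLOWER_KEY_ETH_TYPE
+ * therefore reserves 8 bytes in the message.
+ */
+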
+#define PTOI_TABLE_SZ_MAX(dev) (mlx5_dev_to_port_id((dev)->device, NULL, 0) + 2)
+
+/** DPDK port to network interface index (ifindex) conversion. */
+struct flow_tcf_ptoi {
+	uint16_t port_id; /**< DPDK port ID. */
+	unsigned int ifindex; /**< Network interface index. */
+};
+
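+/*
+ * Illustrative contents as filled in by flow_tcf_build_ptoi_table() for a
+ * hypothetical two-port switch domain (ifindex values are made up):
+ *   ptoi[0] = { .port_id = 1, .ifindex = 6 }  <- caller's device first
+ *   ptoi[1] = { .port_id = 2, .ifindex = 7 }
+ *   ptoi[2] = { .port_id = 0, .ifindex = 0 }  <- zero-ifindex terminator
+ */
+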
+#define MLX5_TCF_FATE_ACTIONS (MLX5_ACTION_DROP | MLX5_ACTION_PORT_ID)
+
+/**
+ * Retrieve mask for pattern item.
+ *
+ * This function does basic sanity checks on a pattern item in order to
+ * return the most appropriate mask for it.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] mask_default
+ *   Default mask for pattern item as specified by the flow API.
+ * @param[in] mask_supported
+ *   Mask fields supported by the implementation.
+ * @param[in] mask_empty
+ *   Empty mask to return when there is no specification.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   Either @p item->mask or one of the mask parameters on success, NULL
+ *   otherwise and rte_errno is set.
+ */
+static const void *
+flow_tcf_item_mask(const struct rte_flow_item *item, const void *mask_default,
+		   const void *mask_supported, const void *mask_empty,
+		   size_t mask_size, struct rte_flow_error *error)
+{
+	const uint8_t *mask;
+	size_t i;
+
+	/* item->last and item->mask cannot exist without item->spec. */
+	if (!item->spec && (item->mask || item->last)) {
+		rte_flow_error_set(error, EINVAL,
+				   RTE_FLOW_ERROR_TYPE_ITEM, item,
+				   "\"mask\" or \"last\" field provided without"
+				   " a corresponding \"spec\"");
+		return NULL;
+	}
+	/* No spec, no mask, no problem. */
+	if (!item->spec)
+		return mask_empty;
+	mask = item->mask ? item->mask : mask_default;
+	assert(mask);
+	/*
+	 * Single-pass check to make sure that:
+	 * - Mask is supported, no bits are set outside mask_supported.
+	 * - Both item->spec and item->last are included in mask.
+	 */
+	for (i = 0; i != mask_size; ++i) {
+		if (!mask[i])
+			continue;
+		if ((mask[i] | ((const uint8_t *)mask_supported)[i]) !=
+		    ((const uint8_t *)mask_supported)[i]) {
+			rte_flow_error_set(error, ENOTSUP,
+					   RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
+					   "unsupported field found"
+					   " in \"mask\"");
+			return NULL;
+		}
+		if (item->last &&
+		    (((const uint8_t *)item->spec)[i] & mask[i]) !=
+		    (((const uint8_t *)item->last)[i] & mask[i])) {
+			rte_flow_error_set(error, ENOTSUP,
+					   RTE_FLOW_ERROR_TYPE_ITEM_LAST,
+					   item->last,
+					   "range between \"spec\" and \"last\""
+					   " not comprised in \"mask\"");
+			return NULL;
+		}
+	}
+	return mask;
+}
+
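+/*
+ * Illustrative outcomes of flow_tcf_item_mask() (not exhaustive): an item
+ * with no spec yields mask_empty, a spec without mask yields mask_default,
+ * while a mask with bits set outside mask_supported, or a spec/last range
+ * not covered by the mask, fails with rte_errno set.
+ */
+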
+/**
+ * Build a conversion table between port ID and ifindex.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[out] ptoi
+ *   Pointer to ptoi table.
+ * @param[in] len
+ *   Size of ptoi table provided.
+ *
+ * @return
+ *   Size of ptoi table filled.
+ */
+static unsigned int
+flow_tcf_build_ptoi_table(struct rte_eth_dev *dev, struct flow_tcf_ptoi *ptoi,
+			  unsigned int len)
+{
+	unsigned int n = mlx5_dev_to_port_id(dev->device, NULL, 0);
+	uint16_t port_id[n + 1];
+	unsigned int i;
+	unsigned int own = 0;
+
+	/* At least one port is needed when no switch domain is present. */
+	if (!n) {
+		n = 1;
+		port_id[0] = dev->data->port_id;
+	} else {
+		n = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, n), n);
+	}
+	if (n > len)
+		return 0;
+	for (i = 0; i != n; ++i) {
+		struct rte_eth_dev_info dev_info;
+
+		rte_eth_dev_info_get(port_id[i], &dev_info);
+		if (port_id[i] == dev->data->port_id)
+			own = i;
+		ptoi[i].port_id = port_id[i];
+		ptoi[i].ifindex = dev_info.if_index;
+	}
+	/* Ensure first entry of ptoi[] is the current device. */
+	if (own) {
+		ptoi[n] = ptoi[0];
+		ptoi[0] = ptoi[own];
+		ptoi[own] = ptoi[n];
+	}
+	/* An entry with zero ifindex terminates ptoi[]. */
+	ptoi[n].port_id = 0;
+	ptoi[n].ifindex = 0;
+	return n;
+}
+
+/**
+ * Verify the @p attr will be correctly understood by the E-switch.
+ *
+ * @param[in] attr
+ *   Pointer to flow attributes
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_tcf_validate_attributes(const struct rte_flow_attr *attr,
+			     struct rte_flow_error *error)
+{
+	/*
+	 * Supported attributes: no groups, some priorities and ingress only.
+	 * Don't care about transfer as it is the caller's problem.
+	 */
+	if (attr->group)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP, attr,
+					  "groups are not supported");
+	if (attr->priority > 0xfffe)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
+					  attr,
+					  "lowest priority level is 0xfffe");
+	if (!attr->ingress)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
+					  attr, "only ingress is supported");
+	if (attr->egress)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
+					  attr, "egress is not supported");
+	return 0;
+}
+
+/**
+ * Validate flow for E-Switch.
+ *
+ * @param[in] priv
+ *   Pointer to the priv structure.
+ * @param[in] attr
+ *   Pointer to the flow attributes.
+ * @param[in] items
+ *   Pointer to the list of items.
+ * @param[in] actions
+ *   Pointer to the list of actions.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_tcf_validate(struct rte_eth_dev *dev,
+		  const struct rte_flow_attr *attr,
+		  const struct rte_flow_item items[],
+		  const struct rte_flow_action actions[],
+		  struct rte_flow_error *error)
+{
+	union {
+		const struct rte_flow_item_port_id *port_id;
+		const struct rte_flow_item_eth *eth;
+		const struct rte_flow_item_vlan *vlan;
+		const struct rte_flow_item_ipv4 *ipv4;
+		const struct rte_flow_item_ipv6 *ipv6;
+		const struct rte_flow_item_tcp *tcp;
+		const struct rte_flow_item_udp *udp;
+	} spec, mask;
+	union {
+		const struct rte_flow_action_port_id *port_id;
+		const struct rte_flow_action_of_push_vlan *of_push_vlan;
+		const struct rte_flow_action_of_set_vlan_vid *
+			of_set_vlan_vid;
+		const struct rte_flow_action_of_set_vlan_pcp *
+			of_set_vlan_pcp;
+	} conf;
+	uint32_t item_flags = 0;
+	uint32_t action_flags = 0;
+	uint8_t next_protocol = -1;
+	unsigned int tcm_ifindex = 0;
+	struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
+	bool in_port_id_set = false;
+	int ret;
+
+	claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
+						PTOI_TABLE_SZ_MAX(dev)));
+	ret = flow_tcf_validate_attributes(attr, error);
+	if (ret < 0)
+		return ret;
+	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
+		unsigned int i;
+
+		switch (items->type) {
+		case RTE_FLOW_ITEM_TYPE_VOID:
+			break;
+		case RTE_FLOW_ITEM_TYPE_PORT_ID:
+			mask.port_id = flow_tcf_item_mask
+				(items, &rte_flow_item_port_id_mask,
+				 &flow_tcf_mask_supported.port_id,
+				 &flow_tcf_mask_empty.port_id,
+				 sizeof(flow_tcf_mask_supported.port_id),
+				 error);
+			if (!mask.port_id)
+				return -rte_errno;
+			if (mask.port_id == &flow_tcf_mask_empty.port_id) {
+				in_port_id_set = 1;
+				break;
+			}
+			spec.port_id = items->spec;
+			if (mask.port_id->id && mask.port_id->id != 0xffffffff)
+				return rte_flow_error_set
+					(error, ENOTSUP,
+					 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
+					 mask.port_id,
+					 "no support for partial mask on"
+					 " \"id\" field");
+			if (!mask.port_id->id)
+				i = 0;
+			else
+				for (i = 0; ptoi[i].ifindex; ++i)
+					if (ptoi[i].port_id == spec.port_id->id)
+						break;
+			if (!ptoi[i].ifindex)
+				return rte_flow_error_set
+					(error, ENODEV,
+					 RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
+					 spec.port_id,
+					 "missing data to convert port ID to"
+					 " ifindex");
+			if (in_port_id_set && ptoi[i].ifindex != tcm_ifindex)
+				return rte_flow_error_set
+					(error, ENOTSUP,
+					 RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
+					 spec.port_id,
+					 "cannot match traffic for"
+					 " several port IDs through"
+					 " a single flow rule");
+			tcm_ifindex = ptoi[i].ifindex;
+			in_port_id_set = 1;
+			break;
+		case RTE_FLOW_ITEM_TYPE_ETH:
+			ret = mlx5_flow_validate_item_eth(items, item_flags,
+							  error);
+			if (ret < 0)
+				return ret;
+			item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
+			/* TODO:
+			 * Redundant check due to different supported mask.
+			 * Same for the rest of items.
+			 */
+			mask.eth = flow_tcf_item_mask
+				(items, &rte_flow_item_eth_mask,
+				 &flow_tcf_mask_supported.eth,
+				 &flow_tcf_mask_empty.eth,
+				 sizeof(flow_tcf_mask_supported.eth),
+				 error);
+			if (!mask.eth)
+				return -rte_errno;
+			if (mask.eth->type && mask.eth->type !=
+			    RTE_BE16(0xffff))
+				return rte_flow_error_set
+					(error, ENOTSUP,
+					 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
+					 mask.eth,
+					 "no support for partial mask on"
+					 " \"type\" field");
+			break;
+		case RTE_FLOW_ITEM_TYPE_VLAN:
+			ret = mlx5_flow_validate_item_vlan(items, item_flags,
+							   error);
+			if (ret < 0)
+				return ret;
+			item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
+			mask.vlan = flow_tcf_item_mask
+				(items, &rte_flow_item_vlan_mask,
+				 &flow_tcf_mask_supported.vlan,
+				 &flow_tcf_mask_empty.vlan,
+				 sizeof(flow_tcf_mask_supported.vlan),
+				 error);
+			if (!mask.vlan)
+				return -rte_errno;
+			if ((mask.vlan->tci & RTE_BE16(0xe000) &&
+			     (mask.vlan->tci & RTE_BE16(0xe000)) !=
+			      RTE_BE16(0xe000)) ||
+			    (mask.vlan->tci & RTE_BE16(0x0fff) &&
+			     (mask.vlan->tci & RTE_BE16(0x0fff)) !=
+			      RTE_BE16(0x0fff)) ||
+			    (mask.vlan->inner_type &&
+			     mask.vlan->inner_type != RTE_BE16(0xffff)))
+				return rte_flow_error_set
+					(error, ENOTSUP,
+					 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
+					 mask.vlan,
+					 "no support for partial masks on"
+					 " \"tci\" (PCP and VID parts) and"
+					 " \"inner_type\" fields");
+			break;
+		case RTE_FLOW_ITEM_TYPE_IPV4:
+			ret = mlx5_flow_validate_item_ipv4(items, item_flags,
+							   error);
+			if (ret < 0)
+				return ret;
+			item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
+			mask.ipv4 = flow_tcf_item_mask
+				(items, &rte_flow_item_ipv4_mask,
+				 &flow_tcf_mask_supported.ipv4,
+				 &flow_tcf_mask_empty.ipv4,
+				 sizeof(flow_tcf_mask_supported.ipv4),
+				 error);
+			if (!mask.ipv4)
+				return -rte_errno;
+			if (mask.ipv4->hdr.next_proto_id &&
+			    mask.ipv4->hdr.next_proto_id != 0xff)
+				return rte_flow_error_set
+					(error, ENOTSUP,
+					 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
+					 mask.ipv4,
+					 "no support for partial mask on"
+					 " \"hdr.next_proto_id\" field");
+			else if (mask.ipv4->hdr.next_proto_id)
+				next_protocol =
+					((const struct rte_flow_item_ipv4 *)
+					 (items->spec))->hdr.next_proto_id;
+			break;
+		case RTE_FLOW_ITEM_TYPE_IPV6:
+			ret = mlx5_flow_validate_item_ipv6(items, item_flags,
+							   error);
+			if (ret < 0)
+				return ret;
+			item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
+			mask.ipv6 = flow_tcf_item_mask
+				(items, &rte_flow_item_ipv6_mask,
+				 &flow_tcf_mask_supported.ipv6,
+				 &flow_tcf_mask_empty.ipv6,
+				 sizeof(flow_tcf_mask_supported.ipv6),
+				 error);
+			if (!mask.ipv6)
+				return -rte_errno;
+			if (mask.ipv6->hdr.proto &&
+			    mask.ipv6->hdr.proto != 0xff)
+				return rte_flow_error_set
+					(error, ENOTSUP,
+					 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
+					 mask.ipv6,
+					 "no support for partial mask on"
+					 " \"hdr.proto\" field");
+			else if (mask.ipv6->hdr.proto)
+				next_protocol =
+					((const struct rte_flow_item_ipv6 *)
+					 (items->spec))->hdr.proto;
+			break;
+		case RTE_FLOW_ITEM_TYPE_UDP:
+			ret = mlx5_flow_validate_item_udp(items, item_flags,
+							  next_protocol, error);
+			if (ret < 0)
+				return ret;
+			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
+			mask.udp = flow_tcf_item_mask
+				(items, &rte_flow_item_udp_mask,
+				 &flow_tcf_mask_supported.udp,
+				 &flow_tcf_mask_empty.udp,
+				 sizeof(flow_tcf_mask_supported.udp),
+				 error);
+			if (!mask.udp)
+				return -rte_errno;
+			break;
+		case RTE_FLOW_ITEM_TYPE_TCP:
+			ret = mlx5_flow_validate_item_tcp(items, item_flags,
+							  next_protocol, error);
+			if (ret < 0)
+				return ret;
+			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
+			mask.tcp = flow_tcf_item_mask
+				(items, &rte_flow_item_tcp_mask,
+				 &flow_tcf_mask_supported.tcp,
+				 &flow_tcf_mask_empty.tcp,
+				 sizeof(flow_tcf_mask_supported.tcp),
+				 error);
+			if (!mask.tcp)
+				return -rte_errno;
+			break;
+		default:
+			return rte_flow_error_set(error, ENOTSUP,
+						  RTE_FLOW_ERROR_TYPE_ITEM,
+						  NULL, "item not supported");
+		}
+	}
+	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+		unsigned int i;
+
+		switch (actions->type) {
+		case RTE_FLOW_ACTION_TYPE_VOID:
+			break;
+		case RTE_FLOW_ACTION_TYPE_PORT_ID:
+			if (action_flags & MLX5_TCF_FATE_ACTIONS)
+				return rte_flow_error_set
+					(error, ENOTSUP,
+					 RTE_FLOW_ERROR_TYPE_ACTION, actions,
+					 "can't have multiple fate actions");
+			conf.port_id = actions->conf;
+			if (conf.port_id->original)
+				i = 0;
+			else
+				for (i = 0; ptoi[i].ifindex; ++i)
+					if (ptoi[i].port_id == conf.port_id->id)
+						break;
+			if (!ptoi[i].ifindex)
+				return rte_flow_error_set
+					(error, ENODEV,
+					 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+					 conf.port_id,
+					 "missing data to convert port ID to"
+					 " ifindex");
+			action_flags |= MLX5_ACTION_PORT_ID;
+			break;
+		case RTE_FLOW_ACTION_TYPE_DROP:
+			if (action_flags & MLX5_TCF_FATE_ACTIONS)
+				return rte_flow_error_set
+					(error, ENOTSUP,
+					 RTE_FLOW_ERROR_TYPE_ACTION, actions,
+					 "can't have multiple fate actions");
+			action_flags |= MLX5_ACTION_DROP;
+			break;
+		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
+			action_flags |= MLX5_ACTION_OF_POP_VLAN;
+			break;
+		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
+			action_flags |= MLX5_ACTION_OF_PUSH_VLAN;
+			break;
+		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
+			action_flags |= MLX5_ACTION_OF_SET_VLAN_VID;
+			break;
+		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
+			action_flags |= MLX5_ACTION_OF_SET_VLAN_PCP;
+			break;
+		default:
+			return rte_flow_error_set(error, ENOTSUP,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  actions,
+						  "action not supported");
+		}
+	}
+	return 0;
+}
+
+/**
+ * Calculate the maximum size of memory required for the flow items of
+ * Linux TC flower and collect the flags of the detected items.
+ *
+ * @param[in] items
+ *   Pointer to the list of items.
+ * @param[out] item_flags
+ *   Pointer to the detected items.
+ *
+ * @return
+ *   Maximum size of memory for items.
+ */
+static int
+flow_tcf_get_items_and_size(const struct rte_flow_item items[],
+			    uint64_t *item_flags)
+{
+	int size = 0;
+	uint64_t flags = 0;
+
+	size += SZ_NLATTR_STRZ_OF("flower") +
+		SZ_NLATTR_NEST + /* TCA_OPTIONS. */
+		SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CLS_FLAGS_SKIP_SW. */
+	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
+		switch (items->type) {
+		case RTE_FLOW_ITEM_TYPE_VOID:
+			break;
+		case RTE_FLOW_ITEM_TYPE_PORT_ID:
+			break;
+		case RTE_FLOW_ITEM_TYPE_ETH:
+			size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
+				SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) * 4;
+				/* dst/src MAC addr and mask. */
+			flags |= MLX5_FLOW_LAYER_OUTER_L2;
+			break;
+		case RTE_FLOW_ITEM_TYPE_VLAN:
+			size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
+				SZ_NLATTR_TYPE_OF(uint16_t) +
+				/* VLAN Ether type. */
+				SZ_NLATTR_TYPE_OF(uint8_t) + /* VLAN prio. */
+				SZ_NLATTR_TYPE_OF(uint16_t); /* VLAN ID. */
+			flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
+			break;
+		case RTE_FLOW_ITEM_TYPE_IPV4:
+			size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
+				SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
+				SZ_NLATTR_TYPE_OF(uint32_t) * 4;
+				/* dst/src IP addr and mask. */
+			flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
+			break;
+		case RTE_FLOW_ITEM_TYPE_IPV6:
+			size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
+				SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
+				SZ_NLATTR_TYPE_OF(IPV6_ADDR_LEN) * 4;
+				/* dst/src IP addr and mask. */
+			flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
+			break;
+		case RTE_FLOW_ITEM_TYPE_UDP:
+			size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
+				SZ_NLATTR_TYPE_OF(uint16_t) * 4;
+				/* dst/src port and mask. */
+			flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
+			break;
+		case RTE_FLOW_ITEM_TYPE_TCP:
+			size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
+				SZ_NLATTR_TYPE_OF(uint16_t) * 4;
+				/* dst/src port and mask. */
+			flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
+			break;
+		default:
+			DRV_LOG(WARNING,
+				"unsupported item %p type %d,"
+				" items must be validated before flow creation",
+				(const void *)items, items->type);
+			break;
+		}
+	}
+	*item_flags = flags;
+	return size;
+}
+
+/**
+ * Calculate the maximum size of memory required for the flow actions of
+ * Linux TC flower and collect the flags of the detected actions.
+ *
+ * @param[in] actions
+ *   Pointer to the list of actions.
+ * @param[out] action_flags
+ *   Pointer to the detected actions.
+ *
+ * @return
+ *   Maximum size of memory for actions.
+ */
+static int
+flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
+			      uint64_t *action_flags)
+{
+	int size = 0;
+	uint64_t flags = 0;
+
+	size += SZ_NLATTR_NEST; /* TCA_FLOWER_ACT. */
+	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+		switch (actions->type) {
+		case RTE_FLOW_ACTION_TYPE_VOID:
+			break;
+		case RTE_FLOW_ACTION_TYPE_PORT_ID:
+			size += SZ_NLATTR_NEST + /* na_act_index. */
+				SZ_NLATTR_STRZ_OF("mirred") +
+				SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
+				SZ_NLATTR_TYPE_OF(struct tc_mirred);
+			flags |= MLX5_ACTION_PORT_ID;
+			break;
+		case RTE_FLOW_ACTION_TYPE_DROP:
+			size += SZ_NLATTR_NEST + /* na_act_index. */
+				SZ_NLATTR_STRZ_OF("gact") +
+				SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
+				SZ_NLATTR_TYPE_OF(struct tc_gact);
+			flags |= MLX5_ACTION_DROP;
+			break;
+		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
+			flags |= MLX5_ACTION_OF_POP_VLAN;
+			goto action_of_vlan;
+		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
+			flags |= MLX5_ACTION_OF_PUSH_VLAN;
+			goto action_of_vlan;
+		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
+			flags |= MLX5_ACTION_OF_SET_VLAN_VID;
+			goto action_of_vlan;
+		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
+			flags |= MLX5_ACTION_OF_SET_VLAN_PCP;
+			goto action_of_vlan;
+action_of_vlan:
+			size += SZ_NLATTR_NEST + /* na_act_index. */
+				SZ_NLATTR_STRZ_OF("vlan") +
+				SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
+				SZ_NLATTR_TYPE_OF(struct tc_vlan) +
+				SZ_NLATTR_TYPE_OF(uint16_t) +
+				/* VLAN protocol. */
+				SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
+				SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
+			break;
+		default:
+			DRV_LOG(WARNING,
+				"unsupported action %p type %d,"
+				" actions must be validated before flow creation",
+				(const void *)actions, actions->type);
+			break;
+		}
+	}
+	*action_flags = flags;
+	return size;
+}
+
+/**
+ * Brand rtnetlink buffer with unique handle.
+ *
+ * This handle should be unique for a given network interface to avoid
+ * collisions.
+ *
+ * @param nlh
+ *   Pointer to Netlink message.
+ * @param handle
+ *   Unique 32-bit handle to use.
+ */
+static void
+flow_tcf_nl_brand(struct nlmsghdr *nlh, uint32_t handle)
+{
+	struct tcmsg *tcm = mnl_nlmsg_get_payload(nlh);
+
+	tcm->tcm_handle = handle;
+	DRV_LOG(DEBUG, "Netlink msg %p is branded with handle %x",
+		(void *)nlh, handle);
+}
+
+/**
+ * Prepare a flow object for Linux TC flower. It calculates the maximum size of
+ * memory required, allocates the memory, initializes Netlink message headers
+ * and sets a unique TC message handle.
+ *
+ * @param[in] attr
+ *   Pointer to the flow attributes.
+ * @param[in] items
+ *   Pointer to the list of items.
+ * @param[in] actions
+ *   Pointer to the list of actions.
+ * @param[out] item_flags
+ *   Pointer to bit mask of all items detected.
+ * @param[out] action_flags
+ *   Pointer to bit mask of all actions detected.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   Pointer to mlx5_flow object on success,
+ *   otherwise NULL and rte_errno is set.
+ */
+static struct mlx5_flow *
+flow_tcf_prepare(const struct rte_flow_attr *attr __rte_unused,
+		 const struct rte_flow_item items[],
+		 const struct rte_flow_action actions[],
+		 uint64_t *item_flags, uint64_t *action_flags,
+		 struct rte_flow_error *error)
+{
+	size_t size = sizeof(struct mlx5_flow) +
+		      MNL_ALIGN(sizeof(struct nlmsghdr)) +
+		      MNL_ALIGN(sizeof(struct tcmsg));
+	struct mlx5_flow *dev_flow;
+	struct nlmsghdr *nlh;
+	struct tcmsg *tcm;
+
+	size += flow_tcf_get_items_and_size(items, item_flags);
+	size += flow_tcf_get_actions_and_size(actions, action_flags);
+	dev_flow = rte_zmalloc(__func__, size, MNL_ALIGNTO);
+	if (!dev_flow) {
+		rte_flow_error_set(error, ENOMEM,
+				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+				   "not enough memory to create E-Switch flow");
+		return NULL;
+	}
+	nlh = mnl_nlmsg_put_header((void *)(dev_flow + 1));
+	tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
+	*dev_flow = (struct mlx5_flow){
+		.tcf = (struct mlx5_flow_tcf){
+			.nlh = nlh,
+			.tcm = tcm,
+		},
+	};
+	/*
+	 * Generate a reasonably unique handle based on the address of the
+	 * target buffer.
+	 *
+	 * This is straightforward on 32-bit systems where the flow pointer can
+	 * be used directly. Otherwise, the pointer is shifted right by log2
+	 * of the previous power of two of the buffer size and its least
+	 * significant 32 bits are then taken, which makes collisions between
+	 * live flows unlikely.
+	 */
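+	/*
+	 * Illustrative example (made-up values): with size = 600 bytes, the
+	 * previous power of two is 512, so a 64-bit buffer address is
+	 * shifted right by 9 bits before being truncated to 32 bits.
+	 */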
+	if (sizeof(dev_flow) <= 4)
+		flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow);
+	else
+		flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow >>
+				       rte_log2_u32(rte_align32prevpow2(size)));
+	return dev_flow;
+}
+
+/**
+ * Translate flow for Linux TC flower and construct Netlink message.
+ *
+ * @param[in] priv
+ *   Pointer to the priv structure.
+ * @param[in, out] flow
+ *   Pointer to the sub flow.
+ * @param[in] attr
+ *   Pointer to the flow attributes.
+ * @param[in] items
+ *   Pointer to the list of items.
+ * @param[in] actions
+ *   Pointer to the list of actions.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
+		   const struct rte_flow_attr *attr,
+		   const struct rte_flow_item items[],
+		   const struct rte_flow_action actions[],
+		   struct rte_flow_error *error)
+{
+	union {
+		const struct rte_flow_item_port_id *port_id;
+		const struct rte_flow_item_eth *eth;
+		const struct rte_flow_item_vlan *vlan;
+		const struct rte_flow_item_ipv4 *ipv4;
+		const struct rte_flow_item_ipv6 *ipv6;
+		const struct rte_flow_item_tcp *tcp;
+		const struct rte_flow_item_udp *udp;
+	} spec, mask;
+	union {
+		const struct rte_flow_action_port_id *port_id;
+		const struct rte_flow_action_of_push_vlan *of_push_vlan;
+		const struct rte_flow_action_of_set_vlan_vid *
+			of_set_vlan_vid;
+		const struct rte_flow_action_of_set_vlan_pcp *
+			of_set_vlan_pcp;
+	} conf;
+	struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
+	struct nlmsghdr *nlh = dev_flow->tcf.nlh;
+	struct tcmsg *tcm = dev_flow->tcf.tcm;
+	uint32_t na_act_index_cur;
+	bool eth_type_set = 0;
+	bool vlan_present = 0;
+	bool vlan_eth_type_set = 0;
+	bool ip_proto_set = 0;
+	struct nlattr *na_flower;
+	struct nlattr *na_flower_act;
+	struct nlattr *na_vlan_id = NULL;
+	struct nlattr *na_vlan_priority = NULL;
+
+	claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
+						PTOI_TABLE_SZ_MAX(dev)));
+	nlh = dev_flow->tcf.nlh;
+	tcm = dev_flow->tcf.tcm;
+	/* Prepare API must have been called beforehand. */
+	assert(nlh != NULL && tcm != NULL);
+	tcm->tcm_family = AF_UNSPEC;
+	tcm->tcm_ifindex = ptoi[0].ifindex;
+	tcm->tcm_parent = TC_H_MAKE(TC_H_INGRESS, TC_H_MIN_INGRESS);
+	/*
+	 * Priority cannot be zero to prevent the kernel from picking one
+	 * automatically.
+	 */
+	tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
+				  RTE_BE16(ETH_P_ALL));
+	mnl_attr_put_strz(nlh, TCA_KIND, "flower");
+	na_flower = mnl_attr_nest_start(nlh, TCA_OPTIONS);
+	mnl_attr_put_u32(nlh, TCA_FLOWER_FLAGS, TCA_CLS_FLAGS_SKIP_SW);
+	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
+		unsigned int i;
+
+		switch (items->type) {
+		case RTE_FLOW_ITEM_TYPE_VOID:
+			break;
+		case RTE_FLOW_ITEM_TYPE_PORT_ID:
+			mask.port_id = flow_tcf_item_mask
+				(items, &rte_flow_item_port_id_mask,
+				 &flow_tcf_mask_supported.port_id,
+				 &flow_tcf_mask_empty.port_id,
+				 sizeof(flow_tcf_mask_supported.port_id),
+				 error);
+			assert(mask.port_id);
+			if (mask.port_id == &flow_tcf_mask_empty.port_id)
+				break;
+			spec.port_id = items->spec;
+			if (!mask.port_id->id)
+				i = 0;
+			else
+				for (i = 0; ptoi[i].ifindex; ++i)
+					if (ptoi[i].port_id == spec.port_id->id)
+						break;
+			assert(ptoi[i].ifindex);
+			tcm->tcm_ifindex = ptoi[i].ifindex;
+			break;
+		case RTE_FLOW_ITEM_TYPE_ETH:
+			mask.eth = flow_tcf_item_mask
+				(items, &rte_flow_item_eth_mask,
+				 &flow_tcf_mask_supported.eth,
+				 &flow_tcf_mask_empty.eth,
+				 sizeof(flow_tcf_mask_supported.eth),
+				 error);
+			assert(mask.eth);
+			if (mask.eth == &flow_tcf_mask_empty.eth)
+				break;
+			spec.eth = items->spec;
+			if (mask.eth->type) {
+				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
+						 spec.eth->type);
+				eth_type_set = 1;
+			}
+			if (!is_zero_ether_addr(&mask.eth->dst)) {
+				mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST,
+					     ETHER_ADDR_LEN,
+					     spec.eth->dst.addr_bytes);
+				mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST_MASK,
+					     ETHER_ADDR_LEN,
+					     mask.eth->dst.addr_bytes);
+			}
+			if (!is_zero_ether_addr(&mask.eth->src)) {
+				mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC,
+					     ETHER_ADDR_LEN,
+					     spec.eth->src.addr_bytes);
+				mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC_MASK,
+					     ETHER_ADDR_LEN,
+					     mask.eth->src.addr_bytes);
+			}
+			break;
+		case RTE_FLOW_ITEM_TYPE_VLAN:
+			mask.vlan = flow_tcf_item_mask
+				(items, &rte_flow_item_vlan_mask,
+				 &flow_tcf_mask_supported.vlan,
+				 &flow_tcf_mask_empty.vlan,
+				 sizeof(flow_tcf_mask_supported.vlan),
+				 error);
+			assert(mask.vlan);
+			if (!eth_type_set)
+				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
+						 RTE_BE16(ETH_P_8021Q));
+			eth_type_set = 1;
+			vlan_present = 1;
+			if (mask.vlan == &flow_tcf_mask_empty.vlan)
+				break;
+			spec.vlan = items->spec;
+			if (mask.vlan->inner_type) {
+				mnl_attr_put_u16(nlh,
+						 TCA_FLOWER_KEY_VLAN_ETH_TYPE,
+						 spec.vlan->inner_type);
+				vlan_eth_type_set = 1;
+			}
+			if (mask.vlan->tci & RTE_BE16(0xe000))
+				mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_VLAN_PRIO,
+						(rte_be_to_cpu_16
+						 (spec.vlan->tci) >> 13) & 0x7);
+			if (mask.vlan->tci & RTE_BE16(0x0fff))
+				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_VLAN_ID,
+						 rte_be_to_cpu_16
+						 (spec.vlan->tci &
+						  RTE_BE16(0x0fff)));
+			break;
+		case RTE_FLOW_ITEM_TYPE_IPV4:
+			mask.ipv4 = flow_tcf_item_mask
+				(items, &rte_flow_item_ipv4_mask,
+				 &flow_tcf_mask_supported.ipv4,
+				 &flow_tcf_mask_empty.ipv4,
+				 sizeof(flow_tcf_mask_supported.ipv4),
+				 error);
+			assert(mask.ipv4);
+			if (!eth_type_set || !vlan_eth_type_set)
+				mnl_attr_put_u16(nlh,
+						 vlan_present ?
+						 TCA_FLOWER_KEY_VLAN_ETH_TYPE :
+						 TCA_FLOWER_KEY_ETH_TYPE,
+						 RTE_BE16(ETH_P_IP));
+			eth_type_set = 1;
+			vlan_eth_type_set = 1;
+			if (mask.ipv4 == &flow_tcf_mask_empty.ipv4)
+				break;
+			spec.ipv4 = items->spec;
+			if (mask.ipv4->hdr.next_proto_id) {
+				mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
+						spec.ipv4->hdr.next_proto_id);
+				ip_proto_set = 1;
+			}
+			if (mask.ipv4->hdr.src_addr) {
+				mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_SRC,
+						 spec.ipv4->hdr.src_addr);
+				mnl_attr_put_u32(nlh,
+						 TCA_FLOWER_KEY_IPV4_SRC_MASK,
+						 mask.ipv4->hdr.src_addr);
+			}
+			if (mask.ipv4->hdr.dst_addr) {
+				mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_DST,
+						 spec.ipv4->hdr.dst_addr);
+				mnl_attr_put_u32(nlh,
+						 TCA_FLOWER_KEY_IPV4_DST_MASK,
+						 mask.ipv4->hdr.dst_addr);
+			}
+			break;
+		case RTE_FLOW_ITEM_TYPE_IPV6:
+			mask.ipv6 = flow_tcf_item_mask
+				(items, &rte_flow_item_ipv6_mask,
+				 &flow_tcf_mask_supported.ipv6,
+				 &flow_tcf_mask_empty.ipv6,
+				 sizeof(flow_tcf_mask_supported.ipv6),
+				 error);
+			assert(mask.ipv6);
+			if (!eth_type_set || !vlan_eth_type_set)
+				mnl_attr_put_u16(nlh,
+						 vlan_present ?
+						 TCA_FLOWER_KEY_VLAN_ETH_TYPE :
+						 TCA_FLOWER_KEY_ETH_TYPE,
+						 RTE_BE16(ETH_P_IPV6));
+			eth_type_set = 1;
+			vlan_eth_type_set = 1;
+			if (mask.ipv6 == &flow_tcf_mask_empty.ipv6)
+				break;
+			spec.ipv6 = items->spec;
+			if (mask.ipv6->hdr.proto) {
+				mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
+						spec.ipv6->hdr.proto);
+				ip_proto_set = 1;
+			}
+			if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr)) {
+				mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC,
+					     sizeof(spec.ipv6->hdr.src_addr),
+					     spec.ipv6->hdr.src_addr);
+				mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC_MASK,
+					     sizeof(mask.ipv6->hdr.src_addr),
+					     mask.ipv6->hdr.src_addr);
+			}
+			if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr)) {
+				mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST,
+					     sizeof(spec.ipv6->hdr.dst_addr),
+					     spec.ipv6->hdr.dst_addr);
+				mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST_MASK,
+					     sizeof(mask.ipv6->hdr.dst_addr),
+					     mask.ipv6->hdr.dst_addr);
+			}
+			break;
+		case RTE_FLOW_ITEM_TYPE_UDP:
+			mask.udp = flow_tcf_item_mask
+				(items, &rte_flow_item_udp_mask,
+				 &flow_tcf_mask_supported.udp,
+				 &flow_tcf_mask_empty.udp,
+				 sizeof(flow_tcf_mask_supported.udp),
+				 error);
+			assert(mask.udp);
+			if (!ip_proto_set)
+				mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
+						IPPROTO_UDP);
+			if (mask.udp == &flow_tcf_mask_empty.udp)
+				break;
+			spec.udp = items->spec;
+			if (mask.udp->hdr.src_port) {
+				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_SRC,
+						 spec.udp->hdr.src_port);
+				mnl_attr_put_u16(nlh,
+						 TCA_FLOWER_KEY_UDP_SRC_MASK,
+						 mask.udp->hdr.src_port);
+			}
+			if (mask.udp->hdr.dst_port) {
+				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_DST,
+						 spec.udp->hdr.dst_port);
+				mnl_attr_put_u16(nlh,
+						 TCA_FLOWER_KEY_UDP_DST_MASK,
+						 mask.udp->hdr.dst_port);
+			}
+			break;
+		case RTE_FLOW_ITEM_TYPE_TCP:
+			mask.tcp = flow_tcf_item_mask
+				(items, &rte_flow_item_tcp_mask,
+				 &flow_tcf_mask_supported.tcp,
+				 &flow_tcf_mask_empty.tcp,
+				 sizeof(flow_tcf_mask_supported.tcp),
+				 error);
+			assert(mask.tcp);
+			if (!ip_proto_set)
+				mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
+						IPPROTO_TCP);
+			if (mask.tcp == &flow_tcf_mask_empty.tcp)
+				break;
+			spec.tcp = items->spec;
+			if (mask.tcp->hdr.src_port) {
+				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_SRC,
+						 spec.tcp->hdr.src_port);
+				mnl_attr_put_u16(nlh,
+						 TCA_FLOWER_KEY_TCP_SRC_MASK,
+						 mask.tcp->hdr.src_port);
+			}
+			if (mask.tcp->hdr.dst_port) {
+				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_DST,
+						 spec.tcp->hdr.dst_port);
+				mnl_attr_put_u16(nlh,
+						 TCA_FLOWER_KEY_TCP_DST_MASK,
+						 mask.tcp->hdr.dst_port);
+			}
+			break;
+		default:
+			return rte_flow_error_set(error, ENOTSUP,
+						  RTE_FLOW_ERROR_TYPE_ITEM,
+						  NULL, "item not supported");
+		}
+	}
+	na_flower_act = mnl_attr_nest_start(nlh, TCA_FLOWER_ACT);
+	na_act_index_cur = 1;
+	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+		struct nlattr *na_act_index;
+		struct nlattr *na_act;
+		unsigned int vlan_act;
+		unsigned int i;
+
+		switch (actions->type) {
+		case RTE_FLOW_ACTION_TYPE_VOID:
+			break;
+		case RTE_FLOW_ACTION_TYPE_PORT_ID:
+			conf.port_id = actions->conf;
+			if (conf.port_id->original)
+				i = 0;
+			else
+				for (i = 0; ptoi[i].ifindex; ++i)
+					if (ptoi[i].port_id == conf.port_id->id)
+						break;
+			assert(ptoi[i].ifindex);
+			na_act_index =
+				mnl_attr_nest_start(nlh, na_act_index_cur++);
+			assert(na_act_index);
+			mnl_attr_put_strz(nlh, TCA_ACT_KIND, "mirred");
+			na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
+			assert(na_act);
+			mnl_attr_put(nlh, TCA_MIRRED_PARMS,
+				     sizeof(struct tc_mirred),
+				     &(struct tc_mirred){
+					.action = TC_ACT_STOLEN,
+					.eaction = TCA_EGRESS_REDIR,
+					.ifindex = ptoi[i].ifindex,
+				     });
+			mnl_attr_nest_end(nlh, na_act);
+			mnl_attr_nest_end(nlh, na_act_index);
+			break;
+		case RTE_FLOW_ACTION_TYPE_DROP:
+			na_act_index =
+				mnl_attr_nest_start(nlh, na_act_index_cur++);
+			assert(na_act_index);
+			mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
+			na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
+			assert(na_act);
+			mnl_attr_put(nlh, TCA_GACT_PARMS,
+				     sizeof(struct tc_gact),
+				     &(struct tc_gact){
+					.action = TC_ACT_SHOT,
+				     });
+			mnl_attr_nest_end(nlh, na_act);
+			mnl_attr_nest_end(nlh, na_act_index);
+			break;
+		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
+			conf.of_push_vlan = NULL;
+			vlan_act = TCA_VLAN_ACT_POP;
+			goto action_of_vlan;
+		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
+			conf.of_push_vlan = actions->conf;
+			vlan_act = TCA_VLAN_ACT_PUSH;
+			goto action_of_vlan;
+		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
+			conf.of_set_vlan_vid = actions->conf;
+			if (na_vlan_id)
+				goto override_na_vlan_id;
+			vlan_act = TCA_VLAN_ACT_MODIFY;
+			goto action_of_vlan;
+		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
+			conf.of_set_vlan_pcp = actions->conf;
+			if (na_vlan_priority)
+				goto override_na_vlan_priority;
+			vlan_act = TCA_VLAN_ACT_MODIFY;
+			goto action_of_vlan;
+action_of_vlan:
+			na_act_index =
+				mnl_attr_nest_start(nlh, na_act_index_cur++);
+			assert(na_act_index);
+			mnl_attr_put_strz(nlh, TCA_ACT_KIND, "vlan");
+			na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
+			assert(na_act);
+			mnl_attr_put(nlh, TCA_VLAN_PARMS,
+				     sizeof(struct tc_vlan),
+				     &(struct tc_vlan){
+					.action = TC_ACT_PIPE,
+					.v_action = vlan_act,
+				     });
+			if (vlan_act == TCA_VLAN_ACT_POP) {
+				mnl_attr_nest_end(nlh, na_act);
+				mnl_attr_nest_end(nlh, na_act_index);
+				break;
+			}
+			if (vlan_act == TCA_VLAN_ACT_PUSH)
+				mnl_attr_put_u16(nlh,
+						 TCA_VLAN_PUSH_VLAN_PROTOCOL,
+						 conf.of_push_vlan->ethertype);
+			na_vlan_id = mnl_nlmsg_get_payload_tail(nlh);
+			mnl_attr_put_u16(nlh, TCA_VLAN_PAD, 0);
+			na_vlan_priority = mnl_nlmsg_get_payload_tail(nlh);
+			mnl_attr_put_u8(nlh, TCA_VLAN_PAD, 0);
+			mnl_attr_nest_end(nlh, na_act);
+			mnl_attr_nest_end(nlh, na_act_index);
+			if (actions->type ==
+			    RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
+override_na_vlan_id:
+				na_vlan_id->nla_type = TCA_VLAN_PUSH_VLAN_ID;
+				*(uint16_t *)mnl_attr_get_payload(na_vlan_id) =
+					rte_be_to_cpu_16
+					(conf.of_set_vlan_vid->vlan_vid);
+			} else if (actions->type ==
+				   RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
+override_na_vlan_priority:
+				na_vlan_priority->nla_type =
+					TCA_VLAN_PUSH_VLAN_PRIORITY;
+				*(uint8_t *)mnl_attr_get_payload
+					(na_vlan_priority) =
+					conf.of_set_vlan_pcp->vlan_pcp;
+			}
+			break;
+		default:
+			return rte_flow_error_set(error, ENOTSUP,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  actions,
+						  "action not supported");
+		}
+	}
+	assert(na_flower);
+	assert(na_flower_act);
+	mnl_attr_nest_end(nlh, na_flower_act);
+	mnl_attr_nest_end(nlh, na_flower);
+	return 0;
+}
+
+/**
+ * Send Netlink message with acknowledgment.
+ *
+ * @param nl
+ *   Libmnl socket to use.
+ * @param nlh
+ *   Message to send. This function always raises the NLM_F_ACK flag before
+ *   sending.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_tcf_nl_ack(struct mnl_socket *nl, struct nlmsghdr *nlh)
+{
+	alignas(struct nlmsghdr)
+	uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
+		    nlh->nlmsg_len - sizeof(*nlh)];
+	uint32_t seq = random();
+	int ret;
+
+	nlh->nlmsg_flags |= NLM_F_ACK;
+	nlh->nlmsg_seq = seq;
+	ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
+	if (ret != -1)
+		ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
+	if (ret != -1)
+		ret = mnl_cb_run
+			(ans, ret, seq, mnl_socket_get_portid(nl), NULL, NULL);
+	if (ret > 0)
+		return 0;
+	rte_errno = errno;
+	return -rte_errno;
+}
+
+/**
+ * Apply flow to E-Switch by sending Netlink message.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[in, out] flow
+ *   Pointer to the sub flow.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
+	       struct rte_flow_error *error)
+{
+	struct priv *priv = dev->data->dev_private;
+	struct mnl_socket *nl = priv->mnl_socket;
+	struct mlx5_flow *dev_flow;
+	struct nlmsghdr *nlh;
+
+	dev_flow = LIST_FIRST(&flow->dev_flows);
+	/* E-Switch flow can't be expanded. */
+	assert(!LIST_NEXT(dev_flow, next));
+	nlh = dev_flow->tcf.nlh;
+	nlh->nlmsg_type = RTM_NEWTFILTER;
+	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
+	if (!flow_tcf_nl_ack(nl, nlh))
+		return 0;
+	return rte_flow_error_set(error, rte_errno,
+				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+				  "netlink: failed to create TC flow rule");
+}
+
+/**
+ * Remove flow from E-Switch by sending Netlink message.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[in, out] flow
+ *   Pointer to the sub flow.
+ */
+static void
+flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
+{
+	struct priv *priv = dev->data->dev_private;
+	struct mnl_socket *nl = priv->mnl_socket;
+	struct mlx5_flow *dev_flow;
+	struct nlmsghdr *nlh;
+
+	if (!flow)
+		return;
+	dev_flow = LIST_FIRST(&flow->dev_flows);
+	if (!dev_flow)
+		return;
+	/* E-Switch flow can't be expanded. */
+	assert(!LIST_NEXT(dev_flow, next));
+	nlh = dev_flow->tcf.nlh;
+	nlh->nlmsg_type = RTM_DELTFILTER;
+	nlh->nlmsg_flags = NLM_F_REQUEST;
+	flow_tcf_nl_ack(nl, nlh);
+}
+
+/**
+ * Remove flow from E-Switch and release resources of the device flow.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[in, out] flow
+ *   Pointer to the sub flow.
+ */
+static void
+flow_tcf_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
+{
+	struct mlx5_flow *dev_flow;
+
+	if (!flow)
+		return;
+	flow_tcf_remove(dev, flow);
+	dev_flow = LIST_FIRST(&flow->dev_flows);
+	if (!dev_flow)
+		return;
+	/* E-Switch flow can't be expanded. */
+	assert(!LIST_NEXT(dev_flow, next));
+	LIST_REMOVE(dev_flow, next);
+	rte_free(dev_flow);
+}
+
+const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
+	.validate = flow_tcf_validate,
+	.prepare = flow_tcf_prepare,
+	.translate = flow_tcf_translate,
+	.apply = flow_tcf_apply,
+	.remove = flow_tcf_remove,
+	.destroy = flow_tcf_destroy,
+};
+
+/**
+ * Initialize ingress qdisc of a given network interface.
+ *
+ * @param nl
+ *   Libmnl socket of the @p NETLINK_ROUTE kind.
+ * @param ifindex
+ *   Index of network interface to initialize.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
+		   struct rte_flow_error *error)
+{
+	struct nlmsghdr *nlh;
+	struct tcmsg *tcm;
+	alignas(struct nlmsghdr)
+	uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];
+
+	/* Destroy existing ingress qdisc and everything attached to it. */
+	nlh = mnl_nlmsg_put_header(buf);
+	nlh->nlmsg_type = RTM_DELQDISC;
+	nlh->nlmsg_flags = NLM_F_REQUEST;
+	tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
+	tcm->tcm_family = AF_UNSPEC;
+	tcm->tcm_ifindex = ifindex;
+	tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
+	tcm->tcm_parent = TC_H_INGRESS;
+	/* Ignore errors when qdisc is already absent. */
+	if (flow_tcf_nl_ack(nl, nlh) &&
+	    rte_errno != EINVAL && rte_errno != ENOENT)
+		return rte_flow_error_set(error, rte_errno,
+					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+					  "netlink: failed to remove ingress"
+					  " qdisc");
+	/* Create fresh ingress qdisc. */
+	nlh = mnl_nlmsg_put_header(buf);
+	nlh->nlmsg_type = RTM_NEWQDISC;
+	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
+	tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
+	tcm->tcm_family = AF_UNSPEC;
+	tcm->tcm_ifindex = ifindex;
+	tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
+	tcm->tcm_parent = TC_H_INGRESS;
+	mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
+	if (flow_tcf_nl_ack(nl, nlh))
+		return rte_flow_error_set(error, rte_errno,
+					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+					  "netlink: failed to create ingress"
+					  " qdisc");
+	return 0;
+}
+
+/**
+ * Create and configure a libmnl socket for Netlink flow rules.
+ *
+ * @return
+ *   A valid libmnl socket object pointer on success, NULL otherwise and
+ *   rte_errno is set.
+ */
+struct mnl_socket *
+mlx5_flow_tcf_socket_create(void)
+{
+	struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
+
+	if (nl) {
+		mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
+				      sizeof(int));
+		if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
+			return nl;
+	}
+	rte_errno = errno;
+	if (nl)
+		mnl_socket_close(nl);
+	return NULL;
+}
+
+/**
+ * Destroy a libmnl socket.
+ *
+ * @param nl
+ *   Libmnl socket of the @p NETLINK_ROUTE kind.
+ */
+void
+mlx5_flow_tcf_socket_destroy(struct mnl_socket *nl)
+{
+	mnl_socket_close(nl);
+}
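Putting the three exported helpers together, a minimal usage sketch
(error handling abbreviated; ifindex is assumed to be a valid network
interface index obtained elsewhere, e.g. through mlx5_ifindex()):

    struct rte_flow_error error;
    struct mnl_socket *nl = mlx5_flow_tcf_socket_create();

    if (!nl)
            return -rte_errno;
    /* Flush any stale ingress qdisc, then create a fresh one for
     * TC flower rules to attach to. */
    if (mlx5_flow_tcf_init(nl, ifindex, &error)) {
            mlx5_flow_tcf_socket_destroy(nl);
            return -rte_errno;
    }
    /* ... create/remove E-Switch flow rules via the driver ops ... */
    mlx5_flow_tcf_socket_destroy(nl);
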
-- 
2.11.0

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [dpdk-dev] [PATCH v2 0/3] net/mlx5: migrate Linux TC flower driver to new flow engine
  2018-09-19  7:21 [dpdk-dev] [PATCH 0/3] migrate Linux TC flower driver to new flow engine Yongseok Koh
                   ` (2 preceding siblings ...)
  2018-09-19  7:21 ` [dpdk-dev] [PATCH 3/3] net/mlx5: add Linux TC flower driver for E-Switch flow Yongseok Koh
@ 2018-09-24 19:55 ` Yongseok Koh
  2018-09-24 19:55   ` [dpdk-dev] [PATCH v2 1/3] net/mlx5: add abstraction for multiple flow drivers Yongseok Koh
                     ` (3 more replies)
  2018-09-24 23:17 ` [dpdk-dev] [PATCH v3 00/11] net/mlx5: add Direct Verbs flow driver support Yongseok Koh
  4 siblings, 4 replies; 22+ messages in thread
From: Yongseok Koh @ 2018-09-24 19:55 UTC (permalink / raw)
  To: Thomas Monjalon, Shahaf Shuler; +Cc: dev, Yongseok Koh

This patchset migrates the existing E-Switch flow driver onto the new flow
engine. It depends on Ori's new flow engine [1].

[1] http://patches.dpdk.org/project/dpdk/list/?series=1473

v2:
* make changes for the newly introduced meson build.

Yongseok Koh (3):
  net/mlx5: add abstraction for multiple flow drivers
  net/mlx5: remove Netlink flow driver
  net/mlx5: add Linux TC flower driver for E-Switch flow

 drivers/net/mlx5/Makefile          |    2 +-
 drivers/net/mlx5/meson.build       |    2 +-
 drivers/net/mlx5/mlx5.c            |   12 +-
 drivers/net/mlx5/mlx5.h            |   25 -
 drivers/net/mlx5/mlx5_flow.c       |  352 +++++++-
 drivers/net/mlx5/mlx5_flow.h       |   33 +-
 drivers/net/mlx5/mlx5_flow_dv.c    |   26 +-
 drivers/net/mlx5/mlx5_flow_tcf.c   | 1608 ++++++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_flow_verbs.c |   20 +-
 drivers/net/mlx5/mlx5_nl_flow.c    | 1228 ---------------------------
 10 files changed, 1973 insertions(+), 1335 deletions(-)
 create mode 100644 drivers/net/mlx5/mlx5_flow_tcf.c
 delete mode 100644 drivers/net/mlx5/mlx5_nl_flow.c

-- 
2.11.0

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [dpdk-dev] [PATCH v2 1/3] net/mlx5: add abstraction for multiple flow drivers
  2018-09-24 19:55 ` [dpdk-dev] [PATCH v2 0/3] net/mlx5: migrate Linux TC flower driver to new flow engine Yongseok Koh
@ 2018-09-24 19:55   ` Yongseok Koh
  2018-09-24 19:55   ` [dpdk-dev] [PATCH v2 2/3] net/mlx5: remove Netlink flow driver Yongseok Koh
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 22+ messages in thread
From: Yongseok Koh @ 2018-09-24 19:55 UTC (permalink / raw)
  To: Thomas Monjalon, Shahaf Shuler; +Cc: dev, Yongseok Koh

The flow engine has to support multiple driver paths: Verbs/DV for NIC flow
steering and Linux TC flower for E-Switch flow steering. In the future,
another flow driver could be added (devX).

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5.c            |   1 -
 drivers/net/mlx5/mlx5_flow.c       | 348 +++++++++++++++++++++++++++++++++----
 drivers/net/mlx5/mlx5_flow.h       |  17 +-
 drivers/net/mlx5/mlx5_flow_dv.c    |  26 +--
 drivers/net/mlx5/mlx5_flow_verbs.c |  20 +--
 5 files changed, 335 insertions(+), 77 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 9b208109b..2f7d046e0 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -1192,7 +1192,6 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 	if (err < 0)
 		goto error;
 	priv->config.flow_prio = err;
-	mlx5_flow_init_driver_ops(eth_dev);
 	/*
 	 * Once the device is added to the list of memory event
 	 * callback, its global MR cache table cannot be expanded
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 2119211f5..54008afa4 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -37,6 +37,23 @@
 extern const struct eth_dev_ops mlx5_dev_ops;
 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
 
+/** Device flow drivers. */
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+extern const struct mlx5_flow_driver_ops mlx5_flow_dv_drv_ops;
+#endif
+extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops;
+
+const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops;
+
+const struct mlx5_flow_driver_ops *flow_drv_ops[] = {
+	[MLX5_FLOW_TYPE_MIN] = &mlx5_flow_null_drv_ops,
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+	[MLX5_FLOW_TYPE_DV] = &mlx5_flow_dv_drv_ops,
+#endif
+	[MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_verbs_drv_ops,
+	[MLX5_FLOW_TYPE_MAX] = &mlx5_flow_null_drv_ops
+};
+
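This table makes driver selection a constant-time array lookup, and the
MIN/MAX sentinel slots route to the null driver so that an unresolved
type never dereferences a NULL pointer. A hypothetical lookup (dev, attr,
items, actions and error assumed to be in scope):

    const struct mlx5_flow_driver_ops *fops =
            flow_drv_ops[MLX5_FLOW_TYPE_VERBS];

    /* Every callback pointer is valid; sentinel entries point at
     * mlx5_flow_null_drv_ops, whose callbacks fail with ENOTSUP. */
    int ret = fops->validate(dev, attr, items, actions, &error);
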
 enum mlx5_expansion {
 	MLX5_EXPANSION_ROOT,
 	MLX5_EXPANSION_ROOT_OUTER,
@@ -282,9 +299,6 @@ static struct mlx5_flow_tunnel_info tunnels_info[] = {
 	},
 };
 
-/* Holds the nic operations that should be used. */
-struct mlx5_flow_driver_ops nic_ops;
-
 /**
  * Discover the maximum number of priority available.
  *
@@ -1510,6 +1524,284 @@ mlx5_flow_validate_item_mpls(const struct rte_flow_item *item __rte_unused,
 				  " update.");
 }
 
+static int
+flow_null_validate(struct rte_eth_dev *dev __rte_unused,
+		   const struct rte_flow_attr *attr __rte_unused,
+		   const struct rte_flow_item items[] __rte_unused,
+		   const struct rte_flow_action actions[] __rte_unused,
+		   struct rte_flow_error *error __rte_unused)
+{
+	rte_errno = ENOTSUP;
+	return -rte_errno;
+}
+
+static struct mlx5_flow *
+flow_null_prepare(const struct rte_flow_attr *attr __rte_unused,
+		  const struct rte_flow_item items[] __rte_unused,
+		  const struct rte_flow_action actions[] __rte_unused,
+		  uint64_t *item_flags __rte_unused,
+		  uint64_t *action_flags __rte_unused,
+		  struct rte_flow_error *error __rte_unused)
+{
+	rte_errno = ENOTSUP;
+	return NULL;
+}
+
+static int
+flow_null_translate(struct rte_eth_dev *dev __rte_unused,
+		    struct mlx5_flow *dev_flow __rte_unused,
+		    const struct rte_flow_attr *attr __rte_unused,
+		    const struct rte_flow_item items[] __rte_unused,
+		    const struct rte_flow_action actions[] __rte_unused,
+		    struct rte_flow_error *error __rte_unused)
+{
+	rte_errno = ENOTSUP;
+	return -rte_errno;
+}
+
+static int
+flow_null_apply(struct rte_eth_dev *dev __rte_unused,
+		struct rte_flow *flow __rte_unused,
+		struct rte_flow_error *error __rte_unused)
+{
+	rte_errno = ENOTSUP;
+	return -rte_errno;
+}
+
+static void
+flow_null_remove(struct rte_eth_dev *dev __rte_unused,
+		 struct rte_flow *flow __rte_unused)
+{
+}
+
+static void
+flow_null_destroy(struct rte_eth_dev *dev __rte_unused,
+		  struct rte_flow *flow __rte_unused)
+{
+}
+
+/* Void driver to protect from null pointer reference. */
+const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = {
+	.validate = flow_null_validate,
+	.prepare = flow_null_prepare,
+	.translate = flow_null_translate,
+	.apply = flow_null_apply,
+	.remove = flow_null_remove,
+	.destroy = flow_null_destroy,
+};
+
+/**
+ * Select flow driver type according to flow attributes and device
+ * configuration.
+ *
+ * @param[in] dev
+ *   Pointer to the dev structure.
+ * @param[in] attr
+ *   Pointer to the flow attributes.
+ *
+ * @return
+ *   flow driver type if supported, MLX5_FLOW_TYPE_MAX otherwise.
+ */
+static enum mlx5_flow_drv_type
+flow_get_drv_type(struct rte_eth_dev *dev __rte_unused,
+		  const struct rte_flow_attr *attr)
+{
+	struct priv *priv __rte_unused = dev->data->dev_private;
+	enum mlx5_flow_drv_type type = MLX5_FLOW_TYPE_MAX;
+
+	if (!attr->transfer) {
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+		type = priv->config.dv_flow_en ? MLX5_FLOW_TYPE_DV :
+						  MLX5_FLOW_TYPE_VERBS;
+#else
+		type = MLX5_FLOW_TYPE_VERBS;
+#endif
+	}
+	return type;
+}
+
+#define flow_get_drv_ops(type) flow_drv_ops[type]
+
+/**
+ * Flow driver validation API. This abstracts calling driver specific functions.
+ * The type of flow driver is determined according to flow attributes.
+ *
+ * @param[in] dev
+ *   Pointer to the dev structure.
+ * @param[in] attr
+ *   Pointer to the flow attributes.
+ * @param[in] items
+ *   Pointer to the list of items.
+ * @param[in] actions
+ *   Pointer to the list of actions.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static inline int
+flow_drv_validate(struct rte_eth_dev *dev,
+		  const struct rte_flow_attr *attr,
+		  const struct rte_flow_item items[],
+		  const struct rte_flow_action actions[],
+		  struct rte_flow_error *error)
+{
+	const struct mlx5_flow_driver_ops *fops;
+	enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr);
+
+	fops = flow_get_drv_ops(type);
+	return fops->validate(dev, attr, items, actions, error);
+}
+
+/**
+ * Flow driver preparation API. This abstracts calling driver specific
+ * functions. Parent flow (rte_flow) should have driver type (drv_type). It
+ * calculates the size of memory required for device flow, allocates the memory,
+ * initializes the device flow and returns the pointer.
+ *
+ * @param[in] attr
+ *   Pointer to the flow attributes.
+ * @param[in] items
+ *   Pointer to the list of items.
+ * @param[in] actions
+ *   Pointer to the list of actions.
+ * @param[out] item_flags
+ *   Pointer to bit mask of all items detected.
+ * @param[out] action_flags
+ *   Pointer to bit mask of all actions detected.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   Pointer to device flow on success, otherwise NULL and rte_errno is set.
+ */
+static inline struct mlx5_flow *
+flow_drv_prepare(struct rte_flow *flow,
+		 const struct rte_flow_attr *attr,
+		 const struct rte_flow_item items[],
+		 const struct rte_flow_action actions[],
+		 uint64_t *item_flags,
+		 uint64_t *action_flags,
+		 struct rte_flow_error *error)
+{
+	const struct mlx5_flow_driver_ops *fops;
+	enum mlx5_flow_drv_type type = flow->drv_type;
+
+	assert(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
+	fops = flow_get_drv_ops(type);
+	return fops->prepare(attr, items, actions, item_flags, action_flags,
+			     error);
+}
+
+/**
+ * Flow driver translation API. This abstracts calling driver specific
+ * functions. Parent flow (rte_flow) should have driver type (drv_type). It
+ * translates a generic flow into a driver flow. flow_drv_prepare() must
+ * precede.
+ *
+ * @param[in] dev
+ *   Pointer to the rte dev structure.
+ * @param[in, out] dev_flow
+ *   Pointer to the mlx5 flow.
+ * @param[in] attr
+ *   Pointer to the flow attributes.
+ * @param[in] items
+ *   Pointer to the list of items.
+ * @param[in] actions
+ *   Pointer to the list of actions.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static inline int
+flow_drv_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
+		   const struct rte_flow_attr *attr,
+		   const struct rte_flow_item items[],
+		   const struct rte_flow_action actions[],
+		   struct rte_flow_error *error)
+{
+	const struct mlx5_flow_driver_ops *fops;
+	enum mlx5_flow_drv_type type = dev_flow->flow->drv_type;
+
+	assert(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
+	fops = flow_get_drv_ops(type);
+	return fops->translate(dev, dev_flow, attr, items, actions, error);
+}
+
+/**
+ * Flow driver apply API. This abstracts calling driver specific functions.
+ * Parent flow (rte_flow) should have driver type (drv_type). It applies
+ * translated driver flows to the device. flow_drv_translate() must precede.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device structure.
+ * @param[in, out] flow
+ *   Pointer to flow structure.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static inline int
+flow_drv_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
+	       struct rte_flow_error *error)
+{
+	const struct mlx5_flow_driver_ops *fops;
+	enum mlx5_flow_drv_type type = flow->drv_type;
+
+	assert(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
+	fops = flow_get_drv_ops(type);
+	return fops->apply(dev, flow, error);
+}
+
+/**
+ * Flow driver remove API. This abstracts calling driver specific functions.
+ * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow
+ * from the device. All the resources of the flow should be freed by calling
+ * flow_drv_destroy().
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[in, out] flow
+ *   Pointer to flow structure.
+ */
+static inline void
+flow_drv_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
+{
+	const struct mlx5_flow_driver_ops *fops;
+	enum mlx5_flow_drv_type type = flow->drv_type;
+
+	assert(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
+	fops = flow_get_drv_ops(type);
+	fops->remove(dev, flow);
+}
+
+/**
+ * Flow driver destroy API. This abstracts calling driver specific functions.
+ * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow
+ * from the device and releases the resources of the flow.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[in, out] flow
+ *   Pointer to flow structure.
+ */
+static inline void
+flow_drv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
+{
+	const struct mlx5_flow_driver_ops *fops;
+	enum mlx5_flow_drv_type type = flow->drv_type;
+
+	assert(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
+	fops = flow_get_drv_ops(type);
+	fops->destroy(dev, flow);
+}
+
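The wrappers above are meant to be called in a fixed order: validate,
prepare, translate, then apply. A condensed sketch of the create path,
mirroring mlx5_flow_list_create() below (allocation and error handling
elided):

    if (flow_drv_validate(dev, attr, items, actions, error) < 0)
            return NULL;
    flow->drv_type = flow_get_drv_type(dev, attr);
    dev_flow = flow_drv_prepare(flow, attr, items, actions,
                                &item_flags, &action_flags, error);
    dev_flow->flow = flow;
    LIST_INSERT_HEAD(&flow->dev_flows, dev_flow, next);
    flow_drv_translate(dev, dev_flow, attr, items, actions, error);
    if (dev->data->dev_started)
            flow_drv_apply(dev, flow, error);
    /* Teardown: flow_drv_remove() releases HW state only, while
     * flow_drv_destroy() also frees the device flows. */
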
 /**
  * Validate a flow supported by the NIC.
  *
@@ -1525,7 +1817,7 @@ mlx5_flow_validate(struct rte_eth_dev *dev,
 {
 	int ret;
 
-	ret =  nic_ops.validate(dev, attr, items, actions, error);
+	ret = flow_drv_validate(dev, attr, items, actions, error);
 	if (ret < 0)
 		return ret;
 	return 0;
@@ -1615,7 +1907,7 @@ mlx5_flow_list_create(struct rte_eth_dev *dev,
 	uint32_t i;
 	uint32_t flow_size;
 
-	ret = mlx5_flow_validate(dev, attr, items, actions, error);
+	ret = flow_drv_validate(dev, attr, items, actions, error);
 	if (ret < 0)
 		return NULL;
 	flow_size = sizeof(struct rte_flow);
@@ -1626,6 +1918,9 @@ mlx5_flow_list_create(struct rte_eth_dev *dev,
 	else
 		flow_size += RTE_ALIGN_CEIL(sizeof(uint16_t), sizeof(void *));
 	flow = rte_calloc(__func__, 1, flow_size, 0);
+	flow->drv_type = flow_get_drv_type(dev, attr);
+	assert(flow->drv_type > MLX5_FLOW_TYPE_MIN &&
+	       flow->drv_type < MLX5_FLOW_TYPE_MAX);
 	flow->queue = (void *)(flow + 1);
 	LIST_INIT(&flow->dev_flows);
 	if (rss && rss->types) {
@@ -1643,21 +1938,21 @@ mlx5_flow_list_create(struct rte_eth_dev *dev,
 		buf->entry[0].pattern = (void *)(uintptr_t)items;
 	}
 	for (i = 0; i < buf->entries; ++i) {
-		dev_flow = nic_ops.prepare(attr, buf->entry[i].pattern,
-					   actions, &item_flags,
-					   &action_flags, error);
+		dev_flow = flow_drv_prepare(flow, attr, buf->entry[i].pattern,
+					    actions, &item_flags, &action_flags,
+					    error);
 		if (!dev_flow)
 			goto error;
 		dev_flow->flow = flow;
 		LIST_INSERT_HEAD(&flow->dev_flows, dev_flow, next);
-		ret = nic_ops.translate(dev, dev_flow, attr,
-					buf->entry[i].pattern,
-					actions, error);
+		ret = flow_drv_translate(dev, dev_flow, attr,
+					 buf->entry[i].pattern,
+					 actions, error);
 		if (ret < 0)
 			goto error;
 	}
 	if (dev->data->dev_started) {
-		ret = nic_ops.apply(dev, flow, error);
+		ret = flow_drv_apply(dev, flow, error);
 		if (ret < 0)
 			goto error;
 	}
@@ -1667,7 +1962,7 @@ mlx5_flow_list_create(struct rte_eth_dev *dev,
 error:
 	ret = rte_errno; /* Save rte_errno before cleanup. */
 	assert(flow);
-	nic_ops.destroy(dev, flow);
+	flow_drv_destroy(dev, flow);
 	rte_free(flow);
 	rte_errno = ret; /* Restore rte_errno. */
 	return NULL;
@@ -1705,7 +2000,7 @@ static void
 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
 		       struct rte_flow *flow)
 {
-	nic_ops.destroy(dev, flow);
+	flow_drv_destroy(dev, flow);
 	TAILQ_REMOVE(list, flow, next);
 	/*
 	 * Update RX queue flags only if port is started, otherwise it is
@@ -1749,7 +2044,7 @@ mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
 	struct rte_flow *flow;
 
 	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next)
-		nic_ops.remove(dev, flow);
+		flow_drv_remove(dev, flow);
 	mlx5_flow_rxq_flags_clear(dev);
 }
 
@@ -1772,7 +2067,7 @@ mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
 	int ret = 0;
 
 	TAILQ_FOREACH(flow, list, next) {
-		ret = nic_ops.apply(dev, flow, &error);
+		ret = flow_drv_apply(dev, flow, &error);
 		if (ret < 0)
 			goto error;
 		mlx5_flow_rxq_flags_set(dev, flow);
@@ -2463,24 +2758,3 @@ mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
 	}
 	return 0;
 }
-
-/**
- * Init the driver ops structure.
- *
- * @param dev
- *   Pointer to Ethernet device structure.
- */
-void
-mlx5_flow_init_driver_ops(struct rte_eth_dev *dev)
-{
-	struct priv *priv __rte_unused = dev->data->dev_private;
-
-#ifdef HAVE_IBV_FLOW_DV_SUPPORT
-	if (priv->config.dv_flow_en)
-		mlx5_flow_dv_get_driver_ops(&nic_ops);
-	else
-		mlx5_flow_verbs_get_driver_ops(&nic_ops);
-#else
-	mlx5_flow_verbs_get_driver_ops(&nic_ops);
-#endif
-}
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 53c0eeb56..2bc3bee8c 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -128,6 +128,13 @@
 /* Max number of actions per DV flow. */
 #define MLX5_DV_MAX_NUMBER_OF_ACTIONS 8
 
+enum mlx5_flow_drv_type {
+	MLX5_FLOW_TYPE_MIN,
+	MLX5_FLOW_TYPE_DV,
+	MLX5_FLOW_TYPE_VERBS,
+	MLX5_FLOW_TYPE_MAX,
+};
+
 /* Matcher PRM representation */
 struct mlx5_flow_dv_match_params {
 	size_t size;
@@ -210,7 +217,7 @@ struct mlx5_flow_counter {
 /* Flow structure. */
 struct rte_flow {
 	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
-	struct rte_flow_attr attributes; /**< User flow attribute. */
+	enum mlx5_flow_drv_type drv_type; /**< Driver type. */
 	uint32_t layers;
 	/**< Bit-fields of present layers see MLX5_FLOW_LAYER_*. */
 	struct mlx5_flow_counter *counter; /**< Holds flow counter. */
@@ -314,13 +321,5 @@ int mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
 				      uint64_t item_flags,
 				      struct rte_eth_dev *dev,
 				      struct rte_flow_error *error);
-void mlx5_flow_init_driver_ops(struct rte_eth_dev *dev);
-
-/* mlx5_flow_dv.c */
-void mlx5_flow_dv_get_driver_ops(struct mlx5_flow_driver_ops *flow_ops);
-
-/* mlx5_flow_verbs.c */
-
-void mlx5_flow_verbs_get_driver_ops(struct mlx5_flow_driver_ops *flow_ops);
 
 #endif /* RTE_PMD_MLX5_FLOW_H_ */
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 71af410b2..cf663cdb8 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -1351,23 +1351,13 @@ flow_dv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
 	}
 }
 
-/**
- * Fills the flow_ops with the function pointers.
- *
- * @param[out] flow_ops
- *   Pointer to driver_ops structure.
- */
-void
-mlx5_flow_dv_get_driver_ops(struct mlx5_flow_driver_ops *flow_ops)
-{
-	*flow_ops = (struct mlx5_flow_driver_ops) {
-		.validate = flow_dv_validate,
-		.prepare = flow_dv_prepare,
-		.translate = flow_dv_translate,
-		.apply = flow_dv_apply,
-		.remove = flow_dv_remove,
-		.destroy = flow_dv_destroy,
-	};
-}
+const struct mlx5_flow_driver_ops mlx5_flow_dv_drv_ops = {
+	.validate = flow_dv_validate,
+	.prepare = flow_dv_prepare,
+	.translate = flow_dv_translate,
+	.apply = flow_dv_apply,
+	.remove = flow_dv_remove,
+	.destroy = flow_dv_destroy,
+};
 
 #endif /* HAVE_IBV_FLOW_DV_SUPPORT */
diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c
index f4a264232..05ab5fdad 100644
--- a/drivers/net/mlx5/mlx5_flow_verbs.c
+++ b/drivers/net/mlx5/mlx5_flow_verbs.c
@@ -1638,15 +1638,11 @@ flow_verbs_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
 	return -rte_errno;
 }
 
-void
-mlx5_flow_verbs_get_driver_ops(struct mlx5_flow_driver_ops *flow_ops)
-{
-	*flow_ops = (struct mlx5_flow_driver_ops) {
-		.validate = flow_verbs_validate,
-		.prepare = flow_verbs_prepare,
-		.translate = flow_verbs_translate,
-		.apply = flow_verbs_apply,
-		.remove = flow_verbs_remove,
-		.destroy = flow_verbs_destroy,
-	};
-}
+const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops = {
+	.validate = flow_verbs_validate,
+	.prepare = flow_verbs_prepare,
+	.translate = flow_verbs_translate,
+	.apply = flow_verbs_apply,
+	.remove = flow_verbs_remove,
+	.destroy = flow_verbs_destroy,
+};
-- 
2.11.0

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [dpdk-dev] [PATCH v2 2/3] net/mlx5: remove Netlink flow driver
  2018-09-24 19:55 ` [dpdk-dev] [PATCH v2 0/3] net/mlx5: migrate Linux TC flower driver to new flow engine Yongseok Koh
  2018-09-24 19:55   ` [dpdk-dev] [PATCH v2 1/3] net/mlx5: add abstraction for multiple flow drivers Yongseok Koh
@ 2018-09-24 19:55   ` Yongseok Koh
  2018-09-24 19:55   ` [dpdk-dev] [PATCH v2 3/3] net/mlx5: add Linux TC flower driver for E-Switch flow Yongseok Koh
  2018-10-04 16:16   ` [dpdk-dev] [PATCH v2 0/3] net/mlx5: migrate Linux TC flower driver to new flow engine Thomas Monjalon
  3 siblings, 0 replies; 22+ messages in thread
From: Yongseok Koh @ 2018-09-24 19:55 UTC (permalink / raw)
  To: Thomas Monjalon, Shahaf Shuler; +Cc: dev, Yongseok Koh

The Netlink-based E-Switch flow engine will be migrated to the new flow engine.
nl_flow will be renamed to flow_tcf as it goes through the Linux TC flower
interface.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/Makefile       |    1 -
 drivers/net/mlx5/meson.build    |    1 -
 drivers/net/mlx5/mlx5.c         |   32 -
 drivers/net/mlx5/mlx5.h         |   25 -
 drivers/net/mlx5/mlx5_nl_flow.c | 1228 ---------------------------------------
 5 files changed, 1287 deletions(-)
 delete mode 100644 drivers/net/mlx5/mlx5_nl_flow.c

diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index 4243b37ca..9c1044808 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -35,7 +35,6 @@ SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow_dv.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow_verbs.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_socket.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_nl.c
-SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_nl_flow.c
 
 ifeq ($(CONFIG_RTE_LIBRTE_MLX5_DLOPEN_DEPS),y)
 INSTALL-$(CONFIG_RTE_LIBRTE_MLX5_PMD)-lib += $(LIB_GLUE)
diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
index 3d09ece4f..e5376291c 100644
--- a/drivers/net/mlx5/meson.build
+++ b/drivers/net/mlx5/meson.build
@@ -36,7 +36,6 @@ if build
 		'mlx5_mac.c',
 		'mlx5_mr.c',
 		'mlx5_nl.c',
-		'mlx5_nl_flow.c',
 		'mlx5_rss.c',
 		'mlx5_rxmode.c',
 		'mlx5_rxq.c',
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 2f7d046e0..bb9a63fba 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -286,8 +286,6 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 		close(priv->nl_socket_route);
 	if (priv->nl_socket_rdma >= 0)
 		close(priv->nl_socket_rdma);
-	if (priv->mnl_socket)
-		mlx5_nl_flow_socket_destroy(priv->mnl_socket);
 	ret = mlx5_hrxq_ibv_verify(dev);
 	if (ret)
 		DRV_LOG(WARNING, "port %u some hash Rx queue still remain",
@@ -1137,34 +1135,6 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 	claim_zero(mlx5_mac_addr_add(eth_dev, &mac, 0, 0));
 	if (vf && config.vf_nl_en)
 		mlx5_nl_mac_addr_sync(eth_dev);
-	priv->mnl_socket = mlx5_nl_flow_socket_create();
-	if (!priv->mnl_socket) {
-		err = -rte_errno;
-		DRV_LOG(WARNING,
-			"flow rules relying on switch offloads will not be"
-			" supported: cannot open libmnl socket: %s",
-			strerror(rte_errno));
-	} else {
-		struct rte_flow_error error;
-		unsigned int ifindex = mlx5_ifindex(eth_dev);
-
-		if (!ifindex) {
-			err = -rte_errno;
-			error.message =
-				"cannot retrieve network interface index";
-		} else {
-			err = mlx5_nl_flow_init(priv->mnl_socket, ifindex,
-						&error);
-		}
-		if (err) {
-			DRV_LOG(WARNING,
-				"flow rules relying on switch offloads will"
-				" not be supported: %s: %s",
-				error.message, strerror(rte_errno));
-			mlx5_nl_flow_socket_destroy(priv->mnl_socket);
-			priv->mnl_socket = NULL;
-		}
-	}
 	TAILQ_INIT(&priv->flows);
 	TAILQ_INIT(&priv->ctrl_flows);
 	/* Hint libmlx5 to use PMD allocator for data plane resources */
@@ -1217,8 +1187,6 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 			close(priv->nl_socket_route);
 		if (priv->nl_socket_rdma >= 0)
 			close(priv->nl_socket_rdma);
-		if (priv->mnl_socket)
-			mlx5_nl_flow_socket_destroy(priv->mnl_socket);
 		if (own_domain_id)
 			claim_zero(rte_eth_switch_domain_free(priv->domain_id));
 		rte_free(priv);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 8bb619d9e..8de0d74ce 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -158,12 +158,6 @@ struct mlx5_drop {
 	struct mlx5_rxq_ibv *rxq; /* Verbs Rx queue. */
 };
 
-/** DPDK port to network interface index (ifindex) conversion. */
-struct mlx5_nl_flow_ptoi {
-	uint16_t port_id; /**< DPDK port ID. */
-	unsigned int ifindex; /**< Network interface index. */
-};
-
 struct mnl_socket;
 
 struct priv {
@@ -399,23 +393,4 @@ unsigned int mlx5_nl_ifindex(int nl, const char *name);
 int mlx5_nl_switch_info(int nl, unsigned int ifindex,
 			struct mlx5_switch_info *info);
 
-/* mlx5_nl_flow.c */
-
-int mlx5_nl_flow_transpose(void *buf,
-			   size_t size,
-			   const struct mlx5_nl_flow_ptoi *ptoi,
-			   const struct rte_flow_attr *attr,
-			   const struct rte_flow_item *pattern,
-			   const struct rte_flow_action *actions,
-			   struct rte_flow_error *error);
-void mlx5_nl_flow_brand(void *buf, uint32_t handle);
-int mlx5_nl_flow_create(struct mnl_socket *nl, void *buf,
-			struct rte_flow_error *error);
-int mlx5_nl_flow_destroy(struct mnl_socket *nl, void *buf,
-			 struct rte_flow_error *error);
-int mlx5_nl_flow_init(struct mnl_socket *nl, unsigned int ifindex,
-		      struct rte_flow_error *error);
-struct mnl_socket *mlx5_nl_flow_socket_create(void);
-void mlx5_nl_flow_socket_destroy(struct mnl_socket *nl);
-
 #endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_nl_flow.c b/drivers/net/mlx5/mlx5_nl_flow.c
deleted file mode 100644
index beb03c911..000000000
--- a/drivers/net/mlx5/mlx5_nl_flow.c
+++ /dev/null
@@ -1,1228 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright 2018 6WIND S.A.
- * Copyright 2018 Mellanox Technologies, Ltd
- */
-
-#include <assert.h>
-#include <errno.h>
-#include <libmnl/libmnl.h>
-#include <linux/if_ether.h>
-#include <linux/netlink.h>
-#include <linux/pkt_cls.h>
-#include <linux/pkt_sched.h>
-#include <linux/rtnetlink.h>
-#include <linux/tc_act/tc_gact.h>
-#include <linux/tc_act/tc_mirred.h>
-#include <netinet/in.h>
-#include <stdalign.h>
-#include <stdbool.h>
-#include <stddef.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <sys/socket.h>
-
-#include <rte_byteorder.h>
-#include <rte_errno.h>
-#include <rte_ether.h>
-#include <rte_flow.h>
-
-#include "mlx5.h"
-#include "mlx5_autoconf.h"
-
-#ifdef HAVE_TC_ACT_VLAN
-
-#include <linux/tc_act/tc_vlan.h>
-
-#else /* HAVE_TC_ACT_VLAN */
-
-#define TCA_VLAN_ACT_POP 1
-#define TCA_VLAN_ACT_PUSH 2
-#define TCA_VLAN_ACT_MODIFY 3
-#define TCA_VLAN_PARMS 2
-#define TCA_VLAN_PUSH_VLAN_ID 3
-#define TCA_VLAN_PUSH_VLAN_PROTOCOL 4
-#define TCA_VLAN_PAD 5
-#define TCA_VLAN_PUSH_VLAN_PRIORITY 6
-
-struct tc_vlan {
-	tc_gen;
-	int v_action;
-};
-
-#endif /* HAVE_TC_ACT_VLAN */
-
-/* Normally found in linux/netlink.h. */
-#ifndef NETLINK_CAP_ACK
-#define NETLINK_CAP_ACK 10
-#endif
-
-/* Normally found in linux/pkt_sched.h. */
-#ifndef TC_H_MIN_INGRESS
-#define TC_H_MIN_INGRESS 0xfff2u
-#endif
-
-/* Normally found in linux/pkt_cls.h. */
-#ifndef TCA_CLS_FLAGS_SKIP_SW
-#define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
-#endif
-#ifndef HAVE_TCA_FLOWER_ACT
-#define TCA_FLOWER_ACT 3
-#endif
-#ifndef HAVE_TCA_FLOWER_FLAGS
-#define TCA_FLOWER_FLAGS 22
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_ETH_TYPE
-#define TCA_FLOWER_KEY_ETH_TYPE 8
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_ETH_DST
-#define TCA_FLOWER_KEY_ETH_DST 4
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_ETH_DST_MASK
-#define TCA_FLOWER_KEY_ETH_DST_MASK 5
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC
-#define TCA_FLOWER_KEY_ETH_SRC 6
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK
-#define TCA_FLOWER_KEY_ETH_SRC_MASK 7
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_IP_PROTO
-#define TCA_FLOWER_KEY_IP_PROTO 9
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC
-#define TCA_FLOWER_KEY_IPV4_SRC 10
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK
-#define TCA_FLOWER_KEY_IPV4_SRC_MASK 11
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST
-#define TCA_FLOWER_KEY_IPV4_DST 12
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK
-#define TCA_FLOWER_KEY_IPV4_DST_MASK 13
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC
-#define TCA_FLOWER_KEY_IPV6_SRC 14
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK
-#define TCA_FLOWER_KEY_IPV6_SRC_MASK 15
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST
-#define TCA_FLOWER_KEY_IPV6_DST 16
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK
-#define TCA_FLOWER_KEY_IPV6_DST_MASK 17
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC
-#define TCA_FLOWER_KEY_TCP_SRC 18
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK
-#define TCA_FLOWER_KEY_TCP_SRC_MASK 35
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_TCP_DST
-#define TCA_FLOWER_KEY_TCP_DST 19
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_TCP_DST_MASK
-#define TCA_FLOWER_KEY_TCP_DST_MASK 36
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC
-#define TCA_FLOWER_KEY_UDP_SRC 20
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK
-#define TCA_FLOWER_KEY_UDP_SRC_MASK 37
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_UDP_DST
-#define TCA_FLOWER_KEY_UDP_DST 21
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_UDP_DST_MASK
-#define TCA_FLOWER_KEY_UDP_DST_MASK 38
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_VLAN_ID
-#define TCA_FLOWER_KEY_VLAN_ID 23
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_VLAN_PRIO
-#define TCA_FLOWER_KEY_VLAN_PRIO 24
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
-#define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
-#endif
-
-/** Parser state definitions for mlx5_nl_flow_trans[]. */
-enum mlx5_nl_flow_trans {
-	INVALID,
-	BACK,
-	ATTR,
-	PATTERN,
-	ITEM_VOID,
-	ITEM_PORT_ID,
-	ITEM_ETH,
-	ITEM_VLAN,
-	ITEM_IPV4,
-	ITEM_IPV6,
-	ITEM_TCP,
-	ITEM_UDP,
-	ACTIONS,
-	ACTION_VOID,
-	ACTION_PORT_ID,
-	ACTION_DROP,
-	ACTION_OF_POP_VLAN,
-	ACTION_OF_PUSH_VLAN,
-	ACTION_OF_SET_VLAN_VID,
-	ACTION_OF_SET_VLAN_PCP,
-	END,
-};
-
-#define TRANS(...) (const enum mlx5_nl_flow_trans []){ __VA_ARGS__, INVALID, }
-
-#define PATTERN_COMMON \
-	ITEM_VOID, ITEM_PORT_ID, ACTIONS
-#define ACTIONS_COMMON \
-	ACTION_VOID, ACTION_OF_POP_VLAN, ACTION_OF_PUSH_VLAN, \
-	ACTION_OF_SET_VLAN_VID, ACTION_OF_SET_VLAN_PCP
-#define ACTIONS_FATE \
-	ACTION_PORT_ID, ACTION_DROP
-
-/** Parser state transitions used by mlx5_nl_flow_transpose(). */
-static const enum mlx5_nl_flow_trans *const mlx5_nl_flow_trans[] = {
-	[INVALID] = NULL,
-	[BACK] = NULL,
-	[ATTR] = TRANS(PATTERN),
-	[PATTERN] = TRANS(ITEM_ETH, PATTERN_COMMON),
-	[ITEM_VOID] = TRANS(BACK),
-	[ITEM_PORT_ID] = TRANS(BACK),
-	[ITEM_ETH] = TRANS(ITEM_IPV4, ITEM_IPV6, ITEM_VLAN, PATTERN_COMMON),
-	[ITEM_VLAN] = TRANS(ITEM_IPV4, ITEM_IPV6, PATTERN_COMMON),
-	[ITEM_IPV4] = TRANS(ITEM_TCP, ITEM_UDP, PATTERN_COMMON),
-	[ITEM_IPV6] = TRANS(ITEM_TCP, ITEM_UDP, PATTERN_COMMON),
-	[ITEM_TCP] = TRANS(PATTERN_COMMON),
-	[ITEM_UDP] = TRANS(PATTERN_COMMON),
-	[ACTIONS] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
-	[ACTION_VOID] = TRANS(BACK),
-	[ACTION_PORT_ID] = TRANS(ACTION_VOID, END),
-	[ACTION_DROP] = TRANS(ACTION_VOID, END),
-	[ACTION_OF_POP_VLAN] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
-	[ACTION_OF_PUSH_VLAN] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
-	[ACTION_OF_SET_VLAN_VID] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
-	[ACTION_OF_SET_VLAN_PCP] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
-	[END] = NULL,
-};
-
-/** Empty masks for known item types. */
-static const union {
-	struct rte_flow_item_port_id port_id;
-	struct rte_flow_item_eth eth;
-	struct rte_flow_item_vlan vlan;
-	struct rte_flow_item_ipv4 ipv4;
-	struct rte_flow_item_ipv6 ipv6;
-	struct rte_flow_item_tcp tcp;
-	struct rte_flow_item_udp udp;
-} mlx5_nl_flow_mask_empty;
-
-/** Supported masks for known item types. */
-static const struct {
-	struct rte_flow_item_port_id port_id;
-	struct rte_flow_item_eth eth;
-	struct rte_flow_item_vlan vlan;
-	struct rte_flow_item_ipv4 ipv4;
-	struct rte_flow_item_ipv6 ipv6;
-	struct rte_flow_item_tcp tcp;
-	struct rte_flow_item_udp udp;
-} mlx5_nl_flow_mask_supported = {
-	.port_id = {
-		.id = 0xffffffff,
-	},
-	.eth = {
-		.type = RTE_BE16(0xffff),
-		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
-		.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
-	},
-	.vlan = {
-		/* PCP and VID only, no DEI. */
-		.tci = RTE_BE16(0xefff),
-		.inner_type = RTE_BE16(0xffff),
-	},
-	.ipv4.hdr = {
-		.next_proto_id = 0xff,
-		.src_addr = RTE_BE32(0xffffffff),
-		.dst_addr = RTE_BE32(0xffffffff),
-	},
-	.ipv6.hdr = {
-		.proto = 0xff,
-		.src_addr =
-			"\xff\xff\xff\xff\xff\xff\xff\xff"
-			"\xff\xff\xff\xff\xff\xff\xff\xff",
-		.dst_addr =
-			"\xff\xff\xff\xff\xff\xff\xff\xff"
-			"\xff\xff\xff\xff\xff\xff\xff\xff",
-	},
-	.tcp.hdr = {
-		.src_port = RTE_BE16(0xffff),
-		.dst_port = RTE_BE16(0xffff),
-	},
-	.udp.hdr = {
-		.src_port = RTE_BE16(0xffff),
-		.dst_port = RTE_BE16(0xffff),
-	},
-};
-
-/**
- * Retrieve mask for pattern item.
- *
- * This function does basic sanity checks on a pattern item in order to
- * return the most appropriate mask for it.
- *
- * @param[in] item
- *   Item specification.
- * @param[in] mask_default
- *   Default mask for pattern item as specified by the flow API.
- * @param[in] mask_supported
- *   Mask fields supported by the implementation.
- * @param[in] mask_empty
- *   Empty mask to return when there is no specification.
- * @param[out] error
- *   Perform verbose error reporting if not NULL.
- *
- * @return
- *   Either @p item->mask or one of the mask parameters on success, NULL
- *   otherwise and rte_errno is set.
- */
-static const void *
-mlx5_nl_flow_item_mask(const struct rte_flow_item *item,
-		       const void *mask_default,
-		       const void *mask_supported,
-		       const void *mask_empty,
-		       size_t mask_size,
-		       struct rte_flow_error *error)
-{
-	const uint8_t *mask;
-	size_t i;
-
-	/* item->last and item->mask cannot exist without item->spec. */
-	if (!item->spec && (item->mask || item->last)) {
-		rte_flow_error_set
-			(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, item,
-			 "\"mask\" or \"last\" field provided without a"
-			 " corresponding \"spec\"");
-		return NULL;
-	}
-	/* No spec, no mask, no problem. */
-	if (!item->spec)
-		return mask_empty;
-	mask = item->mask ? item->mask : mask_default;
-	assert(mask);
-	/*
-	 * Single-pass check to make sure that:
-	 * - Mask is supported, no bits are set outside mask_supported.
-	 * - Both item->spec and item->last are included in mask.
-	 */
-	for (i = 0; i != mask_size; ++i) {
-		if (!mask[i])
-			continue;
-		if ((mask[i] | ((const uint8_t *)mask_supported)[i]) !=
-		    ((const uint8_t *)mask_supported)[i]) {
-			rte_flow_error_set
-				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
-				 mask, "unsupported field found in \"mask\"");
-			return NULL;
-		}
-		if (item->last &&
-		    (((const uint8_t *)item->spec)[i] & mask[i]) !=
-		    (((const uint8_t *)item->last)[i] & mask[i])) {
-			rte_flow_error_set
-				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_LAST,
-				 item->last,
-				 "range between \"spec\" and \"last\" not"
-				 " comprised in \"mask\"");
-			return NULL;
-		}
-	}
-	return mask;
-}
-
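For illustration, the core of the single-pass mask test in the removed
helper above reduces to the following self-contained check (a
hypothetical standalone function, not taken from the driver):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>

    /* True when every bit set in mask[] is also set in supported[],
     * i.e. the mask requests nothing the implementation cannot match. */
    static bool
    mask_is_supported(const uint8_t *mask, const uint8_t *supported,
                      size_t size)
    {
            size_t i;

            for (i = 0; i != size; ++i)
                    if ((mask[i] | supported[i]) != supported[i])
                            return false;
            return true;
    }
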
-/**
- * Transpose flow rule description to rtnetlink message.
- *
- * This function transposes a flow rule description to a traffic control
- * (TC) filter creation message ready to be sent over Netlink.
- *
- * Target interface is specified as the first entry of the @p ptoi table.
- * Subsequent entries enable this function to resolve other DPDK port IDs
- * found in the flow rule.
- *
- * @param[out] buf
- *   Output message buffer. May be NULL when @p size is 0.
- * @param size
- *   Size of @p buf. Message may be truncated if not large enough.
- * @param[in] ptoi
- *   DPDK port ID to network interface index translation table. This table
- *   is terminated by an entry with a zero ifindex value.
- * @param[in] attr
- *   Flow rule attributes.
- * @param[in] pattern
- *   Pattern specification.
- * @param[in] actions
- *   Associated actions.
- * @param[out] error
- *   Perform verbose error reporting if not NULL.
- *
- * @return
- *   A positive value representing the exact size of the message in bytes
- *   regardless of the @p size parameter on success, a negative errno value
- *   otherwise and rte_errno is set.
- */
-int
-mlx5_nl_flow_transpose(void *buf,
-		       size_t size,
-		       const struct mlx5_nl_flow_ptoi *ptoi,
-		       const struct rte_flow_attr *attr,
-		       const struct rte_flow_item *pattern,
-		       const struct rte_flow_action *actions,
-		       struct rte_flow_error *error)
-{
-	alignas(struct nlmsghdr)
-	uint8_t buf_tmp[mnl_nlmsg_size(sizeof(struct tcmsg) + 1024)];
-	const struct rte_flow_item *item;
-	const struct rte_flow_action *action;
-	unsigned int n;
-	uint32_t act_index_cur;
-	bool in_port_id_set;
-	bool eth_type_set;
-	bool vlan_present;
-	bool vlan_eth_type_set;
-	bool ip_proto_set;
-	struct nlattr *na_flower;
-	struct nlattr *na_flower_act;
-	struct nlattr *na_vlan_id;
-	struct nlattr *na_vlan_priority;
-	const enum mlx5_nl_flow_trans *trans;
-	const enum mlx5_nl_flow_trans *back;
-
-	if (!size)
-		goto error_nobufs;
-init:
-	item = pattern;
-	action = actions;
-	n = 0;
-	act_index_cur = 0;
-	in_port_id_set = false;
-	eth_type_set = false;
-	vlan_present = false;
-	vlan_eth_type_set = false;
-	ip_proto_set = false;
-	na_flower = NULL;
-	na_flower_act = NULL;
-	na_vlan_id = NULL;
-	na_vlan_priority = NULL;
-	trans = TRANS(ATTR);
-	back = trans;
-trans:
-	switch (trans[n++]) {
-		union {
-			const struct rte_flow_item_port_id *port_id;
-			const struct rte_flow_item_eth *eth;
-			const struct rte_flow_item_vlan *vlan;
-			const struct rte_flow_item_ipv4 *ipv4;
-			const struct rte_flow_item_ipv6 *ipv6;
-			const struct rte_flow_item_tcp *tcp;
-			const struct rte_flow_item_udp *udp;
-		} spec, mask;
-		union {
-			const struct rte_flow_action_port_id *port_id;
-			const struct rte_flow_action_of_push_vlan *of_push_vlan;
-			const struct rte_flow_action_of_set_vlan_vid *
-				of_set_vlan_vid;
-			const struct rte_flow_action_of_set_vlan_pcp *
-				of_set_vlan_pcp;
-		} conf;
-		struct nlmsghdr *nlh;
-		struct tcmsg *tcm;
-		struct nlattr *act_index;
-		struct nlattr *act;
-		unsigned int i;
-
-	case INVALID:
-		if (item->type)
-			return rte_flow_error_set
-				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
-				 item, "unsupported pattern item combination");
-		else if (action->type)
-			return rte_flow_error_set
-				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
-				 action, "unsupported action combination");
-		return rte_flow_error_set
-			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
-			 "flow rule lacks some kind of fate action");
-	case BACK:
-		trans = back;
-		n = 0;
-		goto trans;
-	case ATTR:
-		/*
-		 * Supported attributes: no groups, some priorities and
-		 * ingress only. Don't care about transfer as it is the
-		 * caller's problem.
-		 */
-		if (attr->group)
-			return rte_flow_error_set
-				(error, ENOTSUP,
-				 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
-				 attr, "groups are not supported");
-		if (attr->priority > 0xfffe)
-			return rte_flow_error_set
-				(error, ENOTSUP,
-				 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
-				 attr, "lowest priority level is 0xfffe");
-		if (!attr->ingress)
-			return rte_flow_error_set
-				(error, ENOTSUP,
-				 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
-				 attr, "only ingress is supported");
-		if (attr->egress)
-			return rte_flow_error_set
-				(error, ENOTSUP,
-				 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
-				 attr, "egress is not supported");
-		if (size < mnl_nlmsg_size(sizeof(*tcm)))
-			goto error_nobufs;
-		nlh = mnl_nlmsg_put_header(buf);
-		nlh->nlmsg_type = 0;
-		nlh->nlmsg_flags = 0;
-		nlh->nlmsg_seq = 0;
-		tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
-		tcm->tcm_family = AF_UNSPEC;
-		tcm->tcm_ifindex = ptoi[0].ifindex;
-		/*
-		 * Let kernel pick a handle by default. A predictable handle
-		 * can be set by the caller on the resulting buffer through
-		 * mlx5_nl_flow_brand().
-		 */
-		tcm->tcm_handle = 0;
-		tcm->tcm_parent = TC_H_MAKE(TC_H_INGRESS, TC_H_MIN_INGRESS);
-		/*
-		 * Priority cannot be zero to prevent the kernel from
-		 * picking one automatically.
-		 */
-		tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
-					  RTE_BE16(ETH_P_ALL));
-		break;
-	case PATTERN:
-		if (!mnl_attr_put_strz_check(buf, size, TCA_KIND, "flower"))
-			goto error_nobufs;
-		na_flower = mnl_attr_nest_start_check(buf, size, TCA_OPTIONS);
-		if (!na_flower)
-			goto error_nobufs;
-		if (!mnl_attr_put_u32_check(buf, size, TCA_FLOWER_FLAGS,
-					    TCA_CLS_FLAGS_SKIP_SW))
-			goto error_nobufs;
-		break;
-	case ITEM_VOID:
-		if (item->type != RTE_FLOW_ITEM_TYPE_VOID)
-			goto trans;
-		++item;
-		break;
-	case ITEM_PORT_ID:
-		if (item->type != RTE_FLOW_ITEM_TYPE_PORT_ID)
-			goto trans;
-		mask.port_id = mlx5_nl_flow_item_mask
-			(item, &rte_flow_item_port_id_mask,
-			 &mlx5_nl_flow_mask_supported.port_id,
-			 &mlx5_nl_flow_mask_empty.port_id,
-			 sizeof(mlx5_nl_flow_mask_supported.port_id), error);
-		if (!mask.port_id)
-			return -rte_errno;
-		if (mask.port_id == &mlx5_nl_flow_mask_empty.port_id) {
-			in_port_id_set = 1;
-			++item;
-			break;
-		}
-		spec.port_id = item->spec;
-		if (mask.port_id->id && mask.port_id->id != 0xffffffff)
-			return rte_flow_error_set
-				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
-				 mask.port_id,
-				 "no support for partial mask on"
-				 " \"id\" field");
-		if (!mask.port_id->id)
-			i = 0;
-		else
-			for (i = 0; ptoi[i].ifindex; ++i)
-				if (ptoi[i].port_id == spec.port_id->id)
-					break;
-		if (!ptoi[i].ifindex)
-			return rte_flow_error_set
-				(error, ENODEV, RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
-				 spec.port_id,
-				 "missing data to convert port ID to ifindex");
-		tcm = mnl_nlmsg_get_payload(buf);
-		if (in_port_id_set &&
-		    ptoi[i].ifindex != (unsigned int)tcm->tcm_ifindex)
-			return rte_flow_error_set
-				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
-				 spec.port_id,
-				 "cannot match traffic for several port IDs"
-				 " through a single flow rule");
-		tcm->tcm_ifindex = ptoi[i].ifindex;
-		in_port_id_set = 1;
-		++item;
-		break;
-	case ITEM_ETH:
-		if (item->type != RTE_FLOW_ITEM_TYPE_ETH)
-			goto trans;
-		mask.eth = mlx5_nl_flow_item_mask
-			(item, &rte_flow_item_eth_mask,
-			 &mlx5_nl_flow_mask_supported.eth,
-			 &mlx5_nl_flow_mask_empty.eth,
-			 sizeof(mlx5_nl_flow_mask_supported.eth), error);
-		if (!mask.eth)
-			return -rte_errno;
-		if (mask.eth == &mlx5_nl_flow_mask_empty.eth) {
-			++item;
-			break;
-		}
-		spec.eth = item->spec;
-		if (mask.eth->type && mask.eth->type != RTE_BE16(0xffff))
-			return rte_flow_error_set
-				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
-				 mask.eth,
-				 "no support for partial mask on"
-				 " \"type\" field");
-		if (mask.eth->type) {
-			if (!mnl_attr_put_u16_check(buf, size,
-						    TCA_FLOWER_KEY_ETH_TYPE,
-						    spec.eth->type))
-				goto error_nobufs;
-			eth_type_set = 1;
-		}
-		if ((!is_zero_ether_addr(&mask.eth->dst) &&
-		     (!mnl_attr_put_check(buf, size,
-					  TCA_FLOWER_KEY_ETH_DST,
-					  ETHER_ADDR_LEN,
-					  spec.eth->dst.addr_bytes) ||
-		      !mnl_attr_put_check(buf, size,
-					  TCA_FLOWER_KEY_ETH_DST_MASK,
-					  ETHER_ADDR_LEN,
-					  mask.eth->dst.addr_bytes))) ||
-		    (!is_zero_ether_addr(&mask.eth->src) &&
-		     (!mnl_attr_put_check(buf, size,
-					  TCA_FLOWER_KEY_ETH_SRC,
-					  ETHER_ADDR_LEN,
-					  spec.eth->src.addr_bytes) ||
-		      !mnl_attr_put_check(buf, size,
-					  TCA_FLOWER_KEY_ETH_SRC_MASK,
-					  ETHER_ADDR_LEN,
-					  mask.eth->src.addr_bytes))))
-			goto error_nobufs;
-		++item;
-		break;
-	case ITEM_VLAN:
-		if (item->type != RTE_FLOW_ITEM_TYPE_VLAN)
-			goto trans;
-		mask.vlan = mlx5_nl_flow_item_mask
-			(item, &rte_flow_item_vlan_mask,
-			 &mlx5_nl_flow_mask_supported.vlan,
-			 &mlx5_nl_flow_mask_empty.vlan,
-			 sizeof(mlx5_nl_flow_mask_supported.vlan), error);
-		if (!mask.vlan)
-			return -rte_errno;
-		if (!eth_type_set &&
-		    !mnl_attr_put_u16_check(buf, size,
-					    TCA_FLOWER_KEY_ETH_TYPE,
-					    RTE_BE16(ETH_P_8021Q)))
-			goto error_nobufs;
-		eth_type_set = 1;
-		vlan_present = 1;
-		if (mask.vlan == &mlx5_nl_flow_mask_empty.vlan) {
-			++item;
-			break;
-		}
-		spec.vlan = item->spec;
-		if ((mask.vlan->tci & RTE_BE16(0xe000) &&
-		     (mask.vlan->tci & RTE_BE16(0xe000)) != RTE_BE16(0xe000)) ||
-		    (mask.vlan->tci & RTE_BE16(0x0fff) &&
-		     (mask.vlan->tci & RTE_BE16(0x0fff)) != RTE_BE16(0x0fff)) ||
-		    (mask.vlan->inner_type &&
-		     mask.vlan->inner_type != RTE_BE16(0xffff)))
-			return rte_flow_error_set
-				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
-				 mask.vlan,
-				 "no support for partial masks on"
-				 " \"tci\" (PCP and VID parts) and"
-				 " \"inner_type\" fields");
-		if (mask.vlan->inner_type) {
-			if (!mnl_attr_put_u16_check
-			    (buf, size, TCA_FLOWER_KEY_VLAN_ETH_TYPE,
-			     spec.vlan->inner_type))
-				goto error_nobufs;
-			vlan_eth_type_set = 1;
-		}
-		if ((mask.vlan->tci & RTE_BE16(0xe000) &&
-		     !mnl_attr_put_u8_check
-		     (buf, size, TCA_FLOWER_KEY_VLAN_PRIO,
-		      (rte_be_to_cpu_16(spec.vlan->tci) >> 13) & 0x7)) ||
-		    (mask.vlan->tci & RTE_BE16(0x0fff) &&
-		     !mnl_attr_put_u16_check
-		     (buf, size, TCA_FLOWER_KEY_VLAN_ID,
-		      rte_be_to_cpu_16(spec.vlan->tci & RTE_BE16(0x0fff)))))
-			goto error_nobufs;
-		++item;
-		break;
-	case ITEM_IPV4:
-		if (item->type != RTE_FLOW_ITEM_TYPE_IPV4)
-			goto trans;
-		mask.ipv4 = mlx5_nl_flow_item_mask
-			(item, &rte_flow_item_ipv4_mask,
-			 &mlx5_nl_flow_mask_supported.ipv4,
-			 &mlx5_nl_flow_mask_empty.ipv4,
-			 sizeof(mlx5_nl_flow_mask_supported.ipv4), error);
-		if (!mask.ipv4)
-			return -rte_errno;
-		if ((!eth_type_set || !vlan_eth_type_set) &&
-		    !mnl_attr_put_u16_check(buf, size,
-					    vlan_present ?
-					    TCA_FLOWER_KEY_VLAN_ETH_TYPE :
-					    TCA_FLOWER_KEY_ETH_TYPE,
-					    RTE_BE16(ETH_P_IP)))
-			goto error_nobufs;
-		eth_type_set = 1;
-		vlan_eth_type_set = 1;
-		if (mask.ipv4 == &mlx5_nl_flow_mask_empty.ipv4) {
-			++item;
-			break;
-		}
-		spec.ipv4 = item->spec;
-		if (mask.ipv4->hdr.next_proto_id &&
-		    mask.ipv4->hdr.next_proto_id != 0xff)
-			return rte_flow_error_set
-				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
-				 mask.ipv4,
-				 "no support for partial mask on"
-				 " \"hdr.next_proto_id\" field");
-		if (mask.ipv4->hdr.next_proto_id) {
-			if (!mnl_attr_put_u8_check
-			    (buf, size, TCA_FLOWER_KEY_IP_PROTO,
-			     spec.ipv4->hdr.next_proto_id))
-				goto error_nobufs;
-			ip_proto_set = 1;
-		}
-		if ((mask.ipv4->hdr.src_addr &&
-		     (!mnl_attr_put_u32_check(buf, size,
-					      TCA_FLOWER_KEY_IPV4_SRC,
-					      spec.ipv4->hdr.src_addr) ||
-		      !mnl_attr_put_u32_check(buf, size,
-					      TCA_FLOWER_KEY_IPV4_SRC_MASK,
-					      mask.ipv4->hdr.src_addr))) ||
-		    (mask.ipv4->hdr.dst_addr &&
-		     (!mnl_attr_put_u32_check(buf, size,
-					      TCA_FLOWER_KEY_IPV4_DST,
-					      spec.ipv4->hdr.dst_addr) ||
-		      !mnl_attr_put_u32_check(buf, size,
-					      TCA_FLOWER_KEY_IPV4_DST_MASK,
-					      mask.ipv4->hdr.dst_addr))))
-			goto error_nobufs;
-		++item;
-		break;
-	case ITEM_IPV6:
-		if (item->type != RTE_FLOW_ITEM_TYPE_IPV6)
-			goto trans;
-		mask.ipv6 = mlx5_nl_flow_item_mask
-			(item, &rte_flow_item_ipv6_mask,
-			 &mlx5_nl_flow_mask_supported.ipv6,
-			 &mlx5_nl_flow_mask_empty.ipv6,
-			 sizeof(mlx5_nl_flow_mask_supported.ipv6), error);
-		if (!mask.ipv6)
-			return -rte_errno;
-		if ((!eth_type_set || !vlan_eth_type_set) &&
-		    !mnl_attr_put_u16_check(buf, size,
-					    vlan_present ?
-					    TCA_FLOWER_KEY_VLAN_ETH_TYPE :
-					    TCA_FLOWER_KEY_ETH_TYPE,
-					    RTE_BE16(ETH_P_IPV6)))
-			goto error_nobufs;
-		eth_type_set = 1;
-		vlan_eth_type_set = 1;
-		if (mask.ipv6 == &mlx5_nl_flow_mask_empty.ipv6) {
-			++item;
-			break;
-		}
-		spec.ipv6 = item->spec;
-		if (mask.ipv6->hdr.proto && mask.ipv6->hdr.proto != 0xff)
-			return rte_flow_error_set
-				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
-				 mask.ipv6,
-				 "no support for partial mask on"
-				 " \"hdr.proto\" field");
-		if (mask.ipv6->hdr.proto) {
-			if (!mnl_attr_put_u8_check
-			    (buf, size, TCA_FLOWER_KEY_IP_PROTO,
-			     spec.ipv6->hdr.proto))
-				goto error_nobufs;
-			ip_proto_set = 1;
-		}
-		if ((!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr) &&
-		     (!mnl_attr_put_check(buf, size,
-					  TCA_FLOWER_KEY_IPV6_SRC,
-					  sizeof(spec.ipv6->hdr.src_addr),
-					  spec.ipv6->hdr.src_addr) ||
-		      !mnl_attr_put_check(buf, size,
-					  TCA_FLOWER_KEY_IPV6_SRC_MASK,
-					  sizeof(mask.ipv6->hdr.src_addr),
-					  mask.ipv6->hdr.src_addr))) ||
-		    (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr) &&
-		     (!mnl_attr_put_check(buf, size,
-					  TCA_FLOWER_KEY_IPV6_DST,
-					  sizeof(spec.ipv6->hdr.dst_addr),
-					  spec.ipv6->hdr.dst_addr) ||
-		      !mnl_attr_put_check(buf, size,
-					  TCA_FLOWER_KEY_IPV6_DST_MASK,
-					  sizeof(mask.ipv6->hdr.dst_addr),
-					  mask.ipv6->hdr.dst_addr))))
-			goto error_nobufs;
-		++item;
-		break;
-	case ITEM_TCP:
-		if (item->type != RTE_FLOW_ITEM_TYPE_TCP)
-			goto trans;
-		mask.tcp = mlx5_nl_flow_item_mask
-			(item, &rte_flow_item_tcp_mask,
-			 &mlx5_nl_flow_mask_supported.tcp,
-			 &mlx5_nl_flow_mask_empty.tcp,
-			 sizeof(mlx5_nl_flow_mask_supported.tcp), error);
-		if (!mask.tcp)
-			return -rte_errno;
-		if (!ip_proto_set &&
-		    !mnl_attr_put_u8_check(buf, size,
-					   TCA_FLOWER_KEY_IP_PROTO,
-					   IPPROTO_TCP))
-			goto error_nobufs;
-		if (mask.tcp == &mlx5_nl_flow_mask_empty.tcp) {
-			++item;
-			break;
-		}
-		spec.tcp = item->spec;
-		if ((mask.tcp->hdr.src_port &&
-		     (!mnl_attr_put_u16_check(buf, size,
-					      TCA_FLOWER_KEY_TCP_SRC,
-					      spec.tcp->hdr.src_port) ||
-		      !mnl_attr_put_u16_check(buf, size,
-					      TCA_FLOWER_KEY_TCP_SRC_MASK,
-					      mask.tcp->hdr.src_port))) ||
-		    (mask.tcp->hdr.dst_port &&
-		     (!mnl_attr_put_u16_check(buf, size,
-					      TCA_FLOWER_KEY_TCP_DST,
-					      spec.tcp->hdr.dst_port) ||
-		      !mnl_attr_put_u16_check(buf, size,
-					      TCA_FLOWER_KEY_TCP_DST_MASK,
-					      mask.tcp->hdr.dst_port))))
-			goto error_nobufs;
-		++item;
-		break;
-	case ITEM_UDP:
-		if (item->type != RTE_FLOW_ITEM_TYPE_UDP)
-			goto trans;
-		mask.udp = mlx5_nl_flow_item_mask
-			(item, &rte_flow_item_udp_mask,
-			 &mlx5_nl_flow_mask_supported.udp,
-			 &mlx5_nl_flow_mask_empty.udp,
-			 sizeof(mlx5_nl_flow_mask_supported.udp), error);
-		if (!mask.udp)
-			return -rte_errno;
-		if (!ip_proto_set &&
-		    !mnl_attr_put_u8_check(buf, size,
-					   TCA_FLOWER_KEY_IP_PROTO,
-					   IPPROTO_UDP))
-			goto error_nobufs;
-		if (mask.udp == &mlx5_nl_flow_mask_empty.udp) {
-			++item;
-			break;
-		}
-		spec.udp = item->spec;
-		if ((mask.udp->hdr.src_port &&
-		     (!mnl_attr_put_u16_check(buf, size,
-					      TCA_FLOWER_KEY_UDP_SRC,
-					      spec.udp->hdr.src_port) ||
-		      !mnl_attr_put_u16_check(buf, size,
-					      TCA_FLOWER_KEY_UDP_SRC_MASK,
-					      mask.udp->hdr.src_port))) ||
-		    (mask.udp->hdr.dst_port &&
-		     (!mnl_attr_put_u16_check(buf, size,
-					      TCA_FLOWER_KEY_UDP_DST,
-					      spec.udp->hdr.dst_port) ||
-		      !mnl_attr_put_u16_check(buf, size,
-					      TCA_FLOWER_KEY_UDP_DST_MASK,
-					      mask.udp->hdr.dst_port))))
-			goto error_nobufs;
-		++item;
-		break;
-	case ACTIONS:
-		if (item->type != RTE_FLOW_ITEM_TYPE_END)
-			goto trans;
-		assert(na_flower);
-		assert(!na_flower_act);
-		na_flower_act =
-			mnl_attr_nest_start_check(buf, size, TCA_FLOWER_ACT);
-		if (!na_flower_act)
-			goto error_nobufs;
-		act_index_cur = 1;
-		break;
-	case ACTION_VOID:
-		if (action->type != RTE_FLOW_ACTION_TYPE_VOID)
-			goto trans;
-		++action;
-		break;
-	case ACTION_PORT_ID:
-		if (action->type != RTE_FLOW_ACTION_TYPE_PORT_ID)
-			goto trans;
-		conf.port_id = action->conf;
-		if (conf.port_id->original)
-			i = 0;
-		else
-			for (i = 0; ptoi[i].ifindex; ++i)
-				if (ptoi[i].port_id == conf.port_id->id)
-					break;
-		if (!ptoi[i].ifindex)
-			return rte_flow_error_set
-				(error, ENODEV, RTE_FLOW_ERROR_TYPE_ACTION_CONF,
-				 conf.port_id,
-				 "missing data to convert port ID to ifindex");
-		act_index =
-			mnl_attr_nest_start_check(buf, size, act_index_cur++);
-		if (!act_index ||
-		    !mnl_attr_put_strz_check(buf, size, TCA_ACT_KIND, "mirred"))
-			goto error_nobufs;
-		act = mnl_attr_nest_start_check(buf, size, TCA_ACT_OPTIONS);
-		if (!act)
-			goto error_nobufs;
-		if (!mnl_attr_put_check(buf, size, TCA_MIRRED_PARMS,
-					sizeof(struct tc_mirred),
-					&(struct tc_mirred){
-						.action = TC_ACT_STOLEN,
-						.eaction = TCA_EGRESS_REDIR,
-						.ifindex = ptoi[i].ifindex,
-					}))
-			goto error_nobufs;
-		mnl_attr_nest_end(buf, act);
-		mnl_attr_nest_end(buf, act_index);
-		++action;
-		break;
-	case ACTION_DROP:
-		if (action->type != RTE_FLOW_ACTION_TYPE_DROP)
-			goto trans;
-		act_index =
-			mnl_attr_nest_start_check(buf, size, act_index_cur++);
-		if (!act_index ||
-		    !mnl_attr_put_strz_check(buf, size, TCA_ACT_KIND, "gact"))
-			goto error_nobufs;
-		act = mnl_attr_nest_start_check(buf, size, TCA_ACT_OPTIONS);
-		if (!act)
-			goto error_nobufs;
-		if (!mnl_attr_put_check(buf, size, TCA_GACT_PARMS,
-					sizeof(struct tc_gact),
-					&(struct tc_gact){
-						.action = TC_ACT_SHOT,
-					}))
-			goto error_nobufs;
-		mnl_attr_nest_end(buf, act);
-		mnl_attr_nest_end(buf, act_index);
-		++action;
-		break;
-	case ACTION_OF_POP_VLAN:
-		if (action->type != RTE_FLOW_ACTION_TYPE_OF_POP_VLAN)
-			goto trans;
-		conf.of_push_vlan = NULL;
-		i = TCA_VLAN_ACT_POP;
-		goto action_of_vlan;
-	case ACTION_OF_PUSH_VLAN:
-		if (action->type != RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN)
-			goto trans;
-		conf.of_push_vlan = action->conf;
-		i = TCA_VLAN_ACT_PUSH;
-		goto action_of_vlan;
-	case ACTION_OF_SET_VLAN_VID:
-		if (action->type != RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID)
-			goto trans;
-		conf.of_set_vlan_vid = action->conf;
-		if (na_vlan_id)
-			goto override_na_vlan_id;
-		i = TCA_VLAN_ACT_MODIFY;
-		goto action_of_vlan;
-	case ACTION_OF_SET_VLAN_PCP:
-		if (action->type != RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP)
-			goto trans;
-		conf.of_set_vlan_pcp = action->conf;
-		if (na_vlan_priority)
-			goto override_na_vlan_priority;
-		i = TCA_VLAN_ACT_MODIFY;
-		goto action_of_vlan;
-action_of_vlan:
-		act_index =
-			mnl_attr_nest_start_check(buf, size, act_index_cur++);
-		if (!act_index ||
-		    !mnl_attr_put_strz_check(buf, size, TCA_ACT_KIND, "vlan"))
-			goto error_nobufs;
-		act = mnl_attr_nest_start_check(buf, size, TCA_ACT_OPTIONS);
-		if (!act)
-			goto error_nobufs;
-		if (!mnl_attr_put_check(buf, size, TCA_VLAN_PARMS,
-					sizeof(struct tc_vlan),
-					&(struct tc_vlan){
-						.action = TC_ACT_PIPE,
-						.v_action = i,
-					}))
-			goto error_nobufs;
-		if (i == TCA_VLAN_ACT_POP) {
-			mnl_attr_nest_end(buf, act);
-			mnl_attr_nest_end(buf, act_index);
-			++action;
-			break;
-		}
-		if (i == TCA_VLAN_ACT_PUSH &&
-		    !mnl_attr_put_u16_check(buf, size,
-					    TCA_VLAN_PUSH_VLAN_PROTOCOL,
-					    conf.of_push_vlan->ethertype))
-			goto error_nobufs;
-		na_vlan_id = mnl_nlmsg_get_payload_tail(buf);
-		if (!mnl_attr_put_u16_check(buf, size, TCA_VLAN_PAD, 0))
-			goto error_nobufs;
-		na_vlan_priority = mnl_nlmsg_get_payload_tail(buf);
-		if (!mnl_attr_put_u8_check(buf, size, TCA_VLAN_PAD, 0))
-			goto error_nobufs;
-		mnl_attr_nest_end(buf, act);
-		mnl_attr_nest_end(buf, act_index);
-		if (action->type == RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
-override_na_vlan_id:
-			na_vlan_id->nla_type = TCA_VLAN_PUSH_VLAN_ID;
-			*(uint16_t *)mnl_attr_get_payload(na_vlan_id) =
-				rte_be_to_cpu_16
-				(conf.of_set_vlan_vid->vlan_vid);
-		} else if (action->type ==
-			   RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
-override_na_vlan_priority:
-			na_vlan_priority->nla_type =
-				TCA_VLAN_PUSH_VLAN_PRIORITY;
-			*(uint8_t *)mnl_attr_get_payload(na_vlan_priority) =
-				conf.of_set_vlan_pcp->vlan_pcp;
-		}
-		++action;
-		break;
-	case END:
-		if (item->type != RTE_FLOW_ITEM_TYPE_END ||
-		    action->type != RTE_FLOW_ACTION_TYPE_END)
-			goto trans;
-		if (na_flower_act)
-			mnl_attr_nest_end(buf, na_flower_act);
-		if (na_flower)
-			mnl_attr_nest_end(buf, na_flower);
-		nlh = buf;
-		return nlh->nlmsg_len;
-	}
-	back = trans;
-	trans = mlx5_nl_flow_trans[trans[n - 1]];
-	n = 0;
-	goto trans;
-error_nobufs:
-	if (buf != buf_tmp) {
-		buf = buf_tmp;
-		size = sizeof(buf_tmp);
-		goto init;
-	}
-	return rte_flow_error_set
-		(error, ENOBUFS, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
-		 "generated TC message is too large");
-}
-
-/**
- * Brand rtnetlink buffer with unique handle.
- *
- * This handle should be unique for a given network interface to avoid
- * collisions.
- *
- * @param buf
- *   Flow rule buffer previously initialized by mlx5_nl_flow_transpose().
- * @param handle
- *   Unique 32-bit handle to use.
- */
-void
-mlx5_nl_flow_brand(void *buf, uint32_t handle)
-{
-	struct tcmsg *tcm = mnl_nlmsg_get_payload(buf);
-
-	tcm->tcm_handle = handle;
-}
-
-/**
- * Send Netlink message with acknowledgment.
- *
- * @param nl
- *   Libmnl socket to use.
- * @param nlh
- *   Message to send. This function always raises the NLM_F_ACK flag before
- *   sending.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx5_nl_flow_nl_ack(struct mnl_socket *nl, struct nlmsghdr *nlh)
-{
-	alignas(struct nlmsghdr)
-	uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
-		    nlh->nlmsg_len - sizeof(*nlh)];
-	uint32_t seq = random();
-	int ret;
-
-	nlh->nlmsg_flags |= NLM_F_ACK;
-	nlh->nlmsg_seq = seq;
-	ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
-	if (ret != -1)
-		ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
-	if (ret != -1)
-		ret = mnl_cb_run
-			(ans, ret, seq, mnl_socket_get_portid(nl), NULL, NULL);
-	if (!ret)
-		return 0;
-	rte_errno = errno;
-	return -rte_errno;
-}
-
-/**
- * Create a Netlink flow rule.
- *
- * @param nl
- *   Libmnl socket to use.
- * @param buf
- *   Flow rule buffer previously initialized by mlx5_nl_flow_transpose().
- * @param[out] error
- *   Perform verbose error reporting if not NULL.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-int
-mlx5_nl_flow_create(struct mnl_socket *nl, void *buf,
-		    struct rte_flow_error *error)
-{
-	struct nlmsghdr *nlh = buf;
-
-	nlh->nlmsg_type = RTM_NEWTFILTER;
-	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
-	if (!mlx5_nl_flow_nl_ack(nl, nlh))
-		return 0;
-	return rte_flow_error_set
-		(error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
-		 "netlink: failed to create TC flow rule");
-}
-
-/**
- * Destroy a Netlink flow rule.
- *
- * @param nl
- *   Libmnl socket to use.
- * @param buf
- *   Flow rule buffer previously initialized by mlx5_nl_flow_transpose().
- * @param[out] error
- *   Perform verbose error reporting if not NULL.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-int
-mlx5_nl_flow_destroy(struct mnl_socket *nl, void *buf,
-		     struct rte_flow_error *error)
-{
-	struct nlmsghdr *nlh = buf;
-
-	nlh->nlmsg_type = RTM_DELTFILTER;
-	nlh->nlmsg_flags = NLM_F_REQUEST;
-	if (!mlx5_nl_flow_nl_ack(nl, nlh))
-		return 0;
-	return rte_flow_error_set
-		(error, errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
-		 "netlink: failed to destroy TC flow rule");
-}
-
-/**
- * Initialize ingress qdisc of a given network interface.
- *
- * @param nl
- *   Libmnl socket of the @p NETLINK_ROUTE kind.
- * @param ifindex
- *   Index of network interface to initialize.
- * @param[out] error
- *   Perform verbose error reporting if not NULL.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-int
-mlx5_nl_flow_init(struct mnl_socket *nl, unsigned int ifindex,
-		  struct rte_flow_error *error)
-{
-	struct nlmsghdr *nlh;
-	struct tcmsg *tcm;
-	alignas(struct nlmsghdr)
-	uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];
-
-	/* Destroy existing ingress qdisc and everything attached to it. */
-	nlh = mnl_nlmsg_put_header(buf);
-	nlh->nlmsg_type = RTM_DELQDISC;
-	nlh->nlmsg_flags = NLM_F_REQUEST;
-	tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
-	tcm->tcm_family = AF_UNSPEC;
-	tcm->tcm_ifindex = ifindex;
-	tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
-	tcm->tcm_parent = TC_H_INGRESS;
-	/* Ignore errors when qdisc is already absent. */
-	if (mlx5_nl_flow_nl_ack(nl, nlh) &&
-	    rte_errno != EINVAL && rte_errno != ENOENT)
-		return rte_flow_error_set
-			(error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-			 NULL, "netlink: failed to remove ingress qdisc");
-	/* Create fresh ingress qdisc. */
-	nlh = mnl_nlmsg_put_header(buf);
-	nlh->nlmsg_type = RTM_NEWQDISC;
-	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
-	tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
-	tcm->tcm_family = AF_UNSPEC;
-	tcm->tcm_ifindex = ifindex;
-	tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
-	tcm->tcm_parent = TC_H_INGRESS;
-	mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
-	if (mlx5_nl_flow_nl_ack(nl, nlh))
-		return rte_flow_error_set
-			(error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-			 NULL, "netlink: failed to create ingress qdisc");
-	return 0;
-}
-
-/**
- * Create and configure a libmnl socket for Netlink flow rules.
- *
- * @return
- *   A valid libmnl socket object pointer on success, NULL otherwise and
- *   rte_errno is set.
- */
-struct mnl_socket *
-mlx5_nl_flow_socket_create(void)
-{
-	struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
-
-	if (nl) {
-		mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
-				      sizeof(int));
-		if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
-			return nl;
-	}
-	rte_errno = errno;
-	if (nl)
-		mnl_socket_close(nl);
-	return NULL;
-}
-
-/**
- * Destroy a libmnl socket.
- */
-void
-mlx5_nl_flow_socket_destroy(struct mnl_socket *nl)
-{
-	mnl_socket_close(nl);
-}
-- 
2.11.0

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [dpdk-dev] [PATCH v2 3/3] net/mlx5: add Linux TC flower driver for E-Switch flow
  2018-09-24 19:55 ` [dpdk-dev] [PATCH v2 0/3] net/mlx5: migrate Linux TC flower driver to new flow engine Yongseok Koh
  2018-09-24 19:55   ` [dpdk-dev] [PATCH v2 1/3] net/mlx5: add abstraction for multiple flow drivers Yongseok Koh
  2018-09-24 19:55   ` [dpdk-dev] [PATCH v2 2/3] net/mlx5: remove Netlink flow driver Yongseok Koh
@ 2018-09-24 19:55   ` Yongseok Koh
  2018-10-04 16:16   ` [dpdk-dev] [PATCH v2 0/3] net/mlx5: migrate Linux TC flower driver to new flow engine Thomas Monjalon
  3 siblings, 0 replies; 22+ messages in thread
From: Yongseok Koh @ 2018-09-24 19:55 UTC (permalink / raw)
  To: Thomas Monjalon, Shahaf Shuler; +Cc: dev, Yongseok Koh

Flows having the 'transfer' attribute have to be inserted into the E-Switch
on the NIC, and the control path uses the Linux TC flower interface via a
Netlink socket. This patch adds the flow driver on top of the new flow
engine.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/Makefile        |    1 +
 drivers/net/mlx5/meson.build     |    1 +
 drivers/net/mlx5/mlx5.c          |   33 +
 drivers/net/mlx5/mlx5_flow.c     |    6 +-
 drivers/net/mlx5/mlx5_flow.h     |   20 +
 drivers/net/mlx5/mlx5_flow_tcf.c | 1608 ++++++++++++++++++++++++++++++++++++++
 6 files changed, 1668 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/mlx5/mlx5_flow_tcf.c
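
For reference only (not part of this patch), such transfer flows are
requested through the regular rte_flow API. A minimal sketch follows; the
helper name and port numbers are illustrative:

  #include <rte_flow.h>

  /* Sketch: redirect all traffic received by E-Switch port src_port to
   * dst_port. Assumes both ports belong to the same switch domain. */
  static struct rte_flow *
  transfer_flow_example(uint16_t src_port, uint16_t dst_port)
  {
  	struct rte_flow_attr attr = {
  		.ingress = 1,
  		.transfer = 1, /* Dispatched to MLX5_FLOW_TYPE_TCF. */
  	};
  	struct rte_flow_item pattern[] = {
  		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
  		{ .type = RTE_FLOW_ITEM_TYPE_END },
  	};
  	struct rte_flow_action_port_id dst = { .id = dst_port };
  	struct rte_flow_action actions[] = {
  		{ .type = RTE_FLOW_ACTION_TYPE_PORT_ID, .conf = &dst },
  		{ .type = RTE_FLOW_ACTION_TYPE_END },
  	};
  	struct rte_flow_error error;

  	return rte_flow_create(src_port, &attr, pattern, actions, &error);
  }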

diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index 9c1044808..ca1de9f21 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -32,6 +32,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rss.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_mr.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow_dv.c
+SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow_tcf.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow_verbs.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_socket.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_nl.c
diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
index e5376291c..fd93ac162 100644
--- a/drivers/net/mlx5/meson.build
+++ b/drivers/net/mlx5/meson.build
@@ -32,6 +32,7 @@ if build
 		'mlx5_ethdev.c',
 		'mlx5_flow.c',
 		'mlx5_flow_dv.c',
+		'mlx5_flow_tcf.c',
 		'mlx5_flow_verbs.c',
 		'mlx5_mac.c',
 		'mlx5_mr.c',
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index bb9a63fba..4be6a1cc9 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -44,6 +44,7 @@
 #include "mlx5_rxtx.h"
 #include "mlx5_autoconf.h"
 #include "mlx5_defs.h"
+#include "mlx5_flow.h"
 #include "mlx5_glue.h"
 #include "mlx5_mr.h"
 #include "mlx5_flow.h"
@@ -286,6 +287,8 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 		close(priv->nl_socket_route);
 	if (priv->nl_socket_rdma >= 0)
 		close(priv->nl_socket_rdma);
+	if (priv->mnl_socket)
+		mlx5_flow_tcf_socket_destroy(priv->mnl_socket);
 	ret = mlx5_hrxq_ibv_verify(dev);
 	if (ret)
 		DRV_LOG(WARNING, "port %u some hash Rx queue still remain",
@@ -1135,6 +1138,34 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 	claim_zero(mlx5_mac_addr_add(eth_dev, &mac, 0, 0));
 	if (vf && config.vf_nl_en)
 		mlx5_nl_mac_addr_sync(eth_dev);
+	priv->mnl_socket = mlx5_flow_tcf_socket_create();
+	if (!priv->mnl_socket) {
+		err = -rte_errno;
+		DRV_LOG(WARNING,
+			"flow rules relying on switch offloads will not be"
+			" supported: cannot open libmnl socket: %s",
+			strerror(rte_errno));
+	} else {
+		struct rte_flow_error error;
+		unsigned int ifindex = mlx5_ifindex(eth_dev);
+
+		if (!ifindex) {
+			err = -rte_errno;
+			error.message =
+				"cannot retrieve network interface index";
+		} else {
+			err = mlx5_flow_tcf_init(priv->mnl_socket, ifindex,
+						&error);
+		}
+		if (err) {
+			DRV_LOG(WARNING,
+				"flow rules relying on switch offloads will"
+				" not be supported: %s: %s",
+				error.message, strerror(rte_errno));
+			mlx5_flow_tcf_socket_destroy(priv->mnl_socket);
+			priv->mnl_socket = NULL;
+		}
+	}
 	TAILQ_INIT(&priv->flows);
 	TAILQ_INIT(&priv->ctrl_flows);
 	/* Hint libmlx5 to use PMD allocator for data plane resources */
@@ -1187,6 +1218,8 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 			close(priv->nl_socket_route);
 		if (priv->nl_socket_rdma >= 0)
 			close(priv->nl_socket_rdma);
+		if (priv->mnl_socket)
+			mlx5_flow_tcf_socket_destroy(priv->mnl_socket);
 		if (own_domain_id)
 			claim_zero(rte_eth_switch_domain_free(priv->domain_id));
 		rte_free(priv);
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 54008afa4..7660bee30 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -41,6 +41,7 @@ extern const struct eth_dev_ops mlx5_dev_ops_isolate;
 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
 extern const struct mlx5_flow_driver_ops mlx5_flow_dv_drv_ops;
 #endif
+extern const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops;
 extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops;
 
 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops;
@@ -50,6 +51,7 @@ const struct mlx5_flow_driver_ops *flow_drv_ops[] = {
 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
 	[MLX5_FLOW_TYPE_DV] = &mlx5_flow_dv_drv_ops,
 #endif
+	[MLX5_FLOW_TYPE_TCF] = &mlx5_flow_tcf_drv_ops,
 	[MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_verbs_drv_ops,
 	[MLX5_FLOW_TYPE_MAX] = &mlx5_flow_null_drv_ops
 };
@@ -1609,7 +1611,9 @@ flow_get_drv_type(struct rte_eth_dev *dev __rte_unused,
 	struct priv *priv __rte_unused = dev->data->dev_private;
 	enum mlx5_flow_drv_type type = MLX5_FLOW_TYPE_MAX;
 
-	if (!attr->transfer) {
+	if (attr->transfer) {
+		type = MLX5_FLOW_TYPE_TCF;
+	} else {
 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
 		type = priv->config.dv_flow_en ?  MLX5_FLOW_TYPE_DV :
 						  MLX5_FLOW_TYPE_VERBS;
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 2bc3bee8c..10d700a7f 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -82,6 +82,11 @@
 #define MLX5_ACTION_FLAG (1u << 3)
 #define MLX5_ACTION_MARK (1u << 4)
 #define MLX5_ACTION_COUNT (1u << 5)
+#define MLX5_ACTION_PORT_ID (1u << 6)
+#define MLX5_ACTION_OF_POP_VLAN (1u << 7)
+#define MLX5_ACTION_OF_PUSH_VLAN (1u << 8)
+#define MLX5_ACTION_OF_SET_VLAN_VID (1u << 9)
+#define MLX5_ACTION_OF_SET_VLAN_PCP (1u << 10)
 
 /* possible L3 layers protocols filtering. */
 #define MLX5_IP_PROTOCOL_TCP 6
@@ -131,6 +136,7 @@
 enum mlx5_flow_drv_type {
 	MLX5_FLOW_TYPE_MIN,
 	MLX5_FLOW_TYPE_DV,
+	MLX5_FLOW_TYPE_TCF,
 	MLX5_FLOW_TYPE_VERBS,
 	MLX5_FLOW_TYPE_MAX,
 };
@@ -170,6 +176,12 @@ struct mlx5_flow_dv {
 	int actions_n; /**< number of actions. */
 };
 
+/** Linux TC flower driver for E-Switch flow. */
+struct mlx5_flow_tcf {
+	struct nlmsghdr *nlh;
+	struct tcmsg *tcm;
+};
+
 /* Verbs specification header. */
 struct ibv_spec_header {
 	enum ibv_flow_spec_type type;
@@ -199,6 +211,7 @@ struct mlx5_flow {
 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
 		struct mlx5_flow_dv dv;
 #endif
+		struct mlx5_flow_tcf tcf;
 		struct mlx5_flow_verbs verbs;
 	};
 };
@@ -322,4 +335,11 @@ int mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
 				      struct rte_eth_dev *dev,
 				      struct rte_flow_error *error);
 
+/* mlx5_flow_tcf.c */
+
+int mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
+		       struct rte_flow_error *error);
+struct mnl_socket *mlx5_flow_tcf_socket_create(void);
+void mlx5_flow_tcf_socket_destroy(struct mnl_socket *nl);
+
 #endif /* RTE_PMD_MLX5_FLOW_H_ */
diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
new file mode 100644
index 000000000..14376188e
--- /dev/null
+++ b/drivers/net/mlx5/mlx5_flow_tcf.c
@@ -0,0 +1,1608 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 6WIND S.A.
+ * Copyright 2018 Mellanox Technologies, Ltd
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <libmnl/libmnl.h>
+#include <linux/if_ether.h>
+#include <linux/netlink.h>
+#include <linux/pkt_cls.h>
+#include <linux/pkt_sched.h>
+#include <linux/rtnetlink.h>
+#include <linux/tc_act/tc_gact.h>
+#include <linux/tc_act/tc_mirred.h>
+#include <netinet/in.h>
+#include <stdalign.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+
+#include <rte_byteorder.h>
+#include <rte_errno.h>
+#include <rte_ether.h>
+#include <rte_flow.h>
+#include <rte_malloc.h>
+
+#include "mlx5.h"
+#include "mlx5_flow.h"
+#include "mlx5_autoconf.h"
+
+#ifdef HAVE_TC_ACT_VLAN
+
+#include <linux/tc_act/tc_vlan.h>
+
+#else /* HAVE_TC_ACT_VLAN */
+
+#define TCA_VLAN_ACT_POP 1
+#define TCA_VLAN_ACT_PUSH 2
+#define TCA_VLAN_ACT_MODIFY 3
+#define TCA_VLAN_PARMS 2
+#define TCA_VLAN_PUSH_VLAN_ID 3
+#define TCA_VLAN_PUSH_VLAN_PROTOCOL 4
+#define TCA_VLAN_PAD 5
+#define TCA_VLAN_PUSH_VLAN_PRIORITY 6
+
+struct tc_vlan {
+	tc_gen;
+	int v_action;
+};
+
+#endif /* HAVE_TC_ACT_VLAN */
+
+/* Normally found in linux/netlink.h. */
+#ifndef NETLINK_CAP_ACK
+#define NETLINK_CAP_ACK 10
+#endif
+
+/* Normally found in linux/pkt_sched.h. */
+#ifndef TC_H_MIN_INGRESS
+#define TC_H_MIN_INGRESS 0xfff2u
+#endif
+
+/* Normally found in linux/pkt_cls.h. */
+#ifndef TCA_CLS_FLAGS_SKIP_SW
+#define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
+#endif
+#ifndef HAVE_TCA_FLOWER_ACT
+#define TCA_FLOWER_ACT 3
+#endif
+#ifndef HAVE_TCA_FLOWER_FLAGS
+#define TCA_FLOWER_FLAGS 22
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ETH_TYPE
+#define TCA_FLOWER_KEY_ETH_TYPE 8
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ETH_DST
+#define TCA_FLOWER_KEY_ETH_DST 4
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ETH_DST_MASK
+#define TCA_FLOWER_KEY_ETH_DST_MASK 5
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC
+#define TCA_FLOWER_KEY_ETH_SRC 6
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK
+#define TCA_FLOWER_KEY_ETH_SRC_MASK 7
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_IP_PROTO
+#define TCA_FLOWER_KEY_IP_PROTO 9
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC
+#define TCA_FLOWER_KEY_IPV4_SRC 10
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK
+#define TCA_FLOWER_KEY_IPV4_SRC_MASK 11
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST
+#define TCA_FLOWER_KEY_IPV4_DST 12
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK
+#define TCA_FLOWER_KEY_IPV4_DST_MASK 13
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC
+#define TCA_FLOWER_KEY_IPV6_SRC 14
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK
+#define TCA_FLOWER_KEY_IPV6_SRC_MASK 15
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST
+#define TCA_FLOWER_KEY_IPV6_DST 16
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK
+#define TCA_FLOWER_KEY_IPV6_DST_MASK 17
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC
+#define TCA_FLOWER_KEY_TCP_SRC 18
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK
+#define TCA_FLOWER_KEY_TCP_SRC_MASK 35
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_TCP_DST
+#define TCA_FLOWER_KEY_TCP_DST 19
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_TCP_DST_MASK
+#define TCA_FLOWER_KEY_TCP_DST_MASK 36
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC
+#define TCA_FLOWER_KEY_UDP_SRC 20
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK
+#define TCA_FLOWER_KEY_UDP_SRC_MASK 37
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_UDP_DST
+#define TCA_FLOWER_KEY_UDP_DST 21
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_UDP_DST_MASK
+#define TCA_FLOWER_KEY_UDP_DST_MASK 38
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_VLAN_ID
+#define TCA_FLOWER_KEY_VLAN_ID 23
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_VLAN_PRIO
+#define TCA_FLOWER_KEY_VLAN_PRIO 24
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
+#define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
+#endif
+
+#ifndef IPV6_ADDR_LEN
+#define IPV6_ADDR_LEN 16
+#endif
+
+/** Empty masks for known item types. */
+static const union {
+	struct rte_flow_item_port_id port_id;
+	struct rte_flow_item_eth eth;
+	struct rte_flow_item_vlan vlan;
+	struct rte_flow_item_ipv4 ipv4;
+	struct rte_flow_item_ipv6 ipv6;
+	struct rte_flow_item_tcp tcp;
+	struct rte_flow_item_udp udp;
+} flow_tcf_mask_empty;
+
+/** Supported masks for known item types. */
+static const struct {
+	struct rte_flow_item_port_id port_id;
+	struct rte_flow_item_eth eth;
+	struct rte_flow_item_vlan vlan;
+	struct rte_flow_item_ipv4 ipv4;
+	struct rte_flow_item_ipv6 ipv6;
+	struct rte_flow_item_tcp tcp;
+	struct rte_flow_item_udp udp;
+} flow_tcf_mask_supported = {
+	.port_id = {
+		.id = 0xffffffff,
+	},
+	.eth = {
+		.type = RTE_BE16(0xffff),
+		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+		.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+	},
+	.vlan = {
+		/* PCP and VID only, no DEI. */
+		.tci = RTE_BE16(0xefff),
+		.inner_type = RTE_BE16(0xffff),
+	},
+	.ipv4.hdr = {
+		.next_proto_id = 0xff,
+		.src_addr = RTE_BE32(0xffffffff),
+		.dst_addr = RTE_BE32(0xffffffff),
+	},
+	.ipv6.hdr = {
+		.proto = 0xff,
+		.src_addr =
+			"\xff\xff\xff\xff\xff\xff\xff\xff"
+			"\xff\xff\xff\xff\xff\xff\xff\xff",
+		.dst_addr =
+			"\xff\xff\xff\xff\xff\xff\xff\xff"
+			"\xff\xff\xff\xff\xff\xff\xff\xff",
+	},
+	.tcp.hdr = {
+		.src_port = RTE_BE16(0xffff),
+		.dst_port = RTE_BE16(0xffff),
+	},
+	.udp.hdr = {
+		.src_port = RTE_BE16(0xffff),
+		.dst_port = RTE_BE16(0xffff),
+	},
+};
+
+#define SZ_NLATTR_HDR MNL_ALIGN(sizeof(struct nlattr))
+#define SZ_NLATTR_NEST SZ_NLATTR_HDR
+#define SZ_NLATTR_DATA_OF(len) MNL_ALIGN(SZ_NLATTR_HDR + (len))
+#define SZ_NLATTR_TYPE_OF(typ) SZ_NLATTR_DATA_OF(sizeof(typ))
+#define SZ_NLATTR_STRZ_OF(str) SZ_NLATTR_DATA_OF(strlen(str) + 1)
+
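+/*
+ * Size example (illustration only): SZ_NLATTR_TYPE_OF(uint16_t) is
+ * MNL_ALIGN(sizeof(struct nlattr) + 2) = 8 bytes, i.e. the 4-byte
+ * attribute header plus 2 bytes of payload rounded up to the 4-byte
+ * Netlink alignment. SZ_NLATTR_STRZ_OF("flower") likewise covers the
+ * header plus the NUL-terminated string, aligned.
+ */
+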
+#define PTOI_TABLE_SZ_MAX(dev) (mlx5_dev_to_port_id((dev)->device, NULL, 0) + 2)
+
+/** DPDK port to network interface index (ifindex) conversion. */
+struct flow_tcf_ptoi {
+	uint16_t port_id; /**< DPDK port ID. */
+	unsigned int ifindex; /**< Network interface index. */
+};
+
+#define MLX5_TCF_FATE_ACTIONS (MLX5_ACTION_DROP | MLX5_ACTION_PORT_ID)
+
+/**
+ * Retrieve mask for pattern item.
+ *
+ * This function does basic sanity checks on a pattern item in order to
+ * return the most appropriate mask for it.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] mask_default
+ *   Default mask for pattern item as specified by the flow API.
+ * @param[in] mask_supported
+ *   Mask fields supported by the implementation.
+ * @param[in] mask_empty
+ *   Empty mask to return when there is no specification.
+ * @param[in] mask_size
+ *   Size of @p mask_supported in bytes.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   Either @p item->mask or one of the mask parameters on success, NULL
+ *   otherwise and rte_errno is set.
+ */
+static const void *
+flow_tcf_item_mask(const struct rte_flow_item *item, const void *mask_default,
+		   const void *mask_supported, const void *mask_empty,
+		   size_t mask_size, struct rte_flow_error *error)
+{
+	const uint8_t *mask;
+	size_t i;
+
+	/* item->last and item->mask cannot exist without item->spec. */
+	if (!item->spec && (item->mask || item->last)) {
+		rte_flow_error_set(error, EINVAL,
+				   RTE_FLOW_ERROR_TYPE_ITEM, item,
+				   "\"mask\" or \"last\" field provided without"
+				   " a corresponding \"spec\"");
+		return NULL;
+	}
+	/* No spec, no mask, no problem. */
+	if (!item->spec)
+		return mask_empty;
+	mask = item->mask ? item->mask : mask_default;
+	assert(mask);
+	/*
+	 * Single-pass check to make sure that:
+	 * - Mask is supported, no bits are set outside mask_supported.
+	 * - Both item->spec and item->last are included in mask.
+	 */
+	for (i = 0; i != mask_size; ++i) {
+		if (!mask[i])
+			continue;
+		if ((mask[i] | ((const uint8_t *)mask_supported)[i]) !=
+		    ((const uint8_t *)mask_supported)[i]) {
+			rte_flow_error_set(error, ENOTSUP,
+					   RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
+					   "unsupported field found"
+					   " in \"mask\"");
+			return NULL;
+		}
+		if (item->last &&
+		    (((const uint8_t *)item->spec)[i] & mask[i]) !=
+		    (((const uint8_t *)item->last)[i] & mask[i])) {
+			rte_flow_error_set(error, ENOTSUP,
+					   RTE_FLOW_ERROR_TYPE_ITEM_LAST,
+					   item->last,
+					   "range between \"spec\" and \"last\""
+					   " not comprised in \"mask\"");
+			return NULL;
+		}
+	}
+	return mask;
+}
+
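+/*
+ * Usage sketch: an item without a spec (e.g. { .type =
+ * RTE_FLOW_ITEM_TYPE_ETH }) resolves to &flow_tcf_mask_empty.eth,
+ * letting callers skip key emission entirely, while an explicit
+ * item->mask is returned as-is once checked against
+ * flow_tcf_mask_supported.eth.
+ */
+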
+/**
+ * Build a conversion table between port ID and ifindex.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[out] ptoi
+ *   Pointer to ptoi table.
+ * @param[in] len
+ *   Size of ptoi table provided.
+ *
+ * @return
+ *   Size of ptoi table filled.
+ */
+static unsigned int
+flow_tcf_build_ptoi_table(struct rte_eth_dev *dev, struct flow_tcf_ptoi *ptoi,
+			  unsigned int len)
+{
+	unsigned int n = mlx5_dev_to_port_id(dev->device, NULL, 0);
+	uint16_t port_id[n + 1];
+	unsigned int i;
+	unsigned int own = 0;
+
+	/* At least one port is needed when no switch domain is present. */
+	if (!n) {
+		n = 1;
+		port_id[0] = dev->data->port_id;
+	} else {
+		n = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, n), n);
+	}
+	if (n > len)
+		return 0;
+	for (i = 0; i != n; ++i) {
+		struct rte_eth_dev_info dev_info;
+
+		rte_eth_dev_info_get(port_id[i], &dev_info);
+		if (port_id[i] == dev->data->port_id)
+			own = i;
+		ptoi[i].port_id = port_id[i];
+		ptoi[i].ifindex = dev_info.if_index;
+	}
+	/* Ensure first entry of ptoi[] is the current device. */
+	if (own) {
+		ptoi[n] = ptoi[0];
+		ptoi[0] = ptoi[own];
+		ptoi[own] = ptoi[n];
+	}
+	/* An entry with zero ifindex terminates ptoi[]. */
+	ptoi[n].port_id = 0;
+	ptoi[n].ifindex = 0;
+	return n;
+}
+
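+/*
+ * Layout example: queried from port 1 of a two-port switch domain,
+ * the resulting table reads { {1, ifindex1}, {0, ifindex0}, {0, 0} };
+ * the caller's own port is swapped to the front and a zeroed entry
+ * terminates the array.
+ */
+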
+/**
+ * Verify the @p attr will be correctly understood by the E-switch.
+ *
+ * @param[in] attr
+ *   Pointer to flow attributes
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_tcf_validate_attributes(const struct rte_flow_attr *attr,
+			     struct rte_flow_error *error)
+{
+	/*
+	 * Supported attributes: no groups, some priorities and ingress only.
+	 * Don't care about transfer as it is the caller's problem.
+	 */
+	if (attr->group)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP, attr,
+					  "groups are not supported");
+	if (attr->priority > 0xfffe)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
+					  attr,
+					  "lowest priority level is 0xfffe");
+	if (!attr->ingress)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
+					  attr, "only ingress is supported");
+	if (attr->egress)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
+					  attr, "egress is not supported");
+	return 0;
+}
+
+/**
+ * Validate flow for E-Switch.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device.
+ * @param[in] attr
+ *   Pointer to the flow attributes.
+ * @param[in] items
+ *   Pointer to the list of items.
+ * @param[in] actions
+ *   Pointer to the list of actions.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_tcf_validate(struct rte_eth_dev *dev,
+		  const struct rte_flow_attr *attr,
+		  const struct rte_flow_item items[],
+		  const struct rte_flow_action actions[],
+		  struct rte_flow_error *error)
+{
+	union {
+		const struct rte_flow_item_port_id *port_id;
+		const struct rte_flow_item_eth *eth;
+		const struct rte_flow_item_vlan *vlan;
+		const struct rte_flow_item_ipv4 *ipv4;
+		const struct rte_flow_item_ipv6 *ipv6;
+		const struct rte_flow_item_tcp *tcp;
+		const struct rte_flow_item_udp *udp;
+	} spec, mask;
+	union {
+		const struct rte_flow_action_port_id *port_id;
+		const struct rte_flow_action_of_push_vlan *of_push_vlan;
+		const struct rte_flow_action_of_set_vlan_vid *
+			of_set_vlan_vid;
+		const struct rte_flow_action_of_set_vlan_pcp *
+			of_set_vlan_pcp;
+	} conf;
+	uint32_t item_flags = 0;
+	uint32_t action_flags = 0;
+	uint8_t next_protocol = -1;
+	unsigned int tcm_ifindex = 0;
+	struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
+	bool in_port_id_set = false;
+	int ret;
+
+	claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
+						PTOI_TABLE_SZ_MAX(dev)));
+	ret = flow_tcf_validate_attributes(attr, error);
+	if (ret < 0)
+		return ret;
+	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
+		unsigned int i;
+
+		switch (items->type) {
+		case RTE_FLOW_ITEM_TYPE_VOID:
+			break;
+		case RTE_FLOW_ITEM_TYPE_PORT_ID:
+			mask.port_id = flow_tcf_item_mask
+				(items, &rte_flow_item_port_id_mask,
+				 &flow_tcf_mask_supported.port_id,
+				 &flow_tcf_mask_empty.port_id,
+				 sizeof(flow_tcf_mask_supported.port_id),
+				 error);
+			if (!mask.port_id)
+				return -rte_errno;
+			if (mask.port_id == &flow_tcf_mask_empty.port_id) {
+				in_port_id_set = 1;
+				break;
+			}
+			spec.port_id = items->spec;
+			if (mask.port_id->id && mask.port_id->id != 0xffffffff)
+				return rte_flow_error_set
+					(error, ENOTSUP,
+					 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
+					 mask.port_id,
+					 "no support for partial mask on"
+					 " \"id\" field");
+			if (!mask.port_id->id)
+				i = 0;
+			else
+				for (i = 0; ptoi[i].ifindex; ++i)
+					if (ptoi[i].port_id == spec.port_id->id)
+						break;
+			if (!ptoi[i].ifindex)
+				return rte_flow_error_set
+					(error, ENODEV,
+					 RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
+					 spec.port_id,
+					 "missing data to convert port ID to"
+					 " ifindex");
+			if (in_port_id_set && ptoi[i].ifindex != tcm_ifindex)
+				return rte_flow_error_set
+					(error, ENOTSUP,
+					 RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
+					 spec.port_id,
+					 "cannot match traffic for"
+					 " several port IDs through"
+					 " a single flow rule");
+			tcm_ifindex = ptoi[i].ifindex;
+			in_port_id_set = 1;
+			break;
+		case RTE_FLOW_ITEM_TYPE_ETH:
+			ret = mlx5_flow_validate_item_eth(items, item_flags,
+							  error);
+			if (ret < 0)
+				return ret;
+			item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
+			/* TODO:
+			 * Redundant check due to different supported mask.
+			 * Same for the rest of items.
+			 */
+			mask.eth = flow_tcf_item_mask
+				(items, &rte_flow_item_eth_mask,
+				 &flow_tcf_mask_supported.eth,
+				 &flow_tcf_mask_empty.eth,
+				 sizeof(flow_tcf_mask_supported.eth),
+				 error);
+			if (!mask.eth)
+				return -rte_errno;
+			if (mask.eth->type && mask.eth->type !=
+			    RTE_BE16(0xffff))
+				return rte_flow_error_set
+					(error, ENOTSUP,
+					 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
+					 mask.eth,
+					 "no support for partial mask on"
+					 " \"type\" field");
+			break;
+		case RTE_FLOW_ITEM_TYPE_VLAN:
+			ret = mlx5_flow_validate_item_vlan(items, item_flags,
+							   error);
+			if (ret < 0)
+				return ret;
+			item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
+			mask.vlan = flow_tcf_item_mask
+				(items, &rte_flow_item_vlan_mask,
+				 &flow_tcf_mask_supported.vlan,
+				 &flow_tcf_mask_empty.vlan,
+				 sizeof(flow_tcf_mask_supported.vlan),
+				 error);
+			if (!mask.vlan)
+				return -rte_errno;
+			if ((mask.vlan->tci & RTE_BE16(0xe000) &&
+			     (mask.vlan->tci & RTE_BE16(0xe000)) !=
+			      RTE_BE16(0xe000)) ||
+			    (mask.vlan->tci & RTE_BE16(0x0fff) &&
+			     (mask.vlan->tci & RTE_BE16(0x0fff)) !=
+			      RTE_BE16(0x0fff)) ||
+			    (mask.vlan->inner_type &&
+			     mask.vlan->inner_type != RTE_BE16(0xffff)))
+				return rte_flow_error_set
+					(error, ENOTSUP,
+					 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
+					 mask.vlan,
+					 "no support for partial masks on"
+					 " \"tci\" (PCP and VID parts) and"
+					 " \"inner_type\" fields");
+			break;
+		case RTE_FLOW_ITEM_TYPE_IPV4:
+			ret = mlx5_flow_validate_item_ipv4(items, item_flags,
+							   error);
+			if (ret < 0)
+				return ret;
+			item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
+			mask.ipv4 = flow_tcf_item_mask
+				(items, &rte_flow_item_ipv4_mask,
+				 &flow_tcf_mask_supported.ipv4,
+				 &flow_tcf_mask_empty.ipv4,
+				 sizeof(flow_tcf_mask_supported.ipv4),
+				 error);
+			if (!mask.ipv4)
+				return -rte_errno;
+			if (mask.ipv4->hdr.next_proto_id &&
+			    mask.ipv4->hdr.next_proto_id != 0xff)
+				return rte_flow_error_set
+					(error, ENOTSUP,
+					 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
+					 mask.ipv4,
+					 "no support for partial mask on"
+					 " \"hdr.next_proto_id\" field");
+			else if (mask.ipv4->hdr.next_proto_id)
+				next_protocol =
+					((const struct rte_flow_item_ipv4 *)
+					 (items->spec))->hdr.next_proto_id;
+			break;
+		case RTE_FLOW_ITEM_TYPE_IPV6:
+			ret = mlx5_flow_validate_item_ipv6(items, item_flags,
+							   error);
+			if (ret < 0)
+				return ret;
+			item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
+			mask.ipv6 = flow_tcf_item_mask
+				(items, &rte_flow_item_ipv6_mask,
+				 &flow_tcf_mask_supported.ipv6,
+				 &flow_tcf_mask_empty.ipv6,
+				 sizeof(flow_tcf_mask_supported.ipv6),
+				 error);
+			if (!mask.ipv6)
+				return -rte_errno;
+			if (mask.ipv6->hdr.proto &&
+			    mask.ipv6->hdr.proto != 0xff)
+				return rte_flow_error_set
+					(error, ENOTSUP,
+					 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
+					 mask.ipv6,
+					 "no support for partial mask on"
+					 " \"hdr.proto\" field");
+			else if (mask.ipv6->hdr.proto)
+				next_protocol =
+					((const struct rte_flow_item_ipv6 *)
+					 (items->spec))->hdr.proto;
+			break;
+		case RTE_FLOW_ITEM_TYPE_UDP:
+			ret = mlx5_flow_validate_item_udp(items, item_flags,
+							  next_protocol, error);
+			if (ret < 0)
+				return ret;
+			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
+			mask.udp = flow_tcf_item_mask
+				(items, &rte_flow_item_udp_mask,
+				 &flow_tcf_mask_supported.udp,
+				 &flow_tcf_mask_empty.udp,
+				 sizeof(flow_tcf_mask_supported.udp),
+				 error);
+			if (!mask.udp)
+				return -rte_errno;
+			break;
+		case RTE_FLOW_ITEM_TYPE_TCP:
+			ret = mlx5_flow_validate_item_tcp(items, item_flags,
+							  next_protocol, error);
+			if (ret < 0)
+				return ret;
+			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
+			mask.tcp = flow_tcf_item_mask
+				(items, &rte_flow_item_tcp_mask,
+				 &flow_tcf_mask_supported.tcp,
+				 &flow_tcf_mask_empty.tcp,
+				 sizeof(flow_tcf_mask_supported.tcp),
+				 error);
+			if (!mask.tcp)
+				return -rte_errno;
+			break;
+		default:
+			return rte_flow_error_set(error, ENOTSUP,
+						  RTE_FLOW_ERROR_TYPE_ITEM,
+						  NULL, "item not supported");
+		}
+	}
+	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+		unsigned int i;
+
+		switch (actions->type) {
+		case RTE_FLOW_ACTION_TYPE_VOID:
+			break;
+		case RTE_FLOW_ACTION_TYPE_PORT_ID:
+			if (action_flags & MLX5_TCF_FATE_ACTIONS)
+				return rte_flow_error_set
+					(error, ENOTSUP,
+					 RTE_FLOW_ERROR_TYPE_ACTION, actions,
+					 "can't have multiple fate actions");
+			conf.port_id = actions->conf;
+			if (conf.port_id->original)
+				i = 0;
+			else
+				for (i = 0; ptoi[i].ifindex; ++i)
+					if (ptoi[i].port_id == conf.port_id->id)
+						break;
+			if (!ptoi[i].ifindex)
+				return rte_flow_error_set
+					(error, ENODEV,
+					 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+					 conf.port_id,
+					 "missing data to convert port ID to"
+					 " ifindex");
+			action_flags |= MLX5_ACTION_PORT_ID;
+			break;
+		case RTE_FLOW_ACTION_TYPE_DROP:
+			if (action_flags & MLX5_TCF_FATE_ACTIONS)
+				return rte_flow_error_set
+					(error, ENOTSUP,
+					 RTE_FLOW_ERROR_TYPE_ACTION, actions,
+					 "can't have multiple fate actions");
+			action_flags |= MLX5_ACTION_DROP;
+			break;
+		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
+			action_flags |= MLX5_ACTION_OF_POP_VLAN;
+			break;
+		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
+			action_flags |= MLX5_ACTION_OF_PUSH_VLAN;
+			break;
+		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
+			action_flags |= MLX5_ACTION_OF_SET_VLAN_VID;
+			break;
+		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
+			action_flags |= MLX5_ACTION_OF_SET_VLAN_PCP;
+			break;
+		default:
+			return rte_flow_error_set(error, ENOTSUP,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  actions,
+						  "action not supported");
+		}
+	}
+	return 0;
+}
+
+/**
+ * Calculate maximum size of memory for flow items of Linux TC flower and
+ * extract specified items.
+ *
+ * @param[in] items
+ *   Pointer to the list of items.
+ * @param[out] item_flags
+ *   Pointer to the detected items.
+ *
+ * @return
+ *   Maximum size of memory for items.
+ */
+static int
+flow_tcf_get_items_and_size(const struct rte_flow_item items[],
+			    uint64_t *item_flags)
+{
+	int size = 0;
+	uint64_t flags = 0;
+
+	size += SZ_NLATTR_STRZ_OF("flower") +
+		SZ_NLATTR_NEST + /* TCA_OPTIONS. */
+		SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CLS_FLAGS_SKIP_SW. */
+	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
+		switch (items->type) {
+		case RTE_FLOW_ITEM_TYPE_VOID:
+			break;
+		case RTE_FLOW_ITEM_TYPE_PORT_ID:
+			break;
+		case RTE_FLOW_ITEM_TYPE_ETH:
+			size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
+				SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) * 4;
+				/* dst/src MAC addr and mask. */
+			flags |= MLX5_FLOW_LAYER_OUTER_L2;
+			break;
+		case RTE_FLOW_ITEM_TYPE_VLAN:
+			size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
+				SZ_NLATTR_TYPE_OF(uint16_t) +
+				/* VLAN Ether type. */
+				SZ_NLATTR_TYPE_OF(uint8_t) + /* VLAN prio. */
+				SZ_NLATTR_TYPE_OF(uint16_t); /* VLAN ID. */
+			flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
+			break;
+		case RTE_FLOW_ITEM_TYPE_IPV4:
+			size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
+				SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
+				SZ_NLATTR_TYPE_OF(uint32_t) * 4;
+				/* dst/src IP addr and mask. */
+			flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
+			break;
+		case RTE_FLOW_ITEM_TYPE_IPV6:
+			size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
+				SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
+				SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN) * 4;
+				/* dst/src IP addr and mask. */
+			flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
+			break;
+		case RTE_FLOW_ITEM_TYPE_UDP:
+			size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
+				SZ_NLATTR_TYPE_OF(uint16_t) * 4;
+				/* dst/src port and mask. */
+			flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
+			break;
+		case RTE_FLOW_ITEM_TYPE_TCP:
+			size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
+				SZ_NLATTR_TYPE_OF(uint16_t) * 4;
+				/* dst/src port and mask. */
+			flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
+			break;
+		default:
+			DRV_LOG(WARNING,
+				"unsupported item %p type %d,"
+				" items must be validated before flow creation",
+				(const void *)items, items->type);
+			break;
+		}
+	}
+	*item_flags = flags;
+	return size;
+}
+
+/**
+ * Calculate maximum size of memory for flow actions of Linux TC flower and
+ * extract specified actions.
+ *
+ * @param[in] actions
+ *   Pointer to the list of actions.
+ * @param[out] action_flags
+ *   Pointer to the detected actions.
+ *
+ * @return
+ *   Maximum size of memory for actions.
+ */
+static int
+flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
+			      uint64_t *action_flags)
+{
+	int size = 0;
+	uint64_t flags = 0;
+
+	size += SZ_NLATTR_NEST; /* TCA_FLOWER_ACT. */
+	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+		switch (actions->type) {
+		case RTE_FLOW_ACTION_TYPE_VOID:
+			break;
+		case RTE_FLOW_ACTION_TYPE_PORT_ID:
+			size += SZ_NLATTR_NEST + /* na_act_index. */
+				SZ_NLATTR_STRZ_OF("mirred") +
+				SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
+				SZ_NLATTR_TYPE_OF(struct tc_mirred);
+			flags |= MLX5_ACTION_PORT_ID;
+			break;
+		case RTE_FLOW_ACTION_TYPE_DROP:
+			size += SZ_NLATTR_NEST + /* na_act_index. */
+				SZ_NLATTR_STRZ_OF("gact") +
+				SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
+				SZ_NLATTR_TYPE_OF(struct tc_gact);
+			flags |= MLX5_ACTION_DROP;
+			break;
+		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
+			flags |= MLX5_ACTION_OF_POP_VLAN;
+			goto action_of_vlan;
+		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
+			flags |= MLX5_ACTION_OF_PUSH_VLAN;
+			goto action_of_vlan;
+		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
+			flags |= MLX5_ACTION_OF_SET_VLAN_VID;
+			goto action_of_vlan;
+		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
+			flags |= MLX5_ACTION_OF_SET_VLAN_PCP;
+			goto action_of_vlan;
+action_of_vlan:
+			size += SZ_NLATTR_NEST + /* na_act_index. */
+				SZ_NLATTR_STRZ_OF("vlan") +
+				SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
+				SZ_NLATTR_TYPE_OF(struct tc_vlan) +
+				SZ_NLATTR_TYPE_OF(uint16_t) +
+				/* VLAN protocol. */
+				SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
+				SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
+			break;
+		default:
+			DRV_LOG(WARNING,
+				"unsupported action %p type %d,"
+				" actions must be validated before flow"
+				" creation",
+				(const void *)actions, actions->type);
+			break;
+		}
+	}
+	*action_flags = flags;
+	return size;
+}
+
+/**
+ * Brand rtnetlink buffer with unique handle.
+ *
+ * This handle should be unique for a given network interface to avoid
+ * collisions.
+ *
+ * @param nlh
+ *   Pointer to Netlink message.
+ * @param handle
+ *   Unique 32-bit handle to use.
+ */
+static void
+flow_tcf_nl_brand(struct nlmsghdr *nlh, uint32_t handle)
+{
+	struct tcmsg *tcm = mnl_nlmsg_get_payload(nlh);
+
+	tcm->tcm_handle = handle;
+	DRV_LOG(DEBUG, "Netlink msg %p is branded with handle %x",
+		(void *)nlh, handle);
+}
+
+/**
+ * Prepare a flow object for Linux TC flower. It calculates the maximum size of
+ * memory required, allocates the memory, initializes Netlink message headers
+ * and sets a unique TC message handle.
+ *
+ * @param[in] attr
+ *   Pointer to the flow attributes.
+ * @param[in] items
+ *   Pointer to the list of items.
+ * @param[in] actions
+ *   Pointer to the list of actions.
+ * @param[out] item_flags
+ *   Pointer to bit mask of all items detected.
+ * @param[out] action_flags
+ *   Pointer to bit mask of all actions detected.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   Pointer to mlx5_flow object on success,
+ *   otherwise NULL and rte_errno is set.
+ */
+static struct mlx5_flow *
+flow_tcf_prepare(const struct rte_flow_attr *attr __rte_unused,
+		 const struct rte_flow_item items[],
+		 const struct rte_flow_action actions[],
+		 uint64_t *item_flags, uint64_t *action_flags,
+		 struct rte_flow_error *error)
+{
+	size_t size = sizeof(struct mlx5_flow) +
+		      MNL_ALIGN(sizeof(struct nlmsghdr)) +
+		      MNL_ALIGN(sizeof(struct tcmsg));
+	struct mlx5_flow *dev_flow;
+	struct nlmsghdr *nlh;
+	struct tcmsg *tcm;
+
+	size += flow_tcf_get_items_and_size(items, item_flags);
+	size += flow_tcf_get_actions_and_size(actions, action_flags);
+	dev_flow = rte_zmalloc(__func__, size, MNL_ALIGNTO);
+	if (!dev_flow) {
+		rte_flow_error_set(error, ENOMEM,
+				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+				   "not enough memory to create E-Switch flow");
+		return NULL;
+	}
+	nlh = mnl_nlmsg_put_header((void *)(dev_flow + 1));
+	tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
+	*dev_flow = (struct mlx5_flow){
+		.tcf = (struct mlx5_flow_tcf){
+			.nlh = nlh,
+			.tcm = tcm,
+		},
+	};
+	/*
+	 * Generate a reasonably unique handle based on the address of the
+	 * target buffer.
+	 *
+	 * This is straightforward on 32-bit systems where the flow pointer can
+	 * be used directly. Otherwise, its least significant part is taken
+	 * after shifting it right by the base-2 logarithm of the previous
+	 * power of two of the pointed buffer size.
+	 */
+	if (sizeof(dev_flow) <= 4)
+		flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow);
+	else
+		flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow >>
+				       rte_log2_u32(rte_align32prevpow2(size)));
+	return dev_flow;
+}
+
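+/*
+ * Branding example: on a 64-bit host with size == 600,
+ * rte_align32prevpow2(600) == 512, so the handle is the flow pointer
+ * shifted right by log2(512) == 9 bits, discarding the low-order bits
+ * that consecutive allocations of this size tend to share.
+ */
+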
+/**
+ * Translate flow for Linux TC flower and construct Netlink message.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device.
+ * @param[in, out] dev_flow
+ *   Pointer to the sub flow.
+ * @param[in] attr
+ *   Pointer to the flow attributes.
+ * @param[in] items
+ *   Pointer to the list of items.
+ * @param[in] actions
+ *   Pointer to the list of actions.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
+		   const struct rte_flow_attr *attr,
+		   const struct rte_flow_item items[],
+		   const struct rte_flow_action actions[],
+		   struct rte_flow_error *error)
+{
+	union {
+		const struct rte_flow_item_port_id *port_id;
+		const struct rte_flow_item_eth *eth;
+		const struct rte_flow_item_vlan *vlan;
+		const struct rte_flow_item_ipv4 *ipv4;
+		const struct rte_flow_item_ipv6 *ipv6;
+		const struct rte_flow_item_tcp *tcp;
+		const struct rte_flow_item_udp *udp;
+	} spec, mask;
+	union {
+		const struct rte_flow_action_port_id *port_id;
+		const struct rte_flow_action_of_push_vlan *of_push_vlan;
+		const struct rte_flow_action_of_set_vlan_vid *
+			of_set_vlan_vid;
+		const struct rte_flow_action_of_set_vlan_pcp *
+			of_set_vlan_pcp;
+	} conf;
+	struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
+	struct nlmsghdr *nlh = dev_flow->tcf.nlh;
+	struct tcmsg *tcm = dev_flow->tcf.tcm;
+	uint32_t na_act_index_cur;
+	bool eth_type_set = 0;
+	bool vlan_present = 0;
+	bool vlan_eth_type_set = 0;
+	bool ip_proto_set = 0;
+	struct nlattr *na_flower;
+	struct nlattr *na_flower_act;
+	struct nlattr *na_vlan_id = NULL;
+	struct nlattr *na_vlan_priority = NULL;
+
+	claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
+						PTOI_TABLE_SZ_MAX(dev)));
+	nlh = dev_flow->tcf.nlh;
+	tcm = dev_flow->tcf.tcm;
+	/* Prepare API must have been called beforehand. */
+	assert(nlh != NULL && tcm != NULL);
+	tcm->tcm_family = AF_UNSPEC;
+	tcm->tcm_ifindex = ptoi[0].ifindex;
+	tcm->tcm_parent = TC_H_MAKE(TC_H_INGRESS, TC_H_MIN_INGRESS);
+	/*
+	 * Priority cannot be zero to prevent the kernel from picking one
+	 * automatically.
+	 */
+	tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
+				  RTE_BE16(ETH_P_ALL));
+	mnl_attr_put_strz(nlh, TCA_KIND, "flower");
+	na_flower = mnl_attr_nest_start(nlh, TCA_OPTIONS);
+	mnl_attr_put_u32(nlh, TCA_FLOWER_FLAGS, TCA_CLS_FLAGS_SKIP_SW);
+	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
+		unsigned int i;
+
+		switch (items->type) {
+		case RTE_FLOW_ITEM_TYPE_VOID:
+			break;
+		case RTE_FLOW_ITEM_TYPE_PORT_ID:
+			mask.port_id = flow_tcf_item_mask
+				(items, &rte_flow_item_port_id_mask,
+				 &flow_tcf_mask_supported.port_id,
+				 &flow_tcf_mask_empty.port_id,
+				 sizeof(flow_tcf_mask_supported.port_id),
+				 error);
+			assert(mask.port_id);
+			if (mask.port_id == &flow_tcf_mask_empty.port_id)
+				break;
+			spec.port_id = items->spec;
+			if (!mask.port_id->id)
+				i = 0;
+			else
+				for (i = 0; ptoi[i].ifindex; ++i)
+					if (ptoi[i].port_id == spec.port_id->id)
+						break;
+			assert(ptoi[i].ifindex);
+			tcm->tcm_ifindex = ptoi[i].ifindex;
+			break;
+		case RTE_FLOW_ITEM_TYPE_ETH:
+			mask.eth = flow_tcf_item_mask
+				(items, &rte_flow_item_eth_mask,
+				 &flow_tcf_mask_supported.eth,
+				 &flow_tcf_mask_empty.eth,
+				 sizeof(flow_tcf_mask_supported.eth),
+				 error);
+			assert(mask.eth);
+			if (mask.eth == &flow_tcf_mask_empty.eth)
+				break;
+			spec.eth = items->spec;
+			if (mask.eth->type) {
+				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
+						 spec.eth->type);
+				eth_type_set = 1;
+			}
+			if (!is_zero_ether_addr(&mask.eth->dst)) {
+				mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST,
+					     ETHER_ADDR_LEN,
+					     spec.eth->dst.addr_bytes);
+				mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST_MASK,
+					     ETHER_ADDR_LEN,
+					     mask.eth->dst.addr_bytes);
+			}
+			if (!is_zero_ether_addr(&mask.eth->src)) {
+				mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC,
+					     ETHER_ADDR_LEN,
+					     spec.eth->src.addr_bytes);
+				mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC_MASK,
+					     ETHER_ADDR_LEN,
+					     mask.eth->src.addr_bytes);
+			}
+			break;
+		case RTE_FLOW_ITEM_TYPE_VLAN:
+			mask.vlan = flow_tcf_item_mask
+				(items, &rte_flow_item_vlan_mask,
+				 &flow_tcf_mask_supported.vlan,
+				 &flow_tcf_mask_empty.vlan,
+				 sizeof(flow_tcf_mask_supported.vlan),
+				 error);
+			assert(mask.vlan);
+			if (!eth_type_set)
+				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
+						 RTE_BE16(ETH_P_8021Q));
+			eth_type_set = 1;
+			vlan_present = 1;
+			if (mask.vlan == &flow_tcf_mask_empty.vlan)
+				break;
+			spec.vlan = items->spec;
+			if (mask.vlan->inner_type) {
+				mnl_attr_put_u16(nlh,
+						 TCA_FLOWER_KEY_VLAN_ETH_TYPE,
+						 spec.vlan->inner_type);
+				vlan_eth_type_set = 1;
+			}
+			if (mask.vlan->tci & RTE_BE16(0xe000))
+				mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_VLAN_PRIO,
+						(rte_be_to_cpu_16
+						 (spec.vlan->tci) >> 13) & 0x7);
+			if (mask.vlan->tci & RTE_BE16(0x0fff))
+				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_VLAN_ID,
+						 rte_be_to_cpu_16
+						 (spec.vlan->tci &
+						  RTE_BE16(0x0fff)));
+			break;
+		case RTE_FLOW_ITEM_TYPE_IPV4:
+			mask.ipv4 = flow_tcf_item_mask
+				(items, &rte_flow_item_ipv4_mask,
+				 &flow_tcf_mask_supported.ipv4,
+				 &flow_tcf_mask_empty.ipv4,
+				 sizeof(flow_tcf_mask_supported.ipv4),
+				 error);
+			assert(mask.ipv4);
+			if (!eth_type_set || !vlan_eth_type_set)
+				mnl_attr_put_u16(nlh,
+						 vlan_present ?
+						 TCA_FLOWER_KEY_VLAN_ETH_TYPE :
+						 TCA_FLOWER_KEY_ETH_TYPE,
+						 RTE_BE16(ETH_P_IP));
+			eth_type_set = 1;
+			vlan_eth_type_set = 1;
+			if (mask.ipv4 == &flow_tcf_mask_empty.ipv4)
+				break;
+			spec.ipv4 = items->spec;
+			if (mask.ipv4->hdr.next_proto_id) {
+				mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
+						spec.ipv4->hdr.next_proto_id);
+				ip_proto_set = 1;
+			}
+			if (mask.ipv4->hdr.src_addr) {
+				mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_SRC,
+						 spec.ipv4->hdr.src_addr);
+				mnl_attr_put_u32(nlh,
+						 TCA_FLOWER_KEY_IPV4_SRC_MASK,
+						 mask.ipv4->hdr.src_addr);
+			}
+			if (mask.ipv4->hdr.dst_addr) {
+				mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_DST,
+						 spec.ipv4->hdr.dst_addr);
+				mnl_attr_put_u32(nlh,
+						 TCA_FLOWER_KEY_IPV4_DST_MASK,
+						 mask.ipv4->hdr.dst_addr);
+			}
+			break;
+		case RTE_FLOW_ITEM_TYPE_IPV6:
+			mask.ipv6 = flow_tcf_item_mask
+				(items, &rte_flow_item_ipv6_mask,
+				 &flow_tcf_mask_supported.ipv6,
+				 &flow_tcf_mask_empty.ipv6,
+				 sizeof(flow_tcf_mask_supported.ipv6),
+				 error);
+			assert(mask.ipv6);
+			if (!eth_type_set || !vlan_eth_type_set)
+				mnl_attr_put_u16(nlh,
+						 vlan_present ?
+						 TCA_FLOWER_KEY_VLAN_ETH_TYPE :
+						 TCA_FLOWER_KEY_ETH_TYPE,
+						 RTE_BE16(ETH_P_IPV6));
+			eth_type_set = 1;
+			vlan_eth_type_set = 1;
+			if (mask.ipv6 == &flow_tcf_mask_empty.ipv6)
+				break;
+			spec.ipv6 = items->spec;
+			if (mask.ipv6->hdr.proto) {
+				mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
+						spec.ipv6->hdr.proto);
+				ip_proto_set = 1;
+			}
+			if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr)) {
+				mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC,
+					     sizeof(spec.ipv6->hdr.src_addr),
+					     spec.ipv6->hdr.src_addr);
+				mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC_MASK,
+					     sizeof(mask.ipv6->hdr.src_addr),
+					     mask.ipv6->hdr.src_addr);
+			}
+			if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr)) {
+				mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST,
+					     sizeof(spec.ipv6->hdr.dst_addr),
+					     spec.ipv6->hdr.dst_addr);
+				mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST_MASK,
+					     sizeof(mask.ipv6->hdr.dst_addr),
+					     mask.ipv6->hdr.dst_addr);
+			}
+			break;
+		case RTE_FLOW_ITEM_TYPE_UDP:
+			mask.udp = flow_tcf_item_mask
+				(items, &rte_flow_item_udp_mask,
+				 &flow_tcf_mask_supported.udp,
+				 &flow_tcf_mask_empty.udp,
+				 sizeof(flow_tcf_mask_supported.udp),
+				 error);
+			assert(mask.udp);
+			if (!ip_proto_set)
+				mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
+						IPPROTO_UDP);
+			if (mask.udp == &flow_tcf_mask_empty.udp)
+				break;
+			spec.udp = items->spec;
+			if (mask.udp->hdr.src_port) {
+				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_SRC,
+						 spec.udp->hdr.src_port);
+				mnl_attr_put_u16(nlh,
+						 TCA_FLOWER_KEY_UDP_SRC_MASK,
+						 mask.udp->hdr.src_port);
+			}
+			if (mask.udp->hdr.dst_port) {
+				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_DST,
+						 spec.udp->hdr.dst_port);
+				mnl_attr_put_u16(nlh,
+						 TCA_FLOWER_KEY_UDP_DST_MASK,
+						 mask.udp->hdr.dst_port);
+			}
+			break;
+		case RTE_FLOW_ITEM_TYPE_TCP:
+			mask.tcp = flow_tcf_item_mask
+				(items, &rte_flow_item_tcp_mask,
+				 &flow_tcf_mask_supported.tcp,
+				 &flow_tcf_mask_empty.tcp,
+				 sizeof(flow_tcf_mask_supported.tcp),
+				 error);
+			assert(mask.tcp);
+			if (!ip_proto_set)
+				mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
+						IPPROTO_TCP);
+			if (mask.tcp == &flow_tcf_mask_empty.tcp)
+				break;
+			spec.tcp = items->spec;
+			if (mask.tcp->hdr.src_port) {
+				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_SRC,
+						 spec.tcp->hdr.src_port);
+				mnl_attr_put_u16(nlh,
+						 TCA_FLOWER_KEY_TCP_SRC_MASK,
+						 mask.tcp->hdr.src_port);
+			}
+			if (mask.tcp->hdr.dst_port) {
+				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_DST,
+						 spec.tcp->hdr.dst_port);
+				mnl_attr_put_u16(nlh,
+						 TCA_FLOWER_KEY_TCP_DST_MASK,
+						 mask.tcp->hdr.dst_port);
+			}
+			break;
+		default:
+			return rte_flow_error_set(error, ENOTSUP,
+						  RTE_FLOW_ERROR_TYPE_ITEM,
+						  NULL, "item not supported");
+		}
+	}
+	na_flower_act = mnl_attr_nest_start(nlh, TCA_FLOWER_ACT);
+	na_act_index_cur = 1;
+	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+		struct nlattr *na_act_index;
+		struct nlattr *na_act;
+		unsigned int vlan_act;
+		unsigned int i;
+
+		switch (actions->type) {
+		case RTE_FLOW_ACTION_TYPE_VOID:
+			break;
+		case RTE_FLOW_ACTION_TYPE_PORT_ID:
+			conf.port_id = actions->conf;
+			if (conf.port_id->original)
+				i = 0;
+			else
+				for (i = 0; ptoi[i].ifindex; ++i)
+					if (ptoi[i].port_id == conf.port_id->id)
+						break;
+			assert(ptoi[i].ifindex);
+			na_act_index =
+				mnl_attr_nest_start(nlh, na_act_index_cur++);
+			assert(na_act_index);
+			mnl_attr_put_strz(nlh, TCA_ACT_KIND, "mirred");
+			na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
+			assert(na_act);
+			mnl_attr_put(nlh, TCA_MIRRED_PARMS,
+				     sizeof(struct tc_mirred),
+				     &(struct tc_mirred){
+					.action = TC_ACT_STOLEN,
+					.eaction = TCA_EGRESS_REDIR,
+					.ifindex = ptoi[i].ifindex,
+				     });
+			mnl_attr_nest_end(nlh, na_act);
+			mnl_attr_nest_end(nlh, na_act_index);
+			break;
+		case RTE_FLOW_ACTION_TYPE_DROP:
+			na_act_index =
+				mnl_attr_nest_start(nlh, na_act_index_cur++);
+			assert(na_act_index);
+			mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
+			na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
+			assert(na_act);
+			mnl_attr_put(nlh, TCA_GACT_PARMS,
+				     sizeof(struct tc_gact),
+				     &(struct tc_gact){
+					.action = TC_ACT_SHOT,
+				     });
+			mnl_attr_nest_end(nlh, na_act);
+			mnl_attr_nest_end(nlh, na_act_index);
+			break;
+		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
+			conf.of_push_vlan = NULL;
+			vlan_act = TCA_VLAN_ACT_POP;
+			goto action_of_vlan;
+		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
+			conf.of_push_vlan = actions->conf;
+			vlan_act = TCA_VLAN_ACT_PUSH;
+			goto action_of_vlan;
+		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
+			conf.of_set_vlan_vid = actions->conf;
+			if (na_vlan_id)
+				goto override_na_vlan_id;
+			vlan_act = TCA_VLAN_ACT_MODIFY;
+			goto action_of_vlan;
+		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
+			conf.of_set_vlan_pcp = actions->conf;
+			if (na_vlan_priority)
+				goto override_na_vlan_priority;
+			vlan_act = TCA_VLAN_ACT_MODIFY;
+			goto action_of_vlan;
+action_of_vlan:
+			na_act_index =
+				mnl_attr_nest_start(nlh, na_act_index_cur++);
+			assert(na_act_index);
+			mnl_attr_put_strz(nlh, TCA_ACT_KIND, "vlan");
+			na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
+			assert(na_act);
+			mnl_attr_put(nlh, TCA_VLAN_PARMS,
+				     sizeof(struct tc_vlan),
+				     &(struct tc_vlan){
+					.action = TC_ACT_PIPE,
+					.v_action = vlan_act,
+				     });
+			if (vlan_act == TCA_VLAN_ACT_POP) {
+				mnl_attr_nest_end(nlh, na_act);
+				mnl_attr_nest_end(nlh, na_act_index);
+				break;
+			}
+			if (vlan_act == TCA_VLAN_ACT_PUSH)
+				mnl_attr_put_u16(nlh,
+						 TCA_VLAN_PUSH_VLAN_PROTOCOL,
+						 conf.of_push_vlan->ethertype);
+			na_vlan_id = mnl_nlmsg_get_payload_tail(nlh);
+			mnl_attr_put_u16(nlh, TCA_VLAN_PAD, 0);
+			na_vlan_priority = mnl_nlmsg_get_payload_tail(nlh);
+			mnl_attr_put_u8(nlh, TCA_VLAN_PAD, 0);
+			mnl_attr_nest_end(nlh, na_act);
+			mnl_attr_nest_end(nlh, na_act_index);
+			if (actions->type ==
+			    RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
+override_na_vlan_id:
+				na_vlan_id->nla_type = TCA_VLAN_PUSH_VLAN_ID;
+				*(uint16_t *)mnl_attr_get_payload(na_vlan_id) =
+					rte_be_to_cpu_16
+					(conf.of_set_vlan_vid->vlan_vid);
+			} else if (actions->type ==
+				   RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
+override_na_vlan_priority:
+				na_vlan_priority->nla_type =
+					TCA_VLAN_PUSH_VLAN_PRIORITY;
+				*(uint8_t *)mnl_attr_get_payload
+					(na_vlan_priority) =
+					conf.of_set_vlan_pcp->vlan_pcp;
+			}
+			break;
+		default:
+			return rte_flow_error_set(error, ENOTSUP,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  actions,
+						  "action not supported");
+		}
+	}
+	assert(na_flower);
+	assert(na_flower_act);
+	mnl_attr_nest_end(nlh, na_flower_act);
+	mnl_attr_nest_end(nlh, na_flower);
+	return 0;
+}
+
+/**
+ * Send Netlink message with acknowledgment.
+ *
+ * @param nl
+ *   Libmnl socket to use.
+ * @param nlh
+ *   Message to send. This function always raises the NLM_F_ACK flag before
+ *   sending.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_tcf_nl_ack(struct mnl_socket *nl, struct nlmsghdr *nlh)
+{
+	alignas(struct nlmsghdr)
+	uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
+		    nlh->nlmsg_len - sizeof(*nlh)];
+	uint32_t seq = random();
+	int ret;
+
+	nlh->nlmsg_flags |= NLM_F_ACK;
+	nlh->nlmsg_seq = seq;
+	ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
+	if (ret != -1)
+		ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
+	if (ret != -1)
+		ret = mnl_cb_run
+			(ans, ret, seq, mnl_socket_get_portid(nl), NULL, NULL);
+	if (ret > 0)
+		return 0;
+	rte_errno = errno;
+	return -rte_errno;
+}
+
+/**
+ * Apply flow to E-Switch by sending Netlink message.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[in, out] flow
+ *   Pointer to the sub flow.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
+	       struct rte_flow_error *error)
+{
+	struct priv *priv = dev->data->dev_private;
+	struct mnl_socket *nl = priv->mnl_socket;
+	struct mlx5_flow *dev_flow;
+	struct nlmsghdr *nlh;
+
+	dev_flow = LIST_FIRST(&flow->dev_flows);
+	/* E-Switch flow can't be expanded. */
+	assert(!LIST_NEXT(dev_flow, next));
+	nlh = dev_flow->tcf.nlh;
+	nlh->nlmsg_type = RTM_NEWTFILTER;
+	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
+	if (!flow_tcf_nl_ack(nl, nlh))
+		return 0;
+	return rte_flow_error_set(error, rte_errno,
+				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+				  "netlink: failed to create TC flow rule");
+}
+
+/**
+ * Remove flow from E-Switch by sending Netlink message.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[in, out] flow
+ *   Pointer to the sub flow.
+ */
+static void
+flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
+{
+	struct priv *priv = dev->data->dev_private;
+	struct mnl_socket *nl = priv->mnl_socket;
+	struct mlx5_flow *dev_flow;
+	struct nlmsghdr *nlh;
+
+	if (!flow)
+		return;
+	dev_flow = LIST_FIRST(&flow->dev_flows);
+	if (!dev_flow)
+		return;
+	/* E-Switch flow can't be expanded. */
+	assert(!LIST_NEXT(dev_flow, next));
+	nlh = dev_flow->tcf.nlh;
+	nlh->nlmsg_type = RTM_DELTFILTER;
+	nlh->nlmsg_flags = NLM_F_REQUEST;
+	flow_tcf_nl_ack(nl, nlh);
+}
+
+/**
+ * Remove flow from E-Switch and release resources of the device flow.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[in, out] flow
+ *   Pointer to the sub flow.
+ */
+static void
+flow_tcf_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
+{
+	struct mlx5_flow *dev_flow;
+
+	if (!flow)
+		return;
+	flow_tcf_remove(dev, flow);
+	dev_flow = LIST_FIRST(&flow->dev_flows);
+	if (!dev_flow)
+		return;
+	/* E-Switch flow can't be expanded. */
+	assert(!LIST_NEXT(dev_flow, next));
+	LIST_REMOVE(dev_flow, next);
+	rte_free(dev_flow);
+}
+
+const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
+	.validate = flow_tcf_validate,
+	.prepare = flow_tcf_prepare,
+	.translate = flow_tcf_translate,
+	.apply = flow_tcf_apply,
+	.remove = flow_tcf_remove,
+	.destroy = flow_tcf_destroy,
+};
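+
+/*
+ * Note: these callbacks are not invoked directly; they are dispatched
+ * through the generic mlx5_flow layer's driver-ops mechanism whenever
+ * the TCF driver is selected to handle an E-Switch (transfer) rule.
+ */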
+
+/**
+ * Initialize ingress qdisc of a given network interface.
+ *
+ * @param nl
+ *   Libmnl socket of the @p NETLINK_ROUTE kind.
+ * @param ifindex
+ *   Index of network interface to initialize.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
+		   struct rte_flow_error *error)
+{
+	struct nlmsghdr *nlh;
+	struct tcmsg *tcm;
+	alignas(struct nlmsghdr)
+	uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];
+
+	/* Destroy existing ingress qdisc and everything attached to it. */
+	nlh = mnl_nlmsg_put_header(buf);
+	nlh->nlmsg_type = RTM_DELQDISC;
+	nlh->nlmsg_flags = NLM_F_REQUEST;
+	tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
+	tcm->tcm_family = AF_UNSPEC;
+	tcm->tcm_ifindex = ifindex;
+	tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
+	tcm->tcm_parent = TC_H_INGRESS;
+	/* Ignore errors when qdisc is already absent. */
+	if (flow_tcf_nl_ack(nl, nlh) &&
+	    rte_errno != EINVAL && rte_errno != ENOENT)
+		return rte_flow_error_set(error, rte_errno,
+					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+					  "netlink: failed to remove ingress"
+					  " qdisc");
+	/* Create fresh ingress qdisc. */
+	nlh = mnl_nlmsg_put_header(buf);
+	nlh->nlmsg_type = RTM_NEWQDISC;
+	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
+	tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
+	tcm->tcm_family = AF_UNSPEC;
+	tcm->tcm_ifindex = ifindex;
+	tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
+	tcm->tcm_parent = TC_H_INGRESS;
+	mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
+	if (flow_tcf_nl_ack(nl, nlh))
+		return rte_flow_error_set(error, rte_errno,
+					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+					  "netlink: failed to create ingress"
+					  " qdisc");
+	return 0;
+}
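+
+/*
+ * For reference, the two Netlink requests above are roughly equivalent
+ * to the following tc(8) commands (interface name assumed):
+ *
+ *   tc qdisc del dev <ifname> ingress    # failure ignored if absent
+ *   tc qdisc add dev <ifname> ingress
+ */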
+
+/**
+ * Create and configure a libmnl socket for Netlink flow rules.
+ *
+ * @return
+ *   A valid libmnl socket object pointer on success, NULL otherwise and
+ *   rte_errno is set.
+ */
+struct mnl_socket *
+mlx5_flow_tcf_socket_create(void)
+{
+	struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
+
+	if (nl) {
+		mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
+				      sizeof(int));
+		if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
+			return nl;
+	}
+	rte_errno = errno;
+	if (nl)
+		mnl_socket_close(nl);
+	return NULL;
+}
+
+/**
+ * Destroy a libmnl socket.
+ *
+ * @param nl
+ *   Libmnl socket of the @p NETLINK_ROUTE kind.
+ */
+void
+mlx5_flow_tcf_socket_destroy(struct mnl_socket *nl)
+{
+	mnl_socket_close(nl);
+}
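
For context, a minimal usage sketch of the three helpers above, roughly
as the PMD probe path is expected to call them (error handling
abbreviated; "ifindex" identifies the representor being configured):

	struct rte_flow_error error;
	struct mnl_socket *nl = mlx5_flow_tcf_socket_create();

	if (!nl)
		return -rte_errno;
	if (mlx5_flow_tcf_init(nl, ifindex, &error)) {
		mlx5_flow_tcf_socket_destroy(nl);
		return -rte_errno;
	}
	/* nl is now ready to back flow_tcf_apply()/flow_tcf_remove(). */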
-- 
2.11.0

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [dpdk-dev] [PATCH v3 00/11] net/mlx5: add Direct Verbs flow driver support
  2018-09-19  7:21 [dpdk-dev] [PATCH 0/3] migrate Linux TC flower driver to new flow engine Yongseok Koh
                   ` (3 preceding siblings ...)
  2018-09-24 19:55 ` [dpdk-dev] [PATCH v2 0/3] net/mlx5: migrate Linux TC flower driver to new flow engine Yongseok Koh
@ 2018-09-24 23:17 ` Yongseok Koh
  2018-09-24 23:17   ` [dpdk-dev] [PATCH v3 01/11] net/mlx5: split flow validation to dedicated function Yongseok Koh
                     ` (11 more replies)
  4 siblings, 12 replies; 22+ messages in thread
From: Yongseok Koh @ 2018-09-24 23:17 UTC (permalink / raw)
  To: Thomas Monjalon, Shahaf Shuler; +Cc: dev, Yongseok Koh

RFC:
	https://mails.dpdk.org/archives/dev/2018-August/109950.html

v3:
* fix clang compilation error.

v2:
* make changes for the newly introduced meson build.

Ori Kam (11):
  net/mlx5: split flow validation to dedicated function
  net/mlx5: add flow prepare function
  net/mlx5: add flow translate function
  net/mlx5: add support for multiple flow drivers
  net/mlx5: add Direct Verbs validation function
  net/mlx5: add Direct Verbs prepare function
  net/mlx5: add Direct Verbs translate items
  net/mlx5: add Direct Verbs translate actions
  net/mlx5: add Direct Verbs driver to glue
  net/mlx5: add Direct Verbs final functions
  net/mlx5: add runtime parameter to enable Direct Verbs

 doc/guides/nics/mlx5.rst           |    7 +
 drivers/net/mlx5/Makefile          |    9 +-
 drivers/net/mlx5/meson.build       |    6 +-
 drivers/net/mlx5/mlx5.c            |    8 +
 drivers/net/mlx5/mlx5.h            |    2 +
 drivers/net/mlx5/mlx5_flow.c       | 3285 +++++++++++-------------------------
 drivers/net/mlx5/mlx5_flow.h       |  326 ++++
 drivers/net/mlx5/mlx5_flow_dv.c    | 1373 +++++++++++++++
 drivers/net/mlx5/mlx5_flow_verbs.c | 1652 ++++++++++++++++++
 drivers/net/mlx5/mlx5_glue.c       |   45 +
 drivers/net/mlx5/mlx5_glue.h       |   15 +
 drivers/net/mlx5/mlx5_prm.h        |  220 +++
 drivers/net/mlx5/mlx5_rxtx.h       |    7 +
 13 files changed, 4635 insertions(+), 2320 deletions(-)
 create mode 100644 drivers/net/mlx5/mlx5_flow.h
 create mode 100644 drivers/net/mlx5/mlx5_flow_dv.c
 create mode 100644 drivers/net/mlx5/mlx5_flow_verbs.c

-- 
2.11.0

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [dpdk-dev] [PATCH v3 01/11] net/mlx5: split flow validation to dedicated function
  2018-09-24 23:17 ` [dpdk-dev] [PATCH v3 00/11] net/mlx5: add Direct Verbs flow driver support Yongseok Koh
@ 2018-09-24 23:17   ` Yongseok Koh
  2018-09-24 23:17   ` [dpdk-dev] [PATCH v3 02/11] net/mlx5: add flow prepare function Yongseok Koh
                     ` (10 subsequent siblings)
  11 siblings, 0 replies; 22+ messages in thread
From: Yongseok Koh @ 2018-09-24 23:17 UTC (permalink / raw)
  To: Thomas Monjalon, Shahaf Shuler; +Cc: dev, Ori Kam

From: Ori Kam <orika@mellanox.com>

In the current implementation the validation logic resides in the same
function that calculates the size of the Verbs spec and also creates the
Verbs spec. This approach results in hard-to-maintain code which cannot
be shared. The current logic also relies on a parser entity that holds
information between function calls. The main problem with this parser is
that it assumes a connection between the different functions: for
example, it assumes that the validation function was called and that the
relevant values were set. This may cause an issue if, for example, we
only call the validation function, or call the apply function without
prior validation. (Currently, according to rte_flow, validation must be
called before creating a flow, but we may want to change that to save
time during flow creation: if the user has already validated a rule and
just wants to change the IP address, there is no real reason to validate
the rule again.)

This commit addresses both of those issues by extracting the validation
logic into dedicated functions, as sketched below, and by removing the
use of the parser object. The side effect of these changes is that in
some cases the item list will need to be traversed again.
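
As an illustration only (the function names below are hypothetical, not
the actual driver API), the split turns one monolithic converter into
independent stages that share no hidden parser state:

	/* Before: a single pass validates the item, computes the spec
	 * size and fills the Verbs spec through a shared parser. */
	size = flow_item_convert(parser, item, flow, flow_size, error);

	/* After: each stage stands alone and can be called on its own. */
	ret = flow_item_validate(item, error);	/* rte_flow_validate() */
	size = flow_item_size(item);		/* size calculation */
	flow_item_translate(item, flow);	/* spec creation */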

Signed-off-by: Ori Kam <orika@mellanox.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5_flow.c | 1905 +++++++++++++++++++++++++++---------------
 1 file changed, 1240 insertions(+), 665 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 3f548a9a4..799064c0c 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -91,6 +91,14 @@ extern const struct eth_dev_ops mlx5_dev_ops_isolate;
 #define MLX5_FLOW_MOD_MARK (1u << 1)
 #define MLX5_FLOW_MOD_COUNT (1u << 2)
 
+/* Actions */
+#define MLX5_ACTION_DROP (1u << 0)
+#define MLX5_ACTION_QUEUE (1u << 1)
+#define MLX5_ACTION_RSS (1u << 2)
+#define MLX5_ACTION_FLAG (1u << 3)
+#define MLX5_ACTION_MARK (1u << 4)
+#define MLX5_ACTION_COUNT (1u << 5)
+
 /* possible L3 layers protocols filtering. */
 #define MLX5_IP_PROTOCOL_TCP 6
 #define MLX5_IP_PROTOCOL_UDP 17
@@ -299,14 +307,12 @@ struct mlx5_flow_counter {
 struct rte_flow {
 	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
 	struct rte_flow_attr attributes; /**< User flow attribute. */
-	uint32_t l3_protocol_en:1; /**< Protocol filtering requested. */
 	uint32_t layers;
 	/**< Bit-fields of present layers see MLX5_FLOW_LAYER_*. */
 	uint32_t modifier;
 	/**< Bit-fields of present modifier see MLX5_FLOW_MOD_*. */
 	uint32_t fate;
 	/**< Bit-fields of present fate see MLX5_FLOW_FATE_*. */
-	uint8_t l3_protocol; /**< valid when l3_protocol_en is set. */
 	LIST_HEAD(verbs, mlx5_flow_verbs) verbs; /**< Verbs flows list. */
 	struct mlx5_flow_verbs *cur_verbs;
 	/**< Current Verbs flow structure being filled. */
@@ -582,52 +588,23 @@ mlx5_flow_counter_release(struct mlx5_flow_counter *counter)
  * them in the @p flow if everything is correct.
  *
  * @param[in] dev
- *   Pointer to Ethernet device.
+ *   Pointer to Ethernet device structure.
  * @param[in] attributes
  *   Pointer to flow attributes
  * @param[in, out] flow
  *   Pointer to the rte_flow structure.
- * @param[out] error
- *   Pointer to error structure.
  *
  * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
+ *   0 on success.
  */
 static int
 mlx5_flow_attributes(struct rte_eth_dev *dev,
 		     const struct rte_flow_attr *attributes,
-		     struct rte_flow *flow,
-		     struct rte_flow_error *error)
+		     struct rte_flow *flow)
 {
-	uint32_t priority_max =
-		((struct priv *)dev->data->dev_private)->config.flow_prio - 1;
+	struct priv *priv = dev->data->dev_private;
+	uint32_t priority_max = priv->config.flow_prio - 1;
 
-	if (attributes->group)
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
-					  NULL,
-					  "groups is not supported");
-	if (attributes->priority != MLX5_FLOW_PRIO_RSVD &&
-	    attributes->priority >= priority_max)
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
-					  NULL,
-					  "priority out of range");
-	if (attributes->egress)
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
-					  NULL,
-					  "egress is not supported");
-	if (attributes->transfer)
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
-					  NULL,
-					  "transfer is not supported");
-	if (!attributes->ingress)
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
-					  NULL,
-					  "ingress attribute is mandatory");
 	flow->attributes = *attributes;
 	if (attributes->priority == MLX5_FLOW_PRIO_RSVD)
 		flow->attributes.priority = priority_max;
@@ -671,8 +648,7 @@ mlx5_flow_item_acceptable(const struct rte_flow_item *item,
 						  " bits");
 	if (!item->spec && (item->mask || item->last))
 		return rte_flow_error_set(error, EINVAL,
-					  RTE_FLOW_ERROR_TYPE_ITEM,
-					  item,
+					  RTE_FLOW_ERROR_TYPE_ITEM, item,
 					  "mask/last without a spec is not"
 					  " supported");
 	if (item->spec && item->last) {
@@ -762,8 +738,6 @@ mlx5_flow_verbs_hashfields_adjust(struct rte_flow *flow,
  * @param[in] flow_size
  *   Size in bytes of the available space in @p flow, if too small, nothing is
  *   written.
- * @param[out] error
- *   Pointer to error structure.
  *
  * @return
  *   On success the number of bytes consumed/necessary, if the returned value
@@ -773,37 +747,19 @@ mlx5_flow_verbs_hashfields_adjust(struct rte_flow *flow,
  */
 static int
 mlx5_flow_item_eth(const struct rte_flow_item *item, struct rte_flow *flow,
-		   const size_t flow_size, struct rte_flow_error *error)
+		   const size_t flow_size)
 {
 	const struct rte_flow_item_eth *spec = item->spec;
 	const struct rte_flow_item_eth *mask = item->mask;
-	const struct rte_flow_item_eth nic_mask = {
-		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
-		.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
-		.type = RTE_BE16(0xffff),
-	};
 	const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
 	const unsigned int size = sizeof(struct ibv_flow_spec_eth);
 	struct ibv_flow_spec_eth eth = {
 		.type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
 		.size = size,
 	};
-	int ret;
 
-	if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
-			    MLX5_FLOW_LAYER_OUTER_L2))
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ITEM,
-					  item,
-					  "L2 layers already configured");
 	if (!mask)
 		mask = &rte_flow_item_eth_mask;
-	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
-					(const uint8_t *)&nic_mask,
-					sizeof(struct rte_flow_item_eth),
-					error);
-	if (ret)
-		return ret;
 	flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
 		MLX5_FLOW_LAYER_OUTER_L2;
 	if (size > flow_size)
@@ -875,8 +831,6 @@ mlx5_flow_item_vlan_update(struct ibv_flow_attr *attr,
  * @param[in] flow_size
  *   Size in bytes of the available space in @p flow, if too small, nothing is
  *   written.
- * @param[out] error
- *   Pointer to error structure.
  *
  * @return
  *   On success the number of bytes consumed/necessary, if the returned value
@@ -886,47 +840,21 @@ mlx5_flow_item_vlan_update(struct ibv_flow_attr *attr,
  */
 static int
 mlx5_flow_item_vlan(const struct rte_flow_item *item, struct rte_flow *flow,
-		    const size_t flow_size, struct rte_flow_error *error)
+		    const size_t flow_size)
 {
 	const struct rte_flow_item_vlan *spec = item->spec;
 	const struct rte_flow_item_vlan *mask = item->mask;
-	const struct rte_flow_item_vlan nic_mask = {
-		.tci = RTE_BE16(0x0fff),
-		.inner_type = RTE_BE16(0xffff),
-	};
 	unsigned int size = sizeof(struct ibv_flow_spec_eth);
 	const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
 	struct ibv_flow_spec_eth eth = {
 		.type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
 		.size = size,
 	};
-	int ret;
-	const uint32_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
-					MLX5_FLOW_LAYER_INNER_L4) :
-		(MLX5_FLOW_LAYER_OUTER_L3 | MLX5_FLOW_LAYER_OUTER_L4);
-	const uint32_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
-		MLX5_FLOW_LAYER_OUTER_VLAN;
 	const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
 		MLX5_FLOW_LAYER_OUTER_L2;
 
-	if (flow->layers & vlanm)
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ITEM,
-					  item,
-					  "VLAN layer already configured");
-	else if ((flow->layers & l34m) != 0)
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ITEM,
-					  item,
-					  "L2 layer cannot follow L3/L4 layer");
 	if (!mask)
 		mask = &rte_flow_item_vlan_mask;
-	ret = mlx5_flow_item_acceptable
-		(item, (const uint8_t *)mask,
-		 (const uint8_t *)&nic_mask,
-		 sizeof(struct rte_flow_item_vlan), error);
-	if (ret)
-		return ret;
 	if (spec) {
 		eth.val.vlan_tag = spec->tci;
 		eth.mask.vlan_tag = mask->tci;
@@ -935,15 +863,6 @@ mlx5_flow_item_vlan(const struct rte_flow_item *item, struct rte_flow *flow,
 		eth.mask.ether_type = mask->inner_type;
 		eth.val.ether_type &= eth.mask.ether_type;
 	}
-	/*
-	 * From verbs perspective an empty VLAN is equivalent
-	 * to a packet without VLAN layer.
-	 */
-	if (!eth.mask.vlan_tag)
-		return rte_flow_error_set(error, EINVAL,
-					  RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
-					  item->spec,
-					  "VLAN cannot be empty");
 	if (!(flow->layers & l2m)) {
 		if (size <= flow_size) {
 			flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
@@ -974,29 +893,18 @@ mlx5_flow_item_vlan(const struct rte_flow_item *item, struct rte_flow *flow,
  * @param[in] flow_size
  *   Size in bytes of the available space in @p flow, if too small, nothing is
  *   written.
- * @param[out] error
- *   Pointer to error structure.
  *
  * @return
  *   On success the number of bytes consumed/necessary, if the returned value
  *   is lesser or equal to @p flow_size, the @p item has fully been converted,
  *   otherwise another call with this returned memory size should be done.
- *   On error, a negative errno value is returned and rte_errno is set.
  */
 static int
 mlx5_flow_item_ipv4(const struct rte_flow_item *item, struct rte_flow *flow,
-		    const size_t flow_size, struct rte_flow_error *error)
+		    const size_t flow_size)
 {
 	const struct rte_flow_item_ipv4 *spec = item->spec;
 	const struct rte_flow_item_ipv4 *mask = item->mask;
-	const struct rte_flow_item_ipv4 nic_mask = {
-		.hdr = {
-			.src_addr = RTE_BE32(0xffffffff),
-			.dst_addr = RTE_BE32(0xffffffff),
-			.type_of_service = 0xff,
-			.next_proto_id = 0xff,
-		},
-	};
 	const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
 	unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
 	struct ibv_flow_spec_ipv4_ext ipv4 = {
@@ -1004,28 +912,9 @@ mlx5_flow_item_ipv4(const struct rte_flow_item *item, struct rte_flow *flow,
 			(tunnel ? IBV_FLOW_SPEC_INNER : 0),
 		.size = size,
 	};
-	int ret;
 
-	if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
-			    MLX5_FLOW_LAYER_OUTER_L3))
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ITEM,
-					  item,
-					  "multiple L3 layers not supported");
-	else if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
-				 MLX5_FLOW_LAYER_OUTER_L4))
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ITEM,
-					  item,
-					  "L3 cannot follow an L4 layer.");
 	if (!mask)
 		mask = &rte_flow_item_ipv4_mask;
-	ret = mlx5_flow_item_acceptable
-		(item, (const uint8_t *)mask,
-		 (const uint8_t *)&nic_mask,
-		 sizeof(struct rte_flow_item_ipv4), error);
-	if (ret < 0)
-		return ret;
 	flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
 		MLX5_FLOW_LAYER_OUTER_L3_IPV4;
 	if (spec) {
@@ -1047,8 +936,6 @@ mlx5_flow_item_ipv4(const struct rte_flow_item *item, struct rte_flow *flow,
 		ipv4.val.proto &= ipv4.mask.proto;
 		ipv4.val.tos &= ipv4.mask.tos;
 	}
-	flow->l3_protocol_en = !!ipv4.mask.proto;
-	flow->l3_protocol = ipv4.val.proto;
 	if (size <= flow_size) {
 		mlx5_flow_verbs_hashfields_adjust
 			(flow, tunnel,
@@ -1076,74 +963,27 @@ mlx5_flow_item_ipv4(const struct rte_flow_item *item, struct rte_flow *flow,
  * @param[in] flow_size
  *   Size in bytes of the available space in @p flow, if too small, nothing is
  *   written.
- * @param[out] error
- *   Pointer to error structure.
  *
  * @return
  *   On success the number of bytes consumed/necessary, if the returned value
  *   is lesser or equal to @p flow_size, the @p item has fully been converted,
  *   otherwise another call with this returned memory size should be done.
- *   On error, a negative errno value is returned and rte_errno is set.
  */
 static int
 mlx5_flow_item_ipv6(const struct rte_flow_item *item, struct rte_flow *flow,
-		    const size_t flow_size, struct rte_flow_error *error)
+		    const size_t flow_size)
 {
 	const struct rte_flow_item_ipv6 *spec = item->spec;
 	const struct rte_flow_item_ipv6 *mask = item->mask;
-	const struct rte_flow_item_ipv6 nic_mask = {
-		.hdr = {
-			.src_addr =
-				"\xff\xff\xff\xff\xff\xff\xff\xff"
-				"\xff\xff\xff\xff\xff\xff\xff\xff",
-			.dst_addr =
-				"\xff\xff\xff\xff\xff\xff\xff\xff"
-				"\xff\xff\xff\xff\xff\xff\xff\xff",
-			.vtc_flow = RTE_BE32(0xffffffff),
-			.proto = 0xff,
-			.hop_limits = 0xff,
-		},
-	};
 	const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
 	unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
 	struct ibv_flow_spec_ipv6 ipv6 = {
 		.type = IBV_FLOW_SPEC_IPV6 | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
 		.size = size,
 	};
-	int ret;
 
-	if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
-			    MLX5_FLOW_LAYER_OUTER_L3))
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ITEM,
-					  item,
-					  "multiple L3 layers not supported");
-	else if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
-				 MLX5_FLOW_LAYER_OUTER_L4))
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ITEM,
-					  item,
-					  "L3 cannot follow an L4 layer.");
-	/*
-	 * IPv6 is not recognised by the NIC inside a GRE tunnel.
-	 * Such support has to be disabled as the rule will be
-	 * accepted.  Issue reproduced with Mellanox OFED 4.3-3.0.2.1 and
-	 * Mellanox OFED 4.4-1.0.0.0.
-	 */
-	if (tunnel && flow->layers & MLX5_FLOW_LAYER_GRE)
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ITEM,
-					  item,
-					  "IPv6 inside a GRE tunnel is"
-					  " not recognised.");
 	if (!mask)
 		mask = &rte_flow_item_ipv6_mask;
-	ret = mlx5_flow_item_acceptable
-		(item, (const uint8_t *)mask,
-		 (const uint8_t *)&nic_mask,
-		 sizeof(struct rte_flow_item_ipv6), error);
-	if (ret < 0)
-		return ret;
 	flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
 		MLX5_FLOW_LAYER_OUTER_L3_IPV6;
 	if (spec) {
@@ -1185,8 +1025,6 @@ mlx5_flow_item_ipv6(const struct rte_flow_item *item, struct rte_flow *flow,
 		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
 		ipv6.val.hop_limit &= ipv6.mask.hop_limit;
 	}
-	flow->l3_protocol_en = !!ipv6.mask.next_hdr;
-	flow->l3_protocol = ipv6.val.next_hdr;
 	if (size <= flow_size) {
 		mlx5_flow_verbs_hashfields_adjust
 			(flow, tunnel,
@@ -1214,18 +1052,15 @@ mlx5_flow_item_ipv6(const struct rte_flow_item *item, struct rte_flow *flow,
  * @param[in] flow_size
  *   Size in bytes of the available space in @p flow, if too small, nothing is
  *   written.
- * @param[out] error
- *   Pointer to error structure.
  *
  * @return
  *   On success the number of bytes consumed/necessary, if the returned value
  *   is lesser or equal to @p flow_size, the @p item has fully been converted,
  *   otherwise another call with this returned memory size should be done.
- *   On error, a negative errno value is returned and rte_errno is set.
  */
 static int
 mlx5_flow_item_udp(const struct rte_flow_item *item, struct rte_flow *flow,
-		   const size_t flow_size, struct rte_flow_error *error)
+		   const size_t flow_size)
 {
 	const struct rte_flow_item_udp *spec = item->spec;
 	const struct rte_flow_item_udp *mask = item->mask;
@@ -1235,36 +1070,9 @@ mlx5_flow_item_udp(const struct rte_flow_item *item, struct rte_flow *flow,
 		.type = IBV_FLOW_SPEC_UDP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
 		.size = size,
 	};
-	int ret;
 
-	if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_UDP)
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ITEM,
-					  item,
-					  "protocol filtering not compatible"
-					  " with UDP layer");
-	if (!(flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
-			      MLX5_FLOW_LAYER_OUTER_L3)))
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ITEM,
-					  item,
-					  "L3 is mandatory to filter"
-					  " on L4");
-	if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
-			    MLX5_FLOW_LAYER_OUTER_L4))
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ITEM,
-					  item,
-					  "L4 layer is already"
-					  " present");
 	if (!mask)
 		mask = &rte_flow_item_udp_mask;
-	ret = mlx5_flow_item_acceptable
-		(item, (const uint8_t *)mask,
-		 (const uint8_t *)&rte_flow_item_udp_mask,
-		 sizeof(struct rte_flow_item_udp), error);
-	if (ret < 0)
-		return ret;
 	flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
 		MLX5_FLOW_LAYER_OUTER_L4_UDP;
 	if (spec) {
@@ -1306,11 +1114,10 @@ mlx5_flow_item_udp(const struct rte_flow_item *item, struct rte_flow *flow,
  *   On success the number of bytes consumed/necessary, if the returned value
  *   is lesser or equal to @p flow_size, the @p item has fully been converted,
  *   otherwise another call with this returned memory size should be done.
- *   On error, a negative errno value is returned and rte_errno is set.
  */
 static int
 mlx5_flow_item_tcp(const struct rte_flow_item *item, struct rte_flow *flow,
-		   const size_t flow_size, struct rte_flow_error *error)
+		   const size_t flow_size)
 {
 	const struct rte_flow_item_tcp *spec = item->spec;
 	const struct rte_flow_item_tcp *mask = item->mask;
@@ -1320,34 +1127,9 @@ mlx5_flow_item_tcp(const struct rte_flow_item *item, struct rte_flow *flow,
 		.type = IBV_FLOW_SPEC_TCP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
 		.size = size,
 	};
-	int ret;
 
-	if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_TCP)
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ITEM,
-					  item,
-					  "protocol filtering not compatible"
-					  " with TCP layer");
-	if (!(flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
-			      MLX5_FLOW_LAYER_OUTER_L3)))
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ITEM,
-					  item,
-					  "L3 is mandatory to filter on L4");
-	if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
-			    MLX5_FLOW_LAYER_OUTER_L4))
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ITEM,
-					  item,
-					  "L4 layer is already present");
 	if (!mask)
 		mask = &rte_flow_item_tcp_mask;
-	ret = mlx5_flow_item_acceptable
-		(item, (const uint8_t *)mask,
-		 (const uint8_t *)&rte_flow_item_tcp_mask,
-		 sizeof(struct rte_flow_item_tcp), error);
-	if (ret < 0)
-		return ret;
 	flow->layers |=  tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
 		MLX5_FLOW_LAYER_OUTER_L4_TCP;
 	if (spec) {
@@ -1389,11 +1171,10 @@ mlx5_flow_item_tcp(const struct rte_flow_item *item, struct rte_flow *flow,
  *   On success the number of bytes consumed/necessary, if the returned value
  *   is lesser or equal to @p flow_size, the @p item has fully been converted,
  *   otherwise another call with this returned memory size should be done.
- *   On error, a negative errno value is returned and rte_errno is set.
  */
 static int
 mlx5_flow_item_vxlan(const struct rte_flow_item *item, struct rte_flow *flow,
-		     const size_t flow_size, struct rte_flow_error *error)
+		     const size_t flow_size)
 {
 	const struct rte_flow_item_vxlan *spec = item->spec;
 	const struct rte_flow_item_vxlan *mask = item->mask;
@@ -1402,34 +1183,13 @@ mlx5_flow_item_vxlan(const struct rte_flow_item *item, struct rte_flow *flow,
 		.type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
 		.size = size,
 	};
-	int ret;
 	union vni {
 		uint32_t vlan_id;
 		uint8_t vni[4];
 	} id = { .vlan_id = 0, };
 
-	if (flow->layers & MLX5_FLOW_LAYER_TUNNEL)
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ITEM,
-					  item,
-					  "a tunnel is already present");
-	/*
-	 * Verify only UDPv4 is present as defined in
-	 * https://tools.ietf.org/html/rfc7348
-	 */
-	if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L4_UDP))
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ITEM,
-					  item,
-					  "no outer UDP layer found");
 	if (!mask)
 		mask = &rte_flow_item_vxlan_mask;
-	ret = mlx5_flow_item_acceptable
-		(item, (const uint8_t *)mask,
-		 (const uint8_t *)&rte_flow_item_vxlan_mask,
-		 sizeof(struct rte_flow_item_vxlan), error);
-	if (ret < 0)
-		return ret;
 	if (spec) {
 		memcpy(&id.vni[1], spec->vni, 3);
 		vxlan.val.tunnel_id = id.vlan_id;
@@ -1438,25 +1198,6 @@ mlx5_flow_item_vxlan(const struct rte_flow_item *item, struct rte_flow *flow,
 		/* Remove unwanted bits from values. */
 		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
 	}
-	/*
-	 * Tunnel id 0 is equivalent as not adding a VXLAN layer, if
-	 * only this layer is defined in the Verbs specification it is
-	 * interpreted as wildcard and all packets will match this
-	 * rule, if it follows a full stack layer (ex: eth / ipv4 /
-	 * udp), all packets matching the layers before will also
-	 * match this rule.  To avoid such situation, VNI 0 is
-	 * currently refused.
-	 */
-	if (!vxlan.val.tunnel_id)
-		return rte_flow_error_set(error, EINVAL,
-					  RTE_FLOW_ERROR_TYPE_ITEM,
-					  item,
-					  "VXLAN vni cannot be 0");
-	if (!(flow->layers & MLX5_FLOW_LAYER_OUTER))
-		return rte_flow_error_set(error, EINVAL,
-					  RTE_FLOW_ERROR_TYPE_ITEM,
-					  item,
-					  "VXLAN tunnel must be fully defined");
 	if (size <= flow_size) {
 		mlx5_flow_spec_verbs_add(flow, &vxlan, size);
 		flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
@@ -1471,8 +1212,6 @@ mlx5_flow_item_vxlan(const struct rte_flow_item *item, struct rte_flow *flow,
  * If the necessary size for the conversion is greater than the @p flow_size,
  * nothing is written in @p flow, the validation is still performed.
  *
- * @param dev
- *   Pointer to Ethernet device.
  * @param[in] item
  *   Item specification.
  * @param[in, out] flow
@@ -1487,13 +1226,10 @@ mlx5_flow_item_vxlan(const struct rte_flow_item *item, struct rte_flow *flow,
  *   On success the number of bytes consumed/necessary, if the returned value
  *   is lesser or equal to @p flow_size, the @p item has fully been converted,
  *   otherwise another call with this returned memory size should be done.
- *   On error, a negative errno value is returned and rte_errno is set.
  */
 static int
-mlx5_flow_item_vxlan_gpe(struct rte_eth_dev *dev,
-			 const struct rte_flow_item *item,
-			 struct rte_flow *flow, const size_t flow_size,
-			 struct rte_flow_error *error)
+mlx5_flow_item_vxlan_gpe(const struct rte_flow_item *item,
+			 struct rte_flow *flow, const size_t flow_size)
 {
 	const struct rte_flow_item_vxlan_gpe *spec = item->spec;
 	const struct rte_flow_item_vxlan_gpe *mask = item->mask;
@@ -1502,74 +1238,21 @@ mlx5_flow_item_vxlan_gpe(struct rte_eth_dev *dev,
 		.type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
 		.size = size,
 	};
-	int ret;
 	union vni {
 		uint32_t vlan_id;
 		uint8_t vni[4];
 	} id = { .vlan_id = 0, };
 
-	if (!((struct priv *)dev->data->dev_private)->config.l3_vxlan_en)
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ITEM,
-					  item,
-					  "L3 VXLAN is not enabled by device"
-					  " parameter and/or not configured in"
-					  " firmware");
-	if (flow->layers & MLX5_FLOW_LAYER_TUNNEL)
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ITEM,
-					  item,
-					  "a tunnel is already present");
-	/*
-	 * Verify only UDPv4 is present as defined in
-	 * https://tools.ietf.org/html/rfc7348
-	 */
-	if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L4_UDP))
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ITEM,
-					  item,
-					  "no outer UDP layer found");
 	if (!mask)
 		mask = &rte_flow_item_vxlan_gpe_mask;
-	ret = mlx5_flow_item_acceptable
-		(item, (const uint8_t *)mask,
-		 (const uint8_t *)&rte_flow_item_vxlan_gpe_mask,
-		 sizeof(struct rte_flow_item_vxlan_gpe), error);
-	if (ret < 0)
-		return ret;
 	if (spec) {
 		memcpy(&id.vni[1], spec->vni, 3);
 		vxlan_gpe.val.tunnel_id = id.vlan_id;
 		memcpy(&id.vni[1], mask->vni, 3);
 		vxlan_gpe.mask.tunnel_id = id.vlan_id;
-		if (spec->protocol)
-			return rte_flow_error_set
-				(error, EINVAL,
-				 RTE_FLOW_ERROR_TYPE_ITEM,
-				 item,
-				 "VxLAN-GPE protocol not supported");
 		/* Remove unwanted bits from values. */
 		vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id;
 	}
-	/*
-	 * Tunnel id 0 is equivalent as not adding a VXLAN layer, if only this
-	 * layer is defined in the Verbs specification it is interpreted as
-	 * wildcard and all packets will match this rule, if it follows a full
-	 * stack layer (ex: eth / ipv4 / udp), all packets matching the layers
-	 * before will also match this rule.  To avoid such situation, VNI 0
-	 * is currently refused.
-	 */
-	if (!vxlan_gpe.val.tunnel_id)
-		return rte_flow_error_set(error, EINVAL,
-					  RTE_FLOW_ERROR_TYPE_ITEM,
-					  item,
-					  "VXLAN-GPE vni cannot be 0");
-	if (!(flow->layers & MLX5_FLOW_LAYER_OUTER))
-		return rte_flow_error_set(error, EINVAL,
-					  RTE_FLOW_ERROR_TYPE_ITEM,
-					  item,
-					  "VXLAN-GPE tunnel must be fully"
-					  " defined");
 	if (size <= flow_size) {
 		mlx5_flow_spec_verbs_add(flow, &vxlan_gpe, size);
 		flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
@@ -1647,24 +1330,20 @@ mlx5_flow_item_gre_ip_protocol_update(struct ibv_flow_attr *attr,
  * @param[in] flow_size
  *   Size in bytes of the available space in @p flow, if too small, nothing is
  *   written.
- * @param[out] error
- *   Pointer to error structure.
  *
  * @return
  *   On success the number of bytes consumed/necessary, if the returned value
  *   is lesser or equal to @p flow_size, the @p item has fully been converted,
  *   otherwise another call with this returned memory size should be done.
- *   On error, a negative errno value is returned and rte_errno is set.
  */
 static int
-mlx5_flow_item_gre(const struct rte_flow_item *item,
-		   struct rte_flow *flow, const size_t flow_size,
-		   struct rte_flow_error *error)
+mlx5_flow_item_gre(const struct rte_flow_item *item __rte_unused,
+		   struct rte_flow *flow, const size_t flow_size)
 {
 	struct mlx5_flow_verbs *verbs = flow->cur_verbs;
+#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
 	const struct rte_flow_item_gre *spec = item->spec;
 	const struct rte_flow_item_gre *mask = item->mask;
-#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
 	unsigned int size = sizeof(struct ibv_flow_spec_gre);
 	struct ibv_flow_spec_gre tunnel = {
 		.type = IBV_FLOW_SPEC_GRE,
@@ -1677,33 +1356,10 @@ mlx5_flow_item_gre(const struct rte_flow_item *item,
 		.size = size,
 	};
 #endif
-	int ret;
 
-	if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_GRE)
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ITEM,
-					  item,
-					  "protocol filtering not compatible"
-					  " with this GRE layer");
-	if (flow->layers & MLX5_FLOW_LAYER_TUNNEL)
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ITEM,
-					  item,
-					  "a tunnel is already present");
-	if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L3))
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ITEM,
-					  item,
-					  "L3 Layer is missing");
+#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
 	if (!mask)
 		mask = &rte_flow_item_gre_mask;
-	ret = mlx5_flow_item_acceptable
-		(item, (const uint8_t *)mask,
-		 (const uint8_t *)&rte_flow_item_gre_mask,
-		 sizeof(struct rte_flow_item_gre), error);
-	if (ret < 0)
-		return ret;
-#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
 	if (spec) {
 		tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
 		tunnel.val.protocol = spec->protocol;
@@ -1715,13 +1371,6 @@ mlx5_flow_item_gre(const struct rte_flow_item *item,
 		tunnel.val.key &= tunnel.mask.key;
 	}
 #else
-	if (spec && (spec->protocol & mask->protocol))
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ITEM,
-					  item,
-					  "without MPLS support the"
-					  " specification cannot be used for"
-					  " filtering");
 #endif /* !HAVE_IBV_DEVICE_MPLS_SUPPORT */
 	if (size <= flow_size) {
 		if (flow->layers & MLX5_FLOW_LAYER_OUTER_L3_IPV4)
@@ -1775,30 +1424,9 @@ mlx5_flow_item_mpls(const struct rte_flow_item *item __rte_unused,
 		.type = IBV_FLOW_SPEC_MPLS,
 		.size = size,
 	};
-	int ret;
 
-	if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_MPLS)
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ITEM,
-					  item,
-					  "protocol filtering not compatible"
-					  " with MPLS layer");
-	/* Multi-tunnel isn't allowed but MPLS over GRE is an exception. */
-	if (flow->layers & MLX5_FLOW_LAYER_TUNNEL &&
-	    (flow->layers & MLX5_FLOW_LAYER_GRE) != MLX5_FLOW_LAYER_GRE)
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ITEM,
-					  item,
-					  "a tunnel is already"
-					  " present");
 	if (!mask)
 		mask = &rte_flow_item_mpls_mask;
-	ret = mlx5_flow_item_acceptable
-		(item, (const uint8_t *)mask,
-		 (const uint8_t *)&rte_flow_item_mpls_mask,
-		 sizeof(struct rte_flow_item_mpls), error);
-	if (ret < 0)
-		return ret;
 	if (spec) {
 		memcpy(&mpls.val.label, spec, sizeof(mpls.val.label));
 		memcpy(&mpls.mask.label, mask, sizeof(mpls.mask.label));
@@ -1845,8 +1473,7 @@ mlx5_flow_item_mpls(const struct rte_flow_item *item __rte_unused,
  *   On error, a negative errno value is returned and rte_errno is set.
  */
 static int
-mlx5_flow_items(struct rte_eth_dev *dev,
-		const struct rte_flow_item pattern[],
+mlx5_flow_items(const struct rte_flow_item pattern[],
 		struct rte_flow *flow, const size_t flow_size,
 		struct rte_flow_error *error)
 {
@@ -1860,33 +1487,32 @@ mlx5_flow_items(struct rte_eth_dev *dev,
 		case RTE_FLOW_ITEM_TYPE_VOID:
 			break;
 		case RTE_FLOW_ITEM_TYPE_ETH:
-			ret = mlx5_flow_item_eth(pattern, flow, remain, error);
+			ret = mlx5_flow_item_eth(pattern, flow, remain);
 			break;
 		case RTE_FLOW_ITEM_TYPE_VLAN:
-			ret = mlx5_flow_item_vlan(pattern, flow, remain, error);
+			ret = mlx5_flow_item_vlan(pattern, flow, remain);
 			break;
 		case RTE_FLOW_ITEM_TYPE_IPV4:
-			ret = mlx5_flow_item_ipv4(pattern, flow, remain, error);
+			ret = mlx5_flow_item_ipv4(pattern, flow, remain);
 			break;
 		case RTE_FLOW_ITEM_TYPE_IPV6:
-			ret = mlx5_flow_item_ipv6(pattern, flow, remain, error);
+			ret = mlx5_flow_item_ipv6(pattern, flow, remain);
 			break;
 		case RTE_FLOW_ITEM_TYPE_UDP:
-			ret = mlx5_flow_item_udp(pattern, flow, remain, error);
+			ret = mlx5_flow_item_udp(pattern, flow, remain);
 			break;
 		case RTE_FLOW_ITEM_TYPE_TCP:
-			ret = mlx5_flow_item_tcp(pattern, flow, remain, error);
+			ret = mlx5_flow_item_tcp(pattern, flow, remain);
 			break;
 		case RTE_FLOW_ITEM_TYPE_VXLAN:
-			ret = mlx5_flow_item_vxlan(pattern, flow, remain,
-						   error);
+			ret = mlx5_flow_item_vxlan(pattern, flow, remain);
 			break;
 		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
-			ret = mlx5_flow_item_vxlan_gpe(dev, pattern, flow,
-						       remain, error);
+			ret = mlx5_flow_item_vxlan_gpe(pattern, flow,
+						       remain);
 			break;
 		case RTE_FLOW_ITEM_TYPE_GRE:
-			ret = mlx5_flow_item_gre(pattern, flow, remain, error);
+			ret = mlx5_flow_item_gre(pattern, flow, remain);
 			break;
 		case RTE_FLOW_ITEM_TYPE_MPLS:
 			ret = mlx5_flow_item_mpls(pattern, flow, remain, error);
@@ -1910,7 +1536,7 @@ mlx5_flow_items(struct rte_eth_dev *dev,
 			.type = RTE_FLOW_ITEM_TYPE_ETH,
 		};
 
-		return mlx5_flow_item_eth(&item, flow, flow_size, error);
+		return mlx5_flow_item_eth(&item, flow, flow_size);
 	}
 	return size;
 }
@@ -1921,15 +1547,11 @@ mlx5_flow_items(struct rte_eth_dev *dev,
  * If the necessary size for the conversion is greater than the @p flow_size,
  * nothing is written in @p flow, the validation is still performed.
  *
- * @param[in] action
- *   Action configuration.
  * @param[in, out] flow
  *   Pointer to flow structure.
  * @param[in] flow_size
  *   Size in bytes of the available space in @p flow, if too small, nothing is
  *   written.
- * @param[out] error
- *   Pointer to error structure.
  *
  * @return
  *   On success the number of bytes consumed/necessary, if the returned value
@@ -1939,9 +1561,7 @@ mlx5_flow_items(struct rte_eth_dev *dev,
  *   On error, a negative errno value is returned and rte_errno is set.
  */
 static int
-mlx5_flow_action_drop(const struct rte_flow_action *action,
-		      struct rte_flow *flow, const size_t flow_size,
-		      struct rte_flow_error *error)
+mlx5_flow_action_drop(struct rte_flow *flow, const size_t flow_size)
 {
 	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
 	struct ibv_flow_spec_action_drop drop = {
@@ -1949,18 +1569,6 @@ mlx5_flow_action_drop(const struct rte_flow_action *action,
 			.size = size,
 	};
 
-	if (flow->fate)
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ACTION,
-					  action,
-					  "multiple fate actions are not"
-					  " supported");
-	if (flow->modifier & (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK))
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ACTION,
-					  action,
-					  "drop is not compatible with"
-					  " flag/mark action");
 	if (size < flow_size)
 		mlx5_flow_spec_verbs_add(flow, &drop, size);
 	flow->fate |= MLX5_FLOW_FATE_DROP;
@@ -1971,43 +1579,20 @@ mlx5_flow_action_drop(const struct rte_flow_action *action,
  * Convert the @p action into @p flow after ensuring the NIC will understand
  * and process it correctly.
  *
- * @param[in] dev
- *   Pointer to Ethernet device structure.
  * @param[in] action
  *   Action configuration.
  * @param[in, out] flow
  *   Pointer to flow structure.
- * @param[out] error
- *   Pointer to error structure.
  *
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-mlx5_flow_action_queue(struct rte_eth_dev *dev,
-		       const struct rte_flow_action *action,
-		       struct rte_flow *flow,
-		       struct rte_flow_error *error)
+mlx5_flow_action_queue(const struct rte_flow_action *action,
+		       struct rte_flow *flow)
 {
-	struct priv *priv = dev->data->dev_private;
 	const struct rte_flow_action_queue *queue = action->conf;
 
-	if (flow->fate)
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ACTION,
-					  action,
-					  "multiple fate actions are not"
-					  " supported");
-	if (queue->index >= priv->rxqs_n)
-		return rte_flow_error_set(error, EINVAL,
-					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
-					  &queue->index,
-					  "queue index out of range");
-	if (!(*priv->rxqs)[queue->index])
-		return rte_flow_error_set(error, EINVAL,
-					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
-					  &queue->index,
-					  "queue is not configured");
 	if (flow->queue)
 		(*flow->queue)[0] = queue->index;
 	flow->rss.queue_num = 1;
@@ -2018,90 +1603,20 @@ mlx5_flow_action_queue(struct rte_eth_dev *dev,
 /**
  * Ensure the @p action will be understood and used correctly by the NIC.
  *
- * @param dev
- *   Pointer to Ethernet device structure.
- * @param action[in]
- *   Pointer to flow actions array.
+ * @param[in] action
+ *   Action configuration.
  * @param flow[in, out]
  *   Pointer to the rte_flow structure.
- * @param error[in, out]
- *   Pointer to error structure.
  *
  * @return
- *   On success @p flow->queue array and @p flow->rss are filled and valid.
- *   On error, a negative errno value is returned and rte_errno is set.
+ *   0 on success.
  */
 static int
-mlx5_flow_action_rss(struct rte_eth_dev *dev,
-		     const struct rte_flow_action *action,
-		     struct rte_flow *flow,
-		     struct rte_flow_error *error)
+mlx5_flow_action_rss(const struct rte_flow_action *action,
+			struct rte_flow *flow)
 {
-	struct priv *priv = dev->data->dev_private;
 	const struct rte_flow_action_rss *rss = action->conf;
-	unsigned int i;
 
-	if (flow->fate)
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ACTION,
-					  action,
-					  "multiple fate actions are not"
-					  " supported");
-	if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
-	    rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
-					  &rss->func,
-					  "RSS hash function not supported");
-#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
-	if (rss->level > 2)
-#else
-	if (rss->level > 1)
-#endif
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
-					  &rss->level,
-					  "tunnel RSS is not supported");
-	if (rss->key_len < MLX5_RSS_HASH_KEY_LEN)
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
-					  &rss->key_len,
-					  "RSS hash key too small");
-	if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
-					  &rss->key_len,
-					  "RSS hash key too large");
-	if (!rss->queue_num)
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
-					  rss,
-					  "no queues were provided for RSS");
-	if (rss->queue_num > priv->config.ind_table_max_size)
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
-					  &rss->queue_num,
-					  "number of queues too large");
-	if (rss->types & MLX5_RSS_HF_MASK)
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
-					  &rss->types,
-					  "some RSS protocols are not"
-					  " supported");
-	for (i = 0; i != rss->queue_num; ++i) {
-		if (rss->queue[i] >= priv->rxqs_n)
-			return rte_flow_error_set
-				(error, EINVAL,
-				 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
-				 rss,
-				 "queue index out of range");
-		if (!(*priv->rxqs)[rss->queue[i]])
-			return rte_flow_error_set
-				(error, EINVAL,
-				 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
-				 &rss->queue[i],
-				 "queue is not configured");
-	}
 	if (flow->queue)
 		memcpy((*flow->queue), rss->queue,
 		       rss->queue_num * sizeof(uint16_t));
@@ -2119,27 +1634,20 @@ mlx5_flow_action_rss(struct rte_eth_dev *dev,
  * If the necessary size for the conversion is greater than the @p flow_size,
  * nothing is written in @p flow, the validation is still performed.
  *
- * @param[in] action
- *   Action configuration.
  * @param[in, out] flow
  *   Pointer to flow structure.
  * @param[in] flow_size
  *   Size in bytes of the available space in @p flow, if too small, nothing is
  *   written.
- * @param[out] error
- *   Pointer to error structure.
  *
  * @return
  *   On success the number of bytes consumed/necessary, if the returned value
  *   is lesser or equal to @p flow_size, the @p action has fully been
  *   converted, otherwise another call with this returned memory size should
  *   be done.
- *   On error, a negative errno value is returned and rte_errno is set.
  */
 static int
-mlx5_flow_action_flag(const struct rte_flow_action *action,
-		      struct rte_flow *flow, const size_t flow_size,
-		      struct rte_flow_error *error)
+mlx5_flow_action_flag(struct rte_flow *flow, const size_t flow_size)
 {
 	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
 	struct ibv_flow_spec_action_tag tag = {
@@ -2149,17 +1657,6 @@ mlx5_flow_action_flag(const struct rte_flow_action *action,
 	};
 	struct mlx5_flow_verbs *verbs = flow->cur_verbs;
 
-	if (flow->modifier & MLX5_FLOW_MOD_FLAG)
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ACTION,
-					  action,
-					  "flag action already present");
-	if (flow->fate & MLX5_FLOW_FATE_DROP)
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ACTION,
-					  action,
-					  "flag is not compatible with drop"
-					  " action");
 	if (flow->modifier & MLX5_FLOW_MOD_MARK)
 		size = 0;
 	else if (size <= flow_size && verbs)
@@ -2213,20 +1710,16 @@ mlx5_flow_verbs_mark_update(struct mlx5_flow_verbs *verbs, uint32_t mark_id)
  * @param[in] flow_size
  *   Size in bytes of the available space in @p flow, if too small, nothing is
  *   written.
- * @param[out] error
- *   Pointer to error structure.
  *
  * @return
  *   On success the number of bytes consumed/necessary, if the returned value
  *   is lesser or equal to @p flow_size, the @p action has fully been
  *   converted, otherwise another call with this returned memory size should
  *   be done.
- *   On error, a negative errno value is returned and rte_errno is set.
  */
 static int
 mlx5_flow_action_mark(const struct rte_flow_action *action,
-		      struct rte_flow *flow, const size_t flow_size,
-		      struct rte_flow_error *error)
+		      struct rte_flow *flow, const size_t flow_size)
 {
 	const struct rte_flow_action_mark *mark = action->conf;
 	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
@@ -2236,28 +1729,6 @@ mlx5_flow_action_mark(const struct rte_flow_action *action,
 	};
 	struct mlx5_flow_verbs *verbs = flow->cur_verbs;
 
-	if (!mark)
-		return rte_flow_error_set(error, EINVAL,
-					  RTE_FLOW_ERROR_TYPE_ACTION,
-					  action,
-					  "configuration cannot be null");
-	if (mark->id >= MLX5_FLOW_MARK_MAX)
-		return rte_flow_error_set(error, EINVAL,
-					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
-					  &mark->id,
-					  "mark id must in 0 <= id < "
-					  RTE_STR(MLX5_FLOW_MARK_MAX));
-	if (flow->modifier & MLX5_FLOW_MOD_MARK)
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ACTION,
-					  action,
-					  "mark action already present");
-	if (flow->fate & MLX5_FLOW_FATE_DROP)
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ACTION,
-					  action,
-					  "mark is not compatible with drop"
-					  " action");
 	if (flow->modifier & MLX5_FLOW_MOD_FLAG) {
 		mlx5_flow_verbs_mark_update(verbs, mark->id);
 		size = 0;
@@ -2318,11 +1789,6 @@ mlx5_flow_action_count(struct rte_eth_dev *dev,
 						  "cannot get counter"
 						  " context.");
 	}
-	if (!((struct priv *)dev->data->dev_private)->config.flow_counter_en)
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ACTION,
-					  action,
-					  "flow counters are not supported.");
 	flow->modifier |= MLX5_FLOW_MOD_COUNT;
 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
 	counter.counter_set_handle = flow->counter->cs->handle;
@@ -2375,22 +1841,19 @@ mlx5_flow_actions(struct rte_eth_dev *dev,
 		case RTE_FLOW_ACTION_TYPE_VOID:
 			break;
 		case RTE_FLOW_ACTION_TYPE_FLAG:
-			ret = mlx5_flow_action_flag(actions, flow, remain,
-						    error);
+			ret = mlx5_flow_action_flag(flow, remain);
 			break;
 		case RTE_FLOW_ACTION_TYPE_MARK:
-			ret = mlx5_flow_action_mark(actions, flow, remain,
-						    error);
+			ret = mlx5_flow_action_mark(actions, flow, remain);
 			break;
 		case RTE_FLOW_ACTION_TYPE_DROP:
-			ret = mlx5_flow_action_drop(actions, flow, remain,
-						    error);
+			ret = mlx5_flow_action_drop(flow, remain);
 			break;
 		case RTE_FLOW_ACTION_TYPE_QUEUE:
-			ret = mlx5_flow_action_queue(dev, actions, flow, error);
+			ret = mlx5_flow_action_queue(actions, flow);
 			break;
 		case RTE_FLOW_ACTION_TYPE_RSS:
-			ret = mlx5_flow_action_rss(dev, actions, flow, error);
+			ret = mlx5_flow_action_rss(actions, flow);
 			break;
 		case RTE_FLOW_ACTION_TYPE_COUNT:
 			ret = mlx5_flow_action_count(dev, actions, flow, remain,
@@ -2585,7 +2048,7 @@ mlx5_flow_merge(struct rte_eth_dev *dev, struct rte_flow *flow,
 	size_t original_verbs_size = 0;
 	uint32_t original_layers = 0;
 	int expanded_pattern_idx = 0;
-	int ret;
+	int ret = 0;
 	uint32_t i;
 
 	if (attributes->transfer)
@@ -2594,7 +2057,7 @@ mlx5_flow_merge(struct rte_eth_dev *dev, struct rte_flow *flow,
 					      actions, error);
 	if (size > flow_size)
 		flow = &local_flow;
-	ret = mlx5_flow_attributes(dev, attributes, flow, error);
+	ret = mlx5_flow_attributes(dev, attributes, flow);
 	if (ret < 0)
 		return ret;
 	ret = mlx5_flow_actions(dev, actions, &local_flow, 0, error);
@@ -2666,8 +2129,7 @@ mlx5_flow_merge(struct rte_eth_dev *dev, struct rte_flow *flow,
 			}
 		}
 		ret = mlx5_flow_items
-			(dev,
-			 (const struct rte_flow_item *)
+			((const struct rte_flow_item *)
 			 &buf->entry[i].pattern[expanded_pattern_idx],
 			 flow,
 			 (size < flow_size) ? flow_size - size : 0, error);
@@ -2851,99 +2313,1209 @@ mlx5_flow_rxq_flags_clear(struct rte_eth_dev *dev)
 	}
 }
 
-/**
- * Validate a flow supported by the NIC.
+/**
+ * Validate the flag action.
  *
- * @see rte_flow_validate()
- * @see rte_flow_ops
+ * @param[in] action_flags
+ *   Bit-fields that hold the actions detected until now.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-int
-mlx5_flow_validate(struct rte_eth_dev *dev,
-		   const struct rte_flow_attr *attr,
-		   const struct rte_flow_item items[],
-		   const struct rte_flow_action actions[],
-		   struct rte_flow_error *error)
+static int
+mlx5_flow_validate_action_flag(uint64_t action_flags,
+			       struct rte_flow_error *error)
 {
-	int ret = mlx5_flow_merge(dev, NULL, 0, attr, items, actions, error);
 
-	if (ret < 0)
-		return ret;
+	if (action_flags & MLX5_ACTION_DROP)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "can't drop and flag in same flow");
+	if (action_flags & MLX5_ACTION_MARK)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "can't mark and flag in same flow");
+	if (action_flags & MLX5_ACTION_FLAG)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "can't have 2 flag"
+					  " actions in same flow");
 	return 0;
 }
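+
+/*
+ * For illustration only (hypothetical caller, not part of this patch):
+ * these validators are meant to be called from a loop over the action
+ * list that accumulates MLX5_ACTION_* bits as it goes, e.g.:
+ *
+ *	uint64_t action_flags = 0;
+ *	int ret;
+ *
+ *	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+ *		if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
+ *			ret = mlx5_flow_validate_action_flag(action_flags,
+ *							     error);
+ *			if (ret)
+ *				return ret;
+ *			action_flags |= MLX5_ACTION_FLAG;
+ *		}
+ *	}
+ */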
 
-/**
- * Remove the flow.
+/**
+ * Validate the mark action.
  *
- * @param[in] dev
- *   Pointer to Ethernet device.
- * @param[in, out] flow
- *   Pointer to flow structure.
+ * @param[in] action
+ *   Pointer to the mark action.
+ * @param[in] action_flags
+ *   Bit-fields that hold the actions detected until now.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-static void
-mlx5_flow_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
+static int
+mlx5_flow_validate_action_mark(const struct rte_flow_action *action,
+			       uint64_t action_flags,
+			       struct rte_flow_error *error)
 {
-	struct priv *priv = dev->data->dev_private;
-	struct mlx5_flow_verbs *verbs;
+	const struct rte_flow_action_mark *mark = action->conf;
 
-	if (flow->nl_flow && priv->mnl_socket)
-		mlx5_nl_flow_destroy(priv->mnl_socket, flow->nl_flow, NULL);
-	LIST_FOREACH(verbs, &flow->verbs, next) {
-		if (verbs->flow) {
-			claim_zero(mlx5_glue->destroy_flow(verbs->flow));
-			verbs->flow = NULL;
-		}
-		if (verbs->hrxq) {
-			if (flow->fate & MLX5_FLOW_FATE_DROP)
-				mlx5_hrxq_drop_release(dev);
-			else
-				mlx5_hrxq_release(dev, verbs->hrxq);
-			verbs->hrxq = NULL;
-		}
-	}
-	if (flow->counter) {
-		mlx5_flow_counter_release(flow->counter);
-		flow->counter = NULL;
-	}
+	if (!mark)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION,
+					  action,
+					  "configuration cannot be null");
+	if (mark->id >= MLX5_FLOW_MARK_MAX)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+					  &mark->id,
+					  "mark id must in 0 <= id < "
+					  RTE_STR(MLX5_FLOW_MARK_MAX));
+	if (action_flags & MLX5_ACTION_DROP)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "can't drop and mark in same flow");
+	if (action_flags & MLX5_ACTION_FLAG)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "can't flag and mark in same flow");
+	if (action_flags & MLX5_ACTION_MARK)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "can't have 2 flag actions in same"
+					  " flow");
+	return 0;
 }
 
-/**
- * Apply the flow.
+/*
+ * Validate the drop action.
+ *
+ * @param[in] action_flags
+ *   Bit-fields that holds the actions detected until now.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_ernno is set.
+ */
+static int
+mlx5_flow_validate_action_drop(uint64_t action_flags,
+			       struct rte_flow_error *error)
+{
+	if (action_flags & MLX5_ACTION_FLAG)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "can't drop and flag in same flow");
+	if (action_flags & MLX5_ACTION_MARK)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "can't drop and mark in same flow");
+	if (action_flags &
+		(MLX5_ACTION_DROP | MLX5_ACTION_QUEUE | MLX5_ACTION_RSS))
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "can't have 2 fate actions in"
+					  " same flow");
+	return 0;
+}
+
+/*
  *
+ * Validate the queue action.
+ *
+ * @param[in] action
+ *   Pointer to the queue action.
+ * @param[in] action_flags
+ *   Bit-fields that holds the actions detected until now.
  * @param[in] dev
- *   Pointer to Ethernet device structure.
- * @param[in, out] flow
- *   Pointer to flow structure.
+ *   Pointer to the Ethernet device structure.
  * @param[out] error
  *   Pointer to error structure.
  *
  * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
+ *   0 on success, a negative errno value otherwise and rte_ernno is set.
  */
 static int
-mlx5_flow_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
-		struct rte_flow_error *error)
+mlx5_flow_validate_action_queue(const struct rte_flow_action *action,
+				uint64_t action_flags,
+				struct rte_eth_dev *dev,
+				struct rte_flow_error *error)
 {
 	struct priv *priv = dev->data->dev_private;
-	struct mlx5_flow_verbs *verbs;
-	int err;
-
-	LIST_FOREACH(verbs, &flow->verbs, next) {
-		if (flow->fate & MLX5_FLOW_FATE_DROP) {
-			verbs->hrxq = mlx5_hrxq_drop_new(dev);
-			if (!verbs->hrxq) {
-				rte_flow_error_set
-					(error, errno,
-					 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-					 NULL,
-					 "cannot get drop hash queue");
-				goto error;
-			}
-		} else {
-			struct mlx5_hrxq *hrxq;
+	const struct rte_flow_action_queue *queue = action->conf;
 
-			hrxq = mlx5_hrxq_get(dev, flow->key,
-					     MLX5_RSS_HASH_KEY_LEN,
-					     verbs->hash_fields,
+	if (action_flags &
+	    (MLX5_ACTION_DROP | MLX5_ACTION_QUEUE | MLX5_ACTION_RSS))
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "can't have 2 fate actions in"
+					  " same flow");
+	if (queue->index >= priv->rxqs_n)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+					  &queue->index,
+					  "queue index out of range");
+	if (!(*priv->rxqs)[queue->index])
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+					  &queue->index,
+					  "queue is not configured");
+	return 0;
+}
+
+/*
+ *
+ * Validate the rss action.
+ *
+ * @param[in] action
+ *   Pointer to the queue action.
+ * @param[in] action_flags
+ *   Bit-fields that holds the actions detected until now.
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_ernno is set.
+ */
+static int
+mlx5_flow_validate_action_rss(const struct rte_flow_action *action,
+			      uint64_t action_flags,
+			      struct rte_eth_dev *dev,
+			      struct rte_flow_error *error)
+{
+	struct priv *priv = dev->data->dev_private;
+	const struct rte_flow_action_rss *rss = action->conf;
+	unsigned int i;
+
+	if (action_flags &
+	    (MLX5_ACTION_DROP | MLX5_ACTION_QUEUE | MLX5_ACTION_RSS))
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "can't have 2 fate actions"
+					  " in same flow");
+	if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
+	    rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+					  &rss->func,
+					  "RSS hash function not supported");
+#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
+	if (rss->level > 2)
+#else
+	if (rss->level > 1)
+#endif
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+					  &rss->level,
+					  "tunnel RSS is not supported");
+	if (rss->key_len < MLX5_RSS_HASH_KEY_LEN)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+					  &rss->key_len,
+					  "RSS hash key too small");
+	if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+					  &rss->key_len,
+					  "RSS hash key too large");
+	if (rss->queue_num > priv->config.ind_table_max_size)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+					  &rss->queue_num,
+					  "number of queues too large");
+	if (rss->types & MLX5_RSS_HF_MASK)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+					  &rss->types,
+					  "some RSS protocols are not"
+					  " supported");
+	for (i = 0; i != rss->queue_num; ++i) {
+		if (!(*priv->rxqs)[rss->queue[i]])
+			return rte_flow_error_set
+				(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+				 &rss->queue[i], "queue is not configured");
+	}
+	return 0;
+}
+
+/*
+ * Validate the count action.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_ernno is set.
+ */
+static int
+mlx5_flow_validate_action_count(struct rte_eth_dev *dev,
+				struct rte_flow_error *error)
+{
+	struct priv *priv = dev->data->dev_private;
+
+	if (!priv->config.flow_counter_en)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "flow counters are not supported.");
+	return 0;
+}
+
+/**
+ * Verify the @p attributes will be correctly understood by the NIC and store
+ * them in the @p flow if everything is correct.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] attributes
+ *   Pointer to flow attributes
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_flow_validate_attributes(struct rte_eth_dev *dev,
+			      const struct rte_flow_attr *attributes,
+			      struct rte_flow_error *error)
+{
+	struct priv *priv = dev->data->dev_private;
+	uint32_t priority_max = priv->config.flow_prio - 1;
+
+	if (attributes->group)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
+					  NULL, "groups is not supported");
+	if (attributes->priority != MLX5_FLOW_PRIO_RSVD &&
+	    attributes->priority >= priority_max)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
+					  NULL, "priority out of range");
+	if (attributes->egress)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
+					  "egress is not supported");
+	if (attributes->transfer)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
+					  NULL, "transfer is not supported");
+	if (!attributes->ingress)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
+					  NULL,
+					  "ingress attribute is mandatory");
+	return 0;
+}
+
+/**
+ * Validate Ethernet item.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] item_flags
+ *   Bit-fields that holds the items detected until now.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
+			    uint64_t item_flags,
+			    struct rte_flow_error *error)
+{
+	const struct rte_flow_item_eth *mask = item->mask;
+	const struct rte_flow_item_eth nic_mask = {
+		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+		.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+		.type = RTE_BE16(0xffff),
+	};
+	int ret;
+	int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
+
+	if (item_flags & MLX5_FLOW_LAYER_OUTER_L2)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ITEM, item,
+					  "3 levels of l2 are not supported");
+	if ((item_flags & MLX5_FLOW_LAYER_INNER_L2) && !tunnel)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ITEM, item,
+					  "2 L2 without tunnel are not supported");
+	if (!mask)
+		mask = &rte_flow_item_eth_mask;
+	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
+					(const uint8_t *)&nic_mask,
+					sizeof(struct rte_flow_item_eth),
+					error);
+	return ret;
+}
+
+/**
+ * Validate VLAN item.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] item_flags
+ *   Bit-fields that holds the items detected until now.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
+			     int64_t item_flags,
+			     struct rte_flow_error *error)
+{
+	const struct rte_flow_item_vlan *spec = item->spec;
+	const struct rte_flow_item_vlan *mask = item->mask;
+	const struct rte_flow_item_vlan nic_mask = {
+		.tci = RTE_BE16(0x0fff),
+		.inner_type = RTE_BE16(0xffff),
+	};
+	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
+	int ret;
+	const uint32_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
+					MLX5_FLOW_LAYER_INNER_L4) :
+				       (MLX5_FLOW_LAYER_OUTER_L3 |
+					MLX5_FLOW_LAYER_OUTER_L4);
+	const uint32_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
+					MLX5_FLOW_LAYER_OUTER_VLAN;
+
+	if (item_flags & vlanm)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ITEM, item,
+					  "VLAN layer already configured");
+	else if ((item_flags & l34m) != 0)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ITEM, item,
+					  "L2 layer cannot follow L3/L4 layer");
+	if (!mask)
+		mask = &rte_flow_item_vlan_mask;
+	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
+					(const uint8_t *)&nic_mask,
+					sizeof(struct rte_flow_item_vlan),
+					error);
+	if (ret)
+		return ret;
+	/*
+	 * From verbs perspective an empty VLAN is equivalent
+	 * to a packet without VLAN layer.
+	 */
+	if (!spec->tci)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
+					  item->spec,
+					  "VLAN cannot be empty");
+	return 0;
+}
+
+/**
+ * Validate IPV4 item.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] item_flags
+ *   Bit-fields that holds the items detected until now.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item,
+			     int64_t item_flags,
+			     struct rte_flow_error *error)
+{
+	const struct rte_flow_item_ipv4 *mask = item->mask;
+	const struct rte_flow_item_ipv4 nic_mask = {
+		.hdr = {
+			.src_addr = RTE_BE32(0xffffffff),
+			.dst_addr = RTE_BE32(0xffffffff),
+			.type_of_service = 0xff,
+			.next_proto_id = 0xff,
+		},
+	};
+	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
+	int ret;
+
+	if (item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
+				   MLX5_FLOW_LAYER_OUTER_L3))
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ITEM, item,
+					  "multiple L3 layers not supported");
+	else if (item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
+					MLX5_FLOW_LAYER_OUTER_L4))
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ITEM, item,
+					  "L3 cannot follow an L4 layer.");
+	if (!mask)
+		mask = &rte_flow_item_ipv4_mask;
+	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
+					(const uint8_t *)&nic_mask,
+					sizeof(struct rte_flow_item_ipv4),
+					error);
+	if (ret < 0)
+		return ret;
+	return 0;
+}
+
+/**
+ * Validate IPV6 item.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] item_flags
+ *   Bit-fields that holds the items detected until now.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item,
+			     uint64_t item_flags,
+			     struct rte_flow_error *error)
+{
+	const struct rte_flow_item_ipv6 *mask = item->mask;
+	const struct rte_flow_item_ipv6 nic_mask = {
+		.hdr = {
+			.src_addr =
+				"\xff\xff\xff\xff\xff\xff\xff\xff"
+				"\xff\xff\xff\xff\xff\xff\xff\xff",
+			.dst_addr =
+				"\xff\xff\xff\xff\xff\xff\xff\xff"
+				"\xff\xff\xff\xff\xff\xff\xff\xff",
+			.vtc_flow = RTE_BE32(0xffffffff),
+			.proto = 0xff,
+			.hop_limits = 0xff,
+		},
+	};
+	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
+	int ret;
+
+	if (item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
+				   MLX5_FLOW_LAYER_OUTER_L3))
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ITEM, item,
+					  "multiple L3 layers not supported");
+	else if (item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
+					MLX5_FLOW_LAYER_OUTER_L4))
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ITEM, item,
+					  "L3 cannot follow an L4 layer.");
+	/*
+	 * IPv6 is not recognised by the NIC inside a GRE tunnel.
+	 * Such support has to be disabled as the rule will be
+	 * accepted.  Issue reproduced with Mellanox OFED 4.3-3.0.2.1 and
+	 * Mellanox OFED 4.4-1.0.0.0.
+	 */
+	if (tunnel && item_flags & MLX5_FLOW_LAYER_GRE)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ITEM, item,
+					  "IPv6 inside a GRE tunnel is"
+					  " not recognised.");
+	if (!mask)
+		mask = &rte_flow_item_ipv6_mask;
+	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
+					(const uint8_t *)&nic_mask,
+					sizeof(struct rte_flow_item_ipv6),
+					error);
+	if (ret < 0)
+		return ret;
+	return 0;
+}
+
+/**
+ * Validate UDP item.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] item_flags
+ *   Bit-fields that holds the items detected until now.
+ * @param[in] target_protocol
+ *   The next protocol in the previous item.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_flow_validate_item_udp(const struct rte_flow_item *item,
+			    uint64_t item_flags,
+			    uint8_t target_protocol,
+			    struct rte_flow_error *error)
+{
+	const struct rte_flow_item_udp *mask = item->mask;
+	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
+	int ret;
+
+	if (target_protocol != 0xff && target_protocol != MLX5_IP_PROTOCOL_UDP)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ITEM, item,
+					  "protocol filtering not compatible"
+					  " with UDP layer");
+	if (!(item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
+				     MLX5_FLOW_LAYER_OUTER_L3)))
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ITEM, item,
+					  "L3 is mandatory to filter on L4");
+	if (item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
+				   MLX5_FLOW_LAYER_OUTER_L4))
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ITEM, item,
+					  "L4 layer is already present");
+	if (!mask)
+		mask = &rte_flow_item_udp_mask;
+	ret = mlx5_flow_item_acceptable
+		(item, (const uint8_t *)mask,
+		 (const uint8_t *)&rte_flow_item_udp_mask,
+		 sizeof(struct rte_flow_item_udp), error);
+	if (ret < 0)
+		return ret;
+	return 0;
+}
+
+/**
+ * Validate TCP item.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] item_flags
+ *   Bit-fields that holds the items detected until now.
+ * @param[in] target_protocol
+ *   The next protocol in the previous item.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_flow_validate_item_tcp(const struct rte_flow_item *item,
+			    uint64_t item_flags,
+			    uint8_t target_protocol,
+			    struct rte_flow_error *error)
+{
+	const struct rte_flow_item_tcp *mask = item->mask;
+	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
+	int ret;
+
+	if (target_protocol != 0xff && target_protocol != MLX5_IP_PROTOCOL_TCP)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ITEM, item,
+					  "protocol filtering not compatible"
+					  " with TCP layer");
+	if (!(item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
+				     MLX5_FLOW_LAYER_OUTER_L3)))
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ITEM, item,
+					  "L3 is mandatory to filter on L4");
+	if (item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
+				   MLX5_FLOW_LAYER_OUTER_L4))
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ITEM, item,
+					  "L4 layer is already present");
+	if (!mask)
+		mask = &rte_flow_item_tcp_mask;
+	ret = mlx5_flow_item_acceptable
+		(item, (const uint8_t *)mask,
+		 (const uint8_t *)&rte_flow_item_tcp_mask,
+		 sizeof(struct rte_flow_item_tcp), error);
+	if (ret < 0)
+		return ret;
+	return 0;
+}
+
+/**
+ * Validate VXLAN item.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] item_flags
+ *   Bit-fields that holds the items detected until now.
+ * @param[in] target_protocol
+ *   The next protocol in the previous item.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item,
+			      uint64_t item_flags,
+			      struct rte_flow_error *error)
+{
+	const struct rte_flow_item_vxlan *spec = item->spec;
+	const struct rte_flow_item_vxlan *mask = item->mask;
+	int ret;
+	union vni {
+		uint32_t vlan_id;
+		uint8_t vni[4];
+	} id = { .vlan_id = 0, };
+	uint32_t vlan_id = 0;
+
+
+	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ITEM, item,
+					  "a tunnel is already present");
+	/*
+	 * Verify only UDPv4 is present as defined in
+	 * https://tools.ietf.org/html/rfc7348
+	 */
+	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ITEM, item,
+					  "no outer UDP layer found");
+	if (!mask)
+		mask = &rte_flow_item_vxlan_mask;
+	ret = mlx5_flow_item_acceptable
+		(item, (const uint8_t *)mask,
+		 (const uint8_t *)&rte_flow_item_vxlan_mask,
+		 sizeof(struct rte_flow_item_vxlan),
+		 error);
+	if (ret < 0)
+		return ret;
+	if (spec) {
+		memcpy(&id.vni[1], spec->vni, 3);
+		vlan_id = id.vlan_id;
+		memcpy(&id.vni[1], mask->vni, 3);
+		vlan_id &= id.vlan_id;
+	}
+	/*
+	 * Tunnel id 0 is equivalent to not adding a VXLAN layer: if
+	 * only this layer is defined in the Verbs specification, it is
+	 * interpreted as a wildcard and all packets will match this
+	 * rule; if it follows a full stack layer (e.g. eth / ipv4 /
+	 * udp), all packets matching the layers before will also
+	 * match this rule.  To avoid such a situation, VNI 0 is
+	 * currently refused.
+	 */
+	if (!vlan_id)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ITEM, item,
+					  "VXLAN vni cannot be 0");
+	if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ITEM, item,
+					  "VXLAN tunnel must be fully defined");
+	return 0;
+}
+
+/**
+ * Validate VXLAN_GPE item.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] item_flags
+ *   Bit-fields that holds the items detected until now.
+ * @param[in] priv
+ *   Pointer to the private data structure.
+ * @param[in] target_protocol
+ *   The next protocol in the previous item.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
+				  uint64_t item_flags,
+				  struct rte_eth_dev *dev,
+				  struct rte_flow_error *error)
+{
+	struct priv *priv = dev->data->dev_private;
+	const struct rte_flow_item_vxlan_gpe *spec = item->spec;
+	const struct rte_flow_item_vxlan_gpe *mask = item->mask;
+	int ret;
+	union vni {
+		uint32_t vlan_id;
+		uint8_t vni[4];
+	} id = { .vlan_id = 0, };
+	uint32_t vlan_id = 0;
+
+	if (!priv->config.l3_vxlan_en)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ITEM, item,
+					  "L3 VXLAN is not enabled by device"
+					  " parameter and/or not configured in"
+					  " firmware");
+	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ITEM, item,
+					  "a tunnel is already present");
+	/*
+	 * Verify only UDPv4 is present as defined in
+	 * https://tools.ietf.org/html/rfc7348
+	 */
+	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ITEM, item,
+					  "no outer UDP layer found");
+	if (!mask)
+		mask = &rte_flow_item_vxlan_gpe_mask;
+	ret = mlx5_flow_item_acceptable
+		(item, (const uint8_t *)mask,
+		 (const uint8_t *)&rte_flow_item_vxlan_gpe_mask,
+		 sizeof(struct rte_flow_item_vxlan_gpe),
+		 error);
+	if (ret < 0)
+		return ret;
+	if (spec) {
+		if (spec->protocol)
+			return rte_flow_error_set(error, EINVAL,
+						  RTE_FLOW_ERROR_TYPE_ITEM,
+						  item,
+						  "VxLAN-GPE protocol"
+						  " not supported");
+		memcpy(&id.vni[1], spec->vni, 3);
+		vlan_id = id.vlan_id;
+		memcpy(&id.vni[1], mask->vni, 3);
+		vlan_id &= id.vlan_id;
+	}
+	/*
+	 * Tunnel id 0 is equivalent to not adding a VXLAN layer: if only this
+	 * layer is defined in the Verbs specification, it is interpreted as a
+	 * wildcard and all packets will match this rule; if it follows a full
+	 * stack layer (e.g. eth / ipv4 / udp), all packets matching the layers
+	 * before will also match this rule.  To avoid such a situation, VNI 0
+	 * is currently refused.
+	 */
+	if (!vlan_id)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ITEM, item,
+					  "VXLAN-GPE vni cannot be 0");
+	if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ITEM, item,
+					  "VXLAN-GPE tunnel must be fully"
+					  " defined");
+	return 0;
+}
+
+/**
+ * Validate GRE item.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] item_flags
+ *   Bit flags to mark detected items.
+ * @param[in] target_protocol
+ *   The next protocol in the previous item.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_flow_validate_item_gre(const struct rte_flow_item *item,
+			    uint64_t item_flags,
+			    uint8_t target_protocol,
+			    struct rte_flow_error *error)
+{
+	const struct rte_flow_item_gre *spec __rte_unused = item->spec;
+	const struct rte_flow_item_gre *mask = item->mask;
+	int ret;
+
+	if (target_protocol != 0xff && target_protocol != MLX5_IP_PROTOCOL_GRE)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ITEM, item,
+					  "protocol filtering not compatible"
+					  " with this GRE layer");
+	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ITEM, item,
+					  "a tunnel is already present");
+	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ITEM, item,
+					  "L3 Layer is missing");
+	if (!mask)
+		mask = &rte_flow_item_gre_mask;
+	ret = mlx5_flow_item_acceptable
+		(item, (const uint8_t *)mask,
+		 (const uint8_t *)&rte_flow_item_gre_mask,
+		 sizeof(struct rte_flow_item_gre), error);
+	if (ret < 0)
+		return ret;
+#ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
+	if (spec && (spec->protocol & mask->protocol))
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ITEM, item,
+					  "without MPLS support the"
+					  " specification cannot be used for"
+					  " filtering");
+#endif
+	return 0;
+}
+
+/**
+ * Validate MPLS item.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] item_flags
+ *   Bit-fields that holds the items detected until now.
+ * @param[in] target_protocol
+ *   The next protocol in the previous item.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_flow_validate_item_mpls(const struct rte_flow_item *item __rte_unused,
+			     uint64_t item_flags __rte_unused,
+			     uint8_t target_protocol __rte_unused,
+			     struct rte_flow_error *error)
+{
+#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
+	const struct rte_flow_item_mpls *mask = item->mask;
+	int ret;
+
+	if (target_protocol != 0xff && target_protocol != MLX5_IP_PROTOCOL_MPLS)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ITEM, item,
+					  "protocol filtering not compatible"
+					  " with MPLS layer");
+	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ITEM, item,
+					  "a tunnel is already"
+					  " present");
+	if (!mask)
+		mask = &rte_flow_item_mpls_mask;
+	ret = mlx5_flow_item_acceptable
+		(item, (const uint8_t *)mask,
+		 (const uint8_t *)&rte_flow_item_mpls_mask,
+		 sizeof(struct rte_flow_item_mpls), error);
+	if (ret < 0)
+		return ret;
+	return 0;
+#endif /* !HAVE_IBV_DEVICE_MPLS_SUPPORT */
+	return rte_flow_error_set(error, ENOTSUP,
+				  RTE_FLOW_ERROR_TYPE_ITEM, item,
+				  "MPLS is not supported by Verbs, please"
+				  " update.");
+}
+
+/**
+ *
+ * Internal validation function.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] attr
+ *   Pointer to the flow attributes.
+ * @param[in] items
+ *   Pointer to the list of items.
+ * @param[in] actions
+ *   Pointer to the list of actions.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_ernno is set.
+ */
+static int mlx5_flow_verbs_validate(struct rte_eth_dev *dev,
+				    const struct rte_flow_attr *attr,
+				    const struct rte_flow_item items[],
+				    const struct rte_flow_action actions[],
+				    struct rte_flow_error *error)
+{
+	int ret;
+	uint32_t action_flags = 0;
+	uint32_t item_flags = 0;
+	int tunnel = 0;
+	uint8_t next_protocol = 0xff;
+
+	if (items == NULL)
+		return -1;
+	ret = mlx5_flow_validate_attributes(dev, attr, error);
+	if (ret < 0)
+		return ret;
+	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
+		int ret = 0;
+		switch (items->type) {
+		case RTE_FLOW_ITEM_TYPE_VOID:
+			break;
+		case RTE_FLOW_ITEM_TYPE_ETH:
+			ret = mlx5_flow_validate_item_eth(items, item_flags,
+							  error);
+			if (ret < 0)
+				return ret;
+			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
+					       MLX5_FLOW_LAYER_OUTER_L2;
+			break;
+		case RTE_FLOW_ITEM_TYPE_VLAN:
+			ret = mlx5_flow_validate_item_vlan(items, item_flags,
+							   error);
+			if (ret < 0)
+				return ret;
+			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
+					       MLX5_FLOW_LAYER_OUTER_VLAN;
+			break;
+		case RTE_FLOW_ITEM_TYPE_IPV4:
+			ret = mlx5_flow_validate_item_ipv4(items, item_flags,
+							   error);
+			if (ret < 0)
+				return ret;
+			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
+					       MLX5_FLOW_LAYER_OUTER_L3_IPV4;
+			if (items->mask != NULL &&
+			    ((const struct rte_flow_item_ipv4 *)
+			     items->mask)->hdr.next_proto_id)
+				next_protocol =
+					((const struct rte_flow_item_ipv4 *)
+					 (items->spec))->hdr.next_proto_id;
+			break;
+		case RTE_FLOW_ITEM_TYPE_IPV6:
+			ret = mlx5_flow_validate_item_ipv6(items, item_flags,
+							   error);
+			if (ret < 0)
+				return ret;
+			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
+					       MLX5_FLOW_LAYER_OUTER_L3_IPV6;
+			if (items->mask != NULL &&
+			    ((const struct rte_flow_item_ipv6 *)
+			     items->mask)->hdr.proto)
+				next_protocol =
+					((const struct rte_flow_item_ipv6 *)
+					 items->spec)->hdr.proto;
+			break;
+		case RTE_FLOW_ITEM_TYPE_UDP:
+			ret = mlx5_flow_validate_item_udp(items, item_flags,
+							  next_protocol,
+							  error);
+			if (ret < 0)
+				return ret;
+			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
+					       MLX5_FLOW_LAYER_OUTER_L4_UDP;
+			break;
+		case RTE_FLOW_ITEM_TYPE_TCP:
+			ret = mlx5_flow_validate_item_tcp(items, item_flags,
+							  next_protocol, error);
+			if (ret < 0)
+				return ret;
+			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
+					       MLX5_FLOW_LAYER_OUTER_L4_TCP;
+			break;
+		case RTE_FLOW_ITEM_TYPE_VXLAN:
+			ret = mlx5_flow_validate_item_vxlan(items, item_flags,
+							    error);
+			if (ret < 0)
+				return ret;
+			item_flags |= MLX5_FLOW_LAYER_VXLAN;
+			break;
+		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
+			ret = mlx5_flow_validate_item_vxlan_gpe(items,
+								item_flags,
+								dev, error);
+			if (ret < 0)
+				return ret;
+			item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
+			break;
+		case RTE_FLOW_ITEM_TYPE_GRE:
+			ret = mlx5_flow_validate_item_gre(items, item_flags,
+							  next_protocol, error);
+			if (ret < 0)
+				return ret;
+			item_flags |= MLX5_FLOW_LAYER_GRE;
+			break;
+		case RTE_FLOW_ITEM_TYPE_MPLS:
+			ret = mlx5_flow_validate_item_mpls(items, item_flags,
+							   next_protocol,
+							   error);
+			if (ret < 0)
+				return ret;
+			if (next_protocol != 0xff &&
+			    next_protocol != MLX5_IP_PROTOCOL_MPLS)
+				return rte_flow_error_set
+					(error, ENOTSUP,
+					 RTE_FLOW_ERROR_TYPE_ITEM, items,
+					 "protocol filtering not compatible"
+					 " with MPLS layer");
+			item_flags |= MLX5_FLOW_LAYER_MPLS;
+			break;
+		default:
+			return rte_flow_error_set(error, ENOTSUP,
+						  RTE_FLOW_ERROR_TYPE_ITEM,
+						  NULL,
+						  "item not supported");
+		}
+	}
+	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+		tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
+		switch (actions->type) {
+		case RTE_FLOW_ACTION_TYPE_VOID:
+			break;
+		case RTE_FLOW_ACTION_TYPE_FLAG:
+			ret = mlx5_flow_validate_action_flag(action_flags,
+							     error);
+			if (ret < 0)
+				return ret;
+			action_flags |= MLX5_ACTION_FLAG;
+			break;
+		case RTE_FLOW_ACTION_TYPE_MARK:
+			ret = mlx5_flow_validate_action_mark(actions,
+							     action_flags,
+							     error);
+			if (ret < 0)
+				return ret;
+			action_flags |= MLX5_ACTION_MARK;
+			break;
+		case RTE_FLOW_ACTION_TYPE_DROP:
+			ret = mlx5_flow_validate_action_drop(action_flags,
+							     error);
+			if (ret < 0)
+				return ret;
+			action_flags |= MLX5_ACTION_DROP;
+			break;
+		case RTE_FLOW_ACTION_TYPE_QUEUE:
+			ret = mlx5_flow_validate_action_queue(actions,
+							      action_flags, dev,
+							      error);
+			if (ret < 0)
+				return ret;
+			action_flags |= MLX5_ACTION_QUEUE;
+			break;
+		case RTE_FLOW_ACTION_TYPE_RSS:
+			ret = mlx5_flow_validate_action_rss(actions,
+							    action_flags, dev,
+							    error);
+			if (ret < 0)
+				return ret;
+			action_flags |= MLX5_ACTION_RSS;
+			break;
+		case RTE_FLOW_ACTION_TYPE_COUNT:
+			ret = mlx5_flow_validate_action_count(dev, error);
+			if (ret < 0)
+				return ret;
+			action_flags |= MLX5_ACTION_COUNT;
+			break;
+		default:
+			return rte_flow_error_set(error, ENOTSUP,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  actions,
+						  "action not supported");
+		}
+	}
+	return 0;
+}
+
+/**
+ * Validate a flow supported by the NIC.
+ *
+ * @see rte_flow_validate()
+ * @see rte_flow_ops
+ */
+int
+mlx5_flow_validate(struct rte_eth_dev *dev,
+		   const struct rte_flow_attr *attr,
+		   const struct rte_flow_item items[],
+		   const struct rte_flow_action actions[],
+		   struct rte_flow_error *error)
+{
+	int ret;
+
+	ret = mlx5_flow_verbs_validate(dev, attr, items, actions, error);
+	if (ret < 0)
+		return ret;
+	return 0;
+}
+
+/**
+ * Remove the flow.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in, out] flow
+ *   Pointer to flow structure.
+ */
+static void
+mlx5_flow_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
+{
+	struct priv *priv = dev->data->dev_private;
+	struct mlx5_flow_verbs *verbs;
+
+	if (flow->nl_flow && priv->mnl_socket)
+		mlx5_nl_flow_destroy(priv->mnl_socket, flow->nl_flow, NULL);
+	LIST_FOREACH(verbs, &flow->verbs, next) {
+		if (verbs->flow) {
+			claim_zero(mlx5_glue->destroy_flow(verbs->flow));
+			verbs->flow = NULL;
+		}
+		if (verbs->hrxq) {
+			if (flow->fate & MLX5_FLOW_FATE_DROP)
+				mlx5_hrxq_drop_release(dev);
+			else
+				mlx5_hrxq_release(dev, verbs->hrxq);
+			verbs->hrxq = NULL;
+		}
+	}
+	if (flow->counter) {
+		mlx5_flow_counter_release(flow->counter);
+		flow->counter = NULL;
+	}
+}
+
+/**
+ * Apply the flow.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in, out] flow
+ *   Pointer to flow structure.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_flow_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
+		struct rte_flow_error *error)
+{
+	struct priv *priv = dev->data->dev_private;
+	struct mlx5_flow_verbs *verbs;
+	int err;
+
+	LIST_FOREACH(verbs, &flow->verbs, next) {
+		if (flow->fate & MLX5_FLOW_FATE_DROP) {
+			verbs->hrxq = mlx5_hrxq_drop_new(dev);
+			if (!verbs->hrxq) {
+				rte_flow_error_set
+					(error, errno,
+					 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					 NULL,
+					 "cannot get drop hash queue");
+				goto error;
+			}
+		} else {
+			struct mlx5_hrxq *hrxq;
+
+			hrxq = mlx5_hrxq_get(dev, flow->key,
+					     MLX5_RSS_HASH_KEY_LEN,
+					     verbs->hash_fields,
 					     (*flow->queue),
 					     flow->rss.queue_num);
 			if (!hrxq)
@@ -3025,6 +3597,9 @@ mlx5_flow_list_create(struct rte_eth_dev *dev,
 	size_t size = 0;
 	int ret;
 
+	ret = mlx5_flow_validate(dev, attr, items, actions, error);
+	if (ret < 0)
+		return NULL;
 	ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error);
 	if (ret < 0)
 		return NULL;
@@ -3233,7 +3808,7 @@ mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
 		},
 		{
 			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
-				RTE_FLOW_ITEM_TYPE_END,
+					      RTE_FLOW_ITEM_TYPE_END,
 			.spec = vlan_spec,
 			.last = NULL,
 			.mask = vlan_mask,
-- 
2.11.0

^ permalink raw reply	[flat|nested] 22+ messages in thread
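
As a reading aid for the large hunk above: the validation it introduces
boils down to a dispatch loop that calls one small validator per item
type and accumulates what has been seen in a bit-field, so each
validator can enforce ordering constraints (L3 before L4, no duplicate
layers) against the layers already matched. A compact sketch of that
shape, using hypothetical simplified types rather than the real
rte_flow API:

#include <stdint.h>
#include <errno.h>

/* Hypothetical stand-ins for the rte_flow item types. */
enum item_type { ITEM_END, ITEM_ETH, ITEM_IPV4, ITEM_UDP };
struct item { enum item_type type; };

#define LAYER_L2 (1u << 0)
#define LAYER_L3 (1u << 1)
#define LAYER_L4 (1u << 2)

/* Per-type validator: checks ordering against the layers seen so far,
 * mirroring checks such as "L3 is mandatory to filter on L4". */
static int validate_udp(uint32_t seen)
{
	if (!(seen & LAYER_L3))
		return -ENOTSUP;
	if (seen & LAYER_L4)
		return -ENOTSUP;
	return 0;
}

int validate_pattern(const struct item items[])
{
	uint32_t seen = 0;

	for (; items->type != ITEM_END; items++) {
		int ret = 0;

		switch (items->type) {
		case ITEM_ETH:
			ret = (seen & LAYER_L2) ? -ENOTSUP : 0;
			seen |= LAYER_L2;
			break;
		case ITEM_IPV4:
			ret = (seen & LAYER_L3) ? -ENOTSUP : 0;
			seen |= LAYER_L3;
			break;
		case ITEM_UDP:
			ret = validate_udp(seen);
			seen |= LAYER_L4;
			break;
		default:
			ret = -ENOTSUP;
			break;
		}
		if (ret)
			return ret;
	}
	return 0;
}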

* [dpdk-dev] [PATCH v3 02/11] net/mlx5: add flow prepare function
  2018-09-24 23:17 ` [dpdk-dev] [PATCH v3 00/11] net/mlx5: add Direct Verbs flow driver support Yongseok Koh
  2018-09-24 23:17   ` [dpdk-dev] [PATCH v3 01/11] net/mlx5: split flow validation to dedicated function Yongseok Koh
@ 2018-09-24 23:17   ` Yongseok Koh
  2018-09-24 23:17   ` [dpdk-dev] [PATCH v3 03/11] net/mlx5: add flow translate function Yongseok Koh
                     ` (9 subsequent siblings)
  11 siblings, 0 replies; 22+ messages in thread
From: Yongseok Koh @ 2018-09-24 23:17 UTC (permalink / raw)
  To: Thomas Monjalon, Shahaf Shuler; +Cc: dev, Ori Kam

From: Ori Kam <orika@mellanox.com>

In the current implementation the calculation of the flow size is done
during the validation stage, and the same function is also used to
translate the input parameters into a Verbs spec. This is hard to
maintain and error prone. Another issue concerns dev-flows, the flows
that are created implicitly in order to support the requested flow (for
example, when the user requests RSS on UDP, two rules need to be
created, one for IPv4 and one for IPv6). In the current implementation
all dev-flows share a single memory allocation, which will be harder to
implement in future drivers.

This commit extracts the calculation and creation of the dev-flow from
the translation part (the part that converts the parameters into the
format required by the driver). As a result, the only remaining job of
the prepare function is to allocate the dev-flow.
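
The resulting split follows a two-pass pattern: a first pass over the
action (and item) lists sums the bytes each entry will contribute to
the Verbs spec and records what was seen in a bit-field, then a single
allocation is made from that sum. A minimal sketch of the pattern, with
hypothetical simplified types standing in for the real rte_flow/mlx5
structures:

#include <stdint.h>
#include <stdlib.h>

/* Hypothetical stand-ins for the rte_flow action types. */
enum act_type { ACT_END, ACT_MARK, ACT_DROP };
struct action { enum act_type type; };

#define FLAG_MARK (1u << 0)
#define FLAG_DROP (1u << 1)

/* Pass 1: size the spec area and record the detected actions so the
 * caller does not need another iteration over the list. */
static size_t actions_size(const struct action acts[], uint64_t *flags)
{
	size_t size = 0;

	for (; acts->type != ACT_END; acts++) {
		switch (acts->type) {
		case ACT_MARK:
			size += 8; /* stand-in for a tag spec size */
			*flags |= FLAG_MARK;
			break;
		case ACT_DROP:
			size += 4; /* stand-in for a drop spec size */
			*flags |= FLAG_DROP;
			break;
		default:
			break;
		}
	}
	return size;
}

/* Pass 2: one allocation sized from pass 1; the translate stage fills
 * it in place later. Callers pass a zero-initialized flags word. */
void *prepare(const struct action acts[], uint64_t *flags)
{
	return calloc(1, 16 /* header stand-in */ + actions_size(acts, flags));
}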

Signed-off-by: Ori Kam <orika@mellanox.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5_flow.c | 269 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 263 insertions(+), 6 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 799064c0c..166fee555 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -292,6 +292,15 @@ struct mlx5_flow_verbs {
 	uint64_t hash_fields; /**< Verbs hash Rx queue hash fields. */
 };
 
+/** Device flow structure. */
+struct mlx5_flow {
+	LIST_ENTRY(mlx5_flow) next;
+	struct rte_flow *flow; /**< Pointer to the main flow. */
+	union {
+		struct mlx5_flow_verbs verbs; /**< Holds the verbs dev-flow. */
+	};
+};
+
 /* Counters information. */
 struct mlx5_flow_counter {
 	LIST_ENTRY(mlx5_flow_counter) next; /**< Pointer to the next counter. */
@@ -321,6 +330,8 @@ struct rte_flow {
 	uint8_t key[MLX5_RSS_HASH_KEY_LEN]; /**< RSS hash key. */
 	uint16_t (*queue)[]; /**< Destination queues to redirect traffic to. */
 	void *nl_flow; /**< Netlink flow buffer if relevant. */
+	LIST_HEAD(dev_flows, mlx5_flow) dev_flows;
+	/**< Device flows that are part of the flow. */
 };
 
 static const struct rte_flow_ops mlx5_flow_ops = {
@@ -2322,7 +2333,7 @@ mlx5_flow_rxq_flags_clear(struct rte_eth_dev *dev)
  *   Pointer to error structure.
  *
  * @return
- *   0 on success, a negative errno value otherwise and rte_ernno is set.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
 mlx5_flow_validate_action_flag(uint64_t action_flags,
@@ -2425,7 +2436,6 @@ mlx5_flow_validate_action_drop(uint64_t action_flags,
 }
 
 /*
- *
  * Validate the queue action.
  *
  * @param[in] action
@@ -2469,7 +2479,6 @@ mlx5_flow_validate_action_queue(const struct rte_flow_action *action,
 }
 
 /*
- *
  * Validate the rss action.
  *
  * @param[in] action
@@ -3211,7 +3220,7 @@ mlx5_flow_validate_item_mpls(const struct rte_flow_item *item __rte_unused,
 	if (ret < 0)
 		return ret;
 	return 0;
-#endif /* !HAVE_IBV_DEVICE_MPLS_SUPPORT */
+#endif
 	return rte_flow_error_set(error, ENOTSUP,
 				  RTE_FLOW_ERROR_TYPE_ITEM, item,
 				  "MPLS is not supported by Verbs, please"
@@ -3219,7 +3228,6 @@ mlx5_flow_validate_item_mpls(const struct rte_flow_item *item __rte_unused,
 }
 
 /**
- *
  * Internal validation function.
  *
  * @param[in] dev
@@ -3444,6 +3452,222 @@ mlx5_flow_validate(struct rte_eth_dev *dev,
 }
 
 /**
+ * Calculate the required bytes that are needed for the action part of the
+ * Verbs flow; in addition, return a bit-field with all the detected actions,
+ * in order to avoid another iteration over the actions.
+ *
+ * @param[in] actions
+ *   Pointer to the list of actions.
+ * @param[out] action_flags
+ *   Pointer to the detected actions.
+ *
+ * @return
+ *   The size of the memory needed for all actions.
+ */
+static int
+mlx5_flow_verbs_get_actions_and_size(const struct rte_flow_action actions[],
+				     uint64_t *action_flags)
+{
+	int size = 0;
+	uint64_t detected_actions = 0;
+
+	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+		switch (actions->type) {
+		case RTE_FLOW_ACTION_TYPE_VOID:
+			break;
+		case RTE_FLOW_ACTION_TYPE_FLAG:
+			size += sizeof(struct ibv_flow_spec_action_tag);
+			detected_actions |= MLX5_ACTION_FLAG;
+			break;
+		case RTE_FLOW_ACTION_TYPE_MARK:
+			size += sizeof(struct ibv_flow_spec_action_tag);
+			detected_actions |= MLX5_ACTION_MARK;
+			break;
+		case RTE_FLOW_ACTION_TYPE_DROP:
+			size += sizeof(struct ibv_flow_spec_action_drop);
+			detected_actions |= MLX5_ACTION_DROP;
+			break;
+		case RTE_FLOW_ACTION_TYPE_QUEUE:
+			detected_actions |= MLX5_ACTION_QUEUE;
+			break;
+		case RTE_FLOW_ACTION_TYPE_RSS:
+			detected_actions |= MLX5_ACTION_RSS;
+			break;
+		case RTE_FLOW_ACTION_TYPE_COUNT:
+#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
+			size += sizeof(struct ibv_flow_spec_counter_action);
+#endif
+			detected_actions |= MLX5_ACTION_COUNT;
+			break;
+		default:
+			break;
+		}
+	}
+	*action_flags = detected_actions;
+	return size;
+}
+
+/**
+ * Calculate the required bytes that are needed for the item part of the
+ * Verbs flow; in addition, return a bit-field with all the detected items,
+ * in order to avoid another iteration over the items.
+ *
+ * @param[in] items
+ *   Pointer to the list of items.
+ * @param[in, out] item_flags
+ *   Pointer to the detected items.
+ *
+ * @return
+ *   The size of the memory needed for all items.
+ */
+static int
+mlx5_flow_verbs_get_items_and_size(const struct rte_flow_item items[],
+				   uint64_t *item_flags)
+{
+	int size = 0;
+	uint64_t detected_items = 0;
+	const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
+
+	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
+		switch (items->type) {
+		case RTE_FLOW_ITEM_TYPE_VOID:
+			break;
+		case RTE_FLOW_ITEM_TYPE_ETH:
+			size += sizeof(struct ibv_flow_spec_eth);
+			detected_items |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
+					MLX5_FLOW_LAYER_OUTER_L2;
+			break;
+		case RTE_FLOW_ITEM_TYPE_VLAN:
+			size += sizeof(struct ibv_flow_spec_eth);
+			detected_items |= tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
+					MLX5_FLOW_LAYER_OUTER_VLAN;
+			break;
+		case RTE_FLOW_ITEM_TYPE_IPV4:
+			size += sizeof(struct ibv_flow_spec_ipv4_ext);
+			detected_items |= tunnel ?
+					MLX5_FLOW_LAYER_INNER_L3_IPV4 :
+					MLX5_FLOW_LAYER_OUTER_L3_IPV4;
+			break;
+		case RTE_FLOW_ITEM_TYPE_IPV6:
+			size += sizeof(struct ibv_flow_spec_ipv6);
+			detected_items |= tunnel ?
+				MLX5_FLOW_LAYER_INNER_L3_IPV6 :
+				MLX5_FLOW_LAYER_OUTER_L3_IPV6;
+			break;
+		case RTE_FLOW_ITEM_TYPE_UDP:
+			size += sizeof(struct ibv_flow_spec_tcp_udp);
+			detected_items |= tunnel ?
+					MLX5_FLOW_LAYER_INNER_L4_UDP :
+					MLX5_FLOW_LAYER_OUTER_L4_UDP;
+			break;
+		case RTE_FLOW_ITEM_TYPE_TCP:
+			size += sizeof(struct ibv_flow_spec_tcp_udp);
+			detected_items |= tunnel ?
+					MLX5_FLOW_LAYER_INNER_L4_TCP :
+					MLX5_FLOW_LAYER_OUTER_L4_TCP;
+			break;
+		case RTE_FLOW_ITEM_TYPE_VXLAN:
+			size += sizeof(struct ibv_flow_spec_tunnel);
+			detected_items |= MLX5_FLOW_LAYER_VXLAN;
+			break;
+		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
+			size += sizeof(struct ibv_flow_spec_tunnel);
+			detected_items |= MLX5_FLOW_LAYER_VXLAN_GPE;
+			break;
+		case RTE_FLOW_ITEM_TYPE_GRE:
+#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
+			size += sizeof(struct ibv_flow_spec_gre);
+			detected_items |= MLX5_FLOW_LAYER_GRE;
+#else
+			size += sizeof(struct ibv_flow_spec_tunnel);
+			detected_items |= MLX5_FLOW_LAYER_TUNNEL;
+#endif
+			break;
+		case RTE_FLOW_ITEM_TYPE_MPLS:
+#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
+			size += sizeof(struct ibv_flow_spec_mpls);
+			detected_items |= MLX5_FLOW_LAYER_MPLS;
+#endif
+			break;
+		default:
+			break;
+		}
+	}
+	*item_flags = detected_items;
+	return size;
+}
+
+/**
+ * Get RSS action from the action list.
+ *
+ * @param[in] actions
+ *   Pointer to the list of actions.
+ *
+ * @return
+ *   Pointer to the RSS action if it exists, NULL otherwise.
+ */
+static const struct rte_flow_action_rss*
+mlx5_flow_get_rss_action(const struct rte_flow_action actions[])
+{
+	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+		switch (actions->type) {
+		case RTE_FLOW_ACTION_TYPE_RSS:
+			return (const struct rte_flow_action_rss *)
+			       actions->conf;
+		default:
+			break;
+		}
+	}
+	return NULL;
+}
+
+/**
+ * Internal preparation function. Allocate mlx5_flow with the required size.
+ * The required size is calculated based on the actions and items. This function
+ * also returns the detected actions and items for later use.
+ *
+ * @param[in] attr
+ *   Pointer to the flow attributes.
+ * @param[in] items
+ *   Pointer to the list of items.
+ * @param[in] actions
+ *   Pointer to the list of actions.
+ * @param[out] item_flags
+ *   Pointer to bit mask of all items detected.
+ * @param[out] action_flags
+ *   Pointer to bit mask of all actions detected.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   Pointer to mlx5_flow object on success, otherwise NULL and rte_errno
+ *   is set.
+ */
+static struct mlx5_flow *
+mlx5_flow_verbs_prepare(const struct rte_flow_attr *attr __rte_unused,
+			const struct rte_flow_item items[],
+			const struct rte_flow_action actions[],
+			uint64_t *item_flags,
+			uint64_t *action_flags,
+			struct rte_flow_error *error)
+{
+	uint32_t size = sizeof(struct ibv_flow_attr);
+	struct mlx5_flow *flow;
+
+	size += mlx5_flow_verbs_get_actions_and_size(actions, action_flags);
+	size += mlx5_flow_verbs_get_items_and_size(items, item_flags);
+	flow = rte_calloc(__func__, 1, size, 0);
+	if (!flow) {
+		rte_flow_error_set(error, ENOMEM,
+				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				   NULL,
+				   "not enough memory to create flow");
+		return NULL;
+	}
+	return flow;
+}
+
+/**
  * Remove the flow.
  *
  * @param[in] dev
@@ -3594,12 +3818,46 @@ mlx5_flow_list_create(struct rte_eth_dev *dev,
 		      struct rte_flow_error *error)
 {
 	struct rte_flow *flow = NULL;
+	struct mlx5_flow *dev_flow;
 	size_t size = 0;
+	uint64_t action_flags = 0;
+	uint64_t item_flags = 0;
+	const struct rte_flow_action_rss *rss;
+	union {
+		struct rte_flow_expand_rss buf;
+		uint8_t buffer[2048];
+	} expand_buffer;
+	struct rte_flow_expand_rss *buf = &expand_buffer.buf;
 	int ret;
+	uint32_t i;
 
 	ret = mlx5_flow_validate(dev, attr, items, actions, error);
 	if (ret < 0)
 		return NULL;
+	flow = rte_calloc(__func__, 1, sizeof(*flow), 0);
+	LIST_INIT(&flow->dev_flows);
+	rss = mlx5_flow_get_rss_action(actions);
+	if (rss && rss->types) {
+		unsigned int graph_root;
+
+		graph_root = mlx5_find_graph_root(items, rss->level);
+		ret = rte_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
+					  items, rss->types,
+					  mlx5_support_expansion,
+					  graph_root);
+		assert(ret > 0 &&
+		       (unsigned int)ret < sizeof(expand_buffer.buffer));
+	} else {
+		buf->entries = 1;
+		buf->entry[0].pattern = (void *)(uintptr_t)items;
+	}
+	for (i = 0; i < buf->entries; ++i) {
+		dev_flow = mlx5_flow_verbs_prepare(attr, buf->entry[i].pattern,
+						   actions, &item_flags,
+						   &action_flags, error);
+		dev_flow->flow = flow;
+		LIST_INSERT_HEAD(&flow->dev_flows, dev_flow, next);
+	}
 	ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error);
 	if (ret < 0)
 		return NULL;
@@ -4091,7 +4349,6 @@ mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
 			.dst_addr = input->flow.ip4_flow.dst_ip,
 			.time_to_live = input->flow.ip4_flow.ttl,
 			.type_of_service = input->flow.ip4_flow.tos,
-			.next_proto_id = input->flow.ip4_flow.proto,
 		};
 		attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
 			.src_addr = mask->ipv4_mask.src_ip,
-- 
2.11.0

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [dpdk-dev] [PATCH v3 03/11] net/mlx5: add flow translate function
  2018-09-24 23:17 ` [dpdk-dev] [PATCH v3 00/11] net/mlx5: add Direct Verbs flow driver support Yongseok Koh
  2018-09-24 23:17   ` [dpdk-dev] [PATCH v3 01/11] net/mlx5: split flow validation to dedicated function Yongseok Koh
  2018-09-24 23:17   ` [dpdk-dev] [PATCH v3 02/11] net/mlx5: add flow prepare function Yongseok Koh
@ 2018-09-24 23:17   ` Yongseok Koh
  2018-09-24 23:17   ` [dpdk-dev] [PATCH v3 04/11] net/mlx5: add support for multiple flow drivers Yongseok Koh
                     ` (8 subsequent siblings)
  11 siblings, 0 replies; 22+ messages in thread
From: Yongseok Koh @ 2018-09-24 23:17 UTC (permalink / raw)
  To: Thomas Monjalon, Shahaf Shuler; +Cc: dev, Ori Kam

From: Ori Kam <orika@mellanox.com>

This commit modifies the conversion of the input parameters into a
Verbs spec, in order to support all previous changes. Among those
changes: the parser is no longer used, and each flow is stored in its
own flow structure.
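
With allocation moved to the prepare stage, the translate stage reduces
to appending fixed-size spec structures into the preallocated buffer
while advancing a cursor, which is the role mlx5_flow_spec_verbs_add()
keeps in the hunks below. A hedged sketch of that append pattern
(hypothetical names, not the driver's API):

#include <stdint.h>
#include <string.h>

/* Hypothetical append-only spec buffer; not the driver's real layout. */
struct spec_buf {
	uint8_t *specs;  /* base of the area sized by the prepare stage */
	uint32_t size;   /* bytes consumed so far, i.e. the write cursor */
	uint32_t count;  /* number of specs appended */
};

/* Copy one translated spec at the cursor and advance it; no bounds
 * check is needed because the prepare stage already sized the buffer. */
void spec_add(struct spec_buf *buf, const void *src, unsigned int len)
{
	memcpy(buf->specs + buf->size, src, len);
	buf->size += len;
	buf->count++;
}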

Signed-off-by: Ori Kam <orika@mellanox.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5_flow.c | 1624 +++++++++++++++---------------------------
 1 file changed, 580 insertions(+), 1044 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 166fee555..2d55f08b8 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -296,6 +296,7 @@ struct mlx5_flow_verbs {
 struct mlx5_flow {
 	LIST_ENTRY(mlx5_flow) next;
 	struct rte_flow *flow; /**< Pointer to the main flow. */
+	uint32_t layers; /**< Bit-fields that holds the detected layers. */
 	union {
 		struct mlx5_flow_verbs verbs; /**< Holds the verbs dev-flow. */
 	};
@@ -316,15 +317,8 @@ struct mlx5_flow_counter {
 struct rte_flow {
 	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
 	struct rte_flow_attr attributes; /**< User flow attribute. */
-	uint32_t layers;
+	uint32_t layers; /**< Bit-fields that holds the detected layers. */
 	/**< Bit-fields of present layers see MLX5_FLOW_LAYER_*. */
-	uint32_t modifier;
-	/**< Bit-fields of present modifier see MLX5_FLOW_MOD_*. */
-	uint32_t fate;
-	/**< Bit-fields of present fate see MLX5_FLOW_FATE_*. */
-	LIST_HEAD(verbs, mlx5_flow_verbs) verbs; /**< Verbs flows list. */
-	struct mlx5_flow_verbs *cur_verbs;
-	/**< Current Verbs flow structure being filled. */
 	struct mlx5_flow_counter *counter; /**< Holds Verbs flow counter. */
 	struct rte_flow_action_rss rss;/**< RSS context. */
 	uint8_t key[MLX5_RSS_HASH_KEY_LEN]; /**< RSS hash key. */
@@ -332,6 +326,7 @@ struct rte_flow {
 	void *nl_flow; /**< Netlink flow buffer if relevant. */
 	LIST_HEAD(dev_flows, mlx5_flow) dev_flows;
 	/**< Device flows that are part of the flow. */
+	uint32_t actions; /**< Bit-fields which mark all detected actions. */
 };
 
 static const struct rte_flow_ops mlx5_flow_ops = {
@@ -430,7 +425,7 @@ static struct mlx5_flow_tunnel_info tunnels_info[] = {
  * Discover the maximum number of priority available.
  *
  * @param[in] dev
- *   Pointer to Ethernet device.
+ *   Pointer to the Ethernet device structure.
  *
  * @return
  *   number of supported flow priority on success, a negative errno
@@ -497,34 +492,40 @@ mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
 /**
  * Adjust flow priority.
  *
- * @param dev
- *   Pointer to Ethernet device.
- * @param flow
- *   Pointer to an rte flow.
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] priority
+ *   The rule base priority.
+ * @param[in] subpriority
+ *   The priority based on the items.
+ *
+ * @return
+ *   The new priority.
  */
-static void
-mlx5_flow_adjust_priority(struct rte_eth_dev *dev, struct rte_flow *flow)
+static uint32_t
+mlx5_flow_adjust_priority(struct rte_eth_dev *dev,
+			int32_t priority,
+			uint32_t subpriority)
 {
+	uint32_t res = 0;
 	struct priv *priv = dev->data->dev_private;
-	uint32_t priority = flow->attributes.priority;
-	uint32_t subpriority = flow->cur_verbs->attr->priority;
 
 	switch (priv->config.flow_prio) {
 	case RTE_DIM(priority_map_3):
-		priority = priority_map_3[priority][subpriority];
+		res = priority_map_3[priority][subpriority];
 		break;
 	case RTE_DIM(priority_map_5):
-		priority = priority_map_5[priority][subpriority];
+		res = priority_map_5[priority][subpriority];
 		break;
 	}
-	flow->cur_verbs->attr->priority = priority;
+	return res;
 }
 
 /**
  * Get a flow counter.
  *
  * @param[in] dev
- *   Pointer to Ethernet device.
+ *   Pointer to the Ethernet device structure.
  * @param[in] shared
  *   Indicate if this counter is shared with other flows.
  * @param[in] id
@@ -595,34 +596,6 @@ mlx5_flow_counter_release(struct mlx5_flow_counter *counter)
 }
 
 /**
- * Verify the @p attributes will be correctly understood by the NIC and store
- * them in the @p flow if everything is correct.
- *
- * @param[in] dev
- *   Pointer to Ethernet device structure.
- * @param[in] attributes
- *   Pointer to flow attributes
- * @param[in, out] flow
- *   Pointer to the rte_flow structure.
- *
- * @return
- *   0 on success.
- */
-static int
-mlx5_flow_attributes(struct rte_eth_dev *dev,
-		     const struct rte_flow_attr *attributes,
-		     struct rte_flow *flow)
-{
-	struct priv *priv = dev->data->dev_private;
-	uint32_t priority_max = priv->config.flow_prio - 1;
-
-	flow->attributes = *attributes;
-	if (attributes->priority == MLX5_FLOW_PRIO_RSVD)
-		flow->attributes.priority = priority_max;
-	return 0;
-}
-
-/**
  * Verify the @p item specifications (spec, last, mask) are compatible with the
  * NIC capabilities.
  *
@@ -693,9 +666,9 @@ mlx5_flow_item_acceptable(const struct rte_flow_item *item,
  *   Size in bytes of the specification to copy.
  */
 static void
-mlx5_flow_spec_verbs_add(struct rte_flow *flow, void *src, unsigned int size)
+mlx5_flow_spec_verbs_add(struct mlx5_flow *flow, void *src, unsigned int size)
 {
-	struct mlx5_flow_verbs *verbs = flow->cur_verbs;
+	struct mlx5_flow_verbs *verbs = &flow->verbs;
 
 	if (verbs->specs) {
 		void *dst;
@@ -710,8 +683,8 @@ mlx5_flow_spec_verbs_add(struct rte_flow *flow, void *src, unsigned int size)
 /**
  * Adjust verbs hash fields according to the @p flow information.
  *
- * @param[in, out] flow.
- *   Pointer to flow structure.
+ * @param[in] dev_flow.
+ *   Pointer to dev flow structure.
  * @param[in] tunnel
  *   1 when the hash field is for a tunnel item.
  * @param[in] layer_types
@@ -720,49 +693,44 @@ mlx5_flow_spec_verbs_add(struct rte_flow *flow, void *src, unsigned int size)
  *   Item hash fields.
  */
 static void
-mlx5_flow_verbs_hashfields_adjust(struct rte_flow *flow,
+mlx5_flow_verbs_hashfields_adjust(struct mlx5_flow *dev_flow,
 				  int tunnel __rte_unused,
 				  uint32_t layer_types, uint64_t hash_fields)
 {
 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
+	int rss_request_inner = dev_flow->flow->rss.level >= 2;
+
 	hash_fields |= (tunnel ? IBV_RX_HASH_INNER : 0);
-	if (flow->rss.level == 2 && !tunnel)
+	if (rss_request_inner && !tunnel)
 		hash_fields = 0;
-	else if (flow->rss.level < 2 && tunnel)
+	else if (!rss_request_inner && tunnel)
 		hash_fields = 0;
 #endif
-	if (!(flow->rss.types & layer_types))
+	if (!(dev_flow->flow->rss.types & layer_types))
 		hash_fields = 0;
-	flow->cur_verbs->hash_fields |= hash_fields;
+	dev_flow->verbs.hash_fields |= hash_fields;
 }
 
 /**
- * Convert the @p item into a Verbs specification after ensuring the NIC
- * will understand and process it correctly.
- * If the necessary size for the conversion is greater than the @p flow_size,
- * nothing is written in @p flow, the validation is still performed.
+ * Convert the @p item into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested item
+ * into the flow.
  *
  * @param[in] item
  *   Item specification.
- * @param[in, out] flow
- *   Pointer to flow structure.
- * @param[in] flow_size
- *   Size in bytes of the available space in @p flow, if too small, nothing is
- *   written.
- *
- * @return
- *   On success the number of bytes consumed/necessary, if the returned value
- *   is lesser or equal to @p flow_size, the @p item has fully been converted,
- *   otherwise another call with this returned memory size should be done.
- *   On error, a negative errno value is returned and rte_errno is set.
+ * @param[in, out] item_flags
+ *   Bit field with all detected items.
+ * @param[in, out] dev_flow
+ *   Pointer to dev_flow structure.
  */
-static int
-mlx5_flow_item_eth(const struct rte_flow_item *item, struct rte_flow *flow,
-		   const size_t flow_size)
+static void
+flow_verbs_translate_item_eth(const struct rte_flow_item *item,
+			      uint64_t *item_flags,
+			      struct mlx5_flow *dev_flow)
 {
 	const struct rte_flow_item_eth *spec = item->spec;
 	const struct rte_flow_item_eth *mask = item->mask;
-	const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
+	const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
 	const unsigned int size = sizeof(struct ibv_flow_spec_eth);
 	struct ibv_flow_spec_eth eth = {
 		.type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
@@ -771,10 +739,6 @@ mlx5_flow_item_eth(const struct rte_flow_item *item, struct rte_flow *flow,
 
 	if (!mask)
 		mask = &rte_flow_item_eth_mask;
-	flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
-		MLX5_FLOW_LAYER_OUTER_L2;
-	if (size > flow_size)
-		return size;
 	if (spec) {
 		unsigned int i;
 
@@ -790,14 +754,18 @@ mlx5_flow_item_eth(const struct rte_flow_item *item, struct rte_flow *flow,
 			eth.val.src_mac[i] &= eth.mask.src_mac[i];
 		}
 		eth.val.ether_type &= eth.mask.ether_type;
+		dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
 	}
-	flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
-	mlx5_flow_spec_verbs_add(flow, &eth, size);
-	return size;
+	mlx5_flow_spec_verbs_add(dev_flow, &eth, size);
+	*item_flags |= tunnel ?
+			MLX5_FLOW_LAYER_INNER_L2 :
+			MLX5_FLOW_LAYER_OUTER_L2;
 }
 
 /**
  * Update the VLAN tag in the Verbs Ethernet specification.
+ * This function assumes that the input is valid and there is space to add
+ * the requested item.
  *
  * @param[in, out] attr
  *   Pointer to Verbs attributes structure.
@@ -829,34 +797,26 @@ mlx5_flow_item_vlan_update(struct ibv_flow_attr *attr,
 }
 
 /**
- * Convert the @p item into @p flow (or by updating the already present
- * Ethernet Verbs) specification after ensuring the NIC will understand and
- * process it correctly.
- * If the necessary size for the conversion is greater than the @p flow_size,
- * nothing is written in @p flow, the validation is still performed.
+ * Convert the @p item into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested item
+ * into the flow.
  *
  * @param[in] item
  *   Item specification.
- * @param[in, out] flow
- *   Pointer to flow structure.
- * @param[in] flow_size
- *   Size in bytes of the available space in @p flow, if too small, nothing is
- *   written.
- *
- * @return
- *   On success the number of bytes consumed/necessary, if the returned value
- *   is lesser or equal to @p flow_size, the @p item has fully been converted,
- *   otherwise another call with this returned memory size should be done.
- *   On error, a negative errno value is returned and rte_errno is set.
+ * @param[in, out] item_flags
+ *   Bit mask that holds all detected items.
+ * @param[in, out] dev_flow
+ *   Pointer to dev_flow structure.
  */
-static int
-mlx5_flow_item_vlan(const struct rte_flow_item *item, struct rte_flow *flow,
-		    const size_t flow_size)
+static void
+flow_verbs_translate_item_vlan(const struct rte_flow_item *item,
+			       uint64_t *item_flags,
+			       struct mlx5_flow *dev_flow)
 {
 	const struct rte_flow_item_vlan *spec = item->spec;
 	const struct rte_flow_item_vlan *mask = item->mask;
 	unsigned int size = sizeof(struct ibv_flow_spec_eth);
-	const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
+	const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
 	struct ibv_flow_spec_eth eth = {
 		.type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
 		.size = size,
@@ -874,49 +834,40 @@ mlx5_flow_item_vlan(const struct rte_flow_item *item, struct rte_flow *flow,
 		eth.mask.ether_type = mask->inner_type;
 		eth.val.ether_type &= eth.mask.ether_type;
 	}
-	if (!(flow->layers & l2m)) {
-		if (size <= flow_size) {
-			flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
-			mlx5_flow_spec_verbs_add(flow, &eth, size);
-		}
+	if (!(*item_flags & l2m)) {
+		dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
+		mlx5_flow_spec_verbs_add(dev_flow, &eth, size);
 	} else {
-		if (flow->cur_verbs)
-			mlx5_flow_item_vlan_update(flow->cur_verbs->attr,
+		mlx5_flow_item_vlan_update(dev_flow->verbs.attr,
 						   &eth);
 		size = 0; /* Only an update is done in eth specification. */
 	}
-	flow->layers |= tunnel ?
-		(MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_VLAN) :
-		(MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_VLAN);
-	return size;
+	*item_flags |= tunnel ?
+			(MLX5_FLOW_LAYER_INNER_L2 |
+			 MLX5_FLOW_LAYER_INNER_VLAN) :
+			(MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_VLAN);
 }
 
 /**
- * Convert the @p item into a Verbs specification after ensuring the NIC
- * will understand and process it correctly.
- * If the necessary size for the conversion is greater than the @p flow_size,
- * nothing is written in @p flow, the validation is still performed.
+ * Convert the @p item into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested item
+ * into the flow.
  *
  * @param[in] item
  *   Item specification.
- * @param[in, out] flow
- *   Pointer to flow structure.
- * @param[in] flow_size
- *   Size in bytes of the available space in @p flow, if too small, nothing is
- *   written.
- *
- * @return
- *   On success the number of bytes consumed/necessary, if the returned value
- *   is lesser or equal to @p flow_size, the @p item has fully been converted,
- *   otherwise another call with this returned memory size should be done.
+ * @param[in, out] item_flags
+ *   Bit mask that marks all detected items.
+ * @param[in, out] dev_flow
+ *   Pointer to dev_flow structure.
  */
-static int
-mlx5_flow_item_ipv4(const struct rte_flow_item *item, struct rte_flow *flow,
-		    const size_t flow_size)
+static void
+flow_verbs_translate_item_ipv4(const struct rte_flow_item *item,
+			       uint64_t *item_flags,
+			       struct mlx5_flow *dev_flow)
 {
 	const struct rte_flow_item_ipv4 *spec = item->spec;
 	const struct rte_flow_item_ipv4 *mask = item->mask;
-	const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
+	const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
 	unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
 	struct ibv_flow_spec_ipv4_ext ipv4 = {
 		.type = IBV_FLOW_SPEC_IPV4_EXT |
@@ -926,7 +877,7 @@ mlx5_flow_item_ipv4(const struct rte_flow_item *item, struct rte_flow *flow,
 
 	if (!mask)
 		mask = &rte_flow_item_ipv4_mask;
-	flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
+	*item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
 		MLX5_FLOW_LAYER_OUTER_L3_IPV4;
 	if (spec) {
 		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
@@ -947,46 +898,37 @@ mlx5_flow_item_ipv4(const struct rte_flow_item *item, struct rte_flow *flow,
 		ipv4.val.proto &= ipv4.mask.proto;
 		ipv4.val.tos &= ipv4.mask.tos;
 	}
-	if (size <= flow_size) {
-		mlx5_flow_verbs_hashfields_adjust
-			(flow, tunnel,
-			 (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
-			  ETH_RSS_NONFRAG_IPV4_TCP |
-			  ETH_RSS_NONFRAG_IPV4_UDP |
-			  ETH_RSS_NONFRAG_IPV4_OTHER),
-			 (IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4));
-		flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L3;
-		mlx5_flow_spec_verbs_add(flow, &ipv4, size);
-	}
-	return size;
+	mlx5_flow_verbs_hashfields_adjust(dev_flow, tunnel,
+					  (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
+					   ETH_RSS_NONFRAG_IPV4_TCP |
+					   ETH_RSS_NONFRAG_IPV4_UDP |
+					   ETH_RSS_NONFRAG_IPV4_OTHER),
+					  (IBV_RX_HASH_SRC_IPV4 |
+					   IBV_RX_HASH_DST_IPV4));
+	dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L3;
+	mlx5_flow_spec_verbs_add(dev_flow, &ipv4, size);
 }
 
 /**
- * Convert the @p item into a Verbs specification after ensuring the NIC
- * will understand and process it correctly.
- * If the necessary size for the conversion is greater than the @p flow_size,
- * nothing is written in @p flow, the validation is still performed.
+ * Convert the @p item into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested item
+ * into the flow.
  *
  * @param[in] item
  *   Item specification.
- * @param[in, out] flow
- *   Pointer to flow structure.
- * @param[in] flow_size
- *   Size in bytes of the available space in @p flow, if too small, nothing is
- *   written.
- *
- * @return
- *   On success the number of bytes consumed/necessary, if the returned value
- *   is lesser or equal to @p flow_size, the @p item has fully been converted,
- *   otherwise another call with this returned memory size should be done.
+ * @param[in, out] item_flags
+ *   Bit mask that marks all detected items.
+ * @param[in, out] dev_flow
+ *   Pointer to dev_flow structure.
  */
-static int
-mlx5_flow_item_ipv6(const struct rte_flow_item *item, struct rte_flow *flow,
-		    const size_t flow_size)
+static void
+flow_verbs_translate_item_ipv6(const struct rte_flow_item *item,
+			       uint64_t *item_flags,
+			       struct mlx5_flow *dev_flow)
 {
 	const struct rte_flow_item_ipv6 *spec = item->spec;
 	const struct rte_flow_item_ipv6 *mask = item->mask;
-	const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
+	const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
 	unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
 	struct ibv_flow_spec_ipv6 ipv6 = {
 		.type = IBV_FLOW_SPEC_IPV6 | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
@@ -995,8 +937,8 @@ mlx5_flow_item_ipv6(const struct rte_flow_item *item, struct rte_flow *flow,
 
 	if (!mask)
 		mask = &rte_flow_item_ipv6_mask;
-	flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
-		MLX5_FLOW_LAYER_OUTER_L3_IPV6;
+	*item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
+		MLX5_FLOW_LAYER_OUTER_L3_IPV6;
 	if (spec) {
 		unsigned int i;
 		uint32_t vtc_flow_val;
@@ -1036,46 +978,40 @@ mlx5_flow_item_ipv6(const struct rte_flow_item *item, struct rte_flow *flow,
 		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
 		ipv6.val.hop_limit &= ipv6.mask.hop_limit;
 	}
-	if (size <= flow_size) {
-		mlx5_flow_verbs_hashfields_adjust
-			(flow, tunnel,
-			 (ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
-			  ETH_RSS_NONFRAG_IPV6_TCP | ETH_RSS_NONFRAG_IPV6_UDP |
-			  ETH_RSS_NONFRAG_IPV6_OTHER | ETH_RSS_IPV6_EX |
-			  ETH_RSS_IPV6_TCP_EX | ETH_RSS_IPV6_UDP_EX),
-			 (IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6));
-		flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L3;
-		mlx5_flow_spec_verbs_add(flow, &ipv6, size);
-	}
-	return size;
+	mlx5_flow_verbs_hashfields_adjust(dev_flow, tunnel,
+					  (ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
+					   ETH_RSS_NONFRAG_IPV6_TCP |
+					   ETH_RSS_NONFRAG_IPV6_UDP |
+					   ETH_RSS_IPV6_EX |
+					   ETH_RSS_IPV6_TCP_EX |
+					   ETH_RSS_IPV6_UDP_EX |
+					   ETH_RSS_NONFRAG_IPV6_OTHER),
+					  (IBV_RX_HASH_SRC_IPV6 |
+					   IBV_RX_HASH_DST_IPV6));
+	dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L3;
+	mlx5_flow_spec_verbs_add(dev_flow, &ipv6, size);
 }
 
 /**
- * Convert the @p item into a Verbs specification after ensuring the NIC
- * will understand and process it correctly.
- * If the necessary size for the conversion is greater than the @p flow_size,
- * nothing is written in @p flow, the validation is still performed.
+ * Convert the @p item into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested item
+ * into the flow.
  *
  * @param[in] item
  *   Item specification.
- * @param[in, out] flow
- *   Pointer to flow structure.
- * @param[in] flow_size
- *   Size in bytes of the available space in @p flow, if too small, nothing is
- *   written.
- *
- * @return
- *   On success the number of bytes consumed/necessary, if the returned value
- *   is lesser or equal to @p flow_size, the @p item has fully been converted,
- *   otherwise another call with this returned memory size should be done.
+ * @param[in, out] item_flags
+ *   Bit mask that marks all detected items.
+ * @param[in, out] dev_flow
+ *   Pointer to dev_flow structure.
  */
-static int
-mlx5_flow_item_udp(const struct rte_flow_item *item, struct rte_flow *flow,
-		   const size_t flow_size)
+static void
+flow_verbs_translate_item_udp(const struct rte_flow_item *item,
+			      uint64_t *item_flags,
+			      struct mlx5_flow *dev_flow)
 {
 	const struct rte_flow_item_udp *spec = item->spec;
 	const struct rte_flow_item_udp *mask = item->mask;
-	const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
+	const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
 	unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
 	struct ibv_flow_spec_tcp_udp udp = {
 		.type = IBV_FLOW_SPEC_UDP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
@@ -1084,8 +1020,8 @@ mlx5_flow_item_udp(const struct rte_flow_item *item, struct rte_flow *flow,
 
 	if (!mask)
 		mask = &rte_flow_item_udp_mask;
-	flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
-		MLX5_FLOW_LAYER_OUTER_L4_UDP;
+	*item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
+			MLX5_FLOW_LAYER_OUTER_L4_UDP;
 	if (spec) {
 		udp.val.dst_port = spec->hdr.dst_port;
 		udp.val.src_port = spec->hdr.src_port;
@@ -1095,44 +1031,34 @@ mlx5_flow_item_udp(const struct rte_flow_item *item, struct rte_flow *flow,
 		udp.val.src_port &= udp.mask.src_port;
 		udp.val.dst_port &= udp.mask.dst_port;
 	}
-	if (size <= flow_size) {
-		mlx5_flow_verbs_hashfields_adjust(flow, tunnel, ETH_RSS_UDP,
-						  (IBV_RX_HASH_SRC_PORT_UDP |
-						   IBV_RX_HASH_DST_PORT_UDP));
-		flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L4;
-		mlx5_flow_spec_verbs_add(flow, &udp, size);
-	}
-	return size;
+	mlx5_flow_verbs_hashfields_adjust(dev_flow,
+					  tunnel, ETH_RSS_UDP,
+					  (IBV_RX_HASH_SRC_PORT_UDP |
+					   IBV_RX_HASH_DST_PORT_UDP));
+	dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L4;
+	mlx5_flow_spec_verbs_add(dev_flow, &udp, size);
 }
 
 /**
- * Convert the @p item into a Verbs specification after ensuring the NIC
- * will understand and process it correctly.
- * If the necessary size for the conversion is greater than the @p flow_size,
- * nothing is written in @p flow, the validation is still performed.
+ * Convert the @p item into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested item
+ * into the flow.
  *
  * @param[in] item
  *   Item specification.
- * @param[in, out] flow
- *   Pointer to flow structure.
- * @param[in] flow_size
- *   Size in bytes of the available space in @p flow, if too small, nothing is
- *   written.
- * @param[out] error
- *   Pointer to error structure.
- *
- * @return
- *   On success the number of bytes consumed/necessary, if the returned value
- *   is lesser or equal to @p flow_size, the @p item has fully been converted,
- *   otherwise another call with this returned memory size should be done.
+ * @param[in, out] item_flags
+ *   Bit mask that marks all detected items.
+ * @param[in, out] dev_flow
+ *   Pointer to dev_flow structure.
  */
-static int
-mlx5_flow_item_tcp(const struct rte_flow_item *item, struct rte_flow *flow,
-		   const size_t flow_size)
+static void
+flow_verbs_translate_item_tcp(const struct rte_flow_item *item,
+			      uint64_t *item_flags,
+			      struct mlx5_flow *dev_flow)
 {
 	const struct rte_flow_item_tcp *spec = item->spec;
 	const struct rte_flow_item_tcp *mask = item->mask;
-	const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
+	const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
 	unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
 	struct ibv_flow_spec_tcp_udp tcp = {
 		.type = IBV_FLOW_SPEC_TCP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
@@ -1141,8 +1067,8 @@ mlx5_flow_item_tcp(const struct rte_flow_item *item, struct rte_flow *flow,
 
 	if (!mask)
 		mask = &rte_flow_item_tcp_mask;
-	flow->layers |=  tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
-		MLX5_FLOW_LAYER_OUTER_L4_TCP;
+	*item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
+			MLX5_FLOW_LAYER_OUTER_L4_TCP;
 	if (spec) {
 		tcp.val.dst_port = spec->hdr.dst_port;
 		tcp.val.src_port = spec->hdr.src_port;
@@ -1152,40 +1078,30 @@ mlx5_flow_item_tcp(const struct rte_flow_item *item, struct rte_flow *flow,
 		tcp.val.src_port &= tcp.mask.src_port;
 		tcp.val.dst_port &= tcp.mask.dst_port;
 	}
-	if (size <= flow_size) {
-		mlx5_flow_verbs_hashfields_adjust(flow, tunnel, ETH_RSS_TCP,
-						  (IBV_RX_HASH_SRC_PORT_TCP |
-						   IBV_RX_HASH_DST_PORT_TCP));
-		flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L4;
-		mlx5_flow_spec_verbs_add(flow, &tcp, size);
-	}
-	return size;
+	mlx5_flow_verbs_hashfields_adjust(dev_flow,
+					  tunnel, ETH_RSS_TCP,
+					  (IBV_RX_HASH_SRC_PORT_TCP |
+					   IBV_RX_HASH_DST_PORT_TCP));
+	dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L4;
+	mlx5_flow_spec_verbs_add(dev_flow, &tcp, size);
 }
 
 /**
- * Convert the @p item into a Verbs specification after ensuring the NIC
- * will understand and process it correctly.
- * If the necessary size for the conversion is greater than the @p flow_size,
- * nothing is written in @p flow, the validation is still performed.
+ * Convert the @p item into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested item
+ * into the flow.
  *
  * @param[in] item
  *   Item specification.
- * @param[in, out] flow
- *   Pointer to flow structure.
- * @param[in] flow_size
- *   Size in bytes of the available space in @p flow, if too small, nothing is
- *   written.
- * @param[out] error
- *   Pointer to error structure.
- *
- * @return
- *   On success the number of bytes consumed/necessary, if the returned value
- *   is lesser or equal to @p flow_size, the @p item has fully been converted,
- *   otherwise another call with this returned memory size should be done.
+ * @param[in, out] item_flags
+ *   Bit mask that marks all detected items.
+ * @param[in, out] dev_flow
+ *   Pointer to dev_flow structure.
  */
-static int
-mlx5_flow_item_vxlan(const struct rte_flow_item *item, struct rte_flow *flow,
-		     const size_t flow_size)
+static void
+flow_verbs_translate_item_vxlan(const struct rte_flow_item *item,
+				uint64_t *item_flags,
+				struct mlx5_flow *dev_flow)
 {
 	const struct rte_flow_item_vxlan *spec = item->spec;
 	const struct rte_flow_item_vxlan *mask = item->mask;
@@ -1209,38 +1125,27 @@ mlx5_flow_item_vxlan(const struct rte_flow_item *item, struct rte_flow *flow,
 		/* Remove unwanted bits from values. */
 		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
 	}
-	if (size <= flow_size) {
-		mlx5_flow_spec_verbs_add(flow, &vxlan, size);
-		flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
-	}
-	flow->layers |= MLX5_FLOW_LAYER_VXLAN;
-	return size;
+	mlx5_flow_spec_verbs_add(dev_flow, &vxlan, size);
+	dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
+	*item_flags |= MLX5_FLOW_LAYER_VXLAN;
 }
 
 /**
- * Convert the @p item into a Verbs specification after ensuring the NIC
- * will understand and process it correctly.
- * If the necessary size for the conversion is greater than the @p flow_size,
- * nothing is written in @p flow, the validation is still performed.
+ * Convert the @p item into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested item
+ * into the flow.
  *
  * @param[in] item
  *   Item specification.
- * @param[in, out] flow
- *   Pointer to flow structure.
- * @param[in] flow_size
- *   Size in bytes of the available space in @p flow, if too small, nothing is
- *   written.
- * @param[out] error
- *   Pointer to error structure.
- *
- * @return
- *   On success the number of bytes consumed/necessary, if the returned value
- *   is lesser or equal to @p flow_size, the @p item has fully been converted,
- *   otherwise another call with this returned memory size should be done.
+ * @param[in, out] item_flags
+ *   Bit mask that marks all detected items.
+ * @param[in, out] dev_flow
+ *   Pointer to dev_flow structure.
  */
-static int
-mlx5_flow_item_vxlan_gpe(const struct rte_flow_item *item,
-			 struct rte_flow *flow, const size_t flow_size)
+static void
+flow_verbs_translate_item_vxlan_gpe(const struct rte_flow_item *item,
+				    uint64_t *item_flags,
+				    struct mlx5_flow *dev_flow)
 {
 	const struct rte_flow_item_vxlan_gpe *spec = item->spec;
 	const struct rte_flow_item_vxlan_gpe *mask = item->mask;
@@ -1264,12 +1169,9 @@ mlx5_flow_item_vxlan_gpe(const struct rte_flow_item *item,
 		/* Remove unwanted bits from values. */
 		vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id;
 	}
-	if (size <= flow_size) {
-		mlx5_flow_spec_verbs_add(flow, &vxlan_gpe, size);
-		flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
-	}
-	flow->layers |= MLX5_FLOW_LAYER_VXLAN_GPE;
-	return size;
+	mlx5_flow_spec_verbs_add(dev_flow, &vxlan_gpe, size);
+	dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
+	*item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
 }
 
 /**
@@ -1325,34 +1227,30 @@ mlx5_flow_item_gre_ip_protocol_update(struct ibv_flow_attr *attr,
 }
 
 /**
- * Convert the @p item into a Verbs specification after ensuring the NIC
- * will understand and process it correctly.
- * It will also update the previous L3 layer with the protocol value matching
- * the GRE.
- * If the necessary size for the conversion is greater than the @p flow_size,
- * nothing is written in @p flow, the validation is still performed.
+ * Convert the @p item into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested item
+ * into the flow.
  *
- * @param dev
- *   Pointer to Ethernet device.
  * @param[in] item
  *   Item specification.
- * @param[in, out] flow
- *   Pointer to flow structure.
- * @param[in] flow_size
- *   Size in bytes of the available space in @p flow, if too small, nothing is
- *   written.
- *
- * @return
- *   On success the number of bytes consumed/necessary, if the returned value
- *   is lesser or equal to @p flow_size, the @p item has fully been converted,
- *   otherwise another call with this returned memory size should be done.
+ * @param[in, out] item_flags
+ *   Bit mask that marks all detected items.
+ * @param[in, out] dev_flow
+ *   Pointer to dev_flow structure.
  */
-static int
-mlx5_flow_item_gre(const struct rte_flow_item *item __rte_unused,
-		   struct rte_flow *flow, const size_t flow_size)
+static void
+flow_verbs_translate_item_gre(const struct rte_flow_item *item __rte_unused,
+			      uint64_t *item_flags,
+			      struct mlx5_flow *dev_flow)
 {
-	struct mlx5_flow_verbs *verbs = flow->cur_verbs;
-#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
+	struct mlx5_flow_verbs *verbs = &dev_flow->verbs;
+#ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
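+	/* Without MPLS support, GRE can only match a generic tunnel spec. */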
+	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
+	struct ibv_flow_spec_tunnel tunnel = {
+		.type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
+		.size = size,
+	};
+#else
 	const struct rte_flow_item_gre *spec = item->spec;
 	const struct rte_flow_item_gre *mask = item->mask;
 	unsigned int size = sizeof(struct ibv_flow_spec_gre);
@@ -1360,15 +1258,7 @@ mlx5_flow_item_gre(const struct rte_flow_item *item __rte_unused,
 		.type = IBV_FLOW_SPEC_GRE,
 		.size = size,
 	};
-#else
-	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
-	struct ibv_flow_spec_tunnel tunnel = {
-		.type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
-		.size = size,
-	};
-#endif
 
-#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
 	if (!mask)
 		mask = &rte_flow_item_gre_mask;
 	if (spec) {
@@ -1381,51 +1271,36 @@ mlx5_flow_item_gre(const struct rte_flow_item *item __rte_unused,
 		tunnel.val.protocol &= tunnel.mask.protocol;
 		tunnel.val.key &= tunnel.mask.key;
 	}
-#else
-#endif /* !HAVE_IBV_DEVICE_MPLS_SUPPORT */
-	if (size <= flow_size) {
-		if (flow->layers & MLX5_FLOW_LAYER_OUTER_L3_IPV4)
-			mlx5_flow_item_gre_ip_protocol_update
-				(verbs->attr, IBV_FLOW_SPEC_IPV4_EXT,
-				 MLX5_IP_PROTOCOL_GRE);
-		else
-			mlx5_flow_item_gre_ip_protocol_update
-				(verbs->attr, IBV_FLOW_SPEC_IPV6,
-				 MLX5_IP_PROTOCOL_GRE);
-		mlx5_flow_spec_verbs_add(flow, &tunnel, size);
-		flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
-	}
-	flow->layers |= MLX5_FLOW_LAYER_GRE;
-	return size;
+#endif
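+	/* Update the protocol of the already translated L3 spec to GRE. */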
+	if (*item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4)
+		mlx5_flow_item_gre_ip_protocol_update
+			(verbs->attr, IBV_FLOW_SPEC_IPV4_EXT,
+			 MLX5_IP_PROTOCOL_GRE);
+	else
+		mlx5_flow_item_gre_ip_protocol_update
+			(verbs->attr, IBV_FLOW_SPEC_IPV6,
+			 MLX5_IP_PROTOCOL_GRE);
+	mlx5_flow_spec_verbs_add(dev_flow, &tunnel, size);
+	verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
+	*item_flags |= MLX5_FLOW_LAYER_GRE;
 }
 
 /**
- * Convert the @p item into a Verbs specification after ensuring the NIC
- * will understand and process it correctly.
- * If the necessary size for the conversion is greater than the @p flow_size,
- * nothing is written in @p flow, the validation is still performed.
+ * Convert the @p item into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested item
+ * into the flow.
  *
  * @param[in] item
  *   Item specification.
- * @param[in, out] flow
- *   Pointer to flow structure.
- * @param[in] flow_size
- *   Size in bytes of the available space in @p flow, if too small, nothing is
- *   written.
- * @param[out] error
- *   Pointer to error structure.
- *
- * @return
- *   On success the number of bytes consumed/necessary, if the returned value
- *   is lesser or equal to @p flow_size, the @p item has fully been converted,
- *   otherwise another call with this returned memory size should be done.
- *   On error, a negative errno value is returned and rte_errno is set.
+ * @param[in, out] item_flags
+ *   Bit mask that marks all detected items.
+ * @param[in, out] dev_flow
+ *   Pointer to dev_flow structure.
  */
-static int
-mlx5_flow_item_mpls(const struct rte_flow_item *item __rte_unused,
-		    struct rte_flow *flow __rte_unused,
-		    const size_t flow_size __rte_unused,
-		    struct rte_flow_error *error)
+static void
+flow_verbs_translate_item_mpls(const struct rte_flow_item *item __rte_unused,
+			       uint64_t *item_flags __rte_unused,
+			       struct mlx5_flow *dev_flow __rte_unused)
 {
 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
 	const struct rte_flow_item_mpls *spec = item->spec;
@@ -1444,135 +1319,25 @@ mlx5_flow_item_mpls(const struct rte_flow_item *item __rte_unused,
 		/* Remove unwanted bits from values.  */
 		mpls.val.label &= mpls.mask.label;
 	}
-	if (size <= flow_size) {
-		mlx5_flow_spec_verbs_add(flow, &mpls, size);
-		flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
-	}
-	flow->layers |= MLX5_FLOW_LAYER_MPLS;
-	return size;
-#endif /* !HAVE_IBV_DEVICE_MPLS_SUPPORT */
-	return rte_flow_error_set(error, ENOTSUP,
-				  RTE_FLOW_ERROR_TYPE_ITEM,
-				  item,
-				  "MPLS is not supported by Verbs, please"
-				  " update.");
-}
-
-/**
- * Convert the @p pattern into a Verbs specifications after ensuring the NIC
- * will understand and process it correctly.
- * The conversion is performed item per item, each of them is written into
- * the @p flow if its size is lesser or equal to @p flow_size.
- * Validation and memory consumption computation are still performed until the
- * end of @p pattern, unless an error is encountered.
- *
- * @param[in] pattern
- *   Flow pattern.
- * @param[in, out] flow
- *   Pointer to the rte_flow structure.
- * @param[in] flow_size
- *   Size in bytes of the available space in @p flow, if too small some
- *   garbage may be present.
- * @param[out] error
- *   Pointer to error structure.
- *
- * @return
- *   On success the number of bytes consumed/necessary, if the returned value
- *   is lesser or equal to @p flow_size, the @pattern  has fully been
- *   converted, otherwise another call with this returned memory size should
- *   be done.
- *   On error, a negative errno value is returned and rte_errno is set.
- */
-static int
-mlx5_flow_items(const struct rte_flow_item pattern[],
-		struct rte_flow *flow, const size_t flow_size,
-		struct rte_flow_error *error)
-{
-	int remain = flow_size;
-	size_t size = 0;
-
-	for (; pattern->type != RTE_FLOW_ITEM_TYPE_END; pattern++) {
-		int ret = 0;
-
-		switch (pattern->type) {
-		case RTE_FLOW_ITEM_TYPE_VOID:
-			break;
-		case RTE_FLOW_ITEM_TYPE_ETH:
-			ret = mlx5_flow_item_eth(pattern, flow, remain);
-			break;
-		case RTE_FLOW_ITEM_TYPE_VLAN:
-			ret = mlx5_flow_item_vlan(pattern, flow, remain);
-			break;
-		case RTE_FLOW_ITEM_TYPE_IPV4:
-			ret = mlx5_flow_item_ipv4(pattern, flow, remain);
-			break;
-		case RTE_FLOW_ITEM_TYPE_IPV6:
-			ret = mlx5_flow_item_ipv6(pattern, flow, remain);
-			break;
-		case RTE_FLOW_ITEM_TYPE_UDP:
-			ret = mlx5_flow_item_udp(pattern, flow, remain);
-			break;
-		case RTE_FLOW_ITEM_TYPE_TCP:
-			ret = mlx5_flow_item_tcp(pattern, flow, remain);
-			break;
-		case RTE_FLOW_ITEM_TYPE_VXLAN:
-			ret = mlx5_flow_item_vxlan(pattern, flow, remain);
-			break;
-		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
-			ret = mlx5_flow_item_vxlan_gpe(pattern, flow,
-						       remain);
-			break;
-		case RTE_FLOW_ITEM_TYPE_GRE:
-			ret = mlx5_flow_item_gre(pattern, flow, remain);
-			break;
-		case RTE_FLOW_ITEM_TYPE_MPLS:
-			ret = mlx5_flow_item_mpls(pattern, flow, remain, error);
-			break;
-		default:
-			return rte_flow_error_set(error, ENOTSUP,
-						  RTE_FLOW_ERROR_TYPE_ITEM,
-						  pattern,
-						  "item not supported");
-		}
-		if (ret < 0)
-			return ret;
-		if (remain > ret)
-			remain -= ret;
-		else
-			remain = 0;
-		size += ret;
-	}
-	if (!flow->layers) {
-		const struct rte_flow_item item = {
-			.type = RTE_FLOW_ITEM_TYPE_ETH,
-		};
-
-		return mlx5_flow_item_eth(&item, flow, flow_size);
-	}
-	return size;
+	mlx5_flow_spec_verbs_add(dev_flow, &mpls, size);
+	dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
+	*item_flags |= MLX5_FLOW_LAYER_MPLS;
+#endif
 }
 
 /**
- * Convert the @p action into a Verbs specification after ensuring the NIC
- * will understand and process it correctly.
- * If the necessary size for the conversion is greater than the @p flow_size,
- * nothing is written in @p flow, the validation is still performed.
- *
- * @param[in, out] flow
- *   Pointer to flow structure.
- * @param[in] flow_size
- *   Size in bytes of the available space in @p flow, if too small, nothing is
- *   written.
+ * Convert the @p action into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested action
+ * into the flow. It also updates @p action_flags with the detected action.
  *
- * @return
- *   On success the number of bytes consumed/necessary, if the returned value
- *   is lesser or equal to @p flow_size, the @p action has fully been
- *   converted, otherwise another call with this returned memory size should
- *   be done.
- *   On error, a negative errno value is returned and rte_errno is set.
+ * @param[in, out] action_flags
+ *   Pointer to the detected actions.
+ * @param[in, out] dev_flow
+ *   Pointer to mlx5_flow.
  */
-static int
-mlx5_flow_action_drop(struct rte_flow *flow, const size_t flow_size)
+static void
+flow_verbs_translate_action_drop(uint64_t *action_flags,
+				 struct mlx5_flow *dev_flow)
 {
 	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
 	struct ibv_flow_spec_action_drop drop = {
@@ -1580,53 +1345,55 @@ mlx5_flow_action_drop(struct rte_flow *flow, const size_t flow_size)
 			.size = size,
 	};
 
-	if (size < flow_size)
-		mlx5_flow_spec_verbs_add(flow, &drop, size);
-	flow->fate |= MLX5_FLOW_FATE_DROP;
-	return size;
+	mlx5_flow_spec_verbs_add(dev_flow, &drop, size);
+	*action_flags |= MLX5_ACTION_DROP;
 }
 
 /**
- * Convert the @p action into @p flow after ensuring the NIC will understand
- * and process it correctly.
+ * Convert the @p action into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested action
+ * into the flow. It also updates @p action_flags with the detected action.
  *
  * @param[in] action
  *   Action configuration.
- * @param[in, out] flow
- *   Pointer to flow structure.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
+ * @param[in, out] action_flags
+ *   Pointer to the detected actions.
+ * @param[in, out] dev_flow
+ *   Pointer to mlx5_flow.
  */
-static int
-mlx5_flow_action_queue(const struct rte_flow_action *action,
-		       struct rte_flow *flow)
+static void
+flow_verbs_translate_action_queue(const struct rte_flow_action *action,
+				  uint64_t *action_flags,
+				  struct mlx5_flow *dev_flow)
 {
 	const struct rte_flow_action_queue *queue = action->conf;
+	struct rte_flow *flow = dev_flow->flow;
 
 	if (flow->queue)
 		(*flow->queue)[0] = queue->index;
 	flow->rss.queue_num = 1;
-	flow->fate |= MLX5_FLOW_FATE_QUEUE;
-	return 0;
+	*action_flags |= MLX5_ACTION_QUEUE;
 }
 
 /**
- * Ensure the @p action will be understood and used correctly by the  NIC.
+ * Convert the @p action into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested action
+ * into the flow. It also updates @p action_flags with the detected action.
  *
  * @param[in] action
  *   Action configuration.
- * @param flow[in, out]
- *   Pointer to the rte_flow structure.
- *
- * @return
- *   0 On success.
+ * @param[in, out] action_flags
+ *   Pointer to the detected actions.
+ * @param[in, out] dev_flow
+ *   Pointer to mlx5_flow.
  */
-static int
-mlx5_flow_action_rss(const struct rte_flow_action *action,
-			struct rte_flow *flow)
+static void
+flow_verbs_translate_action_rss(const struct rte_flow_action *action,
+				uint64_t *action_flags,
+				struct mlx5_flow *dev_flow)
 {
 	const struct rte_flow_action_rss *rss = action->conf;
+	struct rte_flow *flow = dev_flow->flow;
 
 	if (flow->queue)
 		memcpy((*flow->queue), rss->queue,
@@ -1635,30 +1402,26 @@ mlx5_flow_action_rss(const struct rte_flow_action *action,
 	memcpy(flow->key, rss->key, MLX5_RSS_HASH_KEY_LEN);
 	flow->rss.types = rss->types;
 	flow->rss.level = rss->level;
-	flow->fate |= MLX5_FLOW_FATE_RSS;
-	return 0;
+	*action_flags |= MLX5_ACTION_RSS;
 }
 
 /**
- * Convert the @p action into a Verbs specification after ensuring the NIC
- * will understand and process it correctly.
- * If the necessary size for the conversion is greater than the @p flow_size,
- * nothing is written in @p flow, the validation is still performed.
+ * Convert the @p action into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested action
+ * into the flow. It also updates @p action_flags with the detected action.
  *
- * @param[in, out] flow
- *   Pointer to flow structure.
- * @param[in] flow_size
- *   Size in bytes of the available space in @p flow, if too small, nothing is
- *   written.
- *
- * @return
- *   On success the number of bytes consumed/necessary, if the returned value
- *   is lesser or equal to @p flow_size, the @p action has fully been
- *   converted, otherwise another call with this returned memory size should
- *   be done.
+ * @param[in] action
+ *   Action configuration.
+ * @param[in, out] action_flags
+ *   Pointer to the detected actions.
+ * @param[in, out] dev_flow
+ *   Pointer to mlx5_flow.
  */
-static int
-mlx5_flow_action_flag(struct rte_flow *flow, const size_t flow_size)
+static void
+flow_verbs_translate_action_flag
+			(const struct rte_flow_action *action __rte_unused,
+			 uint64_t *action_flags,
+			 struct mlx5_flow *dev_flow)
 {
 	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
 	struct ibv_flow_spec_action_tag tag = {
@@ -1666,14 +1429,8 @@ mlx5_flow_action_flag(struct rte_flow *flow, const size_t flow_size)
 		.size = size,
 		.tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
 	};
-	struct mlx5_flow_verbs *verbs = flow->cur_verbs;
-
-	if (flow->modifier & MLX5_FLOW_MOD_MARK)
-		size = 0;
-	else if (size <= flow_size && verbs)
-		mlx5_flow_spec_verbs_add(flow, &tag, size);
-	flow->modifier |= MLX5_FLOW_MOD_FLAG;
-	return size;
+	if (*action_flags & MLX5_ACTION_MARK)
+		size = 0;
+	else
+		mlx5_flow_spec_verbs_add(dev_flow, &tag, size);
+	*action_flags |= MLX5_ACTION_FLAG;
 }
 
 /**
@@ -1685,499 +1442,115 @@ mlx5_flow_action_flag(struct rte_flow *flow, const size_t flow_size)
  *   Mark identifier to replace the flag.
  */
 static void
-mlx5_flow_verbs_mark_update(struct mlx5_flow_verbs *verbs, uint32_t mark_id)
+flow_verbs_mark_update(struct mlx5_flow_verbs *verbs, uint32_t mark_id)
 {
 	struct ibv_spec_header *hdr;
 	int i;
 
-	if (!verbs)
-		return;
-	/* Update Verbs specification. */
-	hdr = (struct ibv_spec_header *)verbs->specs;
-	if (!hdr)
-		return;
-	for (i = 0; i != verbs->attr->num_of_specs; ++i) {
-		if (hdr->type == IBV_FLOW_SPEC_ACTION_TAG) {
-			struct ibv_flow_spec_action_tag *t =
-				(struct ibv_flow_spec_action_tag *)hdr;
-
-			t->tag_id = mlx5_flow_mark_set(mark_id);
-		}
-		hdr = (struct ibv_spec_header *)((uintptr_t)hdr + hdr->size);
-	}
-}
-
-/**
- * Convert the @p action into @p flow (or by updating the already present
- * Flag Verbs specification) after ensuring the NIC will understand and
- * process it correctly.
- * If the necessary size for the conversion is greater than the @p flow_size,
- * nothing is written in @p flow, the validation is still performed.
- *
- * @param[in] action
- *   Action configuration.
- * @param[in, out] flow
- *   Pointer to flow structure.
- * @param[in] flow_size
- *   Size in bytes of the available space in @p flow, if too small, nothing is
- *   written.
- *
- * @return
- *   On success the number of bytes consumed/necessary, if the returned value
- *   is lesser or equal to @p flow_size, the @p action has fully been
- *   converted, otherwise another call with this returned memory size should
- *   be done.
- */
-static int
-mlx5_flow_action_mark(const struct rte_flow_action *action,
-		      struct rte_flow *flow, const size_t flow_size)
-{
-	const struct rte_flow_action_mark *mark = action->conf;
-	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
-	struct ibv_flow_spec_action_tag tag = {
-		.type = IBV_FLOW_SPEC_ACTION_TAG,
-		.size = size,
-	};
-	struct mlx5_flow_verbs *verbs = flow->cur_verbs;
-
-	if (flow->modifier & MLX5_FLOW_MOD_FLAG) {
-		mlx5_flow_verbs_mark_update(verbs, mark->id);
-		size = 0;
-	} else if (size <= flow_size) {
-		tag.tag_id = mlx5_flow_mark_set(mark->id);
-		mlx5_flow_spec_verbs_add(flow, &tag, size);
-	}
-	flow->modifier |= MLX5_FLOW_MOD_MARK;
-	return size;
-}
-
-/**
- * Convert the @p action into a Verbs specification after ensuring the NIC
- * will understand and process it correctly.
- * If the necessary size for the conversion is greater than the @p flow_size,
- * nothing is written in @p flow, the validation is still performed.
- *
- * @param action[in]
- *   Action configuration.
- * @param flow[in, out]
- *   Pointer to flow structure.
- * @param flow_size[in]
- *   Size in bytes of the available space in @p flow, if too small, nothing is
- *   written.
- * @param error[int, out]
- *   Pointer to error structure.
- *
- * @return
- *   On success the number of bytes consumed/necessary, if the returned value
- *   is lesser or equal to @p flow_size, the @p action has fully been
- *   converted, otherwise another call with this returned memory size should
- *   be done.
- *   On error, a negative errno value is returned and rte_errno is set.
- */
-static int
-mlx5_flow_action_count(struct rte_eth_dev *dev,
-		       const struct rte_flow_action *action,
-		       struct rte_flow *flow,
-		       const size_t flow_size __rte_unused,
-		       struct rte_flow_error *error)
-{
-	const struct rte_flow_action_count *count = action->conf;
-#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
-	unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
-	struct ibv_flow_spec_counter_action counter = {
-		.type = IBV_FLOW_SPEC_ACTION_COUNT,
-		.size = size,
-	};
-#endif
-
-	if (!flow->counter) {
-		flow->counter = mlx5_flow_counter_new(dev, count->shared,
-						      count->id);
-		if (!flow->counter)
-			return rte_flow_error_set(error, ENOTSUP,
-						  RTE_FLOW_ERROR_TYPE_ACTION,
-						  action,
-						  "cannot get counter"
-						  " context.");
-	}
-	flow->modifier |= MLX5_FLOW_MOD_COUNT;
-#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
-	counter.counter_set_handle = flow->counter->cs->handle;
-	if (size <= flow_size)
-		mlx5_flow_spec_verbs_add(flow, &counter, size);
-	return size;
-#endif
-	return 0;
-}
-
-/**
- * Convert the @p action into @p flow after ensuring the NIC will understand
- * and process it correctly.
- * The conversion is performed action per action, each of them is written into
- * the @p flow if its size is lesser or equal to @p flow_size.
- * Validation and memory consumption computation are still performed until the
- * end of @p action, unless an error is encountered.
- *
- * @param[in] dev
- *   Pointer to Ethernet device structure.
- * @param[in] actions
- *   Pointer to flow actions array.
- * @param[in, out] flow
- *   Pointer to the rte_flow structure.
- * @param[in] flow_size
- *   Size in bytes of the available space in @p flow, if too small some
- *   garbage may be present.
- * @param[out] error
- *   Pointer to error structure.
- *
- * @return
- *   On success the number of bytes consumed/necessary, if the returned value
- *   is lesser or equal to @p flow_size, the @p actions has fully been
- *   converted, otherwise another call with this returned memory size should
- *   be done.
- *   On error, a negative errno value is returned and rte_errno is set.
- */
-static int
-mlx5_flow_actions(struct rte_eth_dev *dev,
-		  const struct rte_flow_action actions[],
-		  struct rte_flow *flow, const size_t flow_size,
-		  struct rte_flow_error *error)
-{
-	size_t size = 0;
-	int remain = flow_size;
-	int ret = 0;
-
-	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
-		switch (actions->type) {
-		case RTE_FLOW_ACTION_TYPE_VOID:
-			break;
-		case RTE_FLOW_ACTION_TYPE_FLAG:
-			ret = mlx5_flow_action_flag(flow, remain);
-			break;
-		case RTE_FLOW_ACTION_TYPE_MARK:
-			ret = mlx5_flow_action_mark(actions, flow, remain);
-			break;
-		case RTE_FLOW_ACTION_TYPE_DROP:
-			ret = mlx5_flow_action_drop(flow, remain);
-			break;
-		case RTE_FLOW_ACTION_TYPE_QUEUE:
-			ret = mlx5_flow_action_queue(actions, flow);
-			break;
-		case RTE_FLOW_ACTION_TYPE_RSS:
-			ret = mlx5_flow_action_rss(actions, flow);
-			break;
-		case RTE_FLOW_ACTION_TYPE_COUNT:
-			ret = mlx5_flow_action_count(dev, actions, flow, remain,
-						     error);
-			break;
-		default:
-			return rte_flow_error_set(error, ENOTSUP,
-						  RTE_FLOW_ERROR_TYPE_ACTION,
-						  actions,
-						  "action not supported");
-		}
-		if (ret < 0)
-			return ret;
-		if (remain > ret)
-			remain -= ret;
-		else
-			remain = 0;
-		size += ret;
-	}
-	if (!flow->fate)
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-					  NULL,
-					  "no fate action found");
-	return size;
-}
-
-/**
- * Validate flow rule and fill flow structure accordingly.
- *
- * @param dev
- *   Pointer to Ethernet device.
- * @param[out] flow
- *   Pointer to flow structure.
- * @param flow_size
- *   Size of allocated space for @p flow.
- * @param[in] attr
- *   Flow rule attributes.
- * @param[in] pattern
- *   Pattern specification (list terminated by the END pattern item).
- * @param[in] actions
- *   Associated actions (list terminated by the END action).
- * @param[out] error
- *   Perform verbose error reporting if not NULL.
- *
- * @return
- *   A positive value representing the size of the flow object in bytes
- *   regardless of @p flow_size on success, a negative errno value otherwise
- *   and rte_errno is set.
- */
-static int
-mlx5_flow_merge_switch(struct rte_eth_dev *dev,
-		       struct rte_flow *flow,
-		       size_t flow_size,
-		       const struct rte_flow_attr *attr,
-		       const struct rte_flow_item pattern[],
-		       const struct rte_flow_action actions[],
-		       struct rte_flow_error *error)
-{
-	unsigned int n = mlx5_dev_to_port_id(dev->device, NULL, 0);
-	uint16_t port_id[!n + n];
-	struct mlx5_nl_flow_ptoi ptoi[!n + n + 1];
-	size_t off = RTE_ALIGN_CEIL(sizeof(*flow), alignof(max_align_t));
-	unsigned int i;
-	unsigned int own = 0;
-	int ret;
-
-	/* At least one port is needed when no switch domain is present. */
-	if (!n) {
-		n = 1;
-		port_id[0] = dev->data->port_id;
-	} else {
-		n = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, n), n);
-	}
-	for (i = 0; i != n; ++i) {
-		struct rte_eth_dev_info dev_info;
-
-		rte_eth_dev_info_get(port_id[i], &dev_info);
-		if (port_id[i] == dev->data->port_id)
-			own = i;
-		ptoi[i].port_id = port_id[i];
-		ptoi[i].ifindex = dev_info.if_index;
-	}
-	/* Ensure first entry of ptoi[] is the current device. */
-	if (own) {
-		ptoi[n] = ptoi[0];
-		ptoi[0] = ptoi[own];
-		ptoi[own] = ptoi[n];
-	}
-	/* An entry with zero ifindex terminates ptoi[]. */
-	ptoi[n].port_id = 0;
-	ptoi[n].ifindex = 0;
-	if (flow_size < off)
-		flow_size = 0;
-	ret = mlx5_nl_flow_transpose((uint8_t *)flow + off,
-				     flow_size ? flow_size - off : 0,
-				     ptoi, attr, pattern, actions, error);
-	if (ret < 0)
-		return ret;
-	if (flow_size) {
-		*flow = (struct rte_flow){
-			.attributes = *attr,
-			.nl_flow = (uint8_t *)flow + off,
-		};
-		/*
-		 * Generate a reasonably unique handle based on the address
-		 * of the target buffer.
-		 *
-		 * This is straightforward on 32-bit systems where the flow
-		 * pointer can be used directly. Otherwise, its least
-		 * significant part is taken after shifting it by the
-		 * previous power of two of the pointed buffer size.
-		 */
-		if (sizeof(flow) <= 4)
-			mlx5_nl_flow_brand(flow->nl_flow, (uintptr_t)flow);
-		else
-			mlx5_nl_flow_brand
-				(flow->nl_flow,
-				 (uintptr_t)flow >>
-				 rte_log2_u32(rte_align32prevpow2(flow_size)));
+	if (!verbs)
+		return;
+	/* Update Verbs specification. */
+	hdr = (struct ibv_spec_header *)verbs->specs;
+	if (!hdr)
+		return;
+	for (i = 0; i != verbs->attr->num_of_specs; ++i) {
+		if (hdr->type == IBV_FLOW_SPEC_ACTION_TAG) {
+			struct ibv_flow_spec_action_tag *t =
+				(struct ibv_flow_spec_action_tag *)hdr;
+
+			t->tag_id = mlx5_flow_mark_set(mark_id);
+		}
+		hdr = (struct ibv_spec_header *)((uintptr_t)hdr + hdr->size);
 	}
-	return off + ret;
 }
 
-static unsigned int
-mlx5_find_graph_root(const struct rte_flow_item pattern[], uint32_t rss_level)
+/**
+ * Convert the @p action into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested action
+ * into the flow. It also updates @p action_flags with the detected action.
+ *
+ * @param[in] action
+ *   Action configuration.
+ * @param[in, out] action_flags
+ *   Pointer to the detected actions.
+ * @param[in, out] dev_flow
+ *   Pointer to mlx5_flow.
+ */
+static void
+flow_verbs_translate_action_mark(const struct rte_flow_action *action,
+				 uint64_t *action_flags,
+				 struct mlx5_flow *dev_flow)
 {
-	const struct rte_flow_item *item;
-	unsigned int has_vlan = 0;
+	const struct rte_flow_action_mark *mark = action->conf;
+	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
+	struct ibv_flow_spec_action_tag tag = {
+		.type = IBV_FLOW_SPEC_ACTION_TAG,
+		.size = size,
+	};
+	struct mlx5_flow_verbs *verbs = &dev_flow->verbs;
 
-	for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
-		if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) {
-			has_vlan = 1;
-			break;
-		}
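+	/*
+	 * A preceding FLAG action has already added a tag specification;
+	 * rewrite its tag_id in place instead of appending a second tag.
+	 */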
+	if (*action_flags & MLX5_ACTION_FLAG) {
+		flow_verbs_mark_update(verbs, mark->id);
+		size = 0;
+	} else {
+		tag.tag_id = mlx5_flow_mark_set(mark->id);
+		mlx5_flow_spec_verbs_add(dev_flow, &tag, size);
 	}
-	if (has_vlan)
-		return rss_level < 2 ? MLX5_EXPANSION_ROOT_ETH_VLAN :
-				       MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN;
-	return rss_level < 2 ? MLX5_EXPANSION_ROOT :
-			       MLX5_EXPANSION_ROOT_OUTER;
+	*action_flags |= MLX5_ACTION_MARK;
 }
 
 /**
- * Convert the @p attributes, @p pattern, @p action, into an flow for the NIC
- * after ensuring the NIC will understand and process it correctly.
- * The conversion is only performed item/action per item/action, each of
- * them is written into the @p flow if its size is lesser or equal to @p
- * flow_size.
- * Validation and memory consumption computation are still performed until the
- * end, unless an error is encountered.
+ * Convert the @p action into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested action
+ * into the flow. It also updates @p action_flags with the detected action.
  *
  * @param[in] dev
- *   Pointer to Ethernet device.
- * @param[in, out] flow
- *   Pointer to flow structure.
- * @param[in] flow_size
- *   Size in bytes of the available space in @p flow, if too small some
- *   garbage may be present.
- * @param[in] attributes
- *   Flow rule attributes.
- * @param[in] pattern
- *   Pattern specification (list terminated by the END pattern item).
- * @param[in] actions
- *   Associated actions (list terminated by the END action).
+ *   Pointer to the Ethernet device structure.
+ * @param[in] action
+ *   Action configuration.
+ * @param[in, out] action_flags
+ *   Pointer to the detected actions.
+ * @param[in, out] dev_flow
+ *   Pointer to mlx5_flow.
  * @param[out] error
- *   Perform verbose error reporting if not NULL.
+ *   Pointer to error structure.
  *
  * @return
- *   On success the number of bytes consumed/necessary, if the returned value
- *   is lesser or equal to @p flow_size, the flow has fully been converted and
- *   can be applied, otherwise another call with this returned memory size
- *   should be done.
- *   On error, a negative errno value is returned and rte_errno is set.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-mlx5_flow_merge(struct rte_eth_dev *dev, struct rte_flow *flow,
-		const size_t flow_size,
-		const struct rte_flow_attr *attributes,
-		const struct rte_flow_item pattern[],
-		const struct rte_flow_action actions[],
-		struct rte_flow_error *error)
+flow_verbs_translate_action_count(struct rte_eth_dev *dev,
+				  const struct rte_flow_action *action,
+				  uint64_t *action_flags,
+				  struct mlx5_flow *dev_flow,
+				  struct rte_flow_error *error)
 {
-	struct rte_flow local_flow = { .layers = 0, };
-	size_t size = sizeof(*flow);
-	union {
-		struct rte_flow_expand_rss buf;
-		uint8_t buffer[2048];
-	} expand_buffer;
-	struct rte_flow_expand_rss *buf = &expand_buffer.buf;
-	struct mlx5_flow_verbs *original_verbs = NULL;
-	size_t original_verbs_size = 0;
-	uint32_t original_layers = 0;
-	int expanded_pattern_idx = 0;
-	int ret = 0;
-	uint32_t i;
-
-	if (attributes->transfer)
-		return mlx5_flow_merge_switch(dev, flow, flow_size,
-					      attributes, pattern,
-					      actions, error);
-	if (size > flow_size)
-		flow = &local_flow;
-	ret = mlx5_flow_attributes(dev->data->dev_private, attributes, flow);
-	if (ret < 0)
-		return ret;
-	ret = mlx5_flow_actions(dev, actions, &local_flow, 0, error);
-	if (ret < 0)
-		return ret;
-	if (local_flow.rss.types) {
-		unsigned int graph_root;
+	const struct rte_flow_action_count *count = action->conf;
+	struct rte_flow *flow = dev_flow->flow;
+#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
+	unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
+	struct ibv_flow_spec_counter_action counter = {
+		.type = IBV_FLOW_SPEC_ACTION_COUNT,
+		.size = size,
+	};
+#endif
 
-		graph_root = mlx5_find_graph_root(pattern,
-						  local_flow.rss.level);
-		ret = rte_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
-					  pattern, local_flow.rss.types,
-					  mlx5_support_expansion,
-					  graph_root);
-		assert(ret > 0 &&
-		       (unsigned int)ret < sizeof(expand_buffer.buffer));
-	} else {
-		buf->entries = 1;
-		buf->entry[0].pattern = (void *)(uintptr_t)pattern;
-	}
-	size += RTE_ALIGN_CEIL(local_flow.rss.queue_num * sizeof(uint16_t),
-			       sizeof(void *));
-	if (size <= flow_size)
-		flow->queue = (void *)(flow + 1);
-	LIST_INIT(&flow->verbs);
-	flow->layers = 0;
-	flow->modifier = 0;
-	flow->fate = 0;
-	for (i = 0; i != buf->entries; ++i) {
-		size_t off = size;
-		size_t off2;
-
-		flow->layers = original_layers;
-		size += sizeof(struct ibv_flow_attr) +
-			sizeof(struct mlx5_flow_verbs);
-		off2 = size;
-		if (size < flow_size) {
-			flow->cur_verbs = (void *)((uintptr_t)flow + off);
-			flow->cur_verbs->attr = (void *)(flow->cur_verbs + 1);
-			flow->cur_verbs->specs =
-				(void *)(flow->cur_verbs->attr + 1);
-		}
-		/* First iteration convert the pattern into Verbs. */
-		if (i == 0) {
-			/* Actions don't need to be converted several time. */
-			ret = mlx5_flow_actions(dev, actions, flow,
-						(size < flow_size) ?
-						flow_size - size : 0,
-						error);
-			if (ret < 0)
-				return ret;
-			size += ret;
-		} else {
-			/*
-			 * Next iteration means the pattern has already been
-			 * converted and an expansion is necessary to match
-			 * the user RSS request.  For that only the expanded
-			 * items will be converted, the common part with the
-			 * user pattern are just copied into the next buffer
-			 * zone.
-			 */
-			size += original_verbs_size;
-			if (size < flow_size) {
-				rte_memcpy(flow->cur_verbs->attr,
-					   original_verbs->attr,
-					   original_verbs_size +
-					   sizeof(struct ibv_flow_attr));
-				flow->cur_verbs->size = original_verbs_size;
-			}
-		}
-		ret = mlx5_flow_items
-			((const struct rte_flow_item *)
-			 &buf->entry[i].pattern[expanded_pattern_idx],
-			 flow,
-			 (size < flow_size) ? flow_size - size : 0, error);
-		if (ret < 0)
-			return ret;
-		size += ret;
-		if (size <= flow_size) {
-			mlx5_flow_adjust_priority(dev, flow);
-			LIST_INSERT_HEAD(&flow->verbs, flow->cur_verbs, next);
-		}
-		/*
-		 * Keep a pointer of the first verbs conversion and the layers
-		 * it has encountered.
-		 */
-		if (i == 0) {
-			original_verbs = flow->cur_verbs;
-			original_verbs_size = size - off2;
-			original_layers = flow->layers;
-			/*
-			 * move the index of the expanded pattern to the
-			 * first item not addressed yet.
-			 */
-			if (pattern->type == RTE_FLOW_ITEM_TYPE_END) {
-				expanded_pattern_idx++;
-			} else {
-				const struct rte_flow_item *item = pattern;
-
-				for (item = pattern;
-				     item->type != RTE_FLOW_ITEM_TYPE_END;
-				     ++item)
-					expanded_pattern_idx++;
-			}
-		}
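+	/* Get the flow counter context only once per flow. */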
+	if (!flow->counter) {
+		flow->counter = mlx5_flow_counter_new(dev, count->shared,
+						      count->id);
+		if (!flow->counter)
+			return rte_flow_error_set(error, ENOTSUP,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  action,
+						  "cannot get counter"
+						  " context.");
 	}
-	/* Restore the origin layers in the flow. */
-	flow->layers = original_layers;
-	return size;
+	*action_flags |= MLX5_ACTION_COUNT;
+#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
+	counter.counter_set_handle = flow->counter->cs->handle;
+	mlx5_flow_spec_verbs_add(dev_flow, &counter, size);
+#endif
+	return 0;
 }
 
 /**
@@ -2212,7 +1585,7 @@ mlx5_flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl)
  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the flow.
  *
  * @param[in] dev
- *   Pointer to Ethernet device.
+ *   Pointer to the Ethernet device structure.
  * @param[in] flow
  *   Pointer to flow structure.
  */
@@ -2220,8 +1593,8 @@ static void
 mlx5_flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
 {
 	struct priv *priv = dev->data->dev_private;
-	const int mark = !!(flow->modifier &
-			    (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK));
+	const int mark = !!(flow->actions &
+			    (MLX5_ACTION_FLAG | MLX5_ACTION_MARK));
 	const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
 	unsigned int i;
 
@@ -2264,8 +1637,8 @@ static void
 mlx5_flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
 {
 	struct priv *priv = dev->data->dev_private;
-	const int mark = !!(flow->modifier &
-			    (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK));
+	const int mark = !!(flow->actions &
+			    (MLX5_ACTION_FLAG | MLX5_ACTION_MARK));
 	const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
 	unsigned int i;
 
@@ -3574,21 +2947,21 @@ mlx5_flow_verbs_get_items_and_size(const struct rte_flow_item items[],
 			size += sizeof(struct ibv_flow_spec_tunnel);
 			detected_items |= MLX5_FLOW_LAYER_VXLAN_GPE;
 			break;
-		case RTE_FLOW_ITEM_TYPE_GRE:
 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
+		case RTE_FLOW_ITEM_TYPE_GRE:
 			size += sizeof(struct ibv_flow_spec_gre);
 			detected_items |= MLX5_FLOW_LAYER_GRE;
-#else
-			size += sizeof(struct ibv_flow_spec_tunnel);
-			detected_items |= MLX5_FLOW_LAYER_TUNNEL;
-#endif
 			break;
 		case RTE_FLOW_ITEM_TYPE_MPLS:
-#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
 			size += sizeof(struct ibv_flow_spec_mpls);
 			detected_items |= MLX5_FLOW_LAYER_MPLS;
-#endif
 			break;
+#else
+		case RTE_FLOW_ITEM_TYPE_GRE:
+			size += sizeof(struct ibv_flow_spec_tunnel);
+			detected_items |= MLX5_FLOW_LAYER_TUNNEL;
+			break;
+#endif
 		default:
 			break;
 		}
@@ -3651,7 +3024,7 @@ mlx5_flow_verbs_prepare(const struct rte_flow_attr *attr __rte_unused,
 			uint64_t *action_flags,
 			struct rte_flow_error *error)
 {
-	uint32_t size = sizeof(struct ibv_flow_attr);
+	uint32_t size = sizeof(struct mlx5_flow) + sizeof(struct ibv_flow_attr);
 	struct mlx5_flow *flow;
 
 	size += mlx5_flow_verbs_get_actions_and_size(actions, action_flags);
@@ -3664,10 +3037,150 @@ mlx5_flow_verbs_prepare(const struct rte_flow_attr *attr __rte_unused,
 				   "not enough memory to create flow");
 		return NULL;
 	}
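+	/* A minimal layout note (descriptive comment, not in the original
+	 * patch): the Verbs attribute and its specifications are laid out
+	 * right after the mlx5_flow structure.
+	 */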
+	flow->verbs.attr = (void *)(flow + 1);
+	flow->verbs.specs = (uint8_t *)(flow + 1) +
+				sizeof(struct ibv_flow_attr);
 	return flow;
 }
 
 /**
+ * Fill the flow with Verbs specifications.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[in, out] dev_flow
+ *   Pointer to the mlx5 flow.
+ * @param[in] attr
+ *   Pointer to the flow attributes.
+ * @param[in] items
+ *   Pointer to the list of items.
+ * @param[in] actions
+ *   Pointer to the list of actions.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int mlx5_flow_verbs_translate(struct rte_eth_dev *dev,
+				     struct mlx5_flow *dev_flow,
+				     const struct rte_flow_attr *attr,
+				     const struct rte_flow_item items[],
+				     const struct rte_flow_action actions[],
+				     struct rte_flow_error *error)
+{
+	uint64_t action_flags = 0;
+	uint64_t item_flags = 0;
+	uint64_t priority = attr->priority;
+	struct priv *priv = dev->data->dev_private;
+
+	if (priority == MLX5_FLOW_PRIO_RSVD)
+		priority = priv->config.flow_prio - 1;
+	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+		int ret;
+		switch (actions->type) {
+		case RTE_FLOW_ACTION_TYPE_VOID:
+			break;
+		case RTE_FLOW_ACTION_TYPE_FLAG:
+			flow_verbs_translate_action_flag(actions,
+							 &action_flags,
+							 dev_flow);
+			break;
+		case RTE_FLOW_ACTION_TYPE_MARK:
+			flow_verbs_translate_action_mark(actions,
+							 &action_flags,
+							 dev_flow);
+			break;
+		case RTE_FLOW_ACTION_TYPE_DROP:
+			flow_verbs_translate_action_drop(&action_flags,
+							 dev_flow);
+			break;
+		case RTE_FLOW_ACTION_TYPE_QUEUE:
+			flow_verbs_translate_action_queue(actions,
+							  &action_flags,
+							  dev_flow);
+			break;
+		case RTE_FLOW_ACTION_TYPE_RSS:
+			flow_verbs_translate_action_rss(actions,
+							&action_flags,
+							dev_flow);
+			break;
+		case RTE_FLOW_ACTION_TYPE_COUNT:
+			ret = flow_verbs_translate_action_count(dev,
+								actions,
+								&action_flags,
+								dev_flow,
+								error);
+			if (ret < 0)
+				return ret;
+			break;
+		default:
+			return rte_flow_error_set(error, ENOTSUP,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  actions,
+						  "action not supported");
+		}
+	}
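+	/* Accumulate the detected actions on the original rte_flow. */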
+	dev_flow->flow->actions |= action_flags;
+	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
+		switch (items->type) {
+		case RTE_FLOW_ITEM_TYPE_VOID:
+			break;
+		case RTE_FLOW_ITEM_TYPE_ETH:
+			flow_verbs_translate_item_eth(items, &item_flags,
+						      dev_flow);
+			break;
+		case RTE_FLOW_ITEM_TYPE_VLAN:
+			flow_verbs_translate_item_vlan(items, &item_flags,
+						       dev_flow);
+			break;
+		case RTE_FLOW_ITEM_TYPE_IPV4:
+			flow_verbs_translate_item_ipv4(items, &item_flags,
+						       dev_flow);
+			break;
+		case RTE_FLOW_ITEM_TYPE_IPV6:
+			flow_verbs_translate_item_ipv6(items, &item_flags,
+						       dev_flow);
+			break;
+		case RTE_FLOW_ITEM_TYPE_UDP:
+			flow_verbs_translate_item_udp(items, &item_flags,
+						      dev_flow);
+			break;
+		case RTE_FLOW_ITEM_TYPE_TCP:
+			flow_verbs_translate_item_tcp(items, &item_flags,
+						      dev_flow);
+			break;
+		case RTE_FLOW_ITEM_TYPE_VXLAN:
+			flow_verbs_translate_item_vxlan(items, &item_flags,
+							dev_flow);
+			break;
+		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
+			flow_verbs_translate_item_vxlan_gpe(items, &item_flags,
+							    dev_flow);
+			break;
+		case RTE_FLOW_ITEM_TYPE_GRE:
+			flow_verbs_translate_item_gre(items, &item_flags,
+						      dev_flow);
+			break;
+		case RTE_FLOW_ITEM_TYPE_MPLS:
+			flow_verbs_translate_item_mpls(items, &item_flags,
+						       dev_flow);
+			break;
+		default:
+			return rte_flow_error_set(error, ENOTSUP,
+						  RTE_FLOW_ERROR_TYPE_ITEM,
+						  NULL,
+						  "item not supported");
+		}
+	}
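+	/* Adjust the Verbs flow priority according to the highest detected
+	 * layer.
+	 */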
+	dev_flow->verbs.attr->priority = mlx5_flow_adjust_priority(dev,
+					priority,
+					dev_flow->verbs.attr->priority);
+	return 0;
+}
+
+/**
  * Remove the flow.
  *
  * @param[in] dev
@@ -3680,16 +3193,18 @@ mlx5_flow_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
 {
 	struct priv *priv = dev->data->dev_private;
 	struct mlx5_flow_verbs *verbs;
+	struct mlx5_flow *dev_flow;
 
 	if (flow->nl_flow && priv->mnl_socket)
 		mlx5_nl_flow_destroy(priv->mnl_socket, flow->nl_flow, NULL);
-	LIST_FOREACH(verbs, &flow->verbs, next) {
+	LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
+		verbs = &dev_flow->verbs;
 		if (verbs->flow) {
 			claim_zero(mlx5_glue->destroy_flow(verbs->flow));
 			verbs->flow = NULL;
 		}
 		if (verbs->hrxq) {
-			if (flow->fate & MLX5_FLOW_FATE_DROP)
+			if (flow->actions & MLX5_ACTION_DROP)
 				mlx5_hrxq_drop_release(dev);
 			else
 				mlx5_hrxq_release(dev, verbs->hrxq);
@@ -3721,10 +3236,12 @@ mlx5_flow_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
 {
 	struct priv *priv = dev->data->dev_private;
 	struct mlx5_flow_verbs *verbs;
+	struct mlx5_flow *dev_flow;
 	int err;
 
-	LIST_FOREACH(verbs, &flow->verbs, next) {
-		if (flow->fate & MLX5_FLOW_FATE_DROP) {
+	LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
+		verbs = &dev_flow->verbs;
+		if (flow->actions & MLX5_ACTION_DROP) {
 			verbs->hrxq = mlx5_hrxq_drop_new(dev);
 			if (!verbs->hrxq) {
 				rte_flow_error_set
@@ -3760,8 +3277,8 @@ mlx5_flow_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
 			}
 			verbs->hrxq = hrxq;
 		}
-		verbs->flow =
-			mlx5_glue->create_flow(verbs->hrxq->qp, verbs->attr);
+		verbs->flow = mlx5_glue->create_flow(verbs->hrxq->qp,
+						     verbs->attr);
 		if (!verbs->flow) {
 			rte_flow_error_set(error, errno,
 					   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
@@ -3777,9 +3294,10 @@ mlx5_flow_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
 	return 0;
 error:
 	err = rte_errno; /* Save rte_errno before cleanup. */
-	LIST_FOREACH(verbs, &flow->verbs, next) {
+	LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
+		verbs = &dev_flow->verbs;
 		if (verbs->hrxq) {
-			if (flow->fate & MLX5_FLOW_FATE_DROP)
+			if (flow->actions & MLX5_ACTION_DROP)
 				mlx5_hrxq_drop_release(dev);
 			else
 				mlx5_hrxq_release(dev, verbs->hrxq);
@@ -3790,6 +3308,25 @@ mlx5_flow_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
 	return -rte_errno;
 }
 
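+/**
+ * Find the RSS expansion graph root to start the pattern expansion from,
+ * according to the VLAN presence in the pattern and the requested RSS level.
+ *
+ * @param[in] pattern
+ *   Pointer to the list of items.
+ * @param[in] rss_level
+ *   RSS level (2 and above requests inner RSS).
+ *
+ * @return
+ *   Graph root node index for the RSS expansion.
+ */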
+static unsigned int
+mlx5_find_graph_root(const struct rte_flow_item pattern[], uint32_t rss_level)
+{
+	const struct rte_flow_item *item;
+	unsigned int has_vlan = 0;
+
+	for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
+		if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) {
+			has_vlan = 1;
+			break;
+		}
+	}
+	if (has_vlan)
+		return rss_level < 2 ? MLX5_EXPANSION_ROOT_ETH_VLAN :
+				       MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN;
+	return rss_level < 2 ? MLX5_EXPANSION_ROOT :
+			       MLX5_EXPANSION_ROOT_OUTER;
+}
+
 /**
  * Create a flow and add it to @p list.
  *
@@ -3819,7 +3356,6 @@ mlx5_flow_list_create(struct rte_eth_dev *dev,
 {
 	struct rte_flow *flow = NULL;
 	struct mlx5_flow *dev_flow;
-	size_t size = 0;
 	uint64_t action_flags = 0;
 	uint64_t item_flags = 0;
 	const struct rte_flow_action_rss *rss;
@@ -3830,13 +3366,21 @@ mlx5_flow_list_create(struct rte_eth_dev *dev,
 	struct rte_flow_expand_rss *buf = &expand_buffer.buf;
 	int ret;
 	uint32_t i;
+	uint32_t flow_size;
 
 	ret = mlx5_flow_validate(dev, attr, items, actions, error);
 	if (ret < 0)
 		return NULL;
-	flow = rte_calloc(__func__, 1, sizeof(*flow), 0);
-	LIST_INIT(&flow->dev_flows);
+	flow_size = sizeof(struct rte_flow);
 	rss = mlx5_flow_get_rss_action(actions);
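+	/* Room for the RSS queue(s) is appended right after the rte_flow
+	 * structure, aligned on pointer size.
+	 */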
+	if (rss)
+		flow_size += RTE_ALIGN_CEIL(rss->queue_num * sizeof(uint16_t),
+					    sizeof(void *));
+	else
+		flow_size += RTE_ALIGN_CEIL(sizeof(uint16_t), sizeof(void *));
+	flow = rte_calloc(__func__, 1, flow_size, 0);
+	if (!flow) {
+		rte_flow_error_set(error, ENOMEM,
+				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+				   "not enough memory to create flow");
+		return NULL;
+	}
+	flow->queue = (void *)(flow + 1);
+	LIST_INIT(&flow->dev_flows);
 	if (rss && rss->types) {
 		unsigned int graph_root;
 
@@ -3857,25 +3401,11 @@ mlx5_flow_list_create(struct rte_eth_dev *dev,
 						   &action_flags, error);
 		dev_flow->flow = flow;
 		LIST_INSERT_HEAD(&flow->dev_flows, dev_flow, next);
+		ret = mlx5_flow_verbs_translate(dev, dev_flow, attr,
+						buf->entry[i].pattern,
+						actions, error);
+		if (ret < 0)
+			return NULL;
 	}
-	ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error);
-	if (ret < 0)
-		return NULL;
-	size = ret;
-	flow = rte_calloc(__func__, 1, size, 0);
-	if (!flow) {
-		rte_flow_error_set(error, ENOMEM,
-				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-				   NULL,
-				   "not enough memory to create flow");
-		return NULL;
-	}
-	ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error);
-	if (ret < 0) {
-		rte_free(flow);
-		return NULL;
-	}
-	assert((size_t)ret == size);
 	if (dev->data->dev_started) {
 		ret = mlx5_flow_apply(dev, flow, error);
 		if (ret < 0) {
@@ -3933,6 +3463,12 @@ mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
 	 */
 	if (dev->data->dev_started)
 		mlx5_flow_rxq_flags_trim(dev, flow);
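+	/* Release every device flow attached to this rte_flow. */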
+	while (!LIST_EMPTY(&flow->dev_flows)) {
+		struct mlx5_flow *dev_flow = LIST_FIRST(&flow->dev_flows);
+
+		LIST_REMOVE(dev_flow, next);
+		rte_free(dev_flow);
+	}
 	rte_free(flow);
 }
 
@@ -4208,7 +3744,7 @@ mlx5_flow_query_count(struct rte_flow *flow __rte_unused,
 		      struct rte_flow_error *error)
 {
 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
-	if (flow->modifier & MLX5_FLOW_MOD_COUNT) {
+	if (flow->actions & MLX5_ACTION_COUNT) {
 		struct rte_flow_query_count *qc = data;
 		uint64_t counters[2] = {0, 0};
 		struct ibv_query_counter_set_attr query_cs_attr = {
-- 
2.11.0

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [dpdk-dev] [PATCH v3 04/11] net/mlx5: add support for multiple flow drivers
  2018-09-24 23:17 ` [dpdk-dev] [PATCH v3 00/11] net/mlx5: add Direct Verbs flow driver support Yongseok Koh
                     ` (2 preceding siblings ...)
  2018-09-24 23:17   ` [dpdk-dev] [PATCH v3 03/11] net/mlx5: add flow translate function Yongseok Koh
@ 2018-09-24 23:17   ` Yongseok Koh
  2018-09-24 23:17   ` [dpdk-dev] [PATCH v3 05/11] net/mlx5: add Direct Verbs validation function Yongseok Koh
                     ` (7 subsequent siblings)
  11 siblings, 0 replies; 22+ messages in thread
From: Yongseok Koh @ 2018-09-24 23:17 UTC (permalink / raw)
  To: Thomas Monjalon, Shahaf Shuler; +Cc: dev, Ori Kam

From: Ori Kam <orika@mellanox.com>

In the current PMD there is support only for the Verbs driver API, to
configure NIC rules, and for the TC driver API, to configure E-Switch
rules. In order to support new drivers that will enable new features,
for example the Direct Verbs driver API, each driver needs to be split
into a dedicated file and accessed through function pointers.

This commit moves the Verbs API to a dedicated file and introduces the
use of function pointers in the flow handling.

The function pointers that are in use (an illustrative sketch of such a
table follows this list):
* validate - handles the validation of the flow. It can use both
             driver-specific functions and shared functions located in
             mlx5_flow.c.
* prepare - allocates the device flow. There can be a number of device
            flows connected to a single requested flow.
* translate - converts the requested device flow into the driver flow.
* apply - inserts the flow into the NIC.
* remove - removes the flow from the NIC but keeps it in memory.
* destroy - removes the flow from memory.
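
Below is a minimal sketch of what such an ops table could look like; the
struct name, exact field layout and the void return types of remove and
destroy are assumptions for illustration (the committed definitions live
in mlx5_flow.h), while the signatures mirror the per-driver functions in
this series:

/* Hypothetical ops table; rte_* and mlx5_* types come from rte_flow.h
 * and mlx5_flow.h.
 */
struct mlx5_flow_driver_ops_sketch {
	int (*validate)(struct rte_eth_dev *dev,
			const struct rte_flow_attr *attr,
			const struct rte_flow_item items[],
			const struct rte_flow_action actions[],
			struct rte_flow_error *error);
	/* May be called once per expanded pattern of a single flow. */
	struct mlx5_flow *(*prepare)(const struct rte_flow_attr *attr,
				     const struct rte_flow_item items[],
				     const struct rte_flow_action actions[],
				     uint64_t *item_flags,
				     uint64_t *action_flags,
				     struct rte_flow_error *error);
	int (*translate)(struct rte_eth_dev *dev,
			 struct mlx5_flow *dev_flow,
			 const struct rte_flow_attr *attr,
			 const struct rte_flow_item items[],
			 const struct rte_flow_action actions[],
			 struct rte_flow_error *error);
	int (*apply)(struct rte_eth_dev *dev, struct rte_flow *flow,
		     struct rte_flow_error *error);
	void (*remove)(struct rte_eth_dev *dev, struct rte_flow *flow);
	void (*destroy)(struct rte_eth_dev *dev, struct rte_flow *flow);
};

A driver exports a const instance of such a table and the generic layer
in mlx5_flow.c dispatches through the function pointers; in this patch a
global nic_ops variable holds the table selected for the device.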

Signed-off-by: Ori Kam <orika@mellanox.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/Makefile          |    1 +
 drivers/net/mlx5/meson.build       |    1 +
 drivers/net/mlx5/mlx5.c            |    2 +
 drivers/net/mlx5/mlx5_flow.c       | 1910 ++----------------------------------
 drivers/net/mlx5/mlx5_flow.h       |  257 +++++
 drivers/net/mlx5/mlx5_flow_verbs.c | 1692 ++++++++++++++++++++++++++++++++
 6 files changed, 2026 insertions(+), 1837 deletions(-)
 create mode 100644 drivers/net/mlx5/mlx5_flow.h
 create mode 100644 drivers/net/mlx5/mlx5_flow_verbs.c

diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index 2e70dec5b..9bd6bfb82 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -31,6 +31,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_stats.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rss.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_mr.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow.c
+SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow_verbs.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_socket.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_nl.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_nl_flow.c
diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
index 289c7a4c0..40cc95038 100644
--- a/drivers/net/mlx5/meson.build
+++ b/drivers/net/mlx5/meson.build
@@ -31,6 +31,7 @@ if build
 		'mlx5.c',
 		'mlx5_ethdev.c',
 		'mlx5_flow.c',
+		'mlx5_flow_verbs.c',
 		'mlx5_mac.c',
 		'mlx5_mr.c',
 		'mlx5_nl.c',
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index fd89e2af3..ab44864e9 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -46,6 +46,7 @@
 #include "mlx5_defs.h"
 #include "mlx5_glue.h"
 #include "mlx5_mr.h"
+#include "mlx5_flow.h"
 
 /* Device parameter to enable RX completion queue compression. */
 #define MLX5_RXQ_CQE_COMP_EN "rxq_cqe_comp_en"
@@ -1185,6 +1186,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 	if (err < 0)
 		goto error;
 	priv->config.flow_prio = err;
+	mlx5_flow_init_driver_ops(eth_dev);
 	/*
 	 * Once the device is added to the list of memory event
 	 * callback, its global MR cache table cannot be expanded
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 2d55f08b8..1c177b9c8 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -31,83 +31,12 @@
 #include "mlx5_defs.h"
 #include "mlx5_prm.h"
 #include "mlx5_glue.h"
+#include "mlx5_flow.h"
 
 /* Dev ops structure defined in mlx5.c */
 extern const struct eth_dev_ops mlx5_dev_ops;
 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
 
-/* Pattern outer Layer bits. */
-#define MLX5_FLOW_LAYER_OUTER_L2 (1u << 0)
-#define MLX5_FLOW_LAYER_OUTER_L3_IPV4 (1u << 1)
-#define MLX5_FLOW_LAYER_OUTER_L3_IPV6 (1u << 2)
-#define MLX5_FLOW_LAYER_OUTER_L4_UDP (1u << 3)
-#define MLX5_FLOW_LAYER_OUTER_L4_TCP (1u << 4)
-#define MLX5_FLOW_LAYER_OUTER_VLAN (1u << 5)
-
-/* Pattern inner Layer bits. */
-#define MLX5_FLOW_LAYER_INNER_L2 (1u << 6)
-#define MLX5_FLOW_LAYER_INNER_L3_IPV4 (1u << 7)
-#define MLX5_FLOW_LAYER_INNER_L3_IPV6 (1u << 8)
-#define MLX5_FLOW_LAYER_INNER_L4_UDP (1u << 9)
-#define MLX5_FLOW_LAYER_INNER_L4_TCP (1u << 10)
-#define MLX5_FLOW_LAYER_INNER_VLAN (1u << 11)
-
-/* Pattern tunnel Layer bits. */
-#define MLX5_FLOW_LAYER_VXLAN (1u << 12)
-#define MLX5_FLOW_LAYER_VXLAN_GPE (1u << 13)
-#define MLX5_FLOW_LAYER_GRE (1u << 14)
-#define MLX5_FLOW_LAYER_MPLS (1u << 15)
-
-/* Outer Masks. */
-#define MLX5_FLOW_LAYER_OUTER_L3 \
-	(MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L3_IPV6)
-#define MLX5_FLOW_LAYER_OUTER_L4 \
-	(MLX5_FLOW_LAYER_OUTER_L4_UDP | MLX5_FLOW_LAYER_OUTER_L4_TCP)
-#define MLX5_FLOW_LAYER_OUTER \
-	(MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_L3 | \
-	 MLX5_FLOW_LAYER_OUTER_L4)
-
-/* Tunnel Masks. */
-#define MLX5_FLOW_LAYER_TUNNEL \
-	(MLX5_FLOW_LAYER_VXLAN | MLX5_FLOW_LAYER_VXLAN_GPE | \
-	 MLX5_FLOW_LAYER_GRE | MLX5_FLOW_LAYER_MPLS)
-
-/* Inner Masks. */
-#define MLX5_FLOW_LAYER_INNER_L3 \
-	(MLX5_FLOW_LAYER_INNER_L3_IPV4 | MLX5_FLOW_LAYER_INNER_L3_IPV6)
-#define MLX5_FLOW_LAYER_INNER_L4 \
-	(MLX5_FLOW_LAYER_INNER_L4_UDP | MLX5_FLOW_LAYER_INNER_L4_TCP)
-#define MLX5_FLOW_LAYER_INNER \
-	(MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_L3 | \
-	 MLX5_FLOW_LAYER_INNER_L4)
-
-/* Actions that modify the fate of matching traffic. */
-#define MLX5_FLOW_FATE_DROP (1u << 0)
-#define MLX5_FLOW_FATE_QUEUE (1u << 1)
-#define MLX5_FLOW_FATE_RSS (1u << 2)
-
-/* Modify a packet. */
-#define MLX5_FLOW_MOD_FLAG (1u << 0)
-#define MLX5_FLOW_MOD_MARK (1u << 1)
-#define MLX5_FLOW_MOD_COUNT (1u << 2)
-
-/* Actions */
-#define MLX5_ACTION_DROP (1u << 0)
-#define MLX5_ACTION_QUEUE (1u << 1)
-#define MLX5_ACTION_RSS (1u << 2)
-#define MLX5_ACTION_FLAG (1u << 3)
-#define MLX5_ACTION_MARK (1u << 4)
-#define MLX5_ACTION_COUNT (1u << 5)
-
-/* possible L3 layers protocols filtering. */
-#define MLX5_IP_PROTOCOL_TCP 6
-#define MLX5_IP_PROTOCOL_UDP 17
-#define MLX5_IP_PROTOCOL_GRE 47
-#define MLX5_IP_PROTOCOL_MPLS 147
-
-/* Priority reserved for default flows. */
-#define MLX5_FLOW_PRIO_RSVD ((uint32_t)-1)
-
 enum mlx5_expansion {
 	MLX5_EXPANSION_ROOT,
 	MLX5_EXPANSION_ROOT_OUTER,
@@ -278,57 +207,6 @@ static const struct rte_flow_expand_node mlx5_support_expansion[] = {
 	},
 };
 
-/** Handles information leading to a drop fate. */
-struct mlx5_flow_verbs {
-	LIST_ENTRY(mlx5_flow_verbs) next;
-	unsigned int size; /**< Size of the attribute. */
-	struct {
-		struct ibv_flow_attr *attr;
-		/**< Pointer to the Specification buffer. */
-		uint8_t *specs; /**< Pointer to the specifications. */
-	};
-	struct ibv_flow *flow; /**< Verbs flow pointer. */
-	struct mlx5_hrxq *hrxq; /**< Hash Rx queue object. */
-	uint64_t hash_fields; /**< Verbs hash Rx queue hash fields. */
-};
-
-/** Device flow structure. */
-struct mlx5_flow {
-	LIST_ENTRY(mlx5_flow) next;
-	struct rte_flow *flow; /**< Pointer to the main flow. */
-	uint32_t layers; /**< Bit-fields that holds the detected layers. */
-	union {
-		struct mlx5_flow_verbs verbs; /**< Holds the verbs dev-flow. */
-	};
-};
-
-/* Counters information. */
-struct mlx5_flow_counter {
-	LIST_ENTRY(mlx5_flow_counter) next; /**< Pointer to the next counter. */
-	uint32_t shared:1; /**< Share counter ID with other flow rules. */
-	uint32_t ref_cnt:31; /**< Reference counter. */
-	uint32_t id; /**< Counter ID. */
-	struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
-	uint64_t hits; /**< Number of packets matched by the rule. */
-	uint64_t bytes; /**< Number of bytes matched by the rule. */
-};
-
-/* Flow structure. */
-struct rte_flow {
-	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
-	struct rte_flow_attr attributes; /**< User flow attribute. */
-	uint32_t layers; /**< Bit-fields that holds the detected layers. */
-	/**< Bit-fields of present layers see MLX5_FLOW_LAYER_*. */
-	struct mlx5_flow_counter *counter; /**< Holds Verbs flow counter. */
-	struct rte_flow_action_rss rss;/**< RSS context. */
-	uint8_t key[MLX5_RSS_HASH_KEY_LEN]; /**< RSS hash key. */
-	uint16_t (*queue)[]; /**< Destination queues to redirect traffic to. */
-	void *nl_flow; /**< Netlink flow buffer if relevant. */
-	LIST_HEAD(dev_flows, mlx5_flow) dev_flows;
-	/**< Device flows that are part of the flow. */
-	uint32_t actions; /**< Bit-fields which mark all detected actions. */
-};
-
 static const struct rte_flow_ops mlx5_flow_ops = {
 	.validate = mlx5_flow_validate,
 	.create = mlx5_flow_create,
@@ -364,23 +242,6 @@ struct mlx5_fdir {
 	struct rte_flow_action_queue queue;
 };
 
-/* Verbs specification header. */
-struct ibv_spec_header {
-	enum ibv_flow_spec_type type;
-	uint16_t size;
-};
-
-/*
- * Number of sub priorities.
- * For each kind of pattern matching i.e. L2, L3, L4 to have a correct
- * matching on the NIC (firmware dependent) L4 most have the higher priority
- * followed by L3 and ending with L2.
- */
-#define MLX5_PRIORITY_MAP_L2 2
-#define MLX5_PRIORITY_MAP_L3 1
-#define MLX5_PRIORITY_MAP_L4 0
-#define MLX5_PRIORITY_MAP_MAX 3
-
 /* Map of Verbs to Flow priority with 8 Verbs priorities. */
 static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = {
 	{ 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
@@ -421,6 +282,9 @@ static struct mlx5_flow_tunnel_info tunnels_info[] = {
 	},
 };
 
+/* Holds the NIC operations that should be used. */
+struct mlx5_flow_driver_ops nic_ops;
+
 /**
  * Discover the maximum number of priority available.
  *
@@ -490,7 +354,7 @@ mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
 }
 
 /**
- * Adjust flow priority.
+ * Adjust flow priority based on the highest layer and the requested priority.
  *
  * @param[in] dev
  *   Pointer to the Ethernet device structure.
@@ -502,10 +366,8 @@ mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
  * @return
  *   The new priority.
  */
-static uint32_t
-mlx5_flow_adjust_priority(struct rte_eth_dev *dev,
-			int32_t priority,
-			uint32_t subpriority)
+uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
+				   uint32_t subpriority)
 {
 	uint32_t res = 0;
 	struct priv *priv = dev->data->dev_private;
@@ -522,80 +384,6 @@ mlx5_flow_adjust_priority(struct rte_eth_dev *dev,
 }
 
 /**
- * Get a flow counter.
- *
- * @param[in] dev
- *   Pointer to the Ethernet device structure.
- * @param[in] shared
- *   Indicate if this counter is shared with other flows.
- * @param[in] id
- *   Counter identifier.
- *
- * @return
- *   A pointer to the counter, NULL otherwise and rte_errno is set.
- */
-static struct mlx5_flow_counter *
-mlx5_flow_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
-{
-	struct priv *priv = dev->data->dev_private;
-	struct mlx5_flow_counter *cnt;
-
-	LIST_FOREACH(cnt, &priv->flow_counters, next) {
-		if (!cnt->shared || cnt->shared != shared)
-			continue;
-		if (cnt->id != id)
-			continue;
-		cnt->ref_cnt++;
-		return cnt;
-	}
-#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
-
-	struct mlx5_flow_counter tmpl = {
-		.shared = shared,
-		.id = id,
-		.cs = mlx5_glue->create_counter_set
-			(priv->ctx,
-			 &(struct ibv_counter_set_init_attr){
-				 .counter_set_id = id,
-			 }),
-		.hits = 0,
-		.bytes = 0,
-	};
-
-	if (!tmpl.cs) {
-		rte_errno = errno;
-		return NULL;
-	}
-	cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
-	if (!cnt) {
-		rte_errno = ENOMEM;
-		return NULL;
-	}
-	*cnt = tmpl;
-	LIST_INSERT_HEAD(&priv->flow_counters, cnt, next);
-	return cnt;
-#endif
-	rte_errno = ENOTSUP;
-	return NULL;
-}
-
-/**
- * Release a flow counter.
- *
- * @param[in] counter
- *   Pointer to the counter handler.
- */
-static void
-mlx5_flow_counter_release(struct mlx5_flow_counter *counter)
-{
-	if (--counter->ref_cnt == 0) {
-		claim_zero(mlx5_glue->destroy_counter_set(counter->cs));
-		LIST_REMOVE(counter, next);
-		rte_free(counter);
-	}
-}
-
-/**
  * Verify the @p item specifications (spec, last, mask) are compatible with the
  * NIC capabilities.
  *
@@ -656,904 +444,6 @@ mlx5_flow_item_acceptable(const struct rte_flow_item *item,
 }
 
 /**
- * Add a verbs item specification into @p flow.
- *
- * @param[in, out] flow
- *   Pointer to flow structure.
- * @param[in] src
- *   Create specification.
- * @param[in] size
- *   Size in bytes of the specification to copy.
- */
-static void
-mlx5_flow_spec_verbs_add(struct mlx5_flow *flow, void *src, unsigned int size)
-{
-	struct mlx5_flow_verbs *verbs = &flow->verbs;
-
-	if (verbs->specs) {
-		void *dst;
-
-		dst = (void *)(verbs->specs + verbs->size);
-		memcpy(dst, src, size);
-		++verbs->attr->num_of_specs;
-	}
-	verbs->size += size;
-}
-
-/**
- * Adjust verbs hash fields according to the @p flow information.
- *
- * @param[in] dev_flow.
- *   Pointer to dev flow structure.
- * @param[in] tunnel
- *   1 when the hash field is for a tunnel item.
- * @param[in] layer_types
- *   ETH_RSS_* types.
- * @param[in] hash_fields
- *   Item hash fields.
- */
-static void
-mlx5_flow_verbs_hashfields_adjust(struct mlx5_flow *dev_flow,
-				  int tunnel __rte_unused,
-				  uint32_t layer_types, uint64_t hash_fields)
-{
-#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
-	int rss_request_inner = dev_flow->flow->rss.level >= 2;
-
-	hash_fields |= (tunnel ? IBV_RX_HASH_INNER : 0);
-	if (rss_request_inner && !tunnel)
-		hash_fields = 0;
-	else if (!rss_request_inner && tunnel)
-		hash_fields = 0;
-#endif
-	if (!(dev_flow->flow->rss.types & layer_types))
-		hash_fields = 0;
-	dev_flow->verbs.hash_fields |= hash_fields;
-}
-
-/**
- * Convert the @p item into a Verbs specification. This function assumes that
- * the input is valid and that there is space to insert the requested item
- * into the flow.
- *
- * @param[in] item
- *   Item specification.
- * @param[in] item_flags
- *   Bit field with all detected items.
- * @param[in, out] dev_flow
- *   Pointer to dev_flow structure.
- */
-static void
-flow_verbs_translate_item_eth(const struct rte_flow_item *item,
-			      uint64_t *item_flags,
-			      struct mlx5_flow *dev_flow)
-{
-	const struct rte_flow_item_eth *spec = item->spec;
-	const struct rte_flow_item_eth *mask = item->mask;
-	const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
-	const unsigned int size = sizeof(struct ibv_flow_spec_eth);
-	struct ibv_flow_spec_eth eth = {
-		.type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
-		.size = size,
-	};
-
-	if (!mask)
-		mask = &rte_flow_item_eth_mask;
-	if (spec) {
-		unsigned int i;
-
-		memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
-		memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
-		eth.val.ether_type = spec->type;
-		memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
-		memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
-		eth.mask.ether_type = mask->type;
-		/* Remove unwanted bits from values. */
-		for (i = 0; i < ETHER_ADDR_LEN; ++i) {
-			eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
-			eth.val.src_mac[i] &= eth.mask.src_mac[i];
-		}
-		eth.val.ether_type &= eth.mask.ether_type;
-		dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
-	}
-	mlx5_flow_spec_verbs_add(dev_flow, &eth, size);
-	*item_flags |= tunnel ?
-			MLX5_FLOW_LAYER_INNER_L2 :
-			MLX5_FLOW_LAYER_OUTER_L2;
-}
-
-/**
- * Update the VLAN tag in the Verbs Ethernet specification.
- * This function assumes that the input is valid and there is space to add
- * the requested item.
- *
- * @param[in, out] attr
- *   Pointer to Verbs attributes structure.
- * @param[in] eth
- *   Verbs structure containing the VLAN information to copy.
- */
-static void
-mlx5_flow_item_vlan_update(struct ibv_flow_attr *attr,
-			   struct ibv_flow_spec_eth *eth)
-{
-	unsigned int i;
-	const enum ibv_flow_spec_type search = eth->type;
-	struct ibv_spec_header *hdr = (struct ibv_spec_header *)
-		((uint8_t *)attr + sizeof(struct ibv_flow_attr));
-
-	for (i = 0; i != attr->num_of_specs; ++i) {
-		if (hdr->type == search) {
-			struct ibv_flow_spec_eth *e =
-				(struct ibv_flow_spec_eth *)hdr;
-
-			e->val.vlan_tag = eth->val.vlan_tag;
-			e->mask.vlan_tag = eth->mask.vlan_tag;
-			e->val.ether_type = eth->val.ether_type;
-			e->mask.ether_type = eth->mask.ether_type;
-			break;
-		}
-		hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
-	}
-}
-
-/**
- * Convert the @p item into a Verbs specification. This function assumes that
- * the input is valid and that there is space to insert the requested item
- * into the flow.
- *
- * @param[in] item
- *   Item specification.
- * @param[in, out] item_flags
- *   Bit mask that holds all detected items.
- * @param[in, out] dev_flow
- *   Pointer to dev_flow structure.
- */
-static void
-flow_verbs_translate_item_vlan(const struct rte_flow_item *item,
-			       uint64_t *item_flags,
-			       struct mlx5_flow *dev_flow)
-{
-	const struct rte_flow_item_vlan *spec = item->spec;
-	const struct rte_flow_item_vlan *mask = item->mask;
-	unsigned int size = sizeof(struct ibv_flow_spec_eth);
-	const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
-	struct ibv_flow_spec_eth eth = {
-		.type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
-		.size = size,
-	};
-	const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
-		MLX5_FLOW_LAYER_OUTER_L2;
-
-	if (!mask)
-		mask = &rte_flow_item_vlan_mask;
-	if (spec) {
-		eth.val.vlan_tag = spec->tci;
-		eth.mask.vlan_tag = mask->tci;
-		eth.val.vlan_tag &= eth.mask.vlan_tag;
-		eth.val.ether_type = spec->inner_type;
-		eth.mask.ether_type = mask->inner_type;
-		eth.val.ether_type &= eth.mask.ether_type;
-	}
-	if (!(*item_flags & l2m)) {
-		dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
-		mlx5_flow_spec_verbs_add(dev_flow, &eth, size);
-	} else {
-		mlx5_flow_item_vlan_update(dev_flow->verbs.attr,
-						   &eth);
-		size = 0; /* Only an update is done in eth specification. */
-	}
-	*item_flags |= tunnel ?
-			(MLX5_FLOW_LAYER_INNER_L2 |
-			 MLX5_FLOW_LAYER_INNER_VLAN) :
-			(MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_VLAN);
-}
-
-/**
- * Convert the @p item into a Verbs specification. This function assumes that
- * the input is valid and that there is space to insert the requested item
- * into the flow.
- *
- * @param[in] item
- *   Item specification.
- * @param[in, out] item_flags
- *   Bit mask that marks all detected items.
- * @param[in, out] dev_flow
- *   Pointer to sepacific flow structure.
- */
-static void
-flow_verbs_translate_item_ipv4(const struct rte_flow_item *item,
-			       uint64_t *item_flags,
-			       struct mlx5_flow *dev_flow)
-{
-	const struct rte_flow_item_ipv4 *spec = item->spec;
-	const struct rte_flow_item_ipv4 *mask = item->mask;
-	const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
-	unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
-	struct ibv_flow_spec_ipv4_ext ipv4 = {
-		.type = IBV_FLOW_SPEC_IPV4_EXT |
-			(tunnel ? IBV_FLOW_SPEC_INNER : 0),
-		.size = size,
-	};
-
-	if (!mask)
-		mask = &rte_flow_item_ipv4_mask;
-	*item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
-		MLX5_FLOW_LAYER_OUTER_L3_IPV4;
-	if (spec) {
-		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
-			.src_ip = spec->hdr.src_addr,
-			.dst_ip = spec->hdr.dst_addr,
-			.proto = spec->hdr.next_proto_id,
-			.tos = spec->hdr.type_of_service,
-		};
-		ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
-			.src_ip = mask->hdr.src_addr,
-			.dst_ip = mask->hdr.dst_addr,
-			.proto = mask->hdr.next_proto_id,
-			.tos = mask->hdr.type_of_service,
-		};
-		/* Remove unwanted bits from values. */
-		ipv4.val.src_ip &= ipv4.mask.src_ip;
-		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
-		ipv4.val.proto &= ipv4.mask.proto;
-		ipv4.val.tos &= ipv4.mask.tos;
-	}
-	mlx5_flow_verbs_hashfields_adjust(dev_flow, tunnel,
-					  (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
-					   ETH_RSS_NONFRAG_IPV4_TCP |
-					   ETH_RSS_NONFRAG_IPV4_UDP |
-					   ETH_RSS_NONFRAG_IPV4_OTHER),
-					  (IBV_RX_HASH_SRC_IPV4 |
-					   IBV_RX_HASH_DST_IPV4));
-	dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L3;
-	mlx5_flow_spec_verbs_add(dev_flow, &ipv4, size);
-}
-
-/**
- * Convert the @p item into a Verbs specification. This function assumes that
- * the input is valid and that there is space to insert the requested item
- * into the flow.
- *
- * @param[in] item
- *   Item specification.
- * @param[in, out] item_flags
- *   Bit mask that marks all detected items.
- * @param[in, out] dev_flow
- *   Pointer to sepacific flow structure.
- */
-static void
-flow_verbs_translate_item_ipv6(const struct rte_flow_item *item,
-			       uint64_t *item_flags,
-			       struct mlx5_flow *dev_flow)
-{
-	const struct rte_flow_item_ipv6 *spec = item->spec;
-	const struct rte_flow_item_ipv6 *mask = item->mask;
-	const int tunnel = !!(dev_flow->flow->layers & MLX5_FLOW_LAYER_TUNNEL);
-	unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
-	struct ibv_flow_spec_ipv6 ipv6 = {
-		.type = IBV_FLOW_SPEC_IPV6 | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
-		.size = size,
-	};
-
-	if (!mask)
-		mask = &rte_flow_item_ipv6_mask;
-	 *item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
-				MLX5_FLOW_LAYER_OUTER_L3_IPV6;
-	if (spec) {
-		unsigned int i;
-		uint32_t vtc_flow_val;
-		uint32_t vtc_flow_mask;
-
-		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
-		       RTE_DIM(ipv6.val.src_ip));
-		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
-		       RTE_DIM(ipv6.val.dst_ip));
-		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
-		       RTE_DIM(ipv6.mask.src_ip));
-		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
-		       RTE_DIM(ipv6.mask.dst_ip));
-		vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
-		vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
-		ipv6.val.flow_label =
-			rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
-					 IPV6_HDR_FL_SHIFT);
-		ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
-					 IPV6_HDR_TC_SHIFT;
-		ipv6.val.next_hdr = spec->hdr.proto;
-		ipv6.val.hop_limit = spec->hdr.hop_limits;
-		ipv6.mask.flow_label =
-			rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
-					 IPV6_HDR_FL_SHIFT);
-		ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
-					  IPV6_HDR_TC_SHIFT;
-		ipv6.mask.next_hdr = mask->hdr.proto;
-		ipv6.mask.hop_limit = mask->hdr.hop_limits;
-		/* Remove unwanted bits from values. */
-		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
-			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
-			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
-		}
-		ipv6.val.flow_label &= ipv6.mask.flow_label;
-		ipv6.val.traffic_class &= ipv6.mask.traffic_class;
-		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
-		ipv6.val.hop_limit &= ipv6.mask.hop_limit;
-	}
-	mlx5_flow_verbs_hashfields_adjust(dev_flow, tunnel,
-					  (ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
-					   ETH_RSS_NONFRAG_IPV6_TCP |
-					   ETH_RSS_NONFRAG_IPV6_UDP |
-					   ETH_RSS_IPV6_EX  |
-					   ETH_RSS_IPV6_TCP_EX |
-					   ETH_RSS_IPV6_UDP_EX |
-					   ETH_RSS_NONFRAG_IPV6_OTHER),
-					  (IBV_RX_HASH_SRC_IPV6 |
-					   IBV_RX_HASH_DST_IPV6));
-	dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L3;
-	mlx5_flow_spec_verbs_add(dev_flow, &ipv6, size);
-}
-
-/**
- * Convert the @p item into a Verbs specification. This function assumes that
- * the input is valid and that there is space to insert the requested item
- * into the flow.
- *
- * @param[in] item
- *   Item specification.
- * @param[in, out] item_flags
- *   Bit mask that marks all detected items.
- * @param[in, out] dev_flow
- *   Pointer to sepacific flow structure.
- */
-static void
-flow_verbs_translate_item_udp(const struct rte_flow_item *item,
-			      uint64_t *item_flags,
-			      struct mlx5_flow *dev_flow)
-{
-	const struct rte_flow_item_udp *spec = item->spec;
-	const struct rte_flow_item_udp *mask = item->mask;
-	const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
-	unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
-	struct ibv_flow_spec_tcp_udp udp = {
-		.type = IBV_FLOW_SPEC_UDP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
-		.size = size,
-	};
-
-	if (!mask)
-		mask = &rte_flow_item_udp_mask;
-	*item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
-			MLX5_FLOW_LAYER_OUTER_L4_UDP;
-	if (spec) {
-		udp.val.dst_port = spec->hdr.dst_port;
-		udp.val.src_port = spec->hdr.src_port;
-		udp.mask.dst_port = mask->hdr.dst_port;
-		udp.mask.src_port = mask->hdr.src_port;
-		/* Remove unwanted bits from values. */
-		udp.val.src_port &= udp.mask.src_port;
-		udp.val.dst_port &= udp.mask.dst_port;
-	}
-	mlx5_flow_verbs_hashfields_adjust(dev_flow,
-					  tunnel, ETH_RSS_UDP,
-					  (IBV_RX_HASH_SRC_PORT_UDP |
-					   IBV_RX_HASH_DST_PORT_UDP));
-	dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L4;
-	mlx5_flow_spec_verbs_add(dev_flow, &udp, size);
-}
-
-/**
- * Convert the @p item into a Verbs specification. This function assumes that
- * the input is valid and that there is space to insert the requested item
- * into the flow.
- *
- * @param[in] item
- *   Item specification.
- * @param[in, out] item_flags
- *   Bit mask that marks all detected items.
- * @param[in, out] dev_flow
- *   Pointer to sepacific flow structure.
- */
-static void
-flow_verbs_translate_item_tcp(const struct rte_flow_item *item,
-			      uint64_t *item_flags,
-			      struct mlx5_flow *dev_flow)
-{
-	const struct rte_flow_item_tcp *spec = item->spec;
-	const struct rte_flow_item_tcp *mask = item->mask;
-	const int tunnel = !!(dev_flow->flow->layers & MLX5_FLOW_LAYER_TUNNEL);
-	unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
-	struct ibv_flow_spec_tcp_udp tcp = {
-		.type = IBV_FLOW_SPEC_TCP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
-		.size = size,
-	};
-
-	if (!mask)
-		mask = &rte_flow_item_tcp_mask;
-	*item_flags |=  tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
-				MLX5_FLOW_LAYER_OUTER_L4_TCP;
-	if (spec) {
-		tcp.val.dst_port = spec->hdr.dst_port;
-		tcp.val.src_port = spec->hdr.src_port;
-		tcp.mask.dst_port = mask->hdr.dst_port;
-		tcp.mask.src_port = mask->hdr.src_port;
-		/* Remove unwanted bits from values. */
-		tcp.val.src_port &= tcp.mask.src_port;
-		tcp.val.dst_port &= tcp.mask.dst_port;
-	}
-	mlx5_flow_verbs_hashfields_adjust(dev_flow,
-					  tunnel, ETH_RSS_TCP,
-					  (IBV_RX_HASH_SRC_PORT_TCP |
-					   IBV_RX_HASH_DST_PORT_TCP));
-	dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L4;
-	mlx5_flow_spec_verbs_add(dev_flow, &tcp, size);
-}
-
-/**
- * Convert the @p item into a Verbs specification. This function assumes that
- * the input is valid and that there is space to insert the requested item
- * into the flow.
- *
- * @param[in] item
- *   Item specification.
- * @param[in, out] item_flags
- *   Bit mask that marks all detected items.
- * @param[in, out] dev_flow
- *   Pointer to sepacific flow structure.
- */
-static void
-flow_verbs_translate_item_vxlan(const struct rte_flow_item *item,
-				uint64_t *item_flags,
-				struct mlx5_flow *dev_flow)
-{
-	const struct rte_flow_item_vxlan *spec = item->spec;
-	const struct rte_flow_item_vxlan *mask = item->mask;
-	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
-	struct ibv_flow_spec_tunnel vxlan = {
-		.type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
-		.size = size,
-	};
-	union vni {
-		uint32_t vlan_id;
-		uint8_t vni[4];
-	} id = { .vlan_id = 0, };
-
-	if (!mask)
-		mask = &rte_flow_item_vxlan_mask;
-	if (spec) {
-		memcpy(&id.vni[1], spec->vni, 3);
-		vxlan.val.tunnel_id = id.vlan_id;
-		memcpy(&id.vni[1], mask->vni, 3);
-		vxlan.mask.tunnel_id = id.vlan_id;
-		/* Remove unwanted bits from values. */
-		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
-	}
-	mlx5_flow_spec_verbs_add(dev_flow, &vxlan, size);
-	dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
-	*item_flags |= MLX5_FLOW_LAYER_VXLAN;
-}
-
-/**
- * Convert the @p item into a Verbs specification. This function assumes that
- * the input is valid and that there is space to insert the requested item
- * into the flow.
- *
- * @param[in] item
- *   Item specification.
- * @param[in, out] item_flags
- *   Bit mask that marks all detected items.
- * @param[in, out] dev_flow
- *   Pointer to sepacific flow structure.
- */
-static void
-flow_verbs_translate_item_vxlan_gpe(const struct rte_flow_item *item,
-				    uint64_t *item_flags,
-				    struct mlx5_flow *dev_flow)
-{
-	const struct rte_flow_item_vxlan_gpe *spec = item->spec;
-	const struct rte_flow_item_vxlan_gpe *mask = item->mask;
-	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
-	struct ibv_flow_spec_tunnel vxlan_gpe = {
-		.type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
-		.size = size,
-	};
-	union vni {
-		uint32_t vlan_id;
-		uint8_t vni[4];
-	} id = { .vlan_id = 0, };
-
-	if (!mask)
-		mask = &rte_flow_item_vxlan_gpe_mask;
-	if (spec) {
-		memcpy(&id.vni[1], spec->vni, 3);
-		vxlan_gpe.val.tunnel_id = id.vlan_id;
-		memcpy(&id.vni[1], mask->vni, 3);
-		vxlan_gpe.mask.tunnel_id = id.vlan_id;
-		/* Remove unwanted bits from values. */
-		vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id;
-	}
-	mlx5_flow_spec_verbs_add(dev_flow, &vxlan_gpe, size);
-	dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
-	*item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
-}
-
-/**
- * Update the protocol in Verbs IPv4/IPv6 spec.
- *
- * @param[in, out] attr
- *   Pointer to Verbs attributes structure.
- * @param[in] search
- *   Specification type to search in order to update the IP protocol.
- * @param[in] protocol
- *   Protocol value to set if none is present in the specification.
- */
-static void
-mlx5_flow_item_gre_ip_protocol_update(struct ibv_flow_attr *attr,
-				      enum ibv_flow_spec_type search,
-				      uint8_t protocol)
-{
-	unsigned int i;
-	struct ibv_spec_header *hdr = (struct ibv_spec_header *)
-		((uint8_t *)attr + sizeof(struct ibv_flow_attr));
-
-	if (!attr)
-		return;
-	for (i = 0; i != attr->num_of_specs; ++i) {
-		if (hdr->type == search) {
-			union {
-				struct ibv_flow_spec_ipv4_ext *ipv4;
-				struct ibv_flow_spec_ipv6 *ipv6;
-			} ip;
-
-			switch (search) {
-			case IBV_FLOW_SPEC_IPV4_EXT:
-				ip.ipv4 = (struct ibv_flow_spec_ipv4_ext *)hdr;
-				if (!ip.ipv4->val.proto) {
-					ip.ipv4->val.proto = protocol;
-					ip.ipv4->mask.proto = 0xff;
-				}
-				break;
-			case IBV_FLOW_SPEC_IPV6:
-				ip.ipv6 = (struct ibv_flow_spec_ipv6 *)hdr;
-				if (!ip.ipv6->val.next_hdr) {
-					ip.ipv6->val.next_hdr = protocol;
-					ip.ipv6->mask.next_hdr = 0xff;
-				}
-				break;
-			default:
-				break;
-			}
-			break;
-		}
-		hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
-	}
-}
-
-/**
- * Convert the @p item into a Verbs specification. This function assumes that
- * the input is valid and that there is space to insert the requested item
- * into the flow.
- *
- * @param[in] item
- *   Item specification.
- * @param[in, out] item_flags
- *   Bit mask that marks all detected items.
- * @param[in, out] dev_flow
- *   Pointer to sepacific flow structure.
- */
-static void
-flow_verbs_translate_item_gre(const struct rte_flow_item *item __rte_unused,
-			      uint64_t *item_flags,
-			      struct mlx5_flow *dev_flow)
-{
-	struct mlx5_flow_verbs *verbs = &dev_flow->verbs;
-#ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
-	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
-	struct ibv_flow_spec_tunnel tunnel = {
-		.type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
-		.size = size,
-	};
-#else
-	const struct rte_flow_item_gre *spec = item->spec;
-	const struct rte_flow_item_gre *mask = item->mask;
-	unsigned int size = sizeof(struct ibv_flow_spec_gre);
-	struct ibv_flow_spec_gre tunnel = {
-		.type = IBV_FLOW_SPEC_GRE,
-		.size = size,
-	};
-
-	if (!mask)
-		mask = &rte_flow_item_gre_mask;
-	if (spec) {
-		tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
-		tunnel.val.protocol = spec->protocol;
-		tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
-		tunnel.mask.protocol = mask->protocol;
-		/* Remove unwanted bits from values. */
-		tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
-		tunnel.val.protocol &= tunnel.mask.protocol;
-		tunnel.val.key &= tunnel.mask.key;
-	}
-#endif
-	if (*item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4)
-		mlx5_flow_item_gre_ip_protocol_update
-			(verbs->attr, IBV_FLOW_SPEC_IPV4_EXT,
-			 MLX5_IP_PROTOCOL_GRE);
-	else
-		mlx5_flow_item_gre_ip_protocol_update
-			(verbs->attr, IBV_FLOW_SPEC_IPV6,
-			 MLX5_IP_PROTOCOL_GRE);
-	mlx5_flow_spec_verbs_add(dev_flow, &tunnel, size);
-	verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
-	*item_flags |= MLX5_FLOW_LAYER_GRE;
-}
-
-/**
- * Convert the @p action into a Verbs specification. This function assumes that
- * the input is valid and that there is space to insert the requested action
- * into the flow. This function also return the action that was added.
- *
- * @param[in] item
- *   Item specification.
- * @param[in, out] item_flags
- *   Bit mask that marks all detected items.
- * @param[in, out] dev_flow
- *   Pointer to sepacific flow structure.
- */
-static void
-flow_verbs_translate_item_mpls(const struct rte_flow_item *item __rte_unused,
-			       uint64_t *action_flags __rte_unused,
-			       struct mlx5_flow *dev_flow __rte_unused)
-{
-#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
-	const struct rte_flow_item_mpls *spec = item->spec;
-	const struct rte_flow_item_mpls *mask = item->mask;
-	unsigned int size = sizeof(struct ibv_flow_spec_mpls);
-	struct ibv_flow_spec_mpls mpls = {
-		.type = IBV_FLOW_SPEC_MPLS,
-		.size = size,
-	};
-
-	if (!mask)
-		mask = &rte_flow_item_mpls_mask;
-	if (spec) {
-		memcpy(&mpls.val.label, spec, sizeof(mpls.val.label));
-		memcpy(&mpls.mask.label, mask, sizeof(mpls.mask.label));
-		/* Remove unwanted bits from values.  */
-		mpls.val.label &= mpls.mask.label;
-	}
-	mlx5_flow_spec_verbs_add(dev_flow, &mpls, size);
-	dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
-	*action_flags |= MLX5_FLOW_LAYER_MPLS;
-#endif
-}
-
-/**
- * Convert the @p action into a Verbs specification. This function assumes that
- * the input is valid and that there is space to insert the requested action
- * into the flow. This function also return the action that was added.
- *
- * @param[in, out] action_flags
- *   Pointer to the detected actions.
- * @param[in] dev_flow
- *   Pointer to mlx5_flow.
- */
-static void
-flow_verbs_translate_action_drop(uint64_t *action_flags,
-				 struct mlx5_flow *dev_flow)
-{
-	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
-	struct ibv_flow_spec_action_drop drop = {
-			.type = IBV_FLOW_SPEC_ACTION_DROP,
-			.size = size,
-	};
-
-	mlx5_flow_spec_verbs_add(dev_flow, &drop, size);
-	*action_flags |= MLX5_ACTION_DROP;
-}
-
-/**
- * Convert the @p action into a Verbs specification. This function assumes that
- * the input is valid and that there is space to insert the requested action
- * into the flow. This function also return the action that was added.
- *
- * @param[in] action
- *   Action configuration.
- * @param[in, out] action_flags
- *   Pointer to the detected actions.
- * @param[in] dev_flow
- *   Pointer to mlx5_flow.
- */
-static void
-flow_verbs_translate_action_queue(const struct rte_flow_action *action,
-				  uint64_t *action_flags,
-				  struct mlx5_flow *dev_flow)
-{
-	const struct rte_flow_action_queue *queue = action->conf;
-	struct rte_flow *flow = dev_flow->flow;
-
-	if (flow->queue)
-		(*flow->queue)[0] = queue->index;
-	flow->rss.queue_num = 1;
-	*action_flags |= MLX5_ACTION_QUEUE;
-}
-
-/**
- * Convert the @p action into a Verbs specification. This function assumes that
- * the input is valid and that there is space to insert the requested action
- * into the flow. This function also return the action that was added.
- *
- * @param[in] action
- *   Action configuration.
- * @param[in, out] action_flags
- *   Pointer to the detected actions.
- * @param[in] dev_flow
- *   Pointer to mlx5_flow.
- */
-static void
-flow_verbs_translate_action_rss(const struct rte_flow_action *action,
-				uint64_t *action_flags,
-				struct mlx5_flow *dev_flow)
-{
-	const struct rte_flow_action_rss *rss = action->conf;
-	struct rte_flow *flow = dev_flow->flow;
-
-	if (flow->queue)
-		memcpy((*flow->queue), rss->queue,
-		       rss->queue_num * sizeof(uint16_t));
-	flow->rss.queue_num = rss->queue_num;
-	memcpy(flow->key, rss->key, MLX5_RSS_HASH_KEY_LEN);
-	flow->rss.types = rss->types;
-	flow->rss.level = rss->level;
-	*action_flags |= MLX5_ACTION_RSS;
-}
-
-/**
- * Convert the @p action into a Verbs specification. This function assumes that
- * the input is valid and that there is space to insert the requested action
- * into the flow. This function also return the action that was added.
- *
- * @param[in] action
- *   Action configuration.
- * @param[in, out] action_flags
- *   Pointer to the detected actions.
- * @param[in] dev_flow
- *   Pointer to mlx5_flow.
- */
-static void
-flow_verbs_translate_action_flag
-			(const struct rte_flow_action *action __rte_unused,
-			 uint64_t *action_flags,
-			 struct mlx5_flow *dev_flow)
-{
-	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
-	struct ibv_flow_spec_action_tag tag = {
-		.type = IBV_FLOW_SPEC_ACTION_TAG,
-		.size = size,
-		.tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
-	};
-	*action_flags |= MLX5_ACTION_MARK;
-	mlx5_flow_spec_verbs_add(dev_flow, &tag, size);
-}
-
-/**
- * Update verbs specification to modify the flag to mark.
- *
- * @param[in, out] verbs
- *   Pointer to the mlx5_flow_verbs structure.
- * @param[in] mark_id
- *   Mark identifier to replace the flag.
- */
-static void
-flow_verbs_mark_update(struct mlx5_flow_verbs *verbs, uint32_t mark_id)
-{
-	struct ibv_spec_header *hdr;
-	int i;
-
-	if (!verbs)
-		return;
-	/* Update Verbs specification. */
-	hdr = (struct ibv_spec_header *)verbs->specs;
-	if (!hdr)
-		return;
-	for (i = 0; i != verbs->attr->num_of_specs; ++i) {
-		if (hdr->type == IBV_FLOW_SPEC_ACTION_TAG) {
-			struct ibv_flow_spec_action_tag *t =
-				(struct ibv_flow_spec_action_tag *)hdr;
-
-			t->tag_id = mlx5_flow_mark_set(mark_id);
-		}
-		hdr = (struct ibv_spec_header *)((uintptr_t)hdr + hdr->size);
-	}
-}
-
-/**
- * Convert the @p action into a Verbs specification. This function assumes that
- * the input is valid and that there is space to insert the requested action
- * into the flow. This function also return the action that was added.
- *
- * @param[in] action
- *   Action configuration.
- * @param[in, out] action_flags
- *   Pointer to the detected actions.
- * @param[in] dev_flow
- *   Pointer to mlx5_flow.
- */
-static void
-flow_verbs_translate_action_mark(const struct rte_flow_action *action,
-				 uint64_t *action_flags,
-				 struct mlx5_flow *dev_flow)
-{
-	const struct rte_flow_action_mark *mark = action->conf;
-	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
-	struct ibv_flow_spec_action_tag tag = {
-		.type = IBV_FLOW_SPEC_ACTION_TAG,
-		.size = size,
-	};
-	struct mlx5_flow_verbs *verbs = &dev_flow->verbs;
-
-	if (*action_flags & MLX5_ACTION_FLAG) {
-		flow_verbs_mark_update(verbs, mark->id);
-		size = 0;
-	} else {
-		tag.tag_id = mlx5_flow_mark_set(mark->id);
-		mlx5_flow_spec_verbs_add(dev_flow, &tag, size);
-	}
-	*action_flags |= MLX5_ACTION_MARK;
-}
-
-/**
- * Convert the @p action into a Verbs specification. This function assumes that
- * the input is valid and that there is space to insert the requested action
- * into the flow. This function also return the action that was added.
- *
- * @param[in] dev
- *   Pointer to the Ethernet device structure.
- * @param[in] action
- *   Action configuration.
- * @param[in, out] action_flags
- *   Pointer to the detected actions.
- * @param[in] dev_flow
- *   Pointer to mlx5_flow.
- * @param[out] error
- *   Pointer to error structure.
- *
- * @return
- *   0 On success else a negative errno value is returned and rte_errno is set.
- */
-static int
-flow_verbs_translate_action_count(struct rte_eth_dev *dev,
-				  const struct rte_flow_action *action,
-				  uint64_t *action_flags,
-				  struct mlx5_flow *dev_flow,
-				  struct rte_flow_error *error)
-{
-	const struct rte_flow_action_count *count = action->conf;
-	struct rte_flow *flow = dev_flow->flow;
-#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
-	unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
-	struct ibv_flow_spec_counter_action counter = {
-		.type = IBV_FLOW_SPEC_ACTION_COUNT,
-		.size = size,
-	};
-#endif
-
-	if (!flow->counter) {
-		flow->counter = mlx5_flow_counter_new(dev, count->shared,
-						      count->id);
-		if (!flow->counter)
-			return rte_flow_error_set(error, ENOTSUP,
-						  RTE_FLOW_ERROR_TYPE_ACTION,
-						  action,
-						  "cannot get counter"
-						  " context.");
-	}
-	*action_flags |= MLX5_ACTION_COUNT;
-#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
-	counter.counter_set_handle = flow->counter->cs->handle;
-	mlx5_flow_spec_verbs_add(dev_flow, &counter, size);
-#endif
-	return 0;
-}
-
-/**
  * Lookup and set the ptype in the data Rx part.  A single Ptype can be used,
  * if several tunnel rules are used on this queue, the tunnel ptype will be
  * cleared.
@@ -1708,7 +598,7 @@ mlx5_flow_rxq_flags_clear(struct rte_eth_dev *dev)
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-static int
+int
 mlx5_flow_validate_action_flag(uint64_t action_flags,
 			       struct rte_flow_error *error)
 {
@@ -1742,7 +632,7 @@ mlx5_flow_validate_action_flag(uint64_t action_flags,
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-static int
+int
 mlx5_flow_validate_action_mark(const struct rte_flow_action *action,
 			       uint64_t action_flags,
 			       struct rte_flow_error *error)
@@ -1787,7 +677,7 @@ mlx5_flow_validate_action_mark(const struct rte_flow_action *action,
  * @return
  *   0 on success, a negative errno value otherwise and rte_ernno is set.
  */
-static int
+int
 mlx5_flow_validate_action_drop(uint64_t action_flags,
 			       struct rte_flow_error *error)
 {
@@ -1823,7 +713,7 @@ mlx5_flow_validate_action_drop(uint64_t action_flags,
  * @return
  *   0 on success, a negative errno value otherwise and rte_ernno is set.
  */
-static int
+int
 mlx5_flow_validate_action_queue(const struct rte_flow_action *action,
 				uint64_t action_flags,
 				struct rte_eth_dev *dev,
@@ -1866,7 +756,7 @@ mlx5_flow_validate_action_queue(const struct rte_flow_action *action,
  * @return
  *   0 on success, a negative errno value otherwise and rte_ernno is set.
  */
-static int
+int
 mlx5_flow_validate_action_rss(const struct rte_flow_action *action,
 			      uint64_t action_flags,
 			      struct rte_eth_dev *dev,
@@ -1938,7 +828,7 @@ mlx5_flow_validate_action_rss(const struct rte_flow_action *action,
  * @return
  *   0 on success, a negative errno value otherwise and rte_ernno is set.
  */
-static int
+int
 mlx5_flow_validate_action_count(struct rte_eth_dev *dev,
 				struct rte_flow_error *error)
 {
@@ -1965,7 +855,7 @@ mlx5_flow_validate_action_count(struct rte_eth_dev *dev,
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-static int
+int
 mlx5_flow_validate_attributes(struct rte_eth_dev *dev,
 			      const struct rte_flow_attr *attributes,
 			      struct rte_flow_error *error)
@@ -2011,7 +901,7 @@ mlx5_flow_validate_attributes(struct rte_eth_dev *dev,
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-static int
+int
 mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
 			    uint64_t item_flags,
 			    struct rte_flow_error *error)
@@ -2055,7 +945,7 @@ mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-static int
+int
 mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
 			     int64_t item_flags,
 			     struct rte_flow_error *error)
@@ -2116,7 +1006,7 @@ mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-static int
+int
 mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item,
 			     int64_t item_flags,
 			     struct rte_flow_error *error)
@@ -2167,7 +1057,7 @@ mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item,
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-static int
+int
 mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item,
 			     uint64_t item_flags,
 			     struct rte_flow_error *error)
@@ -2236,7 +1126,7 @@ mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item,
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-static int
+int
 mlx5_flow_validate_item_udp(const struct rte_flow_item *item,
 			    uint64_t item_flags,
 			    uint8_t target_protocol,
@@ -2287,7 +1177,7 @@ mlx5_flow_validate_item_udp(const struct rte_flow_item *item,
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-static int
+int
 mlx5_flow_validate_item_tcp(const struct rte_flow_item *item,
 			    uint64_t item_flags,
 			    uint8_t target_protocol,
@@ -2338,7 +1228,7 @@ mlx5_flow_validate_item_tcp(const struct rte_flow_item *item,
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-static int
+int
 mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item,
 			      uint64_t item_flags,
 			      struct rte_flow_error *error)
@@ -2417,7 +1307,7 @@ mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item,
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-static int
+int
 mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
 				  uint64_t item_flags,
 				  struct rte_eth_dev *dev,
@@ -2507,7 +1397,7 @@ mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-static int
+int
 mlx5_flow_validate_item_gre(const struct rte_flow_item *item,
 			    uint64_t item_flags,
 			    uint8_t target_protocol,
@@ -2564,7 +1454,7 @@ mlx5_flow_validate_item_gre(const struct rte_flow_item *item,
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-static int
+int
 mlx5_flow_validate_item_mpls(const struct rte_flow_item *item __rte_unused,
 			     uint64_t item_flags __rte_unused,
 			     uint8_t target_protocol __rte_unused,
@@ -2582,225 +1472,22 @@ mlx5_flow_validate_item_mpls(const struct rte_flow_item *item __rte_unused,
 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
 		return rte_flow_error_set(error, ENOTSUP,
 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
-					  "a tunnel is already"
-					  " present");
-	if (!mask)
-		mask = &rte_flow_item_mpls_mask;
-	ret = mlx5_flow_item_acceptable
-		(item, (const uint8_t *)mask,
-		 (const uint8_t *)&rte_flow_item_mpls_mask,
-		 sizeof(struct rte_flow_item_mpls), error);
-	if (ret < 0)
-		return ret;
-	return 0;
-#endif
-	return rte_flow_error_set(error, ENOTSUP,
-				  RTE_FLOW_ERROR_TYPE_ITEM, item,
-				  "MPLS is not supported by Verbs, please"
-				  " update.");
-}
-
-/**
- * Internal validation function.
- *
- * @param[in] dev
- *   Pointer to the Ethernet device structure.
- * @param[in] attr
- *   Pointer to the flow attributes.
- * @param[in] items
- *   Pointer to the list of items.
- * @param[in] actions
- *   Pointer to the list of actions.
- * @param[out] error
- *   Pointer to the error structure.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_ernno is set.
- */
-static int mlx5_flow_verbs_validate(struct rte_eth_dev *dev,
-				    const struct rte_flow_attr *attr,
-				    const struct rte_flow_item items[],
-				    const struct rte_flow_action actions[],
-				    struct rte_flow_error *error)
-{
-	int ret;
-	uint32_t action_flags = 0;
-	uint32_t item_flags = 0;
-	int tunnel = 0;
-	uint8_t next_protocol = 0xff;
-
-	if (items == NULL)
-		return -1;
-	ret = mlx5_flow_validate_attributes(dev, attr, error);
+					  "a tunnel is already"
+					  " present");
+	if (!mask)
+		mask = &rte_flow_item_mpls_mask;
+	ret = mlx5_flow_item_acceptable
+		(item, (const uint8_t *)mask,
+		 (const uint8_t *)&rte_flow_item_mpls_mask,
+		 sizeof(struct rte_flow_item_mpls), error);
 	if (ret < 0)
 		return ret;
-	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
-		int ret = 0;
-		switch (items->type) {
-		case RTE_FLOW_ITEM_TYPE_VOID:
-			break;
-		case RTE_FLOW_ITEM_TYPE_ETH:
-			ret = mlx5_flow_validate_item_eth(items, item_flags,
-							  error);
-			if (ret < 0)
-				return ret;
-			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
-					       MLX5_FLOW_LAYER_OUTER_L2;
-			break;
-		case RTE_FLOW_ITEM_TYPE_VLAN:
-			ret = mlx5_flow_validate_item_vlan(items, item_flags,
-							   error);
-			if (ret < 0)
-				return ret;
-			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
-					       MLX5_FLOW_LAYER_OUTER_VLAN;
-			break;
-		case RTE_FLOW_ITEM_TYPE_IPV4:
-			ret = mlx5_flow_validate_item_ipv4(items, item_flags,
-							   error);
-			if (ret < 0)
-				return ret;
-			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
-					       MLX5_FLOW_LAYER_OUTER_L3_IPV4;
-			if (items->mask != NULL &&
-			    ((const struct rte_flow_item_ipv4 *)
-			     items->mask)->hdr.next_proto_id)
-				next_protocol =
-					((const struct rte_flow_item_ipv4 *)
-					 (items->spec))->hdr.next_proto_id;
-			break;
-		case RTE_FLOW_ITEM_TYPE_IPV6:
-			ret = mlx5_flow_validate_item_ipv6(items, item_flags,
-							   error);
-			if (ret < 0)
-				return ret;
-			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
-					       MLX5_FLOW_LAYER_OUTER_L3_IPV6;
-			if (items->mask != NULL &&
-			    ((const struct rte_flow_item_ipv6 *)
-			     items->mask)->hdr.proto)
-				next_protocol =
-					((const struct rte_flow_item_ipv6 *)
-					 items->spec)->hdr.proto;
-			break;
-		case RTE_FLOW_ITEM_TYPE_UDP:
-			ret = mlx5_flow_validate_item_udp(items, item_flags,
-							  next_protocol,
-							  error);
-			if (ret < 0)
-				return ret;
-			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
-					       MLX5_FLOW_LAYER_OUTER_L4_UDP;
-			break;
-		case RTE_FLOW_ITEM_TYPE_TCP:
-			ret = mlx5_flow_validate_item_tcp(items, item_flags,
-							  next_protocol, error);
-			if (ret < 0)
-				return ret;
-			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
-					       MLX5_FLOW_LAYER_OUTER_L4_TCP;
-			break;
-		case RTE_FLOW_ITEM_TYPE_VXLAN:
-			ret = mlx5_flow_validate_item_vxlan(items, item_flags,
-							    error);
-			if (ret < 0)
-				return ret;
-			item_flags |= MLX5_FLOW_LAYER_VXLAN;
-			break;
-		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
-			ret = mlx5_flow_validate_item_vxlan_gpe(items,
-								item_flags,
-								dev, error);
-			if (ret < 0)
-				return ret;
-			item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
-			break;
-		case RTE_FLOW_ITEM_TYPE_GRE:
-			ret = mlx5_flow_validate_item_gre(items, item_flags,
-							  next_protocol, error);
-			if (ret < 0)
-				return ret;
-			item_flags |= MLX5_FLOW_LAYER_GRE;
-			break;
-		case RTE_FLOW_ITEM_TYPE_MPLS:
-			ret = mlx5_flow_validate_item_mpls(items, item_flags,
-							   next_protocol,
-							   error);
-			if (ret < 0)
-				return ret;
-			if (next_protocol != 0xff &&
-			    next_protocol != MLX5_IP_PROTOCOL_MPLS)
-				return rte_flow_error_set
-					(error, ENOTSUP,
-					 RTE_FLOW_ERROR_TYPE_ITEM, items,
-					 "protocol filtering not compatible"
-					 " with MPLS layer");
-			item_flags |= MLX5_FLOW_LAYER_MPLS;
-			break;
-		default:
-			return rte_flow_error_set(error, ENOTSUP,
-						  RTE_FLOW_ERROR_TYPE_ITEM,
-						  NULL,
-						  "item not supported");
-		}
-	}
-	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
-		tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
-		switch (actions->type) {
-		case RTE_FLOW_ACTION_TYPE_VOID:
-			break;
-		case RTE_FLOW_ACTION_TYPE_FLAG:
-			ret = mlx5_flow_validate_action_flag(action_flags,
-							     error);
-			if (ret < 0)
-				return ret;
-			action_flags |= MLX5_ACTION_FLAG;
-			break;
-		case RTE_FLOW_ACTION_TYPE_MARK:
-			ret = mlx5_flow_validate_action_mark(actions,
-							     action_flags,
-							     error);
-			if (ret < 0)
-				return ret;
-			action_flags |= MLX5_ACTION_MARK;
-			break;
-		case RTE_FLOW_ACTION_TYPE_DROP:
-			ret = mlx5_flow_validate_action_drop(action_flags,
-							     error);
-			if (ret < 0)
-				return ret;
-			action_flags |= MLX5_ACTION_DROP;
-			break;
-		case RTE_FLOW_ACTION_TYPE_QUEUE:
-			ret = mlx5_flow_validate_action_queue(actions,
-							      action_flags, dev,
-							      error);
-			if (ret < 0)
-				return ret;
-			action_flags |= MLX5_ACTION_QUEUE;
-			break;
-		case RTE_FLOW_ACTION_TYPE_RSS:
-			ret = mlx5_flow_validate_action_rss(actions,
-							    action_flags, dev,
-							    error);
-			if (ret < 0)
-				return ret;
-			action_flags |= MLX5_ACTION_RSS;
-			break;
-		case RTE_FLOW_ACTION_TYPE_COUNT:
-			ret = mlx5_flow_validate_action_count(dev, error);
-			if (ret < 0)
-				return ret;
-			action_flags |= MLX5_ACTION_COUNT;
-			break;
-		default:
-			return rte_flow_error_set(error, ENOTSUP,
-						  RTE_FLOW_ERROR_TYPE_ACTION,
-						  actions,
-						  "action not supported");
-		}
-	}
 	return 0;
+#endif
+	return rte_flow_error_set(error, ENOTSUP,
+				  RTE_FLOW_ERROR_TYPE_ITEM, item,
+				  "MPLS is not supported by Verbs, please"
+				  " update.");
 }
 
 /**
@@ -2818,159 +1505,13 @@ mlx5_flow_validate(struct rte_eth_dev *dev,
 {
 	int ret;
 
-	ret =  mlx5_flow_verbs_validate(dev, attr, items, actions, error);
+	ret = nic_ops.validate(dev, attr, items, actions, error);
 	if (ret < 0)
 		return ret;
 	return 0;
 }
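
The helpers promoted from static to exported above are meant to be shared by every driver's validate callback. Below is a minimal sketch of such a callback, assuming only the exported helpers and flag bits from mlx5_flow.h and abbreviated to a single item type; it is an editorial illustration, not part of the patch:

	static int
	sketch_drv_validate(struct rte_eth_dev *dev,
			    const struct rte_flow_attr *attr,
			    const struct rte_flow_item items[],
			    const struct rte_flow_action actions[],
			    struct rte_flow_error *error)
	{
		uint64_t item_flags = 0;
		int ret;

		ret = mlx5_flow_validate_attributes(dev, attr, error);
		if (ret < 0)
			return ret;
		for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
			if (items->type != RTE_FLOW_ITEM_TYPE_ETH)
				continue;
			/* Reuse the shared validator exported above. */
			ret = mlx5_flow_validate_item_eth(items, item_flags,
							  error);
			if (ret < 0)
				return ret;
			item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
		}
		RTE_SET_USED(actions);
		return 0;
	}
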
 
 /**
- * Calculate the required bytes that are needed for the action part of the verbs
- * flow, in addtion returns bit-fields with all the detected action, in order to
- * avoid another interation over the actions.
- *
- * @param[in] actions
- *   Pointer to the list of actions.
- * @param[out] action_flags
- *   Pointer to the detected actions.
- *
- * @return
- *   The size of the memory needed for all actions.
- */
-static int
-mlx5_flow_verbs_get_actions_and_size(const struct rte_flow_action actions[],
-				     uint64_t *action_flags)
-{
-	int size = 0;
-	uint64_t detected_actions = 0;
-
-	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
-		switch (actions->type) {
-		case RTE_FLOW_ACTION_TYPE_VOID:
-			break;
-		case RTE_FLOW_ACTION_TYPE_FLAG:
-			size += sizeof(struct ibv_flow_spec_action_tag);
-			detected_actions |= MLX5_ACTION_FLAG;
-			break;
-		case RTE_FLOW_ACTION_TYPE_MARK:
-			size += sizeof(struct ibv_flow_spec_action_tag);
-			detected_actions |= MLX5_ACTION_MARK;
-			break;
-		case RTE_FLOW_ACTION_TYPE_DROP:
-			size += sizeof(struct ibv_flow_spec_action_drop);
-			detected_actions |= MLX5_ACTION_DROP;
-			break;
-		case RTE_FLOW_ACTION_TYPE_QUEUE:
-			detected_actions |= MLX5_ACTION_QUEUE;
-			break;
-		case RTE_FLOW_ACTION_TYPE_RSS:
-			detected_actions |= MLX5_ACTION_RSS;
-			break;
-		case RTE_FLOW_ACTION_TYPE_COUNT:
-#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
-			size += sizeof(struct ibv_flow_spec_counter_action);
-#endif
-			detected_actions |= MLX5_ACTION_COUNT;
-			break;
-		default:
-			break;
-		}
-	}
-	*action_flags = detected_actions;
-	return size;
-}
-
-/**
- * Calculate the required bytes that are needed for the item part of the verbs
- * flow, in addtion returns bit-fields with all the detected action, in order to
- * avoid another interation over the actions.
- *
- * @param[in] actions
- *   Pointer to the list of items.
- * @param[in, out] item_flags
- *   Pointer to the detected items.
- *
- * @return
- *   The size of the memory needed for all items.
- */
-static int
-mlx5_flow_verbs_get_items_and_size(const struct rte_flow_item items[],
-				   uint64_t *item_flags)
-{
-	int size = 0;
-	uint64_t detected_items = 0;
-	const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
-
-	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
-		switch (items->type) {
-		case RTE_FLOW_ITEM_TYPE_VOID:
-			break;
-		case RTE_FLOW_ITEM_TYPE_ETH:
-			size += sizeof(struct ibv_flow_spec_eth);
-			detected_items |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
-					MLX5_FLOW_LAYER_OUTER_L2;
-			break;
-		case RTE_FLOW_ITEM_TYPE_VLAN:
-			size += sizeof(struct ibv_flow_spec_eth);
-			detected_items |= tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
-					MLX5_FLOW_LAYER_OUTER_VLAN;
-			break;
-		case RTE_FLOW_ITEM_TYPE_IPV4:
-			size += sizeof(struct ibv_flow_spec_ipv4_ext);
-			detected_items |= tunnel ?
-					MLX5_FLOW_LAYER_INNER_L3_IPV4 :
-					MLX5_FLOW_LAYER_OUTER_L3_IPV4;
-			break;
-		case RTE_FLOW_ITEM_TYPE_IPV6:
-			size += sizeof(struct ibv_flow_spec_ipv6);
-			detected_items |= tunnel ?
-				MLX5_FLOW_LAYER_INNER_L3_IPV6 :
-				MLX5_FLOW_LAYER_OUTER_L3_IPV6;
-			break;
-		case RTE_FLOW_ITEM_TYPE_UDP:
-			size += sizeof(struct ibv_flow_spec_tcp_udp);
-			detected_items |= tunnel ?
-					MLX5_FLOW_LAYER_INNER_L4_UDP :
-					MLX5_FLOW_LAYER_OUTER_L4_UDP;
-			break;
-		case RTE_FLOW_ITEM_TYPE_TCP:
-			size += sizeof(struct ibv_flow_spec_tcp_udp);
-			detected_items |= tunnel ?
-					MLX5_FLOW_LAYER_INNER_L4_TCP :
-					MLX5_FLOW_LAYER_OUTER_L4_TCP;
-			break;
-		case RTE_FLOW_ITEM_TYPE_VXLAN:
-			size += sizeof(struct ibv_flow_spec_tunnel);
-			detected_items |= MLX5_FLOW_LAYER_VXLAN;
-			break;
-		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
-			size += sizeof(struct ibv_flow_spec_tunnel);
-			detected_items |= MLX5_FLOW_LAYER_VXLAN_GPE;
-			break;
-#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
-		case RTE_FLOW_ITEM_TYPE_GRE:
-			size += sizeof(struct ibv_flow_spec_gre);
-			detected_items |= MLX5_FLOW_LAYER_GRE;
-			break;
-		case RTE_FLOW_ITEM_TYPE_MPLS:
-			size += sizeof(struct ibv_flow_spec_mpls);
-			detected_items |= MLX5_FLOW_LAYER_MPLS;
-			break;
-#else
-		case RTE_FLOW_ITEM_TYPE_GRE:
-			size += sizeof(struct ibv_flow_spec_tunnel);
-			detected_items |= MLX5_FLOW_LAYER_TUNNEL;
-			break;
-#endif
-		default:
-			break;
-		}
-	}
-	*item_flags = detected_items;
-	return size;
-}
-
-/**
  * Get RSS action from the action list.
  *
  * @param[in] actions
@@ -2994,320 +1535,6 @@ mlx5_flow_get_rss_action(const struct rte_flow_action actions[])
 	return NULL;
 }
 
-/**
- * Internal preparation function. Allocate mlx5_flow with the required size.
- * The required size is calculate based on the actions and items. This function
- * also returns the detected actions and items for later use.
- *
- * @param[in] attr
- *   Pointer to the flow attributes.
- * @param[in] items
- *   Pointer to the list of items.
- * @param[in] actions
- *   Pointer to the list of actions.
- * @param[out] item_flags
- *   Pointer to bit mask of all items detected.
- * @param[out] action_flags
- *   Pointer to bit mask of all actions detected.
- * @param[out] error
- *   Pointer to the error structure.
- *
- * @return
- *   Pointer to mlx5_flow object on success, otherwise NULL and rte_errno
- *   is set.
- */
-static struct mlx5_flow *
-mlx5_flow_verbs_prepare(const struct rte_flow_attr *attr __rte_unused,
-			const struct rte_flow_item items[],
-			const struct rte_flow_action actions[],
-			uint64_t *item_flags,
-			uint64_t *action_flags,
-			struct rte_flow_error *error)
-{
-	uint32_t size = sizeof(struct mlx5_flow) + sizeof(struct ibv_flow_attr);
-	struct mlx5_flow *flow;
-
-	size += mlx5_flow_verbs_get_actions_and_size(actions, action_flags);
-	size += mlx5_flow_verbs_get_items_and_size(items, item_flags);
-	flow = rte_calloc(__func__, 1, size, 0);
-	if (!flow) {
-		rte_flow_error_set(error, ENOMEM,
-				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-				   NULL,
-				   "not enough memory to create flow");
-		return NULL;
-	}
-	flow->verbs.attr = (void *)(flow + 1);
-	flow->verbs.specs = (uint8_t *)(flow + 1) +
-				sizeof(struct ibv_flow_attr);
-	return flow;
-}
-
-/**
- *
- * Fill the flow with verb spec.
- *
- * @param[in] dev
- *   Pointer to Ethernet device.
- * @param[in, out] dev_flow
- *   Pointer to the mlx5 flow.
- * @param[in] attr
- *   Pointer to the flow attributes.
- * @param[in] items
- *   Pointer to the list of items.
- * @param[in] actions
- *   Pointer to the list of actions.
- * @param[out] error
- *   Pointer to the error structure.
- *
- * @return
- *   0 on success, else a negative errno value otherwise and rte_ernno is set.
- */
-static int mlx5_flow_verbs_translate(struct rte_eth_dev *dev,
-				     struct mlx5_flow *dev_flow,
-				     const struct rte_flow_attr *attr,
-				     const struct rte_flow_item items[],
-				     const struct rte_flow_action actions[],
-				     struct rte_flow_error *error)
-{
-	uint64_t action_flags = 0;
-	uint64_t item_flags = 0;
-	uint64_t priority = attr->priority;
-	struct priv *priv = dev->data->dev_private;
-
-	if (priority == MLX5_FLOW_PRIO_RSVD)
-		priority = priv->config.flow_prio - 1;
-	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
-		int ret;
-		switch (actions->type) {
-		case RTE_FLOW_ACTION_TYPE_VOID:
-			break;
-		case RTE_FLOW_ACTION_TYPE_FLAG:
-			flow_verbs_translate_action_flag(actions,
-							 &action_flags,
-							 dev_flow);
-			break;
-		case RTE_FLOW_ACTION_TYPE_MARK:
-			flow_verbs_translate_action_mark(actions,
-							 &action_flags,
-							 dev_flow);
-			break;
-		case RTE_FLOW_ACTION_TYPE_DROP:
-			flow_verbs_translate_action_drop(&action_flags,
-							 dev_flow);
-			break;
-		case RTE_FLOW_ACTION_TYPE_QUEUE:
-			flow_verbs_translate_action_queue(actions,
-							  &action_flags,
-							  dev_flow);
-			break;
-		case RTE_FLOW_ACTION_TYPE_RSS:
-			flow_verbs_translate_action_rss(actions,
-							&action_flags,
-							dev_flow);
-			break;
-		case RTE_FLOW_ACTION_TYPE_COUNT:
-			ret = flow_verbs_translate_action_count(dev,
-								actions,
-								&action_flags,
-								dev_flow,
-								error);
-			if (ret < 0)
-				return ret;
-			break;
-		default:
-			return rte_flow_error_set(error, ENOTSUP,
-						  RTE_FLOW_ERROR_TYPE_ACTION,
-						  actions,
-						  "action not supported");
-		}
-	}
-	dev_flow->flow->actions |= action_flags;
-	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
-		switch (items->type) {
-		case RTE_FLOW_ITEM_TYPE_VOID:
-			break;
-		case RTE_FLOW_ITEM_TYPE_ETH:
-			flow_verbs_translate_item_eth(items, &item_flags,
-						      dev_flow);
-			break;
-		case RTE_FLOW_ITEM_TYPE_VLAN:
-			flow_verbs_translate_item_vlan(items, &item_flags,
-						       dev_flow);
-			break;
-		case RTE_FLOW_ITEM_TYPE_IPV4:
-			flow_verbs_translate_item_ipv4(items, &item_flags,
-						       dev_flow);
-			break;
-		case RTE_FLOW_ITEM_TYPE_IPV6:
-			flow_verbs_translate_item_ipv6(items, &item_flags,
-						       dev_flow);
-			break;
-		case RTE_FLOW_ITEM_TYPE_UDP:
-			flow_verbs_translate_item_udp(items, &item_flags,
-						      dev_flow);
-			break;
-		case RTE_FLOW_ITEM_TYPE_TCP:
-			flow_verbs_translate_item_tcp(items, &item_flags,
-						      dev_flow);
-			break;
-		case RTE_FLOW_ITEM_TYPE_VXLAN:
-			flow_verbs_translate_item_vxlan(items, &item_flags,
-							dev_flow);
-			break;
-		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
-			flow_verbs_translate_item_vxlan_gpe(items, &item_flags,
-							    dev_flow);
-			break;
-		case RTE_FLOW_ITEM_TYPE_GRE:
-			flow_verbs_translate_item_gre(items, &item_flags,
-						      dev_flow);
-			break;
-		case RTE_FLOW_ITEM_TYPE_MPLS:
-			flow_verbs_translate_item_mpls(items, &item_flags,
-						       dev_flow);
-			break;
-		default:
-			return rte_flow_error_set(error, ENOTSUP,
-						  RTE_FLOW_ERROR_TYPE_ITEM,
-						  NULL,
-						  "item not supported");
-		}
-	}
-	dev_flow->verbs.attr->priority = mlx5_flow_adjust_priority(dev,
-					priority,
-					dev_flow->verbs.attr->priority);
-	return 0;
-}
-
-/**
- * Remove the flow.
- *
- * @param[in] dev
- *   Pointer to the Ethernet device structure.
- * @param[in, out] flow
- *   Pointer to flow structure.
- */
-static void
-mlx5_flow_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
-{
-	struct priv *priv = dev->data->dev_private;
-	struct mlx5_flow_verbs *verbs;
-	struct mlx5_flow *dev_flow;
-
-	if (flow->nl_flow && priv->mnl_socket)
-		mlx5_nl_flow_destroy(priv->mnl_socket, flow->nl_flow, NULL);
-	LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
-		verbs = &dev_flow->verbs;
-		if (verbs->flow) {
-			claim_zero(mlx5_glue->destroy_flow(verbs->flow));
-			verbs->flow = NULL;
-		}
-		if (verbs->hrxq) {
-			if (flow->actions & MLX5_ACTION_DROP)
-				mlx5_hrxq_drop_release(dev);
-			else
-				mlx5_hrxq_release(dev, verbs->hrxq);
-			verbs->hrxq = NULL;
-		}
-	}
-	if (flow->counter) {
-		mlx5_flow_counter_release(flow->counter);
-		flow->counter = NULL;
-	}
-}
-
-/**
- * Apply the flow.
- *
- * @param[in] dev
- *   Pointer to the Ethernet device structure.
- * @param[in, out] flow
- *   Pointer to flow structure.
- * @param[out] error
- *   Pointer to error structure.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx5_flow_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
-		struct rte_flow_error *error)
-{
-	struct priv *priv = dev->data->dev_private;
-	struct mlx5_flow_verbs *verbs;
-	struct mlx5_flow *dev_flow;
-	int err;
-
-	LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
-		verbs = &dev_flow->verbs;
-		if (flow->actions & MLX5_ACTION_DROP) {
-			verbs->hrxq = mlx5_hrxq_drop_new(dev);
-			if (!verbs->hrxq) {
-				rte_flow_error_set
-					(error, errno,
-					 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-					 NULL,
-					 "cannot get drop hash queue");
-				goto error;
-			}
-		} else {
-			struct mlx5_hrxq *hrxq;
-
-			hrxq = mlx5_hrxq_get(dev, flow->key,
-					     MLX5_RSS_HASH_KEY_LEN,
-					     verbs->hash_fields,
-					     (*flow->queue),
-					     flow->rss.queue_num);
-			if (!hrxq)
-				hrxq = mlx5_hrxq_new(dev, flow->key,
-						     MLX5_RSS_HASH_KEY_LEN,
-						     verbs->hash_fields,
-						     (*flow->queue),
-						     flow->rss.queue_num,
-						     !!(flow->layers &
-						      MLX5_FLOW_LAYER_TUNNEL));
-			if (!hrxq) {
-				rte_flow_error_set
-					(error, rte_errno,
-					 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-					 NULL,
-					 "cannot get hash queue");
-				goto error;
-			}
-			verbs->hrxq = hrxq;
-		}
-		verbs->flow = mlx5_glue->create_flow(verbs->hrxq->qp,
-						     verbs->attr);
-		if (!verbs->flow) {
-			rte_flow_error_set(error, errno,
-					   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-					   NULL,
-					   "hardware refuses to create flow");
-			goto error;
-		}
-	}
-	if (flow->nl_flow &&
-	    priv->mnl_socket &&
-	    mlx5_nl_flow_create(priv->mnl_socket, flow->nl_flow, error))
-		goto error;
-	return 0;
-error:
-	err = rte_errno; /* Save rte_errno before cleanup. */
-	LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
-		verbs = &dev_flow->verbs;
-		if (verbs->hrxq) {
-			if (flow->actions & MLX5_ACTION_DROP)
-				mlx5_hrxq_drop_release(dev);
-			else
-				mlx5_hrxq_release(dev, verbs->hrxq);
-			verbs->hrxq = NULL;
-		}
-	}
-	rte_errno = err; /* Restore rte_errno. */
-	return -rte_errno;
-}
-
 static unsigned int
 mlx5_find_graph_root(const struct rte_flow_item pattern[], uint32_t rss_level)
 {
@@ -3396,31 +1623,34 @@ mlx5_flow_list_create(struct rte_eth_dev *dev,
 		buf->entry[0].pattern = (void *)(uintptr_t)items;
 	}
 	for (i = 0; i < buf->entries; ++i) {
-		dev_flow = mlx5_flow_verbs_prepare(attr, buf->entry[i].pattern,
-						   actions, &item_flags,
-						   &action_flags, error);
+		dev_flow = nic_ops.prepare(attr, buf->entry[i].pattern,
+					   actions, &item_flags,
+					   &action_flags, error);
+		if (!dev_flow)
+			goto error;
 		dev_flow->flow = flow;
 		LIST_INSERT_HEAD(&flow->dev_flows, dev_flow, next);
-		mlx5_flow_verbs_translate(dev, dev_flow, attr,
-					  buf->entry[i].pattern,
-					  actions,
-					  error);
+		ret = nic_ops.translate(dev, dev_flow, attr,
+					buf->entry[i].pattern,
+					actions, error);
+		if (ret < 0)
+			goto error;
 	}
 	if (dev->data->dev_started) {
-		ret = mlx5_flow_apply(dev, flow, error);
-		if (ret < 0) {
-			ret = rte_errno; /* Save rte_errno before cleanup. */
-			if (flow) {
-				mlx5_flow_remove(dev, flow);
-				rte_free(flow);
-			}
-			rte_errno = ret; /* Restore rte_errno. */
-			return NULL;
-		}
+		ret = nic_ops.apply(dev, flow, error);
+		if (ret < 0)
+			goto error;
 	}
 	TAILQ_INSERT_TAIL(list, flow, next);
 	mlx5_flow_rxq_flags_set(dev, flow);
 	return flow;
+error:
+	ret = rte_errno; /* Save rte_errno before cleanup. */
+	assert(flow);
+	nic_ops.destroy(dev, flow);
+	rte_free(flow);
+	rte_errno = ret; /* Restore rte_errno. */
+	return NULL;
 }
 
 /**
@@ -3455,7 +1685,7 @@ static void
 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
 		       struct rte_flow *flow)
 {
-	mlx5_flow_remove(dev, flow);
+	nic_ops.destroy(dev, flow);
 	TAILQ_REMOVE(list, flow, next);
 	/*
 	 * Update RX queue flags only if port is started, otherwise it is
@@ -3463,12 +1693,6 @@ mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
 	 */
 	if (dev->data->dev_started)
 		mlx5_flow_rxq_flags_trim(dev, flow);
-	while (!LIST_EMPTY(&flow->dev_flows)) {
-		struct mlx5_flow *dev_flow;
-		dev_flow = LIST_FIRST(&flow->dev_flows);
-		LIST_REMOVE(dev_flow, next);
-		rte_free(dev_flow);
-	}
 	rte_free(flow);
 }
 
@@ -3505,7 +1729,7 @@ mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
 	struct rte_flow *flow;
 
 	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next)
-		mlx5_flow_remove(dev, flow);
+		nic_ops.remove(dev, flow);
 	mlx5_flow_rxq_flags_clear(dev);
 }
 
@@ -3528,7 +1752,7 @@ mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
 	int ret = 0;
 
 	TAILQ_FOREACH(flow, list, next) {
-		ret = mlx5_flow_apply(dev, flow, &error);
+		ret = nic_ops.apply(dev, flow, &error);
 		if (ret < 0)
 			goto error;
 		mlx5_flow_rxq_flags_set(dev, flow);
@@ -4219,3 +2443,15 @@ mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
 	}
 	return 0;
 }
+
+/**
+ * Init the driver ops structure.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ */
+void
+mlx5_flow_init_driver_ops(struct rte_eth_dev *dev __rte_unused)
+{
+	mlx5_flow_verbs_get_driver_ops(&nic_ops);
+}
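
With a single backend the selector is trivial, but it is the one place that decides which ops table backs nic_ops. A hedged sketch of how a second backend could be chosen here; the dv_flow_en knob and the mlx5_flow_dv_get_driver_ops hook are hypothetical, introduced only for illustration:

	void
	mlx5_flow_init_driver_ops(struct rte_eth_dev *dev)
	{
		struct priv *priv = dev->data->dev_private;

		if (priv->config.dv_flow_en)	/* hypothetical knob */
			mlx5_flow_dv_get_driver_ops(&nic_ops);	/* hypothetical */
		else
			mlx5_flow_verbs_get_driver_ops(&nic_ops);
	}
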
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
new file mode 100644
index 000000000..4df60db92
--- /dev/null
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -0,0 +1,257 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Mellanox Technologies, Ltd
+ */
+
+#ifndef RTE_PMD_MLX5_FLOW_H_
+#define RTE_PMD_MLX5_FLOW_H_
+
+#include <sys/queue.h>
+#include <stdalign.h>
+#include <stdint.h>
+#include <string.h>
+
+/* Verbs header. */
+/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+#include <infiniband/verbs.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-Wpedantic"
+#endif
+
+/* Pattern outer Layer bits. */
+#define MLX5_FLOW_LAYER_OUTER_L2 (1u << 0)
+#define MLX5_FLOW_LAYER_OUTER_L3_IPV4 (1u << 1)
+#define MLX5_FLOW_LAYER_OUTER_L3_IPV6 (1u << 2)
+#define MLX5_FLOW_LAYER_OUTER_L4_UDP (1u << 3)
+#define MLX5_FLOW_LAYER_OUTER_L4_TCP (1u << 4)
+#define MLX5_FLOW_LAYER_OUTER_VLAN (1u << 5)
+
+/* Pattern inner Layer bits. */
+#define MLX5_FLOW_LAYER_INNER_L2 (1u << 6)
+#define MLX5_FLOW_LAYER_INNER_L3_IPV4 (1u << 7)
+#define MLX5_FLOW_LAYER_INNER_L3_IPV6 (1u << 8)
+#define MLX5_FLOW_LAYER_INNER_L4_UDP (1u << 9)
+#define MLX5_FLOW_LAYER_INNER_L4_TCP (1u << 10)
+#define MLX5_FLOW_LAYER_INNER_VLAN (1u << 11)
+
+/* Pattern tunnel Layer bits. */
+#define MLX5_FLOW_LAYER_VXLAN (1u << 12)
+#define MLX5_FLOW_LAYER_VXLAN_GPE (1u << 13)
+#define MLX5_FLOW_LAYER_GRE (1u << 14)
+#define MLX5_FLOW_LAYER_MPLS (1u << 15)
+
+/* Outer Masks. */
+#define MLX5_FLOW_LAYER_OUTER_L3 \
+	(MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L3_IPV6)
+#define MLX5_FLOW_LAYER_OUTER_L4 \
+	(MLX5_FLOW_LAYER_OUTER_L4_UDP | MLX5_FLOW_LAYER_OUTER_L4_TCP)
+#define MLX5_FLOW_LAYER_OUTER \
+	(MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_L3 | \
+	 MLX5_FLOW_LAYER_OUTER_L4)
+
+/* Tunnel Masks. */
+#define MLX5_FLOW_LAYER_TUNNEL \
+	(MLX5_FLOW_LAYER_VXLAN | MLX5_FLOW_LAYER_VXLAN_GPE | \
+	 MLX5_FLOW_LAYER_GRE | MLX5_FLOW_LAYER_MPLS)
+
+/* Inner Masks. */
+#define MLX5_FLOW_LAYER_INNER_L3 \
+	(MLX5_FLOW_LAYER_INNER_L3_IPV4 | MLX5_FLOW_LAYER_INNER_L3_IPV6)
+#define MLX5_FLOW_LAYER_INNER_L4 \
+	(MLX5_FLOW_LAYER_INNER_L4_UDP | MLX5_FLOW_LAYER_INNER_L4_TCP)
+#define MLX5_FLOW_LAYER_INNER \
+	(MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_L3 | \
+	 MLX5_FLOW_LAYER_INNER_L4)
+
+/* Actions that modify the fate of matching traffic. */
+#define MLX5_FLOW_FATE_DROP (1u << 0)
+#define MLX5_FLOW_FATE_QUEUE (1u << 1)
+#define MLX5_FLOW_FATE_RSS (1u << 2)
+
+/* Modify a packet. */
+#define MLX5_FLOW_MOD_FLAG (1u << 0)
+#define MLX5_FLOW_MOD_MARK (1u << 1)
+#define MLX5_FLOW_MOD_COUNT (1u << 2)
+
+/* Actions */
+#define MLX5_ACTION_DROP (1u << 0)
+#define MLX5_ACTION_QUEUE (1u << 1)
+#define MLX5_ACTION_RSS (1u << 2)
+#define MLX5_ACTION_FLAG (1u << 3)
+#define MLX5_ACTION_MARK (1u << 4)
+#define MLX5_ACTION_COUNT (1u << 5)
+
+/* possible L3 layers protocols filtering. */
+#define MLX5_IP_PROTOCOL_TCP 6
+#define MLX5_IP_PROTOCOL_UDP 17
+#define MLX5_IP_PROTOCOL_GRE 47
+#define MLX5_IP_PROTOCOL_MPLS 147
+
+/* Priority reserved for default flows. */
+#define MLX5_FLOW_PRIO_RSVD ((uint32_t)-1)
+
+/*
+ * Number of sub priorities.
+ * For each kind of pattern matching, i.e. L2, L3, L4, to have correct
+ * matching on the NIC (firmware dependent), L4 must have the highest
+ * priority, followed by L3 and ending with L2.
+ */
+#define MLX5_PRIORITY_MAP_L2 2
+#define MLX5_PRIORITY_MAP_L3 1
+#define MLX5_PRIORITY_MAP_L4 0
+#define MLX5_PRIORITY_MAP_MAX 3
+
+/* Verbs specification header. */
+struct ibv_spec_header {
+	enum ibv_flow_spec_type type;
+	uint16_t size;
+};
+
+/** Verbs-specific device flow data: attributes, specifications, handles. */
+struct mlx5_flow_verbs {
+	LIST_ENTRY(mlx5_flow_verbs) next;
+	unsigned int size; /**< Size of the attribute. */
+	struct {
+		struct ibv_flow_attr *attr;
+		/**< Pointer to the flow attribute buffer. */
+		uint8_t *specs; /**< Pointer to the specifications. */
+	};
+	struct ibv_flow *flow; /**< Verbs flow pointer. */
+	struct mlx5_hrxq *hrxq; /**< Hash Rx queue object. */
+	uint64_t hash_fields; /**< Verbs hash Rx queue hash fields. */
+};
+
+/** Device flow structure. */
+struct mlx5_flow {
+	LIST_ENTRY(mlx5_flow) next;
+	struct rte_flow *flow; /**< Pointer to the main flow. */
+	uint32_t layers; /**< Bit-fields that holds the detected layers. */
+	union {
+		struct mlx5_flow_verbs verbs; /**< Holds the verbs dev-flow. */
+	};
+};
+
+/* Counters information. */
+struct mlx5_flow_counter {
+	LIST_ENTRY(mlx5_flow_counter) next; /**< Pointer to the next counter. */
+	uint32_t shared:1; /**< Share counter ID with other flow rules. */
+	uint32_t ref_cnt:31; /**< Reference counter. */
+	uint32_t id; /**< Counter ID. */
+	struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
+	uint64_t hits; /**< Number of packets matched by the rule. */
+	uint64_t bytes; /**< Number of bytes matched by the rule. */
+};
+
+/* Flow structure. */
+struct rte_flow {
+	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
+	struct rte_flow_attr attributes; /**< User flow attribute. */
+	uint32_t layers;
+	/**< Bit-fields of present layers, see MLX5_FLOW_LAYER_*. */
+	struct mlx5_flow_counter *counter; /**< Holds flow counter. */
+	struct rte_flow_action_rss rss; /**< RSS context. */
+	uint8_t key[MLX5_RSS_HASH_KEY_LEN]; /**< RSS hash key. */
+	uint16_t (*queue)[]; /**< Destination queues to redirect traffic to. */
+	void *nl_flow; /**< Netlink flow buffer if relevant. */
+	LIST_HEAD(dev_flows, mlx5_flow) dev_flows;
+	/**< Device flows that are part of the flow. */
+	uint32_t actions; /**< Bit-fields which mark all detected actions. */
+};
+typedef int (*mlx5_flow_validate_t)(struct rte_eth_dev *dev,
+				    const struct rte_flow_attr *attr,
+				    const struct rte_flow_item items[],
+				    const struct rte_flow_action actions[],
+				    struct rte_flow_error *error);
+typedef struct mlx5_flow *(*mlx5_flow_prepare_t)
+	(const struct rte_flow_attr *attr, const struct rte_flow_item items[],
+	 const struct rte_flow_action actions[], uint64_t *item_flags,
+	 uint64_t *action_flags, struct rte_flow_error *error);
+typedef int (*mlx5_flow_translate_t)(struct rte_eth_dev *dev,
+				     struct mlx5_flow *dev_flow,
+				     const struct rte_flow_attr *attr,
+				     const struct rte_flow_item items[],
+				     const struct rte_flow_action actions[],
+				     struct rte_flow_error *error);
+typedef int (*mlx5_flow_apply_t)(struct rte_eth_dev *dev, struct rte_flow *flow,
+				 struct rte_flow_error *error);
+typedef void (*mlx5_flow_remove_t)(struct rte_eth_dev *dev,
+				   struct rte_flow *flow);
+typedef void (*mlx5_flow_destroy_t)(struct rte_eth_dev *dev,
+				    struct rte_flow *flow);
+struct mlx5_flow_driver_ops {
+	mlx5_flow_validate_t validate;
+	mlx5_flow_prepare_t prepare;
+	mlx5_flow_translate_t translate;
+	mlx5_flow_apply_t apply;
+	mlx5_flow_remove_t remove;
+	mlx5_flow_destroy_t destroy;
+};
+
+/* mlx5_flow.c */
+
+uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
+				   uint32_t subpriority);
+int mlx5_flow_validate_action_count(struct rte_eth_dev *dev,
+				    struct rte_flow_error *error);
+int mlx5_flow_validate_action_drop(uint64_t action_flags,
+				   struct rte_flow_error *error);
+int mlx5_flow_validate_action_flag(uint64_t action_flags,
+				   struct rte_flow_error *error);
+int mlx5_flow_validate_action_mark(const struct rte_flow_action *action,
+				   uint64_t action_flags,
+				   struct rte_flow_error *error);
+int mlx5_flow_validate_action_queue(const struct rte_flow_action *action,
+				    uint64_t action_flags,
+				    struct rte_eth_dev *dev,
+				    struct rte_flow_error *error);
+int mlx5_flow_validate_action_rss(const struct rte_flow_action *action,
+				  uint64_t action_flags,
+				  struct rte_eth_dev *dev,
+				  struct rte_flow_error *error);
+int mlx5_flow_validate_attributes(struct rte_eth_dev *dev,
+				  const struct rte_flow_attr *attributes,
+				  struct rte_flow_error *error);
+int mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
+				uint64_t item_flags,
+				struct rte_flow_error *error);
+int mlx5_flow_validate_item_gre(const struct rte_flow_item *item,
+				uint64_t item_flags,
+				uint8_t target_protocol,
+				struct rte_flow_error *error);
+int mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item,
+				 int64_t item_flags,
+				 struct rte_flow_error *error);
+int mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item,
+				 uint64_t item_flags,
+				 struct rte_flow_error *error);
+int mlx5_flow_validate_item_mpls(const struct rte_flow_item *item,
+				 uint64_t item_flags,
+				 uint8_t target_protocol,
+				 struct rte_flow_error *error);
+int mlx5_flow_validate_item_tcp(const struct rte_flow_item *item,
+				uint64_t item_flags,
+				uint8_t target_protocol,
+				struct rte_flow_error *error);
+int mlx5_flow_validate_item_udp(const struct rte_flow_item *item,
+				uint64_t item_flags,
+				uint8_t target_protocol,
+				struct rte_flow_error *error);
+int mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
+				 int64_t item_flags,
+				 struct rte_flow_error *error);
+int mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item,
+				  uint64_t item_flags,
+				  struct rte_flow_error *error);
+int mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
+				      uint64_t item_flags,
+				      struct rte_eth_dev *dev,
+				      struct rte_flow_error *error);
+void mlx5_flow_init_driver_ops(struct rte_eth_dev *dev);
+
+/* mlx5_flow_verbs.c */
+
+void mlx5_flow_verbs_get_driver_ops(struct mlx5_flow_driver_ops *flow_ops);
+
+#endif /* RTE_PMD_MLX5_FLOW_H_ */
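
Each backend exposes a single getter that fills struct mlx5_flow_driver_ops. A minimal sketch of what mlx5_flow_verbs_get_driver_ops is expected to do, assuming the flow_verbs_* callback names used throughout mlx5_flow_verbs.c below (the exact definitions live further down in that file):

	void
	mlx5_flow_verbs_get_driver_ops(struct mlx5_flow_driver_ops *flow_ops)
	{
		*flow_ops = (struct mlx5_flow_driver_ops) {
			.validate = flow_verbs_validate,	/* assumed names */
			.prepare = flow_verbs_prepare,
			.translate = flow_verbs_translate,
			.apply = flow_verbs_apply,
			.remove = flow_verbs_remove,
			.destroy = flow_verbs_destroy,
		};
	}
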
diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c
new file mode 100644
index 000000000..e8e16cc37
--- /dev/null
+++ b/drivers/net/mlx5/mlx5_flow_verbs.c
@@ -0,0 +1,1692 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Mellanox Technologies, Ltd
+ */
+
+#include <sys/queue.h>
+#include <stdalign.h>
+#include <stdint.h>
+#include <string.h>
+
+/* Verbs header. */
+/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+#include <infiniband/verbs.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-Wpedantic"
+#endif
+
+#include <rte_common.h>
+#include <rte_ether.h>
+#include <rte_eth_ctrl.h>
+#include <rte_ethdev_driver.h>
+#include <rte_flow.h>
+#include <rte_flow_driver.h>
+#include <rte_malloc.h>
+#include <rte_ip.h>
+
+#include "mlx5.h"
+#include "mlx5_defs.h"
+#include "mlx5_prm.h"
+#include "mlx5_glue.h"
+#include "mlx5_flow.h"
+
+/**
+ * Get a flow counter.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] shared
+ *   Indicate if this counter is shared with other flows.
+ * @param[in] id
+ *   Counter identifier.
+ *
+ * @return
+ *   A pointer to the counter, NULL otherwise and rte_errno is set.
+ */
+static struct mlx5_flow_counter *
+flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
+{
+	struct priv *priv = dev->data->dev_private;
+	struct mlx5_flow_counter *cnt;
+
+	LIST_FOREACH(cnt, &priv->flow_counters, next) {
+		if (!cnt->shared || cnt->shared != shared)
+			continue;
+		if (cnt->id != id)
+			continue;
+		cnt->ref_cnt++;
+		return cnt;
+	}
+#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
+
+	struct mlx5_flow_counter tmpl = {
+		.shared = shared,
+		.id = id,
+		.cs = mlx5_glue->create_counter_set
+			(priv->ctx,
+			 &(struct ibv_counter_set_init_attr){
+				 .counter_set_id = id,
+			 }),
+		.hits = 0,
+		.bytes = 0,
+	};
+
+	if (!tmpl.cs) {
+		rte_errno = errno;
+		return NULL;
+	}
+	cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
+	if (!cnt) {
+		rte_errno = ENOMEM;
+		return NULL;
+	}
+	*cnt = tmpl;
+	LIST_INSERT_HEAD(&priv->flow_counters, cnt, next);
+	return cnt;
+#endif
+	rte_errno = ENOTSUP;
+	return NULL;
+}
+
+/**
+ * Release a flow counter.
+ *
+ * @param[in] counter
+ *   Pointer to the counter handler.
+ */
+static void
+flow_verbs_counter_release(struct mlx5_flow_counter *counter)
+{
+	if (--counter->ref_cnt == 0) {
+		claim_zero(mlx5_glue->destroy_counter_set(counter->cs));
+		LIST_REMOVE(counter, next);
+		rte_free(counter);
+	}
+}
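
Together, the two helpers above implement lookup-or-create with reference counting keyed on the (shared, id) pair. A short usage sketch, illustrative only, where dev stands for an mlx5 port with counter support:

	struct mlx5_flow_counter *a = flow_verbs_counter_new(dev, 1, 5);
	struct mlx5_flow_counter *b = flow_verbs_counter_new(dev, 1, 5);

	/* a == b: the shared counter with id 5 is found on the second
	 * call and only its ref_cnt is bumped. */
	flow_verbs_counter_release(b);	/* drops a reference, frees on zero */
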
+
+/**
+ * Add a verbs item specification into @p flow.
+ *
+ * @param[in, out] flow
+ *   Pointer to flow structure.
+ * @param[in] src
+ *   Create specification.
+ * @param[in] size
+ *   Size in bytes of the specification to copy.
+ */
+static void
+flow_verbs_spec_add(struct mlx5_flow *flow, void *src, unsigned int size)
+{
+	struct mlx5_flow_verbs *verbs = &flow->verbs;
+
+	if (verbs->specs) {
+		void *dst;
+
+		dst = (void *)(verbs->specs + verbs->size);
+		memcpy(dst, src, size);
+		++verbs->attr->num_of_specs;
+	}
+	verbs->size += size;
+}
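
Note that verbs->size is accumulated even when no specification buffer is attached; only when verbs->specs is set does the helper also copy the spec and bump num_of_specs, so it doubles as a measuring pass. A sketch of the measuring use (illustrative only):

	struct mlx5_flow probe = { .verbs = { .specs = NULL } };
	struct ibv_flow_spec_action_drop drop = {
		.type = IBV_FLOW_SPEC_ACTION_DROP,
		.size = sizeof(drop),
	};

	flow_verbs_spec_add(&probe, &drop, sizeof(drop));
	/* probe.verbs.size == sizeof(drop), nothing was copied. */
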
+
+/**
+ * Adjust verbs hash fields according to the @p flow information.
+ *
+ * @param[in, out] dev_flow
+ *   Pointer to dev flow structure.
+ * @param[in] tunnel
+ *   1 when the hash field is for a tunnel item.
+ * @param[in] layer_types
+ *   ETH_RSS_* types.
+ * @param[in] hash_fields
+ *   Item hash fields.
+ */
+static void
+flow_verbs_hashfields_adjust(struct mlx5_flow *dev_flow,
+			     int tunnel __rte_unused,
+			     uint32_t layer_types, uint64_t hash_fields)
+{
+#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
+	int rss_request_inner = dev_flow->flow->rss.level >= 2;
+
+	hash_fields |= (tunnel ? IBV_RX_HASH_INNER : 0);
+	if (rss_request_inner && !tunnel)
+		hash_fields = 0;
+	else if (rss_request_inner < 2 && tunnel)
+		hash_fields = 0;
+#endif
+	if (!(dev_flow->flow->rss.types & layer_types))
+		hash_fields = 0;
+	dev_flow->verbs.hash_fields |= hash_fields;
+}
+
+/**
+ * Convert the @p item into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested item
+ * into the flow.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] item_flags
+ *   Bit field with all detected items.
+ * @param[in, out] dev_flow
+ *   Pointer to dev_flow structure.
+ */
+static void
+flow_verbs_translate_item_eth(const struct rte_flow_item *item,
+			      uint64_t *item_flags,
+			      struct mlx5_flow *dev_flow)
+{
+	const struct rte_flow_item_eth *spec = item->spec;
+	const struct rte_flow_item_eth *mask = item->mask;
+	const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
+	const unsigned int size = sizeof(struct ibv_flow_spec_eth);
+	struct ibv_flow_spec_eth eth = {
+		.type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
+		.size = size,
+	};
+
+	if (!mask)
+		mask = &rte_flow_item_eth_mask;
+	if (spec) {
+		unsigned int i;
+
+		memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
+		memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
+		eth.val.ether_type = spec->type;
+		memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
+		memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
+		eth.mask.ether_type = mask->type;
+		/* Remove unwanted bits from values. */
+		for (i = 0; i < ETHER_ADDR_LEN; ++i) {
+			eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
+			eth.val.src_mac[i] &= eth.mask.src_mac[i];
+		}
+		eth.val.ether_type &= eth.mask.ether_type;
+		dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
+	}
+	flow_verbs_spec_add(dev_flow, &eth, size);
+	*item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
+				MLX5_FLOW_LAYER_OUTER_L2;
+}
+
+/**
+ * Update the VLAN tag in the Verbs Ethernet specification.
+ * This function assumes that the input is valid and there is space to add
+ * the requested item.
+ *
+ * @param[in, out] attr
+ *   Pointer to Verbs attributes structure.
+ * @param[in] eth
+ *   Verbs structure containing the VLAN information to copy.
+ */
+static void
+flow_verbs_item_vlan_update(struct ibv_flow_attr *attr,
+			    struct ibv_flow_spec_eth *eth)
+{
+	unsigned int i;
+	const enum ibv_flow_spec_type search = eth->type;
+	struct ibv_spec_header *hdr = (struct ibv_spec_header *)
+		((uint8_t *)attr + sizeof(struct ibv_flow_attr));
+
+	for (i = 0; i != attr->num_of_specs; ++i) {
+		if (hdr->type == search) {
+			struct ibv_flow_spec_eth *e =
+				(struct ibv_flow_spec_eth *)hdr;
+
+			e->val.vlan_tag = eth->val.vlan_tag;
+			e->mask.vlan_tag = eth->mask.vlan_tag;
+			e->val.ether_type = eth->val.ether_type;
+			e->mask.ether_type = eth->mask.ether_type;
+			break;
+		}
+		hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
+	}
+}
+
+/**
+ * Convert the @p item into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested item
+ * into the flow.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in, out] item_flags
+ *   Bit mask that holds all detected items.
+ * @param[in, out] dev_flow
+ *   Pointer to dev_flow structure.
+ */
+static void
+flow_verbs_translate_item_vlan(const struct rte_flow_item *item,
+			       uint64_t *item_flags,
+			       struct mlx5_flow *dev_flow)
+{
+	const struct rte_flow_item_vlan *spec = item->spec;
+	const struct rte_flow_item_vlan *mask = item->mask;
+	unsigned int size = sizeof(struct ibv_flow_spec_eth);
+	const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
+	struct ibv_flow_spec_eth eth = {
+		.type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
+		.size = size,
+	};
+	const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
+				      MLX5_FLOW_LAYER_OUTER_L2;
+
+	if (!mask)
+		mask = &rte_flow_item_vlan_mask;
+	if (spec) {
+		eth.val.vlan_tag = spec->tci;
+		eth.mask.vlan_tag = mask->tci;
+		eth.val.vlan_tag &= eth.mask.vlan_tag;
+		eth.val.ether_type = spec->inner_type;
+		eth.mask.ether_type = mask->inner_type;
+		eth.val.ether_type &= eth.mask.ether_type;
+	}
+	if (!(*item_flags & l2m)) {
+		dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
+		flow_verbs_spec_add(dev_flow, &eth, size);
+	} else {
+		flow_verbs_item_vlan_update(dev_flow->verbs.attr, &eth);
+		size = 0; /* Only an update is done in eth specification. */
+	}
+	*item_flags |= tunnel ?
+		       (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_VLAN) :
+		       (MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_VLAN);
+}
+
+/**
+ * Convert the @p item into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested item
+ * into the flow.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in, out] item_flags
+ *   Bit mask that marks all detected items.
+ * @param[in, out] dev_flow
+ *   Pointer to dev_flow structure.
+ */
+static void
+flow_verbs_translate_item_ipv4(const struct rte_flow_item *item,
+			       uint64_t *item_flags,
+			       struct mlx5_flow *dev_flow)
+{
+	const struct rte_flow_item_ipv4 *spec = item->spec;
+	const struct rte_flow_item_ipv4 *mask = item->mask;
+	const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
+	unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
+	struct ibv_flow_spec_ipv4_ext ipv4 = {
+		.type = IBV_FLOW_SPEC_IPV4_EXT |
+			(tunnel ? IBV_FLOW_SPEC_INNER : 0),
+		.size = size,
+	};
+
+	if (!mask)
+		mask = &rte_flow_item_ipv4_mask;
+	*item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
+				MLX5_FLOW_LAYER_OUTER_L3_IPV4;
+	if (spec) {
+		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
+			.src_ip = spec->hdr.src_addr,
+			.dst_ip = spec->hdr.dst_addr,
+			.proto = spec->hdr.next_proto_id,
+			.tos = spec->hdr.type_of_service,
+		};
+		ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
+			.src_ip = mask->hdr.src_addr,
+			.dst_ip = mask->hdr.dst_addr,
+			.proto = mask->hdr.next_proto_id,
+			.tos = mask->hdr.type_of_service,
+		};
+		/* Remove unwanted bits from values. */
+		ipv4.val.src_ip &= ipv4.mask.src_ip;
+		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
+		ipv4.val.proto &= ipv4.mask.proto;
+		ipv4.val.tos &= ipv4.mask.tos;
+	}
+	flow_verbs_hashfields_adjust(dev_flow, tunnel,
+				     (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
+				      ETH_RSS_NONFRAG_IPV4_TCP |
+				      ETH_RSS_NONFRAG_IPV4_UDP |
+				      ETH_RSS_NONFRAG_IPV4_OTHER),
+				     (IBV_RX_HASH_SRC_IPV4 |
+				      IBV_RX_HASH_DST_IPV4));
+	dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L3;
+	flow_verbs_spec_add(dev_flow, &ipv4, size);
+}
+
+/**
+ * Convert the @p item into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested item
+ * into the flow.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in, out] item_flags
+ *   Bit mask that marks all detected items.
+ * @param[in, out] dev_flow
+ *   Pointer to dev_flow structure.
+ */
+static void
+flow_verbs_translate_item_ipv6(const struct rte_flow_item *item,
+			       uint64_t *item_flags,
+			       struct mlx5_flow *dev_flow)
+{
+	const struct rte_flow_item_ipv6 *spec = item->spec;
+	const struct rte_flow_item_ipv6 *mask = item->mask;
+	const int tunnel = !!(dev_flow->flow->layers & MLX5_FLOW_LAYER_TUNNEL);
+	unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
+	struct ibv_flow_spec_ipv6 ipv6 = {
+		.type = IBV_FLOW_SPEC_IPV6 | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
+		.size = size,
+	};
+
+	if (!mask)
+		mask = &rte_flow_item_ipv6_mask;
+	*item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
+				MLX5_FLOW_LAYER_OUTER_L3_IPV6;
+	if (spec) {
+		unsigned int i;
+		uint32_t vtc_flow_val;
+		uint32_t vtc_flow_mask;
+
+		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
+		       RTE_DIM(ipv6.val.src_ip));
+		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
+		       RTE_DIM(ipv6.val.dst_ip));
+		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
+		       RTE_DIM(ipv6.mask.src_ip));
+		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
+		       RTE_DIM(ipv6.mask.dst_ip));
+		vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
+		vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
+		ipv6.val.flow_label =
+			rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
+					 IPV6_HDR_FL_SHIFT);
+		ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
+					 IPV6_HDR_TC_SHIFT;
+		ipv6.val.next_hdr = spec->hdr.proto;
+		ipv6.val.hop_limit = spec->hdr.hop_limits;
+		ipv6.mask.flow_label =
+			rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
+					 IPV6_HDR_FL_SHIFT);
+		ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
+					  IPV6_HDR_TC_SHIFT;
+		ipv6.mask.next_hdr = mask->hdr.proto;
+		ipv6.mask.hop_limit = mask->hdr.hop_limits;
+		/* Remove unwanted bits from values. */
+		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
+			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
+			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
+		}
+		ipv6.val.flow_label &= ipv6.mask.flow_label;
+		ipv6.val.traffic_class &= ipv6.mask.traffic_class;
+		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
+		ipv6.val.hop_limit &= ipv6.mask.hop_limit;
+	}
+	flow_verbs_hashfields_adjust(dev_flow, tunnel,
+				     (ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
+				      ETH_RSS_NONFRAG_IPV6_TCP |
+				      ETH_RSS_NONFRAG_IPV6_UDP |
+				      ETH_RSS_IPV6_EX  |
+				      ETH_RSS_IPV6_TCP_EX |
+				      ETH_RSS_IPV6_UDP_EX |
+				      ETH_RSS_NONFRAG_IPV6_OTHER),
+				     (IBV_RX_HASH_SRC_IPV6 |
+				      IBV_RX_HASH_DST_IPV6));
+	dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L3;
+	flow_verbs_spec_add(dev_flow, &ipv6, size);
+}
+
+/**
+ * Convert the @p item into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested item
+ * into the flow.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in, out] item_flags
+ *   Bit mask that marks all detected items.
+ * @param[in, out] dev_flow
+ *   Pointer to dev_flow structure.
+ */
+static void
+flow_verbs_translate_item_udp(const struct rte_flow_item *item,
+			      uint64_t *item_flags,
+			      struct mlx5_flow *dev_flow)
+{
+	const struct rte_flow_item_udp *spec = item->spec;
+	const struct rte_flow_item_udp *mask = item->mask;
+	const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
+	unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
+	struct ibv_flow_spec_tcp_udp udp = {
+		.type = IBV_FLOW_SPEC_UDP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
+		.size = size,
+	};
+
+	if (!mask)
+		mask = &rte_flow_item_udp_mask;
+	*item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
+				MLX5_FLOW_LAYER_OUTER_L4_UDP;
+	if (spec) {
+		udp.val.dst_port = spec->hdr.dst_port;
+		udp.val.src_port = spec->hdr.src_port;
+		udp.mask.dst_port = mask->hdr.dst_port;
+		udp.mask.src_port = mask->hdr.src_port;
+		/* Remove unwanted bits from values. */
+		udp.val.src_port &= udp.mask.src_port;
+		udp.val.dst_port &= udp.mask.dst_port;
+	}
+	flow_verbs_hashfields_adjust(dev_flow,
+				     tunnel, ETH_RSS_UDP,
+				     (IBV_RX_HASH_SRC_PORT_UDP |
+				      IBV_RX_HASH_DST_PORT_UDP));
+	dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L4;
+	flow_verbs_spec_add(dev_flow, &udp, size);
+}
+
+/**
+ * Convert the @p item into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested item
+ * into the flow.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in, out] item_flags
+ *   Bit mask that marks all detected items.
+ * @param[in, out] dev_flow
+ *   Pointer to dev_flow structure.
+ */
+static void
+flow_verbs_translate_item_tcp(const struct rte_flow_item *item,
+			      uint64_t *item_flags,
+			      struct mlx5_flow *dev_flow)
+{
+	const struct rte_flow_item_tcp *spec = item->spec;
+	const struct rte_flow_item_tcp *mask = item->mask;
+	const int tunnel = !!(dev_flow->flow->layers & MLX5_FLOW_LAYER_TUNNEL);
+	unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
+	struct ibv_flow_spec_tcp_udp tcp = {
+		.type = IBV_FLOW_SPEC_TCP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
+		.size = size,
+	};
+
+	if (!mask)
+		mask = &rte_flow_item_tcp_mask;
+	*item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
+			       MLX5_FLOW_LAYER_OUTER_L4_TCP;
+	if (spec) {
+		tcp.val.dst_port = spec->hdr.dst_port;
+		tcp.val.src_port = spec->hdr.src_port;
+		tcp.mask.dst_port = mask->hdr.dst_port;
+		tcp.mask.src_port = mask->hdr.src_port;
+		/* Remove unwanted bits from values. */
+		tcp.val.src_port &= tcp.mask.src_port;
+		tcp.val.dst_port &= tcp.mask.dst_port;
+	}
+	flow_verbs_hashfields_adjust(dev_flow,
+				     tunnel, ETH_RSS_TCP,
+				     (IBV_RX_HASH_SRC_PORT_TCP |
+				      IBV_RX_HASH_DST_PORT_TCP));
+	dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L4;
+	flow_verbs_spec_add(dev_flow, &tcp, size);
+}
+
+/**
+ * Convert the @p item into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested item
+ * into the flow.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in, out] item_flags
+ *   Bit mask that marks all detected items.
+ * @param[in, out] dev_flow
+ *   Pointer to dev_flow structure.
+ */
+static void
+flow_verbs_translate_item_vxlan(const struct rte_flow_item *item,
+				uint64_t *item_flags,
+				struct mlx5_flow *dev_flow)
+{
+	const struct rte_flow_item_vxlan *spec = item->spec;
+	const struct rte_flow_item_vxlan *mask = item->mask;
+	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
+	struct ibv_flow_spec_tunnel vxlan = {
+		.type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
+		.size = size,
+	};
+	union vni {
+		uint32_t vlan_id;
+		uint8_t vni[4];
+	} id = { .vlan_id = 0, };
+
+	if (!mask)
+		mask = &rte_flow_item_vxlan_mask;
+	if (spec) {
+		memcpy(&id.vni[1], spec->vni, 3);
+		vxlan.val.tunnel_id = id.vlan_id;
+		memcpy(&id.vni[1], mask->vni, 3);
+		vxlan.mask.tunnel_id = id.vlan_id;
+		/* Remove unwanted bits from values. */
+		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
+	}
+	flow_verbs_spec_add(dev_flow, &vxlan, size);
+	dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
+	*item_flags |= MLX5_FLOW_LAYER_VXLAN;
+}
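
The vni union above places the 24-bit network-order VNI into bytes 1..3 of the 32-bit tunnel_id, leaving byte 0 clear. A worked example (editorial illustration, little-endian host assumed):

	union vni {
		uint32_t vlan_id;
		uint8_t vni[4];
	} id = { .vlan_id = 0, };
	const uint8_t vni[3] = { 0x12, 0x34, 0x56 };	/* VNI 0x123456 */

	memcpy(&id.vni[1], vni, 3);
	/* Memory layout 00 12 34 56 -> id.vlan_id == 0x56341200 (LE). */
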
+
+/**
+ * Convert the @p item into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested item
+ * into the flow.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in, out] item_flags
+ *   Bit mask that marks all detected items.
+ * @param[in, out] dev_flow
+ *   Pointer to dev_flow structure.
+ */
+static void
+flow_verbs_translate_item_vxlan_gpe(const struct rte_flow_item *item,
+				    uint64_t *item_flags,
+				    struct mlx5_flow *dev_flow)
+{
+	const struct rte_flow_item_vxlan_gpe *spec = item->spec;
+	const struct rte_flow_item_vxlan_gpe *mask = item->mask;
+	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
+	struct ibv_flow_spec_tunnel vxlan_gpe = {
+		.type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
+		.size = size,
+	};
+	union vni {
+		uint32_t vlan_id;
+		uint8_t vni[4];
+	} id = { .vlan_id = 0, };
+
+	if (!mask)
+		mask = &rte_flow_item_vxlan_gpe_mask;
+	if (spec) {
+		memcpy(&id.vni[1], spec->vni, 3);
+		vxlan_gpe.val.tunnel_id = id.vlan_id;
+		memcpy(&id.vni[1], mask->vni, 3);
+		vxlan_gpe.mask.tunnel_id = id.vlan_id;
+		/* Remove unwanted bits from values. */
+		vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id;
+	}
+	flow_verbs_spec_add(dev_flow, &vxlan_gpe, size);
+	dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
+	*item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
+}
+
+/**
+ * Update the protocol in Verbs IPv4/IPv6 spec.
+ *
+ * @param[in, out] attr
+ *   Pointer to Verbs attributes structure.
+ * @param[in] search
+ *   Specification type to search in order to update the IP protocol.
+ * @param[in] protocol
+ *   Protocol value to set if none is present in the specification.
+ */
+static void
+flow_verbs_item_gre_ip_protocol_update(struct ibv_flow_attr *attr,
+				       enum ibv_flow_spec_type search,
+				       uint8_t protocol)
+{
+	unsigned int i;
+	struct ibv_spec_header *hdr = (struct ibv_spec_header *)
+		((uint8_t *)attr + sizeof(struct ibv_flow_attr));
+
+	if (!attr)
+		return;
+	for (i = 0; i != attr->num_of_specs; ++i) {
+		if (hdr->type == search) {
+			union {
+				struct ibv_flow_spec_ipv4_ext *ipv4;
+				struct ibv_flow_spec_ipv6 *ipv6;
+			} ip;
+
+			switch (search) {
+			case IBV_FLOW_SPEC_IPV4_EXT:
+				ip.ipv4 = (struct ibv_flow_spec_ipv4_ext *)hdr;
+				if (!ip.ipv4->val.proto) {
+					ip.ipv4->val.proto = protocol;
+					ip.ipv4->mask.proto = 0xff;
+				}
+				break;
+			case IBV_FLOW_SPEC_IPV6:
+				ip.ipv6 = (struct ibv_flow_spec_ipv6 *)hdr;
+				if (!ip.ipv6->val.next_hdr) {
+					ip.ipv6->val.next_hdr = protocol;
+					ip.ipv6->mask.next_hdr = 0xff;
+				}
+				break;
+			default:
+				break;
+			}
+			break;
+		}
+		hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
+	}
+}
+
+/**
+ * Convert the @p item into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested item
+ * into the flow.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in, out] item_flags
+ *   Bit mask that marks all detected items.
+ * @param[in, out] dev_flow
+ *   Pointer to dev_flow structure.
+ */
+static void
+flow_verbs_translate_item_gre(const struct rte_flow_item *item __rte_unused,
+			      uint64_t *item_flags,
+			      struct mlx5_flow *dev_flow)
+{
+	struct mlx5_flow_verbs *verbs = &dev_flow->verbs;
+#ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
+	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
+	struct ibv_flow_spec_tunnel tunnel = {
+		.type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
+		.size = size,
+	};
+#else
+	const struct rte_flow_item_gre *spec = item->spec;
+	const struct rte_flow_item_gre *mask = item->mask;
+	unsigned int size = sizeof(struct ibv_flow_spec_gre);
+	struct ibv_flow_spec_gre tunnel = {
+		.type = IBV_FLOW_SPEC_GRE,
+		.size = size,
+	};
+
+	if (!mask)
+		mask = &rte_flow_item_gre_mask;
+	if (spec) {
+		tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
+		tunnel.val.protocol = spec->protocol;
+		tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
+		tunnel.mask.protocol = mask->protocol;
+		/* Remove unwanted bits from values. */
+		tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
+		tunnel.val.protocol &= tunnel.mask.protocol;
+		tunnel.val.key &= tunnel.mask.key;
+	}
+#endif
+	if (*item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4)
+		flow_verbs_item_gre_ip_protocol_update(verbs->attr,
+						       IBV_FLOW_SPEC_IPV4_EXT,
+						       MLX5_IP_PROTOCOL_GRE);
+	else
+		flow_verbs_item_gre_ip_protocol_update(verbs->attr,
+						       IBV_FLOW_SPEC_IPV6,
+						       MLX5_IP_PROTOCOL_GRE);
+	flow_verbs_spec_add(dev_flow, &tunnel, size);
+	verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
+	*item_flags |= MLX5_FLOW_LAYER_GRE;
+}
+
+/**
+ * Convert the @p item into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested item
+ * into the flow.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in, out] item_flags
+ *   Bit mask that marks all detected items.
+ * @param[in, out] dev_flow
+ *   Pointer to the device-specific flow structure.
+ */
+static void
+flow_verbs_translate_item_mpls(const struct rte_flow_item *item __rte_unused,
+			       uint64_t *item_flags __rte_unused,
+			       struct mlx5_flow *dev_flow __rte_unused)
+{
+#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
+	const struct rte_flow_item_mpls *spec = item->spec;
+	const struct rte_flow_item_mpls *mask = item->mask;
+	unsigned int size = sizeof(struct ibv_flow_spec_mpls);
+	struct ibv_flow_spec_mpls mpls = {
+		.type = IBV_FLOW_SPEC_MPLS,
+		.size = size,
+	};
+
+	if (!mask)
+		mask = &rte_flow_item_mpls_mask;
+	if (spec) {
+		memcpy(&mpls.val.label, spec, sizeof(mpls.val.label));
+		memcpy(&mpls.mask.label, mask, sizeof(mpls.mask.label));
+		/* Remove unwanted bits from values.  */
+		mpls.val.label &= mpls.mask.label;
+	}
+	flow_verbs_spec_add(dev_flow, &mpls, size);
+	dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
+	*item_flags |= MLX5_FLOW_LAYER_MPLS;
+#endif
+}
+
+/**
+ * Convert the @p action into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested action
+ * into the flow. This function also returns the action that was added.
+ *
+ * @param[in, out] action_flags
+ *   Pointer to the detected actions.
+ * @param[in] dev_flow
+ *   Pointer to mlx5_flow.
+ */
+static void
+flow_verbs_translate_action_drop(uint64_t *action_flags,
+				 struct mlx5_flow *dev_flow)
+{
+	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
+	struct ibv_flow_spec_action_drop drop = {
+			.type = IBV_FLOW_SPEC_ACTION_DROP,
+			.size = size,
+	};
+
+	flow_verbs_spec_add(dev_flow, &drop, size);
+	*action_flags |= MLX5_ACTION_DROP;
+}
+
+/**
+ * Convert the @p action into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested action
+ * into the flow. This function also returns the action that was added.
+ *
+ * @param[in] action
+ *   Action configuration.
+ * @param[in, out] action_flags
+ *   Pointer to the detected actions.
+ * @param[in] dev_flow
+ *   Pointer to mlx5_flow.
+ */
+static void
+flow_verbs_translate_action_queue(const struct rte_flow_action *action,
+				  uint64_t *action_flags,
+				  struct mlx5_flow *dev_flow)
+{
+	const struct rte_flow_action_queue *queue = action->conf;
+	struct rte_flow *flow = dev_flow->flow;
+
+	if (flow->queue)
+		(*flow->queue)[0] = queue->index;
+	flow->rss.queue_num = 1;
+	*action_flags |= MLX5_ACTION_QUEUE;
+}
+
+/**
+ * Convert the @p action into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested action
+ * into the flow. This function also returns the action that was added.
+ *
+ * @param[in] action
+ *   Action configuration.
+ * @param[in, out] action_flags
+ *   Pointer to the detected actions.
+ * @param[in] dev_flow
+ *   Pointer to mlx5_flow.
+ */
+static void
+flow_verbs_translate_action_rss(const struct rte_flow_action *action,
+				uint64_t *action_flags,
+				struct mlx5_flow *dev_flow)
+{
+	const struct rte_flow_action_rss *rss = action->conf;
+	struct rte_flow *flow = dev_flow->flow;
+
+	if (flow->queue)
+		memcpy((*flow->queue), rss->queue,
+		       rss->queue_num * sizeof(uint16_t));
+	flow->rss.queue_num = rss->queue_num;
+	memcpy(flow->key, rss->key, MLX5_RSS_HASH_KEY_LEN);
+	flow->rss.types = rss->types;
+	flow->rss.level = rss->level;
+	*action_flags |= MLX5_ACTION_RSS;
+}
+
+/**
+ * Convert the @p action into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested action
+ * into the flow. This function also returns the action that was added.
+ *
+ * @param[in] action
+ *   Action configuration.
+ * @param[in, out] action_flags
+ *   Pointer to the detected actions.
+ * @param[in] dev_flow
+ *   Pointer to mlx5_flow.
+ */
+static void
+flow_verbs_translate_action_flag
+			(const struct rte_flow_action *action __rte_unused,
+			 uint64_t *action_flags,
+			 struct mlx5_flow *dev_flow)
+{
+	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
+	struct ibv_flow_spec_action_tag tag = {
+		.type = IBV_FLOW_SPEC_ACTION_TAG,
+		.size = size,
+		.tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
+	};
+	*action_flags |= MLX5_ACTION_MARK;
+	flow_verbs_spec_add(dev_flow, &tag, size);
+}
+
+/**
+ * Update verbs specification to modify the flag to mark.
+ *
+ * @param[in, out] verbs
+ *   Pointer to the mlx5_flow_verbs structure.
+ * @param[in] mark_id
+ *   Mark identifier to replace the flag.
+ */
+static void
+flow_verbs_mark_update(struct mlx5_flow_verbs *verbs, uint32_t mark_id)
+{
+	struct ibv_spec_header *hdr;
+	int i;
+
+	if (!verbs)
+		return;
+	/* Update Verbs specification. */
+	hdr = (struct ibv_spec_header *)verbs->specs;
+	if (!hdr)
+		return;
+	for (i = 0; i != verbs->attr->num_of_specs; ++i) {
+		if (hdr->type == IBV_FLOW_SPEC_ACTION_TAG) {
+			struct ibv_flow_spec_action_tag *t =
+				(struct ibv_flow_spec_action_tag *)hdr;
+
+			t->tag_id = mlx5_flow_mark_set(mark_id);
+		}
+		hdr = (struct ibv_spec_header *)((uintptr_t)hdr + hdr->size);
+	}
+}
+
+/**
+ * Convert the @p action into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested action
+ * into the flow. This function also returns the action that was added.
+ *
+ * @param[in] action
+ *   Action configuration.
+ * @param[in, out] action_flags
+ *   Pointer to the detected actions.
+ * @param[in] dev_flow
+ *   Pointer to mlx5_flow.
+ */
+static void
+flow_verbs_translate_action_mark(const struct rte_flow_action *action,
+				 uint64_t *action_flags,
+				 struct mlx5_flow *dev_flow)
+{
+	const struct rte_flow_action_mark *mark = action->conf;
+	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
+	struct ibv_flow_spec_action_tag tag = {
+		.type = IBV_FLOW_SPEC_ACTION_TAG,
+		.size = size,
+	};
+	struct mlx5_flow_verbs *verbs = &dev_flow->verbs;
+
+	if (*action_flags & MLX5_ACTION_FLAG) {
+		flow_verbs_mark_update(verbs, mark->id);
+		size = 0;
+	} else {
+		tag.tag_id = mlx5_flow_mark_set(mark->id);
+		flow_verbs_spec_add(dev_flow, &tag, size);
+	}
+	*action_flags |= MLX5_ACTION_MARK;
+}
+
+/**
+ * Convert the @p action into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested action
+ * into the flow. This function also returns the action that was added.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] action
+ *   Action configuration.
+ * @param[in, out] action_flags
+ *   Pointer to the detected actions.
+ * @param[in] dev_flow
+ *   Pointer to mlx5_flow.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_verbs_translate_action_count(struct rte_eth_dev *dev,
+				  const struct rte_flow_action *action,
+				  uint64_t *action_flags,
+				  struct mlx5_flow *dev_flow,
+				  struct rte_flow_error *error)
+{
+	const struct rte_flow_action_count *count = action->conf;
+	struct rte_flow *flow = dev_flow->flow;
+#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
+	unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
+	struct ibv_flow_spec_counter_action counter = {
+		.type = IBV_FLOW_SPEC_ACTION_COUNT,
+		.size = size,
+	};
+#endif
+
+	if (!flow->counter) {
+		flow->counter = flow_verbs_counter_new(dev, count->shared,
+						       count->id);
+		if (!flow->counter)
+			return rte_flow_error_set(error, ENOTSUP,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  action,
+						  "cannot get counter"
+						  " context.");
+	}
+	*action_flags |= MLX5_ACTION_COUNT;
+#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
+	counter.counter_set_handle = flow->counter->cs->handle;
+	flow_verbs_spec_add(dev_flow, &counter, size);
+#endif
+	return 0;
+}
+
+/**
+ * Internal validation function. For validating both actions and items.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] attr
+ *   Pointer to the flow attributes.
+ * @param[in] items
+ *   Pointer to the list of items.
+ * @param[in] actions
+ *   Pointer to the list of actions.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_verbs_validate(struct rte_eth_dev *dev,
+		    const struct rte_flow_attr *attr,
+		    const struct rte_flow_item items[],
+		    const struct rte_flow_action actions[],
+		    struct rte_flow_error *error)
+{
+	int ret;
+	uint32_t action_flags = 0;
+	uint32_t item_flags = 0;
+	int tunnel = 0;
+	uint8_t next_protocol = 0xff;
+
+	if (items == NULL)
+		return -1;
+	ret = mlx5_flow_validate_attributes(dev, attr, error);
+	if (ret < 0)
+		return ret;
+	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
+		int ret = 0;
+
+		switch (items->type) {
+		case RTE_FLOW_ITEM_TYPE_VOID:
+			break;
+		case RTE_FLOW_ITEM_TYPE_ETH:
+			ret = mlx5_flow_validate_item_eth(items, item_flags,
+							  error);
+			if (ret < 0)
+				return ret;
+			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
+					       MLX5_FLOW_LAYER_OUTER_L2;
+			break;
+		case RTE_FLOW_ITEM_TYPE_VLAN:
+			ret = mlx5_flow_validate_item_vlan(items, item_flags,
+							   error);
+			if (ret < 0)
+				return ret;
+			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
+					       MLX5_FLOW_LAYER_OUTER_VLAN;
+			break;
+		case RTE_FLOW_ITEM_TYPE_IPV4:
+			ret = mlx5_flow_validate_item_ipv4(items, item_flags,
+							   error);
+			if (ret < 0)
+				return ret;
+			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
+					       MLX5_FLOW_LAYER_OUTER_L3_IPV4;
+			if (items->mask != NULL &&
+			    ((const struct rte_flow_item_ipv4 *)
+			     items->mask)->hdr.next_proto_id)
+				next_protocol =
+					((const struct rte_flow_item_ipv4 *)
+					 (items->spec))->hdr.next_proto_id;
+			break;
+		case RTE_FLOW_ITEM_TYPE_IPV6:
+			ret = mlx5_flow_validate_item_ipv6(items, item_flags,
+							   error);
+			if (ret < 0)
+				return ret;
+			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
+					       MLX5_FLOW_LAYER_OUTER_L3_IPV6;
+			if (items->mask != NULL &&
+			    ((const struct rte_flow_item_ipv6 *)
+			     items->mask)->hdr.proto)
+				next_protocol =
+					((const struct rte_flow_item_ipv6 *)
+					 items->spec)->hdr.proto;
+			break;
+		case RTE_FLOW_ITEM_TYPE_UDP:
+			ret = mlx5_flow_validate_item_udp(items, item_flags,
+							  next_protocol,
+							  error);
+			if (ret < 0)
+				return ret;
+			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
+					       MLX5_FLOW_LAYER_OUTER_L4_UDP;
+			break;
+		case RTE_FLOW_ITEM_TYPE_TCP:
+			ret = mlx5_flow_validate_item_tcp(items, item_flags,
+							  next_protocol, error);
+			if (ret < 0)
+				return ret;
+			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
+					       MLX5_FLOW_LAYER_OUTER_L4_TCP;
+			break;
+		case RTE_FLOW_ITEM_TYPE_VXLAN:
+			ret = mlx5_flow_validate_item_vxlan(items, item_flags,
+							    error);
+			if (ret < 0)
+				return ret;
+			item_flags |= MLX5_FLOW_LAYER_VXLAN;
+			break;
+		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
+			ret = mlx5_flow_validate_item_vxlan_gpe(items,
+								item_flags,
+								dev, error);
+			if (ret < 0)
+				return ret;
+			item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
+			break;
+		case RTE_FLOW_ITEM_TYPE_GRE:
+			ret = mlx5_flow_validate_item_gre(items, item_flags,
+							  next_protocol, error);
+			if (ret < 0)
+				return ret;
+			item_flags |= MLX5_FLOW_LAYER_GRE;
+			break;
+		case RTE_FLOW_ITEM_TYPE_MPLS:
+			ret = mlx5_flow_validate_item_mpls(items, item_flags,
+							   next_protocol,
+							   error);
+			if (ret < 0)
+				return ret;
+			if (next_protocol != 0xff &&
+			    next_protocol != MLX5_IP_PROTOCOL_MPLS)
+				return rte_flow_error_set
+					(error, ENOTSUP,
+					 RTE_FLOW_ERROR_TYPE_ITEM, items,
+					 "protocol filtering not compatible"
+					 " with MPLS layer");
+			item_flags |= MLX5_FLOW_LAYER_MPLS;
+			break;
+		default:
+			return rte_flow_error_set(error, ENOTSUP,
+						  RTE_FLOW_ERROR_TYPE_ITEM,
+						  NULL, "item not supported");
+		}
+	}
+	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+		tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
+		switch (actions->type) {
+		case RTE_FLOW_ACTION_TYPE_VOID:
+			break;
+		case RTE_FLOW_ACTION_TYPE_FLAG:
+			ret = mlx5_flow_validate_action_flag(action_flags,
+							     error);
+			if (ret < 0)
+				return ret;
+			action_flags |= MLX5_ACTION_FLAG;
+			break;
+		case RTE_FLOW_ACTION_TYPE_MARK:
+			ret = mlx5_flow_validate_action_mark(actions,
+							     action_flags,
+							     error);
+			if (ret < 0)
+				return ret;
+			action_flags |= MLX5_ACTION_MARK;
+			break;
+		case RTE_FLOW_ACTION_TYPE_DROP:
+			ret = mlx5_flow_validate_action_drop(action_flags,
+							     error);
+			if (ret < 0)
+				return ret;
+			action_flags |= MLX5_ACTION_DROP;
+			break;
+		case RTE_FLOW_ACTION_TYPE_QUEUE:
+			ret = mlx5_flow_validate_action_queue(actions,
+							      action_flags, dev,
+							      error);
+			if (ret < 0)
+				return ret;
+			action_flags |= MLX5_ACTION_QUEUE;
+			break;
+		case RTE_FLOW_ACTION_TYPE_RSS:
+			ret = mlx5_flow_validate_action_rss(actions,
+							    action_flags, dev,
+							    error);
+			if (ret < 0)
+				return ret;
+			action_flags |= MLX5_ACTION_RSS;
+			break;
+		case RTE_FLOW_ACTION_TYPE_COUNT:
+			ret = mlx5_flow_validate_action_count(dev, error);
+			if (ret < 0)
+				return ret;
+			action_flags |= MLX5_ACTION_COUNT;
+			break;
+		default:
+			return rte_flow_error_set(error, ENOTSUP,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  actions,
+						  "action not supported");
+		}
+	}
+	return 0;
+}
+
+/**
+ * Calculate the required bytes that are needed for the action part of the verbs
+ * flow. In addition, it returns bit-fields with all the detected actions, in
+ * order to avoid another iteration over the actions.
+ *
+ * @param[in] actions
+ *   Pointer to the list of actions.
+ * @param[out] action_flags
+ *   Pointer to the detected actions.
+ *
+ * @return
+ *   The size of the memory needed for all actions.
+ */
+static int
+flow_verbs_get_actions_and_size(const struct rte_flow_action actions[],
+				uint64_t *action_flags)
+{
+	int size = 0;
+	uint64_t detected_actions = 0;
+
+	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+		switch (actions->type) {
+		case RTE_FLOW_ACTION_TYPE_VOID:
+			break;
+		case RTE_FLOW_ACTION_TYPE_FLAG:
+			size += sizeof(struct ibv_flow_spec_action_tag);
+			detected_actions |= MLX5_ACTION_FLAG;
+			break;
+		case RTE_FLOW_ACTION_TYPE_MARK:
+			size += sizeof(struct ibv_flow_spec_action_tag);
+			detected_actions |= MLX5_ACTION_MARK;
+			break;
+		case RTE_FLOW_ACTION_TYPE_DROP:
+			size += sizeof(struct ibv_flow_spec_action_drop);
+			detected_actions |= MLX5_ACTION_DROP;
+			break;
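+		/* Queue and RSS are realized via hash Rx queues, not specs. */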
+		case RTE_FLOW_ACTION_TYPE_QUEUE:
+			detected_actions |= MLX5_ACTION_QUEUE;
+			break;
+		case RTE_FLOW_ACTION_TYPE_RSS:
+			detected_actions |= MLX5_ACTION_RSS;
+			break;
+		case RTE_FLOW_ACTION_TYPE_COUNT:
+#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
+			size += sizeof(struct ibv_flow_spec_counter_action);
+#endif
+			detected_actions |= MLX5_ACTION_COUNT;
+			break;
+		default:
+			break;
+		}
+	}
+	*action_flags = detected_actions;
+	return size;
+}
+
+/**
+ * Calculate the required bytes that are needed for the item part of the verbs
+ * flow. In addition, it returns bit-fields with all the detected items, in
+ * order to avoid another iteration over the items.
+ *
+ * @param[in] items
+ *   Pointer to the list of items.
+ * @param[in, out] item_flags
+ *   Pointer to the detected items.
+ *
+ * @return
+ *   The size of the memory needed for all items.
+ */
+static int
+flow_verbs_get_items_and_size(const struct rte_flow_item items[],
+			      uint64_t *item_flags)
+{
+	int size = 0;
+	uint64_t detected_items = 0;
+	const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
+
+	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
+		switch (items->type) {
+		case RTE_FLOW_ITEM_TYPE_VOID:
+			break;
+		case RTE_FLOW_ITEM_TYPE_ETH:
+			size += sizeof(struct ibv_flow_spec_eth);
+			detected_items |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
+						   MLX5_FLOW_LAYER_OUTER_L2;
+			break;
+		case RTE_FLOW_ITEM_TYPE_VLAN:
+			size += sizeof(struct ibv_flow_spec_eth);
+			detected_items |= tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
+						   MLX5_FLOW_LAYER_OUTER_VLAN;
+			break;
+		case RTE_FLOW_ITEM_TYPE_IPV4:
+			size += sizeof(struct ibv_flow_spec_ipv4_ext);
+			detected_items |= tunnel ?
+					  MLX5_FLOW_LAYER_INNER_L3_IPV4 :
+					  MLX5_FLOW_LAYER_OUTER_L3_IPV4;
+			break;
+		case RTE_FLOW_ITEM_TYPE_IPV6:
+			size += sizeof(struct ibv_flow_spec_ipv6);
+			detected_items |= tunnel ?
+					  MLX5_FLOW_LAYER_INNER_L3_IPV6 :
+					  MLX5_FLOW_LAYER_OUTER_L3_IPV6;
+			break;
+		case RTE_FLOW_ITEM_TYPE_UDP:
+			size += sizeof(struct ibv_flow_spec_tcp_udp);
+			detected_items |= tunnel ?
+					  MLX5_FLOW_LAYER_INNER_L4_UDP :
+					  MLX5_FLOW_LAYER_OUTER_L4_UDP;
+			break;
+		case RTE_FLOW_ITEM_TYPE_TCP:
+			size += sizeof(struct ibv_flow_spec_tcp_udp);
+			detected_items |= tunnel ?
+					  MLX5_FLOW_LAYER_INNER_L4_TCP :
+					  MLX5_FLOW_LAYER_OUTER_L4_TCP;
+			break;
+		case RTE_FLOW_ITEM_TYPE_VXLAN:
+			size += sizeof(struct ibv_flow_spec_tunnel);
+			detected_items |= MLX5_FLOW_LAYER_VXLAN;
+			break;
+		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
+			size += sizeof(struct ibv_flow_spec_tunnel);
+			detected_items |= MLX5_FLOW_LAYER_VXLAN_GPE;
+			break;
+#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
+		case RTE_FLOW_ITEM_TYPE_GRE:
+			size += sizeof(struct ibv_flow_spec_gre);
+			detected_items |= MLX5_FLOW_LAYER_GRE;
+			break;
+		case RTE_FLOW_ITEM_TYPE_MPLS:
+			size += sizeof(struct ibv_flow_spec_mpls);
+			detected_items |= MLX5_FLOW_LAYER_MPLS;
+			break;
+#else
+		case RTE_FLOW_ITEM_TYPE_GRE:
+			size += sizeof(struct ibv_flow_spec_tunnel);
+			detected_items |= MLX5_FLOW_LAYER_TUNNEL;
+			break;
+#endif
+		default:
+			break;
+		}
+	}
+	*item_flags = detected_items;
+	return size;
+}
+
+/**
+ * Internal preparation function. Allocate mlx5_flow with the required size.
+ * The required size is calculated based on the actions and items. This function
+ * also returns the detected actions and items for later use.
+ *
+ * @param[in] attr
+ *   Pointer to the flow attributes.
+ * @param[in] items
+ *   Pointer to the list of items.
+ * @param[in] actions
+ *   Pointer to the list of actions.
+ * @param[out] item_flags
+ *   Pointer to bit mask of all items detected.
+ * @param[out] action_flags
+ *   Pointer to bit mask of all actions detected.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   Pointer to mlx5_flow object on success, otherwise NULL and rte_errno
+ *   is set.
+ */
+static struct mlx5_flow *
+flow_verbs_prepare(const struct rte_flow_attr *attr __rte_unused,
+		   const struct rte_flow_item items[],
+		   const struct rte_flow_action actions[],
+		   uint64_t *item_flags,
+		   uint64_t *action_flags,
+		   struct rte_flow_error *error)
+{
+	uint32_t size = sizeof(struct mlx5_flow) + sizeof(struct ibv_flow_attr);
+	struct mlx5_flow *flow;
+
+	size += flow_verbs_get_actions_and_size(actions, action_flags);
+	size += flow_verbs_get_items_and_size(items, item_flags);
+	flow = rte_calloc(__func__, 1, size, 0);
+	if (!flow) {
+		rte_flow_error_set(error, ENOMEM,
+				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+				   "not enough memory to create flow");
+		return NULL;
+	}
+	flow->verbs.attr = (void *)(flow + 1);
+	flow->verbs.specs =
+		(uint8_t *)(flow + 1) + sizeof(struct ibv_flow_attr);
+	return flow;
+}
+
+/**
+ * Fill the flow with Verbs specifications.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[in, out] dev_flow
+ *   Pointer to the mlx5 flow.
+ * @param[in] attr
+ *   Pointer to the flow attributes.
+ * @param[in] items
+ *   Pointer to the list of items.
+ * @param[in] actions
+ *   Pointer to the list of actions.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_verbs_translate(struct rte_eth_dev *dev,
+		     struct mlx5_flow *dev_flow,
+		     const struct rte_flow_attr *attr,
+		     const struct rte_flow_item items[],
+		     const struct rte_flow_action actions[],
+		     struct rte_flow_error *error)
+{
+	uint64_t action_flags = 0;
+	uint64_t item_flags = 0;
+	uint64_t priority = attr->priority;
+	struct priv *priv = dev->data->dev_private;
+
+	if (priority == MLX5_FLOW_PRIO_RSVD)
+		priority = priv->config.flow_prio - 1;
+	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+		int ret;
+
+		switch (actions->type) {
+		case RTE_FLOW_ACTION_TYPE_VOID:
+			break;
+		case RTE_FLOW_ACTION_TYPE_FLAG:
+			flow_verbs_translate_action_flag(actions,
+							 &action_flags,
+							 dev_flow);
+			break;
+		case RTE_FLOW_ACTION_TYPE_MARK:
+			flow_verbs_translate_action_mark(actions,
+							 &action_flags,
+							 dev_flow);
+			break;
+		case RTE_FLOW_ACTION_TYPE_DROP:
+			flow_verbs_translate_action_drop(&action_flags,
+							 dev_flow);
+			break;
+		case RTE_FLOW_ACTION_TYPE_QUEUE:
+			flow_verbs_translate_action_queue(actions,
+							  &action_flags,
+							  dev_flow);
+			break;
+		case RTE_FLOW_ACTION_TYPE_RSS:
+			flow_verbs_translate_action_rss(actions,
+							&action_flags,
+							dev_flow);
+			break;
+		case RTE_FLOW_ACTION_TYPE_COUNT:
+			ret = flow_verbs_translate_action_count(dev,
+								actions,
+								&action_flags,
+								dev_flow,
+								error);
+			if (ret < 0)
+				return ret;
+			break;
+		default:
+			return rte_flow_error_set(error, ENOTSUP,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  actions,
+						  "action not supported");
+		}
+	}
+	dev_flow->flow->actions |= action_flags;
+	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
+		switch (items->type) {
+		case RTE_FLOW_ITEM_TYPE_VOID:
+			break;
+		case RTE_FLOW_ITEM_TYPE_ETH:
+			flow_verbs_translate_item_eth(items, &item_flags,
+						      dev_flow);
+			break;
+		case RTE_FLOW_ITEM_TYPE_VLAN:
+			flow_verbs_translate_item_vlan(items, &item_flags,
+						       dev_flow);
+			break;
+		case RTE_FLOW_ITEM_TYPE_IPV4:
+			flow_verbs_translate_item_ipv4(items, &item_flags,
+						       dev_flow);
+			break;
+		case RTE_FLOW_ITEM_TYPE_IPV6:
+			flow_verbs_translate_item_ipv6(items, &item_flags,
+						       dev_flow);
+			break;
+		case RTE_FLOW_ITEM_TYPE_UDP:
+			flow_verbs_translate_item_udp(items, &item_flags,
+						      dev_flow);
+			break;
+		case RTE_FLOW_ITEM_TYPE_TCP:
+			flow_verbs_translate_item_tcp(items, &item_flags,
+						      dev_flow);
+			break;
+		case RTE_FLOW_ITEM_TYPE_VXLAN:
+			flow_verbs_translate_item_vxlan(items, &item_flags,
+							dev_flow);
+			break;
+		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
+			flow_verbs_translate_item_vxlan_gpe(items, &item_flags,
+							    dev_flow);
+			break;
+		case RTE_FLOW_ITEM_TYPE_GRE:
+			flow_verbs_translate_item_gre(items, &item_flags,
+						      dev_flow);
+			break;
+		case RTE_FLOW_ITEM_TYPE_MPLS:
+			flow_verbs_translate_item_mpls(items, &item_flags,
+						       dev_flow);
+			break;
+		default:
+			return rte_flow_error_set(error, ENOTSUP,
+						  RTE_FLOW_ERROR_TYPE_ITEM,
+						  NULL,
+						  "item not supported");
+		}
+	}
+	dev_flow->verbs.attr->priority =
+		mlx5_flow_adjust_priority(dev, priority,
+					  dev_flow->verbs.attr->priority);
+	return 0;
+}
+
+/**
+ * Remove the flow from the NIC but keep it in memory.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in, out] flow
+ *   Pointer to flow structure.
+ */
+static void
+flow_verbs_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
+{
+	struct mlx5_flow_verbs *verbs;
+	struct mlx5_flow *dev_flow;
+
+	if (!flow)
+		return;
+	LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
+		verbs = &dev_flow->verbs;
+		if (verbs->flow) {
+			claim_zero(mlx5_glue->destroy_flow(verbs->flow));
+			verbs->flow = NULL;
+		}
+		if (verbs->hrxq) {
+			if (flow->actions & MLX5_ACTION_DROP)
+				mlx5_hrxq_drop_release(dev);
+			else
+				mlx5_hrxq_release(dev, verbs->hrxq);
+			verbs->hrxq = NULL;
+		}
+	}
+	if (flow->counter) {
+		flow_verbs_counter_release(flow->counter);
+		flow->counter = NULL;
+	}
+}
+
+/**
+ * Remove the flow from the NIC and the memory.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in, out] flow
+ *   Pointer to flow structure.
+ */
+static void
+flow_verbs_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
+{
+	struct mlx5_flow *dev_flow;
+
+	if (!flow)
+		return;
+	flow_verbs_remove(dev, flow);
+	while (!LIST_EMPTY(&flow->dev_flows)) {
+		dev_flow = LIST_FIRST(&flow->dev_flows);
+		LIST_REMOVE(dev_flow, next);
+		rte_free(dev_flow);
+	}
+}
+
+/**
+ * Apply the flow to the NIC.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in, out] flow
+ *   Pointer to flow structure.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_verbs_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
+		 struct rte_flow_error *error)
+{
+	struct mlx5_flow_verbs *verbs;
+	struct mlx5_flow *dev_flow;
+	int err;
+
+	LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
+		verbs = &dev_flow->verbs;
+		if (flow->actions & MLX5_ACTION_DROP) {
+			verbs->hrxq = mlx5_hrxq_drop_new(dev);
+			if (!verbs->hrxq) {
+				rte_flow_error_set
+					(error, errno,
+					 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+					 "cannot get drop hash queue");
+				goto error;
+			}
+		} else {
+			struct mlx5_hrxq *hrxq;
+
+			hrxq = mlx5_hrxq_get(dev, flow->key,
+					     MLX5_RSS_HASH_KEY_LEN,
+					     verbs->hash_fields,
+					     (*flow->queue),
+					     flow->rss.queue_num);
+			if (!hrxq)
+				hrxq = mlx5_hrxq_new(dev, flow->key,
+						     MLX5_RSS_HASH_KEY_LEN,
+						     verbs->hash_fields,
+						     (*flow->queue),
+						     flow->rss.queue_num,
+						     !!(flow->layers &
+						      MLX5_FLOW_LAYER_TUNNEL));
+			if (!hrxq) {
+				rte_flow_error_set
+					(error, rte_errno,
+					 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+					 "cannot get hash queue");
+				goto error;
+			}
+			verbs->hrxq = hrxq;
+		}
+		verbs->flow = mlx5_glue->create_flow(verbs->hrxq->qp,
+						     verbs->attr);
+		if (!verbs->flow) {
+			rte_flow_error_set(error, errno,
+					   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					   NULL,
+					   "hardware refuses to create flow");
+			goto error;
+		}
+	}
+	return 0;
+error:
+	err = rte_errno; /* Save rte_errno before cleanup. */
+	LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
+		verbs = &dev_flow->verbs;
+		if (verbs->hrxq) {
+			if (flow->actions & MLX5_ACTION_DROP)
+				mlx5_hrxq_drop_release(dev);
+			else
+				mlx5_hrxq_release(dev, verbs->hrxq);
+			verbs->hrxq = NULL;
+		}
+	}
+	rte_errno = err; /* Restore rte_errno. */
+	return -rte_errno;
+}
+
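+/**
+ * Fill flow_ops with the Verbs driver function pointers.
+ *
+ * @param[out] flow_ops
+ *   Pointer to driver_ops structure.
+ */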
+void
+mlx5_flow_verbs_get_driver_ops(struct mlx5_flow_driver_ops *flow_ops)
+{
+	*flow_ops = (struct mlx5_flow_driver_ops) {
+		.validate = flow_verbs_validate,
+		.prepare = flow_verbs_prepare,
+		.translate = flow_verbs_translate,
+		.apply = flow_verbs_apply,
+		.remove = flow_verbs_remove,
+		.destroy = flow_verbs_destroy,
+	};
+}
-- 
2.11.0

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [dpdk-dev] [PATCH v3 05/11] net/mlx5: add Direct Verbs validation function
  2018-09-24 23:17 ` [dpdk-dev] [PATCH v3 00/11] net/mlx5: add Direct Verbs flow driver support Yongseok Koh
                     ` (3 preceding siblings ...)
  2018-09-24 23:17   ` [dpdk-dev] [PATCH v3 04/11] net/mlx5: add support for multiple flow drivers Yongseok Koh
@ 2018-09-24 23:17   ` Yongseok Koh
  2018-09-24 23:17   ` [dpdk-dev] [PATCH v3 06/11] net/mlx5: add Direct Verbs prepare function Yongseok Koh
                     ` (6 subsequent siblings)
  11 siblings, 0 replies; 22+ messages in thread
From: Yongseok Koh @ 2018-09-24 23:17 UTC (permalink / raw)
  To: Thomas Monjalon, Shahaf Shuler; +Cc: dev, Ori Kam

From: Ori Kam <orika@mellanox.com>

This commit introduces the Direct Verbs driver API.
Direct Verbs is an API that adds new features such as encapsulation and
matching on metadata.
This commit adds the validation function; most of the validation is
done with functions that are also used by the Verbs API.
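
As an illustration only (not part of this patch; port setup is assumed
and the helper name is hypothetical), an application reaching this path
could validate a simple ingress drop rule as below; the attribute check
is DV-specific while the item/action checks reuse the shared
mlx5_flow_validate_* helpers:

#include <stdio.h>
#include <rte_flow.h>

/* Minimal sketch: validate an eth/ipv4 -> drop rule on a port. */
static int
validate_drop_rule(uint16_t port_id)
{
	struct rte_flow_attr attr = { .ingress = 1 };
	struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_DROP },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	struct rte_flow_error error = { 0 };
	int ret = rte_flow_validate(port_id, &attr, pattern, actions,
				    &error);

	if (ret)
		printf("rule rejected: %s\n",
		       error.message ? error.message : "(none)");
	return ret;
}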

Signed-off-by: Ori Kam <orika@mellanox.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/Makefile       |   6 +
 drivers/net/mlx5/meson.build    |   3 +
 drivers/net/mlx5/mlx5_flow.h    |   6 +
 drivers/net/mlx5/mlx5_flow_dv.c | 312 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 327 insertions(+)
 create mode 100644 drivers/net/mlx5/mlx5_flow_dv.c

diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index 9bd6bfb82..d510a4275 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -31,6 +31,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_stats.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rss.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_mr.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow.c
+SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow_dv.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow_verbs.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_socket.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_nl.c
@@ -136,6 +137,11 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
 		enum MLX5DV_CONTEXT_FLAGS_CQE_128B_COMP \
 		$(AUTOCONF_OUTPUT)
 	$Q sh -- '$<' '$@' \
+		HAVE_IBV_FLOW_DV_SUPPORT \
+		infiniband/mlx5dv.h \
+		enum MLX5DV_FLOW_ACTION_TAG \
+		$(AUTOCONF_OUTPUT)
+	$Q sh -- '$<' '$@' \
 		HAVE_ETHTOOL_LINK_MODE_25G \
 		/usr/include/linux/ethtool.h \
 		enum ETHTOOL_LINK_MODE_25000baseCR_Full_BIT \
diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
index 40cc95038..8075496f7 100644
--- a/drivers/net/mlx5/meson.build
+++ b/drivers/net/mlx5/meson.build
@@ -31,6 +31,7 @@ if build
 		'mlx5.c',
 		'mlx5_ethdev.c',
 		'mlx5_flow.c',
+		'mlx5_flow_dv.c',
 		'mlx5_flow_verbs.c',
 		'mlx5_mac.c',
 		'mlx5_mr.c',
@@ -93,6 +94,8 @@ if build
 		'MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED' ],
 		[ 'HAVE_IBV_MLX5_MOD_CQE_128B_COMP', 'infiniband/mlx5dv.h',
 		'MLX5DV_CONTEXT_FLAGS_CQE_128B_COMP' ],
+		[ 'HAVE_IBV_FLOW_DV_SUPPORT', 'infiniband/mlx5dv.h',
+		'MLX5DV_FLOW_ACTION_TAG' ],
 		[ 'HAVE_IBV_DEVICE_MPLS_SUPPORT', 'infiniband/verbs.h',
 		'IBV_FLOW_SPEC_MPLS' ],
 		[ 'HAVE_IBV_WQ_FLAG_RX_END_PADDING', 'infiniband/verbs.h',
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 4df60db92..9b0cd28ae 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -103,6 +103,9 @@
 #define MLX5_PRIORITY_MAP_L4 0
 #define MLX5_PRIORITY_MAP_MAX 3
 
+/* Max number of actions per DV flow. */
+#define MLX5_DV_MAX_NUMBER_OF_ACTIONS 8
+
 /* Verbs specification header. */
 struct ibv_spec_header {
 	enum ibv_flow_spec_type type;
@@ -250,6 +253,9 @@ int mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
 				      struct rte_flow_error *error);
 void mlx5_flow_init_driver_ops(struct rte_eth_dev *dev);
 
+/* mlx5_flow_dv.c */
+void mlx5_flow_dv_get_driver_ops(struct mlx5_flow_driver_ops *flow_ops);
+
 /* mlx5_flow_verbs.c */
 
 void mlx5_flow_verbs_get_driver_ops(struct mlx5_flow_driver_ops *flow_ops);
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
new file mode 100644
index 000000000..86a8b3cd0
--- /dev/null
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -0,0 +1,312 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Mellanox Technologies, Ltd
+ */
+
+#include <sys/queue.h>
+#include <stdalign.h>
+#include <stdint.h>
+#include <string.h>
+
+/* Verbs header. */
+/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+#include <infiniband/verbs.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-Wpedantic"
+#endif
+
+#include <rte_common.h>
+#include <rte_ether.h>
+#include <rte_eth_ctrl.h>
+#include <rte_ethdev_driver.h>
+#include <rte_flow.h>
+#include <rte_flow_driver.h>
+#include <rte_malloc.h>
+#include <rte_ip.h>
+
+#include "mlx5.h"
+#include "mlx5_defs.h"
+#include "mlx5_prm.h"
+#include "mlx5_glue.h"
+#include "mlx5_flow.h"
+
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+
+/**
+ * Verify the @p attributes will be correctly understood by the NIC and store
+ * them in the @p flow if everything is correct.
+ *
+ * @param[in] dev
+ *   Pointer to dev struct.
+ * @param[in] attributes
+ *   Pointer to flow attributes
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_dv_validate_attributes(struct rte_eth_dev *dev,
+			    const struct rte_flow_attr *attributes,
+			    struct rte_flow_error *error)
+{
+	struct priv *priv = dev->data->dev_private;
+	uint32_t priority_max = priv->config.flow_prio - 1;
+
+	if (attributes->group)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
+					  NULL,
+					  "groups is not supported");
+	if (attributes->priority != MLX5_FLOW_PRIO_RSVD &&
+	    attributes->priority >= priority_max)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
+					  NULL,
+					  "priority out of range");
+	if (attributes->egress)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
+					  NULL,
+					  "egress is not supported");
+	if (attributes->transfer)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
+					  NULL,
+					  "transfer is not supported");
+	if (!attributes->ingress)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
+					  NULL,
+					  "ingress attribute is mandatory");
+	return 0;
+}
+
+/**
+ * Internal validation function. For validating both actions and items.
+ *
+ * @param[in] dev
+ *   Pointer to the rte_eth_dev structure.
+ * @param[in] attr
+ *   Pointer to the flow attributes.
+ * @param[in] items
+ *   Pointer to the list of items.
+ * @param[in] actions
+ *   Pointer to the list of actions.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr,
+		 const struct rte_flow_item items[],
+		 const struct rte_flow_action actions[],
+		 struct rte_flow_error *error)
+{
+	int ret;
+	uint32_t action_flags = 0;
+	uint32_t item_flags = 0;
+	int tunnel = 0;
+	uint8_t next_protocol = 0xff;
+	int actions_n = 0;
+
+	if (items == NULL)
+		return -1;
+	ret = flow_dv_validate_attributes(dev, attr, error);
+	if (ret < 0)
+		return ret;
+	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
+		switch (items->type) {
+		case RTE_FLOW_ITEM_TYPE_VOID:
+			break;
+		case RTE_FLOW_ITEM_TYPE_ETH:
+			ret = mlx5_flow_validate_item_eth(items, item_flags,
+							  error);
+			if (ret < 0)
+				return ret;
+			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
+					       MLX5_FLOW_LAYER_OUTER_L2;
+			break;
+		case RTE_FLOW_ITEM_TYPE_VLAN:
+			ret = mlx5_flow_validate_item_vlan(items, item_flags,
+							   error);
+			if (ret < 0)
+				return ret;
+			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
+					       MLX5_FLOW_LAYER_OUTER_VLAN;
+			break;
+		case RTE_FLOW_ITEM_TYPE_IPV4:
+			ret = mlx5_flow_validate_item_ipv4(items, item_flags,
+							   error);
+			if (ret < 0)
+				return ret;
+			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
+					       MLX5_FLOW_LAYER_OUTER_L3_IPV4;
+			if (items->mask != NULL &&
+			    ((const struct rte_flow_item_ipv4 *)
+			     items->mask)->hdr.next_proto_id)
+				next_protocol =
+					((const struct rte_flow_item_ipv4 *)
+					 (items->spec))->hdr.next_proto_id;
+			break;
+		case RTE_FLOW_ITEM_TYPE_IPV6:
+			ret = mlx5_flow_validate_item_ipv6(items, item_flags,
+							   error);
+			if (ret < 0)
+				return ret;
+			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
+					       MLX5_FLOW_LAYER_OUTER_L3_IPV6;
+			if (items->mask != NULL &&
+			    ((const struct rte_flow_item_ipv6 *)
+			     items->mask)->hdr.proto)
+				next_protocol =
+					((const struct rte_flow_item_ipv6 *)
+					 items->spec)->hdr.proto;
+			break;
+		case RTE_FLOW_ITEM_TYPE_UDP:
+			ret = mlx5_flow_validate_item_udp(items, item_flags,
+							  next_protocol,
+							  error);
+			if (ret < 0)
+				return ret;
+			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
+					       MLX5_FLOW_LAYER_OUTER_L4_UDP;
+			break;
+		case RTE_FLOW_ITEM_TYPE_TCP:
+			ret = mlx5_flow_validate_item_tcp(items, item_flags,
+							  next_protocol, error);
+			if (ret < 0)
+				return ret;
+			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
+					       MLX5_FLOW_LAYER_OUTER_L4_TCP;
+			break;
+		case RTE_FLOW_ITEM_TYPE_VXLAN:
+			ret = mlx5_flow_validate_item_vxlan(items, item_flags,
+							    error);
+			if (ret < 0)
+				return ret;
+			item_flags |= MLX5_FLOW_LAYER_VXLAN;
+			break;
+		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
+			ret = mlx5_flow_validate_item_vxlan_gpe(items,
+								item_flags, dev,
+								error);
+			if (ret < 0)
+				return ret;
+			item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
+			break;
+		case RTE_FLOW_ITEM_TYPE_GRE:
+			ret = mlx5_flow_validate_item_gre(items, item_flags,
+							  next_protocol, error);
+			if (ret < 0)
+				return ret;
+			item_flags |= MLX5_FLOW_LAYER_GRE;
+			break;
+		case RTE_FLOW_ITEM_TYPE_MPLS:
+			ret = mlx5_flow_validate_item_mpls(items, item_flags,
+							   next_protocol,
+							   error);
+			if (ret < 0)
+				return ret;
+			item_flags |= MLX5_FLOW_LAYER_MPLS;
+			break;
+		default:
+			return rte_flow_error_set(error, ENOTSUP,
+						  RTE_FLOW_ERROR_TYPE_ITEM,
+						  NULL, "item not supported");
+		}
+	}
+	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+		if (actions_n == MLX5_DV_MAX_NUMBER_OF_ACTIONS)
+			return rte_flow_error_set(error, ENOTSUP,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  actions, "too many actions");
+		tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
+		switch (actions->type) {
+		case RTE_FLOW_ACTION_TYPE_VOID:
+			break;
+		case RTE_FLOW_ACTION_TYPE_FLAG:
+			ret = mlx5_flow_validate_action_flag(action_flags,
+							     error);
+			if (ret < 0)
+				return ret;
+			action_flags |= MLX5_ACTION_FLAG;
+			++actions_n;
+			break;
+		case RTE_FLOW_ACTION_TYPE_MARK:
+			ret = mlx5_flow_validate_action_mark(actions,
+							     action_flags,
+							     error);
+			if (ret < 0)
+				return ret;
+			action_flags |= MLX5_ACTION_MARK;
+			++actions_n;
+			break;
+		case RTE_FLOW_ACTION_TYPE_DROP:
+			ret = mlx5_flow_validate_action_drop(action_flags,
+							     error);
+			if (ret < 0)
+				return ret;
+			action_flags |= MLX5_ACTION_DROP;
+			++actions_n;
+			break;
+		case RTE_FLOW_ACTION_TYPE_QUEUE:
+			ret = mlx5_flow_validate_action_queue(actions,
+							      action_flags, dev,
+							      error);
+			if (ret < 0)
+				return ret;
+			action_flags |= MLX5_ACTION_QUEUE;
+			++actions_n;
+			break;
+		case RTE_FLOW_ACTION_TYPE_RSS:
+			ret = mlx5_flow_validate_action_rss(actions,
+							    action_flags, dev,
+							    error);
+			if (ret < 0)
+				return ret;
+			action_flags |= MLX5_ACTION_RSS;
+			++actions_n;
+			break;
+		case RTE_FLOW_ACTION_TYPE_COUNT:
+			ret = mlx5_flow_validate_action_count(dev, error);
+			if (ret < 0)
+				return ret;
+			action_flags |= MLX5_ACTION_COUNT;
+			++actions_n;
+			break;
+		default:
+			return rte_flow_error_set(error, ENOTSUP,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  actions,
+						  "action not supported");
+		}
+	}
+	return 0;
+}
+
+/**
+ * Fills the flow_ops with the function pointers.
+ *
+ * @param[out] flow_ops
+ *   Pointer to driver_ops structure.
+ */
+void
+mlx5_flow_dv_get_driver_ops(struct mlx5_flow_driver_ops *flow_ops)
+{
+	*flow_ops = (struct mlx5_flow_driver_ops) {
+		.validate = flow_dv_validate,
+		.prepare = NULL,
+		.translate = NULL,
+		.apply = NULL,
+		.remove = NULL,
+		.destroy = NULL,
+	};
+}
+
+#endif /* HAVE_IBV_FLOW_DV_SUPPORT */
-- 
2.11.0

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [dpdk-dev] [PATCH v3 06/11] net/mlx5: add Direct Verbs prepare function
  2018-09-24 23:17 ` [dpdk-dev] [PATCH v3 00/11] net/mlx5: add Direct Verbs flow driver support Yongseok Koh
                     ` (4 preceding siblings ...)
  2018-09-24 23:17   ` [dpdk-dev] [PATCH v3 05/11] net/mlx5: add Direct Verbs validation function Yongseok Koh
@ 2018-09-24 23:17   ` Yongseok Koh
  2018-09-24 23:17   ` [dpdk-dev] [PATCH v3 07/11] net/mlx5: add Direct Verbs translate items Yongseok Koh
                     ` (5 subsequent siblings)
  11 siblings, 0 replies; 22+ messages in thread
From: Yongseok Koh @ 2018-09-24 23:17 UTC (permalink / raw)
  To: Thomas Monjalon, Shahaf Shuler; +Cc: dev, Ori Kam

From: Ori Kam <orika@mellanox.com>

This commit adds the function that allocates the Direct Verbs device
flow and introduces the relevant PRM structures.

This commit also adds the matcher object. The matcher object acts as a
mask and should be shared between flows. For example, all rules that
match on source IP with a full mask should use the same matcher. A
flow that matches on destination IP, or on source IP with a partial
mask, needs a new matcher allocated.
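
As a minimal sketch (the lookup helper below is hypothetical, not part
of this patch), matcher reuse amounts to walking the cache list added
to struct priv and taking a reference when the CRC, priority, direction
and mask all match:

/* Hypothetical helper; assumes <string.h>, <sys/queue.h> and the
 * structures declared in this patch. The caller allocates a new
 * matcher when NULL is returned. */
static struct mlx5_flow_dv_matcher *
flow_dv_matcher_lookup(struct priv *priv,
		       const struct mlx5_flow_dv_matcher *ref)
{
	struct mlx5_cache *cache;

	LIST_FOREACH(cache, &priv->matchers, next) {
		/* mlx5_cache is the first member, so the cast is safe. */
		struct mlx5_flow_dv_matcher *cur =
			(struct mlx5_flow_dv_matcher *)cache;

		if (cur->crc == ref->crc &&
		    cur->priority == ref->priority &&
		    cur->egress == ref->egress &&
		    !memcmp(&cur->mask, &ref->mask, sizeof(cur->mask))) {
			rte_atomic32_inc(&cache->refcnt);
			return cur;
		}
	}
	return NULL;
}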

Signed-off-by: Ori Kam <orika@mellanox.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5.h         |   1 +
 drivers/net/mlx5/mlx5_flow.h    |  31 +++++-
 drivers/net/mlx5/mlx5_flow_dv.c |  45 ++++++++-
 drivers/net/mlx5/mlx5_prm.h     | 213 ++++++++++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_rxtx.h    |   7 ++
 5 files changed, 295 insertions(+), 2 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 4d3e9f38f..8ff6d6987 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -213,6 +213,7 @@ struct priv {
 	LIST_HEAD(txqibv, mlx5_txq_ibv) txqsibv; /* Verbs Tx queues. */
 	/* Verbs Indirection tables. */
 	LIST_HEAD(ind_tables, mlx5_ind_table_ibv) ind_tbls;
+	LIST_HEAD(matcher, mlx5_cache) matchers;
 	uint32_t link_speed_capa; /* Link speed capabilities. */
 	struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */
 	int primary_socket; /* Unix socket for primary process. */
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 9b0cd28ae..0cf496db3 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -106,6 +106,34 @@
 /* Max number of actions per DV flow. */
 #define MLX5_DV_MAX_NUMBER_OF_ACTIONS 8
 
+/* Matcher PRM representation */
+struct mlx5_flow_dv_match_params {
+	size_t size;
+	/**< Size of match value. Do NOT split size and key! */
+	uint32_t buf[MLX5_ST_SZ_DW(fte_match_param)];
+	/**< Matcher value. This value is used as the mask or as a key. */
+};
+
+/* Matcher structure. */
+struct mlx5_flow_dv_matcher {
+	struct mlx5_cache cache; /**< Cache to struct mlx5dv_flow_matcher. */
+	uint16_t crc; /**< CRC of key. */
+	uint16_t priority; /**< Priority of matcher. */
+	uint8_t egress; /**< Egress matcher. */
+	struct mlx5_flow_dv_match_params mask; /**< Matcher mask. */
+};
+
+/* DV flows structure. */
+struct mlx5_flow_dv {
+	uint64_t hash_fields; /**< Fields that participate in the hash. */
+	struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
+	/* Flow DV api: */
+	struct mlx5_flow_dv_matcher *matcher; /**< Cache to matcher. */
+	struct mlx5_flow_dv_match_params value;
+	/**< Holds the value that the packet is compared to. */
+	struct ibv_flow *flow; /**< Installed flow. */
+};
+
 /* Verbs specification header. */
 struct ibv_spec_header {
 	enum ibv_flow_spec_type type;
@@ -132,7 +160,8 @@ struct mlx5_flow {
 	struct rte_flow *flow; /**< Pointer to the main flow. */
 	uint32_t layers; /**< Bit-fields that holds the detected layers. */
 	union {
-		struct mlx5_flow_verbs verbs; /**< Holds the verbs dev-flow. */
+		struct mlx5_flow_dv dv;
+		struct mlx5_flow_verbs verbs;
 	};
 };
 
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 86a8b3cd0..30d501a61 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -291,6 +291,49 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr,
 }
 
 /**
+ * Internal preparation function. Allocates the DV flow whose size
+ * is constant.
+ *
+ * @param[in] attr
+ *   Pointer to the flow attributes.
+ * @param[in] items
+ *   Pointer to the list of items.
+ * @param[in] actions
+ *   Pointer to the list of actions.
+ * @param[out] item_flags
+ *   Pointer to bit mask of all items detected.
+ * @param[out] action_flags
+ *   Pointer to bit mask of all actions detected.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   Pointer to mlx5_flow object on success,
+ *   otherwise NULL and rte_errno is set.
+ */
+static struct mlx5_flow *
+flow_dv_prepare(const struct rte_flow_attr *attr __rte_unused,
+		const struct rte_flow_item items[] __rte_unused,
+		const struct rte_flow_action actions[] __rte_unused,
+		uint64_t *item_flags __rte_unused,
+		uint64_t *action_flags __rte_unused,
+		struct rte_flow_error *error)
+{
+	uint32_t size = sizeof(struct mlx5_flow);
+	struct mlx5_flow *flow;
+
+	flow = rte_calloc(__func__, 1, size, 0);
+	if (!flow) {
+		rte_flow_error_set(error, ENOMEM,
+				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+				   "not enough memory to create flow");
+		return NULL;
+	}
+	flow->dv.value.size = MLX5_ST_SZ_DB(fte_match_param);
+	return flow;
+}
+
+/**
  * Fills the flow_ops with the function pointers.
  *
  * @param[out] flow_ops
@@ -301,7 +344,7 @@ mlx5_flow_dv_get_driver_ops(struct mlx5_flow_driver_ops *flow_ops)
 {
 	*flow_ops = (struct mlx5_flow_driver_ops) {
 		.validate = flow_dv_validate,
-		.prepare = NULL,
+		.prepare = flow_dv_prepare,
 		.translate = NULL,
 		.apply = NULL,
 		.remove = NULL,
diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h
index 0870d32fd..2222e7fbd 100644
--- a/drivers/net/mlx5/mlx5_prm.h
+++ b/drivers/net/mlx5/mlx5_prm.h
@@ -280,6 +280,219 @@ struct mlx5_cqe {
 /* CQE format value. */
 #define MLX5_COMPRESSED 0x3
 
+/* The fields of a packet that can be modified. */
+enum mlx5_modification_field {
+	MLX5_MODI_OUT_SMAC_47_16 = 1,
+	MLX5_MODI_OUT_SMAC_15_0,
+	MLX5_MODI_OUT_ETHERTYPE,
+	MLX5_MODI_OUT_DMAC_47_16,
+	MLX5_MODI_OUT_DMAC_15_0,
+	MLX5_MODI_OUT_IP_DSCP,
+	MLX5_MODI_OUT_TCP_FLAGS,
+	MLX5_MODI_OUT_TCP_SPORT,
+	MLX5_MODI_OUT_TCP_DPORT,
+	MLX5_MODI_OUT_IPV4_TTL,
+	MLX5_MODI_OUT_UDP_SPORT,
+	MLX5_MODI_OUT_UDP_DPORT,
+	MLX5_MODI_OUT_SIPV6_127_96,
+	MLX5_MODI_OUT_SIPV6_95_64,
+	MLX5_MODI_OUT_SIPV6_63_32,
+	MLX5_MODI_OUT_SIPV6_31_0,
+	MLX5_MODI_OUT_DIPV6_127_96,
+	MLX5_MODI_OUT_DIPV6_95_64,
+	MLX5_MODI_OUT_DIPV6_63_32,
+	MLX5_MODI_OUT_DIPV6_31_0,
+	MLX5_MODI_OUT_SIPV4,
+	MLX5_MODI_OUT_DIPV4,
+	MLX5_MODI_IN_SMAC_47_16 = 0x31,
+	MLX5_MODI_IN_SMAC_15_0,
+	MLX5_MODI_IN_ETHERTYPE,
+	MLX5_MODI_IN_DMAC_47_16,
+	MLX5_MODI_IN_DMAC_15_0,
+	MLX5_MODI_IN_IP_DSCP,
+	MLX5_MODI_IN_TCP_FLAGS,
+	MLX5_MODI_IN_TCP_SPORT,
+	MLX5_MODI_IN_TCP_DPORT,
+	MLX5_MODI_IN_IPV4_TTL,
+	MLX5_MODI_IN_UDP_SPORT,
+	MLX5_MODI_IN_UDP_DPORT,
+	MLX5_MODI_IN_SIPV6_127_96,
+	MLX5_MODI_IN_SIPV6_95_64,
+	MLX5_MODI_IN_SIPV6_63_32,
+	MLX5_MODI_IN_SIPV6_31_0,
+	MLX5_MODI_IN_DIPV6_127_96,
+	MLX5_MODI_IN_DIPV6_95_64,
+	MLX5_MODI_IN_DIPV6_63_32,
+	MLX5_MODI_IN_DIPV6_31_0,
+	MLX5_MODI_IN_SIPV4,
+	MLX5_MODI_IN_DIPV4,
+	MLX5_MODI_OUT_IPV6_HOPLIMIT,
+	MLX5_MODI_IN_IPV6_HOPLIMIT,
+	MLX5_MODI_META_DATA_REG_A,
+	MLX5_MODI_META_DATA_REG_B = 0x50,
+};
+
+/* Modification sub command. */
+struct mlx5_modification_cmd {
+	union {
+		uint32_t data0;
+		struct {
+			unsigned int bits:5;
+			unsigned int rsvd0:3;
+			unsigned int src_offset:5; /* Start bit offset. */
+			unsigned int rsvd1:3;
+			unsigned int src_field:12;
+			unsigned int type:4;
+		};
+	};
+	union {
+		uint32_t data1;
+		uint8_t data[4];
+		struct {
+			unsigned int rsvd2:8;
+			unsigned int dst_offset:8;
+			unsigned int dst_field:12;
+			unsigned int rsvd3:4;
+		};
+	};
+};
+
+typedef uint32_t u32;
+typedef uint16_t u16;
+typedef uint8_t u8;
+
+#define __mlx5_nullp(typ) ((struct mlx5_ifc_##typ##_bits *)0)
+#define __mlx5_bit_sz(typ, fld) sizeof(__mlx5_nullp(typ)->fld)
+#define __mlx5_bit_off(typ, fld) ((unsigned int)(unsigned long) \
+				  (&(__mlx5_nullp(typ)->fld)))
+#define __mlx5_dw_bit_off(typ, fld) (32 - __mlx5_bit_sz(typ, fld) - \
+				    (__mlx5_bit_off(typ, fld) & 0x1f))
+#define __mlx5_dw_off(typ, fld) (__mlx5_bit_off(typ, fld) / 32)
+#define __mlx5_dw_mask(typ, fld) (__mlx5_mask(typ, fld) << \
+				  __mlx5_dw_bit_off(typ, fld))
+#define __mlx5_mask(typ, fld) ((u32)((1ull << __mlx5_bit_sz(typ, fld)) - 1))
+#define __mlx5_16_off(typ, fld) (__mlx5_bit_off(typ, fld) / 16)
+#define __mlx5_16_bit_off(typ, fld) (16 - __mlx5_bit_sz(typ, fld) - \
+				    (__mlx5_bit_off(typ, fld) & 0xf))
+#define __mlx5_mask16(typ, fld) ((u16)((1ull << __mlx5_bit_sz(typ, fld)) - 1))
+#define MLX5_ST_SZ_DW(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 32)
+#define MLX5_ST_SZ_DB(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 8)
+#define MLX5_BYTE_OFF(typ, fld) (__mlx5_bit_off(typ, fld) / 8)
+#define MLX5_ADDR_OF(typ, p, fld) ((char *)(p) + MLX5_BYTE_OFF(typ, fld))
+
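+/*
+ * Usage example (illustrative): MLX5_SET(fte_match_set_lyr_2_4, p,
+ * ip_protocol, 6) reads the big-endian dword that holds ip_protocol,
+ * clears the 8-bit field and ORs in the new value at the field's bit
+ * offset, leaving the neighboring fields intact.
+ */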
+/* Insert a value into a field of a PRM structure. */
+#define MLX5_SET(typ, p, fld, v) \
+	do { \
+		u32 _v = v; \
+		*((__be32 *)(p) + __mlx5_dw_off(typ, fld)) = \
+		rte_cpu_to_be_32((rte_be_to_cpu_32(*((u32 *)(p) + \
+				  __mlx5_dw_off(typ, fld))) & \
+				  (~__mlx5_dw_mask(typ, fld))) | \
+				 (((_v) & __mlx5_mask(typ, fld)) << \
+				   __mlx5_dw_bit_off(typ, fld))); \
+	} while (0)
+#define MLX5_GET16(typ, p, fld) \
+	((rte_be_to_cpu_16(*((__be16 *)(p) + \
+	  __mlx5_16_off(typ, fld))) >> __mlx5_16_bit_off(typ, fld)) & \
+	 __mlx5_mask16(typ, fld))
+#define MLX5_FLD_SZ_BYTES(typ, fld) (__mlx5_bit_sz(typ, fld) / 8)
+
+struct mlx5_ifc_fte_match_set_misc_bits {
+	u8 reserved_at_0[0x8];
+	u8 source_sqn[0x18];
+	u8 reserved_at_20[0x10];
+	u8 source_port[0x10];
+	u8 outer_second_prio[0x3];
+	u8 outer_second_cfi[0x1];
+	u8 outer_second_vid[0xc];
+	u8 inner_second_prio[0x3];
+	u8 inner_second_cfi[0x1];
+	u8 inner_second_vid[0xc];
+	u8 outer_second_cvlan_tag[0x1];
+	u8 inner_second_cvlan_tag[0x1];
+	u8 outer_second_svlan_tag[0x1];
+	u8 inner_second_svlan_tag[0x1];
+	u8 reserved_at_64[0xc];
+	u8 gre_protocol[0x10];
+	u8 gre_key_h[0x18];
+	u8 gre_key_l[0x8];
+	u8 vxlan_vni[0x18];
+	u8 reserved_at_b8[0x8];
+	u8 reserved_at_c0[0x20];
+	u8 reserved_at_e0[0xc];
+	u8 outer_ipv6_flow_label[0x14];
+	u8 reserved_at_100[0xc];
+	u8 inner_ipv6_flow_label[0x14];
+	u8 reserved_at_120[0xe0];
+};
+
+struct mlx5_ifc_ipv4_layout_bits {
+	u8 reserved_at_0[0x60];
+	u8 ipv4[0x20];
+};
+
+struct mlx5_ifc_ipv6_layout_bits {
+	u8 ipv6[16][0x8];
+};
+
+union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits {
+	struct mlx5_ifc_ipv6_layout_bits ipv6_layout;
+	struct mlx5_ifc_ipv4_layout_bits ipv4_layout;
+	u8 reserved_at_0[0x80];
+};
+
+struct mlx5_ifc_fte_match_set_lyr_2_4_bits {
+	u8 smac_47_16[0x20];
+	u8 smac_15_0[0x10];
+	u8 ethertype[0x10];
+	u8 dmac_47_16[0x20];
+	u8 dmac_15_0[0x10];
+	u8 first_prio[0x3];
+	u8 first_cfi[0x1];
+	u8 first_vid[0xc];
+	u8 ip_protocol[0x8];
+	u8 ip_dscp[0x6];
+	u8 ip_ecn[0x2];
+	u8 cvlan_tag[0x1];
+	u8 svlan_tag[0x1];
+	u8 frag[0x1];
+	u8 ip_version[0x4];
+	u8 tcp_flags[0x9];
+	u8 tcp_sport[0x10];
+	u8 tcp_dport[0x10];
+	u8 reserved_at_c0[0x20];
+	u8 udp_sport[0x10];
+	u8 udp_dport[0x10];
+	union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits src_ipv4_src_ipv6;
+	union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits dst_ipv4_dst_ipv6;
+};
+
+struct mlx5_ifc_fte_match_mpls_bits {
+	u8 mpls_label[0x14];
+	u8 mpls_exp[0x3];
+	u8 mpls_s_bos[0x1];
+	u8 mpls_ttl[0x8];
+};
+
+struct mlx5_ifc_fte_match_set_misc2_bits {
+	struct mlx5_ifc_fte_match_mpls_bits outer_first_mpls;
+	struct mlx5_ifc_fte_match_mpls_bits inner_first_mpls;
+	struct mlx5_ifc_fte_match_mpls_bits outer_first_mpls_over_gre;
+	struct mlx5_ifc_fte_match_mpls_bits outer_first_mpls_over_udp;
+	u8 reserved_at_80[0x100];
+	u8 metadata_reg_a[0x20];
+	u8 reserved_at_1a0[0x60];
+};
+
+/* Flow matcher. */
+struct mlx5_ifc_fte_match_param_bits {
+	struct mlx5_ifc_fte_match_set_lyr_2_4_bits outer_headers;
+	struct mlx5_ifc_fte_match_set_misc_bits misc_parameters;
+	struct mlx5_ifc_fte_match_set_lyr_2_4_bits inner_headers;
+	struct mlx5_ifc_fte_match_set_misc2_bits misc_parameters_2;
+	u8 reserved_at_800[0x800];
+};
+
 /* CQE format mask. */
 #define MLX5E_CQE_FORMAT_MASK 0xc
 
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index d225b9c27..02034a4f4 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -171,6 +171,13 @@ struct mlx5_hrxq {
 	uint8_t rss_key[]; /* Hash key. */
 };
 
+/* Cached object list entry. */
+struct mlx5_cache {
+	LIST_ENTRY(mlx5_cache) next; /* Pointer to the next element. */
+	rte_atomic32_t refcnt; /* Reference counter. */
+	void *resource; /* Cached resource */
+};
+
 /* TX queue descriptor. */
 __extension__
 struct mlx5_txq_data {
-- 
2.11.0

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [dpdk-dev] [PATCH v3 07/11] net/mlx5: add Direct Verbs translate items
  2018-09-24 23:17 ` [dpdk-dev] [PATCH v3 00/11] net/mlx5: add Direct Verbs flow driver support Yongseok Koh
                     ` (5 preceding siblings ...)
  2018-09-24 23:17   ` [dpdk-dev] [PATCH v3 06/11] net/mlx5: add Direct Verbs prepare function Yongseok Koh
@ 2018-09-24 23:17   ` Yongseok Koh
  2018-09-24 23:17   ` [dpdk-dev] [PATCH v3 08/11] net/mlx5: add Direct Verbs translate actions Yongseok Koh
                     ` (4 subsequent siblings)
  11 siblings, 0 replies; 22+ messages in thread
From: Yongseok Koh @ 2018-09-24 23:17 UTC (permalink / raw)
  To: Thomas Monjalon, Shahaf Shuler; +Cc: dev, Ori Kam

From: Ori Kam <orika@mellanox.com>

This commit handles the translation of the requested flow into the
Direct Verbs API.

Direct Verbs introduces the matcher object, which acts as a shared mask
for all flows that use the same mask. Translating an item therefore
produces both a matcher and the value that should be matched.
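
For illustration only (a fragment, not code from this patch): assuming
matcher and key both point at fte_match_param buffers, a single
IP-protocol match updates the two sides through the PRM accessors added
earlier in this series:

	void *headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
				       outer_headers);
	void *headers_v = MLX5_ADDR_OF(fte_match_param, key, outer_headers);

	/* The mask marks all 8 protocol bits; the value carries TCP (6). */
	MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_protocol, 0xff);
	MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, 6);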

Signed-off-by: Ori Kam <orika@mellanox.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5_flow.c       |  36 ++
 drivers/net/mlx5/mlx5_flow.h       |  25 ++
 drivers/net/mlx5/mlx5_flow_dv.c    | 775 ++++++++++++++++++++++++++++++++++++-
 drivers/net/mlx5/mlx5_flow_verbs.c |  72 +---
 drivers/net/mlx5/mlx5_prm.h        |   7 +
 5 files changed, 858 insertions(+), 57 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 1c177b9c8..5632e31c5 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -444,6 +444,42 @@ mlx5_flow_item_acceptable(const struct rte_flow_item *item,
 }
 
 /**
+ * Adjust the hash fields according to the @p flow information.
+ *
+ * @param[in] dev_flow
+ *   Pointer to the mlx5_flow.
+ * @param[in] tunnel
+ *   1 when the hash field is for a tunnel item.
+ * @param[in] layer_types
+ *   ETH_RSS_* types.
+ * @param[in] hash_fields
+ *   Item hash fields.
+ *
+ * @return
+ *   The hash fields that should be used.
+ */
+uint64_t
+mlx5_flow_hashfields_adjust(struct mlx5_flow *dev_flow,
+			    int tunnel __rte_unused, uint32_t layer_types,
+			    uint64_t hash_fields)
+{
+	struct rte_flow *flow = dev_flow->flow;
+#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
+	int rss_request_inner = flow->rss.level >= 2;
+
+	/* Check RSS hash level for tunnel. */
+	if (tunnel && rss_request_inner)
+		hash_fields |= IBV_RX_HASH_INNER;
+	else if (tunnel || rss_request_inner)
+		return 0;
+#endif
+	/* Check if requested layer matches RSS hash fields. */
+	if (!(flow->rss.types & layer_types))
+		return 0;
+	return hash_fields;
+}
+
+/**
  * Lookup and set the ptype in the data Rx part.  A single Ptype can be used,
  * if several tunnel rules are used on this queue, the tunnel ptype will be
  * cleared.
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 0cf496db3..7f0566fc9 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -89,6 +89,10 @@
 #define MLX5_IP_PROTOCOL_GRE 47
 #define MLX5_IP_PROTOCOL_MPLS 147
 
+/* UDP destination ports for VXLAN and VXLAN-GPE. */
+#define MLX5_VXLAN 4789
+#define MLX5_VXLAN_GPE 4790
+
 /* Priority reserved for default flows. */
 #define MLX5_FLOW_PRIO_RSVD ((uint32_t)-1)
 
@@ -103,6 +107,24 @@
 #define MLX5_PRIORITY_MAP_L4 0
 #define MLX5_PRIORITY_MAP_MAX 3
 
+/* Valid layer type for IPV4 RSS. */
+#define MLX5_IPV4_LAYER_TYPES \
+	(ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 | \
+	 ETH_RSS_NONFRAG_IPV4_TCP | ETH_RSS_NONFRAG_IPV4_UDP | \
+	 ETH_RSS_NONFRAG_IPV4_OTHER)
+
+/* IBV hash source bits for IPV4. */
+#define MLX5_IPV4_IBV_RX_HASH (IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4)
+
+/* Valid layer type for IPV6 RSS. */
+#define MLX5_IPV6_LAYER_TYPES \
+	(ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 | ETH_RSS_NONFRAG_IPV6_TCP | \
+	 ETH_RSS_NONFRAG_IPV6_UDP | ETH_RSS_IPV6_EX | ETH_RSS_IPV6_TCP_EX | \
+	 ETH_RSS_IPV6_UDP_EX | ETH_RSS_NONFRAG_IPV6_OTHER)
+
+/* IBV hash source bits for IPV6. */
+#define MLX5_IPV6_IBV_RX_HASH (IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6)
+
 /* Max number of actions per DV flow. */
 #define MLX5_DV_MAX_NUMBER_OF_ACTIONS 8
 
@@ -223,6 +245,9 @@ struct mlx5_flow_driver_ops {
 
 /* mlx5_flow.c */
 
+uint64_t mlx5_flow_hashfields_adjust(struct mlx5_flow *dev_flow, int tunnel,
+				     uint32_t layer_types,
+				     uint64_t hash_fields);
 uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
 				   uint32_t subpriority);
 int mlx5_flow_validate_action_count(struct rte_eth_dev *dev,
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 30d501a61..acb1b7549 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -334,6 +334,779 @@ flow_dv_prepare(const struct rte_flow_attr *attr __rte_unused,
 }
 
 /**
+ * Add Ethernet item to matcher and to the value.
+ *
+ * @param[in, out] matcher
+ *   Flow matcher.
+ * @param[in, out] key
+ *   Flow matcher value.
+ * @param[in] item
+ *   Flow pattern to translate.
+ * @param[in] inner
+ *   Item is inner pattern.
+ */
+static void
+flow_dv_translate_item_eth(void *matcher, void *key,
+			   const struct rte_flow_item *item, int inner)
+{
+	const struct rte_flow_item_eth *eth_m = item->mask;
+	const struct rte_flow_item_eth *eth_v = item->spec;
+	const struct rte_flow_item_eth nic_mask = {
+		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+		.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+		.type = RTE_BE16(0xffff),
+	};
+	void *headers_m;
+	void *headers_v;
+	char *l24_v;
+	unsigned int i;
+
+	if (!eth_v)
+		return;
+	if (!eth_m)
+		eth_m = &nic_mask;
+	if (inner) {
+		headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+					 inner_headers);
+		headers_v = MLX5_ADDR_OF(fte_match_param, key, inner_headers);
+	} else {
+		headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+					 outer_headers);
+		headers_v = MLX5_ADDR_OF(fte_match_param, key, outer_headers);
+	}
+	memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_m, dmac_47_16),
+	       &eth_m->dst, sizeof(eth_m->dst));
+	/* The value must be in the range of the mask. */
+	l24_v = MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, dmac_47_16);
+	for (i = 0; i < sizeof(eth_m->dst); ++i)
+		l24_v[i] = eth_m->dst.addr_bytes[i] & eth_v->dst.addr_bytes[i];
+	memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_m, smac_47_16),
+	       &eth_m->src, sizeof(eth_m->src));
+	l24_v = MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, smac_47_16);
+	/* The value must be in the range of the mask. */
+	for (i = 0; i < sizeof(eth_m->src); ++i)
+		l24_v[i] = eth_m->src.addr_bytes[i] & eth_v->src.addr_bytes[i];
+	MLX5_SET(fte_match_set_lyr_2_4, headers_m, ethertype,
+		 rte_be_to_cpu_16(eth_m->type));
+	l24_v = MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, ethertype);
+	*(uint16_t *)(l24_v) = eth_m->type & eth_v->type;
+}
+
+/**
+ * Add VLAN item to matcher and to the value.
+ *
+ * @param[in, out] matcher
+ *   Flow matcher.
+ * @param[in, out] key
+ *   Flow matcher value.
+ * @param[in] item
+ *   Flow pattern to translate.
+ * @param[in] inner
+ *   Item is inner pattern.
+ */
+static void
+flow_dv_translate_item_vlan(void *matcher, void *key,
+			    const struct rte_flow_item *item,
+			    int inner)
+{
+	const struct rte_flow_item_vlan *vlan_m = item->mask;
+	const struct rte_flow_item_vlan *vlan_v = item->spec;
+	const struct rte_flow_item_vlan nic_mask = {
+		.tci = RTE_BE16(0x0fff),
+		.inner_type = RTE_BE16(0xffff),
+	};
+	void *headers_m;
+	void *headers_v;
+	uint16_t tci_m;
+	uint16_t tci_v;
+
+	if (!vlan_v)
+		return;
+	if (!vlan_m)
+		vlan_m = &nic_mask;
+	if (inner) {
+		headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+					 inner_headers);
+		headers_v = MLX5_ADDR_OF(fte_match_param, key, inner_headers);
+	} else {
+		headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+					 outer_headers);
+		headers_v = MLX5_ADDR_OF(fte_match_param, key, outer_headers);
+	}
+	tci_m = rte_be_to_cpu_16(vlan_m->tci);
+	tci_v = rte_be_to_cpu_16(vlan_m->tci & vlan_v->tci);
+	MLX5_SET(fte_match_set_lyr_2_4, headers_m, cvlan_tag, 1);
+	MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);
+	MLX5_SET(fte_match_set_lyr_2_4, headers_m, first_vid, tci_m);
+	MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, tci_v);
+	MLX5_SET(fte_match_set_lyr_2_4, headers_m, first_cfi, tci_m >> 12);
+	MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_cfi, tci_v >> 12);
+	MLX5_SET(fte_match_set_lyr_2_4, headers_m, first_prio, tci_m >> 13);
+	MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, tci_v >> 13);
+}
+
+/**
+ * Add IPV4 item to matcher and to the value.
+ *
+ * @param[in, out] matcher
+ *   Flow matcher.
+ * @param[in, out] key
+ *   Flow matcher value.
+ * @param[in] item
+ *   Flow pattern to translate.
+ * @param[in] inner
+ *   Item is inner pattern.
+ */
+static void
+flow_dv_translate_item_ipv4(void *matcher, void *key,
+			    const struct rte_flow_item *item,
+			    int inner)
+{
+	const struct rte_flow_item_ipv4 *ipv4_m = item->mask;
+	const struct rte_flow_item_ipv4 *ipv4_v = item->spec;
+	const struct rte_flow_item_ipv4 nic_mask = {
+		.hdr = {
+			.src_addr = RTE_BE32(0xffffffff),
+			.dst_addr = RTE_BE32(0xffffffff),
+			.type_of_service = 0xff,
+			.next_proto_id = 0xff,
+		},
+	};
+	void *headers_m;
+	void *headers_v;
+	char *l24_m;
+	char *l24_v;
+	uint8_t tos;
+
+	if (!ipv4_v)
+		return;
+	if (!ipv4_m)
+		ipv4_m = &nic_mask;
+	if (inner) {
+		headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+					 inner_headers);
+		headers_v = MLX5_ADDR_OF(fte_match_param, key, inner_headers);
+	} else {
+		headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+					 outer_headers);
+		headers_v = MLX5_ADDR_OF(fte_match_param, key, outer_headers);
+	}
+	MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_version, 0xf);
+	MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_version, 4);
+	l24_m = MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_m,
+			     dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+	l24_v = MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+			     dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+	*(uint32_t *)l24_m = ipv4_m->hdr.dst_addr;
+	*(uint32_t *)l24_v = ipv4_m->hdr.dst_addr & ipv4_v->hdr.dst_addr;
+	l24_m = MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_m,
+			  src_ipv4_src_ipv6.ipv4_layout.ipv4);
+	l24_v = MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+			  src_ipv4_src_ipv6.ipv4_layout.ipv4);
+	*(uint32_t *)l24_m = ipv4_m->hdr.src_addr;
+	*(uint32_t *)l24_v = ipv4_m->hdr.src_addr & ipv4_v->hdr.src_addr;
+	tos = ipv4_m->hdr.type_of_service & ipv4_v->hdr.type_of_service;
+	MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_ecn,
+		 ipv4_m->hdr.type_of_service);
+	MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, tos);
+	MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_dscp,
+		 ipv4_m->hdr.type_of_service >> 2);
+	MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, tos >> 2);
+	MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_protocol,
+		 ipv4_m->hdr.next_proto_id);
+	MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
+		 ipv4_v->hdr.next_proto_id & ipv4_m->hdr.next_proto_id);
+}
+
+/**
+ * Add IPV6 item to matcher and to the value.
+ *
+ * @param[in, out] matcher
+ *   Flow matcher.
+ * @param[in, out] key
+ *   Flow matcher value.
+ * @param[in] item
+ *   Flow pattern to translate.
+ * @param[in] inner
+ *   Item is inner pattern.
+ */
+static void
+flow_dv_translate_item_ipv6(void *matcher, void *key,
+			    const struct rte_flow_item *item,
+			    int inner)
+{
+	const struct rte_flow_item_ipv6 *ipv6_m = item->mask;
+	const struct rte_flow_item_ipv6 *ipv6_v = item->spec;
+	const struct rte_flow_item_ipv6 nic_mask = {
+		.hdr = {
+			.src_addr =
+				"\xff\xff\xff\xff\xff\xff\xff\xff"
+				"\xff\xff\xff\xff\xff\xff\xff\xff",
+			.dst_addr =
+				"\xff\xff\xff\xff\xff\xff\xff\xff"
+				"\xff\xff\xff\xff\xff\xff\xff\xff",
+			.vtc_flow = RTE_BE32(0xffffffff),
+			.proto = 0xff,
+			.hop_limits = 0xff,
+		},
+	};
+	void *headers_m;
+	void *headers_v;
+	void *misc_m = MLX5_ADDR_OF(fte_match_param, matcher, misc_parameters);
+	void *misc_v = MLX5_ADDR_OF(fte_match_param, key, misc_parameters);
+	char *l24_m;
+	char *l24_v;
+	uint32_t vtc_m;
+	uint32_t vtc_v;
+	int i;
+	int size;
+
+	if (!ipv6_v)
+		return;
+	if (!ipv6_m)
+		ipv6_m = &nic_mask;
+	if (inner) {
+		headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+					 inner_headers);
+		headers_v = MLX5_ADDR_OF(fte_match_param, key, inner_headers);
+	} else {
+		headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+					 outer_headers);
+		headers_v = MLX5_ADDR_OF(fte_match_param, key, outer_headers);
+	}
+	size = sizeof(ipv6_m->hdr.dst_addr);
+	l24_m = MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_m,
+			     dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
+	l24_v = MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+			     dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
+	memcpy(l24_m, ipv6_m->hdr.dst_addr, size);
+	for (i = 0; i < size; ++i)
+		l24_v[i] = l24_m[i] & ipv6_v->hdr.dst_addr[i];
+	l24_m = MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_m,
+			     src_ipv4_src_ipv6.ipv6_layout.ipv6);
+	l24_v = MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+			     src_ipv4_src_ipv6.ipv6_layout.ipv6);
+	memcpy(l24_m, ipv6_m->hdr.src_addr, size);
+	for (i = 0; i < size; ++i)
+		l24_v[i] = l24_m[i] & ipv6_v->hdr.src_addr[i];
+	MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_version, 0xf);
+	MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_version, 6);
+	/* TOS. */
+	vtc_m = rte_be_to_cpu_32(ipv6_m->hdr.vtc_flow);
+	vtc_v = rte_be_to_cpu_32(ipv6_m->hdr.vtc_flow & ipv6_v->hdr.vtc_flow);
+	MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_ecn, vtc_m >> 20);
+	MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, vtc_v >> 20);
+	MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_dscp, vtc_m >> 22);
+	MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, vtc_v >> 22);
+	/* Label. */
+	if (inner) {
+		MLX5_SET(fte_match_set_misc, misc_m, inner_ipv6_flow_label,
+			 vtc_m);
+		MLX5_SET(fte_match_set_misc, misc_v, inner_ipv6_flow_label,
+			 vtc_v);
+	} else {
+		MLX5_SET(fte_match_set_misc, misc_m, outer_ipv6_flow_label,
+			 vtc_m);
+		MLX5_SET(fte_match_set_misc, misc_v, outer_ipv6_flow_label,
+			 vtc_v);
+	}
+	/* Protocol. */
+	MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_protocol,
+		 ipv6_m->hdr.proto);
+	MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
+		 ipv6_v->hdr.proto & ipv6_m->hdr.proto);
+}
+
+/**
+ * Add TCP item to matcher and to the value.
+ *
+ * @param[in, out] matcher
+ *   Flow matcher.
+ * @param[in, out] key
+ *   Flow matcher value.
+ * @param[in] item
+ *   Flow pattern to translate.
+ * @param[in] inner
+ *   Item is inner pattern.
+ */
+static void
+flow_dv_translate_item_tcp(void *matcher, void *key,
+			   const struct rte_flow_item *item,
+			   int inner)
+{
+	const struct rte_flow_item_tcp *tcp_m = item->mask;
+	const struct rte_flow_item_tcp *tcp_v = item->spec;
+	void *headers_m;
+	void *headers_v;
+
+	if (!tcp_v)
+		return;
+	if (!tcp_m)
+		tcp_m = &rte_flow_item_tcp_mask;
+	if (inner) {
+		headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+					 inner_headers);
+		headers_v = MLX5_ADDR_OF(fte_match_param, key, inner_headers);
+	} else {
+		headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+					 outer_headers);
+		headers_v = MLX5_ADDR_OF(fte_match_param, key, outer_headers);
+	}
+	MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_protocol, 0xff);
+	MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_TCP);
+	MLX5_SET(fte_match_set_lyr_2_4, headers_m, tcp_sport,
+		 rte_be_to_cpu_16(tcp_m->hdr.src_port));
+	MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport,
+		 rte_be_to_cpu_16(tcp_v->hdr.src_port & tcp_m->hdr.src_port));
+	MLX5_SET(fte_match_set_lyr_2_4, headers_m, tcp_dport,
+		 rte_be_to_cpu_16(tcp_m->hdr.dst_port));
+	MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport,
+		 rte_be_to_cpu_16(tcp_v->hdr.dst_port & tcp_m->hdr.dst_port));
+}
+
+/**
+ * Add UDP item to matcher and to the value.
+ *
+ * @param[in, out] matcher
+ *   Flow matcher.
+ * @param[in, out] key
+ *   Flow matcher value.
+ * @param[in] item
+ *   Flow pattern to translate.
+ * @param[in] inner
+ *   Item is inner pattern.
+ */
+static void
+flow_dv_translate_item_udp(void *matcher, void *key,
+			   const struct rte_flow_item *item,
+			   int inner)
+{
+	const struct rte_flow_item_udp *udp_m = item->mask;
+	const struct rte_flow_item_udp *udp_v = item->spec;
+	void *headers_m;
+	void *headers_v;
+
+	if (!udp_v)
+		return;
+	if (!udp_m)
+		udp_m = &rte_flow_item_udp_mask;
+	if (inner) {
+		headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+					 inner_headers);
+		headers_v = MLX5_ADDR_OF(fte_match_param, key, inner_headers);
+	} else {
+		headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+					 outer_headers);
+		headers_v = MLX5_ADDR_OF(fte_match_param, key, outer_headers);
+	}
+	MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_protocol, 0xff);
+	MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP);
+	MLX5_SET(fte_match_set_lyr_2_4, headers_m, udp_sport,
+		 rte_be_to_cpu_16(udp_m->hdr.src_port));
+	MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
+		 rte_be_to_cpu_16(udp_v->hdr.src_port & udp_m->hdr.src_port));
+	MLX5_SET(fte_match_set_lyr_2_4, headers_m, udp_dport,
+		 rte_be_to_cpu_16(udp_m->hdr.dst_port));
+	MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
+		 rte_be_to_cpu_16(udp_v->hdr.dst_port & udp_m->hdr.dst_port));
+}
+
+/**
+ * Add GRE item to matcher and to the value.
+ *
+ * @param[in, out] matcher
+ *   Flow matcher.
+ * @param[in, out] key
+ *   Flow matcher value.
+ * @param[in] item
+ *   Flow pattern to translate.
+ * @param[in] inner
+ *   Item is inner pattern.
+ */
+static void
+flow_dv_translate_item_gre(void *matcher, void *key,
+			   const struct rte_flow_item *item,
+			   int inner)
+{
+	const struct rte_flow_item_gre *gre_m = item->mask;
+	const struct rte_flow_item_gre *gre_v = item->spec;
+	void *headers_m;
+	void *headers_v;
+	void *misc_m = MLX5_ADDR_OF(fte_match_param, matcher, misc_parameters);
+	void *misc_v = MLX5_ADDR_OF(fte_match_param, key, misc_parameters);
+
+	if (!gre_v)
+		return;
+	if (!gre_m)
+		gre_m = &rte_flow_item_gre_mask;
+	if (inner) {
+		headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+					 inner_headers);
+		headers_v = MLX5_ADDR_OF(fte_match_param, key, inner_headers);
+	} else {
+		headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+					 outer_headers);
+		headers_v = MLX5_ADDR_OF(fte_match_param, key, outer_headers);
+	}
+	MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_protocol, 0xff);
+	MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_GRE);
+	MLX5_SET(fte_match_set_misc, misc_m, gre_protocol,
+		 rte_be_to_cpu_16(gre_m->protocol));
+	MLX5_SET(fte_match_set_misc, misc_v, gre_protocol,
+		 rte_be_to_cpu_16(gre_v->protocol & gre_m->protocol));
+}
+
+/**
+ * Add NVGRE item to matcher and to the value.
+ *
+ * @param[in, out] matcher
+ *   Flow matcher.
+ * @param[in, out] key
+ *   Flow matcher value.
+ * @param[in] item
+ *   Flow pattern to translate.
+ * @param[in] inner
+ *   Item is inner pattern.
+ */
+static void
+flow_dv_translate_item_nvgre(void *matcher, void *key,
+			     const struct rte_flow_item *item,
+			     int inner)
+{
+	const struct rte_flow_item_nvgre *nvgre_m = item->mask;
+	const struct rte_flow_item_nvgre *nvgre_v = item->spec;
+	void *misc_m = MLX5_ADDR_OF(fte_match_param, matcher, misc_parameters);
+	void *misc_v = MLX5_ADDR_OF(fte_match_param, key, misc_parameters);
+	char *gre_key_m;
+	char *gre_key_v;
+	int size;
+	int i;
+
+	if (!nvgre_v)
+		return;
+	if (!nvgre_m)
+		nvgre_m = &rte_flow_item_nvgre_mask;
+	size = sizeof(nvgre_m->tni) + sizeof(nvgre_m->flow_id);
+	gre_key_m = MLX5_ADDR_OF(fte_match_set_misc, misc_m, gre_key_h);
+	gre_key_v = MLX5_ADDR_OF(fte_match_set_misc, misc_v, gre_key_h);
+	memcpy(gre_key_m, nvgre_m->tni, size);
+	for (i = 0; i < size; ++i)
+		gre_key_v[i] = gre_key_m[i] & ((const char *)(nvgre_v->tni))[i];
+	flow_dv_translate_item_gre(matcher, key, item, inner);
+}
+
+/**
+ * Add VXLAN item to matcher and to the value.
+ *
+ * @param[in, out] matcher
+ *   Flow matcher.
+ * @param[in, out] key
+ *   Flow matcher value.
+ * @param[in] item
+ *   Flow pattern to translate.
+ * @param[in] inner
+ *   Item is inner pattern.
+ */
+static void
+flow_dv_translate_item_vxlan(void *matcher, void *key,
+			     const struct rte_flow_item *item,
+			     int inner)
+{
+	const struct rte_flow_item_vxlan *vxlan_m = item->mask;
+	const struct rte_flow_item_vxlan *vxlan_v = item->spec;
+	void *headers_m;
+	void *headers_v;
+	void *misc_m = MLX5_ADDR_OF(fte_match_param, matcher, misc_parameters);
+	void *misc_v = MLX5_ADDR_OF(fte_match_param, key, misc_parameters);
+	char *vni_m;
+	char *vni_v;
+	uint16_t dport;
+	int size;
+	int i;
+
+	if (!vxlan_v)
+		return;
+	if (!vxlan_m)
+		vxlan_m = &rte_flow_item_vxlan_mask;
+	if (inner) {
+		headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+					 inner_headers);
+		headers_v = MLX5_ADDR_OF(fte_match_param, key, inner_headers);
+	} else {
+		headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+					 outer_headers);
+		headers_v = MLX5_ADDR_OF(fte_match_param, key, outer_headers);
+	}
+	dport = item->type == RTE_FLOW_ITEM_TYPE_VXLAN ? MLX5_VXLAN :
+							 MLX5_VXLAN_GPE;
+	if (!MLX5_GET16(fte_match_set_lyr_2_4, headers_v, udp_dport)) {
+		MLX5_SET(fte_match_set_lyr_2_4, headers_m, udp_dport, 0xFFFF);
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, dport);
+	}
+	size = sizeof(vxlan_m->vni);
+	vni_m = MLX5_ADDR_OF(fte_match_set_misc, misc_m, vxlan_vni);
+	vni_v = MLX5_ADDR_OF(fte_match_set_misc, misc_v, vxlan_vni);
+	memcpy(vni_m, vxlan_m->vni, size);
+	for (i = 0; i < size; ++i)
+		vni_v[i] = vni_m[i] & vxlan_v->vni[i];
+}
+
+/**
+ * Update the matcher and the value based on the selected item.
+ *
+ * @param[in, out] matcher
+ *   Flow matcher.
+ * @param[in, out] key
+ *   Flow matcher value.
+ * @param[in] item
+ *   Flow pattern to translate.
+ * @param[in, out] dev_flow
+ *   Pointer to the mlx5_flow.
+ * @param[in] inner
+ *   Item is inner pattern.
+ */
+static void
+flow_dv_create_item(void *matcher, void *key,
+		    const struct rte_flow_item *item,
+		    struct mlx5_flow *dev_flow,
+		    int inner)
+{
+	struct mlx5_flow_dv_matcher *tmatcher = matcher;
+
+	switch (item->type) {
+	case RTE_FLOW_ITEM_TYPE_VOID:
+	case RTE_FLOW_ITEM_TYPE_END:
+		break;
+	case RTE_FLOW_ITEM_TYPE_ETH:
+		flow_dv_translate_item_eth(tmatcher->mask.buf, key, item,
+					   inner);
+		tmatcher->priority = MLX5_PRIORITY_MAP_L2;
+		break;
+	case RTE_FLOW_ITEM_TYPE_VLAN:
+		flow_dv_translate_item_vlan(tmatcher->mask.buf, key, item,
+					    inner);
+		break;
+	case RTE_FLOW_ITEM_TYPE_IPV4:
+		flow_dv_translate_item_ipv4(tmatcher->mask.buf, key, item,
+					    inner);
+		tmatcher->priority = MLX5_PRIORITY_MAP_L3;
+		dev_flow->dv.hash_fields |=
+			mlx5_flow_hashfields_adjust(dev_flow, inner,
+						    MLX5_IPV4_LAYER_TYPES,
+						    MLX5_IPV4_IBV_RX_HASH);
+		break;
+	case RTE_FLOW_ITEM_TYPE_IPV6:
+		flow_dv_translate_item_ipv6(tmatcher->mask.buf, key, item,
+					    inner);
+		tmatcher->priority = MLX5_PRIORITY_MAP_L3;
+		dev_flow->dv.hash_fields |=
+			mlx5_flow_hashfields_adjust(dev_flow, inner,
+						    MLX5_IPV6_LAYER_TYPES,
+						    MLX5_IPV6_IBV_RX_HASH);
+		break;
+	case RTE_FLOW_ITEM_TYPE_TCP:
+		flow_dv_translate_item_tcp(tmatcher->mask.buf, key, item,
+					   inner);
+		tmatcher->priority = MLX5_PRIORITY_MAP_L4;
+		dev_flow->dv.hash_fields |=
+			mlx5_flow_hashfields_adjust(dev_flow, inner,
+						    ETH_RSS_TCP,
+						    (IBV_RX_HASH_SRC_PORT_TCP |
+						     IBV_RX_HASH_DST_PORT_TCP));
+		break;
+	case RTE_FLOW_ITEM_TYPE_UDP:
+		flow_dv_translate_item_udp(tmatcher->mask.buf, key, item,
+					   inner);
+		tmatcher->priority = MLX5_PRIORITY_MAP_L4;
+		dev_flow->dv.hash_fields |=
+			mlx5_flow_hashfields_adjust(dev_flow, inner,
+						    ETH_RSS_UDP,
+						    (IBV_RX_HASH_SRC_PORT_UDP |
+						     IBV_RX_HASH_DST_PORT_UDP));
+		break;
+	case RTE_FLOW_ITEM_TYPE_NVGRE:
+		flow_dv_translate_item_nvgre(tmatcher->mask.buf, key, item,
+					     inner);
+		break;
+	case RTE_FLOW_ITEM_TYPE_GRE:
+		flow_dv_translate_item_gre(tmatcher->mask.buf, key, item,
+					   inner);
+		break;
+	case RTE_FLOW_ITEM_TYPE_VXLAN:
+	case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
+		flow_dv_translate_item_vxlan(tmatcher->mask.buf, key, item,
+					     inner);
+		break;
+	default:
+		break;
+	}
+}
+
+static uint32_t matcher_zero[MLX5_ST_SZ_DW(fte_match_param)] = { 0 };
+
+#define HEADER_IS_ZERO(match_criteria, headers)				     \
+	!(memcmp(MLX5_ADDR_OF(fte_match_param, match_criteria, headers),     \
+		 matcher_zero, MLX5_FLD_SZ_BYTES(fte_match_param, headers)))
+
+/**
+ * Calculate flow matcher enable bitmap.
+ *
+ * @param match_criteria
+ *   Pointer to flow matcher criteria.
+ *
+ * @return
+ *   Bitmap of enabled fields.
+ */
+static uint8_t
+flow_dv_matcher_enable(uint32_t *match_criteria)
+{
+	uint8_t match_criteria_enable;
+
+	match_criteria_enable =
+		(!HEADER_IS_ZERO(match_criteria, outer_headers)) <<
+		MLX5_MATCH_CRITERIA_ENABLE_OUTER_BIT;
+	match_criteria_enable |=
+		(!HEADER_IS_ZERO(match_criteria, misc_parameters)) <<
+		MLX5_MATCH_CRITERIA_ENABLE_MISC_BIT;
+	match_criteria_enable |=
+		(!HEADER_IS_ZERO(match_criteria, inner_headers)) <<
+		MLX5_MATCH_CRITERIA_ENABLE_INNER_BIT;
+	match_criteria_enable |=
+		(!HEADER_IS_ZERO(match_criteria, misc_parameters_2)) <<
+		MLX5_MATCH_CRITERIA_ENABLE_MISC2_BIT;
+
+	return match_criteria_enable;
+}
+
+/**
+ * Register the flow matcher.
+ *
+ * @param[in, out] dev
+ *   Pointer to rte_eth_dev structure.
+ * @param[in, out] matcher
+ *   Pointer to flow matcher.
+ * @param[in, out] dev_flow
+ *   Pointer to the dev_flow.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_dv_matcher_register(struct rte_eth_dev *dev,
+			 struct mlx5_flow_dv_matcher *matcher,
+			 struct mlx5_flow *dev_flow,
+			 struct rte_flow_error *error)
+{
+	struct priv *priv = dev->data->dev_private;
+	struct mlx5_flow_dv_matcher *cache;
+	struct mlx5dv_flow_matcher_attr dv_attr = {
+		.type = IBV_FLOW_ATTR_NORMAL,
+		.match_mask = (void *)&matcher->mask,
+	};
+
+	/* Lookup from cache. */
+	LIST_FOREACH(cache, &priv->matchers, cache.next) {
+		if (matcher->crc == cache->crc &&
+		    matcher->priority == cache->priority &&
+		    matcher->egress == cache->egress &&
+		    !memcmp((const void *)matcher->mask.buf,
+			    (const void *)cache->mask.buf, cache->mask.size)) {
+			DRV_LOG(DEBUG,
+				"priority %hd use %s matcher %p: refcnt %d++",
+				cache->priority, cache->egress ? "tx" : "rx",
+				(void *)cache,
+				rte_atomic32_read(&cache->cache.refcnt));
+			rte_atomic32_inc(&cache->cache.refcnt);
+			dev_flow->dv.matcher = cache;
+			return 0;
+		}
+	}
+	/* Register new matcher. */
+	cache = rte_calloc(__func__, 1, sizeof(*cache), 0);
+	if (!cache)
+		return rte_flow_error_set(error, ENOMEM,
+					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+					  "cannot allocate matcher memory");
+	*cache = *matcher;
+	dv_attr.match_criteria_enable =
+		flow_dv_matcher_enable(cache->mask.buf);
+	dv_attr.priority = matcher->priority;
+	if (matcher->egress)
+		dv_attr.flags |= IBV_FLOW_ATTR_FLAGS_EGRESS;
+	cache->cache.resource =
+		mlx5dv_create_flow_matcher(priv->ctx, &dv_attr);
+	if (!cache->cache.resource)
+		return rte_flow_error_set(error, ENOMEM,
+					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					  NULL, "cannot create matcher");
+	rte_atomic32_inc(&cache->cache.refcnt);
+	LIST_INSERT_HEAD(&priv->matchers, &cache->cache, next);
+	dev_flow->dv.matcher = cache;
+	DRV_LOG(DEBUG, "priority %hd new %s matcher %p: refcnt %d",
+		cache->priority,
+		cache->egress ? "tx" : "rx", (void *)cache,
+		rte_atomic32_read(&cache->cache.refcnt));
+	return 0;
+}
+
+/**
+ * Fill the flow with DV spec.
+ *
+ * @param[in] dev
+ *   Pointer to rte_eth_dev structure.
+ * @param[in, out] dev_flow
+ *   Pointer to the sub flow.
+ * @param[in] attr
+ *   Pointer to the flow attributes.
+ * @param[in] items
+ *   Pointer to the list of items.
+ * @param[in] actions
+ *   Pointer to the list of actions.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_dv_translate(struct rte_eth_dev *dev,
+		  struct mlx5_flow *dev_flow,
+		  const struct rte_flow_attr *attr,
+		  const struct rte_flow_item items[],
+		  const struct rte_flow_action actions[] __rte_unused,
+		  struct rte_flow_error *error)
+{
+	struct priv *priv = dev->data->dev_private;
+	uint64_t priority = attr->priority;
+	struct mlx5_flow_dv_matcher matcher = {
+		.mask = {
+			.size = sizeof(matcher.mask.buf),
+		},
+	};
+	void *match_value = dev_flow->dv.value.buf;
+	uint8_t inner = 0;
+
+	if (priority == MLX5_FLOW_PRIO_RSVD)
+		priority = priv->config.flow_prio - 1;
+	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++)
+		flow_dv_create_item(&matcher, match_value, items, dev_flow,
+				    inner);
+	matcher.crc = rte_raw_cksum((const void *)matcher.mask.buf,
+				     matcher.mask.size);
+	matcher.priority = mlx5_flow_adjust_priority(dev, priority,
+						     matcher.priority);
+	matcher.egress = attr->egress;
+	if (flow_dv_matcher_register(dev, &matcher, dev_flow, error))
+		return -rte_errno;
+	return 0;
+}
+
+/**
  * Fills the flow_ops with the function pointers.
  *
  * @param[out] flow_ops
@@ -345,7 +1118,7 @@ mlx5_flow_dv_get_driver_ops(struct mlx5_flow_driver_ops *flow_ops)
 	*flow_ops = (struct mlx5_flow_driver_ops) {
 		.validate = flow_dv_validate,
 		.prepare = flow_dv_prepare,
-		.translate = NULL,
+		.translate = flow_dv_translate,
 		.apply = NULL,
 		.remove = NULL,
 		.destroy = NULL,
diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c
index e8e16cc37..f4a264232 100644
--- a/drivers/net/mlx5/mlx5_flow_verbs.c
+++ b/drivers/net/mlx5/mlx5_flow_verbs.c
@@ -132,37 +132,6 @@ flow_verbs_spec_add(struct mlx5_flow *flow, void *src, unsigned int size)
 }
 
 /**
- * Adjust verbs hash fields according to the @p flow information.
- *
- * @param[in] dev_flow.
- *   Pointer to dev flow structure.
- * @param[in] tunnel
- *   1 when the hash field is for a tunnel item.
- * @param[in] layer_types
- *   ETH_RSS_* types.
- * @param[in] hash_fields
- *   Item hash fields.
- */
-static void
-flow_verbs_hashfields_adjust(struct mlx5_flow *dev_flow,
-			     int tunnel __rte_unused,
-			     uint32_t layer_types, uint64_t hash_fields)
-{
-#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
-	int rss_request_inner = dev_flow->flow->rss.level >= 2;
-
-	hash_fields |= (tunnel ? IBV_RX_HASH_INNER : 0);
-	if (rss_request_inner && !tunnel)
-		hash_fields = 0;
-	else if (rss_request_inner < 2 && tunnel)
-		hash_fields = 0;
-#endif
-	if (!(dev_flow->flow->rss.types & layer_types))
-		hash_fields = 0;
-	dev_flow->verbs.hash_fields |= hash_fields;
-}
-
-/**
  * Convert the @p item into a Verbs specification. This function assumes that
  * the input is valid and that there is space to insert the requested item
  * into the flow.
@@ -346,13 +315,10 @@ flow_verbs_translate_item_ipv4(const struct rte_flow_item *item,
 		ipv4.val.proto &= ipv4.mask.proto;
 		ipv4.val.tos &= ipv4.mask.tos;
 	}
-	flow_verbs_hashfields_adjust(dev_flow, tunnel,
-				     (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
-				      ETH_RSS_NONFRAG_IPV4_TCP |
-				      ETH_RSS_NONFRAG_IPV4_UDP |
-				      ETH_RSS_NONFRAG_IPV4_OTHER),
-				     (IBV_RX_HASH_SRC_IPV4 |
-				      IBV_RX_HASH_DST_IPV4));
+	dev_flow->verbs.hash_fields |=
+		mlx5_flow_hashfields_adjust(dev_flow, tunnel,
+					    MLX5_IPV4_LAYER_TYPES,
+					    MLX5_IPV4_IBV_RX_HASH);
 	dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L3;
 	flow_verbs_spec_add(dev_flow, &ipv4, size);
 }
@@ -426,16 +392,10 @@ flow_verbs_translate_item_ipv6(const struct rte_flow_item *item,
 		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
 		ipv6.val.hop_limit &= ipv6.mask.hop_limit;
 	}
-	flow_verbs_hashfields_adjust(dev_flow, tunnel,
-				     (ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
-				      ETH_RSS_NONFRAG_IPV6_TCP |
-				      ETH_RSS_NONFRAG_IPV6_UDP |
-				      ETH_RSS_IPV6_EX  |
-				      ETH_RSS_IPV6_TCP_EX |
-				      ETH_RSS_IPV6_UDP_EX |
-				      ETH_RSS_NONFRAG_IPV6_OTHER),
-				     (IBV_RX_HASH_SRC_IPV6 |
-				      IBV_RX_HASH_DST_IPV6));
+	dev_flow->verbs.hash_fields |=
+		mlx5_flow_hashfields_adjust(dev_flow, tunnel,
+					    MLX5_IPV6_LAYER_TYPES,
+					    MLX5_IPV6_IBV_RX_HASH);
 	dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L3;
 	flow_verbs_spec_add(dev_flow, &ipv6, size);
 }
@@ -479,10 +439,10 @@ flow_verbs_translate_item_udp(const struct rte_flow_item *item,
 		udp.val.src_port &= udp.mask.src_port;
 		udp.val.dst_port &= udp.mask.dst_port;
 	}
-	flow_verbs_hashfields_adjust(dev_flow,
-				     tunnel, ETH_RSS_UDP,
-				     (IBV_RX_HASH_SRC_PORT_UDP |
-				      IBV_RX_HASH_DST_PORT_UDP));
+	dev_flow->verbs.hash_fields |=
+		mlx5_flow_hashfields_adjust(dev_flow, tunnel, ETH_RSS_UDP,
+					    (IBV_RX_HASH_SRC_PORT_UDP |
+					     IBV_RX_HASH_DST_PORT_UDP));
 	dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L4;
 	flow_verbs_spec_add(dev_flow, &udp, size);
 }
@@ -526,10 +486,10 @@ flow_verbs_translate_item_tcp(const struct rte_flow_item *item,
 		tcp.val.src_port &= tcp.mask.src_port;
 		tcp.val.dst_port &= tcp.mask.dst_port;
 	}
-	flow_verbs_hashfields_adjust(dev_flow,
-				     tunnel, ETH_RSS_TCP,
-				     (IBV_RX_HASH_SRC_PORT_TCP |
-				      IBV_RX_HASH_DST_PORT_TCP));
+	dev_flow->verbs.hash_fields |=
+		mlx5_flow_hashfields_adjust(dev_flow, tunnel, ETH_RSS_TCP,
+					    (IBV_RX_HASH_SRC_PORT_TCP |
+					     IBV_RX_HASH_DST_PORT_TCP));
 	dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L4;
 	flow_verbs_spec_add(dev_flow, &tcp, size);
 }
diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h
index 2222e7fbd..4e2f9f43d 100644
--- a/drivers/net/mlx5/mlx5_prm.h
+++ b/drivers/net/mlx5/mlx5_prm.h
@@ -493,6 +493,13 @@ struct mlx5_ifc_fte_match_param_bits {
 	u8 reserved_at_800[0x800];
 };
 
+enum {
+	MLX5_MATCH_CRITERIA_ENABLE_OUTER_BIT,
+	MLX5_MATCH_CRITERIA_ENABLE_MISC_BIT,
+	MLX5_MATCH_CRITERIA_ENABLE_INNER_BIT,
+	MLX5_MATCH_CRITERIA_ENABLE_MISC2_BIT
+};
+
 /* CQE format mask. */
 #define MLX5E_CQE_FORMAT_MASK 0xc
 
-- 
2.11.0

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [dpdk-dev] [PATCH v3 08/11] net/mlx5: add Direct Verbs translate actions
  2018-09-24 23:17 ` [dpdk-dev] [PATCH v3 00/11] net/mlx5: add Direct Verbs flow driver support Yongseok Koh
                     ` (6 preceding siblings ...)
  2018-09-24 23:17   ` [dpdk-dev] [PATCH v3 07/11] net/mlx5: add Direct Verbs translate items Yongseok Koh
@ 2018-09-24 23:17   ` Yongseok Koh
  2018-09-24 23:17   ` [dpdk-dev] [PATCH v3 09/11] net/mlx5: add Direct Verbs driver to glue Yongseok Koh
                     ` (3 subsequent siblings)
  11 siblings, 0 replies; 22+ messages in thread
From: Yongseok Koh @ 2018-09-24 23:17 UTC (permalink / raw)
  To: Thomas Monjalon, Shahaf Shuler; +Cc: dev, Ori Kam

From: Ori Kam <orika@mellanox.com>

In this commit we add the translation of flow actions.
Unlike the Verbs API, actions are separated from the items and are
passed to the API in an array structure.
Since a terminal action like RSS requires the QP information, such
actions are handled both at translate time and at apply time.
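
A rough sketch of the resulting flow, with names taken from this and the
following patches of the series (error handling omitted):

  /* Translate time: accumulate non-fate actions in the array. */
  dv->actions[n].type = MLX5DV_FLOW_ACTION_TAG;
  dv->actions[n].tag_value = MLX5_FLOW_MARK_DEFAULT;
  n++;
  /* Apply time: the fate action needs a QP, so it is appended only
   * once the hash Rx queue has been created.
   */
  dv->actions[n].type = MLX5DV_FLOW_ACTION_DEST_IBV_QP;
  dv->actions[n].qp = hrxq->qp;
  n++;
  dv->flow = mlx5_glue->dv_create_flow(dv->matcher->cache.resource,
                                       (void *)&dv->value, n, dv->actions);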

Signed-off-by: Ori Kam <orika@mellanox.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5_flow.h    |  7 +++++
 drivers/net/mlx5/mlx5_flow_dv.c | 61 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 68 insertions(+)

diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 7f0566fc9..ec860ef4b 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -136,6 +136,8 @@ struct mlx5_flow_dv_match_params {
 	/**< Matcher value. This value is used as the mask or as a key. */
 };
 
+#define MLX5_DV_MAX_NUMBER_OF_ACTIONS 8
+
 /* Matcher structure. */
 struct mlx5_flow_dv_matcher {
 	struct mlx5_cache cache; /**< Cache to struct mlx5dv_flow_matcher. */
@@ -154,6 +156,11 @@ struct mlx5_flow_dv {
 	struct mlx5_flow_dv_match_params value;
 	/**< Holds the value that the packet is compared to. */
 	struct ibv_flow *flow; /**< Installed flow. */
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+	struct mlx5dv_flow_action_attr actions[MLX5_DV_MAX_NUMBER_OF_ACTIONS];
+	/**< Action list. */
+#endif
+	int actions_n; /**< Number of actions. */
 };
 
 /* Verbs specification header. */
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index acb1b7549..916989988 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -942,6 +942,65 @@ flow_dv_create_item(void *matcher, void *key,
 	}
 }
 
+/**
+ * Store the requested actions in an array.
+ *
+ * @param[in] action
+ *   Flow action to translate.
+ * @param[in, out] dev_flow
+ *   Pointer to the mlx5_flow.
+ */
+static void
+flow_dv_create_action(const struct rte_flow_action *action,
+		      struct mlx5_flow *dev_flow)
+{
+	const struct rte_flow_action_queue *queue;
+	const struct rte_flow_action_rss *rss;
+	int actions_n = dev_flow->dv.actions_n;
+	struct rte_flow *flow = dev_flow->flow;
+
+	switch (action->type) {
+	case RTE_FLOW_ACTION_TYPE_VOID:
+		break;
+	case RTE_FLOW_ACTION_TYPE_FLAG:
+		dev_flow->dv.actions[actions_n].type = MLX5DV_FLOW_ACTION_TAG;
+		dev_flow->dv.actions[actions_n].tag_value =
+			MLX5_FLOW_MARK_DEFAULT;
+		actions_n++;
+		break;
+	case RTE_FLOW_ACTION_TYPE_MARK:
+		dev_flow->dv.actions[actions_n].type = MLX5DV_FLOW_ACTION_TAG;
+		dev_flow->dv.actions[actions_n].tag_value =
+			((const struct rte_flow_action_mark *)
+			 (action->conf))->id;
+		actions_n++;
+		break;
+	case RTE_FLOW_ACTION_TYPE_DROP:
+		dev_flow->dv.actions[actions_n].type = MLX5DV_FLOW_ACTION_DROP;
+		flow->actions |= MLX5_ACTION_DROP;
+		break;
+	case RTE_FLOW_ACTION_TYPE_QUEUE:
+		queue = action->conf;
+		flow->rss.queue_num = 1;
+		(*flow->queue)[0] = queue->index;
+		break;
+	case RTE_FLOW_ACTION_TYPE_RSS:
+		rss = action->conf;
+		if (flow->queue)
+			memcpy((*flow->queue), rss->queue,
+			       rss->queue_num * sizeof(uint16_t));
+		flow->rss.queue_num = rss->queue_num;
+		memcpy(flow->key, rss->key, MLX5_RSS_HASH_KEY_LEN);
+		flow->rss.types = rss->types;
+		flow->rss.level = rss->level;
+		/* Added to the array only in apply, since we need the QP. */
+		break;
+	default:
+		break;
+	}
+	dev_flow->dv.actions_n = actions_n;
+}
+
 static uint32_t matcher_zero[MLX5_ST_SZ_DW(fte_match_param)] = { 0 };
 
 #define HEADER_IS_ZERO(match_criteria, headers)				     \
@@ -1103,6 +1162,8 @@ flow_dv_translate(struct rte_eth_dev *dev,
 	matcher.egress = attr->egress;
 	if (flow_dv_matcher_register(dev, &matcher, dev_flow, error))
 		return -rte_errno;
+	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++)
+		flow_dv_create_action(actions, dev_flow);
 	return 0;
 }
 
-- 
2.11.0

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [dpdk-dev] [PATCH v3 09/11] net/mlx5: add Direct Verbs driver to glue
  2018-09-24 23:17 ` [dpdk-dev] [PATCH v3 00/11] net/mlx5: add Direct Verbs flow driver support Yongseok Koh
                     ` (7 preceding siblings ...)
  2018-09-24 23:17   ` [dpdk-dev] [PATCH v3 08/11] net/mlx5: add Direct Verbs translate actions Yongseok Koh
@ 2018-09-24 23:17   ` Yongseok Koh
  2018-09-24 23:17   ` [dpdk-dev] [PATCH v3 10/11] net/mlx5: add Direct Verbs final functions Yongseok Koh
                     ` (2 subsequent siblings)
  11 siblings, 0 replies; 22+ messages in thread
From: Yongseok Koh @ 2018-09-24 23:17 UTC (permalink / raw)
  To: Thomas Monjalon, Shahaf Shuler; +Cc: dev, Ori Kam

From: Ori Kam <orika@mellanox.com>

This commit adds all the glue library functions required by the Direct
Verbs flow driver.
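
The PMD does not call these rdma-core entry points directly; every call
goes through the mlx5_glue function-pointer table, so that rdma-core can
be loaded with dlopen() when RTE_LIBRTE_MLX5_DLOPEN_DEPS is enabled. A
sketch of a call site (names taken from flow_dv_matcher_register() later
in this series):

  struct mlx5dv_flow_matcher_attr dv_attr = {
          .type = IBV_FLOW_ATTR_NORMAL,
          .match_mask = (void *)&matcher->mask,
  };
  struct mlx5dv_flow_matcher *m;

  /* Resolves to mlx5dv_create_flow_matcher() when
   * HAVE_IBV_FLOW_DV_SUPPORT is defined, to a NULL stub otherwise.
   */
  m = mlx5_glue->dv_create_flow_matcher(priv->ctx, &dv_attr);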

Signed-off-by: Ori Kam <orika@mellanox.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/Makefile    |  2 +-
 drivers/net/mlx5/meson.build |  2 +-
 drivers/net/mlx5/mlx5_glue.c | 45 ++++++++++++++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_glue.h | 15 +++++++++++++++
 4 files changed, 62 insertions(+), 2 deletions(-)

diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index d510a4275..4243b37ca 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -8,7 +8,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
 LIB = librte_pmd_mlx5.a
 LIB_GLUE = $(LIB_GLUE_BASE).$(LIB_GLUE_VERSION)
 LIB_GLUE_BASE = librte_pmd_mlx5_glue.so
-LIB_GLUE_VERSION = 18.05.0
+LIB_GLUE_VERSION = 18.11.0
 
 # Sources.
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5.c
diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
index 8075496f7..3d09ece4f 100644
--- a/drivers/net/mlx5/meson.build
+++ b/drivers/net/mlx5/meson.build
@@ -4,7 +4,7 @@
 
 pmd_dlopen = get_option('enable_driver_mlx_glue')
 LIB_GLUE_BASE = 'librte_pmd_mlx5_glue.so'
-LIB_GLUE_VERSION = '18.05.0'
+LIB_GLUE_VERSION = '18.11.0'
 LIB_GLUE = LIB_GLUE_BASE + '.' + LIB_GLUE_VERSION
 if pmd_dlopen
 	dpdk_conf.set('RTE_LIBRTE_MLX5_DLOPEN_DEPS', 1)
diff --git a/drivers/net/mlx5/mlx5_glue.c b/drivers/net/mlx5/mlx5_glue.c
index 84f9492a7..48590df5b 100644
--- a/drivers/net/mlx5/mlx5_glue.c
+++ b/drivers/net/mlx5/mlx5_glue.c
@@ -346,6 +346,48 @@ mlx5_glue_dv_create_qp(struct ibv_context *context,
 #endif
 }
 
+static struct mlx5dv_flow_matcher *
+mlx5_glue_dv_create_flow_matcher(struct ibv_context *context,
+				 struct mlx5dv_flow_matcher_attr *matcher_attr)
+{
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+	return mlx5dv_create_flow_matcher(context, matcher_attr);
+#else
+	(void)context;
+	(void)matcher_attr;
+	return NULL;
+#endif
+}
+
+static struct ibv_flow *
+mlx5_glue_dv_create_flow(struct mlx5dv_flow_matcher *matcher,
+			 struct mlx5dv_flow_match_parameters *match_value,
+			 size_t num_actions,
+			 struct mlx5dv_flow_action_attr *actions_attr)
+{
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+	return mlx5dv_create_flow(matcher, match_value,
+				  num_actions, actions_attr);
+#else
+	(void)matcher;
+	(void)match_value;
+	(void)num_actions;
+	(void)actions_attr;
+	return NULL;
+#endif
+}
+
+static int
+mlx5_glue_dv_destroy_flow_matcher(struct mlx5dv_flow_matcher *matcher)
+{
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+	return mlx5dv_destroy_flow_matcher(matcher);
+#else
+	(void)matcher;
+	return 0;
+#endif
+}
+
 alignas(RTE_CACHE_LINE_SIZE)
 const struct mlx5_glue *mlx5_glue = &(const struct mlx5_glue){
 	.version = MLX5_GLUE_VERSION,
@@ -392,4 +434,7 @@ const struct mlx5_glue *mlx5_glue = &(const struct mlx5_glue){
 	.dv_set_context_attr = mlx5_glue_dv_set_context_attr,
 	.dv_init_obj = mlx5_glue_dv_init_obj,
 	.dv_create_qp = mlx5_glue_dv_create_qp,
+	.dv_create_flow_matcher = mlx5_glue_dv_create_flow_matcher,
+	.dv_destroy_flow_matcher = mlx5_glue_dv_destroy_flow_matcher,
+	.dv_create_flow = mlx5_glue_dv_create_flow,
 };
diff --git a/drivers/net/mlx5/mlx5_glue.h b/drivers/net/mlx5/mlx5_glue.h
index e584d3679..f6e4e3842 100644
--- a/drivers/net/mlx5/mlx5_glue.h
+++ b/drivers/net/mlx5/mlx5_glue.h
@@ -39,6 +39,13 @@ struct mlx5dv_qp_init_attr;
 struct mlx5dv_wq_init_attr;
 #endif
 
+#ifndef HAVE_IBV_FLOW_DV_SUPPORT
+struct mlx5dv_flow_matcher;
+struct mlx5dv_flow_matcher_attr;
+struct mlx5dv_flow_action_attr;
+struct mlx5dv_flow_match_parameters;
+#endif
+
 /* LIB_GLUE_VERSION must be updated every time this structure is modified. */
 struct mlx5_glue {
 	const char *version;
@@ -122,6 +129,14 @@ struct mlx5_glue {
 		(struct ibv_context *context,
 		 struct ibv_qp_init_attr_ex *qp_init_attr_ex,
 		 struct mlx5dv_qp_init_attr *dv_qp_init_attr);
+	struct mlx5dv_flow_matcher *(*dv_create_flow_matcher)
+		(struct ibv_context *context,
+		 struct mlx5dv_flow_matcher_attr *matcher_attr);
+	int (*dv_destroy_flow_matcher)(struct mlx5dv_flow_matcher *matcher);
+	struct ibv_flow *(*dv_create_flow)(struct mlx5dv_flow_matcher *matcher,
+			  struct mlx5dv_flow_match_parameters *match_value,
+			  size_t num_actions,
+			  struct mlx5dv_flow_action_attr *actions_attr);
 };
 
 const struct mlx5_glue *mlx5_glue;
-- 
2.11.0

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [dpdk-dev] [PATCH v3 10/11] net/mlx5: add Direct Verbs final functions
  2018-09-24 23:17 ` [dpdk-dev] [PATCH v3 00/11] net/mlx5: add Direct Verbs flow driver support Yongseok Koh
                     ` (8 preceding siblings ...)
  2018-09-24 23:17   ` [dpdk-dev] [PATCH v3 09/11] net/mlx5: add Direct Verbs driver to glue Yongseok Koh
@ 2018-09-24 23:17   ` Yongseok Koh
  2018-09-24 23:17   ` [dpdk-dev] [PATCH v3 11/11] net/mlx5: add runtime parameter to enable Direct Verbs Yongseok Koh
  2018-10-04 16:17   ` [dpdk-dev] [PATCH v3 00/11] net/mlx5: add Direct Verbs flow driver support Thomas Monjalon
  11 siblings, 0 replies; 22+ messages in thread
From: Yongseok Koh @ 2018-09-24 23:17 UTC (permalink / raw)
  To: Thomas Monjalon, Shahaf Shuler; +Cc: dev, Ori Kam

From: Ori Kam <orika@mellanox.com>

This commit adds the missing functions, which are apply, remove, and
destroy.
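
With these in place, the generic mlx5_flow layer can drive a flow through
its whole life cycle via the mlx5_flow_driver_ops table. A sketch of the
sequence from a hypothetical caller ("ops" is a pointer to that table;
error handling omitted):

  ops->apply(dev, flow, &error);  /* create the mlx5dv flow in HW       */
  ops->remove(dev, flow);         /* detach from HW, keep host memory   */
  ops->apply(dev, flow, &error);  /* e.g. re-apply after a port restart */
  ops->destroy(dev, flow);        /* detach from HW and free memory     */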

Signed-off-by: Ori Kam <orika@mellanox.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5_flow.c    |   4 +
 drivers/net/mlx5/mlx5_flow.h    |   2 +
 drivers/net/mlx5/mlx5_flow_dv.c | 192 +++++++++++++++++++++++++++++++++++++++-
 3 files changed, 194 insertions(+), 4 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 5632e31c5..c6c145021 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -2489,5 +2489,9 @@ mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
 void
 mlx5_flow_init_driver_ops(struct rte_eth_dev *dev __rte_unused)
 {
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+	mlx5_flow_dv_get_driver_ops(&nic_ops);
+#else
 	mlx5_flow_verbs_get_driver_ops(&nic_ops);
+#endif
 }
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index ec860ef4b..53c0eeb56 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -189,7 +189,9 @@ struct mlx5_flow {
 	struct rte_flow *flow; /**< Pointer to the main flow. */
 	uint32_t layers; /**< Bit-fields that holds the detected layers. */
 	union {
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
 		struct mlx5_flow_dv dv;
+#endif
 		struct mlx5_flow_verbs verbs;
 	};
 };
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 916989988..71af410b2 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -2,6 +2,7 @@
  * Copyright 2018 Mellanox Technologies, Ltd
  */
 
+
 #include <sys/queue.h>
 #include <stdalign.h>
 #include <stdint.h>
@@ -1095,7 +1096,7 @@ flow_dv_matcher_register(struct rte_eth_dev *dev,
 	if (matcher->egress)
 		dv_attr.flags |= IBV_FLOW_ATTR_FLAGS_EGRESS;
 	cache->cache.resource =
-		mlx5dv_create_flow_matcher(priv->ctx, &dv_attr);
+		mlx5_glue->dv_create_flow_matcher(priv->ctx, &dv_attr);
 	if (!cache->cache.resource)
 		return rte_flow_error_set(error, ENOMEM,
 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
@@ -1168,6 +1169,189 @@ flow_dv_translate(struct rte_eth_dev *dev,
 }
 
 /**
+ * Apply the flow to the NIC.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in, out] flow
+ *   Pointer to flow structure.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_dv_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
+	      struct rte_flow_error *error)
+{
+	struct mlx5_flow_dv *dv;
+	struct mlx5_flow *dev_flow;
+	int n;
+	int err;
+
+	LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
+		dv = &dev_flow->dv;
+		n = dv->actions_n;
+		if (flow->actions & MLX5_ACTION_DROP) {
+			dv->hrxq = mlx5_hrxq_drop_new(dev);
+			if (!dv->hrxq) {
+				rte_flow_error_set
+					(error, errno,
+					 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+					 "cannot get drop hash queue");
+				goto error;
+			}
+			dv->actions[n].type = MLX5DV_FLOW_ACTION_DEST_IBV_QP;
+			dv->actions[n].qp = dv->hrxq->qp;
+			n++;
+		} else {
+			struct mlx5_hrxq *hrxq;
+			hrxq = mlx5_hrxq_get(dev, flow->key,
+					     MLX5_RSS_HASH_KEY_LEN,
+					     dv->hash_fields,
+					     (*flow->queue),
+					     flow->rss.queue_num);
+			if (!hrxq)
+				hrxq = mlx5_hrxq_new
+					(dev, flow->key, MLX5_RSS_HASH_KEY_LEN,
+					 dv->hash_fields, (*flow->queue),
+					 flow->rss.queue_num,
+					 !!(flow->layers &
+					    MLX5_FLOW_LAYER_TUNNEL));
+			if (!hrxq) {
+				rte_flow_error_set
+					(error, rte_errno,
+					 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+					 "cannot get hash queue");
+				goto error;
+			}
+			dv->hrxq = hrxq;
+			dv->actions[n].type = MLX5DV_FLOW_ACTION_DEST_IBV_QP;
+			dv->actions[n].qp = hrxq->qp;
+			n++;
+		}
+		dv->flow =
+			mlx5_glue->dv_create_flow(dv->matcher->cache.resource,
+						  (void *)&dv->value, n,
+						  dv->actions);
+		if (!dv->flow) {
+			rte_flow_error_set(error, errno,
+					   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					   NULL,
+					   "hardware refuses to create flow");
+			goto error;
+		}
+	}
+	return 0;
+error:
+	err = rte_errno; /* Save rte_errno before cleanup. */
+	LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
+		struct mlx5_flow_dv *dv = &dev_flow->dv;
+		if (dv->hrxq) {
+			if (flow->actions & MLX5_ACTION_DROP)
+				mlx5_hrxq_drop_release(dev);
+			else
+				mlx5_hrxq_release(dev, dv->hrxq);
+			dv->hrxq = NULL;
+		}
+	}
+	rte_errno = err; /* Restore rte_errno. */
+	return -rte_errno;
+}
+
+/**
+ * Release the flow matcher.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param matcher
+ *   Pointer to flow matcher.
+ *
+ * @return
+ *   1 while a reference on it exists, 0 when freed.
+ */
+static int
+flow_dv_matcher_release(struct rte_eth_dev *dev,
+			struct mlx5_flow_dv_matcher *matcher)
+{
+	struct mlx5_cache *cache = &matcher->cache;
+
+	assert(cache->resource);
+	DRV_LOG(DEBUG, "port %u matcher %p: refcnt %d--",
+		dev->data->port_id, (void *)cache,
+		rte_atomic32_read(&cache->refcnt));
+	if (rte_atomic32_dec_and_test(&cache->refcnt)) {
+		claim_zero(mlx5_glue->dv_destroy_flow_matcher(cache->resource));
+		LIST_REMOVE(cache, next);
+		rte_free(cache);
+		DRV_LOG(DEBUG, "port %u matcher %p: removed",
+			dev->data->port_id, (void *)cache);
+		return 0;
+	}
+	return 1;
+}
+
+/**
+ * Remove the flow from the NIC but keeps it in memory.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[in, out] flow
+ *   Pointer to flow structure.
+ */
+static void
+flow_dv_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
+{
+	struct mlx5_flow_dv *dv;
+	struct mlx5_flow *dev_flow;
+
+	if (!flow)
+		return;
+	LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
+		dv = &dev_flow->dv;
+		if (dv->flow) {
+			claim_zero(mlx5_glue->destroy_flow(dv->flow));
+			dv->flow = NULL;
+		}
+		if (dv->hrxq) {
+			if (flow->actions & MLX5_ACTION_DROP)
+				mlx5_hrxq_drop_release(dev);
+			else
+				mlx5_hrxq_release(dev, dv->hrxq);
+			dv->hrxq = NULL;
+		}
+	}
+	if (flow->counter)
+		flow->counter = NULL;
+}
+
+/**
+ * Remove the flow from the NIC and the memory.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in, out] flow
+ *   Pointer to flow structure.
+ */
+static void
+flow_dv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
+{
+	struct mlx5_flow *dev_flow;
+
+	if (!flow)
+		return;
+	flow_dv_remove(dev, flow);
+	while (!LIST_EMPTY(&flow->dev_flows)) {
+		dev_flow = LIST_FIRST(&flow->dev_flows);
+		LIST_REMOVE(dev_flow, next);
+		if (dev_flow->dv.matcher)
+			flow_dv_matcher_release(dev, dev_flow->dv.matcher);
+		rte_free(dev_flow);
+	}
+}
+
+/**
  * Fills the flow_ops with the function pointers.
  *
  * @param[out] flow_ops
@@ -1180,9 +1364,9 @@ mlx5_flow_dv_get_driver_ops(struct mlx5_flow_driver_ops *flow_ops)
 		.validate = flow_dv_validate,
 		.prepare = flow_dv_prepare,
 		.translate = flow_dv_translate,
-		.apply = NULL,
-		.remove = NULL,
-		.destroy = NULL,
+		.apply = flow_dv_apply,
+		.remove = flow_dv_remove,
+		.destroy = flow_dv_destroy,
 	};
 }
 
-- 
2.11.0

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [dpdk-dev] [PATCH v3 11/11] net/mlx5: add runtime parameter to enable Direct Verbs
  2018-09-24 23:17 ` [dpdk-dev] [PATCH v3 00/11] net/mlx5: add Direct Verbs flow driver support Yongseok Koh
                     ` (9 preceding siblings ...)
  2018-09-24 23:17   ` [dpdk-dev] [PATCH v3 10/11] net/mlx5: add Direct Verbs final functions Yongseok Koh
@ 2018-09-24 23:17   ` Yongseok Koh
  2018-10-04 16:17   ` [dpdk-dev] [PATCH v3 00/11] net/mlx5: add Direct Verbs flow driver support Thomas Monjalon
  11 siblings, 0 replies; 22+ messages in thread
From: Yongseok Koh @ 2018-09-24 23:17 UTC (permalink / raw)
  To: Thomas Monjalon, Shahaf Shuler; +Cc: dev, Ori Kam

From: Ori Kam <orika@mellanox.com>

The DV flow API is based on a new kernel API. It is missing some
functionality, such as flow counters, but adds other functionality,
such as encapsulation.

In order not to affect current users, the DV engine should be enabled
only manually, even if the kernel supports the new DV API.
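
For example, assuming the usual mlx5 devargs syntax and a hypothetical
PCI address, the parameter can be passed as a device argument:

  testpmd -w 0000:03:00.0,dv_flow_en=1 -- -i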

Signed-off-by: Ori Kam <orika@mellanox.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
 doc/guides/nics/mlx5.rst     | 7 +++++++
 drivers/net/mlx5/mlx5.c      | 6 ++++++
 drivers/net/mlx5/mlx5.h      | 1 +
 drivers/net/mlx5/mlx5_flow.c | 9 +++++++--
 4 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index dbdb90b59..67696283e 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -397,6 +397,13 @@ Run-time configuration
 
   Disabled by default.
 
+- ``dv_flow_en`` parameter [int]
+
+  A nonzero value enables DV flow steering, assuming it is supported
+  by the driver.
+
+  Disabled by default.
+
 - ``representor`` parameter [list]
 
   This parameter can be used to instantiate DPDK Ethernet devices from
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index ab44864e9..9b208109b 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -90,6 +90,9 @@
 /* Allow L3 VXLAN flow creation. */
 #define MLX5_L3_VXLAN_EN "l3_vxlan_en"
 
+/* Activate DV flow steering. */
+#define MLX5_DV_FLOW_EN "dv_flow_en"
+
 /* Activate Netlink support in VF mode. */
 #define MLX5_VF_NL_EN "vf_nl_en"
 
@@ -491,6 +494,8 @@ mlx5_args_check(const char *key, const char *val, void *opaque)
 		config->l3_vxlan_en = !!tmp;
 	} else if (strcmp(MLX5_VF_NL_EN, key) == 0) {
 		config->vf_nl_en = !!tmp;
+	} else if (strcmp(MLX5_DV_FLOW_EN, key) == 0) {
+		config->dv_flow_en = !!tmp;
 	} else {
 		DRV_LOG(WARNING, "%s: unknown parameter", key);
 		rte_errno = EINVAL;
@@ -528,6 +533,7 @@ mlx5_args(struct mlx5_dev_config *config, struct rte_devargs *devargs)
 		MLX5_RX_VEC_EN,
 		MLX5_L3_VXLAN_EN,
 		MLX5_VF_NL_EN,
+		MLX5_DV_FLOW_EN,
 		MLX5_REPRESENTOR,
 		NULL,
 	};
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 8ff6d6987..8bb619d9e 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -111,6 +111,7 @@ struct mlx5_dev_config {
 	unsigned int mpw_hdr_dseg:1; /* Enable DSEGs in the title WQEBB. */
 	unsigned int l3_vxlan_en:1; /* Enable L3 VXLAN flow creation. */
 	unsigned int vf_nl_en:1; /* Enable Netlink requests in VF mode. */
+	unsigned int dv_flow_en:1; /* Enable DV flow. */
 	unsigned int swp:1; /* Tx generic tunnel checksum and TSO offload. */
 	struct {
 		unsigned int enabled:1; /* Whether MPRQ is enabled. */
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index c6c145021..6a4a5d17e 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -2487,10 +2487,15 @@ mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
  *   Pointer to Ethernet device structure.
  */
 void
-mlx5_flow_init_driver_ops(struct rte_eth_dev *dev __rte_unused)
+mlx5_flow_init_driver_ops(struct rte_eth_dev *dev)
 {
+	struct priv *priv __rte_unused = dev->data->dev_private;
+
 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
-	mlx5_flow_dv_get_driver_ops(&nic_ops);
+	if (priv->config.dv_flow_en)
+		mlx5_flow_dv_get_driver_ops(&nic_ops);
+	else
+		mlx5_flow_verbs_get_driver_ops(&nic_ops);
 #else
 	mlx5_flow_verbs_get_driver_ops(&nic_ops);
 #endif
-- 
2.11.0

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [dpdk-dev] [PATCH v2 0/3] net/mlx5: migrate Linux TC flower driver to new flow engine
  2018-09-24 19:55 ` [dpdk-dev] [PATCH v2 0/3] net/mlx5: migrate Linux TC flower driver to new flow engine Yongseok Koh
                     ` (2 preceding siblings ...)
  2018-09-24 19:55   ` [dpdk-dev] [PATCH v2 3/3] net/mlx5: add Linux TC flower driver for E-Switch flow Yongseok Koh
@ 2018-10-04 16:16   ` Thomas Monjalon
  3 siblings, 0 replies; 22+ messages in thread
From: Thomas Monjalon @ 2018-10-04 16:16 UTC (permalink / raw)
  To: Yongseok Koh; +Cc: dev, Shahaf Shuler

24/09/2018 21:55, Yongseok Koh:
> Yongseok Koh (3):
>   net/mlx5: add abstraction for multiple flow drivers
>   net/mlx5: remove Netlink flow driver
>   net/mlx5: add Linux TC flower driver for E-Switch flow

applied to dpdk-next-net-mlx

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [dpdk-dev] [PATCH v3 00/11] net/mlx5: add Direct Verbs flow driver support
  2018-09-24 23:17 ` [dpdk-dev] [PATCH v3 00/11] net/mlx5: add Direct Verbs flow driver support Yongseok Koh
                     ` (10 preceding siblings ...)
  2018-09-24 23:17   ` [dpdk-dev] [PATCH v3 11/11] net/mlx5: add runtime parameter to enable Direct Verbs Yongseok Koh
@ 2018-10-04 16:17   ` Thomas Monjalon
  11 siblings, 0 replies; 22+ messages in thread
From: Thomas Monjalon @ 2018-10-04 16:17 UTC (permalink / raw)
  To: orika; +Cc: dev, Yongseok Koh, Shahaf Shuler

25/09/2018 01:17, Yongseok Koh:
> Ori Kam (11):
>   net/mlx5: split flow validation to dedicated function
>   net/mlx5: add flow prepare function
>   net/mlx5: add flow translate function
>   net/mlx5: add support for multiple flow drivers
>   net/mlx5: add Direct Verbs validation function
>   net/mlx5: add Direct Verbs prepare function
>   net/mlx5: add Direct Verbs translate items
>   net/mlx5: add Direct Verbs translate actions
>   net/mlx5: add Direct Verbs driver to glue
>   net/mlx5: add Direct Verbs final functions
>   net/mlx5: add runtime parameter to enable Direct Verbs

applied to dpdk-next-net-mlx

^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2018-10-04 16:17 UTC | newest]

Thread overview: 22+ messages
2018-09-19  7:21 [dpdk-dev] [PATCH 0/3] migrate Linux TC flower driver to new flow engine Yongseok Koh
2018-09-19  7:21 ` [dpdk-dev] [PATCH 1/3] net/mlx5: add abstraction for multiple flow drivers Yongseok Koh
2018-09-19  7:21 ` [dpdk-dev] [PATCH 2/3] net/mlx5: remove Netlink flow driver Yongseok Koh
2018-09-19  7:21 ` [dpdk-dev] [PATCH 3/3] net/mlx5: add Linux TC flower driver for E-Switch flow Yongseok Koh
2018-09-24 19:55 ` [dpdk-dev] [PATCH v2 0/3] net/mlx5: migrate Linux TC flower driver to new flow engine Yongseok Koh
2018-09-24 19:55   ` [dpdk-dev] [PATCH v2 1/3] net/mlx5: add abstraction for multiple flow drivers Yongseok Koh
2018-09-24 19:55   ` [dpdk-dev] [PATCH v2 2/3] net/mlx5: remove Netlink flow driver Yongseok Koh
2018-09-24 19:55   ` [dpdk-dev] [PATCH v2 3/3] net/mlx5: add Linux TC flower driver for E-Switch flow Yongseok Koh
2018-10-04 16:16   ` [dpdk-dev] [PATCH v2 0/3] net/mlx5: migrate Linux TC flower driver to new flow engine Thomas Monjalon
2018-09-24 23:17 ` [dpdk-dev] [PATCH v3 00/11] net/mlx5: add Direct Verbs flow driver support Yongseok Koh
2018-09-24 23:17   ` [dpdk-dev] [PATCH v3 01/11] net/mlx5: split flow validation to dedicated function Yongseok Koh
2018-09-24 23:17   ` [dpdk-dev] [PATCH v3 02/11] net/mlx5: add flow prepare function Yongseok Koh
2018-09-24 23:17   ` [dpdk-dev] [PATCH v3 03/11] net/mlx5: add flow translate function Yongseok Koh
2018-09-24 23:17   ` [dpdk-dev] [PATCH v3 04/11] net/mlx5: add support for multiple flow drivers Yongseok Koh
2018-09-24 23:17   ` [dpdk-dev] [PATCH v3 05/11] net/mlx5: add Direct Verbs validation function Yongseok Koh
2018-09-24 23:17   ` [dpdk-dev] [PATCH v3 06/11] net/mlx5: add Direct Verbs prepare function Yongseok Koh
2018-09-24 23:17   ` [dpdk-dev] [PATCH v3 07/11] net/mlx5: add Direct Verbs translate items Yongseok Koh
2018-09-24 23:17   ` [dpdk-dev] [PATCH v3 08/11] net/mlx5: add Direct Verbs translate actions Yongseok Koh
2018-09-24 23:17   ` [dpdk-dev] [PATCH v3 09/11] net/mlx5: add Direct Verbs driver to glue Yongseok Koh
2018-09-24 23:17   ` [dpdk-dev] [PATCH v3 10/11] net/mlx5: add Direct Verbs final functions Yongseok Koh
2018-09-24 23:17   ` [dpdk-dev] [PATCH v3 11/11] net/mlx5: add runtime parameter to enable Direct Verbs Yongseok Koh
2018-10-04 16:17   ` [dpdk-dev] [PATCH v3 00/11] net/mlx5: add Direct Verbs flow driver support Thomas Monjalon
