DPDK patches and discussions
From: Yongseok Koh <yskoh@mellanox.com>
To: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Cc: dev@dpdk.org, Adrien Mazarguil <adrien.mazarguil@6wind.com>
Subject: Re: [dpdk-dev] [PATCH v2 13/20] net/mlx5: add RSS flow action
Date: Thu, 5 Jul 2018 19:16:35 -0700
Message-ID: <20180706021630.GB47821@yongseok-MBP.local>
In-Reply-To: <ae5d5fc2b1a1501ca622e31c9d1cc6a348b2bd15.1530111623.git.nelio.laranjeiro@6wind.com>

On Wed, Jun 27, 2018 at 05:07:45PM +0200, Nelio Laranjeiro wrote:
> Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
> ---
>  drivers/net/mlx5/mlx5_flow.c | 1211 +++++++++++++++++++++++++---------
>  1 file changed, 899 insertions(+), 312 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
> index a39157533..08e0a6556 100644
> --- a/drivers/net/mlx5/mlx5_flow.c
> +++ b/drivers/net/mlx5/mlx5_flow.c
> @@ -51,13 +51,148 @@ extern const struct eth_dev_ops mlx5_dev_ops_isolate;
>  /* Action fate on the packet. */
>  #define MLX5_FLOW_FATE_DROP (1u << 0)
>  #define MLX5_FLOW_FATE_QUEUE (1u << 1)
> +#define MLX5_FLOW_FATE_RSS (1u << 2)
>  
>  /* Modify a packet. */
>  #define MLX5_FLOW_MOD_FLAG (1u << 0)
>  #define MLX5_FLOW_MOD_MARK (1u << 1)
>  
> +/* Priority reserved for default flows. */
> +#define MLX5_FLOW_PRIO_RSVD ((uint32_t)-1)
> +
> +enum mlx5_expansion {
> +	MLX5_EXPANSION_ROOT,
> +	MLX5_EXPANSION_ROOT2,

How about MLX5_EXPANSION_OUTER_ROOT?

> +	MLX5_EXPANSION_OUTER_ETH,
> +	MLX5_EXPANSION_OUTER_IPV4,
> +	MLX5_EXPANSION_OUTER_IPV4_UDP,
> +	MLX5_EXPANSION_OUTER_IPV4_TCP,
> +	MLX5_EXPANSION_OUTER_IPV6,
> +	MLX5_EXPANSION_OUTER_IPV6_UDP,
> +	MLX5_EXPANSION_OUTER_IPV6_TCP,
> +	MLX5_EXPANSION_VXLAN,
> +	MLX5_EXPANSION_VXLAN_GPE,
> +	MLX5_EXPANSION_GRE,
> +	MLX5_EXPANSION_MPLS,
> +	MLX5_EXPANSION_ETH,
> +	MLX5_EXPANSION_IPV4,
> +	MLX5_EXPANSION_IPV4_UDP,
> +	MLX5_EXPANSION_IPV4_TCP,
> +	MLX5_EXPANSION_IPV6,
> +	MLX5_EXPANSION_IPV6_UDP,
> +	MLX5_EXPANSION_IPV6_TCP,
> +};
> +
> +/** Supported expansion of items. */
> +static const struct rte_flow_expand_node mlx5_support_expansion[] = {
> +	[MLX5_EXPANSION_ROOT] = {
> +		.next = RTE_FLOW_EXPAND_ITEMS(MLX5_EXPANSION_ETH,
> +					      MLX5_EXPANSION_IPV4,
> +					      MLX5_EXPANSION_IPV6),
> +		.type = RTE_FLOW_ITEM_TYPE_END,
> +	},
> +	[MLX5_EXPANSION_ROOT2] = {
> +		.next = RTE_FLOW_EXPAND_ITEMS(MLX5_EXPANSION_OUTER_ETH,
> +					      MLX5_EXPANSION_OUTER_IPV4,
> +					      MLX5_EXPANSION_OUTER_IPV6),
> +		.type = RTE_FLOW_ITEM_TYPE_END,
> +	},
> +	[MLX5_EXPANSION_OUTER_ETH] = {
> +		.next = RTE_FLOW_EXPAND_ITEMS(MLX5_EXPANSION_OUTER_IPV4,
> +					      MLX5_EXPANSION_OUTER_IPV6),
> +		.type = RTE_FLOW_ITEM_TYPE_ETH,
> +		.rss_types = 0,
> +	},
> +	[MLX5_EXPANSION_OUTER_IPV4] = {
> +		.next = RTE_FLOW_EXPAND_ITEMS(MLX5_EXPANSION_OUTER_IPV4_UDP,
> +					      MLX5_EXPANSION_OUTER_IPV4_TCP),
> +		.type = RTE_FLOW_ITEM_TYPE_IPV4,
> +		.rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
> +			ETH_RSS_NONFRAG_IPV4_OTHER,
> +	},
> +	[MLX5_EXPANSION_OUTER_IPV4_UDP] = {
> +		.next = RTE_FLOW_EXPAND_ITEMS(MLX5_EXPANSION_VXLAN),
> +		.type = RTE_FLOW_ITEM_TYPE_UDP,
> +		.rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
> +	},
> +	[MLX5_EXPANSION_OUTER_IPV4_TCP] = {
> +		.type = RTE_FLOW_ITEM_TYPE_TCP,
> +		.rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
> +	},
> +	[MLX5_EXPANSION_OUTER_IPV6] = {
> +		.next = RTE_FLOW_EXPAND_ITEMS(MLX5_EXPANSION_OUTER_IPV6_UDP,
> +					      MLX5_EXPANSION_OUTER_IPV6_TCP),
> +		.type = RTE_FLOW_ITEM_TYPE_IPV6,
> +		.rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
> +			ETH_RSS_NONFRAG_IPV6_OTHER,
> +	},
> +	[MLX5_EXPANSION_OUTER_IPV6_UDP] = {
> +		.next = RTE_FLOW_EXPAND_ITEMS(MLX5_EXPANSION_VXLAN),
> +		.type = RTE_FLOW_ITEM_TYPE_UDP,
> +		.rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
> +	},
> +	[MLX5_EXPANSION_OUTER_IPV6_TCP] = {
> +		.type = RTE_FLOW_ITEM_TYPE_TCP,
> +		.rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
> +	},
> +	[MLX5_EXPANSION_VXLAN] = {
> +		.next = RTE_FLOW_EXPAND_ITEMS(MLX5_EXPANSION_ETH),
> +		.type = RTE_FLOW_ITEM_TYPE_VXLAN,
> +	},
> +	[MLX5_EXPANSION_VXLAN_GPE] = {
> +		.next = RTE_FLOW_EXPAND_ITEMS(MLX5_EXPANSION_ETH,
> +					      MLX5_EXPANSION_IPV4,
> +					      MLX5_EXPANSION_IPV6),
> +		.type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
> +	},
> +	[MLX5_EXPANSION_GRE] = {
> +		.next = RTE_FLOW_EXPAND_ITEMS(MLX5_EXPANSION_IPV4),
> +		.type = RTE_FLOW_ITEM_TYPE_GRE,
> +	},
> +	[MLX5_EXPANSION_ETH] = {
> +		.next = RTE_FLOW_EXPAND_ITEMS(MLX5_EXPANSION_IPV4,
> +					      MLX5_EXPANSION_IPV6),
> +		.type = RTE_FLOW_ITEM_TYPE_ETH,
> +	},
> +	[MLX5_EXPANSION_IPV4] = {
> +		.next = RTE_FLOW_EXPAND_ITEMS(MLX5_EXPANSION_IPV4_UDP,
> +					      MLX5_EXPANSION_IPV4_TCP),
> +		.type = RTE_FLOW_ITEM_TYPE_IPV4,
> +		.rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
> +			ETH_RSS_NONFRAG_IPV4_OTHER,
> +	},
> +	[MLX5_EXPANSION_IPV4_UDP] = {
> +		.type = RTE_FLOW_ITEM_TYPE_UDP,
> +		.rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
> +	},
> +	[MLX5_EXPANSION_IPV4_TCP] = {
> +		.type = RTE_FLOW_ITEM_TYPE_TCP,
> +		.rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
> +	},
> +	[MLX5_EXPANSION_IPV6] = {
> +		.next = RTE_FLOW_EXPAND_ITEMS(MLX5_EXPANSION_IPV6_UDP,
> +					      MLX5_EXPANSION_IPV6_TCP),
> +		.type = RTE_FLOW_ITEM_TYPE_IPV6,
> +		.rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
> +			ETH_RSS_NONFRAG_IPV6_OTHER,
> +	},
> +	[MLX5_EXPANSION_IPV6_UDP] = {
> +		.type = RTE_FLOW_ITEM_TYPE_UDP,
> +		.rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
> +	},
> +	[MLX5_EXPANSION_IPV6_TCP] = {
> +		.type = RTE_FLOW_ITEM_TYPE_TCP,
> +		.rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
> +	},
> +};
> +
>  /** Handles information leading to a drop fate. */
>  struct mlx5_flow_verbs {
> +	LIST_ENTRY(mlx5_flow_verbs) next;
> +	uint32_t layers;
> +	/**< Bit-fields of expanded layers see MLX5_FLOW_ITEMS_*. */
> +	uint32_t modifier;
> +	/**< Bit-fields of expanded modifier see MLX5_FLOW_MOD_*. */
>  	unsigned int size; /**< Size of the attribute. */
>  	struct {
>  		struct ibv_flow_attr *attr;
> @@ -66,20 +201,26 @@ struct mlx5_flow_verbs {
>  	};
>  	struct ibv_flow *flow; /**< Verbs flow pointer. */
>  	struct mlx5_hrxq *hrxq; /**< Hash Rx queue object. */
> +	uint64_t hash_fields; /**< Verbs hash Rx queue hash fields. */
>  };
>  
>  /* Flow structure. */
>  struct rte_flow {
>  	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
>  	struct rte_flow_attr attributes; /**< User flow attribute. */
> +	uint32_t expand:1; /**< Flow is expanded due to RSS configuration. */

Suggest 'expanded'.

>  	uint32_t layers;
>  	/**< Bit-fields of present layers see MLX5_FLOW_ITEMS_*. */
>  	uint32_t modifier;
>  	/**< Bit-fields of present modifier see MLX5_FLOW_MOD_*. */
>  	uint32_t fate;
>  	/**< Bit-fields of present fate see MLX5_FLOW_FATE_*. */
> -	struct mlx5_flow_verbs verbs; /* Verbs flow. */
> -	uint16_t queue; /**< Destination queue to redirect traffic to. */
> +	LIST_HEAD(verbs, mlx5_flow_verbs) verbs; /**< Verbs flows list. */
> +	struct mlx5_flow_verbs *cur_verbs;
> +	/**< Current Verbs flow structure being filled. */
> +	struct rte_flow_action_rss rss;/**< RSS context. */
> +	uint8_t key[40]; /**< RSS hash key. */

Let's define a macro for '40'.
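
Something like the below, assuming 40 here is the same Toeplitz key length
as rss_hash_default_key_len (the macro name is only a suggestion):

#define MLX5_RSS_HASH_KEY_LEN 40

	uint8_t key[MLX5_RSS_HASH_KEY_LEN]; /**< RSS hash key. */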

> +	uint16_t (*queue)[]; /**< Destination queues to redirect traffic to. */
>  };
>  
>  static const struct rte_flow_ops mlx5_flow_ops = {
> @@ -122,16 +263,27 @@ struct ibv_spec_header {
>  	uint16_t size;
>  };
>  
> - /**
> -  * Get the maximum number of priority available.
> -  *
> -  * @param dev
> -  *   Pointer to Ethernet device.
> -  *
> -  * @return
> -  *   number of supported flow priority on success, a negative errno value
> -  *   otherwise and rte_errno is set.
> -  */
> +/* Map of Verbs to Flow priority with 8 Verbs priorities. */
> +static const uint32_t priority_map_3[][3] = {
> +	{ 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
> +};
> +
> +/* Map of Verbs to Flow priority with 16 Verbs priorities. */
> +static const uint32_t priority_map_5[][3] = {
> +	{ 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 },
> +	{ 9, 10, 11 }, { 12, 13, 14 },
> +};

How about 

enum mlx5_sub_priority {
	MLX5_SUB_PRIORITY_0 = 0,
	MLX5_SUB_PRIORITY_1,
	MLX5_SUB_PRIORITY_2,
	MLX5_SUB_PRIORITY_MAX,
};

static const uint32_t priority_map_3[][MLX5_SUB_PRIORITY_MAX] = {

> +
> +/**
> + * Get the maximum number of priority available.
> + *
> + * @param dev
> + *   Pointer to Ethernet device.
> + *
> + * @return
> + *   number of supported flow priority on success, a negative errno
> + *   value otherwise and rte_errno is set.
> + */
>  int
>  mlx5_flow_priorities(struct rte_eth_dev *dev)

mlx5_flow_priorities() vs mlx5_flow_priority(), similar names but different
functionality. Better to rename them, e.g. mlx5_flow_get_max_priority() and
mlx5_flow_adjust_priority().
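
E.g. (prototypes only, the names are just suggestions):

int mlx5_flow_get_max_priority(struct rte_eth_dev *dev);

static int mlx5_flow_adjust_priority(struct rte_eth_dev *dev,
				     uint32_t priority,
				     uint32_t subpriority);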

>  {
> @@ -156,6 +308,7 @@ mlx5_flow_priorities(struct rte_eth_dev *dev)
>  	struct mlx5_hrxq *drop = mlx5_hrxq_drop_new(dev);
>  	uint16_t vprio[] = { 8, 16 };
>  	int i;
> +	int priority = 0;
>  
>  	if (!drop) {
>  		rte_errno = ENOTSUP;
> @@ -167,11 +320,54 @@ mlx5_flow_priorities(struct rte_eth_dev *dev)
>  		if (!flow)
>  			break;
>  		claim_zero(mlx5_glue->destroy_flow(flow));
> +		priority = vprio[i];
> +	}
> +	switch (priority) {
> +	case 8:
> +		priority = 3;

How about,
	priority = RTE_DIM(priority_map_3);

> +		break;
> +	case 16:
> +		priority = 5;

	priority = RTE_DIM(priority_map_5);

> +		break;
> +	default:
> +		rte_errno = ENOTSUP;
> +		DRV_LOG(ERR,
> +			"port %u verbs maximum priority: %d expected 8/16",
> +			dev->data->port_id, vprio[i]);
> +		return -rte_errno;
>  	}
>  	mlx5_hrxq_drop_release(dev, drop);
>  	DRV_LOG(INFO, "port %u flow maximum priority: %d",
> -		dev->data->port_id, vprio[i]);
> -	return vprio[i];
> +		dev->data->port_id, priority);
> +	return priority;
> +}
> +
> +/**
> + * Adjust flow priority.
> + *
> + * @param dev
> + *   Pointer to Ethernet device.
> + * @param flow
> + *   Pointer to an rte flow.
> + *
> + * @return
> + *   The priority adjusted.
> + */
> +static int
> +mlx5_flow_priority(struct rte_eth_dev *dev, uint32_t priority,
> +		   uint32_t subpriority)
> +{
> +	struct priv *priv = dev->data->dev_private;
> +
> +	switch (priv->config.flow_prio) {
> +	case 3:

	case RTE_DIM(priority_map_3):

> +		priority = priority_map_3[priority][subpriority];
> +		break;
> +	case 5:

	case RTE_DIM(priority_map_5):

> +		priority = priority_map_5[priority][subpriority];
> +		break;
> +	}
> +	return priority;
>  }
>  
>  /**
> @@ -185,6 +381,8 @@ void
>  mlx5_flow_print(struct rte_flow *flow __rte_unused)
>  {
>  #ifndef NDEBUG
> +	struct mlx5_flow_verbs *verbs = LIST_FIRST(&flow->verbs);
> +
>  	fprintf(stdout, "---------8<------------\n");
>  	fprintf(stdout, "%s: flow information\n", MLX5_DRIVER_NAME);
>  	fprintf(stdout, " attributes: group %u priority %u ingress %d egress %d"
> @@ -193,26 +391,36 @@ mlx5_flow_print(struct rte_flow *flow __rte_unused)
>  		flow->attributes.ingress,
>  		flow->attributes.egress,
>  		flow->attributes.transfer);
> -	fprintf(stdout, " layers: %s/%s/%s\n",
> -		flow->layers & MLX5_FLOW_LAYER_OUTER_L2 ? "l2" : "-",
> -		flow->layers & MLX5_FLOW_LAYER_OUTER_L3 ? "l3" : "-",
> -		flow->layers & MLX5_FLOW_LAYER_OUTER_L4 ? "l4" : "-");
> -	if (flow->fate & MLX5_FLOW_FATE_DROP)
> +	if (flow->fate & MLX5_FLOW_FATE_DROP) {
>  		fprintf(stdout, " fate: drop queue\n");
> -	else if (flow->fate & MLX5_FLOW_FATE_QUEUE)
> -		fprintf(stdout, " fate: target queue %u\n", flow->queue);
> -	if (flow->verbs.attr) {
> -		struct ibv_spec_header *hdr =
> -			(struct ibv_spec_header *)flow->verbs.specs;
> -		const int n = flow->verbs.attr->num_of_specs;
> -		int i;
> -
> -		fprintf(stdout, " Verbs attributes: specs_n %u\n",
> -			flow->verbs.attr->num_of_specs);
> -		for (i = 0; i != n; ++i) {
> -			rte_hexdump(stdout, " ", hdr, hdr->size);
> -			hdr = (struct ibv_spec_header *)
> -				((uint8_t *)hdr + hdr->size);
> +	} else {
> +		uint16_t i;
> +
> +		fprintf(stdout, " fate: target queues");
> +		for (i = 0; i != flow->rss.queue_num; ++i)
> +			fprintf(stdout, " %u", (*flow->queue)[i]);
> +		fprintf(stdout, "\n");
> +	}
> +	LIST_FOREACH(verbs, &flow->verbs, next) {
> +		uint32_t layers = flow->layers | verbs->layers;
> +
> +		fprintf(stdout, " layers: %s/%s/%s\n",
> +			layers & MLX5_FLOW_LAYER_OUTER_L2 ? "l2" : "-",
> +			layers & MLX5_FLOW_LAYER_OUTER_L3 ? "l3" : "-",
> +			layers & MLX5_FLOW_LAYER_OUTER_L4 ? "l4" : "-");
> +		if (verbs->attr) {
> +			struct ibv_spec_header *hdr =
> +				(struct ibv_spec_header *)verbs->specs;
> +			const int n = verbs->attr->num_of_specs;
> +			int i;
> +
> +			fprintf(stdout, " Verbs attributes: specs_n %u\n",
> +				verbs->attr->num_of_specs);
> +			for (i = 0; i != n; ++i) {
> +				rte_hexdump(stdout, " ", hdr, hdr->size);
> +				hdr = (struct ibv_spec_header *)
> +					((uint8_t *)hdr + hdr->size);
> +			}
>  		}
>  	}
>  	fprintf(stdout, "--------->8------------\n");
> @@ -239,18 +447,20 @@ mlx5_flow_attributes(struct rte_eth_dev *dev, const struct rte_flow_attr *attr,
>  		     struct rte_flow *flow, struct rte_flow_error *error)
>  {
>  	uint32_t priority_max =
> -		((struct priv *)dev->data->dev_private)->config.flow_prio;
> +		((struct priv *)dev->data->dev_private)->config.flow_prio - 1;
>  
>  	if (attr->group)
>  		return rte_flow_error_set(error, ENOTSUP,
>  					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
>  					  NULL,
>  					  "groups are not supported");
> -	if (attr->priority >= priority_max)
> +	if (attr->priority != MLX5_FLOW_PRIO_RSVD &&
> +	    attr->priority >= priority_max)
>  		return rte_flow_error_set(error, ENOTSUP,
>  					  RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
>  					  NULL,
> -					  "priority value is not supported");
> +					  "requested priority value is not"
> +					  " supported");
>  	if (attr->egress)
>  		return rte_flow_error_set(error, ENOTSUP,
>  					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
> @@ -267,6 +477,8 @@ mlx5_flow_attributes(struct rte_eth_dev *dev, const struct rte_flow_attr *attr,
>  					  NULL,
>  					  "only ingress is supported");
>  	flow->attributes = *attr;
> +	if (attr->priority == MLX5_FLOW_PRIO_RSVD)
> +		flow->attributes.priority = priority_max;
>  	return 0;
>  }
>  
> @@ -346,14 +558,51 @@ mlx5_flow_item_validate(const struct rte_flow_item *item,
>  static void
>  mlx5_flow_spec_verbs_add(struct rte_flow *flow, void *src, unsigned int size)
>  {
> -	if (flow->verbs.specs) {
> +	struct mlx5_flow_verbs *verbs = flow->cur_verbs;
> +
> +	if (verbs->specs) {
>  		void *dst;
>  
> -		dst = (void *)(flow->verbs.specs + flow->verbs.size);
> +		dst = (void *)(verbs->specs + verbs->size);
>  		memcpy(dst, src, size);
> -		++flow->verbs.attr->num_of_specs;
> +		++verbs->attr->num_of_specs;
>  	}
> -	flow->verbs.size += size;
> +	verbs->size += size;
> +}
> +
> +/**
> + * Update layer bit-field.
> + *
> + * @param flow[in, out]
> + *   Pointer to flow structure.
> + * @param layers
> + *   Bit-fields of layers to add see MLX5_FLOW_ITEMS_*.

Where is MLX5_FLOW_ITEMS_*? Isn't it MLX5_FLOW_LAYER_*?
There are several occurrences.
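
I.e. something like:

 * @param layers
 *   Bit-fields of layers to add, see MLX5_FLOW_LAYER_*.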

> + */
> +static void
> +mlx5_flow_layers_update(struct rte_flow *flow, uint32_t layers)
> +{
> +	if (flow->expand) {
> +		if (flow->cur_verbs)
> +			flow->cur_verbs->layers |= layers;

If flow->cur_verbs is null, does that mean it is a testing call? Then, is it
unnecessary to update layers for the testing call? Confusing.

> +	} else {
> +		flow->layers |= layers;
> +	}
> +}
> +
> +/**
> + * Get layers bit-field.
> + *
> + * @param flow[in, out]
> + *   Pointer to flow structure.
> + */
> +static uint32_t
> +mlx5_flow_layers(struct rte_flow *flow)
> +{
> +	uint32_t layers = flow->layers;
> +
> +	if (flow->expand && flow->cur_verbs)

If flow is expanded and it is a testing call, then flow->layers is used?

> +		layers |= flow->cur_verbs->layers;
> +	return layers;

This part is so unclear to me, hard to understand. There are two 'layers'
fields, one in rte_flow and the other in mlx5_flow_verbs. It seems
rte_flow->layers is used only when the flow isn't expanded. If a flow is
expanded, flow->expand is set after processing the first entry in the expanded
list. In mlx5_flow_merge(),

	for (i = 0; i != buf->entries; ++i) {

		...

		flow->expand = !!(buf->entries > 1);
	}

Why is flow->expand set at the end of the loop? Is this in order to avoid
validation for the expanded flows? mlx5_flow_item_xxx() executes validation only
if flow->expand is zero, why?

And why does mlx5_flow_layers() have to return (flow->layers |
flow->cur_verbs->layers) if expanded?

If there are 3 entries in the rte_flow_expand_rss,
	eth
	eth / ipv4 / udp
	eth / ipv6 / udp

Then, the 2nd and 3rd entries don't have MLX5_FLOW_LAYER_OUTER_L2 in their
layers field? Please explain in detail and add comments appropriately.

>  }
>  
>  /**
> @@ -388,22 +637,26 @@ mlx5_flow_item_eth(const struct rte_flow_item *item, struct rte_flow *flow,
>  		.type = IBV_FLOW_SPEC_ETH,
>  		.size = size,
>  	};
> +	const uint32_t layers = mlx5_flow_layers(flow);
>  	int ret;
>  
> -	if (flow->layers & MLX5_FLOW_LAYER_OUTER_L2)
> -		return rte_flow_error_set(error, ENOTSUP,
> -					  RTE_FLOW_ERROR_TYPE_ITEM,
> -					  item,
> -					  "L2 layers already configured");
> -	if (!mask)
> -		mask = &rte_flow_item_eth_mask;
> -	ret = mlx5_flow_item_validate(item, (const uint8_t *)mask,
> -				      (const uint8_t *)&nic_mask,
> -				      sizeof(struct rte_flow_item_eth),
> -				      error);
> -	if (ret)
> -		return ret;
> -	flow->layers |= MLX5_FLOW_LAYER_OUTER_L2;
> +	if (!flow->expand) {
> +		if (layers & MLX5_FLOW_LAYER_OUTER_L2)
> +			return rte_flow_error_set(error, ENOTSUP,
> +						  RTE_FLOW_ERROR_TYPE_ITEM,
> +						  item,
> +						  "L2 layers already"
> +						  " configured");
> +		if (!mask)
> +			mask = &rte_flow_item_eth_mask;
> +		ret = mlx5_flow_item_validate(item, (const uint8_t *)mask,
> +					      (const uint8_t *)&nic_mask,
> +					      sizeof(struct rte_flow_item_eth),
> +					      error);
> +		if (ret)
> +			return ret;
> +	}
> +	mlx5_flow_layers_update(flow, MLX5_FLOW_LAYER_OUTER_L2);
>  	if (size > flow_size)
>  		return size;
>  	if (spec) {
> @@ -482,6 +735,7 @@ mlx5_flow_item_vlan(const struct rte_flow_item *item, struct rte_flow *flow,
>  		.tci = RTE_BE16(0x0fff),
>  	};
>  	unsigned int size = sizeof(struct ibv_flow_spec_eth);
> +	struct mlx5_flow_verbs *verbs = flow->cur_verbs;
>  	struct ibv_flow_spec_eth eth = {
>  		.type = IBV_FLOW_SPEC_ETH,
>  		.size = size,
> @@ -491,24 +745,30 @@ mlx5_flow_item_vlan(const struct rte_flow_item *item, struct rte_flow *flow,
>  			MLX5_FLOW_LAYER_OUTER_L4;
>  	const uint32_t vlanm = MLX5_FLOW_LAYER_OUTER_VLAN;
>  	const uint32_t l2m = MLX5_FLOW_LAYER_OUTER_L2;
> +	const uint32_t layers = mlx5_flow_layers(flow);
>  
> -	if (flow->layers & vlanm)
> -		return rte_flow_error_set(error, ENOTSUP,
> -					  RTE_FLOW_ERROR_TYPE_ITEM,
> -					  item,
> -					  "L2 layers already configured");
> -	else if ((flow->layers & lm) != 0)
> -		return rte_flow_error_set(error, ENOTSUP,
> -					  RTE_FLOW_ERROR_TYPE_ITEM,
> -					  item,
> -					  "L2 layer cannot follow L3/L4 layer");
> -	if (!mask)
> -		mask = &rte_flow_item_vlan_mask;
> -	ret = mlx5_flow_item_validate(item, (const uint8_t *)mask,
> -				      (const uint8_t *)&nic_mask,
> -				      sizeof(struct rte_flow_item_vlan), error);
> -	if (ret)
> -		return ret;
> +	if (!flow->expand) {
> +		if (layers & vlanm)
> +			return rte_flow_error_set(error, ENOTSUP,
> +						  RTE_FLOW_ERROR_TYPE_ITEM,
> +						  item,
> +						  "L2 layers already"
> +						  " configured");
> +		else if ((layers & lm) != 0)
> +			return rte_flow_error_set(error, ENOTSUP,
> +						  RTE_FLOW_ERROR_TYPE_ITEM,
> +						  item,
> +						  "L2 layer cannot follow"
> +						  " L3/L4 layer");
> +		if (!mask)
> +			mask = &rte_flow_item_vlan_mask;
> +		ret = mlx5_flow_item_validate(item, (const uint8_t *)mask,
> +					      (const uint8_t *)&nic_mask,
> +					      sizeof(struct rte_flow_item_vlan),
> +					      error);
> +		if (ret)
> +			return ret;
> +	}
>  	if (spec) {
>  		eth.val.vlan_tag = spec->tci;
>  		eth.mask.vlan_tag = mask->tci;
> @@ -517,32 +777,34 @@ mlx5_flow_item_vlan(const struct rte_flow_item *item, struct rte_flow *flow,
>  		eth.mask.ether_type = mask->inner_type;
>  		eth.val.ether_type &= eth.mask.ether_type;
>  	}
> -	/*
> -	 * From verbs perspective an empty VLAN is equivalent
> -	 * to a packet without VLAN layer.
> -	 */
> -	if (!eth.mask.vlan_tag)
> -		return rte_flow_error_set(error, EINVAL,
> -					  RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
> -					  item->spec,
> -					  "VLAN cannot be empty");
> -	/* Outer TPID cannot be matched. */
> -	if (eth.mask.ether_type)
> -		return rte_flow_error_set(error, ENOTSUP,
> -					  RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
> -					  item->spec,
> -					  "VLAN TPID matching is not"
> -					  " supported");
> -	if (!(flow->layers & l2m)) {
> +	if (!flow->expand) {
> +		/*
> +		 * From verbs perspective an empty VLAN is equivalent
> +		 * to a packet without VLAN layer.
> +		 */
> +		if (!eth.mask.vlan_tag)
> +			return rte_flow_error_set(error, EINVAL,
> +						  RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
> +						  item->spec,
> +						  "VLAN cannot be empty");
> +		/* Outer TPID cannot be matched. */
> +		if (eth.mask.ether_type)
> +			return rte_flow_error_set(error, ENOTSUP,
> +						  RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
> +						  item->spec,
> +						  "VLAN TPID matching is not"
> +						  " supported");
> +	}
> +	if (!(layers & l2m)) {
>  		if (size <= flow_size)
>  			mlx5_flow_spec_verbs_add(flow, &eth, size);
>  	} else {
> -		if (flow->verbs.attr)
> -			mlx5_flow_item_vlan_update(flow->verbs.attr, &eth);
> +		if (verbs->attr)
> +			mlx5_flow_item_vlan_update(verbs->attr, &eth);
>  		size = 0; /**< Only an update is done in eth specification. */
>  	}
> -	flow->layers |= MLX5_FLOW_LAYER_OUTER_L2 |
> -		MLX5_FLOW_LAYER_OUTER_VLAN;
> +	mlx5_flow_layers_update(flow, MLX5_FLOW_LAYER_OUTER_L2 |
> +				MLX5_FLOW_LAYER_OUTER_VLAN);
>  	return size;
>  }
>  
> @@ -582,25 +844,31 @@ mlx5_flow_item_ipv4(const struct rte_flow_item *item, struct rte_flow *flow,
>  		.size = size,
>  	};
>  	int ret;
> +	const uint32_t layers = mlx5_flow_layers(flow);
>  
> -	if (flow->layers & MLX5_FLOW_LAYER_OUTER_L3)
> -		return rte_flow_error_set(error, ENOTSUP,
> -					  RTE_FLOW_ERROR_TYPE_ITEM,
> -					  item,
> -					  "multiple L3 layers not supported");
> -	else if (flow->layers & MLX5_FLOW_LAYER_OUTER_L4)
> -		return rte_flow_error_set(error, ENOTSUP,
> -					  RTE_FLOW_ERROR_TYPE_ITEM,
> -					  item,
> -					  "L3 cannot follow an L4 layer.");
> -	if (!mask)
> -		mask = &rte_flow_item_ipv4_mask;
> -	ret = mlx5_flow_item_validate(item, (const uint8_t *)mask,
> -				      (const uint8_t *)&nic_mask,
> -				      sizeof(struct rte_flow_item_ipv4), error);
> -	if (ret < 0)
> -		return ret;
> -	flow->layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
> +	if (!flow->expand) {
> +		if (layers & MLX5_FLOW_LAYER_OUTER_L3)
> +			return rte_flow_error_set(error, ENOTSUP,
> +						  RTE_FLOW_ERROR_TYPE_ITEM,
> +						  item,
> +						  "multiple L3 layers not"
> +						  " supported");
> +		else if (layers & MLX5_FLOW_LAYER_OUTER_L4)
> +			return rte_flow_error_set(error, ENOTSUP,
> +						  RTE_FLOW_ERROR_TYPE_ITEM,
> +						  item,
> +						  "L3 cannot follow an L4"
> +						  " layer");
> +		if (!mask)
> +			mask = &rte_flow_item_ipv4_mask;
> +		ret = mlx5_flow_item_validate(item, (const uint8_t *)mask,
> +					      (const uint8_t *)&nic_mask,
> +					      sizeof(struct rte_flow_item_ipv4),
> +					      error);
> +		if (ret < 0)
> +			return ret;
> +	}
> +	mlx5_flow_layers_update(flow, MLX5_FLOW_LAYER_OUTER_L3_IPV4);
>  	if (size > flow_size)
>  		return size;
>  	if (spec) {
> @@ -667,25 +935,31 @@ mlx5_flow_item_ipv6(const struct rte_flow_item *item, struct rte_flow *flow,
>  		.size = size,
>  	};
>  	int ret;
> +	const uint32_t layers = mlx5_flow_layers(flow);
>  
> -	if (flow->layers & MLX5_FLOW_LAYER_OUTER_L3)
> -		return rte_flow_error_set(error, ENOTSUP,
> -					  RTE_FLOW_ERROR_TYPE_ITEM,
> -					  item,
> -					  "multiple L3 layers not supported");
> -	else if (flow->layers & MLX5_FLOW_LAYER_OUTER_L4)
> -		return rte_flow_error_set(error, ENOTSUP,
> -					  RTE_FLOW_ERROR_TYPE_ITEM,
> -					  item,
> -					  "L3 cannot follow an L4 layer.");
> -	if (!mask)
> -		mask = &rte_flow_item_ipv6_mask;
> -	ret = mlx5_flow_item_validate(item, (const uint8_t *)mask,
> -				      (const uint8_t *)&nic_mask,
> -				      sizeof(struct rte_flow_item_ipv6), error);
> -	if (ret < 0)
> -		return ret;
> -	flow->layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
> +	if (!flow->expand) {
> +		if (layers & MLX5_FLOW_LAYER_OUTER_L3)
> +			return rte_flow_error_set(error, ENOTSUP,
> +						  RTE_FLOW_ERROR_TYPE_ITEM,
> +						  item,
> +						  "multiple L3 layers not"
> +						  " supported");
> +		else if (layers & MLX5_FLOW_LAYER_OUTER_L4)
> +			return rte_flow_error_set(error, ENOTSUP,
> +						  RTE_FLOW_ERROR_TYPE_ITEM,
> +						  item,
> +						  "L3 cannot follow an L4"
> +						  " layer");
> +		if (!mask)
> +			mask = &rte_flow_item_ipv6_mask;
> +		ret = mlx5_flow_item_validate(item, (const uint8_t *)mask,
> +					      (const uint8_t *)&nic_mask,
> +					      sizeof(struct rte_flow_item_ipv6),
> +					      error);
> +		if (ret < 0)
> +			return ret;
> +	}
> +	mlx5_flow_layers_update(flow, MLX5_FLOW_LAYER_OUTER_L3_IPV6);
>  	if (size > flow_size)
>  		return size;
>  	if (spec) {
> @@ -759,25 +1033,31 @@ mlx5_flow_item_udp(const struct rte_flow_item *item, struct rte_flow *flow,
>  		.size = size,
>  	};
>  	int ret;
> +	const uint32_t layers = mlx5_flow_layers(flow);
>  
> -	if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L3))
> -		return rte_flow_error_set(error, ENOTSUP,
> -					  RTE_FLOW_ERROR_TYPE_ITEM,
> -					  item,
> -					  "L3 is mandatory to filter on L4");
> -	if (flow->layers & MLX5_FLOW_LAYER_OUTER_L4)
> -		return rte_flow_error_set(error, ENOTSUP,
> -					  RTE_FLOW_ERROR_TYPE_ITEM,
> -					  item,
> -					  "L4 layer is already present");
> -	if (!mask)
> -		mask = &rte_flow_item_udp_mask;
> -	ret = mlx5_flow_item_validate(item, (const uint8_t *)mask,
> -				      (const uint8_t *)&rte_flow_item_udp_mask,
> -				      sizeof(struct rte_flow_item_udp), error);
> -	if (ret < 0)
> -		return ret;
> -	flow->layers |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
> +	if (!flow->expand) {
> +		if (!(layers & MLX5_FLOW_LAYER_OUTER_L3))
> +			return rte_flow_error_set(error, ENOTSUP,
> +						  RTE_FLOW_ERROR_TYPE_ITEM,
> +						  item,
> +						  "L3 is mandatory to filter"
> +						  " on L4");
> +		if (layers & MLX5_FLOW_LAYER_OUTER_L4)
> +			return rte_flow_error_set(error, ENOTSUP,
> +						  RTE_FLOW_ERROR_TYPE_ITEM,
> +						  item,
> +						  "L4 layer is already"
> +						  " present");
> +		if (!mask)
> +			mask = &rte_flow_item_udp_mask;
> +		ret = mlx5_flow_item_validate
> +			(item, (const uint8_t *)mask,
> +			 (const uint8_t *)&rte_flow_item_udp_mask,
> +			 sizeof(struct rte_flow_item_udp), error);
> +		if (ret < 0)
> +			return ret;
> +	}
> +	mlx5_flow_layers_update(flow, MLX5_FLOW_LAYER_OUTER_L4_UDP);
>  	if (size > flow_size)
>  		return size;
>  	if (spec) {
> @@ -821,25 +1101,31 @@ mlx5_flow_item_tcp(const struct rte_flow_item *item, struct rte_flow *flow,
>  		.size = size,
>  	};
>  	int ret;
> +	const uint32_t layers = mlx5_flow_layers(flow);
>  
> -	if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L3))
> -		return rte_flow_error_set(error, ENOTSUP,
> -					  RTE_FLOW_ERROR_TYPE_ITEM,
> -					  item,
> -					  "L3 is mandatory to filter on L4");
> -	if (flow->layers & MLX5_FLOW_LAYER_OUTER_L4)
> -		return rte_flow_error_set(error, ENOTSUP,
> -					  RTE_FLOW_ERROR_TYPE_ITEM,
> -					  item,
> -					  "L4 layer is already present");
> -	if (!mask)
> -		mask = &rte_flow_item_tcp_mask;
> -	ret = mlx5_flow_item_validate(item, (const uint8_t *)mask,
> -				      (const uint8_t *)&rte_flow_item_tcp_mask,
> -				      sizeof(struct rte_flow_item_tcp), error);
> -	if (ret < 0)
> -		return ret;
> -	flow->layers |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
> +	if (!flow->expand) {
> +		if (!(layers & MLX5_FLOW_LAYER_OUTER_L3))
> +			return rte_flow_error_set(error, ENOTSUP,
> +						  RTE_FLOW_ERROR_TYPE_ITEM,
> +						  item,
> +						  "L3 is mandatory to filter"
> +						  " on L4");
> +		if (layers & MLX5_FLOW_LAYER_OUTER_L4)
> +			return rte_flow_error_set(error, ENOTSUP,
> +						  RTE_FLOW_ERROR_TYPE_ITEM,
> +						  item,
> +						  "L4 layer is already"
> +						  " present");
> +		if (!mask)
> +			mask = &rte_flow_item_tcp_mask;
> +		ret = mlx5_flow_item_validate
> +			(item, (const uint8_t *)mask,
> +			 (const uint8_t *)&rte_flow_item_tcp_mask,
> +			 sizeof(struct rte_flow_item_tcp), error);
> +		if (ret < 0)
> +			return ret;
> +	}
> +	mlx5_flow_layers_update(flow, MLX5_FLOW_LAYER_OUTER_L4_TCP);
>  	if (size > flow_size)
>  		return size;
>  	if (spec) {
> @@ -954,18 +1240,20 @@ mlx5_flow_action_drop(const struct rte_flow_action *actions,
>  			.size = size,
>  	};
>  
> -	if (flow->fate)
> -		return rte_flow_error_set(error, ENOTSUP,
> -					  RTE_FLOW_ERROR_TYPE_ACTION,
> -					  actions,
> -					  "multiple fate actions are not"
> -					  " supported");
> -	if (flow->modifier & (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK))
> -		return rte_flow_error_set(error, ENOTSUP,
> -					  RTE_FLOW_ERROR_TYPE_ACTION,
> -					  actions,
> -					  "drop is not compatible with"
> -					  " flag/mark action");
> +	if (!flow->expand) {
> +		if (flow->fate)
> +			return rte_flow_error_set(error, ENOTSUP,
> +						  RTE_FLOW_ERROR_TYPE_ACTION,
> +						  actions,
> +						  "multiple fate actions are"
> +						  " not supported");
> +		if (flow->modifier & (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK))
> +			return rte_flow_error_set(error, ENOTSUP,
> +						  RTE_FLOW_ERROR_TYPE_ACTION,
> +						  actions,
> +						  "drop is not compatible with"
> +						  " flag/mark action");
> +	}
>  	if (size < flow_size)
>  		mlx5_flow_spec_verbs_add(flow, &drop, size);
>  	flow->fate |= MLX5_FLOW_FATE_DROP;
> @@ -998,6 +1286,8 @@ mlx5_flow_action_queue(struct rte_eth_dev *dev,
>  	struct priv *priv = dev->data->dev_private;
>  	const struct rte_flow_action_queue *queue = actions->conf;
>  
> +	if (flow->expand)
> +		return 0;
>  	if (flow->fate)
>  		return rte_flow_error_set(error, ENOTSUP,
>  					  RTE_FLOW_ERROR_TYPE_ACTION,
> @@ -1014,11 +1304,162 @@ mlx5_flow_action_queue(struct rte_eth_dev *dev,
>  					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
>  					  &queue->index,
>  					  "queue is not configured");
> -	flow->queue = queue->index;
> +	if (flow->queue)
> +		(*flow->queue)[0] = queue->index;
> +	flow->rss.queue_num = 1;
>  	flow->fate |= MLX5_FLOW_FATE_QUEUE;
>  	return 0;
>  }
>  
> +/**
> + * Store the Verbs hash fields and priority according to the layer and types.
> + *
> + * @param dev
> + *   Pointer to Ethernet device.
> + * @param flow
> + *   Pointer to flow structure.
> + * @param types
> + *   RSS types for this flow (see ETH_RSS_*).
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_errno is set.
> + */
> +static int
> +mlx5_flow_action_rss_verbs_attr(struct rte_eth_dev *dev, struct rte_flow *flow,
> +				uint32_t types)
> +{
> +	const uint32_t layers = mlx5_flow_layers(flow);
> +	uint64_t hash_fields;
> +	uint32_t priority;
> +
> +	if ((types & ETH_RSS_NONFRAG_IPV4_TCP) &&
> +	    (layers & MLX5_FLOW_LAYER_OUTER_L4_TCP)) {
> +		hash_fields = IBV_RX_HASH_SRC_IPV4 |
> +			IBV_RX_HASH_DST_IPV4 |
> +			IBV_RX_HASH_SRC_PORT_TCP |
> +			IBV_RX_HASH_DST_PORT_TCP;
> +		priority = 0;
> +	} else if ((types & ETH_RSS_NONFRAG_IPV4_UDP) &&
> +		 (layers & MLX5_FLOW_LAYER_OUTER_L4_UDP)) {
> +		hash_fields = IBV_RX_HASH_SRC_IPV4 |
> +			IBV_RX_HASH_DST_IPV4 |
> +			IBV_RX_HASH_SRC_PORT_UDP |
> +			IBV_RX_HASH_DST_PORT_UDP;
> +		priority = 0;
> +	} else if ((types & (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4)) &&
> +		 (layers & MLX5_FLOW_LAYER_OUTER_L3_IPV4)) {
> +		hash_fields = IBV_RX_HASH_SRC_IPV4 |
> +			IBV_RX_HASH_DST_IPV4;
> +		priority = 1;
> +	} else if ((types & ETH_RSS_NONFRAG_IPV6_TCP) &&
> +		 (layers & MLX5_FLOW_LAYER_OUTER_L4_TCP)) {
> +		hash_fields = IBV_RX_HASH_SRC_IPV6 |
> +			IBV_RX_HASH_DST_IPV6 |
> +			IBV_RX_HASH_SRC_PORT_TCP |
> +			IBV_RX_HASH_DST_PORT_TCP;
> +		priority = 0;
> +	} else if ((types & ETH_RSS_NONFRAG_IPV6_UDP) &&
> +		 (layers & MLX5_FLOW_LAYER_OUTER_L3_IPV6)) {
> +		hash_fields = IBV_RX_HASH_SRC_IPV6 |
> +			IBV_RX_HASH_DST_IPV6 |
> +			IBV_RX_HASH_SRC_PORT_UDP |
> +			IBV_RX_HASH_DST_PORT_UDP;
> +		priority = 0;
> +	} else if ((types & (ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6)) &&
> +		 (layers & MLX5_FLOW_LAYER_OUTER_L3_IPV6)) {
> +		hash_fields = IBV_RX_HASH_SRC_IPV6 |
> +			IBV_RX_HASH_DST_IPV6;
> +		priority = 1;
> +	} else {
> +		hash_fields = 0;
> +		priority = 2;

How about 
		delta = MLX5_SUB_PRIORITY_2;

> +	}
> +	flow->cur_verbs->hash_fields = hash_fields;
> +	flow->cur_verbs->attr->priority =
> +		mlx5_flow_priority(dev, flow->attributes.priority, priority);
> +	return 0;
> +}
> +
> +/**
> + * Validate action queue provided by the user.
> + *
> + * @param dev
> + *   Pointer to Ethernet device structure.
> + * @param actions
> + *   Pointer to flow actions array.
> + * @param flow
> + *   Pointer to the rte_flow structure.
> + * @param error
> + *   Pointer to error structure.

Missing return value.
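
E.g., following the convention used elsewhere in this file:

 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.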

> + */
> +static int
> +mlx5_flow_action_rss(struct rte_eth_dev *dev,
> +		     const struct rte_flow_action *actions,
> +		     struct rte_flow *flow,
> +		     struct rte_flow_error *error)
> +{
> +	struct priv *priv = dev->data->dev_private;
> +	const struct rte_flow_action_rss *rss = actions->conf;
> +	unsigned int i;
> +
> +	if (flow->expand)
> +		return 0;
> +	if (flow->fate)
> +		return rte_flow_error_set(error, ENOTSUP,
> +					  RTE_FLOW_ERROR_TYPE_ACTION,
> +					  actions,
> +					  "multiple fate actions are not"
> +					  " supported");
> +	if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
> +	    rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
> +		return rte_flow_error_set(error, ENOTSUP,
> +					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
> +					  &rss->func,
> +					  "RSS hash function not supported");
> +	if (rss->level > 1)
> +		return rte_flow_error_set(error, ENOTSUP,
> +					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
> +					  &rss->level,
> +					  "tunnel RSS is not supported");
> +	if (rss->key_len < rss_hash_default_key_len)
> +		return rte_flow_error_set(error, ENOTSUP,
> +					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
> +					  &rss->key_len,
> +					  "RSS hash key too small");
> +	if (rss->key_len > rss_hash_default_key_len)
> +		return rte_flow_error_set(error, ENOTSUP,
> +					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
> +					  &rss->key_len,
> +					  "RSS hash key too large");
> +	if (rss->queue_num > priv->config.ind_table_max_size)
> +		return rte_flow_error_set(error, ENOTSUP,
> +					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
> +					  &rss->queue_num,
> +					  "number of queues too large");
> +	if (rss->types & MLX5_RSS_HF_MASK)
> +		return rte_flow_error_set(error, ENOTSUP,
> +					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
> +					  &rss->types,
> +					  "some RSS protocols are not"
> +					  " supported");
> +	for (i = 0; i != rss->queue_num; ++i) {
> +		if (!(*priv->rxqs)[rss->queue[i]])
> +			return rte_flow_error_set
> +				(error, EINVAL,
> +				 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
> +				 &rss->queue[i],
> +				 "queue is not configured");
> +	}
> +	if (flow->queue)
> +		memcpy((*flow->queue), rss->queue,
> +		       rss->queue_num * sizeof(uint16_t));
> +	flow->rss.queue_num = rss->queue_num;
> +	memcpy(flow->key, rss->key, rss_hash_default_key_len);
> +	flow->rss.types = rss->types;
> +	flow->fate |= MLX5_FLOW_FATE_RSS;
> +	return 0;
> +}
> +
>  /**
>   * Validate action flag provided by the user.
>   *
> @@ -1046,43 +1487,59 @@ mlx5_flow_action_flag(const struct rte_flow_action *actions,
>  		.size = size,
>  		.tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
>  	};
> +	struct mlx5_flow_verbs *verbs = flow->cur_verbs;
>  
> -	if (flow->modifier & MLX5_FLOW_MOD_FLAG)
> -		return rte_flow_error_set(error, ENOTSUP,
> -					  RTE_FLOW_ERROR_TYPE_ACTION,
> -					  actions,
> -					  "flag action already present");
> -	if (flow->fate & MLX5_FLOW_FATE_DROP)
> -		return rte_flow_error_set(error, ENOTSUP,
> -					  RTE_FLOW_ERROR_TYPE_ACTION,
> -					  actions,
> -					  "flag is not compatible with drop"
> -					  " action");
> -	if (flow->modifier & MLX5_FLOW_MOD_MARK)
> -		return 0;
> +	if (!flow->expand) {
> +		if (flow->modifier & MLX5_FLOW_MOD_FLAG)
> +			return rte_flow_error_set(error, ENOTSUP,
> +						  RTE_FLOW_ERROR_TYPE_ACTION,
> +						  actions,
> +						  "flag action already present");
> +		if (flow->fate & MLX5_FLOW_FATE_DROP)
> +			return rte_flow_error_set(error, ENOTSUP,
> +						  RTE_FLOW_ERROR_TYPE_ACTION,
> +						  actions,
> +						  "flag is not compatible with"
> +						  " drop action");
> +	}
> +	/*
> +	 * The two only possible cases, a mark has already been added in the
> +	 * specification, in such case, the flag is already present in
> +	 * addition of the mark.
> +	 * Second case, has it is not possible to have two flags, it just
> +	 * needs to add it.
> +	 */

Can you rephrase the 'second case'? Maybe 'has' -> 'as'?

> +	if (verbs) {
> +		verbs->modifier |= MLX5_FLOW_MOD_FLAG;
> +		if (verbs->modifier & MLX5_FLOW_MOD_MARK)
> +			size = 0;
> +		else if (size <= flow_size)
> +			mlx5_flow_spec_verbs_add(flow, &tag, size);
> +	} else {
> +		if (flow->modifier & MLX5_FLOW_MOD_MARK)
> +			size = 0;
> +	}
>  	flow->modifier |= MLX5_FLOW_MOD_FLAG;
> -	if (size <= flow_size)
> -		mlx5_flow_spec_verbs_add(flow, &tag, size);
>  	return size;
>  }
>  
>  /**
>   * Update verbs specification to modify the flag to mark.
>   *
> - * @param flow
> - *   Pointer to the rte_flow structure.
> + * @param verbs
> + *   Pointer to the mlx5_flow_verbs structure.
>   * @param mark_id
>   *   Mark identifier to replace the flag.
>   */
>  static void
> -mlx5_flow_verbs_mark_update(struct rte_flow *flow, uint32_t mark_id)
> +mlx5_flow_verbs_mark_update(struct mlx5_flow_verbs *verbs, uint32_t mark_id)
>  {
>  	struct ibv_spec_header *hdr;
>  	int i;
>  
>  	/* Update Verbs specification. */
> -	hdr = (struct ibv_spec_header *)flow->verbs.specs;
> -	for (i = 0; i != flow->verbs.attr->num_of_specs; ++i) {
> +	hdr = (struct ibv_spec_header *)verbs->specs;
> +	for (i = 0; i != verbs->attr->num_of_specs; ++i) {
>  		if (hdr->type == IBV_FLOW_SPEC_ACTION_TAG) {
>  			struct ibv_flow_spec_action_tag *t =
>  				(struct ibv_flow_spec_action_tag *)hdr;
> @@ -1120,38 +1577,52 @@ mlx5_flow_action_mark(const struct rte_flow_action *actions,
>  		.type = IBV_FLOW_SPEC_ACTION_TAG,
>  		.size = size,
>  	};
> +	struct mlx5_flow_verbs *verbs = flow->cur_verbs;
>  
> -	if (!mark)
> -		return rte_flow_error_set(error, EINVAL,
> -					  RTE_FLOW_ERROR_TYPE_ACTION,
> -					  actions,
> -					  "configuration cannot be null");
> -	if (mark->id >= MLX5_FLOW_MARK_MAX)
> -		return rte_flow_error_set(error, EINVAL,
> -					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
> -					  &mark->id,
> -					  "mark must be between 0 and"
> -					  " 16777199");
> -	if (flow->modifier & MLX5_FLOW_MOD_MARK)
> -		return rte_flow_error_set(error, ENOTSUP,
> -					  RTE_FLOW_ERROR_TYPE_ACTION,
> -					  actions,
> -					  "mark action already present");
> -	if (flow->fate & MLX5_FLOW_FATE_DROP)
> -		return rte_flow_error_set(error, ENOTSUP,
> -					  RTE_FLOW_ERROR_TYPE_ACTION,
> -					  actions,
> -					  "mark is not compatible with drop"
> -					  " action");
> -	if (flow->modifier & MLX5_FLOW_MOD_FLAG) {
> -		mlx5_flow_verbs_mark_update(flow, mark->id);
> -		size = 0; /**< Only an update is done in the specification. */
> -	} else {
> -		tag.tag_id = mlx5_flow_mark_set(mark->id);
> -		if (size <= flow_size) {
> +	if (!flow->expand) {
> +		if (!mark)
> +			return rte_flow_error_set(error, EINVAL,
> +						  RTE_FLOW_ERROR_TYPE_ACTION,
> +						  actions,
> +						  "configuration cannot be"
> +						  " null");
> +		if (mark->id >= MLX5_FLOW_MARK_MAX)
> +			return rte_flow_error_set
> +				(error, EINVAL,
> +				 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
> +				 &mark->id,
> +				 "mark must be between 0 and 16777199");
> +		if (flow->modifier & MLX5_FLOW_MOD_MARK)
> +			return rte_flow_error_set(error, ENOTSUP,
> +						  RTE_FLOW_ERROR_TYPE_ACTION,
> +						  actions,
> +						  "mark action already"
> +						  " present");
> +		if (flow->fate & MLX5_FLOW_FATE_DROP)
> +			return rte_flow_error_set(error, ENOTSUP,
> +						  RTE_FLOW_ERROR_TYPE_ACTION,
> +						  actions,
> +						  "mark is not compatible with"
> +						  " drop action");
> +	}
> +	/*
> +	 * The two only possible cases, a flag has already been added in the
> +	 * specification, in such case, it needs to be update to add the id.
> +	 * Second case, has it is not possible to have two mark, it just
> +	 * needs to add it.
> +	 */

Can you rephrase the 'second case'? Maybe 'has' -> 'as'?

> +	if (verbs) {
> +		verbs->modifier |= MLX5_FLOW_MOD_MARK;
> +		if (verbs->modifier & MLX5_FLOW_MOD_FLAG) {
> +			mlx5_flow_verbs_mark_update(verbs, mark->id);
> +			size = 0;
> +		} else if (size <= flow_size) {

If verbs isn't null (not testing call), isn't it guaranteed there's enough
space? Is it still needed to check the size?

>  			tag.tag_id = mlx5_flow_mark_set(mark->id);
>  			mlx5_flow_spec_verbs_add(flow, &tag, size);
>  		}
> +	} else {
> +		if (flow->modifier & MLX5_FLOW_MOD_FLAG)
> +			size = 0;
>  	}
>  	flow->modifier |= MLX5_FLOW_MOD_MARK;
>  	return size;
> @@ -1185,6 +1656,15 @@ mlx5_flow_actions(struct rte_eth_dev *dev,
>  	int remain = flow_size;
>  	int ret = 0;
>  
> +	/*
> +	 * FLAG/MARK are the only actions having a specification in Verbs and
> +	 * not making part of the packet fate.  Due to this specificity and to
> +	 * avoid extra variable, their bit in the flow->modifier bit-field are
> +	 * disabled here to compute the exact necessary memory those action
> +	 * needs.
> +	 */
> +	flow->modifier &= ~(MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK);

Can't understand this well. Is this for the case where the flow is expanded? If
so, why don't you reset flow->modifier in the for loop of mlx5_flow_merge()?
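
I.e. something like (just to illustrate the idea):

	for (i = 0; i != buf->entries; ++i) {
		flow->modifier &= ~(MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK);
		...
	}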

> +	/* Process the actions. */
>  	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
>  		switch (actions->type) {
>  		case RTE_FLOW_ACTION_TYPE_VOID:
> @@ -1204,6 +1684,9 @@ mlx5_flow_actions(struct rte_eth_dev *dev,
>  		case RTE_FLOW_ACTION_TYPE_QUEUE:
>  			ret = mlx5_flow_action_queue(dev, actions, flow, error);
>  			break;
> +		case RTE_FLOW_ACTION_TYPE_RSS:
> +			ret = mlx5_flow_action_rss(dev, actions, flow, error);
> +			break;
>  		default:
>  			return rte_flow_error_set(error, ENOTSUP,
>  						  RTE_FLOW_ERROR_TYPE_ACTION,
> @@ -1257,27 +1740,92 @@ mlx5_flow_merge(struct rte_eth_dev *dev, struct rte_flow *flow,
>  		struct rte_flow_error *error)
>  {
>  	struct rte_flow local_flow = { .layers = 0, };
> -	size_t size = sizeof(*flow) + sizeof(struct ibv_flow_attr);
> +	size_t size = sizeof(*flow);
>  	int remain = (flow_size > size) ? flow_size - size : 0;
> +	struct rte_flow_expand_rss *buf;
>  	int ret;
> +	uint32_t i;
>  
>  	if (!remain)
>  		flow = &local_flow;
>  	ret = mlx5_flow_attributes(dev, attr, flow, error);
>  	if (ret < 0)
>  		return ret;
> -	ret = mlx5_flow_items(items, flow, remain, error);
> -	if (ret < 0)
> -		return ret;
> -	size += ret;
> -	remain = (flow_size > size) ? flow_size - size : 0;
> -	ret = mlx5_flow_actions(dev, actions, flow, remain, error);
> +	ret = mlx5_flow_actions(dev, actions, &local_flow, 0, error);
>  	if (ret < 0)
>  		return ret;
> -	size += ret;
> +	ret = rte_flow_expand_rss(NULL, 0, items, local_flow.rss.types,
> +				  mlx5_support_expansion,
> +				  local_flow.rss.level < 2 ?
> +				  MLX5_EXPANSION_ROOT : MLX5_EXPANSION_ROOT2);
> +	assert(ret > 0);
> +	buf = rte_calloc(__func__, 1, ret, 0);
> +	if (!buf) {
> +		rte_flow_error_set(error, ENOMEM,
> +				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
> +				   NULL,
> +				   "not enough memory to expand the RSS flow");
> +		goto error;
> +	}

I'm pretty sure you've already fixed this bug. Validation can't return ENOMEM.

> +	ret = rte_flow_expand_rss(buf, ret, items, local_flow.rss.types,
> +				  mlx5_support_expansion,
> +				  local_flow.rss.level < 2 ?
> +				  MLX5_EXPANSION_ROOT : MLX5_EXPANSION_ROOT2);
> +	assert(ret > 0);
> +	size += RTE_ALIGN_CEIL(local_flow.rss.queue_num * sizeof(uint16_t),
> +			       sizeof(void *));
>  	if (size <= flow_size)
> -		flow->verbs.attr->priority = flow->attributes.priority;
> +		flow->queue = (void *)(flow + 1);
> +	LIST_INIT(&flow->verbs);
> +	flow->layers = 0;
> +	flow->modifier = 0;
> +	flow->fate = 0;
> +	for (i = 0; i != buf->entries; ++i) {
> +		size_t off = size;
> +
> +		size += sizeof(struct ibv_flow_attr) +
> +			sizeof(struct mlx5_flow_verbs);
> +		remain = (flow_size > size) ? flow_size - size : 0;
> +		if (remain) {
> +			flow->cur_verbs = (void *)((uintptr_t)flow + off);
> +			flow->cur_verbs->attr = (void *)(flow->cur_verbs + 1);
> +			flow->cur_verbs->specs =
> +				(void *)(flow->cur_verbs->attr + 1);
> +		}
> +		ret = mlx5_flow_items
> +			((const struct rte_flow_item *)buf->patterns[i],
> +			 flow, remain, error);
> +		if (ret < 0)
> +			goto error;
> +		size += ret;
> +		if (remain > ret)
> +			remain -= ret;
> +		else
> +			remain = 0;
> +		ret = mlx5_flow_actions(dev, actions, flow, remain, error);
> +		if (ret < 0)
> +			goto error;
> +		size += ret;
> +		if (remain > ret)
> +			remain -= ret;
> +		else
> +			remain = 0;
> +		if (size <= flow_size) {
> +			flow->cur_verbs->attr->priority =
> +				flow->attributes.priority;
> +			ret = mlx5_flow_action_rss_verbs_attr(dev, flow,
> +							      flow->rss.types);
> +			if (ret < 0)
> +				goto error;
> +			LIST_INSERT_HEAD(&flow->verbs, flow->cur_verbs, next);
> +		}
> +		flow->expand = !!(buf->entries > 1);
> +	}
> +	rte_free(buf);
>  	return size;
> +error:
> +	rte_free(buf);
> +	return ret;
>  }
>  
>  /**
> @@ -1292,9 +1840,13 @@ static void
>  mlx5_flow_rxq_mark(struct rte_eth_dev *dev, struct rte_flow *flow)
>  {
>  	struct priv *priv = dev->data->dev_private;
> +	const uint32_t mask = MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK;
> +	uint32_t i;
>  
> -	(*priv->rxqs)[flow->queue]->mark |=
> -		flow->modifier & (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK);
> +	if (!(flow->modifier & mask))
> +		return;
> +	for (i = 0; i != flow->rss.queue_num; ++i)
> +		(*priv->rxqs)[(*flow->queue)[i]]->mark = 1;
>  }
>  
>  /**
> @@ -1328,18 +1880,20 @@ mlx5_flow_validate(struct rte_eth_dev *dev,
>  static void
>  mlx5_flow_fate_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
>  {
> -	if (flow->fate & MLX5_FLOW_FATE_DROP) {
> -		if (flow->verbs.flow) {
> -			claim_zero(mlx5_glue->destroy_flow(flow->verbs.flow));
> -			flow->verbs.flow = NULL;
> +	struct mlx5_flow_verbs *verbs;
> +
> +	LIST_FOREACH(verbs, &flow->verbs, next) {
> +		if (verbs->flow) {
> +			claim_zero(mlx5_glue->destroy_flow(verbs->flow));
> +			verbs->flow = NULL;
> +		}
> +		if (verbs->hrxq) {
> +			if (flow->fate & MLX5_FLOW_FATE_DROP)
> +				mlx5_hrxq_drop_release(dev, verbs->hrxq);
> +			else
> +				mlx5_hrxq_release(dev, verbs->hrxq);
> +			verbs->hrxq = NULL;
>  		}
> -	}
> -	if (flow->verbs.hrxq) {
> -		if (flow->fate & MLX5_FLOW_FATE_DROP)
> -			mlx5_hrxq_drop_release(dev, flow->verbs.hrxq);
> -		else if (flow->fate & MLX5_FLOW_FATE_QUEUE)
> -			mlx5_hrxq_release(dev, flow->verbs.hrxq);
> -		flow->verbs.hrxq = NULL;
>  	}
>  }
>  
> @@ -1360,46 +1914,68 @@ static int
>  mlx5_flow_fate_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
>  		     struct rte_flow_error *error)
>  {
> -	if (flow->fate & MLX5_FLOW_FATE_DROP) {
> -		flow->verbs.hrxq = mlx5_hrxq_drop_new(dev);
> -		if (!flow->verbs.hrxq)
> -			return rte_flow_error_set
> -				(error, errno,
> -				 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
> -				 NULL,
> -				 "cannot allocate Drop queue");
> -	} else if (flow->fate & MLX5_FLOW_FATE_QUEUE) {
> -		struct mlx5_hrxq *hrxq;
> -
> -		hrxq = mlx5_hrxq_get(dev, rss_hash_default_key,
> -				     rss_hash_default_key_len, 0,
> -				     &flow->queue, 1, 0, 0);
> -		if (!hrxq)
> -			hrxq = mlx5_hrxq_new(dev, rss_hash_default_key,
> -					     rss_hash_default_key_len, 0,
> -					     &flow->queue, 1, 0, 0);
> -		if (!hrxq)
> -			return rte_flow_error_set(error, rte_errno,
> -					RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
> -					NULL,
> -					"cannot create flow");
> -		flow->verbs.hrxq = hrxq;
> -	}
> -	flow->verbs.flow =
> -		mlx5_glue->create_flow(flow->verbs.hrxq->qp, flow->verbs.attr);
> -	if (!flow->verbs.flow) {
> -		if (flow->fate & MLX5_FLOW_FATE_DROP)
> -			mlx5_hrxq_drop_release(dev, flow->verbs.hrxq);
> -		else
> -			mlx5_hrxq_release(dev, flow->verbs.hrxq);
> -		flow->verbs.hrxq = NULL;
> -		return rte_flow_error_set(error, errno,
> -					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
> -					  NULL,
> -					  "kernel module refuses to create"
> -					  " flow");
> +	struct mlx5_flow_verbs *verbs;
> +	int err;
> +
> +	LIST_FOREACH(verbs, &flow->verbs, next) {
> +		if (flow->fate & MLX5_FLOW_FATE_DROP) {
> +			verbs->hrxq = mlx5_hrxq_drop_new(dev);
> +			if (!verbs->hrxq) {
> +				rte_flow_error_set
> +					(error, errno,
> +					 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
> +					 NULL,
> +					 "cannot get drop hash queue");
> +				goto error;
> +			}
> +		} else {
> +			struct mlx5_hrxq *hrxq;
> +
> +			hrxq = mlx5_hrxq_get(dev, flow->key,
> +					     rss_hash_default_key_len,
> +					     verbs->hash_fields,
> +					     (*flow->queue),
> +					     flow->rss.queue_num, 0, 0);
> +			if (!hrxq)
> +				hrxq = mlx5_hrxq_new(dev, flow->key,
> +						     rss_hash_default_key_len,
> +						     verbs->hash_fields,
> +						     (*flow->queue),
> +						     flow->rss.queue_num, 0, 0);
> +			if (!hrxq) {
> +				rte_flow_error_set
> +					(error, rte_errno,
> +					 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
> +					 NULL,
> +					 "cannot get hash queue");
> +				goto error;
> +			}
> +			verbs->hrxq = hrxq;
> +		}
> +		verbs->flow =
> +			mlx5_glue->create_flow(verbs->hrxq->qp, verbs->attr);
> +		if (!verbs->flow) {
> +			rte_flow_error_set(error, errno,
> +					   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
> +					   NULL,
> +					   "hardware refuses to create flow");
> +			goto error;
> +		}
>  	}
>  	return 0;
> +error:
> +	err = rte_errno; /* Save rte_errno before cleanup. */
> +	LIST_FOREACH(verbs, &flow->verbs, next) {
> +		if (verbs->hrxq) {
> +			if (flow->fate & MLX5_FLOW_FATE_DROP)
> +				mlx5_hrxq_drop_release(dev, verbs->hrxq);
> +			else
> +				mlx5_hrxq_release(dev, verbs->hrxq);
> +			verbs->hrxq = NULL;
> +		}
> +	}
> +	rte_errno = err; /* Restore rte_errno. */
> +	return -rte_errno;
>  }
>  
>  /**
> @@ -1429,42 +2005,43 @@ mlx5_flow_list_create(struct rte_eth_dev *dev,
>  		      const struct rte_flow_action actions[],
>  		      struct rte_flow_error *error)
>  {
> -	struct rte_flow *flow;
> -	size_t size;
> +	struct rte_flow *flow = NULL;
> +	size_t size = 0;
>  	int ret;
>  
> -	ret = mlx5_flow_merge(dev, NULL, 0, attr, items, actions, error);
> +	ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error);
>  	if (ret < 0)
>  		return NULL;
>  	size = ret;
> -	flow = rte_zmalloc(__func__, size, 0);
> +	flow = rte_calloc(__func__, 1, size, 0);
>  	if (!flow) {
>  		rte_flow_error_set(error, ENOMEM,
>  				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
>  				   NULL,
> -				   "cannot allocate memory");
> +				   "not enough memory to create flow");
>  		return NULL;
>  	}
> -	flow->verbs.attr = (struct ibv_flow_attr *)(flow + 1);
> -	flow->verbs.specs = (uint8_t *)(flow->verbs.attr + 1);
>  	ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error);
> -	if (ret < 0)
> -		goto error;
> +	if (ret < 0) {
> +		rte_free(flow);
> +		return NULL;
> +	}
>  	assert((size_t)ret == size);
>  	if (dev->data->dev_started) {
>  		ret = mlx5_flow_fate_apply(dev, flow, error);
> -		if (ret < 0)
> -			goto error;
> +		if (ret < 0) {
> +			ret = rte_errno; /* Save rte_errno before cleanup. */
> +			if (flow) {
> +				mlx5_flow_fate_remove(dev, flow);
> +				rte_free(flow);
> +			}
> +			rte_errno = ret; /* Restore rte_errno. */
> +			return NULL;
> +		}
>  	}
>  	mlx5_flow_rxq_mark(dev, flow);
>  	TAILQ_INSERT_TAIL(list, flow, next);
>  	return flow;
> -error:
> -	ret = rte_errno; /* Save rte_errno before cleanup. */
> -	mlx5_flow_fate_remove(dev, flow);
> -	rte_free(flow);
> -	rte_errno = ret; /* Restore rte_errno. */
> -	return NULL;
>  }
>  
>  /**
> @@ -1502,7 +2079,7 @@ mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
>  	struct priv *priv = dev->data->dev_private;
>  	struct rte_flow *rflow;
>  	const uint32_t mask = MLX5_FLOW_MOD_FLAG & MLX5_FLOW_MOD_MARK;
> -	int mark = 0;
> +	unsigned int i;
>  
>  	mlx5_flow_fate_remove(dev, flow);
>  	TAILQ_REMOVE(list, flow, next);
> @@ -1512,18 +2089,28 @@ mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
>  	}
>  	/*
>  	 * When a flow is removed and this flow has a flag/mark modifier, all
> -	 * flows needs to be parse to verify if the Rx queue use by the flow
> +	 * flows needs to be parse to verify if the Rx queues use by the flow
>  	 * still need to track the flag/mark request.
>  	 */
> -	TAILQ_FOREACH(rflow, &priv->flows, next) {
> -		if (!(rflow->modifier & mask))
> -			continue;
> -		if (flow->queue == rflow->queue) {
> -			mark = 1;
> -			break;
> +	for (i = 0; i != flow->rss.queue_num; ++i) {
> +		int mark = 0;
> +
> +		TAILQ_FOREACH(rflow, &priv->flows, next) {
> +			unsigned int j;
> +
> +			if (!(rflow->modifier & mask))
> +				continue;
> +			for (j = 0; j != rflow->rss.queue_num; ++j) {
> +				if ((*flow->queue)[i] == (*rflow->queue)[j]) {
> +					mark = 1;
> +					break;
> +				}
> +			}
> +			if (mark)
> +				break;
>  		}
> +		(*priv->rxqs)[i]->mark = !!mark;
>  	}
> -	(*priv->rxqs)[flow->queue]->mark = !!mark;
>  	rte_free(flow);
>  }
>  
> @@ -1654,7 +2241,7 @@ mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
>  	struct priv *priv = dev->data->dev_private;
>  	const struct rte_flow_attr attr = {
>  		.ingress = 1,
> -		.priority = priv->config.flow_prio - 1,
> +		.priority = MLX5_FLOW_PRIO_RSVD,
>  	};
>  	struct rte_flow_item items[] = {
>  		{
> -- 
> 2.18.0
> 

2018-07-11  7:22     ` [dpdk-dev] [PATCH v3 11/21] net/mlx5: add flow TCP item Nelio Laranjeiro
2018-07-11  7:22     ` [dpdk-dev] [PATCH v3 12/21] net/mlx5: add mark/flag flow action Nelio Laranjeiro
2018-07-11  7:22     ` [dpdk-dev] [PATCH v3 13/21] net/mlx5: use a macro for the RSS key size Nelio Laranjeiro
2018-07-11  7:22     ` [dpdk-dev] [PATCH v3 14/21] net/mlx5: add RSS flow action Nelio Laranjeiro
2018-07-11 19:57       ` Yongseok Koh
2018-07-11  7:22     ` [dpdk-dev] [PATCH v3 15/21] net/mlx5: remove useless arguments in hrxq API Nelio Laranjeiro
2018-07-11  7:22     ` [dpdk-dev] [PATCH v3 16/21] net/mlx5: support inner RSS computation Nelio Laranjeiro
2018-07-11  7:22     ` [dpdk-dev] [PATCH v3 17/21] net/mlx5: add flow VXLAN item Nelio Laranjeiro
2018-07-11  7:22     ` [dpdk-dev] [PATCH v3 18/21] net/mlx5: add flow VXLAN-GPE item Nelio Laranjeiro
2018-07-11  7:22     ` [dpdk-dev] [PATCH v3 19/21] net/mlx5: add flow GRE item Nelio Laranjeiro
2018-07-11  7:22     ` [dpdk-dev] [PATCH v3 20/21] net/mlx5: add flow MPLS item Nelio Laranjeiro
2018-07-11  7:22     ` [dpdk-dev] [PATCH v3 21/21] net/mlx5: add count flow action Nelio Laranjeiro
2018-07-11 20:00     ` [dpdk-dev] [PATCH v3 00/21] net/mlx5: flow rework Yongseok Koh
2018-07-12  9:30     ` [dpdk-dev] [PATCH v4 " Nelio Laranjeiro
2018-07-12  9:30       ` [dpdk-dev] [PATCH v4 01/21] net/mlx5: remove flow support Nelio Laranjeiro
2018-07-12  9:30       ` [dpdk-dev] [PATCH v4 02/21] net/mlx5: handle drop queues as regular queues Nelio Laranjeiro
2018-07-12  9:30       ` [dpdk-dev] [PATCH v4 03/21] net/mlx5: replace verbs priorities by flow Nelio Laranjeiro
2018-07-12  9:30       ` [dpdk-dev] [PATCH v4 04/21] net/mlx5: support flow Ethernet item along with drop action Nelio Laranjeiro
2018-07-12  9:30       ` [dpdk-dev] [PATCH v4 05/21] net/mlx5: add flow queue action Nelio Laranjeiro
2018-07-12  9:30       ` [dpdk-dev] [PATCH v4 06/21] net/mlx5: add flow stop/start Nelio Laranjeiro
2018-07-12  9:30       ` [dpdk-dev] [PATCH v4 07/21] net/mlx5: add flow VLAN item Nelio Laranjeiro
2018-07-12  9:30       ` [dpdk-dev] [PATCH v4 08/21] net/mlx5: add flow IPv4 item Nelio Laranjeiro
2018-07-12  9:30       ` [dpdk-dev] [PATCH v4 09/21] net/mlx5: add flow IPv6 item Nelio Laranjeiro
2018-07-12  9:30       ` [dpdk-dev] [PATCH v4 10/21] net/mlx5: add flow UDP item Nelio Laranjeiro
2018-07-12  9:30       ` [dpdk-dev] [PATCH v4 11/21] net/mlx5: add flow TCP item Nelio Laranjeiro
2018-07-12  9:30       ` [dpdk-dev] [PATCH v4 12/21] net/mlx5: add mark/flag flow action Nelio Laranjeiro
2018-07-12  9:30       ` [dpdk-dev] [PATCH v4 13/21] net/mlx5: use a macro for the RSS key size Nelio Laranjeiro
2018-07-12  9:31       ` [dpdk-dev] [PATCH v4 14/21] net/mlx5: add RSS flow action Nelio Laranjeiro
2018-07-12  9:31       ` [dpdk-dev] [PATCH v4 15/21] net/mlx5: remove useless arguments in hrxq API Nelio Laranjeiro
2018-07-12  9:31       ` [dpdk-dev] [PATCH v4 16/21] net/mlx5: support inner RSS computation Nelio Laranjeiro
2018-07-12  9:31       ` [dpdk-dev] [PATCH v4 17/21] net/mlx5: add flow VXLAN item Nelio Laranjeiro
2018-07-12  9:31       ` [dpdk-dev] [PATCH v4 18/21] net/mlx5: add flow VXLAN-GPE item Nelio Laranjeiro
2018-07-12  9:31       ` [dpdk-dev] [PATCH v4 19/21] net/mlx5: add flow GRE item Nelio Laranjeiro
2018-07-12  9:31       ` [dpdk-dev] [PATCH v4 20/21] net/mlx5: add flow MPLS item Nelio Laranjeiro
2018-07-12  9:31       ` [dpdk-dev] [PATCH v4 21/21] net/mlx5: add count flow action Nelio Laranjeiro
2018-07-12 10:44       ` [dpdk-dev] [PATCH v4 00/21] net/mlx5: flow rework Shahaf Shuler
