- * [dpdk-dev] [PATCH v1 01/21] net/mlx5: merge action and flow parser structure
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
@ 2017-08-02 14:10 ` Nelio Laranjeiro
       [not found] ` <cover.1501681913.git.nelio.laranjeiro@6wind.com>
                   ` (54 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-08-02 14:10 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil
mlx5_flow_create() and mlx5_flow_validate() are making common checks.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5_flow.c | 151 +++++++++++++++++++++----------------------
 1 file changed, 72 insertions(+), 79 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 7dd3ebb..00355f4 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -271,12 +271,23 @@ static const struct mlx5_flow_items mlx5_flow_items[] = {
 	},
 };
 
+/* Structure to parse actions. */
+struct mlx5_flow_action {
+	uint32_t queue:1; /**< Target is a receive queue. */
+	uint32_t drop:1; /**< Target is a drop queue. */
+	uint32_t mark:1; /**< Mark is present in the flow. */
+	uint32_t mark_id; /**< Mark identifier. */
+	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
+	uint16_t queues_n; /**< Number of entries in queue[]. */
+};
+
 /** Structure to pass to the conversion function. */
-struct mlx5_flow {
+struct mlx5_flow_parse {
 	struct ibv_exp_flow_attr *ibv_attr; /**< Verbs attribute. */
 	unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
 	uint32_t inner; /**< Set once VXLAN is encountered. */
 	uint64_t hash_fields; /**< Fields that participate in the hash. */
+	struct mlx5_flow_action actions; /**< Parsed action result. */
 };
 
 /** Structure for Drop queue. */
@@ -287,15 +298,6 @@ struct rte_flow_drop {
 	struct ibv_cq *cq; /**< Verbs completion queue. */
 };
 
-struct mlx5_flow_action {
-	uint32_t queue:1; /**< Target is a receive queue. */
-	uint32_t drop:1; /**< Target is a drop queue. */
-	uint32_t mark:1; /**< Mark is present in the flow. */
-	uint32_t mark_id; /**< Mark identifier. */
-	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
-	uint16_t queues_n; /**< Number of entries in queue[]. */
-};
-
 /**
  * Check support for a given item.
  *
@@ -374,8 +376,6 @@ mlx5_flow_item_validate(const struct rte_flow_item *item,
  *   Perform verbose error reporting if not NULL.
  * @param[in, out] flow
  *   Flow structure to update.
- * @param[in, out] action
- *   Action structure to update.
  *
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
@@ -386,8 +386,7 @@ priv_flow_validate(struct priv *priv,
 		   const struct rte_flow_item items[],
 		   const struct rte_flow_action actions[],
 		   struct rte_flow_error *error,
-		   struct mlx5_flow *flow,
-		   struct mlx5_flow_action *action)
+		   struct mlx5_flow_parse *flow)
 {
 	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
 
@@ -469,7 +468,7 @@ priv_flow_validate(struct priv *priv,
 		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
 			continue;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
-			action->drop = 1;
+			flow->actions.drop = 1;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
 			const struct rte_flow_action_queue *queue =
 				(const struct rte_flow_action_queue *)
@@ -479,13 +478,13 @@ priv_flow_validate(struct priv *priv,
 
 			if (!queue || (queue->index > (priv->rxqs_n - 1)))
 				goto exit_action_not_supported;
-			for (n = 0; n < action->queues_n; ++n) {
-				if (action->queues[n] == queue->index) {
+			for (n = 0; n < flow->actions.queues_n; ++n) {
+				if (flow->actions.queues[n] == queue->index) {
 					found = 1;
 					break;
 				}
 			}
-			if (action->queues_n > 1 && !found) {
+			if (flow->actions.queues_n > 1 && !found) {
 				rte_flow_error_set(error, ENOTSUP,
 					   RTE_FLOW_ERROR_TYPE_ACTION,
 					   actions,
@@ -493,9 +492,9 @@ priv_flow_validate(struct priv *priv,
 				return -rte_errno;
 			}
 			if (!found) {
-				action->queue = 1;
-				action->queues_n = 1;
-				action->queues[0] = queue->index;
+				flow->actions.queue = 1;
+				flow->actions.queues_n = 1;
+				flow->actions.queues[0] = queue->index;
 			}
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
 			const struct rte_flow_action_rss *rss =
@@ -510,12 +509,12 @@ priv_flow_validate(struct priv *priv,
 						   "no valid queues");
 				return -rte_errno;
 			}
-			if (action->queues_n == 1) {
+			if (flow->actions.queues_n == 1) {
 				uint16_t found = 0;
 
-				assert(action->queues_n);
+				assert(flow->actions.queues_n);
 				for (n = 0; n < rss->num; ++n) {
-					if (action->queues[0] ==
+					if (flow->actions.queues[0] ==
 					    rss->queue[n]) {
 						found = 1;
 						break;
@@ -540,10 +539,10 @@ priv_flow_validate(struct priv *priv,
 					return -rte_errno;
 				}
 			}
-			action->queue = 1;
+			flow->actions.queue = 1;
 			for (n = 0; n < rss->num; ++n)
-				action->queues[n] = rss->queue[n];
-			action->queues_n = rss->num;
+				flow->actions.queues[n] = rss->queue[n];
+			flow->actions.queues_n = rss->num;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
 			const struct rte_flow_action_mark *mark =
 				(const struct rte_flow_action_mark *)
@@ -563,19 +562,19 @@ priv_flow_validate(struct priv *priv,
 						   " and 16777199");
 				return -rte_errno;
 			}
-			action->mark = 1;
-			action->mark_id = mark->id;
+			flow->actions.mark = 1;
+			flow->actions.mark_id = mark->id;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
-			action->mark = 1;
+			flow->actions.mark = 1;
 		} else {
 			goto exit_action_not_supported;
 		}
 	}
-	if (action->mark && !flow->ibv_attr && !action->drop)
+	if (flow->actions.mark && !flow->ibv_attr && !flow->actions.drop)
 		flow->offset += sizeof(struct ibv_exp_flow_spec_action_tag);
-	if (!flow->ibv_attr && action->drop)
+	if (!flow->ibv_attr && flow->actions.drop)
 		flow->offset += sizeof(struct ibv_exp_flow_spec_action_drop);
-	if (!action->queue && !action->drop) {
+	if (!flow->actions.queue && !flow->actions.drop) {
 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
 				   NULL, "no valid action");
 		return -rte_errno;
@@ -606,18 +605,16 @@ mlx5_flow_validate(struct rte_eth_dev *dev,
 {
 	struct priv *priv = dev->data->dev_private;
 	int ret;
-	struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr) };
-	struct mlx5_flow_action action = {
-		.queue = 0,
-		.drop = 0,
-		.mark = 0,
-		.mark_id = MLX5_FLOW_MARK_DEFAULT,
-		.queues_n = 0,
+	struct mlx5_flow_parse flow = {
+		.offset = sizeof(struct ibv_exp_flow_attr),
+		.actions = {
+			.mark_id = MLX5_FLOW_MARK_DEFAULT,
+			.queues_n = 0,
+		},
 	};
 
 	priv_lock(priv);
-	ret = priv_flow_validate(priv, attr, items, actions, error, &flow,
-				 &action);
+	ret = priv_flow_validate(priv, attr, items, actions, error, &flow);
 	priv_unlock(priv);
 	return ret;
 }
@@ -639,7 +636,7 @@ mlx5_flow_create_eth(const struct rte_flow_item *item,
 {
 	const struct rte_flow_item_eth *spec = item->spec;
 	const struct rte_flow_item_eth *mask = item->mask;
-	struct mlx5_flow *flow = (struct mlx5_flow *)data;
+	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
 	struct ibv_exp_flow_spec_eth *eth;
 	const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth);
 	unsigned int i;
@@ -688,7 +685,7 @@ mlx5_flow_create_vlan(const struct rte_flow_item *item,
 {
 	const struct rte_flow_item_vlan *spec = item->spec;
 	const struct rte_flow_item_vlan *mask = item->mask;
-	struct mlx5_flow *flow = (struct mlx5_flow *)data;
+	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
 	struct ibv_exp_flow_spec_eth *eth;
 	const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth);
 
@@ -720,7 +717,7 @@ mlx5_flow_create_ipv4(const struct rte_flow_item *item,
 {
 	const struct rte_flow_item_ipv4 *spec = item->spec;
 	const struct rte_flow_item_ipv4 *mask = item->mask;
-	struct mlx5_flow *flow = (struct mlx5_flow *)data;
+	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
 	struct ibv_exp_flow_spec_ipv4_ext *ipv4;
 	unsigned int ipv4_size = sizeof(struct ibv_exp_flow_spec_ipv4_ext);
 
@@ -774,7 +771,7 @@ mlx5_flow_create_ipv6(const struct rte_flow_item *item,
 {
 	const struct rte_flow_item_ipv6 *spec = item->spec;
 	const struct rte_flow_item_ipv6 *mask = item->mask;
-	struct mlx5_flow *flow = (struct mlx5_flow *)data;
+	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
 	struct ibv_exp_flow_spec_ipv6_ext *ipv6;
 	unsigned int ipv6_size = sizeof(struct ibv_exp_flow_spec_ipv6_ext);
 	unsigned int i;
@@ -831,7 +828,7 @@ mlx5_flow_create_udp(const struct rte_flow_item *item,
 {
 	const struct rte_flow_item_udp *spec = item->spec;
 	const struct rte_flow_item_udp *mask = item->mask;
-	struct mlx5_flow *flow = (struct mlx5_flow *)data;
+	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
 	struct ibv_exp_flow_spec_tcp_udp *udp;
 	unsigned int udp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp);
 
@@ -875,7 +872,7 @@ mlx5_flow_create_tcp(const struct rte_flow_item *item,
 {
 	const struct rte_flow_item_tcp *spec = item->spec;
 	const struct rte_flow_item_tcp *mask = item->mask;
-	struct mlx5_flow *flow = (struct mlx5_flow *)data;
+	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
 	struct ibv_exp_flow_spec_tcp_udp *tcp;
 	unsigned int tcp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp);
 
@@ -919,7 +916,7 @@ mlx5_flow_create_vxlan(const struct rte_flow_item *item,
 {
 	const struct rte_flow_item_vxlan *spec = item->spec;
 	const struct rte_flow_item_vxlan *mask = item->mask;
-	struct mlx5_flow *flow = (struct mlx5_flow *)data;
+	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
 	struct ibv_exp_flow_spec_tunnel *vxlan;
 	unsigned int size = sizeof(struct ibv_exp_flow_spec_tunnel);
 	union vni {
@@ -958,7 +955,7 @@ mlx5_flow_create_vxlan(const struct rte_flow_item *item,
  *   Mark identifier.
  */
 static int
-mlx5_flow_create_flag_mark(struct mlx5_flow *flow, uint32_t mark_id)
+mlx5_flow_create_flag_mark(struct mlx5_flow_parse *flow, uint32_t mark_id)
 {
 	struct ibv_exp_flow_spec_action_tag *tag;
 	unsigned int size = sizeof(struct ibv_exp_flow_spec_action_tag);
@@ -988,7 +985,7 @@ mlx5_flow_create_flag_mark(struct mlx5_flow *flow, uint32_t mark_id)
  */
 static struct rte_flow *
 priv_flow_create_action_queue_drop(struct priv *priv,
-				   struct mlx5_flow *flow,
+				   struct mlx5_flow_parse *flow,
 				   struct rte_flow_error *error)
 {
 	struct rte_flow *rte_flow;
@@ -1036,8 +1033,6 @@ priv_flow_create_action_queue_drop(struct priv *priv,
  *   Pointer to private structure.
  * @param flow
  *   MLX5 flow attributes (filled by mlx5_flow_validate()).
- * @param action
- *   Target action structure.
  * @param[out] error
  *   Perform verbose error reporting if not NULL.
  *
@@ -1046,50 +1041,50 @@ priv_flow_create_action_queue_drop(struct priv *priv,
  */
 static struct rte_flow *
 priv_flow_create_action_queue(struct priv *priv,
-			      struct mlx5_flow *flow,
-			      struct mlx5_flow_action *action,
+			      struct mlx5_flow_parse *flow,
 			      struct rte_flow_error *error)
 {
 	struct rte_flow *rte_flow;
 	unsigned int i;
 	unsigned int j;
-	const unsigned int wqs_n = 1 << log2above(action->queues_n);
+	const unsigned int wqs_n = 1 << log2above(flow->actions.queues_n);
 	struct ibv_exp_wq *wqs[wqs_n];
 
 	assert(priv->pd);
 	assert(priv->ctx);
-	assert(!action->drop);
+	assert(!flow->actions.drop);
 	rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow) +
-			      sizeof(*rte_flow->rxqs) * action->queues_n, 0);
+			      sizeof(*rte_flow->rxqs) * flow->actions.queues_n,
+			      0);
 	if (!rte_flow) {
 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
 				   NULL, "cannot allocate flow memory");
 		return NULL;
 	}
-	for (i = 0; i < action->queues_n; ++i) {
+	for (i = 0; i < flow->actions.queues_n; ++i) {
 		struct rxq_ctrl *rxq;
 
-		rxq = container_of((*priv->rxqs)[action->queues[i]],
+		rxq = container_of((*priv->rxqs)[flow->actions.queues[i]],
 				   struct rxq_ctrl, rxq);
 		wqs[i] = rxq->wq;
 		rte_flow->rxqs[i] = &rxq->rxq;
 		++rte_flow->rxqs_n;
-		rxq->rxq.mark |= action->mark;
+		rxq->rxq.mark |= flow->actions.mark;
 	}
 	/* finalise indirection table. */
 	for (j = 0; i < wqs_n; ++i, ++j) {
 		wqs[i] = wqs[j];
-		if (j == action->queues_n)
+		if (j == flow->actions.queues_n)
 			j = 0;
 	}
-	rte_flow->mark = action->mark;
+	rte_flow->mark = flow->actions.mark;
 	rte_flow->ibv_attr = flow->ibv_attr;
 	rte_flow->hash_fields = flow->hash_fields;
 	rte_flow->ind_table = ibv_exp_create_rwq_ind_table(
 		priv->ctx,
 		&(struct ibv_exp_rwq_ind_table_init_attr){
 			.pd = priv->pd,
-			.log_ind_tbl_size = log2above(action->queues_n),
+			.log_ind_tbl_size = log2above(flow->actions.queues_n),
 			.ind_tbl = wqs,
 			.comp_mask = 0,
 		});
@@ -1167,18 +1162,17 @@ priv_flow_create(struct priv *priv,
 		 struct rte_flow_error *error)
 {
 	struct rte_flow *rte_flow;
-	struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr), };
-	struct mlx5_flow_action action = {
-		.queue = 0,
-		.drop = 0,
-		.mark = 0,
-		.mark_id = MLX5_FLOW_MARK_DEFAULT,
-		.queues_n = 0,
+	struct mlx5_flow_parse flow = {
+		.offset = sizeof(struct ibv_exp_flow_attr),
+		.actions = {
+			.mark_id = MLX5_FLOW_MARK_DEFAULT,
+			.queues = { 0 },
+			.queues_n = 0,
+		},
 	};
 	int err;
 
-	err = priv_flow_validate(priv, attr, items, actions, error, &flow,
-				 &action);
+	err = priv_flow_validate(priv, attr, items, actions, error, &flow);
 	if (err)
 		goto exit;
 	flow.ibv_attr = rte_malloc(__func__, flow.offset, 0);
@@ -1200,17 +1194,16 @@ priv_flow_create(struct priv *priv,
 	flow.inner = 0;
 	flow.hash_fields = 0;
 	claim_zero(priv_flow_validate(priv, attr, items, actions,
-				      error, &flow, &action));
-	if (action.mark && !action.drop) {
-		mlx5_flow_create_flag_mark(&flow, action.mark_id);
+				      error, &flow));
+	if (flow.actions.mark && !flow.actions.drop) {
+		mlx5_flow_create_flag_mark(&flow, flow.actions.mark_id);
 		flow.offset += sizeof(struct ibv_exp_flow_spec_action_tag);
 	}
-	if (action.drop)
+	if (flow.actions.drop)
 		rte_flow =
 			priv_flow_create_action_queue_drop(priv, &flow, error);
 	else
-		rte_flow = priv_flow_create_action_queue(priv, &flow, &action,
-							 error);
+		rte_flow = priv_flow_create_action_queue(priv, &flow, error);
 	if (!rte_flow)
 		goto exit;
 	return rte_flow;
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- [parent not found: <cover.1501681913.git.nelio.laranjeiro@6wind.com>] 
- * [dpdk-dev] [PATCH v1] net/mlx5: support RSS hash configuration in generic flow action
       [not found] ` <cover.1501681913.git.nelio.laranjeiro@6wind.com>
@ 2017-08-02 14:10   ` Nelio Laranjeiro
  0 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-08-02 14:10 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil
This also brings back RSS support on all flows created by default from
the control plane.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5_flow.c | 437 ++++++++++++++++++++++++++++++++++---------
 1 file changed, 346 insertions(+), 91 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 8316255..fe21dac 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -87,12 +87,89 @@ mlx5_flow_create_vxlan(const struct rte_flow_item *item,
 		       const void *default_mask,
 		       void *data);
 
+/* Hash RX queue types. */
+enum hash_rxq_type {
+	HASH_RXQ_TCPV4,
+	HASH_RXQ_UDPV4,
+	HASH_RXQ_IPV4,
+	HASH_RXQ_TCPV6,
+	HASH_RXQ_UDPV6,
+	HASH_RXQ_IPV6,
+	HASH_RXQ_ETH,
+};
+
+/* Initialization data for hash RX queue. */
+struct hash_rxq_init {
+	uint64_t hash_fields; /* Fields that participate in the hash. */
+	uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
+	unsigned int flow_priority; /* Flow priority to use. */
+};
+
+/* Initialization data for hash RX queues. */
+const struct hash_rxq_init hash_rxq_init[] = {
+	[HASH_RXQ_TCPV4] = {
+		.hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 |
+				IBV_EXP_RX_HASH_DST_IPV4 |
+				IBV_EXP_RX_HASH_SRC_PORT_TCP |
+				IBV_EXP_RX_HASH_DST_PORT_TCP),
+		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
+		.flow_priority = 4,
+	},
+	[HASH_RXQ_UDPV4] = {
+		.hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 |
+				IBV_EXP_RX_HASH_DST_IPV4 |
+				IBV_EXP_RX_HASH_SRC_PORT_UDP |
+				IBV_EXP_RX_HASH_DST_PORT_UDP),
+		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
+		.flow_priority = 4,
+	},
+	[HASH_RXQ_IPV4] = {
+		.hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 |
+				IBV_EXP_RX_HASH_DST_IPV4),
+		.dpdk_rss_hf = (ETH_RSS_IPV4 |
+				ETH_RSS_FRAG_IPV4),
+		.flow_priority = 5,
+	},
+	[HASH_RXQ_TCPV6] = {
+		.hash_fields = (IBV_EXP_RX_HASH_SRC_IPV6 |
+				IBV_EXP_RX_HASH_DST_IPV6 |
+				IBV_EXP_RX_HASH_SRC_PORT_TCP |
+				IBV_EXP_RX_HASH_DST_PORT_TCP),
+		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
+		.flow_priority = 4,
+	},
+	[HASH_RXQ_UDPV6] = {
+		.hash_fields = (IBV_EXP_RX_HASH_SRC_IPV6 |
+				IBV_EXP_RX_HASH_DST_IPV6 |
+				IBV_EXP_RX_HASH_SRC_PORT_UDP |
+				IBV_EXP_RX_HASH_DST_PORT_UDP),
+		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
+		.flow_priority = 4,
+	},
+	[HASH_RXQ_IPV6] = {
+		.hash_fields = (IBV_EXP_RX_HASH_SRC_IPV6 |
+				IBV_EXP_RX_HASH_DST_IPV6),
+		.dpdk_rss_hf = (ETH_RSS_IPV6 |
+				ETH_RSS_FRAG_IPV6),
+		.flow_priority = 5,
+	},
+	[HASH_RXQ_ETH] = {
+		.hash_fields = 0,
+		.dpdk_rss_hf = 0,
+		.flow_priority = 6,
+	},
+};
+
+/* Number of entries in hash_rxq_init[]. */
+const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
+
 struct rte_flow {
 	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
-	struct ibv_exp_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
-	struct ibv_exp_flow *ibv_flow; /**< Verbs flow. */
-	struct mlx5_hrxq *hrxq; /**< Hash Rx queue. */
 	uint32_t mark:1; /**< Set if the flow is marked. */
+	struct ibv_exp_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
+	struct mlx5_hrxq *hrxqs[RTE_DIM(hash_rxq_init)]; /**< Hash Rx queues. */
+	struct ibv_exp_flow *ibv_flows[RTE_DIM(hash_rxq_init)];
+		/**< Verbs flows. */
 };
 
 /** Static initializer for items. */
@@ -271,6 +348,7 @@ struct mlx5_flow_action {
 	uint32_t mark_id; /**< Mark identifier. */
 	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
 	uint16_t queues_n; /**< Number of entries in queue[]. */
+	const struct rte_eth_rss_conf *rss_conf; /**< User RSS configuration. */
 };
 
 /** Structure to pass to the conversion function. */
@@ -278,7 +356,6 @@ struct mlx5_flow_parse {
 	struct ibv_exp_flow_attr *ibv_attr; /**< Verbs attribute. */
 	unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
 	uint32_t inner; /**< Set once VXLAN is encountered. */
-	uint64_t hash_fields; /**< Fields that participate in the hash. */
 	struct mlx5_flow_action actions; /**< Parsed action result. */
 };
 
@@ -526,6 +603,7 @@ priv_flow_validate(struct priv *priv,
 				(const struct rte_flow_action_rss *)
 				actions->conf;
 			uint16_t n;
+			int rxq_n;
 
 			if (!rss || !rss->num) {
 				rte_flow_error_set(error, EINVAL,
@@ -534,6 +612,9 @@ priv_flow_validate(struct priv *priv,
 						   "no valid queues");
 				return -rte_errno;
 			}
+			rxq_n = rss->num;
+			if (rss->rss_conf && !rss->rss_conf->rss_hf)
+				rxq_n = 1;
 			if (flow->actions.queues_n == 1) {
 				uint16_t found = 0;
 
@@ -554,7 +635,7 @@ priv_flow_validate(struct priv *priv,
 					return -rte_errno;
 				}
 			}
-			for (n = 0; n < rss->num; ++n) {
+			for (n = 0; n < rxq_n; ++n) {
 				if (rss->queue[n] >= priv->rxqs_n) {
 					rte_flow_error_set(error, EINVAL,
 						   RTE_FLOW_ERROR_TYPE_ACTION,
@@ -565,9 +646,10 @@ priv_flow_validate(struct priv *priv,
 				}
 			}
 			flow->actions.queue = 1;
-			for (n = 0; n < rss->num; ++n)
+			for (n = 0; n < rxq_n; ++n)
 				flow->actions.queues[n] = rss->queue[n];
-			flow->actions.queues_n = rss->num;
+			flow->actions.queues_n = rxq_n;
+			flow->actions.rss_conf = rss->rss_conf;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
 			const struct rte_flow_action_mark *mark =
 				(const struct rte_flow_action_mark *)
@@ -666,7 +748,6 @@ mlx5_flow_create_eth(const struct rte_flow_item *item,
 
 	++flow->ibv_attr->num_of_specs;
 	flow->ibv_attr->priority = 2;
-	flow->hash_fields = 0;
 	eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
 	*eth = (struct ibv_exp_flow_spec_eth) {
 		.type = flow->inner | IBV_EXP_FLOW_SPEC_ETH,
@@ -746,8 +827,6 @@ mlx5_flow_create_ipv4(const struct rte_flow_item *item,
 
 	++flow->ibv_attr->num_of_specs;
 	flow->ibv_attr->priority = 1;
-	flow->hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 |
-			     IBV_EXP_RX_HASH_DST_IPV4);
 	ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
 	*ipv4 = (struct ibv_exp_flow_spec_ipv4_ext) {
 		.type = flow->inner | IBV_EXP_FLOW_SPEC_IPV4_EXT,
@@ -801,8 +880,6 @@ mlx5_flow_create_ipv6(const struct rte_flow_item *item,
 
 	++flow->ibv_attr->num_of_specs;
 	flow->ibv_attr->priority = 1;
-	flow->hash_fields = (IBV_EXP_RX_HASH_SRC_IPV6 |
-			     IBV_EXP_RX_HASH_DST_IPV6);
 	ipv6 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
 	*ipv6 = (struct ibv_exp_flow_spec_ipv6_ext) {
 		.type = flow->inner | IBV_EXP_FLOW_SPEC_IPV6_EXT,
@@ -857,8 +934,6 @@ mlx5_flow_create_udp(const struct rte_flow_item *item,
 
 	++flow->ibv_attr->num_of_specs;
 	flow->ibv_attr->priority = 0;
-	flow->hash_fields |= (IBV_EXP_RX_HASH_SRC_PORT_UDP |
-			      IBV_EXP_RX_HASH_DST_PORT_UDP);
 	udp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
 	*udp = (struct ibv_exp_flow_spec_tcp_udp) {
 		.type = flow->inner | IBV_EXP_FLOW_SPEC_UDP,
@@ -901,8 +976,6 @@ mlx5_flow_create_tcp(const struct rte_flow_item *item,
 
 	++flow->ibv_attr->num_of_specs;
 	flow->ibv_attr->priority = 0;
-	flow->hash_fields |= (IBV_EXP_RX_HASH_SRC_PORT_TCP |
-			      IBV_EXP_RX_HASH_DST_PORT_TCP);
 	tcp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
 	*tcp = (struct ibv_exp_flow_spec_tcp_udp) {
 		.type = flow->inner | IBV_EXP_FLOW_SPEC_TCP,
@@ -994,6 +1067,118 @@ mlx5_flow_create_flag_mark(struct mlx5_flow_parse *flow, uint32_t mark_id)
 }
 
 /**
+ * Create hash Rx queues when RSS is disabled.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param flow
+ *   MLX5 flow attributes (filled by mlx5_flow_validate()).
+ * @param rte_flow
+ *   Pointer to rte flow structure.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   0 on success, an errno value otherwise and rte_errno is set.
+ */
+static int
+priv_flow_create_action_queue_no_rss(struct priv *priv,
+				     struct mlx5_flow_parse *flow,
+				     struct rte_flow *rte_flow,
+				     struct rte_flow_error *error)
+{
+	rte_flow->hrxqs[HASH_RXQ_ETH] =
+		mlx5_priv_hrxq_get(priv, rss_hash_default_key,
+				   rss_hash_default_key_len,
+				   0,
+				   flow->actions.queues,
+				   flow->actions.queues_n);
+	if (rte_flow->hrxqs[HASH_RXQ_ETH])
+		return 0;
+	rte_flow->hrxqs[HASH_RXQ_ETH] =
+		mlx5_priv_hrxq_new(priv, rss_hash_default_key,
+				   rss_hash_default_key_len,
+				   0,
+				   flow->actions.queues,
+				   flow->actions.queues_n);
+	if (!rte_flow->hrxqs[HASH_RXQ_ETH]) {
+		rte_flow_error_set(error, ENOMEM,
+				   RTE_FLOW_ERROR_TYPE_HANDLE,
+				   NULL, "cannot create hash rxq");
+		return ENOMEM;
+	}
+	return 0;
+}
+
+/**
+ * Create hash Rx queues when RSS is enabled.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param flow
+ *   MLX5 flow attributes (filled by mlx5_flow_validate()).
+ * @param rte_flow
+ *   Pointer to rte flow structure.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   0 on success, an errno value otherwise and rte_errno is set.
+ */
+static int
+priv_flow_create_action_queue_rss(struct priv *priv,
+				  struct mlx5_flow_parse *flow,
+				  struct rte_flow *rte_flow,
+				  struct rte_flow_error *error)
+{
+	unsigned int i;
+
+	/**
+	 * 6 types of Hash Rx queues can be created to make the RSS
+	 * possible on the different kind of packets:
+	 *  - IPv4 UDP
+	 *  - IPv4 TCP
+	 *  - IPv6 UDP
+	 *  - IPv6 TCP
+	 *  - IPv4
+	 *  - IPv6
+	 * All of them are created only when rss_conf->rss_hf enables every type.
+	 */
+	for (i = 0; i != (hash_rxq_init_n - 1); ++i) {
+		uint64_t hash_fields;
+
+		if ((flow->actions.rss_conf->rss_hf &
+		     hash_rxq_init[i].dpdk_rss_hf) !=
+		    hash_rxq_init[i].dpdk_rss_hf)
+			continue;
+		hash_fields = hash_rxq_init[i].hash_fields;
+		rte_flow->hrxqs[i] =
+			mlx5_priv_hrxq_get(priv,
+					   flow->actions.rss_conf->rss_key,
+					   flow->actions.rss_conf->rss_key_len,
+					   hash_fields,
+					   flow->actions.queues,
+					   flow->actions.queues_n);
+		if (rte_flow->hrxqs[i])
+			continue;
+		rte_flow->hrxqs[i] =
+			mlx5_priv_hrxq_new(priv,
+					   flow->actions.rss_conf->rss_key,
+					   flow->actions.rss_conf->rss_key_len,
+					   hash_fields,
+					   flow->actions.queues,
+					   flow->actions.queues_n);
+		if (!rte_flow->hrxqs[i]) {
+			rte_flow_error_set(error, ENOMEM,
+					   RTE_FLOW_ERROR_TYPE_HANDLE,
+					   NULL, "cannot create hash rxq");
+			return ENOMEM;
+		}
+	}
+	return 0;
+}
+
+/**
  * Complete flow rule creation.
  *
  * @param priv
@@ -1024,23 +1209,20 @@ priv_flow_create_action_queue(struct priv *priv,
 	}
 	rte_flow->mark = flow->actions.mark;
 	rte_flow->ibv_attr = flow->ibv_attr;
-	rte_flow->hrxq = mlx5_priv_hrxq_get(priv, rss_hash_default_key,
-					    rss_hash_default_key_len,
-					    flow->hash_fields,
-					    flow->actions.queues,
-					    flow->actions.queues_n);
-	if (!rte_flow->hrxq) {
-		rte_flow->hrxq = mlx5_priv_hrxq_new(priv, rss_hash_default_key,
-						    rss_hash_default_key_len,
-						    flow->hash_fields,
-						    flow->actions.queues,
-						    flow->actions.queues_n);
-		if (!rte_flow->hrxq) {
-			rte_flow_error_set(error, ENOMEM,
-					   RTE_FLOW_ERROR_TYPE_HANDLE,
-					   NULL, "cannot create hash rxq");
+	if (flow->actions.queues_n == 1) {
+		unsigned int ret;
+
+		ret = priv_flow_create_action_queue_no_rss(priv, flow, rte_flow,
+							   error);
+		if (ret)
+			goto error;
+	} else {
+		unsigned int ret;
+
+		ret = priv_flow_create_action_queue_rss(priv, flow, rte_flow,
+							error);
+		if (ret)
 			goto error;
-		}
 	}
 	for (i = 0; i != flow->actions.queues_n; ++i) {
 		struct mlx5_rxq_data *q = (*priv->rxqs)[flow->actions.queues[i]];
@@ -1049,18 +1231,31 @@ priv_flow_create_action_queue(struct priv *priv,
 	}
 	if (!priv->dev->data->dev_started)
 		return rte_flow;
-	rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->hrxq->qp,
-						 rte_flow->ibv_attr);
-	if (!rte_flow->ibv_flow) {
-		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
-				   NULL, "flow rule creation failure");
-		goto error;
+	for (i = 0; i != hash_rxq_init_n; ++i) {
+		if (!rte_flow->hrxqs[i])
+			continue;
+		rte_flow->ibv_flows[i] =
+			ibv_exp_create_flow(rte_flow->hrxqs[i]->qp,
+					    rte_flow->ibv_attr);
+		if (!rte_flow->ibv_flows[i]) {
+			rte_flow_error_set(error, ENOMEM,
+					   RTE_FLOW_ERROR_TYPE_HANDLE,
+					   NULL, "flow rule creation failure");
+			goto error;
+		}
+		DEBUG("%p type %d QP %p ibv_flow %p",
+		      (void*)rte_flow, i, (void*)rte_flow->hrxqs[i],
+		      (void*)rte_flow->ibv_flows[i]);
 	}
 	return rte_flow;
 error:
 	assert(rte_flow);
-	if (rte_flow->hrxq)
-		mlx5_priv_hrxq_release(priv, rte_flow->hrxq);
+	for (i = 0; i != hash_rxq_init_n; ++i) {
+		if (rte_flow->ibv_flows[i])
+			claim_zero(ibv_exp_destroy_flow(rte_flow->ibv_flows[i]));
+		if (rte_flow->hrxqs[i])
+			mlx5_priv_hrxq_release(priv, rte_flow->hrxqs[i]);
+	}
 	rte_free(rte_flow);
 	return NULL;
 }
@@ -1120,7 +1315,6 @@ priv_flow_create(struct priv *priv,
 		.reserved = 0,
 	};
 	flow.inner = 0;
-	flow.hash_fields = 0;
 	claim_zero(priv_flow_validate(priv, attr, items, actions,
 				      error, &flow));
 	if (flow.actions.mark) {
@@ -1178,41 +1372,53 @@ priv_flow_destroy(struct priv *priv,
 		  struct rte_flow *flow)
 {
 	unsigned int i;
+	unsigned int j;
 	uint16_t *queues;
 	uint16_t queues_n;
 
-	queues = flow->hrxq->ind_table->queues;
-	queues_n = flow->hrxq->ind_table->queues_n;
-	if (!flow->mark)
-		goto out;
-	for (i = 0; i != queues_n; ++i) {
-		struct rte_flow *tmp;
-		struct mlx5_rxq_data *rxq = (*priv->rxqs)[queues[i]];
-		int mark = 0;
-
-		/*
-		 * To remove the mark from the queue, the queue must not be
-		 * present in any other marked flow (RSS or not).
-		 */
-		TAILQ_FOREACH(tmp, list, next) {
-			unsigned int j;
-
-			if (!tmp->mark)
-				continue;
-			for (j = 0;
-			     (j != tmp->hrxq->ind_table->queues_n) && !mark;
-			     j++)
-				if (tmp->hrxq->ind_table->queues[j] ==
-				    queues[i])
-					mark = 1;
+	for (i = 0; i != hash_rxq_init_n; ++i) {
+		if (!flow->hrxqs[i])
+			continue;
+		queues = flow->hrxqs[i]->ind_table->queues;
+		queues_n = flow->hrxqs[i]->ind_table->queues_n;
+		if (!flow->mark)
+			goto out;
+		for (j = 0; j != queues_n; ++j) {
+			struct rte_flow *tmp;
+			struct mlx5_rxq_data *rxq = (*priv->rxqs)[queues[j]];
+			int mark = 0;
+
+			/*
+			 * To remove the mark from the queue, the queue must not be
+			 * present in any other marked flow (RSS or not).
+			 */
+			TAILQ_FOREACH(tmp, list, next) {
+				unsigned int k;
+				uint16_t *tqueues =
+					tmp->hrxqs[j]->ind_table->queues;
+				uint16_t tqueues_n =
+					tmp->hrxqs[j]->ind_table->queues_n;
+
+				if (!tmp->mark)
+					continue;
+				for (k = 0; (k != tqueues_n) && !mark; k++)
+					if (tqueues[k] == queues[i])
+						mark = 1;
+			}
+			rxq->mark = mark;
 		}
-		rxq->mark = mark;
 	}
 out:
 	TAILQ_REMOVE(list, flow, next);
-	if (flow->ibv_flow)
-		claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
-	mlx5_priv_hrxq_release(priv, flow->hrxq);
+	for (i = 0; i != hash_rxq_init_n; ++i) {
+		if (flow->ibv_flows[i]) {
+			DEBUG("%p type %d QP %p ibv_flow %p",
+			      (void*)flow, i, (void*)flow->hrxqs[i],
+			      (void*)flow->ibv_flows[i]);
+			claim_zero(ibv_exp_destroy_flow(flow->ibv_flows[i]));
+			mlx5_priv_hrxq_release(priv, flow->hrxqs[i]);
+		}
+	}
 	rte_free(flow->ibv_attr);
 	DEBUG("Flow destroyed %p", (void *)flow);
 	rte_free(flow);
@@ -1294,8 +1500,12 @@ priv_flow_stop(struct priv *priv, struct mlx5_flows *list)
 	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
 		unsigned int i;
 
-		claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
-		flow->ibv_flow = NULL;
+		for (i = 0; i != hash_rxq_init_n; ++i) {
+			if (!flow->ibv_flows[i])
+				continue;
+			claim_zero(ibv_exp_destroy_flow(flow->ibv_flows[i]));
+			flow->ibv_flows[i] = NULL;
+		}
 		/* Disable mark on all queues. */
 		for (i = 0; i != priv->rxqs_n; ++i)
 			(*priv->rxqs)[i]->mark = 0;
@@ -1320,20 +1530,41 @@ priv_flow_start(struct priv *priv, struct mlx5_flows *list)
 	struct rte_flow *flow;
 
 	TAILQ_FOREACH(flow, list, next) {
-		flow->ibv_flow = ibv_exp_create_flow(flow->hrxq->qp,
-						     flow->ibv_attr);
-		if (!flow->ibv_flow) {
-			DEBUG("Flow %p cannot be applied", (void *)flow);
-			rte_errno = EINVAL;
-			return rte_errno;
+		unsigned int i;
+
+		for (i = 0; i != hash_rxq_init_n; ++i) {
+			if (!flow->hrxqs[i])
+				continue;
+			flow->ibv_flows[i] =
+				ibv_exp_create_flow(flow->hrxqs[i]->qp,
+						    flow->ibv_attr);
+			if (!flow->ibv_flows[i]) {
+				DEBUG("Flow %p cannot be applied",
+				      (void *)flow);
+				rte_errno = EINVAL;
+				return rte_errno;
+			}
 		}
 		DEBUG("Flow %p applied", (void *)flow);
 		if (flow->mark) {
 			unsigned int n;
 
-			for (n = 0; n < flow->hrxq->ind_table->queues_n; ++n) {
-				uint16_t idx = flow->hrxq->ind_table->queues[n];
-				(*priv->rxqs)[idx]->mark = 1;
+			/*
+			 * Inside the same flow, queue list will remain the
+			 * same.
+			 */
+			for (i = 0; i != hash_rxq_init_n; ++i) {
+				uint16_t *queues;
+				uint16_t queues_n;
+
+				if (!flow->hrxqs[i])
+					continue;
+				queues_n = flow->hrxqs[i]->ind_table->queues_n;
+				queues = flow->hrxqs[i]->ind_table->queues;
+				for (n = 0; n < queues_n; ++n) {
+					(*priv->rxqs)[queues[n]]->mark = 1;
+				}
+				break;
 			}
 		}
 	}
@@ -1431,18 +1662,35 @@ mlx5_flow_ctrl(struct rte_eth_dev *dev,
 	};
 	struct rte_flow_action actions[] = {
 		{
-			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
-			.conf = &(struct rte_flow_action_queue){
-				.index = 0,
-			},
+			.type = RTE_FLOW_ACTION_TYPE_RSS,
 		},
 		{
 			.type = RTE_FLOW_ACTION_TYPE_END,
 		},
 	};
+	struct rte_flow_action_rss *conf;
 	struct rte_flow *flow;
 	struct rte_flow_error error;
+	unsigned int i;
+	unsigned int j;
 
+	conf = rte_malloc(__func__, sizeof(*conf) +
+			  priv->rxqs_n * sizeof(uint16_t), 0);
+	if (!conf)
+		goto out;
+	for (i = 0, j = 0; i != priv->rxqs_n; ++i) {
+		if ((*priv->rxqs)[i]) {
+			conf->queue[j] = i;
+			++j;
+			++conf->num;
+		}
+	}
+	if (!conf->num) {
+		rte_free(conf);
+		goto out;
+	}
+	conf->rss_conf = &priv->rss_conf;
+	actions[0].conf = conf;
 	if (enable) {
 		flow = priv_flow_create(priv, &attr, items, actions, &error);
 		if (!flow) {
@@ -1461,6 +1709,7 @@ mlx5_flow_ctrl(struct rte_eth_dev *dev,
 		};
 		struct ibv_exp_flow_spec_eth *eth;
 		const unsigned int attr_size = sizeof(struct ibv_exp_flow_attr);
+		unsigned int i;
 
 		claim_zero(mlx5_flow_create_eth(&items[0], NULL, &parser));
 		TAILQ_FOREACH(flow, &priv->ctrl_flows, next) {
@@ -1469,14 +1718,20 @@ mlx5_flow_ctrl(struct rte_eth_dev *dev,
 			if (!memcmp(eth, &spec.eth, sizeof(*eth)))
 				break;
 		}
-		if (flow) {
-			claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
-			mlx5_priv_hrxq_release(priv, flow->hrxq);
-			rte_free(flow->ibv_attr);
-			DEBUG("Control flow destroyed %p", (void *)flow);
-			TAILQ_REMOVE(&priv->ctrl_flows, flow, next);
-			rte_free(flow);
+		if (!flow)
+			goto out;
+		for (i = 0; i != hash_rxq_init_n; ++i) {
+			if (!flow->ibv_flows[i])
+				continue;
+			claim_zero(ibv_exp_destroy_flow(flow->ibv_flows[i]));
+			mlx5_priv_hrxq_release(priv, flow->hrxqs[i]);
 		}
+		rte_free(flow->ibv_attr);
+		DEBUG("Control flow destroyed %p", (void *)flow);
+		TAILQ_REMOVE(&priv->ctrl_flows, flow, next);
+		rte_free(flow);
 	}
+	rte_free(conf);
+out:
 	return 0;
 }
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
 
- * [dpdk-dev] [PATCH v1 02/21] net/mlx5: remove flow director support
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
  2017-08-02 14:10 ` [dpdk-dev] [PATCH v1 01/21] net/mlx5: merge action and flow parser structure Nelio Laranjeiro
       [not found] ` <cover.1501681913.git.nelio.laranjeiro@6wind.com>
@ 2017-08-02 14:10 ` Nelio Laranjeiro
  2017-08-02 14:10 ` [dpdk-dev] [PATCH v1 03/21] net/mlx5: prefix Rx queue control structures Nelio Laranjeiro
                   ` (52 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-08-02 14:10 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil
Generic flow API should be used for flow steering as it provides a better
and easier way to configure flows.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 doc/guides/nics/features/mlx5.ini |    1 -
 doc/guides/nics/mlx5.rst          |    2 -
 drivers/net/mlx5/Makefile         |    1 -
 drivers/net/mlx5/mlx5.c           |    8 -
 drivers/net/mlx5/mlx5.h           |   12 +-
 drivers/net/mlx5/mlx5_fdir.c      | 1094 -------------------------------------
 drivers/net/mlx5/mlx5_flow.c      |   44 ++
 drivers/net/mlx5/mlx5_rxq.c       |    2 -
 drivers/net/mlx5/mlx5_rxtx.h      |    9 -
 drivers/net/mlx5/mlx5_trigger.c   |    3 -
 10 files changed, 45 insertions(+), 1131 deletions(-)
 delete mode 100644 drivers/net/mlx5/mlx5_fdir.c
diff --git a/doc/guides/nics/features/mlx5.ini b/doc/guides/nics/features/mlx5.ini
index 2913591..327a450 100644
--- a/doc/guides/nics/features/mlx5.ini
+++ b/doc/guides/nics/features/mlx5.ini
@@ -22,7 +22,6 @@ RSS key update       = Y
 RSS reta update      = Y
 SR-IOV               = Y
 VLAN filter          = Y
-Flow director        = Y
 Flow API             = Y
 CRC offload          = Y
 VLAN offload         = Y
diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index f4cb18b..c56f6f0 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -89,8 +89,6 @@ Features
 - Promiscuous mode.
 - Multicast promiscuous mode.
 - Hardware checksum offloads.
-- Flow director (RTE_FDIR_MODE_PERFECT, RTE_FDIR_MODE_PERFECT_MAC_VLAN and
-  RTE_ETH_FDIR_REJECT).
 - Flow API.
 - Secondary process TX is supported.
 - KVM and VMware ESX SR-IOV modes are supported.
diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index 14b739a..099e4f0 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -49,7 +49,6 @@ SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rxmode.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_vlan.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_stats.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rss.c
-SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_fdir.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_mr.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow.c
 
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index bd66a7c..ef10a22 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -150,10 +150,6 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	priv_mac_addrs_disable(priv);
 	priv_destroy_hash_rxqs(priv);
 
-	/* Remove flow director elements. */
-	priv_fdir_disable(priv);
-	priv_fdir_delete_filters_list(priv);
-
 	/* Prevent crashes when queues are still in use. */
 	dev->rx_pkt_burst = removed_rx_burst;
 	dev->tx_pkt_burst = removed_tx_burst;
@@ -739,10 +735,6 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 		claim_zero(priv_mac_addr_add(priv, 0,
 					     (const uint8_t (*)[ETHER_ADDR_LEN])
 					     mac.addr_bytes));
-		/* Initialize FD filters list. */
-		err = fdir_init_filters_list(priv);
-		if (err)
-			goto port_error;
 #ifndef NDEBUG
 		{
 			char ifname[IF_NAMESIZE];
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index e89aba8..c7194de 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -144,8 +144,6 @@ struct priv {
 	struct rte_intr_handle intr_handle; /* Interrupt handler. */
 	unsigned int (*reta_idx)[]; /* RETA index table. */
 	unsigned int reta_idx_n; /* RETA index size. */
-	struct fdir_filter_list *fdir_filter_list; /* Flow director rules. */
-	struct fdir_queue *fdir_drop_queue; /* Flow director drop queue. */
 	struct rte_flow_drop *flow_drop_queue; /* Flow drop queue. */
 	TAILQ_HEAD(mlx5_flows, rte_flow) flows; /* RTE Flow rules. */
 	uint32_t link_speed_capa; /* Link speed capabilities. */
@@ -270,18 +268,10 @@ void mlx5_vlan_strip_queue_set(struct rte_eth_dev *, uint16_t, int);
 int mlx5_dev_start(struct rte_eth_dev *);
 void mlx5_dev_stop(struct rte_eth_dev *);
 
-/* mlx5_fdir.c */
+/* mlx5_flow.c */
 
-void priv_fdir_queue_destroy(struct priv *, struct fdir_queue *);
-int fdir_init_filters_list(struct priv *);
-void priv_fdir_delete_filters_list(struct priv *);
-void priv_fdir_disable(struct priv *);
-void priv_fdir_enable(struct priv *);
 int mlx5_dev_filter_ctrl(struct rte_eth_dev *, enum rte_filter_type,
 			 enum rte_filter_op, void *);
-
-/* mlx5_flow.c */
-
 int mlx5_flow_validate(struct rte_eth_dev *, const struct rte_flow_attr *,
 		       const struct rte_flow_item [],
 		       const struct rte_flow_action [],
diff --git a/drivers/net/mlx5/mlx5_fdir.c b/drivers/net/mlx5/mlx5_fdir.c
deleted file mode 100644
index ad256e4..0000000
--- a/drivers/net/mlx5/mlx5_fdir.c
+++ /dev/null
@@ -1,1094 +0,0 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright 2015 6WIND S.A.
- *   Copyright 2015 Mellanox.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of 6WIND S.A. nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <stddef.h>
-#include <assert.h>
-#include <stdint.h>
-#include <string.h>
-#include <errno.h>
-
-/* Verbs header. */
-/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
-#include <infiniband/verbs.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
-
-#include <rte_ether.h>
-#include <rte_malloc.h>
-#include <rte_ethdev.h>
-#include <rte_common.h>
-#include <rte_flow.h>
-#include <rte_flow_driver.h>
-
-#include "mlx5.h"
-#include "mlx5_rxtx.h"
-
-struct fdir_flow_desc {
-	uint16_t dst_port;
-	uint16_t src_port;
-	uint32_t src_ip[4];
-	uint32_t dst_ip[4];
-	uint8_t	mac[6];
-	uint16_t vlan_tag;
-	enum hash_rxq_type type;
-};
-
-struct mlx5_fdir_filter {
-	LIST_ENTRY(mlx5_fdir_filter) next;
-	uint16_t queue; /* Queue assigned to if FDIR match. */
-	enum rte_eth_fdir_behavior behavior;
-	struct fdir_flow_desc desc;
-	struct ibv_exp_flow *flow;
-};
-
-LIST_HEAD(fdir_filter_list, mlx5_fdir_filter);
-
-/**
- * Convert struct rte_eth_fdir_filter to mlx5 filter descriptor.
- *
- * @param[in] fdir_filter
- *   DPDK filter structure to convert.
- * @param[out] desc
- *   Resulting mlx5 filter descriptor.
- * @param mode
- *   Flow director mode.
- */
-static void
-fdir_filter_to_flow_desc(const struct rte_eth_fdir_filter *fdir_filter,
-			 struct fdir_flow_desc *desc, enum rte_fdir_mode mode)
-{
-	/* Initialize descriptor. */
-	memset(desc, 0, sizeof(*desc));
-
-	/* Set VLAN ID. */
-	desc->vlan_tag = fdir_filter->input.flow_ext.vlan_tci;
-
-	/* Set MAC address. */
-	if (mode == RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
-		rte_memcpy(desc->mac,
-			   fdir_filter->input.flow.mac_vlan_flow.mac_addr.
-				addr_bytes,
-			   sizeof(desc->mac));
-		desc->type = HASH_RXQ_ETH;
-		return;
-	}
-
-	/* Set mode */
-	switch (fdir_filter->input.flow_type) {
-	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
-		desc->type = HASH_RXQ_UDPV4;
-		break;
-	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
-		desc->type = HASH_RXQ_TCPV4;
-		break;
-	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
-		desc->type = HASH_RXQ_IPV4;
-		break;
-	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
-		desc->type = HASH_RXQ_UDPV6;
-		break;
-	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
-		desc->type = HASH_RXQ_TCPV6;
-		break;
-	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
-		desc->type = HASH_RXQ_IPV6;
-		break;
-	default:
-		break;
-	}
-
-	/* Set flow values */
-	switch (fdir_filter->input.flow_type) {
-	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
-	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
-		desc->src_port = fdir_filter->input.flow.udp4_flow.src_port;
-		desc->dst_port = fdir_filter->input.flow.udp4_flow.dst_port;
-		/* fallthrough */
-	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
-		desc->src_ip[0] = fdir_filter->input.flow.ip4_flow.src_ip;
-		desc->dst_ip[0] = fdir_filter->input.flow.ip4_flow.dst_ip;
-		break;
-	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
-	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
-		desc->src_port = fdir_filter->input.flow.udp6_flow.src_port;
-		desc->dst_port = fdir_filter->input.flow.udp6_flow.dst_port;
-		/* Fall through. */
-	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
-		rte_memcpy(desc->src_ip,
-			   fdir_filter->input.flow.ipv6_flow.src_ip,
-			   sizeof(desc->src_ip));
-		rte_memcpy(desc->dst_ip,
-			   fdir_filter->input.flow.ipv6_flow.dst_ip,
-			   sizeof(desc->dst_ip));
-		break;
-	default:
-		break;
-	}
-}
-
-/**
- * Check if two flow descriptors overlap according to configured mask.
- *
- * @param priv
- *   Private structure that provides flow director mask.
- * @param desc1
- *   First flow descriptor to compare.
- * @param desc2
- *   Second flow descriptor to compare.
- *
- * @return
- *   Nonzero if descriptors overlap.
- */
-static int
-priv_fdir_overlap(const struct priv *priv,
-		  const struct fdir_flow_desc *desc1,
-		  const struct fdir_flow_desc *desc2)
-{
-	const struct rte_eth_fdir_masks *mask =
-		&priv->dev->data->dev_conf.fdir_conf.mask;
-	unsigned int i;
-
-	if (desc1->type != desc2->type)
-		return 0;
-	/* Ignore non masked bits. */
-	for (i = 0; i != RTE_DIM(desc1->mac); ++i)
-		if ((desc1->mac[i] & mask->mac_addr_byte_mask) !=
-		    (desc2->mac[i] & mask->mac_addr_byte_mask))
-			return 0;
-	if (((desc1->src_port & mask->src_port_mask) !=
-	     (desc2->src_port & mask->src_port_mask)) ||
-	    ((desc1->dst_port & mask->dst_port_mask) !=
-	     (desc2->dst_port & mask->dst_port_mask)))
-		return 0;
-	switch (desc1->type) {
-	case HASH_RXQ_IPV4:
-	case HASH_RXQ_UDPV4:
-	case HASH_RXQ_TCPV4:
-		if (((desc1->src_ip[0] & mask->ipv4_mask.src_ip) !=
-		     (desc2->src_ip[0] & mask->ipv4_mask.src_ip)) ||
-		    ((desc1->dst_ip[0] & mask->ipv4_mask.dst_ip) !=
-		     (desc2->dst_ip[0] & mask->ipv4_mask.dst_ip)))
-			return 0;
-		break;
-	case HASH_RXQ_IPV6:
-	case HASH_RXQ_UDPV6:
-	case HASH_RXQ_TCPV6:
-		for (i = 0; i != RTE_DIM(desc1->src_ip); ++i)
-			if (((desc1->src_ip[i] & mask->ipv6_mask.src_ip[i]) !=
-			     (desc2->src_ip[i] & mask->ipv6_mask.src_ip[i])) ||
-			    ((desc1->dst_ip[i] & mask->ipv6_mask.dst_ip[i]) !=
-			     (desc2->dst_ip[i] & mask->ipv6_mask.dst_ip[i])))
-				return 0;
-		break;
-	default:
-		break;
-	}
-	return 1;
-}
-
-/**
- * Create flow director steering rule for a specific filter.
- *
- * @param priv
- *   Private structure.
- * @param mlx5_fdir_filter
- *   Filter to create a steering rule for.
- * @param fdir_queue
- *   Flow director queue for matching packets.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-static int
-priv_fdir_flow_add(struct priv *priv,
-		   struct mlx5_fdir_filter *mlx5_fdir_filter,
-		   struct fdir_queue *fdir_queue)
-{
-	struct ibv_exp_flow *flow;
-	struct fdir_flow_desc *desc = &mlx5_fdir_filter->desc;
-	enum rte_fdir_mode fdir_mode =
-		priv->dev->data->dev_conf.fdir_conf.mode;
-	struct rte_eth_fdir_masks *mask =
-		&priv->dev->data->dev_conf.fdir_conf.mask;
-	FLOW_ATTR_SPEC_ETH(data, priv_flow_attr(priv, NULL, 0, desc->type));
-	struct ibv_exp_flow_attr *attr = &data->attr;
-	uintptr_t spec_offset = (uintptr_t)&data->spec;
-	struct ibv_exp_flow_spec_eth *spec_eth;
-	struct ibv_exp_flow_spec_ipv4 *spec_ipv4;
-	struct ibv_exp_flow_spec_ipv6 *spec_ipv6;
-	struct ibv_exp_flow_spec_tcp_udp *spec_tcp_udp;
-	struct mlx5_fdir_filter *iter_fdir_filter;
-	unsigned int i;
-
-	/* Abort if an existing flow overlaps this one to avoid packet
-	 * duplication, even if it targets another queue. */
-	LIST_FOREACH(iter_fdir_filter, priv->fdir_filter_list, next)
-		if ((iter_fdir_filter != mlx5_fdir_filter) &&
-		    (iter_fdir_filter->flow != NULL) &&
-		    (priv_fdir_overlap(priv,
-				       &mlx5_fdir_filter->desc,
-				       &iter_fdir_filter->desc)))
-			return EEXIST;
-
-	/*
-	 * No padding must be inserted by the compiler between attr and spec.
-	 * This layout is expected by libibverbs.
-	 */
-	assert(((uint8_t *)attr + sizeof(*attr)) == (uint8_t *)spec_offset);
-	priv_flow_attr(priv, attr, sizeof(data), desc->type);
-
-	/* Set Ethernet spec */
-	spec_eth = (struct ibv_exp_flow_spec_eth *)spec_offset;
-
-	/* The first specification must be Ethernet. */
-	assert(spec_eth->type == IBV_EXP_FLOW_SPEC_ETH);
-	assert(spec_eth->size == sizeof(*spec_eth));
-
-	/* VLAN ID */
-	spec_eth->val.vlan_tag = desc->vlan_tag & mask->vlan_tci_mask;
-	spec_eth->mask.vlan_tag = mask->vlan_tci_mask;
-
-	/* Update priority */
-	attr->priority = 2;
-
-	if (fdir_mode == RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
-		/* MAC Address */
-		for (i = 0; i != RTE_DIM(spec_eth->mask.dst_mac); ++i) {
-			spec_eth->val.dst_mac[i] =
-				desc->mac[i] & mask->mac_addr_byte_mask;
-			spec_eth->mask.dst_mac[i] = mask->mac_addr_byte_mask;
-		}
-		goto create_flow;
-	}
-
-	switch (desc->type) {
-	case HASH_RXQ_IPV4:
-	case HASH_RXQ_UDPV4:
-	case HASH_RXQ_TCPV4:
-		spec_offset += spec_eth->size;
-
-		/* Set IP spec */
-		spec_ipv4 = (struct ibv_exp_flow_spec_ipv4 *)spec_offset;
-
-		/* The second specification must be IP. */
-		assert(spec_ipv4->type == IBV_EXP_FLOW_SPEC_IPV4);
-		assert(spec_ipv4->size == sizeof(*spec_ipv4));
-
-		spec_ipv4->val.src_ip =
-			desc->src_ip[0] & mask->ipv4_mask.src_ip;
-		spec_ipv4->val.dst_ip =
-			desc->dst_ip[0] & mask->ipv4_mask.dst_ip;
-		spec_ipv4->mask.src_ip = mask->ipv4_mask.src_ip;
-		spec_ipv4->mask.dst_ip = mask->ipv4_mask.dst_ip;
-
-		/* Update priority */
-		attr->priority = 1;
-
-		if (desc->type == HASH_RXQ_IPV4)
-			goto create_flow;
-
-		spec_offset += spec_ipv4->size;
-		break;
-	case HASH_RXQ_IPV6:
-	case HASH_RXQ_UDPV6:
-	case HASH_RXQ_TCPV6:
-		spec_offset += spec_eth->size;
-
-		/* Set IP spec */
-		spec_ipv6 = (struct ibv_exp_flow_spec_ipv6 *)spec_offset;
-
-		/* The second specification must be IP. */
-		assert(spec_ipv6->type == IBV_EXP_FLOW_SPEC_IPV6);
-		assert(spec_ipv6->size == sizeof(*spec_ipv6));
-
-		for (i = 0; i != RTE_DIM(desc->src_ip); ++i) {
-			((uint32_t *)spec_ipv6->val.src_ip)[i] =
-				desc->src_ip[i] & mask->ipv6_mask.src_ip[i];
-			((uint32_t *)spec_ipv6->val.dst_ip)[i] =
-				desc->dst_ip[i] & mask->ipv6_mask.dst_ip[i];
-		}
-		rte_memcpy(spec_ipv6->mask.src_ip,
-			   mask->ipv6_mask.src_ip,
-			   sizeof(spec_ipv6->mask.src_ip));
-		rte_memcpy(spec_ipv6->mask.dst_ip,
-			   mask->ipv6_mask.dst_ip,
-			   sizeof(spec_ipv6->mask.dst_ip));
-
-		/* Update priority */
-		attr->priority = 1;
-
-		if (desc->type == HASH_RXQ_IPV6)
-			goto create_flow;
-
-		spec_offset += spec_ipv6->size;
-		break;
-	default:
-		ERROR("invalid flow attribute type");
-		return EINVAL;
-	}
-
-	/* Set TCP/UDP flow specification. */
-	spec_tcp_udp = (struct ibv_exp_flow_spec_tcp_udp *)spec_offset;
-
-	/* The third specification must be TCP/UDP. */
-	assert(spec_tcp_udp->type == IBV_EXP_FLOW_SPEC_TCP ||
-	       spec_tcp_udp->type == IBV_EXP_FLOW_SPEC_UDP);
-	assert(spec_tcp_udp->size == sizeof(*spec_tcp_udp));
-
-	spec_tcp_udp->val.src_port = desc->src_port & mask->src_port_mask;
-	spec_tcp_udp->val.dst_port = desc->dst_port & mask->dst_port_mask;
-	spec_tcp_udp->mask.src_port = mask->src_port_mask;
-	spec_tcp_udp->mask.dst_port = mask->dst_port_mask;
-
-	/* Update priority */
-	attr->priority = 0;
-
-create_flow:
-
-	errno = 0;
-	flow = ibv_exp_create_flow(fdir_queue->qp, attr);
-	if (flow == NULL) {
-		/* It's not clear whether errno is always set in this case. */
-		ERROR("%p: flow director configuration failed, errno=%d: %s",
-		      (void *)priv, errno,
-		      (errno ? strerror(errno) : "Unknown error"));
-		if (errno)
-			return errno;
-		return EINVAL;
-	}
-
-	DEBUG("%p: added flow director rule (%p)", (void *)priv, (void *)flow);
-	mlx5_fdir_filter->flow = flow;
-	return 0;
-}
-
-/**
- * Destroy a flow director queue.
- *
- * @param fdir_queue
- *   Flow director queue to be destroyed.
- */
-void
-priv_fdir_queue_destroy(struct priv *priv, struct fdir_queue *fdir_queue)
-{
-	struct mlx5_fdir_filter *fdir_filter;
-
-	/* Disable filter flows still applying to this queue. */
-	LIST_FOREACH(fdir_filter, priv->fdir_filter_list, next) {
-		unsigned int idx = fdir_filter->queue;
-		struct rxq_ctrl *rxq_ctrl =
-			container_of((*priv->rxqs)[idx], struct rxq_ctrl, rxq);
-
-		assert(idx < priv->rxqs_n);
-		if (fdir_queue == rxq_ctrl->fdir_queue &&
-		    fdir_filter->flow != NULL) {
-			claim_zero(ibv_exp_destroy_flow(fdir_filter->flow));
-			fdir_filter->flow = NULL;
-		}
-	}
-	assert(fdir_queue->qp);
-	claim_zero(ibv_destroy_qp(fdir_queue->qp));
-	assert(fdir_queue->ind_table);
-	claim_zero(ibv_exp_destroy_rwq_ind_table(fdir_queue->ind_table));
-	if (fdir_queue->wq)
-		claim_zero(ibv_exp_destroy_wq(fdir_queue->wq));
-	if (fdir_queue->cq)
-		claim_zero(ibv_destroy_cq(fdir_queue->cq));
-#ifndef NDEBUG
-	memset(fdir_queue, 0x2a, sizeof(*fdir_queue));
-#endif
-	rte_free(fdir_queue);
-}
-
-/**
- * Create a flow director queue.
- *
- * @param priv
- *   Private structure.
- * @param wq
- *   Work queue to route matched packets to, NULL if one needs to
- *   be created.
- *
- * @return
- *   Related flow director queue on success, NULL otherwise.
- */
-static struct fdir_queue *
-priv_fdir_queue_create(struct priv *priv, struct ibv_exp_wq *wq,
-		       unsigned int socket)
-{
-	struct fdir_queue *fdir_queue;
-
-	fdir_queue = rte_calloc_socket(__func__, 1, sizeof(*fdir_queue),
-				       0, socket);
-	if (!fdir_queue) {
-		ERROR("cannot allocate flow director queue");
-		return NULL;
-	}
-	assert(priv->pd);
-	assert(priv->ctx);
-	if (!wq) {
-		fdir_queue->cq = ibv_exp_create_cq(
-			priv->ctx, 1, NULL, NULL, 0,
-			&(struct ibv_exp_cq_init_attr){
-				.comp_mask = 0,
-			});
-		if (!fdir_queue->cq) {
-			ERROR("cannot create flow director CQ");
-			goto error;
-		}
-		fdir_queue->wq = ibv_exp_create_wq(
-			priv->ctx,
-			&(struct ibv_exp_wq_init_attr){
-				.wq_type = IBV_EXP_WQT_RQ,
-				.max_recv_wr = 1,
-				.max_recv_sge = 1,
-				.pd = priv->pd,
-				.cq = fdir_queue->cq,
-			});
-		if (!fdir_queue->wq) {
-			ERROR("cannot create flow director WQ");
-			goto error;
-		}
-		wq = fdir_queue->wq;
-	}
-	fdir_queue->ind_table = ibv_exp_create_rwq_ind_table(
-		priv->ctx,
-		&(struct ibv_exp_rwq_ind_table_init_attr){
-			.pd = priv->pd,
-			.log_ind_tbl_size = 0,
-			.ind_tbl = &wq,
-			.comp_mask = 0,
-		});
-	if (!fdir_queue->ind_table) {
-		ERROR("cannot create flow director indirection table");
-		goto error;
-	}
-	fdir_queue->qp = ibv_exp_create_qp(
-		priv->ctx,
-		&(struct ibv_exp_qp_init_attr){
-			.qp_type = IBV_QPT_RAW_PACKET,
-			.comp_mask =
-				IBV_EXP_QP_INIT_ATTR_PD |
-				IBV_EXP_QP_INIT_ATTR_PORT |
-				IBV_EXP_QP_INIT_ATTR_RX_HASH,
-			.pd = priv->pd,
-			.rx_hash_conf = &(struct ibv_exp_rx_hash_conf){
-				.rx_hash_function =
-					IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
-				.rx_hash_key_len = rss_hash_default_key_len,
-				.rx_hash_key = rss_hash_default_key,
-				.rx_hash_fields_mask = 0,
-				.rwq_ind_tbl = fdir_queue->ind_table,
-			},
-			.port_num = priv->port,
-		});
-	if (!fdir_queue->qp) {
-		ERROR("cannot create flow director hash RX QP");
-		goto error;
-	}
-	return fdir_queue;
-error:
-	assert(fdir_queue);
-	assert(!fdir_queue->qp);
-	if (fdir_queue->ind_table)
-		claim_zero(ibv_exp_destroy_rwq_ind_table
-			   (fdir_queue->ind_table));
-	if (fdir_queue->wq)
-		claim_zero(ibv_exp_destroy_wq(fdir_queue->wq));
-	if (fdir_queue->cq)
-		claim_zero(ibv_destroy_cq(fdir_queue->cq));
-	rte_free(fdir_queue);
-	return NULL;
-}
-
-/**
- * Get flow director queue for a specific RX queue, create it in case
- * it does not exist.
- *
- * @param priv
- *   Private structure.
- * @param idx
- *   RX queue index.
- *
- * @return
- *   Related flow director queue on success, NULL otherwise.
- */
-static struct fdir_queue *
-priv_get_fdir_queue(struct priv *priv, uint16_t idx)
-{
-	struct rxq_ctrl *rxq_ctrl =
-		container_of((*priv->rxqs)[idx], struct rxq_ctrl, rxq);
-	struct fdir_queue *fdir_queue = rxq_ctrl->fdir_queue;
-
-	assert(rxq_ctrl->wq);
-	if (fdir_queue == NULL) {
-		fdir_queue = priv_fdir_queue_create(priv, rxq_ctrl->wq,
-						    rxq_ctrl->socket);
-		rxq_ctrl->fdir_queue = fdir_queue;
-	}
-	return fdir_queue;
-}
-
-/**
- * Get or flow director drop queue. Create it if it does not exist.
- *
- * @param priv
- *   Private structure.
- *
- * @return
- *   Flow director drop queue on success, NULL otherwise.
- */
-static struct fdir_queue *
-priv_get_fdir_drop_queue(struct priv *priv)
-{
-	struct fdir_queue *fdir_queue = priv->fdir_drop_queue;
-
-	if (fdir_queue == NULL) {
-		unsigned int socket = SOCKET_ID_ANY;
-
-		/* Select a known NUMA socket if possible. */
-		if (priv->rxqs_n && (*priv->rxqs)[0])
-			socket = container_of((*priv->rxqs)[0],
-					      struct rxq_ctrl, rxq)->socket;
-		fdir_queue = priv_fdir_queue_create(priv, NULL, socket);
-		priv->fdir_drop_queue = fdir_queue;
-	}
-	return fdir_queue;
-}
-
-/**
- * Enable flow director filter and create steering rules.
- *
- * @param priv
- *   Private structure.
- * @param mlx5_fdir_filter
- *   Filter to create steering rule for.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-static int
-priv_fdir_filter_enable(struct priv *priv,
-			struct mlx5_fdir_filter *mlx5_fdir_filter)
-{
-	struct fdir_queue *fdir_queue;
-
-	/* Check if flow already exists. */
-	if (mlx5_fdir_filter->flow != NULL)
-		return 0;
-
-	/* Get fdir_queue for specific queue. */
-	if (mlx5_fdir_filter->behavior == RTE_ETH_FDIR_REJECT)
-		fdir_queue = priv_get_fdir_drop_queue(priv);
-	else
-		fdir_queue = priv_get_fdir_queue(priv,
-						 mlx5_fdir_filter->queue);
-
-	if (fdir_queue == NULL) {
-		ERROR("failed to create flow director rxq for queue %d",
-		      mlx5_fdir_filter->queue);
-		return EINVAL;
-	}
-
-	/* Create flow */
-	return priv_fdir_flow_add(priv, mlx5_fdir_filter, fdir_queue);
-}
-
-/**
- * Initialize flow director filters list.
- *
- * @param priv
- *   Private structure.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-int
-fdir_init_filters_list(struct priv *priv)
-{
-	/* Filter list initialization should be done only once. */
-	if (priv->fdir_filter_list)
-		return 0;
-
-	/* Create filters list. */
-	priv->fdir_filter_list =
-		rte_calloc(__func__, 1, sizeof(*priv->fdir_filter_list), 0);
-
-	if (priv->fdir_filter_list == NULL) {
-		int err = ENOMEM;
-
-		ERROR("cannot allocate flow director filter list: %s",
-		      strerror(err));
-		return err;
-	}
-
-	LIST_INIT(priv->fdir_filter_list);
-
-	return 0;
-}
-
-/**
- * Flush all filters.
- *
- * @param priv
- *   Private structure.
- */
-static void
-priv_fdir_filter_flush(struct priv *priv)
-{
-	struct mlx5_fdir_filter *mlx5_fdir_filter;
-
-	while ((mlx5_fdir_filter = LIST_FIRST(priv->fdir_filter_list))) {
-		struct ibv_exp_flow *flow = mlx5_fdir_filter->flow;
-
-		DEBUG("%p: flushing flow director filter %p",
-		      (void *)priv, (void *)mlx5_fdir_filter);
-		LIST_REMOVE(mlx5_fdir_filter, next);
-		if (flow != NULL)
-			claim_zero(ibv_exp_destroy_flow(flow));
-		rte_free(mlx5_fdir_filter);
-	}
-}
-
-/**
- * Remove all flow director filters and delete list.
- *
- * @param priv
- *   Private structure.
- */
-void
-priv_fdir_delete_filters_list(struct priv *priv)
-{
-	priv_fdir_filter_flush(priv);
-	rte_free(priv->fdir_filter_list);
-	priv->fdir_filter_list = NULL;
-}
-
-/**
- * Disable flow director, remove all steering rules.
- *
- * @param priv
- *   Private structure.
- */
-void
-priv_fdir_disable(struct priv *priv)
-{
-	unsigned int i;
-	struct mlx5_fdir_filter *mlx5_fdir_filter;
-
-	/* Run on every flow director filter and destroy flow handle. */
-	LIST_FOREACH(mlx5_fdir_filter, priv->fdir_filter_list, next) {
-		struct ibv_exp_flow *flow;
-
-		/* Only valid elements should be in the list */
-		assert(mlx5_fdir_filter != NULL);
-		flow = mlx5_fdir_filter->flow;
-
-		/* Destroy flow handle */
-		if (flow != NULL) {
-			claim_zero(ibv_exp_destroy_flow(flow));
-			mlx5_fdir_filter->flow = NULL;
-		}
-	}
-
-	/* Destroy flow director context in each RX queue. */
-	for (i = 0; (i != priv->rxqs_n); i++) {
-		struct rxq_ctrl *rxq_ctrl;
-
-		if (!(*priv->rxqs)[i])
-			continue;
-		rxq_ctrl = container_of((*priv->rxqs)[i], struct rxq_ctrl, rxq);
-		if (!rxq_ctrl->fdir_queue)
-			continue;
-		priv_fdir_queue_destroy(priv, rxq_ctrl->fdir_queue);
-		rxq_ctrl->fdir_queue = NULL;
-	}
-	if (priv->fdir_drop_queue) {
-		priv_fdir_queue_destroy(priv, priv->fdir_drop_queue);
-		priv->fdir_drop_queue = NULL;
-	}
-}
-
-/**
- * Enable flow director, create steering rules.
- *
- * @param priv
- *   Private structure.
- */
-void
-priv_fdir_enable(struct priv *priv)
-{
-	struct mlx5_fdir_filter *mlx5_fdir_filter;
-
-	/* Run on every fdir filter and create flow handle */
-	LIST_FOREACH(mlx5_fdir_filter, priv->fdir_filter_list, next) {
-		/* Only valid elements should be in the list */
-		assert(mlx5_fdir_filter != NULL);
-
-		priv_fdir_filter_enable(priv, mlx5_fdir_filter);
-	}
-}
-
-/**
- * Find specific filter in list.
- *
- * @param priv
- *   Private structure.
- * @param fdir_filter
- *   Flow director filter to find.
- *
- * @return
- *   Filter element if found, otherwise NULL.
- */
-static struct mlx5_fdir_filter *
-priv_find_filter_in_list(struct priv *priv,
-			 const struct rte_eth_fdir_filter *fdir_filter)
-{
-	struct fdir_flow_desc desc;
-	struct mlx5_fdir_filter *mlx5_fdir_filter;
-	enum rte_fdir_mode fdir_mode = priv->dev->data->dev_conf.fdir_conf.mode;
-
-	/* Get flow director filter to look for. */
-	fdir_filter_to_flow_desc(fdir_filter, &desc, fdir_mode);
-
-	/* Look for the requested element. */
-	LIST_FOREACH(mlx5_fdir_filter, priv->fdir_filter_list, next) {
-		/* Only valid elements should be in the list. */
-		assert(mlx5_fdir_filter != NULL);
-
-		/* Return matching filter. */
-		if (!memcmp(&desc, &mlx5_fdir_filter->desc, sizeof(desc)))
-			return mlx5_fdir_filter;
-	}
-
-	/* Filter not found */
-	return NULL;
-}
-
-/**
- * Add new flow director filter and store it in list.
- *
- * @param priv
- *   Private structure.
- * @param fdir_filter
- *   Flow director filter to add.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-static int
-priv_fdir_filter_add(struct priv *priv,
-		     const struct rte_eth_fdir_filter *fdir_filter)
-{
-	struct mlx5_fdir_filter *mlx5_fdir_filter;
-	enum rte_fdir_mode fdir_mode = priv->dev->data->dev_conf.fdir_conf.mode;
-	int err = 0;
-
-	/* Validate queue number. */
-	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
-		ERROR("invalid queue number %d", fdir_filter->action.rx_queue);
-		return EINVAL;
-	}
-
-	/* Duplicate filters are currently unsupported. */
-	mlx5_fdir_filter = priv_find_filter_in_list(priv, fdir_filter);
-	if (mlx5_fdir_filter != NULL) {
-		ERROR("filter already exists");
-		return EINVAL;
-	}
-
-	/* Create new flow director filter. */
-	mlx5_fdir_filter =
-		rte_calloc(__func__, 1, sizeof(*mlx5_fdir_filter), 0);
-	if (mlx5_fdir_filter == NULL) {
-		err = ENOMEM;
-		ERROR("cannot allocate flow director filter: %s",
-		      strerror(err));
-		return err;
-	}
-
-	/* Set action parameters. */
-	mlx5_fdir_filter->queue = fdir_filter->action.rx_queue;
-	mlx5_fdir_filter->behavior = fdir_filter->action.behavior;
-
-	/* Convert to mlx5 filter descriptor. */
-	fdir_filter_to_flow_desc(fdir_filter,
-				 &mlx5_fdir_filter->desc, fdir_mode);
-
-	/* Insert new filter into list. */
-	LIST_INSERT_HEAD(priv->fdir_filter_list, mlx5_fdir_filter, next);
-
-	DEBUG("%p: flow director filter %p added",
-	      (void *)priv, (void *)mlx5_fdir_filter);
-
-	/* Enable filter immediately if device is started. */
-	if (priv->started)
-		err = priv_fdir_filter_enable(priv, mlx5_fdir_filter);
-
-	return err;
-}
-
-/**
- * Update queue for specific filter.
- *
- * @param priv
- *   Private structure.
- * @param fdir_filter
- *   Filter to be updated.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-static int
-priv_fdir_filter_update(struct priv *priv,
-			const struct rte_eth_fdir_filter *fdir_filter)
-{
-	struct mlx5_fdir_filter *mlx5_fdir_filter;
-
-	/* Validate queue number. */
-	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
-		ERROR("invalid queue number %d", fdir_filter->action.rx_queue);
-		return EINVAL;
-	}
-
-	mlx5_fdir_filter = priv_find_filter_in_list(priv, fdir_filter);
-	if (mlx5_fdir_filter != NULL) {
-		struct ibv_exp_flow *flow = mlx5_fdir_filter->flow;
-		int err = 0;
-
-		/* Update queue number. */
-		mlx5_fdir_filter->queue = fdir_filter->action.rx_queue;
-
-		/* Destroy flow handle. */
-		if (flow != NULL) {
-			claim_zero(ibv_exp_destroy_flow(flow));
-			mlx5_fdir_filter->flow = NULL;
-		}
-		DEBUG("%p: flow director filter %p updated",
-		      (void *)priv, (void *)mlx5_fdir_filter);
-
-		/* Enable filter if device is started. */
-		if (priv->started)
-			err = priv_fdir_filter_enable(priv, mlx5_fdir_filter);
-
-		return err;
-	}
-
-	/* Filter not found, create it. */
-	DEBUG("%p: filter not found for update, creating new filter",
-	      (void *)priv);
-	return priv_fdir_filter_add(priv, fdir_filter);
-}
-
-/**
- * Delete specific filter.
- *
- * @param priv
- *   Private structure.
- * @param fdir_filter
- *   Filter to be deleted.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-static int
-priv_fdir_filter_delete(struct priv *priv,
-			const struct rte_eth_fdir_filter *fdir_filter)
-{
-	struct mlx5_fdir_filter *mlx5_fdir_filter;
-
-	mlx5_fdir_filter = priv_find_filter_in_list(priv, fdir_filter);
-	if (mlx5_fdir_filter != NULL) {
-		struct ibv_exp_flow *flow = mlx5_fdir_filter->flow;
-
-		/* Remove element from list. */
-		LIST_REMOVE(mlx5_fdir_filter, next);
-
-		/* Destroy flow handle. */
-		if (flow != NULL) {
-			claim_zero(ibv_exp_destroy_flow(flow));
-			mlx5_fdir_filter->flow = NULL;
-		}
-
-		DEBUG("%p: flow director filter %p deleted",
-		      (void *)priv, (void *)mlx5_fdir_filter);
-
-		/* Delete filter. */
-		rte_free(mlx5_fdir_filter);
-
-		return 0;
-	}
-
-	ERROR("%p: flow director delete failed, cannot find filter",
-	      (void *)priv);
-	return EINVAL;
-}
-
-/**
- * Get flow director information.
- *
- * @param priv
- *   Private structure.
- * @param[out] fdir_info
- *   Resulting flow director information.
- */
-static void
-priv_fdir_info_get(struct priv *priv, struct rte_eth_fdir_info *fdir_info)
-{
-	struct rte_eth_fdir_masks *mask =
-		&priv->dev->data->dev_conf.fdir_conf.mask;
-
-	fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
-	fdir_info->guarant_spc = 0;
-
-	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
-
-	fdir_info->max_flexpayload = 0;
-	fdir_info->flow_types_mask[0] = 0;
-
-	fdir_info->flex_payload_unit = 0;
-	fdir_info->max_flex_payload_segment_num = 0;
-	fdir_info->flex_payload_limit = 0;
-	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
-}
-
-/**
- * Deal with flow director operations.
- *
- * @param priv
- *   Pointer to private structure.
- * @param filter_op
- *   Operation to perform.
- * @param arg
- *   Pointer to operation-specific structure.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-static int
-priv_fdir_ctrl_func(struct priv *priv, enum rte_filter_op filter_op, void *arg)
-{
-	enum rte_fdir_mode fdir_mode =
-		priv->dev->data->dev_conf.fdir_conf.mode;
-	int ret = 0;
-
-	if (filter_op == RTE_ETH_FILTER_NOP)
-		return 0;
-
-	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
-	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
-		ERROR("%p: flow director mode %d not supported",
-		      (void *)priv, fdir_mode);
-		return EINVAL;
-	}
-
-	switch (filter_op) {
-	case RTE_ETH_FILTER_ADD:
-		ret = priv_fdir_filter_add(priv, arg);
-		break;
-	case RTE_ETH_FILTER_UPDATE:
-		ret = priv_fdir_filter_update(priv, arg);
-		break;
-	case RTE_ETH_FILTER_DELETE:
-		ret = priv_fdir_filter_delete(priv, arg);
-		break;
-	case RTE_ETH_FILTER_FLUSH:
-		priv_fdir_filter_flush(priv);
-		break;
-	case RTE_ETH_FILTER_INFO:
-		priv_fdir_info_get(priv, arg);
-		break;
-	default:
-		DEBUG("%p: unknown operation %u", (void *)priv, filter_op);
-		ret = EINVAL;
-		break;
-	}
-	return ret;
-}
-
-static const struct rte_flow_ops mlx5_flow_ops = {
-	.validate = mlx5_flow_validate,
-	.create = mlx5_flow_create,
-	.destroy = mlx5_flow_destroy,
-	.flush = mlx5_flow_flush,
-	.query = NULL,
-	.isolate = mlx5_flow_isolate,
-};
-
-/**
- * Manage filter operations.
- *
- * @param dev
- *   Pointer to Ethernet device structure.
- * @param filter_type
- *   Filter type.
- * @param filter_op
- *   Operation to perform.
- * @param arg
- *   Pointer to operation-specific structure.
- *
- * @return
- *   0 on success, negative errno value on failure.
- */
-int
-mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
-		     enum rte_filter_type filter_type,
-		     enum rte_filter_op filter_op,
-		     void *arg)
-{
-	int ret = EINVAL;
-	struct priv *priv = dev->data->dev_private;
-
-	switch (filter_type) {
-	case RTE_ETH_FILTER_GENERIC:
-		if (filter_op != RTE_ETH_FILTER_GET)
-			return -EINVAL;
-		*(const void **)arg = &mlx5_flow_ops;
-		return 0;
-	case RTE_ETH_FILTER_FDIR:
-		priv_lock(priv);
-		ret = priv_fdir_ctrl_func(priv, filter_op, arg);
-		priv_unlock(priv);
-		break;
-	default:
-		ERROR("%p: filter type (%d) not supported",
-		      (void *)dev, filter_type);
-		break;
-	}
-
-	return -ret;
-}
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 00355f4..545bc8f 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -298,6 +298,50 @@ struct rte_flow_drop {
 	struct ibv_cq *cq; /**< Verbs completion queue. */
 };
 
+static const struct rte_flow_ops mlx5_flow_ops = {
+	.validate = mlx5_flow_validate,
+	.create = mlx5_flow_create,
+	.destroy = mlx5_flow_destroy,
+	.flush = mlx5_flow_flush,
+	.query = NULL,
+	.isolate = mlx5_flow_isolate,
+};
+
+/**
+ * Manage filter operations.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param filter_type
+ *   Filter type.
+ * @param filter_op
+ *   Operation to perform.
+ * @param arg
+ *   Pointer to operation-specific structure.
+ *
+ * @return
+ *   0 on success, negative errno value on failure.
+ */
+int
+mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
+		     enum rte_filter_type filter_type,
+		     enum rte_filter_op filter_op,
+		     void *arg)
+{
+	int ret = EINVAL;
+
+	if (filter_type == RTE_ETH_FILTER_GENERIC) {
+		if (filter_op != RTE_ETH_FILTER_GET)
+			return -EINVAL;
+		*(const void **)arg = &mlx5_flow_ops;
+		return 0;
+	} else {
+		ERROR("%p: filter type (%d) not supported",
+		      (void *)dev, filter_type);
+	}
+	return -ret;
+}
+
 /**
  * Check support for a given item.
  *
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index b52de98..bbb914a 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -761,8 +761,6 @@ rxq_cleanup(struct rxq_ctrl *rxq_ctrl)
 {
 	DEBUG("cleaning up %p", (void *)rxq_ctrl);
 	rxq_free_elts(rxq_ctrl);
-	if (rxq_ctrl->fdir_queue != NULL)
-		priv_fdir_queue_destroy(rxq_ctrl->priv, rxq_ctrl->fdir_queue);
 	if (rxq_ctrl->wq != NULL)
 		claim_zero(ibv_exp_destroy_wq(rxq_ctrl->wq));
 	if (rxq_ctrl->cq != NULL)
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 333e5af..91ff780 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -77,14 +77,6 @@ struct mlx5_txq_stats {
 	uint64_t odropped; /**< Total of packets not sent when TX ring full. */
 };
 
-/* Flow director queue structure. */
-struct fdir_queue {
-	struct ibv_qp *qp; /* Associated RX QP. */
-	struct ibv_exp_rwq_ind_table *ind_table; /* Indirection table. */
-	struct ibv_exp_wq *wq; /* Work queue. */
-	struct ibv_cq *cq; /* Completion queue. */
-};
-
 struct priv;
 
 /* Compressed CQE context. */
@@ -130,7 +122,6 @@ struct rxq_ctrl {
 	struct priv *priv; /* Back pointer to private data. */
 	struct ibv_cq *cq; /* Completion Queue. */
 	struct ibv_exp_wq *wq; /* Work Queue. */
-	struct fdir_queue *fdir_queue; /* Flow director queue. */
 	struct ibv_mr *mr; /* Memory Region (for mp). */
 	struct ibv_comp_channel *channel;
 	unsigned int socket; /* CPU socket ID for allocations. */
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 3fa9401..3b17678 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -80,8 +80,6 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 		      (void *)priv, strerror(err));
 		goto error;
 	}
-	if (dev->data->dev_conf.fdir_conf.mode != RTE_FDIR_MODE_NONE)
-		priv_fdir_enable(priv);
 	err = priv_flow_start(priv);
 	if (err) {
 		priv->started = 0;
@@ -135,7 +133,6 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
 	priv_special_flow_disable_all(priv);
 	priv_mac_addrs_disable(priv);
 	priv_destroy_hash_rxqs(priv);
-	priv_fdir_disable(priv);
 	priv_flow_stop(priv);
 	priv_rx_intr_vec_disable(priv);
 	priv_dev_interrupt_handler_uninstall(priv, dev);
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v1 03/21] net/mlx5: prefix Rx queue control structures
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (2 preceding siblings ...)
  2017-08-02 14:10 ` [dpdk-dev] [PATCH v1 02/21] net/mlx5: remove flow director support Nelio Laranjeiro
@ 2017-08-02 14:10 ` Nelio Laranjeiro
  2017-08-02 14:10 ` [dpdk-dev] [PATCH v1 04/21] net/mlx5: prefix Tx control queue structures Nelio Laranjeiro
                   ` (51 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-08-02 14:10 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil
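Prefix struct rxq_ctrl with mlx5 (struct mlx5_rxq_ctrl), rename struct rxq
to struct mlx5_rxq_data, and prefix rxq_cleanup() and rxq_ctrl_setup()
accordingly.  No functional change.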
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5.c              |  8 ++---
 drivers/net/mlx5/mlx5.h              |  4 +--
 drivers/net/mlx5/mlx5_flow.c         | 12 +++----
 drivers/net/mlx5/mlx5_rxq.c          | 61 +++++++++++++++++++-----------------
 drivers/net/mlx5/mlx5_rxtx.c         | 14 ++++-----
 drivers/net/mlx5/mlx5_rxtx.h         | 17 +++++-----
 drivers/net/mlx5/mlx5_rxtx_vec_sse.c | 21 +++++++------
 drivers/net/mlx5/mlx5_stats.c        |  2 +-
 drivers/net/mlx5/mlx5_vlan.c         |  5 +--
 9 files changed, 75 insertions(+), 69 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index ef10a22..d2fa8b1 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -157,14 +157,14 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 		/* XXX race condition if mlx5_rx_burst() is still running. */
 		usleep(1000);
 		for (i = 0; (i != priv->rxqs_n); ++i) {
-			struct rxq *rxq = (*priv->rxqs)[i];
-			struct rxq_ctrl *rxq_ctrl;
+			struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
+			struct mlx5_rxq_ctrl *rxq_ctrl;
 
 			if (rxq == NULL)
 				continue;
-			rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
+			rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq);
 			(*priv->rxqs)[i] = NULL;
-			rxq_cleanup(rxq_ctrl);
+			mlx5_rxq_cleanup(rxq_ctrl);
 			rte_free(rxq_ctrl);
 		}
 		priv->rxqs_n = 0;
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index c7194de..55cea6f 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -129,7 +129,7 @@ struct priv {
 	/* RX/TX queues. */
 	unsigned int rxqs_n; /* RX queues array size. */
 	unsigned int txqs_n; /* TX queues array size. */
-	struct rxq *(*rxqs)[]; /* RX queues. */
+	struct mlx5_rxq_data *(*rxqs)[]; /* RX queues. */
 	struct txq *(*txqs)[]; /* TX queues. */
 	/* Indirection tables referencing all RX WQs. */
 	struct ibv_exp_rwq_ind_table *(*ind_tables)[];
@@ -287,6 +287,6 @@ int mlx5_flow_flush(struct rte_eth_dev *, struct rte_flow_error *);
 int mlx5_flow_isolate(struct rte_eth_dev *, int, struct rte_flow_error *);
 int priv_flow_start(struct priv *);
 void priv_flow_stop(struct priv *);
-int priv_flow_rxq_in_use(struct priv *, struct rxq *);
+int priv_flow_rxq_in_use(struct priv *, struct mlx5_rxq_data *);
 
 #endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 545bc8f..77b85a6 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -99,7 +99,7 @@ struct rte_flow {
 	uint32_t mark:1; /**< Set if the flow is marked. */
 	uint32_t drop:1; /**< Drop queue. */
 	uint64_t hash_fields; /**< Fields that participate in the hash. */
-	struct rxq *rxqs[]; /**< Pointer to the queues array. */
+	struct mlx5_rxq_data *rxqs[]; /**< Pointer to the queues array. */
 };
 
 /** Static initializer for items. */
@@ -1106,10 +1106,10 @@ priv_flow_create_action_queue(struct priv *priv,
 		return NULL;
 	}
 	for (i = 0; i < flow->actions.queues_n; ++i) {
-		struct rxq_ctrl *rxq;
+		struct mlx5_rxq_ctrl *rxq;
 
 		rxq = container_of((*priv->rxqs)[flow->actions.queues[i]],
-				   struct rxq_ctrl, rxq);
+				   struct mlx5_rxq_ctrl, rxq);
 		wqs[i] = rxq->wq;
 		rte_flow->rxqs[i] = &rxq->rxq;
 		++rte_flow->rxqs_n;
@@ -1305,7 +1305,7 @@ priv_flow_destroy(struct priv *priv,
 		claim_zero(ibv_exp_destroy_rwq_ind_table(flow->ind_table));
 	if (flow->mark) {
 		struct rte_flow *tmp;
-		struct rxq *rxq;
+		struct mlx5_rxq_data *rxq;
 		uint32_t mark_n = 0;
 		uint32_t queue_n;
 
@@ -1325,7 +1325,7 @@ priv_flow_destroy(struct priv *priv,
 				for (tqueue_n = 0;
 				     tqueue_n < tmp->rxqs_n;
 				     ++tqueue_n) {
-					struct rxq *trxq;
+					struct mlx5_rxq_data *trxq;
 
 					trxq = tmp->rxqs[tqueue_n];
 					if (rxq == trxq)
@@ -1594,7 +1594,7 @@ priv_flow_start(struct priv *priv)
  *   Nonzero if the queue is used by a flow.
  */
 int
-priv_flow_rxq_in_use(struct priv *priv, struct rxq *rxq)
+priv_flow_rxq_in_use(struct priv *priv, struct mlx5_rxq_data *rxq)
 {
 	struct rte_flow *flow;
 
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index bbb914a..c09a554 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -374,10 +374,10 @@ priv_create_hash_rxqs(struct priv *priv)
 		      priv->reta_idx_n);
 	}
 	for (i = 0; (i != priv->reta_idx_n); ++i) {
-		struct rxq_ctrl *rxq_ctrl;
+		struct mlx5_rxq_ctrl *rxq_ctrl;
 
 		rxq_ctrl = container_of((*priv->rxqs)[(*priv->reta_idx)[i]],
-					struct rxq_ctrl, rxq);
+					struct mlx5_rxq_ctrl, rxq);
 		wqs[i] = rxq_ctrl->wq;
 	}
 	/* Get number of hash RX queues to configure. */
@@ -638,7 +638,7 @@ priv_rehash_flows(struct priv *priv)
  *   0 on success, errno value on failure.
  */
 static int
-rxq_alloc_elts(struct rxq_ctrl *rxq_ctrl, unsigned int elts_n)
+rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl, unsigned int elts_n)
 {
 	const unsigned int sges_n = 1 << rxq_ctrl->rxq.sges_n;
 	unsigned int i;
@@ -679,7 +679,7 @@ rxq_alloc_elts(struct rxq_ctrl *rxq_ctrl, unsigned int elts_n)
 		(*rxq_ctrl->rxq.elts)[i] = buf;
 	}
 	if (rxq_check_vec_support(&rxq_ctrl->rxq) > 0) {
-		struct rxq *rxq = &rxq_ctrl->rxq;
+		struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
 		struct rte_mbuf *mbuf_init = &rxq->fake_mbuf;
 
 		assert(rxq->elts_n == rxq->cqe_n);
@@ -721,9 +721,9 @@ rxq_alloc_elts(struct rxq_ctrl *rxq_ctrl, unsigned int elts_n)
  *   Pointer to RX queue structure.
  */
 static void
-rxq_free_elts(struct rxq_ctrl *rxq_ctrl)
+rxq_free_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
 {
-	struct rxq *rxq = &rxq_ctrl->rxq;
+	struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
 	const uint16_t q_n = (1 << rxq->elts_n);
 	const uint16_t q_mask = q_n - 1;
 	uint16_t used = q_n - (rxq->rq_ci - rxq->rq_pi);
@@ -757,7 +757,7 @@ rxq_free_elts(struct rxq_ctrl *rxq_ctrl)
  *   Pointer to RX queue structure.
  */
 void
-rxq_cleanup(struct rxq_ctrl *rxq_ctrl)
+mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl)
 {
 	DEBUG("cleaning up %p", (void *)rxq_ctrl);
 	rxq_free_elts(rxq_ctrl);
@@ -782,7 +782,7 @@ rxq_cleanup(struct rxq_ctrl *rxq_ctrl)
  *   0 on success, errno value on failure.
  */
 static inline int
-rxq_setup(struct rxq_ctrl *tmpl)
+rxq_setup(struct mlx5_rxq_ctrl *tmpl)
 {
 	struct ibv_cq *ibcq = tmpl->cq;
 	struct ibv_mlx5_cq_info cq_info;
@@ -839,12 +839,12 @@ rxq_setup(struct rxq_ctrl *tmpl)
  *   0 on success, errno value on failure.
  */
 int
-rxq_ctrl_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl,
-	       uint16_t desc, unsigned int socket,
-	       const struct rte_eth_rxconf *conf, struct rte_mempool *mp)
+mlx5_rxq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
+		    uint16_t desc, unsigned int socket,
+		    const struct rte_eth_rxconf *conf, struct rte_mempool *mp)
 {
 	struct priv *priv = dev->data->dev_private;
-	struct rxq_ctrl tmpl = {
+	struct mlx5_rxq_ctrl tmpl = {
 		.priv = priv,
 		.socket = socket,
 		.rxq = {
@@ -1062,7 +1062,7 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl,
 	}
 	/* Clean up rxq in case we're reinitializing it. */
 	DEBUG("%p: cleaning-up old rxq just in case", (void *)rxq_ctrl);
-	rxq_cleanup(rxq_ctrl);
+	mlx5_rxq_cleanup(rxq_ctrl);
 	/* Move mbuf pointers to dedicated storage area in RX queue. */
 	elts = (void *)(rxq_ctrl + 1);
 	rte_memcpy(elts, tmpl.rxq.elts, sizeof(*elts));
@@ -1081,7 +1081,7 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl,
 	return 0;
 error:
 	elts = tmpl.rxq.elts;
-	rxq_cleanup(&tmpl);
+	mlx5_rxq_cleanup(&tmpl);
 	rte_free(elts);
 	assert(ret > 0);
 	return ret;
@@ -1112,8 +1112,9 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		    struct rte_mempool *mp)
 {
 	struct priv *priv = dev->data->dev_private;
-	struct rxq *rxq = (*priv->rxqs)[idx];
-	struct rxq_ctrl *rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
+	struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
+	struct mlx5_rxq_ctrl *rxq_ctrl =
+		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
 	const uint16_t desc_n =
 		desc + priv->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
 	int ret;
@@ -1144,7 +1145,7 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 			return -EEXIST;
 		}
 		(*priv->rxqs)[idx] = NULL;
-		rxq_cleanup(rxq_ctrl);
+		mlx5_rxq_cleanup(rxq_ctrl);
 		/* Resize if rxq size is changed. */
 		if (rxq_ctrl->rxq.elts_n != log2above(desc)) {
 			rxq_ctrl = rte_realloc(rxq_ctrl,
@@ -1170,7 +1171,7 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 			return -ENOMEM;
 		}
 	}
-	ret = rxq_ctrl_setup(dev, rxq_ctrl, desc, socket, conf, mp);
+	ret = mlx5_rxq_ctrl_setup(dev, rxq_ctrl, desc, socket, conf, mp);
 	if (ret)
 		rte_free(rxq_ctrl);
 	else {
@@ -1192,8 +1193,8 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 void
 mlx5_rx_queue_release(void *dpdk_rxq)
 {
-	struct rxq *rxq = (struct rxq *)dpdk_rxq;
-	struct rxq_ctrl *rxq_ctrl;
+	struct mlx5_rxq_data *rxq = (struct mlx5_rxq_data *)dpdk_rxq;
+	struct mlx5_rxq_ctrl *rxq_ctrl;
 	struct priv *priv;
 	unsigned int i;
 
@@ -1202,7 +1203,7 @@ mlx5_rx_queue_release(void *dpdk_rxq)
 
 	if (rxq == NULL)
 		return;
-	rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
+	rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq);
 	priv = rxq_ctrl->priv;
 	priv_lock(priv);
 	if (priv_flow_rxq_in_use(priv, rxq))
@@ -1215,7 +1216,7 @@ mlx5_rx_queue_release(void *dpdk_rxq)
 			(*priv->rxqs)[i] = NULL;
 			break;
 		}
-	rxq_cleanup(rxq_ctrl);
+	mlx5_rxq_cleanup(rxq_ctrl);
 	rte_free(rxq_ctrl);
 	priv_unlock(priv);
 }
@@ -1249,9 +1250,9 @@ priv_rx_intr_vec_enable(struct priv *priv)
 	}
 	intr_handle->type = RTE_INTR_HANDLE_EXT;
 	for (i = 0; i != n; ++i) {
-		struct rxq *rxq = (*priv->rxqs)[i];
-		struct rxq_ctrl *rxq_ctrl =
-			container_of(rxq, struct rxq_ctrl, rxq);
+		struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
+		struct mlx5_rxq_ctrl *rxq_ctrl =
+			container_of(rxq, struct mlx5_rxq_ctrl, rxq);
 		int fd;
 		int flags;
 		int rc;
@@ -1325,8 +1326,9 @@ int
 mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
 	struct priv *priv = mlx5_get_priv(dev);
-	struct rxq *rxq = (*priv->rxqs)[rx_queue_id];
-	struct rxq_ctrl *rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
+	struct mlx5_rxq_data *rxq = (*priv->rxqs)[rx_queue_id];
+	struct mlx5_rxq_ctrl *rxq_ctrl =
+		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
 	int ret;
 
 	if (!rxq || !rxq_ctrl->channel) {
@@ -1355,8 +1357,9 @@ int
 mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
 	struct priv *priv = mlx5_get_priv(dev);
-	struct rxq *rxq = (*priv->rxqs)[rx_queue_id];
-	struct rxq_ctrl *rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
+	struct mlx5_rxq_data *rxq = (*priv->rxqs)[rx_queue_id];
+	struct mlx5_rxq_ctrl *rxq_ctrl =
+		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
 	struct ibv_cq *ev_cq;
 	void *ev_ctx;
 	int ret;
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 59d9ce0..cd5182c 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -66,11 +66,11 @@ static __rte_always_inline uint32_t
 rxq_cq_to_pkt_type(volatile struct mlx5_cqe *cqe);
 
 static __rte_always_inline int
-mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe,
+mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
 		 uint16_t cqe_cnt, uint32_t *rss_hash);
 
 static __rte_always_inline uint32_t
-rxq_cq_to_ol_flags(struct rxq *rxq, volatile struct mlx5_cqe *cqe);
+rxq_cq_to_ol_flags(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe);
 
 uint32_t mlx5_ptype_table[] __rte_cache_aligned = {
 	[0xff] = RTE_PTYPE_ALL_MASK, /* Last entry for errored packet. */
@@ -282,7 +282,7 @@ mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset)
 int
 mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset)
 {
-	struct rxq *rxq = rx_queue;
+	struct mlx5_rxq_data *rxq = rx_queue;
 	struct rxq_zip *zip = &rxq->zip;
 	volatile struct mlx5_cqe *cqe;
 	const unsigned int cqe_n = (1 << rxq->cqe_n);
@@ -1619,7 +1619,7 @@ rxq_cq_to_pkt_type(volatile struct mlx5_cqe *cqe)
  *   with error.
  */
 static inline int
-mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe,
+mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
 		 uint16_t cqe_cnt, uint32_t *rss_hash)
 {
 	struct rxq_zip *zip = &rxq->zip;
@@ -1730,7 +1730,7 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe,
  *   Offload flags (ol_flags) for struct rte_mbuf.
  */
 static inline uint32_t
-rxq_cq_to_ol_flags(struct rxq *rxq, volatile struct mlx5_cqe *cqe)
+rxq_cq_to_ol_flags(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe)
 {
 	uint32_t ol_flags = 0;
 	uint16_t flags = ntohs(cqe->hdr_type_etc);
@@ -1769,7 +1769,7 @@ rxq_cq_to_ol_flags(struct rxq *rxq, volatile struct mlx5_cqe *cqe)
 uint16_t
 mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 {
-	struct rxq *rxq = dpdk_rxq;
+	struct mlx5_rxq_data *rxq = dpdk_rxq;
 	const unsigned int wqe_cnt = (1 << rxq->elts_n) - 1;
 	const unsigned int cqe_cnt = (1 << rxq->cqe_n) - 1;
 	const unsigned int sges_n = rxq->sges_n;
@@ -2008,7 +2008,7 @@ priv_check_vec_tx_support(struct priv *priv)
 }
 
 int __attribute__((weak))
-rxq_check_vec_support(struct rxq *rxq)
+rxq_check_vec_support(struct mlx5_rxq_data *rxq)
 {
 	(void)rxq;
 	return -ENOTSUP;
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 91ff780..bd07b5d 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -89,7 +89,7 @@ struct rxq_zip {
 };
 
 /* RX queue descriptor. */
-struct rxq {
+struct mlx5_rxq_data {
 	unsigned int csum:1; /* Enable checksum offloading. */
 	unsigned int csum_l2tun:1; /* Same for L2 tunnels. */
 	unsigned int vlan_strip:1; /* Enable VLAN stripping. */
@@ -118,14 +118,14 @@ struct rxq {
 } __rte_cache_aligned;
 
 /* RX queue control descriptor. */
-struct rxq_ctrl {
+struct mlx5_rxq_ctrl {
 	struct priv *priv; /* Back pointer to private data. */
 	struct ibv_cq *cq; /* Completion Queue. */
 	struct ibv_exp_wq *wq; /* Work Queue. */
 	struct ibv_mr *mr; /* Memory Region (for mp). */
 	struct ibv_comp_channel *channel;
 	unsigned int socket; /* CPU socket ID for allocations. */
-	struct rxq rxq; /* Data path structure. */
+	struct mlx5_rxq_data rxq; /* Data path structure. */
 };
 
 /* Hash RX queue types. */
@@ -289,10 +289,11 @@ int priv_create_hash_rxqs(struct priv *);
 void priv_destroy_hash_rxqs(struct priv *);
 int priv_allow_flow_type(struct priv *, enum hash_rxq_flow_type);
 int priv_rehash_flows(struct priv *);
-void rxq_cleanup(struct rxq_ctrl *);
-int rxq_ctrl_setup(struct rte_eth_dev *, struct rxq_ctrl *, uint16_t,
-		   unsigned int, const struct rte_eth_rxconf *,
-		   struct rte_mempool *);
+void mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *);
+int mlx5_rxq_rehash(struct rte_eth_dev *, struct mlx5_rxq_ctrl *);
+int mlx5_rxq_ctrl_setup(struct rte_eth_dev *, struct mlx5_rxq_ctrl *,
+			uint16_t, unsigned int, const struct rte_eth_rxconf *,
+			struct rte_mempool *);
 int mlx5_rx_queue_setup(struct rte_eth_dev *, uint16_t, uint16_t, unsigned int,
 			const struct rte_eth_rxconf *, struct rte_mempool *);
 void mlx5_rx_queue_release(void *);
@@ -330,7 +331,7 @@ int mlx5_tx_descriptor_status(void *, uint16_t);
 /* Vectorized version of mlx5_rxtx.c */
 int priv_check_raw_vec_tx_support(struct priv *);
 int priv_check_vec_tx_support(struct priv *);
-int rxq_check_vec_support(struct rxq *);
+int rxq_check_vec_support(struct mlx5_rxq_data *);
 int priv_check_vec_rx_support(struct priv *);
 uint16_t mlx5_tx_burst_raw_vec(void *, struct rte_mbuf **, uint16_t);
 uint16_t mlx5_tx_burst_vec(void *, struct rte_mbuf **, uint16_t);
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
index 290d6cf..245a58e 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
@@ -516,7 +516,7 @@ mlx5_tx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
  *   Number of packets to be stored.
  */
 static inline void
-rxq_copy_mbuf_v(struct rxq *rxq, struct rte_mbuf **pkts, uint16_t n)
+rxq_copy_mbuf_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t n)
 {
 	const uint16_t q_mask = (1 << rxq->elts_n) - 1;
 	struct rte_mbuf **elts = &(*rxq->elts)[rxq->rq_pi & q_mask];
@@ -542,7 +542,7 @@ rxq_copy_mbuf_v(struct rxq *rxq, struct rte_mbuf **pkts, uint16_t n)
  *   Number of buffers to be replenished.
  */
 static inline void
-rxq_replenish_bulk_mbuf(struct rxq *rxq, uint16_t n)
+rxq_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq, uint16_t n)
 {
 	const uint16_t q_n = 1 << rxq->elts_n;
 	const uint16_t q_mask = q_n - 1;
@@ -580,7 +580,7 @@ rxq_replenish_bulk_mbuf(struct rxq *rxq, uint16_t n)
  *   the title completion descriptor to be copied to the rest of mbufs.
  */
 static inline void
-rxq_cq_decompress_v(struct rxq *rxq,
+rxq_cq_decompress_v(struct mlx5_rxq_data *rxq,
 		    volatile struct mlx5_cqe *cq,
 		    struct rte_mbuf **elts)
 {
@@ -739,7 +739,7 @@ rxq_cq_decompress_v(struct rxq *rxq,
  *   Pointer to array of packets to be filled.
  */
 static inline void
-rxq_cq_to_ptype_oflags_v(struct rxq *rxq, __m128i cqes[4], __m128i op_err,
+rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, __m128i cqes[4], __m128i op_err,
 			 struct rte_mbuf **pkts)
 {
 	__m128i pinfo0, pinfo1;
@@ -873,7 +873,7 @@ rxq_cq_to_ptype_oflags_v(struct rxq *rxq, __m128i cqes[4], __m128i op_err,
  *   Number of packets successfully received (<= pkts_n).
  */
 static uint16_t
-rxq_handle_pending_error(struct rxq *rxq, struct rte_mbuf **pkts,
+rxq_handle_pending_error(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
 			 uint16_t pkts_n)
 {
 	uint16_t n = 0;
@@ -908,7 +908,7 @@ rxq_handle_pending_error(struct rxq *rxq, struct rte_mbuf **pkts,
  *   Number of packets received including errors (<= pkts_n).
  */
 static inline uint16_t
-rxq_burst_v(struct rxq *rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
+rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 {
 	const uint16_t q_n = 1 << rxq->cqe_n;
 	const uint16_t q_mask = q_n - 1;
@@ -1254,7 +1254,7 @@ rxq_burst_v(struct rxq *rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 uint16_t
 mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 {
-	struct rxq *rxq = dpdk_rxq;
+	struct mlx5_rxq_data *rxq = dpdk_rxq;
 	uint16_t nb_rx;
 
 	nb_rx = rxq_burst_v(rxq, pkts, pkts_n);
@@ -1320,9 +1320,10 @@ priv_check_vec_tx_support(struct priv *priv)
  *   1 if supported, negative errno value if not.
  */
 int __attribute__((cold))
-rxq_check_vec_support(struct rxq *rxq)
+rxq_check_vec_support(struct mlx5_rxq_data *rxq)
 {
-	struct rxq_ctrl *ctrl = container_of(rxq, struct rxq_ctrl, rxq);
+	struct mlx5_rxq_ctrl *ctrl =
+		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
 
 	if (!ctrl->priv->rx_vec_en || rxq->sges_n != 0)
 		return -ENOTSUP;
@@ -1347,7 +1348,7 @@ priv_check_vec_rx_support(struct priv *priv)
 		return -ENOTSUP;
 	/* All the configured queues should support. */
 	for (i = 0; i < priv->rxqs_n; ++i) {
-		struct rxq *rxq = (*priv->rxqs)[i];
+		struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
 
 		if (rxq_check_vec_support(rxq) < 0)
 			break;
diff --git a/drivers/net/mlx5/mlx5_stats.c b/drivers/net/mlx5/mlx5_stats.c
index 7b45c8c..3c3db24 100644
--- a/drivers/net/mlx5/mlx5_stats.c
+++ b/drivers/net/mlx5/mlx5_stats.c
@@ -329,7 +329,7 @@ mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 	priv_lock(priv);
 	/* Add software counters. */
 	for (i = 0; (i != priv->rxqs_n); ++i) {
-		struct rxq *rxq = (*priv->rxqs)[i];
+		struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
 
 		if (rxq == NULL)
 			continue;
diff --git a/drivers/net/mlx5/mlx5_vlan.c b/drivers/net/mlx5/mlx5_vlan.c
index 353ae49..512052a 100644
--- a/drivers/net/mlx5/mlx5_vlan.c
+++ b/drivers/net/mlx5/mlx5_vlan.c
@@ -137,8 +137,9 @@ mlx5_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
 static void
 priv_vlan_strip_queue_set(struct priv *priv, uint16_t idx, int on)
 {
-	struct rxq *rxq = (*priv->rxqs)[idx];
-	struct rxq_ctrl *rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
+	struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
+	struct mlx5_rxq_ctrl *rxq_ctrl =
+		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
 	struct ibv_exp_wq_attr mod;
 	uint16_t vlan_offloads =
 		(on ? IBV_EXP_RECEIVE_WQ_CVLAN_STRIP : 0) |
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v1 04/21] net/mlx5: prefix Tx control queue structures
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (3 preceding siblings ...)
  2017-08-02 14:10 ` [dpdk-dev] [PATCH v1 03/21] net/mlx5: prefix Rx queue control structures Nelio Laranjeiro
@ 2017-08-02 14:10 ` Nelio Laranjeiro
  2017-08-02 14:10 ` [dpdk-dev] [PATCH v1 05/21] net/mlx5: remove redundant started flag Nelio Laranjeiro
                   ` (50 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-08-02 14:10 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil
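Prefix struct txq_ctrl with mlx5 (struct mlx5_txq_ctrl), rename struct txq
to struct mlx5_txq_data, and prefix the related helpers (e.g. txq_cleanup(),
txq_mp2mr_reg()) accordingly.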
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5.c              |  8 ++++----
 drivers/net/mlx5/mlx5.h              |  2 +-
 drivers/net/mlx5/mlx5_mr.c           | 20 +++++++++---------
 drivers/net/mlx5/mlx5_rxtx.c         | 25 ++++++++++++-----------
 drivers/net/mlx5/mlx5_rxtx.h         | 29 ++++++++++++++-------------
 drivers/net/mlx5/mlx5_rxtx_vec_sse.c | 14 ++++++-------
 drivers/net/mlx5/mlx5_stats.c        |  2 +-
 drivers/net/mlx5/mlx5_txq.c          | 39 ++++++++++++++++++------------------
 8 files changed, 72 insertions(+), 67 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index d2fa8b1..c7bc65f 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -174,14 +174,14 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 		/* XXX race condition if mlx5_tx_burst() is still running. */
 		usleep(1000);
 		for (i = 0; (i != priv->txqs_n); ++i) {
-			struct txq *txq = (*priv->txqs)[i];
-			struct txq_ctrl *txq_ctrl;
+			struct mlx5_txq_data *txq = (*priv->txqs)[i];
+			struct mlx5_txq_ctrl *txq_ctrl;
 
 			if (txq == NULL)
 				continue;
-			txq_ctrl = container_of(txq, struct txq_ctrl, txq);
+			txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
 			(*priv->txqs)[i] = NULL;
-			txq_cleanup(txq_ctrl);
+			mlx5_txq_cleanup(txq_ctrl);
 			rte_free(txq_ctrl);
 		}
 		priv->txqs_n = 0;
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 55cea6f..155dd76 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -130,7 +130,7 @@ struct priv {
 	unsigned int rxqs_n; /* RX queues array size. */
 	unsigned int txqs_n; /* TX queues array size. */
 	struct mlx5_rxq_data *(*rxqs)[]; /* RX queues. */
-	struct txq *(*txqs)[]; /* TX queues. */
+	struct mlx5_txq_data *(*txqs)[]; /* TX queues. */
 	/* Indirection tables referencing all RX WQs. */
 	struct ibv_exp_rwq_ind_table *(*ind_tables)[];
 	unsigned int ind_tables_n; /* Number of indirection tables. */
diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c
index 9593830..e8adde5 100644
--- a/drivers/net/mlx5/mlx5_mr.c
+++ b/drivers/net/mlx5/mlx5_mr.c
@@ -175,9 +175,11 @@ mlx5_mp2mr(struct ibv_pd *pd, struct rte_mempool *mp)
  *   mr->lkey on success, (uint32_t)-1 on failure.
  */
 uint32_t
-txq_mp2mr_reg(struct txq *txq, struct rte_mempool *mp, unsigned int idx)
+mlx5_txq_mp2mr_reg(struct mlx5_txq_data *txq, struct rte_mempool *mp,
+		   unsigned int idx)
 {
-	struct txq_ctrl *txq_ctrl = container_of(txq, struct txq_ctrl, txq);
+	struct mlx5_txq_ctrl *txq_ctrl =
+		container_of(txq, struct mlx5_txq_ctrl, txq);
 	struct ibv_mr *mr;
 
 	/* Add a new entry, register MR first. */
@@ -229,8 +231,8 @@ struct txq_mp2mr_mbuf_check_data {
  *   Object index, unused.
  */
 static void
-txq_mp2mr_mbuf_check(struct rte_mempool *mp, void *arg, void *obj,
-	uint32_t index __rte_unused)
+mlx5_txq_mp2mr_mbuf_check(struct rte_mempool *mp, void *arg, void *obj,
+			  uint32_t index __rte_unused)
 {
 	struct txq_mp2mr_mbuf_check_data *data = arg;
 	struct rte_mbuf *buf = obj;
@@ -253,9 +255,9 @@ txq_mp2mr_mbuf_check(struct rte_mempool *mp, void *arg, void *obj,
  *   Pointer to TX queue structure.
  */
 void
-txq_mp2mr_iter(struct rte_mempool *mp, void *arg)
+mlx5_txq_mp2mr_iter(struct rte_mempool *mp, void *arg)
 {
-	struct txq_ctrl *txq_ctrl = arg;
+	struct mlx5_txq_ctrl *txq_ctrl = arg;
 	struct txq_mp2mr_mbuf_check_data data = {
 		.ret = 0,
 	};
@@ -264,8 +266,8 @@ txq_mp2mr_iter(struct rte_mempool *mp, void *arg)
 	unsigned int i;
 
 	/* Register mempool only if the first element looks like a mbuf. */
-	if (rte_mempool_obj_iter(mp, txq_mp2mr_mbuf_check, &data) == 0 ||
-			data.ret == -1)
+	if (rte_mempool_obj_iter(mp, mlx5_txq_mp2mr_mbuf_check, &data) == 0 ||
+	    data.ret == -1)
 		return;
 	if (mlx5_check_mempool(mp, &start, &end) != 0) {
 		ERROR("mempool %p: not virtually contiguous",
@@ -283,5 +285,5 @@ txq_mp2mr_iter(struct rte_mempool *mp, void *arg)
 		    end <= (uintptr_t)mr->addr + mr->length)
 			return;
 	}
-	txq_mp2mr_reg(&txq_ctrl->txq, mp, i);
+	mlx5_txq_mp2mr_reg(&txq_ctrl->txq, mp, i);
 }
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index cd5182c..986e238 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -200,7 +200,7 @@ mlx5_set_ptype_table(void)
  *   Size of tailroom.
  */
 static inline size_t
-tx_mlx5_wq_tailroom(struct txq *txq, void *addr)
+tx_mlx5_wq_tailroom(struct mlx5_txq_data *txq, void *addr)
 {
 	size_t tailroom;
 	tailroom = (uintptr_t)(txq->wqes) +
@@ -258,7 +258,7 @@ mlx5_copy_to_wq(void *dst, const void *src, size_t n,
 int
 mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset)
 {
-	struct txq *txq = tx_queue;
+	struct mlx5_txq_data *txq = tx_queue;
 	uint16_t used;
 
 	mlx5_tx_complete(txq);
@@ -334,7 +334,7 @@ mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset)
 uint16_t
 mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 {
-	struct txq *txq = (struct txq *)dpdk_txq;
+	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
 	uint16_t elts_head = txq->elts_head;
 	const uint16_t elts_n = 1 << txq->elts_n;
 	const uint16_t elts_m = elts_n - 1;
@@ -731,7 +731,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
  *   Packet length.
  */
 static inline void
-mlx5_mpw_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
+mlx5_mpw_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw, uint32_t length)
 {
 	uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);
 	volatile struct mlx5_wqe_data_seg (*dseg)[MLX5_MPW_DSEG_MAX] =
@@ -770,7 +770,7 @@ mlx5_mpw_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
  *   Pointer to MPW session structure.
  */
 static inline void
-mlx5_mpw_close(struct txq *txq, struct mlx5_mpw *mpw)
+mlx5_mpw_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw)
 {
 	unsigned int num = mpw->pkts_n;
 
@@ -804,7 +804,7 @@ mlx5_mpw_close(struct txq *txq, struct mlx5_mpw *mpw)
 uint16_t
 mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 {
-	struct txq *txq = (struct txq *)dpdk_txq;
+	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
 	uint16_t elts_head = txq->elts_head;
 	const uint16_t elts_n = 1 << txq->elts_n;
 	const uint16_t elts_m = elts_n - 1;
@@ -945,7 +945,8 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
  *   Packet length.
  */
 static inline void
-mlx5_mpw_inline_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
+mlx5_mpw_inline_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw,
+		    uint32_t length)
 {
 	uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);
 	struct mlx5_wqe_inl_small *inl;
@@ -980,7 +981,7 @@ mlx5_mpw_inline_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
  *   Pointer to MPW session structure.
  */
 static inline void
-mlx5_mpw_inline_close(struct txq *txq, struct mlx5_mpw *mpw)
+mlx5_mpw_inline_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw)
 {
 	unsigned int size;
 	struct mlx5_wqe_inl_small *inl = (struct mlx5_wqe_inl_small *)
@@ -1014,7 +1015,7 @@ uint16_t
 mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
 			 uint16_t pkts_n)
 {
-	struct txq *txq = (struct txq *)dpdk_txq;
+	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
 	uint16_t elts_head = txq->elts_head;
 	const uint16_t elts_n = 1 << txq->elts_n;
 	const uint16_t elts_m = elts_n - 1;
@@ -1237,7 +1238,7 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
  *   Packet length.
  */
 static inline void
-mlx5_empw_new(struct txq *txq, struct mlx5_mpw *mpw, int padding)
+mlx5_empw_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw, int padding)
 {
 	uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);
 
@@ -1278,7 +1279,7 @@ mlx5_empw_new(struct txq *txq, struct mlx5_mpw *mpw, int padding)
  *   Number of consumed WQEs.
  */
 static inline uint16_t
-mlx5_empw_close(struct txq *txq, struct mlx5_mpw *mpw)
+mlx5_empw_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw)
 {
 	uint16_t ret;
 
@@ -1308,7 +1309,7 @@ mlx5_empw_close(struct txq *txq, struct mlx5_mpw *mpw)
 uint16_t
 mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 {
-	struct txq *txq = (struct txq *)dpdk_txq;
+	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
 	uint16_t elts_head = txq->elts_head;
 	const uint16_t elts_n = 1 << txq->elts_n;
 	const uint16_t elts_m = elts_n - 1;
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index bd07b5d..12366c5 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -228,8 +228,8 @@ struct hash_rxq {
 };
 
 /* TX queue descriptor. */
-__extension__
-struct txq {
+RTE_STD_C11
+struct mlx5_txq_data {
 	uint16_t elts_head; /* Current counter in (*elts)[]. */
 	uint16_t elts_tail; /* Counter of first element awaiting completion. */
 	uint16_t elts_comp; /* Counter since last completion request. */
@@ -267,12 +267,12 @@ struct txq {
 } __rte_cache_aligned;
 
 /* TX queue control descriptor. */
-struct txq_ctrl {
+struct mlx5_txq_ctrl {
 	struct priv *priv; /* Back pointer to private data. */
 	struct ibv_cq *cq; /* Completion Queue. */
 	struct ibv_qp *qp; /* Queue Pair. */
 	unsigned int socket; /* CPU socket ID for allocations. */
-	struct txq txq; /* Data path structure. */
+	struct mlx5_txq_data txq; /* Data path structure. */
 };
 
 /* mlx5_rxq.c */
@@ -306,9 +306,9 @@ int mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id);
 
 /* mlx5_txq.c */
 
-void txq_cleanup(struct txq_ctrl *);
-int txq_ctrl_setup(struct rte_eth_dev *, struct txq_ctrl *, uint16_t,
-		   unsigned int, const struct rte_eth_txconf *);
+void mlx5_txq_cleanup(struct mlx5_txq_ctrl *);
+int mlx5_txq_ctrl_setup(struct rte_eth_dev *, struct mlx5_txq_ctrl *, uint16_t,
+			unsigned int, const struct rte_eth_txconf *);
 int mlx5_tx_queue_setup(struct rte_eth_dev *, uint16_t, uint16_t, unsigned int,
 			const struct rte_eth_txconf *);
 void mlx5_tx_queue_release(void *);
@@ -340,8 +340,9 @@ uint16_t mlx5_rx_burst_vec(void *, struct rte_mbuf **, uint16_t);
 /* mlx5_mr.c */
 
 struct ibv_mr *mlx5_mp2mr(struct ibv_pd *, struct rte_mempool *);
-void txq_mp2mr_iter(struct rte_mempool *, void *);
-uint32_t txq_mp2mr_reg(struct txq *, struct rte_mempool *, unsigned int);
+void mlx5_txq_mp2mr_iter(struct rte_mempool *, void *);
+uint32_t mlx5_txq_mp2mr_reg(struct mlx5_txq_data *, struct rte_mempool *,
+			    unsigned int);
 
 #ifndef NDEBUG
 /**
@@ -431,7 +432,7 @@ check_cqe(volatile struct mlx5_cqe *cqe,
  *   WQE address.
  */
 static inline uintptr_t *
-tx_mlx5_wqe(struct txq *txq, uint16_t ci)
+tx_mlx5_wqe(struct mlx5_txq_data *txq, uint16_t ci)
 {
 	ci &= ((1 << txq->wqe_n) - 1);
 	return (uintptr_t *)((uintptr_t)txq->wqes + ci * MLX5_WQE_SIZE);
@@ -446,7 +447,7 @@ tx_mlx5_wqe(struct txq *txq, uint16_t ci)
  *   Pointer to TX queue structure.
  */
 static __rte_always_inline void
-mlx5_tx_complete(struct txq *txq)
+mlx5_tx_complete(struct mlx5_txq_data *txq)
 {
 	const uint16_t elts_n = 1 << txq->elts_n;
 	const uint16_t elts_m = elts_n - 1;
@@ -546,7 +547,7 @@ mlx5_tx_mb2mp(struct rte_mbuf *buf)
  *   mr->lkey on success, (uint32_t)-1 on failure.
  */
 static __rte_always_inline uint32_t
-mlx5_tx_mb2mr(struct txq *txq, struct rte_mbuf *mb)
+mlx5_tx_mb2mr(struct mlx5_txq_data *txq, struct rte_mbuf *mb)
 {
 	uint16_t i = txq->mr_cache_idx;
 	uintptr_t addr = rte_pktmbuf_mtod(mb, uintptr_t);
@@ -569,7 +570,7 @@ mlx5_tx_mb2mr(struct txq *txq, struct rte_mbuf *mb)
 		}
 	}
 	txq->mr_cache_idx = 0;
-	return txq_mp2mr_reg(txq, mlx5_tx_mb2mp(mb), i);
+	return mlx5_txq_mp2mr_reg(txq, mlx5_tx_mb2mp(mb), i);
 }
 
 /**
@@ -581,7 +582,7 @@ mlx5_tx_mb2mr(struct txq *txq, struct rte_mbuf *mb)
  *   Pointer to the last WQE posted in the NIC.
  */
 static __rte_always_inline void
-mlx5_tx_dbrec(struct txq *txq, volatile struct mlx5_wqe *wqe)
+mlx5_tx_dbrec(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe)
 {
 	uint64_t *dst = (uint64_t *)((uintptr_t)txq->bf_reg);
 	volatile uint64_t *src = ((volatile uint64_t *)wqe);
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
index 245a58e..fb96542 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
@@ -77,7 +77,7 @@
  *   Number of packets to be filled.
  */
 static inline void
-txq_wr_dseg_v(struct txq *txq, __m128i *dseg,
+txq_wr_dseg_v(struct mlx5_txq_data *txq, __m128i *dseg,
 	      struct rte_mbuf **pkts, unsigned int n)
 {
 	unsigned int pos;
@@ -154,7 +154,7 @@ txq_check_multiseg(struct rte_mbuf **pkts, uint16_t pkts_n)
  *   Number of packets having same ol_flags.
  */
 static inline unsigned int
-txq_calc_offload(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
+txq_calc_offload(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
 		 uint8_t *cs_flags)
 {
 	unsigned int pos;
@@ -205,7 +205,7 @@ txq_calc_offload(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
  *   Number of packets successfully transmitted (<= pkts_n).
  */
 static uint16_t
-txq_scatter_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n)
+txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 {
 	uint16_t elts_head = txq->elts_head;
 	const uint16_t elts_n = 1 << txq->elts_n;
@@ -331,7 +331,7 @@ txq_scatter_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n)
  *   Number of packets successfully transmitted (<= pkts_n).
  */
 static inline uint16_t
-txq_burst_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
+txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
 	    uint8_t cs_flags)
 {
 	struct rte_mbuf **elts;
@@ -446,7 +446,7 @@ uint16_t
 mlx5_tx_burst_raw_vec(void *dpdk_txq, struct rte_mbuf **pkts,
 		      uint16_t pkts_n)
 {
-	struct txq *txq = (struct txq *)dpdk_txq;
+	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
 	uint16_t nb_tx = 0;
 
 	while (pkts_n > nb_tx) {
@@ -478,7 +478,7 @@ mlx5_tx_burst_raw_vec(void *dpdk_txq, struct rte_mbuf **pkts,
 uint16_t
 mlx5_tx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 {
-	struct txq *txq = (struct txq *)dpdk_txq;
+	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
 	uint16_t nb_tx = 0;
 
 	while (pkts_n > nb_tx) {
@@ -1279,7 +1279,7 @@ priv_check_raw_vec_tx_support(struct priv *priv)
 
 	/* All the configured queues should support. */
 	for (i = 0; i < priv->txqs_n; ++i) {
-		struct txq *txq = (*priv->txqs)[i];
+		struct mlx5_txq_data *txq = (*priv->txqs)[i];
 
 		if (!(txq->flags & ETH_TXQ_FLAGS_NOMULTSEGS) ||
 		    !(txq->flags & ETH_TXQ_FLAGS_NOOFFLOADS))
diff --git a/drivers/net/mlx5/mlx5_stats.c b/drivers/net/mlx5/mlx5_stats.c
index 3c3db24..c188488 100644
--- a/drivers/net/mlx5/mlx5_stats.c
+++ b/drivers/net/mlx5/mlx5_stats.c
@@ -350,7 +350,7 @@ mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 		tmp.rx_nombuf += rxq->stats.rx_nombuf;
 	}
 	for (i = 0; (i != priv->txqs_n); ++i) {
-		struct txq *txq = (*priv->txqs)[i];
+		struct mlx5_txq_data *txq = (*priv->txqs)[i];
 
 		if (txq == NULL)
 			continue;
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 4b0b532..5384b51 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -67,7 +67,7 @@
  *   Number of elements to allocate.
  */
 static void
-txq_alloc_elts(struct txq_ctrl *txq_ctrl, unsigned int elts_n)
+txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl, unsigned int elts_n)
 {
 	unsigned int i;
 
@@ -93,7 +93,7 @@ txq_alloc_elts(struct txq_ctrl *txq_ctrl, unsigned int elts_n)
  *   Pointer to TX queue structure.
  */
 static void
-txq_free_elts(struct txq_ctrl *txq_ctrl)
+txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl)
 {
 	const uint16_t elts_n = 1 << txq_ctrl->txq.elts_n;
 	const uint16_t elts_m = elts_n - 1;
@@ -130,7 +130,7 @@ txq_free_elts(struct txq_ctrl *txq_ctrl)
  *   Pointer to TX queue structure.
  */
 void
-txq_cleanup(struct txq_ctrl *txq_ctrl)
+mlx5_txq_cleanup(struct mlx5_txq_ctrl *txq_ctrl)
 {
 	size_t i;
 
@@ -160,7 +160,7 @@ txq_cleanup(struct txq_ctrl *txq_ctrl)
  *   0 on success, errno value on failure.
  */
 static inline int
-txq_setup(struct txq_ctrl *tmpl, struct txq_ctrl *txq_ctrl)
+txq_setup(struct mlx5_txq_ctrl *tmpl, struct mlx5_txq_ctrl *txq_ctrl)
 {
 	struct mlx5_qp *qp = to_mqp(tmpl->qp);
 	struct ibv_cq *ibcq = tmpl->cq;
@@ -209,12 +209,12 @@ txq_setup(struct txq_ctrl *tmpl, struct txq_ctrl *txq_ctrl)
  *   0 on success, errno value on failure.
  */
 int
-txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
-	       uint16_t desc, unsigned int socket,
-	       const struct rte_eth_txconf *conf)
+mlx5_txq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_txq_ctrl *txq_ctrl,
+		    uint16_t desc, unsigned int socket,
+		    const struct rte_eth_txconf *conf)
 {
 	struct priv *priv = mlx5_get_priv(dev);
-	struct txq_ctrl tmpl = {
+	struct mlx5_txq_ctrl tmpl = {
 		.priv = priv,
 		.socket = socket,
 	};
@@ -381,15 +381,15 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
 	}
 	/* Clean up txq in case we're reinitializing it. */
 	DEBUG("%p: cleaning-up old txq just in case", (void *)txq_ctrl);
-	txq_cleanup(txq_ctrl);
+	mlx5_txq_cleanup(txq_ctrl);
 	*txq_ctrl = tmpl;
 	DEBUG("%p: txq updated with %p", (void *)txq_ctrl, (void *)&tmpl);
 	/* Pre-register known mempools. */
-	rte_mempool_walk(txq_mp2mr_iter, txq_ctrl);
+	rte_mempool_walk(mlx5_txq_mp2mr_iter, txq_ctrl);
 	assert(ret == 0);
 	return 0;
 error:
-	txq_cleanup(&tmpl);
+	mlx5_txq_cleanup(&tmpl);
 	assert(ret > 0);
 	return ret;
 }
@@ -416,8 +416,9 @@ mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		    unsigned int socket, const struct rte_eth_txconf *conf)
 {
 	struct priv *priv = dev->data->dev_private;
-	struct txq *txq = (*priv->txqs)[idx];
-	struct txq_ctrl *txq_ctrl = container_of(txq, struct txq_ctrl, txq);
+	struct mlx5_txq_data *txq = (*priv->txqs)[idx];
+	struct mlx5_txq_ctrl *txq_ctrl =
+		container_of(txq, struct mlx5_txq_ctrl, txq);
 	int ret;
 
 	if (mlx5_is_secondary())
@@ -453,7 +454,7 @@ mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 			return -EEXIST;
 		}
 		(*priv->txqs)[idx] = NULL;
-		txq_cleanup(txq_ctrl);
+		mlx5_txq_cleanup(txq_ctrl);
 		/* Resize if txq size is changed. */
 		if (txq_ctrl->txq.elts_n != log2above(desc)) {
 			txq_ctrl = rte_realloc(txq_ctrl,
@@ -480,7 +481,7 @@ mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 			return -ENOMEM;
 		}
 	}
-	ret = txq_ctrl_setup(dev, txq_ctrl, desc, socket, conf);
+	ret = mlx5_txq_ctrl_setup(dev, txq_ctrl, desc, socket, conf);
 	if (ret)
 		rte_free(txq_ctrl);
 	else {
@@ -502,8 +503,8 @@ mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 void
 mlx5_tx_queue_release(void *dpdk_txq)
 {
-	struct txq *txq = (struct txq *)dpdk_txq;
-	struct txq_ctrl *txq_ctrl;
+	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
+	struct mlx5_txq_ctrl *txq_ctrl;
 	struct priv *priv;
 	unsigned int i;
 
@@ -512,7 +513,7 @@ mlx5_tx_queue_release(void *dpdk_txq)
 
 	if (txq == NULL)
 		return;
-	txq_ctrl = container_of(txq, struct txq_ctrl, txq);
+	txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
 	priv = txq_ctrl->priv;
 	priv_lock(priv);
 	for (i = 0; (i != priv->txqs_n); ++i)
@@ -522,7 +523,7 @@ mlx5_tx_queue_release(void *dpdk_txq)
 			(*priv->txqs)[i] = NULL;
 			break;
 		}
-	txq_cleanup(txq_ctrl);
+	mlx5_txq_cleanup(txq_ctrl);
 	rte_free(txq_ctrl);
 	priv_unlock(priv);
 }
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v1 05/21] net/mlx5: remove redundant started flag
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (4 preceding siblings ...)
  2017-08-02 14:10 ` [dpdk-dev] [PATCH v1 04/21] net/mlx5: prefix Tx control queue structures Nelio Laranjeiro
@ 2017-08-02 14:10 ` Nelio Laranjeiro
  2017-08-02 14:10 ` [dpdk-dev] [PATCH v1 06/21] net/mlx5: verify all flows are been removed on close Nelio Laranjeiro
                   ` (49 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-08-02 14:10 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil
This flag is already present in the Ethernet device.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5.h         |  1 -
 drivers/net/mlx5/mlx5_flow.c    |  6 +++---
 drivers/net/mlx5/mlx5_rxq.c     |  2 +-
 drivers/net/mlx5/mlx5_trigger.c | 12 ------------
 drivers/net/mlx5/mlx5_txq.c     |  2 +-
 5 files changed, 5 insertions(+), 18 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 155dd76..40c3475 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -103,7 +103,6 @@ struct priv {
 	/* Device properties. */
 	uint16_t mtu; /* Configured MTU. */
 	uint8_t port; /* Physical port number. */
-	unsigned int started:1; /* Device started, flows enabled. */
 	unsigned int promisc_req:1; /* Promiscuous mode requested. */
 	unsigned int allmulti_req:1; /* All multicast mode requested. */
 	unsigned int hw_csum:1; /* Checksum offload is supported. */
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 77b85a6..996fe31 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -1053,7 +1053,7 @@ priv_flow_create_action_queue_drop(struct priv *priv,
 	++flow->ibv_attr->num_of_specs;
 	flow->offset += sizeof(struct ibv_exp_flow_spec_action_drop);
 	rte_flow->ibv_attr = flow->ibv_attr;
-	if (!priv->started)
+	if (!priv->dev->data->dev_started)
 		return rte_flow;
 	rte_flow->qp = priv->flow_drop_queue->qp;
 	rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->qp,
@@ -1161,7 +1161,7 @@ priv_flow_create_action_queue(struct priv *priv,
 				   NULL, "cannot allocate QP");
 		goto error;
 	}
-	if (!priv->started)
+	if (!priv->dev->data->dev_started)
 		return rte_flow;
 	rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->qp,
 						 rte_flow->ibv_attr);
@@ -1627,7 +1627,7 @@ mlx5_flow_isolate(struct rte_eth_dev *dev,
 	struct priv *priv = dev->data->dev_private;
 
 	priv_lock(priv);
-	if (priv->started) {
+	if (dev->data->dev_started) {
 		rte_flow_error_set(error, EBUSY,
 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
 				   NULL,
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index c09a554..88a024c 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -1140,7 +1140,7 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	if (rxq != NULL) {
 		DEBUG("%p: reusing already allocated queue index %u (%p)",
 		      (void *)dev, idx, (void *)rxq);
-		if (priv->started) {
+		if (dev->data->dev_started) {
 			priv_unlock(priv);
 			return -EEXIST;
 		}
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 3b17678..a4a0acd 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -61,10 +61,6 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 		return -E_RTE_SECONDARY;
 
 	priv_lock(priv);
-	if (priv->started) {
-		priv_unlock(priv);
-		return 0;
-	}
 	/* Update Rx/Tx callback. */
 	priv_select_tx_function(priv);
 	priv_select_rx_function(priv);
@@ -72,8 +68,6 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 	err = priv_create_hash_rxqs(priv);
 	if (!err)
 		err = priv_rehash_flows(priv);
-	if (!err)
-		priv->started = 1;
 	else {
 		ERROR("%p: an error occurred while configuring hash RX queues:"
 		      " %s",
@@ -82,7 +76,6 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 	}
 	err = priv_flow_start(priv);
 	if (err) {
-		priv->started = 0;
 		ERROR("%p: an error occurred while configuring flows:"
 		      " %s",
 		      (void *)priv, strerror(err));
@@ -125,10 +118,6 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
 		return;
 
 	priv_lock(priv);
-	if (!priv->started) {
-		priv_unlock(priv);
-		return;
-	}
 	DEBUG("%p: cleaning up and destroying hash RX queues", (void *)dev);
 	priv_special_flow_disable_all(priv);
 	priv_mac_addrs_disable(priv);
@@ -136,6 +125,5 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
 	priv_flow_stop(priv);
 	priv_rx_intr_vec_disable(priv);
 	priv_dev_interrupt_handler_uninstall(priv, dev);
-	priv->started = 0;
 	priv_unlock(priv);
 }
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 5384b51..3f6702a 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -449,7 +449,7 @@ mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	if (txq != NULL) {
 		DEBUG("%p: reusing already allocated queue index %u (%p)",
 		      (void *)dev, idx, (void *)txq);
-		if (priv->started) {
+		if (dev->data->dev_started) {
 			priv_unlock(priv);
 			return -EEXIST;
 		}
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v1 06/21] net/mlx5: verify all flows are been removed on close
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (5 preceding siblings ...)
  2017-08-02 14:10 ` [dpdk-dev] [PATCH v1 05/21] net/mlx5: remove redundant started flag Nelio Laranjeiro
@ 2017-08-02 14:10 ` Nelio Laranjeiro
  2017-08-02 14:10 ` [dpdk-dev] [PATCH v1 07/21] net/mlx5: add reference counter on memory region Nelio Laranjeiro
                   ` (48 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-08-02 14:10 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil
Debug tool to verify that all flows have been unregistered from the NIC.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5.c      |  3 +++
 drivers/net/mlx5/mlx5.h      |  1 +
 drivers/net/mlx5/mlx5_flow.c | 22 ++++++++++++++++++++++
 3 files changed, 26 insertions(+)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index c7bc65f..0d8ca52 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -200,6 +200,9 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	}
 	if (priv->reta_idx != NULL)
 		rte_free(priv->reta_idx);
+	i = priv_flow_verify(priv);
+	if (i)
+		WARN("%p: some flows still remain", (void*)priv);
 	priv_unlock(priv);
 	memset(priv, 0, sizeof(*priv));
 }
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 40c3475..a5e9aa1 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -287,5 +287,6 @@ int mlx5_flow_isolate(struct rte_eth_dev *, int, struct rte_flow_error *);
 int priv_flow_start(struct priv *);
 void priv_flow_stop(struct priv *);
 int priv_flow_rxq_in_use(struct priv *, struct mlx5_rxq_data *);
+int priv_flow_verify(struct priv *);
 
 #endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 996fe31..bcbb984 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -1639,3 +1639,25 @@ mlx5_flow_isolate(struct rte_eth_dev *dev,
 	priv_unlock(priv);
 	return 0;
 }
+
+/**
+ * Verify the flow list is empty, emitting a DEBUG message per remaining flow.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ *
+ * @return The number of flows still present in priv->flows (0 when empty).
+ */
+int
+priv_flow_verify(struct priv *priv)
+{
+	struct rte_flow *flow;
+	int ret = 0;
+
+	TAILQ_FOREACH(flow, &priv->flows, next) {
+		DEBUG("%p: flow %p still referenced", (void*)priv,
+		      (void*)flow);
+		++ret;
+	}
+	return ret;
+}
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v1 07/21] net/mlx5: add reference counter on memory region
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (6 preceding siblings ...)
  2017-08-02 14:10 ` [dpdk-dev] [PATCH v1 06/21] net/mlx5: verify all flows are been removed on close Nelio Laranjeiro
@ 2017-08-02 14:10 ` Nelio Laranjeiro
  2017-08-02 14:10 ` [dpdk-dev] [PATCH v1 08/21] net/mlx5: separate DPDK from Verbs Rx queue objects Nelio Laranjeiro
                   ` (47 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-08-02 14:10 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil
Memory regions become shared at the same time: the control plane has its own
reference in addition to those of the Tx/Rx queues.
This should also avoid unregistering a memory region when the Tx queue
evicts it from its local cache.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5.h      |   8 ++
 drivers/net/mlx5/mlx5_mr.c   | 202 ++++++++++++++++++++++++++++++-------------
 drivers/net/mlx5/mlx5_rxq.c  |  17 ++--
 drivers/net/mlx5/mlx5_rxtx.h |  42 +++++----
 drivers/net/mlx5/mlx5_txq.c  |   8 +-
 5 files changed, 186 insertions(+), 91 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index a5e9aa1..1ae5f59 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -145,6 +145,7 @@ struct priv {
 	unsigned int reta_idx_n; /* RETA index size. */
 	struct rte_flow_drop *flow_drop_queue; /* Flow drop queue. */
 	TAILQ_HEAD(mlx5_flows, rte_flow) flows; /* RTE Flow rules. */
+	LIST_HEAD(mr, mlx5_mr) mr; /* Memory region. */
 	uint32_t link_speed_capa; /* Link speed capabilities. */
 	struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */
 	rte_spinlock_t lock; /* Lock for control functions. */
@@ -289,4 +290,11 @@ void priv_flow_stop(struct priv *);
 int priv_flow_rxq_in_use(struct priv *, struct mlx5_rxq_data *);
 int priv_flow_verify(struct priv *);
 
+/* mlx5_mr.c */
+
+struct mlx5_mr* priv_mr_new(struct priv *, struct rte_mempool *);
+struct mlx5_mr* priv_mr_get(struct priv *, struct rte_mempool *);
+int priv_mr_release(struct priv *, struct mlx5_mr *);
+int priv_mr_verify(struct priv *);
+
 #endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c
index e8adde5..b5e9500 100644
--- a/drivers/net/mlx5/mlx5_mr.c
+++ b/drivers/net/mlx5/mlx5_mr.c
@@ -42,6 +42,7 @@
 #endif
 
 #include <rte_mempool.h>
+#include <rte_malloc.h>
 
 #include "mlx5.h"
 #include "mlx5_rxtx.h"
@@ -111,54 +112,6 @@ static int mlx5_check_mempool(struct rte_mempool *mp, uintptr_t *start,
 }
 
 /**
- * Register mempool as a memory region.
- *
- * @param pd
- *   Pointer to protection domain.
- * @param mp
- *   Pointer to memory pool.
- *
- * @return
- *   Memory region pointer, NULL in case of error.
- */
-struct ibv_mr *
-mlx5_mp2mr(struct ibv_pd *pd, struct rte_mempool *mp)
-{
-	const struct rte_memseg *ms = rte_eal_get_physmem_layout();
-	uintptr_t start;
-	uintptr_t end;
-	unsigned int i;
-
-	if (mlx5_check_mempool(mp, &start, &end) != 0) {
-		ERROR("mempool %p: not virtually contiguous",
-		      (void *)mp);
-		return NULL;
-	}
-
-	DEBUG("mempool %p area start=%p end=%p size=%zu",
-	      (void *)mp, (void *)start, (void *)end,
-	      (size_t)(end - start));
-	/* Round start and end to page boundary if found in memory segments. */
-	for (i = 0; (i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL); ++i) {
-		uintptr_t addr = (uintptr_t)ms[i].addr;
-		size_t len = ms[i].len;
-		unsigned int align = ms[i].hugepage_sz;
-
-		if ((start > addr) && (start < addr + len))
-			start = RTE_ALIGN_FLOOR(start, align);
-		if ((end > addr) && (end < addr + len))
-			end = RTE_ALIGN_CEIL(end, align);
-	}
-	DEBUG("mempool %p using start=%p end=%p size=%zu for MR",
-	      (void *)mp, (void *)start, (void *)end,
-	      (size_t)(end - start));
-	return ibv_reg_mr(pd,
-			  (void *)start,
-			  end - start,
-			  IBV_ACCESS_LOCAL_WRITE);
-}
-
-/**
  * Register a Memory Region (MR) <-> Memory Pool (MP) association in
  * txq->mp2mr[]. If mp2mr[] is full, remove an entry first.
  *
@@ -180,12 +133,14 @@ mlx5_txq_mp2mr_reg(struct mlx5_txq_data *txq, struct rte_mempool *mp,
 {
 	struct mlx5_txq_ctrl *txq_ctrl =
 		container_of(txq, struct mlx5_txq_ctrl, txq);
-	struct ibv_mr *mr;
+	struct mlx5_mr *mr;
 
 	/* Add a new entry, register MR first. */
 	DEBUG("%p: discovered new memory pool \"%s\" (%p)",
 	      (void *)txq_ctrl, mp->name, (void *)mp);
-	mr = mlx5_mp2mr(txq_ctrl->priv->pd, mp);
+	mr = priv_mr_get(txq_ctrl->priv, mp);
+	if (mr == NULL)
+		mr = priv_mr_new(txq_ctrl->priv, mp);
 	if (unlikely(mr == NULL)) {
 		DEBUG("%p: unable to configure MR, ibv_reg_mr() failed.",
 		      (void *)txq_ctrl);
@@ -196,20 +151,17 @@ mlx5_txq_mp2mr_reg(struct mlx5_txq_data *txq, struct rte_mempool *mp,
 		DEBUG("%p: MR <-> MP table full, dropping oldest entry.",
 		      (void *)txq_ctrl);
 		--idx;
-		claim_zero(ibv_dereg_mr(txq_ctrl->txq.mp2mr[0].mr));
+		priv_mr_release(txq_ctrl->priv, txq_ctrl->txq.mp2mr[0]);
 		memmove(&txq_ctrl->txq.mp2mr[0], &txq_ctrl->txq.mp2mr[1],
 			(sizeof(txq_ctrl->txq.mp2mr) -
 			 sizeof(txq_ctrl->txq.mp2mr[0])));
 	}
 	/* Store the new entry. */
-	txq_ctrl->txq.mp2mr[idx].start = (uintptr_t)mr->addr;
-	txq_ctrl->txq.mp2mr[idx].end = (uintptr_t)mr->addr + mr->length;
-	txq_ctrl->txq.mp2mr[idx].mr = mr;
-	txq_ctrl->txq.mp2mr[idx].lkey = htonl(mr->lkey);
+	txq_ctrl->txq.mp2mr[idx] = mr;
 	DEBUG("%p: new MR lkey for MP \"%s\" (%p): 0x%08" PRIu32,
 	      (void *)txq_ctrl, mp->name, (void *)mp,
-	      txq_ctrl->txq.mp2mr[idx].lkey);
-	return txq_ctrl->txq.mp2mr[idx].lkey;
+	      txq_ctrl->txq.mp2mr[idx]->lkey);
+	return txq_ctrl->txq.mp2mr[idx]->lkey;
 }
 
 struct txq_mp2mr_mbuf_check_data {
@@ -275,15 +227,141 @@ mlx5_txq_mp2mr_iter(struct rte_mempool *mp, void *arg)
 		return;
 	}
 	for (i = 0; (i != RTE_DIM(txq_ctrl->txq.mp2mr)); ++i) {
-		struct ibv_mr *mr = txq_ctrl->txq.mp2mr[i].mr;
-
-		if (unlikely(mr == NULL)) {
+		if (unlikely(txq_ctrl->txq.mp2mr[i] == NULL)) {
 			/* Unknown MP, add a new MR for it. */
 			break;
 		}
-		if (start >= (uintptr_t)mr->addr &&
-		    end <= (uintptr_t)mr->addr + mr->length)
+		if (start >= (uintptr_t)txq_ctrl->txq.mp2mr[i]->start &&
+		    end <= (uintptr_t)txq_ctrl->txq.mp2mr[i]->end)
 			return;
 	}
 	mlx5_txq_mp2mr_reg(&txq_ctrl->txq, mp, i);
 }
+
+/**
+ * Register a new memory region from the mempool and store it in the memory
+ * region list. The returned object starts with a reference count of one.
+ *
+ * @param  priv
+ *   Pointer to private structure.
+ * @param mp
+ *   Pointer to the memory pool to register.
+ * @return
+ *   The memory region on success, NULL on failure (nothing is left allocated).
+ */
+struct mlx5_mr*
+priv_mr_new(struct priv *priv, struct rte_mempool *mp)
+{
+	const struct rte_memseg *ms = rte_eal_get_physmem_layout();
+	uintptr_t start;
+	uintptr_t end;
+	unsigned int i;
+	struct mlx5_mr *mr;
+
+	/* Validate the mempool before allocating so this path cannot leak. */
+	if (mlx5_check_mempool(mp, &start, &end) != 0) {
+		ERROR("mempool %p: not virtually contiguous",
+		      (void *)mp);
+		return NULL;
+	}
+	mr = rte_zmalloc_socket(__func__, sizeof(*mr), 0, mp->socket_id);
+	if (!mr) {
+		DEBUG("mempool %p: unable to allocate MR object", (void *)mp);
+		return NULL;
+	}
+	DEBUG("mempool %p area start=%p end=%p size=%zu",
+	      (void *)mp, (void *)start, (void *)end,
+	      (size_t)(end - start));
+	/* Round start and end to page boundary if found in memory segments. */
+	for (i = 0; (i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL); ++i) {
+		uintptr_t addr = (uintptr_t)ms[i].addr;
+		size_t len = ms[i].len;
+		unsigned int align = ms[i].hugepage_sz;
+
+		if ((start > addr) && (start < addr + len))
+			start = RTE_ALIGN_FLOOR(start, align);
+		if ((end > addr) && (end < addr + len))
+			end = RTE_ALIGN_CEIL(end, align);
+	}
+	DEBUG("mempool %p using start=%p end=%p size=%zu for MR",
+	      (void *)mp, (void *)start, (void *)end,
+	      (size_t)(end - start));
+	mr->mr = ibv_reg_mr(priv->pd, (void *)start, end - start,
+			    IBV_ACCESS_LOCAL_WRITE);
+	if (!mr->mr) {
+		/* Registration failed: release the wrapper, never deref it. */
+		DEBUG("mempool %p: ibv_reg_mr() failed", (void *)mp);
+		rte_free(mr);
+		return NULL;
+	}
+	mr->mp = mp;
+	mr->lkey = htonl(mr->mr->lkey);
+	mr->start = start;
+	mr->end = (uintptr_t)mr->mr->addr + mr->mr->length;
+	rte_atomic32_inc(&mr->refcnt);
+	LIST_INSERT_HEAD(&priv->mr, mr, next);
+	return mr;
+}
+
+/**
+ * Look up the memory region registered for a mempool and take a reference.
+ *
+ * @param  priv
+ *   Pointer to private structure.
+ * @param mp
+ *   Pointer to the memory pool whose region is wanted.
+ * @return
+ *   The memory region with its counter incremented, NULL if none matches.
+ */
+struct mlx5_mr*
+priv_mr_get(struct priv *priv, struct rte_mempool *mp)
+{
+	struct mlx5_mr *entry;
+
+	/* An empty list simply yields no iteration and falls through. */
+	LIST_FOREACH(entry, &priv->mr, next) {
+		if (entry->mp != mp)
+			continue;
+		/* Each successful lookup hands out one more reference. */
+		rte_atomic32_inc(&entry->refcnt);
+		return entry;
+	}
+	return NULL;
+}
+
+/**
+ * Drop one reference on a memory region and destroy it on the last one.
+ *
+ * @param priv
+ *   Pointer to private structure (unused).
+ * @param  mr
+ *   Pointer to memory region to release.
+ * @return
+ *   0 once the region has been destroyed, EBUSY while it is still referenced.
+ */
+int
+priv_mr_release(struct priv *priv, struct mlx5_mr *mr)
+{
+	(void)priv;
+	if (!rte_atomic32_dec_and_test(&mr->refcnt))
+		return EBUSY;
+	claim_zero(ibv_dereg_mr(mr->mr));
+	LIST_REMOVE(mr, next);
+	rte_free(mr);
+	return 0;
+}
+
+/**
+ * Verify the memory region list is empty.
+ *
+ * @param priv
+ *  Pointer to private structure.
+ *
+ * @return the number of memory regions still present in the list.
+ */
+int
+priv_mr_verify(struct priv *priv)
+{
+	struct mlx5_mr *entry;
+	int leaked = 0;
+
+	/* Every entry still linked here has not been fully released. */
+	LIST_FOREACH(entry, &priv->mr, next) {
+		DEBUG("%p: mr %p still referenced", (void*)priv, (void*)entry);
+		++leaked;
+	}
+	return leaked;
+}
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 88a024c..80cfd96 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -674,7 +674,7 @@ rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl, unsigned int elts_n)
 		*scat = (struct mlx5_wqe_data_seg){
 			.addr = htonll(rte_pktmbuf_mtod(buf, uintptr_t)),
 			.byte_count = htonl(DATA_LEN(buf)),
-			.lkey = htonl(rxq_ctrl->mr->lkey),
+			.lkey = rxq_ctrl->mr->lkey,
 		};
 		(*rxq_ctrl->rxq.elts)[i] = buf;
 	}
@@ -768,7 +768,7 @@ mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl)
 	if (rxq_ctrl->channel != NULL)
 		claim_zero(ibv_destroy_comp_channel(rxq_ctrl->channel));
 	if (rxq_ctrl->mr != NULL)
-		claim_zero(ibv_dereg_mr(rxq_ctrl->mr));
+		priv_mr_release(rxq_ctrl->priv, rxq_ctrl->mr);
 	memset(rxq_ctrl, 0, sizeof(*rxq_ctrl));
 }
 
@@ -920,12 +920,15 @@ mlx5_rxq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
 		tmpl.rxq.csum_l2tun =
 			!!dev->data->dev_conf.rxmode.hw_ip_checksum;
 	/* Use the entire RX mempool as the memory region. */
-	tmpl.mr = mlx5_mp2mr(priv->pd, mp);
+	tmpl.mr = priv_mr_get(priv, mp);
 	if (tmpl.mr == NULL) {
-		ret = EINVAL;
-		ERROR("%p: MR creation failure: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
+		tmpl.mr = priv_mr_new(priv, mp);
+		if (tmpl.mr == NULL) {
+			ret = EINVAL;
+			ERROR("%p: MR creation failure: %s",
+			      (void *)dev, strerror(ret));
+			goto error;
+		}
 	}
 	if (dev->data->dev_conf.intr_conf.rxq) {
 		tmpl.channel = ibv_create_comp_channel(priv->ctx);
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 12366c5..c7c7518 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -36,6 +36,7 @@
 
 #include <stddef.h>
 #include <stdint.h>
+#include <sys/queue.h>
 
 /* Verbs header. */
 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
@@ -51,6 +52,7 @@
 #include <rte_mbuf.h>
 #include <rte_mempool.h>
 #include <rte_common.h>
+#include <rte_atomic.h>
 
 #include "mlx5_utils.h"
 #include "mlx5.h"
@@ -79,6 +81,17 @@ struct mlx5_txq_stats {
 
 struct priv;
 
+/* Reference-counted memory region object, linked in the priv->mr list. */
+struct mlx5_mr {
+	LIST_ENTRY(mlx5_mr) next; /**< Pointer to the next element. */
+	rte_atomic32_t refcnt; /**< Reference counter. */
+	uint32_t lkey; /**< htonl(mr->lkey), i.e. lkey in network byte order. */
+	uintptr_t start; /**< Start address of MR. */
+	uintptr_t end; /**< End address of MR (mr->addr + mr->length). */
+	struct ibv_mr *mr; /**< Verbs Memory Region. */
+	struct rte_mempool *mp; /**< Memory Pool this region was built from. */
+};
+
 /* Compressed CQE context. */
 struct rxq_zip {
 	uint16_t ai; /* Array index. */
@@ -122,7 +135,7 @@ struct mlx5_rxq_ctrl {
 	struct priv *priv; /* Back pointer to private data. */
 	struct ibv_cq *cq; /* Completion Queue. */
 	struct ibv_exp_wq *wq; /* Work Queue. */
-	struct ibv_mr *mr; /* Memory Region (for mp). */
+	struct mlx5_mr *mr; /* Memory Region (for mp). */
 	struct ibv_comp_channel *channel;
 	unsigned int socket; /* CPU socket ID for allocations. */
 	struct mlx5_rxq_data rxq; /* Data path structure. */
@@ -248,6 +261,7 @@ struct mlx5_txq_data {
 	uint16_t mpw_hdr_dseg:1; /* Enable DSEGs in the title WQEBB. */
 	uint16_t max_inline; /* Multiple of RTE_CACHE_LINE_SIZE to inline. */
 	uint16_t inline_max_packet_sz; /* Max packet size for inlining. */
+	uint16_t mr_cache_idx; /* Index of last hit entry. */
 	uint32_t qp_num_8s; /* QP number shifted by 8. */
 	uint32_t flags; /* Flags for Tx Queue. */
 	volatile struct mlx5_cqe (*cqes)[]; /* Completion queue. */
@@ -255,13 +269,7 @@ struct mlx5_txq_data {
 	volatile uint32_t *qp_db; /* Work queue doorbell. */
 	volatile uint32_t *cq_db; /* Completion queue doorbell. */
 	volatile void *bf_reg; /* Blueflame register. */
-	struct {
-		uintptr_t start; /* Start address of MR */
-		uintptr_t end; /* End address of MR */
-		struct ibv_mr *mr; /* Memory Region (for mp). */
-		uint32_t lkey; /* htonl(mr->lkey) */
-	} mp2mr[MLX5_PMD_TX_MP_CACHE]; /* MP to MR translation table. */
-	uint16_t mr_cache_idx; /* Index of last hit entry. */
+	struct mlx5_mr *mp2mr[MLX5_PMD_TX_MP_CACHE]; /* MR translation table. */
 	struct rte_mbuf *(*elts)[]; /* TX elements. */
 	struct mlx5_txq_stats stats; /* TX queue counters. */
 } __rte_cache_aligned;
@@ -553,20 +561,20 @@ mlx5_tx_mb2mr(struct mlx5_txq_data *txq, struct rte_mbuf *mb)
 	uintptr_t addr = rte_pktmbuf_mtod(mb, uintptr_t);
 
 	assert(i < RTE_DIM(txq->mp2mr));
-	if (likely(txq->mp2mr[i].start <= addr && txq->mp2mr[i].end >= addr))
-		return txq->mp2mr[i].lkey;
+	if (likely(txq->mp2mr[i]->start <= addr && txq->mp2mr[i]->end >= addr))
+		return txq->mp2mr[i]->lkey;
 	for (i = 0; (i != RTE_DIM(txq->mp2mr)); ++i) {
-		if (unlikely(txq->mp2mr[i].mr == NULL)) {
+		if (unlikely(txq->mp2mr[i]->mr == NULL)) {
 			/* Unknown MP, add a new MR for it. */
 			break;
 		}
-		if (txq->mp2mr[i].start <= addr &&
-		    txq->mp2mr[i].end >= addr) {
-			assert(txq->mp2mr[i].lkey != (uint32_t)-1);
-			assert(htonl(txq->mp2mr[i].mr->lkey) ==
-			       txq->mp2mr[i].lkey);
+		if (txq->mp2mr[i]->start <= addr &&
+		    txq->mp2mr[i]->end >= addr) {
+			assert(txq->mp2mr[i]->lkey != (uint32_t)-1);
+			assert(htonl(txq->mp2mr[i]->mr->lkey) ==
+			       txq->mp2mr[i]->lkey);
 			txq->mr_cache_idx = i;
-			return txq->mp2mr[i].lkey;
+			return txq->mp2mr[i]->lkey;
 		}
 	}
 	txq->mr_cache_idx = 0;
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 3f6702a..ce826dd 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -140,11 +140,9 @@ mlx5_txq_cleanup(struct mlx5_txq_ctrl *txq_ctrl)
 		claim_zero(ibv_destroy_qp(txq_ctrl->qp));
 	if (txq_ctrl->cq != NULL)
 		claim_zero(ibv_destroy_cq(txq_ctrl->cq));
-	for (i = 0; (i != RTE_DIM(txq_ctrl->txq.mp2mr)); ++i) {
-		if (txq_ctrl->txq.mp2mr[i].mr == NULL)
-			break;
-		claim_zero(ibv_dereg_mr(txq_ctrl->txq.mp2mr[i].mr));
-	}
+	for (i = 0; (i != RTE_DIM(txq_ctrl->txq.mp2mr)); ++i)
+		if (txq_ctrl->txq.mp2mr[i])
+			priv_mr_release(txq_ctrl->priv, txq_ctrl->txq.mp2mr[i]);
 	memset(txq_ctrl, 0, sizeof(*txq_ctrl));
 }
 
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v1 08/21] net/mlx5: separate DPDK from Verbs Rx queue objects
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (7 preceding siblings ...)
  2017-08-02 14:10 ` [dpdk-dev] [PATCH v1 07/21] net/mlx5: add reference counter on memory region Nelio Laranjeiro
@ 2017-08-02 14:10 ` Nelio Laranjeiro
  2017-08-02 14:10 ` [dpdk-dev] [PATCH v1 09/21] net/mlx5: separate DPDK from Verbs Tx " Nelio Laranjeiro
                   ` (46 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-08-02 14:10 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5.c      |   3 +
 drivers/net/mlx5/mlx5.h      |   2 +-
 drivers/net/mlx5/mlx5_flow.c |  97 +++-----
 drivers/net/mlx5/mlx5_rxq.c  | 564 ++++++++++++++++++++++++++-----------------
 drivers/net/mlx5/mlx5_rxtx.h |  26 +-
 drivers/net/mlx5/mlx5_vlan.c |   2 +-
 6 files changed, 401 insertions(+), 293 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 0d8ca52..c158d8e 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -200,6 +200,9 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	}
 	if (priv->reta_idx != NULL)
 		rte_free(priv->reta_idx);
+	i = mlx5_priv_rxq_ibv_verify(priv);
+	if (i)
+		WARN("%p: some Verbs Rx queue still remain", (void*)priv);
 	i = priv_flow_verify(priv);
 	if (i)
 		WARN("%p: some flows still remain", (void*)priv);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 1ae5f59..228fd34 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -146,6 +146,7 @@ struct priv {
 	struct rte_flow_drop *flow_drop_queue; /* Flow drop queue. */
 	TAILQ_HEAD(mlx5_flows, rte_flow) flows; /* RTE Flow rules. */
 	LIST_HEAD(mr, mlx5_mr) mr; /* Memory region. */
+	LIST_HEAD(rxqibv, mlx5_rxq_ibv) rxqsibv; /* Verbs Rx queues. */
 	uint32_t link_speed_capa; /* Link speed capabilities. */
 	struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */
 	rte_spinlock_t lock; /* Lock for control functions. */
@@ -287,7 +288,6 @@ int mlx5_flow_flush(struct rte_eth_dev *, struct rte_flow_error *);
 int mlx5_flow_isolate(struct rte_eth_dev *, int, struct rte_flow_error *);
 int priv_flow_start(struct priv *);
 void priv_flow_stop(struct priv *);
-int priv_flow_rxq_in_use(struct priv *, struct mlx5_rxq_data *);
 int priv_flow_verify(struct priv *);
 
 /* mlx5_mr.c */
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index bcbb984..9ed8d05 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -95,11 +95,11 @@ struct rte_flow {
 	struct ibv_exp_flow *ibv_flow; /**< Verbs flow. */
 	struct ibv_exp_wq *wq; /**< Verbs work queue. */
 	struct ibv_cq *cq; /**< Verbs completion queue. */
-	uint16_t rxqs_n; /**< Number of queues in this flow, 0 if drop queue. */
 	uint32_t mark:1; /**< Set if the flow is marked. */
 	uint32_t drop:1; /**< Drop queue. */
 	uint64_t hash_fields; /**< Fields that participate in the hash. */
-	struct mlx5_rxq_data *rxqs[]; /**< Pointer to the queues array. */
+	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< List of queues. */
+	uint16_t queues_n; /**< Number of queues in the list. */
 };
 
 /** Static initializer for items. */
@@ -1097,23 +1097,21 @@ priv_flow_create_action_queue(struct priv *priv,
 	assert(priv->pd);
 	assert(priv->ctx);
 	assert(!flow->actions.drop);
-	rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow) +
-			      sizeof(*rte_flow->rxqs) * flow->actions.queues_n,
-			      0);
+	rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
 	if (!rte_flow) {
 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
 				   NULL, "cannot allocate flow memory");
 		return NULL;
 	}
 	for (i = 0; i < flow->actions.queues_n; ++i) {
-		struct mlx5_rxq_ctrl *rxq;
+		struct mlx5_rxq_ibv *rxq =
+			mlx5_priv_rxq_ibv_get(priv, flow->actions.queues[i]);
 
-		rxq = container_of((*priv->rxqs)[flow->actions.queues[i]],
-				   struct mlx5_rxq_ctrl, rxq);
 		wqs[i] = rxq->wq;
-		rte_flow->rxqs[i] = &rxq->rxq;
-		++rte_flow->rxqs_n;
-		rxq->rxq.mark |= flow->actions.mark;
+		rte_flow->queues[i] = flow->actions.queues[i];
+		++rte_flow->queues_n;
+		(*priv->rxqs)[flow->actions.queues[i]]->mark |=
+			flow->actions.mark;
 	}
 	/* finalise indirection table. */
 	for (j = 0; i < wqs_n; ++i, ++j) {
@@ -1294,6 +1292,8 @@ static void
 priv_flow_destroy(struct priv *priv,
 		  struct rte_flow *flow)
 {
+	unsigned int i;
+
 	TAILQ_REMOVE(&priv->flows, flow, next);
 	if (flow->ibv_flow)
 		claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
@@ -1303,37 +1303,33 @@ priv_flow_destroy(struct priv *priv,
 		claim_zero(ibv_destroy_qp(flow->qp));
 	if (flow->ind_table)
 		claim_zero(ibv_exp_destroy_rwq_ind_table(flow->ind_table));
-	if (flow->mark) {
+	for (i = 0; i != flow->queues_n; ++i) {
 		struct rte_flow *tmp;
-		struct mlx5_rxq_data *rxq;
-		uint32_t mark_n = 0;
-		uint32_t queue_n;
+		struct mlx5_rxq_data *rxq = (*priv->rxqs)[flow->queues[i]];
+		struct mlx5_rxq_ctrl *rxq_ctrl =
+			container_of(rxq, struct mlx5_rxq_ctrl, rxq);
 
 		/*
 		 * To remove the mark from the queue, the queue must not be
 		 * present in any other marked flow (RSS or not).
 		 */
-		for (queue_n = 0; queue_n < flow->rxqs_n; ++queue_n) {
-			rxq = flow->rxqs[queue_n];
-			for (tmp = TAILQ_FIRST(&priv->flows);
-			     tmp;
-			     tmp = TAILQ_NEXT(tmp, next)) {
-				uint32_t tqueue_n;
+		if (flow->mark) {
+			int mark = 0;
+
+			TAILQ_FOREACH(tmp, &priv->flows, next) {
+				unsigned int j;
 
 				if (tmp->drop)
 					continue;
-				for (tqueue_n = 0;
-				     tqueue_n < tmp->rxqs_n;
-				     ++tqueue_n) {
-					struct mlx5_rxq_data *trxq;
-
-					trxq = tmp->rxqs[tqueue_n];
-					if (rxq == trxq)
-						++mark_n;
-				}
+				if (!tmp->mark)
+					continue;
+				for (j = 0; (j != tmp->queues_n) && !mark; j++)
+					if (tmp->queues[j] == flow->queues[i])
+						mark = 1;
 			}
-			rxq->mark = !!mark_n;
+			rxq->mark = mark;
 		}
+		mlx5_priv_rxq_ibv_release(priv, rxq_ctrl->ibv);
 	}
 free:
 	rte_free(flow->ibv_attr);
@@ -1532,8 +1528,8 @@ priv_flow_stop(struct priv *priv)
 		if (flow->mark) {
 			unsigned int n;
 
-			for (n = 0; n < flow->rxqs_n; ++n)
-				flow->rxqs[n]->mark = 0;
+			for (n = 0; n < flow->queues_n; ++n)
+				(*priv->rxqs)[flow->queues[n]]->mark = 0;
 		}
 		DEBUG("Flow %p removed", (void *)flow);
 	}
@@ -1575,39 +1571,8 @@ priv_flow_start(struct priv *priv)
 		if (flow->mark) {
 			unsigned int n;
 
-			for (n = 0; n < flow->rxqs_n; ++n)
-				flow->rxqs[n]->mark = 1;
-		}
-	}
-	return 0;
-}
-
-/**
- * Verify if the Rx queue is used in a flow.
- *
- * @param priv
- *   Pointer to private structure.
- * @param rxq
- *   Pointer to the queue to search.
- *
- * @return
- *   Nonzero if the queue is used by a flow.
- */
-int
-priv_flow_rxq_in_use(struct priv *priv, struct mlx5_rxq_data *rxq)
-{
-	struct rte_flow *flow;
-
-	for (flow = TAILQ_FIRST(&priv->flows);
-	     flow;
-	     flow = TAILQ_NEXT(flow, next)) {
-		unsigned int n;
-
-		if (flow->drop)
-			continue;
-		for (n = 0; n < flow->rxqs_n; ++n) {
-			if (flow->rxqs[n] == rxq)
-				return 1;
+			for (n = 0; n < flow->queues_n; ++n)
+				(*priv->rxqs)[flow->queues[n]]->mark = 1;
 		}
 	}
 	return 0;
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 80cfd96..1663734 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -378,7 +378,7 @@ priv_create_hash_rxqs(struct priv *priv)
 
 		rxq_ctrl = container_of((*priv->rxqs)[(*priv->reta_idx)[i]],
 					struct mlx5_rxq_ctrl, rxq);
-		wqs[i] = rxq_ctrl->wq;
+		wqs[i] = rxq_ctrl->ibv->wq;
 	}
 	/* Get number of hash RX queues to configure. */
 	for (i = 0, hash_rxqs_n = 0; (i != ind_tables_n); ++i)
@@ -647,8 +647,6 @@ rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl, unsigned int elts_n)
 	/* Iterate on segments. */
 	for (i = 0; (i != elts_n); ++i) {
 		struct rte_mbuf *buf;
-		volatile struct mlx5_wqe_data_seg *scat =
-			&(*rxq_ctrl->rxq.wqes)[i];
 
 		buf = rte_pktmbuf_alloc(rxq_ctrl->rxq.mp);
 		if (buf == NULL) {
@@ -669,13 +667,6 @@ rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl, unsigned int elts_n)
 		DATA_LEN(buf) = rte_pktmbuf_tailroom(buf);
 		PKT_LEN(buf) = DATA_LEN(buf);
 		NB_SEGS(buf) = 1;
-		/* scat->addr must be able to store a pointer. */
-		assert(sizeof(scat->addr) >= sizeof(uintptr_t));
-		*scat = (struct mlx5_wqe_data_seg){
-			.addr = htonll(rte_pktmbuf_mtod(buf, uintptr_t)),
-			.byte_count = htonl(DATA_LEN(buf)),
-			.lkey = rxq_ctrl->mr->lkey,
-		};
 		(*rxq_ctrl->rxq.elts)[i] = buf;
 	}
 	if (rxq_check_vec_support(&rxq_ctrl->rxq) > 0) {
@@ -761,65 +752,12 @@ mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl)
 {
 	DEBUG("cleaning up %p", (void *)rxq_ctrl);
 	rxq_free_elts(rxq_ctrl);
-	if (rxq_ctrl->wq != NULL)
-		claim_zero(ibv_exp_destroy_wq(rxq_ctrl->wq));
-	if (rxq_ctrl->cq != NULL)
-		claim_zero(ibv_destroy_cq(rxq_ctrl->cq));
-	if (rxq_ctrl->channel != NULL)
-		claim_zero(ibv_destroy_comp_channel(rxq_ctrl->channel));
-	if (rxq_ctrl->mr != NULL)
-		priv_mr_release(rxq_ctrl->priv, rxq_ctrl->mr);
+	if (rxq_ctrl->ibv)
+		mlx5_priv_rxq_ibv_release(rxq_ctrl->priv, rxq_ctrl->ibv);
 	memset(rxq_ctrl, 0, sizeof(*rxq_ctrl));
 }
 
 /**
- * Initialize RX queue.
- *
- * @param tmpl
- *   Pointer to RX queue control template.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-static inline int
-rxq_setup(struct mlx5_rxq_ctrl *tmpl)
-{
-	struct ibv_cq *ibcq = tmpl->cq;
-	struct ibv_mlx5_cq_info cq_info;
-	struct mlx5_rwq *rwq = container_of(tmpl->wq, struct mlx5_rwq, wq);
-	const uint16_t desc_n =
-		(1 << tmpl->rxq.elts_n) + tmpl->priv->rx_vec_en *
-		MLX5_VPMD_DESCS_PER_LOOP;
-	struct rte_mbuf *(*elts)[desc_n] =
-		rte_calloc_socket("RXQ", 1, sizeof(*elts), 0, tmpl->socket);
-	if (ibv_mlx5_exp_get_cq_info(ibcq, &cq_info)) {
-		ERROR("Unable to query CQ info. check your OFED.");
-		return ENOTSUP;
-	}
-	if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
-		ERROR("Wrong MLX5_CQE_SIZE environment variable value: "
-		      "it should be set to %u", RTE_CACHE_LINE_SIZE);
-		return EINVAL;
-	}
-	if (elts == NULL)
-		return ENOMEM;
-	tmpl->rxq.rq_db = rwq->rq.db;
-	tmpl->rxq.cqe_n = log2above(cq_info.cqe_cnt);
-	tmpl->rxq.cq_ci = 0;
-	tmpl->rxq.rq_ci = 0;
-	tmpl->rxq.rq_pi = 0;
-	tmpl->rxq.cq_db = cq_info.dbrec;
-	tmpl->rxq.wqes =
-		(volatile struct mlx5_wqe_data_seg (*)[])
-		(uintptr_t)rwq->rq.buff;
-	tmpl->rxq.cqes =
-		(volatile struct mlx5_cqe (*)[])
-		(uintptr_t)cq_info.buf;
-	tmpl->rxq.elts = elts;
-	return 0;
-}
-
-/**
  * Configure a RX queue.
  *
  * @param dev
@@ -848,25 +786,24 @@ mlx5_rxq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
 		.priv = priv,
 		.socket = socket,
 		.rxq = {
+			.elts = rte_calloc_socket("RXQ", 1,
+						  desc *
+						  sizeof(struct rte_mbuf *), 0,
+						  socket),
 			.elts_n = log2above(desc),
 			.mp = mp,
 			.rss_hash = priv->rxqs_n > 1,
 		},
 	};
-	struct ibv_exp_wq_attr mod;
-	union {
-		struct ibv_exp_cq_init_attr cq;
-		struct ibv_exp_wq_init_attr wq;
-		struct ibv_exp_cq_attr cq_attr;
-	} attr;
 	unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
-	unsigned int cqe_n = desc - 1;
 	const uint16_t desc_n =
 		desc + priv->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
 	struct rte_mbuf *(*elts)[desc_n] = NULL;
 	int ret = 0;
 
 	(void)conf; /* Thresholds configuration (ignored). */
+	if (dev->data->dev_conf.intr_conf.rxq)
+		tmpl.memory_channel = 1;
 	/* Enable scattered packets support for this queue if necessary. */
 	assert(mb_len >= RTE_PKTMBUF_HEADROOM);
 	if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
@@ -919,78 +856,13 @@ mlx5_rxq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
 	if (priv->hw_csum_l2tun)
 		tmpl.rxq.csum_l2tun =
 			!!dev->data->dev_conf.rxmode.hw_ip_checksum;
-	/* Use the entire RX mempool as the memory region. */
-	tmpl.mr = priv_mr_get(priv, mp);
-	if (tmpl.mr == NULL) {
-		tmpl.mr = priv_mr_new(priv, mp);
-		if (tmpl.mr == NULL) {
-			ret = EINVAL;
-			ERROR("%p: MR creation failure: %s",
-			      (void *)dev, strerror(ret));
-			goto error;
-		}
-	}
-	if (dev->data->dev_conf.intr_conf.rxq) {
-		tmpl.channel = ibv_create_comp_channel(priv->ctx);
-		if (tmpl.channel == NULL) {
-			ret = ENOMEM;
-			ERROR("%p: Rx interrupt completion channel creation"
-			      " failure: %s",
-			      (void *)dev, strerror(ret));
-			goto error;
-		}
-	}
-	attr.cq = (struct ibv_exp_cq_init_attr){
-		.comp_mask = 0,
-	};
-	if (priv->cqe_comp) {
-		attr.cq.comp_mask |= IBV_EXP_CQ_INIT_ATTR_FLAGS;
-		attr.cq.flags |= IBV_EXP_CQ_COMPRESSED_CQE;
-		/*
-		 * For vectorized Rx, it must not be doubled in order to
-		 * make cq_ci and rq_ci aligned.
-		 */
-		if (rxq_check_vec_support(&tmpl.rxq) < 0)
-			cqe_n = (desc * 2) - 1; /* Double the number of CQEs. */
-	}
-	tmpl.cq = ibv_exp_create_cq(priv->ctx, cqe_n, NULL, tmpl.channel, 0,
-				    &attr.cq);
-	if (tmpl.cq == NULL) {
-		ret = ENOMEM;
-		ERROR("%p: CQ creation failure: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	DEBUG("priv->device_attr.max_qp_wr is %d",
-	      priv->device_attr.max_qp_wr);
-	DEBUG("priv->device_attr.max_sge is %d",
-	      priv->device_attr.max_sge);
 	/* Configure VLAN stripping. */
 	tmpl.rxq.vlan_strip = (priv->hw_vlan_strip &&
 			       !!dev->data->dev_conf.rxmode.hw_vlan_strip);
-	attr.wq = (struct ibv_exp_wq_init_attr){
-		.wq_context = NULL, /* Could be useful in the future. */
-		.wq_type = IBV_EXP_WQT_RQ,
-		/* Max number of outstanding WRs. */
-		.max_recv_wr = desc >> tmpl.rxq.sges_n,
-		/* Max number of scatter/gather elements in a WR. */
-		.max_recv_sge = 1 << tmpl.rxq.sges_n,
-		.pd = priv->pd,
-		.cq = tmpl.cq,
-		.comp_mask =
-			IBV_EXP_CREATE_WQ_VLAN_OFFLOADS |
-			0,
-		.vlan_offloads = (tmpl.rxq.vlan_strip ?
-				  IBV_EXP_RECEIVE_WQ_CVLAN_STRIP :
-				  0),
-	};
 	/* By default, FCS (CRC) is stripped by hardware. */
 	if (dev->data->dev_conf.rxmode.hw_strip_crc) {
 		tmpl.rxq.crc_present = 0;
 	} else if (priv->hw_fcs_strip) {
-		/* Ask HW/Verbs to leave CRC in place when supported. */
-		attr.wq.flags |= IBV_EXP_CREATE_WQ_FLAG_SCATTER_FCS;
-		attr.wq.comp_mask |= IBV_EXP_CREATE_WQ_FLAGS;
 		tmpl.rxq.crc_present = 1;
 	} else {
 		WARN("%p: CRC stripping has been disabled but will still"
@@ -1004,59 +876,9 @@ mlx5_rxq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
 	      (void *)dev,
 	      tmpl.rxq.crc_present ? "disabled" : "enabled",
 	      tmpl.rxq.crc_present << 2);
-	if (!mlx5_getenv_int("MLX5_PMD_ENABLE_PADDING"))
-		; /* Nothing else to do. */
-	else if (priv->hw_padding) {
-		INFO("%p: enabling packet padding on queue %p",
-		     (void *)dev, (void *)rxq_ctrl);
-		attr.wq.flags |= IBV_EXP_CREATE_WQ_FLAG_RX_END_PADDING;
-		attr.wq.comp_mask |= IBV_EXP_CREATE_WQ_FLAGS;
-	} else
-		WARN("%p: packet padding has been requested but is not"
-		     " supported, make sure MLNX_OFED and firmware are"
-		     " up to date",
-		     (void *)dev);
-
-	tmpl.wq = ibv_exp_create_wq(priv->ctx, &attr.wq);
-	if (tmpl.wq == NULL) {
-		ret = (errno ? errno : EINVAL);
-		ERROR("%p: WQ creation failure: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	/*
-	 * Make sure number of WRs*SGEs match expectations since a queue
-	 * cannot allocate more than "desc" buffers.
-	 */
-	if (((int)attr.wq.max_recv_wr != (desc >> tmpl.rxq.sges_n)) ||
-	    ((int)attr.wq.max_recv_sge != (1 << tmpl.rxq.sges_n))) {
-		ERROR("%p: requested %u*%u but got %u*%u WRs*SGEs",
-		      (void *)dev,
-		      (desc >> tmpl.rxq.sges_n), (1 << tmpl.rxq.sges_n),
-		      attr.wq.max_recv_wr, attr.wq.max_recv_sge);
-		ret = EINVAL;
-		goto error;
-	}
 	/* Save port ID. */
 	tmpl.rxq.port_id = dev->data->port_id;
 	DEBUG("%p: RTE port ID: %u", (void *)rxq_ctrl, tmpl.rxq.port_id);
-	/* Change queue state to ready. */
-	mod = (struct ibv_exp_wq_attr){
-		.attr_mask = IBV_EXP_WQ_ATTR_STATE,
-		.wq_state = IBV_EXP_WQS_RDY,
-	};
-	ret = ibv_exp_modify_wq(tmpl.wq, &mod);
-	if (ret) {
-		ERROR("%p: WQ state to IBV_EXP_WQS_RDY failed: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	ret = rxq_setup(&tmpl);
-	if (ret) {
-		ERROR("%p: cannot initialize RX queue structure: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
 	ret = rxq_alloc_elts(&tmpl, desc);
 	if (ret) {
 		ERROR("%p: RXQ allocation failed: %s",
@@ -1075,17 +897,12 @@ mlx5_rxq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
 	rte_free(tmpl.rxq.elts);
 	tmpl.rxq.elts = elts;
 	*rxq_ctrl = tmpl;
-	/* Update doorbell counter. */
-	rxq_ctrl->rxq.rq_ci = desc >> rxq_ctrl->rxq.sges_n;
-	rte_wmb();
-	*rxq_ctrl->rxq.rq_db = htonl(rxq_ctrl->rxq.rq_ci);
 	DEBUG("%p: rxq updated with %p", (void *)rxq_ctrl, (void *)&tmpl);
 	assert(ret == 0);
 	return 0;
 error:
-	elts = tmpl.rxq.elts;
+	rte_free(tmpl.rxq.elts);
 	mlx5_rxq_cleanup(&tmpl);
-	rte_free(elts);
 	assert(ret > 0);
 	return ret;
 }
@@ -1175,14 +992,20 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		}
 	}
 	ret = mlx5_rxq_ctrl_setup(dev, rxq_ctrl, desc, socket, conf, mp);
-	if (ret)
+	if (ret) {
 		rte_free(rxq_ctrl);
-	else {
-		rxq_ctrl->rxq.stats.idx = idx;
-		DEBUG("%p: adding RX queue %p to list",
-		      (void *)dev, (void *)rxq_ctrl);
-		(*priv->rxqs)[idx] = &rxq_ctrl->rxq;
+		goto out;
 	}
+	rxq_ctrl->rxq.stats.idx = idx;
+	DEBUG("%p: adding RX queue %p to list",
+	      (void *)dev, (void *)rxq_ctrl);
+	(*priv->rxqs)[idx] = &rxq_ctrl->rxq;
+	rxq_ctrl->ibv = mlx5_priv_rxq_ibv_new(priv, idx);
+	if (!rxq_ctrl->ibv) {
+		ret = EAGAIN;
+		goto out;
+	}
+out:
 	priv_unlock(priv);
 	return -ret;
 }
@@ -1209,7 +1032,7 @@ mlx5_rx_queue_release(void *dpdk_rxq)
 	rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq);
 	priv = rxq_ctrl->priv;
 	priv_lock(priv);
-	if (priv_flow_rxq_in_use(priv, rxq))
+	if (!mlx5_priv_rxq_ibv_releasable(priv, rxq_ctrl->ibv))
 		rte_panic("Rx queue %p is still used by a flow and cannot be"
 			  " removed\n", (void *)rxq_ctrl);
 	for (i = 0; (i != priv->rxqs_n); ++i)
@@ -1253,15 +1076,14 @@ priv_rx_intr_vec_enable(struct priv *priv)
 	}
 	intr_handle->type = RTE_INTR_HANDLE_EXT;
 	for (i = 0; i != n; ++i) {
-		struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
-		struct mlx5_rxq_ctrl *rxq_ctrl =
-			container_of(rxq, struct mlx5_rxq_ctrl, rxq);
+		/* This rxq ibv must not be released in this function. */
+		struct mlx5_rxq_ibv *rxq = mlx5_priv_rxq_ibv_get(priv, i);
 		int fd;
 		int flags;
 		int rc;
 
 		/* Skip queues that cannot request interrupts. */
-		if (!rxq || !rxq_ctrl->channel) {
+		if (!rxq || !rxq->channel) {
 			/* Use invalid intr_vec[] index to disable entry. */
 			intr_handle->intr_vec[i] =
 				RTE_INTR_VEC_RXTX_OFFSET +
@@ -1275,7 +1097,7 @@ priv_rx_intr_vec_enable(struct priv *priv)
 			priv_rx_intr_vec_disable(priv);
 			return -1;
 		}
-		fd = rxq_ctrl->channel->fd;
+		fd = rxq->channel->fd;
 		flags = fcntl(fd, F_GETFL);
 		rc = fcntl(fd, F_SETFL, flags | O_NONBLOCK);
 		if (rc < 0) {
@@ -1305,7 +1127,27 @@ void
 priv_rx_intr_vec_disable(struct priv *priv)
 {
 	struct rte_intr_handle *intr_handle = priv->dev->intr_handle;
+	unsigned int i;
+	unsigned int rxqs_n = priv->rxqs_n;
+	unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
 
+	if (!priv->dev->data->dev_conf.intr_conf.rxq)
+		return;
+	for (i = 0; i != n; ++i) {
+		struct mlx5_rxq_ctrl *ctrl;
+		struct mlx5_rxq_data *rxq;
+
+		if (intr_handle->intr_vec[i] == RTE_INTR_VEC_RXTX_OFFSET +
+		    RTE_MAX_RXTX_INTR_VEC_ID)
+			continue;
+		/**
+		 * Need to access directly the queue to release the reference
+		 * kept in priv_rx_intr_vec_enable().
+		 */
+		rxq = (*priv->rxqs)[i];
+		ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq);
+		mlx5_priv_rxq_ibv_release(priv, ctrl->ibv);
+	}
 	rte_intr_free_epoll_fd(intr_handle);
 	free(intr_handle->intr_vec);
 	intr_handle->nb_efd = 0;
@@ -1329,19 +1171,19 @@ int
 mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
 	struct priv *priv = mlx5_get_priv(dev);
-	struct mlx5_rxq_data *rxq = (*priv->rxqs)[rx_queue_id];
-	struct mlx5_rxq_ctrl *rxq_ctrl =
-		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
+	struct mlx5_rxq_ibv *rxq = mlx5_priv_rxq_ibv_get(priv, rx_queue_id);
 	int ret;
 
-	if (!rxq || !rxq_ctrl->channel) {
+	if (!rxq || !rxq->channel) {
 		ret = EINVAL;
 	} else {
-		ibv_mlx5_exp_update_cq_ci(rxq_ctrl->cq, rxq->cq_ci);
-		ret = ibv_req_notify_cq(rxq_ctrl->cq, 0);
+		ibv_mlx5_exp_update_cq_ci(rxq->cq,
+					  (*priv->rxqs)[rx_queue_id]->cq_ci);
+		ret = ibv_req_notify_cq(rxq->cq, 0);
 	}
 	if (ret)
 		WARN("unable to arm interrupt on rx queue %d", rx_queue_id);
+	mlx5_priv_rxq_ibv_release(priv, rxq);
 	return -ret;
 }
 
@@ -1360,26 +1202,312 @@ int
 mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
 	struct priv *priv = mlx5_get_priv(dev);
-	struct mlx5_rxq_data *rxq = (*priv->rxqs)[rx_queue_id];
-	struct mlx5_rxq_ctrl *rxq_ctrl =
-		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
+	struct mlx5_rxq_ibv *rxq = mlx5_priv_rxq_ibv_get(priv, rx_queue_id);
 	struct ibv_cq *ev_cq;
 	void *ev_ctx;
 	int ret;
 
-	if (!rxq || !rxq_ctrl->channel) {
+	if (!rxq || !rxq->channel) {
 		ret = EINVAL;
 	} else {
-		ret = ibv_get_cq_event(rxq_ctrl->cq->channel, &ev_cq, &ev_ctx);
-		if (ret || ev_cq != rxq_ctrl->cq)
+		ret = ibv_get_cq_event(rxq->cq->channel, &ev_cq, &ev_ctx);
+		if (ret || ev_cq != rxq->cq)
 			ret = EINVAL;
 	}
 	if (ret)
 		WARN("unable to disable interrupt on rx queue %d",
 		     rx_queue_id);
 	else
-		ibv_ack_cq_events(rxq_ctrl->cq, 1);
+		ibv_ack_cq_events(rxq->cq, 1);
+	mlx5_priv_rxq_ibv_release(priv, rxq);
 	return -ret;
 }
 
 #endif /* HAVE_UPDATE_CQ_CI */
+
+/**
+ * Create the Rx queue Verbs object.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param idx
+ *   Queue index in DPDK Rx queue array
+ *
+ * @return
+ *   The Verbs object initialised if it can be created.
+ */
+struct mlx5_rxq_ibv*
+mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
+{
+	struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
+	struct mlx5_rxq_ctrl *rxq_ctrl =
+		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
+	struct ibv_exp_wq_attr mod;
+	union {
+		struct ibv_exp_cq_init_attr cq;
+		struct ibv_exp_wq_init_attr wq;
+		struct ibv_exp_cq_attr cq_attr;
+	} attr;
+	unsigned int cqe_n = (1 << rxq->elts_n) - 1;
+	struct mlx5_rxq_ibv *tmpl;
+	struct ibv_mlx5_cq_info cq_info;
+	struct mlx5_rwq *rwq;
+	unsigned int i;
+	int ret = 0;
+
+	assert(!rxq_ctrl->ibv);
+	tmpl = rte_calloc_socket(__func__, 1, sizeof(*tmpl), 0,
+				 rxq_ctrl->socket);
+	if (!tmpl) {
+		ERROR("%p: cannot allocate verbs ressources",
+		       (void*)rxq_ctrl);
+		goto error;
+	}
+	/* Use the entire RX mempool as the memory region. */
+	tmpl->mr = priv_mr_get(priv, rxq->mp);
+	if (!tmpl->mr) {
+		tmpl->mr = priv_mr_new(priv, rxq->mp);
+		if (!tmpl->mr) {
+			ERROR("%p: MR creation failure", (void *)rxq_ctrl);
+			goto error;
+		}
+	}
+	if (rxq_ctrl->memory_channel) {
+		tmpl->channel = ibv_create_comp_channel(priv->ctx);
+		if (!tmpl->channel) {
+			ERROR("%p: Comp Channel creation failure",
+			      (void *)rxq_ctrl);
+			goto error;
+		}
+	}
+	attr.cq = (struct ibv_exp_cq_init_attr){
+		.comp_mask = 0,
+	};
+	if (priv->cqe_comp) {
+		attr.cq.comp_mask |= IBV_EXP_CQ_INIT_ATTR_FLAGS;
+		attr.cq.flags |= IBV_EXP_CQ_COMPRESSED_CQE;
+		/*
+		 * For vectorized Rx, it must not be doubled in order to
+		 * make cq_ci and rq_ci aligned.
+		 */
+		if (rxq_check_vec_support(rxq) < 0)
+			cqe_n *= 2;
+	}
+	tmpl->cq = ibv_exp_create_cq(priv->ctx, cqe_n, NULL, tmpl->channel, 0,
+				     &attr.cq);
+	if (tmpl->cq == NULL) {
+		ERROR("%p: CQ creation failure", (void *)rxq_ctrl);
+		goto error;
+	}
+	if (ibv_mlx5_exp_get_cq_info(tmpl->cq, &cq_info)) {
+		ERROR("Unable to query CQ info. check your OFED.");
+		goto error;
+	}
+	if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
+		ERROR("Wrong MLX5_CQE_SIZE environment variable value: "
+		      "it should be set to %u", RTE_CACHE_LINE_SIZE);
+		goto error;
+	}
+	DEBUG("priv->device_attr.max_qp_wr is %d",
+	      priv->device_attr.max_qp_wr);
+	DEBUG("priv->device_attr.max_sge is %d",
+	      priv->device_attr.max_sge);
+	attr.wq = (struct ibv_exp_wq_init_attr){
+		.wq_context = NULL, /* Could be useful in the future. */
+		.wq_type = IBV_EXP_WQT_RQ,
+		/* Max number of outstanding WRs. */
+		.max_recv_wr = (1 << rxq->elts_n) >> rxq->sges_n,
+		/* Max number of scatter/gather elements in a WR. */
+		.max_recv_sge = 1 << rxq->sges_n,
+		.pd = priv->pd,
+		.cq = tmpl->cq,
+		.comp_mask =
+			IBV_EXP_CREATE_WQ_VLAN_OFFLOADS |
+			0,
+		.vlan_offloads = (rxq->vlan_strip ?
+				  IBV_EXP_RECEIVE_WQ_CVLAN_STRIP :
+				  0),
+	};
+	/* By default, FCS (CRC) is stripped by hardware. */
+	if (rxq->crc_present) {
+		attr.wq.flags |= IBV_EXP_CREATE_WQ_FLAG_SCATTER_FCS;
+		attr.wq.comp_mask |= IBV_EXP_CREATE_WQ_FLAGS;
+	}
+	if (priv->hw_padding) {
+		attr.wq.flags |= IBV_EXP_CREATE_WQ_FLAG_RX_END_PADDING;
+		attr.wq.comp_mask |= IBV_EXP_CREATE_WQ_FLAGS;
+	}
+	tmpl->wq = ibv_exp_create_wq(priv->ctx, &attr.wq);
+	if (tmpl->wq == NULL) {
+		ERROR("%p: WQ creation failure", (void *)rxq_ctrl);
+		goto error;
+	}
+	/*
+	 * Make sure number of WRs*SGEs match expectations since a queue
+	 * cannot allocate more than "desc" buffers.
+	 */
+	if (((int)attr.wq.max_recv_wr != ((1 << rxq->elts_n) >> rxq->sges_n)) ||
+	    ((int)attr.wq.max_recv_sge != (1 << rxq->sges_n))) {
+		ERROR("%p: requested %u*%u but got %u*%u WRs*SGEs",
+		      (void *)rxq_ctrl,
+		      ((1 << rxq->elts_n) >> rxq->sges_n),
+		      (1 << rxq->sges_n),
+		      attr.wq.max_recv_wr, attr.wq.max_recv_sge);
+		goto error;
+	}
+	/* Change queue state to ready. */
+	mod = (struct ibv_exp_wq_attr){
+		.attr_mask = IBV_EXP_WQ_ATTR_STATE,
+		.wq_state = IBV_EXP_WQS_RDY,
+	};
+	ret = ibv_exp_modify_wq(tmpl->wq, &mod);
+	if (ret) {
+		ERROR("%p: WQ state to IBV_EXP_WQS_RDY failed",
+		      (void *)rxq_ctrl);
+		goto error;
+	}
+	/* Fill the rings. */
+	rwq = container_of(tmpl->wq, struct mlx5_rwq, wq);
+	rxq->wqes = (volatile struct mlx5_wqe_data_seg (*)[])
+		(uintptr_t)rwq->rq.buff;
+	for (i = 0; (i != (unsigned int)(1 << rxq->elts_n)); ++i) {
+		struct rte_mbuf *buf = (*rxq->elts)[i];
+		volatile struct mlx5_wqe_data_seg *scat = &(*rxq->wqes)[i];
+
+		/* scat->addr must be able to store a pointer. */
+		assert(sizeof(scat->addr) >= sizeof(uintptr_t));
+		*scat = (struct mlx5_wqe_data_seg){
+			.addr = htonll(rte_pktmbuf_mtod(buf, uintptr_t)),
+			.byte_count = htonl(DATA_LEN(buf)),
+			.lkey = tmpl->mr->lkey,
+		};
+	}
+	rxq->rq_db = rwq->rq.db;
+	rxq->cqe_n = log2above(cq_info.cqe_cnt);
+	rxq->cq_ci = 0;
+	rxq->rq_ci = 0;
+	rxq->cq_db = cq_info.dbrec;
+	rxq->cqes = (volatile struct mlx5_cqe (*)[])(uintptr_t)cq_info.buf;
+	/* Update doorbell counter. */
+	rxq->rq_ci = (1 << rxq->elts_n) >> rxq->sges_n;
+	rte_wmb();
+	*rxq->rq_db = htonl(rxq->rq_ci);
+	DEBUG("%p: rxq updated with %p", (void *)rxq_ctrl, (void *)&tmpl);
+	rte_atomic32_inc(&tmpl->refcnt);
+	DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void*)priv,
+	      (void*)tmpl, rte_atomic32_read(&tmpl->refcnt));
+	LIST_INSERT_HEAD(&priv->rxqsibv, tmpl, next);
+	return tmpl;
+error:
+	if (tmpl->wq)
+		claim_zero(ibv_exp_destroy_wq(tmpl->wq));
+	if (tmpl->cq)
+		claim_zero(ibv_destroy_cq(tmpl->cq));
+	if (tmpl->channel)
+		claim_zero(ibv_destroy_comp_channel(tmpl->channel));
+	if (tmpl->mr)
+		priv_mr_release(priv, tmpl->mr);
+	return NULL;
+
+}
+
+/**
+ * Get an Rx queue Verbs object.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param idx
+ *   Queue index in DPDK Rx queue array
+ *
+ * @return
+ *   The Verbs object if it exists.
+ */
+struct mlx5_rxq_ibv*
+mlx5_priv_rxq_ibv_get(struct priv *priv, uint16_t idx)
+{
+	struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
+	struct mlx5_rxq_ctrl *ctrl =
+		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
+	struct mlx5_mr *mr __rte_unused;
+
+	if (ctrl->ibv) {
+		mr = priv_mr_get(priv, rxq->mp);
+		rte_atomic32_inc(&ctrl->ibv->refcnt);
+		DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void*)priv,
+		      (void*)ctrl->ibv, rte_atomic32_read(&ctrl->ibv->refcnt));
+	}
+	return ctrl->ibv;
+}
+
+/**
+ * Release an Rx verbs queue object.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param rxq
+ *   Verbs Rx queue object.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+int
+mlx5_priv_rxq_ibv_release(struct priv *priv, struct mlx5_rxq_ibv *rxq)
+{
+	int ret;
+
+	assert(rxq->wq);
+	assert(rxq->cq);
+	assert(rxq->mr);
+	ret = priv_mr_release(priv, rxq->mr);
+	if (!ret)
+		rxq->mr = NULL;
+	DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void*)priv,
+	      (void*)rxq, rte_atomic32_read(&rxq->refcnt));
+	if (rte_atomic32_dec_and_test(&rxq->refcnt)) {
+		claim_zero(ibv_exp_destroy_wq(rxq->wq));
+		claim_zero(ibv_destroy_cq(rxq->cq));
+		if (rxq->channel)
+			claim_zero(ibv_destroy_comp_channel(rxq->channel));
+		LIST_REMOVE(rxq, next);
+		rte_free(rxq);
+		return 0;
+	}
+	return EBUSY;
+}
+
+/**
+ * Verify the Verbs Rx queue list is empty
+ *
+ * @param priv
+ *  Pointer to private structure.
+ *
+ * @return the number of objects not released.
+ */
+int
+mlx5_priv_rxq_ibv_verify(struct priv *priv)
+{
+	int ret = 0;
+	struct mlx5_rxq_ibv *rxq;
+
+	LIST_FOREACH(rxq, &priv->rxqsibv, next) {
+		DEBUG("%p: Verbs Rx queue %p still referenced", (void*)priv,
+		      (void*)rxq);
+		++ret;
+	}
+	return ret;
+}
+
+/**
+ * Return true if a single reference exists on the object.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param rxq
+ *   Verbs Rx queue object.
+ */
+int
+mlx5_priv_rxq_ibv_releasable(struct priv *priv, struct mlx5_rxq_ibv *rxq)
+{
+	(void)priv;
+	return (rte_atomic32_read(&rxq->refcnt) == 1);
+}
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index c7c7518..abdbf6a 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -130,15 +130,24 @@ struct mlx5_rxq_data {
 	struct rte_mbuf fake_mbuf; /* elts padding for vectorized Rx. */
 } __rte_cache_aligned;
 
-/* RX queue control descriptor. */
-struct mlx5_rxq_ctrl {
-	struct priv *priv; /* Back pointer to private data. */
+/* Verbs Rx queue elements. */
+struct mlx5_rxq_ibv {
+	LIST_ENTRY(mlx5_rxq_ibv) next; /* Pointer to the next element. */
+	rte_atomic32_t refcnt; /* Reference counter. */
+	struct mlx5_rxq_ctrl *rxq_ctrl; /* Back pointer to parent. */
 	struct ibv_cq *cq; /* Completion Queue. */
 	struct ibv_exp_wq *wq; /* Work Queue. */
-	struct mlx5_mr *mr; /* Memory Region (for mp). */
 	struct ibv_comp_channel *channel;
-	unsigned int socket; /* CPU socket ID for allocations. */
+	struct mlx5_mr *mr; /* Memory Region (for mp). */
+};
+
+/* RX queue control descriptor. */
+struct mlx5_rxq_ctrl {
+	struct priv *priv; /* Back pointer to private data. */
+	struct mlx5_rxq_ibv *ibv; /* Verbs elements. */
 	struct mlx5_rxq_data rxq; /* Data path structure. */
+	unsigned int socket; /* CPU socket ID for allocations. */
+	unsigned int memory_channel:1; /* Need memory channel. */
 };
 
 /* Hash RX queue types. */
@@ -298,7 +307,6 @@ void priv_destroy_hash_rxqs(struct priv *);
 int priv_allow_flow_type(struct priv *, enum hash_rxq_flow_type);
 int priv_rehash_flows(struct priv *);
 void mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *);
-int mlx5_rxq_rehash(struct rte_eth_dev *, struct mlx5_rxq_ctrl *);
 int mlx5_rxq_ctrl_setup(struct rte_eth_dev *, struct mlx5_rxq_ctrl *,
 			uint16_t, unsigned int, const struct rte_eth_rxconf *,
 			struct rte_mempool *);
@@ -311,6 +319,11 @@ void priv_rx_intr_vec_disable(struct priv *priv);
 int mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id);
 int mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id);
 #endif /* HAVE_UPDATE_CQ_CI */
+struct mlx5_rxq_ibv* mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx);
+struct mlx5_rxq_ibv* mlx5_priv_rxq_ibv_get(struct priv *priv, uint16_t idx);
+int mlx5_priv_rxq_ibv_release(struct priv *priv, struct mlx5_rxq_ibv *rxq);
+int mlx5_priv_rxq_ibv_releasable(struct priv *priv, struct mlx5_rxq_ibv *rxq);
+int mlx5_priv_rxq_ibv_verify(struct priv *priv);
 
 /* mlx5_txq.c */
 
@@ -347,7 +360,6 @@ uint16_t mlx5_rx_burst_vec(void *, struct rte_mbuf **, uint16_t);
 
 /* mlx5_mr.c */
 
-struct ibv_mr *mlx5_mp2mr(struct ibv_pd *, struct rte_mempool *);
 void mlx5_txq_mp2mr_iter(struct rte_mempool *, void *);
 uint32_t mlx5_txq_mp2mr_reg(struct mlx5_txq_data *, struct rte_mempool *,
 			    unsigned int);
diff --git a/drivers/net/mlx5/mlx5_vlan.c b/drivers/net/mlx5/mlx5_vlan.c
index 512052a..dffa1cd 100644
--- a/drivers/net/mlx5/mlx5_vlan.c
+++ b/drivers/net/mlx5/mlx5_vlan.c
@@ -153,7 +153,7 @@ priv_vlan_strip_queue_set(struct priv *priv, uint16_t idx, int on)
 		.vlan_offloads = vlan_offloads,
 	};
 
-	err = ibv_exp_modify_wq(rxq_ctrl->wq, &mod);
+	err = ibv_exp_modify_wq(rxq_ctrl->ibv->wq, &mod);
 	if (err) {
 		ERROR("%p: failed to modified stripping mode: %s",
 		      (void *)priv, strerror(err));
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v1 09/21] net/mlx5: separate DPDK from Verbs Tx queue objects
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (8 preceding siblings ...)
  2017-08-02 14:10 ` [dpdk-dev] [PATCH v1 08/21] net/mlx5: separate DPDK from Verbs Rx queue objects Nelio Laranjeiro
@ 2017-08-02 14:10 ` Nelio Laranjeiro
  2017-08-02 14:10 ` [dpdk-dev] [PATCH v1 10/21] net/mlx5: add reference counter on DPDK Tx queues Nelio Laranjeiro
                   ` (45 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-08-02 14:10 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5.c      |   3 +
 drivers/net/mlx5/mlx5.h      |   1 +
 drivers/net/mlx5/mlx5_rxtx.h |  18 +-
 drivers/net/mlx5/mlx5_txq.c  | 437 ++++++++++++++++++++++++++-----------------
 4 files changed, 284 insertions(+), 175 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index c158d8e..23068ae 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -203,6 +203,9 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	i = mlx5_priv_rxq_ibv_verify(priv);
 	if (i)
 		WARN("%p: some Verbs Rx queue still remain", (void*)priv);
+	i = mlx5_priv_txq_ibv_verify(priv);
+	if (i)
+		WARN("%p: some Verbs Tx queue still remain", (void*)priv);
 	i = priv_flow_verify(priv);
 	if (i)
 		WARN("%p: some flows still remain", (void*)priv);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 228fd34..25711ef 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -147,6 +147,7 @@ struct priv {
 	TAILQ_HEAD(mlx5_flows, rte_flow) flows; /* RTE Flow rules. */
 	LIST_HEAD(mr, mlx5_mr) mr; /* Memory region. */
 	LIST_HEAD(rxqibv, mlx5_rxq_ibv) rxqsibv; /* Verbs Rx queues. */
+	LIST_HEAD(txqibv, mlx5_txq_ibv) txqsibv; /* Verbs Tx queues. */
 	uint32_t link_speed_capa; /* Link speed capabilities. */
 	struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */
 	rte_spinlock_t lock; /* Lock for control functions. */
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index abdbf6a..f78aa38 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -283,12 +283,21 @@ struct mlx5_txq_data {
 	struct mlx5_txq_stats stats; /* TX queue counters. */
 } __rte_cache_aligned;
 
+/* Verbs Tx queue elements. */
+struct mlx5_txq_ibv {
+	LIST_ENTRY(mlx5_txq_ibv) next; /* Pointer to the next element. */
+	rte_atomic32_t refcnt; /* Reference counter. */
+	struct ibv_cq *cq; /* Completion Queue. */
+	struct ibv_qp *qp; /* Queue Pair. */
+};
+
 /* TX queue control descriptor. */
 struct mlx5_txq_ctrl {
 	struct priv *priv; /* Back pointer to private data. */
-	struct ibv_cq *cq; /* Completion Queue. */
-	struct ibv_qp *qp; /* Queue Pair. */
 	unsigned int socket; /* CPU socket ID for allocations. */
+	unsigned int max_inline_data; /* Max inline data. */
+	unsigned int max_tso_header; /* Max TSO header size. */
+	struct mlx5_txq_ibv *ibv; /* Verbs queue object. */
 	struct mlx5_txq_data txq; /* Data path structure. */
 };
 
@@ -333,6 +342,11 @@ int mlx5_txq_ctrl_setup(struct rte_eth_dev *, struct mlx5_txq_ctrl *, uint16_t,
 int mlx5_tx_queue_setup(struct rte_eth_dev *, uint16_t, uint16_t, unsigned int,
 			const struct rte_eth_txconf *);
 void mlx5_tx_queue_release(void *);
+struct mlx5_txq_ibv* mlx5_priv_txq_ibv_new(struct priv *priv, uint16_t idx);
+struct mlx5_txq_ibv* mlx5_priv_txq_ibv_get(struct priv *priv, uint16_t idx);
+int mlx5_priv_txq_ibv_release(struct priv *priv, struct mlx5_txq_ibv *txq);
+int mlx5_priv_txq_ibv_releasable(struct priv *priv, struct mlx5_txq_ibv *txq);
+int mlx5_priv_txq_ibv_verify(struct priv *priv);
 
 /* mlx5_rxtx.c */
 
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index ce826dd..570eb67 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -73,13 +73,6 @@ txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl, unsigned int elts_n)
 
 	for (i = 0; (i != elts_n); ++i)
 		(*txq_ctrl->txq.elts)[i] = NULL;
-	for (i = 0; (i != (1u << txq_ctrl->txq.wqe_n)); ++i) {
-		volatile struct mlx5_wqe64 *wqe =
-			(volatile struct mlx5_wqe64 *)
-			txq_ctrl->txq.wqes + i;
-
-		memset((void *)(uintptr_t)wqe, 0x0, sizeof(*wqe));
-	}
 	DEBUG("%p: allocated and configured %u WRs", (void *)txq_ctrl, elts_n);
 	txq_ctrl->txq.elts_head = 0;
 	txq_ctrl->txq.elts_tail = 0;
@@ -136,60 +129,15 @@ mlx5_txq_cleanup(struct mlx5_txq_ctrl *txq_ctrl)
 
 	DEBUG("cleaning up %p", (void *)txq_ctrl);
 	txq_free_elts(txq_ctrl);
-	if (txq_ctrl->qp != NULL)
-		claim_zero(ibv_destroy_qp(txq_ctrl->qp));
-	if (txq_ctrl->cq != NULL)
-		claim_zero(ibv_destroy_cq(txq_ctrl->cq));
 	for (i = 0; (i != RTE_DIM(txq_ctrl->txq.mp2mr)); ++i)
 		if (txq_ctrl->txq.mp2mr[i])
 			priv_mr_release(txq_ctrl->priv, txq_ctrl->txq.mp2mr[i]);
+	if (txq_ctrl->ibv)
+		mlx5_priv_txq_ibv_release(txq_ctrl->priv, txq_ctrl->ibv);
 	memset(txq_ctrl, 0, sizeof(*txq_ctrl));
 }
 
 /**
- * Initialize TX queue.
- *
- * @param tmpl
- *   Pointer to TX queue control template.
- * @param txq_ctrl
- *   Pointer to TX queue control.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-static inline int
-txq_setup(struct mlx5_txq_ctrl *tmpl, struct mlx5_txq_ctrl *txq_ctrl)
-{
-	struct mlx5_qp *qp = to_mqp(tmpl->qp);
-	struct ibv_cq *ibcq = tmpl->cq;
-	struct ibv_mlx5_cq_info cq_info;
-
-	if (ibv_mlx5_exp_get_cq_info(ibcq, &cq_info)) {
-		ERROR("Unable to query CQ info. check your OFED.");
-		return ENOTSUP;
-	}
-	if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
-		ERROR("Wrong MLX5_CQE_SIZE environment variable value: "
-		      "it should be set to %u", RTE_CACHE_LINE_SIZE);
-		return EINVAL;
-	}
-	tmpl->txq.cqe_n = log2above(cq_info.cqe_cnt);
-	tmpl->txq.qp_num_8s = qp->ctrl_seg.qp_num << 8;
-	tmpl->txq.wqes = qp->gen_data.sqstart;
-	tmpl->txq.wqe_n = log2above(qp->sq.wqe_cnt);
-	tmpl->txq.qp_db = &qp->gen_data.db[MLX5_SND_DBR];
-	tmpl->txq.bf_reg = qp->gen_data.bf->reg;
-	tmpl->txq.cq_db = cq_info.dbrec;
-	tmpl->txq.cqes =
-		(volatile struct mlx5_cqe (*)[])
-		(uintptr_t)cq_info.buf;
-	tmpl->txq.elts =
-		(struct rte_mbuf *(*)[1 << tmpl->txq.elts_n])
-		((uintptr_t)txq_ctrl + sizeof(*txq_ctrl));
-	return 0;
-}
-
-/**
  * Configure a TX queue.
  *
  * @param dev
@@ -216,22 +164,13 @@ mlx5_txq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_txq_ctrl *txq_ctrl,
 		.priv = priv,
 		.socket = socket,
 	};
-	union {
-		struct ibv_exp_qp_init_attr init;
-		struct ibv_exp_cq_init_attr cq;
-		struct ibv_exp_qp_attr mod;
-		struct ibv_exp_cq_attr cq_attr;
-	} attr;
-	unsigned int cqe_n;
 	const unsigned int max_tso_inline = ((MLX5_MAX_TSO_HEADER +
 					     (RTE_CACHE_LINE_SIZE - 1)) /
 					      RTE_CACHE_LINE_SIZE);
-	int ret = 0;
 
 	if (mlx5_getenv_int("MLX5_ENABLE_CQE_COMPRESSION")) {
-		ret = ENOTSUP;
 		ERROR("MLX5_ENABLE_CQE_COMPRESSION must never be set");
-		goto error;
+		return ENOTSUP;
 	}
 	tmpl.txq.flags = conf->txq_flags;
 	assert(desc > MLX5_TX_COMP_THRESH);
@@ -239,52 +178,10 @@ mlx5_txq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_txq_ctrl *txq_ctrl,
 	if (priv->mps == MLX5_MPW_ENHANCED)
 		tmpl.txq.mpw_hdr_dseg = priv->mpw_hdr_dseg;
 	/* MRs will be registered in mp2mr[] later. */
-	attr.cq = (struct ibv_exp_cq_init_attr){
-		.comp_mask = 0,
-	};
-	cqe_n = ((desc / MLX5_TX_COMP_THRESH) - 1) ?
-		((desc / MLX5_TX_COMP_THRESH) - 1) : 1;
-	if (priv->mps == MLX5_MPW_ENHANCED)
-		cqe_n += MLX5_TX_COMP_THRESH_INLINE_DIV;
-	tmpl.cq = ibv_exp_create_cq(priv->ctx,
-				    cqe_n,
-				    NULL, NULL, 0, &attr.cq);
-	if (tmpl.cq == NULL) {
-		ret = ENOMEM;
-		ERROR("%p: CQ creation failure: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
 	DEBUG("priv->device_attr.max_qp_wr is %d",
 	      priv->device_attr.max_qp_wr);
 	DEBUG("priv->device_attr.max_sge is %d",
 	      priv->device_attr.max_sge);
-	attr.init = (struct ibv_exp_qp_init_attr){
-		/* CQ to be associated with the send queue. */
-		.send_cq = tmpl.cq,
-		/* CQ to be associated with the receive queue. */
-		.recv_cq = tmpl.cq,
-		.cap = {
-			/* Max number of outstanding WRs. */
-			.max_send_wr = ((priv->device_attr.max_qp_wr < desc) ?
-					priv->device_attr.max_qp_wr :
-					desc),
-			/*
-			 * Max number of scatter/gather elements in a WR,
-			 * must be 1 to prevent libmlx5 from trying to affect
-			 * too much memory. TX gather is not impacted by the
-			 * priv->device_attr.max_sge limit and will still work
-			 * properly.
-			 */
-			.max_send_sge = 1,
-		},
-		.qp_type = IBV_QPT_RAW_PACKET,
-		/* Do *NOT* enable this, completions events are managed per
-		 * TX burst. */
-		.sq_sig_all = 0,
-		.pd = priv->pd,
-		.comp_mask = IBV_EXP_QP_INIT_ATTR_PD,
-	};
 	if (priv->txq_inline && (priv->txqs_n >= priv->txqs_inline)) {
 		tmpl.txq.max_inline =
 			((priv->txq_inline + (RTE_CACHE_LINE_SIZE - 1)) /
@@ -298,7 +195,7 @@ mlx5_txq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_txq_ctrl *txq_ctrl,
 			/* To minimize the size of data set, avoid requesting
 			 * too large WQ.
 			 */
-			attr.init.cap.max_inline_data =
+			tmpl.max_inline_data =
 				((RTE_MIN(priv->txq_inline,
 					  priv->inline_max_packet_sz) +
 				  (RTE_CACHE_LINE_SIZE - 1)) /
@@ -310,73 +207,27 @@ mlx5_txq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_txq_ctrl *txq_ctrl,
 			 * Adjust inline value as Verbs aggregates
 			 * tso_inline and txq_inline fields.
 			 */
-			attr.init.cap.max_inline_data = inline_diff > 0 ?
-							inline_diff *
-							RTE_CACHE_LINE_SIZE :
-							0;
+			tmpl.max_inline_data = inline_diff > 0 ?
+					       inline_diff *
+					       RTE_CACHE_LINE_SIZE :
+					       0;
 		} else {
-			attr.init.cap.max_inline_data =
+			tmpl.max_inline_data =
 				tmpl.txq.max_inline * RTE_CACHE_LINE_SIZE;
 		}
 	}
 	if (priv->tso) {
-		attr.init.max_tso_header =
-			max_tso_inline * RTE_CACHE_LINE_SIZE;
-		attr.init.comp_mask |= IBV_EXP_QP_INIT_ATTR_MAX_TSO_HEADER;
+		tmpl.max_tso_header = max_tso_inline * RTE_CACHE_LINE_SIZE;
 		tmpl.txq.max_inline = RTE_MAX(tmpl.txq.max_inline,
 					      max_tso_inline);
 		tmpl.txq.tso_en = 1;
 	}
 	if (priv->tunnel_en)
 		tmpl.txq.tunnel_en = 1;
-	tmpl.qp = ibv_exp_create_qp(priv->ctx, &attr.init);
-	if (tmpl.qp == NULL) {
-		ret = (errno ? errno : EINVAL);
-		ERROR("%p: QP creation failure: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	DEBUG("TX queue capabilities: max_send_wr=%u, max_send_sge=%u,"
-	      " max_inline_data=%u",
-	      attr.init.cap.max_send_wr,
-	      attr.init.cap.max_send_sge,
-	      attr.init.cap.max_inline_data);
-	attr.mod = (struct ibv_exp_qp_attr){
-		/* Move the QP to this state. */
-		.qp_state = IBV_QPS_INIT,
-		/* Primary port number. */
-		.port_num = priv->port
-	};
-	ret = ibv_exp_modify_qp(tmpl.qp, &attr.mod,
-				(IBV_EXP_QP_STATE | IBV_EXP_QP_PORT));
-	if (ret) {
-		ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	ret = txq_setup(&tmpl, txq_ctrl);
-	if (ret) {
-		ERROR("%p: cannot initialize TX queue structure: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
+	tmpl.txq.elts =
+		(struct rte_mbuf *(*)[1 << tmpl.txq.elts_n])
+		((uintptr_t)txq_ctrl + sizeof(*txq_ctrl));
 	txq_alloc_elts(&tmpl, desc);
-	attr.mod = (struct ibv_exp_qp_attr){
-		.qp_state = IBV_QPS_RTR
-	};
-	ret = ibv_exp_modify_qp(tmpl.qp, &attr.mod, IBV_EXP_QP_STATE);
-	if (ret) {
-		ERROR("%p: QP state to IBV_QPS_RTR failed: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	attr.mod.qp_state = IBV_QPS_RTS;
-	ret = ibv_exp_modify_qp(tmpl.qp, &attr.mod, IBV_EXP_QP_STATE);
-	if (ret) {
-		ERROR("%p: QP state to IBV_QPS_RTS failed: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
 	/* Clean up txq in case we're reinitializing it. */
 	DEBUG("%p: cleaning-up old txq just in case", (void *)txq_ctrl);
 	mlx5_txq_cleanup(txq_ctrl);
@@ -384,12 +235,7 @@ mlx5_txq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_txq_ctrl *txq_ctrl,
 	DEBUG("%p: txq updated with %p", (void *)txq_ctrl, (void *)&tmpl);
 	/* Pre-register known mempools. */
 	rte_mempool_walk(mlx5_txq_mp2mr_iter, txq_ctrl);
-	assert(ret == 0);
 	return 0;
-error:
-	mlx5_txq_cleanup(&tmpl);
-	assert(ret > 0);
-	return ret;
 }
 
 /**
@@ -480,14 +326,22 @@ mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		}
 	}
 	ret = mlx5_txq_ctrl_setup(dev, txq_ctrl, desc, socket, conf);
-	if (ret)
+	if (ret) {
 		rte_free(txq_ctrl);
-	else {
-		txq_ctrl->txq.stats.idx = idx;
-		DEBUG("%p: adding TX queue %p to list",
-		      (void *)dev, (void *)txq_ctrl);
-		(*priv->txqs)[idx] = &txq_ctrl->txq;
+		goto out;
+	}
+	txq_ctrl->txq.stats.idx = idx;
+	DEBUG("%p: adding TX queue %p to list",
+	      (void *)dev, (void *)txq_ctrl);
+	(*priv->txqs)[idx] = &txq_ctrl->txq;
+	txq_ctrl->ibv = mlx5_priv_txq_ibv_new(priv, idx);
+	if (!txq_ctrl->ibv) {
+		ret = EAGAIN;
+		goto out;
 	}
+	/* Update send callback. */
+	priv_select_tx_function(priv);
+out:
 	priv_unlock(priv);
 	return -ret;
 }
@@ -525,3 +379,240 @@ mlx5_tx_queue_release(void *dpdk_txq)
 	rte_free(txq_ctrl);
 	priv_unlock(priv);
 }
+
+/**
+ * Create the Tx queue Verbs object.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param idx
+ *   Queue index in DPDK Tx queue array
+ *
+ * @return
+ *   The Verbs object initialised if it can be created.
+ */
+struct mlx5_txq_ibv*
+mlx5_priv_txq_ibv_new(struct priv *priv, uint16_t idx)
+{
+	struct mlx5_txq_data *txq = (*priv->txqs)[idx];
+	struct mlx5_txq_ctrl *ctrl =
+		container_of(txq, struct mlx5_txq_ctrl, txq);
+	struct mlx5_txq_ibv tmpl;
+	struct mlx5_txq_ibv *ibv;
+	union {
+		struct ibv_exp_qp_init_attr init;
+		struct ibv_exp_cq_init_attr cq;
+		struct ibv_exp_qp_attr mod;
+		struct ibv_exp_cq_attr cq_attr;
+	} attr;
+	unsigned int cqe_n;
+	struct mlx5_qp *qp;
+	struct ibv_mlx5_cq_info cq_info;
+	const int desc = 1 << txq->elts_n;
+	int ret = 0;
+
+	if (mlx5_getenv_int("MLX5_ENABLE_CQE_COMPRESSION")) {
+		ERROR("MLX5_ENABLE_CQE_COMPRESSION must never be set");
+		goto error;
+	}
+	memset(&tmpl, 0, sizeof(struct mlx5_txq_ibv));
+	/* MRs will be registered in mp2mr[] later. */
+	attr.cq = (struct ibv_exp_cq_init_attr){
+		.comp_mask = 0,
+	};
+	cqe_n = ((desc / MLX5_TX_COMP_THRESH) - 1) ?
+		((desc / MLX5_TX_COMP_THRESH) - 1) : 1;
+	if (priv->mps == MLX5_MPW_ENHANCED)
+		cqe_n += MLX5_TX_COMP_THRESH_INLINE_DIV;
+	tmpl.cq = ibv_exp_create_cq(priv->ctx,
+				    cqe_n,
+				    NULL, NULL, 0, &attr.cq);
+	if (tmpl.cq == NULL) {
+		ERROR("%p: CQ creation failure", (void *)ctrl);
+		goto error;
+	}
+	attr.init = (struct ibv_exp_qp_init_attr){
+		/* CQ to be associated with the send queue. */
+		.send_cq = tmpl.cq,
+		/* CQ to be associated with the receive queue. */
+		.recv_cq = tmpl.cq,
+		.cap = {
+			/* Max number of outstanding WRs. */
+			.max_send_wr = ((priv->device_attr.max_qp_wr < desc) ?
+					priv->device_attr.max_qp_wr : desc),
+			/*
+			 * Max number of scatter/gather elements in a WR,
+			 * must be 1 to prevent libmlx5 from trying to affect
+			 * too much memory. TX gather is not impacted by the
+			 * priv->device_attr.max_sge limit and will still work
+			 * properly.
+			 */
+			.max_send_sge = 1,
+		},
+		.qp_type = IBV_QPT_RAW_PACKET,
+		/* Do *NOT* enable this, completions events are managed per
+		 * TX burst. */
+		.sq_sig_all = 0,
+		.pd = priv->pd,
+		.comp_mask = IBV_EXP_QP_INIT_ATTR_PD,
+	};
+	if (txq->inline_en)
+		attr.init.cap.max_inline_data = ctrl->max_inline_data;
+	if (txq->tso_en) {
+		attr.init.max_tso_header = ctrl->max_tso_header;
+		attr.init.comp_mask |= IBV_EXP_QP_INIT_ATTR_MAX_TSO_HEADER;
+	}
+	tmpl.qp = ibv_exp_create_qp(priv->ctx, &attr.init);
+	if (tmpl.qp == NULL) {
+		ERROR("%p: QP creation failure", (void *)ctrl);
+		goto error;
+	}
+	attr.mod = (struct ibv_exp_qp_attr){
+		/* Move the QP to this state. */
+		.qp_state = IBV_QPS_INIT,
+		/* Primary port number. */
+		.port_num = priv->port
+	};
+	ret = ibv_exp_modify_qp(tmpl.qp, &attr.mod,
+				(IBV_EXP_QP_STATE | IBV_EXP_QP_PORT));
+	if (ret) {
+		ERROR("%p: QP state to IBV_QPS_INIT failed", (void *)ctrl);
+		goto error;
+	}
+	attr.mod = (struct ibv_exp_qp_attr){
+		.qp_state = IBV_QPS_RTR
+	};
+	ret = ibv_exp_modify_qp(tmpl.qp, &attr.mod, IBV_EXP_QP_STATE);
+	if (ret) {
+		ERROR("%p: QP state to IBV_QPS_RTR failed", (void *)ctrl);
+		goto error;
+	}
+	attr.mod.qp_state = IBV_QPS_RTS;
+	ret = ibv_exp_modify_qp(tmpl.qp, &attr.mod, IBV_EXP_QP_STATE);
+	if (ret) {
+		ERROR("%p: QP state to IBV_QPS_RTS failed", (void *)ctrl);
+		goto error;
+	}
+	ibv = rte_calloc_socket(__func__, 1, sizeof(struct mlx5_txq_ibv), 0,
+				ctrl->socket);
+	if (!ibv) {
+		ERROR("%p: cannot allocate memory", (void *)ctrl);
+		goto error;
+	}
+	*ibv = tmpl;
+	qp = to_mqp(ibv->qp);
+	if (ibv_mlx5_exp_get_cq_info(ibv->cq, &cq_info)) {
+		ERROR("Unable to query CQ info. check your OFED.");
+		goto error;
+	}
+	if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
+		ERROR("Wrong MLX5_CQE_SIZE environment variable value: "
+		      "it should be set to %u", RTE_CACHE_LINE_SIZE);
+		goto error;
+	}
+	txq->cqe_n = log2above(cq_info.cqe_cnt);
+	txq->qp_num_8s = qp->ctrl_seg.qp_num << 8;
+	txq->wqes = qp->gen_data.sqstart;
+	txq->wqe_n = log2above(qp->sq.wqe_cnt);
+	txq->qp_db = &qp->gen_data.db[MLX5_SND_DBR];
+	txq->bf_reg = qp->gen_data.bf->reg;
+	txq->cq_db = cq_info.dbrec;
+	txq->cqes =
+		(volatile struct mlx5_cqe (*)[])
+		(uintptr_t)cq_info.buf;
+	rte_atomic32_inc(&ibv->refcnt);
+	LIST_INSERT_HEAD(&priv->txqsibv, ibv, next);
+	return ibv;
+error:
+	if (tmpl.cq)
+		claim_zero(ibv_destroy_cq(tmpl.cq));
+	if (tmpl.qp)
+		claim_zero(ibv_destroy_qp(tmpl.qp));
+	return NULL;
+
+}
+
+/**
+ * Get a Tx queue Verbs object.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param idx
+ *   Queue index in DPDK Tx queue array
+ *
+ * @return
+ *   The Verbs object if it exists.
+ */
+struct mlx5_txq_ibv*
+mlx5_priv_txq_ibv_get(struct priv *priv, uint16_t idx)
+{
+	struct mlx5_txq_data *txq = (*priv->txqs)[idx];
+	struct mlx5_txq_ctrl *ctrl =
+		container_of(txq, struct mlx5_txq_ctrl, txq);
+
+	if (ctrl->ibv)
+		rte_atomic32_inc(&ctrl->ibv->refcnt);
+	return ctrl->ibv;
+}
+
+/**
+ * Release a Tx verbs queue object.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param txq
+ *   Verbs Tx queue object.
+ *
+ * @return
+ *   0 on success, errno on failure.
+ */
+int
+mlx5_priv_txq_ibv_release(struct priv *priv, struct mlx5_txq_ibv *txq)
+{
+	(void)priv;
+	if (rte_atomic32_dec_and_test(&txq->refcnt)) {
+		claim_zero(ibv_destroy_qp(txq->qp));
+		claim_zero(ibv_destroy_cq(txq->cq));
+		LIST_REMOVE(txq, next);
+		rte_free(txq);
+		return 0;
+	}
+	return EBUSY;
+}
+
+/**
+ * Return true if a single reference exists on the object.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param txq
+ *   Verbs Tx queue object.
+ */
+int
+mlx5_priv_txq_ibv_releasable(struct priv *priv, struct mlx5_txq_ibv *txq)
+{
+	(void)priv;
+	return (rte_atomic32_read(&txq->refcnt) == 1);
+}
+
+/**
+ * Verify the Verbs Tx queue list is empty
+ *
+ * @param priv
+ *  Pointer to private structure.
+ *
+ * @return the number of objects not released.
+ */
+int
+mlx5_priv_txq_ibv_verify(struct priv *priv)
+{
+	int ret = 0;
+	struct mlx5_txq_ibv *txq;
+
+	LIST_FOREACH(txq, &priv->txqsibv, next) {
+		DEBUG("%p: Verbs Tx queue %p still referenced", (void*)priv,
+		      (void*)txq);
+		++ret;
+	}
+	return ret;
+}
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v1 10/21] net/mlx5: add reference counter on DPDK Tx queues
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (9 preceding siblings ...)
  2017-08-02 14:10 ` [dpdk-dev] [PATCH v1 09/21] net/mlx5: separate DPDK from Verbs Tx " Nelio Laranjeiro
@ 2017-08-02 14:10 ` Nelio Laranjeiro
  2017-08-02 14:10 ` [dpdk-dev] [PATCH v1 11/21] net/mlx5: add reference counter on DPDK Rx queues Nelio Laranjeiro
                   ` (44 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-08-02 14:10 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5.c         |  16 +-
 drivers/net/mlx5/mlx5.h         |   1 +
 drivers/net/mlx5/mlx5_mr.c      |  37 ++--
 drivers/net/mlx5/mlx5_rxtx.h    |  13 +-
 drivers/net/mlx5/mlx5_trigger.c |  56 ++++++
 drivers/net/mlx5/mlx5_txq.c     | 383 ++++++++++++++++++++++++----------------
 6 files changed, 317 insertions(+), 189 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 23068ae..c8be196 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -173,17 +173,8 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	if (priv->txqs != NULL) {
 		/* XXX race condition if mlx5_tx_burst() is still running. */
 		usleep(1000);
-		for (i = 0; (i != priv->txqs_n); ++i) {
-			struct mlx5_txq_data *txq = (*priv->txqs)[i];
-			struct mlx5_txq_ctrl *txq_ctrl;
-
-			if (txq == NULL)
-				continue;
-			txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
-			(*priv->txqs)[i] = NULL;
-			mlx5_txq_cleanup(txq_ctrl);
-			rte_free(txq_ctrl);
-		}
+		for (i = 0; (i != priv->txqs_n); ++i)
+			mlx5_priv_txq_release(priv, i);
 		priv->txqs_n = 0;
 		priv->txqs = NULL;
 	}
@@ -206,6 +197,9 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	i = mlx5_priv_txq_ibv_verify(priv);
 	if (i)
 		WARN("%p: some Verbs Tx queue still remain", (void*)priv);
+	i = mlx5_priv_txq_verify(priv);
+	if (i)
+		WARN("%p: some Tx Queues still remain", (void*)priv);
 	i = priv_flow_verify(priv);
 	if (i)
 		WARN("%p: some flows still remain", (void*)priv);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 25711ef..4dd432b 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -147,6 +147,7 @@ struct priv {
 	TAILQ_HEAD(mlx5_flows, rte_flow) flows; /* RTE Flow rules. */
 	LIST_HEAD(mr, mlx5_mr) mr; /* Memory region. */
 	LIST_HEAD(rxqibv, mlx5_rxq_ibv) rxqsibv; /* Verbs Rx queues. */
+	LIST_HEAD(txq, mlx5_txq_ctrl) txqsctrl; /* DPDK Tx queues. */
 	LIST_HEAD(txqibv, mlx5_txq_ibv) txqsibv; /* Verbs Tx queues. */
 	uint32_t link_speed_capa; /* Link speed capabilities. */
 	struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */
diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c
index b5e9500..58e3b3f 100644
--- a/drivers/net/mlx5/mlx5_mr.c
+++ b/drivers/net/mlx5/mlx5_mr.c
@@ -164,7 +164,7 @@ mlx5_txq_mp2mr_reg(struct mlx5_txq_data *txq, struct rte_mempool *mp,
 	return txq_ctrl->txq.mp2mr[idx]->lkey;
 }
 
-struct txq_mp2mr_mbuf_check_data {
+struct mlx5_mp2mr_mbuf_check_data {
 	int ret;
 };
 
@@ -183,10 +183,10 @@ struct txq_mp2mr_mbuf_check_data {
  *   Object index, unused.
  */
 static void
-mlx5_txq_mp2mr_mbuf_check(struct rte_mempool *mp, void *arg, void *obj,
-			  uint32_t index __rte_unused)
+mlx5_mp2mr_mbuf_check(struct rte_mempool *mp, void *arg, void *obj,
+		      uint32_t index __rte_unused)
 {
-	struct txq_mp2mr_mbuf_check_data *data = arg;
+	struct mlx5_mp2mr_mbuf_check_data *data = arg;
 	struct rte_mbuf *buf = obj;
 
 	/*
@@ -207,35 +207,24 @@ mlx5_txq_mp2mr_mbuf_check(struct rte_mempool *mp, void *arg, void *obj,
  *   Pointer to TX queue structure.
  */
 void
-mlx5_txq_mp2mr_iter(struct rte_mempool *mp, void *arg)
+mlx5_mp2mr_iter(struct rte_mempool *mp, void *arg)
 {
-	struct mlx5_txq_ctrl *txq_ctrl = arg;
-	struct txq_mp2mr_mbuf_check_data data = {
+	struct priv *priv = (struct priv *)arg;
+	struct mlx5_mp2mr_mbuf_check_data data = {
 		.ret = 0,
 	};
-	uintptr_t start;
-	uintptr_t end;
-	unsigned int i;
+	struct mlx5_mr *mr;
 
 	/* Register mempool only if the first element looks like a mbuf. */
-	if (rte_mempool_obj_iter(mp, mlx5_txq_mp2mr_mbuf_check, &data) == 0 ||
+	if (rte_mempool_obj_iter(mp, mlx5_mp2mr_mbuf_check, &data) == 0 ||
 	    data.ret == -1)
 		return;
-	if (mlx5_check_mempool(mp, &start, &end) != 0) {
-		ERROR("mempool %p: not virtually contiguous",
-		      (void *)mp);
+	mr = priv_mr_get(priv, mp);
+	if (mr) {
+		priv_mr_release(priv, mr);
 		return;
 	}
-	for (i = 0; (i != RTE_DIM(txq_ctrl->txq.mp2mr)); ++i) {
-		if (unlikely(txq_ctrl->txq.mp2mr[i] == NULL)) {
-			/* Unknown MP, add a new MR for it. */
-			break;
-		}
-		if (start >= (uintptr_t)txq_ctrl->txq.mp2mr[i]->start &&
-		    end <= (uintptr_t)txq_ctrl->txq.mp2mr[i]->end)
-			return;
-	}
-	mlx5_txq_mp2mr_reg(&txq_ctrl->txq, mp, i);
+	priv_mr_new(priv, mp);
 }
 
 /**
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index f78aa38..13b50a1 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -293,6 +293,8 @@ struct mlx5_txq_ibv {
 
 /* TX queue control descriptor. */
 struct mlx5_txq_ctrl {
+	LIST_ENTRY(mlx5_txq_ctrl) next; /* Pointer to the next element. */
+	rte_atomic32_t refcnt; /* Reference counter. */
 	struct priv *priv; /* Back pointer to private data. */
 	unsigned int socket; /* CPU socket ID for allocations. */
 	unsigned int max_inline_data; /* Max inline data. */
@@ -337,8 +339,6 @@ int mlx5_priv_rxq_ibv_verify(struct priv *priv);
 /* mlx5_txq.c */
 
 void mlx5_txq_cleanup(struct mlx5_txq_ctrl *);
-int mlx5_txq_ctrl_setup(struct rte_eth_dev *, struct mlx5_txq_ctrl *, uint16_t,
-			unsigned int, const struct rte_eth_txconf *);
 int mlx5_tx_queue_setup(struct rte_eth_dev *, uint16_t, uint16_t, unsigned int,
 			const struct rte_eth_txconf *);
 void mlx5_tx_queue_release(void *);
@@ -347,6 +347,13 @@ struct mlx5_txq_ibv* mlx5_priv_txq_ibv_get(struct priv *priv, uint16_t idx);
 int mlx5_priv_txq_ibv_release(struct priv *priv, struct mlx5_txq_ibv *txq);
 int mlx5_priv_txq_ibv_releasable(struct priv *priv, struct mlx5_txq_ibv *txq);
 int mlx5_priv_txq_ibv_verify(struct priv *priv);
+struct mlx5_txq_ctrl* mlx5_priv_txq_new(struct priv *priv, uint16_t idx,
+					uint16_t desc, unsigned int socket);
+struct mlx5_txq_ctrl* mlx5_priv_txq_get(struct priv *priv, uint16_t idx);
+int mlx5_priv_txq_release(struct priv *priv, uint16_t idx);
+int mlx5_priv_txq_releasable(struct priv *priv, uint16_t idx);
+int mlx5_priv_txq_verify(struct priv *);
+void txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl);
 
 /* mlx5_rxtx.c */
 
@@ -374,7 +381,7 @@ uint16_t mlx5_rx_burst_vec(void *, struct rte_mbuf **, uint16_t);
 
 /* mlx5_mr.c */
 
-void mlx5_txq_mp2mr_iter(struct rte_mempool *, void *);
+void mlx5_mp2mr_iter(struct rte_mempool *, void *);
 uint32_t mlx5_txq_mp2mr_reg(struct mlx5_txq_data *, struct rte_mempool *,
 			    unsigned int);
 
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index a4a0acd..7df85aa 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -40,6 +40,44 @@
 #include "mlx5_rxtx.h"
 #include "mlx5_utils.h"
 
+/**
+ * Drop one reference on every Tx queue of the port.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ */
+static void
+priv_txq_stop(struct priv *priv)
+{
+	unsigned int i;
+
+	/* Bound the walk by the Tx queue count, not the Rx one. */
+	for (i = 0; i != priv->txqs_n; ++i)
+		mlx5_priv_txq_release(priv, i);
+}
+
+/**
+ * Prepare every configured Tx queue for traffic.
+ *
+ * For each queue this takes a reference through mlx5_priv_txq_get(),
+ * registers the known memory regions in its mp2mr[] cache, sets up its
+ * elements and creates its Verbs queue object. On failure every queue is
+ * released through priv_txq_stop().
+ *
+ * @param priv
+ *   Pointer to private structure.
+ *
+ * @return
+ *   0 on success, a negative errno value (-ENOMEM) on failure.
+ *
+ * NOTE(review): mlx5_dev_start() passes this value to strerror() and
+ * negates it again before returning; confirm the intended sign convention.
+ */
+static int
+priv_txq_start(struct priv *priv)
+{
+	unsigned int i;
+	int ret = 0;
+
+	/* Add memory regions to Tx queues. */
+	for (i = 0; i != priv->txqs_n; ++i) {
+		unsigned int idx = 0;
+		struct mlx5_mr *mr;
+		struct mlx5_txq_ctrl *txq_ctrl = mlx5_priv_txq_get(priv, i);
+
+		/* No queue configured at this index. */
+		if (!txq_ctrl)
+			continue;
+		LIST_FOREACH(mr, &priv->mr, next) {
+			/* Never index past the per-queue mp2mr[] cache. */
+			if (idx == MLX5_PMD_TX_MP_CACHE)
+				break;
+			mlx5_txq_mp2mr_reg(&txq_ctrl->txq, mr->mp, idx++);
+		}
+		txq_alloc_elts(txq_ctrl);
+		txq_ctrl->ibv = mlx5_priv_txq_ibv_new(priv, i);
+		if (!txq_ctrl->ibv) {
+			ret = ENOMEM;
+			goto error;
+		}
+	}
+	return -ret;
+error:
+	priv_txq_stop(priv);
+	return -ret;
+}
+
 /**
  * DPDK callback to start the device.
  *
@@ -55,6 +93,7 @@ int
 mlx5_dev_start(struct rte_eth_dev *dev)
 {
 	struct priv *priv = dev->data->dev_private;
+	struct mlx5_mr *mr = NULL;
 	int err;
 
 	if (mlx5_is_secondary())
@@ -65,6 +104,15 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 	priv_select_tx_function(priv);
 	priv_select_rx_function(priv);
 	DEBUG("%p: allocating and configuring hash RX queues", (void *)dev);
+	rte_mempool_walk(mlx5_mp2mr_iter, priv);
+	err = priv_txq_start(priv);
+	if (err) {
+		ERROR("%p: TXQ allocation failed: %s",
+		      (void *)dev, strerror(err));
+		goto error;
+	}
+	/* Update send callback. */
+	priv_select_tx_function(priv);
 	err = priv_create_hash_rxqs(priv);
 	if (!err)
 		err = priv_rehash_flows(priv);
@@ -93,10 +141,13 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 	return 0;
 error:
 	/* Rollback. */
+	LIST_FOREACH(mr, &priv->mr, next)
+		priv_mr_release(priv, mr);
 	priv_special_flow_disable_all(priv);
 	priv_mac_addrs_disable(priv);
 	priv_destroy_hash_rxqs(priv);
 	priv_flow_stop(priv);
+	priv_txq_stop(priv);
 	priv_unlock(priv);
 	return -err;
 }
@@ -113,6 +164,7 @@ void
 mlx5_dev_stop(struct rte_eth_dev *dev)
 {
 	struct priv *priv = dev->data->dev_private;
+	struct mlx5_mr *mr;
 
 	if (mlx5_is_secondary())
 		return;
@@ -124,6 +176,10 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
 	priv_destroy_hash_rxqs(priv);
 	priv_flow_stop(priv);
 	priv_rx_intr_vec_disable(priv);
+	LIST_FOREACH(mr, &priv->mr, next) {
+		priv_mr_release(priv, mr);
+	}
+	priv_txq_stop(priv);
 	priv_dev_interrupt_handler_uninstall(priv, dev);
 	priv_unlock(priv);
 }
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 570eb67..0530668 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -63,12 +63,11 @@
  *
  * @param txq_ctrl
  *   Pointer to TX queue structure.
- * @param elts_n
- *   Number of elements to allocate.
  */
-static void
-txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl, unsigned int elts_n)
+void
+txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl)
 {
+	const unsigned int elts_n = 1 << txq_ctrl->txq.elts_n;
 	unsigned int i;
 
 	for (i = 0; (i != elts_n); ++i)
@@ -138,107 +137,6 @@ mlx5_txq_cleanup(struct mlx5_txq_ctrl *txq_ctrl)
 }
 
 /**
- * Configure a TX queue.
- *
- * @param dev
- *   Pointer to Ethernet device structure.
- * @param txq_ctrl
- *   Pointer to TX queue structure.
- * @param desc
- *   Number of descriptors to configure in queue.
- * @param socket
- *   NUMA socket on which memory must be allocated.
- * @param[in] conf
- *   Thresholds parameters.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-int
-mlx5_txq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_txq_ctrl *txq_ctrl,
-		    uint16_t desc, unsigned int socket,
-		    const struct rte_eth_txconf *conf)
-{
-	struct priv *priv = mlx5_get_priv(dev);
-	struct mlx5_txq_ctrl tmpl = {
-		.priv = priv,
-		.socket = socket,
-	};
-	const unsigned int max_tso_inline = ((MLX5_MAX_TSO_HEADER +
-					     (RTE_CACHE_LINE_SIZE - 1)) /
-					      RTE_CACHE_LINE_SIZE);
-
-	if (mlx5_getenv_int("MLX5_ENABLE_CQE_COMPRESSION")) {
-		ERROR("MLX5_ENABLE_CQE_COMPRESSION must never be set");
-		return ENOTSUP;
-	}
-	tmpl.txq.flags = conf->txq_flags;
-	assert(desc > MLX5_TX_COMP_THRESH);
-	tmpl.txq.elts_n = log2above(desc);
-	if (priv->mps == MLX5_MPW_ENHANCED)
-		tmpl.txq.mpw_hdr_dseg = priv->mpw_hdr_dseg;
-	/* MRs will be registered in mp2mr[] later. */
-	DEBUG("priv->device_attr.max_qp_wr is %d",
-	      priv->device_attr.max_qp_wr);
-	DEBUG("priv->device_attr.max_sge is %d",
-	      priv->device_attr.max_sge);
-	if (priv->txq_inline && (priv->txqs_n >= priv->txqs_inline)) {
-		tmpl.txq.max_inline =
-			((priv->txq_inline + (RTE_CACHE_LINE_SIZE - 1)) /
-			 RTE_CACHE_LINE_SIZE);
-		tmpl.txq.inline_en = 1;
-		/* TSO and MPS can't be enabled concurrently. */
-		assert(!priv->tso || !priv->mps);
-		if (priv->mps == MLX5_MPW_ENHANCED) {
-			tmpl.txq.inline_max_packet_sz =
-				priv->inline_max_packet_sz;
-			/* To minimize the size of data set, avoid requesting
-			 * too large WQ.
-			 */
-			tmpl.max_inline_data =
-				((RTE_MIN(priv->txq_inline,
-					  priv->inline_max_packet_sz) +
-				  (RTE_CACHE_LINE_SIZE - 1)) /
-				 RTE_CACHE_LINE_SIZE) * RTE_CACHE_LINE_SIZE;
-		} else if (priv->tso) {
-			int inline_diff = tmpl.txq.max_inline - max_tso_inline;
-
-			/*
-			 * Adjust inline value as Verbs aggregates
-			 * tso_inline and txq_inline fields.
-			 */
-			tmpl.max_inline_data = inline_diff > 0 ?
-					       inline_diff *
-					       RTE_CACHE_LINE_SIZE :
-					       0;
-		} else {
-			tmpl.max_inline_data =
-				tmpl.txq.max_inline * RTE_CACHE_LINE_SIZE;
-		}
-	}
-	if (priv->tso) {
-		tmpl.max_tso_header = max_tso_inline * RTE_CACHE_LINE_SIZE;
-		tmpl.txq.max_inline = RTE_MAX(tmpl.txq.max_inline,
-					      max_tso_inline);
-		tmpl.txq.tso_en = 1;
-	}
-	if (priv->tunnel_en)
-		tmpl.txq.tunnel_en = 1;
-	tmpl.txq.elts =
-		(struct rte_mbuf *(*)[1 << tmpl.txq.elts_n])
-		((uintptr_t)txq_ctrl + sizeof(*txq_ctrl));
-	txq_alloc_elts(&tmpl, desc);
-	/* Clean up txq in case we're reinitializing it. */
-	DEBUG("%p: cleaning-up old txq just in case", (void *)txq_ctrl);
-	mlx5_txq_cleanup(txq_ctrl);
-	*txq_ctrl = tmpl;
-	DEBUG("%p: txq updated with %p", (void *)txq_ctrl, (void *)&tmpl);
-	/* Pre-register known mempools. */
-	rte_mempool_walk(mlx5_txq_mp2mr_iter, txq_ctrl);
-	return 0;
-}
-
-/**
  * DPDK callback to configure a TX queue.
  *
  * @param dev
@@ -263,8 +161,9 @@ mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	struct mlx5_txq_data *txq = (*priv->txqs)[idx];
 	struct mlx5_txq_ctrl *txq_ctrl =
 		container_of(txq, struct mlx5_txq_ctrl, txq);
-	int ret;
+	int ret = 0;
 
+	(void)conf;
 	if (mlx5_is_secondary())
 		return -E_RTE_SECONDARY;
 
@@ -290,57 +189,23 @@ mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		priv_unlock(priv);
 		return -EOVERFLOW;
 	}
-	if (txq != NULL) {
-		DEBUG("%p: reusing already allocated queue index %u (%p)",
-		      (void *)dev, idx, (void *)txq);
-		if (dev->data->dev_started) {
-			priv_unlock(priv);
-			return -EEXIST;
-		}
-		(*priv->txqs)[idx] = NULL;
-		mlx5_txq_cleanup(txq_ctrl);
-		/* Resize if txq size is changed. */
-		if (txq_ctrl->txq.elts_n != log2above(desc)) {
-			txq_ctrl = rte_realloc(txq_ctrl,
-					       sizeof(*txq_ctrl) +
-					       desc * sizeof(struct rte_mbuf *),
-					       RTE_CACHE_LINE_SIZE);
-			if (!txq_ctrl) {
-				ERROR("%p: unable to reallocate queue index %u",
-					(void *)dev, idx);
-				priv_unlock(priv);
-				return -ENOMEM;
-			}
-		}
-	} else {
-		txq_ctrl =
-			rte_calloc_socket("TXQ", 1,
-					  sizeof(*txq_ctrl) +
-					  desc * sizeof(struct rte_mbuf *),
-					  0, socket);
-		if (txq_ctrl == NULL) {
-			ERROR("%p: unable to allocate queue index %u",
-			      (void *)dev, idx);
-			priv_unlock(priv);
-			return -ENOMEM;
-		}
+	if (!mlx5_priv_txq_releasable(priv, idx)) {
+		ret = EBUSY;
+		ERROR("%p: unable to release queue index %u",
+		      (void *)dev, idx);
+		goto out;
 	}
-	ret = mlx5_txq_ctrl_setup(dev, txq_ctrl, desc, socket, conf);
-	if (ret) {
-		rte_free(txq_ctrl);
+	mlx5_priv_txq_release(priv, idx);
+	txq_ctrl = mlx5_priv_txq_new(priv, idx, desc, socket);
+	if (!txq_ctrl) {
+		ERROR("%p: unable to allocate queue index %u",
+		      (void *)dev, idx);
+		ret = ENOMEM;
 		goto out;
 	}
-	txq_ctrl->txq.stats.idx = idx;
 	DEBUG("%p: adding TX queue %p to list",
 	      (void *)dev, (void *)txq_ctrl);
 	(*priv->txqs)[idx] = &txq_ctrl->txq;
-	txq_ctrl->ibv = mlx5_priv_txq_ibv_new(priv, idx);
-	if (!txq_ctrl->ibv) {
-		ret = EAGAIN;
-		goto out;
-	}
-	/* Update send callback. */
-	priv_select_tx_function(priv);
 out:
 	priv_unlock(priv);
 	return -ret;
@@ -616,3 +481,219 @@ mlx5_priv_txq_ibv_verify(struct priv *priv)
 	}
 	return ret;
 }
+
+/**
+ * Create a DPDK Tx queue.
+ *
+ * The control structure and its trailing array of mbuf pointers are
+ * allocated as a single zeroed chunk on the requested socket. The new
+ * object starts with one reference and is linked in priv->txqsctrl.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param idx
+ *   TX queue index.
+ * @param desc
+ *   Number of descriptors to configure in queue.
+ * @param socket
+ *   NUMA socket on which memory must be allocated.
+ *
+ * @return
+ *   A DPDK queue object on success, NULL if the allocation failed.
+ */
+struct mlx5_txq_ctrl*
+mlx5_priv_txq_new(struct priv *priv, uint16_t idx, uint16_t desc,
+		  unsigned int socket)
+{
+	const unsigned int max_tso_inline =
+		((MLX5_MAX_TSO_HEADER + (RTE_CACHE_LINE_SIZE - 1)) /
+		 RTE_CACHE_LINE_SIZE);
+	struct mlx5_txq_ctrl *tmpl;
+
+	tmpl = rte_calloc_socket("TXQ", 1,
+				 sizeof(*tmpl) +
+				 desc * sizeof(struct rte_mbuf *),
+				 0, socket);
+	if (!tmpl)
+		return NULL;
+	assert(desc > MLX5_TX_COMP_THRESH);
+	tmpl->priv = priv;
+	/* Remember the socket: the field would otherwise stay zeroed. */
+	tmpl->socket = socket;
+	tmpl->txq.elts_n = log2above(desc);
+	if (priv->mps == MLX5_MPW_ENHANCED)
+		tmpl->txq.mpw_hdr_dseg = priv->mpw_hdr_dseg;
+	/* MRs will be registered in mp2mr[] later. */
+	DEBUG("priv->device_attr.max_qp_wr is %d",
+	      priv->device_attr.max_qp_wr);
+	DEBUG("priv->device_attr.max_sge is %d",
+	      priv->device_attr.max_sge);
+	if (priv->txq_inline && (priv->txqs_n >= priv->txqs_inline)) {
+		/* Inline size is expressed in cache lines, rounded up. */
+		tmpl->txq.max_inline =
+			((priv->txq_inline + (RTE_CACHE_LINE_SIZE - 1)) /
+			 RTE_CACHE_LINE_SIZE);
+		tmpl->txq.inline_en = 1;
+		/* TSO and MPS can't be enabled concurrently. */
+		assert(!priv->tso || !priv->mps);
+		if (priv->mps == MLX5_MPW_ENHANCED) {
+			tmpl->txq.inline_max_packet_sz =
+				priv->inline_max_packet_sz;
+			/* To minimize the size of data set, avoid requesting
+			 * too large WQ.
+			 */
+			tmpl->max_inline_data =
+				((RTE_MIN(priv->txq_inline,
+					  priv->inline_max_packet_sz) +
+				  (RTE_CACHE_LINE_SIZE - 1)) /
+				 RTE_CACHE_LINE_SIZE) * RTE_CACHE_LINE_SIZE;
+		} else if (priv->tso) {
+			int inline_diff = tmpl->txq.max_inline - max_tso_inline;
+
+			/*
+			 * Adjust inline value as Verbs aggregates
+			 * tso_inline and txq_inline fields.
+			 */
+			tmpl->max_inline_data = inline_diff > 0 ?
+					       inline_diff *
+					       RTE_CACHE_LINE_SIZE :
+					       0;
+		} else {
+			tmpl->max_inline_data =
+				tmpl->txq.max_inline * RTE_CACHE_LINE_SIZE;
+		}
+	}
+	if (priv->tso) {
+		tmpl->max_tso_header = max_tso_inline * RTE_CACHE_LINE_SIZE;
+		tmpl->txq.max_inline = RTE_MAX(tmpl->txq.max_inline,
+					       max_tso_inline);
+		tmpl->txq.tso_en = 1;
+	}
+	if (priv->tunnel_en)
+		tmpl->txq.tunnel_en = 1;
+	/* The mbuf pointer array sits right after the control structure. */
+	tmpl->txq.elts =
+		(struct rte_mbuf *(*)[1 << tmpl->txq.elts_n])(tmpl + 1);
+	tmpl->txq.stats.idx = idx;
+	/* Zeroed by the allocator, so this sets the counter to one. */
+	rte_atomic32_inc(&tmpl->refcnt);
+	LIST_INSERT_HEAD(&priv->txqsctrl, tmpl, next);
+	return tmpl;
+}
+
+/**
+ * Look up a Tx queue by index and take a reference on it.
+ *
+ * Besides incrementing the queue's own counter, this also calls
+ * mlx5_priv_txq_ibv_get() for the queue and priv_mr_get() for every
+ * populated mp2mr[] entry; the pointers they return are not used here.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param idx
+ *   TX queue index.
+ *
+ * @return
+ *   A pointer to the queue if it exists, NULL otherwise.
+ */
+struct mlx5_txq_ctrl*
+mlx5_priv_txq_get(struct priv *priv, uint16_t idx)
+{
+	struct mlx5_txq_data *txq_data = (*priv->txqs)[idx];
+	struct mlx5_txq_ctrl *ctrl;
+	unsigned int slot;
+
+	/* Nothing configured at this index. */
+	if (!txq_data)
+		return NULL;
+	ctrl = container_of(txq_data, struct mlx5_txq_ctrl, txq);
+	/* Called for its effect on the Verbs object counter only. */
+	(void)mlx5_priv_txq_ibv_get(priv, idx);
+	for (slot = 0; slot != MLX5_PMD_TX_MP_CACHE; ++slot) {
+		if (!ctrl->txq.mp2mr[slot])
+			continue;
+		(void)priv_mr_get(priv, ctrl->txq.mp2mr[slot]->mp);
+	}
+	rte_atomic32_inc(&ctrl->refcnt);
+	return ctrl;
+}
+
+/**
+ * Release a Tx queue.
+ *
+ * Drops one reference on the queue's Verbs object, on each memory region
+ * cached in mp2mr[] and on the queue itself. The queue is freed and its
+ * slot in priv->txqs cleared once its own counter reaches zero.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param idx
+ *   TX queue index.
+ *
+ * @return
+ *   0 if the queue was freed or did not exist, EBUSY if it is still
+ *   referenced.
+ */
+int
+mlx5_priv_txq_release(struct priv *priv, uint16_t idx)
+{
+	unsigned int i;
+	struct mlx5_txq_ctrl *txq;
+
+	if (!(*priv->txqs)[idx])
+		return 0;
+	txq = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
+	/*
+	 * mlx5_priv_txq_ibv_release() returns 0 once the Verbs object has
+	 * been destroyed. Forget the pointer in that case only: keeping it
+	 * would leave it dangling, while clearing it on EBUSY would lose a
+	 * still-live object.
+	 */
+	if (txq->ibv && !mlx5_priv_txq_ibv_release(priv, txq->ibv))
+		txq->ibv = NULL;
+	for (i = 0; i != MLX5_PMD_TX_MP_CACHE; ++i) {
+		if (txq->txq.mp2mr[i]) {
+			/*
+			 * Once released, the entry must not be kept whatever
+			 * the outcome, as the region may have been freed.
+			 */
+			priv_mr_release(priv, txq->txq.mp2mr[i]);
+			txq->txq.mp2mr[i] = NULL;
+		}
+	}
+	if (rte_atomic32_dec_and_test(&txq->refcnt)) {
+		txq_free_elts(txq);
+		LIST_REMOVE(txq, next);
+		rte_free(txq);
+		(*priv->txqs)[idx] = NULL;
+		return 0;
+	}
+	return EBUSY;
+}
+
+/**
+ * Tell whether a Tx queue holds a single reference.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param idx
+ *   TX queue index.
+ *
+ * @return
+ *   1 if the queue's reference counter equals 1, 0 if it does not,
+ *   -1 if no queue exists at this index.
+ */
+int
+mlx5_priv_txq_releasable(struct priv *priv, uint16_t idx)
+{
+	struct mlx5_txq_data *txq_data = (*priv->txqs)[idx];
+	struct mlx5_txq_ctrl *ctrl;
+
+	if (txq_data == NULL)
+		return -1;
+	ctrl = container_of(txq_data, struct mlx5_txq_ctrl, txq);
+	return rte_atomic32_read(&ctrl->refcnt) == 1;
+}
+
+/**
+ * Count the DPDK Tx queues still linked in the private structure.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ *
+ * @return
+ *   The number of objects not released.
+ */
+int
+mlx5_priv_txq_verify(struct priv *priv)
+{
+	int count = 0;
+	struct mlx5_txq_ctrl *leftover;
+
+	/* Anything still on the list was never released: log and count it. */
+	LIST_FOREACH(leftover, &priv->txqsctrl, next) {
+		DEBUG("%p: Tx Queue %p still referenced", (void*)priv,
+		      (void*)leftover);
+		count++;
+	}
+	return count;
+}
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v1 11/21] net/mlx5: add reference counter on DPDK Rx queues
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (10 preceding siblings ...)
  2017-08-02 14:10 ` [dpdk-dev] [PATCH v1 10/21] net/mlx5: add reference counter on DPDK Tx queues Nelio Laranjeiro
@ 2017-08-02 14:10 ` Nelio Laranjeiro
  2017-08-02 14:10 ` [dpdk-dev] [PATCH v1 12/21] net/mlx5: remove queue drop support Nelio Laranjeiro
                   ` (43 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-08-02 14:10 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5.c         |  16 +-
 drivers/net/mlx5/mlx5.h         |   1 +
 drivers/net/mlx5/mlx5_rxq.c     | 492 +++++++++++++++++++++-------------------
 drivers/net/mlx5/mlx5_rxtx.h    |  10 +
 drivers/net/mlx5/mlx5_trigger.c |  45 ++++
 5 files changed, 321 insertions(+), 243 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index c8be196..b37292c 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -156,17 +156,8 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	if (priv->rxqs != NULL) {
 		/* XXX race condition if mlx5_rx_burst() is still running. */
 		usleep(1000);
-		for (i = 0; (i != priv->rxqs_n); ++i) {
-			struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
-			struct mlx5_rxq_ctrl *rxq_ctrl;
-
-			if (rxq == NULL)
-				continue;
-			rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq);
-			(*priv->rxqs)[i] = NULL;
-			mlx5_rxq_cleanup(rxq_ctrl);
-			rte_free(rxq_ctrl);
-		}
+		for (i = 0; (i != priv->rxqs_n); ++i)
+			mlx5_priv_rxq_release(priv, i);
 		priv->rxqs_n = 0;
 		priv->rxqs = NULL;
 	}
@@ -194,6 +185,9 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	i = mlx5_priv_rxq_ibv_verify(priv);
 	if (i)
 		WARN("%p: some Verbs Rx queue still remain", (void*)priv);
+	i = mlx5_priv_rxq_verify(priv);
+	if (i)
+		WARN("%p: some Rx Queues still remain", (void*)priv);
 	i = mlx5_priv_txq_ibv_verify(priv);
 	if (i)
 		WARN("%p: some Verbs Tx queue still remain", (void*)priv);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 4dd432b..448995e 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -146,6 +146,7 @@ struct priv {
 	struct rte_flow_drop *flow_drop_queue; /* Flow drop queue. */
 	TAILQ_HEAD(mlx5_flows, rte_flow) flows; /* RTE Flow rules. */
 	LIST_HEAD(mr, mlx5_mr) mr; /* Memory region. */
+	LIST_HEAD(rxq, mlx5_rxq_ctrl) rxqsctrl; /* DPDK Rx queues. */
 	LIST_HEAD(rxqibv, mlx5_rxq_ibv) rxqsibv; /* Verbs Rx queues. */
 	LIST_HEAD(txq, mlx5_txq_ctrl) txqsctrl; /* DPDK Tx queues. */
 	LIST_HEAD(txqibv, mlx5_txq_ibv) txqsibv; /* Verbs Tx queues. */
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 1663734..3b75a7e 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -37,6 +37,7 @@
 #include <string.h>
 #include <stdint.h>
 #include <fcntl.h>
+#include <sys/queue.h>
 
 /* Verbs header. */
 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
@@ -631,16 +632,15 @@ priv_rehash_flows(struct priv *priv)
  *
  * @param rxq_ctrl
  *   Pointer to RX queue structure.
- * @param elts_n
- *   Number of elements to allocate.
  *
  * @return
  *   0 on success, errno value on failure.
  */
-static int
-rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl, unsigned int elts_n)
+int
+rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
 {
 	const unsigned int sges_n = 1 << rxq_ctrl->rxq.sges_n;
+	unsigned int elts_n = 1 << rxq_ctrl->rxq.elts_n;
 	unsigned int i;
 	int ret = 0;
 
@@ -669,9 +669,11 @@ rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl, unsigned int elts_n)
 		NB_SEGS(buf) = 1;
 		(*rxq_ctrl->rxq.elts)[i] = buf;
 	}
+	/* If Rx vector is activated. */
 	if (rxq_check_vec_support(&rxq_ctrl->rxq) > 0) {
 		struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
 		struct rte_mbuf *mbuf_init = &rxq->fake_mbuf;
+		int j;
 
 		assert(rxq->elts_n == rxq->cqe_n);
 		/* Initialize default rearm_data for vPMD. */
@@ -684,10 +686,12 @@ rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl, unsigned int elts_n)
 		 * rearm_data covers previous fields.
 		 */
 		rte_compiler_barrier();
-		rxq->mbuf_initializer = *(uint64_t *)&mbuf_init->rearm_data;
+		rxq->mbuf_initializer =
+			*(uint64_t *)&mbuf_init->rearm_data;
 		/* Padding with a fake mbuf for vectorized Rx. */
-		for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
-			(*rxq->elts)[elts_n + i] = &rxq->fake_mbuf;
+		for (j = 0; j < MLX5_VPMD_DESCS_PER_LOOP; ++j)
+			(*rxq->elts)[elts_n + j] = &rxq->fake_mbuf;
+		/* Mark that it need to be cleaned up for rxq_alloc_elts(). */
 	}
 	DEBUG("%p: allocated and configured %u segments (max %u packets)",
 	      (void *)rxq_ctrl, elts_n, elts_n / (1 << rxq_ctrl->rxq.sges_n));
@@ -740,174 +744,6 @@ rxq_free_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
 }
 
 /**
- * Clean up a RX queue.
- *
- * Destroy objects, free allocated memory and reset the structure for reuse.
- *
- * @param rxq_ctrl
- *   Pointer to RX queue structure.
- */
-void
-mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl)
-{
-	DEBUG("cleaning up %p", (void *)rxq_ctrl);
-	rxq_free_elts(rxq_ctrl);
-	if (rxq_ctrl->ibv)
-		mlx5_priv_rxq_ibv_release(rxq_ctrl->priv, rxq_ctrl->ibv);
-	memset(rxq_ctrl, 0, sizeof(*rxq_ctrl));
-}
-
-/**
- * Configure a RX queue.
- *
- * @param dev
- *   Pointer to Ethernet device structure.
- * @param rxq_ctrl
- *   Pointer to RX queue structure.
- * @param desc
- *   Number of descriptors to configure in queue.
- * @param socket
- *   NUMA socket on which memory must be allocated.
- * @param[in] conf
- *   Thresholds parameters.
- * @param mp
- *   Memory pool for buffer allocations.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-int
-mlx5_rxq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
-		    uint16_t desc, unsigned int socket,
-		    const struct rte_eth_rxconf *conf, struct rte_mempool *mp)
-{
-	struct priv *priv = dev->data->dev_private;
-	struct mlx5_rxq_ctrl tmpl = {
-		.priv = priv,
-		.socket = socket,
-		.rxq = {
-			.elts = rte_calloc_socket("RXQ", 1,
-						  desc *
-						  sizeof(struct rte_mbuf *), 0,
-						  socket),
-			.elts_n = log2above(desc),
-			.mp = mp,
-			.rss_hash = priv->rxqs_n > 1,
-		},
-	};
-	unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
-	const uint16_t desc_n =
-		desc + priv->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
-	struct rte_mbuf *(*elts)[desc_n] = NULL;
-	int ret = 0;
-
-	(void)conf; /* Thresholds configuration (ignored). */
-	if (dev->data->dev_conf.intr_conf.rxq)
-		tmpl.memory_channel = 1;
-	/* Enable scattered packets support for this queue if necessary. */
-	assert(mb_len >= RTE_PKTMBUF_HEADROOM);
-	if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
-	    (mb_len - RTE_PKTMBUF_HEADROOM)) {
-		tmpl.rxq.sges_n = 0;
-	} else if (dev->data->dev_conf.rxmode.enable_scatter) {
-		unsigned int size =
-			RTE_PKTMBUF_HEADROOM +
-			dev->data->dev_conf.rxmode.max_rx_pkt_len;
-		unsigned int sges_n;
-
-		/*
-		 * Determine the number of SGEs needed for a full packet
-		 * and round it to the next power of two.
-		 */
-		sges_n = log2above((size / mb_len) + !!(size % mb_len));
-		tmpl.rxq.sges_n = sges_n;
-		/* Make sure rxq.sges_n did not overflow. */
-		size = mb_len * (1 << tmpl.rxq.sges_n);
-		size -= RTE_PKTMBUF_HEADROOM;
-		if (size < dev->data->dev_conf.rxmode.max_rx_pkt_len) {
-			ERROR("%p: too many SGEs (%u) needed to handle"
-			      " requested maximum packet size %u",
-			      (void *)dev,
-			      1 << sges_n,
-			      dev->data->dev_conf.rxmode.max_rx_pkt_len);
-			return EOVERFLOW;
-		}
-	} else {
-		WARN("%p: the requested maximum Rx packet size (%u) is"
-		     " larger than a single mbuf (%u) and scattered"
-		     " mode has not been requested",
-		     (void *)dev,
-		     dev->data->dev_conf.rxmode.max_rx_pkt_len,
-		     mb_len - RTE_PKTMBUF_HEADROOM);
-	}
-	DEBUG("%p: maximum number of segments per packet: %u",
-	      (void *)dev, 1 << tmpl.rxq.sges_n);
-	if (desc % (1 << tmpl.rxq.sges_n)) {
-		ERROR("%p: number of RX queue descriptors (%u) is not a"
-		      " multiple of SGEs per packet (%u)",
-		      (void *)dev,
-		      desc,
-		      1 << tmpl.rxq.sges_n);
-		return EINVAL;
-	}
-	/* Toggle RX checksum offload if hardware supports it. */
-	if (priv->hw_csum)
-		tmpl.rxq.csum = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
-	if (priv->hw_csum_l2tun)
-		tmpl.rxq.csum_l2tun =
-			!!dev->data->dev_conf.rxmode.hw_ip_checksum;
-	/* Configure VLAN stripping. */
-	tmpl.rxq.vlan_strip = (priv->hw_vlan_strip &&
-			       !!dev->data->dev_conf.rxmode.hw_vlan_strip);
-	/* By default, FCS (CRC) is stripped by hardware. */
-	if (dev->data->dev_conf.rxmode.hw_strip_crc) {
-		tmpl.rxq.crc_present = 0;
-	} else if (priv->hw_fcs_strip) {
-		tmpl.rxq.crc_present = 1;
-	} else {
-		WARN("%p: CRC stripping has been disabled but will still"
-		     " be performed by hardware, make sure MLNX_OFED and"
-		     " firmware are up to date",
-		     (void *)dev);
-		tmpl.rxq.crc_present = 0;
-	}
-	DEBUG("%p: CRC stripping is %s, %u bytes will be subtracted from"
-	      " incoming frames to hide it",
-	      (void *)dev,
-	      tmpl.rxq.crc_present ? "disabled" : "enabled",
-	      tmpl.rxq.crc_present << 2);
-	/* Save port ID. */
-	tmpl.rxq.port_id = dev->data->port_id;
-	DEBUG("%p: RTE port ID: %u", (void *)rxq_ctrl, tmpl.rxq.port_id);
-	ret = rxq_alloc_elts(&tmpl, desc);
-	if (ret) {
-		ERROR("%p: RXQ allocation failed: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	/* Clean up rxq in case we're reinitializing it. */
-	DEBUG("%p: cleaning-up old rxq just in case", (void *)rxq_ctrl);
-	mlx5_rxq_cleanup(rxq_ctrl);
-	/* Move mbuf pointers to dedicated storage area in RX queue. */
-	elts = (void *)(rxq_ctrl + 1);
-	rte_memcpy(elts, tmpl.rxq.elts, sizeof(*elts));
-#ifndef NDEBUG
-	memset(tmpl.rxq.elts, 0x55, sizeof(*elts));
-#endif
-	rte_free(tmpl.rxq.elts);
-	tmpl.rxq.elts = elts;
-	*rxq_ctrl = tmpl;
-	DEBUG("%p: rxq updated with %p", (void *)rxq_ctrl, (void *)&tmpl);
-	assert(ret == 0);
-	return 0;
-error:
-	rte_free(tmpl.rxq.elts);
-	mlx5_rxq_cleanup(&tmpl);
-	assert(ret > 0);
-	return ret;
-}
-
-/**
  * DPDK callback to configure a RX queue.
  *
  * @param dev
@@ -935,13 +771,11 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
 	struct mlx5_rxq_ctrl *rxq_ctrl =
 		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
-	const uint16_t desc_n =
-		desc + priv->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
-	int ret;
+	int ret = 0;
 
+	(void)conf;
 	if (mlx5_is_secondary())
 		return -E_RTE_SECONDARY;
-
 	priv_lock(priv);
 	if (!rte_is_power_of_2(desc)) {
 		desc = 1 << log2above(desc);
@@ -957,54 +791,23 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		priv_unlock(priv);
 		return -EOVERFLOW;
 	}
-	if (rxq != NULL) {
-		DEBUG("%p: reusing already allocated queue index %u (%p)",
-		      (void *)dev, idx, (void *)rxq);
-		if (dev->data->dev_started) {
-			priv_unlock(priv);
-			return -EEXIST;
-		}
-		(*priv->rxqs)[idx] = NULL;
-		mlx5_rxq_cleanup(rxq_ctrl);
-		/* Resize if rxq size is changed. */
-		if (rxq_ctrl->rxq.elts_n != log2above(desc)) {
-			rxq_ctrl = rte_realloc(rxq_ctrl,
-					       sizeof(*rxq_ctrl) + desc_n *
-					       sizeof(struct rte_mbuf *),
-					       RTE_CACHE_LINE_SIZE);
-			if (!rxq_ctrl) {
-				ERROR("%p: unable to reallocate queue index %u",
-					(void *)dev, idx);
-				priv_unlock(priv);
-				return -ENOMEM;
-			}
-		}
-	} else {
-		rxq_ctrl = rte_calloc_socket("RXQ", 1, sizeof(*rxq_ctrl) +
-					     desc_n *
-					     sizeof(struct rte_mbuf *),
-					     0, socket);
-		if (rxq_ctrl == NULL) {
-			ERROR("%p: unable to allocate queue index %u",
-			      (void *)dev, idx);
-			priv_unlock(priv);
-			return -ENOMEM;
-		}
+	if (!mlx5_priv_rxq_releasable(priv, idx)) {
+		ret = EBUSY;
+		ERROR("%p: unable to release queue index %u",
+		      (void *)dev, idx);
+		goto out;
 	}
-	ret = mlx5_rxq_ctrl_setup(dev, rxq_ctrl, desc, socket, conf, mp);
-	if (ret) {
-		rte_free(rxq_ctrl);
+	mlx5_priv_rxq_release(priv, idx);
+	rxq_ctrl = mlx5_priv_rxq_new(priv, idx, desc, socket, mp);
+	if (!rxq_ctrl) {
+		ERROR("%p: unable to allocate queue index %u",
+		      (void *)dev, idx);
+		ret = ENOMEM;
 		goto out;
 	}
-	rxq_ctrl->rxq.stats.idx = idx;
 	DEBUG("%p: adding RX queue %p to list",
 	      (void *)dev, (void *)rxq_ctrl);
 	(*priv->rxqs)[idx] = &rxq_ctrl->rxq;
-	rxq_ctrl->ibv = mlx5_priv_rxq_ibv_new(priv, idx);
-	if (!rxq_ctrl->ibv) {
-		ret = EAGAIN;
-		goto out;
-	}
 out:
 	priv_unlock(priv);
 	return -ret;
@@ -1022,7 +825,6 @@ mlx5_rx_queue_release(void *dpdk_rxq)
 	struct mlx5_rxq_data *rxq = (struct mlx5_rxq_data *)dpdk_rxq;
 	struct mlx5_rxq_ctrl *rxq_ctrl;
 	struct priv *priv;
-	unsigned int i;
 
 	if (mlx5_is_secondary())
 		return;
@@ -1032,18 +834,10 @@ mlx5_rx_queue_release(void *dpdk_rxq)
 	rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq);
 	priv = rxq_ctrl->priv;
 	priv_lock(priv);
-	if (!mlx5_priv_rxq_ibv_releasable(priv, rxq_ctrl->ibv))
+	if (!mlx5_priv_rxq_releasable(priv, rxq_ctrl->rxq.stats.idx))
 		rte_panic("Rx queue %p is still used by a flow and cannot be"
 			  " removed\n", (void *)rxq_ctrl);
-	for (i = 0; (i != priv->rxqs_n); ++i)
-		if ((*priv->rxqs)[i] == rxq) {
-			DEBUG("%p: removing RX queue %p from list",
-			      (void *)priv->dev, (void *)rxq_ctrl);
-			(*priv->rxqs)[i] = NULL;
-			break;
-		}
-	mlx5_rxq_cleanup(rxq_ctrl);
-	rte_free(rxq_ctrl);
+	mlx5_priv_rxq_release(priv, rxq_ctrl->rxq.stats.idx);
 	priv_unlock(priv);
 }
 
@@ -1511,3 +1305,237 @@ mlx5_priv_rxq_ibv_releasable(struct priv *priv, struct mlx5_rxq_ibv *rxq)
 	(void)priv;
 	return (rte_atomic32_read(&rxq->refcnt) == 1);
 }
+
+/**
+ * Create a DPDK Rx queue.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param idx
+ *   RX queue index.
+ * @param desc
+ *   Number of descriptors to configure in queue.
+ * @param socket
+ *   NUMA socket on which memory must be allocated.
+ *
+ * @return
+ *   A DPDK queue object on success.
+ */
+struct mlx5_rxq_ctrl*
+mlx5_priv_rxq_new(struct priv *priv, uint16_t idx, uint16_t desc,
+		  unsigned int socket, struct rte_mempool *mp)
+{
+	struct rte_eth_dev *dev = priv->dev;
+	struct mlx5_rxq_ctrl *tmpl;
+	unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
+
+	tmpl = rte_calloc_socket("TXQ", 1,
+				 sizeof(*tmpl) +
+				 desc * sizeof(struct rte_mbuf *),
+				 0, socket);
+	if (!tmpl)
+		return NULL;
+	if (priv->dev->data->dev_conf.intr_conf.rxq)
+		tmpl->memory_channel = 1;
+	/* Enable scattered packets support for this queue if necessary. */
+	assert(mb_len >= RTE_PKTMBUF_HEADROOM);
+	if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
+	    (mb_len - RTE_PKTMBUF_HEADROOM)) {
+		tmpl->rxq.sges_n = 0;
+	} else if (dev->data->dev_conf.rxmode.enable_scatter) {
+		unsigned int size =
+			RTE_PKTMBUF_HEADROOM +
+			dev->data->dev_conf.rxmode.max_rx_pkt_len;
+		unsigned int sges_n;
+
+		/*
+		 * Determine the number of SGEs needed for a full packet
+		 * and round it to the next power of two.
+		 */
+		sges_n = log2above((size / mb_len) + !!(size % mb_len));
+		tmpl->rxq.sges_n = sges_n;
+		/* Make sure rxq.sges_n did not overflow. */
+		size = mb_len * (1 << tmpl->rxq.sges_n);
+		size -= RTE_PKTMBUF_HEADROOM;
+		if (size < dev->data->dev_conf.rxmode.max_rx_pkt_len) {
+			ERROR("%p: too many SGEs (%u) needed to handle"
+			      " requested maximum packet size %u",
+			      (void *)dev,
+			      1 << sges_n,
+			      dev->data->dev_conf.rxmode.max_rx_pkt_len);
+			goto error;
+		}
+	} else {
+		WARN("%p: the requested maximum Rx packet size (%u) is"
+		     " larger than a single mbuf (%u) and scattered"
+		     " mode has not been requested",
+		     (void *)dev,
+		     dev->data->dev_conf.rxmode.max_rx_pkt_len,
+		     mb_len - RTE_PKTMBUF_HEADROOM);
+	}
+	DEBUG("%p: maximum number of segments per packet: %u",
+	      (void *)dev, 1 << tmpl->rxq.sges_n);
+	if (desc % (1 << tmpl->rxq.sges_n)) {
+		ERROR("%p: number of RX queue descriptors (%u) is not a"
+		      " multiple of SGEs per packet (%u)",
+		      (void *)dev,
+		      desc,
+		      1 << tmpl->rxq.sges_n);
+		goto error;
+	}
+	/* Toggle RX checksum offload if hardware supports it. */
+	if (priv->hw_csum)
+		tmpl->rxq.csum = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
+	if (priv->hw_csum_l2tun)
+		tmpl->rxq.csum_l2tun =
+			!!dev->data->dev_conf.rxmode.hw_ip_checksum;
+	/* Configure VLAN stripping. */
+	tmpl->rxq.vlan_strip = (priv->hw_vlan_strip &&
+			       !!dev->data->dev_conf.rxmode.hw_vlan_strip);
+	/* By default, FCS (CRC) is stripped by hardware. */
+	if (dev->data->dev_conf.rxmode.hw_strip_crc) {
+		tmpl->rxq.crc_present = 0;
+	} else if (priv->hw_fcs_strip) {
+		tmpl->rxq.crc_present = 1;
+	} else {
+		WARN("%p: CRC stripping has been disabled but will still"
+		     " be performed by hardware, make sure MLNX_OFED and"
+		     " firmware are up to date",
+		     (void *)dev);
+		tmpl->rxq.crc_present = 0;
+	}
+	DEBUG("%p: CRC stripping is %s, %u bytes will be subtracted from"
+	      " incoming frames to hide it",
+	      (void *)dev,
+	      tmpl->rxq.crc_present ? "disabled" : "enabled",
+	      tmpl->rxq.crc_present << 2);
+	/* Save port ID. */
+	tmpl->rxq.port_id = dev->data->port_id;
+	tmpl->priv = priv;
+	tmpl->rxq.mp = mp;
+	tmpl->rxq.stats.idx = idx;
+	tmpl->rxq.elts_n = log2above(desc);
+	tmpl->rxq.elts =
+		(struct rte_mbuf *(*)[1 << tmpl->rxq.elts_n])(tmpl + 1);
+	rte_atomic32_inc(&tmpl->refcnt);
+	DEBUG("%p: Rx queue %p: refcnt %d", (void*)priv,
+	      (void*)tmpl, rte_atomic32_read(&tmpl->refcnt));
+	LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
+	return tmpl;
+error:
+	rte_free(tmpl);
+	return NULL;
+}
+
+/**
+ * Get a Rx queue.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param idx
+ *   RX queue index.
+ *
+ * @return
+ *   A pointer to the queue if it exists.
+ */
+struct mlx5_rxq_ctrl*
+mlx5_priv_rxq_get(struct priv *priv, uint16_t idx)
+{
+	struct mlx5_rxq_ctrl *ctrl = NULL;
+
+	if ((*priv->rxqs)[idx]) {
+		ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl,
+				    rxq);
+		struct mlx5_rxq_ibv *ibv;
+
+		(void)ibv;
+		ibv = mlx5_priv_rxq_ibv_get(priv, idx);
+		rte_atomic32_inc(&ctrl->refcnt);
+		DEBUG("%p: Rx queue %p: refcnt %d", (void*)priv,
+		      (void*)ctrl, rte_atomic32_read(&ctrl->refcnt));
+	}
+	return ctrl;
+}
+
+/**
+ * Release a Rx queue.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param idx
+ *   TX queue index.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+int
+mlx5_priv_rxq_release(struct priv *priv, uint16_t idx)
+{
+	struct mlx5_rxq_ctrl *rxq;
+
+	if (!(*priv->rxqs)[idx])
+		return 0;
+	rxq = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
+	assert(rxq->priv);
+	if (rxq->ibv) {
+		int ret;
+
+		ret = mlx5_priv_rxq_ibv_release(rxq->priv, rxq->ibv);
+		if (!ret)
+			rxq->ibv = NULL;
+	}
+	DEBUG("%p: Rx queue %p: refcnt %d", (void*)priv,
+	      (void*)rxq, rte_atomic32_read(&rxq->refcnt));
+	if (rte_atomic32_dec_and_test(&rxq->refcnt)) {
+		rxq_free_elts(rxq);
+		LIST_REMOVE(rxq, next);
+		rte_free(rxq);
+		(*priv->rxqs)[idx] = NULL;
+		return 0;
+	}
+	return EBUSY;
+}
+
+/**
+ * Verify if the queue can be released.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param idx
+ *   RX queue index.
+ *
+ * @return
+ *   1 if the queue can be released, 0 if not, -1 if it does not exist.
+ */
+int
+mlx5_priv_rxq_releasable(struct priv *priv, uint16_t idx)
+{
+	struct mlx5_rxq_ctrl *rxq;
+
+	if (!(*priv->rxqs)[idx])
+		return -1;
+	rxq = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
+	return (rte_atomic32_read(&rxq->refcnt) == 1);
+}
+
+/**
+ * Verify the Rx Queue list is empty
+ *
+ * @param priv
+ *  Pointer to private structure.
+ *
+ * @return the number of objects not released.
+ */
+int
+mlx5_priv_rxq_verify(struct priv *priv)
+{
+	struct mlx5_rxq_ctrl *rxq;
+	int ret = 0;
+
+	LIST_FOREACH(rxq, &priv->rxqsctrl, next) {
+		DEBUG("%p: Rx Queue %p still referenced", (void*)priv,
+		      (void*)rxq);
+		++ret;
+	}
+	return ret;
+}
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 13b50a1..672793a 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -143,6 +143,8 @@ struct mlx5_rxq_ibv {
 
 /* RX queue control descriptor. */
 struct mlx5_rxq_ctrl {
+	LIST_ENTRY(mlx5_rxq_ctrl) next; /* Pointer to the next element. */
+	rte_atomic32_t refcnt; /* Reference counter. */
 	struct priv *priv; /* Back pointer to private data. */
 	struct mlx5_rxq_ibv *ibv; /* Verbs elements. */
 	struct mlx5_rxq_data rxq; /* Data path structure. */
@@ -335,6 +337,14 @@ struct mlx5_rxq_ibv* mlx5_priv_rxq_ibv_get(struct priv *priv, uint16_t idx);
 int mlx5_priv_rxq_ibv_release(struct priv *priv, struct mlx5_rxq_ibv *rxq);
 int mlx5_priv_rxq_ibv_releasable(struct priv *priv, struct mlx5_rxq_ibv *rxq);
 int mlx5_priv_rxq_ibv_verify(struct priv *priv);
+struct mlx5_rxq_ctrl* mlx5_priv_rxq_new(struct priv *priv, uint16_t idx,
+					uint16_t desc, unsigned int socket,
+					struct rte_mempool *mp);
+struct mlx5_rxq_ctrl* mlx5_priv_rxq_get(struct priv *priv, uint16_t idx);
+int mlx5_priv_rxq_release(struct priv *priv, uint16_t idx);
+int mlx5_priv_rxq_releasable(struct priv *priv, uint16_t idx);
+int mlx5_priv_rxq_verify(struct priv *priv);
+int rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl);
 
 /* mlx5_txq.c */
 
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 7df85aa..bedce0a 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -78,6 +78,41 @@ priv_txq_start(struct priv *priv)
 	return -ret;
 }
 
+static void
+priv_rxq_stop(struct priv *priv)
+{
+	unsigned int i;
+
+	for (i = 0; i != priv->rxqs_n; ++i)
+		mlx5_priv_rxq_release(priv, i);
+}
+
+static int
+priv_rxq_start(struct priv *priv)
+{
+	unsigned int i;
+	int ret = 0;
+
+	for (i = 0; i != priv->rxqs_n; ++i) {
+		struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_priv_rxq_get(priv, i);
+
+		if (!rxq_ctrl)
+			continue;
+		ret = rxq_alloc_elts(rxq_ctrl);
+		if (ret)
+			goto error;
+		rxq_ctrl->ibv = mlx5_priv_rxq_ibv_new(priv, i);
+		if (!rxq_ctrl->ibv) {
+			ret = ENOMEM;
+			goto error;
+		}
+	}
+	return -ret;
+error:
+	priv_rxq_stop(priv);
+	return -ret;
+}
+
 /**
  * DPDK callback to start the device.
  *
@@ -113,6 +148,14 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 	}
 	/* Update send callback. */
 	priv_select_tx_function(priv);
+	err = priv_rxq_start(priv);
+	if (err) {
+		ERROR("%p: RXQ allocation failed: %s",
+		      (void *)dev, strerror(err));
+		goto error;
+	}
+	/* Update receive callback. */
+	priv_select_rx_function(priv);
 	err = priv_create_hash_rxqs(priv);
 	if (!err)
 		err = priv_rehash_flows(priv);
@@ -147,6 +190,7 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 	priv_mac_addrs_disable(priv);
 	priv_destroy_hash_rxqs(priv);
 	priv_flow_stop(priv);
+	priv_rxq_stop(priv);
 	priv_txq_stop(priv);
 	priv_unlock(priv);
 	return -err;
@@ -180,6 +224,7 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
 		priv_mr_release(priv, mr);
 	}
 	priv_txq_stop(priv);
+	priv_rxq_stop(priv);
 	priv_dev_interrupt_handler_uninstall(priv, dev);
 	priv_unlock(priv);
 }
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v1 12/21] net/mlx5: remove queue drop support
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (11 preceding siblings ...)
  2017-08-02 14:10 ` [dpdk-dev] [PATCH v1 11/21] net/mlx5: add reference counter on DPDK Rx queues Nelio Laranjeiro
@ 2017-08-02 14:10 ` Nelio Laranjeiro
  2017-08-02 14:10 ` [dpdk-dev] [PATCH v1 13/21] net/mlx5: make indirection tables sharable Nelio Laranjeiro
                   ` (42 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-08-02 14:10 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil
In anticipation of the major rework of Rx hash queues, after which this
feature will be totally different, the drop flow is removed for now and
will be re-introduced later.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5.h      |   1 -
 drivers/net/mlx5/mlx5_flow.c | 228 +++----------------------------------------
 2 files changed, 15 insertions(+), 214 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 448995e..a0266d4 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -143,7 +143,6 @@ struct priv {
 	struct rte_intr_handle intr_handle; /* Interrupt handler. */
 	unsigned int (*reta_idx)[]; /* RETA index table. */
 	unsigned int reta_idx_n; /* RETA index size. */
-	struct rte_flow_drop *flow_drop_queue; /* Flow drop queue. */
 	TAILQ_HEAD(mlx5_flows, rte_flow) flows; /* RTE Flow rules. */
 	LIST_HEAD(mr, mlx5_mr) mr; /* Memory region. */
 	LIST_HEAD(rxq, mlx5_rxq_ctrl) rxqsctrl; /* DPDK Rx queues. */
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 9ed8d05..151854a 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -96,7 +96,6 @@ struct rte_flow {
 	struct ibv_exp_wq *wq; /**< Verbs work queue. */
 	struct ibv_cq *cq; /**< Verbs completion queue. */
 	uint32_t mark:1; /**< Set if the flow is marked. */
-	uint32_t drop:1; /**< Drop queue. */
 	uint64_t hash_fields; /**< Fields that participate in the hash. */
 	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< List of queues. */
 	uint16_t queues_n; /**< Number of queues in the list. */
@@ -274,7 +273,6 @@ static const struct mlx5_flow_items mlx5_flow_items[] = {
 /* Structure to parse actions. */
 struct mlx5_flow_action {
 	uint32_t queue:1; /**< Target is a receive queue. */
-	uint32_t drop:1; /**< Target is a drop queue. */
 	uint32_t mark:1; /**< Mark is present in the flow. */
 	uint32_t mark_id; /**< Mark identifier. */
 	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
@@ -290,14 +288,6 @@ struct mlx5_flow_parse {
 	struct mlx5_flow_action actions; /**< Parsed action result. */
 };
 
-/** Structure for Drop queue. */
-struct rte_flow_drop {
-	struct ibv_exp_rwq_ind_table *ind_table; /**< Indirection table. */
-	struct ibv_qp *qp; /**< Verbs queue pair. */
-	struct ibv_exp_wq *wq; /**< Verbs work queue. */
-	struct ibv_cq *cq; /**< Verbs completion queue. */
-};
-
 static const struct rte_flow_ops mlx5_flow_ops = {
 	.validate = mlx5_flow_validate,
 	.create = mlx5_flow_create,
@@ -512,7 +502,11 @@ priv_flow_validate(struct priv *priv,
 		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
 			continue;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
-			flow->actions.drop = 1;
+			rte_flow_error_set(error, ENOTSUP,
+				   RTE_FLOW_ERROR_TYPE_ACTION,
+				   actions,
+				   "Drop queue not supported");
+			return -rte_errno;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
 			const struct rte_flow_action_queue *queue =
 				(const struct rte_flow_action_queue *)
@@ -614,11 +608,9 @@ priv_flow_validate(struct priv *priv,
 			goto exit_action_not_supported;
 		}
 	}
-	if (flow->actions.mark && !flow->ibv_attr && !flow->actions.drop)
+	if (flow->actions.mark && !flow->ibv_attr)
 		flow->offset += sizeof(struct ibv_exp_flow_spec_action_tag);
-	if (!flow->ibv_attr && flow->actions.drop)
-		flow->offset += sizeof(struct ibv_exp_flow_spec_action_drop);
-	if (!flow->actions.queue && !flow->actions.drop) {
+	if (!flow->actions.queue) {
 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
 				   NULL, "no valid action");
 		return -rte_errno;
@@ -1015,62 +1007,6 @@ mlx5_flow_create_flag_mark(struct mlx5_flow_parse *flow, uint32_t mark_id)
 }
 
 /**
- * Complete flow rule creation with a drop queue.
- *
- * @param priv
- *   Pointer to private structure.
- * @param flow
- *   MLX5 flow attributes (filled by mlx5_flow_validate()).
- * @param[out] error
- *   Perform verbose error reporting if not NULL.
- *
- * @return
- *   A flow if the rule could be created.
- */
-static struct rte_flow *
-priv_flow_create_action_queue_drop(struct priv *priv,
-				   struct mlx5_flow_parse *flow,
-				   struct rte_flow_error *error)
-{
-	struct rte_flow *rte_flow;
-	struct ibv_exp_flow_spec_action_drop *drop;
-	unsigned int size = sizeof(struct ibv_exp_flow_spec_action_drop);
-
-	assert(priv->pd);
-	assert(priv->ctx);
-	rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
-	if (!rte_flow) {
-		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
-				   NULL, "cannot allocate flow memory");
-		return NULL;
-	}
-	rte_flow->drop = 1;
-	drop = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
-	*drop = (struct ibv_exp_flow_spec_action_drop){
-			.type = IBV_EXP_FLOW_SPEC_ACTION_DROP,
-			.size = size,
-	};
-	++flow->ibv_attr->num_of_specs;
-	flow->offset += sizeof(struct ibv_exp_flow_spec_action_drop);
-	rte_flow->ibv_attr = flow->ibv_attr;
-	if (!priv->dev->data->dev_started)
-		return rte_flow;
-	rte_flow->qp = priv->flow_drop_queue->qp;
-	rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->qp,
-						 rte_flow->ibv_attr);
-	if (!rte_flow->ibv_flow) {
-		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
-				   NULL, "flow rule creation failure");
-		goto error;
-	}
-	return rte_flow;
-error:
-	assert(rte_flow);
-	rte_free(rte_flow);
-	return NULL;
-}
-
-/**
  * Complete flow rule creation.
  *
  * @param priv
@@ -1237,15 +1173,11 @@ priv_flow_create(struct priv *priv,
 	flow.hash_fields = 0;
 	claim_zero(priv_flow_validate(priv, attr, items, actions,
 				      error, &flow));
-	if (flow.actions.mark && !flow.actions.drop) {
+	if (flow.actions.mark) {
 		mlx5_flow_create_flag_mark(&flow, flow.actions.mark_id);
 		flow.offset += sizeof(struct ibv_exp_flow_spec_action_tag);
 	}
-	if (flow.actions.drop)
-		rte_flow =
-			priv_flow_create_action_queue_drop(priv, &flow, error);
-	else
-		rte_flow = priv_flow_create_action_queue(priv, &flow, error);
+	rte_flow = priv_flow_create_action_queue(priv, &flow, error);
 	if (!rte_flow)
 		goto exit;
 	return rte_flow;
@@ -1297,8 +1229,6 @@ priv_flow_destroy(struct priv *priv,
 	TAILQ_REMOVE(&priv->flows, flow, next);
 	if (flow->ibv_flow)
 		claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
-	if (flow->drop)
-		goto free;
 	if (flow->qp)
 		claim_zero(ibv_destroy_qp(flow->qp));
 	if (flow->ind_table)
@@ -1319,8 +1249,6 @@ priv_flow_destroy(struct priv *priv,
 			TAILQ_FOREACH(tmp, &priv->flows, next) {
 				unsigned int j;
 
-				if (tmp->drop)
-					continue;
 				if (!tmp->mark)
 					continue;
 				for (j = 0; (j != tmp->queues_n) && !mark; j++)
@@ -1331,7 +1259,6 @@ priv_flow_destroy(struct priv *priv,
 		}
 		mlx5_priv_rxq_ibv_release(priv, rxq_ctrl->ibv);
 	}
-free:
 	rte_free(flow->ibv_attr);
 	DEBUG("Flow destroyed %p", (void *)flow);
 	rte_free(flow);
@@ -1394,122 +1321,6 @@ mlx5_flow_flush(struct rte_eth_dev *dev,
 }
 
 /**
- * Create drop queue.
- *
- * @param priv
- *   Pointer to private structure.
- *
- * @return
- *   0 on success.
- */
-static int
-priv_flow_create_drop_queue(struct priv *priv)
-{
-	struct rte_flow_drop *fdq = NULL;
-
-	assert(priv->pd);
-	assert(priv->ctx);
-	fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
-	if (!fdq) {
-		WARN("cannot allocate memory for drop queue");
-		goto error;
-	}
-	fdq->cq = ibv_exp_create_cq(priv->ctx, 1, NULL, NULL, 0,
-			&(struct ibv_exp_cq_init_attr){
-			.comp_mask = 0,
-			});
-	if (!fdq->cq) {
-		WARN("cannot allocate CQ for drop queue");
-		goto error;
-	}
-	fdq->wq = ibv_exp_create_wq(priv->ctx,
-			&(struct ibv_exp_wq_init_attr){
-			.wq_type = IBV_EXP_WQT_RQ,
-			.max_recv_wr = 1,
-			.max_recv_sge = 1,
-			.pd = priv->pd,
-			.cq = fdq->cq,
-			});
-	if (!fdq->wq) {
-		WARN("cannot allocate WQ for drop queue");
-		goto error;
-	}
-	fdq->ind_table = ibv_exp_create_rwq_ind_table(priv->ctx,
-			&(struct ibv_exp_rwq_ind_table_init_attr){
-			.pd = priv->pd,
-			.log_ind_tbl_size = 0,
-			.ind_tbl = &fdq->wq,
-			.comp_mask = 0,
-			});
-	if (!fdq->ind_table) {
-		WARN("cannot allocate indirection table for drop queue");
-		goto error;
-	}
-	fdq->qp = ibv_exp_create_qp(priv->ctx,
-		&(struct ibv_exp_qp_init_attr){
-			.qp_type = IBV_QPT_RAW_PACKET,
-			.comp_mask =
-				IBV_EXP_QP_INIT_ATTR_PD |
-				IBV_EXP_QP_INIT_ATTR_PORT |
-				IBV_EXP_QP_INIT_ATTR_RX_HASH,
-			.pd = priv->pd,
-			.rx_hash_conf = &(struct ibv_exp_rx_hash_conf){
-				.rx_hash_function =
-					IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
-				.rx_hash_key_len = rss_hash_default_key_len,
-				.rx_hash_key = rss_hash_default_key,
-				.rx_hash_fields_mask = 0,
-				.rwq_ind_tbl = fdq->ind_table,
-				},
-			.port_num = priv->port,
-			});
-	if (!fdq->qp) {
-		WARN("cannot allocate QP for drop queue");
-		goto error;
-	}
-	priv->flow_drop_queue = fdq;
-	return 0;
-error:
-	if (fdq->qp)
-		claim_zero(ibv_destroy_qp(fdq->qp));
-	if (fdq->ind_table)
-		claim_zero(ibv_exp_destroy_rwq_ind_table(fdq->ind_table));
-	if (fdq->wq)
-		claim_zero(ibv_exp_destroy_wq(fdq->wq));
-	if (fdq->cq)
-		claim_zero(ibv_destroy_cq(fdq->cq));
-	if (fdq)
-		rte_free(fdq);
-	priv->flow_drop_queue = NULL;
-	return -1;
-}
-
-/**
- * Delete drop queue.
- *
- * @param priv
- *   Pointer to private structure.
- */
-static void
-priv_flow_delete_drop_queue(struct priv *priv)
-{
-	struct rte_flow_drop *fdq = priv->flow_drop_queue;
-
-	if (!fdq)
-		return;
-	if (fdq->qp)
-		claim_zero(ibv_destroy_qp(fdq->qp));
-	if (fdq->ind_table)
-		claim_zero(ibv_exp_destroy_rwq_ind_table(fdq->ind_table));
-	if (fdq->wq)
-		claim_zero(ibv_exp_destroy_wq(fdq->wq));
-	if (fdq->cq)
-		claim_zero(ibv_destroy_cq(fdq->cq));
-	rte_free(fdq);
-	priv->flow_drop_queue = NULL;
-}
-
-/**
  * Remove all flows.
  *
  * Called by dev_stop() to remove all flows.
@@ -1523,17 +1334,15 @@ priv_flow_stop(struct priv *priv)
 	struct rte_flow *flow;
 
 	TAILQ_FOREACH_REVERSE(flow, &priv->flows, mlx5_flows, next) {
+		unsigned int i;
+
 		claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
 		flow->ibv_flow = NULL;
-		if (flow->mark) {
-			unsigned int n;
-
-			for (n = 0; n < flow->queues_n; ++n)
-				(*priv->rxqs)[flow->queues[n]]->mark = 0;
-		}
+		/* Disable mark on all queues. */
+		for (i = 0; i != priv->rxqs_n; ++i)
+			(*priv->rxqs)[i]->mark = 0;
 		DEBUG("Flow %p removed", (void *)flow);
 	}
-	priv_flow_delete_drop_queue(priv);
 }
 
 /**
@@ -1548,19 +1357,12 @@ priv_flow_stop(struct priv *priv)
 int
 priv_flow_start(struct priv *priv)
 {
-	int ret;
 	struct rte_flow *flow;
 
-	ret = priv_flow_create_drop_queue(priv);
-	if (ret)
-		return -1;
 	TAILQ_FOREACH(flow, &priv->flows, next) {
 		struct ibv_qp *qp;
 
-		if (flow->drop)
-			qp = priv->flow_drop_queue->qp;
-		else
-			qp = flow->qp;
+		qp = flow->qp;
 		flow->ibv_flow = ibv_exp_create_flow(qp, flow->ibv_attr);
 		if (!flow->ibv_flow) {
 			DEBUG("Flow %p cannot be applied", (void *)flow);
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v1 13/21] net/mlx5: make indirection tables sharable
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (12 preceding siblings ...)
  2017-08-02 14:10 ` [dpdk-dev] [PATCH v1 12/21] net/mlx5: remove queue drop support Nelio Laranjeiro
@ 2017-08-02 14:10 ` Nelio Laranjeiro
  2017-08-02 14:10 ` [dpdk-dev] [PATCH v1 14/21] net/mlx5: add Hash Rx queue object Nelio Laranjeiro
                   ` (41 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-08-02 14:10 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil
Avoid having a dedicated indirection table for each Hash Rx queue.  On the
Verbs side, the indirection table only points to the Work Queues, so two hash
Rx queues using the same set of WQs can share the same indirection table.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5.c       |   3 +
 drivers/net/mlx5/mlx5.h       |   2 +
 drivers/net/mlx5/mlx5_flow.c  |  79 ++++++++++------------
 drivers/net/mlx5/mlx5_rxq.c   | 149 ++++++++++++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_rxtx.h  |  18 +++++
 drivers/net/mlx5/mlx5_utils.h |   2 +
 6 files changed, 207 insertions(+), 46 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index b37292c..d5cb6e4 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -182,6 +182,9 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	}
 	if (priv->reta_idx != NULL)
 		rte_free(priv->reta_idx);
+	i = mlx5_priv_ind_table_ibv_verify(priv);
+	if (i)
+		WARN("%p: some Indirection table still remain", (void*)priv);
 	i = mlx5_priv_rxq_ibv_verify(priv);
 	if (i)
 		WARN("%p: some Verbs Rx queue still remain", (void*)priv);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index a0266d4..081c2c6 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -149,6 +149,8 @@ struct priv {
 	LIST_HEAD(rxqibv, mlx5_rxq_ibv) rxqsibv; /* Verbs Rx queues. */
 	LIST_HEAD(txq, mlx5_txq_ctrl) txqsctrl; /* DPDK Tx queues. */
 	LIST_HEAD(txqibv, mlx5_txq_ibv) txqsibv; /* Verbs Tx queues. */
+	/* Verbs Indirection tables. */
+	LIST_HEAD(ind_tables, mlx5_ind_table_ibv) ind_tbls;
 	uint32_t link_speed_capa; /* Link speed capabilities. */
 	struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */
 	rte_spinlock_t lock; /* Lock for control functions. */
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 151854a..049a8e2 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -90,15 +90,13 @@ mlx5_flow_create_vxlan(const struct rte_flow_item *item,
 struct rte_flow {
 	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
 	struct ibv_exp_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
-	struct ibv_exp_rwq_ind_table *ind_table; /**< Indirection table. */
+	struct mlx5_ind_table_ibv *ind_table; /**< Indirection table. */
 	struct ibv_qp *qp; /**< Verbs queue pair. */
 	struct ibv_exp_flow *ibv_flow; /**< Verbs flow. */
 	struct ibv_exp_wq *wq; /**< Verbs work queue. */
 	struct ibv_cq *cq; /**< Verbs completion queue. */
 	uint32_t mark:1; /**< Set if the flow is marked. */
 	uint64_t hash_fields; /**< Fields that participate in the hash. */
-	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< List of queues. */
-	uint16_t queues_n; /**< Number of queues in the list. */
 };
 
 /** Static initializer for items. */
@@ -1026,50 +1024,37 @@ priv_flow_create_action_queue(struct priv *priv,
 {
 	struct rte_flow *rte_flow;
 	unsigned int i;
-	unsigned int j;
-	const unsigned int wqs_n = 1 << log2above(flow->actions.queues_n);
-	struct ibv_exp_wq *wqs[wqs_n];
 
 	assert(priv->pd);
 	assert(priv->ctx);
-	assert(!flow->actions.drop);
 	rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
 	if (!rte_flow) {
 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
 				   NULL, "cannot allocate flow memory");
 		return NULL;
 	}
-	for (i = 0; i < flow->actions.queues_n; ++i) {
-		struct mlx5_rxq_ibv *rxq =
-			mlx5_priv_rxq_ibv_get(priv, flow->actions.queues[i]);
-
-		wqs[i] = rxq->wq;
-		rte_flow->queues[i] = flow->actions.queues[i];
-		++rte_flow->queues_n;
-		(*priv->rxqs)[flow->actions.queues[i]]->mark |=
-			flow->actions.mark;
-	}
-	/* finalise indirection table. */
-	for (j = 0; i < wqs_n; ++i, ++j) {
-		wqs[i] = wqs[j];
-		if (j == flow->actions.queues_n)
-			j = 0;
+	for (i = 0; i != flow->actions.queues_n; ++i) {
+		struct mlx5_rxq_data *q = (*priv->rxqs)[flow->actions.queues[i]];
+
+		q->mark |= flow->actions.mark;
 	}
 	rte_flow->mark = flow->actions.mark;
 	rte_flow->ibv_attr = flow->ibv_attr;
 	rte_flow->hash_fields = flow->hash_fields;
-	rte_flow->ind_table = ibv_exp_create_rwq_ind_table(
-		priv->ctx,
-		&(struct ibv_exp_rwq_ind_table_init_attr){
-			.pd = priv->pd,
-			.log_ind_tbl_size = log2above(flow->actions.queues_n),
-			.ind_tbl = wqs,
-			.comp_mask = 0,
-		});
+	rte_flow->ind_table =
+		mlx5_priv_ind_table_ibv_get(priv, flow->actions.queues,
+					    flow->actions.queues_n);
 	if (!rte_flow->ind_table) {
-		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
-				   NULL, "cannot allocate indirection table");
-		goto error;
+		rte_flow->ind_table =
+			mlx5_priv_ind_table_ibv_new(priv, flow->actions.queues,
+						    flow->actions.queues_n);
+		if (!rte_flow->ind_table) {
+			rte_flow_error_set(error, ENOMEM,
+					   RTE_FLOW_ERROR_TYPE_HANDLE,
+					   NULL,
+					   "cannot allocate indirection table");
+			goto error;
+		}
 	}
 	rte_flow->qp = ibv_exp_create_qp(
 		priv->ctx,
@@ -1086,7 +1071,7 @@ priv_flow_create_action_queue(struct priv *priv,
 				.rx_hash_key_len = rss_hash_default_key_len,
 				.rx_hash_key = rss_hash_default_key,
 				.rx_hash_fields_mask = rte_flow->hash_fields,
-				.rwq_ind_tbl = rte_flow->ind_table,
+				.rwq_ind_tbl = rte_flow->ind_table->ind_table,
 			},
 			.port_num = priv->port,
 		});
@@ -1110,7 +1095,7 @@ priv_flow_create_action_queue(struct priv *priv,
 	if (rte_flow->qp)
 		ibv_destroy_qp(rte_flow->qp);
 	if (rte_flow->ind_table)
-		ibv_exp_destroy_rwq_ind_table(rte_flow->ind_table);
+		mlx5_priv_ind_table_ibv_release(priv, rte_flow->ind_table);
 	rte_free(rte_flow);
 	return NULL;
 }
@@ -1231,13 +1216,10 @@ priv_flow_destroy(struct priv *priv,
 		claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
 	if (flow->qp)
 		claim_zero(ibv_destroy_qp(flow->qp));
-	if (flow->ind_table)
-		claim_zero(ibv_exp_destroy_rwq_ind_table(flow->ind_table));
-	for (i = 0; i != flow->queues_n; ++i) {
+	for (i = 0; i != flow->ind_table->queues_n; ++i) {
 		struct rte_flow *tmp;
-		struct mlx5_rxq_data *rxq = (*priv->rxqs)[flow->queues[i]];
-		struct mlx5_rxq_ctrl *rxq_ctrl =
-			container_of(rxq, struct mlx5_rxq_ctrl, rxq);
+		struct mlx5_rxq_data *rxq =
+			(*priv->rxqs)[flow->ind_table->queues[i]];
 
 		/*
 		 * To remove the mark from the queue, the queue must not be
@@ -1251,14 +1233,17 @@ priv_flow_destroy(struct priv *priv,
 
 				if (!tmp->mark)
 					continue;
-				for (j = 0; (j != tmp->queues_n) && !mark; j++)
-					if (tmp->queues[j] == flow->queues[i])
+				for (j = 0;
+				     (j != tmp->ind_table->queues_n) && !mark;
+				     j++)
+					if (tmp->ind_table->queues[j] ==
+					    flow->ind_table->queues[i])
 						mark = 1;
 			}
 			rxq->mark = mark;
 		}
-		mlx5_priv_rxq_ibv_release(priv, rxq_ctrl->ibv);
 	}
+	mlx5_priv_ind_table_ibv_release(priv, flow->ind_table);
 	rte_free(flow->ibv_attr);
 	DEBUG("Flow destroyed %p", (void *)flow);
 	rte_free(flow);
@@ -1373,8 +1358,10 @@ priv_flow_start(struct priv *priv)
 		if (flow->mark) {
 			unsigned int n;
 
-			for (n = 0; n < flow->queues_n; ++n)
-				(*priv->rxqs)[flow->queues[n]]->mark = 1;
+			for (n = 0; n < flow->ind_table->queues_n; ++n) {
+				uint16_t idx = flow->ind_table->queues[n];
+				(*priv->rxqs)[idx]->mark = 1;
+			}
 		}
 	}
 	return 0;
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 3b75a7e..bd6f966 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -1539,3 +1539,152 @@ mlx5_priv_rxq_verify(struct priv *priv)
 	}
 	return ret;
 }
+
+/**
+ * Create an indirection table.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param queues
+ *   Queues entering in the indirection table.
+ * @param queues_n
+ *   Number of queues in the array.
+ *
+ * @return
+ *   A new indirection table.
+ */
+struct mlx5_ind_table_ibv*
+mlx5_priv_ind_table_ibv_new(struct priv *priv, uint16_t queues[],
+			    uint16_t queues_n)
+{
+	struct mlx5_ind_table_ibv *ind_tbl;
+	const unsigned int wq_n = 1 << log2above(queues_n);
+	struct ibv_exp_wq *wq[wq_n];
+	unsigned int i;
+	unsigned int j;
+
+	ind_tbl = rte_calloc(__func__, 1, sizeof(*ind_tbl) +
+			     queues_n * sizeof(uint16_t), 0);
+	if (!ind_tbl)
+		return NULL;
+	for (i = 0; i != queues_n; ++i) {
+		struct mlx5_rxq_ctrl *rxq =
+			mlx5_priv_rxq_get(priv, queues[i]);
+
+		wq[i] = rxq->ibv->wq;
+		ind_tbl->queues[i] = queues[i];
+	}
+	ind_tbl->queues_n = queues_n;
+	/* finalise indirection table. */
+	for (j = 0; i < wq_n; ++i, ++j) {
+		wq[i] = wq[j];
+		if (j == queues_n)
+			j = 0;
+	}
+	ind_tbl->ind_table = ibv_exp_create_rwq_ind_table(
+		priv->ctx,
+		&(struct ibv_exp_rwq_ind_table_init_attr){
+			.pd = priv->pd,
+			.log_ind_tbl_size = log2above(queues_n),
+			.ind_tbl = wq,
+			.comp_mask = 0,
+		});
+	if (!ind_tbl->ind_table)
+		goto error;
+	rte_atomic32_inc(&ind_tbl->refcnt);
+	LIST_INSERT_HEAD(&priv->ind_tbls, ind_tbl, next);
+	DEBUG("%p: Indirection table %p: refcnt %d", (void*)priv,
+	      (void*)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
+	return ind_tbl;
+error:
+	rte_free(ind_tbl);
+	return NULL;
+}
+
+/**
+ * Get an indirection table.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param queues
+ *   Queues entering in the indirection table.
+ * @param queues_n
+ *   Number of queues in the array.
+ *
+ * @return
+ *   An indirection table if found.
+ */
+struct mlx5_ind_table_ibv*
+mlx5_priv_ind_table_ibv_get(struct priv *priv, uint16_t queues[],
+			    uint16_t queues_n)
+{
+	struct mlx5_ind_table_ibv *ind_tbl;
+
+	LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
+		if (memcmp(ind_tbl->queues, queues, queues_n) == 0)
+			break;
+	}
+	if (ind_tbl) {
+		unsigned int i;
+
+		rte_atomic32_inc(&ind_tbl->refcnt);
+		DEBUG("%p: Indirection table %p: refcnt %d", (void*)priv,
+		      (void*)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
+		for (i = 0; i != ind_tbl->queues_n; ++i)
+			mlx5_priv_rxq_get(priv, ind_tbl->queues[i]);
+	}
+	return ind_tbl;
+}
+
+/**
+ * Release an indirection table.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param ind_table
+ *   Indirection table to release.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+int
+mlx5_priv_ind_table_ibv_release(struct priv *priv,
+				struct mlx5_ind_table_ibv *ind_tbl)
+{
+	unsigned int i;
+
+	DEBUG("%p: Indirection table %p: refcnt %d", (void*)priv,
+	      (void*)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
+	if (rte_atomic32_dec_and_test(&ind_tbl->refcnt))
+		claim_zero(ibv_exp_destroy_rwq_ind_table(ind_tbl->ind_table));
+	for (i = 0; i != ind_tbl->queues_n; ++i)
+		claim_nonzero(mlx5_priv_rxq_release(priv, ind_tbl->queues[i]));
+	if (!rte_atomic32_read(&ind_tbl->refcnt)) {
+		LIST_REMOVE(ind_tbl, next);
+		rte_free(ind_tbl);
+		return 0;
+	}
+	return EBUSY;
+}
+
+/**
+ * Verify the Verbs indirection table list is empty.
+ *
+ * @param priv
+ *  Pointer to private structure.
+ *
+ * @return the number of objects not released.
+ */
+int
+mlx5_priv_ind_table_ibv_verify(struct priv *priv)
+{
+	struct mlx5_ind_table_ibv *ind_tbl;
+	int ret = 0;
+
+	LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
+		DEBUG("%p: Verbs indirection table %p still referenced",
+		      (void*)priv, (void*)ind_tbl);
+		++ret;
+	}
+	return ret;
+}
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 672793a..2b48a01 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -152,6 +152,15 @@ struct mlx5_rxq_ctrl {
 	unsigned int memory_channel:1; /* Need memory channel. */
 };
 
+/* Indirection table. */
+struct mlx5_ind_table_ibv {
+	LIST_ENTRY(mlx5_ind_table_ibv) next; /* Pointer to the next element. */
+	rte_atomic32_t refcnt; /* Reference counter. */
+	struct ibv_exp_rwq_ind_table *ind_table; /**< Indirection table. */
+	uint16_t queues_n; /**< Number of queues in the list. */
+	uint16_t queues[]; /**< Queue list. */
+};
+
 /* Hash RX queue types. */
 enum hash_rxq_type {
 	HASH_RXQ_TCPV4,
@@ -345,6 +354,15 @@ int mlx5_priv_rxq_release(struct priv *priv, uint16_t idx);
 int mlx5_priv_rxq_releasable(struct priv *priv, uint16_t idx);
 int mlx5_priv_rxq_verify(struct priv *priv);
 int rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl);
+struct mlx5_ind_table_ibv* mlx5_priv_ind_table_ibv_new(struct priv * priv,
+						       uint16_t queues[],
+						       uint16_t queue_n);
+struct mlx5_ind_table_ibv* mlx5_priv_ind_table_ibv_get(struct priv *priv,
+						       uint16_t queues[],
+						       uint16_t queue_n);
+int mlx5_priv_ind_table_ibv_release(struct priv * priv,
+				    struct mlx5_ind_table_ibv *ind_table);
+int mlx5_priv_ind_table_ibv_verify(struct priv *priv);
 
 /* mlx5_txq.c */
 
diff --git a/drivers/net/mlx5/mlx5_utils.h b/drivers/net/mlx5/mlx5_utils.h
index a824787..218ae83 100644
--- a/drivers/net/mlx5/mlx5_utils.h
+++ b/drivers/net/mlx5/mlx5_utils.h
@@ -128,11 +128,13 @@ pmd_drv_log_basename(const char *s)
 
 #define DEBUG(...) PMD_DRV_LOG(DEBUG, __VA_ARGS__)
 #define claim_zero(...) assert((__VA_ARGS__) == 0)
+#define claim_nonzero(...) assert((__VA_ARGS__) != 0)
 
 #else /* NDEBUG */
 
 #define DEBUG(...) (void)0
 #define claim_zero(...) (__VA_ARGS__)
+#define claim_nonzero(...) (__VA_ARGS__)
 
 #endif /* NDEBUG */
 
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v1 14/21] net/mlx5: add Hash Rx queue object
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (13 preceding siblings ...)
  2017-08-02 14:10 ` [dpdk-dev] [PATCH v1 13/21] net/mlx5: make indirection tables sharable Nelio Laranjeiro
@ 2017-08-02 14:10 ` Nelio Laranjeiro
  2017-08-02 14:10 ` [dpdk-dev] [PATCH v1 15/21] net/mlx5: disable priority protection in flows Nelio Laranjeiro
                   ` (40 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-08-02 14:10 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5.c      |   3 +
 drivers/net/mlx5/mlx5.h      |   1 +
 drivers/net/mlx5/mlx5_flow.c | 137 +++++++++++++++---------------------
 drivers/net/mlx5/mlx5_rxq.c  | 161 +++++++++++++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_rxtx.h |  19 +++++
 5 files changed, 239 insertions(+), 82 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index d5cb6e4..52cbb20 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -182,6 +182,9 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	}
 	if (priv->reta_idx != NULL)
 		rte_free(priv->reta_idx);
+	i = mlx5_priv_hrxq_ibv_verify(priv);
+	if (i)
+		WARN("%p: some Hash Rx queue still remain", (void*)priv);
 	i = mlx5_priv_ind_table_ibv_verify(priv);
 	if (i)
 		WARN("%p: some Indirection table still remain", (void*)priv);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 081c2c6..3c2e719 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -147,6 +147,7 @@ struct priv {
 	LIST_HEAD(mr, mlx5_mr) mr; /* Memory region. */
 	LIST_HEAD(rxq, mlx5_rxq_ctrl) rxqsctrl; /* DPDK Rx queues. */
 	LIST_HEAD(rxqibv, mlx5_rxq_ibv) rxqsibv; /* Verbs Rx queues. */
+	LIST_HEAD(hrxq, mlx5_hrxq) hrxqs; /* Verbs Hash Rx queues. */
 	LIST_HEAD(txq, mlx5_txq_ctrl) txqsctrl; /* DPDK Tx queues. */
 	LIST_HEAD(txqibv, mlx5_txq_ibv) txqsibv; /* Verbs Tx queues. */
 	/* Verbs Indirection tables. */
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 049a8e2..f258567 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -90,13 +90,9 @@ mlx5_flow_create_vxlan(const struct rte_flow_item *item,
 struct rte_flow {
 	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
 	struct ibv_exp_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
-	struct mlx5_ind_table_ibv *ind_table; /**< Indirection table. */
-	struct ibv_qp *qp; /**< Verbs queue pair. */
 	struct ibv_exp_flow *ibv_flow; /**< Verbs flow. */
-	struct ibv_exp_wq *wq; /**< Verbs work queue. */
-	struct ibv_cq *cq; /**< Verbs completion queue. */
+	struct mlx5_hrxq *hrxq; /**< Hash Rx queue. */
 	uint32_t mark:1; /**< Set if the flow is marked. */
-	uint64_t hash_fields; /**< Fields that participate in the hash. */
 };
 
 /** Static initializer for items. */
@@ -1033,56 +1029,36 @@ priv_flow_create_action_queue(struct priv *priv,
 				   NULL, "cannot allocate flow memory");
 		return NULL;
 	}
-	for (i = 0; i != flow->actions.queues_n; ++i) {
-		struct mlx5_rxq_data *q = (*priv->rxqs)[flow->actions.queues[i]];
-
-		q->mark |= flow->actions.mark;
-	}
 	rte_flow->mark = flow->actions.mark;
 	rte_flow->ibv_attr = flow->ibv_attr;
-	rte_flow->hash_fields = flow->hash_fields;
-	rte_flow->ind_table =
-		mlx5_priv_ind_table_ibv_get(priv, flow->actions.queues,
+	rte_flow->hrxq = mlx5_priv_hrxq_get(priv, rss_hash_default_key,
+					    rss_hash_default_key_len,
+					    flow->hash_fields,
+					    flow->actions.queues,
 					    flow->actions.queues_n);
-	if (!rte_flow->ind_table) {
-		rte_flow->ind_table =
-			mlx5_priv_ind_table_ibv_new(priv, flow->actions.queues,
-						    flow->actions.queues_n);
-		if (!rte_flow->ind_table) {
-			rte_flow_error_set(error, ENOMEM,
-					   RTE_FLOW_ERROR_TYPE_HANDLE,
-					   NULL,
-					   "cannot allocate indirection table");
-			goto error;
-		}
+	if (rte_flow->hrxq) {
+		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
+				   NULL, "duplicated flow");
+		goto error;
 	}
-	rte_flow->qp = ibv_exp_create_qp(
-		priv->ctx,
-		&(struct ibv_exp_qp_init_attr){
-			.qp_type = IBV_QPT_RAW_PACKET,
-			.comp_mask =
-				IBV_EXP_QP_INIT_ATTR_PD |
-				IBV_EXP_QP_INIT_ATTR_PORT |
-				IBV_EXP_QP_INIT_ATTR_RX_HASH,
-			.pd = priv->pd,
-			.rx_hash_conf = &(struct ibv_exp_rx_hash_conf){
-				.rx_hash_function =
-					IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
-				.rx_hash_key_len = rss_hash_default_key_len,
-				.rx_hash_key = rss_hash_default_key,
-				.rx_hash_fields_mask = rte_flow->hash_fields,
-				.rwq_ind_tbl = rte_flow->ind_table->ind_table,
-			},
-			.port_num = priv->port,
-		});
-	if (!rte_flow->qp) {
+	rte_flow->hrxq = mlx5_priv_hrxq_new(priv, rss_hash_default_key,
+					    rss_hash_default_key_len,
+					    flow->hash_fields,
+					    flow->actions.queues,
+					    flow->actions.queues_n);
+	if (!rte_flow->hrxq) {
 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
-				   NULL, "cannot allocate QP");
+				   NULL, "cannot create hash rxq");
 		goto error;
 	}
+	for (i = 0; i != flow->actions.queues_n; ++i) {
+		struct mlx5_rxq_data *q = (*priv->rxqs)[flow->actions.queues[i]];
+
+		q->mark |= flow->actions.mark;
+	}
 	if (!priv->dev->data->dev_started)
 		return rte_flow;
-	rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->qp,
+	rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->hrxq->qp,
 						 rte_flow->ibv_attr);
 	if (!rte_flow->ibv_flow) {
 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
@@ -1092,10 +1068,8 @@ priv_flow_create_action_queue(struct priv *priv,
 	return rte_flow;
 error:
 	assert(rte_flow);
-	if (rte_flow->qp)
-		ibv_destroy_qp(rte_flow->qp);
-	if (rte_flow->ind_table)
-		mlx5_priv_ind_table_ibv_release(priv, rte_flow->ind_table);
+	if (rte_flow->hrxq)
+		mlx5_priv_hrxq_release(priv, rte_flow->hrxq);
 	rte_free(rte_flow);
 	return NULL;
 }
@@ -1210,40 +1184,41 @@ priv_flow_destroy(struct priv *priv,
 		  struct rte_flow *flow)
 {
 	unsigned int i;
-
-	TAILQ_REMOVE(&priv->flows, flow, next);
-	if (flow->ibv_flow)
-		claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
-	if (flow->qp)
-		claim_zero(ibv_destroy_qp(flow->qp));
-	for (i = 0; i != flow->ind_table->queues_n; ++i) {
+	uint16_t *queues;
+	uint16_t queues_n;
+
+	queues = flow->hrxq->ind_table->queues;
+	queues_n = flow->hrxq->ind_table->queues_n;
+	if (!flow->mark)
+		goto out;
+	for (i = 0; i != queues_n; ++i) {
 		struct rte_flow *tmp;
-		struct mlx5_rxq_data *rxq =
-			(*priv->rxqs)[flow->ind_table->queues[i]];
+		struct mlx5_rxq_data *rxq = (*priv->rxqs)[queues[i]];
+		int mark = 0;
 
 		/*
 		 * To remove the mark from the queue, the queue must not be
 		 * present in any other marked flow (RSS or not).
 		 */
-		if (flow->mark) {
-			int mark = 0;
-
-			TAILQ_FOREACH(tmp, &priv->flows, next) {
-				unsigned int j;
-
-				if (!tmp->mark)
-					continue;
-				for (j = 0;
-				     (j != tmp->ind_table->queues_n) && !mark;
-				     j++)
-					if (tmp->ind_table->queues[j] ==
-					    flow->ind_table->queues[i])
-						mark = 1;
-			}
-			rxq->mark = mark;
+		TAILQ_FOREACH(tmp, &priv->flows, next) {
+			unsigned int j;
+
+			if (!tmp->mark)
+				continue;
+			for (j = 0;
+			     (j != tmp->hrxq->ind_table->queues_n) && !mark;
+			     j++)
+				if (tmp->hrxq->ind_table->queues[j] ==
+				    queues[i])
+					mark = 1;
 		}
+		rxq->mark = mark;
 	}
-	mlx5_priv_ind_table_ibv_release(priv, flow->ind_table);
+out:
+	TAILQ_REMOVE(&priv->flows, flow, next);
+	if (flow->ibv_flow)
+		claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
+	mlx5_priv_hrxq_release(priv, flow->hrxq);
 	rte_free(flow->ibv_attr);
 	DEBUG("Flow destroyed %p", (void *)flow);
 	rte_free(flow);
@@ -1345,10 +1320,8 @@ priv_flow_start(struct priv *priv)
 	struct rte_flow *flow;
 
 	TAILQ_FOREACH(flow, &priv->flows, next) {
-		struct ibv_qp *qp;
-
-		qp = flow->qp;
-		flow->ibv_flow = ibv_exp_create_flow(qp, flow->ibv_attr);
+		flow->ibv_flow = ibv_exp_create_flow(flow->hrxq->qp,
+						     flow->ibv_attr);
 		if (!flow->ibv_flow) {
 			DEBUG("Flow %p cannot be applied", (void *)flow);
 			rte_errno = EINVAL;
@@ -1358,8 +1331,8 @@ priv_flow_start(struct priv *priv)
 		if (flow->mark) {
 			unsigned int n;
 
-			for (n = 0; n < flow->ind_table->queues_n; ++n) {
-				uint16_t idx = flow->ind_table->queues[n];
+			for (n = 0; n < flow->hrxq->ind_table->queues_n; ++n) {
+				uint16_t idx = flow->hrxq->ind_table->queues[n];
 				(*priv->rxqs)[idx]->mark = 1;
 			}
 		}
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index bd6f966..076b575 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -1688,3 +1688,164 @@ mlx5_priv_ind_table_ibv_verify(struct priv *priv)
 	}
 	return ret;
 }
+
+/**
+ * Create an Rx Hash queue.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param rss_key
+ *   RSS key for the Rx hash queue.
+ * @param rss_key_len
+ *   RSS key length.
+ * @param hash_fields
+ *   Verbs protocol hash field to make the RSS on.
+ * @param queues
+ *   Queues entering in hash queue.
+ * @param queues_n
+ *   Number of queues.
+ *
+ * @return
+ *   A hash Rx queue on success, NULL otherwise.
+ */
+struct mlx5_hrxq*
+mlx5_priv_hrxq_new(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len,
+		   uint64_t hash_fields, uint16_t queues[], uint16_t queues_n)
+{
+	struct mlx5_hrxq *hrxq;
+	struct mlx5_ind_table_ibv *ind_tbl;
+	struct ibv_qp *qp;
+
+	ind_tbl = mlx5_priv_ind_table_ibv_get(priv, queues, queues_n);
+	if (!ind_tbl)
+		ind_tbl = mlx5_priv_ind_table_ibv_new(priv, queues, queues_n);
+	if (!ind_tbl)
+		return NULL;
+	qp = ibv_exp_create_qp(
+		priv->ctx,
+		&(struct ibv_exp_qp_init_attr){
+			.qp_type = IBV_QPT_RAW_PACKET,
+			.comp_mask =
+				IBV_EXP_QP_INIT_ATTR_PD |
+				IBV_EXP_QP_INIT_ATTR_PORT |
+				IBV_EXP_QP_INIT_ATTR_RX_HASH,
+			.pd = priv->pd,
+			.rx_hash_conf = &(struct ibv_exp_rx_hash_conf){
+				.rx_hash_function =
+					IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
+				.rx_hash_key_len = rss_key_len,
+				.rx_hash_key = rss_key,
+				.rx_hash_fields_mask = hash_fields,
+				.rwq_ind_tbl = ind_tbl->ind_table,
+			},
+			.port_num = priv->port,
+		});
+	if (!qp)
+		goto error;
+	hrxq = rte_calloc(__func__, 1, sizeof(*hrxq) + rss_key_len, 0);
+	if (!hrxq)
+		goto error;
+	hrxq->ind_table = ind_tbl;
+	hrxq->qp = qp;
+	hrxq->rss_key_len = rss_key_len;
+	hrxq->hash_fields = hash_fields;
+	memcpy(hrxq->rss_key, rss_key, rss_key_len);
+	rte_atomic32_inc(&hrxq->refcnt);
+	LIST_INSERT_HEAD(&priv->hrxqs, hrxq, next);
+	DEBUG("%p: Hash Rx queue %p: refcnt %d", (void*)priv,
+	      (void*)hrxq, rte_atomic32_read(&hrxq->refcnt));
+	return hrxq;
+error:
+	mlx5_priv_ind_table_ibv_release(priv, ind_tbl);
+	if (qp)
+		claim_zero(ibv_destroy_qp(qp));
+	return NULL;
+}
+
+/**
+ * Get an Rx Hash queue.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param rss_key, rss_key_len, hash_fields
+ *   RSS key, its length in bytes and the Verbs hash fields to match.
+ * @param queues
+ *   Queues entering in hash queue.
+ * @param queues_n
+ *   Number of queues.
+ *
+ * @return
+ *   A hash Rx queue if a matching one is found, NULL otherwise.
+ */
+struct mlx5_hrxq*
+mlx5_priv_hrxq_get(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len,
+		   uint64_t hash_fields, uint16_t queues[], uint16_t queues_n)
+{
+	struct mlx5_hrxq *hrxq;
+
+	LIST_FOREACH(hrxq, &priv->hrxqs, next) {
+		if (hrxq->rss_key_len != rss_key_len)
+			continue;
+		if (memcmp(hrxq->rss_key, rss_key, rss_key_len))
+			continue;
+		if (hrxq->hash_fields != hash_fields)
+			continue;
+		mlx5_priv_ind_table_ibv_get(priv, queues, queues_n);
+		rte_atomic32_inc(&hrxq->refcnt);
+		DEBUG("%p: Hash Rx queue %p: refcnt %d", (void*)priv,
+		      (void*)hrxq, rte_atomic32_read(&hrxq->refcnt));
+		return hrxq;
+	}
+	return NULL;
+}
+
+/**
+ * Release the hash Rx queue.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param hrxq
+ *   Pointer to Hash Rx queue to release.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+int
+mlx5_priv_hrxq_release(struct priv *priv, struct mlx5_hrxq *hrxq)
+{
+	DEBUG("%p: Hash Rx queue %p: refcnt %d", (void*)priv,
+	      (void*)hrxq, rte_atomic32_read(&hrxq->refcnt));
+	if (rte_atomic32_dec_and_test(&hrxq->refcnt)) {
+		claim_zero(ibv_destroy_qp(hrxq->qp));
+		mlx5_priv_ind_table_ibv_release(priv, hrxq->ind_table);
+		LIST_REMOVE(hrxq, next);
+		rte_free(hrxq);
+		return 0;
+	} else {
+		claim_nonzero(mlx5_priv_ind_table_ibv_release(priv,
+							      hrxq->ind_table));
+	}
+	return EBUSY;
+}
+
+/**
+ * Verify the Hash Rx queue list is empty.
+ *
+ * @param priv
+ *  Pointer to private structure.
+ *
+ * @return the number of objects not released.
+ */
+int
+mlx5_priv_hrxq_ibv_verify(struct priv *priv)
+{
+	struct mlx5_hrxq *hrxq;
+	int ret = 0;
+
+	LIST_FOREACH(hrxq, &priv->hrxqs, next) {
+		DEBUG("%p: Verbs Hash Rx queue %p still referenced",
+		      (void*)priv, (void*)hrxq);
+		++ret;
+	}
+	return ret;
+}
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 2b48a01..6397a50 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -161,6 +161,17 @@ struct mlx5_ind_table_ibv {
 	uint16_t queues[]; /**< Queue list. */
 };
 
+/* Hash Rx queue. */
+struct mlx5_hrxq {
+	LIST_ENTRY(mlx5_hrxq) next; /* Pointer to the next element. */
+	rte_atomic32_t refcnt; /* Reference counter. */
+	struct mlx5_ind_table_ibv *ind_table; /* Indirection table. */
+	struct ibv_qp *qp; /* Verbs queue pair. */
+	uint64_t hash_fields; /* Verbs Hash fields. */
+	uint8_t rss_key_len; /* Hash key length in bytes. */
+	uint8_t rss_key[]; /* Hash key. */
+};
+
 /* Hash RX queue types. */
 enum hash_rxq_type {
 	HASH_RXQ_TCPV4,
@@ -363,6 +374,14 @@ struct mlx5_ind_table_ibv* mlx5_priv_ind_table_ibv_get(struct priv *priv,
 int mlx5_priv_ind_table_ibv_release(struct priv * priv,
 				    struct mlx5_ind_table_ibv *ind_table);
 int mlx5_priv_ind_table_ibv_verify(struct priv *priv);
+struct mlx5_hrxq* mlx5_priv_hrxq_new(struct priv *priv, uint8_t *rss_key,
+				     uint8_t rss_key_len, uint64_t hash_fields,
+				     uint16_t queues[], uint16_t queues_n);
+struct mlx5_hrxq* mlx5_priv_hrxq_get(struct priv *priv, uint8_t *rss_key,
+				     uint8_t rss_key_len, uint64_t hash_fields,
+				     uint16_t queues[], uint16_t queues_n);
+int mlx5_priv_hrxq_release(struct priv *priv, struct mlx5_hrxq *hrxq);
+int mlx5_priv_hrxq_ibv_verify(struct priv *priv);
 
 /* mlx5_txq.c */
 
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v1 15/21] net/mlx5: disable priority protection in flows
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (14 preceding siblings ...)
  2017-08-02 14:10 ` [dpdk-dev] [PATCH v1 14/21] net/mlx5: add Hash Rx queue object Nelio Laranjeiro
@ 2017-08-02 14:10 ` Nelio Laranjeiro
  2017-08-02 14:10 ` [dpdk-dev] [PATCH v1 16/21] net/mlx5: use flow to enable promiscuous mode Nelio Laranjeiro
                   ` (39 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-08-02 14:10 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5_flow.c | 7 -------
 1 file changed, 7 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index f258567..90deb30 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -426,13 +426,6 @@ priv_flow_validate(struct priv *priv,
 				   "groups are not supported");
 		return -rte_errno;
 	}
-	if (attr->priority) {
-		rte_flow_error_set(error, ENOTSUP,
-				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
-				   NULL,
-				   "priorities are not supported");
-		return -rte_errno;
-	}
 	if (attr->egress) {
 		rte_flow_error_set(error, ENOTSUP,
 				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v1 16/21] net/mlx5: use flow to enable promiscuous mode
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (15 preceding siblings ...)
  2017-08-02 14:10 ` [dpdk-dev] [PATCH v1 15/21] net/mlx5: disable priority protection in flows Nelio Laranjeiro
@ 2017-08-02 14:10 ` Nelio Laranjeiro
  2017-08-02 14:10 ` [dpdk-dev] [PATCH v1 17/21] net/mlx5: use flow to enable all multi mode Nelio Laranjeiro
                   ` (38 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-08-02 14:10 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil
RSS hash configuration is currently ignored by the PMD; this commit
removes the RSS feature on promiscuous mode.
This functionality will be added in a later commit.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5.c         |   1 +
 drivers/net/mlx5/mlx5.h         |  15 +++--
 drivers/net/mlx5/mlx5_flow.c    | 127 ++++++++++++++++++++++++++++++++++++----
 drivers/net/mlx5/mlx5_rxmode.c  |  52 +++++-----------
 drivers/net/mlx5/mlx5_rxq.c     |   8 +--
 drivers/net/mlx5/mlx5_rxtx.h    |   3 -
 drivers/net/mlx5/mlx5_trigger.c |  20 +++++--
 7 files changed, 158 insertions(+), 68 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 52cbb20..f000404 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -775,6 +775,7 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 		priv->dev = eth_dev;
 		eth_dev->dev_ops = &mlx5_dev_ops;
 		TAILQ_INIT(&priv->flows);
+		TAILQ_INIT(&priv->ctrl_flows);
 
 		/* Bring Ethernet device up. */
 		DEBUG("forcing Ethernet interface up");
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 3c2e719..cbf8849 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -39,6 +39,7 @@
 #include <limits.h>
 #include <net/if.h>
 #include <netinet/in.h>
+#include <sys/queue.h>
 
 /* Verbs header. */
 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
@@ -86,6 +87,9 @@ struct mlx5_xstats_ctrl {
 	uint64_t base[MLX5_MAX_XSTATS];
 };
 
+/* Flow list. */
+TAILQ_HEAD(mlx5_flows, rte_flow);
+
 struct priv {
 	struct rte_eth_dev *dev; /* Ethernet device. */
 	struct ibv_context *ctx; /* Verbs context. */
@@ -103,7 +107,6 @@ struct priv {
 	/* Device properties. */
 	uint16_t mtu; /* Configured MTU. */
 	uint8_t port; /* Physical port number. */
-	unsigned int promisc_req:1; /* Promiscuous mode requested. */
 	unsigned int allmulti_req:1; /* All multicast mode requested. */
 	unsigned int hw_csum:1; /* Checksum offload is supported. */
 	unsigned int hw_csum_l2tun:1; /* Same for L2 tunnels. */
@@ -143,7 +146,8 @@ struct priv {
 	struct rte_intr_handle intr_handle; /* Interrupt handler. */
 	unsigned int (*reta_idx)[]; /* RETA index table. */
 	unsigned int reta_idx_n; /* RETA index size. */
-	TAILQ_HEAD(mlx5_flows, rte_flow) flows; /* RTE Flow rules. */
+	struct mlx5_flows flows; /* RTE Flow rules. */
+	struct mlx5_flows ctrl_flows; /* Control flow rules. */
 	LIST_HEAD(mr, mlx5_mr) mr; /* Memory region. */
 	LIST_HEAD(rxq, mlx5_rxq_ctrl) rxqsctrl; /* DPDK Rx queues. */
 	LIST_HEAD(rxqibv, mlx5_rxq_ibv) rxqsibv; /* Verbs Rx queues. */
@@ -289,11 +293,14 @@ struct rte_flow *mlx5_flow_create(struct rte_eth_dev *,
 				  struct rte_flow_error *);
 int mlx5_flow_destroy(struct rte_eth_dev *, struct rte_flow *,
 		      struct rte_flow_error *);
+void priv_flow_flush(struct priv *, struct mlx5_flows *);
 int mlx5_flow_flush(struct rte_eth_dev *, struct rte_flow_error *);
 int mlx5_flow_isolate(struct rte_eth_dev *, int, struct rte_flow_error *);
-int priv_flow_start(struct priv *);
-void priv_flow_stop(struct priv *);
+int priv_flow_start(struct priv *, struct mlx5_flows *);
+void priv_flow_stop(struct priv *, struct mlx5_flows *);
 int priv_flow_verify(struct priv *);
+int mlx5_flow_ctrl(struct rte_eth_dev *, struct rte_flow_item_eth *,
+		   struct rte_flow_item_eth *, unsigned int, unsigned int);
 
 /* mlx5_mr.c */
 
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 90deb30..39a49af 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -1169,11 +1169,14 @@ mlx5_flow_create(struct rte_eth_dev *dev,
  *
  * @param priv
  *   Pointer to private structure.
+ * @param list
+ *   Pointer to a TAILQ flow list.
  * @param[in] flow
  *   Flow to destroy.
  */
 static void
 priv_flow_destroy(struct priv *priv,
+		  struct mlx5_flows *list,
 		  struct rte_flow *flow)
 {
 	unsigned int i;
@@ -1193,7 +1196,7 @@ priv_flow_destroy(struct priv *priv,
 		 * To remove the mark from the queue, the queue must not be
 		 * present in any other marked flow (RSS or not).
 		 */
-		TAILQ_FOREACH(tmp, &priv->flows, next) {
+		TAILQ_FOREACH(tmp, list, next) {
 			unsigned int j;
 
 			if (!tmp->mark)
@@ -1208,7 +1211,7 @@ priv_flow_destroy(struct priv *priv,
 		rxq->mark = mark;
 	}
 out:
-	TAILQ_REMOVE(&priv->flows, flow, next);
+	TAILQ_REMOVE(list, flow, next);
 	if (flow->ibv_flow)
 		claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
 	mlx5_priv_hrxq_release(priv, flow->hrxq);
@@ -1232,7 +1235,7 @@ mlx5_flow_destroy(struct rte_eth_dev *dev,
 
 	(void)error;
 	priv_lock(priv);
-	priv_flow_destroy(priv, flow);
+	priv_flow_destroy(priv, &priv->flows, flow);
 	priv_unlock(priv);
 	return 0;
 }
@@ -1242,15 +1245,17 @@ mlx5_flow_destroy(struct rte_eth_dev *dev,
  *
  * @param priv
  *   Pointer to private structure.
+ * @param list
+ *   Pointer to a TAILQ flow list.
  */
-static void
-priv_flow_flush(struct priv *priv)
+void
+priv_flow_flush(struct priv *priv, struct mlx5_flows *list)
 {
-	while (!TAILQ_EMPTY(&priv->flows)) {
+	while (!TAILQ_EMPTY(list)) {
 		struct rte_flow *flow;
 
-		flow = TAILQ_FIRST(&priv->flows);
-		priv_flow_destroy(priv, flow);
+		flow = TAILQ_FIRST(list);
+		priv_flow_destroy(priv, list, flow);
 	}
 }
 
@@ -1268,7 +1273,7 @@ mlx5_flow_flush(struct rte_eth_dev *dev,
 
 	(void)error;
 	priv_lock(priv);
-	priv_flow_flush(priv);
+	priv_flow_flush(priv, &priv->flows);
 	priv_unlock(priv);
 	return 0;
 }
@@ -1280,13 +1285,15 @@ mlx5_flow_flush(struct rte_eth_dev *dev,
  *
  * @param priv
  *   Pointer to private structure.
+ * @param list
+ *   Pointer to a TAILQ flow list.
  */
 void
-priv_flow_stop(struct priv *priv)
+priv_flow_stop(struct priv *priv, struct mlx5_flows *list)
 {
 	struct rte_flow *flow;
 
-	TAILQ_FOREACH_REVERSE(flow, &priv->flows, mlx5_flows, next) {
+	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
 		unsigned int i;
 
 		claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
@@ -1303,16 +1310,18 @@ priv_flow_stop(struct priv *priv)
  *
  * @param priv
  *   Pointer to private structure.
+ * @param list
+ *   Pointer to a TAILQ flow list.
  *
  * @return
  *   0 on success, a errno value otherwise and rte_errno is set.
  */
 int
-priv_flow_start(struct priv *priv)
+priv_flow_start(struct priv *priv, struct mlx5_flows *list)
 {
 	struct rte_flow *flow;
 
-	TAILQ_FOREACH(flow, &priv->flows, next) {
+	TAILQ_FOREACH(flow, list, next) {
 		flow->ibv_flow = ibv_exp_create_flow(flow->hrxq->qp,
 						     flow->ibv_attr);
 		if (!flow->ibv_flow) {
@@ -1381,3 +1390,95 @@ priv_flow_verify(struct priv *priv)
 	}
 	return ret;
 }
+
+/**
+ * Enable/disable a flow control configured from the control plane.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param spec
+ *   An Ethernet flow spec to apply.
+ * @param mask
+ *   An Ethernet flow mask to apply.
+ * @param priority
+ *   The flow priority.
+ * @param enable
+ *   Enable/disable the flow.
+ *
+ * @return
+ *   0 on success.
+ */
+int
+mlx5_flow_ctrl(struct rte_eth_dev *dev,
+	       struct rte_flow_item_eth *spec,
+	       struct rte_flow_item_eth *mask,
+	       unsigned int priority,
+	       unsigned int enable)
+{
+	struct priv *priv = dev->data->dev_private;
+	const struct rte_flow_attr attr = {
+		.priority = priority,
+		.ingress = 1,
+	};
+	struct rte_flow_item items[] = {
+		{
+			.type = RTE_FLOW_ITEM_TYPE_ETH,
+			.spec = spec,
+			.last = NULL,
+			.mask = mask,
+		},
+		{
+			.type = RTE_FLOW_ITEM_TYPE_END,
+		},
+	};
+	struct rte_flow_action actions[] = {
+		{
+			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
+			.conf = &(struct rte_flow_action_queue){
+				.index = 0,
+			},
+		},
+		{
+			.type = RTE_FLOW_ACTION_TYPE_END,
+		},
+	};
+	struct rte_flow *flow;
+	struct rte_flow_error error;
+
+	if (enable) {
+		flow = priv_flow_create(priv, &attr, items, actions, &error);
+		if (!flow) {
+			return 1;
+		}
+		TAILQ_INSERT_TAIL(&priv->ctrl_flows, flow, next);
+		DEBUG("Control flow created %p", (void *)flow);
+	} else {
+		struct spec {
+			struct ibv_exp_flow_attr ibv_attr;
+			struct ibv_exp_flow_spec_eth eth;
+		} spec;
+		struct mlx5_flow_parse parser = {
+			.ibv_attr = &spec.ibv_attr,
+			.offset = sizeof(struct ibv_exp_flow_attr),
+		};
+		struct ibv_exp_flow_spec_eth *eth;
+		const unsigned int attr_size = sizeof(struct ibv_exp_flow_attr);
+
+		claim_zero(mlx5_flow_create_eth(&items[0], NULL, &parser));
+		TAILQ_FOREACH(flow, &priv->ctrl_flows, next) {
+			eth = (void *)((uintptr_t)flow->ibv_attr + attr_size);
+			assert(eth->type == IBV_EXP_FLOW_SPEC_ETH);
+			if (!memcmp(eth, &spec.eth, sizeof(*eth)))
+				break;
+		}
+		if (flow) {
+			claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
+			mlx5_priv_hrxq_release(priv, flow->hrxq);
+			rte_free(flow->ibv_attr);
+			DEBUG("Control flow destroyed %p", (void *)flow);
+			TAILQ_REMOVE(&priv->ctrl_flows, flow, next);
+			rte_free(flow);
+		}
+	}
+	return 0;
+}
diff --git a/drivers/net/mlx5/mlx5_rxmode.c b/drivers/net/mlx5/mlx5_rxmode.c
index 4a51e47..d6ca907 100644
--- a/drivers/net/mlx5/mlx5_rxmode.c
+++ b/drivers/net/mlx5/mlx5_rxmode.c
@@ -53,20 +53,6 @@
 
 /* Initialization data for special flows. */
 static const struct special_flow_init special_flow_init[] = {
-	[HASH_RXQ_FLOW_TYPE_PROMISC] = {
-		.dst_mac_val = "\x00\x00\x00\x00\x00\x00",
-		.dst_mac_mask = "\x00\x00\x00\x00\x00\x00",
-		.hash_types =
-			1 << HASH_RXQ_TCPV4 |
-			1 << HASH_RXQ_UDPV4 |
-			1 << HASH_RXQ_IPV4 |
-			1 << HASH_RXQ_TCPV6 |
-			1 << HASH_RXQ_UDPV6 |
-			1 << HASH_RXQ_IPV6 |
-			1 << HASH_RXQ_ETH |
-			0,
-		.per_vlan = 0,
-	},
 	[HASH_RXQ_FLOW_TYPE_ALLMULTI] = {
 		.dst_mac_val = "\x01\x00\x00\x00\x00\x00",
 		.dst_mac_mask = "\x01\x00\x00\x00\x00\x00",
@@ -342,7 +328,7 @@ priv_special_flow_enable_all(struct priv *priv)
 
 	if (priv->isolated)
 		return 0;
-	for (flow_type = HASH_RXQ_FLOW_TYPE_PROMISC;
+	for (flow_type = HASH_RXQ_FLOW_TYPE_ALLMULTI;
 			flow_type != HASH_RXQ_FLOW_TYPE_MAC;
 			++flow_type) {
 		int ret;
@@ -369,7 +355,7 @@ priv_special_flow_disable_all(struct priv *priv)
 {
 	enum hash_rxq_flow_type flow_type;
 
-	for (flow_type = HASH_RXQ_FLOW_TYPE_PROMISC;
+	for (flow_type = HASH_RXQ_FLOW_TYPE_ALLMULTI;
 			flow_type != HASH_RXQ_FLOW_TYPE_MAC;
 			++flow_type)
 		priv_special_flow_disable(priv, flow_type);
@@ -384,19 +370,16 @@ priv_special_flow_disable_all(struct priv *priv)
 void
 mlx5_promiscuous_enable(struct rte_eth_dev *dev)
 {
-	struct priv *priv = dev->data->dev_private;
-	int ret;
+	struct rte_flow_item_eth eth = {
+		.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
+		.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
+		.type = 0,
+	};
 
 	if (mlx5_is_secondary())
 		return;
-
-	priv_lock(priv);
-	priv->promisc_req = 1;
-	ret = priv_rehash_flows(priv);
-	if (ret)
-		ERROR("error while enabling promiscuous mode: %s",
-		      strerror(ret));
-	priv_unlock(priv);
+	dev->data->promiscuous = 1;
+	claim_zero(mlx5_flow_ctrl(dev, &eth, &eth, 3, 1));
 }
 
 /**
@@ -408,19 +391,16 @@ mlx5_promiscuous_enable(struct rte_eth_dev *dev)
 void
 mlx5_promiscuous_disable(struct rte_eth_dev *dev)
 {
-	struct priv *priv = dev->data->dev_private;
-	int ret;
+	struct rte_flow_item_eth eth = {
+		.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
+		.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
+		.type = 0,
+	};
 
 	if (mlx5_is_secondary())
 		return;
-
-	priv_lock(priv);
-	priv->promisc_req = 0;
-	ret = priv_rehash_flows(priv);
-	if (ret)
-		ERROR("error while disabling promiscuous mode: %s",
-		      strerror(ret));
-	priv_unlock(priv);
+	dev->data->promiscuous = 0;
+	claim_zero(mlx5_flow_ctrl(dev, &eth, &eth, 3, 0));
 }
 
 /**
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 076b575..5f9e84a 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -573,13 +573,7 @@ priv_destroy_hash_rxqs(struct priv *priv)
 int
 priv_allow_flow_type(struct priv *priv, enum hash_rxq_flow_type type)
 {
-	/* Only FLOW_TYPE_PROMISC is allowed when promiscuous mode
-	 * has been requested. */
-	if (priv->promisc_req)
-		return type == HASH_RXQ_FLOW_TYPE_PROMISC;
 	switch (type) {
-	case HASH_RXQ_FLOW_TYPE_PROMISC:
-		return !!priv->promisc_req;
 	case HASH_RXQ_FLOW_TYPE_ALLMULTI:
 		return !!priv->allmulti_req;
 	case HASH_RXQ_FLOW_TYPE_BROADCAST:
@@ -610,7 +604,7 @@ priv_rehash_flows(struct priv *priv)
 {
 	enum hash_rxq_flow_type i;
 
-	for (i = HASH_RXQ_FLOW_TYPE_PROMISC;
+	for (i = HASH_RXQ_FLOW_TYPE_ALLMULTI;
 			i != RTE_DIM((*priv->hash_rxqs)[0].special_flow);
 			++i)
 		if (!priv_allow_flow_type(priv, i)) {
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 6397a50..166cd5d 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -233,7 +233,6 @@ struct special_flow_init {
 };
 
 enum hash_rxq_flow_type {
-	HASH_RXQ_FLOW_TYPE_PROMISC,
 	HASH_RXQ_FLOW_TYPE_ALLMULTI,
 	HASH_RXQ_FLOW_TYPE_BROADCAST,
 	HASH_RXQ_FLOW_TYPE_IPV6MULTI,
@@ -245,8 +244,6 @@ static inline const char *
 hash_rxq_flow_type_str(enum hash_rxq_flow_type flow_type)
 {
 	switch (flow_type) {
-	case HASH_RXQ_FLOW_TYPE_PROMISC:
-		return "promiscuous";
 	case HASH_RXQ_FLOW_TYPE_ALLMULTI:
 		return "allmulticast";
 	case HASH_RXQ_FLOW_TYPE_BROADCAST:
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index bedce0a..ead8238 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -165,7 +165,16 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 		      (void *)priv, strerror(err));
 		goto error;
 	}
-	err = priv_flow_start(priv);
+	if (dev->data->promiscuous)
+	       mlx5_promiscuous_enable(dev);
+	err = priv_flow_start(priv, &priv->ctrl_flows);
+	if (err) {
+		ERROR("%p: an error occurred while configuring control flows:"
+		      " %s",
+		      (void *)priv, strerror(err));
+		goto error;
+	}
+	err = priv_flow_start(priv, &priv->flows);
 	if (err) {
 		ERROR("%p: an error occurred while configuring flows:"
 		      " %s",
@@ -189,7 +198,8 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 	priv_special_flow_disable_all(priv);
 	priv_mac_addrs_disable(priv);
 	priv_destroy_hash_rxqs(priv);
-	priv_flow_stop(priv);
+	priv_flow_stop(priv, &priv->flows);
+	priv_flow_flush(priv, &priv->ctrl_flows);
 	priv_rxq_stop(priv);
 	priv_txq_stop(priv);
 	priv_unlock(priv);
@@ -218,13 +228,13 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
 	priv_special_flow_disable_all(priv);
 	priv_mac_addrs_disable(priv);
 	priv_destroy_hash_rxqs(priv);
-	priv_flow_stop(priv);
-	priv_rx_intr_vec_disable(priv);
+	priv_flow_stop(priv, &priv->flows);
+	priv_flow_flush(priv, &priv->ctrl_flows);
 	LIST_FOREACH(mr, &priv->mr, next) {
 		priv_mr_release(priv, mr);
 	}
 	priv_txq_stop(priv);
 	priv_rxq_stop(priv);
-	priv_dev_interrupt_handler_uninstall(priv, dev);
+	priv_rx_intr_vec_disable(priv);
 	priv_unlock(priv);
 }
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v1 17/21] net/mlx5: use flow to enable all multi mode
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (16 preceding siblings ...)
  2017-08-02 14:10 ` [dpdk-dev] [PATCH v1 16/21] net/mlx5: use flow to enable promiscuous mode Nelio Laranjeiro
@ 2017-08-02 14:10 ` Nelio Laranjeiro
  2017-08-02 14:10 ` [dpdk-dev] [PATCH v1 18/21] net/mlx5: use flow to enable unicast traffic Nelio Laranjeiro
                   ` (37 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-08-02 14:10 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil
RSS hash configuration is currently ignored by the PMD; this commit
removes the RSS feature on all-multicast mode.
This functionality will be added in a later commit.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5.h         |  1 -
 drivers/net/mlx5/mlx5_rxmode.c  | 52 ++++++++++++++---------------------------
 drivers/net/mlx5/mlx5_rxq.c     |  9 +++----
 drivers/net/mlx5/mlx5_rxtx.h    |  3 ---
 drivers/net/mlx5/mlx5_trigger.c |  2 ++
 5 files changed, 23 insertions(+), 44 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index cbf8849..ba461a3 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -107,7 +107,6 @@ struct priv {
 	/* Device properties. */
 	uint16_t mtu; /* Configured MTU. */
 	uint8_t port; /* Physical port number. */
-	unsigned int allmulti_req:1; /* All multicast mode requested. */
 	unsigned int hw_csum:1; /* Checksum offload is supported. */
 	unsigned int hw_csum_l2tun:1; /* Same for L2 tunnels. */
 	unsigned int hw_vlan_strip:1; /* VLAN stripping is supported. */
diff --git a/drivers/net/mlx5/mlx5_rxmode.c b/drivers/net/mlx5/mlx5_rxmode.c
index d6ca907..3fcfec7 100644
--- a/drivers/net/mlx5/mlx5_rxmode.c
+++ b/drivers/net/mlx5/mlx5_rxmode.c
@@ -53,18 +53,6 @@
 
 /* Initialization data for special flows. */
 static const struct special_flow_init special_flow_init[] = {
-	[HASH_RXQ_FLOW_TYPE_ALLMULTI] = {
-		.dst_mac_val = "\x01\x00\x00\x00\x00\x00",
-		.dst_mac_mask = "\x01\x00\x00\x00\x00\x00",
-		.hash_types =
-			1 << HASH_RXQ_UDPV4 |
-			1 << HASH_RXQ_IPV4 |
-			1 << HASH_RXQ_UDPV6 |
-			1 << HASH_RXQ_IPV6 |
-			1 << HASH_RXQ_ETH |
-			0,
-		.per_vlan = 0,
-	},
 	[HASH_RXQ_FLOW_TYPE_BROADCAST] = {
 		.dst_mac_val = "\xff\xff\xff\xff\xff\xff",
 		.dst_mac_mask = "\xff\xff\xff\xff\xff\xff",
@@ -328,7 +316,7 @@ priv_special_flow_enable_all(struct priv *priv)
 
 	if (priv->isolated)
 		return 0;
-	for (flow_type = HASH_RXQ_FLOW_TYPE_ALLMULTI;
+	for (flow_type = HASH_RXQ_FLOW_TYPE_BROADCAST;
 			flow_type != HASH_RXQ_FLOW_TYPE_MAC;
 			++flow_type) {
 		int ret;
@@ -355,7 +343,7 @@ priv_special_flow_disable_all(struct priv *priv)
 {
 	enum hash_rxq_flow_type flow_type;
 
-	for (flow_type = HASH_RXQ_FLOW_TYPE_ALLMULTI;
+	for (flow_type = HASH_RXQ_FLOW_TYPE_BROADCAST;
 			flow_type != HASH_RXQ_FLOW_TYPE_MAC;
 			++flow_type)
 		priv_special_flow_disable(priv, flow_type);
@@ -412,19 +400,17 @@ mlx5_promiscuous_disable(struct rte_eth_dev *dev)
 void
 mlx5_allmulticast_enable(struct rte_eth_dev *dev)
 {
-	struct priv *priv = dev->data->dev_private;
-	int ret;
+	struct rte_flow_item_eth eth = {
+		.dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
+		.src.addr_bytes = "\x01\x00\x00\x00\x00\x00",
+		.type = 0,
+	};
 
 	if (mlx5_is_secondary())
 		return;
-
-	priv_lock(priv);
-	priv->allmulti_req = 1;
-	ret = priv_rehash_flows(priv);
-	if (ret)
-		ERROR("error while enabling allmulticast mode: %s",
-		      strerror(ret));
-	priv_unlock(priv);
+	dev->data->all_multicast = 1;
+	if (dev->data->dev_started)
+		claim_zero(mlx5_flow_ctrl(dev, &eth, &eth, 3, 1));
 }
 
 /**
@@ -436,17 +422,15 @@ mlx5_allmulticast_enable(struct rte_eth_dev *dev)
 void
 mlx5_allmulticast_disable(struct rte_eth_dev *dev)
 {
-	struct priv *priv = dev->data->dev_private;
-	int ret;
+	struct rte_flow_item_eth eth = {
+		.dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
+		.src.addr_bytes = "\x01\x00\x00\x00\x00\x00",
+		.type = 0,
+	};
 
 	if (mlx5_is_secondary())
 		return;
-
-	priv_lock(priv);
-	priv->allmulti_req = 0;
-	ret = priv_rehash_flows(priv);
-	if (ret)
-		ERROR("error while disabling allmulticast mode: %s",
-		      strerror(ret));
-	priv_unlock(priv);
+	dev->data->all_multicast = 0;
+	if (dev->data->dev_started)
+		claim_zero(mlx5_flow_ctrl(dev, &eth, &eth, 3, 0));
 }
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 5f9e84a..e5ec57f 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -573,16 +573,13 @@ priv_destroy_hash_rxqs(struct priv *priv)
 int
 priv_allow_flow_type(struct priv *priv, enum hash_rxq_flow_type type)
 {
+	(void)priv;
 	switch (type) {
-	case HASH_RXQ_FLOW_TYPE_ALLMULTI:
-		return !!priv->allmulti_req;
 	case HASH_RXQ_FLOW_TYPE_BROADCAST:
 	case HASH_RXQ_FLOW_TYPE_IPV6MULTI:
-		/* If allmulti is enabled, broadcast and ipv6multi
-		 * are unnecessary. */
-		return !priv->allmulti_req;
 	case HASH_RXQ_FLOW_TYPE_MAC:
 		return 1;
+		return 1;
 	default:
 		/* Unsupported flow type is not allowed. */
 		return 0;
@@ -604,7 +601,7 @@ priv_rehash_flows(struct priv *priv)
 {
 	enum hash_rxq_flow_type i;
 
-	for (i = HASH_RXQ_FLOW_TYPE_ALLMULTI;
+	for (i = HASH_RXQ_FLOW_TYPE_BROADCAST;
 			i != RTE_DIM((*priv->hash_rxqs)[0].special_flow);
 			++i)
 		if (!priv_allow_flow_type(priv, i)) {
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 166cd5d..4d26726 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -233,7 +233,6 @@ struct special_flow_init {
 };
 
 enum hash_rxq_flow_type {
-	HASH_RXQ_FLOW_TYPE_ALLMULTI,
 	HASH_RXQ_FLOW_TYPE_BROADCAST,
 	HASH_RXQ_FLOW_TYPE_IPV6MULTI,
 	HASH_RXQ_FLOW_TYPE_MAC,
@@ -244,8 +243,6 @@ static inline const char *
 hash_rxq_flow_type_str(enum hash_rxq_flow_type flow_type)
 {
 	switch (flow_type) {
-	case HASH_RXQ_FLOW_TYPE_ALLMULTI:
-		return "allmulticast";
 	case HASH_RXQ_FLOW_TYPE_BROADCAST:
 		return "broadcast";
 	case HASH_RXQ_FLOW_TYPE_IPV6MULTI:
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index ead8238..6370d6f 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -167,6 +167,8 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 	}
 	if (dev->data->promiscuous)
 	       mlx5_promiscuous_enable(dev);
+	else if (dev->data->all_multicast)
+		mlx5_allmulticast_enable(dev);
 	err = priv_flow_start(priv, &priv->ctrl_flows);
 	if (err) {
 		ERROR("%p: an error occurred while configuring control flows:"
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v1 18/21] net/mlx5: use flow to enable unicast traffic
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (17 preceding siblings ...)
  2017-08-02 14:10 ` [dpdk-dev] [PATCH v1 17/21] net/mlx5: use flow to enable all multi mode Nelio Laranjeiro
@ 2017-08-02 14:10 ` Nelio Laranjeiro
  2017-08-02 14:10 ` [dpdk-dev] [PATCH v1 19/21] net/mlx5: handle a single RSS hash key for all protocols Nelio Laranjeiro
                   ` (36 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-08-02 14:10 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil
RSS hash configuration is currently ignored by the PMD; this commit
removes the RSS feature.
This functionality will be added in a later commit.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5.c         |   9 +-
 drivers/net/mlx5/mlx5.h         |  22 +--
 drivers/net/mlx5/mlx5_defs.h    |   3 -
 drivers/net/mlx5/mlx5_flow.c    |  26 ++-
 drivers/net/mlx5/mlx5_mac.c     | 403 +++-------------------------------------
 drivers/net/mlx5/mlx5_rxmode.c  | 332 +--------------------------------
 drivers/net/mlx5/mlx5_rxq.c     |  65 -------
 drivers/net/mlx5/mlx5_rxtx.h    |  26 ---
 drivers/net/mlx5/mlx5_trigger.c | 181 ++++++++++++++++--
 drivers/net/mlx5/mlx5_vlan.c    |  54 ++----
 10 files changed, 231 insertions(+), 890 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index f000404..af3f7c8 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -146,13 +146,12 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	      ((priv->ctx != NULL) ? priv->ctx->device->name : ""));
 	/* In case mlx5_dev_stop() has not been called. */
 	priv_dev_interrupt_handler_uninstall(priv, dev);
-	priv_special_flow_disable_all(priv);
-	priv_mac_addrs_disable(priv);
 	priv_destroy_hash_rxqs(priv);
 
 	/* Prevent crashes when queues are still in use. */
 	dev->rx_pkt_burst = removed_rx_burst;
 	dev->tx_pkt_burst = removed_tx_burst;
+	priv_dev_traffic_disable(priv, dev);
 	if (priv->rxqs != NULL) {
 		/* XXX race condition if mlx5_rx_burst() is still running. */
 		usleep(1000);
@@ -734,10 +733,6 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 		     mac.addr_bytes[0], mac.addr_bytes[1],
 		     mac.addr_bytes[2], mac.addr_bytes[3],
 		     mac.addr_bytes[4], mac.addr_bytes[5]);
-		/* Register MAC address. */
-		claim_zero(priv_mac_addr_add(priv, 0,
-					     (const uint8_t (*)[ETHER_ADDR_LEN])
-					     mac.addr_bytes));
 #ifndef NDEBUG
 		{
 			char ifname[IF_NAMESIZE];
@@ -774,6 +769,8 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 		eth_dev->device->driver = &mlx5_driver.driver;
 		priv->dev = eth_dev;
 		eth_dev->dev_ops = &mlx5_dev_ops;
+		/* Register MAC address. */
+		claim_zero(mlx5_mac_addr_add(eth_dev, &mac, 0, 0));
 		TAILQ_INIT(&priv->flows);
 		TAILQ_INIT(&priv->ctrl_flows);
 
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index ba461a3..ee0de3c 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -95,13 +95,7 @@ struct priv {
 	struct ibv_context *ctx; /* Verbs context. */
 	struct ibv_device_attr device_attr; /* Device properties. */
 	struct ibv_pd *pd; /* Protection Domain. */
-	/*
-	 * MAC addresses array and configuration bit-field.
-	 * An extra entry that cannot be modified by the DPDK is reserved
-	 * for broadcast frames (destination MAC address ff:ff:ff:ff:ff:ff).
-	 */
-	struct ether_addr mac[MLX5_MAX_MAC_ADDRESSES];
-	BITFIELD_DECLARE(mac_configured, uint32_t, MLX5_MAX_MAC_ADDRESSES);
+	struct ether_addr mac[MLX5_MAX_MAC_ADDRESSES]; /* MAC addresses. */
 	uint16_t vlan_filter[MLX5_MAX_VLAN_IDS]; /* VLAN filters table. */
 	unsigned int vlan_filter_n; /* Number of configured VLAN filters. */
 	/* Device properties. */
@@ -221,13 +215,7 @@ void priv_select_rx_function(struct priv *);
 /* mlx5_mac.c */
 
 int priv_get_mac(struct priv *, uint8_t (*)[ETHER_ADDR_LEN]);
-void hash_rxq_mac_addrs_del(struct hash_rxq *);
-void priv_mac_addrs_disable(struct priv *);
 void mlx5_mac_addr_remove(struct rte_eth_dev *, uint32_t);
-int hash_rxq_mac_addrs_add(struct hash_rxq *);
-int priv_mac_addr_add(struct priv *, unsigned int,
-		      const uint8_t (*)[ETHER_ADDR_LEN]);
-int priv_mac_addrs_enable(struct priv *);
 int mlx5_mac_addr_add(struct rte_eth_dev *, struct ether_addr *, uint32_t,
 		      uint32_t);
 void mlx5_mac_addr_set(struct rte_eth_dev *, struct ether_addr *);
@@ -246,10 +234,6 @@ int mlx5_dev_rss_reta_update(struct rte_eth_dev *,
 
 /* mlx5_rxmode.c */
 
-int priv_special_flow_enable(struct priv *, enum hash_rxq_flow_type);
-void priv_special_flow_disable(struct priv *, enum hash_rxq_flow_type);
-int priv_special_flow_enable_all(struct priv *);
-void priv_special_flow_disable_all(struct priv *);
 void mlx5_promiscuous_enable(struct rte_eth_dev *);
 void mlx5_promiscuous_disable(struct rte_eth_dev *);
 void mlx5_allmulticast_enable(struct rte_eth_dev *);
@@ -276,6 +260,10 @@ void mlx5_vlan_strip_queue_set(struct rte_eth_dev *, uint16_t, int);
 
 int mlx5_dev_start(struct rte_eth_dev *);
 void mlx5_dev_stop(struct rte_eth_dev *);
+int priv_dev_traffic_enable(struct priv *, struct rte_eth_dev *);
+int priv_dev_traffic_disable(struct priv *, struct rte_eth_dev *);
+int priv_dev_traffic_restart(struct priv *, struct rte_eth_dev *);
+int mlx5_traffic_restart(struct rte_eth_dev *);
 
 /* mlx5_flow.c */
 
diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h
index a76bc6f..969315c 100644
--- a/drivers/net/mlx5/mlx5_defs.h
+++ b/drivers/net/mlx5/mlx5_defs.h
@@ -45,9 +45,6 @@
 /* Maximum number of simultaneous VLAN filters. */
 #define MLX5_MAX_VLAN_IDS 128
 
-/* Maximum number of special flows. */
-#define MLX5_MAX_SPECIAL_FLOWS 4
-
 /*
  * Request TX completion every time descriptors reach this threshold since
  * the previous request. Must be a power of two for performance reasons.
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 39a49af..8316255 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -1029,20 +1029,18 @@ priv_flow_create_action_queue(struct priv *priv,
 					    flow->hash_fields,
 					    flow->actions.queues,
 					    flow->actions.queues_n);
-	if (rte_flow->hrxq) {
-		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
-				   NULL, "duplicated flow");
-		goto error;
-	}
-	rte_flow->hrxq = mlx5_priv_hrxq_new(priv, rss_hash_default_key,
-					    rss_hash_default_key_len,
-					    flow->hash_fields,
-					    flow->actions.queues,
-					    flow->actions.queues_n);
 	if (!rte_flow->hrxq) {
-		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
-				   NULL, "cannot create hash rxq");
-		goto error;
+		rte_flow->hrxq = mlx5_priv_hrxq_new(priv, rss_hash_default_key,
+						    rss_hash_default_key_len,
+						    flow->hash_fields,
+						    flow->actions.queues,
+						    flow->actions.queues_n);
+		if (!rte_flow->hrxq) {
+			rte_flow_error_set(error, ENOMEM,
+					   RTE_FLOW_ERROR_TYPE_HANDLE,
+					   NULL, "cannot create hash rxq");
+			goto error;
+		}
 	}
 	for (i = 0; i != flow->actions.queues_n; ++i) {
 		struct mlx5_rxq_data *q = (*priv->rxqs)[flow->actions.queues[i]];
@@ -1448,7 +1446,7 @@ mlx5_flow_ctrl(struct rte_eth_dev *dev,
 	if (enable) {
 		flow = priv_flow_create(priv, &attr, items, actions, &error);
 		if (!flow) {
-			return 1;
+			return rte_errno;
 		}
 		TAILQ_INSERT_TAIL(&priv->ctrl_flows, flow, next);
 		DEBUG("Control flow created %p", (void *)flow);
diff --git a/drivers/net/mlx5/mlx5_mac.c b/drivers/net/mlx5/mlx5_mac.c
index 45d23e4..ca26bcc 100644
--- a/drivers/net/mlx5/mlx5_mac.c
+++ b/drivers/net/mlx5/mlx5_mac.c
@@ -83,112 +83,6 @@ priv_get_mac(struct priv *priv, uint8_t (*mac)[ETHER_ADDR_LEN])
 }
 
 /**
- * Delete MAC flow steering rule.
- *
- * @param hash_rxq
- *   Pointer to hash RX queue structure.
- * @param mac_index
- *   MAC address index.
- * @param vlan_index
- *   VLAN index to use.
- */
-static void
-hash_rxq_del_mac_flow(struct hash_rxq *hash_rxq, unsigned int mac_index,
-		      unsigned int vlan_index)
-{
-#ifndef NDEBUG
-	const uint8_t (*mac)[ETHER_ADDR_LEN] =
-		(const uint8_t (*)[ETHER_ADDR_LEN])
-		hash_rxq->priv->mac[mac_index].addr_bytes;
-#endif
-
-	assert(mac_index < RTE_DIM(hash_rxq->mac_flow));
-	assert(vlan_index < RTE_DIM(hash_rxq->mac_flow[mac_index]));
-	if (hash_rxq->mac_flow[mac_index][vlan_index] == NULL)
-		return;
-	DEBUG("%p: removing MAC address %02x:%02x:%02x:%02x:%02x:%02x index %u"
-	      " VLAN index %u",
-	      (void *)hash_rxq,
-	      (*mac)[0], (*mac)[1], (*mac)[2], (*mac)[3], (*mac)[4], (*mac)[5],
-	      mac_index,
-	      vlan_index);
-	claim_zero(ibv_exp_destroy_flow(hash_rxq->mac_flow
-					[mac_index][vlan_index]));
-	hash_rxq->mac_flow[mac_index][vlan_index] = NULL;
-}
-
-/**
- * Unregister a MAC address from a hash RX queue.
- *
- * @param hash_rxq
- *   Pointer to hash RX queue structure.
- * @param mac_index
- *   MAC address index.
- */
-static void
-hash_rxq_mac_addr_del(struct hash_rxq *hash_rxq, unsigned int mac_index)
-{
-	unsigned int i;
-
-	assert(mac_index < RTE_DIM(hash_rxq->mac_flow));
-	for (i = 0; (i != RTE_DIM(hash_rxq->mac_flow[mac_index])); ++i)
-		hash_rxq_del_mac_flow(hash_rxq, mac_index, i);
-}
-
-/**
- * Unregister all MAC addresses from a hash RX queue.
- *
- * @param hash_rxq
- *   Pointer to hash RX queue structure.
- */
-void
-hash_rxq_mac_addrs_del(struct hash_rxq *hash_rxq)
-{
-	unsigned int i;
-
-	for (i = 0; (i != RTE_DIM(hash_rxq->mac_flow)); ++i)
-		hash_rxq_mac_addr_del(hash_rxq, i);
-}
-
-/**
- * Unregister a MAC address.
- *
- * This is done for each hash RX queue.
- *
- * @param priv
- *   Pointer to private structure.
- * @param mac_index
- *   MAC address index.
- */
-static void
-priv_mac_addr_del(struct priv *priv, unsigned int mac_index)
-{
-	unsigned int i;
-
-	assert(mac_index < RTE_DIM(priv->mac));
-	if (!BITFIELD_ISSET(priv->mac_configured, mac_index))
-		return;
-	for (i = 0; (i != priv->hash_rxqs_n); ++i)
-		hash_rxq_mac_addr_del(&(*priv->hash_rxqs)[i], mac_index);
-	BITFIELD_RESET(priv->mac_configured, mac_index);
-}
-
-/**
- * Unregister all MAC addresses from all hash RX queues.
- *
- * @param priv
- *   Pointer to private structure.
- */
-void
-priv_mac_addrs_disable(struct priv *priv)
-{
-	unsigned int i;
-
-	for (i = 0; (i != priv->hash_rxqs_n); ++i)
-		hash_rxq_mac_addrs_del(&(*priv->hash_rxqs)[i]);
-}
-
-/**
  * DPDK callback to remove a MAC address.
  *
  * @param dev
@@ -199,258 +93,12 @@ priv_mac_addrs_disable(struct priv *priv)
 void
 mlx5_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
 {
-	struct priv *priv = dev->data->dev_private;
-
 	if (mlx5_is_secondary())
 		return;
-
-	priv_lock(priv);
-	DEBUG("%p: removing MAC address from index %" PRIu32,
-	      (void *)dev, index);
-	if (index >= RTE_DIM(priv->mac))
-		goto end;
-	priv_mac_addr_del(priv, index);
-end:
-	priv_unlock(priv);
-}
-
-/**
- * Add MAC flow steering rule.
- *
- * @param hash_rxq
- *   Pointer to hash RX queue structure.
- * @param mac_index
- *   MAC address index to register.
- * @param vlan_index
- *   VLAN index to use.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-static int
-hash_rxq_add_mac_flow(struct hash_rxq *hash_rxq, unsigned int mac_index,
-		      unsigned int vlan_index)
-{
-	struct ibv_exp_flow *flow;
-	struct priv *priv = hash_rxq->priv;
-	const uint8_t (*mac)[ETHER_ADDR_LEN] =
-			(const uint8_t (*)[ETHER_ADDR_LEN])
-			priv->mac[mac_index].addr_bytes;
-	FLOW_ATTR_SPEC_ETH(data, priv_flow_attr(priv, NULL, 0, hash_rxq->type));
-	struct ibv_exp_flow_attr *attr = &data->attr;
-	struct ibv_exp_flow_spec_eth *spec = &data->spec;
-	unsigned int vlan_enabled = !!priv->vlan_filter_n;
-	unsigned int vlan_id = priv->vlan_filter[vlan_index];
-
-	assert(mac_index < RTE_DIM(hash_rxq->mac_flow));
-	assert(vlan_index < RTE_DIM(hash_rxq->mac_flow[mac_index]));
-	if (hash_rxq->mac_flow[mac_index][vlan_index] != NULL)
-		return 0;
-	/*
-	 * No padding must be inserted by the compiler between attr and spec.
-	 * This layout is expected by libibverbs.
-	 */
-	assert(((uint8_t *)attr + sizeof(*attr)) == (uint8_t *)spec);
-	priv_flow_attr(priv, attr, sizeof(data), hash_rxq->type);
-	/* The first specification must be Ethernet. */
-	assert(spec->type == IBV_EXP_FLOW_SPEC_ETH);
-	assert(spec->size == sizeof(*spec));
-	*spec = (struct ibv_exp_flow_spec_eth){
-		.type = IBV_EXP_FLOW_SPEC_ETH,
-		.size = sizeof(*spec),
-		.val = {
-			.dst_mac = {
-				(*mac)[0], (*mac)[1], (*mac)[2],
-				(*mac)[3], (*mac)[4], (*mac)[5]
-			},
-			.vlan_tag = (vlan_enabled ? htons(vlan_id) : 0),
-		},
-		.mask = {
-			.dst_mac = "\xff\xff\xff\xff\xff\xff",
-			.vlan_tag = (vlan_enabled ? htons(0xfff) : 0),
-		},
-	};
-	DEBUG("%p: adding MAC address %02x:%02x:%02x:%02x:%02x:%02x index %u"
-	      " VLAN index %u filtering %s, ID %u",
-	      (void *)hash_rxq,
-	      (*mac)[0], (*mac)[1], (*mac)[2], (*mac)[3], (*mac)[4], (*mac)[5],
-	      mac_index,
-	      vlan_index,
-	      (vlan_enabled ? "enabled" : "disabled"),
-	      vlan_id);
-	/* Create related flow. */
-	errno = 0;
-	flow = ibv_exp_create_flow(hash_rxq->qp, attr);
-	if (flow == NULL) {
-		/* It's not clear whether errno is always set in this case. */
-		ERROR("%p: flow configuration failed, errno=%d: %s",
-		      (void *)hash_rxq, errno,
-		      (errno ? strerror(errno) : "Unknown error"));
-		if (errno)
-			return errno;
-		return EINVAL;
-	}
-	hash_rxq->mac_flow[mac_index][vlan_index] = flow;
-	return 0;
-}
-
-/**
- * Register a MAC address in a hash RX queue.
- *
- * @param hash_rxq
- *   Pointer to hash RX queue structure.
- * @param mac_index
- *   MAC address index to register.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-static int
-hash_rxq_mac_addr_add(struct hash_rxq *hash_rxq, unsigned int mac_index)
-{
-	struct priv *priv = hash_rxq->priv;
-	unsigned int i = 0;
-	int ret;
-
-	assert(mac_index < RTE_DIM(hash_rxq->mac_flow));
-	assert(RTE_DIM(hash_rxq->mac_flow[mac_index]) ==
-	       RTE_DIM(priv->vlan_filter));
-	/* Add a MAC address for each VLAN filter, or at least once. */
-	do {
-		ret = hash_rxq_add_mac_flow(hash_rxq, mac_index, i);
-		if (ret) {
-			/* Failure, rollback. */
-			while (i != 0)
-				hash_rxq_del_mac_flow(hash_rxq, mac_index,
-						      --i);
-			return ret;
-		}
-	} while (++i < priv->vlan_filter_n);
-	return 0;
-}
-
-/**
- * Register all MAC addresses in a hash RX queue.
- *
- * @param hash_rxq
- *   Pointer to hash RX queue structure.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-int
-hash_rxq_mac_addrs_add(struct hash_rxq *hash_rxq)
-{
-	struct priv *priv = hash_rxq->priv;
-	unsigned int i;
-	int ret;
-
-	assert(RTE_DIM(priv->mac) == RTE_DIM(hash_rxq->mac_flow));
-	for (i = 0; (i != RTE_DIM(priv->mac)); ++i) {
-		if (!BITFIELD_ISSET(priv->mac_configured, i))
-			continue;
-		ret = hash_rxq_mac_addr_add(hash_rxq, i);
-		if (!ret)
-			continue;
-		/* Failure, rollback. */
-		while (i != 0)
-			hash_rxq_mac_addr_del(hash_rxq, --i);
-		assert(ret > 0);
-		return ret;
-	}
-	return 0;
-}
-
-/**
- * Register a MAC address.
- *
- * This is done for each hash RX queue.
- *
- * @param priv
- *   Pointer to private structure.
- * @param mac_index
- *   MAC address index to use.
- * @param mac
- *   MAC address to register.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-int
-priv_mac_addr_add(struct priv *priv, unsigned int mac_index,
-		  const uint8_t (*mac)[ETHER_ADDR_LEN])
-{
-	unsigned int i;
-	int ret;
-
-	assert(mac_index < RTE_DIM(priv->mac));
-	/* First, make sure this address isn't already configured. */
-	for (i = 0; (i != RTE_DIM(priv->mac)); ++i) {
-		/* Skip this index, it's going to be reconfigured. */
-		if (i == mac_index)
-			continue;
-		if (!BITFIELD_ISSET(priv->mac_configured, i))
-			continue;
-		if (memcmp(priv->mac[i].addr_bytes, *mac, sizeof(*mac)))
-			continue;
-		/* Address already configured elsewhere, return with error. */
-		return EADDRINUSE;
-	}
-	if (BITFIELD_ISSET(priv->mac_configured, mac_index))
-		priv_mac_addr_del(priv, mac_index);
-	priv->mac[mac_index] = (struct ether_addr){
-		{
-			(*mac)[0], (*mac)[1], (*mac)[2],
-			(*mac)[3], (*mac)[4], (*mac)[5]
-		}
-	};
-	if (!priv_allow_flow_type(priv, HASH_RXQ_FLOW_TYPE_MAC))
-		goto end;
-	for (i = 0; (i != priv->hash_rxqs_n); ++i) {
-		ret = hash_rxq_mac_addr_add(&(*priv->hash_rxqs)[i], mac_index);
-		if (!ret)
-			continue;
-		/* Failure, rollback. */
-		while (i != 0)
-			hash_rxq_mac_addr_del(&(*priv->hash_rxqs)[--i],
-					      mac_index);
-		return ret;
-	}
-end:
-	BITFIELD_SET(priv->mac_configured, mac_index);
-	return 0;
-}
-
-/**
- * Register all MAC addresses in all hash RX queues.
- *
- * @param priv
- *   Pointer to private structure.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-int
-priv_mac_addrs_enable(struct priv *priv)
-{
-	unsigned int i;
-	int ret;
-
-	if (priv->isolated)
-		return 0;
-	if (!priv_allow_flow_type(priv, HASH_RXQ_FLOW_TYPE_MAC))
-		return 0;
-	for (i = 0; (i != priv->hash_rxqs_n); ++i) {
-		ret = hash_rxq_mac_addrs_add(&(*priv->hash_rxqs)[i]);
-		if (!ret)
-			continue;
-		/* Failure, rollback. */
-		while (i != 0)
-			hash_rxq_mac_addrs_del(&(*priv->hash_rxqs)[--i]);
-		assert(ret > 0);
-		return ret;
-	}
-	return 0;
+	assert(index < RTE_DIM(dev->data->mac_addrs));
+	memset(&dev->data->mac_addrs[index], 0, sizeof(struct ether_addr));
+	if (!dev->data->promiscuous && !dev->data->all_multicast)
+		mlx5_traffic_restart(dev);
 }
 
 /**
@@ -464,31 +112,35 @@ priv_mac_addrs_enable(struct priv *priv)
  *   MAC address index.
  * @param vmdq
  *   VMDq pool index to associate address with (ignored).
+ *
+ * @return
+ *   0 on success.
  */
 int
-mlx5_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
+mlx5_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac,
 		  uint32_t index, uint32_t vmdq)
 {
-	struct priv *priv = dev->data->dev_private;
-	int re;
-
-	if (mlx5_is_secondary())
-		return -ENOTSUP;
+	unsigned int i;
+	int ret = 0;
 
 	(void)vmdq;
-	priv_lock(priv);
-	DEBUG("%p: adding MAC address at index %" PRIu32,
-	      (void *)dev, index);
-	if (index >= RTE_DIM(priv->mac)) {
-		re = EINVAL;
-		goto end;
+	if (mlx5_is_secondary())
+		return 0;
+	assert(index < RTE_DIM(dev->data->mac_addrs));
+	/* First, make sure this address isn't already configured. */
+	for (i = 0; (i != RTE_DIM(dev->data->mac_addrs)); ++i) {
+		/* Skip this index, it's going to be reconfigured. */
+		if (i == index)
+			continue;
+		if (memcmp(&dev->data->mac_addrs[i], mac, sizeof(*mac)))
+			continue;
+		/* Address already configured elsewhere, return with error. */
+		return EADDRINUSE;
 	}
-	re = priv_mac_addr_add(priv, index,
-			       (const uint8_t (*)[ETHER_ADDR_LEN])
-			       mac_addr->addr_bytes);
-end:
-	priv_unlock(priv);
-	return -re;
+	dev->data->mac_addrs[index] = *mac;
+	if (!dev->data->promiscuous && !dev->data->all_multicast)
+		mlx5_traffic_restart(dev);
+	return ret;
 }
 
 /**
@@ -502,7 +154,8 @@ mlx5_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
 void
 mlx5_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
 {
+	if (mlx5_is_secondary())
+		return;
 	DEBUG("%p: setting primary MAC address", (void *)dev);
-	mlx5_mac_addr_remove(dev, 0);
 	mlx5_mac_addr_add(dev, mac_addr, 0, 0);
 }
diff --git a/drivers/net/mlx5/mlx5_rxmode.c b/drivers/net/mlx5/mlx5_rxmode.c
index 3fcfec7..0ef2cdf 100644
--- a/drivers/net/mlx5/mlx5_rxmode.c
+++ b/drivers/net/mlx5/mlx5_rxmode.c
@@ -51,304 +51,6 @@
 #include "mlx5_rxtx.h"
 #include "mlx5_utils.h"
 
-/* Initialization data for special flows. */
-static const struct special_flow_init special_flow_init[] = {
-	[HASH_RXQ_FLOW_TYPE_BROADCAST] = {
-		.dst_mac_val = "\xff\xff\xff\xff\xff\xff",
-		.dst_mac_mask = "\xff\xff\xff\xff\xff\xff",
-		.hash_types =
-			1 << HASH_RXQ_UDPV4 |
-			1 << HASH_RXQ_IPV4 |
-			1 << HASH_RXQ_UDPV6 |
-			1 << HASH_RXQ_IPV6 |
-			1 << HASH_RXQ_ETH |
-			0,
-		.per_vlan = 1,
-	},
-	[HASH_RXQ_FLOW_TYPE_IPV6MULTI] = {
-		.dst_mac_val = "\x33\x33\x00\x00\x00\x00",
-		.dst_mac_mask = "\xff\xff\x00\x00\x00\x00",
-		.hash_types =
-			1 << HASH_RXQ_UDPV6 |
-			1 << HASH_RXQ_IPV6 |
-			1 << HASH_RXQ_ETH |
-			0,
-		.per_vlan = 1,
-	},
-};
-
-/**
- * Enable a special flow in a hash RX queue for a given VLAN index.
- *
- * @param hash_rxq
- *   Pointer to hash RX queue structure.
- * @param flow_type
- *   Special flow type.
- * @param vlan_index
- *   VLAN index to use.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-static int
-hash_rxq_special_flow_enable_vlan(struct hash_rxq *hash_rxq,
-				  enum hash_rxq_flow_type flow_type,
-				  unsigned int vlan_index)
-{
-	struct priv *priv = hash_rxq->priv;
-	struct ibv_exp_flow *flow;
-	FLOW_ATTR_SPEC_ETH(data, priv_flow_attr(priv, NULL, 0, hash_rxq->type));
-	struct ibv_exp_flow_attr *attr = &data->attr;
-	struct ibv_exp_flow_spec_eth *spec = &data->spec;
-	const uint8_t *mac;
-	const uint8_t *mask;
-	unsigned int vlan_enabled = (priv->vlan_filter_n &&
-				     special_flow_init[flow_type].per_vlan);
-	unsigned int vlan_id = priv->vlan_filter[vlan_index];
-
-	/* Check if flow is relevant for this hash_rxq. */
-	if (!(special_flow_init[flow_type].hash_types & (1 << hash_rxq->type)))
-		return 0;
-	/* Check if flow already exists. */
-	if (hash_rxq->special_flow[flow_type][vlan_index] != NULL)
-		return 0;
-
-	/*
-	 * No padding must be inserted by the compiler between attr and spec.
-	 * This layout is expected by libibverbs.
-	 */
-	assert(((uint8_t *)attr + sizeof(*attr)) == (uint8_t *)spec);
-	priv_flow_attr(priv, attr, sizeof(data), hash_rxq->type);
-	/* The first specification must be Ethernet. */
-	assert(spec->type == IBV_EXP_FLOW_SPEC_ETH);
-	assert(spec->size == sizeof(*spec));
-
-	mac = special_flow_init[flow_type].dst_mac_val;
-	mask = special_flow_init[flow_type].dst_mac_mask;
-	*spec = (struct ibv_exp_flow_spec_eth){
-		.type = IBV_EXP_FLOW_SPEC_ETH,
-		.size = sizeof(*spec),
-		.val = {
-			.dst_mac = {
-				mac[0], mac[1], mac[2],
-				mac[3], mac[4], mac[5],
-			},
-			.vlan_tag = (vlan_enabled ? htons(vlan_id) : 0),
-		},
-		.mask = {
-			.dst_mac = {
-				mask[0], mask[1], mask[2],
-				mask[3], mask[4], mask[5],
-			},
-			.vlan_tag = (vlan_enabled ? htons(0xfff) : 0),
-		},
-	};
-
-	errno = 0;
-	flow = ibv_exp_create_flow(hash_rxq->qp, attr);
-	if (flow == NULL) {
-		/* It's not clear whether errno is always set in this case. */
-		ERROR("%p: flow configuration failed, errno=%d: %s",
-		      (void *)hash_rxq, errno,
-		      (errno ? strerror(errno) : "Unknown error"));
-		if (errno)
-			return errno;
-		return EINVAL;
-	}
-	hash_rxq->special_flow[flow_type][vlan_index] = flow;
-	DEBUG("%p: special flow %s (index %d) VLAN %u (index %u) enabled",
-	      (void *)hash_rxq, hash_rxq_flow_type_str(flow_type), flow_type,
-	      vlan_id, vlan_index);
-	return 0;
-}
-
-/**
- * Disable a special flow in a hash RX queue for a given VLAN index.
- *
- * @param hash_rxq
- *   Pointer to hash RX queue structure.
- * @param flow_type
- *   Special flow type.
- * @param vlan_index
- *   VLAN index to use.
- */
-static void
-hash_rxq_special_flow_disable_vlan(struct hash_rxq *hash_rxq,
-				   enum hash_rxq_flow_type flow_type,
-				   unsigned int vlan_index)
-{
-	struct ibv_exp_flow *flow =
-		hash_rxq->special_flow[flow_type][vlan_index];
-
-	if (flow == NULL)
-		return;
-	claim_zero(ibv_exp_destroy_flow(flow));
-	hash_rxq->special_flow[flow_type][vlan_index] = NULL;
-	DEBUG("%p: special flow %s (index %d) VLAN %u (index %u) disabled",
-	      (void *)hash_rxq, hash_rxq_flow_type_str(flow_type), flow_type,
-	      hash_rxq->priv->vlan_filter[vlan_index], vlan_index);
-}
-
-/**
- * Enable a special flow in a hash RX queue.
- *
- * @param hash_rxq
- *   Pointer to hash RX queue structure.
- * @param flow_type
- *   Special flow type.
- * @param vlan_index
- *   VLAN index to use.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-static int
-hash_rxq_special_flow_enable(struct hash_rxq *hash_rxq,
-			     enum hash_rxq_flow_type flow_type)
-{
-	struct priv *priv = hash_rxq->priv;
-	unsigned int i = 0;
-	int ret;
-
-	assert((unsigned int)flow_type < RTE_DIM(hash_rxq->special_flow));
-	assert(RTE_DIM(hash_rxq->special_flow[flow_type]) ==
-	       RTE_DIM(priv->vlan_filter));
-	/* Add a special flow for each VLAN filter when relevant. */
-	do {
-		ret = hash_rxq_special_flow_enable_vlan(hash_rxq, flow_type, i);
-		if (ret) {
-			/* Failure, rollback. */
-			while (i != 0)
-				hash_rxq_special_flow_disable_vlan(hash_rxq,
-								   flow_type,
-								   --i);
-			return ret;
-		}
-	} while (special_flow_init[flow_type].per_vlan &&
-		 ++i < priv->vlan_filter_n);
-	return 0;
-}
-
-/**
- * Disable a special flow in a hash RX queue.
- *
- * @param hash_rxq
- *   Pointer to hash RX queue structure.
- * @param flow_type
- *   Special flow type.
- */
-static void
-hash_rxq_special_flow_disable(struct hash_rxq *hash_rxq,
-			      enum hash_rxq_flow_type flow_type)
-{
-	unsigned int i;
-
-	assert((unsigned int)flow_type < RTE_DIM(hash_rxq->special_flow));
-	for (i = 0; (i != RTE_DIM(hash_rxq->special_flow[flow_type])); ++i)
-		hash_rxq_special_flow_disable_vlan(hash_rxq, flow_type, i);
-}
-
-/**
- * Enable a special flow in all hash RX queues.
- *
- * @param priv
- *   Private structure.
- * @param flow_type
- *   Special flow type.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-int
-priv_special_flow_enable(struct priv *priv, enum hash_rxq_flow_type flow_type)
-{
-	unsigned int i;
-
-	if (!priv_allow_flow_type(priv, flow_type))
-		return 0;
-	for (i = 0; (i != priv->hash_rxqs_n); ++i) {
-		struct hash_rxq *hash_rxq = &(*priv->hash_rxqs)[i];
-		int ret;
-
-		ret = hash_rxq_special_flow_enable(hash_rxq, flow_type);
-		if (!ret)
-			continue;
-		/* Failure, rollback. */
-		while (i != 0) {
-			hash_rxq = &(*priv->hash_rxqs)[--i];
-			hash_rxq_special_flow_disable(hash_rxq, flow_type);
-		}
-		return ret;
-	}
-	return 0;
-}
-
-/**
- * Disable a special flow in all hash RX queues.
- *
- * @param priv
- *   Private structure.
- * @param flow_type
- *   Special flow type.
- */
-void
-priv_special_flow_disable(struct priv *priv, enum hash_rxq_flow_type flow_type)
-{
-	unsigned int i;
-
-	for (i = 0; (i != priv->hash_rxqs_n); ++i) {
-		struct hash_rxq *hash_rxq = &(*priv->hash_rxqs)[i];
-
-		hash_rxq_special_flow_disable(hash_rxq, flow_type);
-	}
-}
-
-/**
- * Enable all special flows in all hash RX queues.
- *
- * @param priv
- *   Private structure.
- */
-int
-priv_special_flow_enable_all(struct priv *priv)
-{
-	enum hash_rxq_flow_type flow_type;
-
-	if (priv->isolated)
-		return 0;
-	for (flow_type = HASH_RXQ_FLOW_TYPE_BROADCAST;
-			flow_type != HASH_RXQ_FLOW_TYPE_MAC;
-			++flow_type) {
-		int ret;
-
-		ret = priv_special_flow_enable(priv, flow_type);
-		if (!ret)
-			continue;
-		/* Failure, rollback. */
-		while (flow_type)
-			priv_special_flow_disable(priv, --flow_type);
-		return ret;
-	}
-	return 0;
-}
-
-/**
- * Disable all special flows in all hash RX queues.
- *
- * @param priv
- *   Private structure.
- */
-void
-priv_special_flow_disable_all(struct priv *priv)
-{
-	enum hash_rxq_flow_type flow_type;
-
-	for (flow_type = HASH_RXQ_FLOW_TYPE_BROADCAST;
-			flow_type != HASH_RXQ_FLOW_TYPE_MAC;
-			++flow_type)
-		priv_special_flow_disable(priv, flow_type);
-}
-
 /**
  * DPDK callback to enable promiscuous mode.
  *
@@ -358,16 +60,10 @@ priv_special_flow_disable_all(struct priv *priv)
 void
 mlx5_promiscuous_enable(struct rte_eth_dev *dev)
 {
-	struct rte_flow_item_eth eth = {
-		.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
-		.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
-		.type = 0,
-	};
-
 	if (mlx5_is_secondary())
 		return;
 	dev->data->promiscuous = 1;
-	claim_zero(mlx5_flow_ctrl(dev, &eth, &eth, 3, 1));
+	mlx5_traffic_restart(dev);
 }
 
 /**
@@ -379,16 +75,10 @@ mlx5_promiscuous_enable(struct rte_eth_dev *dev)
 void
 mlx5_promiscuous_disable(struct rte_eth_dev *dev)
 {
-	struct rte_flow_item_eth eth = {
-		.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
-		.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
-		.type = 0,
-	};
-
 	if (mlx5_is_secondary())
 		return;
 	dev->data->promiscuous = 0;
-	claim_zero(mlx5_flow_ctrl(dev, &eth, &eth, 3, 0));
+	mlx5_traffic_restart(dev);
 }
 
 /**
@@ -400,17 +90,10 @@ mlx5_promiscuous_disable(struct rte_eth_dev *dev)
 void
 mlx5_allmulticast_enable(struct rte_eth_dev *dev)
 {
-	struct rte_flow_item_eth eth = {
-		.dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
-		.src.addr_bytes = "\x01\x00\x00\x00\x00\x00",
-		.type = 0,
-	};
-
 	if (mlx5_is_secondary())
 		return;
 	dev->data->all_multicast = 1;
-	if (dev->data->dev_started)
-		claim_zero(mlx5_flow_ctrl(dev, &eth, &eth, 3, 1));
+	mlx5_traffic_restart(dev);
 }
 
 /**
@@ -422,15 +105,8 @@ mlx5_allmulticast_enable(struct rte_eth_dev *dev)
 void
 mlx5_allmulticast_disable(struct rte_eth_dev *dev)
 {
-	struct rte_flow_item_eth eth = {
-		.dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
-		.src.addr_bytes = "\x01\x00\x00\x00\x00\x00",
-		.type = 0,
-	};
-
 	if (mlx5_is_secondary())
 		return;
 	dev->data->all_multicast = 0;
-	if (dev->data->dev_started)
-		claim_zero(mlx5_flow_ctrl(dev, &eth, &eth, 3, 0));
+	mlx5_traffic_restart(dev);
 }
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index e5ec57f..438db07 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -533,12 +533,6 @@ priv_destroy_hash_rxqs(struct priv *priv)
 
 		assert(hash_rxq->priv == priv);
 		assert(hash_rxq->qp != NULL);
-		/* Also check that there are no remaining flows. */
-		for (j = 0; (j != RTE_DIM(hash_rxq->special_flow)); ++j)
-			for (k = 0;
-			     (k != RTE_DIM(hash_rxq->special_flow[j]));
-			     ++k)
-				assert(hash_rxq->special_flow[j][k] == NULL);
 		for (j = 0; (j != RTE_DIM(hash_rxq->mac_flow)); ++j)
 			for (k = 0; (k != RTE_DIM(hash_rxq->mac_flow[j])); ++k)
 				assert(hash_rxq->mac_flow[j][k] == NULL);
@@ -560,65 +554,6 @@ priv_destroy_hash_rxqs(struct priv *priv)
 }
 
 /**
- * Check whether a given flow type is allowed.
- *
- * @param priv
- *   Pointer to private structure.
- * @param type
- *   Flow type to check.
- *
- * @return
- *   Nonzero if the given flow type is allowed.
- */
-int
-priv_allow_flow_type(struct priv *priv, enum hash_rxq_flow_type type)
-{
-	(void)priv;
-	switch (type) {
-	case HASH_RXQ_FLOW_TYPE_BROADCAST:
-	case HASH_RXQ_FLOW_TYPE_IPV6MULTI:
-	case HASH_RXQ_FLOW_TYPE_MAC:
-		return 1;
-		return 1;
-	default:
-		/* Unsupported flow type is not allowed. */
-		return 0;
-	}
-	return 0;
-}
-
-/**
- * Automatically enable/disable flows according to configuration.
- *
- * @param priv
- *   Private structure.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-int
-priv_rehash_flows(struct priv *priv)
-{
-	enum hash_rxq_flow_type i;
-
-	for (i = HASH_RXQ_FLOW_TYPE_BROADCAST;
-			i != RTE_DIM((*priv->hash_rxqs)[0].special_flow);
-			++i)
-		if (!priv_allow_flow_type(priv, i)) {
-			priv_special_flow_disable(priv, i);
-		} else {
-			int ret = priv_special_flow_enable(priv, i);
-
-			if (ret)
-				return ret;
-		}
-	if (priv_allow_flow_type(priv, HASH_RXQ_FLOW_TYPE_MAC))
-		return priv_mac_addrs_enable(priv);
-	priv_mac_addrs_disable(priv);
-	return 0;
-}
-
-/**
  * Allocate RX queue elements.
  *
  * @param rxq_ctrl
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 4d26726..683a866 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -232,28 +232,6 @@ struct special_flow_init {
 	unsigned int per_vlan:1;
 };
 
-enum hash_rxq_flow_type {
-	HASH_RXQ_FLOW_TYPE_BROADCAST,
-	HASH_RXQ_FLOW_TYPE_IPV6MULTI,
-	HASH_RXQ_FLOW_TYPE_MAC,
-};
-
-#ifndef NDEBUG
-static inline const char *
-hash_rxq_flow_type_str(enum hash_rxq_flow_type flow_type)
-{
-	switch (flow_type) {
-	case HASH_RXQ_FLOW_TYPE_BROADCAST:
-		return "broadcast";
-	case HASH_RXQ_FLOW_TYPE_IPV6MULTI:
-		return "IPv6 multicast";
-	case HASH_RXQ_FLOW_TYPE_MAC:
-		return "MAC";
-	}
-	return NULL;
-}
-#endif /* NDEBUG */
-
 struct hash_rxq {
 	struct priv *priv; /* Back pointer to private data. */
 	struct ibv_qp *qp; /* Hash RX QP. */
@@ -261,8 +239,6 @@ struct hash_rxq {
 	/* MAC flow steering rules, one per VLAN ID. */
 	struct ibv_exp_flow *mac_flow
 		[MLX5_MAX_MAC_ADDRESSES][MLX5_MAX_VLAN_IDS];
-	struct ibv_exp_flow *special_flow
-		[MLX5_MAX_SPECIAL_FLOWS][MLX5_MAX_VLAN_IDS];
 };
 
 /* TX queue descriptor. */
@@ -331,8 +307,6 @@ size_t priv_flow_attr(struct priv *, struct ibv_exp_flow_attr *,
 		      size_t, enum hash_rxq_type);
 int priv_create_hash_rxqs(struct priv *);
 void priv_destroy_hash_rxqs(struct priv *);
-int priv_allow_flow_type(struct priv *, enum hash_rxq_flow_type);
-int priv_rehash_flows(struct priv *);
 void mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *);
 int mlx5_rxq_ctrl_setup(struct rte_eth_dev *, struct mlx5_rxq_ctrl *,
 			uint16_t, unsigned int, const struct rte_eth_rxconf *,
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 6370d6f..28f59dc 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -134,6 +134,7 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 	if (mlx5_is_secondary())
 		return -E_RTE_SECONDARY;
 
+	dev->data->dev_started = 1;
 	priv_lock(priv);
 	/* Update Rx/Tx callback. */
 	priv_select_tx_function(priv);
@@ -157,21 +158,8 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 	/* Update receive callback. */
 	priv_select_rx_function(priv);
 	err = priv_create_hash_rxqs(priv);
-	if (!err)
-		err = priv_rehash_flows(priv);
-	else {
-		ERROR("%p: an error occurred while configuring hash RX queues:"
-		      " %s",
-		      (void *)priv, strerror(err));
-		goto error;
-	}
-	if (dev->data->promiscuous)
-	       mlx5_promiscuous_enable(dev);
-	else if (dev->data->all_multicast)
-		mlx5_allmulticast_enable(dev);
-	err = priv_flow_start(priv, &priv->ctrl_flows);
 	if (err) {
-		ERROR("%p: an error occurred while configuring control flows:"
+		ERROR("%p: an error occurred while configuring hash RX queues:"
 		      " %s",
 		      (void *)priv, strerror(err));
 		goto error;
@@ -195,15 +183,13 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 	return 0;
 error:
 	/* Rollback. */
+	dev->data->dev_started = 0;
 	LIST_FOREACH(mr, &priv->mr, next)
 		priv_mr_release(priv, mr);
-	priv_special_flow_disable_all(priv);
-	priv_mac_addrs_disable(priv);
 	priv_destroy_hash_rxqs(priv);
 	priv_flow_stop(priv, &priv->flows);
-	priv_flow_flush(priv, &priv->ctrl_flows);
-	priv_rxq_stop(priv);
 	priv_txq_stop(priv);
+	priv_rxq_stop(priv);
 	priv_unlock(priv);
 	return -err;
 }
@@ -227,8 +213,6 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
 
 	priv_lock(priv);
 	DEBUG("%p: cleaning up and destroying hash RX queues", (void *)dev);
-	priv_special_flow_disable_all(priv);
-	priv_mac_addrs_disable(priv);
 	priv_destroy_hash_rxqs(priv);
 	priv_flow_stop(priv, &priv->flows);
 	priv_flow_flush(priv, &priv->ctrl_flows);
@@ -240,3 +224,160 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
 	priv_rx_intr_vec_disable(priv);
 	priv_unlock(priv);
 }
+
+/**
+ * Enable traffic flows configured by control plane
+ *
+ * @param priv
+ *   Pointer to Ethernet device private data.
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ *
+ * @return
+ *   0 on success.
+ */
+int
+priv_dev_traffic_enable(struct priv *priv, struct rte_eth_dev *dev)
+{
+	if (dev->data->promiscuous) {
+		struct rte_flow_item_eth eth = {
+			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
+			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
+			.type = 0,
+		};
+
+		claim_zero(mlx5_flow_ctrl(dev, &eth, &eth, 3, 1));
+	} else if (dev->data->all_multicast) {
+		struct rte_flow_item_eth eth = {
+			.dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
+			.src.addr_bytes = "\x01\x00\x00\x00\x00\x00",
+			.type = 0,
+		};
+
+		claim_zero(mlx5_flow_ctrl(dev, &eth, &eth, 3, 1));
+	} else {
+		struct rte_flow_item_eth bcast = {
+			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+		};
+		struct rte_flow_item_eth ipv6_spec = {
+			.dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
+		};
+		struct rte_flow_item_eth ipv6_mask = {
+			.dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
+		};
+		struct rte_flow_item_eth eth = {
+			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
+		};
+		struct rte_flow_item_eth mask = {
+			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+		};
+		uint16_t ether_type = 0;
+		const unsigned int vlan_filter_n = priv->vlan_filter_n;
+		const struct ether_addr cmp = {
+			.addr_bytes = "\x00\x00\x00\x00\x00\x00",
+		};
+		unsigned int i;
+		unsigned int j;
+		unsigned int unicast = 0;
+		int ret;
+
+		for (i = 0; i != RTE_DIM(dev->data->mac_addrs); ++i) {
+			struct ether_addr *mac = &dev->data->mac_addrs[i];
+
+			if (!memcmp(mac, &cmp, sizeof(*mac)))
+				continue;
+			memcpy(&eth.dst.addr_bytes,
+			       mac->addr_bytes,
+			       ETHER_ADDR_LEN);
+			for (j = 0; j != vlan_filter_n; ++j) {
+				ether_type = priv->vlan_filter[j];
+				eth.type = ether_type;
+				mask.type = 0xffff;
+				ret = mlx5_flow_ctrl(dev, &eth, &mask, 3, 1);
+				if (ret)
+					goto error;
+				unicast = 1;
+			}
+			if (!vlan_filter_n) {
+				ret = mlx5_flow_ctrl(dev, &eth, &mask, 3, 1);
+				if (ret)
+					goto error;
+				unicast = 1;
+			}
+		}
+		if (!unicast)
+			return 0;
+		ret = mlx5_flow_ctrl(dev, &bcast, &bcast, 3, 1);
+		if (ret)
+			goto error;
+		ret = mlx5_flow_ctrl(dev, &ipv6_spec, &ipv6_mask, 3, 1);
+		if (ret)
+			goto error;
+	}
+	return 0;
+error:
+	return rte_errno;
+}
+
+/**
+ * Disable traffic flows configured by control plane
+ *
+ * @param priv
+ *   Pointer to Ethernet device private data.
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ *
+ * @return
+ *   0 on success.
+ */
+int
+priv_dev_traffic_disable(struct priv *priv, struct rte_eth_dev *dev)
+{
+	(void)dev;
+	priv_flow_flush(priv, &priv->ctrl_flows);
+	return 0;
+}
+
+/**
+ * Restart traffic flows configured by control plane
+ *
+ * @param priv
+ *   Pointer to Ethernet device private data.
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ *
+ * @return
+ *   0 on success.
+ */
+int
+priv_dev_traffic_restart(struct priv *priv, struct rte_eth_dev *dev)
+{
+	if (dev->data->dev_started) {
+		priv_dev_traffic_disable(priv, dev);
+		priv_dev_traffic_enable(priv, dev);
+	}
+	return 0;
+}
+
+/**
+ * Restart traffic flows configured by control plane
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ *
+ * @return
+ *   0 on success.
+ */
+int
+mlx5_traffic_restart(struct rte_eth_dev *dev)
+{
+	struct priv *priv = dev->data->dev_private;
+
+	priv_lock(priv);
+	if (dev->data->dev_started) {
+		priv_dev_traffic_disable(priv, dev);
+		priv_dev_traffic_enable(priv, dev);
+	}
+	priv_unlock(priv);
+	return 0;
+}
diff --git a/drivers/net/mlx5/mlx5_vlan.c b/drivers/net/mlx5/mlx5_vlan.c
index dffa1cd..c41c57b 100644
--- a/drivers/net/mlx5/mlx5_vlan.c
+++ b/drivers/net/mlx5/mlx5_vlan.c
@@ -44,7 +44,7 @@
 #include "mlx5_autoconf.h"
 
 /**
- * Configure a VLAN filter.
+ * DPDK callback to configure a VLAN filter.
  *
  * @param dev
  *   Pointer to Ethernet device structure.
@@ -54,14 +54,16 @@
  *   Toggle filter.
  *
  * @return
- *   0 on success, errno value on failure.
+ *   0 on success, negative errno value on failure.
  */
-static int
-vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
+int
+mlx5_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
 {
 	struct priv *priv = dev->data->dev_private;
 	unsigned int i;
+	int ret;
 
+	priv_lock(priv);
 	DEBUG("%p: %s VLAN filter ID %" PRIu16,
 	      (void *)dev, (on ? "enable" : "disable"), vlan_id);
 	assert(priv->vlan_filter_n <= RTE_DIM(priv->vlan_filter));
@@ -69,13 +71,15 @@ vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
 		if (priv->vlan_filter[i] == vlan_id)
 			break;
 	/* Check if there's room for another VLAN filter. */
-	if (i == RTE_DIM(priv->vlan_filter))
-		return ENOMEM;
+	if (i == RTE_DIM(priv->vlan_filter)) {
+		ret = -ENOMEM;
+		goto out;
+	}
 	if (i < priv->vlan_filter_n) {
 		assert(priv->vlan_filter_n != 0);
 		/* Enabling an existing VLAN filter has no effect. */
 		if (on)
-			return 0;
+			goto out;
 		/* Remove VLAN filter from list. */
 		--priv->vlan_filter_n;
 		memmove(&priv->vlan_filter[i],
@@ -87,41 +91,19 @@ vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
 		assert(i == priv->vlan_filter_n);
 		/* Disabling an unknown VLAN filter has no effect. */
 		if (!on)
-			return 0;
+			goto out;
 		/* Add new VLAN filter. */
 		priv->vlan_filter[priv->vlan_filter_n] = vlan_id;
 		++priv->vlan_filter_n;
 	}
-	/* Rehash flows in all hash RX queues. */
-	priv_mac_addrs_disable(priv);
-	priv_special_flow_disable_all(priv);
-	return priv_rehash_flows(priv);
-}
-
-/**
- * DPDK callback to configure a VLAN filter.
- *
- * @param dev
- *   Pointer to Ethernet device structure.
- * @param vlan_id
- *   VLAN ID to filter.
- * @param on
- *   Toggle filter.
- *
- * @return
- *   0 on success, negative errno value on failure.
- */
-int
-mlx5_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
-{
-	struct priv *priv = dev->data->dev_private;
-	int ret;
-
-	priv_lock(priv);
-	ret = vlan_filter_set(dev, vlan_id, on);
+	if (dev->data->dev_started) {
+		priv_dev_traffic_disable(priv, dev);
+		priv_dev_traffic_enable(priv, dev);
+	}
+out:
 	priv_unlock(priv);
 	assert(ret >= 0);
-	return -ret;
+	return ret;
 }
 
 /**
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v1 19/21] net/mlx5: handle a single RSS hash key for all protocols
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (18 preceding siblings ...)
  2017-08-02 14:10 ` [dpdk-dev] [PATCH v1 18/21] net/mlx5: use flow to enable unicast traffic Nelio Laranjeiro
@ 2017-08-02 14:10 ` Nelio Laranjeiro
  2017-08-02 14:10 ` [dpdk-dev] [PATCH v1 20/21] net/mlx5: remove hash Rx queues support Nelio Laranjeiro
                   ` (35 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-08-02 14:10 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil
Since RSS configuration can also be used by flow API, there is no more
necessity to keep a list of RSS configurable for all protocols.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5.c        |  24 +-------
 drivers/net/mlx5/mlx5.h        |   6 +-
 drivers/net/mlx5/mlx5_ethdev.c |  16 ++++--
 drivers/net/mlx5/mlx5_rss.c    | 127 +++++++++--------------------------------
 drivers/net/mlx5/mlx5_rxq.c    |   5 +-
 5 files changed, 45 insertions(+), 133 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index af3f7c8..bf6c66b 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -174,11 +174,8 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 		claim_zero(ibv_close_device(priv->ctx));
 	} else
 		assert(priv->ctx == NULL);
-	if (priv->rss_conf != NULL) {
-		for (i = 0; (i != hash_rxq_init_n); ++i)
-			rte_free((*priv->rss_conf)[i]);
-		rte_free(priv->rss_conf);
-	}
+	if (priv->rss_conf.rss_key != NULL)
+		rte_free(priv->rss_conf.rss_key);
 	if (priv->reta_idx != NULL)
 		rte_free(priv->reta_idx);
 	i = mlx5_priv_hrxq_ibv_verify(priv);
@@ -709,19 +706,6 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 				priv->txq_inline = MLX5_WQE_SIZE_MAX -
 						   MLX5_WQE_SIZE;
 		}
-		/* Allocate and register default RSS hash keys. */
-		priv->rss_conf = rte_calloc(__func__, hash_rxq_init_n,
-					    sizeof((*priv->rss_conf)[0]), 0);
-		if (priv->rss_conf == NULL) {
-			err = ENOMEM;
-			goto port_error;
-		}
-		err = rss_hash_rss_conf_new_key(priv,
-						rss_hash_default_key,
-						rss_hash_default_key_len,
-						ETH_RSS_PROTO_MASK);
-		if (err)
-			goto port_error;
 		/* Configure the first MAC address by default. */
 		if (priv_get_mac(priv, &mac.addr_bytes)) {
 			ERROR("cannot get MAC address, is mlx5_en loaded?"
@@ -781,10 +765,8 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 		continue;
 
 port_error:
-		if (priv) {
-			rte_free(priv->rss_conf);
+		if (priv)
 			rte_free(priv);
-		}
 		if (pd)
 			claim_zero(ibv_dealloc_pd(pd));
 		if (ctx)
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index ee0de3c..5058bcd 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -133,9 +133,7 @@ struct priv {
 	/* Hash RX QPs feeding the indirection table. */
 	struct hash_rxq (*hash_rxqs)[];
 	unsigned int hash_rxqs_n; /* Hash RX QPs array size. */
-	/* RSS configuration array indexed by hash RX queue type. */
-	struct rte_eth_rss_conf *(*rss_conf)[];
-	uint64_t rss_hf; /* RSS DPDK bit field of active RSS. */
+	struct rte_eth_rss_conf rss_conf; /* RSS configuration. */
 	struct rte_intr_handle intr_handle; /* Interrupt handler. */
 	unsigned int (*reta_idx)[]; /* RETA index table. */
 	unsigned int reta_idx_n; /* RETA index size. */
@@ -222,8 +220,6 @@ void mlx5_mac_addr_set(struct rte_eth_dev *, struct ether_addr *);
 
 /* mlx5_rss.c */
 
-int rss_hash_rss_conf_new_key(struct priv *, const uint8_t *, unsigned int,
-			      uint64_t);
 int mlx5_rss_hash_update(struct rte_eth_dev *, struct rte_eth_rss_conf *);
 int mlx5_rss_hash_conf_get(struct rte_eth_dev *, struct rte_eth_rss_conf *);
 int priv_rss_reta_index_resize(struct priv *, unsigned int);
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 0e0a99e..1817fdb 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -574,7 +574,17 @@ dev_configure(struct rte_eth_dev *dev)
 	unsigned int j;
 	unsigned int reta_idx_n;
 
-	priv->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
+	priv->rss_conf.rss_key =
+		rte_realloc(priv->rss_conf.rss_key,
+			    rss_hash_default_key_len, 0);
+	if (!priv->rss_conf.rss_key) {
+		ERROR("cannot allocate RSS hash key memory (%u)", rxqs_n);
+		return ENOMEM;
+	}
+	memcpy(priv->rss_conf.rss_key, rss_hash_default_key,
+	       rss_hash_default_key_len);
+	priv->rss_conf.rss_key_len = rss_hash_default_key_len;
+	priv->rss_conf.rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
 	priv->rxqs = (void *)dev->data->rx_queues;
 	priv->txqs = (void *)dev->data->tx_queues;
 	if (txqs_n != priv->txqs_n) {
@@ -692,9 +702,7 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
 		info->if_index = if_nametoindex(ifname);
 	info->reta_size = priv->reta_idx_n ?
 		priv->reta_idx_n : priv->ind_table_max_size;
-	info->hash_key_size = ((*priv->rss_conf) ?
-			       (*priv->rss_conf)[0]->rss_key_len :
-			       0);
+	info->hash_key_size = priv->rss_conf.rss_key_len;
 	info->speed_capa = priv->link_speed_capa;
 	priv_unlock(priv);
 }
diff --git a/drivers/net/mlx5/mlx5_rss.c b/drivers/net/mlx5/mlx5_rss.c
index 1249943..8f04e67 100644
--- a/drivers/net/mlx5/mlx5_rss.c
+++ b/drivers/net/mlx5/mlx5_rss.c
@@ -54,74 +54,6 @@
 #include "mlx5_rxtx.h"
 
 /**
- * Get a RSS configuration hash key.
- *
- * @param priv
- *   Pointer to private structure.
- * @param rss_hf
- *   RSS hash functions configuration must be retrieved for.
- *
- * @return
- *   Pointer to a RSS configuration structure or NULL if rss_hf cannot
- *   be matched.
- */
-static struct rte_eth_rss_conf *
-rss_hash_get(struct priv *priv, uint64_t rss_hf)
-{
-	unsigned int i;
-
-	for (i = 0; (i != hash_rxq_init_n); ++i) {
-		uint64_t dpdk_rss_hf = hash_rxq_init[i].dpdk_rss_hf;
-
-		if (!(dpdk_rss_hf & rss_hf))
-			continue;
-		return (*priv->rss_conf)[i];
-	}
-	return NULL;
-}
-
-/**
- * Register a RSS key.
- *
- * @param priv
- *   Pointer to private structure.
- * @param key
- *   Hash key to register.
- * @param key_len
- *   Hash key length in bytes.
- * @param rss_hf
- *   RSS hash functions the provided key applies to.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-int
-rss_hash_rss_conf_new_key(struct priv *priv, const uint8_t *key,
-			  unsigned int key_len, uint64_t rss_hf)
-{
-	unsigned int i;
-
-	for (i = 0; (i != hash_rxq_init_n); ++i) {
-		struct rte_eth_rss_conf *rss_conf;
-		uint64_t dpdk_rss_hf = hash_rxq_init[i].dpdk_rss_hf;
-
-		if (!(dpdk_rss_hf & rss_hf))
-			continue;
-		rss_conf = rte_realloc((*priv->rss_conf)[i],
-				       (sizeof(*rss_conf) + key_len),
-				       0);
-		if (!rss_conf)
-			return ENOMEM;
-		rss_conf->rss_key = (void *)(rss_conf + 1);
-		rss_conf->rss_key_len = key_len;
-		rss_conf->rss_hf = dpdk_rss_hf;
-		memcpy(rss_conf->rss_key, key, key_len);
-		(*priv->rss_conf)[i] = rss_conf;
-	}
-	return 0;
-}
-
-/**
  * DPDK callback to update the RSS hash configuration.
  *
  * @param dev
@@ -137,23 +69,24 @@ mlx5_rss_hash_update(struct rte_eth_dev *dev,
 		     struct rte_eth_rss_conf *rss_conf)
 {
 	struct priv *priv = dev->data->dev_private;
-	int err = 0;
+	int ret = 0;
 
 	priv_lock(priv);
-
-	assert(priv->rss_conf != NULL);
-
-	/* Apply configuration. */
-	if (rss_conf->rss_key)
-		err = rss_hash_rss_conf_new_key(priv,
-						rss_conf->rss_key,
-						rss_conf->rss_key_len,
-						rss_conf->rss_hf);
-	/* Store protocols for which RSS is enabled. */
-	priv->rss_hf = rss_conf->rss_hf;
+	if (rss_conf->rss_key_len) {
+		priv->rss_conf.rss_key = rte_realloc(priv->rss_conf.rss_key,
+						     rss_conf->rss_key_len, 0);
+		if (!priv->rss_conf.rss_key) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		memcpy(&priv->rss_conf.rss_key, rss_conf->rss_key,
+		       rss_conf->rss_key_len);
+		priv->rss_conf.rss_key_len = rss_conf->rss_key_len;
+	}
+	priv->rss_conf.rss_hf = rss_conf->rss_hf;
+out:
 	priv_unlock(priv);
-	assert(err >= 0);
-	return -err;
+	return ret;
 }
 
 /**
@@ -172,28 +105,22 @@ mlx5_rss_hash_conf_get(struct rte_eth_dev *dev,
 		       struct rte_eth_rss_conf *rss_conf)
 {
 	struct priv *priv = dev->data->dev_private;
-	struct rte_eth_rss_conf *priv_rss_conf;
+	int ret = 0;
 
 	priv_lock(priv);
-
-	assert(priv->rss_conf != NULL);
-
-	priv_rss_conf = rss_hash_get(priv, rss_conf->rss_hf);
-	if (!priv_rss_conf) {
-		rss_conf->rss_hf = 0;
-		priv_unlock(priv);
-		return -EINVAL;
+	if (!rss_conf->rss_key) {
+		ret = -ENOMEM;
+		goto out;
 	}
-	if (rss_conf->rss_key &&
-	    rss_conf->rss_key_len >= priv_rss_conf->rss_key_len)
-		memcpy(rss_conf->rss_key,
-		       priv_rss_conf->rss_key,
-		       priv_rss_conf->rss_key_len);
-	rss_conf->rss_key_len = priv_rss_conf->rss_key_len;
-	rss_conf->rss_hf = priv_rss_conf->rss_hf;
-
+	if (rss_conf->rss_key_len < priv->rss_conf.rss_key_len) {
+		ret = -EINVAL;
+		goto out;
+	}
+	memcpy(rss_conf->rss_key, priv->rss_conf.rss_key,
+	       priv->rss_conf.rss_key_len);
+out:
 	priv_unlock(priv);
-	return 0;
+	return ret;
 }
 
 /**
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 438db07..d5dc928 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -299,7 +299,7 @@ priv_make_ind_table_init(struct priv *priv,
 	/* Mandatory to receive frames not handled by normal hash RX queues. */
 	unsigned int hash_types_sup = 1 << HASH_RXQ_ETH;
 
-	rss_hf = priv->rss_hf;
+	rss_hf = priv->rss_conf.rss_hf;
 	/* Process other protocols only if more than one queue. */
 	if (priv->rxqs_n > 1)
 		for (i = 0; (i != hash_rxq_init_n); ++i)
@@ -436,8 +436,7 @@ priv_create_hash_rxqs(struct priv *priv)
 		struct hash_rxq *hash_rxq = &(*hash_rxqs)[i];
 		enum hash_rxq_type type =
 			hash_rxq_type_from_pos(&ind_table_init[j], k);
-		struct rte_eth_rss_conf *priv_rss_conf =
-			(*priv->rss_conf)[type];
+		struct rte_eth_rss_conf *priv_rss_conf = &priv->rss_conf;
 		struct ibv_exp_rx_hash_conf hash_conf = {
 			.rx_hash_function = IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
 			.rx_hash_key_len = (priv_rss_conf ?
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v1 20/21] net/mlx5: remove hash Rx queues support
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (19 preceding siblings ...)
  2017-08-02 14:10 ` [dpdk-dev] [PATCH v1 19/21] net/mlx5: handle a single RSS hash key for all protocols Nelio Laranjeiro
@ 2017-08-02 14:10 ` Nelio Laranjeiro
  2017-08-02 14:10 ` [dpdk-dev] [PATCH v1 21/21] net/mlx5: support RSS hash configuration in generic flow action Nelio Laranjeiro
                   ` (34 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-08-02 14:10 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil
From this commit on, RSS support becomes unavailable until it is replaced
by the generic flow implementation.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5.c         |   2 -
 drivers/net/mlx5/mlx5.h         |   6 -
 drivers/net/mlx5/mlx5_rxq.c     | 470 ----------------------------------------
 drivers/net/mlx5/mlx5_rxtx.h    |  76 -------
 drivers/net/mlx5/mlx5_trigger.c |  12 +-
 5 files changed, 7 insertions(+), 559 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index bf6c66b..7f6c774 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -146,8 +146,6 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	      ((priv->ctx != NULL) ? priv->ctx->device->name : ""));
 	/* In case mlx5_dev_stop() has not been called. */
 	priv_dev_interrupt_handler_uninstall(priv, dev);
-	priv_destroy_hash_rxqs(priv);
-
 	/* Prevent crashes when queues are still in use. */
 	dev->rx_pkt_burst = removed_rx_burst;
 	dev->tx_pkt_burst = removed_tx_burst;
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 5058bcd..7058256 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -126,13 +126,7 @@ struct priv {
 	unsigned int txqs_n; /* TX queues array size. */
 	struct mlx5_rxq_data *(*rxqs)[]; /* RX queues. */
 	struct mlx5_txq_data *(*txqs)[]; /* TX queues. */
-	/* Indirection tables referencing all RX WQs. */
-	struct ibv_exp_rwq_ind_table *(*ind_tables)[];
-	unsigned int ind_tables_n; /* Number of indirection tables. */
 	unsigned int ind_table_max_size; /* Maximum indirection table size. */
-	/* Hash RX QPs feeding the indirection table. */
-	struct hash_rxq (*hash_rxqs)[];
-	unsigned int hash_rxqs_n; /* Hash RX QPs array size. */
 	struct rte_eth_rss_conf rss_conf; /* RSS configuration. */
 	struct rte_intr_handle intr_handle; /* Interrupt handler. */
 	unsigned int (*reta_idx)[]; /* RETA index table. */
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index d5dc928..762288b 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -64,121 +64,6 @@
 #include "mlx5_autoconf.h"
 #include "mlx5_defs.h"
 
-/* Initialization data for hash RX queues. */
-const struct hash_rxq_init hash_rxq_init[] = {
-	[HASH_RXQ_TCPV4] = {
-		.hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 |
-				IBV_EXP_RX_HASH_DST_IPV4 |
-				IBV_EXP_RX_HASH_SRC_PORT_TCP |
-				IBV_EXP_RX_HASH_DST_PORT_TCP),
-		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
-		.flow_priority = 0,
-		.flow_spec.tcp_udp = {
-			.type = IBV_EXP_FLOW_SPEC_TCP,
-			.size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
-		},
-		.underlayer = &hash_rxq_init[HASH_RXQ_IPV4],
-	},
-	[HASH_RXQ_UDPV4] = {
-		.hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 |
-				IBV_EXP_RX_HASH_DST_IPV4 |
-				IBV_EXP_RX_HASH_SRC_PORT_UDP |
-				IBV_EXP_RX_HASH_DST_PORT_UDP),
-		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
-		.flow_priority = 0,
-		.flow_spec.tcp_udp = {
-			.type = IBV_EXP_FLOW_SPEC_UDP,
-			.size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
-		},
-		.underlayer = &hash_rxq_init[HASH_RXQ_IPV4],
-	},
-	[HASH_RXQ_IPV4] = {
-		.hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 |
-				IBV_EXP_RX_HASH_DST_IPV4),
-		.dpdk_rss_hf = (ETH_RSS_IPV4 |
-				ETH_RSS_FRAG_IPV4),
-		.flow_priority = 1,
-		.flow_spec.ipv4 = {
-			.type = IBV_EXP_FLOW_SPEC_IPV4,
-			.size = sizeof(hash_rxq_init[0].flow_spec.ipv4),
-		},
-		.underlayer = &hash_rxq_init[HASH_RXQ_ETH],
-	},
-	[HASH_RXQ_TCPV6] = {
-		.hash_fields = (IBV_EXP_RX_HASH_SRC_IPV6 |
-				IBV_EXP_RX_HASH_DST_IPV6 |
-				IBV_EXP_RX_HASH_SRC_PORT_TCP |
-				IBV_EXP_RX_HASH_DST_PORT_TCP),
-		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
-		.flow_priority = 0,
-		.flow_spec.tcp_udp = {
-			.type = IBV_EXP_FLOW_SPEC_TCP,
-			.size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
-		},
-		.underlayer = &hash_rxq_init[HASH_RXQ_IPV6],
-	},
-	[HASH_RXQ_UDPV6] = {
-		.hash_fields = (IBV_EXP_RX_HASH_SRC_IPV6 |
-				IBV_EXP_RX_HASH_DST_IPV6 |
-				IBV_EXP_RX_HASH_SRC_PORT_UDP |
-				IBV_EXP_RX_HASH_DST_PORT_UDP),
-		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
-		.flow_priority = 0,
-		.flow_spec.tcp_udp = {
-			.type = IBV_EXP_FLOW_SPEC_UDP,
-			.size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
-		},
-		.underlayer = &hash_rxq_init[HASH_RXQ_IPV6],
-	},
-	[HASH_RXQ_IPV6] = {
-		.hash_fields = (IBV_EXP_RX_HASH_SRC_IPV6 |
-				IBV_EXP_RX_HASH_DST_IPV6),
-		.dpdk_rss_hf = (ETH_RSS_IPV6 |
-				ETH_RSS_FRAG_IPV6),
-		.flow_priority = 1,
-		.flow_spec.ipv6 = {
-			.type = IBV_EXP_FLOW_SPEC_IPV6,
-			.size = sizeof(hash_rxq_init[0].flow_spec.ipv6),
-		},
-		.underlayer = &hash_rxq_init[HASH_RXQ_ETH],
-	},
-	[HASH_RXQ_ETH] = {
-		.hash_fields = 0,
-		.dpdk_rss_hf = 0,
-		.flow_priority = 2,
-		.flow_spec.eth = {
-			.type = IBV_EXP_FLOW_SPEC_ETH,
-			.size = sizeof(hash_rxq_init[0].flow_spec.eth),
-		},
-		.underlayer = NULL,
-	},
-};
-
-/* Number of entries in hash_rxq_init[]. */
-const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
-
-/* Initialization data for hash RX queue indirection tables. */
-static const struct ind_table_init ind_table_init[] = {
-	{
-		.max_size = -1u, /* Superseded by HW limitations. */
-		.hash_types =
-			1 << HASH_RXQ_TCPV4 |
-			1 << HASH_RXQ_UDPV4 |
-			1 << HASH_RXQ_IPV4 |
-			1 << HASH_RXQ_TCPV6 |
-			1 << HASH_RXQ_UDPV6 |
-			1 << HASH_RXQ_IPV6 |
-			0,
-		.hash_types_n = 6,
-	},
-	{
-		.max_size = 1,
-		.hash_types = 1 << HASH_RXQ_ETH,
-		.hash_types_n = 1,
-	},
-};
-
-#define IND_TABLE_INIT_N RTE_DIM(ind_table_init)
 
 /* Default RSS hash key also used for ConnectX-3. */
 uint8_t rss_hash_default_key[] = {
@@ -198,361 +83,6 @@ uint8_t rss_hash_default_key[] = {
 const size_t rss_hash_default_key_len = sizeof(rss_hash_default_key);
 
 /**
- * Populate flow steering rule for a given hash RX queue type using
- * information from hash_rxq_init[]. Nothing is written to flow_attr when
- * flow_attr_size is not large enough, but the required size is still returned.
- *
- * @param priv
- *   Pointer to private structure.
- * @param[out] flow_attr
- *   Pointer to flow attribute structure to fill. Note that the allocated
- *   area must be larger and large enough to hold all flow specifications.
- * @param flow_attr_size
- *   Entire size of flow_attr and trailing room for flow specifications.
- * @param type
- *   Hash RX queue type to use for flow steering rule.
- *
- * @return
- *   Total size of the flow attribute buffer. No errors are defined.
- */
-size_t
-priv_flow_attr(struct priv *priv, struct ibv_exp_flow_attr *flow_attr,
-	       size_t flow_attr_size, enum hash_rxq_type type)
-{
-	size_t offset = sizeof(*flow_attr);
-	const struct hash_rxq_init *init = &hash_rxq_init[type];
-
-	assert(priv != NULL);
-	assert((size_t)type < RTE_DIM(hash_rxq_init));
-	do {
-		offset += init->flow_spec.hdr.size;
-		init = init->underlayer;
-	} while (init != NULL);
-	if (offset > flow_attr_size)
-		return offset;
-	flow_attr_size = offset;
-	init = &hash_rxq_init[type];
-	*flow_attr = (struct ibv_exp_flow_attr){
-		.type = IBV_EXP_FLOW_ATTR_NORMAL,
-		/* Priorities < 3 are reserved for flow director. */
-		.priority = init->flow_priority + 3,
-		.num_of_specs = 0,
-		.port = priv->port,
-		.flags = 0,
-	};
-	do {
-		offset -= init->flow_spec.hdr.size;
-		memcpy((void *)((uintptr_t)flow_attr + offset),
-		       &init->flow_spec,
-		       init->flow_spec.hdr.size);
-		++flow_attr->num_of_specs;
-		init = init->underlayer;
-	} while (init != NULL);
-	return flow_attr_size;
-}
-
-/**
- * Convert hash type position in indirection table initializer to
- * hash RX queue type.
- *
- * @param table
- *   Indirection table initializer.
- * @param pos
- *   Hash type position.
- *
- * @return
- *   Hash RX queue type.
- */
-static enum hash_rxq_type
-hash_rxq_type_from_pos(const struct ind_table_init *table, unsigned int pos)
-{
-	enum hash_rxq_type type = HASH_RXQ_TCPV4;
-
-	assert(pos < table->hash_types_n);
-	do {
-		if ((table->hash_types & (1 << type)) && (pos-- == 0))
-			break;
-		++type;
-	} while (1);
-	return type;
-}
-
-/**
- * Filter out disabled hash RX queue types from ind_table_init[].
- *
- * @param priv
- *   Pointer to private structure.
- * @param[out] table
- *   Output table.
- *
- * @return
- *   Number of table entries.
- */
-static unsigned int
-priv_make_ind_table_init(struct priv *priv,
-			 struct ind_table_init (*table)[IND_TABLE_INIT_N])
-{
-	uint64_t rss_hf;
-	unsigned int i;
-	unsigned int j;
-	unsigned int table_n = 0;
-	/* Mandatory to receive frames not handled by normal hash RX queues. */
-	unsigned int hash_types_sup = 1 << HASH_RXQ_ETH;
-
-	rss_hf = priv->rss_conf.rss_hf;
-	/* Process other protocols only if more than one queue. */
-	if (priv->rxqs_n > 1)
-		for (i = 0; (i != hash_rxq_init_n); ++i)
-			if (rss_hf & hash_rxq_init[i].dpdk_rss_hf)
-				hash_types_sup |= (1 << i);
-
-	/* Filter out entries whose protocols are not in the set. */
-	for (i = 0, j = 0; (i != IND_TABLE_INIT_N); ++i) {
-		unsigned int nb;
-		unsigned int h;
-
-		/* j is increased only if the table has valid protocols. */
-		assert(j <= i);
-		(*table)[j] = ind_table_init[i];
-		(*table)[j].hash_types &= hash_types_sup;
-		for (h = 0, nb = 0; (h != hash_rxq_init_n); ++h)
-			if (((*table)[j].hash_types >> h) & 0x1)
-				++nb;
-		(*table)[i].hash_types_n = nb;
-		if (nb) {
-			++table_n;
-			++j;
-		}
-	}
-	return table_n;
-}
-
-/**
- * Initialize hash RX queues and indirection table.
- *
- * @param priv
- *   Pointer to private structure.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-int
-priv_create_hash_rxqs(struct priv *priv)
-{
-	struct ibv_exp_wq *wqs[priv->reta_idx_n];
-	struct ind_table_init ind_table_init[IND_TABLE_INIT_N];
-	unsigned int ind_tables_n =
-		priv_make_ind_table_init(priv, &ind_table_init);
-	unsigned int hash_rxqs_n = 0;
-	struct hash_rxq (*hash_rxqs)[] = NULL;
-	struct ibv_exp_rwq_ind_table *(*ind_tables)[] = NULL;
-	unsigned int i;
-	unsigned int j;
-	unsigned int k;
-	int err = 0;
-
-	assert(priv->ind_tables == NULL);
-	assert(priv->ind_tables_n == 0);
-	assert(priv->hash_rxqs == NULL);
-	assert(priv->hash_rxqs_n == 0);
-	assert(priv->pd != NULL);
-	assert(priv->ctx != NULL);
-	if (priv->isolated)
-		return 0;
-	if (priv->rxqs_n == 0)
-		return EINVAL;
-	assert(priv->rxqs != NULL);
-	if (ind_tables_n == 0) {
-		ERROR("all hash RX queue types have been filtered out,"
-		      " indirection table cannot be created");
-		return EINVAL;
-	}
-	if (priv->rxqs_n & (priv->rxqs_n - 1)) {
-		INFO("%u RX queues are configured, consider rounding this"
-		     " number to the next power of two for better balancing",
-		     priv->rxqs_n);
-		DEBUG("indirection table extended to assume %u WQs",
-		      priv->reta_idx_n);
-	}
-	for (i = 0; (i != priv->reta_idx_n); ++i) {
-		struct mlx5_rxq_ctrl *rxq_ctrl;
-
-		rxq_ctrl = container_of((*priv->rxqs)[(*priv->reta_idx)[i]],
-					struct mlx5_rxq_ctrl, rxq);
-		wqs[i] = rxq_ctrl->ibv->wq;
-	}
-	/* Get number of hash RX queues to configure. */
-	for (i = 0, hash_rxqs_n = 0; (i != ind_tables_n); ++i)
-		hash_rxqs_n += ind_table_init[i].hash_types_n;
-	DEBUG("allocating %u hash RX queues for %u WQs, %u indirection tables",
-	      hash_rxqs_n, priv->rxqs_n, ind_tables_n);
-	/* Create indirection tables. */
-	ind_tables = rte_calloc(__func__, ind_tables_n,
-				sizeof((*ind_tables)[0]), 0);
-	if (ind_tables == NULL) {
-		err = ENOMEM;
-		ERROR("cannot allocate indirection tables container: %s",
-		      strerror(err));
-		goto error;
-	}
-	for (i = 0; (i != ind_tables_n); ++i) {
-		struct ibv_exp_rwq_ind_table_init_attr ind_init_attr = {
-			.pd = priv->pd,
-			.log_ind_tbl_size = 0, /* Set below. */
-			.ind_tbl = wqs,
-			.comp_mask = 0,
-		};
-		unsigned int ind_tbl_size = ind_table_init[i].max_size;
-		struct ibv_exp_rwq_ind_table *ind_table;
-
-		if (priv->reta_idx_n < ind_tbl_size)
-			ind_tbl_size = priv->reta_idx_n;
-		ind_init_attr.log_ind_tbl_size = log2above(ind_tbl_size);
-		errno = 0;
-		ind_table = ibv_exp_create_rwq_ind_table(priv->ctx,
-							 &ind_init_attr);
-		if (ind_table != NULL) {
-			(*ind_tables)[i] = ind_table;
-			continue;
-		}
-		/* Not clear whether errno is set. */
-		err = (errno ? errno : EINVAL);
-		ERROR("RX indirection table creation failed with error %d: %s",
-		      err, strerror(err));
-		goto error;
-	}
-	/* Allocate array that holds hash RX queues and related data. */
-	hash_rxqs = rte_calloc(__func__, hash_rxqs_n,
-			       sizeof((*hash_rxqs)[0]), 0);
-	if (hash_rxqs == NULL) {
-		err = ENOMEM;
-		ERROR("cannot allocate hash RX queues container: %s",
-		      strerror(err));
-		goto error;
-	}
-	for (i = 0, j = 0, k = 0;
-	     ((i != hash_rxqs_n) && (j != ind_tables_n));
-	     ++i) {
-		struct hash_rxq *hash_rxq = &(*hash_rxqs)[i];
-		enum hash_rxq_type type =
-			hash_rxq_type_from_pos(&ind_table_init[j], k);
-		struct rte_eth_rss_conf *priv_rss_conf = &priv->rss_conf;
-		struct ibv_exp_rx_hash_conf hash_conf = {
-			.rx_hash_function = IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
-			.rx_hash_key_len = (priv_rss_conf ?
-					    priv_rss_conf->rss_key_len :
-					    rss_hash_default_key_len),
-			.rx_hash_key = (priv_rss_conf ?
-					priv_rss_conf->rss_key :
-					rss_hash_default_key),
-			.rx_hash_fields_mask = hash_rxq_init[type].hash_fields,
-			.rwq_ind_tbl = (*ind_tables)[j],
-		};
-		struct ibv_exp_qp_init_attr qp_init_attr = {
-			.max_inl_recv = 0, /* Currently not supported. */
-			.qp_type = IBV_QPT_RAW_PACKET,
-			.comp_mask = (IBV_EXP_QP_INIT_ATTR_PD |
-				      IBV_EXP_QP_INIT_ATTR_RX_HASH),
-			.pd = priv->pd,
-			.rx_hash_conf = &hash_conf,
-			.port_num = priv->port,
-		};
-
-		DEBUG("using indirection table %u for hash RX queue %u type %d",
-		      j, i, type);
-		*hash_rxq = (struct hash_rxq){
-			.priv = priv,
-			.qp = ibv_exp_create_qp(priv->ctx, &qp_init_attr),
-			.type = type,
-		};
-		if (hash_rxq->qp == NULL) {
-			err = (errno ? errno : EINVAL);
-			ERROR("Hash RX QP creation failure: %s",
-			      strerror(err));
-			goto error;
-		}
-		if (++k < ind_table_init[j].hash_types_n)
-			continue;
-		/* Switch to the next indirection table and reset hash RX
-		 * queue type array index. */
-		++j;
-		k = 0;
-	}
-	priv->ind_tables = ind_tables;
-	priv->ind_tables_n = ind_tables_n;
-	priv->hash_rxqs = hash_rxqs;
-	priv->hash_rxqs_n = hash_rxqs_n;
-	assert(err == 0);
-	return 0;
-error:
-	if (hash_rxqs != NULL) {
-		for (i = 0; (i != hash_rxqs_n); ++i) {
-			struct ibv_qp *qp = (*hash_rxqs)[i].qp;
-
-			if (qp == NULL)
-				continue;
-			claim_zero(ibv_destroy_qp(qp));
-		}
-		rte_free(hash_rxqs);
-	}
-	if (ind_tables != NULL) {
-		for (j = 0; (j != ind_tables_n); ++j) {
-			struct ibv_exp_rwq_ind_table *ind_table =
-				(*ind_tables)[j];
-
-			if (ind_table == NULL)
-				continue;
-			claim_zero(ibv_exp_destroy_rwq_ind_table(ind_table));
-		}
-		rte_free(ind_tables);
-	}
-	return err;
-}
-
-/**
- * Clean up hash RX queues and indirection table.
- *
- * @param priv
- *   Pointer to private structure.
- */
-void
-priv_destroy_hash_rxqs(struct priv *priv)
-{
-	unsigned int i;
-
-	DEBUG("destroying %u hash RX queues", priv->hash_rxqs_n);
-	if (priv->hash_rxqs_n == 0) {
-		assert(priv->hash_rxqs == NULL);
-		assert(priv->ind_tables == NULL);
-		return;
-	}
-	for (i = 0; (i != priv->hash_rxqs_n); ++i) {
-		struct hash_rxq *hash_rxq = &(*priv->hash_rxqs)[i];
-		unsigned int j, k;
-
-		assert(hash_rxq->priv == priv);
-		assert(hash_rxq->qp != NULL);
-		for (j = 0; (j != RTE_DIM(hash_rxq->mac_flow)); ++j)
-			for (k = 0; (k != RTE_DIM(hash_rxq->mac_flow[j])); ++k)
-				assert(hash_rxq->mac_flow[j][k] == NULL);
-		claim_zero(ibv_destroy_qp(hash_rxq->qp));
-	}
-	priv->hash_rxqs_n = 0;
-	rte_free(priv->hash_rxqs);
-	priv->hash_rxqs = NULL;
-	for (i = 0; (i != priv->ind_tables_n); ++i) {
-		struct ibv_exp_rwq_ind_table *ind_table =
-			(*priv->ind_tables)[i];
-
-		assert(ind_table != NULL);
-		claim_zero(ibv_exp_destroy_rwq_ind_table(ind_table));
-	}
-	priv->ind_tables_n = 0;
-	rte_free(priv->ind_tables);
-	priv->ind_tables = NULL;
-}
-
-/**
  * Allocate RX queue elements.
  *
  * @param rxq_ctrl
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 683a866..c49b798 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -172,75 +172,6 @@ struct mlx5_hrxq {
 	uint8_t rss_key[]; /* Hash key. */
 };
 
-/* Hash RX queue types. */
-enum hash_rxq_type {
-	HASH_RXQ_TCPV4,
-	HASH_RXQ_UDPV4,
-	HASH_RXQ_IPV4,
-	HASH_RXQ_TCPV6,
-	HASH_RXQ_UDPV6,
-	HASH_RXQ_IPV6,
-	HASH_RXQ_ETH,
-};
-
-/* Flow structure with Ethernet specification. It is packed to prevent padding
- * between attr and spec as this layout is expected by libibverbs. */
-struct flow_attr_spec_eth {
-	struct ibv_exp_flow_attr attr;
-	struct ibv_exp_flow_spec_eth spec;
-} __attribute__((packed));
-
-/* Define a struct flow_attr_spec_eth object as an array of at least
- * "size" bytes. Room after the first index is normally used to store
- * extra flow specifications. */
-#define FLOW_ATTR_SPEC_ETH(name, size) \
-	struct flow_attr_spec_eth name \
-		[((size) / sizeof(struct flow_attr_spec_eth)) + \
-		 !!((size) % sizeof(struct flow_attr_spec_eth))]
-
-/* Initialization data for hash RX queue. */
-struct hash_rxq_init {
-	uint64_t hash_fields; /* Fields that participate in the hash. */
-	uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
-	unsigned int flow_priority; /* Flow priority to use. */
-	union {
-		struct {
-			enum ibv_exp_flow_spec_type type;
-			uint16_t size;
-		} hdr;
-		struct ibv_exp_flow_spec_tcp_udp tcp_udp;
-		struct ibv_exp_flow_spec_ipv4 ipv4;
-		struct ibv_exp_flow_spec_ipv6 ipv6;
-		struct ibv_exp_flow_spec_eth eth;
-	} flow_spec; /* Flow specification template. */
-	const struct hash_rxq_init *underlayer; /* Pointer to underlayer. */
-};
-
-/* Initialization data for indirection table. */
-struct ind_table_init {
-	unsigned int max_size; /* Maximum number of WQs. */
-	/* Hash RX queues using this table. */
-	unsigned int hash_types;
-	unsigned int hash_types_n;
-};
-
-/* Initialization data for special flows. */
-struct special_flow_init {
-	uint8_t dst_mac_val[6];
-	uint8_t dst_mac_mask[6];
-	unsigned int hash_types;
-	unsigned int per_vlan:1;
-};
-
-struct hash_rxq {
-	struct priv *priv; /* Back pointer to private data. */
-	struct ibv_qp *qp; /* Hash RX QP. */
-	enum hash_rxq_type type; /* Hash RX queue type. */
-	/* MAC flow steering rules, one per VLAN ID. */
-	struct ibv_exp_flow *mac_flow
-		[MLX5_MAX_MAC_ADDRESSES][MLX5_MAX_VLAN_IDS];
-};
-
 /* TX queue descriptor. */
 RTE_STD_C11
 struct mlx5_txq_data {
@@ -297,16 +228,9 @@ struct mlx5_txq_ctrl {
 
 /* mlx5_rxq.c */
 
-extern const struct hash_rxq_init hash_rxq_init[];
-extern const unsigned int hash_rxq_init_n;
-
 extern uint8_t rss_hash_default_key[];
 extern const size_t rss_hash_default_key_len;
 
-size_t priv_flow_attr(struct priv *, struct ibv_exp_flow_attr *,
-		      size_t, enum hash_rxq_type);
-int priv_create_hash_rxqs(struct priv *);
-void priv_destroy_hash_rxqs(struct priv *);
 void mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *);
 int mlx5_rxq_ctrl_setup(struct rte_eth_dev *, struct mlx5_rxq_ctrl *,
 			uint16_t, unsigned int, const struct rte_eth_rxconf *,
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 28f59dc..ef36ee2 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -157,9 +157,10 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 	}
 	/* Update receive callback. */
 	priv_select_rx_function(priv);
-	err = priv_create_hash_rxqs(priv);
+	priv_dev_traffic_disable(priv, dev);
+	priv_dev_traffic_enable(priv, dev);
 	if (err) {
-		ERROR("%p: an error occurred while configuring hash RX queues:"
+		ERROR("%p: an error occurred while configuring control flows:"
 		      " %s",
 		      (void *)priv, strerror(err));
 		goto error;
@@ -186,8 +187,8 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 	dev->data->dev_started = 0;
 	LIST_FOREACH(mr, &priv->mr, next)
 		priv_mr_release(priv, mr);
-	priv_destroy_hash_rxqs(priv);
 	priv_flow_stop(priv, &priv->flows);
+	priv_dev_traffic_disable(priv, dev);
 	priv_txq_stop(priv);
 	priv_rxq_stop(priv);
 	priv_unlock(priv);
@@ -213,9 +214,10 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
 
 	priv_lock(priv);
 	DEBUG("%p: cleaning up and destroying hash RX queues", (void *)dev);
-	priv_destroy_hash_rxqs(priv);
 	priv_flow_stop(priv, &priv->flows);
-	priv_flow_flush(priv, &priv->ctrl_flows);
+	priv_dev_traffic_disable(priv, dev);
+	priv_txq_stop(priv);
+	priv_rxq_stop(priv);
 	LIST_FOREACH(mr, &priv->mr, next) {
 		priv_mr_release(priv, mr);
 	}
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v1 21/21] net/mlx5: support RSS hash configuration in generic flow action
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (20 preceding siblings ...)
  2017-08-02 14:10 ` [dpdk-dev] [PATCH v1 20/21] net/mlx5: remove hash Rx queues support Nelio Laranjeiro
@ 2017-08-02 14:10 ` Nelio Laranjeiro
  2017-08-18 13:44 ` [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Ferruh Yigit
                   ` (33 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-08-02 14:10 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil
This also brings back RSS support on all flows created by default from
the control plane.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5_flow.c | 437 ++++++++++++++++++++++++++++++++++---------
 1 file changed, 346 insertions(+), 91 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 8316255..fe21dac 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -87,12 +87,89 @@ mlx5_flow_create_vxlan(const struct rte_flow_item *item,
 		       const void *default_mask,
 		       void *data);
 
+/* Hash RX queue types. */
+enum hash_rxq_type {
+	HASH_RXQ_TCPV4,
+	HASH_RXQ_UDPV4,
+	HASH_RXQ_IPV4,
+	HASH_RXQ_TCPV6,
+	HASH_RXQ_UDPV6,
+	HASH_RXQ_IPV6,
+	HASH_RXQ_ETH,
+};
+
+/* Initialization data for hash RX queue. */
+struct hash_rxq_init {
+	uint64_t hash_fields; /* Fields that participate in the hash. */
+	uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
+	unsigned int flow_priority; /* Flow priority to use. */
+};
+
+/* Initialization data for hash RX queues. */
+const struct hash_rxq_init hash_rxq_init[] = {
+	[HASH_RXQ_TCPV4] = {
+		.hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 |
+				IBV_EXP_RX_HASH_DST_IPV4 |
+				IBV_EXP_RX_HASH_SRC_PORT_TCP |
+				IBV_EXP_RX_HASH_DST_PORT_TCP),
+		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
+		.flow_priority = 4,
+	},
+	[HASH_RXQ_UDPV4] = {
+		.hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 |
+				IBV_EXP_RX_HASH_DST_IPV4 |
+				IBV_EXP_RX_HASH_SRC_PORT_UDP |
+				IBV_EXP_RX_HASH_DST_PORT_UDP),
+		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
+		.flow_priority = 4,
+	},
+	[HASH_RXQ_IPV4] = {
+		.hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 |
+				IBV_EXP_RX_HASH_DST_IPV4),
+		.dpdk_rss_hf = (ETH_RSS_IPV4 |
+				ETH_RSS_FRAG_IPV4),
+		.flow_priority = 5,
+	},
+	[HASH_RXQ_TCPV6] = {
+		.hash_fields = (IBV_EXP_RX_HASH_SRC_IPV6 |
+				IBV_EXP_RX_HASH_DST_IPV6 |
+				IBV_EXP_RX_HASH_SRC_PORT_TCP |
+				IBV_EXP_RX_HASH_DST_PORT_TCP),
+		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
+		.flow_priority = 4,
+	},
+	[HASH_RXQ_UDPV6] = {
+		.hash_fields = (IBV_EXP_RX_HASH_SRC_IPV6 |
+				IBV_EXP_RX_HASH_DST_IPV6 |
+				IBV_EXP_RX_HASH_SRC_PORT_UDP |
+				IBV_EXP_RX_HASH_DST_PORT_UDP),
+		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
+		.flow_priority = 4,
+	},
+	[HASH_RXQ_IPV6] = {
+		.hash_fields = (IBV_EXP_RX_HASH_SRC_IPV6 |
+				IBV_EXP_RX_HASH_DST_IPV6),
+		.dpdk_rss_hf = (ETH_RSS_IPV6 |
+				ETH_RSS_FRAG_IPV6),
+		.flow_priority = 5,
+	},
+	[HASH_RXQ_ETH] = {
+		.hash_fields = 0,
+		.dpdk_rss_hf = 0,
+		.flow_priority = 6,
+	},
+};
+
+/* Number of entries in hash_rxq_init[]. */
+const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
+
 struct rte_flow {
 	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
-	struct ibv_exp_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
-	struct ibv_exp_flow *ibv_flow; /**< Verbs flow. */
-	struct mlx5_hrxq *hrxq; /**< Hash Rx queue. */
 	uint32_t mark:1; /**< Set if the flow is marked. */
+	struct ibv_exp_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
+	struct mlx5_hrxq *hrxqs[RTE_DIM(hash_rxq_init)]; /**< Hash Rx queues. */
+	struct ibv_exp_flow *ibv_flows[RTE_DIM(hash_rxq_init)];
+		/**< Verbs flows. */
 };
 
 /** Static initializer for items. */
@@ -271,6 +348,7 @@ struct mlx5_flow_action {
 	uint32_t mark_id; /**< Mark identifier. */
 	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
 	uint16_t queues_n; /**< Number of entries in queue[]. */
+	const struct rte_eth_rss_conf *rss_conf; /**< User RSS configuration. */
 };
 
 /** Structure to pass to the conversion function. */
@@ -278,7 +356,6 @@ struct mlx5_flow_parse {
 	struct ibv_exp_flow_attr *ibv_attr; /**< Verbs attribute. */
 	unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
 	uint32_t inner; /**< Set once VXLAN is encountered. */
-	uint64_t hash_fields; /**< Fields that participate in the hash. */
 	struct mlx5_flow_action actions; /**< Parsed action result. */
 };
 
@@ -526,6 +603,7 @@ priv_flow_validate(struct priv *priv,
 				(const struct rte_flow_action_rss *)
 				actions->conf;
 			uint16_t n;
+			int rxq_n;
 
 			if (!rss || !rss->num) {
 				rte_flow_error_set(error, EINVAL,
@@ -534,6 +612,9 @@ priv_flow_validate(struct priv *priv,
 						   "no valid queues");
 				return -rte_errno;
 			}
+			rxq_n = rss->num;
+			if (rss->rss_conf && !rss->rss_conf->rss_hf)
+				rxq_n = 1;
 			if (flow->actions.queues_n == 1) {
 				uint16_t found = 0;
 
@@ -554,7 +635,7 @@ priv_flow_validate(struct priv *priv,
 					return -rte_errno;
 				}
 			}
-			for (n = 0; n < rss->num; ++n) {
+			for (n = 0; n < rxq_n; ++n) {
 				if (rss->queue[n] >= priv->rxqs_n) {
 					rte_flow_error_set(error, EINVAL,
 						   RTE_FLOW_ERROR_TYPE_ACTION,
@@ -565,9 +646,10 @@ priv_flow_validate(struct priv *priv,
 				}
 			}
 			flow->actions.queue = 1;
-			for (n = 0; n < rss->num; ++n)
+			for (n = 0; n < rxq_n; ++n)
 				flow->actions.queues[n] = rss->queue[n];
-			flow->actions.queues_n = rss->num;
+			flow->actions.queues_n = rxq_n;
+			flow->actions.rss_conf = rss->rss_conf;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
 			const struct rte_flow_action_mark *mark =
 				(const struct rte_flow_action_mark *)
@@ -666,7 +748,6 @@ mlx5_flow_create_eth(const struct rte_flow_item *item,
 
 	++flow->ibv_attr->num_of_specs;
 	flow->ibv_attr->priority = 2;
-	flow->hash_fields = 0;
 	eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
 	*eth = (struct ibv_exp_flow_spec_eth) {
 		.type = flow->inner | IBV_EXP_FLOW_SPEC_ETH,
@@ -746,8 +827,6 @@ mlx5_flow_create_ipv4(const struct rte_flow_item *item,
 
 	++flow->ibv_attr->num_of_specs;
 	flow->ibv_attr->priority = 1;
-	flow->hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 |
-			     IBV_EXP_RX_HASH_DST_IPV4);
 	ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
 	*ipv4 = (struct ibv_exp_flow_spec_ipv4_ext) {
 		.type = flow->inner | IBV_EXP_FLOW_SPEC_IPV4_EXT,
@@ -801,8 +880,6 @@ mlx5_flow_create_ipv6(const struct rte_flow_item *item,
 
 	++flow->ibv_attr->num_of_specs;
 	flow->ibv_attr->priority = 1;
-	flow->hash_fields = (IBV_EXP_RX_HASH_SRC_IPV6 |
-			     IBV_EXP_RX_HASH_DST_IPV6);
 	ipv6 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
 	*ipv6 = (struct ibv_exp_flow_spec_ipv6_ext) {
 		.type = flow->inner | IBV_EXP_FLOW_SPEC_IPV6_EXT,
@@ -857,8 +934,6 @@ mlx5_flow_create_udp(const struct rte_flow_item *item,
 
 	++flow->ibv_attr->num_of_specs;
 	flow->ibv_attr->priority = 0;
-	flow->hash_fields |= (IBV_EXP_RX_HASH_SRC_PORT_UDP |
-			      IBV_EXP_RX_HASH_DST_PORT_UDP);
 	udp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
 	*udp = (struct ibv_exp_flow_spec_tcp_udp) {
 		.type = flow->inner | IBV_EXP_FLOW_SPEC_UDP,
@@ -901,8 +976,6 @@ mlx5_flow_create_tcp(const struct rte_flow_item *item,
 
 	++flow->ibv_attr->num_of_specs;
 	flow->ibv_attr->priority = 0;
-	flow->hash_fields |= (IBV_EXP_RX_HASH_SRC_PORT_TCP |
-			      IBV_EXP_RX_HASH_DST_PORT_TCP);
 	tcp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
 	*tcp = (struct ibv_exp_flow_spec_tcp_udp) {
 		.type = flow->inner | IBV_EXP_FLOW_SPEC_TCP,
@@ -994,6 +1067,118 @@ mlx5_flow_create_flag_mark(struct mlx5_flow_parse *flow, uint32_t mark_id)
 }
 
 /**
+ * Create hash Rx queues when RSS is disabled.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param flow
+ *   MLX5 flow attributes (filled by mlx5_flow_validate()).
+ * @param rte_flow
+ *   Pointer to rte flow structure.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   0 on success, a errno value otherwise and rte_errno is set.
+ */
+static int
+priv_flow_create_action_queue_no_rss(struct priv *priv,
+				     struct mlx5_flow_parse *flow,
+				     struct rte_flow *rte_flow,
+				     struct rte_flow_error *error)
+{
+	rte_flow->hrxqs[HASH_RXQ_ETH] =
+		mlx5_priv_hrxq_get(priv, rss_hash_default_key,
+				   rss_hash_default_key_len,
+				   0,
+				   flow->actions.queues,
+				   flow->actions.queues_n);
+	if (rte_flow->hrxqs[HASH_RXQ_ETH])
+		return 0;
+	rte_flow->hrxqs[HASH_RXQ_ETH] =
+		mlx5_priv_hrxq_new(priv, rss_hash_default_key,
+				   rss_hash_default_key_len,
+				   0,
+				   flow->actions.queues,
+				   flow->actions.queues_n);
+	if (!rte_flow->hrxqs[HASH_RXQ_ETH]) {
+		rte_flow_error_set(error, ENOMEM,
+				   RTE_FLOW_ERROR_TYPE_HANDLE,
+				   NULL, "cannot create hash rxq");
+		return ENOMEM;
+	}
+	return 0;
+}
+
+/**
+ * Create hash Rx queues when RSS is enabled.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param flow
+ *   MLX5 flow attributes (filled by mlx5_flow_validate()).
+ * @param rte_flow
+ *   Pointer to rte flow structure.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   0 on success, a errno value otherwise and rte_errno is set.
+ */
+static int
+priv_flow_create_action_queue_rss(struct priv *priv,
+				  struct mlx5_flow_parse *flow,
+				  struct rte_flow *rte_flow,
+				  struct rte_flow_error *error)
+{
+	unsigned int i;
+
+	/**
+	 * Up to 6 types of Hash Rx queues can be created to make RSS
+	 * possible on the different kinds of packets:
+	 *  - IPv4 UDP
+	 *  - IPv4 TCP
+	 *  - IPv6 UDP
+	 *  - IPv6 TCP
+	 *  - IPv4
+	 *  - IPv6
+	 * All six are created only when rss_conf->rss_hf enables every type.
+	 */
+	for (i = 0; i != (hash_rxq_init_n - 1); ++i) {
+		uint64_t hash_fields;
+
+		if ((flow->actions.rss_conf->rss_hf &
+		     hash_rxq_init[i].dpdk_rss_hf) !=
+		    hash_rxq_init[i].dpdk_rss_hf)
+			continue;
+		hash_fields = hash_rxq_init[i].hash_fields;
+		rte_flow->hrxqs[i] =
+			mlx5_priv_hrxq_get(priv,
+					   flow->actions.rss_conf->rss_key,
+					   flow->actions.rss_conf->rss_key_len,
+					   hash_fields,
+					   flow->actions.queues,
+					   flow->actions.queues_n);
+		if (rte_flow->hrxqs[i])
+			continue;
+		rte_flow->hrxqs[i] =
+			mlx5_priv_hrxq_new(priv,
+					   flow->actions.rss_conf->rss_key,
+					   flow->actions.rss_conf->rss_key_len,
+					   hash_fields,
+					   flow->actions.queues,
+					   flow->actions.queues_n);
+		if (!rte_flow->hrxqs[i]) {
+			rte_flow_error_set(error, ENOMEM,
+					   RTE_FLOW_ERROR_TYPE_HANDLE,
+					   NULL, "cannot create hash rxq");
+			return ENOMEM;
+		}
+	}
+	return 0;
+}
+
+/**
  * Complete flow rule creation.
  *
  * @param priv
@@ -1024,23 +1209,20 @@ priv_flow_create_action_queue(struct priv *priv,
 	}
 	rte_flow->mark = flow->actions.mark;
 	rte_flow->ibv_attr = flow->ibv_attr;
-	rte_flow->hrxq = mlx5_priv_hrxq_get(priv, rss_hash_default_key,
-					    rss_hash_default_key_len,
-					    flow->hash_fields,
-					    flow->actions.queues,
-					    flow->actions.queues_n);
-	if (!rte_flow->hrxq) {
-		rte_flow->hrxq = mlx5_priv_hrxq_new(priv, rss_hash_default_key,
-						    rss_hash_default_key_len,
-						    flow->hash_fields,
-						    flow->actions.queues,
-						    flow->actions.queues_n);
-		if (!rte_flow->hrxq) {
-			rte_flow_error_set(error, ENOMEM,
-					   RTE_FLOW_ERROR_TYPE_HANDLE,
-					   NULL, "cannot create hash rxq");
+	if (flow->actions.queues_n == 1) {
+		unsigned int ret;
+
+		ret = priv_flow_create_action_queue_no_rss(priv, flow, rte_flow,
+							   error);
+		if (ret)
+			goto error;
+	} else {
+		unsigned int ret;
+
+		ret = priv_flow_create_action_queue_rss(priv, flow, rte_flow,
+							error);
+		if (ret)
 			goto error;
-		}
 	}
 	for (i = 0; i != flow->actions.queues_n; ++i) {
 		struct mlx5_rxq_data *q = (*priv->rxqs)[flow->actions.queues[i]];
@@ -1049,18 +1231,31 @@ priv_flow_create_action_queue(struct priv *priv,
 	}
 	if (!priv->dev->data->dev_started)
 		return rte_flow;
-	rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->hrxq->qp,
-						 rte_flow->ibv_attr);
-	if (!rte_flow->ibv_flow) {
-		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
-				   NULL, "flow rule creation failure");
-		goto error;
+	for (i = 0; i != hash_rxq_init_n; ++i) {
+		if (!rte_flow->hrxqs[i])
+			continue;
+		rte_flow->ibv_flows[i] =
+			ibv_exp_create_flow(rte_flow->hrxqs[i]->qp,
+					    rte_flow->ibv_attr);
+		if (!rte_flow->ibv_flows[i]) {
+			rte_flow_error_set(error, ENOMEM,
+					   RTE_FLOW_ERROR_TYPE_HANDLE,
+					   NULL, "flow rule creation failure");
+			goto error;
+		}
+		DEBUG("%p type %d QP %p ibv_flow %p",
+		      (void*)rte_flow, i, (void*)rte_flow->hrxqs[i],
+		      (void*)rte_flow->ibv_flows[i]);
 	}
 	return rte_flow;
 error:
 	assert(rte_flow);
-	if (rte_flow->hrxq)
-		mlx5_priv_hrxq_release(priv, rte_flow->hrxq);
+	for (i = 0; i != hash_rxq_init_n; ++i) {
+		if (rte_flow->ibv_flows[i])
+			claim_zero(ibv_exp_destroy_flow(rte_flow->ibv_flows[i]));
+		if (rte_flow->hrxqs[i])
+			mlx5_priv_hrxq_release(priv, rte_flow->hrxqs[i]);
+	}
 	rte_free(rte_flow);
 	return NULL;
 }
@@ -1120,7 +1315,6 @@ priv_flow_create(struct priv *priv,
 		.reserved = 0,
 	};
 	flow.inner = 0;
-	flow.hash_fields = 0;
 	claim_zero(priv_flow_validate(priv, attr, items, actions,
 				      error, &flow));
 	if (flow.actions.mark) {
@@ -1178,41 +1372,53 @@ priv_flow_destroy(struct priv *priv,
 		  struct rte_flow *flow)
 {
 	unsigned int i;
+	unsigned int j;
 	uint16_t *queues;
 	uint16_t queues_n;
 
-	queues = flow->hrxq->ind_table->queues;
-	queues_n = flow->hrxq->ind_table->queues_n;
-	if (!flow->mark)
-		goto out;
-	for (i = 0; i != queues_n; ++i) {
-		struct rte_flow *tmp;
-		struct mlx5_rxq_data *rxq = (*priv->rxqs)[queues[i]];
-		int mark = 0;
-
-		/*
-		 * To remove the mark from the queue, the queue must not be
-		 * present in any other marked flow (RSS or not).
-		 */
-		TAILQ_FOREACH(tmp, list, next) {
-			unsigned int j;
-
-			if (!tmp->mark)
-				continue;
-			for (j = 0;
-			     (j != tmp->hrxq->ind_table->queues_n) && !mark;
-			     j++)
-				if (tmp->hrxq->ind_table->queues[j] ==
-				    queues[i])
-					mark = 1;
+	for (i = 0; i != hash_rxq_init_n; ++i) {
+		if (!flow->hrxqs[i])
+			continue;
+		queues = flow->hrxqs[i]->ind_table->queues;
+		queues_n = flow->hrxqs[i]->ind_table->queues_n;
+		if (!flow->mark)
+			goto out;
+		for (j = 0; j != queues_n; ++j) {
+			struct rte_flow *tmp;
+			struct mlx5_rxq_data *rxq = (*priv->rxqs)[queues[j]];
+			int mark = 0;
+
+			/*
+			 * To remove the mark from the queue, the queue must not be
+			 * present in any other marked flow (RSS or not).
+			 */
+			TAILQ_FOREACH(tmp, list, next) {
+				unsigned int k;
+				uint16_t *tqueues =
+					tmp->hrxqs[j]->ind_table->queues;
+				uint16_t tqueues_n =
+					tmp->hrxqs[j]->ind_table->queues_n;
+
+				if (!tmp->mark)
+					continue;
+				for (k = 0; (k != tqueues_n) && !mark; k++)
+					if (tqueues[k] == queues[i])
+						mark = 1;
+			}
+			rxq->mark = mark;
 		}
-		rxq->mark = mark;
 	}
 out:
 	TAILQ_REMOVE(list, flow, next);
-	if (flow->ibv_flow)
-		claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
-	mlx5_priv_hrxq_release(priv, flow->hrxq);
+	for (i = 0; i != hash_rxq_init_n; ++i) {
+		if (flow->ibv_flows[i]) {
+			DEBUG("%p type %d QP %p ibv_flow %p",
+			      (void*)flow, i, (void*)flow->hrxqs[i],
+			      (void*)flow->ibv_flows[i]);
+			claim_zero(ibv_exp_destroy_flow(flow->ibv_flows[i]));
+			mlx5_priv_hrxq_release(priv, flow->hrxqs[i]);
+		}
+	}
 	rte_free(flow->ibv_attr);
 	DEBUG("Flow destroyed %p", (void *)flow);
 	rte_free(flow);
@@ -1294,8 +1500,12 @@ priv_flow_stop(struct priv *priv, struct mlx5_flows *list)
 	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
 		unsigned int i;
 
-		claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
-		flow->ibv_flow = NULL;
+		for (i = 0; i != hash_rxq_init_n; ++i) {
+			if (!flow->ibv_flows[i])
+				continue;
+			claim_zero(ibv_exp_destroy_flow(flow->ibv_flows[i]));
+			flow->ibv_flows[i] = NULL;
+		}
 		/* Disable mark on all queues. */
 		for (i = 0; i != priv->rxqs_n; ++i)
 			(*priv->rxqs)[i]->mark = 0;
@@ -1320,20 +1530,41 @@ priv_flow_start(struct priv *priv, struct mlx5_flows *list)
 	struct rte_flow *flow;
 
 	TAILQ_FOREACH(flow, list, next) {
-		flow->ibv_flow = ibv_exp_create_flow(flow->hrxq->qp,
-						     flow->ibv_attr);
-		if (!flow->ibv_flow) {
-			DEBUG("Flow %p cannot be applied", (void *)flow);
-			rte_errno = EINVAL;
-			return rte_errno;
+		unsigned int i;
+
+		for (i = 0; i != hash_rxq_init_n; ++i) {
+			if (!flow->hrxqs[i])
+				continue;
+			flow->ibv_flows[i] =
+				ibv_exp_create_flow(flow->hrxqs[i]->qp,
+						    flow->ibv_attr);
+			if (!flow->ibv_flows[i]) {
+				DEBUG("Flow %p cannot be applied",
+				      (void *)flow);
+				rte_errno = EINVAL;
+				return rte_errno;
+			}
 		}
 		DEBUG("Flow %p applied", (void *)flow);
 		if (flow->mark) {
 			unsigned int n;
 
-			for (n = 0; n < flow->hrxq->ind_table->queues_n; ++n) {
-				uint16_t idx = flow->hrxq->ind_table->queues[n];
-				(*priv->rxqs)[idx]->mark = 1;
+			/*
+			 * Inside the same flow, queue list will remain the
+			 * same.
+			 */
+			for (i = 0; i != hash_rxq_init_n; ++i) {
+				uint16_t *queues;
+				uint16_t queues_n;
+
+				if (!flow->hrxqs[i])
+					continue;
+				queues_n = flow->hrxqs[i]->ind_table->queues_n;
+				queues = flow->hrxqs[i]->ind_table->queues;
+				for (n = 0; n < queues_n; ++n) {
+					(*priv->rxqs)[queues[n]]->mark = 1;
+				}
+				break;
 			}
 		}
 	}
@@ -1431,18 +1662,35 @@ mlx5_flow_ctrl(struct rte_eth_dev *dev,
 	};
 	struct rte_flow_action actions[] = {
 		{
-			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
-			.conf = &(struct rte_flow_action_queue){
-				.index = 0,
-			},
+			.type = RTE_FLOW_ACTION_TYPE_RSS,
 		},
 		{
 			.type = RTE_FLOW_ACTION_TYPE_END,
 		},
 	};
+	struct rte_flow_action_rss *conf;
 	struct rte_flow *flow;
 	struct rte_flow_error error;
+	unsigned int i;
+	unsigned int j;
 
+	conf = rte_malloc(__func__, sizeof(*conf) +
+			  priv->rxqs_n * sizeof(uint16_t), 0);
+	if (!conf)
+		goto out;
+	for (i = 0, j = 0; i != priv->rxqs_n; ++i) {
+		if ((*priv->rxqs)[i]) {
+			conf->queue[j] = i;
+			++j;
+			++conf->num;
+		}
+	}
+	if (!conf->num) {
+		rte_free(conf);
+		goto out;
+	}
+	conf->rss_conf = &priv->rss_conf;
+	actions[0].conf = conf;
 	if (enable) {
 		flow = priv_flow_create(priv, &attr, items, actions, &error);
 		if (!flow) {
@@ -1461,6 +1709,7 @@ mlx5_flow_ctrl(struct rte_eth_dev *dev,
 		};
 		struct ibv_exp_flow_spec_eth *eth;
 		const unsigned int attr_size = sizeof(struct ibv_exp_flow_attr);
+		unsigned int i;
 
 		claim_zero(mlx5_flow_create_eth(&items[0], NULL, &parser));
 		TAILQ_FOREACH(flow, &priv->ctrl_flows, next) {
@@ -1469,14 +1718,20 @@ mlx5_flow_ctrl(struct rte_eth_dev *dev,
 			if (!memcmp(eth, &spec.eth, sizeof(*eth)))
 				break;
 		}
-		if (flow) {
-			claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
-			mlx5_priv_hrxq_release(priv, flow->hrxq);
-			rte_free(flow->ibv_attr);
-			DEBUG("Control flow destroyed %p", (void *)flow);
-			TAILQ_REMOVE(&priv->ctrl_flows, flow, next);
-			rte_free(flow);
+		if (!flow)
+			goto out;
+		for (i = 0; i != hash_rxq_init_n; ++i) {
+			if (!flow->ibv_flows[i])
+				continue;
+			claim_zero(ibv_exp_destroy_flow(flow->ibv_flows[i]));
+			mlx5_priv_hrxq_release(priv, flow->hrxqs[i]);
 		}
+		rte_free(flow->ibv_attr);
+		DEBUG("Control flow destroyed %p", (void *)flow);
+		TAILQ_REMOVE(&priv->ctrl_flows, flow, next);
+		rte_free(flow);
 	}
+	rte_free(conf);
+out:
 	return 0;
 }
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * Re: [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (21 preceding siblings ...)
  2017-08-02 14:10 ` [dpdk-dev] [PATCH v1 21/21] net/mlx5: support RSS hash configuration in generic flow action Nelio Laranjeiro
@ 2017-08-18 13:44 ` Ferruh Yigit
  2017-08-22  9:15   ` Nélio Laranjeiro
  2017-10-05 12:49 ` [dpdk-dev] [PATCH v2 00/30] " Nelio Laranjeiro
                   ` (32 subsequent siblings)
  55 siblings, 1 reply; 129+ messages in thread
From: Ferruh Yigit @ 2017-08-18 13:44 UTC (permalink / raw)
  To: Nelio Laranjeiro, dev; +Cc: adrien.mazarguil
On 8/2/2017 3:10 PM, Nelio Laranjeiro wrote:
> This series cleanups the control plane part and the way it uses the different
> kind of objects (DPDK queues, Verbs Queues, ...).  It has three goals:
> 
>  1. Reduce the memory usage by sharing all possible objects.
> 
>  2. Leave the configuration to the control plane and the creation/destruction
>     of queues to the dev_start/dev_stop() to have a better control on object
>     and easily apply the configuration.
> 
>  3. Create all flows through the generic flow API, it will also help to
>     implement a detection collision algorithm as all flows are using the same
>     service and thus the same kind of object.
Hi Nelio,
Patchset is not applying cleanly, can you please rebase it on top of
latest tree?
And there are some checkpatch warnings for the set.
There are two other mlx5 patchsets, what is the dependency between them?
Thanks,
ferruh
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * Re: [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode
  2017-08-18 13:44 ` [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Ferruh Yigit
@ 2017-08-22  9:15   ` Nélio Laranjeiro
  0 siblings, 0 replies; 129+ messages in thread
From: Nélio Laranjeiro @ 2017-08-22  9:15 UTC (permalink / raw)
  To: Ferruh Yigit; +Cc: dev, adrien.mazarguil
On Fri, Aug 18, 2017 at 02:44:53PM +0100, Ferruh Yigit wrote:
> On 8/2/2017 3:10 PM, Nelio Laranjeiro wrote:
> > This series cleanups the control plane part and the way it uses the different
> > kind of objects (DPDK queues, Verbs Queues, ...).  It has three goals:
> > 
> >  1. Reduce the memory usage by sharing all possible objects.
> > 
> >  2. Leave the configuration to the control plane and the creation/destruction
> >     of queues to the dev_start/dev_stop() to have a better control on object
> >     and easily apply the configuration.
> > 
> >  3. Create all flows through the generic flow API, it will also help to
> >     implement a detection collision algorithm as all flows are using the same
> >     service and thus the same kind of object.
> 
> Hi Nelio,
> 
> Patchset is not applying cleanly, can you please rebase it on top of
> latest tree?
yes,
> And there are some checkpatch warnings for the set.
sure,
> There are two other mlx5 patchsets, what is the dependency between them?
It is mlx5-cleanup first and then this one.
I will clearly state the dependency in the cover letter to help.
Thanks,
-- 
Nélio Laranjeiro
6WIND
^ permalink raw reply	[flat|nested] 129+ messages in thread 
 
- * [dpdk-dev] [PATCH v2 00/30] net/mlx5: cleanup for isolated mode
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (22 preceding siblings ...)
  2017-08-18 13:44 ` [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Ferruh Yigit
@ 2017-10-05 12:49 ` Nelio Laranjeiro
  2017-10-05 19:14   ` Ferruh Yigit
       [not found] ` <cover.1507207731.git.nelio.laranjeiro@6wind.com>
                   ` (31 subsequent siblings)
  55 siblings, 1 reply; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-05 12:49 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
This series cleans up the control plane part and the way it uses the different
kinds of objects (DPDK queues, Verbs Queues, ...).  It has three goals:
 1. Reduce the memory usage by sharing all possible objects.
 2. Leave the configuration to the control plane and the creation/destruction
    of queues to the dev_start/dev_stop() to have a better control on object
    and easily apply the configuration.
 3. Create all flows through the generic flow API; this will also help to
    implement a collision detection algorithm, as all flows use the same
    service and thus the same kind of object.
Changes in v2:
 * rebase on upstream rdma-core/MLNX_OFED 4.2
 * split in smaller patches
This series applies on top of:
http://dpdk.org/ml/archives/dev/2017-September/075575.html
Nelio Laranjeiro (30):
  net/mlx5: merge action and flow parser structure
  net/mlx5: remove flow director support
  net/mlx5: prefix Rx structures and functions
  net/mlx5: prefix Tx structures and functions
  net/mlx5: remove redundant started flag
  net/mlx5: verify all flows are been removed on close
  net/mlx5: fix reta update can segfault
  net/mlx5: fix rxqs vector support verification
  net/mlx5: add reference counter on memory region
  net/mlx5: separate DPDK from Verbs Rx queue objects
  net/mlx5: separate DPDK from Verbs Tx queue objects
  net/mlx5: add reference counter on DPDK Tx queues
  net/mlx5: add reference counter on DPDK Rx queues
  net/mlx5: make indirection tables shareable
  net/mlx5: add Hash Rx queue object
  net/mlx5: fix clang compilation error
  net/mlx5: use flow to enable promiscuous mode
  net/mlx5: use flow to enable all multi mode
  net/mlx5: use flow to enable unicast traffic
  net/mlx5: handle a single RSS hash key for all protocols
  net/mlx5: remove hash Rx queues support
  net/mlx5: fully convert a flow to verbs in validate
  net/mlx5: process flows actions before of items
  net/mlx5: merge internal parser and actions structures
  net/mlx5: use a better name for the flow parser
  net/mlx5: reorganise functions in the file
  net/mlx5: move Verbs flows and attributes
  net/mlx5: handle RSS hash configuration in RSS flow
  net/mlx5: support flow director
  net/mlx5: add new operations for isolated mode
 drivers/net/mlx5/Makefile            |    1 -
 drivers/net/mlx5/mlx5.c              |  134 +-
 drivers/net/mlx5/mlx5.h              |   91 +-
 drivers/net/mlx5/mlx5_defs.h         |    3 -
 drivers/net/mlx5/mlx5_ethdev.c       |   27 +-
 drivers/net/mlx5/mlx5_fdir.c         | 1091 ---------------
 drivers/net/mlx5/mlx5_flow.c         | 2475 +++++++++++++++++++++++++---------
 drivers/net/mlx5/mlx5_mac.c          |  407 +-----
 drivers/net/mlx5/mlx5_mr.c           |  263 ++--
 drivers/net/mlx5/mlx5_rss.c          |  136 +-
 drivers/net/mlx5/mlx5_rxmode.c       |  380 +-----
 drivers/net/mlx5/mlx5_rxq.c          | 1999 ++++++++++++++-------------
 drivers/net/mlx5/mlx5_rxtx.c         |   39 +-
 drivers/net/mlx5/mlx5_rxtx.h         |  284 ++--
 drivers/net/mlx5/mlx5_rxtx_vec_sse.c |   42 +-
 drivers/net/mlx5/mlx5_stats.c        |    4 +-
 drivers/net/mlx5/mlx5_trigger.c      |  320 ++++-
 drivers/net/mlx5/mlx5_txq.c          |  876 +++++++-----
 drivers/net/mlx5/mlx5_utils.h        |    2 +
 drivers/net/mlx5/mlx5_vlan.c         |   58 +-
 20 files changed, 4241 insertions(+), 4391 deletions(-)
 delete mode 100644 drivers/net/mlx5/mlx5_fdir.c
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * Re: [dpdk-dev] [PATCH v2 00/30] net/mlx5: cleanup for isolated mode
  2017-10-05 12:49 ` [dpdk-dev] [PATCH v2 00/30] " Nelio Laranjeiro
@ 2017-10-05 19:14   ` Ferruh Yigit
  0 siblings, 0 replies; 129+ messages in thread
From: Ferruh Yigit @ 2017-10-05 19:14 UTC (permalink / raw)
  To: Nelio Laranjeiro, dev; +Cc: adrien.mazarguil, yskoh
On 10/5/2017 1:49 PM, Nelio Laranjeiro wrote:
> This series cleanups the control plane part and the way it uses the different
> kind of objects (DPDK queues, Verbs Queues, ...).  It has three goals:
> 
>  1. Reduce the memory usage by sharing all possible objects.
> 
>  2. Leave the configuration to the control plane and the creation/destruction
>     of queues to the dev_start/dev_stop() to have a better control on object
>     and easily apply the configuration.
> 
>  3. Create all flows through the generic flow API, it will also help to
>     implement a detection collision algorithm as all flows are using the same
>     service and thus the same kind of object.
> 
> Changes in v2:
> 
>  * rebase on upstream rdma-core/MLNX_OFED 4.2
>  * split in smaller patches
> 
> This series apply on top of:
> http://dpdk.org/ml/archives/dev/2017-September/075575.html
And Xueming's patch is waiting for rebase because it doesn't apply cleanly.
So I will first wait for Xueming's new version, and will get this one later.
^ permalink raw reply	[flat|nested] 129+ messages in thread 
 
- [parent not found: <cover.1507207731.git.nelio.laranjeiro@6wind.com>] 
- * [dpdk-dev] [PATCH v2 01/30] net/mlx5: merge action and flow parser structure
       [not found] ` <cover.1507207731.git.nelio.laranjeiro@6wind.com>
@ 2017-10-05 12:49   ` Nelio Laranjeiro
  2017-10-06  0:47     ` Yongseok Koh
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 02/30] net/mlx5: remove flow director support Nelio Laranjeiro
                     ` (28 subsequent siblings)
  29 siblings, 1 reply; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-05 12:49 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
mlx5_flow_create() and mlx5_flow_validate() are making common checks.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5_flow.c | 153 +++++++++++++++++++++----------------------
 1 file changed, 73 insertions(+), 80 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index dbd241f..fb30803 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -201,7 +201,7 @@ static const struct mlx5_flow_items mlx5_flow_items[] = {
 		.default_mask = &rte_flow_item_ipv4_mask,
 		.mask_sz = sizeof(struct rte_flow_item_ipv4),
 		.convert = mlx5_flow_create_ipv4,
-		.dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
+		.dst_sz = sizeof(struct ibv_flow_spec_ipv4),
 	},
 	[RTE_FLOW_ITEM_TYPE_IPV6] = {
 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
@@ -271,12 +271,23 @@ static const struct mlx5_flow_items mlx5_flow_items[] = {
 	},
 };
 
+/* Structure to parse actions. */
+struct mlx5_flow_action {
+	uint32_t queue:1; /**< Target is a receive queue. */
+	uint32_t drop:1; /**< Target is a drop queue. */
+	uint32_t mark:1; /**< Mark is present in the flow. */
+	uint32_t mark_id; /**< Mark identifier. */
+	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
+	uint16_t queues_n; /**< Number of entries in queue[]. */
+};
+
 /** Structure to pass to the conversion function. */
-struct mlx5_flow {
+struct mlx5_flow_parse {
 	struct ibv_flow_attr *ibv_attr; /**< Verbs attribute. */
 	unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
 	uint32_t inner; /**< Set once VXLAN is encountered. */
 	uint64_t hash_fields; /**< Fields that participate in the hash. */
+	struct mlx5_flow_action actions; /**< Parsed action result. */
 };
 
 /** Structure for Drop queue. */
@@ -287,15 +298,6 @@ struct rte_flow_drop {
 	struct ibv_cq *cq; /**< Verbs completion queue. */
 };
 
-struct mlx5_flow_action {
-	uint32_t queue:1; /**< Target is a receive queue. */
-	uint32_t drop:1; /**< Target is a drop queue. */
-	uint32_t mark:1; /**< Mark is present in the flow. */
-	uint32_t mark_id; /**< Mark identifier. */
-	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
-	uint16_t queues_n; /**< Number of entries in queue[]. */
-};
-
 /**
  * Check support for a given item.
  *
@@ -374,8 +376,6 @@ mlx5_flow_item_validate(const struct rte_flow_item *item,
  *   Perform verbose error reporting if not NULL.
  * @param[in, out] flow
  *   Flow structure to update.
- * @param[in, out] action
- *   Action structure to update.
  *
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
@@ -386,8 +386,7 @@ priv_flow_validate(struct priv *priv,
 		   const struct rte_flow_item items[],
 		   const struct rte_flow_action actions[],
 		   struct rte_flow_error *error,
-		   struct mlx5_flow *flow,
-		   struct mlx5_flow_action *action)
+		   struct mlx5_flow_parse *flow)
 {
 	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
 
@@ -469,7 +468,7 @@ priv_flow_validate(struct priv *priv,
 		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
 			continue;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
-			action->drop = 1;
+			flow->actions.drop = 1;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
 			const struct rte_flow_action_queue *queue =
 				(const struct rte_flow_action_queue *)
@@ -479,13 +478,13 @@ priv_flow_validate(struct priv *priv,
 
 			if (!queue || (queue->index > (priv->rxqs_n - 1)))
 				goto exit_action_not_supported;
-			for (n = 0; n < action->queues_n; ++n) {
-				if (action->queues[n] == queue->index) {
+			for (n = 0; n < flow->actions.queues_n; ++n) {
+				if (flow->actions.queues[n] == queue->index) {
 					found = 1;
 					break;
 				}
 			}
-			if (action->queues_n > 1 && !found) {
+			if (flow->actions.queues_n > 1 && !found) {
 				rte_flow_error_set(error, ENOTSUP,
 					   RTE_FLOW_ERROR_TYPE_ACTION,
 					   actions,
@@ -493,9 +492,9 @@ priv_flow_validate(struct priv *priv,
 				return -rte_errno;
 			}
 			if (!found) {
-				action->queue = 1;
-				action->queues_n = 1;
-				action->queues[0] = queue->index;
+				flow->actions.queue = 1;
+				flow->actions.queues_n = 1;
+				flow->actions.queues[0] = queue->index;
 			}
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
 			const struct rte_flow_action_rss *rss =
@@ -510,12 +509,12 @@ priv_flow_validate(struct priv *priv,
 						   "no valid queues");
 				return -rte_errno;
 			}
-			if (action->queues_n == 1) {
+			if (flow->actions.queues_n == 1) {
 				uint16_t found = 0;
 
-				assert(action->queues_n);
+				assert(flow->actions.queues_n);
 				for (n = 0; n < rss->num; ++n) {
-					if (action->queues[0] ==
+					if (flow->actions.queues[0] ==
 					    rss->queue[n]) {
 						found = 1;
 						break;
@@ -540,10 +539,10 @@ priv_flow_validate(struct priv *priv,
 					return -rte_errno;
 				}
 			}
-			action->queue = 1;
+			flow->actions.queue = 1;
 			for (n = 0; n < rss->num; ++n)
-				action->queues[n] = rss->queue[n];
-			action->queues_n = rss->num;
+				flow->actions.queues[n] = rss->queue[n];
+			flow->actions.queues_n = rss->num;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
 			const struct rte_flow_action_mark *mark =
 				(const struct rte_flow_action_mark *)
@@ -563,19 +562,19 @@ priv_flow_validate(struct priv *priv,
 						   " and 16777199");
 				return -rte_errno;
 			}
-			action->mark = 1;
-			action->mark_id = mark->id;
+			flow->actions.mark = 1;
+			flow->actions.mark_id = mark->id;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
-			action->mark = 1;
+			flow->actions.mark = 1;
 		} else {
 			goto exit_action_not_supported;
 		}
 	}
-	if (action->mark && !flow->ibv_attr && !action->drop)
+	if (flow->actions.mark && !flow->ibv_attr && !flow->actions.drop)
 		flow->offset += sizeof(struct ibv_flow_spec_action_tag);
-	if (!flow->ibv_attr && action->drop)
+	if (!flow->ibv_attr && flow->actions.drop)
 		flow->offset += sizeof(struct ibv_flow_spec_action_drop);
-	if (!action->queue && !action->drop) {
+	if (!flow->actions.queue && !flow->actions.drop) {
 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
 				   NULL, "no valid action");
 		return -rte_errno;
@@ -606,18 +605,16 @@ mlx5_flow_validate(struct rte_eth_dev *dev,
 {
 	struct priv *priv = dev->data->dev_private;
 	int ret;
-	struct mlx5_flow flow = { .offset = sizeof(struct ibv_flow_attr) };
-	struct mlx5_flow_action action = {
-		.queue = 0,
-		.drop = 0,
-		.mark = 0,
-		.mark_id = MLX5_FLOW_MARK_DEFAULT,
-		.queues_n = 0,
+	struct mlx5_flow_parse flow = {
+		.offset = sizeof(struct ibv_flow_attr),
+		.actions = {
+			.mark_id = MLX5_FLOW_MARK_DEFAULT,
+			.queues_n = 0,
+		},
 	};
 
 	priv_lock(priv);
-	ret = priv_flow_validate(priv, attr, items, actions, error, &flow,
-				 &action);
+	ret = priv_flow_validate(priv, attr, items, actions, error, &flow);
 	priv_unlock(priv);
 	return ret;
 }
@@ -639,7 +636,7 @@ mlx5_flow_create_eth(const struct rte_flow_item *item,
 {
 	const struct rte_flow_item_eth *spec = item->spec;
 	const struct rte_flow_item_eth *mask = item->mask;
-	struct mlx5_flow *flow = (struct mlx5_flow *)data;
+	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
 	struct ibv_flow_spec_eth *eth;
 	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
 	unsigned int i;
@@ -688,7 +685,7 @@ mlx5_flow_create_vlan(const struct rte_flow_item *item,
 {
 	const struct rte_flow_item_vlan *spec = item->spec;
 	const struct rte_flow_item_vlan *mask = item->mask;
-	struct mlx5_flow *flow = (struct mlx5_flow *)data;
+	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
 	struct ibv_flow_spec_eth *eth;
 	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
 
@@ -720,7 +717,7 @@ mlx5_flow_create_ipv4(const struct rte_flow_item *item,
 {
 	const struct rte_flow_item_ipv4 *spec = item->spec;
 	const struct rte_flow_item_ipv4 *mask = item->mask;
-	struct mlx5_flow *flow = (struct mlx5_flow *)data;
+	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
 	struct ibv_flow_spec_ipv4_ext *ipv4;
 	unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
 
@@ -774,7 +771,7 @@ mlx5_flow_create_ipv6(const struct rte_flow_item *item,
 {
 	const struct rte_flow_item_ipv6 *spec = item->spec;
 	const struct rte_flow_item_ipv6 *mask = item->mask;
-	struct mlx5_flow *flow = (struct mlx5_flow *)data;
+	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
 	struct ibv_flow_spec_ipv6 *ipv6;
 	unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
 	unsigned int i;
@@ -831,7 +828,7 @@ mlx5_flow_create_udp(const struct rte_flow_item *item,
 {
 	const struct rte_flow_item_udp *spec = item->spec;
 	const struct rte_flow_item_udp *mask = item->mask;
-	struct mlx5_flow *flow = (struct mlx5_flow *)data;
+	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
 	struct ibv_flow_spec_tcp_udp *udp;
 	unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
 
@@ -875,7 +872,7 @@ mlx5_flow_create_tcp(const struct rte_flow_item *item,
 {
 	const struct rte_flow_item_tcp *spec = item->spec;
 	const struct rte_flow_item_tcp *mask = item->mask;
-	struct mlx5_flow *flow = (struct mlx5_flow *)data;
+	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
 	struct ibv_flow_spec_tcp_udp *tcp;
 	unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
 
@@ -919,7 +916,7 @@ mlx5_flow_create_vxlan(const struct rte_flow_item *item,
 {
 	const struct rte_flow_item_vxlan *spec = item->spec;
 	const struct rte_flow_item_vxlan *mask = item->mask;
-	struct mlx5_flow *flow = (struct mlx5_flow *)data;
+	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
 	struct ibv_flow_spec_tunnel *vxlan;
 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
 	union vni {
@@ -958,7 +955,7 @@ mlx5_flow_create_vxlan(const struct rte_flow_item *item,
  *   Mark identifier.
  */
 static int
-mlx5_flow_create_flag_mark(struct mlx5_flow *flow, uint32_t mark_id)
+mlx5_flow_create_flag_mark(struct mlx5_flow_parse *flow, uint32_t mark_id)
 {
 	struct ibv_flow_spec_action_tag *tag;
 	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
@@ -988,7 +985,7 @@ mlx5_flow_create_flag_mark(struct mlx5_flow *flow, uint32_t mark_id)
  */
 static struct rte_flow *
 priv_flow_create_action_queue_drop(struct priv *priv,
-				   struct mlx5_flow *flow,
+				   struct mlx5_flow_parse *flow,
 				   struct rte_flow_error *error)
 {
 	struct rte_flow *rte_flow;
@@ -1036,8 +1033,6 @@ priv_flow_create_action_queue_drop(struct priv *priv,
  *   Pointer to private structure.
  * @param flow
  *   MLX5 flow attributes (filled by mlx5_flow_validate()).
- * @param action
- *   Target action structure.
  * @param[out] error
  *   Perform verbose error reporting if not NULL.
  *
@@ -1046,49 +1041,49 @@ priv_flow_create_action_queue_drop(struct priv *priv,
  */
 static struct rte_flow *
 priv_flow_create_action_queue(struct priv *priv,
-			      struct mlx5_flow *flow,
-			      struct mlx5_flow_action *action,
+			      struct mlx5_flow_parse *flow,
 			      struct rte_flow_error *error)
 {
 	struct rte_flow *rte_flow;
 	unsigned int i;
 	unsigned int j;
-	const unsigned int wqs_n = 1 << log2above(action->queues_n);
+	const unsigned int wqs_n = 1 << log2above(flow->actions.queues_n);
 	struct ibv_wq *wqs[wqs_n];
 
 	assert(priv->pd);
 	assert(priv->ctx);
-	assert(!action->drop);
+	assert(!flow->actions.drop);
 	rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow) +
-			      sizeof(*rte_flow->rxqs) * action->queues_n, 0);
+			      sizeof(*rte_flow->rxqs) * flow->actions.queues_n,
+			      0);
 	if (!rte_flow) {
 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
 				   NULL, "cannot allocate flow memory");
 		return NULL;
 	}
-	for (i = 0; i < action->queues_n; ++i) {
+	for (i = 0; i < flow->actions.queues_n; ++i) {
 		struct rxq_ctrl *rxq;
 
-		rxq = container_of((*priv->rxqs)[action->queues[i]],
+		rxq = container_of((*priv->rxqs)[flow->actions.queues[i]],
 				   struct rxq_ctrl, rxq);
 		wqs[i] = rxq->wq;
 		rte_flow->rxqs[i] = &rxq->rxq;
 		++rte_flow->rxqs_n;
-		rxq->rxq.mark |= action->mark;
+		rxq->rxq.mark |= flow->actions.mark;
 	}
 	/* finalise indirection table. */
 	for (j = 0; i < wqs_n; ++i, ++j) {
 		wqs[i] = wqs[j];
-		if (j == action->queues_n)
+		if (j == flow->actions.queues_n)
 			j = 0;
 	}
-	rte_flow->mark = action->mark;
+	rte_flow->mark = flow->actions.mark;
 	rte_flow->ibv_attr = flow->ibv_attr;
 	rte_flow->hash_fields = flow->hash_fields;
 	rte_flow->ind_table = ibv_create_rwq_ind_table(
 		priv->ctx,
 		&(struct ibv_rwq_ind_table_init_attr){
-			.log_ind_tbl_size = log2above(action->queues_n),
+			.log_ind_tbl_size = log2above(flow->actions.queues_n),
 			.ind_tbl = wqs,
 			.comp_mask = 0,
 		});
@@ -1165,18 +1160,17 @@ priv_flow_create(struct priv *priv,
 		 struct rte_flow_error *error)
 {
 	struct rte_flow *rte_flow;
-	struct mlx5_flow flow = { .offset = sizeof(struct ibv_flow_attr), };
-	struct mlx5_flow_action action = {
-		.queue = 0,
-		.drop = 0,
-		.mark = 0,
-		.mark_id = MLX5_FLOW_MARK_DEFAULT,
-		.queues_n = 0,
+	struct mlx5_flow_parse flow = {
+		.offset = sizeof(struct ibv_flow_attr),
+		.actions = {
+			.mark_id = MLX5_FLOW_MARK_DEFAULT,
+			.queues = { 0 },
+			.queues_n = 0,
+		},
 	};
 	int err;
 
-	err = priv_flow_validate(priv, attr, items, actions, error, &flow,
-				 &action);
+	err = priv_flow_validate(priv, attr, items, actions, error, &flow);
 	if (err)
 		goto exit;
 	flow.ibv_attr = rte_malloc(__func__, flow.offset, 0);
@@ -1197,17 +1191,16 @@ priv_flow_create(struct priv *priv,
 	flow.inner = 0;
 	flow.hash_fields = 0;
 	claim_zero(priv_flow_validate(priv, attr, items, actions,
-				      error, &flow, &action));
-	if (action.mark && !action.drop) {
-		mlx5_flow_create_flag_mark(&flow, action.mark_id);
+				      error, &flow));
+	if (flow.actions.mark && !flow.actions.drop) {
+		mlx5_flow_create_flag_mark(&flow, flow.actions.mark_id);
 		flow.offset += sizeof(struct ibv_flow_spec_action_tag);
 	}
-	if (action.drop)
+	if (flow.actions.drop)
 		rte_flow =
 			priv_flow_create_action_queue_drop(priv, &flow, error);
 	else
-		rte_flow = priv_flow_create_action_queue(priv, &flow, &action,
-							 error);
+		rte_flow = priv_flow_create_action_queue(priv, &flow, error);
 	if (!rte_flow)
 		goto exit;
 	return rte_flow;
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v2 02/30] net/mlx5: remove flow director support
       [not found] ` <cover.1507207731.git.nelio.laranjeiro@6wind.com>
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 01/30] net/mlx5: merge action and flow parser structure Nelio Laranjeiro
@ 2017-10-05 12:49   ` Nelio Laranjeiro
  2017-10-06  0:49     ` Yongseok Koh
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 03/30] net/mlx5: prefix Rx structures and functions Nelio Laranjeiro
                     ` (27 subsequent siblings)
  29 siblings, 1 reply; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-05 12:49 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
The generic flow API should be used for flow steering as it provides a better
and easier way to configure flows.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 doc/guides/nics/features/mlx5.ini |    1 -
 doc/guides/nics/mlx5.rst          |    2 -
 drivers/net/mlx5/Makefile         |    1 -
 drivers/net/mlx5/mlx5.c           |    8 -
 drivers/net/mlx5/mlx5.h           |   12 +-
 drivers/net/mlx5/mlx5_fdir.c      | 1091 -------------------------------------
 drivers/net/mlx5/mlx5_flow.c      |   43 ++
 drivers/net/mlx5/mlx5_rxq.c       |    2 -
 drivers/net/mlx5/mlx5_rxtx.h      |    9 -
 drivers/net/mlx5/mlx5_trigger.c   |    3 -
 10 files changed, 44 insertions(+), 1128 deletions(-)
 delete mode 100644 drivers/net/mlx5/mlx5_fdir.c
diff --git a/doc/guides/nics/features/mlx5.ini b/doc/guides/nics/features/mlx5.ini
index c363639..34a796d 100644
--- a/doc/guides/nics/features/mlx5.ini
+++ b/doc/guides/nics/features/mlx5.ini
@@ -23,7 +23,6 @@ RSS key update       = Y
 RSS reta update      = Y
 SR-IOV               = Y
 VLAN filter          = Y
-Flow director        = Y
 Flow API             = Y
 CRC offload          = Y
 VLAN offload         = Y
diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index d24941a..09fb738 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -89,8 +89,6 @@ Features
 - Promiscuous mode.
 - Multicast promiscuous mode.
 - Hardware checksum offloads.
-- Flow director (RTE_FDIR_MODE_PERFECT, RTE_FDIR_MODE_PERFECT_MAC_VLAN and
-  RTE_ETH_FDIR_REJECT).
 - Flow API.
 - Multiple process.
 - KVM and VMware ESX SR-IOV modes are supported.
diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index bd9ea57..361cec5 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -49,7 +49,6 @@ SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rxmode.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_vlan.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_stats.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rss.c
-SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_fdir.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_mr.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_socket.c
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 4ffaa58..9cfb754 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -201,10 +201,6 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	priv_mac_addrs_disable(priv);
 	priv_destroy_hash_rxqs(priv);
 
-	/* Remove flow director elements. */
-	priv_fdir_disable(priv);
-	priv_fdir_delete_filters_list(priv);
-
 	/* Prevent crashes when queues are still in use. */
 	dev->rx_pkt_burst = removed_rx_burst;
 	dev->tx_pkt_burst = removed_tx_burst;
@@ -844,10 +840,6 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 		claim_zero(priv_mac_addr_add(priv, 0,
 					     (const uint8_t (*)[ETHER_ADDR_LEN])
 					     mac.addr_bytes));
-		/* Initialize FD filters list. */
-		err = fdir_init_filters_list(priv);
-		if (err)
-			goto port_error;
 #ifndef NDEBUG
 		{
 			char ifname[IF_NAMESIZE];
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 928aeb6..adac5f4 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -145,8 +145,6 @@ struct priv {
 	struct rte_intr_handle intr_handle; /* Interrupt handler. */
 	unsigned int (*reta_idx)[]; /* RETA index table. */
 	unsigned int reta_idx_n; /* RETA index size. */
-	struct fdir_filter_list *fdir_filter_list; /* Flow director rules. */
-	struct fdir_queue *fdir_drop_queue; /* Flow director drop queue. */
 	struct rte_flow_drop *flow_drop_queue; /* Flow drop queue. */
 	TAILQ_HEAD(mlx5_flows, rte_flow) flows; /* RTE Flow rules. */
 	uint32_t link_speed_capa; /* Link speed capabilities. */
@@ -273,18 +271,10 @@ void mlx5_vlan_strip_queue_set(struct rte_eth_dev *, uint16_t, int);
 int mlx5_dev_start(struct rte_eth_dev *);
 void mlx5_dev_stop(struct rte_eth_dev *);
 
-/* mlx5_fdir.c */
+/* mlx5_flow.c */
 
-void priv_fdir_queue_destroy(struct priv *, struct fdir_queue *);
-int fdir_init_filters_list(struct priv *);
-void priv_fdir_delete_filters_list(struct priv *);
-void priv_fdir_disable(struct priv *);
-void priv_fdir_enable(struct priv *);
 int mlx5_dev_filter_ctrl(struct rte_eth_dev *, enum rte_filter_type,
 			 enum rte_filter_op, void *);
-
-/* mlx5_flow.c */
-
 int mlx5_flow_validate(struct rte_eth_dev *, const struct rte_flow_attr *,
 		       const struct rte_flow_item [],
 		       const struct rte_flow_action [],
diff --git a/drivers/net/mlx5/mlx5_fdir.c b/drivers/net/mlx5/mlx5_fdir.c
deleted file mode 100644
index 66e3818..0000000
--- a/drivers/net/mlx5/mlx5_fdir.c
+++ /dev/null
@@ -1,1091 +0,0 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright 2015 6WIND S.A.
- *   Copyright 2015 Mellanox.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of 6WIND S.A. nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <stddef.h>
-#include <assert.h>
-#include <stdint.h>
-#include <string.h>
-#include <errno.h>
-
-/* Verbs header. */
-/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
-#include <infiniband/verbs.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
-
-#include <rte_ether.h>
-#include <rte_malloc.h>
-#include <rte_ethdev.h>
-#include <rte_common.h>
-#include <rte_flow.h>
-#include <rte_flow_driver.h>
-
-#include "mlx5.h"
-#include "mlx5_rxtx.h"
-
-struct fdir_flow_desc {
-	uint16_t dst_port;
-	uint16_t src_port;
-	uint32_t src_ip[4];
-	uint32_t dst_ip[4];
-	uint8_t	mac[6];
-	uint16_t vlan_tag;
-	enum hash_rxq_type type;
-};
-
-struct mlx5_fdir_filter {
-	LIST_ENTRY(mlx5_fdir_filter) next;
-	uint16_t queue; /* Queue assigned to if FDIR match. */
-	enum rte_eth_fdir_behavior behavior;
-	struct fdir_flow_desc desc;
-	struct ibv_flow *flow;
-};
-
-LIST_HEAD(fdir_filter_list, mlx5_fdir_filter);
-
-/**
- * Convert struct rte_eth_fdir_filter to mlx5 filter descriptor.
- *
- * @param[in] fdir_filter
- *   DPDK filter structure to convert.
- * @param[out] desc
- *   Resulting mlx5 filter descriptor.
- * @param mode
- *   Flow director mode.
- */
-static void
-fdir_filter_to_flow_desc(const struct rte_eth_fdir_filter *fdir_filter,
-			 struct fdir_flow_desc *desc, enum rte_fdir_mode mode)
-{
-	/* Initialize descriptor. */
-	memset(desc, 0, sizeof(*desc));
-
-	/* Set VLAN ID. */
-	desc->vlan_tag = fdir_filter->input.flow_ext.vlan_tci;
-
-	/* Set MAC address. */
-	if (mode == RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
-		rte_memcpy(desc->mac,
-			   fdir_filter->input.flow.mac_vlan_flow.mac_addr.
-				addr_bytes,
-			   sizeof(desc->mac));
-		desc->type = HASH_RXQ_ETH;
-		return;
-	}
-
-	/* Set mode */
-	switch (fdir_filter->input.flow_type) {
-	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
-		desc->type = HASH_RXQ_UDPV4;
-		break;
-	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
-		desc->type = HASH_RXQ_TCPV4;
-		break;
-	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
-		desc->type = HASH_RXQ_IPV4;
-		break;
-	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
-		desc->type = HASH_RXQ_UDPV6;
-		break;
-	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
-		desc->type = HASH_RXQ_TCPV6;
-		break;
-	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
-		desc->type = HASH_RXQ_IPV6;
-		break;
-	default:
-		break;
-	}
-
-	/* Set flow values */
-	switch (fdir_filter->input.flow_type) {
-	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
-	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
-		desc->src_port = fdir_filter->input.flow.udp4_flow.src_port;
-		desc->dst_port = fdir_filter->input.flow.udp4_flow.dst_port;
-		/* fallthrough */
-	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
-		desc->src_ip[0] = fdir_filter->input.flow.ip4_flow.src_ip;
-		desc->dst_ip[0] = fdir_filter->input.flow.ip4_flow.dst_ip;
-		break;
-	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
-	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
-		desc->src_port = fdir_filter->input.flow.udp6_flow.src_port;
-		desc->dst_port = fdir_filter->input.flow.udp6_flow.dst_port;
-		/* Fall through. */
-	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
-		rte_memcpy(desc->src_ip,
-			   fdir_filter->input.flow.ipv6_flow.src_ip,
-			   sizeof(desc->src_ip));
-		rte_memcpy(desc->dst_ip,
-			   fdir_filter->input.flow.ipv6_flow.dst_ip,
-			   sizeof(desc->dst_ip));
-		break;
-	default:
-		break;
-	}
-}
-
-/**
- * Check if two flow descriptors overlap according to configured mask.
- *
- * @param priv
- *   Private structure that provides flow director mask.
- * @param desc1
- *   First flow descriptor to compare.
- * @param desc2
- *   Second flow descriptor to compare.
- *
- * @return
- *   Nonzero if descriptors overlap.
- */
-static int
-priv_fdir_overlap(const struct priv *priv,
-		  const struct fdir_flow_desc *desc1,
-		  const struct fdir_flow_desc *desc2)
-{
-	const struct rte_eth_fdir_masks *mask =
-		&priv->dev->data->dev_conf.fdir_conf.mask;
-	unsigned int i;
-
-	if (desc1->type != desc2->type)
-		return 0;
-	/* Ignore non masked bits. */
-	for (i = 0; i != RTE_DIM(desc1->mac); ++i)
-		if ((desc1->mac[i] & mask->mac_addr_byte_mask) !=
-		    (desc2->mac[i] & mask->mac_addr_byte_mask))
-			return 0;
-	if (((desc1->src_port & mask->src_port_mask) !=
-	     (desc2->src_port & mask->src_port_mask)) ||
-	    ((desc1->dst_port & mask->dst_port_mask) !=
-	     (desc2->dst_port & mask->dst_port_mask)))
-		return 0;
-	switch (desc1->type) {
-	case HASH_RXQ_IPV4:
-	case HASH_RXQ_UDPV4:
-	case HASH_RXQ_TCPV4:
-		if (((desc1->src_ip[0] & mask->ipv4_mask.src_ip) !=
-		     (desc2->src_ip[0] & mask->ipv4_mask.src_ip)) ||
-		    ((desc1->dst_ip[0] & mask->ipv4_mask.dst_ip) !=
-		     (desc2->dst_ip[0] & mask->ipv4_mask.dst_ip)))
-			return 0;
-		break;
-	case HASH_RXQ_IPV6:
-	case HASH_RXQ_UDPV6:
-	case HASH_RXQ_TCPV6:
-		for (i = 0; i != RTE_DIM(desc1->src_ip); ++i)
-			if (((desc1->src_ip[i] & mask->ipv6_mask.src_ip[i]) !=
-			     (desc2->src_ip[i] & mask->ipv6_mask.src_ip[i])) ||
-			    ((desc1->dst_ip[i] & mask->ipv6_mask.dst_ip[i]) !=
-			     (desc2->dst_ip[i] & mask->ipv6_mask.dst_ip[i])))
-				return 0;
-		break;
-	default:
-		break;
-	}
-	return 1;
-}
-
-/**
- * Create flow director steering rule for a specific filter.
- *
- * @param priv
- *   Private structure.
- * @param mlx5_fdir_filter
- *   Filter to create a steering rule for.
- * @param fdir_queue
- *   Flow director queue for matching packets.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-static int
-priv_fdir_flow_add(struct priv *priv,
-		   struct mlx5_fdir_filter *mlx5_fdir_filter,
-		   struct fdir_queue *fdir_queue)
-{
-	struct ibv_flow *flow;
-	struct fdir_flow_desc *desc = &mlx5_fdir_filter->desc;
-	enum rte_fdir_mode fdir_mode =
-		priv->dev->data->dev_conf.fdir_conf.mode;
-	struct rte_eth_fdir_masks *mask =
-		&priv->dev->data->dev_conf.fdir_conf.mask;
-	FLOW_ATTR_SPEC_ETH(data, priv_flow_attr(priv, NULL, 0, desc->type));
-	struct ibv_flow_attr *attr = &data->attr;
-	uintptr_t spec_offset = (uintptr_t)&data->spec;
-	struct ibv_flow_spec_eth *spec_eth;
-	struct ibv_flow_spec_ipv4 *spec_ipv4;
-	struct ibv_flow_spec_ipv6 *spec_ipv6;
-	struct ibv_flow_spec_tcp_udp *spec_tcp_udp;
-	struct mlx5_fdir_filter *iter_fdir_filter;
-	unsigned int i;
-
-	/* Abort if an existing flow overlaps this one to avoid packet
-	 * duplication, even if it targets another queue. */
-	LIST_FOREACH(iter_fdir_filter, priv->fdir_filter_list, next)
-		if ((iter_fdir_filter != mlx5_fdir_filter) &&
-		    (iter_fdir_filter->flow != NULL) &&
-		    (priv_fdir_overlap(priv,
-				       &mlx5_fdir_filter->desc,
-				       &iter_fdir_filter->desc)))
-			return EEXIST;
-
-	/*
-	 * No padding must be inserted by the compiler between attr and spec.
-	 * This layout is expected by libibverbs.
-	 */
-	assert(((uint8_t *)attr + sizeof(*attr)) == (uint8_t *)spec_offset);
-	priv_flow_attr(priv, attr, sizeof(data), desc->type);
-
-	/* Set Ethernet spec */
-	spec_eth = (struct ibv_flow_spec_eth *)spec_offset;
-
-	/* The first specification must be Ethernet. */
-	assert(spec_eth->type == IBV_FLOW_SPEC_ETH);
-	assert(spec_eth->size == sizeof(*spec_eth));
-
-	/* VLAN ID */
-	spec_eth->val.vlan_tag = desc->vlan_tag & mask->vlan_tci_mask;
-	spec_eth->mask.vlan_tag = mask->vlan_tci_mask;
-
-	/* Update priority */
-	attr->priority = 2;
-
-	if (fdir_mode == RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
-		/* MAC Address */
-		for (i = 0; i != RTE_DIM(spec_eth->mask.dst_mac); ++i) {
-			spec_eth->val.dst_mac[i] =
-				desc->mac[i] & mask->mac_addr_byte_mask;
-			spec_eth->mask.dst_mac[i] = mask->mac_addr_byte_mask;
-		}
-		goto create_flow;
-	}
-
-	switch (desc->type) {
-	case HASH_RXQ_IPV4:
-	case HASH_RXQ_UDPV4:
-	case HASH_RXQ_TCPV4:
-		spec_offset += spec_eth->size;
-
-		/* Set IP spec */
-		spec_ipv4 = (struct ibv_flow_spec_ipv4 *)spec_offset;
-
-		/* The second specification must be IP. */
-		assert(spec_ipv4->type == IBV_FLOW_SPEC_IPV4);
-		assert(spec_ipv4->size == sizeof(*spec_ipv4));
-
-		spec_ipv4->val.src_ip =
-			desc->src_ip[0] & mask->ipv4_mask.src_ip;
-		spec_ipv4->val.dst_ip =
-			desc->dst_ip[0] & mask->ipv4_mask.dst_ip;
-		spec_ipv4->mask.src_ip = mask->ipv4_mask.src_ip;
-		spec_ipv4->mask.dst_ip = mask->ipv4_mask.dst_ip;
-
-		/* Update priority */
-		attr->priority = 1;
-
-		if (desc->type == HASH_RXQ_IPV4)
-			goto create_flow;
-
-		spec_offset += spec_ipv4->size;
-		break;
-	case HASH_RXQ_IPV6:
-	case HASH_RXQ_UDPV6:
-	case HASH_RXQ_TCPV6:
-		spec_offset += spec_eth->size;
-
-		/* Set IP spec */
-		spec_ipv6 = (struct ibv_flow_spec_ipv6 *)spec_offset;
-
-		/* The second specification must be IP. */
-		assert(spec_ipv6->type == IBV_FLOW_SPEC_IPV6);
-		assert(spec_ipv6->size == sizeof(*spec_ipv6));
-
-		for (i = 0; i != RTE_DIM(desc->src_ip); ++i) {
-			((uint32_t *)spec_ipv6->val.src_ip)[i] =
-				desc->src_ip[i] & mask->ipv6_mask.src_ip[i];
-			((uint32_t *)spec_ipv6->val.dst_ip)[i] =
-				desc->dst_ip[i] & mask->ipv6_mask.dst_ip[i];
-		}
-		rte_memcpy(spec_ipv6->mask.src_ip,
-			   mask->ipv6_mask.src_ip,
-			   sizeof(spec_ipv6->mask.src_ip));
-		rte_memcpy(spec_ipv6->mask.dst_ip,
-			   mask->ipv6_mask.dst_ip,
-			   sizeof(spec_ipv6->mask.dst_ip));
-
-		/* Update priority */
-		attr->priority = 1;
-
-		if (desc->type == HASH_RXQ_IPV6)
-			goto create_flow;
-
-		spec_offset += spec_ipv6->size;
-		break;
-	default:
-		ERROR("invalid flow attribute type");
-		return EINVAL;
-	}
-
-	/* Set TCP/UDP flow specification. */
-	spec_tcp_udp = (struct ibv_flow_spec_tcp_udp *)spec_offset;
-
-	/* The third specification must be TCP/UDP. */
-	assert(spec_tcp_udp->type == IBV_FLOW_SPEC_TCP ||
-	       spec_tcp_udp->type == IBV_FLOW_SPEC_UDP);
-	assert(spec_tcp_udp->size == sizeof(*spec_tcp_udp));
-
-	spec_tcp_udp->val.src_port = desc->src_port & mask->src_port_mask;
-	spec_tcp_udp->val.dst_port = desc->dst_port & mask->dst_port_mask;
-	spec_tcp_udp->mask.src_port = mask->src_port_mask;
-	spec_tcp_udp->mask.dst_port = mask->dst_port_mask;
-
-	/* Update priority */
-	attr->priority = 0;
-
-create_flow:
-
-	errno = 0;
-	flow = ibv_create_flow(fdir_queue->qp, attr);
-	if (flow == NULL) {
-		/* It's not clear whether errno is always set in this case. */
-		ERROR("%p: flow director configuration failed, errno=%d: %s",
-		      (void *)priv, errno,
-		      (errno ? strerror(errno) : "Unknown error"));
-		if (errno)
-			return errno;
-		return EINVAL;
-	}
-
-	DEBUG("%p: added flow director rule (%p)", (void *)priv, (void *)flow);
-	mlx5_fdir_filter->flow = flow;
-	return 0;
-}
-
-/**
- * Destroy a flow director queue.
- *
- * @param fdir_queue
- *   Flow director queue to be destroyed.
- */
-void
-priv_fdir_queue_destroy(struct priv *priv, struct fdir_queue *fdir_queue)
-{
-	struct mlx5_fdir_filter *fdir_filter;
-
-	/* Disable filter flows still applying to this queue. */
-	LIST_FOREACH(fdir_filter, priv->fdir_filter_list, next) {
-		unsigned int idx = fdir_filter->queue;
-		struct rxq_ctrl *rxq_ctrl =
-			container_of((*priv->rxqs)[idx], struct rxq_ctrl, rxq);
-
-		assert(idx < priv->rxqs_n);
-		if (fdir_queue == rxq_ctrl->fdir_queue &&
-		    fdir_filter->flow != NULL) {
-			claim_zero(ibv_destroy_flow(fdir_filter->flow));
-			fdir_filter->flow = NULL;
-		}
-	}
-	assert(fdir_queue->qp);
-	claim_zero(ibv_destroy_qp(fdir_queue->qp));
-	assert(fdir_queue->ind_table);
-	claim_zero(ibv_destroy_rwq_ind_table(fdir_queue->ind_table));
-	if (fdir_queue->wq)
-		claim_zero(ibv_destroy_wq(fdir_queue->wq));
-	if (fdir_queue->cq)
-		claim_zero(ibv_destroy_cq(fdir_queue->cq));
-#ifndef NDEBUG
-	memset(fdir_queue, 0x2a, sizeof(*fdir_queue));
-#endif
-	rte_free(fdir_queue);
-}
-
-/**
- * Create a flow director queue.
- *
- * @param priv
- *   Private structure.
- * @param wq
- *   Work queue to route matched packets to, NULL if one needs to
- *   be created.
- *
- * @return
- *   Related flow director queue on success, NULL otherwise.
- */
-static struct fdir_queue *
-priv_fdir_queue_create(struct priv *priv, struct ibv_wq *wq,
-		       unsigned int socket)
-{
-	struct fdir_queue *fdir_queue;
-
-	fdir_queue = rte_calloc_socket(__func__, 1, sizeof(*fdir_queue),
-				       0, socket);
-	if (!fdir_queue) {
-		ERROR("cannot allocate flow director queue");
-		return NULL;
-	}
-	assert(priv->pd);
-	assert(priv->ctx);
-	if (!wq) {
-		fdir_queue->cq = ibv_create_cq(
-			priv->ctx, 1, NULL, NULL, 0);
-		if (!fdir_queue->cq) {
-			ERROR("cannot create flow director CQ");
-			goto error;
-		}
-		fdir_queue->wq = ibv_create_wq(
-			priv->ctx,
-			&(struct ibv_wq_init_attr){
-				.wq_type = IBV_WQT_RQ,
-				.max_wr = 1,
-				.max_sge = 1,
-				.pd = priv->pd,
-				.cq = fdir_queue->cq,
-			});
-		if (!fdir_queue->wq) {
-			ERROR("cannot create flow director WQ");
-			goto error;
-		}
-		wq = fdir_queue->wq;
-	}
-	fdir_queue->ind_table = ibv_create_rwq_ind_table(
-		priv->ctx,
-		&(struct ibv_rwq_ind_table_init_attr){
-			.log_ind_tbl_size = 0,
-			.ind_tbl = &wq,
-			.comp_mask = 0,
-		});
-	if (!fdir_queue->ind_table) {
-		ERROR("cannot create flow director indirection table");
-		goto error;
-	}
-	fdir_queue->qp = ibv_create_qp_ex(
-		priv->ctx,
-		&(struct ibv_qp_init_attr_ex){
-			.qp_type = IBV_QPT_RAW_PACKET,
-			.comp_mask =
-				IBV_QP_INIT_ATTR_PD |
-				IBV_QP_INIT_ATTR_IND_TABLE |
-				IBV_QP_INIT_ATTR_RX_HASH,
-			.rx_hash_conf = (struct ibv_rx_hash_conf){
-				.rx_hash_function =
-					IBV_RX_HASH_FUNC_TOEPLITZ,
-				.rx_hash_key_len = rss_hash_default_key_len,
-				.rx_hash_key = rss_hash_default_key,
-				.rx_hash_fields_mask = 0,
-			},
-			.rwq_ind_tbl = fdir_queue->ind_table,
-			.pd = priv->pd,
-		});
-	if (!fdir_queue->qp) {
-		ERROR("cannot create flow director hash RX QP");
-		goto error;
-	}
-	return fdir_queue;
-error:
-	assert(fdir_queue);
-	assert(!fdir_queue->qp);
-	if (fdir_queue->ind_table)
-		claim_zero(ibv_destroy_rwq_ind_table
-			   (fdir_queue->ind_table));
-	if (fdir_queue->wq)
-		claim_zero(ibv_destroy_wq(fdir_queue->wq));
-	if (fdir_queue->cq)
-		claim_zero(ibv_destroy_cq(fdir_queue->cq));
-	rte_free(fdir_queue);
-	return NULL;
-}
-
-/**
- * Get flow director queue for a specific RX queue, create it in case
- * it does not exist.
- *
- * @param priv
- *   Private structure.
- * @param idx
- *   RX queue index.
- *
- * @return
- *   Related flow director queue on success, NULL otherwise.
- */
-static struct fdir_queue *
-priv_get_fdir_queue(struct priv *priv, uint16_t idx)
-{
-	struct rxq_ctrl *rxq_ctrl =
-		container_of((*priv->rxqs)[idx], struct rxq_ctrl, rxq);
-	struct fdir_queue *fdir_queue = rxq_ctrl->fdir_queue;
-
-	assert(rxq_ctrl->wq);
-	if (fdir_queue == NULL) {
-		fdir_queue = priv_fdir_queue_create(priv, rxq_ctrl->wq,
-						    rxq_ctrl->socket);
-		rxq_ctrl->fdir_queue = fdir_queue;
-	}
-	return fdir_queue;
-}
-
-/**
- * Get or flow director drop queue. Create it if it does not exist.
- *
- * @param priv
- *   Private structure.
- *
- * @return
- *   Flow director drop queue on success, NULL otherwise.
- */
-static struct fdir_queue *
-priv_get_fdir_drop_queue(struct priv *priv)
-{
-	struct fdir_queue *fdir_queue = priv->fdir_drop_queue;
-
-	if (fdir_queue == NULL) {
-		unsigned int socket = SOCKET_ID_ANY;
-
-		/* Select a known NUMA socket if possible. */
-		if (priv->rxqs_n && (*priv->rxqs)[0])
-			socket = container_of((*priv->rxqs)[0],
-					      struct rxq_ctrl, rxq)->socket;
-		fdir_queue = priv_fdir_queue_create(priv, NULL, socket);
-		priv->fdir_drop_queue = fdir_queue;
-	}
-	return fdir_queue;
-}
-
-/**
- * Enable flow director filter and create steering rules.
- *
- * @param priv
- *   Private structure.
- * @param mlx5_fdir_filter
- *   Filter to create steering rule for.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-static int
-priv_fdir_filter_enable(struct priv *priv,
-			struct mlx5_fdir_filter *mlx5_fdir_filter)
-{
-	struct fdir_queue *fdir_queue;
-
-	/* Check if flow already exists. */
-	if (mlx5_fdir_filter->flow != NULL)
-		return 0;
-
-	/* Get fdir_queue for specific queue. */
-	if (mlx5_fdir_filter->behavior == RTE_ETH_FDIR_REJECT)
-		fdir_queue = priv_get_fdir_drop_queue(priv);
-	else
-		fdir_queue = priv_get_fdir_queue(priv,
-						 mlx5_fdir_filter->queue);
-
-	if (fdir_queue == NULL) {
-		ERROR("failed to create flow director rxq for queue %d",
-		      mlx5_fdir_filter->queue);
-		return EINVAL;
-	}
-
-	/* Create flow */
-	return priv_fdir_flow_add(priv, mlx5_fdir_filter, fdir_queue);
-}
-
-/**
- * Initialize flow director filters list.
- *
- * @param priv
- *   Private structure.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-int
-fdir_init_filters_list(struct priv *priv)
-{
-	/* Filter list initialization should be done only once. */
-	if (priv->fdir_filter_list)
-		return 0;
-
-	/* Create filters list. */
-	priv->fdir_filter_list =
-		rte_calloc(__func__, 1, sizeof(*priv->fdir_filter_list), 0);
-
-	if (priv->fdir_filter_list == NULL) {
-		int err = ENOMEM;
-
-		ERROR("cannot allocate flow director filter list: %s",
-		      strerror(err));
-		return err;
-	}
-
-	LIST_INIT(priv->fdir_filter_list);
-
-	return 0;
-}
-
-/**
- * Flush all filters.
- *
- * @param priv
- *   Private structure.
- */
-static void
-priv_fdir_filter_flush(struct priv *priv)
-{
-	struct mlx5_fdir_filter *mlx5_fdir_filter;
-
-	while ((mlx5_fdir_filter = LIST_FIRST(priv->fdir_filter_list))) {
-		struct ibv_flow *flow = mlx5_fdir_filter->flow;
-
-		DEBUG("%p: flushing flow director filter %p",
-		      (void *)priv, (void *)mlx5_fdir_filter);
-		LIST_REMOVE(mlx5_fdir_filter, next);
-		if (flow != NULL)
-			claim_zero(ibv_destroy_flow(flow));
-		rte_free(mlx5_fdir_filter);
-	}
-}
-
-/**
- * Remove all flow director filters and delete list.
- *
- * @param priv
- *   Private structure.
- */
-void
-priv_fdir_delete_filters_list(struct priv *priv)
-{
-	priv_fdir_filter_flush(priv);
-	rte_free(priv->fdir_filter_list);
-	priv->fdir_filter_list = NULL;
-}
-
-/**
- * Disable flow director, remove all steering rules.
- *
- * @param priv
- *   Private structure.
- */
-void
-priv_fdir_disable(struct priv *priv)
-{
-	unsigned int i;
-	struct mlx5_fdir_filter *mlx5_fdir_filter;
-
-	/* Run on every flow director filter and destroy flow handle. */
-	LIST_FOREACH(mlx5_fdir_filter, priv->fdir_filter_list, next) {
-		struct ibv_flow *flow;
-
-		/* Only valid elements should be in the list */
-		assert(mlx5_fdir_filter != NULL);
-		flow = mlx5_fdir_filter->flow;
-
-		/* Destroy flow handle */
-		if (flow != NULL) {
-			claim_zero(ibv_destroy_flow(flow));
-			mlx5_fdir_filter->flow = NULL;
-		}
-	}
-
-	/* Destroy flow director context in each RX queue. */
-	for (i = 0; (i != priv->rxqs_n); i++) {
-		struct rxq_ctrl *rxq_ctrl;
-
-		if (!(*priv->rxqs)[i])
-			continue;
-		rxq_ctrl = container_of((*priv->rxqs)[i], struct rxq_ctrl, rxq);
-		if (!rxq_ctrl->fdir_queue)
-			continue;
-		priv_fdir_queue_destroy(priv, rxq_ctrl->fdir_queue);
-		rxq_ctrl->fdir_queue = NULL;
-	}
-	if (priv->fdir_drop_queue) {
-		priv_fdir_queue_destroy(priv, priv->fdir_drop_queue);
-		priv->fdir_drop_queue = NULL;
-	}
-}
-
-/**
- * Enable flow director, create steering rules.
- *
- * @param priv
- *   Private structure.
- */
-void
-priv_fdir_enable(struct priv *priv)
-{
-	struct mlx5_fdir_filter *mlx5_fdir_filter;
-
-	/* Run on every fdir filter and create flow handle */
-	LIST_FOREACH(mlx5_fdir_filter, priv->fdir_filter_list, next) {
-		/* Only valid elements should be in the list */
-		assert(mlx5_fdir_filter != NULL);
-
-		priv_fdir_filter_enable(priv, mlx5_fdir_filter);
-	}
-}
-
-/**
- * Find specific filter in list.
- *
- * @param priv
- *   Private structure.
- * @param fdir_filter
- *   Flow director filter to find.
- *
- * @return
- *   Filter element if found, otherwise NULL.
- */
-static struct mlx5_fdir_filter *
-priv_find_filter_in_list(struct priv *priv,
-			 const struct rte_eth_fdir_filter *fdir_filter)
-{
-	struct fdir_flow_desc desc;
-	struct mlx5_fdir_filter *mlx5_fdir_filter;
-	enum rte_fdir_mode fdir_mode = priv->dev->data->dev_conf.fdir_conf.mode;
-
-	/* Get flow director filter to look for. */
-	fdir_filter_to_flow_desc(fdir_filter, &desc, fdir_mode);
-
-	/* Look for the requested element. */
-	LIST_FOREACH(mlx5_fdir_filter, priv->fdir_filter_list, next) {
-		/* Only valid elements should be in the list. */
-		assert(mlx5_fdir_filter != NULL);
-
-		/* Return matching filter. */
-		if (!memcmp(&desc, &mlx5_fdir_filter->desc, sizeof(desc)))
-			return mlx5_fdir_filter;
-	}
-
-	/* Filter not found */
-	return NULL;
-}
-
-/**
- * Add new flow director filter and store it in list.
- *
- * @param priv
- *   Private structure.
- * @param fdir_filter
- *   Flow director filter to add.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-static int
-priv_fdir_filter_add(struct priv *priv,
-		     const struct rte_eth_fdir_filter *fdir_filter)
-{
-	struct mlx5_fdir_filter *mlx5_fdir_filter;
-	enum rte_fdir_mode fdir_mode = priv->dev->data->dev_conf.fdir_conf.mode;
-	int err = 0;
-
-	/* Validate queue number. */
-	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
-		ERROR("invalid queue number %d", fdir_filter->action.rx_queue);
-		return EINVAL;
-	}
-
-	/* Duplicate filters are currently unsupported. */
-	mlx5_fdir_filter = priv_find_filter_in_list(priv, fdir_filter);
-	if (mlx5_fdir_filter != NULL) {
-		ERROR("filter already exists");
-		return EINVAL;
-	}
-
-	/* Create new flow director filter. */
-	mlx5_fdir_filter =
-		rte_calloc(__func__, 1, sizeof(*mlx5_fdir_filter), 0);
-	if (mlx5_fdir_filter == NULL) {
-		err = ENOMEM;
-		ERROR("cannot allocate flow director filter: %s",
-		      strerror(err));
-		return err;
-	}
-
-	/* Set action parameters. */
-	mlx5_fdir_filter->queue = fdir_filter->action.rx_queue;
-	mlx5_fdir_filter->behavior = fdir_filter->action.behavior;
-
-	/* Convert to mlx5 filter descriptor. */
-	fdir_filter_to_flow_desc(fdir_filter,
-				 &mlx5_fdir_filter->desc, fdir_mode);
-
-	/* Insert new filter into list. */
-	LIST_INSERT_HEAD(priv->fdir_filter_list, mlx5_fdir_filter, next);
-
-	DEBUG("%p: flow director filter %p added",
-	      (void *)priv, (void *)mlx5_fdir_filter);
-
-	/* Enable filter immediately if device is started. */
-	if (priv->started)
-		err = priv_fdir_filter_enable(priv, mlx5_fdir_filter);
-
-	return err;
-}
-
-/**
- * Update queue for specific filter.
- *
- * @param priv
- *   Private structure.
- * @param fdir_filter
- *   Filter to be updated.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-static int
-priv_fdir_filter_update(struct priv *priv,
-			const struct rte_eth_fdir_filter *fdir_filter)
-{
-	struct mlx5_fdir_filter *mlx5_fdir_filter;
-
-	/* Validate queue number. */
-	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
-		ERROR("invalid queue number %d", fdir_filter->action.rx_queue);
-		return EINVAL;
-	}
-
-	mlx5_fdir_filter = priv_find_filter_in_list(priv, fdir_filter);
-	if (mlx5_fdir_filter != NULL) {
-		struct ibv_flow *flow = mlx5_fdir_filter->flow;
-		int err = 0;
-
-		/* Update queue number. */
-		mlx5_fdir_filter->queue = fdir_filter->action.rx_queue;
-
-		/* Destroy flow handle. */
-		if (flow != NULL) {
-			claim_zero(ibv_destroy_flow(flow));
-			mlx5_fdir_filter->flow = NULL;
-		}
-		DEBUG("%p: flow director filter %p updated",
-		      (void *)priv, (void *)mlx5_fdir_filter);
-
-		/* Enable filter if device is started. */
-		if (priv->started)
-			err = priv_fdir_filter_enable(priv, mlx5_fdir_filter);
-
-		return err;
-	}
-
-	/* Filter not found, create it. */
-	DEBUG("%p: filter not found for update, creating new filter",
-	      (void *)priv);
-	return priv_fdir_filter_add(priv, fdir_filter);
-}
-
-/**
- * Delete specific filter.
- *
- * @param priv
- *   Private structure.
- * @param fdir_filter
- *   Filter to be deleted.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-static int
-priv_fdir_filter_delete(struct priv *priv,
-			const struct rte_eth_fdir_filter *fdir_filter)
-{
-	struct mlx5_fdir_filter *mlx5_fdir_filter;
-
-	mlx5_fdir_filter = priv_find_filter_in_list(priv, fdir_filter);
-	if (mlx5_fdir_filter != NULL) {
-		struct ibv_flow *flow = mlx5_fdir_filter->flow;
-
-		/* Remove element from list. */
-		LIST_REMOVE(mlx5_fdir_filter, next);
-
-		/* Destroy flow handle. */
-		if (flow != NULL) {
-			claim_zero(ibv_destroy_flow(flow));
-			mlx5_fdir_filter->flow = NULL;
-		}
-
-		DEBUG("%p: flow director filter %p deleted",
-		      (void *)priv, (void *)mlx5_fdir_filter);
-
-		/* Delete filter. */
-		rte_free(mlx5_fdir_filter);
-
-		return 0;
-	}
-
-	ERROR("%p: flow director delete failed, cannot find filter",
-	      (void *)priv);
-	return EINVAL;
-}
-
-/**
- * Get flow director information.
- *
- * @param priv
- *   Private structure.
- * @param[out] fdir_info
- *   Resulting flow director information.
- */
-static void
-priv_fdir_info_get(struct priv *priv, struct rte_eth_fdir_info *fdir_info)
-{
-	struct rte_eth_fdir_masks *mask =
-		&priv->dev->data->dev_conf.fdir_conf.mask;
-
-	fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
-	fdir_info->guarant_spc = 0;
-
-	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
-
-	fdir_info->max_flexpayload = 0;
-	fdir_info->flow_types_mask[0] = 0;
-
-	fdir_info->flex_payload_unit = 0;
-	fdir_info->max_flex_payload_segment_num = 0;
-	fdir_info->flex_payload_limit = 0;
-	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
-}
-
-/**
- * Deal with flow director operations.
- *
- * @param priv
- *   Pointer to private structure.
- * @param filter_op
- *   Operation to perform.
- * @param arg
- *   Pointer to operation-specific structure.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-static int
-priv_fdir_ctrl_func(struct priv *priv, enum rte_filter_op filter_op, void *arg)
-{
-	enum rte_fdir_mode fdir_mode =
-		priv->dev->data->dev_conf.fdir_conf.mode;
-	int ret = 0;
-
-	if (filter_op == RTE_ETH_FILTER_NOP)
-		return 0;
-
-	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
-	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
-		ERROR("%p: flow director mode %d not supported",
-		      (void *)priv, fdir_mode);
-		return EINVAL;
-	}
-
-	switch (filter_op) {
-	case RTE_ETH_FILTER_ADD:
-		ret = priv_fdir_filter_add(priv, arg);
-		break;
-	case RTE_ETH_FILTER_UPDATE:
-		ret = priv_fdir_filter_update(priv, arg);
-		break;
-	case RTE_ETH_FILTER_DELETE:
-		ret = priv_fdir_filter_delete(priv, arg);
-		break;
-	case RTE_ETH_FILTER_FLUSH:
-		priv_fdir_filter_flush(priv);
-		break;
-	case RTE_ETH_FILTER_INFO:
-		priv_fdir_info_get(priv, arg);
-		break;
-	default:
-		DEBUG("%p: unknown operation %u", (void *)priv, filter_op);
-		ret = EINVAL;
-		break;
-	}
-	return ret;
-}
-
-static const struct rte_flow_ops mlx5_flow_ops = {
-	.validate = mlx5_flow_validate,
-	.create = mlx5_flow_create,
-	.destroy = mlx5_flow_destroy,
-	.flush = mlx5_flow_flush,
-	.query = NULL,
-	.isolate = mlx5_flow_isolate,
-};
-
-/**
- * Manage filter operations.
- *
- * @param dev
- *   Pointer to Ethernet device structure.
- * @param filter_type
- *   Filter type.
- * @param filter_op
- *   Operation to perform.
- * @param arg
- *   Pointer to operation-specific structure.
- *
- * @return
- *   0 on success, negative errno value on failure.
- */
-int
-mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
-		     enum rte_filter_type filter_type,
-		     enum rte_filter_op filter_op,
-		     void *arg)
-{
-	int ret = EINVAL;
-	struct priv *priv = dev->data->dev_private;
-
-	if (mlx5_is_secondary())
-		return -E_RTE_SECONDARY;
-	switch (filter_type) {
-	case RTE_ETH_FILTER_GENERIC:
-		if (filter_op != RTE_ETH_FILTER_GET)
-			return -EINVAL;
-		*(const void **)arg = &mlx5_flow_ops;
-		return 0;
-	case RTE_ETH_FILTER_FDIR:
-		priv_lock(priv);
-		ret = priv_fdir_ctrl_func(priv, filter_op, arg);
-		priv_unlock(priv);
-		break;
-	default:
-		ERROR("%p: filter type (%d) not supported",
-		      (void *)dev, filter_type);
-		break;
-	}
-
-	return -ret;
-}
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index fb30803..266ae24 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -298,6 +298,49 @@ struct rte_flow_drop {
 	struct ibv_cq *cq; /**< Verbs completion queue. */
 };
 
+static const struct rte_flow_ops mlx5_flow_ops = {
+	.validate = mlx5_flow_validate,
+	.create = mlx5_flow_create,
+	.destroy = mlx5_flow_destroy,
+	.flush = mlx5_flow_flush,
+	.query = NULL,
+	.isolate = mlx5_flow_isolate,
+};
+
+/**
+ * Manage filter operations.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param filter_type
+ *   Filter type.
+ * @param filter_op
+ *   Operation to perform.
+ * @param arg
+ *   Pointer to operation-specific structure.
+ *
+ * @return
+ *   0 on success, negative errno value on failure.
+ */
+int
+mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
+		     enum rte_filter_type filter_type,
+		     enum rte_filter_op filter_op,
+		     void *arg)
+{
+	int ret = EINVAL;
+
+	if (filter_type == RTE_ETH_FILTER_GENERIC) {
+		if (filter_op != RTE_ETH_FILTER_GET)
+			return -EINVAL;
+		*(const void **)arg = &mlx5_flow_ops;
+		return 0;
+	}
+	ERROR("%p: filter type (%d) not supported",
+	      (void *)dev, filter_type);
+	return -ret;
+}
+
 /**
  * Check support for a given item.
  *
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index b71f72f..5819e92 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -760,8 +760,6 @@ rxq_cleanup(struct rxq_ctrl *rxq_ctrl)
 {
 	DEBUG("cleaning up %p", (void *)rxq_ctrl);
 	rxq_free_elts(rxq_ctrl);
-	if (rxq_ctrl->fdir_queue != NULL)
-		priv_fdir_queue_destroy(rxq_ctrl->priv, rxq_ctrl->fdir_queue);
 	if (rxq_ctrl->wq != NULL)
 		claim_zero(ibv_destroy_wq(rxq_ctrl->wq));
 	if (rxq_ctrl->cq != NULL)
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 8c689b9..daf9eae 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -78,14 +78,6 @@ struct mlx5_txq_stats {
 	uint64_t oerrors; /**< Total number of failed transmitted packets. */
 };
 
-/* Flow director queue structure. */
-struct fdir_queue {
-	struct ibv_qp *qp; /* Associated RX QP. */
-	struct ibv_rwq_ind_table *ind_table; /* Indirection table. */
-	struct ibv_wq *wq; /* Work queue. */
-	struct ibv_cq *cq; /* Completion queue. */
-};
-
 struct priv;
 
 /* Compressed CQE context. */
@@ -134,7 +126,6 @@ struct rxq_ctrl {
 	struct priv *priv; /* Back pointer to private data. */
 	struct ibv_cq *cq; /* Completion Queue. */
 	struct ibv_wq *wq; /* Work Queue. */
-	struct fdir_queue *fdir_queue; /* Flow director queue. */
 	struct ibv_mr *mr; /* Memory Region (for mp). */
 	struct ibv_comp_channel *channel;
 	unsigned int socket; /* CPU socket ID for allocations. */
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 51c31aa..0d0f340 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -80,8 +80,6 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 		      (void *)priv, strerror(err));
 		goto error;
 	}
-	if (dev->data->dev_conf.fdir_conf.mode != RTE_FDIR_MODE_NONE)
-		priv_fdir_enable(priv);
 	err = priv_flow_start(priv);
 	if (err) {
 		priv->started = 0;
@@ -135,7 +133,6 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
 	priv_special_flow_disable_all(priv);
 	priv_mac_addrs_disable(priv);
 	priv_destroy_hash_rxqs(priv);
-	priv_fdir_disable(priv);
 	priv_flow_stop(priv);
 	priv_rx_intr_vec_disable(priv);
 	priv_dev_interrupt_handler_uninstall(priv, dev);
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v2 03/30] net/mlx5: prefix Rx structures and functions
       [not found] ` <cover.1507207731.git.nelio.laranjeiro@6wind.com>
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 01/30] net/mlx5: merge action and flow parser structure Nelio Laranjeiro
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 02/30] net/mlx5: remove flow director support Nelio Laranjeiro
@ 2017-10-05 12:49   ` Nelio Laranjeiro
  2017-10-06  0:50     ` Yongseok Koh
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 04/30] net/mlx5: prefix Tx " Nelio Laranjeiro
                     ` (26 subsequent siblings)
  29 siblings, 1 reply; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-05 12:49 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
Prefix struct rxq_ctrl and associated functions with mlx5.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5.c              |  8 ++---
 drivers/net/mlx5/mlx5.h              |  4 +--
 drivers/net/mlx5/mlx5_flow.c         | 12 ++++----
 drivers/net/mlx5/mlx5_rxq.c          | 57 +++++++++++++++++++-----------------
 drivers/net/mlx5/mlx5_rxtx.c         | 14 ++++-----
 drivers/net/mlx5/mlx5_rxtx.h         | 10 +++----
 drivers/net/mlx5/mlx5_rxtx_vec_sse.c | 23 ++++++++-------
 drivers/net/mlx5/mlx5_stats.c        |  2 +-
 drivers/net/mlx5/mlx5_vlan.c         |  5 ++--
 9 files changed, 70 insertions(+), 65 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 9cfb754..77d9def 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -208,14 +208,14 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 		/* XXX race condition if mlx5_rx_burst() is still running. */
 		usleep(1000);
 		for (i = 0; (i != priv->rxqs_n); ++i) {
-			struct rxq *rxq = (*priv->rxqs)[i];
-			struct rxq_ctrl *rxq_ctrl;
+			struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
+			struct mlx5_rxq_ctrl *rxq_ctrl;
 
 			if (rxq == NULL)
 				continue;
-			rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
+			rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq);
 			(*priv->rxqs)[i] = NULL;
-			rxq_cleanup(rxq_ctrl);
+			mlx5_rxq_cleanup(rxq_ctrl);
 			rte_free(rxq_ctrl);
 		}
 		priv->rxqs_n = 0;
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index adac5f4..ddaf227 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -130,7 +130,7 @@ struct priv {
 	/* RX/TX queues. */
 	unsigned int rxqs_n; /* RX queues array size. */
 	unsigned int txqs_n; /* TX queues array size. */
-	struct rxq *(*rxqs)[]; /* RX queues. */
+	struct mlx5_rxq_data *(*rxqs)[]; /* RX queues. */
 	struct txq *(*txqs)[]; /* TX queues. */
 	/* Indirection tables referencing all RX WQs. */
 	struct ibv_rwq_ind_table *(*ind_tables)[];
@@ -290,7 +290,7 @@ int mlx5_flow_flush(struct rte_eth_dev *, struct rte_flow_error *);
 int mlx5_flow_isolate(struct rte_eth_dev *, int, struct rte_flow_error *);
 int priv_flow_start(struct priv *);
 void priv_flow_stop(struct priv *);
-int priv_flow_rxq_in_use(struct priv *, struct rxq *);
+int priv_flow_rxq_in_use(struct priv *, struct mlx5_rxq_data *);
 
 /* mlx5_socket.c */
 
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 266ae24..99dbd8c 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -99,7 +99,7 @@ struct rte_flow {
 	uint32_t mark:1; /**< Set if the flow is marked. */
 	uint32_t drop:1; /**< Drop queue. */
 	uint64_t hash_fields; /**< Fields that participate in the hash. */
-	struct rxq *rxqs[]; /**< Pointer to the queues array. */
+	struct mlx5_rxq_data *rxqs[]; /**< Pointer to the queues array. */
 };
 
 /** Static initializer for items. */
@@ -1105,10 +1105,10 @@ priv_flow_create_action_queue(struct priv *priv,
 		return NULL;
 	}
 	for (i = 0; i < flow->actions.queues_n; ++i) {
-		struct rxq_ctrl *rxq;
+		struct mlx5_rxq_ctrl *rxq;
 
 		rxq = container_of((*priv->rxqs)[flow->actions.queues[i]],
-				   struct rxq_ctrl, rxq);
+				   struct mlx5_rxq_ctrl, rxq);
 		wqs[i] = rxq->wq;
 		rte_flow->rxqs[i] = &rxq->rxq;
 		++rte_flow->rxqs_n;
@@ -1301,7 +1301,7 @@ priv_flow_destroy(struct priv *priv,
 		claim_zero(ibv_destroy_rwq_ind_table(flow->ind_table));
 	if (flow->mark) {
 		struct rte_flow *tmp;
-		struct rxq *rxq;
+		struct mlx5_rxq_data *rxq;
 		uint32_t mark_n = 0;
 		uint32_t queue_n;
 
@@ -1321,7 +1321,7 @@ priv_flow_destroy(struct priv *priv,
 				for (tqueue_n = 0;
 				     tqueue_n < tmp->rxqs_n;
 				     ++tqueue_n) {
-					struct rxq *trxq;
+					struct mlx5_rxq_data *trxq;
 
 					trxq = tmp->rxqs[tqueue_n];
 					if (rxq == trxq)
@@ -1585,7 +1585,7 @@ priv_flow_start(struct priv *priv)
  *   Nonzero if the queue is used by a flow.
  */
 int
-priv_flow_rxq_in_use(struct priv *priv, struct rxq *rxq)
+priv_flow_rxq_in_use(struct priv *priv, struct mlx5_rxq_data *rxq)
 {
 	struct rte_flow *flow;
 
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 5819e92..6e520fb 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -374,10 +374,10 @@ priv_create_hash_rxqs(struct priv *priv)
 		      priv->reta_idx_n);
 	}
 	for (i = 0; (i != priv->reta_idx_n); ++i) {
-		struct rxq_ctrl *rxq_ctrl;
+		struct mlx5_rxq_ctrl *rxq_ctrl;
 
 		rxq_ctrl = container_of((*priv->rxqs)[(*priv->reta_idx)[i]],
-					struct rxq_ctrl, rxq);
+					struct mlx5_rxq_ctrl, rxq);
 		wqs[i] = rxq_ctrl->wq;
 	}
 	/* Get number of hash RX queues to configure. */
@@ -636,7 +636,7 @@ priv_rehash_flows(struct priv *priv)
  *   0 on success, errno value on failure.
  */
 static int
-rxq_alloc_elts(struct rxq_ctrl *rxq_ctrl, unsigned int elts_n)
+rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl, unsigned int elts_n)
 {
 	const unsigned int sges_n = 1 << rxq_ctrl->rxq.sges_n;
 	unsigned int i;
@@ -678,7 +678,7 @@ rxq_alloc_elts(struct rxq_ctrl *rxq_ctrl, unsigned int elts_n)
 		(*rxq_ctrl->rxq.elts)[i] = buf;
 	}
 	if (rxq_check_vec_support(&rxq_ctrl->rxq) > 0) {
-		struct rxq *rxq = &rxq_ctrl->rxq;
+		struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
 		struct rte_mbuf *mbuf_init = &rxq->fake_mbuf;
 
 		assert(rxq->elts_n == rxq->cqe_n);
@@ -720,9 +720,9 @@ rxq_alloc_elts(struct rxq_ctrl *rxq_ctrl, unsigned int elts_n)
  *   Pointer to RX queue structure.
  */
 static void
-rxq_free_elts(struct rxq_ctrl *rxq_ctrl)
+rxq_free_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
 {
-	struct rxq *rxq = &rxq_ctrl->rxq;
+	struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
 	const uint16_t q_n = (1 << rxq->elts_n);
 	const uint16_t q_mask = q_n - 1;
 	uint16_t used = q_n - (rxq->rq_ci - rxq->rq_pi);
@@ -756,7 +756,7 @@ rxq_free_elts(struct rxq_ctrl *rxq_ctrl)
  *   Pointer to RX queue structure.
  */
 void
-rxq_cleanup(struct rxq_ctrl *rxq_ctrl)
+mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl)
 {
 	DEBUG("cleaning up %p", (void *)rxq_ctrl);
 	rxq_free_elts(rxq_ctrl);
@@ -781,7 +781,7 @@ rxq_cleanup(struct rxq_ctrl *rxq_ctrl)
  *   0 on success, errno value on failure.
  */
 static inline int
-rxq_setup(struct rxq_ctrl *tmpl)
+rxq_setup(struct mlx5_rxq_ctrl *tmpl)
 {
 	struct ibv_cq *ibcq = tmpl->cq;
 	struct mlx5dv_cq cq_info;
@@ -848,12 +848,12 @@ rxq_setup(struct rxq_ctrl *tmpl)
  *   0 on success, errno value on failure.
  */
 static int
-rxq_ctrl_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl,
+rxq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
 	       uint16_t desc, unsigned int socket,
 	       const struct rte_eth_rxconf *conf, struct rte_mempool *mp)
 {
 	struct priv *priv = dev->data->dev_private;
-	struct rxq_ctrl tmpl = {
+	struct mlx5_rxq_ctrl tmpl = {
 		.priv = priv,
 		.socket = socket,
 		.rxq = {
@@ -1072,7 +1072,7 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl,
 	}
 	/* Clean up rxq in case we're reinitializing it. */
 	DEBUG("%p: cleaning-up old rxq just in case", (void *)rxq_ctrl);
-	rxq_cleanup(rxq_ctrl);
+	mlx5_rxq_cleanup(rxq_ctrl);
 	/* Move mbuf pointers to dedicated storage area in RX queue. */
 	elts = (void *)(rxq_ctrl + 1);
 	rte_memcpy(elts, tmpl.rxq.elts, sizeof(*elts));
@@ -1091,7 +1091,7 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl,
 	return 0;
 error:
 	elts = tmpl.rxq.elts;
-	rxq_cleanup(&tmpl);
+	mlx5_rxq_cleanup(&tmpl);
 	rte_free(elts);
 	assert(ret > 0);
 	return ret;
@@ -1122,8 +1122,9 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		    struct rte_mempool *mp)
 {
 	struct priv *priv = dev->data->dev_private;
-	struct rxq *rxq = (*priv->rxqs)[idx];
-	struct rxq_ctrl *rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
+	struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
+	struct mlx5_rxq_ctrl *rxq_ctrl =
+		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
 	const uint16_t desc_n =
 		desc + priv->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
 	int ret;
@@ -1154,7 +1155,7 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 			return -EEXIST;
 		}
 		(*priv->rxqs)[idx] = NULL;
-		rxq_cleanup(rxq_ctrl);
+		mlx5_rxq_cleanup(rxq_ctrl);
 		/* Resize if rxq size is changed. */
 		if (rxq_ctrl->rxq.elts_n != log2above(desc)) {
 			rxq_ctrl = rte_realloc(rxq_ctrl,
@@ -1202,8 +1203,8 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 void
 mlx5_rx_queue_release(void *dpdk_rxq)
 {
-	struct rxq *rxq = (struct rxq *)dpdk_rxq;
-	struct rxq_ctrl *rxq_ctrl;
+	struct mlx5_rxq_data *rxq = (struct mlx5_rxq_data *)dpdk_rxq;
+	struct mlx5_rxq_ctrl *rxq_ctrl;
 	struct priv *priv;
 	unsigned int i;
 
@@ -1212,7 +1213,7 @@ mlx5_rx_queue_release(void *dpdk_rxq)
 
 	if (rxq == NULL)
 		return;
-	rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
+	rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq);
 	priv = rxq_ctrl->priv;
 	priv_lock(priv);
 	if (priv_flow_rxq_in_use(priv, rxq))
@@ -1225,7 +1226,7 @@ mlx5_rx_queue_release(void *dpdk_rxq)
 			(*priv->rxqs)[i] = NULL;
 			break;
 		}
-	rxq_cleanup(rxq_ctrl);
+	mlx5_rxq_cleanup(rxq_ctrl);
 	rte_free(rxq_ctrl);
 	priv_unlock(priv);
 }
@@ -1260,9 +1261,9 @@ priv_rx_intr_vec_enable(struct priv *priv)
 	}
 	intr_handle->type = RTE_INTR_HANDLE_EXT;
 	for (i = 0; i != n; ++i) {
-		struct rxq *rxq = (*priv->rxqs)[i];
-		struct rxq_ctrl *rxq_ctrl =
-			container_of(rxq, struct rxq_ctrl, rxq);
+		struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
+		struct mlx5_rxq_ctrl *rxq_ctrl =
+			container_of(rxq, struct mlx5_rxq_ctrl, rxq);
 		int fd;
 		int flags;
 		int rc;
@@ -1328,7 +1329,7 @@ priv_rx_intr_vec_disable(struct priv *priv)
  *     Sequence number per receive queue .
  */
 static inline void
-mlx5_arm_cq(struct rxq *rxq, int sq_n_rxq)
+mlx5_arm_cq(struct mlx5_rxq_data *rxq, int sq_n_rxq)
 {
 	int sq_n = 0;
 	uint32_t doorbell_hi;
@@ -1359,8 +1360,9 @@ int
 mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
 	struct priv *priv = mlx5_get_priv(dev);
-	struct rxq *rxq = (*priv->rxqs)[rx_queue_id];
-	struct rxq_ctrl *rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
+	struct mlx5_rxq_data *rxq = (*priv->rxqs)[rx_queue_id];
+	struct mlx5_rxq_ctrl *rxq_ctrl =
+		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
 	int ret = 0;
 
 	if (!rxq || !rxq_ctrl->channel) {
@@ -1388,8 +1390,9 @@ int
 mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
 	struct priv *priv = mlx5_get_priv(dev);
-	struct rxq *rxq = (*priv->rxqs)[rx_queue_id];
-	struct rxq_ctrl *rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
+	struct mlx5_rxq_data *rxq = (*priv->rxqs)[rx_queue_id];
+	struct mlx5_rxq_ctrl *rxq_ctrl =
+		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
 	struct ibv_cq *ev_cq;
 	void *ev_ctx;
 	int ret;
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index c45ebee..ad1071b 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -65,11 +65,11 @@ static __rte_always_inline uint32_t
 rxq_cq_to_pkt_type(volatile struct mlx5_cqe *cqe);
 
 static __rte_always_inline int
-mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe,
+mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
 		 uint16_t cqe_cnt, uint32_t *rss_hash);
 
 static __rte_always_inline uint32_t
-rxq_cq_to_ol_flags(struct rxq *rxq, volatile struct mlx5_cqe *cqe);
+rxq_cq_to_ol_flags(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe);
 
 uint32_t mlx5_ptype_table[] __rte_cache_aligned = {
 	[0xff] = RTE_PTYPE_ALL_MASK, /* Last entry for errored packet. */
@@ -282,7 +282,7 @@ mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset)
 int
 mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset)
 {
-	struct rxq *rxq = rx_queue;
+	struct mlx5_rxq_data *rxq = rx_queue;
 	struct rxq_zip *zip = &rxq->zip;
 	volatile struct mlx5_cqe *cqe;
 	const unsigned int cqe_n = (1 << rxq->cqe_n);
@@ -1647,7 +1647,7 @@ rxq_cq_to_pkt_type(volatile struct mlx5_cqe *cqe)
  *   with error.
  */
 static inline int
-mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe,
+mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
 		 uint16_t cqe_cnt, uint32_t *rss_hash)
 {
 	struct rxq_zip *zip = &rxq->zip;
@@ -1758,7 +1758,7 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe,
  *   Offload flags (ol_flags) for struct rte_mbuf.
  */
 static inline uint32_t
-rxq_cq_to_ol_flags(struct rxq *rxq, volatile struct mlx5_cqe *cqe)
+rxq_cq_to_ol_flags(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe)
 {
 	uint32_t ol_flags = 0;
 	uint16_t flags = rte_be_to_cpu_16(cqe->hdr_type_etc);
@@ -1797,7 +1797,7 @@ rxq_cq_to_ol_flags(struct rxq *rxq, volatile struct mlx5_cqe *cqe)
 uint16_t
 mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 {
-	struct rxq *rxq = dpdk_rxq;
+	struct mlx5_rxq_data *rxq = dpdk_rxq;
 	const unsigned int wqe_cnt = (1 << rxq->elts_n) - 1;
 	const unsigned int cqe_cnt = (1 << rxq->cqe_n) - 1;
 	const unsigned int sges_n = rxq->sges_n;
@@ -2037,7 +2037,7 @@ priv_check_vec_tx_support(struct priv *priv)
 }
 
 int __attribute__((weak))
-rxq_check_vec_support(struct rxq *rxq)
+rxq_check_vec_support(struct mlx5_rxq_data *rxq)
 {
 	(void)rxq;
 	return -ENOTSUP;
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index daf9eae..d10868c 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -90,7 +90,7 @@ struct rxq_zip {
 };
 
 /* RX queue descriptor. */
-struct rxq {
+struct mlx5_rxq_data {
 	unsigned int csum:1; /* Enable checksum offloading. */
 	unsigned int csum_l2tun:1; /* Same for L2 tunnels. */
 	unsigned int vlan_strip:1; /* Enable VLAN stripping. */
@@ -122,14 +122,14 @@ struct rxq {
 } __rte_cache_aligned;
 
 /* RX queue control descriptor. */
-struct rxq_ctrl {
+struct mlx5_rxq_ctrl {
 	struct priv *priv; /* Back pointer to private data. */
 	struct ibv_cq *cq; /* Completion Queue. */
 	struct ibv_wq *wq; /* Work Queue. */
 	struct ibv_mr *mr; /* Memory Region (for mp). */
 	struct ibv_comp_channel *channel;
 	unsigned int socket; /* CPU socket ID for allocations. */
-	struct rxq rxq; /* Data path structure. */
+	struct mlx5_rxq_data rxq; /* Data path structure. */
 };
 
 /* Hash RX queue types. */
@@ -294,7 +294,7 @@ int priv_create_hash_rxqs(struct priv *);
 void priv_destroy_hash_rxqs(struct priv *);
 int priv_allow_flow_type(struct priv *, enum hash_rxq_flow_type);
 int priv_rehash_flows(struct priv *);
-void rxq_cleanup(struct rxq_ctrl *);
+void mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *);
 int mlx5_rx_queue_setup(struct rte_eth_dev *, uint16_t, uint16_t, unsigned int,
 			const struct rte_eth_rxconf *, struct rte_mempool *);
 void mlx5_rx_queue_release(void *);
@@ -331,7 +331,7 @@ int mlx5_tx_descriptor_status(void *, uint16_t);
 /* Vectorized version of mlx5_rxtx.c */
 int priv_check_raw_vec_tx_support(struct priv *);
 int priv_check_vec_tx_support(struct priv *);
-int rxq_check_vec_support(struct rxq *);
+int rxq_check_vec_support(struct mlx5_rxq_data *);
 int priv_check_vec_rx_support(struct priv *);
 uint16_t mlx5_tx_burst_raw_vec(void *, struct rte_mbuf **, uint16_t);
 uint16_t mlx5_tx_burst_vec(void *, struct rte_mbuf **, uint16_t);
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
index 33988e3..fbdd14e 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
@@ -518,7 +518,7 @@ mlx5_tx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
  *   Number of packets to be stored.
  */
 static inline void
-rxq_copy_mbuf_v(struct rxq *rxq, struct rte_mbuf **pkts, uint16_t n)
+rxq_copy_mbuf_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t n)
 {
 	const uint16_t q_mask = (1 << rxq->elts_n) - 1;
 	struct rte_mbuf **elts = &(*rxq->elts)[rxq->rq_pi & q_mask];
@@ -544,7 +544,7 @@ rxq_copy_mbuf_v(struct rxq *rxq, struct rte_mbuf **pkts, uint16_t n)
  *   Number of buffers to be replenished.
  */
 static inline void
-rxq_replenish_bulk_mbuf(struct rxq *rxq, uint16_t n)
+rxq_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq, uint16_t n)
 {
 	const uint16_t q_n = 1 << rxq->elts_n;
 	const uint16_t q_mask = q_n - 1;
@@ -583,7 +583,7 @@ rxq_replenish_bulk_mbuf(struct rxq *rxq, uint16_t n)
  *   the title completion descriptor to be copied to the rest of mbufs.
  */
 static inline void
-rxq_cq_decompress_v(struct rxq *rxq,
+rxq_cq_decompress_v(struct mlx5_rxq_data *rxq,
 		    volatile struct mlx5_cqe *cq,
 		    struct rte_mbuf **elts)
 {
@@ -742,8 +742,8 @@ rxq_cq_decompress_v(struct rxq *rxq,
  *   Pointer to array of packets to be filled.
  */
 static inline void
-rxq_cq_to_ptype_oflags_v(struct rxq *rxq, __m128i cqes[4], __m128i op_err,
-			 struct rte_mbuf **pkts)
+rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, __m128i cqes[4],
+			 __m128i op_err, struct rte_mbuf **pkts)
 {
 	__m128i pinfo0, pinfo1;
 	__m128i pinfo, ptype;
@@ -877,7 +877,7 @@ rxq_cq_to_ptype_oflags_v(struct rxq *rxq, __m128i cqes[4], __m128i op_err,
  *   Number of packets successfully received (<= pkts_n).
  */
 static uint16_t
-rxq_handle_pending_error(struct rxq *rxq, struct rte_mbuf **pkts,
+rxq_handle_pending_error(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
 			 uint16_t pkts_n)
 {
 	uint16_t n = 0;
@@ -924,7 +924,7 @@ rxq_handle_pending_error(struct rxq *rxq, struct rte_mbuf **pkts,
  *   Number of packets received including errors (<= pkts_n).
  */
 static inline uint16_t
-rxq_burst_v(struct rxq *rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
+rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 {
 	const uint16_t q_n = 1 << rxq->cqe_n;
 	const uint16_t q_mask = q_n - 1;
@@ -1270,7 +1270,7 @@ rxq_burst_v(struct rxq *rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 uint16_t
 mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 {
-	struct rxq *rxq = dpdk_rxq;
+	struct mlx5_rxq_data *rxq = dpdk_rxq;
 	uint16_t nb_rx;
 
 	nb_rx = rxq_burst_v(rxq, pkts, pkts_n);
@@ -1336,9 +1336,10 @@ priv_check_vec_tx_support(struct priv *priv)
  *   1 if supported, negative errno value if not.
  */
 int __attribute__((cold))
-rxq_check_vec_support(struct rxq *rxq)
+rxq_check_vec_support(struct mlx5_rxq_data *rxq)
 {
-	struct rxq_ctrl *ctrl = container_of(rxq, struct rxq_ctrl, rxq);
+	struct mlx5_rxq_ctrl *ctrl =
+		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
 
 	if (!ctrl->priv->rx_vec_en || rxq->sges_n != 0)
 		return -ENOTSUP;
@@ -1363,7 +1364,7 @@ priv_check_vec_rx_support(struct priv *priv)
 		return -ENOTSUP;
 	/* All the configured queues should support. */
 	for (i = 0; i < priv->rxqs_n; ++i) {
-		struct rxq *rxq = (*priv->rxqs)[i];
+		struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
 
 		if (rxq_check_vec_support(rxq) < 0)
 			break;
diff --git a/drivers/net/mlx5/mlx5_stats.c b/drivers/net/mlx5/mlx5_stats.c
index 06348c8..3de3af8 100644
--- a/drivers/net/mlx5/mlx5_stats.c
+++ b/drivers/net/mlx5/mlx5_stats.c
@@ -329,7 +329,7 @@ mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 	priv_lock(priv);
 	/* Add software counters. */
 	for (i = 0; (i != priv->rxqs_n); ++i) {
-		struct rxq *rxq = (*priv->rxqs)[i];
+		struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
 
 		if (rxq == NULL)
 			continue;
diff --git a/drivers/net/mlx5/mlx5_vlan.c b/drivers/net/mlx5/mlx5_vlan.c
index 36ffbba..0d91591 100644
--- a/drivers/net/mlx5/mlx5_vlan.c
+++ b/drivers/net/mlx5/mlx5_vlan.c
@@ -137,8 +137,9 @@ mlx5_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
 static void
 priv_vlan_strip_queue_set(struct priv *priv, uint16_t idx, int on)
 {
-	struct rxq *rxq = (*priv->rxqs)[idx];
-	struct rxq_ctrl *rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
+	struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
+	struct mlx5_rxq_ctrl *rxq_ctrl =
+		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
 	struct ibv_wq_attr mod;
 	uint16_t vlan_offloads =
 		(on ? IBV_WQ_FLAGS_CVLAN_STRIPPING : 0) |
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v2 04/30] net/mlx5: prefix Tx structures and functions
       [not found] ` <cover.1507207731.git.nelio.laranjeiro@6wind.com>
                     ` (2 preceding siblings ...)
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 03/30] net/mlx5: prefix Rx structures and functions Nelio Laranjeiro
@ 2017-10-05 12:49   ` Nelio Laranjeiro
  2017-10-06  0:50     ` Yongseok Koh
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 05/30] net/mlx5: remove redundant started flag Nelio Laranjeiro
                     ` (25 subsequent siblings)
  29 siblings, 1 reply; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-05 12:49 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
Prefix struct txq_ctrl and associated functions with mlx5.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5.c              |  8 +++----
 drivers/net/mlx5/mlx5.h              |  2 +-
 drivers/net/mlx5/mlx5_mr.c           | 12 ++++++----
 drivers/net/mlx5/mlx5_rxtx.c         | 25 ++++++++++----------
 drivers/net/mlx5/mlx5_rxtx.h         | 27 +++++++++++-----------
 drivers/net/mlx5/mlx5_rxtx_vec_sse.c | 17 +++++++-------
 drivers/net/mlx5/mlx5_stats.c        |  2 +-
 drivers/net/mlx5/mlx5_txq.c          | 45 ++++++++++++++++++------------------
 8 files changed, 72 insertions(+), 66 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 77d9def..4da0524 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -225,14 +225,14 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 		/* XXX race condition if mlx5_tx_burst() is still running. */
 		usleep(1000);
 		for (i = 0; (i != priv->txqs_n); ++i) {
-			struct txq *txq = (*priv->txqs)[i];
-			struct txq_ctrl *txq_ctrl;
+			struct mlx5_txq_data *txq = (*priv->txqs)[i];
+			struct mlx5_txq_ctrl *txq_ctrl;
 
 			if (txq == NULL)
 				continue;
-			txq_ctrl = container_of(txq, struct txq_ctrl, txq);
+			txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
 			(*priv->txqs)[i] = NULL;
-			txq_cleanup(txq_ctrl);
+			mlx5_txq_cleanup(txq_ctrl);
 			rte_free(txq_ctrl);
 		}
 		priv->txqs_n = 0;
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index ddaf227..1b511e1 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -131,7 +131,7 @@ struct priv {
 	unsigned int rxqs_n; /* RX queues array size. */
 	unsigned int txqs_n; /* TX queues array size. */
 	struct mlx5_rxq_data *(*rxqs)[]; /* RX queues. */
-	struct txq *(*txqs)[]; /* TX queues. */
+	struct mlx5_txq_data *(*txqs)[]; /* TX queues. */
 	/* Indirection tables referencing all RX WQs. */
 	struct ibv_rwq_ind_table *(*ind_tables)[];
 	unsigned int ind_tables_n; /* Number of indirection tables. */
diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c
index 9a9f73a..6199746 100644
--- a/drivers/net/mlx5/mlx5_mr.c
+++ b/drivers/net/mlx5/mlx5_mr.c
@@ -175,9 +175,11 @@ mlx5_mp2mr(struct ibv_pd *pd, struct rte_mempool *mp)
  *   mr->lkey on success, (uint32_t)-1 on failure.
  */
 uint32_t
-txq_mp2mr_reg(struct txq *txq, struct rte_mempool *mp, unsigned int idx)
+mlx5_txq_mp2mr_reg(struct mlx5_txq_data *txq, struct rte_mempool *mp,
+		   unsigned int idx)
 {
-	struct txq_ctrl *txq_ctrl = container_of(txq, struct txq_ctrl, txq);
+	struct mlx5_txq_ctrl *txq_ctrl =
+		container_of(txq, struct mlx5_txq_ctrl, txq);
 	struct ibv_mr *mr;
 
 	/* Add a new entry, register MR first. */
@@ -253,9 +255,9 @@ txq_mp2mr_mbuf_check(struct rte_mempool *mp, void *arg, void *obj,
  *   Pointer to TX queue structure.
  */
 void
-txq_mp2mr_iter(struct rte_mempool *mp, void *arg)
+mlx5_txq_mp2mr_iter(struct rte_mempool *mp, void *arg)
 {
-	struct txq_ctrl *txq_ctrl = arg;
+	struct mlx5_txq_ctrl *txq_ctrl = arg;
 	struct txq_mp2mr_mbuf_check_data data = {
 		.ret = 0,
 	};
@@ -283,5 +285,5 @@ txq_mp2mr_iter(struct rte_mempool *mp, void *arg)
 		    end <= (uintptr_t)mr->addr + mr->length)
 			return;
 	}
-	txq_mp2mr_reg(&txq_ctrl->txq, mp, i);
+	mlx5_txq_mp2mr_reg(&txq_ctrl->txq, mp, i);
 }
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index ad1071b..9389383 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -200,7 +200,7 @@ mlx5_set_ptype_table(void)
  *   Size of tailroom.
  */
 static inline size_t
-tx_mlx5_wq_tailroom(struct txq *txq, void *addr)
+tx_mlx5_wq_tailroom(struct mlx5_txq_data *txq, void *addr)
 {
 	size_t tailroom;
 	tailroom = (uintptr_t)(txq->wqes) +
@@ -258,7 +258,7 @@ mlx5_copy_to_wq(void *dst, const void *src, size_t n,
 int
 mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset)
 {
-	struct txq *txq = tx_queue;
+	struct mlx5_txq_data *txq = tx_queue;
 	uint16_t used;
 
 	mlx5_tx_complete(txq);
@@ -334,7 +334,7 @@ mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset)
 uint16_t
 mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 {
-	struct txq *txq = (struct txq *)dpdk_txq;
+	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
 	uint16_t elts_head = txq->elts_head;
 	const uint16_t elts_n = 1 << txq->elts_n;
 	const uint16_t elts_m = elts_n - 1;
@@ -747,7 +747,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
  *   Packet length.
  */
 static inline void
-mlx5_mpw_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
+mlx5_mpw_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw, uint32_t length)
 {
 	uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);
 	volatile struct mlx5_wqe_data_seg (*dseg)[MLX5_MPW_DSEG_MAX] =
@@ -787,7 +787,7 @@ mlx5_mpw_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
  *   Pointer to MPW session structure.
  */
 static inline void
-mlx5_mpw_close(struct txq *txq, struct mlx5_mpw *mpw)
+mlx5_mpw_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw)
 {
 	unsigned int num = mpw->pkts_n;
 
@@ -821,7 +821,7 @@ mlx5_mpw_close(struct txq *txq, struct mlx5_mpw *mpw)
 uint16_t
 mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 {
-	struct txq *txq = (struct txq *)dpdk_txq;
+	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
 	uint16_t elts_head = txq->elts_head;
 	const uint16_t elts_n = 1 << txq->elts_n;
 	const uint16_t elts_m = elts_n - 1;
@@ -964,7 +964,8 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
  *   Packet length.
  */
 static inline void
-mlx5_mpw_inline_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
+mlx5_mpw_inline_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw,
+		    uint32_t length)
 {
 	uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);
 	struct mlx5_wqe_inl_small *inl;
@@ -999,7 +1000,7 @@ mlx5_mpw_inline_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
  *   Pointer to MPW session structure.
  */
 static inline void
-mlx5_mpw_inline_close(struct txq *txq, struct mlx5_mpw *mpw)
+mlx5_mpw_inline_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw)
 {
 	unsigned int size;
 	struct mlx5_wqe_inl_small *inl = (struct mlx5_wqe_inl_small *)
@@ -1034,7 +1035,7 @@ uint16_t
 mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
 			 uint16_t pkts_n)
 {
-	struct txq *txq = (struct txq *)dpdk_txq;
+	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
 	uint16_t elts_head = txq->elts_head;
 	const uint16_t elts_n = 1 << txq->elts_n;
 	const uint16_t elts_m = elts_n - 1;
@@ -1260,7 +1261,7 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
  *   Packet length.
  */
 static inline void
-mlx5_empw_new(struct txq *txq, struct mlx5_mpw *mpw, int padding)
+mlx5_empw_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw, int padding)
 {
 	uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);
 
@@ -1302,7 +1303,7 @@ mlx5_empw_new(struct txq *txq, struct mlx5_mpw *mpw, int padding)
  *   Number of consumed WQEs.
  */
 static inline uint16_t
-mlx5_empw_close(struct txq *txq, struct mlx5_mpw *mpw)
+mlx5_empw_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw)
 {
 	uint16_t ret;
 
@@ -1333,7 +1334,7 @@ mlx5_empw_close(struct txq *txq, struct mlx5_mpw *mpw)
 uint16_t
 mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 {
-	struct txq *txq = (struct txq *)dpdk_txq;
+	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
 	uint16_t elts_head = txq->elts_head;
 	const uint16_t elts_n = 1 << txq->elts_n;
 	const uint16_t elts_m = elts_n - 1;
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index d10868c..4f877cb 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -233,7 +233,7 @@ struct hash_rxq {
 
 /* TX queue descriptor. */
 __extension__
-struct txq {
+struct mlx5_txq_data {
 	uint16_t elts_head; /* Current counter in (*elts)[]. */
 	uint16_t elts_tail; /* Counter of first element awaiting completion. */
 	uint16_t elts_comp; /* Counter since last completion request. */
@@ -271,12 +271,12 @@ struct txq {
 } __rte_cache_aligned;
 
 /* TX queue control descriptor. */
-struct txq_ctrl {
+struct mlx5_txq_ctrl {
 	struct priv *priv; /* Back pointer to private data. */
 	struct ibv_cq *cq; /* Completion Queue. */
 	struct ibv_qp *qp; /* Queue Pair. */
 	unsigned int socket; /* CPU socket ID for allocations. */
-	struct txq txq; /* Data path structure. */
+	struct mlx5_txq_data txq; /* Data path structure. */
 	off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
 };
 
@@ -305,9 +305,9 @@ int mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id);
 
 /* mlx5_txq.c */
 
-void txq_cleanup(struct txq_ctrl *);
-int txq_ctrl_setup(struct rte_eth_dev *, struct txq_ctrl *, uint16_t,
-		   unsigned int, const struct rte_eth_txconf *);
+void mlx5_txq_cleanup(struct mlx5_txq_ctrl *);
+int mlx5_txq_ctrl_setup(struct rte_eth_dev *, struct mlx5_txq_ctrl *, uint16_t,
+			unsigned int, const struct rte_eth_txconf *);
 int mlx5_tx_queue_setup(struct rte_eth_dev *, uint16_t, uint16_t, unsigned int,
 			const struct rte_eth_txconf *);
 void mlx5_tx_queue_release(void *);
@@ -340,8 +340,9 @@ uint16_t mlx5_rx_burst_vec(void *, struct rte_mbuf **, uint16_t);
 /* mlx5_mr.c */
 
 struct ibv_mr *mlx5_mp2mr(struct ibv_pd *, struct rte_mempool *);
-void txq_mp2mr_iter(struct rte_mempool *, void *);
-uint32_t txq_mp2mr_reg(struct txq *, struct rte_mempool *, unsigned int);
+void mlx5_txq_mp2mr_iter(struct rte_mempool *, void *);
+uint32_t mlx5_txq_mp2mr_reg(struct mlx5_txq_data *, struct rte_mempool *,
+			    unsigned int);
 
 #ifndef NDEBUG
 /**
@@ -439,7 +440,7 @@ check_cqe(volatile struct mlx5_cqe *cqe,
  *   WQE address.
  */
 static inline uintptr_t *
-tx_mlx5_wqe(struct txq *txq, uint16_t ci)
+tx_mlx5_wqe(struct mlx5_txq_data *txq, uint16_t ci)
 {
 	ci &= ((1 << txq->wqe_n) - 1);
 	return (uintptr_t *)((uintptr_t)txq->wqes + ci * MLX5_WQE_SIZE);
@@ -454,7 +455,7 @@ tx_mlx5_wqe(struct txq *txq, uint16_t ci)
  *   Pointer to TX queue structure.
  */
 static __rte_always_inline void
-mlx5_tx_complete(struct txq *txq)
+mlx5_tx_complete(struct mlx5_txq_data *txq)
 {
 	const uint16_t elts_n = 1 << txq->elts_n;
 	const uint16_t elts_m = elts_n - 1;
@@ -559,7 +560,7 @@ mlx5_tx_mb2mp(struct rte_mbuf *buf)
  *   mr->lkey on success, (uint32_t)-1 on failure.
  */
 static __rte_always_inline uint32_t
-mlx5_tx_mb2mr(struct txq *txq, struct rte_mbuf *mb)
+mlx5_tx_mb2mr(struct mlx5_txq_data *txq, struct rte_mbuf *mb)
 {
 	uint16_t i = txq->mr_cache_idx;
 	uintptr_t addr = rte_pktmbuf_mtod(mb, uintptr_t);
@@ -582,7 +583,7 @@ mlx5_tx_mb2mr(struct txq *txq, struct rte_mbuf *mb)
 		}
 	}
 	txq->mr_cache_idx = 0;
-	return txq_mp2mr_reg(txq, mlx5_tx_mb2mp(mb), i);
+	return mlx5_txq_mp2mr_reg(txq, mlx5_tx_mb2mp(mb), i);
 }
 
 /**
@@ -594,7 +595,7 @@ mlx5_tx_mb2mr(struct txq *txq, struct rte_mbuf *mb)
  *   Pointer to the last WQE posted in the NIC.
  */
 static __rte_always_inline void
-mlx5_tx_dbrec(struct txq *txq, volatile struct mlx5_wqe *wqe)
+mlx5_tx_dbrec(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe)
 {
 	uint64_t *dst = (uint64_t *)((uintptr_t)txq->bf_reg);
 	volatile uint64_t *src = ((volatile uint64_t *)wqe);
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
index fbdd14e..2750eac 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
@@ -76,7 +76,7 @@
  *   Number of packets to be filled.
  */
 static inline void
-txq_wr_dseg_v(struct txq *txq, __m128i *dseg,
+txq_wr_dseg_v(struct mlx5_txq_data *txq, __m128i *dseg,
 	      struct rte_mbuf **pkts, unsigned int n)
 {
 	unsigned int pos;
@@ -151,8 +151,8 @@ txq_check_multiseg(struct rte_mbuf **pkts, uint16_t pkts_n)
  *   Number of packets having same ol_flags.
  */
 static inline unsigned int
-txq_calc_offload(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
-		 uint8_t *cs_flags)
+txq_calc_offload(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
+		 uint16_t pkts_n, uint8_t *cs_flags)
 {
 	unsigned int pos;
 	const uint64_t ol_mask =
@@ -202,7 +202,8 @@ txq_calc_offload(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
  *   Number of packets successfully transmitted (<= pkts_n).
  */
 static uint16_t
-txq_scatter_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n)
+txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
+	      uint16_t pkts_n)
 {
 	uint16_t elts_head = txq->elts_head;
 	const uint16_t elts_n = 1 << txq->elts_n;
@@ -332,7 +333,7 @@ txq_scatter_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n)
  *   Number of packets successfully transmitted (<= pkts_n).
  */
 static inline uint16_t
-txq_burst_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
+txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
 	    uint8_t cs_flags)
 {
 	struct rte_mbuf **elts;
@@ -448,7 +449,7 @@ uint16_t
 mlx5_tx_burst_raw_vec(void *dpdk_txq, struct rte_mbuf **pkts,
 		      uint16_t pkts_n)
 {
-	struct txq *txq = (struct txq *)dpdk_txq;
+	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
 	uint16_t nb_tx = 0;
 
 	while (pkts_n > nb_tx) {
@@ -480,7 +481,7 @@ mlx5_tx_burst_raw_vec(void *dpdk_txq, struct rte_mbuf **pkts,
 uint16_t
 mlx5_tx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 {
-	struct txq *txq = (struct txq *)dpdk_txq;
+	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
 	uint16_t nb_tx = 0;
 
 	while (pkts_n > nb_tx) {
@@ -1295,7 +1296,7 @@ priv_check_raw_vec_tx_support(struct priv *priv)
 
 	/* All the configured queues should support. */
 	for (i = 0; i < priv->txqs_n; ++i) {
-		struct txq *txq = (*priv->txqs)[i];
+		struct mlx5_txq_data *txq = (*priv->txqs)[i];
 
 		if (!(txq->flags & ETH_TXQ_FLAGS_NOMULTSEGS) ||
 		    !(txq->flags & ETH_TXQ_FLAGS_NOOFFLOADS))
diff --git a/drivers/net/mlx5/mlx5_stats.c b/drivers/net/mlx5/mlx5_stats.c
index 3de3af8..6b4772c 100644
--- a/drivers/net/mlx5/mlx5_stats.c
+++ b/drivers/net/mlx5/mlx5_stats.c
@@ -350,7 +350,7 @@ mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 		tmp.rx_nombuf += rxq->stats.rx_nombuf;
 	}
 	for (i = 0; (i != priv->txqs_n); ++i) {
-		struct txq *txq = (*priv->txqs)[i];
+		struct mlx5_txq_data *txq = (*priv->txqs)[i];
 
 		if (txq == NULL)
 			continue;
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 1b45b4a..ee9f703 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -69,7 +69,7 @@
  *   Number of elements to allocate.
  */
 static void
-txq_alloc_elts(struct txq_ctrl *txq_ctrl, unsigned int elts_n)
+txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl, unsigned int elts_n)
 {
 	unsigned int i;
 
@@ -95,7 +95,7 @@ txq_alloc_elts(struct txq_ctrl *txq_ctrl, unsigned int elts_n)
  *   Pointer to TX queue structure.
  */
 static void
-txq_free_elts(struct txq_ctrl *txq_ctrl)
+txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl)
 {
 	const uint16_t elts_n = 1 << txq_ctrl->txq.elts_n;
 	const uint16_t elts_m = elts_n - 1;
@@ -132,7 +132,7 @@ txq_free_elts(struct txq_ctrl *txq_ctrl)
  *   Pointer to TX queue structure.
  */
 void
-txq_cleanup(struct txq_ctrl *txq_ctrl)
+mlx5_txq_cleanup(struct mlx5_txq_ctrl *txq_ctrl)
 {
 	size_t i;
 
@@ -162,7 +162,7 @@ txq_cleanup(struct txq_ctrl *txq_ctrl)
  *   0 on success, errno value on failure.
  */
 static inline int
-txq_setup(struct txq_ctrl *tmpl, struct txq_ctrl *txq_ctrl)
+txq_setup(struct mlx5_txq_ctrl *tmpl, struct mlx5_txq_ctrl *txq_ctrl)
 {
 	struct mlx5dv_qp qp;
 	struct ibv_cq *ibcq = tmpl->cq;
@@ -225,12 +225,12 @@ txq_setup(struct txq_ctrl *tmpl, struct txq_ctrl *txq_ctrl)
  *   0 on success, errno value on failure.
  */
 int
-txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
-	       uint16_t desc, unsigned int socket,
-	       const struct rte_eth_txconf *conf)
+mlx5_txq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_txq_ctrl *txq_ctrl,
+		    uint16_t desc, unsigned int socket,
+		    const struct rte_eth_txconf *conf)
 {
 	struct priv *priv = mlx5_get_priv(dev);
-	struct txq_ctrl tmpl = {
+	struct mlx5_txq_ctrl tmpl = {
 		.priv = priv,
 		.socket = socket,
 	};
@@ -422,15 +422,15 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
 	}
 	/* Clean up txq in case we're reinitializing it. */
 	DEBUG("%p: cleaning-up old txq just in case", (void *)txq_ctrl);
-	txq_cleanup(txq_ctrl);
+	mlx5_txq_cleanup(txq_ctrl);
 	*txq_ctrl = tmpl;
 	DEBUG("%p: txq updated with %p", (void *)txq_ctrl, (void *)&tmpl);
 	/* Pre-register known mempools. */
-	rte_mempool_walk(txq_mp2mr_iter, txq_ctrl);
+	rte_mempool_walk(mlx5_txq_mp2mr_iter, txq_ctrl);
 	assert(ret == 0);
 	return 0;
 error:
-	txq_cleanup(&tmpl);
+	mlx5_txq_cleanup(&tmpl);
 	assert(ret > 0);
 	return ret;
 }
@@ -457,8 +457,9 @@ mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		    unsigned int socket, const struct rte_eth_txconf *conf)
 {
 	struct priv *priv = dev->data->dev_private;
-	struct txq *txq = (*priv->txqs)[idx];
-	struct txq_ctrl *txq_ctrl = container_of(txq, struct txq_ctrl, txq);
+	struct mlx5_txq_data *txq = (*priv->txqs)[idx];
+	struct mlx5_txq_ctrl *txq_ctrl =
+		container_of(txq, struct mlx5_txq_ctrl, txq);
 	int ret;
 
 	if (mlx5_is_secondary())
@@ -494,7 +495,7 @@ mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 			return -EEXIST;
 		}
 		(*priv->txqs)[idx] = NULL;
-		txq_cleanup(txq_ctrl);
+		mlx5_txq_cleanup(txq_ctrl);
 		/* Resize if txq size is changed. */
 		if (txq_ctrl->txq.elts_n != log2above(desc)) {
 			txq_ctrl = rte_realloc(txq_ctrl,
@@ -521,7 +522,7 @@ mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 			return -ENOMEM;
 		}
 	}
-	ret = txq_ctrl_setup(dev, txq_ctrl, desc, socket, conf);
+	ret = mlx5_txq_ctrl_setup(dev, txq_ctrl, desc, socket, conf);
 	if (ret)
 		rte_free(txq_ctrl);
 	else {
@@ -543,8 +544,8 @@ mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 void
 mlx5_tx_queue_release(void *dpdk_txq)
 {
-	struct txq *txq = (struct txq *)dpdk_txq;
-	struct txq_ctrl *txq_ctrl;
+	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
+	struct mlx5_txq_ctrl *txq_ctrl;
 	struct priv *priv;
 	unsigned int i;
 
@@ -553,7 +554,7 @@ mlx5_tx_queue_release(void *dpdk_txq)
 
 	if (txq == NULL)
 		return;
-	txq_ctrl = container_of(txq, struct txq_ctrl, txq);
+	txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
 	priv = txq_ctrl->priv;
 	priv_lock(priv);
 	for (i = 0; (i != priv->txqs_n); ++i)
@@ -563,7 +564,7 @@ mlx5_tx_queue_release(void *dpdk_txq)
 			(*priv->txqs)[i] = NULL;
 			break;
 		}
-	txq_cleanup(txq_ctrl);
+	mlx5_txq_cleanup(txq_ctrl);
 	rte_free(txq_ctrl);
 	priv_unlock(priv);
 }
@@ -588,8 +589,8 @@ priv_tx_uar_remap(struct priv *priv, int fd)
 	unsigned int pages_n = 0;
 	uintptr_t uar_va;
 	void *addr;
-	struct txq *txq;
-	struct txq_ctrl *txq_ctrl;
+	struct mlx5_txq_data *txq;
+	struct mlx5_txq_ctrl *txq_ctrl;
 	int already_mapped;
 	size_t page_size = sysconf(_SC_PAGESIZE);
 
@@ -600,7 +601,7 @@ priv_tx_uar_remap(struct priv *priv, int fd)
 	 */
 	for (i = 0; i != priv->txqs_n; ++i) {
 		txq = (*priv->txqs)[i];
-		txq_ctrl = container_of(txq, struct txq_ctrl, txq);
+		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
 		uar_va = (uintptr_t)txq_ctrl->txq.bf_reg;
 		uar_va = RTE_ALIGN_FLOOR(uar_va, page_size);
 		already_mapped = 0;
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v2 05/30] net/mlx5: remove redundant started flag
       [not found] ` <cover.1507207731.git.nelio.laranjeiro@6wind.com>
                     ` (3 preceding siblings ...)
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 04/30] net/mlx5: prefix Tx " Nelio Laranjeiro
@ 2017-10-05 12:49   ` Nelio Laranjeiro
  2017-10-06  0:50     ` Yongseok Koh
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 06/30] net/mlx5: verify all flows are been removed on close Nelio Laranjeiro
                     ` (24 subsequent siblings)
  29 siblings, 1 reply; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-05 12:49 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
This flag is already present in the Ethernet device.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5.h         |  1 -
 drivers/net/mlx5/mlx5_flow.c    |  6 +++---
 drivers/net/mlx5/mlx5_rxq.c     |  2 +-
 drivers/net/mlx5/mlx5_trigger.c | 12 ------------
 drivers/net/mlx5/mlx5_txq.c     |  2 +-
 5 files changed, 5 insertions(+), 18 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 1b511e1..3c58f7a 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -104,7 +104,6 @@ struct priv {
 	/* Device properties. */
 	uint16_t mtu; /* Configured MTU. */
 	uint8_t port; /* Physical port number. */
-	unsigned int started:1; /* Device started, flows enabled. */
 	unsigned int promisc_req:1; /* Promiscuous mode requested. */
 	unsigned int allmulti_req:1; /* All multicast mode requested. */
 	unsigned int hw_csum:1; /* Checksum offload is supported. */
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 99dbd8c..3504c43 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -1052,7 +1052,7 @@ priv_flow_create_action_queue_drop(struct priv *priv,
 	++flow->ibv_attr->num_of_specs;
 	flow->offset += sizeof(struct ibv_flow_spec_action_drop);
 	rte_flow->ibv_attr = flow->ibv_attr;
-	if (!priv->started)
+	if (!priv->dev->data->dev_started)
 		return rte_flow;
 	rte_flow->qp = priv->flow_drop_queue->qp;
 	rte_flow->ibv_flow = ibv_create_flow(rte_flow->qp,
@@ -1158,7 +1158,7 @@ priv_flow_create_action_queue(struct priv *priv,
 				   NULL, "cannot allocate QP");
 		goto error;
 	}
-	if (!priv->started)
+	if (!priv->dev->data->dev_started)
 		return rte_flow;
 	rte_flow->ibv_flow = ibv_create_flow(rte_flow->qp,
 					     rte_flow->ibv_attr);
@@ -1618,7 +1618,7 @@ mlx5_flow_isolate(struct rte_eth_dev *dev,
 	struct priv *priv = dev->data->dev_private;
 
 	priv_lock(priv);
-	if (priv->started) {
+	if (dev->data->dev_started) {
 		rte_flow_error_set(error, EBUSY,
 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
 				   NULL,
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 6e520fb..683a4a7 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -1150,7 +1150,7 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	if (rxq != NULL) {
 		DEBUG("%p: reusing already allocated queue index %u (%p)",
 		      (void *)dev, idx, (void *)rxq);
-		if (priv->started) {
+		if (dev->data->dev_started) {
 			priv_unlock(priv);
 			return -EEXIST;
 		}
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 0d0f340..212b4df 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -61,10 +61,6 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 		return -E_RTE_SECONDARY;
 
 	priv_lock(priv);
-	if (priv->started) {
-		priv_unlock(priv);
-		return 0;
-	}
 	/* Update Rx/Tx callback. */
 	priv_dev_select_tx_function(priv, dev);
 	priv_dev_select_rx_function(priv, dev);
@@ -72,8 +68,6 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 	err = priv_create_hash_rxqs(priv);
 	if (!err)
 		err = priv_rehash_flows(priv);
-	if (!err)
-		priv->started = 1;
 	else {
 		ERROR("%p: an error occurred while configuring hash RX queues:"
 		      " %s",
@@ -82,7 +76,6 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 	}
 	err = priv_flow_start(priv);
 	if (err) {
-		priv->started = 0;
 		ERROR("%p: an error occurred while configuring flows:"
 		      " %s",
 		      (void *)priv, strerror(err));
@@ -125,10 +118,6 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
 		return;
 
 	priv_lock(priv);
-	if (!priv->started) {
-		priv_unlock(priv);
-		return;
-	}
 	DEBUG("%p: cleaning up and destroying hash RX queues", (void *)dev);
 	priv_special_flow_disable_all(priv);
 	priv_mac_addrs_disable(priv);
@@ -136,6 +125,5 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
 	priv_flow_stop(priv);
 	priv_rx_intr_vec_disable(priv);
 	priv_dev_interrupt_handler_uninstall(priv, dev);
-	priv->started = 0;
 	priv_unlock(priv);
 }
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index ee9f703..f551f87 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -490,7 +490,7 @@ mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	if (txq != NULL) {
 		DEBUG("%p: reusing already allocated queue index %u (%p)",
 		      (void *)dev, idx, (void *)txq);
-		if (priv->started) {
+		if (dev->data->dev_started) {
 			priv_unlock(priv);
 			return -EEXIST;
 		}
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v2 06/30] net/mlx5: verify all flows are been removed on close
       [not found] ` <cover.1507207731.git.nelio.laranjeiro@6wind.com>
                     ` (4 preceding siblings ...)
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 05/30] net/mlx5: remove redundant started flag Nelio Laranjeiro
@ 2017-10-05 12:49   ` Nelio Laranjeiro
  2017-10-06  0:50     ` Yongseok Koh
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 07/30] net/mlx5: fix reta update can segfault Nelio Laranjeiro
                     ` (23 subsequent siblings)
  29 siblings, 1 reply; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-05 12:49 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
Debug tool to verify that all flows have been unregistered from the NIC.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5.c      |  4 ++++
 drivers/net/mlx5/mlx5.h      |  1 +
 drivers/net/mlx5/mlx5_flow.c | 22 ++++++++++++++++++++++
 3 files changed, 27 insertions(+)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 4da0524..6d17d30 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -190,6 +190,7 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 {
 	struct priv *priv = mlx5_get_priv(dev);
 	unsigned int i;
+	int ret;
 
 	priv_lock(priv);
 	DEBUG("%p: closing device \"%s\"",
@@ -252,6 +253,9 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	if (priv->reta_idx != NULL)
 		rte_free(priv->reta_idx);
 	priv_socket_uninit(priv);
+	ret = priv_flow_verify(priv);
+	if (ret)
+		WARN("%p: some flows still remain", (void *)priv);
 	priv_unlock(priv);
 	memset(priv, 0, sizeof(*priv));
 }
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 3c58f7a..c6563bd 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -290,6 +290,7 @@ int mlx5_flow_isolate(struct rte_eth_dev *, int, struct rte_flow_error *);
 int priv_flow_start(struct priv *);
 void priv_flow_stop(struct priv *);
 int priv_flow_rxq_in_use(struct priv *, struct mlx5_rxq_data *);
+int priv_flow_verify(struct priv *);
 
 /* mlx5_socket.c */
 
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 3504c43..193a90b 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -1630,3 +1630,25 @@ mlx5_flow_isolate(struct rte_eth_dev *dev,
 	priv_unlock(priv);
 	return 0;
 }
+
+/**
+ * Verify the flow list is empty
+ *
+ * @param priv
+ *  Pointer to private structure.
+ *
+ * @return the number of flows not released.
+ */
+int
+priv_flow_verify(struct priv *priv)
+{
+	struct rte_flow *flow;
+	int ret = 0;
+
+	TAILQ_FOREACH(flow, &priv->flows, next) {
+		DEBUG("%p: flow %p still referenced", (void *)priv,
+		      (void *)flow);
+		++ret;
+	}
+	return ret;
+}
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v2 07/30] net/mlx5: fix reta update can segfault
       [not found] ` <cover.1507207731.git.nelio.laranjeiro@6wind.com>
                     ` (5 preceding siblings ...)
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 06/30] net/mlx5: verify all flows are been removed on close Nelio Laranjeiro
@ 2017-10-05 12:49   ` Nelio Laranjeiro
  2017-10-06  0:51     ` Yongseok Koh
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 08/30] net/mlx5: fix rxqs vector support verification Nelio Laranjeiro
                     ` (22 subsequent siblings)
  29 siblings, 1 reply; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-05 12:49 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit, stable
Reta update needs to stop/start the port, but stopping the port does not
disable the polling functions, which may end in a segfault if a core is
polling the queue while the control thread is modifying it.
This patch reorders the sequence so that such a situation cannot
happen.
Fixes: aa13338faf5e ("net/mlx5: rebuild flows on updating RETA")
Cc: yskoh@mellanox.com
Cc: stable@dpdk.org
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5_rss.c     | 9 +++++----
 drivers/net/mlx5/mlx5_trigger.c | 7 +++++++
 2 files changed, 12 insertions(+), 4 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_rss.c b/drivers/net/mlx5/mlx5_rss.c
index d3d2603..8942879 100644
--- a/drivers/net/mlx5/mlx5_rss.c
+++ b/drivers/net/mlx5/mlx5_rss.c
@@ -351,11 +351,12 @@ mlx5_dev_rss_reta_update(struct rte_eth_dev *dev,
 	struct priv *priv = dev->data->dev_private;
 
 	assert(!mlx5_is_secondary());
-	mlx5_dev_stop(dev);
 	priv_lock(priv);
 	ret = priv_dev_rss_reta_update(priv, reta_conf, reta_size);
 	priv_unlock(priv);
-	if (ret)
-		return -ret;
-	return mlx5_dev_start(dev);
+	if (dev->data->dev_started) {
+		mlx5_dev_stop(dev);
+		mlx5_dev_start(dev);
+	}
+	return -ret;
 }
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 212b4df..eeb9585 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -30,6 +30,7 @@
  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
+#include <unistd.h>
 
 #include <rte_ether.h>
 #include <rte_ethdev.h>
@@ -118,6 +119,12 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
 		return;
 
 	priv_lock(priv);
+	dev->data->dev_started = 0;
+	/* Prevent crashes when queues are still in use. */
+	dev->rx_pkt_burst = removed_rx_burst;
+	dev->tx_pkt_burst = removed_tx_burst;
+	rte_wmb();
+	usleep(1000 * priv->rxqs_n);
 	DEBUG("%p: cleaning up and destroying hash RX queues", (void *)dev);
 	priv_special_flow_disable_all(priv);
 	priv_mac_addrs_disable(priv);
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * Re: [dpdk-dev] [PATCH v2 07/30] net/mlx5: fix reta update can segfault
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 07/30] net/mlx5: fix reta update can segfault Nelio Laranjeiro
@ 2017-10-06  0:51     ` Yongseok Koh
  0 siblings, 0 replies; 129+ messages in thread
From: Yongseok Koh @ 2017-10-06  0:51 UTC (permalink / raw)
  To: Nélio Laranjeiro; +Cc: dev, Adrien Mazarguil, ferruh.yigit, stable
> On Oct 5, 2017, at 5:49 AM, Nelio Laranjeiro <nelio.laranjeiro@6wind.com> wrote:
> 
> Reta update needs to stop/start the port but stopping the port does not
> disable the polling functions which may end in a segfault if a core is
> polling the queue while the control thread is modifying it.
> 
> This patch changes the sequences to an order where such situation cannot
> happen.
> 
> Fixes: aa13338faf5e ("net/mlx5: rebuild flows on updating RETA")
> Cc: yskoh@mellanox.com
> Cc: stable@dpdk.org
> 
> Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
> ---
Acked-by: Yongseok Koh <yskoh@mellanox.com>
 
Thanks
^ permalink raw reply	[flat|nested] 129+ messages in thread
 
- * [dpdk-dev] [PATCH v2 08/30] net/mlx5: fix rxqs vector support verification
       [not found] ` <cover.1507207731.git.nelio.laranjeiro@6wind.com>
                     ` (6 preceding siblings ...)
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 07/30] net/mlx5: fix reta update can segfault Nelio Laranjeiro
@ 2017-10-05 12:49   ` Nelio Laranjeiro
  2017-10-06  0:51     ` Yongseok Koh
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 09/30] net/mlx5: add reference counter on memory region Nelio Laranjeiro
                     ` (21 subsequent siblings)
  29 siblings, 1 reply; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-05 12:49 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit, stable
The number of queues in DPDK does not mean that the array of queues will be
totally filled; these two pieces of information are uncorrelated.  The number
of queues is provided in the port configuration whereas the array is filled by
calling tx/rx_queue_setup().  As this number of queues is not increased or
decreased according to tx/rx_queue_setup() or tx/rx_queue_release(), the PMD
must consider that a queue may not be initialised in some position of the array.
Fixes: 6cb559d67b83 ("net/mlx5: add vectorized Rx/Tx burst for x86")
Cc: yskoh@mellanox.com
Cc: stable@dpdk.org
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5_rxtx_vec_sse.c | 2 ++
 1 file changed, 2 insertions(+)
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
index 2750eac..20ea38e 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
@@ -1367,6 +1367,8 @@ priv_check_vec_rx_support(struct priv *priv)
 	for (i = 0; i < priv->rxqs_n; ++i) {
 		struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
 
+		if (!rxq)
+			continue;
 		if (rxq_check_vec_support(rxq) < 0)
 			break;
 	}
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * Re: [dpdk-dev] [PATCH v2 08/30] net/mlx5: fix rxqs vector support verification
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 08/30] net/mlx5: fix rxqs vector support verification Nelio Laranjeiro
@ 2017-10-06  0:51     ` Yongseok Koh
  0 siblings, 0 replies; 129+ messages in thread
From: Yongseok Koh @ 2017-10-06  0:51 UTC (permalink / raw)
  To: Nélio Laranjeiro; +Cc: dev, Adrien Mazarguil, ferruh.yigit, stable
> On Oct 5, 2017, at 5:49 AM, Nelio Laranjeiro <nelio.laranjeiro@6wind.com> wrote:
> 
> The number of queues in DPDK does not means that the array of queue will be
> totally filled, those information are uncorrelated.  The number of queues
> is provided in the port configuration whereas the array is filled by
> calling tx/rx_queue_setup().  As this number of queue is not increased or
> decrease according to tx/rx_queue_setup() or tx/rx_queue_release(), PMD
> must consider a queue may not be initialised in some position of the array.
> 
> Fixes: 6cb559d67b83 ("net/mlx5: add vectorized Rx/Tx burst for x86")
> Cc: yskoh@mellanox.com
> Cc: stable@dpdk.org
> 
> Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
> ---
Acked-by: Yongseok Koh <yskoh@mellanox.com>
 
Thanks
^ permalink raw reply	[flat|nested] 129+ messages in thread
 
- * [dpdk-dev] [PATCH v2 09/30] net/mlx5: add reference counter on memory region
       [not found] ` <cover.1507207731.git.nelio.laranjeiro@6wind.com>
                     ` (7 preceding siblings ...)
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 08/30] net/mlx5: fix rxqs vector support verification Nelio Laranjeiro
@ 2017-10-05 12:49   ` Nelio Laranjeiro
  2017-10-06  1:11     ` Yongseok Koh
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 10/30] net/mlx5: separate DPDK from Verbs Rx queue objects Nelio Laranjeiro
                     ` (20 subsequent siblings)
  29 siblings, 1 reply; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-05 12:49 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
This patch introduces the memory region as a shared object: users get a
reference to an existing one by calling priv_mr_get(), or create it by
calling priv_mr_new().  The latter registers the memory pool in the kernel
driver and retrieves the associated memory region.
This should help reduce the memory consumption caused by registering the
same memory pool multiple times.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5.c      |   3 +
 drivers/net/mlx5/mlx5.h      |   8 ++
 drivers/net/mlx5/mlx5_mr.c   | 210 ++++++++++++++++++++++++++++++-------------
 drivers/net/mlx5/mlx5_rxq.c  |  17 ++--
 drivers/net/mlx5/mlx5_rxtx.h |  52 +++++++----
 drivers/net/mlx5/mlx5_txq.c  |   8 +-
 6 files changed, 206 insertions(+), 92 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 6d17d30..eb0d6c5 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -256,6 +256,9 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	ret = priv_flow_verify(priv);
 	if (ret)
 		WARN("%p: some flows still remain", (void *)priv);
+	ret = priv_mr_verify(priv);
+	if (ret)
+		WARN("%p: some Memory Region still remain", (void *)priv);
 	priv_unlock(priv);
 	memset(priv, 0, sizeof(*priv));
 }
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index c6563bd..f563722 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -146,6 +146,7 @@ struct priv {
 	unsigned int reta_idx_n; /* RETA index size. */
 	struct rte_flow_drop *flow_drop_queue; /* Flow drop queue. */
 	TAILQ_HEAD(mlx5_flows, rte_flow) flows; /* RTE Flow rules. */
+	LIST_HEAD(mr, mlx5_mr) mr; /* Memory region. */
 	uint32_t link_speed_capa; /* Link speed capabilities. */
 	struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */
 	rte_spinlock_t lock; /* Lock for control functions. */
@@ -299,4 +300,11 @@ int priv_socket_uninit(struct priv *priv);
 void priv_socket_handle(struct priv *priv);
 int priv_socket_connect(struct priv *priv);
 
+/* mlx5_mr.c */
+
+struct mlx5_mr *priv_mr_new(struct priv *, struct rte_mempool *);
+struct mlx5_mr *priv_mr_get(struct priv *, struct rte_mempool *);
+int priv_mr_release(struct priv *, struct mlx5_mr *);
+int priv_mr_verify(struct priv *);
+
 #endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c
index 6199746..3f14c47 100644
--- a/drivers/net/mlx5/mlx5_mr.c
+++ b/drivers/net/mlx5/mlx5_mr.c
@@ -42,6 +42,7 @@
 #endif
 
 #include <rte_mempool.h>
+#include <rte_malloc.h>
 
 #include "mlx5.h"
 #include "mlx5_rxtx.h"
@@ -111,54 +112,6 @@ static int mlx5_check_mempool(struct rte_mempool *mp, uintptr_t *start,
 }
 
 /**
- * Register mempool as a memory region.
- *
- * @param pd
- *   Pointer to protection domain.
- * @param mp
- *   Pointer to memory pool.
- *
- * @return
- *   Memory region pointer, NULL in case of error.
- */
-struct ibv_mr *
-mlx5_mp2mr(struct ibv_pd *pd, struct rte_mempool *mp)
-{
-	const struct rte_memseg *ms = rte_eal_get_physmem_layout();
-	uintptr_t start;
-	uintptr_t end;
-	unsigned int i;
-
-	if (mlx5_check_mempool(mp, &start, &end) != 0) {
-		ERROR("mempool %p: not virtually contiguous",
-		      (void *)mp);
-		return NULL;
-	}
-
-	DEBUG("mempool %p area start=%p end=%p size=%zu",
-	      (void *)mp, (void *)start, (void *)end,
-	      (size_t)(end - start));
-	/* Round start and end to page boundary if found in memory segments. */
-	for (i = 0; (i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL); ++i) {
-		uintptr_t addr = (uintptr_t)ms[i].addr;
-		size_t len = ms[i].len;
-		unsigned int align = ms[i].hugepage_sz;
-
-		if ((start > addr) && (start < addr + len))
-			start = RTE_ALIGN_FLOOR(start, align);
-		if ((end > addr) && (end < addr + len))
-			end = RTE_ALIGN_CEIL(end, align);
-	}
-	DEBUG("mempool %p using start=%p end=%p size=%zu for MR",
-	      (void *)mp, (void *)start, (void *)end,
-	      (size_t)(end - start));
-	return ibv_reg_mr(pd,
-			  (void *)start,
-			  end - start,
-			  IBV_ACCESS_LOCAL_WRITE);
-}
-
-/**
  * Register a Memory Region (MR) <-> Memory Pool (MP) association in
  * txq->mp2mr[]. If mp2mr[] is full, remove an entry first.
  *
@@ -180,12 +133,14 @@ mlx5_txq_mp2mr_reg(struct mlx5_txq_data *txq, struct rte_mempool *mp,
 {
 	struct mlx5_txq_ctrl *txq_ctrl =
 		container_of(txq, struct mlx5_txq_ctrl, txq);
-	struct ibv_mr *mr;
+	struct mlx5_mr *mr;
 
 	/* Add a new entry, register MR first. */
 	DEBUG("%p: discovered new memory pool \"%s\" (%p)",
 	      (void *)txq_ctrl, mp->name, (void *)mp);
-	mr = mlx5_mp2mr(txq_ctrl->priv->pd, mp);
+	mr = priv_mr_get(txq_ctrl->priv, mp);
+	if (mr == NULL)
+		mr = priv_mr_new(txq_ctrl->priv, mp);
 	if (unlikely(mr == NULL)) {
 		DEBUG("%p: unable to configure MR, ibv_reg_mr() failed.",
 		      (void *)txq_ctrl);
@@ -196,20 +151,17 @@ mlx5_txq_mp2mr_reg(struct mlx5_txq_data *txq, struct rte_mempool *mp,
 		DEBUG("%p: MR <-> MP table full, dropping oldest entry.",
 		      (void *)txq_ctrl);
 		--idx;
-		claim_zero(ibv_dereg_mr(txq_ctrl->txq.mp2mr[0].mr));
+		priv_mr_release(txq_ctrl->priv, txq_ctrl->txq.mp2mr[0]);
 		memmove(&txq_ctrl->txq.mp2mr[0], &txq_ctrl->txq.mp2mr[1],
 			(sizeof(txq_ctrl->txq.mp2mr) -
 			 sizeof(txq_ctrl->txq.mp2mr[0])));
 	}
 	/* Store the new entry. */
-	txq_ctrl->txq.mp2mr[idx].start = (uintptr_t)mr->addr;
-	txq_ctrl->txq.mp2mr[idx].end = (uintptr_t)mr->addr + mr->length;
-	txq_ctrl->txq.mp2mr[idx].mr = mr;
-	txq_ctrl->txq.mp2mr[idx].lkey = rte_cpu_to_be_32(mr->lkey);
+	txq_ctrl->txq.mp2mr[idx] = mr;
 	DEBUG("%p: new MR lkey for MP \"%s\" (%p): 0x%08" PRIu32,
 	      (void *)txq_ctrl, mp->name, (void *)mp,
-	      txq_ctrl->txq.mp2mr[idx].lkey);
-	return txq_ctrl->txq.mp2mr[idx].lkey;
+	      txq_ctrl->txq.mp2mr[idx]->lkey);
+	return mr->lkey;
 }
 
 struct txq_mp2mr_mbuf_check_data {
@@ -275,15 +227,149 @@ mlx5_txq_mp2mr_iter(struct rte_mempool *mp, void *arg)
 		return;
 	}
 	for (i = 0; (i != RTE_DIM(txq_ctrl->txq.mp2mr)); ++i) {
-		struct ibv_mr *mr = txq_ctrl->txq.mp2mr[i].mr;
-
-		if (unlikely(mr == NULL)) {
+		if (unlikely(txq_ctrl->txq.mp2mr[i] == NULL)) {
 			/* Unknown MP, add a new MR for it. */
 			break;
 		}
-		if (start >= (uintptr_t)mr->addr &&
-		    end <= (uintptr_t)mr->addr + mr->length)
+		if (start >= (uintptr_t)txq_ctrl->txq.mp2mr[i]->start &&
+		    end <= (uintptr_t)txq_ctrl->txq.mp2mr[i]->end)
 			return;
 	}
 	mlx5_txq_mp2mr_reg(&txq_ctrl->txq, mp, i);
 }
+
+/**
+ * Register a new memory region from the mempool and store it in the memory
+ * region list.
+ *
+ * @param  priv
+ *   Pointer to private structure.
+ * @param mp
+ *   Pointer to the memory pool to register.
+ * @return
+ *   The memory region on success.
+ */
+struct mlx5_mr*
+priv_mr_new(struct priv *priv, struct rte_mempool *mp)
+{
+	const struct rte_memseg *ms = rte_eal_get_physmem_layout();
+	uintptr_t start;
+	uintptr_t end;
+	unsigned int i;
+	struct mlx5_mr *mr;
+
+	mr = rte_zmalloc_socket(__func__, sizeof(*mr), 0, mp->socket_id);
+	if (!mr) {
+		DEBUG("unable to configure MR, ibv_reg_mr() failed.");
+		return NULL;
+	}
+	if (mlx5_check_mempool(mp, &start, &end) != 0) {
+		ERROR("mempool %p: not virtually contiguous",
+		      (void *)mp);
+		return NULL;
+	}
+	DEBUG("mempool %p area start=%p end=%p size=%zu",
+	      (void *)mp, (void *)start, (void *)end,
+	      (size_t)(end - start));
+	/* Round start and end to page boundary if found in memory segments. */
+	for (i = 0; (i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL); ++i) {
+		uintptr_t addr = (uintptr_t)ms[i].addr;
+		size_t len = ms[i].len;
+		unsigned int align = ms[i].hugepage_sz;
+
+		if ((start > addr) && (start < addr + len))
+			start = RTE_ALIGN_FLOOR(start, align);
+		if ((end > addr) && (end < addr + len))
+			end = RTE_ALIGN_CEIL(end, align);
+	}
+	DEBUG("mempool %p using start=%p end=%p size=%zu for MR",
+	      (void *)mp, (void *)start, (void *)end,
+	      (size_t)(end - start));
+	mr->mr = ibv_reg_mr(priv->pd, (void *)start, end - start,
+			    IBV_ACCESS_LOCAL_WRITE);
+	mr->mp = mp;
+	mr->lkey = rte_cpu_to_be_32(mr->mr->lkey);
+	mr->start = start;
+	mr->end = (uintptr_t)mr->mr->addr + mr->mr->length;
+	rte_atomic32_inc(&mr->refcnt);
+	DEBUG("%p: new Memory Region %p refcnt: %d", (void *)priv,
+	      (void *)mr, rte_atomic32_read(&mr->refcnt));
+	LIST_INSERT_HEAD(&priv->mr, mr, next);
+	return mr;
+}
+
+/**
+ * Search the memory region object in the memory region list.
+ *
+ * @param  priv
+ *   Pointer to private structure.
+ * @param mp
+ *   Pointer to the memory pool to register.
+ * @return
+ *   The memory region on success.
+ */
+struct mlx5_mr*
+priv_mr_get(struct priv *priv, struct rte_mempool *mp)
+{
+	struct mlx5_mr *mr;
+
+	assert(mp);
+	if (LIST_EMPTY(&priv->mr))
+		return NULL;
+	LIST_FOREACH(mr, &priv->mr, next) {
+		if (mr->mp == mp) {
+			rte_atomic32_inc(&mr->refcnt);
+			DEBUG("Memory Region %p refcnt: %d",
+			      (void *)mr, rte_atomic32_read(&mr->refcnt));
+			return mr;
+		}
+	}
+	return NULL;
+}
+
+/**
+ * Release the memory region object.
+ *
+ * @param  mr
+ *   Pointer to memory region to release.
+ *
+ * @return
+ *   0 on success, errno on failure.
+ */
+int
+priv_mr_release(struct priv *priv, struct mlx5_mr *mr)
+{
+	(void)priv;
+	assert(mr);
+	DEBUG("Memory Region %p refcnt: %d",
+	      (void *)mr, rte_atomic32_read(&mr->refcnt));
+	if (rte_atomic32_dec_and_test(&mr->refcnt)) {
+		claim_zero(ibv_dereg_mr(mr->mr));
+		LIST_REMOVE(mr, next);
+		rte_free(mr);
+		return 0;
+	}
+	return EBUSY;
+}
+
+/**
+ * Verify the memory region list is empty.
+ *
+ * @param priv
+ *  Pointer to private structure.
+ *
+ * @return the number of object not released.
+ */
+int
+priv_mr_verify(struct priv *priv)
+{
+	int ret = 0;
+	struct mlx5_mr *mr;
+
+	LIST_FOREACH(mr, &priv->mr, next) {
+		DEBUG("%p: mr %p still referenced", (void *)priv,
+		      (void *)mr);
+		++ret;
+	}
+	return ret;
+}
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 683a4a7..0d645ec 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -673,7 +673,7 @@ rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl, unsigned int elts_n)
 			.addr =
 			    rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t)),
 			.byte_count = rte_cpu_to_be_32(DATA_LEN(buf)),
-			.lkey = rte_cpu_to_be_32(rxq_ctrl->mr->lkey),
+			.lkey = rxq_ctrl->mr->lkey,
 		};
 		(*rxq_ctrl->rxq.elts)[i] = buf;
 	}
@@ -767,7 +767,7 @@ mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl)
 	if (rxq_ctrl->channel != NULL)
 		claim_zero(ibv_destroy_comp_channel(rxq_ctrl->channel));
 	if (rxq_ctrl->mr != NULL)
-		claim_zero(ibv_dereg_mr(rxq_ctrl->mr));
+		priv_mr_release(rxq_ctrl->priv, rxq_ctrl->mr);
 	memset(rxq_ctrl, 0, sizeof(*rxq_ctrl));
 }
 
@@ -929,12 +929,15 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
 		tmpl.rxq.csum_l2tun =
 			!!dev->data->dev_conf.rxmode.hw_ip_checksum;
 	/* Use the entire RX mempool as the memory region. */
-	tmpl.mr = mlx5_mp2mr(priv->pd, mp);
+	tmpl.mr = priv_mr_get(priv, mp);
 	if (tmpl.mr == NULL) {
-		ret = EINVAL;
-		ERROR("%p: MR creation failure: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
+		tmpl.mr = priv_mr_new(priv, mp);
+		if (tmpl.mr == NULL) {
+			ret = EINVAL;
+			ERROR("%p: MR creation failure: %s",
+			      (void *)dev, strerror(ret));
+			goto error;
+		}
 	}
 	if (dev->data->dev_conf.intr_conf.rxq) {
 		tmpl.channel = ibv_create_comp_channel(priv->ctx);
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 4f877cb..b0f17c0 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -36,6 +36,7 @@
 
 #include <stddef.h>
 #include <stdint.h>
+#include <sys/queue.h>
 
 /* Verbs header. */
 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
@@ -52,6 +53,7 @@
 #include <rte_mempool.h>
 #include <rte_common.h>
 #include <rte_hexdump.h>
+#include <rte_atomic.h>
 
 #include "mlx5_utils.h"
 #include "mlx5.h"
@@ -80,6 +82,17 @@ struct mlx5_txq_stats {
 
 struct priv;
 
+/* Memory region queue object. */
+struct mlx5_mr {
+	LIST_ENTRY(mlx5_mr) next; /**< Pointer to the next element. */
+	rte_atomic32_t refcnt; /*<< Reference counter. */
+	uint32_t lkey; /*<< rte_cpu_to_be_32(mr->lkey) */
+	uintptr_t start; /* Start address of MR */
+	uintptr_t end; /* End address of MR */
+	struct ibv_mr *mr; /*<< Memory Region. */
+	struct rte_mempool *mp; /*<< Memory Pool. */
+};
+
 /* Compressed CQE context. */
 struct rxq_zip {
 	uint16_t ai; /* Array index. */
@@ -126,7 +139,7 @@ struct mlx5_rxq_ctrl {
 	struct priv *priv; /* Back pointer to private data. */
 	struct ibv_cq *cq; /* Completion Queue. */
 	struct ibv_wq *wq; /* Work Queue. */
-	struct ibv_mr *mr; /* Memory Region (for mp). */
+	struct mlx5_mr *mr; /* Memory Region (for mp). */
 	struct ibv_comp_channel *channel;
 	unsigned int socket; /* CPU socket ID for allocations. */
 	struct mlx5_rxq_data rxq; /* Data path structure. */
@@ -252,6 +265,7 @@ struct mlx5_txq_data {
 	uint16_t mpw_hdr_dseg:1; /* Enable DSEGs in the title WQEBB. */
 	uint16_t max_inline; /* Multiple of RTE_CACHE_LINE_SIZE to inline. */
 	uint16_t inline_max_packet_sz; /* Max packet size for inlining. */
+	uint16_t mr_cache_idx; /* Index of last hit entry. */
 	uint32_t qp_num_8s; /* QP number shifted by 8. */
 	uint32_t flags; /* Flags for Tx Queue. */
 	volatile struct mlx5_cqe (*cqes)[]; /* Completion queue. */
@@ -259,13 +273,7 @@ struct mlx5_txq_data {
 	volatile uint32_t *qp_db; /* Work queue doorbell. */
 	volatile uint32_t *cq_db; /* Completion queue doorbell. */
 	volatile void *bf_reg; /* Blueflame register. */
-	struct {
-		uintptr_t start; /* Start address of MR */
-		uintptr_t end; /* End address of MR */
-		struct ibv_mr *mr; /* Memory Region (for mp). */
-		uint32_t lkey; /* rte_cpu_to_be_32(mr->lkey) */
-	} mp2mr[MLX5_PMD_TX_MP_CACHE]; /* MP to MR translation table. */
-	uint16_t mr_cache_idx; /* Index of last hit entry. */
+	struct mlx5_mr *mp2mr[MLX5_PMD_TX_MP_CACHE]; /* MR translation table. */
 	struct rte_mbuf *(*elts)[]; /* TX elements. */
 	struct mlx5_txq_stats stats; /* TX queue counters. */
 } __rte_cache_aligned;
@@ -564,26 +572,34 @@ mlx5_tx_mb2mr(struct mlx5_txq_data *txq, struct rte_mbuf *mb)
 {
 	uint16_t i = txq->mr_cache_idx;
 	uintptr_t addr = rte_pktmbuf_mtod(mb, uintptr_t);
+	uint32_t lkey;
 
 	assert(i < RTE_DIM(txq->mp2mr));
-	if (likely(txq->mp2mr[i].start <= addr && txq->mp2mr[i].end >= addr))
-		return txq->mp2mr[i].lkey;
+	if (likely(txq->mp2mr[i]->start <= addr && txq->mp2mr[i]->end >= addr))
+		return txq->mp2mr[i]->lkey;
 	for (i = 0; (i != RTE_DIM(txq->mp2mr)); ++i) {
-		if (unlikely(txq->mp2mr[i].mr == NULL)) {
+		if (unlikely(txq->mp2mr[i]->mr == NULL)) {
 			/* Unknown MP, add a new MR for it. */
 			break;
 		}
-		if (txq->mp2mr[i].start <= addr &&
-		    txq->mp2mr[i].end >= addr) {
-			assert(txq->mp2mr[i].lkey != (uint32_t)-1);
-			assert(rte_cpu_to_be_32(txq->mp2mr[i].mr->lkey) ==
-			       txq->mp2mr[i].lkey);
+		if (txq->mp2mr[i]->start <= addr &&
+		    txq->mp2mr[i]->end >= addr) {
+			assert(txq->mp2mr[i]->lkey != (uint32_t)-1);
+			assert(rte_cpu_to_be_32(txq->mp2mr[i]->mr->lkey) ==
+			       txq->mp2mr[i]->lkey);
 			txq->mr_cache_idx = i;
-			return txq->mp2mr[i].lkey;
+			return txq->mp2mr[i]->lkey;
 		}
 	}
 	txq->mr_cache_idx = 0;
-	return mlx5_txq_mp2mr_reg(txq, mlx5_tx_mb2mp(mb), i);
+	lkey = mlx5_txq_mp2mr_reg(txq, mlx5_tx_mb2mp(mb), i);
+	/*
+	 * Request the reference to use in this queue, the original one is
+	 * kept by the control plane.
+	 */
+	if (lkey != (uint32_t)-1)
+		rte_atomic32_inc(&txq->mp2mr[i]->refcnt);
+	return lkey;
 }
 
 /**
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index f551f87..1899850 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -142,11 +142,9 @@ mlx5_txq_cleanup(struct mlx5_txq_ctrl *txq_ctrl)
 		claim_zero(ibv_destroy_qp(txq_ctrl->qp));
 	if (txq_ctrl->cq != NULL)
 		claim_zero(ibv_destroy_cq(txq_ctrl->cq));
-	for (i = 0; (i != RTE_DIM(txq_ctrl->txq.mp2mr)); ++i) {
-		if (txq_ctrl->txq.mp2mr[i].mr == NULL)
-			break;
-		claim_zero(ibv_dereg_mr(txq_ctrl->txq.mp2mr[i].mr));
-	}
+	for (i = 0; (i != RTE_DIM(txq_ctrl->txq.mp2mr)); ++i)
+		if (txq_ctrl->txq.mp2mr[i])
+			priv_mr_release(txq_ctrl->priv, txq_ctrl->txq.mp2mr[i]);
 	memset(txq_ctrl, 0, sizeof(*txq_ctrl));
 }
 
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * Re: [dpdk-dev] [PATCH v2 09/30] net/mlx5: add reference counter on memory region
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 09/30] net/mlx5: add reference counter on memory region Nelio Laranjeiro
@ 2017-10-06  1:11     ` Yongseok Koh
  2017-10-06  8:30       ` Nélio Laranjeiro
  0 siblings, 1 reply; 129+ messages in thread
From: Yongseok Koh @ 2017-10-06  1:11 UTC (permalink / raw)
  To: Nelio Laranjeiro; +Cc: dev, adrien.mazarguil, ferruh.yigit
On Thu, Oct 05, 2017 at 02:49:41PM +0200, Nelio Laranjeiro wrote:
[...]
> @@ -180,12 +133,14 @@ mlx5_txq_mp2mr_reg(struct mlx5_txq_data *txq, struct rte_mempool *mp,
>  {
>  	struct mlx5_txq_ctrl *txq_ctrl =
>  		container_of(txq, struct mlx5_txq_ctrl, txq);
> -	struct ibv_mr *mr;
> +	struct mlx5_mr *mr;
>  
>  	/* Add a new entry, register MR first. */
>  	DEBUG("%p: discovered new memory pool \"%s\" (%p)",
>  	      (void *)txq_ctrl, mp->name, (void *)mp);
> -	mr = mlx5_mp2mr(txq_ctrl->priv->pd, mp);
> +	mr = priv_mr_get(txq_ctrl->priv, mp);
> +	if (mr == NULL)
> +		mr = priv_mr_new(txq_ctrl->priv, mp);
>  	if (unlikely(mr == NULL)) {
>  		DEBUG("%p: unable to configure MR, ibv_reg_mr() failed.",
>  		      (void *)txq_ctrl);
> @@ -196,20 +151,17 @@ mlx5_txq_mp2mr_reg(struct mlx5_txq_data *txq, struct rte_mempool *mp,
>  		DEBUG("%p: MR <-> MP table full, dropping oldest entry.",
>  		      (void *)txq_ctrl);
>  		--idx;
> -		claim_zero(ibv_dereg_mr(txq_ctrl->txq.mp2mr[0].mr));
> +		priv_mr_release(txq_ctrl->priv, txq_ctrl->txq.mp2mr[0]);
In this function, txq_ctrl->txq can be replaced with txq.
[...]
> @@ -564,26 +572,34 @@ mlx5_tx_mb2mr(struct mlx5_txq_data *txq, struct rte_mbuf *mb)
>  {
>  	uint16_t i = txq->mr_cache_idx;
>  	uintptr_t addr = rte_pktmbuf_mtod(mb, uintptr_t);
> +	uint32_t lkey;
>  
>  	assert(i < RTE_DIM(txq->mp2mr));
> -	if (likely(txq->mp2mr[i].start <= addr && txq->mp2mr[i].end >= addr))
> -		return txq->mp2mr[i].lkey;
> +	if (likely(txq->mp2mr[i]->start <= addr && txq->mp2mr[i]->end >= addr))
> +		return txq->mp2mr[i]->lkey;
>  	for (i = 0; (i != RTE_DIM(txq->mp2mr)); ++i) {
> -		if (unlikely(txq->mp2mr[i].mr == NULL)) {
> +		if (unlikely(txq->mp2mr[i]->mr == NULL)) {
>  			/* Unknown MP, add a new MR for it. */
>  			break;
>  		}
> -		if (txq->mp2mr[i].start <= addr &&
> -		    txq->mp2mr[i].end >= addr) {
> -			assert(txq->mp2mr[i].lkey != (uint32_t)-1);
> -			assert(rte_cpu_to_be_32(txq->mp2mr[i].mr->lkey) ==
> -			       txq->mp2mr[i].lkey);
> +		if (txq->mp2mr[i]->start <= addr &&
> +		    txq->mp2mr[i]->end >= addr) {
> +			assert(txq->mp2mr[i]->lkey != (uint32_t)-1);
> +			assert(rte_cpu_to_be_32(txq->mp2mr[i]->mr->lkey) ==
> +			       txq->mp2mr[i]->lkey);
>  			txq->mr_cache_idx = i;
> -			return txq->mp2mr[i].lkey;
> +			return txq->mp2mr[i]->lkey;
>  		}
>  	}
>  	txq->mr_cache_idx = 0;
> -	return mlx5_txq_mp2mr_reg(txq, mlx5_tx_mb2mp(mb), i);
> +	lkey = mlx5_txq_mp2mr_reg(txq, mlx5_tx_mb2mp(mb), i);
> +	/*
> +	 * Request the reference to use in this queue, the original one is
> +	 * kept by the control plane.
> +	 */
> +	if (lkey != (uint32_t)-1)
> +		rte_atomic32_inc(&txq->mp2mr[i]->refcnt);
If mp2mr is full (i == RTE_DIM(txq->mp2mr)), then mp2mr[0] will be removed, the
other slots will be shifted and the new entry will be added at the end. In that
case, referencing txq->mp2mr[i] would be illegal - it is out of range.
Thanks,
Yongseok
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * Re: [dpdk-dev] [PATCH v2 09/30] net/mlx5: add reference counter on memory region
  2017-10-06  1:11     ` Yongseok Koh
@ 2017-10-06  8:30       ` Nélio Laranjeiro
  0 siblings, 0 replies; 129+ messages in thread
From: Nélio Laranjeiro @ 2017-10-06  8:30 UTC (permalink / raw)
  To: Yongseok Koh; +Cc: dev, adrien.mazarguil, ferruh.yigit
On Thu, Oct 05, 2017 at 06:11:26PM -0700, Yongseok Koh wrote:
> On Thu, Oct 05, 2017 at 02:49:41PM +0200, Nelio Laranjeiro wrote:
> [...]
> > @@ -180,12 +133,14 @@ mlx5_txq_mp2mr_reg(struct mlx5_txq_data *txq, struct rte_mempool *mp,
> >  {
> >  	struct mlx5_txq_ctrl *txq_ctrl =
> >  		container_of(txq, struct mlx5_txq_ctrl, txq);
> > -	struct ibv_mr *mr;
> > +	struct mlx5_mr *mr;
> >  
> >  	/* Add a new entry, register MR first. */
> >  	DEBUG("%p: discovered new memory pool \"%s\" (%p)",
> >  	      (void *)txq_ctrl, mp->name, (void *)mp);
> > -	mr = mlx5_mp2mr(txq_ctrl->priv->pd, mp);
> > +	mr = priv_mr_get(txq_ctrl->priv, mp);
> > +	if (mr == NULL)
> > +		mr = priv_mr_new(txq_ctrl->priv, mp);
> >  	if (unlikely(mr == NULL)) {
> >  		DEBUG("%p: unable to configure MR, ibv_reg_mr() failed.",
> >  		      (void *)txq_ctrl);
> > @@ -196,20 +151,17 @@ mlx5_txq_mp2mr_reg(struct mlx5_txq_data *txq, struct rte_mempool *mp,
> >  		DEBUG("%p: MR <-> MP table full, dropping oldest entry.",
> >  		      (void *)txq_ctrl);
> >  		--idx;
> > -		claim_zero(ibv_dereg_mr(txq_ctrl->txq.mp2mr[0].mr));
> > +		priv_mr_release(txq_ctrl->priv, txq_ctrl->txq.mp2mr[0]);
> In this function, txq_ctrl->txq can be replaced with txq.
Indeed,
>[...]
> >  	}
> >  	txq->mr_cache_idx = 0;
> > -	return mlx5_txq_mp2mr_reg(txq, mlx5_tx_mb2mp(mb), i);
> > +	lkey = mlx5_txq_mp2mr_reg(txq, mlx5_tx_mb2mp(mb), i);
> > +	/*
> > +	 * Request the reference to use in this queue, the original one is
> > +	 * kept by the control plane.
> > +	 */
> > +	if (lkey != (uint32_t)-1)
> > +		rte_atomic32_inc(&txq->mp2mr[i]->refcnt);
> If mp2mr is overflowed (i == RTE_DIM(txq->mp2mr)), then mp2mr[0] will be removed
> with shifting other slots and the new entry will be added at the end. But
> referencing txq->mp2mr[i] would be illegal - out of range.
You are right, I missed that one,
Both will be updated in a v3,
Thanks,
-- 
Nélio Laranjeiro
6WIND
^ permalink raw reply	[flat|nested] 129+ messages in thread
 
 
- * [dpdk-dev] [PATCH v2 10/30] net/mlx5: separate DPDK from Verbs Rx queue objects
       [not found] ` <cover.1507207731.git.nelio.laranjeiro@6wind.com>
                     ` (8 preceding siblings ...)
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 09/30] net/mlx5: add reference counter on memory region Nelio Laranjeiro
@ 2017-10-05 12:49   ` Nelio Laranjeiro
  2017-10-06  3:26     ` Yongseok Koh
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 11/30] net/mlx5: separate DPDK from Verbs Tx " Nelio Laranjeiro
                     ` (19 subsequent siblings)
  29 siblings, 1 reply; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-05 12:49 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
Move Verbs objects to their own functions to allocate/release them
independently from the DPDK queue.  At the same time a reference counter is
added to help detect issues when the queue is being released but is
still in use somewhere else (flows for instance).
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5.c      |   3 +
 drivers/net/mlx5/mlx5.h      |   2 +-
 drivers/net/mlx5/mlx5_flow.c | 101 +++----
 drivers/net/mlx5/mlx5_rxq.c  | 635 +++++++++++++++++++++++++++----------------
 drivers/net/mlx5/mlx5_rxtx.h |  25 +-
 drivers/net/mlx5/mlx5_vlan.c |   2 +-
 6 files changed, 462 insertions(+), 306 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index eb0d6c5..2b7edef 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -253,6 +253,9 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	if (priv->reta_idx != NULL)
 		rte_free(priv->reta_idx);
 	priv_socket_uninit(priv);
+	ret = mlx5_priv_rxq_ibv_verify(priv);
+	if (ret)
+		WARN("%p: some Verbs Rx queue still remain", (void *)priv);
 	ret = priv_flow_verify(priv);
 	if (ret)
 		WARN("%p: some flows still remain", (void *)priv);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index f563722..48c0c8e 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -147,6 +147,7 @@ struct priv {
 	struct rte_flow_drop *flow_drop_queue; /* Flow drop queue. */
 	TAILQ_HEAD(mlx5_flows, rte_flow) flows; /* RTE Flow rules. */
 	LIST_HEAD(mr, mlx5_mr) mr; /* Memory region. */
+	LIST_HEAD(rxqibv, mlx5_rxq_ibv) rxqsibv; /* Verbs Rx queues. */
 	uint32_t link_speed_capa; /* Link speed capabilities. */
 	struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */
 	rte_spinlock_t lock; /* Lock for control functions. */
@@ -290,7 +291,6 @@ int mlx5_flow_flush(struct rte_eth_dev *, struct rte_flow_error *);
 int mlx5_flow_isolate(struct rte_eth_dev *, int, struct rte_flow_error *);
 int priv_flow_start(struct priv *);
 void priv_flow_stop(struct priv *);
-int priv_flow_rxq_in_use(struct priv *, struct mlx5_rxq_data *);
 int priv_flow_verify(struct priv *);
 
 /* mlx5_socket.c */
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 193a90b..362ec91 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -95,11 +95,11 @@ struct rte_flow {
 	struct ibv_flow *ibv_flow; /**< Verbs flow. */
 	struct ibv_wq *wq; /**< Verbs work queue. */
 	struct ibv_cq *cq; /**< Verbs completion queue. */
-	uint16_t rxqs_n; /**< Number of queues in this flow, 0 if drop queue. */
 	uint32_t mark:1; /**< Set if the flow is marked. */
 	uint32_t drop:1; /**< Drop queue. */
 	uint64_t hash_fields; /**< Fields that participate in the hash. */
-	struct mlx5_rxq_data *rxqs[]; /**< Pointer to the queues array. */
+	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< List of queues. */
+	uint16_t queues_n; /**< Number of queues in the list. */
 };
 
 /** Static initializer for items. */
@@ -1096,23 +1096,21 @@ priv_flow_create_action_queue(struct priv *priv,
 	assert(priv->pd);
 	assert(priv->ctx);
 	assert(!flow->actions.drop);
-	rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow) +
-			      sizeof(*rte_flow->rxqs) * flow->actions.queues_n,
-			      0);
+	rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
 	if (!rte_flow) {
 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
 				   NULL, "cannot allocate flow memory");
 		return NULL;
 	}
 	for (i = 0; i < flow->actions.queues_n; ++i) {
-		struct mlx5_rxq_ctrl *rxq;
-
-		rxq = container_of((*priv->rxqs)[flow->actions.queues[i]],
-				   struct mlx5_rxq_ctrl, rxq);
-		wqs[i] = rxq->wq;
-		rte_flow->rxqs[i] = &rxq->rxq;
-		++rte_flow->rxqs_n;
-		rxq->rxq.mark |= flow->actions.mark;
+		struct mlx5_rxq_ibv *rxq_ibv =
+			mlx5_priv_rxq_ibv_get(priv, flow->actions.queues[i]);
+
+		wqs[i] = rxq_ibv->wq;
+		rte_flow->queues[i] = flow->actions.queues[i];
+		++rte_flow->queues_n;
+		(*priv->rxqs)[flow->actions.queues[i]]->mark |=
+			flow->actions.mark;
 	}
 	/* finalise indirection table. */
 	for (j = 0; i < wqs_n; ++i, ++j) {
@@ -1290,6 +1288,8 @@ static void
 priv_flow_destroy(struct priv *priv,
 		  struct rte_flow *flow)
 {
+	unsigned int i;
+
 	TAILQ_REMOVE(&priv->flows, flow, next);
 	if (flow->ibv_flow)
 		claim_zero(ibv_destroy_flow(flow->ibv_flow));
@@ -1299,37 +1299,33 @@ priv_flow_destroy(struct priv *priv,
 		claim_zero(ibv_destroy_qp(flow->qp));
 	if (flow->ind_table)
 		claim_zero(ibv_destroy_rwq_ind_table(flow->ind_table));
-	if (flow->mark) {
+	for (i = 0; i != flow->queues_n; ++i) {
 		struct rte_flow *tmp;
-		struct mlx5_rxq_data *rxq;
-		uint32_t mark_n = 0;
-		uint32_t queue_n;
+		struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[flow->queues[i]];
+		struct mlx5_rxq_ctrl *rxq_ctrl =
+			container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
 
 		/*
 		 * To remove the mark from the queue, the queue must not be
 		 * present in any other marked flow (RSS or not).
 		 */
-		for (queue_n = 0; queue_n < flow->rxqs_n; ++queue_n) {
-			rxq = flow->rxqs[queue_n];
-			for (tmp = TAILQ_FIRST(&priv->flows);
-			     tmp;
-			     tmp = TAILQ_NEXT(tmp, next)) {
-				uint32_t tqueue_n;
+		if (flow->mark) {
+			int mark = 0;
+
+			TAILQ_FOREACH(tmp, &priv->flows, next) {
+				unsigned int j;
 
 				if (tmp->drop)
 					continue;
-				for (tqueue_n = 0;
-				     tqueue_n < tmp->rxqs_n;
-				     ++tqueue_n) {
-					struct mlx5_rxq_data *trxq;
-
-					trxq = tmp->rxqs[tqueue_n];
-					if (rxq == trxq)
-						++mark_n;
-				}
+				if (!tmp->mark)
+					continue;
+				for (j = 0; (j != tmp->queues_n) && !mark; j++)
+					if (tmp->queues[j] == flow->queues[i])
+						mark = 1;
 			}
-			rxq->mark = !!mark_n;
+			rxq_data->mark = mark;
 		}
+		mlx5_priv_rxq_ibv_release(priv, rxq_ctrl->ibv);
 	}
 free:
 	rte_free(flow->ibv_attr);
@@ -1523,8 +1519,8 @@ priv_flow_stop(struct priv *priv)
 		if (flow->mark) {
 			unsigned int n;
 
-			for (n = 0; n < flow->rxqs_n; ++n)
-				flow->rxqs[n]->mark = 0;
+			for (n = 0; n < flow->queues_n; ++n)
+				(*priv->rxqs)[flow->queues[n]]->mark = 0;
 		}
 		DEBUG("Flow %p removed", (void *)flow);
 	}
@@ -1566,39 +1562,8 @@ priv_flow_start(struct priv *priv)
 		if (flow->mark) {
 			unsigned int n;
 
-			for (n = 0; n < flow->rxqs_n; ++n)
-				flow->rxqs[n]->mark = 1;
-		}
-	}
-	return 0;
-}
-
-/**
- * Verify if the Rx queue is used in a flow.
- *
- * @param priv
- *   Pointer to private structure.
- * @param rxq
- *   Pointer to the queue to search.
- *
- * @return
- *   Nonzero if the queue is used by a flow.
- */
-int
-priv_flow_rxq_in_use(struct priv *priv, struct mlx5_rxq_data *rxq)
-{
-	struct rte_flow *flow;
-
-	for (flow = TAILQ_FIRST(&priv->flows);
-	     flow;
-	     flow = TAILQ_NEXT(flow, next)) {
-		unsigned int n;
-
-		if (flow->drop)
-			continue;
-		for (n = 0; n < flow->rxqs_n; ++n) {
-			if (flow->rxqs[n] == rxq)
-				return 1;
+			for (n = 0; n < flow->queues_n; ++n)
+				(*priv->rxqs)[flow->queues[n]]->mark = 1;
 		}
 	}
 	return 0;
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 0d645ec..89c2cdb 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -378,7 +378,7 @@ priv_create_hash_rxqs(struct priv *priv)
 
 		rxq_ctrl = container_of((*priv->rxqs)[(*priv->reta_idx)[i]],
 					struct mlx5_rxq_ctrl, rxq);
-		wqs[i] = rxq_ctrl->wq;
+		wqs[i] = rxq_ctrl->ibv->wq;
 	}
 	/* Get number of hash RX queues to configure. */
 	for (i = 0, hash_rxqs_n = 0; (i != ind_tables_n); ++i)
@@ -645,8 +645,6 @@ rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl, unsigned int elts_n)
 	/* Iterate on segments. */
 	for (i = 0; (i != elts_n); ++i) {
 		struct rte_mbuf *buf;
-		volatile struct mlx5_wqe_data_seg *scat =
-			&(*rxq_ctrl->rxq.wqes)[i];
 
 		buf = rte_pktmbuf_alloc(rxq_ctrl->rxq.mp);
 		if (buf == NULL) {
@@ -667,21 +665,12 @@ rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl, unsigned int elts_n)
 		DATA_LEN(buf) = rte_pktmbuf_tailroom(buf);
 		PKT_LEN(buf) = DATA_LEN(buf);
 		NB_SEGS(buf) = 1;
-		/* scat->addr must be able to store a pointer. */
-		assert(sizeof(scat->addr) >= sizeof(uintptr_t));
-		*scat = (struct mlx5_wqe_data_seg){
-			.addr =
-			    rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t)),
-			.byte_count = rte_cpu_to_be_32(DATA_LEN(buf)),
-			.lkey = rxq_ctrl->mr->lkey,
-		};
 		(*rxq_ctrl->rxq.elts)[i] = buf;
 	}
 	if (rxq_check_vec_support(&rxq_ctrl->rxq) > 0) {
 		struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
 		struct rte_mbuf *mbuf_init = &rxq->fake_mbuf;
 
-		assert(rxq->elts_n == rxq->cqe_n);
 		/* Initialize default rearm_data for vPMD. */
 		mbuf_init->data_off = RTE_PKTMBUF_HEADROOM;
 		rte_mbuf_refcnt_set(mbuf_init, 1);
@@ -759,76 +748,12 @@ void
 mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl)
 {
 	DEBUG("cleaning up %p", (void *)rxq_ctrl);
-	rxq_free_elts(rxq_ctrl);
-	if (rxq_ctrl->wq != NULL)
-		claim_zero(ibv_destroy_wq(rxq_ctrl->wq));
-	if (rxq_ctrl->cq != NULL)
-		claim_zero(ibv_destroy_cq(rxq_ctrl->cq));
-	if (rxq_ctrl->channel != NULL)
-		claim_zero(ibv_destroy_comp_channel(rxq_ctrl->channel));
-	if (rxq_ctrl->mr != NULL)
-		priv_mr_release(rxq_ctrl->priv, rxq_ctrl->mr);
+	if (rxq_ctrl->ibv)
+		mlx5_priv_rxq_ibv_release(rxq_ctrl->priv, rxq_ctrl->ibv);
 	memset(rxq_ctrl, 0, sizeof(*rxq_ctrl));
 }
 
 /**
- * Initialize RX queue.
- *
- * @param tmpl
- *   Pointer to RX queue control template.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-static inline int
-rxq_setup(struct mlx5_rxq_ctrl *tmpl)
-{
-	struct ibv_cq *ibcq = tmpl->cq;
-	struct mlx5dv_cq cq_info;
-	struct mlx5dv_rwq rwq;
-	const uint16_t desc_n =
-		(1 << tmpl->rxq.elts_n) + tmpl->priv->rx_vec_en *
-		MLX5_VPMD_DESCS_PER_LOOP;
-	struct rte_mbuf *(*elts)[desc_n] =
-		rte_calloc_socket("RXQ", 1, sizeof(*elts), 0, tmpl->socket);
-	struct mlx5dv_obj obj;
-	int ret = 0;
-
-	obj.cq.in = ibcq;
-	obj.cq.out = &cq_info;
-	obj.rwq.in = tmpl->wq;
-	obj.rwq.out = &rwq;
-	ret = mlx5dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_RWQ);
-	if (ret != 0) {
-		return -EINVAL;
-	}
-	if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
-		ERROR("Wrong MLX5_CQE_SIZE environment variable value: "
-		      "it should be set to %u", RTE_CACHE_LINE_SIZE);
-		return EINVAL;
-	}
-	if (elts == NULL)
-		return ENOMEM;
-	tmpl->rxq.rq_db = rwq.dbrec;
-	tmpl->rxq.cqe_n = log2above(cq_info.cqe_cnt);
-	tmpl->rxq.cq_ci = 0;
-	tmpl->rxq.rq_ci = 0;
-	tmpl->rxq.rq_pi = 0;
-	tmpl->rxq.cq_db = cq_info.dbrec;
-	tmpl->rxq.wqes =
-		(volatile struct mlx5_wqe_data_seg (*)[])
-		(uintptr_t)rwq.buf;
-	tmpl->rxq.cqes =
-		(volatile struct mlx5_cqe (*)[])
-		(uintptr_t)cq_info.buf;
-	tmpl->rxq.elts = elts;
-	tmpl->rxq.cq_uar = cq_info.cq_uar;
-	tmpl->rxq.cqn = cq_info.cqn;
-	tmpl->rxq.cq_arm_sn = 0;
-	return 0;
-}
-
-/**
  * Configure a RX queue.
  *
  * @param dev
@@ -853,29 +778,28 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
 	       const struct rte_eth_rxconf *conf, struct rte_mempool *mp)
 {
 	struct priv *priv = dev->data->dev_private;
+	const uint16_t desc_n =
+		desc + priv->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
 	struct mlx5_rxq_ctrl tmpl = {
 		.priv = priv,
 		.socket = socket,
 		.rxq = {
+			.elts = rte_calloc_socket("RXQ", 1,
+						  desc_n *
+						  sizeof(struct rte_mbuf *), 0,
+						  socket),
 			.elts_n = log2above(desc),
 			.mp = mp,
 			.rss_hash = priv->rxqs_n > 1,
 		},
 	};
-	struct ibv_wq_attr mod;
-	union {
-		struct ibv_cq_init_attr_ex cq;
-		struct ibv_wq_init_attr wq;
-		struct ibv_cq_ex cq_attr;
-	} attr;
 	unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
-	unsigned int cqe_n = desc - 1;
-	const uint16_t desc_n =
-		desc + priv->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
 	struct rte_mbuf *(*elts)[desc_n] = NULL;
 	int ret = 0;
 
 	(void)conf; /* Thresholds configuration (ignored). */
+	if (dev->data->dev_conf.intr_conf.rxq)
+		tmpl.irq = 1;
 	/* Enable scattered packets support for this queue if necessary. */
 	assert(mb_len >= RTE_PKTMBUF_HEADROOM);
 	if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
@@ -928,77 +852,13 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
 	if (priv->hw_csum_l2tun)
 		tmpl.rxq.csum_l2tun =
 			!!dev->data->dev_conf.rxmode.hw_ip_checksum;
-	/* Use the entire RX mempool as the memory region. */
-	tmpl.mr = priv_mr_get(priv, mp);
-	if (tmpl.mr == NULL) {
-		tmpl.mr = priv_mr_new(priv, mp);
-		if (tmpl.mr == NULL) {
-			ret = EINVAL;
-			ERROR("%p: MR creation failure: %s",
-			      (void *)dev, strerror(ret));
-			goto error;
-		}
-	}
-	if (dev->data->dev_conf.intr_conf.rxq) {
-		tmpl.channel = ibv_create_comp_channel(priv->ctx);
-		if (tmpl.channel == NULL) {
-			ret = ENOMEM;
-			ERROR("%p: Rx interrupt completion channel creation"
-			      " failure: %s",
-			      (void *)dev, strerror(ret));
-			goto error;
-		}
-	}
-	attr.cq = (struct ibv_cq_init_attr_ex){
-		.comp_mask = 0,
-	};
-	if (priv->cqe_comp) {
-		attr.cq.comp_mask |= IBV_CQ_INIT_ATTR_MASK_FLAGS;
-		attr.cq.flags |= MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE;
-		/*
-		 * For vectorized Rx, it must not be doubled in order to
-		 * make cq_ci and rq_ci aligned.
-		 */
-		if (rxq_check_vec_support(&tmpl.rxq) < 0)
-			cqe_n = (desc * 2) - 1; /* Double the number of CQEs. */
-	}
-	tmpl.cq = ibv_create_cq(priv->ctx, cqe_n, NULL, tmpl.channel, 0);
-	if (tmpl.cq == NULL) {
-		ret = ENOMEM;
-		ERROR("%p: CQ creation failure: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	DEBUG("priv->device_attr.max_qp_wr is %d",
-	      priv->device_attr.orig_attr.max_qp_wr);
-	DEBUG("priv->device_attr.max_sge is %d",
-	      priv->device_attr.orig_attr.max_sge);
 	/* Configure VLAN stripping. */
 	tmpl.rxq.vlan_strip = (priv->hw_vlan_strip &&
 			       !!dev->data->dev_conf.rxmode.hw_vlan_strip);
-	attr.wq = (struct ibv_wq_init_attr){
-		.wq_context = NULL, /* Could be useful in the future. */
-		.wq_type = IBV_WQT_RQ,
-		/* Max number of outstanding WRs. */
-		.max_wr = desc >> tmpl.rxq.sges_n,
-		/* Max number of scatter/gather elements in a WR. */
-		.max_sge = 1 << tmpl.rxq.sges_n,
-		.pd = priv->pd,
-		.cq = tmpl.cq,
-		.comp_mask =
-			IBV_WQ_FLAGS_CVLAN_STRIPPING |
-			0,
-		.create_flags = (tmpl.rxq.vlan_strip ?
-				 IBV_WQ_FLAGS_CVLAN_STRIPPING :
-				 0),
-	};
 	/* By default, FCS (CRC) is stripped by hardware. */
 	if (dev->data->dev_conf.rxmode.hw_strip_crc) {
 		tmpl.rxq.crc_present = 0;
 	} else if (priv->hw_fcs_strip) {
-		/* Ask HW/Verbs to leave CRC in place when supported. */
-		attr.wq.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS;
-		attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
 		tmpl.rxq.crc_present = 1;
 	} else {
 		WARN("%p: CRC stripping has been disabled but will still"
@@ -1013,60 +873,21 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
 	      tmpl.rxq.crc_present ? "disabled" : "enabled",
 	      tmpl.rxq.crc_present << 2);
 #ifdef HAVE_IBV_WQ_FLAG_RX_END_PADDING
-	if (!mlx5_getenv_int("MLX5_PMD_ENABLE_PADDING"))
+	if (!mlx5_getenv_int("MLX5_PMD_ENABLE_PADDING")) {
 		; /* Nothing else to do. */
-	else if (priv->hw_padding) {
+	} else if (priv->hw_padding) {
 		INFO("%p: enabling packet padding on queue %p",
 		     (void *)dev, (void *)rxq_ctrl);
-		attr.wq.create_flags |= IBV_WQ_FLAG_RX_END_PADDING;
-		attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
-	} else
+	} else {
 		WARN("%p: packet padding has been requested but is not"
 		     " supported, make sure MLNX_OFED and firmware are"
 		     " up to date",
 		     (void *)dev);
-#endif
-
-	tmpl.wq = ibv_create_wq(priv->ctx, &attr.wq);
-	if (tmpl.wq == NULL) {
-		ret = (errno ? errno : EINVAL);
-		ERROR("%p: WQ creation failure: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	/*
-	 * Make sure number of WRs*SGEs match expectations since a queue
-	 * cannot allocate more than "desc" buffers.
-	 */
-	if (((int)attr.wq.max_wr != (desc >> tmpl.rxq.sges_n)) ||
-	    ((int)attr.wq.max_sge != (1 << tmpl.rxq.sges_n))) {
-		ERROR("%p: requested %u*%u but got %u*%u WRs*SGEs",
-		      (void *)dev,
-		      (desc >> tmpl.rxq.sges_n), (1 << tmpl.rxq.sges_n),
-		      attr.wq.max_wr, attr.wq.max_sge);
-		ret = EINVAL;
-		goto error;
 	}
+#endif
 	/* Save port ID. */
 	tmpl.rxq.port_id = dev->data->port_id;
 	DEBUG("%p: RTE port ID: %u", (void *)rxq_ctrl, tmpl.rxq.port_id);
-	/* Change queue state to ready. */
-	mod = (struct ibv_wq_attr){
-		.attr_mask = IBV_WQ_ATTR_STATE,
-		.wq_state = IBV_WQS_RDY,
-	};
-	ret = ibv_modify_wq(tmpl.wq, &mod);
-	if (ret) {
-		ERROR("%p: WQ state to IBV_WQS_RDY failed: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	ret = rxq_setup(&tmpl);
-	if (ret) {
-		ERROR("%p: cannot initialize RX queue structure: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
 	ret = rxq_alloc_elts(&tmpl, desc);
 	if (ret) {
 		ERROR("%p: RXQ allocation failed: %s",
@@ -1085,17 +906,12 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
 	rte_free(tmpl.rxq.elts);
 	tmpl.rxq.elts = elts;
 	*rxq_ctrl = tmpl;
-	/* Update doorbell counter. */
-	rxq_ctrl->rxq.rq_ci = desc >> rxq_ctrl->rxq.sges_n;
-	rte_wmb();
-	*rxq_ctrl->rxq.rq_db = rte_cpu_to_be_32(rxq_ctrl->rxq.rq_ci);
 	DEBUG("%p: rxq updated with %p", (void *)rxq_ctrl, (void *)&tmpl);
 	assert(ret == 0);
 	return 0;
 error:
-	elts = tmpl.rxq.elts;
+	rte_free(tmpl.rxq.elts);
 	mlx5_rxq_cleanup(&tmpl);
-	rte_free(elts);
 	assert(ret > 0);
 	return ret;
 }
@@ -1185,14 +1001,20 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		}
 	}
 	ret = rxq_ctrl_setup(dev, rxq_ctrl, desc, socket, conf, mp);
-	if (ret)
+	if (ret) {
 		rte_free(rxq_ctrl);
-	else {
-		rxq_ctrl->rxq.stats.idx = idx;
-		DEBUG("%p: adding RX queue %p to list",
-		      (void *)dev, (void *)rxq_ctrl);
-		(*priv->rxqs)[idx] = &rxq_ctrl->rxq;
+		goto out;
+	}
+	rxq_ctrl->rxq.stats.idx = idx;
+	DEBUG("%p: adding RX queue %p to list",
+	      (void *)dev, (void *)rxq_ctrl);
+	(*priv->rxqs)[idx] = &rxq_ctrl->rxq;
+	rxq_ctrl->ibv = mlx5_priv_rxq_ibv_new(priv, idx);
+	if (!rxq_ctrl->ibv) {
+		ret = EAGAIN;
+		goto out;
 	}
+out:
 	priv_unlock(priv);
 	return -ret;
 }
@@ -1219,7 +1041,7 @@ mlx5_rx_queue_release(void *dpdk_rxq)
 	rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq);
 	priv = rxq_ctrl->priv;
 	priv_lock(priv);
-	if (priv_flow_rxq_in_use(priv, rxq))
+	if (!mlx5_priv_rxq_ibv_releasable(priv, rxq_ctrl->ibv))
 		rte_panic("Rx queue %p is still used by a flow and cannot be"
 			  " removed\n", (void *)rxq_ctrl);
 	for (i = 0; (i != priv->rxqs_n); ++i)
@@ -1264,15 +1086,14 @@ priv_rx_intr_vec_enable(struct priv *priv)
 	}
 	intr_handle->type = RTE_INTR_HANDLE_EXT;
 	for (i = 0; i != n; ++i) {
-		struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
-		struct mlx5_rxq_ctrl *rxq_ctrl =
-			container_of(rxq, struct mlx5_rxq_ctrl, rxq);
+		/* This rxq ibv must not be released in this function. */
+		struct mlx5_rxq_ibv *rxq_ibv = mlx5_priv_rxq_ibv_get(priv, i);
 		int fd;
 		int flags;
 		int rc;
 
 		/* Skip queues that cannot request interrupts. */
-		if (!rxq || !rxq_ctrl->channel) {
+		if (!rxq_ibv || !rxq_ibv->channel) {
 			/* Use invalid intr_vec[] index to disable entry. */
 			intr_handle->intr_vec[i] =
 				RTE_INTR_VEC_RXTX_OFFSET +
@@ -1286,7 +1107,7 @@ priv_rx_intr_vec_enable(struct priv *priv)
 			priv_rx_intr_vec_disable(priv);
 			return -1;
 		}
-		fd = rxq_ctrl->channel->fd;
+		fd = rxq_ibv->channel->fd;
 		flags = fcntl(fd, F_GETFL);
 		rc = fcntl(fd, F_SETFL, flags | O_NONBLOCK);
 		if (rc < 0) {
@@ -1316,7 +1137,27 @@ void
 priv_rx_intr_vec_disable(struct priv *priv)
 {
 	struct rte_intr_handle *intr_handle = priv->dev->intr_handle;
+	unsigned int i;
+	unsigned int rxqs_n = priv->rxqs_n;
+	unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
+
+	if (!priv->dev->data->dev_conf.intr_conf.rxq)
+		return;
+	for (i = 0; i != n; ++i) {
+		struct mlx5_rxq_ctrl *rxq_ctrl;
+		struct mlx5_rxq_data *rxq_data;
 
+		if (intr_handle->intr_vec[i] == RTE_INTR_VEC_RXTX_OFFSET +
+		    RTE_MAX_RXTX_INTR_VEC_ID)
+			continue;
+		/**
+		 * Need to access directly the queue to release the reference
+		 * kept in priv_rx_intr_vec_enable().
+		 */
+		rxq_data = (*priv->rxqs)[i];
+		rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
+		mlx5_priv_rxq_ibv_release(priv, rxq_ctrl->ibv);
+	}
 	rte_intr_free_epoll_fd(intr_handle);
 	free(intr_handle->intr_vec);
 	intr_handle->nb_efd = 0;
@@ -1363,16 +1204,30 @@ int
 mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
 	struct priv *priv = mlx5_get_priv(dev);
-	struct mlx5_rxq_data *rxq = (*priv->rxqs)[rx_queue_id];
-	struct mlx5_rxq_ctrl *rxq_ctrl =
-		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
+	struct mlx5_rxq_data *rxq_data;
+	struct mlx5_rxq_ctrl *rxq_ctrl;
 	int ret = 0;
 
-	if (!rxq || !rxq_ctrl->channel) {
+	priv_lock(priv);
+	rxq_data = (*priv->rxqs)[rx_queue_id];
+	if (!rxq_data) {
 		ret = EINVAL;
-	} else {
-		mlx5_arm_cq(rxq, rxq->cq_arm_sn);
+		goto exit;
+	}
+	rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
+	if (rxq_ctrl->irq) {
+		struct mlx5_rxq_ibv *rxq_ibv;
+
+		rxq_ibv = mlx5_priv_rxq_ibv_get(priv, rx_queue_id);
+		if (!rxq_ibv) {
+			ret = EINVAL;
+			goto exit;
+		}
+		mlx5_arm_cq(rxq_data, rxq_data->cq_arm_sn);
+		mlx5_priv_rxq_ibv_release(priv, rxq_ibv);
 	}
+exit:
+	priv_unlock(priv);
 	if (ret)
 		WARN("unable to arm interrupt on rx queue %d", rx_queue_id);
 	return -ret;
@@ -1393,25 +1248,345 @@ int
 mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
 	struct priv *priv = mlx5_get_priv(dev);
-	struct mlx5_rxq_data *rxq = (*priv->rxqs)[rx_queue_id];
-	struct mlx5_rxq_ctrl *rxq_ctrl =
-		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
+	struct mlx5_rxq_data *rxq_data;
+	struct mlx5_rxq_ctrl *rxq_ctrl;
+	struct mlx5_rxq_ibv *rxq_ibv = NULL;
 	struct ibv_cq *ev_cq;
 	void *ev_ctx;
-	int ret;
+	int ret = 0;
 
-	if (!rxq || !rxq_ctrl->channel) {
+	priv_lock(priv);
+	rxq_data = (*priv->rxqs)[rx_queue_id];
+	if (!rxq_data) {
 		ret = EINVAL;
-	} else {
-		ret = ibv_get_cq_event(rxq_ctrl->cq->channel, &ev_cq, &ev_ctx);
-		rxq->cq_arm_sn++;
-		if (ret || ev_cq != rxq_ctrl->cq)
-			ret = EINVAL;
+		goto exit;
+	}
+	rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
+	if (!rxq_ctrl->irq)
+		goto exit;
+	rxq_ibv = mlx5_priv_rxq_ibv_get(priv, rx_queue_id);
+	if (!rxq_ibv) {
+		ret = EINVAL;
+		goto exit;
+	}
+	ret = ibv_get_cq_event(rxq_ibv->channel, &ev_cq, &ev_ctx);
+	if (ret || ev_cq != rxq_ibv->cq) {
+		ret = EINVAL;
+		goto exit;
 	}
+	rxq_data->cq_arm_sn++;
+	ibv_ack_cq_events(rxq_ibv->cq, 1);
+exit:
+	if (rxq_ibv)
+		mlx5_priv_rxq_ibv_release(priv, rxq_ibv);
+	priv_unlock(priv);
 	if (ret)
 		WARN("unable to disable interrupt on rx queue %d",
 		     rx_queue_id);
-	else
-		ibv_ack_cq_events(rxq_ctrl->cq, 1);
 	return -ret;
 }
+
+/**
+ * Create the Rx queue Verbs object.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param idx
+ *   Queue index in DPDK Rx queue array
+ *
+ * @return
+ *   The Verbs object initialised if it can be created.
+ */
+struct mlx5_rxq_ibv*
+mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
+{
+	struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
+	struct mlx5_rxq_ctrl *rxq_ctrl =
+		container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
+	struct ibv_wq_attr mod;
+	union {
+		struct ibv_cq_init_attr_ex cq;
+		struct ibv_wq_init_attr wq;
+		struct ibv_cq_ex cq_attr;
+	} attr;
+	unsigned int cqe_n = (1 << rxq_data->elts_n) - 1;
+	struct mlx5_rxq_ibv *tmpl;
+	struct mlx5dv_cq cq_info;
+	struct mlx5dv_rwq rwq;
+	unsigned int i;
+	int ret = 0;
+	struct mlx5dv_obj obj;
+
+	assert(rxq_data);
+	assert(!rxq_ctrl->ibv);
+	tmpl = rte_calloc_socket(__func__, 1, sizeof(*tmpl), 0,
+				 rxq_ctrl->socket);
+	if (!tmpl) {
+		ERROR("%p: cannot allocate verbs resources",
+		       (void *)rxq_ctrl);
+		goto error;
+	}
+	tmpl->rxq_ctrl = rxq_ctrl;
+	/* Use the entire RX mempool as the memory region. */
+	tmpl->mr = priv_mr_get(priv, rxq_data->mp);
+	if (!tmpl->mr) {
+		tmpl->mr = priv_mr_new(priv, rxq_data->mp);
+		if (!tmpl->mr) {
+			ERROR("%p: MR creation failure", (void *)rxq_ctrl);
+			goto error;
+		}
+	}
+	if (rxq_ctrl->irq) {
+		tmpl->channel = ibv_create_comp_channel(priv->ctx);
+		if (!tmpl->channel) {
+			ERROR("%p: Comp Channel creation failure",
+			      (void *)rxq_ctrl);
+			goto error;
+		}
+	}
+	attr.cq = (struct ibv_cq_init_attr_ex){
+		.comp_mask = 0,
+	};
+	if (priv->cqe_comp) {
+		attr.cq.comp_mask |= IBV_CQ_INIT_ATTR_MASK_FLAGS;
+		attr.cq.flags |= MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE;
+		/*
+		 * For vectorized Rx, it must not be doubled in order to
+		 * make cq_ci and rq_ci aligned.
+		 */
+		if (rxq_check_vec_support(rxq_data) < 0)
+			cqe_n *= 2;
+	}
+	tmpl->cq = ibv_create_cq(priv->ctx, cqe_n, NULL, tmpl->channel, 0);
+	if (tmpl->cq == NULL) {
+		ERROR("%p: CQ creation failure", (void *)rxq_ctrl);
+		goto error;
+	}
+	DEBUG("priv->device_attr.max_qp_wr is %d",
+	      priv->device_attr.orig_attr.max_qp_wr);
+	DEBUG("priv->device_attr.max_sge is %d",
+	      priv->device_attr.orig_attr.max_sge);
+	attr.wq = (struct ibv_wq_init_attr){
+		.wq_context = NULL, /* Could be useful in the future. */
+		.wq_type = IBV_WQT_RQ,
+		/* Max number of outstanding WRs. */
+		.max_wr = (1 << rxq_data->elts_n) >> rxq_data->sges_n,
+		/* Max number of scatter/gather elements in a WR. */
+		.max_sge = 1 << rxq_data->sges_n,
+		.pd = priv->pd,
+		.cq = tmpl->cq,
+		.comp_mask =
+			IBV_WQ_FLAGS_CVLAN_STRIPPING |
+			0,
+		.create_flags = (rxq_data->vlan_strip ?
+				 IBV_WQ_FLAGS_CVLAN_STRIPPING :
+				 0),
+	};
+	/* By default, FCS (CRC) is stripped by hardware. */
+	if (rxq_data->crc_present) {
+		attr.wq.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS;
+		attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
+	}
+#ifdef HAVE_IBV_WQ_FLAG_RX_END_PADDING
+	if (priv->hw_padding) {
+		attr.wq.create_flags |= IBV_WQ_FLAG_RX_END_PADDING;
+		attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
+	}
+#endif
+	tmpl->wq = ibv_create_wq(priv->ctx, &attr.wq);
+	if (tmpl->wq == NULL) {
+		ERROR("%p: WQ creation failure", (void *)rxq_ctrl);
+		goto error;
+	}
+	/*
+	 * Make sure number of WRs*SGEs match expectations since a queue
+	 * cannot allocate more than "desc" buffers.
+	 */
+	if (((int)attr.wq.max_wr !=
+	     ((1 << rxq_data->elts_n) >> rxq_data->sges_n)) ||
+	    ((int)attr.wq.max_sge != (1 << rxq_data->sges_n))) {
+		ERROR("%p: requested %u*%u but got %u*%u WRs*SGEs",
+		      (void *)rxq_ctrl,
+		      ((1 << rxq_data->elts_n) >> rxq_data->sges_n),
+		      (1 << rxq_data->sges_n),
+		      attr.wq.max_wr, attr.wq.max_sge);
+		goto error;
+	}
+	/* Change queue state to ready. */
+	mod = (struct ibv_wq_attr){
+		.attr_mask = IBV_WQ_ATTR_STATE,
+		.wq_state = IBV_WQS_RDY,
+	};
+	ret = ibv_modify_wq(tmpl->wq, &mod);
+	if (ret) {
+		ERROR("%p: WQ state to IBV_WQS_RDY failed",
+		      (void *)rxq_ctrl);
+		goto error;
+	}
+	obj.cq.in = tmpl->cq;
+	obj.cq.out = &cq_info;
+	obj.rwq.in = tmpl->wq;
+	obj.rwq.out = &rwq;
+	ret = mlx5dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_RWQ);
+	if (ret != 0)
+		goto error;
+	if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
+		ERROR("Wrong MLX5_CQE_SIZE environment variable value: "
+		      "it should be set to %u", RTE_CACHE_LINE_SIZE);
+		goto error;
+	}
+	/* Fill the rings. */
+	rxq_data->wqes = (volatile struct mlx5_wqe_data_seg (*)[])
+		(uintptr_t)rwq.buf;
+	for (i = 0; (i != (unsigned int)(1 << rxq_data->elts_n)); ++i) {
+		struct rte_mbuf *buf = (*rxq_data->elts)[i];
+		volatile struct mlx5_wqe_data_seg *scat = &(*rxq_data->wqes)[i];
+
+		/* scat->addr must be able to store a pointer. */
+		assert(sizeof(scat->addr) >= sizeof(uintptr_t));
+		*scat = (struct mlx5_wqe_data_seg){
+			.addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
+								  uintptr_t)),
+			.byte_count = rte_cpu_to_be_32(DATA_LEN(buf)),
+			.lkey = tmpl->mr->lkey,
+		};
+	}
+	rxq_data->rq_db = rwq.dbrec;
+	rxq_data->cqe_n = log2above(cq_info.cqe_cnt);
+	rxq_data->cq_ci = 0;
+	rxq_data->rq_ci = 0;
+	rxq_data->rq_pi = 0;
+	rxq_data->zip = (struct rxq_zip){
+		.ai = 0,
+	};
+	rxq_data->cq_db = cq_info.dbrec;
+	rxq_data->cqes = (volatile struct mlx5_cqe (*)[])(uintptr_t)cq_info.buf;
+	/* Update doorbell counter. */
+	rxq_data->rq_ci = (1 << rxq_data->elts_n) >> rxq_data->sges_n;
+	rte_wmb();
+	*rxq_data->rq_db = rte_cpu_to_be_32(rxq_data->rq_ci);
+	DEBUG("%p: rxq updated with %p", (void *)rxq_ctrl, (void *)&tmpl);
+	rte_atomic32_inc(&tmpl->refcnt);
+	DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
+	      (void *)tmpl, rte_atomic32_read(&tmpl->refcnt));
+	LIST_INSERT_HEAD(&priv->rxqsibv, tmpl, next);
+	return tmpl;
+error:
+	if (tmpl->wq)
+		claim_zero(ibv_destroy_wq(tmpl->wq));
+	if (tmpl->cq)
+		claim_zero(ibv_destroy_cq(tmpl->cq));
+	if (tmpl->channel)
+		claim_zero(ibv_destroy_comp_channel(tmpl->channel));
+	if (tmpl->mr)
+		priv_mr_release(priv, tmpl->mr);
+	return NULL;
+}
+
+/**
+ * Get an Rx queue Verbs object.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param idx
+ *   Queue index in DPDK Rx queue array
+ *
+ * @return
+ *   The Verbs object if it exists.
+ */
+struct mlx5_rxq_ibv*
+mlx5_priv_rxq_ibv_get(struct priv *priv, uint16_t idx)
+{
+	struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
+	struct mlx5_rxq_ctrl *rxq_ctrl;
+
+	if (idx >= priv->rxqs_n)
+		return NULL;
+	if (!rxq_data)
+		return NULL;
+	rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
+	if (rxq_ctrl->ibv) {
+		priv_mr_get(priv, rxq_data->mp);
+		rte_atomic32_inc(&rxq_ctrl->ibv->refcnt);
+		DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
+		      (void *)rxq_ctrl->ibv,
+		      rte_atomic32_read(&rxq_ctrl->ibv->refcnt));
+	}
+	return rxq_ctrl->ibv;
+}
+
+/**
+ * Release an Rx verbs queue object.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param rxq_ibv
+ *   Verbs Rx queue object.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+int
+mlx5_priv_rxq_ibv_release(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv)
+{
+	int ret;
+
+	assert(rxq_ibv);
+	assert(rxq_ibv->wq);
+	assert(rxq_ibv->cq);
+	assert(rxq_ibv->mr);
+	ret = priv_mr_release(priv, rxq_ibv->mr);
+	if (!ret)
+		rxq_ibv->mr = NULL;
+	DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
+	      (void *)rxq_ibv, rte_atomic32_read(&rxq_ibv->refcnt));
+	if (rte_atomic32_dec_and_test(&rxq_ibv->refcnt)) {
+		rxq_free_elts(rxq_ibv->rxq_ctrl);
+		claim_zero(ibv_destroy_wq(rxq_ibv->wq));
+		claim_zero(ibv_destroy_cq(rxq_ibv->cq));
+		if (rxq_ibv->channel)
+			claim_zero(ibv_destroy_comp_channel(rxq_ibv->channel));
+		LIST_REMOVE(rxq_ibv, next);
+		rte_free(rxq_ibv);
+		return 0;
+	}
+	return EBUSY;
+}
+
+/**
+ * Verify the Verbs Rx queue list is empty
+ *
+ * @param priv
+ *  Pointer to private structure.
+ *
+ * @return the number of objects not released.
+ */
+int
+mlx5_priv_rxq_ibv_verify(struct priv *priv)
+{
+	int ret = 0;
+	struct mlx5_rxq_ibv *rxq_ibv;
+
+	LIST_FOREACH(rxq_ibv, &priv->rxqsibv, next) {
+		DEBUG("%p: Verbs Rx queue %p still referenced", (void *)priv,
+		      (void *)rxq_ibv);
+		++ret;
+	}
+	return ret;
+}
+
+/**
+ * Return true if a single reference exists on the object.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param rxq_ibv
+ *   Verbs Rx queue object.
+ */
+int
+mlx5_priv_rxq_ibv_releasable(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv)
+{
+	(void)priv;
+	assert(rxq_ibv);
+	return (rte_atomic32_read(&rxq_ibv->refcnt) == 1);
+}
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index b0f17c0..ae3009f 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -134,15 +134,24 @@ struct mlx5_rxq_data {
 	uint8_t cq_arm_sn; /* CQ arm seq number. */
 } __rte_cache_aligned;
 
-/* RX queue control descriptor. */
-struct mlx5_rxq_ctrl {
-	struct priv *priv; /* Back pointer to private data. */
+/* Verbs Rx queue elements. */
+struct mlx5_rxq_ibv {
+	LIST_ENTRY(mlx5_rxq_ibv) next; /* Pointer to the next element. */
+	rte_atomic32_t refcnt; /* Reference counter. */
+	struct mlx5_rxq_ctrl *rxq_ctrl; /* Back pointer to parent. */
 	struct ibv_cq *cq; /* Completion Queue. */
 	struct ibv_wq *wq; /* Work Queue. */
-	struct mlx5_mr *mr; /* Memory Region (for mp). */
 	struct ibv_comp_channel *channel;
-	unsigned int socket; /* CPU socket ID for allocations. */
+	struct mlx5_mr *mr; /* Memory Region (for mp). */
+};
+
+/* RX queue control descriptor. */
+struct mlx5_rxq_ctrl {
+	struct priv *priv; /* Back pointer to private data. */
+	struct mlx5_rxq_ibv *ibv; /* Verbs elements. */
 	struct mlx5_rxq_data rxq; /* Data path structure. */
+	unsigned int socket; /* CPU socket ID for allocations. */
+	unsigned int irq:1; /* Whether IRQ is enabled. */
 };
 
 /* Hash RX queue types. */
@@ -310,6 +319,11 @@ int priv_rx_intr_vec_enable(struct priv *priv);
 void priv_rx_intr_vec_disable(struct priv *priv);
 int mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id);
 int mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id);
+struct mlx5_rxq_ibv *mlx5_priv_rxq_ibv_new(struct priv *, uint16_t);
+struct mlx5_rxq_ibv *mlx5_priv_rxq_ibv_get(struct priv *, uint16_t);
+int mlx5_priv_rxq_ibv_release(struct priv *, struct mlx5_rxq_ibv *);
+int mlx5_priv_rxq_ibv_releasable(struct priv *, struct mlx5_rxq_ibv *);
+int mlx5_priv_rxq_ibv_verify(struct priv *);
 
 /* mlx5_txq.c */
 
@@ -347,7 +361,6 @@ uint16_t mlx5_rx_burst_vec(void *, struct rte_mbuf **, uint16_t);
 
 /* mlx5_mr.c */
 
-struct ibv_mr *mlx5_mp2mr(struct ibv_pd *, struct rte_mempool *);
 void mlx5_txq_mp2mr_iter(struct rte_mempool *, void *);
 uint32_t mlx5_txq_mp2mr_reg(struct mlx5_txq_data *, struct rte_mempool *,
 			    unsigned int);
diff --git a/drivers/net/mlx5/mlx5_vlan.c b/drivers/net/mlx5/mlx5_vlan.c
index 0d91591..d707984 100644
--- a/drivers/net/mlx5/mlx5_vlan.c
+++ b/drivers/net/mlx5/mlx5_vlan.c
@@ -154,7 +154,7 @@ priv_vlan_strip_queue_set(struct priv *priv, uint16_t idx, int on)
 		.flags = vlan_offloads,
 	};
 
-	err = ibv_modify_wq(rxq_ctrl->wq, &mod);
+	err = ibv_modify_wq(rxq_ctrl->ibv->wq, &mod);
 	if (err) {
 		ERROR("%p: failed to modified stripping mode: %s",
 		      (void *)priv, strerror(err));
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * Re: [dpdk-dev] [PATCH v2 10/30] net/mlx5: separate DPDK from Verbs Rx queue objects
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 10/30] net/mlx5: separate DPDK from Verbs Rx queue objects Nelio Laranjeiro
@ 2017-10-06  3:26     ` Yongseok Koh
  2017-10-06  8:52       ` Nélio Laranjeiro
  0 siblings, 1 reply; 129+ messages in thread
From: Yongseok Koh @ 2017-10-06  3:26 UTC (permalink / raw)
  To: Nelio Laranjeiro; +Cc: dev, adrien.mazarguil, ferruh.yigit
On Thu, Oct 05, 2017 at 02:49:42PM +0200, Nelio Laranjeiro wrote:
[...]
> +struct mlx5_rxq_ibv*
> +mlx5_priv_rxq_ibv_get(struct priv *priv, uint16_t idx)
> +{
> +	struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
> +	struct mlx5_rxq_ctrl *rxq_ctrl;
> +
> +	if (idx >= priv->rxqs_n)
> +		return NULL;
> +	if (!rxq_data)
> +		return NULL;
> +	rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
> +	if (rxq_ctrl->ibv) {
> +		priv_mr_get(priv, rxq_data->mp);
One rxq_ibv has one mr, as one rxq has one mp. As long as the rxq_ibv exists, the mr
can't be released. So, it looks unnecessary to increase the refcnt of the mr here.
> +		rte_atomic32_inc(&rxq_ctrl->ibv->refcnt);
> +		DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
> +		      (void *)rxq_ctrl->ibv,
> +		      rte_atomic32_read(&rxq_ctrl->ibv->refcnt));
> +	}
> +	return rxq_ctrl->ibv;
> +}
> +
> +/**
> + * Release an Rx verbs queue object.
> + *
> + * @param priv
> + *   Pointer to private structure.
> + * @param rxq_ibv
> + *   Verbs Rx queue object.
> + *
> + * @return
> + *   0 on success, errno value on failure.
> + */
> +int
> +mlx5_priv_rxq_ibv_release(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv)
> +{
> +	int ret;
> +
> +	assert(rxq_ibv);
> +	assert(rxq_ibv->wq);
> +	assert(rxq_ibv->cq);
> +	assert(rxq_ibv->mr);
> +	ret = priv_mr_release(priv, rxq_ibv->mr);
Like I mentioned above, this can be moved inside the following destruction part.
 
As current code is logically flawless,
Acked-by: Yongseok Koh <yskoh@mellanox.com>
 
Thanks
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * Re: [dpdk-dev] [PATCH v2 10/30] net/mlx5: separate DPDK from Verbs Rx queue objects
  2017-10-06  3:26     ` Yongseok Koh
@ 2017-10-06  8:52       ` Nélio Laranjeiro
  2017-10-06 22:57         ` Yongseok Koh
  0 siblings, 1 reply; 129+ messages in thread
From: Nélio Laranjeiro @ 2017-10-06  8:52 UTC (permalink / raw)
  To: Yongseok Koh; +Cc: dev, adrien.mazarguil, ferruh.yigit
On Thu, Oct 05, 2017 at 08:26:38PM -0700, Yongseok Koh wrote:
> On Thu, Oct 05, 2017 at 02:49:42PM +0200, Nelio Laranjeiro wrote:
> [...]
> > +struct mlx5_rxq_ibv*
> > +mlx5_priv_rxq_ibv_get(struct priv *priv, uint16_t idx)
> > +{
> > +	struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
> > +	struct mlx5_rxq_ctrl *rxq_ctrl;
> > +
> > +	if (idx >= priv->rxqs_n)
> > +		return NULL;
> > +	if (!rxq_data)
> > +		return NULL;
> > +	rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
> > +	if (rxq_ctrl->ibv) {
> > +		priv_mr_get(priv, rxq_data->mp);
> 
> One rxq_ibv has one mr as one rxq has one mp. As long as rxq_ibv exist, the mr
> can't be released. So, it looks unnecessary to increase refcnt of the mr here.
>[...]
But one MP can be shared among several queues (see eth_rx_queue_setup()
and eth_tx_queue_setup()), which means that a queue is not the sole
owner of the memory pool, and thus of the memory region.
As the memory region can be shared among several queues, it is necessary
to increase/decrease the ref count accordingly.
Thanks,
-- 
Nélio Laranjeiro
6WIND
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * Re: [dpdk-dev] [PATCH v2 10/30] net/mlx5: separate DPDK from Verbs Rx queue objects
  2017-10-06  8:52       ` Nélio Laranjeiro
@ 2017-10-06 22:57         ` Yongseok Koh
  0 siblings, 0 replies; 129+ messages in thread
From: Yongseok Koh @ 2017-10-06 22:57 UTC (permalink / raw)
  To: Nélio Laranjeiro; +Cc: dev, adrien.mazarguil, ferruh.yigit
On Fri, Oct 06, 2017 at 10:52:38AM +0200, Nélio Laranjeiro wrote:
> On Thu, Oct 05, 2017 at 08:26:38PM -0700, Yongseok Koh wrote:
> > On Thu, Oct 05, 2017 at 02:49:42PM +0200, Nelio Laranjeiro wrote:
> > [...]
> > > +struct mlx5_rxq_ibv*
> > > +mlx5_priv_rxq_ibv_get(struct priv *priv, uint16_t idx)
> > > +{
> > > +	struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
> > > +	struct mlx5_rxq_ctrl *rxq_ctrl;
> > > +
> > > +	if (idx >= priv->rxqs_n)
> > > +		return NULL;
> > > +	if (!rxq_data)
> > > +		return NULL;
> > > +	rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
> > > +	if (rxq_ctrl->ibv) {
> > > +		priv_mr_get(priv, rxq_data->mp);
> > 
> > One rxq_ibv has one mr as one rxq has one mp. As long as rxq_ibv exist, the mr
> > can't be released. So, it looks unnecessary to increase refcnt of the mr here.
> >[...]
> 
> But on MP can be shared among several queues, (see eth_rx_queue_setup()
> and eth_tx_queue_setup()), which means that a queue is not the single
> owner of the Memory pool and thus the memory region.
> 
> As the Memory region can be shared among several queues, it is necessary
> to increase/decrease the ref count accordingly.
Here again, as mr->refcnt is increased when the mr is first referenced/created in
mlx5_priv_rxq_ibv_new(), I thought it was redundant. If so, priv_mr_release()
can also be called just once, when rxq_ibv is really destroyed.
Thanks,
Yongseok
^ permalink raw reply	[flat|nested] 129+ messages in thread
 
 
 
- * [dpdk-dev] [PATCH v2 11/30] net/mlx5: separate DPDK from Verbs Tx queue objects
       [not found] ` <cover.1507207731.git.nelio.laranjeiro@6wind.com>
                     ` (9 preceding siblings ...)
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 10/30] net/mlx5: separate DPDK from Verbs Rx queue objects Nelio Laranjeiro
@ 2017-10-05 12:49   ` Nelio Laranjeiro
  2017-10-06  3:32     ` Yongseok Koh
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 12/30] net/mlx5: add reference counter on DPDK Tx queues Nelio Laranjeiro
                     ` (18 subsequent siblings)
  29 siblings, 1 reply; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-05 12:49 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
Move Verbs objects to their own functions to allocate/release them
independently from the DPDK queue.  At the same time, a reference counter is
added to help detect issues when the queue is being released but is
still in use somewhere else (flows for instance).
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5.c      |   3 +
 drivers/net/mlx5/mlx5.h      |   1 +
 drivers/net/mlx5/mlx5_rxtx.h |  18 +-
 drivers/net/mlx5/mlx5_txq.c  | 479 ++++++++++++++++++++++++++-----------------
 4 files changed, 308 insertions(+), 193 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 2b7edef..bb7cbe0 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -256,6 +256,9 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	ret = mlx5_priv_rxq_ibv_verify(priv);
 	if (ret)
 		WARN("%p: some Verbs Rx queue still remain", (void *)priv);
+	ret = mlx5_priv_txq_ibv_verify(priv);
+	if (ret)
+		WARN("%p: some Verbs Tx queue still remain", (void *)priv);
 	ret = priv_flow_verify(priv);
 	if (ret)
 		WARN("%p: some flows still remain", (void *)priv);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 48c0c8e..67d2edb 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -148,6 +148,7 @@ struct priv {
 	TAILQ_HEAD(mlx5_flows, rte_flow) flows; /* RTE Flow rules. */
 	LIST_HEAD(mr, mlx5_mr) mr; /* Memory region. */
 	LIST_HEAD(rxqibv, mlx5_rxq_ibv) rxqsibv; /* Verbs Rx queues. */
+	LIST_HEAD(txqibv, mlx5_txq_ibv) txqsibv; /* Verbs Tx queues. */
 	uint32_t link_speed_capa; /* Link speed capabilities. */
 	struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */
 	rte_spinlock_t lock; /* Lock for control functions. */
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index ae3009f..eabee67 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -287,12 +287,21 @@ struct mlx5_txq_data {
 	struct mlx5_txq_stats stats; /* TX queue counters. */
 } __rte_cache_aligned;
 
+/* Verbs Tx queue elements. */
+struct mlx5_txq_ibv {
+	LIST_ENTRY(mlx5_txq_ibv) next; /* Pointer to the next element. */
+	rte_atomic32_t refcnt; /* Reference counter. */
+	struct ibv_cq *cq; /* Completion Queue. */
+	struct ibv_qp *qp; /* Queue Pair. */
+};
+
 /* TX queue control descriptor. */
 struct mlx5_txq_ctrl {
 	struct priv *priv; /* Back pointer to private data. */
-	struct ibv_cq *cq; /* Completion Queue. */
-	struct ibv_qp *qp; /* Queue Pair. */
 	unsigned int socket; /* CPU socket ID for allocations. */
+	unsigned int max_inline_data; /* Max inline data. */
+	unsigned int max_tso_header; /* Max TSO header size. */
+	struct mlx5_txq_ibv *ibv; /* Verbs queue object. */
 	struct mlx5_txq_data txq; /* Data path structure. */
 	off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
 };
@@ -334,6 +343,11 @@ int mlx5_tx_queue_setup(struct rte_eth_dev *, uint16_t, uint16_t, unsigned int,
 			const struct rte_eth_txconf *);
 void mlx5_tx_queue_release(void *);
 int priv_tx_uar_remap(struct priv *priv, int fd);
+struct mlx5_txq_ibv *mlx5_priv_txq_ibv_new(struct priv *, uint16_t);
+struct mlx5_txq_ibv *mlx5_priv_txq_ibv_get(struct priv *, uint16_t);
+int mlx5_priv_txq_ibv_release(struct priv *, struct mlx5_txq_ibv *);
+int mlx5_priv_txq_ibv_releasable(struct priv *, struct mlx5_txq_ibv *);
+int mlx5_priv_txq_ibv_verify(struct priv *);
 
 /* mlx5_rxtx.c */
 
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 1899850..3a6ef39 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -75,13 +75,6 @@ txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl, unsigned int elts_n)
 
 	for (i = 0; (i != elts_n); ++i)
 		(*txq_ctrl->txq.elts)[i] = NULL;
-	for (i = 0; (i != (1u << txq_ctrl->txq.wqe_n)); ++i) {
-		volatile struct mlx5_wqe64 *wqe =
-			(volatile struct mlx5_wqe64 *)
-			txq_ctrl->txq.wqes + i;
-
-		memset((void *)(uintptr_t)wqe, 0x0, sizeof(*wqe));
-	}
 	DEBUG("%p: allocated and configured %u WRs", (void *)txq_ctrl, elts_n);
 	txq_ctrl->txq.elts_head = 0;
 	txq_ctrl->txq.elts_tail = 0;
@@ -138,74 +131,15 @@ mlx5_txq_cleanup(struct mlx5_txq_ctrl *txq_ctrl)
 
 	DEBUG("cleaning up %p", (void *)txq_ctrl);
 	txq_free_elts(txq_ctrl);
-	if (txq_ctrl->qp != NULL)
-		claim_zero(ibv_destroy_qp(txq_ctrl->qp));
-	if (txq_ctrl->cq != NULL)
-		claim_zero(ibv_destroy_cq(txq_ctrl->cq));
 	for (i = 0; (i != RTE_DIM(txq_ctrl->txq.mp2mr)); ++i)
 		if (txq_ctrl->txq.mp2mr[i])
 			priv_mr_release(txq_ctrl->priv, txq_ctrl->txq.mp2mr[i]);
+	if (txq_ctrl->ibv)
+		mlx5_priv_txq_ibv_release(txq_ctrl->priv, txq_ctrl->ibv);
 	memset(txq_ctrl, 0, sizeof(*txq_ctrl));
 }
 
 /**
- * Initialize TX queue.
- *
- * @param tmpl
- *   Pointer to TX queue control template.
- * @param txq_ctrl
- *   Pointer to TX queue control.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-static inline int
-txq_setup(struct mlx5_txq_ctrl *tmpl, struct mlx5_txq_ctrl *txq_ctrl)
-{
-	struct mlx5dv_qp qp;
-	struct ibv_cq *ibcq = tmpl->cq;
-	struct mlx5dv_cq cq_info;
-	struct mlx5dv_obj obj;
-	int ret = 0;
-
-	qp.comp_mask = MLX5DV_QP_MASK_UAR_MMAP_OFFSET;
-	obj.cq.in = ibcq;
-	obj.cq.out = &cq_info;
-	obj.qp.in = tmpl->qp;
-	obj.qp.out = &qp;
-	ret = mlx5dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_QP);
-	if (ret != 0) {
-		return -EINVAL;
-	}
-	if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
-		ERROR("Wrong MLX5_CQE_SIZE environment variable value: "
-		      "it should be set to %u", RTE_CACHE_LINE_SIZE);
-		return EINVAL;
-	}
-	tmpl->txq.cqe_n = log2above(cq_info.cqe_cnt);
-	tmpl->txq.qp_num_8s = tmpl->qp->qp_num << 8;
-	tmpl->txq.wqes = qp.sq.buf;
-	tmpl->txq.wqe_n = log2above(qp.sq.wqe_cnt);
-	tmpl->txq.qp_db = &qp.dbrec[MLX5_SND_DBR];
-	tmpl->txq.bf_reg = qp.bf.reg;
-	tmpl->txq.cq_db = cq_info.dbrec;
-	tmpl->txq.cqes =
-		(volatile struct mlx5_cqe (*)[])
-		(uintptr_t)cq_info.buf;
-	tmpl->txq.elts =
-		(struct rte_mbuf *(*)[1 << tmpl->txq.elts_n])
-		((uintptr_t)txq_ctrl + sizeof(*txq_ctrl));
-	if (qp.comp_mask | MLX5DV_QP_MASK_UAR_MMAP_OFFSET) {
-		tmpl->uar_mmap_offset = qp.uar_mmap_offset;
-	} else {
-		ERROR("Failed to retrieve UAR info, invalid libmlx5.so version");
-		return EINVAL;
-	}
-
-	return 0;
-}
-
-/**
  * Configure a TX queue.
  *
  * @param dev
@@ -232,22 +166,13 @@ mlx5_txq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_txq_ctrl *txq_ctrl,
 		.priv = priv,
 		.socket = socket,
 	};
-	union {
-		struct ibv_qp_init_attr_ex init;
-		struct ibv_cq_init_attr_ex cq;
-		struct ibv_qp_attr mod;
-		struct ibv_cq_ex cq_attr;
-	} attr;
-	unsigned int cqe_n;
 	const unsigned int max_tso_inline = ((MLX5_MAX_TSO_HEADER +
 					     (RTE_CACHE_LINE_SIZE - 1)) /
 					      RTE_CACHE_LINE_SIZE);
-	int ret = 0;
 
 	if (mlx5_getenv_int("MLX5_ENABLE_CQE_COMPRESSION")) {
-		ret = ENOTSUP;
 		ERROR("MLX5_ENABLE_CQE_COMPRESSION must never be set");
-		goto error;
+		return ENOTSUP;
 	}
 	tmpl.txq.flags = conf->txq_flags;
 	assert(desc > MLX5_TX_COMP_THRESH);
@@ -255,53 +180,10 @@ mlx5_txq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_txq_ctrl *txq_ctrl,
 	if (priv->mps == MLX5_MPW_ENHANCED)
 		tmpl.txq.mpw_hdr_dseg = priv->mpw_hdr_dseg;
 	/* MRs will be registered in mp2mr[] later. */
-	attr.cq = (struct ibv_cq_init_attr_ex){
-		.comp_mask = 0,
-	};
-	cqe_n = ((desc / MLX5_TX_COMP_THRESH) - 1) ?
-		((desc / MLX5_TX_COMP_THRESH) - 1) : 1;
-	if (priv->mps == MLX5_MPW_ENHANCED)
-		cqe_n += MLX5_TX_COMP_THRESH_INLINE_DIV;
-	tmpl.cq = ibv_create_cq(priv->ctx,
-				cqe_n,
-				NULL, NULL, 0);
-	if (tmpl.cq == NULL) {
-		ret = ENOMEM;
-		ERROR("%p: CQ creation failure: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
 	DEBUG("priv->device_attr.max_qp_wr is %d",
 	      priv->device_attr.orig_attr.max_qp_wr);
 	DEBUG("priv->device_attr.max_sge is %d",
 	      priv->device_attr.orig_attr.max_sge);
-	attr.init = (struct ibv_qp_init_attr_ex){
-		/* CQ to be associated with the send queue. */
-		.send_cq = tmpl.cq,
-		/* CQ to be associated with the receive queue. */
-		.recv_cq = tmpl.cq,
-		.cap = {
-			/* Max number of outstanding WRs. */
-			.max_send_wr =
-			 ((priv->device_attr.orig_attr.max_qp_wr < desc) ?
-			   priv->device_attr.orig_attr.max_qp_wr :
-			   desc),
-			/*
-			 * Max number of scatter/gather elements in a WR,
-			 * must be 1 to prevent libmlx5 from trying to affect
-			 * too much memory. TX gather is not impacted by the
-			 * priv->device_attr.max_sge limit and will still work
-			 * properly.
-			 */
-			.max_send_sge = 1,
-		},
-		.qp_type = IBV_QPT_RAW_PACKET,
-		/* Do *NOT* enable this, completions events are managed per
-		 * TX burst. */
-		.sq_sig_all = 0,
-		.pd = priv->pd,
-		.comp_mask = IBV_QP_INIT_ATTR_PD,
-	};
 	if (priv->txq_inline && (priv->txqs_n >= priv->txqs_inline)) {
 		unsigned int ds_cnt;
 
@@ -317,7 +199,7 @@ mlx5_txq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_txq_ctrl *txq_ctrl,
 			/* To minimize the size of data set, avoid requesting
 			 * too large WQ.
 			 */
-			attr.init.cap.max_inline_data =
+			tmpl.max_inline_data =
 				((RTE_MIN(priv->txq_inline,
 					  priv->inline_max_packet_sz) +
 				  (RTE_CACHE_LINE_SIZE - 1)) /
@@ -329,12 +211,12 @@ mlx5_txq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_txq_ctrl *txq_ctrl,
 			 * Adjust inline value as Verbs aggregates
 			 * tso_inline and txq_inline fields.
 			 */
-			attr.init.cap.max_inline_data = inline_diff > 0 ?
-							inline_diff *
-							RTE_CACHE_LINE_SIZE :
-							0;
+			tmpl.max_inline_data = inline_diff > 0 ?
+					       inline_diff *
+					       RTE_CACHE_LINE_SIZE :
+					       0;
 		} else {
-			attr.init.cap.max_inline_data =
+			tmpl.max_inline_data =
 				tmpl.txq.max_inline * RTE_CACHE_LINE_SIZE;
 		}
 		/*
@@ -345,8 +227,7 @@ mlx5_txq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_txq_ctrl *txq_ctrl,
 		 *	WQE ETH  (1 DS)
 		 *	Inline part (N DS)
 		 */
-		ds_cnt = 2 +
-			(attr.init.cap.max_inline_data / MLX5_WQE_DWORD_SIZE);
+		ds_cnt = 2 + (tmpl.max_inline_data / MLX5_WQE_DWORD_SIZE);
 		if (ds_cnt > MLX5_DSEG_MAX) {
 			unsigned int max_inline = (MLX5_DSEG_MAX - 2) *
 						   MLX5_WQE_DWORD_SIZE;
@@ -357,67 +238,20 @@ mlx5_txq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_txq_ctrl *txq_ctrl,
 			     "the maximum possible: %d\n",
 			     priv->txq_inline, max_inline);
 			tmpl.txq.max_inline = max_inline / RTE_CACHE_LINE_SIZE;
-			attr.init.cap.max_inline_data = max_inline;
 		}
 	}
 	if (priv->tso) {
-		attr.init.max_tso_header =
-			max_tso_inline * RTE_CACHE_LINE_SIZE;
-		attr.init.comp_mask |= IBV_QP_INIT_ATTR_MAX_TSO_HEADER;
+		tmpl.max_tso_header = max_tso_inline * RTE_CACHE_LINE_SIZE;
 		tmpl.txq.max_inline = RTE_MAX(tmpl.txq.max_inline,
 					      max_tso_inline);
 		tmpl.txq.tso_en = 1;
 	}
 	if (priv->tunnel_en)
 		tmpl.txq.tunnel_en = 1;
-	tmpl.qp = ibv_create_qp_ex(priv->ctx, &attr.init);
-	if (tmpl.qp == NULL) {
-		ret = (errno ? errno : EINVAL);
-		ERROR("%p: QP creation failure: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	DEBUG("TX queue capabilities: max_send_wr=%u, max_send_sge=%u,"
-	      " max_inline_data=%u",
-	      attr.init.cap.max_send_wr,
-	      attr.init.cap.max_send_sge,
-	      attr.init.cap.max_inline_data);
-	attr.mod = (struct ibv_qp_attr){
-		/* Move the QP to this state. */
-		.qp_state = IBV_QPS_INIT,
-		/* Primary port number. */
-		.port_num = priv->port
-	};
-	ret = ibv_modify_qp(tmpl.qp, &attr.mod,
-			    (IBV_QP_STATE | IBV_QP_PORT));
-	if (ret) {
-		ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	ret = txq_setup(&tmpl, txq_ctrl);
-	if (ret) {
-		ERROR("%p: cannot initialize TX queue structure: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
+	tmpl.txq.elts =
+		(struct rte_mbuf *(*)[1 << tmpl.txq.elts_n])
+		((uintptr_t)txq_ctrl + sizeof(*txq_ctrl));
 	txq_alloc_elts(&tmpl, desc);
-	attr.mod = (struct ibv_qp_attr){
-		.qp_state = IBV_QPS_RTR
-	};
-	ret = ibv_modify_qp(tmpl.qp, &attr.mod, IBV_QP_STATE);
-	if (ret) {
-		ERROR("%p: QP state to IBV_QPS_RTR failed: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	attr.mod.qp_state = IBV_QPS_RTS;
-	ret = ibv_modify_qp(tmpl.qp, &attr.mod, IBV_QP_STATE);
-	if (ret) {
-		ERROR("%p: QP state to IBV_QPS_RTS failed: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
 	/* Clean up txq in case we're reinitializing it. */
 	DEBUG("%p: cleaning-up old txq just in case", (void *)txq_ctrl);
 	mlx5_txq_cleanup(txq_ctrl);
@@ -425,12 +259,7 @@ mlx5_txq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_txq_ctrl *txq_ctrl,
 	DEBUG("%p: txq updated with %p", (void *)txq_ctrl, (void *)&tmpl);
 	/* Pre-register known mempools. */
 	rte_mempool_walk(mlx5_txq_mp2mr_iter, txq_ctrl);
-	assert(ret == 0);
 	return 0;
-error:
-	mlx5_txq_cleanup(&tmpl);
-	assert(ret > 0);
-	return ret;
 }
 
 /**
@@ -521,14 +350,22 @@ mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		}
 	}
 	ret = mlx5_txq_ctrl_setup(dev, txq_ctrl, desc, socket, conf);
-	if (ret)
+	if (ret) {
 		rte_free(txq_ctrl);
-	else {
-		txq_ctrl->txq.stats.idx = idx;
-		DEBUG("%p: adding TX queue %p to list",
-		      (void *)dev, (void *)txq_ctrl);
-		(*priv->txqs)[idx] = &txq_ctrl->txq;
+		goto out;
 	}
+	txq_ctrl->txq.stats.idx = idx;
+	DEBUG("%p: adding TX queue %p to list",
+	      (void *)dev, (void *)txq_ctrl);
+	(*priv->txqs)[idx] = &txq_ctrl->txq;
+	txq_ctrl->ibv = mlx5_priv_txq_ibv_new(priv, idx);
+	if (!txq_ctrl->ibv) {
+		ret = EAGAIN;
+		goto out;
+	}
+	/* Update send callback. */
+	priv_dev_select_tx_function(priv, priv->dev);
+out:
 	priv_unlock(priv);
 	return -ret;
 }
@@ -622,3 +459,263 @@ priv_tx_uar_remap(struct priv *priv, int fd)
 	}
 	return 0;
 }
+
+/**
+ * Create the Tx queue Verbs object.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param idx
+ *   Queue index in DPDK Tx queue array.
+ *
+ * @return
+ *   The Verbs object initialised if it can be created.
+ */
+struct mlx5_txq_ibv*
+mlx5_priv_txq_ibv_new(struct priv *priv, uint16_t idx)
+{
+	struct mlx5_txq_data *txq_data = (*priv->txqs)[idx];
+	struct mlx5_txq_ctrl *txq_ctrl =
+		container_of(txq_data, struct mlx5_txq_ctrl, txq);
+	struct mlx5_txq_ibv tmpl;
+	struct mlx5_txq_ibv *txq_ibv;
+	union {
+		struct ibv_qp_init_attr_ex init;
+		struct ibv_cq_init_attr_ex cq;
+		struct ibv_qp_attr mod;
+		struct ibv_cq_ex cq_attr;
+	} attr;
+	unsigned int cqe_n;
+	struct mlx5dv_qp qp;
+	struct mlx5dv_cq cq_info;
+	struct mlx5dv_obj obj;
+	const int desc = 1 << txq_data->elts_n;
+	int ret = 0;
+
+	assert(txq_data);
+	if (mlx5_getenv_int("MLX5_ENABLE_CQE_COMPRESSION")) {
+		ERROR("MLX5_ENABLE_CQE_COMPRESSION must never be set");
+		goto error;
+	}
+	memset(&tmpl, 0, sizeof(struct mlx5_txq_ibv));
+	/* MRs will be registered in mp2mr[] later. */
+	attr.cq = (struct ibv_cq_init_attr_ex){
+		.comp_mask = 0,
+	};
+	cqe_n = ((desc / MLX5_TX_COMP_THRESH) - 1) ?
+		((desc / MLX5_TX_COMP_THRESH) - 1) : 1;
+	if (priv->mps == MLX5_MPW_ENHANCED)
+		cqe_n += MLX5_TX_COMP_THRESH_INLINE_DIV;
+	tmpl.cq = ibv_create_cq(priv->ctx, cqe_n, NULL, NULL, 0);
+	if (tmpl.cq == NULL) {
+		ERROR("%p: CQ creation failure", (void *)txq_ctrl);
+		goto error;
+	}
+	attr.init = (struct ibv_qp_init_attr_ex){
+		/* CQ to be associated with the send queue. */
+		.send_cq = tmpl.cq,
+		/* CQ to be associated with the receive queue. */
+		.recv_cq = tmpl.cq,
+		.cap = {
+			/* Max number of outstanding WRs. */
+			.max_send_wr =
+				((priv->device_attr.orig_attr.max_qp_wr <
+				  desc) ?
+				 priv->device_attr.orig_attr.max_qp_wr :
+				 desc),
+			/*
+			 * Max number of scatter/gather elements in a WR,
+			 * must be 1 to prevent libmlx5 from trying to affect
+			 * too much memory. TX gather is not impacted by the
+			 * priv->device_attr.max_sge limit and will still work
+			 * properly.
+			 */
+			.max_send_sge = 1,
+		},
+		.qp_type = IBV_QPT_RAW_PACKET,
+		/*
+		 * Do *NOT* enable this, completions events are managed per
+		 * Tx burst.
+		 */
+		.sq_sig_all = 0,
+		.pd = priv->pd,
+		.comp_mask = IBV_QP_INIT_ATTR_PD,
+	};
+	if (txq_data->inline_en)
+		attr.init.cap.max_inline_data = txq_ctrl->max_inline_data;
+	if (txq_data->tso_en) {
+		attr.init.max_tso_header = txq_ctrl->max_tso_header;
+		attr.init.comp_mask |= IBV_QP_INIT_ATTR_MAX_TSO_HEADER;
+	}
+	tmpl.qp = ibv_create_qp_ex(priv->ctx, &attr.init);
+	if (tmpl.qp == NULL) {
+		ERROR("%p: QP creation failure", (void *)txq_ctrl);
+		goto error;
+	}
+	attr.mod = (struct ibv_qp_attr){
+		/* Move the QP to this state. */
+		.qp_state = IBV_QPS_INIT,
+		/* Primary port number. */
+		.port_num = priv->port
+	};
+	ret = ibv_modify_qp(tmpl.qp, &attr.mod, (IBV_QP_STATE | IBV_QP_PORT));
+	if (ret) {
+		ERROR("%p: QP state to IBV_QPS_INIT failed", (void *)txq_ctrl);
+		goto error;
+	}
+	attr.mod = (struct ibv_qp_attr){
+		.qp_state = IBV_QPS_RTR
+	};
+	ret = ibv_modify_qp(tmpl.qp, &attr.mod, IBV_QP_STATE);
+	if (ret) {
+		ERROR("%p: QP state to IBV_QPS_RTR failed", (void *)txq_ctrl);
+		goto error;
+	}
+	attr.mod.qp_state = IBV_QPS_RTS;
+	ret = ibv_modify_qp(tmpl.qp, &attr.mod, IBV_QP_STATE);
+	if (ret) {
+		ERROR("%p: QP state to IBV_QPS_RTS failed", (void *)txq_ctrl);
+		goto error;
+	}
+	txq_ibv = rte_calloc_socket(__func__, 1, sizeof(struct mlx5_txq_ibv), 0,
+				    txq_ctrl->socket);
+	if (!txq_ibv) {
+		ERROR("%p: cannot allocate memory", (void *)txq_ctrl);
+		goto error;
+	}
+	obj.cq.in = tmpl.cq;
+	obj.cq.out = &cq_info;
+	obj.qp.in = tmpl.qp;
+	obj.qp.out = &qp;
+	ret = mlx5dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_QP);
+	if (ret != 0)
+		goto error;
+	if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
+		ERROR("Wrong MLX5_CQE_SIZE environment variable value: "
+		      "it should be set to %u", RTE_CACHE_LINE_SIZE);
+		goto error;
+	}
+	txq_data->cqe_n = log2above(cq_info.cqe_cnt);
+	txq_data->qp_num_8s = tmpl.qp->qp_num << 8;
+	txq_data->wqes = qp.sq.buf;
+	txq_data->wqe_n = log2above(qp.sq.wqe_cnt);
+	txq_data->qp_db = &qp.dbrec[MLX5_SND_DBR];
+	txq_data->bf_reg = qp.bf.reg;
+	txq_data->cq_db = cq_info.dbrec;
+	txq_data->cqes =
+		(volatile struct mlx5_cqe (*)[])
+		(uintptr_t)cq_info.buf;
+	txq_data->cq_ci = 0;
+	txq_data->cq_pi = 0;
+	txq_data->wqe_ci = 0;
+	txq_data->wqe_pi = 0;
+	txq_ibv->qp = tmpl.qp;
+	txq_ibv->cq = tmpl.cq;
+	rte_atomic32_inc(&txq_ibv->refcnt);
+	DEBUG("%p: Verbs Tx queue %p: refcnt %d", (void *)priv,
+	      (void *)txq_ibv, rte_atomic32_read(&txq_ibv->refcnt));
+	LIST_INSERT_HEAD(&priv->txqsibv, txq_ibv, next);
+	return txq_ibv;
+error:
+	if (tmpl.cq)
+		claim_zero(ibv_destroy_cq(tmpl.cq));
+	if (tmpl.qp)
+		claim_zero(ibv_destroy_qp(tmpl.qp));
+	return NULL;
+}
+
+/**
+ * Get a Tx queue Verbs object.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param idx
+ *   Queue index in DPDK Tx queue array.
+ *
+ * @return
+ *   The Verbs object if it exists.
+ */
+struct mlx5_txq_ibv*
+mlx5_priv_txq_ibv_get(struct priv *priv, uint16_t idx)
+{
+	struct mlx5_txq_ctrl *txq_ctrl;
+
+	if (idx >= priv->txqs_n)
+		return NULL;
+	if (!(*priv->txqs)[idx])
+		return NULL;
+	txq_ctrl = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
+	if (txq_ctrl->ibv) {
+		rte_atomic32_inc(&txq_ctrl->ibv->refcnt);
+		DEBUG("%p: Verbs Tx queue %p: refcnt %d", (void *)priv,
+		      (void *)txq_ctrl->ibv,
+		      rte_atomic32_read(&txq_ctrl->ibv->refcnt));
+	}
+	return txq_ctrl->ibv;
+}
+
+/**
+ * Release a Tx Verbs queue object.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param txq_ibv
+ *   Verbs Tx queue object.
+ *
+ * @return
+ *   0 on success, errno on failure.
+ */
+int
+mlx5_priv_txq_ibv_release(struct priv *priv, struct mlx5_txq_ibv *txq_ibv)
+{
+	(void)priv;
+	assert(txq_ibv);
+	DEBUG("%p: Verbs Tx queue %p: refcnt %d", (void *)priv,
+	      (void *)txq_ibv, rte_atomic32_read(&txq_ibv->refcnt));
+	if (rte_atomic32_dec_and_test(&txq_ibv->refcnt)) {
+		claim_zero(ibv_destroy_qp(txq_ibv->qp));
+		claim_zero(ibv_destroy_cq(txq_ibv->cq));
+		LIST_REMOVE(txq_ibv, next);
+		rte_free(txq_ibv);
+		return 0;
+	}
+	return EBUSY;
+}
+
+/**
+ * Return true if a single reference exists on the object.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param txq_ibv
+ *   Verbs Tx queue object.
+ */
+int
+mlx5_priv_txq_ibv_releasable(struct priv *priv, struct mlx5_txq_ibv *txq_ibv)
+{
+	(void)priv;
+	assert(txq_ibv);
+	return (rte_atomic32_read(&txq_ibv->refcnt) == 1);
+}
+
+/**
+ * Verify the Verbs Tx queue list is empty
+ *
+ * @param priv
+ *  Pointer to private structure.
+ *
+ * @return the number of objects not released.
+ */
+int
+mlx5_priv_txq_ibv_verify(struct priv *priv)
+{
+	int ret = 0;
+	struct mlx5_txq_ibv *txq_ibv;
+
+	LIST_FOREACH(txq_ibv, &priv->txqsibv, next) {
+		DEBUG("%p: Verbs Tx queue %p still referenced", (void *)priv,
+		      (void *)txq_ibv);
+		++ret;
+	}
+	return ret;
+}
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * Re: [dpdk-dev] [PATCH v2 11/30] net/mlx5: separate DPDK from Verbs Tx queue objects
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 11/30] net/mlx5: separate DPDK from Verbs Tx " Nelio Laranjeiro
@ 2017-10-06  3:32     ` Yongseok Koh
  0 siblings, 0 replies; 129+ messages in thread
From: Yongseok Koh @ 2017-10-06  3:32 UTC (permalink / raw)
  To: Nelio Laranjeiro; +Cc: dev, adrien.mazarguil, ferruh.yigit
On Thu, Oct 05, 2017 at 02:49:43PM +0200, Nelio Laranjeiro wrote:
> Move Verbs objects to their own functions to allocate/release them
> independently from the DPDK queue.  At the same time, a reference counter is
> added to help detect issues when the queue is being released but is
> still in use somewhere else (flows for instance).
> 
> Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
> ---
Acked-by: Yongseok Koh <yskoh@mellanox.com>
 
Thanks
^ permalink raw reply	[flat|nested] 129+ messages in thread 
 
- * [dpdk-dev] [PATCH v2 12/30] net/mlx5: add reference counter on DPDK Tx queues
       [not found] ` <cover.1507207731.git.nelio.laranjeiro@6wind.com>
                     ` (10 preceding siblings ...)
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 11/30] net/mlx5: separate DPDK from Verbs Tx " Nelio Laranjeiro
@ 2017-10-05 12:49   ` Nelio Laranjeiro
  2017-10-06  3:51     ` Yongseok Koh
  2017-10-09 18:33     ` Ferruh Yigit
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 13/30] net/mlx5: add reference counter on DPDK Rx queues Nelio Laranjeiro
                     ` (17 subsequent siblings)
  29 siblings, 2 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-05 12:49 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
Use the same design for DPDK queues as for Verbs queues for symmetry; this
also helps in fixing some issues, such as the DPDK queue release API, which is
not expected to fail.  With such a design, the queue is released when the
reference counter reaches 0.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5.c         |  16 +-
 drivers/net/mlx5/mlx5.h         |   1 +
 drivers/net/mlx5/mlx5_mr.c      |  73 ++++---
 drivers/net/mlx5/mlx5_rxtx.h    |  17 +-
 drivers/net/mlx5/mlx5_trigger.c |  57 ++++-
 drivers/net/mlx5/mlx5_txq.c     | 460 +++++++++++++++++++++++-----------------
 6 files changed, 383 insertions(+), 241 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index bb7cbe0..cbf22eb 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -225,17 +225,8 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	if (priv->txqs != NULL) {
 		/* XXX race condition if mlx5_tx_burst() is still running. */
 		usleep(1000);
-		for (i = 0; (i != priv->txqs_n); ++i) {
-			struct mlx5_txq_data *txq = (*priv->txqs)[i];
-			struct mlx5_txq_ctrl *txq_ctrl;
-
-			if (txq == NULL)
-				continue;
-			txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
-			(*priv->txqs)[i] = NULL;
-			mlx5_txq_cleanup(txq_ctrl);
-			rte_free(txq_ctrl);
-		}
+		for (i = 0; (i != priv->txqs_n); ++i)
+			mlx5_priv_txq_release(priv, i);
 		priv->txqs_n = 0;
 		priv->txqs = NULL;
 	}
@@ -259,6 +250,9 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	ret = mlx5_priv_txq_ibv_verify(priv);
 	if (ret)
 		WARN("%p: some Verbs Tx queue still remain", (void *)priv);
+	ret = mlx5_priv_txq_verify(priv);
+	if (ret)
+		WARN("%p: some Tx Queues still remain", (void *)priv);
 	ret = priv_flow_verify(priv);
 	if (ret)
 		WARN("%p: some flows still remain", (void *)priv);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 67d2edb..b20c39c 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -148,6 +148,7 @@ struct priv {
 	TAILQ_HEAD(mlx5_flows, rte_flow) flows; /* RTE Flow rules. */
 	LIST_HEAD(mr, mlx5_mr) mr; /* Memory region. */
 	LIST_HEAD(rxqibv, mlx5_rxq_ibv) rxqsibv; /* Verbs Rx queues. */
+	LIST_HEAD(txq, mlx5_txq_ctrl) txqsctrl; /* DPDK Tx queues. */
 	LIST_HEAD(txqibv, mlx5_txq_ibv) txqsibv; /* Verbs Tx queues. */
 	uint32_t link_speed_capa; /* Link speed capabilities. */
 	struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */
diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c
index 3f14c47..f0e6505 100644
--- a/drivers/net/mlx5/mlx5_mr.c
+++ b/drivers/net/mlx5/mlx5_mr.c
@@ -117,6 +117,8 @@ static int mlx5_check_mempool(struct rte_mempool *mp, uintptr_t *start,
  *
  * This function should only be called by txq_mp2mr().
  *
+ * @param priv
+ *   Pointer to private structure.
  * @param txq
  *   Pointer to TX queue structure.
  * @param[in] mp
@@ -128,8 +130,8 @@ static int mlx5_check_mempool(struct rte_mempool *mp, uintptr_t *start,
  *   mr->lkey on success, (uint32_t)-1 on failure.
  */
 uint32_t
-mlx5_txq_mp2mr_reg(struct mlx5_txq_data *txq, struct rte_mempool *mp,
-		   unsigned int idx)
+priv_txq_mp2mr_reg(struct priv *priv, struct mlx5_txq_data *txq,
+		   struct rte_mempool *mp, unsigned int idx)
 {
 	struct mlx5_txq_ctrl *txq_ctrl =
 		container_of(txq, struct mlx5_txq_ctrl, txq);
@@ -138,9 +140,9 @@ mlx5_txq_mp2mr_reg(struct mlx5_txq_data *txq, struct rte_mempool *mp,
 	/* Add a new entry, register MR first. */
 	DEBUG("%p: discovered new memory pool \"%s\" (%p)",
 	      (void *)txq_ctrl, mp->name, (void *)mp);
-	mr = priv_mr_get(txq_ctrl->priv, mp);
+	mr = priv_mr_get(priv, mp);
 	if (mr == NULL)
-		mr = priv_mr_new(txq_ctrl->priv, mp);
+		mr = priv_mr_new(priv, mp);
 	if (unlikely(mr == NULL)) {
 		DEBUG("%p: unable to configure MR, ibv_reg_mr() failed.",
 		      (void *)txq_ctrl);
@@ -151,7 +153,7 @@ mlx5_txq_mp2mr_reg(struct mlx5_txq_data *txq, struct rte_mempool *mp,
 		DEBUG("%p: MR <-> MP table full, dropping oldest entry.",
 		      (void *)txq_ctrl);
 		--idx;
-		priv_mr_release(txq_ctrl->priv, txq_ctrl->txq.mp2mr[0]);
+		priv_mr_release(priv, txq_ctrl->txq.mp2mr[0]);
 		memmove(&txq_ctrl->txq.mp2mr[0], &txq_ctrl->txq.mp2mr[1],
 			(sizeof(txq_ctrl->txq.mp2mr) -
 			 sizeof(txq_ctrl->txq.mp2mr[0])));
@@ -164,7 +166,37 @@ mlx5_txq_mp2mr_reg(struct mlx5_txq_data *txq, struct rte_mempool *mp,
 	return mr->lkey;
 }
 
-struct txq_mp2mr_mbuf_check_data {
+/**
+ * Register a Memory Region (MR) <-> Memory Pool (MP) association in
+ * txq->mp2mr[]. If mp2mr[] is full, remove an entry first.
+ *
+ * This function should only be called by txq_mp2mr().
+ *
+ * @param txq
+ *   Pointer to TX queue structure.
+ * @param[in] mp
+ *   Memory Pool for which a Memory Region lkey must be returned.
+ * @param idx
+ *   Index of the next available entry.
+ *
+ * @return
+ *   mr->lkey on success, (uint32_t)-1 on failure.
+ */
+uint32_t
+mlx5_txq_mp2mr_reg(struct mlx5_txq_data *txq, struct rte_mempool *mp,
+		   unsigned int idx)
+{
+	struct mlx5_txq_ctrl *txq_ctrl =
+		container_of(txq, struct mlx5_txq_ctrl, txq);
+	uint32_t lkey;
+
+	priv_lock(txq_ctrl->priv);
+	lkey = priv_txq_mp2mr_reg(txq_ctrl->priv, txq, mp, idx);
+	priv_unlock(txq_ctrl->priv);
+	return lkey;
+}
+
+struct mlx5_mp2mr_mbuf_check_data {
 	int ret;
 };
 
@@ -186,7 +218,7 @@ static void
 txq_mp2mr_mbuf_check(struct rte_mempool *mp, void *arg, void *obj,
 	uint32_t index __rte_unused)
 {
-	struct txq_mp2mr_mbuf_check_data *data = arg;
+	struct mlx5_mp2mr_mbuf_check_data *data = arg;
 	struct rte_mbuf *buf = obj;
 
 	/*
@@ -207,35 +239,24 @@ txq_mp2mr_mbuf_check(struct rte_mempool *mp, void *arg, void *obj,
  *   Pointer to TX queue structure.
  */
 void
-mlx5_txq_mp2mr_iter(struct rte_mempool *mp, void *arg)
+mlx5_mp2mr_iter(struct rte_mempool *mp, void *arg)
 {
-	struct mlx5_txq_ctrl *txq_ctrl = arg;
-	struct txq_mp2mr_mbuf_check_data data = {
+	struct priv *priv = (struct priv *)arg;
+	struct mlx5_mp2mr_mbuf_check_data data = {
 		.ret = 0,
 	};
-	uintptr_t start;
-	uintptr_t end;
-	unsigned int i;
+	struct mlx5_mr *mr;
 
 	/* Register mempool only if the first element looks like a mbuf. */
 	if (rte_mempool_obj_iter(mp, txq_mp2mr_mbuf_check, &data) == 0 ||
 			data.ret == -1)
 		return;
-	if (mlx5_check_mempool(mp, &start, &end) != 0) {
-		ERROR("mempool %p: not virtually contiguous",
-		      (void *)mp);
+	mr = priv_mr_get(priv, mp);
+	if (mr) {
+		priv_mr_release(priv, mr);
 		return;
 	}
-	for (i = 0; (i != RTE_DIM(txq_ctrl->txq.mp2mr)); ++i) {
-		if (unlikely(txq_ctrl->txq.mp2mr[i] == NULL)) {
-			/* Unknown MP, add a new MR for it. */
-			break;
-		}
-		if (start >= (uintptr_t)txq_ctrl->txq.mp2mr[i]->start &&
-		    end <= (uintptr_t)txq_ctrl->txq.mp2mr[i]->end)
-			return;
-	}
-	mlx5_txq_mp2mr_reg(&txq_ctrl->txq, mp, i);
+	priv_mr_new(priv, mp);
 }
 
 /**
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index eabee67..f3a2f41 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -297,6 +297,8 @@ struct mlx5_txq_ibv {
 
 /* TX queue control descriptor. */
 struct mlx5_txq_ctrl {
+	LIST_ENTRY(mlx5_txq_ctrl) next; /* Pointer to the next element. */
+	rte_atomic32_t refcnt; /* Reference counter. */
 	struct priv *priv; /* Back pointer to private data. */
 	unsigned int socket; /* CPU socket ID for allocations. */
 	unsigned int max_inline_data; /* Max inline data. */
@@ -336,9 +338,6 @@ int mlx5_priv_rxq_ibv_verify(struct priv *);
 
 /* mlx5_txq.c */
 
-void mlx5_txq_cleanup(struct mlx5_txq_ctrl *);
-int mlx5_txq_ctrl_setup(struct rte_eth_dev *, struct mlx5_txq_ctrl *, uint16_t,
-			unsigned int, const struct rte_eth_txconf *);
 int mlx5_tx_queue_setup(struct rte_eth_dev *, uint16_t, uint16_t, unsigned int,
 			const struct rte_eth_txconf *);
 void mlx5_tx_queue_release(void *);
@@ -348,6 +347,14 @@ struct mlx5_txq_ibv *mlx5_priv_txq_ibv_get(struct priv *, uint16_t);
 int mlx5_priv_txq_ibv_release(struct priv *, struct mlx5_txq_ibv *);
 int mlx5_priv_txq_ibv_releasable(struct priv *, struct mlx5_txq_ibv *);
 int mlx5_priv_txq_ibv_verify(struct priv *);
+struct mlx5_txq_ctrl *mlx5_priv_txq_new(struct priv *, uint16_t,
+					uint16_t, unsigned int,
+					const struct rte_eth_txconf *);
+struct mlx5_txq_ctrl *mlx5_priv_txq_get(struct priv *, uint16_t);
+int mlx5_priv_txq_release(struct priv *, uint16_t);
+int mlx5_priv_txq_releasable(struct priv *, uint16_t);
+int mlx5_priv_txq_verify(struct priv *);
+void txq_alloc_elts(struct mlx5_txq_ctrl *);
 
 /* mlx5_rxtx.c */
 
@@ -375,7 +382,9 @@ uint16_t mlx5_rx_burst_vec(void *, struct rte_mbuf **, uint16_t);
 
 /* mlx5_mr.c */
 
-void mlx5_txq_mp2mr_iter(struct rte_mempool *, void *);
+void mlx5_mp2mr_iter(struct rte_mempool *, void *);
+uint32_t priv_txq_mp2mr_reg(struct priv *priv, struct mlx5_txq_data *,
+			    struct rte_mempool *, unsigned int);
 uint32_t mlx5_txq_mp2mr_reg(struct mlx5_txq_data *, struct rte_mempool *,
 			    unsigned int);
 
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index eeb9585..7a12768 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -41,6 +41,44 @@
 #include "mlx5_rxtx.h"
 #include "mlx5_utils.h"
 
+static void
+priv_txq_stop(struct priv *priv)
+{
+	unsigned int idx = 0; /* Next Tx queue index to release. */
+
+	while (idx != priv->txqs_n)
+		mlx5_priv_txq_release(priv, idx++);
+}
+
+/**
+ * Start Tx queues: register priv->mr mempools, call txq_alloc_elts() and
+ * create the Verbs Tx queue of each configured queue.  Return 0 on success,
+ * a positive errno value on failure (priv_txq_stop() is then called).
+ */
+static int
+priv_txq_start(struct priv *priv)
+{
+	unsigned int i;
+
+	for (i = 0; i != priv->txqs_n; ++i) {
+		unsigned int idx = 0;
+		struct mlx5_mr *mr;
+		struct mlx5_txq_ctrl *txq_ctrl = mlx5_priv_txq_get(priv, i);
+
+		if (!txq_ctrl)
+			continue;
+		LIST_FOREACH(mr, &priv->mr, next)
+			priv_txq_mp2mr_reg(priv, &txq_ctrl->txq, mr->mp, idx++);
+		txq_alloc_elts(txq_ctrl);
+		txq_ctrl->ibv = mlx5_priv_txq_ibv_new(priv, i);
+		if (!txq_ctrl->ibv) {
+			priv_txq_stop(priv);
+			return ENOMEM;
+		}
+	}
+	return 0;
+}
+
 /**
  * DPDK callback to start the device.
  *
@@ -56,6 +94,7 @@ int
 mlx5_dev_start(struct rte_eth_dev *dev)
 {
 	struct priv *priv = dev->data->dev_private;
+	struct mlx5_mr *mr = NULL;
 	int err;
 
 	if (mlx5_is_secondary())
@@ -63,9 +102,17 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 
 	priv_lock(priv);
 	/* Update Rx/Tx callback. */
-	priv_dev_select_tx_function(priv, dev);
 	priv_dev_select_rx_function(priv, dev);
 	DEBUG("%p: allocating and configuring hash RX queues", (void *)dev);
+	rte_mempool_walk(mlx5_mp2mr_iter, priv);
+	err = priv_txq_start(priv);
+	if (err) {
+		ERROR("%p: TXQ allocation failed: %s",
+		      (void *)dev, strerror(err));
+		goto error;
+	}
+	/* Update send callback. */
+	priv_dev_select_tx_function(priv, dev);
 	err = priv_create_hash_rxqs(priv);
 	if (!err)
 		err = priv_rehash_flows(priv);
@@ -94,10 +141,13 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 	return 0;
 error:
 	/* Rollback. */
+	LIST_FOREACH(mr, &priv->mr, next)
+		priv_mr_release(priv, mr);
 	priv_special_flow_disable_all(priv);
 	priv_mac_addrs_disable(priv);
 	priv_destroy_hash_rxqs(priv);
 	priv_flow_stop(priv);
+	priv_txq_stop(priv);
 	priv_unlock(priv);
 	return -err;
 }
@@ -114,6 +164,7 @@ void
 mlx5_dev_stop(struct rte_eth_dev *dev)
 {
 	struct priv *priv = dev->data->dev_private;
+	struct mlx5_mr *mr;
 
 	if (mlx5_is_secondary())
 		return;
@@ -131,6 +182,10 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
 	priv_destroy_hash_rxqs(priv);
 	priv_flow_stop(priv);
 	priv_rx_intr_vec_disable(priv);
+	priv_txq_stop(priv);
+	LIST_FOREACH(mr, &priv->mr, next) {
+		priv_mr_release(priv, mr);
+	}
 	priv_dev_interrupt_handler_uninstall(priv, dev);
 	priv_unlock(priv);
 }
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 3a6ef39..e7c4ff6 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -65,12 +65,11 @@
  *
  * @param txq_ctrl
  *   Pointer to TX queue structure.
- * @param elts_n
- *   Number of elements to allocate.
  */
-static void
-txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl, unsigned int elts_n)
+void
+txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl)
 {
+	const unsigned int elts_n = 1 << txq_ctrl->txq.elts_n;
 	unsigned int i;
 
 	for (i = 0; (i != elts_n); ++i)
@@ -117,152 +116,6 @@ txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl)
 }
 
 /**
- * Clean up a TX queue.
- *
- * Destroy objects, free allocated memory and reset the structure for reuse.
- *
- * @param txq_ctrl
- *   Pointer to TX queue structure.
- */
-void
-mlx5_txq_cleanup(struct mlx5_txq_ctrl *txq_ctrl)
-{
-	size_t i;
-
-	DEBUG("cleaning up %p", (void *)txq_ctrl);
-	txq_free_elts(txq_ctrl);
-	for (i = 0; (i != RTE_DIM(txq_ctrl->txq.mp2mr)); ++i)
-		if (txq_ctrl->txq.mp2mr[i])
-			priv_mr_release(txq_ctrl->priv, txq_ctrl->txq.mp2mr[i]);
-	if (txq_ctrl->ibv)
-		mlx5_priv_txq_ibv_release(txq_ctrl->priv, txq_ctrl->ibv);
-	memset(txq_ctrl, 0, sizeof(*txq_ctrl));
-}
-
-/**
- * Configure a TX queue.
- *
- * @param dev
- *   Pointer to Ethernet device structure.
- * @param txq_ctrl
- *   Pointer to TX queue structure.
- * @param desc
- *   Number of descriptors to configure in queue.
- * @param socket
- *   NUMA socket on which memory must be allocated.
- * @param[in] conf
- *   Thresholds parameters.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-int
-mlx5_txq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_txq_ctrl *txq_ctrl,
-		    uint16_t desc, unsigned int socket,
-		    const struct rte_eth_txconf *conf)
-{
-	struct priv *priv = mlx5_get_priv(dev);
-	struct mlx5_txq_ctrl tmpl = {
-		.priv = priv,
-		.socket = socket,
-	};
-	const unsigned int max_tso_inline = ((MLX5_MAX_TSO_HEADER +
-					     (RTE_CACHE_LINE_SIZE - 1)) /
-					      RTE_CACHE_LINE_SIZE);
-
-	if (mlx5_getenv_int("MLX5_ENABLE_CQE_COMPRESSION")) {
-		ERROR("MLX5_ENABLE_CQE_COMPRESSION must never be set");
-		return ENOTSUP;
-	}
-	tmpl.txq.flags = conf->txq_flags;
-	assert(desc > MLX5_TX_COMP_THRESH);
-	tmpl.txq.elts_n = log2above(desc);
-	if (priv->mps == MLX5_MPW_ENHANCED)
-		tmpl.txq.mpw_hdr_dseg = priv->mpw_hdr_dseg;
-	/* MRs will be registered in mp2mr[] later. */
-	DEBUG("priv->device_attr.max_qp_wr is %d",
-	      priv->device_attr.orig_attr.max_qp_wr);
-	DEBUG("priv->device_attr.max_sge is %d",
-	      priv->device_attr.orig_attr.max_sge);
-	if (priv->txq_inline && (priv->txqs_n >= priv->txqs_inline)) {
-		unsigned int ds_cnt;
-
-		tmpl.txq.max_inline =
-			((priv->txq_inline + (RTE_CACHE_LINE_SIZE - 1)) /
-			 RTE_CACHE_LINE_SIZE);
-		tmpl.txq.inline_en = 1;
-		/* TSO and MPS can't be enabled concurrently. */
-		assert(!priv->tso || !priv->mps);
-		if (priv->mps == MLX5_MPW_ENHANCED) {
-			tmpl.txq.inline_max_packet_sz =
-				priv->inline_max_packet_sz;
-			/* To minimize the size of data set, avoid requesting
-			 * too large WQ.
-			 */
-			tmpl.max_inline_data =
-				((RTE_MIN(priv->txq_inline,
-					  priv->inline_max_packet_sz) +
-				  (RTE_CACHE_LINE_SIZE - 1)) /
-				 RTE_CACHE_LINE_SIZE) * RTE_CACHE_LINE_SIZE;
-		} else if (priv->tso) {
-			int inline_diff = tmpl.txq.max_inline - max_tso_inline;
-
-			/*
-			 * Adjust inline value as Verbs aggregates
-			 * tso_inline and txq_inline fields.
-			 */
-			tmpl.max_inline_data = inline_diff > 0 ?
-					       inline_diff *
-					       RTE_CACHE_LINE_SIZE :
-					       0;
-		} else {
-			tmpl.max_inline_data =
-				tmpl.txq.max_inline * RTE_CACHE_LINE_SIZE;
-		}
-		/*
-		 * Check if the inline size is too large in a way which
-		 * can make the WQE DS to overflow.
-		 * Considering in calculation:
-		 *	WQE CTRL (1 DS)
-		 *	WQE ETH  (1 DS)
-		 *	Inline part (N DS)
-		 */
-		ds_cnt = 2 + (tmpl.max_inline_data / MLX5_WQE_DWORD_SIZE);
-		if (ds_cnt > MLX5_DSEG_MAX) {
-			unsigned int max_inline = (MLX5_DSEG_MAX - 2) *
-						   MLX5_WQE_DWORD_SIZE;
-
-			max_inline = max_inline - (max_inline %
-						   RTE_CACHE_LINE_SIZE);
-			WARN("txq inline is too large (%d) setting it to "
-			     "the maximum possible: %d\n",
-			     priv->txq_inline, max_inline);
-			tmpl.txq.max_inline = max_inline / RTE_CACHE_LINE_SIZE;
-		}
-	}
-	if (priv->tso) {
-		tmpl.max_tso_header = max_tso_inline * RTE_CACHE_LINE_SIZE;
-		tmpl.txq.max_inline = RTE_MAX(tmpl.txq.max_inline,
-					      max_tso_inline);
-		tmpl.txq.tso_en = 1;
-	}
-	if (priv->tunnel_en)
-		tmpl.txq.tunnel_en = 1;
-	tmpl.txq.elts =
-		(struct rte_mbuf *(*)[1 << tmpl.txq.elts_n])
-		((uintptr_t)txq_ctrl + sizeof(*txq_ctrl));
-	txq_alloc_elts(&tmpl, desc);
-	/* Clean up txq in case we're reinitializing it. */
-	DEBUG("%p: cleaning-up old txq just in case", (void *)txq_ctrl);
-	mlx5_txq_cleanup(txq_ctrl);
-	*txq_ctrl = tmpl;
-	DEBUG("%p: txq updated with %p", (void *)txq_ctrl, (void *)&tmpl);
-	/* Pre-register known mempools. */
-	rte_mempool_walk(mlx5_txq_mp2mr_iter, txq_ctrl);
-	return 0;
-}
-
-/**
  * DPDK callback to configure a TX queue.
  *
  * @param dev
@@ -287,7 +140,7 @@ mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	struct mlx5_txq_data *txq = (*priv->txqs)[idx];
 	struct mlx5_txq_ctrl *txq_ctrl =
 		container_of(txq, struct mlx5_txq_ctrl, txq);
-	int ret;
+	int ret = 0;
 
 	if (mlx5_is_secondary())
 		return -E_RTE_SECONDARY;
@@ -314,57 +167,23 @@ mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		priv_unlock(priv);
 		return -EOVERFLOW;
 	}
-	if (txq != NULL) {
-		DEBUG("%p: reusing already allocated queue index %u (%p)",
-		      (void *)dev, idx, (void *)txq);
-		if (dev->data->dev_started) {
-			priv_unlock(priv);
-			return -EEXIST;
-		}
-		(*priv->txqs)[idx] = NULL;
-		mlx5_txq_cleanup(txq_ctrl);
-		/* Resize if txq size is changed. */
-		if (txq_ctrl->txq.elts_n != log2above(desc)) {
-			txq_ctrl = rte_realloc(txq_ctrl,
-					       sizeof(*txq_ctrl) +
-					       desc * sizeof(struct rte_mbuf *),
-					       RTE_CACHE_LINE_SIZE);
-			if (!txq_ctrl) {
-				ERROR("%p: unable to reallocate queue index %u",
-					(void *)dev, idx);
-				priv_unlock(priv);
-				return -ENOMEM;
-			}
-		}
-	} else {
-		txq_ctrl =
-			rte_calloc_socket("TXQ", 1,
-					  sizeof(*txq_ctrl) +
-					  desc * sizeof(struct rte_mbuf *),
-					  0, socket);
-		if (txq_ctrl == NULL) {
-			ERROR("%p: unable to allocate queue index %u",
-			      (void *)dev, idx);
-			priv_unlock(priv);
-			return -ENOMEM;
-		}
+	if (!mlx5_priv_txq_releasable(priv, idx)) {
+		ret = EBUSY;
+		ERROR("%p: unable to release queue index %u",
+		      (void *)dev, idx);
+		goto out;
 	}
-	ret = mlx5_txq_ctrl_setup(dev, txq_ctrl, desc, socket, conf);
-	if (ret) {
-		rte_free(txq_ctrl);
+	mlx5_priv_txq_release(priv, idx);
+	txq_ctrl = mlx5_priv_txq_new(priv, idx, desc, socket, conf);
+	if (!txq_ctrl) {
+		ERROR("%p: unable to allocate queue index %u",
+		      (void *)dev, idx);
+		ret = ENOMEM;
 		goto out;
 	}
-	txq_ctrl->txq.stats.idx = idx;
 	DEBUG("%p: adding TX queue %p to list",
 	      (void *)dev, (void *)txq_ctrl);
 	(*priv->txqs)[idx] = &txq_ctrl->txq;
-	txq_ctrl->ibv = mlx5_priv_txq_ibv_new(priv, idx);
-	if (!txq_ctrl->ibv) {
-		ret = EAGAIN;
-		goto out;
-	}
-	/* Update send callback. */
-	priv_dev_select_tx_function(priv, priv->dev);
 out:
 	priv_unlock(priv);
 	return -ret;
@@ -396,11 +215,9 @@ mlx5_tx_queue_release(void *dpdk_txq)
 		if ((*priv->txqs)[i] == txq) {
 			DEBUG("%p: removing TX queue %p from list",
 			      (void *)priv->dev, (void *)txq_ctrl);
-			(*priv->txqs)[i] = NULL;
+			mlx5_priv_txq_release(priv, i);
 			break;
 		}
-	mlx5_txq_cleanup(txq_ctrl);
-	rte_free(txq_ctrl);
 	priv_unlock(priv);
 }
 
@@ -719,3 +536,248 @@ mlx5_priv_txq_ibv_verify(struct priv *priv)
 	}
 	return ret;
 }
+
+/**
+ * Create a DPDK Tx queue.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param idx
+ *   TX queue index.
+ * @param desc
+ *   Number of descriptors to configure in queue.
+ * @param socket
+ *   NUMA socket on which memory must be allocated.
+ * @param[in] conf
+ *   Thresholds parameters.
+ *
+ * @return
+ *   A DPDK queue object on success, NULL if memory allocation fails.
+ */
+struct mlx5_txq_ctrl*
+mlx5_priv_txq_new(struct priv *priv, uint16_t idx, uint16_t desc,
+		  unsigned int socket,
+		  const struct rte_eth_txconf *conf)
+{
+	const unsigned int max_tso_inline =
+		((MLX5_MAX_TSO_HEADER + (RTE_CACHE_LINE_SIZE - 1)) /
+		 RTE_CACHE_LINE_SIZE);
+	struct mlx5_txq_ctrl *tmpl;
+
+	tmpl = rte_calloc_socket("TXQ", 1,
+				 sizeof(*tmpl) +
+				 desc * sizeof(struct rte_mbuf *),
+				 0, socket);
+	if (!tmpl)
+		return NULL;
+	assert(desc > MLX5_TX_COMP_THRESH);
+	tmpl->txq.flags = conf->txq_flags;
+	tmpl->priv = priv;
+	/* Keep the CPU socket ID for later allocations. */
+	tmpl->socket = socket;
+	tmpl->txq.elts_n = log2above(desc);
+	if (priv->mps == MLX5_MPW_ENHANCED)
+		tmpl->txq.mpw_hdr_dseg = priv->mpw_hdr_dseg;
+	/* MRs will be registered in mp2mr[] later. */
+	DEBUG("priv->device_attr.max_qp_wr is %d",
+	      priv->device_attr.orig_attr.max_qp_wr);
+	DEBUG("priv->device_attr.max_sge is %d",
+	      priv->device_attr.orig_attr.max_sge);
+	if (priv->txq_inline && (priv->txqs_n >= priv->txqs_inline)) {
+		unsigned int ds_cnt;
+
+		tmpl->txq.max_inline =
+			((priv->txq_inline + (RTE_CACHE_LINE_SIZE - 1)) /
+			 RTE_CACHE_LINE_SIZE);
+		tmpl->txq.inline_en = 1;
+		/* TSO and MPS can't be enabled concurrently. */
+		assert(!priv->tso || !priv->mps);
+		if (priv->mps == MLX5_MPW_ENHANCED) {
+			tmpl->txq.inline_max_packet_sz =
+				priv->inline_max_packet_sz;
+			/* To minimize the size of data set, avoid requesting
+			 * too large WQ.
+			 */
+			tmpl->max_inline_data =
+				((RTE_MIN(priv->txq_inline,
+					  priv->inline_max_packet_sz) +
+				  (RTE_CACHE_LINE_SIZE - 1)) /
+				 RTE_CACHE_LINE_SIZE) * RTE_CACHE_LINE_SIZE;
+		} else if (priv->tso) {
+			int inline_diff = tmpl->txq.max_inline - max_tso_inline;
+
+			/*
+			 * Adjust inline value as Verbs aggregates
+			 * tso_inline and txq_inline fields.
+			 */
+			tmpl->max_inline_data = inline_diff > 0 ?
+				inline_diff * RTE_CACHE_LINE_SIZE : 0;
+		} else {
+			tmpl->max_inline_data =
+				tmpl->txq.max_inline * RTE_CACHE_LINE_SIZE;
+		}
+		/*
+		 * Check if the inline size is too large in a way which
+		 * can make the WQE DS to overflow.
+		 * Considering in calculation:
+		 *      WQE CTRL (1 DS)
+		 *      WQE ETH  (1 DS)
+		 *      Inline part (N DS, from max_inline_data in bytes)
+		 */
+		ds_cnt = 2 + (tmpl->max_inline_data / MLX5_WQE_DWORD_SIZE);
+		if (ds_cnt > MLX5_DSEG_MAX) {
+			unsigned int max_inline = (MLX5_DSEG_MAX - 2) *
+						  MLX5_WQE_DWORD_SIZE;
+
+			max_inline = max_inline - (max_inline %
+						   RTE_CACHE_LINE_SIZE);
+			WARN("txq inline is too large (%d) setting it to "
+			     "the maximum possible: %d\n",
+			     priv->txq_inline, max_inline);
+			tmpl->txq.max_inline = max_inline / RTE_CACHE_LINE_SIZE;
+		}
+	}
+	if (priv->tso) {
+		tmpl->max_tso_header = max_tso_inline * RTE_CACHE_LINE_SIZE;
+		tmpl->txq.max_inline = RTE_MAX(tmpl->txq.max_inline,
+					       max_tso_inline);
+		tmpl->txq.tso_en = 1;
+	}
+	if (priv->tunnel_en)
+		tmpl->txq.tunnel_en = 1;
+	tmpl->txq.elts =
+		(struct rte_mbuf *(*)[1 << tmpl->txq.elts_n])(tmpl + 1);
+	tmpl->txq.stats.idx = idx;
+	rte_atomic32_inc(&tmpl->refcnt);
+	DEBUG("%p: Tx queue %p: refcnt %d", (void *)priv,
+	      (void *)tmpl, rte_atomic32_read(&tmpl->refcnt));
+	LIST_INSERT_HEAD(&priv->txqsctrl, tmpl, next);
+	return tmpl;
+}
+
+/**
+ * Get a Tx queue and increment its reference counter.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param idx
+ *   TX queue index.
+ *
+ * @return
+ *   A pointer to the queue if it exists, NULL otherwise.
+ */
+struct mlx5_txq_ctrl*
+mlx5_priv_txq_get(struct priv *priv, uint16_t idx)
+{
+	struct mlx5_txq_ctrl *ctrl = NULL;
+
+	if ((*priv->txqs)[idx]) {
+		ctrl = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl,
+				    txq);
+		unsigned int i;
+
+		mlx5_priv_txq_ibv_get(priv, idx);
+		for (i = 0; i != MLX5_PMD_TX_MP_CACHE; ++i) {
+			struct mlx5_mr *mr = NULL;
+
+			(void)mr; /* Otherwise only read by assert(). */
+			if (ctrl->txq.mp2mr[i]) {
+				mr = priv_mr_get(priv, ctrl->txq.mp2mr[i]->mp);
+				assert(mr);
+			}
+		}
+		rte_atomic32_inc(&ctrl->refcnt);
+		DEBUG("%p: Tx queue %p: refcnt %d", (void *)priv,
+		      (void *)ctrl, rte_atomic32_read(&ctrl->refcnt));
+	}
+	return ctrl;
+}
+
+/**
+ * Drop a reference on a Tx queue and free it once unreferenced.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param idx
+ *   TX queue index.
+ *
+ * @return
+ *   0 if the queue is absent or was freed, EBUSY if still referenced.
+ */
+int
+mlx5_priv_txq_release(struct priv *priv, uint16_t idx)
+{
+	struct mlx5_txq_data *data = (*priv->txqs)[idx];
+	struct mlx5_txq_ctrl *ctrl;
+	unsigned int n = 0;
+
+	if (!data)
+		return 0;
+	ctrl = container_of(data, struct mlx5_txq_ctrl, txq);
+	DEBUG("%p: Tx queue %p: refcnt %d", (void *)priv,
+	      (void *)ctrl, rte_atomic32_read(&ctrl->refcnt));
+	/* Forget the Verbs queue only when its release returns 0. */
+	if (ctrl->ibv && !mlx5_priv_txq_ibv_release(priv, ctrl->ibv))
+		ctrl->ibv = NULL;
+	/* Release every cached memory region and clear its slot. */
+	while (n != MLX5_PMD_TX_MP_CACHE) {
+		if (ctrl->txq.mp2mr[n]) {
+			priv_mr_release(priv, ctrl->txq.mp2mr[n]);
+			ctrl->txq.mp2mr[n] = NULL;
+		}
+		++n;
+	}
+	/* Stop here while other references remain. */
+	if (!rte_atomic32_dec_and_test(&ctrl->refcnt))
+		return EBUSY;
+	/* Last reference dropped: unlink and free the queue. */
+	txq_free_elts(ctrl);
+	LIST_REMOVE(ctrl, next);
+	rte_free(ctrl);
+	(*priv->txqs)[idx] = NULL;
+	return 0;
+}
+
+/**
+ * Check whether a Tx queue has a single reference left.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param idx
+ *   TX queue index.
+ *
+ * @return
+ *   1 if refcnt equals 1, 0 if it differs, -1 if no queue exists at idx.
+ */
+int
+mlx5_priv_txq_releasable(struct priv *priv, uint16_t idx)
+{
+	struct mlx5_txq_ctrl *ctrl;
+
+	if ((*priv->txqs)[idx] == NULL)
+		return -1;
+	ctrl = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
+	return (1 == rte_atomic32_read(&ctrl->refcnt));
+}
+
+/**
+ * Count and log the DPDK Tx queues still linked in priv->txqsctrl.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ *
+ * @return The number of Tx queue objects not released (0 if list is empty).
+ */
+int
+mlx5_priv_txq_verify(struct priv *priv)
+{
+	int leftover = 0;
+	struct mlx5_txq_ctrl *ctrl;
+
+	LIST_FOREACH(ctrl, &priv->txqsctrl, next) {
+		DEBUG("%p: Tx Queue %p still referenced", (void *)priv,
+		      (void *)ctrl);
+		leftover += 1;
+	}
+	return leftover;
+}
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * Re: [dpdk-dev] [PATCH v2 12/30] net/mlx5: add reference counter on DPDK Tx queues
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 12/30] net/mlx5: add reference counter on DPDK Tx queues Nelio Laranjeiro
@ 2017-10-06  3:51     ` Yongseok Koh
  2017-10-09 18:33     ` Ferruh Yigit
  1 sibling, 0 replies; 129+ messages in thread
From: Yongseok Koh @ 2017-10-06  3:51 UTC (permalink / raw)
  To: Nelio Laranjeiro; +Cc: dev, adrien.mazarguil, ferruh.yigit
On Thu, Oct 05, 2017 at 02:49:44PM +0200, Nelio Laranjeiro wrote:
> Use the same design for DPDK queue as for Verbs queue for symmetry, this
> also helps in fixing some issues like the DPDK release queue API which is
> not expected to fail.  With such design, the queue is released when the
> reference counters reaches 0.
> 
> Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
> ---
Acked-by: Yongseok Koh <yskoh@mellanox.com>
 
Thanks
^ permalink raw reply	[flat|nested] 129+ messages in thread 
- * Re: [dpdk-dev] [PATCH v2 12/30] net/mlx5: add reference counter on DPDK Tx queues
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 12/30] net/mlx5: add reference counter on DPDK Tx queues Nelio Laranjeiro
  2017-10-06  3:51     ` Yongseok Koh
@ 2017-10-09 18:33     ` Ferruh Yigit
  1 sibling, 0 replies; 129+ messages in thread
From: Ferruh Yigit @ 2017-10-09 18:33 UTC (permalink / raw)
  To: Nelio Laranjeiro, dev; +Cc: adrien.mazarguil, yskoh
On 10/5/2017 1:49 PM, Nelio Laranjeiro wrote:
> Use the same design for DPDK queue as for Verbs queue for symmetry, this
> also helps in fixing some issues like the DPDK release queue API which is
> not expected to fail.  With such design, the queue is released when the
> reference counters reaches 0.
> 
> Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
<...>
> +struct mlx5_txq_ctrl*
> +mlx5_priv_txq_get(struct priv *priv, uint16_t idx)
> +{
> +	struct mlx5_txq_ctrl *ctrl = NULL;
> +
> +	if ((*priv->txqs)[idx]) {
> +		ctrl = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl,
> +				    txq);
> +		unsigned int i;
> +
> +		mlx5_priv_txq_ibv_get(priv, idx);
> +		for (i = 0; i != MLX5_PMD_TX_MP_CACHE; ++i) {
> +			struct mlx5_mr *mr;
While applying, I slightly updated this as follows to fix an icc warning:
    struct mlx5_mr *mr = NULL;
Please let me know if you disagree with the update.
^ permalink raw reply	[flat|nested] 129+ messages in thread
 
- * [dpdk-dev] [PATCH v2 13/30] net/mlx5: add reference counter on DPDK Rx queues
       [not found] ` <cover.1507207731.git.nelio.laranjeiro@6wind.com>
                     ` (11 preceding siblings ...)
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 12/30] net/mlx5: add reference counter on DPDK Tx queues Nelio Laranjeiro
@ 2017-10-05 12:49   ` Nelio Laranjeiro
  2017-10-06  3:56     ` Yongseok Koh
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 14/30] net/mlx5: make indirection tables shareable Nelio Laranjeiro
                     ` (16 subsequent siblings)
  29 siblings, 1 reply; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-05 12:49 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
Use the same design for DPDK queues as for Verbs queues for symmetry.  This
also helps in fixing some issues, such as the DPDK queue release API, which
is not expected to fail.  With this design, the queue is released when its
reference counter reaches 0.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5.c         |  16 +-
 drivers/net/mlx5/mlx5.h         |   1 +
 drivers/net/mlx5/mlx5_rxq.c     | 488 +++++++++++++++++++++-------------------
 drivers/net/mlx5/mlx5_rxtx.h    |  10 +
 drivers/net/mlx5/mlx5_trigger.c |  47 +++-
 5 files changed, 321 insertions(+), 241 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index cbf22eb..22fd5e4 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -208,17 +208,8 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	if (priv->rxqs != NULL) {
 		/* XXX race condition if mlx5_rx_burst() is still running. */
 		usleep(1000);
-		for (i = 0; (i != priv->rxqs_n); ++i) {
-			struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
-			struct mlx5_rxq_ctrl *rxq_ctrl;
-
-			if (rxq == NULL)
-				continue;
-			rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq);
-			(*priv->rxqs)[i] = NULL;
-			mlx5_rxq_cleanup(rxq_ctrl);
-			rte_free(rxq_ctrl);
-		}
+		for (i = 0; (i != priv->rxqs_n); ++i)
+			mlx5_priv_rxq_release(priv, i);
 		priv->rxqs_n = 0;
 		priv->rxqs = NULL;
 	}
@@ -247,6 +238,9 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	ret = mlx5_priv_rxq_ibv_verify(priv);
 	if (ret)
 		WARN("%p: some Verbs Rx queue still remain", (void *)priv);
+	ret = mlx5_priv_rxq_verify(priv);
+	if (ret)
+		WARN("%p: some Rx Queues still remain", (void *)priv);
 	ret = mlx5_priv_txq_ibv_verify(priv);
 	if (ret)
 		WARN("%p: some Verbs Tx queue still remain", (void *)priv);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index b20c39c..d0ef21a 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -147,6 +147,7 @@ struct priv {
 	struct rte_flow_drop *flow_drop_queue; /* Flow drop queue. */
 	TAILQ_HEAD(mlx5_flows, rte_flow) flows; /* RTE Flow rules. */
 	LIST_HEAD(mr, mlx5_mr) mr; /* Memory region. */
+	LIST_HEAD(rxq, mlx5_rxq_ctrl) rxqsctrl; /* DPDK Rx queues. */
 	LIST_HEAD(rxqibv, mlx5_rxq_ibv) rxqsibv; /* Verbs Rx queues. */
 	LIST_HEAD(txq, mlx5_txq_ctrl) txqsctrl; /* DPDK Tx queues. */
 	LIST_HEAD(txqibv, mlx5_txq_ibv) txqsibv; /* Verbs Tx queues. */
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 89c2cdb..87efeed 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -37,6 +37,7 @@
 #include <string.h>
 #include <stdint.h>
 #include <fcntl.h>
+#include <sys/queue.h>
 
 /* Verbs header. */
 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
@@ -629,16 +630,15 @@ priv_rehash_flows(struct priv *priv)
  *
  * @param rxq_ctrl
  *   Pointer to RX queue structure.
- * @param elts_n
- *   Number of elements to allocate.
  *
  * @return
  *   0 on success, errno value on failure.
  */
-static int
-rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl, unsigned int elts_n)
+int
+rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
 {
 	const unsigned int sges_n = 1 << rxq_ctrl->rxq.sges_n;
+	unsigned int elts_n = 1 << rxq_ctrl->rxq.elts_n;
 	unsigned int i;
 	int ret = 0;
 
@@ -667,9 +667,11 @@ rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl, unsigned int elts_n)
 		NB_SEGS(buf) = 1;
 		(*rxq_ctrl->rxq.elts)[i] = buf;
 	}
+	/* If Rx vector is activated. */
 	if (rxq_check_vec_support(&rxq_ctrl->rxq) > 0) {
 		struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
 		struct rte_mbuf *mbuf_init = &rxq->fake_mbuf;
+		int j;
 
 		/* Initialize default rearm_data for vPMD. */
 		mbuf_init->data_off = RTE_PKTMBUF_HEADROOM;
@@ -681,10 +683,11 @@ rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl, unsigned int elts_n)
 		 * rearm_data covers previous fields.
 		 */
 		rte_compiler_barrier();
-		rxq->mbuf_initializer = *(uint64_t *)&mbuf_init->rearm_data;
+		rxq->mbuf_initializer =
+			*(uint64_t *)&mbuf_init->rearm_data;
 		/* Padding with a fake mbuf for vectorized Rx. */
-		for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
-			(*rxq->elts)[elts_n + i] = &rxq->fake_mbuf;
+		for (j = 0; j < MLX5_VPMD_DESCS_PER_LOOP; ++j)
+			(*rxq->elts)[elts_n + j] = &rxq->fake_mbuf;
 	}
 	DEBUG("%p: allocated and configured %u segments (max %u packets)",
 	      (void *)rxq_ctrl, elts_n, elts_n / (1 << rxq_ctrl->rxq.sges_n));
@@ -754,170 +757,6 @@ mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl)
 }
 
 /**
- * Configure a RX queue.
- *
- * @param dev
- *   Pointer to Ethernet device structure.
- * @param rxq_ctrl
- *   Pointer to RX queue structure.
- * @param desc
- *   Number of descriptors to configure in queue.
- * @param socket
- *   NUMA socket on which memory must be allocated.
- * @param[in] conf
- *   Thresholds parameters.
- * @param mp
- *   Memory pool for buffer allocations.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-static int
-rxq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
-	       uint16_t desc, unsigned int socket,
-	       const struct rte_eth_rxconf *conf, struct rte_mempool *mp)
-{
-	struct priv *priv = dev->data->dev_private;
-	const uint16_t desc_n =
-		desc + priv->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
-	struct mlx5_rxq_ctrl tmpl = {
-		.priv = priv,
-		.socket = socket,
-		.rxq = {
-			.elts = rte_calloc_socket("RXQ", 1,
-						  desc_n *
-						  sizeof(struct rte_mbuf *), 0,
-						  socket),
-			.elts_n = log2above(desc),
-			.mp = mp,
-			.rss_hash = priv->rxqs_n > 1,
-		},
-	};
-	unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
-	struct rte_mbuf *(*elts)[desc_n] = NULL;
-	int ret = 0;
-
-	(void)conf; /* Thresholds configuration (ignored). */
-	if (dev->data->dev_conf.intr_conf.rxq)
-		tmpl.irq = 1;
-	/* Enable scattered packets support for this queue if necessary. */
-	assert(mb_len >= RTE_PKTMBUF_HEADROOM);
-	if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
-	    (mb_len - RTE_PKTMBUF_HEADROOM)) {
-		tmpl.rxq.sges_n = 0;
-	} else if (dev->data->dev_conf.rxmode.enable_scatter) {
-		unsigned int size =
-			RTE_PKTMBUF_HEADROOM +
-			dev->data->dev_conf.rxmode.max_rx_pkt_len;
-		unsigned int sges_n;
-
-		/*
-		 * Determine the number of SGEs needed for a full packet
-		 * and round it to the next power of two.
-		 */
-		sges_n = log2above((size / mb_len) + !!(size % mb_len));
-		tmpl.rxq.sges_n = sges_n;
-		/* Make sure rxq.sges_n did not overflow. */
-		size = mb_len * (1 << tmpl.rxq.sges_n);
-		size -= RTE_PKTMBUF_HEADROOM;
-		if (size < dev->data->dev_conf.rxmode.max_rx_pkt_len) {
-			ERROR("%p: too many SGEs (%u) needed to handle"
-			      " requested maximum packet size %u",
-			      (void *)dev,
-			      1 << sges_n,
-			      dev->data->dev_conf.rxmode.max_rx_pkt_len);
-			return EOVERFLOW;
-		}
-	} else {
-		WARN("%p: the requested maximum Rx packet size (%u) is"
-		     " larger than a single mbuf (%u) and scattered"
-		     " mode has not been requested",
-		     (void *)dev,
-		     dev->data->dev_conf.rxmode.max_rx_pkt_len,
-		     mb_len - RTE_PKTMBUF_HEADROOM);
-	}
-	DEBUG("%p: maximum number of segments per packet: %u",
-	      (void *)dev, 1 << tmpl.rxq.sges_n);
-	if (desc % (1 << tmpl.rxq.sges_n)) {
-		ERROR("%p: number of RX queue descriptors (%u) is not a"
-		      " multiple of SGEs per packet (%u)",
-		      (void *)dev,
-		      desc,
-		      1 << tmpl.rxq.sges_n);
-		return EINVAL;
-	}
-	/* Toggle RX checksum offload if hardware supports it. */
-	if (priv->hw_csum)
-		tmpl.rxq.csum = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
-	if (priv->hw_csum_l2tun)
-		tmpl.rxq.csum_l2tun =
-			!!dev->data->dev_conf.rxmode.hw_ip_checksum;
-	/* Configure VLAN stripping. */
-	tmpl.rxq.vlan_strip = (priv->hw_vlan_strip &&
-			       !!dev->data->dev_conf.rxmode.hw_vlan_strip);
-	/* By default, FCS (CRC) is stripped by hardware. */
-	if (dev->data->dev_conf.rxmode.hw_strip_crc) {
-		tmpl.rxq.crc_present = 0;
-	} else if (priv->hw_fcs_strip) {
-		tmpl.rxq.crc_present = 1;
-	} else {
-		WARN("%p: CRC stripping has been disabled but will still"
-		     " be performed by hardware, make sure MLNX_OFED and"
-		     " firmware are up to date",
-		     (void *)dev);
-		tmpl.rxq.crc_present = 0;
-	}
-	DEBUG("%p: CRC stripping is %s, %u bytes will be subtracted from"
-	      " incoming frames to hide it",
-	      (void *)dev,
-	      tmpl.rxq.crc_present ? "disabled" : "enabled",
-	      tmpl.rxq.crc_present << 2);
-#ifdef HAVE_IBV_WQ_FLAG_RX_END_PADDING
-	if (!mlx5_getenv_int("MLX5_PMD_ENABLE_PADDING")) {
-		; /* Nothing else to do. */
-	} else if (priv->hw_padding) {
-		INFO("%p: enabling packet padding on queue %p",
-		     (void *)dev, (void *)rxq_ctrl);
-	} else {
-		WARN("%p: packet padding has been requested but is not"
-		     " supported, make sure MLNX_OFED and firmware are"
-		     " up to date",
-		     (void *)dev);
-	}
-#endif
-	/* Save port ID. */
-	tmpl.rxq.port_id = dev->data->port_id;
-	DEBUG("%p: RTE port ID: %u", (void *)rxq_ctrl, tmpl.rxq.port_id);
-	ret = rxq_alloc_elts(&tmpl, desc);
-	if (ret) {
-		ERROR("%p: RXQ allocation failed: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	/* Clean up rxq in case we're reinitializing it. */
-	DEBUG("%p: cleaning-up old rxq just in case", (void *)rxq_ctrl);
-	mlx5_rxq_cleanup(rxq_ctrl);
-	/* Move mbuf pointers to dedicated storage area in RX queue. */
-	elts = (void *)(rxq_ctrl + 1);
-	rte_memcpy(elts, tmpl.rxq.elts, sizeof(*elts));
-#ifndef NDEBUG
-	memset(tmpl.rxq.elts, 0x55, sizeof(*elts));
-#endif
-	rte_free(tmpl.rxq.elts);
-	tmpl.rxq.elts = elts;
-	*rxq_ctrl = tmpl;
-	DEBUG("%p: rxq updated with %p", (void *)rxq_ctrl, (void *)&tmpl);
-	assert(ret == 0);
-	return 0;
-error:
-	rte_free(tmpl.rxq.elts);
-	mlx5_rxq_cleanup(&tmpl);
-	assert(ret > 0);
-	return ret;
-}
-
-/**
- * DPDK callback to configure a RX queue.
  *
  * @param dev
  *   Pointer to Ethernet device structure.
@@ -944,13 +783,11 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
 	struct mlx5_rxq_ctrl *rxq_ctrl =
 		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
-	const uint16_t desc_n =
-		desc + priv->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
-	int ret;
+	int ret = 0;
 
+	(void)conf;
 	if (mlx5_is_secondary())
 		return -E_RTE_SECONDARY;
-
 	priv_lock(priv);
 	if (!rte_is_power_of_2(desc)) {
 		desc = 1 << log2above(desc);
@@ -966,54 +803,23 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		priv_unlock(priv);
 		return -EOVERFLOW;
 	}
-	if (rxq != NULL) {
-		DEBUG("%p: reusing already allocated queue index %u (%p)",
-		      (void *)dev, idx, (void *)rxq);
-		if (dev->data->dev_started) {
-			priv_unlock(priv);
-			return -EEXIST;
-		}
-		(*priv->rxqs)[idx] = NULL;
-		mlx5_rxq_cleanup(rxq_ctrl);
-		/* Resize if rxq size is changed. */
-		if (rxq_ctrl->rxq.elts_n != log2above(desc)) {
-			rxq_ctrl = rte_realloc(rxq_ctrl,
-					       sizeof(*rxq_ctrl) + desc_n *
-					       sizeof(struct rte_mbuf *),
-					       RTE_CACHE_LINE_SIZE);
-			if (!rxq_ctrl) {
-				ERROR("%p: unable to reallocate queue index %u",
-					(void *)dev, idx);
-				priv_unlock(priv);
-				return -ENOMEM;
-			}
-		}
-	} else {
-		rxq_ctrl = rte_calloc_socket("RXQ", 1, sizeof(*rxq_ctrl) +
-					     desc_n *
-					     sizeof(struct rte_mbuf *),
-					     0, socket);
-		if (rxq_ctrl == NULL) {
-			ERROR("%p: unable to allocate queue index %u",
-			      (void *)dev, idx);
-			priv_unlock(priv);
-			return -ENOMEM;
-		}
+	if (!mlx5_priv_rxq_releasable(priv, idx)) {
+		ret = EBUSY;
+		ERROR("%p: unable to release queue index %u",
+		      (void *)dev, idx);
+		goto out;
 	}
-	ret = rxq_ctrl_setup(dev, rxq_ctrl, desc, socket, conf, mp);
-	if (ret) {
-		rte_free(rxq_ctrl);
+	mlx5_priv_rxq_release(priv, idx);
+	rxq_ctrl = mlx5_priv_rxq_new(priv, idx, desc, socket, mp);
+	if (!rxq_ctrl) {
+		ERROR("%p: unable to allocate queue index %u",
+		      (void *)dev, idx);
+		ret = ENOMEM;
 		goto out;
 	}
-	rxq_ctrl->rxq.stats.idx = idx;
 	DEBUG("%p: adding RX queue %p to list",
 	      (void *)dev, (void *)rxq_ctrl);
 	(*priv->rxqs)[idx] = &rxq_ctrl->rxq;
-	rxq_ctrl->ibv = mlx5_priv_rxq_ibv_new(priv, idx);
-	if (!rxq_ctrl->ibv) {
-		ret = EAGAIN;
-		goto out;
-	}
 out:
 	priv_unlock(priv);
 	return -ret;
@@ -1031,7 +837,6 @@ mlx5_rx_queue_release(void *dpdk_rxq)
 	struct mlx5_rxq_data *rxq = (struct mlx5_rxq_data *)dpdk_rxq;
 	struct mlx5_rxq_ctrl *rxq_ctrl;
 	struct priv *priv;
-	unsigned int i;
 
 	if (mlx5_is_secondary())
 		return;
@@ -1041,18 +846,10 @@ mlx5_rx_queue_release(void *dpdk_rxq)
 	rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq);
 	priv = rxq_ctrl->priv;
 	priv_lock(priv);
-	if (!mlx5_priv_rxq_ibv_releasable(priv, rxq_ctrl->ibv))
+	if (!mlx5_priv_rxq_releasable(priv, rxq_ctrl->rxq.stats.idx))
 		rte_panic("Rx queue %p is still used by a flow and cannot be"
 			  " removed\n", (void *)rxq_ctrl);
-	for (i = 0; (i != priv->rxqs_n); ++i)
-		if ((*priv->rxqs)[i] == rxq) {
-			DEBUG("%p: removing RX queue %p from list",
-			      (void *)priv->dev, (void *)rxq_ctrl);
-			(*priv->rxqs)[i] = NULL;
-			break;
-		}
-	mlx5_rxq_cleanup(rxq_ctrl);
-	rte_free(rxq_ctrl);
+	mlx5_priv_rxq_release(priv, rxq_ctrl->rxq.stats.idx);
 	priv_unlock(priv);
 }
 
@@ -1590,3 +1387,238 @@ mlx5_priv_rxq_ibv_releasable(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv)
 	assert(rxq_ibv);
 	return (rte_atomic32_read(&rxq_ibv->refcnt) == 1);
 }
+
+/**
+ * Create a DPDK Rx queue.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param idx
+ *   RX queue index.
+ * @param desc
+ *   Number of descriptors to configure in queue.
+ * @param socket
+ *   NUMA socket on which memory must be allocated.
+ *
+ * @return
+ *   A DPDK queue object on success, NULL otherwise.
+ */
+struct mlx5_rxq_ctrl*
+mlx5_priv_rxq_new(struct priv *priv, uint16_t idx, uint16_t desc,
+		  unsigned int socket, struct rte_mempool *mp)
+{
+	struct rte_eth_dev *dev = priv->dev;
+	struct mlx5_rxq_ctrl *tmpl;
+	const uint16_t desc_n =
+		desc + priv->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
+	unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
+
+	tmpl = rte_calloc_socket("RXQ", 1,
+				 sizeof(*tmpl) +
+				 desc_n * sizeof(struct rte_mbuf *),
+				 0, socket);
+	if (!tmpl)
+		return NULL;
+	if (priv->dev->data->dev_conf.intr_conf.rxq)
+		tmpl->irq = 1;
+	/* Enable scattered packets support for this queue if necessary. */
+	assert(mb_len >= RTE_PKTMBUF_HEADROOM);
+	if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
+	    (mb_len - RTE_PKTMBUF_HEADROOM)) {
+		tmpl->rxq.sges_n = 0;
+	} else if (dev->data->dev_conf.rxmode.enable_scatter) {
+		unsigned int size =
+			RTE_PKTMBUF_HEADROOM +
+			dev->data->dev_conf.rxmode.max_rx_pkt_len;
+		unsigned int sges_n;
+
+		/*
+		 * Determine the number of SGEs needed for a full packet
+		 * and round it to the next power of two.
+		 */
+		sges_n = log2above((size / mb_len) + !!(size % mb_len));
+		tmpl->rxq.sges_n = sges_n;
+		/* Make sure rxq.sges_n did not overflow. */
+		size = mb_len * (1 << tmpl->rxq.sges_n);
+		size -= RTE_PKTMBUF_HEADROOM;
+		if (size < dev->data->dev_conf.rxmode.max_rx_pkt_len) {
+			ERROR("%p: too many SGEs (%u) needed to handle"
+			      " requested maximum packet size %u",
+			      (void *)dev,
+			      1 << sges_n,
+			      dev->data->dev_conf.rxmode.max_rx_pkt_len);
+			goto error;
+		}
+	} else {
+		WARN("%p: the requested maximum Rx packet size (%u) is"
+		     " larger than a single mbuf (%u) and scattered"
+		     " mode has not been requested",
+		     (void *)dev,
+		     dev->data->dev_conf.rxmode.max_rx_pkt_len,
+		     mb_len - RTE_PKTMBUF_HEADROOM);
+	}
+	DEBUG("%p: maximum number of segments per packet: %u",
+	      (void *)dev, 1 << tmpl->rxq.sges_n);
+	if (desc % (1 << tmpl->rxq.sges_n)) {
+		ERROR("%p: number of RX queue descriptors (%u) is not a"
+		      " multiple of SGEs per packet (%u)",
+		      (void *)dev,
+		      desc,
+		      1 << tmpl->rxq.sges_n);
+		goto error;
+	}
+	/* Toggle RX checksum offload if hardware supports it. */
+	if (priv->hw_csum)
+		tmpl->rxq.csum = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
+	if (priv->hw_csum_l2tun)
+		tmpl->rxq.csum_l2tun =
+			!!dev->data->dev_conf.rxmode.hw_ip_checksum;
+	/* Configure VLAN stripping. */
+	tmpl->rxq.vlan_strip = (priv->hw_vlan_strip &&
+			       !!dev->data->dev_conf.rxmode.hw_vlan_strip);
+	/* By default, FCS (CRC) is stripped by hardware. */
+	if (dev->data->dev_conf.rxmode.hw_strip_crc) {
+		tmpl->rxq.crc_present = 0;
+	} else if (priv->hw_fcs_strip) {
+		tmpl->rxq.crc_present = 1;
+	} else {
+		WARN("%p: CRC stripping has been disabled but will still"
+		     " be performed by hardware, make sure MLNX_OFED and"
+		     " firmware are up to date",
+		     (void *)dev);
+		tmpl->rxq.crc_present = 0;
+	}
+	DEBUG("%p: CRC stripping is %s, %u bytes will be subtracted from"
+	      " incoming frames to hide it",
+	      (void *)dev,
+	      tmpl->rxq.crc_present ? "disabled" : "enabled",
+	      tmpl->rxq.crc_present << 2);
+	/* Save port ID. */
+	tmpl->rxq.rss_hash = priv->rxqs_n > 1;
+	tmpl->rxq.port_id = dev->data->port_id;
+	tmpl->priv = priv;
+	tmpl->rxq.mp = mp;
+	tmpl->rxq.stats.idx = idx;
+	tmpl->rxq.elts_n = log2above(desc);
+	tmpl->rxq.elts =
+		(struct rte_mbuf *(*)[1 << tmpl->rxq.elts_n])(tmpl + 1);
+	rte_atomic32_inc(&tmpl->refcnt);
+	DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv,
+	      (void *)tmpl, rte_atomic32_read(&tmpl->refcnt));
+	LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
+	return tmpl;
+error:
+	rte_free(tmpl);
+	return NULL;
+}
+
+/**
+ * Get a Rx queue.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param idx
+ *   RX queue index.
+ *
+ * @return
+ *   A pointer to the queue if it exists.
+ */
+struct mlx5_rxq_ctrl*
+mlx5_priv_rxq_get(struct priv *priv, uint16_t idx)
+{
+	struct mlx5_rxq_ctrl *rxq_ctrl = NULL;
+
+	if ((*priv->rxqs)[idx]) {
+		rxq_ctrl = container_of((*priv->rxqs)[idx],
+					struct mlx5_rxq_ctrl,
+					rxq);
+
+		mlx5_priv_rxq_ibv_get(priv, idx);
+		rte_atomic32_inc(&rxq_ctrl->refcnt);
+		DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv,
+		      (void *)rxq_ctrl, rte_atomic32_read(&rxq_ctrl->refcnt));
+	}
+	return rxq_ctrl;
+}
+
+/**
+ * Release a Rx queue.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param idx
+ *   RX queue index.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+int
+mlx5_priv_rxq_release(struct priv *priv, uint16_t idx)
+{
+	struct mlx5_rxq_ctrl *rxq_ctrl;
+
+	if (!(*priv->rxqs)[idx])
+		return 0;
+	rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
+	assert(rxq_ctrl->priv);
+	if (rxq_ctrl->ibv) {
+		int ret;
+
+		ret = mlx5_priv_rxq_ibv_release(rxq_ctrl->priv, rxq_ctrl->ibv);
+		if (!ret)
+			rxq_ctrl->ibv = NULL;
+	}
+	DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv,
+	      (void *)rxq_ctrl, rte_atomic32_read(&rxq_ctrl->refcnt));
+	if (rte_atomic32_dec_and_test(&rxq_ctrl->refcnt)) {
+		LIST_REMOVE(rxq_ctrl, next);
+		rte_free(rxq_ctrl);
+		(*priv->rxqs)[idx] = NULL;
+		return 0;
+	}
+	return EBUSY;
+}
+
+/**
+ * Verify if the queue can be released.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param idx
+ *   RX queue index.
+ *
+ * @return
+ *   1 if the queue can be released, 0 otherwise, -1 if no such queue.
+ */
+int
+mlx5_priv_rxq_releasable(struct priv *priv, uint16_t idx)
+{
+	struct mlx5_rxq_ctrl *rxq_ctrl;
+
+	if (!(*priv->rxqs)[idx])
+		return -1;
+	rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
+	return (rte_atomic32_read(&rxq_ctrl->refcnt) == 1);
+}
+
+/**
+ * Verify the Rx Queue list is empty
+ *
+ * @param priv
+ *  Pointer to private structure.
+ *
+ * @return the number of objects not released.
+ */
+int
+mlx5_priv_rxq_verify(struct priv *priv)
+{
+	struct mlx5_rxq_ctrl *rxq_ctrl;
+	int ret = 0;
+
+	LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) {
+		DEBUG("%p: Rx Queue %p still referenced", (void *)priv,
+		      (void *)rxq_ctrl);
+		++ret;
+	}
+	return ret;
+}
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index f3a2f41..57935cb 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -147,6 +147,8 @@ struct mlx5_rxq_ibv {
 
 /* RX queue control descriptor. */
 struct mlx5_rxq_ctrl {
+	LIST_ENTRY(mlx5_rxq_ctrl) next; /* Pointer to the next element. */
+	rte_atomic32_t refcnt; /* Reference counter. */
 	struct priv *priv; /* Back pointer to private data. */
 	struct mlx5_rxq_ibv *ibv; /* Verbs elements. */
 	struct mlx5_rxq_data rxq; /* Data path structure. */
@@ -335,6 +337,14 @@ struct mlx5_rxq_ibv *mlx5_priv_rxq_ibv_get(struct priv *, uint16_t);
 int mlx5_priv_rxq_ibv_release(struct priv *, struct mlx5_rxq_ibv *);
 int mlx5_priv_rxq_ibv_releasable(struct priv *, struct mlx5_rxq_ibv *);
 int mlx5_priv_rxq_ibv_verify(struct priv *);
+struct mlx5_rxq_ctrl *mlx5_priv_rxq_new(struct priv *, uint16_t,
+					uint16_t, unsigned int,
+					struct rte_mempool *);
+struct mlx5_rxq_ctrl *mlx5_priv_rxq_get(struct priv *, uint16_t);
+int mlx5_priv_rxq_release(struct priv *, uint16_t);
+int mlx5_priv_rxq_releasable(struct priv *, uint16_t);
+int mlx5_priv_rxq_verify(struct priv *);
+int rxq_alloc_elts(struct mlx5_rxq_ctrl *);
 
 /* mlx5_txq.c */
 
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 7a12768..a311499 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -79,6 +79,41 @@ priv_txq_start(struct priv *priv)
 	return -ret;
 }
 
+static void
+priv_rxq_stop(struct priv *priv)
+{
+	unsigned int i;
+
+	for (i = 0; i != priv->rxqs_n; ++i)
+		mlx5_priv_rxq_release(priv, i);
+}
+
+static int
+priv_rxq_start(struct priv *priv)
+{
+	unsigned int i;
+	int ret = 0;
+
+	for (i = 0; i != priv->rxqs_n; ++i) {
+		struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_priv_rxq_get(priv, i);
+
+		if (!rxq_ctrl)
+			continue;
+		ret = rxq_alloc_elts(rxq_ctrl);
+		if (ret)
+			goto error;
+		rxq_ctrl->ibv = mlx5_priv_rxq_ibv_new(priv, i);
+		if (!rxq_ctrl->ibv) {
+			ret = ENOMEM;
+			goto error;
+		}
+	}
+	return -ret;
+error:
+	priv_rxq_stop(priv);
+	return -ret;
+}
+
 /**
  * DPDK callback to start the device.
  *
@@ -101,8 +136,6 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 		return -E_RTE_SECONDARY;
 
 	priv_lock(priv);
-	/* Update Rx/Tx callback. */
-	priv_dev_select_rx_function(priv, dev);
 	DEBUG("%p: allocating and configuring hash RX queues", (void *)dev);
 	rte_mempool_walk(mlx5_mp2mr_iter, priv);
 	err = priv_txq_start(priv);
@@ -113,6 +146,14 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 	}
 	/* Update send callback. */
 	priv_dev_select_tx_function(priv, dev);
+	err = priv_rxq_start(priv);
+	if (err) {
+		ERROR("%p: RXQ allocation failed: %s",
+		      (void *)dev, strerror(err));
+		goto error;
+	}
+	/* Update receive callback. */
+	priv_dev_select_rx_function(priv, dev);
 	err = priv_create_hash_rxqs(priv);
 	if (!err)
 		err = priv_rehash_flows(priv);
@@ -147,6 +188,7 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 	priv_mac_addrs_disable(priv);
 	priv_destroy_hash_rxqs(priv);
 	priv_flow_stop(priv);
+	priv_rxq_stop(priv);
 	priv_txq_stop(priv);
 	priv_unlock(priv);
 	return -err;
@@ -183,6 +225,7 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
 	priv_flow_stop(priv);
 	priv_rx_intr_vec_disable(priv);
 	priv_txq_stop(priv);
+	priv_rxq_stop(priv);
 	LIST_FOREACH(mr, &priv->mr, next) {
 		priv_mr_release(priv, mr);
 	}
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v2 14/30] net/mlx5: make indirection tables shareable
       [not found] ` <cover.1507207731.git.nelio.laranjeiro@6wind.com>
                     ` (12 preceding siblings ...)
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 13/30] net/mlx5: add reference counter on DPDK Rx queues Nelio Laranjeiro
@ 2017-10-05 12:49   ` Nelio Laranjeiro
  2017-10-06  4:08     ` Yongseok Koh
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 15/30] net/mlx5: add Hash Rx queue object Nelio Laranjeiro
                     ` (15 subsequent siblings)
  29 siblings, 1 reply; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-05 12:49 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
On the Verbs side, an indirection table is a list of final work queues used
to spread the packets according to a higher-level queue.  This indirection
table can be shared among the hash Rx queues which point to it.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5.c       |   3 +
 drivers/net/mlx5/mlx5.h       |   2 +
 drivers/net/mlx5/mlx5_flow.c  |  83 ++++++++++-------------
 drivers/net/mlx5/mlx5_rxq.c   | 153 ++++++++++++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_rxtx.h  |  17 +++++
 drivers/net/mlx5/mlx5_utils.h |   2 +
 6 files changed, 214 insertions(+), 46 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 22fd5e4..929f0df 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -235,6 +235,9 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	if (priv->reta_idx != NULL)
 		rte_free(priv->reta_idx);
 	priv_socket_uninit(priv);
+	ret = mlx5_priv_ind_table_ibv_verify(priv);
+	if (ret)
+		WARN("%p: some Indirection table still remain", (void *)priv);
 	ret = mlx5_priv_rxq_ibv_verify(priv);
 	if (ret)
 		WARN("%p: some Verbs Rx queue still remain", (void *)priv);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index d0ef21a..ab17ce6 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -151,6 +151,8 @@ struct priv {
 	LIST_HEAD(rxqibv, mlx5_rxq_ibv) rxqsibv; /* Verbs Rx queues. */
 	LIST_HEAD(txq, mlx5_txq_ctrl) txqsctrl; /* DPDK Tx queues. */
 	LIST_HEAD(txqibv, mlx5_txq_ibv) txqsibv; /* Verbs Tx queues. */
+	/* Verbs Indirection tables. */
+	LIST_HEAD(ind_tables, mlx5_ind_table_ibv) ind_tbls;
 	uint32_t link_speed_capa; /* Link speed capabilities. */
 	struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */
 	rte_spinlock_t lock; /* Lock for control functions. */
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 362ec91..dc9adeb 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -90,7 +90,7 @@ mlx5_flow_create_vxlan(const struct rte_flow_item *item,
 struct rte_flow {
 	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
 	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
-	struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
+	struct mlx5_ind_table_ibv *ind_table; /**< Indirection table. */
 	struct ibv_qp *qp; /**< Verbs queue pair. */
 	struct ibv_flow *ibv_flow; /**< Verbs flow. */
 	struct ibv_wq *wq; /**< Verbs work queue. */
@@ -98,8 +98,6 @@ struct rte_flow {
 	uint32_t mark:1; /**< Set if the flow is marked. */
 	uint32_t drop:1; /**< Drop queue. */
 	uint64_t hash_fields; /**< Fields that participate in the hash. */
-	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< List of queues. */
-	uint16_t queues_n; /**< Number of queues in the list. */
 };
 
 /** Static initializer for items. */
@@ -1089,9 +1087,6 @@ priv_flow_create_action_queue(struct priv *priv,
 {
 	struct rte_flow *rte_flow;
 	unsigned int i;
-	unsigned int j;
-	const unsigned int wqs_n = 1 << log2above(flow->actions.queues_n);
-	struct ibv_wq *wqs[wqs_n];
 
 	assert(priv->pd);
 	assert(priv->ctx);
@@ -1102,36 +1097,29 @@ priv_flow_create_action_queue(struct priv *priv,
 				   NULL, "cannot allocate flow memory");
 		return NULL;
 	}
-	for (i = 0; i < flow->actions.queues_n; ++i) {
-		struct mlx5_rxq_ibv *rxq_ibv =
-			mlx5_priv_rxq_ibv_get(priv, flow->actions.queues[i]);
-
-		wqs[i] = rxq_ibv->wq;
-		rte_flow->queues[i] = flow->actions.queues[i];
-		++rte_flow->queues_n;
-		(*priv->rxqs)[flow->actions.queues[i]]->mark |=
-			flow->actions.mark;
-	}
-	/* finalise indirection table. */
-	for (j = 0; i < wqs_n; ++i, ++j) {
-		wqs[i] = wqs[j];
-		if (j == flow->actions.queues_n)
-			j = 0;
+	for (i = 0; i != flow->actions.queues_n; ++i) {
+		struct mlx5_rxq_data *q =
+			(*priv->rxqs)[flow->actions.queues[i]];
+
+		q->mark |= flow->actions.mark;
 	}
 	rte_flow->mark = flow->actions.mark;
 	rte_flow->ibv_attr = flow->ibv_attr;
 	rte_flow->hash_fields = flow->hash_fields;
-	rte_flow->ind_table = ibv_create_rwq_ind_table(
-		priv->ctx,
-		&(struct ibv_rwq_ind_table_init_attr){
-			.log_ind_tbl_size = log2above(flow->actions.queues_n),
-			.ind_tbl = wqs,
-			.comp_mask = 0,
-		});
+	rte_flow->ind_table =
+		mlx5_priv_ind_table_ibv_get(priv, flow->actions.queues,
+					    flow->actions.queues_n);
 	if (!rte_flow->ind_table) {
-		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
-				   NULL, "cannot allocate indirection table");
-		goto error;
+		rte_flow->ind_table =
+			mlx5_priv_ind_table_ibv_new(priv, flow->actions.queues,
+						    flow->actions.queues_n);
+		if (!rte_flow->ind_table) {
+			rte_flow_error_set(error, ENOMEM,
+					   RTE_FLOW_ERROR_TYPE_HANDLE,
+					   NULL,
+					   "cannot allocate indirection table");
+			goto error;
+		}
 	}
 	rte_flow->qp = ibv_create_qp_ex(
 		priv->ctx,
@@ -1148,7 +1136,7 @@ priv_flow_create_action_queue(struct priv *priv,
 				.rx_hash_key = rss_hash_default_key,
 				.rx_hash_fields_mask = rte_flow->hash_fields,
 			},
-			.rwq_ind_tbl = rte_flow->ind_table,
+			.rwq_ind_tbl = rte_flow->ind_table->ind_table,
 			.pd = priv->pd
 		});
 	if (!rte_flow->qp) {
@@ -1171,7 +1159,7 @@ priv_flow_create_action_queue(struct priv *priv,
 	if (rte_flow->qp)
 		ibv_destroy_qp(rte_flow->qp);
 	if (rte_flow->ind_table)
-		ibv_destroy_rwq_ind_table(rte_flow->ind_table);
+		mlx5_priv_ind_table_ibv_release(priv, rte_flow->ind_table);
 	rte_free(rte_flow);
 	return NULL;
 }
@@ -1297,13 +1285,10 @@ priv_flow_destroy(struct priv *priv,
 		goto free;
 	if (flow->qp)
 		claim_zero(ibv_destroy_qp(flow->qp));
-	if (flow->ind_table)
-		claim_zero(ibv_destroy_rwq_ind_table(flow->ind_table));
-	for (i = 0; i != flow->queues_n; ++i) {
+	for (i = 0; i != flow->ind_table->queues_n; ++i) {
 		struct rte_flow *tmp;
-		struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[flow->queues[i]];
-		struct mlx5_rxq_ctrl *rxq_ctrl =
-			container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
+		struct mlx5_rxq_data *rxq_data =
+			(*priv->rxqs)[flow->ind_table->queues[i]];
 
 		/*
 		 * To remove the mark from the queue, the queue must not be
@@ -1319,14 +1304,17 @@ priv_flow_destroy(struct priv *priv,
 					continue;
 				if (!tmp->mark)
 					continue;
-				for (j = 0; (j != tmp->queues_n) && !mark; j++)
-					if (tmp->queues[j] == flow->queues[i])
+				for (j = 0;
+				     (j != tmp->ind_table->queues_n) && !mark;
+				     j++)
+					if (tmp->ind_table->queues[j] ==
+					    flow->ind_table->queues[i])
 						mark = 1;
 			}
 			rxq_data->mark = mark;
 		}
-		mlx5_priv_rxq_ibv_release(priv, rxq_ctrl->ibv);
 	}
+	mlx5_priv_ind_table_ibv_release(priv, flow->ind_table);
 free:
 	rte_free(flow->ibv_attr);
 	DEBUG("Flow destroyed %p", (void *)flow);
@@ -1518,9 +1506,10 @@ priv_flow_stop(struct priv *priv)
 		flow->ibv_flow = NULL;
 		if (flow->mark) {
 			unsigned int n;
+			struct mlx5_ind_table_ibv *ind_tbl = flow->ind_table;
 
-			for (n = 0; n < flow->queues_n; ++n)
-				(*priv->rxqs)[flow->queues[n]]->mark = 0;
+			for (n = 0; n < ind_tbl->queues_n; ++n)
+				(*priv->rxqs)[ind_tbl->queues[n]]->mark = 0;
 		}
 		DEBUG("Flow %p removed", (void *)flow);
 	}
@@ -1562,8 +1551,10 @@ priv_flow_start(struct priv *priv)
 		if (flow->mark) {
 			unsigned int n;
 
-			for (n = 0; n < flow->queues_n; ++n)
-				(*priv->rxqs)[flow->queues[n]]->mark = 1;
+			for (n = 0; n < flow->ind_table->queues_n; ++n) {
+				uint16_t idx = flow->ind_table->queues[n];
+				(*priv->rxqs)[idx]->mark = 1;
+			}
 		}
 	}
 	return 0;
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 87efeed..4a53282 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -1622,3 +1622,156 @@ mlx5_priv_rxq_verify(struct priv *priv)
 	}
 	return ret;
 }
+
+/**
+ * Create an indirection table.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param queues
+ *   Queues entering in the indirection table.
+ * @param queues_n
+ *   Number of queues in the array.
+ *
+ * @return
+ *   A new indirection table.
+ */
+struct mlx5_ind_table_ibv*
+mlx5_priv_ind_table_ibv_new(struct priv *priv, uint16_t queues[],
+			    uint16_t queues_n)
+{
+	struct mlx5_ind_table_ibv *ind_tbl;
+	const unsigned int wq_n = rte_is_power_of_2(queues_n) ?
+		log2above(queues_n) :
+		priv->ind_table_max_size;
+	struct ibv_wq *wq[1 << wq_n];
+	unsigned int i;
+	unsigned int j;
+
+	ind_tbl = rte_calloc(__func__, 1, sizeof(*ind_tbl) +
+			     queues_n * sizeof(uint16_t), 0);
+	if (!ind_tbl)
+		return NULL;
+	for (i = 0; i != queues_n; ++i) {
+		struct mlx5_rxq_ctrl *rxq =
+			mlx5_priv_rxq_get(priv, queues[i]);
+
+		if (!rxq)
+			goto error;
+		wq[i] = rxq->ibv->wq;
+		ind_tbl->queues[i] = queues[i];
+	}
+	ind_tbl->queues_n = queues_n;
+	/* Finalise indirection table. */
+	for (j = 0; i != (unsigned int)(1 << wq_n); ++i, ++j)
+		wq[i] = wq[j];
+	ind_tbl->ind_table = ibv_create_rwq_ind_table(
+		priv->ctx,
+		&(struct ibv_rwq_ind_table_init_attr){
+			.log_ind_tbl_size = wq_n,
+			.ind_tbl = wq,
+			.comp_mask = 0,
+		});
+	if (!ind_tbl->ind_table)
+		goto error;
+	rte_atomic32_inc(&ind_tbl->refcnt);
+	LIST_INSERT_HEAD(&priv->ind_tbls, ind_tbl, next);
+	DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
+	      (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
+	return ind_tbl;
+error:
+	rte_free(ind_tbl);
+	DEBUG("%p cannot create indirection table", (void *)priv);
+	return NULL;
+}
+
+/**
+ * Get an indirection table.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param queues
+ *   Queues entering in the indirection table.
+ * @param queues_n
+ *   Number of queues in the array.
+ *
+ * @return
+ *   An indirection table if found.
+ */
+struct mlx5_ind_table_ibv*
+mlx5_priv_ind_table_ibv_get(struct priv *priv, uint16_t queues[],
+			    uint16_t queues_n)
+{
+	struct mlx5_ind_table_ibv *ind_tbl;
+
+	LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
+		if ((ind_tbl->queues_n == queues_n) &&
+		    (memcmp(ind_tbl->queues, queues,
+			    ind_tbl->queues_n * sizeof(ind_tbl->queues[0]))
+		     == 0))
+			break;
+	}
+	if (ind_tbl) {
+		unsigned int i;
+
+		rte_atomic32_inc(&ind_tbl->refcnt);
+		DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
+		      (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
+		for (i = 0; i != ind_tbl->queues_n; ++i)
+			mlx5_priv_rxq_get(priv, ind_tbl->queues[i]);
+	}
+	return ind_tbl;
+}
+
+/**
+ * Release an indirection table.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param ind_table
+ *   Indirection table to release.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+int
+mlx5_priv_ind_table_ibv_release(struct priv *priv,
+				struct mlx5_ind_table_ibv *ind_tbl)
+{
+	unsigned int i;
+
+	DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
+	      (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
+	if (rte_atomic32_dec_and_test(&ind_tbl->refcnt))
+		claim_zero(ibv_destroy_rwq_ind_table(ind_tbl->ind_table));
+	for (i = 0; i != ind_tbl->queues_n; ++i)
+		claim_nonzero(mlx5_priv_rxq_release(priv, ind_tbl->queues[i]));
+	if (!rte_atomic32_read(&ind_tbl->refcnt)) {
+		LIST_REMOVE(ind_tbl, next);
+		rte_free(ind_tbl);
+		return 0;
+	}
+	return EBUSY;
+}
+
+/**
+ * Verify the Verbs indirection table list is empty
+ *
+ * @param priv
+ *  Pointer to private structure.
+ *
+ * @return the number of objects not released.
+ */
+int
+mlx5_priv_ind_table_ibv_verify(struct priv *priv)
+{
+	struct mlx5_ind_table_ibv *ind_tbl;
+	int ret = 0;
+
+	LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
+		DEBUG("%p: Verbs indirection table %p still referenced",
+		      (void *)priv, (void *)ind_tbl);
+		++ret;
+	}
+	return ret;
+}
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 57935cb..1b6dc97 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -156,6 +156,15 @@ struct mlx5_rxq_ctrl {
 	unsigned int irq:1; /* Whether IRQ is enabled. */
 };
 
+/* Indirection table. */
+struct mlx5_ind_table_ibv {
+	LIST_ENTRY(mlx5_ind_table_ibv) next; /* Pointer to the next element. */
+	rte_atomic32_t refcnt; /* Reference counter. */
+	struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
+	uint16_t queues_n; /**< Number of queues in the list. */
+	uint16_t queues[]; /**< Queue list. */
+};
+
 /* Hash RX queue types. */
 enum hash_rxq_type {
 	HASH_RXQ_TCPV4,
@@ -345,6 +354,14 @@ int mlx5_priv_rxq_release(struct priv *, uint16_t);
 int mlx5_priv_rxq_releasable(struct priv *, uint16_t);
 int mlx5_priv_rxq_verify(struct priv *);
 int rxq_alloc_elts(struct mlx5_rxq_ctrl *);
+struct mlx5_ind_table_ibv *mlx5_priv_ind_table_ibv_new(struct priv *,
+						       uint16_t [],
+						       uint16_t);
+struct mlx5_ind_table_ibv *mlx5_priv_ind_table_ibv_get(struct priv *,
+						       uint16_t [],
+						       uint16_t);
+int mlx5_priv_ind_table_ibv_release(struct priv *, struct mlx5_ind_table_ibv *);
+int mlx5_priv_ind_table_ibv_verify(struct priv *);
 
 /* mlx5_txq.c */
 
diff --git a/drivers/net/mlx5/mlx5_utils.h b/drivers/net/mlx5/mlx5_utils.h
index a824787..218ae83 100644
--- a/drivers/net/mlx5/mlx5_utils.h
+++ b/drivers/net/mlx5/mlx5_utils.h
@@ -128,11 +128,13 @@ pmd_drv_log_basename(const char *s)
 
 #define DEBUG(...) PMD_DRV_LOG(DEBUG, __VA_ARGS__)
 #define claim_zero(...) assert((__VA_ARGS__) == 0)
+#define claim_nonzero(...) assert((__VA_ARGS__) != 0)
 
 #else /* NDEBUG */
 
 #define DEBUG(...) (void)0
 #define claim_zero(...) (__VA_ARGS__)
+#define claim_nonzero(...) (__VA_ARGS__)
 
 #endif /* NDEBUG */
 
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v2 15/30] net/mlx5: add Hash Rx queue object
       [not found] ` <cover.1507207731.git.nelio.laranjeiro@6wind.com>
                     ` (13 preceding siblings ...)
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 14/30] net/mlx5: make indirection tables shareable Nelio Laranjeiro
@ 2017-10-05 12:49   ` Nelio Laranjeiro
  2017-10-06  4:59     ` Yongseok Koh
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 16/30] net/mlx5: fix clang compilation error Nelio Laranjeiro
                     ` (14 subsequent siblings)
  29 siblings, 1 reply; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-05 12:49 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
A Hash Rx queue is a high-level queue providing the RSS hash algorithm, key
and indirection table used to spread the packets.  Those objects can be
easily shared between several Verbs flows.  This commit brings this
capability to the PMD.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5.c      |   3 +
 drivers/net/mlx5/mlx5.h      |   3 +-
 drivers/net/mlx5/mlx5_flow.c | 228 ++++++++++++++++++++++++-------------------
 drivers/net/mlx5/mlx5_rxq.c  | 165 +++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_rxtx.h |  17 ++++
 5 files changed, 312 insertions(+), 104 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 929f0df..2860480 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -235,6 +235,9 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	if (priv->reta_idx != NULL)
 		rte_free(priv->reta_idx);
 	priv_socket_uninit(priv);
+	ret = mlx5_priv_hrxq_ibv_verify(priv);
+	if (ret)
+		WARN("%p: some Hash Rx queue still remain", (void *)priv);
 	ret = mlx5_priv_ind_table_ibv_verify(priv);
 	if (ret)
 		WARN("%p: some Indirection table still remain", (void *)priv);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index ab17ce6..77413c9 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -144,11 +144,12 @@ struct priv {
 	struct rte_intr_handle intr_handle; /* Interrupt handler. */
 	unsigned int (*reta_idx)[]; /* RETA index table. */
 	unsigned int reta_idx_n; /* RETA index size. */
-	struct rte_flow_drop *flow_drop_queue; /* Flow drop queue. */
+	struct mlx5_hrxq_drop *flow_drop_queue; /* Flow drop queue. */
 	TAILQ_HEAD(mlx5_flows, rte_flow) flows; /* RTE Flow rules. */
 	LIST_HEAD(mr, mlx5_mr) mr; /* Memory region. */
 	LIST_HEAD(rxq, mlx5_rxq_ctrl) rxqsctrl; /* DPDK Rx queues. */
 	LIST_HEAD(rxqibv, mlx5_rxq_ibv) rxqsibv; /* Verbs Rx queues. */
+	LIST_HEAD(hrxq, mlx5_hrxq) hrxqs; /* Verbs Hash Rx queues. */
 	LIST_HEAD(txq, mlx5_txq_ctrl) txqsctrl; /* DPDK Tx queues. */
 	LIST_HEAD(txqibv, mlx5_txq_ibv) txqsibv; /* Verbs Tx queues. */
 	/* Verbs Indirection tables. */
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index dc9adeb..4948882 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -87,17 +87,37 @@ mlx5_flow_create_vxlan(const struct rte_flow_item *item,
 		       const void *default_mask,
 		       void *data);
 
-struct rte_flow {
-	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
-	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
-	struct mlx5_ind_table_ibv *ind_table; /**< Indirection table. */
+/** Structure for Drop queue. */
+struct mlx5_hrxq_drop {
+	struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
 	struct ibv_qp *qp; /**< Verbs queue pair. */
-	struct ibv_flow *ibv_flow; /**< Verbs flow. */
 	struct ibv_wq *wq; /**< Verbs work queue. */
 	struct ibv_cq *cq; /**< Verbs completion queue. */
+};
+
+/* Flows structures. */
+struct mlx5_flow {
+	uint64_t hash_fields; /**< Fields that participate in the hash. */
+	struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
+};
+
+/* Drop flows structures. */
+struct mlx5_flow_drop {
+	struct mlx5_hrxq_drop hrxq; /**< Drop hash Rx queue. */
+};
+
+struct rte_flow {
+	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
 	uint32_t mark:1; /**< Set if the flow is marked. */
 	uint32_t drop:1; /**< Drop queue. */
-	uint64_t hash_fields; /**< Fields that participate in the hash. */
+	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
+	struct ibv_flow *ibv_flow; /**< Verbs flow. */
+	uint16_t queues_n; /**< Number of entries in queue[]. */
+	uint16_t (*queues)[]; /**< Queues indexes to use. */
+	union {
+		struct mlx5_flow frxq; /**< Flow with Rx queue. */
+		struct mlx5_flow_drop drxq; /**< Flow with drop Rx queue. */
+	};
 };
 
 /** Static initializer for items. */
@@ -288,14 +308,6 @@ struct mlx5_flow_parse {
 	struct mlx5_flow_action actions; /**< Parsed action result. */
 };
 
-/** Structure for Drop queue. */
-struct rte_flow_drop {
-	struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
-	struct ibv_qp *qp; /**< Verbs queue pair. */
-	struct ibv_wq *wq; /**< Verbs work queue. */
-	struct ibv_cq *cq; /**< Verbs completion queue. */
-};
-
 static const struct rte_flow_ops mlx5_flow_ops = {
 	.validate = mlx5_flow_validate,
 	.create = mlx5_flow_create,
@@ -1052,8 +1064,8 @@ priv_flow_create_action_queue_drop(struct priv *priv,
 	rte_flow->ibv_attr = flow->ibv_attr;
 	if (!priv->dev->data->dev_started)
 		return rte_flow;
-	rte_flow->qp = priv->flow_drop_queue->qp;
-	rte_flow->ibv_flow = ibv_create_flow(rte_flow->qp,
+	rte_flow->drxq.hrxq.qp = priv->flow_drop_queue->qp;
+	rte_flow->ibv_flow = ibv_create_flow(rte_flow->drxq.hrxq.qp,
 					     rte_flow->ibv_attr);
 	if (!rte_flow->ibv_flow) {
 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
@@ -1091,62 +1103,52 @@ priv_flow_create_action_queue(struct priv *priv,
 	assert(priv->pd);
 	assert(priv->ctx);
 	assert(!flow->actions.drop);
-	rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
+	rte_flow =
+		rte_calloc(__func__, 1,
+			   sizeof(*flow) +
+			   flow->actions.queues_n * sizeof(uint16_t),
+			   0);
 	if (!rte_flow) {
 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
 				   NULL, "cannot allocate flow memory");
 		return NULL;
 	}
-	for (i = 0; i != flow->actions.queues_n; ++i) {
-		struct mlx5_rxq_data *q =
-			(*priv->rxqs)[flow->actions.queues[i]];
-
-		q->mark |= flow->actions.mark;
-	}
 	rte_flow->mark = flow->actions.mark;
 	rte_flow->ibv_attr = flow->ibv_attr;
-	rte_flow->hash_fields = flow->hash_fields;
-	rte_flow->ind_table =
-		mlx5_priv_ind_table_ibv_get(priv, flow->actions.queues,
-					    flow->actions.queues_n);
-	if (!rte_flow->ind_table) {
-		rte_flow->ind_table =
-			mlx5_priv_ind_table_ibv_new(priv, flow->actions.queues,
-						    flow->actions.queues_n);
-		if (!rte_flow->ind_table) {
-			rte_flow_error_set(error, ENOMEM,
-					   RTE_FLOW_ERROR_TYPE_HANDLE,
-					   NULL,
-					   "cannot allocate indirection table");
-			goto error;
-		}
+	rte_flow->queues = (uint16_t (*)[])(rte_flow + 1);
+	memcpy(rte_flow->queues, flow->actions.queues,
+	       flow->actions.queues_n * sizeof(uint16_t));
+	rte_flow->queues_n = flow->actions.queues_n;
+	rte_flow->frxq.hash_fields = flow->hash_fields;
+	rte_flow->frxq.hrxq = mlx5_priv_hrxq_get(priv, rss_hash_default_key,
+						 rss_hash_default_key_len,
+						 flow->hash_fields,
+						 (*rte_flow->queues),
+						 rte_flow->queues_n);
+	if (rte_flow->frxq.hrxq) {
+		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
+				   NULL, "duplicated flow");
+		goto error;
 	}
-	rte_flow->qp = ibv_create_qp_ex(
-		priv->ctx,
-		&(struct ibv_qp_init_attr_ex){
-			.qp_type = IBV_QPT_RAW_PACKET,
-			.comp_mask =
-				IBV_QP_INIT_ATTR_PD |
-				IBV_QP_INIT_ATTR_IND_TABLE |
-				IBV_QP_INIT_ATTR_RX_HASH,
-			.rx_hash_conf = (struct ibv_rx_hash_conf){
-				.rx_hash_function =
-					IBV_RX_HASH_FUNC_TOEPLITZ,
-				.rx_hash_key_len = rss_hash_default_key_len,
-				.rx_hash_key = rss_hash_default_key,
-				.rx_hash_fields_mask = rte_flow->hash_fields,
-			},
-			.rwq_ind_tbl = rte_flow->ind_table->ind_table,
-			.pd = priv->pd
-		});
-	if (!rte_flow->qp) {
+	rte_flow->frxq.hrxq = mlx5_priv_hrxq_new(priv, rss_hash_default_key,
+						 rss_hash_default_key_len,
+						 flow->hash_fields,
+						 (*rte_flow->queues),
+						 rte_flow->queues_n);
+	if (!rte_flow->frxq.hrxq) {
 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
-				   NULL, "cannot allocate QP");
+				   NULL, "cannot create hash rxq");
 		goto error;
 	}
+	for (i = 0; i != flow->actions.queues_n; ++i) {
+		struct mlx5_rxq_data *q =
+			(*priv->rxqs)[flow->actions.queues[i]];
+
+		q->mark |= flow->actions.mark;
+	}
 	if (!priv->dev->data->dev_started)
 		return rte_flow;
-	rte_flow->ibv_flow = ibv_create_flow(rte_flow->qp,
+	rte_flow->ibv_flow = ibv_create_flow(rte_flow->frxq.hrxq->qp,
 					     rte_flow->ibv_attr);
 	if (!rte_flow->ibv_flow) {
 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
@@ -1156,10 +1158,8 @@ priv_flow_create_action_queue(struct priv *priv,
 	return rte_flow;
 error:
 	assert(rte_flow);
-	if (rte_flow->qp)
-		ibv_destroy_qp(rte_flow->qp);
-	if (rte_flow->ind_table)
-		mlx5_priv_ind_table_ibv_release(priv, rte_flow->ind_table);
+	if (rte_flow->frxq.hrxq)
+		mlx5_priv_hrxq_release(priv, rte_flow->frxq.hrxq);
 	rte_free(rte_flow);
 	return NULL;
 }
@@ -1277,45 +1277,43 @@ priv_flow_destroy(struct priv *priv,
 		  struct rte_flow *flow)
 {
 	unsigned int i;
+	uint16_t *queues;
+	uint16_t queues_n;
 
-	TAILQ_REMOVE(&priv->flows, flow, next);
-	if (flow->ibv_flow)
-		claim_zero(ibv_destroy_flow(flow->ibv_flow));
-	if (flow->drop)
+	if (flow->drop || !flow->mark)
 		goto free;
-	if (flow->qp)
-		claim_zero(ibv_destroy_qp(flow->qp));
-	for (i = 0; i != flow->ind_table->queues_n; ++i) {
+	queues = flow->frxq.hrxq->ind_table->queues;
+	queues_n = flow->frxq.hrxq->ind_table->queues_n;
+	for (i = 0; i != queues_n; ++i) {
 		struct rte_flow *tmp;
-		struct mlx5_rxq_data *rxq_data =
-			(*priv->rxqs)[flow->ind_table->queues[i]];
+		struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[queues[i]];
+		int mark = 0;
 
 		/*
 		 * To remove the mark from the queue, the queue must not be
 		 * present in any other marked flow (RSS or not).
 		 */
-		if (flow->mark) {
-			int mark = 0;
-
-			TAILQ_FOREACH(tmp, &priv->flows, next) {
-				unsigned int j;
-
-				if (tmp->drop)
-					continue;
-				if (!tmp->mark)
-					continue;
-				for (j = 0;
-				     (j != tmp->ind_table->queues_n) && !mark;
-				     j++)
-					if (tmp->ind_table->queues[j] ==
-					    flow->ind_table->queues[i])
-						mark = 1;
-			}
-			rxq_data->mark = mark;
+		TAILQ_FOREACH(tmp, &priv->flows, next) {
+			unsigned int j;
+
+			if (!tmp->mark)
+				continue;
+			for (j = 0;
+			     (j != tmp->frxq.hrxq->ind_table->queues_n) &&
+			     !mark;
+			     j++)
+				if (tmp->frxq.hrxq->ind_table->queues[j] ==
+				    queues[i])
+					mark = 1;
 		}
+		rxq_data->mark = mark;
 	}
-	mlx5_priv_ind_table_ibv_release(priv, flow->ind_table);
 free:
+	if (flow->ibv_flow)
+		claim_zero(ibv_destroy_flow(flow->ibv_flow));
+	if (!flow->drop)
+		mlx5_priv_hrxq_release(priv, flow->frxq.hrxq);
+	TAILQ_REMOVE(&priv->flows, flow, next);
 	rte_free(flow->ibv_attr);
 	DEBUG("Flow destroyed %p", (void *)flow);
 	rte_free(flow);
@@ -1389,7 +1387,7 @@ mlx5_flow_flush(struct rte_eth_dev *dev,
 static int
 priv_flow_create_drop_queue(struct priv *priv)
 {
-	struct rte_flow_drop *fdq = NULL;
+	struct mlx5_hrxq_drop *fdq = NULL;
 
 	assert(priv->pd);
 	assert(priv->ctx);
@@ -1472,7 +1470,7 @@ priv_flow_create_drop_queue(struct priv *priv)
 static void
 priv_flow_delete_drop_queue(struct priv *priv)
 {
-	struct rte_flow_drop *fdq = priv->flow_drop_queue;
+	struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
 
 	if (!fdq)
 		return;
@@ -1504,9 +1502,12 @@ priv_flow_stop(struct priv *priv)
 	TAILQ_FOREACH_REVERSE(flow, &priv->flows, mlx5_flows, next) {
 		claim_zero(ibv_destroy_flow(flow->ibv_flow));
 		flow->ibv_flow = NULL;
+		mlx5_priv_hrxq_release(priv, flow->frxq.hrxq);
+		flow->frxq.hrxq = NULL;
 		if (flow->mark) {
 			unsigned int n;
-			struct mlx5_ind_table_ibv *ind_tbl = flow->ind_table;
+			struct mlx5_ind_table_ibv *ind_tbl =
+				flow->frxq.hrxq->ind_table;
 
 			for (n = 0; n < ind_tbl->queues_n; ++n)
 				(*priv->rxqs)[ind_tbl->queues[n]]->mark = 0;
@@ -1535,13 +1536,31 @@ priv_flow_start(struct priv *priv)
 	if (ret)
 		return -1;
 	TAILQ_FOREACH(flow, &priv->flows, next) {
-		struct ibv_qp *qp;
-
-		if (flow->drop)
-			qp = priv->flow_drop_queue->qp;
-		else
-			qp = flow->qp;
-		flow->ibv_flow = ibv_create_flow(qp, flow->ibv_attr);
+		if (flow->frxq.hrxq)
+			goto flow_create;
+		flow->frxq.hrxq =
+			mlx5_priv_hrxq_get(priv, rss_hash_default_key,
+					   rss_hash_default_key_len,
+					   flow->frxq.hash_fields,
+					   (*flow->queues),
+					   flow->queues_n);
+		if (flow->frxq.hrxq)
+			goto flow_create;
+		flow->frxq.hrxq =
+			mlx5_priv_hrxq_new(priv, rss_hash_default_key,
+					   rss_hash_default_key_len,
+					   flow->frxq.hash_fields,
+					   (*flow->queues),
+					   flow->queues_n);
+		if (!flow->frxq.hrxq) {
+			DEBUG("Flow %p cannot be applied",
+			      (void *)flow);
+			rte_errno = EINVAL;
+			return rte_errno;
+		}
+flow_create:
+		flow->ibv_flow = ibv_create_flow(flow->frxq.hrxq->qp,
+						 flow->ibv_attr);
 		if (!flow->ibv_flow) {
 			DEBUG("Flow %p cannot be applied", (void *)flow);
 			rte_errno = EINVAL;
@@ -1551,8 +1570,11 @@ priv_flow_start(struct priv *priv)
 		if (flow->mark) {
 			unsigned int n;
 
-			for (n = 0; n < flow->ind_table->queues_n; ++n) {
-				uint16_t idx = flow->ind_table->queues[n];
+			for (n = 0;
+			     n < flow->frxq.hrxq->ind_table->queues_n;
+			     ++n) {
+				uint16_t idx =
+					flow->frxq.hrxq->ind_table->queues[n];
 				(*priv->rxqs)[idx]->mark = 1;
 			}
 		}
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 4a53282..b240c16 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -1775,3 +1775,168 @@ mlx5_priv_ind_table_ibv_verify(struct priv *priv)
 	}
 	return ret;
 }
+
+/**
+ * Create an Rx Hash queue.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param rss_key
+ *   RSS key for the Rx hash queue.
+ * @param rss_key_len
+ *   RSS key length.
+ * @param hash_fields
+ *   Verbs protocol hash field to make the RSS on.
+ * @param queues
+ *   Queues entering in hash queue.
+ * @param queues_n
+ *   Number of queues.
+ *
+ * @return
+ *   An hash Rx queue on success.
+ */
+struct mlx5_hrxq*
+mlx5_priv_hrxq_new(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len,
+		   uint64_t hash_fields, uint16_t queues[], uint16_t queues_n)
+{
+	struct mlx5_hrxq *hrxq;
+	struct mlx5_ind_table_ibv *ind_tbl;
+	struct ibv_qp *qp;
+
+	ind_tbl = mlx5_priv_ind_table_ibv_get(priv, queues, queues_n);
+	if (!ind_tbl)
+		ind_tbl = mlx5_priv_ind_table_ibv_new(priv, queues, queues_n);
+	if (!ind_tbl)
+		return NULL;
+	qp = ibv_create_qp_ex(
+		priv->ctx,
+		&(struct ibv_qp_init_attr_ex){
+			.qp_type = IBV_QPT_RAW_PACKET,
+			.comp_mask =
+				IBV_QP_INIT_ATTR_PD |
+				IBV_QP_INIT_ATTR_IND_TABLE |
+				IBV_QP_INIT_ATTR_RX_HASH,
+			.rx_hash_conf = (struct ibv_rx_hash_conf){
+				.rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
+				.rx_hash_key_len = rss_key_len,
+				.rx_hash_key = rss_key,
+				.rx_hash_fields_mask = hash_fields,
+			},
+			.rwq_ind_tbl = ind_tbl->ind_table,
+			.pd = priv->pd,
+		});
+	if (!qp)
+		goto error;
+	hrxq = rte_calloc(__func__, 1, sizeof(*hrxq) + rss_key_len, 0);
+	if (!hrxq)
+		goto error;
+	hrxq->ind_table = ind_tbl;
+	hrxq->qp = qp;
+	hrxq->rss_key_len = rss_key_len;
+	hrxq->hash_fields = hash_fields;
+	memcpy(hrxq->rss_key, rss_key, rss_key_len);
+	rte_atomic32_inc(&hrxq->refcnt);
+	LIST_INSERT_HEAD(&priv->hrxqs, hrxq, next);
+	DEBUG("%p: Hash Rx queue %p: refcnt %d", (void *)priv,
+	      (void *)hrxq, rte_atomic32_read(&hrxq->refcnt));
+	return hrxq;
+error:
+	mlx5_priv_ind_table_ibv_release(priv, ind_tbl);
+	if (qp)
+		claim_zero(ibv_destroy_qp(qp));
+	return NULL;
+}
+
+/**
+ * Get an Rx Hash queue.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param rss_conf
+ *   RSS configuration for the Rx hash queue.
+ * @param queues
+ *   Queues entering in hash queue.
+ * @param queues_n
+ *   Number of queues.
+ *
+ * @return
+ *   An hash Rx queue on success.
+ */
+struct mlx5_hrxq*
+mlx5_priv_hrxq_get(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len,
+		   uint64_t hash_fields, uint16_t queues[], uint16_t queues_n)
+{
+	struct mlx5_hrxq *hrxq;
+
+	LIST_FOREACH(hrxq, &priv->hrxqs, next) {
+		struct mlx5_ind_table_ibv *ind_tbl;
+
+		if (hrxq->rss_key_len != rss_key_len)
+			continue;
+		if (memcmp(hrxq->rss_key, rss_key, rss_key_len))
+			continue;
+		if (hrxq->hash_fields != hash_fields)
+			continue;
+		ind_tbl = mlx5_priv_ind_table_ibv_get(priv, queues, queues_n);
+		if (!ind_tbl)
+			continue;
+		if (ind_tbl != hrxq->ind_table) {
+			mlx5_priv_ind_table_ibv_release(priv, ind_tbl);
+			continue;
+		}
+		rte_atomic32_inc(&hrxq->refcnt);
+		DEBUG("%p: Hash Rx queue %p: refcnt %d", (void *)priv,
+		      (void *)hrxq, rte_atomic32_read(&hrxq->refcnt));
+		return hrxq;
+	}
+	return NULL;
+}
+
+/**
+ * Release the hash Rx queue.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param hrxq
+ *   Pointer to Hash Rx queue to release.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+int
+mlx5_priv_hrxq_release(struct priv *priv, struct mlx5_hrxq *hrxq)
+{
+	DEBUG("%p: Hash Rx queue %p: refcnt %d", (void *)priv,
+	      (void *)hrxq, rte_atomic32_read(&hrxq->refcnt));
+	if (rte_atomic32_dec_and_test(&hrxq->refcnt)) {
+		claim_zero(ibv_destroy_qp(hrxq->qp));
+		mlx5_priv_ind_table_ibv_release(priv, hrxq->ind_table);
+		LIST_REMOVE(hrxq, next);
+		rte_free(hrxq);
+		return 0;
+	}
+	claim_nonzero(mlx5_priv_ind_table_ibv_release(priv, hrxq->ind_table));
+	return EBUSY;
+}
+
+/**
+ * Verify the Rx Queue list is empty
+ *
+ * @param priv
+ *  Pointer to private structure.
+ *
+ * @return the number of object not released.
+ */
+int
+mlx5_priv_hrxq_ibv_verify(struct priv *priv)
+{
+	struct mlx5_hrxq *hrxq;
+	int ret = 0;
+
+	LIST_FOREACH(hrxq, &priv->hrxqs, next) {
+		DEBUG("%p: Verbs Hash Rx queue %p still referenced",
+		      (void *)priv, (void *)hrxq);
+		++ret;
+	}
+	return ret;
+}
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 1b6dc97..30ce810 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -165,6 +165,17 @@ struct mlx5_ind_table_ibv {
 	uint16_t queues[]; /**< Queue list. */
 };
 
+/* Hash Rx queue. */
+struct mlx5_hrxq {
+	LIST_ENTRY(mlx5_hrxq) next; /* Pointer to the next element. */
+	rte_atomic32_t refcnt; /* Reference counter. */
+	struct mlx5_ind_table_ibv *ind_table; /* Indirection table. */
+	struct ibv_qp *qp; /* Verbs queue pair. */
+	uint64_t hash_fields; /* Verbs Hash fields. */
+	uint8_t rss_key_len; /* Hash key length in bytes. */
+	uint8_t rss_key[]; /* Hash key. */
+};
+
 /* Hash RX queue types. */
 enum hash_rxq_type {
 	HASH_RXQ_TCPV4,
@@ -362,6 +373,12 @@ struct mlx5_ind_table_ibv *mlx5_priv_ind_table_ibv_get(struct priv *,
 						       uint16_t);
 int mlx5_priv_ind_table_ibv_release(struct priv *, struct mlx5_ind_table_ibv *);
 int mlx5_priv_ind_table_ibv_verify(struct priv *);
+struct mlx5_hrxq *mlx5_priv_hrxq_new(struct priv *, uint8_t *, uint8_t,
+				     uint64_t, uint16_t [], uint16_t);
+struct mlx5_hrxq *mlx5_priv_hrxq_get(struct priv *, uint8_t *, uint8_t,
+				     uint64_t, uint16_t [], uint16_t);
+int mlx5_priv_hrxq_release(struct priv *, struct mlx5_hrxq *);
+int mlx5_priv_hrxq_ibv_verify(struct priv *);
 
 /* mlx5_txq.c */
 
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * Re: [dpdk-dev] [PATCH v2 15/30] net/mlx5: add Hash Rx queue object
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 15/30] net/mlx5: add Hash Rx queue object Nelio Laranjeiro
@ 2017-10-06  4:59     ` Yongseok Koh
  2017-10-06  7:03       ` Nélio Laranjeiro
  0 siblings, 1 reply; 129+ messages in thread
From: Yongseok Koh @ 2017-10-06  4:59 UTC (permalink / raw)
  To: Nelio Laranjeiro; +Cc: dev, adrien.mazarguil, ferruh.yigit
On Thu, Oct 05, 2017 at 02:49:47PM +0200, Nelio Laranjeiro wrote:
[...]
> +struct mlx5_hrxq*
> +mlx5_priv_hrxq_get(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len,
> +		   uint64_t hash_fields, uint16_t queues[], uint16_t queues_n)
> +{
> +	struct mlx5_hrxq *hrxq;
> +
> +	LIST_FOREACH(hrxq, &priv->hrxqs, next) {
> +		struct mlx5_ind_table_ibv *ind_tbl;
> +
> +		if (hrxq->rss_key_len != rss_key_len)
> +			continue;
> +		if (memcmp(hrxq->rss_key, rss_key, rss_key_len))
> +			continue;
> +		if (hrxq->hash_fields != hash_fields)
> +			continue;
> +		ind_tbl = mlx5_priv_ind_table_ibv_get(priv, queues, queues_n);
> +		if (!ind_tbl)
> +			continue;
> +		if (ind_tbl != hrxq->ind_table) {
> +			mlx5_priv_ind_table_ibv_release(priv, ind_tbl);
As one hrxq can have only one ind_tbl, it looks unnecessary to increment the
refcnt of ind_tbl. As long as an hrxq exists, its ind_tbl can't be destroyed, so
it's safe. How about moving this _release() up, outside of this if-clause, and
removing the _release() in _hrxq_release()?
However, it is logically flawless, so
Acked-by: Yongseok Koh <yskoh@mellanox.com>
 
Thanks
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * Re: [dpdk-dev] [PATCH v2 15/30] net/mlx5: add Hash Rx queue object
  2017-10-06  4:59     ` Yongseok Koh
@ 2017-10-06  7:03       ` Nélio Laranjeiro
  2017-10-06 22:50         ` Yongseok Koh
  0 siblings, 1 reply; 129+ messages in thread
From: Nélio Laranjeiro @ 2017-10-06  7:03 UTC (permalink / raw)
  To: Yongseok Koh; +Cc: dev, adrien.mazarguil, ferruh.yigit
On Thu, Oct 05, 2017 at 09:59:58PM -0700, Yongseok Koh wrote:
> On Thu, Oct 05, 2017 at 02:49:47PM +0200, Nelio Laranjeiro wrote:
> [...]
> > +struct mlx5_hrxq*
> > +mlx5_priv_hrxq_get(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len,
> > +		   uint64_t hash_fields, uint16_t queues[], uint16_t queues_n)
> > +{
> > +	struct mlx5_hrxq *hrxq;
> > +
> > +	LIST_FOREACH(hrxq, &priv->hrxqs, next) {
> > +		struct mlx5_ind_table_ibv *ind_tbl;
> > +
> > +		if (hrxq->rss_key_len != rss_key_len)
> > +			continue;
> > +		if (memcmp(hrxq->rss_key, rss_key, rss_key_len))
> > +			continue;
> > +		if (hrxq->hash_fields != hash_fields)
> > +			continue;
> > +		ind_tbl = mlx5_priv_ind_table_ibv_get(priv, queues, queues_n);
> > +		if (!ind_tbl)
> > +			continue;
> > +		if (ind_tbl != hrxq->ind_table) {
> > +			mlx5_priv_ind_table_ibv_release(priv, ind_tbl);
> 
> As one hrxq can have only one ind_tbl, it looks unnecessary to increment refcnt
> of ind_tbl. As long as a hrxq exist, its ind_tbl can't be destroyed. So, it's
> safe. How about moving up this _release() outside of this if-clause and remove
> _release() in _hrxq_release()?
This is right, but on the other hand, an indirection table can be used
by several hash rx queues; that is the main reason why they have their
own reference counter.
  +-------+  +-------+
  | Hrxq  |  | Hrxq  |
  | r = 1 |  | r = 1 |
  +-------+  +-------+
      |          |
      v          v
 +-------------------+
 | indirection table |
 | r = 2             |
 +-------------------+
It seems logical to make the indirection table counter evolve the same way
as the hash rx queue counter; otherwise a second hash rx queue using this
indirection table may release it while it is still in use by another hash rx
queue.
> However, it is logically flawless, so
> Acked-by: Yongseok Koh <yskoh@mellanox.com>
Thanks,
-- 
Nélio Laranjeiro
6WIND
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * Re: [dpdk-dev] [PATCH v2 15/30] net/mlx5: add Hash Rx queue object
  2017-10-06  7:03       ` Nélio Laranjeiro
@ 2017-10-06 22:50         ` Yongseok Koh
  2017-10-09  8:05           ` Nélio Laranjeiro
  0 siblings, 1 reply; 129+ messages in thread
From: Yongseok Koh @ 2017-10-06 22:50 UTC (permalink / raw)
  To: Nélio Laranjeiro; +Cc: dev, adrien.mazarguil, ferruh.yigit
On Fri, Oct 06, 2017 at 09:03:25AM +0200, Nélio Laranjeiro wrote:
> On Thu, Oct 05, 2017 at 09:59:58PM -0700, Yongseok Koh wrote:
> > On Thu, Oct 05, 2017 at 02:49:47PM +0200, Nelio Laranjeiro wrote:
> > [...]
> > > +struct mlx5_hrxq*
> > > +mlx5_priv_hrxq_get(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len,
> > > +		   uint64_t hash_fields, uint16_t queues[], uint16_t queues_n)
> > > +{
> > > +	struct mlx5_hrxq *hrxq;
> > > +
> > > +	LIST_FOREACH(hrxq, &priv->hrxqs, next) {
> > > +		struct mlx5_ind_table_ibv *ind_tbl;
> > > +
> > > +		if (hrxq->rss_key_len != rss_key_len)
> > > +			continue;
> > > +		if (memcmp(hrxq->rss_key, rss_key, rss_key_len))
> > > +			continue;
> > > +		if (hrxq->hash_fields != hash_fields)
> > > +			continue;
> > > +		ind_tbl = mlx5_priv_ind_table_ibv_get(priv, queues, queues_n);
> > > +		if (!ind_tbl)
> > > +			continue;
> > > +		if (ind_tbl != hrxq->ind_table) {
> > > +			mlx5_priv_ind_table_ibv_release(priv, ind_tbl);
> > 
> > As one hrxq can have only one ind_tbl, it looks unnecessary to increment refcnt
> > of ind_tbl. As long as a hrxq exist, its ind_tbl can't be destroyed. So, it's
> > safe. How about moving up this _release() outside of this if-clause and remove
> > _release() in _hrxq_release()?
> 
> This is right, but in the other side, an indirection table can be used
> by several hash rx queues, that is the main reason why they have their
> own reference counter.
> 
> 
>   +-------+  +-------+
>   | Hrxq  |  | Hrxq  |
>   | r = 1 |  | r = 1 |
>   +-------+  +-------+
>       |          |
>       v          v
>  +-------------------+
>  | indirection table |
>  | r = 2             |
>  +-------------------+
> 
> Seems logical to make the Indirection table counter evolve the same way
> as the hash rx queue, otherwise a second hash rx queue using this
> indirection may release it whereas it is still in use by another hash rx
> queue.
Whenever a hash Rx queue is created, it obtains an ind_tbl either through
mlx5_priv_ind_table_ibv_get() or through mlx5_priv_ind_table_ibv_new(), so the
refcnt of the ind_tbl has already been increased. Even if another hash RxQ
sharing that ind_tbl releases it, it is safe. That's why I don't think
ind_tbl->refcnt needs to be increased when calling mlx5_priv_hrxq_get(). Does
that make sense?
Thanks,
Yongseok
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * Re: [dpdk-dev] [PATCH v2 15/30] net/mlx5: add Hash Rx queue object
  2017-10-06 22:50         ` Yongseok Koh
@ 2017-10-09  8:05           ` Nélio Laranjeiro
  2017-10-09 13:48             ` Yongseok Koh
  0 siblings, 1 reply; 129+ messages in thread
From: Nélio Laranjeiro @ 2017-10-09  8:05 UTC (permalink / raw)
  To: Yongseok Koh; +Cc: dev, adrien.mazarguil, ferruh.yigit
On Fri, Oct 06, 2017 at 03:50:06PM -0700, Yongseok Koh wrote:
> On Fri, Oct 06, 2017 at 09:03:25AM +0200, Nélio Laranjeiro wrote:
> > On Thu, Oct 05, 2017 at 09:59:58PM -0700, Yongseok Koh wrote:
> > > On Thu, Oct 05, 2017 at 02:49:47PM +0200, Nelio Laranjeiro wrote:
> > > [...]
> > > > +struct mlx5_hrxq*
> > > > +mlx5_priv_hrxq_get(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len,
> > > > +		   uint64_t hash_fields, uint16_t queues[], uint16_t queues_n)
> > > > +{
> > > > +	struct mlx5_hrxq *hrxq;
> > > > +
> > > > +	LIST_FOREACH(hrxq, &priv->hrxqs, next) {
> > > > +		struct mlx5_ind_table_ibv *ind_tbl;
> > > > +
> > > > +		if (hrxq->rss_key_len != rss_key_len)
> > > > +			continue;
> > > > +		if (memcmp(hrxq->rss_key, rss_key, rss_key_len))
> > > > +			continue;
> > > > +		if (hrxq->hash_fields != hash_fields)
> > > > +			continue;
> > > > +		ind_tbl = mlx5_priv_ind_table_ibv_get(priv, queues, queues_n);
> > > > +		if (!ind_tbl)
> > > > +			continue;
> > > > +		if (ind_tbl != hrxq->ind_table) {
> > > > +			mlx5_priv_ind_table_ibv_release(priv, ind_tbl);
> > > 
> > > As one hrxq can have only one ind_tbl, it looks unnecessary to increment refcnt
> > > of ind_tbl. As long as a hrxq exist, its ind_tbl can't be destroyed. So, it's
> > > safe. How about moving up this _release() outside of this if-clause and remove
> > > _release() in _hrxq_release()?
> > 
> > This is right, but in the other side, an indirection table can be used
> > by several hash rx queues, that is the main reason why they have their
> > own reference counter.
> > 
> > 
> >   +-------+  +-------+
> >   | Hrxq  |  | Hrxq  |
> >   | r = 1 |  | r = 1 |
> >   +-------+  +-------+
> >       |          |
> >       v          v
> >  +-------------------+
> >  | indirection table |
> >  | r = 2             |
> >  +-------------------+
> > 
> > Seems logical to make the Indirection table counter evolve the same way
> > as the hash rx queue, otherwise a second hash rx queue using this
> > indirection may release it whereas it is still in use by another hash rx
> > queue.
> 
> Whenever a hash Rx queue is created, it gets to have a ind_tbl either by
> mlx5_priv_ind_table_ibv_get() or by mlx5_priv_ind_table_ibv_new(). So, the
> refcnt of the ind_tbl is already increased. So, even if other hash RxQ which
> have had the ind_tbl releases it, it is safe. That's why I don't think
> ind_tbl->refcnt needs to get increased on calling mlx5_priv_hrxq_get(). Makes
> sense?
It makes sense, but in that case the whole patch series needs to be
modified to follow this design. The current design is: when something needs an
object, it gets a reference; when it no longer needs the object, it releases
the reference. This means a get() on a high-level object causes a get() on the
underlying ones, and a release() on a high-level object causes a release() on
the underlying ones. In this case, a flow holds a reference on every
object it uses that contains a reference counter, even the
hidden ones.
Currently it won't hurt, as this is a control-plane path which already
relies on a lot of system calls.
Can we agree on leaving the design as is for this release and maybe
changing it in the next one?
Thanks,
-- 
Nélio Laranjeiro
6WIND
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * Re: [dpdk-dev] [PATCH v2 15/30] net/mlx5: add Hash Rx queue object
  2017-10-09  8:05           ` Nélio Laranjeiro
@ 2017-10-09 13:48             ` Yongseok Koh
  0 siblings, 0 replies; 129+ messages in thread
From: Yongseok Koh @ 2017-10-09 13:48 UTC (permalink / raw)
  To: Nélio Laranjeiro; +Cc: dev, Adrien Mazarguil, ferruh.yigit
On Oct 9, 2017, at 1:05 AM, Nélio Laranjeiro <nelio.laranjeiro@6wind.com<mailto:nelio.laranjeiro@6wind.com>> wrote:
Can we agree on letting the design as is for this release and maybe
changing it in the next one?
Sure, I totally agree. I didn’t want to stop you. Like I mentioned, as there’s no logical flaw, I acked the patches. Please go with v3.
Thanks
Yongseok
^ permalink raw reply	[flat|nested] 129+ messages in thread 
 
 
 
 
 
- * [dpdk-dev] [PATCH v2 16/30] net/mlx5: fix clang compilation error
       [not found] ` <cover.1507207731.git.nelio.laranjeiro@6wind.com>
                     ` (14 preceding siblings ...)
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 15/30] net/mlx5: add Hash Rx queue object Nelio Laranjeiro
@ 2017-10-05 12:49   ` Nelio Laranjeiro
  2017-10-06  5:01     ` Yongseok Koh
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 17/30] net/mlx5: use flow to enable promiscuous mode Nelio Laranjeiro
                     ` (13 subsequent siblings)
  29 siblings, 1 reply; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-05 12:49 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit, stable
drivers/net/mlx5/mlx5_rxq.c:606:6: error: comparison of constant 4
      with expression of type 'enum hash_rxq_flow_type' is always true
      [-Werror,-Wtautological-constant-out-of-range-compare]
                        i != (int)RTE_DIM((*priv->hash_rxqs)[0].special_flow);
                        ~ ^  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Clang expects to have an index going up to the special_flow size, which is
defined by MLX5_MAX_SPECIAL_FLOWS and whose value is 4.  Comparing to an
unrelated enum where the index may be lower causes this compilation issue.
Fixes: 36351ea34b92 ("net/mlx: fix build with icc")
Cc: ferruh.yigit@intel.com
Cc: stable@dpdk.org
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5_rxq.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index b240c16..81e9eb5 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -606,11 +606,9 @@ priv_allow_flow_type(struct priv *priv, enum hash_rxq_flow_type type)
 int
 priv_rehash_flows(struct priv *priv)
 {
-	enum hash_rxq_flow_type i;
+	size_t i;
 
-	for (i = HASH_RXQ_FLOW_TYPE_PROMISC;
-			i != RTE_DIM((*priv->hash_rxqs)[0].special_flow);
-			++i)
+	for (i = 0; i != RTE_DIM((*priv->hash_rxqs)[0].special_flow); ++i)
 		if (!priv_allow_flow_type(priv, i)) {
 			priv_special_flow_disable(priv, i);
 		} else {
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * Re: [dpdk-dev] [PATCH v2 16/30] net/mlx5: fix clang compilation error
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 16/30] net/mlx5: fix clang compilation error Nelio Laranjeiro
@ 2017-10-06  5:01     ` Yongseok Koh
  0 siblings, 0 replies; 129+ messages in thread
From: Yongseok Koh @ 2017-10-06  5:01 UTC (permalink / raw)
  To: Nelio Laranjeiro; +Cc: dev, adrien.mazarguil, ferruh.yigit, stable
On Thu, Oct 05, 2017 at 02:49:48PM +0200, Nelio Laranjeiro wrote:
> drivers/net/mlx5/mlx5_rxq.c:606:6: error: comparison of constant 4
>       with expression of type 'enum hash_rxq_flow_type' is always true
>       [-Werror,-Wtautological-constant-out-of-range-compare]
>                         i != (int)RTE_DIM((*priv->hash_rxqs)[0].special_flow);
>                         ~ ^  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> 
> Clang expects to have an index going upto special_flow size which is
> defined by MLX5_MAX_SPECIAL_FLOWS and value is 4.  Comparing to an
> unrelated enum where index my be lower cause this compilation issue.
> 
> Fixes: 36351ea34b92 ("net/mlx: fix build with icc")
> Cc: ferruh.yigit@intel.com
> Cc: stable@dpdk.org
> 
> Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
> ---
Acked-by: Yongseok Koh <yskoh@mellanox.com>
 
Thanks
^ permalink raw reply	[flat|nested] 129+ messages in thread
 
- * [dpdk-dev] [PATCH v2 17/30] net/mlx5: use flow to enable promiscuous mode
       [not found] ` <cover.1507207731.git.nelio.laranjeiro@6wind.com>
                     ` (15 preceding siblings ...)
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 16/30] net/mlx5: fix clang compilation error Nelio Laranjeiro
@ 2017-10-05 12:49   ` Nelio Laranjeiro
  2017-10-06  5:07     ` Yongseok Koh
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 18/30] net/mlx5: use flow to enable all multi mode Nelio Laranjeiro
                     ` (12 subsequent siblings)
  29 siblings, 1 reply; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-05 12:49 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
RSS hash configuration is currently ignored by the PMD; this commit
removes the RSS feature on promiscuous mode.
This functionality will be added in a later commit.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5.c         |   3 +-
 drivers/net/mlx5/mlx5.h         |  15 +++--
 drivers/net/mlx5/mlx5_flow.c    | 141 ++++++++++++++++++++++++++++++++++------
 drivers/net/mlx5/mlx5_rxmode.c  |  52 +++++----------
 drivers/net/mlx5/mlx5_rxq.c     |   6 --
 drivers/net/mlx5/mlx5_rxtx.h    |   3 -
 drivers/net/mlx5/mlx5_trigger.c |  19 ++++--
 7 files changed, 166 insertions(+), 73 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 2860480..8c99fa2 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -201,7 +201,7 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	priv_special_flow_disable_all(priv);
 	priv_mac_addrs_disable(priv);
 	priv_destroy_hash_rxqs(priv);
-
+	priv_flow_flush(priv, &priv->flows);
 	/* Prevent crashes when queues are still in use. */
 	dev->rx_pkt_burst = removed_rx_burst;
 	dev->tx_pkt_burst = removed_tx_burst;
@@ -884,6 +884,7 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 		priv->dev = eth_dev;
 		eth_dev->dev_ops = &mlx5_dev_ops;
 		TAILQ_INIT(&priv->flows);
+		TAILQ_INIT(&priv->ctrl_flows);
 
 		/* Hint libmlx5 to use PMD allocator for data plane resources */
 		struct mlx5dv_ctx_allocators alctr = {
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 77413c9..2699917 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -39,6 +39,7 @@
 #include <limits.h>
 #include <net/if.h>
 #include <netinet/in.h>
+#include <sys/queue.h>
 
 /* Verbs header. */
 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
@@ -86,6 +87,9 @@ struct mlx5_xstats_ctrl {
 	uint64_t base[MLX5_MAX_XSTATS];
 };
 
+/* Flow list . */
+TAILQ_HEAD(mlx5_flows, rte_flow);
+
 struct priv {
 	struct rte_eth_dev *dev; /* Ethernet device of master process. */
 	struct ibv_context *ctx; /* Verbs context. */
@@ -104,7 +108,6 @@ struct priv {
 	/* Device properties. */
 	uint16_t mtu; /* Configured MTU. */
 	uint8_t port; /* Physical port number. */
-	unsigned int promisc_req:1; /* Promiscuous mode requested. */
 	unsigned int allmulti_req:1; /* All multicast mode requested. */
 	unsigned int hw_csum:1; /* Checksum offload is supported. */
 	unsigned int hw_csum_l2tun:1; /* Same for L2 tunnels. */
@@ -145,7 +148,8 @@ struct priv {
 	unsigned int (*reta_idx)[]; /* RETA index table. */
 	unsigned int reta_idx_n; /* RETA index size. */
 	struct mlx5_hrxq_drop *flow_drop_queue; /* Flow drop queue. */
-	TAILQ_HEAD(mlx5_flows, rte_flow) flows; /* RTE Flow rules. */
+	struct mlx5_flows flows; /* RTE Flow rules. */
+	struct mlx5_flows ctrl_flows; /* Control flow rules. */
 	LIST_HEAD(mr, mlx5_mr) mr; /* Memory region. */
 	LIST_HEAD(rxq, mlx5_rxq_ctrl) rxqsctrl; /* DPDK Rx queues. */
 	LIST_HEAD(rxqibv, mlx5_rxq_ibv) rxqsibv; /* Verbs Rx queues. */
@@ -293,11 +297,14 @@ struct rte_flow *mlx5_flow_create(struct rte_eth_dev *,
 				  struct rte_flow_error *);
 int mlx5_flow_destroy(struct rte_eth_dev *, struct rte_flow *,
 		      struct rte_flow_error *);
+void priv_flow_flush(struct priv *, struct mlx5_flows *);
 int mlx5_flow_flush(struct rte_eth_dev *, struct rte_flow_error *);
 int mlx5_flow_isolate(struct rte_eth_dev *, int, struct rte_flow_error *);
-int priv_flow_start(struct priv *);
-void priv_flow_stop(struct priv *);
+int priv_flow_start(struct priv *, struct mlx5_flows *);
+void priv_flow_stop(struct priv *, struct mlx5_flows *);
 int priv_flow_verify(struct priv *);
+int mlx5_ctrl_flow(struct rte_eth_dev *, struct rte_flow_item_eth *,
+		   struct rte_flow_item_eth *, unsigned int);
 
 /* mlx5_socket.c */
 
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 4948882..c8b16d8 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -52,6 +52,9 @@
 #include "mlx5.h"
 #include "mlx5_prm.h"
 
+/* Define minimal priority for control plane flows. */
+#define MLX5_CTRL_FLOW_PRIORITY 4
+
 static int
 mlx5_flow_create_eth(const struct rte_flow_item *item,
 		     const void *default_mask,
@@ -451,7 +454,7 @@ priv_flow_validate(struct priv *priv,
 				   "groups are not supported");
 		return -rte_errno;
 	}
-	if (attr->priority) {
+	if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
 		rte_flow_error_set(error, ENOTSUP,
 				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
 				   NULL,
@@ -1169,6 +1172,8 @@ priv_flow_create_action_queue(struct priv *priv,
  *
  * @param priv
  *   Pointer to private structure.
+ * @param list
+ *   Pointer to a TAILQ flow list.
  * @param[in] attr
  *   Flow rule attributes.
  * @param[in] pattern
@@ -1183,6 +1188,7 @@ priv_flow_create_action_queue(struct priv *priv,
  */
 static struct rte_flow *
 priv_flow_create(struct priv *priv,
+		 struct mlx5_flows *list,
 		 const struct rte_flow_attr *attr,
 		 const struct rte_flow_item items[],
 		 const struct rte_flow_action actions[],
@@ -1232,6 +1238,10 @@ priv_flow_create(struct priv *priv,
 		rte_flow = priv_flow_create_action_queue(priv, &flow, error);
 	if (!rte_flow)
 		goto exit;
+	if (rte_flow) {
+		TAILQ_INSERT_TAIL(list, rte_flow, next);
+		DEBUG("Flow created %p", (void *)rte_flow);
+	}
 	return rte_flow;
 exit:
 	rte_free(flow.ibv_attr);
@@ -1255,11 +1265,8 @@ mlx5_flow_create(struct rte_eth_dev *dev,
 	struct rte_flow *flow;
 
 	priv_lock(priv);
-	flow = priv_flow_create(priv, attr, items, actions, error);
-	if (flow) {
-		TAILQ_INSERT_TAIL(&priv->flows, flow, next);
-		DEBUG("Flow created %p", (void *)flow);
-	}
+	flow = priv_flow_create(priv, &priv->flows, attr, items, actions,
+				error);
 	priv_unlock(priv);
 	return flow;
 }
@@ -1269,11 +1276,14 @@ mlx5_flow_create(struct rte_eth_dev *dev,
  *
  * @param priv
  *   Pointer to private structure.
+ * @param list
+ *   Pointer to a TAILQ flow list.
  * @param[in] flow
  *   Flow to destroy.
  */
 static void
 priv_flow_destroy(struct priv *priv,
+		  struct mlx5_flows *list,
 		  struct rte_flow *flow)
 {
 	unsigned int i;
@@ -1293,7 +1303,7 @@ priv_flow_destroy(struct priv *priv,
 		 * To remove the mark from the queue, the queue must not be
 		 * present in any other marked flow (RSS or not).
 		 */
-		TAILQ_FOREACH(tmp, &priv->flows, next) {
+		TAILQ_FOREACH(tmp, list, next) {
 			unsigned int j;
 
 			if (!tmp->mark)
@@ -1313,7 +1323,7 @@ priv_flow_destroy(struct priv *priv,
 		claim_zero(ibv_destroy_flow(flow->ibv_flow));
 	if (!flow->drop)
 		mlx5_priv_hrxq_release(priv, flow->frxq.hrxq);
-	TAILQ_REMOVE(&priv->flows, flow, next);
+	TAILQ_REMOVE(list, flow, next);
 	rte_free(flow->ibv_attr);
 	DEBUG("Flow destroyed %p", (void *)flow);
 	rte_free(flow);
@@ -1334,7 +1344,7 @@ mlx5_flow_destroy(struct rte_eth_dev *dev,
 
 	(void)error;
 	priv_lock(priv);
-	priv_flow_destroy(priv, flow);
+	priv_flow_destroy(priv, &priv->flows, flow);
 	priv_unlock(priv);
 	return 0;
 }
@@ -1344,15 +1354,17 @@ mlx5_flow_destroy(struct rte_eth_dev *dev,
  *
  * @param priv
  *   Pointer to private structure.
+ * @param list
+ *   Pointer to a TAILQ flow list.
  */
-static void
-priv_flow_flush(struct priv *priv)
+void
+priv_flow_flush(struct priv *priv, struct mlx5_flows *list)
 {
-	while (!TAILQ_EMPTY(&priv->flows)) {
+	while (!TAILQ_EMPTY(list)) {
 		struct rte_flow *flow;
 
-		flow = TAILQ_FIRST(&priv->flows);
-		priv_flow_destroy(priv, flow);
+		flow = TAILQ_FIRST(list);
+		priv_flow_destroy(priv, list, flow);
 	}
 }
 
@@ -1370,7 +1382,7 @@ mlx5_flow_flush(struct rte_eth_dev *dev,
 
 	(void)error;
 	priv_lock(priv);
-	priv_flow_flush(priv);
+	priv_flow_flush(priv, &priv->flows);
 	priv_unlock(priv);
 	return 0;
 }
@@ -1493,13 +1505,15 @@ priv_flow_delete_drop_queue(struct priv *priv)
  *
  * @param priv
  *   Pointer to private structure.
+ * @param list
+ *   Pointer to a TAILQ flow list.
  */
 void
-priv_flow_stop(struct priv *priv)
+priv_flow_stop(struct priv *priv, struct mlx5_flows *list)
 {
 	struct rte_flow *flow;
 
-	TAILQ_FOREACH_REVERSE(flow, &priv->flows, mlx5_flows, next) {
+	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
 		claim_zero(ibv_destroy_flow(flow->ibv_flow));
 		flow->ibv_flow = NULL;
 		mlx5_priv_hrxq_release(priv, flow->frxq.hrxq);
@@ -1522,12 +1536,14 @@ priv_flow_stop(struct priv *priv)
  *
  * @param priv
  *   Pointer to private structure.
+ * @param list
+ *   Pointer to a TAILQ flow list.
  *
  * @return
  *   0 on success, a errno value otherwise and rte_errno is set.
  */
 int
-priv_flow_start(struct priv *priv)
+priv_flow_start(struct priv *priv, struct mlx5_flows *list)
 {
 	int ret;
 	struct rte_flow *flow;
@@ -1535,7 +1551,7 @@ priv_flow_start(struct priv *priv)
 	ret = priv_flow_create_drop_queue(priv);
 	if (ret)
 		return -1;
-	TAILQ_FOREACH(flow, &priv->flows, next) {
+	TAILQ_FOREACH(flow, list, next) {
 		if (flow->frxq.hrxq)
 			goto flow_create;
 		flow->frxq.hrxq =
@@ -1630,3 +1646,90 @@ priv_flow_verify(struct priv *priv)
 	}
 	return ret;
 }
+
+/**
+ * Enable/disable a flow control configured from the control plane.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param spec
+ *   An Ethernet flow spec to apply.
+ * @param mask
+ *   An Ethernet flow mask to apply.
+ * @param enable
+ *   Enable/disable the flow.
+ *
+ * @return
+ *   0 on success.
+ */
+int
+mlx5_ctrl_flow(struct rte_eth_dev *dev,
+	       struct rte_flow_item_eth *spec,
+	       struct rte_flow_item_eth *mask,
+	       unsigned int enable)
+{
+	struct priv *priv = dev->data->dev_private;
+	const struct rte_flow_attr attr = {
+		.ingress = 1,
+		.priority = MLX5_CTRL_FLOW_PRIORITY,
+	};
+	struct rte_flow_item items[] = {
+		{
+			.type = RTE_FLOW_ITEM_TYPE_ETH,
+			.spec = spec,
+			.last = NULL,
+			.mask = mask,
+		},
+		{
+			.type = RTE_FLOW_ITEM_TYPE_END,
+		},
+	};
+	struct rte_flow_action actions[] = {
+		{
+			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
+			.conf = &(struct rte_flow_action_queue){
+				.index = 0,
+			},
+		},
+		{
+			.type = RTE_FLOW_ACTION_TYPE_END,
+		},
+	};
+	struct rte_flow *flow;
+	struct rte_flow_error error;
+
+	if (enable) {
+		flow = priv_flow_create(priv, &priv->ctrl_flows, &attr, items,
+					actions, &error);
+		if (!flow)
+			return 1;
+	} else {
+		struct spec {
+			struct ibv_flow_attr ibv_attr;
+			struct ibv_flow_spec_eth eth;
+		} spec;
+		struct mlx5_flow_parse parser = {
+			.ibv_attr = &spec.ibv_attr,
+			.offset = sizeof(struct ibv_flow_attr),
+		};
+		struct ibv_flow_spec_eth *eth;
+		const unsigned int attr_size = sizeof(struct ibv_flow_attr);
+
+		claim_zero(mlx5_flow_create_eth(&items[0], NULL, &parser));
+		TAILQ_FOREACH(flow, &priv->ctrl_flows, next) {
+			eth = (void *)((uintptr_t)flow->ibv_attr + attr_size);
+			assert(eth->type == IBV_FLOW_SPEC_ETH);
+			if (!memcmp(eth, &spec.eth, sizeof(*eth)))
+				break;
+		}
+		if (flow) {
+			claim_zero(ibv_destroy_flow(flow->ibv_flow));
+			mlx5_priv_hrxq_release(priv, flow->frxq.hrxq);
+			rte_free(flow->ibv_attr);
+			DEBUG("Control flow destroyed %p", (void *)flow);
+			TAILQ_REMOVE(&priv->ctrl_flows, flow, next);
+			rte_free(flow);
+		}
+	}
+	return 0;
+}
diff --git a/drivers/net/mlx5/mlx5_rxmode.c b/drivers/net/mlx5/mlx5_rxmode.c
index e9ea2aa..f469f41 100644
--- a/drivers/net/mlx5/mlx5_rxmode.c
+++ b/drivers/net/mlx5/mlx5_rxmode.c
@@ -53,20 +53,6 @@
 
 /* Initialization data for special flows. */
 static const struct special_flow_init special_flow_init[] = {
-	[HASH_RXQ_FLOW_TYPE_PROMISC] = {
-		.dst_mac_val = "\x00\x00\x00\x00\x00\x00",
-		.dst_mac_mask = "\x00\x00\x00\x00\x00\x00",
-		.hash_types =
-			1 << HASH_RXQ_TCPV4 |
-			1 << HASH_RXQ_UDPV4 |
-			1 << HASH_RXQ_IPV4 |
-			1 << HASH_RXQ_TCPV6 |
-			1 << HASH_RXQ_UDPV6 |
-			1 << HASH_RXQ_IPV6 |
-			1 << HASH_RXQ_ETH |
-			0,
-		.per_vlan = 0,
-	},
 	[HASH_RXQ_FLOW_TYPE_ALLMULTI] = {
 		.dst_mac_val = "\x01\x00\x00\x00\x00\x00",
 		.dst_mac_mask = "\x01\x00\x00\x00\x00\x00",
@@ -346,7 +332,7 @@ priv_special_flow_enable_all(struct priv *priv)
 
 	if (priv->isolated)
 		return 0;
-	for (flow_type = HASH_RXQ_FLOW_TYPE_PROMISC;
+	for (flow_type = HASH_RXQ_FLOW_TYPE_ALLMULTI;
 			flow_type != HASH_RXQ_FLOW_TYPE_MAC;
 			++flow_type) {
 		int ret;
@@ -373,7 +359,7 @@ priv_special_flow_disable_all(struct priv *priv)
 {
 	enum hash_rxq_flow_type flow_type;
 
-	for (flow_type = HASH_RXQ_FLOW_TYPE_PROMISC;
+	for (flow_type = HASH_RXQ_FLOW_TYPE_ALLMULTI;
 			flow_type != HASH_RXQ_FLOW_TYPE_MAC;
 			++flow_type)
 		priv_special_flow_disable(priv, flow_type);
@@ -388,19 +374,16 @@ priv_special_flow_disable_all(struct priv *priv)
 void
 mlx5_promiscuous_enable(struct rte_eth_dev *dev)
 {
-	struct priv *priv = dev->data->dev_private;
-	int ret;
+	struct rte_flow_item_eth eth = {
+		.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
+		.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
+		.type = 0,
+	};
 
 	if (mlx5_is_secondary())
 		return;
-
-	priv_lock(priv);
-	priv->promisc_req = 1;
-	ret = priv_rehash_flows(priv);
-	if (ret)
-		ERROR("error while enabling promiscuous mode: %s",
-		      strerror(ret));
-	priv_unlock(priv);
+	dev->data->promiscuous = 1;
+	claim_zero(mlx5_ctrl_flow(dev, &eth, &eth, 1));
 }
 
 /**
@@ -412,19 +395,16 @@ mlx5_promiscuous_enable(struct rte_eth_dev *dev)
 void
 mlx5_promiscuous_disable(struct rte_eth_dev *dev)
 {
-	struct priv *priv = dev->data->dev_private;
-	int ret;
+	struct rte_flow_item_eth eth = {
+		.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
+		.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
+		.type = 0,
+	};
 
 	if (mlx5_is_secondary())
 		return;
-
-	priv_lock(priv);
-	priv->promisc_req = 0;
-	ret = priv_rehash_flows(priv);
-	if (ret)
-		ERROR("error while disabling promiscuous mode: %s",
-		      strerror(ret));
-	priv_unlock(priv);
+	dev->data->promiscuous = 0;
+	claim_zero(mlx5_ctrl_flow(dev, &eth, &eth, 0));
 }
 
 /**
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 81e9eb5..d3d1355 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -571,13 +571,7 @@ priv_destroy_hash_rxqs(struct priv *priv)
 int
 priv_allow_flow_type(struct priv *priv, enum hash_rxq_flow_type type)
 {
-	/* Only FLOW_TYPE_PROMISC is allowed when promiscuous mode
-	 * has been requested. */
-	if (priv->promisc_req)
-		return type == HASH_RXQ_FLOW_TYPE_PROMISC;
 	switch (type) {
-	case HASH_RXQ_FLOW_TYPE_PROMISC:
-		return !!priv->promisc_req;
 	case HASH_RXQ_FLOW_TYPE_ALLMULTI:
 		return !!priv->allmulti_req;
 	case HASH_RXQ_FLOW_TYPE_BROADCAST:
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 30ce810..a9af1ec 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -237,7 +237,6 @@ struct special_flow_init {
 };
 
 enum hash_rxq_flow_type {
-	HASH_RXQ_FLOW_TYPE_PROMISC,
 	HASH_RXQ_FLOW_TYPE_ALLMULTI,
 	HASH_RXQ_FLOW_TYPE_BROADCAST,
 	HASH_RXQ_FLOW_TYPE_IPV6MULTI,
@@ -249,8 +248,6 @@ static inline const char *
 hash_rxq_flow_type_str(enum hash_rxq_flow_type flow_type)
 {
 	switch (flow_type) {
-	case HASH_RXQ_FLOW_TYPE_PROMISC:
-		return "promiscuous";
 	case HASH_RXQ_FLOW_TYPE_ALLMULTI:
 		return "allmulticast";
 	case HASH_RXQ_FLOW_TYPE_BROADCAST:
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index a311499..085abcc 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -163,7 +163,16 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 		      (void *)priv, strerror(err));
 		goto error;
 	}
-	err = priv_flow_start(priv);
+	if (dev->data->promiscuous)
+		mlx5_promiscuous_enable(dev);
+	err = priv_flow_start(priv, &priv->ctrl_flows);
+	if (err) {
+		ERROR("%p: an error occurred while configuring control flows:"
+		      " %s",
+		      (void *)priv, strerror(err));
+		goto error;
+	}
+	err = priv_flow_start(priv, &priv->flows);
 	if (err) {
 		ERROR("%p: an error occurred while configuring flows:"
 		      " %s",
@@ -187,7 +196,8 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 	priv_special_flow_disable_all(priv);
 	priv_mac_addrs_disable(priv);
 	priv_destroy_hash_rxqs(priv);
-	priv_flow_stop(priv);
+	priv_flow_stop(priv, &priv->flows);
+	priv_flow_flush(priv, &priv->ctrl_flows);
 	priv_rxq_stop(priv);
 	priv_txq_stop(priv);
 	priv_unlock(priv);
@@ -222,13 +232,14 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
 	priv_special_flow_disable_all(priv);
 	priv_mac_addrs_disable(priv);
 	priv_destroy_hash_rxqs(priv);
-	priv_flow_stop(priv);
+	priv_flow_stop(priv, &priv->flows);
+	priv_flow_flush(priv, &priv->ctrl_flows);
 	priv_rx_intr_vec_disable(priv);
+	priv_dev_interrupt_handler_uninstall(priv, dev);
 	priv_txq_stop(priv);
 	priv_rxq_stop(priv);
 	LIST_FOREACH(mr, &priv->mr, next) {
 		priv_mr_release(priv, mr);
 	}
-	priv_dev_interrupt_handler_uninstall(priv, dev);
 	priv_unlock(priv);
 }
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v2 18/30] net/mlx5: use flow to enable all multi mode
       [not found] ` <cover.1507207731.git.nelio.laranjeiro@6wind.com>
                     ` (16 preceding siblings ...)
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 17/30] net/mlx5: use flow to enable promiscuous mode Nelio Laranjeiro
@ 2017-10-05 12:49   ` Nelio Laranjeiro
  2017-10-06  5:10     ` Yongseok Koh
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 19/30] net/mlx5: use flow to enable unicast traffic Nelio Laranjeiro
                     ` (11 subsequent siblings)
  29 siblings, 1 reply; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-05 12:49 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
RSS hash configuration is currently ignored by the PMD; this commit
removes the RSS feature on promiscuous mode.
This functionality will be added in a later commit.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5.h         |  1 -
 drivers/net/mlx5/mlx5_rxmode.c  | 52 ++++++++++++++---------------------------
 drivers/net/mlx5/mlx5_rxq.c     |  7 ++----
 drivers/net/mlx5/mlx5_rxtx.h    |  3 ---
 drivers/net/mlx5/mlx5_trigger.c |  2 ++
 5 files changed, 22 insertions(+), 43 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 2699917..45673b1 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -108,7 +108,6 @@ struct priv {
 	/* Device properties. */
 	uint16_t mtu; /* Configured MTU. */
 	uint8_t port; /* Physical port number. */
-	unsigned int allmulti_req:1; /* All multicast mode requested. */
 	unsigned int hw_csum:1; /* Checksum offload is supported. */
 	unsigned int hw_csum_l2tun:1; /* Same for L2 tunnels. */
 	unsigned int hw_vlan_strip:1; /* VLAN stripping is supported. */
diff --git a/drivers/net/mlx5/mlx5_rxmode.c b/drivers/net/mlx5/mlx5_rxmode.c
index f469f41..0c75889 100644
--- a/drivers/net/mlx5/mlx5_rxmode.c
+++ b/drivers/net/mlx5/mlx5_rxmode.c
@@ -53,18 +53,6 @@
 
 /* Initialization data for special flows. */
 static const struct special_flow_init special_flow_init[] = {
-	[HASH_RXQ_FLOW_TYPE_ALLMULTI] = {
-		.dst_mac_val = "\x01\x00\x00\x00\x00\x00",
-		.dst_mac_mask = "\x01\x00\x00\x00\x00\x00",
-		.hash_types =
-			1 << HASH_RXQ_UDPV4 |
-			1 << HASH_RXQ_IPV4 |
-			1 << HASH_RXQ_UDPV6 |
-			1 << HASH_RXQ_IPV6 |
-			1 << HASH_RXQ_ETH |
-			0,
-		.per_vlan = 0,
-	},
 	[HASH_RXQ_FLOW_TYPE_BROADCAST] = {
 		.dst_mac_val = "\xff\xff\xff\xff\xff\xff",
 		.dst_mac_mask = "\xff\xff\xff\xff\xff\xff",
@@ -332,7 +320,7 @@ priv_special_flow_enable_all(struct priv *priv)
 
 	if (priv->isolated)
 		return 0;
-	for (flow_type = HASH_RXQ_FLOW_TYPE_ALLMULTI;
+	for (flow_type = HASH_RXQ_FLOW_TYPE_BROADCAST;
 			flow_type != HASH_RXQ_FLOW_TYPE_MAC;
 			++flow_type) {
 		int ret;
@@ -359,7 +347,7 @@ priv_special_flow_disable_all(struct priv *priv)
 {
 	enum hash_rxq_flow_type flow_type;
 
-	for (flow_type = HASH_RXQ_FLOW_TYPE_ALLMULTI;
+	for (flow_type = HASH_RXQ_FLOW_TYPE_BROADCAST;
 			flow_type != HASH_RXQ_FLOW_TYPE_MAC;
 			++flow_type)
 		priv_special_flow_disable(priv, flow_type);
@@ -416,19 +404,17 @@ mlx5_promiscuous_disable(struct rte_eth_dev *dev)
 void
 mlx5_allmulticast_enable(struct rte_eth_dev *dev)
 {
-	struct priv *priv = dev->data->dev_private;
-	int ret;
+	struct rte_flow_item_eth eth = {
+		.dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
+		.src.addr_bytes = "\x01\x00\x00\x00\x00\x00",
+		.type = 0,
+	};
 
 	if (mlx5_is_secondary())
 		return;
-
-	priv_lock(priv);
-	priv->allmulti_req = 1;
-	ret = priv_rehash_flows(priv);
-	if (ret)
-		ERROR("error while enabling allmulticast mode: %s",
-		      strerror(ret));
-	priv_unlock(priv);
+	dev->data->all_multicast = 1;
+	if (dev->data->dev_started)
+		claim_zero(mlx5_ctrl_flow(dev, &eth, &eth, 1));
 }
 
 /**
@@ -440,17 +426,15 @@ mlx5_allmulticast_enable(struct rte_eth_dev *dev)
 void
 mlx5_allmulticast_disable(struct rte_eth_dev *dev)
 {
-	struct priv *priv = dev->data->dev_private;
-	int ret;
+	struct rte_flow_item_eth eth = {
+		.dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
+		.src.addr_bytes = "\x01\x00\x00\x00\x00\x00",
+		.type = 0,
+	};
 
 	if (mlx5_is_secondary())
 		return;
-
-	priv_lock(priv);
-	priv->allmulti_req = 0;
-	ret = priv_rehash_flows(priv);
-	if (ret)
-		ERROR("error while disabling allmulticast mode: %s",
-		      strerror(ret));
-	priv_unlock(priv);
+	dev->data->all_multicast = 0;
+	if (dev->data->dev_started)
+		claim_zero(mlx5_ctrl_flow(dev, &eth, &eth, 0));
 }
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index d3d1355..d3cd58e 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -571,16 +571,13 @@ priv_destroy_hash_rxqs(struct priv *priv)
 int
 priv_allow_flow_type(struct priv *priv, enum hash_rxq_flow_type type)
 {
+	(void)priv;
 	switch (type) {
-	case HASH_RXQ_FLOW_TYPE_ALLMULTI:
-		return !!priv->allmulti_req;
 	case HASH_RXQ_FLOW_TYPE_BROADCAST:
 	case HASH_RXQ_FLOW_TYPE_IPV6MULTI:
-		/* If allmulti is enabled, broadcast and ipv6multi
-		 * are unnecessary. */
-		return !priv->allmulti_req;
 	case HASH_RXQ_FLOW_TYPE_MAC:
 		return 1;
+		return 1;
 	default:
 		/* Unsupported flow type is not allowed. */
 		return 0;
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index a9af1ec..7107a6b 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -237,7 +237,6 @@ struct special_flow_init {
 };
 
 enum hash_rxq_flow_type {
-	HASH_RXQ_FLOW_TYPE_ALLMULTI,
 	HASH_RXQ_FLOW_TYPE_BROADCAST,
 	HASH_RXQ_FLOW_TYPE_IPV6MULTI,
 	HASH_RXQ_FLOW_TYPE_MAC,
@@ -248,8 +247,6 @@ static inline const char *
 hash_rxq_flow_type_str(enum hash_rxq_flow_type flow_type)
 {
 	switch (flow_type) {
-	case HASH_RXQ_FLOW_TYPE_ALLMULTI:
-		return "allmulticast";
 	case HASH_RXQ_FLOW_TYPE_BROADCAST:
 		return "broadcast";
 	case HASH_RXQ_FLOW_TYPE_IPV6MULTI:
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 085abcc..27e7890 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -165,6 +165,8 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 	}
 	if (dev->data->promiscuous)
 		mlx5_promiscuous_enable(dev);
+	else if (dev->data->all_multicast)
+		mlx5_allmulticast_enable(dev);
 	err = priv_flow_start(priv, &priv->ctrl_flows);
 	if (err) {
 		ERROR("%p: an error occurred while configuring control flows:"
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v2 19/30] net/mlx5: use flow to enable unicast traffic
       [not found] ` <cover.1507207731.git.nelio.laranjeiro@6wind.com>
                     ` (17 preceding siblings ...)
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 18/30] net/mlx5: use flow to enable all multi mode Nelio Laranjeiro
@ 2017-10-05 12:49   ` Nelio Laranjeiro
  2017-10-06  5:18     ` Yongseok Koh
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 20/30] net/mlx5: handle a single RSS hash key for all protocols Nelio Laranjeiro
                     ` (10 subsequent siblings)
  29 siblings, 1 reply; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-05 12:49 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
RSS hash configuration is currently ignored by the PMD; this commit
removes the RSS feature.
This functionality will be added in a later commit.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5.c         |  10 +-
 drivers/net/mlx5/mlx5.h         |  29 ++-
 drivers/net/mlx5/mlx5_defs.h    |   3 -
 drivers/net/mlx5/mlx5_flow.c    | 126 ++++++-------
 drivers/net/mlx5/mlx5_mac.c     | 407 +++-------------------------------------
 drivers/net/mlx5/mlx5_rxmode.c  | 336 +--------------------------------
 drivers/net/mlx5/mlx5_rxq.c     |  63 -------
 drivers/net/mlx5/mlx5_rxtx.h    |  26 ---
 drivers/net/mlx5/mlx5_trigger.c | 198 +++++++++++++++++--
 drivers/net/mlx5/mlx5_vlan.c    |  51 ++---
 10 files changed, 297 insertions(+), 952 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 8c99fa2..fd6f0c0 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -198,10 +198,8 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	      ((priv->ctx != NULL) ? priv->ctx->device->name : ""));
 	/* In case mlx5_dev_stop() has not been called. */
 	priv_dev_interrupt_handler_uninstall(priv, dev);
-	priv_special_flow_disable_all(priv);
-	priv_mac_addrs_disable(priv);
 	priv_destroy_hash_rxqs(priv);
-	priv_flow_flush(priv, &priv->flows);
+	priv_dev_traffic_disable(priv, dev);
 	/* Prevent crashes when queues are still in use. */
 	dev->rx_pkt_burst = removed_rx_burst;
 	dev->tx_pkt_burst = removed_tx_burst;
@@ -843,10 +841,6 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 		     mac.addr_bytes[0], mac.addr_bytes[1],
 		     mac.addr_bytes[2], mac.addr_bytes[3],
 		     mac.addr_bytes[4], mac.addr_bytes[5]);
-		/* Register MAC address. */
-		claim_zero(priv_mac_addr_add(priv, 0,
-					     (const uint8_t (*)[ETHER_ADDR_LEN])
-					     mac.addr_bytes));
 #ifndef NDEBUG
 		{
 			char ifname[IF_NAMESIZE];
@@ -883,6 +877,8 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 		eth_dev->device->driver = &mlx5_driver.driver;
 		priv->dev = eth_dev;
 		eth_dev->dev_ops = &mlx5_dev_ops;
+		/* Register MAC address. */
+		claim_zero(mlx5_mac_addr_add(eth_dev, &mac, 0, 0));
 		TAILQ_INIT(&priv->flows);
 		TAILQ_INIT(&priv->ctrl_flows);
 
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 45673b1..e83961f 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -96,13 +96,7 @@ struct priv {
 	struct ibv_device_attr_ex device_attr; /* Device properties. */
 	struct ibv_pd *pd; /* Protection Domain. */
 	char ibdev_path[IBV_SYSFS_PATH_MAX]; /* IB device path for secondary */
-	/*
-	 * MAC addresses array and configuration bit-field.
-	 * An extra entry that cannot be modified by the DPDK is reserved
-	 * for broadcast frames (destination MAC address ff:ff:ff:ff:ff:ff).
-	 */
-	struct ether_addr mac[MLX5_MAX_MAC_ADDRESSES];
-	BITFIELD_DECLARE(mac_configured, uint32_t, MLX5_MAX_MAC_ADDRESSES);
+	struct ether_addr mac[MLX5_MAX_MAC_ADDRESSES]; /* MAC addresses. */
 	uint16_t vlan_filter[MLX5_MAX_VLAN_IDS]; /* VLAN filters table. */
 	unsigned int vlan_filter_n; /* Number of configured VLAN filters. */
 	/* Device properties. */
@@ -225,13 +219,7 @@ void priv_dev_select_rx_function(struct priv *priv, struct rte_eth_dev *dev);
 /* mlx5_mac.c */
 
 int priv_get_mac(struct priv *, uint8_t (*)[ETHER_ADDR_LEN]);
-void hash_rxq_mac_addrs_del(struct hash_rxq *);
-void priv_mac_addrs_disable(struct priv *);
 void mlx5_mac_addr_remove(struct rte_eth_dev *, uint32_t);
-int hash_rxq_mac_addrs_add(struct hash_rxq *);
-int priv_mac_addr_add(struct priv *, unsigned int,
-		      const uint8_t (*)[ETHER_ADDR_LEN]);
-int priv_mac_addrs_enable(struct priv *);
 int mlx5_mac_addr_add(struct rte_eth_dev *, struct ether_addr *, uint32_t,
 		      uint32_t);
 void mlx5_mac_addr_set(struct rte_eth_dev *, struct ether_addr *);
@@ -250,10 +238,6 @@ int mlx5_dev_rss_reta_update(struct rte_eth_dev *,
 
 /* mlx5_rxmode.c */
 
-int priv_special_flow_enable(struct priv *, enum hash_rxq_flow_type);
-void priv_special_flow_disable(struct priv *, enum hash_rxq_flow_type);
-int priv_special_flow_enable_all(struct priv *);
-void priv_special_flow_disable_all(struct priv *);
 void mlx5_promiscuous_enable(struct rte_eth_dev *);
 void mlx5_promiscuous_disable(struct rte_eth_dev *);
 void mlx5_allmulticast_enable(struct rte_eth_dev *);
@@ -280,6 +264,10 @@ void mlx5_vlan_strip_queue_set(struct rte_eth_dev *, uint16_t, int);
 
 int mlx5_dev_start(struct rte_eth_dev *);
 void mlx5_dev_stop(struct rte_eth_dev *);
+int priv_dev_traffic_enable(struct priv *, struct rte_eth_dev *);
+int priv_dev_traffic_disable(struct priv *, struct rte_eth_dev *);
+int priv_dev_traffic_restart(struct priv *, struct rte_eth_dev *);
+int mlx5_traffic_restart(struct rte_eth_dev *);
 
 /* mlx5_flow.c */
 
@@ -302,8 +290,13 @@ int mlx5_flow_isolate(struct rte_eth_dev *, int, struct rte_flow_error *);
 int priv_flow_start(struct priv *, struct mlx5_flows *);
 void priv_flow_stop(struct priv *, struct mlx5_flows *);
 int priv_flow_verify(struct priv *);
+int mlx5_ctrl_flow_vlan(struct rte_eth_dev *, struct rte_flow_item_eth *,
+			struct rte_flow_item_eth *, struct rte_flow_item_vlan *,
+			struct rte_flow_item_vlan *);
 int mlx5_ctrl_flow(struct rte_eth_dev *, struct rte_flow_item_eth *,
-		   struct rte_flow_item_eth *, unsigned int);
+		   struct rte_flow_item_eth *);
+int priv_flow_create_drop_queue(struct priv *);
+void priv_flow_delete_drop_queue(struct priv *);
 
 /* mlx5_socket.c */
 
diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h
index 59ff00d..3a7706c 100644
--- a/drivers/net/mlx5/mlx5_defs.h
+++ b/drivers/net/mlx5/mlx5_defs.h
@@ -45,9 +45,6 @@
 /* Maximum number of simultaneous VLAN filters. */
 #define MLX5_MAX_VLAN_IDS 128
 
-/* Maximum number of special flows. */
-#define MLX5_MAX_SPECIAL_FLOWS 4
-
 /*
  * Request TX completion every time descriptors reach this threshold since
  * the previous request. Must be a power of two for performance reasons.
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index c8b16d8..88d02c9 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -1128,20 +1128,19 @@ priv_flow_create_action_queue(struct priv *priv,
 						 flow->hash_fields,
 						 (*rte_flow->queues),
 						 rte_flow->queues_n);
-	if (rte_flow->frxq.hrxq) {
-		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
-				   NULL, "duplicated flow");
-		goto error;
-	}
-	rte_flow->frxq.hrxq = mlx5_priv_hrxq_new(priv, rss_hash_default_key,
-						 rss_hash_default_key_len,
-						 flow->hash_fields,
-						 (*rte_flow->queues),
-						 rte_flow->queues_n);
 	if (!rte_flow->frxq.hrxq) {
-		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
-				   NULL, "cannot create hash rxq");
-		goto error;
+		rte_flow->frxq.hrxq =
+			mlx5_priv_hrxq_new(priv, rss_hash_default_key,
+					   rss_hash_default_key_len,
+					   flow->hash_fields,
+					   (*rte_flow->queues),
+					   rte_flow->queues_n);
+		if (!rte_flow->frxq.hrxq) {
+			rte_flow_error_set(error, ENOMEM,
+					   RTE_FLOW_ERROR_TYPE_HANDLE,
+					   NULL, "cannot create hash rxq");
+			goto error;
+		}
 	}
 	for (i = 0; i != flow->actions.queues_n; ++i) {
 		struct mlx5_rxq_data *q =
@@ -1396,7 +1395,7 @@ mlx5_flow_flush(struct rte_eth_dev *dev,
  * @return
  *   0 on success.
  */
-static int
+int
 priv_flow_create_drop_queue(struct priv *priv)
 {
 	struct mlx5_hrxq_drop *fdq = NULL;
@@ -1479,7 +1478,7 @@ priv_flow_create_drop_queue(struct priv *priv)
  * @param priv
  *   Pointer to private structure.
  */
-static void
+void
 priv_flow_delete_drop_queue(struct priv *priv)
 {
 	struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
@@ -1501,8 +1500,6 @@ priv_flow_delete_drop_queue(struct priv *priv)
 /**
  * Remove all flows.
  *
- * Called by dev_stop() to remove all flows.
- *
  * @param priv
  *   Pointer to private structure.
  * @param list
@@ -1528,7 +1525,6 @@ priv_flow_stop(struct priv *priv, struct mlx5_flows *list)
 		}
 		DEBUG("Flow %p removed", (void *)flow);
 	}
-	priv_flow_delete_drop_queue(priv);
 }
 
 /**
@@ -1545,12 +1541,8 @@ priv_flow_stop(struct priv *priv, struct mlx5_flows *list)
 int
 priv_flow_start(struct priv *priv, struct mlx5_flows *list)
 {
-	int ret;
 	struct rte_flow *flow;
 
-	ret = priv_flow_create_drop_queue(priv);
-	if (ret)
-		return -1;
 	TAILQ_FOREACH(flow, list, next) {
 		if (flow->frxq.hrxq)
 			goto flow_create;
@@ -1648,25 +1640,28 @@ priv_flow_verify(struct priv *priv)
 }
 
 /**
- * Enable/disable a flow control configured from the control plane.
+ * Enable a flow control configured from the control plane.
  *
  * @param dev
  *   Pointer to Ethernet device.
- * @param spec
+ * @param eth_spec
  *   An Ethernet flow spec to apply.
- * @param mask
+ * @param eth_mask
  *   An Ethernet flow mask to apply.
- * @param enable
- *   Enable/disable the flow.
+ * @param vlan_spec
+ *   A VLAN flow spec to apply.
+ * @param vlan_mask
+ *   A VLAN flow mask to apply.
  *
  * @return
  *   0 on success.
  */
 int
-mlx5_ctrl_flow(struct rte_eth_dev *dev,
-	       struct rte_flow_item_eth *spec,
-	       struct rte_flow_item_eth *mask,
-	       unsigned int enable)
+mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
+		    struct rte_flow_item_eth *eth_spec,
+		    struct rte_flow_item_eth *eth_mask,
+		    struct rte_flow_item_vlan *vlan_spec,
+		    struct rte_flow_item_vlan *vlan_mask)
 {
 	struct priv *priv = dev->data->dev_private;
 	const struct rte_flow_attr attr = {
@@ -1676,9 +1671,16 @@ mlx5_ctrl_flow(struct rte_eth_dev *dev,
 	struct rte_flow_item items[] = {
 		{
 			.type = RTE_FLOW_ITEM_TYPE_ETH,
-			.spec = spec,
+			.spec = eth_spec,
+			.last = NULL,
+			.mask = eth_mask,
+		},
+		{
+			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
+				RTE_FLOW_ITEM_TYPE_END,
+			.spec = vlan_spec,
 			.last = NULL,
-			.mask = mask,
+			.mask = vlan_mask,
 		},
 		{
 			.type = RTE_FLOW_ITEM_TYPE_END,
@@ -1698,38 +1700,30 @@ mlx5_ctrl_flow(struct rte_eth_dev *dev,
 	struct rte_flow *flow;
 	struct rte_flow_error error;
 
-	if (enable) {
-		flow = priv_flow_create(priv, &priv->ctrl_flows, &attr, items,
-					actions, &error);
-		if (!flow)
-			return 1;
-	} else {
-		struct spec {
-			struct ibv_flow_attr ibv_attr;
-			struct ibv_flow_spec_eth eth;
-		} spec;
-		struct mlx5_flow_parse parser = {
-			.ibv_attr = &spec.ibv_attr,
-			.offset = sizeof(struct ibv_flow_attr),
-		};
-		struct ibv_flow_spec_eth *eth;
-		const unsigned int attr_size = sizeof(struct ibv_flow_attr);
-
-		claim_zero(mlx5_flow_create_eth(&items[0], NULL, &parser));
-		TAILQ_FOREACH(flow, &priv->ctrl_flows, next) {
-			eth = (void *)((uintptr_t)flow->ibv_attr + attr_size);
-			assert(eth->type == IBV_FLOW_SPEC_ETH);
-			if (!memcmp(eth, &spec.eth, sizeof(*eth)))
-				break;
-		}
-		if (flow) {
-			claim_zero(ibv_destroy_flow(flow->ibv_flow));
-			mlx5_priv_hrxq_release(priv, flow->frxq.hrxq);
-			rte_free(flow->ibv_attr);
-			DEBUG("Control flow destroyed %p", (void *)flow);
-			TAILQ_REMOVE(&priv->ctrl_flows, flow, next);
-			rte_free(flow);
-		}
-	}
+	flow = priv_flow_create(priv, &priv->ctrl_flows, &attr, items, actions,
+				&error);
+	if (!flow)
+		return rte_errno;
 	return 0;
 }
+
+/**
+ * Enable a flow control configured from the control plane.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param eth_spec
+ *   An Ethernet flow spec to apply.
+ * @param eth_mask
+ *   An Ethernet flow mask to apply.
+ *
+ * @return
+ *   0 on success.
+ */
+int
+mlx5_ctrl_flow(struct rte_eth_dev *dev,
+	       struct rte_flow_item_eth *eth_spec,
+	       struct rte_flow_item_eth *eth_mask)
+{
+	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
+}
diff --git a/drivers/net/mlx5/mlx5_mac.c b/drivers/net/mlx5/mlx5_mac.c
index 086af58..d17b991 100644
--- a/drivers/net/mlx5/mlx5_mac.c
+++ b/drivers/net/mlx5/mlx5_mac.c
@@ -83,112 +83,6 @@ priv_get_mac(struct priv *priv, uint8_t (*mac)[ETHER_ADDR_LEN])
 }
 
 /**
- * Delete MAC flow steering rule.
- *
- * @param hash_rxq
- *   Pointer to hash RX queue structure.
- * @param mac_index
- *   MAC address index.
- * @param vlan_index
- *   VLAN index to use.
- */
-static void
-hash_rxq_del_mac_flow(struct hash_rxq *hash_rxq, unsigned int mac_index,
-		      unsigned int vlan_index)
-{
-#ifndef NDEBUG
-	const uint8_t (*mac)[ETHER_ADDR_LEN] =
-		(const uint8_t (*)[ETHER_ADDR_LEN])
-		hash_rxq->priv->mac[mac_index].addr_bytes;
-#endif
-
-	assert(mac_index < RTE_DIM(hash_rxq->mac_flow));
-	assert(vlan_index < RTE_DIM(hash_rxq->mac_flow[mac_index]));
-	if (hash_rxq->mac_flow[mac_index][vlan_index] == NULL)
-		return;
-	DEBUG("%p: removing MAC address %02x:%02x:%02x:%02x:%02x:%02x index %u"
-	      " VLAN index %u",
-	      (void *)hash_rxq,
-	      (*mac)[0], (*mac)[1], (*mac)[2], (*mac)[3], (*mac)[4], (*mac)[5],
-	      mac_index,
-	      vlan_index);
-	claim_zero(ibv_destroy_flow(hash_rxq->mac_flow
-				    [mac_index][vlan_index]));
-	hash_rxq->mac_flow[mac_index][vlan_index] = NULL;
-}
-
-/**
- * Unregister a MAC address from a hash RX queue.
- *
- * @param hash_rxq
- *   Pointer to hash RX queue structure.
- * @param mac_index
- *   MAC address index.
- */
-static void
-hash_rxq_mac_addr_del(struct hash_rxq *hash_rxq, unsigned int mac_index)
-{
-	unsigned int i;
-
-	assert(mac_index < RTE_DIM(hash_rxq->mac_flow));
-	for (i = 0; (i != RTE_DIM(hash_rxq->mac_flow[mac_index])); ++i)
-		hash_rxq_del_mac_flow(hash_rxq, mac_index, i);
-}
-
-/**
- * Unregister all MAC addresses from a hash RX queue.
- *
- * @param hash_rxq
- *   Pointer to hash RX queue structure.
- */
-void
-hash_rxq_mac_addrs_del(struct hash_rxq *hash_rxq)
-{
-	unsigned int i;
-
-	for (i = 0; (i != RTE_DIM(hash_rxq->mac_flow)); ++i)
-		hash_rxq_mac_addr_del(hash_rxq, i);
-}
-
-/**
- * Unregister a MAC address.
- *
- * This is done for each hash RX queue.
- *
- * @param priv
- *   Pointer to private structure.
- * @param mac_index
- *   MAC address index.
- */
-static void
-priv_mac_addr_del(struct priv *priv, unsigned int mac_index)
-{
-	unsigned int i;
-
-	assert(mac_index < RTE_DIM(priv->mac));
-	if (!BITFIELD_ISSET(priv->mac_configured, mac_index))
-		return;
-	for (i = 0; (i != priv->hash_rxqs_n); ++i)
-		hash_rxq_mac_addr_del(&(*priv->hash_rxqs)[i], mac_index);
-	BITFIELD_RESET(priv->mac_configured, mac_index);
-}
-
-/**
- * Unregister all MAC addresses from all hash RX queues.
- *
- * @param priv
- *   Pointer to private structure.
- */
-void
-priv_mac_addrs_disable(struct priv *priv)
-{
-	unsigned int i;
-
-	for (i = 0; (i != priv->hash_rxqs_n); ++i)
-		hash_rxq_mac_addrs_del(&(*priv->hash_rxqs)[i]);
-}
-
-/**
  * DPDK callback to remove a MAC address.
  *
  * @param dev
@@ -199,262 +93,12 @@ priv_mac_addrs_disable(struct priv *priv)
 void
 mlx5_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
 {
-	struct priv *priv = dev->data->dev_private;
-
 	if (mlx5_is_secondary())
 		return;
-
-	priv_lock(priv);
-	DEBUG("%p: removing MAC address from index %" PRIu32,
-	      (void *)dev, index);
-	if (index >= RTE_DIM(priv->mac))
-		goto end;
-	priv_mac_addr_del(priv, index);
-end:
-	priv_unlock(priv);
-}
-
-/**
- * Add MAC flow steering rule.
- *
- * @param hash_rxq
- *   Pointer to hash RX queue structure.
- * @param mac_index
- *   MAC address index to register.
- * @param vlan_index
- *   VLAN index to use.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-static int
-hash_rxq_add_mac_flow(struct hash_rxq *hash_rxq, unsigned int mac_index,
-		      unsigned int vlan_index)
-{
-	struct ibv_flow *flow;
-	struct priv *priv = hash_rxq->priv;
-	const uint8_t (*mac)[ETHER_ADDR_LEN] =
-			(const uint8_t (*)[ETHER_ADDR_LEN])
-			priv->mac[mac_index].addr_bytes;
-	FLOW_ATTR_SPEC_ETH(data, priv_flow_attr(priv, NULL, 0, hash_rxq->type));
-	struct ibv_flow_attr *attr = &data->attr;
-	struct ibv_flow_spec_eth *spec = &data->spec;
-	unsigned int vlan_enabled = !!priv->vlan_filter_n;
-	unsigned int vlan_id = priv->vlan_filter[vlan_index];
-
-	assert(mac_index < RTE_DIM(hash_rxq->mac_flow));
-	assert(vlan_index < RTE_DIM(hash_rxq->mac_flow[mac_index]));
-	if (hash_rxq->mac_flow[mac_index][vlan_index] != NULL)
-		return 0;
-	/*
-	 * No padding must be inserted by the compiler between attr and spec.
-	 * This layout is expected by libibverbs.
-	 */
-	assert(((uint8_t *)attr + sizeof(*attr)) == (uint8_t *)spec);
-	priv_flow_attr(priv, attr, sizeof(data), hash_rxq->type);
-	/* The first specification must be Ethernet. */
-	assert(spec->type == IBV_FLOW_SPEC_ETH);
-	assert(spec->size == sizeof(*spec));
-	*spec = (struct ibv_flow_spec_eth){
-		.type = IBV_FLOW_SPEC_ETH,
-		.size = sizeof(*spec),
-		.val = {
-			.dst_mac = {
-				(*mac)[0], (*mac)[1], (*mac)[2],
-				(*mac)[3], (*mac)[4], (*mac)[5]
-			},
-			.vlan_tag = (vlan_enabled ?
-				     rte_cpu_to_be_16(vlan_id)
-				     : 0),
-		},
-		.mask = {
-			.dst_mac = "\xff\xff\xff\xff\xff\xff",
-			.vlan_tag = (vlan_enabled ?
-				     rte_cpu_to_be_16(0xfff) :
-				     0),
-		},
-	};
-	DEBUG("%p: adding MAC address %02x:%02x:%02x:%02x:%02x:%02x index %u"
-	      " VLAN index %u filtering %s, ID %u",
-	      (void *)hash_rxq,
-	      (*mac)[0], (*mac)[1], (*mac)[2], (*mac)[3], (*mac)[4], (*mac)[5],
-	      mac_index,
-	      vlan_index,
-	      (vlan_enabled ? "enabled" : "disabled"),
-	      vlan_id);
-	/* Create related flow. */
-	errno = 0;
-	flow = ibv_create_flow(hash_rxq->qp, attr);
-	if (flow == NULL) {
-		/* It's not clear whether errno is always set in this case. */
-		ERROR("%p: flow configuration failed, errno=%d: %s",
-		      (void *)hash_rxq, errno,
-		      (errno ? strerror(errno) : "Unknown error"));
-		if (errno)
-			return errno;
-		return EINVAL;
-	}
-	hash_rxq->mac_flow[mac_index][vlan_index] = flow;
-	return 0;
-}
-
-/**
- * Register a MAC address in a hash RX queue.
- *
- * @param hash_rxq
- *   Pointer to hash RX queue structure.
- * @param mac_index
- *   MAC address index to register.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-static int
-hash_rxq_mac_addr_add(struct hash_rxq *hash_rxq, unsigned int mac_index)
-{
-	struct priv *priv = hash_rxq->priv;
-	unsigned int i = 0;
-	int ret;
-
-	assert(mac_index < RTE_DIM(hash_rxq->mac_flow));
-	assert(RTE_DIM(hash_rxq->mac_flow[mac_index]) ==
-	       RTE_DIM(priv->vlan_filter));
-	/* Add a MAC address for each VLAN filter, or at least once. */
-	do {
-		ret = hash_rxq_add_mac_flow(hash_rxq, mac_index, i);
-		if (ret) {
-			/* Failure, rollback. */
-			while (i != 0)
-				hash_rxq_del_mac_flow(hash_rxq, mac_index,
-						      --i);
-			return ret;
-		}
-	} while (++i < priv->vlan_filter_n);
-	return 0;
-}
-
-/**
- * Register all MAC addresses in a hash RX queue.
- *
- * @param hash_rxq
- *   Pointer to hash RX queue structure.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-int
-hash_rxq_mac_addrs_add(struct hash_rxq *hash_rxq)
-{
-	struct priv *priv = hash_rxq->priv;
-	unsigned int i;
-	int ret;
-
-	assert(RTE_DIM(priv->mac) == RTE_DIM(hash_rxq->mac_flow));
-	for (i = 0; (i != RTE_DIM(priv->mac)); ++i) {
-		if (!BITFIELD_ISSET(priv->mac_configured, i))
-			continue;
-		ret = hash_rxq_mac_addr_add(hash_rxq, i);
-		if (!ret)
-			continue;
-		/* Failure, rollback. */
-		while (i != 0)
-			hash_rxq_mac_addr_del(hash_rxq, --i);
-		assert(ret > 0);
-		return ret;
-	}
-	return 0;
-}
-
-/**
- * Register a MAC address.
- *
- * This is done for each hash RX queue.
- *
- * @param priv
- *   Pointer to private structure.
- * @param mac_index
- *   MAC address index to use.
- * @param mac
- *   MAC address to register.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-int
-priv_mac_addr_add(struct priv *priv, unsigned int mac_index,
-		  const uint8_t (*mac)[ETHER_ADDR_LEN])
-{
-	unsigned int i;
-	int ret;
-
-	assert(mac_index < RTE_DIM(priv->mac));
-	/* First, make sure this address isn't already configured. */
-	for (i = 0; (i != RTE_DIM(priv->mac)); ++i) {
-		/* Skip this index, it's going to be reconfigured. */
-		if (i == mac_index)
-			continue;
-		if (!BITFIELD_ISSET(priv->mac_configured, i))
-			continue;
-		if (memcmp(priv->mac[i].addr_bytes, *mac, sizeof(*mac)))
-			continue;
-		/* Address already configured elsewhere, return with error. */
-		return EADDRINUSE;
-	}
-	if (BITFIELD_ISSET(priv->mac_configured, mac_index))
-		priv_mac_addr_del(priv, mac_index);
-	priv->mac[mac_index] = (struct ether_addr){
-		{
-			(*mac)[0], (*mac)[1], (*mac)[2],
-			(*mac)[3], (*mac)[4], (*mac)[5]
-		}
-	};
-	if (!priv_allow_flow_type(priv, HASH_RXQ_FLOW_TYPE_MAC))
-		goto end;
-	for (i = 0; (i != priv->hash_rxqs_n); ++i) {
-		ret = hash_rxq_mac_addr_add(&(*priv->hash_rxqs)[i], mac_index);
-		if (!ret)
-			continue;
-		/* Failure, rollback. */
-		while (i != 0)
-			hash_rxq_mac_addr_del(&(*priv->hash_rxqs)[--i],
-					      mac_index);
-		return ret;
-	}
-end:
-	BITFIELD_SET(priv->mac_configured, mac_index);
-	return 0;
-}
-
-/**
- * Register all MAC addresses in all hash RX queues.
- *
- * @param priv
- *   Pointer to private structure.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-int
-priv_mac_addrs_enable(struct priv *priv)
-{
-	unsigned int i;
-	int ret;
-
-	if (priv->isolated)
-		return 0;
-	if (!priv_allow_flow_type(priv, HASH_RXQ_FLOW_TYPE_MAC))
-		return 0;
-	for (i = 0; (i != priv->hash_rxqs_n); ++i) {
-		ret = hash_rxq_mac_addrs_add(&(*priv->hash_rxqs)[i]);
-		if (!ret)
-			continue;
-		/* Failure, rollback. */
-		while (i != 0)
-			hash_rxq_mac_addrs_del(&(*priv->hash_rxqs)[--i]);
-		assert(ret > 0);
-		return ret;
-	}
-	return 0;
+	assert(index < MLX5_MAX_MAC_ADDRESSES);
+	memset(&dev->data->mac_addrs[index], 0, sizeof(struct ether_addr));
+	if (!dev->data->promiscuous && !dev->data->all_multicast)
+		mlx5_traffic_restart(dev);
 }
 
 /**
@@ -468,31 +112,35 @@ priv_mac_addrs_enable(struct priv *priv)
  *   MAC address index.
  * @param vmdq
  *   VMDq pool index to associate address with (ignored).
+ *
+ * @return
+ *   0 on success.
  */
 int
-mlx5_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
+mlx5_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac,
 		  uint32_t index, uint32_t vmdq)
 {
-	struct priv *priv = dev->data->dev_private;
-	int re;
-
-	if (mlx5_is_secondary())
-		return -ENOTSUP;
+	unsigned int i;
+	int ret = 0;
 
 	(void)vmdq;
-	priv_lock(priv);
-	DEBUG("%p: adding MAC address at index %" PRIu32,
-	      (void *)dev, index);
-	if (index >= RTE_DIM(priv->mac)) {
-		re = EINVAL;
-		goto end;
+	if (mlx5_is_secondary())
+		return 0;
+	assert(index < MLX5_MAX_MAC_ADDRESSES);
+	/* First, make sure this address isn't already configured. */
+	for (i = 0; (i != MLX5_MAX_MAC_ADDRESSES); ++i) {
+		/* Skip this index, it's going to be reconfigured. */
+		if (i == index)
+			continue;
+		if (memcmp(&dev->data->mac_addrs[i], mac, sizeof(*mac)))
+			continue;
+		/* Address already configured elsewhere, return with error. */
+		return EADDRINUSE;
 	}
-	re = priv_mac_addr_add(priv, index,
-			       (const uint8_t (*)[ETHER_ADDR_LEN])
-			       mac_addr->addr_bytes);
-end:
-	priv_unlock(priv);
-	return -re;
+	dev->data->mac_addrs[index] = *mac;
+	if (!dev->data->promiscuous && !dev->data->all_multicast)
+		mlx5_traffic_restart(dev);
+	return ret;
 }
 
 /**
@@ -506,7 +154,8 @@ mlx5_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
 void
 mlx5_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
 {
+	if (mlx5_is_secondary())
+		return;
 	DEBUG("%p: setting primary MAC address", (void *)dev);
-	mlx5_mac_addr_remove(dev, 0);
 	mlx5_mac_addr_add(dev, mac_addr, 0, 0);
 }
diff --git a/drivers/net/mlx5/mlx5_rxmode.c b/drivers/net/mlx5/mlx5_rxmode.c
index 0c75889..0ef2cdf 100644
--- a/drivers/net/mlx5/mlx5_rxmode.c
+++ b/drivers/net/mlx5/mlx5_rxmode.c
@@ -51,308 +51,6 @@
 #include "mlx5_rxtx.h"
 #include "mlx5_utils.h"
 
-/* Initialization data for special flows. */
-static const struct special_flow_init special_flow_init[] = {
-	[HASH_RXQ_FLOW_TYPE_BROADCAST] = {
-		.dst_mac_val = "\xff\xff\xff\xff\xff\xff",
-		.dst_mac_mask = "\xff\xff\xff\xff\xff\xff",
-		.hash_types =
-			1 << HASH_RXQ_UDPV4 |
-			1 << HASH_RXQ_IPV4 |
-			1 << HASH_RXQ_UDPV6 |
-			1 << HASH_RXQ_IPV6 |
-			1 << HASH_RXQ_ETH |
-			0,
-		.per_vlan = 1,
-	},
-	[HASH_RXQ_FLOW_TYPE_IPV6MULTI] = {
-		.dst_mac_val = "\x33\x33\x00\x00\x00\x00",
-		.dst_mac_mask = "\xff\xff\x00\x00\x00\x00",
-		.hash_types =
-			1 << HASH_RXQ_UDPV6 |
-			1 << HASH_RXQ_IPV6 |
-			1 << HASH_RXQ_ETH |
-			0,
-		.per_vlan = 1,
-	},
-};
-
-/**
- * Enable a special flow in a hash RX queue for a given VLAN index.
- *
- * @param hash_rxq
- *   Pointer to hash RX queue structure.
- * @param flow_type
- *   Special flow type.
- * @param vlan_index
- *   VLAN index to use.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-static int
-hash_rxq_special_flow_enable_vlan(struct hash_rxq *hash_rxq,
-				  enum hash_rxq_flow_type flow_type,
-				  unsigned int vlan_index)
-{
-	struct priv *priv = hash_rxq->priv;
-	struct ibv_flow *flow;
-	FLOW_ATTR_SPEC_ETH(data, priv_flow_attr(priv, NULL, 0, hash_rxq->type));
-	struct ibv_flow_attr *attr = &data->attr;
-	struct ibv_flow_spec_eth *spec = &data->spec;
-	const uint8_t *mac;
-	const uint8_t *mask;
-	unsigned int vlan_enabled = (priv->vlan_filter_n &&
-				     special_flow_init[flow_type].per_vlan);
-	unsigned int vlan_id = priv->vlan_filter[vlan_index];
-
-	/* Check if flow is relevant for this hash_rxq. */
-	if (!(special_flow_init[flow_type].hash_types & (1 << hash_rxq->type)))
-		return 0;
-	/* Check if flow already exists. */
-	if (hash_rxq->special_flow[flow_type][vlan_index] != NULL)
-		return 0;
-
-	/*
-	 * No padding must be inserted by the compiler between attr and spec.
-	 * This layout is expected by libibverbs.
-	 */
-	assert(((uint8_t *)attr + sizeof(*attr)) == (uint8_t *)spec);
-	priv_flow_attr(priv, attr, sizeof(data), hash_rxq->type);
-	/* The first specification must be Ethernet. */
-	assert(spec->type == IBV_FLOW_SPEC_ETH);
-	assert(spec->size == sizeof(*spec));
-
-	mac = special_flow_init[flow_type].dst_mac_val;
-	mask = special_flow_init[flow_type].dst_mac_mask;
-	*spec = (struct ibv_flow_spec_eth){
-		.type = IBV_FLOW_SPEC_ETH,
-		.size = sizeof(*spec),
-		.val = {
-			.dst_mac = {
-				mac[0], mac[1], mac[2],
-				mac[3], mac[4], mac[5],
-			},
-			.vlan_tag = (vlan_enabled ?
-				     rte_cpu_to_be_16(vlan_id) :
-				     0),
-		},
-		.mask = {
-			.dst_mac = {
-				mask[0], mask[1], mask[2],
-				mask[3], mask[4], mask[5],
-			},
-			.vlan_tag = (vlan_enabled ?
-				     rte_cpu_to_be_16(0xfff) :
-				     0),
-		},
-	};
-
-	errno = 0;
-	flow = ibv_create_flow(hash_rxq->qp, attr);
-	if (flow == NULL) {
-		/* It's not clear whether errno is always set in this case. */
-		ERROR("%p: flow configuration failed, errno=%d: %s",
-		      (void *)hash_rxq, errno,
-		      (errno ? strerror(errno) : "Unknown error"));
-		if (errno)
-			return errno;
-		return EINVAL;
-	}
-	hash_rxq->special_flow[flow_type][vlan_index] = flow;
-	DEBUG("%p: special flow %s (index %d) VLAN %u (index %u) enabled",
-	      (void *)hash_rxq, hash_rxq_flow_type_str(flow_type), flow_type,
-	      vlan_id, vlan_index);
-	return 0;
-}
-
-/**
- * Disable a special flow in a hash RX queue for a given VLAN index.
- *
- * @param hash_rxq
- *   Pointer to hash RX queue structure.
- * @param flow_type
- *   Special flow type.
- * @param vlan_index
- *   VLAN index to use.
- */
-static void
-hash_rxq_special_flow_disable_vlan(struct hash_rxq *hash_rxq,
-				   enum hash_rxq_flow_type flow_type,
-				   unsigned int vlan_index)
-{
-	struct ibv_flow *flow =
-		hash_rxq->special_flow[flow_type][vlan_index];
-
-	if (flow == NULL)
-		return;
-	claim_zero(ibv_destroy_flow(flow));
-	hash_rxq->special_flow[flow_type][vlan_index] = NULL;
-	DEBUG("%p: special flow %s (index %d) VLAN %u (index %u) disabled",
-	      (void *)hash_rxq, hash_rxq_flow_type_str(flow_type), flow_type,
-	      hash_rxq->priv->vlan_filter[vlan_index], vlan_index);
-}
-
-/**
- * Enable a special flow in a hash RX queue.
- *
- * @param hash_rxq
- *   Pointer to hash RX queue structure.
- * @param flow_type
- *   Special flow type.
- * @param vlan_index
- *   VLAN index to use.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-static int
-hash_rxq_special_flow_enable(struct hash_rxq *hash_rxq,
-			     enum hash_rxq_flow_type flow_type)
-{
-	struct priv *priv = hash_rxq->priv;
-	unsigned int i = 0;
-	int ret;
-
-	assert((unsigned int)flow_type < RTE_DIM(hash_rxq->special_flow));
-	assert(RTE_DIM(hash_rxq->special_flow[flow_type]) ==
-	       RTE_DIM(priv->vlan_filter));
-	/* Add a special flow for each VLAN filter when relevant. */
-	do {
-		ret = hash_rxq_special_flow_enable_vlan(hash_rxq, flow_type, i);
-		if (ret) {
-			/* Failure, rollback. */
-			while (i != 0)
-				hash_rxq_special_flow_disable_vlan(hash_rxq,
-								   flow_type,
-								   --i);
-			return ret;
-		}
-	} while (special_flow_init[flow_type].per_vlan &&
-		 ++i < priv->vlan_filter_n);
-	return 0;
-}
-
-/**
- * Disable a special flow in a hash RX queue.
- *
- * @param hash_rxq
- *   Pointer to hash RX queue structure.
- * @param flow_type
- *   Special flow type.
- */
-static void
-hash_rxq_special_flow_disable(struct hash_rxq *hash_rxq,
-			      enum hash_rxq_flow_type flow_type)
-{
-	unsigned int i;
-
-	assert((unsigned int)flow_type < RTE_DIM(hash_rxq->special_flow));
-	for (i = 0; (i != RTE_DIM(hash_rxq->special_flow[flow_type])); ++i)
-		hash_rxq_special_flow_disable_vlan(hash_rxq, flow_type, i);
-}
-
-/**
- * Enable a special flow in all hash RX queues.
- *
- * @param priv
- *   Private structure.
- * @param flow_type
- *   Special flow type.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-int
-priv_special_flow_enable(struct priv *priv, enum hash_rxq_flow_type flow_type)
-{
-	unsigned int i;
-
-	if (!priv_allow_flow_type(priv, flow_type))
-		return 0;
-	for (i = 0; (i != priv->hash_rxqs_n); ++i) {
-		struct hash_rxq *hash_rxq = &(*priv->hash_rxqs)[i];
-		int ret;
-
-		ret = hash_rxq_special_flow_enable(hash_rxq, flow_type);
-		if (!ret)
-			continue;
-		/* Failure, rollback. */
-		while (i != 0) {
-			hash_rxq = &(*priv->hash_rxqs)[--i];
-			hash_rxq_special_flow_disable(hash_rxq, flow_type);
-		}
-		return ret;
-	}
-	return 0;
-}
-
-/**
- * Disable a special flow in all hash RX queues.
- *
- * @param priv
- *   Private structure.
- * @param flow_type
- *   Special flow type.
- */
-void
-priv_special_flow_disable(struct priv *priv, enum hash_rxq_flow_type flow_type)
-{
-	unsigned int i;
-
-	for (i = 0; (i != priv->hash_rxqs_n); ++i) {
-		struct hash_rxq *hash_rxq = &(*priv->hash_rxqs)[i];
-
-		hash_rxq_special_flow_disable(hash_rxq, flow_type);
-	}
-}
-
-/**
- * Enable all special flows in all hash RX queues.
- *
- * @param priv
- *   Private structure.
- */
-int
-priv_special_flow_enable_all(struct priv *priv)
-{
-	enum hash_rxq_flow_type flow_type;
-
-	if (priv->isolated)
-		return 0;
-	for (flow_type = HASH_RXQ_FLOW_TYPE_BROADCAST;
-			flow_type != HASH_RXQ_FLOW_TYPE_MAC;
-			++flow_type) {
-		int ret;
-
-		ret = priv_special_flow_enable(priv, flow_type);
-		if (!ret)
-			continue;
-		/* Failure, rollback. */
-		while (flow_type)
-			priv_special_flow_disable(priv, --flow_type);
-		return ret;
-	}
-	return 0;
-}
-
-/**
- * Disable all special flows in all hash RX queues.
- *
- * @param priv
- *   Private structure.
- */
-void
-priv_special_flow_disable_all(struct priv *priv)
-{
-	enum hash_rxq_flow_type flow_type;
-
-	for (flow_type = HASH_RXQ_FLOW_TYPE_BROADCAST;
-			flow_type != HASH_RXQ_FLOW_TYPE_MAC;
-			++flow_type)
-		priv_special_flow_disable(priv, flow_type);
-}
-
 /**
  * DPDK callback to enable promiscuous mode.
  *
@@ -362,16 +60,10 @@ priv_special_flow_disable_all(struct priv *priv)
 void
 mlx5_promiscuous_enable(struct rte_eth_dev *dev)
 {
-	struct rte_flow_item_eth eth = {
-		.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
-		.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
-		.type = 0,
-	};
-
 	if (mlx5_is_secondary())
 		return;
 	dev->data->promiscuous = 1;
-	claim_zero(mlx5_ctrl_flow(dev, &eth, &eth, 1));
+	mlx5_traffic_restart(dev);
 }
 
 /**
@@ -383,16 +75,10 @@ mlx5_promiscuous_enable(struct rte_eth_dev *dev)
 void
 mlx5_promiscuous_disable(struct rte_eth_dev *dev)
 {
-	struct rte_flow_item_eth eth = {
-		.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
-		.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
-		.type = 0,
-	};
-
 	if (mlx5_is_secondary())
 		return;
 	dev->data->promiscuous = 0;
-	claim_zero(mlx5_ctrl_flow(dev, &eth, &eth, 0));
+	mlx5_traffic_restart(dev);
 }
 
 /**
@@ -404,17 +90,10 @@ mlx5_promiscuous_disable(struct rte_eth_dev *dev)
 void
 mlx5_allmulticast_enable(struct rte_eth_dev *dev)
 {
-	struct rte_flow_item_eth eth = {
-		.dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
-		.src.addr_bytes = "\x01\x00\x00\x00\x00\x00",
-		.type = 0,
-	};
-
 	if (mlx5_is_secondary())
 		return;
 	dev->data->all_multicast = 1;
-	if (dev->data->dev_started)
-		claim_zero(mlx5_ctrl_flow(dev, &eth, &eth, 1));
+	mlx5_traffic_restart(dev);
 }
 
 /**
@@ -426,15 +105,8 @@ mlx5_allmulticast_enable(struct rte_eth_dev *dev)
 void
 mlx5_allmulticast_disable(struct rte_eth_dev *dev)
 {
-	struct rte_flow_item_eth eth = {
-		.dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
-		.src.addr_bytes = "\x01\x00\x00\x00\x00\x00",
-		.type = 0,
-	};
-
 	if (mlx5_is_secondary())
 		return;
 	dev->data->all_multicast = 0;
-	if (dev->data->dev_started)
-		claim_zero(mlx5_ctrl_flow(dev, &eth, &eth, 0));
+	mlx5_traffic_restart(dev);
 }
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index d3cd58e..c603d2b 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -531,12 +531,6 @@ priv_destroy_hash_rxqs(struct priv *priv)
 
 		assert(hash_rxq->priv == priv);
 		assert(hash_rxq->qp != NULL);
-		/* Also check that there are no remaining flows. */
-		for (j = 0; (j != RTE_DIM(hash_rxq->special_flow)); ++j)
-			for (k = 0;
-			     (k != RTE_DIM(hash_rxq->special_flow[j]));
-			     ++k)
-				assert(hash_rxq->special_flow[j][k] == NULL);
 		for (j = 0; (j != RTE_DIM(hash_rxq->mac_flow)); ++j)
 			for (k = 0; (k != RTE_DIM(hash_rxq->mac_flow[j])); ++k)
 				assert(hash_rxq->mac_flow[j][k] == NULL);
@@ -558,63 +552,6 @@ priv_destroy_hash_rxqs(struct priv *priv)
 }
 
 /**
- * Check whether a given flow type is allowed.
- *
- * @param priv
- *   Pointer to private structure.
- * @param type
- *   Flow type to check.
- *
- * @return
- *   Nonzero if the given flow type is allowed.
- */
-int
-priv_allow_flow_type(struct priv *priv, enum hash_rxq_flow_type type)
-{
-	(void)priv;
-	switch (type) {
-	case HASH_RXQ_FLOW_TYPE_BROADCAST:
-	case HASH_RXQ_FLOW_TYPE_IPV6MULTI:
-	case HASH_RXQ_FLOW_TYPE_MAC:
-		return 1;
-		return 1;
-	default:
-		/* Unsupported flow type is not allowed. */
-		return 0;
-	}
-	return 0;
-}
-
-/**
- * Automatically enable/disable flows according to configuration.
- *
- * @param priv
- *   Private structure.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-int
-priv_rehash_flows(struct priv *priv)
-{
-	size_t i;
-
-	for (i = 0; i != RTE_DIM((*priv->hash_rxqs)[0].special_flow); ++i)
-		if (!priv_allow_flow_type(priv, i)) {
-			priv_special_flow_disable(priv, i);
-		} else {
-			int ret = priv_special_flow_enable(priv, i);
-
-			if (ret)
-				return ret;
-		}
-	if (priv_allow_flow_type(priv, HASH_RXQ_FLOW_TYPE_MAC))
-		return priv_mac_addrs_enable(priv);
-	priv_mac_addrs_disable(priv);
-	return 0;
-}
-
-/**
  * Allocate RX queue elements.
  *
  * @param rxq_ctrl
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 7107a6b..a052b4b 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -236,28 +236,6 @@ struct special_flow_init {
 	unsigned int per_vlan:1;
 };
 
-enum hash_rxq_flow_type {
-	HASH_RXQ_FLOW_TYPE_BROADCAST,
-	HASH_RXQ_FLOW_TYPE_IPV6MULTI,
-	HASH_RXQ_FLOW_TYPE_MAC,
-};
-
-#ifndef NDEBUG
-static inline const char *
-hash_rxq_flow_type_str(enum hash_rxq_flow_type flow_type)
-{
-	switch (flow_type) {
-	case HASH_RXQ_FLOW_TYPE_BROADCAST:
-		return "broadcast";
-	case HASH_RXQ_FLOW_TYPE_IPV6MULTI:
-		return "IPv6 multicast";
-	case HASH_RXQ_FLOW_TYPE_MAC:
-		return "MAC";
-	}
-	return NULL;
-}
-#endif /* NDEBUG */
-
 struct hash_rxq {
 	struct priv *priv; /* Back pointer to private data. */
 	struct ibv_qp *qp; /* Hash RX QP. */
@@ -265,8 +243,6 @@ struct hash_rxq {
 	/* MAC flow steering rules, one per VLAN ID. */
 	struct ibv_flow *mac_flow
 		[MLX5_MAX_MAC_ADDRESSES][MLX5_MAX_VLAN_IDS];
-	struct ibv_flow *special_flow
-		[MLX5_MAX_SPECIAL_FLOWS][MLX5_MAX_VLAN_IDS];
 };
 
 /* TX queue descriptor. */
@@ -336,8 +312,6 @@ size_t priv_flow_attr(struct priv *, struct ibv_flow_attr *,
 		      size_t, enum hash_rxq_type);
 int priv_create_hash_rxqs(struct priv *);
 void priv_destroy_hash_rxqs(struct priv *);
-int priv_allow_flow_type(struct priv *, enum hash_rxq_flow_type);
-int priv_rehash_flows(struct priv *);
 void mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *);
 int mlx5_rx_queue_setup(struct rte_eth_dev *, uint16_t, uint16_t, unsigned int,
 			const struct rte_eth_rxconf *, struct rte_mempool *);
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 27e7890..4143571 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -135,7 +135,14 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 	if (mlx5_is_secondary())
 		return -E_RTE_SECONDARY;
 
+	dev->data->dev_started = 1;
 	priv_lock(priv);
+	err = priv_flow_create_drop_queue(priv);
+	if (err) {
+		ERROR("%p: Drop queue allocation failed: %s",
+		      (void *)dev, strerror(err));
+		goto error;
+	}
 	DEBUG("%p: allocating and configuring hash RX queues", (void *)dev);
 	rte_mempool_walk(mlx5_mp2mr_iter, priv);
 	err = priv_txq_start(priv);
@@ -155,21 +162,8 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 	/* Update receive callback. */
 	priv_dev_select_rx_function(priv, dev);
 	err = priv_create_hash_rxqs(priv);
-	if (!err)
-		err = priv_rehash_flows(priv);
-	else {
-		ERROR("%p: an error occurred while configuring hash RX queues:"
-		      " %s",
-		      (void *)priv, strerror(err));
-		goto error;
-	}
-	if (dev->data->promiscuous)
-		mlx5_promiscuous_enable(dev);
-	else if (dev->data->all_multicast)
-		mlx5_allmulticast_enable(dev);
-	err = priv_flow_start(priv, &priv->ctrl_flows);
 	if (err) {
-		ERROR("%p: an error occurred while configuring control flows:"
+		ERROR("%p: an error occurred while configuring hash RX queues:"
 		      " %s",
 		      (void *)priv, strerror(err));
 		goto error;
@@ -193,15 +187,14 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 	return 0;
 error:
 	/* Rollback. */
+	dev->data->dev_started = 0;
 	LIST_FOREACH(mr, &priv->mr, next)
 		priv_mr_release(priv, mr);
-	priv_special_flow_disable_all(priv);
-	priv_mac_addrs_disable(priv);
 	priv_destroy_hash_rxqs(priv);
 	priv_flow_stop(priv, &priv->flows);
-	priv_flow_flush(priv, &priv->ctrl_flows);
-	priv_rxq_stop(priv);
 	priv_txq_stop(priv);
+	priv_rxq_stop(priv);
+	priv_flow_delete_drop_queue(priv);
 	priv_unlock(priv);
 	return -err;
 }
@@ -231,8 +224,6 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
 	rte_wmb();
 	usleep(1000 * priv->rxqs_n);
 	DEBUG("%p: cleaning up and destroying hash RX queues", (void *)dev);
-	priv_special_flow_disable_all(priv);
-	priv_mac_addrs_disable(priv);
 	priv_destroy_hash_rxqs(priv);
 	priv_flow_stop(priv, &priv->flows);
 	priv_flow_flush(priv, &priv->ctrl_flows);
@@ -243,5 +234,172 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
 	LIST_FOREACH(mr, &priv->mr, next) {
 		priv_mr_release(priv, mr);
 	}
+	priv_flow_delete_drop_queue(priv);
+	priv_unlock(priv);
+}
+
+/**
+ * Enable traffic flows configured by control plane
+ *
+ * @param priv
+ *   Pointer to Ethernet device private data.
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ *
+ * @return
+ *   0 on success, rte_errno value on failure.
+ */
+int
+priv_dev_traffic_enable(struct priv *priv, struct rte_eth_dev *dev)
+{
+	if (priv->isolated)
+		return 0;
+	if (dev->data->promiscuous) {
+		struct rte_flow_item_eth promisc = {
+			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
+			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
+			.type = 0,
+		};
+
+		claim_zero(mlx5_ctrl_flow(dev, &promisc, &promisc));
+	} else if (dev->data->all_multicast) {
+		struct rte_flow_item_eth multicast = {
+			.dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
+			.src.addr_bytes = "\x01\x00\x00\x00\x00\x00",
+			.type = 0,
+		};
+
+		claim_zero(mlx5_ctrl_flow(dev, &multicast, &multicast));
+	} else {
+		struct rte_flow_item_eth bcast = {
+			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+		};
+		struct rte_flow_item_eth ipv6_multi_spec = {
+			.dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
+		};
+		struct rte_flow_item_eth ipv6_multi_mask = {
+			.dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
+		};
+		struct rte_flow_item_eth unicast = {
+			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
+		};
+		struct rte_flow_item_eth unicast_mask = {
+			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+		};
+		const unsigned int vlan_filter_n = priv->vlan_filter_n;
+		const struct ether_addr cmp = {
+			.addr_bytes = "\x00\x00\x00\x00\x00\x00",
+		};
+		unsigned int i;
+		unsigned int j;
+		unsigned int unicast_flow = 0;
+		int ret;
+
+		for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
+			struct ether_addr *mac = &dev->data->mac_addrs[i];
+
+			if (!memcmp(mac, &cmp, sizeof(*mac)))
+				continue;
+			memcpy(&unicast.dst.addr_bytes,
+			       mac->addr_bytes,
+			       ETHER_ADDR_LEN);
+			for (j = 0; j != vlan_filter_n; ++j) {
+				uint16_t vlan = priv->vlan_filter[j];
+
+				struct rte_flow_item_vlan vlan_spec = {
+					.tci = rte_cpu_to_be_16(vlan),
+				};
+				struct rte_flow_item_vlan vlan_mask = {
+					.tci = 0xffff,
+				};
+
+				ret = mlx5_ctrl_flow_vlan(dev, &unicast,
+							  &unicast_mask,
+							  &vlan_spec,
+							  &vlan_mask);
+				if (ret)
+					goto error;
+				unicast_flow = 1;
+			}
+			if (!vlan_filter_n) {
+				ret = mlx5_ctrl_flow(dev, &unicast,
+						     &unicast_mask);
+				if (ret)
+					goto error;
+				unicast_flow = 1;
+			}
+		}
+		if (!unicast_flow)
+			return 0;
+		ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
+		if (ret)
+			goto error;
+		ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec, &ipv6_multi_mask);
+		if (ret)
+			goto error;
+	}
+	return 0;
+error:
+	return rte_errno;
+}
+
+
+/**
+ * Disable traffic flows configured by control plane
+ *
+ * @param priv
+ *   Pointer to Ethernet device private data.
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ *
+ * @return
+ *   0 on success.
+ */
+int
+priv_dev_traffic_disable(struct priv *priv, struct rte_eth_dev *dev)
+{
+	(void)dev;
+	priv_flow_flush(priv, &priv->ctrl_flows);
+	return 0;
+}
+
+/**
+ * Restart traffic flows configured by control plane
+ *
+ * @param priv
+ *   Pointer to Ethernet device private data.
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ *
+ * @return
+ *   0 on success.
+ */
+int
+priv_dev_traffic_restart(struct priv *priv, struct rte_eth_dev *dev)
+{
+	if (dev->data->dev_started) {
+		priv_dev_traffic_disable(priv, dev);
+		priv_dev_traffic_enable(priv, dev);
+	}
+	return 0;
+}
+
+/**
+ * Restart traffic flows configured by control plane
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ *
+ * @return
+ *   0 on success.
+ */
+int
+mlx5_traffic_restart(struct rte_eth_dev *dev)
+{
+	struct priv *priv = dev->data->dev_private;
+
+	priv_lock(priv);
+	priv_dev_traffic_restart(priv, dev);
 	priv_unlock(priv);
+	return 0;
 }
diff --git a/drivers/net/mlx5/mlx5_vlan.c b/drivers/net/mlx5/mlx5_vlan.c
index d707984..3afbe8e 100644
--- a/drivers/net/mlx5/mlx5_vlan.c
+++ b/drivers/net/mlx5/mlx5_vlan.c
@@ -44,7 +44,7 @@
 #include "mlx5_autoconf.h"
 
 /**
- * Configure a VLAN filter.
+ * DPDK callback to configure a VLAN filter.
  *
  * @param dev
  *   Pointer to Ethernet device structure.
@@ -54,28 +54,26 @@
  *   Toggle filter.
  *
  * @return
- *   0 on success, errno value on failure.
+ *   0 on success, negative errno value on failure.
  */
-static int
-vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
+int
+mlx5_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
 {
 	struct priv *priv = dev->data->dev_private;
 	unsigned int i;
 
+	priv_lock(priv);
 	DEBUG("%p: %s VLAN filter ID %" PRIu16,
 	      (void *)dev, (on ? "enable" : "disable"), vlan_id);
 	assert(priv->vlan_filter_n <= RTE_DIM(priv->vlan_filter));
 	for (i = 0; (i != priv->vlan_filter_n); ++i)
 		if (priv->vlan_filter[i] == vlan_id)
 			break;
-	/* Check if there's room for another VLAN filter. */
-	if (i == RTE_DIM(priv->vlan_filter))
-		return ENOMEM;
 	if (i < priv->vlan_filter_n) {
 		assert(priv->vlan_filter_n != 0);
 		/* Enabling an existing VLAN filter has no effect. */
 		if (on)
-			return 0;
+			goto out;
 		/* Remove VLAN filter from list. */
 		--priv->vlan_filter_n;
 		memmove(&priv->vlan_filter[i],
@@ -87,41 +85,18 @@ vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
 		assert(i == priv->vlan_filter_n);
 		/* Disabling an unknown VLAN filter has no effect. */
 		if (!on)
-			return 0;
+			goto out;
 		/* Add new VLAN filter. */
 		priv->vlan_filter[priv->vlan_filter_n] = vlan_id;
 		++priv->vlan_filter_n;
 	}
-	/* Rehash flows in all hash RX queues. */
-	priv_mac_addrs_disable(priv);
-	priv_special_flow_disable_all(priv);
-	return priv_rehash_flows(priv);
-}
-
-/**
- * DPDK callback to configure a VLAN filter.
- *
- * @param dev
- *   Pointer to Ethernet device structure.
- * @param vlan_id
- *   VLAN ID to filter.
- * @param on
- *   Toggle filter.
- *
- * @return
- *   0 on success, negative errno value on failure.
- */
-int
-mlx5_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
-{
-	struct priv *priv = dev->data->dev_private;
-	int ret;
-
-	priv_lock(priv);
-	ret = vlan_filter_set(dev, vlan_id, on);
+	if (dev->data->dev_started) {
+		priv_dev_traffic_disable(priv, dev);
+		priv_dev_traffic_enable(priv, dev);
+	}
+out:
 	priv_unlock(priv);
-	assert(ret >= 0);
-	return -ret;
+	return 0;
 }
 
 /**
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * Re: [dpdk-dev] [PATCH v2 19/30] net/mlx5: use flow to enable unicast traffic
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 19/30] net/mlx5: use flow to enable unicast traffic Nelio Laranjeiro
@ 2017-10-06  5:18     ` Yongseok Koh
  0 siblings, 0 replies; 129+ messages in thread
From: Yongseok Koh @ 2017-10-06  5:18 UTC (permalink / raw)
  To: Nelio Laranjeiro; +Cc: dev, adrien.mazarguil, ferruh.yigit
On Thu, Oct 05, 2017 at 02:49:51PM +0200, Nelio Laranjeiro wrote:
[...]
> -int
> -mlx5_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
> -{
> -	struct priv *priv = dev->data->dev_private;
> -	int ret;
> -
> -	priv_lock(priv);
> -	ret = vlan_filter_set(dev, vlan_id, on);
> +	if (dev->data->dev_started) {
> +		priv_dev_traffic_disable(priv, dev);
> +		priv_dev_traffic_enable(priv, dev);
> +	}
Same as priv_dev_traffic_restart()
Acked-by: Yongseok Koh <yskoh@mellanox.com>
 
Thanks
^ permalink raw reply	[flat|nested] 129+ messages in thread
 
- * [dpdk-dev] [PATCH v2 20/30] net/mlx5: handle a single RSS hash key for all protocols
       [not found] ` <cover.1507207731.git.nelio.laranjeiro@6wind.com>
                     ` (18 preceding siblings ...)
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 19/30] net/mlx5: use flow to enable unicast traffic Nelio Laranjeiro
@ 2017-10-05 12:49   ` Nelio Laranjeiro
  2017-10-06  5:23     ` Yongseok Koh
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 21/30] net/mlx5: remove hash Rx queues support Nelio Laranjeiro
                     ` (9 subsequent siblings)
  29 siblings, 1 reply; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-05 12:49 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
Since the RSS configuration can also be used by the flow API, it is no longer
necessary to keep a list of RSS configurations, one for each protocol.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5.c        |  24 +-------
 drivers/net/mlx5/mlx5.h        |   6 +-
 drivers/net/mlx5/mlx5_ethdev.c |  27 +++++++--
 drivers/net/mlx5/mlx5_rss.c    | 127 +++++++++--------------------------------
 drivers/net/mlx5/mlx5_rxq.c    |   5 +-
 5 files changed, 56 insertions(+), 133 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index fd6f0c0..6e81d37 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -225,11 +225,8 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 		claim_zero(ibv_close_device(priv->ctx));
 	} else
 		assert(priv->ctx == NULL);
-	if (priv->rss_conf != NULL) {
-		for (i = 0; (i != hash_rxq_init_n); ++i)
-			rte_free((*priv->rss_conf)[i]);
-		rte_free(priv->rss_conf);
-	}
+	if (priv->rss_conf.rss_key != NULL)
+		rte_free(priv->rss_conf.rss_key);
 	if (priv->reta_idx != NULL)
 		rte_free(priv->reta_idx);
 	priv_socket_uninit(priv);
@@ -816,19 +813,6 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 				priv->txq_inline = MLX5_WQE_SIZE_MAX -
 						   MLX5_WQE_SIZE;
 		}
-		/* Allocate and register default RSS hash keys. */
-		priv->rss_conf = rte_calloc(__func__, hash_rxq_init_n,
-					    sizeof((*priv->rss_conf)[0]), 0);
-		if (priv->rss_conf == NULL) {
-			err = ENOMEM;
-			goto port_error;
-		}
-		err = rss_hash_rss_conf_new_key(priv,
-						rss_hash_default_key,
-						rss_hash_default_key_len,
-						ETH_RSS_PROTO_MASK);
-		if (err)
-			goto port_error;
 		/* Configure the first MAC address by default. */
 		if (priv_get_mac(priv, &mac.addr_bytes)) {
 			ERROR("cannot get MAC address, is mlx5_en loaded?"
@@ -898,10 +882,8 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 		continue;
 
 port_error:
-		if (priv) {
-			rte_free(priv->rss_conf);
+		if (priv)
 			rte_free(priv);
-		}
 		if (pd)
 			claim_zero(ibv_dealloc_pd(pd));
 		if (ctx)
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index e83961f..4718506 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -134,9 +134,7 @@ struct priv {
 	/* Hash RX QPs feeding the indirection table. */
 	struct hash_rxq (*hash_rxqs)[];
 	unsigned int hash_rxqs_n; /* Hash RX QPs array size. */
-	/* RSS configuration array indexed by hash RX queue type. */
-	struct rte_eth_rss_conf *(*rss_conf)[];
-	uint64_t rss_hf; /* RSS DPDK bit field of active RSS. */
+	struct rte_eth_rss_conf rss_conf; /* RSS configuration. */
 	struct rte_intr_handle intr_handle; /* Interrupt handler. */
 	unsigned int (*reta_idx)[]; /* RETA index table. */
 	unsigned int reta_idx_n; /* RETA index size. */
@@ -226,8 +224,6 @@ void mlx5_mac_addr_set(struct rte_eth_dev *, struct ether_addr *);
 
 /* mlx5_rss.c */
 
-int rss_hash_rss_conf_new_key(struct priv *, const uint8_t *, unsigned int,
-			      uint64_t);
 int mlx5_rss_hash_update(struct rte_eth_dev *, struct rte_eth_rss_conf *);
 int mlx5_rss_hash_conf_get(struct rte_eth_dev *, struct rte_eth_rss_conf *);
 int priv_rss_reta_index_resize(struct priv *, unsigned int);
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 318bc9d..9f5b489 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -575,8 +575,29 @@ dev_configure(struct rte_eth_dev *dev)
 	unsigned int i;
 	unsigned int j;
 	unsigned int reta_idx_n;
+	const uint8_t use_app_rss_key =
+		!!dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
 
-	priv->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
+	if (use_app_rss_key &&
+	    (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len !=
+	     rss_hash_default_key_len)) {
+		/* MLX5 RSS only supports 40-byte keys. */
+		return EINVAL;
+	}
+	priv->rss_conf.rss_key =
+		rte_realloc(priv->rss_conf.rss_key,
+			    rss_hash_default_key_len, 0);
+	if (!priv->rss_conf.rss_key) {
+		ERROR("cannot allocate RSS hash key memory (%u)", rxqs_n);
+		return ENOMEM;
+	}
+	memcpy(priv->rss_conf.rss_key,
+	       use_app_rss_key ?
+	       dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key :
+	       rss_hash_default_key,
+	       rss_hash_default_key_len);
+	priv->rss_conf.rss_key_len = rss_hash_default_key_len;
+	priv->rss_conf.rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
 	priv->rxqs = (void *)dev->data->rx_queues;
 	priv->txqs = (void *)dev->data->tx_queues;
 	if (txqs_n != priv->txqs_n) {
@@ -694,9 +715,7 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
 		info->if_index = if_nametoindex(ifname);
 	info->reta_size = priv->reta_idx_n ?
 		priv->reta_idx_n : priv->ind_table_max_size;
-	info->hash_key_size = ((*priv->rss_conf) ?
-			       (*priv->rss_conf)[0]->rss_key_len :
-			       0);
+	info->hash_key_size = priv->rss_conf.rss_key_len;
 	info->speed_capa = priv->link_speed_capa;
 	priv_unlock(priv);
 }
diff --git a/drivers/net/mlx5/mlx5_rss.c b/drivers/net/mlx5/mlx5_rss.c
index 8942879..ad6d9ab 100644
--- a/drivers/net/mlx5/mlx5_rss.c
+++ b/drivers/net/mlx5/mlx5_rss.c
@@ -54,74 +54,6 @@
 #include "mlx5_rxtx.h"
 
 /**
- * Get a RSS configuration hash key.
- *
- * @param priv
- *   Pointer to private structure.
- * @param rss_hf
- *   RSS hash functions configuration must be retrieved for.
- *
- * @return
- *   Pointer to a RSS configuration structure or NULL if rss_hf cannot
- *   be matched.
- */
-static struct rte_eth_rss_conf *
-rss_hash_get(struct priv *priv, uint64_t rss_hf)
-{
-	unsigned int i;
-
-	for (i = 0; (i != hash_rxq_init_n); ++i) {
-		uint64_t dpdk_rss_hf = hash_rxq_init[i].dpdk_rss_hf;
-
-		if (!(dpdk_rss_hf & rss_hf))
-			continue;
-		return (*priv->rss_conf)[i];
-	}
-	return NULL;
-}
-
-/**
- * Register a RSS key.
- *
- * @param priv
- *   Pointer to private structure.
- * @param key
- *   Hash key to register.
- * @param key_len
- *   Hash key length in bytes.
- * @param rss_hf
- *   RSS hash functions the provided key applies to.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-int
-rss_hash_rss_conf_new_key(struct priv *priv, const uint8_t *key,
-			  unsigned int key_len, uint64_t rss_hf)
-{
-	unsigned int i;
-
-	for (i = 0; (i != hash_rxq_init_n); ++i) {
-		struct rte_eth_rss_conf *rss_conf;
-		uint64_t dpdk_rss_hf = hash_rxq_init[i].dpdk_rss_hf;
-
-		if (!(dpdk_rss_hf & rss_hf))
-			continue;
-		rss_conf = rte_realloc((*priv->rss_conf)[i],
-				       (sizeof(*rss_conf) + key_len),
-				       0);
-		if (!rss_conf)
-			return ENOMEM;
-		rss_conf->rss_key = (void *)(rss_conf + 1);
-		rss_conf->rss_key_len = key_len;
-		rss_conf->rss_hf = dpdk_rss_hf;
-		memcpy(rss_conf->rss_key, key, key_len);
-		(*priv->rss_conf)[i] = rss_conf;
-	}
-	return 0;
-}
-
-/**
  * DPDK callback to update the RSS hash configuration.
  *
  * @param dev
@@ -137,23 +69,24 @@ mlx5_rss_hash_update(struct rte_eth_dev *dev,
 		     struct rte_eth_rss_conf *rss_conf)
 {
 	struct priv *priv = dev->data->dev_private;
-	int err = 0;
+	int ret = 0;
 
 	priv_lock(priv);
-
-	assert(priv->rss_conf != NULL);
-
-	/* Apply configuration. */
-	if (rss_conf->rss_key)
-		err = rss_hash_rss_conf_new_key(priv,
-						rss_conf->rss_key,
-						rss_conf->rss_key_len,
-						rss_conf->rss_hf);
-	/* Store protocols for which RSS is enabled. */
-	priv->rss_hf = rss_conf->rss_hf;
+	if (rss_conf->rss_key_len) {
+		priv->rss_conf.rss_key = rte_realloc(priv->rss_conf.rss_key,
+						     rss_conf->rss_key_len, 0);
+		if (!priv->rss_conf.rss_key) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		memcpy(&priv->rss_conf.rss_key, rss_conf->rss_key,
+		       rss_conf->rss_key_len);
+		priv->rss_conf.rss_key_len = rss_conf->rss_key_len;
+	}
+	priv->rss_conf.rss_hf = rss_conf->rss_hf;
+out:
 	priv_unlock(priv);
-	assert(err >= 0);
-	return -err;
+	return ret;
 }
 
 /**
@@ -172,28 +105,22 @@ mlx5_rss_hash_conf_get(struct rte_eth_dev *dev,
 		       struct rte_eth_rss_conf *rss_conf)
 {
 	struct priv *priv = dev->data->dev_private;
-	struct rte_eth_rss_conf *priv_rss_conf;
+	int ret = 0;
 
 	priv_lock(priv);
-
-	assert(priv->rss_conf != NULL);
-
-	priv_rss_conf = rss_hash_get(priv, rss_conf->rss_hf);
-	if (!priv_rss_conf) {
-		rss_conf->rss_hf = 0;
-		priv_unlock(priv);
-		return -EINVAL;
+	if (!rss_conf->rss_key) {
+		ret = -ENOMEM;
+		goto out;
 	}
-	if (rss_conf->rss_key &&
-	    rss_conf->rss_key_len >= priv_rss_conf->rss_key_len)
-		memcpy(rss_conf->rss_key,
-		       priv_rss_conf->rss_key,
-		       priv_rss_conf->rss_key_len);
-	rss_conf->rss_key_len = priv_rss_conf->rss_key_len;
-	rss_conf->rss_hf = priv_rss_conf->rss_hf;
-
+	if (rss_conf->rss_key_len < priv->rss_conf.rss_key_len) {
+		ret = -EINVAL;
+		goto out;
+	}
+	memcpy(rss_conf->rss_key, priv->rss_conf.rss_key,
+	       priv->rss_conf.rss_key_len);
+out:
 	priv_unlock(priv);
-	return 0;
+	return ret;
 }
 
 /**
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index c603d2b..d37dfbb 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -299,7 +299,7 @@ priv_make_ind_table_init(struct priv *priv,
 	/* Mandatory to receive frames not handled by normal hash RX queues. */
 	unsigned int hash_types_sup = 1 << HASH_RXQ_ETH;
 
-	rss_hf = priv->rss_hf;
+	rss_hf = priv->rss_conf.rss_hf;
 	/* Process other protocols only if more than one queue. */
 	if (priv->rxqs_n > 1)
 		for (i = 0; (i != hash_rxq_init_n); ++i)
@@ -435,8 +435,7 @@ priv_create_hash_rxqs(struct priv *priv)
 		struct hash_rxq *hash_rxq = &(*hash_rxqs)[i];
 		enum hash_rxq_type type =
 			hash_rxq_type_from_pos(&ind_table_init[j], k);
-		struct rte_eth_rss_conf *priv_rss_conf =
-			(*priv->rss_conf)[type];
+		struct rte_eth_rss_conf *priv_rss_conf = &priv->rss_conf;
 		struct ibv_rx_hash_conf hash_conf = {
 			.rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
 			.rx_hash_key_len = (priv_rss_conf ?
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v2 21/30] net/mlx5: remove hash Rx queues support
       [not found] ` <cover.1507207731.git.nelio.laranjeiro@6wind.com>
                     ` (19 preceding siblings ...)
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 20/30] net/mlx5: handle a single RSS hash key for all protocols Nelio Laranjeiro
@ 2017-10-05 12:49   ` Nelio Laranjeiro
  2017-10-06  5:27     ` Yongseok Koh
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 22/30] net/mlx5: fully convert a flow to verbs in validate Nelio Laranjeiro
                     ` (8 subsequent siblings)
  29 siblings, 1 reply; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-05 12:49 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
From this commit on, RSS support becomes unavailable until it is replaced
by the generic flow implementation.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5.c         |   1 -
 drivers/net/mlx5/mlx5.h         |   6 -
 drivers/net/mlx5/mlx5_rxq.c     | 469 ----------------------------------------
 drivers/net/mlx5/mlx5_rxtx.h    |  76 -------
 drivers/net/mlx5/mlx5_trigger.c |   9 +-
 5 files changed, 4 insertions(+), 557 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 6e81d37..7edc918 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -198,7 +198,6 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	      ((priv->ctx != NULL) ? priv->ctx->device->name : ""));
 	/* In case mlx5_dev_stop() has not been called. */
 	priv_dev_interrupt_handler_uninstall(priv, dev);
-	priv_destroy_hash_rxqs(priv);
 	priv_dev_traffic_disable(priv, dev);
 	/* Prevent crashes when queues are still in use. */
 	dev->rx_pkt_burst = removed_rx_burst;
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 4718506..643bab6 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -127,13 +127,7 @@ struct priv {
 	unsigned int txqs_n; /* TX queues array size. */
 	struct mlx5_rxq_data *(*rxqs)[]; /* RX queues. */
 	struct mlx5_txq_data *(*txqs)[]; /* TX queues. */
-	/* Indirection tables referencing all RX WQs. */
-	struct ibv_rwq_ind_table *(*ind_tables)[];
-	unsigned int ind_tables_n; /* Number of indirection tables. */
 	unsigned int ind_table_max_size; /* Maximum indirection table size. */
-	/* Hash RX QPs feeding the indirection table. */
-	struct hash_rxq (*hash_rxqs)[];
-	unsigned int hash_rxqs_n; /* Hash RX QPs array size. */
 	struct rte_eth_rss_conf rss_conf; /* RSS configuration. */
 	struct rte_intr_handle intr_handle; /* Interrupt handler. */
 	unsigned int (*reta_idx)[]; /* RETA index table. */
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index d37dfbb..e7ec1da 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -64,122 +64,6 @@
 #include "mlx5_autoconf.h"
 #include "mlx5_defs.h"
 
-/* Initialization data for hash RX queues. */
-const struct hash_rxq_init hash_rxq_init[] = {
-	[HASH_RXQ_TCPV4] = {
-		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
-				IBV_RX_HASH_DST_IPV4 |
-				IBV_RX_HASH_SRC_PORT_TCP |
-				IBV_RX_HASH_DST_PORT_TCP),
-		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
-		.flow_priority = 0,
-		.flow_spec.tcp_udp = {
-			.type = IBV_FLOW_SPEC_TCP,
-			.size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
-		},
-		.underlayer = &hash_rxq_init[HASH_RXQ_IPV4],
-	},
-	[HASH_RXQ_UDPV4] = {
-		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
-				IBV_RX_HASH_DST_IPV4 |
-				IBV_RX_HASH_SRC_PORT_UDP |
-				IBV_RX_HASH_DST_PORT_UDP),
-		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
-		.flow_priority = 0,
-		.flow_spec.tcp_udp = {
-			.type = IBV_FLOW_SPEC_UDP,
-			.size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
-		},
-		.underlayer = &hash_rxq_init[HASH_RXQ_IPV4],
-	},
-	[HASH_RXQ_IPV4] = {
-		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
-				IBV_RX_HASH_DST_IPV4),
-		.dpdk_rss_hf = (ETH_RSS_IPV4 |
-				ETH_RSS_FRAG_IPV4),
-		.flow_priority = 1,
-		.flow_spec.ipv4 = {
-			.type = IBV_FLOW_SPEC_IPV4,
-			.size = sizeof(hash_rxq_init[0].flow_spec.ipv4),
-		},
-		.underlayer = &hash_rxq_init[HASH_RXQ_ETH],
-	},
-	[HASH_RXQ_TCPV6] = {
-		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
-				IBV_RX_HASH_DST_IPV6 |
-				IBV_RX_HASH_SRC_PORT_TCP |
-				IBV_RX_HASH_DST_PORT_TCP),
-		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
-		.flow_priority = 0,
-		.flow_spec.tcp_udp = {
-			.type = IBV_FLOW_SPEC_TCP,
-			.size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
-		},
-		.underlayer = &hash_rxq_init[HASH_RXQ_IPV6],
-	},
-	[HASH_RXQ_UDPV6] = {
-		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
-				IBV_RX_HASH_DST_IPV6 |
-				IBV_RX_HASH_SRC_PORT_UDP |
-				IBV_RX_HASH_DST_PORT_UDP),
-		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
-		.flow_priority = 0,
-		.flow_spec.tcp_udp = {
-			.type = IBV_FLOW_SPEC_UDP,
-			.size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
-		},
-		.underlayer = &hash_rxq_init[HASH_RXQ_IPV6],
-	},
-	[HASH_RXQ_IPV6] = {
-		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
-				IBV_RX_HASH_DST_IPV6),
-		.dpdk_rss_hf = (ETH_RSS_IPV6 |
-				ETH_RSS_FRAG_IPV6),
-		.flow_priority = 1,
-		.flow_spec.ipv6 = {
-			.type = IBV_FLOW_SPEC_IPV6,
-			.size = sizeof(hash_rxq_init[0].flow_spec.ipv6),
-		},
-		.underlayer = &hash_rxq_init[HASH_RXQ_ETH],
-	},
-	[HASH_RXQ_ETH] = {
-		.hash_fields = 0,
-		.dpdk_rss_hf = 0,
-		.flow_priority = 2,
-		.flow_spec.eth = {
-			.type = IBV_FLOW_SPEC_ETH,
-			.size = sizeof(hash_rxq_init[0].flow_spec.eth),
-		},
-		.underlayer = NULL,
-	},
-};
-
-/* Number of entries in hash_rxq_init[]. */
-const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
-
-/* Initialization data for hash RX queue indirection tables. */
-static const struct ind_table_init ind_table_init[] = {
-	{
-		.max_size = -1u, /* Superseded by HW limitations. */
-		.hash_types =
-			1 << HASH_RXQ_TCPV4 |
-			1 << HASH_RXQ_UDPV4 |
-			1 << HASH_RXQ_IPV4 |
-			1 << HASH_RXQ_TCPV6 |
-			1 << HASH_RXQ_UDPV6 |
-			1 << HASH_RXQ_IPV6 |
-			0,
-		.hash_types_n = 6,
-	},
-	{
-		.max_size = 1,
-		.hash_types = 1 << HASH_RXQ_ETH,
-		.hash_types_n = 1,
-	},
-};
-
-#define IND_TABLE_INIT_N RTE_DIM(ind_table_init)
-
 /* Default RSS hash key also used for ConnectX-3. */
 uint8_t rss_hash_default_key[] = {
 	0x2c, 0xc6, 0x81, 0xd1,
@@ -198,359 +82,6 @@ uint8_t rss_hash_default_key[] = {
 const size_t rss_hash_default_key_len = sizeof(rss_hash_default_key);
 
 /**
- * Populate flow steering rule for a given hash RX queue type using
- * information from hash_rxq_init[]. Nothing is written to flow_attr when
- * flow_attr_size is not large enough, but the required size is still returned.
- *
- * @param priv
- *   Pointer to private structure.
- * @param[out] flow_attr
- *   Pointer to flow attribute structure to fill. Note that the allocated
- *   area must be larger and large enough to hold all flow specifications.
- * @param flow_attr_size
- *   Entire size of flow_attr and trailing room for flow specifications.
- * @param type
- *   Hash RX queue type to use for flow steering rule.
- *
- * @return
- *   Total size of the flow attribute buffer. No errors are defined.
- */
-size_t
-priv_flow_attr(struct priv *priv, struct ibv_flow_attr *flow_attr,
-	       size_t flow_attr_size, enum hash_rxq_type type)
-{
-	size_t offset = sizeof(*flow_attr);
-	const struct hash_rxq_init *init = &hash_rxq_init[type];
-
-	assert(priv != NULL);
-	assert((size_t)type < RTE_DIM(hash_rxq_init));
-	do {
-		offset += init->flow_spec.hdr.size;
-		init = init->underlayer;
-	} while (init != NULL);
-	if (offset > flow_attr_size)
-		return offset;
-	flow_attr_size = offset;
-	init = &hash_rxq_init[type];
-	*flow_attr = (struct ibv_flow_attr){
-		.type = IBV_FLOW_ATTR_NORMAL,
-		/* Priorities < 3 are reserved for flow director. */
-		.priority = init->flow_priority + 3,
-		.num_of_specs = 0,
-		.port = priv->port,
-		.flags = 0,
-	};
-	do {
-		offset -= init->flow_spec.hdr.size;
-		memcpy((void *)((uintptr_t)flow_attr + offset),
-		       &init->flow_spec,
-		       init->flow_spec.hdr.size);
-		++flow_attr->num_of_specs;
-		init = init->underlayer;
-	} while (init != NULL);
-	return flow_attr_size;
-}
-
-/**
- * Convert hash type position in indirection table initializer to
- * hash RX queue type.
- *
- * @param table
- *   Indirection table initializer.
- * @param pos
- *   Hash type position.
- *
- * @return
- *   Hash RX queue type.
- */
-static enum hash_rxq_type
-hash_rxq_type_from_pos(const struct ind_table_init *table, unsigned int pos)
-{
-	enum hash_rxq_type type = HASH_RXQ_TCPV4;
-
-	assert(pos < table->hash_types_n);
-	do {
-		if ((table->hash_types & (1 << type)) && (pos-- == 0))
-			break;
-		++type;
-	} while (1);
-	return type;
-}
-
-/**
- * Filter out disabled hash RX queue types from ind_table_init[].
- *
- * @param priv
- *   Pointer to private structure.
- * @param[out] table
- *   Output table.
- *
- * @return
- *   Number of table entries.
- */
-static unsigned int
-priv_make_ind_table_init(struct priv *priv,
-			 struct ind_table_init (*table)[IND_TABLE_INIT_N])
-{
-	uint64_t rss_hf;
-	unsigned int i;
-	unsigned int j;
-	unsigned int table_n = 0;
-	/* Mandatory to receive frames not handled by normal hash RX queues. */
-	unsigned int hash_types_sup = 1 << HASH_RXQ_ETH;
-
-	rss_hf = priv->rss_conf.rss_hf;
-	/* Process other protocols only if more than one queue. */
-	if (priv->rxqs_n > 1)
-		for (i = 0; (i != hash_rxq_init_n); ++i)
-			if (rss_hf & hash_rxq_init[i].dpdk_rss_hf)
-				hash_types_sup |= (1 << i);
-
-	/* Filter out entries whose protocols are not in the set. */
-	for (i = 0, j = 0; (i != IND_TABLE_INIT_N); ++i) {
-		unsigned int nb;
-		unsigned int h;
-
-		/* j is increased only if the table has valid protocols. */
-		assert(j <= i);
-		(*table)[j] = ind_table_init[i];
-		(*table)[j].hash_types &= hash_types_sup;
-		for (h = 0, nb = 0; (h != hash_rxq_init_n); ++h)
-			if (((*table)[j].hash_types >> h) & 0x1)
-				++nb;
-		(*table)[i].hash_types_n = nb;
-		if (nb) {
-			++table_n;
-			++j;
-		}
-	}
-	return table_n;
-}
-
-/**
- * Initialize hash RX queues and indirection table.
- *
- * @param priv
- *   Pointer to private structure.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-int
-priv_create_hash_rxqs(struct priv *priv)
-{
-	struct ibv_wq *wqs[priv->reta_idx_n];
-	struct ind_table_init ind_table_init[IND_TABLE_INIT_N];
-	unsigned int ind_tables_n =
-		priv_make_ind_table_init(priv, &ind_table_init);
-	unsigned int hash_rxqs_n = 0;
-	struct hash_rxq (*hash_rxqs)[] = NULL;
-	struct ibv_rwq_ind_table *(*ind_tables)[] = NULL;
-	unsigned int i;
-	unsigned int j;
-	unsigned int k;
-	int err = 0;
-
-	assert(priv->ind_tables == NULL);
-	assert(priv->ind_tables_n == 0);
-	assert(priv->hash_rxqs == NULL);
-	assert(priv->hash_rxqs_n == 0);
-	assert(priv->pd != NULL);
-	assert(priv->ctx != NULL);
-	if (priv->isolated)
-		return 0;
-	if (priv->rxqs_n == 0)
-		return EINVAL;
-	assert(priv->rxqs != NULL);
-	if (ind_tables_n == 0) {
-		ERROR("all hash RX queue types have been filtered out,"
-		      " indirection table cannot be created");
-		return EINVAL;
-	}
-	if (priv->rxqs_n & (priv->rxqs_n - 1)) {
-		INFO("%u RX queues are configured, consider rounding this"
-		     " number to the next power of two for better balancing",
-		     priv->rxqs_n);
-		DEBUG("indirection table extended to assume %u WQs",
-		      priv->reta_idx_n);
-	}
-	for (i = 0; (i != priv->reta_idx_n); ++i) {
-		struct mlx5_rxq_ctrl *rxq_ctrl;
-
-		rxq_ctrl = container_of((*priv->rxqs)[(*priv->reta_idx)[i]],
-					struct mlx5_rxq_ctrl, rxq);
-		wqs[i] = rxq_ctrl->ibv->wq;
-	}
-	/* Get number of hash RX queues to configure. */
-	for (i = 0, hash_rxqs_n = 0; (i != ind_tables_n); ++i)
-		hash_rxqs_n += ind_table_init[i].hash_types_n;
-	DEBUG("allocating %u hash RX queues for %u WQs, %u indirection tables",
-	      hash_rxqs_n, priv->rxqs_n, ind_tables_n);
-	/* Create indirection tables. */
-	ind_tables = rte_calloc(__func__, ind_tables_n,
-				sizeof((*ind_tables)[0]), 0);
-	if (ind_tables == NULL) {
-		err = ENOMEM;
-		ERROR("cannot allocate indirection tables container: %s",
-		      strerror(err));
-		goto error;
-	}
-	for (i = 0; (i != ind_tables_n); ++i) {
-		struct ibv_rwq_ind_table_init_attr ind_init_attr = {
-			.log_ind_tbl_size = 0, /* Set below. */
-			.ind_tbl = wqs,
-			.comp_mask = 0,
-		};
-		unsigned int ind_tbl_size = ind_table_init[i].max_size;
-		struct ibv_rwq_ind_table *ind_table;
-
-		if (priv->reta_idx_n < ind_tbl_size)
-			ind_tbl_size = priv->reta_idx_n;
-		ind_init_attr.log_ind_tbl_size = log2above(ind_tbl_size);
-		errno = 0;
-		ind_table = ibv_create_rwq_ind_table(priv->ctx,
-						     &ind_init_attr);
-		if (ind_table != NULL) {
-			(*ind_tables)[i] = ind_table;
-			continue;
-		}
-		/* Not clear whether errno is set. */
-		err = (errno ? errno : EINVAL);
-		ERROR("RX indirection table creation failed with error %d: %s",
-		      err, strerror(err));
-		goto error;
-	}
-	/* Allocate array that holds hash RX queues and related data. */
-	hash_rxqs = rte_calloc(__func__, hash_rxqs_n,
-			       sizeof((*hash_rxqs)[0]), 0);
-	if (hash_rxqs == NULL) {
-		err = ENOMEM;
-		ERROR("cannot allocate hash RX queues container: %s",
-		      strerror(err));
-		goto error;
-	}
-	for (i = 0, j = 0, k = 0;
-	     ((i != hash_rxqs_n) && (j != ind_tables_n));
-	     ++i) {
-		struct hash_rxq *hash_rxq = &(*hash_rxqs)[i];
-		enum hash_rxq_type type =
-			hash_rxq_type_from_pos(&ind_table_init[j], k);
-		struct rte_eth_rss_conf *priv_rss_conf = &priv->rss_conf;
-		struct ibv_rx_hash_conf hash_conf = {
-			.rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
-			.rx_hash_key_len = (priv_rss_conf ?
-					    priv_rss_conf->rss_key_len :
-					    rss_hash_default_key_len),
-			.rx_hash_key = (priv_rss_conf ?
-					priv_rss_conf->rss_key :
-					rss_hash_default_key),
-			.rx_hash_fields_mask = hash_rxq_init[type].hash_fields,
-		};
-		struct ibv_qp_init_attr_ex qp_init_attr = {
-			.qp_type = IBV_QPT_RAW_PACKET,
-			.comp_mask = (IBV_QP_INIT_ATTR_PD |
-				      IBV_QP_INIT_ATTR_IND_TABLE |
-				      IBV_QP_INIT_ATTR_RX_HASH),
-			.rx_hash_conf = hash_conf,
-			.rwq_ind_tbl = (*ind_tables)[j],
-			.pd = priv->pd,
-		};
-
-		DEBUG("using indirection table %u for hash RX queue %u type %d",
-		      j, i, type);
-		*hash_rxq = (struct hash_rxq){
-			.priv = priv,
-			.qp = ibv_create_qp_ex(priv->ctx, &qp_init_attr),
-			.type = type,
-		};
-		if (hash_rxq->qp == NULL) {
-			err = (errno ? errno : EINVAL);
-			ERROR("Hash RX QP creation failure: %s",
-			      strerror(err));
-			goto error;
-		}
-		if (++k < ind_table_init[j].hash_types_n)
-			continue;
-		/* Switch to the next indirection table and reset hash RX
-		 * queue type array index. */
-		++j;
-		k = 0;
-	}
-	priv->ind_tables = ind_tables;
-	priv->ind_tables_n = ind_tables_n;
-	priv->hash_rxqs = hash_rxqs;
-	priv->hash_rxqs_n = hash_rxqs_n;
-	assert(err == 0);
-	return 0;
-error:
-	if (hash_rxqs != NULL) {
-		for (i = 0; (i != hash_rxqs_n); ++i) {
-			struct ibv_qp *qp = (*hash_rxqs)[i].qp;
-
-			if (qp == NULL)
-				continue;
-			claim_zero(ibv_destroy_qp(qp));
-		}
-		rte_free(hash_rxqs);
-	}
-	if (ind_tables != NULL) {
-		for (j = 0; (j != ind_tables_n); ++j) {
-			struct ibv_rwq_ind_table *ind_table =
-				(*ind_tables)[j];
-
-			if (ind_table == NULL)
-				continue;
-			claim_zero(ibv_destroy_rwq_ind_table(ind_table));
-		}
-		rte_free(ind_tables);
-	}
-	return err;
-}
-
-/**
- * Clean up hash RX queues and indirection table.
- *
- * @param priv
- *   Pointer to private structure.
- */
-void
-priv_destroy_hash_rxqs(struct priv *priv)
-{
-	unsigned int i;
-
-	DEBUG("destroying %u hash RX queues", priv->hash_rxqs_n);
-	if (priv->hash_rxqs_n == 0) {
-		assert(priv->hash_rxqs == NULL);
-		assert(priv->ind_tables == NULL);
-		return;
-	}
-	for (i = 0; (i != priv->hash_rxqs_n); ++i) {
-		struct hash_rxq *hash_rxq = &(*priv->hash_rxqs)[i];
-		unsigned int j, k;
-
-		assert(hash_rxq->priv == priv);
-		assert(hash_rxq->qp != NULL);
-		for (j = 0; (j != RTE_DIM(hash_rxq->mac_flow)); ++j)
-			for (k = 0; (k != RTE_DIM(hash_rxq->mac_flow[j])); ++k)
-				assert(hash_rxq->mac_flow[j][k] == NULL);
-		claim_zero(ibv_destroy_qp(hash_rxq->qp));
-	}
-	priv->hash_rxqs_n = 0;
-	rte_free(priv->hash_rxqs);
-	priv->hash_rxqs = NULL;
-	for (i = 0; (i != priv->ind_tables_n); ++i) {
-		struct ibv_rwq_ind_table *ind_table =
-			(*priv->ind_tables)[i];
-
-		assert(ind_table != NULL);
-		claim_zero(ibv_destroy_rwq_ind_table(ind_table));
-	}
-	priv->ind_tables_n = 0;
-	rte_free(priv->ind_tables);
-	priv->ind_tables = NULL;
-}
-
-/**
  * Allocate RX queue elements.
  *
  * @param rxq_ctrl
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index a052b4b..0bb71ae 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -176,75 +176,6 @@ struct mlx5_hrxq {
 	uint8_t rss_key[]; /* Hash key. */
 };
 
-/* Hash RX queue types. */
-enum hash_rxq_type {
-	HASH_RXQ_TCPV4,
-	HASH_RXQ_UDPV4,
-	HASH_RXQ_IPV4,
-	HASH_RXQ_TCPV6,
-	HASH_RXQ_UDPV6,
-	HASH_RXQ_IPV6,
-	HASH_RXQ_ETH,
-};
-
-/* Flow structure with Ethernet specification. It is packed to prevent padding
- * between attr and spec as this layout is expected by libibverbs. */
-struct flow_attr_spec_eth {
-	struct ibv_flow_attr attr;
-	struct ibv_flow_spec_eth spec;
-} __attribute__((packed));
-
-/* Define a struct flow_attr_spec_eth object as an array of at least
- * "size" bytes. Room after the first index is normally used to store
- * extra flow specifications. */
-#define FLOW_ATTR_SPEC_ETH(name, size) \
-	struct flow_attr_spec_eth name \
-		[((size) / sizeof(struct flow_attr_spec_eth)) + \
-		 !!((size) % sizeof(struct flow_attr_spec_eth))]
-
-/* Initialization data for hash RX queue. */
-struct hash_rxq_init {
-	uint64_t hash_fields; /* Fields that participate in the hash. */
-	uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
-	unsigned int flow_priority; /* Flow priority to use. */
-	union {
-		struct {
-			enum ibv_flow_spec_type type;
-			uint16_t size;
-		} hdr;
-		struct ibv_flow_spec_tcp_udp tcp_udp;
-		struct ibv_flow_spec_ipv4 ipv4;
-		struct ibv_flow_spec_ipv6 ipv6;
-		struct ibv_flow_spec_eth eth;
-	} flow_spec; /* Flow specification template. */
-	const struct hash_rxq_init *underlayer; /* Pointer to underlayer. */
-};
-
-/* Initialization data for indirection table. */
-struct ind_table_init {
-	unsigned int max_size; /* Maximum number of WQs. */
-	/* Hash RX queues using this table. */
-	unsigned int hash_types;
-	unsigned int hash_types_n;
-};
-
-/* Initialization data for special flows. */
-struct special_flow_init {
-	uint8_t dst_mac_val[6];
-	uint8_t dst_mac_mask[6];
-	unsigned int hash_types;
-	unsigned int per_vlan:1;
-};
-
-struct hash_rxq {
-	struct priv *priv; /* Back pointer to private data. */
-	struct ibv_qp *qp; /* Hash RX QP. */
-	enum hash_rxq_type type; /* Hash RX queue type. */
-	/* MAC flow steering rules, one per VLAN ID. */
-	struct ibv_flow *mac_flow
-		[MLX5_MAX_MAC_ADDRESSES][MLX5_MAX_VLAN_IDS];
-};
-
 /* TX queue descriptor. */
 __extension__
 struct mlx5_txq_data {
@@ -302,16 +233,9 @@ struct mlx5_txq_ctrl {
 
 /* mlx5_rxq.c */
 
-extern const struct hash_rxq_init hash_rxq_init[];
-extern const unsigned int hash_rxq_init_n;
-
 extern uint8_t rss_hash_default_key[];
 extern const size_t rss_hash_default_key_len;
 
-size_t priv_flow_attr(struct priv *, struct ibv_flow_attr *,
-		      size_t, enum hash_rxq_type);
-int priv_create_hash_rxqs(struct priv *);
-void priv_destroy_hash_rxqs(struct priv *);
 void mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *);
 int mlx5_rx_queue_setup(struct rte_eth_dev *, uint16_t, uint16_t, unsigned int,
 			const struct rte_eth_rxconf *, struct rte_mempool *);
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 4143571..29167ba 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -161,9 +161,9 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 	}
 	/* Update receive callback. */
 	priv_dev_select_rx_function(priv, dev);
-	err = priv_create_hash_rxqs(priv);
+	err = priv_dev_traffic_enable(priv, dev);
 	if (err) {
-		ERROR("%p: an error occurred while configuring hash RX queues:"
+		ERROR("%p: an error occurred while configuring control flows:"
 		      " %s",
 		      (void *)priv, strerror(err));
 		goto error;
@@ -190,8 +190,8 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 	dev->data->dev_started = 0;
 	LIST_FOREACH(mr, &priv->mr, next)
 		priv_mr_release(priv, mr);
-	priv_destroy_hash_rxqs(priv);
 	priv_flow_stop(priv, &priv->flows);
+	priv_dev_traffic_disable(priv, dev);
 	priv_txq_stop(priv);
 	priv_rxq_stop(priv);
 	priv_flow_delete_drop_queue(priv);
@@ -224,9 +224,8 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
 	rte_wmb();
 	usleep(1000 * priv->rxqs_n);
 	DEBUG("%p: cleaning up and destroying hash RX queues", (void *)dev);
-	priv_destroy_hash_rxqs(priv);
 	priv_flow_stop(priv, &priv->flows);
-	priv_flow_flush(priv, &priv->ctrl_flows);
+	priv_dev_traffic_disable(priv, dev);
 	priv_rx_intr_vec_disable(priv);
 	priv_dev_interrupt_handler_uninstall(priv, dev);
 	priv_txq_stop(priv);
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v2 22/30] net/mlx5: fully convert a flow to verbs in validate
       [not found] ` <cover.1507207731.git.nelio.laranjeiro@6wind.com>
                     ` (20 preceding siblings ...)
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 21/30] net/mlx5: remove hash Rx queues support Nelio Laranjeiro
@ 2017-10-05 12:49   ` Nelio Laranjeiro
  2017-10-06  5:33     ` Yongseok Koh
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 23/30] net/mlx5: process flows actions before of items Nelio Laranjeiro
                     ` (7 subsequent siblings)
  29 siblings, 1 reply; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-05 12:49 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
Flow validation performs only a few checks on the pattern; in some
situations validation could succeed even though the pattern could not be
converted correctly.
This patch makes validation also perform the full conversion check.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5_flow.c | 194 +++++++++++++++++++++++++------------------
 1 file changed, 114 insertions(+), 80 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 88d02c9..090a298 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -307,6 +307,7 @@ struct mlx5_flow_parse {
 	struct ibv_flow_attr *ibv_attr; /**< Verbs attribute. */
 	unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
 	uint32_t inner; /**< Set once VXLAN is encountered. */
+	uint32_t create:1; /**< Leave allocated resources on exit. */
 	uint64_t hash_fields; /**< Fields that participate in the hash. */
 	struct mlx5_flow_action actions; /**< Parsed action result. */
 };
@@ -418,7 +419,7 @@ mlx5_flow_item_validate(const struct rte_flow_item *item,
 }
 
 /**
- * Validate a flow supported by the NIC.
+ * Validate and convert a flow supported by the NIC.
  *
  * @param priv
  *   Pointer to private structure.
@@ -437,16 +438,24 @@ mlx5_flow_item_validate(const struct rte_flow_item *item,
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-priv_flow_validate(struct priv *priv,
-		   const struct rte_flow_attr *attr,
-		   const struct rte_flow_item items[],
-		   const struct rte_flow_action actions[],
-		   struct rte_flow_error *error,
-		   struct mlx5_flow_parse *flow)
+priv_flow_convert(struct priv *priv,
+		  const struct rte_flow_attr *attr,
+		  const struct rte_flow_item items[],
+		  const struct rte_flow_action actions[],
+		  struct rte_flow_error *error,
+		  struct mlx5_flow_parse *flow)
 {
 	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
 
 	(void)priv;
+	*flow = (struct mlx5_flow_parse){
+		.ibv_attr = flow->ibv_attr,
+		.create = flow->create,
+		.offset = sizeof(struct ibv_flow_attr),
+		.actions = {
+			.mark_id = MLX5_FLOW_MARK_DEFAULT,
+		},
+	};
 	if (attr->group) {
 		rte_flow_error_set(error, ENOTSUP,
 				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
@@ -647,35 +656,6 @@ priv_flow_validate(struct priv *priv,
 }
 
 /**
- * Validate a flow supported by the NIC.
- *
- * @see rte_flow_validate()
- * @see rte_flow_ops
- */
-int
-mlx5_flow_validate(struct rte_eth_dev *dev,
-		   const struct rte_flow_attr *attr,
-		   const struct rte_flow_item items[],
-		   const struct rte_flow_action actions[],
-		   struct rte_flow_error *error)
-{
-	struct priv *priv = dev->data->dev_private;
-	int ret;
-	struct mlx5_flow_parse flow = {
-		.offset = sizeof(struct ibv_flow_attr),
-		.actions = {
-			.mark_id = MLX5_FLOW_MARK_DEFAULT,
-			.queues_n = 0,
-		},
-	};
-
-	priv_lock(priv);
-	ret = priv_flow_validate(priv, attr, items, actions, error, &flow);
-	priv_unlock(priv);
-	return ret;
-}
-
-/**
  * Convert Ethernet item to Verbs specification.
  *
  * @param item[in]
@@ -1016,6 +996,7 @@ mlx5_flow_create_flag_mark(struct mlx5_flow_parse *flow, uint32_t mark_id)
 	struct ibv_flow_spec_action_tag *tag;
 	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
 
+	assert(flow->actions.mark);
 	tag = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
 	*tag = (struct ibv_flow_spec_action_tag){
 		.type = IBV_FLOW_SPEC_ACTION_TAG,
@@ -1023,6 +1004,7 @@ mlx5_flow_create_flag_mark(struct mlx5_flow_parse *flow, uint32_t mark_id)
 		.tag_id = mlx5_flow_mark_set(mark_id),
 	};
 	++flow->ibv_attr->num_of_specs;
+	flow->offset += size;
 	return 0;
 }
 
@@ -1167,12 +1149,10 @@ priv_flow_create_action_queue(struct priv *priv,
 }
 
 /**
- * Convert a flow.
+ * Validate a flow.
  *
  * @param priv
  *   Pointer to private structure.
- * @param list
- *   Pointer to a TAILQ flow list.
  * @param[in] attr
  *   Flow rule attributes.
  * @param[in] pattern
@@ -1181,40 +1161,35 @@ priv_flow_create_action_queue(struct priv *priv,
  *   Associated actions (list terminated by the END action).
  * @param[out] error
  *   Perform verbose error reporting if not NULL.
+ * @param[in,out] parser
+ *   MLX5 parser structure.
  *
  * @return
- *   A flow on success, NULL otherwise.
+ *   0 on success, negative errno value on failure.
  */
-static struct rte_flow *
-priv_flow_create(struct priv *priv,
-		 struct mlx5_flows *list,
-		 const struct rte_flow_attr *attr,
-		 const struct rte_flow_item items[],
-		 const struct rte_flow_action actions[],
-		 struct rte_flow_error *error)
+static int
+priv_flow_validate(struct priv *priv,
+		   const struct rte_flow_attr *attr,
+		   const struct rte_flow_item items[],
+		   const struct rte_flow_action actions[],
+		   struct rte_flow_error *error,
+		   struct mlx5_flow_parse *parser)
 {
-	struct rte_flow *rte_flow;
-	struct mlx5_flow_parse flow = {
-		.offset = sizeof(struct ibv_flow_attr),
-		.actions = {
-			.mark_id = MLX5_FLOW_MARK_DEFAULT,
-			.queues = { 0 },
-			.queues_n = 0,
-		},
-	};
 	int err;
 
-	err = priv_flow_validate(priv, attr, items, actions, error, &flow);
+	err = priv_flow_convert(priv, attr, items, actions, error, parser);
 	if (err)
 		goto exit;
-	flow.ibv_attr = rte_malloc(__func__, flow.offset, 0);
-	flow.offset = sizeof(struct ibv_flow_attr);
-	if (!flow.ibv_attr) {
+	if (parser->actions.mark)
+		parser->offset += sizeof(struct ibv_flow_spec_action_tag);
+	parser->ibv_attr = rte_malloc(__func__, parser->offset, 0);
+	if (!parser->ibv_attr) {
 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
 				   NULL, "cannot allocate ibv_attr memory");
+		err = rte_errno;
 		goto exit;
 	}
-	*flow.ibv_attr = (struct ibv_flow_attr){
+	*parser->ibv_attr = (struct ibv_flow_attr){
 		.type = IBV_FLOW_ATTR_NORMAL,
 		.size = sizeof(struct ibv_flow_attr),
 		.priority = attr->priority,
@@ -1222,32 +1197,91 @@ priv_flow_create(struct priv *priv,
 		.port = 0,
 		.flags = 0,
 	};
-	flow.inner = 0;
-	flow.hash_fields = 0;
-	claim_zero(priv_flow_validate(priv, attr, items, actions,
-				      error, &flow));
-	if (flow.actions.mark && !flow.actions.drop) {
-		mlx5_flow_create_flag_mark(&flow, flow.actions.mark_id);
-		flow.offset += sizeof(struct ibv_flow_spec_action_tag);
-	}
-	if (flow.actions.drop)
-		rte_flow =
-			priv_flow_create_action_queue_drop(priv, &flow, error);
+	err = priv_flow_convert(priv, attr, items, actions, error, parser);
+	if (err || parser->create)
+		goto exit;
+	if (parser->actions.mark)
+		mlx5_flow_create_flag_mark(parser, parser->actions.mark_id);
+	return 0;
+exit:
+	if (parser->ibv_attr)
+		rte_free(parser->ibv_attr);
+	return err;
+}
+
+/**
+ * Convert a flow.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param list
+ *   Pointer to a TAILQ flow list.
+ * @param[in] attr
+ *   Flow rule attributes.
+ * @param[in] pattern
+ *   Pattern specification (list terminated by the END pattern item).
+ * @param[in] actions
+ *   Associated actions (list terminated by the END action).
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   A flow on success, NULL otherwise.
+ */
+static struct rte_flow *
+priv_flow_create(struct priv *priv,
+		 struct mlx5_flows *list,
+		 const struct rte_flow_attr *attr,
+		 const struct rte_flow_item items[],
+		 const struct rte_flow_action actions[],
+		 struct rte_flow_error *error)
+{
+	struct mlx5_flow_parse parser = { .create = 1, };
+	struct rte_flow *flow;
+	int err;
+
+	err = priv_flow_validate(priv, attr, items, actions, error, &parser);
+	if (err)
+		goto exit;
+	if (parser.actions.drop)
+		flow = priv_flow_create_action_queue_drop(priv, &parser, error);
 	else
-		rte_flow = priv_flow_create_action_queue(priv, &flow, error);
-	if (!rte_flow)
+		flow = priv_flow_create_action_queue(priv, &parser, error);
+	if (!flow)
 		goto exit;
-	if (rte_flow) {
-		TAILQ_INSERT_TAIL(list, rte_flow, next);
-		DEBUG("Flow created %p", (void *)rte_flow);
-	}
-	return rte_flow;
+	TAILQ_INSERT_TAIL(list, flow, next);
+	DEBUG("Flow created %p", (void *)flow);
+	return flow;
 exit:
-	rte_free(flow.ibv_attr);
+	if (parser.ibv_attr)
+		rte_free(parser.ibv_attr);
 	return NULL;
 }
 
 /**
+ * Validate a flow supported by the NIC.
+ *
+ * @see rte_flow_validate()
+ * @see rte_flow_ops
+ */
+int
+mlx5_flow_validate(struct rte_eth_dev *dev,
+		   const struct rte_flow_attr *attr,
+		   const struct rte_flow_item items[],
+		   const struct rte_flow_action actions[],
+		   struct rte_flow_error *error)
+{
+	struct priv *priv = dev->data->dev_private;
+	int ret;
+	struct mlx5_flow_parse parser = { .create = 0, };
+
+	priv_lock(priv);
+	ret = priv_flow_validate(priv, attr, items, actions, error, &parser);
+	priv_unlock(priv);
+	return ret;
+}
+
+/**
  * Create a flow.
  *
  * @see rte_flow_create()
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v2 23/30] net/mlx5: process flows actions before of items
       [not found] ` <cover.1507207731.git.nelio.laranjeiro@6wind.com>
                     ` (21 preceding siblings ...)
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 22/30] net/mlx5: fully convert a flow to verbs in validate Nelio Laranjeiro
@ 2017-10-05 12:49   ` Nelio Laranjeiro
  2017-10-06  5:36     ` Yongseok Koh
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 24/30] net/mlx5: merge internal parser and actions structures Nelio Laranjeiro
                     ` (6 subsequent siblings)
  29 siblings, 1 reply; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-05 12:49 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
When the flow rule contains an RSS action, the RSS configuration to use
is the one provided by the user.  To correctly convert DPDK RSS hash fields
to Verbs ones according to the user's request, the actions must be
processed first.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5_flow.c | 90 ++++++++++++++++++++++----------------------
 1 file changed, 45 insertions(+), 45 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 090a298..a30f1ae 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -484,51 +484,6 @@ priv_flow_convert(struct priv *priv,
 				   "only ingress is supported");
 		return -rte_errno;
 	}
-	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
-		const struct mlx5_flow_items *token = NULL;
-		unsigned int i;
-		int err;
-
-		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
-			continue;
-		for (i = 0;
-		     cur_item->items &&
-		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
-		     ++i) {
-			if (cur_item->items[i] == items->type) {
-				token = &mlx5_flow_items[items->type];
-				break;
-			}
-		}
-		if (!token)
-			goto exit_item_not_supported;
-		cur_item = token;
-		err = mlx5_flow_item_validate(items,
-					      (const uint8_t *)cur_item->mask,
-					      cur_item->mask_sz);
-		if (err)
-			goto exit_item_not_supported;
-		if (flow->ibv_attr && cur_item->convert) {
-			err = cur_item->convert(items,
-						(cur_item->default_mask ?
-						 cur_item->default_mask :
-						 cur_item->mask),
-						flow);
-			if (err)
-				goto exit_item_not_supported;
-		} else if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
-			if (flow->inner) {
-				rte_flow_error_set(error, ENOTSUP,
-						   RTE_FLOW_ERROR_TYPE_ITEM,
-						   items,
-						   "cannot recognize multiple"
-						   " VXLAN encapsulations");
-				return -rte_errno;
-			}
-			flow->inner = 1;
-		}
-		flow->offset += cur_item->dst_sz;
-	}
 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
 		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
 			continue;
@@ -644,6 +599,51 @@ priv_flow_convert(struct priv *priv,
 				   NULL, "no valid action");
 		return -rte_errno;
 	}
+	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
+		const struct mlx5_flow_items *token = NULL;
+		unsigned int i;
+		int err;
+
+		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
+			continue;
+		for (i = 0;
+		     cur_item->items &&
+		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
+		     ++i) {
+			if (cur_item->items[i] == items->type) {
+				token = &mlx5_flow_items[items->type];
+				break;
+			}
+		}
+		if (!token)
+			goto exit_item_not_supported;
+		cur_item = token;
+		err = mlx5_flow_item_validate(items,
+					      (const uint8_t *)cur_item->mask,
+					      cur_item->mask_sz);
+		if (err)
+			goto exit_item_not_supported;
+		if (flow->ibv_attr && cur_item->convert) {
+			err = cur_item->convert(items,
+						(cur_item->default_mask ?
+						 cur_item->default_mask :
+						 cur_item->mask),
+						flow);
+			if (err)
+				goto exit_item_not_supported;
+		} else if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
+			if (flow->inner) {
+				rte_flow_error_set(error, ENOTSUP,
+						   RTE_FLOW_ERROR_TYPE_ITEM,
+						   items,
+						   "cannot recognize multiple"
+						   " VXLAN encapsulations");
+				return -rte_errno;
+			}
+			flow->inner = 1;
+		}
+		flow->offset += cur_item->dst_sz;
+	}
 	return 0;
 exit_item_not_supported:
 	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v2 24/30] net/mlx5: merge internal parser and actions structures
       [not found] ` <cover.1507207731.git.nelio.laranjeiro@6wind.com>
                     ` (22 preceding siblings ...)
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 23/30] net/mlx5: process flows actions before of items Nelio Laranjeiro
@ 2017-10-05 12:49   ` Nelio Laranjeiro
  2017-10-06  5:37     ` Yongseok Koh
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 25/30] net/mlx5: use a better name for the flow parser Nelio Laranjeiro
                     ` (5 subsequent siblings)
  29 siblings, 1 reply; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-05 12:49 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
struct mlx5_flow_parse now embeds the fields of struct mlx5_flow_action.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5_flow.c | 94 ++++++++++++++++++++------------------------
 1 file changed, 43 insertions(+), 51 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index a30f1ae..98ee807 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -292,24 +292,19 @@ static const struct mlx5_flow_items mlx5_flow_items[] = {
 	},
 };
 
-/* Structure to parse actions. */
-struct mlx5_flow_action {
-	uint32_t queue:1; /**< Target is a receive queue. */
-	uint32_t drop:1; /**< Target is a drop queue. */
-	uint32_t mark:1; /**< Mark is present in the flow. */
-	uint32_t mark_id; /**< Mark identifier. */
-	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
-	uint16_t queues_n; /**< Number of entries in queue[]. */
-};
-
 /** Structure to pass to the conversion function. */
 struct mlx5_flow_parse {
 	struct ibv_flow_attr *ibv_attr; /**< Verbs attribute. */
 	unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
 	uint32_t inner; /**< Set once VXLAN is encountered. */
 	uint32_t create:1; /**< Leave allocated resources on exit. */
+	uint32_t queue:1; /**< Target is a receive queue. */
+	uint32_t drop:1; /**< Target is a drop queue. */
+	uint32_t mark:1; /**< Mark is present in the flow. */
+	uint32_t mark_id; /**< Mark identifier. */
 	uint64_t hash_fields; /**< Fields that participate in the hash. */
-	struct mlx5_flow_action actions; /**< Parsed action result. */
+	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
+	uint16_t queues_n; /**< Number of entries in queue[]. */
 };
 
 static const struct rte_flow_ops mlx5_flow_ops = {
@@ -452,9 +447,7 @@ priv_flow_convert(struct priv *priv,
 		.ibv_attr = flow->ibv_attr,
 		.create = flow->create,
 		.offset = sizeof(struct ibv_flow_attr),
-		.actions = {
-			.mark_id = MLX5_FLOW_MARK_DEFAULT,
-		},
+		.mark_id = MLX5_FLOW_MARK_DEFAULT,
 	};
 	if (attr->group) {
 		rte_flow_error_set(error, ENOTSUP,
@@ -488,7 +481,7 @@ priv_flow_convert(struct priv *priv,
 		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
 			continue;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
-			flow->actions.drop = 1;
+			flow->drop = 1;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
 			const struct rte_flow_action_queue *queue =
 				(const struct rte_flow_action_queue *)
@@ -498,13 +491,13 @@ priv_flow_convert(struct priv *priv,
 
 			if (!queue || (queue->index > (priv->rxqs_n - 1)))
 				goto exit_action_not_supported;
-			for (n = 0; n < flow->actions.queues_n; ++n) {
-				if (flow->actions.queues[n] == queue->index) {
+			for (n = 0; n < flow->queues_n; ++n) {
+				if (flow->queues[n] == queue->index) {
 					found = 1;
 					break;
 				}
 			}
-			if (flow->actions.queues_n > 1 && !found) {
+			if (flow->queues_n > 1 && !found) {
 				rte_flow_error_set(error, ENOTSUP,
 					   RTE_FLOW_ERROR_TYPE_ACTION,
 					   actions,
@@ -512,9 +505,9 @@ priv_flow_convert(struct priv *priv,
 				return -rte_errno;
 			}
 			if (!found) {
-				flow->actions.queue = 1;
-				flow->actions.queues_n = 1;
-				flow->actions.queues[0] = queue->index;
+				flow->queue = 1;
+				flow->queues_n = 1;
+				flow->queues[0] = queue->index;
 			}
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
 			const struct rte_flow_action_rss *rss =
@@ -529,12 +522,12 @@ priv_flow_convert(struct priv *priv,
 						   "no valid queues");
 				return -rte_errno;
 			}
-			if (flow->actions.queues_n == 1) {
+			if (flow->queues_n == 1) {
 				uint16_t found = 0;
 
-				assert(flow->actions.queues_n);
+				assert(flow->queues_n);
 				for (n = 0; n < rss->num; ++n) {
-					if (flow->actions.queues[0] ==
+					if (flow->queues[0] ==
 					    rss->queue[n]) {
 						found = 1;
 						break;
@@ -559,10 +552,10 @@ priv_flow_convert(struct priv *priv,
 					return -rte_errno;
 				}
 			}
-			flow->actions.queue = 1;
+			flow->queue = 1;
 			for (n = 0; n < rss->num; ++n)
-				flow->actions.queues[n] = rss->queue[n];
-			flow->actions.queues_n = rss->num;
+				flow->queues[n] = rss->queue[n];
+			flow->queues_n = rss->num;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
 			const struct rte_flow_action_mark *mark =
 				(const struct rte_flow_action_mark *)
@@ -582,19 +575,19 @@ priv_flow_convert(struct priv *priv,
 						   " and 16777199");
 				return -rte_errno;
 			}
-			flow->actions.mark = 1;
-			flow->actions.mark_id = mark->id;
+			flow->mark = 1;
+			flow->mark_id = mark->id;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
-			flow->actions.mark = 1;
+			flow->mark = 1;
 		} else {
 			goto exit_action_not_supported;
 		}
 	}
-	if (flow->actions.mark && !flow->ibv_attr && !flow->actions.drop)
+	if (flow->mark && !flow->ibv_attr && !flow->drop)
 		flow->offset += sizeof(struct ibv_flow_spec_action_tag);
-	if (!flow->ibv_attr && flow->actions.drop)
+	if (!flow->ibv_attr && flow->drop)
 		flow->offset += sizeof(struct ibv_flow_spec_action_drop);
-	if (!flow->actions.queue && !flow->actions.drop) {
+	if (!flow->queue && !flow->drop) {
 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
 				   NULL, "no valid action");
 		return -rte_errno;
@@ -996,7 +989,7 @@ mlx5_flow_create_flag_mark(struct mlx5_flow_parse *flow, uint32_t mark_id)
 	struct ibv_flow_spec_action_tag *tag;
 	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
 
-	assert(flow->actions.mark);
+	assert(flow->mark);
 	tag = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
 	*tag = (struct ibv_flow_spec_action_tag){
 		.type = IBV_FLOW_SPEC_ACTION_TAG,
@@ -1087,23 +1080,22 @@ priv_flow_create_action_queue(struct priv *priv,
 
 	assert(priv->pd);
 	assert(priv->ctx);
-	assert(!flow->actions.drop);
-	rte_flow =
-		rte_calloc(__func__, 1,
-			   sizeof(*flow) +
-			   flow->actions.queues_n * sizeof(uint16_t),
-			   0);
+	assert(!flow->drop);
+	rte_flow = rte_calloc(__func__, 1,
+			      sizeof(*rte_flow) +
+			      flow->queues_n * sizeof(uint16_t),
+			      0);
 	if (!rte_flow) {
 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
 				   NULL, "cannot allocate flow memory");
 		return NULL;
 	}
-	rte_flow->mark = flow->actions.mark;
+	rte_flow->mark = flow->mark;
 	rte_flow->ibv_attr = flow->ibv_attr;
 	rte_flow->queues = (uint16_t (*)[])(rte_flow + 1);
-	memcpy(rte_flow->queues, flow->actions.queues,
-	       flow->actions.queues_n * sizeof(uint16_t));
-	rte_flow->queues_n = flow->actions.queues_n;
+	memcpy(rte_flow->queues, flow->queues,
+	       flow->queues_n * sizeof(uint16_t));
+	rte_flow->queues_n = flow->queues_n;
 	rte_flow->frxq.hash_fields = flow->hash_fields;
 	rte_flow->frxq.hrxq = mlx5_priv_hrxq_get(priv, rss_hash_default_key,
 						 rss_hash_default_key_len,
@@ -1124,11 +1116,11 @@ priv_flow_create_action_queue(struct priv *priv,
 			goto error;
 		}
 	}
-	for (i = 0; i != flow->actions.queues_n; ++i) {
+	for (i = 0; i != flow->queues_n; ++i) {
 		struct mlx5_rxq_data *q =
-			(*priv->rxqs)[flow->actions.queues[i]];
+			(*priv->rxqs)[flow->queues[i]];
 
-		q->mark |= flow->actions.mark;
+		q->mark |= flow->mark;
 	}
 	if (!priv->dev->data->dev_started)
 		return rte_flow;
@@ -1180,7 +1172,7 @@ priv_flow_validate(struct priv *priv,
 	err = priv_flow_convert(priv, attr, items, actions, error, parser);
 	if (err)
 		goto exit;
-	if (parser->actions.mark)
+	if (parser->mark)
 		parser->offset += sizeof(struct ibv_flow_spec_action_tag);
 	parser->ibv_attr = rte_malloc(__func__, parser->offset, 0);
 	if (!parser->ibv_attr) {
@@ -1200,8 +1192,8 @@ priv_flow_validate(struct priv *priv,
 	err = priv_flow_convert(priv, attr, items, actions, error, parser);
 	if (err || parser->create)
 		goto exit;
-	if (parser->actions.mark)
-		mlx5_flow_create_flag_mark(parser, parser->actions.mark_id);
+	if (parser->mark)
+		mlx5_flow_create_flag_mark(parser, parser->mark_id);
 	return 0;
 exit:
 	if (parser->ibv_attr)
@@ -1243,7 +1235,7 @@ priv_flow_create(struct priv *priv,
 	err = priv_flow_validate(priv, attr, items, actions, error, &parser);
 	if (err)
 		goto exit;
-	if (parser.actions.drop)
+	if (parser.drop)
 		flow = priv_flow_create_action_queue_drop(priv, &parser, error);
 	else
 		flow = priv_flow_create_action_queue(priv, &parser, error);
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v2 25/30] net/mlx5: use a better name for the flow parser
       [not found] ` <cover.1507207731.git.nelio.laranjeiro@6wind.com>
                     ` (23 preceding siblings ...)
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 24/30] net/mlx5: merge internal parser and actions structures Nelio Laranjeiro
@ 2017-10-05 12:49   ` Nelio Laranjeiro
  2017-10-06  5:41     ` Yongseok Koh
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 26/30] net/mlx5: reorganise functions in the file Nelio Laranjeiro
                     ` (4 subsequent siblings)
  29 siblings, 1 reply; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-05 12:49 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
Variables of type struct mlx5_flow_parse were commonly named "flow", which
sometimes made development confusing.  The variable name is replaced by
"parser" to reflect its use.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5_flow.c | 200 +++++++++++++++++++++----------------------
 1 file changed, 96 insertions(+), 104 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 98ee807..454325c 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -426,8 +426,8 @@ mlx5_flow_item_validate(const struct rte_flow_item *item,
  *   Associated actions (list terminated by the END action).
  * @param[out] error
  *   Perform verbose error reporting if not NULL.
- * @param[in, out] flow
- *   Flow structure to update.
+ * @param[in, out] parser
+ *   Internal parser structure.
  *
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
@@ -438,14 +438,14 @@ priv_flow_convert(struct priv *priv,
 		  const struct rte_flow_item items[],
 		  const struct rte_flow_action actions[],
 		  struct rte_flow_error *error,
-		  struct mlx5_flow_parse *flow)
+		  struct mlx5_flow_parse *parser)
 {
 	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
 
 	(void)priv;
-	*flow = (struct mlx5_flow_parse){
-		.ibv_attr = flow->ibv_attr,
-		.create = flow->create,
+	*parser = (struct mlx5_flow_parse){
+		.ibv_attr = parser->ibv_attr,
+		.create = parser->create,
 		.offset = sizeof(struct ibv_flow_attr),
 		.mark_id = MLX5_FLOW_MARK_DEFAULT,
 	};
@@ -481,7 +481,7 @@ priv_flow_convert(struct priv *priv,
 		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
 			continue;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
-			flow->drop = 1;
+			parser->drop = 1;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
 			const struct rte_flow_action_queue *queue =
 				(const struct rte_flow_action_queue *)
@@ -491,13 +491,13 @@ priv_flow_convert(struct priv *priv,
 
 			if (!queue || (queue->index > (priv->rxqs_n - 1)))
 				goto exit_action_not_supported;
-			for (n = 0; n < flow->queues_n; ++n) {
-				if (flow->queues[n] == queue->index) {
+			for (n = 0; n < parser->queues_n; ++n) {
+				if (parser->queues[n] == queue->index) {
 					found = 1;
 					break;
 				}
 			}
-			if (flow->queues_n > 1 && !found) {
+			if (parser->queues_n > 1 && !found) {
 				rte_flow_error_set(error, ENOTSUP,
 					   RTE_FLOW_ERROR_TYPE_ACTION,
 					   actions,
@@ -505,9 +505,9 @@ priv_flow_convert(struct priv *priv,
 				return -rte_errno;
 			}
 			if (!found) {
-				flow->queue = 1;
-				flow->queues_n = 1;
-				flow->queues[0] = queue->index;
+				parser->queue = 1;
+				parser->queues_n = 1;
+				parser->queues[0] = queue->index;
 			}
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
 			const struct rte_flow_action_rss *rss =
@@ -522,12 +522,12 @@ priv_flow_convert(struct priv *priv,
 						   "no valid queues");
 				return -rte_errno;
 			}
-			if (flow->queues_n == 1) {
+			if (parser->queues_n == 1) {
 				uint16_t found = 0;
 
-				assert(flow->queues_n);
+				assert(parser->queues_n);
 				for (n = 0; n < rss->num; ++n) {
-					if (flow->queues[0] ==
+					if (parser->queues[0] ==
 					    rss->queue[n]) {
 						found = 1;
 						break;
@@ -552,10 +552,10 @@ priv_flow_convert(struct priv *priv,
 					return -rte_errno;
 				}
 			}
-			flow->queue = 1;
+			parser->queue = 1;
 			for (n = 0; n < rss->num; ++n)
-				flow->queues[n] = rss->queue[n];
-			flow->queues_n = rss->num;
+				parser->queues[n] = rss->queue[n];
+			parser->queues_n = rss->num;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
 			const struct rte_flow_action_mark *mark =
 				(const struct rte_flow_action_mark *)
@@ -575,19 +575,19 @@ priv_flow_convert(struct priv *priv,
 						   " and 16777199");
 				return -rte_errno;
 			}
-			flow->mark = 1;
-			flow->mark_id = mark->id;
+			parser->mark = 1;
+			parser->mark_id = mark->id;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
-			flow->mark = 1;
+			parser->mark = 1;
 		} else {
 			goto exit_action_not_supported;
 		}
 	}
-	if (flow->mark && !flow->ibv_attr && !flow->drop)
-		flow->offset += sizeof(struct ibv_flow_spec_action_tag);
-	if (!flow->ibv_attr && flow->drop)
-		flow->offset += sizeof(struct ibv_flow_spec_action_drop);
-	if (!flow->queue && !flow->drop) {
+	if (parser->mark && !parser->ibv_attr && !parser->drop)
+		parser->offset += sizeof(struct ibv_flow_spec_action_tag);
+	if (!parser->ibv_attr && parser->drop)
+		parser->offset += sizeof(struct ibv_flow_spec_action_drop);
+	if (!parser->queue && !parser->drop) {
 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
 				   NULL, "no valid action");
 		return -rte_errno;
@@ -616,16 +616,16 @@ priv_flow_convert(struct priv *priv,
 					      cur_item->mask_sz);
 		if (err)
 			goto exit_item_not_supported;
-		if (flow->ibv_attr && cur_item->convert) {
+		if (parser->ibv_attr && cur_item->convert) {
 			err = cur_item->convert(items,
 						(cur_item->default_mask ?
 						 cur_item->default_mask :
 						 cur_item->mask),
-						flow);
+						parser);
 			if (err)
 				goto exit_item_not_supported;
 		} else if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
-			if (flow->inner) {
+			if (parser->inner) {
 				rte_flow_error_set(error, ENOTSUP,
 						   RTE_FLOW_ERROR_TYPE_ITEM,
 						   items,
@@ -633,9 +633,9 @@ priv_flow_convert(struct priv *priv,
 						   " VXLAN encapsulations");
 				return -rte_errno;
 			}
-			flow->inner = 1;
+			parser->inner = 1;
 		}
-		flow->offset += cur_item->dst_sz;
+		parser->offset += cur_item->dst_sz;
 	}
 	return 0;
 exit_item_not_supported:
@@ -665,17 +665,16 @@ mlx5_flow_create_eth(const struct rte_flow_item *item,
 {
 	const struct rte_flow_item_eth *spec = item->spec;
 	const struct rte_flow_item_eth *mask = item->mask;
-	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
+	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
 	struct ibv_flow_spec_eth *eth;
 	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
 	unsigned int i;
 
-	++flow->ibv_attr->num_of_specs;
-	flow->ibv_attr->priority = 2;
-	flow->hash_fields = 0;
-	eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
+	++parser->ibv_attr->num_of_specs;
+	parser->hash_fields = 0;
+	eth = (void *)((uintptr_t)parser->ibv_attr + parser->offset);
 	*eth = (struct ibv_flow_spec_eth) {
-		.type = flow->inner | IBV_FLOW_SPEC_ETH,
+		.type = parser->inner | IBV_FLOW_SPEC_ETH,
 		.size = eth_size,
 	};
 	if (!spec)
@@ -714,11 +713,11 @@ mlx5_flow_create_vlan(const struct rte_flow_item *item,
 {
 	const struct rte_flow_item_vlan *spec = item->spec;
 	const struct rte_flow_item_vlan *mask = item->mask;
-	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
+	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
 	struct ibv_flow_spec_eth *eth;
 	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
 
-	eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset - eth_size);
+	eth = (void *)((uintptr_t)parser->ibv_attr + parser->offset - eth_size);
 	if (!spec)
 		return 0;
 	if (!mask)
@@ -746,17 +745,15 @@ mlx5_flow_create_ipv4(const struct rte_flow_item *item,
 {
 	const struct rte_flow_item_ipv4 *spec = item->spec;
 	const struct rte_flow_item_ipv4 *mask = item->mask;
-	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
+	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
 	struct ibv_flow_spec_ipv4_ext *ipv4;
 	unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
 
-	++flow->ibv_attr->num_of_specs;
-	flow->ibv_attr->priority = 1;
-	flow->hash_fields = (IBV_RX_HASH_SRC_IPV4 |
-			     IBV_RX_HASH_DST_IPV4);
-	ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
+	++parser->ibv_attr->num_of_specs;
+	parser->hash_fields = (IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4);
+	ipv4 = (void *)((uintptr_t)parser->ibv_attr + parser->offset);
 	*ipv4 = (struct ibv_flow_spec_ipv4_ext) {
-		.type = flow->inner | IBV_FLOW_SPEC_IPV4_EXT,
+		.type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
 		.size = ipv4_size,
 	};
 	if (!spec)
@@ -800,18 +797,16 @@ mlx5_flow_create_ipv6(const struct rte_flow_item *item,
 {
 	const struct rte_flow_item_ipv6 *spec = item->spec;
 	const struct rte_flow_item_ipv6 *mask = item->mask;
-	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
+	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
 	struct ibv_flow_spec_ipv6 *ipv6;
 	unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
 	unsigned int i;
 
-	++flow->ibv_attr->num_of_specs;
-	flow->ibv_attr->priority = 1;
-	flow->hash_fields = (IBV_RX_HASH_SRC_IPV6 |
-			     IBV_RX_HASH_DST_IPV6);
-	ipv6 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
+	++parser->ibv_attr->num_of_specs;
+	parser->hash_fields = (IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6);
+	ipv6 = (void *)((uintptr_t)parser->ibv_attr + parser->offset);
 	*ipv6 = (struct ibv_flow_spec_ipv6) {
-		.type = flow->inner | IBV_FLOW_SPEC_IPV6,
+		.type = parser->inner | IBV_FLOW_SPEC_IPV6,
 		.size = ipv6_size,
 	};
 	if (!spec)
@@ -857,17 +852,16 @@ mlx5_flow_create_udp(const struct rte_flow_item *item,
 {
 	const struct rte_flow_item_udp *spec = item->spec;
 	const struct rte_flow_item_udp *mask = item->mask;
-	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
+	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
 	struct ibv_flow_spec_tcp_udp *udp;
 	unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
 
-	++flow->ibv_attr->num_of_specs;
-	flow->ibv_attr->priority = 0;
-	flow->hash_fields |= (IBV_RX_HASH_SRC_PORT_UDP |
-			      IBV_RX_HASH_DST_PORT_UDP);
-	udp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
+	++parser->ibv_attr->num_of_specs;
+	parser->hash_fields |= (IBV_RX_HASH_SRC_PORT_UDP |
+				IBV_RX_HASH_DST_PORT_UDP);
+	udp = (void *)((uintptr_t)parser->ibv_attr + parser->offset);
 	*udp = (struct ibv_flow_spec_tcp_udp) {
-		.type = flow->inner | IBV_FLOW_SPEC_UDP,
+		.type = parser->inner | IBV_FLOW_SPEC_UDP,
 		.size = udp_size,
 	};
 	if (!spec)
@@ -901,17 +895,16 @@ mlx5_flow_create_tcp(const struct rte_flow_item *item,
 {
 	const struct rte_flow_item_tcp *spec = item->spec;
 	const struct rte_flow_item_tcp *mask = item->mask;
-	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
+	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
 	struct ibv_flow_spec_tcp_udp *tcp;
 	unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
 
-	++flow->ibv_attr->num_of_specs;
-	flow->ibv_attr->priority = 0;
-	flow->hash_fields |= (IBV_RX_HASH_SRC_PORT_TCP |
-			      IBV_RX_HASH_DST_PORT_TCP);
-	tcp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
+	++parser->ibv_attr->num_of_specs;
+	parser->hash_fields |= (IBV_RX_HASH_SRC_PORT_TCP |
+				IBV_RX_HASH_DST_PORT_TCP);
+	tcp = (void *)((uintptr_t)parser->ibv_attr + parser->offset);
 	*tcp = (struct ibv_flow_spec_tcp_udp) {
-		.type = flow->inner | IBV_FLOW_SPEC_TCP,
+		.type = parser->inner | IBV_FLOW_SPEC_TCP,
 		.size = tcp_size,
 	};
 	if (!spec)
@@ -945,7 +938,7 @@ mlx5_flow_create_vxlan(const struct rte_flow_item *item,
 {
 	const struct rte_flow_item_vxlan *spec = item->spec;
 	const struct rte_flow_item_vxlan *mask = item->mask;
-	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
+	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
 	struct ibv_flow_spec_tunnel *vxlan;
 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
 	union vni {
@@ -953,15 +946,14 @@ mlx5_flow_create_vxlan(const struct rte_flow_item *item,
 		uint8_t vni[4];
 	} id;
 
-	++flow->ibv_attr->num_of_specs;
-	flow->ibv_attr->priority = 0;
+	++parser->ibv_attr->num_of_specs;
 	id.vni[0] = 0;
-	vxlan = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
+	vxlan = (void *)((uintptr_t)parser->ibv_attr + parser->offset);
 	*vxlan = (struct ibv_flow_spec_tunnel) {
-		.type = flow->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
+		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
 		.size = size,
 	};
-	flow->inner = IBV_FLOW_SPEC_INNER;
+	parser->inner = IBV_FLOW_SPEC_INNER;
 	if (!spec)
 		return 0;
 	if (!mask)
@@ -978,26 +970,26 @@ mlx5_flow_create_vxlan(const struct rte_flow_item *item,
 /**
  * Convert mark/flag action to Verbs specification.
  *
- * @param flow
- *   Pointer to MLX5 flow structure.
+ * @param parser
+ *   Internal parser structure.
  * @param mark_id
  *   Mark identifier.
  */
 static int
-mlx5_flow_create_flag_mark(struct mlx5_flow_parse *flow, uint32_t mark_id)
+mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
 {
 	struct ibv_flow_spec_action_tag *tag;
 	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
 
-	assert(flow->mark);
-	tag = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
+	assert(parser->mark);
+	tag = (void *)((uintptr_t)parser->ibv_attr + parser->offset);
 	*tag = (struct ibv_flow_spec_action_tag){
 		.type = IBV_FLOW_SPEC_ACTION_TAG,
 		.size = size,
 		.tag_id = mlx5_flow_mark_set(mark_id),
 	};
-	++flow->ibv_attr->num_of_specs;
-	flow->offset += size;
+	++parser->ibv_attr->num_of_specs;
+	parser->offset += size;
 	return 0;
 }
 
@@ -1006,8 +998,8 @@ mlx5_flow_create_flag_mark(struct mlx5_flow_parse *flow, uint32_t mark_id)
  *
  * @param priv
  *   Pointer to private structure.
- * @param flow
- *   MLX5 flow attributes (filled by mlx5_flow_validate()).
+ * @param parser
+ *   Internal parser structure.
  * @param[out] error
  *   Perform verbose error reporting if not NULL.
  *
@@ -1016,7 +1008,7 @@ mlx5_flow_create_flag_mark(struct mlx5_flow_parse *flow, uint32_t mark_id)
  */
 static struct rte_flow *
 priv_flow_create_action_queue_drop(struct priv *priv,
-				   struct mlx5_flow_parse *flow,
+				   struct mlx5_flow_parse *parser,
 				   struct rte_flow_error *error)
 {
 	struct rte_flow *rte_flow;
@@ -1032,14 +1024,14 @@ priv_flow_create_action_queue_drop(struct priv *priv,
 		return NULL;
 	}
 	rte_flow->drop = 1;
-	drop = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
+	drop = (void *)((uintptr_t)parser->ibv_attr + parser->offset);
 	*drop = (struct ibv_flow_spec_action_drop){
 			.type = IBV_FLOW_SPEC_ACTION_DROP,
 			.size = size,
 	};
-	++flow->ibv_attr->num_of_specs;
-	flow->offset += sizeof(struct ibv_flow_spec_action_drop);
-	rte_flow->ibv_attr = flow->ibv_attr;
+	++parser->ibv_attr->num_of_specs;
+	parser->offset += sizeof(struct ibv_flow_spec_action_drop);
+	rte_flow->ibv_attr = parser->ibv_attr;
 	if (!priv->dev->data->dev_started)
 		return rte_flow;
 	rte_flow->drxq.hrxq.qp = priv->flow_drop_queue->qp;
@@ -1062,8 +1054,8 @@ priv_flow_create_action_queue_drop(struct priv *priv,
  *
  * @param priv
  *   Pointer to private structure.
- * @param flow
- *   MLX5 flow attributes (filled by mlx5_flow_validate()).
+ * @param parser
+ *   MLX5 flow parser attributes (filled by mlx5_flow_validate()).
  * @param[out] error
  *   Perform verbose error reporting if not NULL.
  *
@@ -1072,7 +1064,7 @@ priv_flow_create_action_queue_drop(struct priv *priv,
  */
 static struct rte_flow *
 priv_flow_create_action_queue(struct priv *priv,
-			      struct mlx5_flow_parse *flow,
+			      struct mlx5_flow_parse *parser,
 			      struct rte_flow_error *error)
 {
 	struct rte_flow *rte_flow;
@@ -1080,33 +1072,33 @@ priv_flow_create_action_queue(struct priv *priv,
 
 	assert(priv->pd);
 	assert(priv->ctx);
-	assert(!flow->drop);
+	assert(!parser->drop);
 	rte_flow = rte_calloc(__func__, 1,
 			      sizeof(*rte_flow) +
-			      flow->queues_n * sizeof(uint16_t),
+			      parser->queues_n * sizeof(uint16_t),
 			      0);
 	if (!rte_flow) {
 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
 				   NULL, "cannot allocate flow memory");
 		return NULL;
 	}
-	rte_flow->mark = flow->mark;
-	rte_flow->ibv_attr = flow->ibv_attr;
+	rte_flow->mark = parser->mark;
+	rte_flow->ibv_attr = parser->ibv_attr;
 	rte_flow->queues = (uint16_t (*)[])(rte_flow + 1);
-	memcpy(rte_flow->queues, flow->queues,
-	       flow->queues_n * sizeof(uint16_t));
-	rte_flow->queues_n = flow->queues_n;
-	rte_flow->frxq.hash_fields = flow->hash_fields;
+	memcpy(rte_flow->queues, parser->queues,
+	       parser->queues_n * sizeof(uint16_t));
+	rte_flow->queues_n = parser->queues_n;
+	rte_flow->frxq.hash_fields = parser->hash_fields;
 	rte_flow->frxq.hrxq = mlx5_priv_hrxq_get(priv, rss_hash_default_key,
 						 rss_hash_default_key_len,
-						 flow->hash_fields,
+						 parser->hash_fields,
 						 (*rte_flow->queues),
 						 rte_flow->queues_n);
 	if (!rte_flow->frxq.hrxq) {
 		rte_flow->frxq.hrxq =
 			mlx5_priv_hrxq_new(priv, rss_hash_default_key,
 					   rss_hash_default_key_len,
-					   flow->hash_fields,
+					   parser->hash_fields,
 					   (*rte_flow->queues),
 					   rte_flow->queues_n);
 		if (!rte_flow->frxq.hrxq) {
@@ -1116,11 +1108,11 @@ priv_flow_create_action_queue(struct priv *priv,
 			goto error;
 		}
 	}
-	for (i = 0; i != flow->queues_n; ++i) {
+	for (i = 0; i != parser->queues_n; ++i) {
 		struct mlx5_rxq_data *q =
-			(*priv->rxqs)[flow->queues[i]];
+			(*priv->rxqs)[parser->queues[i]];
 
-		q->mark |= flow->mark;
+		q->mark |= parser->mark;
 	}
 	if (!priv->dev->data->dev_started)
 		return rte_flow;
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v2 26/30] net/mlx5: reorganise functions in the file
       [not found] ` <cover.1507207731.git.nelio.laranjeiro@6wind.com>
                     ` (24 preceding siblings ...)
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 25/30] net/mlx5: use a better name for the flow parser Nelio Laranjeiro
@ 2017-10-05 12:49   ` Nelio Laranjeiro
  2017-10-06  5:42     ` Yongseok Koh
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 27/30] net/mlx5: move Verbs flows and attributes Nelio Laranjeiro
                     ` (3 subsequent siblings)
  29 siblings, 1 reply; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-05 12:49 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
Move mlx5_flow_destroy/flush/isolate() to the end of the file.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5_flow.c | 132 +++++++++++++++++++++----------------------
 1 file changed, 66 insertions(+), 66 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 454325c..2fb50b3 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -1347,26 +1347,6 @@ priv_flow_destroy(struct priv *priv,
 }
 
 /**
- * Destroy a flow.
- *
- * @see rte_flow_destroy()
- * @see rte_flow_ops
- */
-int
-mlx5_flow_destroy(struct rte_eth_dev *dev,
-		  struct rte_flow *flow,
-		  struct rte_flow_error *error)
-{
-	struct priv *priv = dev->data->dev_private;
-
-	(void)error;
-	priv_lock(priv);
-	priv_flow_destroy(priv, &priv->flows, flow);
-	priv_unlock(priv);
-	return 0;
-}
-
-/**
  * Destroy all flows.
  *
  * @param priv
@@ -1386,25 +1366,6 @@ priv_flow_flush(struct priv *priv, struct mlx5_flows *list)
 }
 
 /**
- * Destroy all flows.
- *
- * @see rte_flow_flush()
- * @see rte_flow_ops
- */
-int
-mlx5_flow_flush(struct rte_eth_dev *dev,
-		struct rte_flow_error *error)
-{
-	struct priv *priv = dev->data->dev_private;
-
-	(void)error;
-	priv_lock(priv);
-	priv_flow_flush(priv, &priv->flows);
-	priv_unlock(priv);
-	return 0;
-}
-
-/**
  * Create drop queue.
  *
  * @param priv
@@ -1609,33 +1570,6 @@ priv_flow_start(struct priv *priv, struct mlx5_flows *list)
 }
 
 /**
- * Isolated mode.
- *
- * @see rte_flow_isolate()
- * @see rte_flow_ops
- */
-int
-mlx5_flow_isolate(struct rte_eth_dev *dev,
-		  int enable,
-		  struct rte_flow_error *error)
-{
-	struct priv *priv = dev->data->dev_private;
-
-	priv_lock(priv);
-	if (dev->data->dev_started) {
-		rte_flow_error_set(error, EBUSY,
-				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-				   NULL,
-				   "port must be stopped first");
-		priv_unlock(priv);
-		return -rte_errno;
-	}
-	priv->isolated = !!enable;
-	priv_unlock(priv);
-	return 0;
-}
-
-/**
  * Verify the flow list is empty
  *
  * @param priv
@@ -1745,3 +1679,69 @@ mlx5_ctrl_flow(struct rte_eth_dev *dev,
 {
 	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
 }
+
+/**
+ * Destroy a flow.
+ *
+ * @see rte_flow_destroy()
+ * @see rte_flow_ops
+ */
+int
+mlx5_flow_destroy(struct rte_eth_dev *dev,
+		  struct rte_flow *flow,
+		  struct rte_flow_error *error)
+{
+	struct priv *priv = dev->data->dev_private;
+
+	(void)error;
+	priv_lock(priv);
+	priv_flow_destroy(priv, &priv->flows, flow);
+	priv_unlock(priv);
+	return 0;
+}
+
+/**
+ * Destroy all flows.
+ *
+ * @see rte_flow_flush()
+ * @see rte_flow_ops
+ */
+int
+mlx5_flow_flush(struct rte_eth_dev *dev,
+		struct rte_flow_error *error)
+{
+	struct priv *priv = dev->data->dev_private;
+
+	(void)error;
+	priv_lock(priv);
+	priv_flow_flush(priv, &priv->flows);
+	priv_unlock(priv);
+	return 0;
+}
+
+/**
+ * Isolated mode.
+ *
+ * @see rte_flow_isolate()
+ * @see rte_flow_ops
+ */
+int
+mlx5_flow_isolate(struct rte_eth_dev *dev,
+		  int enable,
+		  struct rte_flow_error *error)
+{
+	struct priv *priv = dev->data->dev_private;
+
+	priv_lock(priv);
+	if (dev->data->dev_started) {
+		rte_flow_error_set(error, EBUSY,
+				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				   NULL,
+				   "port must be stopped first");
+		priv_unlock(priv);
+		return -rte_errno;
+	}
+	priv->isolated = !!enable;
+	priv_unlock(priv);
+	return 0;
+}
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v2 27/30] net/mlx5: move Verbs flows and attributes
       [not found] ` <cover.1507207731.git.nelio.laranjeiro@6wind.com>
                     ` (25 preceding siblings ...)
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 26/30] net/mlx5: reorganise functions in the file Nelio Laranjeiro
@ 2017-10-05 12:49   ` Nelio Laranjeiro
  2017-10-06  5:44     ` Yongseok Koh
  2017-10-05 12:50   ` [dpdk-dev] [PATCH v2 28/30] net/mlx5: handle RSS hash configuration in RSS flow Nelio Laranjeiro
                     ` (2 subsequent siblings)
  29 siblings, 1 reply; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-05 12:49 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
Move ibv_attr, which contains the specification of the flow from the Verbs
point of view, together with the Verbs flow itself, next to the related
Verbs objects making up the flow.
This is also a preparation to correctly handle the RSS hash configuration
provided by the user, as multiple Verbs flows will be necessary for a
single generic flow.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5_flow.c | 46 ++++++++++++++++++++++++++------------------
 1 file changed, 27 insertions(+), 19 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 2fb50b3..8ada144 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -101,11 +101,15 @@ struct mlx5_hrxq_drop {
 /* Flows structures. */
 struct mlx5_flow {
 	uint64_t hash_fields; /**< Fields that participate in the hash. */
+	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
+	struct ibv_flow *ibv_flow; /**< Verbs flow. */
 	struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
 };
 
 /* Drop flows structures. */
 struct mlx5_flow_drop {
+	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
+	struct ibv_flow *ibv_flow; /**< Verbs flow. */
 	struct mlx5_hrxq_drop hrxq; /**< Drop hash Rx queue. */
 };
 
@@ -113,8 +117,6 @@ struct rte_flow {
 	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
 	uint32_t mark:1; /**< Set if the flow is marked. */
 	uint32_t drop:1; /**< Drop queue. */
-	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
-	struct ibv_flow *ibv_flow; /**< Verbs flow. */
 	uint16_t queues_n; /**< Number of entries in queue[]. */
 	uint16_t (*queues)[]; /**< Queues indexes to use. */
 	union {
@@ -1031,13 +1033,13 @@ priv_flow_create_action_queue_drop(struct priv *priv,
 	};
 	++parser->ibv_attr->num_of_specs;
 	parser->offset += sizeof(struct ibv_flow_spec_action_drop);
-	rte_flow->ibv_attr = parser->ibv_attr;
+	rte_flow->drxq.ibv_attr = parser->ibv_attr;
 	if (!priv->dev->data->dev_started)
 		return rte_flow;
 	rte_flow->drxq.hrxq.qp = priv->flow_drop_queue->qp;
-	rte_flow->ibv_flow = ibv_create_flow(rte_flow->drxq.hrxq.qp,
-					     rte_flow->ibv_attr);
-	if (!rte_flow->ibv_flow) {
+	rte_flow->drxq.ibv_flow = ibv_create_flow(rte_flow->drxq.hrxq.qp,
+						  rte_flow->drxq.ibv_attr);
+	if (!rte_flow->drxq.ibv_flow) {
 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
 				   NULL, "flow rule creation failure");
 		goto error;
@@ -1083,7 +1085,7 @@ priv_flow_create_action_queue(struct priv *priv,
 		return NULL;
 	}
 	rte_flow->mark = parser->mark;
-	rte_flow->ibv_attr = parser->ibv_attr;
+	rte_flow->frxq.ibv_attr = parser->ibv_attr;
 	rte_flow->queues = (uint16_t (*)[])(rte_flow + 1);
 	memcpy(rte_flow->queues, parser->queues,
 	       parser->queues_n * sizeof(uint16_t));
@@ -1116,9 +1118,9 @@ priv_flow_create_action_queue(struct priv *priv,
 	}
 	if (!priv->dev->data->dev_started)
 		return rte_flow;
-	rte_flow->ibv_flow = ibv_create_flow(rte_flow->frxq.hrxq->qp,
-					     rte_flow->ibv_attr);
-	if (!rte_flow->ibv_flow) {
+	rte_flow->frxq.ibv_flow = ibv_create_flow(rte_flow->frxq.hrxq->qp,
+						  rte_flow->frxq.ibv_attr);
+	if (!rte_flow->frxq.ibv_flow) {
 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
 				   NULL, "flow rule creation failure");
 		goto error;
@@ -1336,12 +1338,17 @@ priv_flow_destroy(struct priv *priv,
 		rxq_data->mark = mark;
 	}
 free:
-	if (flow->ibv_flow)
-		claim_zero(ibv_destroy_flow(flow->ibv_flow));
-	if (!flow->drop)
+	if (flow->drop) {
+		if (flow->drxq.ibv_flow)
+			claim_zero(ibv_destroy_flow(flow->drxq.ibv_flow));
+		rte_free(flow->drxq.ibv_attr);
+	} else {
 		mlx5_priv_hrxq_release(priv, flow->frxq.hrxq);
+		if (flow->frxq.ibv_flow)
+			claim_zero(ibv_destroy_flow(flow->frxq.ibv_flow));
+		rte_free(flow->frxq.ibv_attr);
+	}
 	TAILQ_REMOVE(list, flow, next);
-	rte_free(flow->ibv_attr);
 	DEBUG("Flow destroyed %p", (void *)flow);
 	rte_free(flow);
 }
@@ -1490,8 +1497,9 @@ priv_flow_stop(struct priv *priv, struct mlx5_flows *list)
 	struct rte_flow *flow;
 
 	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
-		claim_zero(ibv_destroy_flow(flow->ibv_flow));
-		flow->ibv_flow = NULL;
+		assert(!flow->drop);
+		claim_zero(ibv_destroy_flow(flow->frxq.ibv_flow));
+		flow->frxq.ibv_flow = NULL;
 		mlx5_priv_hrxq_release(priv, flow->frxq.hrxq);
 		flow->frxq.hrxq = NULL;
 		if (flow->mark) {
@@ -1546,9 +1554,9 @@ priv_flow_start(struct priv *priv, struct mlx5_flows *list)
 			return rte_errno;
 		}
 flow_create:
-		flow->ibv_flow = ibv_create_flow(flow->frxq.hrxq->qp,
-						 flow->ibv_attr);
-		if (!flow->ibv_flow) {
+		flow->frxq.ibv_flow = ibv_create_flow(flow->frxq.hrxq->qp,
+						      flow->frxq.ibv_attr);
+		if (!flow->frxq.ibv_flow) {
 			DEBUG("Flow %p cannot be applied", (void *)flow);
 			rte_errno = EINVAL;
 			return rte_errno;
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * Re: [dpdk-dev] [PATCH v2 27/30] net/mlx5: move Verbs flows and attributes
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 27/30] net/mlx5: move Verbs flows and attributes Nelio Laranjeiro
@ 2017-10-06  5:44     ` Yongseok Koh
  0 siblings, 0 replies; 129+ messages in thread
From: Yongseok Koh @ 2017-10-06  5:44 UTC (permalink / raw)
  To: Nelio Laranjeiro; +Cc: dev, adrien.mazarguil, ferruh.yigit
On Thu, Oct 05, 2017 at 02:49:59PM +0200, Nelio Laranjeiro wrote:
> Moves ibv_attr containing the specification of the flow from Verbs point of
> view also with the verbs flow itself near the related verbs objects making
> the flow.
> 
> This is also a preparation to handle correctly the RSS hash configuration
> provided by the user, has multiple Verbs flows will be necessary for a
> single generic flow.
> 
> Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
> ---
Acked-by: Yongseok Koh <yskoh@mellanox.com>
 
Thanks
^ permalink raw reply	[flat|nested] 129+ messages in thread 
 
- * [dpdk-dev] [PATCH v2 28/30] net/mlx5: handle RSS hash configuration in RSS flow
       [not found] ` <cover.1507207731.git.nelio.laranjeiro@6wind.com>
                     ` (26 preceding siblings ...)
  2017-10-05 12:49   ` [dpdk-dev] [PATCH v2 27/30] net/mlx5: move Verbs flows and attributes Nelio Laranjeiro
@ 2017-10-05 12:50   ` Nelio Laranjeiro
  2017-10-06 17:30     ` Yongseok Koh
  2017-10-05 12:50   ` [dpdk-dev] [PATCH v2 29/30] net/mlx5: support flow director Nelio Laranjeiro
  2017-10-05 12:50   ` [dpdk-dev] [PATCH v2 30/30] net/mlx5: add new operations for isolated mode Nelio Laranjeiro
  29 siblings, 1 reply; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-05 12:50 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
Add RSS support according to the RSS configuration.
A special case is handled, when the pattern does not cover the RSS hash
configuration request such as:
 flow create 0 ingress pattern eth / end actions rss queues 0 1 end / end
In such a situation, with the default testpmd RSS configuration (i.e. IP),
it should be converted to 3 Verbs flows to correctly handle the request:
 1. IPv4 flow, an extra IPv4 wildcard specification needs to be added in
    the conversion.
 2. IPv6 flow, same as for IPv4.
 3. Ethernet followed by any other protocol on which no RSS can be
    performed and thus the traffic will be redirected to the first queue of
    the user request.
The same kind of issue is handled if the RSS is performed only on UDPv4 or
UDPv6 or TCPv*.
This does not handle a priority conflict which can occur if the user adds
several colliding flow rules.  Currently in the example above, the request
is already consuming 2 priorities (one for the IPv4/IPv6 matching rule
priority and one for the Ethernet matching rule priority + 1).
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5_flow.c | 1424 ++++++++++++++++++++++++++++++------------
 1 file changed, 1022 insertions(+), 402 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 8ada144..d821c79 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -55,6 +55,10 @@
 /* Define minimal priority for control plane flows. */
 #define MLX5_CTRL_FLOW_PRIORITY 4
 
+/* Internet Protocol versions. */
+#define MLX5_IPV4 4
+#define MLX5_IPV6 6
+
 static int
 mlx5_flow_create_eth(const struct rte_flow_item *item,
 		     const void *default_mask,
@@ -90,6 +94,98 @@ mlx5_flow_create_vxlan(const struct rte_flow_item *item,
 		       const void *default_mask,
 		       void *data);
 
+struct mlx5_flow_parse;
+
+static void
+mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
+		      unsigned int size);
+
+static int
+mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
+
+/* Hash RX queue types. */
+enum hash_rxq_type {
+	HASH_RXQ_TCPV4,
+	HASH_RXQ_UDPV4,
+	HASH_RXQ_IPV4,
+	HASH_RXQ_TCPV6,
+	HASH_RXQ_UDPV6,
+	HASH_RXQ_IPV6,
+	HASH_RXQ_ETH,
+};
+
+/* Initialization data for hash RX queue. */
+struct hash_rxq_init {
+	uint64_t hash_fields; /* Fields that participate in the hash. */
+	uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
+	unsigned int flow_priority; /* Flow priority to use. */
+	unsigned int ip_version; /* Internet protocol. */
+};
+
+/* Initialization data for hash RX queues. */
+const struct hash_rxq_init hash_rxq_init[] = {
+	[HASH_RXQ_TCPV4] = {
+		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
+				IBV_RX_HASH_DST_IPV4 |
+				IBV_RX_HASH_SRC_PORT_TCP |
+				IBV_RX_HASH_DST_PORT_TCP),
+		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
+		.flow_priority = 0,
+		.ip_version = MLX5_IPV4,
+	},
+	[HASH_RXQ_UDPV4] = {
+		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
+				IBV_RX_HASH_DST_IPV4 |
+				IBV_RX_HASH_SRC_PORT_UDP |
+				IBV_RX_HASH_DST_PORT_UDP),
+		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
+		.flow_priority = 0,
+		.ip_version = MLX5_IPV4,
+	},
+	[HASH_RXQ_IPV4] = {
+		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
+				IBV_RX_HASH_DST_IPV4),
+		.dpdk_rss_hf = (ETH_RSS_IPV4 |
+				ETH_RSS_FRAG_IPV4),
+		.flow_priority = 1,
+		.ip_version = MLX5_IPV4,
+	},
+	[HASH_RXQ_TCPV6] = {
+		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
+				IBV_RX_HASH_DST_IPV6 |
+				IBV_RX_HASH_SRC_PORT_TCP |
+				IBV_RX_HASH_DST_PORT_TCP),
+		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
+		.flow_priority = 0,
+		.ip_version = MLX5_IPV6,
+	},
+	[HASH_RXQ_UDPV6] = {
+		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
+				IBV_RX_HASH_DST_IPV6 |
+				IBV_RX_HASH_SRC_PORT_UDP |
+				IBV_RX_HASH_DST_PORT_UDP),
+		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
+		.flow_priority = 0,
+		.ip_version = MLX5_IPV6,
+	},
+	[HASH_RXQ_IPV6] = {
+		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
+				IBV_RX_HASH_DST_IPV6),
+		.dpdk_rss_hf = (ETH_RSS_IPV6 |
+				ETH_RSS_FRAG_IPV6),
+		.flow_priority = 1,
+		.ip_version = MLX5_IPV6,
+	},
+	[HASH_RXQ_ETH] = {
+		.hash_fields = 0,
+		.dpdk_rss_hf = 0,
+		.flow_priority = 2,
+	},
+};
+
+/* Number of entries in hash_rxq_init[]. */
+const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
+
 /** Structure for Drop queue. */
 struct mlx5_hrxq_drop {
 	struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
@@ -110,7 +206,6 @@ struct mlx5_flow {
 struct mlx5_flow_drop {
 	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
 	struct ibv_flow *ibv_flow; /**< Verbs flow. */
-	struct mlx5_hrxq_drop hrxq; /**< Drop hash Rx queue. */
 };
 
 struct rte_flow {
@@ -119,8 +214,11 @@ struct rte_flow {
 	uint32_t drop:1; /**< Drop queue. */
 	uint16_t queues_n; /**< Number of entries in queue[]. */
 	uint16_t (*queues)[]; /**< Queues indexes to use. */
+	struct rte_eth_rss_conf rss_conf; /**< RSS configuration */
+	uint8_t rss_key[40]; /**< copy of the RSS key. */
 	union {
-		struct mlx5_flow frxq; /**< Flow with Rx queue. */
+		struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
+		/**< Flow with Rx queue. */
 		struct mlx5_flow_drop drxq; /**< Flow with drop Rx queue. */
 	};
 };
@@ -224,7 +322,7 @@ static const struct mlx5_flow_items mlx5_flow_items[] = {
 		.default_mask = &rte_flow_item_ipv4_mask,
 		.mask_sz = sizeof(struct rte_flow_item_ipv4),
 		.convert = mlx5_flow_create_ipv4,
-		.dst_sz = sizeof(struct ibv_flow_spec_ipv4),
+		.dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
 	},
 	[RTE_FLOW_ITEM_TYPE_IPV6] = {
 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
@@ -296,17 +394,31 @@ static const struct mlx5_flow_items mlx5_flow_items[] = {
 
 /** Structure to pass to the conversion function. */
 struct mlx5_flow_parse {
-	struct ibv_flow_attr *ibv_attr; /**< Verbs attribute. */
-	unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
 	uint32_t inner; /**< Set once VXLAN is encountered. */
-	uint32_t create:1; /**< Leave allocated resources on exit. */
-	uint32_t queue:1; /**< Target is a receive queue. */
+	uint32_t create:1;
+	/**< Whether resources should remain after a validate. */
 	uint32_t drop:1; /**< Target is a drop queue. */
 	uint32_t mark:1; /**< Mark is present in the flow. */
 	uint32_t mark_id; /**< Mark identifier. */
-	uint64_t hash_fields; /**< Fields that participate in the hash. */
 	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
 	uint16_t queues_n; /**< Number of entries in queue[]. */
+	struct rte_eth_rss_conf rss_conf; /**< RSS configuration */
+	uint8_t rss_key[40]; /**< copy of the RSS key. */
+	enum hash_rxq_type layer; /**< Last pattern layer detected. */
+	union {
+		struct {
+			struct ibv_flow_attr *ibv_attr;
+			/**< Pointer to Verbs attributes. */
+			unsigned int offset;
+			/**< Current position or total size of the attribute. */
+		} queue[RTE_DIM(hash_rxq_init)];
+		struct {
+			struct ibv_flow_attr *ibv_attr;
+			/**< Pointer to Verbs attributes. */
+			unsigned int offset;
+			/**< Current position or total size of the attribute. */
+		} drop_q;
+	};
 };
 
 static const struct rte_flow_ops mlx5_flow_ops = {
@@ -416,16 +528,42 @@ mlx5_flow_item_validate(const struct rte_flow_item *item,
 }
 
 /**
- * Validate and convert a flow supported by the NIC.
+ * Copy the RSS configuration from the user ones.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param parser
+ *   Internal parser structure.
+ * @param rss_conf
+ *   User RSS configuration to save.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+static int
+priv_flow_convert_rss_conf(struct priv *priv,
+			   struct mlx5_flow_parse *parser,
+			   const struct rte_eth_rss_conf *rss_conf)
+{
+	const struct rte_eth_rss_conf *rss =
+		rss_conf ? rss_conf : &priv->rss_conf;
+
+	if (rss->rss_key_len > 40)
+		return EINVAL;
+	parser->rss_conf.rss_key_len = rss->rss_key_len;
+	parser->rss_conf.rss_hf = rss->rss_hf;
+	memcpy(parser->rss_key, rss->rss_key, rss->rss_key_len);
+	parser->rss_conf.rss_key = parser->rss_key;
+	return 0;
+}
+
+/**
+ * Extract attribute to the parser.
  *
  * @param priv
  *   Pointer to private structure.
  * @param[in] attr
  *   Flow rule attributes.
- * @param[in] pattern
- *   Pattern specification (list terminated by the END pattern item).
- * @param[in] actions
- *   Associated actions (list terminated by the END action).
  * @param[out] error
  *   Perform verbose error reporting if not NULL.
  * @param[in, out] parser
@@ -435,22 +573,13 @@ mlx5_flow_item_validate(const struct rte_flow_item *item,
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-priv_flow_convert(struct priv *priv,
-		  const struct rte_flow_attr *attr,
-		  const struct rte_flow_item items[],
-		  const struct rte_flow_action actions[],
-		  struct rte_flow_error *error,
-		  struct mlx5_flow_parse *parser)
+priv_flow_convert_attributes(struct priv *priv,
+			     const struct rte_flow_attr *attr,
+			     struct rte_flow_error *error,
+			     struct mlx5_flow_parse *parser)
 {
-	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
-
 	(void)priv;
-	*parser = (struct mlx5_flow_parse){
-		.ibv_attr = parser->ibv_attr,
-		.create = parser->create,
-		.offset = sizeof(struct ibv_flow_attr),
-		.mark_id = MLX5_FLOW_MARK_DEFAULT,
-	};
+	(void)parser;
 	if (attr->group) {
 		rte_flow_error_set(error, ENOTSUP,
 				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
@@ -479,6 +608,37 @@ priv_flow_convert(struct priv *priv,
 				   "only ingress is supported");
 		return -rte_errno;
 	}
+	return 0;
+}
+
+/**
+ * Extract actions request to the parser.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param[in] actions
+ *   Associated actions (list terminated by the END action).
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ * @param[in, out] parser
+ *   Internal parser structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+priv_flow_convert_actions(struct priv *priv,
+			  const struct rte_flow_action actions[],
+			  struct rte_flow_error *error,
+			  struct mlx5_flow_parse *parser)
+{
+	/*
+	 * Add default RSS configuration necessary for Verbs to create QP even
+	 * if no RSS is necessary.
+	 */
+	priv_flow_convert_rss_conf(priv, parser,
+				   (const struct rte_eth_rss_conf *)
+				   &priv->rss_conf);
 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
 		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
 			continue;
@@ -507,7 +667,6 @@ priv_flow_convert(struct priv *priv,
 				return -rte_errno;
 			}
 			if (!found) {
-				parser->queue = 1;
 				parser->queues_n = 1;
 				parser->queues[0] = queue->index;
 			}
@@ -554,10 +713,17 @@ priv_flow_convert(struct priv *priv,
 					return -rte_errno;
 				}
 			}
-			parser->queue = 1;
 			for (n = 0; n < rss->num; ++n)
 				parser->queues[n] = rss->queue[n];
 			parser->queues_n = rss->num;
+			if (priv_flow_convert_rss_conf(priv, parser,
+						       rss->rss_conf)) {
+				rte_flow_error_set(error, EINVAL,
+						   RTE_FLOW_ERROR_TYPE_ACTION,
+						   actions,
+						   "wrong RSS configuration");
+				return -rte_errno;
+			}
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
 			const struct rte_flow_action_mark *mark =
 				(const struct rte_flow_action_mark *)
@@ -585,18 +751,53 @@ priv_flow_convert(struct priv *priv,
 			goto exit_action_not_supported;
 		}
 	}
-	if (parser->mark && !parser->ibv_attr && !parser->drop)
-		parser->offset += sizeof(struct ibv_flow_spec_action_tag);
-	if (!parser->ibv_attr && parser->drop)
-		parser->offset += sizeof(struct ibv_flow_spec_action_drop);
-	if (!parser->queue && !parser->drop) {
+	if (!parser->queues_n && !parser->drop) {
 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
 				   NULL, "no valid action");
 		return -rte_errno;
 	}
+	return 0;
+exit_action_not_supported:
+	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
+			   actions, "action not supported");
+	return -rte_errno;
+}
+
+/**
+ * Validate items.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param[in] items
+ *   Pattern specification (list terminated by the END pattern item).
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ * @param[in, out] parser
+ *   Internal parser structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+priv_flow_convert_items_validate(struct priv *priv,
+				 const struct rte_flow_item items[],
+				 struct rte_flow_error *error,
+				 struct mlx5_flow_parse *parser)
+{
+	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
+	unsigned int i;
+
+	(void)priv;
+	/* Initialise the offsets to start after verbs attribute. */
+	if (parser->drop) {
+		parser->drop_q.offset = sizeof(struct ibv_flow_attr);
+	} else {
+		for (i = 0; i != hash_rxq_init_n; ++i)
+			parser->queue[i].offset = sizeof(struct ibv_flow_attr);
+	}
 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
 		const struct mlx5_flow_items *token = NULL;
-		unsigned int i;
+		unsigned int n;
 		int err;
 
 		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
@@ -618,15 +819,7 @@ priv_flow_convert(struct priv *priv,
 					      cur_item->mask_sz);
 		if (err)
 			goto exit_item_not_supported;
-		if (parser->ibv_attr && cur_item->convert) {
-			err = cur_item->convert(items,
-						(cur_item->default_mask ?
-						 cur_item->default_mask :
-						 cur_item->mask),
-						parser);
-			if (err)
-				goto exit_item_not_supported;
-		} else if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
+		if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
 			if (parser->inner) {
 				rte_flow_error_set(error, ENOTSUP,
 						   RTE_FLOW_ERROR_TYPE_ITEM,
@@ -637,17 +830,367 @@ priv_flow_convert(struct priv *priv,
 			}
 			parser->inner = 1;
 		}
-		parser->offset += cur_item->dst_sz;
+		if (parser->drop) {
+			parser->drop_q.offset += cur_item->dst_sz;
+		} else if (parser->queues_n == 1) {
+			parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
+		} else {
+			for (n = 0; n != hash_rxq_init_n; ++n)
+				parser->queue[n].offset += cur_item->dst_sz;
+		}
+	}
+	if (parser->mark) {
+		for (i = 0; i != hash_rxq_init_n; ++i)
+			parser->queue[i].offset +=
+				sizeof(struct ibv_flow_spec_action_tag);
 	}
 	return 0;
 exit_item_not_supported:
 	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
 			   items, "item not supported");
 	return -rte_errno;
-exit_action_not_supported:
-	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
-			   actions, "action not supported");
-	return -rte_errno;
+}
+
+/**
+ * Allocate memory space to store verbs flow attributes.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param[in] priority
+ *   Flow priority.
+ * @param[in] size
+ *   Amount of byte to allocate.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   A verbs flow attribute on success, NULL otherwise.
+ */
+static struct ibv_flow_attr*
+priv_flow_convert_allocate(struct priv *priv,
+			   unsigned int priority,
+			   unsigned int size,
+			   struct rte_flow_error *error)
+{
+	struct ibv_flow_attr *ibv_attr;
+
+	(void)priv;
+	ibv_attr = rte_calloc(__func__, 1, size, 0);
+	if (!ibv_attr) {
+		rte_flow_error_set(error, ENOMEM,
+				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				   NULL,
+				   "cannot allocate verbs spec attributes.");
+		return NULL;
+	}
+	ibv_attr->priority = priority;
+	return ibv_attr;
+}
+
+/**
+ * Finalise verbs flow attributes.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param[in, out] parser
+ *   Internal parser structure.
+ */
+static void
+priv_flow_convert_finalise(struct priv *priv, struct mlx5_flow_parse *parser)
+{
+	const unsigned int ipv4 =
+		hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
+	const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
+	const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
+	const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
+	const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
+	const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
+	unsigned int i;
+
+	(void)priv;
+	if (parser->layer == HASH_RXQ_ETH) {
+		goto fill;
+	} else {
+		/*
+		 * This layer becomes useless as the pattern define under
+		 * layers.
+		 */
+		rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
+		parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
+	}
+	/* Remove opposite kind of layer e.g. IPv6 if the pattern is IPv4. */
+	for (i = ohmin; i != (ohmax + 1); ++i) {
+		if (!parser->queue[i].ibv_attr)
+			continue;
+		rte_free(parser->queue[i].ibv_attr);
+		parser->queue[i].ibv_attr = NULL;
+	}
+	/* Remove impossible flow according to the RSS configuration. */
+	if (hash_rxq_init[parser->layer].dpdk_rss_hf &
+	    parser->rss_conf.rss_hf) {
+		/* Remove any other flow. */
+		for (i = hmin; i != (hmax + 1); ++i) {
+			if ((i == parser->layer) ||
+			     (!parser->queue[i].ibv_attr))
+				continue;
+			rte_free(parser->queue[i].ibv_attr);
+			parser->queue[i].ibv_attr = NULL;
+		}
+	} else  if (!parser->queue[ip].ibv_attr) {
+		/* no RSS possible with the current configuration. */
+		parser->queues_n = 1;
+		return;
+	}
+fill:
+	/*
+	 * Fill missing layers in verbs specifications, or compute the correct
+	 * offset to allocate the memory space for the attributes and
+	 * specifications.
+	 */
+	for (i = 0; i != hash_rxq_init_n - 1; ++i) {
+		union {
+			struct ibv_flow_spec_ipv4_ext ipv4;
+			struct ibv_flow_spec_ipv6 ipv6;
+			struct ibv_flow_spec_tcp_udp udp_tcp;
+		} specs;
+		void *dst;
+		uint16_t size;
+
+		if (i == parser->layer)
+			continue;
+		if (parser->layer == HASH_RXQ_ETH) {
+			if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
+				size = sizeof(struct ibv_flow_spec_ipv4_ext);
+				specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
+					.type = IBV_FLOW_SPEC_IPV4_EXT |
+						parser->inner,
+					.size = size,
+				};
+			} else {
+				size = sizeof(struct ibv_flow_spec_ipv6);
+				specs.ipv6 = (struct ibv_flow_spec_ipv6){
+					.type = IBV_FLOW_SPEC_IPV6 |
+						parser->inner,
+					.size = size,
+				};
+			}
+			if (parser->queue[i].ibv_attr) {
+				dst = (void *)((uintptr_t)
+					       parser->queue[i].ibv_attr +
+					       parser->queue[i].offset);
+				memcpy(dst, &specs, size);
+				++parser->queue[i].ibv_attr->num_of_specs;
+			}
+			parser->queue[i].offset += size;
+		}
+		if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
+		    (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
+			size = sizeof(struct ibv_flow_spec_tcp_udp);
+			specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
+				.type = ((i == HASH_RXQ_UDPV4 ||
+					  i == HASH_RXQ_UDPV6) ?
+					 IBV_FLOW_SPEC_UDP :
+					 IBV_FLOW_SPEC_TCP) |
+					parser->inner,
+				.size = size,
+			};
+			if (parser->queue[i].ibv_attr) {
+				dst = (void *)((uintptr_t)
+					       parser->queue[i].ibv_attr +
+					       parser->queue[i].offset);
+				memcpy(dst, &specs, size);
+				++parser->queue[i].ibv_attr->num_of_specs;
+			}
+			parser->queue[i].offset += size;
+		}
+	}
+}
+
+/**
+ * Validate and convert a flow supported by the NIC.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param[in] attr
+ *   Flow rule attributes.
+ * @param[in] pattern
+ *   Pattern specification (list terminated by the END pattern item).
+ * @param[in] actions
+ *   Associated actions (list terminated by the END action).
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ * @param[in, out] parser
+ *   Internal parser structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+priv_flow_convert(struct priv *priv,
+		  const struct rte_flow_attr *attr,
+		  const struct rte_flow_item items[],
+		  const struct rte_flow_action actions[],
+		  struct rte_flow_error *error,
+		  struct mlx5_flow_parse *parser)
+{
+	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
+	unsigned int i;
+	int ret;
+
+	/* First step. Validate the attributes, items and actions. */
+	*parser = (struct mlx5_flow_parse){
+		.create = parser->create,
+		.layer = HASH_RXQ_ETH,
+		.mark_id = MLX5_FLOW_MARK_DEFAULT,
+	};
+	ret = priv_flow_convert_attributes(priv, attr, error, parser);
+	if (ret)
+		return ret;
+	ret = priv_flow_convert_actions(priv, actions, error, parser);
+	if (ret)
+		return ret;
+	ret = priv_flow_convert_items_validate(priv, items, error, parser);
+	if (ret)
+		return ret;
+	priv_flow_convert_finalise(priv, parser);
+	/*
+	 * Second step.
+	 * Allocate the memory space to store verbs specifications.
+	 */
+	if (parser->drop) {
+		parser->drop_q.ibv_attr =
+			priv_flow_convert_allocate(priv, attr->priority,
+						   parser->drop_q.offset,
+						   error);
+		if (!parser->drop_q.ibv_attr)
+			return ENOMEM;
+		parser->drop_q.offset = sizeof(struct ibv_flow_attr);
+	} else if (parser->queues_n == 1) {
+		unsigned int priority =
+			attr->priority +
+			hash_rxq_init[HASH_RXQ_ETH].flow_priority;
+		unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
+
+		parser->queue[HASH_RXQ_ETH].ibv_attr =
+			priv_flow_convert_allocate(priv, priority,
+						   offset, error);
+		if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
+			return ENOMEM;
+		parser->queue[HASH_RXQ_ETH].offset =
+			sizeof(struct ibv_flow_attr);
+	} else {
+		for (i = 0; i != hash_rxq_init_n; ++i) {
+			unsigned int priority =
+				attr->priority +
+				hash_rxq_init[HASH_RXQ_ETH].flow_priority;
+			unsigned int offset;
+
+			if (!(parser->rss_conf.rss_hf &
+			      hash_rxq_init[i].dpdk_rss_hf) &&
+			    (i != HASH_RXQ_ETH))
+				continue;
+			offset = parser->queue[i].offset;
+			parser->queue[i].ibv_attr =
+				priv_flow_convert_allocate(priv, priority,
+							   offset, error);
+			if (!parser->queue[i].ibv_attr)
+				goto exit_enomem;
+			parser->queue[i].offset = sizeof(struct ibv_flow_attr);
+		}
+	}
+	/* Third step. Conversion parse, fill the specifications. */
+	parser->inner = 0;
+	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
+		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
+			continue;
+		cur_item = &mlx5_flow_items[items->type];
+		ret = cur_item->convert(items,
+					(cur_item->default_mask ?
+					 cur_item->default_mask :
+					 cur_item->mask),
+					parser);
+		if (ret) {
+			rte_flow_error_set(error, ENOTSUP,
+					   RTE_FLOW_ERROR_TYPE_ITEM,
+					   items, "item not supported");
+			goto exit_free;
+		}
+	}
+	if (parser->mark)
+		mlx5_flow_create_flag_mark(parser, parser->mark_id);
+	/*
+	 * Last step. Complete missing specification to reach the RSS
+	 * configuration.
+	 */
+	if (parser->queues_n > 1)
+		priv_flow_convert_finalise(priv, parser);
+exit_free:
+	/* Only verification is expected, all resources should be released. */
+	if (!parser->create) {
+		if (parser->drop) {
+			rte_free(parser->drop_q.ibv_attr);
+			parser->drop_q.ibv_attr = NULL;
+		}
+		for (i = 0; i != hash_rxq_init_n; ++i) {
+			if (parser->queue[i].ibv_attr) {
+				rte_free(parser->queue[i].ibv_attr);
+				parser->queue[i].ibv_attr = NULL;
+			}
+		}
+	}
+	return ret;
+exit_enomem:
+	for (i = 0; i != hash_rxq_init_n; ++i) {
+		if (parser->queue[i].ibv_attr) {
+			rte_free(parser->queue[i].ibv_attr);
+			parser->queue[i].ibv_attr = NULL;
+		}
+	}
+	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+			   NULL, "cannot allocate verbs spec attributes.");
+	return ret;
+}
+
+/**
+ * Copy the specification created into the flow.
+ *
+ * @param parser
+ *   Internal parser structure.
+ * @param src
+ *   Create specification.
+ * @param size
+ *   Size in bytes of the specification to copy.
+ */
+static void
+mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
+		      unsigned int size)
+{
+	unsigned int i;
+	void *dst;
+
+	if (parser->drop) {
+		dst = (void *)((uintptr_t)parser->drop_q.ibv_attr +
+				parser->drop_q.offset);
+		memcpy(dst, src, size);
+		++parser->drop_q.ibv_attr->num_of_specs;
+		parser->drop_q.offset += size;
+		return;
+	}
+	for (i = 0; i != hash_rxq_init_n; ++i) {
+		if (!parser->queue[i].ibv_attr)
+			continue;
+		/* Specification must be the same l3 type or none. */
+		if (parser->layer == HASH_RXQ_ETH ||
+		    (hash_rxq_init[parser->layer].ip_version ==
+		     hash_rxq_init[i].ip_version) ||
+		    (hash_rxq_init[i].ip_version == 0)) {
+			dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
+					parser->queue[i].offset);
+			memcpy(dst, src, size);
+			++parser->queue[i].ibv_attr->num_of_specs;
+			parser->queue[i].offset += size;
+		}
+	}
 }
 
 /**
@@ -668,33 +1211,32 @@ mlx5_flow_create_eth(const struct rte_flow_item *item,
 	const struct rte_flow_item_eth *spec = item->spec;
 	const struct rte_flow_item_eth *mask = item->mask;
 	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
-	struct ibv_flow_spec_eth *eth;
 	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
-	unsigned int i;
-
-	++parser->ibv_attr->num_of_specs;
-	parser->hash_fields = 0;
-	eth = (void *)((uintptr_t)parser->ibv_attr + parser->offset);
-	*eth = (struct ibv_flow_spec_eth) {
+	struct ibv_flow_spec_eth eth = {
 		.type = parser->inner | IBV_FLOW_SPEC_ETH,
 		.size = eth_size,
 	};
-	if (!spec)
-		return 0;
-	if (!mask)
-		mask = default_mask;
-	memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
-	memcpy(eth->val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
-	eth->val.ether_type = spec->type;
-	memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
-	memcpy(eth->mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
-	eth->mask.ether_type = mask->type;
-	/* Remove unwanted bits from values. */
-	for (i = 0; i < ETHER_ADDR_LEN; ++i) {
-		eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
-		eth->val.src_mac[i] &= eth->mask.src_mac[i];
+
+	parser->layer = HASH_RXQ_ETH;
+	if (spec) {
+		unsigned int i;
+
+		if (!mask)
+			mask = default_mask;
+		memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
+		memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
+		eth.val.ether_type = spec->type;
+		memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
+		memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
+		eth.mask.ether_type = mask->type;
+		/* Remove unwanted bits from values. */
+		for (i = 0; i < ETHER_ADDR_LEN; ++i) {
+			eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
+			eth.val.src_mac[i] &= eth.mask.src_mac[i];
+		}
+		eth.val.ether_type &= eth.mask.ether_type;
 	}
-	eth->val.ether_type &= eth->mask.ether_type;
+	mlx5_flow_create_copy(parser, &eth, eth_size);
 	return 0;
 }
 
@@ -719,14 +1261,30 @@ mlx5_flow_create_vlan(const struct rte_flow_item *item,
 	struct ibv_flow_spec_eth *eth;
 	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
 
-	eth = (void *)((uintptr_t)parser->ibv_attr + parser->offset - eth_size);
-	if (!spec)
-		return 0;
-	if (!mask)
-		mask = default_mask;
-	eth->val.vlan_tag = spec->tci;
-	eth->mask.vlan_tag = mask->tci;
-	eth->val.vlan_tag &= eth->mask.vlan_tag;
+	if (spec) {
+		unsigned int i;
+		if (!mask)
+			mask = default_mask;
+
+		if (parser->drop) {
+			eth = (void *)((uintptr_t)parser->drop_q.ibv_attr +
+				       parser->drop_q.offset - eth_size);
+			eth->val.vlan_tag = spec->tci;
+			eth->mask.vlan_tag = mask->tci;
+			eth->val.vlan_tag &= eth->mask.vlan_tag;
+			return 0;
+		}
+		for (i = 0; i != hash_rxq_init_n; ++i) {
+			if (!parser->queue[i].ibv_attr)
+				continue;
+
+			eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
+				       parser->queue[i].offset - eth_size);
+			eth->val.vlan_tag = spec->tci;
+			eth->mask.vlan_tag = mask->tci;
+			eth->val.vlan_tag &= eth->mask.vlan_tag;
+		}
+	}
 	return 0;
 }
 
@@ -748,37 +1306,35 @@ mlx5_flow_create_ipv4(const struct rte_flow_item *item,
 	const struct rte_flow_item_ipv4 *spec = item->spec;
 	const struct rte_flow_item_ipv4 *mask = item->mask;
 	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
-	struct ibv_flow_spec_ipv4_ext *ipv4;
 	unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
-
-	++parser->ibv_attr->num_of_specs;
-	parser->hash_fields = (IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4);
-	ipv4 = (void *)((uintptr_t)parser->ibv_attr + parser->offset);
-	*ipv4 = (struct ibv_flow_spec_ipv4_ext) {
+	struct ibv_flow_spec_ipv4_ext ipv4 = {
 		.type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
 		.size = ipv4_size,
 	};
-	if (!spec)
-		return 0;
-	if (!mask)
-		mask = default_mask;
-	ipv4->val = (struct ibv_flow_ipv4_ext_filter){
-		.src_ip = spec->hdr.src_addr,
-		.dst_ip = spec->hdr.dst_addr,
-		.proto = spec->hdr.next_proto_id,
-		.tos = spec->hdr.type_of_service,
-	};
-	ipv4->mask = (struct ibv_flow_ipv4_ext_filter){
-		.src_ip = mask->hdr.src_addr,
-		.dst_ip = mask->hdr.dst_addr,
-		.proto = mask->hdr.next_proto_id,
-		.tos = mask->hdr.type_of_service,
-	};
-	/* Remove unwanted bits from values. */
-	ipv4->val.src_ip &= ipv4->mask.src_ip;
-	ipv4->val.dst_ip &= ipv4->mask.dst_ip;
-	ipv4->val.proto &= ipv4->mask.proto;
-	ipv4->val.tos &= ipv4->mask.tos;
+
+	parser->layer = HASH_RXQ_IPV4;
+	if (spec) {
+		if (!mask)
+			mask = default_mask;
+		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
+			.src_ip = spec->hdr.src_addr,
+			.dst_ip = spec->hdr.dst_addr,
+			.proto = spec->hdr.next_proto_id,
+			.tos = spec->hdr.type_of_service,
+		};
+		ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
+			.src_ip = mask->hdr.src_addr,
+			.dst_ip = mask->hdr.dst_addr,
+			.proto = mask->hdr.next_proto_id,
+			.tos = mask->hdr.type_of_service,
+		};
+		/* Remove unwanted bits from values. */
+		ipv4.val.src_ip &= ipv4.mask.src_ip;
+		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
+		ipv4.val.proto &= ipv4.mask.proto;
+		ipv4.val.tos &= ipv4.mask.tos;
+	}
+	mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
 	return 0;
 }
 
@@ -800,40 +1356,39 @@ mlx5_flow_create_ipv6(const struct rte_flow_item *item,
 	const struct rte_flow_item_ipv6 *spec = item->spec;
 	const struct rte_flow_item_ipv6 *mask = item->mask;
 	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
-	struct ibv_flow_spec_ipv6 *ipv6;
 	unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
-	unsigned int i;
-
-	++parser->ibv_attr->num_of_specs;
-	parser->hash_fields = (IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6);
-	ipv6 = (void *)((uintptr_t)parser->ibv_attr + parser->offset);
-	*ipv6 = (struct ibv_flow_spec_ipv6) {
+	struct ibv_flow_spec_ipv6 ipv6 = {
 		.type = parser->inner | IBV_FLOW_SPEC_IPV6,
 		.size = ipv6_size,
 	};
-	if (!spec)
-		return 0;
-	if (!mask)
-		mask = default_mask;
-	memcpy(ipv6->val.src_ip, spec->hdr.src_addr,
-	       RTE_DIM(ipv6->val.src_ip));
-	memcpy(ipv6->val.dst_ip, spec->hdr.dst_addr,
-	       RTE_DIM(ipv6->val.dst_ip));
-	memcpy(ipv6->mask.src_ip, mask->hdr.src_addr,
-	       RTE_DIM(ipv6->mask.src_ip));
-	memcpy(ipv6->mask.dst_ip, mask->hdr.dst_addr,
-	       RTE_DIM(ipv6->mask.dst_ip));
-	ipv6->mask.flow_label = mask->hdr.vtc_flow;
-	ipv6->mask.next_hdr = mask->hdr.proto;
-	ipv6->mask.hop_limit = mask->hdr.hop_limits;
-	/* Remove unwanted bits from values. */
-	for (i = 0; i < RTE_DIM(ipv6->val.src_ip); ++i) {
-		ipv6->val.src_ip[i] &= ipv6->mask.src_ip[i];
-		ipv6->val.dst_ip[i] &= ipv6->mask.dst_ip[i];
+
+	parser->layer = HASH_RXQ_IPV6;
+	if (spec) {
+		unsigned int i;
+
+		if (!mask)
+			mask = default_mask;
+		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
+		       RTE_DIM(ipv6.val.src_ip));
+		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
+		       RTE_DIM(ipv6.val.dst_ip));
+		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
+		       RTE_DIM(ipv6.mask.src_ip));
+		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
+		       RTE_DIM(ipv6.mask.dst_ip));
+		ipv6.mask.flow_label = mask->hdr.vtc_flow;
+		ipv6.mask.next_hdr = mask->hdr.proto;
+		ipv6.mask.hop_limit = mask->hdr.hop_limits;
+		/* Remove unwanted bits from values. */
+		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
+			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
+			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
+		}
+		ipv6.val.flow_label &= ipv6.mask.flow_label;
+		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
+		ipv6.val.hop_limit &= ipv6.mask.hop_limit;
 	}
-	ipv6->val.flow_label &= ipv6->mask.flow_label;
-	ipv6->val.next_hdr &= ipv6->mask.next_hdr;
-	ipv6->val.hop_limit &= ipv6->mask.hop_limit;
+	mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
 	return 0;
 }
 
@@ -855,28 +1410,28 @@ mlx5_flow_create_udp(const struct rte_flow_item *item,
 	const struct rte_flow_item_udp *spec = item->spec;
 	const struct rte_flow_item_udp *mask = item->mask;
 	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
-	struct ibv_flow_spec_tcp_udp *udp;
 	unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
-
-	++parser->ibv_attr->num_of_specs;
-	parser->hash_fields |= (IBV_RX_HASH_SRC_PORT_UDP |
-				IBV_RX_HASH_DST_PORT_UDP);
-	udp = (void *)((uintptr_t)parser->ibv_attr + parser->offset);
-	*udp = (struct ibv_flow_spec_tcp_udp) {
+	struct ibv_flow_spec_tcp_udp udp = {
 		.type = parser->inner | IBV_FLOW_SPEC_UDP,
 		.size = udp_size,
 	};
-	if (!spec)
-		return 0;
-	if (!mask)
-		mask = default_mask;
-	udp->val.dst_port = spec->hdr.dst_port;
-	udp->val.src_port = spec->hdr.src_port;
-	udp->mask.dst_port = mask->hdr.dst_port;
-	udp->mask.src_port = mask->hdr.src_port;
-	/* Remove unwanted bits from values. */
-	udp->val.src_port &= udp->mask.src_port;
-	udp->val.dst_port &= udp->mask.dst_port;
+
+	if (parser->layer == HASH_RXQ_IPV4)
+		parser->layer = HASH_RXQ_UDPV4;
+	else
+		parser->layer = HASH_RXQ_UDPV6;
+	if (spec) {
+		if (!mask)
+			mask = default_mask;
+		udp.val.dst_port = spec->hdr.dst_port;
+		udp.val.src_port = spec->hdr.src_port;
+		udp.mask.dst_port = mask->hdr.dst_port;
+		udp.mask.src_port = mask->hdr.src_port;
+		/* Remove unwanted bits from values. */
+		udp.val.src_port &= udp.mask.src_port;
+		udp.val.dst_port &= udp.mask.dst_port;
+	}
+	mlx5_flow_create_copy(parser, &udp, udp_size);
 	return 0;
 }
 
@@ -898,28 +1453,28 @@ mlx5_flow_create_tcp(const struct rte_flow_item *item,
 	const struct rte_flow_item_tcp *spec = item->spec;
 	const struct rte_flow_item_tcp *mask = item->mask;
 	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
-	struct ibv_flow_spec_tcp_udp *tcp;
 	unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
-
-	++parser->ibv_attr->num_of_specs;
-	parser->hash_fields |= (IBV_RX_HASH_SRC_PORT_TCP |
-				IBV_RX_HASH_DST_PORT_TCP);
-	tcp = (void *)((uintptr_t)parser->ibv_attr + parser->offset);
-	*tcp = (struct ibv_flow_spec_tcp_udp) {
+	struct ibv_flow_spec_tcp_udp tcp = {
 		.type = parser->inner | IBV_FLOW_SPEC_TCP,
 		.size = tcp_size,
 	};
-	if (!spec)
-		return 0;
-	if (!mask)
-		mask = default_mask;
-	tcp->val.dst_port = spec->hdr.dst_port;
-	tcp->val.src_port = spec->hdr.src_port;
-	tcp->mask.dst_port = mask->hdr.dst_port;
-	tcp->mask.src_port = mask->hdr.src_port;
-	/* Remove unwanted bits from values. */
-	tcp->val.src_port &= tcp->mask.src_port;
-	tcp->val.dst_port &= tcp->mask.dst_port;
+
+	if (parser->layer == HASH_RXQ_IPV4)
+		parser->layer = HASH_RXQ_TCPV4;
+	else
+		parser->layer = HASH_RXQ_TCPV6;
+	if (spec) {
+		if (!mask)
+			mask = default_mask;
+		tcp.val.dst_port = spec->hdr.dst_port;
+		tcp.val.src_port = spec->hdr.src_port;
+		tcp.mask.dst_port = mask->hdr.dst_port;
+		tcp.mask.src_port = mask->hdr.src_port;
+		/* Remove unwanted bits from values. */
+		tcp.val.src_port &= tcp.mask.src_port;
+		tcp.val.dst_port &= tcp.mask.dst_port;
+	}
+	mlx5_flow_create_copy(parser, &tcp, tcp_size);
 	return 0;
 }
 
@@ -941,31 +1496,29 @@ mlx5_flow_create_vxlan(const struct rte_flow_item *item,
 	const struct rte_flow_item_vxlan *spec = item->spec;
 	const struct rte_flow_item_vxlan *mask = item->mask;
 	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
-	struct ibv_flow_spec_tunnel *vxlan;
 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
+	struct ibv_flow_spec_tunnel vxlan = {
+		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
+		.size = size,
+	};
 	union vni {
 		uint32_t vlan_id;
 		uint8_t vni[4];
 	} id;
 
-	++parser->ibv_attr->num_of_specs;
 	id.vni[0] = 0;
-	vxlan = (void *)((uintptr_t)parser->ibv_attr + parser->offset);
-	*vxlan = (struct ibv_flow_spec_tunnel) {
-		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
-		.size = size,
-	};
 	parser->inner = IBV_FLOW_SPEC_INNER;
-	if (!spec)
-		return 0;
-	if (!mask)
-		mask = default_mask;
-	memcpy(&id.vni[1], spec->vni, 3);
-	vxlan->val.tunnel_id = id.vlan_id;
-	memcpy(&id.vni[1], mask->vni, 3);
-	vxlan->mask.tunnel_id = id.vlan_id;
-	/* Remove unwanted bits from values. */
-	vxlan->val.tunnel_id &= vxlan->mask.tunnel_id;
+	if (spec) {
+		if (!mask)
+			mask = default_mask;
+		memcpy(&id.vni[1], spec->vni, 3);
+		vxlan.val.tunnel_id = id.vlan_id;
+		memcpy(&id.vni[1], mask->vni, 3);
+		vxlan.mask.tunnel_id = id.vlan_id;
+		/* Remove unwanted bits from values. */
+		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
+	}
+	mlx5_flow_create_copy(parser, &vxlan, size);
 	return 0;
 }
 
@@ -980,18 +1533,15 @@ mlx5_flow_create_vxlan(const struct rte_flow_item *item,
 static int
 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
 {
-	struct ibv_flow_spec_action_tag *tag;
 	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
-
-	assert(parser->mark);
-	tag = (void *)((uintptr_t)parser->ibv_attr + parser->offset);
-	*tag = (struct ibv_flow_spec_action_tag){
+	struct ibv_flow_spec_action_tag tag = {
 		.type = IBV_FLOW_SPEC_ACTION_TAG,
 		.size = size,
 		.tag_id = mlx5_flow_mark_set(mark_id),
 	};
-	++parser->ibv_attr->num_of_specs;
-	parser->offset += size;
+
+	assert(parser->mark);
+	mlx5_flow_create_copy(parser, &tag, size);
 	return 0;
 }
 
@@ -1002,196 +1552,188 @@ mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
  *   Pointer to private structure.
  * @param parser
  *   Internal parser structure.
+ * @param flow
+ *   Pointer to the rte_flow.
  * @param[out] error
  *   Perform verbose error reporting if not NULL.
  *
  * @return
- *   A flow if the rule could be created.
+ *   0 on success, errno value on failure.
  */
-static struct rte_flow *
+static int
 priv_flow_create_action_queue_drop(struct priv *priv,
 				   struct mlx5_flow_parse *parser,
+				   struct rte_flow *flow,
 				   struct rte_flow_error *error)
 {
-	struct rte_flow *rte_flow;
 	struct ibv_flow_spec_action_drop *drop;
 	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
+	int err = 0;
 
 	assert(priv->pd);
 	assert(priv->ctx);
-	rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
-	if (!rte_flow) {
-		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
-				   NULL, "cannot allocate flow memory");
-		return NULL;
-	}
-	rte_flow->drop = 1;
-	drop = (void *)((uintptr_t)parser->ibv_attr + parser->offset);
+	flow->drop = 1;
+	drop = (void *)((uintptr_t)parser->drop_q.ibv_attr +
+			parser->drop_q.offset);
 	*drop = (struct ibv_flow_spec_action_drop){
 			.type = IBV_FLOW_SPEC_ACTION_DROP,
 			.size = size,
 	};
-	++parser->ibv_attr->num_of_specs;
-	parser->offset += sizeof(struct ibv_flow_spec_action_drop);
-	rte_flow->drxq.ibv_attr = parser->ibv_attr;
+	++parser->drop_q.ibv_attr->num_of_specs;
+	parser->drop_q.offset += size;
 	if (!priv->dev->data->dev_started)
-		return rte_flow;
-	rte_flow->drxq.hrxq.qp = priv->flow_drop_queue->qp;
-	rte_flow->drxq.ibv_flow = ibv_create_flow(rte_flow->drxq.hrxq.qp,
-						  rte_flow->drxq.ibv_attr);
-	if (!rte_flow->drxq.ibv_flow) {
+		return 0;
+	flow->drxq.ibv_attr = parser->drop_q.ibv_attr;
+	parser->drop_q.ibv_attr = NULL;
+	flow->drxq.ibv_flow = ibv_create_flow(priv->flow_drop_queue->qp,
+					      flow->drxq.ibv_attr);
+	if (!flow->drxq.ibv_flow) {
 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
 				   NULL, "flow rule creation failure");
+		err = ENOMEM;
 		goto error;
 	}
-	return rte_flow;
+	return 0;
 error:
-	assert(rte_flow);
-	rte_free(rte_flow);
-	return NULL;
+	assert(flow);
+	if (flow->drxq.ibv_flow) {
+		claim_zero(ibv_destroy_flow(flow->drxq.ibv_flow));
+		flow->drxq.ibv_flow = NULL;
+	}
+	if (flow->drxq.ibv_attr) {
+		rte_free(flow->drxq.ibv_attr);
+		flow->drxq.ibv_attr = NULL;
+	}
+	return err;
 }
 
 /**
- * Complete flow rule creation.
+ * Create hash Rx queues when RSS is enabled.
  *
  * @param priv
  *   Pointer to private structure.
  * @param parser
- *   MLX5 flow parser attributes (filled by mlx5_flow_validate()).
+ *   Internal parser structure.
+ * @param flow
+ *   Pointer to the rte_flow.
  * @param[out] error
  *   Perform verbose error reporting if not NULL.
  *
  * @return
- *   A flow if the rule could be created.
+ *   0 on success, an errno value otherwise and rte_errno is set.
  */
-static struct rte_flow *
-priv_flow_create_action_queue(struct priv *priv,
-			      struct mlx5_flow_parse *parser,
-			      struct rte_flow_error *error)
+static int
+priv_flow_create_action_queue_rss(struct priv *priv,
+				  struct mlx5_flow_parse *parser,
+				  struct rte_flow *flow,
+				  struct rte_flow_error *error)
 {
-	struct rte_flow *rte_flow;
 	unsigned int i;
 
-	assert(priv->pd);
-	assert(priv->ctx);
-	assert(!parser->drop);
-	rte_flow = rte_calloc(__func__, 1,
-			      sizeof(*rte_flow) +
-			      parser->queues_n * sizeof(uint16_t),
-			      0);
-	if (!rte_flow) {
-		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
-				   NULL, "cannot allocate flow memory");
-		return NULL;
-	}
-	rte_flow->mark = parser->mark;
-	rte_flow->frxq.ibv_attr = parser->ibv_attr;
-	rte_flow->queues = (uint16_t (*)[])(rte_flow + 1);
-	memcpy(rte_flow->queues, parser->queues,
-	       parser->queues_n * sizeof(uint16_t));
-	rte_flow->queues_n = parser->queues_n;
-	rte_flow->frxq.hash_fields = parser->hash_fields;
-	rte_flow->frxq.hrxq = mlx5_priv_hrxq_get(priv, rss_hash_default_key,
-						 rss_hash_default_key_len,
-						 parser->hash_fields,
-						 (*rte_flow->queues),
-						 rte_flow->queues_n);
-	if (!rte_flow->frxq.hrxq) {
-		rte_flow->frxq.hrxq =
-			mlx5_priv_hrxq_new(priv, rss_hash_default_key,
-					   rss_hash_default_key_len,
-					   parser->hash_fields,
-					   (*rte_flow->queues),
-					   rte_flow->queues_n);
-		if (!rte_flow->frxq.hrxq) {
+	for (i = 0; i != hash_rxq_init_n; ++i) {
+		uint64_t hash_fields;
+
+		if (!parser->queue[i].ibv_attr)
+			continue;
+		flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
+		parser->queue[i].ibv_attr = NULL;
+		hash_fields = hash_rxq_init[i].hash_fields;
+		flow->frxq[i].hrxq =
+			mlx5_priv_hrxq_get(priv,
+					   parser->rss_conf.rss_key,
+					   parser->rss_conf.rss_key_len,
+					   hash_fields,
+					   parser->queues,
+					   hash_fields ? parser->queues_n : 1);
+		if (flow->frxq[i].hrxq)
+			continue;
+		flow->frxq[i].hrxq =
+			mlx5_priv_hrxq_new(priv,
+					   parser->rss_conf.rss_key,
+					   parser->rss_conf.rss_key_len,
+					   hash_fields,
+					   parser->queues,
+					   hash_fields ? parser->queues_n : 1);
+		if (!flow->frxq[i].hrxq) {
 			rte_flow_error_set(error, ENOMEM,
 					   RTE_FLOW_ERROR_TYPE_HANDLE,
 					   NULL, "cannot create hash rxq");
-			goto error;
+			return ENOMEM;
 		}
 	}
-	for (i = 0; i != parser->queues_n; ++i) {
-		struct mlx5_rxq_data *q =
-			(*priv->rxqs)[parser->queues[i]];
-
-		q->mark |= parser->mark;
-	}
-	if (!priv->dev->data->dev_started)
-		return rte_flow;
-	rte_flow->frxq.ibv_flow = ibv_create_flow(rte_flow->frxq.hrxq->qp,
-						  rte_flow->frxq.ibv_attr);
-	if (!rte_flow->frxq.ibv_flow) {
-		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
-				   NULL, "flow rule creation failure");
-		goto error;
-	}
-	return rte_flow;
-error:
-	assert(rte_flow);
-	if (rte_flow->frxq.hrxq)
-		mlx5_priv_hrxq_release(priv, rte_flow->frxq.hrxq);
-	rte_free(rte_flow);
-	return NULL;
+	return 0;
 }
 
 /**
- * Validate a flow.
+ * Complete flow rule creation.
  *
  * @param priv
  *   Pointer to private structure.
- * @param[in] attr
- *   Flow rule attributes.
- * @param[in] pattern
- *   Pattern specification (list terminated by the END pattern item).
- * @param[in] actions
- *   Associated actions (list terminated by the END action).
+ * @param parser
+ *   Internal parser structure.
+ * @param flow
+ *   Pointer to the rte_flow.
  * @param[out] error
  *   Perform verbose error reporting if not NULL.
- * @param[in,out] parser
- *   MLX5 parser structure.
  *
  * @return
- *   0 on success, negative errno value on failure.
+ *   0 on success, an errno value otherwise and rte_errno is set.
  */
 static int
-priv_flow_validate(struct priv *priv,
-		   const struct rte_flow_attr *attr,
-		   const struct rte_flow_item items[],
-		   const struct rte_flow_action actions[],
-		   struct rte_flow_error *error,
-		   struct mlx5_flow_parse *parser)
+priv_flow_create_action_queue(struct priv *priv,
+			      struct mlx5_flow_parse *parser,
+			      struct rte_flow *flow,
+			      struct rte_flow_error *error)
 {
-	int err;
+	int err = 0;
+	unsigned int i;
 
-	err = priv_flow_convert(priv, attr, items, actions, error, parser);
+	assert(priv->pd);
+	assert(priv->ctx);
+	assert(!parser->drop);
+	err = priv_flow_create_action_queue_rss(priv, parser, flow, error);
 	if (err)
-		goto exit;
-	if (parser->mark)
-		parser->offset += sizeof(struct ibv_flow_spec_action_tag);
-	parser->ibv_attr = rte_malloc(__func__, parser->offset, 0);
-	if (!parser->ibv_attr) {
-		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
-				   NULL, "cannot allocate ibv_attr memory");
-		err = rte_errno;
-		goto exit;
+		goto error;
+	if (!priv->dev->data->dev_started)
+		return 0;
+	for (i = 0; i != hash_rxq_init_n; ++i) {
+		if (!flow->frxq[i].hrxq)
+			continue;
+		flow->frxq[i].ibv_flow =
+			ibv_create_flow(flow->frxq[i].hrxq->qp,
+					flow->frxq[i].ibv_attr);
+		if (!flow->frxq[i].ibv_flow) {
+			rte_flow_error_set(error, ENOMEM,
+					   RTE_FLOW_ERROR_TYPE_HANDLE,
+					   NULL, "flow rule creation failure");
+			err = ENOMEM;
+			goto error;
+		}
+		DEBUG("%p type %d QP %p ibv_flow %p",
+		      (void *)flow, i,
+		      (void *)flow->frxq[i].hrxq,
+		      (void *)flow->frxq[i].ibv_flow);
+	}
+	for (i = 0; i != parser->queues_n; ++i) {
+		struct mlx5_rxq_data *q =
+			(*priv->rxqs)[parser->queues[i]];
+
+		q->mark |= parser->mark;
 	}
-	*parser->ibv_attr = (struct ibv_flow_attr){
-		.type = IBV_FLOW_ATTR_NORMAL,
-		.size = sizeof(struct ibv_flow_attr),
-		.priority = attr->priority,
-		.num_of_specs = 0,
-		.port = 0,
-		.flags = 0,
-	};
-	err = priv_flow_convert(priv, attr, items, actions, error, parser);
-	if (err || parser->create)
-		goto exit;
-	if (parser->mark)
-		mlx5_flow_create_flag_mark(parser, parser->mark_id);
 	return 0;
-exit:
-	if (parser->ibv_attr)
-		rte_free(parser->ibv_attr);
+error:
+	assert(flow);
+	for (i = 0; i != hash_rxq_init_n; ++i) {
+		if (flow->frxq[i].ibv_flow) {
+			struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
+
+			claim_zero(ibv_destroy_flow(ibv_flow));
+		}
+		if (flow->frxq[i].hrxq)
+			mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
+		if (flow->frxq[i].ibv_attr)
+			rte_free(flow->frxq[i].ibv_attr);
+	}
 	return err;
 }
 
@@ -1223,24 +1765,52 @@ priv_flow_create(struct priv *priv,
 		 struct rte_flow_error *error)
 {
 	struct mlx5_flow_parse parser = { .create = 1, };
-	struct rte_flow *flow;
+	struct rte_flow *flow = NULL;
+	unsigned int i;
 	int err;
 
-	err = priv_flow_validate(priv, attr, items, actions, error, &parser);
+	err = priv_flow_convert(priv, attr, items, actions, error, &parser);
 	if (err)
 		goto exit;
+	flow = rte_calloc(__func__, 1,
+			  sizeof(*flow) + parser.queues_n * sizeof(uint16_t),
+			  0);
+	if (!flow) {
+		rte_flow_error_set(error, ENOMEM,
+				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				   NULL,
+				   "cannot allocate flow memory");
+		return NULL;
+	}
+	/* Copy queues configuration. */
+	flow->queues = (uint16_t (*)[])(flow + 1);
+	memcpy(flow->queues, parser.queues, parser.queues_n * sizeof(uint16_t));
+	flow->queues_n = parser.queues_n;
+	/* Copy RSS configuration. */
+	flow->rss_conf = parser.rss_conf;
+	flow->rss_conf.rss_key = flow->rss_key;
+	memcpy(flow->rss_key, parser.rss_key, parser.rss_conf.rss_key_len);
+	/* finalise the flow. */
 	if (parser.drop)
-		flow = priv_flow_create_action_queue_drop(priv, &parser, error);
+		err = priv_flow_create_action_queue_drop(priv, &parser, flow,
+							 error);
 	else
-		flow = priv_flow_create_action_queue(priv, &parser, error);
-	if (!flow)
+		err = priv_flow_create_action_queue(priv, &parser, flow, error);
+	if (err)
 		goto exit;
 	TAILQ_INSERT_TAIL(list, flow, next);
 	DEBUG("Flow created %p", (void *)flow);
 	return flow;
 exit:
-	if (parser.ibv_attr)
-		rte_free(parser.ibv_attr);
+	if (parser.drop) {
+		rte_free(parser.drop_q.ibv_attr);
+	} else {
+		for (i = 0; i != hash_rxq_init_n; ++i) {
+			if (parser.queue[i].ibv_attr)
+				rte_free(parser.queue[i].ibv_attr);
+		}
+	}
+	rte_free(flow);
 	return NULL;
 }
 
@@ -1262,7 +1832,7 @@ mlx5_flow_validate(struct rte_eth_dev *dev,
 	struct mlx5_flow_parse parser = { .create = 0, };
 
 	priv_lock(priv);
-	ret = priv_flow_validate(priv, attr, items, actions, error, &parser);
+	ret = priv_flow_convert(priv, attr, items, actions, error, &parser);
 	priv_unlock(priv);
 	return ret;
 }
@@ -1306,16 +1876,11 @@ priv_flow_destroy(struct priv *priv,
 		  struct rte_flow *flow)
 {
 	unsigned int i;
-	uint16_t *queues;
-	uint16_t queues_n;
 
 	if (flow->drop || !flow->mark)
 		goto free;
-	queues = flow->frxq.hrxq->ind_table->queues;
-	queues_n = flow->frxq.hrxq->ind_table->queues_n;
-	for (i = 0; i != queues_n; ++i) {
+	for (i = 0; i != flow->queues_n; ++i) {
 		struct rte_flow *tmp;
-		struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[queues[i]];
 		int mark = 0;
 
 		/*
@@ -1324,18 +1889,24 @@ priv_flow_destroy(struct priv *priv,
 		 */
 		TAILQ_FOREACH(tmp, list, next) {
 			unsigned int j;
+			uint16_t *tqs = NULL;
+			uint16_t tq_n = 0;
 
 			if (!tmp->mark)
 				continue;
-			for (j = 0;
-			     (j != tmp->frxq.hrxq->ind_table->queues_n) &&
-			     !mark;
-			     j++)
-				if (tmp->frxq.hrxq->ind_table->queues[j] ==
-				    queues[i])
+			for (j = 0; j != hash_rxq_init_n; ++j) {
+				if (!tmp->frxq[j].hrxq)
+					continue;
+				tqs = tmp->frxq[j].hrxq->ind_table->queues;
+				tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
+			}
+			if (!tq_n)
+				continue;
+			for (j = 0; (j != tq_n) && !mark; j++)
+				if (tqs[j] == (*flow->queues)[i])
 					mark = 1;
 		}
-		rxq_data->mark = mark;
+		(*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
 	}
 free:
 	if (flow->drop) {
@@ -1343,10 +1914,16 @@ priv_flow_destroy(struct priv *priv,
 			claim_zero(ibv_destroy_flow(flow->drxq.ibv_flow));
 		rte_free(flow->drxq.ibv_attr);
 	} else {
-		mlx5_priv_hrxq_release(priv, flow->frxq.hrxq);
-		if (flow->frxq.ibv_flow)
-			claim_zero(ibv_destroy_flow(flow->frxq.ibv_flow));
-		rte_free(flow->frxq.ibv_attr);
+		for (i = 0; i != hash_rxq_init_n; ++i) {
+			struct mlx5_flow *frxq = &flow->frxq[i];
+
+			if (frxq->ibv_flow)
+				claim_zero(ibv_destroy_flow(frxq->ibv_flow));
+			if (frxq->hrxq)
+				mlx5_priv_hrxq_release(priv, frxq->hrxq);
+			if (frxq->ibv_attr)
+				rte_free(frxq->ibv_attr);
+		}
 	}
 	TAILQ_REMOVE(list, flow, next);
 	DEBUG("Flow destroyed %p", (void *)flow);
@@ -1497,18 +2074,35 @@ priv_flow_stop(struct priv *priv, struct mlx5_flows *list)
 	struct rte_flow *flow;
 
 	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
-		assert(!flow->drop);
-		claim_zero(ibv_destroy_flow(flow->frxq.ibv_flow));
-		flow->frxq.ibv_flow = NULL;
-		mlx5_priv_hrxq_release(priv, flow->frxq.hrxq);
-		flow->frxq.hrxq = NULL;
+		unsigned int i;
+
+		if (flow->drop) {
+			if (!flow->drxq.ibv_flow)
+				continue;
+			claim_zero(ibv_destroy_flow(flow->drxq.ibv_flow));
+			flow->drxq.ibv_flow = NULL;
+			/* Next flow. */
+			continue;
+		}
 		if (flow->mark) {
-			unsigned int n;
-			struct mlx5_ind_table_ibv *ind_tbl =
-				flow->frxq.hrxq->ind_table;
+			struct mlx5_ind_table_ibv *ind_tbl = NULL;
 
-			for (n = 0; n < ind_tbl->queues_n; ++n)
-				(*priv->rxqs)[ind_tbl->queues[n]]->mark = 0;
+			for (i = 0; i != hash_rxq_init_n; ++i) {
+				if (!flow->frxq[i].hrxq)
+					continue;
+				ind_tbl = flow->frxq[i].hrxq->ind_table;
+			}
+			assert(ind_tbl);
+			for (i = 0; i != ind_tbl->queues_n; ++i)
+				(*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
+		}
+		for (i = 0; i != hash_rxq_init_n; ++i) {
+			if (!flow->frxq[i].ibv_flow)
+				continue;
+			claim_zero(ibv_destroy_flow(flow->frxq[i].ibv_flow));
+			flow->frxq[i].ibv_flow = NULL;
+			mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
+			flow->frxq[i].hrxq = NULL;
 		}
 		DEBUG("Flow %p removed", (void *)flow);
 	}
@@ -1531,48 +2125,61 @@ priv_flow_start(struct priv *priv, struct mlx5_flows *list)
 	struct rte_flow *flow;
 
 	TAILQ_FOREACH(flow, list, next) {
-		if (flow->frxq.hrxq)
-			goto flow_create;
-		flow->frxq.hrxq =
-			mlx5_priv_hrxq_get(priv, rss_hash_default_key,
-					   rss_hash_default_key_len,
-					   flow->frxq.hash_fields,
-					   (*flow->queues),
-					   flow->queues_n);
-		if (flow->frxq.hrxq)
-			goto flow_create;
-		flow->frxq.hrxq =
-			mlx5_priv_hrxq_new(priv, rss_hash_default_key,
-					   rss_hash_default_key_len,
-					   flow->frxq.hash_fields,
-					   (*flow->queues),
-					   flow->queues_n);
-		if (!flow->frxq.hrxq) {
-			DEBUG("Flow %p cannot be applied",
-			      (void *)flow);
-			rte_errno = EINVAL;
-			return rte_errno;
+		unsigned int i;
+
+		if (flow->drop) {
+			flow->drxq.ibv_flow =
+				ibv_create_flow(priv->flow_drop_queue->qp,
+						flow->drxq.ibv_attr);
+			if (!flow->drxq.ibv_flow) {
+				DEBUG("Flow %p cannot be applied",
+				      (void *)flow);
+				rte_errno = EINVAL;
+				return rte_errno;
+			}
+			DEBUG("Flow %p applied", (void *)flow);
+			/* Next flow. */
+			continue;
 		}
+		for (i = 0; i != hash_rxq_init_n; ++i) {
+			if (!flow->frxq[i].ibv_attr)
+				continue;
+			flow->frxq[i].hrxq =
+				mlx5_priv_hrxq_get(priv, flow->rss_conf.rss_key,
+						   flow->rss_conf.rss_key_len,
+						   hash_rxq_init[i].hash_fields,
+						   (*flow->queues),
+						   flow->queues_n);
+			if (flow->frxq[i].hrxq)
+				goto flow_create;
+			flow->frxq[i].hrxq =
+				mlx5_priv_hrxq_new(priv, flow->rss_conf.rss_key,
+						   flow->rss_conf.rss_key_len,
+						   hash_rxq_init[i].hash_fields,
+						   (*flow->queues),
+						   flow->queues_n);
+			if (!flow->frxq[i].hrxq) {
+				DEBUG("Flow %p cannot be applied",
+				      (void *)flow);
+				rte_errno = EINVAL;
+				return rte_errno;
+			}
 flow_create:
-		flow->frxq.ibv_flow = ibv_create_flow(flow->frxq.hrxq->qp,
-						      flow->frxq.ibv_attr);
-		if (!flow->frxq.ibv_flow) {
-			DEBUG("Flow %p cannot be applied", (void *)flow);
-			rte_errno = EINVAL;
-			return rte_errno;
-		}
-		DEBUG("Flow %p applied", (void *)flow);
-		if (flow->mark) {
-			unsigned int n;
-
-			for (n = 0;
-			     n < flow->frxq.hrxq->ind_table->queues_n;
-			     ++n) {
-				uint16_t idx =
-					flow->frxq.hrxq->ind_table->queues[n];
-				(*priv->rxqs)[idx]->mark = 1;
+			flow->frxq[i].ibv_flow =
+				ibv_create_flow(flow->frxq[i].hrxq->qp,
+						flow->frxq[i].ibv_attr);
+			if (!flow->frxq[i].ibv_flow) {
+				DEBUG("Flow %p cannot be applied",
+				      (void *)flow);
+				rte_errno = EINVAL;
+				return rte_errno;
 			}
+			DEBUG("Flow %p applied", (void *)flow);
 		}
+		if (!flow->mark)
+			continue;
+		for (i = 0; i != flow->queues_n; ++i)
+			(*priv->rxqs)[(*flow->queues)[i]]->mark = 1;
 	}
 	return 0;
 }
@@ -1648,10 +2255,7 @@ mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
 	};
 	struct rte_flow_action actions[] = {
 		{
-			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
-			.conf = &(struct rte_flow_action_queue){
-				.index = 0,
-			},
+			.type = RTE_FLOW_ACTION_TYPE_RSS,
 		},
 		{
 			.type = RTE_FLOW_ACTION_TYPE_END,
@@ -1659,7 +2263,23 @@ mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
 	};
 	struct rte_flow *flow;
 	struct rte_flow_error error;
-
+	unsigned int i;
+	union {
+		struct rte_flow_action_rss rss;
+		struct {
+			const struct rte_eth_rss_conf *rss_conf;
+			uint16_t num;
+			uint16_t queue[RTE_MAX_QUEUES_PER_PORT];
+		} local;
+	} action_rss;
+
+	if (!priv->reta_idx_n)
+		return EINVAL;
+	for (i = 0; i != priv->reta_idx_n; ++i)
+		action_rss.local.queue[i] = (*priv->reta_idx)[i];
+	action_rss.local.rss_conf = &priv->rss_conf;
+	action_rss.local.num = priv->reta_idx_n;
+	actions[0].conf = (const void *)&action_rss.rss;
 	flow = priv_flow_create(priv, &priv->ctrl_flows, &attr, items, actions,
 				&error);
 	if (!flow)
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * Re: [dpdk-dev] [PATCH v2 28/30] net/mlx5: handle RSS hash configuration in RSS flow
  2017-10-05 12:50   ` [dpdk-dev] [PATCH v2 28/30] net/mlx5: handle RSS hash configuration in RSS flow Nelio Laranjeiro
@ 2017-10-06 17:30     ` Yongseok Koh
  0 siblings, 0 replies; 129+ messages in thread
From: Yongseok Koh @ 2017-10-06 17:30 UTC (permalink / raw)
  To: Nelio Laranjeiro; +Cc: dev, adrien.mazarguil, ferruh.yigit
On Thu, Oct 05, 2017 at 02:50:00PM +0200, Nelio Laranjeiro wrote:
> Add RSS support according to the RSS configuration.
> 
> A special case is handled, when the pattern does not cover the RSS hash
> configuration request such as:
> 
>  flow create 0 ingress pattern eth / end actions rss queues 0 1 end / end
> 
> In such situation with the default configuration of testpmd RSS i.e. IP,
> it should be converted to 3 Verbs flow to handle correctly the request:
> 
>  1. IPv4 flow, an extra IPv4 wildcard specification needs to be added in
>     the conversion.
>  2. IPv6 flow, same as for IPv4.
>  3. Ethernet followed by any other protocol on which no RSS can be
>     performed and thus the traffic will be redirected to the first queue of
>     the user request.
> 
> The same kind of issue is handled if the RSS is performed only on UDPv4 or
> UDPv6 or TCPv*.
> 
> This does not handle a priority conflict which can occur if the user adds
> several colliding flow rules.  Currently in the example above, the request
> is already consuming 2 priorities (1 for IPv4/IPV6 matching rule priority
> and one for Ethernet matching rule priority + 1).
> 
> Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
> ---
Acked-by: Yongseok Koh <yskoh@mellanox.com>
 
Thanks
^ permalink raw reply	[flat|nested] 129+ messages in thread 
 
- * [dpdk-dev] [PATCH v2 29/30] net/mlx5: support flow director
       [not found] ` <cover.1507207731.git.nelio.laranjeiro@6wind.com>
                     ` (27 preceding siblings ...)
  2017-10-05 12:50   ` [dpdk-dev] [PATCH v2 28/30] net/mlx5: handle RSS hash configuration in RSS flow Nelio Laranjeiro
@ 2017-10-05 12:50   ` Nelio Laranjeiro
  2017-10-06  5:46     ` Yongseok Koh
  2017-10-05 12:50   ` [dpdk-dev] [PATCH v2 30/30] net/mlx5: add new operations for isolated mode Nelio Laranjeiro
  29 siblings, 1 reply; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-05 12:50 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
Support the same functionalities as in
commit cf521eaa3c76 ("net/mlx5: remove flow director support").
This implementation is done on top of the generic flow API.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 doc/guides/nics/features/mlx5.ini |   1 +
 doc/guides/nics/mlx5.rst          |   2 +
 drivers/net/mlx5/mlx5_flow.c      | 510 +++++++++++++++++++++++++++++++++++---
 3 files changed, 481 insertions(+), 32 deletions(-)
diff --git a/doc/guides/nics/features/mlx5.ini b/doc/guides/nics/features/mlx5.ini
index 34a796d..c363639 100644
--- a/doc/guides/nics/features/mlx5.ini
+++ b/doc/guides/nics/features/mlx5.ini
@@ -23,6 +23,7 @@ RSS key update       = Y
 RSS reta update      = Y
 SR-IOV               = Y
 VLAN filter          = Y
+Flow director        = Y
 Flow API             = Y
 CRC offload          = Y
 VLAN offload         = Y
diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index 09fb738..d24941a 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -89,6 +89,8 @@ Features
 - Promiscuous mode.
 - Multicast promiscuous mode.
 - Hardware checksum offloads.
+- Flow director (RTE_FDIR_MODE_PERFECT, RTE_FDIR_MODE_PERFECT_MAC_VLAN and
+  RTE_ETH_FDIR_REJECT).
 - Flow API.
 - Multiple process.
 - KVM and VMware ESX SR-IOV modes are supported.
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index d821c79..46a8cde 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -430,39 +430,28 @@ static const struct rte_flow_ops mlx5_flow_ops = {
 	.isolate = mlx5_flow_isolate,
 };
 
-/**
- * Manage filter operations.
- *
- * @param dev
- *   Pointer to Ethernet device structure.
- * @param filter_type
- *   Filter type.
- * @param filter_op
- *   Operation to perform.
- * @param arg
- *   Pointer to operation-specific structure.
- *
- * @return
- *   0 on success, negative errno value on failure.
- */
-int
-mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
-		     enum rte_filter_type filter_type,
-		     enum rte_filter_op filter_op,
-		     void *arg)
-{
-	int ret = EINVAL;
+/* Convert FDIR request to Generic flow. */
+struct mlx5_fdir {
+	struct rte_flow_attr attr; /**< Flow rule attributes. */
+	struct rte_flow_action actions[2]; /**< Action list; [0] is QUEUE. */
+	struct rte_flow_item items[4]; /**< Pattern; [0..2] are L2, L3, L4. */
+	struct rte_flow_item_eth l2; /**< Spec storage of the Ethernet item. */
+	union {
+		struct rte_flow_item_ipv4 ipv4;
+		struct rte_flow_item_ipv6 ipv6;
+	} l3; /**< Spec storage of the IPv4/IPv6 item. */
+	union {
+		struct rte_flow_item_udp udp;
+		struct rte_flow_item_tcp tcp;
+	} l4; /**< Spec storage of the UDP/TCP item. */
+	struct rte_flow_action_queue queue; /**< Conf of the QUEUE action. */
+};
 
-	if (filter_type == RTE_ETH_FILTER_GENERIC) {
-		if (filter_op != RTE_ETH_FILTER_GET)
-			return -EINVAL;
-		*(const void **)arg = &mlx5_flow_ops;
-		return 0;
-	}
-	ERROR("%p: filter type (%d) not supported",
-	      (void *)dev, filter_type);
-	return -ret;
-}
+/* Verbs specification header: overlaid on a spec to read its type/size. */
+struct ibv_spec_header {
+	enum ibv_flow_spec_type type; /**< Specification type. */
+	uint16_t size; /**< Specification size, used to compare/skip specs. */
+};
 
 /**
  * Check support for a given item.
@@ -2373,3 +2362,460 @@ mlx5_flow_isolate(struct rte_eth_dev *dev,
 	priv_unlock(priv);
 	return 0;
 }
+
+/**
+ * Convert a flow director filter to a generic flow (ETH / L3 [/ L4] items).
+ *
+ * @param priv
+ *   Private structure, used to validate the target Rx queue index.
+ * @param fdir_filter
+ *   Flow director filter to convert.
+ * @param[out] attributes
+ *   Generic flow parameters structure filled from the filter.
+ *
+ * @return
+ *   0 on success, EINVAL (bad queue) or ENOTSUP (behavior/flow type) on error.
+ */
+static int
+priv_fdir_filter_convert(struct priv *priv,
+			 const struct rte_eth_fdir_filter *fdir_filter,
+			 struct mlx5_fdir *attributes)
+{
+	const struct rte_eth_fdir_input *input = &fdir_filter->input;
+
+	/* Validate queue number. */
+	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
+		ERROR("invalid queue number %d", fdir_filter->action.rx_queue);
+		return EINVAL;
+	}
+	/* Validate the behavior. */
+	if (fdir_filter->action.behavior != RTE_ETH_FDIR_ACCEPT) {
+		ERROR("invalid behavior %d", fdir_filter->action.behavior);
+		return ENOTSUP;
+	}
+	attributes->attr.ingress = 1;
+	attributes->items[0] = (struct rte_flow_item) {
+		.type = RTE_FLOW_ITEM_TYPE_ETH,
+		.spec = &attributes->l2,
+	};
+	attributes->actions[0] = (struct rte_flow_action){
+		.type = RTE_FLOW_ACTION_TYPE_QUEUE,
+		.conf = &attributes->queue,
+	};
+	attributes->queue.index = fdir_filter->action.rx_queue;
+	switch (fdir_filter->input.flow_type) {
+	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
+		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
+			.src_addr = input->flow.udp4_flow.ip.src_ip,
+			.dst_addr = input->flow.udp4_flow.ip.dst_ip,
+			.time_to_live = input->flow.udp4_flow.ip.ttl,
+			.type_of_service = input->flow.udp4_flow.ip.tos,
+			.next_proto_id = input->flow.udp4_flow.ip.proto,
+		};
+		attributes->l4.udp.hdr = (struct udp_hdr){
+			.src_port = input->flow.udp4_flow.src_port,
+			.dst_port = input->flow.udp4_flow.dst_port,
+		};
+		attributes->items[1] = (struct rte_flow_item){
+			.type = RTE_FLOW_ITEM_TYPE_IPV4,
+			.spec = &attributes->l3,
+		};
+		attributes->items[2] = (struct rte_flow_item){
+			.type = RTE_FLOW_ITEM_TYPE_UDP,
+			.spec = &attributes->l4,
+		};
+		break;
+	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
+		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
+			.src_addr = input->flow.tcp4_flow.ip.src_ip,
+			.dst_addr = input->flow.tcp4_flow.ip.dst_ip,
+			.time_to_live = input->flow.tcp4_flow.ip.ttl,
+			.type_of_service = input->flow.tcp4_flow.ip.tos,
+			.next_proto_id = input->flow.tcp4_flow.ip.proto,
+		};
+		attributes->l4.tcp.hdr = (struct tcp_hdr){
+			.src_port = input->flow.tcp4_flow.src_port,
+			.dst_port = input->flow.tcp4_flow.dst_port,
+		};
+		attributes->items[1] = (struct rte_flow_item){
+			.type = RTE_FLOW_ITEM_TYPE_IPV4,
+			.spec = &attributes->l3,
+		};
+		attributes->items[2] = (struct rte_flow_item){
+			.type = RTE_FLOW_ITEM_TYPE_TCP,
+			.spec = &attributes->l4,
+		};
+		break;
+	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
+		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
+			.src_addr = input->flow.ip4_flow.src_ip,
+			.dst_addr = input->flow.ip4_flow.dst_ip,
+			.time_to_live = input->flow.ip4_flow.ttl,
+			.type_of_service = input->flow.ip4_flow.tos,
+			.next_proto_id = input->flow.ip4_flow.proto,
+		};
+		attributes->items[1] = (struct rte_flow_item){
+			.type = RTE_FLOW_ITEM_TYPE_IPV4,
+			.spec = &attributes->l3,
+		};
+		break;
+	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
+		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
+			.hop_limits = input->flow.udp6_flow.ip.hop_limits,
+			.proto = input->flow.udp6_flow.ip.proto,
+		};
+		memcpy(attributes->l3.ipv6.hdr.src_addr,
+		       input->flow.udp6_flow.ip.src_ip,
+		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
+		memcpy(attributes->l3.ipv6.hdr.dst_addr,
+		       input->flow.udp6_flow.ip.dst_ip,
+		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
+		attributes->l4.udp.hdr = (struct udp_hdr){
+			.src_port = input->flow.udp6_flow.src_port,
+			.dst_port = input->flow.udp6_flow.dst_port,
+		};
+		attributes->items[1] = (struct rte_flow_item){
+			.type = RTE_FLOW_ITEM_TYPE_IPV6,
+			.spec = &attributes->l3,
+		};
+		attributes->items[2] = (struct rte_flow_item){
+			.type = RTE_FLOW_ITEM_TYPE_UDP,
+			.spec = &attributes->l4,
+		};
+		break;
+	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
+		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
+			.hop_limits = input->flow.tcp6_flow.ip.hop_limits,
+			.proto = input->flow.tcp6_flow.ip.proto,
+		};
+		memcpy(attributes->l3.ipv6.hdr.src_addr,
+		       input->flow.tcp6_flow.ip.src_ip,
+		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
+		memcpy(attributes->l3.ipv6.hdr.dst_addr,
+		       input->flow.tcp6_flow.ip.dst_ip,
+		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
+		attributes->l4.tcp.hdr = (struct tcp_hdr){
+			.src_port = input->flow.tcp6_flow.src_port,
+			.dst_port = input->flow.tcp6_flow.dst_port,
+		};
+		attributes->items[1] = (struct rte_flow_item){
+			.type = RTE_FLOW_ITEM_TYPE_IPV6,
+			.spec = &attributes->l3,
+		};
+		attributes->items[2] = (struct rte_flow_item){
+			.type = RTE_FLOW_ITEM_TYPE_TCP,
+			.spec = &attributes->l4,
+		};
+		break;
+	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
+		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
+			.hop_limits = input->flow.ipv6_flow.hop_limits,
+			.proto = input->flow.ipv6_flow.proto,
+		};
+		memcpy(attributes->l3.ipv6.hdr.src_addr,
+		       input->flow.ipv6_flow.src_ip,
+		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
+		memcpy(attributes->l3.ipv6.hdr.dst_addr,
+		       input->flow.ipv6_flow.dst_ip,
+		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
+		attributes->items[1] = (struct rte_flow_item){
+			.type = RTE_FLOW_ITEM_TYPE_IPV6,
+			.spec = &attributes->l3,
+		};
+		break;
+	default:
+		ERROR("invalid flow type%d",
+		      fdir_filter->input.flow_type);
+		return ENOTSUP;
+	}
+	return 0;
+}
+
+/**
+ * Convert a flow director filter and create it as a flow in priv->flows.
+ *
+ * @param priv
+ *   Private structure.
+ * @param fdir_filter
+ *   Flow director filter to add.
+ *
+ * @return
+ *   0 on success, negated conversion error or ENOTSUP on creation failure.
+ */
+static int
+priv_fdir_filter_add(struct priv *priv,
+		     const struct rte_eth_fdir_filter *fdir_filter)
+{
+	struct mlx5_fdir attributes = {
+		.attr.group = 0,
+	};
+	struct mlx5_flow_parse parser = {
+		.layer = HASH_RXQ_ETH,
+	};
+	struct rte_flow_error error;
+	struct rte_flow *flow;
+	int ret;
+
+	ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
+	if (ret)
+		return -ret;
+	ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
+				attributes.actions, &error, &parser);
+	if (ret)
+		return -ret;
+	flow = priv_flow_create(priv,
+				&priv->flows,
+				&attributes.attr,
+				attributes.items,
+				attributes.actions,
+				&error);
+	if (flow) {
+		/* priv_flow_create() already linked the flow in the list. */
+		DEBUG("FDIR created %p", (void *)flow);
+		return 0;
+	}
+	return ENOTSUP;
+}
+
+/**
+ * Delete the flow of priv->flows whose Verbs attribute matches the filter.
+ *
+ * @param priv
+ *   Private structure.
+ * @param fdir_filter
+ *   Filter to be deleted.
+ *
+ * @return
+ *   0 on success (even if no flow matched), negated error value on failure.
+ */
+static int
+priv_fdir_filter_delete(struct priv *priv,
+			const struct rte_eth_fdir_filter *fdir_filter)
+{
+	struct mlx5_fdir attributes = { .attr.group = 0, };
+	struct mlx5_flow_parse parser = {
+		.create = 1,
+		.layer = HASH_RXQ_ETH,
+	};
+	struct rte_flow_error error;
+	struct rte_flow *flow;
+	unsigned int i;
+	int ret;
+
+	ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
+	if (ret)
+		return -ret;
+	ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
+				attributes.actions, &error, &parser);
+	if (ret)
+		goto exit;
+	TAILQ_FOREACH(flow, &priv->flows, next) {
+		struct ibv_flow_attr *attr;
+		struct ibv_spec_header *attr_h;
+		void *spec;
+		struct ibv_flow_attr *flow_attr;
+		struct ibv_spec_header *flow_h;
+		void *flow_spec;
+		unsigned int specs_n;
+
+		if (parser.drop)
+			attr = parser.drop_q.ibv_attr;
+		else
+			attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
+		if (flow->drop)
+			flow_attr = flow->drxq.ibv_attr;
+		else
+			flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
+		/* Compare first the attributes. */
+		if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
+			continue;
+		if (attr->num_of_specs == 0)
+			continue;
+		spec = (void *)((uintptr_t)attr +
+				sizeof(struct ibv_flow_attr));
+		flow_spec = (void *)((uintptr_t)flow_attr +
+				     sizeof(struct ibv_flow_attr));
+		specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
+		for (i = 0; i != specs_n; ++i) {
+			attr_h = spec;
+			flow_h = flow_spec;
+			if (memcmp(spec, flow_spec,
+				   RTE_MIN(attr_h->size, flow_h->size)))
+				break;
+			spec = (void *)((uintptr_t)spec + attr_h->size);
+			flow_spec = (void *)((uintptr_t)flow_spec +
+					     flow_h->size);
+		}
+		if (i == specs_n) /* Every compared spec matched. */
+			break;
+	}
+	if (flow)
+		priv_flow_destroy(priv, &priv->flows, flow);
+exit:
+	if (parser.drop) {
+		rte_free(parser.drop_q.ibv_attr);
+	} else {
+		for (i = 0; i != hash_rxq_init_n; ++i) {
+			if (parser.queue[i].ibv_attr)
+				rte_free(parser.queue[i].ibv_attr);
+		}
+	}
+	return -ret;
+}
+
+/**
+ * Replace a filter: delete the existing rule, then add it again.
+ *
+ * @param priv
+ *   Private structure.
+ * @param fdir_filter
+ *   Filter to be updated.
+ *
+ * @return
+ *   0 on success, the value returned by the failing step otherwise.
+ */
+static int
+priv_fdir_filter_update(struct priv *priv,
+			const struct rte_eth_fdir_filter *fdir_filter)
+{
+	int err = priv_fdir_filter_delete(priv, fdir_filter);
+
+	/* Nothing is added back when the delete step failed. */
+	if (err)
+		return err;
+	/* The add result is the final status of the update. */
+	return priv_fdir_filter_add(priv, fdir_filter);
+}
+
+/**
+ * Flush all filters by flushing the whole priv->flows list.
+ *
+ * @param priv
+ *   Private structure.
+ */
+static void
+priv_fdir_filter_flush(struct priv *priv)
+{
+	priv_flow_flush(priv, &priv->flows);
+}
+
+/**
+ * Get flow director information; only mode and mask carry real values.
+ *
+ * @param priv
+ *   Private structure, source of the configured mode and mask.
+ * @param[out] fdir_info
+ *   Resulting flow director information, other fields set here are zero.
+ */
+static void
+priv_fdir_info_get(struct priv *priv, struct rte_eth_fdir_info *fdir_info)
+{
+	struct rte_eth_fdir_masks *conf_mask =
+		&priv->dev->data->dev_conf.fdir_conf.mask;
+
+	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
+	fdir_info->flex_payload_limit = 0;
+	fdir_info->max_flex_payload_segment_num = 0;
+	fdir_info->flex_payload_unit = 0;
+	fdir_info->flow_types_mask[0] = 0;
+	fdir_info->max_flexpayload = 0;
+	fdir_info->guarant_spc = 0;
+	rte_memcpy(&fdir_info->mask, conf_mask, sizeof(fdir_info->mask));
+	fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
+}
+
+/**
+ * Dispatch a flow director operation to its handler.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param filter_op
+ *   Operation to perform.
+ * @param arg
+ *   Pointer to operation-specific structure.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+static int
+priv_fdir_ctrl_func(struct priv *priv, enum rte_filter_op filter_op, void *arg)
+{
+	enum rte_fdir_mode mode = priv->dev->data->dev_conf.fdir_conf.mode;
+
+	/* NOP succeeds whatever the configured mode is. */
+	if (filter_op == RTE_ETH_FILTER_NOP)
+		return 0;
+	/*
+	 * Every other operation is refused unless the configured mode is
+	 * RTE_FDIR_MODE_PERFECT or RTE_FDIR_MODE_PERFECT_MAC_VLAN.
+	 */
+	if (!(mode == RTE_FDIR_MODE_PERFECT ||
+	      mode == RTE_FDIR_MODE_PERFECT_MAC_VLAN)) {
+		ERROR("%p: flow director mode %d not supported",
+		      (void *)priv, mode);
+		return EINVAL;
+	}
+	switch (filter_op) {
+	/* Add, update and delete hand back the handler status. */
+	case RTE_ETH_FILTER_ADD:
+		return priv_fdir_filter_add(priv, arg);
+	case RTE_ETH_FILTER_UPDATE:
+		return priv_fdir_filter_update(priv, arg);
+	case RTE_ETH_FILTER_DELETE:
+		return priv_fdir_filter_delete(priv, arg);
+	/* Flush and info handlers return nothing, report success. */
+	case RTE_ETH_FILTER_FLUSH:
+		priv_fdir_filter_flush(priv);
+		return 0;
+	case RTE_ETH_FILTER_INFO:
+		priv_fdir_info_get(priv, arg);
+		return 0;
+	default:
+		DEBUG("%p: unknown operation %u", (void *)priv,
+		      filter_op);
+		return EINVAL;
+	}
+}
+
+/**
+ * Entry point for filter control requests.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param filter_type
+ *   Filter type, only GENERIC and FDIR are handled.
+ * @param filter_op
+ *   Operation to perform.
+ * @param arg
+ *   Pointer to operation-specific structure.
+ *
+ * @return
+ *   0 on success, negative errno value on failure.
+ */
+int
+mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
+		     enum rte_filter_type filter_type,
+		     enum rte_filter_op filter_op,
+		     void *arg)
+{
+	struct priv *priv = dev->data->dev_private;
+	int status;
+
+	if (filter_type == RTE_ETH_FILTER_GENERIC) {
+		/* GET is the only generic operation: export the flow ops. */
+		if (filter_op != RTE_ETH_FILTER_GET)
+			return -EINVAL;
+		*(const void **)arg = &mlx5_flow_ops;
+		return 0;
+	}
+	if (filter_type != RTE_ETH_FILTER_FDIR) {
+		ERROR("%p: filter type (%d) not supported",
+		      (void *)dev, filter_type);
+		return -EINVAL;
+	}
+	priv_lock(priv);
+	status = priv_fdir_ctrl_func(priv, filter_op, arg);
+	priv_unlock(priv);
+	/* Turn the helper's positive errno into a negative one. */
+	return -status;
+}
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v2 30/30] net/mlx5: add new operations for isolated mode
       [not found] ` <cover.1507207731.git.nelio.laranjeiro@6wind.com>
                     ` (28 preceding siblings ...)
  2017-10-05 12:50   ` [dpdk-dev] [PATCH v2 29/30] net/mlx5: support flow director Nelio Laranjeiro
@ 2017-10-05 12:50   ` Nelio Laranjeiro
  2017-10-06  5:48     ` Yongseok Koh
  29 siblings, 1 reply; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-05 12:50 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
Isolated mode works exclusively with the generic flow API; this patch adds a
new set of operations valid in this mode.
 - promiscuous*()
 - allmulticast*()
 - reta*()
 - rss*()
are not supported in this mode as they are fully covered by the generic flow
API.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5.c      | 39 +++++++++++++++++++++++++++++++++++++--
 drivers/net/mlx5/mlx5_flow.c |  8 ++++++++
 2 files changed, 45 insertions(+), 2 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 7edc918..3362200 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -257,7 +257,7 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	memset(priv, 0, sizeof(*priv));
 }
 
-static const struct eth_dev_ops mlx5_dev_ops = {
+const struct eth_dev_ops mlx5_dev_ops = {
 	.dev_configure = mlx5_dev_configure,
 	.dev_start = mlx5_dev_start,
 	.dev_stop = mlx5_dev_stop,
@@ -300,7 +300,6 @@ static const struct eth_dev_ops mlx5_dev_ops = {
 	.rx_queue_intr_disable = mlx5_rx_intr_disable,
 };
 
-
 static const struct eth_dev_ops mlx5_dev_sec_ops = {
 	.stats_get = mlx5_stats_get,
 	.stats_reset = mlx5_stats_reset,
@@ -312,6 +311,42 @@ static const struct eth_dev_ops mlx5_dev_sec_ops = {
 	.tx_descriptor_status = mlx5_tx_descriptor_status,
 };
 
+/* Ops for flow isolated mode: no promiscuous/allmulticast/reta/rss. */
+const struct eth_dev_ops mlx5_dev_ops_isolate = {
+	.dev_configure = mlx5_dev_configure,
+	.dev_start = mlx5_dev_start,
+	.dev_stop = mlx5_dev_stop,
+	.dev_set_link_down = mlx5_set_link_down,
+	.dev_set_link_up = mlx5_set_link_up,
+	.dev_close = mlx5_dev_close,
+	.link_update = mlx5_link_update,
+	.stats_get = mlx5_stats_get,
+	.stats_reset = mlx5_stats_reset,
+	.xstats_get = mlx5_xstats_get,
+	.xstats_reset = mlx5_xstats_reset,
+	.xstats_get_names = mlx5_xstats_get_names,
+	.dev_infos_get = mlx5_dev_infos_get,
+	.dev_supported_ptypes_get = mlx5_dev_supported_ptypes_get,
+	.vlan_filter_set = mlx5_vlan_filter_set,
+	.rx_queue_setup = mlx5_rx_queue_setup,
+	.tx_queue_setup = mlx5_tx_queue_setup,
+	.rx_queue_release = mlx5_rx_queue_release,
+	.tx_queue_release = mlx5_tx_queue_release,
+	.flow_ctrl_get = mlx5_dev_get_flow_ctrl,
+	.flow_ctrl_set = mlx5_dev_set_flow_ctrl,
+	.mac_addr_remove = mlx5_mac_addr_remove,
+	.mac_addr_add = mlx5_mac_addr_add,
+	.mac_addr_set = mlx5_mac_addr_set,
+	.mtu_set = mlx5_dev_set_mtu,
+	.vlan_strip_queue_set = mlx5_vlan_strip_queue_set,
+	.vlan_offload_set = mlx5_vlan_offload_set,
+	.filter_ctrl = mlx5_dev_filter_ctrl,
+	.rx_descriptor_status = mlx5_rx_descriptor_status,
+	.tx_descriptor_status = mlx5_tx_descriptor_status,
+	.rx_queue_intr_enable = mlx5_rx_intr_enable,
+	.rx_queue_intr_disable = mlx5_rx_intr_disable,
+};
+
 static struct {
 	struct rte_pci_addr pci_addr; /* associated PCI address */
 	uint32_t ports; /* physical ports bitfield. */
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 46a8cde..c1d4dd5 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -59,6 +59,10 @@
 #define MLX5_IPV4 4
 #define MLX5_IPV6 6
 
+/* Dev ops structure defined in mlx5.c */
+extern const struct eth_dev_ops mlx5_dev_ops;
+extern const struct eth_dev_ops mlx5_dev_ops_isolate;
+
 static int
 mlx5_flow_create_eth(const struct rte_flow_item *item,
 		     const void *default_mask,
@@ -2359,6 +2363,10 @@ mlx5_flow_isolate(struct rte_eth_dev *dev,
 		return -rte_errno;
 	}
 	priv->isolated = !!enable;
+	if (enable)
+		priv->dev->dev_ops = &mlx5_dev_ops_isolate;
+	else
+		priv->dev->dev_ops = &mlx5_dev_ops;
 	priv_unlock(priv);
 	return 0;
 }
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
 
- * [dpdk-dev] [PATCH v3 00/30] net/mlx5: cleanup for isolated mode
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (24 preceding siblings ...)
       [not found] ` <cover.1507207731.git.nelio.laranjeiro@6wind.com>
@ 2017-10-09 14:44 ` Nelio Laranjeiro
  2017-10-09 17:17   ` Yongseok Koh
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 01/30] net/mlx5: merge action and flow parser structure Nelio Laranjeiro
                   ` (29 subsequent siblings)
  55 siblings, 1 reply; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-09 14:44 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
This series cleans up the control plane part and the way it uses the different
kinds of objects (DPDK queues, Verbs Queues, ...).  It has three goals:
 1. Reduce the memory usage by sharing all possible objects.
 2. Leave the configuration to the control plane and the creation/destruction
    of queues to the dev_start/dev_stop() to have a better control on object
    and easily apply the configuration.
 3. Create all flows through the generic flow API; this will also help to
    implement a collision detection algorithm, as all flows use the same
    service and thus the same kind of object.
Changes in v3:
 * Fix illegal access in Tx queue mp2mr cache.
 * Fix some function documentations.
 * Use priv_dev_traffic_restart() in vlan_filter_set().
 * Fix a bug in flow priorities.
Changes in v2:
 * Rebase on upstream rdma-core/MLNX_OFED 4.2
 * Split in smaller patches
Nelio Laranjeiro (30):
  net/mlx5: merge action and flow parser structure
  net/mlx5: remove flow director support
  net/mlx5: prefix Rx structures and functions
  net/mlx5: prefix Tx structures and functions
  net/mlx5: remove redundant started flag
  net/mlx5: verify all flows are been removed on close
  net/mlx5: fix reta update can segfault
  net/mlx5: fix rxqs vector support verification
  net/mlx5: add reference counter on memory region
  net/mlx5: separate DPDK from Verbs Rx queue objects
  net/mlx5: separate DPDK from Verbs Tx queue objects
  net/mlx5: add reference counter on DPDK Tx queues
  net/mlx5: add reference counter on DPDK Rx queues
  net/mlx5: make indirection tables shareable
  net/mlx5: add Hash Rx queue object
  net/mlx5: fix clang compilation error
  net/mlx5: use flow to enable promiscuous mode
  net/mlx5: use flow to enable all multi mode
  net/mlx5: use flow to enable unicast traffic
  net/mlx5: handle a single RSS hash key for all protocols
  net/mlx5: remove hash Rx queues support
  net/mlx5: fully convert a flow to verbs in validate
  net/mlx5: process flows actions before of items
  net/mlx5: merge internal parser and actions structures
  net/mlx5: use a better name for the flow parser
  net/mlx5: reorganise functions in the file
  net/mlx5: move Verbs flows and attributes
  net/mlx5: handle RSS hash configuration in RSS flow
  net/mlx5: support flow director
  net/mlx5: add new operations for isolated mode
 drivers/net/mlx5/Makefile            |    1 -
 drivers/net/mlx5/mlx5.c              |  134 +-
 drivers/net/mlx5/mlx5.h              |   91 +-
 drivers/net/mlx5/mlx5_defs.h         |    3 -
 drivers/net/mlx5/mlx5_ethdev.c       |   27 +-
 drivers/net/mlx5/mlx5_fdir.c         | 1091 ---------------
 drivers/net/mlx5/mlx5_flow.c         | 2475 +++++++++++++++++++++++++---------
 drivers/net/mlx5/mlx5_mac.c          |  407 +-----
 drivers/net/mlx5/mlx5_mr.c           |  276 ++--
 drivers/net/mlx5/mlx5_rss.c          |  136 +-
 drivers/net/mlx5/mlx5_rxmode.c       |  380 +-----
 drivers/net/mlx5/mlx5_rxq.c          | 1999 ++++++++++++++-------------
 drivers/net/mlx5/mlx5_rxtx.c         |   39 +-
 drivers/net/mlx5/mlx5_rxtx.h         |  286 ++--
 drivers/net/mlx5/mlx5_rxtx_vec_sse.c |   42 +-
 drivers/net/mlx5/mlx5_stats.c        |    4 +-
 drivers/net/mlx5/mlx5_trigger.c      |  320 ++++-
 drivers/net/mlx5/mlx5_txq.c          |  876 +++++++-----
 drivers/net/mlx5/mlx5_utils.h        |    2 +
 drivers/net/mlx5/mlx5_vlan.c         |   56 +-
 20 files changed, 4247 insertions(+), 4398 deletions(-)
 delete mode 100644 drivers/net/mlx5/mlx5_fdir.c
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * Re: [dpdk-dev] [PATCH v3 00/30] net/mlx5: cleanup for isolated mode
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 00/30] net/mlx5: cleanup " Nelio Laranjeiro
@ 2017-10-09 17:17   ` Yongseok Koh
  2017-10-09 18:35     ` Ferruh Yigit
  0 siblings, 1 reply; 129+ messages in thread
From: Yongseok Koh @ 2017-10-09 17:17 UTC (permalink / raw)
  To: Nélio Laranjeiro, Ferruh Yigit; +Cc: dev, Adrien Mazarguil
> On Oct 9, 2017, at 7:44 AM, Nelio Laranjeiro <nelio.laranjeiro@6wind.com> wrote:
> 
> This series cleanups the control plane part and the way it uses the different
> kind of objects (DPDK queues, Verbs Queues, ...).  It has three goals:
> 
> 1. Reduce the memory usage by sharing all possible objects.
> 
> 2. Leave the configuration to the control plane and the creation/destruction
>    of queues to the dev_start/dev_stop() to have a better control on object
>    and easily apply the configuration.
> 
> 3. Create all flows through the generic flow API, it will also help to
>    implement a detection collision algorithm as all flows are using the same
>    service and thus the same kind of object.
> 
> Changes in v3:
> 
> * Fix illegal access in Tx queue mp2mr cache.
> * Fix some function documentations.
> * Use priv_dev_traffic_restart() in vlan_filter_set().
> * Fix a bug in flow priorities.
> 
> Changes in v2:
> 
> * Rebase on upstream rdma-core/MLNX_OFED 4.2
> * Split in smaller patches
As Nelio tagged in his patches, for all series,
Acked-by: Yongseok Koh <yskoh@mellanox.com>
 
Thanks
^ permalink raw reply	[flat|nested] 129+ messages in thread 
- * Re: [dpdk-dev] [PATCH v3 00/30] net/mlx5: cleanup for isolated mode
  2017-10-09 17:17   ` Yongseok Koh
@ 2017-10-09 18:35     ` Ferruh Yigit
  2017-10-10  6:55       ` Nélio Laranjeiro
  0 siblings, 1 reply; 129+ messages in thread
From: Ferruh Yigit @ 2017-10-09 18:35 UTC (permalink / raw)
  To: Yongseok Koh, Nélio Laranjeiro; +Cc: dev, Adrien Mazarguil
On 10/9/2017 6:17 PM, Yongseok Koh wrote:
> 
>> On Oct 9, 2017, at 7:44 AM, Nelio Laranjeiro <nelio.laranjeiro@6wind.com> wrote:
>>
>> This series cleanups the control plane part and the way it uses the different
>> kind of objects (DPDK queues, Verbs Queues, ...).  It has three goals:
>>
>> 1. Reduce the memory usage by sharing all possible objects.
>>
>> 2. Leave the configuration to the control plane and the creation/destruction
>>    of queues to the dev_start/dev_stop() to have a better control on object
>>    and easily apply the configuration.
>>
>> 3. Create all flows through the generic flow API, it will also help to
>>    implement a detection collision algorithm as all flows are using the same
>>    service and thus the same kind of object.
>>
>> Changes in v3:
>>
>> * Fix illegal access in Tx queue mp2mr cache.
>> * Fix some function documentations.
>> * Use priv_dev_traffic_restart() in vlan_filter_set().
>> * Fix a bug in flow priorities.
>>
>> Changes in v2:
>>
>> * Rebase on upstream rdma-core/MLNX_OFED 4.2
>> * Split in smaller patches
> 
> As Nelio tagged in his patches, for all series,
> 
> Acked-by: Yongseok Koh <yskoh@mellanox.com>
Series applied to dpdk-next-net/master, thanks.
(There is initial value assignment update in 12/30 to fix icc warning,
please double check the update)
^ permalink raw reply	[flat|nested] 129+ messages in thread 
- * Re: [dpdk-dev] [PATCH v3 00/30] net/mlx5: cleanup for isolated mode
  2017-10-09 18:35     ` Ferruh Yigit
@ 2017-10-10  6:55       ` Nélio Laranjeiro
  0 siblings, 0 replies; 129+ messages in thread
From: Nélio Laranjeiro @ 2017-10-10  6:55 UTC (permalink / raw)
  To: Ferruh Yigit; +Cc: Yongseok Koh, dev, Adrien Mazarguil
On Mon, Oct 09, 2017 at 07:35:01PM +0100, Ferruh Yigit wrote:
> On 10/9/2017 6:17 PM, Yongseok Koh wrote:
> > 
> >> On Oct 9, 2017, at 7:44 AM, Nelio Laranjeiro <nelio.laranjeiro@6wind.com> wrote:
> >>
> >> This series cleanups the control plane part and the way it uses the different
> >> kind of objects (DPDK queues, Verbs Queues, ...).  It has three goals:
> >>
> >> 1. Reduce the memory usage by sharing all possible objects.
> >>
> >> 2. Leave the configuration to the control plane and the creation/destruction
> >>    of queues to the dev_start/dev_stop() to have a better control on object
> >>    and easily apply the configuration.
> >>
> >> 3. Create all flows through the generic flow API, it will also help to
> >>    implement a detection collision algorithm as all flows are using the same
> >>    service and thus the same kind of object.
> >>
> >> Changes in v3:
> >>
> >> * Fix illegal access in Tx queue mp2mr cache.
> >> * Fix some function documentations.
> >> * Use priv_dev_traffic_restart() in vlan_filter_set().
> >> * Fix a bug in flow priorities.
> >>
> >> Changes in v2:
> >>
> >> * Rebase on upstream rdma-core/MLNX_OFED 4.2
> >> * Split in smaller patches
> > 
> > As Nelio tagged in his patches, for all series,
> > 
> > Acked-by: Yongseok Koh <yskoh@mellanox.com>
> 
> Series applied to dpdk-next-net/master, thanks.
> 
> (There is initial value assignment update in 12/30 to fix icc warning,
> please double check the update)
Hi Ferruh,
I've checked your modification and I agree with it.
Thanks,
-- 
Nélio Laranjeiro
6WIND
^ permalink raw reply	[flat|nested] 129+ messages in thread 
 
 
 
- * [dpdk-dev] [PATCH v3 01/30] net/mlx5: merge action and flow parser structure
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (25 preceding siblings ...)
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 00/30] net/mlx5: cleanup " Nelio Laranjeiro
@ 2017-10-09 14:44 ` Nelio Laranjeiro
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 02/30] net/mlx5: remove flow director support Nelio Laranjeiro
                   ` (28 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-09 14:44 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
mlx5_flow_create() and mlx5_flow_validate() are making common checks.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5_flow.c | 153 +++++++++++++++++++++----------------------
 1 file changed, 73 insertions(+), 80 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index dbd241f..fb30803 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -201,7 +201,7 @@ static const struct mlx5_flow_items mlx5_flow_items[] = {
 		.default_mask = &rte_flow_item_ipv4_mask,
 		.mask_sz = sizeof(struct rte_flow_item_ipv4),
 		.convert = mlx5_flow_create_ipv4,
-		.dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
+		.dst_sz = sizeof(struct ibv_flow_spec_ipv4),
 	},
 	[RTE_FLOW_ITEM_TYPE_IPV6] = {
 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
@@ -271,12 +271,23 @@ static const struct mlx5_flow_items mlx5_flow_items[] = {
 	},
 };
 
+/** Result of parsing the actions of a flow rule. */
+struct mlx5_flow_action {
+	uint32_t queue:1; /**< Target is a receive queue. */
+	uint32_t drop:1; /**< Target is a drop queue. */
+	uint32_t mark:1; /**< Mark is present in the flow. */
+	uint32_t mark_id; /**< Mark identifier. */
+	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
+	uint16_t queues_n; /**< Number of entries in queues[]. */
+};
+
 /** Structure to pass to the conversion function. */
-struct mlx5_flow {
+struct mlx5_flow_parse {
 	struct ibv_flow_attr *ibv_attr; /**< Verbs attribute. */
 	unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
 	uint32_t inner; /**< Set once VXLAN is encountered. */
 	uint64_t hash_fields; /**< Fields that participate in the hash. */
+	struct mlx5_flow_action actions; /**< Parsed action result. */
 };
 
 /** Structure for Drop queue. */
@@ -287,15 +298,6 @@ struct rte_flow_drop {
 	struct ibv_cq *cq; /**< Verbs completion queue. */
 };
 
-struct mlx5_flow_action {
-	uint32_t queue:1; /**< Target is a receive queue. */
-	uint32_t drop:1; /**< Target is a drop queue. */
-	uint32_t mark:1; /**< Mark is present in the flow. */
-	uint32_t mark_id; /**< Mark identifier. */
-	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
-	uint16_t queues_n; /**< Number of entries in queue[]. */
-};
-
 /**
  * Check support for a given item.
  *
@@ -374,8 +376,6 @@ mlx5_flow_item_validate(const struct rte_flow_item *item,
  *   Perform verbose error reporting if not NULL.
  * @param[in, out] flow
  *   Flow structure to update.
- * @param[in, out] action
- *   Action structure to update.
  *
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
@@ -386,8 +386,7 @@ priv_flow_validate(struct priv *priv,
 		   const struct rte_flow_item items[],
 		   const struct rte_flow_action actions[],
 		   struct rte_flow_error *error,
-		   struct mlx5_flow *flow,
-		   struct mlx5_flow_action *action)
+		   struct mlx5_flow_parse *flow)
 {
 	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
 
@@ -469,7 +468,7 @@ priv_flow_validate(struct priv *priv,
 		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
 			continue;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
-			action->drop = 1;
+			flow->actions.drop = 1;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
 			const struct rte_flow_action_queue *queue =
 				(const struct rte_flow_action_queue *)
@@ -479,13 +478,13 @@ priv_flow_validate(struct priv *priv,
 
 			if (!queue || (queue->index > (priv->rxqs_n - 1)))
 				goto exit_action_not_supported;
-			for (n = 0; n < action->queues_n; ++n) {
-				if (action->queues[n] == queue->index) {
+			for (n = 0; n < flow->actions.queues_n; ++n) {
+				if (flow->actions.queues[n] == queue->index) {
 					found = 1;
 					break;
 				}
 			}
-			if (action->queues_n > 1 && !found) {
+			if (flow->actions.queues_n > 1 && !found) {
 				rte_flow_error_set(error, ENOTSUP,
 					   RTE_FLOW_ERROR_TYPE_ACTION,
 					   actions,
@@ -493,9 +492,9 @@ priv_flow_validate(struct priv *priv,
 				return -rte_errno;
 			}
 			if (!found) {
-				action->queue = 1;
-				action->queues_n = 1;
-				action->queues[0] = queue->index;
+				flow->actions.queue = 1;
+				flow->actions.queues_n = 1;
+				flow->actions.queues[0] = queue->index;
 			}
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
 			const struct rte_flow_action_rss *rss =
@@ -510,12 +509,12 @@ priv_flow_validate(struct priv *priv,
 						   "no valid queues");
 				return -rte_errno;
 			}
-			if (action->queues_n == 1) {
+			if (flow->actions.queues_n == 1) {
 				uint16_t found = 0;
 
-				assert(action->queues_n);
+				assert(flow->actions.queues_n);
 				for (n = 0; n < rss->num; ++n) {
-					if (action->queues[0] ==
+					if (flow->actions.queues[0] ==
 					    rss->queue[n]) {
 						found = 1;
 						break;
@@ -540,10 +539,10 @@ priv_flow_validate(struct priv *priv,
 					return -rte_errno;
 				}
 			}
-			action->queue = 1;
+			flow->actions.queue = 1;
 			for (n = 0; n < rss->num; ++n)
-				action->queues[n] = rss->queue[n];
-			action->queues_n = rss->num;
+				flow->actions.queues[n] = rss->queue[n];
+			flow->actions.queues_n = rss->num;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
 			const struct rte_flow_action_mark *mark =
 				(const struct rte_flow_action_mark *)
@@ -563,19 +562,19 @@ priv_flow_validate(struct priv *priv,
 						   " and 16777199");
 				return -rte_errno;
 			}
-			action->mark = 1;
-			action->mark_id = mark->id;
+			flow->actions.mark = 1;
+			flow->actions.mark_id = mark->id;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
-			action->mark = 1;
+			flow->actions.mark = 1;
 		} else {
 			goto exit_action_not_supported;
 		}
 	}
-	if (action->mark && !flow->ibv_attr && !action->drop)
+	if (flow->actions.mark && !flow->ibv_attr && !flow->actions.drop)
 		flow->offset += sizeof(struct ibv_flow_spec_action_tag);
-	if (!flow->ibv_attr && action->drop)
+	if (!flow->ibv_attr && flow->actions.drop)
 		flow->offset += sizeof(struct ibv_flow_spec_action_drop);
-	if (!action->queue && !action->drop) {
+	if (!flow->actions.queue && !flow->actions.drop) {
 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
 				   NULL, "no valid action");
 		return -rte_errno;
@@ -606,18 +605,16 @@ mlx5_flow_validate(struct rte_eth_dev *dev,
 {
 	struct priv *priv = dev->data->dev_private;
 	int ret;
-	struct mlx5_flow flow = { .offset = sizeof(struct ibv_flow_attr) };
-	struct mlx5_flow_action action = {
-		.queue = 0,
-		.drop = 0,
-		.mark = 0,
-		.mark_id = MLX5_FLOW_MARK_DEFAULT,
-		.queues_n = 0,
+	struct mlx5_flow_parse flow = {
+		.offset = sizeof(struct ibv_flow_attr),
+		.actions = {
+			.mark_id = MLX5_FLOW_MARK_DEFAULT,
+			.queues_n = 0,
+		},
 	};
 
 	priv_lock(priv);
-	ret = priv_flow_validate(priv, attr, items, actions, error, &flow,
-				 &action);
+	ret = priv_flow_validate(priv, attr, items, actions, error, &flow);
 	priv_unlock(priv);
 	return ret;
 }
@@ -639,7 +636,7 @@ mlx5_flow_create_eth(const struct rte_flow_item *item,
 {
 	const struct rte_flow_item_eth *spec = item->spec;
 	const struct rte_flow_item_eth *mask = item->mask;
-	struct mlx5_flow *flow = (struct mlx5_flow *)data;
+	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
 	struct ibv_flow_spec_eth *eth;
 	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
 	unsigned int i;
@@ -688,7 +685,7 @@ mlx5_flow_create_vlan(const struct rte_flow_item *item,
 {
 	const struct rte_flow_item_vlan *spec = item->spec;
 	const struct rte_flow_item_vlan *mask = item->mask;
-	struct mlx5_flow *flow = (struct mlx5_flow *)data;
+	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
 	struct ibv_flow_spec_eth *eth;
 	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
 
@@ -720,7 +717,7 @@ mlx5_flow_create_ipv4(const struct rte_flow_item *item,
 {
 	const struct rte_flow_item_ipv4 *spec = item->spec;
 	const struct rte_flow_item_ipv4 *mask = item->mask;
-	struct mlx5_flow *flow = (struct mlx5_flow *)data;
+	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
 	struct ibv_flow_spec_ipv4_ext *ipv4;
 	unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
 
@@ -774,7 +771,7 @@ mlx5_flow_create_ipv6(const struct rte_flow_item *item,
 {
 	const struct rte_flow_item_ipv6 *spec = item->spec;
 	const struct rte_flow_item_ipv6 *mask = item->mask;
-	struct mlx5_flow *flow = (struct mlx5_flow *)data;
+	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
 	struct ibv_flow_spec_ipv6 *ipv6;
 	unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
 	unsigned int i;
@@ -831,7 +828,7 @@ mlx5_flow_create_udp(const struct rte_flow_item *item,
 {
 	const struct rte_flow_item_udp *spec = item->spec;
 	const struct rte_flow_item_udp *mask = item->mask;
-	struct mlx5_flow *flow = (struct mlx5_flow *)data;
+	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
 	struct ibv_flow_spec_tcp_udp *udp;
 	unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
 
@@ -875,7 +872,7 @@ mlx5_flow_create_tcp(const struct rte_flow_item *item,
 {
 	const struct rte_flow_item_tcp *spec = item->spec;
 	const struct rte_flow_item_tcp *mask = item->mask;
-	struct mlx5_flow *flow = (struct mlx5_flow *)data;
+	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
 	struct ibv_flow_spec_tcp_udp *tcp;
 	unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
 
@@ -919,7 +916,7 @@ mlx5_flow_create_vxlan(const struct rte_flow_item *item,
 {
 	const struct rte_flow_item_vxlan *spec = item->spec;
 	const struct rte_flow_item_vxlan *mask = item->mask;
-	struct mlx5_flow *flow = (struct mlx5_flow *)data;
+	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
 	struct ibv_flow_spec_tunnel *vxlan;
 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
 	union vni {
@@ -958,7 +955,7 @@ mlx5_flow_create_vxlan(const struct rte_flow_item *item,
  *   Mark identifier.
  */
 static int
-mlx5_flow_create_flag_mark(struct mlx5_flow *flow, uint32_t mark_id)
+mlx5_flow_create_flag_mark(struct mlx5_flow_parse *flow, uint32_t mark_id)
 {
 	struct ibv_flow_spec_action_tag *tag;
 	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
@@ -988,7 +985,7 @@ mlx5_flow_create_flag_mark(struct mlx5_flow *flow, uint32_t mark_id)
  */
 static struct rte_flow *
 priv_flow_create_action_queue_drop(struct priv *priv,
-				   struct mlx5_flow *flow,
+				   struct mlx5_flow_parse *flow,
 				   struct rte_flow_error *error)
 {
 	struct rte_flow *rte_flow;
@@ -1036,8 +1033,6 @@ priv_flow_create_action_queue_drop(struct priv *priv,
  *   Pointer to private structure.
  * @param flow
  *   MLX5 flow attributes (filled by mlx5_flow_validate()).
- * @param action
- *   Target action structure.
  * @param[out] error
  *   Perform verbose error reporting if not NULL.
  *
@@ -1046,49 +1041,49 @@ priv_flow_create_action_queue_drop(struct priv *priv,
  */
 static struct rte_flow *
 priv_flow_create_action_queue(struct priv *priv,
-			      struct mlx5_flow *flow,
-			      struct mlx5_flow_action *action,
+			      struct mlx5_flow_parse *flow,
 			      struct rte_flow_error *error)
 {
 	struct rte_flow *rte_flow;
 	unsigned int i;
 	unsigned int j;
-	const unsigned int wqs_n = 1 << log2above(action->queues_n);
+	const unsigned int wqs_n = 1 << log2above(flow->actions.queues_n);
 	struct ibv_wq *wqs[wqs_n];
 
 	assert(priv->pd);
 	assert(priv->ctx);
-	assert(!action->drop);
+	assert(!flow->actions.drop);
 	rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow) +
-			      sizeof(*rte_flow->rxqs) * action->queues_n, 0);
+			      sizeof(*rte_flow->rxqs) * flow->actions.queues_n,
+			      0);
 	if (!rte_flow) {
 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
 				   NULL, "cannot allocate flow memory");
 		return NULL;
 	}
-	for (i = 0; i < action->queues_n; ++i) {
+	for (i = 0; i < flow->actions.queues_n; ++i) {
 		struct rxq_ctrl *rxq;
 
-		rxq = container_of((*priv->rxqs)[action->queues[i]],
+		rxq = container_of((*priv->rxqs)[flow->actions.queues[i]],
 				   struct rxq_ctrl, rxq);
 		wqs[i] = rxq->wq;
 		rte_flow->rxqs[i] = &rxq->rxq;
 		++rte_flow->rxqs_n;
-		rxq->rxq.mark |= action->mark;
+		rxq->rxq.mark |= flow->actions.mark;
 	}
 	/* finalise indirection table. */
 	for (j = 0; i < wqs_n; ++i, ++j) {
 		wqs[i] = wqs[j];
-		if (j == action->queues_n)
+		if (j == flow->actions.queues_n)
 			j = 0;
 	}
-	rte_flow->mark = action->mark;
+	rte_flow->mark = flow->actions.mark;
 	rte_flow->ibv_attr = flow->ibv_attr;
 	rte_flow->hash_fields = flow->hash_fields;
 	rte_flow->ind_table = ibv_create_rwq_ind_table(
 		priv->ctx,
 		&(struct ibv_rwq_ind_table_init_attr){
-			.log_ind_tbl_size = log2above(action->queues_n),
+			.log_ind_tbl_size = log2above(flow->actions.queues_n),
 			.ind_tbl = wqs,
 			.comp_mask = 0,
 		});
@@ -1165,18 +1160,17 @@ priv_flow_create(struct priv *priv,
 		 struct rte_flow_error *error)
 {
 	struct rte_flow *rte_flow;
-	struct mlx5_flow flow = { .offset = sizeof(struct ibv_flow_attr), };
-	struct mlx5_flow_action action = {
-		.queue = 0,
-		.drop = 0,
-		.mark = 0,
-		.mark_id = MLX5_FLOW_MARK_DEFAULT,
-		.queues_n = 0,
+	struct mlx5_flow_parse flow = {
+		.offset = sizeof(struct ibv_flow_attr),
+		.actions = {
+			.mark_id = MLX5_FLOW_MARK_DEFAULT,
+			.queues = { 0 },
+			.queues_n = 0,
+		},
 	};
 	int err;
 
-	err = priv_flow_validate(priv, attr, items, actions, error, &flow,
-				 &action);
+	err = priv_flow_validate(priv, attr, items, actions, error, &flow);
 	if (err)
 		goto exit;
 	flow.ibv_attr = rte_malloc(__func__, flow.offset, 0);
@@ -1197,17 +1191,16 @@ priv_flow_create(struct priv *priv,
 	flow.inner = 0;
 	flow.hash_fields = 0;
 	claim_zero(priv_flow_validate(priv, attr, items, actions,
-				      error, &flow, &action));
-	if (action.mark && !action.drop) {
-		mlx5_flow_create_flag_mark(&flow, action.mark_id);
+				      error, &flow));
+	if (flow.actions.mark && !flow.actions.drop) {
+		mlx5_flow_create_flag_mark(&flow, flow.actions.mark_id);
 		flow.offset += sizeof(struct ibv_flow_spec_action_tag);
 	}
-	if (action.drop)
+	if (flow.actions.drop)
 		rte_flow =
 			priv_flow_create_action_queue_drop(priv, &flow, error);
 	else
-		rte_flow = priv_flow_create_action_queue(priv, &flow, &action,
-							 error);
+		rte_flow = priv_flow_create_action_queue(priv, &flow, error);
 	if (!rte_flow)
 		goto exit;
 	return rte_flow;
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v3 02/30] net/mlx5: remove flow director support
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (26 preceding siblings ...)
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 01/30] net/mlx5: merge action and flow parser structure Nelio Laranjeiro
@ 2017-10-09 14:44 ` Nelio Laranjeiro
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 03/30] net/mlx5: prefix Rx structures and functions Nelio Laranjeiro
                   ` (27 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-09 14:44 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
Generic flow API should be used for flow steering as it provides a better
and easier way to configure flows.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
 doc/guides/nics/features/mlx5.ini |    1 -
 doc/guides/nics/mlx5.rst          |    2 -
 drivers/net/mlx5/Makefile         |    1 -
 drivers/net/mlx5/mlx5.c           |    8 -
 drivers/net/mlx5/mlx5.h           |   12 +-
 drivers/net/mlx5/mlx5_fdir.c      | 1091 -------------------------------------
 drivers/net/mlx5/mlx5_flow.c      |   43 ++
 drivers/net/mlx5/mlx5_rxq.c       |    2 -
 drivers/net/mlx5/mlx5_rxtx.h      |    9 -
 drivers/net/mlx5/mlx5_trigger.c   |    3 -
 10 files changed, 44 insertions(+), 1128 deletions(-)
 delete mode 100644 drivers/net/mlx5/mlx5_fdir.c
diff --git a/doc/guides/nics/features/mlx5.ini b/doc/guides/nics/features/mlx5.ini
index c363639..34a796d 100644
--- a/doc/guides/nics/features/mlx5.ini
+++ b/doc/guides/nics/features/mlx5.ini
@@ -23,7 +23,6 @@ RSS key update       = Y
 RSS reta update      = Y
 SR-IOV               = Y
 VLAN filter          = Y
-Flow director        = Y
 Flow API             = Y
 CRC offload          = Y
 VLAN offload         = Y
diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index d24941a..09fb738 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -89,8 +89,6 @@ Features
 - Promiscuous mode.
 - Multicast promiscuous mode.
 - Hardware checksum offloads.
-- Flow director (RTE_FDIR_MODE_PERFECT, RTE_FDIR_MODE_PERFECT_MAC_VLAN and
-  RTE_ETH_FDIR_REJECT).
 - Flow API.
 - Multiple process.
 - KVM and VMware ESX SR-IOV modes are supported.
diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index 816a9cc..7af5e61 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -49,7 +49,6 @@ SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rxmode.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_vlan.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_stats.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rss.c
-SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_fdir.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_mr.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_socket.c
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 92adbcd..64b16e5 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -201,10 +201,6 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	priv_mac_addrs_disable(priv);
 	priv_destroy_hash_rxqs(priv);
 
-	/* Remove flow director elements. */
-	priv_fdir_disable(priv);
-	priv_fdir_delete_filters_list(priv);
-
 	/* Prevent crashes when queues are still in use. */
 	dev->rx_pkt_burst = removed_rx_burst;
 	dev->tx_pkt_burst = removed_tx_burst;
@@ -844,10 +840,6 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 		claim_zero(priv_mac_addr_add(priv, 0,
 					     (const uint8_t (*)[ETHER_ADDR_LEN])
 					     mac.addr_bytes));
-		/* Initialize FD filters list. */
-		err = fdir_init_filters_list(priv);
-		if (err)
-			goto port_error;
 #ifndef NDEBUG
 		{
 			char ifname[IF_NAMESIZE];
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 928aeb6..adac5f4 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -145,8 +145,6 @@ struct priv {
 	struct rte_intr_handle intr_handle; /* Interrupt handler. */
 	unsigned int (*reta_idx)[]; /* RETA index table. */
 	unsigned int reta_idx_n; /* RETA index size. */
-	struct fdir_filter_list *fdir_filter_list; /* Flow director rules. */
-	struct fdir_queue *fdir_drop_queue; /* Flow director drop queue. */
 	struct rte_flow_drop *flow_drop_queue; /* Flow drop queue. */
 	TAILQ_HEAD(mlx5_flows, rte_flow) flows; /* RTE Flow rules. */
 	uint32_t link_speed_capa; /* Link speed capabilities. */
@@ -273,18 +271,10 @@ void mlx5_vlan_strip_queue_set(struct rte_eth_dev *, uint16_t, int);
 int mlx5_dev_start(struct rte_eth_dev *);
 void mlx5_dev_stop(struct rte_eth_dev *);
 
-/* mlx5_fdir.c */
+/* mlx5_flow.c */
 
-void priv_fdir_queue_destroy(struct priv *, struct fdir_queue *);
-int fdir_init_filters_list(struct priv *);
-void priv_fdir_delete_filters_list(struct priv *);
-void priv_fdir_disable(struct priv *);
-void priv_fdir_enable(struct priv *);
 int mlx5_dev_filter_ctrl(struct rte_eth_dev *, enum rte_filter_type,
 			 enum rte_filter_op, void *);
-
-/* mlx5_flow.c */
-
 int mlx5_flow_validate(struct rte_eth_dev *, const struct rte_flow_attr *,
 		       const struct rte_flow_item [],
 		       const struct rte_flow_action [],
diff --git a/drivers/net/mlx5/mlx5_fdir.c b/drivers/net/mlx5/mlx5_fdir.c
deleted file mode 100644
index 66e3818..0000000
--- a/drivers/net/mlx5/mlx5_fdir.c
+++ /dev/null
@@ -1,1091 +0,0 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright 2015 6WIND S.A.
- *   Copyright 2015 Mellanox.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of 6WIND S.A. nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <stddef.h>
-#include <assert.h>
-#include <stdint.h>
-#include <string.h>
-#include <errno.h>
-
-/* Verbs header. */
-/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
-#include <infiniband/verbs.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
-
-#include <rte_ether.h>
-#include <rte_malloc.h>
-#include <rte_ethdev.h>
-#include <rte_common.h>
-#include <rte_flow.h>
-#include <rte_flow_driver.h>
-
-#include "mlx5.h"
-#include "mlx5_rxtx.h"
-
-struct fdir_flow_desc {
-	uint16_t dst_port;
-	uint16_t src_port;
-	uint32_t src_ip[4];
-	uint32_t dst_ip[4];
-	uint8_t	mac[6];
-	uint16_t vlan_tag;
-	enum hash_rxq_type type;
-};
-
-struct mlx5_fdir_filter {
-	LIST_ENTRY(mlx5_fdir_filter) next;
-	uint16_t queue; /* Queue assigned to if FDIR match. */
-	enum rte_eth_fdir_behavior behavior;
-	struct fdir_flow_desc desc;
-	struct ibv_flow *flow;
-};
-
-LIST_HEAD(fdir_filter_list, mlx5_fdir_filter);
-
-/**
- * Convert struct rte_eth_fdir_filter to mlx5 filter descriptor.
- *
- * @param[in] fdir_filter
- *   DPDK filter structure to convert.
- * @param[out] desc
- *   Resulting mlx5 filter descriptor.
- * @param mode
- *   Flow director mode.
- */
-static void
-fdir_filter_to_flow_desc(const struct rte_eth_fdir_filter *fdir_filter,
-			 struct fdir_flow_desc *desc, enum rte_fdir_mode mode)
-{
-	/* Initialize descriptor. */
-	memset(desc, 0, sizeof(*desc));
-
-	/* Set VLAN ID. */
-	desc->vlan_tag = fdir_filter->input.flow_ext.vlan_tci;
-
-	/* Set MAC address. */
-	if (mode == RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
-		rte_memcpy(desc->mac,
-			   fdir_filter->input.flow.mac_vlan_flow.mac_addr.
-				addr_bytes,
-			   sizeof(desc->mac));
-		desc->type = HASH_RXQ_ETH;
-		return;
-	}
-
-	/* Set mode */
-	switch (fdir_filter->input.flow_type) {
-	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
-		desc->type = HASH_RXQ_UDPV4;
-		break;
-	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
-		desc->type = HASH_RXQ_TCPV4;
-		break;
-	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
-		desc->type = HASH_RXQ_IPV4;
-		break;
-	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
-		desc->type = HASH_RXQ_UDPV6;
-		break;
-	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
-		desc->type = HASH_RXQ_TCPV6;
-		break;
-	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
-		desc->type = HASH_RXQ_IPV6;
-		break;
-	default:
-		break;
-	}
-
-	/* Set flow values */
-	switch (fdir_filter->input.flow_type) {
-	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
-	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
-		desc->src_port = fdir_filter->input.flow.udp4_flow.src_port;
-		desc->dst_port = fdir_filter->input.flow.udp4_flow.dst_port;
-		/* fallthrough */
-	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
-		desc->src_ip[0] = fdir_filter->input.flow.ip4_flow.src_ip;
-		desc->dst_ip[0] = fdir_filter->input.flow.ip4_flow.dst_ip;
-		break;
-	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
-	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
-		desc->src_port = fdir_filter->input.flow.udp6_flow.src_port;
-		desc->dst_port = fdir_filter->input.flow.udp6_flow.dst_port;
-		/* Fall through. */
-	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
-		rte_memcpy(desc->src_ip,
-			   fdir_filter->input.flow.ipv6_flow.src_ip,
-			   sizeof(desc->src_ip));
-		rte_memcpy(desc->dst_ip,
-			   fdir_filter->input.flow.ipv6_flow.dst_ip,
-			   sizeof(desc->dst_ip));
-		break;
-	default:
-		break;
-	}
-}
-
-/**
- * Check if two flow descriptors overlap according to configured mask.
- *
- * @param priv
- *   Private structure that provides flow director mask.
- * @param desc1
- *   First flow descriptor to compare.
- * @param desc2
- *   Second flow descriptor to compare.
- *
- * @return
- *   Nonzero if descriptors overlap.
- */
-static int
-priv_fdir_overlap(const struct priv *priv,
-		  const struct fdir_flow_desc *desc1,
-		  const struct fdir_flow_desc *desc2)
-{
-	const struct rte_eth_fdir_masks *mask =
-		&priv->dev->data->dev_conf.fdir_conf.mask;
-	unsigned int i;
-
-	if (desc1->type != desc2->type)
-		return 0;
-	/* Ignore non masked bits. */
-	for (i = 0; i != RTE_DIM(desc1->mac); ++i)
-		if ((desc1->mac[i] & mask->mac_addr_byte_mask) !=
-		    (desc2->mac[i] & mask->mac_addr_byte_mask))
-			return 0;
-	if (((desc1->src_port & mask->src_port_mask) !=
-	     (desc2->src_port & mask->src_port_mask)) ||
-	    ((desc1->dst_port & mask->dst_port_mask) !=
-	     (desc2->dst_port & mask->dst_port_mask)))
-		return 0;
-	switch (desc1->type) {
-	case HASH_RXQ_IPV4:
-	case HASH_RXQ_UDPV4:
-	case HASH_RXQ_TCPV4:
-		if (((desc1->src_ip[0] & mask->ipv4_mask.src_ip) !=
-		     (desc2->src_ip[0] & mask->ipv4_mask.src_ip)) ||
-		    ((desc1->dst_ip[0] & mask->ipv4_mask.dst_ip) !=
-		     (desc2->dst_ip[0] & mask->ipv4_mask.dst_ip)))
-			return 0;
-		break;
-	case HASH_RXQ_IPV6:
-	case HASH_RXQ_UDPV6:
-	case HASH_RXQ_TCPV6:
-		for (i = 0; i != RTE_DIM(desc1->src_ip); ++i)
-			if (((desc1->src_ip[i] & mask->ipv6_mask.src_ip[i]) !=
-			     (desc2->src_ip[i] & mask->ipv6_mask.src_ip[i])) ||
-			    ((desc1->dst_ip[i] & mask->ipv6_mask.dst_ip[i]) !=
-			     (desc2->dst_ip[i] & mask->ipv6_mask.dst_ip[i])))
-				return 0;
-		break;
-	default:
-		break;
-	}
-	return 1;
-}
-
-/**
- * Create flow director steering rule for a specific filter.
- *
- * @param priv
- *   Private structure.
- * @param mlx5_fdir_filter
- *   Filter to create a steering rule for.
- * @param fdir_queue
- *   Flow director queue for matching packets.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-static int
-priv_fdir_flow_add(struct priv *priv,
-		   struct mlx5_fdir_filter *mlx5_fdir_filter,
-		   struct fdir_queue *fdir_queue)
-{
-	struct ibv_flow *flow;
-	struct fdir_flow_desc *desc = &mlx5_fdir_filter->desc;
-	enum rte_fdir_mode fdir_mode =
-		priv->dev->data->dev_conf.fdir_conf.mode;
-	struct rte_eth_fdir_masks *mask =
-		&priv->dev->data->dev_conf.fdir_conf.mask;
-	FLOW_ATTR_SPEC_ETH(data, priv_flow_attr(priv, NULL, 0, desc->type));
-	struct ibv_flow_attr *attr = &data->attr;
-	uintptr_t spec_offset = (uintptr_t)&data->spec;
-	struct ibv_flow_spec_eth *spec_eth;
-	struct ibv_flow_spec_ipv4 *spec_ipv4;
-	struct ibv_flow_spec_ipv6 *spec_ipv6;
-	struct ibv_flow_spec_tcp_udp *spec_tcp_udp;
-	struct mlx5_fdir_filter *iter_fdir_filter;
-	unsigned int i;
-
-	/* Abort if an existing flow overlaps this one to avoid packet
-	 * duplication, even if it targets another queue. */
-	LIST_FOREACH(iter_fdir_filter, priv->fdir_filter_list, next)
-		if ((iter_fdir_filter != mlx5_fdir_filter) &&
-		    (iter_fdir_filter->flow != NULL) &&
-		    (priv_fdir_overlap(priv,
-				       &mlx5_fdir_filter->desc,
-				       &iter_fdir_filter->desc)))
-			return EEXIST;
-
-	/*
-	 * No padding must be inserted by the compiler between attr and spec.
-	 * This layout is expected by libibverbs.
-	 */
-	assert(((uint8_t *)attr + sizeof(*attr)) == (uint8_t *)spec_offset);
-	priv_flow_attr(priv, attr, sizeof(data), desc->type);
-
-	/* Set Ethernet spec */
-	spec_eth = (struct ibv_flow_spec_eth *)spec_offset;
-
-	/* The first specification must be Ethernet. */
-	assert(spec_eth->type == IBV_FLOW_SPEC_ETH);
-	assert(spec_eth->size == sizeof(*spec_eth));
-
-	/* VLAN ID */
-	spec_eth->val.vlan_tag = desc->vlan_tag & mask->vlan_tci_mask;
-	spec_eth->mask.vlan_tag = mask->vlan_tci_mask;
-
-	/* Update priority */
-	attr->priority = 2;
-
-	if (fdir_mode == RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
-		/* MAC Address */
-		for (i = 0; i != RTE_DIM(spec_eth->mask.dst_mac); ++i) {
-			spec_eth->val.dst_mac[i] =
-				desc->mac[i] & mask->mac_addr_byte_mask;
-			spec_eth->mask.dst_mac[i] = mask->mac_addr_byte_mask;
-		}
-		goto create_flow;
-	}
-
-	switch (desc->type) {
-	case HASH_RXQ_IPV4:
-	case HASH_RXQ_UDPV4:
-	case HASH_RXQ_TCPV4:
-		spec_offset += spec_eth->size;
-
-		/* Set IP spec */
-		spec_ipv4 = (struct ibv_flow_spec_ipv4 *)spec_offset;
-
-		/* The second specification must be IP. */
-		assert(spec_ipv4->type == IBV_FLOW_SPEC_IPV4);
-		assert(spec_ipv4->size == sizeof(*spec_ipv4));
-
-		spec_ipv4->val.src_ip =
-			desc->src_ip[0] & mask->ipv4_mask.src_ip;
-		spec_ipv4->val.dst_ip =
-			desc->dst_ip[0] & mask->ipv4_mask.dst_ip;
-		spec_ipv4->mask.src_ip = mask->ipv4_mask.src_ip;
-		spec_ipv4->mask.dst_ip = mask->ipv4_mask.dst_ip;
-
-		/* Update priority */
-		attr->priority = 1;
-
-		if (desc->type == HASH_RXQ_IPV4)
-			goto create_flow;
-
-		spec_offset += spec_ipv4->size;
-		break;
-	case HASH_RXQ_IPV6:
-	case HASH_RXQ_UDPV6:
-	case HASH_RXQ_TCPV6:
-		spec_offset += spec_eth->size;
-
-		/* Set IP spec */
-		spec_ipv6 = (struct ibv_flow_spec_ipv6 *)spec_offset;
-
-		/* The second specification must be IP. */
-		assert(spec_ipv6->type == IBV_FLOW_SPEC_IPV6);
-		assert(spec_ipv6->size == sizeof(*spec_ipv6));
-
-		for (i = 0; i != RTE_DIM(desc->src_ip); ++i) {
-			((uint32_t *)spec_ipv6->val.src_ip)[i] =
-				desc->src_ip[i] & mask->ipv6_mask.src_ip[i];
-			((uint32_t *)spec_ipv6->val.dst_ip)[i] =
-				desc->dst_ip[i] & mask->ipv6_mask.dst_ip[i];
-		}
-		rte_memcpy(spec_ipv6->mask.src_ip,
-			   mask->ipv6_mask.src_ip,
-			   sizeof(spec_ipv6->mask.src_ip));
-		rte_memcpy(spec_ipv6->mask.dst_ip,
-			   mask->ipv6_mask.dst_ip,
-			   sizeof(spec_ipv6->mask.dst_ip));
-
-		/* Update priority */
-		attr->priority = 1;
-
-		if (desc->type == HASH_RXQ_IPV6)
-			goto create_flow;
-
-		spec_offset += spec_ipv6->size;
-		break;
-	default:
-		ERROR("invalid flow attribute type");
-		return EINVAL;
-	}
-
-	/* Set TCP/UDP flow specification. */
-	spec_tcp_udp = (struct ibv_flow_spec_tcp_udp *)spec_offset;
-
-	/* The third specification must be TCP/UDP. */
-	assert(spec_tcp_udp->type == IBV_FLOW_SPEC_TCP ||
-	       spec_tcp_udp->type == IBV_FLOW_SPEC_UDP);
-	assert(spec_tcp_udp->size == sizeof(*spec_tcp_udp));
-
-	spec_tcp_udp->val.src_port = desc->src_port & mask->src_port_mask;
-	spec_tcp_udp->val.dst_port = desc->dst_port & mask->dst_port_mask;
-	spec_tcp_udp->mask.src_port = mask->src_port_mask;
-	spec_tcp_udp->mask.dst_port = mask->dst_port_mask;
-
-	/* Update priority */
-	attr->priority = 0;
-
-create_flow:
-
-	errno = 0;
-	flow = ibv_create_flow(fdir_queue->qp, attr);
-	if (flow == NULL) {
-		/* It's not clear whether errno is always set in this case. */
-		ERROR("%p: flow director configuration failed, errno=%d: %s",
-		      (void *)priv, errno,
-		      (errno ? strerror(errno) : "Unknown error"));
-		if (errno)
-			return errno;
-		return EINVAL;
-	}
-
-	DEBUG("%p: added flow director rule (%p)", (void *)priv, (void *)flow);
-	mlx5_fdir_filter->flow = flow;
-	return 0;
-}
-
-/**
- * Destroy a flow director queue.
- *
- * @param fdir_queue
- *   Flow director queue to be destroyed.
- */
-void
-priv_fdir_queue_destroy(struct priv *priv, struct fdir_queue *fdir_queue)
-{
-	struct mlx5_fdir_filter *fdir_filter;
-
-	/* Disable filter flows still applying to this queue. */
-	LIST_FOREACH(fdir_filter, priv->fdir_filter_list, next) {
-		unsigned int idx = fdir_filter->queue;
-		struct rxq_ctrl *rxq_ctrl =
-			container_of((*priv->rxqs)[idx], struct rxq_ctrl, rxq);
-
-		assert(idx < priv->rxqs_n);
-		if (fdir_queue == rxq_ctrl->fdir_queue &&
-		    fdir_filter->flow != NULL) {
-			claim_zero(ibv_destroy_flow(fdir_filter->flow));
-			fdir_filter->flow = NULL;
-		}
-	}
-	assert(fdir_queue->qp);
-	claim_zero(ibv_destroy_qp(fdir_queue->qp));
-	assert(fdir_queue->ind_table);
-	claim_zero(ibv_destroy_rwq_ind_table(fdir_queue->ind_table));
-	if (fdir_queue->wq)
-		claim_zero(ibv_destroy_wq(fdir_queue->wq));
-	if (fdir_queue->cq)
-		claim_zero(ibv_destroy_cq(fdir_queue->cq));
-#ifndef NDEBUG
-	memset(fdir_queue, 0x2a, sizeof(*fdir_queue));
-#endif
-	rte_free(fdir_queue);
-}
-
-/**
- * Create a flow director queue.
- *
- * @param priv
- *   Private structure.
- * @param wq
- *   Work queue to route matched packets to, NULL if one needs to
- *   be created.
- *
- * @return
- *   Related flow director queue on success, NULL otherwise.
- */
-static struct fdir_queue *
-priv_fdir_queue_create(struct priv *priv, struct ibv_wq *wq,
-		       unsigned int socket)
-{
-	struct fdir_queue *fdir_queue;
-
-	fdir_queue = rte_calloc_socket(__func__, 1, sizeof(*fdir_queue),
-				       0, socket);
-	if (!fdir_queue) {
-		ERROR("cannot allocate flow director queue");
-		return NULL;
-	}
-	assert(priv->pd);
-	assert(priv->ctx);
-	if (!wq) {
-		fdir_queue->cq = ibv_create_cq(
-			priv->ctx, 1, NULL, NULL, 0);
-		if (!fdir_queue->cq) {
-			ERROR("cannot create flow director CQ");
-			goto error;
-		}
-		fdir_queue->wq = ibv_create_wq(
-			priv->ctx,
-			&(struct ibv_wq_init_attr){
-				.wq_type = IBV_WQT_RQ,
-				.max_wr = 1,
-				.max_sge = 1,
-				.pd = priv->pd,
-				.cq = fdir_queue->cq,
-			});
-		if (!fdir_queue->wq) {
-			ERROR("cannot create flow director WQ");
-			goto error;
-		}
-		wq = fdir_queue->wq;
-	}
-	fdir_queue->ind_table = ibv_create_rwq_ind_table(
-		priv->ctx,
-		&(struct ibv_rwq_ind_table_init_attr){
-			.log_ind_tbl_size = 0,
-			.ind_tbl = &wq,
-			.comp_mask = 0,
-		});
-	if (!fdir_queue->ind_table) {
-		ERROR("cannot create flow director indirection table");
-		goto error;
-	}
-	fdir_queue->qp = ibv_create_qp_ex(
-		priv->ctx,
-		&(struct ibv_qp_init_attr_ex){
-			.qp_type = IBV_QPT_RAW_PACKET,
-			.comp_mask =
-				IBV_QP_INIT_ATTR_PD |
-				IBV_QP_INIT_ATTR_IND_TABLE |
-				IBV_QP_INIT_ATTR_RX_HASH,
-			.rx_hash_conf = (struct ibv_rx_hash_conf){
-				.rx_hash_function =
-					IBV_RX_HASH_FUNC_TOEPLITZ,
-				.rx_hash_key_len = rss_hash_default_key_len,
-				.rx_hash_key = rss_hash_default_key,
-				.rx_hash_fields_mask = 0,
-			},
-			.rwq_ind_tbl = fdir_queue->ind_table,
-			.pd = priv->pd,
-		});
-	if (!fdir_queue->qp) {
-		ERROR("cannot create flow director hash RX QP");
-		goto error;
-	}
-	return fdir_queue;
-error:
-	assert(fdir_queue);
-	assert(!fdir_queue->qp);
-	if (fdir_queue->ind_table)
-		claim_zero(ibv_destroy_rwq_ind_table
-			   (fdir_queue->ind_table));
-	if (fdir_queue->wq)
-		claim_zero(ibv_destroy_wq(fdir_queue->wq));
-	if (fdir_queue->cq)
-		claim_zero(ibv_destroy_cq(fdir_queue->cq));
-	rte_free(fdir_queue);
-	return NULL;
-}
-
-/**
- * Get flow director queue for a specific RX queue, create it in case
- * it does not exist.
- *
- * @param priv
- *   Private structure.
- * @param idx
- *   RX queue index.
- *
- * @return
- *   Related flow director queue on success, NULL otherwise.
- */
-static struct fdir_queue *
-priv_get_fdir_queue(struct priv *priv, uint16_t idx)
-{
-	struct rxq_ctrl *rxq_ctrl =
-		container_of((*priv->rxqs)[idx], struct rxq_ctrl, rxq);
-	struct fdir_queue *fdir_queue = rxq_ctrl->fdir_queue;
-
-	assert(rxq_ctrl->wq);
-	if (fdir_queue == NULL) {
-		fdir_queue = priv_fdir_queue_create(priv, rxq_ctrl->wq,
-						    rxq_ctrl->socket);
-		rxq_ctrl->fdir_queue = fdir_queue;
-	}
-	return fdir_queue;
-}
-
-/**
- * Get or flow director drop queue. Create it if it does not exist.
- *
- * @param priv
- *   Private structure.
- *
- * @return
- *   Flow director drop queue on success, NULL otherwise.
- */
-static struct fdir_queue *
-priv_get_fdir_drop_queue(struct priv *priv)
-{
-	struct fdir_queue *fdir_queue = priv->fdir_drop_queue;
-
-	if (fdir_queue == NULL) {
-		unsigned int socket = SOCKET_ID_ANY;
-
-		/* Select a known NUMA socket if possible. */
-		if (priv->rxqs_n && (*priv->rxqs)[0])
-			socket = container_of((*priv->rxqs)[0],
-					      struct rxq_ctrl, rxq)->socket;
-		fdir_queue = priv_fdir_queue_create(priv, NULL, socket);
-		priv->fdir_drop_queue = fdir_queue;
-	}
-	return fdir_queue;
-}
-
-/**
- * Enable flow director filter and create steering rules.
- *
- * @param priv
- *   Private structure.
- * @param mlx5_fdir_filter
- *   Filter to create steering rule for.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-static int
-priv_fdir_filter_enable(struct priv *priv,
-			struct mlx5_fdir_filter *mlx5_fdir_filter)
-{
-	struct fdir_queue *fdir_queue;
-
-	/* Check if flow already exists. */
-	if (mlx5_fdir_filter->flow != NULL)
-		return 0;
-
-	/* Get fdir_queue for specific queue. */
-	if (mlx5_fdir_filter->behavior == RTE_ETH_FDIR_REJECT)
-		fdir_queue = priv_get_fdir_drop_queue(priv);
-	else
-		fdir_queue = priv_get_fdir_queue(priv,
-						 mlx5_fdir_filter->queue);
-
-	if (fdir_queue == NULL) {
-		ERROR("failed to create flow director rxq for queue %d",
-		      mlx5_fdir_filter->queue);
-		return EINVAL;
-	}
-
-	/* Create flow */
-	return priv_fdir_flow_add(priv, mlx5_fdir_filter, fdir_queue);
-}
-
-/**
- * Initialize flow director filters list.
- *
- * @param priv
- *   Private structure.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-int
-fdir_init_filters_list(struct priv *priv)
-{
-	/* Filter list initialization should be done only once. */
-	if (priv->fdir_filter_list)
-		return 0;
-
-	/* Create filters list. */
-	priv->fdir_filter_list =
-		rte_calloc(__func__, 1, sizeof(*priv->fdir_filter_list), 0);
-
-	if (priv->fdir_filter_list == NULL) {
-		int err = ENOMEM;
-
-		ERROR("cannot allocate flow director filter list: %s",
-		      strerror(err));
-		return err;
-	}
-
-	LIST_INIT(priv->fdir_filter_list);
-
-	return 0;
-}
-
-/**
- * Flush all filters.
- *
- * @param priv
- *   Private structure.
- */
-static void
-priv_fdir_filter_flush(struct priv *priv)
-{
-	struct mlx5_fdir_filter *mlx5_fdir_filter;
-
-	while ((mlx5_fdir_filter = LIST_FIRST(priv->fdir_filter_list))) {
-		struct ibv_flow *flow = mlx5_fdir_filter->flow;
-
-		DEBUG("%p: flushing flow director filter %p",
-		      (void *)priv, (void *)mlx5_fdir_filter);
-		LIST_REMOVE(mlx5_fdir_filter, next);
-		if (flow != NULL)
-			claim_zero(ibv_destroy_flow(flow));
-		rte_free(mlx5_fdir_filter);
-	}
-}
-
-/**
- * Remove all flow director filters and delete list.
- *
- * @param priv
- *   Private structure.
- */
-void
-priv_fdir_delete_filters_list(struct priv *priv)
-{
-	priv_fdir_filter_flush(priv);
-	rte_free(priv->fdir_filter_list);
-	priv->fdir_filter_list = NULL;
-}
-
-/**
- * Disable flow director, remove all steering rules.
- *
- * @param priv
- *   Private structure.
- */
-void
-priv_fdir_disable(struct priv *priv)
-{
-	unsigned int i;
-	struct mlx5_fdir_filter *mlx5_fdir_filter;
-
-	/* Run on every flow director filter and destroy flow handle. */
-	LIST_FOREACH(mlx5_fdir_filter, priv->fdir_filter_list, next) {
-		struct ibv_flow *flow;
-
-		/* Only valid elements should be in the list */
-		assert(mlx5_fdir_filter != NULL);
-		flow = mlx5_fdir_filter->flow;
-
-		/* Destroy flow handle */
-		if (flow != NULL) {
-			claim_zero(ibv_destroy_flow(flow));
-			mlx5_fdir_filter->flow = NULL;
-		}
-	}
-
-	/* Destroy flow director context in each RX queue. */
-	for (i = 0; (i != priv->rxqs_n); i++) {
-		struct rxq_ctrl *rxq_ctrl;
-
-		if (!(*priv->rxqs)[i])
-			continue;
-		rxq_ctrl = container_of((*priv->rxqs)[i], struct rxq_ctrl, rxq);
-		if (!rxq_ctrl->fdir_queue)
-			continue;
-		priv_fdir_queue_destroy(priv, rxq_ctrl->fdir_queue);
-		rxq_ctrl->fdir_queue = NULL;
-	}
-	if (priv->fdir_drop_queue) {
-		priv_fdir_queue_destroy(priv, priv->fdir_drop_queue);
-		priv->fdir_drop_queue = NULL;
-	}
-}
-
-/**
- * Enable flow director, create steering rules.
- *
- * @param priv
- *   Private structure.
- */
-void
-priv_fdir_enable(struct priv *priv)
-{
-	struct mlx5_fdir_filter *mlx5_fdir_filter;
-
-	/* Run on every fdir filter and create flow handle */
-	LIST_FOREACH(mlx5_fdir_filter, priv->fdir_filter_list, next) {
-		/* Only valid elements should be in the list */
-		assert(mlx5_fdir_filter != NULL);
-
-		priv_fdir_filter_enable(priv, mlx5_fdir_filter);
-	}
-}
-
-/**
- * Find specific filter in list.
- *
- * @param priv
- *   Private structure.
- * @param fdir_filter
- *   Flow director filter to find.
- *
- * @return
- *   Filter element if found, otherwise NULL.
- */
-static struct mlx5_fdir_filter *
-priv_find_filter_in_list(struct priv *priv,
-			 const struct rte_eth_fdir_filter *fdir_filter)
-{
-	struct fdir_flow_desc desc;
-	struct mlx5_fdir_filter *mlx5_fdir_filter;
-	enum rte_fdir_mode fdir_mode = priv->dev->data->dev_conf.fdir_conf.mode;
-
-	/* Get flow director filter to look for. */
-	fdir_filter_to_flow_desc(fdir_filter, &desc, fdir_mode);
-
-	/* Look for the requested element. */
-	LIST_FOREACH(mlx5_fdir_filter, priv->fdir_filter_list, next) {
-		/* Only valid elements should be in the list. */
-		assert(mlx5_fdir_filter != NULL);
-
-		/* Return matching filter. */
-		if (!memcmp(&desc, &mlx5_fdir_filter->desc, sizeof(desc)))
-			return mlx5_fdir_filter;
-	}
-
-	/* Filter not found */
-	return NULL;
-}
-
-/**
- * Add new flow director filter and store it in list.
- *
- * @param priv
- *   Private structure.
- * @param fdir_filter
- *   Flow director filter to add.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-static int
-priv_fdir_filter_add(struct priv *priv,
-		     const struct rte_eth_fdir_filter *fdir_filter)
-{
-	struct mlx5_fdir_filter *mlx5_fdir_filter;
-	enum rte_fdir_mode fdir_mode = priv->dev->data->dev_conf.fdir_conf.mode;
-	int err = 0;
-
-	/* Validate queue number. */
-	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
-		ERROR("invalid queue number %d", fdir_filter->action.rx_queue);
-		return EINVAL;
-	}
-
-	/* Duplicate filters are currently unsupported. */
-	mlx5_fdir_filter = priv_find_filter_in_list(priv, fdir_filter);
-	if (mlx5_fdir_filter != NULL) {
-		ERROR("filter already exists");
-		return EINVAL;
-	}
-
-	/* Create new flow director filter. */
-	mlx5_fdir_filter =
-		rte_calloc(__func__, 1, sizeof(*mlx5_fdir_filter), 0);
-	if (mlx5_fdir_filter == NULL) {
-		err = ENOMEM;
-		ERROR("cannot allocate flow director filter: %s",
-		      strerror(err));
-		return err;
-	}
-
-	/* Set action parameters. */
-	mlx5_fdir_filter->queue = fdir_filter->action.rx_queue;
-	mlx5_fdir_filter->behavior = fdir_filter->action.behavior;
-
-	/* Convert to mlx5 filter descriptor. */
-	fdir_filter_to_flow_desc(fdir_filter,
-				 &mlx5_fdir_filter->desc, fdir_mode);
-
-	/* Insert new filter into list. */
-	LIST_INSERT_HEAD(priv->fdir_filter_list, mlx5_fdir_filter, next);
-
-	DEBUG("%p: flow director filter %p added",
-	      (void *)priv, (void *)mlx5_fdir_filter);
-
-	/* Enable filter immediately if device is started. */
-	if (priv->started)
-		err = priv_fdir_filter_enable(priv, mlx5_fdir_filter);
-
-	return err;
-}
-
-/**
- * Update queue for specific filter.
- *
- * @param priv
- *   Private structure.
- * @param fdir_filter
- *   Filter to be updated.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-static int
-priv_fdir_filter_update(struct priv *priv,
-			const struct rte_eth_fdir_filter *fdir_filter)
-{
-	struct mlx5_fdir_filter *mlx5_fdir_filter;
-
-	/* Validate queue number. */
-	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
-		ERROR("invalid queue number %d", fdir_filter->action.rx_queue);
-		return EINVAL;
-	}
-
-	mlx5_fdir_filter = priv_find_filter_in_list(priv, fdir_filter);
-	if (mlx5_fdir_filter != NULL) {
-		struct ibv_flow *flow = mlx5_fdir_filter->flow;
-		int err = 0;
-
-		/* Update queue number. */
-		mlx5_fdir_filter->queue = fdir_filter->action.rx_queue;
-
-		/* Destroy flow handle. */
-		if (flow != NULL) {
-			claim_zero(ibv_destroy_flow(flow));
-			mlx5_fdir_filter->flow = NULL;
-		}
-		DEBUG("%p: flow director filter %p updated",
-		      (void *)priv, (void *)mlx5_fdir_filter);
-
-		/* Enable filter if device is started. */
-		if (priv->started)
-			err = priv_fdir_filter_enable(priv, mlx5_fdir_filter);
-
-		return err;
-	}
-
-	/* Filter not found, create it. */
-	DEBUG("%p: filter not found for update, creating new filter",
-	      (void *)priv);
-	return priv_fdir_filter_add(priv, fdir_filter);
-}
-
-/**
- * Delete specific filter.
- *
- * @param priv
- *   Private structure.
- * @param fdir_filter
- *   Filter to be deleted.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-static int
-priv_fdir_filter_delete(struct priv *priv,
-			const struct rte_eth_fdir_filter *fdir_filter)
-{
-	struct mlx5_fdir_filter *mlx5_fdir_filter;
-
-	mlx5_fdir_filter = priv_find_filter_in_list(priv, fdir_filter);
-	if (mlx5_fdir_filter != NULL) {
-		struct ibv_flow *flow = mlx5_fdir_filter->flow;
-
-		/* Remove element from list. */
-		LIST_REMOVE(mlx5_fdir_filter, next);
-
-		/* Destroy flow handle. */
-		if (flow != NULL) {
-			claim_zero(ibv_destroy_flow(flow));
-			mlx5_fdir_filter->flow = NULL;
-		}
-
-		DEBUG("%p: flow director filter %p deleted",
-		      (void *)priv, (void *)mlx5_fdir_filter);
-
-		/* Delete filter. */
-		rte_free(mlx5_fdir_filter);
-
-		return 0;
-	}
-
-	ERROR("%p: flow director delete failed, cannot find filter",
-	      (void *)priv);
-	return EINVAL;
-}
-
-/**
- * Get flow director information.
- *
- * @param priv
- *   Private structure.
- * @param[out] fdir_info
- *   Resulting flow director information.
- */
-static void
-priv_fdir_info_get(struct priv *priv, struct rte_eth_fdir_info *fdir_info)
-{
-	struct rte_eth_fdir_masks *mask =
-		&priv->dev->data->dev_conf.fdir_conf.mask;
-
-	fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
-	fdir_info->guarant_spc = 0;
-
-	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
-
-	fdir_info->max_flexpayload = 0;
-	fdir_info->flow_types_mask[0] = 0;
-
-	fdir_info->flex_payload_unit = 0;
-	fdir_info->max_flex_payload_segment_num = 0;
-	fdir_info->flex_payload_limit = 0;
-	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
-}
-
-/**
- * Deal with flow director operations.
- *
- * @param priv
- *   Pointer to private structure.
- * @param filter_op
- *   Operation to perform.
- * @param arg
- *   Pointer to operation-specific structure.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-static int
-priv_fdir_ctrl_func(struct priv *priv, enum rte_filter_op filter_op, void *arg)
-{
-	enum rte_fdir_mode fdir_mode =
-		priv->dev->data->dev_conf.fdir_conf.mode;
-	int ret = 0;
-
-	if (filter_op == RTE_ETH_FILTER_NOP)
-		return 0;
-
-	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
-	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
-		ERROR("%p: flow director mode %d not supported",
-		      (void *)priv, fdir_mode);
-		return EINVAL;
-	}
-
-	switch (filter_op) {
-	case RTE_ETH_FILTER_ADD:
-		ret = priv_fdir_filter_add(priv, arg);
-		break;
-	case RTE_ETH_FILTER_UPDATE:
-		ret = priv_fdir_filter_update(priv, arg);
-		break;
-	case RTE_ETH_FILTER_DELETE:
-		ret = priv_fdir_filter_delete(priv, arg);
-		break;
-	case RTE_ETH_FILTER_FLUSH:
-		priv_fdir_filter_flush(priv);
-		break;
-	case RTE_ETH_FILTER_INFO:
-		priv_fdir_info_get(priv, arg);
-		break;
-	default:
-		DEBUG("%p: unknown operation %u", (void *)priv, filter_op);
-		ret = EINVAL;
-		break;
-	}
-	return ret;
-}
-
-static const struct rte_flow_ops mlx5_flow_ops = {
-	.validate = mlx5_flow_validate,
-	.create = mlx5_flow_create,
-	.destroy = mlx5_flow_destroy,
-	.flush = mlx5_flow_flush,
-	.query = NULL,
-	.isolate = mlx5_flow_isolate,
-};
-
-/**
- * Manage filter operations.
- *
- * @param dev
- *   Pointer to Ethernet device structure.
- * @param filter_type
- *   Filter type.
- * @param filter_op
- *   Operation to perform.
- * @param arg
- *   Pointer to operation-specific structure.
- *
- * @return
- *   0 on success, negative errno value on failure.
- */
-int
-mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
-		     enum rte_filter_type filter_type,
-		     enum rte_filter_op filter_op,
-		     void *arg)
-{
-	int ret = EINVAL;
-	struct priv *priv = dev->data->dev_private;
-
-	if (mlx5_is_secondary())
-		return -E_RTE_SECONDARY;
-	switch (filter_type) {
-	case RTE_ETH_FILTER_GENERIC:
-		if (filter_op != RTE_ETH_FILTER_GET)
-			return -EINVAL;
-		*(const void **)arg = &mlx5_flow_ops;
-		return 0;
-	case RTE_ETH_FILTER_FDIR:
-		priv_lock(priv);
-		ret = priv_fdir_ctrl_func(priv, filter_op, arg);
-		priv_unlock(priv);
-		break;
-	default:
-		ERROR("%p: filter type (%d) not supported",
-		      (void *)dev, filter_type);
-		break;
-	}
-
-	return -ret;
-}
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index fb30803..266ae24 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -298,6 +298,49 @@ struct rte_flow_drop {
 	struct ibv_cq *cq; /**< Verbs completion queue. */
 };
 
+static const struct rte_flow_ops mlx5_flow_ops = {
+	.validate = mlx5_flow_validate,
+	.create = mlx5_flow_create,
+	.destroy = mlx5_flow_destroy,
+	.flush = mlx5_flow_flush,
+	.query = NULL,
+	.isolate = mlx5_flow_isolate,
+};
+
+/**
+ * Manage filter operations.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param filter_type
+ *   Filter type.
+ * @param filter_op
+ *   Operation to perform.
+ * @param arg
+ *   Pointer to operation-specific structure.
+ *
+ * @return
+ *   0 on success, negative errno value on failure.
+ */
+int
+mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
+		     enum rte_filter_type filter_type,
+		     enum rte_filter_op filter_op,
+		     void *arg)
+{
+	int ret = EINVAL;
+
+	if (filter_type == RTE_ETH_FILTER_GENERIC) {
+		if (filter_op != RTE_ETH_FILTER_GET)
+			return -EINVAL;
+		*(const void **)arg = &mlx5_flow_ops;
+		return 0;
+	}
+	ERROR("%p: filter type (%d) not supported",
+	      (void *)dev, filter_type);
+	return -ret;
+}
+
 /**
  * Check support for a given item.
  *
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index b71f72f..5819e92 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -760,8 +760,6 @@ rxq_cleanup(struct rxq_ctrl *rxq_ctrl)
 {
 	DEBUG("cleaning up %p", (void *)rxq_ctrl);
 	rxq_free_elts(rxq_ctrl);
-	if (rxq_ctrl->fdir_queue != NULL)
-		priv_fdir_queue_destroy(rxq_ctrl->priv, rxq_ctrl->fdir_queue);
 	if (rxq_ctrl->wq != NULL)
 		claim_zero(ibv_destroy_wq(rxq_ctrl->wq));
 	if (rxq_ctrl->cq != NULL)
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index bd1d601..37698ab 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -78,14 +78,6 @@ struct mlx5_txq_stats {
 	uint64_t oerrors; /**< Total number of failed transmitted packets. */
 };
 
-/* Flow director queue structure. */
-struct fdir_queue {
-	struct ibv_qp *qp; /* Associated RX QP. */
-	struct ibv_rwq_ind_table *ind_table; /* Indirection table. */
-	struct ibv_wq *wq; /* Work queue. */
-	struct ibv_cq *cq; /* Completion queue. */
-};
-
 struct priv;
 
 /* Compressed CQE context. */
@@ -134,7 +126,6 @@ struct rxq_ctrl {
 	struct priv *priv; /* Back pointer to private data. */
 	struct ibv_cq *cq; /* Completion Queue. */
 	struct ibv_wq *wq; /* Work Queue. */
-	struct fdir_queue *fdir_queue; /* Flow director queue. */
 	struct ibv_mr *mr; /* Memory Region (for mp). */
 	struct ibv_comp_channel *channel;
 	unsigned int socket; /* CPU socket ID for allocations. */
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 51c31aa..0d0f340 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -80,8 +80,6 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 		      (void *)priv, strerror(err));
 		goto error;
 	}
-	if (dev->data->dev_conf.fdir_conf.mode != RTE_FDIR_MODE_NONE)
-		priv_fdir_enable(priv);
 	err = priv_flow_start(priv);
 	if (err) {
 		priv->started = 0;
@@ -135,7 +133,6 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
 	priv_special_flow_disable_all(priv);
 	priv_mac_addrs_disable(priv);
 	priv_destroy_hash_rxqs(priv);
-	priv_fdir_disable(priv);
 	priv_flow_stop(priv);
 	priv_rx_intr_vec_disable(priv);
 	priv_dev_interrupt_handler_uninstall(priv, dev);
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v3 03/30] net/mlx5: prefix Rx structures and functions
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (27 preceding siblings ...)
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 02/30] net/mlx5: remove flow director support Nelio Laranjeiro
@ 2017-10-09 14:44 ` Nelio Laranjeiro
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 04/30] net/mlx5: prefix Tx " Nelio Laranjeiro
                   ` (26 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-09 14:44 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
Prefix struct rxq_ctrl and associated functions with mlx5.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5.c              |  8 ++---
 drivers/net/mlx5/mlx5.h              |  4 +--
 drivers/net/mlx5/mlx5_flow.c         | 12 ++++----
 drivers/net/mlx5/mlx5_rxq.c          | 57 +++++++++++++++++++-----------------
 drivers/net/mlx5/mlx5_rxtx.c         | 14 ++++-----
 drivers/net/mlx5/mlx5_rxtx.h         | 10 +++----
 drivers/net/mlx5/mlx5_rxtx_vec_sse.c | 23 ++++++++-------
 drivers/net/mlx5/mlx5_stats.c        |  2 +-
 drivers/net/mlx5/mlx5_vlan.c         |  5 ++--
 9 files changed, 70 insertions(+), 65 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 64b16e5..40499b1 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -208,14 +208,14 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 		/* XXX race condition if mlx5_rx_burst() is still running. */
 		usleep(1000);
 		for (i = 0; (i != priv->rxqs_n); ++i) {
-			struct rxq *rxq = (*priv->rxqs)[i];
-			struct rxq_ctrl *rxq_ctrl;
+			struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
+			struct mlx5_rxq_ctrl *rxq_ctrl;
 
 			if (rxq == NULL)
 				continue;
-			rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
+			rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq);
 			(*priv->rxqs)[i] = NULL;
-			rxq_cleanup(rxq_ctrl);
+			mlx5_rxq_cleanup(rxq_ctrl);
 			rte_free(rxq_ctrl);
 		}
 		priv->rxqs_n = 0;
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index adac5f4..ddaf227 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -130,7 +130,7 @@ struct priv {
 	/* RX/TX queues. */
 	unsigned int rxqs_n; /* RX queues array size. */
 	unsigned int txqs_n; /* TX queues array size. */
-	struct rxq *(*rxqs)[]; /* RX queues. */
+	struct mlx5_rxq_data *(*rxqs)[]; /* RX queues. */
 	struct txq *(*txqs)[]; /* TX queues. */
 	/* Indirection tables referencing all RX WQs. */
 	struct ibv_rwq_ind_table *(*ind_tables)[];
@@ -290,7 +290,7 @@ int mlx5_flow_flush(struct rte_eth_dev *, struct rte_flow_error *);
 int mlx5_flow_isolate(struct rte_eth_dev *, int, struct rte_flow_error *);
 int priv_flow_start(struct priv *);
 void priv_flow_stop(struct priv *);
-int priv_flow_rxq_in_use(struct priv *, struct rxq *);
+int priv_flow_rxq_in_use(struct priv *, struct mlx5_rxq_data *);
 
 /* mlx5_socket.c */
 
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 266ae24..99dbd8c 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -99,7 +99,7 @@ struct rte_flow {
 	uint32_t mark:1; /**< Set if the flow is marked. */
 	uint32_t drop:1; /**< Drop queue. */
 	uint64_t hash_fields; /**< Fields that participate in the hash. */
-	struct rxq *rxqs[]; /**< Pointer to the queues array. */
+	struct mlx5_rxq_data *rxqs[]; /**< Pointer to the queues array. */
 };
 
 /** Static initializer for items. */
@@ -1105,10 +1105,10 @@ priv_flow_create_action_queue(struct priv *priv,
 		return NULL;
 	}
 	for (i = 0; i < flow->actions.queues_n; ++i) {
-		struct rxq_ctrl *rxq;
+		struct mlx5_rxq_ctrl *rxq;
 
 		rxq = container_of((*priv->rxqs)[flow->actions.queues[i]],
-				   struct rxq_ctrl, rxq);
+				   struct mlx5_rxq_ctrl, rxq);
 		wqs[i] = rxq->wq;
 		rte_flow->rxqs[i] = &rxq->rxq;
 		++rte_flow->rxqs_n;
@@ -1301,7 +1301,7 @@ priv_flow_destroy(struct priv *priv,
 		claim_zero(ibv_destroy_rwq_ind_table(flow->ind_table));
 	if (flow->mark) {
 		struct rte_flow *tmp;
-		struct rxq *rxq;
+		struct mlx5_rxq_data *rxq;
 		uint32_t mark_n = 0;
 		uint32_t queue_n;
 
@@ -1321,7 +1321,7 @@ priv_flow_destroy(struct priv *priv,
 				for (tqueue_n = 0;
 				     tqueue_n < tmp->rxqs_n;
 				     ++tqueue_n) {
-					struct rxq *trxq;
+					struct mlx5_rxq_data *trxq;
 
 					trxq = tmp->rxqs[tqueue_n];
 					if (rxq == trxq)
@@ -1585,7 +1585,7 @@ priv_flow_start(struct priv *priv)
  *   Nonzero if the queue is used by a flow.
  */
 int
-priv_flow_rxq_in_use(struct priv *priv, struct rxq *rxq)
+priv_flow_rxq_in_use(struct priv *priv, struct mlx5_rxq_data *rxq)
 {
 	struct rte_flow *flow;
 
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 5819e92..6e520fb 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -374,10 +374,10 @@ priv_create_hash_rxqs(struct priv *priv)
 		      priv->reta_idx_n);
 	}
 	for (i = 0; (i != priv->reta_idx_n); ++i) {
-		struct rxq_ctrl *rxq_ctrl;
+		struct mlx5_rxq_ctrl *rxq_ctrl;
 
 		rxq_ctrl = container_of((*priv->rxqs)[(*priv->reta_idx)[i]],
-					struct rxq_ctrl, rxq);
+					struct mlx5_rxq_ctrl, rxq);
 		wqs[i] = rxq_ctrl->wq;
 	}
 	/* Get number of hash RX queues to configure. */
@@ -636,7 +636,7 @@ priv_rehash_flows(struct priv *priv)
  *   0 on success, errno value on failure.
  */
 static int
-rxq_alloc_elts(struct rxq_ctrl *rxq_ctrl, unsigned int elts_n)
+rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl, unsigned int elts_n)
 {
 	const unsigned int sges_n = 1 << rxq_ctrl->rxq.sges_n;
 	unsigned int i;
@@ -678,7 +678,7 @@ rxq_alloc_elts(struct rxq_ctrl *rxq_ctrl, unsigned int elts_n)
 		(*rxq_ctrl->rxq.elts)[i] = buf;
 	}
 	if (rxq_check_vec_support(&rxq_ctrl->rxq) > 0) {
-		struct rxq *rxq = &rxq_ctrl->rxq;
+		struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
 		struct rte_mbuf *mbuf_init = &rxq->fake_mbuf;
 
 		assert(rxq->elts_n == rxq->cqe_n);
@@ -720,9 +720,9 @@ rxq_alloc_elts(struct rxq_ctrl *rxq_ctrl, unsigned int elts_n)
  *   Pointer to RX queue structure.
  */
 static void
-rxq_free_elts(struct rxq_ctrl *rxq_ctrl)
+rxq_free_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
 {
-	struct rxq *rxq = &rxq_ctrl->rxq;
+	struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
 	const uint16_t q_n = (1 << rxq->elts_n);
 	const uint16_t q_mask = q_n - 1;
 	uint16_t used = q_n - (rxq->rq_ci - rxq->rq_pi);
@@ -756,7 +756,7 @@ rxq_free_elts(struct rxq_ctrl *rxq_ctrl)
  *   Pointer to RX queue structure.
  */
 void
-rxq_cleanup(struct rxq_ctrl *rxq_ctrl)
+mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl)
 {
 	DEBUG("cleaning up %p", (void *)rxq_ctrl);
 	rxq_free_elts(rxq_ctrl);
@@ -781,7 +781,7 @@ rxq_cleanup(struct rxq_ctrl *rxq_ctrl)
  *   0 on success, errno value on failure.
  */
 static inline int
-rxq_setup(struct rxq_ctrl *tmpl)
+rxq_setup(struct mlx5_rxq_ctrl *tmpl)
 {
 	struct ibv_cq *ibcq = tmpl->cq;
 	struct mlx5dv_cq cq_info;
@@ -848,12 +848,12 @@ rxq_setup(struct rxq_ctrl *tmpl)
  *   0 on success, errno value on failure.
  */
 static int
-rxq_ctrl_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl,
+rxq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
 	       uint16_t desc, unsigned int socket,
 	       const struct rte_eth_rxconf *conf, struct rte_mempool *mp)
 {
 	struct priv *priv = dev->data->dev_private;
-	struct rxq_ctrl tmpl = {
+	struct mlx5_rxq_ctrl tmpl = {
 		.priv = priv,
 		.socket = socket,
 		.rxq = {
@@ -1072,7 +1072,7 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl,
 	}
 	/* Clean up rxq in case we're reinitializing it. */
 	DEBUG("%p: cleaning-up old rxq just in case", (void *)rxq_ctrl);
-	rxq_cleanup(rxq_ctrl);
+	mlx5_rxq_cleanup(rxq_ctrl);
 	/* Move mbuf pointers to dedicated storage area in RX queue. */
 	elts = (void *)(rxq_ctrl + 1);
 	rte_memcpy(elts, tmpl.rxq.elts, sizeof(*elts));
@@ -1091,7 +1091,7 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl,
 	return 0;
 error:
 	elts = tmpl.rxq.elts;
-	rxq_cleanup(&tmpl);
+	mlx5_rxq_cleanup(&tmpl);
 	rte_free(elts);
 	assert(ret > 0);
 	return ret;
@@ -1122,8 +1122,9 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		    struct rte_mempool *mp)
 {
 	struct priv *priv = dev->data->dev_private;
-	struct rxq *rxq = (*priv->rxqs)[idx];
-	struct rxq_ctrl *rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
+	struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
+	struct mlx5_rxq_ctrl *rxq_ctrl =
+		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
 	const uint16_t desc_n =
 		desc + priv->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
 	int ret;
@@ -1154,7 +1155,7 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 			return -EEXIST;
 		}
 		(*priv->rxqs)[idx] = NULL;
-		rxq_cleanup(rxq_ctrl);
+		mlx5_rxq_cleanup(rxq_ctrl);
 		/* Resize if rxq size is changed. */
 		if (rxq_ctrl->rxq.elts_n != log2above(desc)) {
 			rxq_ctrl = rte_realloc(rxq_ctrl,
@@ -1202,8 +1203,8 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 void
 mlx5_rx_queue_release(void *dpdk_rxq)
 {
-	struct rxq *rxq = (struct rxq *)dpdk_rxq;
-	struct rxq_ctrl *rxq_ctrl;
+	struct mlx5_rxq_data *rxq = (struct mlx5_rxq_data *)dpdk_rxq;
+	struct mlx5_rxq_ctrl *rxq_ctrl;
 	struct priv *priv;
 	unsigned int i;
 
@@ -1212,7 +1213,7 @@ mlx5_rx_queue_release(void *dpdk_rxq)
 
 	if (rxq == NULL)
 		return;
-	rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
+	rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq);
 	priv = rxq_ctrl->priv;
 	priv_lock(priv);
 	if (priv_flow_rxq_in_use(priv, rxq))
@@ -1225,7 +1226,7 @@ mlx5_rx_queue_release(void *dpdk_rxq)
 			(*priv->rxqs)[i] = NULL;
 			break;
 		}
-	rxq_cleanup(rxq_ctrl);
+	mlx5_rxq_cleanup(rxq_ctrl);
 	rte_free(rxq_ctrl);
 	priv_unlock(priv);
 }
@@ -1260,9 +1261,9 @@ priv_rx_intr_vec_enable(struct priv *priv)
 	}
 	intr_handle->type = RTE_INTR_HANDLE_EXT;
 	for (i = 0; i != n; ++i) {
-		struct rxq *rxq = (*priv->rxqs)[i];
-		struct rxq_ctrl *rxq_ctrl =
-			container_of(rxq, struct rxq_ctrl, rxq);
+		struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
+		struct mlx5_rxq_ctrl *rxq_ctrl =
+			container_of(rxq, struct mlx5_rxq_ctrl, rxq);
 		int fd;
 		int flags;
 		int rc;
@@ -1328,7 +1329,7 @@ priv_rx_intr_vec_disable(struct priv *priv)
  *     Sequence number per receive queue .
  */
 static inline void
-mlx5_arm_cq(struct rxq *rxq, int sq_n_rxq)
+mlx5_arm_cq(struct mlx5_rxq_data *rxq, int sq_n_rxq)
 {
 	int sq_n = 0;
 	uint32_t doorbell_hi;
@@ -1359,8 +1360,9 @@ int
 mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
 	struct priv *priv = mlx5_get_priv(dev);
-	struct rxq *rxq = (*priv->rxqs)[rx_queue_id];
-	struct rxq_ctrl *rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
+	struct mlx5_rxq_data *rxq = (*priv->rxqs)[rx_queue_id];
+	struct mlx5_rxq_ctrl *rxq_ctrl =
+		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
 	int ret = 0;
 
 	if (!rxq || !rxq_ctrl->channel) {
@@ -1388,8 +1390,9 @@ int
 mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
 	struct priv *priv = mlx5_get_priv(dev);
-	struct rxq *rxq = (*priv->rxqs)[rx_queue_id];
-	struct rxq_ctrl *rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
+	struct mlx5_rxq_data *rxq = (*priv->rxqs)[rx_queue_id];
+	struct mlx5_rxq_ctrl *rxq_ctrl =
+		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
 	struct ibv_cq *ev_cq;
 	void *ev_ctx;
 	int ret;
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index c45ebee..ad1071b 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -65,11 +65,11 @@ static __rte_always_inline uint32_t
 rxq_cq_to_pkt_type(volatile struct mlx5_cqe *cqe);
 
 static __rte_always_inline int
-mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe,
+mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
 		 uint16_t cqe_cnt, uint32_t *rss_hash);
 
 static __rte_always_inline uint32_t
-rxq_cq_to_ol_flags(struct rxq *rxq, volatile struct mlx5_cqe *cqe);
+rxq_cq_to_ol_flags(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe);
 
 uint32_t mlx5_ptype_table[] __rte_cache_aligned = {
 	[0xff] = RTE_PTYPE_ALL_MASK, /* Last entry for errored packet. */
@@ -282,7 +282,7 @@ mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset)
 int
 mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset)
 {
-	struct rxq *rxq = rx_queue;
+	struct mlx5_rxq_data *rxq = rx_queue;
 	struct rxq_zip *zip = &rxq->zip;
 	volatile struct mlx5_cqe *cqe;
 	const unsigned int cqe_n = (1 << rxq->cqe_n);
@@ -1647,7 +1647,7 @@ rxq_cq_to_pkt_type(volatile struct mlx5_cqe *cqe)
  *   with error.
  */
 static inline int
-mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe,
+mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
 		 uint16_t cqe_cnt, uint32_t *rss_hash)
 {
 	struct rxq_zip *zip = &rxq->zip;
@@ -1758,7 +1758,7 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe,
  *   Offload flags (ol_flags) for struct rte_mbuf.
  */
 static inline uint32_t
-rxq_cq_to_ol_flags(struct rxq *rxq, volatile struct mlx5_cqe *cqe)
+rxq_cq_to_ol_flags(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe)
 {
 	uint32_t ol_flags = 0;
 	uint16_t flags = rte_be_to_cpu_16(cqe->hdr_type_etc);
@@ -1797,7 +1797,7 @@ rxq_cq_to_ol_flags(struct rxq *rxq, volatile struct mlx5_cqe *cqe)
 uint16_t
 mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 {
-	struct rxq *rxq = dpdk_rxq;
+	struct mlx5_rxq_data *rxq = dpdk_rxq;
 	const unsigned int wqe_cnt = (1 << rxq->elts_n) - 1;
 	const unsigned int cqe_cnt = (1 << rxq->cqe_n) - 1;
 	const unsigned int sges_n = rxq->sges_n;
@@ -2037,7 +2037,7 @@ priv_check_vec_tx_support(struct priv *priv)
 }
 
 int __attribute__((weak))
-rxq_check_vec_support(struct rxq *rxq)
+rxq_check_vec_support(struct mlx5_rxq_data *rxq)
 {
 	(void)rxq;
 	return -ENOTSUP;
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 37698ab..a86b6fb 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -90,7 +90,7 @@ struct rxq_zip {
 };
 
 /* RX queue descriptor. */
-struct rxq {
+struct mlx5_rxq_data {
 	unsigned int csum:1; /* Enable checksum offloading. */
 	unsigned int csum_l2tun:1; /* Same for L2 tunnels. */
 	unsigned int vlan_strip:1; /* Enable VLAN stripping. */
@@ -122,14 +122,14 @@ struct rxq {
 } __rte_cache_aligned;
 
 /* RX queue control descriptor. */
-struct rxq_ctrl {
+struct mlx5_rxq_ctrl {
 	struct priv *priv; /* Back pointer to private data. */
 	struct ibv_cq *cq; /* Completion Queue. */
 	struct ibv_wq *wq; /* Work Queue. */
 	struct ibv_mr *mr; /* Memory Region (for mp). */
 	struct ibv_comp_channel *channel;
 	unsigned int socket; /* CPU socket ID for allocations. */
-	struct rxq rxq; /* Data path structure. */
+	struct mlx5_rxq_data rxq; /* Data path structure. */
 };
 
 /* Hash RX queue types. */
@@ -294,7 +294,7 @@ int priv_create_hash_rxqs(struct priv *);
 void priv_destroy_hash_rxqs(struct priv *);
 int priv_allow_flow_type(struct priv *, enum hash_rxq_flow_type);
 int priv_rehash_flows(struct priv *);
-void rxq_cleanup(struct rxq_ctrl *);
+void mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *);
 int mlx5_rx_queue_setup(struct rte_eth_dev *, uint16_t, uint16_t, unsigned int,
 			const struct rte_eth_rxconf *, struct rte_mempool *);
 void mlx5_rx_queue_release(void *);
@@ -331,7 +331,7 @@ int mlx5_tx_descriptor_status(void *, uint16_t);
 /* Vectorized version of mlx5_rxtx.c */
 int priv_check_raw_vec_tx_support(struct priv *);
 int priv_check_vec_tx_support(struct priv *);
-int rxq_check_vec_support(struct rxq *);
+int rxq_check_vec_support(struct mlx5_rxq_data *);
 int priv_check_vec_rx_support(struct priv *);
 uint16_t mlx5_tx_burst_raw_vec(void *, struct rte_mbuf **, uint16_t);
 uint16_t mlx5_tx_burst_vec(void *, struct rte_mbuf **, uint16_t);
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
index 075dce9..b0c87bf 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
@@ -518,7 +518,7 @@ mlx5_tx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
  *   Number of packets to be stored.
  */
 static inline void
-rxq_copy_mbuf_v(struct rxq *rxq, struct rte_mbuf **pkts, uint16_t n)
+rxq_copy_mbuf_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t n)
 {
 	const uint16_t q_mask = (1 << rxq->elts_n) - 1;
 	struct rte_mbuf **elts = &(*rxq->elts)[rxq->rq_pi & q_mask];
@@ -544,7 +544,7 @@ rxq_copy_mbuf_v(struct rxq *rxq, struct rte_mbuf **pkts, uint16_t n)
  *   Number of buffers to be replenished.
  */
 static inline void
-rxq_replenish_bulk_mbuf(struct rxq *rxq, uint16_t n)
+rxq_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq, uint16_t n)
 {
 	const uint16_t q_n = 1 << rxq->elts_n;
 	const uint16_t q_mask = q_n - 1;
@@ -583,7 +583,7 @@ rxq_replenish_bulk_mbuf(struct rxq *rxq, uint16_t n)
  *   the title completion descriptor to be copied to the rest of mbufs.
  */
 static inline void
-rxq_cq_decompress_v(struct rxq *rxq,
+rxq_cq_decompress_v(struct mlx5_rxq_data *rxq,
 		    volatile struct mlx5_cqe *cq,
 		    struct rte_mbuf **elts)
 {
@@ -749,8 +749,8 @@ rxq_cq_decompress_v(struct rxq *rxq,
  *   Pointer to array of packets to be filled.
  */
 static inline void
-rxq_cq_to_ptype_oflags_v(struct rxq *rxq, __m128i cqes[4], __m128i op_err,
-			 struct rte_mbuf **pkts)
+rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, __m128i cqes[4],
+			 __m128i op_err, struct rte_mbuf **pkts)
 {
 	__m128i pinfo0, pinfo1;
 	__m128i pinfo, ptype;
@@ -884,7 +884,7 @@ rxq_cq_to_ptype_oflags_v(struct rxq *rxq, __m128i cqes[4], __m128i op_err,
  *   Number of packets successfully received (<= pkts_n).
  */
 static uint16_t
-rxq_handle_pending_error(struct rxq *rxq, struct rte_mbuf **pkts,
+rxq_handle_pending_error(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
 			 uint16_t pkts_n)
 {
 	uint16_t n = 0;
@@ -931,7 +931,7 @@ rxq_handle_pending_error(struct rxq *rxq, struct rte_mbuf **pkts,
  *   Number of packets received including errors (<= pkts_n).
  */
 static inline uint16_t
-rxq_burst_v(struct rxq *rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
+rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 {
 	const uint16_t q_n = 1 << rxq->cqe_n;
 	const uint16_t q_mask = q_n - 1;
@@ -1279,7 +1279,7 @@ rxq_burst_v(struct rxq *rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 uint16_t
 mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 {
-	struct rxq *rxq = dpdk_rxq;
+	struct mlx5_rxq_data *rxq = dpdk_rxq;
 	uint16_t nb_rx;
 
 	nb_rx = rxq_burst_v(rxq, pkts, pkts_n);
@@ -1345,9 +1345,10 @@ priv_check_vec_tx_support(struct priv *priv)
  *   1 if supported, negative errno value if not.
  */
 int __attribute__((cold))
-rxq_check_vec_support(struct rxq *rxq)
+rxq_check_vec_support(struct mlx5_rxq_data *rxq)
 {
-	struct rxq_ctrl *ctrl = container_of(rxq, struct rxq_ctrl, rxq);
+	struct mlx5_rxq_ctrl *ctrl =
+		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
 
 	if (!ctrl->priv->rx_vec_en || rxq->sges_n != 0)
 		return -ENOTSUP;
@@ -1372,7 +1373,7 @@ priv_check_vec_rx_support(struct priv *priv)
 		return -ENOTSUP;
 	/* All the configured queues should support. */
 	for (i = 0; i < priv->rxqs_n; ++i) {
-		struct rxq *rxq = (*priv->rxqs)[i];
+		struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
 
 		if (rxq_check_vec_support(rxq) < 0)
 			break;
diff --git a/drivers/net/mlx5/mlx5_stats.c b/drivers/net/mlx5/mlx5_stats.c
index 06348c8..3de3af8 100644
--- a/drivers/net/mlx5/mlx5_stats.c
+++ b/drivers/net/mlx5/mlx5_stats.c
@@ -329,7 +329,7 @@ mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 	priv_lock(priv);
 	/* Add software counters. */
 	for (i = 0; (i != priv->rxqs_n); ++i) {
-		struct rxq *rxq = (*priv->rxqs)[i];
+		struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
 
 		if (rxq == NULL)
 			continue;
diff --git a/drivers/net/mlx5/mlx5_vlan.c b/drivers/net/mlx5/mlx5_vlan.c
index 36ffbba..0d91591 100644
--- a/drivers/net/mlx5/mlx5_vlan.c
+++ b/drivers/net/mlx5/mlx5_vlan.c
@@ -137,8 +137,9 @@ mlx5_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
 static void
 priv_vlan_strip_queue_set(struct priv *priv, uint16_t idx, int on)
 {
-	struct rxq *rxq = (*priv->rxqs)[idx];
-	struct rxq_ctrl *rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
+	struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
+	struct mlx5_rxq_ctrl *rxq_ctrl =
+		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
 	struct ibv_wq_attr mod;
 	uint16_t vlan_offloads =
 		(on ? IBV_WQ_FLAGS_CVLAN_STRIPPING : 0) |
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v3 04/30] net/mlx5: prefix Tx structures and functions
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (28 preceding siblings ...)
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 03/30] net/mlx5: prefix Rx structures and functions Nelio Laranjeiro
@ 2017-10-09 14:44 ` Nelio Laranjeiro
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 05/30] net/mlx5: remove redundant started flag Nelio Laranjeiro
                   ` (25 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-09 14:44 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
Prefix struct txq_ctrl and associated functions with mlx5.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5.c              |  8 +++----
 drivers/net/mlx5/mlx5.h              |  2 +-
 drivers/net/mlx5/mlx5_mr.c           | 12 ++++++----
 drivers/net/mlx5/mlx5_rxtx.c         | 25 ++++++++++----------
 drivers/net/mlx5/mlx5_rxtx.h         | 27 +++++++++++-----------
 drivers/net/mlx5/mlx5_rxtx_vec_sse.c | 17 +++++++-------
 drivers/net/mlx5/mlx5_stats.c        |  2 +-
 drivers/net/mlx5/mlx5_txq.c          | 45 ++++++++++++++++++------------------
 8 files changed, 72 insertions(+), 66 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 40499b1..7aea5a4 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -225,14 +225,14 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 		/* XXX race condition if mlx5_tx_burst() is still running. */
 		usleep(1000);
 		for (i = 0; (i != priv->txqs_n); ++i) {
-			struct txq *txq = (*priv->txqs)[i];
-			struct txq_ctrl *txq_ctrl;
+			struct mlx5_txq_data *txq = (*priv->txqs)[i];
+			struct mlx5_txq_ctrl *txq_ctrl;
 
 			if (txq == NULL)
 				continue;
-			txq_ctrl = container_of(txq, struct txq_ctrl, txq);
+			txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
 			(*priv->txqs)[i] = NULL;
-			txq_cleanup(txq_ctrl);
+			mlx5_txq_cleanup(txq_ctrl);
 			rte_free(txq_ctrl);
 		}
 		priv->txqs_n = 0;
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index ddaf227..1b511e1 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -131,7 +131,7 @@ struct priv {
 	unsigned int rxqs_n; /* RX queues array size. */
 	unsigned int txqs_n; /* TX queues array size. */
 	struct mlx5_rxq_data *(*rxqs)[]; /* RX queues. */
-	struct txq *(*txqs)[]; /* TX queues. */
+	struct mlx5_txq_data *(*txqs)[]; /* TX queues. */
 	/* Indirection tables referencing all RX WQs. */
 	struct ibv_rwq_ind_table *(*ind_tables)[];
 	unsigned int ind_tables_n; /* Number of indirection tables. */
diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c
index 9a9f73a..6199746 100644
--- a/drivers/net/mlx5/mlx5_mr.c
+++ b/drivers/net/mlx5/mlx5_mr.c
@@ -175,9 +175,11 @@ mlx5_mp2mr(struct ibv_pd *pd, struct rte_mempool *mp)
  *   mr->lkey on success, (uint32_t)-1 on failure.
  */
 uint32_t
-txq_mp2mr_reg(struct txq *txq, struct rte_mempool *mp, unsigned int idx)
+mlx5_txq_mp2mr_reg(struct mlx5_txq_data *txq, struct rte_mempool *mp,
+		   unsigned int idx)
 {
-	struct txq_ctrl *txq_ctrl = container_of(txq, struct txq_ctrl, txq);
+	struct mlx5_txq_ctrl *txq_ctrl =
+		container_of(txq, struct mlx5_txq_ctrl, txq);
 	struct ibv_mr *mr;
 
 	/* Add a new entry, register MR first. */
@@ -253,9 +255,9 @@ txq_mp2mr_mbuf_check(struct rte_mempool *mp, void *arg, void *obj,
  *   Pointer to TX queue structure.
  */
 void
-txq_mp2mr_iter(struct rte_mempool *mp, void *arg)
+mlx5_txq_mp2mr_iter(struct rte_mempool *mp, void *arg)
 {
-	struct txq_ctrl *txq_ctrl = arg;
+	struct mlx5_txq_ctrl *txq_ctrl = arg;
 	struct txq_mp2mr_mbuf_check_data data = {
 		.ret = 0,
 	};
@@ -283,5 +285,5 @@ txq_mp2mr_iter(struct rte_mempool *mp, void *arg)
 		    end <= (uintptr_t)mr->addr + mr->length)
 			return;
 	}
-	txq_mp2mr_reg(&txq_ctrl->txq, mp, i);
+	mlx5_txq_mp2mr_reg(&txq_ctrl->txq, mp, i);
 }
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index ad1071b..9389383 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -200,7 +200,7 @@ mlx5_set_ptype_table(void)
  *   Size of tailroom.
  */
 static inline size_t
-tx_mlx5_wq_tailroom(struct txq *txq, void *addr)
+tx_mlx5_wq_tailroom(struct mlx5_txq_data *txq, void *addr)
 {
 	size_t tailroom;
 	tailroom = (uintptr_t)(txq->wqes) +
@@ -258,7 +258,7 @@ mlx5_copy_to_wq(void *dst, const void *src, size_t n,
 int
 mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset)
 {
-	struct txq *txq = tx_queue;
+	struct mlx5_txq_data *txq = tx_queue;
 	uint16_t used;
 
 	mlx5_tx_complete(txq);
@@ -334,7 +334,7 @@ mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset)
 uint16_t
 mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 {
-	struct txq *txq = (struct txq *)dpdk_txq;
+	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
 	uint16_t elts_head = txq->elts_head;
 	const uint16_t elts_n = 1 << txq->elts_n;
 	const uint16_t elts_m = elts_n - 1;
@@ -747,7 +747,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
  *   Packet length.
  */
 static inline void
-mlx5_mpw_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
+mlx5_mpw_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw, uint32_t length)
 {
 	uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);
 	volatile struct mlx5_wqe_data_seg (*dseg)[MLX5_MPW_DSEG_MAX] =
@@ -787,7 +787,7 @@ mlx5_mpw_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
  *   Pointer to MPW session structure.
  */
 static inline void
-mlx5_mpw_close(struct txq *txq, struct mlx5_mpw *mpw)
+mlx5_mpw_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw)
 {
 	unsigned int num = mpw->pkts_n;
 
@@ -821,7 +821,7 @@ mlx5_mpw_close(struct txq *txq, struct mlx5_mpw *mpw)
 uint16_t
 mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 {
-	struct txq *txq = (struct txq *)dpdk_txq;
+	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
 	uint16_t elts_head = txq->elts_head;
 	const uint16_t elts_n = 1 << txq->elts_n;
 	const uint16_t elts_m = elts_n - 1;
@@ -964,7 +964,8 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
  *   Packet length.
  */
 static inline void
-mlx5_mpw_inline_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
+mlx5_mpw_inline_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw,
+		    uint32_t length)
 {
 	uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);
 	struct mlx5_wqe_inl_small *inl;
@@ -999,7 +1000,7 @@ mlx5_mpw_inline_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
  *   Pointer to MPW session structure.
  */
 static inline void
-mlx5_mpw_inline_close(struct txq *txq, struct mlx5_mpw *mpw)
+mlx5_mpw_inline_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw)
 {
 	unsigned int size;
 	struct mlx5_wqe_inl_small *inl = (struct mlx5_wqe_inl_small *)
@@ -1034,7 +1035,7 @@ uint16_t
 mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
 			 uint16_t pkts_n)
 {
-	struct txq *txq = (struct txq *)dpdk_txq;
+	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
 	uint16_t elts_head = txq->elts_head;
 	const uint16_t elts_n = 1 << txq->elts_n;
 	const uint16_t elts_m = elts_n - 1;
@@ -1260,7 +1261,7 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
  *   Packet length.
  */
 static inline void
-mlx5_empw_new(struct txq *txq, struct mlx5_mpw *mpw, int padding)
+mlx5_empw_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw, int padding)
 {
 	uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);
 
@@ -1302,7 +1303,7 @@ mlx5_empw_new(struct txq *txq, struct mlx5_mpw *mpw, int padding)
  *   Number of consumed WQEs.
  */
 static inline uint16_t
-mlx5_empw_close(struct txq *txq, struct mlx5_mpw *mpw)
+mlx5_empw_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw)
 {
 	uint16_t ret;
 
@@ -1333,7 +1334,7 @@ mlx5_empw_close(struct txq *txq, struct mlx5_mpw *mpw)
 uint16_t
 mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 {
-	struct txq *txq = (struct txq *)dpdk_txq;
+	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
 	uint16_t elts_head = txq->elts_head;
 	const uint16_t elts_n = 1 << txq->elts_n;
 	const uint16_t elts_m = elts_n - 1;
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index a86b6fb..6ffcfb7 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -233,7 +233,7 @@ struct hash_rxq {
 
 /* TX queue descriptor. */
 __extension__
-struct txq {
+struct mlx5_txq_data {
 	uint16_t elts_head; /* Current counter in (*elts)[]. */
 	uint16_t elts_tail; /* Counter of first element awaiting completion. */
 	uint16_t elts_comp; /* Counter since last completion request. */
@@ -271,12 +271,12 @@ struct txq {
 } __rte_cache_aligned;
 
 /* TX queue control descriptor. */
-struct txq_ctrl {
+struct mlx5_txq_ctrl {
 	struct priv *priv; /* Back pointer to private data. */
 	struct ibv_cq *cq; /* Completion Queue. */
 	struct ibv_qp *qp; /* Queue Pair. */
 	unsigned int socket; /* CPU socket ID for allocations. */
-	struct txq txq; /* Data path structure. */
+	struct mlx5_txq_data txq; /* Data path structure. */
 	off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
 };
 
@@ -305,9 +305,9 @@ int mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id);
 
 /* mlx5_txq.c */
 
-void txq_cleanup(struct txq_ctrl *);
-int txq_ctrl_setup(struct rte_eth_dev *, struct txq_ctrl *, uint16_t,
-		   unsigned int, const struct rte_eth_txconf *);
+void mlx5_txq_cleanup(struct mlx5_txq_ctrl *);
+int mlx5_txq_ctrl_setup(struct rte_eth_dev *, struct mlx5_txq_ctrl *, uint16_t,
+			unsigned int, const struct rte_eth_txconf *);
 int mlx5_tx_queue_setup(struct rte_eth_dev *, uint16_t, uint16_t, unsigned int,
 			const struct rte_eth_txconf *);
 void mlx5_tx_queue_release(void *);
@@ -340,8 +340,9 @@ uint16_t mlx5_rx_burst_vec(void *, struct rte_mbuf **, uint16_t);
 /* mlx5_mr.c */
 
 struct ibv_mr *mlx5_mp2mr(struct ibv_pd *, struct rte_mempool *);
-void txq_mp2mr_iter(struct rte_mempool *, void *);
-uint32_t txq_mp2mr_reg(struct txq *, struct rte_mempool *, unsigned int);
+void mlx5_txq_mp2mr_iter(struct rte_mempool *, void *);
+uint32_t mlx5_txq_mp2mr_reg(struct mlx5_txq_data *, struct rte_mempool *,
+			    unsigned int);
 
 #ifndef NDEBUG
 /**
@@ -439,7 +440,7 @@ check_cqe(volatile struct mlx5_cqe *cqe,
  *   WQE address.
  */
 static inline uintptr_t *
-tx_mlx5_wqe(struct txq *txq, uint16_t ci)
+tx_mlx5_wqe(struct mlx5_txq_data *txq, uint16_t ci)
 {
 	ci &= ((1 << txq->wqe_n) - 1);
 	return (uintptr_t *)((uintptr_t)txq->wqes + ci * MLX5_WQE_SIZE);
@@ -454,7 +455,7 @@ tx_mlx5_wqe(struct txq *txq, uint16_t ci)
  *   Pointer to TX queue structure.
  */
 static __rte_always_inline void
-mlx5_tx_complete(struct txq *txq)
+mlx5_tx_complete(struct mlx5_txq_data *txq)
 {
 	const uint16_t elts_n = 1 << txq->elts_n;
 	const uint16_t elts_m = elts_n - 1;
@@ -559,7 +560,7 @@ mlx5_tx_mb2mp(struct rte_mbuf *buf)
  *   mr->lkey on success, (uint32_t)-1 on failure.
  */
 static __rte_always_inline uint32_t
-mlx5_tx_mb2mr(struct txq *txq, struct rte_mbuf *mb)
+mlx5_tx_mb2mr(struct mlx5_txq_data *txq, struct rte_mbuf *mb)
 {
 	uint16_t i = txq->mr_cache_idx;
 	uintptr_t addr = rte_pktmbuf_mtod(mb, uintptr_t);
@@ -582,7 +583,7 @@ mlx5_tx_mb2mr(struct txq *txq, struct rte_mbuf *mb)
 		}
 	}
 	txq->mr_cache_idx = 0;
-	return txq_mp2mr_reg(txq, mlx5_tx_mb2mp(mb), i);
+	return mlx5_txq_mp2mr_reg(txq, mlx5_tx_mb2mp(mb), i);
 }
 
 /**
@@ -594,7 +595,7 @@ mlx5_tx_mb2mr(struct txq *txq, struct rte_mbuf *mb)
  *   Pointer to the last WQE posted in the NIC.
  */
 static __rte_always_inline void
-mlx5_tx_dbrec(struct txq *txq, volatile struct mlx5_wqe *wqe)
+mlx5_tx_dbrec(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe)
 {
 	uint64_t *dst = (uint64_t *)((uintptr_t)txq->bf_reg);
 	volatile uint64_t *src = ((volatile uint64_t *)wqe);
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
index b0c87bf..7e5ce6d 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
@@ -76,7 +76,7 @@
  *   Number of packets to be filled.
  */
 static inline void
-txq_wr_dseg_v(struct txq *txq, __m128i *dseg,
+txq_wr_dseg_v(struct mlx5_txq_data *txq, __m128i *dseg,
 	      struct rte_mbuf **pkts, unsigned int n)
 {
 	unsigned int pos;
@@ -151,8 +151,8 @@ txq_check_multiseg(struct rte_mbuf **pkts, uint16_t pkts_n)
  *   Number of packets having same ol_flags.
  */
 static inline unsigned int
-txq_calc_offload(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
-		 uint8_t *cs_flags)
+txq_calc_offload(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
+		 uint16_t pkts_n, uint8_t *cs_flags)
 {
 	unsigned int pos;
 	const uint64_t ol_mask =
@@ -202,7 +202,8 @@ txq_calc_offload(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
  *   Number of packets successfully transmitted (<= pkts_n).
  */
 static uint16_t
-txq_scatter_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n)
+txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
+	      uint16_t pkts_n)
 {
 	uint16_t elts_head = txq->elts_head;
 	const uint16_t elts_n = 1 << txq->elts_n;
@@ -332,7 +333,7 @@ txq_scatter_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n)
  *   Number of packets successfully transmitted (<= pkts_n).
  */
 static inline uint16_t
-txq_burst_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
+txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
 	    uint8_t cs_flags)
 {
 	struct rte_mbuf **elts;
@@ -448,7 +449,7 @@ uint16_t
 mlx5_tx_burst_raw_vec(void *dpdk_txq, struct rte_mbuf **pkts,
 		      uint16_t pkts_n)
 {
-	struct txq *txq = (struct txq *)dpdk_txq;
+	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
 	uint16_t nb_tx = 0;
 
 	while (pkts_n > nb_tx) {
@@ -480,7 +481,7 @@ mlx5_tx_burst_raw_vec(void *dpdk_txq, struct rte_mbuf **pkts,
 uint16_t
 mlx5_tx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 {
-	struct txq *txq = (struct txq *)dpdk_txq;
+	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
 	uint16_t nb_tx = 0;
 
 	while (pkts_n > nb_tx) {
@@ -1304,7 +1305,7 @@ priv_check_raw_vec_tx_support(struct priv *priv)
 
 	/* All the configured queues should support. */
 	for (i = 0; i < priv->txqs_n; ++i) {
-		struct txq *txq = (*priv->txqs)[i];
+		struct mlx5_txq_data *txq = (*priv->txqs)[i];
 
 		if (!(txq->flags & ETH_TXQ_FLAGS_NOMULTSEGS) ||
 		    !(txq->flags & ETH_TXQ_FLAGS_NOOFFLOADS))
diff --git a/drivers/net/mlx5/mlx5_stats.c b/drivers/net/mlx5/mlx5_stats.c
index 3de3af8..6b4772c 100644
--- a/drivers/net/mlx5/mlx5_stats.c
+++ b/drivers/net/mlx5/mlx5_stats.c
@@ -350,7 +350,7 @@ mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 		tmp.rx_nombuf += rxq->stats.rx_nombuf;
 	}
 	for (i = 0; (i != priv->txqs_n); ++i) {
-		struct txq *txq = (*priv->txqs)[i];
+		struct mlx5_txq_data *txq = (*priv->txqs)[i];
 
 		if (txq == NULL)
 			continue;
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 1b45b4a..ee9f703 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -69,7 +69,7 @@
  *   Number of elements to allocate.
  */
 static void
-txq_alloc_elts(struct txq_ctrl *txq_ctrl, unsigned int elts_n)
+txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl, unsigned int elts_n)
 {
 	unsigned int i;
 
@@ -95,7 +95,7 @@ txq_alloc_elts(struct txq_ctrl *txq_ctrl, unsigned int elts_n)
  *   Pointer to TX queue structure.
  */
 static void
-txq_free_elts(struct txq_ctrl *txq_ctrl)
+txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl)
 {
 	const uint16_t elts_n = 1 << txq_ctrl->txq.elts_n;
 	const uint16_t elts_m = elts_n - 1;
@@ -132,7 +132,7 @@ txq_free_elts(struct txq_ctrl *txq_ctrl)
  *   Pointer to TX queue structure.
  */
 void
-txq_cleanup(struct txq_ctrl *txq_ctrl)
+mlx5_txq_cleanup(struct mlx5_txq_ctrl *txq_ctrl)
 {
 	size_t i;
 
@@ -162,7 +162,7 @@ txq_cleanup(struct txq_ctrl *txq_ctrl)
  *   0 on success, errno value on failure.
  */
 static inline int
-txq_setup(struct txq_ctrl *tmpl, struct txq_ctrl *txq_ctrl)
+txq_setup(struct mlx5_txq_ctrl *tmpl, struct mlx5_txq_ctrl *txq_ctrl)
 {
 	struct mlx5dv_qp qp;
 	struct ibv_cq *ibcq = tmpl->cq;
@@ -225,12 +225,12 @@ txq_setup(struct txq_ctrl *tmpl, struct txq_ctrl *txq_ctrl)
  *   0 on success, errno value on failure.
  */
 int
-txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
-	       uint16_t desc, unsigned int socket,
-	       const struct rte_eth_txconf *conf)
+mlx5_txq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_txq_ctrl *txq_ctrl,
+		    uint16_t desc, unsigned int socket,
+		    const struct rte_eth_txconf *conf)
 {
 	struct priv *priv = mlx5_get_priv(dev);
-	struct txq_ctrl tmpl = {
+	struct mlx5_txq_ctrl tmpl = {
 		.priv = priv,
 		.socket = socket,
 	};
@@ -422,15 +422,15 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
 	}
 	/* Clean up txq in case we're reinitializing it. */
 	DEBUG("%p: cleaning-up old txq just in case", (void *)txq_ctrl);
-	txq_cleanup(txq_ctrl);
+	mlx5_txq_cleanup(txq_ctrl);
 	*txq_ctrl = tmpl;
 	DEBUG("%p: txq updated with %p", (void *)txq_ctrl, (void *)&tmpl);
 	/* Pre-register known mempools. */
-	rte_mempool_walk(txq_mp2mr_iter, txq_ctrl);
+	rte_mempool_walk(mlx5_txq_mp2mr_iter, txq_ctrl);
 	assert(ret == 0);
 	return 0;
 error:
-	txq_cleanup(&tmpl);
+	mlx5_txq_cleanup(&tmpl);
 	assert(ret > 0);
 	return ret;
 }
@@ -457,8 +457,9 @@ mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		    unsigned int socket, const struct rte_eth_txconf *conf)
 {
 	struct priv *priv = dev->data->dev_private;
-	struct txq *txq = (*priv->txqs)[idx];
-	struct txq_ctrl *txq_ctrl = container_of(txq, struct txq_ctrl, txq);
+	struct mlx5_txq_data *txq = (*priv->txqs)[idx];
+	struct mlx5_txq_ctrl *txq_ctrl =
+		container_of(txq, struct mlx5_txq_ctrl, txq);
 	int ret;
 
 	if (mlx5_is_secondary())
@@ -494,7 +495,7 @@ mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 			return -EEXIST;
 		}
 		(*priv->txqs)[idx] = NULL;
-		txq_cleanup(txq_ctrl);
+		mlx5_txq_cleanup(txq_ctrl);
 		/* Resize if txq size is changed. */
 		if (txq_ctrl->txq.elts_n != log2above(desc)) {
 			txq_ctrl = rte_realloc(txq_ctrl,
@@ -521,7 +522,7 @@ mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 			return -ENOMEM;
 		}
 	}
-	ret = txq_ctrl_setup(dev, txq_ctrl, desc, socket, conf);
+	ret = mlx5_txq_ctrl_setup(dev, txq_ctrl, desc, socket, conf);
 	if (ret)
 		rte_free(txq_ctrl);
 	else {
@@ -543,8 +544,8 @@ mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 void
 mlx5_tx_queue_release(void *dpdk_txq)
 {
-	struct txq *txq = (struct txq *)dpdk_txq;
-	struct txq_ctrl *txq_ctrl;
+	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
+	struct mlx5_txq_ctrl *txq_ctrl;
 	struct priv *priv;
 	unsigned int i;
 
@@ -553,7 +554,7 @@ mlx5_tx_queue_release(void *dpdk_txq)
 
 	if (txq == NULL)
 		return;
-	txq_ctrl = container_of(txq, struct txq_ctrl, txq);
+	txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
 	priv = txq_ctrl->priv;
 	priv_lock(priv);
 	for (i = 0; (i != priv->txqs_n); ++i)
@@ -563,7 +564,7 @@ mlx5_tx_queue_release(void *dpdk_txq)
 			(*priv->txqs)[i] = NULL;
 			break;
 		}
-	txq_cleanup(txq_ctrl);
+	mlx5_txq_cleanup(txq_ctrl);
 	rte_free(txq_ctrl);
 	priv_unlock(priv);
 }
@@ -588,8 +589,8 @@ priv_tx_uar_remap(struct priv *priv, int fd)
 	unsigned int pages_n = 0;
 	uintptr_t uar_va;
 	void *addr;
-	struct txq *txq;
-	struct txq_ctrl *txq_ctrl;
+	struct mlx5_txq_data *txq;
+	struct mlx5_txq_ctrl *txq_ctrl;
 	int already_mapped;
 	size_t page_size = sysconf(_SC_PAGESIZE);
 
@@ -600,7 +601,7 @@ priv_tx_uar_remap(struct priv *priv, int fd)
 	 */
 	for (i = 0; i != priv->txqs_n; ++i) {
 		txq = (*priv->txqs)[i];
-		txq_ctrl = container_of(txq, struct txq_ctrl, txq);
+		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
 		uar_va = (uintptr_t)txq_ctrl->txq.bf_reg;
 		uar_va = RTE_ALIGN_FLOOR(uar_va, page_size);
 		already_mapped = 0;
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v3 05/30] net/mlx5: remove redundant started flag
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (29 preceding siblings ...)
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 04/30] net/mlx5: prefix Tx " Nelio Laranjeiro
@ 2017-10-09 14:44 ` Nelio Laranjeiro
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 06/30] net/mlx5: verify all flows are been removed on close Nelio Laranjeiro
                   ` (24 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-09 14:44 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
This flag is already present in the Ethernet device.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5.h         |  1 -
 drivers/net/mlx5/mlx5_flow.c    |  6 +++---
 drivers/net/mlx5/mlx5_rxq.c     |  2 +-
 drivers/net/mlx5/mlx5_trigger.c | 12 ------------
 drivers/net/mlx5/mlx5_txq.c     |  2 +-
 5 files changed, 5 insertions(+), 18 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 1b511e1..3c58f7a 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -104,7 +104,6 @@ struct priv {
 	/* Device properties. */
 	uint16_t mtu; /* Configured MTU. */
 	uint8_t port; /* Physical port number. */
-	unsigned int started:1; /* Device started, flows enabled. */
 	unsigned int promisc_req:1; /* Promiscuous mode requested. */
 	unsigned int allmulti_req:1; /* All multicast mode requested. */
 	unsigned int hw_csum:1; /* Checksum offload is supported. */
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 99dbd8c..3504c43 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -1052,7 +1052,7 @@ priv_flow_create_action_queue_drop(struct priv *priv,
 	++flow->ibv_attr->num_of_specs;
 	flow->offset += sizeof(struct ibv_flow_spec_action_drop);
 	rte_flow->ibv_attr = flow->ibv_attr;
-	if (!priv->started)
+	if (!priv->dev->data->dev_started)
 		return rte_flow;
 	rte_flow->qp = priv->flow_drop_queue->qp;
 	rte_flow->ibv_flow = ibv_create_flow(rte_flow->qp,
@@ -1158,7 +1158,7 @@ priv_flow_create_action_queue(struct priv *priv,
 				   NULL, "cannot allocate QP");
 		goto error;
 	}
-	if (!priv->started)
+	if (!priv->dev->data->dev_started)
 		return rte_flow;
 	rte_flow->ibv_flow = ibv_create_flow(rte_flow->qp,
 					     rte_flow->ibv_attr);
@@ -1618,7 +1618,7 @@ mlx5_flow_isolate(struct rte_eth_dev *dev,
 	struct priv *priv = dev->data->dev_private;
 
 	priv_lock(priv);
-	if (priv->started) {
+	if (dev->data->dev_started) {
 		rte_flow_error_set(error, EBUSY,
 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
 				   NULL,
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 6e520fb..683a4a7 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -1150,7 +1150,7 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	if (rxq != NULL) {
 		DEBUG("%p: reusing already allocated queue index %u (%p)",
 		      (void *)dev, idx, (void *)rxq);
-		if (priv->started) {
+		if (dev->data->dev_started) {
 			priv_unlock(priv);
 			return -EEXIST;
 		}
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 0d0f340..212b4df 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -61,10 +61,6 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 		return -E_RTE_SECONDARY;
 
 	priv_lock(priv);
-	if (priv->started) {
-		priv_unlock(priv);
-		return 0;
-	}
 	/* Update Rx/Tx callback. */
 	priv_dev_select_tx_function(priv, dev);
 	priv_dev_select_rx_function(priv, dev);
@@ -72,8 +68,6 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 	err = priv_create_hash_rxqs(priv);
 	if (!err)
 		err = priv_rehash_flows(priv);
-	if (!err)
-		priv->started = 1;
 	else {
 		ERROR("%p: an error occurred while configuring hash RX queues:"
 		      " %s",
@@ -82,7 +76,6 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 	}
 	err = priv_flow_start(priv);
 	if (err) {
-		priv->started = 0;
 		ERROR("%p: an error occurred while configuring flows:"
 		      " %s",
 		      (void *)priv, strerror(err));
@@ -125,10 +118,6 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
 		return;
 
 	priv_lock(priv);
-	if (!priv->started) {
-		priv_unlock(priv);
-		return;
-	}
 	DEBUG("%p: cleaning up and destroying hash RX queues", (void *)dev);
 	priv_special_flow_disable_all(priv);
 	priv_mac_addrs_disable(priv);
@@ -136,6 +125,5 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
 	priv_flow_stop(priv);
 	priv_rx_intr_vec_disable(priv);
 	priv_dev_interrupt_handler_uninstall(priv, dev);
-	priv->started = 0;
 	priv_unlock(priv);
 }
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index ee9f703..f551f87 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -490,7 +490,7 @@ mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	if (txq != NULL) {
 		DEBUG("%p: reusing already allocated queue index %u (%p)",
 		      (void *)dev, idx, (void *)txq);
-		if (priv->started) {
+		if (dev->data->dev_started) {
 			priv_unlock(priv);
 			return -EEXIST;
 		}
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v3 06/30] net/mlx5: verify all flows are been removed on close
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (30 preceding siblings ...)
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 05/30] net/mlx5: remove redundant started flag Nelio Laranjeiro
@ 2017-10-09 14:44 ` Nelio Laranjeiro
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 07/30] net/mlx5: fix reta update can segfault Nelio Laranjeiro
                   ` (23 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-09 14:44 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
Debug tool to verify that all flows have been unregistered from the NIC.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5.c      |  4 ++++
 drivers/net/mlx5/mlx5.h      |  1 +
 drivers/net/mlx5/mlx5_flow.c | 22 ++++++++++++++++++++++
 3 files changed, 27 insertions(+)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 7aea5a4..1397016 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -190,6 +190,7 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 {
 	struct priv *priv = mlx5_get_priv(dev);
 	unsigned int i;
+	int ret;
 
 	priv_lock(priv);
 	DEBUG("%p: closing device \"%s\"",
@@ -252,6 +253,9 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	if (priv->reta_idx != NULL)
 		rte_free(priv->reta_idx);
 	priv_socket_uninit(priv);
+	ret = priv_flow_verify(priv);
+	if (ret)
+		WARN("%p: some flows still remain", (void *)priv);
 	priv_unlock(priv);
 	memset(priv, 0, sizeof(*priv));
 }
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 3c58f7a..c6563bd 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -290,6 +290,7 @@ int mlx5_flow_isolate(struct rte_eth_dev *, int, struct rte_flow_error *);
 int priv_flow_start(struct priv *);
 void priv_flow_stop(struct priv *);
 int priv_flow_rxq_in_use(struct priv *, struct mlx5_rxq_data *);
+int priv_flow_verify(struct priv *);
 
 /* mlx5_socket.c */
 
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 3504c43..193a90b 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -1630,3 +1630,25 @@ mlx5_flow_isolate(struct rte_eth_dev *dev,
 	priv_unlock(priv);
 	return 0;
 }
+
+/**
+ * Verify the flow list is empty
+ *
+ * @param priv
+ *  Pointer to private structure.
+ *
+ * @return the number of flows not released.
+ */
+int
+priv_flow_verify(struct priv *priv)
+{
+	struct rte_flow *flow;
+	int ret = 0;
+
+	TAILQ_FOREACH(flow, &priv->flows, next) {
+		DEBUG("%p: flow %p still referenced", (void *)priv,
+		      (void *)flow);
+		++ret;
+	}
+	return ret;
+}
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v3 07/30] net/mlx5: fix reta update can segfault
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (31 preceding siblings ...)
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 06/30] net/mlx5: verify all flows are been removed on close Nelio Laranjeiro
@ 2017-10-09 14:44 ` Nelio Laranjeiro
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 08/30] net/mlx5: fix rxqs vector support verification Nelio Laranjeiro
                   ` (22 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-09 14:44 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit, stable
A RETA update needs to stop and restart the port, but stopping the port does
not disable the polling functions, which may end in a segfault if a core is
polling a queue while the control thread is modifying it.
This patch changes the sequence to an order in which such a situation cannot
happen.
Fixes: aa13338faf5e ("net/mlx5: rebuild flows on updating RETA")
Cc: yskoh@mellanox.com
Cc: stable@dpdk.org
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5_rss.c     | 9 +++++----
 drivers/net/mlx5/mlx5_trigger.c | 7 +++++++
 2 files changed, 12 insertions(+), 4 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_rss.c b/drivers/net/mlx5/mlx5_rss.c
index d3d2603..8942879 100644
--- a/drivers/net/mlx5/mlx5_rss.c
+++ b/drivers/net/mlx5/mlx5_rss.c
@@ -351,11 +351,12 @@ mlx5_dev_rss_reta_update(struct rte_eth_dev *dev,
 	struct priv *priv = dev->data->dev_private;
 
 	assert(!mlx5_is_secondary());
-	mlx5_dev_stop(dev);
 	priv_lock(priv);
 	ret = priv_dev_rss_reta_update(priv, reta_conf, reta_size);
 	priv_unlock(priv);
-	if (ret)
-		return -ret;
-	return mlx5_dev_start(dev);
+	if (dev->data->dev_started) {
+		mlx5_dev_stop(dev);
+		mlx5_dev_start(dev);
+	}
+	return -ret;
 }
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 212b4df..eeb9585 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -30,6 +30,7 @@
  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
+#include <unistd.h>
 
 #include <rte_ether.h>
 #include <rte_ethdev.h>
@@ -118,6 +119,12 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
 		return;
 
 	priv_lock(priv);
+	dev->data->dev_started = 0;
+	/* Prevent crashes when queues are still in use. */
+	dev->rx_pkt_burst = removed_rx_burst;
+	dev->tx_pkt_burst = removed_tx_burst;
+	rte_wmb();
+	usleep(1000 * priv->rxqs_n);
 	DEBUG("%p: cleaning up and destroying hash RX queues", (void *)dev);
 	priv_special_flow_disable_all(priv);
 	priv_mac_addrs_disable(priv);
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v3 08/30] net/mlx5: fix rxqs vector support verification
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (32 preceding siblings ...)
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 07/30] net/mlx5: fix reta update can segfault Nelio Laranjeiro
@ 2017-10-09 14:44 ` Nelio Laranjeiro
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 09/30] net/mlx5: add reference counter on memory region Nelio Laranjeiro
                   ` (21 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-09 14:44 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit, stable
The number of queues configured in DPDK does not mean that the array of queues
is completely filled; these two pieces of information are uncorrelated.  The
number of queues is provided in the port configuration, whereas the array is
filled by calling tx/rx_queue_setup().  As this number of queues is neither
increased nor decreased by tx/rx_queue_setup() or tx/rx_queue_release(), the
PMD must consider that a queue may not be initialised at some positions of the
array.
Fixes: 6cb559d67b83 ("net/mlx5: add vectorized Rx/Tx burst for x86")
Cc: yskoh@mellanox.com
Cc: stable@dpdk.org
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5_rxtx_vec_sse.c | 2 ++
 1 file changed, 2 insertions(+)
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
index 7e5ce6d..6d337ec 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
@@ -1376,6 +1376,8 @@ priv_check_vec_rx_support(struct priv *priv)
 	for (i = 0; i < priv->rxqs_n; ++i) {
 		struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
 
+		if (!rxq)
+			continue;
 		if (rxq_check_vec_support(rxq) < 0)
 			break;
 	}
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v3 09/30] net/mlx5: add reference counter on memory region
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (33 preceding siblings ...)
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 08/30] net/mlx5: fix rxqs vector support verification Nelio Laranjeiro
@ 2017-10-09 14:44 ` Nelio Laranjeiro
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 10/30] net/mlx5: separate DPDK from Verbs Rx queue objects Nelio Laranjeiro
                   ` (20 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-09 14:44 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
This patch introduces the memory region as a shared object: users get a
reference to it by calling priv_mr_get(), or call priv_mr_new() to create the
memory region.  The latter registers the memory pool in the kernel driver and
retrieves the associated memory region.
This should help to reduce the memory consumption caused by registering the
same memory pool multiple times.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5.c      |   3 +
 drivers/net/mlx5/mlx5.h      |   8 ++
 drivers/net/mlx5/mlx5_mr.c   | 223 ++++++++++++++++++++++++++++++-------------
 drivers/net/mlx5/mlx5_rxq.c  |  17 ++--
 drivers/net/mlx5/mlx5_rxtx.h |  58 +++++++----
 drivers/net/mlx5/mlx5_txq.c  |   8 +-
 6 files changed, 216 insertions(+), 101 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 1397016..b658b2b 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -256,6 +256,9 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	ret = priv_flow_verify(priv);
 	if (ret)
 		WARN("%p: some flows still remain", (void *)priv);
+	ret = priv_mr_verify(priv);
+	if (ret)
+		WARN("%p: some Memory Region still remain", (void *)priv);
 	priv_unlock(priv);
 	memset(priv, 0, sizeof(*priv));
 }
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index c6563bd..f563722 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -146,6 +146,7 @@ struct priv {
 	unsigned int reta_idx_n; /* RETA index size. */
 	struct rte_flow_drop *flow_drop_queue; /* Flow drop queue. */
 	TAILQ_HEAD(mlx5_flows, rte_flow) flows; /* RTE Flow rules. */
+	LIST_HEAD(mr, mlx5_mr) mr; /* Memory region. */
 	uint32_t link_speed_capa; /* Link speed capabilities. */
 	struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */
 	rte_spinlock_t lock; /* Lock for control functions. */
@@ -299,4 +300,11 @@ int priv_socket_uninit(struct priv *priv);
 void priv_socket_handle(struct priv *priv);
 int priv_socket_connect(struct priv *priv);
 
+/* mlx5_mr.c */
+
+struct mlx5_mr *priv_mr_new(struct priv *, struct rte_mempool *);
+struct mlx5_mr *priv_mr_get(struct priv *, struct rte_mempool *);
+int priv_mr_release(struct priv *, struct mlx5_mr *);
+int priv_mr_verify(struct priv *);
+
 #endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c
index 6199746..54fdc16 100644
--- a/drivers/net/mlx5/mlx5_mr.c
+++ b/drivers/net/mlx5/mlx5_mr.c
@@ -42,6 +42,7 @@
 #endif
 
 #include <rte_mempool.h>
+#include <rte_malloc.h>
 
 #include "mlx5.h"
 #include "mlx5_rxtx.h"
@@ -111,54 +112,6 @@ static int mlx5_check_mempool(struct rte_mempool *mp, uintptr_t *start,
 }
 
 /**
- * Register mempool as a memory region.
- *
- * @param pd
- *   Pointer to protection domain.
- * @param mp
- *   Pointer to memory pool.
- *
- * @return
- *   Memory region pointer, NULL in case of error.
- */
-struct ibv_mr *
-mlx5_mp2mr(struct ibv_pd *pd, struct rte_mempool *mp)
-{
-	const struct rte_memseg *ms = rte_eal_get_physmem_layout();
-	uintptr_t start;
-	uintptr_t end;
-	unsigned int i;
-
-	if (mlx5_check_mempool(mp, &start, &end) != 0) {
-		ERROR("mempool %p: not virtually contiguous",
-		      (void *)mp);
-		return NULL;
-	}
-
-	DEBUG("mempool %p area start=%p end=%p size=%zu",
-	      (void *)mp, (void *)start, (void *)end,
-	      (size_t)(end - start));
-	/* Round start and end to page boundary if found in memory segments. */
-	for (i = 0; (i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL); ++i) {
-		uintptr_t addr = (uintptr_t)ms[i].addr;
-		size_t len = ms[i].len;
-		unsigned int align = ms[i].hugepage_sz;
-
-		if ((start > addr) && (start < addr + len))
-			start = RTE_ALIGN_FLOOR(start, align);
-		if ((end > addr) && (end < addr + len))
-			end = RTE_ALIGN_CEIL(end, align);
-	}
-	DEBUG("mempool %p using start=%p end=%p size=%zu for MR",
-	      (void *)mp, (void *)start, (void *)end,
-	      (size_t)(end - start));
-	return ibv_reg_mr(pd,
-			  (void *)start,
-			  end - start,
-			  IBV_ACCESS_LOCAL_WRITE);
-}
-
-/**
  * Register a Memory Region (MR) <-> Memory Pool (MP) association in
  * txq->mp2mr[]. If mp2mr[] is full, remove an entry first.
  *
@@ -172,44 +125,42 @@ mlx5_mp2mr(struct ibv_pd *pd, struct rte_mempool *mp)
  *   Index of the next available entry.
  *
  * @return
- *   mr->lkey on success, (uint32_t)-1 on failure.
+ *   mr on success, NULL on failure.
  */
-uint32_t
+struct mlx5_mr*
 mlx5_txq_mp2mr_reg(struct mlx5_txq_data *txq, struct rte_mempool *mp,
 		   unsigned int idx)
 {
 	struct mlx5_txq_ctrl *txq_ctrl =
 		container_of(txq, struct mlx5_txq_ctrl, txq);
-	struct ibv_mr *mr;
+	struct mlx5_mr *mr;
 
 	/* Add a new entry, register MR first. */
 	DEBUG("%p: discovered new memory pool \"%s\" (%p)",
 	      (void *)txq_ctrl, mp->name, (void *)mp);
-	mr = mlx5_mp2mr(txq_ctrl->priv->pd, mp);
+	mr = priv_mr_get(txq_ctrl->priv, mp);
+	if (mr == NULL)
+		mr = priv_mr_new(txq_ctrl->priv, mp);
 	if (unlikely(mr == NULL)) {
 		DEBUG("%p: unable to configure MR, ibv_reg_mr() failed.",
 		      (void *)txq_ctrl);
-		return (uint32_t)-1;
+		return NULL;
 	}
-	if (unlikely(idx == RTE_DIM(txq_ctrl->txq.mp2mr))) {
+	if (unlikely(idx == RTE_DIM(txq->mp2mr))) {
 		/* Table is full, remove oldest entry. */
 		DEBUG("%p: MR <-> MP table full, dropping oldest entry.",
 		      (void *)txq_ctrl);
 		--idx;
-		claim_zero(ibv_dereg_mr(txq_ctrl->txq.mp2mr[0].mr));
-		memmove(&txq_ctrl->txq.mp2mr[0], &txq_ctrl->txq.mp2mr[1],
-			(sizeof(txq_ctrl->txq.mp2mr) -
-			 sizeof(txq_ctrl->txq.mp2mr[0])));
+		priv_mr_release(txq_ctrl->priv, txq->mp2mr[0]);
+		memmove(&txq->mp2mr[0], &txq->mp2mr[1],
+			(sizeof(txq->mp2mr) - sizeof(txq->mp2mr[0])));
 	}
 	/* Store the new entry. */
-	txq_ctrl->txq.mp2mr[idx].start = (uintptr_t)mr->addr;
-	txq_ctrl->txq.mp2mr[idx].end = (uintptr_t)mr->addr + mr->length;
-	txq_ctrl->txq.mp2mr[idx].mr = mr;
-	txq_ctrl->txq.mp2mr[idx].lkey = rte_cpu_to_be_32(mr->lkey);
+	txq_ctrl->txq.mp2mr[idx] = mr;
 	DEBUG("%p: new MR lkey for MP \"%s\" (%p): 0x%08" PRIu32,
 	      (void *)txq_ctrl, mp->name, (void *)mp,
-	      txq_ctrl->txq.mp2mr[idx].lkey);
-	return txq_ctrl->txq.mp2mr[idx].lkey;
+	      txq_ctrl->txq.mp2mr[idx]->lkey);
+	return mr;
 }
 
 struct txq_mp2mr_mbuf_check_data {
@@ -275,15 +226,149 @@ mlx5_txq_mp2mr_iter(struct rte_mempool *mp, void *arg)
 		return;
 	}
 	for (i = 0; (i != RTE_DIM(txq_ctrl->txq.mp2mr)); ++i) {
-		struct ibv_mr *mr = txq_ctrl->txq.mp2mr[i].mr;
-
-		if (unlikely(mr == NULL)) {
+		if (unlikely(txq_ctrl->txq.mp2mr[i] == NULL)) {
 			/* Unknown MP, add a new MR for it. */
 			break;
 		}
-		if (start >= (uintptr_t)mr->addr &&
-		    end <= (uintptr_t)mr->addr + mr->length)
+		if (start >= (uintptr_t)txq_ctrl->txq.mp2mr[i]->start &&
+		    end <= (uintptr_t)txq_ctrl->txq.mp2mr[i]->end)
 			return;
 	}
 	mlx5_txq_mp2mr_reg(&txq_ctrl->txq, mp, i);
 }
+
+/**
+ * Register a new memory region from the mempool and store it in the memory
+ * region list.
+ *
+ * @param  priv
+ *   Pointer to private structure.
+ * @param mp
+ *   Pointer to the memory pool to register.
+ * @return
+ *   The memory region on success.
+ */
+struct mlx5_mr*
+priv_mr_new(struct priv *priv, struct rte_mempool *mp)
+{
+	const struct rte_memseg *ms = rte_eal_get_physmem_layout();
+	uintptr_t start;
+	uintptr_t end;
+	unsigned int i;
+	struct mlx5_mr *mr;
+
+	mr = rte_zmalloc_socket(__func__, sizeof(*mr), 0, mp->socket_id);
+	if (!mr) {
+		DEBUG("unable to configure MR, ibv_reg_mr() failed.");
+		return NULL;
+	}
+	if (mlx5_check_mempool(mp, &start, &end) != 0) {
+		ERROR("mempool %p: not virtually contiguous",
+		      (void *)mp);
+		return NULL;
+	}
+	DEBUG("mempool %p area start=%p end=%p size=%zu",
+	      (void *)mp, (void *)start, (void *)end,
+	      (size_t)(end - start));
+	/* Round start and end to page boundary if found in memory segments. */
+	for (i = 0; (i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL); ++i) {
+		uintptr_t addr = (uintptr_t)ms[i].addr;
+		size_t len = ms[i].len;
+		unsigned int align = ms[i].hugepage_sz;
+
+		if ((start > addr) && (start < addr + len))
+			start = RTE_ALIGN_FLOOR(start, align);
+		if ((end > addr) && (end < addr + len))
+			end = RTE_ALIGN_CEIL(end, align);
+	}
+	DEBUG("mempool %p using start=%p end=%p size=%zu for MR",
+	      (void *)mp, (void *)start, (void *)end,
+	      (size_t)(end - start));
+	mr->mr = ibv_reg_mr(priv->pd, (void *)start, end - start,
+			    IBV_ACCESS_LOCAL_WRITE);
+	mr->mp = mp;
+	mr->lkey = rte_cpu_to_be_32(mr->mr->lkey);
+	mr->start = start;
+	mr->end = (uintptr_t)mr->mr->addr + mr->mr->length;
+	rte_atomic32_inc(&mr->refcnt);
+	DEBUG("%p: new Memory Region %p refcnt: %d", (void *)priv,
+	      (void *)mr, rte_atomic32_read(&mr->refcnt));
+	LIST_INSERT_HEAD(&priv->mr, mr, next);
+	return mr;
+}
+
+/**
+ * Search the memory region object in the memory region list.
+ *
+ * @param  priv
+ *   Pointer to private structure.
+ * @param mp
+ *   Pointer to the memory pool to register.
+ * @return
+ *   The memory region on success.
+ */
+struct mlx5_mr*
+priv_mr_get(struct priv *priv, struct rte_mempool *mp)
+{
+	struct mlx5_mr *mr;
+
+	assert(mp);
+	if (LIST_EMPTY(&priv->mr))
+		return NULL;
+	LIST_FOREACH(mr, &priv->mr, next) {
+		if (mr->mp == mp) {
+			rte_atomic32_inc(&mr->refcnt);
+			DEBUG("Memory Region %p refcnt: %d",
+			      (void *)mr, rte_atomic32_read(&mr->refcnt));
+			return mr;
+		}
+	}
+	return NULL;
+}
+
+/**
+ * Release the memory region object.
+ *
+ * @param  mr
+ *   Pointer to memory region to release.
+ *
+ * @return
+ *   0 on success, errno on failure.
+ */
+int
+priv_mr_release(struct priv *priv, struct mlx5_mr *mr)
+{
+	(void)priv;
+	assert(mr);
+	DEBUG("Memory Region %p refcnt: %d",
+	      (void *)mr, rte_atomic32_read(&mr->refcnt));
+	if (rte_atomic32_dec_and_test(&mr->refcnt)) {
+		claim_zero(ibv_dereg_mr(mr->mr));
+		LIST_REMOVE(mr, next);
+		rte_free(mr);
+		return 0;
+	}
+	return EBUSY;
+}
+
+/**
+ * Verify the memory region list is empty
+ *
+ * @param priv
+ *  Pointer to private structure.
+ *
+ * @return the number of object not released.
+ */
+int
+priv_mr_verify(struct priv *priv)
+{
+	int ret = 0;
+	struct mlx5_mr *mr;
+
+	LIST_FOREACH(mr, &priv->mr, next) {
+		DEBUG("%p: mr %p still referenced", (void *)priv,
+		      (void *)mr);
+		++ret;
+	}
+	return ret;
+}
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 683a4a7..0d645ec 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -673,7 +673,7 @@ rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl, unsigned int elts_n)
 			.addr =
 			    rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t)),
 			.byte_count = rte_cpu_to_be_32(DATA_LEN(buf)),
-			.lkey = rte_cpu_to_be_32(rxq_ctrl->mr->lkey),
+			.lkey = rxq_ctrl->mr->lkey,
 		};
 		(*rxq_ctrl->rxq.elts)[i] = buf;
 	}
@@ -767,7 +767,7 @@ mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl)
 	if (rxq_ctrl->channel != NULL)
 		claim_zero(ibv_destroy_comp_channel(rxq_ctrl->channel));
 	if (rxq_ctrl->mr != NULL)
-		claim_zero(ibv_dereg_mr(rxq_ctrl->mr));
+		priv_mr_release(rxq_ctrl->priv, rxq_ctrl->mr);
 	memset(rxq_ctrl, 0, sizeof(*rxq_ctrl));
 }
 
@@ -929,12 +929,15 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
 		tmpl.rxq.csum_l2tun =
 			!!dev->data->dev_conf.rxmode.hw_ip_checksum;
 	/* Use the entire RX mempool as the memory region. */
-	tmpl.mr = mlx5_mp2mr(priv->pd, mp);
+	tmpl.mr = priv_mr_get(priv, mp);
 	if (tmpl.mr == NULL) {
-		ret = EINVAL;
-		ERROR("%p: MR creation failure: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
+		tmpl.mr = priv_mr_new(priv, mp);
+		if (tmpl.mr == NULL) {
+			ret = EINVAL;
+			ERROR("%p: MR creation failure: %s",
+			      (void *)dev, strerror(ret));
+			goto error;
+		}
 	}
 	if (dev->data->dev_conf.intr_conf.rxq) {
 		tmpl.channel = ibv_create_comp_channel(priv->ctx);
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 6ffcfb7..89e60ea 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -36,6 +36,7 @@
 
 #include <stddef.h>
 #include <stdint.h>
+#include <sys/queue.h>
 
 /* Verbs header. */
 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
@@ -52,6 +53,7 @@
 #include <rte_mempool.h>
 #include <rte_common.h>
 #include <rte_hexdump.h>
+#include <rte_atomic.h>
 
 #include "mlx5_utils.h"
 #include "mlx5.h"
@@ -80,6 +82,17 @@ struct mlx5_txq_stats {
 
 struct priv;
 
+/* Memory region queue object. */
+struct mlx5_mr {
+	LIST_ENTRY(mlx5_mr) next; /**< Pointer to the next element. */
+	rte_atomic32_t refcnt; /*<< Reference counter. */
+	uint32_t lkey; /*<< rte_cpu_to_be_32(mr->lkey) */
+	uintptr_t start; /* Start address of MR */
+	uintptr_t end; /* End address of MR */
+	struct ibv_mr *mr; /*<< Memory Region. */
+	struct rte_mempool *mp; /*<< Memory Pool. */
+};
+
 /* Compressed CQE context. */
 struct rxq_zip {
 	uint16_t ai; /* Array index. */
@@ -126,7 +139,7 @@ struct mlx5_rxq_ctrl {
 	struct priv *priv; /* Back pointer to private data. */
 	struct ibv_cq *cq; /* Completion Queue. */
 	struct ibv_wq *wq; /* Work Queue. */
-	struct ibv_mr *mr; /* Memory Region (for mp). */
+	struct mlx5_mr *mr; /* Memory Region (for mp). */
 	struct ibv_comp_channel *channel;
 	unsigned int socket; /* CPU socket ID for allocations. */
 	struct mlx5_rxq_data rxq; /* Data path structure. */
@@ -252,6 +265,7 @@ struct mlx5_txq_data {
 	uint16_t mpw_hdr_dseg:1; /* Enable DSEGs in the title WQEBB. */
 	uint16_t max_inline; /* Multiple of RTE_CACHE_LINE_SIZE to inline. */
 	uint16_t inline_max_packet_sz; /* Max packet size for inlining. */
+	uint16_t mr_cache_idx; /* Index of last hit entry. */
 	uint32_t qp_num_8s; /* QP number shifted by 8. */
 	uint32_t flags; /* Flags for Tx Queue. */
 	volatile struct mlx5_cqe (*cqes)[]; /* Completion queue. */
@@ -259,13 +273,7 @@ struct mlx5_txq_data {
 	volatile uint32_t *qp_db; /* Work queue doorbell. */
 	volatile uint32_t *cq_db; /* Completion queue doorbell. */
 	volatile void *bf_reg; /* Blueflame register. */
-	struct {
-		uintptr_t start; /* Start address of MR */
-		uintptr_t end; /* End address of MR */
-		struct ibv_mr *mr; /* Memory Region (for mp). */
-		uint32_t lkey; /* rte_cpu_to_be_32(mr->lkey) */
-	} mp2mr[MLX5_PMD_TX_MP_CACHE]; /* MP to MR translation table. */
-	uint16_t mr_cache_idx; /* Index of last hit entry. */
+	struct mlx5_mr *mp2mr[MLX5_PMD_TX_MP_CACHE]; /* MR translation table. */
 	struct rte_mbuf *(*elts)[]; /* TX elements. */
 	struct mlx5_txq_stats stats; /* TX queue counters. */
 } __rte_cache_aligned;
@@ -341,8 +349,8 @@ uint16_t mlx5_rx_burst_vec(void *, struct rte_mbuf **, uint16_t);
 
 struct ibv_mr *mlx5_mp2mr(struct ibv_pd *, struct rte_mempool *);
 void mlx5_txq_mp2mr_iter(struct rte_mempool *, void *);
-uint32_t mlx5_txq_mp2mr_reg(struct mlx5_txq_data *, struct rte_mempool *,
-			    unsigned int);
+struct mlx5_mr *mlx5_txq_mp2mr_reg(struct mlx5_txq_data *, struct rte_mempool *,
+				   unsigned int);
 
 #ifndef NDEBUG
 /**
@@ -564,26 +572,36 @@ mlx5_tx_mb2mr(struct mlx5_txq_data *txq, struct rte_mbuf *mb)
 {
 	uint16_t i = txq->mr_cache_idx;
 	uintptr_t addr = rte_pktmbuf_mtod(mb, uintptr_t);
+	struct mlx5_mr *mr;
 
 	assert(i < RTE_DIM(txq->mp2mr));
-	if (likely(txq->mp2mr[i].start <= addr && txq->mp2mr[i].end >= addr))
-		return txq->mp2mr[i].lkey;
+	if (likely(txq->mp2mr[i]->start <= addr && txq->mp2mr[i]->end >= addr))
+		return txq->mp2mr[i]->lkey;
 	for (i = 0; (i != RTE_DIM(txq->mp2mr)); ++i) {
-		if (unlikely(txq->mp2mr[i].mr == NULL)) {
+		if (unlikely(txq->mp2mr[i]->mr == NULL)) {
 			/* Unknown MP, add a new MR for it. */
 			break;
 		}
-		if (txq->mp2mr[i].start <= addr &&
-		    txq->mp2mr[i].end >= addr) {
-			assert(txq->mp2mr[i].lkey != (uint32_t)-1);
-			assert(rte_cpu_to_be_32(txq->mp2mr[i].mr->lkey) ==
-			       txq->mp2mr[i].lkey);
+		if (txq->mp2mr[i]->start <= addr &&
+		    txq->mp2mr[i]->end >= addr) {
+			assert(txq->mp2mr[i]->lkey != (uint32_t)-1);
+			assert(rte_cpu_to_be_32(txq->mp2mr[i]->mr->lkey) ==
+			       txq->mp2mr[i]->lkey);
 			txq->mr_cache_idx = i;
-			return txq->mp2mr[i].lkey;
+			return txq->mp2mr[i]->lkey;
 		}
 	}
 	txq->mr_cache_idx = 0;
-	return mlx5_txq_mp2mr_reg(txq, mlx5_tx_mb2mp(mb), i);
+	mr = mlx5_txq_mp2mr_reg(txq, mlx5_tx_mb2mp(mb), i);
+	/*
+	 * Request the reference to use in this queue, the original one is
+	 * kept by the control plane.
+	 */
+	if (mr) {
+		rte_atomic32_inc(&mr->refcnt);
+		return mr->lkey;
+	}
+	return (uint32_t)-1;
 }
 
 /**
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index f551f87..1899850 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -142,11 +142,9 @@ mlx5_txq_cleanup(struct mlx5_txq_ctrl *txq_ctrl)
 		claim_zero(ibv_destroy_qp(txq_ctrl->qp));
 	if (txq_ctrl->cq != NULL)
 		claim_zero(ibv_destroy_cq(txq_ctrl->cq));
-	for (i = 0; (i != RTE_DIM(txq_ctrl->txq.mp2mr)); ++i) {
-		if (txq_ctrl->txq.mp2mr[i].mr == NULL)
-			break;
-		claim_zero(ibv_dereg_mr(txq_ctrl->txq.mp2mr[i].mr));
-	}
+	for (i = 0; (i != RTE_DIM(txq_ctrl->txq.mp2mr)); ++i)
+		if (txq_ctrl->txq.mp2mr[i])
+			priv_mr_release(txq_ctrl->priv, txq_ctrl->txq.mp2mr[i]);
 	memset(txq_ctrl, 0, sizeof(*txq_ctrl));
 }
 
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v3 10/30] net/mlx5: separate DPDK from Verbs Rx queue objects
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (34 preceding siblings ...)
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 09/30] net/mlx5: add reference counter on memory region Nelio Laranjeiro
@ 2017-10-09 14:44 ` Nelio Laranjeiro
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 11/30] net/mlx5: separate DPDK from Verbs Tx " Nelio Laranjeiro
                   ` (19 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-09 14:44 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
Move Verbs objects to their own functions to allocate/release them
independently from the DPDK queue.  At the same time, a reference counter is
added to help detect issues when the queue is being released but is
still in use somewhere else (flows for instance).
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5.c      |   3 +
 drivers/net/mlx5/mlx5.h      |   2 +-
 drivers/net/mlx5/mlx5_flow.c | 101 +++----
 drivers/net/mlx5/mlx5_rxq.c  | 635 +++++++++++++++++++++++++++----------------
 drivers/net/mlx5/mlx5_rxtx.h |  25 +-
 drivers/net/mlx5/mlx5_vlan.c |   2 +-
 6 files changed, 462 insertions(+), 306 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index b658b2b..ed77351 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -253,6 +253,9 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	if (priv->reta_idx != NULL)
 		rte_free(priv->reta_idx);
 	priv_socket_uninit(priv);
+	ret = mlx5_priv_rxq_ibv_verify(priv);
+	if (ret)
+		WARN("%p: some Verbs Rx queue still remain", (void *)priv);
 	ret = priv_flow_verify(priv);
 	if (ret)
 		WARN("%p: some flows still remain", (void *)priv);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index f563722..48c0c8e 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -147,6 +147,7 @@ struct priv {
 	struct rte_flow_drop *flow_drop_queue; /* Flow drop queue. */
 	TAILQ_HEAD(mlx5_flows, rte_flow) flows; /* RTE Flow rules. */
 	LIST_HEAD(mr, mlx5_mr) mr; /* Memory region. */
+	LIST_HEAD(rxqibv, mlx5_rxq_ibv) rxqsibv; /* Verbs Rx queues. */
 	uint32_t link_speed_capa; /* Link speed capabilities. */
 	struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */
 	rte_spinlock_t lock; /* Lock for control functions. */
@@ -290,7 +291,6 @@ int mlx5_flow_flush(struct rte_eth_dev *, struct rte_flow_error *);
 int mlx5_flow_isolate(struct rte_eth_dev *, int, struct rte_flow_error *);
 int priv_flow_start(struct priv *);
 void priv_flow_stop(struct priv *);
-int priv_flow_rxq_in_use(struct priv *, struct mlx5_rxq_data *);
 int priv_flow_verify(struct priv *);
 
 /* mlx5_socket.c */
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 193a90b..362ec91 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -95,11 +95,11 @@ struct rte_flow {
 	struct ibv_flow *ibv_flow; /**< Verbs flow. */
 	struct ibv_wq *wq; /**< Verbs work queue. */
 	struct ibv_cq *cq; /**< Verbs completion queue. */
-	uint16_t rxqs_n; /**< Number of queues in this flow, 0 if drop queue. */
 	uint32_t mark:1; /**< Set if the flow is marked. */
 	uint32_t drop:1; /**< Drop queue. */
 	uint64_t hash_fields; /**< Fields that participate in the hash. */
-	struct mlx5_rxq_data *rxqs[]; /**< Pointer to the queues array. */
+	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< List of queues. */
+	uint16_t queues_n; /**< Number of queues in the list. */
 };
 
 /** Static initializer for items. */
@@ -1096,23 +1096,21 @@ priv_flow_create_action_queue(struct priv *priv,
 	assert(priv->pd);
 	assert(priv->ctx);
 	assert(!flow->actions.drop);
-	rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow) +
-			      sizeof(*rte_flow->rxqs) * flow->actions.queues_n,
-			      0);
+	rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
 	if (!rte_flow) {
 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
 				   NULL, "cannot allocate flow memory");
 		return NULL;
 	}
 	for (i = 0; i < flow->actions.queues_n; ++i) {
-		struct mlx5_rxq_ctrl *rxq;
-
-		rxq = container_of((*priv->rxqs)[flow->actions.queues[i]],
-				   struct mlx5_rxq_ctrl, rxq);
-		wqs[i] = rxq->wq;
-		rte_flow->rxqs[i] = &rxq->rxq;
-		++rte_flow->rxqs_n;
-		rxq->rxq.mark |= flow->actions.mark;
+		struct mlx5_rxq_ibv *rxq_ibv =
+			mlx5_priv_rxq_ibv_get(priv, flow->actions.queues[i]);
+
+		wqs[i] = rxq_ibv->wq;
+		rte_flow->queues[i] = flow->actions.queues[i];
+		++rte_flow->queues_n;
+		(*priv->rxqs)[flow->actions.queues[i]]->mark |=
+			flow->actions.mark;
 	}
 	/* finalise indirection table. */
 	for (j = 0; i < wqs_n; ++i, ++j) {
@@ -1290,6 +1288,8 @@ static void
 priv_flow_destroy(struct priv *priv,
 		  struct rte_flow *flow)
 {
+	unsigned int i;
+
 	TAILQ_REMOVE(&priv->flows, flow, next);
 	if (flow->ibv_flow)
 		claim_zero(ibv_destroy_flow(flow->ibv_flow));
@@ -1299,37 +1299,33 @@ priv_flow_destroy(struct priv *priv,
 		claim_zero(ibv_destroy_qp(flow->qp));
 	if (flow->ind_table)
 		claim_zero(ibv_destroy_rwq_ind_table(flow->ind_table));
-	if (flow->mark) {
+	for (i = 0; i != flow->queues_n; ++i) {
 		struct rte_flow *tmp;
-		struct mlx5_rxq_data *rxq;
-		uint32_t mark_n = 0;
-		uint32_t queue_n;
+		struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[flow->queues[i]];
+		struct mlx5_rxq_ctrl *rxq_ctrl =
+			container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
 
 		/*
 		 * To remove the mark from the queue, the queue must not be
 		 * present in any other marked flow (RSS or not).
 		 */
-		for (queue_n = 0; queue_n < flow->rxqs_n; ++queue_n) {
-			rxq = flow->rxqs[queue_n];
-			for (tmp = TAILQ_FIRST(&priv->flows);
-			     tmp;
-			     tmp = TAILQ_NEXT(tmp, next)) {
-				uint32_t tqueue_n;
+		if (flow->mark) {
+			int mark = 0;
+
+			TAILQ_FOREACH(tmp, &priv->flows, next) {
+				unsigned int j;
 
 				if (tmp->drop)
 					continue;
-				for (tqueue_n = 0;
-				     tqueue_n < tmp->rxqs_n;
-				     ++tqueue_n) {
-					struct mlx5_rxq_data *trxq;
-
-					trxq = tmp->rxqs[tqueue_n];
-					if (rxq == trxq)
-						++mark_n;
-				}
+				if (!tmp->mark)
+					continue;
+				for (j = 0; (j != tmp->queues_n) && !mark; j++)
+					if (tmp->queues[j] == flow->queues[i])
+						mark = 1;
 			}
-			rxq->mark = !!mark_n;
+			rxq_data->mark = mark;
 		}
+		mlx5_priv_rxq_ibv_release(priv, rxq_ctrl->ibv);
 	}
 free:
 	rte_free(flow->ibv_attr);
@@ -1523,8 +1519,8 @@ priv_flow_stop(struct priv *priv)
 		if (flow->mark) {
 			unsigned int n;
 
-			for (n = 0; n < flow->rxqs_n; ++n)
-				flow->rxqs[n]->mark = 0;
+			for (n = 0; n < flow->queues_n; ++n)
+				(*priv->rxqs)[flow->queues[n]]->mark = 0;
 		}
 		DEBUG("Flow %p removed", (void *)flow);
 	}
@@ -1566,39 +1562,8 @@ priv_flow_start(struct priv *priv)
 		if (flow->mark) {
 			unsigned int n;
 
-			for (n = 0; n < flow->rxqs_n; ++n)
-				flow->rxqs[n]->mark = 1;
-		}
-	}
-	return 0;
-}
-
-/**
- * Verify if the Rx queue is used in a flow.
- *
- * @param priv
- *   Pointer to private structure.
- * @param rxq
- *   Pointer to the queue to search.
- *
- * @return
- *   Nonzero if the queue is used by a flow.
- */
-int
-priv_flow_rxq_in_use(struct priv *priv, struct mlx5_rxq_data *rxq)
-{
-	struct rte_flow *flow;
-
-	for (flow = TAILQ_FIRST(&priv->flows);
-	     flow;
-	     flow = TAILQ_NEXT(flow, next)) {
-		unsigned int n;
-
-		if (flow->drop)
-			continue;
-		for (n = 0; n < flow->rxqs_n; ++n) {
-			if (flow->rxqs[n] == rxq)
-				return 1;
+			for (n = 0; n < flow->queues_n; ++n)
+				(*priv->rxqs)[flow->queues[n]]->mark = 1;
 		}
 	}
 	return 0;
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 0d645ec..89c2cdb 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -378,7 +378,7 @@ priv_create_hash_rxqs(struct priv *priv)
 
 		rxq_ctrl = container_of((*priv->rxqs)[(*priv->reta_idx)[i]],
 					struct mlx5_rxq_ctrl, rxq);
-		wqs[i] = rxq_ctrl->wq;
+		wqs[i] = rxq_ctrl->ibv->wq;
 	}
 	/* Get number of hash RX queues to configure. */
 	for (i = 0, hash_rxqs_n = 0; (i != ind_tables_n); ++i)
@@ -645,8 +645,6 @@ rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl, unsigned int elts_n)
 	/* Iterate on segments. */
 	for (i = 0; (i != elts_n); ++i) {
 		struct rte_mbuf *buf;
-		volatile struct mlx5_wqe_data_seg *scat =
-			&(*rxq_ctrl->rxq.wqes)[i];
 
 		buf = rte_pktmbuf_alloc(rxq_ctrl->rxq.mp);
 		if (buf == NULL) {
@@ -667,21 +665,12 @@ rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl, unsigned int elts_n)
 		DATA_LEN(buf) = rte_pktmbuf_tailroom(buf);
 		PKT_LEN(buf) = DATA_LEN(buf);
 		NB_SEGS(buf) = 1;
-		/* scat->addr must be able to store a pointer. */
-		assert(sizeof(scat->addr) >= sizeof(uintptr_t));
-		*scat = (struct mlx5_wqe_data_seg){
-			.addr =
-			    rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t)),
-			.byte_count = rte_cpu_to_be_32(DATA_LEN(buf)),
-			.lkey = rxq_ctrl->mr->lkey,
-		};
 		(*rxq_ctrl->rxq.elts)[i] = buf;
 	}
 	if (rxq_check_vec_support(&rxq_ctrl->rxq) > 0) {
 		struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
 		struct rte_mbuf *mbuf_init = &rxq->fake_mbuf;
 
-		assert(rxq->elts_n == rxq->cqe_n);
 		/* Initialize default rearm_data for vPMD. */
 		mbuf_init->data_off = RTE_PKTMBUF_HEADROOM;
 		rte_mbuf_refcnt_set(mbuf_init, 1);
@@ -759,76 +748,12 @@ void
 mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl)
 {
 	DEBUG("cleaning up %p", (void *)rxq_ctrl);
-	rxq_free_elts(rxq_ctrl);
-	if (rxq_ctrl->wq != NULL)
-		claim_zero(ibv_destroy_wq(rxq_ctrl->wq));
-	if (rxq_ctrl->cq != NULL)
-		claim_zero(ibv_destroy_cq(rxq_ctrl->cq));
-	if (rxq_ctrl->channel != NULL)
-		claim_zero(ibv_destroy_comp_channel(rxq_ctrl->channel));
-	if (rxq_ctrl->mr != NULL)
-		priv_mr_release(rxq_ctrl->priv, rxq_ctrl->mr);
+	if (rxq_ctrl->ibv)
+		mlx5_priv_rxq_ibv_release(rxq_ctrl->priv, rxq_ctrl->ibv);
 	memset(rxq_ctrl, 0, sizeof(*rxq_ctrl));
 }
 
 /**
- * Initialize RX queue.
- *
- * @param tmpl
- *   Pointer to RX queue control template.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-static inline int
-rxq_setup(struct mlx5_rxq_ctrl *tmpl)
-{
-	struct ibv_cq *ibcq = tmpl->cq;
-	struct mlx5dv_cq cq_info;
-	struct mlx5dv_rwq rwq;
-	const uint16_t desc_n =
-		(1 << tmpl->rxq.elts_n) + tmpl->priv->rx_vec_en *
-		MLX5_VPMD_DESCS_PER_LOOP;
-	struct rte_mbuf *(*elts)[desc_n] =
-		rte_calloc_socket("RXQ", 1, sizeof(*elts), 0, tmpl->socket);
-	struct mlx5dv_obj obj;
-	int ret = 0;
-
-	obj.cq.in = ibcq;
-	obj.cq.out = &cq_info;
-	obj.rwq.in = tmpl->wq;
-	obj.rwq.out = &rwq;
-	ret = mlx5dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_RWQ);
-	if (ret != 0) {
-		return -EINVAL;
-	}
-	if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
-		ERROR("Wrong MLX5_CQE_SIZE environment variable value: "
-		      "it should be set to %u", RTE_CACHE_LINE_SIZE);
-		return EINVAL;
-	}
-	if (elts == NULL)
-		return ENOMEM;
-	tmpl->rxq.rq_db = rwq.dbrec;
-	tmpl->rxq.cqe_n = log2above(cq_info.cqe_cnt);
-	tmpl->rxq.cq_ci = 0;
-	tmpl->rxq.rq_ci = 0;
-	tmpl->rxq.rq_pi = 0;
-	tmpl->rxq.cq_db = cq_info.dbrec;
-	tmpl->rxq.wqes =
-		(volatile struct mlx5_wqe_data_seg (*)[])
-		(uintptr_t)rwq.buf;
-	tmpl->rxq.cqes =
-		(volatile struct mlx5_cqe (*)[])
-		(uintptr_t)cq_info.buf;
-	tmpl->rxq.elts = elts;
-	tmpl->rxq.cq_uar = cq_info.cq_uar;
-	tmpl->rxq.cqn = cq_info.cqn;
-	tmpl->rxq.cq_arm_sn = 0;
-	return 0;
-}
-
-/**
  * Configure a RX queue.
  *
  * @param dev
@@ -853,29 +778,28 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
 	       const struct rte_eth_rxconf *conf, struct rte_mempool *mp)
 {
 	struct priv *priv = dev->data->dev_private;
+	const uint16_t desc_n =
+		desc + priv->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
 	struct mlx5_rxq_ctrl tmpl = {
 		.priv = priv,
 		.socket = socket,
 		.rxq = {
+			.elts = rte_calloc_socket("RXQ", 1,
+						  desc_n *
+						  sizeof(struct rte_mbuf *), 0,
+						  socket),
 			.elts_n = log2above(desc),
 			.mp = mp,
 			.rss_hash = priv->rxqs_n > 1,
 		},
 	};
-	struct ibv_wq_attr mod;
-	union {
-		struct ibv_cq_init_attr_ex cq;
-		struct ibv_wq_init_attr wq;
-		struct ibv_cq_ex cq_attr;
-	} attr;
 	unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
-	unsigned int cqe_n = desc - 1;
-	const uint16_t desc_n =
-		desc + priv->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
 	struct rte_mbuf *(*elts)[desc_n] = NULL;
 	int ret = 0;
 
 	(void)conf; /* Thresholds configuration (ignored). */
+	if (dev->data->dev_conf.intr_conf.rxq)
+		tmpl.irq = 1;
 	/* Enable scattered packets support for this queue if necessary. */
 	assert(mb_len >= RTE_PKTMBUF_HEADROOM);
 	if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
@@ -928,77 +852,13 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
 	if (priv->hw_csum_l2tun)
 		tmpl.rxq.csum_l2tun =
 			!!dev->data->dev_conf.rxmode.hw_ip_checksum;
-	/* Use the entire RX mempool as the memory region. */
-	tmpl.mr = priv_mr_get(priv, mp);
-	if (tmpl.mr == NULL) {
-		tmpl.mr = priv_mr_new(priv, mp);
-		if (tmpl.mr == NULL) {
-			ret = EINVAL;
-			ERROR("%p: MR creation failure: %s",
-			      (void *)dev, strerror(ret));
-			goto error;
-		}
-	}
-	if (dev->data->dev_conf.intr_conf.rxq) {
-		tmpl.channel = ibv_create_comp_channel(priv->ctx);
-		if (tmpl.channel == NULL) {
-			ret = ENOMEM;
-			ERROR("%p: Rx interrupt completion channel creation"
-			      " failure: %s",
-			      (void *)dev, strerror(ret));
-			goto error;
-		}
-	}
-	attr.cq = (struct ibv_cq_init_attr_ex){
-		.comp_mask = 0,
-	};
-	if (priv->cqe_comp) {
-		attr.cq.comp_mask |= IBV_CQ_INIT_ATTR_MASK_FLAGS;
-		attr.cq.flags |= MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE;
-		/*
-		 * For vectorized Rx, it must not be doubled in order to
-		 * make cq_ci and rq_ci aligned.
-		 */
-		if (rxq_check_vec_support(&tmpl.rxq) < 0)
-			cqe_n = (desc * 2) - 1; /* Double the number of CQEs. */
-	}
-	tmpl.cq = ibv_create_cq(priv->ctx, cqe_n, NULL, tmpl.channel, 0);
-	if (tmpl.cq == NULL) {
-		ret = ENOMEM;
-		ERROR("%p: CQ creation failure: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	DEBUG("priv->device_attr.max_qp_wr is %d",
-	      priv->device_attr.orig_attr.max_qp_wr);
-	DEBUG("priv->device_attr.max_sge is %d",
-	      priv->device_attr.orig_attr.max_sge);
 	/* Configure VLAN stripping. */
 	tmpl.rxq.vlan_strip = (priv->hw_vlan_strip &&
 			       !!dev->data->dev_conf.rxmode.hw_vlan_strip);
-	attr.wq = (struct ibv_wq_init_attr){
-		.wq_context = NULL, /* Could be useful in the future. */
-		.wq_type = IBV_WQT_RQ,
-		/* Max number of outstanding WRs. */
-		.max_wr = desc >> tmpl.rxq.sges_n,
-		/* Max number of scatter/gather elements in a WR. */
-		.max_sge = 1 << tmpl.rxq.sges_n,
-		.pd = priv->pd,
-		.cq = tmpl.cq,
-		.comp_mask =
-			IBV_WQ_FLAGS_CVLAN_STRIPPING |
-			0,
-		.create_flags = (tmpl.rxq.vlan_strip ?
-				 IBV_WQ_FLAGS_CVLAN_STRIPPING :
-				 0),
-	};
 	/* By default, FCS (CRC) is stripped by hardware. */
 	if (dev->data->dev_conf.rxmode.hw_strip_crc) {
 		tmpl.rxq.crc_present = 0;
 	} else if (priv->hw_fcs_strip) {
-		/* Ask HW/Verbs to leave CRC in place when supported. */
-		attr.wq.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS;
-		attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
 		tmpl.rxq.crc_present = 1;
 	} else {
 		WARN("%p: CRC stripping has been disabled but will still"
@@ -1013,60 +873,21 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
 	      tmpl.rxq.crc_present ? "disabled" : "enabled",
 	      tmpl.rxq.crc_present << 2);
 #ifdef HAVE_IBV_WQ_FLAG_RX_END_PADDING
-	if (!mlx5_getenv_int("MLX5_PMD_ENABLE_PADDING"))
+	if (!mlx5_getenv_int("MLX5_PMD_ENABLE_PADDING")) {
 		; /* Nothing else to do. */
-	else if (priv->hw_padding) {
+	} else if (priv->hw_padding) {
 		INFO("%p: enabling packet padding on queue %p",
 		     (void *)dev, (void *)rxq_ctrl);
-		attr.wq.create_flags |= IBV_WQ_FLAG_RX_END_PADDING;
-		attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
-	} else
+	} else {
 		WARN("%p: packet padding has been requested but is not"
 		     " supported, make sure MLNX_OFED and firmware are"
 		     " up to date",
 		     (void *)dev);
-#endif
-
-	tmpl.wq = ibv_create_wq(priv->ctx, &attr.wq);
-	if (tmpl.wq == NULL) {
-		ret = (errno ? errno : EINVAL);
-		ERROR("%p: WQ creation failure: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	/*
-	 * Make sure number of WRs*SGEs match expectations since a queue
-	 * cannot allocate more than "desc" buffers.
-	 */
-	if (((int)attr.wq.max_wr != (desc >> tmpl.rxq.sges_n)) ||
-	    ((int)attr.wq.max_sge != (1 << tmpl.rxq.sges_n))) {
-		ERROR("%p: requested %u*%u but got %u*%u WRs*SGEs",
-		      (void *)dev,
-		      (desc >> tmpl.rxq.sges_n), (1 << tmpl.rxq.sges_n),
-		      attr.wq.max_wr, attr.wq.max_sge);
-		ret = EINVAL;
-		goto error;
 	}
+#endif
 	/* Save port ID. */
 	tmpl.rxq.port_id = dev->data->port_id;
 	DEBUG("%p: RTE port ID: %u", (void *)rxq_ctrl, tmpl.rxq.port_id);
-	/* Change queue state to ready. */
-	mod = (struct ibv_wq_attr){
-		.attr_mask = IBV_WQ_ATTR_STATE,
-		.wq_state = IBV_WQS_RDY,
-	};
-	ret = ibv_modify_wq(tmpl.wq, &mod);
-	if (ret) {
-		ERROR("%p: WQ state to IBV_WQS_RDY failed: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	ret = rxq_setup(&tmpl);
-	if (ret) {
-		ERROR("%p: cannot initialize RX queue structure: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
 	ret = rxq_alloc_elts(&tmpl, desc);
 	if (ret) {
 		ERROR("%p: RXQ allocation failed: %s",
@@ -1085,17 +906,12 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
 	rte_free(tmpl.rxq.elts);
 	tmpl.rxq.elts = elts;
 	*rxq_ctrl = tmpl;
-	/* Update doorbell counter. */
-	rxq_ctrl->rxq.rq_ci = desc >> rxq_ctrl->rxq.sges_n;
-	rte_wmb();
-	*rxq_ctrl->rxq.rq_db = rte_cpu_to_be_32(rxq_ctrl->rxq.rq_ci);
 	DEBUG("%p: rxq updated with %p", (void *)rxq_ctrl, (void *)&tmpl);
 	assert(ret == 0);
 	return 0;
 error:
-	elts = tmpl.rxq.elts;
+	rte_free(tmpl.rxq.elts);
 	mlx5_rxq_cleanup(&tmpl);
-	rte_free(elts);
 	assert(ret > 0);
 	return ret;
 }
@@ -1185,14 +1001,20 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		}
 	}
 	ret = rxq_ctrl_setup(dev, rxq_ctrl, desc, socket, conf, mp);
-	if (ret)
+	if (ret) {
 		rte_free(rxq_ctrl);
-	else {
-		rxq_ctrl->rxq.stats.idx = idx;
-		DEBUG("%p: adding RX queue %p to list",
-		      (void *)dev, (void *)rxq_ctrl);
-		(*priv->rxqs)[idx] = &rxq_ctrl->rxq;
+		goto out;
+	}
+	rxq_ctrl->rxq.stats.idx = idx;
+	DEBUG("%p: adding RX queue %p to list",
+	      (void *)dev, (void *)rxq_ctrl);
+	(*priv->rxqs)[idx] = &rxq_ctrl->rxq;
+	rxq_ctrl->ibv = mlx5_priv_rxq_ibv_new(priv, idx);
+	if (!rxq_ctrl->ibv) {
+		ret = EAGAIN;
+		goto out;
 	}
+out:
 	priv_unlock(priv);
 	return -ret;
 }
@@ -1219,7 +1041,7 @@ mlx5_rx_queue_release(void *dpdk_rxq)
 	rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq);
 	priv = rxq_ctrl->priv;
 	priv_lock(priv);
-	if (priv_flow_rxq_in_use(priv, rxq))
+	if (!mlx5_priv_rxq_ibv_releasable(priv, rxq_ctrl->ibv))
 		rte_panic("Rx queue %p is still used by a flow and cannot be"
 			  " removed\n", (void *)rxq_ctrl);
 	for (i = 0; (i != priv->rxqs_n); ++i)
@@ -1264,15 +1086,14 @@ priv_rx_intr_vec_enable(struct priv *priv)
 	}
 	intr_handle->type = RTE_INTR_HANDLE_EXT;
 	for (i = 0; i != n; ++i) {
-		struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
-		struct mlx5_rxq_ctrl *rxq_ctrl =
-			container_of(rxq, struct mlx5_rxq_ctrl, rxq);
+		/* This rxq ibv must not be released in this function. */
+		struct mlx5_rxq_ibv *rxq_ibv = mlx5_priv_rxq_ibv_get(priv, i);
 		int fd;
 		int flags;
 		int rc;
 
 		/* Skip queues that cannot request interrupts. */
-		if (!rxq || !rxq_ctrl->channel) {
+		if (!rxq_ibv || !rxq_ibv->channel) {
 			/* Use invalid intr_vec[] index to disable entry. */
 			intr_handle->intr_vec[i] =
 				RTE_INTR_VEC_RXTX_OFFSET +
@@ -1286,7 +1107,7 @@ priv_rx_intr_vec_enable(struct priv *priv)
 			priv_rx_intr_vec_disable(priv);
 			return -1;
 		}
-		fd = rxq_ctrl->channel->fd;
+		fd = rxq_ibv->channel->fd;
 		flags = fcntl(fd, F_GETFL);
 		rc = fcntl(fd, F_SETFL, flags | O_NONBLOCK);
 		if (rc < 0) {
@@ -1316,7 +1137,27 @@ void
 priv_rx_intr_vec_disable(struct priv *priv)
 {
 	struct rte_intr_handle *intr_handle = priv->dev->intr_handle;
+	unsigned int i;
+	unsigned int rxqs_n = priv->rxqs_n;
+	unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
+
+	if (!priv->dev->data->dev_conf.intr_conf.rxq)
+		return;
+	for (i = 0; i != n; ++i) {
+		struct mlx5_rxq_ctrl *rxq_ctrl;
+		struct mlx5_rxq_data *rxq_data;
 
+		if (intr_handle->intr_vec[i] == RTE_INTR_VEC_RXTX_OFFSET +
+		    RTE_MAX_RXTX_INTR_VEC_ID)
+			continue;
+		/**
+		 * Need to access the queue directly to release the reference
+		 * kept in priv_rx_intr_vec_enable().
+		 */
+		rxq_data = (*priv->rxqs)[i];
+		rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
+		mlx5_priv_rxq_ibv_release(priv, rxq_ctrl->ibv);
+	}
 	rte_intr_free_epoll_fd(intr_handle);
 	free(intr_handle->intr_vec);
 	intr_handle->nb_efd = 0;
@@ -1363,16 +1204,30 @@ int
 mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
 	struct priv *priv = mlx5_get_priv(dev);
-	struct mlx5_rxq_data *rxq = (*priv->rxqs)[rx_queue_id];
-	struct mlx5_rxq_ctrl *rxq_ctrl =
-		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
+	struct mlx5_rxq_data *rxq_data;
+	struct mlx5_rxq_ctrl *rxq_ctrl;
 	int ret = 0;
 
-	if (!rxq || !rxq_ctrl->channel) {
+	priv_lock(priv);
+	rxq_data = (*priv->rxqs)[rx_queue_id];
+	if (!rxq_data) {
 		ret = EINVAL;
-	} else {
-		mlx5_arm_cq(rxq, rxq->cq_arm_sn);
+		goto exit;
+	}
+	rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
+	if (rxq_ctrl->irq) {
+		struct mlx5_rxq_ibv *rxq_ibv;
+
+		rxq_ibv = mlx5_priv_rxq_ibv_get(priv, rx_queue_id);
+		if (!rxq_ibv) {
+			ret = EINVAL;
+			goto exit;
+		}
+		mlx5_arm_cq(rxq_data, rxq_data->cq_arm_sn);
+		mlx5_priv_rxq_ibv_release(priv, rxq_ibv);
 	}
+exit:
+	priv_unlock(priv);
 	if (ret)
 		WARN("unable to arm interrupt on rx queue %d", rx_queue_id);
 	return -ret;
@@ -1393,25 +1248,345 @@ int
 mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
 	struct priv *priv = mlx5_get_priv(dev);
-	struct mlx5_rxq_data *rxq = (*priv->rxqs)[rx_queue_id];
-	struct mlx5_rxq_ctrl *rxq_ctrl =
-		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
+	struct mlx5_rxq_data *rxq_data;
+	struct mlx5_rxq_ctrl *rxq_ctrl;
+	struct mlx5_rxq_ibv *rxq_ibv = NULL;
 	struct ibv_cq *ev_cq;
 	void *ev_ctx;
-	int ret;
+	int ret = 0;
 
-	if (!rxq || !rxq_ctrl->channel) {
+	priv_lock(priv);
+	rxq_data = (*priv->rxqs)[rx_queue_id];
+	if (!rxq_data) {
 		ret = EINVAL;
-	} else {
-		ret = ibv_get_cq_event(rxq_ctrl->cq->channel, &ev_cq, &ev_ctx);
-		rxq->cq_arm_sn++;
-		if (ret || ev_cq != rxq_ctrl->cq)
-			ret = EINVAL;
+		goto exit;
+	}
+	rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
+	if (!rxq_ctrl->irq)
+		goto exit;
+	rxq_ibv = mlx5_priv_rxq_ibv_get(priv, rx_queue_id);
+	if (!rxq_ibv) {
+		ret = EINVAL;
+		goto exit;
+	}
+	ret = ibv_get_cq_event(rxq_ibv->channel, &ev_cq, &ev_ctx);
+	if (ret || ev_cq != rxq_ibv->cq) {
+		ret = EINVAL;
+		goto exit;
 	}
+	rxq_data->cq_arm_sn++;
+	ibv_ack_cq_events(rxq_ibv->cq, 1);
+exit:
+	if (rxq_ibv)
+		mlx5_priv_rxq_ibv_release(priv, rxq_ibv);
+	priv_unlock(priv);
 	if (ret)
 		WARN("unable to disable interrupt on rx queue %d",
 		     rx_queue_id);
-	else
-		ibv_ack_cq_events(rxq_ctrl->cq, 1);
 	return -ret;
 }
+
+/**
+ * Create the Rx queue Verbs object.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param idx
+ *   Queue index in DPDK Rx queue array
+ *
+ * @return
+ *   The initialised Verbs object if it can be created, NULL otherwise.
+ */
+struct mlx5_rxq_ibv*
+mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
+{
+	struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
+	struct mlx5_rxq_ctrl *rxq_ctrl =
+		container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
+	struct ibv_wq_attr mod;
+	union {
+		struct ibv_cq_init_attr_ex cq;
+		struct ibv_wq_init_attr wq;
+		struct ibv_cq_ex cq_attr;
+	} attr;
+	unsigned int cqe_n = (1 << rxq_data->elts_n) - 1;
+	struct mlx5_rxq_ibv *tmpl;
+	struct mlx5dv_cq cq_info;
+	struct mlx5dv_rwq rwq;
+	unsigned int i;
+	int ret = 0;
+	struct mlx5dv_obj obj;
+
+	assert(rxq_data);
+	assert(!rxq_ctrl->ibv);
+	tmpl = rte_calloc_socket(__func__, 1, sizeof(*tmpl), 0,
+				 rxq_ctrl->socket);
+	if (!tmpl) {
+		ERROR("%p: cannot allocate verbs resources",
+		       (void *)rxq_ctrl);
+		goto error;
+	}
+	tmpl->rxq_ctrl = rxq_ctrl;
+	/* Use the entire RX mempool as the memory region. */
+	tmpl->mr = priv_mr_get(priv, rxq_data->mp);
+	if (!tmpl->mr) {
+		tmpl->mr = priv_mr_new(priv, rxq_data->mp);
+		if (!tmpl->mr) {
+			ERROR("%p: MR creation failure", (void *)rxq_ctrl);
+			goto error;
+		}
+	}
+	if (rxq_ctrl->irq) {
+		tmpl->channel = ibv_create_comp_channel(priv->ctx);
+		if (!tmpl->channel) {
+			ERROR("%p: Comp Channel creation failure",
+			      (void *)rxq_ctrl);
+			goto error;
+		}
+	}
+	attr.cq = (struct ibv_cq_init_attr_ex){
+		.comp_mask = 0,
+	};
+	if (priv->cqe_comp) {
+		attr.cq.comp_mask |= IBV_CQ_INIT_ATTR_MASK_FLAGS;
+		attr.cq.flags |= MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE;
+		/*
+		 * For vectorized Rx, it must not be doubled in order to
+		 * make cq_ci and rq_ci aligned.
+		 */
+		if (rxq_check_vec_support(rxq_data) < 0)
+			cqe_n *= 2;
+	}
+	tmpl->cq = ibv_create_cq(priv->ctx, cqe_n, NULL, tmpl->channel, 0);
+	if (tmpl->cq == NULL) {
+		ERROR("%p: CQ creation failure", (void *)rxq_ctrl);
+		goto error;
+	}
+	DEBUG("priv->device_attr.max_qp_wr is %d",
+	      priv->device_attr.orig_attr.max_qp_wr);
+	DEBUG("priv->device_attr.max_sge is %d",
+	      priv->device_attr.orig_attr.max_sge);
+	attr.wq = (struct ibv_wq_init_attr){
+		.wq_context = NULL, /* Could be useful in the future. */
+		.wq_type = IBV_WQT_RQ,
+		/* Max number of outstanding WRs. */
+		.max_wr = (1 << rxq_data->elts_n) >> rxq_data->sges_n,
+		/* Max number of scatter/gather elements in a WR. */
+		.max_sge = 1 << rxq_data->sges_n,
+		.pd = priv->pd,
+		.cq = tmpl->cq,
+		.comp_mask =
+			IBV_WQ_FLAGS_CVLAN_STRIPPING |
+			0,
+		.create_flags = (rxq_data->vlan_strip ?
+				 IBV_WQ_FLAGS_CVLAN_STRIPPING :
+				 0),
+	};
+	/* By default, FCS (CRC) is stripped by hardware. */
+	if (rxq_data->crc_present) {
+		attr.wq.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS;
+		attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
+	}
+#ifdef HAVE_IBV_WQ_FLAG_RX_END_PADDING
+	if (priv->hw_padding) {
+		attr.wq.create_flags |= IBV_WQ_FLAG_RX_END_PADDING;
+		attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
+	}
+#endif
+	tmpl->wq = ibv_create_wq(priv->ctx, &attr.wq);
+	if (tmpl->wq == NULL) {
+		ERROR("%p: WQ creation failure", (void *)rxq_ctrl);
+		goto error;
+	}
+	/*
+	 * Make sure number of WRs*SGEs match expectations since a queue
+	 * cannot allocate more than "desc" buffers.
+	 */
+	if (((int)attr.wq.max_wr !=
+	     ((1 << rxq_data->elts_n) >> rxq_data->sges_n)) ||
+	    ((int)attr.wq.max_sge != (1 << rxq_data->sges_n))) {
+		ERROR("%p: requested %u*%u but got %u*%u WRs*SGEs",
+		      (void *)rxq_ctrl,
+		      ((1 << rxq_data->elts_n) >> rxq_data->sges_n),
+		      (1 << rxq_data->sges_n),
+		      attr.wq.max_wr, attr.wq.max_sge);
+		goto error;
+	}
+	/* Change queue state to ready. */
+	mod = (struct ibv_wq_attr){
+		.attr_mask = IBV_WQ_ATTR_STATE,
+		.wq_state = IBV_WQS_RDY,
+	};
+	ret = ibv_modify_wq(tmpl->wq, &mod);
+	if (ret) {
+		ERROR("%p: WQ state to IBV_WQS_RDY failed",
+		      (void *)rxq_ctrl);
+		goto error;
+	}
+	obj.cq.in = tmpl->cq;
+	obj.cq.out = &cq_info;
+	obj.rwq.in = tmpl->wq;
+	obj.rwq.out = &rwq;
+	ret = mlx5dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_RWQ);
+	if (ret != 0)
+		goto error;
+	if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
+		ERROR("Wrong MLX5_CQE_SIZE environment variable value: "
+		      "it should be set to %u", RTE_CACHE_LINE_SIZE);
+		goto error;
+	}
+	/* Fill the rings. */
+	rxq_data->wqes = (volatile struct mlx5_wqe_data_seg (*)[])
+		(uintptr_t)rwq.buf;
+	for (i = 0; (i != (unsigned int)(1 << rxq_data->elts_n)); ++i) {
+		struct rte_mbuf *buf = (*rxq_data->elts)[i];
+		volatile struct mlx5_wqe_data_seg *scat = &(*rxq_data->wqes)[i];
+
+		/* scat->addr must be able to store a pointer. */
+		assert(sizeof(scat->addr) >= sizeof(uintptr_t));
+		*scat = (struct mlx5_wqe_data_seg){
+			.addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
+								  uintptr_t)),
+			.byte_count = rte_cpu_to_be_32(DATA_LEN(buf)),
+			.lkey = tmpl->mr->lkey,
+		};
+	}
+	rxq_data->rq_db = rwq.dbrec;
+	rxq_data->cqe_n = log2above(cq_info.cqe_cnt);
+	rxq_data->cq_ci = 0;
+	rxq_data->rq_ci = 0;
+	rxq_data->rq_pi = 0;
+	rxq_data->zip = (struct rxq_zip){
+		.ai = 0,
+	};
+	rxq_data->cq_db = cq_info.dbrec;
+	rxq_data->cqes = (volatile struct mlx5_cqe (*)[])(uintptr_t)cq_info.buf;
+	/* Update doorbell counter. */
+	rxq_data->rq_ci = (1 << rxq_data->elts_n) >> rxq_data->sges_n;
+	rte_wmb();
+	*rxq_data->rq_db = rte_cpu_to_be_32(rxq_data->rq_ci);
+	DEBUG("%p: rxq updated with %p", (void *)rxq_ctrl, (void *)&tmpl);
+	rte_atomic32_inc(&tmpl->refcnt);
+	DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
+	      (void *)tmpl, rte_atomic32_read(&tmpl->refcnt));
+	LIST_INSERT_HEAD(&priv->rxqsibv, tmpl, next);
+	return tmpl;
+error:
+	if (tmpl->wq)
+		claim_zero(ibv_destroy_wq(tmpl->wq));
+	if (tmpl->cq)
+		claim_zero(ibv_destroy_cq(tmpl->cq));
+	if (tmpl->channel)
+		claim_zero(ibv_destroy_comp_channel(tmpl->channel));
+	if (tmpl->mr)
+		priv_mr_release(priv, tmpl->mr);
+	return NULL;
+}
+
+/**
+ * Get an Rx queue Verbs object.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param idx
+ *   Queue index in DPDK Rx queue array
+ *
+ * @return
+ *   The Verbs object if it exists.
+ */
+struct mlx5_rxq_ibv*
+mlx5_priv_rxq_ibv_get(struct priv *priv, uint16_t idx)
+{
+	struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
+	struct mlx5_rxq_ctrl *rxq_ctrl;
+
+	if (idx >= priv->rxqs_n)
+		return NULL;
+	if (!rxq_data)
+		return NULL;
+	rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
+	if (rxq_ctrl->ibv) {
+		priv_mr_get(priv, rxq_data->mp);
+		rte_atomic32_inc(&rxq_ctrl->ibv->refcnt);
+		DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
+		      (void *)rxq_ctrl->ibv,
+		      rte_atomic32_read(&rxq_ctrl->ibv->refcnt));
+	}
+	return rxq_ctrl->ibv;
+}
+
+/**
+ * Release an Rx verbs queue object.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param rxq_ibv
+ *   Verbs Rx queue object.
+ *
+ * @return
+ *   0 if the object was destroyed, EBUSY if it is still referenced.
+ */
+int
+mlx5_priv_rxq_ibv_release(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv)
+{
+	int ret;
+
+	assert(rxq_ibv);
+	assert(rxq_ibv->wq);
+	assert(rxq_ibv->cq);
+	assert(rxq_ibv->mr);
+	ret = priv_mr_release(priv, rxq_ibv->mr);
+	if (!ret)
+		rxq_ibv->mr = NULL;
+	DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
+	      (void *)rxq_ibv, rte_atomic32_read(&rxq_ibv->refcnt));
+	if (rte_atomic32_dec_and_test(&rxq_ibv->refcnt)) {
+		rxq_free_elts(rxq_ibv->rxq_ctrl);
+		claim_zero(ibv_destroy_wq(rxq_ibv->wq));
+		claim_zero(ibv_destroy_cq(rxq_ibv->cq));
+		if (rxq_ibv->channel)
+			claim_zero(ibv_destroy_comp_channel(rxq_ibv->channel));
+		LIST_REMOVE(rxq_ibv, next);
+		rte_free(rxq_ibv);
+		return 0;
+	}
+	return EBUSY;
+}
+
+/**
+ * Verify the Verbs Rx queue list is empty
+ *
+ * @param priv
+ *  Pointer to private structure.
+ *
+ * @return the number of objects not released.
+ */
+int
+mlx5_priv_rxq_ibv_verify(struct priv *priv)
+{
+	int ret = 0;
+	struct mlx5_rxq_ibv *rxq_ibv;
+
+	LIST_FOREACH(rxq_ibv, &priv->rxqsibv, next) {
+		DEBUG("%p: Verbs Rx queue %p still referenced", (void *)priv,
+		      (void *)rxq_ibv);
+		++ret;
+	}
+	return ret;
+}
+
+/**
+ * Return true if a single reference exists on the object.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param rxq_ibv
+ *   Verbs Rx queue object.
+ */
+int
+mlx5_priv_rxq_ibv_releasable(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv)
+{
+	(void)priv;
+	assert(rxq_ibv);
+	return (rte_atomic32_read(&rxq_ibv->refcnt) == 1);
+}
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 89e60ea..812fbb1 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -134,15 +134,24 @@ struct mlx5_rxq_data {
 	uint8_t cq_arm_sn; /* CQ arm seq number. */
 } __rte_cache_aligned;
 
-/* RX queue control descriptor. */
-struct mlx5_rxq_ctrl {
-	struct priv *priv; /* Back pointer to private data. */
+/* Verbs Rx queue elements. */
+struct mlx5_rxq_ibv {
+	LIST_ENTRY(mlx5_rxq_ibv) next; /* Pointer to the next element. */
+	rte_atomic32_t refcnt; /* Reference counter. */
+	struct mlx5_rxq_ctrl *rxq_ctrl; /* Back pointer to parent. */
 	struct ibv_cq *cq; /* Completion Queue. */
 	struct ibv_wq *wq; /* Work Queue. */
-	struct mlx5_mr *mr; /* Memory Region (for mp). */
 	struct ibv_comp_channel *channel;
-	unsigned int socket; /* CPU socket ID for allocations. */
+	struct mlx5_mr *mr; /* Memory Region (for mp). */
+};
+
+/* RX queue control descriptor. */
+struct mlx5_rxq_ctrl {
+	struct priv *priv; /* Back pointer to private data. */
+	struct mlx5_rxq_ibv *ibv; /* Verbs elements. */
 	struct mlx5_rxq_data rxq; /* Data path structure. */
+	unsigned int socket; /* CPU socket ID for allocations. */
+	unsigned int irq:1; /* Whether IRQ is enabled. */
 };
 
 /* Hash RX queue types. */
@@ -310,6 +319,11 @@ int priv_rx_intr_vec_enable(struct priv *priv);
 void priv_rx_intr_vec_disable(struct priv *priv);
 int mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id);
 int mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id);
+struct mlx5_rxq_ibv *mlx5_priv_rxq_ibv_new(struct priv *, uint16_t);
+struct mlx5_rxq_ibv *mlx5_priv_rxq_ibv_get(struct priv *, uint16_t);
+int mlx5_priv_rxq_ibv_release(struct priv *, struct mlx5_rxq_ibv *);
+int mlx5_priv_rxq_ibv_releasable(struct priv *, struct mlx5_rxq_ibv *);
+int mlx5_priv_rxq_ibv_verify(struct priv *);
 
 /* mlx5_txq.c */
 
@@ -347,7 +361,6 @@ uint16_t mlx5_rx_burst_vec(void *, struct rte_mbuf **, uint16_t);
 
 /* mlx5_mr.c */
 
-struct ibv_mr *mlx5_mp2mr(struct ibv_pd *, struct rte_mempool *);
 void mlx5_txq_mp2mr_iter(struct rte_mempool *, void *);
 struct mlx5_mr *mlx5_txq_mp2mr_reg(struct mlx5_txq_data *, struct rte_mempool *,
 				   unsigned int);
diff --git a/drivers/net/mlx5/mlx5_vlan.c b/drivers/net/mlx5/mlx5_vlan.c
index 0d91591..d707984 100644
--- a/drivers/net/mlx5/mlx5_vlan.c
+++ b/drivers/net/mlx5/mlx5_vlan.c
@@ -154,7 +154,7 @@ priv_vlan_strip_queue_set(struct priv *priv, uint16_t idx, int on)
 		.flags = vlan_offloads,
 	};
 
-	err = ibv_modify_wq(rxq_ctrl->wq, &mod);
+	err = ibv_modify_wq(rxq_ctrl->ibv->wq, &mod);
 	if (err) {
 		ERROR("%p: failed to modified stripping mode: %s",
 		      (void *)priv, strerror(err));
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v3 11/30] net/mlx5: separate DPDK from Verbs Tx queue objects
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (35 preceding siblings ...)
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 10/30] net/mlx5: separate DPDK from Verbs Rx queue objects Nelio Laranjeiro
@ 2017-10-09 14:44 ` Nelio Laranjeiro
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 12/30] net/mlx5: add reference counter on DPDK Tx queues Nelio Laranjeiro
                   ` (18 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-09 14:44 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
Move Verbs objects to their own functions to allocate/release them
independently from the DPDK queue.  At the same time, a reference counter is
added to help detect issues when the queue is being released but is
still in use somewhere else (flows for instance).
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5.c      |   3 +
 drivers/net/mlx5/mlx5.h      |   1 +
 drivers/net/mlx5/mlx5_rxtx.h |  18 +-
 drivers/net/mlx5/mlx5_txq.c  | 479 ++++++++++++++++++++++++++-----------------
 4 files changed, 308 insertions(+), 193 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index ed77351..bd160c5 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -256,6 +256,9 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	ret = mlx5_priv_rxq_ibv_verify(priv);
 	if (ret)
 		WARN("%p: some Verbs Rx queue still remain", (void *)priv);
+	ret = mlx5_priv_txq_ibv_verify(priv);
+	if (ret)
+		WARN("%p: some Verbs Tx queue still remain", (void *)priv);
 	ret = priv_flow_verify(priv);
 	if (ret)
 		WARN("%p: some flows still remain", (void *)priv);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 48c0c8e..67d2edb 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -148,6 +148,7 @@ struct priv {
 	TAILQ_HEAD(mlx5_flows, rte_flow) flows; /* RTE Flow rules. */
 	LIST_HEAD(mr, mlx5_mr) mr; /* Memory region. */
 	LIST_HEAD(rxqibv, mlx5_rxq_ibv) rxqsibv; /* Verbs Rx queues. */
+	LIST_HEAD(txqibv, mlx5_txq_ibv) txqsibv; /* Verbs Tx queues. */
 	uint32_t link_speed_capa; /* Link speed capabilities. */
 	struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */
 	rte_spinlock_t lock; /* Lock for control functions. */
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 812fbb1..30ad363 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -287,12 +287,21 @@ struct mlx5_txq_data {
 	struct mlx5_txq_stats stats; /* TX queue counters. */
 } __rte_cache_aligned;
 
+/* Verbs Tx queue elements. */
+struct mlx5_txq_ibv {
+	LIST_ENTRY(mlx5_txq_ibv) next; /* Pointer to the next element. */
+	rte_atomic32_t refcnt; /* Reference counter. */
+	struct ibv_cq *cq; /* Completion Queue. */
+	struct ibv_qp *qp; /* Queue Pair. */
+};
+
 /* TX queue control descriptor. */
 struct mlx5_txq_ctrl {
 	struct priv *priv; /* Back pointer to private data. */
-	struct ibv_cq *cq; /* Completion Queue. */
-	struct ibv_qp *qp; /* Queue Pair. */
 	unsigned int socket; /* CPU socket ID for allocations. */
+	unsigned int max_inline_data; /* Max inline data. */
+	unsigned int max_tso_header; /* Max TSO header size. */
+	struct mlx5_txq_ibv *ibv; /* Verbs queue object. */
 	struct mlx5_txq_data txq; /* Data path structure. */
 	off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
 };
@@ -334,6 +343,11 @@ int mlx5_tx_queue_setup(struct rte_eth_dev *, uint16_t, uint16_t, unsigned int,
 			const struct rte_eth_txconf *);
 void mlx5_tx_queue_release(void *);
 int priv_tx_uar_remap(struct priv *priv, int fd);
+struct mlx5_txq_ibv *mlx5_priv_txq_ibv_new(struct priv *, uint16_t);
+struct mlx5_txq_ibv *mlx5_priv_txq_ibv_get(struct priv *, uint16_t);
+int mlx5_priv_txq_ibv_release(struct priv *, struct mlx5_txq_ibv *);
+int mlx5_priv_txq_ibv_releasable(struct priv *, struct mlx5_txq_ibv *);
+int mlx5_priv_txq_ibv_verify(struct priv *);
 
 /* mlx5_rxtx.c */
 
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 1899850..3a6ef39 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -75,13 +75,6 @@ txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl, unsigned int elts_n)
 
 	for (i = 0; (i != elts_n); ++i)
 		(*txq_ctrl->txq.elts)[i] = NULL;
-	for (i = 0; (i != (1u << txq_ctrl->txq.wqe_n)); ++i) {
-		volatile struct mlx5_wqe64 *wqe =
-			(volatile struct mlx5_wqe64 *)
-			txq_ctrl->txq.wqes + i;
-
-		memset((void *)(uintptr_t)wqe, 0x0, sizeof(*wqe));
-	}
 	DEBUG("%p: allocated and configured %u WRs", (void *)txq_ctrl, elts_n);
 	txq_ctrl->txq.elts_head = 0;
 	txq_ctrl->txq.elts_tail = 0;
@@ -138,74 +131,15 @@ mlx5_txq_cleanup(struct mlx5_txq_ctrl *txq_ctrl)
 
 	DEBUG("cleaning up %p", (void *)txq_ctrl);
 	txq_free_elts(txq_ctrl);
-	if (txq_ctrl->qp != NULL)
-		claim_zero(ibv_destroy_qp(txq_ctrl->qp));
-	if (txq_ctrl->cq != NULL)
-		claim_zero(ibv_destroy_cq(txq_ctrl->cq));
 	for (i = 0; (i != RTE_DIM(txq_ctrl->txq.mp2mr)); ++i)
 		if (txq_ctrl->txq.mp2mr[i])
 			priv_mr_release(txq_ctrl->priv, txq_ctrl->txq.mp2mr[i]);
+	if (txq_ctrl->ibv)
+		mlx5_priv_txq_ibv_release(txq_ctrl->priv, txq_ctrl->ibv);
 	memset(txq_ctrl, 0, sizeof(*txq_ctrl));
 }
 
 /**
- * Initialize TX queue.
- *
- * @param tmpl
- *   Pointer to TX queue control template.
- * @param txq_ctrl
- *   Pointer to TX queue control.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-static inline int
-txq_setup(struct mlx5_txq_ctrl *tmpl, struct mlx5_txq_ctrl *txq_ctrl)
-{
-	struct mlx5dv_qp qp;
-	struct ibv_cq *ibcq = tmpl->cq;
-	struct mlx5dv_cq cq_info;
-	struct mlx5dv_obj obj;
-	int ret = 0;
-
-	qp.comp_mask = MLX5DV_QP_MASK_UAR_MMAP_OFFSET;
-	obj.cq.in = ibcq;
-	obj.cq.out = &cq_info;
-	obj.qp.in = tmpl->qp;
-	obj.qp.out = &qp;
-	ret = mlx5dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_QP);
-	if (ret != 0) {
-		return -EINVAL;
-	}
-	if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
-		ERROR("Wrong MLX5_CQE_SIZE environment variable value: "
-		      "it should be set to %u", RTE_CACHE_LINE_SIZE);
-		return EINVAL;
-	}
-	tmpl->txq.cqe_n = log2above(cq_info.cqe_cnt);
-	tmpl->txq.qp_num_8s = tmpl->qp->qp_num << 8;
-	tmpl->txq.wqes = qp.sq.buf;
-	tmpl->txq.wqe_n = log2above(qp.sq.wqe_cnt);
-	tmpl->txq.qp_db = &qp.dbrec[MLX5_SND_DBR];
-	tmpl->txq.bf_reg = qp.bf.reg;
-	tmpl->txq.cq_db = cq_info.dbrec;
-	tmpl->txq.cqes =
-		(volatile struct mlx5_cqe (*)[])
-		(uintptr_t)cq_info.buf;
-	tmpl->txq.elts =
-		(struct rte_mbuf *(*)[1 << tmpl->txq.elts_n])
-		((uintptr_t)txq_ctrl + sizeof(*txq_ctrl));
-	if (qp.comp_mask | MLX5DV_QP_MASK_UAR_MMAP_OFFSET) {
-		tmpl->uar_mmap_offset = qp.uar_mmap_offset;
-	} else {
-		ERROR("Failed to retrieve UAR info, invalid libmlx5.so version");
-		return EINVAL;
-	}
-
-	return 0;
-}
-
-/**
  * Configure a TX queue.
  *
  * @param dev
@@ -232,22 +166,13 @@ mlx5_txq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_txq_ctrl *txq_ctrl,
 		.priv = priv,
 		.socket = socket,
 	};
-	union {
-		struct ibv_qp_init_attr_ex init;
-		struct ibv_cq_init_attr_ex cq;
-		struct ibv_qp_attr mod;
-		struct ibv_cq_ex cq_attr;
-	} attr;
-	unsigned int cqe_n;
 	const unsigned int max_tso_inline = ((MLX5_MAX_TSO_HEADER +
 					     (RTE_CACHE_LINE_SIZE - 1)) /
 					      RTE_CACHE_LINE_SIZE);
-	int ret = 0;
 
 	if (mlx5_getenv_int("MLX5_ENABLE_CQE_COMPRESSION")) {
-		ret = ENOTSUP;
 		ERROR("MLX5_ENABLE_CQE_COMPRESSION must never be set");
-		goto error;
+		return ENOTSUP;
 	}
 	tmpl.txq.flags = conf->txq_flags;
 	assert(desc > MLX5_TX_COMP_THRESH);
@@ -255,53 +180,10 @@ mlx5_txq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_txq_ctrl *txq_ctrl,
 	if (priv->mps == MLX5_MPW_ENHANCED)
 		tmpl.txq.mpw_hdr_dseg = priv->mpw_hdr_dseg;
 	/* MRs will be registered in mp2mr[] later. */
-	attr.cq = (struct ibv_cq_init_attr_ex){
-		.comp_mask = 0,
-	};
-	cqe_n = ((desc / MLX5_TX_COMP_THRESH) - 1) ?
-		((desc / MLX5_TX_COMP_THRESH) - 1) : 1;
-	if (priv->mps == MLX5_MPW_ENHANCED)
-		cqe_n += MLX5_TX_COMP_THRESH_INLINE_DIV;
-	tmpl.cq = ibv_create_cq(priv->ctx,
-				cqe_n,
-				NULL, NULL, 0);
-	if (tmpl.cq == NULL) {
-		ret = ENOMEM;
-		ERROR("%p: CQ creation failure: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
 	DEBUG("priv->device_attr.max_qp_wr is %d",
 	      priv->device_attr.orig_attr.max_qp_wr);
 	DEBUG("priv->device_attr.max_sge is %d",
 	      priv->device_attr.orig_attr.max_sge);
-	attr.init = (struct ibv_qp_init_attr_ex){
-		/* CQ to be associated with the send queue. */
-		.send_cq = tmpl.cq,
-		/* CQ to be associated with the receive queue. */
-		.recv_cq = tmpl.cq,
-		.cap = {
-			/* Max number of outstanding WRs. */
-			.max_send_wr =
-			 ((priv->device_attr.orig_attr.max_qp_wr < desc) ?
-			   priv->device_attr.orig_attr.max_qp_wr :
-			   desc),
-			/*
-			 * Max number of scatter/gather elements in a WR,
-			 * must be 1 to prevent libmlx5 from trying to affect
-			 * too much memory. TX gather is not impacted by the
-			 * priv->device_attr.max_sge limit and will still work
-			 * properly.
-			 */
-			.max_send_sge = 1,
-		},
-		.qp_type = IBV_QPT_RAW_PACKET,
-		/* Do *NOT* enable this, completions events are managed per
-		 * TX burst. */
-		.sq_sig_all = 0,
-		.pd = priv->pd,
-		.comp_mask = IBV_QP_INIT_ATTR_PD,
-	};
 	if (priv->txq_inline && (priv->txqs_n >= priv->txqs_inline)) {
 		unsigned int ds_cnt;
 
@@ -317,7 +199,7 @@ mlx5_txq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_txq_ctrl *txq_ctrl,
 			/* To minimize the size of data set, avoid requesting
 			 * too large WQ.
 			 */
-			attr.init.cap.max_inline_data =
+			tmpl.max_inline_data =
 				((RTE_MIN(priv->txq_inline,
 					  priv->inline_max_packet_sz) +
 				  (RTE_CACHE_LINE_SIZE - 1)) /
@@ -329,12 +211,12 @@ mlx5_txq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_txq_ctrl *txq_ctrl,
 			 * Adjust inline value as Verbs aggregates
 			 * tso_inline and txq_inline fields.
 			 */
-			attr.init.cap.max_inline_data = inline_diff > 0 ?
-							inline_diff *
-							RTE_CACHE_LINE_SIZE :
-							0;
+			tmpl.max_inline_data = inline_diff > 0 ?
+					       inline_diff *
+					       RTE_CACHE_LINE_SIZE :
+					       0;
 		} else {
-			attr.init.cap.max_inline_data =
+			tmpl.max_inline_data =
 				tmpl.txq.max_inline * RTE_CACHE_LINE_SIZE;
 		}
 		/*
@@ -345,8 +227,7 @@ mlx5_txq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_txq_ctrl *txq_ctrl,
 		 *	WQE ETH  (1 DS)
 		 *	Inline part (N DS)
 		 */
-		ds_cnt = 2 +
-			(attr.init.cap.max_inline_data / MLX5_WQE_DWORD_SIZE);
+		ds_cnt = 2 + (tmpl.max_inline_data / MLX5_WQE_DWORD_SIZE);
 		if (ds_cnt > MLX5_DSEG_MAX) {
 			unsigned int max_inline = (MLX5_DSEG_MAX - 2) *
 						   MLX5_WQE_DWORD_SIZE;
@@ -357,67 +238,20 @@ mlx5_txq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_txq_ctrl *txq_ctrl,
 			     "the maximum possible: %d\n",
 			     priv->txq_inline, max_inline);
 			tmpl.txq.max_inline = max_inline / RTE_CACHE_LINE_SIZE;
-			attr.init.cap.max_inline_data = max_inline;
 		}
 	}
 	if (priv->tso) {
-		attr.init.max_tso_header =
-			max_tso_inline * RTE_CACHE_LINE_SIZE;
-		attr.init.comp_mask |= IBV_QP_INIT_ATTR_MAX_TSO_HEADER;
+		tmpl.max_tso_header = max_tso_inline * RTE_CACHE_LINE_SIZE;
 		tmpl.txq.max_inline = RTE_MAX(tmpl.txq.max_inline,
 					      max_tso_inline);
 		tmpl.txq.tso_en = 1;
 	}
 	if (priv->tunnel_en)
 		tmpl.txq.tunnel_en = 1;
-	tmpl.qp = ibv_create_qp_ex(priv->ctx, &attr.init);
-	if (tmpl.qp == NULL) {
-		ret = (errno ? errno : EINVAL);
-		ERROR("%p: QP creation failure: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	DEBUG("TX queue capabilities: max_send_wr=%u, max_send_sge=%u,"
-	      " max_inline_data=%u",
-	      attr.init.cap.max_send_wr,
-	      attr.init.cap.max_send_sge,
-	      attr.init.cap.max_inline_data);
-	attr.mod = (struct ibv_qp_attr){
-		/* Move the QP to this state. */
-		.qp_state = IBV_QPS_INIT,
-		/* Primary port number. */
-		.port_num = priv->port
-	};
-	ret = ibv_modify_qp(tmpl.qp, &attr.mod,
-			    (IBV_QP_STATE | IBV_QP_PORT));
-	if (ret) {
-		ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	ret = txq_setup(&tmpl, txq_ctrl);
-	if (ret) {
-		ERROR("%p: cannot initialize TX queue structure: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
+	tmpl.txq.elts =
+		(struct rte_mbuf *(*)[1 << tmpl.txq.elts_n])
+		((uintptr_t)txq_ctrl + sizeof(*txq_ctrl));
 	txq_alloc_elts(&tmpl, desc);
-	attr.mod = (struct ibv_qp_attr){
-		.qp_state = IBV_QPS_RTR
-	};
-	ret = ibv_modify_qp(tmpl.qp, &attr.mod, IBV_QP_STATE);
-	if (ret) {
-		ERROR("%p: QP state to IBV_QPS_RTR failed: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	attr.mod.qp_state = IBV_QPS_RTS;
-	ret = ibv_modify_qp(tmpl.qp, &attr.mod, IBV_QP_STATE);
-	if (ret) {
-		ERROR("%p: QP state to IBV_QPS_RTS failed: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
 	/* Clean up txq in case we're reinitializing it. */
 	DEBUG("%p: cleaning-up old txq just in case", (void *)txq_ctrl);
 	mlx5_txq_cleanup(txq_ctrl);
@@ -425,12 +259,7 @@ mlx5_txq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_txq_ctrl *txq_ctrl,
 	DEBUG("%p: txq updated with %p", (void *)txq_ctrl, (void *)&tmpl);
 	/* Pre-register known mempools. */
 	rte_mempool_walk(mlx5_txq_mp2mr_iter, txq_ctrl);
-	assert(ret == 0);
 	return 0;
-error:
-	mlx5_txq_cleanup(&tmpl);
-	assert(ret > 0);
-	return ret;
 }
 
 /**
@@ -521,14 +350,22 @@ mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		}
 	}
 	ret = mlx5_txq_ctrl_setup(dev, txq_ctrl, desc, socket, conf);
-	if (ret)
+	if (ret) {
 		rte_free(txq_ctrl);
-	else {
-		txq_ctrl->txq.stats.idx = idx;
-		DEBUG("%p: adding TX queue %p to list",
-		      (void *)dev, (void *)txq_ctrl);
-		(*priv->txqs)[idx] = &txq_ctrl->txq;
+		goto out;
 	}
+	txq_ctrl->txq.stats.idx = idx;
+	DEBUG("%p: adding TX queue %p to list",
+	      (void *)dev, (void *)txq_ctrl);
+	(*priv->txqs)[idx] = &txq_ctrl->txq;
+	txq_ctrl->ibv = mlx5_priv_txq_ibv_new(priv, idx);
+	if (!txq_ctrl->ibv) {
+		ret = EAGAIN;
+		goto out;
+	}
+	/* Update send callback. */
+	priv_dev_select_tx_function(priv, priv->dev);
+out:
 	priv_unlock(priv);
 	return -ret;
 }
@@ -622,3 +459,263 @@ priv_tx_uar_remap(struct priv *priv, int fd)
 	}
 	return 0;
 }
+
+/**
+ * Create the Tx queue Verbs object.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param idx
+ *   Queue index in DPDK Tx queue array
+ *
+ * @return
+ *   The initialised Verbs object if it can be created, NULL otherwise.
+ */
+struct mlx5_txq_ibv*
+mlx5_priv_txq_ibv_new(struct priv *priv, uint16_t idx)
+{
+	struct mlx5_txq_data *txq_data = (*priv->txqs)[idx];
+	struct mlx5_txq_ctrl *txq_ctrl =
+		container_of(txq_data, struct mlx5_txq_ctrl, txq);
+	struct mlx5_txq_ibv tmpl;
+	struct mlx5_txq_ibv *txq_ibv;
+	union {
+		struct ibv_qp_init_attr_ex init;
+		struct ibv_cq_init_attr_ex cq;
+		struct ibv_qp_attr mod;
+		struct ibv_cq_ex cq_attr;
+	} attr;
+	unsigned int cqe_n;
+	struct mlx5dv_qp qp;
+	struct mlx5dv_cq cq_info;
+	struct mlx5dv_obj obj;
+	const int desc = 1 << txq_data->elts_n;
+	int ret = 0;
+
+	assert(txq_data);
+	if (mlx5_getenv_int("MLX5_ENABLE_CQE_COMPRESSION")) {
+		ERROR("MLX5_ENABLE_CQE_COMPRESSION must never be set");
+		goto error;
+	}
+	memset(&tmpl, 0, sizeof(struct mlx5_txq_ibv));
+	/* MRs will be registered in mp2mr[] later. */
+	attr.cq = (struct ibv_cq_init_attr_ex){
+		.comp_mask = 0,
+	};
+	cqe_n = ((desc / MLX5_TX_COMP_THRESH) - 1) ?
+		((desc / MLX5_TX_COMP_THRESH) - 1) : 1;
+	if (priv->mps == MLX5_MPW_ENHANCED)
+		cqe_n += MLX5_TX_COMP_THRESH_INLINE_DIV;
+	tmpl.cq = ibv_create_cq(priv->ctx, cqe_n, NULL, NULL, 0);
+	if (tmpl.cq == NULL) {
+		ERROR("%p: CQ creation failure", (void *)txq_ctrl);
+		goto error;
+	}
+	attr.init = (struct ibv_qp_init_attr_ex){
+		/* CQ to be associated with the send queue. */
+		.send_cq = tmpl.cq,
+		/* CQ to be associated with the receive queue. */
+		.recv_cq = tmpl.cq,
+		.cap = {
+			/* Max number of outstanding WRs. */
+			.max_send_wr =
+				((priv->device_attr.orig_attr.max_qp_wr <
+				  desc) ?
+				 priv->device_attr.orig_attr.max_qp_wr :
+				 desc),
+			/*
+			 * Max number of scatter/gather elements in a WR,
+			 * must be 1 to prevent libmlx5 from trying to affect
+			 * too much memory. TX gather is not impacted by the
+			 * priv->device_attr.max_sge limit and will still work
+			 * properly.
+			 */
+			.max_send_sge = 1,
+		},
+		.qp_type = IBV_QPT_RAW_PACKET,
+		/*
+		 * Do *NOT* enable this, completions events are managed per
+		 * Tx burst.
+		 */
+		.sq_sig_all = 0,
+		.pd = priv->pd,
+		.comp_mask = IBV_QP_INIT_ATTR_PD,
+	};
+	if (txq_data->inline_en)
+		attr.init.cap.max_inline_data = txq_ctrl->max_inline_data;
+	if (txq_data->tso_en) {
+		attr.init.max_tso_header = txq_ctrl->max_tso_header;
+		attr.init.comp_mask |= IBV_QP_INIT_ATTR_MAX_TSO_HEADER;
+	}
+	tmpl.qp = ibv_create_qp_ex(priv->ctx, &attr.init);
+	if (tmpl.qp == NULL) {
+		ERROR("%p: QP creation failure", (void *)txq_ctrl);
+		goto error;
+	}
+	attr.mod = (struct ibv_qp_attr){
+		/* Move the QP to this state. */
+		.qp_state = IBV_QPS_INIT,
+		/* Primary port number. */
+		.port_num = priv->port
+	};
+	ret = ibv_modify_qp(tmpl.qp, &attr.mod, (IBV_QP_STATE | IBV_QP_PORT));
+	if (ret) {
+		ERROR("%p: QP state to IBV_QPS_INIT failed", (void *)txq_ctrl);
+		goto error;
+	}
+	attr.mod = (struct ibv_qp_attr){
+		.qp_state = IBV_QPS_RTR
+	};
+	ret = ibv_modify_qp(tmpl.qp, &attr.mod, IBV_QP_STATE);
+	if (ret) {
+		ERROR("%p: QP state to IBV_QPS_RTR failed", (void *)txq_ctrl);
+		goto error;
+	}
+	attr.mod.qp_state = IBV_QPS_RTS;
+	ret = ibv_modify_qp(tmpl.qp, &attr.mod, IBV_QP_STATE);
+	if (ret) {
+		ERROR("%p: QP state to IBV_QPS_RTS failed", (void *)txq_ctrl);
+		goto error;
+	}
+	txq_ibv = rte_calloc_socket(__func__, 1, sizeof(struct mlx5_txq_ibv), 0,
+				    txq_ctrl->socket);
+	if (!txq_ibv) {
+		ERROR("%p: cannot allocate memory", (void *)txq_ctrl);
+		goto error;
+	}
+	obj.cq.in = tmpl.cq;
+	obj.cq.out = &cq_info;
+	obj.qp.in = tmpl.qp;
+	obj.qp.out = &qp;
+	ret = mlx5dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_QP);
+	if (ret != 0)
+		goto error;
+	if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
+		ERROR("Wrong MLX5_CQE_SIZE environment variable value: "
+		      "it should be set to %u", RTE_CACHE_LINE_SIZE);
+		goto error;
+	}
+	txq_data->cqe_n = log2above(cq_info.cqe_cnt);
+	txq_data->qp_num_8s = tmpl.qp->qp_num << 8;
+	txq_data->wqes = qp.sq.buf;
+	txq_data->wqe_n = log2above(qp.sq.wqe_cnt);
+	txq_data->qp_db = &qp.dbrec[MLX5_SND_DBR];
+	txq_data->bf_reg = qp.bf.reg;
+	txq_data->cq_db = cq_info.dbrec;
+	txq_data->cqes =
+		(volatile struct mlx5_cqe (*)[])
+		(uintptr_t)cq_info.buf;
+	txq_data->cq_ci = 0;
+	txq_data->cq_pi = 0;
+	txq_data->wqe_ci = 0;
+	txq_data->wqe_pi = 0;
+	txq_ibv->qp = tmpl.qp;
+	txq_ibv->cq = tmpl.cq;
+	rte_atomic32_inc(&txq_ibv->refcnt);
+	DEBUG("%p: Verbs Tx queue %p: refcnt %d", (void *)priv,
+	      (void *)txq_ibv, rte_atomic32_read(&txq_ibv->refcnt));
+	LIST_INSERT_HEAD(&priv->txqsibv, txq_ibv, next);
+	return txq_ibv;
+error:
+	if (tmpl.cq)
+		claim_zero(ibv_destroy_cq(tmpl.cq));
+	if (tmpl.qp)
+		claim_zero(ibv_destroy_qp(tmpl.qp));
+	return NULL;
+}
+
+/**
+ * Get a Tx queue Verbs object.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param idx
+ *   Queue index in DPDK Tx queue array
+ *
+ * @return
+ *   The Verbs object if it exists.
+ */
+struct mlx5_txq_ibv*
+mlx5_priv_txq_ibv_get(struct priv *priv, uint16_t idx)
+{
+	struct mlx5_txq_ctrl *txq_ctrl;
+
+	if (idx >= priv->txqs_n)
+		return NULL;
+	if (!(*priv->txqs)[idx])
+		return NULL;
+	txq_ctrl = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
+	if (txq_ctrl->ibv) {
+		rte_atomic32_inc(&txq_ctrl->ibv->refcnt);
+		DEBUG("%p: Verbs Tx queue %p: refcnt %d", (void *)priv,
+		      (void *)txq_ctrl->ibv,
+		      rte_atomic32_read(&txq_ctrl->ibv->refcnt));
+	}
+	return txq_ctrl->ibv;
+}
+
+/**
+ * Release a Tx Verbs queue object.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param txq_ibv
+ *   Verbs Tx queue object.
+ *
+ * @return
+ *   0 if the object was released, EBUSY if it is still referenced.
+ */
+int
+mlx5_priv_txq_ibv_release(struct priv *priv, struct mlx5_txq_ibv *txq_ibv)
+{
+	(void)priv;
+	assert(txq_ibv);
+	DEBUG("%p: Verbs Tx queue %p: refcnt %d", (void *)priv,
+	      (void *)txq_ibv, rte_atomic32_read(&txq_ibv->refcnt));
+	if (rte_atomic32_dec_and_test(&txq_ibv->refcnt)) {
+		claim_zero(ibv_destroy_qp(txq_ibv->qp));
+		claim_zero(ibv_destroy_cq(txq_ibv->cq));
+		LIST_REMOVE(txq_ibv, next);
+		rte_free(txq_ibv);
+		return 0;
+	}
+	return EBUSY;
+}
+
+/**
+ * Return true if a single reference exists on the object.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param txq_ibv
+ *   Verbs Tx queue object.
+ */
+int
+mlx5_priv_txq_ibv_releasable(struct priv *priv, struct mlx5_txq_ibv *txq_ibv)
+{
+	(void)priv;
+	assert(txq_ibv);
+	return (rte_atomic32_read(&txq_ibv->refcnt) == 1);
+}
+
+/**
+ * Verify the Verbs Tx queue list is empty
+ *
+ * @param priv
+ *  Pointer to private structure.
+ *
+ * @return the number of objects not released.
+ */
+int
+mlx5_priv_txq_ibv_verify(struct priv *priv)
+{
+	int ret = 0;
+	struct mlx5_txq_ibv *txq_ibv;
+
+	LIST_FOREACH(txq_ibv, &priv->txqsibv, next) {
+		DEBUG("%p: Verbs Tx queue %p still referenced", (void *)priv,
+		      (void *)txq_ibv);
+		++ret;
+	}
+	return ret;
+}
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v3 12/30] net/mlx5: add reference counter on DPDK Tx queues
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (36 preceding siblings ...)
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 11/30] net/mlx5: separate DPDK from Verbs Tx " Nelio Laranjeiro
@ 2017-10-09 14:44 ` Nelio Laranjeiro
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 13/30] net/mlx5: add reference counter on DPDK Rx queues Nelio Laranjeiro
                   ` (17 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-09 14:44 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
Use the same design for DPDK queues as for Verbs queues for symmetry. This
also helps in fixing some issues, such as the DPDK queue release API, which
is not expected to fail.  With this design, the queue is released when its
reference counter reaches 0.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5.c         |  16 +-
 drivers/net/mlx5/mlx5.h         |   1 +
 drivers/net/mlx5/mlx5_mr.c      |  73 ++++---
 drivers/net/mlx5/mlx5_rxtx.h    |  17 +-
 drivers/net/mlx5/mlx5_trigger.c |  57 ++++-
 drivers/net/mlx5/mlx5_txq.c     | 460 +++++++++++++++++++++++-----------------
 6 files changed, 383 insertions(+), 241 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index bd160c5..276401d 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -225,17 +225,8 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	if (priv->txqs != NULL) {
 		/* XXX race condition if mlx5_tx_burst() is still running. */
 		usleep(1000);
-		for (i = 0; (i != priv->txqs_n); ++i) {
-			struct mlx5_txq_data *txq = (*priv->txqs)[i];
-			struct mlx5_txq_ctrl *txq_ctrl;
-
-			if (txq == NULL)
-				continue;
-			txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
-			(*priv->txqs)[i] = NULL;
-			mlx5_txq_cleanup(txq_ctrl);
-			rte_free(txq_ctrl);
-		}
+		for (i = 0; (i != priv->txqs_n); ++i)
+			mlx5_priv_txq_release(priv, i);
 		priv->txqs_n = 0;
 		priv->txqs = NULL;
 	}
@@ -259,6 +250,9 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	ret = mlx5_priv_txq_ibv_verify(priv);
 	if (ret)
 		WARN("%p: some Verbs Tx queue still remain", (void *)priv);
+	ret = mlx5_priv_txq_verify(priv);
+	if (ret)
+		WARN("%p: some Tx Queues still remain", (void *)priv);
 	ret = priv_flow_verify(priv);
 	if (ret)
 		WARN("%p: some flows still remain", (void *)priv);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 67d2edb..b20c39c 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -148,6 +148,7 @@ struct priv {
 	TAILQ_HEAD(mlx5_flows, rte_flow) flows; /* RTE Flow rules. */
 	LIST_HEAD(mr, mlx5_mr) mr; /* Memory region. */
 	LIST_HEAD(rxqibv, mlx5_rxq_ibv) rxqsibv; /* Verbs Rx queues. */
+	LIST_HEAD(txq, mlx5_txq_ctrl) txqsctrl; /* DPDK Tx queues. */
 	LIST_HEAD(txqibv, mlx5_txq_ibv) txqsibv; /* Verbs Tx queues. */
 	uint32_t link_speed_capa; /* Link speed capabilities. */
 	struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */
diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c
index 54fdc16..6b29eed 100644
--- a/drivers/net/mlx5/mlx5_mr.c
+++ b/drivers/net/mlx5/mlx5_mr.c
@@ -117,6 +117,8 @@ static int mlx5_check_mempool(struct rte_mempool *mp, uintptr_t *start,
  *
  * This function should only be called by txq_mp2mr().
  *
+ * @param priv
+ *   Pointer to private structure.
  * @param txq
  *   Pointer to TX queue structure.
  * @param[in] mp
@@ -128,8 +130,8 @@ static int mlx5_check_mempool(struct rte_mempool *mp, uintptr_t *start,
  *   mr on success, NULL on failure.
  */
 struct mlx5_mr*
-mlx5_txq_mp2mr_reg(struct mlx5_txq_data *txq, struct rte_mempool *mp,
-		   unsigned int idx)
+priv_txq_mp2mr_reg(struct priv *priv, struct mlx5_txq_data *txq,
+		   struct rte_mempool *mp, unsigned int idx)
 {
 	struct mlx5_txq_ctrl *txq_ctrl =
 		container_of(txq, struct mlx5_txq_ctrl, txq);
@@ -138,9 +140,9 @@ mlx5_txq_mp2mr_reg(struct mlx5_txq_data *txq, struct rte_mempool *mp,
 	/* Add a new entry, register MR first. */
 	DEBUG("%p: discovered new memory pool \"%s\" (%p)",
 	      (void *)txq_ctrl, mp->name, (void *)mp);
-	mr = priv_mr_get(txq_ctrl->priv, mp);
+	mr = priv_mr_get(priv, mp);
 	if (mr == NULL)
-		mr = priv_mr_new(txq_ctrl->priv, mp);
+		mr = priv_mr_new(priv, mp);
 	if (unlikely(mr == NULL)) {
 		DEBUG("%p: unable to configure MR, ibv_reg_mr() failed.",
 		      (void *)txq_ctrl);
@@ -151,7 +153,7 @@ mlx5_txq_mp2mr_reg(struct mlx5_txq_data *txq, struct rte_mempool *mp,
 		DEBUG("%p: MR <-> MP table full, dropping oldest entry.",
 		      (void *)txq_ctrl);
 		--idx;
-		priv_mr_release(txq_ctrl->priv, txq->mp2mr[0]);
+		priv_mr_release(priv, txq->mp2mr[0]);
 		memmove(&txq->mp2mr[0], &txq->mp2mr[1],
 			(sizeof(txq->mp2mr) - sizeof(txq->mp2mr[0])));
 	}
@@ -163,7 +165,37 @@ mlx5_txq_mp2mr_reg(struct mlx5_txq_data *txq, struct rte_mempool *mp,
 	return mr;
 }
 
-struct txq_mp2mr_mbuf_check_data {
+/**
+ * Register a Memory Region (MR) <-> Memory Pool (MP) association in
+ * txq->mp2mr[]. If mp2mr[] is full, remove an entry first.
+ *
+ * This function should only be called by txq_mp2mr().
+ *
+ * @param txq
+ *   Pointer to TX queue structure.
+ * @param[in] mp
+ *   Memory Pool for which a Memory Region lkey must be returned.
+ * @param idx
+ *   Index of the next available entry.
+ *
+ * @return
+ *   mr on success, NULL on failure.
+ */
+struct mlx5_mr*
+mlx5_txq_mp2mr_reg(struct mlx5_txq_data *txq, struct rte_mempool *mp,
+		   unsigned int idx)
+{
+	struct mlx5_txq_ctrl *txq_ctrl =
+		container_of(txq, struct mlx5_txq_ctrl, txq);
+	struct mlx5_mr *mr;
+
+	priv_lock(txq_ctrl->priv);
+	mr = priv_txq_mp2mr_reg(txq_ctrl->priv, txq, mp, idx);
+	priv_unlock(txq_ctrl->priv);
+	return mr;
+}
+
+struct mlx5_mp2mr_mbuf_check_data {
 	int ret;
 };
 
@@ -185,7 +217,7 @@ static void
 txq_mp2mr_mbuf_check(struct rte_mempool *mp, void *arg, void *obj,
 	uint32_t index __rte_unused)
 {
-	struct txq_mp2mr_mbuf_check_data *data = arg;
+	struct mlx5_mp2mr_mbuf_check_data *data = arg;
 	struct rte_mbuf *buf = obj;
 
 	/*
@@ -206,35 +238,24 @@ txq_mp2mr_mbuf_check(struct rte_mempool *mp, void *arg, void *obj,
  *   Pointer to TX queue structure.
  */
 void
-mlx5_txq_mp2mr_iter(struct rte_mempool *mp, void *arg)
+mlx5_mp2mr_iter(struct rte_mempool *mp, void *arg)
 {
-	struct mlx5_txq_ctrl *txq_ctrl = arg;
-	struct txq_mp2mr_mbuf_check_data data = {
+	struct priv *priv = (struct priv *)arg;
+	struct mlx5_mp2mr_mbuf_check_data data = {
 		.ret = 0,
 	};
-	uintptr_t start;
-	uintptr_t end;
-	unsigned int i;
+	struct mlx5_mr *mr;
 
 	/* Register mempool only if the first element looks like a mbuf. */
 	if (rte_mempool_obj_iter(mp, txq_mp2mr_mbuf_check, &data) == 0 ||
 			data.ret == -1)
 		return;
-	if (mlx5_check_mempool(mp, &start, &end) != 0) {
-		ERROR("mempool %p: not virtually contiguous",
-		      (void *)mp);
+	mr = priv_mr_get(priv, mp);
+	if (mr) {
+		priv_mr_release(priv, mr);
 		return;
 	}
-	for (i = 0; (i != RTE_DIM(txq_ctrl->txq.mp2mr)); ++i) {
-		if (unlikely(txq_ctrl->txq.mp2mr[i] == NULL)) {
-			/* Unknown MP, add a new MR for it. */
-			break;
-		}
-		if (start >= (uintptr_t)txq_ctrl->txq.mp2mr[i]->start &&
-		    end <= (uintptr_t)txq_ctrl->txq.mp2mr[i]->end)
-			return;
-	}
-	mlx5_txq_mp2mr_reg(&txq_ctrl->txq, mp, i);
+	priv_mr_new(priv, mp);
 }
 
 /**
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 30ad363..69344f6 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -297,6 +297,8 @@ struct mlx5_txq_ibv {
 
 /* TX queue control descriptor. */
 struct mlx5_txq_ctrl {
+	LIST_ENTRY(mlx5_txq_ctrl) next; /* Pointer to the next element. */
+	rte_atomic32_t refcnt; /* Reference counter. */
 	struct priv *priv; /* Back pointer to private data. */
 	unsigned int socket; /* CPU socket ID for allocations. */
 	unsigned int max_inline_data; /* Max inline data. */
@@ -336,9 +338,6 @@ int mlx5_priv_rxq_ibv_verify(struct priv *);
 
 /* mlx5_txq.c */
 
-void mlx5_txq_cleanup(struct mlx5_txq_ctrl *);
-int mlx5_txq_ctrl_setup(struct rte_eth_dev *, struct mlx5_txq_ctrl *, uint16_t,
-			unsigned int, const struct rte_eth_txconf *);
 int mlx5_tx_queue_setup(struct rte_eth_dev *, uint16_t, uint16_t, unsigned int,
 			const struct rte_eth_txconf *);
 void mlx5_tx_queue_release(void *);
@@ -348,6 +347,14 @@ struct mlx5_txq_ibv *mlx5_priv_txq_ibv_get(struct priv *, uint16_t);
 int mlx5_priv_txq_ibv_release(struct priv *, struct mlx5_txq_ibv *);
 int mlx5_priv_txq_ibv_releasable(struct priv *, struct mlx5_txq_ibv *);
 int mlx5_priv_txq_ibv_verify(struct priv *);
+struct mlx5_txq_ctrl *mlx5_priv_txq_new(struct priv *, uint16_t,
+					uint16_t, unsigned int,
+					const struct rte_eth_txconf *);
+struct mlx5_txq_ctrl *mlx5_priv_txq_get(struct priv *, uint16_t);
+int mlx5_priv_txq_release(struct priv *, uint16_t);
+int mlx5_priv_txq_releasable(struct priv *, uint16_t);
+int mlx5_priv_txq_verify(struct priv *);
+void txq_alloc_elts(struct mlx5_txq_ctrl *);
 
 /* mlx5_rxtx.c */
 
@@ -375,7 +382,9 @@ uint16_t mlx5_rx_burst_vec(void *, struct rte_mbuf **, uint16_t);
 
 /* mlx5_mr.c */
 
-void mlx5_txq_mp2mr_iter(struct rte_mempool *, void *);
+void mlx5_mp2mr_iter(struct rte_mempool *, void *);
+struct mlx5_mr *priv_txq_mp2mr_reg(struct priv *priv, struct mlx5_txq_data *,
+				   struct rte_mempool *, unsigned int);
 struct mlx5_mr *mlx5_txq_mp2mr_reg(struct mlx5_txq_data *, struct rte_mempool *,
 				   unsigned int);
 
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index eeb9585..7a12768 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -41,6 +41,44 @@
 #include "mlx5_rxtx.h"
 #include "mlx5_utils.h"
 
+static void
+priv_txq_stop(struct priv *priv)
+{
+	unsigned int i;
+
+	for (i = 0; i != priv->txqs_n; ++i)
+		mlx5_priv_txq_release(priv, i);
+}
+
+static int
+priv_txq_start(struct priv *priv)
+{
+	unsigned int i;
+	int ret = 0;
+
+	/* Add memory regions to Tx queues. */
+	for (i = 0; i != priv->txqs_n; ++i) {
+		unsigned int idx = 0;
+		struct mlx5_mr *mr;
+		struct mlx5_txq_ctrl *txq_ctrl = mlx5_priv_txq_get(priv, i);
+
+		if (!txq_ctrl)
+			continue;
+		LIST_FOREACH(mr, &priv->mr, next)
+			priv_txq_mp2mr_reg(priv, &txq_ctrl->txq, mr->mp, idx++);
+		txq_alloc_elts(txq_ctrl);
+		txq_ctrl->ibv = mlx5_priv_txq_ibv_new(priv, i);
+		if (!txq_ctrl->ibv) {
+			ret = ENOMEM;
+			goto error;
+		}
+	}
+	return -ret;
+error:
+	priv_txq_stop(priv);
+	return -ret;
+}
+
 /**
  * DPDK callback to start the device.
  *
@@ -56,6 +94,7 @@ int
 mlx5_dev_start(struct rte_eth_dev *dev)
 {
 	struct priv *priv = dev->data->dev_private;
+	struct mlx5_mr *mr = NULL;
 	int err;
 
 	if (mlx5_is_secondary())
@@ -63,9 +102,17 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 
 	priv_lock(priv);
 	/* Update Rx/Tx callback. */
-	priv_dev_select_tx_function(priv, dev);
 	priv_dev_select_rx_function(priv, dev);
 	DEBUG("%p: allocating and configuring hash RX queues", (void *)dev);
+	rte_mempool_walk(mlx5_mp2mr_iter, priv);
+	err = priv_txq_start(priv);
+	if (err) {
+		ERROR("%p: TXQ allocation failed: %s",
+		      (void *)dev, strerror(err));
+		goto error;
+	}
+	/* Update send callback. */
+	priv_dev_select_tx_function(priv, dev);
 	err = priv_create_hash_rxqs(priv);
 	if (!err)
 		err = priv_rehash_flows(priv);
@@ -94,10 +141,13 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 	return 0;
 error:
 	/* Rollback. */
+	LIST_FOREACH(mr, &priv->mr, next)
+		priv_mr_release(priv, mr);
 	priv_special_flow_disable_all(priv);
 	priv_mac_addrs_disable(priv);
 	priv_destroy_hash_rxqs(priv);
 	priv_flow_stop(priv);
+	priv_txq_stop(priv);
 	priv_unlock(priv);
 	return -err;
 }
@@ -114,6 +164,7 @@ void
 mlx5_dev_stop(struct rte_eth_dev *dev)
 {
 	struct priv *priv = dev->data->dev_private;
+	struct mlx5_mr *mr;
 
 	if (mlx5_is_secondary())
 		return;
@@ -131,6 +182,10 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
 	priv_destroy_hash_rxqs(priv);
 	priv_flow_stop(priv);
 	priv_rx_intr_vec_disable(priv);
+	priv_txq_stop(priv);
+	LIST_FOREACH(mr, &priv->mr, next) {
+		priv_mr_release(priv, mr);
+	}
 	priv_dev_interrupt_handler_uninstall(priv, dev);
 	priv_unlock(priv);
 }
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 3a6ef39..e7c4ff6 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -65,12 +65,11 @@
  *
  * @param txq_ctrl
  *   Pointer to TX queue structure.
- * @param elts_n
- *   Number of elements to allocate.
  */
-static void
-txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl, unsigned int elts_n)
+void
+txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl)
 {
+	const unsigned int elts_n = 1 << txq_ctrl->txq.elts_n;
 	unsigned int i;
 
 	for (i = 0; (i != elts_n); ++i)
@@ -117,152 +116,6 @@ txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl)
 }
 
 /**
- * Clean up a TX queue.
- *
- * Destroy objects, free allocated memory and reset the structure for reuse.
- *
- * @param txq_ctrl
- *   Pointer to TX queue structure.
- */
-void
-mlx5_txq_cleanup(struct mlx5_txq_ctrl *txq_ctrl)
-{
-	size_t i;
-
-	DEBUG("cleaning up %p", (void *)txq_ctrl);
-	txq_free_elts(txq_ctrl);
-	for (i = 0; (i != RTE_DIM(txq_ctrl->txq.mp2mr)); ++i)
-		if (txq_ctrl->txq.mp2mr[i])
-			priv_mr_release(txq_ctrl->priv, txq_ctrl->txq.mp2mr[i]);
-	if (txq_ctrl->ibv)
-		mlx5_priv_txq_ibv_release(txq_ctrl->priv, txq_ctrl->ibv);
-	memset(txq_ctrl, 0, sizeof(*txq_ctrl));
-}
-
-/**
- * Configure a TX queue.
- *
- * @param dev
- *   Pointer to Ethernet device structure.
- * @param txq_ctrl
- *   Pointer to TX queue structure.
- * @param desc
- *   Number of descriptors to configure in queue.
- * @param socket
- *   NUMA socket on which memory must be allocated.
- * @param[in] conf
- *   Thresholds parameters.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-int
-mlx5_txq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_txq_ctrl *txq_ctrl,
-		    uint16_t desc, unsigned int socket,
-		    const struct rte_eth_txconf *conf)
-{
-	struct priv *priv = mlx5_get_priv(dev);
-	struct mlx5_txq_ctrl tmpl = {
-		.priv = priv,
-		.socket = socket,
-	};
-	const unsigned int max_tso_inline = ((MLX5_MAX_TSO_HEADER +
-					     (RTE_CACHE_LINE_SIZE - 1)) /
-					      RTE_CACHE_LINE_SIZE);
-
-	if (mlx5_getenv_int("MLX5_ENABLE_CQE_COMPRESSION")) {
-		ERROR("MLX5_ENABLE_CQE_COMPRESSION must never be set");
-		return ENOTSUP;
-	}
-	tmpl.txq.flags = conf->txq_flags;
-	assert(desc > MLX5_TX_COMP_THRESH);
-	tmpl.txq.elts_n = log2above(desc);
-	if (priv->mps == MLX5_MPW_ENHANCED)
-		tmpl.txq.mpw_hdr_dseg = priv->mpw_hdr_dseg;
-	/* MRs will be registered in mp2mr[] later. */
-	DEBUG("priv->device_attr.max_qp_wr is %d",
-	      priv->device_attr.orig_attr.max_qp_wr);
-	DEBUG("priv->device_attr.max_sge is %d",
-	      priv->device_attr.orig_attr.max_sge);
-	if (priv->txq_inline && (priv->txqs_n >= priv->txqs_inline)) {
-		unsigned int ds_cnt;
-
-		tmpl.txq.max_inline =
-			((priv->txq_inline + (RTE_CACHE_LINE_SIZE - 1)) /
-			 RTE_CACHE_LINE_SIZE);
-		tmpl.txq.inline_en = 1;
-		/* TSO and MPS can't be enabled concurrently. */
-		assert(!priv->tso || !priv->mps);
-		if (priv->mps == MLX5_MPW_ENHANCED) {
-			tmpl.txq.inline_max_packet_sz =
-				priv->inline_max_packet_sz;
-			/* To minimize the size of data set, avoid requesting
-			 * too large WQ.
-			 */
-			tmpl.max_inline_data =
-				((RTE_MIN(priv->txq_inline,
-					  priv->inline_max_packet_sz) +
-				  (RTE_CACHE_LINE_SIZE - 1)) /
-				 RTE_CACHE_LINE_SIZE) * RTE_CACHE_LINE_SIZE;
-		} else if (priv->tso) {
-			int inline_diff = tmpl.txq.max_inline - max_tso_inline;
-
-			/*
-			 * Adjust inline value as Verbs aggregates
-			 * tso_inline and txq_inline fields.
-			 */
-			tmpl.max_inline_data = inline_diff > 0 ?
-					       inline_diff *
-					       RTE_CACHE_LINE_SIZE :
-					       0;
-		} else {
-			tmpl.max_inline_data =
-				tmpl.txq.max_inline * RTE_CACHE_LINE_SIZE;
-		}
-		/*
-		 * Check if the inline size is too large in a way which
-		 * can make the WQE DS to overflow.
-		 * Considering in calculation:
-		 *	WQE CTRL (1 DS)
-		 *	WQE ETH  (1 DS)
-		 *	Inline part (N DS)
-		 */
-		ds_cnt = 2 + (tmpl.max_inline_data / MLX5_WQE_DWORD_SIZE);
-		if (ds_cnt > MLX5_DSEG_MAX) {
-			unsigned int max_inline = (MLX5_DSEG_MAX - 2) *
-						   MLX5_WQE_DWORD_SIZE;
-
-			max_inline = max_inline - (max_inline %
-						   RTE_CACHE_LINE_SIZE);
-			WARN("txq inline is too large (%d) setting it to "
-			     "the maximum possible: %d\n",
-			     priv->txq_inline, max_inline);
-			tmpl.txq.max_inline = max_inline / RTE_CACHE_LINE_SIZE;
-		}
-	}
-	if (priv->tso) {
-		tmpl.max_tso_header = max_tso_inline * RTE_CACHE_LINE_SIZE;
-		tmpl.txq.max_inline = RTE_MAX(tmpl.txq.max_inline,
-					      max_tso_inline);
-		tmpl.txq.tso_en = 1;
-	}
-	if (priv->tunnel_en)
-		tmpl.txq.tunnel_en = 1;
-	tmpl.txq.elts =
-		(struct rte_mbuf *(*)[1 << tmpl.txq.elts_n])
-		((uintptr_t)txq_ctrl + sizeof(*txq_ctrl));
-	txq_alloc_elts(&tmpl, desc);
-	/* Clean up txq in case we're reinitializing it. */
-	DEBUG("%p: cleaning-up old txq just in case", (void *)txq_ctrl);
-	mlx5_txq_cleanup(txq_ctrl);
-	*txq_ctrl = tmpl;
-	DEBUG("%p: txq updated with %p", (void *)txq_ctrl, (void *)&tmpl);
-	/* Pre-register known mempools. */
-	rte_mempool_walk(mlx5_txq_mp2mr_iter, txq_ctrl);
-	return 0;
-}
-
-/**
  * DPDK callback to configure a TX queue.
  *
  * @param dev
@@ -287,7 +140,7 @@ mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	struct mlx5_txq_data *txq = (*priv->txqs)[idx];
 	struct mlx5_txq_ctrl *txq_ctrl =
 		container_of(txq, struct mlx5_txq_ctrl, txq);
-	int ret;
+	int ret = 0;
 
 	if (mlx5_is_secondary())
 		return -E_RTE_SECONDARY;
@@ -314,57 +167,23 @@ mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		priv_unlock(priv);
 		return -EOVERFLOW;
 	}
-	if (txq != NULL) {
-		DEBUG("%p: reusing already allocated queue index %u (%p)",
-		      (void *)dev, idx, (void *)txq);
-		if (dev->data->dev_started) {
-			priv_unlock(priv);
-			return -EEXIST;
-		}
-		(*priv->txqs)[idx] = NULL;
-		mlx5_txq_cleanup(txq_ctrl);
-		/* Resize if txq size is changed. */
-		if (txq_ctrl->txq.elts_n != log2above(desc)) {
-			txq_ctrl = rte_realloc(txq_ctrl,
-					       sizeof(*txq_ctrl) +
-					       desc * sizeof(struct rte_mbuf *),
-					       RTE_CACHE_LINE_SIZE);
-			if (!txq_ctrl) {
-				ERROR("%p: unable to reallocate queue index %u",
-					(void *)dev, idx);
-				priv_unlock(priv);
-				return -ENOMEM;
-			}
-		}
-	} else {
-		txq_ctrl =
-			rte_calloc_socket("TXQ", 1,
-					  sizeof(*txq_ctrl) +
-					  desc * sizeof(struct rte_mbuf *),
-					  0, socket);
-		if (txq_ctrl == NULL) {
-			ERROR("%p: unable to allocate queue index %u",
-			      (void *)dev, idx);
-			priv_unlock(priv);
-			return -ENOMEM;
-		}
+	if (!mlx5_priv_txq_releasable(priv, idx)) {
+		ret = EBUSY;
+		ERROR("%p: unable to release queue index %u",
+		      (void *)dev, idx);
+		goto out;
 	}
-	ret = mlx5_txq_ctrl_setup(dev, txq_ctrl, desc, socket, conf);
-	if (ret) {
-		rte_free(txq_ctrl);
+	mlx5_priv_txq_release(priv, idx);
+	txq_ctrl = mlx5_priv_txq_new(priv, idx, desc, socket, conf);
+	if (!txq_ctrl) {
+		ERROR("%p: unable to allocate queue index %u",
+		      (void *)dev, idx);
+		ret = ENOMEM;
 		goto out;
 	}
-	txq_ctrl->txq.stats.idx = idx;
 	DEBUG("%p: adding TX queue %p to list",
 	      (void *)dev, (void *)txq_ctrl);
 	(*priv->txqs)[idx] = &txq_ctrl->txq;
-	txq_ctrl->ibv = mlx5_priv_txq_ibv_new(priv, idx);
-	if (!txq_ctrl->ibv) {
-		ret = EAGAIN;
-		goto out;
-	}
-	/* Update send callback. */
-	priv_dev_select_tx_function(priv, priv->dev);
 out:
 	priv_unlock(priv);
 	return -ret;
@@ -396,11 +215,9 @@ mlx5_tx_queue_release(void *dpdk_txq)
 		if ((*priv->txqs)[i] == txq) {
 			DEBUG("%p: removing TX queue %p from list",
 			      (void *)priv->dev, (void *)txq_ctrl);
-			(*priv->txqs)[i] = NULL;
+			mlx5_priv_txq_release(priv, i);
 			break;
 		}
-	mlx5_txq_cleanup(txq_ctrl);
-	rte_free(txq_ctrl);
 	priv_unlock(priv);
 }
 
@@ -719,3 +536,248 @@ mlx5_priv_txq_ibv_verify(struct priv *priv)
 	}
 	return ret;
 }
+
+/**
+ * Create a DPDK Tx queue.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param idx
+ *   TX queue index.
+ * @param desc
+ *   Number of descriptors to configure in queue.
+ * @param socket
+ *   NUMA socket on which memory must be allocated.
+ * @param[in] conf
+ *  Thresholds parameters.
+ *
+ * @return
+ *   A DPDK queue object on success.
+ */
+struct mlx5_txq_ctrl*
+mlx5_priv_txq_new(struct priv *priv, uint16_t idx, uint16_t desc,
+		  unsigned int socket,
+		  const struct rte_eth_txconf *conf)
+{
+	const unsigned int max_tso_inline =
+		((MLX5_MAX_TSO_HEADER + (RTE_CACHE_LINE_SIZE - 1)) /
+		 RTE_CACHE_LINE_SIZE);
+	struct mlx5_txq_ctrl *tmpl;
+
+	tmpl = rte_calloc_socket("TXQ", 1,
+				 sizeof(*tmpl) +
+				 desc * sizeof(struct rte_mbuf *),
+				 0, socket);
+	if (!tmpl)
+		return NULL;
+	assert(desc > MLX5_TX_COMP_THRESH);
+	tmpl->txq.flags = conf->txq_flags;
+	tmpl->priv = priv;
+	tmpl->txq.elts_n = log2above(desc);
+	if (priv->mps == MLX5_MPW_ENHANCED)
+		tmpl->txq.mpw_hdr_dseg = priv->mpw_hdr_dseg;
+	/* MRs will be registered in mp2mr[] later. */
+	DEBUG("priv->device_attr.max_qp_wr is %d",
+	      priv->device_attr.orig_attr.max_qp_wr);
+	DEBUG("priv->device_attr.max_sge is %d",
+	      priv->device_attr.orig_attr.max_sge);
+	if (priv->txq_inline && (priv->txqs_n >= priv->txqs_inline)) {
+		unsigned int ds_cnt;
+
+		tmpl->txq.max_inline =
+			((priv->txq_inline + (RTE_CACHE_LINE_SIZE - 1)) /
+			 RTE_CACHE_LINE_SIZE);
+		tmpl->txq.inline_en = 1;
+		/* TSO and MPS can't be enabled concurrently. */
+		assert(!priv->tso || !priv->mps);
+		if (priv->mps == MLX5_MPW_ENHANCED) {
+			tmpl->txq.inline_max_packet_sz =
+				priv->inline_max_packet_sz;
+			/* To minimize the size of data set, avoid requesting
+			 * too large WQ.
+			 */
+			tmpl->max_inline_data =
+				((RTE_MIN(priv->txq_inline,
+					  priv->inline_max_packet_sz) +
+				  (RTE_CACHE_LINE_SIZE - 1)) /
+				 RTE_CACHE_LINE_SIZE) * RTE_CACHE_LINE_SIZE;
+		} else if (priv->tso) {
+			int inline_diff = tmpl->txq.max_inline - max_tso_inline;
+
+			/*
+			 * Adjust inline value as Verbs aggregates
+			 * tso_inline and txq_inline fields.
+			 */
+			tmpl->max_inline_data = inline_diff > 0 ?
+					       inline_diff *
+					       RTE_CACHE_LINE_SIZE :
+					       0;
+		} else {
+			tmpl->max_inline_data =
+				tmpl->txq.max_inline * RTE_CACHE_LINE_SIZE;
+		}
+		/*
+		 * Check if the inline size is too large in a way which
+		 * can make the WQE DS to overflow.
+		 * Considering in calculation:
+		 *      WQE CTRL (1 DS)
+		 *      WQE ETH  (1 DS)
+		 *      Inline part (N DS)
+		 */
+		ds_cnt = 2 + (tmpl->txq.max_inline / MLX5_WQE_DWORD_SIZE);
+		if (ds_cnt > MLX5_DSEG_MAX) {
+			unsigned int max_inline = (MLX5_DSEG_MAX - 2) *
+						  MLX5_WQE_DWORD_SIZE;
+
+			max_inline = max_inline - (max_inline %
+						   RTE_CACHE_LINE_SIZE);
+			WARN("txq inline is too large (%d) setting it to "
+			     "the maximum possible: %d\n",
+			     priv->txq_inline, max_inline);
+			tmpl->txq.max_inline = max_inline / RTE_CACHE_LINE_SIZE;
+		}
+	}
+	if (priv->tso) {
+		tmpl->max_tso_header = max_tso_inline * RTE_CACHE_LINE_SIZE;
+		tmpl->txq.max_inline = RTE_MAX(tmpl->txq.max_inline,
+					       max_tso_inline);
+		tmpl->txq.tso_en = 1;
+	}
+	if (priv->tunnel_en)
+		tmpl->txq.tunnel_en = 1;
+	tmpl->txq.elts =
+		(struct rte_mbuf *(*)[1 << tmpl->txq.elts_n])(tmpl + 1);
+	tmpl->txq.stats.idx = idx;
+	rte_atomic32_inc(&tmpl->refcnt);
+	DEBUG("%p: Tx queue %p: refcnt %d", (void *)priv,
+	      (void *)tmpl, rte_atomic32_read(&tmpl->refcnt));
+	LIST_INSERT_HEAD(&priv->txqsctrl, tmpl, next);
+	return tmpl;
+}
+
+/**
+ * Get a Tx queue.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param idx
+ *   TX queue index.
+ *
+ * @return
+ *   A pointer to the queue if it exists.
+ */
+struct mlx5_txq_ctrl*
+mlx5_priv_txq_get(struct priv *priv, uint16_t idx)
+{
+	struct mlx5_txq_ctrl *ctrl = NULL;
+
+	if ((*priv->txqs)[idx]) {
+		ctrl = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl,
+				    txq);
+		unsigned int i;
+
+		mlx5_priv_txq_ibv_get(priv, idx);
+		for (i = 0; i != MLX5_PMD_TX_MP_CACHE; ++i) {
+			struct mlx5_mr *mr;
+
+			(void)mr;
+			if (ctrl->txq.mp2mr[i]) {
+				mr = priv_mr_get(priv, ctrl->txq.mp2mr[i]->mp);
+				assert(mr);
+			}
+		}
+		rte_atomic32_inc(&ctrl->refcnt);
+		DEBUG("%p: Tx queue %p: refcnt %d", (void *)priv,
+		      (void *)ctrl, rte_atomic32_read(&ctrl->refcnt));
+	}
+	return ctrl;
+}
+
+/**
+ * Release a Tx queue.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param idx
+ *   TX queue index.
+ *
+ * @return
+ *   0 on success, errno on failure.
+ */
+int
+mlx5_priv_txq_release(struct priv *priv, uint16_t idx)
+{
+	unsigned int i;
+	struct mlx5_txq_ctrl *txq;
+
+	if (!(*priv->txqs)[idx])
+		return 0;
+	txq = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
+	DEBUG("%p: Tx queue %p: refcnt %d", (void *)priv,
+	      (void *)txq, rte_atomic32_read(&txq->refcnt));
+	if (txq->ibv) {
+		int ret;
+
+		ret = mlx5_priv_txq_ibv_release(priv, txq->ibv);
+		if (!ret)
+			txq->ibv = NULL;
+	}
+	for (i = 0; i != MLX5_PMD_TX_MP_CACHE; ++i) {
+		if (txq->txq.mp2mr[i]) {
+			priv_mr_release(priv, txq->txq.mp2mr[i]);
+			txq->txq.mp2mr[i] = NULL;
+		}
+	}
+	if (rte_atomic32_dec_and_test(&txq->refcnt)) {
+		txq_free_elts(txq);
+		LIST_REMOVE(txq, next);
+		rte_free(txq);
+		(*priv->txqs)[idx] = NULL;
+		return 0;
+	}
+	return EBUSY;
+}
+
+/**
+ * Verify if the queue can be released.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param idx
+ *   TX queue index.
+ *
+ * @return
+ *   1 if the queue can be released, 0 if in use, -1 if it does not exist.
+ */
+int
+mlx5_priv_txq_releasable(struct priv *priv, uint16_t idx)
+{
+	struct mlx5_txq_ctrl *txq;
+
+	if (!(*priv->txqs)[idx])
+		return -1;
+	txq = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
+	return (rte_atomic32_read(&txq->refcnt) == 1);
+}
+
+/**
+ * Verify the Tx Queue list is empty
+ *
+ * @param priv
+ *  Pointer to private structure.
+ *
+ * @return the number of objects not released.
+ */
+int
+mlx5_priv_txq_verify(struct priv *priv)
+{
+	struct mlx5_txq_ctrl *txq;
+	int ret = 0;
+
+	LIST_FOREACH(txq, &priv->txqsctrl, next) {
+		DEBUG("%p: Tx Queue %p still referenced", (void *)priv,
+		      (void *)txq);
+		++ret;
+	}
+	return ret;
+}
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v3 13/30] net/mlx5: add reference counter on DPDK Rx queues
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (37 preceding siblings ...)
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 12/30] net/mlx5: add reference counter on DPDK Tx queues Nelio Laranjeiro
@ 2017-10-09 14:44 ` Nelio Laranjeiro
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 14/30] net/mlx5: make indirection tables shareable Nelio Laranjeiro
                   ` (16 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-09 14:44 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
Use the same design for DPDK queues as for Verbs queues for symmetry.  This
also helps in fixing some issues, such as the DPDK release queue API, which is
not expected to fail.  With this design, the queue is released when its
reference counter reaches 0.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5.c         |  16 +-
 drivers/net/mlx5/mlx5.h         |   1 +
 drivers/net/mlx5/mlx5_rxq.c     | 488 +++++++++++++++++++++-------------------
 drivers/net/mlx5/mlx5_rxtx.h    |  10 +
 drivers/net/mlx5/mlx5_trigger.c |  47 +++-
 5 files changed, 321 insertions(+), 241 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 276401d..c2c3d1b 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -208,17 +208,8 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	if (priv->rxqs != NULL) {
 		/* XXX race condition if mlx5_rx_burst() is still running. */
 		usleep(1000);
-		for (i = 0; (i != priv->rxqs_n); ++i) {
-			struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
-			struct mlx5_rxq_ctrl *rxq_ctrl;
-
-			if (rxq == NULL)
-				continue;
-			rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq);
-			(*priv->rxqs)[i] = NULL;
-			mlx5_rxq_cleanup(rxq_ctrl);
-			rte_free(rxq_ctrl);
-		}
+		for (i = 0; (i != priv->rxqs_n); ++i)
+			mlx5_priv_rxq_release(priv, i);
 		priv->rxqs_n = 0;
 		priv->rxqs = NULL;
 	}
@@ -247,6 +238,9 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	ret = mlx5_priv_rxq_ibv_verify(priv);
 	if (ret)
 		WARN("%p: some Verbs Rx queue still remain", (void *)priv);
+	ret = mlx5_priv_rxq_verify(priv);
+	if (ret)
+		WARN("%p: some Rx Queues still remain", (void *)priv);
 	ret = mlx5_priv_txq_ibv_verify(priv);
 	if (ret)
 		WARN("%p: some Verbs Tx queue still remain", (void *)priv);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index b20c39c..d0ef21a 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -147,6 +147,7 @@ struct priv {
 	struct rte_flow_drop *flow_drop_queue; /* Flow drop queue. */
 	TAILQ_HEAD(mlx5_flows, rte_flow) flows; /* RTE Flow rules. */
 	LIST_HEAD(mr, mlx5_mr) mr; /* Memory region. */
+	LIST_HEAD(rxq, mlx5_rxq_ctrl) rxqsctrl; /* DPDK Rx queues. */
 	LIST_HEAD(rxqibv, mlx5_rxq_ibv) rxqsibv; /* Verbs Rx queues. */
 	LIST_HEAD(txq, mlx5_txq_ctrl) txqsctrl; /* DPDK Tx queues. */
 	LIST_HEAD(txqibv, mlx5_txq_ibv) txqsibv; /* Verbs Tx queues. */
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 89c2cdb..87efeed 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -37,6 +37,7 @@
 #include <string.h>
 #include <stdint.h>
 #include <fcntl.h>
+#include <sys/queue.h>
 
 /* Verbs header. */
 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
@@ -629,16 +630,15 @@ priv_rehash_flows(struct priv *priv)
  *
  * @param rxq_ctrl
  *   Pointer to RX queue structure.
- * @param elts_n
- *   Number of elements to allocate.
  *
  * @return
  *   0 on success, errno value on failure.
  */
-static int
-rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl, unsigned int elts_n)
+int
+rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
 {
 	const unsigned int sges_n = 1 << rxq_ctrl->rxq.sges_n;
+	unsigned int elts_n = 1 << rxq_ctrl->rxq.elts_n;
 	unsigned int i;
 	int ret = 0;
 
@@ -667,9 +667,11 @@ rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl, unsigned int elts_n)
 		NB_SEGS(buf) = 1;
 		(*rxq_ctrl->rxq.elts)[i] = buf;
 	}
+	/* If Rx vector is activated. */
 	if (rxq_check_vec_support(&rxq_ctrl->rxq) > 0) {
 		struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
 		struct rte_mbuf *mbuf_init = &rxq->fake_mbuf;
+		int j;
 
 		/* Initialize default rearm_data for vPMD. */
 		mbuf_init->data_off = RTE_PKTMBUF_HEADROOM;
@@ -681,10 +683,11 @@ rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl, unsigned int elts_n)
 		 * rearm_data covers previous fields.
 		 */
 		rte_compiler_barrier();
-		rxq->mbuf_initializer = *(uint64_t *)&mbuf_init->rearm_data;
+		rxq->mbuf_initializer =
+			*(uint64_t *)&mbuf_init->rearm_data;
 		/* Padding with a fake mbuf for vectorized Rx. */
-		for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
-			(*rxq->elts)[elts_n + i] = &rxq->fake_mbuf;
+		for (j = 0; j < MLX5_VPMD_DESCS_PER_LOOP; ++j)
+			(*rxq->elts)[elts_n + j] = &rxq->fake_mbuf;
 	}
 	DEBUG("%p: allocated and configured %u segments (max %u packets)",
 	      (void *)rxq_ctrl, elts_n, elts_n / (1 << rxq_ctrl->rxq.sges_n));
@@ -754,170 +757,6 @@ mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl)
 }
 
 /**
- * Configure a RX queue.
- *
- * @param dev
- *   Pointer to Ethernet device structure.
- * @param rxq_ctrl
- *   Pointer to RX queue structure.
- * @param desc
- *   Number of descriptors to configure in queue.
- * @param socket
- *   NUMA socket on which memory must be allocated.
- * @param[in] conf
- *   Thresholds parameters.
- * @param mp
- *   Memory pool for buffer allocations.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-static int
-rxq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
-	       uint16_t desc, unsigned int socket,
-	       const struct rte_eth_rxconf *conf, struct rte_mempool *mp)
-{
-	struct priv *priv = dev->data->dev_private;
-	const uint16_t desc_n =
-		desc + priv->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
-	struct mlx5_rxq_ctrl tmpl = {
-		.priv = priv,
-		.socket = socket,
-		.rxq = {
-			.elts = rte_calloc_socket("RXQ", 1,
-						  desc_n *
-						  sizeof(struct rte_mbuf *), 0,
-						  socket),
-			.elts_n = log2above(desc),
-			.mp = mp,
-			.rss_hash = priv->rxqs_n > 1,
-		},
-	};
-	unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
-	struct rte_mbuf *(*elts)[desc_n] = NULL;
-	int ret = 0;
-
-	(void)conf; /* Thresholds configuration (ignored). */
-	if (dev->data->dev_conf.intr_conf.rxq)
-		tmpl.irq = 1;
-	/* Enable scattered packets support for this queue if necessary. */
-	assert(mb_len >= RTE_PKTMBUF_HEADROOM);
-	if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
-	    (mb_len - RTE_PKTMBUF_HEADROOM)) {
-		tmpl.rxq.sges_n = 0;
-	} else if (dev->data->dev_conf.rxmode.enable_scatter) {
-		unsigned int size =
-			RTE_PKTMBUF_HEADROOM +
-			dev->data->dev_conf.rxmode.max_rx_pkt_len;
-		unsigned int sges_n;
-
-		/*
-		 * Determine the number of SGEs needed for a full packet
-		 * and round it to the next power of two.
-		 */
-		sges_n = log2above((size / mb_len) + !!(size % mb_len));
-		tmpl.rxq.sges_n = sges_n;
-		/* Make sure rxq.sges_n did not overflow. */
-		size = mb_len * (1 << tmpl.rxq.sges_n);
-		size -= RTE_PKTMBUF_HEADROOM;
-		if (size < dev->data->dev_conf.rxmode.max_rx_pkt_len) {
-			ERROR("%p: too many SGEs (%u) needed to handle"
-			      " requested maximum packet size %u",
-			      (void *)dev,
-			      1 << sges_n,
-			      dev->data->dev_conf.rxmode.max_rx_pkt_len);
-			return EOVERFLOW;
-		}
-	} else {
-		WARN("%p: the requested maximum Rx packet size (%u) is"
-		     " larger than a single mbuf (%u) and scattered"
-		     " mode has not been requested",
-		     (void *)dev,
-		     dev->data->dev_conf.rxmode.max_rx_pkt_len,
-		     mb_len - RTE_PKTMBUF_HEADROOM);
-	}
-	DEBUG("%p: maximum number of segments per packet: %u",
-	      (void *)dev, 1 << tmpl.rxq.sges_n);
-	if (desc % (1 << tmpl.rxq.sges_n)) {
-		ERROR("%p: number of RX queue descriptors (%u) is not a"
-		      " multiple of SGEs per packet (%u)",
-		      (void *)dev,
-		      desc,
-		      1 << tmpl.rxq.sges_n);
-		return EINVAL;
-	}
-	/* Toggle RX checksum offload if hardware supports it. */
-	if (priv->hw_csum)
-		tmpl.rxq.csum = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
-	if (priv->hw_csum_l2tun)
-		tmpl.rxq.csum_l2tun =
-			!!dev->data->dev_conf.rxmode.hw_ip_checksum;
-	/* Configure VLAN stripping. */
-	tmpl.rxq.vlan_strip = (priv->hw_vlan_strip &&
-			       !!dev->data->dev_conf.rxmode.hw_vlan_strip);
-	/* By default, FCS (CRC) is stripped by hardware. */
-	if (dev->data->dev_conf.rxmode.hw_strip_crc) {
-		tmpl.rxq.crc_present = 0;
-	} else if (priv->hw_fcs_strip) {
-		tmpl.rxq.crc_present = 1;
-	} else {
-		WARN("%p: CRC stripping has been disabled but will still"
-		     " be performed by hardware, make sure MLNX_OFED and"
-		     " firmware are up to date",
-		     (void *)dev);
-		tmpl.rxq.crc_present = 0;
-	}
-	DEBUG("%p: CRC stripping is %s, %u bytes will be subtracted from"
-	      " incoming frames to hide it",
-	      (void *)dev,
-	      tmpl.rxq.crc_present ? "disabled" : "enabled",
-	      tmpl.rxq.crc_present << 2);
-#ifdef HAVE_IBV_WQ_FLAG_RX_END_PADDING
-	if (!mlx5_getenv_int("MLX5_PMD_ENABLE_PADDING")) {
-		; /* Nothing else to do. */
-	} else if (priv->hw_padding) {
-		INFO("%p: enabling packet padding on queue %p",
-		     (void *)dev, (void *)rxq_ctrl);
-	} else {
-		WARN("%p: packet padding has been requested but is not"
-		     " supported, make sure MLNX_OFED and firmware are"
-		     " up to date",
-		     (void *)dev);
-	}
-#endif
-	/* Save port ID. */
-	tmpl.rxq.port_id = dev->data->port_id;
-	DEBUG("%p: RTE port ID: %u", (void *)rxq_ctrl, tmpl.rxq.port_id);
-	ret = rxq_alloc_elts(&tmpl, desc);
-	if (ret) {
-		ERROR("%p: RXQ allocation failed: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	/* Clean up rxq in case we're reinitializing it. */
-	DEBUG("%p: cleaning-up old rxq just in case", (void *)rxq_ctrl);
-	mlx5_rxq_cleanup(rxq_ctrl);
-	/* Move mbuf pointers to dedicated storage area in RX queue. */
-	elts = (void *)(rxq_ctrl + 1);
-	rte_memcpy(elts, tmpl.rxq.elts, sizeof(*elts));
-#ifndef NDEBUG
-	memset(tmpl.rxq.elts, 0x55, sizeof(*elts));
-#endif
-	rte_free(tmpl.rxq.elts);
-	tmpl.rxq.elts = elts;
-	*rxq_ctrl = tmpl;
-	DEBUG("%p: rxq updated with %p", (void *)rxq_ctrl, (void *)&tmpl);
-	assert(ret == 0);
-	return 0;
-error:
-	rte_free(tmpl.rxq.elts);
-	mlx5_rxq_cleanup(&tmpl);
-	assert(ret > 0);
-	return ret;
-}
-
-/**
- * DPDK callback to configure a RX queue.
  *
  * @param dev
  *   Pointer to Ethernet device structure.
@@ -944,13 +783,11 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
 	struct mlx5_rxq_ctrl *rxq_ctrl =
 		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
-	const uint16_t desc_n =
-		desc + priv->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
-	int ret;
+	int ret = 0;
 
+	(void)conf;
 	if (mlx5_is_secondary())
 		return -E_RTE_SECONDARY;
-
 	priv_lock(priv);
 	if (!rte_is_power_of_2(desc)) {
 		desc = 1 << log2above(desc);
@@ -966,54 +803,23 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		priv_unlock(priv);
 		return -EOVERFLOW;
 	}
-	if (rxq != NULL) {
-		DEBUG("%p: reusing already allocated queue index %u (%p)",
-		      (void *)dev, idx, (void *)rxq);
-		if (dev->data->dev_started) {
-			priv_unlock(priv);
-			return -EEXIST;
-		}
-		(*priv->rxqs)[idx] = NULL;
-		mlx5_rxq_cleanup(rxq_ctrl);
-		/* Resize if rxq size is changed. */
-		if (rxq_ctrl->rxq.elts_n != log2above(desc)) {
-			rxq_ctrl = rte_realloc(rxq_ctrl,
-					       sizeof(*rxq_ctrl) + desc_n *
-					       sizeof(struct rte_mbuf *),
-					       RTE_CACHE_LINE_SIZE);
-			if (!rxq_ctrl) {
-				ERROR("%p: unable to reallocate queue index %u",
-					(void *)dev, idx);
-				priv_unlock(priv);
-				return -ENOMEM;
-			}
-		}
-	} else {
-		rxq_ctrl = rte_calloc_socket("RXQ", 1, sizeof(*rxq_ctrl) +
-					     desc_n *
-					     sizeof(struct rte_mbuf *),
-					     0, socket);
-		if (rxq_ctrl == NULL) {
-			ERROR("%p: unable to allocate queue index %u",
-			      (void *)dev, idx);
-			priv_unlock(priv);
-			return -ENOMEM;
-		}
+	if (!mlx5_priv_rxq_releasable(priv, idx)) {
+		ret = EBUSY;
+		ERROR("%p: unable to release queue index %u",
+		      (void *)dev, idx);
+		goto out;
 	}
-	ret = rxq_ctrl_setup(dev, rxq_ctrl, desc, socket, conf, mp);
-	if (ret) {
-		rte_free(rxq_ctrl);
+	mlx5_priv_rxq_release(priv, idx);
+	rxq_ctrl = mlx5_priv_rxq_new(priv, idx, desc, socket, mp);
+	if (!rxq_ctrl) {
+		ERROR("%p: unable to allocate queue index %u",
+		      (void *)dev, idx);
+		ret = ENOMEM;
 		goto out;
 	}
-	rxq_ctrl->rxq.stats.idx = idx;
 	DEBUG("%p: adding RX queue %p to list",
 	      (void *)dev, (void *)rxq_ctrl);
 	(*priv->rxqs)[idx] = &rxq_ctrl->rxq;
-	rxq_ctrl->ibv = mlx5_priv_rxq_ibv_new(priv, idx);
-	if (!rxq_ctrl->ibv) {
-		ret = EAGAIN;
-		goto out;
-	}
 out:
 	priv_unlock(priv);
 	return -ret;
@@ -1031,7 +837,6 @@ mlx5_rx_queue_release(void *dpdk_rxq)
 	struct mlx5_rxq_data *rxq = (struct mlx5_rxq_data *)dpdk_rxq;
 	struct mlx5_rxq_ctrl *rxq_ctrl;
 	struct priv *priv;
-	unsigned int i;
 
 	if (mlx5_is_secondary())
 		return;
@@ -1041,18 +846,10 @@ mlx5_rx_queue_release(void *dpdk_rxq)
 	rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq);
 	priv = rxq_ctrl->priv;
 	priv_lock(priv);
-	if (!mlx5_priv_rxq_ibv_releasable(priv, rxq_ctrl->ibv))
+	if (!mlx5_priv_rxq_releasable(priv, rxq_ctrl->rxq.stats.idx))
 		rte_panic("Rx queue %p is still used by a flow and cannot be"
 			  " removed\n", (void *)rxq_ctrl);
-	for (i = 0; (i != priv->rxqs_n); ++i)
-		if ((*priv->rxqs)[i] == rxq) {
-			DEBUG("%p: removing RX queue %p from list",
-			      (void *)priv->dev, (void *)rxq_ctrl);
-			(*priv->rxqs)[i] = NULL;
-			break;
-		}
-	mlx5_rxq_cleanup(rxq_ctrl);
-	rte_free(rxq_ctrl);
+	mlx5_priv_rxq_release(priv, rxq_ctrl->rxq.stats.idx);
 	priv_unlock(priv);
 }
 
@@ -1590,3 +1387,238 @@ mlx5_priv_rxq_ibv_releasable(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv)
 	assert(rxq_ibv);
 	return (rte_atomic32_read(&rxq_ibv->refcnt) == 1);
 }
+
+/**
+ * Create a DPDK Rx queue.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param idx
+ *   RX queue index.
+ * @param desc
+ *   Number of descriptors to configure in queue.
+ * @param socket
+ *   NUMA socket on which memory must be allocated.
+ *
+ * @return
+ *   A DPDK queue object on success.
+ */
+struct mlx5_rxq_ctrl*
+mlx5_priv_rxq_new(struct priv *priv, uint16_t idx, uint16_t desc,
+		  unsigned int socket, struct rte_mempool *mp)
+{
+	struct rte_eth_dev *dev = priv->dev;
+	struct mlx5_rxq_ctrl *tmpl;
+	const uint16_t desc_n =
+		desc + priv->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
+	unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
+
+	tmpl = rte_calloc_socket("RXQ", 1,
+				 sizeof(*tmpl) +
+				 desc_n * sizeof(struct rte_mbuf *),
+				 0, socket);
+	if (!tmpl)
+		return NULL;
+	if (priv->dev->data->dev_conf.intr_conf.rxq)
+		tmpl->irq = 1;
+	/* Enable scattered packets support for this queue if necessary. */
+	assert(mb_len >= RTE_PKTMBUF_HEADROOM);
+	if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
+	    (mb_len - RTE_PKTMBUF_HEADROOM)) {
+		tmpl->rxq.sges_n = 0;
+	} else if (dev->data->dev_conf.rxmode.enable_scatter) {
+		unsigned int size =
+			RTE_PKTMBUF_HEADROOM +
+			dev->data->dev_conf.rxmode.max_rx_pkt_len;
+		unsigned int sges_n;
+
+		/*
+		 * Determine the number of SGEs needed for a full packet
+		 * and round it to the next power of two.
+		 */
+		sges_n = log2above((size / mb_len) + !!(size % mb_len));
+		tmpl->rxq.sges_n = sges_n;
+		/* Make sure rxq.sges_n did not overflow. */
+		size = mb_len * (1 << tmpl->rxq.sges_n);
+		size -= RTE_PKTMBUF_HEADROOM;
+		if (size < dev->data->dev_conf.rxmode.max_rx_pkt_len) {
+			ERROR("%p: too many SGEs (%u) needed to handle"
+			      " requested maximum packet size %u",
+			      (void *)dev,
+			      1 << sges_n,
+			      dev->data->dev_conf.rxmode.max_rx_pkt_len);
+			goto error;
+		}
+	} else {
+		WARN("%p: the requested maximum Rx packet size (%u) is"
+		     " larger than a single mbuf (%u) and scattered"
+		     " mode has not been requested",
+		     (void *)dev,
+		     dev->data->dev_conf.rxmode.max_rx_pkt_len,
+		     mb_len - RTE_PKTMBUF_HEADROOM);
+	}
+	DEBUG("%p: maximum number of segments per packet: %u",
+	      (void *)dev, 1 << tmpl->rxq.sges_n);
+	if (desc % (1 << tmpl->rxq.sges_n)) {
+		ERROR("%p: number of RX queue descriptors (%u) is not a"
+		      " multiple of SGEs per packet (%u)",
+		      (void *)dev,
+		      desc,
+		      1 << tmpl->rxq.sges_n);
+		goto error;
+	}
+	/* Toggle RX checksum offload if hardware supports it. */
+	if (priv->hw_csum)
+		tmpl->rxq.csum = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
+	if (priv->hw_csum_l2tun)
+		tmpl->rxq.csum_l2tun =
+			!!dev->data->dev_conf.rxmode.hw_ip_checksum;
+	/* Configure VLAN stripping. */
+	tmpl->rxq.vlan_strip = (priv->hw_vlan_strip &&
+			       !!dev->data->dev_conf.rxmode.hw_vlan_strip);
+	/* By default, FCS (CRC) is stripped by hardware. */
+	if (dev->data->dev_conf.rxmode.hw_strip_crc) {
+		tmpl->rxq.crc_present = 0;
+	} else if (priv->hw_fcs_strip) {
+		tmpl->rxq.crc_present = 1;
+	} else {
+		WARN("%p: CRC stripping has been disabled but will still"
+		     " be performed by hardware, make sure MLNX_OFED and"
+		     " firmware are up to date",
+		     (void *)dev);
+		tmpl->rxq.crc_present = 0;
+	}
+	DEBUG("%p: CRC stripping is %s, %u bytes will be subtracted from"
+	      " incoming frames to hide it",
+	      (void *)dev,
+	      tmpl->rxq.crc_present ? "disabled" : "enabled",
+	      tmpl->rxq.crc_present << 2);
+	/* Save port ID. */
+	tmpl->rxq.rss_hash = priv->rxqs_n > 1;
+	tmpl->rxq.port_id = dev->data->port_id;
+	tmpl->priv = priv;
+	tmpl->rxq.mp = mp;
+	tmpl->rxq.stats.idx = idx;
+	tmpl->rxq.elts_n = log2above(desc);
+	tmpl->rxq.elts =
+		(struct rte_mbuf *(*)[1 << tmpl->rxq.elts_n])(tmpl + 1);
+	rte_atomic32_inc(&tmpl->refcnt);
+	DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv,
+	      (void *)tmpl, rte_atomic32_read(&tmpl->refcnt));
+	LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
+	return tmpl;
+error:
+	rte_free(tmpl);
+	return NULL;
+}
+
+/**
+ * Get a Rx queue.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param idx
+ *   RX queue index.
+ *
+ * @return
+ *   A pointer to the queue if it exists.
+ */
+struct mlx5_rxq_ctrl*
+mlx5_priv_rxq_get(struct priv *priv, uint16_t idx)
+{
+	struct mlx5_rxq_ctrl *rxq_ctrl = NULL;
+
+	if ((*priv->rxqs)[idx]) {
+		rxq_ctrl = container_of((*priv->rxqs)[idx],
+					struct mlx5_rxq_ctrl,
+					rxq);
+
+		mlx5_priv_rxq_ibv_get(priv, idx);
+		rte_atomic32_inc(&rxq_ctrl->refcnt);
+		DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv,
+		      (void *)rxq_ctrl, rte_atomic32_read(&rxq_ctrl->refcnt));
+	}
+	return rxq_ctrl;
+}
+
+/**
+ * Release a Rx queue.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param idx
+ *   RX queue index.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+int
+mlx5_priv_rxq_release(struct priv *priv, uint16_t idx)
+{
+	struct mlx5_rxq_ctrl *rxq_ctrl;
+
+	if (!(*priv->rxqs)[idx])
+		return 0;
+	rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
+	assert(rxq_ctrl->priv);
+	if (rxq_ctrl->ibv) {
+		int ret;
+
+		ret = mlx5_priv_rxq_ibv_release(rxq_ctrl->priv, rxq_ctrl->ibv);
+		if (!ret)
+			rxq_ctrl->ibv = NULL;
+	}
+	DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv,
+	      (void *)rxq_ctrl, rte_atomic32_read(&rxq_ctrl->refcnt));
+	if (rte_atomic32_dec_and_test(&rxq_ctrl->refcnt)) {
+		LIST_REMOVE(rxq_ctrl, next);
+		rte_free(rxq_ctrl);
+		(*priv->rxqs)[idx] = NULL;
+		return 0;
+	}
+	return EBUSY;
+}
+
+/**
+ * Verify if the queue can be released.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param idx
+ *   RX queue index.
+ *
+ * @return
+ *   1 if the queue can be released, 0 if in use, -1 if it does not exist.
+ */
+int
+mlx5_priv_rxq_releasable(struct priv *priv, uint16_t idx)
+{
+	struct mlx5_rxq_ctrl *rxq_ctrl;
+
+	if (!(*priv->rxqs)[idx])
+		return -1;
+	rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
+	return (rte_atomic32_read(&rxq_ctrl->refcnt) == 1);
+}
+
+/**
+ * Verify the Rx Queue list is empty
+ *
+ * @param priv
+ *  Pointer to private structure.
+ *
+ * @return the number of objects not released.
+ */
+int
+mlx5_priv_rxq_verify(struct priv *priv)
+{
+	struct mlx5_rxq_ctrl *rxq_ctrl;
+	int ret = 0;
+
+	LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) {
+		DEBUG("%p: Rx Queue %p still referenced", (void *)priv,
+		      (void *)rxq_ctrl);
+		++ret;
+	}
+	return ret;
+}
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 69344f6..44cfef5 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -147,6 +147,8 @@ struct mlx5_rxq_ibv {
 
 /* RX queue control descriptor. */
 struct mlx5_rxq_ctrl {
+	LIST_ENTRY(mlx5_rxq_ctrl) next; /* Pointer to the next element. */
+	rte_atomic32_t refcnt; /* Reference counter. */
 	struct priv *priv; /* Back pointer to private data. */
 	struct mlx5_rxq_ibv *ibv; /* Verbs elements. */
 	struct mlx5_rxq_data rxq; /* Data path structure. */
@@ -335,6 +337,14 @@ struct mlx5_rxq_ibv *mlx5_priv_rxq_ibv_get(struct priv *, uint16_t);
 int mlx5_priv_rxq_ibv_release(struct priv *, struct mlx5_rxq_ibv *);
 int mlx5_priv_rxq_ibv_releasable(struct priv *, struct mlx5_rxq_ibv *);
 int mlx5_priv_rxq_ibv_verify(struct priv *);
+struct mlx5_rxq_ctrl *mlx5_priv_rxq_new(struct priv *, uint16_t,
+					uint16_t, unsigned int,
+					struct rte_mempool *);
+struct mlx5_rxq_ctrl *mlx5_priv_rxq_get(struct priv *, uint16_t);
+int mlx5_priv_rxq_release(struct priv *, uint16_t);
+int mlx5_priv_rxq_releasable(struct priv *, uint16_t);
+int mlx5_priv_rxq_verify(struct priv *);
+int rxq_alloc_elts(struct mlx5_rxq_ctrl *);
 
 /* mlx5_txq.c */
 
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 7a12768..a311499 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -79,6 +79,41 @@ priv_txq_start(struct priv *priv)
 	return -ret;
 }
 
+static void
+priv_rxq_stop(struct priv *priv)
+{
+	unsigned int i;
+
+	for (i = 0; i != priv->rxqs_n; ++i)
+		mlx5_priv_rxq_release(priv, i);
+}
+
+static int
+priv_rxq_start(struct priv *priv)
+{
+	unsigned int i;
+	int ret = 0;
+
+	for (i = 0; i != priv->rxqs_n; ++i) {
+		struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_priv_rxq_get(priv, i);
+
+		if (!rxq_ctrl)
+			continue;
+		ret = rxq_alloc_elts(rxq_ctrl);
+		if (ret)
+			goto error;
+		rxq_ctrl->ibv = mlx5_priv_rxq_ibv_new(priv, i);
+		if (!rxq_ctrl->ibv) {
+			ret = ENOMEM;
+			goto error;
+		}
+	}
+	return -ret;
+error:
+	priv_rxq_stop(priv);
+	return -ret;
+}
+
 /**
  * DPDK callback to start the device.
  *
@@ -101,8 +136,6 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 		return -E_RTE_SECONDARY;
 
 	priv_lock(priv);
-	/* Update Rx/Tx callback. */
-	priv_dev_select_rx_function(priv, dev);
 	DEBUG("%p: allocating and configuring hash RX queues", (void *)dev);
 	rte_mempool_walk(mlx5_mp2mr_iter, priv);
 	err = priv_txq_start(priv);
@@ -113,6 +146,14 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 	}
 	/* Update send callback. */
 	priv_dev_select_tx_function(priv, dev);
+	err = priv_rxq_start(priv);
+	if (err) {
+		ERROR("%p: RXQ allocation failed: %s",
+		      (void *)dev, strerror(err));
+		goto error;
+	}
+	/* Update receive callback. */
+	priv_dev_select_rx_function(priv, dev);
 	err = priv_create_hash_rxqs(priv);
 	if (!err)
 		err = priv_rehash_flows(priv);
@@ -147,6 +188,7 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 	priv_mac_addrs_disable(priv);
 	priv_destroy_hash_rxqs(priv);
 	priv_flow_stop(priv);
+	priv_rxq_stop(priv);
 	priv_txq_stop(priv);
 	priv_unlock(priv);
 	return -err;
@@ -183,6 +225,7 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
 	priv_flow_stop(priv);
 	priv_rx_intr_vec_disable(priv);
 	priv_txq_stop(priv);
+	priv_rxq_stop(priv);
 	LIST_FOREACH(mr, &priv->mr, next) {
 		priv_mr_release(priv, mr);
 	}
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v3 14/30] net/mlx5: make indirection tables shareable
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (38 preceding siblings ...)
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 13/30] net/mlx5: add reference counter on DPDK Rx queues Nelio Laranjeiro
@ 2017-10-09 14:44 ` Nelio Laranjeiro
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 15/30] net/mlx5: add Hash Rx queue object Nelio Laranjeiro
                   ` (15 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-09 14:44 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
The indirection table on the Verbs side is a list of final work queues used to
spread the packets according to a higher-level queue.  This indirection
table can be shared among the hash Rx queues which point to it.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5.c       |   3 +
 drivers/net/mlx5/mlx5.h       |   2 +
 drivers/net/mlx5/mlx5_flow.c  |  83 ++++++++++-------------
 drivers/net/mlx5/mlx5_rxq.c   | 153 ++++++++++++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_rxtx.h  |  17 +++++
 drivers/net/mlx5/mlx5_utils.h |   2 +
 6 files changed, 214 insertions(+), 46 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index c2c3d1b..46b4067 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -235,6 +235,9 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	if (priv->reta_idx != NULL)
 		rte_free(priv->reta_idx);
 	priv_socket_uninit(priv);
+	ret = mlx5_priv_ind_table_ibv_verify(priv);
+	if (ret)
+		WARN("%p: some Indirection table still remain", (void *)priv);
 	ret = mlx5_priv_rxq_ibv_verify(priv);
 	if (ret)
 		WARN("%p: some Verbs Rx queue still remain", (void *)priv);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index d0ef21a..ab17ce6 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -151,6 +151,8 @@ struct priv {
 	LIST_HEAD(rxqibv, mlx5_rxq_ibv) rxqsibv; /* Verbs Rx queues. */
 	LIST_HEAD(txq, mlx5_txq_ctrl) txqsctrl; /* DPDK Tx queues. */
 	LIST_HEAD(txqibv, mlx5_txq_ibv) txqsibv; /* Verbs Tx queues. */
+	/* Verbs Indirection tables. */
+	LIST_HEAD(ind_tables, mlx5_ind_table_ibv) ind_tbls;
 	uint32_t link_speed_capa; /* Link speed capabilities. */
 	struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */
 	rte_spinlock_t lock; /* Lock for control functions. */
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 362ec91..dc9adeb 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -90,7 +90,7 @@ mlx5_flow_create_vxlan(const struct rte_flow_item *item,
 struct rte_flow {
 	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
 	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
-	struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
+	struct mlx5_ind_table_ibv *ind_table; /**< Indirection table. */
 	struct ibv_qp *qp; /**< Verbs queue pair. */
 	struct ibv_flow *ibv_flow; /**< Verbs flow. */
 	struct ibv_wq *wq; /**< Verbs work queue. */
@@ -98,8 +98,6 @@ struct rte_flow {
 	uint32_t mark:1; /**< Set if the flow is marked. */
 	uint32_t drop:1; /**< Drop queue. */
 	uint64_t hash_fields; /**< Fields that participate in the hash. */
-	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< List of queues. */
-	uint16_t queues_n; /**< Number of queues in the list. */
 };
 
 /** Static initializer for items. */
@@ -1089,9 +1087,6 @@ priv_flow_create_action_queue(struct priv *priv,
 {
 	struct rte_flow *rte_flow;
 	unsigned int i;
-	unsigned int j;
-	const unsigned int wqs_n = 1 << log2above(flow->actions.queues_n);
-	struct ibv_wq *wqs[wqs_n];
 
 	assert(priv->pd);
 	assert(priv->ctx);
@@ -1102,36 +1097,29 @@ priv_flow_create_action_queue(struct priv *priv,
 				   NULL, "cannot allocate flow memory");
 		return NULL;
 	}
-	for (i = 0; i < flow->actions.queues_n; ++i) {
-		struct mlx5_rxq_ibv *rxq_ibv =
-			mlx5_priv_rxq_ibv_get(priv, flow->actions.queues[i]);
-
-		wqs[i] = rxq_ibv->wq;
-		rte_flow->queues[i] = flow->actions.queues[i];
-		++rte_flow->queues_n;
-		(*priv->rxqs)[flow->actions.queues[i]]->mark |=
-			flow->actions.mark;
-	}
-	/* finalise indirection table. */
-	for (j = 0; i < wqs_n; ++i, ++j) {
-		wqs[i] = wqs[j];
-		if (j == flow->actions.queues_n)
-			j = 0;
+	for (i = 0; i != flow->actions.queues_n; ++i) {
+		struct mlx5_rxq_data *q =
+			(*priv->rxqs)[flow->actions.queues[i]];
+
+		q->mark |= flow->actions.mark;
 	}
 	rte_flow->mark = flow->actions.mark;
 	rte_flow->ibv_attr = flow->ibv_attr;
 	rte_flow->hash_fields = flow->hash_fields;
-	rte_flow->ind_table = ibv_create_rwq_ind_table(
-		priv->ctx,
-		&(struct ibv_rwq_ind_table_init_attr){
-			.log_ind_tbl_size = log2above(flow->actions.queues_n),
-			.ind_tbl = wqs,
-			.comp_mask = 0,
-		});
+	rte_flow->ind_table =
+		mlx5_priv_ind_table_ibv_get(priv, flow->actions.queues,
+					    flow->actions.queues_n);
 	if (!rte_flow->ind_table) {
-		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
-				   NULL, "cannot allocate indirection table");
-		goto error;
+		rte_flow->ind_table =
+			mlx5_priv_ind_table_ibv_new(priv, flow->actions.queues,
+						    flow->actions.queues_n);
+		if (!rte_flow->ind_table) {
+			rte_flow_error_set(error, ENOMEM,
+					   RTE_FLOW_ERROR_TYPE_HANDLE,
+					   NULL,
+					   "cannot allocate indirection table");
+			goto error;
+		}
 	}
 	rte_flow->qp = ibv_create_qp_ex(
 		priv->ctx,
@@ -1148,7 +1136,7 @@ priv_flow_create_action_queue(struct priv *priv,
 				.rx_hash_key = rss_hash_default_key,
 				.rx_hash_fields_mask = rte_flow->hash_fields,
 			},
-			.rwq_ind_tbl = rte_flow->ind_table,
+			.rwq_ind_tbl = rte_flow->ind_table->ind_table,
 			.pd = priv->pd
 		});
 	if (!rte_flow->qp) {
@@ -1171,7 +1159,7 @@ priv_flow_create_action_queue(struct priv *priv,
 	if (rte_flow->qp)
 		ibv_destroy_qp(rte_flow->qp);
 	if (rte_flow->ind_table)
-		ibv_destroy_rwq_ind_table(rte_flow->ind_table);
+		mlx5_priv_ind_table_ibv_release(priv, rte_flow->ind_table);
 	rte_free(rte_flow);
 	return NULL;
 }
@@ -1297,13 +1285,10 @@ priv_flow_destroy(struct priv *priv,
 		goto free;
 	if (flow->qp)
 		claim_zero(ibv_destroy_qp(flow->qp));
-	if (flow->ind_table)
-		claim_zero(ibv_destroy_rwq_ind_table(flow->ind_table));
-	for (i = 0; i != flow->queues_n; ++i) {
+	for (i = 0; i != flow->ind_table->queues_n; ++i) {
 		struct rte_flow *tmp;
-		struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[flow->queues[i]];
-		struct mlx5_rxq_ctrl *rxq_ctrl =
-			container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
+		struct mlx5_rxq_data *rxq_data =
+			(*priv->rxqs)[flow->ind_table->queues[i]];
 
 		/*
 		 * To remove the mark from the queue, the queue must not be
@@ -1319,14 +1304,17 @@ priv_flow_destroy(struct priv *priv,
 					continue;
 				if (!tmp->mark)
 					continue;
-				for (j = 0; (j != tmp->queues_n) && !mark; j++)
-					if (tmp->queues[j] == flow->queues[i])
+				for (j = 0;
+				     (j != tmp->ind_table->queues_n) && !mark;
+				     j++)
+					if (tmp->ind_table->queues[j] ==
+					    flow->ind_table->queues[i])
 						mark = 1;
 			}
 			rxq_data->mark = mark;
 		}
-		mlx5_priv_rxq_ibv_release(priv, rxq_ctrl->ibv);
 	}
+	mlx5_priv_ind_table_ibv_release(priv, flow->ind_table);
 free:
 	rte_free(flow->ibv_attr);
 	DEBUG("Flow destroyed %p", (void *)flow);
@@ -1518,9 +1506,10 @@ priv_flow_stop(struct priv *priv)
 		flow->ibv_flow = NULL;
 		if (flow->mark) {
 			unsigned int n;
+			struct mlx5_ind_table_ibv *ind_tbl = flow->ind_table;
 
-			for (n = 0; n < flow->queues_n; ++n)
-				(*priv->rxqs)[flow->queues[n]]->mark = 0;
+			for (n = 0; n < ind_tbl->queues_n; ++n)
+				(*priv->rxqs)[ind_tbl->queues[n]]->mark = 0;
 		}
 		DEBUG("Flow %p removed", (void *)flow);
 	}
@@ -1562,8 +1551,10 @@ priv_flow_start(struct priv *priv)
 		if (flow->mark) {
 			unsigned int n;
 
-			for (n = 0; n < flow->queues_n; ++n)
-				(*priv->rxqs)[flow->queues[n]]->mark = 1;
+			for (n = 0; n < flow->ind_table->queues_n; ++n) {
+				uint16_t idx = flow->ind_table->queues[n];
+				(*priv->rxqs)[idx]->mark = 1;
+			}
 		}
 	}
 	return 0;
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 87efeed..4a53282 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -1622,3 +1622,156 @@ mlx5_priv_rxq_verify(struct priv *priv)
 	}
 	return ret;
 }
+
+/**
+ * Create an indirection table.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param queues
+ *   Queues entering in the indirection table.
+ * @param queues_n
+ *   Number of queues in the array.
+ *
+ * @return
+ *   A new indirection table.
+ */
+struct mlx5_ind_table_ibv*
+mlx5_priv_ind_table_ibv_new(struct priv *priv, uint16_t queues[],
+			    uint16_t queues_n)
+{
+	struct mlx5_ind_table_ibv *ind_tbl;
+	const unsigned int wq_n = rte_is_power_of_2(queues_n) ?
+		log2above(queues_n) :
+		priv->ind_table_max_size;
+	struct ibv_wq *wq[1 << wq_n];
+	unsigned int i;
+	unsigned int j;
+
+	ind_tbl = rte_calloc(__func__, 1, sizeof(*ind_tbl) +
+			     queues_n * sizeof(uint16_t), 0);
+	if (!ind_tbl)
+		return NULL;
+	for (i = 0; i != queues_n; ++i) {
+		struct mlx5_rxq_ctrl *rxq =
+			mlx5_priv_rxq_get(priv, queues[i]);
+
+		if (!rxq)
+			goto error;
+		wq[i] = rxq->ibv->wq;
+		ind_tbl->queues[i] = queues[i];
+	}
+	ind_tbl->queues_n = queues_n;
+	/* Finalise indirection table. */
+	for (j = 0; i != (unsigned int)(1 << wq_n); ++i, ++j)
+		wq[i] = wq[j];
+	ind_tbl->ind_table = ibv_create_rwq_ind_table(
+		priv->ctx,
+		&(struct ibv_rwq_ind_table_init_attr){
+			.log_ind_tbl_size = wq_n,
+			.ind_tbl = wq,
+			.comp_mask = 0,
+		});
+	if (!ind_tbl->ind_table)
+		goto error;
+	rte_atomic32_inc(&ind_tbl->refcnt);
+	LIST_INSERT_HEAD(&priv->ind_tbls, ind_tbl, next);
+	DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
+	      (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
+	return ind_tbl;
+error:
+	rte_free(ind_tbl);
+	DEBUG("%p cannot create indirection table", (void *)priv);
+	return NULL;
+}
+
+/**
+ * Get an indirection table.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param queues
+ *   Queues entering in the indirection table.
+ * @param queues_n
+ *   Number of queues in the array.
+ *
+ * @return
+ *   An indirection table if found.
+ */
+struct mlx5_ind_table_ibv*
+mlx5_priv_ind_table_ibv_get(struct priv *priv, uint16_t queues[],
+			    uint16_t queues_n)
+{
+	struct mlx5_ind_table_ibv *ind_tbl;
+
+	LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
+		if ((ind_tbl->queues_n == queues_n) &&
+		    (memcmp(ind_tbl->queues, queues,
+			    ind_tbl->queues_n * sizeof(ind_tbl->queues[0]))
+		     == 0))
+			break;
+	}
+	if (ind_tbl) {
+		unsigned int i;
+
+		rte_atomic32_inc(&ind_tbl->refcnt);
+		DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
+		      (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
+		for (i = 0; i != ind_tbl->queues_n; ++i)
+			mlx5_priv_rxq_get(priv, ind_tbl->queues[i]);
+	}
+	return ind_tbl;
+}
+
+/**
+ * Release an indirection table.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param ind_table
+ *   Indirection table to release.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+int
+mlx5_priv_ind_table_ibv_release(struct priv *priv,
+				struct mlx5_ind_table_ibv *ind_tbl)
+{
+	unsigned int i;
+
+	DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
+	      (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
+	if (rte_atomic32_dec_and_test(&ind_tbl->refcnt))
+		claim_zero(ibv_destroy_rwq_ind_table(ind_tbl->ind_table));
+	for (i = 0; i != ind_tbl->queues_n; ++i)
+		claim_nonzero(mlx5_priv_rxq_release(priv, ind_tbl->queues[i]));
+	if (!rte_atomic32_read(&ind_tbl->refcnt)) {
+		LIST_REMOVE(ind_tbl, next);
+		rte_free(ind_tbl);
+		return 0;
+	}
+	return EBUSY;
+}
+
+/**
+ * Verify the Verbs indirection table list is empty
+ *
+ * @param priv
+ *  Pointer to private structure.
+ *
+ * @return the number of objects not released.
+ */
+int
+mlx5_priv_ind_table_ibv_verify(struct priv *priv)
+{
+	struct mlx5_ind_table_ibv *ind_tbl;
+	int ret = 0;
+
+	LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
+		DEBUG("%p: Verbs indirection table %p still referenced",
+		      (void *)priv, (void *)ind_tbl);
+		++ret;
+	}
+	return ret;
+}
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 44cfef5..b7c75bf 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -156,6 +156,15 @@ struct mlx5_rxq_ctrl {
 	unsigned int irq:1; /* Whether IRQ is enabled. */
 };
 
+/* Indirection table. */
+struct mlx5_ind_table_ibv {
+	LIST_ENTRY(mlx5_ind_table_ibv) next; /* Pointer to the next element. */
+	rte_atomic32_t refcnt; /* Reference counter. */
+	struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
+	uint16_t queues_n; /**< Number of queues in the list. */
+	uint16_t queues[]; /**< Queue list. */
+};
+
 /* Hash RX queue types. */
 enum hash_rxq_type {
 	HASH_RXQ_TCPV4,
@@ -345,6 +354,14 @@ int mlx5_priv_rxq_release(struct priv *, uint16_t);
 int mlx5_priv_rxq_releasable(struct priv *, uint16_t);
 int mlx5_priv_rxq_verify(struct priv *);
 int rxq_alloc_elts(struct mlx5_rxq_ctrl *);
+struct mlx5_ind_table_ibv *mlx5_priv_ind_table_ibv_new(struct priv *,
+						       uint16_t [],
+						       uint16_t);
+struct mlx5_ind_table_ibv *mlx5_priv_ind_table_ibv_get(struct priv *,
+						       uint16_t [],
+						       uint16_t);
+int mlx5_priv_ind_table_ibv_release(struct priv *, struct mlx5_ind_table_ibv *);
+int mlx5_priv_ind_table_ibv_verify(struct priv *);
 
 /* mlx5_txq.c */
 
diff --git a/drivers/net/mlx5/mlx5_utils.h b/drivers/net/mlx5/mlx5_utils.h
index a824787..218ae83 100644
--- a/drivers/net/mlx5/mlx5_utils.h
+++ b/drivers/net/mlx5/mlx5_utils.h
@@ -128,11 +128,13 @@ pmd_drv_log_basename(const char *s)
 
 #define DEBUG(...) PMD_DRV_LOG(DEBUG, __VA_ARGS__)
 #define claim_zero(...) assert((__VA_ARGS__) == 0)
+#define claim_nonzero(...) assert((__VA_ARGS__) != 0)
 
 #else /* NDEBUG */
 
 #define DEBUG(...) (void)0
 #define claim_zero(...) (__VA_ARGS__)
+#define claim_nonzero(...) (__VA_ARGS__)
 
 #endif /* NDEBUG */
 
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v3 15/30] net/mlx5: add Hash Rx queue object
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (39 preceding siblings ...)
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 14/30] net/mlx5: make indirection tables shareable Nelio Laranjeiro
@ 2017-10-09 14:44 ` Nelio Laranjeiro
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 16/30] net/mlx5: fix clang compilation error Nelio Laranjeiro
                   ` (14 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-09 14:44 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
A hash Rx queue is a high-level queue providing the RSS hash algorithm, key
and indirection table to spread the packets.  Those objects can be easily
shared between several Verbs flows.  This commit brings this capability to
the PMD.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5.c      |   3 +
 drivers/net/mlx5/mlx5.h      |   3 +-
 drivers/net/mlx5/mlx5_flow.c | 228 ++++++++++++++++++++++++-------------------
 drivers/net/mlx5/mlx5_rxq.c  | 165 +++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_rxtx.h |  17 ++++
 5 files changed, 312 insertions(+), 104 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 46b4067..fd8138b 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -235,6 +235,9 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	if (priv->reta_idx != NULL)
 		rte_free(priv->reta_idx);
 	priv_socket_uninit(priv);
+	ret = mlx5_priv_hrxq_ibv_verify(priv);
+	if (ret)
+		WARN("%p: some Hash Rx queue still remain", (void *)priv);
 	ret = mlx5_priv_ind_table_ibv_verify(priv);
 	if (ret)
 		WARN("%p: some Indirection table still remain", (void *)priv);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index ab17ce6..77413c9 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -144,11 +144,12 @@ struct priv {
 	struct rte_intr_handle intr_handle; /* Interrupt handler. */
 	unsigned int (*reta_idx)[]; /* RETA index table. */
 	unsigned int reta_idx_n; /* RETA index size. */
-	struct rte_flow_drop *flow_drop_queue; /* Flow drop queue. */
+	struct mlx5_hrxq_drop *flow_drop_queue; /* Flow drop queue. */
 	TAILQ_HEAD(mlx5_flows, rte_flow) flows; /* RTE Flow rules. */
 	LIST_HEAD(mr, mlx5_mr) mr; /* Memory region. */
 	LIST_HEAD(rxq, mlx5_rxq_ctrl) rxqsctrl; /* DPDK Rx queues. */
 	LIST_HEAD(rxqibv, mlx5_rxq_ibv) rxqsibv; /* Verbs Rx queues. */
+	LIST_HEAD(hrxq, mlx5_hrxq) hrxqs; /* Verbs Hash Rx queues. */
 	LIST_HEAD(txq, mlx5_txq_ctrl) txqsctrl; /* DPDK Tx queues. */
 	LIST_HEAD(txqibv, mlx5_txq_ibv) txqsibv; /* Verbs Tx queues. */
 	/* Verbs Indirection tables. */
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index dc9adeb..4948882 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -87,17 +87,37 @@ mlx5_flow_create_vxlan(const struct rte_flow_item *item,
 		       const void *default_mask,
 		       void *data);
 
-struct rte_flow {
-	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
-	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
-	struct mlx5_ind_table_ibv *ind_table; /**< Indirection table. */
+/** Structure for Drop queue. */
+struct mlx5_hrxq_drop {
+	struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
 	struct ibv_qp *qp; /**< Verbs queue pair. */
-	struct ibv_flow *ibv_flow; /**< Verbs flow. */
 	struct ibv_wq *wq; /**< Verbs work queue. */
 	struct ibv_cq *cq; /**< Verbs completion queue. */
+};
+
+/* Flows structures. */
+struct mlx5_flow {
+	uint64_t hash_fields; /**< Fields that participate in the hash. */
+	struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
+};
+
+/* Drop flows structures. */
+struct mlx5_flow_drop {
+	struct mlx5_hrxq_drop hrxq; /**< Drop hash Rx queue. */
+};
+
+struct rte_flow {
+	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
 	uint32_t mark:1; /**< Set if the flow is marked. */
 	uint32_t drop:1; /**< Drop queue. */
-	uint64_t hash_fields; /**< Fields that participate in the hash. */
+	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
+	struct ibv_flow *ibv_flow; /**< Verbs flow. */
+	uint16_t queues_n; /**< Number of entries in queue[]. */
+	uint16_t (*queues)[]; /**< Queues indexes to use. */
+	union {
+		struct mlx5_flow frxq; /**< Flow with Rx queue. */
+		struct mlx5_flow_drop drxq; /**< Flow with drop Rx queue. */
+	};
 };
 
 /** Static initializer for items. */
@@ -288,14 +308,6 @@ struct mlx5_flow_parse {
 	struct mlx5_flow_action actions; /**< Parsed action result. */
 };
 
-/** Structure for Drop queue. */
-struct rte_flow_drop {
-	struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
-	struct ibv_qp *qp; /**< Verbs queue pair. */
-	struct ibv_wq *wq; /**< Verbs work queue. */
-	struct ibv_cq *cq; /**< Verbs completion queue. */
-};
-
 static const struct rte_flow_ops mlx5_flow_ops = {
 	.validate = mlx5_flow_validate,
 	.create = mlx5_flow_create,
@@ -1052,8 +1064,8 @@ priv_flow_create_action_queue_drop(struct priv *priv,
 	rte_flow->ibv_attr = flow->ibv_attr;
 	if (!priv->dev->data->dev_started)
 		return rte_flow;
-	rte_flow->qp = priv->flow_drop_queue->qp;
-	rte_flow->ibv_flow = ibv_create_flow(rte_flow->qp,
+	rte_flow->drxq.hrxq.qp = priv->flow_drop_queue->qp;
+	rte_flow->ibv_flow = ibv_create_flow(rte_flow->drxq.hrxq.qp,
 					     rte_flow->ibv_attr);
 	if (!rte_flow->ibv_flow) {
 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
@@ -1091,62 +1103,52 @@ priv_flow_create_action_queue(struct priv *priv,
 	assert(priv->pd);
 	assert(priv->ctx);
 	assert(!flow->actions.drop);
-	rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
+	rte_flow =
+		rte_calloc(__func__, 1,
+			   sizeof(*flow) +
+			   flow->actions.queues_n * sizeof(uint16_t),
+			   0);
 	if (!rte_flow) {
 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
 				   NULL, "cannot allocate flow memory");
 		return NULL;
 	}
-	for (i = 0; i != flow->actions.queues_n; ++i) {
-		struct mlx5_rxq_data *q =
-			(*priv->rxqs)[flow->actions.queues[i]];
-
-		q->mark |= flow->actions.mark;
-	}
 	rte_flow->mark = flow->actions.mark;
 	rte_flow->ibv_attr = flow->ibv_attr;
-	rte_flow->hash_fields = flow->hash_fields;
-	rte_flow->ind_table =
-		mlx5_priv_ind_table_ibv_get(priv, flow->actions.queues,
-					    flow->actions.queues_n);
-	if (!rte_flow->ind_table) {
-		rte_flow->ind_table =
-			mlx5_priv_ind_table_ibv_new(priv, flow->actions.queues,
-						    flow->actions.queues_n);
-		if (!rte_flow->ind_table) {
-			rte_flow_error_set(error, ENOMEM,
-					   RTE_FLOW_ERROR_TYPE_HANDLE,
-					   NULL,
-					   "cannot allocate indirection table");
-			goto error;
-		}
+	rte_flow->queues = (uint16_t (*)[])(rte_flow + 1);
+	memcpy(rte_flow->queues, flow->actions.queues,
+	       flow->actions.queues_n * sizeof(uint16_t));
+	rte_flow->queues_n = flow->actions.queues_n;
+	rte_flow->frxq.hash_fields = flow->hash_fields;
+	rte_flow->frxq.hrxq = mlx5_priv_hrxq_get(priv, rss_hash_default_key,
+						 rss_hash_default_key_len,
+						 flow->hash_fields,
+						 (*rte_flow->queues),
+						 rte_flow->queues_n);
+	if (rte_flow->frxq.hrxq) {
+		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
+				   NULL, "duplicated flow");
+		goto error;
 	}
-	rte_flow->qp = ibv_create_qp_ex(
-		priv->ctx,
-		&(struct ibv_qp_init_attr_ex){
-			.qp_type = IBV_QPT_RAW_PACKET,
-			.comp_mask =
-				IBV_QP_INIT_ATTR_PD |
-				IBV_QP_INIT_ATTR_IND_TABLE |
-				IBV_QP_INIT_ATTR_RX_HASH,
-			.rx_hash_conf = (struct ibv_rx_hash_conf){
-				.rx_hash_function =
-					IBV_RX_HASH_FUNC_TOEPLITZ,
-				.rx_hash_key_len = rss_hash_default_key_len,
-				.rx_hash_key = rss_hash_default_key,
-				.rx_hash_fields_mask = rte_flow->hash_fields,
-			},
-			.rwq_ind_tbl = rte_flow->ind_table->ind_table,
-			.pd = priv->pd
-		});
-	if (!rte_flow->qp) {
+	rte_flow->frxq.hrxq = mlx5_priv_hrxq_new(priv, rss_hash_default_key,
+						 rss_hash_default_key_len,
+						 flow->hash_fields,
+						 (*rte_flow->queues),
+						 rte_flow->queues_n);
+	if (!rte_flow->frxq.hrxq) {
 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
-				   NULL, "cannot allocate QP");
+				   NULL, "cannot create hash rxq");
 		goto error;
 	}
+	for (i = 0; i != flow->actions.queues_n; ++i) {
+		struct mlx5_rxq_data *q =
+			(*priv->rxqs)[flow->actions.queues[i]];
+
+		q->mark |= flow->actions.mark;
+	}
 	if (!priv->dev->data->dev_started)
 		return rte_flow;
-	rte_flow->ibv_flow = ibv_create_flow(rte_flow->qp,
+	rte_flow->ibv_flow = ibv_create_flow(rte_flow->frxq.hrxq->qp,
 					     rte_flow->ibv_attr);
 	if (!rte_flow->ibv_flow) {
 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
@@ -1156,10 +1158,8 @@ priv_flow_create_action_queue(struct priv *priv,
 	return rte_flow;
 error:
 	assert(rte_flow);
-	if (rte_flow->qp)
-		ibv_destroy_qp(rte_flow->qp);
-	if (rte_flow->ind_table)
-		mlx5_priv_ind_table_ibv_release(priv, rte_flow->ind_table);
+	if (rte_flow->frxq.hrxq)
+		mlx5_priv_hrxq_release(priv, rte_flow->frxq.hrxq);
 	rte_free(rte_flow);
 	return NULL;
 }
@@ -1277,45 +1277,43 @@ priv_flow_destroy(struct priv *priv,
 		  struct rte_flow *flow)
 {
 	unsigned int i;
+	uint16_t *queues;
+	uint16_t queues_n;
 
-	TAILQ_REMOVE(&priv->flows, flow, next);
-	if (flow->ibv_flow)
-		claim_zero(ibv_destroy_flow(flow->ibv_flow));
-	if (flow->drop)
+	if (flow->drop || !flow->mark)
 		goto free;
-	if (flow->qp)
-		claim_zero(ibv_destroy_qp(flow->qp));
-	for (i = 0; i != flow->ind_table->queues_n; ++i) {
+	queues = flow->frxq.hrxq->ind_table->queues;
+	queues_n = flow->frxq.hrxq->ind_table->queues_n;
+	for (i = 0; i != queues_n; ++i) {
 		struct rte_flow *tmp;
-		struct mlx5_rxq_data *rxq_data =
-			(*priv->rxqs)[flow->ind_table->queues[i]];
+		struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[queues[i]];
+		int mark = 0;
 
 		/*
 		 * To remove the mark from the queue, the queue must not be
 		 * present in any other marked flow (RSS or not).
 		 */
-		if (flow->mark) {
-			int mark = 0;
-
-			TAILQ_FOREACH(tmp, &priv->flows, next) {
-				unsigned int j;
-
-				if (tmp->drop)
-					continue;
-				if (!tmp->mark)
-					continue;
-				for (j = 0;
-				     (j != tmp->ind_table->queues_n) && !mark;
-				     j++)
-					if (tmp->ind_table->queues[j] ==
-					    flow->ind_table->queues[i])
-						mark = 1;
-			}
-			rxq_data->mark = mark;
+		TAILQ_FOREACH(tmp, &priv->flows, next) {
+			unsigned int j;
+
+			if (!tmp->mark)
+				continue;
+			for (j = 0;
+			     (j != tmp->frxq.hrxq->ind_table->queues_n) &&
+			     !mark;
+			     j++)
+				if (tmp->frxq.hrxq->ind_table->queues[j] ==
+				    queues[i])
+					mark = 1;
 		}
+		rxq_data->mark = mark;
 	}
-	mlx5_priv_ind_table_ibv_release(priv, flow->ind_table);
 free:
+	if (flow->ibv_flow)
+		claim_zero(ibv_destroy_flow(flow->ibv_flow));
+	if (!flow->drop)
+		mlx5_priv_hrxq_release(priv, flow->frxq.hrxq);
+	TAILQ_REMOVE(&priv->flows, flow, next);
 	rte_free(flow->ibv_attr);
 	DEBUG("Flow destroyed %p", (void *)flow);
 	rte_free(flow);
@@ -1389,7 +1387,7 @@ mlx5_flow_flush(struct rte_eth_dev *dev,
 static int
 priv_flow_create_drop_queue(struct priv *priv)
 {
-	struct rte_flow_drop *fdq = NULL;
+	struct mlx5_hrxq_drop *fdq = NULL;
 
 	assert(priv->pd);
 	assert(priv->ctx);
@@ -1472,7 +1470,7 @@ priv_flow_create_drop_queue(struct priv *priv)
 static void
 priv_flow_delete_drop_queue(struct priv *priv)
 {
-	struct rte_flow_drop *fdq = priv->flow_drop_queue;
+	struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
 
 	if (!fdq)
 		return;
@@ -1504,9 +1502,12 @@ priv_flow_stop(struct priv *priv)
 	TAILQ_FOREACH_REVERSE(flow, &priv->flows, mlx5_flows, next) {
 		claim_zero(ibv_destroy_flow(flow->ibv_flow));
 		flow->ibv_flow = NULL;
+		mlx5_priv_hrxq_release(priv, flow->frxq.hrxq);
+		flow->frxq.hrxq = NULL;
 		if (flow->mark) {
 			unsigned int n;
-			struct mlx5_ind_table_ibv *ind_tbl = flow->ind_table;
+			struct mlx5_ind_table_ibv *ind_tbl =
+				flow->frxq.hrxq->ind_table;
 
 			for (n = 0; n < ind_tbl->queues_n; ++n)
 				(*priv->rxqs)[ind_tbl->queues[n]]->mark = 0;
@@ -1535,13 +1536,31 @@ priv_flow_start(struct priv *priv)
 	if (ret)
 		return -1;
 	TAILQ_FOREACH(flow, &priv->flows, next) {
-		struct ibv_qp *qp;
-
-		if (flow->drop)
-			qp = priv->flow_drop_queue->qp;
-		else
-			qp = flow->qp;
-		flow->ibv_flow = ibv_create_flow(qp, flow->ibv_attr);
+		if (flow->frxq.hrxq)
+			goto flow_create;
+		flow->frxq.hrxq =
+			mlx5_priv_hrxq_get(priv, rss_hash_default_key,
+					   rss_hash_default_key_len,
+					   flow->frxq.hash_fields,
+					   (*flow->queues),
+					   flow->queues_n);
+		if (flow->frxq.hrxq)
+			goto flow_create;
+		flow->frxq.hrxq =
+			mlx5_priv_hrxq_new(priv, rss_hash_default_key,
+					   rss_hash_default_key_len,
+					   flow->frxq.hash_fields,
+					   (*flow->queues),
+					   flow->queues_n);
+		if (!flow->frxq.hrxq) {
+			DEBUG("Flow %p cannot be applied",
+			      (void *)flow);
+			rte_errno = EINVAL;
+			return rte_errno;
+		}
+flow_create:
+		flow->ibv_flow = ibv_create_flow(flow->frxq.hrxq->qp,
+						 flow->ibv_attr);
 		if (!flow->ibv_flow) {
 			DEBUG("Flow %p cannot be applied", (void *)flow);
 			rte_errno = EINVAL;
@@ -1551,8 +1570,11 @@ priv_flow_start(struct priv *priv)
 		if (flow->mark) {
 			unsigned int n;
 
-			for (n = 0; n < flow->ind_table->queues_n; ++n) {
-				uint16_t idx = flow->ind_table->queues[n];
+			for (n = 0;
+			     n < flow->frxq.hrxq->ind_table->queues_n;
+			     ++n) {
+				uint16_t idx =
+					flow->frxq.hrxq->ind_table->queues[n];
 				(*priv->rxqs)[idx]->mark = 1;
 			}
 		}
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 4a53282..b240c16 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -1775,3 +1775,168 @@ mlx5_priv_ind_table_ibv_verify(struct priv *priv)
 	}
 	return ret;
 }
+
+/**
+ * Create an Rx Hash queue.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param rss_key
+ *   RSS key for the Rx hash queue.
+ * @param rss_key_len
+ *   RSS key length.
+ * @param hash_fields
+ *   Verbs protocol hash field to make the RSS on.
+ * @param queues
+ *   Queues entering in hash queue.
+ * @param queues_n
+ *   Number of queues.
+ *
+ * @return
+ *   An hash Rx queue on success.
+ */
+struct mlx5_hrxq*
+mlx5_priv_hrxq_new(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len,
+		   uint64_t hash_fields, uint16_t queues[], uint16_t queues_n)
+{
+	struct mlx5_hrxq *hrxq;
+	struct mlx5_ind_table_ibv *ind_tbl;
+	struct ibv_qp *qp;
+
+	ind_tbl = mlx5_priv_ind_table_ibv_get(priv, queues, queues_n);
+	if (!ind_tbl)
+		ind_tbl = mlx5_priv_ind_table_ibv_new(priv, queues, queues_n);
+	if (!ind_tbl)
+		return NULL;
+	qp = ibv_create_qp_ex(
+		priv->ctx,
+		&(struct ibv_qp_init_attr_ex){
+			.qp_type = IBV_QPT_RAW_PACKET,
+			.comp_mask =
+				IBV_QP_INIT_ATTR_PD |
+				IBV_QP_INIT_ATTR_IND_TABLE |
+				IBV_QP_INIT_ATTR_RX_HASH,
+			.rx_hash_conf = (struct ibv_rx_hash_conf){
+				.rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
+				.rx_hash_key_len = rss_key_len,
+				.rx_hash_key = rss_key,
+				.rx_hash_fields_mask = hash_fields,
+			},
+			.rwq_ind_tbl = ind_tbl->ind_table,
+			.pd = priv->pd,
+		});
+	if (!qp)
+		goto error;
+	hrxq = rte_calloc(__func__, 1, sizeof(*hrxq) + rss_key_len, 0);
+	if (!hrxq)
+		goto error;
+	hrxq->ind_table = ind_tbl;
+	hrxq->qp = qp;
+	hrxq->rss_key_len = rss_key_len;
+	hrxq->hash_fields = hash_fields;
+	memcpy(hrxq->rss_key, rss_key, rss_key_len);
+	rte_atomic32_inc(&hrxq->refcnt);
+	LIST_INSERT_HEAD(&priv->hrxqs, hrxq, next);
+	DEBUG("%p: Hash Rx queue %p: refcnt %d", (void *)priv,
+	      (void *)hrxq, rte_atomic32_read(&hrxq->refcnt));
+	return hrxq;
+error:
+	mlx5_priv_ind_table_ibv_release(priv, ind_tbl);
+	if (qp)
+		claim_zero(ibv_destroy_qp(qp));
+	return NULL;
+}
+
+/**
+ * Get an Rx Hash queue.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param rss_conf
+ *   RSS configuration for the Rx hash queue.
+ * @param queues
+ *   Queues entering in hash queue.
+ * @param queues_n
+ *   Number of queues.
+ *
+ * @return
+ *   An hash Rx queue on success.
+ */
+struct mlx5_hrxq*
+mlx5_priv_hrxq_get(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len,
+		   uint64_t hash_fields, uint16_t queues[], uint16_t queues_n)
+{
+	struct mlx5_hrxq *hrxq;
+
+	LIST_FOREACH(hrxq, &priv->hrxqs, next) {
+		struct mlx5_ind_table_ibv *ind_tbl;
+
+		if (hrxq->rss_key_len != rss_key_len)
+			continue;
+		if (memcmp(hrxq->rss_key, rss_key, rss_key_len))
+			continue;
+		if (hrxq->hash_fields != hash_fields)
+			continue;
+		ind_tbl = mlx5_priv_ind_table_ibv_get(priv, queues, queues_n);
+		if (!ind_tbl)
+			continue;
+		if (ind_tbl != hrxq->ind_table) {
+			mlx5_priv_ind_table_ibv_release(priv, ind_tbl);
+			continue;
+		}
+		rte_atomic32_inc(&hrxq->refcnt);
+		DEBUG("%p: Hash Rx queue %p: refcnt %d", (void *)priv,
+		      (void *)hrxq, rte_atomic32_read(&hrxq->refcnt));
+		return hrxq;
+	}
+	return NULL;
+}
+
+/**
+ * Release the hash Rx queue.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param hrxq
+ *   Pointer to Hash Rx queue to release.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+int
+mlx5_priv_hrxq_release(struct priv *priv, struct mlx5_hrxq *hrxq)
+{
+	DEBUG("%p: Hash Rx queue %p: refcnt %d", (void *)priv,
+	      (void *)hrxq, rte_atomic32_read(&hrxq->refcnt));
+	if (rte_atomic32_dec_and_test(&hrxq->refcnt)) {
+		claim_zero(ibv_destroy_qp(hrxq->qp));
+		mlx5_priv_ind_table_ibv_release(priv, hrxq->ind_table);
+		LIST_REMOVE(hrxq, next);
+		rte_free(hrxq);
+		return 0;
+	}
+	claim_nonzero(mlx5_priv_ind_table_ibv_release(priv, hrxq->ind_table));
+	return EBUSY;
+}
+
+/**
+ * Verify the hash Rx queue list is empty
+ *
+ * @param priv
+ *  Pointer to private structure.
+ *
+ * @return the number of objects not released.
+ */
+int
+mlx5_priv_hrxq_ibv_verify(struct priv *priv)
+{
+	struct mlx5_hrxq *hrxq;
+	int ret = 0;
+
+	LIST_FOREACH(hrxq, &priv->hrxqs, next) {
+		DEBUG("%p: Verbs Hash Rx queue %p still referenced",
+		      (void *)priv, (void *)hrxq);
+		++ret;
+	}
+	return ret;
+}
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index b7c75bf..bb0a65d 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -165,6 +165,17 @@ struct mlx5_ind_table_ibv {
 	uint16_t queues[]; /**< Queue list. */
 };
 
+/* Hash Rx queue. */
+struct mlx5_hrxq {
+	LIST_ENTRY(mlx5_hrxq) next; /* Pointer to the next element. */
+	rte_atomic32_t refcnt; /* Reference counter. */
+	struct mlx5_ind_table_ibv *ind_table; /* Indirection table. */
+	struct ibv_qp *qp; /* Verbs queue pair. */
+	uint64_t hash_fields; /* Verbs Hash fields. */
+	uint8_t rss_key_len; /* Hash key length in bytes. */
+	uint8_t rss_key[]; /* Hash key. */
+};
+
 /* Hash RX queue types. */
 enum hash_rxq_type {
 	HASH_RXQ_TCPV4,
@@ -362,6 +373,12 @@ struct mlx5_ind_table_ibv *mlx5_priv_ind_table_ibv_get(struct priv *,
 						       uint16_t);
 int mlx5_priv_ind_table_ibv_release(struct priv *, struct mlx5_ind_table_ibv *);
 int mlx5_priv_ind_table_ibv_verify(struct priv *);
+struct mlx5_hrxq *mlx5_priv_hrxq_new(struct priv *, uint8_t *, uint8_t,
+				     uint64_t, uint16_t [], uint16_t);
+struct mlx5_hrxq *mlx5_priv_hrxq_get(struct priv *, uint8_t *, uint8_t,
+				     uint64_t, uint16_t [], uint16_t);
+int mlx5_priv_hrxq_release(struct priv *, struct mlx5_hrxq *);
+int mlx5_priv_hrxq_ibv_verify(struct priv *);
 
 /* mlx5_txq.c */
 
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v3 16/30] net/mlx5: fix clang compilation error
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (40 preceding siblings ...)
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 15/30] net/mlx5: add Hash Rx queue object Nelio Laranjeiro
@ 2017-10-09 14:44 ` Nelio Laranjeiro
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 17/30] net/mlx5: use flow to enable promiscuous mode Nelio Laranjeiro
                   ` (13 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-09 14:44 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit, stable
drivers/net/mlx5/mlx5_rxq.c:606:6: error: comparison of constant 4
      with expression of type 'enum hash_rxq_flow_type' is always true
      [-Werror,-Wtautological-constant-out-of-range-compare]
                        i != (int)RTE_DIM((*priv->hash_rxqs)[0].special_flow);
                        ~ ^  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Clang expects the index to go up to the size of special_flow, which is
defined by MLX5_MAX_SPECIAL_FLOWS and whose value is 4.  Comparing to an
unrelated enum whose values may be lower causes this compilation issue.
Fixes: 36351ea34b92 ("net/mlx: fix build with icc")
Cc: ferruh.yigit@intel.com
Cc: stable@dpdk.org
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5_rxq.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index b240c16..81e9eb5 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -606,11 +606,9 @@ priv_allow_flow_type(struct priv *priv, enum hash_rxq_flow_type type)
 int
 priv_rehash_flows(struct priv *priv)
 {
-	enum hash_rxq_flow_type i;
+	size_t i;
 
-	for (i = HASH_RXQ_FLOW_TYPE_PROMISC;
-			i != RTE_DIM((*priv->hash_rxqs)[0].special_flow);
-			++i)
+	for (i = 0; i != RTE_DIM((*priv->hash_rxqs)[0].special_flow); ++i)
 		if (!priv_allow_flow_type(priv, i)) {
 			priv_special_flow_disable(priv, i);
 		} else {
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v3 17/30] net/mlx5: use flow to enable promiscuous mode
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (41 preceding siblings ...)
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 16/30] net/mlx5: fix clang compilation error Nelio Laranjeiro
@ 2017-10-09 14:44 ` Nelio Laranjeiro
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 18/30] net/mlx5: use flow to enable all multi mode Nelio Laranjeiro
                   ` (12 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-09 14:44 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
RSS hash configuration is currently ignored by the PMD; this commit
removes the RSS feature on promiscuous mode.
This functionality will be added in a later commit.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5.c         |   3 +-
 drivers/net/mlx5/mlx5.h         |  15 +++--
 drivers/net/mlx5/mlx5_flow.c    | 141 ++++++++++++++++++++++++++++++++++------
 drivers/net/mlx5/mlx5_rxmode.c  |  52 +++++----------
 drivers/net/mlx5/mlx5_rxq.c     |   6 --
 drivers/net/mlx5/mlx5_rxtx.h    |   3 -
 drivers/net/mlx5/mlx5_trigger.c |  19 ++++--
 7 files changed, 166 insertions(+), 73 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index fd8138b..97d6a21 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -201,7 +201,7 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	priv_special_flow_disable_all(priv);
 	priv_mac_addrs_disable(priv);
 	priv_destroy_hash_rxqs(priv);
-
+	priv_flow_flush(priv, &priv->flows);
 	/* Prevent crashes when queues are still in use. */
 	dev->rx_pkt_burst = removed_rx_burst;
 	dev->tx_pkt_burst = removed_tx_burst;
@@ -884,6 +884,7 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 		priv->dev = eth_dev;
 		eth_dev->dev_ops = &mlx5_dev_ops;
 		TAILQ_INIT(&priv->flows);
+		TAILQ_INIT(&priv->ctrl_flows);
 
 		/* Hint libmlx5 to use PMD allocator for data plane resources */
 		struct mlx5dv_ctx_allocators alctr = {
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 77413c9..2699917 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -39,6 +39,7 @@
 #include <limits.h>
 #include <net/if.h>
 #include <netinet/in.h>
+#include <sys/queue.h>
 
 /* Verbs header. */
 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
@@ -86,6 +87,9 @@ struct mlx5_xstats_ctrl {
 	uint64_t base[MLX5_MAX_XSTATS];
 };
 
+/* Flow list. */
+TAILQ_HEAD(mlx5_flows, rte_flow);
+
 struct priv {
 	struct rte_eth_dev *dev; /* Ethernet device of master process. */
 	struct ibv_context *ctx; /* Verbs context. */
@@ -104,7 +108,6 @@ struct priv {
 	/* Device properties. */
 	uint16_t mtu; /* Configured MTU. */
 	uint8_t port; /* Physical port number. */
-	unsigned int promisc_req:1; /* Promiscuous mode requested. */
 	unsigned int allmulti_req:1; /* All multicast mode requested. */
 	unsigned int hw_csum:1; /* Checksum offload is supported. */
 	unsigned int hw_csum_l2tun:1; /* Same for L2 tunnels. */
@@ -145,7 +148,8 @@ struct priv {
 	unsigned int (*reta_idx)[]; /* RETA index table. */
 	unsigned int reta_idx_n; /* RETA index size. */
 	struct mlx5_hrxq_drop *flow_drop_queue; /* Flow drop queue. */
-	TAILQ_HEAD(mlx5_flows, rte_flow) flows; /* RTE Flow rules. */
+	struct mlx5_flows flows; /* RTE Flow rules. */
+	struct mlx5_flows ctrl_flows; /* Control flow rules. */
 	LIST_HEAD(mr, mlx5_mr) mr; /* Memory region. */
 	LIST_HEAD(rxq, mlx5_rxq_ctrl) rxqsctrl; /* DPDK Rx queues. */
 	LIST_HEAD(rxqibv, mlx5_rxq_ibv) rxqsibv; /* Verbs Rx queues. */
@@ -293,11 +297,14 @@ struct rte_flow *mlx5_flow_create(struct rte_eth_dev *,
 				  struct rte_flow_error *);
 int mlx5_flow_destroy(struct rte_eth_dev *, struct rte_flow *,
 		      struct rte_flow_error *);
+void priv_flow_flush(struct priv *, struct mlx5_flows *);
 int mlx5_flow_flush(struct rte_eth_dev *, struct rte_flow_error *);
 int mlx5_flow_isolate(struct rte_eth_dev *, int, struct rte_flow_error *);
-int priv_flow_start(struct priv *);
-void priv_flow_stop(struct priv *);
+int priv_flow_start(struct priv *, struct mlx5_flows *);
+void priv_flow_stop(struct priv *, struct mlx5_flows *);
 int priv_flow_verify(struct priv *);
+int mlx5_ctrl_flow(struct rte_eth_dev *, struct rte_flow_item_eth *,
+		   struct rte_flow_item_eth *, unsigned int);
 
 /* mlx5_socket.c */
 
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 4948882..8512905 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -52,6 +52,9 @@
 #include "mlx5.h"
 #include "mlx5_prm.h"
 
+/* Define minimal priority for control plane flows. */
+#define MLX5_CTRL_FLOW_PRIORITY 4
+
 static int
 mlx5_flow_create_eth(const struct rte_flow_item *item,
 		     const void *default_mask,
@@ -451,7 +454,7 @@ priv_flow_validate(struct priv *priv,
 				   "groups are not supported");
 		return -rte_errno;
 	}
-	if (attr->priority) {
+	if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
 		rte_flow_error_set(error, ENOTSUP,
 				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
 				   NULL,
@@ -1169,6 +1172,8 @@ priv_flow_create_action_queue(struct priv *priv,
  *
  * @param priv
  *   Pointer to private structure.
+ * @param list
+ *   Pointer to a TAILQ flow list.
  * @param[in] attr
  *   Flow rule attributes.
  * @param[in] pattern
@@ -1183,6 +1188,7 @@ priv_flow_create_action_queue(struct priv *priv,
  */
 static struct rte_flow *
 priv_flow_create(struct priv *priv,
+		 struct mlx5_flows *list,
 		 const struct rte_flow_attr *attr,
 		 const struct rte_flow_item items[],
 		 const struct rte_flow_action actions[],
@@ -1232,6 +1238,10 @@ priv_flow_create(struct priv *priv,
 		rte_flow = priv_flow_create_action_queue(priv, &flow, error);
 	if (!rte_flow)
 		goto exit;
+	if (rte_flow) {
+		TAILQ_INSERT_TAIL(list, rte_flow, next);
+		DEBUG("Flow created %p", (void *)rte_flow);
+	}
 	return rte_flow;
 exit:
 	rte_free(flow.ibv_attr);
@@ -1255,11 +1265,8 @@ mlx5_flow_create(struct rte_eth_dev *dev,
 	struct rte_flow *flow;
 
 	priv_lock(priv);
-	flow = priv_flow_create(priv, attr, items, actions, error);
-	if (flow) {
-		TAILQ_INSERT_TAIL(&priv->flows, flow, next);
-		DEBUG("Flow created %p", (void *)flow);
-	}
+	flow = priv_flow_create(priv, &priv->flows, attr, items, actions,
+				error);
 	priv_unlock(priv);
 	return flow;
 }
@@ -1269,11 +1276,14 @@ mlx5_flow_create(struct rte_eth_dev *dev,
  *
  * @param priv
  *   Pointer to private structure.
+ * @param list
+ *   Pointer to a TAILQ flow list.
  * @param[in] flow
  *   Flow to destroy.
  */
 static void
 priv_flow_destroy(struct priv *priv,
+		  struct mlx5_flows *list,
 		  struct rte_flow *flow)
 {
 	unsigned int i;
@@ -1293,7 +1303,7 @@ priv_flow_destroy(struct priv *priv,
 		 * To remove the mark from the queue, the queue must not be
 		 * present in any other marked flow (RSS or not).
 		 */
-		TAILQ_FOREACH(tmp, &priv->flows, next) {
+		TAILQ_FOREACH(tmp, list, next) {
 			unsigned int j;
 
 			if (!tmp->mark)
@@ -1313,7 +1323,7 @@ priv_flow_destroy(struct priv *priv,
 		claim_zero(ibv_destroy_flow(flow->ibv_flow));
 	if (!flow->drop)
 		mlx5_priv_hrxq_release(priv, flow->frxq.hrxq);
-	TAILQ_REMOVE(&priv->flows, flow, next);
+	TAILQ_REMOVE(list, flow, next);
 	rte_free(flow->ibv_attr);
 	DEBUG("Flow destroyed %p", (void *)flow);
 	rte_free(flow);
@@ -1334,7 +1344,7 @@ mlx5_flow_destroy(struct rte_eth_dev *dev,
 
 	(void)error;
 	priv_lock(priv);
-	priv_flow_destroy(priv, flow);
+	priv_flow_destroy(priv, &priv->flows, flow);
 	priv_unlock(priv);
 	return 0;
 }
@@ -1344,15 +1354,17 @@ mlx5_flow_destroy(struct rte_eth_dev *dev,
  *
  * @param priv
  *   Pointer to private structure.
+ * @param list
+ *   Pointer to a TAILQ flow list.
  */
-static void
-priv_flow_flush(struct priv *priv)
+void
+priv_flow_flush(struct priv *priv, struct mlx5_flows *list)
 {
-	while (!TAILQ_EMPTY(&priv->flows)) {
+	while (!TAILQ_EMPTY(list)) {
 		struct rte_flow *flow;
 
-		flow = TAILQ_FIRST(&priv->flows);
-		priv_flow_destroy(priv, flow);
+		flow = TAILQ_FIRST(list);
+		priv_flow_destroy(priv, list, flow);
 	}
 }
 
@@ -1370,7 +1382,7 @@ mlx5_flow_flush(struct rte_eth_dev *dev,
 
 	(void)error;
 	priv_lock(priv);
-	priv_flow_flush(priv);
+	priv_flow_flush(priv, &priv->flows);
 	priv_unlock(priv);
 	return 0;
 }
@@ -1493,13 +1505,15 @@ priv_flow_delete_drop_queue(struct priv *priv)
  *
  * @param priv
  *   Pointer to private structure.
+ * @param list
+ *   Pointer to a TAILQ flow list.
  */
 void
-priv_flow_stop(struct priv *priv)
+priv_flow_stop(struct priv *priv, struct mlx5_flows *list)
 {
 	struct rte_flow *flow;
 
-	TAILQ_FOREACH_REVERSE(flow, &priv->flows, mlx5_flows, next) {
+	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
 		claim_zero(ibv_destroy_flow(flow->ibv_flow));
 		flow->ibv_flow = NULL;
 		mlx5_priv_hrxq_release(priv, flow->frxq.hrxq);
@@ -1522,12 +1536,14 @@ priv_flow_stop(struct priv *priv)
  *
  * @param priv
  *   Pointer to private structure.
+ * @param list
+ *   Pointer to a TAILQ flow list.
  *
  * @return
  *   0 on success, a errno value otherwise and rte_errno is set.
  */
 int
-priv_flow_start(struct priv *priv)
+priv_flow_start(struct priv *priv, struct mlx5_flows *list)
 {
 	int ret;
 	struct rte_flow *flow;
@@ -1535,7 +1551,7 @@ priv_flow_start(struct priv *priv)
 	ret = priv_flow_create_drop_queue(priv);
 	if (ret)
 		return -1;
-	TAILQ_FOREACH(flow, &priv->flows, next) {
+	TAILQ_FOREACH(flow, list, next) {
 		if (flow->frxq.hrxq)
 			goto flow_create;
 		flow->frxq.hrxq =
@@ -1630,3 +1646,90 @@ priv_flow_verify(struct priv *priv)
 	}
 	return ret;
 }
+
+/**
+ * Enable/disable a control flow configured from the control plane.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param spec
+ *   An Ethernet flow spec to apply.
+ * @param mask
+ *   An Ethernet flow mask to apply.
+ * @param enable
+ *   Enable/disable the flow.
+ *
+ * @return
+ *   0 on success, 1 if the control flow cannot be created.
+ */
+int
+mlx5_ctrl_flow(struct rte_eth_dev *dev,
+	       struct rte_flow_item_eth *spec,
+	       struct rte_flow_item_eth *mask,
+	       unsigned int enable)
+{
+	struct priv *priv = dev->data->dev_private;
+	const struct rte_flow_attr attr = {
+		.ingress = 1,
+		.priority = MLX5_CTRL_FLOW_PRIORITY,
+	};
+	struct rte_flow_item items[] = {
+		{
+			.type = RTE_FLOW_ITEM_TYPE_ETH,
+			.spec = spec,
+			.last = NULL,
+			.mask = mask,
+		},
+		{
+			.type = RTE_FLOW_ITEM_TYPE_END,
+		},
+	};
+	struct rte_flow_action actions[] = {
+		{
+			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
+			.conf = &(struct rte_flow_action_queue){
+				.index = 0,
+			},
+		},
+		{
+			.type = RTE_FLOW_ACTION_TYPE_END,
+		},
+	};
+	struct rte_flow *flow;
+	struct rte_flow_error error;
+
+	if (enable) {
+		flow = priv_flow_create(priv, &priv->ctrl_flows, &attr, items,
+					actions, &error);
+		if (!flow)
+			return 1;
+	} else {
+		struct spec {
+			struct ibv_flow_attr ibv_attr;
+			struct ibv_flow_spec_eth eth;
+		} spec;
+		struct mlx5_flow_parse parser = {
+			.ibv_attr = &spec.ibv_attr,
+			.offset = sizeof(struct ibv_flow_attr),
+		};
+		struct ibv_flow_spec_eth *eth;
+		const unsigned int attr_size = sizeof(struct ibv_flow_attr);
+
+		claim_zero(mlx5_flow_create_eth(&items[0], NULL, &parser));
+		TAILQ_FOREACH(flow, &priv->ctrl_flows, next) {
+			eth = (void *)((uintptr_t)flow->ibv_attr + attr_size);
+			assert(eth->type == IBV_FLOW_SPEC_ETH);
+			if (!memcmp(eth, &spec.eth, sizeof(*eth)))
+				break;
+		}
+		if (flow) {
+			claim_zero(ibv_destroy_flow(flow->ibv_flow));
+			mlx5_priv_hrxq_release(priv, flow->frxq.hrxq);
+			rte_free(flow->ibv_attr);
+			DEBUG("Control flow destroyed %p", (void *)flow);
+			TAILQ_REMOVE(&priv->ctrl_flows, flow, next);
+			rte_free(flow);
+		}
+	}
+	return 0;
+}
diff --git a/drivers/net/mlx5/mlx5_rxmode.c b/drivers/net/mlx5/mlx5_rxmode.c
index e9ea2aa..f469f41 100644
--- a/drivers/net/mlx5/mlx5_rxmode.c
+++ b/drivers/net/mlx5/mlx5_rxmode.c
@@ -53,20 +53,6 @@
 
 /* Initialization data for special flows. */
 static const struct special_flow_init special_flow_init[] = {
-	[HASH_RXQ_FLOW_TYPE_PROMISC] = {
-		.dst_mac_val = "\x00\x00\x00\x00\x00\x00",
-		.dst_mac_mask = "\x00\x00\x00\x00\x00\x00",
-		.hash_types =
-			1 << HASH_RXQ_TCPV4 |
-			1 << HASH_RXQ_UDPV4 |
-			1 << HASH_RXQ_IPV4 |
-			1 << HASH_RXQ_TCPV6 |
-			1 << HASH_RXQ_UDPV6 |
-			1 << HASH_RXQ_IPV6 |
-			1 << HASH_RXQ_ETH |
-			0,
-		.per_vlan = 0,
-	},
 	[HASH_RXQ_FLOW_TYPE_ALLMULTI] = {
 		.dst_mac_val = "\x01\x00\x00\x00\x00\x00",
 		.dst_mac_mask = "\x01\x00\x00\x00\x00\x00",
@@ -346,7 +332,7 @@ priv_special_flow_enable_all(struct priv *priv)
 
 	if (priv->isolated)
 		return 0;
-	for (flow_type = HASH_RXQ_FLOW_TYPE_PROMISC;
+	for (flow_type = HASH_RXQ_FLOW_TYPE_ALLMULTI;
 			flow_type != HASH_RXQ_FLOW_TYPE_MAC;
 			++flow_type) {
 		int ret;
@@ -373,7 +359,7 @@ priv_special_flow_disable_all(struct priv *priv)
 {
 	enum hash_rxq_flow_type flow_type;
 
-	for (flow_type = HASH_RXQ_FLOW_TYPE_PROMISC;
+	for (flow_type = HASH_RXQ_FLOW_TYPE_ALLMULTI;
 			flow_type != HASH_RXQ_FLOW_TYPE_MAC;
 			++flow_type)
 		priv_special_flow_disable(priv, flow_type);
@@ -388,19 +374,16 @@ priv_special_flow_disable_all(struct priv *priv)
 void
 mlx5_promiscuous_enable(struct rte_eth_dev *dev)
 {
-	struct priv *priv = dev->data->dev_private;
-	int ret;
+	struct rte_flow_item_eth eth = {
+		.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
+		.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
+		.type = 0,
+	};
 
 	if (mlx5_is_secondary())
 		return;
-
-	priv_lock(priv);
-	priv->promisc_req = 1;
-	ret = priv_rehash_flows(priv);
-	if (ret)
-		ERROR("error while enabling promiscuous mode: %s",
-		      strerror(ret));
-	priv_unlock(priv);
+	dev->data->promiscuous = 1;
+	claim_zero(mlx5_ctrl_flow(dev, &eth, &eth, 1));
 }
 
 /**
@@ -412,19 +395,16 @@ mlx5_promiscuous_enable(struct rte_eth_dev *dev)
 void
 mlx5_promiscuous_disable(struct rte_eth_dev *dev)
 {
-	struct priv *priv = dev->data->dev_private;
-	int ret;
+	struct rte_flow_item_eth eth = {
+		.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
+		.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
+		.type = 0,
+	};
 
 	if (mlx5_is_secondary())
 		return;
-
-	priv_lock(priv);
-	priv->promisc_req = 0;
-	ret = priv_rehash_flows(priv);
-	if (ret)
-		ERROR("error while disabling promiscuous mode: %s",
-		      strerror(ret));
-	priv_unlock(priv);
+	dev->data->promiscuous = 0;
+	claim_zero(mlx5_ctrl_flow(dev, &eth, &eth, 0));
 }
 
 /**
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 81e9eb5..d3d1355 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -571,13 +571,7 @@ priv_destroy_hash_rxqs(struct priv *priv)
 int
 priv_allow_flow_type(struct priv *priv, enum hash_rxq_flow_type type)
 {
-	/* Only FLOW_TYPE_PROMISC is allowed when promiscuous mode
-	 * has been requested. */
-	if (priv->promisc_req)
-		return type == HASH_RXQ_FLOW_TYPE_PROMISC;
 	switch (type) {
-	case HASH_RXQ_FLOW_TYPE_PROMISC:
-		return !!priv->promisc_req;
 	case HASH_RXQ_FLOW_TYPE_ALLMULTI:
 		return !!priv->allmulti_req;
 	case HASH_RXQ_FLOW_TYPE_BROADCAST:
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index bb0a65d..ffba64e 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -237,7 +237,6 @@ struct special_flow_init {
 };
 
 enum hash_rxq_flow_type {
-	HASH_RXQ_FLOW_TYPE_PROMISC,
 	HASH_RXQ_FLOW_TYPE_ALLMULTI,
 	HASH_RXQ_FLOW_TYPE_BROADCAST,
 	HASH_RXQ_FLOW_TYPE_IPV6MULTI,
@@ -249,8 +248,6 @@ static inline const char *
 hash_rxq_flow_type_str(enum hash_rxq_flow_type flow_type)
 {
 	switch (flow_type) {
-	case HASH_RXQ_FLOW_TYPE_PROMISC:
-		return "promiscuous";
 	case HASH_RXQ_FLOW_TYPE_ALLMULTI:
 		return "allmulticast";
 	case HASH_RXQ_FLOW_TYPE_BROADCAST:
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index a311499..085abcc 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -163,7 +163,16 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 		      (void *)priv, strerror(err));
 		goto error;
 	}
-	err = priv_flow_start(priv);
+	if (dev->data->promiscuous)
+		mlx5_promiscuous_enable(dev);
+	err = priv_flow_start(priv, &priv->ctrl_flows);
+	if (err) {
+		ERROR("%p: an error occurred while configuring control flows:"
+		      " %s",
+		      (void *)priv, strerror(err));
+		goto error;
+	}
+	err = priv_flow_start(priv, &priv->flows);
 	if (err) {
 		ERROR("%p: an error occurred while configuring flows:"
 		      " %s",
@@ -187,7 +196,8 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 	priv_special_flow_disable_all(priv);
 	priv_mac_addrs_disable(priv);
 	priv_destroy_hash_rxqs(priv);
-	priv_flow_stop(priv);
+	priv_flow_stop(priv, &priv->flows);
+	priv_flow_flush(priv, &priv->ctrl_flows);
 	priv_rxq_stop(priv);
 	priv_txq_stop(priv);
 	priv_unlock(priv);
@@ -222,13 +232,14 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
 	priv_special_flow_disable_all(priv);
 	priv_mac_addrs_disable(priv);
 	priv_destroy_hash_rxqs(priv);
-	priv_flow_stop(priv);
+	priv_flow_stop(priv, &priv->flows);
+	priv_flow_flush(priv, &priv->ctrl_flows);
 	priv_rx_intr_vec_disable(priv);
+	priv_dev_interrupt_handler_uninstall(priv, dev);
 	priv_txq_stop(priv);
 	priv_rxq_stop(priv);
 	LIST_FOREACH(mr, &priv->mr, next) {
 		priv_mr_release(priv, mr);
 	}
-	priv_dev_interrupt_handler_uninstall(priv, dev);
 	priv_unlock(priv);
 }
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v3 18/30] net/mlx5: use flow to enable all multi mode
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (42 preceding siblings ...)
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 17/30] net/mlx5: use flow to enable promiscuous mode Nelio Laranjeiro
@ 2017-10-09 14:44 ` Nelio Laranjeiro
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 19/30] net/mlx5: use flow to enable unicast traffic Nelio Laranjeiro
                   ` (11 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-09 14:44 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
RSS hash configuration is currently ignored by the PMD; this commit
removes the RSS feature on promiscuous mode.
This functionality will be added in a later commit.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5.h         |  1 -
 drivers/net/mlx5/mlx5_rxmode.c  | 52 ++++++++++++++---------------------------
 drivers/net/mlx5/mlx5_rxq.c     |  7 ++----
 drivers/net/mlx5/mlx5_rxtx.h    |  3 ---
 drivers/net/mlx5/mlx5_trigger.c |  2 ++
 5 files changed, 22 insertions(+), 43 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 2699917..45673b1 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -108,7 +108,6 @@ struct priv {
 	/* Device properties. */
 	uint16_t mtu; /* Configured MTU. */
 	uint8_t port; /* Physical port number. */
-	unsigned int allmulti_req:1; /* All multicast mode requested. */
 	unsigned int hw_csum:1; /* Checksum offload is supported. */
 	unsigned int hw_csum_l2tun:1; /* Same for L2 tunnels. */
 	unsigned int hw_vlan_strip:1; /* VLAN stripping is supported. */
diff --git a/drivers/net/mlx5/mlx5_rxmode.c b/drivers/net/mlx5/mlx5_rxmode.c
index f469f41..0c75889 100644
--- a/drivers/net/mlx5/mlx5_rxmode.c
+++ b/drivers/net/mlx5/mlx5_rxmode.c
@@ -53,18 +53,6 @@
 
 /* Initialization data for special flows. */
 static const struct special_flow_init special_flow_init[] = {
-	[HASH_RXQ_FLOW_TYPE_ALLMULTI] = {
-		.dst_mac_val = "\x01\x00\x00\x00\x00\x00",
-		.dst_mac_mask = "\x01\x00\x00\x00\x00\x00",
-		.hash_types =
-			1 << HASH_RXQ_UDPV4 |
-			1 << HASH_RXQ_IPV4 |
-			1 << HASH_RXQ_UDPV6 |
-			1 << HASH_RXQ_IPV6 |
-			1 << HASH_RXQ_ETH |
-			0,
-		.per_vlan = 0,
-	},
 	[HASH_RXQ_FLOW_TYPE_BROADCAST] = {
 		.dst_mac_val = "\xff\xff\xff\xff\xff\xff",
 		.dst_mac_mask = "\xff\xff\xff\xff\xff\xff",
@@ -332,7 +320,7 @@ priv_special_flow_enable_all(struct priv *priv)
 
 	if (priv->isolated)
 		return 0;
-	for (flow_type = HASH_RXQ_FLOW_TYPE_ALLMULTI;
+	for (flow_type = HASH_RXQ_FLOW_TYPE_BROADCAST;
 			flow_type != HASH_RXQ_FLOW_TYPE_MAC;
 			++flow_type) {
 		int ret;
@@ -359,7 +347,7 @@ priv_special_flow_disable_all(struct priv *priv)
 {
 	enum hash_rxq_flow_type flow_type;
 
-	for (flow_type = HASH_RXQ_FLOW_TYPE_ALLMULTI;
+	for (flow_type = HASH_RXQ_FLOW_TYPE_BROADCAST;
 			flow_type != HASH_RXQ_FLOW_TYPE_MAC;
 			++flow_type)
 		priv_special_flow_disable(priv, flow_type);
@@ -416,19 +404,17 @@ mlx5_promiscuous_disable(struct rte_eth_dev *dev)
 void
 mlx5_allmulticast_enable(struct rte_eth_dev *dev)
 {
-	struct priv *priv = dev->data->dev_private;
-	int ret;
+	struct rte_flow_item_eth eth = {
+		.dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
+		.src.addr_bytes = "\x01\x00\x00\x00\x00\x00",
+		.type = 0,
+	};
 
 	if (mlx5_is_secondary())
 		return;
-
-	priv_lock(priv);
-	priv->allmulti_req = 1;
-	ret = priv_rehash_flows(priv);
-	if (ret)
-		ERROR("error while enabling allmulticast mode: %s",
-		      strerror(ret));
-	priv_unlock(priv);
+	dev->data->all_multicast = 1;
+	if (dev->data->dev_started)
+		claim_zero(mlx5_ctrl_flow(dev, &eth, &eth, 1));
 }
 
 /**
@@ -440,17 +426,15 @@ mlx5_allmulticast_enable(struct rte_eth_dev *dev)
 void
 mlx5_allmulticast_disable(struct rte_eth_dev *dev)
 {
-	struct priv *priv = dev->data->dev_private;
-	int ret;
+	struct rte_flow_item_eth eth = {
+		.dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
+		.src.addr_bytes = "\x01\x00\x00\x00\x00\x00",
+		.type = 0,
+	};
 
 	if (mlx5_is_secondary())
 		return;
-
-	priv_lock(priv);
-	priv->allmulti_req = 0;
-	ret = priv_rehash_flows(priv);
-	if (ret)
-		ERROR("error while disabling allmulticast mode: %s",
-		      strerror(ret));
-	priv_unlock(priv);
+	dev->data->all_multicast = 0;
+	if (dev->data->dev_started)
+		claim_zero(mlx5_ctrl_flow(dev, &eth, &eth, 0));
 }
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index d3d1355..d3cd58e 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -571,16 +571,13 @@ priv_destroy_hash_rxqs(struct priv *priv)
 int
 priv_allow_flow_type(struct priv *priv, enum hash_rxq_flow_type type)
 {
+	(void)priv;
 	switch (type) {
-	case HASH_RXQ_FLOW_TYPE_ALLMULTI:
-		return !!priv->allmulti_req;
 	case HASH_RXQ_FLOW_TYPE_BROADCAST:
 	case HASH_RXQ_FLOW_TYPE_IPV6MULTI:
-		/* If allmulti is enabled, broadcast and ipv6multi
-		 * are unnecessary. */
-		return !priv->allmulti_req;
 	case HASH_RXQ_FLOW_TYPE_MAC:
 		return 1;
+		return 1;
 	default:
 		/* Unsupported flow type is not allowed. */
 		return 0;
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index ffba64e..6f474d2 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -237,7 +237,6 @@ struct special_flow_init {
 };
 
 enum hash_rxq_flow_type {
-	HASH_RXQ_FLOW_TYPE_ALLMULTI,
 	HASH_RXQ_FLOW_TYPE_BROADCAST,
 	HASH_RXQ_FLOW_TYPE_IPV6MULTI,
 	HASH_RXQ_FLOW_TYPE_MAC,
@@ -248,8 +247,6 @@ static inline const char *
 hash_rxq_flow_type_str(enum hash_rxq_flow_type flow_type)
 {
 	switch (flow_type) {
-	case HASH_RXQ_FLOW_TYPE_ALLMULTI:
-		return "allmulticast";
 	case HASH_RXQ_FLOW_TYPE_BROADCAST:
 		return "broadcast";
 	case HASH_RXQ_FLOW_TYPE_IPV6MULTI:
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 085abcc..27e7890 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -165,6 +165,8 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 	}
 	if (dev->data->promiscuous)
 		mlx5_promiscuous_enable(dev);
+	else if (dev->data->all_multicast)
+		mlx5_allmulticast_enable(dev);
 	err = priv_flow_start(priv, &priv->ctrl_flows);
 	if (err) {
 		ERROR("%p: an error occurred while configuring control flows:"
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v3 19/30] net/mlx5: use flow to enable unicast traffic
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (43 preceding siblings ...)
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 18/30] net/mlx5: use flow to enable all multi mode Nelio Laranjeiro
@ 2017-10-09 14:44 ` Nelio Laranjeiro
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 20/30] net/mlx5: handle a single RSS hash key for all protocols Nelio Laranjeiro
                   ` (10 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-09 14:44 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
RSS hash configuration is currently ignored by the PMD; this commit
removes the RSS feature.
This functionality will be added in a later commit.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5.c         |  10 +-
 drivers/net/mlx5/mlx5.h         |  29 ++-
 drivers/net/mlx5/mlx5_defs.h    |   3 -
 drivers/net/mlx5/mlx5_flow.c    | 126 ++++++-------
 drivers/net/mlx5/mlx5_mac.c     | 407 +++-------------------------------------
 drivers/net/mlx5/mlx5_rxmode.c  | 336 +--------------------------------
 drivers/net/mlx5/mlx5_rxq.c     |  63 -------
 drivers/net/mlx5/mlx5_rxtx.h    |  26 ---
 drivers/net/mlx5/mlx5_trigger.c | 198 +++++++++++++++++--
 drivers/net/mlx5/mlx5_vlan.c    |  49 ++---
 10 files changed, 295 insertions(+), 952 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 97d6a21..c818cf8 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -198,10 +198,8 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	      ((priv->ctx != NULL) ? priv->ctx->device->name : ""));
 	/* In case mlx5_dev_stop() has not been called. */
 	priv_dev_interrupt_handler_uninstall(priv, dev);
-	priv_special_flow_disable_all(priv);
-	priv_mac_addrs_disable(priv);
 	priv_destroy_hash_rxqs(priv);
-	priv_flow_flush(priv, &priv->flows);
+	priv_dev_traffic_disable(priv, dev);
 	/* Prevent crashes when queues are still in use. */
 	dev->rx_pkt_burst = removed_rx_burst;
 	dev->tx_pkt_burst = removed_tx_burst;
@@ -843,10 +841,6 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 		     mac.addr_bytes[0], mac.addr_bytes[1],
 		     mac.addr_bytes[2], mac.addr_bytes[3],
 		     mac.addr_bytes[4], mac.addr_bytes[5]);
-		/* Register MAC address. */
-		claim_zero(priv_mac_addr_add(priv, 0,
-					     (const uint8_t (*)[ETHER_ADDR_LEN])
-					     mac.addr_bytes));
 #ifndef NDEBUG
 		{
 			char ifname[IF_NAMESIZE];
@@ -883,6 +877,8 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 		eth_dev->device->driver = &mlx5_driver.driver;
 		priv->dev = eth_dev;
 		eth_dev->dev_ops = &mlx5_dev_ops;
+		/* Register MAC address. */
+		claim_zero(mlx5_mac_addr_add(eth_dev, &mac, 0, 0));
 		TAILQ_INIT(&priv->flows);
 		TAILQ_INIT(&priv->ctrl_flows);
 
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 45673b1..e83961f 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -96,13 +96,7 @@ struct priv {
 	struct ibv_device_attr_ex device_attr; /* Device properties. */
 	struct ibv_pd *pd; /* Protection Domain. */
 	char ibdev_path[IBV_SYSFS_PATH_MAX]; /* IB device path for secondary */
-	/*
-	 * MAC addresses array and configuration bit-field.
-	 * An extra entry that cannot be modified by the DPDK is reserved
-	 * for broadcast frames (destination MAC address ff:ff:ff:ff:ff:ff).
-	 */
-	struct ether_addr mac[MLX5_MAX_MAC_ADDRESSES];
-	BITFIELD_DECLARE(mac_configured, uint32_t, MLX5_MAX_MAC_ADDRESSES);
+	struct ether_addr mac[MLX5_MAX_MAC_ADDRESSES]; /* MAC addresses. */
 	uint16_t vlan_filter[MLX5_MAX_VLAN_IDS]; /* VLAN filters table. */
 	unsigned int vlan_filter_n; /* Number of configured VLAN filters. */
 	/* Device properties. */
@@ -225,13 +219,7 @@ void priv_dev_select_rx_function(struct priv *priv, struct rte_eth_dev *dev);
 /* mlx5_mac.c */
 
 int priv_get_mac(struct priv *, uint8_t (*)[ETHER_ADDR_LEN]);
-void hash_rxq_mac_addrs_del(struct hash_rxq *);
-void priv_mac_addrs_disable(struct priv *);
 void mlx5_mac_addr_remove(struct rte_eth_dev *, uint32_t);
-int hash_rxq_mac_addrs_add(struct hash_rxq *);
-int priv_mac_addr_add(struct priv *, unsigned int,
-		      const uint8_t (*)[ETHER_ADDR_LEN]);
-int priv_mac_addrs_enable(struct priv *);
 int mlx5_mac_addr_add(struct rte_eth_dev *, struct ether_addr *, uint32_t,
 		      uint32_t);
 void mlx5_mac_addr_set(struct rte_eth_dev *, struct ether_addr *);
@@ -250,10 +238,6 @@ int mlx5_dev_rss_reta_update(struct rte_eth_dev *,
 
 /* mlx5_rxmode.c */
 
-int priv_special_flow_enable(struct priv *, enum hash_rxq_flow_type);
-void priv_special_flow_disable(struct priv *, enum hash_rxq_flow_type);
-int priv_special_flow_enable_all(struct priv *);
-void priv_special_flow_disable_all(struct priv *);
 void mlx5_promiscuous_enable(struct rte_eth_dev *);
 void mlx5_promiscuous_disable(struct rte_eth_dev *);
 void mlx5_allmulticast_enable(struct rte_eth_dev *);
@@ -280,6 +264,10 @@ void mlx5_vlan_strip_queue_set(struct rte_eth_dev *, uint16_t, int);
 
 int mlx5_dev_start(struct rte_eth_dev *);
 void mlx5_dev_stop(struct rte_eth_dev *);
+int priv_dev_traffic_enable(struct priv *, struct rte_eth_dev *);
+int priv_dev_traffic_disable(struct priv *, struct rte_eth_dev *);
+int priv_dev_traffic_restart(struct priv *, struct rte_eth_dev *);
+int mlx5_traffic_restart(struct rte_eth_dev *);
 
 /* mlx5_flow.c */
 
@@ -302,8 +290,13 @@ int mlx5_flow_isolate(struct rte_eth_dev *, int, struct rte_flow_error *);
 int priv_flow_start(struct priv *, struct mlx5_flows *);
 void priv_flow_stop(struct priv *, struct mlx5_flows *);
 int priv_flow_verify(struct priv *);
+int mlx5_ctrl_flow_vlan(struct rte_eth_dev *, struct rte_flow_item_eth *,
+			struct rte_flow_item_eth *, struct rte_flow_item_vlan *,
+			struct rte_flow_item_vlan *);
 int mlx5_ctrl_flow(struct rte_eth_dev *, struct rte_flow_item_eth *,
-		   struct rte_flow_item_eth *, unsigned int);
+		   struct rte_flow_item_eth *);
+int priv_flow_create_drop_queue(struct priv *);
+void priv_flow_delete_drop_queue(struct priv *);
 
 /* mlx5_socket.c */
 
diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h
index 59ff00d..3a7706c 100644
--- a/drivers/net/mlx5/mlx5_defs.h
+++ b/drivers/net/mlx5/mlx5_defs.h
@@ -45,9 +45,6 @@
 /* Maximum number of simultaneous VLAN filters. */
 #define MLX5_MAX_VLAN_IDS 128
 
-/* Maximum number of special flows. */
-#define MLX5_MAX_SPECIAL_FLOWS 4
-
 /*
  * Request TX completion every time descriptors reach this threshold since
  * the previous request. Must be a power of two for performance reasons.
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 8512905..83c75f4 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -1128,20 +1128,19 @@ priv_flow_create_action_queue(struct priv *priv,
 						 flow->hash_fields,
 						 (*rte_flow->queues),
 						 rte_flow->queues_n);
-	if (rte_flow->frxq.hrxq) {
-		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
-				   NULL, "duplicated flow");
-		goto error;
-	}
-	rte_flow->frxq.hrxq = mlx5_priv_hrxq_new(priv, rss_hash_default_key,
-						 rss_hash_default_key_len,
-						 flow->hash_fields,
-						 (*rte_flow->queues),
-						 rte_flow->queues_n);
 	if (!rte_flow->frxq.hrxq) {
-		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
-				   NULL, "cannot create hash rxq");
-		goto error;
+		rte_flow->frxq.hrxq =
+			mlx5_priv_hrxq_new(priv, rss_hash_default_key,
+					   rss_hash_default_key_len,
+					   flow->hash_fields,
+					   (*rte_flow->queues),
+					   rte_flow->queues_n);
+		if (!rte_flow->frxq.hrxq) {
+			rte_flow_error_set(error, ENOMEM,
+					   RTE_FLOW_ERROR_TYPE_HANDLE,
+					   NULL, "cannot create hash rxq");
+			goto error;
+		}
 	}
 	for (i = 0; i != flow->actions.queues_n; ++i) {
 		struct mlx5_rxq_data *q =
@@ -1396,7 +1395,7 @@ mlx5_flow_flush(struct rte_eth_dev *dev,
  * @return
  *   0 on success.
  */
-static int
+int
 priv_flow_create_drop_queue(struct priv *priv)
 {
 	struct mlx5_hrxq_drop *fdq = NULL;
@@ -1479,7 +1478,7 @@ priv_flow_create_drop_queue(struct priv *priv)
  * @param priv
  *   Pointer to private structure.
  */
-static void
+void
 priv_flow_delete_drop_queue(struct priv *priv)
 {
 	struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
@@ -1501,8 +1500,6 @@ priv_flow_delete_drop_queue(struct priv *priv)
 /**
  * Remove all flows.
  *
- * Called by dev_stop() to remove all flows.
- *
  * @param priv
  *   Pointer to private structure.
  * @param list
@@ -1528,7 +1525,6 @@ priv_flow_stop(struct priv *priv, struct mlx5_flows *list)
 		}
 		DEBUG("Flow %p removed", (void *)flow);
 	}
-	priv_flow_delete_drop_queue(priv);
 }
 
 /**
@@ -1545,12 +1541,8 @@ priv_flow_stop(struct priv *priv, struct mlx5_flows *list)
 int
 priv_flow_start(struct priv *priv, struct mlx5_flows *list)
 {
-	int ret;
 	struct rte_flow *flow;
 
-	ret = priv_flow_create_drop_queue(priv);
-	if (ret)
-		return -1;
 	TAILQ_FOREACH(flow, list, next) {
 		if (flow->frxq.hrxq)
 			goto flow_create;
@@ -1648,25 +1640,28 @@ priv_flow_verify(struct priv *priv)
 }
 
 /**
- * Enable/disable a control flow configured from the control plane.
+ * Enable a control flow configured from the control plane.
  *
  * @param dev
  *   Pointer to Ethernet device.
- * @param spec
+ * @param eth_spec
  *   An Ethernet flow spec to apply.
- * @param mask
+ * @param eth_mask
  *   An Ethernet flow mask to apply.
- * @param enable
- *   Enable/disable the flow.
+ * @param vlan_spec
+ *   A VLAN flow spec to apply.
+ * @param vlan_mask
+ *   A VLAN flow mask to apply.
  *
  * @return
  *   0 on success.
  */
 int
-mlx5_ctrl_flow(struct rte_eth_dev *dev,
-	       struct rte_flow_item_eth *spec,
-	       struct rte_flow_item_eth *mask,
-	       unsigned int enable)
+mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
+		    struct rte_flow_item_eth *eth_spec,
+		    struct rte_flow_item_eth *eth_mask,
+		    struct rte_flow_item_vlan *vlan_spec,
+		    struct rte_flow_item_vlan *vlan_mask)
 {
 	struct priv *priv = dev->data->dev_private;
 	const struct rte_flow_attr attr = {
@@ -1676,9 +1671,16 @@ mlx5_ctrl_flow(struct rte_eth_dev *dev,
 	struct rte_flow_item items[] = {
 		{
 			.type = RTE_FLOW_ITEM_TYPE_ETH,
-			.spec = spec,
+			.spec = eth_spec,
+			.last = NULL,
+			.mask = eth_mask,
+		},
+		{
+			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
+				RTE_FLOW_ITEM_TYPE_END,
+			.spec = vlan_spec,
 			.last = NULL,
-			.mask = mask,
+			.mask = vlan_mask,
 		},
 		{
 			.type = RTE_FLOW_ITEM_TYPE_END,
@@ -1698,38 +1700,30 @@ mlx5_ctrl_flow(struct rte_eth_dev *dev,
 	struct rte_flow *flow;
 	struct rte_flow_error error;
 
-	if (enable) {
-		flow = priv_flow_create(priv, &priv->ctrl_flows, &attr, items,
-					actions, &error);
-		if (!flow)
-			return 1;
-	} else {
-		struct spec {
-			struct ibv_flow_attr ibv_attr;
-			struct ibv_flow_spec_eth eth;
-		} spec;
-		struct mlx5_flow_parse parser = {
-			.ibv_attr = &spec.ibv_attr,
-			.offset = sizeof(struct ibv_flow_attr),
-		};
-		struct ibv_flow_spec_eth *eth;
-		const unsigned int attr_size = sizeof(struct ibv_flow_attr);
-
-		claim_zero(mlx5_flow_create_eth(&items[0], NULL, &parser));
-		TAILQ_FOREACH(flow, &priv->ctrl_flows, next) {
-			eth = (void *)((uintptr_t)flow->ibv_attr + attr_size);
-			assert(eth->type == IBV_FLOW_SPEC_ETH);
-			if (!memcmp(eth, &spec.eth, sizeof(*eth)))
-				break;
-		}
-		if (flow) {
-			claim_zero(ibv_destroy_flow(flow->ibv_flow));
-			mlx5_priv_hrxq_release(priv, flow->frxq.hrxq);
-			rte_free(flow->ibv_attr);
-			DEBUG("Control flow destroyed %p", (void *)flow);
-			TAILQ_REMOVE(&priv->ctrl_flows, flow, next);
-			rte_free(flow);
-		}
-	}
+	flow = priv_flow_create(priv, &priv->ctrl_flows, &attr, items, actions,
+				&error);
+	if (!flow)
+		return rte_errno;
 	return 0;
 }
+
+/**
+ * Enable a flow control configured from the control plane.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param eth_spec
+ *   An Ethernet flow spec to apply.
+ * @param eth_mask
+ *   An Ethernet flow mask to apply.
+ *
+ * @return
+ *   0 on success.
+ */
+int
+mlx5_ctrl_flow(struct rte_eth_dev *dev,
+	       struct rte_flow_item_eth *eth_spec,
+	       struct rte_flow_item_eth *eth_mask)
+{
+	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
+}
diff --git a/drivers/net/mlx5/mlx5_mac.c b/drivers/net/mlx5/mlx5_mac.c
index 086af58..d17b991 100644
--- a/drivers/net/mlx5/mlx5_mac.c
+++ b/drivers/net/mlx5/mlx5_mac.c
@@ -83,112 +83,6 @@ priv_get_mac(struct priv *priv, uint8_t (*mac)[ETHER_ADDR_LEN])
 }
 
 /**
- * Delete MAC flow steering rule.
- *
- * @param hash_rxq
- *   Pointer to hash RX queue structure.
- * @param mac_index
- *   MAC address index.
- * @param vlan_index
- *   VLAN index to use.
- */
-static void
-hash_rxq_del_mac_flow(struct hash_rxq *hash_rxq, unsigned int mac_index,
-		      unsigned int vlan_index)
-{
-#ifndef NDEBUG
-	const uint8_t (*mac)[ETHER_ADDR_LEN] =
-		(const uint8_t (*)[ETHER_ADDR_LEN])
-		hash_rxq->priv->mac[mac_index].addr_bytes;
-#endif
-
-	assert(mac_index < RTE_DIM(hash_rxq->mac_flow));
-	assert(vlan_index < RTE_DIM(hash_rxq->mac_flow[mac_index]));
-	if (hash_rxq->mac_flow[mac_index][vlan_index] == NULL)
-		return;
-	DEBUG("%p: removing MAC address %02x:%02x:%02x:%02x:%02x:%02x index %u"
-	      " VLAN index %u",
-	      (void *)hash_rxq,
-	      (*mac)[0], (*mac)[1], (*mac)[2], (*mac)[3], (*mac)[4], (*mac)[5],
-	      mac_index,
-	      vlan_index);
-	claim_zero(ibv_destroy_flow(hash_rxq->mac_flow
-				    [mac_index][vlan_index]));
-	hash_rxq->mac_flow[mac_index][vlan_index] = NULL;
-}
-
-/**
- * Unregister a MAC address from a hash RX queue.
- *
- * @param hash_rxq
- *   Pointer to hash RX queue structure.
- * @param mac_index
- *   MAC address index.
- */
-static void
-hash_rxq_mac_addr_del(struct hash_rxq *hash_rxq, unsigned int mac_index)
-{
-	unsigned int i;
-
-	assert(mac_index < RTE_DIM(hash_rxq->mac_flow));
-	for (i = 0; (i != RTE_DIM(hash_rxq->mac_flow[mac_index])); ++i)
-		hash_rxq_del_mac_flow(hash_rxq, mac_index, i);
-}
-
-/**
- * Unregister all MAC addresses from a hash RX queue.
- *
- * @param hash_rxq
- *   Pointer to hash RX queue structure.
- */
-void
-hash_rxq_mac_addrs_del(struct hash_rxq *hash_rxq)
-{
-	unsigned int i;
-
-	for (i = 0; (i != RTE_DIM(hash_rxq->mac_flow)); ++i)
-		hash_rxq_mac_addr_del(hash_rxq, i);
-}
-
-/**
- * Unregister a MAC address.
- *
- * This is done for each hash RX queue.
- *
- * @param priv
- *   Pointer to private structure.
- * @param mac_index
- *   MAC address index.
- */
-static void
-priv_mac_addr_del(struct priv *priv, unsigned int mac_index)
-{
-	unsigned int i;
-
-	assert(mac_index < RTE_DIM(priv->mac));
-	if (!BITFIELD_ISSET(priv->mac_configured, mac_index))
-		return;
-	for (i = 0; (i != priv->hash_rxqs_n); ++i)
-		hash_rxq_mac_addr_del(&(*priv->hash_rxqs)[i], mac_index);
-	BITFIELD_RESET(priv->mac_configured, mac_index);
-}
-
-/**
- * Unregister all MAC addresses from all hash RX queues.
- *
- * @param priv
- *   Pointer to private structure.
- */
-void
-priv_mac_addrs_disable(struct priv *priv)
-{
-	unsigned int i;
-
-	for (i = 0; (i != priv->hash_rxqs_n); ++i)
-		hash_rxq_mac_addrs_del(&(*priv->hash_rxqs)[i]);
-}
-
-/**
  * DPDK callback to remove a MAC address.
  *
  * @param dev
@@ -199,262 +93,12 @@ priv_mac_addrs_disable(struct priv *priv)
 void
 mlx5_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
 {
-	struct priv *priv = dev->data->dev_private;
-
 	if (mlx5_is_secondary())
 		return;
-
-	priv_lock(priv);
-	DEBUG("%p: removing MAC address from index %" PRIu32,
-	      (void *)dev, index);
-	if (index >= RTE_DIM(priv->mac))
-		goto end;
-	priv_mac_addr_del(priv, index);
-end:
-	priv_unlock(priv);
-}
-
-/**
- * Add MAC flow steering rule.
- *
- * @param hash_rxq
- *   Pointer to hash RX queue structure.
- * @param mac_index
- *   MAC address index to register.
- * @param vlan_index
- *   VLAN index to use.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-static int
-hash_rxq_add_mac_flow(struct hash_rxq *hash_rxq, unsigned int mac_index,
-		      unsigned int vlan_index)
-{
-	struct ibv_flow *flow;
-	struct priv *priv = hash_rxq->priv;
-	const uint8_t (*mac)[ETHER_ADDR_LEN] =
-			(const uint8_t (*)[ETHER_ADDR_LEN])
-			priv->mac[mac_index].addr_bytes;
-	FLOW_ATTR_SPEC_ETH(data, priv_flow_attr(priv, NULL, 0, hash_rxq->type));
-	struct ibv_flow_attr *attr = &data->attr;
-	struct ibv_flow_spec_eth *spec = &data->spec;
-	unsigned int vlan_enabled = !!priv->vlan_filter_n;
-	unsigned int vlan_id = priv->vlan_filter[vlan_index];
-
-	assert(mac_index < RTE_DIM(hash_rxq->mac_flow));
-	assert(vlan_index < RTE_DIM(hash_rxq->mac_flow[mac_index]));
-	if (hash_rxq->mac_flow[mac_index][vlan_index] != NULL)
-		return 0;
-	/*
-	 * No padding must be inserted by the compiler between attr and spec.
-	 * This layout is expected by libibverbs.
-	 */
-	assert(((uint8_t *)attr + sizeof(*attr)) == (uint8_t *)spec);
-	priv_flow_attr(priv, attr, sizeof(data), hash_rxq->type);
-	/* The first specification must be Ethernet. */
-	assert(spec->type == IBV_FLOW_SPEC_ETH);
-	assert(spec->size == sizeof(*spec));
-	*spec = (struct ibv_flow_spec_eth){
-		.type = IBV_FLOW_SPEC_ETH,
-		.size = sizeof(*spec),
-		.val = {
-			.dst_mac = {
-				(*mac)[0], (*mac)[1], (*mac)[2],
-				(*mac)[3], (*mac)[4], (*mac)[5]
-			},
-			.vlan_tag = (vlan_enabled ?
-				     rte_cpu_to_be_16(vlan_id)
-				     : 0),
-		},
-		.mask = {
-			.dst_mac = "\xff\xff\xff\xff\xff\xff",
-			.vlan_tag = (vlan_enabled ?
-				     rte_cpu_to_be_16(0xfff) :
-				     0),
-		},
-	};
-	DEBUG("%p: adding MAC address %02x:%02x:%02x:%02x:%02x:%02x index %u"
-	      " VLAN index %u filtering %s, ID %u",
-	      (void *)hash_rxq,
-	      (*mac)[0], (*mac)[1], (*mac)[2], (*mac)[3], (*mac)[4], (*mac)[5],
-	      mac_index,
-	      vlan_index,
-	      (vlan_enabled ? "enabled" : "disabled"),
-	      vlan_id);
-	/* Create related flow. */
-	errno = 0;
-	flow = ibv_create_flow(hash_rxq->qp, attr);
-	if (flow == NULL) {
-		/* It's not clear whether errno is always set in this case. */
-		ERROR("%p: flow configuration failed, errno=%d: %s",
-		      (void *)hash_rxq, errno,
-		      (errno ? strerror(errno) : "Unknown error"));
-		if (errno)
-			return errno;
-		return EINVAL;
-	}
-	hash_rxq->mac_flow[mac_index][vlan_index] = flow;
-	return 0;
-}
-
-/**
- * Register a MAC address in a hash RX queue.
- *
- * @param hash_rxq
- *   Pointer to hash RX queue structure.
- * @param mac_index
- *   MAC address index to register.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-static int
-hash_rxq_mac_addr_add(struct hash_rxq *hash_rxq, unsigned int mac_index)
-{
-	struct priv *priv = hash_rxq->priv;
-	unsigned int i = 0;
-	int ret;
-
-	assert(mac_index < RTE_DIM(hash_rxq->mac_flow));
-	assert(RTE_DIM(hash_rxq->mac_flow[mac_index]) ==
-	       RTE_DIM(priv->vlan_filter));
-	/* Add a MAC address for each VLAN filter, or at least once. */
-	do {
-		ret = hash_rxq_add_mac_flow(hash_rxq, mac_index, i);
-		if (ret) {
-			/* Failure, rollback. */
-			while (i != 0)
-				hash_rxq_del_mac_flow(hash_rxq, mac_index,
-						      --i);
-			return ret;
-		}
-	} while (++i < priv->vlan_filter_n);
-	return 0;
-}
-
-/**
- * Register all MAC addresses in a hash RX queue.
- *
- * @param hash_rxq
- *   Pointer to hash RX queue structure.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-int
-hash_rxq_mac_addrs_add(struct hash_rxq *hash_rxq)
-{
-	struct priv *priv = hash_rxq->priv;
-	unsigned int i;
-	int ret;
-
-	assert(RTE_DIM(priv->mac) == RTE_DIM(hash_rxq->mac_flow));
-	for (i = 0; (i != RTE_DIM(priv->mac)); ++i) {
-		if (!BITFIELD_ISSET(priv->mac_configured, i))
-			continue;
-		ret = hash_rxq_mac_addr_add(hash_rxq, i);
-		if (!ret)
-			continue;
-		/* Failure, rollback. */
-		while (i != 0)
-			hash_rxq_mac_addr_del(hash_rxq, --i);
-		assert(ret > 0);
-		return ret;
-	}
-	return 0;
-}
-
-/**
- * Register a MAC address.
- *
- * This is done for each hash RX queue.
- *
- * @param priv
- *   Pointer to private structure.
- * @param mac_index
- *   MAC address index to use.
- * @param mac
- *   MAC address to register.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-int
-priv_mac_addr_add(struct priv *priv, unsigned int mac_index,
-		  const uint8_t (*mac)[ETHER_ADDR_LEN])
-{
-	unsigned int i;
-	int ret;
-
-	assert(mac_index < RTE_DIM(priv->mac));
-	/* First, make sure this address isn't already configured. */
-	for (i = 0; (i != RTE_DIM(priv->mac)); ++i) {
-		/* Skip this index, it's going to be reconfigured. */
-		if (i == mac_index)
-			continue;
-		if (!BITFIELD_ISSET(priv->mac_configured, i))
-			continue;
-		if (memcmp(priv->mac[i].addr_bytes, *mac, sizeof(*mac)))
-			continue;
-		/* Address already configured elsewhere, return with error. */
-		return EADDRINUSE;
-	}
-	if (BITFIELD_ISSET(priv->mac_configured, mac_index))
-		priv_mac_addr_del(priv, mac_index);
-	priv->mac[mac_index] = (struct ether_addr){
-		{
-			(*mac)[0], (*mac)[1], (*mac)[2],
-			(*mac)[3], (*mac)[4], (*mac)[5]
-		}
-	};
-	if (!priv_allow_flow_type(priv, HASH_RXQ_FLOW_TYPE_MAC))
-		goto end;
-	for (i = 0; (i != priv->hash_rxqs_n); ++i) {
-		ret = hash_rxq_mac_addr_add(&(*priv->hash_rxqs)[i], mac_index);
-		if (!ret)
-			continue;
-		/* Failure, rollback. */
-		while (i != 0)
-			hash_rxq_mac_addr_del(&(*priv->hash_rxqs)[--i],
-					      mac_index);
-		return ret;
-	}
-end:
-	BITFIELD_SET(priv->mac_configured, mac_index);
-	return 0;
-}
-
-/**
- * Register all MAC addresses in all hash RX queues.
- *
- * @param priv
- *   Pointer to private structure.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-int
-priv_mac_addrs_enable(struct priv *priv)
-{
-	unsigned int i;
-	int ret;
-
-	if (priv->isolated)
-		return 0;
-	if (!priv_allow_flow_type(priv, HASH_RXQ_FLOW_TYPE_MAC))
-		return 0;
-	for (i = 0; (i != priv->hash_rxqs_n); ++i) {
-		ret = hash_rxq_mac_addrs_add(&(*priv->hash_rxqs)[i]);
-		if (!ret)
-			continue;
-		/* Failure, rollback. */
-		while (i != 0)
-			hash_rxq_mac_addrs_del(&(*priv->hash_rxqs)[--i]);
-		assert(ret > 0);
-		return ret;
-	}
-	return 0;
+	assert(index < MLX5_MAX_MAC_ADDRESSES);
+	memset(&dev->data->mac_addrs[index], 0, sizeof(struct ether_addr));
+	if (!dev->data->promiscuous && !dev->data->all_multicast)
+		mlx5_traffic_restart(dev);
 }
 
 /**
@@ -468,31 +112,35 @@ priv_mac_addrs_enable(struct priv *priv)
  *   MAC address index.
  * @param vmdq
  *   VMDq pool index to associate address with (ignored).
+ *
+ * @return
+ *   0 on success.
  */
 int
-mlx5_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
+mlx5_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac,
 		  uint32_t index, uint32_t vmdq)
 {
-	struct priv *priv = dev->data->dev_private;
-	int re;
-
-	if (mlx5_is_secondary())
-		return -ENOTSUP;
+	unsigned int i;
+	int ret = 0;
 
 	(void)vmdq;
-	priv_lock(priv);
-	DEBUG("%p: adding MAC address at index %" PRIu32,
-	      (void *)dev, index);
-	if (index >= RTE_DIM(priv->mac)) {
-		re = EINVAL;
-		goto end;
+	if (mlx5_is_secondary())
+		return 0;
+	assert(index < MLX5_MAX_MAC_ADDRESSES);
+	/* First, make sure this address isn't already configured. */
+	for (i = 0; (i != MLX5_MAX_MAC_ADDRESSES); ++i) {
+		/* Skip this index, it's going to be reconfigured. */
+		if (i == index)
+			continue;
+		if (memcmp(&dev->data->mac_addrs[i], mac, sizeof(*mac)))
+			continue;
+		/* Address already configured elsewhere, return with error. */
+		return EADDRINUSE;
 	}
-	re = priv_mac_addr_add(priv, index,
-			       (const uint8_t (*)[ETHER_ADDR_LEN])
-			       mac_addr->addr_bytes);
-end:
-	priv_unlock(priv);
-	return -re;
+	dev->data->mac_addrs[index] = *mac;
+	if (!dev->data->promiscuous && !dev->data->all_multicast)
+		mlx5_traffic_restart(dev);
+	return ret;
 }
 
 /**
@@ -506,7 +154,8 @@ mlx5_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
 void
 mlx5_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
 {
+	if (mlx5_is_secondary())
+		return;
 	DEBUG("%p: setting primary MAC address", (void *)dev);
-	mlx5_mac_addr_remove(dev, 0);
 	mlx5_mac_addr_add(dev, mac_addr, 0, 0);
 }
diff --git a/drivers/net/mlx5/mlx5_rxmode.c b/drivers/net/mlx5/mlx5_rxmode.c
index 0c75889..0ef2cdf 100644
--- a/drivers/net/mlx5/mlx5_rxmode.c
+++ b/drivers/net/mlx5/mlx5_rxmode.c
@@ -51,308 +51,6 @@
 #include "mlx5_rxtx.h"
 #include "mlx5_utils.h"
 
-/* Initialization data for special flows. */
-static const struct special_flow_init special_flow_init[] = {
-	[HASH_RXQ_FLOW_TYPE_BROADCAST] = {
-		.dst_mac_val = "\xff\xff\xff\xff\xff\xff",
-		.dst_mac_mask = "\xff\xff\xff\xff\xff\xff",
-		.hash_types =
-			1 << HASH_RXQ_UDPV4 |
-			1 << HASH_RXQ_IPV4 |
-			1 << HASH_RXQ_UDPV6 |
-			1 << HASH_RXQ_IPV6 |
-			1 << HASH_RXQ_ETH |
-			0,
-		.per_vlan = 1,
-	},
-	[HASH_RXQ_FLOW_TYPE_IPV6MULTI] = {
-		.dst_mac_val = "\x33\x33\x00\x00\x00\x00",
-		.dst_mac_mask = "\xff\xff\x00\x00\x00\x00",
-		.hash_types =
-			1 << HASH_RXQ_UDPV6 |
-			1 << HASH_RXQ_IPV6 |
-			1 << HASH_RXQ_ETH |
-			0,
-		.per_vlan = 1,
-	},
-};
-
-/**
- * Enable a special flow in a hash RX queue for a given VLAN index.
- *
- * @param hash_rxq
- *   Pointer to hash RX queue structure.
- * @param flow_type
- *   Special flow type.
- * @param vlan_index
- *   VLAN index to use.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-static int
-hash_rxq_special_flow_enable_vlan(struct hash_rxq *hash_rxq,
-				  enum hash_rxq_flow_type flow_type,
-				  unsigned int vlan_index)
-{
-	struct priv *priv = hash_rxq->priv;
-	struct ibv_flow *flow;
-	FLOW_ATTR_SPEC_ETH(data, priv_flow_attr(priv, NULL, 0, hash_rxq->type));
-	struct ibv_flow_attr *attr = &data->attr;
-	struct ibv_flow_spec_eth *spec = &data->spec;
-	const uint8_t *mac;
-	const uint8_t *mask;
-	unsigned int vlan_enabled = (priv->vlan_filter_n &&
-				     special_flow_init[flow_type].per_vlan);
-	unsigned int vlan_id = priv->vlan_filter[vlan_index];
-
-	/* Check if flow is relevant for this hash_rxq. */
-	if (!(special_flow_init[flow_type].hash_types & (1 << hash_rxq->type)))
-		return 0;
-	/* Check if flow already exists. */
-	if (hash_rxq->special_flow[flow_type][vlan_index] != NULL)
-		return 0;
-
-	/*
-	 * No padding must be inserted by the compiler between attr and spec.
-	 * This layout is expected by libibverbs.
-	 */
-	assert(((uint8_t *)attr + sizeof(*attr)) == (uint8_t *)spec);
-	priv_flow_attr(priv, attr, sizeof(data), hash_rxq->type);
-	/* The first specification must be Ethernet. */
-	assert(spec->type == IBV_FLOW_SPEC_ETH);
-	assert(spec->size == sizeof(*spec));
-
-	mac = special_flow_init[flow_type].dst_mac_val;
-	mask = special_flow_init[flow_type].dst_mac_mask;
-	*spec = (struct ibv_flow_spec_eth){
-		.type = IBV_FLOW_SPEC_ETH,
-		.size = sizeof(*spec),
-		.val = {
-			.dst_mac = {
-				mac[0], mac[1], mac[2],
-				mac[3], mac[4], mac[5],
-			},
-			.vlan_tag = (vlan_enabled ?
-				     rte_cpu_to_be_16(vlan_id) :
-				     0),
-		},
-		.mask = {
-			.dst_mac = {
-				mask[0], mask[1], mask[2],
-				mask[3], mask[4], mask[5],
-			},
-			.vlan_tag = (vlan_enabled ?
-				     rte_cpu_to_be_16(0xfff) :
-				     0),
-		},
-	};
-
-	errno = 0;
-	flow = ibv_create_flow(hash_rxq->qp, attr);
-	if (flow == NULL) {
-		/* It's not clear whether errno is always set in this case. */
-		ERROR("%p: flow configuration failed, errno=%d: %s",
-		      (void *)hash_rxq, errno,
-		      (errno ? strerror(errno) : "Unknown error"));
-		if (errno)
-			return errno;
-		return EINVAL;
-	}
-	hash_rxq->special_flow[flow_type][vlan_index] = flow;
-	DEBUG("%p: special flow %s (index %d) VLAN %u (index %u) enabled",
-	      (void *)hash_rxq, hash_rxq_flow_type_str(flow_type), flow_type,
-	      vlan_id, vlan_index);
-	return 0;
-}
-
-/**
- * Disable a special flow in a hash RX queue for a given VLAN index.
- *
- * @param hash_rxq
- *   Pointer to hash RX queue structure.
- * @param flow_type
- *   Special flow type.
- * @param vlan_index
- *   VLAN index to use.
- */
-static void
-hash_rxq_special_flow_disable_vlan(struct hash_rxq *hash_rxq,
-				   enum hash_rxq_flow_type flow_type,
-				   unsigned int vlan_index)
-{
-	struct ibv_flow *flow =
-		hash_rxq->special_flow[flow_type][vlan_index];
-
-	if (flow == NULL)
-		return;
-	claim_zero(ibv_destroy_flow(flow));
-	hash_rxq->special_flow[flow_type][vlan_index] = NULL;
-	DEBUG("%p: special flow %s (index %d) VLAN %u (index %u) disabled",
-	      (void *)hash_rxq, hash_rxq_flow_type_str(flow_type), flow_type,
-	      hash_rxq->priv->vlan_filter[vlan_index], vlan_index);
-}
-
-/**
- * Enable a special flow in a hash RX queue.
- *
- * @param hash_rxq
- *   Pointer to hash RX queue structure.
- * @param flow_type
- *   Special flow type.
- * @param vlan_index
- *   VLAN index to use.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-static int
-hash_rxq_special_flow_enable(struct hash_rxq *hash_rxq,
-			     enum hash_rxq_flow_type flow_type)
-{
-	struct priv *priv = hash_rxq->priv;
-	unsigned int i = 0;
-	int ret;
-
-	assert((unsigned int)flow_type < RTE_DIM(hash_rxq->special_flow));
-	assert(RTE_DIM(hash_rxq->special_flow[flow_type]) ==
-	       RTE_DIM(priv->vlan_filter));
-	/* Add a special flow for each VLAN filter when relevant. */
-	do {
-		ret = hash_rxq_special_flow_enable_vlan(hash_rxq, flow_type, i);
-		if (ret) {
-			/* Failure, rollback. */
-			while (i != 0)
-				hash_rxq_special_flow_disable_vlan(hash_rxq,
-								   flow_type,
-								   --i);
-			return ret;
-		}
-	} while (special_flow_init[flow_type].per_vlan &&
-		 ++i < priv->vlan_filter_n);
-	return 0;
-}
-
-/**
- * Disable a special flow in a hash RX queue.
- *
- * @param hash_rxq
- *   Pointer to hash RX queue structure.
- * @param flow_type
- *   Special flow type.
- */
-static void
-hash_rxq_special_flow_disable(struct hash_rxq *hash_rxq,
-			      enum hash_rxq_flow_type flow_type)
-{
-	unsigned int i;
-
-	assert((unsigned int)flow_type < RTE_DIM(hash_rxq->special_flow));
-	for (i = 0; (i != RTE_DIM(hash_rxq->special_flow[flow_type])); ++i)
-		hash_rxq_special_flow_disable_vlan(hash_rxq, flow_type, i);
-}
-
-/**
- * Enable a special flow in all hash RX queues.
- *
- * @param priv
- *   Private structure.
- * @param flow_type
- *   Special flow type.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-int
-priv_special_flow_enable(struct priv *priv, enum hash_rxq_flow_type flow_type)
-{
-	unsigned int i;
-
-	if (!priv_allow_flow_type(priv, flow_type))
-		return 0;
-	for (i = 0; (i != priv->hash_rxqs_n); ++i) {
-		struct hash_rxq *hash_rxq = &(*priv->hash_rxqs)[i];
-		int ret;
-
-		ret = hash_rxq_special_flow_enable(hash_rxq, flow_type);
-		if (!ret)
-			continue;
-		/* Failure, rollback. */
-		while (i != 0) {
-			hash_rxq = &(*priv->hash_rxqs)[--i];
-			hash_rxq_special_flow_disable(hash_rxq, flow_type);
-		}
-		return ret;
-	}
-	return 0;
-}
-
-/**
- * Disable a special flow in all hash RX queues.
- *
- * @param priv
- *   Private structure.
- * @param flow_type
- *   Special flow type.
- */
-void
-priv_special_flow_disable(struct priv *priv, enum hash_rxq_flow_type flow_type)
-{
-	unsigned int i;
-
-	for (i = 0; (i != priv->hash_rxqs_n); ++i) {
-		struct hash_rxq *hash_rxq = &(*priv->hash_rxqs)[i];
-
-		hash_rxq_special_flow_disable(hash_rxq, flow_type);
-	}
-}
-
-/**
- * Enable all special flows in all hash RX queues.
- *
- * @param priv
- *   Private structure.
- */
-int
-priv_special_flow_enable_all(struct priv *priv)
-{
-	enum hash_rxq_flow_type flow_type;
-
-	if (priv->isolated)
-		return 0;
-	for (flow_type = HASH_RXQ_FLOW_TYPE_BROADCAST;
-			flow_type != HASH_RXQ_FLOW_TYPE_MAC;
-			++flow_type) {
-		int ret;
-
-		ret = priv_special_flow_enable(priv, flow_type);
-		if (!ret)
-			continue;
-		/* Failure, rollback. */
-		while (flow_type)
-			priv_special_flow_disable(priv, --flow_type);
-		return ret;
-	}
-	return 0;
-}
-
-/**
- * Disable all special flows in all hash RX queues.
- *
- * @param priv
- *   Private structure.
- */
-void
-priv_special_flow_disable_all(struct priv *priv)
-{
-	enum hash_rxq_flow_type flow_type;
-
-	for (flow_type = HASH_RXQ_FLOW_TYPE_BROADCAST;
-			flow_type != HASH_RXQ_FLOW_TYPE_MAC;
-			++flow_type)
-		priv_special_flow_disable(priv, flow_type);
-}
-
 /**
  * DPDK callback to enable promiscuous mode.
  *
@@ -362,16 +60,10 @@ priv_special_flow_disable_all(struct priv *priv)
 void
 mlx5_promiscuous_enable(struct rte_eth_dev *dev)
 {
-	struct rte_flow_item_eth eth = {
-		.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
-		.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
-		.type = 0,
-	};
-
 	if (mlx5_is_secondary())
 		return;
 	dev->data->promiscuous = 1;
-	claim_zero(mlx5_ctrl_flow(dev, &eth, &eth, 1));
+	mlx5_traffic_restart(dev);
 }
 
 /**
@@ -383,16 +75,10 @@ mlx5_promiscuous_enable(struct rte_eth_dev *dev)
 void
 mlx5_promiscuous_disable(struct rte_eth_dev *dev)
 {
-	struct rte_flow_item_eth eth = {
-		.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
-		.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
-		.type = 0,
-	};
-
 	if (mlx5_is_secondary())
 		return;
 	dev->data->promiscuous = 0;
-	claim_zero(mlx5_ctrl_flow(dev, &eth, &eth, 0));
+	mlx5_traffic_restart(dev);
 }
 
 /**
@@ -404,17 +90,10 @@ mlx5_promiscuous_disable(struct rte_eth_dev *dev)
 void
 mlx5_allmulticast_enable(struct rte_eth_dev *dev)
 {
-	struct rte_flow_item_eth eth = {
-		.dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
-		.src.addr_bytes = "\x01\x00\x00\x00\x00\x00",
-		.type = 0,
-	};
-
 	if (mlx5_is_secondary())
 		return;
 	dev->data->all_multicast = 1;
-	if (dev->data->dev_started)
-		claim_zero(mlx5_ctrl_flow(dev, &eth, &eth, 1));
+	mlx5_traffic_restart(dev);
 }
 
 /**
@@ -426,15 +105,8 @@ mlx5_allmulticast_enable(struct rte_eth_dev *dev)
 void
 mlx5_allmulticast_disable(struct rte_eth_dev *dev)
 {
-	struct rte_flow_item_eth eth = {
-		.dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
-		.src.addr_bytes = "\x01\x00\x00\x00\x00\x00",
-		.type = 0,
-	};
-
 	if (mlx5_is_secondary())
 		return;
 	dev->data->all_multicast = 0;
-	if (dev->data->dev_started)
-		claim_zero(mlx5_ctrl_flow(dev, &eth, &eth, 0));
+	mlx5_traffic_restart(dev);
 }
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index d3cd58e..c603d2b 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -531,12 +531,6 @@ priv_destroy_hash_rxqs(struct priv *priv)
 
 		assert(hash_rxq->priv == priv);
 		assert(hash_rxq->qp != NULL);
-		/* Also check that there are no remaining flows. */
-		for (j = 0; (j != RTE_DIM(hash_rxq->special_flow)); ++j)
-			for (k = 0;
-			     (k != RTE_DIM(hash_rxq->special_flow[j]));
-			     ++k)
-				assert(hash_rxq->special_flow[j][k] == NULL);
 		for (j = 0; (j != RTE_DIM(hash_rxq->mac_flow)); ++j)
 			for (k = 0; (k != RTE_DIM(hash_rxq->mac_flow[j])); ++k)
 				assert(hash_rxq->mac_flow[j][k] == NULL);
@@ -558,63 +552,6 @@ priv_destroy_hash_rxqs(struct priv *priv)
 }
 
 /**
- * Check whether a given flow type is allowed.
- *
- * @param priv
- *   Pointer to private structure.
- * @param type
- *   Flow type to check.
- *
- * @return
- *   Nonzero if the given flow type is allowed.
- */
-int
-priv_allow_flow_type(struct priv *priv, enum hash_rxq_flow_type type)
-{
-	(void)priv;
-	switch (type) {
-	case HASH_RXQ_FLOW_TYPE_BROADCAST:
-	case HASH_RXQ_FLOW_TYPE_IPV6MULTI:
-	case HASH_RXQ_FLOW_TYPE_MAC:
-		return 1;
-		return 1;
-	default:
-		/* Unsupported flow type is not allowed. */
-		return 0;
-	}
-	return 0;
-}
-
-/**
- * Automatically enable/disable flows according to configuration.
- *
- * @param priv
- *   Private structure.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-int
-priv_rehash_flows(struct priv *priv)
-{
-	size_t i;
-
-	for (i = 0; i != RTE_DIM((*priv->hash_rxqs)[0].special_flow); ++i)
-		if (!priv_allow_flow_type(priv, i)) {
-			priv_special_flow_disable(priv, i);
-		} else {
-			int ret = priv_special_flow_enable(priv, i);
-
-			if (ret)
-				return ret;
-		}
-	if (priv_allow_flow_type(priv, HASH_RXQ_FLOW_TYPE_MAC))
-		return priv_mac_addrs_enable(priv);
-	priv_mac_addrs_disable(priv);
-	return 0;
-}
-
-/**
  * Allocate RX queue elements.
  *
  * @param rxq_ctrl
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 6f474d2..c60bc4d 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -236,28 +236,6 @@ struct special_flow_init {
 	unsigned int per_vlan:1;
 };
 
-enum hash_rxq_flow_type {
-	HASH_RXQ_FLOW_TYPE_BROADCAST,
-	HASH_RXQ_FLOW_TYPE_IPV6MULTI,
-	HASH_RXQ_FLOW_TYPE_MAC,
-};
-
-#ifndef NDEBUG
-static inline const char *
-hash_rxq_flow_type_str(enum hash_rxq_flow_type flow_type)
-{
-	switch (flow_type) {
-	case HASH_RXQ_FLOW_TYPE_BROADCAST:
-		return "broadcast";
-	case HASH_RXQ_FLOW_TYPE_IPV6MULTI:
-		return "IPv6 multicast";
-	case HASH_RXQ_FLOW_TYPE_MAC:
-		return "MAC";
-	}
-	return NULL;
-}
-#endif /* NDEBUG */
-
 struct hash_rxq {
 	struct priv *priv; /* Back pointer to private data. */
 	struct ibv_qp *qp; /* Hash RX QP. */
@@ -265,8 +243,6 @@ struct hash_rxq {
 	/* MAC flow steering rules, one per VLAN ID. */
 	struct ibv_flow *mac_flow
 		[MLX5_MAX_MAC_ADDRESSES][MLX5_MAX_VLAN_IDS];
-	struct ibv_flow *special_flow
-		[MLX5_MAX_SPECIAL_FLOWS][MLX5_MAX_VLAN_IDS];
 };
 
 /* TX queue descriptor. */
@@ -336,8 +312,6 @@ size_t priv_flow_attr(struct priv *, struct ibv_flow_attr *,
 		      size_t, enum hash_rxq_type);
 int priv_create_hash_rxqs(struct priv *);
 void priv_destroy_hash_rxqs(struct priv *);
-int priv_allow_flow_type(struct priv *, enum hash_rxq_flow_type);
-int priv_rehash_flows(struct priv *);
 void mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *);
 int mlx5_rx_queue_setup(struct rte_eth_dev *, uint16_t, uint16_t, unsigned int,
 			const struct rte_eth_rxconf *, struct rte_mempool *);
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 27e7890..4143571 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -135,7 +135,14 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 	if (mlx5_is_secondary())
 		return -E_RTE_SECONDARY;
 
+	dev->data->dev_started = 1;
 	priv_lock(priv);
+	err = priv_flow_create_drop_queue(priv);
+	if (err) {
+		ERROR("%p: Drop queue allocation failed: %s",
+		      (void *)dev, strerror(err));
+		goto error;
+	}
 	DEBUG("%p: allocating and configuring hash RX queues", (void *)dev);
 	rte_mempool_walk(mlx5_mp2mr_iter, priv);
 	err = priv_txq_start(priv);
@@ -155,21 +162,8 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 	/* Update receive callback. */
 	priv_dev_select_rx_function(priv, dev);
 	err = priv_create_hash_rxqs(priv);
-	if (!err)
-		err = priv_rehash_flows(priv);
-	else {
-		ERROR("%p: an error occurred while configuring hash RX queues:"
-		      " %s",
-		      (void *)priv, strerror(err));
-		goto error;
-	}
-	if (dev->data->promiscuous)
-		mlx5_promiscuous_enable(dev);
-	else if (dev->data->all_multicast)
-		mlx5_allmulticast_enable(dev);
-	err = priv_flow_start(priv, &priv->ctrl_flows);
 	if (err) {
-		ERROR("%p: an error occurred while configuring control flows:"
+		ERROR("%p: an error occurred while configuring hash RX queues:"
 		      " %s",
 		      (void *)priv, strerror(err));
 		goto error;
@@ -193,15 +187,14 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 	return 0;
 error:
 	/* Rollback. */
+	dev->data->dev_started = 0;
 	LIST_FOREACH(mr, &priv->mr, next)
 		priv_mr_release(priv, mr);
-	priv_special_flow_disable_all(priv);
-	priv_mac_addrs_disable(priv);
 	priv_destroy_hash_rxqs(priv);
 	priv_flow_stop(priv, &priv->flows);
-	priv_flow_flush(priv, &priv->ctrl_flows);
-	priv_rxq_stop(priv);
 	priv_txq_stop(priv);
+	priv_rxq_stop(priv);
+	priv_flow_delete_drop_queue(priv);
 	priv_unlock(priv);
 	return -err;
 }
@@ -231,8 +224,6 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
 	rte_wmb();
 	usleep(1000 * priv->rxqs_n);
 	DEBUG("%p: cleaning up and destroying hash RX queues", (void *)dev);
-	priv_special_flow_disable_all(priv);
-	priv_mac_addrs_disable(priv);
 	priv_destroy_hash_rxqs(priv);
 	priv_flow_stop(priv, &priv->flows);
 	priv_flow_flush(priv, &priv->ctrl_flows);
@@ -243,5 +234,172 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
 	LIST_FOREACH(mr, &priv->mr, next) {
 		priv_mr_release(priv, mr);
 	}
+	priv_flow_delete_drop_queue(priv);
+	priv_unlock(priv);
+}
+
+/**
+ * Enable traffic flows configured by control plane
+ *
+ * @param priv
+ *   Pointer to Ethernet device private data.
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ *
+ * @return
+ *   0 on success.
+ */
+int
+priv_dev_traffic_enable(struct priv *priv, struct rte_eth_dev *dev)
+{
+	if (priv->isolated)
+		return 0;
+	if (dev->data->promiscuous) {
+		struct rte_flow_item_eth promisc = {
+			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
+			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
+			.type = 0,
+		};
+
+		claim_zero(mlx5_ctrl_flow(dev, &promisc, &promisc));
+	} else if (dev->data->all_multicast) {
+		struct rte_flow_item_eth multicast = {
+			.dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
+			.src.addr_bytes = "\x01\x00\x00\x00\x00\x00",
+			.type = 0,
+		};
+
+		claim_zero(mlx5_ctrl_flow(dev, &multicast, &multicast));
+	} else {
+		struct rte_flow_item_eth bcast = {
+			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+		};
+		struct rte_flow_item_eth ipv6_multi_spec = {
+			.dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
+		};
+		struct rte_flow_item_eth ipv6_multi_mask = {
+			.dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
+		};
+		struct rte_flow_item_eth unicast = {
+			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
+		};
+		struct rte_flow_item_eth unicast_mask = {
+			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+		};
+		const unsigned int vlan_filter_n = priv->vlan_filter_n;
+		const struct ether_addr cmp = {
+			.addr_bytes = "\x00\x00\x00\x00\x00\x00",
+		};
+		unsigned int i;
+		unsigned int j;
+		unsigned int unicast_flow = 0;
+		int ret;
+
+		for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
+			struct ether_addr *mac = &dev->data->mac_addrs[i];
+
+			if (!memcmp(mac, &cmp, sizeof(*mac)))
+				continue;
+			memcpy(&unicast.dst.addr_bytes,
+			       mac->addr_bytes,
+			       ETHER_ADDR_LEN);
+			for (j = 0; j != vlan_filter_n; ++j) {
+				uint16_t vlan = priv->vlan_filter[j];
+
+				struct rte_flow_item_vlan vlan_spec = {
+					.tci = rte_cpu_to_be_16(vlan),
+				};
+				struct rte_flow_item_vlan vlan_mask = {
+					.tci = 0xffff,
+				};
+
+				ret = mlx5_ctrl_flow_vlan(dev, &unicast,
+							  &unicast_mask,
+							  &vlan_spec,
+							  &vlan_mask);
+				if (ret)
+					goto error;
+				unicast_flow = 1;
+			}
+			if (!vlan_filter_n) {
+				ret = mlx5_ctrl_flow(dev, &unicast,
+						     &unicast_mask);
+				if (ret)
+					goto error;
+				unicast_flow = 1;
+			}
+		}
+		if (!unicast_flow)
+			return 0;
+		ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
+		if (ret)
+			goto error;
+		ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec, &ipv6_multi_mask);
+		if (ret)
+			goto error;
+	}
+	return 0;
+error:
+	return rte_errno;
+}
+
+
+/**
+ * Disable traffic flows configured by control plane
+ *
+ * @param priv
+ *   Pointer to Ethernet device private data.
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ *
+ * @return
+ *   0 on success.
+ */
+int
+priv_dev_traffic_disable(struct priv *priv, struct rte_eth_dev *dev)
+{
+	(void)dev;
+	priv_flow_flush(priv, &priv->ctrl_flows);
+	return 0;
+}
+
+/**
+ * Restart traffic flows configured by control plane
+ *
+ * @param priv
+ *   Pointer to Ethernet device private data.
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ *
+ * @return
+ *   0 on success.
+ */
+int
+priv_dev_traffic_restart(struct priv *priv, struct rte_eth_dev *dev)
+{
+	if (dev->data->dev_started) {
+		priv_dev_traffic_disable(priv, dev);
+		priv_dev_traffic_enable(priv, dev);
+	}
+	return 0;
+}
+
+/**
+ * Restart traffic flows configured by control plane
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ *
+ * @return
+ *   0 on success.
+ */
+int
+mlx5_traffic_restart(struct rte_eth_dev *dev)
+{
+	struct priv *priv = dev->data->dev_private;
+
+	priv_lock(priv);
+	priv_dev_traffic_restart(priv, dev);
 	priv_unlock(priv);
+	return 0;
 }
diff --git a/drivers/net/mlx5/mlx5_vlan.c b/drivers/net/mlx5/mlx5_vlan.c
index d707984..ed91d9b 100644
--- a/drivers/net/mlx5/mlx5_vlan.c
+++ b/drivers/net/mlx5/mlx5_vlan.c
@@ -44,7 +44,7 @@
 #include "mlx5_autoconf.h"
 
 /**
- * Configure a VLAN filter.
+ * DPDK callback to configure a VLAN filter.
  *
  * @param dev
  *   Pointer to Ethernet device structure.
@@ -54,28 +54,26 @@
  *   Toggle filter.
  *
  * @return
- *   0 on success, errno value on failure.
+ *   0 on success, negative errno value on failure.
  */
-static int
-vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
+int
+mlx5_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
 {
 	struct priv *priv = dev->data->dev_private;
 	unsigned int i;
 
+	priv_lock(priv);
 	DEBUG("%p: %s VLAN filter ID %" PRIu16,
 	      (void *)dev, (on ? "enable" : "disable"), vlan_id);
 	assert(priv->vlan_filter_n <= RTE_DIM(priv->vlan_filter));
 	for (i = 0; (i != priv->vlan_filter_n); ++i)
 		if (priv->vlan_filter[i] == vlan_id)
 			break;
-	/* Check if there's room for another VLAN filter. */
-	if (i == RTE_DIM(priv->vlan_filter))
-		return ENOMEM;
 	if (i < priv->vlan_filter_n) {
 		assert(priv->vlan_filter_n != 0);
 		/* Enabling an existing VLAN filter has no effect. */
 		if (on)
-			return 0;
+			goto out;
 		/* Remove VLAN filter from list. */
 		--priv->vlan_filter_n;
 		memmove(&priv->vlan_filter[i],
@@ -87,41 +85,16 @@ vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
 		assert(i == priv->vlan_filter_n);
 		/* Disabling an unknown VLAN filter has no effect. */
 		if (!on)
-			return 0;
+			goto out;
 		/* Add new VLAN filter. */
 		priv->vlan_filter[priv->vlan_filter_n] = vlan_id;
 		++priv->vlan_filter_n;
 	}
-	/* Rehash flows in all hash RX queues. */
-	priv_mac_addrs_disable(priv);
-	priv_special_flow_disable_all(priv);
-	return priv_rehash_flows(priv);
-}
-
-/**
- * DPDK callback to configure a VLAN filter.
- *
- * @param dev
- *   Pointer to Ethernet device structure.
- * @param vlan_id
- *   VLAN ID to filter.
- * @param on
- *   Toggle filter.
- *
- * @return
- *   0 on success, negative errno value on failure.
- */
-int
-mlx5_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
-{
-	struct priv *priv = dev->data->dev_private;
-	int ret;
-
-	priv_lock(priv);
-	ret = vlan_filter_set(dev, vlan_id, on);
+	if (dev->data->dev_started)
+		priv_dev_traffic_restart(priv, dev);
+out:
 	priv_unlock(priv);
-	assert(ret >= 0);
-	return -ret;
+	return 0;
 }
 
 /**
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v3 20/30] net/mlx5: handle a single RSS hash key for all protocols
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (44 preceding siblings ...)
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 19/30] net/mlx5: use flow to enable unicast traffic Nelio Laranjeiro
@ 2017-10-09 14:44 ` Nelio Laranjeiro
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 21/30] net/mlx5: remove hash Rx queues support Nelio Laranjeiro
                   ` (9 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-09 14:44 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
Since the RSS configuration can also be used by the flow API, it is no longer
necessary to keep a separate RSS configuration for each protocol.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5.c        |  24 +-------
 drivers/net/mlx5/mlx5.h        |   6 +-
 drivers/net/mlx5/mlx5_ethdev.c |  27 +++++++--
 drivers/net/mlx5/mlx5_rss.c    | 127 +++++++++--------------------------------
 drivers/net/mlx5/mlx5_rxq.c    |   5 +-
 5 files changed, 56 insertions(+), 133 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index c818cf8..46c89c6 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -225,11 +225,8 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 		claim_zero(ibv_close_device(priv->ctx));
 	} else
 		assert(priv->ctx == NULL);
-	if (priv->rss_conf != NULL) {
-		for (i = 0; (i != hash_rxq_init_n); ++i)
-			rte_free((*priv->rss_conf)[i]);
-		rte_free(priv->rss_conf);
-	}
+	if (priv->rss_conf.rss_key != NULL)
+		rte_free(priv->rss_conf.rss_key);
 	if (priv->reta_idx != NULL)
 		rte_free(priv->reta_idx);
 	priv_socket_uninit(priv);
@@ -816,19 +813,6 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 				priv->txq_inline = MLX5_WQE_SIZE_MAX -
 						   MLX5_WQE_SIZE;
 		}
-		/* Allocate and register default RSS hash keys. */
-		priv->rss_conf = rte_calloc(__func__, hash_rxq_init_n,
-					    sizeof((*priv->rss_conf)[0]), 0);
-		if (priv->rss_conf == NULL) {
-			err = ENOMEM;
-			goto port_error;
-		}
-		err = rss_hash_rss_conf_new_key(priv,
-						rss_hash_default_key,
-						rss_hash_default_key_len,
-						ETH_RSS_PROTO_MASK);
-		if (err)
-			goto port_error;
 		/* Configure the first MAC address by default. */
 		if (priv_get_mac(priv, &mac.addr_bytes)) {
 			ERROR("cannot get MAC address, is mlx5_en loaded?"
@@ -898,10 +882,8 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 		continue;
 
 port_error:
-		if (priv) {
-			rte_free(priv->rss_conf);
+		if (priv)
 			rte_free(priv);
-		}
 		if (pd)
 			claim_zero(ibv_dealloc_pd(pd));
 		if (ctx)
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index e83961f..4718506 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -134,9 +134,7 @@ struct priv {
 	/* Hash RX QPs feeding the indirection table. */
 	struct hash_rxq (*hash_rxqs)[];
 	unsigned int hash_rxqs_n; /* Hash RX QPs array size. */
-	/* RSS configuration array indexed by hash RX queue type. */
-	struct rte_eth_rss_conf *(*rss_conf)[];
-	uint64_t rss_hf; /* RSS DPDK bit field of active RSS. */
+	struct rte_eth_rss_conf rss_conf; /* RSS configuration. */
 	struct rte_intr_handle intr_handle; /* Interrupt handler. */
 	unsigned int (*reta_idx)[]; /* RETA index table. */
 	unsigned int reta_idx_n; /* RETA index size. */
@@ -226,8 +224,6 @@ void mlx5_mac_addr_set(struct rte_eth_dev *, struct ether_addr *);
 
 /* mlx5_rss.c */
 
-int rss_hash_rss_conf_new_key(struct priv *, const uint8_t *, unsigned int,
-			      uint64_t);
 int mlx5_rss_hash_update(struct rte_eth_dev *, struct rte_eth_rss_conf *);
 int mlx5_rss_hash_conf_get(struct rte_eth_dev *, struct rte_eth_rss_conf *);
 int priv_rss_reta_index_resize(struct priv *, unsigned int);
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 318bc9d..9f5b489 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -575,8 +575,29 @@ dev_configure(struct rte_eth_dev *dev)
 	unsigned int i;
 	unsigned int j;
 	unsigned int reta_idx_n;
+	const uint8_t use_app_rss_key =
+		!!dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
 
-	priv->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
+	if (use_app_rss_key &&
+	    (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len !=
+	     rss_hash_default_key_len)) {
+		/* MLX5 RSS only support 40bytes key. */
+		return EINVAL;
+	}
+	priv->rss_conf.rss_key =
+		rte_realloc(priv->rss_conf.rss_key,
+			    rss_hash_default_key_len, 0);
+	if (!priv->rss_conf.rss_key) {
+		ERROR("cannot allocate RSS hash key memory (%u)", rxqs_n);
+		return ENOMEM;
+	}
+	memcpy(priv->rss_conf.rss_key,
+	       use_app_rss_key ?
+	       dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key :
+	       rss_hash_default_key,
+	       rss_hash_default_key_len);
+	priv->rss_conf.rss_key_len = rss_hash_default_key_len;
+	priv->rss_conf.rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
 	priv->rxqs = (void *)dev->data->rx_queues;
 	priv->txqs = (void *)dev->data->tx_queues;
 	if (txqs_n != priv->txqs_n) {
@@ -694,9 +715,7 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
 		info->if_index = if_nametoindex(ifname);
 	info->reta_size = priv->reta_idx_n ?
 		priv->reta_idx_n : priv->ind_table_max_size;
-	info->hash_key_size = ((*priv->rss_conf) ?
-			       (*priv->rss_conf)[0]->rss_key_len :
-			       0);
+	info->hash_key_size = priv->rss_conf.rss_key_len;
 	info->speed_capa = priv->link_speed_capa;
 	priv_unlock(priv);
 }
diff --git a/drivers/net/mlx5/mlx5_rss.c b/drivers/net/mlx5/mlx5_rss.c
index 8942879..ad6d9ab 100644
--- a/drivers/net/mlx5/mlx5_rss.c
+++ b/drivers/net/mlx5/mlx5_rss.c
@@ -54,74 +54,6 @@
 #include "mlx5_rxtx.h"
 
 /**
- * Get a RSS configuration hash key.
- *
- * @param priv
- *   Pointer to private structure.
- * @param rss_hf
- *   RSS hash functions configuration must be retrieved for.
- *
- * @return
- *   Pointer to a RSS configuration structure or NULL if rss_hf cannot
- *   be matched.
- */
-static struct rte_eth_rss_conf *
-rss_hash_get(struct priv *priv, uint64_t rss_hf)
-{
-	unsigned int i;
-
-	for (i = 0; (i != hash_rxq_init_n); ++i) {
-		uint64_t dpdk_rss_hf = hash_rxq_init[i].dpdk_rss_hf;
-
-		if (!(dpdk_rss_hf & rss_hf))
-			continue;
-		return (*priv->rss_conf)[i];
-	}
-	return NULL;
-}
-
-/**
- * Register a RSS key.
- *
- * @param priv
- *   Pointer to private structure.
- * @param key
- *   Hash key to register.
- * @param key_len
- *   Hash key length in bytes.
- * @param rss_hf
- *   RSS hash functions the provided key applies to.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-int
-rss_hash_rss_conf_new_key(struct priv *priv, const uint8_t *key,
-			  unsigned int key_len, uint64_t rss_hf)
-{
-	unsigned int i;
-
-	for (i = 0; (i != hash_rxq_init_n); ++i) {
-		struct rte_eth_rss_conf *rss_conf;
-		uint64_t dpdk_rss_hf = hash_rxq_init[i].dpdk_rss_hf;
-
-		if (!(dpdk_rss_hf & rss_hf))
-			continue;
-		rss_conf = rte_realloc((*priv->rss_conf)[i],
-				       (sizeof(*rss_conf) + key_len),
-				       0);
-		if (!rss_conf)
-			return ENOMEM;
-		rss_conf->rss_key = (void *)(rss_conf + 1);
-		rss_conf->rss_key_len = key_len;
-		rss_conf->rss_hf = dpdk_rss_hf;
-		memcpy(rss_conf->rss_key, key, key_len);
-		(*priv->rss_conf)[i] = rss_conf;
-	}
-	return 0;
-}
-
-/**
  * DPDK callback to update the RSS hash configuration.
  *
  * @param dev
@@ -137,23 +69,24 @@ mlx5_rss_hash_update(struct rte_eth_dev *dev,
 		     struct rte_eth_rss_conf *rss_conf)
 {
 	struct priv *priv = dev->data->dev_private;
-	int err = 0;
+	int ret = 0;
 
 	priv_lock(priv);
-
-	assert(priv->rss_conf != NULL);
-
-	/* Apply configuration. */
-	if (rss_conf->rss_key)
-		err = rss_hash_rss_conf_new_key(priv,
-						rss_conf->rss_key,
-						rss_conf->rss_key_len,
-						rss_conf->rss_hf);
-	/* Store protocols for which RSS is enabled. */
-	priv->rss_hf = rss_conf->rss_hf;
+	if (rss_conf->rss_key_len) {
+		priv->rss_conf.rss_key = rte_realloc(priv->rss_conf.rss_key,
+						     rss_conf->rss_key_len, 0);
+		if (!priv->rss_conf.rss_key) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		memcpy(&priv->rss_conf.rss_key, rss_conf->rss_key,
+		       rss_conf->rss_key_len);
+		priv->rss_conf.rss_key_len = rss_conf->rss_key_len;
+	}
+	priv->rss_conf.rss_hf = rss_conf->rss_hf;
+out:
 	priv_unlock(priv);
-	assert(err >= 0);
-	return -err;
+	return ret;
 }
 
 /**
@@ -172,28 +105,22 @@ mlx5_rss_hash_conf_get(struct rte_eth_dev *dev,
 		       struct rte_eth_rss_conf *rss_conf)
 {
 	struct priv *priv = dev->data->dev_private;
-	struct rte_eth_rss_conf *priv_rss_conf;
+	int ret = 0;
 
 	priv_lock(priv);
-
-	assert(priv->rss_conf != NULL);
-
-	priv_rss_conf = rss_hash_get(priv, rss_conf->rss_hf);
-	if (!priv_rss_conf) {
-		rss_conf->rss_hf = 0;
-		priv_unlock(priv);
-		return -EINVAL;
+	if (!rss_conf->rss_key) {
+		ret = -ENOMEM;
+		goto out;
 	}
-	if (rss_conf->rss_key &&
-	    rss_conf->rss_key_len >= priv_rss_conf->rss_key_len)
-		memcpy(rss_conf->rss_key,
-		       priv_rss_conf->rss_key,
-		       priv_rss_conf->rss_key_len);
-	rss_conf->rss_key_len = priv_rss_conf->rss_key_len;
-	rss_conf->rss_hf = priv_rss_conf->rss_hf;
-
+	if (rss_conf->rss_key_len < priv->rss_conf.rss_key_len) {
+		ret = -EINVAL;
+		goto out;
+	}
+	memcpy(rss_conf->rss_key, priv->rss_conf.rss_key,
+	       priv->rss_conf.rss_key_len);
+out:
 	priv_unlock(priv);
-	return 0;
+	return ret;
 }
 
 /**
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index c603d2b..d37dfbb 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -299,7 +299,7 @@ priv_make_ind_table_init(struct priv *priv,
 	/* Mandatory to receive frames not handled by normal hash RX queues. */
 	unsigned int hash_types_sup = 1 << HASH_RXQ_ETH;
 
-	rss_hf = priv->rss_hf;
+	rss_hf = priv->rss_conf.rss_hf;
 	/* Process other protocols only if more than one queue. */
 	if (priv->rxqs_n > 1)
 		for (i = 0; (i != hash_rxq_init_n); ++i)
@@ -435,8 +435,7 @@ priv_create_hash_rxqs(struct priv *priv)
 		struct hash_rxq *hash_rxq = &(*hash_rxqs)[i];
 		enum hash_rxq_type type =
 			hash_rxq_type_from_pos(&ind_table_init[j], k);
-		struct rte_eth_rss_conf *priv_rss_conf =
-			(*priv->rss_conf)[type];
+		struct rte_eth_rss_conf *priv_rss_conf = &priv->rss_conf;
 		struct ibv_rx_hash_conf hash_conf = {
 			.rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
 			.rx_hash_key_len = (priv_rss_conf ?
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v3 21/30] net/mlx5: remove hash Rx queues support
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (45 preceding siblings ...)
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 20/30] net/mlx5: handle a single RSS hash key for all protocols Nelio Laranjeiro
@ 2017-10-09 14:44 ` Nelio Laranjeiro
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 22/30] net/mlx5: fully convert a flow to verbs in validate Nelio Laranjeiro
                   ` (8 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-09 14:44 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
From this commit on, RSS support becomes unavailable until it is replaced
by the generic flow implementation.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5.c         |   1 -
 drivers/net/mlx5/mlx5.h         |   6 -
 drivers/net/mlx5/mlx5_rxq.c     | 469 ----------------------------------------
 drivers/net/mlx5/mlx5_rxtx.h    |  76 -------
 drivers/net/mlx5/mlx5_trigger.c |   9 +-
 5 files changed, 4 insertions(+), 557 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 46c89c6..b206535 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -198,7 +198,6 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	      ((priv->ctx != NULL) ? priv->ctx->device->name : ""));
 	/* In case mlx5_dev_stop() has not been called. */
 	priv_dev_interrupt_handler_uninstall(priv, dev);
-	priv_destroy_hash_rxqs(priv);
 	priv_dev_traffic_disable(priv, dev);
 	/* Prevent crashes when queues are still in use. */
 	dev->rx_pkt_burst = removed_rx_burst;
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 4718506..643bab6 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -127,13 +127,7 @@ struct priv {
 	unsigned int txqs_n; /* TX queues array size. */
 	struct mlx5_rxq_data *(*rxqs)[]; /* RX queues. */
 	struct mlx5_txq_data *(*txqs)[]; /* TX queues. */
-	/* Indirection tables referencing all RX WQs. */
-	struct ibv_rwq_ind_table *(*ind_tables)[];
-	unsigned int ind_tables_n; /* Number of indirection tables. */
 	unsigned int ind_table_max_size; /* Maximum indirection table size. */
-	/* Hash RX QPs feeding the indirection table. */
-	struct hash_rxq (*hash_rxqs)[];
-	unsigned int hash_rxqs_n; /* Hash RX QPs array size. */
 	struct rte_eth_rss_conf rss_conf; /* RSS configuration. */
 	struct rte_intr_handle intr_handle; /* Interrupt handler. */
 	unsigned int (*reta_idx)[]; /* RETA index table. */
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index d37dfbb..e7ec1da 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -64,122 +64,6 @@
 #include "mlx5_autoconf.h"
 #include "mlx5_defs.h"
 
-/* Initialization data for hash RX queues. */
-const struct hash_rxq_init hash_rxq_init[] = {
-	[HASH_RXQ_TCPV4] = {
-		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
-				IBV_RX_HASH_DST_IPV4 |
-				IBV_RX_HASH_SRC_PORT_TCP |
-				IBV_RX_HASH_DST_PORT_TCP),
-		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
-		.flow_priority = 0,
-		.flow_spec.tcp_udp = {
-			.type = IBV_FLOW_SPEC_TCP,
-			.size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
-		},
-		.underlayer = &hash_rxq_init[HASH_RXQ_IPV4],
-	},
-	[HASH_RXQ_UDPV4] = {
-		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
-				IBV_RX_HASH_DST_IPV4 |
-				IBV_RX_HASH_SRC_PORT_UDP |
-				IBV_RX_HASH_DST_PORT_UDP),
-		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
-		.flow_priority = 0,
-		.flow_spec.tcp_udp = {
-			.type = IBV_FLOW_SPEC_UDP,
-			.size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
-		},
-		.underlayer = &hash_rxq_init[HASH_RXQ_IPV4],
-	},
-	[HASH_RXQ_IPV4] = {
-		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
-				IBV_RX_HASH_DST_IPV4),
-		.dpdk_rss_hf = (ETH_RSS_IPV4 |
-				ETH_RSS_FRAG_IPV4),
-		.flow_priority = 1,
-		.flow_spec.ipv4 = {
-			.type = IBV_FLOW_SPEC_IPV4,
-			.size = sizeof(hash_rxq_init[0].flow_spec.ipv4),
-		},
-		.underlayer = &hash_rxq_init[HASH_RXQ_ETH],
-	},
-	[HASH_RXQ_TCPV6] = {
-		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
-				IBV_RX_HASH_DST_IPV6 |
-				IBV_RX_HASH_SRC_PORT_TCP |
-				IBV_RX_HASH_DST_PORT_TCP),
-		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
-		.flow_priority = 0,
-		.flow_spec.tcp_udp = {
-			.type = IBV_FLOW_SPEC_TCP,
-			.size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
-		},
-		.underlayer = &hash_rxq_init[HASH_RXQ_IPV6],
-	},
-	[HASH_RXQ_UDPV6] = {
-		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
-				IBV_RX_HASH_DST_IPV6 |
-				IBV_RX_HASH_SRC_PORT_UDP |
-				IBV_RX_HASH_DST_PORT_UDP),
-		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
-		.flow_priority = 0,
-		.flow_spec.tcp_udp = {
-			.type = IBV_FLOW_SPEC_UDP,
-			.size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
-		},
-		.underlayer = &hash_rxq_init[HASH_RXQ_IPV6],
-	},
-	[HASH_RXQ_IPV6] = {
-		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
-				IBV_RX_HASH_DST_IPV6),
-		.dpdk_rss_hf = (ETH_RSS_IPV6 |
-				ETH_RSS_FRAG_IPV6),
-		.flow_priority = 1,
-		.flow_spec.ipv6 = {
-			.type = IBV_FLOW_SPEC_IPV6,
-			.size = sizeof(hash_rxq_init[0].flow_spec.ipv6),
-		},
-		.underlayer = &hash_rxq_init[HASH_RXQ_ETH],
-	},
-	[HASH_RXQ_ETH] = {
-		.hash_fields = 0,
-		.dpdk_rss_hf = 0,
-		.flow_priority = 2,
-		.flow_spec.eth = {
-			.type = IBV_FLOW_SPEC_ETH,
-			.size = sizeof(hash_rxq_init[0].flow_spec.eth),
-		},
-		.underlayer = NULL,
-	},
-};
-
-/* Number of entries in hash_rxq_init[]. */
-const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
-
-/* Initialization data for hash RX queue indirection tables. */
-static const struct ind_table_init ind_table_init[] = {
-	{
-		.max_size = -1u, /* Superseded by HW limitations. */
-		.hash_types =
-			1 << HASH_RXQ_TCPV4 |
-			1 << HASH_RXQ_UDPV4 |
-			1 << HASH_RXQ_IPV4 |
-			1 << HASH_RXQ_TCPV6 |
-			1 << HASH_RXQ_UDPV6 |
-			1 << HASH_RXQ_IPV6 |
-			0,
-		.hash_types_n = 6,
-	},
-	{
-		.max_size = 1,
-		.hash_types = 1 << HASH_RXQ_ETH,
-		.hash_types_n = 1,
-	},
-};
-
-#define IND_TABLE_INIT_N RTE_DIM(ind_table_init)
-
 /* Default RSS hash key also used for ConnectX-3. */
 uint8_t rss_hash_default_key[] = {
 	0x2c, 0xc6, 0x81, 0xd1,
@@ -198,359 +82,6 @@ uint8_t rss_hash_default_key[] = {
 const size_t rss_hash_default_key_len = sizeof(rss_hash_default_key);
 
 /**
- * Populate flow steering rule for a given hash RX queue type using
- * information from hash_rxq_init[]. Nothing is written to flow_attr when
- * flow_attr_size is not large enough, but the required size is still returned.
- *
- * @param priv
- *   Pointer to private structure.
- * @param[out] flow_attr
- *   Pointer to flow attribute structure to fill. Note that the allocated
- *   area must be larger and large enough to hold all flow specifications.
- * @param flow_attr_size
- *   Entire size of flow_attr and trailing room for flow specifications.
- * @param type
- *   Hash RX queue type to use for flow steering rule.
- *
- * @return
- *   Total size of the flow attribute buffer. No errors are defined.
- */
-size_t
-priv_flow_attr(struct priv *priv, struct ibv_flow_attr *flow_attr,
-	       size_t flow_attr_size, enum hash_rxq_type type)
-{
-	size_t offset = sizeof(*flow_attr);
-	const struct hash_rxq_init *init = &hash_rxq_init[type];
-
-	assert(priv != NULL);
-	assert((size_t)type < RTE_DIM(hash_rxq_init));
-	do {
-		offset += init->flow_spec.hdr.size;
-		init = init->underlayer;
-	} while (init != NULL);
-	if (offset > flow_attr_size)
-		return offset;
-	flow_attr_size = offset;
-	init = &hash_rxq_init[type];
-	*flow_attr = (struct ibv_flow_attr){
-		.type = IBV_FLOW_ATTR_NORMAL,
-		/* Priorities < 3 are reserved for flow director. */
-		.priority = init->flow_priority + 3,
-		.num_of_specs = 0,
-		.port = priv->port,
-		.flags = 0,
-	};
-	do {
-		offset -= init->flow_spec.hdr.size;
-		memcpy((void *)((uintptr_t)flow_attr + offset),
-		       &init->flow_spec,
-		       init->flow_spec.hdr.size);
-		++flow_attr->num_of_specs;
-		init = init->underlayer;
-	} while (init != NULL);
-	return flow_attr_size;
-}
-
-/**
- * Convert hash type position in indirection table initializer to
- * hash RX queue type.
- *
- * @param table
- *   Indirection table initializer.
- * @param pos
- *   Hash type position.
- *
- * @return
- *   Hash RX queue type.
- */
-static enum hash_rxq_type
-hash_rxq_type_from_pos(const struct ind_table_init *table, unsigned int pos)
-{
-	enum hash_rxq_type type = HASH_RXQ_TCPV4;
-
-	assert(pos < table->hash_types_n);
-	do {
-		if ((table->hash_types & (1 << type)) && (pos-- == 0))
-			break;
-		++type;
-	} while (1);
-	return type;
-}
-
-/**
- * Filter out disabled hash RX queue types from ind_table_init[].
- *
- * @param priv
- *   Pointer to private structure.
- * @param[out] table
- *   Output table.
- *
- * @return
- *   Number of table entries.
- */
-static unsigned int
-priv_make_ind_table_init(struct priv *priv,
-			 struct ind_table_init (*table)[IND_TABLE_INIT_N])
-{
-	uint64_t rss_hf;
-	unsigned int i;
-	unsigned int j;
-	unsigned int table_n = 0;
-	/* Mandatory to receive frames not handled by normal hash RX queues. */
-	unsigned int hash_types_sup = 1 << HASH_RXQ_ETH;
-
-	rss_hf = priv->rss_conf.rss_hf;
-	/* Process other protocols only if more than one queue. */
-	if (priv->rxqs_n > 1)
-		for (i = 0; (i != hash_rxq_init_n); ++i)
-			if (rss_hf & hash_rxq_init[i].dpdk_rss_hf)
-				hash_types_sup |= (1 << i);
-
-	/* Filter out entries whose protocols are not in the set. */
-	for (i = 0, j = 0; (i != IND_TABLE_INIT_N); ++i) {
-		unsigned int nb;
-		unsigned int h;
-
-		/* j is increased only if the table has valid protocols. */
-		assert(j <= i);
-		(*table)[j] = ind_table_init[i];
-		(*table)[j].hash_types &= hash_types_sup;
-		for (h = 0, nb = 0; (h != hash_rxq_init_n); ++h)
-			if (((*table)[j].hash_types >> h) & 0x1)
-				++nb;
-		(*table)[i].hash_types_n = nb;
-		if (nb) {
-			++table_n;
-			++j;
-		}
-	}
-	return table_n;
-}
-
-/**
- * Initialize hash RX queues and indirection table.
- *
- * @param priv
- *   Pointer to private structure.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-int
-priv_create_hash_rxqs(struct priv *priv)
-{
-	struct ibv_wq *wqs[priv->reta_idx_n];
-	struct ind_table_init ind_table_init[IND_TABLE_INIT_N];
-	unsigned int ind_tables_n =
-		priv_make_ind_table_init(priv, &ind_table_init);
-	unsigned int hash_rxqs_n = 0;
-	struct hash_rxq (*hash_rxqs)[] = NULL;
-	struct ibv_rwq_ind_table *(*ind_tables)[] = NULL;
-	unsigned int i;
-	unsigned int j;
-	unsigned int k;
-	int err = 0;
-
-	assert(priv->ind_tables == NULL);
-	assert(priv->ind_tables_n == 0);
-	assert(priv->hash_rxqs == NULL);
-	assert(priv->hash_rxqs_n == 0);
-	assert(priv->pd != NULL);
-	assert(priv->ctx != NULL);
-	if (priv->isolated)
-		return 0;
-	if (priv->rxqs_n == 0)
-		return EINVAL;
-	assert(priv->rxqs != NULL);
-	if (ind_tables_n == 0) {
-		ERROR("all hash RX queue types have been filtered out,"
-		      " indirection table cannot be created");
-		return EINVAL;
-	}
-	if (priv->rxqs_n & (priv->rxqs_n - 1)) {
-		INFO("%u RX queues are configured, consider rounding this"
-		     " number to the next power of two for better balancing",
-		     priv->rxqs_n);
-		DEBUG("indirection table extended to assume %u WQs",
-		      priv->reta_idx_n);
-	}
-	for (i = 0; (i != priv->reta_idx_n); ++i) {
-		struct mlx5_rxq_ctrl *rxq_ctrl;
-
-		rxq_ctrl = container_of((*priv->rxqs)[(*priv->reta_idx)[i]],
-					struct mlx5_rxq_ctrl, rxq);
-		wqs[i] = rxq_ctrl->ibv->wq;
-	}
-	/* Get number of hash RX queues to configure. */
-	for (i = 0, hash_rxqs_n = 0; (i != ind_tables_n); ++i)
-		hash_rxqs_n += ind_table_init[i].hash_types_n;
-	DEBUG("allocating %u hash RX queues for %u WQs, %u indirection tables",
-	      hash_rxqs_n, priv->rxqs_n, ind_tables_n);
-	/* Create indirection tables. */
-	ind_tables = rte_calloc(__func__, ind_tables_n,
-				sizeof((*ind_tables)[0]), 0);
-	if (ind_tables == NULL) {
-		err = ENOMEM;
-		ERROR("cannot allocate indirection tables container: %s",
-		      strerror(err));
-		goto error;
-	}
-	for (i = 0; (i != ind_tables_n); ++i) {
-		struct ibv_rwq_ind_table_init_attr ind_init_attr = {
-			.log_ind_tbl_size = 0, /* Set below. */
-			.ind_tbl = wqs,
-			.comp_mask = 0,
-		};
-		unsigned int ind_tbl_size = ind_table_init[i].max_size;
-		struct ibv_rwq_ind_table *ind_table;
-
-		if (priv->reta_idx_n < ind_tbl_size)
-			ind_tbl_size = priv->reta_idx_n;
-		ind_init_attr.log_ind_tbl_size = log2above(ind_tbl_size);
-		errno = 0;
-		ind_table = ibv_create_rwq_ind_table(priv->ctx,
-						     &ind_init_attr);
-		if (ind_table != NULL) {
-			(*ind_tables)[i] = ind_table;
-			continue;
-		}
-		/* Not clear whether errno is set. */
-		err = (errno ? errno : EINVAL);
-		ERROR("RX indirection table creation failed with error %d: %s",
-		      err, strerror(err));
-		goto error;
-	}
-	/* Allocate array that holds hash RX queues and related data. */
-	hash_rxqs = rte_calloc(__func__, hash_rxqs_n,
-			       sizeof((*hash_rxqs)[0]), 0);
-	if (hash_rxqs == NULL) {
-		err = ENOMEM;
-		ERROR("cannot allocate hash RX queues container: %s",
-		      strerror(err));
-		goto error;
-	}
-	for (i = 0, j = 0, k = 0;
-	     ((i != hash_rxqs_n) && (j != ind_tables_n));
-	     ++i) {
-		struct hash_rxq *hash_rxq = &(*hash_rxqs)[i];
-		enum hash_rxq_type type =
-			hash_rxq_type_from_pos(&ind_table_init[j], k);
-		struct rte_eth_rss_conf *priv_rss_conf = &priv->rss_conf;
-		struct ibv_rx_hash_conf hash_conf = {
-			.rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
-			.rx_hash_key_len = (priv_rss_conf ?
-					    priv_rss_conf->rss_key_len :
-					    rss_hash_default_key_len),
-			.rx_hash_key = (priv_rss_conf ?
-					priv_rss_conf->rss_key :
-					rss_hash_default_key),
-			.rx_hash_fields_mask = hash_rxq_init[type].hash_fields,
-		};
-		struct ibv_qp_init_attr_ex qp_init_attr = {
-			.qp_type = IBV_QPT_RAW_PACKET,
-			.comp_mask = (IBV_QP_INIT_ATTR_PD |
-				      IBV_QP_INIT_ATTR_IND_TABLE |
-				      IBV_QP_INIT_ATTR_RX_HASH),
-			.rx_hash_conf = hash_conf,
-			.rwq_ind_tbl = (*ind_tables)[j],
-			.pd = priv->pd,
-		};
-
-		DEBUG("using indirection table %u for hash RX queue %u type %d",
-		      j, i, type);
-		*hash_rxq = (struct hash_rxq){
-			.priv = priv,
-			.qp = ibv_create_qp_ex(priv->ctx, &qp_init_attr),
-			.type = type,
-		};
-		if (hash_rxq->qp == NULL) {
-			err = (errno ? errno : EINVAL);
-			ERROR("Hash RX QP creation failure: %s",
-			      strerror(err));
-			goto error;
-		}
-		if (++k < ind_table_init[j].hash_types_n)
-			continue;
-		/* Switch to the next indirection table and reset hash RX
-		 * queue type array index. */
-		++j;
-		k = 0;
-	}
-	priv->ind_tables = ind_tables;
-	priv->ind_tables_n = ind_tables_n;
-	priv->hash_rxqs = hash_rxqs;
-	priv->hash_rxqs_n = hash_rxqs_n;
-	assert(err == 0);
-	return 0;
-error:
-	if (hash_rxqs != NULL) {
-		for (i = 0; (i != hash_rxqs_n); ++i) {
-			struct ibv_qp *qp = (*hash_rxqs)[i].qp;
-
-			if (qp == NULL)
-				continue;
-			claim_zero(ibv_destroy_qp(qp));
-		}
-		rte_free(hash_rxqs);
-	}
-	if (ind_tables != NULL) {
-		for (j = 0; (j != ind_tables_n); ++j) {
-			struct ibv_rwq_ind_table *ind_table =
-				(*ind_tables)[j];
-
-			if (ind_table == NULL)
-				continue;
-			claim_zero(ibv_destroy_rwq_ind_table(ind_table));
-		}
-		rte_free(ind_tables);
-	}
-	return err;
-}
-
-/**
- * Clean up hash RX queues and indirection table.
- *
- * @param priv
- *   Pointer to private structure.
- */
-void
-priv_destroy_hash_rxqs(struct priv *priv)
-{
-	unsigned int i;
-
-	DEBUG("destroying %u hash RX queues", priv->hash_rxqs_n);
-	if (priv->hash_rxqs_n == 0) {
-		assert(priv->hash_rxqs == NULL);
-		assert(priv->ind_tables == NULL);
-		return;
-	}
-	for (i = 0; (i != priv->hash_rxqs_n); ++i) {
-		struct hash_rxq *hash_rxq = &(*priv->hash_rxqs)[i];
-		unsigned int j, k;
-
-		assert(hash_rxq->priv == priv);
-		assert(hash_rxq->qp != NULL);
-		for (j = 0; (j != RTE_DIM(hash_rxq->mac_flow)); ++j)
-			for (k = 0; (k != RTE_DIM(hash_rxq->mac_flow[j])); ++k)
-				assert(hash_rxq->mac_flow[j][k] == NULL);
-		claim_zero(ibv_destroy_qp(hash_rxq->qp));
-	}
-	priv->hash_rxqs_n = 0;
-	rte_free(priv->hash_rxqs);
-	priv->hash_rxqs = NULL;
-	for (i = 0; (i != priv->ind_tables_n); ++i) {
-		struct ibv_rwq_ind_table *ind_table =
-			(*priv->ind_tables)[i];
-
-		assert(ind_table != NULL);
-		claim_zero(ibv_destroy_rwq_ind_table(ind_table));
-	}
-	priv->ind_tables_n = 0;
-	rte_free(priv->ind_tables);
-	priv->ind_tables = NULL;
-}
-
-/**
  * Allocate RX queue elements.
  *
  * @param rxq_ctrl
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index c60bc4d..a96a21a 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -176,75 +176,6 @@ struct mlx5_hrxq {
 	uint8_t rss_key[]; /* Hash key. */
 };
 
-/* Hash RX queue types. */
-enum hash_rxq_type {
-	HASH_RXQ_TCPV4,
-	HASH_RXQ_UDPV4,
-	HASH_RXQ_IPV4,
-	HASH_RXQ_TCPV6,
-	HASH_RXQ_UDPV6,
-	HASH_RXQ_IPV6,
-	HASH_RXQ_ETH,
-};
-
-/* Flow structure with Ethernet specification. It is packed to prevent padding
- * between attr and spec as this layout is expected by libibverbs. */
-struct flow_attr_spec_eth {
-	struct ibv_flow_attr attr;
-	struct ibv_flow_spec_eth spec;
-} __attribute__((packed));
-
-/* Define a struct flow_attr_spec_eth object as an array of at least
- * "size" bytes. Room after the first index is normally used to store
- * extra flow specifications. */
-#define FLOW_ATTR_SPEC_ETH(name, size) \
-	struct flow_attr_spec_eth name \
-		[((size) / sizeof(struct flow_attr_spec_eth)) + \
-		 !!((size) % sizeof(struct flow_attr_spec_eth))]
-
-/* Initialization data for hash RX queue. */
-struct hash_rxq_init {
-	uint64_t hash_fields; /* Fields that participate in the hash. */
-	uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
-	unsigned int flow_priority; /* Flow priority to use. */
-	union {
-		struct {
-			enum ibv_flow_spec_type type;
-			uint16_t size;
-		} hdr;
-		struct ibv_flow_spec_tcp_udp tcp_udp;
-		struct ibv_flow_spec_ipv4 ipv4;
-		struct ibv_flow_spec_ipv6 ipv6;
-		struct ibv_flow_spec_eth eth;
-	} flow_spec; /* Flow specification template. */
-	const struct hash_rxq_init *underlayer; /* Pointer to underlayer. */
-};
-
-/* Initialization data for indirection table. */
-struct ind_table_init {
-	unsigned int max_size; /* Maximum number of WQs. */
-	/* Hash RX queues using this table. */
-	unsigned int hash_types;
-	unsigned int hash_types_n;
-};
-
-/* Initialization data for special flows. */
-struct special_flow_init {
-	uint8_t dst_mac_val[6];
-	uint8_t dst_mac_mask[6];
-	unsigned int hash_types;
-	unsigned int per_vlan:1;
-};
-
-struct hash_rxq {
-	struct priv *priv; /* Back pointer to private data. */
-	struct ibv_qp *qp; /* Hash RX QP. */
-	enum hash_rxq_type type; /* Hash RX queue type. */
-	/* MAC flow steering rules, one per VLAN ID. */
-	struct ibv_flow *mac_flow
-		[MLX5_MAX_MAC_ADDRESSES][MLX5_MAX_VLAN_IDS];
-};
-
 /* TX queue descriptor. */
 __extension__
 struct mlx5_txq_data {
@@ -302,16 +233,9 @@ struct mlx5_txq_ctrl {
 
 /* mlx5_rxq.c */
 
-extern const struct hash_rxq_init hash_rxq_init[];
-extern const unsigned int hash_rxq_init_n;
-
 extern uint8_t rss_hash_default_key[];
 extern const size_t rss_hash_default_key_len;
 
-size_t priv_flow_attr(struct priv *, struct ibv_flow_attr *,
-		      size_t, enum hash_rxq_type);
-int priv_create_hash_rxqs(struct priv *);
-void priv_destroy_hash_rxqs(struct priv *);
 void mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *);
 int mlx5_rx_queue_setup(struct rte_eth_dev *, uint16_t, uint16_t, unsigned int,
 			const struct rte_eth_rxconf *, struct rte_mempool *);
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 4143571..29167ba 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -161,9 +161,9 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 	}
 	/* Update receive callback. */
 	priv_dev_select_rx_function(priv, dev);
-	err = priv_create_hash_rxqs(priv);
+	err = priv_dev_traffic_enable(priv, dev);
 	if (err) {
-		ERROR("%p: an error occurred while configuring hash RX queues:"
+		ERROR("%p: an error occurred while configuring control flows:"
 		      " %s",
 		      (void *)priv, strerror(err));
 		goto error;
@@ -190,8 +190,8 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 	dev->data->dev_started = 0;
 	LIST_FOREACH(mr, &priv->mr, next)
 		priv_mr_release(priv, mr);
-	priv_destroy_hash_rxqs(priv);
 	priv_flow_stop(priv, &priv->flows);
+	priv_dev_traffic_disable(priv, dev);
 	priv_txq_stop(priv);
 	priv_rxq_stop(priv);
 	priv_flow_delete_drop_queue(priv);
@@ -224,9 +224,8 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
 	rte_wmb();
 	usleep(1000 * priv->rxqs_n);
 	DEBUG("%p: cleaning up and destroying hash RX queues", (void *)dev);
-	priv_destroy_hash_rxqs(priv);
 	priv_flow_stop(priv, &priv->flows);
-	priv_flow_flush(priv, &priv->ctrl_flows);
+	priv_dev_traffic_disable(priv, dev);
 	priv_rx_intr_vec_disable(priv);
 	priv_dev_interrupt_handler_uninstall(priv, dev);
 	priv_txq_stop(priv);
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v3 22/30] net/mlx5: fully convert a flow to verbs in validate
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (46 preceding siblings ...)
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 21/30] net/mlx5: remove hash Rx queues support Nelio Laranjeiro
@ 2017-10-09 14:44 ` Nelio Laranjeiro
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 23/30] net/mlx5: process flows actions before of items Nelio Laranjeiro
                   ` (7 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-09 14:44 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
Validation of flows only performs a few checks on the pattern; in some
situations the validate action could end with success even though the
pattern could not be converted correctly.
This patch brings the conversion verification into the validate path as well.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5_flow.c | 194 +++++++++++++++++++++++++------------------
 1 file changed, 114 insertions(+), 80 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 83c75f4..13bd250 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -307,6 +307,7 @@ struct mlx5_flow_parse {
 	struct ibv_flow_attr *ibv_attr; /**< Verbs attribute. */
 	unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
 	uint32_t inner; /**< Set once VXLAN is encountered. */
+	uint32_t create:1; /**< Leave allocated resources on exit. */
 	uint64_t hash_fields; /**< Fields that participate in the hash. */
 	struct mlx5_flow_action actions; /**< Parsed action result. */
 };
@@ -418,7 +419,7 @@ mlx5_flow_item_validate(const struct rte_flow_item *item,
 }
 
 /**
- * Validate a flow supported by the NIC.
+ * Validate and convert a flow supported by the NIC.
  *
  * @param priv
  *   Pointer to private structure.
@@ -437,16 +438,24 @@ mlx5_flow_item_validate(const struct rte_flow_item *item,
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-priv_flow_validate(struct priv *priv,
-		   const struct rte_flow_attr *attr,
-		   const struct rte_flow_item items[],
-		   const struct rte_flow_action actions[],
-		   struct rte_flow_error *error,
-		   struct mlx5_flow_parse *flow)
+priv_flow_convert(struct priv *priv,
+		  const struct rte_flow_attr *attr,
+		  const struct rte_flow_item items[],
+		  const struct rte_flow_action actions[],
+		  struct rte_flow_error *error,
+		  struct mlx5_flow_parse *flow)
 {
 	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
 
 	(void)priv;
+	*flow = (struct mlx5_flow_parse){
+		.ibv_attr = flow->ibv_attr,
+		.create = flow->create,
+		.offset = sizeof(struct ibv_flow_attr),
+		.actions = {
+			.mark_id = MLX5_FLOW_MARK_DEFAULT,
+		},
+	};
 	if (attr->group) {
 		rte_flow_error_set(error, ENOTSUP,
 				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
@@ -647,35 +656,6 @@ priv_flow_validate(struct priv *priv,
 }
 
 /**
- * Validate a flow supported by the NIC.
- *
- * @see rte_flow_validate()
- * @see rte_flow_ops
- */
-int
-mlx5_flow_validate(struct rte_eth_dev *dev,
-		   const struct rte_flow_attr *attr,
-		   const struct rte_flow_item items[],
-		   const struct rte_flow_action actions[],
-		   struct rte_flow_error *error)
-{
-	struct priv *priv = dev->data->dev_private;
-	int ret;
-	struct mlx5_flow_parse flow = {
-		.offset = sizeof(struct ibv_flow_attr),
-		.actions = {
-			.mark_id = MLX5_FLOW_MARK_DEFAULT,
-			.queues_n = 0,
-		},
-	};
-
-	priv_lock(priv);
-	ret = priv_flow_validate(priv, attr, items, actions, error, &flow);
-	priv_unlock(priv);
-	return ret;
-}
-
-/**
  * Convert Ethernet item to Verbs specification.
  *
  * @param item[in]
@@ -1016,6 +996,7 @@ mlx5_flow_create_flag_mark(struct mlx5_flow_parse *flow, uint32_t mark_id)
 	struct ibv_flow_spec_action_tag *tag;
 	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
 
+	assert(flow->actions.mark);
 	tag = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
 	*tag = (struct ibv_flow_spec_action_tag){
 		.type = IBV_FLOW_SPEC_ACTION_TAG,
@@ -1023,6 +1004,7 @@ mlx5_flow_create_flag_mark(struct mlx5_flow_parse *flow, uint32_t mark_id)
 		.tag_id = mlx5_flow_mark_set(mark_id),
 	};
 	++flow->ibv_attr->num_of_specs;
+	flow->offset += size;
 	return 0;
 }
 
@@ -1167,12 +1149,10 @@ priv_flow_create_action_queue(struct priv *priv,
 }
 
 /**
- * Convert a flow.
+ * Validate a flow.
  *
  * @param priv
  *   Pointer to private structure.
- * @param list
- *   Pointer to a TAILQ flow list.
  * @param[in] attr
  *   Flow rule attributes.
  * @param[in] pattern
@@ -1181,40 +1161,35 @@ priv_flow_create_action_queue(struct priv *priv,
  *   Associated actions (list terminated by the END action).
  * @param[out] error
  *   Perform verbose error reporting if not NULL.
+ * @param[in,out] parser
+ *   MLX5 parser structure.
  *
  * @return
- *   A flow on success, NULL otherwise.
+ *   0 on success, negative errno value on failure.
  */
-static struct rte_flow *
-priv_flow_create(struct priv *priv,
-		 struct mlx5_flows *list,
-		 const struct rte_flow_attr *attr,
-		 const struct rte_flow_item items[],
-		 const struct rte_flow_action actions[],
-		 struct rte_flow_error *error)
+static int
+priv_flow_validate(struct priv *priv,
+		   const struct rte_flow_attr *attr,
+		   const struct rte_flow_item items[],
+		   const struct rte_flow_action actions[],
+		   struct rte_flow_error *error,
+		   struct mlx5_flow_parse *parser)
 {
-	struct rte_flow *rte_flow;
-	struct mlx5_flow_parse flow = {
-		.offset = sizeof(struct ibv_flow_attr),
-		.actions = {
-			.mark_id = MLX5_FLOW_MARK_DEFAULT,
-			.queues = { 0 },
-			.queues_n = 0,
-		},
-	};
 	int err;
 
-	err = priv_flow_validate(priv, attr, items, actions, error, &flow);
+	err = priv_flow_convert(priv, attr, items, actions, error, parser);
 	if (err)
 		goto exit;
-	flow.ibv_attr = rte_malloc(__func__, flow.offset, 0);
-	flow.offset = sizeof(struct ibv_flow_attr);
-	if (!flow.ibv_attr) {
+	if (parser->actions.mark)
+		parser->offset += sizeof(struct ibv_flow_spec_action_tag);
+	parser->ibv_attr = rte_malloc(__func__, parser->offset, 0);
+	if (!parser->ibv_attr) {
 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
 				   NULL, "cannot allocate ibv_attr memory");
+		err = rte_errno;
 		goto exit;
 	}
-	*flow.ibv_attr = (struct ibv_flow_attr){
+	*parser->ibv_attr = (struct ibv_flow_attr){
 		.type = IBV_FLOW_ATTR_NORMAL,
 		.size = sizeof(struct ibv_flow_attr),
 		.priority = attr->priority,
@@ -1222,32 +1197,91 @@ priv_flow_create(struct priv *priv,
 		.port = 0,
 		.flags = 0,
 	};
-	flow.inner = 0;
-	flow.hash_fields = 0;
-	claim_zero(priv_flow_validate(priv, attr, items, actions,
-				      error, &flow));
-	if (flow.actions.mark && !flow.actions.drop) {
-		mlx5_flow_create_flag_mark(&flow, flow.actions.mark_id);
-		flow.offset += sizeof(struct ibv_flow_spec_action_tag);
-	}
-	if (flow.actions.drop)
-		rte_flow =
-			priv_flow_create_action_queue_drop(priv, &flow, error);
+	err = priv_flow_convert(priv, attr, items, actions, error, parser);
+	if (err || parser->create)
+		goto exit;
+	if (parser->actions.mark)
+		mlx5_flow_create_flag_mark(parser, parser->actions.mark_id);
+	return 0;
+exit:
+	if (parser->ibv_attr)
+		rte_free(parser->ibv_attr);
+	return err;
+}
+
+/**
+ * Convert a flow.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param list
+ *   Pointer to a TAILQ flow list.
+ * @param[in] attr
+ *   Flow rule attributes.
+ * @param[in] pattern
+ *   Pattern specification (list terminated by the END pattern item).
+ * @param[in] actions
+ *   Associated actions (list terminated by the END action).
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   A flow on success, NULL otherwise.
+ */
+static struct rte_flow *
+priv_flow_create(struct priv *priv,
+		 struct mlx5_flows *list,
+		 const struct rte_flow_attr *attr,
+		 const struct rte_flow_item items[],
+		 const struct rte_flow_action actions[],
+		 struct rte_flow_error *error)
+{
+	struct mlx5_flow_parse parser = { .create = 1, };
+	struct rte_flow *flow;
+	int err;
+
+	err = priv_flow_validate(priv, attr, items, actions, error, &parser);
+	if (err)
+		goto exit;
+	if (parser.actions.drop)
+		flow = priv_flow_create_action_queue_drop(priv, &parser, error);
 	else
-		rte_flow = priv_flow_create_action_queue(priv, &flow, error);
-	if (!rte_flow)
+		flow = priv_flow_create_action_queue(priv, &parser, error);
+	if (!flow)
 		goto exit;
-	if (rte_flow) {
-		TAILQ_INSERT_TAIL(list, rte_flow, next);
-		DEBUG("Flow created %p", (void *)rte_flow);
-	}
-	return rte_flow;
+	TAILQ_INSERT_TAIL(list, flow, next);
+	DEBUG("Flow created %p", (void *)flow);
+	return flow;
 exit:
-	rte_free(flow.ibv_attr);
+	if (parser.ibv_attr)
+		rte_free(parser.ibv_attr);
 	return NULL;
 }
 
 /**
+ * Validate a flow supported by the NIC.
+ *
+ * @see rte_flow_validate()
+ * @see rte_flow_ops
+ */
+int
+mlx5_flow_validate(struct rte_eth_dev *dev,
+		   const struct rte_flow_attr *attr,
+		   const struct rte_flow_item items[],
+		   const struct rte_flow_action actions[],
+		   struct rte_flow_error *error)
+{
+	struct priv *priv = dev->data->dev_private;
+	int ret;
+	struct mlx5_flow_parse parser = { .create = 0, };
+
+	priv_lock(priv);
+	ret = priv_flow_validate(priv, attr, items, actions, error, &parser);
+	priv_unlock(priv);
+	return ret;
+}
+
+/**
  * Create a flow.
  *
  * @see rte_flow_create()
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v3 23/30] net/mlx5: process flows actions before of items
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (47 preceding siblings ...)
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 22/30] net/mlx5: fully convert a flow to verbs in validate Nelio Laranjeiro
@ 2017-10-09 14:44 ` Nelio Laranjeiro
  2017-10-09 14:45 ` [dpdk-dev] [PATCH v3 24/30] net/mlx5: merge internal parser and actions structures Nelio Laranjeiro
                   ` (6 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-09 14:44 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
In case the flow rule contains an RSS action, the RSS configuration to use
is the one provided by the user.  To correctly convert the DPDK RSS hash
fields to the Verbs ones according to the user's request, the actions
must be processed first.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5_flow.c | 90 ++++++++++++++++++++++----------------------
 1 file changed, 45 insertions(+), 45 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 13bd250..6a58194 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -484,51 +484,6 @@ priv_flow_convert(struct priv *priv,
 				   "only ingress is supported");
 		return -rte_errno;
 	}
-	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
-		const struct mlx5_flow_items *token = NULL;
-		unsigned int i;
-		int err;
-
-		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
-			continue;
-		for (i = 0;
-		     cur_item->items &&
-		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
-		     ++i) {
-			if (cur_item->items[i] == items->type) {
-				token = &mlx5_flow_items[items->type];
-				break;
-			}
-		}
-		if (!token)
-			goto exit_item_not_supported;
-		cur_item = token;
-		err = mlx5_flow_item_validate(items,
-					      (const uint8_t *)cur_item->mask,
-					      cur_item->mask_sz);
-		if (err)
-			goto exit_item_not_supported;
-		if (flow->ibv_attr && cur_item->convert) {
-			err = cur_item->convert(items,
-						(cur_item->default_mask ?
-						 cur_item->default_mask :
-						 cur_item->mask),
-						flow);
-			if (err)
-				goto exit_item_not_supported;
-		} else if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
-			if (flow->inner) {
-				rte_flow_error_set(error, ENOTSUP,
-						   RTE_FLOW_ERROR_TYPE_ITEM,
-						   items,
-						   "cannot recognize multiple"
-						   " VXLAN encapsulations");
-				return -rte_errno;
-			}
-			flow->inner = 1;
-		}
-		flow->offset += cur_item->dst_sz;
-	}
 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
 		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
 			continue;
@@ -644,6 +599,51 @@ priv_flow_convert(struct priv *priv,
 				   NULL, "no valid action");
 		return -rte_errno;
 	}
+	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
+		const struct mlx5_flow_items *token = NULL;
+		unsigned int i;
+		int err;
+
+		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
+			continue;
+		for (i = 0;
+		     cur_item->items &&
+		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
+		     ++i) {
+			if (cur_item->items[i] == items->type) {
+				token = &mlx5_flow_items[items->type];
+				break;
+			}
+		}
+		if (!token)
+			goto exit_item_not_supported;
+		cur_item = token;
+		err = mlx5_flow_item_validate(items,
+					      (const uint8_t *)cur_item->mask,
+					      cur_item->mask_sz);
+		if (err)
+			goto exit_item_not_supported;
+		if (flow->ibv_attr && cur_item->convert) {
+			err = cur_item->convert(items,
+						(cur_item->default_mask ?
+						 cur_item->default_mask :
+						 cur_item->mask),
+						flow);
+			if (err)
+				goto exit_item_not_supported;
+		} else if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
+			if (flow->inner) {
+				rte_flow_error_set(error, ENOTSUP,
+						   RTE_FLOW_ERROR_TYPE_ITEM,
+						   items,
+						   "cannot recognize multiple"
+						   " VXLAN encapsulations");
+				return -rte_errno;
+			}
+			flow->inner = 1;
+		}
+		flow->offset += cur_item->dst_sz;
+	}
 	return 0;
 exit_item_not_supported:
 	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v3 24/30] net/mlx5: merge internal parser and actions structures
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (48 preceding siblings ...)
  2017-10-09 14:44 ` [dpdk-dev] [PATCH v3 23/30] net/mlx5: process flows actions before of items Nelio Laranjeiro
@ 2017-10-09 14:45 ` Nelio Laranjeiro
  2017-10-09 14:45 ` [dpdk-dev] [PATCH v3 25/30] net/mlx5: use a better name for the flow parser Nelio Laranjeiro
                   ` (5 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-09 14:45 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
struct mlx5_flow_parse now embeds the fields of struct mlx5_flow_action.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5_flow.c | 94 ++++++++++++++++++++------------------------
 1 file changed, 43 insertions(+), 51 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 6a58194..f10f2b3 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -292,24 +292,19 @@ static const struct mlx5_flow_items mlx5_flow_items[] = {
 	},
 };
 
-/* Structure to parse actions. */
-struct mlx5_flow_action {
-	uint32_t queue:1; /**< Target is a receive queue. */
-	uint32_t drop:1; /**< Target is a drop queue. */
-	uint32_t mark:1; /**< Mark is present in the flow. */
-	uint32_t mark_id; /**< Mark identifier. */
-	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
-	uint16_t queues_n; /**< Number of entries in queue[]. */
-};
-
 /** Structure to pass to the conversion function. */
 struct mlx5_flow_parse {
 	struct ibv_flow_attr *ibv_attr; /**< Verbs attribute. */
 	unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
 	uint32_t inner; /**< Set once VXLAN is encountered. */
 	uint32_t create:1; /**< Leave allocated resources on exit. */
+	uint32_t queue:1; /**< Target is a receive queue. */
+	uint32_t drop:1; /**< Target is a drop queue. */
+	uint32_t mark:1; /**< Mark is present in the flow. */
+	uint32_t mark_id; /**< Mark identifier. */
 	uint64_t hash_fields; /**< Fields that participate in the hash. */
-	struct mlx5_flow_action actions; /**< Parsed action result. */
+	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
+	uint16_t queues_n; /**< Number of entries in queue[]. */
 };
 
 static const struct rte_flow_ops mlx5_flow_ops = {
@@ -452,9 +447,7 @@ priv_flow_convert(struct priv *priv,
 		.ibv_attr = flow->ibv_attr,
 		.create = flow->create,
 		.offset = sizeof(struct ibv_flow_attr),
-		.actions = {
-			.mark_id = MLX5_FLOW_MARK_DEFAULT,
-		},
+		.mark_id = MLX5_FLOW_MARK_DEFAULT,
 	};
 	if (attr->group) {
 		rte_flow_error_set(error, ENOTSUP,
@@ -488,7 +481,7 @@ priv_flow_convert(struct priv *priv,
 		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
 			continue;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
-			flow->actions.drop = 1;
+			flow->drop = 1;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
 			const struct rte_flow_action_queue *queue =
 				(const struct rte_flow_action_queue *)
@@ -498,13 +491,13 @@ priv_flow_convert(struct priv *priv,
 
 			if (!queue || (queue->index > (priv->rxqs_n - 1)))
 				goto exit_action_not_supported;
-			for (n = 0; n < flow->actions.queues_n; ++n) {
-				if (flow->actions.queues[n] == queue->index) {
+			for (n = 0; n < flow->queues_n; ++n) {
+				if (flow->queues[n] == queue->index) {
 					found = 1;
 					break;
 				}
 			}
-			if (flow->actions.queues_n > 1 && !found) {
+			if (flow->queues_n > 1 && !found) {
 				rte_flow_error_set(error, ENOTSUP,
 					   RTE_FLOW_ERROR_TYPE_ACTION,
 					   actions,
@@ -512,9 +505,9 @@ priv_flow_convert(struct priv *priv,
 				return -rte_errno;
 			}
 			if (!found) {
-				flow->actions.queue = 1;
-				flow->actions.queues_n = 1;
-				flow->actions.queues[0] = queue->index;
+				flow->queue = 1;
+				flow->queues_n = 1;
+				flow->queues[0] = queue->index;
 			}
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
 			const struct rte_flow_action_rss *rss =
@@ -529,12 +522,12 @@ priv_flow_convert(struct priv *priv,
 						   "no valid queues");
 				return -rte_errno;
 			}
-			if (flow->actions.queues_n == 1) {
+			if (flow->queues_n == 1) {
 				uint16_t found = 0;
 
-				assert(flow->actions.queues_n);
+				assert(flow->queues_n);
 				for (n = 0; n < rss->num; ++n) {
-					if (flow->actions.queues[0] ==
+					if (flow->queues[0] ==
 					    rss->queue[n]) {
 						found = 1;
 						break;
@@ -559,10 +552,10 @@ priv_flow_convert(struct priv *priv,
 					return -rte_errno;
 				}
 			}
-			flow->actions.queue = 1;
+			flow->queue = 1;
 			for (n = 0; n < rss->num; ++n)
-				flow->actions.queues[n] = rss->queue[n];
-			flow->actions.queues_n = rss->num;
+				flow->queues[n] = rss->queue[n];
+			flow->queues_n = rss->num;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
 			const struct rte_flow_action_mark *mark =
 				(const struct rte_flow_action_mark *)
@@ -582,19 +575,19 @@ priv_flow_convert(struct priv *priv,
 						   " and 16777199");
 				return -rte_errno;
 			}
-			flow->actions.mark = 1;
-			flow->actions.mark_id = mark->id;
+			flow->mark = 1;
+			flow->mark_id = mark->id;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
-			flow->actions.mark = 1;
+			flow->mark = 1;
 		} else {
 			goto exit_action_not_supported;
 		}
 	}
-	if (flow->actions.mark && !flow->ibv_attr && !flow->actions.drop)
+	if (flow->mark && !flow->ibv_attr && !flow->drop)
 		flow->offset += sizeof(struct ibv_flow_spec_action_tag);
-	if (!flow->ibv_attr && flow->actions.drop)
+	if (!flow->ibv_attr && flow->drop)
 		flow->offset += sizeof(struct ibv_flow_spec_action_drop);
-	if (!flow->actions.queue && !flow->actions.drop) {
+	if (!flow->queue && !flow->drop) {
 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
 				   NULL, "no valid action");
 		return -rte_errno;
@@ -996,7 +989,7 @@ mlx5_flow_create_flag_mark(struct mlx5_flow_parse *flow, uint32_t mark_id)
 	struct ibv_flow_spec_action_tag *tag;
 	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
 
-	assert(flow->actions.mark);
+	assert(flow->mark);
 	tag = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
 	*tag = (struct ibv_flow_spec_action_tag){
 		.type = IBV_FLOW_SPEC_ACTION_TAG,
@@ -1087,23 +1080,22 @@ priv_flow_create_action_queue(struct priv *priv,
 
 	assert(priv->pd);
 	assert(priv->ctx);
-	assert(!flow->actions.drop);
-	rte_flow =
-		rte_calloc(__func__, 1,
-			   sizeof(*flow) +
-			   flow->actions.queues_n * sizeof(uint16_t),
-			   0);
+	assert(!flow->drop);
+	rte_flow = rte_calloc(__func__, 1,
+			      sizeof(*rte_flow) +
+			      flow->queues_n * sizeof(uint16_t),
+			      0);
 	if (!rte_flow) {
 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
 				   NULL, "cannot allocate flow memory");
 		return NULL;
 	}
-	rte_flow->mark = flow->actions.mark;
+	rte_flow->mark = flow->mark;
 	rte_flow->ibv_attr = flow->ibv_attr;
 	rte_flow->queues = (uint16_t (*)[])(rte_flow + 1);
-	memcpy(rte_flow->queues, flow->actions.queues,
-	       flow->actions.queues_n * sizeof(uint16_t));
-	rte_flow->queues_n = flow->actions.queues_n;
+	memcpy(rte_flow->queues, flow->queues,
+	       flow->queues_n * sizeof(uint16_t));
+	rte_flow->queues_n = flow->queues_n;
 	rte_flow->frxq.hash_fields = flow->hash_fields;
 	rte_flow->frxq.hrxq = mlx5_priv_hrxq_get(priv, rss_hash_default_key,
 						 rss_hash_default_key_len,
@@ -1124,11 +1116,11 @@ priv_flow_create_action_queue(struct priv *priv,
 			goto error;
 		}
 	}
-	for (i = 0; i != flow->actions.queues_n; ++i) {
+	for (i = 0; i != flow->queues_n; ++i) {
 		struct mlx5_rxq_data *q =
-			(*priv->rxqs)[flow->actions.queues[i]];
+			(*priv->rxqs)[flow->queues[i]];
 
-		q->mark |= flow->actions.mark;
+		q->mark |= flow->mark;
 	}
 	if (!priv->dev->data->dev_started)
 		return rte_flow;
@@ -1180,7 +1172,7 @@ priv_flow_validate(struct priv *priv,
 	err = priv_flow_convert(priv, attr, items, actions, error, parser);
 	if (err)
 		goto exit;
-	if (parser->actions.mark)
+	if (parser->mark)
 		parser->offset += sizeof(struct ibv_flow_spec_action_tag);
 	parser->ibv_attr = rte_malloc(__func__, parser->offset, 0);
 	if (!parser->ibv_attr) {
@@ -1200,8 +1192,8 @@ priv_flow_validate(struct priv *priv,
 	err = priv_flow_convert(priv, attr, items, actions, error, parser);
 	if (err || parser->create)
 		goto exit;
-	if (parser->actions.mark)
-		mlx5_flow_create_flag_mark(parser, parser->actions.mark_id);
+	if (parser->mark)
+		mlx5_flow_create_flag_mark(parser, parser->mark_id);
 	return 0;
 exit:
 	if (parser->ibv_attr)
@@ -1243,7 +1235,7 @@ priv_flow_create(struct priv *priv,
 	err = priv_flow_validate(priv, attr, items, actions, error, &parser);
 	if (err)
 		goto exit;
-	if (parser.actions.drop)
+	if (parser.drop)
 		flow = priv_flow_create_action_queue_drop(priv, &parser, error);
 	else
 		flow = priv_flow_create_action_queue(priv, &parser, error);
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v3 25/30] net/mlx5: use a better name for the flow parser
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (49 preceding siblings ...)
  2017-10-09 14:45 ` [dpdk-dev] [PATCH v3 24/30] net/mlx5: merge internal parser and actions structures Nelio Laranjeiro
@ 2017-10-09 14:45 ` Nelio Laranjeiro
  2017-10-09 14:45 ` [dpdk-dev] [PATCH v3 26/30] net/mlx5: reorganise functions in the file Nelio Laranjeiro
                   ` (4 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-09 14:45 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
Variables of type struct mlx5_flow_parse were commonly named "flow", which
sometimes made development confusing.  The variable name is replaced by
"parser" to reflect its use.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5_flow.c | 200 +++++++++++++++++++++----------------------
 1 file changed, 96 insertions(+), 104 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index f10f2b3..6d7a29e 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -426,8 +426,8 @@ mlx5_flow_item_validate(const struct rte_flow_item *item,
  *   Associated actions (list terminated by the END action).
  * @param[out] error
  *   Perform verbose error reporting if not NULL.
- * @param[in, out] flow
- *   Flow structure to update.
+ * @param[in, out] parser
+ *   Internal parser structure.
  *
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
@@ -438,14 +438,14 @@ priv_flow_convert(struct priv *priv,
 		  const struct rte_flow_item items[],
 		  const struct rte_flow_action actions[],
 		  struct rte_flow_error *error,
-		  struct mlx5_flow_parse *flow)
+		  struct mlx5_flow_parse *parser)
 {
 	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
 
 	(void)priv;
-	*flow = (struct mlx5_flow_parse){
-		.ibv_attr = flow->ibv_attr,
-		.create = flow->create,
+	*parser = (struct mlx5_flow_parse){
+		.ibv_attr = parser->ibv_attr,
+		.create = parser->create,
 		.offset = sizeof(struct ibv_flow_attr),
 		.mark_id = MLX5_FLOW_MARK_DEFAULT,
 	};
@@ -481,7 +481,7 @@ priv_flow_convert(struct priv *priv,
 		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
 			continue;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
-			flow->drop = 1;
+			parser->drop = 1;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
 			const struct rte_flow_action_queue *queue =
 				(const struct rte_flow_action_queue *)
@@ -491,13 +491,13 @@ priv_flow_convert(struct priv *priv,
 
 			if (!queue || (queue->index > (priv->rxqs_n - 1)))
 				goto exit_action_not_supported;
-			for (n = 0; n < flow->queues_n; ++n) {
-				if (flow->queues[n] == queue->index) {
+			for (n = 0; n < parser->queues_n; ++n) {
+				if (parser->queues[n] == queue->index) {
 					found = 1;
 					break;
 				}
 			}
-			if (flow->queues_n > 1 && !found) {
+			if (parser->queues_n > 1 && !found) {
 				rte_flow_error_set(error, ENOTSUP,
 					   RTE_FLOW_ERROR_TYPE_ACTION,
 					   actions,
@@ -505,9 +505,9 @@ priv_flow_convert(struct priv *priv,
 				return -rte_errno;
 			}
 			if (!found) {
-				flow->queue = 1;
-				flow->queues_n = 1;
-				flow->queues[0] = queue->index;
+				parser->queue = 1;
+				parser->queues_n = 1;
+				parser->queues[0] = queue->index;
 			}
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
 			const struct rte_flow_action_rss *rss =
@@ -522,12 +522,12 @@ priv_flow_convert(struct priv *priv,
 						   "no valid queues");
 				return -rte_errno;
 			}
-			if (flow->queues_n == 1) {
+			if (parser->queues_n == 1) {
 				uint16_t found = 0;
 
-				assert(flow->queues_n);
+				assert(parser->queues_n);
 				for (n = 0; n < rss->num; ++n) {
-					if (flow->queues[0] ==
+					if (parser->queues[0] ==
 					    rss->queue[n]) {
 						found = 1;
 						break;
@@ -552,10 +552,10 @@ priv_flow_convert(struct priv *priv,
 					return -rte_errno;
 				}
 			}
-			flow->queue = 1;
+			parser->queue = 1;
 			for (n = 0; n < rss->num; ++n)
-				flow->queues[n] = rss->queue[n];
-			flow->queues_n = rss->num;
+				parser->queues[n] = rss->queue[n];
+			parser->queues_n = rss->num;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
 			const struct rte_flow_action_mark *mark =
 				(const struct rte_flow_action_mark *)
@@ -575,19 +575,19 @@ priv_flow_convert(struct priv *priv,
 						   " and 16777199");
 				return -rte_errno;
 			}
-			flow->mark = 1;
-			flow->mark_id = mark->id;
+			parser->mark = 1;
+			parser->mark_id = mark->id;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
-			flow->mark = 1;
+			parser->mark = 1;
 		} else {
 			goto exit_action_not_supported;
 		}
 	}
-	if (flow->mark && !flow->ibv_attr && !flow->drop)
-		flow->offset += sizeof(struct ibv_flow_spec_action_tag);
-	if (!flow->ibv_attr && flow->drop)
-		flow->offset += sizeof(struct ibv_flow_spec_action_drop);
-	if (!flow->queue && !flow->drop) {
+	if (parser->mark && !parser->ibv_attr && !parser->drop)
+		parser->offset += sizeof(struct ibv_flow_spec_action_tag);
+	if (!parser->ibv_attr && parser->drop)
+		parser->offset += sizeof(struct ibv_flow_spec_action_drop);
+	if (!parser->queue && !parser->drop) {
 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
 				   NULL, "no valid action");
 		return -rte_errno;
@@ -616,16 +616,16 @@ priv_flow_convert(struct priv *priv,
 					      cur_item->mask_sz);
 		if (err)
 			goto exit_item_not_supported;
-		if (flow->ibv_attr && cur_item->convert) {
+		if (parser->ibv_attr && cur_item->convert) {
 			err = cur_item->convert(items,
 						(cur_item->default_mask ?
 						 cur_item->default_mask :
 						 cur_item->mask),
-						flow);
+						parser);
 			if (err)
 				goto exit_item_not_supported;
 		} else if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
-			if (flow->inner) {
+			if (parser->inner) {
 				rte_flow_error_set(error, ENOTSUP,
 						   RTE_FLOW_ERROR_TYPE_ITEM,
 						   items,
@@ -633,9 +633,9 @@ priv_flow_convert(struct priv *priv,
 						   " VXLAN encapsulations");
 				return -rte_errno;
 			}
-			flow->inner = 1;
+			parser->inner = 1;
 		}
-		flow->offset += cur_item->dst_sz;
+		parser->offset += cur_item->dst_sz;
 	}
 	return 0;
 exit_item_not_supported:
@@ -665,17 +665,16 @@ mlx5_flow_create_eth(const struct rte_flow_item *item,
 {
 	const struct rte_flow_item_eth *spec = item->spec;
 	const struct rte_flow_item_eth *mask = item->mask;
-	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
+	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
 	struct ibv_flow_spec_eth *eth;
 	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
 	unsigned int i;
 
-	++flow->ibv_attr->num_of_specs;
-	flow->ibv_attr->priority = 2;
-	flow->hash_fields = 0;
-	eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
+	++parser->ibv_attr->num_of_specs;
+	parser->hash_fields = 0;
+	eth = (void *)((uintptr_t)parser->ibv_attr + parser->offset);
 	*eth = (struct ibv_flow_spec_eth) {
-		.type = flow->inner | IBV_FLOW_SPEC_ETH,
+		.type = parser->inner | IBV_FLOW_SPEC_ETH,
 		.size = eth_size,
 	};
 	if (!spec)
@@ -714,11 +713,11 @@ mlx5_flow_create_vlan(const struct rte_flow_item *item,
 {
 	const struct rte_flow_item_vlan *spec = item->spec;
 	const struct rte_flow_item_vlan *mask = item->mask;
-	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
+	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
 	struct ibv_flow_spec_eth *eth;
 	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
 
-	eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset - eth_size);
+	eth = (void *)((uintptr_t)parser->ibv_attr + parser->offset - eth_size);
 	if (!spec)
 		return 0;
 	if (!mask)
@@ -746,17 +745,15 @@ mlx5_flow_create_ipv4(const struct rte_flow_item *item,
 {
 	const struct rte_flow_item_ipv4 *spec = item->spec;
 	const struct rte_flow_item_ipv4 *mask = item->mask;
-	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
+	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
 	struct ibv_flow_spec_ipv4_ext *ipv4;
 	unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
 
-	++flow->ibv_attr->num_of_specs;
-	flow->ibv_attr->priority = 1;
-	flow->hash_fields = (IBV_RX_HASH_SRC_IPV4 |
-			     IBV_RX_HASH_DST_IPV4);
-	ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
+	++parser->ibv_attr->num_of_specs;
+	parser->hash_fields = (IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4);
+	ipv4 = (void *)((uintptr_t)parser->ibv_attr + parser->offset);
 	*ipv4 = (struct ibv_flow_spec_ipv4_ext) {
-		.type = flow->inner | IBV_FLOW_SPEC_IPV4_EXT,
+		.type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
 		.size = ipv4_size,
 	};
 	if (!spec)
@@ -800,18 +797,16 @@ mlx5_flow_create_ipv6(const struct rte_flow_item *item,
 {
 	const struct rte_flow_item_ipv6 *spec = item->spec;
 	const struct rte_flow_item_ipv6 *mask = item->mask;
-	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
+	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
 	struct ibv_flow_spec_ipv6 *ipv6;
 	unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
 	unsigned int i;
 
-	++flow->ibv_attr->num_of_specs;
-	flow->ibv_attr->priority = 1;
-	flow->hash_fields = (IBV_RX_HASH_SRC_IPV6 |
-			     IBV_RX_HASH_DST_IPV6);
-	ipv6 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
+	++parser->ibv_attr->num_of_specs;
+	parser->hash_fields = (IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6);
+	ipv6 = (void *)((uintptr_t)parser->ibv_attr + parser->offset);
 	*ipv6 = (struct ibv_flow_spec_ipv6) {
-		.type = flow->inner | IBV_FLOW_SPEC_IPV6,
+		.type = parser->inner | IBV_FLOW_SPEC_IPV6,
 		.size = ipv6_size,
 	};
 	if (!spec)
@@ -857,17 +852,16 @@ mlx5_flow_create_udp(const struct rte_flow_item *item,
 {
 	const struct rte_flow_item_udp *spec = item->spec;
 	const struct rte_flow_item_udp *mask = item->mask;
-	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
+	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
 	struct ibv_flow_spec_tcp_udp *udp;
 	unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
 
-	++flow->ibv_attr->num_of_specs;
-	flow->ibv_attr->priority = 0;
-	flow->hash_fields |= (IBV_RX_HASH_SRC_PORT_UDP |
-			      IBV_RX_HASH_DST_PORT_UDP);
-	udp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
+	++parser->ibv_attr->num_of_specs;
+	parser->hash_fields |= (IBV_RX_HASH_SRC_PORT_UDP |
+				IBV_RX_HASH_DST_PORT_UDP);
+	udp = (void *)((uintptr_t)parser->ibv_attr + parser->offset);
 	*udp = (struct ibv_flow_spec_tcp_udp) {
-		.type = flow->inner | IBV_FLOW_SPEC_UDP,
+		.type = parser->inner | IBV_FLOW_SPEC_UDP,
 		.size = udp_size,
 	};
 	if (!spec)
@@ -901,17 +895,16 @@ mlx5_flow_create_tcp(const struct rte_flow_item *item,
 {
 	const struct rte_flow_item_tcp *spec = item->spec;
 	const struct rte_flow_item_tcp *mask = item->mask;
-	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
+	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
 	struct ibv_flow_spec_tcp_udp *tcp;
 	unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
 
-	++flow->ibv_attr->num_of_specs;
-	flow->ibv_attr->priority = 0;
-	flow->hash_fields |= (IBV_RX_HASH_SRC_PORT_TCP |
-			      IBV_RX_HASH_DST_PORT_TCP);
-	tcp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
+	++parser->ibv_attr->num_of_specs;
+	parser->hash_fields |= (IBV_RX_HASH_SRC_PORT_TCP |
+				IBV_RX_HASH_DST_PORT_TCP);
+	tcp = (void *)((uintptr_t)parser->ibv_attr + parser->offset);
 	*tcp = (struct ibv_flow_spec_tcp_udp) {
-		.type = flow->inner | IBV_FLOW_SPEC_TCP,
+		.type = parser->inner | IBV_FLOW_SPEC_TCP,
 		.size = tcp_size,
 	};
 	if (!spec)
@@ -945,7 +938,7 @@ mlx5_flow_create_vxlan(const struct rte_flow_item *item,
 {
 	const struct rte_flow_item_vxlan *spec = item->spec;
 	const struct rte_flow_item_vxlan *mask = item->mask;
-	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
+	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
 	struct ibv_flow_spec_tunnel *vxlan;
 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
 	union vni {
@@ -953,15 +946,14 @@ mlx5_flow_create_vxlan(const struct rte_flow_item *item,
 		uint8_t vni[4];
 	} id;
 
-	++flow->ibv_attr->num_of_specs;
-	flow->ibv_attr->priority = 0;
+	++parser->ibv_attr->num_of_specs;
 	id.vni[0] = 0;
-	vxlan = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
+	vxlan = (void *)((uintptr_t)parser->ibv_attr + parser->offset);
 	*vxlan = (struct ibv_flow_spec_tunnel) {
-		.type = flow->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
+		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
 		.size = size,
 	};
-	flow->inner = IBV_FLOW_SPEC_INNER;
+	parser->inner = IBV_FLOW_SPEC_INNER;
 	if (!spec)
 		return 0;
 	if (!mask)
@@ -978,26 +970,26 @@ mlx5_flow_create_vxlan(const struct rte_flow_item *item,
 /**
  * Convert mark/flag action to Verbs specification.
  *
- * @param flow
- *   Pointer to MLX5 flow structure.
+ * @param parser
+ *   Internal parser structure.
  * @param mark_id
  *   Mark identifier.
  */
 static int
-mlx5_flow_create_flag_mark(struct mlx5_flow_parse *flow, uint32_t mark_id)
+mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
 {
 	struct ibv_flow_spec_action_tag *tag;
 	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
 
-	assert(flow->mark);
-	tag = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
+	assert(parser->mark);
+	tag = (void *)((uintptr_t)parser->ibv_attr + parser->offset);
 	*tag = (struct ibv_flow_spec_action_tag){
 		.type = IBV_FLOW_SPEC_ACTION_TAG,
 		.size = size,
 		.tag_id = mlx5_flow_mark_set(mark_id),
 	};
-	++flow->ibv_attr->num_of_specs;
-	flow->offset += size;
+	++parser->ibv_attr->num_of_specs;
+	parser->offset += size;
 	return 0;
 }
 
@@ -1006,8 +998,8 @@ mlx5_flow_create_flag_mark(struct mlx5_flow_parse *flow, uint32_t mark_id)
  *
  * @param priv
  *   Pointer to private structure.
- * @param flow
- *   MLX5 flow attributes (filled by mlx5_flow_validate()).
+ * @param parser
+ *   Internal parser structure.
  * @param[out] error
  *   Perform verbose error reporting if not NULL.
  *
@@ -1016,7 +1008,7 @@ mlx5_flow_create_flag_mark(struct mlx5_flow_parse *flow, uint32_t mark_id)
  */
 static struct rte_flow *
 priv_flow_create_action_queue_drop(struct priv *priv,
-				   struct mlx5_flow_parse *flow,
+				   struct mlx5_flow_parse *parser,
 				   struct rte_flow_error *error)
 {
 	struct rte_flow *rte_flow;
@@ -1032,14 +1024,14 @@ priv_flow_create_action_queue_drop(struct priv *priv,
 		return NULL;
 	}
 	rte_flow->drop = 1;
-	drop = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
+	drop = (void *)((uintptr_t)parser->ibv_attr + parser->offset);
 	*drop = (struct ibv_flow_spec_action_drop){
 			.type = IBV_FLOW_SPEC_ACTION_DROP,
 			.size = size,
 	};
-	++flow->ibv_attr->num_of_specs;
-	flow->offset += sizeof(struct ibv_flow_spec_action_drop);
-	rte_flow->ibv_attr = flow->ibv_attr;
+	++parser->ibv_attr->num_of_specs;
+	parser->offset += sizeof(struct ibv_flow_spec_action_drop);
+	rte_flow->ibv_attr = parser->ibv_attr;
 	if (!priv->dev->data->dev_started)
 		return rte_flow;
 	rte_flow->drxq.hrxq.qp = priv->flow_drop_queue->qp;
@@ -1062,8 +1054,8 @@ priv_flow_create_action_queue_drop(struct priv *priv,
  *
  * @param priv
  *   Pointer to private structure.
- * @param flow
- *   MLX5 flow attributes (filled by mlx5_flow_validate()).
+ * @param parser
+ *   MLX5 flow parser attributes (filled by mlx5_flow_validate()).
  * @param[out] error
  *   Perform verbose error reporting if not NULL.
  *
@@ -1072,7 +1064,7 @@ priv_flow_create_action_queue_drop(struct priv *priv,
  */
 static struct rte_flow *
 priv_flow_create_action_queue(struct priv *priv,
-			      struct mlx5_flow_parse *flow,
+			      struct mlx5_flow_parse *parser,
 			      struct rte_flow_error *error)
 {
 	struct rte_flow *rte_flow;
@@ -1080,33 +1072,33 @@ priv_flow_create_action_queue(struct priv *priv,
 
 	assert(priv->pd);
 	assert(priv->ctx);
-	assert(!flow->drop);
+	assert(!parser->drop);
 	rte_flow = rte_calloc(__func__, 1,
 			      sizeof(*rte_flow) +
-			      flow->queues_n * sizeof(uint16_t),
+			      parser->queues_n * sizeof(uint16_t),
 			      0);
 	if (!rte_flow) {
 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
 				   NULL, "cannot allocate flow memory");
 		return NULL;
 	}
-	rte_flow->mark = flow->mark;
-	rte_flow->ibv_attr = flow->ibv_attr;
+	rte_flow->mark = parser->mark;
+	rte_flow->ibv_attr = parser->ibv_attr;
 	rte_flow->queues = (uint16_t (*)[])(rte_flow + 1);
-	memcpy(rte_flow->queues, flow->queues,
-	       flow->queues_n * sizeof(uint16_t));
-	rte_flow->queues_n = flow->queues_n;
-	rte_flow->frxq.hash_fields = flow->hash_fields;
+	memcpy(rte_flow->queues, parser->queues,
+	       parser->queues_n * sizeof(uint16_t));
+	rte_flow->queues_n = parser->queues_n;
+	rte_flow->frxq.hash_fields = parser->hash_fields;
 	rte_flow->frxq.hrxq = mlx5_priv_hrxq_get(priv, rss_hash_default_key,
 						 rss_hash_default_key_len,
-						 flow->hash_fields,
+						 parser->hash_fields,
 						 (*rte_flow->queues),
 						 rte_flow->queues_n);
 	if (!rte_flow->frxq.hrxq) {
 		rte_flow->frxq.hrxq =
 			mlx5_priv_hrxq_new(priv, rss_hash_default_key,
 					   rss_hash_default_key_len,
-					   flow->hash_fields,
+					   parser->hash_fields,
 					   (*rte_flow->queues),
 					   rte_flow->queues_n);
 		if (!rte_flow->frxq.hrxq) {
@@ -1116,11 +1108,11 @@ priv_flow_create_action_queue(struct priv *priv,
 			goto error;
 		}
 	}
-	for (i = 0; i != flow->queues_n; ++i) {
+	for (i = 0; i != parser->queues_n; ++i) {
 		struct mlx5_rxq_data *q =
-			(*priv->rxqs)[flow->queues[i]];
+			(*priv->rxqs)[parser->queues[i]];
 
-		q->mark |= flow->mark;
+		q->mark |= parser->mark;
 	}
 	if (!priv->dev->data->dev_started)
 		return rte_flow;
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v3 26/30] net/mlx5: reorganise functions in the file
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (50 preceding siblings ...)
  2017-10-09 14:45 ` [dpdk-dev] [PATCH v3 25/30] net/mlx5: use a better name for the flow parser Nelio Laranjeiro
@ 2017-10-09 14:45 ` Nelio Laranjeiro
  2017-10-09 14:45 ` [dpdk-dev] [PATCH v3 27/30] net/mlx5: move Verbs flows and attributes Nelio Laranjeiro
                   ` (3 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-09 14:45 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
Move mlx5_flow_validate/create/flush/isolate() to the end of the file.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5_flow.c | 132 +++++++++++++++++++++----------------------
 1 file changed, 66 insertions(+), 66 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 6d7a29e..f809ed5 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -1347,26 +1347,6 @@ priv_flow_destroy(struct priv *priv,
 }
 
 /**
- * Destroy a flow.
- *
- * @see rte_flow_destroy()
- * @see rte_flow_ops
- */
-int
-mlx5_flow_destroy(struct rte_eth_dev *dev,
-		  struct rte_flow *flow,
-		  struct rte_flow_error *error)
-{
-	struct priv *priv = dev->data->dev_private;
-
-	(void)error;
-	priv_lock(priv);
-	priv_flow_destroy(priv, &priv->flows, flow);
-	priv_unlock(priv);
-	return 0;
-}
-
-/**
  * Destroy all flows.
  *
  * @param priv
@@ -1386,25 +1366,6 @@ priv_flow_flush(struct priv *priv, struct mlx5_flows *list)
 }
 
 /**
- * Destroy all flows.
- *
- * @see rte_flow_flush()
- * @see rte_flow_ops
- */
-int
-mlx5_flow_flush(struct rte_eth_dev *dev,
-		struct rte_flow_error *error)
-{
-	struct priv *priv = dev->data->dev_private;
-
-	(void)error;
-	priv_lock(priv);
-	priv_flow_flush(priv, &priv->flows);
-	priv_unlock(priv);
-	return 0;
-}
-
-/**
  * Create drop queue.
  *
  * @param priv
@@ -1609,33 +1570,6 @@ priv_flow_start(struct priv *priv, struct mlx5_flows *list)
 }
 
 /**
- * Isolated mode.
- *
- * @see rte_flow_isolate()
- * @see rte_flow_ops
- */
-int
-mlx5_flow_isolate(struct rte_eth_dev *dev,
-		  int enable,
-		  struct rte_flow_error *error)
-{
-	struct priv *priv = dev->data->dev_private;
-
-	priv_lock(priv);
-	if (dev->data->dev_started) {
-		rte_flow_error_set(error, EBUSY,
-				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-				   NULL,
-				   "port must be stopped first");
-		priv_unlock(priv);
-		return -rte_errno;
-	}
-	priv->isolated = !!enable;
-	priv_unlock(priv);
-	return 0;
-}
-
-/**
  * Verify the flow list is empty
  *
  * @param priv
@@ -1745,3 +1679,69 @@ mlx5_ctrl_flow(struct rte_eth_dev *dev,
 {
 	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
 }
+
+/**
+ * Destroy a flow.
+ *
+ * @see rte_flow_destroy()
+ * @see rte_flow_ops
+ */
+int
+mlx5_flow_destroy(struct rte_eth_dev *dev,
+		  struct rte_flow *flow,
+		  struct rte_flow_error *error)
+{
+	struct priv *priv = dev->data->dev_private;
+
+	(void)error;
+	priv_lock(priv);
+	priv_flow_destroy(priv, &priv->flows, flow);
+	priv_unlock(priv);
+	return 0;
+}
+
+/**
+ * Destroy all flows.
+ *
+ * @see rte_flow_flush()
+ * @see rte_flow_ops
+ */
+int
+mlx5_flow_flush(struct rte_eth_dev *dev,
+		struct rte_flow_error *error)
+{
+	struct priv *priv = dev->data->dev_private;
+
+	(void)error;
+	priv_lock(priv);
+	priv_flow_flush(priv, &priv->flows);
+	priv_unlock(priv);
+	return 0;
+}
+
+/**
+ * Isolated mode.
+ *
+ * @see rte_flow_isolate()
+ * @see rte_flow_ops
+ */
+int
+mlx5_flow_isolate(struct rte_eth_dev *dev,
+		  int enable,
+		  struct rte_flow_error *error)
+{
+	struct priv *priv = dev->data->dev_private;
+
+	priv_lock(priv);
+	if (dev->data->dev_started) {
+		rte_flow_error_set(error, EBUSY,
+				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				   NULL,
+				   "port must be stopped first");
+		priv_unlock(priv);
+		return -rte_errno;
+	}
+	priv->isolated = !!enable;
+	priv_unlock(priv);
+	return 0;
+}
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v3 27/30] net/mlx5: move Verbs flows and attributes
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (51 preceding siblings ...)
  2017-10-09 14:45 ` [dpdk-dev] [PATCH v3 26/30] net/mlx5: reorganise functions in the file Nelio Laranjeiro
@ 2017-10-09 14:45 ` Nelio Laranjeiro
  2017-10-09 14:45 ` [dpdk-dev] [PATCH v3 28/30] net/mlx5: handle RSS hash configuration in RSS flow Nelio Laranjeiro
                   ` (2 subsequent siblings)
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-09 14:45 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
Move ibv_attr, which contains the specification of the flow from the Verbs
point of view, together with the Verbs flow itself, next to the related Verbs
objects making up the flow.
This also prepares for correctly handling the RSS hash configuration
provided by the user, as multiple Verbs flows will be necessary for a
single generic flow.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5_flow.c | 46 ++++++++++++++++++++++++++------------------
 1 file changed, 27 insertions(+), 19 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index f809ed5..4ff915a 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -101,11 +101,15 @@ struct mlx5_hrxq_drop {
 /* Flows structures. */
 struct mlx5_flow {
 	uint64_t hash_fields; /**< Fields that participate in the hash. */
+	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
+	struct ibv_flow *ibv_flow; /**< Verbs flow. */
 	struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
 };
 
 /* Drop flows structures. */
 struct mlx5_flow_drop {
+	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
+	struct ibv_flow *ibv_flow; /**< Verbs flow. */
 	struct mlx5_hrxq_drop hrxq; /**< Drop hash Rx queue. */
 };
 
@@ -113,8 +117,6 @@ struct rte_flow {
 	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
 	uint32_t mark:1; /**< Set if the flow is marked. */
 	uint32_t drop:1; /**< Drop queue. */
-	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
-	struct ibv_flow *ibv_flow; /**< Verbs flow. */
 	uint16_t queues_n; /**< Number of entries in queue[]. */
 	uint16_t (*queues)[]; /**< Queues indexes to use. */
 	union {
@@ -1031,13 +1033,13 @@ priv_flow_create_action_queue_drop(struct priv *priv,
 	};
 	++parser->ibv_attr->num_of_specs;
 	parser->offset += sizeof(struct ibv_flow_spec_action_drop);
-	rte_flow->ibv_attr = parser->ibv_attr;
+	rte_flow->drxq.ibv_attr = parser->ibv_attr;
 	if (!priv->dev->data->dev_started)
 		return rte_flow;
 	rte_flow->drxq.hrxq.qp = priv->flow_drop_queue->qp;
-	rte_flow->ibv_flow = ibv_create_flow(rte_flow->drxq.hrxq.qp,
-					     rte_flow->ibv_attr);
-	if (!rte_flow->ibv_flow) {
+	rte_flow->drxq.ibv_flow = ibv_create_flow(rte_flow->drxq.hrxq.qp,
+						  rte_flow->drxq.ibv_attr);
+	if (!rte_flow->drxq.ibv_flow) {
 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
 				   NULL, "flow rule creation failure");
 		goto error;
@@ -1083,7 +1085,7 @@ priv_flow_create_action_queue(struct priv *priv,
 		return NULL;
 	}
 	rte_flow->mark = parser->mark;
-	rte_flow->ibv_attr = parser->ibv_attr;
+	rte_flow->frxq.ibv_attr = parser->ibv_attr;
 	rte_flow->queues = (uint16_t (*)[])(rte_flow + 1);
 	memcpy(rte_flow->queues, parser->queues,
 	       parser->queues_n * sizeof(uint16_t));
@@ -1116,9 +1118,9 @@ priv_flow_create_action_queue(struct priv *priv,
 	}
 	if (!priv->dev->data->dev_started)
 		return rte_flow;
-	rte_flow->ibv_flow = ibv_create_flow(rte_flow->frxq.hrxq->qp,
-					     rte_flow->ibv_attr);
-	if (!rte_flow->ibv_flow) {
+	rte_flow->frxq.ibv_flow = ibv_create_flow(rte_flow->frxq.hrxq->qp,
+						  rte_flow->frxq.ibv_attr);
+	if (!rte_flow->frxq.ibv_flow) {
 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
 				   NULL, "flow rule creation failure");
 		goto error;
@@ -1336,12 +1338,17 @@ priv_flow_destroy(struct priv *priv,
 		rxq_data->mark = mark;
 	}
 free:
-	if (flow->ibv_flow)
-		claim_zero(ibv_destroy_flow(flow->ibv_flow));
-	if (!flow->drop)
+	if (flow->drop) {
+		if (flow->drxq.ibv_flow)
+			claim_zero(ibv_destroy_flow(flow->drxq.ibv_flow));
+		rte_free(flow->drxq.ibv_attr);
+	} else {
 		mlx5_priv_hrxq_release(priv, flow->frxq.hrxq);
+		if (flow->frxq.ibv_flow)
+			claim_zero(ibv_destroy_flow(flow->frxq.ibv_flow));
+		rte_free(flow->frxq.ibv_attr);
+	}
 	TAILQ_REMOVE(list, flow, next);
-	rte_free(flow->ibv_attr);
 	DEBUG("Flow destroyed %p", (void *)flow);
 	rte_free(flow);
 }
@@ -1490,8 +1497,9 @@ priv_flow_stop(struct priv *priv, struct mlx5_flows *list)
 	struct rte_flow *flow;
 
 	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
-		claim_zero(ibv_destroy_flow(flow->ibv_flow));
-		flow->ibv_flow = NULL;
+		assert(!flow->drop);
+		claim_zero(ibv_destroy_flow(flow->frxq.ibv_flow));
+		flow->frxq.ibv_flow = NULL;
 		mlx5_priv_hrxq_release(priv, flow->frxq.hrxq);
 		flow->frxq.hrxq = NULL;
 		if (flow->mark) {
@@ -1546,9 +1554,9 @@ priv_flow_start(struct priv *priv, struct mlx5_flows *list)
 			return rte_errno;
 		}
 flow_create:
-		flow->ibv_flow = ibv_create_flow(flow->frxq.hrxq->qp,
-						 flow->ibv_attr);
-		if (!flow->ibv_flow) {
+		flow->frxq.ibv_flow = ibv_create_flow(flow->frxq.hrxq->qp,
+						      flow->frxq.ibv_attr);
+		if (!flow->frxq.ibv_flow) {
 			DEBUG("Flow %p cannot be applied", (void *)flow);
 			rte_errno = EINVAL;
 			return rte_errno;
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v3 28/30] net/mlx5: handle RSS hash configuration in RSS flow
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (52 preceding siblings ...)
  2017-10-09 14:45 ` [dpdk-dev] [PATCH v3 27/30] net/mlx5: move Verbs flows and attributes Nelio Laranjeiro
@ 2017-10-09 14:45 ` Nelio Laranjeiro
  2017-10-09 14:45 ` [dpdk-dev] [PATCH v3 29/30] net/mlx5: support flow director Nelio Laranjeiro
  2017-10-09 14:45 ` [dpdk-dev] [PATCH v3 30/30] net/mlx5: add new operations for isolated mode Nelio Laranjeiro
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-09 14:45 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
Add RSS support according to the RSS configuration.
A special case is handled, when the pattern does not cover the RSS hash
configuration request such as:
 flow create 0 ingress pattern eth / end actions rss queues 0 1 end / end
In such a situation, with the default testpmd RSS configuration (i.e. IP),
it should be converted to 3 Verbs flows to handle the request correctly:
 1. IPv4 flow, an extra IPv4 wildcard specification needs to be added in
    the conversion.
 2. IPv6 flow, same as for IPv4.
 3. Ethernet followed by any other protocol on which no RSS can be
    performed and thus the traffic will be redirected to the first queue of
    the user request.
The same kind of issue is handled if the RSS is performed only on UDPv4 or
UDPv6 or TCPv*.
This does not handle a priority conflict which can occur if the user adds
several colliding flow rules.  Currently, in the example above, the request
already consumes 2 priorities (one for the IPv4/IPv6 matching rule priority
and one for the Ethernet matching rule priority + 1).
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5_flow.c | 1424 ++++++++++++++++++++++++++++++------------
 1 file changed, 1022 insertions(+), 402 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 4ff915a..23ca2b8 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -55,6 +55,10 @@
 /* Define minimal priority for control plane flows. */
 #define MLX5_CTRL_FLOW_PRIORITY 4
 
+/* Internet Protocol versions. */
+#define MLX5_IPV4 4
+#define MLX5_IPV6 6
+
 static int
 mlx5_flow_create_eth(const struct rte_flow_item *item,
 		     const void *default_mask,
@@ -90,6 +94,98 @@ mlx5_flow_create_vxlan(const struct rte_flow_item *item,
 		       const void *default_mask,
 		       void *data);
 
+struct mlx5_flow_parse;
+
+static void
+mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
+		      unsigned int size);
+
+static int
+mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
+
+/* Hash RX queue types. */
+enum hash_rxq_type {
+	HASH_RXQ_TCPV4,
+	HASH_RXQ_UDPV4,
+	HASH_RXQ_IPV4,
+	HASH_RXQ_TCPV6,
+	HASH_RXQ_UDPV6,
+	HASH_RXQ_IPV6,
+	HASH_RXQ_ETH,
+};
+
+/* Initialization data for hash RX queue. */
+struct hash_rxq_init {
+	uint64_t hash_fields; /* Fields that participate in the hash. */
+	uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
+	unsigned int flow_priority; /* Flow priority to use. */
+	unsigned int ip_version; /* Internet protocol. */
+};
+
+/* Initialization data for hash RX queues. */
+const struct hash_rxq_init hash_rxq_init[] = {
+	[HASH_RXQ_TCPV4] = {
+		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
+				IBV_RX_HASH_DST_IPV4 |
+				IBV_RX_HASH_SRC_PORT_TCP |
+				IBV_RX_HASH_DST_PORT_TCP),
+		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
+		.flow_priority = 0,
+		.ip_version = MLX5_IPV4,
+	},
+	[HASH_RXQ_UDPV4] = {
+		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
+				IBV_RX_HASH_DST_IPV4 |
+				IBV_RX_HASH_SRC_PORT_UDP |
+				IBV_RX_HASH_DST_PORT_UDP),
+		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
+		.flow_priority = 0,
+		.ip_version = MLX5_IPV4,
+	},
+	[HASH_RXQ_IPV4] = {
+		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
+				IBV_RX_HASH_DST_IPV4),
+		.dpdk_rss_hf = (ETH_RSS_IPV4 |
+				ETH_RSS_FRAG_IPV4),
+		.flow_priority = 1,
+		.ip_version = MLX5_IPV4,
+	},
+	[HASH_RXQ_TCPV6] = {
+		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
+				IBV_RX_HASH_DST_IPV6 |
+				IBV_RX_HASH_SRC_PORT_TCP |
+				IBV_RX_HASH_DST_PORT_TCP),
+		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
+		.flow_priority = 0,
+		.ip_version = MLX5_IPV6,
+	},
+	[HASH_RXQ_UDPV6] = {
+		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
+				IBV_RX_HASH_DST_IPV6 |
+				IBV_RX_HASH_SRC_PORT_UDP |
+				IBV_RX_HASH_DST_PORT_UDP),
+		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
+		.flow_priority = 0,
+		.ip_version = MLX5_IPV6,
+	},
+	[HASH_RXQ_IPV6] = {
+		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
+				IBV_RX_HASH_DST_IPV6),
+		.dpdk_rss_hf = (ETH_RSS_IPV6 |
+				ETH_RSS_FRAG_IPV6),
+		.flow_priority = 1,
+		.ip_version = MLX5_IPV6,
+	},
+	[HASH_RXQ_ETH] = {
+		.hash_fields = 0,
+		.dpdk_rss_hf = 0,
+		.flow_priority = 2,
+	},
+};
+
+/* Number of entries in hash_rxq_init[]. */
+const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
+
 /** Structure for Drop queue. */
 struct mlx5_hrxq_drop {
 	struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
@@ -110,7 +206,6 @@ struct mlx5_flow {
 struct mlx5_flow_drop {
 	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
 	struct ibv_flow *ibv_flow; /**< Verbs flow. */
-	struct mlx5_hrxq_drop hrxq; /**< Drop hash Rx queue. */
 };
 
 struct rte_flow {
@@ -119,8 +214,11 @@ struct rte_flow {
 	uint32_t drop:1; /**< Drop queue. */
 	uint16_t queues_n; /**< Number of entries in queue[]. */
 	uint16_t (*queues)[]; /**< Queues indexes to use. */
+	struct rte_eth_rss_conf rss_conf; /**< RSS configuration */
+	uint8_t rss_key[40]; /**< copy of the RSS key. */
 	union {
-		struct mlx5_flow frxq; /**< Flow with Rx queue. */
+		struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
+		/**< Flow with Rx queue. */
 		struct mlx5_flow_drop drxq; /**< Flow with drop Rx queue. */
 	};
 };
@@ -224,7 +322,7 @@ static const struct mlx5_flow_items mlx5_flow_items[] = {
 		.default_mask = &rte_flow_item_ipv4_mask,
 		.mask_sz = sizeof(struct rte_flow_item_ipv4),
 		.convert = mlx5_flow_create_ipv4,
-		.dst_sz = sizeof(struct ibv_flow_spec_ipv4),
+		.dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
 	},
 	[RTE_FLOW_ITEM_TYPE_IPV6] = {
 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
@@ -296,17 +394,31 @@ static const struct mlx5_flow_items mlx5_flow_items[] = {
 
 /** Structure to pass to the conversion function. */
 struct mlx5_flow_parse {
-	struct ibv_flow_attr *ibv_attr; /**< Verbs attribute. */
-	unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
 	uint32_t inner; /**< Set once VXLAN is encountered. */
-	uint32_t create:1; /**< Leave allocated resources on exit. */
-	uint32_t queue:1; /**< Target is a receive queue. */
+	uint32_t create:1;
+	/**< Whether resources should remain after a validate. */
 	uint32_t drop:1; /**< Target is a drop queue. */
 	uint32_t mark:1; /**< Mark is present in the flow. */
 	uint32_t mark_id; /**< Mark identifier. */
-	uint64_t hash_fields; /**< Fields that participate in the hash. */
 	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
 	uint16_t queues_n; /**< Number of entries in queue[]. */
+	struct rte_eth_rss_conf rss_conf; /**< RSS configuration */
+	uint8_t rss_key[40]; /**< copy of the RSS key. */
+	enum hash_rxq_type layer; /**< Last pattern layer detected. */
+	union {
+		struct {
+			struct ibv_flow_attr *ibv_attr;
+			/**< Pointer to Verbs attributes. */
+			unsigned int offset;
+			/**< Current position or total size of the attribute. */
+		} queue[RTE_DIM(hash_rxq_init)];
+		struct {
+			struct ibv_flow_attr *ibv_attr;
+			/**< Pointer to Verbs attributes. */
+			unsigned int offset;
+			/**< Current position or total size of the attribute. */
+		} drop_q;
+	};
 };
 
 static const struct rte_flow_ops mlx5_flow_ops = {
@@ -416,16 +528,42 @@ mlx5_flow_item_validate(const struct rte_flow_item *item,
 }
 
 /**
- * Validate and convert a flow supported by the NIC.
+ * Copy the RSS configuration from the user ones.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param parser
+ *   Internal parser structure.
+ * @param rss_conf
+ *   User RSS configuration to save.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+static int
+priv_flow_convert_rss_conf(struct priv *priv,
+			   struct mlx5_flow_parse *parser,
+			   const struct rte_eth_rss_conf *rss_conf)
+{
+	const struct rte_eth_rss_conf *rss =
+		rss_conf ? rss_conf : &priv->rss_conf;
+
+	if (rss->rss_key_len > 40)
+		return EINVAL;
+	parser->rss_conf.rss_key_len = rss->rss_key_len;
+	parser->rss_conf.rss_hf = rss->rss_hf;
+	memcpy(parser->rss_key, rss->rss_key, rss->rss_key_len);
+	parser->rss_conf.rss_key = parser->rss_key;
+	return 0;
+}
+
+/**
+ * Extract attribute to the parser.
  *
  * @param priv
  *   Pointer to private structure.
  * @param[in] attr
  *   Flow rule attributes.
- * @param[in] pattern
- *   Pattern specification (list terminated by the END pattern item).
- * @param[in] actions
- *   Associated actions (list terminated by the END action).
  * @param[out] error
  *   Perform verbose error reporting if not NULL.
  * @param[in, out] parser
@@ -435,22 +573,13 @@ mlx5_flow_item_validate(const struct rte_flow_item *item,
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-priv_flow_convert(struct priv *priv,
-		  const struct rte_flow_attr *attr,
-		  const struct rte_flow_item items[],
-		  const struct rte_flow_action actions[],
-		  struct rte_flow_error *error,
-		  struct mlx5_flow_parse *parser)
+priv_flow_convert_attributes(struct priv *priv,
+			     const struct rte_flow_attr *attr,
+			     struct rte_flow_error *error,
+			     struct mlx5_flow_parse *parser)
 {
-	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
-
 	(void)priv;
-	*parser = (struct mlx5_flow_parse){
-		.ibv_attr = parser->ibv_attr,
-		.create = parser->create,
-		.offset = sizeof(struct ibv_flow_attr),
-		.mark_id = MLX5_FLOW_MARK_DEFAULT,
-	};
+	(void)parser;
 	if (attr->group) {
 		rte_flow_error_set(error, ENOTSUP,
 				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
@@ -479,6 +608,37 @@ priv_flow_convert(struct priv *priv,
 				   "only ingress is supported");
 		return -rte_errno;
 	}
+	return 0;
+}
+
+/**
+ * Extract actions request to the parser.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param[in] actions
+ *   Associated actions (list terminated by the END action).
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ * @param[in, out] parser
+ *   Internal parser structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+priv_flow_convert_actions(struct priv *priv,
+			  const struct rte_flow_action actions[],
+			  struct rte_flow_error *error,
+			  struct mlx5_flow_parse *parser)
+{
+	/*
+	 * Add default RSS configuration necessary for Verbs to create QP even
+	 * if no RSS is necessary.
+	 */
+	priv_flow_convert_rss_conf(priv, parser,
+				   (const struct rte_eth_rss_conf *)
+				   &priv->rss_conf);
 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
 		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
 			continue;
@@ -507,7 +667,6 @@ priv_flow_convert(struct priv *priv,
 				return -rte_errno;
 			}
 			if (!found) {
-				parser->queue = 1;
 				parser->queues_n = 1;
 				parser->queues[0] = queue->index;
 			}
@@ -554,10 +713,17 @@ priv_flow_convert(struct priv *priv,
 					return -rte_errno;
 				}
 			}
-			parser->queue = 1;
 			for (n = 0; n < rss->num; ++n)
 				parser->queues[n] = rss->queue[n];
 			parser->queues_n = rss->num;
+			if (priv_flow_convert_rss_conf(priv, parser,
+						       rss->rss_conf)) {
+				rte_flow_error_set(error, EINVAL,
+						   RTE_FLOW_ERROR_TYPE_ACTION,
+						   actions,
+						   "wrong RSS configuration");
+				return -rte_errno;
+			}
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
 			const struct rte_flow_action_mark *mark =
 				(const struct rte_flow_action_mark *)
@@ -585,18 +751,53 @@ priv_flow_convert(struct priv *priv,
 			goto exit_action_not_supported;
 		}
 	}
-	if (parser->mark && !parser->ibv_attr && !parser->drop)
-		parser->offset += sizeof(struct ibv_flow_spec_action_tag);
-	if (!parser->ibv_attr && parser->drop)
-		parser->offset += sizeof(struct ibv_flow_spec_action_drop);
-	if (!parser->queue && !parser->drop) {
+	if (!parser->queues_n && !parser->drop) {
 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
 				   NULL, "no valid action");
 		return -rte_errno;
 	}
+	return 0;
+exit_action_not_supported:
+	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
+			   actions, "action not supported");
+	return -rte_errno;
+}
+
+/**
+ * Validate items.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param[in] items
+ *   Pattern specification (list terminated by the END pattern item).
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ * @param[in, out] parser
+ *   Internal parser structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+priv_flow_convert_items_validate(struct priv *priv,
+				 const struct rte_flow_item items[],
+				 struct rte_flow_error *error,
+				 struct mlx5_flow_parse *parser)
+{
+	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
+	unsigned int i;
+
+	(void)priv;
+	/* Initialise the offsets to start after verbs attribute. */
+	if (parser->drop) {
+		parser->drop_q.offset = sizeof(struct ibv_flow_attr);
+	} else {
+		for (i = 0; i != hash_rxq_init_n; ++i)
+			parser->queue[i].offset = sizeof(struct ibv_flow_attr);
+	}
 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
 		const struct mlx5_flow_items *token = NULL;
-		unsigned int i;
+		unsigned int n;
 		int err;
 
 		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
@@ -618,15 +819,7 @@ priv_flow_convert(struct priv *priv,
 					      cur_item->mask_sz);
 		if (err)
 			goto exit_item_not_supported;
-		if (parser->ibv_attr && cur_item->convert) {
-			err = cur_item->convert(items,
-						(cur_item->default_mask ?
-						 cur_item->default_mask :
-						 cur_item->mask),
-						parser);
-			if (err)
-				goto exit_item_not_supported;
-		} else if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
+		if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
 			if (parser->inner) {
 				rte_flow_error_set(error, ENOTSUP,
 						   RTE_FLOW_ERROR_TYPE_ITEM,
@@ -637,17 +830,367 @@ priv_flow_convert(struct priv *priv,
 			}
 			parser->inner = 1;
 		}
-		parser->offset += cur_item->dst_sz;
+		if (parser->drop) {
+			parser->drop_q.offset += cur_item->dst_sz;
+		} else if (parser->queues_n == 1) {
+			parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
+		} else {
+			for (n = 0; n != hash_rxq_init_n; ++n)
+				parser->queue[n].offset += cur_item->dst_sz;
+		}
+	}
+	if (parser->mark) {
+		for (i = 0; i != hash_rxq_init_n; ++i)
+			parser->queue[i].offset +=
+				sizeof(struct ibv_flow_spec_action_tag);
 	}
 	return 0;
 exit_item_not_supported:
 	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
 			   items, "item not supported");
 	return -rte_errno;
-exit_action_not_supported:
-	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
-			   actions, "action not supported");
-	return -rte_errno;
+}
+
+/**
+ * Allocate memory space to store verbs flow attributes.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param[in] priority
+ *   Flow priority.
+ * @param[in] size
+ *   Amount of byte to allocate.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   A verbs flow attribute on success, NULL otherwise.
+ */
+static struct ibv_flow_attr*
+priv_flow_convert_allocate(struct priv *priv,
+			   unsigned int priority,
+			   unsigned int size,
+			   struct rte_flow_error *error)
+{
+	struct ibv_flow_attr *ibv_attr;
+
+	(void)priv;
+	ibv_attr = rte_calloc(__func__, 1, size, 0);
+	if (!ibv_attr) {
+		rte_flow_error_set(error, ENOMEM,
+				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				   NULL,
+				   "cannot allocate verbs spec attributes.");
+		return NULL;
+	}
+	ibv_attr->priority = priority;
+	return ibv_attr;
+}
+
+/**
+ * Finalise verbs flow attributes.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param[in, out] parser
+ *   Internal parser structure.
+ */
+static void
+priv_flow_convert_finalise(struct priv *priv, struct mlx5_flow_parse *parser)
+{
+	const unsigned int ipv4 =
+		hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
+	const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
+	const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
+	const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
+	const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
+	const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
+	unsigned int i;
+
+	(void)priv;
+	if (parser->layer == HASH_RXQ_ETH) {
+		goto fill;
+	} else {
+		/*
+		 * This layer becomes useless as the pattern define under
+		 * layers.
+		 */
+		rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
+		parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
+	}
+	/* Remove opposite kind of layer e.g. IPv6 if the pattern is IPv4. */
+	for (i = ohmin; i != (ohmax + 1); ++i) {
+		if (!parser->queue[i].ibv_attr)
+			continue;
+		rte_free(parser->queue[i].ibv_attr);
+		parser->queue[i].ibv_attr = NULL;
+	}
+	/* Remove impossible flow according to the RSS configuration. */
+	if (hash_rxq_init[parser->layer].dpdk_rss_hf &
+	    parser->rss_conf.rss_hf) {
+		/* Remove any other flow. */
+		for (i = hmin; i != (hmax + 1); ++i) {
+			if ((i == parser->layer) ||
+			     (!parser->queue[i].ibv_attr))
+				continue;
+			rte_free(parser->queue[i].ibv_attr);
+			parser->queue[i].ibv_attr = NULL;
+		}
+	} else  if (!parser->queue[ip].ibv_attr) {
+		/* no RSS possible with the current configuration. */
+		parser->queues_n = 1;
+		return;
+	}
+fill:
+	/*
+	 * Fill missing layers in verbs specifications, or compute the correct
+	 * offset to allocate the memory space for the attributes and
+	 * specifications.
+	 */
+	for (i = 0; i != hash_rxq_init_n - 1; ++i) {
+		union {
+			struct ibv_flow_spec_ipv4_ext ipv4;
+			struct ibv_flow_spec_ipv6 ipv6;
+			struct ibv_flow_spec_tcp_udp udp_tcp;
+		} specs;
+		void *dst;
+		uint16_t size;
+
+		if (i == parser->layer)
+			continue;
+		if (parser->layer == HASH_RXQ_ETH) {
+			if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
+				size = sizeof(struct ibv_flow_spec_ipv4_ext);
+				specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
+					.type = IBV_FLOW_SPEC_IPV4_EXT |
+						parser->inner,
+					.size = size,
+				};
+			} else {
+				size = sizeof(struct ibv_flow_spec_ipv6);
+				specs.ipv6 = (struct ibv_flow_spec_ipv6){
+					.type = IBV_FLOW_SPEC_IPV6 |
+						parser->inner,
+					.size = size,
+				};
+			}
+			if (parser->queue[i].ibv_attr) {
+				dst = (void *)((uintptr_t)
+					       parser->queue[i].ibv_attr +
+					       parser->queue[i].offset);
+				memcpy(dst, &specs, size);
+				++parser->queue[i].ibv_attr->num_of_specs;
+			}
+			parser->queue[i].offset += size;
+		}
+		if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
+		    (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
+			size = sizeof(struct ibv_flow_spec_tcp_udp);
+			specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
+				.type = ((i == HASH_RXQ_UDPV4 ||
+					  i == HASH_RXQ_UDPV6) ?
+					 IBV_FLOW_SPEC_UDP :
+					 IBV_FLOW_SPEC_TCP) |
+					parser->inner,
+				.size = size,
+			};
+			if (parser->queue[i].ibv_attr) {
+				dst = (void *)((uintptr_t)
+					       parser->queue[i].ibv_attr +
+					       parser->queue[i].offset);
+				memcpy(dst, &specs, size);
+				++parser->queue[i].ibv_attr->num_of_specs;
+			}
+			parser->queue[i].offset += size;
+		}
+	}
+}
+
+/**
+ * Validate and convert a flow supported by the NIC.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param[in] attr
+ *   Flow rule attributes.
+ * @param[in] pattern
+ *   Pattern specification (list terminated by the END pattern item).
+ * @param[in] actions
+ *   Associated actions (list terminated by the END action).
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ * @param[in, out] parser
+ *   Internal parser structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+priv_flow_convert(struct priv *priv,
+		  const struct rte_flow_attr *attr,
+		  const struct rte_flow_item items[],
+		  const struct rte_flow_action actions[],
+		  struct rte_flow_error *error,
+		  struct mlx5_flow_parse *parser)
+{
+	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
+	unsigned int i;
+	int ret;
+
+	/* First step. Validate the attributes, items and actions. */
+	*parser = (struct mlx5_flow_parse){
+		.create = parser->create,
+		.layer = HASH_RXQ_ETH,
+		.mark_id = MLX5_FLOW_MARK_DEFAULT,
+	};
+	ret = priv_flow_convert_attributes(priv, attr, error, parser);
+	if (ret)
+		return ret;
+	ret = priv_flow_convert_actions(priv, actions, error, parser);
+	if (ret)
+		return ret;
+	ret = priv_flow_convert_items_validate(priv, items, error, parser);
+	if (ret)
+		return ret;
+	priv_flow_convert_finalise(priv, parser);
+	/*
+	 * Second step.
+	 * Allocate the memory space to store verbs specifications.
+	 */
+	if (parser->drop) {
+		parser->drop_q.ibv_attr =
+			priv_flow_convert_allocate(priv, attr->priority,
+						   parser->drop_q.offset,
+						   error);
+		if (!parser->drop_q.ibv_attr)
+			return ENOMEM;
+		parser->drop_q.offset = sizeof(struct ibv_flow_attr);
+	} else if (parser->queues_n == 1) {
+		unsigned int priority =
+			attr->priority +
+			hash_rxq_init[HASH_RXQ_ETH].flow_priority;
+		unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
+
+		parser->queue[HASH_RXQ_ETH].ibv_attr =
+			priv_flow_convert_allocate(priv, priority,
+						   offset, error);
+		if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
+			return ENOMEM;
+		parser->queue[HASH_RXQ_ETH].offset =
+			sizeof(struct ibv_flow_attr);
+	} else {
+		for (i = 0; i != hash_rxq_init_n; ++i) {
+			unsigned int priority =
+				attr->priority +
+				hash_rxq_init[i].flow_priority;
+			unsigned int offset;
+
+			if (!(parser->rss_conf.rss_hf &
+			      hash_rxq_init[i].dpdk_rss_hf) &&
+			    (i != HASH_RXQ_ETH))
+				continue;
+			offset = parser->queue[i].offset;
+			parser->queue[i].ibv_attr =
+				priv_flow_convert_allocate(priv, priority,
+							   offset, error);
+			if (!parser->queue[i].ibv_attr)
+				goto exit_enomem;
+			parser->queue[i].offset = sizeof(struct ibv_flow_attr);
+		}
+	}
+	/* Third step. Conversion parse, fill the specifications. */
+	parser->inner = 0;
+	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
+		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
+			continue;
+		cur_item = &mlx5_flow_items[items->type];
+		ret = cur_item->convert(items,
+					(cur_item->default_mask ?
+					 cur_item->default_mask :
+					 cur_item->mask),
+					parser);
+		if (ret) {
+			rte_flow_error_set(error, ENOTSUP,
+					   RTE_FLOW_ERROR_TYPE_ITEM,
+					   items, "item not supported");
+			goto exit_free;
+		}
+	}
+	if (parser->mark)
+		mlx5_flow_create_flag_mark(parser, parser->mark_id);
+	/*
+	 * Last step. Complete missing specification to reach the RSS
+	 * configuration.
+	 */
+	if (parser->queues_n > 1)
+		priv_flow_convert_finalise(priv, parser);
+exit_free:
+	/* Only verification is expected, all resources should be released. */
+	if (!parser->create) {
+		if (parser->drop) {
+			rte_free(parser->drop_q.ibv_attr);
+			parser->drop_q.ibv_attr = NULL;
+		}
+		for (i = 0; i != hash_rxq_init_n; ++i) {
+			if (parser->queue[i].ibv_attr) {
+				rte_free(parser->queue[i].ibv_attr);
+				parser->queue[i].ibv_attr = NULL;
+			}
+		}
+	}
+	return ret;
+exit_enomem:
+	for (i = 0; i != hash_rxq_init_n; ++i) {
+		if (parser->queue[i].ibv_attr) {
+			rte_free(parser->queue[i].ibv_attr);
+			parser->queue[i].ibv_attr = NULL;
+		}
+	}
+	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+			   NULL, "cannot allocate verbs spec attributes.");
+	return ret;
+}
+
+/**
+ * Copy the specification created into the flow.
+ *
+ * @param parser
+ *   Internal parser structure.
+ * @param src
+ *   Create specification.
+ * @param size
+ *   Size in bytes of the specification to copy.
+ */
+static void
+mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
+		      unsigned int size)
+{
+	unsigned int i;
+	void *dst;
+
+	if (parser->drop) {
+		dst = (void *)((uintptr_t)parser->drop_q.ibv_attr +
+				parser->drop_q.offset);
+		memcpy(dst, src, size);
+		++parser->drop_q.ibv_attr->num_of_specs;
+		parser->drop_q.offset += size;
+		return;
+	}
+	for (i = 0; i != hash_rxq_init_n; ++i) {
+		if (!parser->queue[i].ibv_attr)
+			continue;
+		/* Specification must be the same l3 type or none. */
+		if (parser->layer == HASH_RXQ_ETH ||
+		    (hash_rxq_init[parser->layer].ip_version ==
+		     hash_rxq_init[i].ip_version) ||
+		    (hash_rxq_init[i].ip_version == 0)) {
+			dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
+					parser->queue[i].offset);
+			memcpy(dst, src, size);
+			++parser->queue[i].ibv_attr->num_of_specs;
+			parser->queue[i].offset += size;
+		}
+	}
 }
 
 /**
@@ -668,33 +1211,32 @@ mlx5_flow_create_eth(const struct rte_flow_item *item,
 	const struct rte_flow_item_eth *spec = item->spec;
 	const struct rte_flow_item_eth *mask = item->mask;
 	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
-	struct ibv_flow_spec_eth *eth;
 	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
-	unsigned int i;
-
-	++parser->ibv_attr->num_of_specs;
-	parser->hash_fields = 0;
-	eth = (void *)((uintptr_t)parser->ibv_attr + parser->offset);
-	*eth = (struct ibv_flow_spec_eth) {
+	struct ibv_flow_spec_eth eth = {
 		.type = parser->inner | IBV_FLOW_SPEC_ETH,
 		.size = eth_size,
 	};
-	if (!spec)
-		return 0;
-	if (!mask)
-		mask = default_mask;
-	memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
-	memcpy(eth->val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
-	eth->val.ether_type = spec->type;
-	memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
-	memcpy(eth->mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
-	eth->mask.ether_type = mask->type;
-	/* Remove unwanted bits from values. */
-	for (i = 0; i < ETHER_ADDR_LEN; ++i) {
-		eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
-		eth->val.src_mac[i] &= eth->mask.src_mac[i];
+
+	parser->layer = HASH_RXQ_ETH;
+	if (spec) {
+		unsigned int i;
+
+		if (!mask)
+			mask = default_mask;
+		memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
+		memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
+		eth.val.ether_type = spec->type;
+		memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
+		memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
+		eth.mask.ether_type = mask->type;
+		/* Remove unwanted bits from values. */
+		for (i = 0; i < ETHER_ADDR_LEN; ++i) {
+			eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
+			eth.val.src_mac[i] &= eth.mask.src_mac[i];
+		}
+		eth.val.ether_type &= eth.mask.ether_type;
 	}
-	eth->val.ether_type &= eth->mask.ether_type;
+	mlx5_flow_create_copy(parser, &eth, eth_size);
 	return 0;
 }
 
@@ -719,14 +1261,30 @@ mlx5_flow_create_vlan(const struct rte_flow_item *item,
 	struct ibv_flow_spec_eth *eth;
 	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
 
-	eth = (void *)((uintptr_t)parser->ibv_attr + parser->offset - eth_size);
-	if (!spec)
-		return 0;
-	if (!mask)
-		mask = default_mask;
-	eth->val.vlan_tag = spec->tci;
-	eth->mask.vlan_tag = mask->tci;
-	eth->val.vlan_tag &= eth->mask.vlan_tag;
+	if (spec) {
+		unsigned int i;
+		if (!mask)
+			mask = default_mask;
+
+		if (parser->drop) {
+			eth = (void *)((uintptr_t)parser->drop_q.ibv_attr +
+				       parser->drop_q.offset - eth_size);
+			eth->val.vlan_tag = spec->tci;
+			eth->mask.vlan_tag = mask->tci;
+			eth->val.vlan_tag &= eth->mask.vlan_tag;
+			return 0;
+		}
+		for (i = 0; i != hash_rxq_init_n; ++i) {
+			if (!parser->queue[i].ibv_attr)
+				continue;
+
+			eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
+				       parser->queue[i].offset - eth_size);
+			eth->val.vlan_tag = spec->tci;
+			eth->mask.vlan_tag = mask->tci;
+			eth->val.vlan_tag &= eth->mask.vlan_tag;
+		}
+	}
 	return 0;
 }
 
@@ -748,37 +1306,35 @@ mlx5_flow_create_ipv4(const struct rte_flow_item *item,
 	const struct rte_flow_item_ipv4 *spec = item->spec;
 	const struct rte_flow_item_ipv4 *mask = item->mask;
 	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
-	struct ibv_flow_spec_ipv4_ext *ipv4;
 	unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
-
-	++parser->ibv_attr->num_of_specs;
-	parser->hash_fields = (IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4);
-	ipv4 = (void *)((uintptr_t)parser->ibv_attr + parser->offset);
-	*ipv4 = (struct ibv_flow_spec_ipv4_ext) {
+	struct ibv_flow_spec_ipv4_ext ipv4 = {
 		.type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
 		.size = ipv4_size,
 	};
-	if (!spec)
-		return 0;
-	if (!mask)
-		mask = default_mask;
-	ipv4->val = (struct ibv_flow_ipv4_ext_filter){
-		.src_ip = spec->hdr.src_addr,
-		.dst_ip = spec->hdr.dst_addr,
-		.proto = spec->hdr.next_proto_id,
-		.tos = spec->hdr.type_of_service,
-	};
-	ipv4->mask = (struct ibv_flow_ipv4_ext_filter){
-		.src_ip = mask->hdr.src_addr,
-		.dst_ip = mask->hdr.dst_addr,
-		.proto = mask->hdr.next_proto_id,
-		.tos = mask->hdr.type_of_service,
-	};
-	/* Remove unwanted bits from values. */
-	ipv4->val.src_ip &= ipv4->mask.src_ip;
-	ipv4->val.dst_ip &= ipv4->mask.dst_ip;
-	ipv4->val.proto &= ipv4->mask.proto;
-	ipv4->val.tos &= ipv4->mask.tos;
+
+	parser->layer = HASH_RXQ_IPV4;
+	if (spec) {
+		if (!mask)
+			mask = default_mask;
+		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
+			.src_ip = spec->hdr.src_addr,
+			.dst_ip = spec->hdr.dst_addr,
+			.proto = spec->hdr.next_proto_id,
+			.tos = spec->hdr.type_of_service,
+		};
+		ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
+			.src_ip = mask->hdr.src_addr,
+			.dst_ip = mask->hdr.dst_addr,
+			.proto = mask->hdr.next_proto_id,
+			.tos = mask->hdr.type_of_service,
+		};
+		/* Remove unwanted bits from values. */
+		ipv4.val.src_ip &= ipv4.mask.src_ip;
+		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
+		ipv4.val.proto &= ipv4.mask.proto;
+		ipv4.val.tos &= ipv4.mask.tos;
+	}
+	mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
 	return 0;
 }
 
@@ -800,40 +1356,39 @@ mlx5_flow_create_ipv6(const struct rte_flow_item *item,
 	const struct rte_flow_item_ipv6 *spec = item->spec;
 	const struct rte_flow_item_ipv6 *mask = item->mask;
 	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
-	struct ibv_flow_spec_ipv6 *ipv6;
 	unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
-	unsigned int i;
-
-	++parser->ibv_attr->num_of_specs;
-	parser->hash_fields = (IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6);
-	ipv6 = (void *)((uintptr_t)parser->ibv_attr + parser->offset);
-	*ipv6 = (struct ibv_flow_spec_ipv6) {
+	struct ibv_flow_spec_ipv6 ipv6 = {
 		.type = parser->inner | IBV_FLOW_SPEC_IPV6,
 		.size = ipv6_size,
 	};
-	if (!spec)
-		return 0;
-	if (!mask)
-		mask = default_mask;
-	memcpy(ipv6->val.src_ip, spec->hdr.src_addr,
-	       RTE_DIM(ipv6->val.src_ip));
-	memcpy(ipv6->val.dst_ip, spec->hdr.dst_addr,
-	       RTE_DIM(ipv6->val.dst_ip));
-	memcpy(ipv6->mask.src_ip, mask->hdr.src_addr,
-	       RTE_DIM(ipv6->mask.src_ip));
-	memcpy(ipv6->mask.dst_ip, mask->hdr.dst_addr,
-	       RTE_DIM(ipv6->mask.dst_ip));
-	ipv6->mask.flow_label = mask->hdr.vtc_flow;
-	ipv6->mask.next_hdr = mask->hdr.proto;
-	ipv6->mask.hop_limit = mask->hdr.hop_limits;
-	/* Remove unwanted bits from values. */
-	for (i = 0; i < RTE_DIM(ipv6->val.src_ip); ++i) {
-		ipv6->val.src_ip[i] &= ipv6->mask.src_ip[i];
-		ipv6->val.dst_ip[i] &= ipv6->mask.dst_ip[i];
+
+	parser->layer = HASH_RXQ_IPV6;
+	if (spec) {
+		unsigned int i;
+
+		if (!mask)
+			mask = default_mask;
+		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
+		       RTE_DIM(ipv6.val.src_ip));
+		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
+		       RTE_DIM(ipv6.val.dst_ip));
+		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
+		       RTE_DIM(ipv6.mask.src_ip));
+		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
+		       RTE_DIM(ipv6.mask.dst_ip));
+		ipv6.mask.flow_label = mask->hdr.vtc_flow;
+		ipv6.mask.next_hdr = mask->hdr.proto;
+		ipv6.mask.hop_limit = mask->hdr.hop_limits;
+		/* Remove unwanted bits from values. */
+		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
+			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
+			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
+		}
+		ipv6.val.flow_label &= ipv6.mask.flow_label;
+		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
+		ipv6.val.hop_limit &= ipv6.mask.hop_limit;
 	}
-	ipv6->val.flow_label &= ipv6->mask.flow_label;
-	ipv6->val.next_hdr &= ipv6->mask.next_hdr;
-	ipv6->val.hop_limit &= ipv6->mask.hop_limit;
+	mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
 	return 0;
 }
 
@@ -855,28 +1410,28 @@ mlx5_flow_create_udp(const struct rte_flow_item *item,
 	const struct rte_flow_item_udp *spec = item->spec;
 	const struct rte_flow_item_udp *mask = item->mask;
 	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
-	struct ibv_flow_spec_tcp_udp *udp;
 	unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
-
-	++parser->ibv_attr->num_of_specs;
-	parser->hash_fields |= (IBV_RX_HASH_SRC_PORT_UDP |
-				IBV_RX_HASH_DST_PORT_UDP);
-	udp = (void *)((uintptr_t)parser->ibv_attr + parser->offset);
-	*udp = (struct ibv_flow_spec_tcp_udp) {
+	struct ibv_flow_spec_tcp_udp udp = {
 		.type = parser->inner | IBV_FLOW_SPEC_UDP,
 		.size = udp_size,
 	};
-	if (!spec)
-		return 0;
-	if (!mask)
-		mask = default_mask;
-	udp->val.dst_port = spec->hdr.dst_port;
-	udp->val.src_port = spec->hdr.src_port;
-	udp->mask.dst_port = mask->hdr.dst_port;
-	udp->mask.src_port = mask->hdr.src_port;
-	/* Remove unwanted bits from values. */
-	udp->val.src_port &= udp->mask.src_port;
-	udp->val.dst_port &= udp->mask.dst_port;
+
+	if (parser->layer == HASH_RXQ_IPV4)
+		parser->layer = HASH_RXQ_UDPV4;
+	else
+		parser->layer = HASH_RXQ_UDPV6;
+	if (spec) {
+		if (!mask)
+			mask = default_mask;
+		udp.val.dst_port = spec->hdr.dst_port;
+		udp.val.src_port = spec->hdr.src_port;
+		udp.mask.dst_port = mask->hdr.dst_port;
+		udp.mask.src_port = mask->hdr.src_port;
+		/* Remove unwanted bits from values. */
+		udp.val.src_port &= udp.mask.src_port;
+		udp.val.dst_port &= udp.mask.dst_port;
+	}
+	mlx5_flow_create_copy(parser, &udp, udp_size);
 	return 0;
 }
 
@@ -898,28 +1453,28 @@ mlx5_flow_create_tcp(const struct rte_flow_item *item,
 	const struct rte_flow_item_tcp *spec = item->spec;
 	const struct rte_flow_item_tcp *mask = item->mask;
 	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
-	struct ibv_flow_spec_tcp_udp *tcp;
 	unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
-
-	++parser->ibv_attr->num_of_specs;
-	parser->hash_fields |= (IBV_RX_HASH_SRC_PORT_TCP |
-				IBV_RX_HASH_DST_PORT_TCP);
-	tcp = (void *)((uintptr_t)parser->ibv_attr + parser->offset);
-	*tcp = (struct ibv_flow_spec_tcp_udp) {
+	struct ibv_flow_spec_tcp_udp tcp = {
 		.type = parser->inner | IBV_FLOW_SPEC_TCP,
 		.size = tcp_size,
 	};
-	if (!spec)
-		return 0;
-	if (!mask)
-		mask = default_mask;
-	tcp->val.dst_port = spec->hdr.dst_port;
-	tcp->val.src_port = spec->hdr.src_port;
-	tcp->mask.dst_port = mask->hdr.dst_port;
-	tcp->mask.src_port = mask->hdr.src_port;
-	/* Remove unwanted bits from values. */
-	tcp->val.src_port &= tcp->mask.src_port;
-	tcp->val.dst_port &= tcp->mask.dst_port;
+
+	if (parser->layer == HASH_RXQ_IPV4)
+		parser->layer = HASH_RXQ_TCPV4;
+	else
+		parser->layer = HASH_RXQ_TCPV6;
+	if (spec) {
+		if (!mask)
+			mask = default_mask;
+		tcp.val.dst_port = spec->hdr.dst_port;
+		tcp.val.src_port = spec->hdr.src_port;
+		tcp.mask.dst_port = mask->hdr.dst_port;
+		tcp.mask.src_port = mask->hdr.src_port;
+		/* Remove unwanted bits from values. */
+		tcp.val.src_port &= tcp.mask.src_port;
+		tcp.val.dst_port &= tcp.mask.dst_port;
+	}
+	mlx5_flow_create_copy(parser, &tcp, tcp_size);
 	return 0;
 }
 
@@ -941,31 +1496,29 @@ mlx5_flow_create_vxlan(const struct rte_flow_item *item,
 	const struct rte_flow_item_vxlan *spec = item->spec;
 	const struct rte_flow_item_vxlan *mask = item->mask;
 	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
-	struct ibv_flow_spec_tunnel *vxlan;
 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
+	struct ibv_flow_spec_tunnel vxlan = {
+		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
+		.size = size,
+	};
 	union vni {
 		uint32_t vlan_id;
 		uint8_t vni[4];
 	} id;
 
-	++parser->ibv_attr->num_of_specs;
 	id.vni[0] = 0;
-	vxlan = (void *)((uintptr_t)parser->ibv_attr + parser->offset);
-	*vxlan = (struct ibv_flow_spec_tunnel) {
-		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
-		.size = size,
-	};
 	parser->inner = IBV_FLOW_SPEC_INNER;
-	if (!spec)
-		return 0;
-	if (!mask)
-		mask = default_mask;
-	memcpy(&id.vni[1], spec->vni, 3);
-	vxlan->val.tunnel_id = id.vlan_id;
-	memcpy(&id.vni[1], mask->vni, 3);
-	vxlan->mask.tunnel_id = id.vlan_id;
-	/* Remove unwanted bits from values. */
-	vxlan->val.tunnel_id &= vxlan->mask.tunnel_id;
+	if (spec) {
+		if (!mask)
+			mask = default_mask;
+		memcpy(&id.vni[1], spec->vni, 3);
+		vxlan.val.tunnel_id = id.vlan_id;
+		memcpy(&id.vni[1], mask->vni, 3);
+		vxlan.mask.tunnel_id = id.vlan_id;
+		/* Remove unwanted bits from values. */
+		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
+	}
+	mlx5_flow_create_copy(parser, &vxlan, size);
 	return 0;
 }
 
@@ -980,18 +1533,15 @@ mlx5_flow_create_vxlan(const struct rte_flow_item *item,
 static int
 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
 {
-	struct ibv_flow_spec_action_tag *tag;
 	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
-
-	assert(parser->mark);
-	tag = (void *)((uintptr_t)parser->ibv_attr + parser->offset);
-	*tag = (struct ibv_flow_spec_action_tag){
+	struct ibv_flow_spec_action_tag tag = {
 		.type = IBV_FLOW_SPEC_ACTION_TAG,
 		.size = size,
 		.tag_id = mlx5_flow_mark_set(mark_id),
 	};
-	++parser->ibv_attr->num_of_specs;
-	parser->offset += size;
+
+	assert(parser->mark);
+	mlx5_flow_create_copy(parser, &tag, size);
 	return 0;
 }
 
@@ -1002,196 +1552,188 @@ mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
  *   Pointer to private structure.
  * @param parser
  *   Internal parser structure.
+ * @param flow
+ *   Pointer to the rte_flow.
  * @param[out] error
  *   Perform verbose error reporting if not NULL.
  *
  * @return
- *   A flow if the rule could be created.
+ *   0 on success, errno value on failure.
  */
-static struct rte_flow *
+static int
 priv_flow_create_action_queue_drop(struct priv *priv,
 				   struct mlx5_flow_parse *parser,
+				   struct rte_flow *flow,
 				   struct rte_flow_error *error)
 {
-	struct rte_flow *rte_flow;
 	struct ibv_flow_spec_action_drop *drop;
 	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
+	int err = 0;
 
 	assert(priv->pd);
 	assert(priv->ctx);
-	rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
-	if (!rte_flow) {
-		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
-				   NULL, "cannot allocate flow memory");
-		return NULL;
-	}
-	rte_flow->drop = 1;
-	drop = (void *)((uintptr_t)parser->ibv_attr + parser->offset);
+	flow->drop = 1;
+	drop = (void *)((uintptr_t)parser->drop_q.ibv_attr +
+			parser->drop_q.offset);
 	*drop = (struct ibv_flow_spec_action_drop){
 			.type = IBV_FLOW_SPEC_ACTION_DROP,
 			.size = size,
 	};
-	++parser->ibv_attr->num_of_specs;
-	parser->offset += sizeof(struct ibv_flow_spec_action_drop);
-	rte_flow->drxq.ibv_attr = parser->ibv_attr;
+	++parser->drop_q.ibv_attr->num_of_specs;
+	parser->drop_q.offset += size;
 	if (!priv->dev->data->dev_started)
-		return rte_flow;
-	rte_flow->drxq.hrxq.qp = priv->flow_drop_queue->qp;
-	rte_flow->drxq.ibv_flow = ibv_create_flow(rte_flow->drxq.hrxq.qp,
-						  rte_flow->drxq.ibv_attr);
-	if (!rte_flow->drxq.ibv_flow) {
+		return 0;
+	flow->drxq.ibv_attr = parser->drop_q.ibv_attr;
+	parser->drop_q.ibv_attr = NULL;
+	flow->drxq.ibv_flow = ibv_create_flow(priv->flow_drop_queue->qp,
+					      flow->drxq.ibv_attr);
+	if (!flow->drxq.ibv_flow) {
 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
 				   NULL, "flow rule creation failure");
+		err = ENOMEM;
 		goto error;
 	}
-	return rte_flow;
+	return 0;
 error:
-	assert(rte_flow);
-	rte_free(rte_flow);
-	return NULL;
+	assert(flow);
+	if (flow->drxq.ibv_flow) {
+		claim_zero(ibv_destroy_flow(flow->drxq.ibv_flow));
+		flow->drxq.ibv_flow = NULL;
+	}
+	if (flow->drxq.ibv_attr) {
+		rte_free(flow->drxq.ibv_attr);
+		flow->drxq.ibv_attr = NULL;
+	}
+	return err;
 }
 
 /**
- * Complete flow rule creation.
+ * Create hash Rx queues when RSS is enabled.
  *
  * @param priv
  *   Pointer to private structure.
  * @param parser
- *   MLX5 flow parser attributes (filled by mlx5_flow_validate()).
+ *   Internal parser structure.
+ * @param flow
+ *   Pointer to the rte_flow.
  * @param[out] error
  *   Perform verbose error reporting if not NULL.
  *
  * @return
- *   A flow if the rule could be created.
+ *   0 on success, an errno value otherwise and rte_errno is set.
  */
-static struct rte_flow *
-priv_flow_create_action_queue(struct priv *priv,
-			      struct mlx5_flow_parse *parser,
-			      struct rte_flow_error *error)
+static int
+priv_flow_create_action_queue_rss(struct priv *priv,
+				  struct mlx5_flow_parse *parser,
+				  struct rte_flow *flow,
+				  struct rte_flow_error *error)
 {
-	struct rte_flow *rte_flow;
 	unsigned int i;
 
-	assert(priv->pd);
-	assert(priv->ctx);
-	assert(!parser->drop);
-	rte_flow = rte_calloc(__func__, 1,
-			      sizeof(*rte_flow) +
-			      parser->queues_n * sizeof(uint16_t),
-			      0);
-	if (!rte_flow) {
-		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
-				   NULL, "cannot allocate flow memory");
-		return NULL;
-	}
-	rte_flow->mark = parser->mark;
-	rte_flow->frxq.ibv_attr = parser->ibv_attr;
-	rte_flow->queues = (uint16_t (*)[])(rte_flow + 1);
-	memcpy(rte_flow->queues, parser->queues,
-	       parser->queues_n * sizeof(uint16_t));
-	rte_flow->queues_n = parser->queues_n;
-	rte_flow->frxq.hash_fields = parser->hash_fields;
-	rte_flow->frxq.hrxq = mlx5_priv_hrxq_get(priv, rss_hash_default_key,
-						 rss_hash_default_key_len,
-						 parser->hash_fields,
-						 (*rte_flow->queues),
-						 rte_flow->queues_n);
-	if (!rte_flow->frxq.hrxq) {
-		rte_flow->frxq.hrxq =
-			mlx5_priv_hrxq_new(priv, rss_hash_default_key,
-					   rss_hash_default_key_len,
-					   parser->hash_fields,
-					   (*rte_flow->queues),
-					   rte_flow->queues_n);
-		if (!rte_flow->frxq.hrxq) {
+	for (i = 0; i != hash_rxq_init_n; ++i) {
+		uint64_t hash_fields;
+
+		if (!parser->queue[i].ibv_attr)
+			continue;
+		flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
+		parser->queue[i].ibv_attr = NULL;
+		hash_fields = hash_rxq_init[i].hash_fields;
+		flow->frxq[i].hrxq =
+			mlx5_priv_hrxq_get(priv,
+					   parser->rss_conf.rss_key,
+					   parser->rss_conf.rss_key_len,
+					   hash_fields,
+					   parser->queues,
+					   hash_fields ? parser->queues_n : 1);
+		if (flow->frxq[i].hrxq)
+			continue;
+		flow->frxq[i].hrxq =
+			mlx5_priv_hrxq_new(priv,
+					   parser->rss_conf.rss_key,
+					   parser->rss_conf.rss_key_len,
+					   hash_fields,
+					   parser->queues,
+					   hash_fields ? parser->queues_n : 1);
+		if (!flow->frxq[i].hrxq) {
 			rte_flow_error_set(error, ENOMEM,
 					   RTE_FLOW_ERROR_TYPE_HANDLE,
 					   NULL, "cannot create hash rxq");
-			goto error;
+			return ENOMEM;
 		}
 	}
-	for (i = 0; i != parser->queues_n; ++i) {
-		struct mlx5_rxq_data *q =
-			(*priv->rxqs)[parser->queues[i]];
-
-		q->mark |= parser->mark;
-	}
-	if (!priv->dev->data->dev_started)
-		return rte_flow;
-	rte_flow->frxq.ibv_flow = ibv_create_flow(rte_flow->frxq.hrxq->qp,
-						  rte_flow->frxq.ibv_attr);
-	if (!rte_flow->frxq.ibv_flow) {
-		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
-				   NULL, "flow rule creation failure");
-		goto error;
-	}
-	return rte_flow;
-error:
-	assert(rte_flow);
-	if (rte_flow->frxq.hrxq)
-		mlx5_priv_hrxq_release(priv, rte_flow->frxq.hrxq);
-	rte_free(rte_flow);
-	return NULL;
+	return 0;
 }
 
 /**
- * Validate a flow.
+ * Complete flow rule creation.
  *
  * @param priv
  *   Pointer to private structure.
- * @param[in] attr
- *   Flow rule attributes.
- * @param[in] pattern
- *   Pattern specification (list terminated by the END pattern item).
- * @param[in] actions
- *   Associated actions (list terminated by the END action).
+ * @param parser
+ *   Internal parser structure.
+ * @param flow
+ *   Pointer to the rte_flow.
  * @param[out] error
  *   Perform verbose error reporting if not NULL.
- * @param[in,out] parser
- *   MLX5 parser structure.
  *
  * @return
- *   0 on success, negative errno value on failure.
+ *   0 on success, an errno value otherwise and rte_errno is set.
  */
 static int
-priv_flow_validate(struct priv *priv,
-		   const struct rte_flow_attr *attr,
-		   const struct rte_flow_item items[],
-		   const struct rte_flow_action actions[],
-		   struct rte_flow_error *error,
-		   struct mlx5_flow_parse *parser)
+priv_flow_create_action_queue(struct priv *priv,
+			      struct mlx5_flow_parse *parser,
+			      struct rte_flow *flow,
+			      struct rte_flow_error *error)
 {
-	int err;
+	int err = 0;
+	unsigned int i;
 
-	err = priv_flow_convert(priv, attr, items, actions, error, parser);
+	assert(priv->pd);
+	assert(priv->ctx);
+	assert(!parser->drop);
+	err = priv_flow_create_action_queue_rss(priv, parser, flow, error);
 	if (err)
-		goto exit;
-	if (parser->mark)
-		parser->offset += sizeof(struct ibv_flow_spec_action_tag);
-	parser->ibv_attr = rte_malloc(__func__, parser->offset, 0);
-	if (!parser->ibv_attr) {
-		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
-				   NULL, "cannot allocate ibv_attr memory");
-		err = rte_errno;
-		goto exit;
+		goto error;
+	if (!priv->dev->data->dev_started)
+		return 0;
+	for (i = 0; i != hash_rxq_init_n; ++i) {
+		if (!flow->frxq[i].hrxq)
+			continue;
+		flow->frxq[i].ibv_flow =
+			ibv_create_flow(flow->frxq[i].hrxq->qp,
+					flow->frxq[i].ibv_attr);
+		if (!flow->frxq[i].ibv_flow) {
+			rte_flow_error_set(error, ENOMEM,
+					   RTE_FLOW_ERROR_TYPE_HANDLE,
+					   NULL, "flow rule creation failure");
+			err = ENOMEM;
+			goto error;
+		}
+		DEBUG("%p type %d QP %p ibv_flow %p",
+		      (void *)flow, i,
+		      (void *)flow->frxq[i].hrxq,
+		      (void *)flow->frxq[i].ibv_flow);
+	}
+	for (i = 0; i != parser->queues_n; ++i) {
+		struct mlx5_rxq_data *q =
+			(*priv->rxqs)[parser->queues[i]];
+
+		q->mark |= parser->mark;
 	}
-	*parser->ibv_attr = (struct ibv_flow_attr){
-		.type = IBV_FLOW_ATTR_NORMAL,
-		.size = sizeof(struct ibv_flow_attr),
-		.priority = attr->priority,
-		.num_of_specs = 0,
-		.port = 0,
-		.flags = 0,
-	};
-	err = priv_flow_convert(priv, attr, items, actions, error, parser);
-	if (err || parser->create)
-		goto exit;
-	if (parser->mark)
-		mlx5_flow_create_flag_mark(parser, parser->mark_id);
 	return 0;
-exit:
-	if (parser->ibv_attr)
-		rte_free(parser->ibv_attr);
+error:
+	assert(flow);
+	for (i = 0; i != hash_rxq_init_n; ++i) {
+		if (flow->frxq[i].ibv_flow) {
+			struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
+
+			claim_zero(ibv_destroy_flow(ibv_flow));
+		}
+		if (flow->frxq[i].hrxq)
+			mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
+		if (flow->frxq[i].ibv_attr)
+			rte_free(flow->frxq[i].ibv_attr);
+	}
 	return err;
 }
 
@@ -1223,24 +1765,52 @@ priv_flow_create(struct priv *priv,
 		 struct rte_flow_error *error)
 {
 	struct mlx5_flow_parse parser = { .create = 1, };
-	struct rte_flow *flow;
+	struct rte_flow *flow = NULL;
+	unsigned int i;
 	int err;
 
-	err = priv_flow_validate(priv, attr, items, actions, error, &parser);
+	err = priv_flow_convert(priv, attr, items, actions, error, &parser);
 	if (err)
 		goto exit;
+	flow = rte_calloc(__func__, 1,
+			  sizeof(*flow) + parser.queues_n * sizeof(uint16_t),
+			  0);
+	if (!flow) {
+		rte_flow_error_set(error, ENOMEM,
+				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				   NULL,
+				   "cannot allocate flow memory");
+		return NULL;
+	}
+	/* Copy queues configuration. */
+	flow->queues = (uint16_t (*)[])(flow + 1);
+	memcpy(flow->queues, parser.queues, parser.queues_n * sizeof(uint16_t));
+	flow->queues_n = parser.queues_n;
+	/* Copy RSS configuration. */
+	flow->rss_conf = parser.rss_conf;
+	flow->rss_conf.rss_key = flow->rss_key;
+	memcpy(flow->rss_key, parser.rss_key, parser.rss_conf.rss_key_len);
+	/* Finalise the flow. */
 	if (parser.drop)
-		flow = priv_flow_create_action_queue_drop(priv, &parser, error);
+		err = priv_flow_create_action_queue_drop(priv, &parser, flow,
+							 error);
 	else
-		flow = priv_flow_create_action_queue(priv, &parser, error);
-	if (!flow)
+		err = priv_flow_create_action_queue(priv, &parser, flow, error);
+	if (err)
 		goto exit;
 	TAILQ_INSERT_TAIL(list, flow, next);
 	DEBUG("Flow created %p", (void *)flow);
 	return flow;
 exit:
-	if (parser.ibv_attr)
-		rte_free(parser.ibv_attr);
+	if (parser.drop) {
+		rte_free(parser.drop_q.ibv_attr);
+	} else {
+		for (i = 0; i != hash_rxq_init_n; ++i) {
+			if (parser.queue[i].ibv_attr)
+				rte_free(parser.queue[i].ibv_attr);
+		}
+	}
+	rte_free(flow);
 	return NULL;
 }
 
@@ -1262,7 +1832,7 @@ mlx5_flow_validate(struct rte_eth_dev *dev,
 	struct mlx5_flow_parse parser = { .create = 0, };
 
 	priv_lock(priv);
-	ret = priv_flow_validate(priv, attr, items, actions, error, &parser);
+	ret = priv_flow_convert(priv, attr, items, actions, error, &parser);
 	priv_unlock(priv);
 	return ret;
 }
@@ -1306,16 +1876,11 @@ priv_flow_destroy(struct priv *priv,
 		  struct rte_flow *flow)
 {
 	unsigned int i;
-	uint16_t *queues;
-	uint16_t queues_n;
 
 	if (flow->drop || !flow->mark)
 		goto free;
-	queues = flow->frxq.hrxq->ind_table->queues;
-	queues_n = flow->frxq.hrxq->ind_table->queues_n;
-	for (i = 0; i != queues_n; ++i) {
+	for (i = 0; i != flow->queues_n; ++i) {
 		struct rte_flow *tmp;
-		struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[queues[i]];
 		int mark = 0;
 
 		/*
@@ -1324,18 +1889,24 @@ priv_flow_destroy(struct priv *priv,
 		 */
 		TAILQ_FOREACH(tmp, list, next) {
 			unsigned int j;
+			uint16_t *tqs = NULL;
+			uint16_t tq_n = 0;
 
 			if (!tmp->mark)
 				continue;
-			for (j = 0;
-			     (j != tmp->frxq.hrxq->ind_table->queues_n) &&
-			     !mark;
-			     j++)
-				if (tmp->frxq.hrxq->ind_table->queues[j] ==
-				    queues[i])
+			for (j = 0; j != hash_rxq_init_n; ++j) {
+				if (!tmp->frxq[j].hrxq)
+					continue;
+				tqs = tmp->frxq[j].hrxq->ind_table->queues;
+				tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
+			}
+			if (!tq_n)
+				continue;
+			for (j = 0; (j != tq_n) && !mark; j++)
+				if (tqs[j] == (*flow->queues)[i])
 					mark = 1;
 		}
-		rxq_data->mark = mark;
+		(*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
 	}
 free:
 	if (flow->drop) {
@@ -1343,10 +1914,16 @@ priv_flow_destroy(struct priv *priv,
 			claim_zero(ibv_destroy_flow(flow->drxq.ibv_flow));
 		rte_free(flow->drxq.ibv_attr);
 	} else {
-		mlx5_priv_hrxq_release(priv, flow->frxq.hrxq);
-		if (flow->frxq.ibv_flow)
-			claim_zero(ibv_destroy_flow(flow->frxq.ibv_flow));
-		rte_free(flow->frxq.ibv_attr);
+		for (i = 0; i != hash_rxq_init_n; ++i) {
+			struct mlx5_flow *frxq = &flow->frxq[i];
+
+			if (frxq->ibv_flow)
+				claim_zero(ibv_destroy_flow(frxq->ibv_flow));
+			if (frxq->hrxq)
+				mlx5_priv_hrxq_release(priv, frxq->hrxq);
+			if (frxq->ibv_attr)
+				rte_free(frxq->ibv_attr);
+		}
 	}
 	TAILQ_REMOVE(list, flow, next);
 	DEBUG("Flow destroyed %p", (void *)flow);
@@ -1497,18 +2074,35 @@ priv_flow_stop(struct priv *priv, struct mlx5_flows *list)
 	struct rte_flow *flow;
 
 	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
-		assert(!flow->drop);
-		claim_zero(ibv_destroy_flow(flow->frxq.ibv_flow));
-		flow->frxq.ibv_flow = NULL;
-		mlx5_priv_hrxq_release(priv, flow->frxq.hrxq);
-		flow->frxq.hrxq = NULL;
+		unsigned int i;
+
+		if (flow->drop) {
+			if (!flow->drxq.ibv_flow)
+				continue;
+			claim_zero(ibv_destroy_flow(flow->drxq.ibv_flow));
+			flow->drxq.ibv_flow = NULL;
+			/* Next flow. */
+			continue;
+		}
 		if (flow->mark) {
-			unsigned int n;
-			struct mlx5_ind_table_ibv *ind_tbl =
-				flow->frxq.hrxq->ind_table;
+			struct mlx5_ind_table_ibv *ind_tbl = NULL;
 
-			for (n = 0; n < ind_tbl->queues_n; ++n)
-				(*priv->rxqs)[ind_tbl->queues[n]]->mark = 0;
+			for (i = 0; i != hash_rxq_init_n; ++i) {
+				if (!flow->frxq[i].hrxq)
+					continue;
+				ind_tbl = flow->frxq[i].hrxq->ind_table;
+			}
+			assert(ind_tbl);
+			for (i = 0; i != ind_tbl->queues_n; ++i)
+				(*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
+		}
+		for (i = 0; i != hash_rxq_init_n; ++i) {
+			if (!flow->frxq[i].ibv_flow)
+				continue;
+			claim_zero(ibv_destroy_flow(flow->frxq[i].ibv_flow));
+			flow->frxq[i].ibv_flow = NULL;
+			mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
+			flow->frxq[i].hrxq = NULL;
 		}
 		DEBUG("Flow %p removed", (void *)flow);
 	}
@@ -1531,48 +2125,61 @@ priv_flow_start(struct priv *priv, struct mlx5_flows *list)
 	struct rte_flow *flow;
 
 	TAILQ_FOREACH(flow, list, next) {
-		if (flow->frxq.hrxq)
-			goto flow_create;
-		flow->frxq.hrxq =
-			mlx5_priv_hrxq_get(priv, rss_hash_default_key,
-					   rss_hash_default_key_len,
-					   flow->frxq.hash_fields,
-					   (*flow->queues),
-					   flow->queues_n);
-		if (flow->frxq.hrxq)
-			goto flow_create;
-		flow->frxq.hrxq =
-			mlx5_priv_hrxq_new(priv, rss_hash_default_key,
-					   rss_hash_default_key_len,
-					   flow->frxq.hash_fields,
-					   (*flow->queues),
-					   flow->queues_n);
-		if (!flow->frxq.hrxq) {
-			DEBUG("Flow %p cannot be applied",
-			      (void *)flow);
-			rte_errno = EINVAL;
-			return rte_errno;
+		unsigned int i;
+
+		if (flow->drop) {
+			flow->drxq.ibv_flow =
+				ibv_create_flow(priv->flow_drop_queue->qp,
+						flow->drxq.ibv_attr);
+			if (!flow->drxq.ibv_flow) {
+				DEBUG("Flow %p cannot be applied",
+				      (void *)flow);
+				rte_errno = EINVAL;
+				return rte_errno;
+			}
+			DEBUG("Flow %p applied", (void *)flow);
+			/* Next flow. */
+			continue;
 		}
+		for (i = 0; i != hash_rxq_init_n; ++i) {
+			if (!flow->frxq[i].ibv_attr)
+				continue;
+			flow->frxq[i].hrxq =
+				mlx5_priv_hrxq_get(priv, flow->rss_conf.rss_key,
+						   flow->rss_conf.rss_key_len,
+						   hash_rxq_init[i].hash_fields,
+						   (*flow->queues),
+						   flow->queues_n);
+			if (flow->frxq[i].hrxq)
+				goto flow_create;
+			flow->frxq[i].hrxq =
+				mlx5_priv_hrxq_new(priv, flow->rss_conf.rss_key,
+						   flow->rss_conf.rss_key_len,
+						   hash_rxq_init[i].hash_fields,
+						   (*flow->queues),
+						   flow->queues_n);
+			if (!flow->frxq[i].hrxq) {
+				DEBUG("Flow %p cannot be applied",
+				      (void *)flow);
+				rte_errno = EINVAL;
+				return rte_errno;
+			}
 flow_create:
-		flow->frxq.ibv_flow = ibv_create_flow(flow->frxq.hrxq->qp,
-						      flow->frxq.ibv_attr);
-		if (!flow->frxq.ibv_flow) {
-			DEBUG("Flow %p cannot be applied", (void *)flow);
-			rte_errno = EINVAL;
-			return rte_errno;
-		}
-		DEBUG("Flow %p applied", (void *)flow);
-		if (flow->mark) {
-			unsigned int n;
-
-			for (n = 0;
-			     n < flow->frxq.hrxq->ind_table->queues_n;
-			     ++n) {
-				uint16_t idx =
-					flow->frxq.hrxq->ind_table->queues[n];
-				(*priv->rxqs)[idx]->mark = 1;
+			flow->frxq[i].ibv_flow =
+				ibv_create_flow(flow->frxq[i].hrxq->qp,
+						flow->frxq[i].ibv_attr);
+			if (!flow->frxq[i].ibv_flow) {
+				DEBUG("Flow %p cannot be applied",
+				      (void *)flow);
+				rte_errno = EINVAL;
+				return rte_errno;
 			}
+			DEBUG("Flow %p applied", (void *)flow);
 		}
+		if (!flow->mark)
+			continue;
+		for (i = 0; i != flow->queues_n; ++i)
+			(*priv->rxqs)[(*flow->queues)[i]]->mark = 1;
 	}
 	return 0;
 }
@@ -1648,10 +2255,7 @@ mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
 	};
 	struct rte_flow_action actions[] = {
 		{
-			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
-			.conf = &(struct rte_flow_action_queue){
-				.index = 0,
-			},
+			.type = RTE_FLOW_ACTION_TYPE_RSS,
 		},
 		{
 			.type = RTE_FLOW_ACTION_TYPE_END,
@@ -1659,7 +2263,23 @@ mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
 	};
 	struct rte_flow *flow;
 	struct rte_flow_error error;
-
+	unsigned int i;
+	union {
+		struct rte_flow_action_rss rss;
+		struct {
+			const struct rte_eth_rss_conf *rss_conf;
+			uint16_t num;
+			uint16_t queue[RTE_MAX_QUEUES_PER_PORT];
+		} local;
+	} action_rss;
+
+	if (!priv->reta_idx_n)
+		return EINVAL;
+	for (i = 0; i != priv->reta_idx_n; ++i)
+		action_rss.local.queue[i] = (*priv->reta_idx)[i];
+	action_rss.local.rss_conf = &priv->rss_conf;
+	action_rss.local.num = priv->reta_idx_n;
+	actions[0].conf = (const void *)&action_rss.rss;
 	flow = priv_flow_create(priv, &priv->ctrl_flows, &attr, items, actions,
 				&error);
 	if (!flow)
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v3 29/30] net/mlx5: support flow director
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (53 preceding siblings ...)
  2017-10-09 14:45 ` [dpdk-dev] [PATCH v3 28/30] net/mlx5: handle RSS hash configuration in RSS flow Nelio Laranjeiro
@ 2017-10-09 14:45 ` Nelio Laranjeiro
  2017-10-09 14:45 ` [dpdk-dev] [PATCH v3 30/30] net/mlx5: add new operations for isolated mode Nelio Laranjeiro
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-09 14:45 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
Support the same functionality as was removed in
commit cf521eaa3c76 ("net/mlx5: remove flow director support").
This implementation is built on top of the generic flow API.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
 doc/guides/nics/features/mlx5.ini |   1 +
 doc/guides/nics/mlx5.rst          |   2 +
 drivers/net/mlx5/mlx5_flow.c      | 510 +++++++++++++++++++++++++++++++++++---
 3 files changed, 481 insertions(+), 32 deletions(-)
diff --git a/doc/guides/nics/features/mlx5.ini b/doc/guides/nics/features/mlx5.ini
index 34a796d..c363639 100644
--- a/doc/guides/nics/features/mlx5.ini
+++ b/doc/guides/nics/features/mlx5.ini
@@ -23,6 +23,7 @@ RSS key update       = Y
 RSS reta update      = Y
 SR-IOV               = Y
 VLAN filter          = Y
+Flow director        = Y
 Flow API             = Y
 CRC offload          = Y
 VLAN offload         = Y
diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index 09fb738..d24941a 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -89,6 +89,8 @@ Features
 - Promiscuous mode.
 - Multicast promiscuous mode.
 - Hardware checksum offloads.
+- Flow director (RTE_FDIR_MODE_PERFECT, RTE_FDIR_MODE_PERFECT_MAC_VLAN and
+  RTE_ETH_FDIR_REJECT).
 - Flow API.
 - Multiple process.
 - KVM and VMware ESX SR-IOV modes are supported.
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 23ca2b8..36c060e 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -430,39 +430,28 @@ static const struct rte_flow_ops mlx5_flow_ops = {
 	.isolate = mlx5_flow_isolate,
 };
 
-/**
- * Manage filter operations.
- *
- * @param dev
- *   Pointer to Ethernet device structure.
- * @param filter_type
- *   Filter type.
- * @param filter_op
- *   Operation to perform.
- * @param arg
- *   Pointer to operation-specific structure.
- *
- * @return
- *   0 on success, negative errno value on failure.
- */
-int
-mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
-		     enum rte_filter_type filter_type,
-		     enum rte_filter_op filter_op,
-		     void *arg)
-{
-	int ret = EINVAL;
+/* Convert FDIR request to Generic flow. */
+struct mlx5_fdir {
+	struct rte_flow_attr attr;
+	struct rte_flow_action actions[2];
+	struct rte_flow_item items[4];
+	struct rte_flow_item_eth l2;
+	union {
+		struct rte_flow_item_ipv4 ipv4;
+		struct rte_flow_item_ipv6 ipv6;
+	} l3;
+	union {
+		struct rte_flow_item_udp udp;
+		struct rte_flow_item_tcp tcp;
+	} l4;
+	struct rte_flow_action_queue queue;
+};
 
-	if (filter_type == RTE_ETH_FILTER_GENERIC) {
-		if (filter_op != RTE_ETH_FILTER_GET)
-			return -EINVAL;
-		*(const void **)arg = &mlx5_flow_ops;
-		return 0;
-	}
-	ERROR("%p: filter type (%d) not supported",
-	      (void *)dev, filter_type);
-	return -ret;
-}
+/* Verbs specification header. */
+struct ibv_spec_header {
+	enum ibv_flow_spec_type type;
+	uint16_t size;
+};
 
 /**
  * Check support for a given item.
@@ -2373,3 +2362,460 @@ mlx5_flow_isolate(struct rte_eth_dev *dev,
 	priv_unlock(priv);
 	return 0;
 }
+
+/**
+ * Convert a flow director filter to a generic flow.
+ *
+ * @param priv
+ *   Private structure.
+ * @param fdir_filter
+ *   Flow director filter to convert (only RTE_ETH_FDIR_ACCEPT is handled).
+ * @param[out] attributes
+ *   Generic flow parameters; must be zeroed by the caller beforehand.
+ *
+ * @return
+ *   0 on success, positive errno value (EINVAL or ENOTSUP) on error.
+ */
+static int
+priv_fdir_filter_convert(struct priv *priv,
+			 const struct rte_eth_fdir_filter *fdir_filter,
+			 struct mlx5_fdir *attributes)
+{
+	const struct rte_eth_fdir_input *input = &fdir_filter->input;
+
+	/* Validate queue number. */
+	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
+		ERROR("invalid queue number %d", fdir_filter->action.rx_queue);
+		return EINVAL;
+	}
+	/* Validate the behavior. */
+	if (fdir_filter->action.behavior != RTE_ETH_FDIR_ACCEPT) {
+		ERROR("invalid behavior %d", fdir_filter->action.behavior);
+		return ENOTSUP;
+	}
+	attributes->attr.ingress = 1;
+	attributes->items[0] = (struct rte_flow_item) {
+		.type = RTE_FLOW_ITEM_TYPE_ETH,
+		.spec = &attributes->l2,
+	};
+	attributes->actions[0] = (struct rte_flow_action){
+		.type = RTE_FLOW_ACTION_TYPE_QUEUE,
+		.conf = &attributes->queue,
+	};
+	attributes->queue.index = fdir_filter->action.rx_queue;
+	switch (fdir_filter->input.flow_type) {
+	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
+		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
+			.src_addr = input->flow.udp4_flow.ip.src_ip,
+			.dst_addr = input->flow.udp4_flow.ip.dst_ip,
+			.time_to_live = input->flow.udp4_flow.ip.ttl,
+			.type_of_service = input->flow.udp4_flow.ip.tos,
+			.next_proto_id = input->flow.udp4_flow.ip.proto,
+		};
+		attributes->l4.udp.hdr = (struct udp_hdr){
+			.src_port = input->flow.udp4_flow.src_port,
+			.dst_port = input->flow.udp4_flow.dst_port,
+		};
+		attributes->items[1] = (struct rte_flow_item){
+			.type = RTE_FLOW_ITEM_TYPE_IPV4,
+			.spec = &attributes->l3,
+		};
+		attributes->items[2] = (struct rte_flow_item){
+			.type = RTE_FLOW_ITEM_TYPE_UDP,
+			.spec = &attributes->l4,
+		};
+		break;
+	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
+		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
+			.src_addr = input->flow.tcp4_flow.ip.src_ip,
+			.dst_addr = input->flow.tcp4_flow.ip.dst_ip,
+			.time_to_live = input->flow.tcp4_flow.ip.ttl,
+			.type_of_service = input->flow.tcp4_flow.ip.tos,
+			.next_proto_id = input->flow.tcp4_flow.ip.proto,
+		};
+		attributes->l4.tcp.hdr = (struct tcp_hdr){
+			.src_port = input->flow.tcp4_flow.src_port,
+			.dst_port = input->flow.tcp4_flow.dst_port,
+		};
+		attributes->items[1] = (struct rte_flow_item){
+			.type = RTE_FLOW_ITEM_TYPE_IPV4,
+			.spec = &attributes->l3,
+		};
+		attributes->items[2] = (struct rte_flow_item){
+			.type = RTE_FLOW_ITEM_TYPE_TCP,
+			.spec = &attributes->l4,
+		};
+		break;
+	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
+		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
+			.src_addr = input->flow.ip4_flow.src_ip,
+			.dst_addr = input->flow.ip4_flow.dst_ip,
+			.time_to_live = input->flow.ip4_flow.ttl,
+			.type_of_service = input->flow.ip4_flow.tos,
+			.next_proto_id = input->flow.ip4_flow.proto,
+		};
+		attributes->items[1] = (struct rte_flow_item){
+			.type = RTE_FLOW_ITEM_TYPE_IPV4,
+			.spec = &attributes->l3,
+		};
+		break;
+	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
+		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
+			.hop_limits = input->flow.udp6_flow.ip.hop_limits,
+			.proto = input->flow.udp6_flow.ip.proto,
+		};
+		memcpy(attributes->l3.ipv6.hdr.src_addr,
+		       input->flow.udp6_flow.ip.src_ip,
+		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
+		memcpy(attributes->l3.ipv6.hdr.dst_addr,
+		       input->flow.udp6_flow.ip.dst_ip,
+		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
+		attributes->l4.udp.hdr = (struct udp_hdr){
+			.src_port = input->flow.udp6_flow.src_port,
+			.dst_port = input->flow.udp6_flow.dst_port,
+		};
+		attributes->items[1] = (struct rte_flow_item){
+			.type = RTE_FLOW_ITEM_TYPE_IPV6,
+			.spec = &attributes->l3,
+		};
+		attributes->items[2] = (struct rte_flow_item){
+			.type = RTE_FLOW_ITEM_TYPE_UDP,
+			.spec = &attributes->l4,
+		};
+		break;
+	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
+		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
+			.hop_limits = input->flow.tcp6_flow.ip.hop_limits,
+			.proto = input->flow.tcp6_flow.ip.proto,
+		};
+		memcpy(attributes->l3.ipv6.hdr.src_addr,
+		       input->flow.tcp6_flow.ip.src_ip,
+		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
+		memcpy(attributes->l3.ipv6.hdr.dst_addr,
+		       input->flow.tcp6_flow.ip.dst_ip,
+		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
+		attributes->l4.tcp.hdr = (struct tcp_hdr){
+			.src_port = input->flow.tcp6_flow.src_port,
+			.dst_port = input->flow.tcp6_flow.dst_port,
+		};
+		attributes->items[1] = (struct rte_flow_item){
+			.type = RTE_FLOW_ITEM_TYPE_IPV6,
+			.spec = &attributes->l3,
+		};
+		attributes->items[2] = (struct rte_flow_item){
+			.type = RTE_FLOW_ITEM_TYPE_TCP,
+			.spec = &attributes->l4,
+		};
+		break;
+	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
+		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
+			.hop_limits = input->flow.ipv6_flow.hop_limits,
+			.proto = input->flow.ipv6_flow.proto,
+		};
+		memcpy(attributes->l3.ipv6.hdr.src_addr,
+		       input->flow.ipv6_flow.src_ip,
+		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
+		memcpy(attributes->l3.ipv6.hdr.dst_addr,
+		       input->flow.ipv6_flow.dst_ip,
+		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
+		attributes->items[1] = (struct rte_flow_item){
+			.type = RTE_FLOW_ITEM_TYPE_IPV6,
+			.spec = &attributes->l3,
+		};
+		break;
+	default:
+		ERROR("invalid flow type %d",
+		      fdir_filter->input.flow_type);
+		return ENOTSUP;
+	}
+	return 0;
+}
+
+/**
+ * Add new flow director filter and store it in list.
+ *
+ * @param priv
+ *   Private structure.
+ * @param fdir_filter
+ *   Flow director filter to add.
+ *
+ * @return
+ *   0 on success, positive errno value on failure.
+ */
+static int
+priv_fdir_filter_add(struct priv *priv,
+		     const struct rte_eth_fdir_filter *fdir_filter)
+{
+	struct mlx5_fdir attributes = {
+		.attr.group = 0,
+	};
+	struct mlx5_flow_parse parser = {
+		.layer = HASH_RXQ_ETH,
+	};
+	struct rte_flow_error error;
+	struct rte_flow *flow;
+	int ret;
+
+	/* Already a positive errno value, return it untouched. */
+	ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
+	if (ret)
+		return ret;
+	/* NOTE(review): assumes priv_flow_convert() returns -errno. */
+	ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
+				attributes.actions, &error, &parser);
+	if (ret)
+		return -ret;
+	flow = priv_flow_create(priv, &priv->flows, &attributes.attr,
+				attributes.items, attributes.actions, &error);
+	if (flow) {
+		/* NOTE(review): priv_flow_create() is also given the list; */
+		/* confirm it does not already insert the flow itself. */
+		TAILQ_INSERT_TAIL(&priv->flows, flow, next);
+		DEBUG("FDIR created %p", (void *)flow);
+		return 0;
+	}
+	return ENOTSUP;
+}
+
+/**
+ * Delete specific filter.
+ *
+ * @param priv
+ *   Private structure.
+ * @param fdir_filter
+ *   Filter to be deleted.
+ *
+ * @return
+ *   0 on success (also when no flow matches), errno value on failure.
+ */
+static int
+priv_fdir_filter_delete(struct priv *priv,
+			const struct rte_eth_fdir_filter *fdir_filter)
+{
+	struct mlx5_fdir attributes = { .attr.group = 0 }; /* Rest zeroed. */
+	struct mlx5_flow_parse parser = {
+		.create = 1,
+		.layer = HASH_RXQ_ETH,
+	};
+	struct rte_flow_error error;
+	struct rte_flow *flow;
+	unsigned int i;
+	int ret;
+
+	ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
+	if (ret)
+		return ret;
+	ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
+				attributes.actions, &error, &parser);
+	if (ret)
+		goto exit;
+	TAILQ_FOREACH(flow, &priv->flows, next) {
+		struct ibv_flow_attr *attr;
+		struct ibv_spec_header *attr_h;
+		void *spec;
+		struct ibv_flow_attr *flow_attr;
+		struct ibv_spec_header *flow_h;
+		void *flow_spec;
+		unsigned int specs_n;
+
+		if (parser.drop)
+			attr = parser.drop_q.ibv_attr;
+		else
+			attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
+		if (flow->drop)
+			flow_attr = flow->drxq.ibv_attr;
+		else
+			flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
+		/* Compare first the attributes. */
+		if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
+			continue;
+		if (attr->num_of_specs == 0)
+			continue;
+		spec = (void *)((uintptr_t)attr +
+				sizeof(struct ibv_flow_attr));
+		flow_spec = (void *)((uintptr_t)flow_attr +
+				     sizeof(struct ibv_flow_attr));
+		specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
+		for (i = 0; i != specs_n; ++i) {
+			attr_h = spec;
+			flow_h = flow_spec;
+			if (memcmp(spec, flow_spec,
+				   RTE_MIN(attr_h->size, flow_h->size)))
+				break;
+			spec = (void *)((uintptr_t)spec + attr_h->size);
+			flow_spec = (void *)((uintptr_t)flow_spec +
+					     flow_h->size);
+		}
+		/* Only a flow whose specifications all matched is the one. */
+		if (i == specs_n)
+			break;
+	}
+	if (flow)
+		priv_flow_destroy(priv, &priv->flows, flow);
+exit:
+	if (parser.drop) {
+		rte_free(parser.drop_q.ibv_attr);
+	} else {
+		/* rte_free() ignores NULL, unused entries need no check. */
+		for (i = 0; i != hash_rxq_init_n; ++i)
+			rte_free(parser.queue[i].ibv_attr);
+	}
+	return -ret;
+}
+
+/**
+ * Update a filter by deleting it and adding it again with the new settings.
+ *
+ * @param priv
+ *   Private structure.
+ * @param fdir_filter
+ *   Filter to be updated.
+ *
+ * @return
+ *   0 on success, otherwise the value returned by the step that failed.
+ */
+static int
+priv_fdir_filter_update(struct priv *priv,
+			const struct rte_eth_fdir_filter *fdir_filter)
+{
+	int ret;
+
+	ret = priv_fdir_filter_delete(priv, fdir_filter);
+	if (ret)
+		return ret; /* Nothing is added when the deletion fails. */
+	ret = priv_fdir_filter_add(priv, fdir_filter);
+	return ret;
+}
+
+/**
+ * Flush all filters.
+ *
+ * @param priv
+ *   Private structure.
+ */
+static void
+priv_fdir_filter_flush(struct priv *priv)
+{
+	priv_flow_flush(priv, &priv->flows);
+}
+
+/**
+ * Get flow director information.
+ *
+ * @param priv
+ *   Private structure.
+ * @param[out] fdir_info
+ *   Resulting flow director information.
+ */
+static void
+priv_fdir_info_get(struct priv *priv, struct rte_eth_fdir_info *fdir_info)
+{
+	struct rte_eth_fdir_masks *mask =
+		&priv->dev->data->dev_conf.fdir_conf.mask;
+
+	fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
+	fdir_info->guarant_spc = 0;
+	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
+	fdir_info->max_flexpayload = 0;
+	fdir_info->flow_types_mask[0] = 0;
+	fdir_info->flex_payload_unit = 0;
+	fdir_info->max_flex_payload_segment_num = 0;
+	fdir_info->flex_payload_limit = 0;
+	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
+}
+
+/**
+ * Dispatch a flow director operation to the matching handler.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param filter_op
+ *   Operation to perform.
+ * @param arg
+ *   Pointer to operation-specific structure, forwarded as is to the handler.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+static int
+priv_fdir_ctrl_func(struct priv *priv, enum rte_filter_op filter_op, void *arg)
+{
+	enum rte_fdir_mode fdir_mode =
+		priv->dev->data->dev_conf.fdir_conf.mode;
+	int ret = 0;
+
+	if (filter_op == RTE_ETH_FILTER_NOP)
+		return 0; /* NOP succeeds even in an unsupported mode. */
+	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
+	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
+		ERROR("%p: flow director mode %d not supported",
+		      (void *)priv, fdir_mode);
+		return EINVAL;
+	}
+	switch (filter_op) {
+	case RTE_ETH_FILTER_ADD:
+		ret = priv_fdir_filter_add(priv, arg);
+		break;
+	case RTE_ETH_FILTER_UPDATE:
+		ret = priv_fdir_filter_update(priv, arg);
+		break;
+	case RTE_ETH_FILTER_DELETE:
+		ret = priv_fdir_filter_delete(priv, arg);
+		break;
+	case RTE_ETH_FILTER_FLUSH:
+		priv_fdir_filter_flush(priv); /* No status, ret stays 0. */
+		break;
+	case RTE_ETH_FILTER_INFO:
+		priv_fdir_info_get(priv, arg); /* No status, ret stays 0. */
+		break;
+	default:
+		DEBUG("%p: unknown operation %u", (void *)priv,
+		      filter_op);
+		ret = EINVAL;
+		break;
+	}
+	return ret;
+}
+
+/**
+ * Manage filter operations.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param filter_type
+ *   Filter type.
+ * @param filter_op
+ *   Operation to perform.
+ * @param arg
+ *   Pointer to operation-specific structure (flow ops output for GENERIC).
+ *
+ * @return
+ *   0 on success, negative errno value on failure.
+ */
+int
+mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
+		     enum rte_filter_type filter_type,
+		     enum rte_filter_op filter_op,
+		     void *arg)
+{
+	int ret = EINVAL; /* Kept as is for unsupported filter types. */
+	struct priv *priv = dev->data->dev_private;
+
+	switch (filter_type) {
+	case RTE_ETH_FILTER_GENERIC:
+		if (filter_op != RTE_ETH_FILTER_GET)
+			return -EINVAL;
+		*(const void **)arg = &mlx5_flow_ops;
+		return 0;
+	case RTE_ETH_FILTER_FDIR:
+		priv_lock(priv); /* FDIR handlers run with the lock held. */
+		ret = priv_fdir_ctrl_func(priv, filter_op, arg);
+		priv_unlock(priv);
+		break;
+	default:
+		ERROR("%p: filter type (%d) not supported",
+		      (void *)dev, filter_type);
+		break;
+	}
+	return -ret; /* Handlers report positive errno values. */
+}
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread
- * [dpdk-dev] [PATCH v3 30/30] net/mlx5: add new operations for isolated mode
  2017-08-02 14:10 [dpdk-dev] [PATCH v1 00/21] net/mlx5: cleanup for isolated mode Nelio Laranjeiro
                   ` (54 preceding siblings ...)
  2017-10-09 14:45 ` [dpdk-dev] [PATCH v3 29/30] net/mlx5: support flow director Nelio Laranjeiro
@ 2017-10-09 14:45 ` Nelio Laranjeiro
  55 siblings, 0 replies; 129+ messages in thread
From: Nelio Laranjeiro @ 2017-10-09 14:45 UTC (permalink / raw)
  To: dev; +Cc: adrien.mazarguil, yskoh, ferruh.yigit
Isolated mode works exclusively with the generic flow API; this patch adds a
new set of device operations valid in this mode.
 - promiscuous*()
 - allmulticast*()
 - reta*()
 - rss*()
are not supported in this mode, as the same functionality is fully covered by
the generic flow API.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5.c      | 39 +++++++++++++++++++++++++++++++++++++--
 drivers/net/mlx5/mlx5_flow.c |  8 ++++++++
 2 files changed, 45 insertions(+), 2 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index b206535..b2087c0 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -257,7 +257,7 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	memset(priv, 0, sizeof(*priv));
 }
 
-static const struct eth_dev_ops mlx5_dev_ops = {
+const struct eth_dev_ops mlx5_dev_ops = {
 	.dev_configure = mlx5_dev_configure,
 	.dev_start = mlx5_dev_start,
 	.dev_stop = mlx5_dev_stop,
@@ -300,7 +300,6 @@ static const struct eth_dev_ops mlx5_dev_ops = {
 	.rx_queue_intr_disable = mlx5_rx_intr_disable,
 };
 
-
 static const struct eth_dev_ops mlx5_dev_sec_ops = {
 	.stats_get = mlx5_stats_get,
 	.stats_reset = mlx5_stats_reset,
@@ -312,6 +311,42 @@ static const struct eth_dev_ops mlx5_dev_sec_ops = {
 	.tx_descriptor_status = mlx5_tx_descriptor_status,
 };
 
+/* Available operators in flow isolated mode. */
+const struct eth_dev_ops mlx5_dev_ops_isolate = {
+	.dev_configure = mlx5_dev_configure,
+	.dev_start = mlx5_dev_start,
+	.dev_stop = mlx5_dev_stop,
+	.dev_set_link_down = mlx5_set_link_down,
+	.dev_set_link_up = mlx5_set_link_up,
+	.dev_close = mlx5_dev_close,
+	.link_update = mlx5_link_update,
+	.stats_get = mlx5_stats_get,
+	.stats_reset = mlx5_stats_reset,
+	.xstats_get = mlx5_xstats_get,
+	.xstats_reset = mlx5_xstats_reset,
+	.xstats_get_names = mlx5_xstats_get_names,
+	.dev_infos_get = mlx5_dev_infos_get,
+	.dev_supported_ptypes_get = mlx5_dev_supported_ptypes_get,
+	.vlan_filter_set = mlx5_vlan_filter_set,
+	.rx_queue_setup = mlx5_rx_queue_setup,
+	.tx_queue_setup = mlx5_tx_queue_setup,
+	.rx_queue_release = mlx5_rx_queue_release,
+	.tx_queue_release = mlx5_tx_queue_release,
+	.flow_ctrl_get = mlx5_dev_get_flow_ctrl,
+	.flow_ctrl_set = mlx5_dev_set_flow_ctrl,
+	.mac_addr_remove = mlx5_mac_addr_remove,
+	.mac_addr_add = mlx5_mac_addr_add,
+	.mac_addr_set = mlx5_mac_addr_set,
+	.mtu_set = mlx5_dev_set_mtu,
+	.vlan_strip_queue_set = mlx5_vlan_strip_queue_set,
+	.vlan_offload_set = mlx5_vlan_offload_set,
+	.filter_ctrl = mlx5_dev_filter_ctrl,
+	.rx_descriptor_status = mlx5_rx_descriptor_status,
+	.tx_descriptor_status = mlx5_tx_descriptor_status,
+	.rx_queue_intr_enable = mlx5_rx_intr_enable,
+	.rx_queue_intr_disable = mlx5_rx_intr_disable,
+};
+
 static struct {
 	struct rte_pci_addr pci_addr; /* associated PCI address */
 	uint32_t ports; /* physical ports bitfield. */
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 36c060e..3321b3e 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -59,6 +59,10 @@
 #define MLX5_IPV4 4
 #define MLX5_IPV6 6
 
+/* Dev ops structure defined in mlx5.c */
+extern const struct eth_dev_ops mlx5_dev_ops;
+extern const struct eth_dev_ops mlx5_dev_ops_isolate;
+
 static int
 mlx5_flow_create_eth(const struct rte_flow_item *item,
 		     const void *default_mask,
@@ -2359,6 +2363,10 @@ mlx5_flow_isolate(struct rte_eth_dev *dev,
 		return -rte_errno;
 	}
 	priv->isolated = !!enable;
+	if (enable)
+		priv->dev->dev_ops = &mlx5_dev_ops_isolate;
+	else
+		priv->dev->dev_ops = &mlx5_dev_ops;
 	priv_unlock(priv);
 	return 0;
 }
-- 
2.1.4
^ permalink raw reply	[flat|nested] 129+ messages in thread