DPDK patches and discussions
 help / color / mirror / Atom feed
From: "Nélio Laranjeiro" <nelio.laranjeiro@6wind.com>
To: Xueming Li <xuemingl@mellanox.com>
Cc: Shahaf Shuler <shahafs@mellanox.com>, dev@dpdk.org
Subject: Re: [dpdk-dev] [PATCH v2 01/15] net/mlx5: support 16 hardware priorities
Date: Tue, 10 Apr 2018 16:41:30 +0200	[thread overview]
Message-ID: <20180410144130.6byrrolegsimzyu3@laranjeiro-vm.dev.6wind.com> (raw)
In-Reply-To: <20180410133415.189905-2-xuemingl@mellanox.com>

On Tue, Apr 10, 2018 at 09:34:01PM +0800, Xueming Li wrote:
> Adjust flow priority mapping to adapt new hardware 16 verb flow
> priorites support:
> 0-3: RTE FLOW tunnel rule
> 4-7: RTE FLOW non-tunnel rule
> 8-15: PMD control flow

This commit log is misleading: this number of priorities depends on the
installed Mellanox OFED; it is not yet available in the upstream Linux
kernel nor in the current Mellanox OFED GA.

What happens when that number of priorities is not available? Does it
remove a functionality?  Will it collide with other flows?

> Signed-off-by: Xueming Li <xuemingl@mellanox.com>
> ---
>  drivers/net/mlx5/mlx5.c         |  10 ++++
>  drivers/net/mlx5/mlx5.h         |   8 +++
>  drivers/net/mlx5/mlx5_flow.c    | 107 ++++++++++++++++++++++++++++++----------
>  drivers/net/mlx5/mlx5_trigger.c |   8 ---
>  4 files changed, 100 insertions(+), 33 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
> index cfab55897..a1f2799e5 100644
> --- a/drivers/net/mlx5/mlx5.c
> +++ b/drivers/net/mlx5/mlx5.c
> @@ -197,6 +197,7 @@ mlx5_dev_close(struct rte_eth_dev *dev)
>  		priv->txqs_n = 0;
>  		priv->txqs = NULL;
>  	}
> +	mlx5_flow_delete_drop_queue(dev);
>
>  	if (priv->pd != NULL) {
>  		assert(priv->ctx != NULL);
>  		claim_zero(mlx5_glue->dealloc_pd(priv->pd));
> @@ -993,6 +994,15 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
>  		mlx5_set_link_up(eth_dev);
>  		/* Store device configuration on private structure. */
>  		priv->config = config;
> +		/* Create drop queue. */
> +		err = mlx5_flow_create_drop_queue(eth_dev);
> +		if (err) {
> +			DRV_LOG(ERR, "port %u drop queue allocation failed: %s",
> +				eth_dev->data->port_id, strerror(rte_errno));
> +			goto port_error;
> +		}
> +		/* Supported flow priority number detection. */
> +		mlx5_flow_priorities_detect(eth_dev);
>  		continue;
>  port_error:
>  		if (priv)
> diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
> index 63b24e6bb..708272f6d 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -89,6 +89,8 @@ struct mlx5_dev_config {
>  	unsigned int rx_vec_en:1; /* Rx vector is enabled. */
>  	unsigned int mpw_hdr_dseg:1; /* Enable DSEGs in the title WQEBB. */
>  	unsigned int vf_nl_en:1; /* Enable Netlink requests in VF mode. */
> +	unsigned int flow_priority_shift; /* Non-tunnel flow priority shift. */
> +	unsigned int control_flow_priority; /* Control flow priority. */
>  	unsigned int tso_max_payload_sz; /* Maximum TCP payload for TSO. */
>  	unsigned int ind_table_max_size; /* Maximum indirection table size. */
>  	int txq_inline; /* Maximum packet size for inlining. */
> @@ -105,6 +107,11 @@ enum mlx5_verbs_alloc_type {
>  	MLX5_VERBS_ALLOC_TYPE_RX_QUEUE,
>  };
>  
> +/* 8 Verbs priorities per flow. */
> +#define MLX5_VERBS_FLOW_PRIO_8 8
> +/* 4 Verbs priorities per flow. */
> +#define MLX5_VERBS_FLOW_PRIO_4 4
> +
>  /**
>   * Verbs allocator needs a context to know in the callback which kind of
>   * resources it is allocating.
> @@ -253,6 +260,7 @@ int mlx5_traffic_restart(struct rte_eth_dev *dev);
>  
>  /* mlx5_flow.c */
>  
> +void mlx5_flow_priorities_detect(struct rte_eth_dev *dev);
>  int mlx5_flow_validate(struct rte_eth_dev *dev,
>  		       const struct rte_flow_attr *attr,
>  		       const struct rte_flow_item items[],
> diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
> index 288610620..394760418 100644
> --- a/drivers/net/mlx5/mlx5_flow.c
> +++ b/drivers/net/mlx5/mlx5_flow.c
> @@ -32,9 +32,6 @@
>  #include "mlx5_prm.h"
>  #include "mlx5_glue.h"
>  
> -/* Define minimal priority for control plane flows. */
> -#define MLX5_CTRL_FLOW_PRIORITY 4
> -
>  /* Internet Protocol versions. */
>  #define MLX5_IPV4 4
>  #define MLX5_IPV6 6
> @@ -129,7 +126,7 @@ const struct hash_rxq_init hash_rxq_init[] = {
>  				IBV_RX_HASH_SRC_PORT_TCP |
>  				IBV_RX_HASH_DST_PORT_TCP),
>  		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
> -		.flow_priority = 1,
> +		.flow_priority = 0,
>  		.ip_version = MLX5_IPV4,
>  	},
>  	[HASH_RXQ_UDPV4] = {
> @@ -138,7 +135,7 @@ const struct hash_rxq_init hash_rxq_init[] = {
>  				IBV_RX_HASH_SRC_PORT_UDP |
>  				IBV_RX_HASH_DST_PORT_UDP),
>  		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
> -		.flow_priority = 1,
> +		.flow_priority = 0,
>  		.ip_version = MLX5_IPV4,
>  	},
>  	[HASH_RXQ_IPV4] = {
> @@ -146,7 +143,7 @@ const struct hash_rxq_init hash_rxq_init[] = {
>  				IBV_RX_HASH_DST_IPV4),
>  		.dpdk_rss_hf = (ETH_RSS_IPV4 |
>  				ETH_RSS_FRAG_IPV4),
> -		.flow_priority = 2,
> +		.flow_priority = 1,
>  		.ip_version = MLX5_IPV4,
>  	},
>  	[HASH_RXQ_TCPV6] = {
> @@ -155,7 +152,7 @@ const struct hash_rxq_init hash_rxq_init[] = {
>  				IBV_RX_HASH_SRC_PORT_TCP |
>  				IBV_RX_HASH_DST_PORT_TCP),
>  		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
> -		.flow_priority = 1,
> +		.flow_priority = 0,
>  		.ip_version = MLX5_IPV6,
>  	},
>  	[HASH_RXQ_UDPV6] = {
> @@ -164,7 +161,7 @@ const struct hash_rxq_init hash_rxq_init[] = {
>  				IBV_RX_HASH_SRC_PORT_UDP |
>  				IBV_RX_HASH_DST_PORT_UDP),
>  		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
> -		.flow_priority = 1,
> +		.flow_priority = 0,
>  		.ip_version = MLX5_IPV6,
>  	},
>  	[HASH_RXQ_IPV6] = {
> @@ -172,13 +169,13 @@ const struct hash_rxq_init hash_rxq_init[] = {
>  				IBV_RX_HASH_DST_IPV6),
>  		.dpdk_rss_hf = (ETH_RSS_IPV6 |
>  				ETH_RSS_FRAG_IPV6),
> -		.flow_priority = 2,
> +		.flow_priority = 1,
>  		.ip_version = MLX5_IPV6,
>  	},
>  	[HASH_RXQ_ETH] = {
>  		.hash_fields = 0,
>  		.dpdk_rss_hf = 0,
> -		.flow_priority = 3,
> +		.flow_priority = 2,
>  	},
>  };

If the number of priorities remains 8, you are removing the priority for
the tunnel flows introduced by
commit 749365717f5c ("net/mlx5: change tunnel flow priority")

Please keep this functionality when this patch fails to get the expected
16 Verbs priorities.

> @@ -536,6 +533,8 @@ mlx5_flow_item_validate(const struct rte_flow_item *item,
>  /**
>   * Extract attribute to the parser.
>   *
> + * @param dev
> + *   Pointer to Ethernet device.
>   * @param[in] attr
>   *   Flow rule attributes.
>   * @param[out] error
> @@ -545,9 +544,12 @@ mlx5_flow_item_validate(const struct rte_flow_item *item,
>   *   0 on success, a negative errno value otherwise and rte_errno is set.
>   */
>  static int
> -mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
> +mlx5_flow_convert_attributes(struct rte_eth_dev *dev,
> +			     const struct rte_flow_attr *attr,
>  			     struct rte_flow_error *error)
>  {
> +	struct priv *priv = dev->data->dev_private;
> +
>  	if (attr->group) {
>  		rte_flow_error_set(error, ENOTSUP,
>  				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
> @@ -555,7 +557,7 @@ mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
>  				   "groups are not supported");
>  		return -rte_errno;
>  	}
> -	if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
> +	if (attr->priority > priv->config.control_flow_priority) {
>  		rte_flow_error_set(error, ENOTSUP,
>  				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
>  				   NULL,
> @@ -900,30 +902,38 @@ mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
>   * Make inner packet matching with an higher priority from the non Inner
>   * matching.
>   *
> + * @param dev
> + *   Pointer to Ethernet device.
>   * @param[in, out] parser
>   *   Internal parser structure.
>   * @param attr
>   *   User flow attribute.
>   */
>  static void
> -mlx5_flow_update_priority(struct mlx5_flow_parse *parser,
> +mlx5_flow_update_priority(struct rte_eth_dev *dev,
> +			  struct mlx5_flow_parse *parser,
>  			  const struct rte_flow_attr *attr)
>  {
> +	struct priv *priv = dev->data->dev_private;
>  	unsigned int i;
> +	uint16_t priority;
>  
> +	if (priv->config.flow_priority_shift == 1)
> +		priority = attr->priority * MLX5_VERBS_FLOW_PRIO_4;
> +	else
> +		priority = attr->priority * MLX5_VERBS_FLOW_PRIO_8;
> +	if (!parser->inner)
> +		priority += priv->config.flow_priority_shift;
>  	if (parser->drop) {
> -		parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
> -			attr->priority +
> -			hash_rxq_init[HASH_RXQ_ETH].flow_priority;
> +		parser->queue[HASH_RXQ_ETH].ibv_attr->priority = priority +
> +				hash_rxq_init[HASH_RXQ_ETH].flow_priority;
>  		return;
>  	}
>  	for (i = 0; i != hash_rxq_init_n; ++i) {
> -		if (parser->queue[i].ibv_attr) {
> -			parser->queue[i].ibv_attr->priority =
> -				attr->priority +
> -				hash_rxq_init[i].flow_priority -
> -				(parser->inner ? 1 : 0);
> -		}
> +		if (!parser->queue[i].ibv_attr)
> +			continue;
> +		parser->queue[i].ibv_attr->priority = priority +
> +				hash_rxq_init[i].flow_priority;
>  	}
>  }
>  
> @@ -1087,7 +1097,7 @@ mlx5_flow_convert(struct rte_eth_dev *dev,
>  		.layer = HASH_RXQ_ETH,
>  		.mark_id = MLX5_FLOW_MARK_DEFAULT,
>  	};
> -	ret = mlx5_flow_convert_attributes(attr, error);
> +	ret = mlx5_flow_convert_attributes(dev, attr, error);
>  	if (ret)
>  		return ret;
>  	ret = mlx5_flow_convert_actions(dev, actions, error, parser);
> @@ -1158,7 +1168,7 @@ mlx5_flow_convert(struct rte_eth_dev *dev,
>  	 */
>  	if (!parser->drop)
>  		mlx5_flow_convert_finalise(parser);
> -	mlx5_flow_update_priority(parser, attr);
> +	mlx5_flow_update_priority(dev, parser, attr);
>  exit_free:
>  	/* Only verification is expected, all resources should be released. */
>  	if (!parser->create) {
> @@ -2450,7 +2460,7 @@ mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
>  	struct priv *priv = dev->data->dev_private;
>  	const struct rte_flow_attr attr = {
>  		.ingress = 1,
> -		.priority = MLX5_CTRL_FLOW_PRIORITY,
> +		.priority = priv->config.control_flow_priority,
>  	};
>  	struct rte_flow_item items[] = {
>  		{
> @@ -3161,3 +3171,50 @@ mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
>  	}
>  	return 0;
>  }
> +
> +/**
> + * Detect number of Verbs flow priorities supported.
> + *
> + * @param dev
> + *   Pointer to Ethernet device.
> + */
> +void
> +mlx5_flow_priorities_detect(struct rte_eth_dev *dev)
> +{
> +	struct priv *priv = dev->data->dev_private;
> +	uint32_t verb_priorities = MLX5_VERBS_FLOW_PRIO_8 * 2;
> +	struct {
> +		struct ibv_flow_attr attr;
> +		struct ibv_flow_spec_eth eth;
> +		struct ibv_flow_spec_action_drop drop;
> +	} flow_attr = {
> +		.attr = {
> +			.num_of_specs = 2,
> +			.priority = verb_priorities - 1,
> +		},
> +		.eth = {
> +			.type = IBV_FLOW_SPEC_ETH,
> +			.size = sizeof(struct ibv_flow_spec_eth),
> +		},
> +		.drop = {
> +			.size = sizeof(struct ibv_flow_spec_action_drop),
> +			.type = IBV_FLOW_SPEC_ACTION_DROP,
> +		},
> +	};
> +	struct ibv_flow *flow;
> +
> +	if (priv->config.control_flow_priority)
> +		return;
> +	flow = mlx5_glue->create_flow(priv->flow_drop_queue->qp,
> +				      &flow_attr.attr);
> +	if (flow) {
> +		priv->config.flow_priority_shift = MLX5_VERBS_FLOW_PRIO_8 / 2;
> +		claim_zero(mlx5_glue->destroy_flow(flow));
> +	} else {
> +		priv->config.flow_priority_shift = 1;
> +		verb_priorities = verb_priorities / 2;
> +	}
> +	priv->config.control_flow_priority = 1;
> +	DRV_LOG(INFO, "port %u Verbs flow priorities: %d",
> +		dev->data->port_id, verb_priorities);
> +}
> diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
> index 6bb4ffb14..d80a2e688 100644
> --- a/drivers/net/mlx5/mlx5_trigger.c
> +++ b/drivers/net/mlx5/mlx5_trigger.c
> @@ -148,12 +148,6 @@ mlx5_dev_start(struct rte_eth_dev *dev)
>  	int ret;
>  
>  	dev->data->dev_started = 1;
> -	ret = mlx5_flow_create_drop_queue(dev);
> -	if (ret) {
> -		DRV_LOG(ERR, "port %u drop queue allocation failed: %s",
> -			dev->data->port_id, strerror(rte_errno));
> -		goto error;
> -	}
>  	DRV_LOG(DEBUG, "port %u allocating and configuring hash Rx queues",
>  		dev->data->port_id);
>  	rte_mempool_walk(mlx5_mp2mr_iter, priv);
> @@ -202,7 +196,6 @@ mlx5_dev_start(struct rte_eth_dev *dev)
>  	mlx5_traffic_disable(dev);
>  	mlx5_txq_stop(dev);
>  	mlx5_rxq_stop(dev);
> -	mlx5_flow_delete_drop_queue(dev);
>  	rte_errno = ret; /* Restore rte_errno. */
>  	return -rte_errno;
>  }
> @@ -237,7 +230,6 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
>  	mlx5_rxq_stop(dev);
>  	for (mr = LIST_FIRST(&priv->mr); mr; mr = LIST_FIRST(&priv->mr))
>  		mlx5_mr_release(mr);
> -	mlx5_flow_delete_drop_queue(dev);
>  }
>  
>  /**
> -- 
> 2.13.3

I have a few concerns about this: mlx5_pci_probe() will also probe any
underlying Verbs device and, in the near future, the representors
associated with a VF.
Such detection should only be done once, by the PF.  I also wonder
whether it is possible to create such a drop action in a representor
directly using Verbs.

Another concern is that this patch will be reverted at some point, once
those 16 priorities are always available.  It will be easier to remove
this detection function than to search for all those modifications.

I would suggest to have a standalone mlx5_flow_priorities_detect() which
creates and deletes all resources needed for this detection.

Thanks,

-- 
Nélio Laranjeiro
6WIND

  reply	other threads:[~2018-04-10 14:41 UTC|newest]

Thread overview: 44+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-04-10 13:34 [dpdk-dev] [PATCH v2 00/15] mlx5 Rx tunnel offloading Xueming Li
2018-04-10 13:34 ` [dpdk-dev] [PATCH v2 01/15] net/mlx5: support 16 hardware priorities Xueming Li
2018-04-10 14:41   ` Nélio Laranjeiro [this message]
2018-04-10 15:22     ` Xueming(Steven) Li
2018-04-12  9:09       ` Nélio Laranjeiro
2018-04-12 13:43         ` Xueming(Steven) Li
2018-04-12 14:02           ` Nélio Laranjeiro
2018-04-12 14:46             ` Xueming(Steven) Li
2018-04-10 13:34 ` [dpdk-dev] [PATCH v2 02/15] net/mlx5: support GRE tunnel flow Xueming Li
2018-04-10 13:34 ` [dpdk-dev] [PATCH v2 03/15] net/mlx5: support L3 vxlan flow Xueming Li
2018-04-10 14:53   ` Nélio Laranjeiro
2018-04-10 13:34 ` [dpdk-dev] [PATCH v2 04/15] net/mlx5: support Rx tunnel type identification Xueming Li
2018-04-10 15:17   ` Nélio Laranjeiro
2018-04-11  8:11     ` Xueming(Steven) Li
2018-04-12  9:50       ` Nélio Laranjeiro
2018-04-12 14:27         ` Xueming(Steven) Li
2018-04-13  8:37           ` Nélio Laranjeiro
2018-04-13 12:09             ` Xueming(Steven) Li
2018-04-10 13:34 ` [dpdk-dev] [PATCH v2 05/15] net/mlx5: support tunnel inner checksum offloads Xueming Li
2018-04-10 15:27   ` Nélio Laranjeiro
2018-04-11  8:46     ` Xueming(Steven) Li
2018-04-10 13:34 ` [dpdk-dev] [PATCH v2 06/15] net/mlx5: split flow RSS handling logic Xueming Li
2018-04-10 15:28   ` Nélio Laranjeiro
2018-04-10 13:34 ` [dpdk-dev] [PATCH v2 07/15] net/mlx5: support tunnel RSS level Xueming Li
2018-04-11  8:55   ` Nélio Laranjeiro
2018-04-14 12:25     ` Xueming(Steven) Li
2018-04-16  7:14       ` Nélio Laranjeiro
2018-04-16  7:46         ` Xueming(Steven) Li
2018-04-16  8:09           ` Nélio Laranjeiro
2018-04-16 10:06             ` Xueming(Steven) Li
2018-04-16 12:27               ` Nélio Laranjeiro
2018-04-10 13:34 ` [dpdk-dev] [PATCH v2 08/15] net/mlx5: add hardware flow debug dump Xueming Li
2018-04-10 13:34 ` [dpdk-dev] [PATCH v2 09/15] net/mlx5: introduce VXLAN-GPE tunnel type Xueming Li
2018-04-10 13:34 ` [dpdk-dev] [PATCH v2 10/15] net/mlx5: allow flow tunnel ID 0 with outer pattern Xueming Li
2018-04-11 12:25   ` Nélio Laranjeiro
2018-04-10 13:34 ` [dpdk-dev] [PATCH v2 11/15] net/mlx5: support MPLS-in-GRE and MPLS-in-UDP Xueming Li
2018-04-10 13:34 ` [dpdk-dev] [PATCH v2 12/15] doc: update mlx5 guide on tunnel offloading Xueming Li
2018-04-11 12:32   ` Nélio Laranjeiro
2018-04-11 12:43     ` Thomas Monjalon
2018-04-10 13:34 ` [dpdk-dev] [PATCH v2 13/15] net/mlx5: setup RSS flow regardless of queue count Xueming Li
2018-04-11 12:37   ` Nélio Laranjeiro
2018-04-11 13:01     ` Xueming(Steven) Li
2018-04-10 13:34 ` [dpdk-dev] [PATCH v2 14/15] net/mlx5: fix invalid flow item check Xueming Li
2018-04-10 13:34 ` [dpdk-dev] [PATCH v2 15/15] net/mlx5: support RSS configuration in isolated mode Xueming Li

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180410144130.6byrrolegsimzyu3@laranjeiro-vm.dev.6wind.com \
    --to=nelio.laranjeiro@6wind.com \
    --cc=dev@dpdk.org \
    --cc=shahafs@mellanox.com \
    --cc=xuemingl@mellanox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).