DPDK patches and discussions
 help / color / mirror / Atom feed
From: Matan Azrad <matan@mellanox.com>
To: Slava Ovsiienko <viacheslavo@mellanox.com>,
	"dev@dpdk.org" <dev@dpdk.org>
Cc: Yongseok Koh <yskoh@mellanox.com>
Subject: Re: [dpdk-dev] [PATCH] net/mlx5: fix ESXi VLAN in virtual machine
Date: Mon, 29 Jul 2019 15:14:08 +0000	[thread overview]
Message-ID: <AM0PR0502MB40194AFF71D9611472A2C363D2DD0@AM0PR0502MB4019.eurprd05.prod.outlook.com> (raw)
In-Reply-To: <1563198320-29068-1-git-send-email-viacheslavo@mellanox.com>



From: Viacheslav Ovsiienko
> On ESXi setups with SR-IOV and E-Switch enabled, there is a problem
> receiving VLAN traffic on VF interfaces. The NIC driver in the ESXi
> hypervisor does not set up the E-Switch vport settings correctly, and VLAN
> traffic targeted to a VF is dropped.
> 
> The patch provides a temporary workaround: if a rule containing a VLAN
> pattern is being installed for a VF, a VLAN network interface is created
> over the VF, as the following command would do:
> 
>   ip link add link vf.if name mlx5.wa.1.100 type vlan id 100
> 
> The PMD in DPDK maintains the database of created VLAN interfaces for
> each existing VF and requested VLAN tags. When all of the RTE Flows using
> the given VLAN tag are removed the created VLAN interface with this VLAN
> tag is deleted.
> 
> The name of created VLAN interface follows the format:
> 
>   evmlx.d1.d2, where d1 is the VF interface ifindex and d2 is the VLAN tag
> 
> Implementation limitations:
> 
> - mask in rules is ignored, rule must specify VLAN tags exactly,
>   no wildcards (which are implemented by the masks) are allowed
> 
> - the virtual environment is detected via the rte_hypervisor_get() call;
>   currently it checks the RTE_CPUFLAG_HYPERVISOR flag on the x86
>   platform. For other architectures the workaround is always
>   applied for flows over a PCI VF
> 
> Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>

After rebase, 
Acked-by: Matan Azrad <matan@mellanox.com>

> ---
>  drivers/net/mlx5/mlx5.c            |   6 +
>  drivers/net/mlx5/mlx5.h            |  30 ++++
>  drivers/net/mlx5/mlx5_flow.c       |  22 +++
>  drivers/net/mlx5/mlx5_flow.h       |   5 +
>  drivers/net/mlx5/mlx5_flow_dv.c    |  33 ++++-
>  drivers/net/mlx5/mlx5_flow_verbs.c |  25 +++-
>  drivers/net/mlx5/mlx5_nl.c         | 279
> +++++++++++++++++++++++++++++++++++++
>  7 files changed, 396 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index
> d93f92d..8549167 100644
> --- a/drivers/net/mlx5/mlx5.c
> +++ b/drivers/net/mlx5/mlx5.c
> @@ -690,6 +690,8 @@ struct mlx5_dev_spawn_data {
>  		close(priv->nl_socket_route);
>  	if (priv->nl_socket_rdma >= 0)
>  		close(priv->nl_socket_rdma);
> +	if (priv->esxi_context)
> +		mlx5_vlan_esxi_exit(priv->esxi_context);
>  	if (priv->sh) {
>  		/*
>  		 * Free the shared context in last turn, because the cleanup
> @@ -1546,6 +1548,8 @@ struct mlx5_dev_spawn_data {  #endif
>  	/* Store device configuration on private structure. */
>  	priv->config = config;
> +	/* Create context for virtual machine VLAN workaround. */
> +	priv->esxi_context = mlx5_vlan_esxi_init(eth_dev, spawn->ifindex);
>  	if (config.dv_flow_en) {
>  		err = mlx5_alloc_shared_dr(priv);
>  		if (err)
> @@ -1572,6 +1576,8 @@ struct mlx5_dev_spawn_data {
>  			close(priv->nl_socket_route);
>  		if (priv->nl_socket_rdma >= 0)
>  			close(priv->nl_socket_rdma);
> +		if (priv->esxi_context)
> +			mlx5_vlan_esxi_exit(priv->esxi_context);
>  		if (own_domain_id)
>  			claim_zero(rte_eth_switch_domain_free(priv-
> >domain_id));
>  		rte_free(priv);
> diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index
> 5af3f41..87afa7a 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -231,6 +231,27 @@ enum mlx5_verbs_alloc_type {
>  	MLX5_VERBS_ALLOC_TYPE_RX_QUEUE,
>  };
> 
> +/* VLAN netdev for ESXi VLAN workaround. */ struct mlx5_vlan_dev {
> +	uint32_t refcnt;
> +	uint32_t ifindex; /**< Own interface index. */ };
> +
> +/* Structure for VF ESXi VLAN workaround. */ struct mlx5_vf_vlan {
> +	uint32_t tag:12;
> +	uint32_t created:1;
> +};
> +
> +/* Array of VLAN devices created on the base of VF */ struct
> +mlx5_vlan_esxi_context {
> +	int nl_socket;
> +	uint32_t nl_sn;
> +	uint32_t vf_ifindex;
> +	struct rte_eth_dev *dev;
> +	struct mlx5_vlan_dev vlan_dev[4096];
> +};
> +
>  /**
>   * Verbs allocator needs a context to know in the callback which kind of
>   * resources it is allocating.
> @@ -386,6 +407,7 @@ struct mlx5_priv {
>  	int nl_socket_rdma; /* Netlink socket (NETLINK_RDMA). */
>  	int nl_socket_route; /* Netlink socket (NETLINK_ROUTE). */
>  	uint32_t nl_sn; /* Netlink message sequence number. */
> +	struct mlx5_vlan_esxi_context *esxi_context; /* ESXi VLAN context.
> */
>  #ifndef RTE_ARCH_64
>  	rte_spinlock_t uar_lock_cq; /* CQs share a common distinct UAR */
>  	rte_spinlock_t uar_lock[MLX5_UAR_PAGE_NUM_MAX]; @@ -582,6
> +604,14 @@ int mlx5_nl_mac_addr_remove(struct rte_eth_dev *dev, struct
> rte_ether_addr *mac,  int mlx5_nl_switch_info(int nl, unsigned int ifindex,
>  			struct mlx5_switch_info *info);
> 
> +struct mlx5_vlan_esxi_context *mlx5_vlan_esxi_init(struct rte_eth_dev
> *dev,
> +						   uint32_t ifindex);
> +void mlx5_vlan_esxi_exit(struct mlx5_vlan_esxi_context *ctx); void
> +mlx5_vlan_esxi_release(struct rte_eth_dev *dev,
> +			    struct mlx5_vf_vlan *vf_vlan);
> +void mlx5_vlan_esxi_acquire(struct rte_eth_dev *dev,
> +			    struct mlx5_vf_vlan *vf_vlan);
> +
>  /* mlx5_devx_cmds.c */
> 
>  int mlx5_devx_cmd_flow_counter_alloc(struct ibv_context *ctx, diff --git
> a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c index
> 4ba34db..42743d2 100644
> --- a/drivers/net/mlx5/mlx5_flow.c
> +++ b/drivers/net/mlx5/mlx5_flow.c
> @@ -1200,6 +1200,8 @@ uint32_t mlx5_flow_adjust_priority(struct
> rte_eth_dev *dev, int32_t priority,
>   *   Item specification.
>   * @param[in] item_flags
>   *   Bit-fields that holds the items detected until now.
> + * @param[in] dev
> + *   Ethernet device flow is being created on.
>   * @param[out] error
>   *   Pointer to error structure.
>   *
> @@ -1209,6 +1211,7 @@ uint32_t mlx5_flow_adjust_priority(struct
> rte_eth_dev *dev, int32_t priority,  int  mlx5_flow_validate_item_vlan(const
> struct rte_flow_item *item,
>  			     uint64_t item_flags,
> +			     struct rte_eth_dev *dev,
>  			     struct rte_flow_error *error)
>  {
>  	const struct rte_flow_item_vlan *spec = item->spec; @@ -1243,6
> +1246,25 @@ uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev,
> int32_t priority,
>  					error);
>  	if (ret)
>  		return ret;
> +	if (!tunnel && mask->tci != RTE_BE16(0x0fff)) {
> +		struct mlx5_priv *priv = dev->data->dev_private;
> +
> +		if (priv->esxi_context) {
> +			/*
> +			 * Non-NULL context means we have a virtual
> machine
> +			 * and SR-IOV enabled, we have to create VLAN
> interface
> +			 * to make hypervisor (ESXi) to setup E-Switch vport
> +			 * context correctly. We avoid creating the multiple
> +			 * VLAN interfaces, so we cannot support VLAN tag
> mask.
> +			 */
> +			return rte_flow_error_set(error, EINVAL,
> +
> RTE_FLOW_ERROR_TYPE_ITEM,
> +						  item,
> +						  "VLAN tag mask is not"
> +						  " supported in virtual"
> +						  " environment");
> +		}
> +	}
>  	if (spec) {
>  		vlan_tag = spec->tci;
>  		vlan_tag &= mask->tci;
> diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
> index 72b339e..ac20572 100644
> --- a/drivers/net/mlx5/mlx5_flow.h
> +++ b/drivers/net/mlx5/mlx5_flow.h
> @@ -318,6 +318,8 @@ struct mlx5_flow_dv {
>  	/**< Pointer to the jump action resource. */
>  	struct mlx5_flow_dv_port_id_action_resource *port_id_action;
>  	/**< Pointer to port ID action resource. */
> +	struct mlx5_vf_vlan vf_vlan;
> +	/**< Structure for VF ESXi VLAN workaround. */
>  #ifdef HAVE_IBV_FLOW_DV_SUPPORT
>  	void *actions[MLX5_DV_MAX_NUMBER_OF_ACTIONS];
>  	/**< Action list. */
> @@ -343,6 +345,8 @@ struct mlx5_flow_verbs {
>  	struct ibv_flow *flow; /**< Verbs flow pointer. */
>  	struct mlx5_hrxq *hrxq; /**< Hash Rx queue object. */
>  	uint64_t hash_fields; /**< Verbs hash Rx queue hash fields. */
> +	struct mlx5_vf_vlan vf_vlan;
> +	/**< Structure for VF ESXi VLAN workaround. */
>  };
> 
>  /** Device flow structure. */
> @@ -507,6 +511,7 @@ int mlx5_flow_validate_item_udp(const struct
> rte_flow_item *item,
>  				struct rte_flow_error *error);
>  int mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
>  				 uint64_t item_flags,
> +				 struct rte_eth_dev *dev,
>  				 struct rte_flow_error *error);
>  int mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item,
>  				  uint64_t item_flags,
> diff --git a/drivers/net/mlx5/mlx5_flow_dv.c
> b/drivers/net/mlx5/mlx5_flow_dv.c index 3fa624b..63183b5 100644
> --- a/drivers/net/mlx5/mlx5_flow_dv.c
> +++ b/drivers/net/mlx5/mlx5_flow_dv.c
> @@ -2363,7 +2363,7 @@ struct field_modify_info modify_tcp[] = {
>  			break;
>  		case RTE_FLOW_ITEM_TYPE_VLAN:
>  			ret = mlx5_flow_validate_item_vlan(items,
> item_flags,
> -							   error);
> +							   dev, error);
>  			if (ret < 0)
>  				return ret;
>  			last_item = tunnel ?
> MLX5_FLOW_LAYER_INNER_VLAN :
> @@ -2914,6 +2914,8 @@ struct field_modify_info modify_tcp[] = {
>  /**
>   * Add VLAN item to matcher and to the value.
>   *
> + * @param[in, out] dev_flow
> + *   Flow descriptor.
>   * @param[in, out] matcher
>   *   Flow matcher.
>   * @param[in, out] key
> @@ -2924,7 +2926,8 @@ struct field_modify_info modify_tcp[] = {
>   *   Item is inner pattern.
>   */
>  static void
> -flow_dv_translate_item_vlan(void *matcher, void *key,
> +flow_dv_translate_item_vlan(struct mlx5_flow *dev_flow,
> +			    void *matcher, void *key,
>  			    const struct rte_flow_item *item,
>  			    int inner)
>  {
> @@ -2951,6 +2954,12 @@ struct field_modify_info modify_tcp[] = {
>  		headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
>  					 outer_headers);
>  		headers_v = MLX5_ADDR_OF(fte_match_param, key,
> outer_headers);
> +		/*
> +		 * This is workaround, masks are not supported,
> +		 * and pre-validated.
> +		 */
> +		dev_flow->dv.vf_vlan.tag =
> +			rte_be_to_cpu_16(vlan_v->tci) & 0x0fff;
>  	}
>  	tci_m = rte_be_to_cpu_16(vlan_m->tci);
>  	tci_v = rte_be_to_cpu_16(vlan_m->tci & vlan_v->tci); @@ -4443,7
> +4452,8 @@ struct field_modify_info modify_tcp[] = {
>  					     MLX5_FLOW_LAYER_OUTER_L2;
>  			break;
>  		case RTE_FLOW_ITEM_TYPE_VLAN:
> -			flow_dv_translate_item_vlan(match_mask,
> match_value,
> +			flow_dv_translate_item_vlan(dev_flow,
> +						    match_mask, match_value,
>  						    items, tunnel);
>  			matcher.priority = MLX5_PRIORITY_MAP_L2;
>  			last_item = tunnel ? (MLX5_FLOW_LAYER_INNER_L2
> | @@ -4658,6 +4668,17 @@ struct field_modify_info modify_tcp[] = {
>  					   "hardware refuses to create flow");
>  			goto error;
>  		}
> +		if (priv->esxi_context &&
> +		    dev_flow->dv.vf_vlan.tag &&
> +		    !dev_flow->dv.vf_vlan.created) {
> +			/*
> +			 * The rule contains the VLAN pattern.
> +			 * For VF we are going to create VLAN
> +			 * interface to make ESXi set correct
> +			 * e-Switch vport context.
> +			 */
> +			mlx5_vlan_esxi_acquire(dev, &dev_flow-
> >dv.vf_vlan);
> +		}
>  	}
>  	return 0;
>  error:
> @@ -4671,6 +4692,9 @@ struct field_modify_info modify_tcp[] = {
>  				mlx5_hrxq_release(dev, dv->hrxq);
>  			dv->hrxq = NULL;
>  		}
> +		if (dev_flow->dv.vf_vlan.tag &&
> +		    dev_flow->dv.vf_vlan.created)
> +			mlx5_vlan_esxi_release(dev, &dev_flow-
> >dv.vf_vlan);
>  	}
>  	rte_errno = err; /* Restore rte_errno. */
>  	return -rte_errno;
> @@ -4871,6 +4895,9 @@ struct field_modify_info modify_tcp[] = {
>  				mlx5_hrxq_release(dev, dv->hrxq);
>  			dv->hrxq = NULL;
>  		}
> +		if (dev_flow->dv.vf_vlan.tag &&
> +		    dev_flow->dv.vf_vlan.created)
> +			mlx5_vlan_esxi_release(dev, &dev_flow-
> >dv.vf_vlan);
>  	}
>  }
> 
> diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c
> b/drivers/net/mlx5/mlx5_flow_verbs.c
> index 2f4c80c..5909488 100644
> --- a/drivers/net/mlx5/mlx5_flow_verbs.c
> +++ b/drivers/net/mlx5/mlx5_flow_verbs.c
> @@ -386,6 +386,9 @@
>  		flow_verbs_spec_add(&dev_flow->verbs, &eth, size);
>  	else
>  		flow_verbs_item_vlan_update(dev_flow->verbs.attr, &eth);
> +	if (!tunnel)
> +		dev_flow->verbs.vf_vlan.tag =
> +			rte_be_to_cpu_16(spec->tci) & 0x0fff;
>  }
> 
>  /**
> @@ -1049,7 +1052,7 @@
>  			break;
>  		case RTE_FLOW_ITEM_TYPE_VLAN:
>  			ret = mlx5_flow_validate_item_vlan(items,
> item_flags,
> -							   error);
> +							   dev, error);
>  			if (ret < 0)
>  				return ret;
>  			last_item = tunnel ? (MLX5_FLOW_LAYER_INNER_L2
> | @@ -1587,6 +1590,10 @@
>  				mlx5_hrxq_release(dev, verbs->hrxq);
>  			verbs->hrxq = NULL;
>  		}
> +		if (dev_flow->verbs.vf_vlan.tag &&
> +		    dev_flow->verbs.vf_vlan.created) {
> +			mlx5_vlan_esxi_release(dev, &dev_flow-
> >verbs.vf_vlan);
> +		}
>  	}
>  }
> 
> @@ -1634,6 +1641,7 @@
>  flow_verbs_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
>  		 struct rte_flow_error *error)
>  {
> +	struct mlx5_priv *priv = dev->data->dev_private;
>  	struct mlx5_flow_verbs *verbs;
>  	struct mlx5_flow *dev_flow;
>  	int err;
> @@ -1683,6 +1691,17 @@
>  					   "hardware refuses to create flow");
>  			goto error;
>  		}
> +		if (priv->esxi_context &&
> +		    dev_flow->verbs.vf_vlan.tag &&
> +		    !dev_flow->verbs.vf_vlan.created) {
> +			/*
> +			 * The rule contains the VLAN pattern.
> +			 * For VF we are going to create VLAN
> +			 * interface to make ESXi set correct
> +			 * e-Switch vport context.
> +			 */
> +			mlx5_vlan_esxi_acquire(dev, &dev_flow-
> >verbs.vf_vlan);
> +		}
>  	}
>  	return 0;
>  error:
> @@ -1696,6 +1715,10 @@
>  				mlx5_hrxq_release(dev, verbs->hrxq);
>  			verbs->hrxq = NULL;
>  		}
> +		if (dev_flow->verbs.vf_vlan.tag &&
> +		    dev_flow->verbs.vf_vlan.created) {
> +			mlx5_vlan_esxi_release(dev, &dev_flow-
> >verbs.vf_vlan);
> +		}
>  	}
>  	rte_errno = err; /* Restore rte_errno. */
>  	return -rte_errno;
> diff --git a/drivers/net/mlx5/mlx5_nl.c b/drivers/net/mlx5/mlx5_nl.c index
> 5773fa7..8516442 100644
> --- a/drivers/net/mlx5/mlx5_nl.c
> +++ b/drivers/net/mlx5/mlx5_nl.c
> @@ -12,11 +12,14 @@
>  #include <stdbool.h>
>  #include <stdint.h>
>  #include <stdlib.h>
> +#include <stdalign.h>
>  #include <string.h>
>  #include <sys/socket.h>
>  #include <unistd.h>
> 
>  #include <rte_errno.h>
> +#include <rte_malloc.h>
> +#include <rte_hypervisor.h>
> 
>  #include "mlx5.h"
>  #include "mlx5_utils.h"
> @@ -28,6 +31,8 @@
>  /* Receive buffer size for the Netlink socket */  #define
> MLX5_RECV_BUF_SIZE 32768
> 
> +/** Parameters of VLAN devices created by driver. */ #define
> +MLX5_ESXI_VLAN_DEVICE_PFX "evmlx"
>  /*
>   * Define NDA_RTA as defined in iproute2 sources.
>   *
> @@ -987,3 +992,277 @@ struct mlx5_nl_ifindex_data {
>  	}
>  	return ret;
>  }
> +
> +/*
> + * Delete VLAN network device by ifindex.
> + *
> + * @param[in] tcf
> + *   Context object initialized by mlx5_vlan_esxi_init().
> + * @param[in] ifindex
> + *   Interface index of network device to delete.
> + */
> +static void
> +mlx5_vlan_esxi_delete(struct mlx5_vlan_esxi_context *esxi,
> +		      uint32_t ifindex)
> +{
> +	int ret;
> +	struct {
> +		struct nlmsghdr nh;
> +		struct ifinfomsg info;
> +	} req = {
> +		.nh = {
> +			.nlmsg_len = NLMSG_LENGTH(sizeof(struct
> ifinfomsg)),
> +			.nlmsg_type = RTM_DELLINK,
> +			.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
> +		},
> +		.info = {
> +			.ifi_family = AF_UNSPEC,
> +			.ifi_index = ifindex,
> +		},
> +	};
> +
> +	if (ifindex) {
> +		++esxi->nl_sn;
> +		if (!esxi->nl_sn)
> +			++esxi->nl_sn;
> +		ret = mlx5_nl_send(esxi->nl_socket, &req.nh, esxi->nl_sn);
> +		if (ret >= 0)
> +			ret = mlx5_nl_recv(esxi->nl_socket,
> +					   esxi->nl_sn,
> +					   NULL, NULL);
> +		if (ret < 0)
> +			DRV_LOG(WARNING, "netlink: error deleting"
> +					 " VLAN ESXi ifindex %u, %d",
> +					 ifindex, ret);
> +	}
> +}
> +
> +/* Set of subroutines to build Netlink message. */ static struct nlattr
> +* nl_msg_tail(struct nlmsghdr *nlh) {
> +	return (struct nlattr *)
> +		(((uint8_t *)nlh) + NLMSG_ALIGN(nlh->nlmsg_len)); }
> +
> +static void
> +nl_attr_put(struct nlmsghdr *nlh, int type, const void *data, int alen)
> +{
> +	struct nlattr *nla = nl_msg_tail(nlh);
> +
> +	nla->nla_type = type;
> +	nla->nla_len = NLMSG_ALIGN(sizeof(struct nlattr) + alen);
> +	nlh->nlmsg_len = NLMSG_ALIGN(nlh->nlmsg_len) + nla->nla_len;
> +
> +	if (alen)
> +		memcpy((uint8_t *)nla + sizeof(struct nlattr), data, alen); }
> +
> +static struct nlattr *
> +nl_attr_nest_start(struct nlmsghdr *nlh, int type) {
> +	struct nlattr *nest = (struct nlattr *)nl_msg_tail(nlh);
> +
> +	nl_attr_put(nlh, type, NULL, 0);
> +	return nest;
> +}
> +
> +static void
> +nl_attr_nest_end(struct nlmsghdr *nlh, struct nlattr *nest) {
> +	nest->nla_len = (uint8_t *)nl_msg_tail(nlh) - (uint8_t *)nest; }
> +
> +/*
> + * Create network VLAN device with specified VLAN tag.
> + *
> + * @param[in] tcf
> + *   Context object initialized by mlx5_vlan_esxi_init().
> + * @param[in] ifindex
> + *   Base network interface index.
> + * @param[in] tag
> + *   VLAN tag for VLAN network device to create.
> + */
> +static uint32_t
> +mlx5_vlan_esxi_create(struct mlx5_vlan_esxi_context *esxi,
> +		      uint32_t ifindex,
> +		      uint16_t tag)
> +{
> +	struct nlmsghdr *nlh;
> +	struct ifinfomsg *ifm;
> +	char name[sizeof(MLX5_ESXI_VLAN_DEVICE_PFX) + 32];
> +
> +	alignas(RTE_CACHE_LINE_SIZE)
> +	uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
> +		    NLMSG_ALIGN(sizeof(struct ifinfomsg)) +
> +		    NLMSG_ALIGN(sizeof(struct nlattr)) * 8 +
> +		    NLMSG_ALIGN(sizeof(uint32_t)) +
> +		    NLMSG_ALIGN(sizeof(name)) +
> +		    NLMSG_ALIGN(sizeof("vlan")) +
> +		    NLMSG_ALIGN(sizeof(uint32_t)) +
> +		    NLMSG_ALIGN(sizeof(uint16_t)) + 16];
> +	struct nlattr *na_info;
> +	struct nlattr *na_vlan;
> +	int ret;
> +
> +	memset(buf, 0, sizeof(buf));
> +	++esxi->nl_sn;
> +	if (!esxi->nl_sn)
> +		++esxi->nl_sn;
> +	nlh = (struct nlmsghdr *)buf;
> +	nlh->nlmsg_len = sizeof(struct nlmsghdr);
> +	nlh->nlmsg_type = RTM_NEWLINK;
> +	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE |
> +			   NLM_F_EXCL | NLM_F_ACK;
> +	ifm = (struct ifinfomsg *)nl_msg_tail(nlh);
> +	nlh->nlmsg_len += sizeof(struct ifinfomsg);
> +	ifm->ifi_family = AF_UNSPEC;
> +	ifm->ifi_type = 0;
> +	ifm->ifi_index = 0;
> +	ifm->ifi_flags = IFF_UP;
> +	ifm->ifi_change = 0xffffffff;
> +	nl_attr_put(nlh, IFLA_LINK, &ifindex, sizeof(ifindex));
> +	ret = snprintf(name, sizeof(name), "%s.%u.%u",
> +		       MLX5_ESXI_VLAN_DEVICE_PFX, ifindex, tag);
> +	nl_attr_put(nlh, IFLA_IFNAME, name, ret + 1);
> +	na_info = nl_attr_nest_start(nlh, IFLA_LINKINFO);
> +	nl_attr_put(nlh, IFLA_INFO_KIND, "vlan", sizeof("vlan"));
> +	na_vlan = nl_attr_nest_start(nlh, IFLA_INFO_DATA);
> +	nl_attr_put(nlh, IFLA_VLAN_ID, &tag, sizeof(tag));
> +	nl_attr_nest_end(nlh, na_vlan);
> +	nl_attr_nest_end(nlh, na_info);
> +	assert(sizeof(buf) >= nlh->nlmsg_len);
> +	ret = mlx5_nl_send(esxi->nl_socket, nlh, esxi->nl_sn);
> +	if (ret >= 0)
> +		ret = mlx5_nl_recv(esxi->nl_socket, esxi->nl_sn, NULL,
> NULL);
> +	if (ret < 0) {
> +		DRV_LOG(WARNING,
> +			"netlink: VLAN %s create failure (%d)",
> +			name, ret);
> +	}
> +	// Try to get ifindex of created or pre-existing device.
> +	ret = if_nametoindex(name);
> +	if (!ret) {
> +		DRV_LOG(WARNING,
> +			"VLAN %s failed to get index (%d)",
> +			name, errno);
> +		return 0;
> +	}
> +	return ret;
> +}
> +
> +/*
> + * Release VLAN network device, created for ESXi workaround.
> + *
> + * @param[in] dev
> + *   Ethernet device object, Netlink context provider.
> + * @param[in] vlan
> + *   Object representing the network device to release.
> + */
> +void mlx5_vlan_esxi_release(struct rte_eth_dev *dev,
> +			    struct mlx5_vf_vlan *vlan)
> +{
> +	struct mlx5_priv *priv = dev->data->dev_private;
> +	struct mlx5_vlan_esxi_context *esxi = priv->esxi_context;
> +	struct mlx5_vlan_dev *vlan_dev = &esxi->vlan_dev[0];
> +
> +	assert(vlan->created);
> +	assert(priv->esxi_context);
> +	if (!vlan->created || !esxi)
> +		return;
> +	vlan->created = 0;
> +	assert(vlan_dev[vlan->tag].refcnt);
> +	if (--vlan_dev[vlan->tag].refcnt == 0 &&
> +	    vlan_dev[vlan->tag].ifindex) {
> +		mlx5_vlan_esxi_delete(esxi, vlan_dev[vlan->tag].ifindex);
> +		vlan_dev[vlan->tag].ifindex = 0;
> +	}
> +}
> +
> +/**
> + * Acquire VLAN interface with specified tag for ESXi workaround.
> + *
> + * @param[in] dev
> + *   Ethernet device object, Netlink context provider.
> + * @param[in] vlan
> + *   Object representing the network device to acquire.
> + */
> +void mlx5_vlan_esxi_acquire(struct rte_eth_dev *dev,
> +			    struct mlx5_vf_vlan *vlan)
> +{
> +	struct mlx5_priv *priv = dev->data->dev_private;
> +	struct mlx5_vlan_esxi_context *esxi = priv->esxi_context;
> +	struct mlx5_vlan_dev *vlan_dev = &esxi->vlan_dev[0];
> +
> +	assert(!vlan->created);
> +	assert(priv->esxi_context);
> +	if (vlan->created || !esxi)
> +		return;
> +	if (vlan_dev[vlan->tag].refcnt == 0) {
> +		assert(!vlan_dev[vlan->tag].ifindex);
> +		vlan_dev[vlan->tag].ifindex =
> +			mlx5_vlan_esxi_create(esxi,
> +					      esxi->vf_ifindex,
> +					      vlan->tag);
> +	}
> +	if (vlan_dev[vlan->tag].ifindex) {
> +		vlan_dev[vlan->tag].refcnt++;
> +		vlan->created = 1;
> +	}
> +}
> +
> +/*
> + * Create per ethernet device VLAN ESXi workaround context  */ struct
> +mlx5_vlan_esxi_context * mlx5_vlan_esxi_init(struct rte_eth_dev *dev,
> +		    uint32_t ifindex)
> +{
> +	struct mlx5_priv *priv = dev->data->dev_private;
> +	struct mlx5_dev_config *config = &priv->config;
> +	struct mlx5_vlan_esxi_context *esxi;
> +
> +	/* Do not engage workaround over PF. */
> +	if (!config->vf)
> +		return NULL;
> +	/* Check whether there is virtual environment */
> +	if (rte_hypervisor_get() == RTE_HYPERVISOR_NONE)
> +		return NULL;
> +	esxi = rte_zmalloc(__func__, sizeof(*esxi), sizeof(uint32_t));
> +	if (!esxi) {
> +		DRV_LOG(WARNING,
> +			"Can not allocate memory"
> +			" for ESXi VLAN context");
> +		return NULL;
> +	}
> +	esxi->nl_socket = mlx5_nl_init(NETLINK_ROUTE);
> +	if (esxi->nl_socket < 0) {
> +		DRV_LOG(WARNING,
> +			"Can not create Netlink socket"
> +			" for ESXi VLAN context");
> +		rte_free(esxi);
> +		return NULL;
> +	}
> +	esxi->nl_sn = random();
> +	esxi->vf_ifindex = ifindex;
> +	esxi->dev = dev;
> +	/* Cleanup for existing VLAN devices. */
> +	return esxi;
> +}
> +
> +/*
> + * Destroy per ethernet device VLAN ESXi workaround context  */ void
> +mlx5_vlan_esxi_exit(struct mlx5_vlan_esxi_context *esxi) {
> +	unsigned int i;
> +
> +	/* Delete all remaining VLAN devices. */
> +	for (i = 0; i < RTE_DIM(esxi->vlan_dev); i++) {
> +		if (esxi->vlan_dev[i].ifindex)
> +			mlx5_vlan_esxi_delete(esxi, esxi-
> >vlan_dev[i].ifindex);
> +	}
> +	if (esxi->nl_socket >= 0)
> +		close(esxi->nl_socket);
> +	rte_free(esxi);
> +}
> --
> 1.8.3.1


  reply	other threads:[~2019-07-29 15:14 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-07-15 13:45 Viacheslav Ovsiienko
2019-07-29 15:14 ` Matan Azrad [this message]
2019-07-29 15:26 ` [dpdk-dev] [PATCH v2] " Viacheslav Ovsiienko
2019-07-30  5:05   ` Shahaf Shuler
2019-07-30  9:20   ` [dpdk-dev] [PATCH v3] net/mlx5: add workaround for " Viacheslav Ovsiienko
2019-07-31  6:14     ` Shahaf Shuler
2019-07-31  7:39     ` Raslan Darawsheh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=AM0PR0502MB40194AFF71D9611472A2C363D2DD0@AM0PR0502MB4019.eurprd05.prod.outlook.com \
    --to=matan@mellanox.com \
    --cc=dev@dpdk.org \
    --cc=viacheslavo@mellanox.com \
    --cc=yskoh@mellanox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).