DPDK patches and discussions
* [dpdk-dev] [PATCH v1 0/8] mlx5 PMD multi OS support
@ 2020-06-03 15:05 Ophir Munk
  2020-06-03 15:05 ` [dpdk-dev] [PATCH v1 1/8] net/mlx5: rename mlx5 ibv shared struct Ophir Munk
                   ` (8 more replies)
  0 siblings, 9 replies; 17+ messages in thread
From: Ophir Munk @ 2020-06-03 15:05 UTC (permalink / raw)
  To: dev, Matan Azrad, Raslan Darawsheh; +Cc: Ophir Munk

This patch series is part of preparing the mlx5 PMD to compile and run
under multiple operating systems.

v1:
Initial release

Ophir Munk (8):
  net/mlx5: rename mlx5 ibv shared struct
  net/mlx5: add mlx5 Linux specific file with getter functions
  drivers: remove mlx5 protection domain dependency on ibv
  net/mlx5: remove attributes dependency on ibv and dv
  net/mlx5: remove umem field dependency on dv
  net/mlx5: refactor PCI probing under Linux
  net/mlx5: add mlx5 header file specific to Linux
  net/mlx5: remove ibv dependency in spawn struct

 drivers/common/mlx5/mlx5_common_mr.c |   24 +-
 drivers/common/mlx5/mlx5_common_mr.h |    6 +-
 drivers/net/mlx5/Makefile            |    2 +
 drivers/net/mlx5/linux/meson.build   |    8 +
 drivers/net/mlx5/linux/mlx5_os.c     | 1992 ++++++++++++++++++++++++++++++++++
 drivers/net/mlx5/linux/mlx5_os.h     |   18 +
 drivers/net/mlx5/meson.build         |    5 +-
 drivers/net/mlx5/mlx5.c              | 1900 ++------------------------------
 drivers/net/mlx5/mlx5.h              |  100 +-
 drivers/net/mlx5/mlx5_ethdev.c       |   12 +-
 drivers/net/mlx5/mlx5_flow.c         |   20 +-
 drivers/net/mlx5/mlx5_flow_dv.c      |   28 +-
 drivers/net/mlx5/mlx5_mp.c           |    2 +-
 drivers/net/mlx5/mlx5_mr.c           |   10 +-
 drivers/net/mlx5/mlx5_rxq.c          |    6 +-
 drivers/net/mlx5/mlx5_txq.c          |   18 +-
 16 files changed, 2233 insertions(+), 1918 deletions(-)
 create mode 100644 drivers/net/mlx5/linux/meson.build
 create mode 100644 drivers/net/mlx5/linux/mlx5_os.c
 create mode 100644 drivers/net/mlx5/linux/mlx5_os.h

-- 
2.8.4



* [dpdk-dev] [PATCH v1 1/8] net/mlx5: rename mlx5 ibv shared struct
  2020-06-03 15:05 [dpdk-dev] [PATCH v1 0/8] mlx5 PMD multi OS support Ophir Munk
@ 2020-06-03 15:05 ` Ophir Munk
  2020-06-03 15:05 ` [dpdk-dev] [PATCH v1 2/8] net/mlx5: add mlx5 Linux specific file with getter functions Ophir Munk
                   ` (7 subsequent siblings)
  8 siblings, 0 replies; 17+ messages in thread
From: Ophir Munk @ 2020-06-03 15:05 UTC (permalink / raw)
  To: dev, Matan Azrad, Raslan Darawsheh; +Cc: Ophir Munk

Replace all occurrences of 'mlx5_ibv_shared' with 'mlx5_dev_ctx_shared'.

Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
Acked-by: Matan Azrad <matan@mellanox.com>
---
 drivers/net/mlx5/mlx5.c         | 58 ++++++++++++++++++++---------------------
 drivers/net/mlx5/mlx5.h         | 14 +++++-----
 drivers/net/mlx5/mlx5_ethdev.c  |  6 ++---
 drivers/net/mlx5/mlx5_flow.c    | 20 +++++++-------
 drivers/net/mlx5/mlx5_flow_dv.c | 26 +++++++++---------
 drivers/net/mlx5/mlx5_mr.c      | 10 +++----
 6 files changed, 67 insertions(+), 67 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 95a0f33..f942f92 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -199,7 +199,7 @@ struct mlx5_dev_spawn_data {
 	struct rte_pci_device *pci_dev; /**< Backend PCI device. */
 };
 
-static LIST_HEAD(, mlx5_ibv_shared) mlx5_ibv_list = LIST_HEAD_INITIALIZER();
+static LIST_HEAD(, mlx5_dev_ctx_shared) mlx5_ibv_list = LIST_HEAD_INITIALIZER();
 static pthread_mutex_t mlx5_ibv_list_mutex = PTHREAD_MUTEX_INITIALIZER;
 
 static struct mlx5_indexed_pool_config mlx5_ipool_cfg[] = {
@@ -445,10 +445,10 @@ mlx5_flow_id_release(struct mlx5_flow_id_pool *pool, uint32_t id)
  * Initialize the shared aging list information per port.
  *
  * @param[in] sh
- *   Pointer to mlx5_ibv_shared object.
+ *   Pointer to mlx5_dev_ctx_shared object.
  */
 static void
-mlx5_flow_aging_init(struct mlx5_ibv_shared *sh)
+mlx5_flow_aging_init(struct mlx5_dev_ctx_shared *sh)
 {
 	uint32_t i;
 	struct mlx5_age_info *age_info;
@@ -466,10 +466,10 @@ mlx5_flow_aging_init(struct mlx5_ibv_shared *sh)
  * Initialize the counters management structure.
  *
  * @param[in] sh
- *   Pointer to mlx5_ibv_shared object to free
+ *   Pointer to mlx5_dev_ctx_shared object to free
  */
 static void
-mlx5_flow_counters_mng_init(struct mlx5_ibv_shared *sh)
+mlx5_flow_counters_mng_init(struct mlx5_dev_ctx_shared *sh)
 {
 	int i;
 
@@ -502,10 +502,10 @@ mlx5_flow_destroy_counter_stat_mem_mng(struct mlx5_counter_stats_mem_mng *mng)
  * Close and release all the resources of the counters management.
  *
  * @param[in] sh
- *   Pointer to mlx5_ibv_shared object to free.
+ *   Pointer to mlx5_dev_ctx_shared object to free.
  */
 static void
-mlx5_flow_counters_mng_close(struct mlx5_ibv_shared *sh)
+mlx5_flow_counters_mng_close(struct mlx5_dev_ctx_shared *sh)
 {
 	struct mlx5_counter_stats_mem_mng *mng;
 	int i;
@@ -560,12 +560,12 @@ mlx5_flow_counters_mng_close(struct mlx5_ibv_shared *sh)
  * Initialize the flow resources' indexed mempool.
  *
  * @param[in] sh
- *   Pointer to mlx5_ibv_shared object.
+ *   Pointer to mlx5_dev_ctx_shared object.
  * @param[in] sh
  *   Pointer to user dev config.
  */
 static void
-mlx5_flow_ipool_create(struct mlx5_ibv_shared *sh,
+mlx5_flow_ipool_create(struct mlx5_dev_ctx_shared *sh,
 		       const struct mlx5_dev_config *config __rte_unused)
 {
 	uint8_t i;
@@ -591,10 +591,10 @@ mlx5_flow_ipool_create(struct mlx5_ibv_shared *sh,
  * Release the flow resources' indexed mempool.
  *
  * @param[in] sh
- *   Pointer to mlx5_ibv_shared object.
+ *   Pointer to mlx5_dev_ctx_shared object.
  */
 static void
-mlx5_flow_ipool_destroy(struct mlx5_ibv_shared *sh)
+mlx5_flow_ipool_destroy(struct mlx5_dev_ctx_shared *sh)
 {
 	uint8_t i;
 
@@ -668,10 +668,10 @@ mlx5_restore_doorbell_mapping_env(int value)
  * between multiple ports of single IB device.
  *
  * @param sh
- *   Pointer to mlx5_ibv_shared object.
+ *   Pointer to mlx5_dev_ctx_shared object.
  */
 static void
-mlx5_dev_shared_handler_install(struct mlx5_ibv_shared *sh)
+mlx5_dev_shared_handler_install(struct mlx5_dev_ctx_shared *sh)
 {
 	int ret;
 	int flags;
@@ -724,10 +724,10 @@ mlx5_dev_shared_handler_install(struct mlx5_ibv_shared *sh)
  * between multiple ports of single IB device.
  *
  * @param dev
- *   Pointer to mlx5_ibv_shared object.
+ *   Pointer to mlx5_dev_ctx_shared object.
  */
 static void
-mlx5_dev_shared_handler_uninstall(struct mlx5_ibv_shared *sh)
+mlx5_dev_shared_handler_uninstall(struct mlx5_dev_ctx_shared *sh)
 {
 	if (sh->intr_handle.fd >= 0)
 		mlx5_intr_callback_unregister(&sh->intr_handle,
@@ -758,14 +758,14 @@ mlx5_dev_shared_handler_uninstall(struct mlx5_ibv_shared *sh)
  *   Pointer to device configuration structure.
  *
  * @return
- *   Pointer to mlx5_ibv_shared object on success,
+ *   Pointer to mlx5_dev_ctx_shared object on success,
  *   otherwise NULL and rte_errno is set.
  */
-static struct mlx5_ibv_shared *
+static struct mlx5_dev_ctx_shared *
 mlx5_alloc_shared_ibctx(const struct mlx5_dev_spawn_data *spawn,
 			const struct mlx5_dev_config *config)
 {
-	struct mlx5_ibv_shared *sh;
+	struct mlx5_dev_ctx_shared *sh;
 	int dbmap_env;
 	int err = 0;
 	uint32_t i;
@@ -787,7 +787,7 @@ mlx5_alloc_shared_ibctx(const struct mlx5_dev_spawn_data *spawn,
 	/* No device found, we have to create new shared context. */
 	MLX5_ASSERT(spawn->max_port);
 	sh = rte_zmalloc("ethdev shared ib context",
-			 sizeof(struct mlx5_ibv_shared) +
+			 sizeof(struct mlx5_dev_ctx_shared) +
 			 spawn->max_port *
 			 sizeof(struct mlx5_ibv_shared_port),
 			 RTE_CACHE_LINE_SIZE);
@@ -933,15 +933,15 @@ mlx5_alloc_shared_ibctx(const struct mlx5_dev_spawn_data *spawn,
  * all allocated resources and close handles.
  *
  * @param[in] sh
- *   Pointer to mlx5_ibv_shared object to free
+ *   Pointer to mlx5_dev_ctx_shared object to free
  */
 static void
-mlx5_free_shared_ibctx(struct mlx5_ibv_shared *sh)
+mlx5_free_shared_ibctx(struct mlx5_dev_ctx_shared *sh)
 {
 	pthread_mutex_lock(&mlx5_ibv_list_mutex);
 #ifdef RTE_LIBRTE_MLX5_DEBUG
 	/* Check the object presence in the list. */
-	struct mlx5_ibv_shared *lctx;
+	struct mlx5_dev_ctx_shared *lctx;
 
 	LIST_FOREACH(lctx, &mlx5_ibv_list, next)
 		if (lctx == sh)
@@ -997,7 +997,7 @@ mlx5_free_shared_ibctx(struct mlx5_ibv_shared *sh)
 static void
 mlx5_free_table_hash_list(struct mlx5_priv *priv)
 {
-	struct mlx5_ibv_shared *sh = priv->sh;
+	struct mlx5_dev_ctx_shared *sh = priv->sh;
 	struct mlx5_flow_tbl_data_entry *tbl_data;
 	union mlx5_flow_tbl_key table_key = {
 		{
@@ -1054,7 +1054,7 @@ mlx5_free_table_hash_list(struct mlx5_priv *priv)
 static int
 mlx5_alloc_table_hash_list(struct mlx5_priv *priv)
 {
-	struct mlx5_ibv_shared *sh = priv->sh;
+	struct mlx5_dev_ctx_shared *sh = priv->sh;
 	char s[MLX5_HLIST_NAMESIZE];
 	int err = 0;
 
@@ -1139,7 +1139,7 @@ mlx5_alloc_table_hash_list(struct mlx5_priv *priv)
 static int
 mlx5_alloc_shared_dr(struct mlx5_priv *priv)
 {
-	struct mlx5_ibv_shared *sh = priv->sh;
+	struct mlx5_dev_ctx_shared *sh = priv->sh;
 	char s[MLX5_HLIST_NAMESIZE];
 	int err = 0;
 
@@ -1249,7 +1249,7 @@ mlx5_alloc_shared_dr(struct mlx5_priv *priv)
 static void
 mlx5_free_shared_dr(struct mlx5_priv *priv)
 {
-	struct mlx5_ibv_shared *sh;
+	struct mlx5_dev_ctx_shared *sh;
 
 	if (!priv->dr_shared)
 		return;
@@ -2154,7 +2154,7 @@ static void
 mlx5_set_metadata_mask(struct rte_eth_dev *dev)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	struct mlx5_ibv_shared *sh = priv->sh;
+	struct mlx5_dev_ctx_shared *sh = priv->sh;
 	uint32_t meta, mark, reg_c0;
 
 	reg_c0 = ~priv->vport_meta_mask;
@@ -2356,7 +2356,7 @@ static int
 mlx5_dev_check_sibling_config(struct mlx5_priv *priv,
 			      struct mlx5_dev_config *config)
 {
-	struct mlx5_ibv_shared *sh = priv->sh;
+	struct mlx5_dev_ctx_shared *sh = priv->sh;
 	struct mlx5_dev_config *sh_conf = NULL;
 	uint16_t port_id;
 
@@ -2413,7 +2413,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 	       struct mlx5_dev_config config)
 {
 	const struct mlx5_switch_info *switch_info = &spawn->info;
-	struct mlx5_ibv_shared *sh = NULL;
+	struct mlx5_dev_ctx_shared *sh = NULL;
 	struct ibv_port_attr port_attr;
 	struct mlx5dv_context dv_attr = { .comp_mask = 0 };
 	struct rte_eth_dev *eth_dev = NULL;
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 8e60897..4f2ca15 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -76,7 +76,7 @@ enum mlx5_reclaim_mem_mode {
 #define MLX5_MP_NAME "net_mlx5_mp"
 
 
-LIST_HEAD(mlx5_dev_list, mlx5_ibv_shared);
+LIST_HEAD(mlx5_dev_list, mlx5_dev_ctx_shared);
 
 /* Shared data between primary and secondary processes. */
 struct mlx5_shared_data {
@@ -488,8 +488,8 @@ struct mlx5_flow_id_pool {
  * Shared Infiniband device context for Master/Representors
  * which belong to same IB device with multiple IB ports.
  **/
-struct mlx5_ibv_shared {
-	LIST_ENTRY(mlx5_ibv_shared) next;
+struct mlx5_dev_ctx_shared {
+	LIST_ENTRY(mlx5_dev_ctx_shared) next;
 	uint32_t refcnt;
 	uint32_t devx:1; /* Opened with DV. */
 	uint32_t max_port; /* Maximal IB device port index. */
@@ -500,7 +500,7 @@ struct mlx5_ibv_shared {
 	char ibdev_name[IBV_SYSFS_NAME_MAX]; /* IB device name. */
 	char ibdev_path[IBV_SYSFS_PATH_MAX]; /* IB device path for secondary */
 	struct ibv_device_attr_ex device_attr; /* Device properties. */
-	LIST_ENTRY(mlx5_ibv_shared) mem_event_cb;
+	LIST_ENTRY(mlx5_dev_ctx_shared) mem_event_cb;
 	/**< Called by memory event callback. */
 	struct mlx5_mr_share_cache share_cache;
 	/* Shared DV/DR flow data section. */
@@ -552,7 +552,7 @@ TAILQ_HEAD(mlx5_flow_meters, mlx5_flow_meter);
 
 struct mlx5_priv {
 	struct rte_eth_dev_data *dev_data;  /* Pointer to device data. */
-	struct mlx5_ibv_shared *sh; /* Shared IB device context. */
+	struct mlx5_dev_ctx_shared *sh; /* Shared device context. */
 	uint32_t ibv_port; /* IB device port number. */
 	struct rte_pci_device *pci_dev; /* Backend PCI device. */
 	struct rte_ether_addr mac[MLX5_MAX_MAC_ADDRESSES]; /* MAC addresses. */
@@ -817,9 +817,9 @@ int mlx5_ctrl_flow(struct rte_eth_dev *dev,
 struct rte_flow *mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev);
 int mlx5_flow_create_drop_queue(struct rte_eth_dev *dev);
 void mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev);
-void mlx5_flow_async_pool_query_handle(struct mlx5_ibv_shared *sh,
+void mlx5_flow_async_pool_query_handle(struct mlx5_dev_ctx_shared *sh,
 				       uint64_t async_id, int status);
-void mlx5_set_query_alarm(struct mlx5_ibv_shared *sh);
+void mlx5_set_query_alarm(struct mlx5_dev_ctx_shared *sh);
 void mlx5_flow_query_alarm(void *arg);
 uint32_t mlx5_counter_alloc(struct rte_eth_dev *dev);
 void mlx5_counter_free(struct rte_eth_dev *dev, uint32_t cnt);
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index b837ce6..6919911 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -1237,7 +1237,7 @@ mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
  *   Infiniband device shared context.
  */
 static void
-mlx5_dev_interrupt_device_fatal(struct mlx5_ibv_shared *sh)
+mlx5_dev_interrupt_device_fatal(struct mlx5_dev_ctx_shared *sh)
 {
 	uint32_t i;
 
@@ -1269,7 +1269,7 @@ mlx5_dev_interrupt_device_fatal(struct mlx5_ibv_shared *sh)
 void
 mlx5_dev_interrupt_handler(void *cb_arg)
 {
-	struct mlx5_ibv_shared *sh = cb_arg;
+	struct mlx5_dev_ctx_shared *sh = cb_arg;
 	struct ibv_async_event event;
 
 	/* Read all message from the IB device and acknowledge them. */
@@ -1426,7 +1426,7 @@ mlx5_dev_interrupt_handler_devx(void *cb_arg)
 	(void)cb_arg;
 	return;
 #else
-	struct mlx5_ibv_shared *sh = cb_arg;
+	struct mlx5_dev_ctx_shared *sh = cb_arg;
 	union {
 		struct mlx5dv_devx_async_cmd_hdr cmd_resp;
 		uint8_t buf[MLX5_ST_SZ_BYTES(query_flow_counter_out) +
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index f2c3cf9..e375b10 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -5794,13 +5794,13 @@ mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
  * Get number of all validate pools.
  *
  * @param[in] sh
- *   Pointer to mlx5_ibv_shared object.
+ *   Pointer to mlx5_dev_ctx_shared object.
  *
  * @return
  *   The number of all validate pools.
  */
 static uint32_t
-mlx5_get_all_valid_pool_count(struct mlx5_ibv_shared *sh)
+mlx5_get_all_valid_pool_count(struct mlx5_dev_ctx_shared *sh)
 {
 	int i;
 	uint32_t pools_n = 0;
@@ -5815,10 +5815,10 @@ mlx5_get_all_valid_pool_count(struct mlx5_ibv_shared *sh)
  * the counter pools.
  *
  * @param[in] sh
- *   Pointer to mlx5_ibv_shared object.
+ *   Pointer to mlx5_dev_ctx_shared object.
  */
 void
-mlx5_set_query_alarm(struct mlx5_ibv_shared *sh)
+mlx5_set_query_alarm(struct mlx5_dev_ctx_shared *sh)
 {
 	uint32_t pools_n, us;
 
@@ -5843,7 +5843,7 @@ mlx5_set_query_alarm(struct mlx5_ibv_shared *sh)
 void
 mlx5_flow_query_alarm(void *arg)
 {
-	struct mlx5_ibv_shared *sh = arg;
+	struct mlx5_dev_ctx_shared *sh = arg;
 	struct mlx5_devx_obj *dcs;
 	uint16_t offset;
 	int ret;
@@ -5928,12 +5928,12 @@ mlx5_flow_query_alarm(void *arg)
  * Check and callback event for new aged flow in the counter pool
  *
  * @param[in] sh
- *   Pointer to mlx5_ibv_shared object.
+ *   Pointer to mlx5_dev_ctx_shared object.
  * @param[in] pool
  *   Pointer to Current counter pool.
  */
 static void
-mlx5_flow_aging_check(struct mlx5_ibv_shared *sh,
+mlx5_flow_aging_check(struct mlx5_dev_ctx_shared *sh,
 		   struct mlx5_flow_counter_pool *pool)
 {
 	struct mlx5_priv *priv;
@@ -5993,14 +5993,14 @@ mlx5_flow_aging_check(struct mlx5_ibv_shared *sh,
  * query. This function is probably called by the host thread.
  *
  * @param[in] sh
- *   The pointer to the shared IB device context.
+ *   The pointer to the shared device context.
  * @param[in] async_id
  *   The Devx async ID.
  * @param[in] status
  *   The status of the completion.
  */
 void
-mlx5_flow_async_pool_query_handle(struct mlx5_ibv_shared *sh,
+mlx5_flow_async_pool_query_handle(struct mlx5_dev_ctx_shared *sh,
 				  uint64_t async_id, int status)
 {
 	struct mlx5_flow_counter_pool *pool =
@@ -6161,7 +6161,7 @@ mlx5_flow_dev_dump(struct rte_eth_dev *dev,
 		   struct rte_flow_error *error __rte_unused)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	struct mlx5_ibv_shared *sh = priv->sh;
+	struct mlx5_dev_ctx_shared *sh = priv->sh;
 
 	return mlx5_devx_cmd_flow_dump(sh->fdb_domain, sh->rx_domain,
 				       sh->tx_domain, file);
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index e481831..4dec57d 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -285,7 +285,7 @@ static void
 flow_dv_shared_lock(struct rte_eth_dev *dev)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	struct mlx5_ibv_shared *sh = priv->sh;
+	struct mlx5_dev_ctx_shared *sh = priv->sh;
 
 	if (sh->dv_refcnt > 1) {
 		int ret;
@@ -300,7 +300,7 @@ static void
 flow_dv_shared_unlock(struct rte_eth_dev *dev)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	struct mlx5_ibv_shared *sh = priv->sh;
+	struct mlx5_dev_ctx_shared *sh = priv->sh;
 
 	if (sh->dv_refcnt > 1) {
 		int ret;
@@ -2560,7 +2560,7 @@ flow_dv_encap_decap_resource_register
 			 struct rte_flow_error *error)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	struct mlx5_ibv_shared *sh = priv->sh;
+	struct mlx5_dev_ctx_shared *sh = priv->sh;
 	struct mlx5_flow_dv_encap_decap_resource *cache_resource;
 	struct mlx5dv_dr_domain *domain;
 	uint32_t idx = 0;
@@ -2697,7 +2697,7 @@ flow_dv_port_id_action_resource_register
 			 struct rte_flow_error *error)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	struct mlx5_ibv_shared *sh = priv->sh;
+	struct mlx5_dev_ctx_shared *sh = priv->sh;
 	struct mlx5_flow_dv_port_id_action_resource *cache_resource;
 	uint32_t idx = 0;
 
@@ -2772,7 +2772,7 @@ flow_dv_push_vlan_action_resource_register
 			struct rte_flow_error *error)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	struct mlx5_ibv_shared *sh = priv->sh;
+	struct mlx5_dev_ctx_shared *sh = priv->sh;
 	struct mlx5_flow_dv_push_vlan_action_resource *cache_resource;
 	struct mlx5dv_dr_domain *domain;
 	uint32_t idx = 0;
@@ -3946,7 +3946,7 @@ flow_dv_modify_hdr_resource_register
 			 struct rte_flow_error *error)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	struct mlx5_ibv_shared *sh = priv->sh;
+	struct mlx5_dev_ctx_shared *sh = priv->sh;
 	struct mlx5_flow_dv_modify_hdr_resource *cache_resource;
 	struct mlx5dv_dr_domain *ns;
 	uint32_t actions_len;
@@ -4104,7 +4104,7 @@ static struct mlx5_counter_stats_mem_mng *
 flow_dv_create_counter_stat_mem_mng(struct rte_eth_dev *dev, int raws_n)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	struct mlx5_ibv_shared *sh = priv->sh;
+	struct mlx5_dev_ctx_shared *sh = priv->sh;
 	struct mlx5_devx_mkey_attr mkey_attr;
 	struct mlx5_counter_stats_mem_mng *mem_mng;
 	volatile struct flow_counter_stats *raw_data;
@@ -7206,7 +7206,7 @@ flow_dv_tbl_resource_get(struct rte_eth_dev *dev,
 			 struct rte_flow_error *error)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	struct mlx5_ibv_shared *sh = priv->sh;
+	struct mlx5_dev_ctx_shared *sh = priv->sh;
 	struct mlx5_flow_tbl_resource *tbl;
 	union mlx5_flow_tbl_key table_key = {
 		{
@@ -7291,7 +7291,7 @@ flow_dv_tbl_resource_release(struct rte_eth_dev *dev,
 			     struct mlx5_flow_tbl_resource *tbl)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	struct mlx5_ibv_shared *sh = priv->sh;
+	struct mlx5_dev_ctx_shared *sh = priv->sh;
 	struct mlx5_flow_tbl_data_entry *tbl_data =
 		container_of(tbl, struct mlx5_flow_tbl_data_entry, tbl);
 
@@ -7336,7 +7336,7 @@ flow_dv_matcher_register(struct rte_eth_dev *dev,
 			 struct rte_flow_error *error)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	struct mlx5_ibv_shared *sh = priv->sh;
+	struct mlx5_dev_ctx_shared *sh = priv->sh;
 	struct mlx5_flow_dv_matcher *cache_matcher;
 	struct mlx5dv_flow_matcher_attr dv_attr = {
 		.type = IBV_FLOW_ATTR_NORMAL,
@@ -7435,7 +7435,7 @@ flow_dv_tag_resource_register
 			 struct rte_flow_error *error)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	struct mlx5_ibv_shared *sh = priv->sh;
+	struct mlx5_dev_ctx_shared *sh = priv->sh;
 	struct mlx5_flow_dv_tag_resource *cache_resource;
 	struct mlx5_hlist_entry *entry;
 
@@ -7499,7 +7499,7 @@ flow_dv_tag_release(struct rte_eth_dev *dev,
 		    uint32_t tag_idx)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	struct mlx5_ibv_shared *sh = priv->sh;
+	struct mlx5_dev_ctx_shared *sh = priv->sh;
 	struct mlx5_flow_dv_tag_resource *tag;
 
 	tag = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_TAG], tag_idx);
@@ -9147,7 +9147,7 @@ flow_dv_prepare_mtr_tables(struct rte_eth_dev *dev,
 			   uint32_t color_reg_c_idx)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	struct mlx5_ibv_shared *sh = priv->sh;
+	struct mlx5_dev_ctx_shared *sh = priv->sh;
 	struct mlx5_flow_dv_match_params mask = {
 		.size = sizeof(mask.buf),
 	};
diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c
index 2b4b3e2..c91d6a4 100644
--- a/drivers/net/mlx5/mlx5_mr.c
+++ b/drivers/net/mlx5/mlx5_mr.c
@@ -57,7 +57,7 @@ struct mr_update_mp_data {
  *   Size of freed memory.
  */
 static void
-mlx5_mr_mem_event_free_cb(struct mlx5_ibv_shared *sh,
+mlx5_mr_mem_event_free_cb(struct mlx5_dev_ctx_shared *sh,
 			  const void *addr, size_t len)
 {
 	const struct rte_memseg_list *msl;
@@ -145,7 +145,7 @@ void
 mlx5_mr_mem_event_cb(enum rte_mem_event event_type, const void *addr,
 		     size_t len, void *arg __rte_unused)
 {
-	struct mlx5_ibv_shared *sh;
+	struct mlx5_dev_ctx_shared *sh;
 	struct mlx5_dev_list *dev_list = &mlx5_shared_data->mem_event_cb_list;
 
 	/* Must be called from the primary process. */
@@ -259,7 +259,7 @@ mlx5_mr_update_ext_mp_cb(struct rte_mempool *mp, void *opaque,
 	struct mr_update_mp_data *data = opaque;
 	struct rte_eth_dev *dev = data->dev;
 	struct mlx5_priv *priv = dev->data->dev_private;
-	struct mlx5_ibv_shared *sh = priv->sh;
+	struct mlx5_dev_ctx_shared *sh = priv->sh;
 	struct mlx5_mr_ctrl *mr_ctrl = data->mr_ctrl;
 	struct mlx5_mr *mr = NULL;
 	uintptr_t addr = (uintptr_t)memhdr->addr;
@@ -339,7 +339,7 @@ mlx5_dma_map(struct rte_pci_device *pdev, void *addr,
 	struct rte_eth_dev *dev;
 	struct mlx5_mr *mr;
 	struct mlx5_priv *priv;
-	struct mlx5_ibv_shared *sh;
+	struct mlx5_dev_ctx_shared *sh;
 
 	dev = pci_dev_to_eth_dev(pdev);
 	if (!dev) {
@@ -386,7 +386,7 @@ mlx5_dma_unmap(struct rte_pci_device *pdev, void *addr,
 {
 	struct rte_eth_dev *dev;
 	struct mlx5_priv *priv;
-	struct mlx5_ibv_shared *sh;
+	struct mlx5_dev_ctx_shared *sh;
 	struct mlx5_mr *mr;
 	struct mr_cache_entry entry;
 
-- 
2.8.4



* [dpdk-dev] [PATCH v1 2/8] net/mlx5: add mlx5 Linux specific file with getter functions
  2020-06-03 15:05 [dpdk-dev] [PATCH v1 0/8] mlx5 PMD multi OS support Ophir Munk
  2020-06-03 15:05 ` [dpdk-dev] [PATCH v1 1/8] net/mlx5: rename mlx5 ibv shared struct Ophir Munk
@ 2020-06-03 15:05 ` Ophir Munk
  2020-06-08 11:20   ` Ferruh Yigit
  2020-06-03 15:05 ` [dpdk-dev] [PATCH v1 3/8] drivers: remove mlx5 protection domain dependency on ibv Ophir Munk
                   ` (6 subsequent siblings)
  8 siblings, 1 reply; 17+ messages in thread
From: Ophir Munk @ 2020-06-03 15:05 UTC (permalink / raw)
  To: dev, Matan Azrad, Raslan Darawsheh; +Cc: Ophir Munk

The type of the 'ctx' field (in 'struct mlx5_dev_ctx_shared') is changed
from 'struct ibv_context *' to 'void *'. Members of 'ctx' which are verbs
dependent (e.g. the device name) are accessed through getter functions,
which are added in a new file under the Linux directory: linux/mlx5_os.c.
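
For illustration only (not part of this patch), a minimal sketch of the
resulting pattern: generic code keeps 'ctx' opaque and uses the new
getters, while only the OS layer ever casts it back to
'struct ibv_context *'. The helper below and its printf output are
invented for the example:

    #include <stdio.h>
    #include "mlx5.h" /* mlx5_os_get_ctx_device_name/_path() prototypes */

    /* Hypothetical helper, not in the patch. */
    static void
    log_ctx_names(void *ctx)
    {
        const char *name = mlx5_os_get_ctx_device_name(ctx);
        const char *path = mlx5_os_get_ctx_device_path(ctx);

        if (name == NULL || path == NULL)
            return; /* the getters return NULL for a NULL ctx */
        printf("ibdev name: %s, ibdev path: %s\n", name, path);
    }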

Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
Acked-by: Matan Azrad <matan@mellanox.com>
---
 drivers/net/mlx5/Makefile          |  1 +
 drivers/net/mlx5/linux/meson.build |  8 ++++
 drivers/net/mlx5/linux/mlx5_os.c   | 87 ++++++++++++++++++++++++++++++++++++++
 drivers/net/mlx5/meson.build       |  5 ++-
 drivers/net/mlx5/mlx5.c            | 18 ++++----
 drivers/net/mlx5/mlx5.h            |  6 ++-
 drivers/net/mlx5/mlx5_mp.c         |  2 +-
 7 files changed, 115 insertions(+), 12 deletions(-)
 create mode 100644 drivers/net/mlx5/linux/meson.build
 create mode 100644 drivers/net/mlx5/linux/mlx5_os.c

diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index c160e6b..115b66c 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -32,6 +32,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow_verbs.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_mp.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_utils.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_socket.c
+SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += linux/mlx5_os.c
 
 # Basic CFLAGS.
 CFLAGS += -O3
diff --git a/drivers/net/mlx5/linux/meson.build b/drivers/net/mlx5/linux/meson.build
new file mode 100644
index 0000000..2ea0792
--- /dev/null
+++ b/drivers/net/mlx5/linux/meson.build
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2020 Mellanox Technologies, Ltd
+
+includes += include_directories('.')
+sources += files(
+	'mlx5_os.c',
+)
+
diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
new file mode 100644
index 0000000..9443239
--- /dev/null
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2015 6WIND S.A.
+ * Copyright 2020 Mellanox Technologies, Ltd
+ */
+
+#include <stddef.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <net/if.h>
+#include <sys/mman.h>
+#include <linux/rtnetlink.h>
+#include <fcntl.h>
+
+/* Verbs header. */
+/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+#include <infiniband/verbs.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-Wpedantic"
+#endif
+
+#include <rte_malloc.h>
+#include <rte_ethdev_driver.h>
+#include <rte_ethdev_pci.h>
+#include <rte_pci.h>
+#include <rte_bus_pci.h>
+#include <rte_common.h>
+#include <rte_kvargs.h>
+#include <rte_rwlock.h>
+#include <rte_spinlock.h>
+#include <rte_string_fns.h>
+#include <rte_alarm.h>
+
+#include <mlx5_glue.h>
+#include <mlx5_devx_cmds.h>
+#include <mlx5_common.h>
+
+#include "mlx5_defs.h"
+#include "mlx5.h"
+#include "mlx5_utils.h"
+#include "mlx5_rxtx.h"
+#include "mlx5_autoconf.h"
+#include "mlx5_mr.h"
+#include "mlx5_flow.h"
+#include "rte_pmd_mlx5.h"
+
+/**
+ * Get ibv device name. Given an ibv_context pointer - return a
+ * pointer to the corresponding device name.
+ *
+ * @param[in] ctx
+ *   Pointer to ibv context.
+ *
+ * @return
+ *   Pointer to device name if ctx is valid, NULL otherwise.
+ */
+const char *
+mlx5_os_get_ctx_device_name(void *ctx)
+{
+	if (!ctx)
+		return NULL;
+	return ((struct ibv_context *)ctx)->device->name;
+}
+
+/**
+ * Get ibv device path name. Given an ibv_context pointer - return a
+ * pointer to the corresponding device path name.
+ *
+ * @param[in] ctx
+ *   Pointer to ibv context.
+ *
+ * @return
+ *   Pointer to device path name if ctx is valid, NULL otherwise.
+ */
+const char *
+mlx5_os_get_ctx_device_path(void *ctx)
+{
+	if (!ctx)
+		return NULL;
+
+	return ((struct ibv_context *)ctx)->device->ibdev_path;
+}
diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
index 928663a..e71b2c5 100644
--- a/drivers/net/mlx5/meson.build
+++ b/drivers/net/mlx5/meson.build
@@ -2,9 +2,9 @@
 # Copyright 2018 6WIND S.A.
 # Copyright 2018 Mellanox Technologies, Ltd
 
-if not is_linux
+if not (is_linux or is_windows)
 	build = false
-	reason = 'only supported on Linux'
+	reason = 'only supported on Linux and Windows'
 	subdir_done()
 endif
 
@@ -52,3 +52,4 @@ if get_option('buildtype').contains('debug')
 else
 	cflags += [ '-UPEDANTIC' ]
 endif
+subdir(exec_env)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index f942f92..95a34d1 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -677,13 +677,14 @@ mlx5_dev_shared_handler_install(struct mlx5_dev_ctx_shared *sh)
 	int flags;
 
 	sh->intr_handle.fd = -1;
-	flags = fcntl(sh->ctx->async_fd, F_GETFL);
-	ret = fcntl(sh->ctx->async_fd, F_SETFL, flags | O_NONBLOCK);
+	flags = fcntl(((struct ibv_context *)sh->ctx)->async_fd, F_GETFL);
+	ret = fcntl(((struct ibv_context *)sh->ctx)->async_fd,
+		    F_SETFL, flags | O_NONBLOCK);
 	if (ret) {
 		DRV_LOG(INFO, "failed to change file descriptor async event"
 			" queue");
 	} else {
-		sh->intr_handle.fd = sh->ctx->async_fd;
+		sh->intr_handle.fd = ((struct ibv_context *)sh->ctx)->async_fd;
 		sh->intr_handle.type = RTE_INTR_HANDLE_EXT;
 		if (rte_intr_callback_register(&sh->intr_handle,
 					mlx5_dev_interrupt_handler, sh)) {
@@ -831,10 +832,10 @@ mlx5_alloc_shared_ibctx(const struct mlx5_dev_spawn_data *spawn,
 	}
 	sh->refcnt = 1;
 	sh->max_port = spawn->max_port;
-	strncpy(sh->ibdev_name, sh->ctx->device->name,
-		sizeof(sh->ibdev_name));
-	strncpy(sh->ibdev_path, sh->ctx->device->ibdev_path,
-		sizeof(sh->ibdev_path));
+	strncpy(sh->ibdev_name, mlx5_os_get_ctx_device_name(sh->ctx),
+		sizeof(sh->ibdev_name) - 1);
+	strncpy(sh->ibdev_path, mlx5_os_get_ctx_device_path(sh->ctx),
+		sizeof(sh->ibdev_path) - 1);
 	/*
 	 * Setting port_id to max unallowed value means
 	 * there is no interrupt subhandler installed for
@@ -1515,7 +1516,8 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 		return;
 	DRV_LOG(DEBUG, "port %u closing device \"%s\"",
 		dev->data->port_id,
-		((priv->sh->ctx != NULL) ? priv->sh->ctx->device->name : ""));
+		((priv->sh->ctx != NULL) ?
+		mlx5_os_get_ctx_device_name(priv->sh->ctx) : ""));
 	/*
 	 * If default mreg copy action is removed at the stop stage,
 	 * the search will return none and nothing will be done anymore.
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 4f2ca15..d020c10 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -493,7 +493,7 @@ struct mlx5_dev_ctx_shared {
 	uint32_t refcnt;
 	uint32_t devx:1; /* Opened with DV. */
 	uint32_t max_port; /* Maximal IB device port index. */
-	struct ibv_context *ctx; /* Verbs/DV context. */
+	void *ctx; /* Verbs/DV/DevX context. */
 	struct ibv_pd *pd; /* Protection Domain. */
 	uint32_t pdn; /* Protection Domain number. */
 	uint32_t tdn; /* Transport Domain number. */
@@ -853,4 +853,8 @@ struct mlx5_flow_meter *mlx5_flow_meter_attach
 					 struct rte_flow_error *error);
 void mlx5_flow_meter_detach(struct mlx5_flow_meter *fm);
 
+/* mlx5_os.c */
+const char *mlx5_os_get_ctx_device_name(void *ctx);
+const char *mlx5_os_get_ctx_device_path(void *ctx);
+
 #endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_mp.c b/drivers/net/mlx5/mlx5_mp.c
index 7ad322d..a2b5c40 100644
--- a/drivers/net/mlx5/mlx5_mp.c
+++ b/drivers/net/mlx5/mlx5_mp.c
@@ -52,7 +52,7 @@ mlx5_mp_primary_handle(const struct rte_mp_msg *mp_msg, const void *peer)
 	case MLX5_MP_REQ_VERBS_CMD_FD:
 		mp_init_msg(&priv->mp_id, &mp_res, param->type);
 		mp_res.num_fds = 1;
-		mp_res.fds[0] = priv->sh->ctx->cmd_fd;
+		mp_res.fds[0] = ((struct ibv_context *)priv->sh->ctx)->cmd_fd;
 		res->result = 0;
 		ret = rte_mp_reply(&mp_res, peer);
 		break;
-- 
2.8.4



* [dpdk-dev] [PATCH v1 3/8] drivers: remove mlx5 protection domain dependency on ibv
  2020-06-03 15:05 [dpdk-dev] [PATCH v1 0/8] mlx5 PMD multi OS support Ophir Munk
  2020-06-03 15:05 ` [dpdk-dev] [PATCH v1 1/8] net/mlx5: rename mlx5 ibv shared struct Ophir Munk
  2020-06-03 15:05 ` [dpdk-dev] [PATCH v1 2/8] net/mlx5: add mlx5 Linux specific file with getter functions Ophir Munk
@ 2020-06-03 15:05 ` Ophir Munk
  2020-06-03 15:05 ` [dpdk-dev] [PATCH v1 4/8] net/mlx5: remove attributes dependency on ibv and dv Ophir Munk
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 17+ messages in thread
From: Ophir Munk @ 2020-06-03 15:05 UTC (permalink / raw)
  To: dev, Matan Azrad, Raslan Darawsheh; +Cc: Ophir Munk

Replace 'struct ibv_pd *' with 'void *' in 'struct mlx5_dev_ctx_shared'
and in all related function prototypes and calls across the mlx5 drivers.
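
For illustration only (not part of this patch), a sketch of the split this
enables: common code keeps 'void *pd' in its prototypes and stays free of
verbs types, while the cast back to 'struct ibv_pd *' happens only in
OS-specific code. The helper below is invented for the example:

    #include <stddef.h>
    #include <stdint.h>
    #include <infiniband/verbs.h> /* needed only on the OS-specific side */

    /* Hypothetical Linux-only helper, not in the patch. */
    static uint32_t
    pd_verbs_handle(void *pd)
    {
        if (pd == NULL)
            return 0;
        return ((struct ibv_pd *)pd)->handle;
    }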

Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
Acked-by: Matan Azrad <matan@mellanox.com>
---
 drivers/common/mlx5/mlx5_common_mr.c | 24 ++++++++++++------------
 drivers/common/mlx5/mlx5_common_mr.h |  6 +++---
 drivers/net/mlx5/mlx5.h              |  2 +-
 3 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/common/mlx5/mlx5_common_mr.c b/drivers/common/mlx5/mlx5_common_mr.c
index 3b46446..e894523 100644
--- a/drivers/common/mlx5/mlx5_common_mr.c
+++ b/drivers/common/mlx5/mlx5_common_mr.c
@@ -521,7 +521,7 @@ mr_find_contig_memsegs_cb(const struct rte_memseg_list *msl,
  * request fails.
  *
  * @param pd
- *   Pointer to ibv_pd of a device (net, regex, vdpa,...).
+ *   Pointer to pd of a device (net, regex, vdpa,...).
  * @param share_cache
  *   Pointer to a global shared MR cache.
  * @param[out] entry
@@ -536,7 +536,7 @@ mr_find_contig_memsegs_cb(const struct rte_memseg_list *msl,
  *   Searched LKey on success, UINT32_MAX on failure and rte_errno is set.
  */
 static uint32_t
-mlx5_mr_create_secondary(struct ibv_pd *pd __rte_unused,
+mlx5_mr_create_secondary(void *pd __rte_unused,
 			 struct mlx5_mp_id *mp_id,
 			 struct mlx5_mr_share_cache *share_cache,
 			 struct mr_cache_entry *entry, uintptr_t addr,
@@ -569,7 +569,7 @@ mlx5_mr_create_secondary(struct ibv_pd *pd __rte_unused,
  * Register entire virtually contiguous memory chunk around the address.
  *
  * @param pd
- *   Pointer to ibv_pd of a device (net, regex, vdpa,...).
+ *   Pointer to pd of a device (net, regex, vdpa,...).
  * @param share_cache
  *   Pointer to a global shared MR cache.
  * @param[out] entry
@@ -584,7 +584,7 @@ mlx5_mr_create_secondary(struct ibv_pd *pd __rte_unused,
  *   Searched LKey on success, UINT32_MAX on failure and rte_errno is set.
  */
 uint32_t
-mlx5_mr_create_primary(struct ibv_pd *pd,
+mlx5_mr_create_primary(void *pd,
 		       struct mlx5_mr_share_cache *share_cache,
 		       struct mr_cache_entry *entry, uintptr_t addr,
 		       unsigned int mr_ext_memseg_en)
@@ -816,7 +816,7 @@ mlx5_mr_create_primary(struct ibv_pd *pd,
  * This can be called from primary and secondary process.
  *
  * @param pd
- *   Pointer to ibv_pd of a device (net, regex, vdpa,...).
+ *   Pointer to pd handle of a device (net, regex, vdpa,...).
  * @param share_cache
  *   Pointer to a global shared MR cache.
  * @param[out] entry
@@ -829,7 +829,7 @@ mlx5_mr_create_primary(struct ibv_pd *pd,
  *   Searched LKey on success, UINT32_MAX on failure and rte_errno is set.
  */
 static uint32_t
-mlx5_mr_create(struct ibv_pd *pd, struct mlx5_mp_id *mp_id,
+mlx5_mr_create(void *pd, struct mlx5_mp_id *mp_id,
 	       struct mlx5_mr_share_cache *share_cache,
 	       struct mr_cache_entry *entry, uintptr_t addr,
 	       unsigned int mr_ext_memseg_en)
@@ -856,7 +856,7 @@ mlx5_mr_create(struct ibv_pd *pd, struct mlx5_mp_id *mp_id,
  * Insert the found/created entry to local bottom-half cache table.
  *
  * @param pd
- *   Pointer to ibv_pd of a device (net, regex, vdpa,...).
+ *   Pointer to pd of a device (net, regex, vdpa,...).
  * @param share_cache
  *   Pointer to a global shared MR cache.
  * @param mr_ctrl
@@ -871,7 +871,7 @@ mlx5_mr_create(struct ibv_pd *pd, struct mlx5_mp_id *mp_id,
  *   Searched LKey on success, UINT32_MAX on no match.
  */
 static uint32_t
-mr_lookup_caches(struct ibv_pd *pd, struct mlx5_mp_id *mp_id,
+mr_lookup_caches(void *pd, struct mlx5_mp_id *mp_id,
 		 struct mlx5_mr_share_cache *share_cache,
 		 struct mlx5_mr_ctrl *mr_ctrl,
 		 struct mr_cache_entry *entry, uintptr_t addr,
@@ -920,7 +920,7 @@ mr_lookup_caches(struct ibv_pd *pd, struct mlx5_mp_id *mp_id,
  * per-queue local caches.
  *
  * @param pd
- *   Pointer to ibv_pd of a device (net, regex, vdpa,...).
+ *   Pointer to pd of a device (net, regex, vdpa,...).
  * @param share_cache
  *   Pointer to a global shared MR cache.
  * @param mr_ctrl
@@ -931,7 +931,7 @@ mr_lookup_caches(struct ibv_pd *pd, struct mlx5_mp_id *mp_id,
  * @return
  *   Searched LKey on success, UINT32_MAX on no match.
  */
-uint32_t mlx5_mr_addr2mr_bh(struct ibv_pd *pd, struct mlx5_mp_id *mp_id,
+uint32_t mlx5_mr_addr2mr_bh(void *pd, struct mlx5_mp_id *mp_id,
 			    struct mlx5_mr_share_cache *share_cache,
 			    struct mlx5_mr_ctrl *mr_ctrl,
 			    uintptr_t addr, unsigned int mr_ext_memseg_en)
@@ -1022,7 +1022,7 @@ mlx5_mr_flush_local_cache(struct mlx5_mr_ctrl *mr_ctrl)
  * part of the DPDK memory segments.
  *
  * @param pd
- *   Pointer to ibv_pd of a device (net, regex, vdpa,...).
+ *   Pointer to pd of a device (net, regex, vdpa,...).
  * @param addr
  *   Starting virtual address of memory.
  * @param len
@@ -1034,7 +1034,7 @@ mlx5_mr_flush_local_cache(struct mlx5_mr_ctrl *mr_ctrl)
  *   Pointer to MR structure on success, NULL otherwise.
  */
 struct mlx5_mr *
-mlx5_create_mr_ext(struct ibv_pd *pd, uintptr_t addr, size_t len, int socket_id)
+mlx5_create_mr_ext(void *pd, uintptr_t addr, size_t len, int socket_id)
 {
 	struct mlx5_mr *mr = NULL;
 
diff --git a/drivers/common/mlx5/mlx5_common_mr.h b/drivers/common/mlx5/mlx5_common_mr.h
index 4ea47cd..7add4da 100644
--- a/drivers/common/mlx5/mlx5_common_mr.h
+++ b/drivers/common/mlx5/mlx5_common_mr.h
@@ -122,7 +122,7 @@ void mlx5_mr_btree_free(struct mlx5_mr_btree *bt);
 __rte_internal
 void mlx5_mr_btree_dump(struct mlx5_mr_btree *bt __rte_unused);
 __rte_internal
-uint32_t mlx5_mr_addr2mr_bh(struct ibv_pd *pd, struct mlx5_mp_id *mp_id,
+uint32_t mlx5_mr_addr2mr_bh(void *pd, struct mlx5_mp_id *mp_id,
 			    struct mlx5_mr_share_cache *share_cache,
 			    struct mlx5_mr_ctrl *mr_ctrl,
 			    uintptr_t addr, unsigned int mr_ext_memseg_en);
@@ -148,11 +148,11 @@ mlx5_mr_lookup_list(struct mlx5_mr_share_cache *share_cache,
 		    struct mr_cache_entry *entry, uintptr_t addr);
 __rte_internal
 struct mlx5_mr *
-mlx5_create_mr_ext(struct ibv_pd *pd, uintptr_t addr, size_t len,
+mlx5_create_mr_ext(void *pd, uintptr_t addr, size_t len,
 		   int socket_id);
 __rte_internal
 uint32_t
-mlx5_mr_create_primary(struct ibv_pd *pd,
+mlx5_mr_create_primary(void *pd,
 		       struct mlx5_mr_share_cache *share_cache,
 		       struct mr_cache_entry *entry, uintptr_t addr,
 		       unsigned int mr_ext_memseg_en);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index d020c10..30678aa 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -494,7 +494,7 @@ struct mlx5_dev_ctx_shared {
 	uint32_t devx:1; /* Opened with DV. */
 	uint32_t max_port; /* Maximal IB device port index. */
 	void *ctx; /* Verbs/DV/DevX context. */
-	struct ibv_pd *pd; /* Protection Domain. */
+	void *pd; /* Protection Domain. */
 	uint32_t pdn; /* Protection Domain number. */
 	uint32_t tdn; /* Transport Domain number. */
 	char ibdev_name[IBV_SYSFS_NAME_MAX]; /* IB device name. */
-- 
2.8.4



* [dpdk-dev] [PATCH v1 4/8] net/mlx5: remove attributes dependency on ibv and dv
  2020-06-03 15:05 [dpdk-dev] [PATCH v1 0/8] mlx5 PMD multi OS support Ophir Munk
                   ` (2 preceding siblings ...)
  2020-06-03 15:05 ` [dpdk-dev] [PATCH v1 3/8] drivers: remove mlx5 protection domain dependency on ibv Ophir Munk
@ 2020-06-03 15:05 ` Ophir Munk
  2020-06-03 15:05 ` [dpdk-dev] [PATCH v1 5/8] net/mlx5: remove umem field dependency on dv Ophir Munk
                   ` (4 subsequent siblings)
  8 siblings, 0 replies; 17+ messages in thread
From: Ophir Munk @ 2020-06-03 15:05 UTC (permalink / raw)
  To: dev, Matan Azrad, Raslan Darawsheh; +Cc: Ophir Munk

Define 'struct mlx5_dev_attr', which is independent of ibv and dv. It
contains attributes that were originally held in 'struct ibv_device_attr_ex'
and 'struct mlx5dv_context' (dv_attr). Add a new API, mlx5_os_get_dev_attr(),
which fills in the newly defined struct.
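
For illustration only (not part of this patch), a sketch of how
OS-independent code can consume the new structure; the caller below and
the fields it prints are chosen arbitrarily for the example:

    #include <stdio.h>
    #include "mlx5.h" /* struct mlx5_dev_attr, mlx5_os_get_dev_attr() */

    /* Hypothetical caller, not in the patch. */
    static int
    print_dev_caps(void *ctx)
    {
        struct mlx5_dev_attr attr;
        int err = mlx5_os_get_dev_attr(ctx, &attr);

        if (err)
            return err; /* non-zero value from the glue queries */
        printf("max_qp_wr=%d max_sge=%d ind_table=%u\n",
               attr.max_qp_wr, attr.max_sge,
               attr.max_rwq_indirection_table_size);
        return 0;
    }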

Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
Acked-by: Matan Azrad <matan@mellanox.com>
---
 drivers/net/mlx5/linux/mlx5_os.c | 63 ++++++++++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5.c          | 12 ++++----
 drivers/net/mlx5/mlx5.h          | 27 +++++++++++++++--
 drivers/net/mlx5/mlx5_ethdev.c   |  6 ++--
 drivers/net/mlx5/mlx5_rxq.c      |  4 +--
 drivers/net/mlx5/mlx5_txq.c      | 18 ++++++------
 6 files changed, 108 insertions(+), 22 deletions(-)

diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 9443239..85dcf49 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -85,3 +85,66 @@ mlx5_os_get_ctx_device_path(void *ctx)
 
 	return ((struct ibv_context *)ctx)->device->ibdev_path;
 }
+
+/**
+ * Get mlx5 device attributes. The glue function query_device_ex() is called
+ * with out parameter of type 'struct ibv_device_attr_ex *'. Then fill in mlx5
+ * device attributes from the glue out parameter.
+ *
+ * @param ctx
+ *   Pointer to ibv context.
+ *
+ * @param device_attr
+ *   Pointer to mlx5 device attributes.
+ *
+ * @return
+ *   0 on success, non zero error number otherwise
+ */
+int
+mlx5_os_get_dev_attr(void *ctx, struct mlx5_dev_attr *device_attr)
+{
+	int err;
+	struct ibv_device_attr_ex attr_ex;
+	memset(device_attr, 0, sizeof(*device_attr));
+	err = mlx5_glue->query_device_ex(ctx, NULL, &attr_ex);
+	if (err)
+		return err;
+
+	device_attr->device_cap_flags_ex = attr_ex.device_cap_flags_ex;
+	device_attr->max_qp_wr = attr_ex.orig_attr.max_qp_wr;
+	device_attr->max_sge = attr_ex.orig_attr.max_sge;
+	device_attr->max_cq = attr_ex.orig_attr.max_cq;
+	device_attr->max_qp = attr_ex.orig_attr.max_qp;
+	device_attr->raw_packet_caps = attr_ex.raw_packet_caps;
+	device_attr->max_rwq_indirection_table_size =
+		attr_ex.rss_caps.max_rwq_indirection_table_size;
+	device_attr->max_tso = attr_ex.tso_caps.max_tso;
+	device_attr->tso_supported_qpts = attr_ex.tso_caps.supported_qpts;
+
+	struct mlx5dv_context dv_attr = { .comp_mask = 0 };
+	err = mlx5_glue->dv_query_device(ctx, &dv_attr);
+	if (err)
+		return err;
+
+	device_attr->flags = dv_attr.flags;
+	device_attr->comp_mask = dv_attr.comp_mask;
+#ifdef HAVE_IBV_MLX5_MOD_SWP
+	device_attr->sw_parsing_offloads =
+		dv_attr.sw_parsing_caps.sw_parsing_offloads;
+#endif
+	device_attr->min_single_stride_log_num_of_bytes =
+		dv_attr.striding_rq_caps.min_single_stride_log_num_of_bytes;
+	device_attr->max_single_stride_log_num_of_bytes =
+		dv_attr.striding_rq_caps.max_single_stride_log_num_of_bytes;
+	device_attr->min_single_wqe_log_num_of_strides =
+		dv_attr.striding_rq_caps.min_single_wqe_log_num_of_strides;
+	device_attr->max_single_wqe_log_num_of_strides =
+		dv_attr.striding_rq_caps.max_single_wqe_log_num_of_strides;
+	device_attr->stride_supported_qpts =
+		dv_attr.striding_rq_caps.supported_qpts;
+#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
+	device_attr->tunnel_offloads_caps = dv_attr.tunnel_offloads_caps;
+#endif
+
+	return err;
+}
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 95a34d1..0fa8742 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -825,9 +825,9 @@ mlx5_alloc_shared_ibctx(const struct mlx5_dev_spawn_data *spawn,
 			goto error;
 		DRV_LOG(DEBUG, "DevX is NOT supported");
 	}
-	err = mlx5_glue->query_device_ex(sh->ctx, NULL, &sh->device_attr);
+	err = mlx5_os_get_dev_attr(sh->ctx, &sh->device_attr);
 	if (err) {
-		DRV_LOG(DEBUG, "ibv_query_device_ex() failed");
+		DRV_LOG(DEBUG, "mlx5_os_get_dev_attr() failed");
 		goto error;
 	}
 	sh->refcnt = 1;
@@ -2799,7 +2799,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 	}
 #endif
 	config.ind_table_max_size =
-		sh->device_attr.rss_caps.max_rwq_indirection_table_size;
+		sh->device_attr.max_rwq_indirection_table_size;
 	/*
 	 * Remove this check once DPDK supports larger/variable
 	 * indirection tables.
@@ -2828,11 +2828,11 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 	} else if (config.hw_padding) {
 		DRV_LOG(DEBUG, "Rx end alignment padding is enabled");
 	}
-	config.tso = (sh->device_attr.tso_caps.max_tso > 0 &&
-		      (sh->device_attr.tso_caps.supported_qpts &
+	config.tso = (sh->device_attr.max_tso > 0 &&
+		      (sh->device_attr.tso_supported_qpts &
 		       (1 << IBV_QPT_RAW_PACKET)));
 	if (config.tso)
-		config.tso_max_payload_sz = sh->device_attr.tso_caps.max_tso;
+		config.tso_max_payload_sz = sh->device_attr.max_tso;
 	/*
 	 * MPW is disabled by default, while the Enhanced MPW is enabled
 	 * by default.
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 30678aa..478ebef 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -43,7 +43,6 @@
 #include "mlx5_utils.h"
 #include "mlx5_autoconf.h"
 
-
 enum mlx5_ipool_index {
 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
 	MLX5_IPOOL_DECAP_ENCAP = 0, /* Pool for encap/decap resource. */
@@ -72,6 +71,29 @@ enum mlx5_reclaim_mem_mode {
 	MLX5_RCM_AGGR, /* Reclaim PMD and rdma-core level. */
 };
 
+/* Device attributes used in mlx5 PMD */
+struct mlx5_dev_attr {
+	uint64_t	device_cap_flags_ex;
+	int		max_qp_wr;
+	int		max_sge;
+	int		max_cq;
+	int		max_qp;
+	uint32_t	raw_packet_caps;
+	uint32_t	max_rwq_indirection_table_size;
+	uint32_t	max_tso;
+	uint32_t	tso_supported_qpts;
+	uint64_t	flags;
+	uint64_t	comp_mask;
+	uint32_t	sw_parsing_offloads;
+	uint32_t	min_single_stride_log_num_of_bytes;
+	uint32_t	max_single_stride_log_num_of_bytes;
+	uint32_t	min_single_wqe_log_num_of_strides;
+	uint32_t	max_single_wqe_log_num_of_strides;
+	uint32_t	stride_supported_qpts;
+	uint32_t	tunnel_offloads_caps;
+	char		fw_ver[64];
+};
+
 /** Key string for IPC. */
 #define MLX5_MP_NAME "net_mlx5_mp"
 
@@ -499,7 +521,7 @@ struct mlx5_dev_ctx_shared {
 	uint32_t tdn; /* Transport Domain number. */
 	char ibdev_name[IBV_SYSFS_NAME_MAX]; /* IB device name. */
 	char ibdev_path[IBV_SYSFS_PATH_MAX]; /* IB device path for secondary */
-	struct ibv_device_attr_ex device_attr; /* Device properties. */
+	struct mlx5_dev_attr device_attr; /* Device properties. */
 	LIST_ENTRY(mlx5_dev_ctx_shared) mem_event_cb;
 	/**< Called by memory event callback. */
 	struct mlx5_mr_share_cache share_cache;
@@ -856,5 +878,6 @@ void mlx5_flow_meter_detach(struct mlx5_flow_meter *fm);
 /* mlx5_os.c */
 const char *mlx5_os_get_ctx_device_name(void *ctx);
 const char *mlx5_os_get_ctx_device_path(void *ctx);
+int mlx5_os_get_dev_attr(void *ctx, struct mlx5_dev_attr *dev_attr);
 
 #endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 6919911..6b8b303 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -626,8 +626,8 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
 	 * Since we need one CQ per QP, the limit is the minimum number
 	 * between the two values.
 	 */
-	max = RTE_MIN(priv->sh->device_attr.orig_attr.max_cq,
-		      priv->sh->device_attr.orig_attr.max_qp);
+	max = RTE_MIN(priv->sh->device_attr.max_cq,
+		      priv->sh->device_attr.max_qp);
 	/* max_rx_queues is uint16_t. */
 	max = RTE_MIN(max, (unsigned int)UINT16_MAX);
 	info->max_rx_queues = max;
@@ -736,7 +736,7 @@ mlx5_read_clock(struct rte_eth_dev *dev, uint64_t *clock)
 int mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t fw_size)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	struct ibv_device_attr *attr = &priv->sh->device_attr.orig_attr;
+	struct mlx5_dev_attr *attr = &priv->sh->device_attr;
 	size_t size = strnlen(attr->fw_ver, sizeof(attr->fw_ver)) + 1;
 
 	if (fw_size < size)
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 0b0abe1..f018553 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -1405,9 +1405,9 @@ mlx5_rxq_obj_new(struct rte_eth_dev *dev, uint16_t idx,
 		goto error;
 	}
 	DRV_LOG(DEBUG, "port %u device_attr.max_qp_wr is %d",
-		dev->data->port_id, priv->sh->device_attr.orig_attr.max_qp_wr);
+		dev->data->port_id, priv->sh->device_attr.max_qp_wr);
 	DRV_LOG(DEBUG, "port %u device_attr.max_sge is %d",
-		dev->data->port_id, priv->sh->device_attr.orig_attr.max_sge);
+		dev->data->port_id, priv->sh->device_attr.max_sge);
 	/* Allocate door-bell for types created with DevX. */
 	if (tmpl->type != MLX5_RXQ_OBJ_TYPE_IBV) {
 		struct mlx5_devx_dbr_page *dbr_page;
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 2047a9a..f7b548f 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -645,9 +645,9 @@ mlx5_txq_obj_new(struct rte_eth_dev *dev, uint16_t idx,
 		.cap = {
 			/* Max number of outstanding WRs. */
 			.max_send_wr =
-				((priv->sh->device_attr.orig_attr.max_qp_wr <
+				((priv->sh->device_attr.max_qp_wr <
 				  desc) ?
-				 priv->sh->device_attr.orig_attr.max_qp_wr :
+				 priv->sh->device_attr.max_qp_wr :
 				 desc),
 			/*
 			 * Max number of scatter/gather elements in a WR,
@@ -948,7 +948,7 @@ txq_calc_inline_max(struct mlx5_txq_ctrl *txq_ctrl)
 	struct mlx5_priv *priv = txq_ctrl->priv;
 	unsigned int wqe_size;
 
-	wqe_size = priv->sh->device_attr.orig_attr.max_qp_wr / desc;
+	wqe_size = priv->sh->device_attr.max_qp_wr / desc;
 	if (!wqe_size)
 		return 0;
 	/*
@@ -1203,7 +1203,7 @@ txq_adjust_params(struct mlx5_txq_ctrl *txq_ctrl)
 			" Tx queue size (%d)",
 			txq_ctrl->txq.inlen_mode, max_inline,
 			priv->dev_data->port_id,
-			priv->sh->device_attr.orig_attr.max_qp_wr);
+			priv->sh->device_attr.max_qp_wr);
 		goto error;
 	}
 	if (txq_ctrl->txq.inlen_send > max_inline &&
@@ -1215,7 +1215,7 @@ txq_adjust_params(struct mlx5_txq_ctrl *txq_ctrl)
 			" Tx queue size (%d)",
 			txq_ctrl->txq.inlen_send, max_inline,
 			priv->dev_data->port_id,
-			priv->sh->device_attr.orig_attr.max_qp_wr);
+			priv->sh->device_attr.max_qp_wr);
 		goto error;
 	}
 	if (txq_ctrl->txq.inlen_empw > max_inline &&
@@ -1227,7 +1227,7 @@ txq_adjust_params(struct mlx5_txq_ctrl *txq_ctrl)
 			" Tx queue size (%d)",
 			txq_ctrl->txq.inlen_empw, max_inline,
 			priv->dev_data->port_id,
-			priv->sh->device_attr.orig_attr.max_qp_wr);
+			priv->sh->device_attr.max_qp_wr);
 		goto error;
 	}
 	if (txq_ctrl->txq.tso_en && max_inline < MLX5_MAX_TSO_HEADER) {
@@ -1237,7 +1237,7 @@ txq_adjust_params(struct mlx5_txq_ctrl *txq_ctrl)
 			" Tx queue size (%d)",
 			MLX5_MAX_TSO_HEADER, max_inline,
 			priv->dev_data->port_id,
-			priv->sh->device_attr.orig_attr.max_qp_wr);
+			priv->sh->device_attr.max_qp_wr);
 		goto error;
 	}
 	if (txq_ctrl->txq.inlen_send > max_inline) {
@@ -1322,12 +1322,12 @@ mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	if (txq_adjust_params(tmpl))
 		goto error;
 	if (txq_calc_wqebb_cnt(tmpl) >
-	    priv->sh->device_attr.orig_attr.max_qp_wr) {
+	    priv->sh->device_attr.max_qp_wr) {
 		DRV_LOG(ERR,
 			"port %u Tx WQEBB count (%d) exceeds the limit (%d),"
 			" try smaller queue size",
 			dev->data->port_id, txq_calc_wqebb_cnt(tmpl),
-			priv->sh->device_attr.orig_attr.max_qp_wr);
+			priv->sh->device_attr.max_qp_wr);
 		rte_errno = ENOMEM;
 		goto error;
 	}
-- 
2.8.4



* [dpdk-dev] [PATCH v1 5/8] net/mlx5: remove umem field dependency on dv
  2020-06-03 15:05 [dpdk-dev] [PATCH v1 0/8] mlx5 PMD multi OS support Ophir Munk
                   ` (3 preceding siblings ...)
  2020-06-03 15:05 ` [dpdk-dev] [PATCH v1 4/8] net/mlx5: remove attributes dependency on ibv and dv Ophir Munk
@ 2020-06-03 15:05 ` Ophir Munk
  2020-06-03 15:06 ` [dpdk-dev] [PATCH v1 6/8] net/mlx5: refactor PCI probing under Linux Ophir Munk
                   ` (3 subsequent siblings)
  8 siblings, 0 replies; 17+ messages in thread
From: Ophir Munk @ 2020-06-03 15:05 UTC (permalink / raw)
  To: dev, Matan Azrad, Raslan Darawsheh; +Cc: Ophir Munk

The 'umem' field is used in several structs. Its type is changed from
'struct mlx5dv_devx_umem *' to 'void *'. This change allows the code to
compile under non-Linux operating systems.
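
For illustration only (not part of this patch), a sketch of the resulting
lookup pattern, mirroring the mlx5_release_dbr() hunk below: generic code
compares umem ids through the getter instead of dereferencing the
dv-specific type. The helper below is invented for the example:

    #include <stdint.h>
    #include "mlx5.h" /* struct mlx5_devx_dbr_page, mlx5_os_get_umem_id() */

    /* Hypothetical helper, not in the patch. */
    static int
    dbr_page_matches(struct mlx5_devx_dbr_page *page, uint32_t umem_id)
    {
        return mlx5_os_get_umem_id(page->umem) == umem_id;
    }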

Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
Acked-by: Matan Azrad <matan@mellanox.com>
---
 drivers/net/mlx5/linux/mlx5_os.c | 18 ++++++++++++++++++
 drivers/net/mlx5/mlx5.c          |  2 +-
 drivers/net/mlx5/mlx5.h          |  5 +++--
 drivers/net/mlx5/mlx5_flow_dv.c  |  2 +-
 drivers/net/mlx5/mlx5_rxq.c      |  2 +-
 5 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 85dcf49..7d60683 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -87,6 +87,24 @@ mlx5_os_get_ctx_device_path(void *ctx)
 }
 
 /**
+ * Get umem id. Given a pointer to umem object of type
+ * 'struct mlx5dv_devx_umem *' - return its id.
+ *
+ * @param[in] umem
+ *   Pointer to umem object.
+ *
+ * @return
+ *   The umem id if umem is valid, 0 otherwise.
+ */
+uint32_t
+mlx5_os_get_umem_id(void *umem)
+{
+	if (!umem)
+		return 0;
+	return ((struct mlx5dv_devx_umem *)umem)->umem_id;
+}
+
+/**
  * Get mlx5 device attributes. The glue function query_device_ex() is called
  * with out parameter of type 'struct ibv_device_attr_ex *'. Then fill in mlx5
  * device attributes from the glue out parameter.
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 0fa8742..4f7b4d3 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -2298,7 +2298,7 @@ mlx5_release_dbr(struct rte_eth_dev *dev, uint32_t umem_id, uint64_t offset)
 
 	LIST_FOREACH(page, &priv->dbrpgs, next)
 		/* Find the page this address belongs to. */
-		if (page->umem->umem_id == umem_id)
+		if (mlx5_os_get_umem_id(page->umem) == umem_id)
 			break;
 	if (!page)
 		return -EINVAL;
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 478ebef..ec4ba87 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -384,7 +384,7 @@ struct mlx5_counter_stats_mem_mng {
 	LIST_ENTRY(mlx5_counter_stats_mem_mng) next;
 	struct mlx5_counter_stats_raw *raws;
 	struct mlx5_devx_obj *dm;
-	struct mlx5dv_devx_umem *umem;
+	void *umem;
 };
 
 /* Raw memory structure for the counter statistics values of a pool. */
@@ -490,7 +490,7 @@ struct mlx5_devx_dbr_page {
 	/* Door-bell records, must be first member in structure. */
 	uint8_t dbrs[MLX5_DBR_PAGE_SIZE];
 	LIST_ENTRY(mlx5_devx_dbr_page) next; /* Pointer to the next element. */
-	struct mlx5dv_devx_umem *umem;
+	void *umem;
 	uint32_t dbr_count; /* Number of door-bell records in use. */
 	/* 1 bit marks matching door-bell is in use. */
 	uint64_t dbr_bitmap[MLX5_DBR_BITMAP_SIZE];
@@ -878,6 +878,7 @@ void mlx5_flow_meter_detach(struct mlx5_flow_meter *fm);
 /* mlx5_os.c */
 const char *mlx5_os_get_ctx_device_name(void *ctx);
 const char *mlx5_os_get_ctx_device_path(void *ctx);
+uint32_t mlx5_os_get_umem_id(void *umem);
 int mlx5_os_get_dev_attr(void *ctx, struct mlx5_dev_attr *dev_attr);
 
 #endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 4dec57d..81f5bd4 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -4130,7 +4130,7 @@ flow_dv_create_counter_stat_mem_mng(struct rte_eth_dev *dev, int raws_n)
 	}
 	mkey_attr.addr = (uintptr_t)mem;
 	mkey_attr.size = size;
-	mkey_attr.umem_id = mem_mng->umem->umem_id;
+	mkey_attr.umem_id = mlx5_os_get_umem_id(mem_mng->umem);
 	mkey_attr.pd = sh->pdn;
 	mkey_attr.log_entity_size = 0;
 	mkey_attr.pg_access = 0;
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index f018553..78046fd 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -1417,7 +1417,7 @@ mlx5_rxq_obj_new(struct rte_eth_dev *dev, uint16_t idx,
 		if (dbr_offset < 0)
 			goto error;
 		rxq_ctrl->dbr_offset = dbr_offset;
-		rxq_ctrl->dbr_umem_id = dbr_page->umem->umem_id;
+		rxq_ctrl->dbr_umem_id = mlx5_os_get_umem_id(dbr_page->umem);
 		rxq_ctrl->dbr_umem_id_valid = 1;
 		rxq_data->rq_db = (uint32_t *)((uintptr_t)dbr_page->dbrs +
 					       (uintptr_t)rxq_ctrl->dbr_offset);
-- 
2.8.4



* [dpdk-dev] [PATCH v1 6/8] net/mlx5: refactor PCI probing under Linux
  2020-06-03 15:05 [dpdk-dev] [PATCH v1 0/8] mlx5 PMD multi OS support Ophir Munk
                   ` (4 preceding siblings ...)
  2020-06-03 15:05 ` [dpdk-dev] [PATCH v1 5/8] net/mlx5: remove umem field dependency on dv Ophir Munk
@ 2020-06-03 15:06 ` Ophir Munk
  2020-06-03 15:06 ` [dpdk-dev] [PATCH v1 7/8] net/mlx5: add mlx5 header file specific to Linux Ophir Munk
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 17+ messages in thread
From: Ophir Munk @ 2020-06-03 15:06 UTC (permalink / raw)
  To: dev, Matan Azrad, Raslan Darawsheh; +Cc: Ophir Munk

Refactor the PCI probing related code. Move the Linux-specific functions
(as well as the Verbs and DV related code) from mlx5.c to linux/mlx5_os.c.

Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
Acked-by: Matan Azrad <matan@mellanox.com>
---
 drivers/net/mlx5/linux/mlx5_os.c | 1794 +++++++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5.c          | 1837 +-------------------------------------
 drivers/net/mlx5/mlx5.h          |   42 +-
 3 files changed, 1864 insertions(+), 1809 deletions(-)
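With this split, the generic mlx5.c side keeps only the OS-independent plumbing and delegates device discovery to the OS layer. A hypothetical sketch of such a thin entry point (only mlx5_os_pci_probe() below comes from this patch; the wrapper itself is illustrative):

/* Sketch only: generic probe entry delegating to the Linux OS layer. */
static int
mlx5_pci_probe(struct rte_pci_driver *pci_drv,
	       struct rte_pci_device *pci_dev)
{
	/* All Verbs/DevX device discovery happens in linux/mlx5_os.c. */
	return mlx5_os_pci_probe(pci_drv, pci_dev);
}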

diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 7d60683..d1476c2 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -39,6 +39,7 @@
 #include <mlx5_glue.h>
 #include <mlx5_devx_cmds.h>
 #include <mlx5_common.h>
+#include <mlx5_common_mp.h>
 
 #include "mlx5_defs.h"
 #include "mlx5.h"
@@ -49,6 +50,17 @@
 #include "mlx5_flow.h"
 #include "rte_pmd_mlx5.h"
 
+#define MLX5_TAGS_HLIST_ARRAY_SIZE 8192
+
+#ifndef HAVE_IBV_MLX5_MOD_MPW
+#define MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED (1 << 2)
+#define MLX5DV_CONTEXT_FLAGS_ENHANCED_MPW (1 << 3)
+#endif
+
+#ifndef HAVE_IBV_MLX5_MOD_CQE_128B_COMP
+#define MLX5DV_CONTEXT_FLAGS_CQE_128B_COMP (1 << 4)
+#endif
+
 /**
  * Get ibv device name. Given an ibv_context pointer - return a
  * pointer to the corresponding device name.
@@ -166,3 +178,1785 @@ mlx5_os_get_dev_attr(void *ctx, struct mlx5_dev_attr *device_attr)
 
 	return err;
 }
+
+/**
+ * Verbs callback to allocate memory. This function should allocate the space
+ * according to the size provided, residing inside a huge page.
+ * Please note that all allocations must respect the alignment from libmlx5
+ * (i.e. currently sysconf(_SC_PAGESIZE)).
+ *
+ * @param[in] size
+ *   The size in bytes of the memory to allocate.
+ * @param[in] data
+ *   A pointer to the callback data.
+ *
+ * @return
+ *   Allocated buffer, NULL otherwise and rte_errno is set.
+ */
+static void *
+mlx5_alloc_verbs_buf(size_t size, void *data)
+{
+	struct mlx5_priv *priv = data;
+	void *ret;
+	size_t alignment = sysconf(_SC_PAGESIZE);
+	unsigned int socket = SOCKET_ID_ANY;
+
+	if (priv->verbs_alloc_ctx.type == MLX5_VERBS_ALLOC_TYPE_TX_QUEUE) {
+		const struct mlx5_txq_ctrl *ctrl = priv->verbs_alloc_ctx.obj;
+
+		socket = ctrl->socket;
+	} else if (priv->verbs_alloc_ctx.type ==
+		   MLX5_VERBS_ALLOC_TYPE_RX_QUEUE) {
+		const struct mlx5_rxq_ctrl *ctrl = priv->verbs_alloc_ctx.obj;
+
+		socket = ctrl->socket;
+	}
+	MLX5_ASSERT(data != NULL);
+	ret = rte_malloc_socket(__func__, size, alignment, socket);
+	if (!ret && size)
+		rte_errno = ENOMEM;
+	return ret;
+}
+
+/**
+ * Verbs callback to free memory.
+ *
+ * @param[in] ptr
+ *   A pointer to the memory to free.
+ * @param[in] data
+ *   A pointer to the callback data.
+ */
+static void
+mlx5_free_verbs_buf(void *ptr, void *data __rte_unused)
+{
+	MLX5_ASSERT(data != NULL);
+	rte_free(ptr);
+}
+
+/**
+ * Initialize DR related data within private structure.
+ * The routine checks the reference counter and performs the actual
+ * resource creation/initialization only if the counter is zero.
+ *
+ * @param[in] priv
+ *   Pointer to the private device data structure.
+ *
+ * @return
+ *   Zero on success, positive error code otherwise.
+ */
+static int
+mlx5_alloc_shared_dr(struct mlx5_priv *priv)
+{
+	struct mlx5_dev_ctx_shared *sh = priv->sh;
+	char s[MLX5_HLIST_NAMESIZE];
+	int err = 0;
+
+	if (!sh->flow_tbls)
+		err = mlx5_alloc_table_hash_list(priv);
+	else
+		DRV_LOG(DEBUG, "sh->flow_tbls[%p] already created, reuse\n",
+			(void *)sh->flow_tbls);
+	if (err)
+		return err;
+	/* Create tags hash list table. */
+	snprintf(s, sizeof(s), "%s_tags", sh->ibdev_name);
+	sh->tag_table = mlx5_hlist_create(s, MLX5_TAGS_HLIST_ARRAY_SIZE);
+	if (!sh->tag_table) {
+		DRV_LOG(ERR, "tags with hash creation failed.\n");
+		err = ENOMEM;
+		goto error;
+	}
+#ifdef HAVE_MLX5DV_DR
+	void *domain;
+
+	if (sh->dv_refcnt) {
+		/* Shared DV/DR structures are already initialized. */
+		sh->dv_refcnt++;
+		priv->dr_shared = 1;
+		return 0;
+	}
+	/* Reference counter is zero, we should initialize structures. */
+	domain = mlx5_glue->dr_create_domain(sh->ctx,
+					     MLX5DV_DR_DOMAIN_TYPE_NIC_RX);
+	if (!domain) {
+		DRV_LOG(ERR, "ingress mlx5dv_dr_create_domain failed");
+		err = errno;
+		goto error;
+	}
+	sh->rx_domain = domain;
+	domain = mlx5_glue->dr_create_domain(sh->ctx,
+					     MLX5DV_DR_DOMAIN_TYPE_NIC_TX);
+	if (!domain) {
+		DRV_LOG(ERR, "egress mlx5dv_dr_create_domain failed");
+		err = errno;
+		goto error;
+	}
+	pthread_mutex_init(&sh->dv_mutex, NULL);
+	sh->tx_domain = domain;
+#ifdef HAVE_MLX5DV_DR_ESWITCH
+	if (priv->config.dv_esw_en) {
+		domain  = mlx5_glue->dr_create_domain
+			(sh->ctx, MLX5DV_DR_DOMAIN_TYPE_FDB);
+		if (!domain) {
+			DRV_LOG(ERR, "FDB mlx5dv_dr_create_domain failed");
+			err = errno;
+			goto error;
+		}
+		sh->fdb_domain = domain;
+		sh->esw_drop_action = mlx5_glue->dr_create_flow_action_drop();
+	}
+#endif
+	if (priv->config.reclaim_mode == MLX5_RCM_AGGR) {
+		mlx5_glue->dr_reclaim_domain_memory(sh->rx_domain, 1);
+		mlx5_glue->dr_reclaim_domain_memory(sh->tx_domain, 1);
+		if (sh->fdb_domain)
+			mlx5_glue->dr_reclaim_domain_memory(sh->fdb_domain, 1);
+	}
+	sh->pop_vlan_action = mlx5_glue->dr_create_flow_action_pop_vlan();
+#endif /* HAVE_MLX5DV_DR */
+	sh->dv_refcnt++;
+	priv->dr_shared = 1;
+	return 0;
+error:
+	/* Rollback the created objects. */
+	if (sh->rx_domain) {
+		mlx5_glue->dr_destroy_domain(sh->rx_domain);
+		sh->rx_domain = NULL;
+	}
+	if (sh->tx_domain) {
+		mlx5_glue->dr_destroy_domain(sh->tx_domain);
+		sh->tx_domain = NULL;
+	}
+	if (sh->fdb_domain) {
+		mlx5_glue->dr_destroy_domain(sh->fdb_domain);
+		sh->fdb_domain = NULL;
+	}
+	if (sh->esw_drop_action) {
+		mlx5_glue->destroy_flow_action(sh->esw_drop_action);
+		sh->esw_drop_action = NULL;
+	}
+	if (sh->pop_vlan_action) {
+		mlx5_glue->destroy_flow_action(sh->pop_vlan_action);
+		sh->pop_vlan_action = NULL;
+	}
+	if (sh->tag_table) {
+		/* Tags must have been destroyed with the flows already. */
+		mlx5_hlist_destroy(sh->tag_table, NULL, NULL);
+		sh->tag_table = NULL;
+	}
+	mlx5_free_table_hash_list(priv);
+	return err;
+}
+
+/**
+ * Destroy DR related data within private structure.
+ *
+ * @param[in] priv
+ *   Pointer to the private device data structure.
+ */
+void
+mlx5_os_free_shared_dr(struct mlx5_priv *priv)
+{
+	struct mlx5_dev_ctx_shared *sh;
+
+	if (!priv->dr_shared)
+		return;
+	priv->dr_shared = 0;
+	sh = priv->sh;
+	MLX5_ASSERT(sh);
+#ifdef HAVE_MLX5DV_DR
+	MLX5_ASSERT(sh->dv_refcnt);
+	if (sh->dv_refcnt && --sh->dv_refcnt)
+		return;
+	if (sh->rx_domain) {
+		mlx5_glue->dr_destroy_domain(sh->rx_domain);
+		sh->rx_domain = NULL;
+	}
+	if (sh->tx_domain) {
+		mlx5_glue->dr_destroy_domain(sh->tx_domain);
+		sh->tx_domain = NULL;
+	}
+#ifdef HAVE_MLX5DV_DR_ESWITCH
+	if (sh->fdb_domain) {
+		mlx5_glue->dr_destroy_domain(sh->fdb_domain);
+		sh->fdb_domain = NULL;
+	}
+	if (sh->esw_drop_action) {
+		mlx5_glue->destroy_flow_action(sh->esw_drop_action);
+		sh->esw_drop_action = NULL;
+	}
+#endif
+	if (sh->pop_vlan_action) {
+		mlx5_glue->destroy_flow_action(sh->pop_vlan_action);
+		sh->pop_vlan_action = NULL;
+	}
+	pthread_mutex_destroy(&sh->dv_mutex);
+#endif /* HAVE_MLX5DV_DR */
+	if (sh->tag_table) {
+		/* Tags must have been destroyed with the flows already. */
+		mlx5_hlist_destroy(sh->tag_table, NULL, NULL);
+		sh->tag_table = NULL;
+	}
+	mlx5_free_table_hash_list(priv);
+}
+
+/**
+ * Spawn an Ethernet device from Verbs information.
+ *
+ * @param dpdk_dev
+ *   Backing DPDK device.
+ * @param spawn
+ *   Verbs device parameters (name, port, switch_info) to spawn.
+ * @param config
+ *   Device configuration parameters.
+ *
+ * @return
+ *   A valid Ethernet device object on success, NULL otherwise and rte_errno
+ *   is set. The following errors are defined:
+ *
+ *   EBUSY: device is not supposed to be spawned.
+ *   EEXIST: device is already spawned.
+ */
+static struct rte_eth_dev *
+mlx5_dev_spawn(struct rte_device *dpdk_dev,
+	       struct mlx5_dev_spawn_data *spawn,
+	       struct mlx5_dev_config config)
+{
+	const struct mlx5_switch_info *switch_info = &spawn->info;
+	struct mlx5_dev_ctx_shared *sh = NULL;
+	struct ibv_port_attr port_attr;
+	struct mlx5dv_context dv_attr = { .comp_mask = 0 };
+	struct rte_eth_dev *eth_dev = NULL;
+	struct mlx5_priv *priv = NULL;
+	int err = 0;
+	unsigned int hw_padding = 0;
+	unsigned int mps;
+	unsigned int cqe_comp;
+	unsigned int cqe_pad = 0;
+	unsigned int tunnel_en = 0;
+	unsigned int mpls_en = 0;
+	unsigned int swp = 0;
+	unsigned int mprq = 0;
+	unsigned int mprq_min_stride_size_n = 0;
+	unsigned int mprq_max_stride_size_n = 0;
+	unsigned int mprq_min_stride_num_n = 0;
+	unsigned int mprq_max_stride_num_n = 0;
+	struct rte_ether_addr mac;
+	char name[RTE_ETH_NAME_MAX_LEN];
+	int own_domain_id = 0;
+	uint16_t port_id;
+	unsigned int i;
+#ifdef HAVE_MLX5DV_DR_DEVX_PORT
+	struct mlx5dv_devx_port devx_port = { .comp_mask = 0 };
+#endif
+
+	/* Determine if this port representor is supposed to be spawned. */
+	if (switch_info->representor && dpdk_dev->devargs) {
+		struct rte_eth_devargs eth_da;
+
+		err = rte_eth_devargs_parse(dpdk_dev->devargs->args, &eth_da);
+		if (err) {
+			rte_errno = -err;
+			DRV_LOG(ERR, "failed to process device arguments: %s",
+				strerror(rte_errno));
+			return NULL;
+		}
+		for (i = 0; i < eth_da.nb_representor_ports; ++i)
+			if (eth_da.representor_ports[i] ==
+			    (uint16_t)switch_info->port_name)
+				break;
+		if (i == eth_da.nb_representor_ports) {
+			rte_errno = EBUSY;
+			return NULL;
+		}
+	}
+	/* Build device name. */
+	if (spawn->pf_bond <  0) {
+		/* Single device. */
+		if (!switch_info->representor)
+			strlcpy(name, dpdk_dev->name, sizeof(name));
+		else
+			snprintf(name, sizeof(name), "%s_representor_%u",
+				 dpdk_dev->name, switch_info->port_name);
+	} else {
+		/* Bonding device. */
+		if (!switch_info->representor)
+			snprintf(name, sizeof(name), "%s_%s",
+				 dpdk_dev->name, spawn->ibv_dev->name);
+		else
+			snprintf(name, sizeof(name), "%s_%s_representor_%u",
+				 dpdk_dev->name, spawn->ibv_dev->name,
+				 switch_info->port_name);
+	}
+	/* check if the device is already spawned */
+	if (rte_eth_dev_get_port_by_name(name, &port_id) == 0) {
+		rte_errno = EEXIST;
+		return NULL;
+	}
+	DRV_LOG(DEBUG, "naming Ethernet device \"%s\"", name);
+	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+		struct mlx5_mp_id mp_id;
+
+		eth_dev = rte_eth_dev_attach_secondary(name);
+		if (eth_dev == NULL) {
+			DRV_LOG(ERR, "can not attach rte ethdev");
+			rte_errno = ENOMEM;
+			return NULL;
+		}
+		eth_dev->device = dpdk_dev;
+		eth_dev->dev_ops = &mlx5_dev_sec_ops;
+		err = mlx5_proc_priv_init(eth_dev);
+		if (err)
+			return NULL;
+		mp_id.port_id = eth_dev->data->port_id;
+		strlcpy(mp_id.name, MLX5_MP_NAME, RTE_MP_MAX_NAME_LEN);
+		/* Receive command fd from primary process */
+		err = mlx5_mp_req_verbs_cmd_fd(&mp_id);
+		if (err < 0)
+			goto err_secondary;
+		/* Remap UAR for Tx queues. */
+		err = mlx5_tx_uar_init_secondary(eth_dev, err);
+		if (err)
+			goto err_secondary;
+		/*
+		 * Ethdev pointer is still required as input since
+		 * the primary device is not accessible from the
+		 * secondary process.
+		 */
+		eth_dev->rx_pkt_burst = mlx5_select_rx_function(eth_dev);
+		eth_dev->tx_pkt_burst = mlx5_select_tx_function(eth_dev);
+		return eth_dev;
+err_secondary:
+		mlx5_dev_close(eth_dev);
+		return NULL;
+	}
+	/*
+	 * Some parameters ("tx_db_nc" in particular) are needed in
+	 * advance to create the dv/verbs device context. We process the
+	 * devargs here to get them, and later process the devargs again
+	 * to override some hardware settings.
+	 */
+	err = mlx5_args(&config, dpdk_dev->devargs);
+	if (err) {
+		err = rte_errno;
+		DRV_LOG(ERR, "failed to process device arguments: %s",
+			strerror(rte_errno));
+		goto error;
+	}
+	sh = mlx5_alloc_shared_ibctx(spawn, &config);
+	if (!sh)
+		return NULL;
+	config.devx = sh->devx;
+#ifdef HAVE_MLX5DV_DR_ACTION_DEST_DEVX_TIR
+	config.dest_tir = 1;
+#endif
+#ifdef HAVE_IBV_MLX5_MOD_SWP
+	dv_attr.comp_mask |= MLX5DV_CONTEXT_MASK_SWP;
+#endif
+	/*
+	 * Multi-packet send is supported by ConnectX-4 Lx PF as well
+	 * as all ConnectX-5 devices.
+	 */
+#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
+	dv_attr.comp_mask |= MLX5DV_CONTEXT_MASK_TUNNEL_OFFLOADS;
+#endif
+#ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
+	dv_attr.comp_mask |= MLX5DV_CONTEXT_MASK_STRIDING_RQ;
+#endif
+	mlx5_glue->dv_query_device(sh->ctx, &dv_attr);
+	if (dv_attr.flags & MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED) {
+		if (dv_attr.flags & MLX5DV_CONTEXT_FLAGS_ENHANCED_MPW) {
+			DRV_LOG(DEBUG, "enhanced MPW is supported");
+			mps = MLX5_MPW_ENHANCED;
+		} else {
+			DRV_LOG(DEBUG, "MPW is supported");
+			mps = MLX5_MPW;
+		}
+	} else {
+		DRV_LOG(DEBUG, "MPW isn't supported");
+		mps = MLX5_MPW_DISABLED;
+	}
+#ifdef HAVE_IBV_MLX5_MOD_SWP
+	if (dv_attr.comp_mask & MLX5DV_CONTEXT_MASK_SWP)
+		swp = dv_attr.sw_parsing_caps.sw_parsing_offloads;
+	DRV_LOG(DEBUG, "SWP support: %u", swp);
+#endif
+	config.swp = !!swp;
+#ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
+	if (dv_attr.comp_mask & MLX5DV_CONTEXT_MASK_STRIDING_RQ) {
+		struct mlx5dv_striding_rq_caps mprq_caps =
+			dv_attr.striding_rq_caps;
+
+		DRV_LOG(DEBUG, "\tmin_single_stride_log_num_of_bytes: %d",
+			mprq_caps.min_single_stride_log_num_of_bytes);
+		DRV_LOG(DEBUG, "\tmax_single_stride_log_num_of_bytes: %d",
+			mprq_caps.max_single_stride_log_num_of_bytes);
+		DRV_LOG(DEBUG, "\tmin_single_wqe_log_num_of_strides: %d",
+			mprq_caps.min_single_wqe_log_num_of_strides);
+		DRV_LOG(DEBUG, "\tmax_single_wqe_log_num_of_strides: %d",
+			mprq_caps.max_single_wqe_log_num_of_strides);
+		DRV_LOG(DEBUG, "\tsupported_qpts: %d",
+			mprq_caps.supported_qpts);
+		DRV_LOG(DEBUG, "device supports Multi-Packet RQ");
+		mprq = 1;
+		mprq_min_stride_size_n =
+			mprq_caps.min_single_stride_log_num_of_bytes;
+		mprq_max_stride_size_n =
+			mprq_caps.max_single_stride_log_num_of_bytes;
+		mprq_min_stride_num_n =
+			mprq_caps.min_single_wqe_log_num_of_strides;
+		mprq_max_stride_num_n =
+			mprq_caps.max_single_wqe_log_num_of_strides;
+	}
+#endif
+	if (RTE_CACHE_LINE_SIZE == 128 &&
+	    !(dv_attr.flags & MLX5DV_CONTEXT_FLAGS_CQE_128B_COMP))
+		cqe_comp = 0;
+	else
+		cqe_comp = 1;
+	config.cqe_comp = cqe_comp;
+#ifdef HAVE_IBV_MLX5_MOD_CQE_128B_PAD
+	/* Whether device supports 128B Rx CQE padding. */
+	cqe_pad = RTE_CACHE_LINE_SIZE == 128 &&
+		  (dv_attr.flags & MLX5DV_CONTEXT_FLAGS_CQE_128B_PAD);
+#endif
+#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
+	if (dv_attr.comp_mask & MLX5DV_CONTEXT_MASK_TUNNEL_OFFLOADS) {
+		tunnel_en = ((dv_attr.tunnel_offloads_caps &
+			      MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_VXLAN) &&
+			     (dv_attr.tunnel_offloads_caps &
+			      MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_GRE) &&
+			     (dv_attr.tunnel_offloads_caps &
+			      MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_GENEVE));
+	}
+	DRV_LOG(DEBUG, "tunnel offloading is %ssupported",
+		tunnel_en ? "" : "not ");
+#else
+	DRV_LOG(WARNING,
+		"tunnel offloading disabled due to old OFED/rdma-core version");
+#endif
+	config.tunnel_en = tunnel_en;
+#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
+	mpls_en = ((dv_attr.tunnel_offloads_caps &
+		    MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_CW_MPLS_OVER_GRE) &&
+		   (dv_attr.tunnel_offloads_caps &
+		    MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_CW_MPLS_OVER_UDP));
+	DRV_LOG(DEBUG, "MPLS over GRE/UDP tunnel offloading is %ssupported",
+		mpls_en ? "" : "not ");
+#else
+	DRV_LOG(WARNING, "MPLS over GRE/UDP tunnel offloading disabled due to"
+		" old OFED/rdma-core version or firmware configuration");
+#endif
+	config.mpls_en = mpls_en;
+	/* Check port status. */
+	err = mlx5_glue->query_port(sh->ctx, spawn->ibv_port, &port_attr);
+	if (err) {
+		DRV_LOG(ERR, "port query failed: %s", strerror(err));
+		goto error;
+	}
+	if (port_attr.link_layer != IBV_LINK_LAYER_ETHERNET) {
+		DRV_LOG(ERR, "port is not configured in Ethernet mode");
+		err = EINVAL;
+		goto error;
+	}
+	if (port_attr.state != IBV_PORT_ACTIVE)
+		DRV_LOG(DEBUG, "port is not active: \"%s\" (%d)",
+			mlx5_glue->port_state_str(port_attr.state),
+			port_attr.state);
+	/* Allocate private eth device data. */
+	priv = rte_zmalloc("ethdev private structure",
+			   sizeof(*priv),
+			   RTE_CACHE_LINE_SIZE);
+	if (priv == NULL) {
+		DRV_LOG(ERR, "priv allocation failure");
+		err = ENOMEM;
+		goto error;
+	}
+	priv->sh = sh;
+	priv->ibv_port = spawn->ibv_port;
+	priv->pci_dev = spawn->pci_dev;
+	priv->mtu = RTE_ETHER_MTU;
+	priv->mp_id.port_id = port_id;
+	strlcpy(priv->mp_id.name, MLX5_MP_NAME, RTE_MP_MAX_NAME_LEN);
+#ifndef RTE_ARCH_64
+	/* Initialize UAR access locks for 32bit implementations. */
+	rte_spinlock_init(&priv->uar_lock_cq);
+	for (i = 0; i < MLX5_UAR_PAGE_NUM_MAX; i++)
+		rte_spinlock_init(&priv->uar_lock[i]);
+#endif
+	/* Some internal functions rely on Netlink sockets, open them now. */
+	priv->nl_socket_rdma = mlx5_nl_init(NETLINK_RDMA);
+	priv->nl_socket_route =	mlx5_nl_init(NETLINK_ROUTE);
+	priv->representor = !!switch_info->representor;
+	priv->master = !!switch_info->master;
+	priv->domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID;
+	priv->vport_meta_tag = 0;
+	priv->vport_meta_mask = 0;
+	priv->pf_bond = spawn->pf_bond;
+#ifdef HAVE_MLX5DV_DR_DEVX_PORT
+	/*
+	 * The DevX port query API is implemented. E-Switch may use
+	 * either vport or reg_c[0] metadata register to match on
+	 * vport index. The engaged part of metadata register is
+	 * defined by mask.
+	 */
+	if (switch_info->representor || switch_info->master) {
+		devx_port.comp_mask = MLX5DV_DEVX_PORT_VPORT |
+				      MLX5DV_DEVX_PORT_MATCH_REG_C_0;
+		err = mlx5_glue->devx_port_query(sh->ctx, spawn->ibv_port,
+						 &devx_port);
+		if (err) {
+			DRV_LOG(WARNING,
+				"can't query devx port %d on device %s",
+				spawn->ibv_port, spawn->ibv_dev->name);
+			devx_port.comp_mask = 0;
+		}
+	}
+	if (devx_port.comp_mask & MLX5DV_DEVX_PORT_MATCH_REG_C_0) {
+		priv->vport_meta_tag = devx_port.reg_c_0.value;
+		priv->vport_meta_mask = devx_port.reg_c_0.mask;
+		if (!priv->vport_meta_mask) {
+			DRV_LOG(ERR, "vport zero mask for port %d"
+				     " on bonding device %s",
+				     spawn->ibv_port, spawn->ibv_dev->name);
+			err = ENOTSUP;
+			goto error;
+		}
+		if (priv->vport_meta_tag & ~priv->vport_meta_mask) {
+			DRV_LOG(ERR, "invalid vport tag for port %d"
+				     " on bonding device %s",
+				     spawn->ibv_port, spawn->ibv_dev->name);
+			err = ENOTSUP;
+			goto error;
+		}
+	}
+	if (devx_port.comp_mask & MLX5DV_DEVX_PORT_VPORT) {
+		priv->vport_id = devx_port.vport_num;
+	} else if (spawn->pf_bond >= 0) {
+		DRV_LOG(ERR, "can't deduce vport index for port %d"
+			     " on bonding device %s",
+			     spawn->ibv_port, spawn->ibv_dev->name);
+		err = ENOTSUP;
+		goto error;
+	} else {
+		/* Suppose vport index in compatible way. */
+		priv->vport_id = switch_info->representor ?
+				 switch_info->port_name + 1 : -1;
+	}
+#else
+	/*
+	 * The kernel/rdma_core supports single E-Switch per PF
+	 * configurations only, and the vport_id field contains the vport
+	 * index for the associated VF, which is deduced from the
+	 * representor port name. For example, suppose IB device port 10
+	 * has an attached network device eth0 whose port name attribute
+	 * is pf0vf2; we deduce the VF number as 2 and set the vport
+	 * index to 3 (2+1). This assignment scheme should be changed if
+	 * multiple E-Switch instances per PF configurations and/or PCI
+	 * subfunctions are added.
+	 */
+	priv->vport_id = switch_info->representor ?
+			 switch_info->port_name + 1 : -1;
+#endif
+	/* representor_id field keeps the unmodified VF index. */
+	priv->representor_id = switch_info->representor ?
+			       switch_info->port_name : -1;
+	/*
+	 * Look for sibling devices in order to reuse their switch domain
+	 * if any, otherwise allocate one.
+	 */
+	MLX5_ETH_FOREACH_DEV(port_id, priv->pci_dev) {
+		const struct mlx5_priv *opriv =
+			rte_eth_devices[port_id].data->dev_private;
+
+		if (!opriv ||
+		    opriv->sh != priv->sh ||
+			opriv->domain_id ==
+			RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID)
+			continue;
+		priv->domain_id = opriv->domain_id;
+		break;
+	}
+	if (priv->domain_id == RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID) {
+		err = rte_eth_switch_domain_alloc(&priv->domain_id);
+		if (err) {
+			err = rte_errno;
+			DRV_LOG(ERR, "unable to allocate switch domain: %s",
+				strerror(rte_errno));
+			goto error;
+		}
+		own_domain_id = 1;
+	}
+	/* Override some values set by hardware configuration. */
+	mlx5_args(&config, dpdk_dev->devargs);
+	err = mlx5_dev_check_sibling_config(priv, &config);
+	if (err)
+		goto error;
+	config.hw_csum = !!(sh->device_attr.device_cap_flags_ex &
+			    IBV_DEVICE_RAW_IP_CSUM);
+	DRV_LOG(DEBUG, "checksum offloading is %ssupported",
+		(config.hw_csum ? "" : "not "));
+#if !defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) && \
+	!defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
+	DRV_LOG(DEBUG, "counters are not supported");
+#endif
+#if !defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_MLX5DV_DR)
+	if (config.dv_flow_en) {
+		DRV_LOG(WARNING, "DV flow is not supported");
+		config.dv_flow_en = 0;
+	}
+#endif
+	config.ind_table_max_size =
+		sh->device_attr.max_rwq_indirection_table_size;
+	/*
+	 * Remove this check once DPDK supports larger/variable
+	 * indirection tables.
+	 */
+	if (config.ind_table_max_size > (unsigned int)ETH_RSS_RETA_SIZE_512)
+		config.ind_table_max_size = ETH_RSS_RETA_SIZE_512;
+	DRV_LOG(DEBUG, "maximum Rx indirection table size is %u",
+		config.ind_table_max_size);
+	config.hw_vlan_strip = !!(sh->device_attr.raw_packet_caps &
+				  IBV_RAW_PACKET_CAP_CVLAN_STRIPPING);
+	DRV_LOG(DEBUG, "VLAN stripping is %ssupported",
+		(config.hw_vlan_strip ? "" : "not "));
+	config.hw_fcs_strip = !!(sh->device_attr.raw_packet_caps &
+				 IBV_RAW_PACKET_CAP_SCATTER_FCS);
+	DRV_LOG(DEBUG, "FCS stripping configuration is %ssupported",
+		(config.hw_fcs_strip ? "" : "not "));
+#if defined(HAVE_IBV_WQ_FLAG_RX_END_PADDING)
+	hw_padding = !!sh->device_attr.rx_pad_end_addr_align;
+#elif defined(HAVE_IBV_WQ_FLAGS_PCI_WRITE_END_PADDING)
+	hw_padding = !!(sh->device_attr.device_cap_flags_ex &
+			IBV_DEVICE_PCI_WRITE_END_PADDING);
+#endif
+	if (config.hw_padding && !hw_padding) {
+		DRV_LOG(DEBUG, "Rx end alignment padding isn't supported");
+		config.hw_padding = 0;
+	} else if (config.hw_padding) {
+		DRV_LOG(DEBUG, "Rx end alignment padding is enabled");
+	}
+	config.tso = (sh->device_attr.max_tso > 0 &&
+		      (sh->device_attr.tso_supported_qpts &
+		       (1 << IBV_QPT_RAW_PACKET)));
+	if (config.tso)
+		config.tso_max_payload_sz = sh->device_attr.max_tso;
+	/*
+	 * MPW is disabled by default, while the Enhanced MPW is enabled
+	 * by default.
+	 */
+	if (config.mps == MLX5_ARG_UNSET)
+		config.mps = (mps == MLX5_MPW_ENHANCED) ? MLX5_MPW_ENHANCED :
+							  MLX5_MPW_DISABLED;
+	else
+		config.mps = config.mps ? mps : MLX5_MPW_DISABLED;
+	DRV_LOG(INFO, "%sMPS is %s",
+		config.mps == MLX5_MPW_ENHANCED ? "enhanced " :
+		config.mps == MLX5_MPW ? "legacy " : "",
+		config.mps != MLX5_MPW_DISABLED ? "enabled" : "disabled");
+	if (config.cqe_comp && !cqe_comp) {
+		DRV_LOG(WARNING, "Rx CQE compression isn't supported");
+		config.cqe_comp = 0;
+	}
+	if (config.cqe_pad && !cqe_pad) {
+		DRV_LOG(WARNING, "Rx CQE padding isn't supported");
+		config.cqe_pad = 0;
+	} else if (config.cqe_pad) {
+		DRV_LOG(INFO, "Rx CQE padding is enabled");
+	}
+	if (config.devx) {
+		priv->counter_fallback = 0;
+		err = mlx5_devx_cmd_query_hca_attr(sh->ctx, &config.hca_attr);
+		if (err) {
+			err = -err;
+			goto error;
+		}
+		if (!config.hca_attr.flow_counters_dump)
+			priv->counter_fallback = 1;
+#ifndef HAVE_IBV_DEVX_ASYNC
+		priv->counter_fallback = 1;
+#endif
+		if (priv->counter_fallback)
+			DRV_LOG(INFO, "Use fall-back DV counter management");
+		/* Check for LRO support. */
+		if (config.dest_tir && config.hca_attr.lro_cap &&
+		    config.dv_flow_en) {
+			/* TBD check tunnel lro caps. */
+			config.lro.supported = config.hca_attr.lro_cap;
+			DRV_LOG(DEBUG, "Device supports LRO");
+			/*
+			 * If LRO timeout is not configured by application,
+			 * use the minimal supported value.
+			 */
+			if (!config.lro.timeout)
+				config.lro.timeout =
+				config.hca_attr.lro_timer_supported_periods[0];
+			DRV_LOG(DEBUG, "LRO session timeout set to %d usec",
+				config.lro.timeout);
+		}
+#if defined(HAVE_MLX5DV_DR) && defined(HAVE_MLX5_DR_CREATE_ACTION_FLOW_METER)
+		if (config.hca_attr.qos.sup && config.hca_attr.qos.srtcm_sup &&
+		    config.dv_flow_en) {
+			uint8_t reg_c_mask =
+				config.hca_attr.qos.flow_meter_reg_c_ids;
+			/*
+			 * Meter needs two REG_C's for color match and pre-sfx
+			 * flow match. Here get the REG_C for color match.
+			 * REG_C_0 and REG_C_1 are reserved for the metadata feature.
+			 */
+			reg_c_mask &= 0xfc;
+			if (__builtin_popcount(reg_c_mask) < 1) {
+				priv->mtr_en = 0;
+				DRV_LOG(WARNING, "No available register for"
+					" meter.");
+			} else {
+				priv->mtr_color_reg = ffs(reg_c_mask) - 1 +
+						      REG_C_0;
+				priv->mtr_en = 1;
+				priv->mtr_reg_share =
+				      config.hca_attr.qos.flow_meter_reg_share;
+				DRV_LOG(DEBUG, "The REG_C the meter uses is %d",
+					priv->mtr_color_reg);
+			}
+		}
+#endif
+	}
+	if (config.mprq.enabled && mprq) {
+		if (config.mprq.stride_num_n &&
+		    (config.mprq.stride_num_n > mprq_max_stride_num_n ||
+		     config.mprq.stride_num_n < mprq_min_stride_num_n)) {
+			config.mprq.stride_num_n =
+				RTE_MIN(RTE_MAX(MLX5_MPRQ_STRIDE_NUM_N,
+						mprq_min_stride_num_n),
+					mprq_max_stride_num_n);
+			DRV_LOG(WARNING,
+				"the number of strides"
+				" for Multi-Packet RQ is out of range,"
+				" setting default value (%u)",
+				1 << config.mprq.stride_num_n);
+		}
+		if (config.mprq.stride_size_n &&
+		    (config.mprq.stride_size_n > mprq_max_stride_size_n ||
+		     config.mprq.stride_size_n < mprq_min_stride_size_n)) {
+			config.mprq.stride_size_n =
+				RTE_MIN(RTE_MAX(MLX5_MPRQ_STRIDE_SIZE_N,
+						mprq_min_stride_size_n),
+					mprq_max_stride_size_n);
+			DRV_LOG(WARNING,
+				"the size of a stride"
+				" for Multi-Packet RQ is out of range,"
+				" setting default value (%u)",
+				1 << config.mprq.stride_size_n);
+		}
+		config.mprq.min_stride_size_n = mprq_min_stride_size_n;
+		config.mprq.max_stride_size_n = mprq_max_stride_size_n;
+	} else if (config.mprq.enabled && !mprq) {
+		DRV_LOG(WARNING, "Multi-Packet RQ isn't supported");
+		config.mprq.enabled = 0;
+	}
+	if (config.max_dump_files_num == 0)
+		config.max_dump_files_num = 128;
+	eth_dev = rte_eth_dev_allocate(name);
+	if (eth_dev == NULL) {
+		DRV_LOG(ERR, "can not allocate rte ethdev");
+		err = ENOMEM;
+		goto error;
+	}
+	/* Flag to call rte_eth_dev_release_port() in rte_eth_dev_close(). */
+	eth_dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE;
+	if (priv->representor) {
+		eth_dev->data->dev_flags |= RTE_ETH_DEV_REPRESENTOR;
+		eth_dev->data->representor_id = priv->representor_id;
+	}
+	/*
+	 * Store associated network device interface index. This index
+	 * is permanent throughout the lifetime of device. So, we may store
+	 * the ifindex here and use the cached value further.
+	 */
+	MLX5_ASSERT(spawn->ifindex);
+	priv->if_index = spawn->ifindex;
+	eth_dev->data->dev_private = priv;
+	priv->dev_data = eth_dev->data;
+	eth_dev->data->mac_addrs = priv->mac;
+	eth_dev->device = dpdk_dev;
+	/* Configure the first MAC address by default. */
+	if (mlx5_get_mac(eth_dev, &mac.addr_bytes)) {
+		DRV_LOG(ERR,
+			"port %u cannot get MAC address, is mlx5_en"
+			" loaded? (errno: %s)",
+			eth_dev->data->port_id, strerror(rte_errno));
+		err = ENODEV;
+		goto error;
+	}
+	DRV_LOG(INFO,
+		"port %u MAC address is %02x:%02x:%02x:%02x:%02x:%02x",
+		eth_dev->data->port_id,
+		mac.addr_bytes[0], mac.addr_bytes[1],
+		mac.addr_bytes[2], mac.addr_bytes[3],
+		mac.addr_bytes[4], mac.addr_bytes[5]);
+#ifdef RTE_LIBRTE_MLX5_DEBUG
+	{
+		char ifname[IF_NAMESIZE];
+
+		if (mlx5_get_ifname(eth_dev, &ifname) == 0)
+			DRV_LOG(DEBUG, "port %u ifname is \"%s\"",
+				eth_dev->data->port_id, ifname);
+		else
+			DRV_LOG(DEBUG, "port %u ifname is unknown",
+				eth_dev->data->port_id);
+	}
+#endif
+	/* Get actual MTU if possible. */
+	err = mlx5_get_mtu(eth_dev, &priv->mtu);
+	if (err) {
+		err = rte_errno;
+		goto error;
+	}
+	DRV_LOG(DEBUG, "port %u MTU is %u", eth_dev->data->port_id,
+		priv->mtu);
+	/* Initialize burst functions to prevent crashes before link-up. */
+	eth_dev->rx_pkt_burst = removed_rx_burst;
+	eth_dev->tx_pkt_burst = removed_tx_burst;
+	eth_dev->dev_ops = &mlx5_dev_ops;
+	/* Register MAC address. */
+	claim_zero(mlx5_mac_addr_add(eth_dev, &mac, 0, 0));
+	if (config.vf && config.vf_nl_en)
+		mlx5_nl_mac_addr_sync(priv->nl_socket_route,
+				      mlx5_ifindex(eth_dev),
+				      eth_dev->data->mac_addrs,
+				      MLX5_MAX_MAC_ADDRESSES);
+	priv->flows = 0;
+	priv->ctrl_flows = 0;
+	TAILQ_INIT(&priv->flow_meters);
+	TAILQ_INIT(&priv->flow_meter_profiles);
+	/* Hint libmlx5 to use PMD allocator for data plane resources */
+	struct mlx5dv_ctx_allocators alctr = {
+		.alloc = &mlx5_alloc_verbs_buf,
+		.free = &mlx5_free_verbs_buf,
+		.data = priv,
+	};
+	mlx5_glue->dv_set_context_attr(sh->ctx,
+				       MLX5DV_CTX_ATTR_BUF_ALLOCATORS,
+				       (void *)((uintptr_t)&alctr));
+	/* Bring Ethernet device up. */
+	DRV_LOG(DEBUG, "port %u forcing Ethernet interface up",
+		eth_dev->data->port_id);
+	mlx5_set_link_up(eth_dev);
+	/*
+	 * Even though the interrupt handler is not installed yet,
+	 * interrupts will still trigger on the async_fd from
+	 * Verbs context returned by ibv_open_device().
+	 */
+	mlx5_link_update(eth_dev, 0);
+#ifdef HAVE_MLX5DV_DR_ESWITCH
+	if (!(config.hca_attr.eswitch_manager && config.dv_flow_en &&
+	      (switch_info->representor || switch_info->master)))
+		config.dv_esw_en = 0;
+#else
+	config.dv_esw_en = 0;
+#endif
+	/* Detect minimal data bytes to inline. */
+	mlx5_set_min_inline(spawn, &config);
+	/* Store device configuration on private structure. */
+	priv->config = config;
+	/* Create context for virtual machine VLAN workaround. */
+	priv->vmwa_context = mlx5_vlan_vmwa_init(eth_dev, spawn->ifindex);
+	if (config.dv_flow_en) {
+		err = mlx5_alloc_shared_dr(priv);
+		if (err)
+			goto error;
+		/*
+		 * RSS id is shared with meter flow id. Meter flow id can only
+		 * use the 24 MSB of the register.
+		 */
+		priv->qrss_id_pool = mlx5_flow_id_pool_alloc(UINT32_MAX >>
+				     MLX5_MTR_COLOR_BITS);
+		if (!priv->qrss_id_pool) {
+			DRV_LOG(ERR, "can't create flow id pool");
+			err = ENOMEM;
+			goto error;
+		}
+	}
+	/* Supported Verbs flow priority number detection. */
+	err = mlx5_flow_discover_priorities(eth_dev);
+	if (err < 0) {
+		err = -err;
+		goto error;
+	}
+	priv->config.flow_prio = err;
+	if (!priv->config.dv_esw_en &&
+	    priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY) {
+		DRV_LOG(WARNING, "metadata mode %u is not supported "
+				 "(no E-Switch)", priv->config.dv_xmeta_en);
+		priv->config.dv_xmeta_en = MLX5_XMETA_MODE_LEGACY;
+	}
+	mlx5_set_metadata_mask(eth_dev);
+	if (priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
+	    !priv->sh->dv_regc0_mask) {
+		DRV_LOG(ERR, "metadata mode %u is not supported "
+			     "(no metadata reg_c[0] is available)",
+			     priv->config.dv_xmeta_en);
+			err = ENOTSUP;
+			goto error;
+	}
+	/*
+	 * Allocate the buffer for flow creation, just once.
+	 * The allocation must be done before any flow is created.
+	 */
+	mlx5_flow_alloc_intermediate(eth_dev);
+	/* Query availability of metadata reg_c's. */
+	err = mlx5_flow_discover_mreg_c(eth_dev);
+	if (err < 0) {
+		err = -err;
+		goto error;
+	}
+	if (!mlx5_flow_ext_mreg_supported(eth_dev)) {
+		DRV_LOG(DEBUG,
+			"port %u extensive metadata register is not supported",
+			eth_dev->data->port_id);
+		if (priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY) {
+			DRV_LOG(ERR, "metadata mode %u is not supported "
+				     "(no metadata registers available)",
+				     priv->config.dv_xmeta_en);
+			err = ENOTSUP;
+			goto error;
+		}
+	}
+	if (priv->config.dv_flow_en &&
+	    priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
+	    mlx5_flow_ext_mreg_supported(eth_dev) &&
+	    priv->sh->dv_regc0_mask) {
+		priv->mreg_cp_tbl = mlx5_hlist_create(MLX5_FLOW_MREG_HNAME,
+						      MLX5_FLOW_MREG_HTABLE_SZ);
+		if (!priv->mreg_cp_tbl) {
+			err = ENOMEM;
+			goto error;
+		}
+	}
+	return eth_dev;
+error:
+	if (priv) {
+		if (priv->mreg_cp_tbl)
+			mlx5_hlist_destroy(priv->mreg_cp_tbl, NULL, NULL);
+		if (priv->sh)
+			mlx5_os_free_shared_dr(priv);
+		if (priv->nl_socket_route >= 0)
+			close(priv->nl_socket_route);
+		if (priv->nl_socket_rdma >= 0)
+			close(priv->nl_socket_rdma);
+		if (priv->vmwa_context)
+			mlx5_vlan_vmwa_exit(priv->vmwa_context);
+		if (priv->qrss_id_pool)
+			mlx5_flow_id_pool_release(priv->qrss_id_pool);
+		if (own_domain_id)
+			claim_zero(rte_eth_switch_domain_free(priv->domain_id));
+		rte_free(priv);
+		if (eth_dev != NULL)
+			eth_dev->data->dev_private = NULL;
+	}
+	if (eth_dev != NULL) {
+		/* mac_addrs must not be freed alone because it is part of
+		 * dev_private.
+		 */
+		eth_dev->data->mac_addrs = NULL;
+		rte_eth_dev_release_port(eth_dev);
+	}
+	if (sh)
+		mlx5_free_shared_ibctx(sh);
+	MLX5_ASSERT(err > 0);
+	rte_errno = err;
+	return NULL;
+}
+
+/**
+ * Comparison callback to sort device data.
+ *
+ * This is meant to be used with qsort().
+ *
+ * @param[in] a
+ *   Pointer to pointer to first data object.
+ * @param[in] b
+ *   Pointer to pointer to second data object.
+ *
+ * @return
+ *   0 if both objects are equal, less than 0 if the first argument is less
+ *   than the second, greater than 0 otherwise.
+ */
+static int
+mlx5_dev_spawn_data_cmp(const void *a, const void *b)
+{
+	const struct mlx5_switch_info *si_a =
+		&((const struct mlx5_dev_spawn_data *)a)->info;
+	const struct mlx5_switch_info *si_b =
+		&((const struct mlx5_dev_spawn_data *)b)->info;
+	int ret;
+
+	/* Master device first. */
+	ret = si_b->master - si_a->master;
+	if (ret)
+		return ret;
+	/* Then representor devices. */
+	ret = si_b->representor - si_a->representor;
+	if (ret)
+		return ret;
+	/* Unidentified devices come last in no specific order. */
+	if (!si_a->representor)
+		return 0;
+	/* Order representors by name. */
+	return si_a->port_name - si_b->port_name;
+}
+
+/**
+ * Match PCI information for possible slaves of bonding device.
+ *
+ * @param[in] ibv_dev
+ *   Pointer to Infiniband device structure.
+ * @param[in] pci_dev
+ *   Pointer to PCI device structure to match PCI address.
+ * @param[in] nl_rdma
+ *   Netlink RDMA group socket handle.
+ *
+ * @return
+ *   negative value if no bonding device found, otherwise
+ *   positive index of slave PF in bonding.
+ */
+static int
+mlx5_device_bond_pci_match(const struct ibv_device *ibv_dev,
+			   const struct rte_pci_device *pci_dev,
+			   int nl_rdma)
+{
+	char ifname[IF_NAMESIZE + 1];
+	unsigned int ifindex;
+	unsigned int np, i;
+	FILE *file = NULL;
+	int pf = -1;
+
+	/*
+	 * Try to get the master device name. If something goes
+	 * wrong, assume there is no kernel support and no
+	 * bonding devices.
+	 */
+	if (nl_rdma < 0)
+		return -1;
+	if (!strstr(ibv_dev->name, "bond"))
+		return -1;
+	np = mlx5_nl_portnum(nl_rdma, ibv_dev->name);
+	if (!np)
+		return -1;
+	/*
+	 * The master device might not be on the predefined
+	 * port (port index 1 is not guaranteed), so we have
+	 * to scan all Infiniband device ports and find the
+	 * master.
+	 */
+	for (i = 1; i <= np; ++i) {
+		/* Check whether Infiniband port is populated. */
+		ifindex = mlx5_nl_ifindex(nl_rdma, ibv_dev->name, i);
+		if (!ifindex)
+			continue;
+		if (!if_indextoname(ifindex, ifname))
+			continue;
+		/* Try to read bonding slave names from sysfs. */
+		MKSTR(slaves,
+		      "/sys/class/net/%s/master/bonding/slaves", ifname);
+		file = fopen(slaves, "r");
+		if (file)
+			break;
+	}
+	if (!file)
+		return -1;
+	/* Use safe format to check maximal buffer length. */
+	MLX5_ASSERT(atol(RTE_STR(IF_NAMESIZE)) == IF_NAMESIZE);
+	while (fscanf(file, "%" RTE_STR(IF_NAMESIZE) "s", ifname) == 1) {
+		char tmp_str[IF_NAMESIZE + 32];
+		struct rte_pci_addr pci_addr;
+		struct mlx5_switch_info	info;
+
+		/* Process slave interface names in the loop. */
+		snprintf(tmp_str, sizeof(tmp_str),
+			 "/sys/class/net/%s", ifname);
+		if (mlx5_dev_to_pci_addr(tmp_str, &pci_addr)) {
+			DRV_LOG(WARNING, "can not get PCI address"
+					 " for netdev \"%s\"", ifname);
+			continue;
+		}
+		if (pci_dev->addr.domain != pci_addr.domain ||
+		    pci_dev->addr.bus != pci_addr.bus ||
+		    pci_dev->addr.devid != pci_addr.devid ||
+		    pci_dev->addr.function != pci_addr.function)
+			continue;
+		/* Slave interface PCI address match found. */
+		fclose(file);
+		snprintf(tmp_str, sizeof(tmp_str),
+			 "/sys/class/net/%s/phys_port_name", ifname);
+		file = fopen(tmp_str, "rb");
+		if (!file)
+			break;
+		info.name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET;
+		if (fscanf(file, "%32s", tmp_str) == 1)
+			mlx5_translate_port_name(tmp_str, &info);
+		if (info.name_type == MLX5_PHYS_PORT_NAME_TYPE_LEGACY ||
+		    info.name_type == MLX5_PHYS_PORT_NAME_TYPE_UPLINK)
+			pf = info.port_name;
+		break;
+	}
+	if (file)
+		fclose(file);
+	return pf;
+}
+
+/**
+ * DPDK callback to register a PCI device.
+ *
+ * This function spawns Ethernet devices out of a given PCI device.
+ *
+ * @param[in] pci_drv
+ *   PCI driver structure (mlx5_driver).
+ * @param[in] pci_dev
+ *   PCI device information.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
+		  struct rte_pci_device *pci_dev)
+{
+	struct ibv_device **ibv_list;
+	/*
+	 * Number of found IB devices matching the requested PCI BDF.
+	 * nd != 1 means there are multiple IB devices over the same
+	 * PCI device and we have representors and master.
+	 */
+	unsigned int nd = 0;
+	/*
+	 * Number of found IB device ports. nd = 1 and np = 1..n means
+	 * we have a single multiport IB device, and there may be
+	 * representors attached to some of the found ports.
+	 */
+	unsigned int np = 0;
+	/*
+	 * Number of DPDK Ethernet devices to spawn - either over
+	 * multiple IB devices or multiple ports of single IB device.
+	 * Actually this is the number of iterations to spawn.
+	 */
+	unsigned int ns = 0;
+	/*
+	 * Bonding device
+	 *   < 0 - no bonding device (single one)
+	 *  >= 0 - bonding device (value is slave PF index)
+	 */
+	int bd = -1;
+	struct mlx5_dev_spawn_data *list = NULL;
+	struct mlx5_dev_config dev_config;
+	int ret;
+
+	if (mlx5_class_get(pci_dev->device.devargs) != MLX5_CLASS_NET) {
+		DRV_LOG(DEBUG, "Skip probing - should be probed by other mlx5"
+			" driver.");
+		return 1;
+	}
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+		mlx5_pmd_socket_init();
+	ret = mlx5_init_once();
+	if (ret) {
+		DRV_LOG(ERR, "unable to init PMD global data: %s",
+			strerror(rte_errno));
+		return -rte_errno;
+	}
+	MLX5_ASSERT(pci_drv == &mlx5_driver);
+	errno = 0;
+	ibv_list = mlx5_glue->get_device_list(&ret);
+	if (!ibv_list) {
+		rte_errno = errno ? errno : ENOSYS;
+		DRV_LOG(ERR, "cannot list devices, is ib_uverbs loaded?");
+		return -rte_errno;
+	}
+	/*
+	 * First scan the list of all Infiniband devices to find
+	 * matching ones, gathering into the list.
+	 */
+	struct ibv_device *ibv_match[ret + 1];
+	int nl_route = mlx5_nl_init(NETLINK_ROUTE);
+	int nl_rdma = mlx5_nl_init(NETLINK_RDMA);
+	unsigned int i;
+
+	while (ret-- > 0) {
+		struct rte_pci_addr pci_addr;
+
+		DRV_LOG(DEBUG, "checking device \"%s\"", ibv_list[ret]->name);
+		bd = mlx5_device_bond_pci_match
+				(ibv_list[ret], pci_dev, nl_rdma);
+		if (bd >= 0) {
+			/*
+			 * Bonding device detected. Only one match is allowed,
+			 * the bonding is supported over multi-port IB device,
+			 * there should be no matches on representor PCI
+			 * functions or non VF LAG bonding devices with
+			 * specified address.
+			 */
+			if (nd) {
+				DRV_LOG(ERR,
+					"multiple PCI match on bonding device "
+					"\"%s\" found", ibv_list[ret]->name);
+				rte_errno = ENOENT;
+				ret = -rte_errno;
+				goto exit;
+			}
+			DRV_LOG(INFO, "PCI information matches for"
+				      " slave %d bonding device \"%s\"",
+				      bd, ibv_list[ret]->name);
+			ibv_match[nd++] = ibv_list[ret];
+			break;
+		}
+		if (mlx5_dev_to_pci_addr
+			(ibv_list[ret]->ibdev_path, &pci_addr))
+			continue;
+		if (pci_dev->addr.domain != pci_addr.domain ||
+		    pci_dev->addr.bus != pci_addr.bus ||
+		    pci_dev->addr.devid != pci_addr.devid ||
+		    pci_dev->addr.function != pci_addr.function)
+			continue;
+		DRV_LOG(INFO, "PCI information matches for device \"%s\"",
+			ibv_list[ret]->name);
+		ibv_match[nd++] = ibv_list[ret];
+	}
+	ibv_match[nd] = NULL;
+	if (!nd) {
+		/* No device matches, just complain and bail out. */
+		DRV_LOG(WARNING,
+			"no Verbs device matches PCI device " PCI_PRI_FMT ","
+			" are kernel drivers loaded?",
+			pci_dev->addr.domain, pci_dev->addr.bus,
+			pci_dev->addr.devid, pci_dev->addr.function);
+		rte_errno = ENOENT;
+		ret = -rte_errno;
+		goto exit;
+	}
+	if (nd == 1) {
+		/*
+		 * The single matching device found may have multiple ports.
+		 * Each port may be a representor, so we have to check the
+		 * port number and check for representors' existence.
+		 */
+		if (nl_rdma >= 0)
+			np = mlx5_nl_portnum(nl_rdma, ibv_match[0]->name);
+		if (!np)
+			DRV_LOG(WARNING, "can not get IB device \"%s\""
+					 " ports number", ibv_match[0]->name);
+		if (bd >= 0 && !np) {
+			DRV_LOG(ERR, "can not get ports"
+				     " for bonding device");
+			rte_errno = ENOENT;
+			ret = -rte_errno;
+			goto exit;
+		}
+	}
+#ifndef HAVE_MLX5DV_DR_DEVX_PORT
+	if (bd >= 0) {
+		/*
+		 * This may happen if there is VF LAG kernel support and
+		 * application is compiled with older rdma_core library.
+		 */
+		DRV_LOG(ERR,
+			"No kernel/verbs support for VF LAG bonding found.");
+		rte_errno = ENOTSUP;
+		ret = -rte_errno;
+		goto exit;
+	}
+#endif
+	/*
+	 * Now we can determine the maximal
+	 * amount of devices to be spawned.
+	 */
+	list = rte_zmalloc("device spawn data",
+			 sizeof(struct mlx5_dev_spawn_data) *
+			 (np ? np : nd),
+			 RTE_CACHE_LINE_SIZE);
+	if (!list) {
+		DRV_LOG(ERR, "spawn data array allocation failure");
+		rte_errno = ENOMEM;
+		ret = -rte_errno;
+		goto exit;
+	}
+	if (bd >= 0 || np > 1) {
+		/*
+		 * Single IB device with multiple ports found,
+		 * it may be E-Switch master device and representors.
+		 * We have to perform identification through the ports.
+		 */
+		MLX5_ASSERT(nl_rdma >= 0);
+		MLX5_ASSERT(ns == 0);
+		MLX5_ASSERT(nd == 1);
+		MLX5_ASSERT(np);
+		for (i = 1; i <= np; ++i) {
+			list[ns].max_port = np;
+			list[ns].ibv_port = i;
+			list[ns].ibv_dev = ibv_match[0];
+			list[ns].eth_dev = NULL;
+			list[ns].pci_dev = pci_dev;
+			list[ns].pf_bond = bd;
+			list[ns].ifindex = mlx5_nl_ifindex
+					(nl_rdma, list[ns].ibv_dev->name, i);
+			if (!list[ns].ifindex) {
+				/*
+				 * No network interface index found for the
+				 * specified port, it means there is no
+				 * representor on this port. It's OK,
+				 * there can be disabled ports, for example
+				 * if sriov_numvfs < sriov_totalvfs.
+				 */
+				continue;
+			}
+			ret = -1;
+			if (nl_route >= 0)
+				ret = mlx5_nl_switch_info
+					       (nl_route,
+						list[ns].ifindex,
+						&list[ns].info);
+			if (ret || (!list[ns].info.representor &&
+				    !list[ns].info.master)) {
+				/*
+				 * We failed to recognize representors with
+				 * Netlink, let's try to perform the task
+				 * with sysfs.
+				 */
+				ret =  mlx5_sysfs_switch_info
+						(list[ns].ifindex,
+						 &list[ns].info);
+			}
+			if (!ret && bd >= 0) {
+				switch (list[ns].info.name_type) {
+				case MLX5_PHYS_PORT_NAME_TYPE_UPLINK:
+					if (list[ns].info.port_name == bd)
+						ns++;
+					break;
+				case MLX5_PHYS_PORT_NAME_TYPE_PFVF:
+					if (list[ns].info.pf_num == bd)
+						ns++;
+					break;
+				default:
+					break;
+				}
+				continue;
+			}
+			if (!ret && (list[ns].info.representor ^
+				     list[ns].info.master))
+				ns++;
+		}
+		if (!ns) {
+			DRV_LOG(ERR,
+				"unable to recognize master/representors"
+				" on the IB device with multiple ports");
+			rte_errno = ENOENT;
+			ret = -rte_errno;
+			goto exit;
+		}
+	} else {
+		/*
+		 * The existence of several matching entries (nd > 1) means
+		 * port representors have been instantiated. No existing Verbs
+		 * call nor sysfs entries can tell them apart, this can only
+		 * be done through Netlink calls assuming kernel drivers are
+		 * recent enough to support them.
+		 *
+		 * In the event of identification failure through Netlink,
+		 * try again through sysfs, then:
+		 *
+		 * 1. A single IB device matches (nd == 1) with single
+		 *    port (np=0/1) and is not a representor, assume
+		 *    no switch support.
+		 *
+		 * 2. Otherwise no safe assumptions can be made;
+		 *    complain louder and bail out.
+		 */
+		for (i = 0; i != nd; ++i) {
+			memset(&list[ns].info, 0, sizeof(list[ns].info));
+			list[ns].max_port = 1;
+			list[ns].ibv_port = 1;
+			list[ns].ibv_dev = ibv_match[i];
+			list[ns].eth_dev = NULL;
+			list[ns].pci_dev = pci_dev;
+			list[ns].pf_bond = -1;
+			list[ns].ifindex = 0;
+			if (nl_rdma >= 0)
+				list[ns].ifindex = mlx5_nl_ifindex
+					(nl_rdma, list[ns].ibv_dev->name, 1);
+			if (!list[ns].ifindex) {
+				char ifname[IF_NAMESIZE];
+
+				/*
+				 * Netlink failed; it may happen with an old
+				 * ib_core kernel driver (before 4.16).
+				 * We can assume an old driver because here
+				 * we are processing single-port IB
+				 * devices. Let's try sysfs to retrieve
+				 * the ifindex. The method works for
+				 * the master device only.
+				 */
+				if (nd > 1) {
+					/*
+					 * Multiple devices found, assume
+					 * representors, can not distinguish
+					 * master/representor and retrieve
+					 * ifindex via sysfs.
+					 */
+					continue;
+				}
+				ret = mlx5_get_master_ifname
+					(ibv_match[i]->ibdev_path, &ifname);
+				if (!ret)
+					list[ns].ifindex =
+						if_nametoindex(ifname);
+				if (!list[ns].ifindex) {
+					/*
+					 * No network interface index found
+					 * for the specified device, it means
+					 * the device is neither a representor
+					 * nor a master.
+					 */
+					continue;
+				}
+			}
+			ret = -1;
+			if (nl_route >= 0)
+				ret = mlx5_nl_switch_info
+					       (nl_route,
+						list[ns].ifindex,
+						&list[ns].info);
+			if (ret || (!list[ns].info.representor &&
+				    !list[ns].info.master)) {
+				/*
+				 * We failed to recognize representors with
+				 * Netlink, let's try to perform the task
+				 * with sysfs.
+				 */
+				ret =  mlx5_sysfs_switch_info
+						(list[ns].ifindex,
+						 &list[ns].info);
+			}
+			if (!ret && (list[ns].info.representor ^
+				     list[ns].info.master)) {
+				ns++;
+			} else if ((nd == 1) &&
+				   !list[ns].info.representor &&
+				   !list[ns].info.master) {
+				/*
+				 * Single IB device with
+				 * one physical port and
+				 * attached network device.
+				 * Maybe SR-IOV is not enabled
+				 * or there are no representors.
+				 */
+				DRV_LOG(INFO, "no E-Switch support detected");
+				ns++;
+				break;
+			}
+		}
+		if (!ns) {
+			DRV_LOG(ERR,
+				"unable to recognize master/representors"
+				" on the multiple IB devices");
+			rte_errno = ENOENT;
+			ret = -rte_errno;
+			goto exit;
+		}
+	}
+	MLX5_ASSERT(ns);
+	/*
+	 * Sort list to probe devices in natural order for users' convenience
+	 * (i.e. master first, then representors from lowest to highest ID).
+	 */
+	qsort(list, ns, sizeof(*list), mlx5_dev_spawn_data_cmp);
+	/* Default configuration. */
+	dev_config = (struct mlx5_dev_config){
+		.hw_padding = 0,
+		.mps = MLX5_ARG_UNSET,
+		.dbnc = MLX5_ARG_UNSET,
+		.rx_vec_en = 1,
+		.txq_inline_max = MLX5_ARG_UNSET,
+		.txq_inline_min = MLX5_ARG_UNSET,
+		.txq_inline_mpw = MLX5_ARG_UNSET,
+		.txqs_inline = MLX5_ARG_UNSET,
+		.vf_nl_en = 1,
+		.mr_ext_memseg_en = 1,
+		.mprq = {
+			.enabled = 0, /* Disabled by default. */
+			.stride_num_n = 0,
+			.stride_size_n = 0,
+			.max_memcpy_len = MLX5_MPRQ_MEMCPY_DEFAULT_LEN,
+			.min_rxqs_num = MLX5_MPRQ_MIN_RXQS,
+		},
+		.dv_esw_en = 1,
+		.dv_flow_en = 1,
+		.log_hp_size = MLX5_ARG_UNSET,
+	};
+	/* Device specific configuration. */
+	switch (pci_dev->id.device_id) {
+	case PCI_DEVICE_ID_MELLANOX_CONNECTX4VF:
+	case PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF:
+	case PCI_DEVICE_ID_MELLANOX_CONNECTX5VF:
+	case PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF:
+	case PCI_DEVICE_ID_MELLANOX_CONNECTX5BFVF:
+	case PCI_DEVICE_ID_MELLANOX_CONNECTX6VF:
+	case PCI_DEVICE_ID_MELLANOX_CONNECTX6DXVF:
+		dev_config.vf = 1;
+		break;
+	default:
+		break;
+	}
+	for (i = 0; i != ns; ++i) {
+		uint32_t restore;
+
+		list[i].eth_dev = mlx5_dev_spawn(&pci_dev->device,
+						 &list[i],
+						 dev_config);
+		if (!list[i].eth_dev) {
+			if (rte_errno != EBUSY && rte_errno != EEXIST)
+				break;
+			/* Device is disabled or already spawned. Ignore it. */
+			continue;
+		}
+		restore = list[i].eth_dev->data->dev_flags;
+		rte_eth_copy_pci_info(list[i].eth_dev, pci_dev);
+		/* Restore non-PCI flags cleared by the above call. */
+		list[i].eth_dev->data->dev_flags |= restore;
+		rte_eth_dev_probing_finish(list[i].eth_dev);
+	}
+	if (i != ns) {
+		DRV_LOG(ERR,
+			"probe of PCI device " PCI_PRI_FMT " aborted after"
+			" encountering an error: %s",
+			pci_dev->addr.domain, pci_dev->addr.bus,
+			pci_dev->addr.devid, pci_dev->addr.function,
+			strerror(rte_errno));
+		ret = -rte_errno;
+		/* Roll back. */
+		while (i--) {
+			if (!list[i].eth_dev)
+				continue;
+			mlx5_dev_close(list[i].eth_dev);
+			/* mac_addrs must not be freed: part of dev_private. */
+			list[i].eth_dev->data->mac_addrs = NULL;
+			claim_zero(rte_eth_dev_release_port(list[i].eth_dev));
+		}
+		/* Restore original error. */
+		rte_errno = -ret;
+	} else {
+		ret = 0;
+	}
+exit:
+	/*
+	 * Do the routine cleanup:
+	 * - close opened Netlink sockets
+	 * - free allocated spawn data array
+	 * - free the Infiniband device list
+	 */
+	if (nl_rdma >= 0)
+		close(nl_rdma);
+	if (nl_route >= 0)
+		close(nl_route);
+	if (list)
+		rte_free(list);
+	MLX5_ASSERT(ibv_list);
+	mlx5_glue->free_device_list(ibv_list);
+	return ret;
+}
+
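+/*
+ * Set the MLX5_SHUT_UP_BF environment variable according to the requested
+ * Tx doorbell mapping (the "tx_db_nc" devarg) and return its previous
+ * state so that it can be restored after the device context is created.
+ */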
+static int
+mlx5_config_doorbell_mapping_env(const struct mlx5_dev_config *config)
+{
+	char *env;
+	int value;
+
+	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
+	/* Get environment variable to store. */
+	env = getenv(MLX5_SHUT_UP_BF);
+	value = env ? !!strcmp(env, "0") : MLX5_ARG_UNSET;
+	if (config->dbnc == MLX5_ARG_UNSET)
+		setenv(MLX5_SHUT_UP_BF, MLX5_SHUT_UP_BF_DEFAULT, 1);
+	else
+		setenv(MLX5_SHUT_UP_BF,
+		       config->dbnc == MLX5_TXDB_NCACHED ? "1" : "0", 1);
+	return value;
+}
+
+static void
+mlx5_restore_doorbell_mapping_env(int value)
+{
+	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
+	/* Restore the original environment variable state. */
+	if (value == MLX5_ARG_UNSET)
+		unsetenv(MLX5_SHUT_UP_BF);
+	else
+		setenv(MLX5_SHUT_UP_BF, value ? "1" : "0", 1);
+}
+
+/**
+ * Extract pdn of PD object using DV API.
+ *
+ * @param[in] pd
+ *   Pointer to the verbs PD object.
+ * @param[out] pdn
+ *   Pointer to the PD object number variable.
+ *
+ * @return
+ *   0 on success, error value otherwise.
+ */
+int
+mlx5_os_get_pdn(void *pd, uint32_t *pdn)
+{
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+	struct mlx5dv_obj obj;
+	struct mlx5dv_pd pd_info;
+	int ret = 0;
+
+	obj.pd.in = pd;
+	obj.pd.out = &pd_info;
+	ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_PD);
+	if (ret) {
+		DRV_LOG(DEBUG, "Fail to get PD object info");
+		return ret;
+	}
+	*pdn = pd_info.pdn;
+	return 0;
+#else
+	(void)pd;
+	(void)pdn;
+	return -ENOTSUP;
+#endif /* HAVE_IBV_FLOW_DV_SUPPORT */
+}
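/*
 * Hypothetical usage sketch (not taken from this patch): generic code can
 * resolve the PD number once while setting up the shared context, assuming
 * sh->pd holds the Verbs protection domain:
 *
 *	err = mlx5_os_get_pdn(sh->pd, &sh->pdn);
 *	if (err) {
 *		DRV_LOG(ERR, "Fail to extract pdn from PD");
 *		goto error;
 *	}
 */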
+
+/**
+ * Function API to open IB device.
+ *
+ * This function calls the Linux glue APIs to open a device.
+ *
+ * @param[in] spawn
+ *   Pointer to the IB device attributes (name, port, etc).
+ * @param[out] config
+ *   Pointer to device configuration structure.
+ * @param[out] sh
+ *   Pointer to shared context structure.
+ *
+ * @return
+ *   0 on success, a positive error value otherwise.
+ */
+int
+mlx5_os_open_device(const struct mlx5_dev_spawn_data *spawn,
+		     const struct mlx5_dev_config *config,
+		     struct mlx5_dev_ctx_shared *sh)
+{
+	int dbmap_env;
+	int err = 0;
+	/*
+	 * Configure environment variable "MLX5_SHUT_UP_BF"
+	 * before the device creation. The rdma_core library
+	 * checks the variable at device creation and
+	 * stores the result internally.
+	 */
+	dbmap_env = mlx5_config_doorbell_mapping_env(config);
+	/* Try to open IB device with DV first, then usual Verbs. */
+	errno = 0;
+	sh->ctx = mlx5_glue->dv_open_device(spawn->ibv_dev);
+	if (sh->ctx) {
+		sh->devx = 1;
+		DRV_LOG(DEBUG, "DevX is supported");
+		/* The device is created, no need for environment. */
+		mlx5_restore_doorbell_mapping_env(dbmap_env);
+	} else {
+		/* The environment variable is still configured. */
+		sh->ctx = mlx5_glue->open_device(spawn->ibv_dev);
+		err = errno ? errno : ENODEV;
+		/*
+		 * The environment variable is not needed anymore,
+		 * all device creation attempts are completed.
+		 */
+		mlx5_restore_doorbell_mapping_env(dbmap_env);
+		if (!sh->ctx)
+			return err;
+		DRV_LOG(DEBUG, "DevX is NOT supported");
+		err = 0;
+	}
+	return err;
+}
+
+/**
+ * Install shared asynchronous device events handler.
+ * This function is implemented to support event sharing
+ * between multiple ports of single IB device.
+ *
+ * @param sh
+ *   Pointer to mlx5_dev_ctx_shared object.
+ */
+void
+mlx5_os_dev_shared_handler_install(struct mlx5_dev_ctx_shared *sh)
+{
+	int ret;
+	int flags;
+
+	sh->intr_handle.fd = -1;
+	flags = fcntl(((struct ibv_context *)sh->ctx)->async_fd, F_GETFL);
+	ret = fcntl(((struct ibv_context *)sh->ctx)->async_fd,
+		    F_SETFL, flags | O_NONBLOCK);
+	if (ret) {
+		DRV_LOG(INFO, "failed to change file descriptor async event"
+			" queue");
+	} else {
+		sh->intr_handle.fd = ((struct ibv_context *)sh->ctx)->async_fd;
+		sh->intr_handle.type = RTE_INTR_HANDLE_EXT;
+		if (rte_intr_callback_register(&sh->intr_handle,
+					mlx5_dev_interrupt_handler, sh)) {
+			DRV_LOG(INFO, "Fail to install the shared interrupt.");
+			sh->intr_handle.fd = -1;
+		}
+	}
+	if (sh->devx) {
+#ifdef HAVE_IBV_DEVX_ASYNC
+		sh->intr_handle_devx.fd = -1;
+		sh->devx_comp = mlx5_glue->devx_create_cmd_comp(sh->ctx);
+		if (!sh->devx_comp) {
+			DRV_LOG(INFO, "failed to allocate devx_comp.");
+			return;
+		}
+		flags = fcntl(sh->devx_comp->fd, F_GETFL);
+		ret = fcntl(sh->devx_comp->fd, F_SETFL, flags | O_NONBLOCK);
+		if (ret) {
+			DRV_LOG(INFO, "failed to change file descriptor"
+				" devx comp");
+			return;
+		}
+		sh->intr_handle_devx.fd = sh->devx_comp->fd;
+		sh->intr_handle_devx.type = RTE_INTR_HANDLE_EXT;
+		if (rte_intr_callback_register(&sh->intr_handle_devx,
+					mlx5_dev_interrupt_handler_devx, sh)) {
+			DRV_LOG(INFO, "Fail to install the devx shared"
+				" interrupt.");
+			sh->intr_handle_devx.fd = -1;
+		}
+#endif /* HAVE_IBV_DEVX_ASYNC */
+	}
+}
+
+/**
+ * Uninstall shared asynchronous device events handler.
+ * This function is implemented to support event sharing
+ * between multiple ports of single IB device.
+ *
+ * @param sh
+ *   Pointer to mlx5_dev_ctx_shared object.
+ */
+void
+mlx5_os_dev_shared_handler_uninstall(struct mlx5_dev_ctx_shared *sh)
+{
+	if (sh->intr_handle.fd >= 0)
+		mlx5_intr_callback_unregister(&sh->intr_handle,
+					      mlx5_dev_interrupt_handler, sh);
+#ifdef HAVE_IBV_DEVX_ASYNC
+	if (sh->intr_handle_devx.fd >= 0)
+		rte_intr_callback_unregister(&sh->intr_handle_devx,
+				  mlx5_dev_interrupt_handler_devx, sh);
+	if (sh->devx_comp)
+		mlx5_glue->devx_destroy_cmd_comp(sh->devx_comp);
+#endif
+}
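Taken together, the generic code is expected to reach the Verbs/DevX context only through the wrappers above. A hypothetical combination (the helper name and body are illustrative; only the two mlx5_os_*() calls come from this file):

/* Illustrative only: open the device and query attributes via the OS layer. */
static int
mlx5_dev_ctx_open(const struct mlx5_dev_spawn_data *spawn,
		  const struct mlx5_dev_config *config,
		  struct mlx5_dev_ctx_shared *sh)
{
	int err;

	/* Sets sh->ctx and sh->devx, handling MLX5_SHUT_UP_BF internally. */
	err = mlx5_os_open_device(spawn, config, sh);
	if (err)
		return err;
	/* Fill the OS-independent device attributes from the new context. */
	err = mlx5_os_get_dev_attr(sh->ctx, &sh->device_attr);
	if (err)
		DRV_LOG(DEBUG, "mlx5_os_get_dev_attr() failed: %d", err);
	return err;
}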
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 4f7b4d3..f62ad12 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -10,7 +10,6 @@
 #include <stdlib.h>
 #include <errno.h>
 #include <net/if.h>
-#include <fcntl.h>
 #include <sys/mman.h>
 #include <linux/rtnetlink.h>
 
@@ -164,15 +163,6 @@
 /* Flow memory reclaim mode. */
 #define MLX5_RECLAIM_MEM "reclaim_mem_mode"
 
-#ifndef HAVE_IBV_MLX5_MOD_MPW
-#define MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED (1 << 2)
-#define MLX5DV_CONTEXT_FLAGS_ENHANCED_MPW (1 << 3)
-#endif
-
-#ifndef HAVE_IBV_MLX5_MOD_CQE_128B_COMP
-#define MLX5DV_CONTEXT_FLAGS_CQE_128B_COMP (1 << 4)
-#endif
-
 static const char *MZ_MLX5_PMD_SHARED_DATA = "mlx5_pmd_shared_data";
 
 /* Shared memory between primary and secondary processes. */
@@ -183,22 +173,9 @@ static rte_spinlock_t mlx5_shared_data_lock = RTE_SPINLOCK_INITIALIZER;
 
 /* Process local data for secondary processes. */
 static struct mlx5_local_data mlx5_local_data;
-
 /** Driver-specific log messages type. */
 int mlx5_logtype;
 
-/** Data associated with devices to spawn. */
-struct mlx5_dev_spawn_data {
-	uint32_t ifindex; /**< Network interface index. */
-	uint32_t max_port; /**< IB device maximal port index. */
-	uint32_t ibv_port; /**< IB device physical port index. */
-	int pf_bond; /**< bonding device PF index. < 0 - no bonding */
-	struct mlx5_switch_info info; /**< Switch information. */
-	struct ibv_device *ibv_dev; /**< Associated IB device. */
-	struct rte_eth_dev *eth_dev; /**< Associated Ethernet device. */
-	struct rte_pci_device *pci_dev; /**< Backend PCI device. */
-};
-
 static LIST_HEAD(, mlx5_dev_ctx_shared) mlx5_ibv_list = LIST_HEAD_INITIALIZER();
 static pthread_mutex_t mlx5_ibv_list_mutex = PTHREAD_MUTEX_INITIALIZER;
 
@@ -320,7 +297,6 @@ static struct mlx5_indexed_pool_config mlx5_ipool_cfg[] = {
 #define MLX5_ID_GENERATION_ARRAY_FACTOR 16
 
 #define MLX5_FLOW_TABLE_HLIST_ARRAY_SIZE 4096
-#define MLX5_TAGS_HLIST_ARRAY_SIZE 8192
 
 /**
  * Allocate ID pool structure.
@@ -603,146 +579,6 @@ mlx5_flow_ipool_destroy(struct mlx5_dev_ctx_shared *sh)
 }
 
 /**
- * Extract pdn of PD object using DV API.
- *
- * @param[in] pd
- *   Pointer to the verbs PD object.
- * @param[out] pdn
- *   Pointer to the PD object number variable.
- *
- * @return
- *   0 on success, error value otherwise.
- */
-#ifdef HAVE_IBV_FLOW_DV_SUPPORT
-static int
-mlx5_get_pdn(struct ibv_pd *pd __rte_unused, uint32_t *pdn __rte_unused)
-{
-	struct mlx5dv_obj obj;
-	struct mlx5dv_pd pd_info;
-	int ret = 0;
-
-	obj.pd.in = pd;
-	obj.pd.out = &pd_info;
-	ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_PD);
-	if (ret) {
-		DRV_LOG(DEBUG, "Fail to get PD object info");
-		return ret;
-	}
-	*pdn = pd_info.pdn;
-	return 0;
-}
-#endif /* HAVE_IBV_FLOW_DV_SUPPORT */
-
-static int
-mlx5_config_doorbell_mapping_env(const struct mlx5_dev_config *config)
-{
-	char *env;
-	int value;
-
-	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
-	/* Get environment variable to store. */
-	env = getenv(MLX5_SHUT_UP_BF);
-	value = env ? !!strcmp(env, "0") : MLX5_ARG_UNSET;
-	if (config->dbnc == MLX5_ARG_UNSET)
-		setenv(MLX5_SHUT_UP_BF, MLX5_SHUT_UP_BF_DEFAULT, 1);
-	else
-		setenv(MLX5_SHUT_UP_BF,
-		       config->dbnc == MLX5_TXDB_NCACHED ? "1" : "0", 1);
-	return value;
-}
-
-static void
-mlx5_restore_doorbell_mapping_env(int value)
-{
-	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
-	/* Restore the original environment variable state. */
-	if (value == MLX5_ARG_UNSET)
-		unsetenv(MLX5_SHUT_UP_BF);
-	else
-		setenv(MLX5_SHUT_UP_BF, value ? "1" : "0", 1);
-}
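
The two helpers above save whatever value MLX5_SHUT_UP_BF currently has, force the value rdma-core should see at device-creation time, and later restore the original state. A generic sketch of the same save/override/restore idiom (the variable name below is a placeholder):

#include <stdlib.h>
#include <string.h>

#define ARG_UNSET (-1)

/* Save the current value (as a tri-state flag) and force the wanted one. */
static int env_override(const char *name, int want)
{
	char *env = getenv(name);
	int saved = env ? !!strcmp(env, "0") : ARG_UNSET;

	setenv(name, want ? "1" : "0", 1);
	return saved;
}

/* Put the variable back exactly as it was before env_override(). */
static void env_restore(const char *name, int saved)
{
	if (saved == ARG_UNSET)
		unsetenv(name);
	else
		setenv(name, saved ? "1" : "0", 1);
}

int main(void)
{
	int saved = env_override("EXAMPLE_SHUT_UP_VAR", 1);

	/* ... the library call that reads the variable would go here ... */
	env_restore("EXAMPLE_SHUT_UP_VAR", saved);
	return 0;
}
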
-
-/**
- * Install shared asynchronous device events handler.
- * This function is implemented to support event sharing
- * between multiple ports of single IB device.
- *
- * @param sh
- *   Pointer to mlx5_dev_ctx_shared object.
- */
-static void
-mlx5_dev_shared_handler_install(struct mlx5_dev_ctx_shared *sh)
-{
-	int ret;
-	int flags;
-
-	sh->intr_handle.fd = -1;
-	flags = fcntl(((struct ibv_context *)sh->ctx)->async_fd, F_GETFL);
-	ret = fcntl(((struct ibv_context *)sh->ctx)->async_fd,
-		    F_SETFL, flags | O_NONBLOCK);
-	if (ret) {
-		DRV_LOG(INFO, "failed to change file descriptor async event"
-			" queue");
-	} else {
-		sh->intr_handle.fd = ((struct ibv_context *)sh->ctx)->async_fd;
-		sh->intr_handle.type = RTE_INTR_HANDLE_EXT;
-		if (rte_intr_callback_register(&sh->intr_handle,
-					mlx5_dev_interrupt_handler, sh)) {
-			DRV_LOG(INFO, "Fail to install the shared interrupt.");
-			sh->intr_handle.fd = -1;
-		}
-	}
-	if (sh->devx) {
-#ifdef HAVE_IBV_DEVX_ASYNC
-		sh->intr_handle_devx.fd = -1;
-		sh->devx_comp = mlx5_glue->devx_create_cmd_comp(sh->ctx);
-		if (!sh->devx_comp) {
-			DRV_LOG(INFO, "failed to allocate devx_comp.");
-			return;
-		}
-		flags = fcntl(sh->devx_comp->fd, F_GETFL);
-		ret = fcntl(sh->devx_comp->fd, F_SETFL, flags | O_NONBLOCK);
-		if (ret) {
-			DRV_LOG(INFO, "failed to change file descriptor"
-				" devx comp");
-			return;
-		}
-		sh->intr_handle_devx.fd = sh->devx_comp->fd;
-		sh->intr_handle_devx.type = RTE_INTR_HANDLE_EXT;
-		if (rte_intr_callback_register(&sh->intr_handle_devx,
-					mlx5_dev_interrupt_handler_devx, sh)) {
-			DRV_LOG(INFO, "Fail to install the devx shared"
-				" interrupt.");
-			sh->intr_handle_devx.fd = -1;
-		}
-#endif /* HAVE_IBV_DEVX_ASYNC */
-	}
-}
-
-/**
- * Uninstall shared asynchronous device events handler.
- * This function is implemented to support event sharing
- * between multiple ports of single IB device.
- *
- * @param dev
- *   Pointer to mlx5_dev_ctx_shared object.
- */
-static void
-mlx5_dev_shared_handler_uninstall(struct mlx5_dev_ctx_shared *sh)
-{
-	if (sh->intr_handle.fd >= 0)
-		mlx5_intr_callback_unregister(&sh->intr_handle,
-					      mlx5_dev_interrupt_handler, sh);
-#ifdef HAVE_IBV_DEVX_ASYNC
-	if (sh->intr_handle_devx.fd >= 0)
-		rte_intr_callback_unregister(&sh->intr_handle_devx,
-				  mlx5_dev_interrupt_handler_devx, sh);
-	if (sh->devx_comp)
-		mlx5_glue->devx_destroy_cmd_comp(sh->devx_comp);
-#endif
-}
-
-/**
  * Allocate shared IB device context. If there is multiport device the
  * master and representors will share this context, if there is single
  * port dedicated IB device, the context will be used by only given
@@ -762,17 +598,14 @@ mlx5_dev_shared_handler_uninstall(struct mlx5_dev_ctx_shared *sh)
  *   Pointer to mlx5_dev_ctx_shared object on success,
  *   otherwise NULL and rte_errno is set.
  */
-static struct mlx5_dev_ctx_shared *
+struct mlx5_dev_ctx_shared *
 mlx5_alloc_shared_ibctx(const struct mlx5_dev_spawn_data *spawn,
 			const struct mlx5_dev_config *config)
 {
 	struct mlx5_dev_ctx_shared *sh;
-	int dbmap_env;
 	int err = 0;
 	uint32_t i;
-#ifdef HAVE_IBV_FLOW_DV_SUPPORT
 	struct mlx5_devx_tis_attr tis_attr = { 0 };
-#endif
 
 	MLX5_ASSERT(spawn);
 	/* Secondary process should not create the shared context. */
@@ -797,34 +630,9 @@ mlx5_alloc_shared_ibctx(const struct mlx5_dev_spawn_data *spawn,
 		rte_errno  = ENOMEM;
 		goto exit;
 	}
-	/*
-	 * Configure environment variable "MLX5_BF_SHUT_UP"
-	 * before the device creation. The rdma_core library
-	 * checks the variable at device creation and
-	 * stores the result internally.
-	 */
-	dbmap_env = mlx5_config_doorbell_mapping_env(config);
-	/* Try to open IB device with DV first, then usual Verbs. */
-	errno = 0;
-	sh->ctx = mlx5_glue->dv_open_device(spawn->ibv_dev);
-	if (sh->ctx) {
-		sh->devx = 1;
-		DRV_LOG(DEBUG, "DevX is supported");
-		/* The device is created, no need for environment. */
-		mlx5_restore_doorbell_mapping_env(dbmap_env);
-	} else {
-		/* The environment variable is still configured. */
-		sh->ctx = mlx5_glue->open_device(spawn->ibv_dev);
-		err = errno ? errno : ENODEV;
-		/*
-		 * The environment variable is not needed anymore,
-		 * all device creation attempts are completed.
-		 */
-		mlx5_restore_doorbell_mapping_env(dbmap_env);
-		if (!sh->ctx)
-			goto error;
-		DRV_LOG(DEBUG, "DevX is NOT supported");
-	}
+	err = mlx5_os_open_device(spawn, config, sh);
+	if (!sh->ctx)
+		goto error;
 	err = mlx5_os_get_dev_attr(sh->ctx, &sh->device_attr);
 	if (err) {
 		DRV_LOG(DEBUG, "mlx5_os_get_dev_attr() failed");
@@ -851,9 +659,8 @@ mlx5_alloc_shared_ibctx(const struct mlx5_dev_spawn_data *spawn,
 		err = ENOMEM;
 		goto error;
 	}
-#ifdef HAVE_IBV_FLOW_DV_SUPPORT
 	if (sh->devx) {
-		err = mlx5_get_pdn(sh->pd, &sh->pdn);
+		err = mlx5_os_get_pdn(sh->pd, &sh->pdn);
 		if (err) {
 			DRV_LOG(ERR, "Fail to extract pdn from PD");
 			goto error;
@@ -879,7 +686,6 @@ mlx5_alloc_shared_ibctx(const struct mlx5_dev_spawn_data *spawn,
 		err = ENOMEM;
 		goto error;
 	}
-#endif /* HAVE_IBV_FLOW_DV_SUPPORT */
 	/*
 	 * Once the device is added to the list of memory event
 	 * callback, its global MR cache table cannot be expanded
@@ -896,7 +702,7 @@ mlx5_alloc_shared_ibctx(const struct mlx5_dev_spawn_data *spawn,
 		err = rte_errno;
 		goto error;
 	}
-	mlx5_dev_shared_handler_install(sh);
+	mlx5_os_dev_shared_handler_install(sh);
 	mlx5_flow_aging_init(sh);
 	mlx5_flow_counters_mng_init(sh);
 	mlx5_flow_ipool_create(sh, config);
@@ -936,7 +742,7 @@ mlx5_alloc_shared_ibctx(const struct mlx5_dev_spawn_data *spawn,
  * @param[in] sh
  *   Pointer to mlx5_dev_ctx_shared object to free
  */
-static void
+void
 mlx5_free_shared_ibctx(struct mlx5_dev_ctx_shared *sh)
 {
 	pthread_mutex_lock(&mlx5_ibv_list_mutex);
@@ -973,7 +779,7 @@ mlx5_free_shared_ibctx(struct mlx5_dev_ctx_shared *sh)
 	 **/
 	mlx5_flow_counters_mng_close(sh);
 	mlx5_flow_ipool_destroy(sh);
-	mlx5_dev_shared_handler_uninstall(sh);
+	mlx5_os_dev_shared_handler_uninstall(sh);
 	if (sh->pd)
 		claim_zero(mlx5_glue->dealloc_pd(sh->pd));
 	if (sh->tis)
@@ -995,7 +801,7 @@ mlx5_free_shared_ibctx(struct mlx5_dev_ctx_shared *sh)
  * @param[in] priv
  *   Pointer to the private device data structure.
  */
-static void
+void
 mlx5_free_table_hash_list(struct mlx5_priv *priv)
 {
 	struct mlx5_dev_ctx_shared *sh = priv->sh;
@@ -1052,7 +858,7 @@ mlx5_free_table_hash_list(struct mlx5_priv *priv)
  * @return
  *   Zero on success, positive error code otherwise.
  */
-static int
+int
 mlx5_alloc_table_hash_list(struct mlx5_priv *priv)
 {
 	struct mlx5_dev_ctx_shared *sh = priv->sh;
@@ -1127,173 +933,6 @@ mlx5_alloc_table_hash_list(struct mlx5_priv *priv)
 }
 
 /**
- * Initialize DR related data within private structure.
- * Routine checks the reference counter and does actual
- * resources creation/initialization only if counter is zero.
- *
- * @param[in] priv
- *   Pointer to the private device data structure.
- *
- * @return
- *   Zero on success, positive error code otherwise.
- */
-static int
-mlx5_alloc_shared_dr(struct mlx5_priv *priv)
-{
-	struct mlx5_dev_ctx_shared *sh = priv->sh;
-	char s[MLX5_HLIST_NAMESIZE];
-	int err = 0;
-
-	if (!sh->flow_tbls)
-		err = mlx5_alloc_table_hash_list(priv);
-	else
-		DRV_LOG(DEBUG, "sh->flow_tbls[%p] already created, reuse\n",
-			(void *)sh->flow_tbls);
-	if (err)
-		return err;
-	/* Create tags hash list table. */
-	snprintf(s, sizeof(s), "%s_tags", sh->ibdev_name);
-	sh->tag_table = mlx5_hlist_create(s, MLX5_TAGS_HLIST_ARRAY_SIZE);
-	if (!sh->tag_table) {
-		DRV_LOG(ERR, "tags with hash creation failed.\n");
-		err = ENOMEM;
-		goto error;
-	}
-#ifdef HAVE_MLX5DV_DR
-	void *domain;
-
-	if (sh->dv_refcnt) {
-		/* Shared DV/DR structures is already initialized. */
-		sh->dv_refcnt++;
-		priv->dr_shared = 1;
-		return 0;
-	}
-	/* Reference counter is zero, we should initialize structures. */
-	domain = mlx5_glue->dr_create_domain(sh->ctx,
-					     MLX5DV_DR_DOMAIN_TYPE_NIC_RX);
-	if (!domain) {
-		DRV_LOG(ERR, "ingress mlx5dv_dr_create_domain failed");
-		err = errno;
-		goto error;
-	}
-	sh->rx_domain = domain;
-	domain = mlx5_glue->dr_create_domain(sh->ctx,
-					     MLX5DV_DR_DOMAIN_TYPE_NIC_TX);
-	if (!domain) {
-		DRV_LOG(ERR, "egress mlx5dv_dr_create_domain failed");
-		err = errno;
-		goto error;
-	}
-	pthread_mutex_init(&sh->dv_mutex, NULL);
-	sh->tx_domain = domain;
-#ifdef HAVE_MLX5DV_DR_ESWITCH
-	if (priv->config.dv_esw_en) {
-		domain  = mlx5_glue->dr_create_domain
-			(sh->ctx, MLX5DV_DR_DOMAIN_TYPE_FDB);
-		if (!domain) {
-			DRV_LOG(ERR, "FDB mlx5dv_dr_create_domain failed");
-			err = errno;
-			goto error;
-		}
-		sh->fdb_domain = domain;
-		sh->esw_drop_action = mlx5_glue->dr_create_flow_action_drop();
-	}
-#endif
-	if (priv->config.reclaim_mode == MLX5_RCM_AGGR) {
-		mlx5_glue->dr_reclaim_domain_memory(sh->rx_domain, 1);
-		mlx5_glue->dr_reclaim_domain_memory(sh->tx_domain, 1);
-		if (sh->fdb_domain)
-			mlx5_glue->dr_reclaim_domain_memory(sh->fdb_domain, 1);
-	}
-	sh->pop_vlan_action = mlx5_glue->dr_create_flow_action_pop_vlan();
-#endif /* HAVE_MLX5DV_DR */
-	sh->dv_refcnt++;
-	priv->dr_shared = 1;
-	return 0;
-error:
-	/* Rollback the created objects. */
-	if (sh->rx_domain) {
-		mlx5_glue->dr_destroy_domain(sh->rx_domain);
-		sh->rx_domain = NULL;
-	}
-	if (sh->tx_domain) {
-		mlx5_glue->dr_destroy_domain(sh->tx_domain);
-		sh->tx_domain = NULL;
-	}
-	if (sh->fdb_domain) {
-		mlx5_glue->dr_destroy_domain(sh->fdb_domain);
-		sh->fdb_domain = NULL;
-	}
-	if (sh->esw_drop_action) {
-		mlx5_glue->destroy_flow_action(sh->esw_drop_action);
-		sh->esw_drop_action = NULL;
-	}
-	if (sh->pop_vlan_action) {
-		mlx5_glue->destroy_flow_action(sh->pop_vlan_action);
-		sh->pop_vlan_action = NULL;
-	}
-	if (sh->tag_table) {
-		/* tags should be destroyed with flow before. */
-		mlx5_hlist_destroy(sh->tag_table, NULL, NULL);
-		sh->tag_table = NULL;
-	}
-	mlx5_free_table_hash_list(priv);
-	return err;
-}
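
mlx5_alloc_shared_dr() above acquires the RX/TX/FDB domains one after another and, on any failure, rolls back whatever was already created before returning the error. The same goto-based rollback shape, reduced to a neutral example with plain allocations standing in for the DR objects:

#include <errno.h>
#include <stdlib.h>

struct ctx {
	void *rx_domain;
	void *tx_domain;
};

static int ctx_init(struct ctx *c)
{
	int err;

	c->rx_domain = malloc(64);	/* stands in for domain creation */
	if (!c->rx_domain) {
		err = ENOMEM;
		goto error;
	}
	c->tx_domain = malloc(64);
	if (!c->tx_domain) {
		err = ENOMEM;
		goto error;
	}
	return 0;
error:
	/* Roll back whatever was created; free() tolerates NULL. */
	free(c->tx_domain);
	c->tx_domain = NULL;
	free(c->rx_domain);
	c->rx_domain = NULL;
	return err;
}

int main(void)
{
	struct ctx c = { 0 };

	if (ctx_init(&c))
		return 1;
	free(c.tx_domain);
	free(c.rx_domain);
	return 0;
}
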
-
-/**
- * Destroy DR related data within private structure.
- *
- * @param[in] priv
- *   Pointer to the private device data structure.
- */
-static void
-mlx5_free_shared_dr(struct mlx5_priv *priv)
-{
-	struct mlx5_dev_ctx_shared *sh;
-
-	if (!priv->dr_shared)
-		return;
-	priv->dr_shared = 0;
-	sh = priv->sh;
-	MLX5_ASSERT(sh);
-#ifdef HAVE_MLX5DV_DR
-	MLX5_ASSERT(sh->dv_refcnt);
-	if (sh->dv_refcnt && --sh->dv_refcnt)
-		return;
-	if (sh->rx_domain) {
-		mlx5_glue->dr_destroy_domain(sh->rx_domain);
-		sh->rx_domain = NULL;
-	}
-	if (sh->tx_domain) {
-		mlx5_glue->dr_destroy_domain(sh->tx_domain);
-		sh->tx_domain = NULL;
-	}
-#ifdef HAVE_MLX5DV_DR_ESWITCH
-	if (sh->fdb_domain) {
-		mlx5_glue->dr_destroy_domain(sh->fdb_domain);
-		sh->fdb_domain = NULL;
-	}
-	if (sh->esw_drop_action) {
-		mlx5_glue->destroy_flow_action(sh->esw_drop_action);
-		sh->esw_drop_action = NULL;
-	}
-#endif
-	if (sh->pop_vlan_action) {
-		mlx5_glue->destroy_flow_action(sh->pop_vlan_action);
-		sh->pop_vlan_action = NULL;
-	}
-	pthread_mutex_destroy(&sh->dv_mutex);
-#endif /* HAVE_MLX5DV_DR */
-	if (sh->tag_table) {
-		/* tags should be destroyed with flow before. */
-		mlx5_hlist_destroy(sh->tag_table, NULL, NULL);
-		sh->tag_table = NULL;
-	}
-	mlx5_free_table_hash_list(priv);
-}
-
-/**
  * Initialize shared data between primary and secondary process.
  *
  * A memzone is reserved by primary process and secondary processes attach to
@@ -1362,60 +1001,6 @@ mlx5_getenv_int(const char *name)
 }
 
 /**
- * Verbs callback to allocate a memory. This function should allocate the space
- * according to the size provided residing inside a huge page.
- * Please note that all allocation must respect the alignment from libmlx5
- * (i.e. currently sysconf(_SC_PAGESIZE)).
- *
- * @param[in] size
- *   The size in bytes of the memory to allocate.
- * @param[in] data
- *   A pointer to the callback data.
- *
- * @return
- *   Allocated buffer, NULL otherwise and rte_errno is set.
- */
-static void *
-mlx5_alloc_verbs_buf(size_t size, void *data)
-{
-	struct mlx5_priv *priv = data;
-	void *ret;
-	size_t alignment = sysconf(_SC_PAGESIZE);
-	unsigned int socket = SOCKET_ID_ANY;
-
-	if (priv->verbs_alloc_ctx.type == MLX5_VERBS_ALLOC_TYPE_TX_QUEUE) {
-		const struct mlx5_txq_ctrl *ctrl = priv->verbs_alloc_ctx.obj;
-
-		socket = ctrl->socket;
-	} else if (priv->verbs_alloc_ctx.type ==
-		   MLX5_VERBS_ALLOC_TYPE_RX_QUEUE) {
-		const struct mlx5_rxq_ctrl *ctrl = priv->verbs_alloc_ctx.obj;
-
-		socket = ctrl->socket;
-	}
-	MLX5_ASSERT(data != NULL);
-	ret = rte_malloc_socket(__func__, size, alignment, socket);
-	if (!ret && size)
-		rte_errno = ENOMEM;
-	return ret;
-}
-
-/**
- * Verbs callback to free a memory.
- *
- * @param[in] ptr
- *   A pointer to the memory to free.
- * @param[in] data
- *   A pointer to the callback data.
- */
-static void
-mlx5_free_verbs_buf(void *ptr, void *data __rte_unused)
-{
-	MLX5_ASSERT(data != NULL);
-	rte_free(ptr);
-}
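
The callbacks above let libmlx5 allocate data-plane buffers through the PMD: page-aligned and, when the queue context is known, on the matching NUMA socket. A libc-only sketch of an equivalent callback pair (page alignment only, no NUMA placement):

#define _POSIX_C_SOURCE 200112L
#include <stdlib.h>
#include <unistd.h>

/* Allocate 'size' bytes aligned to the system page size. */
static void *verbs_buf_alloc(size_t size, void *data)
{
	size_t page = (size_t)sysconf(_SC_PAGESIZE);
	void *buf = NULL;

	(void)data;	/* would carry the per-queue context */
	if (posix_memalign(&buf, page, size))
		return NULL;
	return buf;
}

static void verbs_buf_free(void *ptr, void *data)
{
	(void)data;
	free(ptr);
}

int main(void)
{
	void *buf = verbs_buf_alloc(4096, NULL);

	verbs_buf_free(buf, NULL);
	return buf ? 0 : 1;
}
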
-
-/**
  * DPDK callback to add udp tunnel port
  *
  * @param[in] dev
@@ -1496,7 +1081,7 @@ mlx5_proc_priv_uninit(struct rte_eth_dev *dev)
  * @param dev
  *   Pointer to Ethernet device structure.
  */
-static void
+void
 mlx5_dev_close(struct rte_eth_dev *dev)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
@@ -1558,7 +1143,7 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	if (priv->mreg_cp_tbl)
 		mlx5_hlist_destroy(priv->mreg_cp_tbl, NULL, NULL);
 	mlx5_mprq_free_mp(dev);
-	mlx5_free_shared_dr(priv);
+	mlx5_os_free_shared_dr(priv);
 	if (priv->rss_conf.rss_key != NULL)
 		rte_free(priv->rss_conf.rss_key);
 	if (priv->reta_idx != NULL)
@@ -1696,7 +1281,7 @@ const struct eth_dev_ops mlx5_dev_ops = {
 };
 
 /* Available operations from secondary process. */
-static const struct eth_dev_ops mlx5_dev_sec_ops = {
+const struct eth_dev_ops mlx5_dev_sec_ops = {
 	.stats_get = mlx5_stats_get,
 	.stats_reset = mlx5_stats_reset,
 	.xstats_get = mlx5_xstats_get,
@@ -1904,7 +1489,7 @@ mlx5_args_check(const char *key, const char *val, void *opaque)
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-static int
+int
 mlx5_args(struct mlx5_dev_config *config, struct rte_devargs *devargs)
 {
 	const char **params = (const char *[]){
@@ -1970,8 +1555,6 @@ mlx5_args(struct mlx5_dev_config *config, struct rte_devargs *devargs)
 	return 0;
 }
 
-static struct rte_pci_driver mlx5_driver;
-
 /**
  * PMD global initialization.
  *
@@ -1982,7 +1565,7 @@ static struct rte_pci_driver mlx5_driver;
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-static int
+int
 mlx5_init_once(void)
 {
 	struct mlx5_shared_data *sd;
@@ -2042,7 +1625,7 @@ mlx5_init_once(void)
  * @param config
  *   Device configuration parameters.
  */
-static void
+void
 mlx5_set_min_inline(struct mlx5_dev_spawn_data *spawn,
 		    struct mlx5_dev_config *config)
 {
@@ -2152,7 +1735,7 @@ mlx5_set_min_inline(struct mlx5_dev_spawn_data *spawn,
  * @param [in] dev
  *   Pointer to Ethernet device.
  */
-static void
+void
 mlx5_set_metadata_mask(struct rte_eth_dev *dev)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
@@ -2340,21 +1923,20 @@ rte_pmd_mlx5_get_dyn_flag_names(char *names[], unsigned int n)
 }
 
 /**
  * Check sibling device configurations.
  *
  * Sibling devices sharing the Infiniband device context
  * should have compatible configurations. This regards
  * representors and bonding slaves.
  *
  * @param priv
  *   Private device descriptor.
  * @param config
  *   Configuration of the device is going to be created.
  *
  * @return
  *   0 on success, EINVAL otherwise
  */
-static int
+int
 mlx5_dev_check_sibling_config(struct mlx5_priv *priv,
 			      struct mlx5_dev_config *config)
 {
@@ -2392,1367 +1974,6 @@ mlx5_dev_check_sibling_config(struct mlx5_priv *priv,
 	}
 	return 0;
 }
-/**
- * Spawn an Ethernet device from Verbs information.
- *
- * @param dpdk_dev
- *   Backing DPDK device.
- * @param spawn
- *   Verbs device parameters (name, port, switch_info) to spawn.
- * @param config
- *   Device configuration parameters.
- *
- * @return
- *   A valid Ethernet device object on success, NULL otherwise and rte_errno
- *   is set. The following errors are defined:
- *
- *   EBUSY: device is not supposed to be spawned.
- *   EEXIST: device is already spawned
- */
-static struct rte_eth_dev *
-mlx5_dev_spawn(struct rte_device *dpdk_dev,
-	       struct mlx5_dev_spawn_data *spawn,
-	       struct mlx5_dev_config config)
-{
-	const struct mlx5_switch_info *switch_info = &spawn->info;
-	struct mlx5_dev_ctx_shared *sh = NULL;
-	struct ibv_port_attr port_attr;
-	struct mlx5dv_context dv_attr = { .comp_mask = 0 };
-	struct rte_eth_dev *eth_dev = NULL;
-	struct mlx5_priv *priv = NULL;
-	int err = 0;
-	unsigned int hw_padding = 0;
-	unsigned int mps;
-	unsigned int cqe_comp;
-	unsigned int cqe_pad = 0;
-	unsigned int tunnel_en = 0;
-	unsigned int mpls_en = 0;
-	unsigned int swp = 0;
-	unsigned int mprq = 0;
-	unsigned int mprq_min_stride_size_n = 0;
-	unsigned int mprq_max_stride_size_n = 0;
-	unsigned int mprq_min_stride_num_n = 0;
-	unsigned int mprq_max_stride_num_n = 0;
-	struct rte_ether_addr mac;
-	char name[RTE_ETH_NAME_MAX_LEN];
-	int own_domain_id = 0;
-	uint16_t port_id;
-	unsigned int i;
-#ifdef HAVE_MLX5DV_DR_DEVX_PORT
-	struct mlx5dv_devx_port devx_port = { .comp_mask = 0 };
-#endif
-
-	/* Determine if this port representor is supposed to be spawned. */
-	if (switch_info->representor && dpdk_dev->devargs) {
-		struct rte_eth_devargs eth_da;
-
-		err = rte_eth_devargs_parse(dpdk_dev->devargs->args, &eth_da);
-		if (err) {
-			rte_errno = -err;
-			DRV_LOG(ERR, "failed to process device arguments: %s",
-				strerror(rte_errno));
-			return NULL;
-		}
-		for (i = 0; i < eth_da.nb_representor_ports; ++i)
-			if (eth_da.representor_ports[i] ==
-			    (uint16_t)switch_info->port_name)
-				break;
-		if (i == eth_da.nb_representor_ports) {
-			rte_errno = EBUSY;
-			return NULL;
-		}
-	}
-	/* Build device name. */
-	if (spawn->pf_bond <  0) {
-		/* Single device. */
-		if (!switch_info->representor)
-			strlcpy(name, dpdk_dev->name, sizeof(name));
-		else
-			snprintf(name, sizeof(name), "%s_representor_%u",
-				 dpdk_dev->name, switch_info->port_name);
-	} else {
-		/* Bonding device. */
-		if (!switch_info->representor)
-			snprintf(name, sizeof(name), "%s_%s",
-				 dpdk_dev->name, spawn->ibv_dev->name);
-		else
-			snprintf(name, sizeof(name), "%s_%s_representor_%u",
-				 dpdk_dev->name, spawn->ibv_dev->name,
-				 switch_info->port_name);
-	}
-	/* check if the device is already spawned */
-	if (rte_eth_dev_get_port_by_name(name, &port_id) == 0) {
-		rte_errno = EEXIST;
-		return NULL;
-	}
-	DRV_LOG(DEBUG, "naming Ethernet device \"%s\"", name);
-	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
-		struct mlx5_mp_id mp_id;
-
-		eth_dev = rte_eth_dev_attach_secondary(name);
-		if (eth_dev == NULL) {
-			DRV_LOG(ERR, "can not attach rte ethdev");
-			rte_errno = ENOMEM;
-			return NULL;
-		}
-		eth_dev->device = dpdk_dev;
-		eth_dev->dev_ops = &mlx5_dev_sec_ops;
-		err = mlx5_proc_priv_init(eth_dev);
-		if (err)
-			return NULL;
-		mp_id.port_id = eth_dev->data->port_id;
-		strlcpy(mp_id.name, MLX5_MP_NAME, RTE_MP_MAX_NAME_LEN);
-		/* Receive command fd from primary process */
-		err = mlx5_mp_req_verbs_cmd_fd(&mp_id);
-		if (err < 0)
-			goto err_secondary;
-		/* Remap UAR for Tx queues. */
-		err = mlx5_tx_uar_init_secondary(eth_dev, err);
-		if (err)
-			goto err_secondary;
-		/*
-		 * Ethdev pointer is still required as input since
-		 * the primary device is not accessible from the
-		 * secondary process.
-		 */
-		eth_dev->rx_pkt_burst = mlx5_select_rx_function(eth_dev);
-		eth_dev->tx_pkt_burst = mlx5_select_tx_function(eth_dev);
-		return eth_dev;
-err_secondary:
-		mlx5_dev_close(eth_dev);
-		return NULL;
-	}
-	/*
-	 * Some parameters ("tx_db_nc" in particularly) are needed in
-	 * advance to create dv/verbs device context. We proceed the
-	 * devargs here to get ones, and later proceed devargs again
-	 * to override some hardware settings.
-	 */
-	err = mlx5_args(&config, dpdk_dev->devargs);
-	if (err) {
-		err = rte_errno;
-		DRV_LOG(ERR, "failed to process device arguments: %s",
-			strerror(rte_errno));
-		goto error;
-	}
-	sh = mlx5_alloc_shared_ibctx(spawn, &config);
-	if (!sh)
-		return NULL;
-	config.devx = sh->devx;
-#ifdef HAVE_MLX5DV_DR_ACTION_DEST_DEVX_TIR
-	config.dest_tir = 1;
-#endif
-#ifdef HAVE_IBV_MLX5_MOD_SWP
-	dv_attr.comp_mask |= MLX5DV_CONTEXT_MASK_SWP;
-#endif
-	/*
-	 * Multi-packet send is supported by ConnectX-4 Lx PF as well
-	 * as all ConnectX-5 devices.
-	 */
-#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
-	dv_attr.comp_mask |= MLX5DV_CONTEXT_MASK_TUNNEL_OFFLOADS;
-#endif
-#ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
-	dv_attr.comp_mask |= MLX5DV_CONTEXT_MASK_STRIDING_RQ;
-#endif
-	mlx5_glue->dv_query_device(sh->ctx, &dv_attr);
-	if (dv_attr.flags & MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED) {
-		if (dv_attr.flags & MLX5DV_CONTEXT_FLAGS_ENHANCED_MPW) {
-			DRV_LOG(DEBUG, "enhanced MPW is supported");
-			mps = MLX5_MPW_ENHANCED;
-		} else {
-			DRV_LOG(DEBUG, "MPW is supported");
-			mps = MLX5_MPW;
-		}
-	} else {
-		DRV_LOG(DEBUG, "MPW isn't supported");
-		mps = MLX5_MPW_DISABLED;
-	}
-#ifdef HAVE_IBV_MLX5_MOD_SWP
-	if (dv_attr.comp_mask & MLX5DV_CONTEXT_MASK_SWP)
-		swp = dv_attr.sw_parsing_caps.sw_parsing_offloads;
-	DRV_LOG(DEBUG, "SWP support: %u", swp);
-#endif
-	config.swp = !!swp;
-#ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
-	if (dv_attr.comp_mask & MLX5DV_CONTEXT_MASK_STRIDING_RQ) {
-		struct mlx5dv_striding_rq_caps mprq_caps =
-			dv_attr.striding_rq_caps;
-
-		DRV_LOG(DEBUG, "\tmin_single_stride_log_num_of_bytes: %d",
-			mprq_caps.min_single_stride_log_num_of_bytes);
-		DRV_LOG(DEBUG, "\tmax_single_stride_log_num_of_bytes: %d",
-			mprq_caps.max_single_stride_log_num_of_bytes);
-		DRV_LOG(DEBUG, "\tmin_single_wqe_log_num_of_strides: %d",
-			mprq_caps.min_single_wqe_log_num_of_strides);
-		DRV_LOG(DEBUG, "\tmax_single_wqe_log_num_of_strides: %d",
-			mprq_caps.max_single_wqe_log_num_of_strides);
-		DRV_LOG(DEBUG, "\tsupported_qpts: %d",
-			mprq_caps.supported_qpts);
-		DRV_LOG(DEBUG, "device supports Multi-Packet RQ");
-		mprq = 1;
-		mprq_min_stride_size_n =
-			mprq_caps.min_single_stride_log_num_of_bytes;
-		mprq_max_stride_size_n =
-			mprq_caps.max_single_stride_log_num_of_bytes;
-		mprq_min_stride_num_n =
-			mprq_caps.min_single_wqe_log_num_of_strides;
-		mprq_max_stride_num_n =
-			mprq_caps.max_single_wqe_log_num_of_strides;
-	}
-#endif
-	if (RTE_CACHE_LINE_SIZE == 128 &&
-	    !(dv_attr.flags & MLX5DV_CONTEXT_FLAGS_CQE_128B_COMP))
-		cqe_comp = 0;
-	else
-		cqe_comp = 1;
-	config.cqe_comp = cqe_comp;
-#ifdef HAVE_IBV_MLX5_MOD_CQE_128B_PAD
-	/* Whether device supports 128B Rx CQE padding. */
-	cqe_pad = RTE_CACHE_LINE_SIZE == 128 &&
-		  (dv_attr.flags & MLX5DV_CONTEXT_FLAGS_CQE_128B_PAD);
-#endif
-#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
-	if (dv_attr.comp_mask & MLX5DV_CONTEXT_MASK_TUNNEL_OFFLOADS) {
-		tunnel_en = ((dv_attr.tunnel_offloads_caps &
-			      MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_VXLAN) &&
-			     (dv_attr.tunnel_offloads_caps &
-			      MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_GRE) &&
-			     (dv_attr.tunnel_offloads_caps &
-			      MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_GENEVE));
-	}
-	DRV_LOG(DEBUG, "tunnel offloading is %ssupported",
-		tunnel_en ? "" : "not ");
-#else
-	DRV_LOG(WARNING,
-		"tunnel offloading disabled due to old OFED/rdma-core version");
-#endif
-	config.tunnel_en = tunnel_en;
-#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
-	mpls_en = ((dv_attr.tunnel_offloads_caps &
-		    MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_CW_MPLS_OVER_GRE) &&
-		   (dv_attr.tunnel_offloads_caps &
-		    MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_CW_MPLS_OVER_UDP));
-	DRV_LOG(DEBUG, "MPLS over GRE/UDP tunnel offloading is %ssupported",
-		mpls_en ? "" : "not ");
-#else
-	DRV_LOG(WARNING, "MPLS over GRE/UDP tunnel offloading disabled due to"
-		" old OFED/rdma-core version or firmware configuration");
-#endif
-	config.mpls_en = mpls_en;
-	/* Check port status. */
-	err = mlx5_glue->query_port(sh->ctx, spawn->ibv_port, &port_attr);
-	if (err) {
-		DRV_LOG(ERR, "port query failed: %s", strerror(err));
-		goto error;
-	}
-	if (port_attr.link_layer != IBV_LINK_LAYER_ETHERNET) {
-		DRV_LOG(ERR, "port is not configured in Ethernet mode");
-		err = EINVAL;
-		goto error;
-	}
-	if (port_attr.state != IBV_PORT_ACTIVE)
-		DRV_LOG(DEBUG, "port is not active: \"%s\" (%d)",
-			mlx5_glue->port_state_str(port_attr.state),
-			port_attr.state);
-	/* Allocate private eth device data. */
-	priv = rte_zmalloc("ethdev private structure",
-			   sizeof(*priv),
-			   RTE_CACHE_LINE_SIZE);
-	if (priv == NULL) {
-		DRV_LOG(ERR, "priv allocation failure");
-		err = ENOMEM;
-		goto error;
-	}
-	priv->sh = sh;
-	priv->ibv_port = spawn->ibv_port;
-	priv->pci_dev = spawn->pci_dev;
-	priv->mtu = RTE_ETHER_MTU;
-	priv->mp_id.port_id = port_id;
-	strlcpy(priv->mp_id.name, MLX5_MP_NAME, RTE_MP_MAX_NAME_LEN);
-#ifndef RTE_ARCH_64
-	/* Initialize UAR access locks for 32bit implementations. */
-	rte_spinlock_init(&priv->uar_lock_cq);
-	for (i = 0; i < MLX5_UAR_PAGE_NUM_MAX; i++)
-		rte_spinlock_init(&priv->uar_lock[i]);
-#endif
-	/* Some internal functions rely on Netlink sockets, open them now. */
-	priv->nl_socket_rdma = mlx5_nl_init(NETLINK_RDMA);
-	priv->nl_socket_route =	mlx5_nl_init(NETLINK_ROUTE);
-	priv->representor = !!switch_info->representor;
-	priv->master = !!switch_info->master;
-	priv->domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID;
-	priv->vport_meta_tag = 0;
-	priv->vport_meta_mask = 0;
-	priv->pf_bond = spawn->pf_bond;
-#ifdef HAVE_MLX5DV_DR_DEVX_PORT
-	/*
-	 * The DevX port query API is implemented. E-Switch may use
-	 * either vport or reg_c[0] metadata register to match on
-	 * vport index. The engaged part of metadata register is
-	 * defined by mask.
-	 */
-	if (switch_info->representor || switch_info->master) {
-		devx_port.comp_mask = MLX5DV_DEVX_PORT_VPORT |
-				      MLX5DV_DEVX_PORT_MATCH_REG_C_0;
-		err = mlx5_glue->devx_port_query(sh->ctx, spawn->ibv_port,
-						 &devx_port);
-		if (err) {
-			DRV_LOG(WARNING,
-				"can't query devx port %d on device %s",
-				spawn->ibv_port, spawn->ibv_dev->name);
-			devx_port.comp_mask = 0;
-		}
-	}
-	if (devx_port.comp_mask & MLX5DV_DEVX_PORT_MATCH_REG_C_0) {
-		priv->vport_meta_tag = devx_port.reg_c_0.value;
-		priv->vport_meta_mask = devx_port.reg_c_0.mask;
-		if (!priv->vport_meta_mask) {
-			DRV_LOG(ERR, "vport zero mask for port %d"
-				     " on bonding device %s",
-				     spawn->ibv_port, spawn->ibv_dev->name);
-			err = ENOTSUP;
-			goto error;
-		}
-		if (priv->vport_meta_tag & ~priv->vport_meta_mask) {
-			DRV_LOG(ERR, "invalid vport tag for port %d"
-				     " on bonding device %s",
-				     spawn->ibv_port, spawn->ibv_dev->name);
-			err = ENOTSUP;
-			goto error;
-		}
-	}
-	if (devx_port.comp_mask & MLX5DV_DEVX_PORT_VPORT) {
-		priv->vport_id = devx_port.vport_num;
-	} else if (spawn->pf_bond >= 0) {
-		DRV_LOG(ERR, "can't deduce vport index for port %d"
-			     " on bonding device %s",
-			     spawn->ibv_port, spawn->ibv_dev->name);
-		err = ENOTSUP;
-		goto error;
-	} else {
-		/* Suppose vport index in compatible way. */
-		priv->vport_id = switch_info->representor ?
-				 switch_info->port_name + 1 : -1;
-	}
-#else
-	/*
-	 * Kernel/rdma_core support single E-Switch per PF configurations
-	 * only and vport_id field contains the vport index for
-	 * associated VF, which is deduced from representor port name.
-	 * For example, let's have the IB device port 10, it has
-	 * attached network device eth0, which has port name attribute
-	 * pf0vf2, we can deduce the VF number as 2, and set vport index
-	 * as 3 (2+1). This assigning schema should be changed if the
-	 * multiple E-Switch instances per PF configurations or/and PCI
-	 * subfunctions are added.
-	 */
-	priv->vport_id = switch_info->representor ?
-			 switch_info->port_name + 1 : -1;
-#endif
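
The comment above describes deducing the vport index from a representor port name such as "pf0vf2": VF 2 maps to vport 3. A simplified parser for that single naming scheme (the PMD's real code handles more name types than this):

#include <stdio.h>

/* Return the vport index for names like "pf0vf2", or -1 otherwise. */
static int vport_from_port_name(const char *name)
{
	unsigned int pf, vf;

	if (sscanf(name, "pf%uvf%u", &pf, &vf) != 2)
		return -1;
	(void)pf;
	return (int)vf + 1;	/* VF n is assumed to use vport n + 1 */
}

int main(void)
{
	printf("%d\n", vport_from_port_name("pf0vf2"));	/* prints 3 */
	return 0;
}
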
-	/* representor_id field keeps the unmodified VF index. */
-	priv->representor_id = switch_info->representor ?
-			       switch_info->port_name : -1;
-	/*
-	 * Look for sibling devices in order to reuse their switch domain
-	 * if any, otherwise allocate one.
-	 */
-	MLX5_ETH_FOREACH_DEV(port_id, priv->pci_dev) {
-		const struct mlx5_priv *opriv =
-			rte_eth_devices[port_id].data->dev_private;
-
-		if (!opriv ||
-		    opriv->sh != priv->sh ||
-			opriv->domain_id ==
-			RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID)
-			continue;
-		priv->domain_id = opriv->domain_id;
-		break;
-	}
-	if (priv->domain_id == RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID) {
-		err = rte_eth_switch_domain_alloc(&priv->domain_id);
-		if (err) {
-			err = rte_errno;
-			DRV_LOG(ERR, "unable to allocate switch domain: %s",
-				strerror(rte_errno));
-			goto error;
-		}
-		own_domain_id = 1;
-	}
-	/* Override some values set by hardware configuration. */
-	mlx5_args(&config, dpdk_dev->devargs);
-	err = mlx5_dev_check_sibling_config(priv, &config);
-	if (err)
-		goto error;
-	config.hw_csum = !!(sh->device_attr.device_cap_flags_ex &
-			    IBV_DEVICE_RAW_IP_CSUM);
-	DRV_LOG(DEBUG, "checksum offloading is %ssupported",
-		(config.hw_csum ? "" : "not "));
-#if !defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) && \
-	!defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
-	DRV_LOG(DEBUG, "counters are not supported");
-#endif
-#if !defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_MLX5DV_DR)
-	if (config.dv_flow_en) {
-		DRV_LOG(WARNING, "DV flow is not supported");
-		config.dv_flow_en = 0;
-	}
-#endif
-	config.ind_table_max_size =
-		sh->device_attr.max_rwq_indirection_table_size;
-	/*
-	 * Remove this check once DPDK supports larger/variable
-	 * indirection tables.
-	 */
-	if (config.ind_table_max_size > (unsigned int)ETH_RSS_RETA_SIZE_512)
-		config.ind_table_max_size = ETH_RSS_RETA_SIZE_512;
-	DRV_LOG(DEBUG, "maximum Rx indirection table size is %u",
-		config.ind_table_max_size);
-	config.hw_vlan_strip = !!(sh->device_attr.raw_packet_caps &
-				  IBV_RAW_PACKET_CAP_CVLAN_STRIPPING);
-	DRV_LOG(DEBUG, "VLAN stripping is %ssupported",
-		(config.hw_vlan_strip ? "" : "not "));
-	config.hw_fcs_strip = !!(sh->device_attr.raw_packet_caps &
-				 IBV_RAW_PACKET_CAP_SCATTER_FCS);
-	DRV_LOG(DEBUG, "FCS stripping configuration is %ssupported",
-		(config.hw_fcs_strip ? "" : "not "));
-#if defined(HAVE_IBV_WQ_FLAG_RX_END_PADDING)
-	hw_padding = !!sh->device_attr.rx_pad_end_addr_align;
-#elif defined(HAVE_IBV_WQ_FLAGS_PCI_WRITE_END_PADDING)
-	hw_padding = !!(sh->device_attr.device_cap_flags_ex &
-			IBV_DEVICE_PCI_WRITE_END_PADDING);
-#endif
-	if (config.hw_padding && !hw_padding) {
-		DRV_LOG(DEBUG, "Rx end alignment padding isn't supported");
-		config.hw_padding = 0;
-	} else if (config.hw_padding) {
-		DRV_LOG(DEBUG, "Rx end alignment padding is enabled");
-	}
-	config.tso = (sh->device_attr.max_tso > 0 &&
-		      (sh->device_attr.tso_supported_qpts &
-		       (1 << IBV_QPT_RAW_PACKET)));
-	if (config.tso)
-		config.tso_max_payload_sz = sh->device_attr.max_tso;
-	/*
-	 * MPW is disabled by default, while the Enhanced MPW is enabled
-	 * by default.
-	 */
-	if (config.mps == MLX5_ARG_UNSET)
-		config.mps = (mps == MLX5_MPW_ENHANCED) ? MLX5_MPW_ENHANCED :
-							  MLX5_MPW_DISABLED;
-	else
-		config.mps = config.mps ? mps : MLX5_MPW_DISABLED;
-	DRV_LOG(INFO, "%sMPS is %s",
-		config.mps == MLX5_MPW_ENHANCED ? "enhanced " :
-		config.mps == MLX5_MPW ? "legacy " : "",
-		config.mps != MLX5_MPW_DISABLED ? "enabled" : "disabled");
-	if (config.cqe_comp && !cqe_comp) {
-		DRV_LOG(WARNING, "Rx CQE compression isn't supported");
-		config.cqe_comp = 0;
-	}
-	if (config.cqe_pad && !cqe_pad) {
-		DRV_LOG(WARNING, "Rx CQE padding isn't supported");
-		config.cqe_pad = 0;
-	} else if (config.cqe_pad) {
-		DRV_LOG(INFO, "Rx CQE padding is enabled");
-	}
-	if (config.devx) {
-		priv->counter_fallback = 0;
-		err = mlx5_devx_cmd_query_hca_attr(sh->ctx, &config.hca_attr);
-		if (err) {
-			err = -err;
-			goto error;
-		}
-		if (!config.hca_attr.flow_counters_dump)
-			priv->counter_fallback = 1;
-#ifndef HAVE_IBV_DEVX_ASYNC
-		priv->counter_fallback = 1;
-#endif
-		if (priv->counter_fallback)
-			DRV_LOG(INFO, "Use fall-back DV counter management");
-		/* Check for LRO support. */
-		if (config.dest_tir && config.hca_attr.lro_cap &&
-		    config.dv_flow_en) {
-			/* TBD check tunnel lro caps. */
-			config.lro.supported = config.hca_attr.lro_cap;
-			DRV_LOG(DEBUG, "Device supports LRO");
-			/*
-			 * If LRO timeout is not configured by application,
-			 * use the minimal supported value.
-			 */
-			if (!config.lro.timeout)
-				config.lro.timeout =
-				config.hca_attr.lro_timer_supported_periods[0];
-			DRV_LOG(DEBUG, "LRO session timeout set to %d usec",
-				config.lro.timeout);
-		}
-#if defined(HAVE_MLX5DV_DR) && defined(HAVE_MLX5_DR_CREATE_ACTION_FLOW_METER)
-		if (config.hca_attr.qos.sup && config.hca_attr.qos.srtcm_sup &&
-		    config.dv_flow_en) {
-			uint8_t reg_c_mask =
-				config.hca_attr.qos.flow_meter_reg_c_ids;
-			/*
-			 * Meter needs two REG_C's for color match and pre-sfx
-			 * flow match. Here get the REG_C for color match.
-			 * REG_C_0 and REG_C_1 is reserved for metadata feature.
-			 */
-			reg_c_mask &= 0xfc;
-			if (__builtin_popcount(reg_c_mask) < 1) {
-				priv->mtr_en = 0;
-				DRV_LOG(WARNING, "No available register for"
-					" meter.");
-			} else {
-				priv->mtr_color_reg = ffs(reg_c_mask) - 1 +
-						      REG_C_0;
-				priv->mtr_en = 1;
-				priv->mtr_reg_share =
-				      config.hca_attr.qos.flow_meter_reg_share;
-				DRV_LOG(DEBUG, "The REG_C meter uses is %d",
-					priv->mtr_color_reg);
-			}
-		}
-#endif
-	}
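
The meter setup above masks out REG_C_0 and REG_C_1, which are reserved for metadata, and takes the lowest remaining bit of the capability mask as the color-match register. The same selection isolated into a small example (REG_C_BASE is a stand-in for the PMD's REG_C_0 enum value):

#include <stdio.h>
#include <strings.h>	/* ffs() */

#define REG_C_BASE 0	/* stand-in for the PMD's REG_C_0 value */

/* Pick the first REG_C usable for meter color matching, or -1 if none. */
static int pick_meter_reg(unsigned int reg_c_mask)
{
	reg_c_mask &= 0xfc;	/* REG_C_0/REG_C_1 reserved for metadata */
	if (!reg_c_mask)
		return -1;
	return ffs(reg_c_mask) - 1 + REG_C_BASE;
}

int main(void)
{
	printf("mask 0x1c -> reg_c[%d]\n", pick_meter_reg(0x1c));	/* 2 */
	return 0;
}
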
-	if (config.mprq.enabled && mprq) {
-		if (config.mprq.stride_num_n &&
-		    (config.mprq.stride_num_n > mprq_max_stride_num_n ||
-		     config.mprq.stride_num_n < mprq_min_stride_num_n)) {
-			config.mprq.stride_num_n =
-				RTE_MIN(RTE_MAX(MLX5_MPRQ_STRIDE_NUM_N,
-						mprq_min_stride_num_n),
-					mprq_max_stride_num_n);
-			DRV_LOG(WARNING,
-				"the number of strides"
-				" for Multi-Packet RQ is out of range,"
-				" setting default value (%u)",
-				1 << config.mprq.stride_num_n);
-		}
-		if (config.mprq.stride_size_n &&
-		    (config.mprq.stride_size_n > mprq_max_stride_size_n ||
-		     config.mprq.stride_size_n < mprq_min_stride_size_n)) {
-			config.mprq.stride_size_n =
-				RTE_MIN(RTE_MAX(MLX5_MPRQ_STRIDE_SIZE_N,
-						mprq_min_stride_size_n),
-					mprq_max_stride_size_n);
-			DRV_LOG(WARNING,
-				"the size of a stride"
-				" for Multi-Packet RQ is out of range,"
-				" setting default value (%u)",
-				1 << config.mprq.stride_size_n);
-		}
-		config.mprq.min_stride_size_n = mprq_min_stride_size_n;
-		config.mprq.max_stride_size_n = mprq_max_stride_size_n;
-	} else if (config.mprq.enabled && !mprq) {
-		DRV_LOG(WARNING, "Multi-Packet RQ isn't supported");
-		config.mprq.enabled = 0;
-	}
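
Out-of-range Multi-Packet RQ stride parameters are replaced above by the default clamped into the device's [min, max] capability range; the values are log2 exponents. The clamp itself, spelled out as a tiny helper:

#include <stdio.h>

/* Clamp a log2 stride parameter into the [lo, hi] capability range,
 * i.e. RTE_MIN(RTE_MAX(def, lo), hi) spelled out. */
static unsigned int clamp_log2(unsigned int def, unsigned int lo,
			       unsigned int hi)
{
	unsigned int v = def > lo ? def : lo;	/* RTE_MAX(def, lo) */

	return v < hi ? v : hi;			/* RTE_MIN(..., hi) */
}

int main(void)
{
	/* A default of 2^9 strides clamped into a 2^3..2^6 device range. */
	printf("%u\n", clamp_log2(9, 3, 6));	/* prints 6 */
	return 0;
}
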
-	if (config.max_dump_files_num == 0)
-		config.max_dump_files_num = 128;
-	eth_dev = rte_eth_dev_allocate(name);
-	if (eth_dev == NULL) {
-		DRV_LOG(ERR, "can not allocate rte ethdev");
-		err = ENOMEM;
-		goto error;
-	}
-	/* Flag to call rte_eth_dev_release_port() in rte_eth_dev_close(). */
-	eth_dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE;
-	if (priv->representor) {
-		eth_dev->data->dev_flags |= RTE_ETH_DEV_REPRESENTOR;
-		eth_dev->data->representor_id = priv->representor_id;
-	}
-	/*
-	 * Store associated network device interface index. This index
-	 * is permanent throughout the lifetime of device. So, we may store
-	 * the ifindex here and use the cached value further.
-	 */
-	MLX5_ASSERT(spawn->ifindex);
-	priv->if_index = spawn->ifindex;
-	eth_dev->data->dev_private = priv;
-	priv->dev_data = eth_dev->data;
-	eth_dev->data->mac_addrs = priv->mac;
-	eth_dev->device = dpdk_dev;
-	/* Configure the first MAC address by default. */
-	if (mlx5_get_mac(eth_dev, &mac.addr_bytes)) {
-		DRV_LOG(ERR,
-			"port %u cannot get MAC address, is mlx5_en"
-			" loaded? (errno: %s)",
-			eth_dev->data->port_id, strerror(rte_errno));
-		err = ENODEV;
-		goto error;
-	}
-	DRV_LOG(INFO,
-		"port %u MAC address is %02x:%02x:%02x:%02x:%02x:%02x",
-		eth_dev->data->port_id,
-		mac.addr_bytes[0], mac.addr_bytes[1],
-		mac.addr_bytes[2], mac.addr_bytes[3],
-		mac.addr_bytes[4], mac.addr_bytes[5]);
-#ifdef RTE_LIBRTE_MLX5_DEBUG
-	{
-		char ifname[IF_NAMESIZE];
-
-		if (mlx5_get_ifname(eth_dev, &ifname) == 0)
-			DRV_LOG(DEBUG, "port %u ifname is \"%s\"",
-				eth_dev->data->port_id, ifname);
-		else
-			DRV_LOG(DEBUG, "port %u ifname is unknown",
-				eth_dev->data->port_id);
-	}
-#endif
-	/* Get actual MTU if possible. */
-	err = mlx5_get_mtu(eth_dev, &priv->mtu);
-	if (err) {
-		err = rte_errno;
-		goto error;
-	}
-	DRV_LOG(DEBUG, "port %u MTU is %u", eth_dev->data->port_id,
-		priv->mtu);
-	/* Initialize burst functions to prevent crashes before link-up. */
-	eth_dev->rx_pkt_burst = removed_rx_burst;
-	eth_dev->tx_pkt_burst = removed_tx_burst;
-	eth_dev->dev_ops = &mlx5_dev_ops;
-	/* Register MAC address. */
-	claim_zero(mlx5_mac_addr_add(eth_dev, &mac, 0, 0));
-	if (config.vf && config.vf_nl_en)
-		mlx5_nl_mac_addr_sync(priv->nl_socket_route,
-				      mlx5_ifindex(eth_dev),
-				      eth_dev->data->mac_addrs,
-				      MLX5_MAX_MAC_ADDRESSES);
-	priv->flows = 0;
-	priv->ctrl_flows = 0;
-	TAILQ_INIT(&priv->flow_meters);
-	TAILQ_INIT(&priv->flow_meter_profiles);
-	/* Hint libmlx5 to use PMD allocator for data plane resources */
-	struct mlx5dv_ctx_allocators alctr = {
-		.alloc = &mlx5_alloc_verbs_buf,
-		.free = &mlx5_free_verbs_buf,
-		.data = priv,
-	};
-	mlx5_glue->dv_set_context_attr(sh->ctx,
-				       MLX5DV_CTX_ATTR_BUF_ALLOCATORS,
-				       (void *)((uintptr_t)&alctr));
-	/* Bring Ethernet device up. */
-	DRV_LOG(DEBUG, "port %u forcing Ethernet interface up",
-		eth_dev->data->port_id);
-	mlx5_set_link_up(eth_dev);
-	/*
-	 * Even though the interrupt handler is not installed yet,
-	 * interrupts will still trigger on the async_fd from
-	 * Verbs context returned by ibv_open_device().
-	 */
-	mlx5_link_update(eth_dev, 0);
-#ifdef HAVE_MLX5DV_DR_ESWITCH
-	if (!(config.hca_attr.eswitch_manager && config.dv_flow_en &&
-	      (switch_info->representor || switch_info->master)))
-		config.dv_esw_en = 0;
-#else
-	config.dv_esw_en = 0;
-#endif
-	/* Detect minimal data bytes to inline. */
-	mlx5_set_min_inline(spawn, &config);
-	/* Store device configuration on private structure. */
-	priv->config = config;
-	/* Create context for virtual machine VLAN workaround. */
-	priv->vmwa_context = mlx5_vlan_vmwa_init(eth_dev, spawn->ifindex);
-	if (config.dv_flow_en) {
-		err = mlx5_alloc_shared_dr(priv);
-		if (err)
-			goto error;
-		/*
-		 * RSS id is shared with meter flow id. Meter flow id can only
-		 * use the 24 MSB of the register.
-		 */
-		priv->qrss_id_pool = mlx5_flow_id_pool_alloc(UINT32_MAX >>
-				     MLX5_MTR_COLOR_BITS);
-		if (!priv->qrss_id_pool) {
-			DRV_LOG(ERR, "can't create flow id pool");
-			err = ENOMEM;
-			goto error;
-		}
-	}
-	/* Supported Verbs flow priority number detection. */
-	err = mlx5_flow_discover_priorities(eth_dev);
-	if (err < 0) {
-		err = -err;
-		goto error;
-	}
-	priv->config.flow_prio = err;
-	if (!priv->config.dv_esw_en &&
-	    priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY) {
-		DRV_LOG(WARNING, "metadata mode %u is not supported "
-				 "(no E-Switch)", priv->config.dv_xmeta_en);
-		priv->config.dv_xmeta_en = MLX5_XMETA_MODE_LEGACY;
-	}
-	mlx5_set_metadata_mask(eth_dev);
-	if (priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
-	    !priv->sh->dv_regc0_mask) {
-		DRV_LOG(ERR, "metadata mode %u is not supported "
-			     "(no metadata reg_c[0] is available)",
-			     priv->config.dv_xmeta_en);
-			err = ENOTSUP;
-			goto error;
-	}
-	/*
-	 * Allocate the buffer for flow creating, just once.
-	 * The allocation must be done before any flow creating.
-	 */
-	mlx5_flow_alloc_intermediate(eth_dev);
-	/* Query availibility of metadata reg_c's. */
-	err = mlx5_flow_discover_mreg_c(eth_dev);
-	if (err < 0) {
-		err = -err;
-		goto error;
-	}
-	if (!mlx5_flow_ext_mreg_supported(eth_dev)) {
-		DRV_LOG(DEBUG,
-			"port %u extensive metadata register is not supported",
-			eth_dev->data->port_id);
-		if (priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY) {
-			DRV_LOG(ERR, "metadata mode %u is not supported "
-				     "(no metadata registers available)",
-				     priv->config.dv_xmeta_en);
-			err = ENOTSUP;
-			goto error;
-		}
-	}
-	if (priv->config.dv_flow_en &&
-	    priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
-	    mlx5_flow_ext_mreg_supported(eth_dev) &&
-	    priv->sh->dv_regc0_mask) {
-		priv->mreg_cp_tbl = mlx5_hlist_create(MLX5_FLOW_MREG_HNAME,
-						      MLX5_FLOW_MREG_HTABLE_SZ);
-		if (!priv->mreg_cp_tbl) {
-			err = ENOMEM;
-			goto error;
-		}
-	}
-	return eth_dev;
-error:
-	if (priv) {
-		if (priv->mreg_cp_tbl)
-			mlx5_hlist_destroy(priv->mreg_cp_tbl, NULL, NULL);
-		if (priv->sh)
-			mlx5_free_shared_dr(priv);
-		if (priv->nl_socket_route >= 0)
-			close(priv->nl_socket_route);
-		if (priv->nl_socket_rdma >= 0)
-			close(priv->nl_socket_rdma);
-		if (priv->vmwa_context)
-			mlx5_vlan_vmwa_exit(priv->vmwa_context);
-		if (priv->qrss_id_pool)
-			mlx5_flow_id_pool_release(priv->qrss_id_pool);
-		if (own_domain_id)
-			claim_zero(rte_eth_switch_domain_free(priv->domain_id));
-		rte_free(priv);
-		if (eth_dev != NULL)
-			eth_dev->data->dev_private = NULL;
-	}
-	if (eth_dev != NULL) {
-		/* mac_addrs must not be freed alone because part of dev_private */
-		eth_dev->data->mac_addrs = NULL;
-		rte_eth_dev_release_port(eth_dev);
-	}
-	if (sh)
-		mlx5_free_shared_ibctx(sh);
-	MLX5_ASSERT(err > 0);
-	rte_errno = err;
-	return NULL;
-}
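
Near the top of mlx5_dev_spawn() the ethdev name is built from the PCI device name, the IB (bonding) device name when bonding is involved, and the representor port. A standalone version of that naming logic (the device names in the example are illustrative only):

#include <stdio.h>

/* Build the ethdev name the same way the spawn code above does. */
static void build_name(char *out, size_t len, const char *pci_name,
		       const char *ibv_name, int pf_bond,
		       int representor, unsigned int port)
{
	if (pf_bond < 0) {
		if (!representor)
			snprintf(out, len, "%s", pci_name);
		else
			snprintf(out, len, "%s_representor_%u",
				 pci_name, port);
	} else {
		if (!representor)
			snprintf(out, len, "%s_%s", pci_name, ibv_name);
		else
			snprintf(out, len, "%s_%s_representor_%u",
				 pci_name, ibv_name, port);
	}
}

int main(void)
{
	char name[64];

	build_name(name, sizeof(name), "0000:08:00.0", "mlx5_bond_0", -1, 1, 2);
	printf("%s\n", name);	/* 0000:08:00.0_representor_2 */
	return 0;
}
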
-
-/**
- * Comparison callback to sort device data.
- *
- * This is meant to be used with qsort().
- *
- * @param a[in]
- *   Pointer to pointer to first data object.
- * @param b[in]
- *   Pointer to pointer to second data object.
- *
- * @return
- *   0 if both objects are equal, less than 0 if the first argument is less
- *   than the second, greater than 0 otherwise.
- */
-static int
-mlx5_dev_spawn_data_cmp(const void *a, const void *b)
-{
-	const struct mlx5_switch_info *si_a =
-		&((const struct mlx5_dev_spawn_data *)a)->info;
-	const struct mlx5_switch_info *si_b =
-		&((const struct mlx5_dev_spawn_data *)b)->info;
-	int ret;
-
-	/* Master device first. */
-	ret = si_b->master - si_a->master;
-	if (ret)
-		return ret;
-	/* Then representor devices. */
-	ret = si_b->representor - si_a->representor;
-	if (ret)
-		return ret;
-	/* Unidentified devices come last in no specific order. */
-	if (!si_a->representor)
-		return 0;
-	/* Order representors by name. */
-	return si_a->port_name - si_b->port_name;
-}
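
The comparator above sorts spawn entries master first, then representors in ascending port-name order, and is later passed to qsort() in the probe path. A self-contained example of the same ordering on a reduced structure:

#include <stdio.h>
#include <stdlib.h>

struct entry {
	int master;
	int representor;
	int port_name;
};

static int entry_cmp(const void *a, const void *b)
{
	const struct entry *ea = a, *eb = b;
	int ret;

	ret = eb->master - ea->master;		/* masters first */
	if (ret)
		return ret;
	ret = eb->representor - ea->representor; /* then representors */
	if (ret)
		return ret;
	if (!ea->representor)			/* unidentified: keep order */
		return 0;
	return ea->port_name - eb->port_name;	/* representors by name */
}

int main(void)
{
	struct entry e[] = {
		{ 0, 1, 2 }, { 1, 0, 0 }, { 0, 1, 0 },
	};

	qsort(e, 3, sizeof(e[0]), entry_cmp);
	printf("%d %d %d\n", e[0].master, e[1].port_name, e[2].port_name);
	return 0;	/* prints "1 0 2" */
}
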
-
-/**
- * Match PCI information for possible slaves of bonding device.
- *
- * @param[in] ibv_dev
- *   Pointer to Infiniband device structure.
- * @param[in] pci_dev
- *   Pointer to PCI device structure to match PCI address.
- * @param[in] nl_rdma
- *   Netlink RDMA group socket handle.
- *
- * @return
- *   negative value if no bonding device found, otherwise
- *   positive index of slave PF in bonding.
- */
-static int
-mlx5_device_bond_pci_match(const struct ibv_device *ibv_dev,
-			   const struct rte_pci_device *pci_dev,
-			   int nl_rdma)
-{
-	char ifname[IF_NAMESIZE + 1];
-	unsigned int ifindex;
-	unsigned int np, i;
-	FILE *file = NULL;
-	int pf = -1;
-
-	/*
-	 * Try to get master device name. If something goes
-	 * wrong suppose the lack of kernel support and no
-	 * bonding devices.
-	 */
-	if (nl_rdma < 0)
-		return -1;
-	if (!strstr(ibv_dev->name, "bond"))
-		return -1;
-	np = mlx5_nl_portnum(nl_rdma, ibv_dev->name);
-	if (!np)
-		return -1;
-	/*
-	 * The Master device might not be on the predefined
-	 * port (not on port index 1, it is not garanted),
-	 * we have to scan all Infiniband device port and
-	 * find master.
-	 */
-	for (i = 1; i <= np; ++i) {
-		/* Check whether Infiniband port is populated. */
-		ifindex = mlx5_nl_ifindex(nl_rdma, ibv_dev->name, i);
-		if (!ifindex)
-			continue;
-		if (!if_indextoname(ifindex, ifname))
-			continue;
-		/* Try to read bonding slave names from sysfs. */
-		MKSTR(slaves,
-		      "/sys/class/net/%s/master/bonding/slaves", ifname);
-		file = fopen(slaves, "r");
-		if (file)
-			break;
-	}
-	if (!file)
-		return -1;
-	/* Use safe format to check maximal buffer length. */
-	MLX5_ASSERT(atol(RTE_STR(IF_NAMESIZE)) == IF_NAMESIZE);
-	while (fscanf(file, "%" RTE_STR(IF_NAMESIZE) "s", ifname) == 1) {
-		char tmp_str[IF_NAMESIZE + 32];
-		struct rte_pci_addr pci_addr;
-		struct mlx5_switch_info	info;
-
-		/* Process slave interface names in the loop. */
-		snprintf(tmp_str, sizeof(tmp_str),
-			 "/sys/class/net/%s", ifname);
-		if (mlx5_dev_to_pci_addr(tmp_str, &pci_addr)) {
-			DRV_LOG(WARNING, "can not get PCI address"
-					 " for netdev \"%s\"", ifname);
-			continue;
-		}
-		if (pci_dev->addr.domain != pci_addr.domain ||
-		    pci_dev->addr.bus != pci_addr.bus ||
-		    pci_dev->addr.devid != pci_addr.devid ||
-		    pci_dev->addr.function != pci_addr.function)
-			continue;
-		/* Slave interface PCI address match found. */
-		fclose(file);
-		snprintf(tmp_str, sizeof(tmp_str),
-			 "/sys/class/net/%s/phys_port_name", ifname);
-		file = fopen(tmp_str, "rb");
-		if (!file)
-			break;
-		info.name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET;
-		if (fscanf(file, "%32s", tmp_str) == 1)
-			mlx5_translate_port_name(tmp_str, &info);
-		if (info.name_type == MLX5_PHYS_PORT_NAME_TYPE_LEGACY ||
-		    info.name_type == MLX5_PHYS_PORT_NAME_TYPE_UPLINK)
-			pf = info.port_name;
-		break;
-	}
-	if (file)
-		fclose(file);
-	return pf;
-}
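
The bonding match above walks each IB port's netdev and reads the slave names from "/sys/class/net/<ifname>/master/bonding/slaves". A reduced sketch that reads the same sysfs attribute, but directly from a bonding master interface (the interface name in main() is a placeholder):

#include <stdio.h>

/* List the slaves of a bonding master, as read from sysfs. */
static int print_bond_slaves(const char *master_ifname)
{
	char path[128];
	char slave[32];
	FILE *file;
	int n = 0;

	snprintf(path, sizeof(path),
		 "/sys/class/net/%s/bonding/slaves", master_ifname);
	file = fopen(path, "r");
	if (!file)
		return -1;	/* not a bonding master (or no sysfs) */
	while (fscanf(file, "%31s", slave) == 1) {
		printf("slave %d: %s\n", n, slave);
		n++;
	}
	fclose(file);
	return n;
}

int main(void)
{
	return print_bond_slaves("bond0") < 0;
}
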
-
-/**
- * DPDK callback to register a PCI device.
- *
- * This function spawns Ethernet devices out of a given PCI device.
- *
- * @param[in] pci_drv
- *   PCI driver structure (mlx5_driver).
- * @param[in] pci_dev
- *   PCI device information.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
-	       struct rte_pci_device *pci_dev)
-{
-	struct ibv_device **ibv_list;
-	/*
-	 * Number of found IB Devices matching with requested PCI BDF.
-	 * nd != 1 means there are multiple IB devices over the same
-	 * PCI device and we have representors and master.
-	 */
-	unsigned int nd = 0;
-	/*
-	 * Number of found IB device Ports. nd = 1 and np = 1..n means
-	 * we have the single multiport IB device, and there may be
-	 * representors attached to some of found ports.
-	 */
-	unsigned int np = 0;
-	/*
-	 * Number of DPDK ethernet devices to Spawn - either over
-	 * multiple IB devices or multiple ports of single IB device.
-	 * Actually this is the number of iterations to spawn.
-	 */
-	unsigned int ns = 0;
-	/*
-	 * Bonding device
-	 *   < 0 - no bonding device (single one)
-	 *  >= 0 - bonding device (value is slave PF index)
-	 */
-	int bd = -1;
-	struct mlx5_dev_spawn_data *list = NULL;
-	struct mlx5_dev_config dev_config;
-	int ret;
-
-	if (mlx5_class_get(pci_dev->device.devargs) != MLX5_CLASS_NET) {
-		DRV_LOG(DEBUG, "Skip probing - should be probed by other mlx5"
-			" driver.");
-		return 1;
-	}
-	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
-		mlx5_pmd_socket_init();
-	ret = mlx5_init_once();
-	if (ret) {
-		DRV_LOG(ERR, "unable to init PMD global data: %s",
-			strerror(rte_errno));
-		return -rte_errno;
-	}
-	MLX5_ASSERT(pci_drv == &mlx5_driver);
-	errno = 0;
-	ibv_list = mlx5_glue->get_device_list(&ret);
-	if (!ibv_list) {
-		rte_errno = errno ? errno : ENOSYS;
-		DRV_LOG(ERR, "cannot list devices, is ib_uverbs loaded?");
-		return -rte_errno;
-	}
-	/*
-	 * First scan the list of all Infiniband devices to find
-	 * matching ones, gathering into the list.
-	 */
-	struct ibv_device *ibv_match[ret + 1];
-	int nl_route = mlx5_nl_init(NETLINK_ROUTE);
-	int nl_rdma = mlx5_nl_init(NETLINK_RDMA);
-	unsigned int i;
-
-	while (ret-- > 0) {
-		struct rte_pci_addr pci_addr;
-
-		DRV_LOG(DEBUG, "checking device \"%s\"", ibv_list[ret]->name);
-		bd = mlx5_device_bond_pci_match
-				(ibv_list[ret], pci_dev, nl_rdma);
-		if (bd >= 0) {
-			/*
-			 * Bonding device detected. Only one match is allowed,
-			 * the bonding is supported over multi-port IB device,
-			 * there should be no matches on representor PCI
-			 * functions or non VF LAG bonding devices with
-			 * specified address.
-			 */
-			if (nd) {
-				DRV_LOG(ERR,
-					"multiple PCI match on bonding device"
-					"\"%s\" found", ibv_list[ret]->name);
-				rte_errno = ENOENT;
-				ret = -rte_errno;
-				goto exit;
-			}
-			DRV_LOG(INFO, "PCI information matches for"
-				      " slave %d bonding device \"%s\"",
-				      bd, ibv_list[ret]->name);
-			ibv_match[nd++] = ibv_list[ret];
-			break;
-		}
-		if (mlx5_dev_to_pci_addr
-			(ibv_list[ret]->ibdev_path, &pci_addr))
-			continue;
-		if (pci_dev->addr.domain != pci_addr.domain ||
-		    pci_dev->addr.bus != pci_addr.bus ||
-		    pci_dev->addr.devid != pci_addr.devid ||
-		    pci_dev->addr.function != pci_addr.function)
-			continue;
-		DRV_LOG(INFO, "PCI information matches for device \"%s\"",
-			ibv_list[ret]->name);
-		ibv_match[nd++] = ibv_list[ret];
-	}
-	ibv_match[nd] = NULL;
-	if (!nd) {
-		/* No device matches, just complain and bail out. */
-		DRV_LOG(WARNING,
-			"no Verbs device matches PCI device " PCI_PRI_FMT ","
-			" are kernel drivers loaded?",
-			pci_dev->addr.domain, pci_dev->addr.bus,
-			pci_dev->addr.devid, pci_dev->addr.function);
-		rte_errno = ENOENT;
-		ret = -rte_errno;
-		goto exit;
-	}
-	if (nd == 1) {
-		/*
-		 * Found single matching device may have multiple ports.
-		 * Each port may be representor, we have to check the port
-		 * number and check the representors existence.
-		 */
-		if (nl_rdma >= 0)
-			np = mlx5_nl_portnum(nl_rdma, ibv_match[0]->name);
-		if (!np)
-			DRV_LOG(WARNING, "can not get IB device \"%s\""
-					 " ports number", ibv_match[0]->name);
-		if (bd >= 0 && !np) {
-			DRV_LOG(ERR, "can not get ports"
-				     " for bonding device");
-			rte_errno = ENOENT;
-			ret = -rte_errno;
-			goto exit;
-		}
-	}
-#ifndef HAVE_MLX5DV_DR_DEVX_PORT
-	if (bd >= 0) {
-		/*
-		 * This may happen if there is VF LAG kernel support and
-		 * application is compiled with older rdma_core library.
-		 */
-		DRV_LOG(ERR,
-			"No kernel/verbs support for VF LAG bonding found.");
-		rte_errno = ENOTSUP;
-		ret = -rte_errno;
-		goto exit;
-	}
-#endif
-	/*
-	 * Now we can determine the maximal
-	 * amount of devices to be spawned.
-	 */
-	list = rte_zmalloc("device spawn data",
-			 sizeof(struct mlx5_dev_spawn_data) *
-			 (np ? np : nd),
-			 RTE_CACHE_LINE_SIZE);
-	if (!list) {
-		DRV_LOG(ERR, "spawn data array allocation failure");
-		rte_errno = ENOMEM;
-		ret = -rte_errno;
-		goto exit;
-	}
-	if (bd >= 0 || np > 1) {
-		/*
-		 * Single IB device with multiple ports found,
-		 * it may be E-Switch master device and representors.
-		 * We have to perform identification through the ports.
-		 */
-		MLX5_ASSERT(nl_rdma >= 0);
-		MLX5_ASSERT(ns == 0);
-		MLX5_ASSERT(nd == 1);
-		MLX5_ASSERT(np);
-		for (i = 1; i <= np; ++i) {
-			list[ns].max_port = np;
-			list[ns].ibv_port = i;
-			list[ns].ibv_dev = ibv_match[0];
-			list[ns].eth_dev = NULL;
-			list[ns].pci_dev = pci_dev;
-			list[ns].pf_bond = bd;
-			list[ns].ifindex = mlx5_nl_ifindex
-					(nl_rdma, list[ns].ibv_dev->name, i);
-			if (!list[ns].ifindex) {
-				/*
-				 * No network interface index found for the
-				 * specified port, it means there is no
-				 * representor on this port. It's OK,
-				 * there can be disabled ports, for example
-				 * if sriov_numvfs < sriov_totalvfs.
-				 */
-				continue;
-			}
-			ret = -1;
-			if (nl_route >= 0)
-				ret = mlx5_nl_switch_info
-					       (nl_route,
-						list[ns].ifindex,
-						&list[ns].info);
-			if (ret || (!list[ns].info.representor &&
-				    !list[ns].info.master)) {
-				/*
-				 * We failed to recognize representors with
-				 * Netlink, let's try to perform the task
-				 * with sysfs.
-				 */
-				ret =  mlx5_sysfs_switch_info
-						(list[ns].ifindex,
-						 &list[ns].info);
-			}
-			if (!ret && bd >= 0) {
-				switch (list[ns].info.name_type) {
-				case MLX5_PHYS_PORT_NAME_TYPE_UPLINK:
-					if (list[ns].info.port_name == bd)
-						ns++;
-					break;
-				case MLX5_PHYS_PORT_NAME_TYPE_PFVF:
-					if (list[ns].info.pf_num == bd)
-						ns++;
-					break;
-				default:
-					break;
-				}
-				continue;
-			}
-			if (!ret && (list[ns].info.representor ^
-				     list[ns].info.master))
-				ns++;
-		}
-		if (!ns) {
-			DRV_LOG(ERR,
-				"unable to recognize master/representors"
-				" on the IB device with multiple ports");
-			rte_errno = ENOENT;
-			ret = -rte_errno;
-			goto exit;
-		}
-	} else {
-		/*
-		 * The existence of several matching entries (nd > 1) means
-		 * port representors have been instantiated. No existing Verbs
-		 * call nor sysfs entries can tell them apart, this can only
-		 * be done through Netlink calls assuming kernel drivers are
-		 * recent enough to support them.
-		 *
-		 * In the event of identification failure through Netlink,
-		 * try again through sysfs, then:
-		 *
-		 * 1. A single IB device matches (nd == 1) with single
-		 *    port (np=0/1) and is not a representor, assume
-		 *    no switch support.
-		 *
-		 * 2. Otherwise no safe assumptions can be made;
-		 *    complain louder and bail out.
-		 */
-		np = 1;
-		for (i = 0; i != nd; ++i) {
-			memset(&list[ns].info, 0, sizeof(list[ns].info));
-			list[ns].max_port = 1;
-			list[ns].ibv_port = 1;
-			list[ns].ibv_dev = ibv_match[i];
-			list[ns].eth_dev = NULL;
-			list[ns].pci_dev = pci_dev;
-			list[ns].pf_bond = -1;
-			list[ns].ifindex = 0;
-			if (nl_rdma >= 0)
-				list[ns].ifindex = mlx5_nl_ifindex
-					(nl_rdma, list[ns].ibv_dev->name, 1);
-			if (!list[ns].ifindex) {
-				char ifname[IF_NAMESIZE];
-
-				/*
-				 * Netlink failed, it may happen with old
-				 * ib_core kernel driver (before 4.16).
-				 * We can assume there is old driver because
-				 * here we are processing single ports IB
-				 * devices. Let's try sysfs to retrieve
-				 * the ifindex. The method works for
-				 * master device only.
-				 */
-				if (nd > 1) {
-					/*
-					 * Multiple devices found, assume
-					 * representors, can not distinguish
-					 * master/representor and retrieve
-					 * ifindex via sysfs.
-					 */
-					continue;
-				}
-				ret = mlx5_get_master_ifname
-					(ibv_match[i]->ibdev_path, &ifname);
-				if (!ret)
-					list[ns].ifindex =
-						if_nametoindex(ifname);
-				if (!list[ns].ifindex) {
-					/*
-					 * No network interface index found
-					 * for the specified device, it means
-					 * there it is neither representor
-					 * nor master.
-					 */
-					continue;
-				}
-			}
-			ret = -1;
-			if (nl_route >= 0)
-				ret = mlx5_nl_switch_info
-					       (nl_route,
-						list[ns].ifindex,
-						&list[ns].info);
-			if (ret || (!list[ns].info.representor &&
-				    !list[ns].info.master)) {
-				/*
-				 * We failed to recognize representors with
-				 * Netlink, let's try to perform the task
-				 * with sysfs.
-				 */
-				ret =  mlx5_sysfs_switch_info
-						(list[ns].ifindex,
-						 &list[ns].info);
-			}
-			if (!ret && (list[ns].info.representor ^
-				     list[ns].info.master)) {
-				ns++;
-			} else if ((nd == 1) &&
-				   !list[ns].info.representor &&
-				   !list[ns].info.master) {
-				/*
-				 * Single IB device with
-				 * one physical port and
-				 * attached network device.
-				 * May be SRIOV is not enabled
-				 * or there is no representors.
-				 */
-				DRV_LOG(INFO, "no E-Switch support detected");
-				ns++;
-				break;
-			}
-		}
-		if (!ns) {
-			DRV_LOG(ERR,
-				"unable to recognize master/representors"
-				" on the multiple IB devices");
-			rte_errno = ENOENT;
-			ret = -rte_errno;
-			goto exit;
-		}
-	}
-	MLX5_ASSERT(ns);
-	/*
-	 * Sort list to probe devices in natural order for users convenience
-	 * (i.e. master first, then representors from lowest to highest ID).
-	 */
-	qsort(list, ns, sizeof(*list), mlx5_dev_spawn_data_cmp);
-	/* Default configuration. */
-	dev_config = (struct mlx5_dev_config){
-		.hw_padding = 0,
-		.mps = MLX5_ARG_UNSET,
-		.dbnc = MLX5_ARG_UNSET,
-		.rx_vec_en = 1,
-		.txq_inline_max = MLX5_ARG_UNSET,
-		.txq_inline_min = MLX5_ARG_UNSET,
-		.txq_inline_mpw = MLX5_ARG_UNSET,
-		.txqs_inline = MLX5_ARG_UNSET,
-		.vf_nl_en = 1,
-		.mr_ext_memseg_en = 1,
-		.mprq = {
-			.enabled = 0, /* Disabled by default. */
-			.stride_num_n = 0,
-			.stride_size_n = 0,
-			.max_memcpy_len = MLX5_MPRQ_MEMCPY_DEFAULT_LEN,
-			.min_rxqs_num = MLX5_MPRQ_MIN_RXQS,
-		},
-		.dv_esw_en = 1,
-		.dv_flow_en = 1,
-		.log_hp_size = MLX5_ARG_UNSET,
-	};
-	/* Device specific configuration. */
-	switch (pci_dev->id.device_id) {
-	case PCI_DEVICE_ID_MELLANOX_CONNECTX4VF:
-	case PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF:
-	case PCI_DEVICE_ID_MELLANOX_CONNECTX5VF:
-	case PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF:
-	case PCI_DEVICE_ID_MELLANOX_CONNECTX5BFVF:
-	case PCI_DEVICE_ID_MELLANOX_CONNECTX6VF:
-	case PCI_DEVICE_ID_MELLANOX_CONNECTX6DXVF:
-		dev_config.vf = 1;
-		break;
-	default:
-		break;
-	}
-	for (i = 0; i != ns; ++i) {
-		uint32_t restore;
-
-		list[i].eth_dev = mlx5_dev_spawn(&pci_dev->device,
-						 &list[i],
-						 dev_config);
-		if (!list[i].eth_dev) {
-			if (rte_errno != EBUSY && rte_errno != EEXIST)
-				break;
-			/* Device is disabled or already spawned. Ignore it. */
-			continue;
-		}
-		restore = list[i].eth_dev->data->dev_flags;
-		rte_eth_copy_pci_info(list[i].eth_dev, pci_dev);
-		/* Restore non-PCI flags cleared by the above call. */
-		list[i].eth_dev->data->dev_flags |= restore;
-		rte_eth_dev_probing_finish(list[i].eth_dev);
-	}
-	if (i != ns) {
-		DRV_LOG(ERR,
-			"probe of PCI device " PCI_PRI_FMT " aborted after"
-			" encountering an error: %s",
-			pci_dev->addr.domain, pci_dev->addr.bus,
-			pci_dev->addr.devid, pci_dev->addr.function,
-			strerror(rte_errno));
-		ret = -rte_errno;
-		/* Roll back. */
-		while (i--) {
-			if (!list[i].eth_dev)
-				continue;
-			mlx5_dev_close(list[i].eth_dev);
-			/* mac_addrs must not be freed because in dev_private */
-			list[i].eth_dev->data->mac_addrs = NULL;
-			claim_zero(rte_eth_dev_release_port(list[i].eth_dev));
-		}
-		/* Restore original error. */
-		rte_errno = -ret;
-	} else {
-		ret = 0;
-	}
-exit:
-	/*
-	 * Do the routine cleanup:
-	 * - close opened Netlink sockets
-	 * - free allocated spawn data array
-	 * - free the Infiniband device list
-	 */
-	if (nl_rdma >= 0)
-		close(nl_rdma);
-	if (nl_route >= 0)
-		close(nl_route);
-	if (list)
-		rte_free(list);
-	MLX5_ASSERT(ibv_list);
-	mlx5_glue->free_device_list(ibv_list);
-	return ret;
-}
 
 /**
  * Look for the ethernet device belonging to mlx5 driver.
@@ -3762,7 +1983,7 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
  * @param[in] pci_dev
  *   Pointer to the hint PCI device. When device is being probed
  *   the its siblings (master and preceding representors might
- *   not have assigned driver yet (because the mlx5_pci_probe()
+ *   not have assigned driver yet (because the mlx5_os_pci_probe()
  *   is not completed yet, for this case match on hint PCI
  *   device may be used to detect sibling device.
  *
@@ -3884,12 +2105,12 @@ static const struct rte_pci_id mlx5_pci_id_map[] = {
 	}
 };
 
-static struct rte_pci_driver mlx5_driver = {
+struct rte_pci_driver mlx5_driver = {
 	.driver = {
 		.name = MLX5_DRIVER_NAME
 	},
 	.id_table = mlx5_pci_id_map,
-	.probe = mlx5_pci_probe,
+	.probe = mlx5_os_pci_probe,
 	.remove = mlx5_pci_remove,
 	.dma_map = mlx5_dma_map,
 	.dma_unmap = mlx5_dma_unmap,
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index ec4ba87..f5d9aad 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -94,6 +94,18 @@ struct mlx5_dev_attr {
 	char		fw_ver[64];
 };
 
+/** Data associated with devices to spawn. */
+struct mlx5_dev_spawn_data {
+	uint32_t ifindex; /**< Network interface index. */
+	uint32_t max_port; /**< IB device maximal port index. */
+	uint32_t ibv_port; /**< IB device physical port index. */
+	int pf_bond; /**< bonding device PF index. < 0 - no bonding */
+	struct mlx5_switch_info info; /**< Switch information. */
+	struct ibv_device *ibv_dev; /**< Associated IB device. */
+	struct rte_eth_dev *eth_dev; /**< Associated Ethernet device. */
+	struct rte_pci_device *pci_dev; /**< Backend PCI device. */
+};
+
 /** Key string for IPC. */
 #define MLX5_MP_NAME "net_mlx5_mp"
 
@@ -116,6 +128,11 @@ struct mlx5_local_data {
 };
 
 extern struct mlx5_shared_data *mlx5_shared_data;
+extern struct rte_pci_driver mlx5_driver;
+
+/* Dev ops structs */
+extern const struct eth_dev_ops mlx5_dev_sec_ops;
+extern const struct eth_dev_ops mlx5_dev_ops;
 
 struct mlx5_counter_ctrl {
 	/* Name of the counter. */
@@ -670,12 +687,26 @@ int32_t mlx5_release_dbr(struct rte_eth_dev *dev, uint32_t umem_id,
 int mlx5_udp_tunnel_port_add(struct rte_eth_dev *dev,
 			      struct rte_eth_udp_tunnel *udp_tunnel);
 uint16_t mlx5_eth_find_next(uint16_t port_id, struct rte_pci_device *pci_dev);
+void mlx5_dev_close(struct rte_eth_dev *dev);
 
 /* Macro to iterate over all valid ports for mlx5 driver. */
 #define MLX5_ETH_FOREACH_DEV(port_id, pci_dev) \
 	for (port_id = mlx5_eth_find_next(0, pci_dev); \
 	     port_id < RTE_MAX_ETHPORTS; \
 	     port_id = mlx5_eth_find_next(port_id + 1, pci_dev))
+int mlx5_args(struct mlx5_dev_config *config, struct rte_devargs *devargs);
+struct mlx5_dev_ctx_shared *
+mlx5_alloc_shared_ibctx(const struct mlx5_dev_spawn_data *spawn,
+			const struct mlx5_dev_config *config);
+void mlx5_free_shared_ibctx(struct mlx5_dev_ctx_shared *sh);
+void mlx5_free_table_hash_list(struct mlx5_priv *priv);
+int mlx5_alloc_table_hash_list(struct mlx5_priv *priv);
+void mlx5_set_min_inline(struct mlx5_dev_spawn_data *spawn,
+			 struct mlx5_dev_config *config);
+void mlx5_set_metadata_mask(struct rte_eth_dev *dev);
+int mlx5_dev_check_sibling_config(struct mlx5_priv *priv,
+				  struct mlx5_dev_config *config);
+int mlx5_init_once(void);
 
 /* mlx5_ethdev.c */
 
@@ -876,9 +907,18 @@ struct mlx5_flow_meter *mlx5_flow_meter_attach
 void mlx5_flow_meter_detach(struct mlx5_flow_meter *fm);
 
 /* mlx5_os.c */
+struct rte_pci_driver;
 const char *mlx5_os_get_ctx_device_name(void *ctx);
 const char *mlx5_os_get_ctx_device_path(void *ctx);
 uint32_t mlx5_os_get_umem_id(void *umem);
 int mlx5_os_get_dev_attr(void *ctx, struct mlx5_dev_attr *dev_attr);
-
+void mlx5_os_free_shared_dr(struct mlx5_priv *priv);
+int mlx5_os_open_device(const struct mlx5_dev_spawn_data *spawn,
+			 const struct mlx5_dev_config *config,
+			 struct mlx5_dev_ctx_shared *sh);
+int mlx5_os_get_pdn(void *pd, uint32_t *pdn);
+int mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
+		       struct rte_pci_device *pci_dev);
+void mlx5_os_dev_shared_handler_install(struct mlx5_dev_ctx_shared *sh);
+void mlx5_os_dev_shared_handler_uninstall(struct mlx5_dev_ctx_shared *sh);
 #endif /* RTE_PMD_MLX5_H_ */
-- 
2.8.4


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [dpdk-dev] [PATCH v1 7/8] net/mlx5: add mlx5 header file specific to Linux
  2020-06-03 15:05 [dpdk-dev] [PATCH v1 0/8] mlx5 PMD multi OS support Ophir Munk
                   ` (5 preceding siblings ...)
  2020-06-03 15:06 ` [dpdk-dev] [PATCH v1 6/8] net/mlx5: refactor PCI probing under Linux Ophir Munk
@ 2020-06-03 15:06 ` Ophir Munk
  2020-06-08 11:31   ` Ferruh Yigit
  2020-06-03 15:06 ` [dpdk-dev] [PATCH v1 8/8] net/mlx5: remove ibv dependency in spawn struct Ophir Munk
  2020-06-07  8:49 ` [dpdk-dev] [PATCH v1 0/8] mlx5 PMD multi OS support Raslan Darawsheh
  8 siblings, 1 reply; 17+ messages in thread
From: Ophir Munk @ 2020-06-03 15:06 UTC (permalink / raw)
  To: dev, Matan Azrad, Raslan Darawsheh; +Cc: Ophir Munk

File drivers/net/mlx5/linux/mlx5_os.h is added. It includes Linux-specific
definitions such as PCI driver flags, link state change interrupts,
link removal interrupts, etc.

Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
Acked-by: Matan Azrad <matan@mellanox.com>
---
 drivers/net/mlx5/Makefile        |  1 +
 drivers/net/mlx5/linux/mlx5_os.h | 18 ++++++++++++++++++
 drivers/net/mlx5/mlx5.c          |  3 +--
 drivers/net/mlx5/mlx5.h          |  5 +++--
 4 files changed, 23 insertions(+), 4 deletions(-)
 create mode 100644 drivers/net/mlx5/linux/mlx5_os.h

diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index 115b66c..41ab73e 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -41,6 +41,7 @@ CFLAGS += -g
 CFLAGS += -I$(RTE_SDK)/drivers/common/mlx5
 CFLAGS += -I$(RTE_SDK)/drivers/common/mlx5/linux
 CFLAGS += -I$(RTE_SDK)/drivers/net/mlx5
+CFLAGS += -I$(RTE_SDK)/drivers/net/mlx5/linux
 CFLAGS += -I$(BUILDDIR)/drivers/common/mlx5
 CFLAGS += -D_BSD_SOURCE
 CFLAGS += -D_DEFAULT_SOURCE
diff --git a/drivers/net/mlx5/linux/mlx5_os.h b/drivers/net/mlx5/linux/mlx5_os.h
new file mode 100644
index 0000000..f310f17
--- /dev/null
+++ b/drivers/net/mlx5/linux/mlx5_os.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2015 6WIND S.A.
+ * Copyright 2020 Mellanox Technologies, Ltd
+ */
+
+#ifndef RTE_PMD_MLX5_OS_H_
+#define RTE_PMD_MLX5_OS_H_
+
+/* verb enumerations translations to local enums. */
+enum {
+	DEV_SYSFS_NAME_MAX = IBV_SYSFS_NAME_MAX,
+	DEV_SYSFS_PATH_MAX = IBV_SYSFS_PATH_MAX
+};
+
+#define PCI_DRV_FLAGS  (RTE_PCI_DRV_INTR_LSC | \
+			RTE_PCI_DRV_INTR_RMV | \
+			RTE_PCI_DRV_PROBE_AGAIN)
+#endif /* RTE_PMD_MLX5_OS_H_ */
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index f62ad12..16ab8b0 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -2114,8 +2114,7 @@ struct rte_pci_driver mlx5_driver = {
 	.remove = mlx5_pci_remove,
 	.dma_map = mlx5_dma_map,
 	.dma_unmap = mlx5_dma_unmap,
-	.drv_flags = RTE_PCI_DRV_INTR_LSC | RTE_PCI_DRV_INTR_RMV |
-		     RTE_PCI_DRV_PROBE_AGAIN,
+	.drv_flags = PCI_DRV_FLAGS,
 };
 
 /**
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index f5d9aad..eca4472 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -41,6 +41,7 @@
 
 #include "mlx5_defs.h"
 #include "mlx5_utils.h"
+#include "mlx5_os.h"
 #include "mlx5_autoconf.h"
 
 enum mlx5_ipool_index {
@@ -536,8 +537,8 @@ struct mlx5_dev_ctx_shared {
 	void *pd; /* Protection Domain. */
 	uint32_t pdn; /* Protection Domain number. */
 	uint32_t tdn; /* Transport Domain number. */
-	char ibdev_name[IBV_SYSFS_NAME_MAX]; /* IB device name. */
-	char ibdev_path[IBV_SYSFS_PATH_MAX]; /* IB device path for secondary */
+	char ibdev_name[DEV_SYSFS_NAME_MAX]; /* SYSFS dev name. */
+	char ibdev_path[DEV_SYSFS_PATH_MAX]; /* SYSFS dev path for secondary */
 	struct mlx5_dev_attr device_attr; /* Device properties. */
 	LIST_ENTRY(mlx5_dev_ctx_shared) mem_event_cb;
 	/**< Called by memory event callback. */
-- 
2.8.4


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [dpdk-dev] [PATCH v1 8/8] net/mlx5: remove ibv dependency in spawn struct
  2020-06-03 15:05 [dpdk-dev] [PATCH v1 0/8] mlx5 PMD multi OS support Ophir Munk
                   ` (6 preceding siblings ...)
  2020-06-03 15:06 ` [dpdk-dev] [PATCH v1 7/8] net/mlx5: add mlx5 header file specific to Linux Ophir Munk
@ 2020-06-03 15:06 ` Ophir Munk
  2020-06-07  8:49 ` [dpdk-dev] [PATCH v1 0/8] mlx5 PMD multi OS support Raslan Darawsheh
  8 siblings, 0 replies; 17+ messages in thread
From: Ophir Munk @ 2020-06-03 15:06 UTC (permalink / raw)
  To: dev, Matan Azrad, Raslan Darawsheh; +Cc: Ophir Munk

1. Replace 'struct ibv_device *' with 'void *' in 'struct
mlx5_dev_spawn_data'. Define a getter function to retrieve the
device name.
2. Rename ibv_dev and ibv_port as phys_dev and phys_port
respectively.

Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
Acked-by: Matan Azrad <matan@mellanox.com>
---
 drivers/net/mlx5/linux/mlx5_os.c | 64 +++++++++++++++++++++++++++++-----------
 drivers/net/mlx5/mlx5.c          |  3 +-
 drivers/net/mlx5/mlx5.h          |  7 +++--
 3 files changed, 53 insertions(+), 21 deletions(-)

diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index d1476c2..92422db 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -62,6 +62,24 @@
 #endif
 
 /**
+ * Get device name. Given an ibv_device pointer - return a
+ * pointer to the corresponding device name.
+ *
+ * @param[in] dev
+ *   Pointer to ibv device.
+ *
+ * @return
+ *   Pointer to device name if dev is valid, NULL otherwise.
+ */
+const char *
+mlx5_os_get_dev_device_name(void *dev)
+{
+	if (!dev)
+		return NULL;
+	return ((struct ibv_device *)dev)->name;
+}
+
+/**
  * Get ibv device name. Given an ibv_context pointer - return a
  * pointer to the corresponding device name.
  *
@@ -482,10 +500,12 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 		/* Bonding device. */
 		if (!switch_info->representor)
 			snprintf(name, sizeof(name), "%s_%s",
-				 dpdk_dev->name, spawn->ibv_dev->name);
+				 dpdk_dev->name,
+				 mlx5_os_get_dev_device_name(spawn->phys_dev));
 		else
 			snprintf(name, sizeof(name), "%s_%s_representor_%u",
-				 dpdk_dev->name, spawn->ibv_dev->name,
+				 dpdk_dev->name,
+				 mlx5_os_get_dev_device_name(spawn->phys_dev),
 				 switch_info->port_name);
 	}
 	/* check if the device is already spawned */
@@ -649,7 +669,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 #endif
 	config.mpls_en = mpls_en;
 	/* Check port status. */
-	err = mlx5_glue->query_port(sh->ctx, spawn->ibv_port, &port_attr);
+	err = mlx5_glue->query_port(sh->ctx, spawn->phys_port, &port_attr);
 	if (err) {
 		DRV_LOG(ERR, "port query failed: %s", strerror(err));
 		goto error;
@@ -673,7 +693,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 		goto error;
 	}
 	priv->sh = sh;
-	priv->ibv_port = spawn->ibv_port;
+	priv->ibv_port = spawn->phys_port;
 	priv->pci_dev = spawn->pci_dev;
 	priv->mtu = RTE_ETHER_MTU;
 	priv->mp_id.port_id = port_id;
@@ -703,12 +723,13 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 	if (switch_info->representor || switch_info->master) {
 		devx_port.comp_mask = MLX5DV_DEVX_PORT_VPORT |
 				      MLX5DV_DEVX_PORT_MATCH_REG_C_0;
-		err = mlx5_glue->devx_port_query(sh->ctx, spawn->ibv_port,
+		err = mlx5_glue->devx_port_query(sh->ctx, spawn->phys_port,
 						 &devx_port);
 		if (err) {
 			DRV_LOG(WARNING,
 				"can't query devx port %d on device %s",
-				spawn->ibv_port, spawn->ibv_dev->name);
+				spawn->phys_port,
+				mlx5_os_get_dev_device_name(spawn->phys_dev));
 			devx_port.comp_mask = 0;
 		}
 	}
@@ -718,14 +739,18 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 		if (!priv->vport_meta_mask) {
 			DRV_LOG(ERR, "vport zero mask for port %d"
 				     " on bonding device %s",
-				     spawn->ibv_port, spawn->ibv_dev->name);
+				     spawn->phys_port,
+				     mlx5_os_get_dev_device_name
+							(spawn->phys_dev));
 			err = ENOTSUP;
 			goto error;
 		}
 		if (priv->vport_meta_tag & ~priv->vport_meta_mask) {
 			DRV_LOG(ERR, "invalid vport tag for port %d"
 				     " on bonding device %s",
-				     spawn->ibv_port, spawn->ibv_dev->name);
+				     spawn->phys_port,
+				     mlx5_os_get_dev_device_name
+							(spawn->phys_dev));
 			err = ENOTSUP;
 			goto error;
 		}
@@ -735,7 +760,8 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 	} else if (spawn->pf_bond >= 0) {
 		DRV_LOG(ERR, "can't deduce vport index for port %d"
 			     " on bonding device %s",
-			     spawn->ibv_port, spawn->ibv_dev->name);
+			     spawn->phys_port,
+			     mlx5_os_get_dev_device_name(spawn->phys_dev));
 		err = ENOTSUP;
 		goto error;
 	} else {
@@ -1491,13 +1517,15 @@ mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 		MLX5_ASSERT(np);
 		for (i = 1; i <= np; ++i) {
 			list[ns].max_port = np;
-			list[ns].ibv_port = i;
-			list[ns].ibv_dev = ibv_match[0];
+			list[ns].phys_port = i;
+			list[ns].phys_dev = ibv_match[0];
 			list[ns].eth_dev = NULL;
 			list[ns].pci_dev = pci_dev;
 			list[ns].pf_bond = bd;
 			list[ns].ifindex = mlx5_nl_ifindex
-					(nl_rdma, list[ns].ibv_dev->name, i);
+				(nl_rdma,
+				mlx5_os_get_dev_device_name
+						(list[ns].phys_dev), i);
 			if (!list[ns].ifindex) {
 				/*
 				 * No network interface index found for the
@@ -1573,15 +1601,17 @@ mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 		for (i = 0; i != nd; ++i) {
 			memset(&list[ns].info, 0, sizeof(list[ns].info));
 			list[ns].max_port = 1;
-			list[ns].ibv_port = 1;
-			list[ns].ibv_dev = ibv_match[i];
+			list[ns].phys_port = 1;
+			list[ns].phys_dev = ibv_match[i];
 			list[ns].eth_dev = NULL;
 			list[ns].pci_dev = pci_dev;
 			list[ns].pf_bond = -1;
 			list[ns].ifindex = 0;
 			if (nl_rdma >= 0)
 				list[ns].ifindex = mlx5_nl_ifindex
-					(nl_rdma, list[ns].ibv_dev->name, 1);
+				(nl_rdma,
+				mlx5_os_get_dev_device_name
+						(list[ns].phys_dev), 1);
 			if (!list[ns].ifindex) {
 				char ifname[IF_NAMESIZE];
 
@@ -1858,7 +1888,7 @@ mlx5_os_open_device(const struct mlx5_dev_spawn_data *spawn,
 	dbmap_env = mlx5_config_doorbell_mapping_env(config);
 	/* Try to open IB device with DV first, then usual Verbs. */
 	errno = 0;
-	sh->ctx = mlx5_glue->dv_open_device(spawn->ibv_dev);
+	sh->ctx = mlx5_glue->dv_open_device(spawn->phys_dev);
 	if (sh->ctx) {
 		sh->devx = 1;
 		DRV_LOG(DEBUG, "DevX is supported");
@@ -1866,7 +1896,7 @@ mlx5_os_open_device(const struct mlx5_dev_spawn_data *spawn,
 		mlx5_restore_doorbell_mapping_env(dbmap_env);
 	} else {
 		/* The environment variable is still configured. */
-		sh->ctx = mlx5_glue->open_device(spawn->ibv_dev);
+		sh->ctx = mlx5_glue->open_device(spawn->phys_dev);
 		err = errno ? errno : ENODEV;
 		/*
 		 * The environment variable is not needed anymore,
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 16ab8b0..7c5e23d 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -613,7 +613,8 @@ mlx5_alloc_shared_ibctx(const struct mlx5_dev_spawn_data *spawn,
 	pthread_mutex_lock(&mlx5_ibv_list_mutex);
 	/* Search for IB context by device name. */
 	LIST_FOREACH(sh, &mlx5_ibv_list, next) {
-		if (!strcmp(sh->ibdev_name, spawn->ibv_dev->name)) {
+		if (!strcmp(sh->ibdev_name,
+			mlx5_os_get_dev_device_name(spawn->phys_dev))) {
 			sh->refcnt++;
 			goto exit;
 		}
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index eca4472..8c4b234 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -98,11 +98,11 @@ struct mlx5_dev_attr {
 /** Data associated with devices to spawn. */
 struct mlx5_dev_spawn_data {
 	uint32_t ifindex; /**< Network interface index. */
-	uint32_t max_port; /**< IB device maximal port index. */
-	uint32_t ibv_port; /**< IB device physical port index. */
+	uint32_t max_port; /**< Device maximal port index. */
+	uint32_t phys_port; /**< Device physical port index. */
 	int pf_bond; /**< bonding device PF index. < 0 - no bonding */
 	struct mlx5_switch_info info; /**< Switch information. */
-	struct ibv_device *ibv_dev; /**< Associated IB device. */
+	void *phys_dev; /**< Associated physical device. */
 	struct rte_eth_dev *eth_dev; /**< Associated Ethernet device. */
 	struct rte_pci_device *pci_dev; /**< Backend PCI device. */
 };
@@ -911,6 +911,7 @@ void mlx5_flow_meter_detach(struct mlx5_flow_meter *fm);
 struct rte_pci_driver;
 const char *mlx5_os_get_ctx_device_name(void *ctx);
 const char *mlx5_os_get_ctx_device_path(void *ctx);
+const char *mlx5_os_get_dev_device_name(void *dev);
 uint32_t mlx5_os_get_umem_id(void *umem);
 int mlx5_os_get_dev_attr(void *ctx, struct mlx5_dev_attr *dev_attr);
 void mlx5_os_free_shared_dr(struct mlx5_priv *priv);
-- 
2.8.4


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [dpdk-dev] [PATCH v1 0/8] mlx5 PMD multi OS support
  2020-06-03 15:05 [dpdk-dev] [PATCH v1 0/8] mlx5 PMD multi OS support Ophir Munk
                   ` (7 preceding siblings ...)
  2020-06-03 15:06 ` [dpdk-dev] [PATCH v1 8/8] net/mlx5: remove ibv dependency in spawn struct Ophir Munk
@ 2020-06-07  8:49 ` Raslan Darawsheh
  8 siblings, 0 replies; 17+ messages in thread
From: Raslan Darawsheh @ 2020-06-07  8:49 UTC (permalink / raw)
  To: Ophir Munk, dev, Matan Azrad

Hi,

> -----Original Message-----
> From: Ophir Munk <ophirmu@mellanox.com>
> Sent: Wednesday, June 3, 2020 6:06 PM
> To: dev@dpdk.org; Matan Azrad <matan@mellanox.com>; Raslan
> Darawsheh <rasland@mellanox.com>
> Cc: Ophir Munk <ophirmu@mellanox.com>
> Subject: [PATCH v1 0/8] mlx5 PMD multi OS support
> 
> This patch series is part of preparing mlx5 PMD to compile and run under
> multiple OSs.
> 
> v1:
> Initial release
> 
> Ophir Munk (8):
>   net/mlx5: rename mlx5 ibv shared struct
>   net/mlx5: add mlx5 Linux specific file with getter functions
>   drivers: remove mlx5 protection domain dependency on ibv
>   net/mlx5: remove attributes dependency on ibv and dv
>   net/mlx5: remove umem field dependency on dv
>   net/mlx5: refactor PCI probing under Linux
>   net/mlx5: add mlx5 header file specific to Linux
>   net/mlx5: remove ibv dependency in spawn struct
> 
>  drivers/common/mlx5/mlx5_common_mr.c |   24 +-
>  drivers/common/mlx5/mlx5_common_mr.h |    6 +-
>  drivers/net/mlx5/Makefile            |    2 +
>  drivers/net/mlx5/linux/meson.build   |    8 +
>  drivers/net/mlx5/linux/mlx5_os.c     | 1992
> ++++++++++++++++++++++++++++++++++
>  drivers/net/mlx5/linux/mlx5_os.h     |   18 +
>  drivers/net/mlx5/meson.build         |    5 +-
>  drivers/net/mlx5/mlx5.c              | 1900 ++------------------------------
>  drivers/net/mlx5/mlx5.h              |  100 +-
>  drivers/net/mlx5/mlx5_ethdev.c       |   12 +-
>  drivers/net/mlx5/mlx5_flow.c         |   20 +-
>  drivers/net/mlx5/mlx5_flow_dv.c      |   28 +-
>  drivers/net/mlx5/mlx5_mp.c           |    2 +-
>  drivers/net/mlx5/mlx5_mr.c           |   10 +-
>  drivers/net/mlx5/mlx5_rxq.c          |    6 +-
>  drivers/net/mlx5/mlx5_txq.c          |   18 +-
>  16 files changed, 2233 insertions(+), 1918 deletions(-)
>  create mode 100644 drivers/net/mlx5/linux/meson.build
>  create mode 100644 drivers/net/mlx5/linux/mlx5_os.c
>  create mode 100644 drivers/net/mlx5/linux/mlx5_os.h
> 
> --
> 2.8.4

Series applied to next-net-mlx,
Kindest regards,
Raslan Darawsheh

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [dpdk-dev] [PATCH v1 2/8] net/mlx5: add mlx5 Linux specific file with getter functions
  2020-06-03 15:05 ` [dpdk-dev] [PATCH v1 2/8] net/mlx5: add mlx5 Linux specific file with getter functions Ophir Munk
@ 2020-06-08 11:20   ` Ferruh Yigit
  2020-06-09  8:40     ` Ophir Munk
  0 siblings, 1 reply; 17+ messages in thread
From: Ferruh Yigit @ 2020-06-08 11:20 UTC (permalink / raw)
  To: Ophir Munk, dev, Matan Azrad, Raslan Darawsheh

On 6/3/2020 4:05 PM, Ophir Munk wrote:
> 'ctx' type (field in 'struct mlx5_ctx_shared') is changed from 'struct
> ibv_context *' to 'void *'.  'ctx' members which are verbs dependent
> (e.g. device_name) will be accessed through getter functions which are
> added to a new file under Linux directory: linux/mlx5_os.c.
> 
> Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
> Acked-by: Matan Azrad <matan@mellanox.com>

<...>

> @@ -0,0 +1,87 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright 2015 6WIND S.A.
> + * Copyright 2020 Mellanox Technologies, Ltd
> + */

Just to double check: is '6WIND' a copy/paste error in this new file?

<...>

> @@ -677,13 +677,14 @@ mlx5_dev_shared_handler_install(struct mlx5_dev_ctx_shared *sh)
>  	int flags;
>  
>  	sh->intr_handle.fd = -1;
> -	flags = fcntl(sh->ctx->async_fd, F_GETFL);
> -	ret = fcntl(sh->ctx->async_fd, F_SETFL, flags | O_NONBLOCK);
> +	flags = fcntl(((struct ibv_context *)sh->ctx)->async_fd, F_GETFL);
> +	ret = fcntl(((struct ibv_context *)sh->ctx)->async_fd,
> +		    F_SETFL, flags | O_NONBLOCK);

As far as I understand, you are trying to remove the dependency on ibverbs, at
least at the root level; linux/x.c will still have that dependency. (I assume this
is for Windows support.)
The 'mlx5_os_get_ctx_device_path()' wrapper seems like it can work for that, but what
is the point of the usage above, where you explicitly cast "void *" to "(struct
ibv_context *)"? You still keep the ibv dependency there.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [dpdk-dev] [PATCH v1 7/8] net/mlx5: add mlx5 header file specific to Linux
  2020-06-03 15:06 ` [dpdk-dev] [PATCH v1 7/8] net/mlx5: add mlx5 header file specific to Linux Ophir Munk
@ 2020-06-08 11:31   ` Ferruh Yigit
  2020-06-09  8:44     ` Ophir Munk
  0 siblings, 1 reply; 17+ messages in thread
From: Ferruh Yigit @ 2020-06-08 11:31 UTC (permalink / raw)
  To: Ophir Munk, dev, Matan Azrad, Raslan Darawsheh

On 6/3/2020 4:06 PM, Ophir Munk wrote:
> File drivers/net/linux/mlx5_os.h is added. It includes specific
> Linux definitions such as PCI driver flags, link state changes
> interrupts, link removal interrupts, etc.
> 
> Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
> Acked-by: Matan Azrad <matan@mellanox.com>

<...>

> diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
> index f5d9aad..eca4472 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -41,6 +41,7 @@
>  
>  #include "mlx5_defs.h"
>  #include "mlx5_utils.h"
> +#include "mlx5_os.h"

Assuming that you will have multiple "mlx5_os.h" headers, one for each OS, like
"linux/mlx5_os.h" & "windows/mlx5_os.h", doesn't it make sense to include it as
"#include linux/mlx5_os.h" and remove the relevant "-I" from CFLAGS in the makefile?

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [dpdk-dev] [PATCH v1 2/8] net/mlx5: add mlx5 Linux specific file with getter functions
  2020-06-08 11:20   ` Ferruh Yigit
@ 2020-06-09  8:40     ` Ophir Munk
  2020-06-09  8:43       ` Ferruh Yigit
  0 siblings, 1 reply; 17+ messages in thread
From: Ophir Munk @ 2020-06-09  8:40 UTC (permalink / raw)
  To: Ferruh Yigit, dev, Matan Azrad, Raslan Darawsheh

Hi Ferruh,
Please see inline

> -----Original Message-----
> From: Ferruh Yigit <ferruh.yigit@intel.com>
> Sent: Monday, June 8, 2020 2:20 PM
> To: Ophir Munk <ophirmu@mellanox.com>; dev@dpdk.org; Matan Azrad
> <matan@mellanox.com>; Raslan Darawsheh <rasland@mellanox.com>
> Subject: Re: [dpdk-dev] [PATCH v1 2/8] net/mlx5: add mlx5 Linux specific file
> with getter functions
> 
> On 6/3/2020 4:05 PM, Ophir Munk wrote:
> > 'ctx' type (field in 'struct mlx5_ctx_shared') is changed from 'struct
> > ibv_context *' to 'void *'.  'ctx' members which are verbs dependent
> > (e.g. device_name) will be accessed through getter functions which are
> > added to a new file under Linux directory: linux/mlx5_os.c.
> >
> > Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
> > Acked-by: Matan Azrad <matan@mellanox.com>
> 
> <...>
> 
> > @@ -0,0 +1,87 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright 2015 6WIND S.A.
> > + * Copyright 2020 Mellanox Technologies, Ltd  */
> 
> Just to double check if '6WIND' is copy/paste error in this new file?
> 

Some functions were moved from file mlx5.c (which carries the 6WIND copyright) to this
file and renamed.
Should the 6WIND copyright be kept or removed in this file (mlx5_os.c)?

> <...>
> 
> > @@ -677,13 +677,14 @@ mlx5_dev_shared_handler_install(struct
> mlx5_dev_ctx_shared *sh)
> >  	int flags;
> >
> >  	sh->intr_handle.fd = -1;
> > -	flags = fcntl(sh->ctx->async_fd, F_GETFL);
> > -	ret = fcntl(sh->ctx->async_fd, F_SETFL, flags | O_NONBLOCK);
> > +	flags = fcntl(((struct ibv_context *)sh->ctx)->async_fd, F_GETFL);
> > +	ret = fcntl(((struct ibv_context *)sh->ctx)->async_fd,
> > +		    F_SETFL, flags | O_NONBLOCK);
> 
> As far as I understand you are trying to remove to the dependency to ibverbs,
> at least in root level, linux/x.c will have that dependency. (I assume this is for
> Windows support) The 'mlx5_os_get_ctx_device_path()' wrapper seems can
> work for it but what is the point of above usage, that you explicitly cast "void
> *" to "(struct ibv_context *)", so you still keep the ibv dependency?

The reason for keeping an explicit cast for async_fd (and not creating a new getter API)
is that this code snippet will be moved under linux/ in the next commits, where no getter function is needed.
I wanted to avoid adding a getter function here and then removing it in a follow-up commit.
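
To illustrate the trade-off, here is a minimal sketch of the two options, reusing
the fcntl()/verbs context from the hunk quoted above; the getter name
mlx5_os_get_ctx_async_fd() is only hypothetical and is not part of this series:

	/* Option kept in this series: cast the opaque context at the call site. */
	flags = fcntl(((struct ibv_context *)sh->ctx)->async_fd, F_GETFL);

	/* Hypothetical alternative: a Linux-only getter in linux/mlx5_os.c,
	 * which would be removed again once the caller moves under linux/.
	 */
	int
	mlx5_os_get_ctx_async_fd(void *ctx)
	{
		return ((struct ibv_context *)ctx)->async_fd;
	}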


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [dpdk-dev] [PATCH v1 2/8] net/mlx5: add mlx5 Linux specific file with getter functions
  2020-06-09  8:40     ` Ophir Munk
@ 2020-06-09  8:43       ` Ferruh Yigit
  0 siblings, 0 replies; 17+ messages in thread
From: Ferruh Yigit @ 2020-06-09  8:43 UTC (permalink / raw)
  To: Ophir Munk, dev, Matan Azrad, Raslan Darawsheh

On 6/9/2020 9:40 AM, Ophir Munk wrote:
> Hi Ferruh,
> Please see inline
> 
>> -----Original Message-----
>> From: Ferruh Yigit <ferruh.yigit@intel.com>
>> Sent: Monday, June 8, 2020 2:20 PM
>> To: Ophir Munk <ophirmu@mellanox.com>; dev@dpdk.org; Matan Azrad
>> <matan@mellanox.com>; Raslan Darawsheh <rasland@mellanox.com>
>> Subject: Re: [dpdk-dev] [PATCH v1 2/8] net/mlx5: add mlx5 Linux specific file
>> with getter functions
>>
>> On 6/3/2020 4:05 PM, Ophir Munk wrote:
>>> 'ctx' type (field in 'struct mlx5_ctx_shared') is changed from 'struct
>>> ibv_context *' to 'void *'.  'ctx' members which are verbs dependent
>>> (e.g. device_name) will be accessed through getter functions which are
>>> added to a new file under Linux directory: linux/mlx5_os.c.
>>>
>>> Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
>>> Acked-by: Matan Azrad <matan@mellanox.com>
>>
>> <...>
>>
>>> @@ -0,0 +1,87 @@
>>> +/* SPDX-License-Identifier: BSD-3-Clause
>>> + * Copyright 2015 6WIND S.A.
>>> + * Copyright 2020 Mellanox Technologies, Ltd  */
>>
>> Just to double check if '6WIND' is copy/paste error in this new file?
>>
> 
> Some functions were moved from file mlx5.c (with 6WIND copyright) to this file
> and renamed. 
> Should 6WIND copyright be kept or removed in this file (mlx5_os.c)?

No. I just wanted to confirm this is not a copy/paste error, which happens with
new files, but was done intentionally. Since you did it intentionally, that is good.

> 
>> <...>
>>
>>> @@ -677,13 +677,14 @@ mlx5_dev_shared_handler_install(struct
>> mlx5_dev_ctx_shared *sh)
>>>  	int flags;
>>>
>>>  	sh->intr_handle.fd = -1;
>>> -	flags = fcntl(sh->ctx->async_fd, F_GETFL);
>>> -	ret = fcntl(sh->ctx->async_fd, F_SETFL, flags | O_NONBLOCK);
>>> +	flags = fcntl(((struct ibv_context *)sh->ctx)->async_fd, F_GETFL);
>>> +	ret = fcntl(((struct ibv_context *)sh->ctx)->async_fd,
>>> +		    F_SETFL, flags | O_NONBLOCK);
>>
>> As far as I understand you are trying to remove to the dependency to ibverbs,
>> at least in root level, linux/x.c will have that dependency. (I assume this is for
>> Windows support) The 'mlx5_os_get_ctx_device_path()' wrapper seems can
>> work for it but what is the point of above usage, that you explicitly cast "void
>> *" to "(struct ibv_context *)", so you still keep the ibv dependency?
> 
> The reason for keeping an explicit cast for async_fd (and not creating a new getter API)
> is that this code snippet will be moved under linux in next commits where no getter function is needed.
> I wanted to avoid adding a getter function here and then remove it in a follow up commit. 
> 

Makes sense if it is removed later, thanks for the clarification.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [dpdk-dev] [PATCH v1 7/8] net/mlx5: add mlx5 header file specific to Linux
  2020-06-08 11:31   ` Ferruh Yigit
@ 2020-06-09  8:44     ` Ophir Munk
  2020-06-09 11:48       ` Ferruh Yigit
  0 siblings, 1 reply; 17+ messages in thread
From: Ophir Munk @ 2020-06-09  8:44 UTC (permalink / raw)
  To: Ferruh Yigit, dev, Matan Azrad, Raslan Darawsheh

Hi,
Please find comments inline.

> -----Original Message-----
> From: Ferruh Yigit <ferruh.yigit@intel.com>
> Sent: Monday, June 8, 2020 2:32 PM
> To: Ophir Munk <ophirmu@mellanox.com>; dev@dpdk.org; Matan Azrad
> <matan@mellanox.com>; Raslan Darawsheh <rasland@mellanox.com>
> Subject: Re: [dpdk-dev] [PATCH v1 7/8] net/mlx5: add mlx5 header file
> specific to Linux
> 
> On 6/3/2020 4:06 PM, Ophir Munk wrote:
> > File drivers/net/linux/mlx5_os.h is added. It includes specific Linux
> > definitions such as PCI driver flags, link state changes interrupts,
> > link removal interrupts, etc.
> >
> > Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
> > Acked-by: Matan Azrad <matan@mellanox.com>
> 
> <...>
> 
> > diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index
> > f5d9aad..eca4472 100644
> > --- a/drivers/net/mlx5/mlx5.h
> > +++ b/drivers/net/mlx5/mlx5.h
> > @@ -41,6 +41,7 @@
> >
> >  #include "mlx5_defs.h"
> >  #include "mlx5_utils.h"
> > +#include "mlx5_os.h"
> 
> Assuming that you will have multiple "mlx5_os.h", one for each OS, like
> "linux/mlx5_os.h" & "windows/mlx5_os.h", doesn't it make sense to include
> it as "#include linux/mlx5_os.h", and remove relevant "-I" from CFLAGS in
> makefile?

IMO it doesn't make sense.
mlx5.h is a shared file that will be compiled under Windows as well.
That wouldn't be possible if I used #include "linux/mlx5_os.h".


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [dpdk-dev] [PATCH v1 7/8] net/mlx5: add mlx5 header file specific to Linux
  2020-06-09  8:44     ` Ophir Munk
@ 2020-06-09 11:48       ` Ferruh Yigit
  2020-06-09 14:49         ` Ophir Munk
  0 siblings, 1 reply; 17+ messages in thread
From: Ferruh Yigit @ 2020-06-09 11:48 UTC (permalink / raw)
  To: Ophir Munk, dev, Matan Azrad, Raslan Darawsheh

On 6/9/2020 9:44 AM, Ophir Munk wrote:
> Hi,
> Please find comments inline.
> 
>> -----Original Message-----
>> From: Ferruh Yigit <ferruh.yigit@intel.com>
>> Sent: Monday, June 8, 2020 2:32 PM
>> To: Ophir Munk <ophirmu@mellanox.com>; dev@dpdk.org; Matan Azrad
>> <matan@mellanox.com>; Raslan Darawsheh <rasland@mellanox.com>
>> Subject: Re: [dpdk-dev] [PATCH v1 7/8] net/mlx5: add mlx5 header file
>> specific to Linux
>>
>> On 6/3/2020 4:06 PM, Ophir Munk wrote:
>>> File drivers/net/linux/mlx5_os.h is added. It includes specific Linux
>>> definitions such as PCI driver flags, link state changes interrupts,
>>> link removal interrupts, etc.
>>>
>>> Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
>>> Acked-by: Matan Azrad <matan@mellanox.com>
>>
>> <...>
>>
>>> diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index
>>> f5d9aad..eca4472 100644
>>> --- a/drivers/net/mlx5/mlx5.h
>>> +++ b/drivers/net/mlx5/mlx5.h
>>> @@ -41,6 +41,7 @@
>>>
>>>  #include "mlx5_defs.h"
>>>  #include "mlx5_utils.h"
>>> +#include "mlx5_os.h"
>>
>> Assuming that you will have multiple "mlx5_os.h", one for each OS, like
>> "linux/mlx5_os.h" & "windows/mlx5_os.h", doesn't it make sense to include
>> it as "#include linux/mlx5_os.h", and remove relevant "-I" from CFLAGS in
>> makefile?
> 
> IMO it doesn't make sense. 
> mlx5.h is a shared file that will be compiled under Windows as well. 
> It wouldn't be possible if I used #include linux/mlx5_os.h
> 

It is possible with an #ifdef around the include (#ifdef Linux).

But if you keep it as #include "mlx5_os.h" and have this header for multiple OSs,
then you will have to have the ifdef in the build files instead.

Right now you are not doing either, since there is only one supported platform; I am
OK to proceed and postpone this until the second platform support is ready.
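
As a rough sketch of the two alternatives being discussed, assuming DPDK's
RTE_EXEC_ENV_LINUX build macro is available (the Windows header below is only
illustrative and does not exist in this series):

	/* Alternative 1: guard per-OS includes inside the shared mlx5.h. */
	#ifdef RTE_EXEC_ENV_LINUX
	#include "linux/mlx5_os.h"
	#else
	#include "windows/mlx5_os.h"
	#endif

	/* Alternative 2 (what this series does): include by bare name and let
	 * the per-OS build files add the matching -I include path.
	 */
	#include "mlx5_os.h"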

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [dpdk-dev] [PATCH v1 7/8] net/mlx5: add mlx5 header file specific to Linux
  2020-06-09 11:48       ` Ferruh Yigit
@ 2020-06-09 14:49         ` Ophir Munk
  0 siblings, 0 replies; 17+ messages in thread
From: Ophir Munk @ 2020-06-09 14:49 UTC (permalink / raw)
  To: Ferruh Yigit, dev, Matan Azrad, Raslan Darawsheh



> -----Original Message-----
> From: Ferruh Yigit <ferruh.yigit@intel.com>
> Sent: Tuesday, June 9, 2020 2:49 PM
> To: Ophir Munk <ophirmu@mellanox.com>; dev@dpdk.org; Matan Azrad
> <matan@mellanox.com>; Raslan Darawsheh <rasland@mellanox.com>
> Subject: Re: [dpdk-dev] [PATCH v1 7/8] net/mlx5: add mlx5 header file
> specific to Linux
> <...>
> >> On 6/3/2020 4:06 PM, Ophir Munk wrote:
> >>> File drivers/net/linux/mlx5_os.h is added. It includes specific
> >>> Linux definitions such as PCI driver flags, link state changes
> >>> interrupts, link removal interrupts, etc.
> >>>
> >>> Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
> >>> Acked-by: Matan Azrad <matan@mellanox.com>
> >>
> >> <...>
> >>
> >>> diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index
> >>> f5d9aad..eca4472 100644
> >>> --- a/drivers/net/mlx5/mlx5.h
> >>> +++ b/drivers/net/mlx5/mlx5.h
> >>> @@ -41,6 +41,7 @@
> >>>
> >>>  #include "mlx5_defs.h"
> >>>  #include "mlx5_utils.h"
> >>> +#include "mlx5_os.h"
> >>
> >> Assuming that you will have multiple "mlx5_os.h", one for each OS,
> >> like "linux/mlx5_os.h" & "windows/mlx5_os.h", doesn't it make sense
> >> to include it as "#include linux/mlx5_os.h", and remove relevant "-I"
> >> from CFLAGS in makefile?
> >
> > IMO it doesn't make sense.
> > mlx5.h is a shared file that will be compiled under Windows as well.
> > It wouldn't be possible if I used #include linux/mlx5_os.h
> >
> 
> It is possible with an #ifdef around include. (#ifdef Linux)
> 
> But if you keep as #include "mlx5_os.h" and have this header for multiple OS,
> than you will have to have the ifdef in the build files.
> 
> Right now you are not doing both since there is only one platform support, I
> am OK to proceed and postpone the second platform support until it is ready.

Please note that Windows DPDK will only be built with the meson build system (no Makefile usage under Windows).


^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread

Thread overview: 17+ messages
2020-06-03 15:05 [dpdk-dev] [PATCH v1 0/8] mlx5 PMD multi OS support Ophir Munk
2020-06-03 15:05 ` [dpdk-dev] [PATCH v1 1/8] net/mlx5: rename mlx5 ibv shared struct Ophir Munk
2020-06-03 15:05 ` [dpdk-dev] [PATCH v1 2/8] net/mlx5: add mlx5 Linux specific file with getter functions Ophir Munk
2020-06-08 11:20   ` Ferruh Yigit
2020-06-09  8:40     ` Ophir Munk
2020-06-09  8:43       ` Ferruh Yigit
2020-06-03 15:05 ` [dpdk-dev] [PATCH v1 3/8] drivers: remove mlx5 protection domain dependency on ibv Ophir Munk
2020-06-03 15:05 ` [dpdk-dev] [PATCH v1 4/8] net/mlx5: remove attributes dependency on ibv and dv Ophir Munk
2020-06-03 15:05 ` [dpdk-dev] [PATCH v1 5/8] net/mlx5: remove umem field dependency on dv Ophir Munk
2020-06-03 15:06 ` [dpdk-dev] [PATCH v1 6/8] net/mlx5: refactor PCI probing under Linux Ophir Munk
2020-06-03 15:06 ` [dpdk-dev] [PATCH v1 7/8] net/mlx5: add mlx5 header file specific to Linux Ophir Munk
2020-06-08 11:31   ` Ferruh Yigit
2020-06-09  8:44     ` Ophir Munk
2020-06-09 11:48       ` Ferruh Yigit
2020-06-09 14:49         ` Ophir Munk
2020-06-03 15:06 ` [dpdk-dev] [PATCH v1 8/8] net/mlx5: remove ibv dependency in spawn struct Ophir Munk
2020-06-07  8:49 ` [dpdk-dev] [PATCH v1 0/8] mlx5 PMD multi OS support Raslan Darawsheh
