* [dpdk-dev] [PATCH v1 1/2] net/mlx5: change eth device reference for secondary process
@ 2017-08-24 14:03 Xueming Li
2017-08-24 14:03 ` [dpdk-dev] [PATCH v1 2/2] net/mlx5: add multiple process support Xueming Li
` (21 more replies)
0 siblings, 22 replies; 41+ messages in thread
From: Xueming Li @ 2017-08-24 14:03 UTC (permalink / raw)
To: Nelio Laranjeiro; +Cc: Xueming Li, dev
The rte_eth_dev created by the primary process was not available in the
secondary process, so it was not possible to use the primary process's
local memory object from a secondary process.
This patch modifies the references to the primary rte_eth_dev object so
that the secondary process's local rte_eth_dev is used instead.
Cc: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Signed-off-by: Xueming Li <xuemingl@mellanox.com>
---
drivers/net/mlx5/mlx5.h | 6 ++---
drivers/net/mlx5/mlx5_ethdev.c | 52 ++++++++++++++++++++++-------------------
drivers/net/mlx5/mlx5_fdir.c | 3 +++
drivers/net/mlx5/mlx5_rss.c | 3 +++
drivers/net/mlx5/mlx5_rxq.c | 2 ++
drivers/net/mlx5/mlx5_trigger.c | 4 ++--
6 files changed, 41 insertions(+), 29 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 684a603..2dee07c 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -95,7 +95,7 @@ struct mlx5_xstats_ctrl {
};
struct priv {
- struct rte_eth_dev *dev; /* Ethernet device. */
+ struct rte_eth_dev *dev; /* Ethernet device of master process. */
struct ibv_context *ctx; /* Verbs context. */
struct ibv_device_attr_ex device_attr; /* Device properties. */
struct ibv_pd *pd; /* Protection Domain. */
@@ -223,8 +223,8 @@ int mlx5_ibv_device_to_pci_addr(const struct ibv_device *,
int mlx5_set_link_down(struct rte_eth_dev *dev);
int mlx5_set_link_up(struct rte_eth_dev *dev);
struct priv *mlx5_secondary_data_setup(struct priv *priv);
-void priv_select_tx_function(struct priv *);
-void priv_select_rx_function(struct priv *);
+void mlx5_dev_select_tx_function(struct rte_eth_dev *dev);
+void mlx5_dev_select_rx_function(struct rte_eth_dev *dev);
/* mlx5_mac.c */
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index f5167e0..fce7dd5 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -1038,7 +1038,7 @@ struct priv *
* burst function again.
*/
if (!ret)
- priv_select_rx_function(priv);
+ mlx5_dev_select_rx_function(dev);
out:
priv_unlock(priv);
assert(ret >= 0);
@@ -1347,7 +1347,7 @@ struct priv *
/**
* Change the link state (UP / DOWN).
*
- * @param priv
+ * @param dev
* Pointer to Ethernet device structure.
* @param up
* Nonzero for link up, otherwise link down.
@@ -1356,17 +1356,17 @@ struct priv *
* 0 on success, errno value on failure.
*/
static int
-priv_set_link(struct priv *priv, int up)
+mlx5_dev_set_link(struct rte_eth_dev *dev, int up)
{
- struct rte_eth_dev *dev = priv->dev;
+ struct priv *priv = dev->data->dev_private;
int err;
if (up) {
err = priv_set_flags(priv, ~IFF_UP, IFF_UP);
if (err)
return err;
- priv_select_tx_function(priv);
- priv_select_rx_function(priv);
+ mlx5_dev_select_tx_function(dev);
+ mlx5_dev_select_rx_function(dev);
} else {
err = priv_set_flags(priv, ~IFF_UP, ~IFF_UP);
if (err)
@@ -1393,7 +1393,7 @@ struct priv *
int err;
priv_lock(priv);
- err = priv_set_link(priv, 0);
+ err = mlx5_dev_set_link(dev, 0);
priv_unlock(priv);
return err;
}
@@ -1414,7 +1414,7 @@ struct priv *
int err;
priv_lock(priv);
- err = priv_set_link(priv, 1);
+ err = mlx5_dev_set_link(dev, 1);
priv_unlock(priv);
return err;
}
@@ -1560,8 +1560,8 @@ struct priv *
rte_mb();
priv->dev->data = &sd->data;
rte_mb();
- priv_select_tx_function(priv);
- priv_select_rx_function(priv);
+ mlx5_dev_select_tx_function(priv->dev);
+ mlx5_dev_select_rx_function(priv->dev);
priv_unlock(priv);
end:
/* More sanity checks. */
@@ -1579,30 +1579,32 @@ struct priv *
/**
* Configure the TX function to use.
*
- * @param priv
- * Pointer to private structure.
+ * @param dev
+ * Pointer to device structure.
*/
void
-priv_select_tx_function(struct priv *priv)
+mlx5_dev_select_tx_function(struct rte_eth_dev *dev)
{
- priv->dev->tx_pkt_burst = mlx5_tx_burst;
+ struct priv *priv = dev->data->dev_private;
+
+ dev->tx_pkt_burst = mlx5_tx_burst;
/* Select appropriate TX function. */
if (priv->mps == MLX5_MPW_ENHANCED) {
if (priv_check_vec_tx_support(priv) > 0) {
if (priv_check_raw_vec_tx_support(priv) > 0)
- priv->dev->tx_pkt_burst = mlx5_tx_burst_raw_vec;
+ dev->tx_pkt_burst = mlx5_tx_burst_raw_vec;
else
- priv->dev->tx_pkt_burst = mlx5_tx_burst_vec;
+ dev->tx_pkt_burst = mlx5_tx_burst_vec;
DEBUG("selected Enhanced MPW TX vectorized function");
} else {
- priv->dev->tx_pkt_burst = mlx5_tx_burst_empw;
+ dev->tx_pkt_burst = mlx5_tx_burst_empw;
DEBUG("selected Enhanced MPW TX function");
}
} else if (priv->mps && priv->txq_inline) {
- priv->dev->tx_pkt_burst = mlx5_tx_burst_mpw_inline;
+ dev->tx_pkt_burst = mlx5_tx_burst_mpw_inline;
DEBUG("selected MPW inline TX function");
} else if (priv->mps) {
- priv->dev->tx_pkt_burst = mlx5_tx_burst_mpw;
+ dev->tx_pkt_burst = mlx5_tx_burst_mpw;
DEBUG("selected MPW TX function");
}
}
@@ -1610,17 +1612,19 @@ struct priv *
/**
* Configure the RX function to use.
*
- * @param priv
- * Pointer to private structure.
+ * @param dev
+ * Pointer to device structure.
*/
void
-priv_select_rx_function(struct priv *priv)
+mlx5_dev_select_rx_function(struct rte_eth_dev *dev)
{
+ struct priv *priv = dev->data->dev_private;
+
if (priv_check_vec_rx_support(priv) > 0) {
priv_prep_vec_rx_function(priv);
- priv->dev->rx_pkt_burst = mlx5_rx_burst_vec;
+ dev->rx_pkt_burst = mlx5_rx_burst_vec;
DEBUG("selected RX vectorized function");
} else {
- priv->dev->rx_pkt_burst = mlx5_rx_burst;
+ dev->rx_pkt_burst = mlx5_rx_burst;
}
}
diff --git a/drivers/net/mlx5/mlx5_fdir.c b/drivers/net/mlx5/mlx5_fdir.c
index 6acc053..0f3b70a 100644
--- a/drivers/net/mlx5/mlx5_fdir.c
+++ b/drivers/net/mlx5/mlx5_fdir.c
@@ -1075,6 +1075,9 @@ struct mlx5_fdir_filter {
int ret = EINVAL;
struct priv *priv = dev->data->dev_private;
+ if (mlx5_is_secondary())
+ return -E_RTE_SECONDARY;
+
switch (filter_type) {
case RTE_ETH_FILTER_GENERIC:
if (filter_op != RTE_ETH_FILTER_GET)
diff --git a/drivers/net/mlx5/mlx5_rss.c b/drivers/net/mlx5/mlx5_rss.c
index a2dd7d1..7fd1ac1 100644
--- a/drivers/net/mlx5/mlx5_rss.c
+++ b/drivers/net/mlx5/mlx5_rss.c
@@ -357,6 +357,9 @@
int ret;
struct priv *priv = dev->data->dev_private;
+ if (mlx5_is_secondary())
+ return -E_RTE_SECONDARY;
+
mlx5_dev_stop(dev);
priv_lock(priv);
ret = priv_dev_rss_reta_update(priv, reta_conf, reta_size);
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index da9f2f7..18e711e 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -1376,6 +1376,8 @@
unsigned int count = 0;
struct rte_intr_handle *intr_handle = priv->dev->intr_handle;
+ assert(!mlx5_is_secondary());
+
if (!priv->dev->data->dev_conf.intr_conf.rxq)
return 0;
priv_rx_intr_vec_disable(priv);
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 595a9e0..5e7f979 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -73,8 +73,8 @@
return 0;
}
/* Update Rx/Tx callback. */
- priv_select_tx_function(priv);
- priv_select_rx_function(priv);
+ mlx5_dev_select_tx_function(dev);
+ mlx5_dev_select_rx_function(dev);
DEBUG("%p: allocating and configuring hash RX queues", (void *)dev);
err = priv_create_hash_rxqs(priv);
if (!err)
--
1.8.3.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* [dpdk-dev] [PATCH v1 2/2] net/mlx5: add multiple process support
2017-08-24 14:03 [dpdk-dev] [PATCH v1 1/2] net/mlx5: change eth device reference for secondary process Xueming Li
@ 2017-08-24 14:03 ` Xueming Li
2017-08-25 7:27 ` Nélio Laranjeiro
2017-08-25 6:52 ` [dpdk-dev] [PATCH v1 1/2] net/mlx5: change eth device reference for secondary process Nélio Laranjeiro
` (20 subsequent siblings)
21 siblings, 1 reply; 41+ messages in thread
From: Xueming Li @ 2017-08-24 14:03 UTC (permalink / raw)
To: Nelio Laranjeiro; +Cc: Xueming Li, dev
The PMD uses Verbs objects which were not available in shared memory. In
addition, due to IO pages, it was not possible to use the primary
process Tx queues from a secondary process.
This patch modifies the location where Verbs objects are allocated (from
process memory address space to shared memory address space) and thus
allows a secondary process to use those objects by mapping this shared
memory space into its own memory space.
For Tx IO pages, it uses a Unix socket to get back the communication
channel with the kernel driver from the primary process; this is
necessary to remap those pages in the secondary process memory space and
thus use the same Tx queues.
This is only supported starting from Linux kernel v4.14 and rdma-core v14.
Cc: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Signed-off-by: Xueming Li <xuemingl@mellanox.com>
---
doc/guides/nics/mlx5.rst | 3 +-
drivers/net/mlx5/Makefile | 1 +
drivers/net/mlx5/mlx5.c | 132 ++++++++++++------
drivers/net/mlx5/mlx5.h | 18 +--
drivers/net/mlx5/mlx5_ethdev.c | 215 ++++++------------------------
drivers/net/mlx5/mlx5_rxq.c | 41 ------
drivers/net/mlx5/mlx5_rxtx.h | 5 +-
drivers/net/mlx5/mlx5_socket.c | 294 +++++++++++++++++++++++++++++++++++++++++
drivers/net/mlx5/mlx5_txq.c | 89 ++++++++-----
9 files changed, 501 insertions(+), 297 deletions(-)
create mode 100644 drivers/net/mlx5/mlx5_socket.c
diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index a68b7ad..9eeada4 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -87,7 +87,7 @@ Features
- Flow director (RTE_FDIR_MODE_PERFECT, RTE_FDIR_MODE_PERFECT_MAC_VLAN and
RTE_ETH_FDIR_REJECT).
- Flow API.
-- Secondary process TX is supported.
+- Secondary process.
- KVM and VMware ESX SR-IOV modes are supported.
- RSS hash result is supported.
- Hardware TSO.
@@ -99,7 +99,6 @@ Limitations
- Inner RSS for VXLAN frames is not supported yet.
- Port statistics through software counters only.
- Hardware checksum RX offloads for VXLAN inner header are not supported yet.
-- Secondary process RX is not supported.
Configuration
-------------
diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index 0feed4c..6c8f404 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -52,6 +52,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rss.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_fdir.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_mr.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow.c
+SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_socket.c
# Basic CFLAGS.
CFLAGS += -O3
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 39a159c..3002e7e 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -126,6 +126,52 @@ struct mlx5_args {
}
/**
+ * Verbs callback to allocate a memory. This function should allocate the space
+ * according to the size provided residing inside a huge page.
+ *
+ * @param[in] size
+ * The size in bytes of the memory to allocate.
+ * @param[in] data
+ * A pointer to the callback data.
+ *
+ * @return
+ * a pointer to the allocate space.
+ */
+static void *
+mlx5_extern_alloc_buf(size_t size, void *data)
+{
+ struct priv *priv = data;
+ void *ret;
+ size_t alignment = sysconf(_SC_PAGESIZE);
+
+ assert(data != NULL);
+ assert(!mlx5_is_secondary());
+
+ ret = rte_malloc_socket(__func__, size, alignment,
+ priv->dev->device->numa_node);
+ DEBUG("Extern alloc size: %lu, align: %lu: %p", size, alignment, ret);
+ return ret;
+}
+
+/**
+ * Verbs callback to free a memory.
+ *
+ * @param[in] ptr
+ * A pointer to the memory to free.
+ * @param[in] data
+ * A pointer to the callback data.
+ */
+static void
+mlx5_extern_free_buf(void *ptr, void *data __rte_unused)
+{
+ assert(data != NULL);
+ assert(!mlx5_is_secondary());
+
+ DEBUG("Extern free request: %p", ptr);
+ rte_free(ptr);
+}
+
+/**
* DPDK callback to close the device.
*
* Destroy all queues and objects, free memory.
@@ -203,6 +249,7 @@ struct mlx5_args {
}
if (priv->reta_idx != NULL)
rte_free(priv->reta_idx);
+ priv_socket_uninit(priv);
priv_unlock(priv);
memset(priv, 0, sizeof(*priv));
}
@@ -526,6 +573,7 @@ struct mlx5_args {
assert(err > 0);
return -err;
}
+ err = 0; /* previous errors are handled if attr_ctx is NULL. */
ibv_dev = list[i];
DEBUG("device opened");
@@ -555,6 +603,40 @@ struct mlx5_args {
.tso = MLX5_ARG_UNSET,
};
+ mlx5_dev[idx].ports |= test;
+ if (mlx5_is_secondary()) {
+ /* from rte_ethdev.c */
+ char name[RTE_ETH_NAME_MAX_LEN];
+
+ snprintf(name, sizeof(name), "%s port %u",
+ ibv_get_device_name(ibv_dev), port);
+ eth_dev = rte_eth_dev_attach_secondary(name);
+ if (eth_dev == NULL) {
+ ERROR("can not attach rte ethdev");
+ err = ENOMEM;
+ goto error;
+ }
+ eth_dev->dev_ops = &mlx5_dev_ops;
+ priv = eth_dev->data->dev_private;
+ /* TODO replace with mlx5dv_context */
+ priv->num_uars_per_page = 1;
+ /* Receive command fd from primary process */
+ err = priv_socket_connect(priv);
+ if (err < 0) {
+ err = -err;
+ goto error;
+ }
+ /* Remap UAR for Tx queues. */
+ err = mlx5_tx_uar_remap(priv, err);
+ if (err < 0) {
+ err = -err;
+ goto error;
+ }
+ mlx5_dev_select_rx_function(eth_dev);
+ mlx5_dev_select_tx_function(eth_dev);
+ continue;
+ }
+
device_attr_ex.comp_mask = 0;
DEBUG("using port %u (%08" PRIx32 ")", port, test);
@@ -753,37 +835,8 @@ struct mlx5_args {
err = ENOMEM;
goto port_error;
}
-
- /* Secondary processes have to use local storage for their
- * private data as well as a copy of eth_dev->data, but this
- * pointer must not be modified before burst functions are
- * actually called. */
- if (mlx5_is_secondary()) {
- struct mlx5_secondary_data *sd =
- &mlx5_secondary_data[eth_dev->data->port_id];
- sd->primary_priv = eth_dev->data->dev_private;
- if (sd->primary_priv == NULL) {
- ERROR("no private data for port %u",
- eth_dev->data->port_id);
- err = EINVAL;
- goto port_error;
- }
- sd->shared_dev_data = eth_dev->data;
- rte_spinlock_init(&sd->lock);
- memcpy(sd->data.name, sd->shared_dev_data->name,
- sizeof(sd->data.name));
- sd->data.dev_private = priv;
- sd->data.rx_mbuf_alloc_failed = 0;
- sd->data.mtu = ETHER_MTU;
- sd->data.port_id = sd->shared_dev_data->port_id;
- sd->data.mac_addrs = priv->mac;
- eth_dev->tx_pkt_burst = mlx5_tx_burst_secondary_setup;
- eth_dev->rx_pkt_burst = mlx5_rx_burst_secondary_setup;
- } else {
- eth_dev->data->dev_private = priv;
- eth_dev->data->mac_addrs = priv->mac;
- }
-
+ eth_dev->data->dev_private = priv;
+ eth_dev->data->mac_addrs = priv->mac;
eth_dev->device = &pci_dev->device;
rte_eth_copy_pci_info(eth_dev, pci_dev);
eth_dev->device->driver = &mlx5_driver.driver;
@@ -791,6 +844,15 @@ struct mlx5_args {
eth_dev->dev_ops = &mlx5_dev_ops;
TAILQ_INIT(&priv->flows);
+ /* Hint libmlx5 to use PMD allocator for PRM resources */
+ struct mlx5dv_ctx_allocators alctr = {
+ .alloc = &mlx5_extern_alloc_buf,
+ .free = &mlx5_extern_free_buf,
+ .data = priv,
+ };
+ mlx5dv_set_context_attr(ctx, MLX5DV_CTX_ATTR_BUF_ALLOCATORS,
+ (void *)((uintptr_t)&alctr));
+
/* Bring Ethernet device up. */
DEBUG("forcing Ethernet interface up");
priv_set_flags(priv, ~IFF_UP, IFF_UP);
@@ -885,14 +947,6 @@ struct mlx5_args {
static void
rte_mlx5_pmd_init(void)
{
- /*
- * RDMAV_HUGEPAGES_SAFE tells ibv_fork_init() we intend to use
- * huge pages. Calling ibv_fork_init() during init allows
- * applications to use fork() safely for purposes other than
- * using this PMD, which is not supported in forked processes.
- */
- setenv("RDMAV_HUGEPAGES_SAFE", "1", 1);
- ibv_fork_init();
rte_pci_register(&mlx5_driver);
}
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 2dee07c..b5d2f67 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -157,16 +157,11 @@ struct priv {
uint32_t link_speed_capa; /* Link speed capabilities. */
struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */
rte_spinlock_t lock; /* Lock for control functions. */
+ int socket; /* Socket to exchange data with secondaries. */
+ struct rte_intr_handle intr_handle_socket; /* Interrupt handler. */
+ int num_uars_per_page; /* number of UARs per system page */
};
-/* Local storage for secondary process data. */
-struct mlx5_secondary_data {
- struct rte_eth_dev_data data; /* Local device data. */
- struct priv *primary_priv; /* Private structure from primary. */
- struct rte_eth_dev_data *shared_dev_data; /* Shared device data. */
- rte_spinlock_t lock; /* Port configuration lock. */
-} mlx5_secondary_data[RTE_MAX_ETHPORTS];
-
/**
* Lock private structure to protect it from concurrent access in the
* control path.
@@ -314,4 +309,11 @@ int mlx5_flow_destroy(struct rte_eth_dev *, struct rte_flow *,
void priv_flow_stop(struct priv *);
int priv_flow_rxq_in_use(struct priv *, struct rxq *);
+/* mlx5_socket.c */
+
+int priv_socket_init(struct priv *priv);
+int priv_socket_uninit(struct priv *priv);
+void priv_socket_handle(struct priv *priv);
+int priv_socket_connect(struct priv *priv);
+
#endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index fce7dd5..84efeda 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -31,6 +31,8 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#define _GNU_SOURCE
+
#include <stddef.h>
#include <assert.h>
#include <unistd.h>
@@ -49,6 +51,7 @@
#include <linux/sockios.h>
#include <linux/version.h>
#include <fcntl.h>
+#include <sys/un.h>
/* DPDK headers don't like -pedantic. */
#ifdef PEDANTIC
@@ -132,12 +135,7 @@ struct ethtool_link_settings {
struct priv *
mlx5_get_priv(struct rte_eth_dev *dev)
{
- struct mlx5_secondary_data *sd;
-
- if (!mlx5_is_secondary())
- return dev->data->dev_private;
- sd = &mlx5_secondary_data[dev->data->port_id];
- return sd->data.dev_private;
+ return dev->data->dev_private;
}
/**
@@ -149,7 +147,7 @@ struct priv *
inline int
mlx5_is_secondary(void)
{
- return rte_eal_process_type() != RTE_PROC_PRIMARY;
+ return rte_eal_process_type() == RTE_PROC_SECONDARY;
}
/**
@@ -1292,6 +1290,23 @@ struct priv *
}
/**
+ * Handle interrupts from the socket.
+ *
+ * @param cb_arg
+ * Callback argument.
+ */
+static void
+mlx5_dev_handler_socket(void *cb_arg)
+{
+ struct rte_eth_dev *dev = cb_arg;
+ struct priv *priv = dev->data->dev_private;
+
+ priv_lock(priv);
+ priv_socket_handle(priv);
+ priv_unlock(priv);
+}
+
+/**
* Uninstall interrupt handler.
*
* @param priv
@@ -1302,16 +1317,21 @@ struct priv *
void
priv_dev_interrupt_handler_uninstall(struct priv *priv, struct rte_eth_dev *dev)
{
- if (!dev->data->dev_conf.intr_conf.lsc)
- return;
- rte_intr_callback_unregister(&priv->intr_handle,
- mlx5_dev_interrupt_handler,
- dev);
+ if (dev->data->dev_conf.intr_conf.lsc)
+ rte_intr_callback_unregister(&priv->intr_handle,
+ mlx5_dev_interrupt_handler,
+ dev);
+ if (priv->socket)
+ rte_intr_callback_unregister(&priv->intr_handle_socket,
+ mlx5_dev_handler_socket,
+ dev);
if (priv->pending_alarm)
rte_eal_alarm_cancel(mlx5_dev_link_status_handler, dev);
priv->pending_alarm = 0;
priv->intr_handle.fd = 0;
priv->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+ priv->intr_handle_socket.fd = 0;
+ priv->intr_handle_socket.type = RTE_INTR_HANDLE_UNKNOWN;
}
/**
@@ -1327,21 +1347,29 @@ struct priv *
{
int rc, flags;
- if (!dev->data->dev_conf.intr_conf.lsc)
- return;
+ assert(!mlx5_is_secondary());
assert(priv->ctx->async_fd > 0);
flags = fcntl(priv->ctx->async_fd, F_GETFL);
rc = fcntl(priv->ctx->async_fd, F_SETFL, flags | O_NONBLOCK);
if (rc < 0) {
INFO("failed to change file descriptor async event queue");
dev->data->dev_conf.intr_conf.lsc = 0;
- } else {
+ }
+ if (dev->data->dev_conf.intr_conf.lsc) {
priv->intr_handle.fd = priv->ctx->async_fd;
priv->intr_handle.type = RTE_INTR_HANDLE_EXT;
rte_intr_callback_register(&priv->intr_handle,
mlx5_dev_interrupt_handler,
dev);
}
+
+ rc = priv_socket_init(priv);
+ if (!rc && priv->socket) {
+ priv->intr_handle_socket.fd = priv->socket;
+ priv->intr_handle_socket.type = RTE_INTR_HANDLE_EXT;
+ rte_intr_callback_register(&priv->intr_handle_socket,
+ mlx5_dev_handler_socket, dev);
+ }
}
/**
@@ -1420,163 +1448,6 @@ struct priv *
}
/**
- * Configure secondary process queues from a private data pointer (primary
- * or secondary) and update burst callbacks. Can take place only once.
- *
- * All queues must have been previously created by the primary process to
- * avoid undefined behavior.
- *
- * @param priv
- * Private data pointer from either primary or secondary process.
- *
- * @return
- * Private data pointer from secondary process, NULL in case of error.
- */
-struct priv *
-mlx5_secondary_data_setup(struct priv *priv)
-{
- unsigned int port_id = 0;
- struct mlx5_secondary_data *sd;
- void **tx_queues;
- void **rx_queues;
- unsigned int nb_tx_queues;
- unsigned int nb_rx_queues;
- unsigned int i;
-
- /* priv must be valid at this point. */
- assert(priv != NULL);
- /* priv->dev must also be valid but may point to local memory from
- * another process, possibly with the same address and must not
- * be dereferenced yet. */
- assert(priv->dev != NULL);
- /* Determine port ID by finding out where priv comes from. */
- while (1) {
- sd = &mlx5_secondary_data[port_id];
- rte_spinlock_lock(&sd->lock);
- /* Primary process? */
- if (sd->primary_priv == priv)
- break;
- /* Secondary process? */
- if (sd->data.dev_private == priv)
- break;
- rte_spinlock_unlock(&sd->lock);
- if (++port_id == RTE_DIM(mlx5_secondary_data))
- port_id = 0;
- }
- /* Switch to secondary private structure. If private data has already
- * been updated by another thread, there is nothing else to do. */
- priv = sd->data.dev_private;
- if (priv->dev->data == &sd->data)
- goto end;
- /* Sanity checks. Secondary private structure is supposed to point
- * to local eth_dev, itself still pointing to the shared device data
- * structure allocated by the primary process. */
- assert(sd->shared_dev_data != &sd->data);
- assert(sd->data.nb_tx_queues == 0);
- assert(sd->data.tx_queues == NULL);
- assert(sd->data.nb_rx_queues == 0);
- assert(sd->data.rx_queues == NULL);
- assert(priv != sd->primary_priv);
- assert(priv->dev->data == sd->shared_dev_data);
- assert(priv->txqs_n == 0);
- assert(priv->txqs == NULL);
- assert(priv->rxqs_n == 0);
- assert(priv->rxqs == NULL);
- nb_tx_queues = sd->shared_dev_data->nb_tx_queues;
- nb_rx_queues = sd->shared_dev_data->nb_rx_queues;
- /* Allocate local storage for queues. */
- tx_queues = rte_zmalloc("secondary ethdev->tx_queues",
- sizeof(sd->data.tx_queues[0]) * nb_tx_queues,
- RTE_CACHE_LINE_SIZE);
- rx_queues = rte_zmalloc("secondary ethdev->rx_queues",
- sizeof(sd->data.rx_queues[0]) * nb_rx_queues,
- RTE_CACHE_LINE_SIZE);
- if (tx_queues == NULL || rx_queues == NULL)
- goto error;
- /* Lock to prevent control operations during setup. */
- priv_lock(priv);
- /* TX queues. */
- for (i = 0; i != nb_tx_queues; ++i) {
- struct txq *primary_txq = (*sd->primary_priv->txqs)[i];
- struct txq_ctrl *primary_txq_ctrl;
- struct txq_ctrl *txq_ctrl;
-
- if (primary_txq == NULL)
- continue;
- primary_txq_ctrl = container_of(primary_txq,
- struct txq_ctrl, txq);
- txq_ctrl = rte_calloc_socket("TXQ", 1, sizeof(*txq_ctrl) +
- (1 << primary_txq->elts_n) *
- sizeof(struct rte_mbuf *), 0,
- primary_txq_ctrl->socket);
- if (txq_ctrl != NULL) {
- if (txq_ctrl_setup(priv->dev,
- txq_ctrl,
- 1 << primary_txq->elts_n,
- primary_txq_ctrl->socket,
- NULL) == 0) {
- txq_ctrl->txq.stats.idx =
- primary_txq->stats.idx;
- tx_queues[i] = &txq_ctrl->txq;
- continue;
- }
- rte_free(txq_ctrl);
- }
- while (i) {
- txq_ctrl = tx_queues[--i];
- txq_cleanup(txq_ctrl);
- rte_free(txq_ctrl);
- }
- goto error;
- }
- /* RX queues. */
- for (i = 0; i != nb_rx_queues; ++i) {
- struct rxq_ctrl *primary_rxq =
- container_of((*sd->primary_priv->rxqs)[i],
- struct rxq_ctrl, rxq);
-
- if (primary_rxq == NULL)
- continue;
- /* Not supported yet. */
- rx_queues[i] = NULL;
- }
- /* Update everything. */
- priv->txqs = (void *)tx_queues;
- priv->txqs_n = nb_tx_queues;
- priv->rxqs = (void *)rx_queues;
- priv->rxqs_n = nb_rx_queues;
- sd->data.rx_queues = rx_queues;
- sd->data.tx_queues = tx_queues;
- sd->data.nb_rx_queues = nb_rx_queues;
- sd->data.nb_tx_queues = nb_tx_queues;
- sd->data.dev_link = sd->shared_dev_data->dev_link;
- sd->data.mtu = sd->shared_dev_data->mtu;
- memcpy(sd->data.rx_queue_state, sd->shared_dev_data->rx_queue_state,
- sizeof(sd->data.rx_queue_state));
- memcpy(sd->data.tx_queue_state, sd->shared_dev_data->tx_queue_state,
- sizeof(sd->data.tx_queue_state));
- sd->data.dev_flags = sd->shared_dev_data->dev_flags;
- /* Use local data from now on. */
- rte_mb();
- priv->dev->data = &sd->data;
- rte_mb();
- mlx5_dev_select_tx_function(priv->dev);
- mlx5_dev_select_rx_function(priv->dev);
- priv_unlock(priv);
-end:
- /* More sanity checks. */
- assert(priv->dev->data == &sd->data);
- rte_spinlock_unlock(&sd->lock);
- return priv;
-error:
- priv_unlock(priv);
- rte_free(tx_queues);
- rte_free(rx_queues);
- rte_spinlock_unlock(&sd->lock);
- return NULL;
-}
-
-/**
* Configure the TX function to use.
*
* @param dev
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 18e711e..60edf9d 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -1318,47 +1318,6 @@
}
/**
- * DPDK callback for RX in secondary processes.
- *
- * This function configures all queues from primary process information
- * if necessary before reverting to the normal RX burst callback.
- *
- * @param dpdk_rxq
- * Generic pointer to RX queue structure.
- * @param[out] pkts
- * Array to store received packets.
- * @param pkts_n
- * Maximum number of packets in array.
- *
- * @return
- * Number of packets successfully received (<= pkts_n).
- */
-uint16_t
-mlx5_rx_burst_secondary_setup(void *dpdk_rxq, struct rte_mbuf **pkts,
- uint16_t pkts_n)
-{
- struct rxq *rxq = dpdk_rxq;
- struct rxq_ctrl *rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
- struct priv *priv = mlx5_secondary_data_setup(rxq_ctrl->priv);
- struct priv *primary_priv;
- unsigned int index;
-
- if (priv == NULL)
- return 0;
- primary_priv =
- mlx5_secondary_data[priv->dev->data->port_id].primary_priv;
- /* Look for queue index in both private structures. */
- for (index = 0; index != priv->rxqs_n; ++index)
- if (((*primary_priv->rxqs)[index] == rxq) ||
- ((*priv->rxqs)[index] == rxq))
- break;
- if (index == priv->rxqs_n)
- return 0;
- rxq = (*priv->rxqs)[index];
- return priv->dev->rx_pkt_burst(rxq, pkts, pkts_n);
-}
-
-/**
* Allocate queue vector and fill epoll fd list for Rx interrupts.
*
* @param priv
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 393c500..3940e00 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -290,6 +290,8 @@ struct txq_ctrl {
struct ibv_qp *qp; /* Queue Pair. */
unsigned int socket; /* CPU socket ID for allocations. */
struct txq txq; /* Data path structure. */
+
+ off_t uar_mmap_offset; /* UAR offset for secondary process mmap. */
};
/* mlx5_rxq.c */
@@ -314,7 +316,6 @@ int rxq_ctrl_setup(struct rte_eth_dev *, struct rxq_ctrl *, uint16_t,
int mlx5_rx_queue_setup(struct rte_eth_dev *, uint16_t, uint16_t, unsigned int,
const struct rte_eth_rxconf *, struct rte_mempool *);
void mlx5_rx_queue_release(void *);
-uint16_t mlx5_rx_burst_secondary_setup(void *, struct rte_mbuf **, uint16_t);
int priv_rx_intr_vec_enable(struct priv *priv);
void priv_rx_intr_vec_disable(struct priv *priv);
int mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id);
@@ -328,7 +329,7 @@ int txq_ctrl_setup(struct rte_eth_dev *, struct txq_ctrl *, uint16_t,
int mlx5_tx_queue_setup(struct rte_eth_dev *, uint16_t, uint16_t, unsigned int,
const struct rte_eth_txconf *);
void mlx5_tx_queue_release(void *);
-uint16_t mlx5_tx_burst_secondary_setup(void *, struct rte_mbuf **, uint16_t);
+int mlx5_tx_uar_remap(struct priv *priv, int fd);
/* mlx5_rxtx.c */
diff --git a/drivers/net/mlx5/mlx5_socket.c b/drivers/net/mlx5/mlx5_socket.c
new file mode 100644
index 0000000..e371ab6
--- /dev/null
+++ b/drivers/net/mlx5/mlx5_socket.c
@@ -0,0 +1,294 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2016 6WIND S.A.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#define _GNU_SOURCE
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/stat.h>
+
+#include "mlx5.h"
+#include "mlx5_utils.h"
+
+/**
+ * Create, bind and listen on the UNIX socket used to talk to secondaries.
+ * The descriptor is kept in priv->socket (reset to 0 when setup fails).
+ * @param[in] priv
+ * Pointer to private structure.
+ *
+ * @return
+ * 0 on success. NOTE(review): failure values below are not errno values.
+ */
+int
+priv_socket_init(struct priv *priv)
+{
+ struct sockaddr_un sun = {
+ .sun_family = AF_UNIX,
+ };
+ int ret;
+ int flags;
+ struct stat file_stat;
+
+ /*
+ * Non-blocking AF_UNIX stream socket, bound further down to
+ * "/var/tmp/<MLX5_DRIVER_NAME>_<fd>".
+ */
+ ret = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (ret < 0) {
+ WARN("secondary process not supported: %s", strerror(errno));
+ return ret; /* Negative socket() result, returned as is. */
+ }
+ priv->socket = ret;
+ flags = fcntl(priv->socket, F_GETFL, 0);
+ if (flags == -1)
+ goto out; /* NOTE(review): ret still holds the fd on this path. */
+ ret = fcntl(priv->socket, F_SETFL, flags | O_NONBLOCK);
+ if (ret < 0)
+ goto out;
+ snprintf(sun.sun_path, sizeof(sun.sun_path), "/var/tmp/%s_%d",
+ MLX5_DRIVER_NAME, priv->socket);
+ ret = stat(sun.sun_path, &file_stat);
+ if (!ret)
+ claim_zero(remove(sun.sun_path)); /* A file already exists at the path. */
+ ret = bind(priv->socket, (const struct sockaddr *)&sun, sizeof(sun));
+ if (ret < 0) {
+ WARN("cannot bind socket, secondary process not supported: %s",
+ strerror(errno));
+ goto close;
+ }
+ ret = listen(priv->socket, 0);
+ if (ret < 0) {
+ WARN("Secondary process not supported: %s", strerror(errno));
+ goto close;
+ }
+ return ret;
+close:
+ remove(sun.sun_path);
+out:
+ claim_zero(close(priv->socket));
+ priv->socket = 0;
+ return -(ret); /* Negates the last call's result; errno is not used. */
+}
+
+/**
+ * Close the socket shared with secondary processes and remove its file.
+ *
+ * @param[in] priv
+ * Pointer to private structure; priv->socket is reset to 0.
+ *
+ * @return
+ * Always 0; close() and remove() results only go through claim_zero().
+ */
+int
+priv_socket_uninit(struct priv *priv)
+{
+ MKSTR(path, "/var/tmp/%s_%d", MLX5_DRIVER_NAME, priv->socket); /* Built before priv->socket is cleared. */
+ claim_zero(close(priv->socket));
+ priv->socket = 0;
+ claim_zero(remove(path));
+ return 0;
+}
+
+/**
+ * Handle socket interrupts: accept one client, check its SCM_CREDENTIALS
+ * uid/gid against this process and send back priv->ctx->cmd_fd (SCM_RIGHTS).
+ * @param priv
+ * Pointer to private structure.
+ */
+void
+priv_socket_handle(struct priv *priv)
+{
+ int conn_sock;
+ int ret = 0;
+ struct cmsghdr *cmsg = NULL;
+ struct ucred *cred = NULL;
+ char buf[CMSG_SPACE(sizeof(struct ucred))] = { 0 };
+ char vbuf[1024] = { 0 };
+ struct iovec io = {
+ .iov_base = vbuf,
+ .iov_len = sizeof(*vbuf), /* sizeof(char): a single byte of vbuf. */
+ };
+ struct msghdr msg = {
+ .msg_iov = &io,
+ .msg_iovlen = 1,
+ .msg_control = buf,
+ .msg_controllen = sizeof(buf),
+ };
+ int *fd;
+
+ /* Accept the connection from the client. */
+ conn_sock = accept(priv->socket, NULL, NULL);
+ if (conn_sock < 0) {
+ WARN("connection failed: %s", strerror(errno));
+ return;
+ }
+ ret = setsockopt(conn_sock, SOL_SOCKET, SO_PASSCRED, &(int){1},
+ sizeof(int));
+ if (ret < 0) {
+ WARN("cannot change socket options");
+ goto out;
+ }
+ ret = recvmsg(conn_sock, &msg, MSG_WAITALL);
+ if (ret < 0) { /* recvmsg() error; a 0-byte read is not caught here. */
+ WARN("received an empty message: %s", strerror(errno));
+ goto out;
+ }
+ /* Expect to receive credentials only. */
+ cmsg = CMSG_FIRSTHDR(&msg);
+ if (cmsg == NULL) {
+ WARN("no message");
+ goto out;
+ }
+ if ((cmsg->cmsg_type == SCM_CREDENTIALS) &&
+ (cmsg->cmsg_len >= sizeof(*cred))) { /* NOTE(review): cmsg_len includes the header; confirm bound. */
+ cred = (struct ucred *)CMSG_DATA(cmsg);
+ assert(cred != NULL);
+ }
+ cmsg = CMSG_NXTHDR(&msg, cmsg);
+ if (cmsg != NULL) {
+ WARN("Message wrongly formated");
+ goto out;
+ }
+ /* Make sure all the ancillary data was received and valid. */
+ if ((cred == NULL) ||
+ (cred->uid != getuid()) ||
+ (cred->gid != getgid())) {
+ WARN("wrong credentials");
+ goto out;
+ }
+ /* Set-up the ancillary data, reusing the received msg and buf. */
+ cmsg = CMSG_FIRSTHDR(&msg);
+ assert(cmsg != NULL);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(priv->ctx->cmd_fd));
+ fd = (int *)CMSG_DATA(cmsg);
+ *fd = priv->ctx->cmd_fd;
+ ret = sendmsg(conn_sock, &msg, 0);
+ if (ret < 0)
+ WARN("cannot send response");
+out:
+ close(conn_sock);
+}
+
+/**
+ * Connect to the primary process, send this process' credentials and
+ * read a file descriptor from the ancillary data of the reply.
+ * @param[in] priv
+ * Pointer to private structure.
+ *
+ * @return
+ * Received fd on success. NOTE(review): failures are not negative errno.
+ */
+int
+priv_socket_connect(struct priv *priv)
+{
+ struct sockaddr_un sun = {
+ .sun_family = AF_UNIX,
+ };
+ int socket_fd;
+ int *fd = NULL;
+ int ret;
+ struct ucred *cred;
+ char buf[CMSG_SPACE(sizeof(*cred))] = { 0 };
+ char vbuf[1024] = { 0 };
+ struct iovec io = {
+ .iov_base = vbuf,
+ .iov_len = sizeof(*vbuf), /* sizeof(char): a single byte of vbuf. */
+ };
+ struct msghdr msg = {
+ .msg_control = buf,
+ .msg_controllen = sizeof(buf),
+ .msg_iov = &io,
+ .msg_iovlen = 1,
+ };
+ struct cmsghdr *cmsg;
+
+ ret = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (ret < 0) {
+ WARN("cannot connect to primary");
+ return ret; /* Negative socket() result, returned as is. */
+ }
+ socket_fd = ret;
+ snprintf(sun.sun_path, sizeof(sun.sun_path), "/var/tmp/%s_%d",
+ MLX5_DRIVER_NAME, priv->socket);
+ ret = connect(socket_fd, (const struct sockaddr *)&sun, sizeof(sun));
+ if (ret < 0) {
+ WARN("cannot connect to primary");
+ goto out;
+ }
+ cmsg = CMSG_FIRSTHDR(&msg);
+ if (cmsg == NULL) {
+ DEBUG("cannot get first message");
+ goto out; /* NOTE(review): ret is connect()'s non-negative result here. */
+ }
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_CREDENTIALS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(*cred));
+ cred = (struct ucred *)CMSG_DATA(cmsg);
+ if (cred == NULL) {
+ DEBUG("no credentials received");
+ goto out; /* NOTE(review): ret is connect()'s non-negative result here. */
+ }
+ cred->pid = getpid();
+ cred->uid = getuid();
+ cred->gid = getgid();
+ ret = sendmsg(socket_fd, &msg, MSG_DONTWAIT);
+ if (ret < 0) {
+ WARN("cannot send credentials to primary: %s",
+ strerror(errno));
+ goto out;
+ }
+ ret = recvmsg(socket_fd, &msg, MSG_WAITALL); /* Reply reuses msg and buf. */
+ if (ret <= 0) {
+ WARN("no message from primary: %s", strerror(errno));
+ goto out;
+ }
+ cmsg = CMSG_FIRSTHDR(&msg);
+ if (cmsg == NULL) {
+ WARN("No file descriptor received");
+ goto out; /* NOTE(review): ret is still recvmsg()'s positive result. */
+ }
+ fd = (int *)CMSG_DATA(cmsg); /* cmsg_type is not checked before this read. */
+ if (*fd <= 0) {
+ WARN("no file descriptor received: %s", strerror(errno));
+ ret = *fd;
+ goto out;
+ }
+ ret = *fd;
+out:
+ close(socket_fd);
+ return ret;
+}
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 0ea6630..6f57319 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -36,6 +36,8 @@
#include <errno.h>
#include <string.h>
#include <stdint.h>
+#include <unistd.h>
+#include <sys/mman.h>
/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
@@ -174,6 +176,8 @@
struct mlx5dv_cq cq_info;
struct mlx5dv_obj obj;
+ qp.comp_mask = MLX5DV_QP_MASK_UAR_INFO;
+
obj.cq.in = ibcq;
obj.cq.out = &cq_info;
obj.qp.in = tmpl->qp;
@@ -183,7 +187,7 @@
if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
ERROR("Wrong MLX5_CQE_SIZE environment variable value: "
"it should be set to %u", RTE_CACHE_LINE_SIZE);
- return EINVAL;
+ return -EINVAL;
}
tmpl->txq.cqe_n = log2above(cq_info.cqe_cnt);
tmpl->txq.qp_num_8s = tmpl->qp->qp_num << 8;
@@ -198,6 +202,14 @@
tmpl->txq.elts =
(struct rte_mbuf *(*)[1 << tmpl->txq.elts_n])
((uintptr_t)txq_ctrl + sizeof(*txq_ctrl));
+
+ if (qp.comp_mask | MLX5DV_QP_MASK_UAR_INFO) {
+ tmpl->uar_mmap_offset = qp.uar_info.mmap_offset;
+ } else {
+ ERROR("Failed to retrieve UAR info, invalid libmlx5.so version");
+ return -EINVAL;
+ }
+
return 0;
}
@@ -539,42 +551,53 @@
}
/**
- * DPDK callback for TX in secondary processes.
+ * Map locally UAR used in Tx queues for BlueFlame doorbell.
*
- * This function configures all queues from primary process information
- * if necessary before reverting to the normal TX burst callback.
- *
- * @param dpdk_txq
- * Generic pointer to TX queue structure.
- * @param[in] pkts
- * Packets to transmit.
- * @param pkts_n
- * Number of packets in array.
+ * @param[in] priv
+ * Pointer to private structure.
+ * @param fd
+ * Verbs file descriptor to map UAR pages.
*
* @return
- * Number of packets successfully transmitted (<= pkts_n).
+ * 0 on success, -1 when mmap() does not return the requested address.
*/
-uint16_t
-mlx5_tx_burst_secondary_setup(void *dpdk_txq, struct rte_mbuf **pkts,
- uint16_t pkts_n)
+int
+mlx5_tx_uar_remap(struct priv *priv, int fd)
{
- struct txq *txq = dpdk_txq;
- struct txq_ctrl *txq_ctrl = container_of(txq, struct txq_ctrl, txq);
- struct priv *priv = mlx5_secondary_data_setup(txq_ctrl->priv);
- struct priv *primary_priv;
- unsigned int index;
+ unsigned int i, j;
+ uintptr_t pages[priv->txqs_n]; /* Aligned UAR addresses mapped so far. */
+ unsigned int pages_n = 0;
+ uintptr_t uar_va;
+ void *addr;
+ struct txq *txq;
+ struct txq_ctrl *txq_ctrl;
+ int already_mapped;
+ size_t page_size = sysconf(_SC_PAGESIZE);
- if (priv == NULL)
- return 0;
- primary_priv =
- mlx5_secondary_data[priv->dev->data->port_id].primary_priv;
- /* Look for queue index in both private structures. */
- for (index = 0; index != priv->txqs_n; ++index)
- if (((*primary_priv->txqs)[index] == txq) ||
- ((*priv->txqs)[index] == txq))
- break;
- if (index == priv->txqs_n)
- return 0;
- txq = (*priv->txqs)[index];
- return priv->dev->tx_pkt_burst(txq, pkts, pkts_n);
+ for (i = 0; i != priv->txqs_n; ++i) {
+ txq = (*priv->txqs)[i];
+ txq_ctrl = container_of(txq, struct txq_ctrl, txq);
+ uar_va = (uintptr_t)txq_ctrl->txq.bf_reg;
+ uar_va = RTE_ALIGN_FLOOR(uar_va,
+ page_size / priv->num_uars_per_page); /* Round bf_reg down to this boundary. */
+ already_mapped = 0;
+ for (j = 0; j != pages_n; ++j) { /* Skip addresses handled for an earlier queue. */
+ if (pages[j] == uar_va) {
+ already_mapped = 1;
+ break;
+ }
+ }
+ if (already_mapped)
+ continue;
+
+ pages[pages_n++] = uar_va;
+ addr = mmap((void *)uar_va, page_size, /* MAP_FIXED: same address as bf_reg's aligned value. */
+ PROT_WRITE, MAP_FIXED | MAP_SHARED, fd,
+ txq_ctrl->uar_mmap_offset);
+ if (addr != (void *)uar_va) {
+ ERROR("call to mmap failed on UAR for txq %d\n", i);
+ return -1;
+ }
+ }
+ return 0;
}
--
1.8.3.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [dpdk-dev] [PATCH v1 1/2] net/mlx5: change eth device reference for secondary process
2017-08-24 14:03 [dpdk-dev] [PATCH v1 1/2] net/mlx5: change eth device reference for secondary process Xueming Li
2017-08-24 14:03 ` [dpdk-dev] [PATCH v1 2/2] net/mlx5: add multiple process support Xueming Li
@ 2017-08-25 6:52 ` Nélio Laranjeiro
2017-08-25 7:15 ` Xueming(Steven) Li
2017-09-15 15:59 ` [dpdk-dev] [PATCH v2 1/6] " Xueming Li
` (19 subsequent siblings)
21 siblings, 1 reply; 41+ messages in thread
From: Nélio Laranjeiro @ 2017-08-25 6:52 UTC (permalink / raw)
To: Xueming Li; +Cc: dev
Hi Xueming,
Please see some comments below,
On Thu, Aug 24, 2017 at 10:03:40PM +0800, Xueming Li wrote:
> rte_eth_dev created by primary process were not available in secondary
> process, it was not possible to use the primary process local memory
> object from a secondary process.
>
> This patch modify the reference of primary rte_eth_dev object, use
> local rte_eth_dev secondary process instead.
>
> Cc: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
> Signed-off-by: Xueming Li <xuemingl@mellanox.com>
> ---
> drivers/net/mlx5/mlx5.h | 6 ++---
> drivers/net/mlx5/mlx5_ethdev.c | 52 ++++++++++++++++++++++-------------------
> drivers/net/mlx5/mlx5_fdir.c | 3 +++
> drivers/net/mlx5/mlx5_rss.c | 3 +++
> drivers/net/mlx5/mlx5_rxq.c | 2 ++
> drivers/net/mlx5/mlx5_trigger.c | 4 ++--
> 6 files changed, 41 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
> index f5167e0..fce7dd5 100644
> --- a/drivers/net/mlx5/mlx5_ethdev.c
> +++ b/drivers/net/mlx5/mlx5_ethdev.c
> @@ -1356,17 +1356,17 @@ struct priv *
> * 0 on success, errno value on failure.
> */
> static int
> -priv_set_link(struct priv *priv, int up)
> +mlx5_dev_set_link(struct rte_eth_dev *dev, int up)
> {
> - struct rte_eth_dev *dev = priv->dev;
> + struct priv *priv = dev->data->dev_private;
> int err;
>
This function should lock/unlock priv.
> if (up) {
> err = priv_set_flags(priv, ~IFF_UP, IFF_UP);
> if (err)
> return err;
> - priv_select_tx_function(priv);
> - priv_select_rx_function(priv);
> + mlx5_dev_select_tx_function(dev);
> + mlx5_dev_select_rx_function(dev);
This also implies that the mlx5_dev_select_rx/tx_function() functions should
be renamed to:
priv_dev_select_rx/tx_function(struct priv *priv, struct rte_eth_dev *dev, ...)
This will avoid multiple lock/unlock calls inside the functions.
> diff --git a/drivers/net/mlx5/mlx5_fdir.c b/drivers/net/mlx5/mlx5_fdir.c
> index 6acc053..0f3b70a 100644
> --- a/drivers/net/mlx5/mlx5_fdir.c
> +++ b/drivers/net/mlx5/mlx5_fdir.c
> @@ -1075,6 +1075,9 @@ struct mlx5_fdir_filter {
> int ret = EINVAL;
> struct priv *priv = dev->data->dev_private;
>
> + if (mlx5_is_secondary())
> + return -E_RTE_SECONDARY;
> +
Extra empty line, also in the following copy/paste of this if statement.
>[...]
Thanks,
--
Nélio Laranjeiro
6WIND
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [dpdk-dev] [PATCH v1 1/2] net/mlx5: change eth device reference for secondary process
2017-08-25 6:52 ` [dpdk-dev] [PATCH v1 1/2] net/mlx5: change eth device reference for secondary process Nélio Laranjeiro
@ 2017-08-25 7:15 ` Xueming(Steven) Li
2017-08-25 7:32 ` Nélio Laranjeiro
0 siblings, 1 reply; 41+ messages in thread
From: Xueming(Steven) Li @ 2017-08-25 7:15 UTC (permalink / raw)
To: Nélio Laranjeiro; +Cc: dev
Nelio, thanks, comments inline.
> -----Original Message-----
> From: Nélio Laranjeiro [mailto:nelio.laranjeiro@6wind.com]
> Sent: Friday, August 25, 2017 2:52 PM
> To: Xueming(Steven) Li <xuemingl@mellanox.com>
> Cc: dev@dpdk.org
> Subject: Re: [PATCH v1 1/2] net/mlx5: change eth device reference for
> secondary process
>
> Hi Xueming,
>
> Please see some comments below,
>
> On Thu, Aug 24, 2017 at 10:03:40PM +0800, Xueming Li wrote:
> > rte_eth_dev created by primary process were not available in secondary
> > process, it was not possible to use the primary process local memory
> > object from a secondary process.
> >
> > This patch modify the reference of primary rte_eth_dev object, use
> > local rte_eth_dev secondary process instead.
> >
> > Cc: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
> > Signed-off-by: Xueming Li <xuemingl@mellanox.com>
> > ---
> > drivers/net/mlx5/mlx5.h | 6 ++---
> > drivers/net/mlx5/mlx5_ethdev.c | 52 ++++++++++++++++++++++-----------
> --------
> > drivers/net/mlx5/mlx5_fdir.c | 3 +++
> > drivers/net/mlx5/mlx5_rss.c | 3 +++
> > drivers/net/mlx5/mlx5_rxq.c | 2 ++
> > drivers/net/mlx5/mlx5_trigger.c | 4 ++--
> > 6 files changed, 41 insertions(+), 29 deletions(-)
> >
> > diff --git a/drivers/net/mlx5/mlx5_ethdev.c
> > b/drivers/net/mlx5/mlx5_ethdev.c index f5167e0..fce7dd5 100644
> > --- a/drivers/net/mlx5/mlx5_ethdev.c
> > +++ b/drivers/net/mlx5/mlx5_ethdev.c
> > @@ -1356,17 +1356,17 @@ struct priv *
> > * 0 on success, errno value on failure.
> > */
> > static int
> > -priv_set_link(struct priv *priv, int up)
> > +mlx5_dev_set_link(struct rte_eth_dev *dev, int up)
> > {
> > - struct rte_eth_dev *dev = priv->dev;
> > + struct priv *priv = dev->data->dev_private;
> > int err;
> >
>
> This function should lock/unclock priv.
This is a static function; the caller does the lock/unlock.
Is there a naming convention here? Are mlx5_* functions the externally exposed interfaces, which normally lock/unlock priv?
>
> > if (up) {
> > err = priv_set_flags(priv, ~IFF_UP, IFF_UP);
> > if (err)
> > return err;
> > - priv_select_tx_function(priv);
> > - priv_select_rx_function(priv);
> > + mlx5_dev_select_tx_function(dev);
> > + mlx5_dev_select_rx_function(dev);
>
> This also induce that those function mlx5_dev_select_rx/tx_function() should
> be renamed to:
> priv_dev_select_rx/tx_function(struct *priv, struct rte_eth_dev *dev, ...)
>
> this will avoid the multiple lock/unlocks inside the functions.
So priv_* are lock-free functions?
>
> > diff --git a/drivers/net/mlx5/mlx5_fdir.c
> > b/drivers/net/mlx5/mlx5_fdir.c index 6acc053..0f3b70a 100644
> > --- a/drivers/net/mlx5/mlx5_fdir.c
> > +++ b/drivers/net/mlx5/mlx5_fdir.c
> > @@ -1075,6 +1075,9 @@ struct mlx5_fdir_filter {
> > int ret = EINVAL;
> > struct priv *priv = dev->data->dev_private;
> >
> > + if (mlx5_is_secondary())
> > + return -E_RTE_SECONDARY;
> > +
>
> Extra empty line also in the following copy/past of this if statement.
> >[...]
>
> Thanks,
>
> --
> Nélio Laranjeiro
> 6WIND
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [dpdk-dev] [PATCH v1 2/2] net/mlx5: add multiple process support
2017-08-24 14:03 ` [dpdk-dev] [PATCH v1 2/2] net/mlx5: add multiple process support Xueming Li
@ 2017-08-25 7:27 ` Nélio Laranjeiro
0 siblings, 0 replies; 41+ messages in thread
From: Nélio Laranjeiro @ 2017-08-25 7:27 UTC (permalink / raw)
To: Xueming Li; +Cc: dev
Hi Xueming,
Please see comments below,
On Thu, Aug 24, 2017 at 10:03:41PM +0800, Xueming Li wrote:
> PMD uses Verbs object which were not available in the shared memory, in
> addition, due to IO pages, it was not possible to use the primary
> process Tx queues from a secondary process.
>
> This patch modify the location where Verbs objects are allocated (from
> process memory address space to shared memory address space) and thus
> allow a secondary process to use those object by mapping this shared
> memory space its own memory space.
> For Tx IO pages, it uses a unix socket to get back the communication
> channel with the Kernel driver from the primary process, this is
> necessary to remap those pages in the secondary process memory space and
> thus use the same Tx queues.
>
> This is only supported from Linux kernel (v4.14) and rdma-core (v14).
Will it not be supported also with MLNX_OFED 4.2 on older kernels?
> Cc: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
> Signed-off-by: Xueming Li <xuemingl@mellanox.com>
> ---
> doc/guides/nics/mlx5.rst | 3 +-
> drivers/net/mlx5/Makefile | 1 +
> drivers/net/mlx5/mlx5.c | 132 ++++++++++++------
> drivers/net/mlx5/mlx5.h | 18 +--
> drivers/net/mlx5/mlx5_ethdev.c | 215 ++++++------------------------
> drivers/net/mlx5/mlx5_rxq.c | 41 ------
> drivers/net/mlx5/mlx5_rxtx.h | 5 +-
> drivers/net/mlx5/mlx5_socket.c | 294 +++++++++++++++++++++++++++++++++++++++++
> drivers/net/mlx5/mlx5_txq.c | 89 ++++++++-----
> 9 files changed, 501 insertions(+), 297 deletions(-)
> create mode 100644 drivers/net/mlx5/mlx5_socket.c
You should also bring back the "Multiprocess aware" in
doc/guides/nics/features/mlx5.ini removed in [1].
> diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
> index 39a159c..3002e7e 100644
> --- a/drivers/net/mlx5/mlx5.c
> +++ b/drivers/net/mlx5/mlx5.c
> @@ -126,6 +126,52 @@ struct mlx5_args {
> }
>
> /**
> + * Verbs callback to allocate a memory. This function should allocate the space
> + * according to the size provided residing inside a huge page.
> + *
> + * @param[in] size
> + * The size in bytes of the memory to allocate.
> + * @param[in] data
> + * A pointer to the callback data.
> + *
> + * @return
> + * a pointer to the allocate space.
> + */
> +static void *
> +mlx5_extern_alloc_buf(size_t size, void *data)
> +{
> + struct priv *priv = data;
> + void *ret;
> + size_t alignment = sysconf(_SC_PAGESIZE);
It seems dangerous for the PMD to assume the page size expected by a library:
there is no guarantee it will not change in the future, and this may cause
unexpected behavior.
The library should provide such alignment information through the callback
parameter.
> + assert(data != NULL);
> + assert(!mlx5_is_secondary());
> +
> + ret = rte_malloc_socket(__func__, size, alignment,
> + priv->dev->device->numa_node);
Indentation is wrong.
> + DEBUG("Extern alloc size: %lu, align: %lu: %p", size, alignment, ret);
> + return ret;
> +}
>[...]
> @@ -526,6 +573,7 @@ struct mlx5_args {
> assert(err > 0);
> return -err;
> }
> + err = 0; /* previous errors are handled if attr_ctx is NULL. */
> ibv_dev = list[i];
>
> DEBUG("device opened");
This hunk seems to fix a bug not related to this feature, please put it in its
own commit and reference the commit introducing the issue.
> @@ -555,6 +603,40 @@ struct mlx5_args {
> .tso = MLX5_ARG_UNSET,
> };
>
> + mlx5_dev[idx].ports |= test;
> + if (mlx5_is_secondary()) {
> + /* from rte_ethdev.c */
> + char name[RTE_ETH_NAME_MAX_LEN];
> +
> + snprintf(name, sizeof(name), "%s port %u",
> + ibv_get_device_name(ibv_dev), port);
> + eth_dev = rte_eth_dev_attach_secondary(name);
> + if (eth_dev == NULL) {
> + ERROR("can not attach rte ethdev");
> + err = ENOMEM;
> + goto error;
> + }
> + eth_dev->dev_ops = &mlx5_dev_ops;
There are several operations the secondary process should not be allowed to
perform; maybe it is better to define a new array of ops for the secondary
process and assign it here.
This can also be done in an extra commit adding this new dev_ops array and
removing all the is_secondary() checks across the PMD files.
> + priv = eth_dev->data->dev_private;
> + /* TODO replace with mlx5dv_context */
According to a modification below "struct mlx5dv_ctx_allocators alctr = ", it
seems this is already done.
Please add the dependency on Schachar series as comment in the commit log
(after a '---' line), it will considerably help Ferruh.
> @@ -753,37 +835,8 @@ struct mlx5_args {
> err = ENOMEM;
> goto port_error;
> }
> -
> - /* Secondary processes have to use local storage for their
> - * private data as well as a copy of eth_dev->data, but this
> - * pointer must not be modified before burst functions are
> - * actually called. */
> - if (mlx5_is_secondary()) {
> - struct mlx5_secondary_data *sd =
> - &mlx5_secondary_data[eth_dev->data->port_id];
> - sd->primary_priv = eth_dev->data->dev_private;
> - if (sd->primary_priv == NULL) {
> - ERROR("no private data for port %u",
> - eth_dev->data->port_id);
> - err = EINVAL;
> - goto port_error;
> - }
> - sd->shared_dev_data = eth_dev->data;
> - rte_spinlock_init(&sd->lock);
> - memcpy(sd->data.name, sd->shared_dev_data->name,
> - sizeof(sd->data.name));
> - sd->data.dev_private = priv;
> - sd->data.rx_mbuf_alloc_failed = 0;
> - sd->data.mtu = ETHER_MTU;
> - sd->data.port_id = sd->shared_dev_data->port_id;
> - sd->data.mac_addrs = priv->mac;
> - eth_dev->tx_pkt_burst = mlx5_tx_burst_secondary_setup;
> - eth_dev->rx_pkt_burst = mlx5_rx_burst_secondary_setup;
> - } else {
> - eth_dev->data->dev_private = priv;
> - eth_dev->data->mac_addrs = priv->mac;
> - }
> -
> + eth_dev->data->dev_private = priv;
> + eth_dev->data->mac_addrs = priv->mac;
> eth_dev->device = &pci_dev->device;
> rte_eth_copy_pci_info(eth_dev, pci_dev);
> eth_dev->device->driver = &mlx5_driver.driver;
This patch is not rebased on top of dpdk-next-net/master; this part has already
been removed in [1].
> @@ -791,6 +844,15 @@ struct mlx5_args {
> eth_dev->dev_ops = &mlx5_dev_ops;
> TAILQ_INIT(&priv->flows);
>
> + /* Hint libmlx5 to use PMD allocator for PRM resources */
Does PRM stand for Programmer Reference Manual? If so, please remove this
acronym from the comment.
> + struct mlx5dv_ctx_allocators alctr = {
> + .alloc = &mlx5_extern_alloc_buf,
> + .free = &mlx5_extern_free_buf,
> + .data = priv,
> + };
> + mlx5dv_set_context_attr(ctx, MLX5DV_CTX_ATTR_BUF_ALLOCATORS,
> + (void *)((uintptr_t)&alctr));
Indentation.
> @@ -885,14 +947,6 @@ struct mlx5_args {
> static void
> rte_mlx5_pmd_init(void)
> {
> - /*
> - * RDMAV_HUGEPAGES_SAFE tells ibv_fork_init() we intend to use
> - * huge pages. Calling ibv_fork_init() during init allows
> - * applications to use fork() safely for purposes other than
> - * using this PMD, which is not supported in forked processes.
> - */
> - setenv("RDMAV_HUGEPAGES_SAFE", "1", 1);
> - ibv_fork_init();
> rte_pci_register(&mlx5_driver);
> }
Seems this modification is not directly related to the multiple process, can
you move it to another commit?
> diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
> index 2dee07c..b5d2f67 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -157,16 +157,11 @@ struct priv {
> uint32_t link_speed_capa; /* Link speed capabilities. */
> struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */
> rte_spinlock_t lock; /* Lock for control functions. */
> + int socket; /* Socket to exchange data with secondaries. */
> + struct rte_intr_handle intr_handle_socket; /* Interrupt handler. */
> + int num_uars_per_page; /* number of UARs per system page */
> };
"socket" seems to be a UNIX Socket, to avoid any confusion with "NUMA Socket"
please rename it.
> -/* Local storage for secondary process data. */
> -struct mlx5_secondary_data {
> - struct rte_eth_dev_data data; /* Local device data. */
> - struct priv *primary_priv; /* Private structure from primary. */
> - struct rte_eth_dev_data *shared_dev_data; /* Shared device data. */
> - rte_spinlock_t lock; /* Port configuration lock. */
> -} mlx5_secondary_data[RTE_MAX_ETHPORTS];
> -
This should also be part of another commit fixing [1].
> diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
> index fce7dd5..84efeda 100644
> --- a/drivers/net/mlx5/mlx5_ethdev.c
> +++ b/drivers/net/mlx5/mlx5_ethdev.c
> @@ -132,12 +135,7 @@ struct ethtool_link_settings {
> struct priv *
> mlx5_get_priv(struct rte_eth_dev *dev)
> {
> - struct mlx5_secondary_data *sd;
> -
> - if (!mlx5_is_secondary())
> - return dev->data->dev_private;
> - sd = &mlx5_secondary_data[dev->data->port_id];
> - return sd->data.dev_private;
> + return dev->data->dev_private;
> }
This hunk should also be in another commit fixing [1].
> diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
> index 393c500..3940e00 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.h
> +++ b/drivers/net/mlx5/mlx5_rxtx.h
> @@ -290,6 +290,8 @@ struct txq_ctrl {
> struct ibv_qp *qp; /* Queue Pair. */
> unsigned int socket; /* CPU socket ID for allocations. */
> struct txq txq; /* Data path structure. */
> +
Extra empty line.
> + off_t uar_mmap_offset; /* UAR offset for secondary process mmap. */
> };
>
> /* mlx5_rxq.c */
> diff --git a/drivers/net/mlx5/mlx5_socket.c b/drivers/net/mlx5/mlx5_socket.c
> new file mode 100644
> index 0000000..e371ab6
> --- /dev/null
> +++ b/drivers/net/mlx5/mlx5_socket.c
> @@ -0,0 +1,294 @@
> +#define _GNU_SOURCE
> +
> +#include <sys/types.h>
> +#include <sys/socket.h>
> +#include <sys/un.h>
> +#include <fcntl.h>
> +#include <stdio.h>
> +#include <unistd.h>
> +#include <sys/stat.h>
> +
> +#include "mlx5.h"
> +#include "mlx5_utils.h"
> +
> +/**
> + * Initialise the socket to communicate with the secondary process
> + *
> + * @param[in] priv
> + * Pointer to private structure.
> + *
> + * @return
> + * 0 on success, errno value on failure.
> + */
> +int
> +priv_socket_init(struct priv *priv)
> +{
> + struct sockaddr_un sun = {
> + .sun_family = AF_UNIX,
> + };
> + int ret;
> + int flags;
> + struct stat file_stat;
> +
> + /*
> + * Initialise the socket to communicate with the secondary
> + * process.
> + */
> + ret = socket(AF_UNIX, SOCK_STREAM, 0);
> + if (ret < 0) {
> + WARN("secondary process not supported: %s", strerror(errno));
> + return ret;
> + }
> + priv->socket = ret;
> + flags = fcntl(priv->socket, F_GETFL, 0);
> + if (flags == -1)
> + goto out;
> + ret = fcntl(priv->socket, F_SETFL, flags | O_NONBLOCK);
> + if (ret < 0)
> + goto out;
> + snprintf(sun.sun_path, sizeof(sun.sun_path), "/var/tmp/%s_%d",
> + MLX5_DRIVER_NAME, priv->socket);
To keep the same coding style of the PMD, the indentation should be fixed.
> + ret = stat(sun.sun_path, &file_stat);
> + if (!ret)
> + claim_zero(remove(sun.sun_path));
> + ret = bind(priv->socket, (const struct sockaddr *)&sun, sizeof(sun));
> + if (ret < 0) {
> + WARN("cannot bind socket, secondary process not supported: %s",
> + strerror(errno));
> + goto close;
> + }
> + ret = listen(priv->socket, 0);
> + if (ret < 0) {
> + WARN("Secondary process not supported: %s", strerror(errno));
> + goto close;
> + }
> + return ret;
> +close:
> + remove(sun.sun_path);
> +out:
> + claim_zero(close(priv->socket));
> + priv->socket = 0;
> + return -(ret);
> +}
>[...]
> +
> +/**
> + * Connect to the primary process.
> + *
> + * @param[in] priv
> + * Pointer to private structure.
> + *
> + * @return
> + * fd on success, negative errno value on failure.
> + */
> +int
> +priv_socket_connect(struct priv *priv)
> +{
> + struct sockaddr_un sun = {
> + .sun_family = AF_UNIX,
> + };
> + int socket_fd;
> + int *fd = NULL;
> + int ret;
> + struct ucred *cred;
> + char buf[CMSG_SPACE(sizeof(*cred))] = { 0 };
> + char vbuf[1024] = { 0 };
> + struct iovec io = {
> + .iov_base = vbuf,
> + .iov_len = sizeof(*vbuf),
> + };
> + struct msghdr msg = {
> + .msg_control = buf,
> + .msg_controllen = sizeof(buf),
> + .msg_iov = &io,
> + .msg_iovlen = 1,
> + };
> + struct cmsghdr *cmsg;
> +
> + ret = socket(AF_UNIX, SOCK_STREAM, 0);
> + if (ret < 0) {
> + WARN("cannot connect to primary");
> + return ret;
> + }
> + socket_fd = ret;
> + snprintf(sun.sun_path, sizeof(sun.sun_path), "/var/tmp/%s_%d",
> + MLX5_DRIVER_NAME, priv->socket);
Same here about indentation.
> diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
> index 0ea6630..6f57319 100644
> --- a/drivers/net/mlx5/mlx5_txq.c
> +++ b/drivers/net/mlx5/mlx5_txq.c
> @@ -36,6 +36,8 @@
> #include <errno.h>
> #include <string.h>
> #include <stdint.h>
> +#include <unistd.h>
> +#include <sys/mman.h>
>
> /* Verbs header. */
> /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
> @@ -174,6 +176,8 @@
> struct mlx5dv_cq cq_info;
> struct mlx5dv_obj obj;
>
> + qp.comp_mask = MLX5DV_QP_MASK_UAR_INFO;
> +
Extra empty line.
> obj.cq.in = ibcq;
> obj.cq.out = &cq_info;
> obj.qp.in = tmpl->qp;
> @@ -183,7 +187,7 @@
> if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
> ERROR("Wrong MLX5_CQE_SIZE environment variable value: "
> "it should be set to %u", RTE_CACHE_LINE_SIZE);
> - return EINVAL;
> + return -EINVAL;
This doesn't respect the return value documented by the function.
> }
> tmpl->txq.cqe_n = log2above(cq_info.cqe_cnt);
> tmpl->txq.qp_num_8s = tmpl->qp->qp_num << 8;
> @@ -198,6 +202,14 @@
> tmpl->txq.elts =
> (struct rte_mbuf *(*)[1 << tmpl->txq.elts_n])
> ((uintptr_t)txq_ctrl + sizeof(*txq_ctrl));
> +
> + if (qp.comp_mask | MLX5DV_QP_MASK_UAR_INFO) {
> + tmpl->uar_mmap_offset = qp.uar_info.mmap_offset;
> + } else {
> + ERROR("Failed to retrieve UAR info, invalid libmlx5.so version");
> + return -EINVAL;
Same here.
> + }
> +
> return 0;
> }
>
> @@ -539,42 +551,53 @@
> }
>
> /**
> - * DPDK callback for TX in secondary processes.
> + * Map locally UAR used in Tx queues for BlueFlame doorbell.
> *
> - * This function configures all queues from primary process information
> - * if necessary before reverting to the normal TX burst callback.
> - *
> - * @param dpdk_txq
> - * Generic pointer to TX queue structure.
> - * @param[in] pkts
> - * Packets to transmit.
> - * @param pkts_n
> - * Number of packets in array.
> + * @param[in] priv
> + * Pointer to private structure.
> + * @param fd
> + * Verbs file descriptor to map UAR pages.
> *
> * @return
> - * Number of packets successfully transmitted (<= pkts_n).
> + * 0 on success, errno value on failure.
> */
> -uint16_t
> -mlx5_tx_burst_secondary_setup(void *dpdk_txq, struct rte_mbuf **pkts,
> - uint16_t pkts_n)
> +int
> +mlx5_tx_uar_remap(struct priv *priv, int fd)
> {
> - struct txq *txq = dpdk_txq;
> - struct txq_ctrl *txq_ctrl = container_of(txq, struct txq_ctrl, txq);
> - struct priv *priv = mlx5_secondary_data_setup(txq_ctrl->priv);
> - struct priv *primary_priv;
> - unsigned int index;
> + unsigned int i, j;
> + uintptr_t pages[priv->txqs_n];
> + unsigned int pages_n = 0;
> + uintptr_t uar_va;
> + void *addr;
> + struct txq *txq;
> + struct txq_ctrl *txq_ctrl;
> + int already_mapped;
> + size_t page_size = sysconf(_SC_PAGESIZE);
Same comment here about page_size: there is no guarantee the library will
use this value in the future. The PMD should not make such an assumption.
> - if (priv == NULL)
> - return 0;
> - primary_priv =
> - mlx5_secondary_data[priv->dev->data->port_id].primary_priv;
> - /* Look for queue index in both private structures. */
> - for (index = 0; index != priv->txqs_n; ++index)
> - if (((*primary_priv->txqs)[index] == txq) ||
> - ((*priv->txqs)[index] == txq))
> - break;
> - if (index == priv->txqs_n)
> - return 0;
> - txq = (*priv->txqs)[index];
> - return priv->dev->tx_pkt_burst(txq, pkts, pkts_n);
> + for (i = 0; i != priv->txqs_n; ++i) {
> + txq = (*priv->txqs)[i];
> + txq_ctrl = container_of(txq, struct txq_ctrl, txq);
> + uar_va = (uintptr_t)txq_ctrl->txq.bf_reg;
> + uar_va = RTE_ALIGN_FLOOR(uar_va,
> + page_size / priv->num_uars_per_page);
Indentation.
> + already_mapped = 0;
> + for (j = 0; j != pages_n; ++j) {
> + if (pages[j] == uar_va) {
> + already_mapped = 1;
> + break;
> + }
> + }
> + if (already_mapped)
> + continue;
> +
Extra empty line.
> + pages[pages_n++] = uar_va;
> + addr = mmap((void *)uar_va, page_size,
> + PROT_WRITE, MAP_FIXED | MAP_SHARED, fd,
> + txq_ctrl->uar_mmap_offset);
Indentation.
> + if (addr != (void *)uar_va) {
> + ERROR("call to mmap failed on UAR for txq %d\n", i);
> + return -1;
> + }
> + }
> + return 0;
> }
> --
> 1.8.3.1
Nice work.
Thanks,
[1] http://dpdk.org/ml/archives/dev/2017-August/073212.html
--
Nélio Laranjeiro
6WIND
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [dpdk-dev] [PATCH v1 1/2] net/mlx5: change eth device reference for secondary process
2017-08-25 7:15 ` Xueming(Steven) Li
@ 2017-08-25 7:32 ` Nélio Laranjeiro
0 siblings, 0 replies; 41+ messages in thread
From: Nélio Laranjeiro @ 2017-08-25 7:32 UTC (permalink / raw)
To: Xueming(Steven) Li; +Cc: dev
On Fri, Aug 25, 2017 at 07:15:50AM +0000, Xueming(Steven) Li wrote:
> Nelio, thanks, comments inline.
>[...]
> > > static int
> > > -priv_set_link(struct priv *priv, int up)
> > > +mlx5_dev_set_link(struct rte_eth_dev *dev, int up)
> > > {
> > > - struct rte_eth_dev *dev = priv->dev;
> > > + struct priv *priv = dev->data->dev_private;
> > > int err;
> > >
> >
> > This function should lock/unlock priv.
> This is a static function; the caller does the lock/unlock.
> Is there a naming convention here? Are mlx5_* the outward-facing interfaces
> that normally require lock/unlock of priv?
Yes, there is a naming convention following these patterns:
- priv_...(struct priv *priv, ...): no locks inside.
- priv_dev_...(struct priv *priv, struct rte_eth_dev *dev, ...): no locks
inside.
- mlx5_...(struct rte_eth_dev *dev, ...): should lock any access to struct priv
and to priv_*().
> > > if (up) {
> > > err = priv_set_flags(priv, ~IFF_UP, IFF_UP);
> > > if (err)
> > > return err;
> > > - priv_select_tx_function(priv);
> > > - priv_select_rx_function(priv);
> > > + mlx5_dev_select_tx_function(dev);
> > > + mlx5_dev_select_rx_function(dev);
> >
> > This also implies that the mlx5_dev_select_rx/tx_function() functions should
> > be renamed to:
> > priv_dev_select_rx/tx_function(struct priv *priv, struct rte_eth_dev *dev, ...)
> >
> > This will avoid the multiple lock/unlocks inside the functions.
> So priv_* are lock-free functions?
priv_*() functions assume the lock has already been taken by the caller.
Hope it helps.
Thanks,
--
Nélio Laranjeiro
6WIND
^ permalink raw reply [flat|nested] 41+ messages in thread
* [dpdk-dev] [PATCH v2 1/6] net/mlx5: change eth device reference for secondary process
2017-08-24 14:03 [dpdk-dev] [PATCH v1 1/2] net/mlx5: change eth device reference for secondary process Xueming Li
2017-08-24 14:03 ` [dpdk-dev] [PATCH v1 2/2] net/mlx5: add multiple process support Xueming Li
2017-08-25 6:52 ` [dpdk-dev] [PATCH v1 1/2] net/mlx5: change eth device reference for secondary process Nélio Laranjeiro
@ 2017-09-15 15:59 ` Xueming Li
2017-09-15 15:59 ` [dpdk-dev] [PATCH v2 2/6] net/mlx5: install a socket to exchange a file descriptor Xueming Li
` (4 more replies)
2017-09-18 14:36 ` [dpdk-dev] [PATCH v3 0/6] net/mlx5 multi-process support Xueming Li
` (18 subsequent siblings)
21 siblings, 5 replies; 41+ messages in thread
From: Xueming Li @ 2017-09-15 15:59 UTC (permalink / raw)
To: Nelio Laranjeiro, Adrien Mazarguil; +Cc: dev, Xueming Li
The rte_eth_dev created by the primary process was not available in a
secondary process, so it was not possible to use the primary process's local
memory object from a secondary process.
This patch changes the references to the primary rte_eth_dev object so that
a secondary process uses its own local rte_eth_dev instead.
Signed-off-by: Xueming Li <xuemingl@mellanox.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
drivers/net/mlx5/mlx5.h | 6 +++---
drivers/net/mlx5/mlx5_ethdev.c | 47 ++++++++++++++++++++++++-----------------
drivers/net/mlx5/mlx5_fdir.c | 2 ++
drivers/net/mlx5/mlx5_rss.c | 1 +
drivers/net/mlx5/mlx5_rxq.c | 1 +
drivers/net/mlx5/mlx5_trigger.c | 4 ++--
6 files changed, 37 insertions(+), 24 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index ab03fe0..78b27ed 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -87,7 +87,7 @@ struct mlx5_xstats_ctrl {
};
struct priv {
- struct rte_eth_dev *dev; /* Ethernet device. */
+ struct rte_eth_dev *dev; /* Ethernet device of master process. */
struct ibv_context *ctx; /* Verbs context. */
struct ibv_device_attr_ex device_attr; /* Device properties. */
struct ibv_pd *pd; /* Protection Domain. */
@@ -208,8 +208,8 @@ int mlx5_ibv_device_to_pci_addr(const struct ibv_device *,
void priv_dev_interrupt_handler_install(struct priv *, struct rte_eth_dev *);
int mlx5_set_link_down(struct rte_eth_dev *dev);
int mlx5_set_link_up(struct rte_eth_dev *dev);
-void priv_select_tx_function(struct priv *);
-void priv_select_rx_function(struct priv *);
+void priv_dev_select_tx_function(struct priv *priv, struct rte_eth_dev *dev);
+void priv_dev_select_rx_function(struct priv *priv, struct rte_eth_dev *dev);
/* mlx5_mac.c */
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 6f17a95..c1affba 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -1260,7 +1260,9 @@ struct priv *
* Change the link state (UP / DOWN).
*
* @param priv
- * Pointer to Ethernet device structure.
+ * Pointer to private data structure.
+ * @param dev
+ * Pointer to rte_eth_dev structure.
* @param up
* Nonzero for link up, otherwise link down.
*
@@ -1268,17 +1270,16 @@ struct priv *
* 0 on success, errno value on failure.
*/
static int
-priv_set_link(struct priv *priv, int up)
+priv_dev_set_link(struct priv *priv, struct rte_eth_dev *dev, int up)
{
- struct rte_eth_dev *dev = priv->dev;
int err;
if (up) {
err = priv_set_flags(priv, ~IFF_UP, IFF_UP);
if (err)
return err;
- priv_select_tx_function(priv);
- priv_select_rx_function(priv);
+ priv_dev_select_tx_function(priv, dev);
+ priv_dev_select_rx_function(priv, dev);
} else {
err = priv_set_flags(priv, ~IFF_UP, ~IFF_UP);
if (err)
@@ -1305,7 +1306,7 @@ struct priv *
int err;
priv_lock(priv);
- err = priv_set_link(priv, 0);
+ err = priv_dev_set_link(priv, dev, 0);
priv_unlock(priv);
return err;
}
@@ -1326,7 +1327,7 @@ struct priv *
int err;
priv_lock(priv);
- err = priv_set_link(priv, 1);
+ err = priv_dev_set_link(priv, dev, 1);
priv_unlock(priv);
return err;
}
@@ -1335,29 +1336,33 @@ struct priv *
* Configure the TX function to use.
*
* @param priv
- * Pointer to private structure.
+ * Pointer to private data structure.
+ * @param dev
+ * Pointer to rte_eth_dev structure.
*/
void
-priv_select_tx_function(struct priv *priv)
+priv_dev_select_tx_function(struct priv *priv, struct rte_eth_dev *dev)
{
- priv->dev->tx_pkt_burst = mlx5_tx_burst;
+ assert(priv != NULL);
+ assert(dev != NULL);
+ dev->tx_pkt_burst = mlx5_tx_burst;
/* Select appropriate TX function. */
if (priv->mps == MLX5_MPW_ENHANCED) {
if (priv_check_vec_tx_support(priv) > 0) {
if (priv_check_raw_vec_tx_support(priv) > 0)
- priv->dev->tx_pkt_burst = mlx5_tx_burst_raw_vec;
+ dev->tx_pkt_burst = mlx5_tx_burst_raw_vec;
else
- priv->dev->tx_pkt_burst = mlx5_tx_burst_vec;
+ dev->tx_pkt_burst = mlx5_tx_burst_vec;
DEBUG("selected Enhanced MPW TX vectorized function");
} else {
- priv->dev->tx_pkt_burst = mlx5_tx_burst_empw;
+ dev->tx_pkt_burst = mlx5_tx_burst_empw;
DEBUG("selected Enhanced MPW TX function");
}
} else if (priv->mps && priv->txq_inline) {
- priv->dev->tx_pkt_burst = mlx5_tx_burst_mpw_inline;
+ dev->tx_pkt_burst = mlx5_tx_burst_mpw_inline;
DEBUG("selected MPW inline TX function");
} else if (priv->mps) {
- priv->dev->tx_pkt_burst = mlx5_tx_burst_mpw;
+ dev->tx_pkt_burst = mlx5_tx_burst_mpw;
DEBUG("selected MPW TX function");
}
}
@@ -1366,15 +1371,19 @@ struct priv *
* Configure the RX function to use.
*
* @param priv
- * Pointer to private structure.
+ * Pointer to private data structure.
+ * @param dev
+ * Pointer to rte_eth_dev structure.
*/
void
-priv_select_rx_function(struct priv *priv)
+priv_dev_select_rx_function(struct priv *priv, struct rte_eth_dev *dev)
{
+ assert(priv != NULL);
+ assert(dev != NULL);
if (priv_check_vec_rx_support(priv) > 0) {
- priv->dev->rx_pkt_burst = mlx5_rx_burst_vec;
+ dev->rx_pkt_burst = mlx5_rx_burst_vec;
DEBUG("selected RX vectorized function");
} else {
- priv->dev->rx_pkt_burst = mlx5_rx_burst;
+ dev->rx_pkt_burst = mlx5_rx_burst;
}
}
diff --git a/drivers/net/mlx5/mlx5_fdir.c b/drivers/net/mlx5/mlx5_fdir.c
index acae668..66e3818 100644
--- a/drivers/net/mlx5/mlx5_fdir.c
+++ b/drivers/net/mlx5/mlx5_fdir.c
@@ -1068,6 +1068,8 @@ struct mlx5_fdir_filter {
int ret = EINVAL;
struct priv *priv = dev->data->dev_private;
+ if (mlx5_is_secondary())
+ return -E_RTE_SECONDARY;
switch (filter_type) {
case RTE_ETH_FILTER_GENERIC:
if (filter_op != RTE_ETH_FILTER_GET)
diff --git a/drivers/net/mlx5/mlx5_rss.c b/drivers/net/mlx5/mlx5_rss.c
index 1249943..d3d2603 100644
--- a/drivers/net/mlx5/mlx5_rss.c
+++ b/drivers/net/mlx5/mlx5_rss.c
@@ -350,6 +350,7 @@
int ret;
struct priv *priv = dev->data->dev_private;
+ assert(!mlx5_is_secondary());
mlx5_dev_stop(dev);
priv_lock(priv);
ret = priv_dev_rss_reta_update(priv, reta_conf, reta_size);
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 22448c9..b71f72f 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -1250,6 +1250,7 @@
unsigned int count = 0;
struct rte_intr_handle *intr_handle = priv->dev->intr_handle;
+ assert(!mlx5_is_secondary());
if (!priv->dev->data->dev_conf.intr_conf.rxq)
return 0;
priv_rx_intr_vec_disable(priv);
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 3fa9401..51c31aa 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -66,8 +66,8 @@
return 0;
}
/* Update Rx/Tx callback. */
- priv_select_tx_function(priv);
- priv_select_rx_function(priv);
+ priv_dev_select_tx_function(priv, dev);
+ priv_dev_select_rx_function(priv, dev);
DEBUG("%p: allocating and configuring hash RX queues", (void *)dev);
err = priv_create_hash_rxqs(priv);
if (!err)
--
1.8.3.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* [dpdk-dev] [PATCH v2 2/6] net/mlx5: install a socket to exchange a file descriptor
2017-09-15 15:59 ` [dpdk-dev] [PATCH v2 1/6] " Xueming Li
@ 2017-09-15 15:59 ` Xueming Li
2017-09-15 15:59 ` [dpdk-dev] [PATCH v2 3/6] net/mlx5: allocate verbs object into shared memory Xueming Li
` (3 subsequent siblings)
4 siblings, 0 replies; 41+ messages in thread
From: Xueming Li @ 2017-09-15 15:59 UTC (permalink / raw)
To: Nelio Laranjeiro, Adrien Mazarguil; +Cc: dev, Xueming Li
Use a Unix socket to get back the communication channel with the kernel
driver from the primary process. This is necessary to remap the UAR pages
into the secondary process memory space and thus use the same Tx queues.
This is only supported from rdma-core (v15).
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Signed-off-by: Xueming Li <xuemingl@mellanox.com>
---
This v2 patchset depends on upstream rdma-core enhancement:
http://www.dpdk.org/ml/archives/dev/2017-August/073405.html
---
drivers/net/mlx5/Makefile | 1 +
drivers/net/mlx5/mlx5.c | 35 +++++
drivers/net/mlx5/mlx5.h | 9 ++
drivers/net/mlx5/mlx5_ethdev.c | 50 +++++--
drivers/net/mlx5/mlx5_rxtx.h | 2 +
drivers/net/mlx5/mlx5_socket.c | 294 +++++++++++++++++++++++++++++++++++++++++
drivers/net/mlx5/mlx5_txq.c | 66 +++++++++
7 files changed, 447 insertions(+), 10 deletions(-)
create mode 100644 drivers/net/mlx5/mlx5_socket.c
diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index d9c42b5..787e86b 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -52,6 +52,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rss.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_fdir.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_mr.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow.c
+SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_socket.c
# Basic CFLAGS.
CFLAGS += -O3
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index dd1d086..bfa38ba 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -209,6 +209,7 @@ struct mlx5_args {
}
if (priv->reta_idx != NULL)
rte_free(priv->reta_idx);
+ priv_socket_uninit(priv);
priv_unlock(priv);
memset(priv, 0, sizeof(*priv));
}
@@ -578,6 +579,40 @@ struct mlx5_args {
.rx_vec_en = MLX5_ARG_UNSET,
};
+ mlx5_dev[idx].ports |= test;
+
+ if (mlx5_is_secondary()) {
+ /* from rte_ethdev.c */
+ char name[RTE_ETH_NAME_MAX_LEN];
+
+ snprintf(name, sizeof(name), "%s port %u",
+ ibv_get_device_name(ibv_dev), port);
+ eth_dev = rte_eth_dev_attach_secondary(name);
+ if (eth_dev == NULL) {
+ ERROR("can not attach rte ethdev");
+ err = ENOMEM;
+ goto error;
+ }
+ eth_dev->device = &pci_dev->device;
+ eth_dev->dev_ops = NULL;
+ priv = eth_dev->data->dev_private;
+ /* Receive command fd from primary process */
+ err = priv_socket_connect(priv);
+ if (err < 0) {
+ err = -err;
+ goto error;
+ }
+ /* Remap UAR for Tx queues. */
+ err = priv_tx_uar_remap(priv, err);
+ if (err < 0) {
+ err = -err;
+ goto error;
+ }
+ priv_dev_select_rx_function(priv, eth_dev);
+ priv_dev_select_tx_function(priv, eth_dev);
+ continue;
+ }
+
DEBUG("using port %u (%08" PRIx32 ")", port, test);
ctx = ibv_open_device(ibv_dev);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 78b27ed..1ce02e8 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -151,6 +151,8 @@ struct priv {
uint32_t link_speed_capa; /* Link speed capabilities. */
struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */
rte_spinlock_t lock; /* Lock for control functions. */
+ int primary_socket; /* Unix socket for primary process. */
+ struct rte_intr_handle intr_handle_socket; /* Interrupt handler. */
};
/**
@@ -299,4 +301,11 @@ int mlx5_flow_destroy(struct rte_eth_dev *, struct rte_flow *,
void priv_flow_stop(struct priv *);
int priv_flow_rxq_in_use(struct priv *, struct rxq *);
+/* mlx5_socket.c */
+
+int priv_socket_init(struct priv *priv);
+int priv_socket_uninit(struct priv *priv);
+void priv_socket_handle(struct priv *priv);
+int priv_socket_connect(struct priv *priv);
+
#endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index c1affba..46c3013 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -31,6 +31,8 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#define _GNU_SOURCE
+
#include <stddef.h>
#include <assert.h>
#include <unistd.h>
@@ -50,6 +52,7 @@
#include <linux/version.h>
#include <fcntl.h>
#include <stdalign.h>
+#include <sys/un.h>
#include <rte_atomic.h>
#include <rte_ethdev.h>
@@ -1204,6 +1207,23 @@ struct priv *
}
/**
+ * Handle interrupts from the socket.
+ *
+ * @param cb_arg
+ * Callback argument.
+ */
+static void
+mlx5_dev_handler_socket(void *cb_arg)
+{
+ struct rte_eth_dev *dev = cb_arg;
+ struct priv *priv = dev->data->dev_private;
+
+ priv_lock(priv);
+ priv_socket_handle(priv);
+ priv_unlock(priv);
+}
+
+/**
* Uninstall interrupt handler.
*
* @param priv
@@ -1214,16 +1234,19 @@ struct priv *
void
priv_dev_interrupt_handler_uninstall(struct priv *priv, struct rte_eth_dev *dev)
{
- if (!dev->data->dev_conf.intr_conf.lsc)
- return;
- rte_intr_callback_unregister(&priv->intr_handle,
- mlx5_dev_interrupt_handler,
- dev);
+ if (dev->data->dev_conf.intr_conf.lsc)
+ rte_intr_callback_unregister(&priv->intr_handle,
+ mlx5_dev_interrupt_handler, dev);
+ if (priv->primary_socket)
+ rte_intr_callback_unregister(&priv->intr_handle_socket,
+ mlx5_dev_handler_socket, dev);
if (priv->pending_alarm)
rte_eal_alarm_cancel(mlx5_dev_link_status_handler, dev);
priv->pending_alarm = 0;
priv->intr_handle.fd = 0;
priv->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+ priv->intr_handle_socket.fd = 0;
+ priv->intr_handle_socket.type = RTE_INTR_HANDLE_UNKNOWN;
}
/**
@@ -1239,20 +1262,27 @@ struct priv *
{
int rc, flags;
- if (!dev->data->dev_conf.intr_conf.lsc)
- return;
+ assert(!mlx5_is_secondary());
assert(priv->ctx->async_fd > 0);
flags = fcntl(priv->ctx->async_fd, F_GETFL);
rc = fcntl(priv->ctx->async_fd, F_SETFL, flags | O_NONBLOCK);
if (rc < 0) {
INFO("failed to change file descriptor async event queue");
dev->data->dev_conf.intr_conf.lsc = 0;
- } else {
+ }
+ if (dev->data->dev_conf.intr_conf.lsc) {
priv->intr_handle.fd = priv->ctx->async_fd;
priv->intr_handle.type = RTE_INTR_HANDLE_EXT;
rte_intr_callback_register(&priv->intr_handle,
- mlx5_dev_interrupt_handler,
- dev);
+ mlx5_dev_interrupt_handler, dev);
+ }
+
+ rc = priv_socket_init(priv);
+ if (!rc && priv->primary_socket) {
+ priv->intr_handle_socket.fd = priv->primary_socket;
+ priv->intr_handle_socket.type = RTE_INTR_HANDLE_EXT;
+ rte_intr_callback_register(&priv->intr_handle_socket,
+ mlx5_dev_handler_socket, dev);
}
}
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index e352a1e..5357be0 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -286,6 +286,7 @@ struct txq_ctrl {
struct ibv_qp *qp; /* Queue Pair. */
unsigned int socket; /* CPU socket ID for allocations. */
struct txq txq; /* Data path structure. */
+ off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
};
/* mlx5_rxq.c */
@@ -319,6 +320,7 @@ int txq_ctrl_setup(struct rte_eth_dev *, struct txq_ctrl *, uint16_t,
int mlx5_tx_queue_setup(struct rte_eth_dev *, uint16_t, uint16_t, unsigned int,
const struct rte_eth_txconf *);
void mlx5_tx_queue_release(void *);
+int priv_tx_uar_remap(struct priv *priv, int fd);
/* mlx5_rxtx.c */
diff --git a/drivers/net/mlx5/mlx5_socket.c b/drivers/net/mlx5/mlx5_socket.c
new file mode 100644
index 0000000..78b4138
--- /dev/null
+++ b/drivers/net/mlx5/mlx5_socket.c
@@ -0,0 +1,294 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2016 6WIND S.A.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#define _GNU_SOURCE
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/stat.h>
+
+#include "mlx5.h"
+#include "mlx5_utils.h"
+
+/**
+ * Initialise the socket to communicate with the secondary process
+ *
+ * @param[in] priv
+ * Pointer to private structure.
+ *
+ * @return
+ * 0 on success, errno value on failure.
+ */
+int
+priv_socket_init(struct priv *priv)
+{
+ struct sockaddr_un sun = {
+ .sun_family = AF_UNIX,
+ };
+ int ret;
+ int flags;
+ struct stat file_stat;
+
+ /*
+ * Initialise the socket to communicate with the secondary
+ * process.
+ */
+ ret = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (ret < 0) {
+ WARN("secondary process not supported: %s", strerror(errno));
+ return ret;
+ }
+ priv->primary_socket = ret;
+ flags = fcntl(priv->primary_socket, F_GETFL, 0);
+ if (flags == -1)
+ goto out;
+ ret = fcntl(priv->primary_socket, F_SETFL, flags | O_NONBLOCK);
+ if (ret < 0)
+ goto out;
+ snprintf(sun.sun_path, sizeof(sun.sun_path), "/var/tmp/%s_%d",
+ MLX5_DRIVER_NAME, priv->primary_socket);
+ ret = stat(sun.sun_path, &file_stat);
+ if (!ret)
+ claim_zero(remove(sun.sun_path));
+ ret = bind(priv->primary_socket, (const struct sockaddr *)&sun,
+ sizeof(sun));
+ if (ret < 0) {
+ WARN("cannot bind socket, secondary process not supported: %s",
+ strerror(errno));
+ goto close;
+ }
+ ret = listen(priv->primary_socket, 0);
+ if (ret < 0) {
+ WARN("Secondary process not supported: %s", strerror(errno));
+ goto close;
+ }
+ return ret;
+close:
+ remove(sun.sun_path);
+out:
+ claim_zero(close(priv->primary_socket));
+ priv->primary_socket = 0;
+ return -(ret);
+}
+
+/**
+ * Un-Initialise the socket to communicate with the secondary process
+ *
+ * @param[in] priv
+ * Pointer to private structure.
+ *
+ * @return
+ * 0 on success, errno value on failure.
+ */
+int
+priv_socket_uninit(struct priv *priv)
+{
+ MKSTR(path, "/var/tmp/%s_%d", MLX5_DRIVER_NAME, priv->primary_socket);
+ claim_zero(close(priv->primary_socket));
+ priv->primary_socket = 0;
+ claim_zero(remove(path));
+ return 0;
+}
+
+/**
+ * Handle socket interrupts.
+ *
+ * @param priv
+ * Pointer to private structure.
+ */
+void
+priv_socket_handle(struct priv *priv)
+{
+ int conn_sock;
+ int ret = 0;
+ struct cmsghdr *cmsg = NULL;
+ struct ucred *cred = NULL;
+ char buf[CMSG_SPACE(sizeof(struct ucred))] = { 0 };
+ char vbuf[1024] = { 0 };
+ struct iovec io = {
+ .iov_base = vbuf,
+ .iov_len = sizeof(*vbuf),
+ };
+ struct msghdr msg = {
+ .msg_iov = &io,
+ .msg_iovlen = 1,
+ .msg_control = buf,
+ .msg_controllen = sizeof(buf),
+ };
+ int *fd;
+
+ /* Accept the connection from the client. */
+ conn_sock = accept(priv->primary_socket, NULL, NULL);
+ if (conn_sock < 0) {
+ WARN("connection failed: %s", strerror(errno));
+ return;
+ }
+ ret = setsockopt(conn_sock, SOL_SOCKET, SO_PASSCRED, &(int){1},
+ sizeof(int));
+ if (ret < 0) {
+ WARN("cannot change socket options");
+ goto out;
+ }
+ ret = recvmsg(conn_sock, &msg, MSG_WAITALL);
+ if (ret < 0) {
+ WARN("received an empty message: %s", strerror(errno));
+ goto out;
+ }
+ /* Expect to receive credentials only. */
+ cmsg = CMSG_FIRSTHDR(&msg);
+ if (cmsg == NULL) {
+ WARN("no message");
+ goto out;
+ }
+ if ((cmsg->cmsg_type == SCM_CREDENTIALS) &&
+ (cmsg->cmsg_len >= sizeof(*cred))) {
+ cred = (struct ucred *)CMSG_DATA(cmsg);
+ assert(cred != NULL);
+ }
+ cmsg = CMSG_NXTHDR(&msg, cmsg);
+ if (cmsg != NULL) {
+ WARN("Message wrongly formated");
+ goto out;
+ }
+ /* Make sure all the ancillary data was received and valid. */
+ if ((cred == NULL) || (cred->uid != getuid()) ||
+ (cred->gid != getgid())) {
+ WARN("wrong credentials");
+ goto out;
+ }
+ /* Set-up the ancillary data. */
+ cmsg = CMSG_FIRSTHDR(&msg);
+ assert(cmsg != NULL);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(priv->ctx->cmd_fd));
+ fd = (int *)CMSG_DATA(cmsg);
+ *fd = priv->ctx->cmd_fd;
+ ret = sendmsg(conn_sock, &msg, 0);
+ if (ret < 0)
+ WARN("cannot send response");
+out:
+ close(conn_sock);
+}
+
+/**
+ * Connect to the primary process.
+ *
+ * @param[in] priv
+ * Pointer to private structure.
+ *
+ * @return
+ * fd on success, negative errno value on failure.
+ */
+int
+priv_socket_connect(struct priv *priv)
+{
+ struct sockaddr_un sun = {
+ .sun_family = AF_UNIX,
+ };
+ int socket_fd;
+ int *fd = NULL;
+ int ret;
+ struct ucred *cred;
+ char buf[CMSG_SPACE(sizeof(*cred))] = { 0 };
+ char vbuf[1024] = { 0 };
+ struct iovec io = {
+ .iov_base = vbuf,
+ .iov_len = sizeof(*vbuf),
+ };
+ struct msghdr msg = {
+ .msg_control = buf,
+ .msg_controllen = sizeof(buf),
+ .msg_iov = &io,
+ .msg_iovlen = 1,
+ };
+ struct cmsghdr *cmsg;
+
+ ret = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (ret < 0) {
+ WARN("cannot connect to primary");
+ return ret;
+ }
+ socket_fd = ret;
+ snprintf(sun.sun_path, sizeof(sun.sun_path), "/var/tmp/%s_%d",
+ MLX5_DRIVER_NAME, priv->primary_socket);
+ ret = connect(socket_fd, (const struct sockaddr *)&sun, sizeof(sun));
+ if (ret < 0) {
+ WARN("cannot connect to primary");
+ goto out;
+ }
+ cmsg = CMSG_FIRSTHDR(&msg);
+ if (cmsg == NULL) {
+ DEBUG("cannot get first message");
+ goto out;
+ }
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_CREDENTIALS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(*cred));
+ cred = (struct ucred *)CMSG_DATA(cmsg);
+ if (cred == NULL) {
+ DEBUG("no credentials received");
+ goto out;
+ }
+ cred->pid = getpid();
+ cred->uid = getuid();
+ cred->gid = getgid();
+ ret = sendmsg(socket_fd, &msg, MSG_DONTWAIT);
+ if (ret < 0) {
+ WARN("cannot send credentials to primary: %s",
+ strerror(errno));
+ goto out;
+ }
+ ret = recvmsg(socket_fd, &msg, MSG_WAITALL);
+ if (ret <= 0) {
+ WARN("no message from primary: %s", strerror(errno));
+ goto out;
+ }
+ cmsg = CMSG_FIRSTHDR(&msg);
+ if (cmsg == NULL) {
+ WARN("No file descriptor received");
+ goto out;
+ }
+ fd = (int *)CMSG_DATA(cmsg);
+ if (*fd <= 0) {
+ WARN("no file descriptor received: %s", strerror(errno));
+ ret = *fd;
+ goto out;
+ }
+ ret = *fd;
+out:
+ close(socket_fd);
+ return ret;
+}
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index d6c9657..18ea471 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -36,6 +36,8 @@
#include <errno.h>
#include <string.h>
#include <stdint.h>
+#include <unistd.h>
+#include <sys/mman.h>
/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
@@ -168,6 +170,7 @@
struct mlx5dv_obj obj;
int ret = 0;
+ qp.comp_mask = MLX5DV_QP_MASK_UAR_MMAP_OFFSET;
obj.cq.in = ibcq;
obj.cq.out = &cq_info;
obj.qp.in = tmpl->qp;
@@ -194,6 +197,13 @@
tmpl->txq.elts =
(struct rte_mbuf *(*)[1 << tmpl->txq.elts_n])
((uintptr_t)txq_ctrl + sizeof(*txq_ctrl));
+ if (qp.comp_mask | MLX5DV_QP_MASK_UAR_MMAP_OFFSET) {
+ tmpl->uar_mmap_offset = qp.uar_mmap_offset;
+ } else {
+ ERROR("Failed to retrieve UAR info, invalid libmlx5.so version");
+ return EINVAL;
+ }
+
return 0;
}
@@ -533,3 +543,59 @@
rte_free(txq_ctrl);
priv_unlock(priv);
}
+
+
+/**
+ * Map locally UAR used in Tx queues for BlueFlame doorbell.
+ *
+ * @param[in] priv
+ * Pointer to private structure.
+ * @param fd
+ * Verbs file descriptor to map UAR pages.
+ *
+ * @return
+ * 0 on success, errno value on failure.
+ */
+int
+priv_tx_uar_remap(struct priv *priv, int fd)
+{
+ unsigned int i, j;
+ uintptr_t pages[priv->txqs_n];
+ unsigned int pages_n = 0;
+ uintptr_t uar_va;
+ void *addr;
+ struct txq *txq;
+ struct txq_ctrl *txq_ctrl;
+ int already_mapped;
+ size_t page_size = sysconf(_SC_PAGESIZE);
+
+ /*
+ * As rdma-core, UARs are mapped in size of OS page size.
+ * Use aligned address to avoid duplicate mmap.
+ * Ref to libmlx5 function: mlx5_init_context()
+ */
+ for (i = 0; i != priv->txqs_n; ++i) {
+ txq = (*priv->txqs)[i];
+ txq_ctrl = container_of(txq, struct txq_ctrl, txq);
+ uar_va = (uintptr_t)txq_ctrl->txq.bf_reg;
+ uar_va = RTE_ALIGN_FLOOR(uar_va, page_size);
+ already_mapped = 0;
+ for (j = 0; j != pages_n; ++j) {
+ if (pages[j] == uar_va) {
+ already_mapped = 1;
+ break;
+ }
+ }
+ if (already_mapped)
+ continue;
+ pages[pages_n++] = uar_va;
+ addr = mmap((void *)uar_va, page_size,
+ PROT_WRITE, MAP_FIXED | MAP_SHARED, fd,
+ txq_ctrl->uar_mmap_offset);
+ if (addr != (void *)uar_va) {
+ ERROR("call to mmap failed on UAR for txq %d\n", i);
+ return -1;
+ }
+ }
+ return 0;
+}
--
1.8.3.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* [dpdk-dev] [PATCH v2 3/6] net/mlx5: allocate verbs object into shared memory
2017-09-15 15:59 ` [dpdk-dev] [PATCH v2 1/6] " Xueming Li
2017-09-15 15:59 ` [dpdk-dev] [PATCH v2 2/6] net/mlx5: install a socket to exchange a file descriptor Xueming Li
@ 2017-09-15 15:59 ` Xueming Li
2017-09-15 15:59 ` [dpdk-dev] [PATCH v2 4/6] net/mlx5: remove verbs fork check Xueming Li
` (2 subsequent siblings)
4 siblings, 0 replies; 41+ messages in thread
From: Xueming Li @ 2017-09-15 15:59 UTC (permalink / raw)
To: Nelio Laranjeiro, Adrien Mazarguil; +Cc: dev, Xueming Li
The PMD uses Verbs objects which were not available in shared memory.
This patch modifies the location where Verbs objects are allocated (from
process memory address space to shared memory address space) and thus
allows a secondary process to use those objects by mapping this shared
memory space into its own memory space.
Signed-off-by: Xueming Li <xuemingl@mellanox.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
drivers/net/mlx5/mlx5.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 55 insertions(+)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index bfa38ba..11490d4 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -132,6 +132,52 @@ struct mlx5_args {
}
/**
+ * Verbs callback to allocate a memory. This function should allocate the space
+ * according to the size provided residing inside a huge page.
+ * Please note that all allocation must respect the alignment from libmlx5
+ * (i.e. currently sysconf(_SC_PAGESIZE)).
+ *
+ * @param[in] size
+ * The size in bytes of the memory to allocate.
+ * @param[in] data
+ * A pointer to the callback data.
+ *
+ * @return
+ * a pointer to the allocate space.
+ */
+static void *
+mlx5_alloc_verbs_buf(size_t size, void *data)
+{
+ struct priv *priv = data;
+ void *ret;
+ size_t alignment = sysconf(_SC_PAGESIZE);
+
+ assert(data != NULL);
+ assert(!mlx5_is_secondary());
+ ret = rte_malloc_socket(__func__, size, alignment,
+ priv->dev->device->numa_node);
+ DEBUG("Extern alloc size: %lu, align: %lu: %p", size, alignment, ret);
+ return ret;
+}
+
+/**
+ * Verbs callback to free a memory.
+ *
+ * @param[in] ptr
+ * A pointer to the memory to free.
+ * @param[in] data
+ * A pointer to the callback data.
+ */
+static void
+mlx5_free_verbs_buf(void *ptr, void *data __rte_unused)
+{
+ assert(data != NULL);
+ assert(!mlx5_is_secondary());
+ DEBUG("Extern free request: %p", ptr);
+ rte_free(ptr);
+}
+
+/**
* DPDK callback to close the device.
*
* Destroy all queues and objects, free memory.
@@ -826,6 +872,15 @@ struct mlx5_args {
eth_dev->dev_ops = &mlx5_dev_ops;
TAILQ_INIT(&priv->flows);
+ /* Hint libmlx5 to use PMD allocator for data plane resources */
+ struct mlx5dv_ctx_allocators alctr = {
+ .alloc = &mlx5_alloc_verbs_buf,
+ .free = &mlx5_free_verbs_buf,
+ .data = priv,
+ };
+ mlx5dv_set_context_attr(ctx, MLX5DV_CTX_ATTR_BUF_ALLOCATORS,
+ (void *)((uintptr_t)&alctr));
+
/* Bring Ethernet device up. */
DEBUG("forcing Ethernet interface up");
priv_set_flags(priv, ~IFF_UP, IFF_UP);
--
1.8.3.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* [dpdk-dev] [PATCH v2 4/6] net/mlx5: remove verbs fork check
2017-09-15 15:59 ` [dpdk-dev] [PATCH v2 1/6] " Xueming Li
2017-09-15 15:59 ` [dpdk-dev] [PATCH v2 2/6] net/mlx5: install a socket to exchange a file descriptor Xueming Li
2017-09-15 15:59 ` [dpdk-dev] [PATCH v2 3/6] net/mlx5: allocate verbs object into shared memory Xueming Li
@ 2017-09-15 15:59 ` Xueming Li
2017-09-15 15:59 ` [dpdk-dev] [PATCH v2 5/6] net/mlx5: add operations for secondary process Xueming Li
2017-09-15 16:00 ` [dpdk-dev] [PATCH v2 6/6] net/mlx5: multi-process document update Xueming Li
4 siblings, 0 replies; 41+ messages in thread
From: Xueming Li @ 2017-09-15 15:59 UTC (permalink / raw)
To: Nelio Laranjeiro, Adrien Mazarguil; +Cc: dev, Xueming Li
The Verbs API prohibits resource inheritance by default, which in turn
prevents the PMD from working in a forked secondary process that relies on
memory inherited from the parent process.
This patch removes verbs fork check to enable DPDK forked process.
Signed-off-by: Xueming Li <xuemingl@mellanox.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
drivers/net/mlx5/mlx5.c | 8 --------
1 file changed, 8 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 11490d4..e124a14 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -977,16 +977,8 @@ struct mlx5_args {
{
/* Build the static table for ptype conversion. */
mlx5_set_ptype_table();
- /*
- * RDMAV_HUGEPAGES_SAFE tells ibv_fork_init() we intend to use
- * huge pages. Calling ibv_fork_init() during init allows
- * applications to use fork() safely for purposes other than
- * using this PMD, which is not supported in forked processes.
- */
- setenv("RDMAV_HUGEPAGES_SAFE", "1", 1);
/* Don't map UAR to WC if BlueFlame is not used.*/
setenv("MLX5_SHUT_UP_BF", "1", 1);
- ibv_fork_init();
rte_pci_register(&mlx5_driver);
}
--
1.8.3.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* [dpdk-dev] [PATCH v2 5/6] net/mlx5: add operations for secondary process
2017-09-15 15:59 ` [dpdk-dev] [PATCH v2 1/6] " Xueming Li
` (2 preceding siblings ...)
2017-09-15 15:59 ` [dpdk-dev] [PATCH v2 4/6] net/mlx5: remove verbs fork check Xueming Li
@ 2017-09-15 15:59 ` Xueming Li
2017-09-15 16:00 ` [dpdk-dev] [PATCH v2 6/6] net/mlx5: multi-process document update Xueming Li
4 siblings, 0 replies; 41+ messages in thread
From: Xueming Li @ 2017-09-15 15:59 UTC (permalink / raw)
To: Nelio Laranjeiro, Adrien Mazarguil; +Cc: dev, Xueming Li
Add operations that are safe for secondary processes:
* (x)stats
* device info get
* rx/tx descriptor status
Signed-off-by: Xueming Li <xuemingl@mellanox.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
drivers/net/mlx5/mlx5.c | 16 +++++++++++++++-
drivers/net/mlx5/mlx5.h | 1 +
drivers/net/mlx5/mlx5_ethdev.c | 11 +++++------
3 files changed, 21 insertions(+), 7 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index e124a14..76e92a5 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -303,6 +303,18 @@ struct mlx5_args {
.rx_queue_intr_disable = mlx5_rx_intr_disable,
};
+
+static const struct eth_dev_ops mlx5_dev_sec_ops = { /* secondary process ops */
+ .stats_get = mlx5_stats_get,
+ .stats_reset = mlx5_stats_reset,
+ .xstats_get = mlx5_xstats_get,
+ .xstats_reset = mlx5_xstats_reset,
+ .xstats_get_names = mlx5_xstats_get_names,
+ .dev_infos_get = mlx5_dev_infos_get,
+ .rx_descriptor_status = mlx5_rx_descriptor_status,
+ .tx_descriptor_status = mlx5_tx_descriptor_status,
+};
+
static struct {
struct rte_pci_addr pci_addr; /* associated PCI address */
uint32_t ports; /* physical ports bitfield. */
@@ -640,7 +652,7 @@ struct mlx5_args {
goto error;
}
eth_dev->device = &pci_dev->device;
- eth_dev->dev_ops = NULL;
+ eth_dev->dev_ops = &mlx5_dev_sec_ops;
priv = eth_dev->data->dev_private;
/* Receive command fd from primary process */
err = priv_socket_connect(priv);
@@ -707,6 +719,8 @@ struct mlx5_args {
}
priv->ctx = ctx;
+ strncpy(priv->ibdev_path, priv->ctx->device->ibdev_path,
+ sizeof(priv->ibdev_path));
priv->device_attr = device_attr;
priv->port = port;
priv->pd = pd;
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 1ce02e8..928aeb6 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -91,6 +91,7 @@ struct priv {
struct ibv_context *ctx; /* Verbs context. */
struct ibv_device_attr_ex device_attr; /* Device properties. */
struct ibv_pd *pd; /* Protection Domain. */
+ char ibdev_path[IBV_SYSFS_PATH_MAX]; /* IB device path for secondary */
/*
* MAC addresses array and configuration bit-field.
* An extra entry that cannot be modified by the DPDK is reserved
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 46c3013..8f3ff89 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -165,7 +165,7 @@ struct priv *
char match[IF_NAMESIZE] = "";
{
- MKSTR(path, "%s/device/net", priv->ctx->device->ibdev_path);
+ MKSTR(path, "%s/device/net", priv->ibdev_path);
dir = opendir(path);
if (dir == NULL)
@@ -183,7 +183,7 @@ struct priv *
continue;
MKSTR(path, "%s/device/net/%s/%s",
- priv->ctx->device->ibdev_path, name,
+ priv->ibdev_path, name,
(dev_type ? "dev_id" : "dev_port"));
file = fopen(path, "rb");
@@ -271,11 +271,11 @@ struct priv *
if (priv_is_ib_cntr(entry)) {
MKSTR(path, "%s/ports/1/hw_counters/%s",
- priv->ctx->device->ibdev_path, entry);
+ priv->ibdev_path, entry);
file = fopen(path, "rb");
} else {
MKSTR(path, "%s/device/net/%s/%s",
- priv->ctx->device->ibdev_path, ifname, entry);
+ priv->ibdev_path, ifname, entry);
file = fopen(path, "rb");
}
if (file == NULL)
@@ -318,8 +318,7 @@ struct priv *
if (priv_get_ifname(priv, &ifname))
return -1;
- MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device->ibdev_path,
- ifname, entry);
+ MKSTR(path, "%s/device/net/%s/%s", priv->ibdev_path, ifname, entry);
file = fopen(path, "wb");
if (file == NULL)
--
1.8.3.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* [dpdk-dev] [PATCH v2 6/6] net/mlx5: multi-process document update
2017-09-15 15:59 ` [dpdk-dev] [PATCH v2 1/6] " Xueming Li
` (3 preceding siblings ...)
2017-09-15 15:59 ` [dpdk-dev] [PATCH v2 5/6] net/mlx5: add operations for secondary process Xueming Li
@ 2017-09-15 16:00 ` Xueming Li
4 siblings, 0 replies; 41+ messages in thread
From: Xueming Li @ 2017-09-15 16:00 UTC (permalink / raw)
To: Nelio Laranjeiro, Adrien Mazarguil; +Cc: dev, Xueming Li
This patch updates the feature list and the NIC guide to reflect
multi-process support.
Signed-off-by: Xueming Li <xuemingl@mellanox.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
doc/guides/nics/features/mlx5.ini | 1 +
doc/guides/nics/mlx5.rst | 3 +--
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/doc/guides/nics/features/mlx5.ini b/doc/guides/nics/features/mlx5.ini
index 99a8d93..2913591 100644
--- a/doc/guides/nics/features/mlx5.ini
+++ b/doc/guides/nics/features/mlx5.ini
@@ -34,6 +34,7 @@ Tx descriptor status = Y
Basic stats = Y
Extended stats = Y
Stats per queue = Y
+Multiprocess aware = Y
Other kdrv = Y
ARMv8 = Y
Power8 = Y
diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index ffa20a2..5c77dea 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -92,7 +92,7 @@ Features
- Flow director (RTE_FDIR_MODE_PERFECT, RTE_FDIR_MODE_PERFECT_MAC_VLAN and
RTE_ETH_FDIR_REJECT).
- Flow API.
-- Secondary process TX is supported.
+- Multiple process.
- KVM and VMware ESX SR-IOV modes are supported.
- RSS hash result is supported.
- Hardware TSO.
@@ -106,7 +106,6 @@ Limitations
- Inner RSS for VXLAN frames is not supported yet.
- Port statistics through software counters only.
- Hardware checksum RX offloads for VXLAN inner header are not supported yet.
-- Secondary process RX is not supported.
- Flow pattern without any specific vlan will match for vlan packets as well:
When VLAN spec is not specified in the pattern, the matching rule will be created with VLAN as a wild card.
--
1.8.3.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* [dpdk-dev] [PATCH v3 0/6] net/mlx5 multi-process support
2017-08-24 14:03 [dpdk-dev] [PATCH v1 1/2] net/mlx5: change eth device reference for secondary process Xueming Li
` (2 preceding siblings ...)
2017-09-15 15:59 ` [dpdk-dev] [PATCH v2 1/6] " Xueming Li
@ 2017-09-18 14:36 ` Xueming Li
2017-09-18 14:36 ` [dpdk-dev] [PATCH v3 1/6] net/mlx5: change eth device reference for secondary process Xueming Li
` (17 subsequent siblings)
21 siblings, 0 replies; 41+ messages in thread
From: Xueming Li @ 2017-09-18 14:36 UTC (permalink / raw)
To: Nelio Laranjeiro, Adrien Mazarguil; +Cc: Xueming Li, dev
This patchset enhances Mellanox multi-process support so that all multi-process
examples work, and also supports reading ethdev (x)stats in a secondary process.
Starting from V2, this patchset depends on an upstream rdma-core enhancement
and l2fork example bug fix:
http://www.dpdk.org/ml/archives/dev/2017-August/073405.html
http://www.dpdk.org/ml/archives/dev/2017-September/075568.html
V3:
* add cover letter
* add dependency notes
V2:
* split into multiple patches
* support forked secondary process
* add secondary process ethdev operations
* rebase on latest rdma-core upstream api
Xueming Li (6):
net/mlx5: change eth device reference for secondary process
net/mlx5: install a socket to exchange a file descriptor
net/mlx5: allocate verbs object into shared memory
net/mlx5: remove verbs fork check
net/mlx5: add operations for secondary process
net/mlx5: multi-process document update
doc/guides/nics/features/mlx5.ini | 1 +
doc/guides/nics/mlx5.rst | 3 +-
drivers/net/mlx5/Makefile | 1 +
drivers/net/mlx5/mlx5.c | 112 +++++++++++++--
drivers/net/mlx5/mlx5.h | 16 ++-
drivers/net/mlx5/mlx5_ethdev.c | 108 +++++++++-----
drivers/net/mlx5/mlx5_fdir.c | 2 +
drivers/net/mlx5/mlx5_rss.c | 1 +
drivers/net/mlx5/mlx5_rxq.c | 1 +
drivers/net/mlx5/mlx5_rxtx.h | 2 +
drivers/net/mlx5/mlx5_socket.c | 294 ++++++++++++++++++++++++++++++++++++++
drivers/net/mlx5/mlx5_trigger.c | 4 +-
drivers/net/mlx5/mlx5_txq.c | 66 +++++++++
13 files changed, 561 insertions(+), 50 deletions(-)
create mode 100644 drivers/net/mlx5/mlx5_socket.c
--
1.8.3.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* [dpdk-dev] [PATCH v3 1/6] net/mlx5: change eth device reference for secondary process
2017-08-24 14:03 [dpdk-dev] [PATCH v1 1/2] net/mlx5: change eth device reference for secondary process Xueming Li
` (3 preceding siblings ...)
2017-09-18 14:36 ` [dpdk-dev] [PATCH v3 0/6] net/mlx5 multi-process support Xueming Li
@ 2017-09-18 14:36 ` Xueming Li
2017-09-18 14:36 ` [dpdk-dev] [PATCH v3 2/6] net/mlx5: install a socket to exchange a file descriptor Xueming Li
` (16 subsequent siblings)
21 siblings, 0 replies; 41+ messages in thread
From: Xueming Li @ 2017-09-18 14:36 UTC (permalink / raw)
To: Nelio Laranjeiro, Adrien Mazarguil; +Cc: Xueming Li, dev
The rte_eth_dev created by the primary process was not available in a
secondary process: it was not possible to use a primary process local memory
object from a secondary process.
This patch changes the code that referenced the primary rte_eth_dev object so
that it uses the secondary process's local rte_eth_dev instead.
Signed-off-by: Xueming Li <xuemingl@mellanox.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
drivers/net/mlx5/mlx5.h | 6 +++---
drivers/net/mlx5/mlx5_ethdev.c | 47 ++++++++++++++++++++++++-----------------
drivers/net/mlx5/mlx5_fdir.c | 2 ++
drivers/net/mlx5/mlx5_rss.c | 1 +
drivers/net/mlx5/mlx5_rxq.c | 1 +
drivers/net/mlx5/mlx5_trigger.c | 4 ++--
6 files changed, 37 insertions(+), 24 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index ab03fe0..78b27ed 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -87,7 +87,7 @@ struct mlx5_xstats_ctrl {
};
struct priv {
- struct rte_eth_dev *dev; /* Ethernet device. */
+ struct rte_eth_dev *dev; /* Ethernet device of master process. */
struct ibv_context *ctx; /* Verbs context. */
struct ibv_device_attr_ex device_attr; /* Device properties. */
struct ibv_pd *pd; /* Protection Domain. */
@@ -208,8 +208,8 @@ int mlx5_ibv_device_to_pci_addr(const struct ibv_device *,
void priv_dev_interrupt_handler_install(struct priv *, struct rte_eth_dev *);
int mlx5_set_link_down(struct rte_eth_dev *dev);
int mlx5_set_link_up(struct rte_eth_dev *dev);
-void priv_select_tx_function(struct priv *);
-void priv_select_rx_function(struct priv *);
+void priv_dev_select_tx_function(struct priv *priv, struct rte_eth_dev *dev);
+void priv_dev_select_rx_function(struct priv *priv, struct rte_eth_dev *dev);
/* mlx5_mac.c */
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 6f17a95..c1affba 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -1260,7 +1260,9 @@ struct priv *
* Change the link state (UP / DOWN).
*
* @param priv
- * Pointer to Ethernet device structure.
+ * Pointer to private data structure.
+ * @param dev
+ * Pointer to rte_eth_dev structure.
* @param up
* Nonzero for link up, otherwise link down.
*
@@ -1268,17 +1270,16 @@ struct priv *
* 0 on success, errno value on failure.
*/
static int
-priv_set_link(struct priv *priv, int up)
+priv_dev_set_link(struct priv *priv, struct rte_eth_dev *dev, int up)
{
- struct rte_eth_dev *dev = priv->dev;
int err;
if (up) {
err = priv_set_flags(priv, ~IFF_UP, IFF_UP);
if (err)
return err;
- priv_select_tx_function(priv);
- priv_select_rx_function(priv);
+ priv_dev_select_tx_function(priv, dev);
+ priv_dev_select_rx_function(priv, dev);
} else {
err = priv_set_flags(priv, ~IFF_UP, ~IFF_UP);
if (err)
@@ -1305,7 +1306,7 @@ struct priv *
int err;
priv_lock(priv);
- err = priv_set_link(priv, 0);
+ err = priv_dev_set_link(priv, dev, 0);
priv_unlock(priv);
return err;
}
@@ -1326,7 +1327,7 @@ struct priv *
int err;
priv_lock(priv);
- err = priv_set_link(priv, 1);
+ err = priv_dev_set_link(priv, dev, 1);
priv_unlock(priv);
return err;
}
@@ -1335,29 +1336,33 @@ struct priv *
* Configure the TX function to use.
*
* @param priv
- * Pointer to private structure.
+ * Pointer to private data structure.
+ * @param dev
+ * Pointer to rte_eth_dev structure.
*/
void
-priv_select_tx_function(struct priv *priv)
+priv_dev_select_tx_function(struct priv *priv, struct rte_eth_dev *dev)
{
- priv->dev->tx_pkt_burst = mlx5_tx_burst;
+ assert(priv != NULL);
+ assert(dev != NULL);
+ dev->tx_pkt_burst = mlx5_tx_burst;
/* Select appropriate TX function. */
if (priv->mps == MLX5_MPW_ENHANCED) {
if (priv_check_vec_tx_support(priv) > 0) {
if (priv_check_raw_vec_tx_support(priv) > 0)
- priv->dev->tx_pkt_burst = mlx5_tx_burst_raw_vec;
+ dev->tx_pkt_burst = mlx5_tx_burst_raw_vec;
else
- priv->dev->tx_pkt_burst = mlx5_tx_burst_vec;
+ dev->tx_pkt_burst = mlx5_tx_burst_vec;
DEBUG("selected Enhanced MPW TX vectorized function");
} else {
- priv->dev->tx_pkt_burst = mlx5_tx_burst_empw;
+ dev->tx_pkt_burst = mlx5_tx_burst_empw;
DEBUG("selected Enhanced MPW TX function");
}
} else if (priv->mps && priv->txq_inline) {
- priv->dev->tx_pkt_burst = mlx5_tx_burst_mpw_inline;
+ dev->tx_pkt_burst = mlx5_tx_burst_mpw_inline;
DEBUG("selected MPW inline TX function");
} else if (priv->mps) {
- priv->dev->tx_pkt_burst = mlx5_tx_burst_mpw;
+ dev->tx_pkt_burst = mlx5_tx_burst_mpw;
DEBUG("selected MPW TX function");
}
}
@@ -1366,15 +1371,19 @@ struct priv *
* Configure the RX function to use.
*
* @param priv
- * Pointer to private structure.
+ * Pointer to private data structure.
+ * @param dev
+ * Pointer to rte_eth_dev structure.
*/
void
-priv_select_rx_function(struct priv *priv)
+priv_dev_select_rx_function(struct priv *priv, struct rte_eth_dev *dev)
{
+ assert(priv != NULL);
+ assert(dev != NULL);
if (priv_check_vec_rx_support(priv) > 0) {
- priv->dev->rx_pkt_burst = mlx5_rx_burst_vec;
+ dev->rx_pkt_burst = mlx5_rx_burst_vec;
DEBUG("selected RX vectorized function");
} else {
- priv->dev->rx_pkt_burst = mlx5_rx_burst;
+ dev->rx_pkt_burst = mlx5_rx_burst;
}
}
diff --git a/drivers/net/mlx5/mlx5_fdir.c b/drivers/net/mlx5/mlx5_fdir.c
index acae668..66e3818 100644
--- a/drivers/net/mlx5/mlx5_fdir.c
+++ b/drivers/net/mlx5/mlx5_fdir.c
@@ -1068,6 +1068,8 @@ struct mlx5_fdir_filter {
int ret = EINVAL;
struct priv *priv = dev->data->dev_private;
+ if (mlx5_is_secondary())
+ return -E_RTE_SECONDARY;
switch (filter_type) {
case RTE_ETH_FILTER_GENERIC:
if (filter_op != RTE_ETH_FILTER_GET)
diff --git a/drivers/net/mlx5/mlx5_rss.c b/drivers/net/mlx5/mlx5_rss.c
index 1249943..d3d2603 100644
--- a/drivers/net/mlx5/mlx5_rss.c
+++ b/drivers/net/mlx5/mlx5_rss.c
@@ -350,6 +350,7 @@
int ret;
struct priv *priv = dev->data->dev_private;
+ assert(!mlx5_is_secondary());
mlx5_dev_stop(dev);
priv_lock(priv);
ret = priv_dev_rss_reta_update(priv, reta_conf, reta_size);
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 22448c9..b71f72f 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -1250,6 +1250,7 @@
unsigned int count = 0;
struct rte_intr_handle *intr_handle = priv->dev->intr_handle;
+ assert(!mlx5_is_secondary());
if (!priv->dev->data->dev_conf.intr_conf.rxq)
return 0;
priv_rx_intr_vec_disable(priv);
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 3fa9401..51c31aa 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -66,8 +66,8 @@
return 0;
}
/* Update Rx/Tx callback. */
- priv_select_tx_function(priv);
- priv_select_rx_function(priv);
+ priv_dev_select_tx_function(priv, dev);
+ priv_dev_select_rx_function(priv, dev);
DEBUG("%p: allocating and configuring hash RX queues", (void *)dev);
err = priv_create_hash_rxqs(priv);
if (!err)
--
1.8.3.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* [dpdk-dev] [PATCH v3 2/6] net/mlx5: install a socket to exchange a file descriptor
2017-08-24 14:03 [dpdk-dev] [PATCH v1 1/2] net/mlx5: change eth device reference for secondary process Xueming Li
` (4 preceding siblings ...)
2017-09-18 14:36 ` [dpdk-dev] [PATCH v3 1/6] net/mlx5: change eth device reference for secondary process Xueming Li
@ 2017-09-18 14:36 ` Xueming Li
2017-09-18 14:36 ` [dpdk-dev] [PATCH v3 3/6] net/mlx5: allocate verbs object into shared memory Xueming Li
` (15 subsequent siblings)
21 siblings, 0 replies; 41+ messages in thread
From: Xueming Li @ 2017-09-18 14:36 UTC (permalink / raw)
To: Nelio Laranjeiro, Adrien Mazarguil; +Cc: Xueming Li, dev
Use a unix socket to get back the communication channel with the Kernel
driver from the primary process, this is necessary to remap those pages
in the secondary process memory space and thus use the same Tx queues.
This is only supported from rdma-core (v15).
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Signed-off-by: Xueming Li <xuemingl@mellanox.com>
---
drivers/net/mlx5/Makefile | 1 +
drivers/net/mlx5/mlx5.c | 35 +++++
drivers/net/mlx5/mlx5.h | 9 ++
drivers/net/mlx5/mlx5_ethdev.c | 50 +++++--
drivers/net/mlx5/mlx5_rxtx.h | 2 +
drivers/net/mlx5/mlx5_socket.c | 294 +++++++++++++++++++++++++++++++++++++++++
drivers/net/mlx5/mlx5_txq.c | 66 +++++++++
7 files changed, 447 insertions(+), 10 deletions(-)
create mode 100644 drivers/net/mlx5/mlx5_socket.c
diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index d9c42b5..787e86b 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -52,6 +52,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rss.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_fdir.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_mr.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow.c
+SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_socket.c
# Basic CFLAGS.
CFLAGS += -O3
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index dd1d086..bfa38ba 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -209,6 +209,7 @@ struct mlx5_args {
}
if (priv->reta_idx != NULL)
rte_free(priv->reta_idx);
+ priv_socket_uninit(priv);
priv_unlock(priv);
memset(priv, 0, sizeof(*priv));
}
@@ -578,6 +579,40 @@ struct mlx5_args {
.rx_vec_en = MLX5_ARG_UNSET,
};
+ mlx5_dev[idx].ports |= test;
+
+ if (mlx5_is_secondary()) {
+ /* from rte_ethdev.c */
+ char name[RTE_ETH_NAME_MAX_LEN];
+
+ snprintf(name, sizeof(name), "%s port %u",
+ ibv_get_device_name(ibv_dev), port);
+ eth_dev = rte_eth_dev_attach_secondary(name);
+ if (eth_dev == NULL) {
+ ERROR("can not attach rte ethdev");
+ err = ENOMEM;
+ goto error;
+ }
+ eth_dev->device = &pci_dev->device;
+ eth_dev->dev_ops = NULL;
+ priv = eth_dev->data->dev_private;
+ /* Receive command fd from primary process */
+ err = priv_socket_connect(priv);
+ if (err < 0) {
+ err = -err;
+ goto error;
+ }
+ /* Remap UAR for Tx queues. */
+ err = priv_tx_uar_remap(priv, err);
+ if (err < 0) {
+ err = -err;
+ goto error;
+ }
+ priv_dev_select_rx_function(priv, eth_dev);
+ priv_dev_select_tx_function(priv, eth_dev);
+ continue;
+ }
+
DEBUG("using port %u (%08" PRIx32 ")", port, test);
ctx = ibv_open_device(ibv_dev);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 78b27ed..1ce02e8 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -151,6 +151,8 @@ struct priv {
uint32_t link_speed_capa; /* Link speed capabilities. */
struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */
rte_spinlock_t lock; /* Lock for control functions. */
+ int primary_socket; /* Unix socket for primary process. */
+ struct rte_intr_handle intr_handle_socket; /* Interrupt handler. */
};
/**
@@ -299,4 +301,11 @@ int mlx5_flow_destroy(struct rte_eth_dev *, struct rte_flow *,
void priv_flow_stop(struct priv *);
int priv_flow_rxq_in_use(struct priv *, struct rxq *);
+/* mlx5_socket.c */
+
+int priv_socket_init(struct priv *priv);
+int priv_socket_uninit(struct priv *priv);
+void priv_socket_handle(struct priv *priv);
+int priv_socket_connect(struct priv *priv);
+
#endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index c1affba..46c3013 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -31,6 +31,8 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#define _GNU_SOURCE
+
#include <stddef.h>
#include <assert.h>
#include <unistd.h>
@@ -50,6 +52,7 @@
#include <linux/version.h>
#include <fcntl.h>
#include <stdalign.h>
+#include <sys/un.h>
#include <rte_atomic.h>
#include <rte_ethdev.h>
@@ -1204,6 +1207,23 @@ struct priv *
}
/**
+ * Handle interrupts from the socket.
+ *
+ * @param cb_arg
+ * Callback argument.
+ */
+static void
+mlx5_dev_handler_socket(void *cb_arg)
+{
+ struct rte_eth_dev *dev = cb_arg; /* device passed at registration */
+ struct priv *priv = dev->data->dev_private;
+
+ priv_lock(priv); /* priv_socket_handle() runs under the private lock */
+ priv_socket_handle(priv);
+ priv_unlock(priv);
+}
+
+/**
* Uninstall interrupt handler.
*
* @param priv
@@ -1214,16 +1234,19 @@ struct priv *
void
priv_dev_interrupt_handler_uninstall(struct priv *priv, struct rte_eth_dev *dev)
{
- if (!dev->data->dev_conf.intr_conf.lsc)
- return;
- rte_intr_callback_unregister(&priv->intr_handle,
- mlx5_dev_interrupt_handler,
- dev);
+ if (dev->data->dev_conf.intr_conf.lsc)
+ rte_intr_callback_unregister(&priv->intr_handle,
+ mlx5_dev_interrupt_handler, dev);
+ if (priv->primary_socket)
+ rte_intr_callback_unregister(&priv->intr_handle_socket,
+ mlx5_dev_handler_socket, dev);
if (priv->pending_alarm)
rte_eal_alarm_cancel(mlx5_dev_link_status_handler, dev);
priv->pending_alarm = 0;
priv->intr_handle.fd = 0;
priv->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+ priv->intr_handle_socket.fd = 0;
+ priv->intr_handle_socket.type = RTE_INTR_HANDLE_UNKNOWN;
}
/**
@@ -1239,20 +1262,27 @@ struct priv *
{
int rc, flags;
- if (!dev->data->dev_conf.intr_conf.lsc)
- return;
+ assert(!mlx5_is_secondary());
assert(priv->ctx->async_fd > 0);
flags = fcntl(priv->ctx->async_fd, F_GETFL);
rc = fcntl(priv->ctx->async_fd, F_SETFL, flags | O_NONBLOCK);
if (rc < 0) {
INFO("failed to change file descriptor async event queue");
dev->data->dev_conf.intr_conf.lsc = 0;
- } else {
+ }
+ if (dev->data->dev_conf.intr_conf.lsc) {
priv->intr_handle.fd = priv->ctx->async_fd;
priv->intr_handle.type = RTE_INTR_HANDLE_EXT;
rte_intr_callback_register(&priv->intr_handle,
- mlx5_dev_interrupt_handler,
- dev);
+ mlx5_dev_interrupt_handler, dev);
+ }
+
+ rc = priv_socket_init(priv);
+ if (!rc && priv->primary_socket) {
+ priv->intr_handle_socket.fd = priv->primary_socket;
+ priv->intr_handle_socket.type = RTE_INTR_HANDLE_EXT;
+ rte_intr_callback_register(&priv->intr_handle_socket,
+ mlx5_dev_handler_socket, dev);
}
}
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index e352a1e..5357be0 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -286,6 +286,7 @@ struct txq_ctrl {
struct ibv_qp *qp; /* Queue Pair. */
unsigned int socket; /* CPU socket ID for allocations. */
struct txq txq; /* Data path structure. */
+ off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
};
/* mlx5_rxq.c */
@@ -319,6 +320,7 @@ int txq_ctrl_setup(struct rte_eth_dev *, struct txq_ctrl *, uint16_t,
int mlx5_tx_queue_setup(struct rte_eth_dev *, uint16_t, uint16_t, unsigned int,
const struct rte_eth_txconf *);
void mlx5_tx_queue_release(void *);
+int priv_tx_uar_remap(struct priv *priv, int fd);
/* mlx5_rxtx.c */
diff --git a/drivers/net/mlx5/mlx5_socket.c b/drivers/net/mlx5/mlx5_socket.c
new file mode 100644
index 0000000..78b4138
--- /dev/null
+++ b/drivers/net/mlx5/mlx5_socket.c
@@ -0,0 +1,294 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2016 6WIND S.A.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#define _GNU_SOURCE
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/stat.h>
+
+#include "mlx5.h"
+#include "mlx5_utils.h"
+
+/**
+ * Initialise the socket to communicate with the secondary process
+ *
+ * On success priv->primary_socket holds a non-blocking listening socket
+ * bound to /var/tmp/<driver name>_<descriptor number>. On any failure after
+ * the socket was created, it is closed and priv->primary_socket is reset
+ * to 0.
+ *
+ * @param[in] priv
+ * Pointer to private structure.
+ *
+ * @return
+ * 0 on success, errno value on failure.
+ */
+int
+priv_socket_init(struct priv *priv)
+{
+ struct sockaddr_un sun = {
+ .sun_family = AF_UNIX,
+ };
+ int ret;
+ int flags;
+ struct stat file_stat;
+
+ /*
+ * Initialise the socket to communicate with the secondary
+ * process.
+ */
+ ret = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (ret < 0) {
+ /* Save errno first: logging may overwrite it. */
+ ret = errno;
+ WARN("secondary process not supported: %s", strerror(ret));
+ return ret;
+ }
+ priv->primary_socket = ret;
+ flags = fcntl(priv->primary_socket, F_GETFL, 0);
+ if (flags == -1) {
+ ret = errno; /* ret still held the descriptor number */
+ goto out;
+ }
+ ret = fcntl(priv->primary_socket, F_SETFL, flags | O_NONBLOCK);
+ if (ret < 0) {
+ ret = errno;
+ goto out;
+ }
+ snprintf(sun.sun_path, sizeof(sun.sun_path), "/var/tmp/%s_%d",
+ MLX5_DRIVER_NAME, priv->primary_socket);
+ /* Remove a stale socket file left behind under the same name. */
+ ret = stat(sun.sun_path, &file_stat);
+ if (!ret)
+ claim_zero(remove(sun.sun_path));
+ ret = bind(priv->primary_socket, (const struct sockaddr *)&sun,
+ sizeof(sun));
+ if (ret < 0) {
+ ret = errno;
+ WARN("cannot bind socket, secondary process not supported: %s",
+ strerror(ret));
+ goto close;
+ }
+ ret = listen(priv->primary_socket, 0);
+ if (ret < 0) {
+ ret = errno;
+ WARN("Secondary process not supported: %s", strerror(ret));
+ goto close;
+ }
+ return ret;
+close:
+ /* Best effort: the file may not exist if bind() failed. */
+ remove(sun.sun_path);
+out:
+ claim_zero(close(priv->primary_socket));
+ priv->primary_socket = 0;
+ /* ret already holds the errno value saved at the failure site. */
+ return ret;
+}
+
+/**
+ * Un-Initialise the socket to communicate with the secondary process
+ *
+ * Does nothing when priv->primary_socket is 0, the value used when no
+ * socket is open.
+ *
+ * @param[in] priv
+ * Pointer to private structure.
+ *
+ * @return
+ * 0 in all cases.
+ */
+int
+priv_socket_uninit(struct priv *priv)
+{
+ MKSTR(path, "/var/tmp/%s_%d", MLX5_DRIVER_NAME, priv->primary_socket);
+
+ /* Never close descriptor 0 or remove "<driver>_0" by accident. */
+ if (!priv->primary_socket)
+ return 0;
+ claim_zero(close(priv->primary_socket));
+ priv->primary_socket = 0;
+ claim_zero(remove(path));
+ return 0;
+}
+
+/**
+ * Handle socket interrupts.
+ *
+ * Accepts one pending connection on priv->primary_socket, checks that the
+ * peer sent SCM_CREDENTIALS matching the uid and gid of this process, and
+ * answers with priv->ctx->cmd_fd as SCM_RIGHTS ancillary data. The accepted
+ * connection is closed before returning, whatever the outcome.
+ *
+ * @param priv
+ * Pointer to private structure.
+ */
+void
+priv_socket_handle(struct priv *priv)
+{
+ int conn_sock;
+ int ret = 0;
+ struct cmsghdr *cmsg = NULL;
+ struct ucred *cred = NULL;
+ char buf[CMSG_SPACE(sizeof(struct ucred))] = { 0 };
+ char vbuf[1024] = { 0 };
+ struct iovec io = {
+ .iov_base = vbuf,
+ .iov_len = sizeof(*vbuf), /* size of one char, not of all of vbuf */
+ };
+ struct msghdr msg = {
+ .msg_iov = &io,
+ .msg_iovlen = 1,
+ .msg_control = buf,
+ .msg_controllen = sizeof(buf),
+ };
+ int *fd;
+
+ /* Accept the connection from the client. */
+ conn_sock = accept(priv->primary_socket, NULL, NULL);
+ if (conn_sock < 0) {
+ WARN("connection failed: %s", strerror(errno));
+ return;
+ }
+ ret = setsockopt(conn_sock, SOL_SOCKET, SO_PASSCRED, &(int){1},
+ sizeof(int));
+ if (ret < 0) {
+ WARN("cannot change socket options");
+ goto out;
+ }
+ ret = recvmsg(conn_sock, &msg, MSG_WAITALL);
+ if (ret < 0) {
+ WARN("received an empty message: %s", strerror(errno));
+ goto out;
+ }
+ /* Expect to receive credentials only. */
+ cmsg = CMSG_FIRSTHDR(&msg);
+ if (cmsg == NULL) {
+ WARN("no message");
+ goto out;
+ }
+ if ((cmsg->cmsg_type == SCM_CREDENTIALS) &&
+ (cmsg->cmsg_len >= sizeof(*cred))) {
+ cred = (struct ucred *)CMSG_DATA(cmsg);
+ assert(cred != NULL);
+ }
+ cmsg = CMSG_NXTHDR(&msg, cmsg); /* a second header is rejected below */
+ if (cmsg != NULL) {
+ WARN("Message wrongly formated");
+ goto out;
+ }
+ /* Make sure all the ancillary data was received and valid. */
+ if ((cred == NULL) || (cred->uid != getuid()) ||
+ (cred->gid != getgid())) {
+ WARN("wrong credentials");
+ goto out;
+ }
+ /* Set-up the ancillary data. */
+ cmsg = CMSG_FIRSTHDR(&msg); /* reuse the receive buffer for the reply */
+ assert(cmsg != NULL);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(priv->ctx->cmd_fd));
+ fd = (int *)CMSG_DATA(cmsg);
+ *fd = priv->ctx->cmd_fd;
+ ret = sendmsg(conn_sock, &msg, 0);
+ if (ret < 0)
+ WARN("cannot send response");
+out:
+ close(conn_sock);
+}
+
+/**
+ * Connect to the primary process.
+ *
+ * Sends the credentials of this process as SCM_CREDENTIALS ancillary data
+ * and expects a file descriptor back as SCM_RIGHTS ancillary data. The
+ * connection itself is always closed before returning.
+ *
+ * @param[in] priv
+ * Pointer to private structure.
+ *
+ * @return
+ * fd on success, negative errno value on failure.
+ */
+int
+priv_socket_connect(struct priv *priv)
+{
+ struct sockaddr_un sun = {
+ .sun_family = AF_UNIX,
+ };
+ int socket_fd;
+ int *fd = NULL;
+ int ret;
+ struct ucred *cred;
+ char buf[CMSG_SPACE(sizeof(*cred))] = { 0 };
+ char vbuf[1024] = { 0 };
+ struct iovec io = {
+ .iov_base = vbuf,
+ .iov_len = sizeof(*vbuf),
+ };
+ struct msghdr msg = {
+ .msg_control = buf,
+ .msg_controllen = sizeof(buf),
+ .msg_iov = &io,
+ .msg_iovlen = 1,
+ };
+ struct cmsghdr *cmsg;
+
+ ret = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (ret < 0) {
+ /* Save errno first: logging may overwrite it. */
+ ret = -errno;
+ WARN("cannot connect to primary");
+ return ret;
+ }
+ socket_fd = ret;
+ snprintf(sun.sun_path, sizeof(sun.sun_path), "/var/tmp/%s_%d",
+ MLX5_DRIVER_NAME, priv->primary_socket);
+ ret = connect(socket_fd, (const struct sockaddr *)&sun, sizeof(sun));
+ if (ret < 0) {
+ ret = -errno;
+ WARN("cannot connect to primary");
+ goto out;
+ }
+ cmsg = CMSG_FIRSTHDR(&msg);
+ if (cmsg == NULL) {
+ DEBUG("cannot get first message");
+ /* ret is 0 here; it must not be mistaken for a descriptor. */
+ ret = -EINVAL;
+ goto out;
+ }
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_CREDENTIALS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(*cred));
+ /* CMSG_DATA() points inside buf, it cannot be NULL. */
+ cred = (struct ucred *)CMSG_DATA(cmsg);
+ cred->pid = getpid();
+ cred->uid = getuid();
+ cred->gid = getgid();
+ ret = sendmsg(socket_fd, &msg, MSG_DONTWAIT);
+ if (ret < 0) {
+ ret = -errno;
+ WARN("cannot send credentials to primary: %s",
+ strerror(-ret));
+ goto out;
+ }
+ ret = recvmsg(socket_fd, &msg, MSG_WAITALL);
+ if (ret <= 0) {
+ /* 0 means the peer closed the connection: errno is not set. */
+ ret = (ret < 0) ? -errno : -ECONNRESET;
+ WARN("no message from primary: %s", strerror(-ret));
+ goto out;
+ }
+ /* From here on ret is a byte count, never a valid return value. */
+ cmsg = CMSG_FIRSTHDR(&msg);
+ if ((cmsg == NULL) || (cmsg->cmsg_level != SOL_SOCKET) ||
+ (cmsg->cmsg_type != SCM_RIGHTS) ||
+ (cmsg->cmsg_len < CMSG_LEN(sizeof(*fd)))) {
+ WARN("No file descriptor received");
+ ret = -EINVAL;
+ goto out;
+ }
+ fd = (int *)CMSG_DATA(cmsg);
+ if (*fd <= 0) {
+ WARN("no file descriptor received: %s", strerror(EINVAL));
+ ret = -EINVAL;
+ goto out;
+ }
+ ret = *fd;
+out:
+ close(socket_fd);
+ return ret;
+}
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index d6c9657..18ea471 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -36,6 +36,8 @@
#include <errno.h>
#include <string.h>
#include <stdint.h>
+#include <unistd.h>
+#include <sys/mman.h>
/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
@@ -168,6 +170,7 @@
struct mlx5dv_obj obj;
int ret = 0;
+ qp.comp_mask = MLX5DV_QP_MASK_UAR_MMAP_OFFSET;
obj.cq.in = ibcq;
obj.cq.out = &cq_info;
obj.qp.in = tmpl->qp;
@@ -194,6 +197,13 @@
tmpl->txq.elts =
(struct rte_mbuf *(*)[1 << tmpl->txq.elts_n])
((uintptr_t)txq_ctrl + sizeof(*txq_ctrl));
+ if (qp.comp_mask | MLX5DV_QP_MASK_UAR_MMAP_OFFSET) {
+ tmpl->uar_mmap_offset = qp.uar_mmap_offset;
+ } else {
+ ERROR("Failed to retrieve UAR info, invalid libmlx5.so version");
+ return EINVAL;
+ }
+
return 0;
}
@@ -533,3 +543,59 @@
rte_free(txq_ctrl);
priv_unlock(priv);
}
+
+
+/**
+ * Map locally UAR used in Tx queues for BlueFlame doorbell.
+ *
+ * The BlueFlame register address of every Tx queue is rounded down to a
+ * page boundary and that page is mapped from fd at the very same virtual
+ * address (MAP_FIXED). A page shared by several queues is mapped once.
+ *
+ * @param[in] priv
+ *   Pointer to private structure.
+ * @param fd
+ *   Verbs file descriptor to map UAR pages.
+ *
+ * @return
+ *   0 on success, -1 on failure.
+ */
+int
+priv_tx_uar_remap(struct priv *priv, int fd)
+{
+	unsigned int i, j;
+	unsigned int pages_n = 0;
+	uintptr_t uar_va;
+	void *addr;
+	struct txq *txq;
+	struct txq_ctrl *txq_ctrl;
+	size_t page_size = sysconf(_SC_PAGESIZE);
+
+	/* Nothing to map; also keeps the array below from being zero-sized. */
+	if (priv->txqs_n == 0)
+		return 0;
+	/* Page addresses mapped so far, at most one per Tx queue. */
+	uintptr_t pages[priv->txqs_n];
+
+	/*
+	 * As in rdma-core, UARs are mapped with the OS page size.
+	 * Use the page-aligned address to avoid duplicate mmap.
+	 * Refer to the libmlx5 function mlx5_init_context().
+	 */
+	for (i = 0; i != priv->txqs_n; ++i) {
+		txq = (*priv->txqs)[i];
+		/*
+		 * Skip empty slots (presumably Tx queues never set up,
+		 * TODO confirm): container_of() on NULL would produce a
+		 * bogus pointer that is dereferenced right below.
+		 */
+		if (txq == NULL)
+			continue;
+		txq_ctrl = container_of(txq, struct txq_ctrl, txq);
+		uar_va = (uintptr_t)txq_ctrl->txq.bf_reg;
+		uar_va = RTE_ALIGN_FLOOR(uar_va, page_size);
+		/* Look for this page among those already mapped. */
+		for (j = 0; j != pages_n; ++j)
+			if (pages[j] == uar_va)
+				break;
+		if (j != pages_n)
+			continue;
+		pages[pages_n++] = uar_va;
+		addr = mmap((void *)uar_va, page_size,
+			    PROT_WRITE, MAP_FIXED | MAP_SHARED, fd,
+			    txq_ctrl->uar_mmap_offset);
+		if (addr != (void *)uar_va) {
+			ERROR("call to mmap failed on UAR for txq %u", i);
+			return -1;
+		}
+	}
+	return 0;
+}
--
1.8.3.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* [dpdk-dev] [PATCH v3 3/6] net/mlx5: allocate verbs object into shared memory
2017-08-24 14:03 [dpdk-dev] [PATCH v1 1/2] net/mlx5: change eth device reference for secondary process Xueming Li
` (5 preceding siblings ...)
2017-09-18 14:36 ` [dpdk-dev] [PATCH v3 2/6] net/mlx5: install a socket to exchange a file descriptor Xueming Li
@ 2017-09-18 14:36 ` Xueming Li
2017-09-18 14:36 ` [dpdk-dev] [PATCH v3 4/6] net/mlx5: remove verbs fork check Xueming Li
` (14 subsequent siblings)
21 siblings, 0 replies; 41+ messages in thread
From: Xueming Li @ 2017-09-18 14:36 UTC (permalink / raw)
To: Nelio Laranjeiro, Adrien Mazarguil; +Cc: Xueming Li, dev
PMD uses Verbs objects which were not available in the shared memory.
This patch modifies the location where Verbs objects are allocated (from
process memory address space to shared memory address space) and thus
allows a secondary process to use those objects by mapping this shared
memory space into its own memory space.
Signed-off-by: Xueming Li <xuemingl@mellanox.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
drivers/net/mlx5/mlx5.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 55 insertions(+)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index bfa38ba..11490d4 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -132,6 +132,52 @@ struct mlx5_args {
}
/**
+ * Verbs callback to allocate a memory. This function should allocate the space
+ * according to the size provided residing inside a huge page.
+ * Please note that all allocation must respect the alignment from libmlx5
+ * (i.e. currently sysconf(_SC_PAGESIZE)).
+ *
+ * @param[in] size
+ * The size in bytes of the memory to allocate.
+ * @param[in] data
+ * A pointer to the callback data.
+ *
+ * @return
+ * a pointer to the allocate space.
+ */
+static void *
+mlx5_alloc_verbs_buf(size_t size, void *data)
+{
+ struct priv *priv = data;
+ void *ret;
+ size_t alignment = sysconf(_SC_PAGESIZE);
+
+ assert(data != NULL);
+ assert(!mlx5_is_secondary());
+ ret = rte_malloc_socket(__func__, size, alignment,
+ priv->dev->device->numa_node);
+ DEBUG("Extern alloc size: %lu, align: %lu: %p", size, alignment, ret);
+ return ret;
+}
+
+/**
+ * Verbs callback to free memory; the buffer is released with rte_free().
+ *
+ * @param[in] ptr
+ * A pointer to the memory to free.
+ * @param[in] data
+ * A pointer to the callback data; only inspected by assert() here.
+ */
+static void
+mlx5_free_verbs_buf(void *ptr, void *data __rte_unused)
+{
+ /*
+ * data is only read by assert(); presumably it is tagged __rte_unused
+ * so that builds compiling assert() out do not warn about it.
+ */
+ assert(data != NULL);
+ assert(!mlx5_is_secondary());
+ DEBUG("Extern free request: %p", ptr);
+ rte_free(ptr);
+}
+
+/**
* DPDK callback to close the device.
*
* Destroy all queues and objects, free memory.
@@ -826,6 +872,15 @@ struct mlx5_args {
eth_dev->dev_ops = &mlx5_dev_ops;
TAILQ_INIT(&priv->flows);
+ /* Hint libmlx5 to use PMD allocator for data plane resources */
+ struct mlx5dv_ctx_allocators alctr = {
+ .alloc = &mlx5_alloc_verbs_buf,
+ .free = &mlx5_free_verbs_buf,
+ .data = priv,
+ };
+ mlx5dv_set_context_attr(ctx, MLX5DV_CTX_ATTR_BUF_ALLOCATORS,
+ (void *)((uintptr_t)&alctr));
+
/* Bring Ethernet device up. */
DEBUG("forcing Ethernet interface up");
priv_set_flags(priv, ~IFF_UP, IFF_UP);
--
1.8.3.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* [dpdk-dev] [PATCH v3 4/6] net/mlx5: remove verbs fork check
2017-08-24 14:03 [dpdk-dev] [PATCH v1 1/2] net/mlx5: change eth device reference for secondary process Xueming Li
` (6 preceding siblings ...)
2017-09-18 14:36 ` [dpdk-dev] [PATCH v3 3/6] net/mlx5: allocate verbs object into shared memory Xueming Li
@ 2017-09-18 14:36 ` Xueming Li
2017-09-18 14:36 ` [dpdk-dev] [PATCH v3 5/6] net/mlx5: add operations for secondary process Xueming Li
` (13 subsequent siblings)
21 siblings, 0 replies; 41+ messages in thread
From: Xueming Li @ 2017-09-18 14:36 UTC (permalink / raw)
To: Nelio Laranjeiro, Adrien Mazarguil; +Cc: Xueming Li, dev
Verbs API prohibits resource inheritance by default, which in turn
disables a PMD forked secondary process that relies on memory inherited
from the parent process.
This patch removes the verbs fork check to enable DPDK forked processes.
Signed-off-by: Xueming Li <xuemingl@mellanox.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
drivers/net/mlx5/mlx5.c | 8 --------
1 file changed, 8 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 11490d4..e124a14 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -977,16 +977,8 @@ struct mlx5_args {
{
/* Build the static table for ptype conversion. */
mlx5_set_ptype_table();
- /*
- * RDMAV_HUGEPAGES_SAFE tells ibv_fork_init() we intend to use
- * huge pages. Calling ibv_fork_init() during init allows
- * applications to use fork() safely for purposes other than
- * using this PMD, which is not supported in forked processes.
- */
- setenv("RDMAV_HUGEPAGES_SAFE", "1", 1);
/* Don't map UAR to WC if BlueFlame is not used.*/
setenv("MLX5_SHUT_UP_BF", "1", 1);
- ibv_fork_init();
rte_pci_register(&mlx5_driver);
}
--
1.8.3.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* [dpdk-dev] [PATCH v3 5/6] net/mlx5: add operations for secondary process
2017-08-24 14:03 [dpdk-dev] [PATCH v1 1/2] net/mlx5: change eth device reference for secondary process Xueming Li
` (7 preceding siblings ...)
2017-09-18 14:36 ` [dpdk-dev] [PATCH v3 4/6] net/mlx5: remove verbs fork check Xueming Li
@ 2017-09-18 14:36 ` Xueming Li
2017-09-18 14:36 ` [dpdk-dev] [PATCH v3 6/6] net/mlx5: multi-process document update Xueming Li
` (12 subsequent siblings)
21 siblings, 0 replies; 41+ messages in thread
From: Xueming Li @ 2017-09-18 14:36 UTC (permalink / raw)
To: Nelio Laranjeiro, Adrien Mazarguil; +Cc: Xueming Li, dev
Add operations that are safe for secondary processes:
* (x)stats
* device info get
* rx/tx descriptor status
Signed-off-by: Xueming Li <xuemingl@mellanox.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
drivers/net/mlx5/mlx5.c | 16 +++++++++++++++-
drivers/net/mlx5/mlx5.h | 1 +
drivers/net/mlx5/mlx5_ethdev.c | 11 +++++------
3 files changed, 21 insertions(+), 7 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index e124a14..76e92a5 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -303,6 +303,18 @@ struct mlx5_args {
.rx_queue_intr_disable = mlx5_rx_intr_disable,
};
+
+/* Device operations installed on the ethdev of a secondary process. */
+static const struct eth_dev_ops mlx5_dev_sec_ops = {
+	/* Device information. */
+	.dev_infos_get = mlx5_dev_infos_get,
+	/* Basic statistics. */
+	.stats_get = mlx5_stats_get,
+	.stats_reset = mlx5_stats_reset,
+	/* Extended statistics. */
+	.xstats_get_names = mlx5_xstats_get_names,
+	.xstats_get = mlx5_xstats_get,
+	.xstats_reset = mlx5_xstats_reset,
+	/* Rx/Tx descriptor status. */
+	.rx_descriptor_status = mlx5_rx_descriptor_status,
+	.tx_descriptor_status = mlx5_tx_descriptor_status,
+};
+
static struct {
struct rte_pci_addr pci_addr; /* associated PCI address */
uint32_t ports; /* physical ports bitfield. */
@@ -640,7 +652,7 @@ struct mlx5_args {
goto error;
}
eth_dev->device = &pci_dev->device;
- eth_dev->dev_ops = NULL;
+ eth_dev->dev_ops = &mlx5_dev_sec_ops;
priv = eth_dev->data->dev_private;
/* Receive command fd from primary process */
err = priv_socket_connect(priv);
@@ -707,6 +719,8 @@ struct mlx5_args {
}
priv->ctx = ctx;
+ strncpy(priv->ibdev_path, priv->ctx->device->ibdev_path,
+ sizeof(priv->ibdev_path));
priv->device_attr = device_attr;
priv->port = port;
priv->pd = pd;
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 1ce02e8..928aeb6 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -91,6 +91,7 @@ struct priv {
struct ibv_context *ctx; /* Verbs context. */
struct ibv_device_attr_ex device_attr; /* Device properties. */
struct ibv_pd *pd; /* Protection Domain. */
+ char ibdev_path[IBV_SYSFS_PATH_MAX]; /* IB device path for secondary */
/*
* MAC addresses array and configuration bit-field.
* An extra entry that cannot be modified by the DPDK is reserved
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 46c3013..8f3ff89 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -165,7 +165,7 @@ struct priv *
char match[IF_NAMESIZE] = "";
{
- MKSTR(path, "%s/device/net", priv->ctx->device->ibdev_path);
+ MKSTR(path, "%s/device/net", priv->ibdev_path);
dir = opendir(path);
if (dir == NULL)
@@ -183,7 +183,7 @@ struct priv *
continue;
MKSTR(path, "%s/device/net/%s/%s",
- priv->ctx->device->ibdev_path, name,
+ priv->ibdev_path, name,
(dev_type ? "dev_id" : "dev_port"));
file = fopen(path, "rb");
@@ -271,11 +271,11 @@ struct priv *
if (priv_is_ib_cntr(entry)) {
MKSTR(path, "%s/ports/1/hw_counters/%s",
- priv->ctx->device->ibdev_path, entry);
+ priv->ibdev_path, entry);
file = fopen(path, "rb");
} else {
MKSTR(path, "%s/device/net/%s/%s",
- priv->ctx->device->ibdev_path, ifname, entry);
+ priv->ibdev_path, ifname, entry);
file = fopen(path, "rb");
}
if (file == NULL)
@@ -318,8 +318,7 @@ struct priv *
if (priv_get_ifname(priv, &ifname))
return -1;
- MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device->ibdev_path,
- ifname, entry);
+ MKSTR(path, "%s/device/net/%s/%s", priv->ibdev_path, ifname, entry);
file = fopen(path, "wb");
if (file == NULL)
--
1.8.3.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* [dpdk-dev] [PATCH v3 6/6] net/mlx5: multi-process document update
2017-08-24 14:03 [dpdk-dev] [PATCH v1 1/2] net/mlx5: change eth device reference for secondary process Xueming Li
` (8 preceding siblings ...)
2017-09-18 14:36 ` [dpdk-dev] [PATCH v3 5/6] net/mlx5: add operations for secondary process Xueming Li
@ 2017-09-18 14:36 ` Xueming Li
2017-09-18 18:47 ` Mcnamara, John
2017-09-19 14:31 ` [dpdk-dev] [PATCH v4 0/5] net/mlx5 multi-process support Xueming Li
` (11 subsequent siblings)
21 siblings, 1 reply; 41+ messages in thread
From: Xueming Li @ 2017-09-18 14:36 UTC (permalink / raw)
To: Nelio Laranjeiro, Adrien Mazarguil; +Cc: Xueming Li, dev
This patch updates the feature list and NIC guide to reflect
multi-process support.
Signed-off-by: Xueming Li <xuemingl@mellanox.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
doc/guides/nics/features/mlx5.ini | 1 +
doc/guides/nics/mlx5.rst | 3 +--
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/doc/guides/nics/features/mlx5.ini b/doc/guides/nics/features/mlx5.ini
index 99a8d93..2913591 100644
--- a/doc/guides/nics/features/mlx5.ini
+++ b/doc/guides/nics/features/mlx5.ini
@@ -34,6 +34,7 @@ Tx descriptor status = Y
Basic stats = Y
Extended stats = Y
Stats per queue = Y
+Multiprocess aware = Y
Other kdrv = Y
ARMv8 = Y
Power8 = Y
diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index ffa20a2..5c77dea 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -92,7 +92,7 @@ Features
- Flow director (RTE_FDIR_MODE_PERFECT, RTE_FDIR_MODE_PERFECT_MAC_VLAN and
RTE_ETH_FDIR_REJECT).
- Flow API.
-- Secondary process TX is supported.
+- Multiple process.
- KVM and VMware ESX SR-IOV modes are supported.
- RSS hash result is supported.
- Hardware TSO.
@@ -106,7 +106,6 @@ Limitations
- Inner RSS for VXLAN frames is not supported yet.
- Port statistics through software counters only.
- Hardware checksum RX offloads for VXLAN inner header are not supported yet.
-- Secondary process RX is not supported.
- Flow pattern without any specific vlan will match for vlan packets as well:
When VLAN spec is not specified in the pattern, the matching rule will be created with VLAN as a wild card.
--
1.8.3.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [dpdk-dev] [PATCH v3 6/6] net/mlx5: multi-process document update
2017-09-18 14:36 ` [dpdk-dev] [PATCH v3 6/6] net/mlx5: multi-process document update Xueming Li
@ 2017-09-18 18:47 ` Mcnamara, John
0 siblings, 0 replies; 41+ messages in thread
From: Mcnamara, John @ 2017-09-18 18:47 UTC (permalink / raw)
To: Xueming Li, Nelio Laranjeiro, Adrien Mazarguil; +Cc: dev
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Xueming Li
> Sent: Monday, September 18, 2017 3:36 PM
> To: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>; Adrien Mazarguil
> <adrien.mazarguil@6wind.com>
> Cc: Xueming Li <xuemingl@mellanox.com>; dev@dpdk.org
> Subject: [dpdk-dev] [PATCH v3 6/6] net/mlx5: multi-process document update
>
> This patch update the feature list and NIC guide to be multi-process
> enabled.
>
> Signed-off-by: Xueming Li <xuemingl@mellanox.com>
> Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: John McNamara <john.mcnamara@intel.com>
^ permalink raw reply [flat|nested] 41+ messages in thread
* [dpdk-dev] [PATCH v4 0/5] net/mlx5 multi-process support
2017-08-24 14:03 [dpdk-dev] [PATCH v1 1/2] net/mlx5: change eth device reference for secondary process Xueming Li
` (9 preceding siblings ...)
2017-09-18 14:36 ` [dpdk-dev] [PATCH v3 6/6] net/mlx5: multi-process document update Xueming Li
@ 2017-09-19 14:31 ` Xueming Li
2017-09-19 14:41 ` Nélio Laranjeiro
` (3 more replies)
2017-09-19 14:31 ` [dpdk-dev] [PATCH v4 1/5] net/mlx5: change eth device reference for secondary process Xueming Li
` (10 subsequent siblings)
21 siblings, 4 replies; 41+ messages in thread
From: Xueming Li @ 2017-09-19 14:31 UTC (permalink / raw)
To: Nelio Laranjeiro, Adrien Mazarguil; +Cc: Xueming Li, dev
This patchset enhances Mellanox multi-process support by supporting all
multi-process examples, and also supports reading ethdev (x)stats in a
secondary process.
Starting from V2, this patchset depends on upstream rdma-core enhancement
and l2fork example bug fix:
http://www.dpdk.org/ml/archives/dev/2017-August/073405.html
http://www.dpdk.org/ml/archives/dev/2017-September/075568.html
V4:
* remove forked secondary mode
V3:
* add cover letter
* add dependency notes
V2:
* split into multiple patches
* support forked secondary process
* add secondary process ethdev operations
* rebase on latest rdma-core upstream api
Xueming Li (5):
net/mlx5: change eth device reference for secondary process
net/mlx5: install a socket to exchange a file descriptor
net/mlx5: allocate verbs object into shared memory
net/mlx5: add operations for secondary process
net/mlx5: multi-process document update
doc/guides/nics/features/mlx5.ini | 1 +
doc/guides/nics/mlx5.rst | 4 +-
drivers/net/mlx5/Makefile | 1 +
drivers/net/mlx5/mlx5.c | 104 ++++++++++++++
drivers/net/mlx5/mlx5.h | 16 ++-
drivers/net/mlx5/mlx5_ethdev.c | 108 +++++++++-----
drivers/net/mlx5/mlx5_fdir.c | 2 +
drivers/net/mlx5/mlx5_rss.c | 1 +
drivers/net/mlx5/mlx5_rxq.c | 1 +
drivers/net/mlx5/mlx5_rxtx.h | 2 +
drivers/net/mlx5/mlx5_socket.c | 294 ++++++++++++++++++++++++++++++++++++++
drivers/net/mlx5/mlx5_trigger.c | 4 +-
drivers/net/mlx5/mlx5_txq.c | 66 +++++++++
13 files changed, 562 insertions(+), 42 deletions(-)
create mode 100644 drivers/net/mlx5/mlx5_socket.c
--
1.8.3.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* [dpdk-dev] [PATCH v4 1/5] net/mlx5: change eth device reference for secondary process
2017-08-24 14:03 [dpdk-dev] [PATCH v1 1/2] net/mlx5: change eth device reference for secondary process Xueming Li
` (10 preceding siblings ...)
2017-09-19 14:31 ` [dpdk-dev] [PATCH v4 0/5] net/mlx5 multi-process support Xueming Li
@ 2017-09-19 14:31 ` Xueming Li
2017-09-19 14:31 ` [dpdk-dev] [PATCH v4 2/5] net/mlx5: install a socket to exchange a file descriptor Xueming Li
` (9 subsequent siblings)
21 siblings, 0 replies; 41+ messages in thread
From: Xueming Li @ 2017-09-19 14:31 UTC (permalink / raw)
To: Nelio Laranjeiro, Adrien Mazarguil; +Cc: Xueming Li, dev
rte_eth_dev objects created by the primary process were not available in
a secondary process; it was not possible to use the primary process local
memory object from a secondary process.
This patch modifies the references to the primary rte_eth_dev object to
use the local rte_eth_dev of the secondary process instead.
Signed-off-by: Xueming Li <xuemingl@mellanox.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
drivers/net/mlx5/mlx5.h | 6 +++---
drivers/net/mlx5/mlx5_ethdev.c | 47 ++++++++++++++++++++++++-----------------
drivers/net/mlx5/mlx5_fdir.c | 2 ++
 drivers/net/mlx5/mlx5_rss.c | 1 +
drivers/net/mlx5/mlx5_rxq.c | 1 +
drivers/net/mlx5/mlx5_trigger.c | 4 ++--
6 files changed, 37 insertions(+), 24 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index ab03fe0..78b27ed 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -87,7 +87,7 @@ struct mlx5_xstats_ctrl {
};
struct priv {
- struct rte_eth_dev *dev; /* Ethernet device. */
+ struct rte_eth_dev *dev; /* Ethernet device of master process. */
struct ibv_context *ctx; /* Verbs context. */
struct ibv_device_attr_ex device_attr; /* Device properties. */
struct ibv_pd *pd; /* Protection Domain. */
@@ -208,8 +208,8 @@ int mlx5_ibv_device_to_pci_addr(const struct ibv_device *,
void priv_dev_interrupt_handler_install(struct priv *, struct rte_eth_dev *);
int mlx5_set_link_down(struct rte_eth_dev *dev);
int mlx5_set_link_up(struct rte_eth_dev *dev);
-void priv_select_tx_function(struct priv *);
-void priv_select_rx_function(struct priv *);
+void priv_dev_select_tx_function(struct priv *priv, struct rte_eth_dev *dev);
+void priv_dev_select_rx_function(struct priv *priv, struct rte_eth_dev *dev);
/* mlx5_mac.c */
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 6f17a95..c1affba 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -1260,7 +1260,9 @@ struct priv *
* Change the link state (UP / DOWN).
*
* @param priv
- * Pointer to Ethernet device structure.
+ * Pointer to private data structure.
+ * @param dev
+ * Pointer to rte_eth_dev structure.
* @param up
* Nonzero for link up, otherwise link down.
*
@@ -1268,17 +1270,16 @@ struct priv *
* 0 on success, errno value on failure.
*/
static int
-priv_set_link(struct priv *priv, int up)
+priv_dev_set_link(struct priv *priv, struct rte_eth_dev *dev, int up)
{
- struct rte_eth_dev *dev = priv->dev;
int err;
if (up) {
err = priv_set_flags(priv, ~IFF_UP, IFF_UP);
if (err)
return err;
- priv_select_tx_function(priv);
- priv_select_rx_function(priv);
+ priv_dev_select_tx_function(priv, dev);
+ priv_dev_select_rx_function(priv, dev);
} else {
err = priv_set_flags(priv, ~IFF_UP, ~IFF_UP);
if (err)
@@ -1305,7 +1306,7 @@ struct priv *
int err;
priv_lock(priv);
- err = priv_set_link(priv, 0);
+ err = priv_dev_set_link(priv, dev, 0);
priv_unlock(priv);
return err;
}
@@ -1326,7 +1327,7 @@ struct priv *
int err;
priv_lock(priv);
- err = priv_set_link(priv, 1);
+ err = priv_dev_set_link(priv, dev, 1);
priv_unlock(priv);
return err;
}
@@ -1335,29 +1336,33 @@ struct priv *
* Configure the TX function to use.
*
* @param priv
- * Pointer to private structure.
+ * Pointer to private data structure.
+ * @param dev
+ * Pointer to rte_eth_dev structure.
*/
void
-priv_select_tx_function(struct priv *priv)
+priv_dev_select_tx_function(struct priv *priv, struct rte_eth_dev *dev)
{
- priv->dev->tx_pkt_burst = mlx5_tx_burst;
+ assert(priv != NULL);
+ assert(dev != NULL);
+ dev->tx_pkt_burst = mlx5_tx_burst;
/* Select appropriate TX function. */
if (priv->mps == MLX5_MPW_ENHANCED) {
if (priv_check_vec_tx_support(priv) > 0) {
if (priv_check_raw_vec_tx_support(priv) > 0)
- priv->dev->tx_pkt_burst = mlx5_tx_burst_raw_vec;
+ dev->tx_pkt_burst = mlx5_tx_burst_raw_vec;
else
- priv->dev->tx_pkt_burst = mlx5_tx_burst_vec;
+ dev->tx_pkt_burst = mlx5_tx_burst_vec;
DEBUG("selected Enhanced MPW TX vectorized function");
} else {
- priv->dev->tx_pkt_burst = mlx5_tx_burst_empw;
+ dev->tx_pkt_burst = mlx5_tx_burst_empw;
DEBUG("selected Enhanced MPW TX function");
}
} else if (priv->mps && priv->txq_inline) {
- priv->dev->tx_pkt_burst = mlx5_tx_burst_mpw_inline;
+ dev->tx_pkt_burst = mlx5_tx_burst_mpw_inline;
DEBUG("selected MPW inline TX function");
} else if (priv->mps) {
- priv->dev->tx_pkt_burst = mlx5_tx_burst_mpw;
+ dev->tx_pkt_burst = mlx5_tx_burst_mpw;
DEBUG("selected MPW TX function");
}
}
@@ -1366,15 +1371,19 @@ struct priv *
* Configure the RX function to use.
*
* @param priv
- * Pointer to private structure.
+ * Pointer to private data structure.
+ * @param dev
+ * Pointer to rte_eth_dev structure.
*/
void
-priv_select_rx_function(struct priv *priv)
+priv_dev_select_rx_function(struct priv *priv, struct rte_eth_dev *dev)
{
+ assert(priv != NULL);
+ assert(dev != NULL);
if (priv_check_vec_rx_support(priv) > 0) {
- priv->dev->rx_pkt_burst = mlx5_rx_burst_vec;
+ dev->rx_pkt_burst = mlx5_rx_burst_vec;
DEBUG("selected RX vectorized function");
} else {
- priv->dev->rx_pkt_burst = mlx5_rx_burst;
+ dev->rx_pkt_burst = mlx5_rx_burst;
}
}
diff --git a/drivers/net/mlx5/mlx5_fdir.c b/drivers/net/mlx5/mlx5_fdir.c
index acae668..66e3818 100644
--- a/drivers/net/mlx5/mlx5_fdir.c
+++ b/drivers/net/mlx5/mlx5_fdir.c
@@ -1068,6 +1068,8 @@ struct mlx5_fdir_filter {
int ret = EINVAL;
struct priv *priv = dev->data->dev_private;
+ if (mlx5_is_secondary())
+ return -E_RTE_SECONDARY;
switch (filter_type) {
case RTE_ETH_FILTER_GENERIC:
if (filter_op != RTE_ETH_FILTER_GET)
diff --git a/drivers/net/mlx5/mlx5_rss.c b/drivers/net/mlx5/mlx5_rss.c
index 1249943..d3d2603 100644
--- a/drivers/net/mlx5/mlx5_rss.c
+++ b/drivers/net/mlx5/mlx5_rss.c
@@ -350,6 +350,7 @@
int ret;
struct priv *priv = dev->data->dev_private;
+ assert(!mlx5_is_secondary());
mlx5_dev_stop(dev);
priv_lock(priv);
ret = priv_dev_rss_reta_update(priv, reta_conf, reta_size);
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 22448c9..b71f72f 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -1250,6 +1250,7 @@
unsigned int count = 0;
struct rte_intr_handle *intr_handle = priv->dev->intr_handle;
+ assert(!mlx5_is_secondary());
if (!priv->dev->data->dev_conf.intr_conf.rxq)
return 0;
priv_rx_intr_vec_disable(priv);
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 3fa9401..51c31aa 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -66,8 +66,8 @@
return 0;
}
/* Update Rx/Tx callback. */
- priv_select_tx_function(priv);
- priv_select_rx_function(priv);
+ priv_dev_select_tx_function(priv, dev);
+ priv_dev_select_rx_function(priv, dev);
DEBUG("%p: allocating and configuring hash RX queues", (void *)dev);
err = priv_create_hash_rxqs(priv);
if (!err)
--
1.8.3.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* [dpdk-dev] [PATCH v4 2/5] net/mlx5: install a socket to exchange a file descriptor
2017-08-24 14:03 [dpdk-dev] [PATCH v1 1/2] net/mlx5: change eth device reference for secondary process Xueming Li
` (11 preceding siblings ...)
2017-09-19 14:31 ` [dpdk-dev] [PATCH v4 1/5] net/mlx5: change eth device reference for secondary process Xueming Li
@ 2017-09-19 14:31 ` Xueming Li
2017-09-19 14:31 ` [dpdk-dev] [PATCH v4 3/5] net/mlx5: allocate verbs object into shared memory Xueming Li
` (8 subsequent siblings)
21 siblings, 0 replies; 41+ messages in thread
From: Xueming Li @ 2017-09-19 14:31 UTC (permalink / raw)
To: Nelio Laranjeiro, Adrien Mazarguil; +Cc: Xueming Li, dev
Use a Unix socket to retrieve, from the primary process, the file
descriptor of the communication channel with the kernel driver. This is
necessary to remap the UAR pages in the secondary process memory space
and thus use the same Tx queues.
This is only supported from rdma-core (v15).
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Signed-off-by: Xueming Li <xuemingl@mellanox.com>
---
drivers/net/mlx5/Makefile | 1 +
drivers/net/mlx5/mlx5.c | 35 +++++
drivers/net/mlx5/mlx5.h | 9 ++
drivers/net/mlx5/mlx5_ethdev.c | 50 +++++--
drivers/net/mlx5/mlx5_rxtx.h | 2 +
drivers/net/mlx5/mlx5_socket.c | 294 +++++++++++++++++++++++++++++++++++++++++
drivers/net/mlx5/mlx5_txq.c | 66 +++++++++
7 files changed, 447 insertions(+), 10 deletions(-)
create mode 100644 drivers/net/mlx5/mlx5_socket.c
diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index d9c42b5..787e86b 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -52,6 +52,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rss.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_fdir.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_mr.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow.c
+SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_socket.c
# Basic CFLAGS.
CFLAGS += -O3
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index dd1d086..bfa38ba 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -209,6 +209,7 @@ struct mlx5_args {
}
if (priv->reta_idx != NULL)
rte_free(priv->reta_idx);
+ priv_socket_uninit(priv);
priv_unlock(priv);
memset(priv, 0, sizeof(*priv));
}
@@ -578,6 +579,40 @@ struct mlx5_args {
.rx_vec_en = MLX5_ARG_UNSET,
};
+ mlx5_dev[idx].ports |= test;
+
+ if (mlx5_is_secondary()) {
+ /* from rte_ethdev.c */
+ char name[RTE_ETH_NAME_MAX_LEN];
+
+ snprintf(name, sizeof(name), "%s port %u",
+ ibv_get_device_name(ibv_dev), port);
+ eth_dev = rte_eth_dev_attach_secondary(name);
+ if (eth_dev == NULL) {
+ ERROR("can not attach rte ethdev");
+ err = ENOMEM;
+ goto error;
+ }
+ eth_dev->device = &pci_dev->device;
+ eth_dev->dev_ops = NULL;
+ priv = eth_dev->data->dev_private;
+ /* Receive command fd from primary process */
+ err = priv_socket_connect(priv);
+ if (err < 0) {
+ err = -err;
+ goto error;
+ }
+ /* Remap UAR for Tx queues. */
+ err = priv_tx_uar_remap(priv, err);
+ if (err < 0) {
+ err = -err;
+ goto error;
+ }
+ priv_dev_select_rx_function(priv, eth_dev);
+ priv_dev_select_tx_function(priv, eth_dev);
+ continue;
+ }
+
DEBUG("using port %u (%08" PRIx32 ")", port, test);
ctx = ibv_open_device(ibv_dev);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 78b27ed..1ce02e8 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -151,6 +151,8 @@ struct priv {
uint32_t link_speed_capa; /* Link speed capabilities. */
struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */
rte_spinlock_t lock; /* Lock for control functions. */
+ int primary_socket; /* Unix socket for primary process. */
+ struct rte_intr_handle intr_handle_socket; /* Interrupt handler. */
};
/**
@@ -299,4 +301,11 @@ int mlx5_flow_destroy(struct rte_eth_dev *, struct rte_flow *,
void priv_flow_stop(struct priv *);
int priv_flow_rxq_in_use(struct priv *, struct rxq *);
+/* mlx5_socket.c */
+
+int priv_socket_init(struct priv *priv);
+int priv_socket_uninit(struct priv *priv);
+void priv_socket_handle(struct priv *priv);
+int priv_socket_connect(struct priv *priv);
+
#endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index c1affba..46c3013 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -31,6 +31,8 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#define _GNU_SOURCE
+
#include <stddef.h>
#include <assert.h>
#include <unistd.h>
@@ -50,6 +52,7 @@
#include <linux/version.h>
#include <fcntl.h>
#include <stdalign.h>
+#include <sys/un.h>
#include <rte_atomic.h>
#include <rte_ethdev.h>
@@ -1204,6 +1207,23 @@ struct priv *
}
/**
+ * Handle interrupts from the socket.
+ *
+ * @param cb_arg
+ * Callback argument.
+ */
+static void
+mlx5_dev_handler_socket(void *cb_arg)
+{
+ struct rte_eth_dev *dev = cb_arg;
+ struct priv *priv = dev->data->dev_private;
+
+ priv_lock(priv);
+ priv_socket_handle(priv);
+ priv_unlock(priv);
+}
+
+/**
* Uninstall interrupt handler.
*
* @param priv
@@ -1214,16 +1234,19 @@ struct priv *
void
priv_dev_interrupt_handler_uninstall(struct priv *priv, struct rte_eth_dev *dev)
{
- if (!dev->data->dev_conf.intr_conf.lsc)
- return;
- rte_intr_callback_unregister(&priv->intr_handle,
- mlx5_dev_interrupt_handler,
- dev);
+ if (dev->data->dev_conf.intr_conf.lsc)
+ rte_intr_callback_unregister(&priv->intr_handle,
+ mlx5_dev_interrupt_handler, dev);
+ if (priv->primary_socket)
+ rte_intr_callback_unregister(&priv->intr_handle_socket,
+ mlx5_dev_handler_socket, dev);
if (priv->pending_alarm)
rte_eal_alarm_cancel(mlx5_dev_link_status_handler, dev);
priv->pending_alarm = 0;
priv->intr_handle.fd = 0;
priv->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+ priv->intr_handle_socket.fd = 0;
+ priv->intr_handle_socket.type = RTE_INTR_HANDLE_UNKNOWN;
}
/**
@@ -1239,20 +1262,27 @@ struct priv *
{
int rc, flags;
- if (!dev->data->dev_conf.intr_conf.lsc)
- return;
+ assert(!mlx5_is_secondary());
assert(priv->ctx->async_fd > 0);
flags = fcntl(priv->ctx->async_fd, F_GETFL);
rc = fcntl(priv->ctx->async_fd, F_SETFL, flags | O_NONBLOCK);
if (rc < 0) {
INFO("failed to change file descriptor async event queue");
dev->data->dev_conf.intr_conf.lsc = 0;
- } else {
+ }
+ if (dev->data->dev_conf.intr_conf.lsc) {
priv->intr_handle.fd = priv->ctx->async_fd;
priv->intr_handle.type = RTE_INTR_HANDLE_EXT;
rte_intr_callback_register(&priv->intr_handle,
- mlx5_dev_interrupt_handler,
- dev);
+ mlx5_dev_interrupt_handler, dev);
+ }
+
+ rc = priv_socket_init(priv);
+ if (!rc && priv->primary_socket) {
+ priv->intr_handle_socket.fd = priv->primary_socket;
+ priv->intr_handle_socket.type = RTE_INTR_HANDLE_EXT;
+ rte_intr_callback_register(&priv->intr_handle_socket,
+ mlx5_dev_handler_socket, dev);
}
}
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index e352a1e..5357be0 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -286,6 +286,7 @@ struct txq_ctrl {
struct ibv_qp *qp; /* Queue Pair. */
unsigned int socket; /* CPU socket ID for allocations. */
struct txq txq; /* Data path structure. */
+ off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
};
/* mlx5_rxq.c */
@@ -319,6 +320,7 @@ int txq_ctrl_setup(struct rte_eth_dev *, struct txq_ctrl *, uint16_t,
int mlx5_tx_queue_setup(struct rte_eth_dev *, uint16_t, uint16_t, unsigned int,
const struct rte_eth_txconf *);
void mlx5_tx_queue_release(void *);
+int priv_tx_uar_remap(struct priv *priv, int fd);
/* mlx5_rxtx.c */
diff --git a/drivers/net/mlx5/mlx5_socket.c b/drivers/net/mlx5/mlx5_socket.c
new file mode 100644
index 0000000..78b4138
--- /dev/null
+++ b/drivers/net/mlx5/mlx5_socket.c
@@ -0,0 +1,294 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2016 6WIND S.A.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#define _GNU_SOURCE
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/stat.h>
+
+#include "mlx5.h"
+#include "mlx5_utils.h"
+
+/**
+ * Initialise the socket to communicate with the secondary process.
+ *
+ * Creates a non-blocking AF_UNIX stream socket, binds it to
+ * "/var/tmp/<MLX5_DRIVER_NAME>_<fd>" (an existing file of that name is
+ * removed first) and starts listening on it. On success the descriptor is
+ * stored in priv->primary_socket. If a step after socket() fails, the
+ * descriptor is closed and priv->primary_socket is reset to 0.
+ *
+ * @param[in] priv
+ *   Pointer to private structure.
+ *
+ * @return
+ *   0 on success, positive errno value on failure.
+ */
+int
+priv_socket_init(struct priv *priv)
+{
+	struct sockaddr_un sun = {
+		.sun_family = AF_UNIX,
+	};
+	int ret;
+	int flags;
+	struct stat file_stat;
+
+	ret = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (ret < 0) {
+		ret = errno;
+		WARN("secondary process not supported: %s", strerror(ret));
+		return ret;
+	}
+	priv->primary_socket = ret;
+	flags = fcntl(priv->primary_socket, F_GETFL, 0);
+	if (flags == -1) {
+		/* Save errno now, the cleanup calls below may overwrite it. */
+		ret = errno;
+		goto out;
+	}
+	if (fcntl(priv->primary_socket, F_SETFL, flags | O_NONBLOCK) < 0) {
+		ret = errno;
+		goto out;
+	}
+	snprintf(sun.sun_path, sizeof(sun.sun_path), "/var/tmp/%s_%d",
+		 MLX5_DRIVER_NAME, priv->primary_socket);
+	/* bind() fails on an existing path, drop any leftover file first. */
+	if (!stat(sun.sun_path, &file_stat))
+		claim_zero(remove(sun.sun_path));
+	if (bind(priv->primary_socket, (const struct sockaddr *)&sun,
+		 sizeof(sun)) < 0) {
+		ret = errno;
+		WARN("cannot bind socket, secondary process not supported: %s",
+		     strerror(ret));
+		goto close;
+	}
+	if (listen(priv->primary_socket, 0) < 0) {
+		ret = errno;
+		WARN("Secondary process not supported: %s", strerror(ret));
+		goto close;
+	}
+	return 0;
+close:
+	remove(sun.sun_path);
+out:
+	claim_zero(close(priv->primary_socket));
+	priv->primary_socket = 0;
+	return ret;
+}
+
+/**
+ * Un-initialise the socket used to communicate with the secondary process.
+ *
+ * Closes priv->primary_socket, resets it to 0 and removes the socket file
+ * "/var/tmp/<MLX5_DRIVER_NAME>_<fd>". Does nothing when
+ * priv->primary_socket is 0, the value priv_socket_init() leaves behind
+ * when no socket could be set up.
+ *
+ * @param[in] priv
+ *   Pointer to private structure.
+ *
+ * @return
+ *   Always 0.
+ */
+int
+priv_socket_uninit(struct priv *priv)
+{
+	if (priv->primary_socket) {
+		/* Build the path before the descriptor number is cleared. */
+		MKSTR(path, "/var/tmp/%s_%d", MLX5_DRIVER_NAME,
+		      priv->primary_socket);
+
+		claim_zero(close(priv->primary_socket));
+		priv->primary_socket = 0;
+		claim_zero(remove(path));
+	}
+	return 0;
+}
+
+/**
+ * Handle socket interrupts.
+ *
+ * Accepts one pending connection on priv->primary_socket, reads the
+ * SCM_CREDENTIALS message sent by the peer and, when the peer's uid and gid
+ * match those of this process, replies with priv->ctx->cmd_fd passed as
+ * SCM_RIGHTS ancillary data. The accepted connection is closed before
+ * returning, whatever the outcome.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ */
+void
+priv_socket_handle(struct priv *priv)
+{
+	int conn_sock;
+	int ret = 0;
+	struct cmsghdr *cmsg = NULL;
+	struct ucred *cred = NULL;
+	char buf[CMSG_SPACE(sizeof(struct ucred))] = { 0 };
+	char vbuf[1024] = { 0 };
+	/*
+	 * A single payload byte is exchanged (sizeof(*vbuf) == 1); this length
+	 * has to match the iovec used by priv_socket_connect(), both sides
+	 * receive with MSG_WAITALL.
+	 */
+	struct iovec io = {
+		.iov_base = vbuf,
+		.iov_len = sizeof(*vbuf),
+	};
+	struct msghdr msg = {
+		.msg_iov = &io,
+		.msg_iovlen = 1,
+		.msg_control = buf,
+		.msg_controllen = sizeof(buf),
+	};
+	int *fd;
+
+	/* Accept the connection from the client. */
+	conn_sock = accept(priv->primary_socket, NULL, NULL);
+	if (conn_sock < 0) {
+		WARN("connection failed: %s", strerror(errno));
+		return;
+	}
+	ret = setsockopt(conn_sock, SOL_SOCKET, SO_PASSCRED, &(int){1},
+			 sizeof(int));
+	if (ret < 0) {
+		WARN("cannot change socket options");
+		goto out;
+	}
+	ret = recvmsg(conn_sock, &msg, MSG_WAITALL);
+	if (ret < 0) {
+		WARN("received an empty message: %s", strerror(errno));
+		goto out;
+	}
+	/* Expect to receive credentials only. */
+	cmsg = CMSG_FIRSTHDR(&msg);
+	if (cmsg == NULL) {
+		WARN("no message");
+		goto out;
+	}
+	/*
+	 * cmsg_len counts the header as well as the payload, so it is compared
+	 * against CMSG_LEN() rather than the bare payload size.
+	 */
+	if ((cmsg->cmsg_level == SOL_SOCKET) &&
+	    (cmsg->cmsg_type == SCM_CREDENTIALS) &&
+	    (cmsg->cmsg_len >= CMSG_LEN(sizeof(*cred)))) {
+		cred = (struct ucred *)CMSG_DATA(cmsg);
+		assert(cred != NULL);
+	}
+	cmsg = CMSG_NXTHDR(&msg, cmsg);
+	if (cmsg != NULL) {
+		WARN("Message wrongly formated");
+		goto out;
+	}
+	/* Make sure all the ancillary data was received and valid. */
+	if ((cred == NULL) || (cred->uid != getuid()) ||
+	    (cred->gid != getgid())) {
+		WARN("wrong credentials");
+		goto out;
+	}
+	/*
+	 * Set-up the ancillary data: the control buffer is reused to carry a
+	 * single descriptor, so its advertised length is shrunk accordingly.
+	 */
+	msg.msg_controllen = CMSG_SPACE(sizeof(priv->ctx->cmd_fd));
+	cmsg = CMSG_FIRSTHDR(&msg);
+	assert(cmsg != NULL);
+	cmsg->cmsg_level = SOL_SOCKET;
+	cmsg->cmsg_type = SCM_RIGHTS;
+	cmsg->cmsg_len = CMSG_LEN(sizeof(priv->ctx->cmd_fd));
+	fd = (int *)CMSG_DATA(cmsg);
+	*fd = priv->ctx->cmd_fd;
+	ret = sendmsg(conn_sock, &msg, 0);
+	if (ret < 0)
+		WARN("cannot send response");
+out:
+	close(conn_sock);
+}
+
+/**
+ * Connect to the primary process.
+ *
+ * Connects to the AF_UNIX socket "/var/tmp/<MLX5_DRIVER_NAME>_<n>", where
+ * <n> is priv->primary_socket, sends this process' pid/uid/gid as
+ * SCM_CREDENTIALS and waits for a reply carrying a file descriptor as
+ * SCM_RIGHTS ancillary data. The connection is closed before returning.
+ *
+ * @param[in] priv
+ *   Pointer to private structure.
+ *
+ * @return
+ *   fd on success, negative errno value on failure.
+ */
+int
+priv_socket_connect(struct priv *priv)
+{
+	struct sockaddr_un sun = {
+		.sun_family = AF_UNIX,
+	};
+	int socket_fd;
+	int *fd = NULL;
+	int ret;
+	struct ucred *cred;
+	char buf[CMSG_SPACE(sizeof(*cred))] = { 0 };
+	char vbuf[1024] = { 0 };
+	/*
+	 * A single payload byte is exchanged (sizeof(*vbuf) == 1); this length
+	 * has to match the iovec used by priv_socket_handle().
+	 */
+	struct iovec io = {
+		.iov_base = vbuf,
+		.iov_len = sizeof(*vbuf),
+	};
+	struct msghdr msg = {
+		.msg_control = buf,
+		.msg_controllen = sizeof(buf),
+		.msg_iov = &io,
+		.msg_iovlen = 1,
+	};
+	struct cmsghdr *cmsg;
+
+	ret = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (ret < 0) {
+		ret = -errno;
+		WARN("cannot connect to primary");
+		return ret;
+	}
+	socket_fd = ret;
+	snprintf(sun.sun_path, sizeof(sun.sun_path), "/var/tmp/%s_%d",
+		 MLX5_DRIVER_NAME, priv->primary_socket);
+	ret = connect(socket_fd, (const struct sockaddr *)&sun, sizeof(sun));
+	if (ret < 0) {
+		ret = -errno;
+		WARN("cannot connect to primary");
+		goto out;
+	}
+	cmsg = CMSG_FIRSTHDR(&msg);
+	if (cmsg == NULL) {
+		DEBUG("cannot get first message");
+		ret = -EINVAL;
+		goto out;
+	}
+	cmsg->cmsg_level = SOL_SOCKET;
+	cmsg->cmsg_type = SCM_CREDENTIALS;
+	cmsg->cmsg_len = CMSG_LEN(sizeof(*cred));
+	cred = (struct ucred *)CMSG_DATA(cmsg);
+	cred->pid = getpid();
+	cred->uid = getuid();
+	cred->gid = getgid();
+	ret = sendmsg(socket_fd, &msg, MSG_DONTWAIT);
+	if (ret < 0) {
+		ret = -errno;
+		WARN("cannot send credentials to primary: %s",
+		     strerror(-ret));
+		goto out;
+	}
+	ret = recvmsg(socket_fd, &msg, MSG_WAITALL);
+	if (ret <= 0) {
+		/* 0 means the peer closed the connection without a reply. */
+		ret = (ret < 0) ? -errno : -ECONNRESET;
+		WARN("no message from primary: %s", strerror(-ret));
+		goto out;
+	}
+	/*
+	 * recvmsg() returned a byte count; never let it leak out as if it
+	 * were a descriptor when the reply carries no valid SCM_RIGHTS.
+	 */
+	cmsg = CMSG_FIRSTHDR(&msg);
+	if ((cmsg == NULL) ||
+	    (cmsg->cmsg_level != SOL_SOCKET) ||
+	    (cmsg->cmsg_type != SCM_RIGHTS) ||
+	    (cmsg->cmsg_len < CMSG_LEN(sizeof(*fd)))) {
+		WARN("No file descriptor received");
+		ret = -EINVAL;
+		goto out;
+	}
+	fd = (int *)CMSG_DATA(cmsg);
+	if (*fd <= 0) {
+		ret = -EINVAL;
+		WARN("no file descriptor received: %s", strerror(-ret));
+		goto out;
+	}
+	ret = *fd;
+out:
+	close(socket_fd);
+	return ret;
+}
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index d6c9657..18ea471 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -36,6 +36,8 @@
#include <errno.h>
#include <string.h>
#include <stdint.h>
+#include <unistd.h>
+#include <sys/mman.h>
/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
@@ -168,6 +170,7 @@
struct mlx5dv_obj obj;
int ret = 0;
+ qp.comp_mask = MLX5DV_QP_MASK_UAR_MMAP_OFFSET;
obj.cq.in = ibcq;
obj.cq.out = &cq_info;
obj.qp.in = tmpl->qp;
@@ -194,6 +197,13 @@
tmpl->txq.elts =
(struct rte_mbuf *(*)[1 << tmpl->txq.elts_n])
((uintptr_t)txq_ctrl + sizeof(*txq_ctrl));
+	/*
+	 * Test the flag with '&': OR-ing it in is always non-zero, which made
+	 * the error branch below unreachable.
+	 */
+	if (qp.comp_mask & MLX5DV_QP_MASK_UAR_MMAP_OFFSET) {
+		tmpl->uar_mmap_offset = qp.uar_mmap_offset;
+	} else {
+		ERROR("Failed to retrieve UAR info, invalid libmlx5.so version");
+		return EINVAL;
+	}
+
return 0;
}
@@ -533,3 +543,59 @@
rte_free(txq_ctrl);
priv_unlock(priv);
}
+
+
+/**
+ * Map locally UAR used in Tx queues for BlueFlame doorbell.
+ *
+ * For every Tx queue, the page containing its bf_reg address is mapped at
+ * that same virtual address (MAP_FIXED) from the given descriptor, using the
+ * queue's uar_mmap_offset. A page shared by several queues is mapped once.
+ *
+ * @param[in] priv
+ *   Pointer to private structure.
+ * @param fd
+ *   Verbs file descriptor to map UAR pages.
+ *
+ * @return
+ *   0 on success, -1 on failure.
+ */
+int
+priv_tx_uar_remap(struct priv *priv, int fd)
+{
+	unsigned int i, j;
+	/* At least one element: a zero-length VLA is undefined behaviour. */
+	uintptr_t pages[priv->txqs_n ? priv->txqs_n : 1];
+	unsigned int pages_n = 0;
+	uintptr_t uar_va;
+	void *addr;
+	struct txq *txq;
+	struct txq_ctrl *txq_ctrl;
+	int already_mapped;
+	size_t page_size = sysconf(_SC_PAGESIZE);
+
+	/*
+	 * As rdma-core, UARs are mapped in size of OS page size.
+	 * Use aligned address to avoid duplicate mmap.
+	 * Ref to libmlx5 function: mlx5_init_context()
+	 */
+	for (i = 0; i != priv->txqs_n; ++i) {
+		txq = (*priv->txqs)[i];
+		/*
+		 * An empty slot would otherwise be dereferenced through
+		 * container_of() below.
+		 */
+		if (!txq)
+			continue;
+		txq_ctrl = container_of(txq, struct txq_ctrl, txq);
+		uar_va = (uintptr_t)txq_ctrl->txq.bf_reg;
+		uar_va = RTE_ALIGN_FLOOR(uar_va, page_size);
+		already_mapped = 0;
+		for (j = 0; j != pages_n; ++j) {
+			if (pages[j] == uar_va) {
+				already_mapped = 1;
+				break;
+			}
+		}
+		if (already_mapped)
+			continue;
+		pages[pages_n++] = uar_va;
+		addr = mmap((void *)uar_va, page_size,
+			    PROT_WRITE, MAP_FIXED | MAP_SHARED, fd,
+			    txq_ctrl->uar_mmap_offset);
+		/* Also catches MAP_FAILED, which never equals uar_va. */
+		if (addr != (void *)uar_va) {
+			ERROR("call to mmap failed on UAR for txq %u\n", i);
+			return -1;
+		}
+	}
+	return 0;
+}
--
1.8.3.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* [dpdk-dev] [PATCH v4 3/5] net/mlx5: allocate verbs object into shared memory
2017-08-24 14:03 [dpdk-dev] [PATCH v1 1/2] net/mlx5: change eth device reference for secondary process Xueming Li
` (12 preceding siblings ...)
2017-09-19 14:31 ` [dpdk-dev] [PATCH v4 2/5] net/mlx5: install a socket to exchange a file descriptor Xueming Li
@ 2017-09-19 14:31 ` Xueming Li
2017-09-19 14:31 ` [dpdk-dev] [PATCH v4 4/5] net/mlx5: add operations for secondary process Xueming Li
` (7 subsequent siblings)
21 siblings, 0 replies; 41+ messages in thread
From: Xueming Li @ 2017-09-19 14:31 UTC (permalink / raw)
To: Nelio Laranjeiro, Adrien Mazarguil; +Cc: Xueming Li, dev
The PMD uses Verbs objects which were not available in shared memory.
This patch modifies the location where Verbs objects are allocated (from
process memory address space to shared memory address space) and thus
allows a secondary process to use those objects by mapping this shared
memory space into its own memory space.
Signed-off-by: Xueming Li <xuemingl@mellanox.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
drivers/net/mlx5/mlx5.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 55 insertions(+)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index bfa38ba..11490d4 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -132,6 +132,52 @@ struct mlx5_args {
}
/**
+ * Verbs callback to allocate memory. The buffer is obtained from
+ * rte_malloc_socket() on the NUMA node of the device, so that it resides
+ * inside a huge page.
+ * Please note that all allocation must respect the alignment from libmlx5
+ * (i.e. currently sysconf(_SC_PAGESIZE)).
+ *
+ * @param[in] size
+ *   The size in bytes of the memory to allocate.
+ * @param[in] data
+ *   A pointer to the callback data (the struct priv of the device).
+ *
+ * @return
+ *   A pointer to the allocated space, as returned by rte_malloc_socket().
+ */
+static void *
+mlx5_alloc_verbs_buf(size_t size, void *data)
+{
+	struct priv *priv = data;
+	void *ret;
+	size_t alignment = sysconf(_SC_PAGESIZE);
+
+	assert(data != NULL);
+	assert(!mlx5_is_secondary());
+	ret = rte_malloc_socket(__func__, size, alignment,
+				priv->dev->device->numa_node);
+	/* size_t arguments need %zu, %lu is only right where long matches. */
+	DEBUG("Extern alloc size: %zu, align: %zu: %p", size, alignment, ret);
+	return ret;
+}
+
+/**
+ * Verbs callback to free memory; the buffer is released with rte_free().
+ *
+ * @param[in] ptr
+ * A pointer to the memory to free.
+ * @param[in] data
+ * A pointer to the callback data; only checked by an assertion.
+ */
+static void
+mlx5_free_verbs_buf(void *ptr, void *data __rte_unused)
+{
+ assert(data != NULL);
+ assert(!mlx5_is_secondary());
+ DEBUG("Extern free request: %p", ptr);
+ rte_free(ptr);
+}
+
+/**
* DPDK callback to close the device.
*
* Destroy all queues and objects, free memory.
@@ -826,6 +872,15 @@ struct mlx5_args {
eth_dev->dev_ops = &mlx5_dev_ops;
TAILQ_INIT(&priv->flows);
+ /* Hint libmlx5 to use PMD allocator for data plane resources */
+ struct mlx5dv_ctx_allocators alctr = {
+ .alloc = &mlx5_alloc_verbs_buf,
+ .free = &mlx5_free_verbs_buf,
+ .data = priv,
+ };
+ mlx5dv_set_context_attr(ctx, MLX5DV_CTX_ATTR_BUF_ALLOCATORS,
+ (void *)((uintptr_t)&alctr));
+
/* Bring Ethernet device up. */
DEBUG("forcing Ethernet interface up");
priv_set_flags(priv, ~IFF_UP, IFF_UP);
--
1.8.3.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* [dpdk-dev] [PATCH v4 4/5] net/mlx5: add operations for secondary process
2017-08-24 14:03 [dpdk-dev] [PATCH v1 1/2] net/mlx5: change eth device reference for secondary process Xueming Li
` (13 preceding siblings ...)
2017-09-19 14:31 ` [dpdk-dev] [PATCH v4 3/5] net/mlx5: allocate verbs object into shared memory Xueming Li
@ 2017-09-19 14:31 ` Xueming Li
2017-09-19 14:31 ` [dpdk-dev] [PATCH v4 5/5] net/mlx5: multi-process document update Xueming Li
` (6 subsequent siblings)
21 siblings, 0 replies; 41+ messages in thread
From: Xueming Li @ 2017-09-19 14:31 UTC (permalink / raw)
To: Nelio Laranjeiro, Adrien Mazarguil; +Cc: Xueming Li, dev
Add operations that are safe for secondary processes:
* (x)stats
* device info get
* rx/tx descriptor status
Signed-off-by: Xueming Li <xuemingl@mellanox.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
drivers/net/mlx5/mlx5.c | 16 +++++++++++++++-
drivers/net/mlx5/mlx5.h | 1 +
drivers/net/mlx5/mlx5_ethdev.c | 11 +++++------
3 files changed, 21 insertions(+), 7 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 11490d4..64d9434 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -303,6 +303,18 @@ struct mlx5_args {
.rx_queue_intr_disable = mlx5_rx_intr_disable,
};
+
+/*
+ * Reduced operation table: statistics, device information and Rx/Tx
+ * descriptor status callbacks only. All other callbacks are left unset.
+ */
+static const struct eth_dev_ops mlx5_dev_sec_ops = {
+ .stats_get = mlx5_stats_get,
+ .stats_reset = mlx5_stats_reset,
+ .xstats_get = mlx5_xstats_get,
+ .xstats_reset = mlx5_xstats_reset,
+ .xstats_get_names = mlx5_xstats_get_names,
+ .dev_infos_get = mlx5_dev_infos_get,
+ .rx_descriptor_status = mlx5_rx_descriptor_status,
+ .tx_descriptor_status = mlx5_tx_descriptor_status,
+};
+
static struct {
struct rte_pci_addr pci_addr; /* associated PCI address */
uint32_t ports; /* physical ports bitfield. */
@@ -640,7 +652,7 @@ struct mlx5_args {
goto error;
}
eth_dev->device = &pci_dev->device;
- eth_dev->dev_ops = NULL;
+ eth_dev->dev_ops = &mlx5_dev_sec_ops;
priv = eth_dev->data->dev_private;
/* Receive command fd from primary process */
err = priv_socket_connect(priv);
@@ -707,6 +719,8 @@ struct mlx5_args {
}
priv->ctx = ctx;
+ strncpy(priv->ibdev_path, priv->ctx->device->ibdev_path,
+ sizeof(priv->ibdev_path));
priv->device_attr = device_attr;
priv->port = port;
priv->pd = pd;
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 1ce02e8..928aeb6 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -91,6 +91,7 @@ struct priv {
struct ibv_context *ctx; /* Verbs context. */
struct ibv_device_attr_ex device_attr; /* Device properties. */
struct ibv_pd *pd; /* Protection Domain. */
+ char ibdev_path[IBV_SYSFS_PATH_MAX]; /* IB device path for secondary */
/*
* MAC addresses array and configuration bit-field.
* An extra entry that cannot be modified by the DPDK is reserved
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 46c3013..8f3ff89 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -165,7 +165,7 @@ struct priv *
char match[IF_NAMESIZE] = "";
{
- MKSTR(path, "%s/device/net", priv->ctx->device->ibdev_path);
+ MKSTR(path, "%s/device/net", priv->ibdev_path);
dir = opendir(path);
if (dir == NULL)
@@ -183,7 +183,7 @@ struct priv *
continue;
MKSTR(path, "%s/device/net/%s/%s",
- priv->ctx->device->ibdev_path, name,
+ priv->ibdev_path, name,
(dev_type ? "dev_id" : "dev_port"));
file = fopen(path, "rb");
@@ -271,11 +271,11 @@ struct priv *
if (priv_is_ib_cntr(entry)) {
MKSTR(path, "%s/ports/1/hw_counters/%s",
- priv->ctx->device->ibdev_path, entry);
+ priv->ibdev_path, entry);
file = fopen(path, "rb");
} else {
MKSTR(path, "%s/device/net/%s/%s",
- priv->ctx->device->ibdev_path, ifname, entry);
+ priv->ibdev_path, ifname, entry);
file = fopen(path, "rb");
}
if (file == NULL)
@@ -318,8 +318,7 @@ struct priv *
if (priv_get_ifname(priv, &ifname))
return -1;
- MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device->ibdev_path,
- ifname, entry);
+ MKSTR(path, "%s/device/net/%s/%s", priv->ibdev_path, ifname, entry);
file = fopen(path, "wb");
if (file == NULL)
--
1.8.3.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* [dpdk-dev] [PATCH v4 5/5] net/mlx5: multi-process document update
2017-08-24 14:03 [dpdk-dev] [PATCH v1 1/2] net/mlx5: change eth device reference for secondary process Xueming Li
` (14 preceding siblings ...)
2017-09-19 14:31 ` [dpdk-dev] [PATCH v4 4/5] net/mlx5: add operations for secondary process Xueming Li
@ 2017-09-19 14:31 ` Xueming Li
2017-09-19 16:16 ` Mcnamara, John
2017-10-06 15:45 ` [dpdk-dev] [PATCH v5 0/5] net/mlx5 multi-process support Xueming Li
` (5 subsequent siblings)
21 siblings, 1 reply; 41+ messages in thread
From: Xueming Li @ 2017-09-19 14:31 UTC (permalink / raw)
To: Nelio Laranjeiro, Adrien Mazarguil; +Cc: Xueming Li, dev
This patch updates the feature list and NIC guide to mark the driver as
multi-process enabled.
Signed-off-by: Xueming Li <xuemingl@mellanox.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
doc/guides/nics/features/mlx5.ini | 1 +
doc/guides/nics/mlx5.rst | 4 ++--
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/doc/guides/nics/features/mlx5.ini b/doc/guides/nics/features/mlx5.ini
index 99a8d93..2913591 100644
--- a/doc/guides/nics/features/mlx5.ini
+++ b/doc/guides/nics/features/mlx5.ini
@@ -34,6 +34,7 @@ Tx descriptor status = Y
Basic stats = Y
Extended stats = Y
Stats per queue = Y
+Multiprocess aware = Y
Other kdrv = Y
ARMv8 = Y
Power8 = Y
diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index ffa20a2..01cb2ff 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -92,7 +92,7 @@ Features
- Flow director (RTE_FDIR_MODE_PERFECT, RTE_FDIR_MODE_PERFECT_MAC_VLAN and
RTE_ETH_FDIR_REJECT).
- Flow API.
-- Secondary process TX is supported.
+- Multiple process.
- KVM and VMware ESX SR-IOV modes are supported.
- RSS hash result is supported.
- Hardware TSO.
@@ -106,7 +106,7 @@ Limitations
- Inner RSS for VXLAN frames is not supported yet.
- Port statistics through software counters only.
- Hardware checksum RX offloads for VXLAN inner header are not supported yet.
-- Secondary process RX is not supported.
+- Forked secondary process not supported.
- Flow pattern without any specific vlan will match for vlan packets as well:
When VLAN spec is not specified in the pattern, the matching rule will be created with VLAN as a wild card.
--
1.8.3.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [dpdk-dev] [PATCH v4 0/5] net/mlx5 multi-process support
2017-09-19 14:31 ` [dpdk-dev] [PATCH v4 0/5] net/mlx5 multi-process support Xueming Li
@ 2017-09-19 14:41 ` Nélio Laranjeiro
2017-09-19 14:48 ` Ferruh Yigit
` (2 subsequent siblings)
3 siblings, 0 replies; 41+ messages in thread
From: Nélio Laranjeiro @ 2017-09-19 14:41 UTC (permalink / raw)
To: Xueming Li; +Cc: Adrien Mazarguil, dev
On Tue, Sep 19, 2017 at 10:31:47PM +0800, Xueming Li wrote:
> This patchset enhances Mellanox multi-process by supporting all multi-process
> examples, also support reading ethdev (x)stats in secondary process.
>
> Start from V2, this patchset depends on upstream rdma-core enhancement
> and l2fork example bug fix:
> http://www.dpdk.org/ml/archives/dev/2017-August/073405.html
> http://www.dpdk.org/ml/archives/dev/2017-September/075568.html
>
> V4:
> * remove forked secondary mode
You should not push commits with the ACK of someone who did not review
the modification.
--
Nélio Laranjeiro
6WIND
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [dpdk-dev] [PATCH v4 0/5] net/mlx5 multi-process support
2017-09-19 14:31 ` [dpdk-dev] [PATCH v4 0/5] net/mlx5 multi-process support Xueming Li
2017-09-19 14:41 ` Nélio Laranjeiro
@ 2017-09-19 14:48 ` Ferruh Yigit
2017-09-19 15:02 ` Xueming(Steven) Li
2017-09-20 8:07 ` Nélio Laranjeiro
2017-10-05 0:17 ` Ferruh Yigit
3 siblings, 1 reply; 41+ messages in thread
From: Ferruh Yigit @ 2017-09-19 14:48 UTC (permalink / raw)
To: Xueming Li, Nelio Laranjeiro, Adrien Mazarguil; +Cc: dev
On 9/19/2017 3:31 PM, Xueming Li wrote:
> This patchset enhances Mellanox multi-process by supporting all multi-process
> examples, also support reading ethdev (x)stats in secondary process.
>
> Start from V2, this patchset depends on upstream rdma-core enhancement
> and l2fork example bug fix:
> http://www.dpdk.org/ml/archives/dev/2017-August/073405.html
> http://www.dpdk.org/ml/archives/dev/2017-September/075568.html
Out of curiosity, why this driver patch depends on example app bug fix?
> V4:
> * remove forked secondary mode
>
> V3:
> * add cover letter
> * add dependency notes
>
> V2:
> * split into multiple patches
> * support forked secondary process
> * add secondary process ethdev operations
> * rebase on latest rdma-core upstream api
>
>
> Xueming Li (5):
> net/mlx5: change eth device reference for secondary process
> net/mlx5: install a socket to exchange a file descriptor
> net/mlx5: allocate verbs object into shared memory
> net/mlx5: add operations for secondary process
> net/mlx5: multi-process document update
>
> doc/guides/nics/features/mlx5.ini | 1 +
> doc/guides/nics/mlx5.rst | 4 +-
> drivers/net/mlx5/Makefile | 1 +
> drivers/net/mlx5/mlx5.c | 104 ++++++++++++++
> drivers/net/mlx5/mlx5.h | 16 ++-
> drivers/net/mlx5/mlx5_ethdev.c | 108 +++++++++-----
> drivers/net/mlx5/mlx5_fdir.c | 2 +
> drivers/net/mlx5/mlx5_rss.c | 1 +
> drivers/net/mlx5/mlx5_rxq.c | 1 +
> drivers/net/mlx5/mlx5_rxtx.h | 2 +
> drivers/net/mlx5/mlx5_socket.c | 294 ++++++++++++++++++++++++++++++++++++++
> drivers/net/mlx5/mlx5_trigger.c | 4 +-
> drivers/net/mlx5/mlx5_txq.c | 66 +++++++++
> 13 files changed, 562 insertions(+), 42 deletions(-)
> create mode 100644 drivers/net/mlx5/mlx5_socket.c
>
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [dpdk-dev] [PATCH v4 0/5] net/mlx5 multi-process support
2017-09-19 14:48 ` Ferruh Yigit
@ 2017-09-19 15:02 ` Xueming(Steven) Li
0 siblings, 0 replies; 41+ messages in thread
From: Xueming(Steven) Li @ 2017-09-19 15:02 UTC (permalink / raw)
To: Ferruh Yigit, Nélio Laranjeiro, Adrien Mazarguil; +Cc: dev
The only example to verify forked mode secondary process - should be
deleted now as fork support removed in v4
> -----Original Message-----
> From: Ferruh Yigit [mailto:ferruh.yigit@intel.com]
> Sent: Tuesday, September 19, 2017 10:48 PM
> To: Xueming(Steven) Li <xuemingl@mellanox.com>; Nélio Laranjeiro
> <nelio.laranjeiro@6wind.com>; Adrien Mazarguil
> <adrien.mazarguil@6wind.com>
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v4 0/5] net/mlx5 multi-process support
>
> On 9/19/2017 3:31 PM, Xueming Li wrote:
> > This patchset enhances Mellanox multi-process by supporting all
> > multi-process examples, also support reading ethdev (x)stats in
> secondary process.
> >
> > Start from V2, this patchset depends on upstream rdma-core enhancement
> > and l2fork example bug fix:
> > https://emea01.safelinks.protection.outlook.com/?url=http%3A%2F%2Fwww.
> > dpdk.org%2Fml%2Farchives%2Fdev%2F2017-August%2F073405.html&data=02%7C0
> > 1%7Cxuemingl%40mellanox.com%7Ce4100a14196745c17db308d4ff6d7221%7Ca6529
> > 71c7d2e4d9ba6a4d149256f461b%7C0%7C0%7C636414292914587251&sdata=i7zsa3A
> > uSSwREjp25fOd9NHbpOiqrHQ%2B9Os1v6wc%2BiA%3D&reserved=0
> > https://emea01.safelinks.protection.outlook.com/?url=http%3A%2F%2Fwww.
> > dpdk.org%2Fml%2Farchives%2Fdev%2F2017-September%2F075568.html&data=02%
> > 7C01%7Cxuemingl%40mellanox.com%7Ce4100a14196745c17db308d4ff6d7221%7Ca6
> > 52971c7d2e4d9ba6a4d149256f461b%7C0%7C0%7C636414292914587251&sdata=tXcz
> > DtwMbPjGcRTbC0O8a7DCVtR5zKIz7%2B3RA5CC0YA%3D&reserved=0
>
> Out of curiosity, why this driver patch depends on example app bug fix?
>
> > V4:
> > * remove forked secondary mode
> >
> > V3:
> > * add cover letter
> > * add dependency notes
> >
> > V2:
> > * split into multiple patches
> > * support forked secondary process
> > * add secondary process ethdev operations
> > * rebase on latest rdma-core upstream api
> >
> >
> > Xueming Li (5):
> > net/mlx5: change eth device reference for secondary process
> > net/mlx5: install a socket to exchange a file descriptor
> > net/mlx5: allocate verbs object into shared memory
> > net/mlx5: add operations for secondary process
> > net/mlx5: multi-process document update
> >
> > doc/guides/nics/features/mlx5.ini | 1 +
> > doc/guides/nics/mlx5.rst | 4 +-
> > drivers/net/mlx5/Makefile | 1 +
> > drivers/net/mlx5/mlx5.c | 104 ++++++++++++++
> > drivers/net/mlx5/mlx5.h | 16 ++-
> > drivers/net/mlx5/mlx5_ethdev.c | 108 +++++++++-----
> > drivers/net/mlx5/mlx5_fdir.c | 2 +
> > drivers/net/mlx5/mlx5_rss.c | 1 +
> > drivers/net/mlx5/mlx5_rxq.c | 1 +
> > drivers/net/mlx5/mlx5_rxtx.h | 2 +
> > drivers/net/mlx5/mlx5_socket.c | 294
> ++++++++++++++++++++++++++++++++++++++
> > drivers/net/mlx5/mlx5_trigger.c | 4 +-
> > drivers/net/mlx5/mlx5_txq.c | 66 +++++++++
> > 13 files changed, 562 insertions(+), 42 deletions(-) create mode
> > 100644 drivers/net/mlx5/mlx5_socket.c
> >
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [dpdk-dev] [PATCH v4 5/5] net/mlx5: multi-process document update
2017-09-19 14:31 ` [dpdk-dev] [PATCH v4 5/5] net/mlx5: multi-process document update Xueming Li
@ 2017-09-19 16:16 ` Mcnamara, John
0 siblings, 0 replies; 41+ messages in thread
From: Mcnamara, John @ 2017-09-19 16:16 UTC (permalink / raw)
To: Xueming Li, Nelio Laranjeiro, Adrien Mazarguil; +Cc: dev
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Xueming Li
> Sent: Tuesday, September 19, 2017 3:32 PM
> To: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>; Adrien Mazarguil
> <adrien.mazarguil@6wind.com>
> Cc: Xueming Li <xuemingl@mellanox.com>; dev@dpdk.org
> Subject: [dpdk-dev] [PATCH v4 5/5] net/mlx5: multi-process document update
>
> This patch update the feature list and NIC guide to be multi-process
> enabled.
>
> Signed-off-by: Xueming Li <xuemingl@mellanox.com>
> Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: John McNamara <john.mcnamara@intel.com>
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [dpdk-dev] [PATCH v4 0/5] net/mlx5 multi-process support
2017-09-19 14:31 ` [dpdk-dev] [PATCH v4 0/5] net/mlx5 multi-process support Xueming Li
2017-09-19 14:41 ` Nélio Laranjeiro
2017-09-19 14:48 ` Ferruh Yigit
@ 2017-09-20 8:07 ` Nélio Laranjeiro
2017-10-05 0:17 ` Ferruh Yigit
3 siblings, 0 replies; 41+ messages in thread
From: Nélio Laranjeiro @ 2017-09-20 8:07 UTC (permalink / raw)
To: Xueming Li; +Cc: Adrien Mazarguil, dev
On Tue, Sep 19, 2017 at 10:31:47PM +0800, Xueming Li wrote:
> This patchset enhances Mellanox multi-process by supporting all multi-process
> examples, also support reading ethdev (x)stats in secondary process.
>
> Start from V2, this patchset depends on upstream rdma-core enhancement
> and l2fork example bug fix:
> http://www.dpdk.org/ml/archives/dev/2017-August/073405.html
> http://www.dpdk.org/ml/archives/dev/2017-September/075568.html
>
> V4:
> * remove forked secondary mode
For the V4 series
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
--
Nélio Laranjeiro
6WIND
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [dpdk-dev] [PATCH v4 0/5] net/mlx5 multi-process support
2017-09-19 14:31 ` [dpdk-dev] [PATCH v4 0/5] net/mlx5 multi-process support Xueming Li
` (2 preceding siblings ...)
2017-09-20 8:07 ` Nélio Laranjeiro
@ 2017-10-05 0:17 ` Ferruh Yigit
2017-10-06 15:52 ` Xueming(Steven) Li
3 siblings, 1 reply; 41+ messages in thread
From: Ferruh Yigit @ 2017-10-05 0:17 UTC (permalink / raw)
To: Xueming Li, Nelio Laranjeiro, Adrien Mazarguil; +Cc: dev
On 9/19/2017 3:31 PM, Xueming Li wrote:
> This patchset enhances Mellanox multi-process by supporting all multi-process
> examples, also support reading ethdev (x)stats in secondary process.
>
> Start from V2, this patchset depends on upstream rdma-core enhancement
> and l2fork example bug fix:
> http://www.dpdk.org/ml/archives/dev/2017-August/073405.html
> http://www.dpdk.org/ml/archives/dev/2017-September/075568.html
>
> V4:
> * remove forked secondary mode
>
> V3:
> * add cover letter
> * add dependency notes
>
> V2:
> * split into multiple patches
> * support forked secondary process
> * add secondary process ethdev operations
> * rebase on latest rdma-core upstream api
>
>
> Xueming Li (5):
> net/mlx5: change eth device reference for secondary process
> net/mlx5: install a socket to exchange a file descriptor
> net/mlx5: allocate verbs object into shared memory
> net/mlx5: add operations for secondary process
> net/mlx5: multi-process document update
Hi Xueming,
I guess all dependent patches merged into next-net for this patchset, so
can get this one.
But this is causing merge conflicts on latest next-net, can you please
re-base patchset and sent a new version?
Thanks,
ferruh
^ permalink raw reply [flat|nested] 41+ messages in thread
* [dpdk-dev] [PATCH v5 0/5] net/mlx5 multi-process support
2017-08-24 14:03 [dpdk-dev] [PATCH v1 1/2] net/mlx5: change eth device reference for secondary process Xueming Li
` (15 preceding siblings ...)
2017-09-19 14:31 ` [dpdk-dev] [PATCH v4 5/5] net/mlx5: multi-process document update Xueming Li
@ 2017-10-06 15:45 ` Xueming Li
2017-10-06 18:21 ` Ferruh Yigit
2017-10-06 15:45 ` [dpdk-dev] [PATCH v5 1/5] net/mlx5: change eth device reference for secondary process Xueming Li
` (4 subsequent siblings)
21 siblings, 1 reply; 41+ messages in thread
From: Xueming Li @ 2017-10-06 15:45 UTC (permalink / raw)
To: Nelio Laranjeiro, ferruh.yigit; +Cc: Xueming Li, dev
This patchset enhances Mellanox multi-process by supporting all multi-process
examples, also support reading ethdev (x)stats in secondary process.
V5:
* rebase on latest upstream code, patch 2/5 mlx5_ethdev.c updated.
V4:
* remove forked secondary mode
V3:
* add cover letter
* add dependency notes
V2:
* split into multiple patches
* support forked secondary process
* add secondary process ethdev operations
* rebase on latest rdma-core upstream api
Xueming Li (5):
net/mlx5: change eth device reference for secondary process
net/mlx5: install a socket to exchange a file descriptor
net/mlx5: allocate verbs object into shared memory
net/mlx5: add operations for secondary process
net/mlx5: multi-process document update
doc/guides/nics/features/mlx5.ini | 1 +
doc/guides/nics/mlx5.rst | 4 +-
drivers/net/mlx5/Makefile | 1 +
drivers/net/mlx5/mlx5.c | 104 ++++++++++++++
drivers/net/mlx5/mlx5.h | 16 ++-
drivers/net/mlx5/mlx5_ethdev.c | 112 ++++++++++-----
drivers/net/mlx5/mlx5_fdir.c | 2 +
drivers/net/mlx5/mlx5_rss.c | 1 +
drivers/net/mlx5/mlx5_rxq.c | 1 +
drivers/net/mlx5/mlx5_rxtx.h | 2 +
drivers/net/mlx5/mlx5_socket.c | 294 ++++++++++++++++++++++++++++++++++++++
drivers/net/mlx5/mlx5_trigger.c | 4 +-
drivers/net/mlx5/mlx5_txq.c | 66 +++++++++
13 files changed, 564 insertions(+), 44 deletions(-)
create mode 100644 drivers/net/mlx5/mlx5_socket.c
--
1.8.3.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* [dpdk-dev] [PATCH v5 1/5] net/mlx5: change eth device reference for secondary process
2017-08-24 14:03 [dpdk-dev] [PATCH v1 1/2] net/mlx5: change eth device reference for secondary process Xueming Li
` (16 preceding siblings ...)
2017-10-06 15:45 ` [dpdk-dev] [PATCH v5 0/5] net/mlx5 multi-process support Xueming Li
@ 2017-10-06 15:45 ` Xueming Li
2017-10-06 15:45 ` [dpdk-dev] [PATCH v5 2/5] net/mlx5: install a socket to exchange a file descriptor Xueming Li
` (3 subsequent siblings)
21 siblings, 0 replies; 41+ messages in thread
From: Xueming Li @ 2017-10-06 15:45 UTC (permalink / raw)
To: Nelio Laranjeiro, ferruh.yigit; +Cc: Xueming Li, dev
An rte_eth_dev created by the primary process is not available in a secondary
process, so the primary process's local memory object could not be used
from a secondary process.
This patch stops referencing the primary rte_eth_dev object and uses the
secondary process's local rte_eth_dev instead.
Signed-off-by: Xueming Li <xuemingl@mellanox.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
drivers/net/mlx5/mlx5.h | 6 +++---
drivers/net/mlx5/mlx5_ethdev.c | 47 ++++++++++++++++++++++++-----------------
drivers/net/mlx5/mlx5_fdir.c | 2 ++
drivers/net/mlx5/mlx5_rss.c | 1 +
drivers/net/mlx5/mlx5_rxq.c | 1 +
drivers/net/mlx5/mlx5_trigger.c | 4 ++--
6 files changed, 37 insertions(+), 24 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index ab03fe0..78b27ed 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -87,7 +87,7 @@ struct mlx5_xstats_ctrl {
};
struct priv {
- struct rte_eth_dev *dev; /* Ethernet device. */
+ struct rte_eth_dev *dev; /* Ethernet device of master process. */
struct ibv_context *ctx; /* Verbs context. */
struct ibv_device_attr_ex device_attr; /* Device properties. */
struct ibv_pd *pd; /* Protection Domain. */
@@ -208,8 +208,8 @@ int mlx5_ibv_device_to_pci_addr(const struct ibv_device *,
void priv_dev_interrupt_handler_install(struct priv *, struct rte_eth_dev *);
int mlx5_set_link_down(struct rte_eth_dev *dev);
int mlx5_set_link_up(struct rte_eth_dev *dev);
-void priv_select_tx_function(struct priv *);
-void priv_select_rx_function(struct priv *);
+void priv_dev_select_tx_function(struct priv *priv, struct rte_eth_dev *dev);
+void priv_dev_select_rx_function(struct priv *priv, struct rte_eth_dev *dev);
/* mlx5_mac.c */
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index b87eb09..831d920 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -1296,7 +1296,9 @@ struct priv *
* Change the link state (UP / DOWN).
*
* @param priv
- * Pointer to Ethernet device structure.
+ * Pointer to private data structure.
+ * @param dev
+ * Pointer to rte_eth_dev structure.
* @param up
* Nonzero for link up, otherwise link down.
*
@@ -1304,17 +1306,16 @@ struct priv *
* 0 on success, errno value on failure.
*/
static int
-priv_set_link(struct priv *priv, int up)
+priv_dev_set_link(struct priv *priv, struct rte_eth_dev *dev, int up)
{
- struct rte_eth_dev *dev = priv->dev;
int err;
if (up) {
err = priv_set_flags(priv, ~IFF_UP, IFF_UP);
if (err)
return err;
- priv_select_tx_function(priv);
- priv_select_rx_function(priv);
+ priv_dev_select_tx_function(priv, dev);
+ priv_dev_select_rx_function(priv, dev);
} else {
err = priv_set_flags(priv, ~IFF_UP, ~IFF_UP);
if (err)
@@ -1341,7 +1342,7 @@ struct priv *
int err;
priv_lock(priv);
- err = priv_set_link(priv, 0);
+ err = priv_dev_set_link(priv, dev, 0);
priv_unlock(priv);
return err;
}
@@ -1362,7 +1363,7 @@ struct priv *
int err;
priv_lock(priv);
- err = priv_set_link(priv, 1);
+ err = priv_dev_set_link(priv, dev, 1);
priv_unlock(priv);
return err;
}
@@ -1371,29 +1372,33 @@ struct priv *
* Configure the TX function to use.
*
* @param priv
- * Pointer to private structure.
+ * Pointer to private data structure.
+ * @param dev
+ * Pointer to rte_eth_dev structure.
*/
void
-priv_select_tx_function(struct priv *priv)
+priv_dev_select_tx_function(struct priv *priv, struct rte_eth_dev *dev)
{
- priv->dev->tx_pkt_burst = mlx5_tx_burst;
+ assert(priv != NULL);
+ assert(dev != NULL);
+ dev->tx_pkt_burst = mlx5_tx_burst;
/* Select appropriate TX function. */
if (priv->mps == MLX5_MPW_ENHANCED) {
if (priv_check_vec_tx_support(priv) > 0) {
if (priv_check_raw_vec_tx_support(priv) > 0)
- priv->dev->tx_pkt_burst = mlx5_tx_burst_raw_vec;
+ dev->tx_pkt_burst = mlx5_tx_burst_raw_vec;
else
- priv->dev->tx_pkt_burst = mlx5_tx_burst_vec;
+ dev->tx_pkt_burst = mlx5_tx_burst_vec;
DEBUG("selected Enhanced MPW TX vectorized function");
} else {
- priv->dev->tx_pkt_burst = mlx5_tx_burst_empw;
+ dev->tx_pkt_burst = mlx5_tx_burst_empw;
DEBUG("selected Enhanced MPW TX function");
}
} else if (priv->mps && priv->txq_inline) {
- priv->dev->tx_pkt_burst = mlx5_tx_burst_mpw_inline;
+ dev->tx_pkt_burst = mlx5_tx_burst_mpw_inline;
DEBUG("selected MPW inline TX function");
} else if (priv->mps) {
- priv->dev->tx_pkt_burst = mlx5_tx_burst_mpw;
+ dev->tx_pkt_burst = mlx5_tx_burst_mpw;
DEBUG("selected MPW TX function");
}
}
@@ -1402,15 +1407,19 @@ struct priv *
* Configure the RX function to use.
*
* @param priv
- * Pointer to private structure.
+ * Pointer to private data structure.
+ * @param dev
+ * Pointer to rte_eth_dev structure.
*/
void
-priv_select_rx_function(struct priv *priv)
+priv_dev_select_rx_function(struct priv *priv, struct rte_eth_dev *dev)
{
+ assert(priv != NULL);
+ assert(dev != NULL);
if (priv_check_vec_rx_support(priv) > 0) {
- priv->dev->rx_pkt_burst = mlx5_rx_burst_vec;
+ dev->rx_pkt_burst = mlx5_rx_burst_vec;
DEBUG("selected RX vectorized function");
} else {
- priv->dev->rx_pkt_burst = mlx5_rx_burst;
+ dev->rx_pkt_burst = mlx5_rx_burst;
}
}
diff --git a/drivers/net/mlx5/mlx5_fdir.c b/drivers/net/mlx5/mlx5_fdir.c
index acae668..66e3818 100644
--- a/drivers/net/mlx5/mlx5_fdir.c
+++ b/drivers/net/mlx5/mlx5_fdir.c
@@ -1068,6 +1068,8 @@ struct mlx5_fdir_filter {
int ret = EINVAL;
struct priv *priv = dev->data->dev_private;
+ if (mlx5_is_secondary())
+ return -E_RTE_SECONDARY;
switch (filter_type) {
case RTE_ETH_FILTER_GENERIC:
if (filter_op != RTE_ETH_FILTER_GET)
diff --git a/drivers/net/mlx5/mlx5_rss.c b/drivers/net/mlx5/mlx5_rss.c
index 1249943..d3d2603 100644
--- a/drivers/net/mlx5/mlx5_rss.c
+++ b/drivers/net/mlx5/mlx5_rss.c
@@ -350,6 +350,7 @@
int ret;
struct priv *priv = dev->data->dev_private;
+ assert(!mlx5_is_secondary());
mlx5_dev_stop(dev);
priv_lock(priv);
ret = priv_dev_rss_reta_update(priv, reta_conf, reta_size);
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 22448c9..b71f72f 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -1250,6 +1250,7 @@
unsigned int count = 0;
struct rte_intr_handle *intr_handle = priv->dev->intr_handle;
+ assert(!mlx5_is_secondary());
if (!priv->dev->data->dev_conf.intr_conf.rxq)
return 0;
priv_rx_intr_vec_disable(priv);
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 3fa9401..51c31aa 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -66,8 +66,8 @@
return 0;
}
/* Update Rx/Tx callback. */
- priv_select_tx_function(priv);
- priv_select_rx_function(priv);
+ priv_dev_select_tx_function(priv, dev);
+ priv_dev_select_rx_function(priv, dev);
DEBUG("%p: allocating and configuring hash RX queues", (void *)dev);
err = priv_create_hash_rxqs(priv);
if (!err)
--
1.8.3.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* [dpdk-dev] [PATCH v5 2/5] net/mlx5: install a socket to exchange a file descriptor
2017-08-24 14:03 [dpdk-dev] [PATCH v1 1/2] net/mlx5: change eth device reference for secondary process Xueming Li
` (17 preceding siblings ...)
2017-10-06 15:45 ` [dpdk-dev] [PATCH v5 1/5] net/mlx5: change eth device reference for secondary process Xueming Li
@ 2017-10-06 15:45 ` Xueming Li
2017-10-06 18:21 ` Ferruh Yigit
2017-10-06 15:45 ` [dpdk-dev] [PATCH v5 3/5] net/mlx5: allocate verbs object into shared memory Xueming Li
` (2 subsequent siblings)
21 siblings, 1 reply; 41+ messages in thread
From: Xueming Li @ 2017-10-06 15:45 UTC (permalink / raw)
To: Nelio Laranjeiro, ferruh.yigit; +Cc: Xueming Li, dev
Use a unix socket to get back the communication channel with the Kernel
driver from the primary process, this is necessary to remap those pages
in the secondary process memory space and thus use the same Tx queues.
This is only supported from rdma-core (v15).
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Signed-off-by: Xueming Li <xuemingl@mellanox.com>
---
drivers/net/mlx5/Makefile | 1 +
drivers/net/mlx5/mlx5.c | 35 +++++
drivers/net/mlx5/mlx5.h | 9 ++
drivers/net/mlx5/mlx5_ethdev.c | 54 ++++++--
drivers/net/mlx5/mlx5_rxtx.h | 2 +
drivers/net/mlx5/mlx5_socket.c | 294 +++++++++++++++++++++++++++++++++++++++++
drivers/net/mlx5/mlx5_txq.c | 66 +++++++++
7 files changed, 449 insertions(+), 12 deletions(-)
create mode 100644 drivers/net/mlx5/mlx5_socket.c
diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index f75d344..bd9ea57 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -52,6 +52,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rss.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_fdir.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_mr.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow.c
+SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_socket.c
# Basic CFLAGS.
CFLAGS += -O3
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index a4b7184..f0e1099 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -209,6 +209,7 @@ struct mlx5_args {
}
if (priv->reta_idx != NULL)
rte_free(priv->reta_idx);
+ priv_socket_uninit(priv);
priv_unlock(priv);
memset(priv, 0, sizeof(*priv));
}
@@ -578,6 +579,40 @@ struct mlx5_args {
.rx_vec_en = MLX5_ARG_UNSET,
};
+ mlx5_dev[idx].ports |= test;
+
+ if (mlx5_is_secondary()) {
+ /* from rte_ethdev.c */
+ char name[RTE_ETH_NAME_MAX_LEN];
+
+ snprintf(name, sizeof(name), "%s port %u",
+ ibv_get_device_name(ibv_dev), port);
+ eth_dev = rte_eth_dev_attach_secondary(name);
+ if (eth_dev == NULL) {
+ ERROR("can not attach rte ethdev");
+ err = ENOMEM;
+ goto error;
+ }
+ eth_dev->device = &pci_dev->device;
+ eth_dev->dev_ops = NULL;
+ priv = eth_dev->data->dev_private;
+ /* Receive command fd from primary process */
+ err = priv_socket_connect(priv);
+ if (err < 0) {
+ err = -err;
+ goto error;
+ }
+ /* Remap UAR for Tx queues. */
+ err = priv_tx_uar_remap(priv, err);
+ if (err < 0) {
+ err = -err;
+ goto error;
+ }
+ priv_dev_select_rx_function(priv, eth_dev);
+ priv_dev_select_tx_function(priv, eth_dev);
+ continue;
+ }
+
DEBUG("using port %u (%08" PRIx32 ")", port, test);
ctx = ibv_open_device(ibv_dev);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 78b27ed..1ce02e8 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -151,6 +151,8 @@ struct priv {
uint32_t link_speed_capa; /* Link speed capabilities. */
struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */
rte_spinlock_t lock; /* Lock for control functions. */
+ int primary_socket; /* Unix socket for primary process. */
+ struct rte_intr_handle intr_handle_socket; /* Interrupt handler. */
};
/**
@@ -299,4 +301,11 @@ int mlx5_flow_destroy(struct rte_eth_dev *, struct rte_flow *,
void priv_flow_stop(struct priv *);
int priv_flow_rxq_in_use(struct priv *, struct rxq *);
+/* mlx5_socket.c */
+
+int priv_socket_init(struct priv *priv);
+int priv_socket_uninit(struct priv *priv);
+void priv_socket_handle(struct priv *priv);
+int priv_socket_connect(struct priv *priv);
+
#endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 831d920..adcde9c 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -31,6 +31,8 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#define _GNU_SOURCE
+
#include <stddef.h>
#include <assert.h>
#include <unistd.h>
@@ -50,6 +52,7 @@
#include <linux/version.h>
#include <fcntl.h>
#include <stdalign.h>
+#include <sys/un.h>
#include <rte_atomic.h>
#include <rte_ethdev.h>
@@ -1237,6 +1240,23 @@ struct priv *
}
/**
+ * Handle interrupts from the socket.
+ *
+ * @param cb_arg
+ * Callback argument.
+ */
+static void
+mlx5_dev_handler_socket(void *cb_arg)
+{
+	struct rte_eth_dev *eth_dev = cb_arg;
+	struct priv *self = eth_dev->data->dev_private;
+
+	priv_lock(self);
+	priv_socket_handle(self); /* Serve the peer under the priv lock. */
+	priv_unlock(self);
+}
+
+/**
* Uninstall interrupt handler.
*
* @param priv
@@ -1247,17 +1267,20 @@ struct priv *
void
priv_dev_interrupt_handler_uninstall(struct priv *priv, struct rte_eth_dev *dev)
{
- if (!dev->data->dev_conf.intr_conf.lsc &&
- !dev->data->dev_conf.intr_conf.rmv)
- return;
- rte_intr_callback_unregister(&priv->intr_handle,
- mlx5_dev_interrupt_handler,
- dev);
+ if (dev->data->dev_conf.intr_conf.lsc ||
+ dev->data->dev_conf.intr_conf.rmv)
+ rte_intr_callback_unregister(&priv->intr_handle,
+ mlx5_dev_interrupt_handler, dev);
+ if (priv->primary_socket)
+ rte_intr_callback_unregister(&priv->intr_handle_socket,
+ mlx5_dev_handler_socket, dev);
if (priv->pending_alarm)
rte_eal_alarm_cancel(mlx5_dev_link_status_handler, dev);
priv->pending_alarm = 0;
priv->intr_handle.fd = 0;
priv->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+ priv->intr_handle_socket.fd = 0;
+ priv->intr_handle_socket.type = RTE_INTR_HANDLE_UNKNOWN;
}
/**
@@ -1273,9 +1296,7 @@ struct priv *
{
int rc, flags;
- if (!dev->data->dev_conf.intr_conf.lsc &&
- !dev->data->dev_conf.intr_conf.rmv)
- return;
+ assert(!mlx5_is_secondary());
assert(priv->ctx->async_fd > 0);
flags = fcntl(priv->ctx->async_fd, F_GETFL);
rc = fcntl(priv->ctx->async_fd, F_SETFL, flags | O_NONBLOCK);
@@ -1283,12 +1304,21 @@ struct priv *
INFO("failed to change file descriptor async event queue");
dev->data->dev_conf.intr_conf.lsc = 0;
dev->data->dev_conf.intr_conf.rmv = 0;
- } else {
+ }
+ if (dev->data->dev_conf.intr_conf.lsc ||
+ dev->data->dev_conf.intr_conf.rmv) {
priv->intr_handle.fd = priv->ctx->async_fd;
priv->intr_handle.type = RTE_INTR_HANDLE_EXT;
rte_intr_callback_register(&priv->intr_handle,
- mlx5_dev_interrupt_handler,
- dev);
+ mlx5_dev_interrupt_handler, dev);
+ }
+
+ rc = priv_socket_init(priv);
+ if (!rc && priv->primary_socket) {
+ priv->intr_handle_socket.fd = priv->primary_socket;
+ priv->intr_handle_socket.type = RTE_INTR_HANDLE_EXT;
+ rte_intr_callback_register(&priv->intr_handle_socket,
+ mlx5_dev_handler_socket, dev);
}
}
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 63e6881..7e40fcd 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -286,6 +286,7 @@ struct txq_ctrl {
struct ibv_qp *qp; /* Queue Pair. */
unsigned int socket; /* CPU socket ID for allocations. */
struct txq txq; /* Data path structure. */
+ off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
};
/* mlx5_rxq.c */
@@ -319,6 +320,7 @@ int txq_ctrl_setup(struct rte_eth_dev *, struct txq_ctrl *, uint16_t,
int mlx5_tx_queue_setup(struct rte_eth_dev *, uint16_t, uint16_t, unsigned int,
const struct rte_eth_txconf *);
void mlx5_tx_queue_release(void *);
+int priv_tx_uar_remap(struct priv *priv, int fd);
/* mlx5_rxtx.c */
diff --git a/drivers/net/mlx5/mlx5_socket.c b/drivers/net/mlx5/mlx5_socket.c
new file mode 100644
index 0000000..78b4138
--- /dev/null
+++ b/drivers/net/mlx5/mlx5_socket.c
@@ -0,0 +1,294 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2016 6WIND S.A.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#define _GNU_SOURCE
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/stat.h>
+
+#include "mlx5.h"
+#include "mlx5_utils.h"
+
+/**
+ * Create the Unix socket secondary processes connect to: non-blocking,
+ * bound to /var/tmp/<driver>_<fd>, listening, kept in priv->primary_socket.
+ *
+ * @param[in] priv
+ *   Pointer to private structure.
+ *
+ * @return
+ *   0 on success, positive errno value on failure.
+ */
+int
+priv_socket_init(struct priv *priv)
+{
+	struct sockaddr_un sun = {
+		.sun_family = AF_UNIX,
+	};
+	int ret;
+	int flags;
+	struct stat file_stat;
+
+	ret = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (ret < 0) {
+		ret = errno;
+		WARN("secondary process not supported: %s", strerror(ret));
+		return ret;
+	}
+	priv->primary_socket = ret;
+	flags = fcntl(priv->primary_socket, F_GETFL, 0);
+	if (flags == -1) {
+		ret = errno;
+		goto out;
+	}
+	if (fcntl(priv->primary_socket, F_SETFL, flags | O_NONBLOCK) < 0) {
+		ret = errno;
+		goto out;
+	}
+	snprintf(sun.sun_path, sizeof(sun.sun_path), "/var/tmp/%s_%d",
+		 MLX5_DRIVER_NAME, priv->primary_socket);
+	if (!stat(sun.sun_path, &file_stat))
+		claim_zero(remove(sun.sun_path));
+	if (bind(priv->primary_socket, (const struct sockaddr *)&sun,
+		 sizeof(sun)) < 0) {
+		ret = errno;
+		WARN("cannot bind socket, secondary process not supported: %s",
+		     strerror(ret));
+		goto close;
+	}
+	if (listen(priv->primary_socket, 0) < 0) {
+		ret = errno;
+		WARN("Secondary process not supported: %s", strerror(ret));
+		goto close;
+	}
+	return 0;
+close:
+	remove(sun.sun_path);
+out:
+	claim_zero(close(priv->primary_socket));
+	priv->primary_socket = 0;
+	return ret;
+}
+
+/**
+ * Close the primary socket and remove its file; no-op when none is open.
+ *
+ * @param[in] priv
+ *   Pointer to private structure.
+ * @return 0 in all cases.
+ */
+int
+priv_socket_uninit(struct priv *priv)
+{
+	MKSTR(path, "/var/tmp/%s_%d", MLX5_DRIVER_NAME, priv->primary_socket);
+	if (!priv->primary_socket)
+		return 0; /* Never initialised: do not close descriptor 0. */
+	claim_zero(close(priv->primary_socket));
+	priv->primary_socket = 0;
+	claim_zero(remove(path));
+	return 0;
+}
+
+/**
+ * Handle socket interrupts: accept one peer, require SCM_CREDENTIALS whose
+ * uid/gid match this process, reply with priv->ctx->cmd_fd as SCM_RIGHTS.
+ * @param priv
+ * Pointer to private structure.
+ */
+void
+priv_socket_handle(struct priv *priv)
+{
+ int conn_sock;
+ int ret = 0;
+ struct cmsghdr *cmsg = NULL;
+ struct ucred *cred = NULL;
+ char buf[CMSG_SPACE(sizeof(struct ucred))] = { 0 };
+ char vbuf[1024] = { 0 };
+ struct iovec io = {
+ .iov_base = vbuf,
+ .iov_len = sizeof(*vbuf), /* sizeof(*vbuf) == 1: one payload byte. */
+ };
+ struct msghdr msg = {
+ .msg_iov = &io,
+ .msg_iovlen = 1,
+ .msg_control = buf,
+ .msg_controllen = sizeof(buf),
+ };
+ int *fd;
+
+ /* Accept the connection from the client. */
+ conn_sock = accept(priv->primary_socket, NULL, NULL);
+ if (conn_sock < 0) {
+ WARN("connection failed: %s", strerror(errno));
+ return;
+ }
+ ret = setsockopt(conn_sock, SOL_SOCKET, SO_PASSCRED, &(int){1},
+ sizeof(int)); /* SO_PASSCRED: enable receiving SCM_CREDENTIALS. */
+ if (ret < 0) {
+ WARN("cannot change socket options");
+ goto out;
+ }
+ ret = recvmsg(conn_sock, &msg, MSG_WAITALL);
+ if (ret < 0) {
+ WARN("received an empty message: %s", strerror(errno));
+ goto out;
+ }
+ /* Expect exactly one control message, carrying the credentials. */
+ cmsg = CMSG_FIRSTHDR(&msg);
+ if (cmsg == NULL) {
+ WARN("no message");
+ goto out;
+ }
+ if ((cmsg->cmsg_type == SCM_CREDENTIALS) &&
+ (cmsg->cmsg_len >= sizeof(*cred))) {
+ cred = (struct ucred *)CMSG_DATA(cmsg);
+ assert(cred != NULL);
+ }
+ cmsg = CMSG_NXTHDR(&msg, cmsg); /* Any further control message is an error. */
+ if (cmsg != NULL) {
+ WARN("Message wrongly formated");
+ goto out;
+ }
+ /* cred stays NULL unless a large enough SCM_CREDENTIALS was received. */
+ if ((cred == NULL) || (cred->uid != getuid()) ||
+ (cred->gid != getgid())) {
+ WARN("wrong credentials");
+ goto out;
+ }
+ /* Reuse msg and its control buffer to send the fd as SCM_RIGHTS. */
+ cmsg = CMSG_FIRSTHDR(&msg);
+ assert(cmsg != NULL);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(priv->ctx->cmd_fd));
+ fd = (int *)CMSG_DATA(cmsg);
+ *fd = priv->ctx->cmd_fd;
+ ret = sendmsg(conn_sock, &msg, 0);
+ if (ret < 0)
+ WARN("cannot send response");
+out:
+ close(conn_sock);
+}
+
+/**
+ * Connect to the primary process and receive its Verbs command descriptor.
+ *
+ * @param[in] priv
+ *   Pointer to private structure.
+ *
+ * @return
+ *   fd on success, negative errno value on failure (never 0).
+ */
+int
+priv_socket_connect(struct priv *priv)
+{
+	struct sockaddr_un sun = {
+		.sun_family = AF_UNIX,
+	};
+	int socket_fd;
+	int *fd = NULL;
+	int ret;
+	struct ucred *cred;
+	char buf[CMSG_SPACE(sizeof(*cred))] = { 0 };
+	char vbuf[1024] = { 0 };
+	struct iovec io = {
+		.iov_base = vbuf,
+		.iov_len = sizeof(*vbuf),
+	};
+	struct msghdr msg = {
+		.msg_control = buf,
+		.msg_controllen = sizeof(buf),
+		.msg_iov = &io,
+		.msg_iovlen = 1,
+	};
+	struct cmsghdr *cmsg;
+
+	socket_fd = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (socket_fd < 0) {
+		ret = -errno;
+		WARN("cannot connect to primary");
+		return ret;
+	}
+	snprintf(sun.sun_path, sizeof(sun.sun_path), "/var/tmp/%s_%d",
+		 MLX5_DRIVER_NAME, priv->primary_socket);
+	ret = connect(socket_fd, (const struct sockaddr *)&sun, sizeof(sun));
+	if (ret < 0) {
+		ret = -errno;
+		WARN("cannot connect to primary");
+		goto out;
+	}
+	cmsg = CMSG_FIRSTHDR(&msg);
+	if (cmsg == NULL) {
+		DEBUG("cannot get first message");
+		ret = -EINVAL;
+		goto out;
+	}
+	cmsg->cmsg_level = SOL_SOCKET;
+	cmsg->cmsg_type = SCM_CREDENTIALS;
+	cmsg->cmsg_len = CMSG_LEN(sizeof(*cred));
+	cred = (struct ucred *)CMSG_DATA(cmsg);
+	cred->pid = getpid();
+	cred->uid = getuid();
+	cred->gid = getgid();
+	ret = sendmsg(socket_fd, &msg, MSG_DONTWAIT);
+	if (ret < 0) {
+		ret = -errno;
+		WARN("cannot send credentials to primary: %s", strerror(-ret));
+		goto out;
+	}
+	ret = recvmsg(socket_fd, &msg, MSG_WAITALL);
+	if (ret <= 0) {
+		ret = ret ? -errno : -ECONNRESET; /* 0: peer closed. */
+		WARN("no message from primary: %s", strerror(-ret));
+		goto out;
+	}
+	cmsg = CMSG_FIRSTHDR(&msg);
+	if (cmsg == NULL || cmsg->cmsg_level != SOL_SOCKET ||
+	    cmsg->cmsg_type != SCM_RIGHTS) {
+		WARN("No file descriptor received");
+		ret = -EINVAL;
+		goto out;
+	}
+	fd = (int *)CMSG_DATA(cmsg);
+	ret = *fd;
+	if (ret <= 0) {
+		WARN("no file descriptor received: %s", strerror(EINVAL));
+		ret = -EINVAL;
+	}
+out:
+	close(socket_fd);
+	return ret;
+}
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 39a38c1..1b45b4a 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -36,6 +36,8 @@
#include <errno.h>
#include <string.h>
#include <stdint.h>
+#include <unistd.h>
+#include <sys/mman.h>
/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
@@ -168,6 +170,7 @@
struct mlx5dv_obj obj;
int ret = 0;
+ qp.comp_mask = MLX5DV_QP_MASK_UAR_MMAP_OFFSET;
obj.cq.in = ibcq;
obj.cq.out = &cq_info;
obj.qp.in = tmpl->qp;
@@ -194,6 +197,13 @@
tmpl->txq.elts =
(struct rte_mbuf *(*)[1 << tmpl->txq.elts_n])
((uintptr_t)txq_ctrl + sizeof(*txq_ctrl));
+	/* Use the offset only if the UAR_MMAP_OFFSET bit is set in comp_mask. */
+	if (qp.comp_mask & MLX5DV_QP_MASK_UAR_MMAP_OFFSET) {
+		tmpl->uar_mmap_offset = qp.uar_mmap_offset;
+	} else {
+		ERROR("Failed to retrieve UAR info, invalid libmlx5.so version");
+		return EINVAL;
+	}
return 0;
}
@@ -557,3 +567,59 @@
rte_free(txq_ctrl);
priv_unlock(priv);
}
+
+
+/**
+ * Map locally UAR used in Tx queues for BlueFlame doorbell.
+ *
+ * Each UAR page is mapped once, at the page-aligned address taken from the
+ * queue's bf_reg; unconfigured (NULL) queues are skipped.
+ *
+ * @param[in] priv
+ *   Pointer to private structure.
+ * @param fd
+ *   Verbs file descriptor to map UAR pages.
+ *
+ * @return
+ *   0 on success, negative errno value on failure.
+ */
+int
+priv_tx_uar_remap(struct priv *priv, int fd)
+{
+	unsigned int i, j;
+	uintptr_t pages[priv->txqs_n ? priv->txqs_n : 1];
+	unsigned int pages_n = 0;
+	uintptr_t uar_va;
+	void *addr;
+	struct txq *txq;
+	struct txq_ctrl *txq_ctrl;
+	size_t page_size = sysconf(_SC_PAGESIZE);
+
+	/*
+	 * As rdma-core, UARs are mapped in size of OS page size.
+	 * Use aligned address to avoid duplicate mmap.
+	 * Ref to libmlx5 function: mlx5_init_context()
+	 */
+	for (i = 0; i != priv->txqs_n; ++i) {
+		txq = (*priv->txqs)[i];
+		if (txq == NULL)
+			continue; /* Queue not configured: nothing to map. */
+		txq_ctrl = container_of(txq, struct txq_ctrl, txq);
+		uar_va = RTE_ALIGN_FLOOR((uintptr_t)txq->bf_reg, page_size);
+		for (j = 0; j != pages_n; ++j)
+			if (pages[j] == uar_va)
+				break;
+		if (j != pages_n)
+			continue; /* Page already mapped for another queue. */
+		pages[pages_n++] = uar_va;
+		addr = mmap((void *)uar_va, page_size,
+			    PROT_WRITE, MAP_FIXED | MAP_SHARED, fd,
+			    txq_ctrl->uar_mmap_offset);
+		if (addr != (void *)uar_va) {
+			int err = errno; /* Saved before logging. */
+			ERROR("call to mmap failed on UAR for txq %u\n", i);
+			return -err;
+		}
+	}
+	return 0;
+}
--
1.8.3.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* [dpdk-dev] [PATCH v5 3/5] net/mlx5: allocate verbs object into shared memory
2017-08-24 14:03 [dpdk-dev] [PATCH v1 1/2] net/mlx5: change eth device reference for secondary process Xueming Li
` (18 preceding siblings ...)
2017-10-06 15:45 ` [dpdk-dev] [PATCH v5 2/5] net/mlx5: install a socket to exchange a file descriptor Xueming Li
@ 2017-10-06 15:45 ` Xueming Li
2017-10-06 15:45 ` [dpdk-dev] [PATCH v5 4/5] net/mlx5: add operations for secondary process Xueming Li
2017-10-06 15:45 ` [dpdk-dev] [PATCH v5 5/5] net/mlx5: multi-process document update Xueming Li
21 siblings, 0 replies; 41+ messages in thread
From: Xueming Li @ 2017-10-06 15:45 UTC (permalink / raw)
To: Nelio Laranjeiro, ferruh.yigit; +Cc: Xueming Li, dev
The PMD uses Verbs objects which were not available in shared memory.
This patch changes the location where Verbs objects are allocated (from the
process memory address space to the shared memory address space), thus
allowing a secondary process to use those objects by mapping this shared
memory space into its own memory space.
Signed-off-by: Xueming Li <xuemingl@mellanox.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
drivers/net/mlx5/mlx5.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 55 insertions(+)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index f0e1099..6541ee3 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -132,6 +132,52 @@ struct mlx5_args {
}
/**
+ * Verbs callback to allocate a memory. This function should allocate the space
+ * according to the size provided residing inside a huge page.
+ * Please note that all allocation must respect the alignment from libmlx5
+ * (i.e. currently sysconf(_SC_PAGESIZE)).
+ *
+ * @param[in] size
+ * The size in bytes of the memory to allocate.
+ * @param[in] data
+ * A pointer to the callback data.
+ *
+ * @return
+ * a pointer to the allocate space.
+ */
+static void *
+mlx5_alloc_verbs_buf(size_t size, void *data)
+{
+	struct priv *priv = data;
+	void *ret;
+	size_t alignment = sysconf(_SC_PAGESIZE); /* Page-size alignment. */
+
+	assert(data != NULL);
+	assert(!mlx5_is_secondary());
+	ret = rte_malloc_socket(__func__, size, alignment,
+				priv->dev->device->numa_node);
+	DEBUG("Extern alloc size: %zu, align: %zu: %p", size, alignment, ret);
+	return ret;
+}
+
+/**
+ * Verbs callback releasing a buffer through rte_free().
+ *
+ * @param[in] ptr
+ *   Buffer to release.
+ * @param[in] data
+ *   Callback data; only checked by assertion.
+ */
+static void
+mlx5_free_verbs_buf(void *ptr, void *data __rte_unused)
+{
+	assert(data != NULL);
+	assert(!mlx5_is_secondary());
+	DEBUG("Extern free request: %p", ptr);
+	rte_free(ptr);
+}
+
+/**
* DPDK callback to close the device.
*
* Destroy all queues and objects, free memory.
@@ -826,6 +872,15 @@ struct mlx5_args {
eth_dev->dev_ops = &mlx5_dev_ops;
TAILQ_INIT(&priv->flows);
+ /* Hint libmlx5 to use PMD allocator for data plane resources */
+ struct mlx5dv_ctx_allocators alctr = {
+ .alloc = &mlx5_alloc_verbs_buf,
+ .free = &mlx5_free_verbs_buf,
+ .data = priv,
+ };
+ mlx5dv_set_context_attr(ctx, MLX5DV_CTX_ATTR_BUF_ALLOCATORS,
+ (void *)((uintptr_t)&alctr));
+
/* Bring Ethernet device up. */
DEBUG("forcing Ethernet interface up");
priv_set_flags(priv, ~IFF_UP, IFF_UP);
--
1.8.3.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* [dpdk-dev] [PATCH v5 4/5] net/mlx5: add operations for secondary process
2017-08-24 14:03 [dpdk-dev] [PATCH v1 1/2] net/mlx5: change eth device reference for secondary process Xueming Li
` (19 preceding siblings ...)
2017-10-06 15:45 ` [dpdk-dev] [PATCH v5 3/5] net/mlx5: allocate verbs object into shared memory Xueming Li
@ 2017-10-06 15:45 ` Xueming Li
2017-10-06 15:45 ` [dpdk-dev] [PATCH v5 5/5] net/mlx5: multi-process document update Xueming Li
21 siblings, 0 replies; 41+ messages in thread
From: Xueming Li @ 2017-10-06 15:45 UTC (permalink / raw)
To: Nelio Laranjeiro, ferruh.yigit; +Cc: Xueming Li, dev
Add operations that are safe for secondary processes:
* (x)stats
* device info get
* rx/tx descriptor status
Signed-off-by: Xueming Li <xuemingl@mellanox.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
drivers/net/mlx5/mlx5.c | 16 +++++++++++++++-
drivers/net/mlx5/mlx5.h | 1 +
drivers/net/mlx5/mlx5_ethdev.c | 11 +++++------
3 files changed, 21 insertions(+), 7 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 6541ee3..92adbcd 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -303,6 +303,18 @@ struct mlx5_args {
.rx_queue_intr_disable = mlx5_rx_intr_disable,
};
+
+static const struct eth_dev_ops mlx5_dev_sec_ops = { /* Secondary process. */
+	.dev_infos_get = mlx5_dev_infos_get,
+	.stats_get = mlx5_stats_get,
+	.stats_reset = mlx5_stats_reset,
+	.xstats_get = mlx5_xstats_get,
+	.xstats_get_names = mlx5_xstats_get_names,
+	.xstats_reset = mlx5_xstats_reset,
+	.rx_descriptor_status = mlx5_rx_descriptor_status,
+	.tx_descriptor_status = mlx5_tx_descriptor_status,
+};
+
static struct {
struct rte_pci_addr pci_addr; /* associated PCI address */
uint32_t ports; /* physical ports bitfield. */
@@ -640,7 +652,7 @@ struct mlx5_args {
goto error;
}
eth_dev->device = &pci_dev->device;
- eth_dev->dev_ops = NULL;
+ eth_dev->dev_ops = &mlx5_dev_sec_ops;
priv = eth_dev->data->dev_private;
/* Receive command fd from primary process */
err = priv_socket_connect(priv);
@@ -707,6 +719,8 @@ struct mlx5_args {
}
priv->ctx = ctx;
+ strncpy(priv->ibdev_path, priv->ctx->device->ibdev_path,
+ sizeof(priv->ibdev_path));
priv->device_attr = device_attr;
priv->port = port;
priv->pd = pd;
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 1ce02e8..928aeb6 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -91,6 +91,7 @@ struct priv {
struct ibv_context *ctx; /* Verbs context. */
struct ibv_device_attr_ex device_attr; /* Device properties. */
struct ibv_pd *pd; /* Protection Domain. */
+ char ibdev_path[IBV_SYSFS_PATH_MAX]; /* IB device path for secondary */
/*
* MAC addresses array and configuration bit-field.
* An extra entry that cannot be modified by the DPDK is reserved
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index adcde9c..318bc9d 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -165,7 +165,7 @@ struct priv *
char match[IF_NAMESIZE] = "";
{
- MKSTR(path, "%s/device/net", priv->ctx->device->ibdev_path);
+ MKSTR(path, "%s/device/net", priv->ibdev_path);
dir = opendir(path);
if (dir == NULL)
@@ -183,7 +183,7 @@ struct priv *
continue;
MKSTR(path, "%s/device/net/%s/%s",
- priv->ctx->device->ibdev_path, name,
+ priv->ibdev_path, name,
(dev_type ? "dev_id" : "dev_port"));
file = fopen(path, "rb");
@@ -271,11 +271,11 @@ struct priv *
if (priv_is_ib_cntr(entry)) {
MKSTR(path, "%s/ports/1/hw_counters/%s",
- priv->ctx->device->ibdev_path, entry);
+ priv->ibdev_path, entry);
file = fopen(path, "rb");
} else {
MKSTR(path, "%s/device/net/%s/%s",
- priv->ctx->device->ibdev_path, ifname, entry);
+ priv->ibdev_path, ifname, entry);
file = fopen(path, "rb");
}
if (file == NULL)
@@ -318,8 +318,7 @@ struct priv *
if (priv_get_ifname(priv, &ifname))
return -1;
- MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device->ibdev_path,
- ifname, entry);
+ MKSTR(path, "%s/device/net/%s/%s", priv->ibdev_path, ifname, entry);
file = fopen(path, "wb");
if (file == NULL)
--
1.8.3.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* [dpdk-dev] [PATCH v5 5/5] net/mlx5: multi-process document update
2017-08-24 14:03 [dpdk-dev] [PATCH v1 1/2] net/mlx5: change eth device reference for secondary process Xueming Li
` (20 preceding siblings ...)
2017-10-06 15:45 ` [dpdk-dev] [PATCH v5 4/5] net/mlx5: add operations for secondary process Xueming Li
@ 2017-10-06 15:45 ` Xueming Li
21 siblings, 0 replies; 41+ messages in thread
From: Xueming Li @ 2017-10-06 15:45 UTC (permalink / raw)
To: Nelio Laranjeiro, ferruh.yigit; +Cc: Xueming Li, dev
This patch updates the feature list and NIC guide to mark the driver as
multi-process enabled.
Signed-off-by: Xueming Li <xuemingl@mellanox.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
doc/guides/nics/features/mlx5.ini | 1 +
doc/guides/nics/mlx5.rst | 4 ++--
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/doc/guides/nics/features/mlx5.ini b/doc/guides/nics/features/mlx5.ini
index 4a2c3a6..c363639 100644
--- a/doc/guides/nics/features/mlx5.ini
+++ b/doc/guides/nics/features/mlx5.ini
@@ -35,6 +35,7 @@ Tx descriptor status = Y
Basic stats = Y
Extended stats = Y
Stats per queue = Y
+Multiprocess aware = Y
Other kdrv = Y
ARMv8 = Y
Power8 = Y
diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index be0e91c..d24941a 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -92,7 +92,7 @@ Features
- Flow director (RTE_FDIR_MODE_PERFECT, RTE_FDIR_MODE_PERFECT_MAC_VLAN and
RTE_ETH_FDIR_REJECT).
- Flow API.
-- Secondary process TX is supported.
+- Multiple process.
- KVM and VMware ESX SR-IOV modes are supported.
- RSS hash result is supported.
- Hardware TSO.
@@ -106,7 +106,7 @@ Limitations
- Inner RSS for VXLAN frames is not supported yet.
- Port statistics through software counters only.
- Hardware checksum RX offloads for VXLAN inner header are not supported yet.
-- Secondary process RX is not supported.
+- Forked secondary process not supported.
- Flow pattern without any specific vlan will match for vlan packets as well:
When VLAN spec is not specified in the pattern, the matching rule will be created with VLAN as a wild card.
--
1.8.3.1
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [dpdk-dev] [PATCH v4 0/5] net/mlx5 multi-process support
2017-10-05 0:17 ` Ferruh Yigit
@ 2017-10-06 15:52 ` Xueming(Steven) Li
0 siblings, 0 replies; 41+ messages in thread
From: Xueming(Steven) Li @ 2017-10-06 15:52 UTC (permalink / raw)
To: Ferruh Yigit, Nélio Laranjeiro, Adrien Mazarguil; +Cc: dev
> -----Original Message-----
> From: Ferruh Yigit [mailto:ferruh.yigit@intel.com]
> Sent: Thursday, October 5, 2017 8:18 AM
> To: Xueming(Steven) Li <xuemingl@mellanox.com>; Nélio Laranjeiro
> <nelio.laranjeiro@6wind.com>; Adrien Mazarguil
> <adrien.mazarguil@6wind.com>
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v4 0/5] net/mlx5 multi-process support
>
> On 9/19/2017 3:31 PM, Xueming Li wrote:
> > This patchset enhances Mellanox multi-process by supporting all
> > multi-process examples, also support reading ethdev (x)stats in
> secondary process.
> >
> > Start from V2, this patchset depends on upstream rdma-core enhancement
> > and l2fork example bug fix:
> > https://emea01.safelinks.protection.outlook.com/?url=http%3A%2F%2Fwww.
> > dpdk.org%2Fml%2Farchives%2Fdev%2F2017-August%2F073405.html&data=02%7C0
> > 1%7Cxuemingl%40mellanox.com%7C298eed4fbcb74aef3d6a08d50b868a08%7Ca6529
> > 71c7d2e4d9ba6a4d149256f461b%7C0%7C0%7C636427594843039082&sdata=Rv4%2Bd
> > WocHEGMrPQIbcx0kXiXH40cORcTlCULPohDnRU%3D&reserved=0
> > https://emea01.safelinks.protection.outlook.com/?url=http%3A%2F%2Fwww.
> > dpdk.org%2Fml%2Farchives%2Fdev%2F2017-September%2F075568.html&data=02%
> > 7C01%7Cxuemingl%40mellanox.com%7C298eed4fbcb74aef3d6a08d50b868a08%7Ca6
> > 52971c7d2e4d9ba6a4d149256f461b%7C0%7C0%7C636427594843039082&sdata=2Qq%
> > 2FptWHbOt5jWEAemcU0kkbCOQ168RuagK5Mlpv8So%3D&reserved=0
> >
> > V4:
> > * remove forked secondary mode
> >
> > V3:
> > * add cover letter
> > * add dependency notes
> >
> > V2:
> > * split into multiple patches
> > * support forked secondary process
> > * add secondary process ethdev operations
> > * rebase on latest rdma-core upstream api
> >
> >
> > Xueming Li (5):
> > net/mlx5: change eth device reference for secondary process
> > net/mlx5: install a socket to exchange a file descriptor
> > net/mlx5: allocate verbs object into shared memory
> > net/mlx5: add operations for secondary process
> > net/mlx5: multi-process document update
>
> Hi Xueming,
>
> I guess all dependent patches for this patchset have been merged into
> next-net, so this one can be applied.
>
> But it is causing merge conflicts on the latest next-net; can you please
> rebase the patchset and send a new version?
>
> Thanks,
> Ferruh
Hi Ferruh,
Rebased and v5 uploaded, thanks very much.
Best Regards,
Xueming
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [dpdk-dev] [PATCH v5 2/5] net/mlx5: install a socket to exchange a file descriptor
2017-10-06 15:45 ` [dpdk-dev] [PATCH v5 2/5] net/mlx5: install a socket to exchange a file descriptor Xueming Li
@ 2017-10-06 18:21 ` Ferruh Yigit
0 siblings, 0 replies; 41+ messages in thread
From: Ferruh Yigit @ 2017-10-06 18:21 UTC (permalink / raw)
To: Xueming Li, Nelio Laranjeiro; +Cc: dev
On 10/6/2017 4:45 PM, Xueming Li wrote:
> Use a unix socket to get back the communication channel with the Kernel
> driver from the primary process, this is necessary to remap those pages
> in the secondary process memory space and thus use the same Tx queues.
>
> This is only supported from rdma-core (v15).
>
> Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
> Signed-off-by: Xueming Li <xuemingl@mellanox.com>
<...>
> +int
> +priv_tx_uar_remap(struct priv *priv, int fd)
> +{
> + unsigned int i, j;
> + uintptr_t pages[priv->txqs_n];
> + unsigned int pages_n = 0;
> + uintptr_t uar_va;
> + void *addr;
> + struct txq *txq;
> + struct txq_ctrl *txq_ctrl;
> + int already_mapped;
> + size_t page_size = sysconf(_SC_PAGESIZE);
> +
> + /*
> + * As rdma-core, UARs are mapped in size of OS page size.
> + * Use aligned address to avoid duplicate mmap.
> + * Ref to libmlx5 function: mlx5_init_context()
> + */
> + for (i = 0; i != priv->txqs_n; ++i) {
> + txq = (*priv->txqs)[i];
> + txq_ctrl = container_of(txq, struct txq_ctrl, txq);
> + uar_va = (uintptr_t)txq_ctrl->txq.bf_reg;
> + uar_va = RTE_ALIGN_FLOOR(uar_va, page_size);
> + already_mapped = 0;
> + for (j = 0; j != pages_n; ++j) {
> + if (pages[j] == uar_va) {
ICC generates the following warning [1], but it looks like a false positive.
I will disable this warning for this file for ICC while applying; please
double-check the final commit.
[1]
.../drivers/net/mlx5/mlx5_txq.c(608): error #3656: variable "pages" may
be used before its value is set
if (pages[j] == uar_va) {
^
> + already_mapped = 1;
> + break;
> + }
> + }
> + if (already_mapped)
> + continue;
> + pages[pages_n++] = uar_va;
> + addr = mmap((void *)uar_va, page_size,
> + PROT_WRITE, MAP_FIXED | MAP_SHARED, fd,
> + txq_ctrl->uar_mmap_offset);
> + if (addr != (void *)uar_va) {
> + ERROR("call to mmap failed on UAR for txq %d\n", i);
> + return -1;
> + }
> + }
> + return 0;
> +}
>
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [dpdk-dev] [PATCH v5 0/5] net/mlx5 multi-process support
2017-10-06 15:45 ` [dpdk-dev] [PATCH v5 0/5] net/mlx5 multi-process support Xueming Li
@ 2017-10-06 18:21 ` Ferruh Yigit
0 siblings, 0 replies; 41+ messages in thread
From: Ferruh Yigit @ 2017-10-06 18:21 UTC (permalink / raw)
To: Xueming Li, Nelio Laranjeiro; +Cc: dev
On 10/6/2017 4:45 PM, Xueming Li wrote:
>
> This patchset enhances Mellanox multi-process support by supporting all
> multi-process examples, and also supports reading ethdev (x)stats in a
> secondary process.
>
> V5:
> * rebase on latest upstream code, patch 2/5 mlx5_ethdev.c updated.
>
> V4:
> * remove forked secondary mode
>
> V3:
> * add cover letter
> * add dependency notes
>
> V2:
> * split into multiple patches
> * support forked secondary process
> * add secondary process ethdev operations
> * rebase on latest rdma-core upstream api
>
> Xueming Li (5):
> net/mlx5: change eth device reference for secondary process
> net/mlx5: install a socket to exchange a file descriptor
> net/mlx5: allocate verbs object into shared memory
> net/mlx5: add operations for secondary process
> net/mlx5: multi-process document update
Series applied to dpdk-next-net/master, thanks.
^ permalink raw reply [flat|nested] 41+ messages in thread
end of thread, other threads:[~2017-10-06 18:21 UTC | newest]
Thread overview: 41+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-08-24 14:03 [dpdk-dev] [PATCH v1 1/2] net/mlx5: change eth device reference for secondary process Xueming Li
2017-08-24 14:03 ` [dpdk-dev] [PATCH v1 2/2] net/mlx5: add multiple process support Xueming Li
2017-08-25 7:27 ` Nélio Laranjeiro
2017-08-25 6:52 ` [dpdk-dev] [PATCH v1 1/2] net/mlx5: change eth device reference for secondary process Nélio Laranjeiro
2017-08-25 7:15 ` Xueming(Steven) Li
2017-08-25 7:32 ` Nélio Laranjeiro
2017-09-15 15:59 ` [dpdk-dev] [PATCH v2 1/6] " Xueming Li
2017-09-15 15:59 ` [dpdk-dev] [PATCH v2 2/6] net/mlx5: install a socket to exchange a file descriptor Xueming Li
2017-09-15 15:59 ` [dpdk-dev] [PATCH v2 3/6] net/mlx5: allocate verbs object into shared memory Xueming Li
2017-09-15 15:59 ` [dpdk-dev] [PATCH v2 4/6] net/mlx5: remove verbs fork check Xueming Li
2017-09-15 15:59 ` [dpdk-dev] [PATCH v2 5/6] net/mlx5: add operations for secondary process Xueming Li
2017-09-15 16:00 ` [dpdk-dev] [PATCH v2 6/6] net/mlx5: multi-process document update Xueming Li
2017-09-18 14:36 ` [dpdk-dev] [PATCH v3 0/6] net/mlx5 multi-process support Xueming Li
2017-09-18 14:36 ` [dpdk-dev] [PATCH v3 1/6] net/mlx5: change eth device reference for secondary process Xueming Li
2017-09-18 14:36 ` [dpdk-dev] [PATCH v3 2/6] net/mlx5: install a socket to exchange a file descriptor Xueming Li
2017-09-18 14:36 ` [dpdk-dev] [PATCH v3 3/6] net/mlx5: allocate verbs object into shared memory Xueming Li
2017-09-18 14:36 ` [dpdk-dev] [PATCH v3 4/6] net/mlx5: remove verbs fork check Xueming Li
2017-09-18 14:36 ` [dpdk-dev] [PATCH v3 5/6] net/mlx5: add operations for secondary process Xueming Li
2017-09-18 14:36 ` [dpdk-dev] [PATCH v3 6/6] net/mlx5: multi-process document update Xueming Li
2017-09-18 18:47 ` Mcnamara, John
2017-09-19 14:31 ` [dpdk-dev] [PATCH v4 0/5] net/mlx5 multi-process support Xueming Li
2017-09-19 14:41 ` Nélio Laranjeiro
2017-09-19 14:48 ` Ferruh Yigit
2017-09-19 15:02 ` Xueming(Steven) Li
2017-09-20 8:07 ` Nélio Laranjeiro
2017-10-05 0:17 ` Ferruh Yigit
2017-10-06 15:52 ` Xueming(Steven) Li
2017-09-19 14:31 ` [dpdk-dev] [PATCH v4 1/5] net/mlx5: change eth device reference for secondary process Xueming Li
2017-09-19 14:31 ` [dpdk-dev] [PATCH v4 2/5] net/mlx5: install a socket to exchange a file descriptor Xueming Li
2017-09-19 14:31 ` [dpdk-dev] [PATCH v4 3/5] net/mlx5: allocate verbs object into shared memory Xueming Li
2017-09-19 14:31 ` [dpdk-dev] [PATCH v4 4/5] net/mlx5: add operations for secondary process Xueming Li
2017-09-19 14:31 ` [dpdk-dev] [PATCH v4 5/5] net/mlx5: multi-process document update Xueming Li
2017-09-19 16:16 ` Mcnamara, John
2017-10-06 15:45 ` [dpdk-dev] [PATCH v5 0/5] net/mlx5 multi-process support Xueming Li
2017-10-06 18:21 ` Ferruh Yigit
2017-10-06 15:45 ` [dpdk-dev] [PATCH v5 1/5] net/mlx5: change eth device reference for secondary process Xueming Li
2017-10-06 15:45 ` [dpdk-dev] [PATCH v5 2/5] net/mlx5: install a socket to exchange a file descriptor Xueming Li
2017-10-06 18:21 ` Ferruh Yigit
2017-10-06 15:45 ` [dpdk-dev] [PATCH v5 3/5] net/mlx5: allocate verbs object into shared memory Xueming Li
2017-10-06 15:45 ` [dpdk-dev] [PATCH v5 4/5] net/mlx5: add operations for secondary process Xueming Li
2017-10-06 15:45 ` [dpdk-dev] [PATCH v5 5/5] net/mlx5: multi-process document update Xueming Li
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).