* [dpdk-dev] [PATCH 0/3] net/mlx4: add secondary process support
@ 2019-03-07 7:39 Yongseok Koh
2019-03-07 7:39 ` [dpdk-dev] [PATCH 1/3] net/mlx4: change device reference for secondary process Yongseok Koh
` (4 more replies)
0 siblings, 5 replies; 30+ messages in thread
From: Yongseok Koh @ 2019-03-07 7:39 UTC (permalink / raw)
To: shahafs; +Cc: dev
RFC:
https://mails.dpdk.org/archives/dev/2019-March/125516.html
Yongseok Koh (3):
net/mlx4: change device reference for secondary process
net/mlx4: add external allocator for Verbs object
net/mlx4: add secondary process support
doc/guides/nics/features/mlx4.ini | 1 +
doc/guides/nics/mlx4.rst | 10 +
drivers/net/mlx4/Makefile | 11 +
drivers/net/mlx4/meson.build | 13 ++
drivers/net/mlx4/mlx4.c | 449 ++++++++++++++++++++++++++++++++++++--
drivers/net/mlx4/mlx4.h | 82 ++++++-
drivers/net/mlx4/mlx4_flow.c | 39 ++--
drivers/net/mlx4/mlx4_intr.c | 20 +-
drivers/net/mlx4/mlx4_mp.c | 278 +++++++++++++++++++++++
drivers/net/mlx4/mlx4_mr.c | 40 +++-
drivers/net/mlx4/mlx4_prm.h | 4 +-
drivers/net/mlx4/mlx4_rxq.c | 40 ++--
drivers/net/mlx4/mlx4_rxtx.c | 2 +
drivers/net/mlx4/mlx4_rxtx.h | 1 +
drivers/net/mlx4/mlx4_txq.c | 124 ++++++++++-
15 files changed, 1034 insertions(+), 80 deletions(-)
create mode 100644 drivers/net/mlx4/mlx4_mp.c
--
2.11.0
* [dpdk-dev] [PATCH 1/3] net/mlx4: change device reference for secondary process
2019-03-07 7:39 [dpdk-dev] [PATCH 0/3] net/mlx4: add secondary process support Yongseok Koh
@ 2019-03-07 7:39 ` Yongseok Koh
2019-03-07 7:39 ` [dpdk-dev] [PATCH 2/3] net/mlx4: add external allocator for Verbs object Yongseok Koh
` (3 subsequent siblings)
4 siblings, 0 replies; 30+ messages in thread
From: Yongseok Koh @ 2019-03-07 7:39 UTC (permalink / raw)
To: shahafs; +Cc: dev, stable
rte_eth_devices[] is not shared between primary and secondary processes;
each process has its own static copy of the array. The back-pointer to the
device (priv->dev) therefore becomes invalid once mlx4 supports secondary
processes. Instead, priv keeps a pointer to the device data, which is shared
between processes:
struct rte_eth_dev_data *dev_data;
Two macros are added to derive the port ID and the per-process device from
priv:
#define PORT_ID(priv) ((priv)->dev_data->port_id)
#define ETH_DEV(priv) (&rte_eth_devices[PORT_ID(priv)])
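For illustration, a minimal sketch of a converted call site; struct
priv_sketch and nb_rx_queues_of() below are simplified placeholders, only
PORT_ID()/ETH_DEV() come from this patch:

#include <rte_ethdev_driver.h>  /* rte_eth_devices[], struct rte_eth_dev_data */

#define PORT_ID(priv) ((priv)->dev_data->port_id)
#define ETH_DEV(priv) (&rte_eth_devices[PORT_ID(priv)])

/* Simplified stand-in for struct mlx4_priv. */
struct priv_sketch {
	struct rte_eth_dev_data *dev_data; /* points into shared memory */
};

static unsigned int
nb_rx_queues_of(const struct priv_sketch *priv)
{
	/* ETH_DEV() resolves to this process's own rte_eth_devices[] slot,
	 * while dev_data (and thus nb_rx_queues) lives in shared memory,
	 * so the same code works in primary and secondary processes.
	 */
	return ETH_DEV(priv)->data->nb_rx_queues;
}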
Cc: stable@dpdk.org
Suggested-by: Raslan Darawsheh <rasland@mellanox.com>
Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
drivers/net/mlx4/mlx4.c | 4 ++--
drivers/net/mlx4/mlx4.h | 5 ++++-
drivers/net/mlx4/mlx4_flow.c | 39 +++++++++++++++++++++------------------
drivers/net/mlx4/mlx4_intr.c | 20 ++++++++++----------
drivers/net/mlx4/mlx4_mr.c | 8 ++++----
drivers/net/mlx4/mlx4_rxq.c | 36 +++++++++++++++++++-----------------
drivers/net/mlx4/mlx4_txq.c | 8 ++++----
7 files changed, 64 insertions(+), 56 deletions(-)
diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 5ef2e7f41e..bb6ab8ec6e 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -753,11 +753,11 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
* handled by rte_intr_rx_ctl().
*/
eth_dev->intr_handle = &priv->intr_handle;
- priv->dev = eth_dev;
+ priv->dev_data = eth_dev->data;
eth_dev->dev_ops = &mlx4_dev_ops;
/* Bring Ethernet device up. */
DEBUG("forcing Ethernet interface up");
- mlx4_dev_set_link_up(priv->dev);
+ mlx4_dev_set_link_up(eth_dev);
/* Update link status once if waiting for LSC. */
if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
mlx4_link_update(eth_dev, 0);
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 7ac49ca672..51566caf7f 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -79,7 +79,7 @@ LIST_HEAD(mlx4_mr_list, mlx4_mr);
struct mlx4_priv {
LIST_ENTRY(mlx4_priv) mem_event_cb;
/**< Called by memory event callback. */
- struct rte_eth_dev *dev; /**< Ethernet device. */
+ struct rte_eth_dev_data *dev_data; /* Pointer to device data. */
struct ibv_context *ctx; /**< Verbs context. */
struct ibv_device_attr device_attr; /**< Device properties. */
struct ibv_pd *pd; /**< Protection Domain. */
@@ -113,6 +113,9 @@ struct mlx4_priv {
/**< Configured MAC addresses. Unused entries are zeroed. */
};
+#define PORT_ID(priv) ((priv)->dev_data->port_id)
+#define ETH_DEV(priv) (&rte_eth_devices[PORT_ID(priv)])
+
/* mlx4_ethdev.c */
int mlx4_get_ifname(const struct mlx4_priv *priv, char (*ifname)[IF_NAMESIZE]);
diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c
index f4df4ab1fb..038dc71d35 100644
--- a/drivers/net/mlx4/mlx4_flow.c
+++ b/drivers/net/mlx4/mlx4_flow.c
@@ -773,7 +773,7 @@ mlx4_flow_prepare(struct mlx4_priv *priv,
if (flow->rss)
break;
queue = action->conf;
- if (queue->index >= priv->dev->data->nb_rx_queues) {
+ if (queue->index >= ETH_DEV(priv)->data->nb_rx_queues) {
msg = "queue target index beyond number of"
" configured Rx queues";
goto exit_action_not_supported;
@@ -802,7 +802,7 @@ mlx4_flow_prepare(struct mlx4_priv *priv,
/* Sanity checks. */
for (i = 0; i < rss->queue_num; ++i)
if (rss->queue[i] >=
- priv->dev->data->nb_rx_queues)
+ ETH_DEV(priv)->data->nb_rx_queues)
break;
if (i != rss->queue_num) {
msg = "queue index target beyond number of"
@@ -1072,8 +1072,8 @@ mlx4_flow_toggle(struct mlx4_priv *priv,
/* Stop at the first nonexistent target queue. */
for (i = 0; i != rss->queues; ++i)
if (rss->queue_id[i] >=
- priv->dev->data->nb_rx_queues ||
- !priv->dev->data->rx_queues[rss->queue_id[i]]) {
+ ETH_DEV(priv)->data->nb_rx_queues ||
+ !ETH_DEV(priv)->data->rx_queues[rss->queue_id[i]]) {
missing = 1;
break;
}
@@ -1258,7 +1258,7 @@ static uint16_t
mlx4_flow_internal_next_vlan(struct mlx4_priv *priv, uint16_t vlan)
{
while (vlan < 4096) {
- if (priv->dev->data->vlan_filter_conf.ids[vlan / 64] &
+ if (ETH_DEV(priv)->data->vlan_filter_conf.ids[vlan / 64] &
(UINT64_C(1) << (vlan % 64)))
return vlan;
++vlan;
@@ -1335,7 +1335,7 @@ mlx4_flow_internal(struct mlx4_priv *priv, struct rte_flow_error *error)
* get RSS by default.
*/
uint32_t queues =
- rte_align32pow2(priv->dev->data->nb_rx_queues + 1) >> 1;
+ rte_align32pow2(ETH_DEV(priv)->data->nb_rx_queues + 1) >> 1;
uint16_t queue[queues];
struct rte_flow_action_rss action_rss = {
.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
@@ -1357,9 +1357,9 @@ mlx4_flow_internal(struct mlx4_priv *priv, struct rte_flow_error *error)
};
struct ether_addr *rule_mac = &eth_spec.dst;
rte_be16_t *rule_vlan =
- (priv->dev->data->dev_conf.rxmode.offloads &
+ (ETH_DEV(priv)->data->dev_conf.rxmode.offloads &
DEV_RX_OFFLOAD_VLAN_FILTER) &&
- !priv->dev->data->promiscuous ?
+ !ETH_DEV(priv)->data->promiscuous ?
&vlan_spec.tci :
NULL;
uint16_t vlan = 0;
@@ -1439,7 +1439,7 @@ mlx4_flow_internal(struct mlx4_priv *priv, struct rte_flow_error *error)
if (!flow || !flow->internal) {
/* Not found, create a new flow rule. */
memcpy(rule_mac, mac, sizeof(*mac));
- flow = mlx4_flow_create(priv->dev, &attr, pattern,
+ flow = mlx4_flow_create(ETH_DEV(priv), &attr, pattern,
actions, error);
if (!flow) {
err = -rte_errno;
@@ -1455,15 +1455,16 @@ mlx4_flow_internal(struct mlx4_priv *priv, struct rte_flow_error *error)
goto next_vlan;
}
/* Take care of promiscuous and all multicast flow rules. */
- if (priv->dev->data->promiscuous || priv->dev->data->all_multicast) {
+ if (ETH_DEV(priv)->data->promiscuous ||
+ ETH_DEV(priv)->data->all_multicast) {
for (flow = LIST_FIRST(&priv->flows);
flow && flow->internal;
flow = LIST_NEXT(flow, next)) {
- if (priv->dev->data->promiscuous) {
+ if (ETH_DEV(priv)->data->promiscuous) {
if (flow->promisc)
break;
} else {
- assert(priv->dev->data->all_multicast);
+ assert(ETH_DEV(priv)->data->all_multicast);
if (flow->allmulti)
break;
}
@@ -1477,16 +1478,16 @@ mlx4_flow_internal(struct mlx4_priv *priv, struct rte_flow_error *error)
}
if (!flow || !flow->internal) {
/* Not found, create a new flow rule. */
- if (priv->dev->data->promiscuous) {
+ if (ETH_DEV(priv)->data->promiscuous) {
pattern[1].spec = NULL;
pattern[1].mask = NULL;
} else {
- assert(priv->dev->data->all_multicast);
+ assert(ETH_DEV(priv)->data->all_multicast);
+ pattern[1].spec = &eth_allmulti;
+ pattern[1].mask = &eth_allmulti;
}
pattern[2] = pattern[3];
- flow = mlx4_flow_create(priv->dev, &attr, pattern,
+ flow = mlx4_flow_create(ETH_DEV(priv), &attr, pattern,
actions, error);
if (!flow) {
err = -rte_errno;
@@ -1503,7 +1504,8 @@ mlx4_flow_internal(struct mlx4_priv *priv, struct rte_flow_error *error)
struct rte_flow *next = LIST_NEXT(flow, next);
if (!flow->select)
- claim_zero(mlx4_flow_destroy(priv->dev, flow, error));
+ claim_zero(mlx4_flow_destroy(ETH_DEV(priv), flow,
+ error));
else
flow->select = 0;
flow = next;
@@ -1541,7 +1543,8 @@ mlx4_flow_sync(struct mlx4_priv *priv, struct rte_flow_error *error)
for (flow = LIST_FIRST(&priv->flows);
flow && flow->internal;
flow = LIST_FIRST(&priv->flows))
- claim_zero(mlx4_flow_destroy(priv->dev, flow, error));
+ claim_zero(mlx4_flow_destroy(ETH_DEV(priv), flow,
+ error));
} else {
/* Refresh internal rules. */
ret = mlx4_flow_internal(priv, error);
@@ -1574,7 +1577,7 @@ mlx4_flow_clean(struct mlx4_priv *priv)
struct rte_flow *flow;
while ((flow = LIST_FIRST(&priv->flows)))
- mlx4_flow_destroy(priv->dev, flow, NULL);
+ mlx4_flow_destroy(ETH_DEV(priv), flow, NULL);
assert(LIST_EMPTY(&priv->rss));
}
diff --git a/drivers/net/mlx4/mlx4_intr.c b/drivers/net/mlx4/mlx4_intr.c
index ec91242196..4f33526755 100644
--- a/drivers/net/mlx4/mlx4_intr.c
+++ b/drivers/net/mlx4/mlx4_intr.c
@@ -65,7 +65,7 @@ static int
mlx4_rx_intr_vec_enable(struct mlx4_priv *priv)
{
unsigned int i;
- unsigned int rxqs_n = priv->dev->data->nb_rx_queues;
+ unsigned int rxqs_n = ETH_DEV(priv)->data->nb_rx_queues;
unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
unsigned int count = 0;
struct rte_intr_handle *intr_handle = &priv->intr_handle;
@@ -79,7 +79,7 @@ mlx4_rx_intr_vec_enable(struct mlx4_priv *priv)
return -rte_errno;
}
for (i = 0; i != n; ++i) {
- struct rxq *rxq = priv->dev->data->rx_queues[i];
+ struct rxq *rxq = ETH_DEV(priv)->data->rx_queues[i];
/* Skip queues that cannot request interrupts. */
if (!rxq || !rxq->channel) {
@@ -120,12 +120,12 @@ static void
mlx4_link_status_alarm(struct mlx4_priv *priv)
{
const struct rte_intr_conf *const intr_conf =
- &priv->dev->data->dev_conf.intr_conf;
+ &ETH_DEV(priv)->data->dev_conf.intr_conf;
assert(priv->intr_alarm == 1);
priv->intr_alarm = 0;
if (intr_conf->lsc && !mlx4_link_status_check(priv))
- _rte_eth_dev_callback_process(priv->dev,
+ _rte_eth_dev_callback_process(ETH_DEV(priv),
RTE_ETH_EVENT_INTR_LSC,
NULL);
}
@@ -145,8 +145,8 @@ mlx4_link_status_alarm(struct mlx4_priv *priv)
static int
mlx4_link_status_check(struct mlx4_priv *priv)
{
- struct rte_eth_link *link = &priv->dev->data->dev_link;
- int ret = mlx4_link_update(priv->dev, 0);
+ struct rte_eth_link *link = &ETH_DEV(priv)->data->dev_link;
+ int ret = mlx4_link_update(ETH_DEV(priv), 0);
if (ret)
return ret;
@@ -185,7 +185,7 @@ mlx4_interrupt_handler(struct mlx4_priv *priv)
uint32_t caught[RTE_DIM(type)] = { 0 };
struct ibv_async_event event;
const struct rte_intr_conf *const intr_conf =
- &priv->dev->data->dev_conf.intr_conf;
+ &ETH_DEV(priv)->data->dev_conf.intr_conf;
unsigned int i;
/* Read all message and acknowledge them. */
@@ -208,7 +208,7 @@ mlx4_interrupt_handler(struct mlx4_priv *priv)
}
for (i = 0; i != RTE_DIM(caught); ++i)
if (caught[i])
- _rte_eth_dev_callback_process(priv->dev, type[i],
+ _rte_eth_dev_callback_process(ETH_DEV(priv), type[i],
NULL);
}
@@ -282,7 +282,7 @@ int
mlx4_intr_install(struct mlx4_priv *priv)
{
const struct rte_intr_conf *const intr_conf =
- &priv->dev->data->dev_conf.intr_conf;
+ &ETH_DEV(priv)->data->dev_conf.intr_conf;
int rc;
mlx4_intr_uninstall(priv);
@@ -381,7 +381,7 @@ int
mlx4_rxq_intr_enable(struct mlx4_priv *priv)
{
const struct rte_intr_conf *const intr_conf =
- &priv->dev->data->dev_conf.intr_conf;
+ &ETH_DEV(priv)->data->dev_conf.intr_conf;
if (intr_conf->rxq && mlx4_rx_intr_vec_enable(priv) < 0)
goto error;
diff --git a/drivers/net/mlx4/mlx4_mr.c b/drivers/net/mlx4/mlx4_mr.c
index 4376ad0b60..e4be46ab2a 100644
--- a/drivers/net/mlx4/mlx4_mr.c
+++ b/drivers/net/mlx4/mlx4_mr.c
@@ -896,7 +896,7 @@ mlx4_mr_mem_event_cb(enum rte_mem_event event_type, const void *addr,
rte_rwlock_read_lock(&mlx4_mem_event_rwlock);
/* Iterate all the existing mlx4 devices. */
LIST_FOREACH(priv, &mlx4_mem_event_cb_list, mem_event_cb)
- mlx4_mr_mem_event_free_cb(priv->dev, addr, len);
+ mlx4_mr_mem_event_free_cb(ETH_DEV(priv), addr, len);
rte_rwlock_read_unlock(&mlx4_mem_event_rwlock);
break;
case RTE_MEM_EVENT_ALLOC:
@@ -1028,7 +1028,7 @@ mlx4_rx_addr2mr_bh(struct rxq *rxq, uintptr_t addr)
DEBUG("Rx queue %u: miss on top-half, mru=%u, head=%u, addr=%p",
rxq->stats.idx, mr_ctrl->mru, mr_ctrl->head, (void *)addr);
- return mlx4_mr_addr2mr_bh(priv->dev, mr_ctrl, addr);
+ return mlx4_mr_addr2mr_bh(ETH_DEV(priv), mr_ctrl, addr);
}
/**
@@ -1050,7 +1050,7 @@ mlx4_tx_addr2mr_bh(struct txq *txq, uintptr_t addr)
DEBUG("Tx queue %u: miss on top-half, mru=%u, head=%u, addr=%p",
txq->stats.idx, mr_ctrl->mru, mr_ctrl->head, (void *)addr);
- return mlx4_mr_addr2mr_bh(priv->dev, mr_ctrl, addr);
+ return mlx4_mr_addr2mr_bh(ETH_DEV(priv), mr_ctrl, addr);
}
/**
@@ -1225,7 +1225,7 @@ mlx4_tx_update_ext_mp(struct txq *txq, uintptr_t addr, struct rte_mempool *mp)
struct mlx4_mr_ctrl *mr_ctrl = &txq->mr_ctrl;
struct mlx4_priv *priv = txq->priv;
- mlx4_mr_update_ext_mp(priv->dev, mr_ctrl, mp);
+ mlx4_mr_update_ext_mp(ETH_DEV(priv), mr_ctrl, mp);
return mlx4_tx_addr2mr_bh(txq, addr);
}
diff --git a/drivers/net/mlx4/mlx4_rxq.c b/drivers/net/mlx4/mlx4_rxq.c
index 3782c6baab..50f33eb0c5 100644
--- a/drivers/net/mlx4/mlx4_rxq.c
+++ b/drivers/net/mlx4/mlx4_rxq.c
@@ -176,6 +176,7 @@ mlx4_rss_attach(struct mlx4_rss *rss)
struct ibv_wq *ind_tbl[rss->queues];
struct mlx4_priv *priv = rss->priv;
+ struct rte_eth_dev *dev = ETH_DEV(priv);
const char *msg;
unsigned int i = 0;
int ret;
@@ -189,8 +190,8 @@ mlx4_rss_attach(struct mlx4_rss *rss)
uint16_t id = rss->queue_id[i];
struct rxq *rxq = NULL;
- if (id < priv->dev->data->nb_rx_queues)
- rxq = priv->dev->data->rx_queues[id];
+ if (id < dev->data->nb_rx_queues)
+ rxq = dev->data->rx_queues[id];
if (!rxq) {
ret = EINVAL;
msg = "RSS target queue is not configured";
@@ -269,7 +270,7 @@ mlx4_rss_attach(struct mlx4_rss *rss)
rss->ind = NULL;
}
while (i--)
- mlx4_rxq_detach(priv->dev->data->rx_queues[rss->queue_id[i]]);
+ mlx4_rxq_detach(dev->data->rx_queues[rss->queue_id[i]]);
ERROR("mlx4: %s", msg);
--rss->usecnt;
rte_errno = ret;
@@ -291,6 +292,7 @@ void
mlx4_rss_detach(struct mlx4_rss *rss)
{
struct mlx4_priv *priv = rss->priv;
+ struct rte_eth_dev *dev = ETH_DEV(priv);
unsigned int i;
assert(rss->refcnt);
@@ -303,7 +305,7 @@ mlx4_rss_detach(struct mlx4_rss *rss)
claim_zero(mlx4_glue->destroy_rwq_ind_table(rss->ind));
rss->ind = NULL;
for (i = 0; i != rss->queues; ++i)
- mlx4_rxq_detach(priv->dev->data->rx_queues[rss->queue_id[i]]);
+ mlx4_rxq_detach(dev->data->rx_queues[rss->queue_id[i]]);
}
/**
@@ -329,7 +331,7 @@ mlx4_rss_detach(struct mlx4_rss *rss)
int
mlx4_rss_init(struct mlx4_priv *priv)
{
- struct rte_eth_dev *dev = priv->dev;
+ struct rte_eth_dev *dev = ETH_DEV(priv);
uint8_t log2_range = rte_log2_u32(dev->data->nb_rx_queues);
uint32_t wq_num_prev = 0;
const char *msg;
@@ -338,7 +340,7 @@ mlx4_rss_init(struct mlx4_priv *priv)
if (priv->rss_init)
return 0;
- if (priv->dev->data->nb_rx_queues > priv->hw_rss_max_qps) {
+ if (ETH_DEV(priv)->data->nb_rx_queues > priv->hw_rss_max_qps) {
ERROR("RSS does not support more than %d queues",
priv->hw_rss_max_qps);
rte_errno = EINVAL;
@@ -356,8 +358,8 @@ mlx4_rss_init(struct mlx4_priv *priv)
rte_errno = ret;
return -ret;
}
- for (i = 0; i != priv->dev->data->nb_rx_queues; ++i) {
- struct rxq *rxq = priv->dev->data->rx_queues[i];
+ for (i = 0; i != ETH_DEV(priv)->data->nb_rx_queues; ++i) {
+ struct rxq *rxq = ETH_DEV(priv)->data->rx_queues[i];
struct ibv_cq *cq;
struct ibv_wq *wq;
uint32_t wq_num;
@@ -432,7 +434,7 @@ mlx4_rss_init(struct mlx4_priv *priv)
ERROR("cannot initialize common RSS resources (queue %u): %s: %s",
i, msg, strerror(ret));
while (i--) {
- struct rxq *rxq = priv->dev->data->rx_queues[i];
+ struct rxq *rxq = ETH_DEV(priv)->data->rx_queues[i];
if (rxq)
mlx4_rxq_detach(rxq);
@@ -457,8 +459,8 @@ mlx4_rss_deinit(struct mlx4_priv *priv)
if (!priv->rss_init)
return;
- for (i = 0; i != priv->dev->data->nb_rx_queues; ++i) {
- struct rxq *rxq = priv->dev->data->rx_queues[i];
+ for (i = 0; i != ETH_DEV(priv)->data->nb_rx_queues; ++i) {
+ struct rxq *rxq = ETH_DEV(priv)->data->rx_queues[i];
if (rxq) {
assert(rxq->usecnt == 1);
@@ -494,7 +496,7 @@ mlx4_rxq_attach(struct rxq *rxq)
}
struct mlx4_priv *priv = rxq->priv;
- struct rte_eth_dev *dev = priv->dev;
+ struct rte_eth_dev *dev = ETH_DEV(priv);
const uint32_t elts_n = 1 << rxq->elts_n;
const uint32_t sges_n = 1 << rxq->sges_n;
struct rte_mbuf *(*elts)[elts_n] = rxq->elts;
@@ -561,7 +563,7 @@ mlx4_rxq_attach(struct rxq *rxq)
}
/* Pre-register Rx mempool. */
DEBUG("port %u Rx queue %u registering mp %s having %u chunks",
- priv->dev->data->port_id, rxq->stats.idx,
+ ETH_DEV(priv)->data->port_id, rxq->stats.idx,
rxq->mp->name, rxq->mp->nb_mem_chunks);
mlx4_mr_update_mp(dev, &rxq->mr_ctrl, rxq->mp);
wqes = (volatile struct mlx4_wqe_data_seg (*)[])
@@ -917,11 +919,11 @@ mlx4_rx_queue_release(void *dpdk_rxq)
if (rxq == NULL)
return;
priv = rxq->priv;
- for (i = 0; i != priv->dev->data->nb_rx_queues; ++i)
- if (priv->dev->data->rx_queues[i] == rxq) {
+ for (i = 0; i != ETH_DEV(priv)->data->nb_rx_queues; ++i)
+ if (ETH_DEV(priv)->data->rx_queues[i] == rxq) {
DEBUG("%p: removing Rx queue %p from list",
- (void *)priv->dev, (void *)rxq);
- priv->dev->data->rx_queues[i] = NULL;
+ (void *)ETH_DEV(priv), (void *)rxq);
+ ETH_DEV(priv)->data->rx_queues[i] = NULL;
break;
}
assert(!rxq->cq);
diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c
index 8142775fc4..352700820d 100644
--- a/drivers/net/mlx4/mlx4_txq.c
+++ b/drivers/net/mlx4/mlx4_txq.c
@@ -357,11 +357,11 @@ mlx4_tx_queue_release(void *dpdk_txq)
if (txq == NULL)
return;
priv = txq->priv;
- for (i = 0; i != priv->dev->data->nb_tx_queues; ++i)
- if (priv->dev->data->tx_queues[i] == txq) {
+ for (i = 0; i != ETH_DEV(priv)->data->nb_tx_queues; ++i)
+ if (ETH_DEV(priv)->data->tx_queues[i] == txq) {
DEBUG("%p: removing Tx queue %p from list",
- (void *)priv->dev, (void *)txq);
- priv->dev->data->tx_queues[i] = NULL;
+ (void *)ETH_DEV(priv), (void *)txq);
+ ETH_DEV(priv)->data->tx_queues[i] = NULL;
break;
}
mlx4_txq_free_elts(txq);
--
2.11.0
* [dpdk-dev] [PATCH 2/3] net/mlx4: add external allocator for Verbs object
2019-03-07 7:39 [dpdk-dev] [PATCH 0/3] net/mlx4: add secondary process support Yongseok Koh
2019-03-07 7:39 ` [dpdk-dev] [PATCH 1/3] net/mlx4: change device reference for secondary process Yongseok Koh
@ 2019-03-07 7:39 ` Yongseok Koh
2019-03-07 7:39 ` [dpdk-dev] [PATCH 3/3] net/mlx4: add secondary process support Yongseok Koh
` (2 subsequent siblings)
4 siblings, 0 replies; 30+ messages in thread
From: Yongseok Koh @ 2019-03-07 7:39 UTC (permalink / raw)
To: shahafs; +Cc: dev
To support secondary processes, the memory allocated by the library, such
as completion rings (CQ) and buffer rings (WQ), must be manageable by EAL so
that it can be shared with secondary processes. With recent changes in
rdma-core and the kernel driver, it is possible to provide an external
allocator to the library layer for this purpose. All such resources are now
allocated within the DPDK framework.
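A condensed sketch of how such an allocator hook looks; the callback bodies
here are simplified (the driver's real callbacks, which also pick the NUMA
socket of the queue being created, are in the mlx4.c hunk below), and
hook_verbs_allocators() is a placeholder name:

#include <unistd.h>             /* sysconf() */
#include <infiniband/mlx4dv.h>  /* mlx4dv_set_context_attr() */
#include <rte_malloc.h>
#include <rte_memory.h>         /* SOCKET_ID_ANY */

static void *
alloc_cb(size_t size, void *data)
{
	(void)data;
	/* Page-aligned allocation from DPDK (hugepage) memory. */
	return rte_malloc_socket("verbs", size, sysconf(_SC_PAGESIZE),
				 SOCKET_ID_ANY);
}

static void
free_cb(void *ptr, void *data)
{
	(void)data;
	rte_free(ptr);
}

static int
hook_verbs_allocators(struct ibv_context *ctx, void *priv)
{
	struct mlx4dv_ctx_allocators alctr = {
		.alloc = alloc_cb,
		.free = free_cb,
		.data = priv,
	};

	/* Only available when rdma-core provides
	 * MLX4DV_SET_CTX_ATTR_BUF_ALLOCATORS.
	 */
	return mlx4dv_set_context_attr(ctx,
				       MLX4DV_SET_CTX_ATTR_BUF_ALLOCATORS,
				       &alctr);
}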
Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
drivers/net/mlx4/Makefile | 5 ++++
drivers/net/mlx4/meson.build | 10 +++++++
drivers/net/mlx4/mlx4.c | 67 ++++++++++++++++++++++++++++++++++++++++++++
drivers/net/mlx4/mlx4.h | 20 +++++++++++++
drivers/net/mlx4/mlx4_rxq.c | 4 +++
drivers/net/mlx4/mlx4_txq.c | 6 ++--
6 files changed, 110 insertions(+), 2 deletions(-)
diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile
index 1f1b927484..b527efd625 100644
--- a/drivers/net/mlx4/Makefile
+++ b/drivers/net/mlx4/Makefile
@@ -88,6 +88,11 @@ mlx4_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
$Q $(RM) -f -- '$@'
$Q : > '$@'
$Q sh -- '$<' '$@' \
+ HAVE_IBV_MLX4_BUF_ALLOCATORS \
+ infiniband/mlx4dv.h \
+ enum MLX4DV_SET_CTX_ATTR_BUF_ALLOCATORS \
+ $(AUTOCONF_OUTPUT)
+ $Q sh -- '$<' '$@' \
HAVE_IBV_MLX4_WQE_LSO_SEG \
infiniband/mlx4dv.h \
type 'struct mlx4_wqe_lso_seg' \
diff --git a/drivers/net/mlx4/meson.build b/drivers/net/mlx4/meson.build
index b4f9672e73..650e2c8fbc 100644
--- a/drivers/net/mlx4/meson.build
+++ b/drivers/net/mlx4/meson.build
@@ -70,7 +70,17 @@ if build
[ 'HAVE_IBV_MLX4_WQE_LSO_SEG', 'infiniband/mlx4dv.h',
'struct mlx4_wqe_lso_seg', 'mss_hdr_size' ],
]
+ # input array for meson symbol search:
+ # [ "MACRO to define if found", "header for the search",
+ # "symbol to search" ]
+ has_sym_args = [
+ [ 'HAVE_IBV_MLX4_BUF_ALLOCATORS', 'infiniband/mlx4dv.h',
+ 'MLX4DV_SET_CTX_ATTR_BUF_ALLOCATORS' ],
+ ]
config = configuration_data()
+ foreach arg:has_sym_args
+ config.set(arg[0], cc.has_header_symbol(arg[1], arg[2]))
+ endforeach
foreach arg:has_member_args
file_prefix = '#include<' + arg[1] + '>'
config.set(arg[0], cc.has_member(arg[2], arg[3],
diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index bb6ab8ec6e..0e0b035df0 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -69,6 +69,62 @@ const char *pmd_mlx4_init_params[] = {
static void mlx4_dev_stop(struct rte_eth_dev *dev);
+#ifdef HAVE_IBV_MLX4_BUF_ALLOCATORS
+/**
+ * Verbs callback to allocate a memory. This function should allocate the space
+ * according to the size provided residing inside a huge page.
+ * Please note that all allocation must respect the alignment from libmlx4
+ * (i.e. currently sysconf(_SC_PAGESIZE)).
+ *
+ * @param[in] size
+ * The size in bytes of the memory to allocate.
+ * @param[in] data
+ * A pointer to the callback data.
+ *
+ * @return
+ * Allocated buffer, NULL otherwise and rte_errno is set.
+ */
+static void *
+mlx4_alloc_verbs_buf(size_t size, void *data)
+{
+ struct mlx4_priv *priv = data;
+ void *ret;
+ size_t alignment = sysconf(_SC_PAGESIZE);
+ unsigned int socket = SOCKET_ID_ANY;
+
+ if (priv->verbs_alloc_ctx.type == MLX4_VERBS_ALLOC_TYPE_TX_QUEUE) {
+ const struct txq *txq = priv->verbs_alloc_ctx.obj;
+
+ socket = txq->socket;
+ } else if (priv->verbs_alloc_ctx.type ==
+ MLX4_VERBS_ALLOC_TYPE_RX_QUEUE) {
+ const struct rxq *rxq = priv->verbs_alloc_ctx.obj;
+
+ socket = rxq->socket;
+ }
+ assert(data != NULL);
+ ret = rte_malloc_socket(__func__, size, alignment, socket);
+ if (!ret && size)
+ rte_errno = ENOMEM;
+ return ret;
+}
+
+/**
+ * Verbs callback to free a memory.
+ *
+ * @param[in] ptr
+ * A pointer to the memory to free.
+ * @param[in] data
+ * A pointer to the callback data.
+ */
+static void
+mlx4_free_verbs_buf(void *ptr, void *data __rte_unused)
+{
+ assert(data != NULL);
+ rte_free(ptr);
+}
+#endif
+
/**
* DPDK callback for Ethernet device configuration.
*
@@ -755,6 +811,17 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
eth_dev->intr_handle = &priv->intr_handle;
priv->dev_data = eth_dev->data;
eth_dev->dev_ops = &mlx4_dev_ops;
+#ifdef HAVE_IBV_MLX4_BUF_ALLOCATORS
+ /* Hint libmlx4 to use PMD allocator for data plane resources */
+ struct mlx4dv_ctx_allocators alctr = {
+ .alloc = &mlx4_alloc_verbs_buf,
+ .free = &mlx4_free_verbs_buf,
+ .data = priv,
+ };
+ mlx4_glue->dv_set_context_attr
+ (ctx, MLX4DV_SET_CTX_ATTR_BUF_ALLOCATORS,
+ (void *)((uintptr_t)&alctr));
+#endif
/* Bring Ethernet device up. */
DEBUG("forcing Ethernet interface up");
mlx4_dev_set_link_up(eth_dev);
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 51566caf7f..d43e05ea74 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -72,6 +72,24 @@ struct rxq;
struct txq;
struct rte_flow;
+/**
+ * Type of object being allocated.
+ */
+enum mlx4_verbs_alloc_type {
+ MLX4_VERBS_ALLOC_TYPE_NONE,
+ MLX4_VERBS_ALLOC_TYPE_TX_QUEUE,
+ MLX4_VERBS_ALLOC_TYPE_RX_QUEUE,
+};
+
+/**
+ * Verbs allocator needs a context to know in the callback which kind of
+ * resources it is allocating.
+ */
+struct mlx4_verbs_alloc_ctx {
+ enum mlx4_verbs_alloc_type type; /* Kind of object being allocated. */
+ const void *obj; /* Pointer to the DPDK object. */
+};
+
LIST_HEAD(mlx4_dev_list, mlx4_priv);
LIST_HEAD(mlx4_mr_list, mlx4_mr);
@@ -111,6 +129,8 @@ struct mlx4_priv {
LIST_HEAD(, rte_flow) flows; /**< Configured flow rule handles. */
struct ether_addr mac[MLX4_MAX_MAC_ADDRESSES];
/**< Configured MAC addresses. Unused entries are zeroed. */
+ struct mlx4_verbs_alloc_ctx verbs_alloc_ctx;
+ /**< Context for Verbs allocator. */
};
#define PORT_ID(priv) ((priv)->dev_data->port_id)
diff --git a/drivers/net/mlx4/mlx4_rxq.c b/drivers/net/mlx4/mlx4_rxq.c
index 50f33eb0c5..f45c1ff85c 100644
--- a/drivers/net/mlx4/mlx4_rxq.c
+++ b/drivers/net/mlx4/mlx4_rxq.c
@@ -513,6 +513,8 @@ mlx4_rxq_attach(struct rxq *rxq)
int ret;
assert(rte_is_power_of_2(elts_n));
+ priv->verbs_alloc_ctx.type = MLX4_VERBS_ALLOC_TYPE_RX_QUEUE;
+ priv->verbs_alloc_ctx.obj = rxq;
cq = mlx4_glue->create_cq(priv->ctx, elts_n / sges_n, NULL,
rxq->channel, 0);
if (!cq) {
@@ -620,6 +622,7 @@ mlx4_rxq_attach(struct rxq *rxq)
rxq->rq_ci = elts_n / sges_n;
rte_wmb();
*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
+ priv->verbs_alloc_ctx.type = MLX4_VERBS_ALLOC_TYPE_NONE;
return 0;
error:
if (wq)
@@ -630,6 +633,7 @@ mlx4_rxq_attach(struct rxq *rxq)
rte_errno = ret;
ERROR("error while attaching Rx queue %p: %s: %s",
(void *)rxq, msg, strerror(ret));
+ priv->verbs_alloc_ctx.type = MLX4_VERBS_ALLOC_TYPE_NONE;
return -ret;
}
diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c
index 352700820d..2dc198e77f 100644
--- a/drivers/net/mlx4/mlx4_txq.c
+++ b/drivers/net/mlx4/mlx4_txq.c
@@ -177,10 +177,8 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
uint64_t offloads;
offloads = conf->offloads | dev->data->dev_conf.txmode.offloads;
-
DEBUG("%p: configuring queue %u for %u descriptors",
(void *)dev, idx, desc);
-
if (idx >= dev->data->nb_tx_queues) {
rte_errno = EOVERFLOW;
ERROR("%p: queue index out of range (%u >= %u)",
@@ -241,6 +239,8 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
.lb = !!priv->vf,
.bounce_buf = bounce_buf,
};
+ priv->verbs_alloc_ctx.type = MLX4_VERBS_ALLOC_TYPE_TX_QUEUE;
+ priv->verbs_alloc_ctx.obj = txq;
txq->cq = mlx4_glue->create_cq(priv->ctx, desc, NULL, NULL, 0);
if (!txq->cq) {
rte_errno = ENOMEM;
@@ -331,6 +331,7 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
txq->mr_ctrl.dev_gen_ptr = &priv->mr.dev_gen;
DEBUG("%p: adding Tx queue %p to list", (void *)dev, (void *)txq);
dev->data->tx_queues[idx] = txq;
+ priv->verbs_alloc_ctx.type = MLX4_VERBS_ALLOC_TYPE_NONE;
return 0;
error:
dev->data->tx_queues[idx] = NULL;
@@ -338,6 +339,7 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
mlx4_tx_queue_release(txq);
rte_errno = ret;
assert(rte_errno > 0);
+ priv->verbs_alloc_ctx.type = MLX4_VERBS_ALLOC_TYPE_NONE;
return -rte_errno;
}
--
2.11.0
* [dpdk-dev] [PATCH 3/3] net/mlx4: add secondary process support
2019-03-07 7:39 [dpdk-dev] [PATCH 0/3] net/mlx4: add secondary process support Yongseok Koh
2019-03-07 7:39 ` [dpdk-dev] [PATCH 1/3] net/mlx4: change device reference for secondary process Yongseok Koh
2019-03-07 7:39 ` [dpdk-dev] [PATCH 2/3] net/mlx4: add external allocator for Verbs object Yongseok Koh
@ 2019-03-07 7:39 ` Yongseok Koh
2019-03-25 19:17 ` [dpdk-dev] [PATCH v2 0/3] " Yongseok Koh
2019-04-01 21:15 ` [dpdk-dev] [PATCH v3 0/3] " Yongseok Koh
4 siblings, 0 replies; 30+ messages in thread
From: Yongseok Koh @ 2019-03-07 7:39 UTC (permalink / raw)
To: shahafs; +Cc: dev
In order to support secondary processes, a few features are required:
a) The rdma-core library must allocate device resources using DPDK's memory
allocator.
b) UAR must be remapped for secondary processes. In order not to use a
different data structure for secondary processes, the PMD reserves an
identical virtual address range in both primary and secondary processes.
c) An IPC channel is necessary; it is easily set up with the rte_mp APIs.
Through this channel, the Verbs command FD is delivered to the secondary
process and device stop/start events are broadcast from the primary
process.
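As a rough sketch of the rte_mp exchange in (c): request_verbs_cmd_fd() and
the bare port_id payload below are simplified placeholders, the driver's
real messages use struct mlx4_mp_param and are implemented in mlx4_mp.c
further down.

#include <stdint.h>
#include <stdlib.h>           /* free() */
#include <string.h>
#include <time.h>
#include <rte_eal.h>          /* rte_mp_request_sync() */
#include <rte_string_fns.h>   /* strlcpy() */

#define MP_NAME "net_mlx4_mp"

/* Secondary process: request the primary's Verbs command FD. */
static int
request_verbs_cmd_fd(uint16_t port_id)
{
	struct rte_mp_msg req;
	struct rte_mp_reply rep;
	struct timespec ts = { .tv_sec = 5, .tv_nsec = 0 };
	int fd = -1;

	memset(&req, 0, sizeof(req));
	strlcpy(req.name, MP_NAME, sizeof(req.name));
	memcpy(req.param, &port_id, sizeof(port_id));
	req.len_param = sizeof(port_id);
	/* The primary's handler (registered with rte_mp_action_register())
	 * replies with the FD attached; the kernel duplicates it into this
	 * process.
	 */
	if (rte_mp_request_sync(&req, &rep, &ts) != 0)
		return -1;
	if (rep.nb_received == 1 && rep.msgs[0].num_fds == 1)
		fd = rep.msgs[0].fds[0];
	free(rep.msgs);
	return fd;
}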
Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
doc/guides/nics/features/mlx4.ini | 1 +
doc/guides/nics/mlx4.rst | 10 +
drivers/net/mlx4/Makefile | 6 +
drivers/net/mlx4/meson.build | 3 +
drivers/net/mlx4/mlx4.c | 378 ++++++++++++++++++++++++++++++++++++--
drivers/net/mlx4/mlx4.h | 57 ++++++
drivers/net/mlx4/mlx4_mp.c | 278 ++++++++++++++++++++++++++++
drivers/net/mlx4/mlx4_mr.c | 32 +++-
drivers/net/mlx4/mlx4_prm.h | 4 +-
drivers/net/mlx4/mlx4_rxtx.c | 2 +
drivers/net/mlx4/mlx4_rxtx.h | 1 +
drivers/net/mlx4/mlx4_txq.c | 110 +++++++++++
12 files changed, 860 insertions(+), 22 deletions(-)
create mode 100644 drivers/net/mlx4/mlx4_mp.c
diff --git a/doc/guides/nics/features/mlx4.ini b/doc/guides/nics/features/mlx4.ini
index a211aef332..4502aa2a87 100644
--- a/doc/guides/nics/features/mlx4.ini
+++ b/doc/guides/nics/features/mlx4.ini
@@ -29,6 +29,7 @@ Packet type parsing = Y
Basic stats = Y
Stats per queue = Y
FW version = Y
+Multiprocess aware = Y
Other kdrv = Y
Power8 = Y
x86-32 = Y
diff --git a/doc/guides/nics/mlx4.rst b/doc/guides/nics/mlx4.rst
index 4ad361a2c2..cd34838f41 100644
--- a/doc/guides/nics/mlx4.rst
+++ b/doc/guides/nics/mlx4.rst
@@ -145,6 +145,16 @@ below.
Limitations
-----------
+- For secondary process:
+
+ - Forked secondary process not supported.
+ - All mempools must be initialized before rte_eth_dev_start().
+ - External memory unregistered in EAL memseg list cannot be used for DMA
+ unless such memory has been registered by ``mlx4_mr_update_ext_mp()`` in
+ primary process and remapped to the same virtual address in secondary
+ process. If the external memory is registered by primary process but has
+ different virtual address in secondary process, unexpected error may happen.
+
- CRC stripping is supported by default and always reported as "true".
The ability to enable/disable CRC stripping requires OFED version
4.3-1.5.0.0 and above or rdma-core version v18 and above.
diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile
index b527efd625..8126b0dfc6 100644
--- a/drivers/net/mlx4/Makefile
+++ b/drivers/net/mlx4/Makefile
@@ -18,6 +18,7 @@ ifneq ($(CONFIG_RTE_IBVERBS_LINK_DLOPEN),y)
SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_glue.c
endif
SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_intr.c
+SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_mp.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_mr.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_rxq.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_rxtx.c
@@ -93,6 +94,11 @@ mlx4_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
enum MLX4DV_SET_CTX_ATTR_BUF_ALLOCATORS \
$(AUTOCONF_OUTPUT)
$Q sh -- '$<' '$@' \
+ HAVE_IBV_MLX4_UAR_MMAP_OFFSET \
+ infiniband/mlx4dv.h \
+ enum MLX4DV_QP_MASK_UAR_MMAP_OFFSET \
+ $(AUTOCONF_OUTPUT)
+ $Q sh -- '$<' '$@' \
HAVE_IBV_MLX4_WQE_LSO_SEG \
infiniband/mlx4dv.h \
type 'struct mlx4_wqe_lso_seg' \
diff --git a/drivers/net/mlx4/meson.build b/drivers/net/mlx4/meson.build
index 650e2c8fbc..de020701d1 100644
--- a/drivers/net/mlx4/meson.build
+++ b/drivers/net/mlx4/meson.build
@@ -33,6 +33,7 @@ if build
'mlx4_ethdev.c',
'mlx4_flow.c',
'mlx4_intr.c',
+ 'mlx4_mp.c',
'mlx4_mr.c',
'mlx4_rxq.c',
'mlx4_rxtx.c',
@@ -76,6 +77,8 @@ if build
has_sym_args = [
[ 'HAVE_IBV_MLX4_BUF_ALLOCATORS', 'infiniband/mlx4dv.h',
'MLX4DV_SET_CTX_ATTR_BUF_ALLOCATORS' ],
+ [ 'HAVE_IBV_MLX4_UAR_MMAP_OFFSET', 'infiniband/mlx4dv.h',
+ 'MLX4DV_QP_MASK_UAR_MMAP_OFFSET' ],
]
config = configuration_data()
foreach arg:has_sym_args
diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 0e0b035df0..a5cfcdbee3 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -17,6 +17,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <sys/mman.h>
#include <unistd.h>
/* Verbs headers do not support -pedantic. */
@@ -48,10 +49,21 @@
#include "mlx4_rxtx.h"
#include "mlx4_utils.h"
-struct mlx4_dev_list mlx4_mem_event_cb_list =
- LIST_HEAD_INITIALIZER(mlx4_mem_event_cb_list);
+#if defined(HAVE_IBV_MLX4_UAR_MMAP_OFFSET) && \
+ defined(HAVE_IBV_MLX4_BUF_ALLOCATORS)
+#define HAVE_IBV_MLX4_SECONDARY_PROCESS
+#endif
+
+static const char *MZ_MLX4_PMD_SHARED_DATA = "mlx4_pmd_shared_data";
+
+/* Shared memory between primary and secondary processes. */
+struct mlx4_shared_data *mlx4_shared_data;
-rte_rwlock_t mlx4_mem_event_rwlock = RTE_RWLOCK_INITIALIZER;
+/* Spinlock for mlx4_shared_data allocation. */
+static rte_spinlock_t mlx4_shared_data_lock = RTE_SPINLOCK_INITIALIZER;
+
+/* Process local data for secondary processes. */
+static struct mlx4_local_data mlx4_local_data;
/** Configuration structure for device arguments. */
struct mlx4_conf {
@@ -69,6 +81,77 @@ const char *pmd_mlx4_init_params[] = {
static void mlx4_dev_stop(struct rte_eth_dev *dev);
+/**
+ * Initialize shared data between primary and secondary process.
+ *
+ * A memzone is reserved by primary process and secondary processes attach to
+ * the memzone.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx4_init_shared_data(void)
+{
+ const struct rte_memzone *mz;
+ int ret = 0;
+
+ rte_spinlock_lock(&mlx4_shared_data_lock);
+ if (mlx4_shared_data == NULL) {
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ /* Allocate shared memory. */
+ mz = rte_memzone_reserve(MZ_MLX4_PMD_SHARED_DATA,
+ sizeof(*mlx4_shared_data),
+ SOCKET_ID_ANY, 0);
+ if (mz == NULL) {
+ ERROR("Cannot allocate mlx4 shared data\n");
+ ret = -rte_errno;
+ goto error;
+ }
+ mlx4_shared_data = mz->addr;
+ memset(mlx4_shared_data, 0, sizeof(*mlx4_shared_data));
+ rte_spinlock_init(&mlx4_shared_data->lock);
+ } else {
+ /* Lookup allocated shared memory. */
+ mz = rte_memzone_lookup(MZ_MLX4_PMD_SHARED_DATA);
+ if (mz == NULL) {
+ ERROR("Cannot attach mlx4 shared data\n");
+ ret = -rte_errno;
+ goto error;
+ }
+ mlx4_shared_data = mz->addr;
+ memset(&mlx4_local_data, 0, sizeof(mlx4_local_data));
+ }
+ }
+error:
+ rte_spinlock_unlock(&mlx4_shared_data_lock);
+ return ret;
+}
+
+/**
+ * Uninitialize shared data between primary and secondary process.
+ *
+ * The pointer of secondary process is dereferenced and primary process frees
+ * the memzone.
+ */
+static void
+mlx4_uninit_shared_data(void)
+{
+ const struct rte_memzone *mz;
+
+ rte_spinlock_lock(&mlx4_shared_data_lock);
+ if (mlx4_shared_data) {
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ mz = rte_memzone_lookup(MZ_MLX4_PMD_SHARED_DATA);
+ rte_memzone_free(mz);
+ } else {
+ memset(&mlx4_local_data, 0, sizeof(mlx4_local_data));
+ }
+ mlx4_shared_data = NULL;
+ }
+ rte_spinlock_unlock(&mlx4_shared_data_lock);
+}
+
#ifdef HAVE_IBV_MLX4_BUF_ALLOCATORS
/**
* Verbs callback to allocate a memory. This function should allocate the space
@@ -181,6 +264,11 @@ mlx4_dev_start(struct rte_eth_dev *dev)
return 0;
DEBUG("%p: attaching configured flows to all RX queues", (void *)dev);
priv->started = 1;
+ ret = mlx4_tx_uar_remap(dev, priv->ctx->cmd_fd);
+ if (ret) {
+ ERROR("%p: cannot remap UAR", (void *)dev);
+ goto err;
+ }
ret = mlx4_rss_init(priv);
if (ret) {
ERROR("%p: cannot initialize RSS resources: %s",
@@ -208,6 +296,8 @@ mlx4_dev_start(struct rte_eth_dev *dev)
rte_wmb();
dev->tx_pkt_burst = mlx4_tx_burst;
dev->rx_pkt_burst = mlx4_rx_burst;
+ /* Enable datapath on secondary process. */
+ mlx4_mp_req_start_rxtx(dev);
return 0;
err:
mlx4_dev_stop(dev);
@@ -226,6 +316,8 @@ static void
mlx4_dev_stop(struct rte_eth_dev *dev)
{
struct mlx4_priv *priv = dev->data->dev_private;
+ const size_t page_size = sysconf(_SC_PAGESIZE);
+ int i;
if (!priv->started)
return;
@@ -234,9 +326,20 @@ mlx4_dev_stop(struct rte_eth_dev *dev)
dev->tx_pkt_burst = mlx4_tx_burst_removed;
dev->rx_pkt_burst = mlx4_rx_burst_removed;
rte_wmb();
+ /* Disable datapath on secondary process. */
+ mlx4_mp_req_stop_rxtx(dev);
mlx4_flow_sync(priv, NULL);
mlx4_rxq_intr_disable(priv);
mlx4_rss_deinit(priv);
+ for (i = 0; i != dev->data->nb_tx_queues; ++i) {
+ struct txq *txq;
+
+ txq = dev->data->tx_queues[i];
+ if (!txq)
+ continue;
+ munmap((void *)RTE_ALIGN_FLOOR((uintptr_t)txq->msq.db,
+ page_size), page_size);
+ }
}
/**
@@ -259,6 +362,8 @@ mlx4_dev_close(struct rte_eth_dev *dev)
dev->rx_pkt_burst = mlx4_rx_burst_removed;
dev->tx_pkt_burst = mlx4_tx_burst_removed;
rte_wmb();
+ /* Disable datapath on secondary process. */
+ mlx4_mp_req_stop_rxtx(dev);
mlx4_flow_clean(priv);
mlx4_rss_deinit(priv);
for (i = 0; i != dev->data->nb_rx_queues; ++i)
@@ -310,6 +415,16 @@ static const struct eth_dev_ops mlx4_dev_ops = {
.is_removed = mlx4_is_removed,
};
+#ifdef HAVE_IBV_MLX4_SECONDARY_PROCESS
+/* Available operations from secondary process. */
+static const struct eth_dev_ops mlx4_dev_sec_ops = {
+ .stats_get = mlx4_stats_get,
+ .stats_reset = mlx4_stats_reset,
+ .fw_version_get = mlx4_fw_version_get,
+ .dev_infos_get = mlx4_dev_infos_get,
+};
+#endif
+
/**
* Get PCI information from struct ibv_device.
*
@@ -549,6 +664,200 @@ mlx4_hw_rss_sup(struct ibv_context *ctx, struct ibv_pd *pd,
static struct rte_pci_driver mlx4_driver;
+static int
+find_lower_va_bound(const struct rte_memseg_list *msl,
+ const struct rte_memseg *ms, void *arg)
+{
+ void **addr = arg;
+
+ if (msl->external)
+ return 0;
+ if (*addr == NULL)
+ *addr = ms->addr;
+ else
+ *addr = RTE_MIN(*addr, ms->addr);
+
+ return 0;
+}
+
+/**
+ * Reserve UAR address space for primary process.
+ *
+ * Process local resource is used by both primary and secondary to avoid
+ * duplicate reservation. The space has to be available on both primary and
+ * secondary process, TXQ UAR maps to this area using fixed mmap w/o double
+ * check.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx4_uar_init_primary(void)
+{
+ struct mlx4_shared_data *sd = mlx4_shared_data;
+ void *addr = (void *)0;
+
+ if (sd->uar_base)
+ return 0;
+ /* find out lower bound of hugepage segments */
+ rte_memseg_walk(find_lower_va_bound, &addr);
+ /* keep distance to hugepages to minimize potential conflicts. */
+ addr = RTE_PTR_SUB(addr, (uintptr_t)(MLX4_UAR_OFFSET + MLX4_UAR_SIZE));
+ /* anonymous mmap, no real memory consumption. */
+ addr = mmap(addr, MLX4_UAR_SIZE,
+ PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (addr == MAP_FAILED) {
+ ERROR("failed to reserve UAR address space, please"
+ " adjust MLX4_UAR_SIZE or try --base-virtaddr");
+ rte_errno = ENOMEM;
+ return -rte_errno;
+ }
+ /* Accept either same addr or a new addr returned from mmap if target
+ * range occupied.
+ */
+ INFO("reserved UAR address space: %p", addr);
+ sd->uar_base = addr; /* for primary and secondary UAR re-mmap. */
+ return 0;
+}
+
+/**
+ * Unmap UAR address space reserved for primary process.
+ */
+static void
+mlx4_uar_uninit_primary(void)
+{
+ struct mlx4_shared_data *sd = mlx4_shared_data;
+
+ if (!sd->uar_base)
+ return;
+ munmap(sd->uar_base, MLX4_UAR_SIZE);
+ sd->uar_base = NULL;
+}
+
+/**
+ * Reserve UAR address space for secondary process, align with primary process.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx4_uar_init_secondary(void)
+{
+ struct mlx4_shared_data *sd = mlx4_shared_data;
+ struct mlx4_local_data *ld = &mlx4_local_data;
+ void *addr;
+
+ if (ld->uar_base) { /* Already reserved. */
+ assert(sd->uar_base == ld->uar_base);
+ return 0;
+ }
+ assert(sd->uar_base);
+ /* anonymous mmap, no real memory consumption. */
+ addr = mmap(sd->uar_base, MLX4_UAR_SIZE,
+ PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (addr == MAP_FAILED) {
+ ERROR("UAR mmap failed: %p size: %llu",
+ sd->uar_base, MLX4_UAR_SIZE);
+ rte_errno = ENXIO;
+ return -rte_errno;
+ }
+ if (sd->uar_base != addr) {
+ ERROR("UAR address %p size %llu occupied, please"
+ " adjust MLX4_UAR_OFFSET or try EAL parameter"
+ " --base-virtaddr",
+ sd->uar_base, MLX4_UAR_SIZE);
+ rte_errno = ENXIO;
+ return -rte_errno;
+ }
+ ld->uar_base = addr;
+ INFO("reserved UAR address space: %p", addr);
+ return 0;
+}
+
+/**
+ * Unmap UAR address space reserved for secondary process.
+ */
+static void
+mlx4_uar_uninit_secondary(void)
+{
+ struct mlx4_local_data *ld = &mlx4_local_data;
+
+ if (!ld->uar_base)
+ return;
+ munmap(ld->uar_base, MLX4_UAR_SIZE);
+ ld->uar_base = NULL;
+}
+
+/**
+ * PMD global initialization.
+ *
+ * Independent from individual device, this function initializes global
+ * per-PMD data structures distinguishing primary and secondary processes.
+ * Hence, each initialization is called once per a process.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx4_init_once(void)
+{
+ struct mlx4_shared_data *sd;
+ struct mlx4_local_data *ld = &mlx4_local_data;
+ int ret;
+
+ if (mlx4_init_shared_data())
+ return -rte_errno;
+ sd = mlx4_shared_data;
+ assert(sd);
+ rte_spinlock_lock(&sd->lock);
+ switch (rte_eal_process_type()) {
+ case RTE_PROC_PRIMARY:
+ if (sd->init_done)
+ break;
+ LIST_INIT(&sd->mem_event_cb_list);
+ rte_rwlock_init(&sd->mem_event_rwlock);
+ rte_mem_event_callback_register("MLX4_MEM_EVENT_CB",
+ mlx4_mr_mem_event_cb, NULL);
+ mlx4_mp_init_primary();
+ ret = mlx4_uar_init_primary();
+ if (ret)
+ goto error;
+ sd->init_done = true;
+ break;
+ case RTE_PROC_SECONDARY:
+ if (ld->init_done)
+ break;
+ mlx4_mp_init_secondary();
+ ret = mlx4_uar_init_secondary();
+ if (ret)
+ goto error;
+ ++sd->secondary_cnt;
+ ld->init_done = true;
+ break;
+ default:
+ break;
+ }
+ rte_spinlock_unlock(&sd->lock);
+ return 0;
+error:
+ switch (rte_eal_process_type()) {
+ case RTE_PROC_PRIMARY:
+ mlx4_uar_uninit_primary();
+ mlx4_mp_uninit_primary();
+ rte_mem_event_callback_unregister("MLX4_MEM_EVENT_CB", NULL);
+ break;
+ case RTE_PROC_SECONDARY:
+ mlx4_uar_uninit_secondary();
+ mlx4_mp_uninit_secondary();
+ break;
+ default:
+ break;
+ }
+ rte_spinlock_unlock(&sd->lock);
+ mlx4_uninit_shared_data();
+ return -rte_errno;
+}
+
/**
* DPDK callback to register a PCI device.
*
@@ -579,6 +888,12 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
int i;
(void)pci_drv;
+ err = mlx4_init_once();
+ if (err) {
+ ERROR("unable to init PMD global data: %s",
+ strerror(rte_errno));
+ return -rte_errno;
+ }
assert(pci_drv == &mlx4_driver);
list = mlx4_glue->get_device_list(&i);
if (list == NULL) {
@@ -659,6 +974,7 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
struct mlx4_priv *priv = NULL;
struct rte_eth_dev *eth_dev = NULL;
struct ether_addr mac;
+ char name[RTE_ETH_NAME_MAX_LEN];
/* If port is not enabled, skip. */
if (!(conf.ports.enabled & (1 << i)))
@@ -669,6 +985,44 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
err = ENODEV;
goto port_error;
}
+ snprintf(name, sizeof(name), "%s port %u",
+ mlx4_glue->get_device_name(ibv_dev), port);
+#ifdef HAVE_IBV_MLX4_SECONDARY_PROCESS
+ if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+ eth_dev = rte_eth_dev_attach_secondary(name);
+ if (eth_dev == NULL) {
+ ERROR("can not attach rte ethdev");
+ rte_errno = ENOMEM;
+ err = rte_errno;
+ goto error;
+ }
+ eth_dev->device = &pci_dev->device;
+ eth_dev->dev_ops = &mlx4_dev_sec_ops;
+ /* Receive command fd from primary process */
+ err = mlx4_mp_req_verbs_cmd_fd(eth_dev);
+ if (err < 0) {
+ err = rte_errno;
+ goto error;
+ }
+ /* Remap UAR for Tx queues. */
+ err = mlx4_tx_uar_remap(eth_dev, err);
+ if (err) {
+ err = rte_errno;
+ goto error;
+ }
+ /*
+ * Ethdev pointer is still required as input since
+ * the primary device is not accessible from the
+ * secondary process.
+ */
+ eth_dev->tx_pkt_burst = mlx4_tx_burst;
+ eth_dev->rx_pkt_burst = mlx4_rx_burst;
+ claim_zero(mlx4_glue->close_device(ctx));
+ rte_eth_copy_pci_info(eth_dev, pci_dev);
+ rte_eth_dev_probing_finish(eth_dev);
+ continue;
+ }
+#endif
/* Check port status. */
err = mlx4_glue->query_port(ctx, port, &port_attr);
if (err) {
@@ -774,14 +1128,7 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
/* Get actual MTU if possible. */
mlx4_mtu_get(priv, &priv->mtu);
DEBUG("port %u MTU is %u", priv->port, priv->mtu);
- /* from rte_ethdev.c */
- {
- char name[RTE_ETH_NAME_MAX_LEN];
-
- snprintf(name, sizeof(name), "%s port %u",
- mlx4_glue->get_device_name(ibv_dev), port);
- eth_dev = rte_eth_dev_allocate(name);
- }
+ eth_dev = rte_eth_dev_allocate(name);
if (eth_dev == NULL) {
err = ENOMEM;
ERROR("can not allocate rte ethdev");
@@ -842,9 +1189,10 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
goto port_error;
}
/* Add device to memory callback list. */
- rte_rwlock_write_lock(&mlx4_mem_event_rwlock);
- LIST_INSERT_HEAD(&mlx4_mem_event_cb_list, priv, mem_event_cb);
- rte_rwlock_write_unlock(&mlx4_mem_event_rwlock);
+ rte_rwlock_write_lock(&mlx4_shared_data->mem_event_rwlock);
+ LIST_INSERT_HEAD(&mlx4_shared_data->mem_event_cb_list,
+ priv, mem_event_cb);
+ rte_rwlock_write_unlock(&mlx4_shared_data->mem_event_rwlock);
rte_eth_dev_probing_finish(eth_dev);
continue;
port_error:
@@ -1075,8 +1423,6 @@ RTE_INIT(rte_mlx4_pmd_init)
}
mlx4_glue->fork_init();
rte_pci_register(&mlx4_driver);
- rte_mem_event_callback_register("MLX4_MEM_EVENT_CB",
- mlx4_mr_mem_event_cb, NULL);
}
RTE_PMD_EXPORT_NAME(net_mlx4, __COUNTER__);
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index d43e05ea74..bb75f99e03 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -53,6 +53,16 @@
/** Port parameter. */
#define MLX4_PMD_PORT_KVARG "port"
+/* Reserved address space for UAR mapping. */
+#define MLX4_UAR_SIZE (1ULL << (sizeof(uintptr_t) * 4))
+
+/* Offset of reserved UAR address space to hugepage memory. Offset is used here
+ * to minimize possibility of address next to hugepage being used by other code
+ * in either primary or secondary process, failing to map TX UAR would make TX
+ * packets invisible to HW.
+ */
+#define MLX4_UAR_OFFSET (2ULL << (sizeof(uintptr_t) * 4))
+
enum {
PCI_VENDOR_ID_MELLANOX = 0x15b3,
};
@@ -63,6 +73,23 @@ enum {
PCI_DEVICE_ID_MELLANOX_CONNECTX3PRO = 0x1007,
};
+/* Request types for IPC. */
+enum mlx4_mp_req_type {
+ MLX4_MP_REQ_VERBS_CMD_FD = 1,
+ MLX4_MP_REQ_START_RXTX,
+ MLX4_MP_REQ_STOP_RXTX,
+};
+
+/* Parameters for IPC. */
+struct mlx4_mp_param {
+ enum mlx4_mp_req_type type;
+ int port_id;
+ int result;
+};
+
+/** Key string for IPC. */
+#define MLX4_MP_NAME "net_mlx4_mp"
+
/** Driver name reported to lower layers and used in log output. */
#define MLX4_DRIVER_NAME "net_mlx4"
@@ -93,6 +120,27 @@ struct mlx4_verbs_alloc_ctx {
LIST_HEAD(mlx4_dev_list, mlx4_priv);
LIST_HEAD(mlx4_mr_list, mlx4_mr);
+/* Shared data between primary and secondary processes. */
+struct mlx4_shared_data {
+ rte_spinlock_t lock;
+ /* Global spinlock for primary and secondary processes. */
+ int init_done; /* Whether primary has done initialization. */
+ unsigned int secondary_cnt; /* Number of secondary processes init'd. */
+ void *uar_base;
+ /* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
+ struct mlx4_dev_list mem_event_cb_list;
+ rte_rwlock_t mem_event_rwlock;
+};
+
+/* Per-process data structure, not visible to other processes. */
+struct mlx4_local_data {
+ int init_done; /* Whether a secondary has done initialization. */
+ void *uar_base;
+ /* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
+};
+
+extern struct mlx4_shared_data *mlx4_shared_data;
+
/** Private data structure. */
struct mlx4_priv {
LIST_ENTRY(mlx4_priv) mem_event_cb;
@@ -175,4 +223,13 @@ void mlx4_rxq_intr_disable(struct mlx4_priv *priv);
int mlx4_rx_intr_disable(struct rte_eth_dev *dev, uint16_t idx);
int mlx4_rx_intr_enable(struct rte_eth_dev *dev, uint16_t idx);
+/* mlx4_mp.c */
+void mlx4_mp_req_start_rxtx(struct rte_eth_dev *dev);
+void mlx4_mp_req_stop_rxtx(struct rte_eth_dev *dev);
+int mlx4_mp_req_verbs_cmd_fd(struct rte_eth_dev *dev);
+void mlx4_mp_init_primary(void);
+void mlx4_mp_uninit_primary(void);
+void mlx4_mp_init_secondary(void);
+void mlx4_mp_uninit_secondary(void);
+
#endif /* RTE_PMD_MLX4_H_ */
diff --git a/drivers/net/mlx4/mlx4_mp.c b/drivers/net/mlx4/mlx4_mp.c
new file mode 100644
index 0000000000..b0a91b44fd
--- /dev/null
+++ b/drivers/net/mlx4/mlx4_mp.c
@@ -0,0 +1,278 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2019 6WIND S.A.
+ * Copyright 2019 Mellanox Technologies, Ltd
+ */
+
+#include <assert.h>
+#include <stdio.h>
+#include <time.h>
+
+#include <rte_eal.h>
+#include <rte_ethdev_driver.h>
+#include <rte_string_fns.h>
+
+#include "mlx4.h"
+#include "mlx4_rxtx.h"
+#include "mlx4_utils.h"
+
+/**
+ * Initialize IPC message.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet structure.
+ * @param[out] msg
+ * Pointer to message to fill in.
+ * @param[in] type
+ * Message type.
+ */
+static inline void
+mp_init_msg(struct rte_eth_dev *dev, struct rte_mp_msg *msg,
+ enum mlx4_mp_req_type type)
+{
+ struct mlx4_mp_param *param = (struct mlx4_mp_param *)msg->param;
+
+ memset(msg, 0, sizeof(*msg));
+ strlcpy(msg->name, MLX4_MP_NAME, sizeof(msg->name));
+ msg->len_param = sizeof(*param);
+ param->type = type;
+ param->port_id = dev->data->port_id;
+}
+
+/**
+ * Return file descriptor for mmap to the secondary process.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet structure.
+ * @param[in] peer
+ * Pointer to the peer socket path.
+ *
+ * @return
+ * 0 on success, negative errno value otherwise and rte_errno is set.
+ */
+static int
+mp_primary_handle(const struct rte_mp_msg *mp_msg, const void *peer)
+{
+ struct rte_mp_msg mp_res;
+ struct mlx4_mp_param *res = (struct mlx4_mp_param *)mp_res.param;
+ const struct mlx4_mp_param *param =
+ (const struct mlx4_mp_param *)mp_msg->param;
+ struct rte_eth_dev *dev = &rte_eth_devices[param->port_id];
+ struct mlx4_priv *priv = dev->data->dev_private;
+ int ret = 0;
+
+ assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+ switch (param->type) {
+ case MLX4_MP_REQ_VERBS_CMD_FD:
+ mp_init_msg(dev, &mp_res, param->type);
+ mp_res.num_fds = 1;
+ mp_res.fds[0] = priv->ctx->cmd_fd;
+ res->result = 0;
+ ret = rte_mp_reply(&mp_res, peer);
+ break;
+ default:
+ rte_errno = EINVAL;
+ ERROR("port %u invalid mp request type", dev->data->port_id);
+ return -rte_errno;
+ }
+ return ret;
+}
+
+/**
+ * IPC message handler of a secondary process.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet structure.
+ * @param[in] peer
+ * Pointer to the peer socket path.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mp_secondary_handle(const struct rte_mp_msg *mp_msg, const void *peer)
+{
+ struct rte_mp_msg mp_res;
+ struct mlx4_mp_param *res = (struct mlx4_mp_param *)mp_res.param;
+ const struct mlx4_mp_param *param =
+ (const struct mlx4_mp_param *)mp_msg->param;
+ struct rte_eth_dev *dev = &rte_eth_devices[param->port_id];
+ int ret = 0;
+
+ assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
+ switch (param->type) {
+ case MLX4_MP_REQ_START_RXTX:
+ INFO("port %u starting datapath", dev->data->port_id);
+ rte_mb();
+ dev->tx_pkt_burst = mlx4_tx_burst;
+ dev->rx_pkt_burst = mlx4_rx_burst;
+ mp_init_msg(dev, &mp_res, param->type);
+ res->result = 0;
+ ret = rte_mp_reply(&mp_res, peer);
+ break;
+ case MLX4_MP_REQ_STOP_RXTX:
+ INFO("port %u stopping datapath", dev->data->port_id);
+ dev->tx_pkt_burst = mlx4_tx_burst_removed;
+ dev->rx_pkt_burst = mlx4_rx_burst_removed;
+ rte_mb();
+ mp_init_msg(dev, &mp_res, param->type);
+ res->result = 0;
+ ret = rte_mp_reply(&mp_res, peer);
+ break;
+ default:
+ rte_errno = EINVAL;
+ ERROR("port %u invalid mp request type", dev->data->port_id);
+ return -rte_errno;
+ }
+ return ret;
+}
+
+/**
+ * Broadcast request of stopping/starting data-path to secondary processes.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet structure.
+ * @param[in] type
+ * Request type.
+ */
+static void
+mp_req_on_rxtx(struct rte_eth_dev *dev, enum mlx4_mp_req_type type)
+{
+ struct rte_mp_msg mp_req;
+ struct rte_mp_msg *mp_res;
+ struct rte_mp_reply mp_rep;
+ struct mlx4_mp_param *res __rte_unused;
+ struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
+ int ret;
+
+ assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+ if (!mlx4_shared_data->secondary_cnt)
+ return;
+ if (type != MLX4_MP_REQ_START_RXTX && type != MLX4_MP_REQ_STOP_RXTX) {
+ ERROR("port %u unknown request (req_type %d)",
+ dev->data->port_id, type);
+ return;
+ }
+ mp_init_msg(dev, &mp_req, type);
+ ret = rte_mp_request_sync(&mp_req, &mp_rep, &ts);
+ if (ret) {
+ ERROR("port %u failed to request stop/start Rx/Tx (%d)",
+ dev->data->port_id, type);
+ goto exit;
+ }
+ if (mp_rep.nb_sent != mp_rep.nb_received) {
+ ERROR("port %u not all secondaries responded (req_type %d)",
+ dev->data->port_id, type);
+ goto exit;
+ }
+ mp_res = &mp_rep.msgs[0];
+ res = (struct mlx4_mp_param *)mp_res->param;
+ assert(!res->result);
+exit:
+ free(mp_rep.msgs);
+}
+
+/**
+ * Broadcast request of starting data-path to secondary processes. The request
+ * is synchronous.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet structure.
+ */
+void
+mlx4_mp_req_start_rxtx(struct rte_eth_dev *dev)
+{
+ mp_req_on_rxtx(dev, MLX4_MP_REQ_START_RXTX);
+}
+
+/**
+ * Broadcast request of stopping data-path to secondary processes. The request
+ * is synchronous.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet structure.
+ */
+void
+mlx4_mp_req_stop_rxtx(struct rte_eth_dev *dev)
+{
+ mp_req_on_rxtx(dev, MLX4_MP_REQ_STOP_RXTX);
+}
+
+/**
+ * IPC message handler of primary process.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet structure.
+ *
+ * @return
+ * fd on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx4_mp_req_verbs_cmd_fd(struct rte_eth_dev *dev)
+{
+ struct rte_mp_msg mp_req;
+ struct rte_mp_msg *mp_res;
+ struct rte_mp_reply mp_rep;
+ struct mlx4_mp_param *res __rte_unused;
+ struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
+ int cmd_fd;
+ int ret;
+
+ assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
+ mp_init_msg(dev, &mp_req, MLX4_MP_REQ_VERBS_CMD_FD);
+ ret = rte_mp_request_sync(&mp_req, &mp_rep, &ts);
+ if (ret) {
+ ERROR("port %u failed to get command FD from primary process",
+ dev->data->port_id);
+ return -rte_errno;
+ }
+ assert(mp_rep.nb_received == 1);
+ mp_res = &mp_rep.msgs[0];
+ res = (struct mlx4_mp_param *)mp_res->param;
+ assert(!res->result);
+ assert(mp_res->num_fds == 1);
+ cmd_fd = mp_res->fds[0];
+ free(mp_rep.msgs);
+ DEBUG("port %u command FD from primary is %d",
+ dev->data->port_id, cmd_fd);
+ return cmd_fd;
+}
+
+/**
+ * Initialize by primary process.
+ */
+void
+mlx4_mp_init_primary(void)
+{
+ assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+ rte_mp_action_register(MLX4_MP_NAME, mp_primary_handle);
+}
+
+/**
+ * Un-initialize by primary process.
+ */
+void
+mlx4_mp_uninit_primary(void)
+{
+ assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+ rte_mp_action_unregister(MLX4_MP_NAME);
+}
+
+/**
+ * Initialize by secondary process.
+ */
+void
+mlx4_mp_init_secondary(void)
+{
+ assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
+ rte_mp_action_register(MLX4_MP_NAME, mp_secondary_handle);
+}
+
+/**
+ * Un-initialize by secondary process.
+ */
+void
+mlx4_mp_uninit_secondary(void)
+{
+ assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
+ rte_mp_action_unregister(MLX4_MP_NAME);
+}
diff --git a/drivers/net/mlx4/mlx4_mr.c b/drivers/net/mlx4/mlx4_mr.c
index e4be46ab2a..01894faecf 100644
--- a/drivers/net/mlx4/mlx4_mr.c
+++ b/drivers/net/mlx4/mlx4_mr.c
@@ -489,6 +489,8 @@ mlx4_mr_garbage_collect(struct rte_eth_dev *dev)
struct mlx4_mr *mr_next;
struct mlx4_mr_list free_list = LIST_HEAD_INITIALIZER(free_list);
+ /* Must be called from the primary process. */
+ assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
/*
* MR can't be freed with holding the lock because rte_free() could call
* memory free callback function. This will be a deadlock situation.
@@ -561,6 +563,14 @@ mlx4_mr_create(struct rte_eth_dev *dev, struct mlx4_mr_cache *entry,
DEBUG("port %u creating a MR using address (%p)",
dev->data->port_id, (void *)addr);
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ WARN("port %u using address (%p) of unregistered mempool"
+ " in secondary process, please create mempool"
+ " before rte_eth_dev_start()",
+ dev->data->port_id, (void *)addr);
+ rte_errno = EPERM;
+ goto err_nolock;
+ }
/*
* Release detached MRs if any. This can't be called with holding either
* memory_hotplug_lock or priv->mr.rwlock. MRs on the free list have
@@ -890,14 +900,17 @@ mlx4_mr_mem_event_cb(enum rte_mem_event event_type, const void *addr,
size_t len, void *arg __rte_unused)
{
struct mlx4_priv *priv;
+ struct mlx4_dev_list *dev_list = &mlx4_shared_data->mem_event_cb_list;
+ /* Must be called from the primary process. */
+ assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
switch (event_type) {
case RTE_MEM_EVENT_FREE:
- rte_rwlock_read_lock(&mlx4_mem_event_rwlock);
+ rte_rwlock_read_lock(&mlx4_shared_data->mem_event_rwlock);
/* Iterate all the existing mlx4 devices. */
- LIST_FOREACH(priv, &mlx4_mem_event_cb_list, mem_event_cb)
+ LIST_FOREACH(priv, dev_list, mem_event_cb)
mlx4_mr_mem_event_free_cb(ETH_DEV(priv), addr, len);
- rte_rwlock_read_unlock(&mlx4_mem_event_rwlock);
+ rte_rwlock_read_unlock(&mlx4_shared_data->mem_event_rwlock);
break;
case RTE_MEM_EVENT_ALLOC:
default:
@@ -1130,6 +1143,7 @@ mlx4_mr_update_ext_mp_cb(struct rte_mempool *mp, void *opaque,
struct mlx4_mr_cache entry;
uint32_t lkey;
+ assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
/* If already registered, it should return. */
rte_rwlock_read_lock(&priv->mr.rwlock);
lkey = mr_lookup_dev(dev, &entry, addr);
@@ -1225,6 +1239,14 @@ mlx4_tx_update_ext_mp(struct txq *txq, uintptr_t addr, struct rte_mempool *mp)
struct mlx4_mr_ctrl *mr_ctrl = &txq->mr_ctrl;
struct mlx4_priv *priv = txq->priv;
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ WARN("port %u using address (%p) from unregistered mempool"
+ " having externally allocated memory"
+ " in secondary process, please create mempool"
+ " prior to rte_eth_dev_start()",
+ PORT_ID(priv), (void *)addr);
+ return UINT32_MAX;
+ }
mlx4_mr_update_ext_mp(ETH_DEV(priv), mr_ctrl, mp);
return mlx4_tx_addr2mr_bh(txq, addr);
}
@@ -1336,9 +1358,9 @@ mlx4_mr_release(struct rte_eth_dev *dev)
struct mlx4_mr *mr_next = LIST_FIRST(&priv->mr.mr_list);
/* Remove from memory callback device list. */
- rte_rwlock_write_lock(&mlx4_mem_event_rwlock);
+ rte_rwlock_write_lock(&mlx4_shared_data->mem_event_rwlock);
LIST_REMOVE(priv, mem_event_cb);
- rte_rwlock_write_unlock(&mlx4_mem_event_rwlock);
+ rte_rwlock_write_unlock(&mlx4_shared_data->mem_event_rwlock);
#ifndef NDEBUG
mlx4_mr_dump_dev(dev);
#endif
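The secondary-process warnings added to mlx4_mr.c above imply an ordering requirement on the application side: every mempool used on the data path should exist before the port is started. A minimal sketch, with arbitrary pool name and sizes:

#include <rte_ethdev.h>
#include <rte_lcore.h>
#include <rte_mbuf.h>

static int
example_port_setup(uint16_t port_id)
{
	/* Create mempools before rte_eth_dev_start() so the primary process
	 * registers their memory; a secondary cannot create new MRs later. */
	struct rte_mempool *mp = rte_pktmbuf_pool_create("mbuf_pool",
			8192, 256, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
			rte_socket_id());

	if (mp == NULL)
		return -1;
	/* ... rte_eth_dev_configure() and Rx/Tx queue setup using mp ... */
	return rte_eth_dev_start(port_id);
}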
diff --git a/drivers/net/mlx4/mlx4_prm.h b/drivers/net/mlx4/mlx4_prm.h
index aef77ba06e..b3e11dde25 100644
--- a/drivers/net/mlx4/mlx4_prm.h
+++ b/drivers/net/mlx4/mlx4_prm.h
@@ -77,7 +77,9 @@ struct mlx4_sq {
uint32_t owner_opcode;
/**< Default owner opcode with HW valid owner bit. */
uint32_t stamp; /**< Stamp value with an invalid HW owner bit. */
- volatile uint32_t *db; /**< Pointer to the doorbell. */
+ volatile uint32_t *qp_sdb; /**< Pointer to the doorbell. */
+ volatile uint32_t *db; /**< Pointer to the doorbell remapped. */
+ off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
uint32_t doorbell_qpn; /**< qp number to write to the doorbell. */
};
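The reason struct mlx4_sq now carries two doorbell pointers: qp_sdb keeps the address returned by Verbs (valid only through the primary's original mapping), while db is the remapped address that every process rings once mlx4_tx_uar_remap() has run. A minimal sketch of the doorbell write, mirroring the existing Tx burst path:

#include <rte_io.h>
#include "mlx4_prm.h"

static inline void
example_ring_sq_doorbell(struct mlx4_sq *sq)
{
	/* db points into the reserved UAR window, identical in all processes */
	rte_write32(sq->doorbell_qpn, sq->db);
}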
diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c
index 8c88effcd1..f22f1ba559 100644
--- a/drivers/net/mlx4/mlx4_rxtx.c
+++ b/drivers/net/mlx4/mlx4_rxtx.c
@@ -1365,6 +1365,7 @@ mlx4_tx_burst_removed(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
(void)dpdk_txq;
(void)pkts;
(void)pkts_n;
+ rte_mb();
return 0;
}
@@ -1390,5 +1391,6 @@ mlx4_rx_burst_removed(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
(void)dpdk_rxq;
(void)pkts;
(void)pkts_n;
+ rte_mb();
return 0;
}
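The memory barriers added to the _removed() stubs pair with the primary-side stop sequence introduced elsewhere in this patch; a hedged, paraphrased sketch of that pairing (not the literal diff):

#include <rte_atomic.h>
#include <rte_ethdev_driver.h>
#include "mlx4.h"      /* assumed to declare mlx4_mp_req_stop_rxtx() */
#include "mlx4_rxtx.h" /* declares the _removed() stubs */

static void
example_stop_datapath(struct rte_eth_dev *dev)
{
	/* Point the burst callbacks at the harmless stubs first ... */
	dev->rx_pkt_burst = mlx4_rx_burst_removed;
	dev->tx_pkt_burst = mlx4_tx_burst_removed;
	rte_wmb();
	/* ... then ask every secondary process to do the same. */
	mlx4_mp_req_stop_rxtx(dev);
}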
diff --git a/drivers/net/mlx4/mlx4_rxtx.h b/drivers/net/mlx4/mlx4_rxtx.h
index 9409602b32..7d7a8988ed 100644
--- a/drivers/net/mlx4/mlx4_rxtx.h
+++ b/drivers/net/mlx4/mlx4_rxtx.h
@@ -152,6 +152,7 @@ uint16_t mlx4_rx_burst_removed(void *dpdk_rxq, struct rte_mbuf **pkts,
/* mlx4_txq.c */
+int mlx4_tx_uar_remap(struct rte_eth_dev *dev, int fd);
uint64_t mlx4_get_tx_port_offloads(struct mlx4_priv *priv);
int mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx,
uint16_t desc, unsigned int socket,
diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c
index 2dc198e77f..f3275fe024 100644
--- a/drivers/net/mlx4/mlx4_txq.c
+++ b/drivers/net/mlx4/mlx4_txq.c
@@ -13,7 +13,9 @@
#include <stddef.h>
#include <stdint.h>
#include <string.h>
+#include <sys/mman.h>
#include <inttypes.h>
+#include <unistd.h>
/* Verbs headers do not support -pedantic. */
#ifdef PEDANTIC
@@ -38,6 +40,97 @@
#include "mlx4_utils.h"
/**
+ * Mmap TX UAR (HW doorbell) pages into the reserved UAR address space.
+ * Both the primary and secondary processes perform the mmap so that the
+ * UAR addresses stay aligned and identical across processes.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet device.
+ * @param fd
+ * Verbs file descriptor to map UAR pages.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
+int
+mlx4_tx_uar_remap(struct rte_eth_dev *dev, int fd)
+{
+ unsigned int i, j;
+ const unsigned int txqs_n = dev->data->nb_tx_queues;
+ uintptr_t pages[txqs_n];
+ unsigned int pages_n = 0;
+ uintptr_t uar_va;
+ uintptr_t off;
+ void *addr;
+ void *ret;
+ struct txq *txq;
+ int already_mapped;
+ size_t page_size = sysconf(_SC_PAGESIZE);
+
+ memset(pages, 0, txqs_n * sizeof(uintptr_t));
+ /*
+ * As in rdma-core, UARs are mapped at OS page-size granularity.
+ * Use the page-aligned address to avoid duplicate mmap calls.
+ * See the libmlx4 function mlx4_init_context().
+ */
+ for (i = 0; i != txqs_n; ++i) {
+ txq = dev->data->tx_queues[i];
+ if (!txq)
+ continue;
+ /* UAR address from Verbs, used to detect duplicates and the in-page offset. */
+ uar_va = (uintptr_t)txq->msq.qp_sdb;
+ off = uar_va & (page_size - 1); /* offset in page. */
+ uar_va = RTE_ALIGN_FLOOR(uar_va, page_size); /* page addr. */
+ already_mapped = 0;
+ for (j = 0; j != pages_n; ++j) {
+ if (pages[j] == uar_va) {
+ already_mapped = 1;
+ break;
+ }
+ }
+ /* new address in reserved UAR address space. */
+ addr = RTE_PTR_ADD(mlx4_shared_data->uar_base,
+ uar_va & (uintptr_t)(MLX4_UAR_SIZE - 1));
+ if (!already_mapped) {
+ pages[pages_n++] = uar_va;
+ /* fixed mmap to specified address in reserved
+ * address space.
+ */
+ ret = mmap(addr, page_size,
+ PROT_WRITE, MAP_FIXED | MAP_SHARED, fd,
+ txq->msq.uar_mmap_offset);
+ if (ret != addr) {
+ /* fixed mmap has to return same address. */
+ ERROR("call to mmap failed on UAR for txq %u",
+ i);
+ rte_errno = ENXIO;
+ return -rte_errno;
+ }
+ }
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* save once. */
+ txq->msq.db = RTE_PTR_ADD((void *)addr, off);
+ else
+ assert(txq->msq.db ==
+ RTE_PTR_ADD((void *)addr, off));
+ }
+ return 0;
+}
+#else
+int
+mlx4_tx_uar_remap(struct rte_eth_dev *dev __rte_unused, int fd __rte_unused)
+{
+ /*
+ * If rdma-core does not support UAR remap, secondary processes are not
+ * supported, so only the primary process can reach this function.
+ * Return success so initialization is not interrupted.
+ */
+ assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+ return 0;
+}
+#endif
+
+/**
* Free Tx queue elements.
*
* @param txq
@@ -89,7 +182,12 @@ mlx4_txq_fill_dv_obj_info(struct txq *txq, struct mlx4dv_obj *mlxdv)
sq->owner_opcode = MLX4_OPCODE_SEND | (0u << MLX4_SQ_OWNER_BIT);
sq->stamp = rte_cpu_to_be_32(MLX4_SQ_STAMP_VAL |
(0u << MLX4_SQ_OWNER_BIT));
+#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
+ sq->uar_mmap_offset = dqp->uar_mmap_offset;
+ sq->qp_sdb = dqp->sdb;
+#else
sq->db = dqp->sdb;
+#endif
sq->doorbell_qpn = dqp->doorbell_qpn;
cq->buf = dcq->buf.buf;
cq->cqe_cnt = dcq->cqe_cnt;
@@ -307,6 +405,11 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
goto error;
}
/* Retrieve device queue information. */
+#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
+ dv_qp = (struct mlx4dv_qp){
+ .comp_mask = MLX4DV_QP_MASK_UAR_MMAP_OFFSET,
+ };
+#endif
mlxdv.cq.in = txq->cq;
mlxdv.cq.out = &dv_cq;
mlxdv.qp.in = txq->qp;
@@ -318,6 +421,13 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
" accessing the device queues", (void *)dev);
goto error;
}
+#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
+ if (!(dv_qp.comp_mask & MLX4DV_QP_MASK_UAR_MMAP_OFFSET)) {
+ rte_errno = EINVAL;
+ ERROR("%p: failed to obtain UAR mmap offset", (void *)dev);
+ goto error;
+ }
+#endif
mlx4_txq_fill_dv_obj_info(txq, &mlxdv);
/* Save first wqe pointer in the first element. */
(&(*txq->elts)[0])->wqe =
--
2.11.0
^ permalink raw reply [flat|nested] 30+ messages in thread
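Putting the pieces of this patch together, one plausible secondary-side attach sequence is the following sketch (not the literal probe code):

#include <unistd.h>
#include <rte_ethdev_driver.h>
#include "mlx4.h"      /* assumed to declare mlx4_mp_req_verbs_cmd_fd() */
#include "mlx4_rxtx.h" /* declares mlx4_tx_uar_remap() */

static int
example_secondary_attach(struct rte_eth_dev *dev)
{
	/* Ask the primary for its Verbs command FD over IPC ... */
	int fd = mlx4_mp_req_verbs_cmd_fd(dev);

	if (fd < 0)
		return -1;
	/* ... remap the Tx doorbell pages through it ... */
	if (mlx4_tx_uar_remap(dev, fd) < 0) {
		close(fd);
		return -1;
	}
	/* ... and drop the duplicated descriptor. */
	close(fd);
	return 0;
}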
* [dpdk-dev] [PATCH v2 0/3] net/mlx4: add secondary process support
2019-03-07 7:39 [dpdk-dev] [PATCH 0/3] net/mlx4: add secondary process support Yongseok Koh
` (2 preceding siblings ...)
2019-03-07 7:39 ` [dpdk-dev] [PATCH 3/3] net/mlx4: add secondary process support Yongseok Koh
@ 2019-03-25 19:17 ` Yongseok Koh
2019-03-25 19:17 ` Yongseok Koh
` (3 more replies)
2019-04-01 21:15 ` [dpdk-dev] [PATCH v3 0/3] " Yongseok Koh
4 siblings, 4 replies; 30+ messages in thread
From: Yongseok Koh @ 2019-03-25 19:17 UTC (permalink / raw)
To: shahafs; +Cc: dev
RFC:
https://mails.dpdk.org/archives/dev/2019-March/125516.html
v2:
* add more sanity checks for eth_dev and for the return value from IPC requests.
* complement commit messages
* add MLX4_MP_REQ_TIMEOUT_SEC
Yongseok Koh (3):
net/mlx4: change device reference for secondary process
net/mlx4: add external allocator for Verbs object
net/mlx4: add secondary process support
doc/guides/nics/features/mlx4.ini | 1 +
doc/guides/nics/mlx4.rst | 10 +
drivers/net/mlx4/Makefile | 11 +
drivers/net/mlx4/meson.build | 13 ++
drivers/net/mlx4/mlx4.c | 449 ++++++++++++++++++++++++++++++++++++--
drivers/net/mlx4/mlx4.h | 85 +++++++-
drivers/net/mlx4/mlx4_flow.c | 39 ++--
drivers/net/mlx4/mlx4_intr.c | 20 +-
drivers/net/mlx4/mlx4_mp.c | 304 ++++++++++++++++++++++++++
drivers/net/mlx4/mlx4_mr.c | 40 +++-
drivers/net/mlx4/mlx4_prm.h | 4 +-
drivers/net/mlx4/mlx4_rxq.c | 40 ++--
drivers/net/mlx4/mlx4_rxtx.c | 2 +
drivers/net/mlx4/mlx4_rxtx.h | 1 +
drivers/net/mlx4/mlx4_txq.c | 125 ++++++++++-
15 files changed, 1064 insertions(+), 80 deletions(-)
create mode 100644 drivers/net/mlx4/mlx4_mp.c
--
2.11.0
^ permalink raw reply [flat|nested] 30+ messages in thread
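The timeout item in the v2 changelog names the IPC wait as a constant rather than the bare 5 used in v1; a minimal sketch of what that amounts to:

#include <time.h>

#define MLX4_MP_REQ_TIMEOUT_SEC 5 /* 5 s, matching the v1 hardcoded timeout */

static const struct timespec mp_req_timeout = {
	.tv_sec = MLX4_MP_REQ_TIMEOUT_SEC,
	.tv_nsec = 0,
};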
* [dpdk-dev] [PATCH v2 1/3] net/mlx4: change device reference for secondary process
2019-03-25 19:17 ` [dpdk-dev] [PATCH v2 0/3] " Yongseok Koh
2019-03-25 19:17 ` Yongseok Koh
@ 2019-03-25 19:17 ` Yongseok Koh
2019-03-25 19:17 ` Yongseok Koh
2019-03-26 19:16 ` Shahaf Shuler
2019-03-25 19:18 ` [dpdk-dev] [PATCH v2 2/3] net/mlx4: add external allocator for Verbs object Yongseok Koh
2019-03-25 19:18 ` [dpdk-dev] [PATCH v2 3/3] net/mlx4: add secondary process support Yongseok Koh
3 siblings, 2 replies; 30+ messages in thread
From: Yongseok Koh @ 2019-03-25 19:17 UTC (permalink / raw)
To: shahafs; +Cc: dev, stable
rte_eth_devices[] is not shared between primary and secondary process, but
a static array to each process. The reverse pointer of device (priv->dev)
becomes invalid if mlx4 supports secondary process. Instead, priv has the
pointer to shared data of the device,
struct rte_eth_dev_data *dev_data;
Two macros are added,
#define PORT_ID(priv) ((priv)->dev_data->port_id)
#define ETH_DEV(priv) (&rte_eth_devices[PORT_ID(priv)])
Cc: stable@dpdk.org
Suggested-by: Raslan Darawsheh <rasland@mellanox.com>
Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
drivers/net/mlx4/mlx4.c | 4 ++--
drivers/net/mlx4/mlx4.h | 5 ++++-
drivers/net/mlx4/mlx4_flow.c | 39 +++++++++++++++++++++------------------
drivers/net/mlx4/mlx4_intr.c | 20 ++++++++++----------
drivers/net/mlx4/mlx4_mr.c | 8 ++++----
drivers/net/mlx4/mlx4_rxq.c | 36 +++++++++++++++++++-----------------
drivers/net/mlx4/mlx4_txq.c | 8 ++++----
7 files changed, 64 insertions(+), 56 deletions(-)
diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 5ef2e7f41e..bb6ab8ec6e 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -753,11 +753,11 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
* handled by rte_intr_rx_ctl().
*/
eth_dev->intr_handle = &priv->intr_handle;
- priv->dev = eth_dev;
+ priv->dev_data = eth_dev->data;
eth_dev->dev_ops = &mlx4_dev_ops;
/* Bring Ethernet device up. */
DEBUG("forcing Ethernet interface up");
- mlx4_dev_set_link_up(priv->dev);
+ mlx4_dev_set_link_up(eth_dev);
/* Update link status once if waiting for LSC. */
if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
mlx4_link_update(eth_dev, 0);
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 7ac49ca672..51566caf7f 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -79,7 +79,7 @@ LIST_HEAD(mlx4_mr_list, mlx4_mr);
struct mlx4_priv {
LIST_ENTRY(mlx4_priv) mem_event_cb;
/**< Called by memory event callback. */
- struct rte_eth_dev *dev; /**< Ethernet device. */
+ struct rte_eth_dev_data *dev_data; /* Pointer to device data. */
struct ibv_context *ctx; /**< Verbs context. */
struct ibv_device_attr device_attr; /**< Device properties. */
struct ibv_pd *pd; /**< Protection Domain. */
@@ -113,6 +113,9 @@ struct mlx4_priv {
/**< Configured MAC addresses. Unused entries are zeroed. */
};
+#define PORT_ID(priv) ((priv)->dev_data->port_id)
+#define ETH_DEV(priv) (&rte_eth_devices[PORT_ID(priv)])
+
/* mlx4_ethdev.c */
int mlx4_get_ifname(const struct mlx4_priv *priv, char (*ifname)[IF_NAMESIZE]);
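What the two macros above buy: dev_data lives in memory shared across processes, so the port_id it carries is valid everywhere, while the rte_eth_dev it maps back to is always the local per-process entry. A small illustrative sketch:

#include <stdio.h>
#include <rte_ethdev_driver.h> /* rte_eth_devices[] */
#include "mlx4.h"

static void
example_show_port(struct mlx4_priv *priv)
{
	uint16_t port = PORT_ID(priv);           /* read from shared dev_data */
	struct rte_eth_dev *dev = ETH_DEV(priv); /* this process's own entry */

	printf("port %u maps to local eth_dev %p\n", port, (void *)dev);
}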
diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c
index f4df4ab1fb..038dc71d35 100644
--- a/drivers/net/mlx4/mlx4_flow.c
+++ b/drivers/net/mlx4/mlx4_flow.c
@@ -773,7 +773,7 @@ mlx4_flow_prepare(struct mlx4_priv *priv,
if (flow->rss)
break;
queue = action->conf;
- if (queue->index >= priv->dev->data->nb_rx_queues) {
+ if (queue->index >= ETH_DEV(priv)->data->nb_rx_queues) {
msg = "queue target index beyond number of"
" configured Rx queues";
goto exit_action_not_supported;
@@ -802,7 +802,7 @@ mlx4_flow_prepare(struct mlx4_priv *priv,
/* Sanity checks. */
for (i = 0; i < rss->queue_num; ++i)
if (rss->queue[i] >=
- priv->dev->data->nb_rx_queues)
+ ETH_DEV(priv)->data->nb_rx_queues)
break;
if (i != rss->queue_num) {
msg = "queue index target beyond number of"
@@ -1072,8 +1072,8 @@ mlx4_flow_toggle(struct mlx4_priv *priv,
/* Stop at the first nonexistent target queue. */
for (i = 0; i != rss->queues; ++i)
if (rss->queue_id[i] >=
- priv->dev->data->nb_rx_queues ||
- !priv->dev->data->rx_queues[rss->queue_id[i]]) {
+ ETH_DEV(priv)->data->nb_rx_queues ||
+ !ETH_DEV(priv)->data->rx_queues[rss->queue_id[i]]) {
missing = 1;
break;
}
@@ -1258,7 +1258,7 @@ static uint16_t
mlx4_flow_internal_next_vlan(struct mlx4_priv *priv, uint16_t vlan)
{
while (vlan < 4096) {
- if (priv->dev->data->vlan_filter_conf.ids[vlan / 64] &
+ if (ETH_DEV(priv)->data->vlan_filter_conf.ids[vlan / 64] &
(UINT64_C(1) << (vlan % 64)))
return vlan;
++vlan;
@@ -1335,7 +1335,7 @@ mlx4_flow_internal(struct mlx4_priv *priv, struct rte_flow_error *error)
* get RSS by default.
*/
uint32_t queues =
- rte_align32pow2(priv->dev->data->nb_rx_queues + 1) >> 1;
+ rte_align32pow2(ETH_DEV(priv)->data->nb_rx_queues + 1) >> 1;
uint16_t queue[queues];
struct rte_flow_action_rss action_rss = {
.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
@@ -1357,9 +1357,9 @@ mlx4_flow_internal(struct mlx4_priv *priv, struct rte_flow_error *error)
};
struct ether_addr *rule_mac = &eth_spec.dst;
rte_be16_t *rule_vlan =
- (priv->dev->data->dev_conf.rxmode.offloads &
+ (ETH_DEV(priv)->data->dev_conf.rxmode.offloads &
DEV_RX_OFFLOAD_VLAN_FILTER) &&
- !priv->dev->data->promiscuous ?
+ !ETH_DEV(priv)->data->promiscuous ?
&vlan_spec.tci :
NULL;
uint16_t vlan = 0;
@@ -1439,7 +1439,7 @@ mlx4_flow_internal(struct mlx4_priv *priv, struct rte_flow_error *error)
if (!flow || !flow->internal) {
/* Not found, create a new flow rule. */
memcpy(rule_mac, mac, sizeof(*mac));
- flow = mlx4_flow_create(priv->dev, &attr, pattern,
+ flow = mlx4_flow_create(ETH_DEV(priv), &attr, pattern,
actions, error);
if (!flow) {
err = -rte_errno;
@@ -1455,15 +1455,16 @@ mlx4_flow_internal(struct mlx4_priv *priv, struct rte_flow_error *error)
goto next_vlan;
}
/* Take care of promiscuous and all multicast flow rules. */
- if (priv->dev->data->promiscuous || priv->dev->data->all_multicast) {
+ if (ETH_DEV(priv)->data->promiscuous ||
+ ETH_DEV(priv)->data->all_multicast) {
for (flow = LIST_FIRST(&priv->flows);
flow && flow->internal;
flow = LIST_NEXT(flow, next)) {
- if (priv->dev->data->promiscuous) {
+ if (ETH_DEV(priv)->data->promiscuous) {
if (flow->promisc)
break;
} else {
- assert(priv->dev->data->all_multicast);
+ assert(ETH_DEV(priv)->data->all_multicast);
if (flow->allmulti)
break;
}
@@ -1477,16 +1478,16 @@ mlx4_flow_internal(struct mlx4_priv *priv, struct rte_flow_error *error)
}
if (!flow || !flow->internal) {
/* Not found, create a new flow rule. */
- if (priv->dev->data->promiscuous) {
+ if (ETH_DEV(priv)->data->promiscuous) {
pattern[1].spec = NULL;
pattern[1].mask = NULL;
} else {
- assert(priv->dev->data->all_multicast);
+ assert(ETH_DEV(priv)->data->all_multicast);
pattern[1].spec = &eth_allmulti;
pattern[1].mask = &eth_allmulti;
}
pattern[2] = pattern[3];
- flow = mlx4_flow_create(priv->dev, &attr, pattern,
+ flow = mlx4_flow_create(ETH_DEV(priv), &attr, pattern,
actions, error);
if (!flow) {
err = -rte_errno;
@@ -1503,7 +1504,8 @@ mlx4_flow_internal(struct mlx4_priv *priv, struct rte_flow_error *error)
struct rte_flow *next = LIST_NEXT(flow, next);
if (!flow->select)
- claim_zero(mlx4_flow_destroy(priv->dev, flow, error));
+ claim_zero(mlx4_flow_destroy(ETH_DEV(priv), flow,
+ error));
else
flow->select = 0;
flow = next;
@@ -1541,7 +1543,8 @@ mlx4_flow_sync(struct mlx4_priv *priv, struct rte_flow_error *error)
for (flow = LIST_FIRST(&priv->flows);
flow && flow->internal;
flow = LIST_FIRST(&priv->flows))
- claim_zero(mlx4_flow_destroy(priv->dev, flow, error));
+ claim_zero(mlx4_flow_destroy(ETH_DEV(priv), flow,
+ error));
} else {
/* Refresh internal rules. */
ret = mlx4_flow_internal(priv, error);
@@ -1574,7 +1577,7 @@ mlx4_flow_clean(struct mlx4_priv *priv)
struct rte_flow *flow;
while ((flow = LIST_FIRST(&priv->flows)))
- mlx4_flow_destroy(priv->dev, flow, NULL);
+ mlx4_flow_destroy(ETH_DEV(priv), flow, NULL);
assert(LIST_EMPTY(&priv->rss));
}
diff --git a/drivers/net/mlx4/mlx4_intr.c b/drivers/net/mlx4/mlx4_intr.c
index ec91242196..4f33526755 100644
--- a/drivers/net/mlx4/mlx4_intr.c
+++ b/drivers/net/mlx4/mlx4_intr.c
@@ -65,7 +65,7 @@ static int
mlx4_rx_intr_vec_enable(struct mlx4_priv *priv)
{
unsigned int i;
- unsigned int rxqs_n = priv->dev->data->nb_rx_queues;
+ unsigned int rxqs_n = ETH_DEV(priv)->data->nb_rx_queues;
unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
unsigned int count = 0;
struct rte_intr_handle *intr_handle = &priv->intr_handle;
@@ -79,7 +79,7 @@ mlx4_rx_intr_vec_enable(struct mlx4_priv *priv)
return -rte_errno;
}
for (i = 0; i != n; ++i) {
- struct rxq *rxq = priv->dev->data->rx_queues[i];
+ struct rxq *rxq = ETH_DEV(priv)->data->rx_queues[i];
/* Skip queues that cannot request interrupts. */
if (!rxq || !rxq->channel) {
@@ -120,12 +120,12 @@ static void
mlx4_link_status_alarm(struct mlx4_priv *priv)
{
const struct rte_intr_conf *const intr_conf =
- &priv->dev->data->dev_conf.intr_conf;
+ &ETH_DEV(priv)->data->dev_conf.intr_conf;
assert(priv->intr_alarm == 1);
priv->intr_alarm = 0;
if (intr_conf->lsc && !mlx4_link_status_check(priv))
- _rte_eth_dev_callback_process(priv->dev,
+ _rte_eth_dev_callback_process(ETH_DEV(priv),
RTE_ETH_EVENT_INTR_LSC,
NULL);
}
@@ -145,8 +145,8 @@ mlx4_link_status_alarm(struct mlx4_priv *priv)
static int
mlx4_link_status_check(struct mlx4_priv *priv)
{
- struct rte_eth_link *link = &priv->dev->data->dev_link;
- int ret = mlx4_link_update(priv->dev, 0);
+ struct rte_eth_link *link = &ETH_DEV(priv)->data->dev_link;
+ int ret = mlx4_link_update(ETH_DEV(priv), 0);
if (ret)
return ret;
@@ -185,7 +185,7 @@ mlx4_interrupt_handler(struct mlx4_priv *priv)
uint32_t caught[RTE_DIM(type)] = { 0 };
struct ibv_async_event event;
const struct rte_intr_conf *const intr_conf =
- &priv->dev->data->dev_conf.intr_conf;
+ &ETH_DEV(priv)->data->dev_conf.intr_conf;
unsigned int i;
/* Read all message and acknowledge them. */
@@ -208,7 +208,7 @@ mlx4_interrupt_handler(struct mlx4_priv *priv)
}
for (i = 0; i != RTE_DIM(caught); ++i)
if (caught[i])
- _rte_eth_dev_callback_process(priv->dev, type[i],
+ _rte_eth_dev_callback_process(ETH_DEV(priv), type[i],
NULL);
}
@@ -282,7 +282,7 @@ int
mlx4_intr_install(struct mlx4_priv *priv)
{
const struct rte_intr_conf *const intr_conf =
- &priv->dev->data->dev_conf.intr_conf;
+ &ETH_DEV(priv)->data->dev_conf.intr_conf;
int rc;
mlx4_intr_uninstall(priv);
@@ -381,7 +381,7 @@ int
mlx4_rxq_intr_enable(struct mlx4_priv *priv)
{
const struct rte_intr_conf *const intr_conf =
- &priv->dev->data->dev_conf.intr_conf;
+ &ETH_DEV(priv)->data->dev_conf.intr_conf;
if (intr_conf->rxq && mlx4_rx_intr_vec_enable(priv) < 0)
goto error;
diff --git a/drivers/net/mlx4/mlx4_mr.c b/drivers/net/mlx4/mlx4_mr.c
index 4376ad0b60..e4be46ab2a 100644
--- a/drivers/net/mlx4/mlx4_mr.c
+++ b/drivers/net/mlx4/mlx4_mr.c
@@ -896,7 +896,7 @@ mlx4_mr_mem_event_cb(enum rte_mem_event event_type, const void *addr,
rte_rwlock_read_lock(&mlx4_mem_event_rwlock);
/* Iterate all the existing mlx4 devices. */
LIST_FOREACH(priv, &mlx4_mem_event_cb_list, mem_event_cb)
- mlx4_mr_mem_event_free_cb(priv->dev, addr, len);
+ mlx4_mr_mem_event_free_cb(ETH_DEV(priv), addr, len);
rte_rwlock_read_unlock(&mlx4_mem_event_rwlock);
break;
case RTE_MEM_EVENT_ALLOC:
@@ -1028,7 +1028,7 @@ mlx4_rx_addr2mr_bh(struct rxq *rxq, uintptr_t addr)
DEBUG("Rx queue %u: miss on top-half, mru=%u, head=%u, addr=%p",
rxq->stats.idx, mr_ctrl->mru, mr_ctrl->head, (void *)addr);
- return mlx4_mr_addr2mr_bh(priv->dev, mr_ctrl, addr);
+ return mlx4_mr_addr2mr_bh(ETH_DEV(priv), mr_ctrl, addr);
}
/**
@@ -1050,7 +1050,7 @@ mlx4_tx_addr2mr_bh(struct txq *txq, uintptr_t addr)
DEBUG("Tx queue %u: miss on top-half, mru=%u, head=%u, addr=%p",
txq->stats.idx, mr_ctrl->mru, mr_ctrl->head, (void *)addr);
- return mlx4_mr_addr2mr_bh(priv->dev, mr_ctrl, addr);
+ return mlx4_mr_addr2mr_bh(ETH_DEV(priv), mr_ctrl, addr);
}
/**
@@ -1225,7 +1225,7 @@ mlx4_tx_update_ext_mp(struct txq *txq, uintptr_t addr, struct rte_mempool *mp)
struct mlx4_mr_ctrl *mr_ctrl = &txq->mr_ctrl;
struct mlx4_priv *priv = txq->priv;
- mlx4_mr_update_ext_mp(priv->dev, mr_ctrl, mp);
+ mlx4_mr_update_ext_mp(ETH_DEV(priv), mr_ctrl, mp);
return mlx4_tx_addr2mr_bh(txq, addr);
}
diff --git a/drivers/net/mlx4/mlx4_rxq.c b/drivers/net/mlx4/mlx4_rxq.c
index 3782c6baab..50f33eb0c5 100644
--- a/drivers/net/mlx4/mlx4_rxq.c
+++ b/drivers/net/mlx4/mlx4_rxq.c
@@ -176,6 +176,7 @@ mlx4_rss_attach(struct mlx4_rss *rss)
struct ibv_wq *ind_tbl[rss->queues];
struct mlx4_priv *priv = rss->priv;
+ struct rte_eth_dev *dev = ETH_DEV(priv);
const char *msg;
unsigned int i = 0;
int ret;
@@ -189,8 +190,8 @@ mlx4_rss_attach(struct mlx4_rss *rss)
uint16_t id = rss->queue_id[i];
struct rxq *rxq = NULL;
- if (id < priv->dev->data->nb_rx_queues)
- rxq = priv->dev->data->rx_queues[id];
+ if (id < dev->data->nb_rx_queues)
+ rxq = dev->data->rx_queues[id];
if (!rxq) {
ret = EINVAL;
msg = "RSS target queue is not configured";
@@ -269,7 +270,7 @@ mlx4_rss_attach(struct mlx4_rss *rss)
rss->ind = NULL;
}
while (i--)
- mlx4_rxq_detach(priv->dev->data->rx_queues[rss->queue_id[i]]);
+ mlx4_rxq_detach(dev->data->rx_queues[rss->queue_id[i]]);
ERROR("mlx4: %s", msg);
--rss->usecnt;
rte_errno = ret;
@@ -291,6 +292,7 @@ void
mlx4_rss_detach(struct mlx4_rss *rss)
{
struct mlx4_priv *priv = rss->priv;
+ struct rte_eth_dev *dev = ETH_DEV(priv);
unsigned int i;
assert(rss->refcnt);
@@ -303,7 +305,7 @@ mlx4_rss_detach(struct mlx4_rss *rss)
claim_zero(mlx4_glue->destroy_rwq_ind_table(rss->ind));
rss->ind = NULL;
for (i = 0; i != rss->queues; ++i)
- mlx4_rxq_detach(priv->dev->data->rx_queues[rss->queue_id[i]]);
+ mlx4_rxq_detach(dev->data->rx_queues[rss->queue_id[i]]);
}
/**
@@ -329,7 +331,7 @@ mlx4_rss_detach(struct mlx4_rss *rss)
int
mlx4_rss_init(struct mlx4_priv *priv)
{
- struct rte_eth_dev *dev = priv->dev;
+ struct rte_eth_dev *dev = ETH_DEV(priv);
uint8_t log2_range = rte_log2_u32(dev->data->nb_rx_queues);
uint32_t wq_num_prev = 0;
const char *msg;
@@ -338,7 +340,7 @@ mlx4_rss_init(struct mlx4_priv *priv)
if (priv->rss_init)
return 0;
- if (priv->dev->data->nb_rx_queues > priv->hw_rss_max_qps) {
+ if (ETH_DEV(priv)->data->nb_rx_queues > priv->hw_rss_max_qps) {
ERROR("RSS does not support more than %d queues",
priv->hw_rss_max_qps);
rte_errno = EINVAL;
@@ -356,8 +358,8 @@ mlx4_rss_init(struct mlx4_priv *priv)
rte_errno = ret;
return -ret;
}
- for (i = 0; i != priv->dev->data->nb_rx_queues; ++i) {
- struct rxq *rxq = priv->dev->data->rx_queues[i];
+ for (i = 0; i != ETH_DEV(priv)->data->nb_rx_queues; ++i) {
+ struct rxq *rxq = ETH_DEV(priv)->data->rx_queues[i];
struct ibv_cq *cq;
struct ibv_wq *wq;
uint32_t wq_num;
@@ -432,7 +434,7 @@ mlx4_rss_init(struct mlx4_priv *priv)
ERROR("cannot initialize common RSS resources (queue %u): %s: %s",
i, msg, strerror(ret));
while (i--) {
- struct rxq *rxq = priv->dev->data->rx_queues[i];
+ struct rxq *rxq = ETH_DEV(priv)->data->rx_queues[i];
if (rxq)
mlx4_rxq_detach(rxq);
@@ -457,8 +459,8 @@ mlx4_rss_deinit(struct mlx4_priv *priv)
if (!priv->rss_init)
return;
- for (i = 0; i != priv->dev->data->nb_rx_queues; ++i) {
- struct rxq *rxq = priv->dev->data->rx_queues[i];
+ for (i = 0; i != ETH_DEV(priv)->data->nb_rx_queues; ++i) {
+ struct rxq *rxq = ETH_DEV(priv)->data->rx_queues[i];
if (rxq) {
assert(rxq->usecnt == 1);
@@ -494,7 +496,7 @@ mlx4_rxq_attach(struct rxq *rxq)
}
struct mlx4_priv *priv = rxq->priv;
- struct rte_eth_dev *dev = priv->dev;
+ struct rte_eth_dev *dev = ETH_DEV(priv);
const uint32_t elts_n = 1 << rxq->elts_n;
const uint32_t sges_n = 1 << rxq->sges_n;
struct rte_mbuf *(*elts)[elts_n] = rxq->elts;
@@ -561,7 +563,7 @@ mlx4_rxq_attach(struct rxq *rxq)
}
/* Pre-register Rx mempool. */
DEBUG("port %u Rx queue %u registering mp %s having %u chunks",
- priv->dev->data->port_id, rxq->stats.idx,
+ ETH_DEV(priv)->data->port_id, rxq->stats.idx,
rxq->mp->name, rxq->mp->nb_mem_chunks);
mlx4_mr_update_mp(dev, &rxq->mr_ctrl, rxq->mp);
wqes = (volatile struct mlx4_wqe_data_seg (*)[])
@@ -917,11 +919,11 @@ mlx4_rx_queue_release(void *dpdk_rxq)
if (rxq == NULL)
return;
priv = rxq->priv;
- for (i = 0; i != priv->dev->data->nb_rx_queues; ++i)
- if (priv->dev->data->rx_queues[i] == rxq) {
+ for (i = 0; i != ETH_DEV(priv)->data->nb_rx_queues; ++i)
+ if (ETH_DEV(priv)->data->rx_queues[i] == rxq) {
DEBUG("%p: removing Rx queue %p from list",
- (void *)priv->dev, (void *)rxq);
- priv->dev->data->rx_queues[i] = NULL;
+ (void *)ETH_DEV(priv), (void *)rxq);
+ ETH_DEV(priv)->data->rx_queues[i] = NULL;
break;
}
assert(!rxq->cq);
diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c
index 8142775fc4..352700820d 100644
--- a/drivers/net/mlx4/mlx4_txq.c
+++ b/drivers/net/mlx4/mlx4_txq.c
@@ -357,11 +357,11 @@ mlx4_tx_queue_release(void *dpdk_txq)
if (txq == NULL)
return;
priv = txq->priv;
- for (i = 0; i != priv->dev->data->nb_tx_queues; ++i)
- if (priv->dev->data->tx_queues[i] == txq) {
+ for (i = 0; i != ETH_DEV(priv)->data->nb_tx_queues; ++i)
+ if (ETH_DEV(priv)->data->tx_queues[i] == txq) {
DEBUG("%p: removing Tx queue %p from list",
- (void *)priv->dev, (void *)txq);
- priv->dev->data->tx_queues[i] = NULL;
+ (void *)ETH_DEV(priv), (void *)txq);
+ ETH_DEV(priv)->data->tx_queues[i] = NULL;
break;
}
mlx4_txq_free_elts(txq);
--
2.11.0
^ permalink raw reply [flat|nested] 30+ messages in thread
* [dpdk-dev] [PATCH v2 2/3] net/mlx4: add external allocator for Verbs object
2019-03-25 19:17 ` [dpdk-dev] [PATCH v2 0/3] " Yongseok Koh
2019-03-25 19:17 ` Yongseok Koh
2019-03-25 19:17 ` [dpdk-dev] [PATCH v2 1/3] net/mlx4: change device reference for secondary process Yongseok Koh
@ 2019-03-25 19:18 ` Yongseok Koh
2019-03-25 19:18 ` Yongseok Koh
2019-03-26 19:21 ` Shahaf Shuler
2019-03-25 19:18 ` [dpdk-dev] [PATCH v2 3/3] net/mlx4: add secondary process support Yongseok Koh
3 siblings, 2 replies; 30+ messages in thread
From: Yongseok Koh @ 2019-03-25 19:18 UTC (permalink / raw)
To: shahafs; +Cc: dev
To support secondary processes, the memory allocated by the library, such
as completion queue (CQ) rings and work queue (WQ) buffers, must be
manageable by EAL so that it can be shared with secondary processes. With
recent changes in rdma-core and the kernel driver, it is possible to
provide an external allocator to the library layer for this purpose. All
such resources are now allocated within the DPDK framework.
Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
drivers/net/mlx4/Makefile | 5 ++++
drivers/net/mlx4/meson.build | 10 +++++++
drivers/net/mlx4/mlx4.c | 67 ++++++++++++++++++++++++++++++++++++++++++++
drivers/net/mlx4/mlx4.h | 20 +++++++++++++
drivers/net/mlx4/mlx4_rxq.c | 4 +++
drivers/net/mlx4/mlx4_txq.c | 6 ++--
6 files changed, 110 insertions(+), 2 deletions(-)
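Condensed, the hook this patch installs has the following shape; this is a sketch only, omitting the HAVE_IBV_MLX4_BUF_ALLOCATORS guard, the NUMA-aware socket selection and the error handling visible in the diff below.

#include <unistd.h>
#include <infiniband/mlx4dv.h>
#include <rte_malloc.h>
#include <rte_memory.h> /* SOCKET_ID_ANY */

static void *
ext_alloc(size_t size, void *data)
{
	(void)data;
	/* page-aligned, hugepage-backed memory also mapped by secondaries */
	return rte_malloc_socket(__func__, size,
				 (unsigned int)sysconf(_SC_PAGESIZE),
				 SOCKET_ID_ANY);
}

static void
ext_free(void *ptr, void *data)
{
	(void)data;
	rte_free(ptr);
}

static int
example_hook_allocators(struct ibv_context *ctx, void *cb_data)
{
	struct mlx4dv_ctx_allocators allocators = {
		.alloc = ext_alloc,
		.free = ext_free,
		.data = cb_data,
	};

	return mlx4dv_set_context_attr(ctx,
			MLX4DV_SET_CTX_ATTR_BUF_ALLOCATORS, &allocators);
}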
diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile
index 1f1b927484..b527efd625 100644
--- a/drivers/net/mlx4/Makefile
+++ b/drivers/net/mlx4/Makefile
@@ -88,6 +88,11 @@ mlx4_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
$Q $(RM) -f -- '$@'
$Q : > '$@'
$Q sh -- '$<' '$@' \
+ HAVE_IBV_MLX4_BUF_ALLOCATORS \
+ infiniband/mlx4dv.h \
+ enum MLX4DV_SET_CTX_ATTR_BUF_ALLOCATORS \
+ $(AUTOCONF_OUTPUT)
+ $Q sh -- '$<' '$@' \
HAVE_IBV_MLX4_WQE_LSO_SEG \
infiniband/mlx4dv.h \
type 'struct mlx4_wqe_lso_seg' \
diff --git a/drivers/net/mlx4/meson.build b/drivers/net/mlx4/meson.build
index b4f9672e73..650e2c8fbc 100644
--- a/drivers/net/mlx4/meson.build
+++ b/drivers/net/mlx4/meson.build
@@ -70,7 +70,17 @@ if build
[ 'HAVE_IBV_MLX4_WQE_LSO_SEG', 'infiniband/mlx4dv.h',
'struct mlx4_wqe_lso_seg', 'mss_hdr_size' ],
]
+ # input array for meson symbol search:
+ # [ "MACRO to define if found", "header for the search",
+ # "symbol to search" ]
+ has_sym_args = [
+ [ 'HAVE_IBV_MLX4_BUF_ALLOCATORS', 'infiniband/mlx4dv.h',
+ 'MLX4DV_SET_CTX_ATTR_BUF_ALLOCATORS' ],
+ ]
config = configuration_data()
+ foreach arg:has_sym_args
+ config.set(arg[0], cc.has_header_symbol(arg[1], arg[2]))
+ endforeach
foreach arg:has_member_args
file_prefix = '#include<' + arg[1] + '>'
config.set(arg[0], cc.has_member(arg[2], arg[3],
diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index bb6ab8ec6e..0e0b035df0 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -69,6 +69,62 @@ const char *pmd_mlx4_init_params[] = {
static void mlx4_dev_stop(struct rte_eth_dev *dev);
+#ifdef HAVE_IBV_MLX4_BUF_ALLOCATORS
+/**
+ * Verbs callback to allocate memory. This function should allocate the space
+ * according to the size provided, residing inside a huge page.
+ * Please note that all allocations must respect the alignment from libmlx4
+ * (i.e. currently sysconf(_SC_PAGESIZE)).
+ *
+ * @param[in] size
+ * The size in bytes of the memory to allocate.
+ * @param[in] data
+ * A pointer to the callback data.
+ *
+ * @return
+ * Allocated buffer, NULL otherwise and rte_errno is set.
+ */
+static void *
+mlx4_alloc_verbs_buf(size_t size, void *data)
+{
+ struct mlx4_priv *priv = data;
+ void *ret;
+ size_t alignment = sysconf(_SC_PAGESIZE);
+ unsigned int socket = SOCKET_ID_ANY;
+
+ if (priv->verbs_alloc_ctx.type == MLX4_VERBS_ALLOC_TYPE_TX_QUEUE) {
+ const struct txq *txq = priv->verbs_alloc_ctx.obj;
+
+ socket = txq->socket;
+ } else if (priv->verbs_alloc_ctx.type ==
+ MLX4_VERBS_ALLOC_TYPE_RX_QUEUE) {
+ const struct rxq *rxq = priv->verbs_alloc_ctx.obj;
+
+ socket = rxq->socket;
+ }
+ assert(data != NULL);
+ ret = rte_malloc_socket(__func__, size, alignment, socket);
+ if (!ret && size)
+ rte_errno = ENOMEM;
+ return ret;
+}
+
+/**
+ * Verbs callback to free memory.
+ *
+ * @param[in] ptr
+ * A pointer to the memory to free.
+ * @param[in] data
+ * A pointer to the callback data.
+ */
+static void
+mlx4_free_verbs_buf(void *ptr, void *data __rte_unused)
+{
+ assert(data != NULL);
+ rte_free(ptr);
+}
+#endif
+
/**
* DPDK callback for Ethernet device configuration.
*
@@ -755,6 +811,17 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
eth_dev->intr_handle = &priv->intr_handle;
priv->dev_data = eth_dev->data;
eth_dev->dev_ops = &mlx4_dev_ops;
+#ifdef HAVE_IBV_MLX4_BUF_ALLOCATORS
+ /* Hint libmlx4 to use PMD allocator for data plane resources */
+ struct mlx4dv_ctx_allocators alctr = {
+ .alloc = &mlx4_alloc_verbs_buf,
+ .free = &mlx4_free_verbs_buf,
+ .data = priv,
+ };
+ mlx4_glue->dv_set_context_attr
+ (ctx, MLX4DV_SET_CTX_ATTR_BUF_ALLOCATORS,
+ (void *)((uintptr_t)&alctr));
+#endif
/* Bring Ethernet device up. */
DEBUG("forcing Ethernet interface up");
mlx4_dev_set_link_up(eth_dev);
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 51566caf7f..d43e05ea74 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -72,6 +72,24 @@ struct rxq;
struct txq;
struct rte_flow;
+/**
+ * Type of object being allocated.
+ */
+enum mlx4_verbs_alloc_type {
+ MLX4_VERBS_ALLOC_TYPE_NONE,
+ MLX4_VERBS_ALLOC_TYPE_TX_QUEUE,
+ MLX4_VERBS_ALLOC_TYPE_RX_QUEUE,
+};
+
+/**
+ * Verbs allocator needs a context to know in the callback which kind of
+ * resources it is allocating.
+ */
+struct mlx4_verbs_alloc_ctx {
+ enum mlx4_verbs_alloc_type type; /* Kind of object being allocated. */
+ const void *obj; /* Pointer to the DPDK object. */
+};
+
LIST_HEAD(mlx4_dev_list, mlx4_priv);
LIST_HEAD(mlx4_mr_list, mlx4_mr);
@@ -111,6 +129,8 @@ struct mlx4_priv {
LIST_HEAD(, rte_flow) flows; /**< Configured flow rule handles. */
struct ether_addr mac[MLX4_MAX_MAC_ADDRESSES];
/**< Configured MAC addresses. Unused entries are zeroed. */
+ struct mlx4_verbs_alloc_ctx verbs_alloc_ctx;
+ /**< Context for Verbs allocator. */
};
#define PORT_ID(priv) ((priv)->dev_data->port_id)
diff --git a/drivers/net/mlx4/mlx4_rxq.c b/drivers/net/mlx4/mlx4_rxq.c
index 50f33eb0c5..f45c1ff85c 100644
--- a/drivers/net/mlx4/mlx4_rxq.c
+++ b/drivers/net/mlx4/mlx4_rxq.c
@@ -513,6 +513,8 @@ mlx4_rxq_attach(struct rxq *rxq)
int ret;
assert(rte_is_power_of_2(elts_n));
+ priv->verbs_alloc_ctx.type = MLX4_VERBS_ALLOC_TYPE_RX_QUEUE;
+ priv->verbs_alloc_ctx.obj = rxq;
cq = mlx4_glue->create_cq(priv->ctx, elts_n / sges_n, NULL,
rxq->channel, 0);
if (!cq) {
@@ -620,6 +622,7 @@ mlx4_rxq_attach(struct rxq *rxq)
rxq->rq_ci = elts_n / sges_n;
rte_wmb();
*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
+ priv->verbs_alloc_ctx.type = MLX4_VERBS_ALLOC_TYPE_NONE;
return 0;
error:
if (wq)
@@ -630,6 +633,7 @@ mlx4_rxq_attach(struct rxq *rxq)
rte_errno = ret;
ERROR("error while attaching Rx queue %p: %s: %s",
(void *)rxq, msg, strerror(ret));
+ priv->verbs_alloc_ctx.type = MLX4_VERBS_ALLOC_TYPE_NONE;
return -ret;
}
diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c
index 352700820d..2dc198e77f 100644
--- a/drivers/net/mlx4/mlx4_txq.c
+++ b/drivers/net/mlx4/mlx4_txq.c
@@ -177,10 +177,8 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
uint64_t offloads;
offloads = conf->offloads | dev->data->dev_conf.txmode.offloads;
-
DEBUG("%p: configuring queue %u for %u descriptors",
(void *)dev, idx, desc);
-
if (idx >= dev->data->nb_tx_queues) {
rte_errno = EOVERFLOW;
ERROR("%p: queue index out of range (%u >= %u)",
@@ -241,6 +239,8 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
.lb = !!priv->vf,
.bounce_buf = bounce_buf,
};
+ priv->verbs_alloc_ctx.type = MLX4_VERBS_ALLOC_TYPE_TX_QUEUE;
+ priv->verbs_alloc_ctx.obj = txq;
txq->cq = mlx4_glue->create_cq(priv->ctx, desc, NULL, NULL, 0);
if (!txq->cq) {
rte_errno = ENOMEM;
@@ -331,6 +331,7 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
txq->mr_ctrl.dev_gen_ptr = &priv->mr.dev_gen;
DEBUG("%p: adding Tx queue %p to list", (void *)dev, (void *)txq);
dev->data->tx_queues[idx] = txq;
+ priv->verbs_alloc_ctx.type = MLX4_VERBS_ALLOC_TYPE_NONE;
return 0;
error:
dev->data->tx_queues[idx] = NULL;
@@ -338,6 +339,7 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
mlx4_tx_queue_release(txq);
rte_errno = ret;
assert(rte_errno > 0);
+ priv->verbs_alloc_ctx.type = MLX4_VERBS_ALLOC_TYPE_NONE;
return -rte_errno;
}
--
2.11.0
^ permalink raw reply [flat|nested] 30+ messages in thread
* [dpdk-dev] [PATCH v2 2/3] net/mlx4: add external allocator for Verbs object
2019-03-25 19:18 ` [dpdk-dev] [PATCH v2 2/3] net/mlx4: add external allocator for Verbs object Yongseok Koh
@ 2019-03-25 19:18 ` Yongseok Koh
2019-03-26 19:21 ` Shahaf Shuler
1 sibling, 0 replies; 30+ messages in thread
From: Yongseok Koh @ 2019-03-25 19:18 UTC (permalink / raw)
To: shahafs; +Cc: dev
To support secondary processes, the memory allocated by the library, such
as completion queue (CQ) rings and work queue (WQ) buffers, must be
manageable by EAL so that it can be shared with secondary processes. With
recent changes in rdma-core and the kernel driver, it is possible to
provide an external allocator to the library layer for this purpose. All
such resources are now allocated within the DPDK framework.
Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
drivers/net/mlx4/Makefile | 5 ++++
drivers/net/mlx4/meson.build | 10 +++++++
drivers/net/mlx4/mlx4.c | 67 ++++++++++++++++++++++++++++++++++++++++++++
drivers/net/mlx4/mlx4.h | 20 +++++++++++++
drivers/net/mlx4/mlx4_rxq.c | 4 +++
drivers/net/mlx4/mlx4_txq.c | 6 ++--
6 files changed, 110 insertions(+), 2 deletions(-)
diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile
index 1f1b927484..b527efd625 100644
--- a/drivers/net/mlx4/Makefile
+++ b/drivers/net/mlx4/Makefile
@@ -88,6 +88,11 @@ mlx4_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
$Q $(RM) -f -- '$@'
$Q : > '$@'
$Q sh -- '$<' '$@' \
+ HAVE_IBV_MLX4_BUF_ALLOCATORS \
+ infiniband/mlx4dv.h \
+ enum MLX4DV_SET_CTX_ATTR_BUF_ALLOCATORS \
+ $(AUTOCONF_OUTPUT)
+ $Q sh -- '$<' '$@' \
HAVE_IBV_MLX4_WQE_LSO_SEG \
infiniband/mlx4dv.h \
type 'struct mlx4_wqe_lso_seg' \
diff --git a/drivers/net/mlx4/meson.build b/drivers/net/mlx4/meson.build
index b4f9672e73..650e2c8fbc 100644
--- a/drivers/net/mlx4/meson.build
+++ b/drivers/net/mlx4/meson.build
@@ -70,7 +70,17 @@ if build
[ 'HAVE_IBV_MLX4_WQE_LSO_SEG', 'infiniband/mlx4dv.h',
'struct mlx4_wqe_lso_seg', 'mss_hdr_size' ],
]
+ # input array for meson symbol search:
+ # [ "MACRO to define if found", "header for the search",
+ # "symbol to search" ]
+ has_sym_args = [
+ [ 'HAVE_IBV_MLX4_BUF_ALLOCATORS', 'infiniband/mlx4dv.h',
+ 'MLX4DV_SET_CTX_ATTR_BUF_ALLOCATORS' ],
+ ]
config = configuration_data()
+ foreach arg:has_sym_args
+ config.set(arg[0], cc.has_header_symbol(arg[1], arg[2]))
+ endforeach
foreach arg:has_member_args
file_prefix = '#include<' + arg[1] + '>'
config.set(arg[0], cc.has_member(arg[2], arg[3],
diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index bb6ab8ec6e..0e0b035df0 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -69,6 +69,62 @@ const char *pmd_mlx4_init_params[] = {
static void mlx4_dev_stop(struct rte_eth_dev *dev);
+#ifdef HAVE_IBV_MLX4_BUF_ALLOCATORS
+/**
+ * Verbs callback to allocate memory. This function should allocate space
+ * of the provided size, residing inside a hugepage.
+ * Please note that all allocations must respect the alignment required by
+ * libmlx4 (i.e. currently sysconf(_SC_PAGESIZE)).
+ *
+ * @param[in] size
+ * The size in bytes of the memory to allocate.
+ * @param[in] data
+ * A pointer to the callback data.
+ *
+ * @return
+ * Allocated buffer, NULL otherwise and rte_errno is set.
+ */
+static void *
+mlx4_alloc_verbs_buf(size_t size, void *data)
+{
+ struct mlx4_priv *priv = data;
+ void *ret;
+ size_t alignment = sysconf(_SC_PAGESIZE);
+ unsigned int socket = SOCKET_ID_ANY;
+
+ if (priv->verbs_alloc_ctx.type == MLX4_VERBS_ALLOC_TYPE_TX_QUEUE) {
+ const struct txq *txq = priv->verbs_alloc_ctx.obj;
+
+ socket = txq->socket;
+ } else if (priv->verbs_alloc_ctx.type ==
+ MLX4_VERBS_ALLOC_TYPE_RX_QUEUE) {
+ const struct rxq *rxq = priv->verbs_alloc_ctx.obj;
+
+ socket = rxq->socket;
+ }
+ assert(data != NULL);
+ ret = rte_malloc_socket(__func__, size, alignment, socket);
+ if (!ret && size)
+ rte_errno = ENOMEM;
+ return ret;
+}
+
+/**
+ * Verbs callback to free memory.
+ *
+ * @param[in] ptr
+ * A pointer to the memory to free.
+ * @param[in] data
+ * A pointer to the callback data.
+ */
+static void
+mlx4_free_verbs_buf(void *ptr, void *data __rte_unused)
+{
+ assert(data != NULL);
+ rte_free(ptr);
+}
+#endif
+
/**
* DPDK callback for Ethernet device configuration.
*
@@ -755,6 +811,17 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
eth_dev->intr_handle = &priv->intr_handle;
priv->dev_data = eth_dev->data;
eth_dev->dev_ops = &mlx4_dev_ops;
+#ifdef HAVE_IBV_MLX4_BUF_ALLOCATORS
+ /* Hint libmlx4 to use PMD allocator for data plane resources */
+ struct mlx4dv_ctx_allocators alctr = {
+ .alloc = &mlx4_alloc_verbs_buf,
+ .free = &mlx4_free_verbs_buf,
+ .data = priv,
+ };
+ mlx4_glue->dv_set_context_attr
+ (ctx, MLX4DV_SET_CTX_ATTR_BUF_ALLOCATORS,
+ (void *)((uintptr_t)&alctr));
+#endif
/* Bring Ethernet device up. */
DEBUG("forcing Ethernet interface up");
mlx4_dev_set_link_up(eth_dev);
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 51566caf7f..d43e05ea74 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -72,6 +72,24 @@ struct rxq;
struct txq;
struct rte_flow;
+/**
+ * Type of object being allocated.
+ */
+enum mlx4_verbs_alloc_type {
+ MLX4_VERBS_ALLOC_TYPE_NONE,
+ MLX4_VERBS_ALLOC_TYPE_TX_QUEUE,
+ MLX4_VERBS_ALLOC_TYPE_RX_QUEUE,
+};
+
+/**
+ * Verbs allocator needs a context to know in the callback which kind of
+ * resources it is allocating.
+ */
+struct mlx4_verbs_alloc_ctx {
+ enum mlx4_verbs_alloc_type type; /* Kind of object being allocated. */
+ const void *obj; /* Pointer to the DPDK object. */
+};
+
LIST_HEAD(mlx4_dev_list, mlx4_priv);
LIST_HEAD(mlx4_mr_list, mlx4_mr);
@@ -111,6 +129,8 @@ struct mlx4_priv {
LIST_HEAD(, rte_flow) flows; /**< Configured flow rule handles. */
struct ether_addr mac[MLX4_MAX_MAC_ADDRESSES];
/**< Configured MAC addresses. Unused entries are zeroed. */
+ struct mlx4_verbs_alloc_ctx verbs_alloc_ctx;
+ /**< Context for Verbs allocator. */
};
#define PORT_ID(priv) ((priv)->dev_data->port_id)
diff --git a/drivers/net/mlx4/mlx4_rxq.c b/drivers/net/mlx4/mlx4_rxq.c
index 50f33eb0c5..f45c1ff85c 100644
--- a/drivers/net/mlx4/mlx4_rxq.c
+++ b/drivers/net/mlx4/mlx4_rxq.c
@@ -513,6 +513,8 @@ mlx4_rxq_attach(struct rxq *rxq)
int ret;
assert(rte_is_power_of_2(elts_n));
+ priv->verbs_alloc_ctx.type = MLX4_VERBS_ALLOC_TYPE_RX_QUEUE;
+ priv->verbs_alloc_ctx.obj = rxq;
cq = mlx4_glue->create_cq(priv->ctx, elts_n / sges_n, NULL,
rxq->channel, 0);
if (!cq) {
@@ -620,6 +622,7 @@ mlx4_rxq_attach(struct rxq *rxq)
rxq->rq_ci = elts_n / sges_n;
rte_wmb();
*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
+ priv->verbs_alloc_ctx.type = MLX4_VERBS_ALLOC_TYPE_NONE;
return 0;
error:
if (wq)
@@ -630,6 +633,7 @@ mlx4_rxq_attach(struct rxq *rxq)
rte_errno = ret;
ERROR("error while attaching Rx queue %p: %s: %s",
(void *)rxq, msg, strerror(ret));
+ priv->verbs_alloc_ctx.type = MLX4_VERBS_ALLOC_TYPE_NONE;
return -ret;
}
diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c
index 352700820d..2dc198e77f 100644
--- a/drivers/net/mlx4/mlx4_txq.c
+++ b/drivers/net/mlx4/mlx4_txq.c
@@ -177,10 +177,8 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
uint64_t offloads;
offloads = conf->offloads | dev->data->dev_conf.txmode.offloads;
-
DEBUG("%p: configuring queue %u for %u descriptors",
(void *)dev, idx, desc);
-
if (idx >= dev->data->nb_tx_queues) {
rte_errno = EOVERFLOW;
ERROR("%p: queue index out of range (%u >= %u)",
@@ -241,6 +239,8 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
.lb = !!priv->vf,
.bounce_buf = bounce_buf,
};
+ priv->verbs_alloc_ctx.type = MLX4_VERBS_ALLOC_TYPE_TX_QUEUE;
+ priv->verbs_alloc_ctx.obj = txq;
txq->cq = mlx4_glue->create_cq(priv->ctx, desc, NULL, NULL, 0);
if (!txq->cq) {
rte_errno = ENOMEM;
@@ -331,6 +331,7 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
txq->mr_ctrl.dev_gen_ptr = &priv->mr.dev_gen;
DEBUG("%p: adding Tx queue %p to list", (void *)dev, (void *)txq);
dev->data->tx_queues[idx] = txq;
+ priv->verbs_alloc_ctx.type = MLX4_VERBS_ALLOC_TYPE_NONE;
return 0;
error:
dev->data->tx_queues[idx] = NULL;
@@ -338,6 +339,7 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
mlx4_tx_queue_release(txq);
rte_errno = ret;
assert(rte_errno > 0);
+ priv->verbs_alloc_ctx.type = MLX4_VERBS_ALLOC_TYPE_NONE;
return -rte_errno;
}
--
2.11.0
^ permalink raw reply [flat|nested] 30+ messages in thread
* [dpdk-dev] [PATCH v2 3/3] net/mlx4: add secondary process support
2019-03-25 19:17 ` [dpdk-dev] [PATCH v2 0/3] " Yongseok Koh
` (2 preceding siblings ...)
2019-03-25 19:18 ` [dpdk-dev] [PATCH v2 2/3] net/mlx4: add external allocator for Verbs object Yongseok Koh
@ 2019-03-25 19:18 ` Yongseok Koh
2019-03-25 19:18 ` Yongseok Koh
2019-03-26 19:33 ` Shahaf Shuler
3 siblings, 2 replies; 30+ messages in thread
From: Yongseok Koh @ 2019-03-25 19:18 UTC (permalink / raw)
To: shahafs; +Cc: dev
In order to support secondary processes, a few features are required.
a) The rdma-core library should allocate device resources using DPDK's memory
allocator.
b) UAR must be remapped for secondary processes. Currently, to avoid using
different data structures for secondary processes, the PMD reserves an
identical virtual address space in both primary and secondary processes.
c) An IPC channel is necessary; it is easily set up with the rte_mp APIs.
Through this channel, the Verbs command FD is delivered to the secondary
process and device stop/start events are broadcast from the primary process
(a minimal rte_mp sketch follows below).
Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
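As an illustration of item c) above, here is a condensed, stand-alone sketch of
the rte_mp flow mlx4_mp.c uses to hand the Verbs command FD to a secondary
process. The demo_* names and the action string are assumptions of this
sketch; the real code additionally carries a struct mlx4_mp_param payload,
validates the port ID and dispatches on the request type.

#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <rte_eal.h>
#include <rte_string_fns.h>

#define DEMO_MP_NAME "demo_mp"		/* hypothetical action name */

static int demo_cmd_fd = -1;		/* stand-in for priv->ctx->cmd_fd */

/* Primary side: answer each request with one file descriptor. */
static int
demo_primary_handle(const struct rte_mp_msg *req, const void *peer)
{
	struct rte_mp_msg rep;

	memset(&rep, 0, sizeof(rep));
	strlcpy(rep.name, req->name, sizeof(rep.name));
	rep.num_fds = 1;
	rep.fds[0] = demo_cmd_fd;
	return rte_mp_reply(&rep, peer);
}

/* Primary side: register the handler once, e.g. at probe time. */
static int
demo_register(void)
{
	return rte_mp_action_register(DEMO_MP_NAME, demo_primary_handle);
}

/* Secondary side: synchronous request; the FD comes back in the reply. */
static int
demo_request_fd(void)
{
	struct rte_mp_msg req;
	struct rte_mp_reply rep;
	struct timespec ts = { .tv_sec = 5, .tv_nsec = 0 };
	int fd;

	memset(&req, 0, sizeof(req));
	strlcpy(req.name, DEMO_MP_NAME, sizeof(req.name));
	if (rte_mp_request_sync(&req, &rep, &ts) < 0 || rep.nb_received != 1)
		return -1;
	fd = rep.msgs[0].fds[0];
	free(rep.msgs);			/* reply array is allocated by EAL */
	return fd;
}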
doc/guides/nics/features/mlx4.ini | 1 +
doc/guides/nics/mlx4.rst | 10 +
drivers/net/mlx4/Makefile | 6 +
drivers/net/mlx4/meson.build | 3 +
drivers/net/mlx4/mlx4.c | 378 ++++++++++++++++++++++++++++++++++++--
drivers/net/mlx4/mlx4.h | 60 ++++++
drivers/net/mlx4/mlx4_mp.c | 304 ++++++++++++++++++++++++++++++
drivers/net/mlx4/mlx4_mr.c | 32 +++-
drivers/net/mlx4/mlx4_prm.h | 4 +-
drivers/net/mlx4/mlx4_rxtx.c | 2 +
drivers/net/mlx4/mlx4_rxtx.h | 1 +
drivers/net/mlx4/mlx4_txq.c | 111 +++++++++++
12 files changed, 890 insertions(+), 22 deletions(-)
create mode 100644 drivers/net/mlx4/mlx4_mp.c
diff --git a/doc/guides/nics/features/mlx4.ini b/doc/guides/nics/features/mlx4.ini
index a211aef332..4502aa2a87 100644
--- a/doc/guides/nics/features/mlx4.ini
+++ b/doc/guides/nics/features/mlx4.ini
@@ -29,6 +29,7 @@ Packet type parsing = Y
Basic stats = Y
Stats per queue = Y
FW version = Y
+Multiprocess aware = Y
Other kdrv = Y
Power8 = Y
x86-32 = Y
diff --git a/doc/guides/nics/mlx4.rst b/doc/guides/nics/mlx4.rst
index 4ad361a2c2..cd34838f41 100644
--- a/doc/guides/nics/mlx4.rst
+++ b/doc/guides/nics/mlx4.rst
@@ -145,6 +145,16 @@ below.
Limitations
-----------
+- For secondary process:
+
+ - Forked secondary process not supported.
+ - All mempools must be initialized before rte_eth_dev_start().
+ - External memory not registered in the EAL memseg list cannot be used for
+ DMA unless it has been registered by ``mlx4_mr_update_ext_mp()`` in the
+ primary process and remapped to the same virtual address in the secondary
+ process. If the external memory is registered by the primary process but has
+ a different virtual address in the secondary process, unexpected errors may occur.
+
- CRC stripping is supported by default and always reported as "true".
The ability to enable/disable CRC stripping requires OFED version
4.3-1.5.0.0 and above or rdma-core version v18 and above.
diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile
index b527efd625..8126b0dfc6 100644
--- a/drivers/net/mlx4/Makefile
+++ b/drivers/net/mlx4/Makefile
@@ -18,6 +18,7 @@ ifneq ($(CONFIG_RTE_IBVERBS_LINK_DLOPEN),y)
SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_glue.c
endif
SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_intr.c
+SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_mp.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_mr.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_rxq.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_rxtx.c
@@ -93,6 +94,11 @@ mlx4_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
enum MLX4DV_SET_CTX_ATTR_BUF_ALLOCATORS \
$(AUTOCONF_OUTPUT)
$Q sh -- '$<' '$@' \
+ HAVE_IBV_MLX4_UAR_MMAP_OFFSET \
+ infiniband/mlx4dv.h \
+ enum MLX4DV_QP_MASK_UAR_MMAP_OFFSET \
+ $(AUTOCONF_OUTPUT)
+ $Q sh -- '$<' '$@' \
HAVE_IBV_MLX4_WQE_LSO_SEG \
infiniband/mlx4dv.h \
type 'struct mlx4_wqe_lso_seg' \
diff --git a/drivers/net/mlx4/meson.build b/drivers/net/mlx4/meson.build
index 650e2c8fbc..de020701d1 100644
--- a/drivers/net/mlx4/meson.build
+++ b/drivers/net/mlx4/meson.build
@@ -33,6 +33,7 @@ if build
'mlx4_ethdev.c',
'mlx4_flow.c',
'mlx4_intr.c',
+ 'mlx4_mp.c',
'mlx4_mr.c',
'mlx4_rxq.c',
'mlx4_rxtx.c',
@@ -76,6 +77,8 @@ if build
has_sym_args = [
[ 'HAVE_IBV_MLX4_BUF_ALLOCATORS', 'infiniband/mlx4dv.h',
'MLX4DV_SET_CTX_ATTR_BUF_ALLOCATORS' ],
+ [ 'HAVE_IBV_MLX4_UAR_MMAP_OFFSET', 'infiniband/mlx4dv.h',
+ 'MLX4DV_QP_MASK_UAR_MMAP_OFFSET' ],
]
config = configuration_data()
foreach arg:has_sym_args
diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 0e0b035df0..a5cfcdbee3 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -17,6 +17,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <sys/mman.h>
#include <unistd.h>
/* Verbs headers do not support -pedantic. */
@@ -48,10 +49,21 @@
#include "mlx4_rxtx.h"
#include "mlx4_utils.h"
-struct mlx4_dev_list mlx4_mem_event_cb_list =
- LIST_HEAD_INITIALIZER(mlx4_mem_event_cb_list);
+#if defined(HAVE_IBV_MLX4_UAR_MMAP_OFFSET) && \
+ defined(HAVE_IBV_MLX4_BUF_ALLOCATORS)
+#define HAVE_IBV_MLX4_SECONDARY_PROCESS
+#endif
+
+static const char *MZ_MLX4_PMD_SHARED_DATA = "mlx4_pmd_shared_data";
+
+/* Shared memory between primary and secondary processes. */
+struct mlx4_shared_data *mlx4_shared_data;
-rte_rwlock_t mlx4_mem_event_rwlock = RTE_RWLOCK_INITIALIZER;
+/* Spinlock for mlx4_shared_data allocation. */
+static rte_spinlock_t mlx4_shared_data_lock = RTE_SPINLOCK_INITIALIZER;
+
+/* Process local data for secondary processes. */
+static struct mlx4_local_data mlx4_local_data;
/** Configuration structure for device arguments. */
struct mlx4_conf {
@@ -69,6 +81,77 @@ const char *pmd_mlx4_init_params[] = {
static void mlx4_dev_stop(struct rte_eth_dev *dev);
+/**
+ * Initialize shared data between primary and secondary process.
+ *
+ * A memzone is reserved by primary process and secondary processes attach to
+ * the memzone.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx4_init_shared_data(void)
+{
+ const struct rte_memzone *mz;
+ int ret = 0;
+
+ rte_spinlock_lock(&mlx4_shared_data_lock);
+ if (mlx4_shared_data == NULL) {
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ /* Allocate shared memory. */
+ mz = rte_memzone_reserve(MZ_MLX4_PMD_SHARED_DATA,
+ sizeof(*mlx4_shared_data),
+ SOCKET_ID_ANY, 0);
+ if (mz == NULL) {
+ ERROR("Cannot allocate mlx4 shared data\n");
+ ret = -rte_errno;
+ goto error;
+ }
+ mlx4_shared_data = mz->addr;
+ memset(mlx4_shared_data, 0, sizeof(*mlx4_shared_data));
+ rte_spinlock_init(&mlx4_shared_data->lock);
+ } else {
+ /* Lookup allocated shared memory. */
+ mz = rte_memzone_lookup(MZ_MLX4_PMD_SHARED_DATA);
+ if (mz == NULL) {
+ ERROR("Cannot attach mlx4 shared data\n");
+ ret = -rte_errno;
+ goto error;
+ }
+ mlx4_shared_data = mz->addr;
+ memset(&mlx4_local_data, 0, sizeof(mlx4_local_data));
+ }
+ }
+error:
+ rte_spinlock_unlock(&mlx4_shared_data_lock);
+ return ret;
+}
+
+/**
+ * Uninitialize shared data between primary and secondary process.
+ *
+ * The pointer of secondary process is dereferenced and primary process frees
+ * the memzone.
+ */
+static void
+mlx4_uninit_shared_data(void)
+{
+ const struct rte_memzone *mz;
+
+ rte_spinlock_lock(&mlx4_shared_data_lock);
+ if (mlx4_shared_data) {
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ mz = rte_memzone_lookup(MZ_MLX4_PMD_SHARED_DATA);
+ rte_memzone_free(mz);
+ } else {
+ memset(&mlx4_local_data, 0, sizeof(mlx4_local_data));
+ }
+ mlx4_shared_data = NULL;
+ }
+ rte_spinlock_unlock(&mlx4_shared_data_lock);
+}
+
#ifdef HAVE_IBV_MLX4_BUF_ALLOCATORS
/**
 * Verbs callback to allocate memory. This function should allocate space
@@ -181,6 +264,11 @@ mlx4_dev_start(struct rte_eth_dev *dev)
return 0;
DEBUG("%p: attaching configured flows to all RX queues", (void *)dev);
priv->started = 1;
+ ret = mlx4_tx_uar_remap(dev, priv->ctx->cmd_fd);
+ if (ret) {
+ ERROR("%p: cannot remap UAR", (void *)dev);
+ goto err;
+ }
ret = mlx4_rss_init(priv);
if (ret) {
ERROR("%p: cannot initialize RSS resources: %s",
@@ -208,6 +296,8 @@ mlx4_dev_start(struct rte_eth_dev *dev)
rte_wmb();
dev->tx_pkt_burst = mlx4_tx_burst;
dev->rx_pkt_burst = mlx4_rx_burst;
+ /* Enable datapath on secondary process. */
+ mlx4_mp_req_start_rxtx(dev);
return 0;
err:
mlx4_dev_stop(dev);
@@ -226,6 +316,8 @@ static void
mlx4_dev_stop(struct rte_eth_dev *dev)
{
struct mlx4_priv *priv = dev->data->dev_private;
+ const size_t page_size = sysconf(_SC_PAGESIZE);
+ int i;
if (!priv->started)
return;
@@ -234,9 +326,20 @@ mlx4_dev_stop(struct rte_eth_dev *dev)
dev->tx_pkt_burst = mlx4_tx_burst_removed;
dev->rx_pkt_burst = mlx4_rx_burst_removed;
rte_wmb();
+ /* Disable datapath on secondary process. */
+ mlx4_mp_req_stop_rxtx(dev);
mlx4_flow_sync(priv, NULL);
mlx4_rxq_intr_disable(priv);
mlx4_rss_deinit(priv);
+ for (i = 0; i != dev->data->nb_tx_queues; ++i) {
+ struct txq *txq;
+
+ txq = dev->data->tx_queues[i];
+ if (!txq)
+ continue;
+ munmap((void *)RTE_ALIGN_FLOOR((uintptr_t)txq->msq.db,
+ page_size), page_size);
+ }
}
/**
@@ -259,6 +362,8 @@ mlx4_dev_close(struct rte_eth_dev *dev)
dev->rx_pkt_burst = mlx4_rx_burst_removed;
dev->tx_pkt_burst = mlx4_tx_burst_removed;
rte_wmb();
+ /* Disable datapath on secondary process. */
+ mlx4_mp_req_stop_rxtx(dev);
mlx4_flow_clean(priv);
mlx4_rss_deinit(priv);
for (i = 0; i != dev->data->nb_rx_queues; ++i)
@@ -310,6 +415,16 @@ static const struct eth_dev_ops mlx4_dev_ops = {
.is_removed = mlx4_is_removed,
};
+#ifdef HAVE_IBV_MLX4_SECONDARY_PROCESS
+/* Available operations from secondary process. */
+static const struct eth_dev_ops mlx4_dev_sec_ops = {
+ .stats_get = mlx4_stats_get,
+ .stats_reset = mlx4_stats_reset,
+ .fw_version_get = mlx4_fw_version_get,
+ .dev_infos_get = mlx4_dev_infos_get,
+};
+#endif
+
/**
* Get PCI information from struct ibv_device.
*
@@ -549,6 +664,200 @@ mlx4_hw_rss_sup(struct ibv_context *ctx, struct ibv_pd *pd,
static struct rte_pci_driver mlx4_driver;
+static int
+find_lower_va_bound(const struct rte_memseg_list *msl,
+ const struct rte_memseg *ms, void *arg)
+{
+ void **addr = arg;
+
+ if (msl->external)
+ return 0;
+ if (*addr == NULL)
+ *addr = ms->addr;
+ else
+ *addr = RTE_MIN(*addr, ms->addr);
+
+ return 0;
+}
+
+/**
+ * Reserve UAR address space for primary process.
+ *
+ * A process-local resource is used by both primary and secondary processes
+ * to avoid duplicate reservation. The space has to be available in both the
+ * primary and secondary processes; TXQ UARs map to this area using a fixed
+ * mmap without double checking.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx4_uar_init_primary(void)
+{
+ struct mlx4_shared_data *sd = mlx4_shared_data;
+ void *addr = (void *)0;
+
+ if (sd->uar_base)
+ return 0;
+ /* find out lower bound of hugepage segments */
+ rte_memseg_walk(find_lower_va_bound, &addr);
+ /* keep distance to hugepages to minimize potential conflicts. */
+ addr = RTE_PTR_SUB(addr, (uintptr_t)(MLX4_UAR_OFFSET + MLX4_UAR_SIZE));
+ /* anonymous mmap, no real memory consumption. */
+ addr = mmap(addr, MLX4_UAR_SIZE,
+ PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (addr == MAP_FAILED) {
+ ERROR("failed to reserve UAR address space, please"
+ " adjust MLX4_UAR_SIZE or try --base-virtaddr");
+ rte_errno = ENOMEM;
+ return -rte_errno;
+ }
+ /* Accept either same addr or a new addr returned from mmap if target
+ * range occupied.
+ */
+ INFO("reserved UAR address space: %p", addr);
+ sd->uar_base = addr; /* for primary and secondary UAR re-mmap. */
+ return 0;
+}
+
+/**
+ * Unmap UAR address space reserved for primary process.
+ */
+static void
+mlx4_uar_uninit_primary(void)
+{
+ struct mlx4_shared_data *sd = mlx4_shared_data;
+
+ if (!sd->uar_base)
+ return;
+ munmap(sd->uar_base, MLX4_UAR_SIZE);
+ sd->uar_base = NULL;
+}
+
+/**
+ * Reserve UAR address space for secondary process, align with primary process.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx4_uar_init_secondary(void)
+{
+ struct mlx4_shared_data *sd = mlx4_shared_data;
+ struct mlx4_local_data *ld = &mlx4_local_data;
+ void *addr;
+
+ if (ld->uar_base) { /* Already reserved. */
+ assert(sd->uar_base == ld->uar_base);
+ return 0;
+ }
+ assert(sd->uar_base);
+ /* anonymous mmap, no real memory consumption. */
+ addr = mmap(sd->uar_base, MLX4_UAR_SIZE,
+ PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (addr == MAP_FAILED) {
+ ERROR("UAR mmap failed: %p size: %llu",
+ sd->uar_base, MLX4_UAR_SIZE);
+ rte_errno = ENXIO;
+ return -rte_errno;
+ }
+ if (sd->uar_base != addr) {
+ ERROR("UAR address %p size %llu occupied, please"
+ " adjust MLX4_UAR_OFFSET or try EAL parameter"
+ " --base-virtaddr",
+ sd->uar_base, MLX4_UAR_SIZE);
+ rte_errno = ENXIO;
+ return -rte_errno;
+ }
+ ld->uar_base = addr;
+ INFO("reserved UAR address space: %p", addr);
+ return 0;
+}
+
+/**
+ * Unmap UAR address space reserved for secondary process.
+ */
+static void
+mlx4_uar_uninit_secondary(void)
+{
+ struct mlx4_local_data *ld = &mlx4_local_data;
+
+ if (!ld->uar_base)
+ return;
+ munmap(ld->uar_base, MLX4_UAR_SIZE);
+ ld->uar_base = NULL;
+}
+
+/**
+ * PMD global initialization.
+ *
+ * Independent from individual device, this function initializes global
+ * per-PMD data structures distinguishing primary and secondary processes.
+ * Hence, each initialization is performed once per process.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx4_init_once(void)
+{
+ struct mlx4_shared_data *sd;
+ struct mlx4_local_data *ld = &mlx4_local_data;
+ int ret;
+
+ if (mlx4_init_shared_data())
+ return -rte_errno;
+ sd = mlx4_shared_data;
+ assert(sd);
+ rte_spinlock_lock(&sd->lock);
+ switch (rte_eal_process_type()) {
+ case RTE_PROC_PRIMARY:
+ if (sd->init_done)
+ break;
+ LIST_INIT(&sd->mem_event_cb_list);
+ rte_rwlock_init(&sd->mem_event_rwlock);
+ rte_mem_event_callback_register("MLX4_MEM_EVENT_CB",
+ mlx4_mr_mem_event_cb, NULL);
+ mlx4_mp_init_primary();
+ ret = mlx4_uar_init_primary();
+ if (ret)
+ goto error;
+ sd->init_done = true;
+ break;
+ case RTE_PROC_SECONDARY:
+ if (ld->init_done)
+ break;
+ mlx4_mp_init_secondary();
+ ret = mlx4_uar_init_secondary();
+ if (ret)
+ goto error;
+ ++sd->secondary_cnt;
+ ld->init_done = true;
+ break;
+ default:
+ break;
+ }
+ rte_spinlock_unlock(&sd->lock);
+ return 0;
+error:
+ switch (rte_eal_process_type()) {
+ case RTE_PROC_PRIMARY:
+ mlx4_uar_uninit_primary();
+ mlx4_mp_uninit_primary();
+ rte_mem_event_callback_unregister("MLX4_MEM_EVENT_CB", NULL);
+ break;
+ case RTE_PROC_SECONDARY:
+ mlx4_uar_uninit_secondary();
+ mlx4_mp_uninit_secondary();
+ break;
+ default:
+ break;
+ }
+ rte_spinlock_unlock(&sd->lock);
+ mlx4_uninit_shared_data();
+ return -rte_errno;
+}
+
/**
* DPDK callback to register a PCI device.
*
@@ -579,6 +888,12 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
int i;
(void)pci_drv;
+ err = mlx4_init_once();
+ if (err) {
+ ERROR("unable to init PMD global data: %s",
+ strerror(rte_errno));
+ return -rte_errno;
+ }
assert(pci_drv == &mlx4_driver);
list = mlx4_glue->get_device_list(&i);
if (list == NULL) {
@@ -659,6 +974,7 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
struct mlx4_priv *priv = NULL;
struct rte_eth_dev *eth_dev = NULL;
struct ether_addr mac;
+ char name[RTE_ETH_NAME_MAX_LEN];
/* If port is not enabled, skip. */
if (!(conf.ports.enabled & (1 << i)))
@@ -669,6 +985,44 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
err = ENODEV;
goto port_error;
}
+ snprintf(name, sizeof(name), "%s port %u",
+ mlx4_glue->get_device_name(ibv_dev), port);
+#ifdef HAVE_IBV_MLX4_SECONDARY_PROCESS
+ if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+ eth_dev = rte_eth_dev_attach_secondary(name);
+ if (eth_dev == NULL) {
+ ERROR("can not attach rte ethdev");
+ rte_errno = ENOMEM;
+ err = rte_errno;
+ goto error;
+ }
+ eth_dev->device = &pci_dev->device;
+ eth_dev->dev_ops = &mlx4_dev_sec_ops;
+ /* Receive command fd from primary process */
+ err = mlx4_mp_req_verbs_cmd_fd(eth_dev);
+ if (err < 0) {
+ err = rte_errno;
+ goto error;
+ }
+ /* Remap UAR for Tx queues. */
+ err = mlx4_tx_uar_remap(eth_dev, err);
+ if (err) {
+ err = rte_errno;
+ goto error;
+ }
+ /*
+ * Ethdev pointer is still required as input since
+ * the primary device is not accessible from the
+ * secondary process.
+ */
+ eth_dev->tx_pkt_burst = mlx4_tx_burst;
+ eth_dev->rx_pkt_burst = mlx4_rx_burst;
+ claim_zero(mlx4_glue->close_device(ctx));
+ rte_eth_copy_pci_info(eth_dev, pci_dev);
+ rte_eth_dev_probing_finish(eth_dev);
+ continue;
+ }
+#endif
/* Check port status. */
err = mlx4_glue->query_port(ctx, port, &port_attr);
if (err) {
@@ -774,14 +1128,7 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
/* Get actual MTU if possible. */
mlx4_mtu_get(priv, &priv->mtu);
DEBUG("port %u MTU is %u", priv->port, priv->mtu);
- /* from rte_ethdev.c */
- {
- char name[RTE_ETH_NAME_MAX_LEN];
-
- snprintf(name, sizeof(name), "%s port %u",
- mlx4_glue->get_device_name(ibv_dev), port);
- eth_dev = rte_eth_dev_allocate(name);
- }
+ eth_dev = rte_eth_dev_allocate(name);
if (eth_dev == NULL) {
err = ENOMEM;
ERROR("can not allocate rte ethdev");
@@ -842,9 +1189,10 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
goto port_error;
}
/* Add device to memory callback list. */
- rte_rwlock_write_lock(&mlx4_mem_event_rwlock);
- LIST_INSERT_HEAD(&mlx4_mem_event_cb_list, priv, mem_event_cb);
- rte_rwlock_write_unlock(&mlx4_mem_event_rwlock);
+ rte_rwlock_write_lock(&mlx4_shared_data->mem_event_rwlock);
+ LIST_INSERT_HEAD(&mlx4_shared_data->mem_event_cb_list,
+ priv, mem_event_cb);
+ rte_rwlock_write_unlock(&mlx4_shared_data->mem_event_rwlock);
rte_eth_dev_probing_finish(eth_dev);
continue;
port_error:
@@ -1075,8 +1423,6 @@ RTE_INIT(rte_mlx4_pmd_init)
}
mlx4_glue->fork_init();
rte_pci_register(&mlx4_driver);
- rte_mem_event_callback_register("MLX4_MEM_EVENT_CB",
- mlx4_mr_mem_event_cb, NULL);
}
RTE_PMD_EXPORT_NAME(net_mlx4, __COUNTER__);
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index d43e05ea74..832edc962d 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -53,6 +53,16 @@
/** Port parameter. */
#define MLX4_PMD_PORT_KVARG "port"
+/* Reserved address space for UAR mapping. */
+#define MLX4_UAR_SIZE (1ULL << (sizeof(uintptr_t) * 4))
+
+/* Offset of the reserved UAR address space from hugepage memory. The offset
+ * is used to minimize the chance that an address adjacent to the hugepages is
+ * used by other code in either the primary or a secondary process; failing to
+ * map the Tx UAR would make Tx packets invisible to the HW.
+ */
+#define MLX4_UAR_OFFSET (2ULL << (sizeof(uintptr_t) * 4))
+
enum {
PCI_VENDOR_ID_MELLANOX = 0x15b3,
};
@@ -63,6 +73,26 @@ enum {
PCI_DEVICE_ID_MELLANOX_CONNECTX3PRO = 0x1007,
};
+/* Request types for IPC. */
+enum mlx4_mp_req_type {
+ MLX4_MP_REQ_VERBS_CMD_FD = 1,
+ MLX4_MP_REQ_START_RXTX,
+ MLX4_MP_REQ_STOP_RXTX,
+};
+
+/* Parameters for IPC. */
+struct mlx4_mp_param {
+ enum mlx4_mp_req_type type;
+ int port_id;
+ int result;
+};
+
+/** Request timeout for IPC. */
+#define MLX4_MP_REQ_TIMEOUT_SEC 5
+
+/** Key string for IPC. */
+#define MLX4_MP_NAME "net_mlx4_mp"
+
/** Driver name reported to lower layers and used in log output. */
#define MLX4_DRIVER_NAME "net_mlx4"
@@ -93,6 +123,27 @@ struct mlx4_verbs_alloc_ctx {
LIST_HEAD(mlx4_dev_list, mlx4_priv);
LIST_HEAD(mlx4_mr_list, mlx4_mr);
+/* Shared data between primary and secondary processes. */
+struct mlx4_shared_data {
+ rte_spinlock_t lock;
+ /* Global spinlock for primary and secondary processes. */
+ int init_done; /* Whether primary has done initialization. */
+ unsigned int secondary_cnt; /* Number of secondary processes init'd. */
+ void *uar_base;
+ /* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
+ struct mlx4_dev_list mem_event_cb_list;
+ rte_rwlock_t mem_event_rwlock;
+};
+
+/* Per-process data structure, not visible to other processes. */
+struct mlx4_local_data {
+ int init_done; /* Whether a secondary has done initialization. */
+ void *uar_base;
+ /* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
+};
+
+extern struct mlx4_shared_data *mlx4_shared_data;
+
/** Private data structure. */
struct mlx4_priv {
LIST_ENTRY(mlx4_priv) mem_event_cb;
@@ -175,4 +226,13 @@ void mlx4_rxq_intr_disable(struct mlx4_priv *priv);
int mlx4_rx_intr_disable(struct rte_eth_dev *dev, uint16_t idx);
int mlx4_rx_intr_enable(struct rte_eth_dev *dev, uint16_t idx);
+/* mlx4_mp.c */
+void mlx4_mp_req_start_rxtx(struct rte_eth_dev *dev);
+void mlx4_mp_req_stop_rxtx(struct rte_eth_dev *dev);
+int mlx4_mp_req_verbs_cmd_fd(struct rte_eth_dev *dev);
+void mlx4_mp_init_primary(void);
+void mlx4_mp_uninit_primary(void);
+void mlx4_mp_init_secondary(void);
+void mlx4_mp_uninit_secondary(void);
+
#endif /* RTE_PMD_MLX4_H_ */
diff --git a/drivers/net/mlx4/mlx4_mp.c b/drivers/net/mlx4/mlx4_mp.c
new file mode 100644
index 0000000000..eaeb257348
--- /dev/null
+++ b/drivers/net/mlx4/mlx4_mp.c
@@ -0,0 +1,304 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2019 6WIND S.A.
+ * Copyright 2019 Mellanox Technologies, Ltd
+ */
+
+#include <assert.h>
+#include <stdio.h>
+#include <time.h>
+
+#include <rte_eal.h>
+#include <rte_ethdev_driver.h>
+#include <rte_string_fns.h>
+
+#include "mlx4.h"
+#include "mlx4_rxtx.h"
+#include "mlx4_utils.h"
+
+/**
+ * Initialize IPC message.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet structure.
+ * @param[out] msg
+ * Pointer to message to fill in.
+ * @param[in] type
+ * Message type.
+ */
+static inline void
+mp_init_msg(struct rte_eth_dev *dev, struct rte_mp_msg *msg,
+ enum mlx4_mp_req_type type)
+{
+ struct mlx4_mp_param *param = (struct mlx4_mp_param *)msg->param;
+
+ memset(msg, 0, sizeof(*msg));
+ strlcpy(msg->name, MLX4_MP_NAME, sizeof(msg->name));
+ msg->len_param = sizeof(*param);
+ param->type = type;
+ param->port_id = dev->data->port_id;
+}
+
+/**
+ * IPC message handler of primary process.
+ *
+ * @param[in] mp_msg
+ * Pointer to the received IPC message.
+ * @param[in] peer
+ * Pointer to the peer socket path.
+ *
+ * @return
+ * 0 on success, negative errno value otherwise and rte_errno is set.
+ */
+static int
+mp_primary_handle(const struct rte_mp_msg *mp_msg, const void *peer)
+{
+ struct rte_mp_msg mp_res;
+ struct mlx4_mp_param *res = (struct mlx4_mp_param *)mp_res.param;
+ const struct mlx4_mp_param *param =
+ (const struct mlx4_mp_param *)mp_msg->param;
+ struct rte_eth_dev *dev;
+ struct mlx4_priv *priv;
+ int ret;
+
+ assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+ if (!rte_eth_dev_is_valid_port(param->port_id)) {
+ rte_errno = ENODEV;
+ ERROR("port %u invalid port ID", param->port_id);
+ return -rte_errno;
+ }
+ dev = &rte_eth_devices[param->port_id];
+ priv = dev->data->dev_private;
+ switch (param->type) {
+ case MLX4_MP_REQ_VERBS_CMD_FD:
+ mp_init_msg(dev, &mp_res, param->type);
+ mp_res.num_fds = 1;
+ mp_res.fds[0] = priv->ctx->cmd_fd;
+ res->result = 0;
+ ret = rte_mp_reply(&mp_res, peer);
+ break;
+ default:
+ rte_errno = EINVAL;
+ ERROR("port %u invalid mp request type", dev->data->port_id);
+ return -rte_errno;
+ }
+ return ret;
+}
+
+/**
+ * IPC message handler of a secondary process.
+ *
+ * @param[in] mp_msg
+ * Pointer to the received IPC message.
+ * @param[in] peer
+ * Pointer to the peer socket path.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mp_secondary_handle(const struct rte_mp_msg *mp_msg, const void *peer)
+{
+ struct rte_mp_msg mp_res;
+ struct mlx4_mp_param *res = (struct mlx4_mp_param *)mp_res.param;
+ const struct mlx4_mp_param *param =
+ (const struct mlx4_mp_param *)mp_msg->param;
+ struct rte_eth_dev *dev;
+ int ret;
+
+ assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
+ if (!rte_eth_dev_is_valid_port(param->port_id)) {
+ rte_errno = ENODEV;
+ ERROR("port %u invalid port ID", param->port_id);
+ return -rte_errno;
+ }
+ dev = &rte_eth_devices[param->port_id];
+ switch (param->type) {
+ case MLX4_MP_REQ_START_RXTX:
+ INFO("port %u starting datapath", dev->data->port_id);
+ rte_mb();
+ dev->tx_pkt_burst = mlx4_tx_burst;
+ dev->rx_pkt_burst = mlx4_rx_burst;
+ mp_init_msg(dev, &mp_res, param->type);
+ res->result = 0;
+ ret = rte_mp_reply(&mp_res, peer);
+ break;
+ case MLX4_MP_REQ_STOP_RXTX:
+ INFO("port %u stopping datapath", dev->data->port_id);
+ dev->tx_pkt_burst = mlx4_tx_burst_removed;
+ dev->rx_pkt_burst = mlx4_rx_burst_removed;
+ rte_mb();
+ mp_init_msg(dev, &mp_res, param->type);
+ res->result = 0;
+ ret = rte_mp_reply(&mp_res, peer);
+ break;
+ default:
+ rte_errno = EINVAL;
+ ERROR("port %u invalid mp request type", dev->data->port_id);
+ return -rte_errno;
+ }
+ return ret;
+}
+
+/**
+ * Broadcast request of stopping/starting data-path to secondary processes.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet structure.
+ * @param[in] type
+ * Request type.
+ */
+static void
+mp_req_on_rxtx(struct rte_eth_dev *dev, enum mlx4_mp_req_type type)
+{
+ struct rte_mp_msg mp_req;
+ struct rte_mp_msg *mp_res;
+ struct rte_mp_reply mp_rep;
+ struct mlx4_mp_param *res __rte_unused;
+ struct timespec ts = {.tv_sec = MLX4_MP_REQ_TIMEOUT_SEC, .tv_nsec = 0};
+ int ret;
+ int i;
+
+ assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+ if (!mlx4_shared_data->secondary_cnt)
+ return;
+ if (type != MLX4_MP_REQ_START_RXTX && type != MLX4_MP_REQ_STOP_RXTX) {
+ ERROR("port %u unknown request (req_type %d)",
+ dev->data->port_id, type);
+ return;
+ }
+ mp_init_msg(dev, &mp_req, type);
+ ret = rte_mp_request_sync(&mp_req, &mp_rep, &ts);
+ if (ret) {
+ ERROR("port %u failed to request stop/start Rx/Tx (%d)",
+ dev->data->port_id, type);
+ goto exit;
+ }
+ if (mp_rep.nb_sent != mp_rep.nb_received) {
+ ERROR("port %u not all secondaries responded (req_type %d)",
+ dev->data->port_id, type);
+ goto exit;
+ }
+ for (i = 0; i < mp_rep.nb_received; i++) {
+ mp_res = &mp_rep.msgs[i];
+ res = (struct mlx4_mp_param *)mp_res->param;
+ if (res->result) {
+ ERROR("port %u request failed on secondary #%d",
+ dev->data->port_id, i);
+ goto exit;
+ }
+ }
+exit:
+ free(mp_rep.msgs);
+}
+
+/**
+ * Broadcast request of starting data-path to secondary processes. The request
+ * is synchronous.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet structure.
+ */
+void
+mlx4_mp_req_start_rxtx(struct rte_eth_dev *dev)
+{
+ mp_req_on_rxtx(dev, MLX4_MP_REQ_START_RXTX);
+}
+
+/**
+ * Broadcast request of stopping data-path to secondary processes. The request
+ * is synchronous.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet structure.
+ */
+void
+mlx4_mp_req_stop_rxtx(struct rte_eth_dev *dev)
+{
+ mp_req_on_rxtx(dev, MLX4_MP_REQ_STOP_RXTX);
+}
+
+/**
+ * Request the Verbs command file descriptor from the primary process.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet structure.
+ *
+ * @return
+ * fd on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx4_mp_req_verbs_cmd_fd(struct rte_eth_dev *dev)
+{
+ struct rte_mp_msg mp_req;
+ struct rte_mp_msg *mp_res;
+ struct rte_mp_reply mp_rep;
+ struct mlx4_mp_param *res;
+ struct timespec ts = {.tv_sec = MLX4_MP_REQ_TIMEOUT_SEC, .tv_nsec = 0};
+ int ret;
+
+ assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
+ mp_init_msg(dev, &mp_req, MLX4_MP_REQ_VERBS_CMD_FD);
+ ret = rte_mp_request_sync(&mp_req, &mp_rep, &ts);
+ if (ret) {
+ ERROR("port %u request to primary process failed",
+ dev->data->port_id);
+ return -rte_errno;
+ }
+ assert(mp_rep.nb_received == 1);
+ mp_res = &mp_rep.msgs[0];
+ res = (struct mlx4_mp_param *)mp_res->param;
+ if (res->result) {
+ rte_errno = -res->result;
+ ERROR("port %u failed to get command FD from primary process",
+ dev->data->port_id);
+ ret = -rte_errno;
+ goto exit;
+ }
+ assert(mp_res->num_fds == 1);
+ ret = mp_res->fds[0];
+ DEBUG("port %u command FD from primary is %d",
+ dev->data->port_id, ret);
+exit:
+ free(mp_rep.msgs);
+ return ret;
+}
+
+/**
+ * Initialize by primary process.
+ */
+void
+mlx4_mp_init_primary(void)
+{
+ assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+ rte_mp_action_register(MLX4_MP_NAME, mp_primary_handle);
+}
+
+/**
+ * Un-initialize by primary process.
+ */
+void
+mlx4_mp_uninit_primary(void)
+{
+ assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+ rte_mp_action_unregister(MLX4_MP_NAME);
+}
+
+/**
+ * Initialize by secondary process.
+ */
+void
+mlx4_mp_init_secondary(void)
+{
+ assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
+ rte_mp_action_register(MLX4_MP_NAME, mp_secondary_handle);
+}
+
+/**
+ * Un-initialize by secondary process.
+ */
+void
+mlx4_mp_uninit_secondary(void)
+{
+ assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
+ rte_mp_action_unregister(MLX4_MP_NAME);
+}
diff --git a/drivers/net/mlx4/mlx4_mr.c b/drivers/net/mlx4/mlx4_mr.c
index e4be46ab2a..01894faecf 100644
--- a/drivers/net/mlx4/mlx4_mr.c
+++ b/drivers/net/mlx4/mlx4_mr.c
@@ -489,6 +489,8 @@ mlx4_mr_garbage_collect(struct rte_eth_dev *dev)
struct mlx4_mr *mr_next;
struct mlx4_mr_list free_list = LIST_HEAD_INITIALIZER(free_list);
+ /* Must be called from the primary process. */
+ assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
/*
* MR can't be freed with holding the lock because rte_free() could call
* memory free callback function. This will be a deadlock situation.
@@ -561,6 +563,14 @@ mlx4_mr_create(struct rte_eth_dev *dev, struct mlx4_mr_cache *entry,
DEBUG("port %u creating a MR using address (%p)",
dev->data->port_id, (void *)addr);
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ WARN("port %u using address (%p) of unregistered mempool"
+ " in secondary process, please create mempool"
+ " before rte_eth_dev_start()",
+ dev->data->port_id, (void *)addr);
+ rte_errno = EPERM;
+ goto err_nolock;
+ }
/*
* Release detached MRs if any. This can't be called with holding either
* memory_hotplug_lock or priv->mr.rwlock. MRs on the free list have
@@ -890,14 +900,17 @@ mlx4_mr_mem_event_cb(enum rte_mem_event event_type, const void *addr,
size_t len, void *arg __rte_unused)
{
struct mlx4_priv *priv;
+ struct mlx4_dev_list *dev_list = &mlx4_shared_data->mem_event_cb_list;
+ /* Must be called from the primary process. */
+ assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
switch (event_type) {
case RTE_MEM_EVENT_FREE:
- rte_rwlock_read_lock(&mlx4_mem_event_rwlock);
+ rte_rwlock_read_lock(&mlx4_shared_data->mem_event_rwlock);
/* Iterate all the existing mlx4 devices. */
- LIST_FOREACH(priv, &mlx4_mem_event_cb_list, mem_event_cb)
+ LIST_FOREACH(priv, dev_list, mem_event_cb)
mlx4_mr_mem_event_free_cb(ETH_DEV(priv), addr, len);
- rte_rwlock_read_unlock(&mlx4_mem_event_rwlock);
+ rte_rwlock_read_unlock(&mlx4_shared_data->mem_event_rwlock);
break;
case RTE_MEM_EVENT_ALLOC:
default:
@@ -1130,6 +1143,7 @@ mlx4_mr_update_ext_mp_cb(struct rte_mempool *mp, void *opaque,
struct mlx4_mr_cache entry;
uint32_t lkey;
+ assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
/* If already registered, it should return. */
rte_rwlock_read_lock(&priv->mr.rwlock);
lkey = mr_lookup_dev(dev, &entry, addr);
@@ -1225,6 +1239,14 @@ mlx4_tx_update_ext_mp(struct txq *txq, uintptr_t addr, struct rte_mempool *mp)
struct mlx4_mr_ctrl *mr_ctrl = &txq->mr_ctrl;
struct mlx4_priv *priv = txq->priv;
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ WARN("port %u using address (%p) from unregistered mempool"
+ " having externally allocated memory"
+ " in secondary process, please create mempool"
+ " prior to rte_eth_dev_start()",
+ PORT_ID(priv), (void *)addr);
+ return UINT32_MAX;
+ }
mlx4_mr_update_ext_mp(ETH_DEV(priv), mr_ctrl, mp);
return mlx4_tx_addr2mr_bh(txq, addr);
}
@@ -1336,9 +1358,9 @@ mlx4_mr_release(struct rte_eth_dev *dev)
struct mlx4_mr *mr_next = LIST_FIRST(&priv->mr.mr_list);
/* Remove from memory callback device list. */
- rte_rwlock_write_lock(&mlx4_mem_event_rwlock);
+ rte_rwlock_write_lock(&mlx4_shared_data->mem_event_rwlock);
LIST_REMOVE(priv, mem_event_cb);
- rte_rwlock_write_unlock(&mlx4_mem_event_rwlock);
+ rte_rwlock_write_unlock(&mlx4_shared_data->mem_event_rwlock);
#ifndef NDEBUG
mlx4_mr_dump_dev(dev);
#endif
diff --git a/drivers/net/mlx4/mlx4_prm.h b/drivers/net/mlx4/mlx4_prm.h
index aef77ba06e..b3e11dde25 100644
--- a/drivers/net/mlx4/mlx4_prm.h
+++ b/drivers/net/mlx4/mlx4_prm.h
@@ -77,7 +77,9 @@ struct mlx4_sq {
uint32_t owner_opcode;
/**< Default owner opcode with HW valid owner bit. */
uint32_t stamp; /**< Stamp value with an invalid HW owner bit. */
- volatile uint32_t *db; /**< Pointer to the doorbell. */
+ volatile uint32_t *qp_sdb; /**< Pointer to the doorbell. */
+ volatile uint32_t *db; /**< Pointer to the doorbell remapped. */
+ off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
uint32_t doorbell_qpn; /**< qp number to write to the doorbell. */
};
diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c
index 8c88effcd1..f22f1ba559 100644
--- a/drivers/net/mlx4/mlx4_rxtx.c
+++ b/drivers/net/mlx4/mlx4_rxtx.c
@@ -1365,6 +1365,7 @@ mlx4_tx_burst_removed(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
(void)dpdk_txq;
(void)pkts;
(void)pkts_n;
+ rte_mb();
return 0;
}
@@ -1390,5 +1391,6 @@ mlx4_rx_burst_removed(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
(void)dpdk_rxq;
(void)pkts;
(void)pkts_n;
+ rte_mb();
return 0;
}
diff --git a/drivers/net/mlx4/mlx4_rxtx.h b/drivers/net/mlx4/mlx4_rxtx.h
index 9409602b32..7d7a8988ed 100644
--- a/drivers/net/mlx4/mlx4_rxtx.h
+++ b/drivers/net/mlx4/mlx4_rxtx.h
@@ -152,6 +152,7 @@ uint16_t mlx4_rx_burst_removed(void *dpdk_rxq, struct rte_mbuf **pkts,
/* mlx4_txq.c */
+int mlx4_tx_uar_remap(struct rte_eth_dev *dev, int fd);
uint64_t mlx4_get_tx_port_offloads(struct mlx4_priv *priv);
int mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx,
uint16_t desc, unsigned int socket,
diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c
index 2dc198e77f..ed00843425 100644
--- a/drivers/net/mlx4/mlx4_txq.c
+++ b/drivers/net/mlx4/mlx4_txq.c
@@ -13,7 +13,9 @@
#include <stddef.h>
#include <stdint.h>
#include <string.h>
+#include <sys/mman.h>
#include <inttypes.h>
+#include <unistd.h>
/* Verbs headers do not support -pedantic. */
#ifdef PEDANTIC
@@ -38,6 +40,98 @@
#include "mlx4_utils.h"
/**
+ * Mmap Tx UAR (HW doorbell) pages into the reserved UAR address space.
+ * Both primary and secondary processes perform the mmap so that UAR
+ * addresses match across processes.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet device.
+ * @param fd
+ * Verbs file descriptor to map UAR pages.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
+int
+mlx4_tx_uar_remap(struct rte_eth_dev *dev, int fd)
+{
+ unsigned int i, j;
+ const unsigned int txqs_n = dev->data->nb_tx_queues;
+ uintptr_t pages[txqs_n];
+ unsigned int pages_n = 0;
+ uintptr_t uar_va;
+ uintptr_t off;
+ void *addr;
+ void *ret;
+ struct txq *txq;
+ int already_mapped;
+ size_t page_size = sysconf(_SC_PAGESIZE);
+
+ memset(pages, 0, txqs_n * sizeof(uintptr_t));
+ /*
+ * As in rdma-core, UARs are mapped at OS page size granularity.
+ * Use page-aligned addresses to avoid duplicate mmap calls.
+ * See the libmlx4 function mlx4_init_context().
+ */
+ for (i = 0; i != txqs_n; ++i) {
+ txq = dev->data->tx_queues[i];
+ if (!txq)
+ continue;
+ /* UAR address from Verbs, used to find duplicates and the in-page offset. */
+ uar_va = (uintptr_t)txq->msq.qp_sdb;
+ off = uar_va & (page_size - 1); /* offset in page. */
+ uar_va = RTE_ALIGN_FLOOR(uar_va, page_size); /* page addr. */
+ already_mapped = 0;
+ for (j = 0; j != pages_n; ++j) {
+ if (pages[j] == uar_va) {
+ already_mapped = 1;
+ break;
+ }
+ }
+ /* new address in reserved UAR address space. */
+ addr = RTE_PTR_ADD(mlx4_shared_data->uar_base,
+ uar_va & (uintptr_t)(MLX4_UAR_SIZE - 1));
+ if (!already_mapped) {
+ pages[pages_n++] = uar_va;
+ /* fixed mmap to specified address in reserved
+ * address space.
+ */
+ ret = mmap(addr, page_size,
+ PROT_WRITE, MAP_FIXED | MAP_SHARED, fd,
+ txq->msq.uar_mmap_offset);
+ if (ret != addr) {
+ /* fixed mmap has to return same address. */
+ ERROR("port %u call to mmap failed on UAR"
+ " for txq %u",
+ dev->data->port_id, i);
+ rte_errno = ENXIO;
+ return -rte_errno;
+ }
+ }
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* save once. */
+ txq->msq.db = RTE_PTR_ADD((void *)addr, off);
+ else
+ assert(txq->msq.db ==
+ RTE_PTR_ADD((void *)addr, off));
+ }
+ return 0;
+}
+#else
+int
+mlx4_tx_uar_remap(struct rte_eth_dev *dev __rte_unused, int fd __rte_unused)
+{
+ /*
+ * If rdma-core doesn't support UAR remap, secondary process is not
+ * supported, thus secondary cannot call this function but only primary
+ * makes a call. Return success to not interrupt initialization.
+ */
+ assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+ return 0;
+}
+#endif
+
+/**
* Free Tx queue elements.
*
* @param txq
@@ -89,7 +183,12 @@ mlx4_txq_fill_dv_obj_info(struct txq *txq, struct mlx4dv_obj *mlxdv)
sq->owner_opcode = MLX4_OPCODE_SEND | (0u << MLX4_SQ_OWNER_BIT);
sq->stamp = rte_cpu_to_be_32(MLX4_SQ_STAMP_VAL |
(0u << MLX4_SQ_OWNER_BIT));
+#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
+ sq->uar_mmap_offset = dqp->uar_mmap_offset;
+ sq->qp_sdb = dqp->sdb;
+#else
sq->db = dqp->sdb;
+#endif
sq->doorbell_qpn = dqp->doorbell_qpn;
cq->buf = dcq->buf.buf;
cq->cqe_cnt = dcq->cqe_cnt;
@@ -307,6 +406,11 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
goto error;
}
/* Retrieve device queue information. */
+#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
+ dv_qp = (struct mlx4dv_qp){
+ .comp_mask = MLX4DV_QP_MASK_UAR_MMAP_OFFSET,
+ };
+#endif
mlxdv.cq.in = txq->cq;
mlxdv.cq.out = &dv_cq;
mlxdv.qp.in = txq->qp;
@@ -318,6 +422,13 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
" accessing the device queues", (void *)dev);
goto error;
}
+#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
+ if (!(dv_qp.comp_mask & MLX4DV_QP_MASK_UAR_MMAP_OFFSET)) {
+ rte_errno = EINVAL;
+ ERROR("%p: failed to obtain UAR mmap offset", (void *)dev);
+ goto error;
+ }
+#endif
mlx4_txq_fill_dv_obj_info(txq, &mlxdv);
/* Save first wqe pointer in the first element. */
(&(*txq->elts)[0])->wqe =
--
2.11.0
^ permalink raw reply [flat|nested] 30+ messages in thread
* [dpdk-dev] [PATCH v2 3/3] net/mlx4: add secondary process support
2019-03-25 19:18 ` [dpdk-dev] [PATCH v2 3/3] net/mlx4: add secondary process support Yongseok Koh
@ 2019-03-25 19:18 ` Yongseok Koh
2019-03-26 19:33 ` Shahaf Shuler
1 sibling, 0 replies; 30+ messages in thread
From: Yongseok Koh @ 2019-03-25 19:18 UTC (permalink / raw)
To: shahafs; +Cc: dev
In order to support secondary process, a few features are required.
a) rdma-core library should allocate device resources using DPDK's memory
allocator.
b) UAR should be remapped for secondary processes. Currently, in order not
to use different data structure for secondary processes, PMD tries to
reserve identical virtual address space for both primary and secondary
processes.
c) IPC channel is necessary, which can be easily set with rte_mp APIs.
Through the channel, Verbs command FD is delivered to the secondary
process and the device stop/start event is also broadcast from primary
process.
Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
doc/guides/nics/features/mlx4.ini | 1 +
doc/guides/nics/mlx4.rst | 10 +
drivers/net/mlx4/Makefile | 6 +
drivers/net/mlx4/meson.build | 3 +
drivers/net/mlx4/mlx4.c | 378 ++++++++++++++++++++++++++++++++++++--
drivers/net/mlx4/mlx4.h | 60 ++++++
drivers/net/mlx4/mlx4_mp.c | 304 ++++++++++++++++++++++++++++++
drivers/net/mlx4/mlx4_mr.c | 32 +++-
drivers/net/mlx4/mlx4_prm.h | 4 +-
drivers/net/mlx4/mlx4_rxtx.c | 2 +
drivers/net/mlx4/mlx4_rxtx.h | 1 +
drivers/net/mlx4/mlx4_txq.c | 111 +++++++++++
12 files changed, 890 insertions(+), 22 deletions(-)
create mode 100644 drivers/net/mlx4/mlx4_mp.c
diff --git a/doc/guides/nics/features/mlx4.ini b/doc/guides/nics/features/mlx4.ini
index a211aef332..4502aa2a87 100644
--- a/doc/guides/nics/features/mlx4.ini
+++ b/doc/guides/nics/features/mlx4.ini
@@ -29,6 +29,7 @@ Packet type parsing = Y
Basic stats = Y
Stats per queue = Y
FW version = Y
+Multiprocess aware = Y
Other kdrv = Y
Power8 = Y
x86-32 = Y
diff --git a/doc/guides/nics/mlx4.rst b/doc/guides/nics/mlx4.rst
index 4ad361a2c2..cd34838f41 100644
--- a/doc/guides/nics/mlx4.rst
+++ b/doc/guides/nics/mlx4.rst
@@ -145,6 +145,16 @@ below.
Limitations
-----------
+- For secondary process:
+
+ - Forked secondary process not supported.
+ - All mempools must be initialized before rte_eth_dev_start().
+ - External memory unregistered in EAL memseg list cannot be used for DMA
+ unless such memory has been registered by ``mlx4_mr_update_ext_mp()`` in
+ primary process and remapped to the same virtual address in secondary
+ process. If the external memory is registered by primary process but has
+ different virtual address in secondary process, unexpected error may happen.
+
- CRC stripping is supported by default and always reported as "true".
The ability to enable/disable CRC stripping requires OFED version
4.3-1.5.0.0 and above or rdma-core version v18 and above.
diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile
index b527efd625..8126b0dfc6 100644
--- a/drivers/net/mlx4/Makefile
+++ b/drivers/net/mlx4/Makefile
@@ -18,6 +18,7 @@ ifneq ($(CONFIG_RTE_IBVERBS_LINK_DLOPEN),y)
SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_glue.c
endif
SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_intr.c
+SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_mp.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_mr.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_rxq.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_rxtx.c
@@ -93,6 +94,11 @@ mlx4_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
enum MLX4DV_SET_CTX_ATTR_BUF_ALLOCATORS \
$(AUTOCONF_OUTPUT)
$Q sh -- '$<' '$@' \
+ HAVE_IBV_MLX4_UAR_MMAP_OFFSET \
+ infiniband/mlx4dv.h \
+ enum MLX4DV_QP_MASK_UAR_MMAP_OFFSET \
+ $(AUTOCONF_OUTPUT)
+ $Q sh -- '$<' '$@' \
HAVE_IBV_MLX4_WQE_LSO_SEG \
infiniband/mlx4dv.h \
type 'struct mlx4_wqe_lso_seg' \
diff --git a/drivers/net/mlx4/meson.build b/drivers/net/mlx4/meson.build
index 650e2c8fbc..de020701d1 100644
--- a/drivers/net/mlx4/meson.build
+++ b/drivers/net/mlx4/meson.build
@@ -33,6 +33,7 @@ if build
'mlx4_ethdev.c',
'mlx4_flow.c',
'mlx4_intr.c',
+ 'mlx4_mp.c',
'mlx4_mr.c',
'mlx4_rxq.c',
'mlx4_rxtx.c',
@@ -76,6 +77,8 @@ if build
has_sym_args = [
[ 'HAVE_IBV_MLX4_BUF_ALLOCATORS', 'infiniband/mlx4dv.h',
'MLX4DV_SET_CTX_ATTR_BUF_ALLOCATORS' ],
+ [ 'HAVE_IBV_MLX4_UAR_MMAP_OFFSET', 'infiniband/mlx4dv.h',
+ 'MLX4DV_QP_MASK_UAR_MMAP_OFFSET' ],
]
config = configuration_data()
foreach arg:has_sym_args
diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 0e0b035df0..a5cfcdbee3 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -17,6 +17,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <sys/mman.h>
#include <unistd.h>
/* Verbs headers do not support -pedantic. */
@@ -48,10 +49,21 @@
#include "mlx4_rxtx.h"
#include "mlx4_utils.h"
-struct mlx4_dev_list mlx4_mem_event_cb_list =
- LIST_HEAD_INITIALIZER(mlx4_mem_event_cb_list);
+#if defined(HAVE_IBV_MLX4_UAR_MMAP_OFFSET) && \
+ defined(HAVE_IBV_MLX4_BUF_ALLOCATORS)
+#define HAVE_IBV_MLX4_SECONDARY_PROCESS
+#endif
+
+static const char *MZ_MLX4_PMD_SHARED_DATA = "mlx4_pmd_shared_data";
+
+/* Shared memory between primary and secondary processes. */
+struct mlx4_shared_data *mlx4_shared_data;
-rte_rwlock_t mlx4_mem_event_rwlock = RTE_RWLOCK_INITIALIZER;
+/* Spinlock for mlx4_shared_data allocation. */
+static rte_spinlock_t mlx4_shared_data_lock = RTE_SPINLOCK_INITIALIZER;
+
+/* Process local data for secondary processes. */
+static struct mlx4_local_data mlx4_local_data;
/** Configuration structure for device arguments. */
struct mlx4_conf {
@@ -69,6 +81,77 @@ const char *pmd_mlx4_init_params[] = {
static void mlx4_dev_stop(struct rte_eth_dev *dev);
+/**
+ * Initialize shared data between primary and secondary process.
+ *
+ * A memzone is reserved by primary process and secondary processes attach to
+ * the memzone.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx4_init_shared_data(void)
+{
+ const struct rte_memzone *mz;
+ int ret = 0;
+
+ rte_spinlock_lock(&mlx4_shared_data_lock);
+ if (mlx4_shared_data == NULL) {
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ /* Allocate shared memory. */
+ mz = rte_memzone_reserve(MZ_MLX4_PMD_SHARED_DATA,
+ sizeof(*mlx4_shared_data),
+ SOCKET_ID_ANY, 0);
+ if (mz == NULL) {
+ ERROR("Cannot allocate mlx4 shared data\n");
+ ret = -rte_errno;
+ goto error;
+ }
+ mlx4_shared_data = mz->addr;
+ memset(mlx4_shared_data, 0, sizeof(*mlx4_shared_data));
+ rte_spinlock_init(&mlx4_shared_data->lock);
+ } else {
+ /* Lookup allocated shared memory. */
+ mz = rte_memzone_lookup(MZ_MLX4_PMD_SHARED_DATA);
+ if (mz == NULL) {
+ ERROR("Cannot attach mlx4 shared data\n");
+ ret = -rte_errno;
+ goto error;
+ }
+ mlx4_shared_data = mz->addr;
+ memset(&mlx4_local_data, 0, sizeof(mlx4_local_data));
+ }
+ }
+error:
+ rte_spinlock_unlock(&mlx4_shared_data_lock);
+ return ret;
+}
+
+/**
+ * Uninitialize shared data between primary and secondary process.
+ *
+ * The pointer of secondary process is dereferenced and primary process frees
+ * the memzone.
+ */
+static void
+mlx4_uninit_shared_data(void)
+{
+ const struct rte_memzone *mz;
+
+ rte_spinlock_lock(&mlx4_shared_data_lock);
+ if (mlx4_shared_data) {
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ mz = rte_memzone_lookup(MZ_MLX4_PMD_SHARED_DATA);
+ rte_memzone_free(mz);
+ } else {
+ memset(&mlx4_local_data, 0, sizeof(mlx4_local_data));
+ }
+ mlx4_shared_data = NULL;
+ }
+ rte_spinlock_unlock(&mlx4_shared_data_lock);
+}
+
#ifdef HAVE_IBV_MLX4_BUF_ALLOCATORS
/**
* Verbs callback to allocate a memory. This function should allocate the space
@@ -181,6 +264,11 @@ mlx4_dev_start(struct rte_eth_dev *dev)
return 0;
DEBUG("%p: attaching configured flows to all RX queues", (void *)dev);
priv->started = 1;
+ ret = mlx4_tx_uar_remap(dev, priv->ctx->cmd_fd);
+ if (ret) {
+ ERROR("%p: cannot remap UAR", (void *)dev);
+ goto err;
+ }
ret = mlx4_rss_init(priv);
if (ret) {
ERROR("%p: cannot initialize RSS resources: %s",
@@ -208,6 +296,8 @@ mlx4_dev_start(struct rte_eth_dev *dev)
rte_wmb();
dev->tx_pkt_burst = mlx4_tx_burst;
dev->rx_pkt_burst = mlx4_rx_burst;
+ /* Enable datapath on secondary process. */
+ mlx4_mp_req_start_rxtx(dev);
return 0;
err:
mlx4_dev_stop(dev);
@@ -226,6 +316,8 @@ static void
mlx4_dev_stop(struct rte_eth_dev *dev)
{
struct mlx4_priv *priv = dev->data->dev_private;
+ const size_t page_size = sysconf(_SC_PAGESIZE);
+ int i;
if (!priv->started)
return;
@@ -234,9 +326,20 @@ mlx4_dev_stop(struct rte_eth_dev *dev)
dev->tx_pkt_burst = mlx4_tx_burst_removed;
dev->rx_pkt_burst = mlx4_rx_burst_removed;
rte_wmb();
+ /* Disable datapath on secondary process. */
+ mlx4_mp_req_stop_rxtx(dev);
mlx4_flow_sync(priv, NULL);
mlx4_rxq_intr_disable(priv);
mlx4_rss_deinit(priv);
+ for (i = 0; i != dev->data->nb_tx_queues; ++i) {
+ struct txq *txq;
+
+ txq = dev->data->tx_queues[i];
+ if (!txq)
+ continue;
+ munmap((void *)RTE_ALIGN_FLOOR((uintptr_t)txq->msq.db,
+ page_size), page_size);
+ }
}
/**
@@ -259,6 +362,8 @@ mlx4_dev_close(struct rte_eth_dev *dev)
dev->rx_pkt_burst = mlx4_rx_burst_removed;
dev->tx_pkt_burst = mlx4_tx_burst_removed;
rte_wmb();
+ /* Disable datapath on secondary process. */
+ mlx4_mp_req_stop_rxtx(dev);
mlx4_flow_clean(priv);
mlx4_rss_deinit(priv);
for (i = 0; i != dev->data->nb_rx_queues; ++i)
@@ -310,6 +415,16 @@ static const struct eth_dev_ops mlx4_dev_ops = {
.is_removed = mlx4_is_removed,
};
+#ifdef HAVE_IBV_MLX4_SECONDARY_PROCESS
+/* Available operations from secondary process. */
+static const struct eth_dev_ops mlx4_dev_sec_ops = {
+ .stats_get = mlx4_stats_get,
+ .stats_reset = mlx4_stats_reset,
+ .fw_version_get = mlx4_fw_version_get,
+ .dev_infos_get = mlx4_dev_infos_get,
+};
+#endif
+
/**
* Get PCI information from struct ibv_device.
*
@@ -549,6 +664,200 @@ mlx4_hw_rss_sup(struct ibv_context *ctx, struct ibv_pd *pd,
static struct rte_pci_driver mlx4_driver;
+static int
+find_lower_va_bound(const struct rte_memseg_list *msl,
+ const struct rte_memseg *ms, void *arg)
+{
+ void **addr = arg;
+
+ if (msl->external)
+ return 0;
+ if (*addr == NULL)
+ *addr = ms->addr;
+ else
+ *addr = RTE_MIN(*addr, ms->addr);
+
+ return 0;
+}
+
+/**
+ * Reserve UAR address space for primary process.
+ *
+ * Process local resource is used by both primary and secondary to avoid
+ * duplicate reservation. The space has to be available on both primary and
+ * secondary process, TXQ UAR maps to this area using fixed mmap w/o double
+ * check.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx4_uar_init_primary(void)
+{
+ struct mlx4_shared_data *sd = mlx4_shared_data;
+ void *addr = (void *)0;
+
+ if (sd->uar_base)
+ return 0;
+ /* find out lower bound of hugepage segments */
+ rte_memseg_walk(find_lower_va_bound, &addr);
+ /* keep distance to hugepages to minimize potential conflicts. */
+ addr = RTE_PTR_SUB(addr, (uintptr_t)(MLX4_UAR_OFFSET + MLX4_UAR_SIZE));
+ /* anonymous mmap, no real memory consumption. */
+ addr = mmap(addr, MLX4_UAR_SIZE,
+ PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (addr == MAP_FAILED) {
+ ERROR("failed to reserve UAR address space, please"
+ " adjust MLX4_UAR_SIZE or try --base-virtaddr");
+ rte_errno = ENOMEM;
+ return -rte_errno;
+ }
+ /* Accept either same addr or a new addr returned from mmap if target
+ * range occupied.
+ */
+ INFO("reserved UAR address space: %p", addr);
+ sd->uar_base = addr; /* for primary and secondary UAR re-mmap. */
+ return 0;
+}
+
+/**
+ * Unmap UAR address space reserved for primary process.
+ */
+static void
+mlx4_uar_uninit_primary(void)
+{
+ struct mlx4_shared_data *sd = mlx4_shared_data;
+
+ if (!sd->uar_base)
+ return;
+ munmap(sd->uar_base, MLX4_UAR_SIZE);
+ sd->uar_base = NULL;
+}
+
+/**
+ * Reserve UAR address space for secondary process, align with primary process.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx4_uar_init_secondary(void)
+{
+ struct mlx4_shared_data *sd = mlx4_shared_data;
+ struct mlx4_local_data *ld = &mlx4_local_data;
+ void *addr;
+
+ if (ld->uar_base) { /* Already reserved. */
+ assert(sd->uar_base == ld->uar_base);
+ return 0;
+ }
+ assert(sd->uar_base);
+ /* anonymous mmap, no real memory consumption. */
+ addr = mmap(sd->uar_base, MLX4_UAR_SIZE,
+ PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (addr == MAP_FAILED) {
+ ERROR("UAR mmap failed: %p size: %llu",
+ sd->uar_base, MLX4_UAR_SIZE);
+ rte_errno = ENXIO;
+ return -rte_errno;
+ }
+ if (sd->uar_base != addr) {
+ ERROR("UAR address %p size %llu occupied, please"
+ " adjust MLX4_UAR_OFFSET or try EAL parameter"
+ " --base-virtaddr",
+ sd->uar_base, MLX4_UAR_SIZE);
+ rte_errno = ENXIO;
+ return -rte_errno;
+ }
+ ld->uar_base = addr;
+ INFO("reserved UAR address space: %p", addr);
+ return 0;
+}
+
+/**
+ * Unmap UAR address space reserved for secondary process.
+ */
+static void
+mlx4_uar_uninit_secondary(void)
+{
+ struct mlx4_local_data *ld = &mlx4_local_data;
+
+ if (!ld->uar_base)
+ return;
+ munmap(ld->uar_base, MLX4_UAR_SIZE);
+ ld->uar_base = NULL;
+}
+
+/**
+ * PMD global initialization.
+ *
+ * Independent of any individual device, this function initializes global
+ * per-PMD data structures, distinguishing the primary from secondary
+ * processes. Hence, it is called once per process.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx4_init_once(void)
+{
+ struct mlx4_shared_data *sd;
+ struct mlx4_local_data *ld = &mlx4_local_data;
+ int ret;
+
+ if (mlx4_init_shared_data())
+ return -rte_errno;
+ sd = mlx4_shared_data;
+ assert(sd);
+ rte_spinlock_lock(&sd->lock);
+ switch (rte_eal_process_type()) {
+ case RTE_PROC_PRIMARY:
+ if (sd->init_done)
+ break;
+ LIST_INIT(&sd->mem_event_cb_list);
+ rte_rwlock_init(&sd->mem_event_rwlock);
+ rte_mem_event_callback_register("MLX4_MEM_EVENT_CB",
+ mlx4_mr_mem_event_cb, NULL);
+ mlx4_mp_init_primary();
+ ret = mlx4_uar_init_primary();
+ if (ret)
+ goto error;
+ sd->init_done = true;
+ break;
+ case RTE_PROC_SECONDARY:
+ if (ld->init_done)
+ break;
+ mlx4_mp_init_secondary();
+ ret = mlx4_uar_init_secondary();
+ if (ret)
+ goto error;
+ ++sd->secondary_cnt;
+ ld->init_done = true;
+ break;
+ default:
+ break;
+ }
+ rte_spinlock_unlock(&sd->lock);
+ return 0;
+error:
+ switch (rte_eal_process_type()) {
+ case RTE_PROC_PRIMARY:
+ mlx4_uar_uninit_primary();
+ mlx4_mp_uninit_primary();
+ rte_mem_event_callback_unregister("MLX4_MEM_EVENT_CB", NULL);
+ break;
+ case RTE_PROC_SECONDARY:
+ mlx4_uar_uninit_secondary();
+ mlx4_mp_uninit_secondary();
+ break;
+ default:
+ break;
+ }
+ rte_spinlock_unlock(&sd->lock);
+ mlx4_uninit_shared_data();
+ return -rte_errno;
+}
+
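mlx4_init_shared_data() itself is introduced earlier in this series; purely as a hedged sketch of how such a structure is commonly shared between DPDK processes (a generic memzone pattern, not necessarily the exact implementation used here):

#include <string.h>
#include <rte_eal.h>
#include <rte_memzone.h>

struct shared_blob {	/* stand-in for a PMD's shared data */
	int init_done;
};

static struct shared_blob *blob;

static int
shared_blob_init(void)
{
	const struct rte_memzone *mz;

	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
		/* Primary allocates the zone; secondaries can see it. */
		mz = rte_memzone_reserve("example_shared_blob",
					 sizeof(*blob), SOCKET_ID_ANY, 0);
		if (mz == NULL)
			return -1;
		blob = mz->addr;
		memset(blob, 0, sizeof(*blob));
	} else {
		/* Secondary merely looks the zone up by name. */
		mz = rte_memzone_lookup("example_shared_blob");
		if (mz == NULL)
			return -1;
		blob = mz->addr;
	}
	return 0;
}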
/**
* DPDK callback to register a PCI device.
*
@@ -579,6 +888,12 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
int i;
(void)pci_drv;
+ err = mlx4_init_once();
+ if (err) {
+ ERROR("unable to init PMD global data: %s",
+ strerror(rte_errno));
+ return -rte_errno;
+ }
assert(pci_drv == &mlx4_driver);
list = mlx4_glue->get_device_list(&i);
if (list == NULL) {
@@ -659,6 +974,7 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
struct mlx4_priv *priv = NULL;
struct rte_eth_dev *eth_dev = NULL;
struct ether_addr mac;
+ char name[RTE_ETH_NAME_MAX_LEN];
/* If port is not enabled, skip. */
if (!(conf.ports.enabled & (1 << i)))
@@ -669,6 +985,44 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
err = ENODEV;
goto port_error;
}
+ snprintf(name, sizeof(name), "%s port %u",
+ mlx4_glue->get_device_name(ibv_dev), port);
+#ifdef HAVE_IBV_MLX4_SECONDARY_PROCESS
+ if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+ eth_dev = rte_eth_dev_attach_secondary(name);
+ if (eth_dev == NULL) {
+ ERROR("can not attach rte ethdev");
+ rte_errno = ENOMEM;
+ err = rte_errno;
+ goto error;
+ }
+ eth_dev->device = &pci_dev->device;
+ eth_dev->dev_ops = &mlx4_dev_sec_ops;
+ /* Receive command fd from primary process */
+ err = mlx4_mp_req_verbs_cmd_fd(eth_dev);
+ if (err < 0) {
+ err = rte_errno;
+ goto error;
+ }
+ /* Remap UAR for Tx queues. */
+ err = mlx4_tx_uar_remap(eth_dev, err);
+ if (err) {
+ err = rte_errno;
+ goto error;
+ }
+ /*
+ * Ethdev pointer is still required as input since
+ * the primary device is not accessible from the
+ * secondary process.
+ */
+ eth_dev->tx_pkt_burst = mlx4_tx_burst;
+ eth_dev->rx_pkt_burst = mlx4_rx_burst;
+ claim_zero(mlx4_glue->close_device(ctx));
+ rte_eth_copy_pci_info(eth_dev, pci_dev);
+ rte_eth_dev_probing_finish(eth_dev);
+ continue;
+ }
+#endif
/* Check port status. */
err = mlx4_glue->query_port(ctx, port, &port_attr);
if (err) {
@@ -774,14 +1128,7 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
/* Get actual MTU if possible. */
mlx4_mtu_get(priv, &priv->mtu);
DEBUG("port %u MTU is %u", priv->port, priv->mtu);
- /* from rte_ethdev.c */
- {
- char name[RTE_ETH_NAME_MAX_LEN];
-
- snprintf(name, sizeof(name), "%s port %u",
- mlx4_glue->get_device_name(ibv_dev), port);
- eth_dev = rte_eth_dev_allocate(name);
- }
+ eth_dev = rte_eth_dev_allocate(name);
if (eth_dev == NULL) {
err = ENOMEM;
ERROR("can not allocate rte ethdev");
@@ -842,9 +1189,10 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
goto port_error;
}
/* Add device to memory callback list. */
- rte_rwlock_write_lock(&mlx4_mem_event_rwlock);
- LIST_INSERT_HEAD(&mlx4_mem_event_cb_list, priv, mem_event_cb);
- rte_rwlock_write_unlock(&mlx4_mem_event_rwlock);
+ rte_rwlock_write_lock(&mlx4_shared_data->mem_event_rwlock);
+ LIST_INSERT_HEAD(&mlx4_shared_data->mem_event_cb_list,
+ priv, mem_event_cb);
+ rte_rwlock_write_unlock(&mlx4_shared_data->mem_event_rwlock);
rte_eth_dev_probing_finish(eth_dev);
continue;
port_error:
@@ -1075,8 +1423,6 @@ RTE_INIT(rte_mlx4_pmd_init)
}
mlx4_glue->fork_init();
rte_pci_register(&mlx4_driver);
- rte_mem_event_callback_register("MLX4_MEM_EVENT_CB",
- mlx4_mr_mem_event_cb, NULL);
}
RTE_PMD_EXPORT_NAME(net_mlx4, __COUNTER__);
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index d43e05ea74..832edc962d 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -53,6 +53,16 @@
/** Port parameter. */
#define MLX4_PMD_PORT_KVARG "port"
+/* Reserved address space for UAR mapping. */
+#define MLX4_UAR_SIZE (1ULL << (sizeof(uintptr_t) * 4))
+
+/* Offset of the reserved UAR address space from hugepage memory. The offset
+ * minimizes the chance that addresses adjacent to the hugepages are already
+ * used by other code in either the primary or a secondary process; failing to
+ * map a TX UAR would make TX packets invisible to the HW.
+ */
+#define MLX4_UAR_OFFSET (2ULL << (sizeof(uintptr_t) * 4))
+
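To make the two constants above concrete, assuming an LP64 build where sizeof(uintptr_t) == 8 (illustrative snippet, not part of the patch):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* sizeof(uintptr_t) == 8 on LP64, so the shifts below are by 32. */
	unsigned long long size = 1ULL << (sizeof(uintptr_t) * 4); /* 4 GiB */
	unsigned long long off = 2ULL << (sizeof(uintptr_t) * 4);  /* 8 GiB */

	printf("UAR window: %llu GiB, gap kept to hugepages: %llu GiB\n",
	       size >> 30, off >> 30);
	return 0;
}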
enum {
PCI_VENDOR_ID_MELLANOX = 0x15b3,
};
@@ -63,6 +73,26 @@ enum {
PCI_DEVICE_ID_MELLANOX_CONNECTX3PRO = 0x1007,
};
+/* Request types for IPC. */
+enum mlx4_mp_req_type {
+ MLX4_MP_REQ_VERBS_CMD_FD = 1,
+ MLX4_MP_REQ_START_RXTX,
+ MLX4_MP_REQ_STOP_RXTX,
+};
+
+/* Parameters for IPC. */
+struct mlx4_mp_param {
+ enum mlx4_mp_req_type type;
+ int port_id;
+ int result;
+};
+
+/** Request timeout for IPC. */
+#define MLX4_MP_REQ_TIMEOUT_SEC 5
+
+/** Key string for IPC. */
+#define MLX4_MP_NAME "net_mlx4_mp"
+
/** Driver name reported to lower layers and used in log output. */
#define MLX4_DRIVER_NAME "net_mlx4"
@@ -93,6 +123,27 @@ struct mlx4_verbs_alloc_ctx {
LIST_HEAD(mlx4_dev_list, mlx4_priv);
LIST_HEAD(mlx4_mr_list, mlx4_mr);
+/* Shared data between primary and secondary processes. */
+struct mlx4_shared_data {
+ rte_spinlock_t lock;
+ /* Global spinlock for primary and secondary processes. */
+ int init_done; /* Whether primary has done initialization. */
+ unsigned int secondary_cnt; /* Number of secondary processes init'd. */
+ void *uar_base;
+ /* Reserved UAR address space for TXQ UAR (HW doorbell) mapping. */
+ struct mlx4_dev_list mem_event_cb_list;
+ rte_rwlock_t mem_event_rwlock;
+};
+
+/* Per-process data structure, not visible to other processes. */
+struct mlx4_local_data {
+ int init_done; /* Whether a secondary has done initialization. */
+ void *uar_base;
+ /* Reserved UAR address space for TXQ UAR (HW doorbell) mapping. */
+};
+
+extern struct mlx4_shared_data *mlx4_shared_data;
+
/** Private data structure. */
struct mlx4_priv {
LIST_ENTRY(mlx4_priv) mem_event_cb;
@@ -175,4 +226,13 @@ void mlx4_rxq_intr_disable(struct mlx4_priv *priv);
int mlx4_rx_intr_disable(struct rte_eth_dev *dev, uint16_t idx);
int mlx4_rx_intr_enable(struct rte_eth_dev *dev, uint16_t idx);
+/* mlx4_mp.c */
+void mlx4_mp_req_start_rxtx(struct rte_eth_dev *dev);
+void mlx4_mp_req_stop_rxtx(struct rte_eth_dev *dev);
+int mlx4_mp_req_verbs_cmd_fd(struct rte_eth_dev *dev);
+void mlx4_mp_init_primary(void);
+void mlx4_mp_uninit_primary(void);
+void mlx4_mp_init_secondary(void);
+void mlx4_mp_uninit_secondary(void);
+
#endif /* RTE_PMD_MLX4_H_ */
diff --git a/drivers/net/mlx4/mlx4_mp.c b/drivers/net/mlx4/mlx4_mp.c
new file mode 100644
index 0000000000..eaeb257348
--- /dev/null
+++ b/drivers/net/mlx4/mlx4_mp.c
@@ -0,0 +1,304 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2019 6WIND S.A.
+ * Copyright 2019 Mellanox Technologies, Ltd
+ */
+
+#include <assert.h>
+#include <stdio.h>
+#include <time.h>
+
+#include <rte_eal.h>
+#include <rte_ethdev_driver.h>
+#include <rte_string_fns.h>
+
+#include "mlx4.h"
+#include "mlx4_rxtx.h"
+#include "mlx4_utils.h"
+
+/**
+ * Initialize IPC message.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet structure.
+ * @param[out] msg
+ * Pointer to message to fill in.
+ * @param[in] type
+ * Message type.
+ */
+static inline void
+mp_init_msg(struct rte_eth_dev *dev, struct rte_mp_msg *msg,
+ enum mlx4_mp_req_type type)
+{
+ struct mlx4_mp_param *param = (struct mlx4_mp_param *)msg->param;
+
+ memset(msg, 0, sizeof(*msg));
+ strlcpy(msg->name, MLX4_MP_NAME, sizeof(msg->name));
+ msg->len_param = sizeof(*param);
+ param->type = type;
+ param->port_id = dev->data->port_id;
+}
+
+/**
+ * IPC message handler of primary process.
+ *
+ * @param[in] mp_msg
+ * Pointer to the received IPC message.
+ * @param[in] peer
+ * Pointer to the peer socket path.
+ *
+ * @return
+ * 0 on success, negative errno value otherwise and rte_errno is set.
+ */
+static int
+mp_primary_handle(const struct rte_mp_msg *mp_msg, const void *peer)
+{
+ struct rte_mp_msg mp_res;
+ struct mlx4_mp_param *res = (struct mlx4_mp_param *)mp_res.param;
+ const struct mlx4_mp_param *param =
+ (const struct mlx4_mp_param *)mp_msg->param;
+ struct rte_eth_dev *dev;
+ struct mlx4_priv *priv;
+ int ret;
+
+ assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+ if (!rte_eth_dev_is_valid_port(param->port_id)) {
+ rte_errno = ENODEV;
+ ERROR("port %u invalid port ID", param->port_id);
+ return -rte_errno;
+ }
+ dev = &rte_eth_devices[param->port_id];
+ priv = dev->data->dev_private;
+ switch (param->type) {
+ case MLX4_MP_REQ_VERBS_CMD_FD:
+ mp_init_msg(dev, &mp_res, param->type);
+ mp_res.num_fds = 1;
+ mp_res.fds[0] = priv->ctx->cmd_fd;
+ res->result = 0;
+ ret = rte_mp_reply(&mp_res, peer);
+ break;
+ default:
+ rte_errno = EINVAL;
+ ERROR("port %u invalid mp request type", dev->data->port_id);
+ return -rte_errno;
+ }
+ return ret;
+}
+
+/**
+ * IPC message handler of a secondary process.
+ *
+ * @param[in] mp_msg
+ * Pointer to the received IPC message.
+ * @param[in] peer
+ * Pointer to the peer socket path.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mp_secondary_handle(const struct rte_mp_msg *mp_msg, const void *peer)
+{
+ struct rte_mp_msg mp_res;
+ struct mlx4_mp_param *res = (struct mlx4_mp_param *)mp_res.param;
+ const struct mlx4_mp_param *param =
+ (const struct mlx4_mp_param *)mp_msg->param;
+ struct rte_eth_dev *dev;
+ int ret;
+
+ assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
+ if (!rte_eth_dev_is_valid_port(param->port_id)) {
+ rte_errno = ENODEV;
+ ERROR("port %u invalid port ID", param->port_id);
+ return -rte_errno;
+ }
+ dev = &rte_eth_devices[param->port_id];
+ switch (param->type) {
+ case MLX4_MP_REQ_START_RXTX:
+ INFO("port %u starting datapath", dev->data->port_id);
+ rte_mb();
+ dev->tx_pkt_burst = mlx4_tx_burst;
+ dev->rx_pkt_burst = mlx4_rx_burst;
+ mp_init_msg(dev, &mp_res, param->type);
+ res->result = 0;
+ ret = rte_mp_reply(&mp_res, peer);
+ break;
+ case MLX4_MP_REQ_STOP_RXTX:
+ INFO("port %u stopping datapath", dev->data->port_id);
+ dev->tx_pkt_burst = mlx4_tx_burst_removed;
+ dev->rx_pkt_burst = mlx4_rx_burst_removed;
+ rte_mb();
+ mp_init_msg(dev, &mp_res, param->type);
+ res->result = 0;
+ ret = rte_mp_reply(&mp_res, peer);
+ break;
+ default:
+ rte_errno = EINVAL;
+ ERROR("port %u invalid mp request type", dev->data->port_id);
+ return -rte_errno;
+ }
+ return ret;
+}
+
+/**
+ * Broadcast a request to stop or start the data path to all secondary
+ * processes.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet structure.
+ * @param[in] type
+ * Request type.
+ */
+static void
+mp_req_on_rxtx(struct rte_eth_dev *dev, enum mlx4_mp_req_type type)
+{
+ struct rte_mp_msg mp_req;
+ struct rte_mp_msg *mp_res;
+ struct rte_mp_reply mp_rep;
+ struct mlx4_mp_param *res __rte_unused;
+ struct timespec ts = {.tv_sec = MLX4_MP_REQ_TIMEOUT_SEC, .tv_nsec = 0};
+ int ret;
+ int i;
+
+ assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+ if (!mlx4_shared_data->secondary_cnt)
+ return;
+ if (type != MLX4_MP_REQ_START_RXTX && type != MLX4_MP_REQ_STOP_RXTX) {
+ ERROR("port %u unknown request (req_type %d)",
+ dev->data->port_id, type);
+ return;
+ }
+ mp_init_msg(dev, &mp_req, type);
+ ret = rte_mp_request_sync(&mp_req, &mp_rep, &ts);
+ if (ret) {
+ ERROR("port %u failed to request stop/start Rx/Tx (%d)",
+ dev->data->port_id, type);
+ goto exit;
+ }
+ if (mp_rep.nb_sent != mp_rep.nb_received) {
+ ERROR("port %u not all secondaries responded (req_type %d)",
+ dev->data->port_id, type);
+ goto exit;
+ }
+ for (i = 0; i < mp_rep.nb_received; i++) {
+ mp_res = &mp_rep.msgs[i];
+ res = (struct mlx4_mp_param *)mp_res->param;
+ if (res->result) {
+ ERROR("port %u request failed on secondary #%d",
+ dev->data->port_id, i);
+ goto exit;
+ }
+ }
+exit:
+ free(mp_rep.msgs);
+}
+
+/**
+ * Broadcast a request to start the data path to all secondary processes. The
+ * request is synchronous.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet structure.
+ */
+void
+mlx4_mp_req_start_rxtx(struct rte_eth_dev *dev)
+{
+ mp_req_on_rxtx(dev, MLX4_MP_REQ_START_RXTX);
+}
+
+/**
+ * Broadcast a request to stop the data path to all secondary processes. The
+ * request is synchronous.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet structure.
+ */
+void
+mlx4_mp_req_stop_rxtx(struct rte_eth_dev *dev)
+{
+ mp_req_on_rxtx(dev, MLX4_MP_REQ_STOP_RXTX);
+}
+
+/**
+ * Request the Verbs command file descriptor from the primary process.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet structure.
+ *
+ * @return
+ * fd on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx4_mp_req_verbs_cmd_fd(struct rte_eth_dev *dev)
+{
+ struct rte_mp_msg mp_req;
+ struct rte_mp_msg *mp_res;
+ struct rte_mp_reply mp_rep;
+ struct mlx4_mp_param *res;
+ struct timespec ts = {.tv_sec = MLX4_MP_REQ_TIMEOUT_SEC, .tv_nsec = 0};
+ int ret;
+
+ assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
+ mp_init_msg(dev, &mp_req, MLX4_MP_REQ_VERBS_CMD_FD);
+ ret = rte_mp_request_sync(&mp_req, &mp_rep, &ts);
+ if (ret) {
+ ERROR("port %u request to primary process failed",
+ dev->data->port_id);
+ return -rte_errno;
+ }
+ assert(mp_rep.nb_received == 1);
+ mp_res = &mp_rep.msgs[0];
+ res = (struct mlx4_mp_param *)mp_res->param;
+ if (res->result) {
+ rte_errno = -res->result;
+ ERROR("port %u failed to get command FD from primary process",
+ dev->data->port_id);
+ ret = -rte_errno;
+ goto exit;
+ }
+ assert(mp_res->num_fds == 1);
+ ret = mp_res->fds[0];
+ DEBUG("port %u command FD from primary is %d",
+ dev->data->port_id, ret);
+exit:
+ free(mp_rep.msgs);
+ return ret;
+}
+
+/**
+ * Initialize by primary process.
+ */
+void
+mlx4_mp_init_primary(void)
+{
+ assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+ rte_mp_action_register(MLX4_MP_NAME, mp_primary_handle);
+}
+
+/**
+ * Un-initialize by primary process.
+ */
+void
+mlx4_mp_uninit_primary(void)
+{
+ assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+ rte_mp_action_unregister(MLX4_MP_NAME);
+}
+
+/**
+ * Initialize by secondary process.
+ */
+void
+mlx4_mp_init_secondary(void)
+{
+ assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
+ rte_mp_action_register(MLX4_MP_NAME, mp_secondary_handle);
+}
+
+/**
+ * Un-initialize by secondary process.
+ */
+void
+mlx4_mp_uninit_secondary(void)
+{
+ assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
+ rte_mp_action_unregister(MLX4_MP_NAME);
+}
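For readers unfamiliar with the rte_mp_* primitives used throughout this file, they amount to a named handler plus synchronous requests; a minimal hedged sketch using a hypothetical channel name ("example_mp", unrelated to MLX4_MP_NAME above):

#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <rte_eal.h>
#include <rte_string_fns.h>

/* Handler run in the peer process for messages sent to "example_mp". */
static int
example_mp_handle(const struct rte_mp_msg *msg, const void *peer)
{
	struct rte_mp_msg reply;

	memset(&reply, 0, sizeof(reply));
	strlcpy(reply.name, msg->name, sizeof(reply.name));
	reply.len_param = msg->len_param;
	memcpy(reply.param, msg->param, msg->len_param); /* echo back */
	return rte_mp_reply(&reply, peer);
}

/* Register the handler once per process... */
static int
example_mp_init(void)
{
	return rte_mp_action_register("example_mp", example_mp_handle);
}

/* ...and issue a synchronous request from the other side. */
static int
example_mp_ping(void)
{
	struct rte_mp_msg req;
	struct rte_mp_reply rep;
	struct timespec ts = { .tv_sec = 5, .tv_nsec = 0 };
	int ret;

	memset(&req, 0, sizeof(req));
	strlcpy(req.name, "example_mp", sizeof(req.name));
	strlcpy((char *)req.param, "ping", sizeof(req.param));
	req.len_param = sizeof("ping");
	ret = rte_mp_request_sync(&req, &rep, &ts);
	if (ret == 0)
		free(rep.msgs);	/* caller owns the reply array */
	return ret;
}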
diff --git a/drivers/net/mlx4/mlx4_mr.c b/drivers/net/mlx4/mlx4_mr.c
index e4be46ab2a..01894faecf 100644
--- a/drivers/net/mlx4/mlx4_mr.c
+++ b/drivers/net/mlx4/mlx4_mr.c
@@ -489,6 +489,8 @@ mlx4_mr_garbage_collect(struct rte_eth_dev *dev)
struct mlx4_mr *mr_next;
struct mlx4_mr_list free_list = LIST_HEAD_INITIALIZER(free_list);
+ /* Must be called from the primary process. */
+ assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
/*
* MR can't be freed while holding the lock because rte_free() could call
* memory free callback function. This will be a deadlock situation.
@@ -561,6 +563,14 @@ mlx4_mr_create(struct rte_eth_dev *dev, struct mlx4_mr_cache *entry,
DEBUG("port %u creating a MR using address (%p)",
dev->data->port_id, (void *)addr);
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ WARN("port %u using address (%p) of unregistered mempool"
+ " in secondary process, please create mempool"
+ " before rte_eth_dev_start()",
+ dev->data->port_id, (void *)addr);
+ rte_errno = EPERM;
+ goto err_nolock;
+ }
/*
* Release detached MRs if any. This can't be called while holding either
* memory_hotplug_lock or priv->mr.rwlock. MRs on the free list have
@@ -890,14 +900,17 @@ mlx4_mr_mem_event_cb(enum rte_mem_event event_type, const void *addr,
size_t len, void *arg __rte_unused)
{
struct mlx4_priv *priv;
+ struct mlx4_dev_list *dev_list = &mlx4_shared_data->mem_event_cb_list;
+ /* Must be called from the primary process. */
+ assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
switch (event_type) {
case RTE_MEM_EVENT_FREE:
- rte_rwlock_read_lock(&mlx4_mem_event_rwlock);
+ rte_rwlock_read_lock(&mlx4_shared_data->mem_event_rwlock);
/* Iterate all the existing mlx4 devices. */
- LIST_FOREACH(priv, &mlx4_mem_event_cb_list, mem_event_cb)
+ LIST_FOREACH(priv, dev_list, mem_event_cb)
mlx4_mr_mem_event_free_cb(ETH_DEV(priv), addr, len);
- rte_rwlock_read_unlock(&mlx4_mem_event_rwlock);
+ rte_rwlock_read_unlock(&mlx4_shared_data->mem_event_rwlock);
break;
case RTE_MEM_EVENT_ALLOC:
default:
@@ -1130,6 +1143,7 @@ mlx4_mr_update_ext_mp_cb(struct rte_mempool *mp, void *opaque,
struct mlx4_mr_cache entry;
uint32_t lkey;
+ assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
/* If already registered, it should return. */
rte_rwlock_read_lock(&priv->mr.rwlock);
lkey = mr_lookup_dev(dev, &entry, addr);
@@ -1225,6 +1239,14 @@ mlx4_tx_update_ext_mp(struct txq *txq, uintptr_t addr, struct rte_mempool *mp)
struct mlx4_mr_ctrl *mr_ctrl = &txq->mr_ctrl;
struct mlx4_priv *priv = txq->priv;
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ WARN("port %u using address (%p) from unregistered mempool"
+ " having externally allocated memory"
+ " in secondary process, please create mempool"
+ " prior to rte_eth_dev_start()",
+ PORT_ID(priv), (void *)addr);
+ return UINT32_MAX;
+ }
mlx4_mr_update_ext_mp(ETH_DEV(priv), mr_ctrl, mp);
return mlx4_tx_addr2mr_bh(txq, addr);
}
@@ -1336,9 +1358,9 @@ mlx4_mr_release(struct rte_eth_dev *dev)
struct mlx4_mr *mr_next = LIST_FIRST(&priv->mr.mr_list);
/* Remove from memory callback device list. */
- rte_rwlock_write_lock(&mlx4_mem_event_rwlock);
+ rte_rwlock_write_lock(&mlx4_shared_data->mem_event_rwlock);
LIST_REMOVE(priv, mem_event_cb);
- rte_rwlock_write_unlock(&mlx4_mem_event_rwlock);
+ rte_rwlock_write_unlock(&mlx4_shared_data->mem_event_rwlock);
#ifndef NDEBUG
mlx4_mr_dump_dev(dev);
#endif
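The warnings added above translate into an ordering requirement on the application side; a hedged sketch of the intended usage with standard DPDK calls (illustrative only):

#include <rte_ethdev.h>
#include <rte_lcore.h>
#include <rte_mbuf.h>

/* Create every mempool before rte_eth_dev_start() so that the primary
 * process registers its memory for DMA; a secondary process cannot create
 * new MRs on its own.
 */
static int
example_port_bringup(uint16_t port_id)
{
	struct rte_mempool *mp;

	mp = rte_pktmbuf_pool_create("mbuf_pool", 8192, 256, 0,
				     RTE_MBUF_DEFAULT_BUF_SIZE,
				     rte_socket_id());
	if (mp == NULL)
		return -1;
	/* ... rte_eth_dev_configure() and Rx/Tx queue setup using mp ... */
	return rte_eth_dev_start(port_id);
}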
diff --git a/drivers/net/mlx4/mlx4_prm.h b/drivers/net/mlx4/mlx4_prm.h
index aef77ba06e..b3e11dde25 100644
--- a/drivers/net/mlx4/mlx4_prm.h
+++ b/drivers/net/mlx4/mlx4_prm.h
@@ -77,7 +77,9 @@ struct mlx4_sq {
uint32_t owner_opcode;
/**< Default owner opcode with HW valid owner bit. */
uint32_t stamp; /**< Stamp value with an invalid HW owner bit. */
- volatile uint32_t *db; /**< Pointer to the doorbell. */
+ volatile uint32_t *qp_sdb; /**< Pointer to the doorbell. */
+ volatile uint32_t *db; /**< Pointer to the doorbell remapped. */
+ off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
uint32_t doorbell_qpn; /**< qp number to write to the doorbell. */
};
diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c
index 8c88effcd1..f22f1ba559 100644
--- a/drivers/net/mlx4/mlx4_rxtx.c
+++ b/drivers/net/mlx4/mlx4_rxtx.c
@@ -1365,6 +1365,7 @@ mlx4_tx_burst_removed(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
(void)dpdk_txq;
(void)pkts;
(void)pkts_n;
+ rte_mb();
return 0;
}
@@ -1390,5 +1391,6 @@ mlx4_rx_burst_removed(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
(void)dpdk_rxq;
(void)pkts;
(void)pkts_n;
+ rte_mb();
return 0;
}
diff --git a/drivers/net/mlx4/mlx4_rxtx.h b/drivers/net/mlx4/mlx4_rxtx.h
index 9409602b32..7d7a8988ed 100644
--- a/drivers/net/mlx4/mlx4_rxtx.h
+++ b/drivers/net/mlx4/mlx4_rxtx.h
@@ -152,6 +152,7 @@ uint16_t mlx4_rx_burst_removed(void *dpdk_rxq, struct rte_mbuf **pkts,
/* mlx4_txq.c */
+int mlx4_tx_uar_remap(struct rte_eth_dev *dev, int fd);
uint64_t mlx4_get_tx_port_offloads(struct mlx4_priv *priv);
int mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx,
uint16_t desc, unsigned int socket,
diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c
index 2dc198e77f..ed00843425 100644
--- a/drivers/net/mlx4/mlx4_txq.c
+++ b/drivers/net/mlx4/mlx4_txq.c
@@ -13,7 +13,9 @@
#include <stddef.h>
#include <stdint.h>
#include <string.h>
+#include <sys/mman.h>
#include <inttypes.h>
+#include <unistd.h>
/* Verbs headers do not support -pedantic. */
#ifdef PEDANTIC
@@ -38,6 +40,98 @@
#include "mlx4_utils.h"
/**
+ * Mmap TX UAR (HW doorbell) pages into the reserved UAR address space.
+ * Both primary and secondary processes perform the mmap so that the UAR
+ * addresses end up identical in both.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet device.
+ * @param fd
+ * Verbs file descriptor to map UAR pages.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
+int
+mlx4_tx_uar_remap(struct rte_eth_dev *dev, int fd)
+{
+ unsigned int i, j;
+ const unsigned int txqs_n = dev->data->nb_tx_queues;
+ uintptr_t pages[txqs_n];
+ unsigned int pages_n = 0;
+ uintptr_t uar_va;
+ uintptr_t off;
+ void *addr;
+ void *ret;
+ struct txq *txq;
+ int already_mapped;
+ size_t page_size = sysconf(_SC_PAGESIZE);
+
+ memset(pages, 0, txqs_n * sizeof(uintptr_t));
+ /*
+ * As in rdma-core, UARs are mapped at OS page-size granularity.
+ * Use the page-aligned address to avoid duplicate mmap calls.
+ * See the libmlx4 function mlx4_init_context().
+ */
+ for (i = 0; i != txqs_n; ++i) {
+ txq = dev->data->tx_queues[i];
+ if (!txq)
+ continue;
+ /* UAR address from Verbs, used to find duplicates and the in-page offset. */
+ uar_va = (uintptr_t)txq->msq.qp_sdb;
+ off = uar_va & (page_size - 1); /* offset in page. */
+ uar_va = RTE_ALIGN_FLOOR(uar_va, page_size); /* page addr. */
+ already_mapped = 0;
+ for (j = 0; j != pages_n; ++j) {
+ if (pages[j] == uar_va) {
+ already_mapped = 1;
+ break;
+ }
+ }
+ /* new address in reserved UAR address space. */
+ addr = RTE_PTR_ADD(mlx4_shared_data->uar_base,
+ uar_va & (uintptr_t)(MLX4_UAR_SIZE - 1));
+ if (!already_mapped) {
+ pages[pages_n++] = uar_va;
+ /* fixed mmap to specified address in reserved
+ * address space.
+ */
+ ret = mmap(addr, page_size,
+ PROT_WRITE, MAP_FIXED | MAP_SHARED, fd,
+ txq->msq.uar_mmap_offset);
+ if (ret != addr) {
+ /* fixed mmap has to return same address. */
+ ERROR("port %u call to mmap failed on UAR"
+ " for txq %u",
+ dev->data->port_id, i);
+ rte_errno = ENXIO;
+ return -rte_errno;
+ }
+ }
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* save once. */
+ txq->msq.db = RTE_PTR_ADD((void *)addr, off);
+ else
+ assert(txq->msq.db ==
+ RTE_PTR_ADD((void *)addr, off));
+ }
+ return 0;
+}
+#else
+int
+mlx4_tx_uar_remap(struct rte_eth_dev *dev __rte_unused, int fd __rte_unused)
+{
+ /*
+ * If rdma-core doesn't support UAR remap, the secondary process is not
+ * supported, so only the primary process can reach this function.
+ * Return success so as not to interrupt initialization.
+ */
+ assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+ return 0;
+}
+#endif
+
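The page/offset arithmetic used by mlx4_tx_uar_remap() above can be checked in isolation; a small standalone sketch with an arbitrary doorbell address (illustrative values only, not part of the patch):

#include <stdio.h>
#include <stdint.h>
#include <unistd.h>

int main(void)
{
	uintptr_t page_size = (uintptr_t)sysconf(_SC_PAGESIZE);
	uintptr_t uar_va = (uintptr_t)0x7f1234567abcULL; /* made-up doorbell VA */
	uintptr_t page = uar_va & ~(page_size - 1);	 /* page start */
	uintptr_t off = uar_va & (page_size - 1);	 /* offset inside page */

	/* page + off reassembles the original address; mlx4_tx_uar_remap()
	 * mmap()s `page` into the reserved window and keeps `off` to rebuild
	 * the doorbell pointer at the remapped location.
	 */
	printf("page %#lx + off %#lx = %#lx\n",
	       (unsigned long)page, (unsigned long)off,
	       (unsigned long)(page + off));
	return 0;
}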
+/**
* Free Tx queue elements.
*
* @param txq
@@ -89,7 +183,12 @@ mlx4_txq_fill_dv_obj_info(struct txq *txq, struct mlx4dv_obj *mlxdv)
sq->owner_opcode = MLX4_OPCODE_SEND | (0u << MLX4_SQ_OWNER_BIT);
sq->stamp = rte_cpu_to_be_32(MLX4_SQ_STAMP_VAL |
(0u << MLX4_SQ_OWNER_BIT));
+#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
+ sq->uar_mmap_offset = dqp->uar_mmap_offset;
+ sq->qp_sdb = dqp->sdb;
+#else
sq->db = dqp->sdb;
+#endif
sq->doorbell_qpn = dqp->doorbell_qpn;
cq->buf = dcq->buf.buf;
cq->cqe_cnt = dcq->cqe_cnt;
@@ -307,6 +406,11 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
goto error;
}
/* Retrieve device queue information. */
+#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
+ dv_qp = (struct mlx4dv_qp){
+ .comp_mask = MLX4DV_QP_MASK_UAR_MMAP_OFFSET,
+ };
+#endif
mlxdv.cq.in = txq->cq;
mlxdv.cq.out = &dv_cq;
mlxdv.qp.in = txq->qp;
@@ -318,6 +422,13 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
" accessing the device queues", (void *)dev);
goto error;
}
+#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
+ if (!(dv_qp.comp_mask & MLX4DV_QP_MASK_UAR_MMAP_OFFSET)) {
+ rte_errno = EINVAL;
+ ERROR("%p: failed to obtain UAR mmap offset", (void *)dev);
+ goto error;
+ }
+#endif
mlx4_txq_fill_dv_obj_info(txq, &mlxdv);
/* Save first wqe pointer in the first element. */
(&(*txq->elts)[0])->wqe =
--
2.11.0
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [dpdk-dev] [PATCH v2 1/3] net/mlx4: change device reference for secondary process
2019-03-25 19:17 ` [dpdk-dev] [PATCH v2 1/3] net/mlx4: change device reference for secondary process Yongseok Koh
2019-03-25 19:17 ` Yongseok Koh
@ 2019-03-26 19:16 ` Shahaf Shuler
2019-03-26 19:16 ` Shahaf Shuler
1 sibling, 1 reply; 30+ messages in thread
From: Shahaf Shuler @ 2019-03-26 19:16 UTC (permalink / raw)
To: Yongseok Koh; +Cc: dev, stable
Monday, March 25, 2019 9:18 PM, Yongseok Koh:
> Subject: [dpdk-dev] [PATCH v2 1/3] net/mlx4: change device reference for
> secondary process
>
> rte_eth_devices[] is not shared between primary and secondary process,
> but a static array to each process. The reverse pointer of device (priv->dev)
> becomes invalid if mlx4 supports secondary process. Instead, priv has the
> pointer to shared data of the device,
> struct rte_eth_dev_data *dev_data;
>
> Two macros are added,
> #define PORT_ID(priv) ((priv)->dev_data->port_id)
> #define ETH_DEV(priv) (&rte_eth_devices[PORT_ID(priv)])
>
> Cc: stable@dpdk.org
>
> Suggested-by: Raslan Darawsheh <rasland@mellanox.com>
> Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
Acked-by: Shahaf Shuler <shahafs@mellanox.com>
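For readers skimming the quoted diff below, a hedged illustration of what the two macros resolve to (hypothetical helper, not part of the patch): priv->dev_data points into the shared rte_eth_dev_data array, while rte_eth_devices[] is private to each process, so the lookup yields the calling process's own rte_eth_dev for the same port.

#include <rte_ethdev_driver.h>
#include "mlx4.h"

static inline struct rte_eth_dev *
mlx4_priv_to_eth_dev(struct mlx4_priv *priv)
{
	/* Same as ETH_DEV(priv): the shared port_id indexes the local array. */
	return &rte_eth_devices[priv->dev_data->port_id];
}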
> ---
> drivers/net/mlx4/mlx4.c | 4 ++--
> drivers/net/mlx4/mlx4.h | 5 ++++-
> drivers/net/mlx4/mlx4_flow.c | 39 +++++++++++++++++++++-----------------
> -
> drivers/net/mlx4/mlx4_intr.c | 20 ++++++++++----------
> drivers/net/mlx4/mlx4_mr.c | 8 ++++----
> drivers/net/mlx4/mlx4_rxq.c | 36 +++++++++++++++++++-----------------
> drivers/net/mlx4/mlx4_txq.c | 8 ++++----
> 7 files changed, 64 insertions(+), 56 deletions(-)
>
> diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c index
> 5ef2e7f41e..bb6ab8ec6e 100644
> --- a/drivers/net/mlx4/mlx4.c
> +++ b/drivers/net/mlx4/mlx4.c
> @@ -753,11 +753,11 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv,
> struct rte_pci_device *pci_dev)
> * handled by rte_intr_rx_ctl().
> */
> eth_dev->intr_handle = &priv->intr_handle;
> - priv->dev = eth_dev;
> + priv->dev_data = eth_dev->data;
> eth_dev->dev_ops = &mlx4_dev_ops;
> /* Bring Ethernet device up. */
> DEBUG("forcing Ethernet interface up");
> - mlx4_dev_set_link_up(priv->dev);
> + mlx4_dev_set_link_up(eth_dev);
> /* Update link status once if waiting for LSC. */
> if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
> mlx4_link_update(eth_dev, 0);
> diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h index
> 7ac49ca672..51566caf7f 100644
> --- a/drivers/net/mlx4/mlx4.h
> +++ b/drivers/net/mlx4/mlx4.h
> @@ -79,7 +79,7 @@ LIST_HEAD(mlx4_mr_list, mlx4_mr); struct mlx4_priv {
> LIST_ENTRY(mlx4_priv) mem_event_cb;
> /**< Called by memory event callback. */
> - struct rte_eth_dev *dev; /**< Ethernet device. */
> + struct rte_eth_dev_data *dev_data; /* Pointer to device data. */
> struct ibv_context *ctx; /**< Verbs context. */
> struct ibv_device_attr device_attr; /**< Device properties. */
> struct ibv_pd *pd; /**< Protection Domain. */ @@ -113,6 +113,9
> @@ struct mlx4_priv {
> /**< Configured MAC addresses. Unused entries are zeroed. */ };
>
> +#define PORT_ID(priv) ((priv)->dev_data->port_id) #define ETH_DEV(priv)
> +(&rte_eth_devices[PORT_ID(priv)])
> +
> /* mlx4_ethdev.c */
>
> int mlx4_get_ifname(const struct mlx4_priv *priv, char
> (*ifname)[IF_NAMESIZE]); diff --git a/drivers/net/mlx4/mlx4_flow.c
> b/drivers/net/mlx4/mlx4_flow.c index f4df4ab1fb..038dc71d35 100644
> --- a/drivers/net/mlx4/mlx4_flow.c
> +++ b/drivers/net/mlx4/mlx4_flow.c
> @@ -773,7 +773,7 @@ mlx4_flow_prepare(struct mlx4_priv *priv,
> if (flow->rss)
> break;
> queue = action->conf;
> - if (queue->index >= priv->dev->data-
> >nb_rx_queues) {
> + if (queue->index >= ETH_DEV(priv)->data-
> >nb_rx_queues) {
> msg = "queue target index beyond number
> of"
> " configured Rx queues";
> goto exit_action_not_supported;
> @@ -802,7 +802,7 @@ mlx4_flow_prepare(struct mlx4_priv *priv,
> /* Sanity checks. */
> for (i = 0; i < rss->queue_num; ++i)
> if (rss->queue[i] >=
> - priv->dev->data->nb_rx_queues)
> + ETH_DEV(priv)->data->nb_rx_queues)
> break;
> if (i != rss->queue_num) {
> msg = "queue index target beyond number
> of"
> @@ -1072,8 +1072,8 @@ mlx4_flow_toggle(struct mlx4_priv *priv,
> /* Stop at the first nonexistent target queue. */
> for (i = 0; i != rss->queues; ++i)
> if (rss->queue_id[i] >=
> - priv->dev->data->nb_rx_queues ||
> - !priv->dev->data->rx_queues[rss->queue_id[i]]) {
> + ETH_DEV(priv)->data->nb_rx_queues ||
> + !ETH_DEV(priv)->data->rx_queues[rss-
> >queue_id[i]]) {
> missing = 1;
> break;
> }
> @@ -1258,7 +1258,7 @@ static uint16_t
> mlx4_flow_internal_next_vlan(struct mlx4_priv *priv, uint16_t vlan) {
> while (vlan < 4096) {
> - if (priv->dev->data->vlan_filter_conf.ids[vlan / 64] &
> + if (ETH_DEV(priv)->data->vlan_filter_conf.ids[vlan / 64] &
> (UINT64_C(1) << (vlan % 64)))
> return vlan;
> ++vlan;
> @@ -1335,7 +1335,7 @@ mlx4_flow_internal(struct mlx4_priv *priv, struct
> rte_flow_error *error)
> * get RSS by default.
> */
> uint32_t queues =
> - rte_align32pow2(priv->dev->data->nb_rx_queues + 1) >> 1;
> + rte_align32pow2(ETH_DEV(priv)->data->nb_rx_queues + 1)
> >> 1;
> uint16_t queue[queues];
> struct rte_flow_action_rss action_rss = {
> .func = RTE_ETH_HASH_FUNCTION_DEFAULT, @@ -1357,9
> +1357,9 @@ mlx4_flow_internal(struct mlx4_priv *priv, struct
> rte_flow_error *error)
> };
> struct ether_addr *rule_mac = &eth_spec.dst;
> rte_be16_t *rule_vlan =
> - (priv->dev->data->dev_conf.rxmode.offloads &
> + (ETH_DEV(priv)->data->dev_conf.rxmode.offloads &
> DEV_RX_OFFLOAD_VLAN_FILTER) &&
> - !priv->dev->data->promiscuous ?
> + !ETH_DEV(priv)->data->promiscuous ?
> &vlan_spec.tci :
> NULL;
> uint16_t vlan = 0;
> @@ -1439,7 +1439,7 @@ mlx4_flow_internal(struct mlx4_priv *priv, struct
> rte_flow_error *error)
> if (!flow || !flow->internal) {
> /* Not found, create a new flow rule. */
> memcpy(rule_mac, mac, sizeof(*mac));
> - flow = mlx4_flow_create(priv->dev, &attr, pattern,
> + flow = mlx4_flow_create(ETH_DEV(priv), &attr,
> pattern,
> actions, error);
> if (!flow) {
> err = -rte_errno;
> @@ -1455,15 +1455,16 @@ mlx4_flow_internal(struct mlx4_priv *priv, struct
> rte_flow_error *error)
> goto next_vlan;
> }
> /* Take care of promiscuous and all multicast flow rules. */
> - if (priv->dev->data->promiscuous || priv->dev->data->all_multicast)
> {
> + if (ETH_DEV(priv)->data->promiscuous ||
> + ETH_DEV(priv)->data->all_multicast) {
> for (flow = LIST_FIRST(&priv->flows);
> flow && flow->internal;
> flow = LIST_NEXT(flow, next)) {
> - if (priv->dev->data->promiscuous) {
> + if (ETH_DEV(priv)->data->promiscuous) {
> if (flow->promisc)
> break;
> } else {
> - assert(priv->dev->data->all_multicast);
> + assert(ETH_DEV(priv)->data->all_multicast);
> if (flow->allmulti)
> break;
> }
> @@ -1477,16 +1478,16 @@ mlx4_flow_internal(struct mlx4_priv *priv, struct
> rte_flow_error *error)
> }
> if (!flow || !flow->internal) {
> /* Not found, create a new flow rule. */
> - if (priv->dev->data->promiscuous) {
> + if (ETH_DEV(priv)->data->promiscuous) {
> pattern[1].spec = NULL;
> pattern[1].mask = NULL;
> } else {
> - assert(priv->dev->data->all_multicast);
> + assert(ETH_DEV(priv)->data->all_multicast);
> pattern[1].spec = &eth_allmulti;
> pattern[1].mask = &eth_allmulti;
> }
> pattern[2] = pattern[3];
> - flow = mlx4_flow_create(priv->dev, &attr, pattern,
> + flow = mlx4_flow_create(ETH_DEV(priv), &attr,
> pattern,
> actions, error);
> if (!flow) {
> err = -rte_errno;
> @@ -1503,7 +1504,8 @@ mlx4_flow_internal(struct mlx4_priv *priv, struct
> rte_flow_error *error)
> struct rte_flow *next = LIST_NEXT(flow, next);
>
> if (!flow->select)
> - claim_zero(mlx4_flow_destroy(priv->dev, flow,
> error));
> + claim_zero(mlx4_flow_destroy(ETH_DEV(priv), flow,
> + error));
> else
> flow->select = 0;
> flow = next;
> @@ -1541,7 +1543,8 @@ mlx4_flow_sync(struct mlx4_priv *priv, struct
> rte_flow_error *error)
> for (flow = LIST_FIRST(&priv->flows);
> flow && flow->internal;
> flow = LIST_FIRST(&priv->flows))
> - claim_zero(mlx4_flow_destroy(priv->dev, flow,
> error));
> + claim_zero(mlx4_flow_destroy(ETH_DEV(priv), flow,
> + error));
> } else {
> /* Refresh internal rules. */
> ret = mlx4_flow_internal(priv, error); @@ -1574,7 +1577,7
> @@ mlx4_flow_clean(struct mlx4_priv *priv)
> struct rte_flow *flow;
>
> while ((flow = LIST_FIRST(&priv->flows)))
> - mlx4_flow_destroy(priv->dev, flow, NULL);
> + mlx4_flow_destroy(ETH_DEV(priv), flow, NULL);
> assert(LIST_EMPTY(&priv->rss));
> }
>
> diff --git a/drivers/net/mlx4/mlx4_intr.c b/drivers/net/mlx4/mlx4_intr.c
> index ec91242196..4f33526755 100644
> --- a/drivers/net/mlx4/mlx4_intr.c
> +++ b/drivers/net/mlx4/mlx4_intr.c
> @@ -65,7 +65,7 @@ static int
> mlx4_rx_intr_vec_enable(struct mlx4_priv *priv) {
> unsigned int i;
> - unsigned int rxqs_n = priv->dev->data->nb_rx_queues;
> + unsigned int rxqs_n = ETH_DEV(priv)->data->nb_rx_queues;
> unsigned int n = RTE_MIN(rxqs_n,
> (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
> unsigned int count = 0;
> struct rte_intr_handle *intr_handle = &priv->intr_handle; @@ -79,7
> +79,7 @@ mlx4_rx_intr_vec_enable(struct mlx4_priv *priv)
> return -rte_errno;
> }
> for (i = 0; i != n; ++i) {
> - struct rxq *rxq = priv->dev->data->rx_queues[i];
> + struct rxq *rxq = ETH_DEV(priv)->data->rx_queues[i];
>
> /* Skip queues that cannot request interrupts. */
> if (!rxq || !rxq->channel) {
> @@ -120,12 +120,12 @@ static void
> mlx4_link_status_alarm(struct mlx4_priv *priv) {
> const struct rte_intr_conf *const intr_conf =
> - &priv->dev->data->dev_conf.intr_conf;
> + &ETH_DEV(priv)->data->dev_conf.intr_conf;
>
> assert(priv->intr_alarm == 1);
> priv->intr_alarm = 0;
> if (intr_conf->lsc && !mlx4_link_status_check(priv))
> - _rte_eth_dev_callback_process(priv->dev,
> + _rte_eth_dev_callback_process(ETH_DEV(priv),
> RTE_ETH_EVENT_INTR_LSC,
> NULL);
> }
> @@ -145,8 +145,8 @@ mlx4_link_status_alarm(struct mlx4_priv *priv) static
> int mlx4_link_status_check(struct mlx4_priv *priv) {
> - struct rte_eth_link *link = &priv->dev->data->dev_link;
> - int ret = mlx4_link_update(priv->dev, 0);
> + struct rte_eth_link *link = &ETH_DEV(priv)->data->dev_link;
> + int ret = mlx4_link_update(ETH_DEV(priv), 0);
>
> if (ret)
> return ret;
> @@ -185,7 +185,7 @@ mlx4_interrupt_handler(struct mlx4_priv *priv)
> uint32_t caught[RTE_DIM(type)] = { 0 };
> struct ibv_async_event event;
> const struct rte_intr_conf *const intr_conf =
> - &priv->dev->data->dev_conf.intr_conf;
> + &ETH_DEV(priv)->data->dev_conf.intr_conf;
> unsigned int i;
>
> /* Read all message and acknowledge them. */ @@ -208,7 +208,7
> @@ mlx4_interrupt_handler(struct mlx4_priv *priv)
> }
> for (i = 0; i != RTE_DIM(caught); ++i)
> if (caught[i])
> - _rte_eth_dev_callback_process(priv->dev, type[i],
> + _rte_eth_dev_callback_process(ETH_DEV(priv),
> type[i],
> NULL);
> }
>
> @@ -282,7 +282,7 @@ int
> mlx4_intr_install(struct mlx4_priv *priv) {
> const struct rte_intr_conf *const intr_conf =
> - &priv->dev->data->dev_conf.intr_conf;
> + Ð_DEV(priv)->data->dev_conf.intr_conf;
> int rc;
>
> mlx4_intr_uninstall(priv);
> @@ -381,7 +381,7 @@ int
> mlx4_rxq_intr_enable(struct mlx4_priv *priv) {
> const struct rte_intr_conf *const intr_conf =
> - &priv->dev->data->dev_conf.intr_conf;
> + &ETH_DEV(priv)->data->dev_conf.intr_conf;
>
> if (intr_conf->rxq && mlx4_rx_intr_vec_enable(priv) < 0)
> goto error;
> diff --git a/drivers/net/mlx4/mlx4_mr.c b/drivers/net/mlx4/mlx4_mr.c index
> 4376ad0b60..e4be46ab2a 100644
> --- a/drivers/net/mlx4/mlx4_mr.c
> +++ b/drivers/net/mlx4/mlx4_mr.c
> @@ -896,7 +896,7 @@ mlx4_mr_mem_event_cb(enum rte_mem_event
> event_type, const void *addr,
> rte_rwlock_read_lock(&mlx4_mem_event_rwlock);
> /* Iterate all the existing mlx4 devices. */
> LIST_FOREACH(priv, &mlx4_mem_event_cb_list,
> mem_event_cb)
> - mlx4_mr_mem_event_free_cb(priv->dev, addr,
> len);
> + mlx4_mr_mem_event_free_cb(ETH_DEV(priv),
> addr, len);
> rte_rwlock_read_unlock(&mlx4_mem_event_rwlock);
> break;
> case RTE_MEM_EVENT_ALLOC:
> @@ -1028,7 +1028,7 @@ mlx4_rx_addr2mr_bh(struct rxq *rxq, uintptr_t
> addr)
>
> DEBUG("Rx queue %u: miss on top-half, mru=%u, head=%u,
> addr=%p",
> rxq->stats.idx, mr_ctrl->mru, mr_ctrl->head, (void *)addr);
> - return mlx4_mr_addr2mr_bh(priv->dev, mr_ctrl, addr);
> + return mlx4_mr_addr2mr_bh(ETH_DEV(priv), mr_ctrl, addr);
> }
>
> /**
> @@ -1050,7 +1050,7 @@ mlx4_tx_addr2mr_bh(struct txq *txq, uintptr_t
> addr)
>
> DEBUG("Tx queue %u: miss on top-half, mru=%u, head=%u,
> addr=%p",
> txq->stats.idx, mr_ctrl->mru, mr_ctrl->head, (void *)addr);
> - return mlx4_mr_addr2mr_bh(priv->dev, mr_ctrl, addr);
> + return mlx4_mr_addr2mr_bh(ETH_DEV(priv), mr_ctrl, addr);
> }
>
> /**
> @@ -1225,7 +1225,7 @@ mlx4_tx_update_ext_mp(struct txq *txq, uintptr_t
> addr, struct rte_mempool *mp)
> struct mlx4_mr_ctrl *mr_ctrl = &txq->mr_ctrl;
> struct mlx4_priv *priv = txq->priv;
>
> - mlx4_mr_update_ext_mp(priv->dev, mr_ctrl, mp);
> + mlx4_mr_update_ext_mp(ETH_DEV(priv), mr_ctrl, mp);
> return mlx4_tx_addr2mr_bh(txq, addr);
> }
>
> diff --git a/drivers/net/mlx4/mlx4_rxq.c b/drivers/net/mlx4/mlx4_rxq.c
> index 3782c6baab..50f33eb0c5 100644
> --- a/drivers/net/mlx4/mlx4_rxq.c
> +++ b/drivers/net/mlx4/mlx4_rxq.c
> @@ -176,6 +176,7 @@ mlx4_rss_attach(struct mlx4_rss *rss)
>
> struct ibv_wq *ind_tbl[rss->queues];
> struct mlx4_priv *priv = rss->priv;
> + struct rte_eth_dev *dev = ETH_DEV(priv);
> const char *msg;
> unsigned int i = 0;
> int ret;
> @@ -189,8 +190,8 @@ mlx4_rss_attach(struct mlx4_rss *rss)
> uint16_t id = rss->queue_id[i];
> struct rxq *rxq = NULL;
>
> - if (id < priv->dev->data->nb_rx_queues)
> - rxq = priv->dev->data->rx_queues[id];
> + if (id < dev->data->nb_rx_queues)
> + rxq = dev->data->rx_queues[id];
> if (!rxq) {
> ret = EINVAL;
> msg = "RSS target queue is not configured"; @@ -
> 269,7 +270,7 @@ mlx4_rss_attach(struct mlx4_rss *rss)
> rss->ind = NULL;
> }
> while (i--)
> - mlx4_rxq_detach(priv->dev->data->rx_queues[rss-
> >queue_id[i]]);
> + mlx4_rxq_detach(dev->data->rx_queues[rss-
> >queue_id[i]]);
> ERROR("mlx4: %s", msg);
> --rss->usecnt;
> rte_errno = ret;
> @@ -291,6 +292,7 @@ void
> mlx4_rss_detach(struct mlx4_rss *rss)
> {
> struct mlx4_priv *priv = rss->priv;
> + struct rte_eth_dev *dev = ETH_DEV(priv);
> unsigned int i;
>
> assert(rss->refcnt);
> @@ -303,7 +305,7 @@ mlx4_rss_detach(struct mlx4_rss *rss)
> claim_zero(mlx4_glue->destroy_rwq_ind_table(rss->ind));
> rss->ind = NULL;
> for (i = 0; i != rss->queues; ++i)
> - mlx4_rxq_detach(priv->dev->data->rx_queues[rss-
> >queue_id[i]]);
> + mlx4_rxq_detach(dev->data->rx_queues[rss-
> >queue_id[i]]);
> }
>
> /**
> @@ -329,7 +331,7 @@ mlx4_rss_detach(struct mlx4_rss *rss) int
> mlx4_rss_init(struct mlx4_priv *priv) {
> - struct rte_eth_dev *dev = priv->dev;
> + struct rte_eth_dev *dev = ETH_DEV(priv);
> uint8_t log2_range = rte_log2_u32(dev->data->nb_rx_queues);
> uint32_t wq_num_prev = 0;
> const char *msg;
> @@ -338,7 +340,7 @@ mlx4_rss_init(struct mlx4_priv *priv)
>
> if (priv->rss_init)
> return 0;
> - if (priv->dev->data->nb_rx_queues > priv->hw_rss_max_qps) {
> + if (ETH_DEV(priv)->data->nb_rx_queues > priv->hw_rss_max_qps) {
> ERROR("RSS does not support more than %d queues",
> priv->hw_rss_max_qps);
> rte_errno = EINVAL;
> @@ -356,8 +358,8 @@ mlx4_rss_init(struct mlx4_priv *priv)
> rte_errno = ret;
> return -ret;
> }
> - for (i = 0; i != priv->dev->data->nb_rx_queues; ++i) {
> - struct rxq *rxq = priv->dev->data->rx_queues[i];
> + for (i = 0; i != ETH_DEV(priv)->data->nb_rx_queues; ++i) {
> + struct rxq *rxq = ETH_DEV(priv)->data->rx_queues[i];
> struct ibv_cq *cq;
> struct ibv_wq *wq;
> uint32_t wq_num;
> @@ -432,7 +434,7 @@ mlx4_rss_init(struct mlx4_priv *priv)
> ERROR("cannot initialize common RSS resources (queue %u): %s:
> %s",
> i, msg, strerror(ret));
> while (i--) {
> - struct rxq *rxq = priv->dev->data->rx_queues[i];
> + struct rxq *rxq = ETH_DEV(priv)->data->rx_queues[i];
>
> if (rxq)
> mlx4_rxq_detach(rxq);
> @@ -457,8 +459,8 @@ mlx4_rss_deinit(struct mlx4_priv *priv)
>
> if (!priv->rss_init)
> return;
> - for (i = 0; i != priv->dev->data->nb_rx_queues; ++i) {
> - struct rxq *rxq = priv->dev->data->rx_queues[i];
> + for (i = 0; i != ETH_DEV(priv)->data->nb_rx_queues; ++i) {
> + struct rxq *rxq = ETH_DEV(priv)->data->rx_queues[i];
>
> if (rxq) {
> assert(rxq->usecnt == 1);
> @@ -494,7 +496,7 @@ mlx4_rxq_attach(struct rxq *rxq)
> }
>
> struct mlx4_priv *priv = rxq->priv;
> - struct rte_eth_dev *dev = priv->dev;
> + struct rte_eth_dev *dev = ETH_DEV(priv);
> const uint32_t elts_n = 1 << rxq->elts_n;
> const uint32_t sges_n = 1 << rxq->sges_n;
> struct rte_mbuf *(*elts)[elts_n] = rxq->elts; @@ -561,7 +563,7 @@
> mlx4_rxq_attach(struct rxq *rxq)
> }
> /* Pre-register Rx mempool. */
> DEBUG("port %u Rx queue %u registering mp %s having %u chunks",
> - priv->dev->data->port_id, rxq->stats.idx,
> + ETH_DEV(priv)->data->port_id, rxq->stats.idx,
> rxq->mp->name, rxq->mp->nb_mem_chunks);
> mlx4_mr_update_mp(dev, &rxq->mr_ctrl, rxq->mp);
> wqes = (volatile struct mlx4_wqe_data_seg (*)[]) @@ -917,11
> +919,11 @@ mlx4_rx_queue_release(void *dpdk_rxq)
> if (rxq == NULL)
> return;
> priv = rxq->priv;
> - for (i = 0; i != priv->dev->data->nb_rx_queues; ++i)
> - if (priv->dev->data->rx_queues[i] == rxq) {
> + for (i = 0; i != ETH_DEV(priv)->data->nb_rx_queues; ++i)
> + if (ETH_DEV(priv)->data->rx_queues[i] == rxq) {
> DEBUG("%p: removing Rx queue %p from list",
> - (void *)priv->dev, (void *)rxq);
> - priv->dev->data->rx_queues[i] = NULL;
> + (void *)ETH_DEV(priv), (void *)rxq);
> + ETH_DEV(priv)->data->rx_queues[i] = NULL;
> break;
> }
> assert(!rxq->cq);
> diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c
> index 8142775fc4..352700820d 100644
> --- a/drivers/net/mlx4/mlx4_txq.c
> +++ b/drivers/net/mlx4/mlx4_txq.c
> @@ -357,11 +357,11 @@ mlx4_tx_queue_release(void *dpdk_txq)
> if (txq == NULL)
> return;
> priv = txq->priv;
> - for (i = 0; i != priv->dev->data->nb_tx_queues; ++i)
> - if (priv->dev->data->tx_queues[i] == txq) {
> + for (i = 0; i != ETH_DEV(priv)->data->nb_tx_queues; ++i)
> + if (ETH_DEV(priv)->data->tx_queues[i] == txq) {
> DEBUG("%p: removing Tx queue %p from list",
> - (void *)priv->dev, (void *)txq);
> - priv->dev->data->tx_queues[i] = NULL;
> + (void *)ETH_DEV(priv), (void *)txq);
> + ETH_DEV(priv)->data->tx_queues[i] = NULL;
> break;
> }
> mlx4_txq_free_elts(txq);
> --
> 2.11.0
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [dpdk-dev] [PATCH v2 1/3] net/mlx4: change device reference for secondary process
2019-03-26 19:16 ` Shahaf Shuler
@ 2019-03-26 19:16 ` Shahaf Shuler
0 siblings, 0 replies; 30+ messages in thread
From: Shahaf Shuler @ 2019-03-26 19:16 UTC (permalink / raw)
To: Yongseok Koh; +Cc: dev, stable
Monday, March 25, 2019 9:18 PM, Yongseok Koh:
> Subject: [dpdk-dev] [PATCH v2 1/3] net/mlx4: change device reference for
> secondary process
>
> rte_eth_devices[] is not shared between primary and secondary process,
> but a static array to each process. The reverse pointer of device (priv->dev)
> becomes invalid if mlx4 supports secondary process. Instead, priv has the
> pointer to shared data of the device,
> struct rte_eth_dev_data *dev_data;
>
> Two macros are added,
> #define PORT_ID(priv) ((priv)->dev_data->port_id)
> #define ETH_DEV(priv) (&rte_eth_devices[PORT_ID(priv)])
>
> Cc: stable@dpdk.org
>
> Suggested-by: Raslan Darawsheh <rasland@mellanox.com>
> Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
Acked-by: Shahaf Shuler <shahafs@mellanox.com>
> ---
> drivers/net/mlx4/mlx4.c | 4 ++--
> drivers/net/mlx4/mlx4.h | 5 ++++-
> drivers/net/mlx4/mlx4_flow.c | 39 +++++++++++++++++++++-----------------
> -
> drivers/net/mlx4/mlx4_intr.c | 20 ++++++++++----------
> drivers/net/mlx4/mlx4_mr.c | 8 ++++----
> drivers/net/mlx4/mlx4_rxq.c | 36 +++++++++++++++++++-----------------
> drivers/net/mlx4/mlx4_txq.c | 8 ++++----
> 7 files changed, 64 insertions(+), 56 deletions(-)
>
> diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c index
> 5ef2e7f41e..bb6ab8ec6e 100644
> --- a/drivers/net/mlx4/mlx4.c
> +++ b/drivers/net/mlx4/mlx4.c
> @@ -753,11 +753,11 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv,
> struct rte_pci_device *pci_dev)
> * handled by rte_intr_rx_ctl().
> */
> eth_dev->intr_handle = &priv->intr_handle;
> - priv->dev = eth_dev;
> + priv->dev_data = eth_dev->data;
> eth_dev->dev_ops = &mlx4_dev_ops;
> /* Bring Ethernet device up. */
> DEBUG("forcing Ethernet interface up");
> - mlx4_dev_set_link_up(priv->dev);
> + mlx4_dev_set_link_up(eth_dev);
> /* Update link status once if waiting for LSC. */
> if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
> mlx4_link_update(eth_dev, 0);
> diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h index
> 7ac49ca672..51566caf7f 100644
> --- a/drivers/net/mlx4/mlx4.h
> +++ b/drivers/net/mlx4/mlx4.h
> @@ -79,7 +79,7 @@ LIST_HEAD(mlx4_mr_list, mlx4_mr); struct mlx4_priv {
> LIST_ENTRY(mlx4_priv) mem_event_cb;
> /**< Called by memory event callback. */
> - struct rte_eth_dev *dev; /**< Ethernet device. */
> + struct rte_eth_dev_data *dev_data; /* Pointer to device data. */
> struct ibv_context *ctx; /**< Verbs context. */
> struct ibv_device_attr device_attr; /**< Device properties. */
> struct ibv_pd *pd; /**< Protection Domain. */ @@ -113,6 +113,9
> @@ struct mlx4_priv {
> /**< Configured MAC addresses. Unused entries are zeroed. */ };
>
> +#define PORT_ID(priv) ((priv)->dev_data->port_id) #define ETH_DEV(priv)
> +(&rte_eth_devices[PORT_ID(priv)])
> +
> /* mlx4_ethdev.c */
>
> int mlx4_get_ifname(const struct mlx4_priv *priv, char
> (*ifname)[IF_NAMESIZE]); diff --git a/drivers/net/mlx4/mlx4_flow.c
> b/drivers/net/mlx4/mlx4_flow.c index f4df4ab1fb..038dc71d35 100644
> --- a/drivers/net/mlx4/mlx4_flow.c
> +++ b/drivers/net/mlx4/mlx4_flow.c
> @@ -773,7 +773,7 @@ mlx4_flow_prepare(struct mlx4_priv *priv,
> if (flow->rss)
> break;
> queue = action->conf;
> - if (queue->index >= priv->dev->data-
> >nb_rx_queues) {
> + if (queue->index >= ETH_DEV(priv)->data-
> >nb_rx_queues) {
> msg = "queue target index beyond number
> of"
> " configured Rx queues";
> goto exit_action_not_supported;
> @@ -802,7 +802,7 @@ mlx4_flow_prepare(struct mlx4_priv *priv,
> /* Sanity checks. */
> for (i = 0; i < rss->queue_num; ++i)
> if (rss->queue[i] >=
> - priv->dev->data->nb_rx_queues)
> + ETH_DEV(priv)->data->nb_rx_queues)
> break;
> if (i != rss->queue_num) {
> msg = "queue index target beyond number
> of"
> @@ -1072,8 +1072,8 @@ mlx4_flow_toggle(struct mlx4_priv *priv,
> /* Stop at the first nonexistent target queue. */
> for (i = 0; i != rss->queues; ++i)
> if (rss->queue_id[i] >=
> - priv->dev->data->nb_rx_queues ||
> - !priv->dev->data->rx_queues[rss->queue_id[i]]) {
> + ETH_DEV(priv)->data->nb_rx_queues ||
> + !ETH_DEV(priv)->data->rx_queues[rss-
> >queue_id[i]]) {
> missing = 1;
> break;
> }
> @@ -1258,7 +1258,7 @@ static uint16_t
> mlx4_flow_internal_next_vlan(struct mlx4_priv *priv, uint16_t vlan) {
> while (vlan < 4096) {
> - if (priv->dev->data->vlan_filter_conf.ids[vlan / 64] &
> + if (ETH_DEV(priv)->data->vlan_filter_conf.ids[vlan / 64] &
> (UINT64_C(1) << (vlan % 64)))
> return vlan;
> ++vlan;
> @@ -1335,7 +1335,7 @@ mlx4_flow_internal(struct mlx4_priv *priv, struct
> rte_flow_error *error)
> * get RSS by default.
> */
> uint32_t queues =
> - rte_align32pow2(priv->dev->data->nb_rx_queues + 1) >> 1;
> + rte_align32pow2(ETH_DEV(priv)->data->nb_rx_queues + 1)
> >> 1;
> uint16_t queue[queues];
> struct rte_flow_action_rss action_rss = {
> .func = RTE_ETH_HASH_FUNCTION_DEFAULT, @@ -1357,9
> +1357,9 @@ mlx4_flow_internal(struct mlx4_priv *priv, struct
> rte_flow_error *error)
> };
> struct ether_addr *rule_mac = &eth_spec.dst;
> rte_be16_t *rule_vlan =
> - (priv->dev->data->dev_conf.rxmode.offloads &
> + (ETH_DEV(priv)->data->dev_conf.rxmode.offloads &
> DEV_RX_OFFLOAD_VLAN_FILTER) &&
> - !priv->dev->data->promiscuous ?
> + !ETH_DEV(priv)->data->promiscuous ?
> &vlan_spec.tci :
> NULL;
> uint16_t vlan = 0;
> @@ -1439,7 +1439,7 @@ mlx4_flow_internal(struct mlx4_priv *priv, struct
> rte_flow_error *error)
> if (!flow || !flow->internal) {
> /* Not found, create a new flow rule. */
> memcpy(rule_mac, mac, sizeof(*mac));
> - flow = mlx4_flow_create(priv->dev, &attr, pattern,
> + flow = mlx4_flow_create(ETH_DEV(priv), &attr,
> pattern,
> actions, error);
> if (!flow) {
> err = -rte_errno;
> @@ -1455,15 +1455,16 @@ mlx4_flow_internal(struct mlx4_priv *priv, struct
> rte_flow_error *error)
> goto next_vlan;
> }
> /* Take care of promiscuous and all multicast flow rules. */
> - if (priv->dev->data->promiscuous || priv->dev->data->all_multicast)
> {
> + if (ETH_DEV(priv)->data->promiscuous ||
> + ETH_DEV(priv)->data->all_multicast) {
> for (flow = LIST_FIRST(&priv->flows);
> flow && flow->internal;
> flow = LIST_NEXT(flow, next)) {
> - if (priv->dev->data->promiscuous) {
> + if (ETH_DEV(priv)->data->promiscuous) {
> if (flow->promisc)
> break;
> } else {
> - assert(priv->dev->data->all_multicast);
> + assert(ETH_DEV(priv)->data->all_multicast);
> if (flow->allmulti)
> break;
> }
> @@ -1477,16 +1478,16 @@ mlx4_flow_internal(struct mlx4_priv *priv, struct
> rte_flow_error *error)
> }
> if (!flow || !flow->internal) {
> /* Not found, create a new flow rule. */
> - if (priv->dev->data->promiscuous) {
> + if (ETH_DEV(priv)->data->promiscuous) {
> pattern[1].spec = NULL;
> pattern[1].mask = NULL;
> } else {
> - assert(priv->dev->data->all_multicast);
> + assert(ETH_DEV(priv)->data->all_multicast);
> pattern[1].spec = &eth_allmulti;
> pattern[1].mask = &eth_allmulti;
> }
> pattern[2] = pattern[3];
> - flow = mlx4_flow_create(priv->dev, &attr, pattern,
> + flow = mlx4_flow_create(ETH_DEV(priv), &attr,
> pattern,
> actions, error);
> if (!flow) {
> err = -rte_errno;
> @@ -1503,7 +1504,8 @@ mlx4_flow_internal(struct mlx4_priv *priv, struct
> rte_flow_error *error)
> struct rte_flow *next = LIST_NEXT(flow, next);
>
> if (!flow->select)
> - claim_zero(mlx4_flow_destroy(priv->dev, flow,
> error));
> + claim_zero(mlx4_flow_destroy(ETH_DEV(priv), flow,
> + error));
> else
> flow->select = 0;
> flow = next;
> @@ -1541,7 +1543,8 @@ mlx4_flow_sync(struct mlx4_priv *priv, struct
> rte_flow_error *error)
> for (flow = LIST_FIRST(&priv->flows);
> flow && flow->internal;
> flow = LIST_FIRST(&priv->flows))
> - claim_zero(mlx4_flow_destroy(priv->dev, flow,
> error));
> + claim_zero(mlx4_flow_destroy(ETH_DEV(priv), flow,
> + error));
> } else {
> /* Refresh internal rules. */
> ret = mlx4_flow_internal(priv, error); @@ -1574,7 +1577,7
> @@ mlx4_flow_clean(struct mlx4_priv *priv)
> struct rte_flow *flow;
>
> while ((flow = LIST_FIRST(&priv->flows)))
> - mlx4_flow_destroy(priv->dev, flow, NULL);
> + mlx4_flow_destroy(ETH_DEV(priv), flow, NULL);
> assert(LIST_EMPTY(&priv->rss));
> }
>
> diff --git a/drivers/net/mlx4/mlx4_intr.c b/drivers/net/mlx4/mlx4_intr.c
> index ec91242196..4f33526755 100644
> --- a/drivers/net/mlx4/mlx4_intr.c
> +++ b/drivers/net/mlx4/mlx4_intr.c
> @@ -65,7 +65,7 @@ static int
> mlx4_rx_intr_vec_enable(struct mlx4_priv *priv) {
> unsigned int i;
> - unsigned int rxqs_n = priv->dev->data->nb_rx_queues;
> + unsigned int rxqs_n = ETH_DEV(priv)->data->nb_rx_queues;
> unsigned int n = RTE_MIN(rxqs_n,
> (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
> unsigned int count = 0;
> struct rte_intr_handle *intr_handle = &priv->intr_handle; @@ -79,7
> +79,7 @@ mlx4_rx_intr_vec_enable(struct mlx4_priv *priv)
> return -rte_errno;
> }
> for (i = 0; i != n; ++i) {
> - struct rxq *rxq = priv->dev->data->rx_queues[i];
> + struct rxq *rxq = ETH_DEV(priv)->data->rx_queues[i];
>
> /* Skip queues that cannot request interrupts. */
> if (!rxq || !rxq->channel) {
> @@ -120,12 +120,12 @@ static void
> mlx4_link_status_alarm(struct mlx4_priv *priv) {
> const struct rte_intr_conf *const intr_conf =
> - &priv->dev->data->dev_conf.intr_conf;
> + &ETH_DEV(priv)->data->dev_conf.intr_conf;
>
> assert(priv->intr_alarm == 1);
> priv->intr_alarm = 0;
> if (intr_conf->lsc && !mlx4_link_status_check(priv))
> - _rte_eth_dev_callback_process(priv->dev,
> + _rte_eth_dev_callback_process(ETH_DEV(priv),
> RTE_ETH_EVENT_INTR_LSC,
> NULL);
> }
> @@ -145,8 +145,8 @@ mlx4_link_status_alarm(struct mlx4_priv *priv) static
> int mlx4_link_status_check(struct mlx4_priv *priv) {
> - struct rte_eth_link *link = &priv->dev->data->dev_link;
> - int ret = mlx4_link_update(priv->dev, 0);
> + struct rte_eth_link *link = &ETH_DEV(priv)->data->dev_link;
> + int ret = mlx4_link_update(ETH_DEV(priv), 0);
>
> if (ret)
> return ret;
> @@ -185,7 +185,7 @@ mlx4_interrupt_handler(struct mlx4_priv *priv)
> uint32_t caught[RTE_DIM(type)] = { 0 };
> struct ibv_async_event event;
> const struct rte_intr_conf *const intr_conf =
> - &priv->dev->data->dev_conf.intr_conf;
> + &ETH_DEV(priv)->data->dev_conf.intr_conf;
> unsigned int i;
>
> /* Read all message and acknowledge them. */ @@ -208,7 +208,7
> @@ mlx4_interrupt_handler(struct mlx4_priv *priv)
> }
> for (i = 0; i != RTE_DIM(caught); ++i)
> if (caught[i])
> - _rte_eth_dev_callback_process(priv->dev, type[i],
> + _rte_eth_dev_callback_process(ETH_DEV(priv),
> type[i],
> NULL);
> }
>
> @@ -282,7 +282,7 @@ int
> mlx4_intr_install(struct mlx4_priv *priv) {
> const struct rte_intr_conf *const intr_conf =
> - &priv->dev->data->dev_conf.intr_conf;
> + &ETH_DEV(priv)->data->dev_conf.intr_conf;
> int rc;
>
> mlx4_intr_uninstall(priv);
> @@ -381,7 +381,7 @@ int
> mlx4_rxq_intr_enable(struct mlx4_priv *priv) {
> const struct rte_intr_conf *const intr_conf =
> - &priv->dev->data->dev_conf.intr_conf;
> + &ETH_DEV(priv)->data->dev_conf.intr_conf;
>
> if (intr_conf->rxq && mlx4_rx_intr_vec_enable(priv) < 0)
> goto error;
> diff --git a/drivers/net/mlx4/mlx4_mr.c b/drivers/net/mlx4/mlx4_mr.c index
> 4376ad0b60..e4be46ab2a 100644
> --- a/drivers/net/mlx4/mlx4_mr.c
> +++ b/drivers/net/mlx4/mlx4_mr.c
> @@ -896,7 +896,7 @@ mlx4_mr_mem_event_cb(enum rte_mem_event
> event_type, const void *addr,
> rte_rwlock_read_lock(&mlx4_mem_event_rwlock);
> /* Iterate all the existing mlx4 devices. */
> LIST_FOREACH(priv, &mlx4_mem_event_cb_list,
> mem_event_cb)
> - mlx4_mr_mem_event_free_cb(priv->dev, addr,
> len);
> + mlx4_mr_mem_event_free_cb(ETH_DEV(priv),
> addr, len);
> rte_rwlock_read_unlock(&mlx4_mem_event_rwlock);
> break;
> case RTE_MEM_EVENT_ALLOC:
> @@ -1028,7 +1028,7 @@ mlx4_rx_addr2mr_bh(struct rxq *rxq, uintptr_t
> addr)
>
> DEBUG("Rx queue %u: miss on top-half, mru=%u, head=%u,
> addr=%p",
> rxq->stats.idx, mr_ctrl->mru, mr_ctrl->head, (void *)addr);
> - return mlx4_mr_addr2mr_bh(priv->dev, mr_ctrl, addr);
> + return mlx4_mr_addr2mr_bh(ETH_DEV(priv), mr_ctrl, addr);
> }
>
> /**
> @@ -1050,7 +1050,7 @@ mlx4_tx_addr2mr_bh(struct txq *txq, uintptr_t
> addr)
>
> DEBUG("Tx queue %u: miss on top-half, mru=%u, head=%u,
> addr=%p",
> txq->stats.idx, mr_ctrl->mru, mr_ctrl->head, (void *)addr);
> - return mlx4_mr_addr2mr_bh(priv->dev, mr_ctrl, addr);
> + return mlx4_mr_addr2mr_bh(ETH_DEV(priv), mr_ctrl, addr);
> }
>
> /**
> @@ -1225,7 +1225,7 @@ mlx4_tx_update_ext_mp(struct txq *txq, uintptr_t
> addr, struct rte_mempool *mp)
> struct mlx4_mr_ctrl *mr_ctrl = &txq->mr_ctrl;
> struct mlx4_priv *priv = txq->priv;
>
> - mlx4_mr_update_ext_mp(priv->dev, mr_ctrl, mp);
> + mlx4_mr_update_ext_mp(ETH_DEV(priv), mr_ctrl, mp);
> return mlx4_tx_addr2mr_bh(txq, addr);
> }
>
> diff --git a/drivers/net/mlx4/mlx4_rxq.c b/drivers/net/mlx4/mlx4_rxq.c
> index 3782c6baab..50f33eb0c5 100644
> --- a/drivers/net/mlx4/mlx4_rxq.c
> +++ b/drivers/net/mlx4/mlx4_rxq.c
> @@ -176,6 +176,7 @@ mlx4_rss_attach(struct mlx4_rss *rss)
>
> struct ibv_wq *ind_tbl[rss->queues];
> struct mlx4_priv *priv = rss->priv;
> + struct rte_eth_dev *dev = ETH_DEV(priv);
> const char *msg;
> unsigned int i = 0;
> int ret;
> @@ -189,8 +190,8 @@ mlx4_rss_attach(struct mlx4_rss *rss)
> uint16_t id = rss->queue_id[i];
> struct rxq *rxq = NULL;
>
> - if (id < priv->dev->data->nb_rx_queues)
> - rxq = priv->dev->data->rx_queues[id];
> + if (id < dev->data->nb_rx_queues)
> + rxq = dev->data->rx_queues[id];
> if (!rxq) {
> ret = EINVAL;
> msg = "RSS target queue is not configured"; @@ -
> 269,7 +270,7 @@ mlx4_rss_attach(struct mlx4_rss *rss)
> rss->ind = NULL;
> }
> while (i--)
> - mlx4_rxq_detach(priv->dev->data->rx_queues[rss-
> >queue_id[i]]);
> + mlx4_rxq_detach(dev->data->rx_queues[rss-
> >queue_id[i]]);
> ERROR("mlx4: %s", msg);
> --rss->usecnt;
> rte_errno = ret;
> @@ -291,6 +292,7 @@ void
> mlx4_rss_detach(struct mlx4_rss *rss)
> {
> struct mlx4_priv *priv = rss->priv;
> + struct rte_eth_dev *dev = ETH_DEV(priv);
> unsigned int i;
>
> assert(rss->refcnt);
> @@ -303,7 +305,7 @@ mlx4_rss_detach(struct mlx4_rss *rss)
> claim_zero(mlx4_glue->destroy_rwq_ind_table(rss->ind));
> rss->ind = NULL;
> for (i = 0; i != rss->queues; ++i)
> - mlx4_rxq_detach(priv->dev->data->rx_queues[rss-
> >queue_id[i]]);
> + mlx4_rxq_detach(dev->data->rx_queues[rss-
> >queue_id[i]]);
> }
>
> /**
> @@ -329,7 +331,7 @@ mlx4_rss_detach(struct mlx4_rss *rss) int
> mlx4_rss_init(struct mlx4_priv *priv) {
> - struct rte_eth_dev *dev = priv->dev;
> + struct rte_eth_dev *dev = ETH_DEV(priv);
> uint8_t log2_range = rte_log2_u32(dev->data->nb_rx_queues);
> uint32_t wq_num_prev = 0;
> const char *msg;
> @@ -338,7 +340,7 @@ mlx4_rss_init(struct mlx4_priv *priv)
>
> if (priv->rss_init)
> return 0;
> - if (priv->dev->data->nb_rx_queues > priv->hw_rss_max_qps) {
> + if (ETH_DEV(priv)->data->nb_rx_queues > priv->hw_rss_max_qps) {
> ERROR("RSS does not support more than %d queues",
> priv->hw_rss_max_qps);
> rte_errno = EINVAL;
> @@ -356,8 +358,8 @@ mlx4_rss_init(struct mlx4_priv *priv)
> rte_errno = ret;
> return -ret;
> }
> - for (i = 0; i != priv->dev->data->nb_rx_queues; ++i) {
> - struct rxq *rxq = priv->dev->data->rx_queues[i];
> + for (i = 0; i != ETH_DEV(priv)->data->nb_rx_queues; ++i) {
> + struct rxq *rxq = ETH_DEV(priv)->data->rx_queues[i];
> struct ibv_cq *cq;
> struct ibv_wq *wq;
> uint32_t wq_num;
> @@ -432,7 +434,7 @@ mlx4_rss_init(struct mlx4_priv *priv)
> ERROR("cannot initialize common RSS resources (queue %u): %s:
> %s",
> i, msg, strerror(ret));
> while (i--) {
> - struct rxq *rxq = priv->dev->data->rx_queues[i];
> + struct rxq *rxq = ETH_DEV(priv)->data->rx_queues[i];
>
> if (rxq)
> mlx4_rxq_detach(rxq);
> @@ -457,8 +459,8 @@ mlx4_rss_deinit(struct mlx4_priv *priv)
>
> if (!priv->rss_init)
> return;
> - for (i = 0; i != priv->dev->data->nb_rx_queues; ++i) {
> - struct rxq *rxq = priv->dev->data->rx_queues[i];
> + for (i = 0; i != ETH_DEV(priv)->data->nb_rx_queues; ++i) {
> + struct rxq *rxq = ETH_DEV(priv)->data->rx_queues[i];
>
> if (rxq) {
> assert(rxq->usecnt == 1);
> @@ -494,7 +496,7 @@ mlx4_rxq_attach(struct rxq *rxq)
> }
>
> struct mlx4_priv *priv = rxq->priv;
> - struct rte_eth_dev *dev = priv->dev;
> + struct rte_eth_dev *dev = ETH_DEV(priv);
> const uint32_t elts_n = 1 << rxq->elts_n;
> const uint32_t sges_n = 1 << rxq->sges_n;
> struct rte_mbuf *(*elts)[elts_n] = rxq->elts; @@ -561,7 +563,7 @@
> mlx4_rxq_attach(struct rxq *rxq)
> }
> /* Pre-register Rx mempool. */
> DEBUG("port %u Rx queue %u registering mp %s having %u chunks",
> - priv->dev->data->port_id, rxq->stats.idx,
> + ETH_DEV(priv)->data->port_id, rxq->stats.idx,
> rxq->mp->name, rxq->mp->nb_mem_chunks);
> mlx4_mr_update_mp(dev, &rxq->mr_ctrl, rxq->mp);
> wqes = (volatile struct mlx4_wqe_data_seg (*)[]) @@ -917,11
> +919,11 @@ mlx4_rx_queue_release(void *dpdk_rxq)
> if (rxq == NULL)
> return;
> priv = rxq->priv;
> - for (i = 0; i != priv->dev->data->nb_rx_queues; ++i)
> - if (priv->dev->data->rx_queues[i] == rxq) {
> + for (i = 0; i != ETH_DEV(priv)->data->nb_rx_queues; ++i)
> + if (ETH_DEV(priv)->data->rx_queues[i] == rxq) {
> DEBUG("%p: removing Rx queue %p from list",
> - (void *)priv->dev, (void *)rxq);
> - priv->dev->data->rx_queues[i] = NULL;
> + (void *)ETH_DEV(priv), (void *)rxq);
> + ETH_DEV(priv)->data->rx_queues[i] = NULL;
> break;
> }
> assert(!rxq->cq);
> diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c
> index 8142775fc4..352700820d 100644
> --- a/drivers/net/mlx4/mlx4_txq.c
> +++ b/drivers/net/mlx4/mlx4_txq.c
> @@ -357,11 +357,11 @@ mlx4_tx_queue_release(void *dpdk_txq)
> if (txq == NULL)
> return;
> priv = txq->priv;
> - for (i = 0; i != priv->dev->data->nb_tx_queues; ++i)
> - if (priv->dev->data->tx_queues[i] == txq) {
> + for (i = 0; i != ETH_DEV(priv)->data->nb_tx_queues; ++i)
> + if (ETH_DEV(priv)->data->tx_queues[i] == txq) {
> DEBUG("%p: removing Tx queue %p from list",
> - (void *)priv->dev, (void *)txq);
> - priv->dev->data->tx_queues[i] = NULL;
> + (void *)ETH_DEV(priv), (void *)txq);
> + ETH_DEV(priv)->data->tx_queues[i] = NULL;
> break;
> }
> mlx4_txq_free_elts(txq);
> --
> 2.11.0
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [dpdk-dev] [PATCH v2 2/3] net/mlx4: add external allocator for Verbs object
2019-03-25 19:18 ` [dpdk-dev] [PATCH v2 2/3] net/mlx4: add external allocator for Verbs object Yongseok Koh
2019-03-25 19:18 ` Yongseok Koh
@ 2019-03-26 19:21 ` Shahaf Shuler
2019-03-26 19:21 ` Shahaf Shuler
1 sibling, 1 reply; 30+ messages in thread
From: Shahaf Shuler @ 2019-03-26 19:21 UTC (permalink / raw)
To: Yongseok Koh; +Cc: dev
Monday, March 25, 2019 9:18 PM, Yongseok Koh:
> Subject: [dpdk-dev] [PATCH v2 2/3] net/mlx4: add external allocator for
> Verbs object
>
> To support secondary process, the memory allocated by library such as
> completion rings (CQ) and buffer rings (WQ) must be manageable by EAL, in
> order to share it with secondary processes. With new changes in rdma-core
> and kernel driver, it is possible to provide an external allocator to the library
> layer for this purpose. All such resources will now be allocated within DPDK
> framework.
>
> Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
Acked-by: Shahaf Shuler <shahafs@mellanox.com>
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [dpdk-dev] [PATCH v2 3/3] net/mlx4: add secondary process support
2019-03-25 19:18 ` [dpdk-dev] [PATCH v2 3/3] net/mlx4: add secondary process support Yongseok Koh
2019-03-25 19:18 ` Yongseok Koh
@ 2019-03-26 19:33 ` Shahaf Shuler
2019-03-26 19:33 ` Shahaf Shuler
2019-03-28 19:01 ` Yongseok Koh
1 sibling, 2 replies; 30+ messages in thread
From: Shahaf Shuler @ 2019-03-26 19:33 UTC (permalink / raw)
To: Yongseok Koh; +Cc: dev
Monday, March 25, 2019 9:18 PM, Yongseok Koh:
> To: Shahaf Shuler <shahafs@mellanox.com>
> Cc: dev@dpdk.org
> Subject: [PATCH v2 3/3] net/mlx4: add secondary process support
>
> In order to support secondary process, a few features are required.
>
> a) rdma-core library should allocate device resources using DPDK's memory
> allocator.
>
> b) UAR should be remapped for secondary processes. Currently, in order not
> to use different data structure for secondary processes, PMD tries to
> reserve identical virtual address space for both primary and secondary
> processes.
>
> c) IPC channel is necessary, which can be easily set with rte_mp APIs.
> Through the channel, Verbs command FD is delivered to the secondary
> process and the device stop/start event is also broadcast from primary
> process.
>
> Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
> ---
> doc/guides/nics/features/mlx4.ini | 1 +
> doc/guides/nics/mlx4.rst | 10 +
> drivers/net/mlx4/Makefile | 6 +
> drivers/net/mlx4/meson.build | 3 +
> drivers/net/mlx4/mlx4.c | 378
> ++++++++++++++++++++++++++++++++++++--
> drivers/net/mlx4/mlx4.h | 60 ++++++
> drivers/net/mlx4/mlx4_mp.c | 304
> ++++++++++++++++++++++++++++++
> drivers/net/mlx4/mlx4_mr.c | 32 +++-
> drivers/net/mlx4/mlx4_prm.h | 4 +-
> drivers/net/mlx4/mlx4_rxtx.c | 2 +
> drivers/net/mlx4/mlx4_rxtx.h | 1 +
> drivers/net/mlx4/mlx4_txq.c | 111 +++++++++++
> 12 files changed, 890 insertions(+), 22 deletions(-) create mode 100644
> drivers/net/mlx4/mlx4_mp.c
>
> diff --git a/doc/guides/nics/features/mlx4.ini
> b/doc/guides/nics/features/mlx4.ini
> index a211aef332..4502aa2a87 100644
> --- a/doc/guides/nics/features/mlx4.ini
> +++ b/doc/guides/nics/features/mlx4.ini
> @@ -29,6 +29,7 @@ Packet type parsing = Y
> Basic stats = Y
> Stats per queue = Y
> FW version = Y
> +Multiprocess aware = Y
> Other kdrv = Y
> Power8 = Y
> x86-32 = Y
> diff --git a/doc/guides/nics/mlx4.rst b/doc/guides/nics/mlx4.rst index
> 4ad361a2c2..cd34838f41 100644
> --- a/doc/guides/nics/mlx4.rst
> +++ b/doc/guides/nics/mlx4.rst
> @@ -145,6 +145,16 @@ below.
> Limitations
> -----------
>
> +- For secondary process:
> +
> + - Forked secondary process not supported.
> + - All mempools must be initialized before rte_eth_dev_start().
> + - External memory unregistered in EAL memseg list cannot be used for
> DMA
> + unless such memory has been registered by
> ``mlx4_mr_update_ext_mp()`` in
> + primary process and remapped to the same virtual address in secondary
> + process. If the external memory is registered by primary process but has
> + different virtual address in secondary process, unexpected error may
> happen.
> +
> - CRC stripping is supported by default and always reported as "true".
> The ability to enable/disable CRC stripping requires OFED version
> 4.3-1.5.0.0 and above or rdma-core version v18 and above.
> diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile index
> b527efd625..8126b0dfc6 100644
> --- a/drivers/net/mlx4/Makefile
> +++ b/drivers/net/mlx4/Makefile
> @@ -18,6 +18,7 @@ ifneq ($(CONFIG_RTE_IBVERBS_LINK_DLOPEN),y)
> SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_glue.c endif
> SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_intr.c
> +SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_mp.c
> SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_mr.c
> SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_rxq.c
> SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_rxtx.c @@ -93,6 +94,11
> @@ mlx4_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
> enum MLX4DV_SET_CTX_ATTR_BUF_ALLOCATORS \
> $(AUTOCONF_OUTPUT)
> $Q sh -- '$<' '$@' \
> + HAVE_IBV_MLX4_UAR_MMAP_OFFSET \
> + infiniband/mlx4dv.h \
> + enum MLX4DV_QP_MASK_UAR_MMAP_OFFSET \
> + $(AUTOCONF_OUTPUT)
> + $Q sh -- '$<' '$@' \
> HAVE_IBV_MLX4_WQE_LSO_SEG \
> infiniband/mlx4dv.h \
> type 'struct mlx4_wqe_lso_seg' \
> diff --git a/drivers/net/mlx4/meson.build b/drivers/net/mlx4/meson.build
> index 650e2c8fbc..de020701d1 100644
> --- a/drivers/net/mlx4/meson.build
> +++ b/drivers/net/mlx4/meson.build
> @@ -33,6 +33,7 @@ if build
> 'mlx4_ethdev.c',
> 'mlx4_flow.c',
> 'mlx4_intr.c',
> + 'mlx4_mp.c',
> 'mlx4_mr.c',
> 'mlx4_rxq.c',
> 'mlx4_rxtx.c',
> @@ -76,6 +77,8 @@ if build
> has_sym_args = [
> [ 'HAVE_IBV_MLX4_BUF_ALLOCATORS',
> 'infiniband/mlx4dv.h',
> 'MLX4DV_SET_CTX_ATTR_BUF_ALLOCATORS' ],
> + [ 'HAVE_IBV_MLX4_UAR_MMAP_OFFSET',
> 'infiniband/mlx4dv.h',
> + 'MLX4DV_QP_MASK_UAR_MMAP_OFFSET' ],
> ]
> config = configuration_data()
> foreach arg:has_sym_args
> diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c index
> 0e0b035df0..a5cfcdbee3 100644
> --- a/drivers/net/mlx4/mlx4.c
> +++ b/drivers/net/mlx4/mlx4.c
> @@ -17,6 +17,7 @@
> #include <stdio.h>
> #include <stdlib.h>
> #include <string.h>
> +#include <sys/mman.h>
> #include <unistd.h>
>
> /* Verbs headers do not support -pedantic. */ @@ -48,10 +49,21 @@
> #include "mlx4_rxtx.h"
> #include "mlx4_utils.h"
>
> -struct mlx4_dev_list mlx4_mem_event_cb_list =
> - LIST_HEAD_INITIALIZER(mlx4_mem_event_cb_list);
> +#if defined(HAVE_IBV_MLX4_UAR_MMAP_OFFSET) && \
> + defined(HAVE_IBV_MLX4_BUF_ALLOCATORS)
> +#define HAVE_IBV_MLX4_SECONDARY_PROCESS #endif
Features should not be detected at compilation time but rather at run time, based on capabilities.
In this case, if you are able to register the external allocator (the dv call returns with success) and the mmap for the UAR index also succeeds, then you have support for secondary process.
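In other words, a minimal sketch of such a run-time probe could look like the following (assuming rdma-core's infiniband/mlx4dv.h exposes mlx4dv_set_context_attr() and MLX4DV_SET_CTX_ATTR_BUF_ALLOCATORS; the helper names are hypothetical and error handling is trimmed):

#include <stdlib.h>
#include <infiniband/mlx4dv.h>

/* Hypothetical callbacks; the real PMD would allocate from DPDK memory
 * (e.g. rte_malloc()) so that secondary processes can map the rings. */
static void *ext_alloc(size_t size, void *data) { (void)data; return malloc(size); }
static void ext_free(void *ptr, void *data) { (void)data; free(ptr); }

/* Returns 0 when the library accepts external buffer allocators, one of
 * the prerequisites for secondary process support; a non-zero return
 * means the feature has to be disabled at run time. */
static int
mlx4_probe_buf_allocators(struct ibv_context *ctx)
{
	struct mlx4dv_ctx_allocators allocators = {
		.alloc = ext_alloc,
		.free = ext_free,
		.data = NULL,
	};

	return mlx4dv_set_context_attr(ctx, MLX4DV_SET_CTX_ATTR_BUF_ALLOCATORS,
				       (void *)&allocators);
}

A comparable run-time check applies to the UAR mmap offset (MLX4DV_QP_MASK_UAR_MMAP_OFFSET), which is what the follow-up question below is about.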
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [dpdk-dev] [PATCH v2 3/3] net/mlx4: add secondary process support
2019-03-26 19:33 ` Shahaf Shuler
2019-03-26 19:33 ` Shahaf Shuler
@ 2019-03-28 19:01 ` Yongseok Koh
2019-03-28 19:01 ` Yongseok Koh
1 sibling, 1 reply; 30+ messages in thread
From: Yongseok Koh @ 2019-03-28 19:01 UTC (permalink / raw)
To: Shahaf Shuler; +Cc: dev
> On Mar 26, 2019, at 12:33 PM, Shahaf Shuler <shahafs@mellanox.com> wrote:
>
> Monday, March 25, 2019 9:18 PM, Yongseok Koh:
>> To: Shahaf Shuler <shahafs@mellanox.com>
>> Cc: dev@dpdk.org
>> Subject: [PATCH v2 3/3] net/mlx4: add secondary process support
>>
>> In order to support secondary process, a few features are required.
>>
>> a) rdma-core library should allocate device resources using DPDK's memory
>> allocator.
>>
>> b) UAR should be remapped for secondary processes. Currently, in order not
>> to use different data structure for secondary processes, PMD tries to
>> reserve identical virtual address space for both primary and secondary
>> processes.
>>
>> c) IPC channel is necessary, which can be easily set with rte_mp APIs.
>> Through the channel, Verbs command FD is delivered to the secondary
>> process and the device stop/start event is also broadcast from primary
>> process.
>>
>> Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
>> ---
>> doc/guides/nics/features/mlx4.ini | 1 +
>> doc/guides/nics/mlx4.rst | 10 +
>> drivers/net/mlx4/Makefile | 6 +
>> drivers/net/mlx4/meson.build | 3 +
>> drivers/net/mlx4/mlx4.c | 378
>> ++++++++++++++++++++++++++++++++++++--
>> drivers/net/mlx4/mlx4.h | 60 ++++++
>> drivers/net/mlx4/mlx4_mp.c | 304
>> ++++++++++++++++++++++++++++++
>> drivers/net/mlx4/mlx4_mr.c | 32 +++-
>> drivers/net/mlx4/mlx4_prm.h | 4 +-
>> drivers/net/mlx4/mlx4_rxtx.c | 2 +
>> drivers/net/mlx4/mlx4_rxtx.h | 1 +
>> drivers/net/mlx4/mlx4_txq.c | 111 +++++++++++
>> 12 files changed, 890 insertions(+), 22 deletions(-) create mode 100644
>> drivers/net/mlx4/mlx4_mp.c
>>
>> diff --git a/doc/guides/nics/features/mlx4.ini
>> b/doc/guides/nics/features/mlx4.ini
>> index a211aef332..4502aa2a87 100644
>> --- a/doc/guides/nics/features/mlx4.ini
>> +++ b/doc/guides/nics/features/mlx4.ini
>> @@ -29,6 +29,7 @@ Packet type parsing = Y
>> Basic stats = Y
>> Stats per queue = Y
>> FW version = Y
>> +Multiprocess aware = Y
>> Other kdrv = Y
>> Power8 = Y
>> x86-32 = Y
>> diff --git a/doc/guides/nics/mlx4.rst b/doc/guides/nics/mlx4.rst index
>> 4ad361a2c2..cd34838f41 100644
>> --- a/doc/guides/nics/mlx4.rst
>> +++ b/doc/guides/nics/mlx4.rst
>> @@ -145,6 +145,16 @@ below.
>> Limitations
>> -----------
>>
>> +- For secondary process:
>> +
>> + - Forked secondary process not supported.
>> + - All mempools must be initialized before rte_eth_dev_start().
>> + - External memory unregistered in EAL memseg list cannot be used for
>> DMA
>> + unless such memory has been registered by
>> ``mlx4_mr_update_ext_mp()`` in
>> + primary process and remapped to the same virtual address in secondary
>> + process. If the external memory is registered by primary process but has
>> + different virtual address in secondary process, unexpected error may
>> happen.
>> +
>> - CRC stripping is supported by default and always reported as "true".
>> The ability to enable/disable CRC stripping requires OFED version
>> 4.3-1.5.0.0 and above or rdma-core version v18 and above.
>> diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile index
>> b527efd625..8126b0dfc6 100644
>> --- a/drivers/net/mlx4/Makefile
>> +++ b/drivers/net/mlx4/Makefile
>> @@ -18,6 +18,7 @@ ifneq ($(CONFIG_RTE_IBVERBS_LINK_DLOPEN),y)
>> SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_glue.c endif
>> SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_intr.c
>> +SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_mp.c
>> SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_mr.c
>> SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_rxq.c
>> SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_rxtx.c @@ -93,6 +94,11
>> @@ mlx4_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
>> enum MLX4DV_SET_CTX_ATTR_BUF_ALLOCATORS \
>> $(AUTOCONF_OUTPUT)
>> $Q sh -- '$<' '$@' \
>> + HAVE_IBV_MLX4_UAR_MMAP_OFFSET \
>> + infiniband/mlx4dv.h \
>> + enum MLX4DV_QP_MASK_UAR_MMAP_OFFSET \
>> + $(AUTOCONF_OUTPUT)
>> + $Q sh -- '$<' '$@' \
>> HAVE_IBV_MLX4_WQE_LSO_SEG \
>> infiniband/mlx4dv.h \
>> type 'struct mlx4_wqe_lso_seg' \
>> diff --git a/drivers/net/mlx4/meson.build b/drivers/net/mlx4/meson.build
>> index 650e2c8fbc..de020701d1 100644
>> --- a/drivers/net/mlx4/meson.build
>> +++ b/drivers/net/mlx4/meson.build
>> @@ -33,6 +33,7 @@ if build
>> 'mlx4_ethdev.c',
>> 'mlx4_flow.c',
>> 'mlx4_intr.c',
>> + 'mlx4_mp.c',
>> 'mlx4_mr.c',
>> 'mlx4_rxq.c',
>> 'mlx4_rxtx.c',
>> @@ -76,6 +77,8 @@ if build
>> has_sym_args = [
>> [ 'HAVE_IBV_MLX4_BUF_ALLOCATORS',
>> 'infiniband/mlx4dv.h',
>> 'MLX4DV_SET_CTX_ATTR_BUF_ALLOCATORS' ],
>> + [ 'HAVE_IBV_MLX4_UAR_MMAP_OFFSET',
>> 'infiniband/mlx4dv.h',
>> + 'MLX4DV_QP_MASK_UAR_MMAP_OFFSET' ],
>> ]
>> config = configuration_data()
>> foreach arg:has_sym_args
>> diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c index
>> 0e0b035df0..a5cfcdbee3 100644
>> --- a/drivers/net/mlx4/mlx4.c
>> +++ b/drivers/net/mlx4/mlx4.c
>> @@ -17,6 +17,7 @@
>> #include <stdio.h>
>> #include <stdlib.h>
>> #include <string.h>
>> +#include <sys/mman.h>
>> #include <unistd.h>
>>
>> /* Verbs headers do not support -pedantic. */ @@ -48,10 +49,21 @@
>> #include "mlx4_rxtx.h"
>> #include "mlx4_utils.h"
>>
>> -struct mlx4_dev_list mlx4_mem_event_cb_list =
>> - LIST_HEAD_INITIALIZER(mlx4_mem_event_cb_list);
>> +#if defined(HAVE_IBV_MLX4_UAR_MMAP_OFFSET) && \
>> + defined(HAVE_IBV_MLX4_BUF_ALLOCATORS)
>> +#define HAVE_IBV_MLX4_SECONDARY_PROCESS #endif
>
> Features should not be detected at compilation time but rather at run time, based on capabilities.
> In this case, if you are able to register the external allocator (the dv call returns with success) and the mmap for the UAR index also succeeds, then you have support for secondary process.
A bit confused.
Do you want to have redundant definitions in mlx4_prm.h in order to make the test calls?
E.g., MLX4DV_SET_CTX_ATTR_BUF_ALLOCATORS and MLX4DV_QP_MASK_UAR_MMAP_OFFSET.
Thanks,
Yongseok
^ permalink raw reply [flat|nested] 30+ messages in thread
* [dpdk-dev] [PATCH v3 0/3] net/mlx4: add secondary process support
2019-03-07 7:39 [dpdk-dev] [PATCH 0/3] net/mlx4: add secondary process support Yongseok Koh
` (3 preceding siblings ...)
2019-03-25 19:17 ` [dpdk-dev] [PATCH v2 0/3] " Yongseok Koh
@ 2019-04-01 21:15 ` Yongseok Koh
2019-04-01 21:15 ` Yongseok Koh
` (4 more replies)
4 siblings, 5 replies; 30+ messages in thread
From: Yongseok Koh @ 2019-04-01 21:15 UTC (permalink / raw)
To: shahafs; +Cc: dev
RFC:
https://mails.dpdk.org/archives/dev/2019-March/125516.html
v3:
* rebase on the latest branch tip
* remove HAVE_IBV_MLX4_SECONDARY_PROCESS and determine support at run time
v2:
* add more sanity check for eth_dev and return value from IPC request
* complement commit messages
* add MLX4_MP_REQ_TIMEOUT_SEC
Yongseok Koh (3):
net/mlx4: change device reference for secondary process
net/mlx4: add external allocator for Verbs object
net/mlx4: add secondary process support
doc/guides/nics/features/mlx4.ini | 1 +
doc/guides/nics/mlx4.rst | 10 +
drivers/net/mlx4/Makefile | 11 +
drivers/net/mlx4/meson.build | 13 ++
drivers/net/mlx4/mlx4.c | 453 ++++++++++++++++++++++++++++++++++++--
drivers/net/mlx4/mlx4.h | 86 +++++++-
drivers/net/mlx4/mlx4_flow.c | 39 ++--
drivers/net/mlx4/mlx4_intr.c | 20 +-
drivers/net/mlx4/mlx4_mp.c | 304 +++++++++++++++++++++++++
drivers/net/mlx4/mlx4_mr.c | 40 +++-
drivers/net/mlx4/mlx4_prm.h | 4 +-
drivers/net/mlx4/mlx4_rxq.c | 40 ++--
drivers/net/mlx4/mlx4_rxtx.c | 2 +
drivers/net/mlx4/mlx4_rxtx.h | 1 +
drivers/net/mlx4/mlx4_txq.c | 127 ++++++++++-
15 files changed, 1071 insertions(+), 80 deletions(-)
create mode 100644 drivers/net/mlx4/mlx4_mp.c
--
2.11.0
^ permalink raw reply [flat|nested] 30+ messages in thread
* [dpdk-dev] [PATCH v3 1/3] net/mlx4: change device reference for secondary process
2019-04-01 21:15 ` [dpdk-dev] [PATCH v3 0/3] " Yongseok Koh
2019-04-01 21:15 ` Yongseok Koh
@ 2019-04-01 21:15 ` Yongseok Koh
2019-04-01 21:15 ` Yongseok Koh
2019-04-01 21:15 ` [dpdk-dev] [PATCH v3 2/3] net/mlx4: add external allocator for Verbs object Yongseok Koh
` (2 subsequent siblings)
4 siblings, 1 reply; 30+ messages in thread
From: Yongseok Koh @ 2019-04-01 21:15 UTC (permalink / raw)
To: shahafs; +Cc: dev, stable
rte_eth_devices[] is not shared between primary and secondary process, but
a static array to each process. The reverse pointer of device (priv->dev)
becomes invalid if mlx4 supports secondary process. Instead, priv has the
pointer to shared data of the device,
struct rte_eth_dev_data *dev_data;
Two macros are added,
#define PORT_ID(priv) ((priv)->dev_data->port_id)
#define ETH_DEV(priv) (&rte_eth_devices[PORT_ID(priv)])
Cc: stable@dpdk.org
Suggested-by: Raslan Darawsheh <rasland@mellanox.com>
Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
Acked-by: Shahaf Shuler <shahafs@mellanox.com>
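For readers less familiar with librte_ethdev, the lookup these two macros perform amounts to the following sketch (illustration only, not part of the diff below; the helper name is made up and the usual driver headers are assumed, e.g. rte_ethdev_driver.h for rte_eth_devices[] and mlx4.h for struct mlx4_priv):

/* rte_eth_devices[] is a per-process array, but priv->dev_data points at
 * the rte_eth_dev_data kept in memory shared by the primary process, so
 * the stored port_id resolves to the right per-process rte_eth_dev. */
static inline struct rte_eth_dev *
mlx4_priv_to_eth_dev(struct mlx4_priv *priv)
{
	uint16_t port_id = priv->dev_data->port_id;	/* PORT_ID(priv) */

	return &rte_eth_devices[port_id];		/* ETH_DEV(priv) */
}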
---
drivers/net/mlx4/mlx4.c | 4 ++--
drivers/net/mlx4/mlx4.h | 5 ++++-
drivers/net/mlx4/mlx4_flow.c | 39 +++++++++++++++++++++------------------
drivers/net/mlx4/mlx4_intr.c | 20 ++++++++++----------
drivers/net/mlx4/mlx4_mr.c | 8 ++++----
drivers/net/mlx4/mlx4_rxq.c | 36 +++++++++++++++++++-----------------
drivers/net/mlx4/mlx4_txq.c | 8 ++++----
7 files changed, 64 insertions(+), 56 deletions(-)
diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 5ef2e7f41e..bb6ab8ec6e 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -753,11 +753,11 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
* handled by rte_intr_rx_ctl().
*/
eth_dev->intr_handle = &priv->intr_handle;
- priv->dev = eth_dev;
+ priv->dev_data = eth_dev->data;
eth_dev->dev_ops = &mlx4_dev_ops;
/* Bring Ethernet device up. */
DEBUG("forcing Ethernet interface up");
- mlx4_dev_set_link_up(priv->dev);
+ mlx4_dev_set_link_up(eth_dev);
/* Update link status once if waiting for LSC. */
if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
mlx4_link_update(eth_dev, 0);
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 7ac49ca672..51566caf7f 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -79,7 +79,7 @@ LIST_HEAD(mlx4_mr_list, mlx4_mr);
struct mlx4_priv {
LIST_ENTRY(mlx4_priv) mem_event_cb;
/**< Called by memory event callback. */
- struct rte_eth_dev *dev; /**< Ethernet device. */
+ struct rte_eth_dev_data *dev_data; /* Pointer to device data. */
struct ibv_context *ctx; /**< Verbs context. */
struct ibv_device_attr device_attr; /**< Device properties. */
struct ibv_pd *pd; /**< Protection Domain. */
@@ -113,6 +113,9 @@ struct mlx4_priv {
/**< Configured MAC addresses. Unused entries are zeroed. */
};
+#define PORT_ID(priv) ((priv)->dev_data->port_id)
+#define ETH_DEV(priv) (&rte_eth_devices[PORT_ID(priv)])
+
/* mlx4_ethdev.c */
int mlx4_get_ifname(const struct mlx4_priv *priv, char (*ifname)[IF_NAMESIZE]);
diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c
index f4df4ab1fb..038dc71d35 100644
--- a/drivers/net/mlx4/mlx4_flow.c
+++ b/drivers/net/mlx4/mlx4_flow.c
@@ -773,7 +773,7 @@ mlx4_flow_prepare(struct mlx4_priv *priv,
if (flow->rss)
break;
queue = action->conf;
- if (queue->index >= priv->dev->data->nb_rx_queues) {
+ if (queue->index >= ETH_DEV(priv)->data->nb_rx_queues) {
msg = "queue target index beyond number of"
" configured Rx queues";
goto exit_action_not_supported;
@@ -802,7 +802,7 @@ mlx4_flow_prepare(struct mlx4_priv *priv,
/* Sanity checks. */
for (i = 0; i < rss->queue_num; ++i)
if (rss->queue[i] >=
- priv->dev->data->nb_rx_queues)
+ ETH_DEV(priv)->data->nb_rx_queues)
break;
if (i != rss->queue_num) {
msg = "queue index target beyond number of"
@@ -1072,8 +1072,8 @@ mlx4_flow_toggle(struct mlx4_priv *priv,
/* Stop at the first nonexistent target queue. */
for (i = 0; i != rss->queues; ++i)
if (rss->queue_id[i] >=
- priv->dev->data->nb_rx_queues ||
- !priv->dev->data->rx_queues[rss->queue_id[i]]) {
+ ETH_DEV(priv)->data->nb_rx_queues ||
+ !ETH_DEV(priv)->data->rx_queues[rss->queue_id[i]]) {
missing = 1;
break;
}
@@ -1258,7 +1258,7 @@ static uint16_t
mlx4_flow_internal_next_vlan(struct mlx4_priv *priv, uint16_t vlan)
{
while (vlan < 4096) {
- if (priv->dev->data->vlan_filter_conf.ids[vlan / 64] &
+ if (ETH_DEV(priv)->data->vlan_filter_conf.ids[vlan / 64] &
(UINT64_C(1) << (vlan % 64)))
return vlan;
++vlan;
@@ -1335,7 +1335,7 @@ mlx4_flow_internal(struct mlx4_priv *priv, struct rte_flow_error *error)
* get RSS by default.
*/
uint32_t queues =
- rte_align32pow2(priv->dev->data->nb_rx_queues + 1) >> 1;
+ rte_align32pow2(ETH_DEV(priv)->data->nb_rx_queues + 1) >> 1;
uint16_t queue[queues];
struct rte_flow_action_rss action_rss = {
.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
@@ -1357,9 +1357,9 @@ mlx4_flow_internal(struct mlx4_priv *priv, struct rte_flow_error *error)
};
struct ether_addr *rule_mac = &eth_spec.dst;
rte_be16_t *rule_vlan =
- (priv->dev->data->dev_conf.rxmode.offloads &
+ (ETH_DEV(priv)->data->dev_conf.rxmode.offloads &
DEV_RX_OFFLOAD_VLAN_FILTER) &&
- !priv->dev->data->promiscuous ?
+ !ETH_DEV(priv)->data->promiscuous ?
&vlan_spec.tci :
NULL;
uint16_t vlan = 0;
@@ -1439,7 +1439,7 @@ mlx4_flow_internal(struct mlx4_priv *priv, struct rte_flow_error *error)
if (!flow || !flow->internal) {
/* Not found, create a new flow rule. */
memcpy(rule_mac, mac, sizeof(*mac));
- flow = mlx4_flow_create(priv->dev, &attr, pattern,
+ flow = mlx4_flow_create(ETH_DEV(priv), &attr, pattern,
actions, error);
if (!flow) {
err = -rte_errno;
@@ -1455,15 +1455,16 @@ mlx4_flow_internal(struct mlx4_priv *priv, struct rte_flow_error *error)
goto next_vlan;
}
/* Take care of promiscuous and all multicast flow rules. */
- if (priv->dev->data->promiscuous || priv->dev->data->all_multicast) {
+ if (ETH_DEV(priv)->data->promiscuous ||
+ ETH_DEV(priv)->data->all_multicast) {
for (flow = LIST_FIRST(&priv->flows);
flow && flow->internal;
flow = LIST_NEXT(flow, next)) {
- if (priv->dev->data->promiscuous) {
+ if (ETH_DEV(priv)->data->promiscuous) {
if (flow->promisc)
break;
} else {
- assert(priv->dev->data->all_multicast);
+ assert(ETH_DEV(priv)->data->all_multicast);
if (flow->allmulti)
break;
}
@@ -1477,16 +1478,16 @@ mlx4_flow_internal(struct mlx4_priv *priv, struct rte_flow_error *error)
}
if (!flow || !flow->internal) {
/* Not found, create a new flow rule. */
- if (priv->dev->data->promiscuous) {
+ if (ETH_DEV(priv)->data->promiscuous) {
pattern[1].spec = NULL;
pattern[1].mask = NULL;
} else {
- assert(priv->dev->data->all_multicast);
+ assert(ETH_DEV(priv)->data->all_multicast);
pattern[1].spec = &eth_allmulti;
pattern[1].mask = &eth_allmulti;
}
pattern[2] = pattern[3];
- flow = mlx4_flow_create(priv->dev, &attr, pattern,
+ flow = mlx4_flow_create(ETH_DEV(priv), &attr, pattern,
actions, error);
if (!flow) {
err = -rte_errno;
@@ -1503,7 +1504,8 @@ mlx4_flow_internal(struct mlx4_priv *priv, struct rte_flow_error *error)
struct rte_flow *next = LIST_NEXT(flow, next);
if (!flow->select)
- claim_zero(mlx4_flow_destroy(priv->dev, flow, error));
+ claim_zero(mlx4_flow_destroy(ETH_DEV(priv), flow,
+ error));
else
flow->select = 0;
flow = next;
@@ -1541,7 +1543,8 @@ mlx4_flow_sync(struct mlx4_priv *priv, struct rte_flow_error *error)
for (flow = LIST_FIRST(&priv->flows);
flow && flow->internal;
flow = LIST_FIRST(&priv->flows))
- claim_zero(mlx4_flow_destroy(priv->dev, flow, error));
+ claim_zero(mlx4_flow_destroy(ETH_DEV(priv), flow,
+ error));
} else {
/* Refresh internal rules. */
ret = mlx4_flow_internal(priv, error);
@@ -1574,7 +1577,7 @@ mlx4_flow_clean(struct mlx4_priv *priv)
struct rte_flow *flow;
while ((flow = LIST_FIRST(&priv->flows)))
- mlx4_flow_destroy(priv->dev, flow, NULL);
+ mlx4_flow_destroy(ETH_DEV(priv), flow, NULL);
assert(LIST_EMPTY(&priv->rss));
}
diff --git a/drivers/net/mlx4/mlx4_intr.c b/drivers/net/mlx4/mlx4_intr.c
index ec91242196..4f33526755 100644
--- a/drivers/net/mlx4/mlx4_intr.c
+++ b/drivers/net/mlx4/mlx4_intr.c
@@ -65,7 +65,7 @@ static int
mlx4_rx_intr_vec_enable(struct mlx4_priv *priv)
{
unsigned int i;
- unsigned int rxqs_n = priv->dev->data->nb_rx_queues;
+ unsigned int rxqs_n = ETH_DEV(priv)->data->nb_rx_queues;
unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
unsigned int count = 0;
struct rte_intr_handle *intr_handle = &priv->intr_handle;
@@ -79,7 +79,7 @@ mlx4_rx_intr_vec_enable(struct mlx4_priv *priv)
return -rte_errno;
}
for (i = 0; i != n; ++i) {
- struct rxq *rxq = priv->dev->data->rx_queues[i];
+ struct rxq *rxq = ETH_DEV(priv)->data->rx_queues[i];
/* Skip queues that cannot request interrupts. */
if (!rxq || !rxq->channel) {
@@ -120,12 +120,12 @@ static void
mlx4_link_status_alarm(struct mlx4_priv *priv)
{
const struct rte_intr_conf *const intr_conf =
- &priv->dev->data->dev_conf.intr_conf;
+ &ETH_DEV(priv)->data->dev_conf.intr_conf;
assert(priv->intr_alarm == 1);
priv->intr_alarm = 0;
if (intr_conf->lsc && !mlx4_link_status_check(priv))
- _rte_eth_dev_callback_process(priv->dev,
+ _rte_eth_dev_callback_process(ETH_DEV(priv),
RTE_ETH_EVENT_INTR_LSC,
NULL);
}
@@ -145,8 +145,8 @@ mlx4_link_status_alarm(struct mlx4_priv *priv)
static int
mlx4_link_status_check(struct mlx4_priv *priv)
{
- struct rte_eth_link *link = &priv->dev->data->dev_link;
- int ret = mlx4_link_update(priv->dev, 0);
+ struct rte_eth_link *link = &ETH_DEV(priv)->data->dev_link;
+ int ret = mlx4_link_update(ETH_DEV(priv), 0);
if (ret)
return ret;
@@ -185,7 +185,7 @@ mlx4_interrupt_handler(struct mlx4_priv *priv)
uint32_t caught[RTE_DIM(type)] = { 0 };
struct ibv_async_event event;
const struct rte_intr_conf *const intr_conf =
- &priv->dev->data->dev_conf.intr_conf;
+ &ETH_DEV(priv)->data->dev_conf.intr_conf;
unsigned int i;
/* Read all message and acknowledge them. */
@@ -208,7 +208,7 @@ mlx4_interrupt_handler(struct mlx4_priv *priv)
}
for (i = 0; i != RTE_DIM(caught); ++i)
if (caught[i])
- _rte_eth_dev_callback_process(priv->dev, type[i],
+ _rte_eth_dev_callback_process(ETH_DEV(priv), type[i],
NULL);
}
@@ -282,7 +282,7 @@ int
mlx4_intr_install(struct mlx4_priv *priv)
{
const struct rte_intr_conf *const intr_conf =
- &priv->dev->data->dev_conf.intr_conf;
+ &ETH_DEV(priv)->data->dev_conf.intr_conf;
int rc;
mlx4_intr_uninstall(priv);
@@ -381,7 +381,7 @@ int
mlx4_rxq_intr_enable(struct mlx4_priv *priv)
{
const struct rte_intr_conf *const intr_conf =
- &priv->dev->data->dev_conf.intr_conf;
+ &ETH_DEV(priv)->data->dev_conf.intr_conf;
if (intr_conf->rxq && mlx4_rx_intr_vec_enable(priv) < 0)
goto error;
diff --git a/drivers/net/mlx4/mlx4_mr.c b/drivers/net/mlx4/mlx4_mr.c
index 4376ad0b60..e4be46ab2a 100644
--- a/drivers/net/mlx4/mlx4_mr.c
+++ b/drivers/net/mlx4/mlx4_mr.c
@@ -896,7 +896,7 @@ mlx4_mr_mem_event_cb(enum rte_mem_event event_type, const void *addr,
rte_rwlock_read_lock(&mlx4_mem_event_rwlock);
/* Iterate all the existing mlx4 devices. */
LIST_FOREACH(priv, &mlx4_mem_event_cb_list, mem_event_cb)
- mlx4_mr_mem_event_free_cb(priv->dev, addr, len);
+ mlx4_mr_mem_event_free_cb(ETH_DEV(priv), addr, len);
rte_rwlock_read_unlock(&mlx4_mem_event_rwlock);
break;
case RTE_MEM_EVENT_ALLOC:
@@ -1028,7 +1028,7 @@ mlx4_rx_addr2mr_bh(struct rxq *rxq, uintptr_t addr)
DEBUG("Rx queue %u: miss on top-half, mru=%u, head=%u, addr=%p",
rxq->stats.idx, mr_ctrl->mru, mr_ctrl->head, (void *)addr);
- return mlx4_mr_addr2mr_bh(priv->dev, mr_ctrl, addr);
+ return mlx4_mr_addr2mr_bh(ETH_DEV(priv), mr_ctrl, addr);
}
/**
@@ -1050,7 +1050,7 @@ mlx4_tx_addr2mr_bh(struct txq *txq, uintptr_t addr)
DEBUG("Tx queue %u: miss on top-half, mru=%u, head=%u, addr=%p",
txq->stats.idx, mr_ctrl->mru, mr_ctrl->head, (void *)addr);
- return mlx4_mr_addr2mr_bh(priv->dev, mr_ctrl, addr);
+ return mlx4_mr_addr2mr_bh(ETH_DEV(priv), mr_ctrl, addr);
}
/**
@@ -1225,7 +1225,7 @@ mlx4_tx_update_ext_mp(struct txq *txq, uintptr_t addr, struct rte_mempool *mp)
struct mlx4_mr_ctrl *mr_ctrl = &txq->mr_ctrl;
struct mlx4_priv *priv = txq->priv;
- mlx4_mr_update_ext_mp(priv->dev, mr_ctrl, mp);
+ mlx4_mr_update_ext_mp(ETH_DEV(priv), mr_ctrl, mp);
return mlx4_tx_addr2mr_bh(txq, addr);
}
diff --git a/drivers/net/mlx4/mlx4_rxq.c b/drivers/net/mlx4/mlx4_rxq.c
index 3782c6baab..50f33eb0c5 100644
--- a/drivers/net/mlx4/mlx4_rxq.c
+++ b/drivers/net/mlx4/mlx4_rxq.c
@@ -176,6 +176,7 @@ mlx4_rss_attach(struct mlx4_rss *rss)
struct ibv_wq *ind_tbl[rss->queues];
struct mlx4_priv *priv = rss->priv;
+ struct rte_eth_dev *dev = ETH_DEV(priv);
const char *msg;
unsigned int i = 0;
int ret;
@@ -189,8 +190,8 @@ mlx4_rss_attach(struct mlx4_rss *rss)
uint16_t id = rss->queue_id[i];
struct rxq *rxq = NULL;
- if (id < priv->dev->data->nb_rx_queues)
- rxq = priv->dev->data->rx_queues[id];
+ if (id < dev->data->nb_rx_queues)
+ rxq = dev->data->rx_queues[id];
if (!rxq) {
ret = EINVAL;
msg = "RSS target queue is not configured";
@@ -269,7 +270,7 @@ mlx4_rss_attach(struct mlx4_rss *rss)
rss->ind = NULL;
}
while (i--)
- mlx4_rxq_detach(priv->dev->data->rx_queues[rss->queue_id[i]]);
+ mlx4_rxq_detach(dev->data->rx_queues[rss->queue_id[i]]);
ERROR("mlx4: %s", msg);
--rss->usecnt;
rte_errno = ret;
@@ -291,6 +292,7 @@ void
mlx4_rss_detach(struct mlx4_rss *rss)
{
struct mlx4_priv *priv = rss->priv;
+ struct rte_eth_dev *dev = ETH_DEV(priv);
unsigned int i;
assert(rss->refcnt);
@@ -303,7 +305,7 @@ mlx4_rss_detach(struct mlx4_rss *rss)
claim_zero(mlx4_glue->destroy_rwq_ind_table(rss->ind));
rss->ind = NULL;
for (i = 0; i != rss->queues; ++i)
- mlx4_rxq_detach(priv->dev->data->rx_queues[rss->queue_id[i]]);
+ mlx4_rxq_detach(dev->data->rx_queues[rss->queue_id[i]]);
}
/**
@@ -329,7 +331,7 @@ mlx4_rss_detach(struct mlx4_rss *rss)
int
mlx4_rss_init(struct mlx4_priv *priv)
{
- struct rte_eth_dev *dev = priv->dev;
+ struct rte_eth_dev *dev = ETH_DEV(priv);
uint8_t log2_range = rte_log2_u32(dev->data->nb_rx_queues);
uint32_t wq_num_prev = 0;
const char *msg;
@@ -338,7 +340,7 @@ mlx4_rss_init(struct mlx4_priv *priv)
if (priv->rss_init)
return 0;
- if (priv->dev->data->nb_rx_queues > priv->hw_rss_max_qps) {
+ if (ETH_DEV(priv)->data->nb_rx_queues > priv->hw_rss_max_qps) {
ERROR("RSS does not support more than %d queues",
priv->hw_rss_max_qps);
rte_errno = EINVAL;
@@ -356,8 +358,8 @@ mlx4_rss_init(struct mlx4_priv *priv)
rte_errno = ret;
return -ret;
}
- for (i = 0; i != priv->dev->data->nb_rx_queues; ++i) {
- struct rxq *rxq = priv->dev->data->rx_queues[i];
+ for (i = 0; i != ETH_DEV(priv)->data->nb_rx_queues; ++i) {
+ struct rxq *rxq = ETH_DEV(priv)->data->rx_queues[i];
struct ibv_cq *cq;
struct ibv_wq *wq;
uint32_t wq_num;
@@ -432,7 +434,7 @@ mlx4_rss_init(struct mlx4_priv *priv)
ERROR("cannot initialize common RSS resources (queue %u): %s: %s",
i, msg, strerror(ret));
while (i--) {
- struct rxq *rxq = priv->dev->data->rx_queues[i];
+ struct rxq *rxq = ETH_DEV(priv)->data->rx_queues[i];
if (rxq)
mlx4_rxq_detach(rxq);
@@ -457,8 +459,8 @@ mlx4_rss_deinit(struct mlx4_priv *priv)
if (!priv->rss_init)
return;
- for (i = 0; i != priv->dev->data->nb_rx_queues; ++i) {
- struct rxq *rxq = priv->dev->data->rx_queues[i];
+ for (i = 0; i != ETH_DEV(priv)->data->nb_rx_queues; ++i) {
+ struct rxq *rxq = ETH_DEV(priv)->data->rx_queues[i];
if (rxq) {
assert(rxq->usecnt == 1);
@@ -494,7 +496,7 @@ mlx4_rxq_attach(struct rxq *rxq)
}
struct mlx4_priv *priv = rxq->priv;
- struct rte_eth_dev *dev = priv->dev;
+ struct rte_eth_dev *dev = ETH_DEV(priv);
const uint32_t elts_n = 1 << rxq->elts_n;
const uint32_t sges_n = 1 << rxq->sges_n;
struct rte_mbuf *(*elts)[elts_n] = rxq->elts;
@@ -561,7 +563,7 @@ mlx4_rxq_attach(struct rxq *rxq)
}
/* Pre-register Rx mempool. */
DEBUG("port %u Rx queue %u registering mp %s having %u chunks",
- priv->dev->data->port_id, rxq->stats.idx,
+ ETH_DEV(priv)->data->port_id, rxq->stats.idx,
rxq->mp->name, rxq->mp->nb_mem_chunks);
mlx4_mr_update_mp(dev, &rxq->mr_ctrl, rxq->mp);
wqes = (volatile struct mlx4_wqe_data_seg (*)[])
@@ -917,11 +919,11 @@ mlx4_rx_queue_release(void *dpdk_rxq)
if (rxq == NULL)
return;
priv = rxq->priv;
- for (i = 0; i != priv->dev->data->nb_rx_queues; ++i)
- if (priv->dev->data->rx_queues[i] == rxq) {
+ for (i = 0; i != ETH_DEV(priv)->data->nb_rx_queues; ++i)
+ if (ETH_DEV(priv)->data->rx_queues[i] == rxq) {
DEBUG("%p: removing Rx queue %p from list",
- (void *)priv->dev, (void *)rxq);
- priv->dev->data->rx_queues[i] = NULL;
+ (void *)ETH_DEV(priv), (void *)rxq);
+ ETH_DEV(priv)->data->rx_queues[i] = NULL;
break;
}
assert(!rxq->cq);
diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c
index 8142775fc4..352700820d 100644
--- a/drivers/net/mlx4/mlx4_txq.c
+++ b/drivers/net/mlx4/mlx4_txq.c
@@ -357,11 +357,11 @@ mlx4_tx_queue_release(void *dpdk_txq)
if (txq == NULL)
return;
priv = txq->priv;
- for (i = 0; i != priv->dev->data->nb_tx_queues; ++i)
- if (priv->dev->data->tx_queues[i] == txq) {
+ for (i = 0; i != ETH_DEV(priv)->data->nb_tx_queues; ++i)
+ if (ETH_DEV(priv)->data->tx_queues[i] == txq) {
DEBUG("%p: removing Tx queue %p from list",
- (void *)priv->dev, (void *)txq);
- priv->dev->data->tx_queues[i] = NULL;
+ (void *)ETH_DEV(priv), (void *)txq);
+ ETH_DEV(priv)->data->tx_queues[i] = NULL;
break;
}
mlx4_txq_free_elts(txq);
--
2.11.0
^ permalink raw reply [flat|nested] 30+ messages in thread
* [dpdk-dev] [PATCH v3 1/3] net/mlx4: change device reference for secondary process
2019-04-01 21:15 ` [dpdk-dev] [PATCH v3 1/3] net/mlx4: change device reference for secondary process Yongseok Koh
@ 2019-04-01 21:15 ` Yongseok Koh
0 siblings, 0 replies; 30+ messages in thread
From: Yongseok Koh @ 2019-04-01 21:15 UTC (permalink / raw)
To: shahafs; +Cc: dev, stable
rte_eth_devices[] is not shared between primary and secondary process, but
a static array to each process. The reverse pointer of device (priv->dev)
becomes invalid if mlx4 supports secondary process. Instead, priv has the
pointer to shared data of the device,
struct rte_eth_dev_data *dev_data;
Two macros are added,
#define PORT_ID(priv) ((priv)->dev_data->port_id)
#define ETH_DEV(priv) (&rte_eth_devices[PORT_ID(priv)])
Cc: stable@dpdk.org
Suggested-by: Raslan Darawsheh <rasland@mellanox.com>
Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
Acked-by: Shahaf Shuler <shahafs@mellanox.com>
---
drivers/net/mlx4/mlx4.c | 4 ++--
drivers/net/mlx4/mlx4.h | 5 ++++-
drivers/net/mlx4/mlx4_flow.c | 39 +++++++++++++++++++++------------------
drivers/net/mlx4/mlx4_intr.c | 20 ++++++++++----------
drivers/net/mlx4/mlx4_mr.c | 8 ++++----
drivers/net/mlx4/mlx4_rxq.c | 36 +++++++++++++++++++-----------------
drivers/net/mlx4/mlx4_txq.c | 8 ++++----
7 files changed, 64 insertions(+), 56 deletions(-)
diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 5ef2e7f41e..bb6ab8ec6e 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -753,11 +753,11 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
* handled by rte_intr_rx_ctl().
*/
eth_dev->intr_handle = &priv->intr_handle;
- priv->dev = eth_dev;
+ priv->dev_data = eth_dev->data;
eth_dev->dev_ops = &mlx4_dev_ops;
/* Bring Ethernet device up. */
DEBUG("forcing Ethernet interface up");
- mlx4_dev_set_link_up(priv->dev);
+ mlx4_dev_set_link_up(eth_dev);
/* Update link status once if waiting for LSC. */
if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
mlx4_link_update(eth_dev, 0);
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 7ac49ca672..51566caf7f 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -79,7 +79,7 @@ LIST_HEAD(mlx4_mr_list, mlx4_mr);
struct mlx4_priv {
LIST_ENTRY(mlx4_priv) mem_event_cb;
/**< Called by memory event callback. */
- struct rte_eth_dev *dev; /**< Ethernet device. */
+ struct rte_eth_dev_data *dev_data; /* Pointer to device data. */
struct ibv_context *ctx; /**< Verbs context. */
struct ibv_device_attr device_attr; /**< Device properties. */
struct ibv_pd *pd; /**< Protection Domain. */
@@ -113,6 +113,9 @@ struct mlx4_priv {
/**< Configured MAC addresses. Unused entries are zeroed. */
};
+#define PORT_ID(priv) ((priv)->dev_data->port_id)
+#define ETH_DEV(priv) (&rte_eth_devices[PORT_ID(priv)])
+
/* mlx4_ethdev.c */
int mlx4_get_ifname(const struct mlx4_priv *priv, char (*ifname)[IF_NAMESIZE]);
diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c
index f4df4ab1fb..038dc71d35 100644
--- a/drivers/net/mlx4/mlx4_flow.c
+++ b/drivers/net/mlx4/mlx4_flow.c
@@ -773,7 +773,7 @@ mlx4_flow_prepare(struct mlx4_priv *priv,
if (flow->rss)
break;
queue = action->conf;
- if (queue->index >= priv->dev->data->nb_rx_queues) {
+ if (queue->index >= ETH_DEV(priv)->data->nb_rx_queues) {
msg = "queue target index beyond number of"
" configured Rx queues";
goto exit_action_not_supported;
@@ -802,7 +802,7 @@ mlx4_flow_prepare(struct mlx4_priv *priv,
/* Sanity checks. */
for (i = 0; i < rss->queue_num; ++i)
if (rss->queue[i] >=
- priv->dev->data->nb_rx_queues)
+ ETH_DEV(priv)->data->nb_rx_queues)
break;
if (i != rss->queue_num) {
msg = "queue index target beyond number of"
@@ -1072,8 +1072,8 @@ mlx4_flow_toggle(struct mlx4_priv *priv,
/* Stop at the first nonexistent target queue. */
for (i = 0; i != rss->queues; ++i)
if (rss->queue_id[i] >=
- priv->dev->data->nb_rx_queues ||
- !priv->dev->data->rx_queues[rss->queue_id[i]]) {
+ ETH_DEV(priv)->data->nb_rx_queues ||
+ !ETH_DEV(priv)->data->rx_queues[rss->queue_id[i]]) {
missing = 1;
break;
}
@@ -1258,7 +1258,7 @@ static uint16_t
mlx4_flow_internal_next_vlan(struct mlx4_priv *priv, uint16_t vlan)
{
while (vlan < 4096) {
- if (priv->dev->data->vlan_filter_conf.ids[vlan / 64] &
+ if (ETH_DEV(priv)->data->vlan_filter_conf.ids[vlan / 64] &
(UINT64_C(1) << (vlan % 64)))
return vlan;
++vlan;
@@ -1335,7 +1335,7 @@ mlx4_flow_internal(struct mlx4_priv *priv, struct rte_flow_error *error)
* get RSS by default.
*/
uint32_t queues =
- rte_align32pow2(priv->dev->data->nb_rx_queues + 1) >> 1;
+ rte_align32pow2(ETH_DEV(priv)->data->nb_rx_queues + 1) >> 1;
uint16_t queue[queues];
struct rte_flow_action_rss action_rss = {
.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
@@ -1357,9 +1357,9 @@ mlx4_flow_internal(struct mlx4_priv *priv, struct rte_flow_error *error)
};
struct ether_addr *rule_mac = &eth_spec.dst;
rte_be16_t *rule_vlan =
- (priv->dev->data->dev_conf.rxmode.offloads &
+ (ETH_DEV(priv)->data->dev_conf.rxmode.offloads &
DEV_RX_OFFLOAD_VLAN_FILTER) &&
- !priv->dev->data->promiscuous ?
+ !ETH_DEV(priv)->data->promiscuous ?
&vlan_spec.tci :
NULL;
uint16_t vlan = 0;
@@ -1439,7 +1439,7 @@ mlx4_flow_internal(struct mlx4_priv *priv, struct rte_flow_error *error)
if (!flow || !flow->internal) {
/* Not found, create a new flow rule. */
memcpy(rule_mac, mac, sizeof(*mac));
- flow = mlx4_flow_create(priv->dev, &attr, pattern,
+ flow = mlx4_flow_create(ETH_DEV(priv), &attr, pattern,
actions, error);
if (!flow) {
err = -rte_errno;
@@ -1455,15 +1455,16 @@ mlx4_flow_internal(struct mlx4_priv *priv, struct rte_flow_error *error)
goto next_vlan;
}
/* Take care of promiscuous and all multicast flow rules. */
- if (priv->dev->data->promiscuous || priv->dev->data->all_multicast) {
+ if (ETH_DEV(priv)->data->promiscuous ||
+ ETH_DEV(priv)->data->all_multicast) {
for (flow = LIST_FIRST(&priv->flows);
flow && flow->internal;
flow = LIST_NEXT(flow, next)) {
- if (priv->dev->data->promiscuous) {
+ if (ETH_DEV(priv)->data->promiscuous) {
if (flow->promisc)
break;
} else {
- assert(priv->dev->data->all_multicast);
+ assert(ETH_DEV(priv)->data->all_multicast);
if (flow->allmulti)
break;
}
@@ -1477,16 +1478,16 @@ mlx4_flow_internal(struct mlx4_priv *priv, struct rte_flow_error *error)
}
if (!flow || !flow->internal) {
/* Not found, create a new flow rule. */
- if (priv->dev->data->promiscuous) {
+ if (ETH_DEV(priv)->data->promiscuous) {
pattern[1].spec = NULL;
pattern[1].mask = NULL;
} else {
- assert(priv->dev->data->all_multicast);
+ assert(ETH_DEV(priv)->data->all_multicast);
pattern[1].spec = &eth_allmulti;
pattern[1].mask = &eth_allmulti;
}
pattern[2] = pattern[3];
- flow = mlx4_flow_create(priv->dev, &attr, pattern,
+ flow = mlx4_flow_create(ETH_DEV(priv), &attr, pattern,
actions, error);
if (!flow) {
err = -rte_errno;
@@ -1503,7 +1504,8 @@ mlx4_flow_internal(struct mlx4_priv *priv, struct rte_flow_error *error)
struct rte_flow *next = LIST_NEXT(flow, next);
if (!flow->select)
- claim_zero(mlx4_flow_destroy(priv->dev, flow, error));
+ claim_zero(mlx4_flow_destroy(ETH_DEV(priv), flow,
+ error));
else
flow->select = 0;
flow = next;
@@ -1541,7 +1543,8 @@ mlx4_flow_sync(struct mlx4_priv *priv, struct rte_flow_error *error)
for (flow = LIST_FIRST(&priv->flows);
flow && flow->internal;
flow = LIST_FIRST(&priv->flows))
- claim_zero(mlx4_flow_destroy(priv->dev, flow, error));
+ claim_zero(mlx4_flow_destroy(ETH_DEV(priv), flow,
+ error));
} else {
/* Refresh internal rules. */
ret = mlx4_flow_internal(priv, error);
@@ -1574,7 +1577,7 @@ mlx4_flow_clean(struct mlx4_priv *priv)
struct rte_flow *flow;
while ((flow = LIST_FIRST(&priv->flows)))
- mlx4_flow_destroy(priv->dev, flow, NULL);
+ mlx4_flow_destroy(ETH_DEV(priv), flow, NULL);
assert(LIST_EMPTY(&priv->rss));
}
diff --git a/drivers/net/mlx4/mlx4_intr.c b/drivers/net/mlx4/mlx4_intr.c
index ec91242196..4f33526755 100644
--- a/drivers/net/mlx4/mlx4_intr.c
+++ b/drivers/net/mlx4/mlx4_intr.c
@@ -65,7 +65,7 @@ static int
mlx4_rx_intr_vec_enable(struct mlx4_priv *priv)
{
unsigned int i;
- unsigned int rxqs_n = priv->dev->data->nb_rx_queues;
+ unsigned int rxqs_n = ETH_DEV(priv)->data->nb_rx_queues;
unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
unsigned int count = 0;
struct rte_intr_handle *intr_handle = &priv->intr_handle;
@@ -79,7 +79,7 @@ mlx4_rx_intr_vec_enable(struct mlx4_priv *priv)
return -rte_errno;
}
for (i = 0; i != n; ++i) {
- struct rxq *rxq = priv->dev->data->rx_queues[i];
+ struct rxq *rxq = ETH_DEV(priv)->data->rx_queues[i];
/* Skip queues that cannot request interrupts. */
if (!rxq || !rxq->channel) {
@@ -120,12 +120,12 @@ static void
mlx4_link_status_alarm(struct mlx4_priv *priv)
{
const struct rte_intr_conf *const intr_conf =
- &priv->dev->data->dev_conf.intr_conf;
+ &ETH_DEV(priv)->data->dev_conf.intr_conf;
assert(priv->intr_alarm == 1);
priv->intr_alarm = 0;
if (intr_conf->lsc && !mlx4_link_status_check(priv))
- _rte_eth_dev_callback_process(priv->dev,
+ _rte_eth_dev_callback_process(ETH_DEV(priv),
RTE_ETH_EVENT_INTR_LSC,
NULL);
}
@@ -145,8 +145,8 @@ mlx4_link_status_alarm(struct mlx4_priv *priv)
static int
mlx4_link_status_check(struct mlx4_priv *priv)
{
- struct rte_eth_link *link = &priv->dev->data->dev_link;
- int ret = mlx4_link_update(priv->dev, 0);
+ struct rte_eth_link *link = &ETH_DEV(priv)->data->dev_link;
+ int ret = mlx4_link_update(ETH_DEV(priv), 0);
if (ret)
return ret;
@@ -185,7 +185,7 @@ mlx4_interrupt_handler(struct mlx4_priv *priv)
uint32_t caught[RTE_DIM(type)] = { 0 };
struct ibv_async_event event;
const struct rte_intr_conf *const intr_conf =
- &priv->dev->data->dev_conf.intr_conf;
+ &ETH_DEV(priv)->data->dev_conf.intr_conf;
unsigned int i;
/* Read all message and acknowledge them. */
@@ -208,7 +208,7 @@ mlx4_interrupt_handler(struct mlx4_priv *priv)
}
for (i = 0; i != RTE_DIM(caught); ++i)
if (caught[i])
- _rte_eth_dev_callback_process(priv->dev, type[i],
+ _rte_eth_dev_callback_process(ETH_DEV(priv), type[i],
NULL);
}
@@ -282,7 +282,7 @@ int
mlx4_intr_install(struct mlx4_priv *priv)
{
const struct rte_intr_conf *const intr_conf =
- &priv->dev->data->dev_conf.intr_conf;
+ &ETH_DEV(priv)->data->dev_conf.intr_conf;
int rc;
mlx4_intr_uninstall(priv);
@@ -381,7 +381,7 @@ int
mlx4_rxq_intr_enable(struct mlx4_priv *priv)
{
const struct rte_intr_conf *const intr_conf =
- &priv->dev->data->dev_conf.intr_conf;
+ &ETH_DEV(priv)->data->dev_conf.intr_conf;
if (intr_conf->rxq && mlx4_rx_intr_vec_enable(priv) < 0)
goto error;
diff --git a/drivers/net/mlx4/mlx4_mr.c b/drivers/net/mlx4/mlx4_mr.c
index 4376ad0b60..e4be46ab2a 100644
--- a/drivers/net/mlx4/mlx4_mr.c
+++ b/drivers/net/mlx4/mlx4_mr.c
@@ -896,7 +896,7 @@ mlx4_mr_mem_event_cb(enum rte_mem_event event_type, const void *addr,
rte_rwlock_read_lock(&mlx4_mem_event_rwlock);
/* Iterate all the existing mlx4 devices. */
LIST_FOREACH(priv, &mlx4_mem_event_cb_list, mem_event_cb)
- mlx4_mr_mem_event_free_cb(priv->dev, addr, len);
+ mlx4_mr_mem_event_free_cb(ETH_DEV(priv), addr, len);
rte_rwlock_read_unlock(&mlx4_mem_event_rwlock);
break;
case RTE_MEM_EVENT_ALLOC:
@@ -1028,7 +1028,7 @@ mlx4_rx_addr2mr_bh(struct rxq *rxq, uintptr_t addr)
DEBUG("Rx queue %u: miss on top-half, mru=%u, head=%u, addr=%p",
rxq->stats.idx, mr_ctrl->mru, mr_ctrl->head, (void *)addr);
- return mlx4_mr_addr2mr_bh(priv->dev, mr_ctrl, addr);
+ return mlx4_mr_addr2mr_bh(ETH_DEV(priv), mr_ctrl, addr);
}
/**
@@ -1050,7 +1050,7 @@ mlx4_tx_addr2mr_bh(struct txq *txq, uintptr_t addr)
DEBUG("Tx queue %u: miss on top-half, mru=%u, head=%u, addr=%p",
txq->stats.idx, mr_ctrl->mru, mr_ctrl->head, (void *)addr);
- return mlx4_mr_addr2mr_bh(priv->dev, mr_ctrl, addr);
+ return mlx4_mr_addr2mr_bh(ETH_DEV(priv), mr_ctrl, addr);
}
/**
@@ -1225,7 +1225,7 @@ mlx4_tx_update_ext_mp(struct txq *txq, uintptr_t addr, struct rte_mempool *mp)
struct mlx4_mr_ctrl *mr_ctrl = &txq->mr_ctrl;
struct mlx4_priv *priv = txq->priv;
- mlx4_mr_update_ext_mp(priv->dev, mr_ctrl, mp);
+ mlx4_mr_update_ext_mp(ETH_DEV(priv), mr_ctrl, mp);
return mlx4_tx_addr2mr_bh(txq, addr);
}
diff --git a/drivers/net/mlx4/mlx4_rxq.c b/drivers/net/mlx4/mlx4_rxq.c
index 3782c6baab..50f33eb0c5 100644
--- a/drivers/net/mlx4/mlx4_rxq.c
+++ b/drivers/net/mlx4/mlx4_rxq.c
@@ -176,6 +176,7 @@ mlx4_rss_attach(struct mlx4_rss *rss)
struct ibv_wq *ind_tbl[rss->queues];
struct mlx4_priv *priv = rss->priv;
+ struct rte_eth_dev *dev = ETH_DEV(priv);
const char *msg;
unsigned int i = 0;
int ret;
@@ -189,8 +190,8 @@ mlx4_rss_attach(struct mlx4_rss *rss)
uint16_t id = rss->queue_id[i];
struct rxq *rxq = NULL;
- if (id < priv->dev->data->nb_rx_queues)
- rxq = priv->dev->data->rx_queues[id];
+ if (id < dev->data->nb_rx_queues)
+ rxq = dev->data->rx_queues[id];
if (!rxq) {
ret = EINVAL;
msg = "RSS target queue is not configured";
@@ -269,7 +270,7 @@ mlx4_rss_attach(struct mlx4_rss *rss)
rss->ind = NULL;
}
while (i--)
- mlx4_rxq_detach(priv->dev->data->rx_queues[rss->queue_id[i]]);
+ mlx4_rxq_detach(dev->data->rx_queues[rss->queue_id[i]]);
ERROR("mlx4: %s", msg);
--rss->usecnt;
rte_errno = ret;
@@ -291,6 +292,7 @@ void
mlx4_rss_detach(struct mlx4_rss *rss)
{
struct mlx4_priv *priv = rss->priv;
+ struct rte_eth_dev *dev = ETH_DEV(priv);
unsigned int i;
assert(rss->refcnt);
@@ -303,7 +305,7 @@ mlx4_rss_detach(struct mlx4_rss *rss)
claim_zero(mlx4_glue->destroy_rwq_ind_table(rss->ind));
rss->ind = NULL;
for (i = 0; i != rss->queues; ++i)
- mlx4_rxq_detach(priv->dev->data->rx_queues[rss->queue_id[i]]);
+ mlx4_rxq_detach(dev->data->rx_queues[rss->queue_id[i]]);
}
/**
@@ -329,7 +331,7 @@ mlx4_rss_detach(struct mlx4_rss *rss)
int
mlx4_rss_init(struct mlx4_priv *priv)
{
- struct rte_eth_dev *dev = priv->dev;
+ struct rte_eth_dev *dev = ETH_DEV(priv);
uint8_t log2_range = rte_log2_u32(dev->data->nb_rx_queues);
uint32_t wq_num_prev = 0;
const char *msg;
@@ -338,7 +340,7 @@ mlx4_rss_init(struct mlx4_priv *priv)
if (priv->rss_init)
return 0;
- if (priv->dev->data->nb_rx_queues > priv->hw_rss_max_qps) {
+ if (ETH_DEV(priv)->data->nb_rx_queues > priv->hw_rss_max_qps) {
ERROR("RSS does not support more than %d queues",
priv->hw_rss_max_qps);
rte_errno = EINVAL;
@@ -356,8 +358,8 @@ mlx4_rss_init(struct mlx4_priv *priv)
rte_errno = ret;
return -ret;
}
- for (i = 0; i != priv->dev->data->nb_rx_queues; ++i) {
- struct rxq *rxq = priv->dev->data->rx_queues[i];
+ for (i = 0; i != ETH_DEV(priv)->data->nb_rx_queues; ++i) {
+ struct rxq *rxq = ETH_DEV(priv)->data->rx_queues[i];
struct ibv_cq *cq;
struct ibv_wq *wq;
uint32_t wq_num;
@@ -432,7 +434,7 @@ mlx4_rss_init(struct mlx4_priv *priv)
ERROR("cannot initialize common RSS resources (queue %u): %s: %s",
i, msg, strerror(ret));
while (i--) {
- struct rxq *rxq = priv->dev->data->rx_queues[i];
+ struct rxq *rxq = ETH_DEV(priv)->data->rx_queues[i];
if (rxq)
mlx4_rxq_detach(rxq);
@@ -457,8 +459,8 @@ mlx4_rss_deinit(struct mlx4_priv *priv)
if (!priv->rss_init)
return;
- for (i = 0; i != priv->dev->data->nb_rx_queues; ++i) {
- struct rxq *rxq = priv->dev->data->rx_queues[i];
+ for (i = 0; i != ETH_DEV(priv)->data->nb_rx_queues; ++i) {
+ struct rxq *rxq = ETH_DEV(priv)->data->rx_queues[i];
if (rxq) {
assert(rxq->usecnt == 1);
@@ -494,7 +496,7 @@ mlx4_rxq_attach(struct rxq *rxq)
}
struct mlx4_priv *priv = rxq->priv;
- struct rte_eth_dev *dev = priv->dev;
+ struct rte_eth_dev *dev = ETH_DEV(priv);
const uint32_t elts_n = 1 << rxq->elts_n;
const uint32_t sges_n = 1 << rxq->sges_n;
struct rte_mbuf *(*elts)[elts_n] = rxq->elts;
@@ -561,7 +563,7 @@ mlx4_rxq_attach(struct rxq *rxq)
}
/* Pre-register Rx mempool. */
DEBUG("port %u Rx queue %u registering mp %s having %u chunks",
- priv->dev->data->port_id, rxq->stats.idx,
+ ETH_DEV(priv)->data->port_id, rxq->stats.idx,
rxq->mp->name, rxq->mp->nb_mem_chunks);
mlx4_mr_update_mp(dev, &rxq->mr_ctrl, rxq->mp);
wqes = (volatile struct mlx4_wqe_data_seg (*)[])
@@ -917,11 +919,11 @@ mlx4_rx_queue_release(void *dpdk_rxq)
if (rxq == NULL)
return;
priv = rxq->priv;
- for (i = 0; i != priv->dev->data->nb_rx_queues; ++i)
- if (priv->dev->data->rx_queues[i] == rxq) {
+ for (i = 0; i != ETH_DEV(priv)->data->nb_rx_queues; ++i)
+ if (ETH_DEV(priv)->data->rx_queues[i] == rxq) {
DEBUG("%p: removing Rx queue %p from list",
- (void *)priv->dev, (void *)rxq);
- priv->dev->data->rx_queues[i] = NULL;
+ (void *)ETH_DEV(priv), (void *)rxq);
+ ETH_DEV(priv)->data->rx_queues[i] = NULL;
break;
}
assert(!rxq->cq);
diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c
index 8142775fc4..352700820d 100644
--- a/drivers/net/mlx4/mlx4_txq.c
+++ b/drivers/net/mlx4/mlx4_txq.c
@@ -357,11 +357,11 @@ mlx4_tx_queue_release(void *dpdk_txq)
if (txq == NULL)
return;
priv = txq->priv;
- for (i = 0; i != priv->dev->data->nb_tx_queues; ++i)
- if (priv->dev->data->tx_queues[i] == txq) {
+ for (i = 0; i != ETH_DEV(priv)->data->nb_tx_queues; ++i)
+ if (ETH_DEV(priv)->data->tx_queues[i] == txq) {
DEBUG("%p: removing Tx queue %p from list",
- (void *)priv->dev, (void *)txq);
- priv->dev->data->tx_queues[i] = NULL;
+ (void *)ETH_DEV(priv), (void *)txq);
+ ETH_DEV(priv)->data->tx_queues[i] = NULL;
break;
}
mlx4_txq_free_elts(txq);
--
2.11.0
^ permalink raw reply [flat|nested] 30+ messages in thread
* [dpdk-dev] [PATCH v3 2/3] net/mlx4: add external allocator for Verbs object
2019-04-01 21:15 ` [dpdk-dev] [PATCH v3 0/3] " Yongseok Koh
2019-04-01 21:15 ` Yongseok Koh
2019-04-01 21:15 ` [dpdk-dev] [PATCH v3 1/3] net/mlx4: change device reference for secondary process Yongseok Koh
@ 2019-04-01 21:15 ` Yongseok Koh
2019-04-01 21:15 ` Yongseok Koh
2019-04-01 21:15 ` [dpdk-dev] [PATCH v3 3/3] net/mlx4: add secondary process support Yongseok Koh
2019-04-02 7:12 ` [dpdk-dev] [PATCH v3 0/3] " Shahaf Shuler
4 siblings, 1 reply; 30+ messages in thread
From: Yongseok Koh @ 2019-04-01 21:15 UTC (permalink / raw)
To: shahafs; +Cc: dev
To support secondary processes, the memory allocated by the library, such as
completion rings (CQ) and buffer rings (WQ), must be manageable by the EAL in
order to share it with secondary processes. With recent changes in rdma-core
and the kernel driver, it is possible to provide an external allocator to the
library layer for this purpose. All such resources are now allocated within
the DPDK framework.
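For reference only, a rough standalone sketch of the mechanism (it mirrors the
mlx4.c hunk below; error handling and per-queue NUMA socket selection are
omitted, and hook_allocator() is a hypothetical wrapper):

#include <unistd.h>
#include <infiniband/mlx4dv.h>
#include <rte_malloc.h>
#include <rte_memory.h>

/* Serve libmlx4 buffer allocations from DPDK (hugepage) memory so that
 * they can later be shared with secondary processes. */
static void *
pmd_alloc_cb(size_t size, void *data)
{
	(void)data;
	return rte_malloc_socket(__func__, size, sysconf(_SC_PAGESIZE),
				 SOCKET_ID_ANY);
}

static void
pmd_free_cb(void *ptr, void *data)
{
	(void)data;
	rte_free(ptr);
}

/* Register the callbacks on a freshly opened Verbs context. */
static int
hook_allocator(struct ibv_context *ctx, void *cb_data)
{
	struct mlx4dv_ctx_allocators alctr = {
		.alloc = pmd_alloc_cb,
		.free = pmd_free_cb,
		.data = cb_data,
	};

	return mlx4dv_set_context_attr(ctx,
			MLX4DV_SET_CTX_ATTR_BUF_ALLOCATORS, &alctr);
}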
Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
Acked-by: Shahaf Shuler <shahafs@mellanox.com>
---
drivers/net/mlx4/Makefile | 5 ++++
drivers/net/mlx4/meson.build | 10 +++++++
drivers/net/mlx4/mlx4.c | 67 ++++++++++++++++++++++++++++++++++++++++++++
drivers/net/mlx4/mlx4.h | 20 +++++++++++++
drivers/net/mlx4/mlx4_rxq.c | 4 +++
drivers/net/mlx4/mlx4_txq.c | 6 ++--
6 files changed, 110 insertions(+), 2 deletions(-)
diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile
index 1f1b927484..b527efd625 100644
--- a/drivers/net/mlx4/Makefile
+++ b/drivers/net/mlx4/Makefile
@@ -88,6 +88,11 @@ mlx4_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
$Q $(RM) -f -- '$@'
$Q : > '$@'
$Q sh -- '$<' '$@' \
+ HAVE_IBV_MLX4_BUF_ALLOCATORS \
+ infiniband/mlx4dv.h \
+ enum MLX4DV_SET_CTX_ATTR_BUF_ALLOCATORS \
+ $(AUTOCONF_OUTPUT)
+ $Q sh -- '$<' '$@' \
HAVE_IBV_MLX4_WQE_LSO_SEG \
infiniband/mlx4dv.h \
type 'struct mlx4_wqe_lso_seg' \
diff --git a/drivers/net/mlx4/meson.build b/drivers/net/mlx4/meson.build
index b4f9672e73..650e2c8fbc 100644
--- a/drivers/net/mlx4/meson.build
+++ b/drivers/net/mlx4/meson.build
@@ -70,7 +70,17 @@ if build
[ 'HAVE_IBV_MLX4_WQE_LSO_SEG', 'infiniband/mlx4dv.h',
'struct mlx4_wqe_lso_seg', 'mss_hdr_size' ],
]
+ # input array for meson symbol search:
+ # [ "MACRO to define if found", "header for the search",
+ # "symbol to search" ]
+ has_sym_args = [
+ [ 'HAVE_IBV_MLX4_BUF_ALLOCATORS', 'infiniband/mlx4dv.h',
+ 'MLX4DV_SET_CTX_ATTR_BUF_ALLOCATORS' ],
+ ]
config = configuration_data()
+ foreach arg:has_sym_args
+ config.set(arg[0], cc.has_header_symbol(arg[1], arg[2]))
+ endforeach
foreach arg:has_member_args
file_prefix = '#include<' + arg[1] + '>'
config.set(arg[0], cc.has_member(arg[2], arg[3],
diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index bb6ab8ec6e..0e0b035df0 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -69,6 +69,62 @@ const char *pmd_mlx4_init_params[] = {
static void mlx4_dev_stop(struct rte_eth_dev *dev);
+#ifdef HAVE_IBV_MLX4_BUF_ALLOCATORS
+/**
+ * Verbs callback to allocate a memory. This function should allocate the space
+ * according to the size provided residing inside a huge page.
+ * Please note that all allocation must respect the alignment from libmlx4
+ * (i.e. currently sysconf(_SC_PAGESIZE)).
+ *
+ * @param[in] size
+ * The size in bytes of the memory to allocate.
+ * @param[in] data
+ * A pointer to the callback data.
+ *
+ * @return
+ * Allocated buffer, NULL otherwise and rte_errno is set.
+ */
+static void *
+mlx4_alloc_verbs_buf(size_t size, void *data)
+{
+ struct mlx4_priv *priv = data;
+ void *ret;
+ size_t alignment = sysconf(_SC_PAGESIZE);
+ unsigned int socket = SOCKET_ID_ANY;
+
+ if (priv->verbs_alloc_ctx.type == MLX4_VERBS_ALLOC_TYPE_TX_QUEUE) {
+ const struct txq *txq = priv->verbs_alloc_ctx.obj;
+
+ socket = txq->socket;
+ } else if (priv->verbs_alloc_ctx.type ==
+ MLX4_VERBS_ALLOC_TYPE_RX_QUEUE) {
+ const struct rxq *rxq = priv->verbs_alloc_ctx.obj;
+
+ socket = rxq->socket;
+ }
+ assert(data != NULL);
+ ret = rte_malloc_socket(__func__, size, alignment, socket);
+ if (!ret && size)
+ rte_errno = ENOMEM;
+ return ret;
+}
+
+/**
+ * Verbs callback to free a memory.
+ *
+ * @param[in] ptr
+ * A pointer to the memory to free.
+ * @param[in] data
+ * A pointer to the callback data.
+ */
+static void
+mlx4_free_verbs_buf(void *ptr, void *data __rte_unused)
+{
+ assert(data != NULL);
+ rte_free(ptr);
+}
+#endif
+
/**
* DPDK callback for Ethernet device configuration.
*
@@ -755,6 +811,17 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
eth_dev->intr_handle = &priv->intr_handle;
priv->dev_data = eth_dev->data;
eth_dev->dev_ops = &mlx4_dev_ops;
+#ifdef HAVE_IBV_MLX4_BUF_ALLOCATORS
+ /* Hint libmlx4 to use PMD allocator for data plane resources */
+ struct mlx4dv_ctx_allocators alctr = {
+ .alloc = &mlx4_alloc_verbs_buf,
+ .free = &mlx4_free_verbs_buf,
+ .data = priv,
+ };
+ mlx4_glue->dv_set_context_attr
+ (ctx, MLX4DV_SET_CTX_ATTR_BUF_ALLOCATORS,
+ (void *)((uintptr_t)&alctr));
+#endif
/* Bring Ethernet device up. */
DEBUG("forcing Ethernet interface up");
mlx4_dev_set_link_up(eth_dev);
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 51566caf7f..d43e05ea74 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -72,6 +72,24 @@ struct rxq;
struct txq;
struct rte_flow;
+/**
+ * Type of object being allocated.
+ */
+enum mlx4_verbs_alloc_type {
+ MLX4_VERBS_ALLOC_TYPE_NONE,
+ MLX4_VERBS_ALLOC_TYPE_TX_QUEUE,
+ MLX4_VERBS_ALLOC_TYPE_RX_QUEUE,
+};
+
+/**
+ * Verbs allocator needs a context to know in the callback which kind of
+ * resources it is allocating.
+ */
+struct mlx4_verbs_alloc_ctx {
+ enum mlx4_verbs_alloc_type type; /* Kind of object being allocated. */
+ const void *obj; /* Pointer to the DPDK object. */
+};
+
LIST_HEAD(mlx4_dev_list, mlx4_priv);
LIST_HEAD(mlx4_mr_list, mlx4_mr);
@@ -111,6 +129,8 @@ struct mlx4_priv {
LIST_HEAD(, rte_flow) flows; /**< Configured flow rule handles. */
struct ether_addr mac[MLX4_MAX_MAC_ADDRESSES];
/**< Configured MAC addresses. Unused entries are zeroed. */
+ struct mlx4_verbs_alloc_ctx verbs_alloc_ctx;
+ /**< Context for Verbs allocator. */
};
#define PORT_ID(priv) ((priv)->dev_data->port_id)
diff --git a/drivers/net/mlx4/mlx4_rxq.c b/drivers/net/mlx4/mlx4_rxq.c
index 50f33eb0c5..f45c1ff85c 100644
--- a/drivers/net/mlx4/mlx4_rxq.c
+++ b/drivers/net/mlx4/mlx4_rxq.c
@@ -513,6 +513,8 @@ mlx4_rxq_attach(struct rxq *rxq)
int ret;
assert(rte_is_power_of_2(elts_n));
+ priv->verbs_alloc_ctx.type = MLX4_VERBS_ALLOC_TYPE_RX_QUEUE;
+ priv->verbs_alloc_ctx.obj = rxq;
cq = mlx4_glue->create_cq(priv->ctx, elts_n / sges_n, NULL,
rxq->channel, 0);
if (!cq) {
@@ -620,6 +622,7 @@ mlx4_rxq_attach(struct rxq *rxq)
rxq->rq_ci = elts_n / sges_n;
rte_wmb();
*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
+ priv->verbs_alloc_ctx.type = MLX4_VERBS_ALLOC_TYPE_NONE;
return 0;
error:
if (wq)
@@ -630,6 +633,7 @@ mlx4_rxq_attach(struct rxq *rxq)
rte_errno = ret;
ERROR("error while attaching Rx queue %p: %s: %s",
(void *)rxq, msg, strerror(ret));
+ priv->verbs_alloc_ctx.type = MLX4_VERBS_ALLOC_TYPE_NONE;
return -ret;
}
diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c
index 352700820d..2dc198e77f 100644
--- a/drivers/net/mlx4/mlx4_txq.c
+++ b/drivers/net/mlx4/mlx4_txq.c
@@ -177,10 +177,8 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
uint64_t offloads;
offloads = conf->offloads | dev->data->dev_conf.txmode.offloads;
-
DEBUG("%p: configuring queue %u for %u descriptors",
(void *)dev, idx, desc);
-
if (idx >= dev->data->nb_tx_queues) {
rte_errno = EOVERFLOW;
ERROR("%p: queue index out of range (%u >= %u)",
@@ -241,6 +239,8 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
.lb = !!priv->vf,
.bounce_buf = bounce_buf,
};
+ priv->verbs_alloc_ctx.type = MLX4_VERBS_ALLOC_TYPE_TX_QUEUE;
+ priv->verbs_alloc_ctx.obj = txq;
txq->cq = mlx4_glue->create_cq(priv->ctx, desc, NULL, NULL, 0);
if (!txq->cq) {
rte_errno = ENOMEM;
@@ -331,6 +331,7 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
txq->mr_ctrl.dev_gen_ptr = &priv->mr.dev_gen;
DEBUG("%p: adding Tx queue %p to list", (void *)dev, (void *)txq);
dev->data->tx_queues[idx] = txq;
+ priv->verbs_alloc_ctx.type = MLX4_VERBS_ALLOC_TYPE_NONE;
return 0;
error:
dev->data->tx_queues[idx] = NULL;
@@ -338,6 +339,7 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
mlx4_tx_queue_release(txq);
rte_errno = ret;
assert(rte_errno > 0);
+ priv->verbs_alloc_ctx.type = MLX4_VERBS_ALLOC_TYPE_NONE;
return -rte_errno;
}
--
2.11.0
^ permalink raw reply [flat|nested] 30+ messages in thread
* [dpdk-dev] [PATCH v3 3/3] net/mlx4: add secondary process support
2019-04-01 21:15 ` [dpdk-dev] [PATCH v3 0/3] " Yongseok Koh
` (2 preceding siblings ...)
2019-04-01 21:15 ` [dpdk-dev] [PATCH v3 2/3] net/mlx4: add external allocator for Verbs object Yongseok Koh
@ 2019-04-01 21:15 ` Yongseok Koh
2019-04-01 21:15 ` Yongseok Koh
2019-04-02 7:12 ` [dpdk-dev] [PATCH v3 0/3] " Shahaf Shuler
4 siblings, 1 reply; 30+ messages in thread
From: Yongseok Koh @ 2019-04-01 21:15 UTC (permalink / raw)
To: shahafs; +Cc: dev
In order to support secondary processes, a few features are required.
a) The rdma-core library should allocate device resources using DPDK's memory
allocator.
b) UAR should be remapped for secondary processes. Currently, in order not to
use a different data structure for secondary processes, the PMD tries to
reserve identical virtual address space for both primary and secondary
processes.
c) An IPC channel is necessary; it can easily be set up with the rte_mp APIs.
Through this channel, the Verbs command FD is delivered to the secondary
process, and the device stop/start events are broadcast from the primary
process. A condensed sketch of this request/reply flow is shown below.
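For illustration only, a condensed sketch of the rte_mp request used to fetch
the Verbs command FD in a secondary process (the message layout is simplified
compared to mlx4_mp.c below, and request_cmd_fd() is a hypothetical name):

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <rte_eal.h>
#include <rte_string_fns.h>

/* Secondary process: ask the primary for the Verbs command FD so that
 * UAR pages can later be mmap()'ed locally. */
static int
request_cmd_fd(uint16_t port_id)
{
	struct rte_mp_msg req;
	struct rte_mp_reply rep;
	struct timespec ts = { .tv_sec = 5, .tv_nsec = 0 };
	int fd;

	memset(&req, 0, sizeof(req));
	strlcpy(req.name, "net_mlx4_mp", sizeof(req.name));
	memcpy(req.param, &port_id, sizeof(port_id));
	req.len_param = sizeof(port_id);
	if (rte_mp_request_sync(&req, &rep, &ts) < 0)
		return -1;
	fd = (rep.nb_received == 1) ? rep.msgs[0].fds[0] : -1;
	free(rep.msgs);
	return fd;
}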
Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
doc/guides/nics/features/mlx4.ini | 1 +
doc/guides/nics/mlx4.rst | 10 +
drivers/net/mlx4/Makefile | 6 +
drivers/net/mlx4/meson.build | 3 +
drivers/net/mlx4/mlx4.c | 384 ++++++++++++++++++++++++++++++++++++--
drivers/net/mlx4/mlx4.h | 61 ++++++
drivers/net/mlx4/mlx4_mp.c | 304 ++++++++++++++++++++++++++++++
drivers/net/mlx4/mlx4_mr.c | 32 +++-
drivers/net/mlx4/mlx4_prm.h | 4 +-
drivers/net/mlx4/mlx4_rxtx.c | 2 +
drivers/net/mlx4/mlx4_rxtx.h | 1 +
drivers/net/mlx4/mlx4_txq.c | 113 +++++++++++
12 files changed, 898 insertions(+), 23 deletions(-)
create mode 100644 drivers/net/mlx4/mlx4_mp.c
diff --git a/doc/guides/nics/features/mlx4.ini b/doc/guides/nics/features/mlx4.ini
index a211aef332..4502aa2a87 100644
--- a/doc/guides/nics/features/mlx4.ini
+++ b/doc/guides/nics/features/mlx4.ini
@@ -29,6 +29,7 @@ Packet type parsing = Y
Basic stats = Y
Stats per queue = Y
FW version = Y
+Multiprocess aware = Y
Other kdrv = Y
Power8 = Y
x86-32 = Y
diff --git a/doc/guides/nics/mlx4.rst b/doc/guides/nics/mlx4.rst
index 4ad361a2c2..cd34838f41 100644
--- a/doc/guides/nics/mlx4.rst
+++ b/doc/guides/nics/mlx4.rst
@@ -145,6 +145,16 @@ below.
Limitations
-----------
+- For secondary process:
+
+ - Forked secondary process not supported.
+ - All mempools must be initialized before rte_eth_dev_start().
+ - External memory unregistered in EAL memseg list cannot be used for DMA
+ unless such memory has been registered by ``mlx4_mr_update_ext_mp()`` in
+ primary process and remapped to the same virtual address in secondary
+ process. If the external memory is registered by primary process but has
+ different virtual address in secondary process, unexpected error may happen.
+
- CRC stripping is supported by default and always reported as "true".
The ability to enable/disable CRC stripping requires OFED version
4.3-1.5.0.0 and above or rdma-core version v18 and above.
diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile
index b527efd625..8126b0dfc6 100644
--- a/drivers/net/mlx4/Makefile
+++ b/drivers/net/mlx4/Makefile
@@ -18,6 +18,7 @@ ifneq ($(CONFIG_RTE_IBVERBS_LINK_DLOPEN),y)
SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_glue.c
endif
SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_intr.c
+SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_mp.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_mr.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_rxq.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_rxtx.c
@@ -93,6 +94,11 @@ mlx4_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
enum MLX4DV_SET_CTX_ATTR_BUF_ALLOCATORS \
$(AUTOCONF_OUTPUT)
$Q sh -- '$<' '$@' \
+ HAVE_IBV_MLX4_UAR_MMAP_OFFSET \
+ infiniband/mlx4dv.h \
+ enum MLX4DV_QP_MASK_UAR_MMAP_OFFSET \
+ $(AUTOCONF_OUTPUT)
+ $Q sh -- '$<' '$@' \
HAVE_IBV_MLX4_WQE_LSO_SEG \
infiniband/mlx4dv.h \
type 'struct mlx4_wqe_lso_seg' \
diff --git a/drivers/net/mlx4/meson.build b/drivers/net/mlx4/meson.build
index 650e2c8fbc..de020701d1 100644
--- a/drivers/net/mlx4/meson.build
+++ b/drivers/net/mlx4/meson.build
@@ -33,6 +33,7 @@ if build
'mlx4_ethdev.c',
'mlx4_flow.c',
'mlx4_intr.c',
+ 'mlx4_mp.c',
'mlx4_mr.c',
'mlx4_rxq.c',
'mlx4_rxtx.c',
@@ -76,6 +77,8 @@ if build
has_sym_args = [
[ 'HAVE_IBV_MLX4_BUF_ALLOCATORS', 'infiniband/mlx4dv.h',
'MLX4DV_SET_CTX_ATTR_BUF_ALLOCATORS' ],
+ [ 'HAVE_IBV_MLX4_UAR_MMAP_OFFSET', 'infiniband/mlx4dv.h',
+ 'MLX4DV_QP_MASK_UAR_MMAP_OFFSET' ],
]
config = configuration_data()
foreach arg:has_sym_args
diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 0e0b035df0..315640a6d7 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -17,6 +17,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <sys/mman.h>
#include <unistd.h>
/* Verbs headers do not support -pedantic. */
@@ -48,10 +49,16 @@
#include "mlx4_rxtx.h"
#include "mlx4_utils.h"
-struct mlx4_dev_list mlx4_mem_event_cb_list =
- LIST_HEAD_INITIALIZER(mlx4_mem_event_cb_list);
+static const char *MZ_MLX4_PMD_SHARED_DATA = "mlx4_pmd_shared_data";
-rte_rwlock_t mlx4_mem_event_rwlock = RTE_RWLOCK_INITIALIZER;
+/* Shared memory between primary and secondary processes. */
+struct mlx4_shared_data *mlx4_shared_data;
+
+/* Spinlock for mlx4_shared_data allocation. */
+static rte_spinlock_t mlx4_shared_data_lock = RTE_SPINLOCK_INITIALIZER;
+
+/* Process local data for secondary processes. */
+static struct mlx4_local_data mlx4_local_data;
/** Configuration structure for device arguments. */
struct mlx4_conf {
@@ -69,6 +76,77 @@ const char *pmd_mlx4_init_params[] = {
static void mlx4_dev_stop(struct rte_eth_dev *dev);
+/**
+ * Initialize shared data between primary and secondary process.
+ *
+ * A memzone is reserved by primary process and secondary processes attach to
+ * the memzone.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx4_init_shared_data(void)
+{
+ const struct rte_memzone *mz;
+ int ret = 0;
+
+ rte_spinlock_lock(&mlx4_shared_data_lock);
+ if (mlx4_shared_data == NULL) {
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ /* Allocate shared memory. */
+ mz = rte_memzone_reserve(MZ_MLX4_PMD_SHARED_DATA,
+ sizeof(*mlx4_shared_data),
+ SOCKET_ID_ANY, 0);
+ if (mz == NULL) {
+ ERROR("Cannot allocate mlx4 shared data\n");
+ ret = -rte_errno;
+ goto error;
+ }
+ mlx4_shared_data = mz->addr;
+ memset(mlx4_shared_data, 0, sizeof(*mlx4_shared_data));
+ rte_spinlock_init(&mlx4_shared_data->lock);
+ } else {
+ /* Lookup allocated shared memory. */
+ mz = rte_memzone_lookup(MZ_MLX4_PMD_SHARED_DATA);
+ if (mz == NULL) {
+ ERROR("Cannot attach mlx4 shared data\n");
+ ret = -rte_errno;
+ goto error;
+ }
+ mlx4_shared_data = mz->addr;
+ memset(&mlx4_local_data, 0, sizeof(mlx4_local_data));
+ }
+ }
+error:
+ rte_spinlock_unlock(&mlx4_shared_data_lock);
+ return ret;
+}
+
+/**
+ * Uninitialize shared data between primary and secondary process.
+ *
+ * The pointer of secondary process is dereferenced and primary process frees
+ * the memzone.
+ */
+static void
+mlx4_uninit_shared_data(void)
+{
+ const struct rte_memzone *mz;
+
+ rte_spinlock_lock(&mlx4_shared_data_lock);
+ if (mlx4_shared_data) {
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ mz = rte_memzone_lookup(MZ_MLX4_PMD_SHARED_DATA);
+ rte_memzone_free(mz);
+ } else {
+ memset(&mlx4_local_data, 0, sizeof(mlx4_local_data));
+ }
+ mlx4_shared_data = NULL;
+ }
+ rte_spinlock_unlock(&mlx4_shared_data_lock);
+}
+
#ifdef HAVE_IBV_MLX4_BUF_ALLOCATORS
/**
* Verbs callback to allocate a memory. This function should allocate the space
@@ -181,6 +259,11 @@ mlx4_dev_start(struct rte_eth_dev *dev)
return 0;
DEBUG("%p: attaching configured flows to all RX queues", (void *)dev);
priv->started = 1;
+ ret = mlx4_tx_uar_remap(dev, priv->ctx->cmd_fd);
+ if (ret) {
+ ERROR("%p: cannot remap UAR", (void *)dev);
+ goto err;
+ }
ret = mlx4_rss_init(priv);
if (ret) {
ERROR("%p: cannot initialize RSS resources: %s",
@@ -208,6 +291,8 @@ mlx4_dev_start(struct rte_eth_dev *dev)
rte_wmb();
dev->tx_pkt_burst = mlx4_tx_burst;
dev->rx_pkt_burst = mlx4_rx_burst;
+ /* Enable datapath on secondary process. */
+ mlx4_mp_req_start_rxtx(dev);
return 0;
err:
mlx4_dev_stop(dev);
@@ -226,6 +311,8 @@ static void
mlx4_dev_stop(struct rte_eth_dev *dev)
{
struct mlx4_priv *priv = dev->data->dev_private;
+ const size_t page_size = sysconf(_SC_PAGESIZE);
+ int i;
if (!priv->started)
return;
@@ -234,9 +321,20 @@ mlx4_dev_stop(struct rte_eth_dev *dev)
dev->tx_pkt_burst = mlx4_tx_burst_removed;
dev->rx_pkt_burst = mlx4_rx_burst_removed;
rte_wmb();
+ /* Disable datapath on secondary process. */
+ mlx4_mp_req_stop_rxtx(dev);
mlx4_flow_sync(priv, NULL);
mlx4_rxq_intr_disable(priv);
mlx4_rss_deinit(priv);
+ for (i = 0; i != dev->data->nb_tx_queues; ++i) {
+ struct txq *txq;
+
+ txq = dev->data->tx_queues[i];
+ if (!txq)
+ continue;
+ munmap((void *)RTE_ALIGN_FLOOR((uintptr_t)txq->msq.db,
+ page_size), page_size);
+ }
}
/**
@@ -259,6 +357,8 @@ mlx4_dev_close(struct rte_eth_dev *dev)
dev->rx_pkt_burst = mlx4_rx_burst_removed;
dev->tx_pkt_burst = mlx4_tx_burst_removed;
rte_wmb();
+ /* Disable datapath on secondary process. */
+ mlx4_mp_req_stop_rxtx(dev);
mlx4_flow_clean(priv);
mlx4_rss_deinit(priv);
for (i = 0; i != dev->data->nb_rx_queues; ++i)
@@ -310,6 +410,14 @@ static const struct eth_dev_ops mlx4_dev_ops = {
.is_removed = mlx4_is_removed,
};
+/* Available operations from secondary process. */
+static const struct eth_dev_ops mlx4_dev_sec_ops = {
+ .stats_get = mlx4_stats_get,
+ .stats_reset = mlx4_stats_reset,
+ .fw_version_get = mlx4_fw_version_get,
+ .dev_infos_get = mlx4_dev_infos_get,
+};
+
/**
* Get PCI information from struct ibv_device.
*
@@ -549,6 +657,200 @@ mlx4_hw_rss_sup(struct ibv_context *ctx, struct ibv_pd *pd,
static struct rte_pci_driver mlx4_driver;
+static int
+find_lower_va_bound(const struct rte_memseg_list *msl,
+ const struct rte_memseg *ms, void *arg)
+{
+ void **addr = arg;
+
+ if (msl->external)
+ return 0;
+ if (*addr == NULL)
+ *addr = ms->addr;
+ else
+ *addr = RTE_MIN(*addr, ms->addr);
+
+ return 0;
+}
+
+/**
+ * Reserve UAR address space for primary process.
+ *
+ * Process local resource is used by both primary and secondary to avoid
+ * duplicate reservation. The space has to be available on both primary and
+ * secondary process, TXQ UAR maps to this area using fixed mmap w/o double
+ * check.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx4_uar_init_primary(void)
+{
+ struct mlx4_shared_data *sd = mlx4_shared_data;
+ void *addr = (void *)0;
+
+ if (sd->uar_base)
+ return 0;
+ /* find out lower bound of hugepage segments */
+ rte_memseg_walk(find_lower_va_bound, &addr);
+ /* keep distance to hugepages to minimize potential conflicts. */
+ addr = RTE_PTR_SUB(addr, (uintptr_t)(MLX4_UAR_OFFSET + MLX4_UAR_SIZE));
+ /* anonymous mmap, no real memory consumption. */
+ addr = mmap(addr, MLX4_UAR_SIZE,
+ PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (addr == MAP_FAILED) {
+ ERROR("failed to reserve UAR address space, please"
+ " adjust MLX4_UAR_SIZE or try --base-virtaddr");
+ rte_errno = ENOMEM;
+ return -rte_errno;
+ }
+ /* Accept either same addr or a new addr returned from mmap if target
+ * range occupied.
+ */
+ INFO("reserved UAR address space: %p", addr);
+ sd->uar_base = addr; /* for primary and secondary UAR re-mmap. */
+ return 0;
+}
+
+/**
+ * Unmap UAR address space reserved for primary process.
+ */
+static void
+mlx4_uar_uninit_primary(void)
+{
+ struct mlx4_shared_data *sd = mlx4_shared_data;
+
+ if (!sd->uar_base)
+ return;
+ munmap(sd->uar_base, MLX4_UAR_SIZE);
+ sd->uar_base = NULL;
+}
+
+/**
+ * Reserve UAR address space for secondary process, align with primary process.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx4_uar_init_secondary(void)
+{
+ struct mlx4_shared_data *sd = mlx4_shared_data;
+ struct mlx4_local_data *ld = &mlx4_local_data;
+ void *addr;
+
+ if (ld->uar_base) { /* Already reserved. */
+ assert(sd->uar_base == ld->uar_base);
+ return 0;
+ }
+ assert(sd->uar_base);
+ /* anonymous mmap, no real memory consumption. */
+ addr = mmap(sd->uar_base, MLX4_UAR_SIZE,
+ PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (addr == MAP_FAILED) {
+ ERROR("UAR mmap failed: %p size: %llu",
+ sd->uar_base, MLX4_UAR_SIZE);
+ rte_errno = ENXIO;
+ return -rte_errno;
+ }
+ if (sd->uar_base != addr) {
+ ERROR("UAR address %p size %llu occupied, please"
+ " adjust MLX4_UAR_OFFSET or try EAL parameter"
+ " --base-virtaddr",
+ sd->uar_base, MLX4_UAR_SIZE);
+ rte_errno = ENXIO;
+ return -rte_errno;
+ }
+ ld->uar_base = addr;
+ INFO("reserved UAR address space: %p", addr);
+ return 0;
+}
+
+/**
+ * Unmap UAR address space reserved for secondary process.
+ */
+static void
+mlx4_uar_uninit_secondary(void)
+{
+ struct mlx4_local_data *ld = &mlx4_local_data;
+
+ if (!ld->uar_base)
+ return;
+ munmap(ld->uar_base, MLX4_UAR_SIZE);
+ ld->uar_base = NULL;
+}
+
+/**
+ * PMD global initialization.
+ *
+ * Independent from individual device, this function initializes global
+ * per-PMD data structures distinguishing primary and secondary processes.
+ * Hence, each initialization is called once per a process.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx4_init_once(void)
+{
+ struct mlx4_shared_data *sd;
+ struct mlx4_local_data *ld = &mlx4_local_data;
+ int ret;
+
+ if (mlx4_init_shared_data())
+ return -rte_errno;
+ sd = mlx4_shared_data;
+ assert(sd);
+ rte_spinlock_lock(&sd->lock);
+ switch (rte_eal_process_type()) {
+ case RTE_PROC_PRIMARY:
+ if (sd->init_done)
+ break;
+ LIST_INIT(&sd->mem_event_cb_list);
+ rte_rwlock_init(&sd->mem_event_rwlock);
+ rte_mem_event_callback_register("MLX4_MEM_EVENT_CB",
+ mlx4_mr_mem_event_cb, NULL);
+ mlx4_mp_init_primary();
+ ret = mlx4_uar_init_primary();
+ if (ret)
+ goto error;
+ sd->init_done = true;
+ break;
+ case RTE_PROC_SECONDARY:
+ if (ld->init_done)
+ break;
+ mlx4_mp_init_secondary();
+ ret = mlx4_uar_init_secondary();
+ if (ret)
+ goto error;
+ ++sd->secondary_cnt;
+ ld->init_done = true;
+ break;
+ default:
+ break;
+ }
+ rte_spinlock_unlock(&sd->lock);
+ return 0;
+error:
+ switch (rte_eal_process_type()) {
+ case RTE_PROC_PRIMARY:
+ mlx4_uar_uninit_primary();
+ mlx4_mp_uninit_primary();
+ rte_mem_event_callback_unregister("MLX4_MEM_EVENT_CB", NULL);
+ break;
+ case RTE_PROC_SECONDARY:
+ mlx4_uar_uninit_secondary();
+ mlx4_mp_uninit_secondary();
+ break;
+ default:
+ break;
+ }
+ rte_spinlock_unlock(&sd->lock);
+ mlx4_uninit_shared_data();
+ return -rte_errno;
+}
+
/**
* DPDK callback to register a PCI device.
*
@@ -579,6 +881,12 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
int i;
(void)pci_drv;
+ err = mlx4_init_once();
+ if (err) {
+ ERROR("unable to init PMD global data: %s",
+ strerror(rte_errno));
+ return -rte_errno;
+ }
assert(pci_drv == &mlx4_driver);
list = mlx4_glue->get_device_list(&i);
if (list == NULL) {
@@ -659,6 +967,7 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
struct mlx4_priv *priv = NULL;
struct rte_eth_dev *eth_dev = NULL;
struct ether_addr mac;
+ char name[RTE_ETH_NAME_MAX_LEN];
/* If port is not enabled, skip. */
if (!(conf.ports.enabled & (1 << i)))
@@ -669,6 +978,51 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
err = ENODEV;
goto port_error;
}
+ snprintf(name, sizeof(name), "%s port %u",
+ mlx4_glue->get_device_name(ibv_dev), port);
+ if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+ eth_dev = rte_eth_dev_attach_secondary(name);
+ if (eth_dev == NULL) {
+ ERROR("can not attach rte ethdev");
+ rte_errno = ENOMEM;
+ err = rte_errno;
+ goto error;
+ }
+ priv = eth_dev->data->dev_private;
+ if (!priv->verbs_alloc_ctx.enabled) {
+ ERROR("secondary process is not supported"
+ " due to lack of external allocator"
+ " from Verbs");
+ rte_errno = ENOTSUP;
+ err = rte_errno;
+ goto error;
+ }
+ eth_dev->device = &pci_dev->device;
+ eth_dev->dev_ops = &mlx4_dev_sec_ops;
+ /* Receive command fd from primary process. */
+ err = mlx4_mp_req_verbs_cmd_fd(eth_dev);
+ if (err < 0) {
+ err = rte_errno;
+ goto error;
+ }
+ /* Remap UAR for Tx queues. */
+ err = mlx4_tx_uar_remap(eth_dev, err);
+ if (err) {
+ err = rte_errno;
+ goto error;
+ }
+ /*
+ * Ethdev pointer is still required as input since
+ * the primary device is not accessible from the
+ * secondary process.
+ */
+ eth_dev->tx_pkt_burst = mlx4_tx_burst;
+ eth_dev->rx_pkt_burst = mlx4_rx_burst;
+ claim_zero(mlx4_glue->close_device(ctx));
+ rte_eth_copy_pci_info(eth_dev, pci_dev);
+ rte_eth_dev_probing_finish(eth_dev);
+ continue;
+ }
/* Check port status. */
err = mlx4_glue->query_port(ctx, port, &port_attr);
if (err) {
@@ -774,14 +1128,7 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
/* Get actual MTU if possible. */
mlx4_mtu_get(priv, &priv->mtu);
DEBUG("port %u MTU is %u", priv->port, priv->mtu);
- /* from rte_ethdev.c */
- {
- char name[RTE_ETH_NAME_MAX_LEN];
-
- snprintf(name, sizeof(name), "%s port %u",
- mlx4_glue->get_device_name(ibv_dev), port);
- eth_dev = rte_eth_dev_allocate(name);
- }
+ eth_dev = rte_eth_dev_allocate(name);
if (eth_dev == NULL) {
err = ENOMEM;
ERROR("can not allocate rte ethdev");
@@ -818,9 +1165,13 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
.free = &mlx4_free_verbs_buf,
.data = priv,
};
- mlx4_glue->dv_set_context_attr
+ err = mlx4_glue->dv_set_context_attr
(ctx, MLX4DV_SET_CTX_ATTR_BUF_ALLOCATORS,
(void *)((uintptr_t)&alctr));
+ if (err)
+ WARN("Verbs external allocator is not supported");
+ else
+ priv->verbs_alloc_ctx.enabled = 1;
#endif
/* Bring Ethernet device up. */
DEBUG("forcing Ethernet interface up");
@@ -842,9 +1193,10 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
goto port_error;
}
/* Add device to memory callback list. */
- rte_rwlock_write_lock(&mlx4_mem_event_rwlock);
- LIST_INSERT_HEAD(&mlx4_mem_event_cb_list, priv, mem_event_cb);
- rte_rwlock_write_unlock(&mlx4_mem_event_rwlock);
+ rte_rwlock_write_lock(&mlx4_shared_data->mem_event_rwlock);
+ LIST_INSERT_HEAD(&mlx4_shared_data->mem_event_cb_list,
+ priv, mem_event_cb);
+ rte_rwlock_write_unlock(&mlx4_shared_data->mem_event_rwlock);
rte_eth_dev_probing_finish(eth_dev);
continue;
port_error:
@@ -1075,8 +1427,6 @@ RTE_INIT(rte_mlx4_pmd_init)
}
mlx4_glue->fork_init();
rte_pci_register(&mlx4_driver);
- rte_mem_event_callback_register("MLX4_MEM_EVENT_CB",
- mlx4_mr_mem_event_cb, NULL);
}
RTE_PMD_EXPORT_NAME(net_mlx4, __COUNTER__);
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index d43e05ea74..1a7b1fb541 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -53,6 +53,16 @@
/** Port parameter. */
#define MLX4_PMD_PORT_KVARG "port"
+/* Reserved address space for UAR mapping. */
+#define MLX4_UAR_SIZE (1ULL << (sizeof(uintptr_t) * 4))
+
+/* Offset of reserved UAR address space to hugepage memory. Offset is used here
+ * to minimize possibility of address next to hugepage being used by other code
+ * in either primary or secondary process, failing to map TX UAR would make TX
+ * packets invisible to HW.
+ */
+#define MLX4_UAR_OFFSET (2ULL << (sizeof(uintptr_t) * 4))
+
enum {
PCI_VENDOR_ID_MELLANOX = 0x15b3,
};
@@ -63,6 +73,26 @@ enum {
PCI_DEVICE_ID_MELLANOX_CONNECTX3PRO = 0x1007,
};
+/* Request types for IPC. */
+enum mlx4_mp_req_type {
+ MLX4_MP_REQ_VERBS_CMD_FD = 1,
+ MLX4_MP_REQ_START_RXTX,
+ MLX4_MP_REQ_STOP_RXTX,
+};
+
+/* Parameters for IPC. */
+struct mlx4_mp_param {
+ enum mlx4_mp_req_type type;
+ int port_id;
+ int result;
+};
+
+/** Request timeout for IPC. */
+#define MLX4_MP_REQ_TIMEOUT_SEC 5
+
+/** Key string for IPC. */
+#define MLX4_MP_NAME "net_mlx4_mp"
+
/** Driver name reported to lower layers and used in log output. */
#define MLX4_DRIVER_NAME "net_mlx4"
@@ -86,6 +116,7 @@ enum mlx4_verbs_alloc_type {
* resources it is allocating.
*/
struct mlx4_verbs_alloc_ctx {
+ int enabled;
enum mlx4_verbs_alloc_type type; /* Kind of object being allocated. */
const void *obj; /* Pointer to the DPDK object. */
};
@@ -93,6 +124,27 @@ struct mlx4_verbs_alloc_ctx {
LIST_HEAD(mlx4_dev_list, mlx4_priv);
LIST_HEAD(mlx4_mr_list, mlx4_mr);
+/* Shared data between primary and secondary processes. */
+struct mlx4_shared_data {
+ rte_spinlock_t lock;
+ /* Global spinlock for primary and secondary processes. */
+ int init_done; /* Whether primary has done initialization. */
+ unsigned int secondary_cnt; /* Number of secondary processes init'd. */
+ void *uar_base;
+ /* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
+ struct mlx4_dev_list mem_event_cb_list;
+ rte_rwlock_t mem_event_rwlock;
+};
+
+/* Per-process data structure, not visible to other processes. */
+struct mlx4_local_data {
+ int init_done; /* Whether a secondary has done initialization. */
+ void *uar_base;
+ /* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
+};
+
+extern struct mlx4_shared_data *mlx4_shared_data;
+
/** Private data structure. */
struct mlx4_priv {
LIST_ENTRY(mlx4_priv) mem_event_cb;
@@ -175,4 +227,13 @@ void mlx4_rxq_intr_disable(struct mlx4_priv *priv);
int mlx4_rx_intr_disable(struct rte_eth_dev *dev, uint16_t idx);
int mlx4_rx_intr_enable(struct rte_eth_dev *dev, uint16_t idx);
+/* mlx4_mp.c */
+void mlx4_mp_req_start_rxtx(struct rte_eth_dev *dev);
+void mlx4_mp_req_stop_rxtx(struct rte_eth_dev *dev);
+int mlx4_mp_req_verbs_cmd_fd(struct rte_eth_dev *dev);
+void mlx4_mp_init_primary(void);
+void mlx4_mp_uninit_primary(void);
+void mlx4_mp_init_secondary(void);
+void mlx4_mp_uninit_secondary(void);
+
#endif /* RTE_PMD_MLX4_H_ */
diff --git a/drivers/net/mlx4/mlx4_mp.c b/drivers/net/mlx4/mlx4_mp.c
new file mode 100644
index 0000000000..eaeb257348
--- /dev/null
+++ b/drivers/net/mlx4/mlx4_mp.c
@@ -0,0 +1,304 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2019 6WIND S.A.
+ * Copyright 2019 Mellanox Technologies, Ltd
+ */
+
+#include <assert.h>
+#include <stdio.h>
+#include <time.h>
+
+#include <rte_eal.h>
+#include <rte_ethdev_driver.h>
+#include <rte_string_fns.h>
+
+#include "mlx4.h"
+#include "mlx4_rxtx.h"
+#include "mlx4_utils.h"
+
+/**
+ * Initialize IPC message.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet structure.
+ * @param[out] msg
+ * Pointer to message to fill in.
+ * @param[in] type
+ * Message type.
+ */
+static inline void
+mp_init_msg(struct rte_eth_dev *dev, struct rte_mp_msg *msg,
+ enum mlx4_mp_req_type type)
+{
+ struct mlx4_mp_param *param = (struct mlx4_mp_param *)msg->param;
+
+ memset(msg, 0, sizeof(*msg));
+ strlcpy(msg->name, MLX4_MP_NAME, sizeof(msg->name));
+ msg->len_param = sizeof(*param);
+ param->type = type;
+ param->port_id = dev->data->port_id;
+}
+
+/**
+ * IPC message handler of primary process.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet structure.
+ * @param[in] peer
+ * Pointer to the peer socket path.
+ *
+ * @return
+ * 0 on success, negative errno value otherwise and rte_errno is set.
+ */
+static int
+mp_primary_handle(const struct rte_mp_msg *mp_msg, const void *peer)
+{
+ struct rte_mp_msg mp_res;
+ struct mlx4_mp_param *res = (struct mlx4_mp_param *)mp_res.param;
+ const struct mlx4_mp_param *param =
+ (const struct mlx4_mp_param *)mp_msg->param;
+ struct rte_eth_dev *dev;
+ struct mlx4_priv *priv;
+ int ret;
+
+ assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+ if (!rte_eth_dev_is_valid_port(param->port_id)) {
+ rte_errno = ENODEV;
+ ERROR("port %u invalid port ID", param->port_id);
+ return -rte_errno;
+ }
+ dev = &rte_eth_devices[param->port_id];
+ priv = dev->data->dev_private;
+ switch (param->type) {
+ case MLX4_MP_REQ_VERBS_CMD_FD:
+ mp_init_msg(dev, &mp_res, param->type);
+ mp_res.num_fds = 1;
+ mp_res.fds[0] = priv->ctx->cmd_fd;
+ res->result = 0;
+ ret = rte_mp_reply(&mp_res, peer);
+ break;
+ default:
+ rte_errno = EINVAL;
+ ERROR("port %u invalid mp request type", dev->data->port_id);
+ return -rte_errno;
+ }
+ return ret;
+}
+
+/**
+ * IPC message handler of a secondary process.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet structure.
+ * @param[in] peer
+ * Pointer to the peer socket path.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mp_secondary_handle(const struct rte_mp_msg *mp_msg, const void *peer)
+{
+ struct rte_mp_msg mp_res;
+ struct mlx4_mp_param *res = (struct mlx4_mp_param *)mp_res.param;
+ const struct mlx4_mp_param *param =
+ (const struct mlx4_mp_param *)mp_msg->param;
+ struct rte_eth_dev *dev;
+ int ret;
+
+ assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
+ if (!rte_eth_dev_is_valid_port(param->port_id)) {
+ rte_errno = ENODEV;
+ ERROR("port %u invalid port ID", param->port_id);
+ return -rte_errno;
+ }
+ dev = &rte_eth_devices[param->port_id];
+ switch (param->type) {
+ case MLX4_MP_REQ_START_RXTX:
+ INFO("port %u starting datapath", dev->data->port_id);
+ rte_mb();
+ dev->tx_pkt_burst = mlx4_tx_burst;
+ dev->rx_pkt_burst = mlx4_rx_burst;
+ mp_init_msg(dev, &mp_res, param->type);
+ res->result = 0;
+ ret = rte_mp_reply(&mp_res, peer);
+ break;
+ case MLX4_MP_REQ_STOP_RXTX:
+ INFO("port %u stopping datapath", dev->data->port_id);
+ dev->tx_pkt_burst = mlx4_tx_burst_removed;
+ dev->rx_pkt_burst = mlx4_rx_burst_removed;
+ rte_mb();
+ mp_init_msg(dev, &mp_res, param->type);
+ res->result = 0;
+ ret = rte_mp_reply(&mp_res, peer);
+ break;
+ default:
+ rte_errno = EINVAL;
+ ERROR("port %u invalid mp request type", dev->data->port_id);
+ return -rte_errno;
+ }
+ return ret;
+}
+
+/**
+ * Broadcast request of stopping/starting data-path to secondary processes.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet structure.
+ * @param[in] type
+ * Request type.
+ */
+static void
+mp_req_on_rxtx(struct rte_eth_dev *dev, enum mlx4_mp_req_type type)
+{
+ struct rte_mp_msg mp_req;
+ struct rte_mp_msg *mp_res;
+ struct rte_mp_reply mp_rep;
+ struct mlx4_mp_param *res __rte_unused;
+ struct timespec ts = {.tv_sec = MLX4_MP_REQ_TIMEOUT_SEC, .tv_nsec = 0};
+ int ret;
+ int i;
+
+ assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+ if (!mlx4_shared_data->secondary_cnt)
+ return;
+ if (type != MLX4_MP_REQ_START_RXTX && type != MLX4_MP_REQ_STOP_RXTX) {
+ ERROR("port %u unknown request (req_type %d)",
+ dev->data->port_id, type);
+ return;
+ }
+ mp_init_msg(dev, &mp_req, type);
+ ret = rte_mp_request_sync(&mp_req, &mp_rep, &ts);
+ if (ret) {
+ ERROR("port %u failed to request stop/start Rx/Tx (%d)",
+ dev->data->port_id, type);
+ goto exit;
+ }
+ if (mp_rep.nb_sent != mp_rep.nb_received) {
+ ERROR("port %u not all secondaries responded (req_type %d)",
+ dev->data->port_id, type);
+ goto exit;
+ }
+ for (i = 0; i < mp_rep.nb_received; i++) {
+ mp_res = &mp_rep.msgs[i];
+ res = (struct mlx4_mp_param *)mp_res->param;
+ if (res->result) {
+ ERROR("port %u request failed on secondary #%d",
+ dev->data->port_id, i);
+ goto exit;
+ }
+ }
+exit:
+ free(mp_rep.msgs);
+}
+
+/**
+ * Broadcast request of starting data-path to secondary processes. The request
+ * is synchronous.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet structure.
+ */
+void
+mlx4_mp_req_start_rxtx(struct rte_eth_dev *dev)
+{
+ mp_req_on_rxtx(dev, MLX4_MP_REQ_START_RXTX);
+}
+
+/**
+ * Broadcast request of stopping data-path to secondary processes. The request
+ * is synchronous.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet structure.
+ */
+void
+mlx4_mp_req_stop_rxtx(struct rte_eth_dev *dev)
+{
+ mp_req_on_rxtx(dev, MLX4_MP_REQ_STOP_RXTX);
+}
+
+/**
+ * Request Verbs command file descriptor from the primary process.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet structure.
+ *
+ * @return
+ * fd on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx4_mp_req_verbs_cmd_fd(struct rte_eth_dev *dev)
+{
+ struct rte_mp_msg mp_req;
+ struct rte_mp_msg *mp_res;
+ struct rte_mp_reply mp_rep;
+ struct mlx4_mp_param *res;
+ struct timespec ts = {.tv_sec = MLX4_MP_REQ_TIMEOUT_SEC, .tv_nsec = 0};
+ int ret;
+
+ assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
+ mp_init_msg(dev, &mp_req, MLX4_MP_REQ_VERBS_CMD_FD);
+ ret = rte_mp_request_sync(&mp_req, &mp_rep, &ts);
+ if (ret) {
+ ERROR("port %u request to primary process failed",
+ dev->data->port_id);
+ return -rte_errno;
+ }
+ assert(mp_rep.nb_received == 1);
+ mp_res = &mp_rep.msgs[0];
+ res = (struct mlx4_mp_param *)mp_res->param;
+ if (res->result) {
+ rte_errno = -res->result;
+ ERROR("port %u failed to get command FD from primary process",
+ dev->data->port_id);
+ ret = -rte_errno;
+ goto exit;
+ }
+ assert(mp_res->num_fds == 1);
+ ret = mp_res->fds[0];
+ DEBUG("port %u command FD from primary is %d",
+ dev->data->port_id, ret);
+exit:
+ free(mp_rep.msgs);
+ return ret;
+}
+
+/**
+ * Initialize by primary process.
+ */
+void
+mlx4_mp_init_primary(void)
+{
+ assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+ rte_mp_action_register(MLX4_MP_NAME, mp_primary_handle);
+}
+
+/**
+ * Un-initialize by primary process.
+ */
+void
+mlx4_mp_uninit_primary(void)
+{
+ assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+ rte_mp_action_unregister(MLX4_MP_NAME);
+}
+
+/**
+ * Initialize by secondary process.
+ */
+void
+mlx4_mp_init_secondary(void)
+{
+ assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
+ rte_mp_action_register(MLX4_MP_NAME, mp_secondary_handle);
+}
+
+/**
+ * Un-initialize by secondary process.
+ */
+void
+mlx4_mp_uninit_secondary(void)
+{
+ assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
+ rte_mp_action_unregister(MLX4_MP_NAME);
+}
diff --git a/drivers/net/mlx4/mlx4_mr.c b/drivers/net/mlx4/mlx4_mr.c
index e4be46ab2a..01894faecf 100644
--- a/drivers/net/mlx4/mlx4_mr.c
+++ b/drivers/net/mlx4/mlx4_mr.c
@@ -489,6 +489,8 @@ mlx4_mr_garbage_collect(struct rte_eth_dev *dev)
struct mlx4_mr *mr_next;
struct mlx4_mr_list free_list = LIST_HEAD_INITIALIZER(free_list);
+ /* Must be called from the primary process. */
+ assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
/*
* MR can't be freed with holding the lock because rte_free() could call
* memory free callback function. This will be a deadlock situation.
@@ -561,6 +563,14 @@ mlx4_mr_create(struct rte_eth_dev *dev, struct mlx4_mr_cache *entry,
DEBUG("port %u creating a MR using address (%p)",
dev->data->port_id, (void *)addr);
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ WARN("port %u using address (%p) of unregistered mempool"
+ " in secondary process, please create mempool"
+ " before rte_eth_dev_start()",
+ dev->data->port_id, (void *)addr);
+ rte_errno = EPERM;
+ goto err_nolock;
+ }
/*
* Release detached MRs if any. This can't be called with holding either
* memory_hotplug_lock or priv->mr.rwlock. MRs on the free list have
@@ -890,14 +900,17 @@ mlx4_mr_mem_event_cb(enum rte_mem_event event_type, const void *addr,
size_t len, void *arg __rte_unused)
{
struct mlx4_priv *priv;
+ struct mlx4_dev_list *dev_list = &mlx4_shared_data->mem_event_cb_list;
+ /* Must be called from the primary process. */
+ assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
switch (event_type) {
case RTE_MEM_EVENT_FREE:
- rte_rwlock_read_lock(&mlx4_mem_event_rwlock);
+ rte_rwlock_read_lock(&mlx4_shared_data->mem_event_rwlock);
/* Iterate all the existing mlx4 devices. */
- LIST_FOREACH(priv, &mlx4_mem_event_cb_list, mem_event_cb)
+ LIST_FOREACH(priv, dev_list, mem_event_cb)
mlx4_mr_mem_event_free_cb(ETH_DEV(priv), addr, len);
- rte_rwlock_read_unlock(&mlx4_mem_event_rwlock);
+ rte_rwlock_read_unlock(&mlx4_shared_data->mem_event_rwlock);
break;
case RTE_MEM_EVENT_ALLOC:
default:
@@ -1130,6 +1143,7 @@ mlx4_mr_update_ext_mp_cb(struct rte_mempool *mp, void *opaque,
struct mlx4_mr_cache entry;
uint32_t lkey;
+ assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
/* If already registered, it should return. */
rte_rwlock_read_lock(&priv->mr.rwlock);
lkey = mr_lookup_dev(dev, &entry, addr);
@@ -1225,6 +1239,14 @@ mlx4_tx_update_ext_mp(struct txq *txq, uintptr_t addr, struct rte_mempool *mp)
struct mlx4_mr_ctrl *mr_ctrl = &txq->mr_ctrl;
struct mlx4_priv *priv = txq->priv;
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ WARN("port %u using address (%p) from unregistered mempool"
+ " having externally allocated memory"
+ " in secondary process, please create mempool"
+ " prior to rte_eth_dev_start()",
+ PORT_ID(priv), (void *)addr);
+ return UINT32_MAX;
+ }
mlx4_mr_update_ext_mp(ETH_DEV(priv), mr_ctrl, mp);
return mlx4_tx_addr2mr_bh(txq, addr);
}
@@ -1336,9 +1358,9 @@ mlx4_mr_release(struct rte_eth_dev *dev)
struct mlx4_mr *mr_next = LIST_FIRST(&priv->mr.mr_list);
/* Remove from memory callback device list. */
- rte_rwlock_write_lock(&mlx4_mem_event_rwlock);
+ rte_rwlock_write_lock(&mlx4_shared_data->mem_event_rwlock);
LIST_REMOVE(priv, mem_event_cb);
- rte_rwlock_write_unlock(&mlx4_mem_event_rwlock);
+ rte_rwlock_write_unlock(&mlx4_shared_data->mem_event_rwlock);
#ifndef NDEBUG
mlx4_mr_dump_dev(dev);
#endif
diff --git a/drivers/net/mlx4/mlx4_prm.h b/drivers/net/mlx4/mlx4_prm.h
index aef77ba06e..b3e11dde25 100644
--- a/drivers/net/mlx4/mlx4_prm.h
+++ b/drivers/net/mlx4/mlx4_prm.h
@@ -77,7 +77,9 @@ struct mlx4_sq {
uint32_t owner_opcode;
/**< Default owner opcode with HW valid owner bit. */
uint32_t stamp; /**< Stamp value with an invalid HW owner bit. */
- volatile uint32_t *db; /**< Pointer to the doorbell. */
+ volatile uint32_t *qp_sdb; /**< Pointer to the doorbell. */
+ volatile uint32_t *db; /**< Pointer to the doorbell remapped. */
+ off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
uint32_t doorbell_qpn; /**< qp number to write to the doorbell. */
};
diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c
index 8c88effcd1..f22f1ba559 100644
--- a/drivers/net/mlx4/mlx4_rxtx.c
+++ b/drivers/net/mlx4/mlx4_rxtx.c
@@ -1365,6 +1365,7 @@ mlx4_tx_burst_removed(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
(void)dpdk_txq;
(void)pkts;
(void)pkts_n;
+ rte_mb();
return 0;
}
@@ -1390,5 +1391,6 @@ mlx4_rx_burst_removed(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
(void)dpdk_rxq;
(void)pkts;
(void)pkts_n;
+ rte_mb();
return 0;
}
diff --git a/drivers/net/mlx4/mlx4_rxtx.h b/drivers/net/mlx4/mlx4_rxtx.h
index 9409602b32..7d7a8988ed 100644
--- a/drivers/net/mlx4/mlx4_rxtx.h
+++ b/drivers/net/mlx4/mlx4_rxtx.h
@@ -152,6 +152,7 @@ uint16_t mlx4_rx_burst_removed(void *dpdk_rxq, struct rte_mbuf **pkts,
/* mlx4_txq.c */
+int mlx4_tx_uar_remap(struct rte_eth_dev *dev, int fd);
uint64_t mlx4_get_tx_port_offloads(struct mlx4_priv *priv);
int mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx,
uint16_t desc, unsigned int socket,
diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c
index 2dc198e77f..698a648c8d 100644
--- a/drivers/net/mlx4/mlx4_txq.c
+++ b/drivers/net/mlx4/mlx4_txq.c
@@ -13,7 +13,9 @@
#include <stddef.h>
#include <stdint.h>
#include <string.h>
+#include <sys/mman.h>
#include <inttypes.h>
+#include <unistd.h>
/* Verbs headers do not support -pedantic. */
#ifdef PEDANTIC
@@ -38,6 +40,100 @@
#include "mlx4_utils.h"
/**
+ * Mmap TX UAR (HW doorbell) pages into the reserved UAR address space.
+ * Both primary and secondary processes do the mmap so that the UAR
+ * addresses are aligned.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet device.
+ * @param fd
+ * Verbs file descriptor to map UAR pages.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
+int
+mlx4_tx_uar_remap(struct rte_eth_dev *dev, int fd)
+{
+ unsigned int i, j;
+ const unsigned int txqs_n = dev->data->nb_tx_queues;
+ uintptr_t pages[txqs_n];
+ unsigned int pages_n = 0;
+ uintptr_t uar_va;
+ uintptr_t off;
+ void *addr;
+ void *ret;
+ struct txq *txq;
+ int already_mapped;
+ size_t page_size = sysconf(_SC_PAGESIZE);
+
+ memset(pages, 0, txqs_n * sizeof(uintptr_t));
+ /*
+ * As in rdma-core, UARs are mapped at OS page size granularity.
+ * Use the aligned address to avoid duplicate mmap.
+ * See the libmlx4 function mlx4_init_context().
+ */
+ for (i = 0; i != txqs_n; ++i) {
+ txq = dev->data->tx_queues[i];
+ if (!txq)
+ continue;
+ /* UAR addr from Verbs, used to find duplicates and the offset in page. */
+ uar_va = (uintptr_t)txq->msq.qp_sdb;
+ off = uar_va & (page_size - 1); /* offset in page. */
+ uar_va = RTE_ALIGN_FLOOR(uar_va, page_size); /* page addr. */
+ already_mapped = 0;
+ for (j = 0; j != pages_n; ++j) {
+ if (pages[j] == uar_va) {
+ already_mapped = 1;
+ break;
+ }
+ }
+ /* new address in reserved UAR address space. */
+ addr = RTE_PTR_ADD(mlx4_shared_data->uar_base,
+ uar_va & (uintptr_t)(MLX4_UAR_SIZE - 1));
+ if (!already_mapped) {
+ pages[pages_n++] = uar_va;
+ /* fixed mmap to specified address in reserved
+ * address space.
+ */
+ ret = mmap(addr, page_size,
+ PROT_WRITE, MAP_FIXED | MAP_SHARED, fd,
+ txq->msq.uar_mmap_offset);
+ if (ret != addr) {
+ /* fixed mmap has to return same address. */
+ ERROR("port %u call to mmap failed on UAR"
+ " for txq %u",
+ dev->data->port_id, i);
+ rte_errno = ENXIO;
+ return -rte_errno;
+ }
+ }
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* save once. */
+ txq->msq.db = RTE_PTR_ADD((void *)addr, off);
+ else
+ assert(txq->msq.db ==
+ RTE_PTR_ADD((void *)addr, off));
+ }
+ return 0;
+}
+#else
+int
+mlx4_tx_uar_remap(struct rte_eth_dev *dev __rte_unused, int fd __rte_unused)
+{
+ /*
+ * Even if rdma-core doesn't support UAR remap, primary process
+ * shouldn't be interrupted.
+ */
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+ return 0;
+ ERROR("UAR remap is not supported");
+ rte_errno = ENOTSUP;
+ return -rte_errno;
+}
+#endif
+
+/**
* Free Tx queue elements.
*
* @param txq
@@ -89,7 +185,13 @@ mlx4_txq_fill_dv_obj_info(struct txq *txq, struct mlx4dv_obj *mlxdv)
sq->owner_opcode = MLX4_OPCODE_SEND | (0u << MLX4_SQ_OWNER_BIT);
sq->stamp = rte_cpu_to_be_32(MLX4_SQ_STAMP_VAL |
(0u << MLX4_SQ_OWNER_BIT));
+#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
+ sq->uar_mmap_offset = dqp->uar_mmap_offset;
+ sq->qp_sdb = dqp->sdb;
+#else
+ sq->uar_mmap_offset = -1; /* Make mmap() fail. */
sq->db = dqp->sdb;
+#endif
sq->doorbell_qpn = dqp->doorbell_qpn;
cq->buf = dcq->buf.buf;
cq->cqe_cnt = dcq->cqe_cnt;
@@ -307,6 +409,11 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
goto error;
}
/* Retrieve device queue information. */
+#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
+ dv_qp = (struct mlx4dv_qp){
+ .comp_mask = MLX4DV_QP_MASK_UAR_MMAP_OFFSET,
+ };
+#endif
mlxdv.cq.in = txq->cq;
mlxdv.cq.out = &dv_cq;
mlxdv.qp.in = txq->qp;
@@ -318,6 +425,12 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
" accessing the device queues", (void *)dev);
goto error;
}
+#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
+ if (!(dv_qp.comp_mask & MLX4DV_QP_MASK_UAR_MMAP_OFFSET)) {
+ WARN("%p: failed to obtain UAR mmap offset", (void *)dev);
+ dv_qp.uar_mmap_offset = -1; /* Make mmap() fail. */
+ }
+#endif
mlx4_txq_fill_dv_obj_info(txq, &mlxdv);
/* Save first wqe pointer in the first element. */
(&(*txq->elts)[0])->wqe =
--
2.11.0
^ permalink raw reply [flat|nested] 30+ messages in thread
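The UAR remapping above rests on a plain POSIX idiom: reserve an address window
with an anonymous PROT_NONE mmap, then map the real doorbell pages into it with
MAP_FIXED so primary and secondary processes end up with identical virtual
addresses. A minimal standalone sketch of that idiom, with an illustrative
window size and /dev/zero standing in for the Verbs command fd:

#include <stdio.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

int
main(void)
{
	size_t page = (size_t)sysconf(_SC_PAGESIZE);
	size_t window = 1UL << 20;	/* illustrative 1 MB reservation */
	void *base;
	void *want;
	void *got;
	int fd;

	/* Step 1: reserve address space only; PROT_NONE costs no memory. */
	base = mmap(NULL, window, PROT_NONE,
		    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (base == MAP_FAILED)
		return 1;
	/* Step 2: later, map a real page at a fixed offset inside the window.
	 * In the driver this is the UAR page of the Verbs command fd at its
	 * uar_mmap_offset; /dev/zero stands in here for illustration.
	 */
	fd = open("/dev/zero", O_RDWR);
	if (fd < 0)
		return 1;
	want = (char *)base + 4 * page;
	got = mmap(want, page, PROT_READ | PROT_WRITE,
		   MAP_FIXED | MAP_SHARED, fd, 0);
	if (got != want)	/* MAP_FIXED must land exactly on the hint */
		return 1;
	printf("window %p, fixed page at %p\n", base, got);
	munmap(base, window);
	close(fd);
	return 0;
}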
* [dpdk-dev] [PATCH v3 3/3] net/mlx4: add secondary process support
2019-04-01 21:15 ` [dpdk-dev] [PATCH v3 3/3] net/mlx4: add secondary process support Yongseok Koh
@ 2019-04-01 21:15 ` Yongseok Koh
0 siblings, 0 replies; 30+ messages in thread
From: Yongseok Koh @ 2019-04-01 21:15 UTC (permalink / raw)
To: shahafs; +Cc: dev
In order to support secondary processes, a few features are required.
a) The rdma-core library should allocate device resources using DPDK's
memory allocator.
b) UAR must be remapped for secondary processes. To avoid using a different
data structure for secondary processes, the PMD reserves an identical
virtual address space in both primary and secondary processes.
c) An IPC channel is necessary; it is easily set up with the rte_mp APIs.
Through this channel, the Verbs command FD is delivered to the secondary
process and device stop/start events are broadcast from the primary
process (a minimal sketch of this exchange follows the sign-off).
Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
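The FD exchange in c) boils down to DPDK's generic rte_mp request/reply
pattern. A minimal standalone sketch, assuming a handler registered under a
made-up "example_mp" name; the struct layout and the literal FD value are
placeholders, the real implementation is mlx4_mp.c in the diff below:

#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <rte_eal.h>	/* rte_mp_* IPC API */

/* Placeholder payload layout (the driver's real one is struct mlx4_mp_param). */
struct mp_param {
	int type;	/* request type, e.g. "send me the Verbs command FD" */
	int port_id;
	int result;
};

/* Primary side: handler registered once at init with
 *	rte_mp_action_register("example_mp", primary_handle);
 * It replies to the requesting peer with one file descriptor attached.
 */
static int
primary_handle(const struct rte_mp_msg *req, const void *peer)
{
	struct rte_mp_msg rep;

	(void)req;
	memset(&rep, 0, sizeof(rep));
	strcpy(rep.name, "example_mp");		/* must match the request name */
	rep.len_param = sizeof(struct mp_param);
	rep.num_fds = 1;
	rep.fds[0] = 42;	/* placeholder; the driver sends priv->ctx->cmd_fd */
	return rte_mp_reply(&rep, peer);	/* unicast reply to this peer */
}

/* Secondary side: synchronous request, the FD arrives in the reply. */
static int
request_cmd_fd(void)
{
	struct rte_mp_msg req;
	struct rte_mp_reply replies;
	struct timespec ts = { .tv_sec = 5, .tv_nsec = 0 };
	int fd = -1;

	memset(&req, 0, sizeof(req));
	strcpy(req.name, "example_mp");
	req.len_param = sizeof(struct mp_param);
	((struct mp_param *)(void *)req.param)->type = 1;
	if (rte_mp_request_sync(&req, &replies, &ts) != 0)
		return -1;		/* no reply array allocated on failure */
	if (replies.nb_received == 1)
		fd = replies.msgs[0].fds[0]; /* FD passed over the unix socket */
	free(replies.msgs);		/* caller frees the reply array */
	return fd;
}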
doc/guides/nics/features/mlx4.ini | 1 +
doc/guides/nics/mlx4.rst | 10 +
drivers/net/mlx4/Makefile | 6 +
drivers/net/mlx4/meson.build | 3 +
drivers/net/mlx4/mlx4.c | 384 ++++++++++++++++++++++++++++++++++++--
drivers/net/mlx4/mlx4.h | 61 ++++++
drivers/net/mlx4/mlx4_mp.c | 304 ++++++++++++++++++++++++++++++
drivers/net/mlx4/mlx4_mr.c | 32 +++-
drivers/net/mlx4/mlx4_prm.h | 4 +-
drivers/net/mlx4/mlx4_rxtx.c | 2 +
drivers/net/mlx4/mlx4_rxtx.h | 1 +
drivers/net/mlx4/mlx4_txq.c | 113 +++++++++++
12 files changed, 898 insertions(+), 23 deletions(-)
create mode 100644 drivers/net/mlx4/mlx4_mp.c
diff --git a/doc/guides/nics/features/mlx4.ini b/doc/guides/nics/features/mlx4.ini
index a211aef332..4502aa2a87 100644
--- a/doc/guides/nics/features/mlx4.ini
+++ b/doc/guides/nics/features/mlx4.ini
@@ -29,6 +29,7 @@ Packet type parsing = Y
Basic stats = Y
Stats per queue = Y
FW version = Y
+Multiprocess aware = Y
Other kdrv = Y
Power8 = Y
x86-32 = Y
diff --git a/doc/guides/nics/mlx4.rst b/doc/guides/nics/mlx4.rst
index 4ad361a2c2..cd34838f41 100644
--- a/doc/guides/nics/mlx4.rst
+++ b/doc/guides/nics/mlx4.rst
@@ -145,6 +145,16 @@ below.
Limitations
-----------
+- For secondary processes:
+
+ - Forked secondary processes are not supported.
+ - All mempools must be initialized before rte_eth_dev_start().
+ - External memory not registered in the EAL memseg list cannot be used for
+ DMA unless such memory has been registered by ``mlx4_mr_update_ext_mp()`` in
+ the primary process and remapped to the same virtual address in the secondary
+ process. If the external memory is registered by the primary process but has
+ a different virtual address in the secondary process, unexpected errors may happen.
+
- CRC stripping is supported by default and always reported as "true".
The ability to enable/disable CRC stripping requires OFED version
4.3-1.5.0.0 and above or rdma-core version v18 and above.
diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile
index b527efd625..8126b0dfc6 100644
--- a/drivers/net/mlx4/Makefile
+++ b/drivers/net/mlx4/Makefile
@@ -18,6 +18,7 @@ ifneq ($(CONFIG_RTE_IBVERBS_LINK_DLOPEN),y)
SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_glue.c
endif
SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_intr.c
+SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_mp.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_mr.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_rxq.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_rxtx.c
@@ -93,6 +94,11 @@ mlx4_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
enum MLX4DV_SET_CTX_ATTR_BUF_ALLOCATORS \
$(AUTOCONF_OUTPUT)
$Q sh -- '$<' '$@' \
+ HAVE_IBV_MLX4_UAR_MMAP_OFFSET \
+ infiniband/mlx4dv.h \
+ enum MLX4DV_QP_MASK_UAR_MMAP_OFFSET \
+ $(AUTOCONF_OUTPUT)
+ $Q sh -- '$<' '$@' \
HAVE_IBV_MLX4_WQE_LSO_SEG \
infiniband/mlx4dv.h \
type 'struct mlx4_wqe_lso_seg' \
diff --git a/drivers/net/mlx4/meson.build b/drivers/net/mlx4/meson.build
index 650e2c8fbc..de020701d1 100644
--- a/drivers/net/mlx4/meson.build
+++ b/drivers/net/mlx4/meson.build
@@ -33,6 +33,7 @@ if build
'mlx4_ethdev.c',
'mlx4_flow.c',
'mlx4_intr.c',
+ 'mlx4_mp.c',
'mlx4_mr.c',
'mlx4_rxq.c',
'mlx4_rxtx.c',
@@ -76,6 +77,8 @@ if build
has_sym_args = [
[ 'HAVE_IBV_MLX4_BUF_ALLOCATORS', 'infiniband/mlx4dv.h',
'MLX4DV_SET_CTX_ATTR_BUF_ALLOCATORS' ],
+ [ 'HAVE_IBV_MLX4_UAR_MMAP_OFFSET', 'infiniband/mlx4dv.h',
+ 'MLX4DV_QP_MASK_UAR_MMAP_OFFSET' ],
]
config = configuration_data()
foreach arg:has_sym_args
diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 0e0b035df0..315640a6d7 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -17,6 +17,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <sys/mman.h>
#include <unistd.h>
/* Verbs headers do not support -pedantic. */
@@ -48,10 +49,16 @@
#include "mlx4_rxtx.h"
#include "mlx4_utils.h"
-struct mlx4_dev_list mlx4_mem_event_cb_list =
- LIST_HEAD_INITIALIZER(mlx4_mem_event_cb_list);
+static const char *MZ_MLX4_PMD_SHARED_DATA = "mlx4_pmd_shared_data";
-rte_rwlock_t mlx4_mem_event_rwlock = RTE_RWLOCK_INITIALIZER;
+/* Shared memory between primary and secondary processes. */
+struct mlx4_shared_data *mlx4_shared_data;
+
+/* Spinlock for mlx4_shared_data allocation. */
+static rte_spinlock_t mlx4_shared_data_lock = RTE_SPINLOCK_INITIALIZER;
+
+/* Process local data for secondary processes. */
+static struct mlx4_local_data mlx4_local_data;
/** Configuration structure for device arguments. */
struct mlx4_conf {
@@ -69,6 +76,77 @@ const char *pmd_mlx4_init_params[] = {
static void mlx4_dev_stop(struct rte_eth_dev *dev);
+/**
+ * Initialize shared data between primary and secondary process.
+ *
+ * A memzone is reserved by primary process and secondary processes attach to
+ * the memzone.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx4_init_shared_data(void)
+{
+ const struct rte_memzone *mz;
+ int ret = 0;
+
+ rte_spinlock_lock(&mlx4_shared_data_lock);
+ if (mlx4_shared_data == NULL) {
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ /* Allocate shared memory. */
+ mz = rte_memzone_reserve(MZ_MLX4_PMD_SHARED_DATA,
+ sizeof(*mlx4_shared_data),
+ SOCKET_ID_ANY, 0);
+ if (mz == NULL) {
+ ERROR("Cannot allocate mlx4 shared data\n");
+ ret = -rte_errno;
+ goto error;
+ }
+ mlx4_shared_data = mz->addr;
+ memset(mlx4_shared_data, 0, sizeof(*mlx4_shared_data));
+ rte_spinlock_init(&mlx4_shared_data->lock);
+ } else {
+ /* Lookup allocated shared memory. */
+ mz = rte_memzone_lookup(MZ_MLX4_PMD_SHARED_DATA);
+ if (mz == NULL) {
+ ERROR("Cannot attach mlx4 shared data\n");
+ ret = -rte_errno;
+ goto error;
+ }
+ mlx4_shared_data = mz->addr;
+ memset(&mlx4_local_data, 0, sizeof(mlx4_local_data));
+ }
+ }
+error:
+ rte_spinlock_unlock(&mlx4_shared_data_lock);
+ return ret;
+}
+
+/**
+ * Uninitialize shared data between primary and secondary process.
+ *
+ * The pointer of secondary process is dereferenced and primary process frees
+ * the memzone.
+ */
+static void
+mlx4_uninit_shared_data(void)
+{
+ const struct rte_memzone *mz;
+
+ rte_spinlock_lock(&mlx4_shared_data_lock);
+ if (mlx4_shared_data) {
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ mz = rte_memzone_lookup(MZ_MLX4_PMD_SHARED_DATA);
+ rte_memzone_free(mz);
+ } else {
+ memset(&mlx4_local_data, 0, sizeof(mlx4_local_data));
+ }
+ mlx4_shared_data = NULL;
+ }
+ rte_spinlock_unlock(&mlx4_shared_data_lock);
+}
+
#ifdef HAVE_IBV_MLX4_BUF_ALLOCATORS
/**
* Verbs callback to allocate a memory. This function should allocate the space
@@ -181,6 +259,11 @@ mlx4_dev_start(struct rte_eth_dev *dev)
return 0;
DEBUG("%p: attaching configured flows to all RX queues", (void *)dev);
priv->started = 1;
+ ret = mlx4_tx_uar_remap(dev, priv->ctx->cmd_fd);
+ if (ret) {
+ ERROR("%p: cannot remap UAR", (void *)dev);
+ goto err;
+ }
ret = mlx4_rss_init(priv);
if (ret) {
ERROR("%p: cannot initialize RSS resources: %s",
@@ -208,6 +291,8 @@ mlx4_dev_start(struct rte_eth_dev *dev)
rte_wmb();
dev->tx_pkt_burst = mlx4_tx_burst;
dev->rx_pkt_burst = mlx4_rx_burst;
+ /* Enable datapath on secondary process. */
+ mlx4_mp_req_start_rxtx(dev);
return 0;
err:
mlx4_dev_stop(dev);
@@ -226,6 +311,8 @@ static void
mlx4_dev_stop(struct rte_eth_dev *dev)
{
struct mlx4_priv *priv = dev->data->dev_private;
+ const size_t page_size = sysconf(_SC_PAGESIZE);
+ int i;
if (!priv->started)
return;
@@ -234,9 +321,20 @@ mlx4_dev_stop(struct rte_eth_dev *dev)
dev->tx_pkt_burst = mlx4_tx_burst_removed;
dev->rx_pkt_burst = mlx4_rx_burst_removed;
rte_wmb();
+ /* Disable datapath on secondary process. */
+ mlx4_mp_req_stop_rxtx(dev);
mlx4_flow_sync(priv, NULL);
mlx4_rxq_intr_disable(priv);
mlx4_rss_deinit(priv);
+ for (i = 0; i != dev->data->nb_tx_queues; ++i) {
+ struct txq *txq;
+
+ txq = dev->data->tx_queues[i];
+ if (!txq)
+ continue;
+ munmap((void *)RTE_ALIGN_FLOOR((uintptr_t)txq->msq.db,
+ page_size), page_size);
+ }
}
/**
@@ -259,6 +357,8 @@ mlx4_dev_close(struct rte_eth_dev *dev)
dev->rx_pkt_burst = mlx4_rx_burst_removed;
dev->tx_pkt_burst = mlx4_tx_burst_removed;
rte_wmb();
+ /* Disable datapath on secondary process. */
+ mlx4_mp_req_stop_rxtx(dev);
mlx4_flow_clean(priv);
mlx4_rss_deinit(priv);
for (i = 0; i != dev->data->nb_rx_queues; ++i)
@@ -310,6 +410,14 @@ static const struct eth_dev_ops mlx4_dev_ops = {
.is_removed = mlx4_is_removed,
};
+/* Available operations from secondary process. */
+static const struct eth_dev_ops mlx4_dev_sec_ops = {
+ .stats_get = mlx4_stats_get,
+ .stats_reset = mlx4_stats_reset,
+ .fw_version_get = mlx4_fw_version_get,
+ .dev_infos_get = mlx4_dev_infos_get,
+};
+
/**
* Get PCI information from struct ibv_device.
*
@@ -549,6 +657,200 @@ mlx4_hw_rss_sup(struct ibv_context *ctx, struct ibv_pd *pd,
static struct rte_pci_driver mlx4_driver;
+static int
+find_lower_va_bound(const struct rte_memseg_list *msl,
+ const struct rte_memseg *ms, void *arg)
+{
+ void **addr = arg;
+
+ if (msl->external)
+ return 0;
+ if (*addr == NULL)
+ *addr = ms->addr;
+ else
+ *addr = RTE_MIN(*addr, ms->addr);
+
+ return 0;
+}
+
+/**
+ * Reserve UAR address space for primary process.
+ *
+ * A process-local resource is used by both primary and secondary to avoid
+ * duplicate reservation. The space has to be available in both the primary
+ * and secondary processes; the TXQ UAR is mapped into this area using fixed
+ * mmap without double-checking.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx4_uar_init_primary(void)
+{
+ struct mlx4_shared_data *sd = mlx4_shared_data;
+ void *addr = (void *)0;
+
+ if (sd->uar_base)
+ return 0;
+ /* find out lower bound of hugepage segments */
+ rte_memseg_walk(find_lower_va_bound, &addr);
+ /* keep distance to hugepages to minimize potential conflicts. */
+ addr = RTE_PTR_SUB(addr, (uintptr_t)(MLX4_UAR_OFFSET + MLX4_UAR_SIZE));
+ /* anonymous mmap, no real memory consumption. */
+ addr = mmap(addr, MLX4_UAR_SIZE,
+ PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (addr == MAP_FAILED) {
+ ERROR("failed to reserve UAR address space, please"
+ " adjust MLX4_UAR_SIZE or try --base-virtaddr");
+ rte_errno = ENOMEM;
+ return -rte_errno;
+ }
+ /* Accept either the same addr or a new addr returned from mmap if the
+ * target range is occupied.
+ */
+ INFO("reserved UAR address space: %p", addr);
+ sd->uar_base = addr; /* for primary and secondary UAR re-mmap. */
+ return 0;
+}
+
+/**
+ * Unmap UAR address space reserved for primary process.
+ */
+static void
+mlx4_uar_uninit_primary(void)
+{
+ struct mlx4_shared_data *sd = mlx4_shared_data;
+
+ if (!sd->uar_base)
+ return;
+ munmap(sd->uar_base, MLX4_UAR_SIZE);
+ sd->uar_base = NULL;
+}
+
+/**
+ * Reserve UAR address space for secondary process, align with primary process.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx4_uar_init_secondary(void)
+{
+ struct mlx4_shared_data *sd = mlx4_shared_data;
+ struct mlx4_local_data *ld = &mlx4_local_data;
+ void *addr;
+
+ if (ld->uar_base) { /* Already reserved. */
+ assert(sd->uar_base == ld->uar_base);
+ return 0;
+ }
+ assert(sd->uar_base);
+ /* anonymous mmap, no real memory consumption. */
+ addr = mmap(sd->uar_base, MLX4_UAR_SIZE,
+ PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (addr == MAP_FAILED) {
+ ERROR("UAR mmap failed: %p size: %llu",
+ sd->uar_base, MLX4_UAR_SIZE);
+ rte_errno = ENXIO;
+ return -rte_errno;
+ }
+ if (sd->uar_base != addr) {
+ ERROR("UAR address %p size %llu occupied, please"
+ " adjust MLX4_UAR_OFFSET or try EAL parameter"
+ " --base-virtaddr",
+ sd->uar_base, MLX4_UAR_SIZE);
+ rte_errno = ENXIO;
+ return -rte_errno;
+ }
+ ld->uar_base = addr;
+ INFO("reserved UAR address space: %p", addr);
+ return 0;
+}
+
+/**
+ * Unmap UAR address space reserved for secondary process.
+ */
+static void
+mlx4_uar_uninit_secondary(void)
+{
+ struct mlx4_local_data *ld = &mlx4_local_data;
+
+ if (!ld->uar_base)
+ return;
+ munmap(ld->uar_base, MLX4_UAR_SIZE);
+ ld->uar_base = NULL;
+}
+
+/**
+ * PMD global initialization.
+ *
+ * Independent of any individual device, this function initializes global
+ * per-PMD data structures, distinguishing primary and secondary processes.
+ * Hence, each initialization is done once per process.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx4_init_once(void)
+{
+ struct mlx4_shared_data *sd;
+ struct mlx4_local_data *ld = &mlx4_local_data;
+ int ret;
+
+ if (mlx4_init_shared_data())
+ return -rte_errno;
+ sd = mlx4_shared_data;
+ assert(sd);
+ rte_spinlock_lock(&sd->lock);
+ switch (rte_eal_process_type()) {
+ case RTE_PROC_PRIMARY:
+ if (sd->init_done)
+ break;
+ LIST_INIT(&sd->mem_event_cb_list);
+ rte_rwlock_init(&sd->mem_event_rwlock);
+ rte_mem_event_callback_register("MLX4_MEM_EVENT_CB",
+ mlx4_mr_mem_event_cb, NULL);
+ mlx4_mp_init_primary();
+ ret = mlx4_uar_init_primary();
+ if (ret)
+ goto error;
+ sd->init_done = true;
+ break;
+ case RTE_PROC_SECONDARY:
+ if (ld->init_done)
+ break;
+ mlx4_mp_init_secondary();
+ ret = mlx4_uar_init_secondary();
+ if (ret)
+ goto error;
+ ++sd->secondary_cnt;
+ ld->init_done = true;
+ break;
+ default:
+ break;
+ }
+ rte_spinlock_unlock(&sd->lock);
+ return 0;
+error:
+ switch (rte_eal_process_type()) {
+ case RTE_PROC_PRIMARY:
+ mlx4_uar_uninit_primary();
+ mlx4_mp_uninit_primary();
+ rte_mem_event_callback_unregister("MLX4_MEM_EVENT_CB", NULL);
+ break;
+ case RTE_PROC_SECONDARY:
+ mlx4_uar_uninit_secondary();
+ mlx4_mp_uninit_secondary();
+ break;
+ default:
+ break;
+ }
+ rte_spinlock_unlock(&sd->lock);
+ mlx4_uninit_shared_data();
+ return -rte_errno;
+}
+
/**
* DPDK callback to register a PCI device.
*
@@ -579,6 +881,12 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
int i;
(void)pci_drv;
+ err = mlx4_init_once();
+ if (err) {
+ ERROR("unable to init PMD global data: %s",
+ strerror(rte_errno));
+ return -rte_errno;
+ }
assert(pci_drv == &mlx4_driver);
list = mlx4_glue->get_device_list(&i);
if (list == NULL) {
@@ -659,6 +967,7 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
struct mlx4_priv *priv = NULL;
struct rte_eth_dev *eth_dev = NULL;
struct ether_addr mac;
+ char name[RTE_ETH_NAME_MAX_LEN];
/* If port is not enabled, skip. */
if (!(conf.ports.enabled & (1 << i)))
@@ -669,6 +978,51 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
err = ENODEV;
goto port_error;
}
+ snprintf(name, sizeof(name), "%s port %u",
+ mlx4_glue->get_device_name(ibv_dev), port);
+ if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+ eth_dev = rte_eth_dev_attach_secondary(name);
+ if (eth_dev == NULL) {
+ ERROR("can not attach rte ethdev");
+ rte_errno = ENOMEM;
+ err = rte_errno;
+ goto error;
+ }
+ priv = eth_dev->data->dev_private;
+ if (!priv->verbs_alloc_ctx.enabled) {
+ ERROR("secondary process is not supported"
+ " due to lack of external allocator"
+ " from Verbs");
+ rte_errno = ENOTSUP;
+ err = rte_errno;
+ goto error;
+ }
+ eth_dev->device = &pci_dev->device;
+ eth_dev->dev_ops = &mlx4_dev_sec_ops;
+ /* Receive command fd from primary process. */
+ err = mlx4_mp_req_verbs_cmd_fd(eth_dev);
+ if (err < 0) {
+ err = rte_errno;
+ goto error;
+ }
+ /* Remap UAR for Tx queues. */
+ err = mlx4_tx_uar_remap(eth_dev, err);
+ if (err) {
+ err = rte_errno;
+ goto error;
+ }
+ /*
+ * Ethdev pointer is still required as input since
+ * the primary device is not accessible from the
+ * secondary process.
+ */
+ eth_dev->tx_pkt_burst = mlx4_tx_burst;
+ eth_dev->rx_pkt_burst = mlx4_rx_burst;
+ claim_zero(mlx4_glue->close_device(ctx));
+ rte_eth_copy_pci_info(eth_dev, pci_dev);
+ rte_eth_dev_probing_finish(eth_dev);
+ continue;
+ }
/* Check port status. */
err = mlx4_glue->query_port(ctx, port, &port_attr);
if (err) {
@@ -774,14 +1128,7 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
/* Get actual MTU if possible. */
mlx4_mtu_get(priv, &priv->mtu);
DEBUG("port %u MTU is %u", priv->port, priv->mtu);
- /* from rte_ethdev.c */
- {
- char name[RTE_ETH_NAME_MAX_LEN];
-
- snprintf(name, sizeof(name), "%s port %u",
- mlx4_glue->get_device_name(ibv_dev), port);
- eth_dev = rte_eth_dev_allocate(name);
- }
+ eth_dev = rte_eth_dev_allocate(name);
if (eth_dev == NULL) {
err = ENOMEM;
ERROR("can not allocate rte ethdev");
@@ -818,9 +1165,13 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
.free = &mlx4_free_verbs_buf,
.data = priv,
};
- mlx4_glue->dv_set_context_attr
+ err = mlx4_glue->dv_set_context_attr
(ctx, MLX4DV_SET_CTX_ATTR_BUF_ALLOCATORS,
(void *)((uintptr_t)&alctr));
+ if (err)
+ WARN("Verbs external allocator is not supported");
+ else
+ priv->verbs_alloc_ctx.enabled = 1;
#endif
/* Bring Ethernet device up. */
DEBUG("forcing Ethernet interface up");
@@ -842,9 +1193,10 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
goto port_error;
}
/* Add device to memory callback list. */
- rte_rwlock_write_lock(&mlx4_mem_event_rwlock);
- LIST_INSERT_HEAD(&mlx4_mem_event_cb_list, priv, mem_event_cb);
- rte_rwlock_write_unlock(&mlx4_mem_event_rwlock);
+ rte_rwlock_write_lock(&mlx4_shared_data->mem_event_rwlock);
+ LIST_INSERT_HEAD(&mlx4_shared_data->mem_event_cb_list,
+ priv, mem_event_cb);
+ rte_rwlock_write_unlock(&mlx4_shared_data->mem_event_rwlock);
rte_eth_dev_probing_finish(eth_dev);
continue;
port_error:
@@ -1075,8 +1427,6 @@ RTE_INIT(rte_mlx4_pmd_init)
}
mlx4_glue->fork_init();
rte_pci_register(&mlx4_driver);
- rte_mem_event_callback_register("MLX4_MEM_EVENT_CB",
- mlx4_mr_mem_event_cb, NULL);
}
RTE_PMD_EXPORT_NAME(net_mlx4, __COUNTER__);
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index d43e05ea74..1a7b1fb541 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -53,6 +53,16 @@
/** Port parameter. */
#define MLX4_PMD_PORT_KVARG "port"
+/* Reserved address space for UAR mapping. */
+#define MLX4_UAR_SIZE (1ULL << (sizeof(uintptr_t) * 4))
+
+/* Offset of the reserved UAR address space from hugepage memory. The offset
+ * minimizes the chance that an address adjacent to the hugepages is used by
+ * other code in either the primary or secondary process; failing to map the
+ * TX UAR would make TX packets invisible to the HW.
+ */
+#define MLX4_UAR_OFFSET (2ULL << (sizeof(uintptr_t) * 4))
+
enum {
PCI_VENDOR_ID_MELLANOX = 0x15b3,
};
@@ -63,6 +73,26 @@ enum {
PCI_DEVICE_ID_MELLANOX_CONNECTX3PRO = 0x1007,
};
+/* Request types for IPC. */
+enum mlx4_mp_req_type {
+ MLX4_MP_REQ_VERBS_CMD_FD = 1,
+ MLX4_MP_REQ_START_RXTX,
+ MLX4_MP_REQ_STOP_RXTX,
+};
+
+/* Parameters for IPC. */
+struct mlx4_mp_param {
+ enum mlx4_mp_req_type type;
+ int port_id;
+ int result;
+};
+
+/** Request timeout for IPC. */
+#define MLX4_MP_REQ_TIMEOUT_SEC 5
+
+/** Key string for IPC. */
+#define MLX4_MP_NAME "net_mlx4_mp"
+
/** Driver name reported to lower layers and used in log output. */
#define MLX4_DRIVER_NAME "net_mlx4"
@@ -86,6 +116,7 @@ enum mlx4_verbs_alloc_type {
* resources it is allocating.
*/
struct mlx4_verbs_alloc_ctx {
+ int enabled;
enum mlx4_verbs_alloc_type type; /* Kind of object being allocated. */
const void *obj; /* Pointer to the DPDK object. */
};
@@ -93,6 +124,27 @@ struct mlx4_verbs_alloc_ctx {
LIST_HEAD(mlx4_dev_list, mlx4_priv);
LIST_HEAD(mlx4_mr_list, mlx4_mr);
+/* Shared data between primary and secondary processes. */
+struct mlx4_shared_data {
+ rte_spinlock_t lock;
+ /* Global spinlock for primary and secondary processes. */
+ int init_done; /* Whether primary has done initialization. */
+ unsigned int secondary_cnt; /* Number of secondary processes init'd. */
+ void *uar_base;
+ /* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
+ struct mlx4_dev_list mem_event_cb_list;
+ rte_rwlock_t mem_event_rwlock;
+};
+
+/* Per-process data structure, not visible to other processes. */
+struct mlx4_local_data {
+ int init_done; /* Whether a secondary has done initialization. */
+ void *uar_base;
+ /* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
+};
+
+extern struct mlx4_shared_data *mlx4_shared_data;
+
/** Private data structure. */
struct mlx4_priv {
LIST_ENTRY(mlx4_priv) mem_event_cb;
@@ -175,4 +227,13 @@ void mlx4_rxq_intr_disable(struct mlx4_priv *priv);
int mlx4_rx_intr_disable(struct rte_eth_dev *dev, uint16_t idx);
int mlx4_rx_intr_enable(struct rte_eth_dev *dev, uint16_t idx);
+/* mlx4_mp.c */
+void mlx4_mp_req_start_rxtx(struct rte_eth_dev *dev);
+void mlx4_mp_req_stop_rxtx(struct rte_eth_dev *dev);
+int mlx4_mp_req_verbs_cmd_fd(struct rte_eth_dev *dev);
+void mlx4_mp_init_primary(void);
+void mlx4_mp_uninit_primary(void);
+void mlx4_mp_init_secondary(void);
+void mlx4_mp_uninit_secondary(void);
+
#endif /* RTE_PMD_MLX4_H_ */
diff --git a/drivers/net/mlx4/mlx4_mp.c b/drivers/net/mlx4/mlx4_mp.c
new file mode 100644
index 0000000000..eaeb257348
--- /dev/null
+++ b/drivers/net/mlx4/mlx4_mp.c
@@ -0,0 +1,304 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2019 6WIND S.A.
+ * Copyright 2019 Mellanox Technologies, Ltd
+ */
+
+#include <assert.h>
+#include <stdio.h>
+#include <time.h>
+
+#include <rte_eal.h>
+#include <rte_ethdev_driver.h>
+#include <rte_string_fns.h>
+
+#include "mlx4.h"
+#include "mlx4_rxtx.h"
+#include "mlx4_utils.h"
+
+/**
+ * Initialize IPC message.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet structure.
+ * @param[out] msg
+ * Pointer to message to fill in.
+ * @param[in] type
+ * Message type.
+ */
+static inline void
+mp_init_msg(struct rte_eth_dev *dev, struct rte_mp_msg *msg,
+ enum mlx4_mp_req_type type)
+{
+ struct mlx4_mp_param *param = (struct mlx4_mp_param *)msg->param;
+
+ memset(msg, 0, sizeof(*msg));
+ strlcpy(msg->name, MLX4_MP_NAME, sizeof(msg->name));
+ msg->len_param = sizeof(*param);
+ param->type = type;
+ param->port_id = dev->data->port_id;
+}
+
+/**
+ * IPC message handler of primary process.
+ *
+ * @param[in] mp_msg
+ * Pointer to the received IPC message.
+ * @param[in] peer
+ * Pointer to the peer socket path.
+ *
+ * @return
+ * 0 on success, negative errno value otherwise and rte_errno is set.
+ */
+static int
+mp_primary_handle(const struct rte_mp_msg *mp_msg, const void *peer)
+{
+ struct rte_mp_msg mp_res;
+ struct mlx4_mp_param *res = (struct mlx4_mp_param *)mp_res.param;
+ const struct mlx4_mp_param *param =
+ (const struct mlx4_mp_param *)mp_msg->param;
+ struct rte_eth_dev *dev;
+ struct mlx4_priv *priv;
+ int ret;
+
+ assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+ if (!rte_eth_dev_is_valid_port(param->port_id)) {
+ rte_errno = ENODEV;
+ ERROR("port %u invalid port ID", param->port_id);
+ return -rte_errno;
+ }
+ dev = &rte_eth_devices[param->port_id];
+ priv = dev->data->dev_private;
+ switch (param->type) {
+ case MLX4_MP_REQ_VERBS_CMD_FD:
+ mp_init_msg(dev, &mp_res, param->type);
+ mp_res.num_fds = 1;
+ mp_res.fds[0] = priv->ctx->cmd_fd;
+ res->result = 0;
+ ret = rte_mp_reply(&mp_res, peer);
+ break;
+ default:
+ rte_errno = EINVAL;
+ ERROR("port %u invalid mp request type", dev->data->port_id);
+ return -rte_errno;
+ }
+ return ret;
+}
+
+/**
+ * IPC message handler of a secondary process.
+ *
+ * @param[in] mp_msg
+ * Pointer to the received IPC message.
+ * @param[in] peer
+ * Pointer to the peer socket path.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mp_secondary_handle(const struct rte_mp_msg *mp_msg, const void *peer)
+{
+ struct rte_mp_msg mp_res;
+ struct mlx4_mp_param *res = (struct mlx4_mp_param *)mp_res.param;
+ const struct mlx4_mp_param *param =
+ (const struct mlx4_mp_param *)mp_msg->param;
+ struct rte_eth_dev *dev;
+ int ret;
+
+ assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
+ if (!rte_eth_dev_is_valid_port(param->port_id)) {
+ rte_errno = ENODEV;
+ ERROR("port %u invalid port ID", param->port_id);
+ return -rte_errno;
+ }
+ dev = &rte_eth_devices[param->port_id];
+ switch (param->type) {
+ case MLX4_MP_REQ_START_RXTX:
+ INFO("port %u starting datapath", dev->data->port_id);
+ rte_mb();
+ dev->tx_pkt_burst = mlx4_tx_burst;
+ dev->rx_pkt_burst = mlx4_rx_burst;
+ mp_init_msg(dev, &mp_res, param->type);
+ res->result = 0;
+ ret = rte_mp_reply(&mp_res, peer);
+ break;
+ case MLX4_MP_REQ_STOP_RXTX:
+ INFO("port %u stopping datapath", dev->data->port_id);
+ dev->tx_pkt_burst = mlx4_tx_burst_removed;
+ dev->rx_pkt_burst = mlx4_rx_burst_removed;
+ rte_mb();
+ mp_init_msg(dev, &mp_res, param->type);
+ res->result = 0;
+ ret = rte_mp_reply(&mp_res, peer);
+ break;
+ default:
+ rte_errno = EINVAL;
+ ERROR("port %u invalid mp request type", dev->data->port_id);
+ return -rte_errno;
+ }
+ return ret;
+}
+
+/**
+ * Broadcast request of stopping/starting data-path to secondary processes.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet structure.
+ * @param[in] type
+ * Request type.
+ */
+static void
+mp_req_on_rxtx(struct rte_eth_dev *dev, enum mlx4_mp_req_type type)
+{
+ struct rte_mp_msg mp_req;
+ struct rte_mp_msg *mp_res;
+ struct rte_mp_reply mp_rep;
+ struct mlx4_mp_param *res __rte_unused;
+ struct timespec ts = {.tv_sec = MLX4_MP_REQ_TIMEOUT_SEC, .tv_nsec = 0};
+ int ret;
+ int i;
+
+ assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+ if (!mlx4_shared_data->secondary_cnt)
+ return;
+ if (type != MLX4_MP_REQ_START_RXTX && type != MLX4_MP_REQ_STOP_RXTX) {
+ ERROR("port %u unknown request (req_type %d)",
+ dev->data->port_id, type);
+ return;
+ }
+ mp_init_msg(dev, &mp_req, type);
+ ret = rte_mp_request_sync(&mp_req, &mp_rep, &ts);
+ if (ret) {
+ ERROR("port %u failed to request stop/start Rx/Tx (%d)",
+ dev->data->port_id, type);
+ goto exit;
+ }
+ if (mp_rep.nb_sent != mp_rep.nb_received) {
+ ERROR("port %u not all secondaries responded (req_type %d)",
+ dev->data->port_id, type);
+ goto exit;
+ }
+ for (i = 0; i < mp_rep.nb_received; i++) {
+ mp_res = &mp_rep.msgs[i];
+ res = (struct mlx4_mp_param *)mp_res->param;
+ if (res->result) {
+ ERROR("port %u request failed on secondary #%d",
+ dev->data->port_id, i);
+ goto exit;
+ }
+ }
+exit:
+ free(mp_rep.msgs);
+}
+
+/**
+ * Broadcast request of starting data-path to secondary processes. The request
+ * is synchronous.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet structure.
+ */
+void
+mlx4_mp_req_start_rxtx(struct rte_eth_dev *dev)
+{
+ mp_req_on_rxtx(dev, MLX4_MP_REQ_START_RXTX);
+}
+
+/**
+ * Broadcast request of stopping data-path to secondary processes. The request
+ * is synchronous.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet structure.
+ */
+void
+mlx4_mp_req_stop_rxtx(struct rte_eth_dev *dev)
+{
+ mp_req_on_rxtx(dev, MLX4_MP_REQ_STOP_RXTX);
+}
+
+/**
+ * Request Verbs command file descriptor from the primary process.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet structure.
+ *
+ * @return
+ * fd on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx4_mp_req_verbs_cmd_fd(struct rte_eth_dev *dev)
+{
+ struct rte_mp_msg mp_req;
+ struct rte_mp_msg *mp_res;
+ struct rte_mp_reply mp_rep;
+ struct mlx4_mp_param *res;
+ struct timespec ts = {.tv_sec = MLX4_MP_REQ_TIMEOUT_SEC, .tv_nsec = 0};
+ int ret;
+
+ assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
+ mp_init_msg(dev, &mp_req, MLX4_MP_REQ_VERBS_CMD_FD);
+ ret = rte_mp_request_sync(&mp_req, &mp_rep, &ts);
+ if (ret) {
+ ERROR("port %u request to primary process failed",
+ dev->data->port_id);
+ return -rte_errno;
+ }
+ assert(mp_rep.nb_received == 1);
+ mp_res = &mp_rep.msgs[0];
+ res = (struct mlx4_mp_param *)mp_res->param;
+ if (res->result) {
+ rte_errno = -res->result;
+ ERROR("port %u failed to get command FD from primary process",
+ dev->data->port_id);
+ ret = -rte_errno;
+ goto exit;
+ }
+ assert(mp_res->num_fds == 1);
+ ret = mp_res->fds[0];
+ DEBUG("port %u command FD from primary is %d",
+ dev->data->port_id, ret);
+exit:
+ free(mp_rep.msgs);
+ return ret;
+}
+
+/**
+ * Initialize by primary process.
+ */
+void
+mlx4_mp_init_primary(void)
+{
+ assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+ rte_mp_action_register(MLX4_MP_NAME, mp_primary_handle);
+}
+
+/**
+ * Un-initialize by primary process.
+ */
+void
+mlx4_mp_uninit_primary(void)
+{
+ assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+ rte_mp_action_unregister(MLX4_MP_NAME);
+}
+
+/**
+ * Initialize by secondary process.
+ */
+void
+mlx4_mp_init_secondary(void)
+{
+ assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
+ rte_mp_action_register(MLX4_MP_NAME, mp_secondary_handle);
+}
+
+/**
+ * Un-initialize by secondary process.
+ */
+void
+mlx4_mp_uninit_secondary(void)
+{
+ assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
+ rte_mp_action_unregister(MLX4_MP_NAME);
+}
diff --git a/drivers/net/mlx4/mlx4_mr.c b/drivers/net/mlx4/mlx4_mr.c
index e4be46ab2a..01894faecf 100644
--- a/drivers/net/mlx4/mlx4_mr.c
+++ b/drivers/net/mlx4/mlx4_mr.c
@@ -489,6 +489,8 @@ mlx4_mr_garbage_collect(struct rte_eth_dev *dev)
struct mlx4_mr *mr_next;
struct mlx4_mr_list free_list = LIST_HEAD_INITIALIZER(free_list);
+ /* Must be called from the primary process. */
+ assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
/*
* MR can't be freed with holding the lock because rte_free() could call
* memory free callback function. This will be a deadlock situation.
@@ -561,6 +563,14 @@ mlx4_mr_create(struct rte_eth_dev *dev, struct mlx4_mr_cache *entry,
DEBUG("port %u creating a MR using address (%p)",
dev->data->port_id, (void *)addr);
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ WARN("port %u using address (%p) of unregistered mempool"
+ " in secondary process, please create mempool"
+ " before rte_eth_dev_start()",
+ dev->data->port_id, (void *)addr);
+ rte_errno = EPERM;
+ goto err_nolock;
+ }
/*
* Release detached MRs if any. This can't be called with holding either
* memory_hotplug_lock or priv->mr.rwlock. MRs on the free list have
@@ -890,14 +900,17 @@ mlx4_mr_mem_event_cb(enum rte_mem_event event_type, const void *addr,
size_t len, void *arg __rte_unused)
{
struct mlx4_priv *priv;
+ struct mlx4_dev_list *dev_list = &mlx4_shared_data->mem_event_cb_list;
+ /* Must be called from the primary process. */
+ assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
switch (event_type) {
case RTE_MEM_EVENT_FREE:
- rte_rwlock_read_lock(&mlx4_mem_event_rwlock);
+ rte_rwlock_read_lock(&mlx4_shared_data->mem_event_rwlock);
/* Iterate all the existing mlx4 devices. */
- LIST_FOREACH(priv, &mlx4_mem_event_cb_list, mem_event_cb)
+ LIST_FOREACH(priv, dev_list, mem_event_cb)
mlx4_mr_mem_event_free_cb(ETH_DEV(priv), addr, len);
- rte_rwlock_read_unlock(&mlx4_mem_event_rwlock);
+ rte_rwlock_read_unlock(&mlx4_shared_data->mem_event_rwlock);
break;
case RTE_MEM_EVENT_ALLOC:
default:
@@ -1130,6 +1143,7 @@ mlx4_mr_update_ext_mp_cb(struct rte_mempool *mp, void *opaque,
struct mlx4_mr_cache entry;
uint32_t lkey;
+ assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
/* If already registered, it should return. */
rte_rwlock_read_lock(&priv->mr.rwlock);
lkey = mr_lookup_dev(dev, &entry, addr);
@@ -1225,6 +1239,14 @@ mlx4_tx_update_ext_mp(struct txq *txq, uintptr_t addr, struct rte_mempool *mp)
struct mlx4_mr_ctrl *mr_ctrl = &txq->mr_ctrl;
struct mlx4_priv *priv = txq->priv;
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ WARN("port %u using address (%p) from unregistered mempool"
+ " having externally allocated memory"
+ " in secondary process, please create mempool"
+ " prior to rte_eth_dev_start()",
+ PORT_ID(priv), (void *)addr);
+ return UINT32_MAX;
+ }
mlx4_mr_update_ext_mp(ETH_DEV(priv), mr_ctrl, mp);
return mlx4_tx_addr2mr_bh(txq, addr);
}
@@ -1336,9 +1358,9 @@ mlx4_mr_release(struct rte_eth_dev *dev)
struct mlx4_mr *mr_next = LIST_FIRST(&priv->mr.mr_list);
/* Remove from memory callback device list. */
- rte_rwlock_write_lock(&mlx4_mem_event_rwlock);
+ rte_rwlock_write_lock(&mlx4_shared_data->mem_event_rwlock);
LIST_REMOVE(priv, mem_event_cb);
- rte_rwlock_write_unlock(&mlx4_mem_event_rwlock);
+ rte_rwlock_write_unlock(&mlx4_shared_data->mem_event_rwlock);
#ifndef NDEBUG
mlx4_mr_dump_dev(dev);
#endif
diff --git a/drivers/net/mlx4/mlx4_prm.h b/drivers/net/mlx4/mlx4_prm.h
index aef77ba06e..b3e11dde25 100644
--- a/drivers/net/mlx4/mlx4_prm.h
+++ b/drivers/net/mlx4/mlx4_prm.h
@@ -77,7 +77,9 @@ struct mlx4_sq {
uint32_t owner_opcode;
/**< Default owner opcode with HW valid owner bit. */
uint32_t stamp; /**< Stamp value with an invalid HW owner bit. */
- volatile uint32_t *db; /**< Pointer to the doorbell. */
+ volatile uint32_t *qp_sdb; /**< Pointer to the doorbell. */
+ volatile uint32_t *db; /**< Pointer to the doorbell remapped. */
+ off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
uint32_t doorbell_qpn; /**< qp number to write to the doorbell. */
};
diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c
index 8c88effcd1..f22f1ba559 100644
--- a/drivers/net/mlx4/mlx4_rxtx.c
+++ b/drivers/net/mlx4/mlx4_rxtx.c
@@ -1365,6 +1365,7 @@ mlx4_tx_burst_removed(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
(void)dpdk_txq;
(void)pkts;
(void)pkts_n;
+ rte_mb();
return 0;
}
@@ -1390,5 +1391,6 @@ mlx4_rx_burst_removed(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
(void)dpdk_rxq;
(void)pkts;
(void)pkts_n;
+ rte_mb();
return 0;
}
diff --git a/drivers/net/mlx4/mlx4_rxtx.h b/drivers/net/mlx4/mlx4_rxtx.h
index 9409602b32..7d7a8988ed 100644
--- a/drivers/net/mlx4/mlx4_rxtx.h
+++ b/drivers/net/mlx4/mlx4_rxtx.h
@@ -152,6 +152,7 @@ uint16_t mlx4_rx_burst_removed(void *dpdk_rxq, struct rte_mbuf **pkts,
/* mlx4_txq.c */
+int mlx4_tx_uar_remap(struct rte_eth_dev *dev, int fd);
uint64_t mlx4_get_tx_port_offloads(struct mlx4_priv *priv);
int mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx,
uint16_t desc, unsigned int socket,
diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c
index 2dc198e77f..698a648c8d 100644
--- a/drivers/net/mlx4/mlx4_txq.c
+++ b/drivers/net/mlx4/mlx4_txq.c
@@ -13,7 +13,9 @@
#include <stddef.h>
#include <stdint.h>
#include <string.h>
+#include <sys/mman.h>
#include <inttypes.h>
+#include <unistd.h>
/* Verbs headers do not support -pedantic. */
#ifdef PEDANTIC
@@ -38,6 +40,100 @@
#include "mlx4_utils.h"
/**
+ * Mmap TX UAR (HW doorbell) pages into the reserved UAR address space.
+ * Both primary and secondary processes do the mmap so that the UAR
+ * addresses are aligned.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet device.
+ * @param fd
+ * Verbs file descriptor to map UAR pages.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
+int
+mlx4_tx_uar_remap(struct rte_eth_dev *dev, int fd)
+{
+ unsigned int i, j;
+ const unsigned int txqs_n = dev->data->nb_tx_queues;
+ uintptr_t pages[txqs_n];
+ unsigned int pages_n = 0;
+ uintptr_t uar_va;
+ uintptr_t off;
+ void *addr;
+ void *ret;
+ struct txq *txq;
+ int already_mapped;
+ size_t page_size = sysconf(_SC_PAGESIZE);
+
+ memset(pages, 0, txqs_n * sizeof(uintptr_t));
+ /*
+ * As in rdma-core, UARs are mapped at OS page size granularity.
+ * Use the aligned address to avoid duplicate mmap.
+ * See the libmlx4 function mlx4_init_context().
+ */
+ for (i = 0; i != txqs_n; ++i) {
+ txq = dev->data->tx_queues[i];
+ if (!txq)
+ continue;
+ /* UAR addr from Verbs, used to find duplicates and the offset in page. */
+ uar_va = (uintptr_t)txq->msq.qp_sdb;
+ off = uar_va & (page_size - 1); /* offset in page. */
+ uar_va = RTE_ALIGN_FLOOR(uar_va, page_size); /* page addr. */
+ already_mapped = 0;
+ for (j = 0; j != pages_n; ++j) {
+ if (pages[j] == uar_va) {
+ already_mapped = 1;
+ break;
+ }
+ }
+ /* new address in reserved UAR address space. */
+ addr = RTE_PTR_ADD(mlx4_shared_data->uar_base,
+ uar_va & (uintptr_t)(MLX4_UAR_SIZE - 1));
+ if (!already_mapped) {
+ pages[pages_n++] = uar_va;
+ /* fixed mmap to specified address in reserved
+ * address space.
+ */
+ ret = mmap(addr, page_size,
+ PROT_WRITE, MAP_FIXED | MAP_SHARED, fd,
+ txq->msq.uar_mmap_offset);
+ if (ret != addr) {
+ /* fixed mmap has to return same address. */
+ ERROR("port %u call to mmap failed on UAR"
+ " for txq %u",
+ dev->data->port_id, i);
+ rte_errno = ENXIO;
+ return -rte_errno;
+ }
+ }
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* save once. */
+ txq->msq.db = RTE_PTR_ADD((void *)addr, off);
+ else
+ assert(txq->msq.db ==
+ RTE_PTR_ADD((void *)addr, off));
+ }
+ return 0;
+}
+#else
+int
+mlx4_tx_uar_remap(struct rte_eth_dev *dev __rte_unused, int fd __rte_unused)
+{
+ /*
+ * Even if rdma-core doesn't support UAR remap, primary process
+ * shouldn't be interrupted.
+ */
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+ return 0;
+ ERROR("UAR remap is not supported");
+ rte_errno = ENOTSUP;
+ return -rte_errno;
+}
+#endif
+
+/**
* Free Tx queue elements.
*
* @param txq
@@ -89,7 +185,13 @@ mlx4_txq_fill_dv_obj_info(struct txq *txq, struct mlx4dv_obj *mlxdv)
sq->owner_opcode = MLX4_OPCODE_SEND | (0u << MLX4_SQ_OWNER_BIT);
sq->stamp = rte_cpu_to_be_32(MLX4_SQ_STAMP_VAL |
(0u << MLX4_SQ_OWNER_BIT));
+#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
+ sq->uar_mmap_offset = dqp->uar_mmap_offset;
+ sq->qp_sdb = dqp->sdb;
+#else
+ sq->uar_mmap_offset = -1; /* Make mmap() fail. */
sq->db = dqp->sdb;
+#endif
sq->doorbell_qpn = dqp->doorbell_qpn;
cq->buf = dcq->buf.buf;
cq->cqe_cnt = dcq->cqe_cnt;
@@ -307,6 +409,11 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
goto error;
}
/* Retrieve device queue information. */
+#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
+ dv_qp = (struct mlx4dv_qp){
+ .comp_mask = MLX4DV_QP_MASK_UAR_MMAP_OFFSET,
+ };
+#endif
mlxdv.cq.in = txq->cq;
mlxdv.cq.out = &dv_cq;
mlxdv.qp.in = txq->qp;
@@ -318,6 +425,12 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
" accessing the device queues", (void *)dev);
goto error;
}
+#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
+ if (!(dv_qp.comp_mask & MLX4DV_QP_MASK_UAR_MMAP_OFFSET)) {
+ WARN("%p: failed to obtain UAR mmap offset", (void *)dev);
+ dv_qp.uar_mmap_offset = -1; /* Make mmap() fail. */
+ }
+#endif
mlx4_txq_fill_dv_obj_info(txq, &mlxdv);
/* Save first wqe pointer in the first element. */
(&(*txq->elts)[0])->wqe =
--
2.11.0
^ permalink raw reply [flat|nested] 30+ messages in thread
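With the series applied, nothing mlx4-specific is needed on the application
side: a secondary process initializes EAL with --proc-type=secondary and polls
queues configured and started by the primary. A minimal sketch using only
generic ethdev calls; the port and queue indices are illustrative:

#include <stdio.h>
#include <stdint.h>
#include <rte_eal.h>
#include <rte_ethdev.h>
#include <rte_mbuf.h>

int
main(int argc, char **argv)
{
	uint16_t port_id = 0;	/* illustrative: first port probed by primary */
	struct rte_mbuf *pkts[32];

	/* e.g. ./secondary -l 2 --proc-type=secondary */
	if (rte_eal_init(argc, argv) < 0)
		return -1;
	if (rte_eal_process_type() != RTE_PROC_SECONDARY) {
		printf("run with --proc-type=secondary\n");
		return -1;
	}
	/*
	 * Queues are configured and started by the primary process; the
	 * secondary only attaches and polls. Rx mempools must already exist,
	 * i.e. created before rte_eth_dev_start() in the primary (see the
	 * mlx4.rst limitation above).
	 */
	while (rte_eth_dev_is_valid_port(port_id)) {
		uint16_t i, n = rte_eth_rx_burst(port_id, 0, pkts, 32);

		for (i = 0; i < n; i++)
			rte_pktmbuf_free(pkts[i]);
	}
	return 0;
}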
* Re: [dpdk-dev] [PATCH v3 0/3] net/mlx4: add secondary process support
2019-04-01 21:15 ` [dpdk-dev] [PATCH v3 0/3] " Yongseok Koh
` (3 preceding siblings ...)
2019-04-01 21:15 ` [dpdk-dev] [PATCH v3 3/3] net/mlx4: add secondary process support Yongseok Koh
@ 2019-04-02 7:12 ` Shahaf Shuler
2019-04-02 7:12 ` Shahaf Shuler
4 siblings, 1 reply; 30+ messages in thread
From: Shahaf Shuler @ 2019-04-02 7:12 UTC (permalink / raw)
To: Yongseok Koh; +Cc: dev
Tuesday, April 2, 2019 12:16 AM, Yongseok Koh:
> Subject: [PATCH v3 0/3] net/mlx4: add secondary process support
>
> RFC:
> https://mails.dpdk.org/archives/dev/2019-March/125516.html
>
> v3:
> * rebase on the latest branch tip
> * remove HAVE_IBV_MLX4_SECONDARY_PROCESS and make it determined
> in run-time
>
> v2:
> * add more sanity check for eth_dev and return value from IPC request
> * complement commit messages
> * add MLX4_MP_REQ_TIMEOUT_SEC
>
> Yongseok Koh (3):
> net/mlx4: change device reference for secondary process
> net/mlx4: add external allocator for Verbs object
> net/mlx4: add secondary process support
Applied to next-net-mlx, thanks.
^ permalink raw reply [flat|nested] 30+ messages in thread
end of thread, other threads:[~2019-04-02 7:12 UTC | newest]
Thread overview: 30+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-03-07 7:39 [dpdk-dev] [PATCH 0/3] net/mlx4: add secondary process support Yongseok Koh
2019-03-07 7:39 ` [dpdk-dev] [PATCH 1/3] net/mlx4: change device reference for secondary process Yongseok Koh
2019-03-07 7:39 ` [dpdk-dev] [PATCH 2/3] net/mlx4: add external allocator for Verbs object Yongseok Koh
2019-03-07 7:39 ` [dpdk-dev] [PATCH 3/3] net/mlx4: add secondary process support Yongseok Koh
2019-03-25 19:17 ` [dpdk-dev] [PATCH v2 0/3] " Yongseok Koh
2019-03-25 19:17 ` [dpdk-dev] [PATCH v2 1/3] net/mlx4: change device reference for secondary process Yongseok Koh
2019-03-26 19:16 ` Shahaf Shuler
2019-03-25 19:18 ` [dpdk-dev] [PATCH v2 2/3] net/mlx4: add external allocator for Verbs object Yongseok Koh
2019-03-26 19:21 ` Shahaf Shuler
2019-03-25 19:18 ` [dpdk-dev] [PATCH v2 3/3] net/mlx4: add secondary process support Yongseok Koh
2019-03-26 19:33 ` Shahaf Shuler
2019-03-28 19:01 ` Yongseok Koh
2019-04-01 21:15 ` [dpdk-dev] [PATCH v3 0/3] " Yongseok Koh
2019-04-01 21:15 ` [dpdk-dev] [PATCH v3 1/3] net/mlx4: change device reference for secondary process Yongseok Koh
2019-04-01 21:15 ` [dpdk-dev] [PATCH v3 2/3] net/mlx4: add external allocator for Verbs object Yongseok Koh
2019-04-01 21:15 ` [dpdk-dev] [PATCH v3 3/3] net/mlx4: add secondary process support Yongseok Koh
2019-04-02 7:12 ` [dpdk-dev] [PATCH v3 0/3] " Shahaf Shuler