* [dpdk-dev] [PATCH v2 1/4] net/failsafe: replace local device with shared data @ 2019-03-05 9:52 Raslan Darawsheh 2019-03-05 9:52 ` [dpdk-dev] [PATCH v2 3/4] net/failsafe: replace local sub-device " Raslan Darawsheh ` (4 more replies) 0 siblings, 5 replies; 33+ messages in thread From: Raslan Darawsheh @ 2019-03-05 9:52 UTC (permalink / raw) To: gaetan.rivet; +Cc: dev, Thomas Monjalon, Raslan Darawsheh, stephen In multiprocess context, the private structure is shared between processes. The back reference from private to generic data was using a pointer to a per process eth_dev. It's now changed to a reference of the shared data. Signed-off-by: Raslan Darawsheh <rasland@mellanox.com> Signed-off-by: Thomas Monjalon <thomas@monjalon.net> --- drivers/net/failsafe/failsafe.c | 2 +- drivers/net/failsafe/failsafe_intr.c | 10 +++++----- drivers/net/failsafe/failsafe_ops.c | 4 ++-- drivers/net/failsafe/failsafe_private.h | 6 +++++- drivers/net/failsafe/failsafe_rxtx.c | 4 ++-- 5 files changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c index 06e859e..68926ca 100644 --- a/drivers/net/failsafe/failsafe.c +++ b/drivers/net/failsafe/failsafe.c @@ -181,7 +181,7 @@ fs_eth_dev_create(struct rte_vdev_device *vdev) return -1; } priv = PRIV(dev); - priv->dev = dev; + priv->data = dev->data; dev->dev_ops = &failsafe_ops; dev->data->mac_addrs = &PRIV(dev)->mac_addrs[0]; dev->data->dev_link = eth_link; diff --git a/drivers/net/failsafe/failsafe_intr.c b/drivers/net/failsafe/failsafe_intr.c index 1c2cb71..09aa3c6 100644 --- a/drivers/net/failsafe/failsafe_intr.c +++ b/drivers/net/failsafe/failsafe_intr.c @@ -133,8 +133,8 @@ fs_rx_event_proxy_service_install(struct fs_priv *priv) /* prepare service info */ memset(&service, 0, sizeof(struct rte_service_spec)); snprintf(service.name, sizeof(service.name), "%s_Rx_service", - priv->dev->data->name); - service.socket_id = priv->dev->data->numa_node; + priv->data->name); + service.socket_id = priv->data->numa_node; service.callback = fs_rx_event_proxy_routine; service.callback_userdata = priv; @@ -437,7 +437,7 @@ fs_rx_intr_vec_install(struct fs_priv *priv) unsigned int count; struct rte_intr_handle *intr_handle; - rxqs_n = priv->dev->data->nb_rx_queues; + rxqs_n = priv->data->nb_rx_queues; n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID); count = 0; intr_handle = &priv->intr_handle; @@ -452,7 +452,7 @@ fs_rx_intr_vec_install(struct fs_priv *priv) return -rte_errno; } for (i = 0; i < n; i++) { - struct rxq *rxq = priv->dev->data->rx_queues[i]; + struct rxq *rxq = priv->data->rx_queues[i]; /* Skip queues that cannot request interrupts. */ if (rxq == NULL || rxq->event_fd < 0) { @@ -521,7 +521,7 @@ failsafe_rx_intr_install(struct rte_eth_dev *dev) { struct fs_priv *priv = PRIV(dev); const struct rte_intr_conf *const intr_conf = - &priv->dev->data->dev_conf.intr_conf; + &priv->data->dev_conf.intr_conf; if (intr_conf->rxq == 0 || dev->intr_handle != NULL) return 0; diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c index e3add40..65957a2 100644 --- a/drivers/net/failsafe/failsafe_ops.c +++ b/drivers/net/failsafe/failsafe_ops.c @@ -452,7 +452,7 @@ fs_rx_queue_release(void *queue) if (queue == NULL) return; rxq = queue; - dev = rxq->priv->dev; + dev = &rte_eth_devices[rxq->priv->data->port_id]; fs_lock(dev, 0); if (rxq->event_fd > 0) close(rxq->event_fd); @@ -636,7 +636,7 @@ fs_tx_queue_release(void *queue) if (queue == NULL) return; txq = queue; - dev = txq->priv->dev; + dev = &rte_eth_devices[txq->priv->data->port_id]; fs_lock(dev, 0); FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { if (ETH(sdev)->data->tx_queues != NULL && diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h index 0dfea65..29dfd40 100644 --- a/drivers/net/failsafe/failsafe_private.h +++ b/drivers/net/failsafe/failsafe_private.h @@ -128,8 +128,12 @@ struct sub_device { unsigned int lsc_callback:1; }; +/* + * This is referenced by eth_dev->data->dev_private + * This is shared between processes. + */ struct fs_priv { - struct rte_eth_dev *dev; + struct rte_eth_dev_data *data; /* backreference to shared data. */ /* * Set of sub_devices. * subs[0] is the preferred device diff --git a/drivers/net/failsafe/failsafe_rxtx.c b/drivers/net/failsafe/failsafe_rxtx.c index 034f47b..231c832 100644 --- a/drivers/net/failsafe/failsafe_rxtx.c +++ b/drivers/net/failsafe/failsafe_rxtx.c @@ -126,7 +126,7 @@ failsafe_tx_burst(void *queue, uint16_t nb_tx; txq = queue; - sdev = TX_SUBDEV(txq->priv->dev); + sdev = TX_SUBDEV(&rte_eth_devices[txq->priv->data->port_id]); if (unlikely(fs_tx_unsafe(sdev))) return 0; sub_txq = ETH(sdev)->data->tx_queues[txq->qid]; @@ -147,7 +147,7 @@ failsafe_tx_burst_fast(void *queue, uint16_t nb_tx; txq = queue; - sdev = TX_SUBDEV(txq->priv->dev); + sdev = TX_SUBDEV(&rte_eth_devices[txq->priv->data->port_id]); RTE_ASSERT(!fs_tx_unsafe(sdev)); sub_txq = ETH(sdev)->data->tx_queues[txq->qid]; FS_ATOMIC_P(txq->refcnt[sdev->sid]); -- 2.7.4 ^ permalink raw reply [flat|nested] 33+ messages in thread
* [dpdk-dev] [PATCH v2 3/4] net/failsafe: replace local sub-device with shared data 2019-03-05 9:52 [dpdk-dev] [PATCH v2 1/4] net/failsafe: replace local device with shared data Raslan Darawsheh @ 2019-03-05 9:52 ` Raslan Darawsheh 2019-03-05 9:59 ` Thomas Monjalon 2019-03-05 17:38 ` Gaëtan Rivet 2019-03-05 9:52 ` [dpdk-dev] [PATCH v2 2/4] net/failsafe: change back-reference from sub-device Raslan Darawsheh ` (3 subsequent siblings) 4 siblings, 2 replies; 33+ messages in thread From: Raslan Darawsheh @ 2019-03-05 9:52 UTC (permalink / raw) To: gaetan.rivet; +Cc: dev, Thomas Monjalon, Raslan Darawsheh, stephen In multiprocess context, the pointer to sub-device is shared between processes. Previously, it was a pointer to per process eth_dev so it's needed to replace this dependency. Signed-off-by: Thomas Monjalon <thomas@monjalon.net> Signed-off-by: Raslan Darawsheh <rasland@mellanox.com> --- v2: - moved comment in fs_sdev about subs to this commit - added parenthesis around macro arguments. --- drivers/net/failsafe/failsafe_eal.c | 2 +- drivers/net/failsafe/failsafe_ether.c | 7 ++++--- drivers/net/failsafe/failsafe_private.h | 13 ++++++++----- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c index 56d1669..6fac4b6 100644 --- a/drivers/net/failsafe/failsafe_eal.c +++ b/drivers/net/failsafe/failsafe_eal.c @@ -112,7 +112,7 @@ fs_bus_init(struct rte_eth_dev *dev) continue; } } - ETH(sdev) = &rte_eth_devices[pid]; + sdev->data = rte_eth_devices[pid].data; SUB_ID(sdev) = i; sdev->fs_port_id = dev->data->port_id; sdev->dev = ETH(sdev)->device; diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c index d5b1488..e1fff59 100644 --- a/drivers/net/failsafe/failsafe_ether.c +++ b/drivers/net/failsafe/failsafe_ether.c @@ -267,18 +267,19 @@ static void fs_dev_remove(struct sub_device *sdev) { int ret; + struct rte_eth_dev *edev = ETH(sdev); if (sdev == NULL) return; switch (sdev->state) { case DEV_STARTED: failsafe_rx_intr_uninstall_subdevice(sdev); - rte_eth_dev_stop(PORT_ID(sdev)); + rte_eth_dev_stop(edev->data->port_id); sdev->state = DEV_ACTIVE; /* fallthrough */ case DEV_ACTIVE: failsafe_eth_dev_unregister_callbacks(sdev); - rte_eth_dev_close(PORT_ID(sdev)); + rte_eth_dev_close(edev->data->port_id); sdev->state = DEV_PROBED; /* fallthrough */ case DEV_PROBED: @@ -287,7 +288,7 @@ fs_dev_remove(struct sub_device *sdev) ERROR("Bus detach failed for sub_device %u", SUB_ID(sdev)); } else { - rte_eth_dev_release_port(ETH(sdev)); + rte_eth_dev_release_port(edev); } sdev->state = DEV_PARSED; /* fallthrough */ diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h index 84e847f..1e2ad2d 100644 --- a/drivers/net/failsafe/failsafe_private.h +++ b/drivers/net/failsafe/failsafe_private.h @@ -100,13 +100,16 @@ struct fs_stats { uint64_t timestamp; }; +/* + * Allocated in shared memory. + */ struct sub_device { /* Exhaustive DPDK device description */ struct sub_device *next; struct rte_devargs devargs; - struct rte_bus *bus; - struct rte_device *dev; - struct rte_eth_dev *edev; + struct rte_bus *bus; /* per process. */ + struct rte_device *dev; /* per process. */ + struct rte_eth_dev_data *data; /* shared between processes */ uint8_t sid; /* Device state machine */ enum dev_state state; @@ -139,7 +142,7 @@ struct fs_priv { * subs[0] is the preferred device * any other is just another slave */ - struct sub_device *subs; + struct sub_device *subs; /* shared between processes */ uint8_t subs_head; /* if head == tail, no subs */ uint8_t subs_tail; /* first invalid */ uint8_t subs_tx; /* current emitting device */ @@ -254,7 +257,7 @@ extern int failsafe_mac_from_arg; /* sdev: (struct sub_device *) */ #define ETH(sdev) \ - ((sdev)->edev) + ((sdev)->data == NULL ? NULL : &rte_eth_devices[(sdev)->data->port_id]) /* sdev: (struct sub_device *) */ #define PORT_ID(sdev) \ -- 2.7.4 ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [dpdk-dev] [PATCH v2 3/4] net/failsafe: replace local sub-device with shared data 2019-03-05 9:52 ` [dpdk-dev] [PATCH v2 3/4] net/failsafe: replace local sub-device " Raslan Darawsheh @ 2019-03-05 9:59 ` Thomas Monjalon 2019-03-05 17:38 ` Gaëtan Rivet 1 sibling, 0 replies; 33+ messages in thread From: Thomas Monjalon @ 2019-03-05 9:59 UTC (permalink / raw) To: Raslan Darawsheh; +Cc: gaetan.rivet, dev, stephen 05/03/2019 10:52, Raslan Darawsheh: > +/* > + * Allocated in shared memory. > + */ > struct sub_device { > /* Exhaustive DPDK device description */ > struct sub_device *next; > struct rte_devargs devargs; > - struct rte_bus *bus; > - struct rte_device *dev; > - struct rte_eth_dev *edev; > + struct rte_bus *bus; /* per process. */ > + struct rte_device *dev; /* per process. */ Thinking again about these comments. Given it is in a shared struct, it would be more precise to say "only for primary process". > + struct rte_eth_dev_data *data; /* shared between processes */ ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [dpdk-dev] [PATCH v2 3/4] net/failsafe: replace local sub-device with shared data 2019-03-05 9:52 ` [dpdk-dev] [PATCH v2 3/4] net/failsafe: replace local sub-device " Raslan Darawsheh 2019-03-05 9:59 ` Thomas Monjalon @ 2019-03-05 17:38 ` Gaëtan Rivet 2019-03-05 17:58 ` Thomas Monjalon 1 sibling, 1 reply; 33+ messages in thread From: Gaëtan Rivet @ 2019-03-05 17:38 UTC (permalink / raw) To: Raslan Darawsheh; +Cc: dev, Thomas Monjalon, stephen On Tue, Mar 05, 2019 at 09:52:05AM +0000, Raslan Darawsheh wrote: > In multiprocess context, the pointer to sub-device is shared between > processes. Previously, it was a pointer to per process eth_dev so > it's needed to replace this dependency. > > Signed-off-by: Thomas Monjalon <thomas@monjalon.net> > Signed-off-by: Raslan Darawsheh <rasland@mellanox.com> > --- > v2: - moved comment in fs_sdev about subs to this commit > - added parenthesis around macro arguments. > --- > drivers/net/failsafe/failsafe_eal.c | 2 +- > drivers/net/failsafe/failsafe_ether.c | 7 ++++--- > drivers/net/failsafe/failsafe_private.h | 13 ++++++++----- > 3 files changed, 13 insertions(+), 9 deletions(-) > > diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c > index 56d1669..6fac4b6 100644 > --- a/drivers/net/failsafe/failsafe_eal.c > +++ b/drivers/net/failsafe/failsafe_eal.c > @@ -112,7 +112,7 @@ fs_bus_init(struct rte_eth_dev *dev) > continue; > } > } > - ETH(sdev) = &rte_eth_devices[pid]; > + sdev->data = rte_eth_devices[pid].data; > SUB_ID(sdev) = i; > sdev->fs_port_id = dev->data->port_id; > sdev->dev = ETH(sdev)->device; > diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c > index d5b1488..e1fff59 100644 > --- a/drivers/net/failsafe/failsafe_ether.c > +++ b/drivers/net/failsafe/failsafe_ether.c > @@ -267,18 +267,19 @@ static void > fs_dev_remove(struct sub_device *sdev) > { > int ret; > + struct rte_eth_dev *edev = ETH(sdev); I'd have added that above the "int ret;". (inverse christmas tree and all that.) > > if (sdev == NULL) > return; > switch (sdev->state) { > case DEV_STARTED: > failsafe_rx_intr_uninstall_subdevice(sdev); > - rte_eth_dev_stop(PORT_ID(sdev)); > + rte_eth_dev_stop(edev->data->port_id); > sdev->state = DEV_ACTIVE; > /* fallthrough */ > case DEV_ACTIVE: > failsafe_eth_dev_unregister_callbacks(sdev); > - rte_eth_dev_close(PORT_ID(sdev)); > + rte_eth_dev_close(edev->data->port_id); Ok I see. I missed that during the first reading, the private_data is zeroed on dev_close(), so ETH(sdev) becomes invalid here. What happens when a primary process closes a device before a secondary? Is the secondary unable to stop / close its own then? Isn't there some missing uninit? This seems dangerous to me. Why not instead allocating a per-process slab of memory that would hold the relevant references and outlive the shared data (a per-process rte_eth_dev private data...). > sdev->state = DEV_PROBED; > /* fallthrough */ > case DEV_PROBED: > @@ -287,7 +288,7 @@ fs_dev_remove(struct sub_device *sdev) > ERROR("Bus detach failed for sub_device %u", > SUB_ID(sdev)); > } else { > - rte_eth_dev_release_port(ETH(sdev)); > + rte_eth_dev_release_port(edev); > } > sdev->state = DEV_PARSED; > /* fallthrough */ > diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h > index 84e847f..1e2ad2d 100644 > --- a/drivers/net/failsafe/failsafe_private.h > +++ b/drivers/net/failsafe/failsafe_private.h > @@ -100,13 +100,16 @@ struct fs_stats { > uint64_t timestamp; > }; > > +/* > + * Allocated in shared memory. > + */ > struct sub_device { > /* Exhaustive DPDK device description */ > struct sub_device *next; > struct rte_devargs devargs; > - struct rte_bus *bus; > - struct rte_device *dev; > - struct rte_eth_dev *edev; > + struct rte_bus *bus; /* per process. */ > + struct rte_device *dev; /* per process. */ > + struct rte_eth_dev_data *data; /* shared between processes */ > uint8_t sid; > /* Device state machine */ > enum dev_state state; > @@ -139,7 +142,7 @@ struct fs_priv { > * subs[0] is the preferred device > * any other is just another slave > */ > - struct sub_device *subs; > + struct sub_device *subs; /* shared between processes */ > uint8_t subs_head; /* if head == tail, no subs */ > uint8_t subs_tail; /* first invalid */ > uint8_t subs_tx; /* current emitting device */ > @@ -254,7 +257,7 @@ extern int failsafe_mac_from_arg; > > /* sdev: (struct sub_device *) */ > #define ETH(sdev) \ > - ((sdev)->edev) > + ((sdev)->data == NULL ? NULL : &rte_eth_devices[(sdev)->data->port_id]) > > /* sdev: (struct sub_device *) */ > #define PORT_ID(sdev) \ > -- > 2.7.4 > -- Gaëtan Rivet 6WIND ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [dpdk-dev] [PATCH v2 3/4] net/failsafe: replace local sub-device with shared data 2019-03-05 17:38 ` Gaëtan Rivet @ 2019-03-05 17:58 ` Thomas Monjalon 2019-03-06 10:46 ` Gaëtan Rivet 0 siblings, 1 reply; 33+ messages in thread From: Thomas Monjalon @ 2019-03-05 17:58 UTC (permalink / raw) To: Gaëtan Rivet; +Cc: Raslan Darawsheh, dev, stephen Hi, 05/03/2019 18:38, Gaëtan Rivet: > > fs_dev_remove(struct sub_device *sdev) [...] > > - rte_eth_dev_close(PORT_ID(sdev)); > > + rte_eth_dev_close(edev->data->port_id); > > Ok I see. I missed that during the first reading, the private_data is > zeroed on dev_close(), so ETH(sdev) becomes invalid here. I don't follow you. What do you mean with this comment? > What happens when a primary process closes a device before a secondary? > Is the secondary unable to stop / close its own then? Isn't there some > missing uninit? Is the secondary process supposed to do any closing? The device management should be done only by the primary process. Note: anyway all this hotplug related code should be dropped from failsafe to be replaced by EAL hotplug management. > This seems dangerous to me. Why not instead allocating a per-process > slab of memory that would hold the relevant references and outlive the > shared data (a per-process rte_eth_dev private data...). Which data do you think should be allocated per process? ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [dpdk-dev] [PATCH v2 3/4] net/failsafe: replace local sub-device with shared data 2019-03-05 17:58 ` Thomas Monjalon @ 2019-03-06 10:46 ` Gaëtan Rivet 2019-03-06 18:02 ` Thomas Monjalon 0 siblings, 1 reply; 33+ messages in thread From: Gaëtan Rivet @ 2019-03-06 10:46 UTC (permalink / raw) To: Thomas Monjalon; +Cc: Raslan Darawsheh, dev, stephen On Tue, Mar 05, 2019 at 06:58:04PM +0100, Thomas Monjalon wrote: > Hi, > > 05/03/2019 18:38, Gaëtan Rivet: > > > fs_dev_remove(struct sub_device *sdev) > [...] > > > - rte_eth_dev_close(PORT_ID(sdev)); > > > + rte_eth_dev_close(edev->data->port_id); > > > > Ok I see. I missed that during the first reading, the private_data is > > zeroed on dev_close(), so ETH(sdev) becomes invalid here. > > I don't follow you. What do you mean with this comment? > PORT_ID(sdev) uses ETH(sdev). ETH(sdev) will now point to `&rte_eth_devices[(sdev)->data->port_id]`, so data->port_id will be zeroed on sdev close. So once the sub-device has been closed, calling rte_eth_dev_release_port(ETH(sdev)) is not possible anymore, justifying the change (taking the reference to ETH(sdev) first, then using it after shared data has been overwritten). > > What happens when a primary process closes a device before a secondary? > > Is the secondary unable to stop / close its own then? Isn't there some > > missing uninit? > > Is the secondary process supposed to do any closing? > The device management should be done only by the primary process. > > Note: anyway all this hotplug related code should be dropped > from failsafe to be replaced by EAL hotplug management. > I don't know, I've never used secondary process. However, cursory reading the code of rte_eth_dev_close(), I don't see a guard against calling it from a secondary process? Reading code like rte_free(dev->data->rx_queues); dev->data->rx_queues = NULL; within makes me think the issue has been seen at least once, where shared data is freed multiple times, so I guessed some secondary processes were calling it. Maybe they are not meant to, but what prevents them from being badly written? Also, given rte_dev_remove IPC call to transfer the order to the primary, it seems that at least secondary processes are expected to call rte_dev_remove() at some point? So are they only authorized to call rte_dev_remove() (to manage hotplug), but not rte_eth_dev_close()? Is there a specific documentation detailing the design of secondary process and the related responsibilities in the lifetime of a device? How are they synching their rte_eth_devices list if they are not calling rte_eth_dev_close(), ever? > > This seems dangerous to me. Why not instead allocating a per-process > > slab of memory that would hold the relevant references and outlive the > > shared data (a per-process rte_eth_dev private data...). > > Which data do you think should be allocated per process? > > [-------- SHARED SPACE --------------] [-- PER-PROCESS --------] +--------------------------------------------------------------+ | +------------------+ +- rte_eth_devices[n] -+ | | |rte_eth_dev_data |<---------------+ data | | PRIMARY | | | +dev_priv-+ | | | | | dev_private +-->| | | | | | | ... | | | | | | | | port_id | | | | | | | | | | | | | | | | | | | | | | | | | | | +----------------------+ | | | | | | +- rte_eth_devices[n] -+ | | | | | | | | | SECONDARY | | | | | | | | | | | | | | | | | | | | | | | | | | | +---------+ | | | | | |<---------------+ data | | | +------------------+ +----------------------+ | +--------------------------------------------------------------+ Here port_id is used within fail-safe to get back to rte_eth_devices[n]. This disappears once a device is closed, as all shared space is zeroed. This means that sometimes ETH(sdev) and PORT_ID(sdev) is correct, and at some point it is not anymore, once a sub-device has been closed. This seems dangerous. I was thinking initially that allocating a place where each sdev would store their rte_eth_devices / port_id back-reference could alleviate the issue, meaning that the fail-safe would not zero it on sdev_close(), and it would remain valid for the lifetime of a sub-device, so even when a sub-device is in DEV_PROBED state. But now that I think about it, it could probably be simpler: instead of using (ETH(sdev)->data->port_id) for the port_id of an sdev (meaning that it is dependent on the lifetime of the sdev, instead of the lifetime of the failsafe), the port-id itself should be stored in the sub_device structure. This structure will be available for the lifetime of the failsafe, and the port_id is correct accross all processes. So PORT_ID(sdev) would be defined to something like (sdev->port_id), and ETH(sdev) would be (&rte_eth_devices[PORT_ID(sdev)]). It would remain correct even once the primary has closed the sub-device. What do you think? Do you agree that the current state is dangerous, and do you think the solution would alleviate the issue? Maybe the concern is unfounded and the only issue is within fs_dev_remove(). -- Gaëtan Rivet 6WIND ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [dpdk-dev] [PATCH v2 3/4] net/failsafe: replace local sub-device with shared data 2019-03-06 10:46 ` Gaëtan Rivet @ 2019-03-06 18:02 ` Thomas Monjalon 2019-03-07 8:43 ` Raslan Darawsheh 0 siblings, 1 reply; 33+ messages in thread From: Thomas Monjalon @ 2019-03-06 18:02 UTC (permalink / raw) To: Gaëtan Rivet, Raslan Darawsheh; +Cc: dev, stephen 06/03/2019 11:46, Gaëtan Rivet: > On Tue, Mar 05, 2019 at 06:58:04PM +0100, Thomas Monjalon wrote: > > 05/03/2019 18:38, Gaëtan Rivet: > > > What happens when a primary process closes a device before a secondary? > > > Is the secondary unable to stop / close its own then? Isn't there some > > > missing uninit? > > > > Is the secondary process supposed to do any closing? > > The device management should be done only by the primary process. > > > > Note: anyway all this hotplug related code should be dropped > > from failsafe to be replaced by EAL hotplug management. > > > > I don't know, I've never used secondary process. > However, cursory reading the code of rte_eth_dev_close(), I don't see > a guard against calling it from a secondary process? Yes indeed, there is no guard. That's something not clear in DPDK, previously we were attaching some vdevs in secondary only. > Reading code like > > rte_free(dev->data->rx_queues); > dev->data->rx_queues = NULL; > > within makes me think the issue has been seen at least once, where > shared data is freed multiple times, so I guessed some secondary > processes were calling it. Maybe they are not meant to, but what > prevents them from being badly written? > > Also, given rte_dev_remove IPC call to transfer the order to the > primary, it seems that at least secondary processes are expected to call > rte_dev_remove() at some point? So are they only authorized to call > rte_dev_remove() (to manage hotplug), but not rte_eth_dev_close()? Is > there a specific documentation detailing the design of secondary process > and the related responsibilities in the lifetime of a device? How are > they synching their rte_eth_devices list if they are not calling > rte_eth_dev_close(), ever? All these calls should be done in primary. The IPC mechanism calls the attach/detach in secondary at EAL level. The PMDs does the bridge between EAL device and ethdev port status. But you are right, there can be a sync issue if closing an ethdev port and not removing the EAL device. This is a generic question about deciding whether we want all ethdev ports to be synced in multi-process or not. In failsafe context, we are closing the EAL device and change the state of the sub-device accordingly. So I think there is no issue. > > > This seems dangerous to me. Why not instead allocating a per-process > > > slab of memory that would hold the relevant references and outlive the > > > shared data (a per-process rte_eth_dev private data...). > > > > Which data do you think should be allocated per process? > > > > > > [-------- SHARED SPACE --------------] [-- PER-PROCESS --------] > +--------------------------------------------------------------+ > | +------------------+ +- rte_eth_devices[n] -+ | > | |rte_eth_dev_data |<---------------+ data | | PRIMARY > | | | +dev_priv-+ | | | > | | dev_private +-->| | | | | > | | ... | | | | | | > | | port_id | | | | | | > | | | | | | | | > | | | | | | | | > | | | | | +----------------------+ | > | | | | | +- rte_eth_devices[n] -+ | > | | | | | | | | SECONDARY > | | | | | | | | > | | | | | | | | > | | | | | | | | > | | | +---------+ | | | > | | |<---------------+ data | | > | +------------------+ +----------------------+ | > +--------------------------------------------------------------+ > > Here port_id is used within fail-safe to get back to rte_eth_devices[n]. > This disappears once a device is closed, as all shared space is zeroed. > > This means that sometimes ETH(sdev) and PORT_ID(sdev) is correct, > and at some point it is not anymore, once a sub-device has been > closed. This seems dangerous. The state of the sub-device is changed. I don't see any issue. > I was thinking initially that allocating a place where each sdev would > store their rte_eth_devices / port_id back-reference could alleviate the > issue, meaning that the fail-safe would not zero it on sdev_close(), and > it would remain valid for the lifetime of a sub-device, so even when a > sub-device is in DEV_PROBED state. > > But now that I think about it, it could probably be simpler: instead of > using (ETH(sdev)->data->port_id) for the port_id of an sdev (meaning > that it is dependent on the lifetime of the sdev, instead of the > lifetime of the failsafe), the port-id itself should be stored in the > sub_device structure. This structure will be available for the lifetime > of the failsafe, and the port_id is correct accross all processes. > > So PORT_ID(sdev) would be defined to something like (sdev->port_id), and > ETH(sdev) would be (&rte_eth_devices[PORT_ID(sdev)]). It would remain > correct even once the primary has closed the sub-device. > > What do you think? Do you agree that the current state is dangerous, and > do you think the solution would alleviate the issue? Maybe the concern > is unfounded and the only issue is within fs_dev_remove(). Yes it is only seen in fs_dev_remove(). I discussed about your proposal with Raslan, and we agree we could change from sub_device.data to sub_device.port_id, it may be more future-proof. I have only one doubt: look at the macro in this patch: #define ETH(sdev) \ ((sdev)->data == NULL ? NULL : &rte_eth_devices[(sdev)->data->port_id]) The NULL check cannot be done with a port id. I think it was needed to manage one case. Raslan? ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [dpdk-dev] [PATCH v2 3/4] net/failsafe: replace local sub-device with shared data 2019-03-06 18:02 ` Thomas Monjalon @ 2019-03-07 8:43 ` Raslan Darawsheh 2019-03-07 9:47 ` Gaëtan Rivet 0 siblings, 1 reply; 33+ messages in thread From: Raslan Darawsheh @ 2019-03-07 8:43 UTC (permalink / raw) To: Thomas Monjalon, Gaëtan Rivet; +Cc: dev, stephen Hi, > -----Original Message----- > From: Thomas Monjalon <thomas@monjalon.net> > Sent: Wednesday, March 6, 2019 8:02 PM > To: Gaëtan Rivet <gaetan.rivet@6wind.com>; Raslan Darawsheh > <rasland@mellanox.com> > Cc: dev@dpdk.org; stephen@networkplumber.org > Subject: Re: [dpdk-dev] [PATCH v2 3/4] net/failsafe: replace local sub-device > with shared data > > 06/03/2019 11:46, Gaëtan Rivet: > > On Tue, Mar 05, 2019 at 06:58:04PM +0100, Thomas Monjalon wrote: > > > 05/03/2019 18:38, Gaëtan Rivet: > > > > What happens when a primary process closes a device before a > secondary? > > > > Is the secondary unable to stop / close its own then? Isn't there > > > > some missing uninit? > > > > > > Is the secondary process supposed to do any closing? > > > The device management should be done only by the primary process. > > > > > > Note: anyway all this hotplug related code should be dropped from > > > failsafe to be replaced by EAL hotplug management. > > > > > > > I don't know, I've never used secondary process. > > However, cursory reading the code of rte_eth_dev_close(), I don't see > > a guard against calling it from a secondary process? > > Yes indeed, there is no guard. > That's something not clear in DPDK, previously we were attaching some > vdevs in secondary only. > > > Reading code like > > > > rte_free(dev->data->rx_queues); > > dev->data->rx_queues = NULL; > > > > within makes me think the issue has been seen at least once, where > > shared data is freed multiple times, so I guessed some secondary > > processes were calling it. Maybe they are not meant to, but what > > prevents them from being badly written? > > > > Also, given rte_dev_remove IPC call to transfer the order to the > > primary, it seems that at least secondary processes are expected to > > call > > rte_dev_remove() at some point? So are they only authorized to call > > rte_dev_remove() (to manage hotplug), but not rte_eth_dev_close()? Is > > there a specific documentation detailing the design of secondary > > process and the related responsibilities in the lifetime of a device? > > How are they synching their rte_eth_devices list if they are not > > calling rte_eth_dev_close(), ever? > > All these calls should be done in primary. > The IPC mechanism calls the attach/detach in secondary at EAL level. > The PMDs does the bridge between EAL device and ethdev port status. > But you are right, there can be a sync issue if closing an ethdev port and not > removing the EAL device. > This is a generic question about deciding whether we want all ethdev ports to > be synced in multi-process or not. > > In failsafe context, we are closing the EAL device and change the state of the > sub-device accordingly. So I think there is no issue. > > > > > This seems dangerous to me. Why not instead allocating a > > > > per-process slab of memory that would hold the relevant references > > > > and outlive the shared data (a per-process rte_eth_dev private data...). > > > > > > Which data do you think should be allocated per process? > > > > > > > > > > [-------- SHARED SPACE --------------] [-- PER-PROCESS --------] > > +--------------------------------------------------------------+ > > | +------------------+ +- rte_eth_devices[n] -+ | > > | |rte_eth_dev_data |<---------------+ data | | PRIMARY > > | | | +dev_priv-+ | | | > > | | dev_private +-->| | | | | > > | | ... | | | | | | > > | | port_id | | | | | | > > | | | | | | | | > > | | | | | | | | > > | | | | | +----------------------+ | > > | | | | | +- rte_eth_devices[n] -+ | > > | | | | | | | | SECONDARY > > | | | | | | | | > > | | | | | | | | > > | | | | | | | | > > | | | +---------+ | | | > > | | |<---------------+ data | | > > | +------------------+ +----------------------+ | > > +--------------------------------------------------------------+ > > > > Here port_id is used within fail-safe to get back to rte_eth_devices[n]. > > This disappears once a device is closed, as all shared space is zeroed. > > > > This means that sometimes ETH(sdev) and PORT_ID(sdev) is correct, and > > at some point it is not anymore, once a sub-device has been closed. > > This seems dangerous. > > The state of the sub-device is changed. > I don't see any issue. > > > I was thinking initially that allocating a place where each sdev would > > store their rte_eth_devices / port_id back-reference could alleviate > > the issue, meaning that the fail-safe would not zero it on > > sdev_close(), and it would remain valid for the lifetime of a > > sub-device, so even when a sub-device is in DEV_PROBED state. > > > > But now that I think about it, it could probably be simpler: instead > > of using (ETH(sdev)->data->port_id) for the port_id of an sdev > > (meaning that it is dependent on the lifetime of the sdev, instead of > > the lifetime of the failsafe), the port-id itself should be stored in > > the sub_device structure. This structure will be available for the > > lifetime of the failsafe, and the port_id is correct accross all processes. > > > > So PORT_ID(sdev) would be defined to something like (sdev->port_id), > > and > > ETH(sdev) would be (&rte_eth_devices[PORT_ID(sdev)]). It would remain > > correct even once the primary has closed the sub-device. > > > > What do you think? Do you agree that the current state is dangerous, > > and do you think the solution would alleviate the issue? Maybe the > > concern is unfounded and the only issue is within fs_dev_remove(). > > Yes it is only seen in fs_dev_remove(). > I discussed about your proposal with Raslan, and we agree we could change > from sub_device.data to sub_device.port_id, it may be more future-proof. > > I have only one doubt: look at the macro in this patch: > > #define ETH(sdev) \ > ((sdev)->data == NULL ? NULL : &rte_eth_devices[(sdev)->data- > >port_id]) > > The NULL check cannot be done with a port id. > I think it was needed to manage one case. Raslan? That's right since we need it for fs_tx_unsafe, to add a protection for plugged out devices during TX. > > Kindest regards Raslan Darawsheh ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [dpdk-dev] [PATCH v2 3/4] net/failsafe: replace local sub-device with shared data 2019-03-07 8:43 ` Raslan Darawsheh @ 2019-03-07 9:47 ` Gaëtan Rivet 2019-03-07 11:34 ` Raslan Darawsheh 0 siblings, 1 reply; 33+ messages in thread From: Gaëtan Rivet @ 2019-03-07 9:47 UTC (permalink / raw) To: Raslan Darawsheh; +Cc: Thomas Monjalon, dev, stephen On Thu, Mar 07, 2019 at 08:43:01AM +0000, Raslan Darawsheh wrote: > Hi, > > > -----Original Message----- > > From: Thomas Monjalon <thomas@monjalon.net> > > Sent: Wednesday, March 6, 2019 8:02 PM > > To: Gaëtan Rivet <gaetan.rivet@6wind.com>; Raslan Darawsheh > > <rasland@mellanox.com> > > Cc: dev@dpdk.org; stephen@networkplumber.org > > Subject: Re: [dpdk-dev] [PATCH v2 3/4] net/failsafe: replace local sub-device > > with shared data > > > > 06/03/2019 11:46, Gaëtan Rivet: > > > On Tue, Mar 05, 2019 at 06:58:04PM +0100, Thomas Monjalon wrote: > > > > 05/03/2019 18:38, Gaëtan Rivet: > > > > > What happens when a primary process closes a device before a > > secondary? > > > > > Is the secondary unable to stop / close its own then? Isn't there > > > > > some missing uninit? > > > > > > > > Is the secondary process supposed to do any closing? > > > > The device management should be done only by the primary process. > > > > > > > > Note: anyway all this hotplug related code should be dropped from > > > > failsafe to be replaced by EAL hotplug management. > > > > > > > > > > I don't know, I've never used secondary process. > > > However, cursory reading the code of rte_eth_dev_close(), I don't see > > > a guard against calling it from a secondary process? > > > > Yes indeed, there is no guard. > > That's something not clear in DPDK, previously we were attaching some > > vdevs in secondary only. > > > > > Reading code like > > > > > > rte_free(dev->data->rx_queues); > > > dev->data->rx_queues = NULL; > > > > > > within makes me think the issue has been seen at least once, where > > > shared data is freed multiple times, so I guessed some secondary > > > processes were calling it. Maybe they are not meant to, but what > > > prevents them from being badly written? > > > > > > Also, given rte_dev_remove IPC call to transfer the order to the > > > primary, it seems that at least secondary processes are expected to > > > call > > > rte_dev_remove() at some point? So are they only authorized to call > > > rte_dev_remove() (to manage hotplug), but not rte_eth_dev_close()? Is > > > there a specific documentation detailing the design of secondary > > > process and the related responsibilities in the lifetime of a device? > > > How are they synching their rte_eth_devices list if they are not > > > calling rte_eth_dev_close(), ever? > > > > All these calls should be done in primary. > > The IPC mechanism calls the attach/detach in secondary at EAL level. > > The PMDs does the bridge between EAL device and ethdev port status. > > But you are right, there can be a sync issue if closing an ethdev port and not > > removing the EAL device. > > This is a generic question about deciding whether we want all ethdev ports to > > be synced in multi-process or not. > > > > In failsafe context, we are closing the EAL device and change the state of the > > sub-device accordingly. So I think there is no issue. > > > > > > > This seems dangerous to me. Why not instead allocating a > > > > > per-process slab of memory that would hold the relevant references > > > > > and outlive the shared data (a per-process rte_eth_dev private data...). > > > > > > > > Which data do you think should be allocated per process? > > > > > > > > > > > > > > [-------- SHARED SPACE --------------] [-- PER-PROCESS --------] > > > +--------------------------------------------------------------+ > > > | +------------------+ +- rte_eth_devices[n] -+ | > > > | |rte_eth_dev_data |<---------------+ data | | PRIMARY > > > | | | +dev_priv-+ | | | > > > | | dev_private +-->| | | | | > > > | | ... | | | | | | > > > | | port_id | | | | | | > > > | | | | | | | | > > > | | | | | | | | > > > | | | | | +----------------------+ | > > > | | | | | +- rte_eth_devices[n] -+ | > > > | | | | | | | | SECONDARY > > > | | | | | | | | > > > | | | | | | | | > > > | | | | | | | | > > > | | | +---------+ | | | > > > | | |<---------------+ data | | > > > | +------------------+ +----------------------+ | > > > +--------------------------------------------------------------+ > > > > > > Here port_id is used within fail-safe to get back to rte_eth_devices[n]. > > > This disappears once a device is closed, as all shared space is zeroed. > > > > > > This means that sometimes ETH(sdev) and PORT_ID(sdev) is correct, and > > > at some point it is not anymore, once a sub-device has been closed. > > > This seems dangerous. > > > > The state of the sub-device is changed. > > I don't see any issue. > > > > > I was thinking initially that allocating a place where each sdev would > > > store their rte_eth_devices / port_id back-reference could alleviate > > > the issue, meaning that the fail-safe would not zero it on > > > sdev_close(), and it would remain valid for the lifetime of a > > > sub-device, so even when a sub-device is in DEV_PROBED state. > > > > > > But now that I think about it, it could probably be simpler: instead > > > of using (ETH(sdev)->data->port_id) for the port_id of an sdev > > > (meaning that it is dependent on the lifetime of the sdev, instead of > > > the lifetime of the failsafe), the port-id itself should be stored in > > > the sub_device structure. This structure will be available for the > > > lifetime of the failsafe, and the port_id is correct accross all processes. > > > > > > So PORT_ID(sdev) would be defined to something like (sdev->port_id), > > > and > > > ETH(sdev) would be (&rte_eth_devices[PORT_ID(sdev)]). It would remain > > > correct even once the primary has closed the sub-device. > > > > > > What do you think? Do you agree that the current state is dangerous, > > > and do you think the solution would alleviate the issue? Maybe the > > > concern is unfounded and the only issue is within fs_dev_remove(). > > > > Yes it is only seen in fs_dev_remove(). > > I discussed about your proposal with Raslan, and we agree we could change > > from sub_device.data to sub_device.port_id, it may be more future-proof. > > > > I have only one doubt: look at the macro in this patch: > > > > #define ETH(sdev) \ > > ((sdev)->data == NULL ? NULL : &rte_eth_devices[(sdev)->data- > > >port_id]) > > > > The NULL check cannot be done with a port id. > > I think it was needed to manage one case. Raslan? > > That's right since we need it for fs_tx_unsafe, to add a protection for plugged out devices during TX. Ok, thanks for your insights Thomas and Raslan. Sorry about the rambling above I needed to write down the stuff to think about it. You can use RTE_MAX_ETHPORTS as a sentinel value for port_id, this way the value is kept unsigned and there are several checks against this specific value otherwise. so ETH(sdev) could be (PORT_ID(sdev) >= RTE_MAX_ETHPORTS ? NULL : &rte_eth_devices[PORT_ID(sdev)]) -- Gaëtan Rivet 6WIND ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [dpdk-dev] [PATCH v2 3/4] net/failsafe: replace local sub-device with shared data 2019-03-07 9:47 ` Gaëtan Rivet @ 2019-03-07 11:34 ` Raslan Darawsheh 2019-03-07 11:50 ` Gaëtan Rivet 0 siblings, 1 reply; 33+ messages in thread From: Raslan Darawsheh @ 2019-03-07 11:34 UTC (permalink / raw) To: Gaëtan Rivet; +Cc: Thomas Monjalon, dev, stephen Hi, > -----Original Message----- > From: Gaëtan Rivet <gaetan.rivet@6wind.com> > Sent: Thursday, March 7, 2019 11:48 AM > To: Raslan Darawsheh <rasland@mellanox.com> > Cc: Thomas Monjalon <thomas@monjalon.net>; dev@dpdk.org; > stephen@networkplumber.org > Subject: Re: [dpdk-dev] [PATCH v2 3/4] net/failsafe: replace local sub-device > with shared data > > On Thu, Mar 07, 2019 at 08:43:01AM +0000, Raslan Darawsheh wrote: > > Hi, > > > > > -----Original Message----- > > > From: Thomas Monjalon <thomas@monjalon.net> > > > Sent: Wednesday, March 6, 2019 8:02 PM > > > To: Gaëtan Rivet <gaetan.rivet@6wind.com>; Raslan Darawsheh > > > <rasland@mellanox.com> > > > Cc: dev@dpdk.org; stephen@networkplumber.org > > > Subject: Re: [dpdk-dev] [PATCH v2 3/4] net/failsafe: replace local > > > sub-device with shared data > > > > > > 06/03/2019 11:46, Gaëtan Rivet: > > > > On Tue, Mar 05, 2019 at 06:58:04PM +0100, Thomas Monjalon wrote: > > > > > 05/03/2019 18:38, Gaëtan Rivet: > > > > > > What happens when a primary process closes a device before a > > > secondary? > > > > > > Is the secondary unable to stop / close its own then? Isn't > > > > > > there some missing uninit? > > > > > > > > > > Is the secondary process supposed to do any closing? > > > > > The device management should be done only by the primary process. > > > > > > > > > > Note: anyway all this hotplug related code should be dropped > > > > > from failsafe to be replaced by EAL hotplug management. > > > > > > > > > > > > > I don't know, I've never used secondary process. > > > > However, cursory reading the code of rte_eth_dev_close(), I don't > > > > see a guard against calling it from a secondary process? > > > > > > Yes indeed, there is no guard. > > > That's something not clear in DPDK, previously we were attaching > > > some vdevs in secondary only. > > > > > > > Reading code like > > > > > > > > rte_free(dev->data->rx_queues); > > > > dev->data->rx_queues = NULL; > > > > > > > > within makes me think the issue has been seen at least once, where > > > > shared data is freed multiple times, so I guessed some secondary > > > > processes were calling it. Maybe they are not meant to, but what > > > > prevents them from being badly written? > > > > > > > > Also, given rte_dev_remove IPC call to transfer the order to the > > > > primary, it seems that at least secondary processes are expected > > > > to call > > > > rte_dev_remove() at some point? So are they only authorized to > > > > call > > > > rte_dev_remove() (to manage hotplug), but not rte_eth_dev_close()? > > > > Is there a specific documentation detailing the design of > > > > secondary process and the related responsibilities in the lifetime of a > device? > > > > How are they synching their rte_eth_devices list if they are not > > > > calling rte_eth_dev_close(), ever? > > > > > > All these calls should be done in primary. > > > The IPC mechanism calls the attach/detach in secondary at EAL level. > > > The PMDs does the bridge between EAL device and ethdev port status. > > > But you are right, there can be a sync issue if closing an ethdev > > > port and not removing the EAL device. > > > This is a generic question about deciding whether we want all ethdev > > > ports to be synced in multi-process or not. > > > > > > In failsafe context, we are closing the EAL device and change the > > > state of the sub-device accordingly. So I think there is no issue. > > > > > > > > > This seems dangerous to me. Why not instead allocating a > > > > > > per-process slab of memory that would hold the relevant > > > > > > references and outlive the shared data (a per-process rte_eth_dev > private data...). > > > > > > > > > > Which data do you think should be allocated per process? > > > > > > > > > > > > > > > > > > [-------- SHARED SPACE --------------] [-- PER-PROCESS --------] > > > > +--------------------------------------------------------------+ > > > > | +------------------+ +- rte_eth_devices[n] -+ | > > > > | |rte_eth_dev_data |<---------------+ data | | PRIMARY > > > > | | | +dev_priv-+ | | | > > > > | | dev_private +-->| | | | | > > > > | | ... | | | | | | > > > > | | port_id | | | | | | > > > > | | | | | | | | > > > > | | | | | | | | > > > > | | | | | +----------------------+ | > > > > | | | | | +- rte_eth_devices[n] -+ | > > > > | | | | | | | | SECONDARY > > > > | | | | | | | | > > > > | | | | | | | | > > > > | | | | | | | | > > > > | | | +---------+ | | | > > > > | | |<---------------+ data | | > > > > | +------------------+ +----------------------+ | > > > > +--------------------------------------------------------------+ > > > > > > > > Here port_id is used within fail-safe to get back to rte_eth_devices[n]. > > > > This disappears once a device is closed, as all shared space is zeroed. > > > > > > > > This means that sometimes ETH(sdev) and PORT_ID(sdev) is correct, > > > > and at some point it is not anymore, once a sub-device has been closed. > > > > This seems dangerous. > > > > > > The state of the sub-device is changed. > > > I don't see any issue. > > > > > > > I was thinking initially that allocating a place where each sdev > > > > would store their rte_eth_devices / port_id back-reference could > > > > alleviate the issue, meaning that the fail-safe would not zero it > > > > on sdev_close(), and it would remain valid for the lifetime of a > > > > sub-device, so even when a sub-device is in DEV_PROBED state. > > > > > > > > But now that I think about it, it could probably be simpler: > > > > instead of using (ETH(sdev)->data->port_id) for the port_id of an > > > > sdev (meaning that it is dependent on the lifetime of the sdev, > > > > instead of the lifetime of the failsafe), the port-id itself > > > > should be stored in the sub_device structure. This structure will > > > > be available for the lifetime of the failsafe, and the port_id is correct > accross all processes. > > > > > > > > So PORT_ID(sdev) would be defined to something like > > > > (sdev->port_id), and > > > > ETH(sdev) would be (&rte_eth_devices[PORT_ID(sdev)]). It would > > > > remain correct even once the primary has closed the sub-device. > > > > > > > > What do you think? Do you agree that the current state is > > > > dangerous, and do you think the solution would alleviate the > > > > issue? Maybe the concern is unfounded and the only issue is within > fs_dev_remove(). > > > > > > Yes it is only seen in fs_dev_remove(). > > > I discussed about your proposal with Raslan, and we agree we could > > > change from sub_device.data to sub_device.port_id, it may be more > future-proof. > > > > > > I have only one doubt: look at the macro in this patch: > > > > > > #define ETH(sdev) \ > > > ((sdev)->data == NULL ? NULL : &rte_eth_devices[(sdev)->data- > > > >port_id]) > > > > > > The NULL check cannot be done with a port id. > > > I think it was needed to manage one case. Raslan? > > > > That's right since we need it for fs_tx_unsafe, to add a protection for > plugged out devices during TX. > > Ok, thanks for your insights Thomas and Raslan. Sorry about the rambling > above I needed to write down the stuff to think about it. > > You can use RTE_MAX_ETHPORTS as a sentinel value for port_id, this way the > value is kept unsigned and there are several checks against this specific value > otherwise. > > so ETH(sdev) could be > > (PORT_ID(sdev) >= RTE_MAX_ETHPORTS ? NULL : > &rte_eth_devices[PORT_ID(sdev)]) > But, this mean that you have to explicitly set the port id to RTE_MAX_ETH_PORTS in the sdev after fs_dev_remove and you shouldn't rely on the port ID anymore. > -- > Gaëtan Rivet > 6WIND Kindest regards Raslan Darawsheh ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [dpdk-dev] [PATCH v2 3/4] net/failsafe: replace local sub-device with shared data 2019-03-07 11:34 ` Raslan Darawsheh @ 2019-03-07 11:50 ` Gaëtan Rivet 0 siblings, 0 replies; 33+ messages in thread From: Gaëtan Rivet @ 2019-03-07 11:50 UTC (permalink / raw) To: Raslan Darawsheh; +Cc: Thomas Monjalon, dev, stephen On Thu, Mar 07, 2019 at 11:34:42AM +0000, Raslan Darawsheh wrote: > Hi, > > > -----Original Message----- > > From: Gaëtan Rivet <gaetan.rivet@6wind.com> > > Sent: Thursday, March 7, 2019 11:48 AM > > To: Raslan Darawsheh <rasland@mellanox.com> > > Cc: Thomas Monjalon <thomas@monjalon.net>; dev@dpdk.org; > > stephen@networkplumber.org > > Subject: Re: [dpdk-dev] [PATCH v2 3/4] net/failsafe: replace local sub-device > > with shared data > > > > On Thu, Mar 07, 2019 at 08:43:01AM +0000, Raslan Darawsheh wrote: > > > Hi, > > > > > > > -----Original Message----- > > > > From: Thomas Monjalon <thomas@monjalon.net> > > > > Sent: Wednesday, March 6, 2019 8:02 PM > > > > To: Gaëtan Rivet <gaetan.rivet@6wind.com>; Raslan Darawsheh > > > > <rasland@mellanox.com> > > > > Cc: dev@dpdk.org; stephen@networkplumber.org > > > > Subject: Re: [dpdk-dev] [PATCH v2 3/4] net/failsafe: replace local > > > > sub-device with shared data > > > > > > > > 06/03/2019 11:46, Gaëtan Rivet: > > > > > On Tue, Mar 05, 2019 at 06:58:04PM +0100, Thomas Monjalon wrote: > > > > > > 05/03/2019 18:38, Gaëtan Rivet: > > > > > > > What happens when a primary process closes a device before a > > > > secondary? > > > > > > > Is the secondary unable to stop / close its own then? Isn't > > > > > > > there some missing uninit? > > > > > > > > > > > > Is the secondary process supposed to do any closing? > > > > > > The device management should be done only by the primary process. > > > > > > > > > > > > Note: anyway all this hotplug related code should be dropped > > > > > > from failsafe to be replaced by EAL hotplug management. > > > > > > > > > > > > > > > > I don't know, I've never used secondary process. > > > > > However, cursory reading the code of rte_eth_dev_close(), I don't > > > > > see a guard against calling it from a secondary process? > > > > > > > > Yes indeed, there is no guard. > > > > That's something not clear in DPDK, previously we were attaching > > > > some vdevs in secondary only. > > > > > > > > > Reading code like > > > > > > > > > > rte_free(dev->data->rx_queues); > > > > > dev->data->rx_queues = NULL; > > > > > > > > > > within makes me think the issue has been seen at least once, where > > > > > shared data is freed multiple times, so I guessed some secondary > > > > > processes were calling it. Maybe they are not meant to, but what > > > > > prevents them from being badly written? > > > > > > > > > > Also, given rte_dev_remove IPC call to transfer the order to the > > > > > primary, it seems that at least secondary processes are expected > > > > > to call > > > > > rte_dev_remove() at some point? So are they only authorized to > > > > > call > > > > > rte_dev_remove() (to manage hotplug), but not rte_eth_dev_close()? > > > > > Is there a specific documentation detailing the design of > > > > > secondary process and the related responsibilities in the lifetime of a > > device? > > > > > How are they synching their rte_eth_devices list if they are not > > > > > calling rte_eth_dev_close(), ever? > > > > > > > > All these calls should be done in primary. > > > > The IPC mechanism calls the attach/detach in secondary at EAL level. > > > > The PMDs does the bridge between EAL device and ethdev port status. > > > > But you are right, there can be a sync issue if closing an ethdev > > > > port and not removing the EAL device. > > > > This is a generic question about deciding whether we want all ethdev > > > > ports to be synced in multi-process or not. > > > > > > > > In failsafe context, we are closing the EAL device and change the > > > > state of the sub-device accordingly. So I think there is no issue. > > > > > > > > > > > This seems dangerous to me. Why not instead allocating a > > > > > > > per-process slab of memory that would hold the relevant > > > > > > > references and outlive the shared data (a per-process rte_eth_dev > > private data...). > > > > > > > > > > > > Which data do you think should be allocated per process? > > > > > > > > > > > > > > > > > > > > > > [-------- SHARED SPACE --------------] [-- PER-PROCESS --------] > > > > > +--------------------------------------------------------------+ > > > > > | +------------------+ +- rte_eth_devices[n] -+ | > > > > > | |rte_eth_dev_data |<---------------+ data | | PRIMARY > > > > > | | | +dev_priv-+ | | | > > > > > | | dev_private +-->| | | | | > > > > > | | ... | | | | | | > > > > > | | port_id | | | | | | > > > > > | | | | | | | | > > > > > | | | | | | | | > > > > > | | | | | +----------------------+ | > > > > > | | | | | +- rte_eth_devices[n] -+ | > > > > > | | | | | | | | SECONDARY > > > > > | | | | | | | | > > > > > | | | | | | | | > > > > > | | | | | | | | > > > > > | | | +---------+ | | | > > > > > | | |<---------------+ data | | > > > > > | +------------------+ +----------------------+ | > > > > > +--------------------------------------------------------------+ > > > > > > > > > > Here port_id is used within fail-safe to get back to rte_eth_devices[n]. > > > > > This disappears once a device is closed, as all shared space is zeroed. > > > > > > > > > > This means that sometimes ETH(sdev) and PORT_ID(sdev) is correct, > > > > > and at some point it is not anymore, once a sub-device has been closed. > > > > > This seems dangerous. > > > > > > > > The state of the sub-device is changed. > > > > I don't see any issue. > > > > > > > > > I was thinking initially that allocating a place where each sdev > > > > > would store their rte_eth_devices / port_id back-reference could > > > > > alleviate the issue, meaning that the fail-safe would not zero it > > > > > on sdev_close(), and it would remain valid for the lifetime of a > > > > > sub-device, so even when a sub-device is in DEV_PROBED state. > > > > > > > > > > But now that I think about it, it could probably be simpler: > > > > > instead of using (ETH(sdev)->data->port_id) for the port_id of an > > > > > sdev (meaning that it is dependent on the lifetime of the sdev, > > > > > instead of the lifetime of the failsafe), the port-id itself > > > > > should be stored in the sub_device structure. This structure will > > > > > be available for the lifetime of the failsafe, and the port_id is correct > > accross all processes. > > > > > > > > > > So PORT_ID(sdev) would be defined to something like > > > > > (sdev->port_id), and > > > > > ETH(sdev) would be (&rte_eth_devices[PORT_ID(sdev)]). It would > > > > > remain correct even once the primary has closed the sub-device. > > > > > > > > > > What do you think? Do you agree that the current state is > > > > > dangerous, and do you think the solution would alleviate the > > > > > issue? Maybe the concern is unfounded and the only issue is within > > fs_dev_remove(). > > > > > > > > Yes it is only seen in fs_dev_remove(). > > > > I discussed about your proposal with Raslan, and we agree we could > > > > change from sub_device.data to sub_device.port_id, it may be more > > future-proof. > > > > > > > > I have only one doubt: look at the macro in this patch: > > > > > > > > #define ETH(sdev) \ > > > > ((sdev)->data == NULL ? NULL : &rte_eth_devices[(sdev)->data- > > > > >port_id]) > > > > > > > > The NULL check cannot be done with a port id. > > > > I think it was needed to manage one case. Raslan? > > > > > > That's right since we need it for fs_tx_unsafe, to add a protection for > > plugged out devices during TX. > > > > Ok, thanks for your insights Thomas and Raslan. Sorry about the rambling > > above I needed to write down the stuff to think about it. > > > > You can use RTE_MAX_ETHPORTS as a sentinel value for port_id, this way the > > value is kept unsigned and there are several checks against this specific value > > otherwise. > > > > so ETH(sdev) could be > > > > (PORT_ID(sdev) >= RTE_MAX_ETHPORTS ? NULL : > > &rte_eth_devices[PORT_ID(sdev)]) > > > But, this mean that you have to explicitly set the port id to RTE_MAX_ETH_PORTS in the sdev after fs_dev_remove and you shouldn't rely on the port ID anymore. > Yes, once a sub-device has completely finished its removal (from the fail-safe PoV), the fail-safe marks the sub-device as not used anymore. This seems correct. If the fail-safe used the sdev->data->port_id instead, it would return 0, which is a valid port_id that is probably still used by another port. > > -- > > > Gaëtan Rivet > > 6WIND > > Kindest regards > Raslan Darawsheh -- Gaëtan Rivet 6WIND ^ permalink raw reply [flat|nested] 33+ messages in thread
* [dpdk-dev] [PATCH v2 2/4] net/failsafe: change back-reference from sub-device 2019-03-05 9:52 [dpdk-dev] [PATCH v2 1/4] net/failsafe: replace local device with shared data Raslan Darawsheh 2019-03-05 9:52 ` [dpdk-dev] [PATCH v2 3/4] net/failsafe: replace local sub-device " Raslan Darawsheh @ 2019-03-05 9:52 ` Raslan Darawsheh 2019-03-05 16:48 ` Gaëtan Rivet 2019-03-05 9:52 ` [dpdk-dev] [PATCH v2 4/4] net/failsafe: support secondary process Raslan Darawsheh ` (2 subsequent siblings) 4 siblings, 1 reply; 33+ messages in thread From: Raslan Darawsheh @ 2019-03-05 9:52 UTC (permalink / raw) To: gaetan.rivet; +Cc: dev, Thomas Monjalon, Raslan Darawsheh, stephen In multiprocess context, the sub-device structure is shared between processes. The reference to the failsafe device was a per process pointer. It's changed to port id which is the same for all processes. Signed-off-by: Raslan Darawsheh <rasland@mellanox.com> Signed-off-by: Thomas Monjalon <thomas@monjalon.net> --- v2: changed macro to an inline function --- drivers/net/failsafe/failsafe_eal.c | 2 +- drivers/net/failsafe/failsafe_ether.c | 15 ++++++++------- drivers/net/failsafe/failsafe_intr.c | 10 +++++----- drivers/net/failsafe/failsafe_private.h | 13 ++++++++++--- 4 files changed, 24 insertions(+), 16 deletions(-) diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c index 8a888b1..56d1669 100644 --- a/drivers/net/failsafe/failsafe_eal.c +++ b/drivers/net/failsafe/failsafe_eal.c @@ -114,7 +114,7 @@ fs_bus_init(struct rte_eth_dev *dev) } ETH(sdev) = &rte_eth_devices[pid]; SUB_ID(sdev) = i; - sdev->fs_dev = dev; + sdev->fs_port_id = dev->data->port_id; sdev->dev = ETH(sdev)->device; sdev->state = DEV_PROBED; } diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c index 1783165..d5b1488 100644 --- a/drivers/net/failsafe/failsafe_ether.c +++ b/drivers/net/failsafe/failsafe_ether.c @@ -298,7 +298,7 @@ fs_dev_remove(struct sub_device *sdev) break; } sdev->remove = 0; - failsafe_hotplug_alarm_install(sdev->fs_dev); + failsafe_hotplug_alarm_install(fsdev_from_subdev(sdev)); } static void @@ -318,8 +318,9 @@ fs_dev_stats_save(struct sub_device *sdev) WARN("Using latest snapshot taken before %"PRIu64" seconds.\n", (rte_rdtsc() - timestamp) / rte_get_tsc_hz()); } - failsafe_stats_increment(&PRIV(sdev->fs_dev)->stats_accumulator, - err ? &sdev->stats_snapshot.stats : &stats); + failsafe_stats_increment + (&PRIV(fsdev_from_subdev(sdev))->stats_accumulator, + err ? &sdev->stats_snapshot.stats : &stats); memset(&sdev->stats_snapshot, 0, sizeof(sdev->stats_snapshot)); } @@ -566,17 +567,17 @@ failsafe_eth_rmv_event_callback(uint16_t port_id __rte_unused, { struct sub_device *sdev = cb_arg; - fs_lock(sdev->fs_dev, 0); + fs_lock(fsdev_from_subdev(sdev), 0); /* Switch as soon as possible tx_dev. */ - fs_switch_dev(sdev->fs_dev, sdev); + fs_switch_dev(fsdev_from_subdev(sdev), sdev); /* Use safe bursts in any case. */ - failsafe_set_burst_fn(sdev->fs_dev, 1); + failsafe_set_burst_fn(fsdev_from_subdev(sdev), 1); /* * Async removal, the sub-PMD will try to unregister * the callback at the source of the current thread context. */ sdev->remove = 1; - fs_unlock(sdev->fs_dev, 0); + fs_unlock(fsdev_from_subdev(sdev), 0); return 0; } diff --git a/drivers/net/failsafe/failsafe_intr.c b/drivers/net/failsafe/failsafe_intr.c index 09aa3c6..d372d09 100644 --- a/drivers/net/failsafe/failsafe_intr.c +++ b/drivers/net/failsafe/failsafe_intr.c @@ -274,14 +274,14 @@ failsafe_eth_rx_intr_ctl_subdevice(struct sub_device *sdev, int op) int rc; int ret = 0; + fsdev = fsdev_from_subdev(sdev); if (sdev == NULL || (ETH(sdev) == NULL) || - sdev->fs_dev == NULL || (PRIV(sdev->fs_dev) == NULL)) { + fsdev == NULL || (PRIV(fsdev) == NULL)) { ERROR("Called with invalid arguments"); return -EINVAL; } dev = ETH(sdev); - fsdev = sdev->fs_dev; - epfd = PRIV(sdev->fs_dev)->rxp.efd; + epfd = PRIV(fsdev)->rxp.efd; pid = PORT_ID(sdev); if (epfd <= 0) { @@ -330,7 +330,7 @@ int failsafe_rx_intr_install_subdevice(struct sub_device *sdev) const struct rte_intr_conf *const intr_conf = Ð(sdev)->data->dev_conf.intr_conf; - fsdev = sdev->fs_dev; + fsdev = fsdev_from_subdev(sdev); rxq = (struct rxq **)fsdev->data->rx_queues; if (intr_conf->rxq == 0) return 0; @@ -368,7 +368,7 @@ void failsafe_rx_intr_uninstall_subdevice(struct sub_device *sdev) struct rte_eth_dev *fsdev; struct rxq *fsrxq; - fsdev = sdev->fs_dev; + fsdev = fsdev_from_subdev(sdev); for (qid = 0; qid < ETH(sdev)->data->nb_rx_queues; qid++) { if (qid < fsdev->data->nb_rx_queues) { fsrxq = fsdev->data->rx_queues[qid]; diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h index 29dfd40..84e847f 100644 --- a/drivers/net/failsafe/failsafe_private.h +++ b/drivers/net/failsafe/failsafe_private.h @@ -117,7 +117,7 @@ struct sub_device { /* Others are retrieved through a file descriptor */ char *fd_str; /* fail-safe device backreference */ - struct rte_eth_dev *fs_dev; + uint16_t fs_port_id; /* shared between processes*/ /* flag calling for recollection */ volatile unsigned int remove:1; /* flow isolation state */ @@ -324,7 +324,8 @@ extern int failsafe_mac_from_arg; */ #define FS_ATOMIC_RX(s, i) \ rte_atomic64_read( \ - &((struct rxq *)((s)->fs_dev->data->rx_queues[i]))->refcnt[(s)->sid] \ + &((struct rxq *) \ + (fsdev_from_subdev(s)->data->rx_queues[i]))->refcnt[(s)->sid] \ ) /** * s: (struct sub_device *) @@ -332,7 +333,8 @@ extern int failsafe_mac_from_arg; */ #define FS_ATOMIC_TX(s, i) \ rte_atomic64_read( \ - &((struct txq *)((s)->fs_dev->data->tx_queues[i]))->refcnt[(s)->sid] \ + &((struct txq *) \ + (fsdev_from_subdev(s)->data->tx_queues[i]))->refcnt[(s)->sid] \ ) #ifdef RTE_EXEC_ENV_BSDAPP @@ -379,6 +381,11 @@ fs_find_next(struct rte_eth_dev *dev, return &subs[sid]; } +static inline struct rte_eth_dev * +fsdev_from_subdev(struct sub_device *sdev) { + return &rte_eth_devices[sdev->fs_port_id]; +} + /* * Lock hot-plug mutex. * is_alarm means that the caller is, for sure, the hot-plug alarm mechanism. -- 2.7.4 ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [dpdk-dev] [PATCH v2 2/4] net/failsafe: change back-reference from sub-device 2019-03-05 9:52 ` [dpdk-dev] [PATCH v2 2/4] net/failsafe: change back-reference from sub-device Raslan Darawsheh @ 2019-03-05 16:48 ` Gaëtan Rivet 2019-03-07 9:01 ` Raslan Darawsheh 0 siblings, 1 reply; 33+ messages in thread From: Gaëtan Rivet @ 2019-03-05 16:48 UTC (permalink / raw) To: Raslan Darawsheh; +Cc: dev, Thomas Monjalon, stephen Beside the squash referenced in p1, On Tue, Mar 05, 2019 at 09:52:05AM +0000, Raslan Darawsheh wrote: > In multiprocess context, the sub-device structure is shared > between processes. The reference to the failsafe device was > a per process pointer. It's changed to port id which is the > same for all processes. > > Signed-off-by: Raslan Darawsheh <rasland@mellanox.com> > Signed-off-by: Thomas Monjalon <thomas@monjalon.net> > --- > v2: changed macro to an inline function > --- > drivers/net/failsafe/failsafe_eal.c | 2 +- > drivers/net/failsafe/failsafe_ether.c | 15 ++++++++------- > drivers/net/failsafe/failsafe_intr.c | 10 +++++----- > drivers/net/failsafe/failsafe_private.h | 13 ++++++++++--- > 4 files changed, 24 insertions(+), 16 deletions(-) > > diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c > index 8a888b1..56d1669 100644 > --- a/drivers/net/failsafe/failsafe_eal.c > +++ b/drivers/net/failsafe/failsafe_eal.c > @@ -114,7 +114,7 @@ fs_bus_init(struct rte_eth_dev *dev) > } > ETH(sdev) = &rte_eth_devices[pid]; > SUB_ID(sdev) = i; > - sdev->fs_dev = dev; > + sdev->fs_port_id = dev->data->port_id; > sdev->dev = ETH(sdev)->device; > sdev->state = DEV_PROBED; > } > diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c > index 1783165..d5b1488 100644 > --- a/drivers/net/failsafe/failsafe_ether.c > +++ b/drivers/net/failsafe/failsafe_ether.c > @@ -298,7 +298,7 @@ fs_dev_remove(struct sub_device *sdev) > break; > } > sdev->remove = 0; > - failsafe_hotplug_alarm_install(sdev->fs_dev); > + failsafe_hotplug_alarm_install(fsdev_from_subdev(sdev)); > } > > static void > @@ -318,8 +318,9 @@ fs_dev_stats_save(struct sub_device *sdev) > WARN("Using latest snapshot taken before %"PRIu64" seconds.\n", > (rte_rdtsc() - timestamp) / rte_get_tsc_hz()); > } > - failsafe_stats_increment(&PRIV(sdev->fs_dev)->stats_accumulator, > - err ? &sdev->stats_snapshot.stats : &stats); > + failsafe_stats_increment > + (&PRIV(fsdev_from_subdev(sdev))->stats_accumulator, > + err ? &sdev->stats_snapshot.stats : &stats); > memset(&sdev->stats_snapshot, 0, sizeof(sdev->stats_snapshot)); > } > > @@ -566,17 +567,17 @@ failsafe_eth_rmv_event_callback(uint16_t port_id __rte_unused, > { > struct sub_device *sdev = cb_arg; > > - fs_lock(sdev->fs_dev, 0); > + fs_lock(fsdev_from_subdev(sdev), 0); > /* Switch as soon as possible tx_dev. */ > - fs_switch_dev(sdev->fs_dev, sdev); > + fs_switch_dev(fsdev_from_subdev(sdev), sdev); > /* Use safe bursts in any case. */ > - failsafe_set_burst_fn(sdev->fs_dev, 1); > + failsafe_set_burst_fn(fsdev_from_subdev(sdev), 1); > /* > * Async removal, the sub-PMD will try to unregister > * the callback at the source of the current thread context. > */ > sdev->remove = 1; > - fs_unlock(sdev->fs_dev, 0); > + fs_unlock(fsdev_from_subdev(sdev), 0); > return 0; > } > > diff --git a/drivers/net/failsafe/failsafe_intr.c b/drivers/net/failsafe/failsafe_intr.c > index 09aa3c6..d372d09 100644 > --- a/drivers/net/failsafe/failsafe_intr.c > +++ b/drivers/net/failsafe/failsafe_intr.c > @@ -274,14 +274,14 @@ failsafe_eth_rx_intr_ctl_subdevice(struct sub_device *sdev, int op) > int rc; > int ret = 0; > > + fsdev = fsdev_from_subdev(sdev); > if (sdev == NULL || (ETH(sdev) == NULL) || > - sdev->fs_dev == NULL || (PRIV(sdev->fs_dev) == NULL)) { > + fsdev == NULL || (PRIV(fsdev) == NULL)) { > ERROR("Called with invalid arguments"); > return -EINVAL; > } > dev = ETH(sdev); > - fsdev = sdev->fs_dev; > - epfd = PRIV(sdev->fs_dev)->rxp.efd; > + epfd = PRIV(fsdev)->rxp.efd; > pid = PORT_ID(sdev); > > if (epfd <= 0) { > @@ -330,7 +330,7 @@ int failsafe_rx_intr_install_subdevice(struct sub_device *sdev) > const struct rte_intr_conf *const intr_conf = > Ð(sdev)->data->dev_conf.intr_conf; > > - fsdev = sdev->fs_dev; > + fsdev = fsdev_from_subdev(sdev); > rxq = (struct rxq **)fsdev->data->rx_queues; > if (intr_conf->rxq == 0) > return 0; > @@ -368,7 +368,7 @@ void failsafe_rx_intr_uninstall_subdevice(struct sub_device *sdev) > struct rte_eth_dev *fsdev; > struct rxq *fsrxq; > > - fsdev = sdev->fs_dev; > + fsdev = fsdev_from_subdev(sdev); > for (qid = 0; qid < ETH(sdev)->data->nb_rx_queues; qid++) { > if (qid < fsdev->data->nb_rx_queues) { > fsrxq = fsdev->data->rx_queues[qid]; > diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h > index 29dfd40..84e847f 100644 > --- a/drivers/net/failsafe/failsafe_private.h > +++ b/drivers/net/failsafe/failsafe_private.h > @@ -117,7 +117,7 @@ struct sub_device { > /* Others are retrieved through a file descriptor */ > char *fd_str; > /* fail-safe device backreference */ > - struct rte_eth_dev *fs_dev; > + uint16_t fs_port_id; /* shared between processes*/ > /* flag calling for recollection */ > volatile unsigned int remove:1; > /* flow isolation state */ > @@ -324,7 +324,8 @@ extern int failsafe_mac_from_arg; > */ > #define FS_ATOMIC_RX(s, i) \ > rte_atomic64_read( \ > - &((struct rxq *)((s)->fs_dev->data->rx_queues[i]))->refcnt[(s)->sid] \ > + &((struct rxq *) \ > + (fsdev_from_subdev(s)->data->rx_queues[i]))->refcnt[(s)->sid] \ > ) > /** > * s: (struct sub_device *) > @@ -332,7 +333,8 @@ extern int failsafe_mac_from_arg; > */ > #define FS_ATOMIC_TX(s, i) \ > rte_atomic64_read( \ > - &((struct txq *)((s)->fs_dev->data->tx_queues[i]))->refcnt[(s)->sid] \ > + &((struct txq *) \ > + (fsdev_from_subdev(s)->data->tx_queues[i]))->refcnt[(s)->sid] \ > ) > > #ifdef RTE_EXEC_ENV_BSDAPP > @@ -379,6 +381,11 @@ fs_find_next(struct rte_eth_dev *dev, > return &subs[sid]; > } > > +static inline struct rte_eth_dev * > +fsdev_from_subdev(struct sub_device *sdev) { > + return &rte_eth_devices[sdev->fs_port_id]; > +} > + Using a static inline that already enforce parameter type makes the _from_subdev suffix redundant. I think fs_dev(struct sub_device *sdev); would be more readable in most of the changes above, while keeping with the fs_ prefix used in all static failsafe functions. > /* > * Lock hot-plug mutex. > * is_alarm means that the caller is, for sure, the hot-plug alarm mechanism. > -- > 2.7.4 > -- Gaëtan Rivet 6WIND ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [dpdk-dev] [PATCH v2 2/4] net/failsafe: change back-reference from sub-device 2019-03-05 16:48 ` Gaëtan Rivet @ 2019-03-07 9:01 ` Raslan Darawsheh 2019-03-07 9:43 ` Gaëtan Rivet 0 siblings, 1 reply; 33+ messages in thread From: Raslan Darawsheh @ 2019-03-07 9:01 UTC (permalink / raw) To: Gaëtan Rivet; +Cc: dev, Thomas Monjalon, stephen Hi, > -----Original Message----- > From: Gaëtan Rivet <gaetan.rivet@6wind.com> > Sent: Tuesday, March 5, 2019 6:49 PM > To: Raslan Darawsheh <rasland@mellanox.com> > Cc: dev@dpdk.org; Thomas Monjalon <thomas@monjalon.net>; > stephen@networkplumber.org > Subject: Re: [PATCH v2 2/4] net/failsafe: change back-reference from sub- > device > > Beside the squash referenced in p1, > > On Tue, Mar 05, 2019 at 09:52:05AM +0000, Raslan Darawsheh wrote: > > In multiprocess context, the sub-device structure is shared between > > processes. The reference to the failsafe device was a per process > > pointer. It's changed to port id which is the same for all processes. > > > > Signed-off-by: Raslan Darawsheh <rasland@mellanox.com> > > Signed-off-by: Thomas Monjalon <thomas@monjalon.net> > > --- > > v2: changed macro to an inline function > > --- > > drivers/net/failsafe/failsafe_eal.c | 2 +- > > drivers/net/failsafe/failsafe_ether.c | 15 ++++++++------- > > drivers/net/failsafe/failsafe_intr.c | 10 +++++----- > > drivers/net/failsafe/failsafe_private.h | 13 ++++++++++--- > > 4 files changed, 24 insertions(+), 16 deletions(-) > > > > diff --git a/drivers/net/failsafe/failsafe_eal.c > > b/drivers/net/failsafe/failsafe_eal.c > > index 8a888b1..56d1669 100644 > > --- a/drivers/net/failsafe/failsafe_eal.c > > +++ b/drivers/net/failsafe/failsafe_eal.c > > @@ -114,7 +114,7 @@ fs_bus_init(struct rte_eth_dev *dev) > > } > > ETH(sdev) = &rte_eth_devices[pid]; > > SUB_ID(sdev) = i; > > - sdev->fs_dev = dev; > > + sdev->fs_port_id = dev->data->port_id; > > sdev->dev = ETH(sdev)->device; > > sdev->state = DEV_PROBED; > > } > > diff --git a/drivers/net/failsafe/failsafe_ether.c > > b/drivers/net/failsafe/failsafe_ether.c > > index 1783165..d5b1488 100644 > > --- a/drivers/net/failsafe/failsafe_ether.c > > +++ b/drivers/net/failsafe/failsafe_ether.c > > @@ -298,7 +298,7 @@ fs_dev_remove(struct sub_device *sdev) > > break; > > } > > sdev->remove = 0; > > - failsafe_hotplug_alarm_install(sdev->fs_dev); > > + failsafe_hotplug_alarm_install(fsdev_from_subdev(sdev)); > > } > > > > static void > > @@ -318,8 +318,9 @@ fs_dev_stats_save(struct sub_device *sdev) > > WARN("Using latest snapshot taken before > %"PRIu64" seconds.\n", > > (rte_rdtsc() - timestamp) / > rte_get_tsc_hz()); > > } > > - failsafe_stats_increment(&PRIV(sdev->fs_dev)->stats_accumulator, > > - err ? &sdev->stats_snapshot.stats : &stats); > > + failsafe_stats_increment > > + (&PRIV(fsdev_from_subdev(sdev))->stats_accumulator, > > + err ? &sdev->stats_snapshot.stats : &stats); > > memset(&sdev->stats_snapshot, 0, sizeof(sdev->stats_snapshot)); > } > > > > @@ -566,17 +567,17 @@ failsafe_eth_rmv_event_callback(uint16_t > port_id > > __rte_unused, { > > struct sub_device *sdev = cb_arg; > > > > - fs_lock(sdev->fs_dev, 0); > > + fs_lock(fsdev_from_subdev(sdev), 0); > > /* Switch as soon as possible tx_dev. */ > > - fs_switch_dev(sdev->fs_dev, sdev); > > + fs_switch_dev(fsdev_from_subdev(sdev), sdev); > > /* Use safe bursts in any case. */ > > - failsafe_set_burst_fn(sdev->fs_dev, 1); > > + failsafe_set_burst_fn(fsdev_from_subdev(sdev), 1); > > /* > > * Async removal, the sub-PMD will try to unregister > > * the callback at the source of the current thread context. > > */ > > sdev->remove = 1; > > - fs_unlock(sdev->fs_dev, 0); > > + fs_unlock(fsdev_from_subdev(sdev), 0); > > return 0; > > } > > > > diff --git a/drivers/net/failsafe/failsafe_intr.c > > b/drivers/net/failsafe/failsafe_intr.c > > index 09aa3c6..d372d09 100644 > > --- a/drivers/net/failsafe/failsafe_intr.c > > +++ b/drivers/net/failsafe/failsafe_intr.c > > @@ -274,14 +274,14 @@ failsafe_eth_rx_intr_ctl_subdevice(struct > sub_device *sdev, int op) > > int rc; > > int ret = 0; > > > > + fsdev = fsdev_from_subdev(sdev); > > if (sdev == NULL || (ETH(sdev) == NULL) || > > - sdev->fs_dev == NULL || (PRIV(sdev->fs_dev) == NULL)) { > > + fsdev == NULL || (PRIV(fsdev) == NULL)) { > > ERROR("Called with invalid arguments"); > > return -EINVAL; > > } > > dev = ETH(sdev); > > - fsdev = sdev->fs_dev; > > - epfd = PRIV(sdev->fs_dev)->rxp.efd; > > + epfd = PRIV(fsdev)->rxp.efd; > > pid = PORT_ID(sdev); > > > > if (epfd <= 0) { > > @@ -330,7 +330,7 @@ int failsafe_rx_intr_install_subdevice(struct > sub_device *sdev) > > const struct rte_intr_conf *const intr_conf = > > Ð(sdev)->data->dev_conf.intr_conf; > > > > - fsdev = sdev->fs_dev; > > + fsdev = fsdev_from_subdev(sdev); > > rxq = (struct rxq **)fsdev->data->rx_queues; > > if (intr_conf->rxq == 0) > > return 0; > > @@ -368,7 +368,7 @@ void failsafe_rx_intr_uninstall_subdevice(struct > sub_device *sdev) > > struct rte_eth_dev *fsdev; > > struct rxq *fsrxq; > > > > - fsdev = sdev->fs_dev; > > + fsdev = fsdev_from_subdev(sdev); > > for (qid = 0; qid < ETH(sdev)->data->nb_rx_queues; qid++) { > > if (qid < fsdev->data->nb_rx_queues) { > > fsrxq = fsdev->data->rx_queues[qid]; diff --git > > a/drivers/net/failsafe/failsafe_private.h > > b/drivers/net/failsafe/failsafe_private.h > > index 29dfd40..84e847f 100644 > > --- a/drivers/net/failsafe/failsafe_private.h > > +++ b/drivers/net/failsafe/failsafe_private.h > > @@ -117,7 +117,7 @@ struct sub_device { > > /* Others are retrieved through a file descriptor */ > > char *fd_str; > > /* fail-safe device backreference */ > > - struct rte_eth_dev *fs_dev; > > + uint16_t fs_port_id; /* shared between processes*/ > > /* flag calling for recollection */ > > volatile unsigned int remove:1; > > /* flow isolation state */ > > @@ -324,7 +324,8 @@ extern int failsafe_mac_from_arg; > > */ > > #define FS_ATOMIC_RX(s, i) \ > > rte_atomic64_read( \ > > - &((struct rxq *)((s)->fs_dev->data->rx_queues[i]))->refcnt[(s)->sid] > \ > > + &((struct rxq *) \ > > + (fsdev_from_subdev(s)->data->rx_queues[i]))->refcnt[(s)->sid] \ > > ) > > /** > > * s: (struct sub_device *) > > @@ -332,7 +333,8 @@ extern int failsafe_mac_from_arg; > > */ > > #define FS_ATOMIC_TX(s, i) \ > > rte_atomic64_read( \ > > - &((struct txq *)((s)->fs_dev->data->tx_queues[i]))->refcnt[(s)->sid] > \ > > + &((struct txq *) \ > > + (fsdev_from_subdev(s)->data->tx_queues[i]))->refcnt[(s)->sid] \ > > ) > > > > #ifdef RTE_EXEC_ENV_BSDAPP > > @@ -379,6 +381,11 @@ fs_find_next(struct rte_eth_dev *dev, > > return &subs[sid]; > > } > > > > +static inline struct rte_eth_dev * > > +fsdev_from_subdev(struct sub_device *sdev) { > > + return &rte_eth_devices[sdev->fs_port_id]; > > +} > > + > > Using a static inline that already enforce parameter type makes the > _from_subdev suffix redundant. > > I think > > fs_dev(struct sub_device *sdev); > > would be more readable in most of the changes above, while keeping with > the fs_ prefix used in all static failsafe functions. > I think you are right if I'm only reading the prototype of the function/implementation. But, if I'm reading it from the function call, it would be easier to read/understand it. I would change the prefix to be fs_dev_from_subdev instead what do you think? > > /* > > * Lock hot-plug mutex. > > * is_alarm means that the caller is, for sure, the hot-plug alarm > mechanism. > > -- > > 2.7.4 > > > > -- > Gaëtan Rivet > 6WIND Kindest regards, Raslan Darawsheh ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [dpdk-dev] [PATCH v2 2/4] net/failsafe: change back-reference from sub-device 2019-03-07 9:01 ` Raslan Darawsheh @ 2019-03-07 9:43 ` Gaëtan Rivet 0 siblings, 0 replies; 33+ messages in thread From: Gaëtan Rivet @ 2019-03-07 9:43 UTC (permalink / raw) To: Raslan Darawsheh; +Cc: dev, Thomas Monjalon, stephen Hello Raslan, On Thu, Mar 07, 2019 at 09:01:11AM +0000, Raslan Darawsheh wrote: > Hi, > > > -----Original Message----- > > From: Gaëtan Rivet <gaetan.rivet@6wind.com> > > Sent: Tuesday, March 5, 2019 6:49 PM > > To: Raslan Darawsheh <rasland@mellanox.com> > > Cc: dev@dpdk.org; Thomas Monjalon <thomas@monjalon.net>; > > stephen@networkplumber.org > > Subject: Re: [PATCH v2 2/4] net/failsafe: change back-reference from sub- > > device > > > > Beside the squash referenced in p1, > > > > On Tue, Mar 05, 2019 at 09:52:05AM +0000, Raslan Darawsheh wrote: > > > In multiprocess context, the sub-device structure is shared between > > > processes. The reference to the failsafe device was a per process > > > pointer. It's changed to port id which is the same for all processes. > > > > > > Signed-off-by: Raslan Darawsheh <rasland@mellanox.com> > > > Signed-off-by: Thomas Monjalon <thomas@monjalon.net> > > > --- > > > v2: changed macro to an inline function > > > --- > > > drivers/net/failsafe/failsafe_eal.c | 2 +- > > > drivers/net/failsafe/failsafe_ether.c | 15 ++++++++------- > > > drivers/net/failsafe/failsafe_intr.c | 10 +++++----- > > > drivers/net/failsafe/failsafe_private.h | 13 ++++++++++--- > > > 4 files changed, 24 insertions(+), 16 deletions(-) > > > > > > diff --git a/drivers/net/failsafe/failsafe_eal.c > > > b/drivers/net/failsafe/failsafe_eal.c > > > index 8a888b1..56d1669 100644 > > > --- a/drivers/net/failsafe/failsafe_eal.c > > > +++ b/drivers/net/failsafe/failsafe_eal.c > > > @@ -114,7 +114,7 @@ fs_bus_init(struct rte_eth_dev *dev) > > > } > > > ETH(sdev) = &rte_eth_devices[pid]; > > > SUB_ID(sdev) = i; > > > - sdev->fs_dev = dev; > > > + sdev->fs_port_id = dev->data->port_id; > > > sdev->dev = ETH(sdev)->device; > > > sdev->state = DEV_PROBED; > > > } > > > diff --git a/drivers/net/failsafe/failsafe_ether.c > > > b/drivers/net/failsafe/failsafe_ether.c > > > index 1783165..d5b1488 100644 > > > --- a/drivers/net/failsafe/failsafe_ether.c > > > +++ b/drivers/net/failsafe/failsafe_ether.c > > > @@ -298,7 +298,7 @@ fs_dev_remove(struct sub_device *sdev) > > > break; > > > } > > > sdev->remove = 0; > > > - failsafe_hotplug_alarm_install(sdev->fs_dev); > > > + failsafe_hotplug_alarm_install(fsdev_from_subdev(sdev)); > > > } > > > > > > static void > > > @@ -318,8 +318,9 @@ fs_dev_stats_save(struct sub_device *sdev) > > > WARN("Using latest snapshot taken before > > %"PRIu64" seconds.\n", > > > (rte_rdtsc() - timestamp) / > > rte_get_tsc_hz()); > > > } > > > - failsafe_stats_increment(&PRIV(sdev->fs_dev)->stats_accumulator, > > > - err ? &sdev->stats_snapshot.stats : &stats); > > > + failsafe_stats_increment > > > + (&PRIV(fsdev_from_subdev(sdev))->stats_accumulator, > > > + err ? &sdev->stats_snapshot.stats : &stats); > > > memset(&sdev->stats_snapshot, 0, sizeof(sdev->stats_snapshot)); > > } > > > > > > @@ -566,17 +567,17 @@ failsafe_eth_rmv_event_callback(uint16_t > > port_id > > > __rte_unused, { > > > struct sub_device *sdev = cb_arg; > > > > > > - fs_lock(sdev->fs_dev, 0); > > > + fs_lock(fsdev_from_subdev(sdev), 0); > > > /* Switch as soon as possible tx_dev. */ > > > - fs_switch_dev(sdev->fs_dev, sdev); > > > + fs_switch_dev(fsdev_from_subdev(sdev), sdev); > > > /* Use safe bursts in any case. */ > > > - failsafe_set_burst_fn(sdev->fs_dev, 1); > > > + failsafe_set_burst_fn(fsdev_from_subdev(sdev), 1); > > > /* > > > * Async removal, the sub-PMD will try to unregister > > > * the callback at the source of the current thread context. > > > */ > > > sdev->remove = 1; > > > - fs_unlock(sdev->fs_dev, 0); > > > + fs_unlock(fsdev_from_subdev(sdev), 0); > > > return 0; > > > } > > > > > > diff --git a/drivers/net/failsafe/failsafe_intr.c > > > b/drivers/net/failsafe/failsafe_intr.c > > > index 09aa3c6..d372d09 100644 > > > --- a/drivers/net/failsafe/failsafe_intr.c > > > +++ b/drivers/net/failsafe/failsafe_intr.c > > > @@ -274,14 +274,14 @@ failsafe_eth_rx_intr_ctl_subdevice(struct > > sub_device *sdev, int op) > > > int rc; > > > int ret = 0; > > > > > > + fsdev = fsdev_from_subdev(sdev); > > > if (sdev == NULL || (ETH(sdev) == NULL) || > > > - sdev->fs_dev == NULL || (PRIV(sdev->fs_dev) == NULL)) { > > > + fsdev == NULL || (PRIV(fsdev) == NULL)) { > > > ERROR("Called with invalid arguments"); > > > return -EINVAL; > > > } > > > dev = ETH(sdev); > > > - fsdev = sdev->fs_dev; > > > - epfd = PRIV(sdev->fs_dev)->rxp.efd; > > > + epfd = PRIV(fsdev)->rxp.efd; > > > pid = PORT_ID(sdev); > > > > > > if (epfd <= 0) { > > > @@ -330,7 +330,7 @@ int failsafe_rx_intr_install_subdevice(struct > > sub_device *sdev) > > > const struct rte_intr_conf *const intr_conf = > > > Ð(sdev)->data->dev_conf.intr_conf; > > > > > > - fsdev = sdev->fs_dev; > > > + fsdev = fsdev_from_subdev(sdev); > > > rxq = (struct rxq **)fsdev->data->rx_queues; > > > if (intr_conf->rxq == 0) > > > return 0; > > > @@ -368,7 +368,7 @@ void failsafe_rx_intr_uninstall_subdevice(struct > > sub_device *sdev) > > > struct rte_eth_dev *fsdev; > > > struct rxq *fsrxq; > > > > > > - fsdev = sdev->fs_dev; > > > + fsdev = fsdev_from_subdev(sdev); > > > for (qid = 0; qid < ETH(sdev)->data->nb_rx_queues; qid++) { > > > if (qid < fsdev->data->nb_rx_queues) { > > > fsrxq = fsdev->data->rx_queues[qid]; diff --git > > > a/drivers/net/failsafe/failsafe_private.h > > > b/drivers/net/failsafe/failsafe_private.h > > > index 29dfd40..84e847f 100644 > > > --- a/drivers/net/failsafe/failsafe_private.h > > > +++ b/drivers/net/failsafe/failsafe_private.h > > > @@ -117,7 +117,7 @@ struct sub_device { > > > /* Others are retrieved through a file descriptor */ > > > char *fd_str; > > > /* fail-safe device backreference */ > > > - struct rte_eth_dev *fs_dev; > > > + uint16_t fs_port_id; /* shared between processes*/ > > > /* flag calling for recollection */ > > > volatile unsigned int remove:1; > > > /* flow isolation state */ > > > @@ -324,7 +324,8 @@ extern int failsafe_mac_from_arg; > > > */ > > > #define FS_ATOMIC_RX(s, i) \ > > > rte_atomic64_read( \ > > > - &((struct rxq *)((s)->fs_dev->data->rx_queues[i]))->refcnt[(s)->sid] > > \ > > > + &((struct rxq *) \ > > > + (fsdev_from_subdev(s)->data->rx_queues[i]))->refcnt[(s)->sid] \ > > > ) > > > /** > > > * s: (struct sub_device *) > > > @@ -332,7 +333,8 @@ extern int failsafe_mac_from_arg; > > > */ > > > #define FS_ATOMIC_TX(s, i) \ > > > rte_atomic64_read( \ > > > - &((struct txq *)((s)->fs_dev->data->tx_queues[i]))->refcnt[(s)->sid] > > \ > > > + &((struct txq *) \ > > > + (fsdev_from_subdev(s)->data->tx_queues[i]))->refcnt[(s)->sid] \ > > > ) > > > > > > #ifdef RTE_EXEC_ENV_BSDAPP > > > @@ -379,6 +381,11 @@ fs_find_next(struct rte_eth_dev *dev, > > > return &subs[sid]; > > > } > > > > > > +static inline struct rte_eth_dev * > > > +fsdev_from_subdev(struct sub_device *sdev) { > > > + return &rte_eth_devices[sdev->fs_port_id]; > > > +} > > > + > > > > Using a static inline that already enforce parameter type makes the > > _from_subdev suffix redundant. > > > > I think > > > > fs_dev(struct sub_device *sdev); > > > > would be more readable in most of the changes above, while keeping with > > the fs_ prefix used in all static failsafe functions. > > > > I think you are right if I'm only reading the prototype of the function/implementation. > But, if I'm reading it from the function call, it would be easier to read/understand it. > I would change the prefix to be fs_dev_from_subdev instead what do you think? > I prefer reading fs_dev(sdev) instead of fs{,_}dev_from_subdev(sdev), so I'd still vote for using fs_dev(); I think in the context of fail-safe it is clear enough that it gives a handle to the fail-safe device. > > > > /* > > > * Lock hot-plug mutex. > > > * is_alarm means that the caller is, for sure, the hot-plug alarm > > mechanism. > > > -- > > > 2.7.4 > > > > > > > -- > > Gaëtan Rivet > > 6WIND > > > Kindest regards, > Raslan Darawsheh -- Gaëtan Rivet 6WIND ^ permalink raw reply [flat|nested] 33+ messages in thread
* [dpdk-dev] [PATCH v2 4/4] net/failsafe: support secondary process 2019-03-05 9:52 [dpdk-dev] [PATCH v2 1/4] net/failsafe: replace local device with shared data Raslan Darawsheh 2019-03-05 9:52 ` [dpdk-dev] [PATCH v2 3/4] net/failsafe: replace local sub-device " Raslan Darawsheh 2019-03-05 9:52 ` [dpdk-dev] [PATCH v2 2/4] net/failsafe: change back-reference from sub-device Raslan Darawsheh @ 2019-03-05 9:52 ` Raslan Darawsheh 2019-03-05 16:43 ` [dpdk-dev] [PATCH v2 1/4] net/failsafe: replace local device with shared data Gaëtan Rivet 2019-03-18 16:05 ` [dpdk-dev] [PATCH v3 0/4] support secondary process for failsafe Raslan Darawsheh 4 siblings, 0 replies; 33+ messages in thread From: Raslan Darawsheh @ 2019-03-05 9:52 UTC (permalink / raw) To: gaetan.rivet; +Cc: dev, Thomas Monjalon, Raslan Darawsheh, stephen Add implementation for probe in secondary. Failsafe will attempt to attach all the sub-devices in secondary process. Signed-off-by: Raslan Darawsheh <rasland@mellanox.com> Signed-off-by: Thomas Monjalon <thomas@monjalon.net> --- v2: changed devargs_alread_listed return value to be bool. --- drivers/net/failsafe/failsafe.c | 45 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c index 68926ca..c3f568b 100644 --- a/drivers/net/failsafe/failsafe.c +++ b/drivers/net/failsafe/failsafe.c @@ -3,6 +3,8 @@ * Copyright 2017 Mellanox Technologies, Ltd */ +#include <stdbool.h> + #include <rte_alarm.h> #include <rte_malloc.h> #include <rte_ethdev_driver.h> @@ -312,11 +314,28 @@ fs_rte_eth_free(const char *name) return ret; } +static bool +devargs_already_listed(struct rte_devargs *devargs) +{ + struct rte_devargs *list_da; + + RTE_EAL_DEVARGS_FOREACH(devargs->bus->name, list_da) { + if (strcmp(list_da->name, devargs->name) == 0) + /* devargs already in the list */ + return true; + } + return false; +} + static int rte_pmd_failsafe_probe(struct rte_vdev_device *vdev) { const char *name; struct rte_eth_dev *eth_dev; + struct sub_device *sdev; + struct rte_devargs devargs; + uint8_t i; + int ret; name = rte_vdev_device_name(vdev); INFO("Initializing " FAILSAFE_DRIVER_NAME " for %s", @@ -329,9 +348,33 @@ rte_pmd_failsafe_probe(struct rte_vdev_device *vdev) ERROR("Failed to probe %s", name); return -1; } - /* TODO: request info from primary to set up Rx and Tx */ eth_dev->dev_ops = &failsafe_ops; eth_dev->device = &vdev->device; + eth_dev->rx_pkt_burst = (eth_rx_burst_t)&failsafe_rx_burst; + eth_dev->tx_pkt_burst = (eth_tx_burst_t)&failsafe_tx_burst; + /* + * Failsafe will attempt to probe all of its sub-devices. + * Any failure in sub-devices is not a fatal error. + * A sub-device can be plugged later. + */ + FOREACH_SUBDEV(sdev, i, eth_dev) { + /* rebuild devargs to be able to get the bus name. */ + ret = rte_devargs_parse(&devargs, + sdev->devargs.name); + if (ret != 0) { + ERROR("Failed to parse devargs %s", + devargs.name); + continue; + } + if (!devargs_already_listed(&devargs)) { + ret = rte_dev_probe(devargs.name); + if (ret != 0) { + ERROR("Failed to probe devargs %s", + devargs.name); + continue; + } + } + } rte_eth_dev_probing_finish(eth_dev); return 0; } -- 2.7.4 ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [dpdk-dev] [PATCH v2 1/4] net/failsafe: replace local device with shared data 2019-03-05 9:52 [dpdk-dev] [PATCH v2 1/4] net/failsafe: replace local device with shared data Raslan Darawsheh ` (2 preceding siblings ...) 2019-03-05 9:52 ` [dpdk-dev] [PATCH v2 4/4] net/failsafe: support secondary process Raslan Darawsheh @ 2019-03-05 16:43 ` Gaëtan Rivet 2019-03-05 17:40 ` Gaëtan Rivet 2019-03-18 16:05 ` [dpdk-dev] [PATCH v3 0/4] support secondary process for failsafe Raslan Darawsheh 4 siblings, 1 reply; 33+ messages in thread From: Gaëtan Rivet @ 2019-03-05 16:43 UTC (permalink / raw) To: Raslan Darawsheh; +Cc: dev, Thomas Monjalon, stephen Hello Raslan, Sorry for the delay. I have had a little trouble reading the patches. I think the 3 first should be squashed into a single one, it would be more coherent. I think I have seen a few points where doing so would have prevented some unnecessary changes for example, simplifying the series. (thinking about at least two PORT_ID() and a few ETH() removal that might have been prevented, I will try to point them all to you.) On Tue, Mar 05, 2019 at 09:52:04AM +0000, Raslan Darawsheh wrote: > In multiprocess context, the private structure is shared between > processes. The back reference from private to generic data was using > a pointer to a per process eth_dev. It's now changed to a reference of > the shared data. > > Signed-off-by: Raslan Darawsheh <rasland@mellanox.com> > Signed-off-by: Thomas Monjalon <thomas@monjalon.net> > --- > drivers/net/failsafe/failsafe.c | 2 +- > drivers/net/failsafe/failsafe_intr.c | 10 +++++----- > drivers/net/failsafe/failsafe_ops.c | 4 ++-- > drivers/net/failsafe/failsafe_private.h | 6 +++++- > drivers/net/failsafe/failsafe_rxtx.c | 4 ++-- > 5 files changed, 15 insertions(+), 11 deletions(-) > Beside the squashing, this patch seems ok. -- Gaëtan Rivet 6WIND ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [dpdk-dev] [PATCH v2 1/4] net/failsafe: replace local device with shared data 2019-03-05 16:43 ` [dpdk-dev] [PATCH v2 1/4] net/failsafe: replace local device with shared data Gaëtan Rivet @ 2019-03-05 17:40 ` Gaëtan Rivet 2019-03-05 17:41 ` Thomas Monjalon 0 siblings, 1 reply; 33+ messages in thread From: Gaëtan Rivet @ 2019-03-05 17:40 UTC (permalink / raw) To: Raslan Darawsheh; +Cc: dev, Thomas Monjalon, stephen On Tue, Mar 05, 2019 at 05:43:26PM +0100, Gaëtan Rivet wrote: > Hello Raslan, > > Sorry for the delay. > > I have had a little trouble reading the patches. I think the 3 first > should be squashed into a single one, it would be more coherent. > > I think I have seen a few points where doing so would have prevented > some unnecessary changes for example, simplifying the series. (thinking > about at least two PORT_ID() and a few ETH() removal that might have > been prevented, I will try to point them all to you.) > > On Tue, Mar 05, 2019 at 09:52:04AM +0000, Raslan Darawsheh wrote: > > In multiprocess context, the private structure is shared between > > processes. The back reference from private to generic data was using > > a pointer to a per process eth_dev. It's now changed to a reference of > > the shared data. > > > > Signed-off-by: Raslan Darawsheh <rasland@mellanox.com> > > Signed-off-by: Thomas Monjalon <thomas@monjalon.net> > > --- > > drivers/net/failsafe/failsafe.c | 2 +- > > drivers/net/failsafe/failsafe_intr.c | 10 +++++----- > > drivers/net/failsafe/failsafe_ops.c | 4 ++-- > > drivers/net/failsafe/failsafe_private.h | 6 +++++- > > drivers/net/failsafe/failsafe_rxtx.c | 4 ++-- > > 5 files changed, 15 insertions(+), 11 deletions(-) > > > > Beside the squashing, this patch seems ok. Okay after reading a little more closely, it does not seem interesting to squash finaly, and it will be simpler for you to continue with your current series, so forget about that. -- Gaëtan Rivet 6WIND ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [dpdk-dev] [PATCH v2 1/4] net/failsafe: replace local device with shared data 2019-03-05 17:40 ` Gaëtan Rivet @ 2019-03-05 17:41 ` Thomas Monjalon 0 siblings, 0 replies; 33+ messages in thread From: Thomas Monjalon @ 2019-03-05 17:41 UTC (permalink / raw) To: Gaëtan Rivet; +Cc: Raslan Darawsheh, dev, stephen 05/03/2019 18:40, Gaëtan Rivet: > On Tue, Mar 05, 2019 at 05:43:26PM +0100, Gaëtan Rivet wrote: > > I have had a little trouble reading the patches. I think the 3 first > > should be squashed into a single one, it would be more coherent. > > > > I think I have seen a few points where doing so would have prevented > > some unnecessary changes for example, simplifying the series. (thinking > > about at least two PORT_ID() and a few ETH() removal that might have > > been prevented, I will try to point them all to you.) > > [...] > > > > Beside the squashing, this patch seems ok. > > Okay after reading a little more closely, it does not seem interesting > to squash finaly, and it will be simpler for you to continue with your > current series, so forget about that. Yes, and it's easier to track changes when split as it is. ^ permalink raw reply [flat|nested] 33+ messages in thread
* [dpdk-dev] [PATCH v3 0/4] support secondary process for failsafe 2019-03-05 9:52 [dpdk-dev] [PATCH v2 1/4] net/failsafe: replace local device with shared data Raslan Darawsheh ` (3 preceding siblings ...) 2019-03-05 16:43 ` [dpdk-dev] [PATCH v2 1/4] net/failsafe: replace local device with shared data Gaëtan Rivet @ 2019-03-18 16:05 ` Raslan Darawsheh 2019-03-18 16:05 ` Raslan Darawsheh ` (5 more replies) 4 siblings, 6 replies; 33+ messages in thread From: Raslan Darawsheh @ 2019-03-18 16:05 UTC (permalink / raw) To: gaetan.rivet; +Cc: dev, Thomas Monjalon, Raslan Darawsheh, stephen this set of patches are intended to support secondary process for failsafe. v3 integrates changes after review from Gaetan There is no functional change. Raslan Darawsheh (4): net/failsafe: replace local device with shared data net/failsafe: change back-reference from sub-device net/failsafe: replace sub-device pointer with port id net/failsafe: support secondary process drivers/net/failsafe/failsafe.c | 53 +++++++++++++++++++++++++++++++-- drivers/net/failsafe/failsafe_eal.c | 4 +-- drivers/net/failsafe/failsafe_ether.c | 16 +++++----- drivers/net/failsafe/failsafe_intr.c | 20 ++++++------- drivers/net/failsafe/failsafe_ops.c | 4 +-- drivers/net/failsafe/failsafe_private.h | 38 +++++++++++++++-------- drivers/net/failsafe/failsafe_rxtx.c | 4 +-- 7 files changed, 102 insertions(+), 37 deletions(-) -- 2.7.4 ^ permalink raw reply [flat|nested] 33+ messages in thread
* [dpdk-dev] [PATCH v3 0/4] support secondary process for failsafe 2019-03-18 16:05 ` [dpdk-dev] [PATCH v3 0/4] support secondary process for failsafe Raslan Darawsheh @ 2019-03-18 16:05 ` Raslan Darawsheh 2019-03-18 16:05 ` [dpdk-dev] [PATCH v3 1/4] net/failsafe: replace local device with shared data Raslan Darawsheh ` (4 subsequent siblings) 5 siblings, 0 replies; 33+ messages in thread From: Raslan Darawsheh @ 2019-03-18 16:05 UTC (permalink / raw) To: gaetan.rivet; +Cc: dev, Thomas Monjalon, Raslan Darawsheh, stephen this set of patches are intended to support secondary process for failsafe. v3 integrates changes after review from Gaetan There is no functional change. Raslan Darawsheh (4): net/failsafe: replace local device with shared data net/failsafe: change back-reference from sub-device net/failsafe: replace sub-device pointer with port id net/failsafe: support secondary process drivers/net/failsafe/failsafe.c | 53 +++++++++++++++++++++++++++++++-- drivers/net/failsafe/failsafe_eal.c | 4 +-- drivers/net/failsafe/failsafe_ether.c | 16 +++++----- drivers/net/failsafe/failsafe_intr.c | 20 ++++++------- drivers/net/failsafe/failsafe_ops.c | 4 +-- drivers/net/failsafe/failsafe_private.h | 38 +++++++++++++++-------- drivers/net/failsafe/failsafe_rxtx.c | 4 +-- 7 files changed, 102 insertions(+), 37 deletions(-) -- 2.7.4 ^ permalink raw reply [flat|nested] 33+ messages in thread
* [dpdk-dev] [PATCH v3 1/4] net/failsafe: replace local device with shared data 2019-03-18 16:05 ` [dpdk-dev] [PATCH v3 0/4] support secondary process for failsafe Raslan Darawsheh 2019-03-18 16:05 ` Raslan Darawsheh @ 2019-03-18 16:05 ` Raslan Darawsheh 2019-03-18 16:05 ` Raslan Darawsheh 2019-03-18 16:05 ` [dpdk-dev] [PATCH v3 2/4] net/failsafe: change back-reference from sub-device Raslan Darawsheh ` (3 subsequent siblings) 5 siblings, 1 reply; 33+ messages in thread From: Raslan Darawsheh @ 2019-03-18 16:05 UTC (permalink / raw) To: gaetan.rivet; +Cc: dev, Thomas Monjalon, Raslan Darawsheh, stephen In multiprocess context, the private structure is shared between processes. The back reference from private to generic data was using a pointer to a per process eth_dev. It's now changed to a reference of the shared data. Signed-off-by: Raslan Darawsheh <rasland@mellanox.com> Signed-off-by: Thomas Monjalon <thomas@monjalon.net> --- drivers/net/failsafe/failsafe.c | 2 +- drivers/net/failsafe/failsafe_intr.c | 10 +++++----- drivers/net/failsafe/failsafe_ops.c | 4 ++-- drivers/net/failsafe/failsafe_private.h | 6 +++++- drivers/net/failsafe/failsafe_rxtx.c | 4 ++-- 5 files changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c index 06e859e..68926ca 100644 --- a/drivers/net/failsafe/failsafe.c +++ b/drivers/net/failsafe/failsafe.c @@ -181,7 +181,7 @@ fs_eth_dev_create(struct rte_vdev_device *vdev) return -1; } priv = PRIV(dev); - priv->dev = dev; + priv->data = dev->data; dev->dev_ops = &failsafe_ops; dev->data->mac_addrs = &PRIV(dev)->mac_addrs[0]; dev->data->dev_link = eth_link; diff --git a/drivers/net/failsafe/failsafe_intr.c b/drivers/net/failsafe/failsafe_intr.c index 1c2cb71..09aa3c6 100644 --- a/drivers/net/failsafe/failsafe_intr.c +++ b/drivers/net/failsafe/failsafe_intr.c @@ -133,8 +133,8 @@ fs_rx_event_proxy_service_install(struct fs_priv *priv) /* prepare service info */ memset(&service, 0, sizeof(struct rte_service_spec)); snprintf(service.name, sizeof(service.name), "%s_Rx_service", - priv->dev->data->name); - service.socket_id = priv->dev->data->numa_node; + priv->data->name); + service.socket_id = priv->data->numa_node; service.callback = fs_rx_event_proxy_routine; service.callback_userdata = priv; @@ -437,7 +437,7 @@ fs_rx_intr_vec_install(struct fs_priv *priv) unsigned int count; struct rte_intr_handle *intr_handle; - rxqs_n = priv->dev->data->nb_rx_queues; + rxqs_n = priv->data->nb_rx_queues; n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID); count = 0; intr_handle = &priv->intr_handle; @@ -452,7 +452,7 @@ fs_rx_intr_vec_install(struct fs_priv *priv) return -rte_errno; } for (i = 0; i < n; i++) { - struct rxq *rxq = priv->dev->data->rx_queues[i]; + struct rxq *rxq = priv->data->rx_queues[i]; /* Skip queues that cannot request interrupts. */ if (rxq == NULL || rxq->event_fd < 0) { @@ -521,7 +521,7 @@ failsafe_rx_intr_install(struct rte_eth_dev *dev) { struct fs_priv *priv = PRIV(dev); const struct rte_intr_conf *const intr_conf = - &priv->dev->data->dev_conf.intr_conf; + &priv->data->dev_conf.intr_conf; if (intr_conf->rxq == 0 || dev->intr_handle != NULL) return 0; diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c index e3add40..65957a2 100644 --- a/drivers/net/failsafe/failsafe_ops.c +++ b/drivers/net/failsafe/failsafe_ops.c @@ -452,7 +452,7 @@ fs_rx_queue_release(void *queue) if (queue == NULL) return; rxq = queue; - dev = rxq->priv->dev; + dev = &rte_eth_devices[rxq->priv->data->port_id]; fs_lock(dev, 0); if (rxq->event_fd > 0) close(rxq->event_fd); @@ -636,7 +636,7 @@ fs_tx_queue_release(void *queue) if (queue == NULL) return; txq = queue; - dev = txq->priv->dev; + dev = &rte_eth_devices[txq->priv->data->port_id]; fs_lock(dev, 0); FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { if (ETH(sdev)->data->tx_queues != NULL && diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h index 0dfea65..29dfd40 100644 --- a/drivers/net/failsafe/failsafe_private.h +++ b/drivers/net/failsafe/failsafe_private.h @@ -128,8 +128,12 @@ struct sub_device { unsigned int lsc_callback:1; }; +/* + * This is referenced by eth_dev->data->dev_private + * This is shared between processes. + */ struct fs_priv { - struct rte_eth_dev *dev; + struct rte_eth_dev_data *data; /* backreference to shared data. */ /* * Set of sub_devices. * subs[0] is the preferred device diff --git a/drivers/net/failsafe/failsafe_rxtx.c b/drivers/net/failsafe/failsafe_rxtx.c index 034f47b..231c832 100644 --- a/drivers/net/failsafe/failsafe_rxtx.c +++ b/drivers/net/failsafe/failsafe_rxtx.c @@ -126,7 +126,7 @@ failsafe_tx_burst(void *queue, uint16_t nb_tx; txq = queue; - sdev = TX_SUBDEV(txq->priv->dev); + sdev = TX_SUBDEV(&rte_eth_devices[txq->priv->data->port_id]); if (unlikely(fs_tx_unsafe(sdev))) return 0; sub_txq = ETH(sdev)->data->tx_queues[txq->qid]; @@ -147,7 +147,7 @@ failsafe_tx_burst_fast(void *queue, uint16_t nb_tx; txq = queue; - sdev = TX_SUBDEV(txq->priv->dev); + sdev = TX_SUBDEV(&rte_eth_devices[txq->priv->data->port_id]); RTE_ASSERT(!fs_tx_unsafe(sdev)); sub_txq = ETH(sdev)->data->tx_queues[txq->qid]; FS_ATOMIC_P(txq->refcnt[sdev->sid]); -- 2.7.4 ^ permalink raw reply [flat|nested] 33+ messages in thread
* [dpdk-dev] [PATCH v3 1/4] net/failsafe: replace local device with shared data 2019-03-18 16:05 ` [dpdk-dev] [PATCH v3 1/4] net/failsafe: replace local device with shared data Raslan Darawsheh @ 2019-03-18 16:05 ` Raslan Darawsheh 0 siblings, 0 replies; 33+ messages in thread From: Raslan Darawsheh @ 2019-03-18 16:05 UTC (permalink / raw) To: gaetan.rivet; +Cc: dev, Thomas Monjalon, Raslan Darawsheh, stephen In multiprocess context, the private structure is shared between processes. The back reference from private to generic data was using a pointer to a per process eth_dev. It's now changed to a reference of the shared data. Signed-off-by: Raslan Darawsheh <rasland@mellanox.com> Signed-off-by: Thomas Monjalon <thomas@monjalon.net> --- drivers/net/failsafe/failsafe.c | 2 +- drivers/net/failsafe/failsafe_intr.c | 10 +++++----- drivers/net/failsafe/failsafe_ops.c | 4 ++-- drivers/net/failsafe/failsafe_private.h | 6 +++++- drivers/net/failsafe/failsafe_rxtx.c | 4 ++-- 5 files changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c index 06e859e..68926ca 100644 --- a/drivers/net/failsafe/failsafe.c +++ b/drivers/net/failsafe/failsafe.c @@ -181,7 +181,7 @@ fs_eth_dev_create(struct rte_vdev_device *vdev) return -1; } priv = PRIV(dev); - priv->dev = dev; + priv->data = dev->data; dev->dev_ops = &failsafe_ops; dev->data->mac_addrs = &PRIV(dev)->mac_addrs[0]; dev->data->dev_link = eth_link; diff --git a/drivers/net/failsafe/failsafe_intr.c b/drivers/net/failsafe/failsafe_intr.c index 1c2cb71..09aa3c6 100644 --- a/drivers/net/failsafe/failsafe_intr.c +++ b/drivers/net/failsafe/failsafe_intr.c @@ -133,8 +133,8 @@ fs_rx_event_proxy_service_install(struct fs_priv *priv) /* prepare service info */ memset(&service, 0, sizeof(struct rte_service_spec)); snprintf(service.name, sizeof(service.name), "%s_Rx_service", - priv->dev->data->name); - service.socket_id = priv->dev->data->numa_node; + priv->data->name); + service.socket_id = priv->data->numa_node; service.callback = fs_rx_event_proxy_routine; service.callback_userdata = priv; @@ -437,7 +437,7 @@ fs_rx_intr_vec_install(struct fs_priv *priv) unsigned int count; struct rte_intr_handle *intr_handle; - rxqs_n = priv->dev->data->nb_rx_queues; + rxqs_n = priv->data->nb_rx_queues; n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID); count = 0; intr_handle = &priv->intr_handle; @@ -452,7 +452,7 @@ fs_rx_intr_vec_install(struct fs_priv *priv) return -rte_errno; } for (i = 0; i < n; i++) { - struct rxq *rxq = priv->dev->data->rx_queues[i]; + struct rxq *rxq = priv->data->rx_queues[i]; /* Skip queues that cannot request interrupts. */ if (rxq == NULL || rxq->event_fd < 0) { @@ -521,7 +521,7 @@ failsafe_rx_intr_install(struct rte_eth_dev *dev) { struct fs_priv *priv = PRIV(dev); const struct rte_intr_conf *const intr_conf = - &priv->dev->data->dev_conf.intr_conf; + &priv->data->dev_conf.intr_conf; if (intr_conf->rxq == 0 || dev->intr_handle != NULL) return 0; diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c index e3add40..65957a2 100644 --- a/drivers/net/failsafe/failsafe_ops.c +++ b/drivers/net/failsafe/failsafe_ops.c @@ -452,7 +452,7 @@ fs_rx_queue_release(void *queue) if (queue == NULL) return; rxq = queue; - dev = rxq->priv->dev; + dev = &rte_eth_devices[rxq->priv->data->port_id]; fs_lock(dev, 0); if (rxq->event_fd > 0) close(rxq->event_fd); @@ -636,7 +636,7 @@ fs_tx_queue_release(void *queue) if (queue == NULL) return; txq = queue; - dev = txq->priv->dev; + dev = &rte_eth_devices[txq->priv->data->port_id]; fs_lock(dev, 0); FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { if (ETH(sdev)->data->tx_queues != NULL && diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h index 0dfea65..29dfd40 100644 --- a/drivers/net/failsafe/failsafe_private.h +++ b/drivers/net/failsafe/failsafe_private.h @@ -128,8 +128,12 @@ struct sub_device { unsigned int lsc_callback:1; }; +/* + * This is referenced by eth_dev->data->dev_private + * This is shared between processes. + */ struct fs_priv { - struct rte_eth_dev *dev; + struct rte_eth_dev_data *data; /* backreference to shared data. */ /* * Set of sub_devices. * subs[0] is the preferred device diff --git a/drivers/net/failsafe/failsafe_rxtx.c b/drivers/net/failsafe/failsafe_rxtx.c index 034f47b..231c832 100644 --- a/drivers/net/failsafe/failsafe_rxtx.c +++ b/drivers/net/failsafe/failsafe_rxtx.c @@ -126,7 +126,7 @@ failsafe_tx_burst(void *queue, uint16_t nb_tx; txq = queue; - sdev = TX_SUBDEV(txq->priv->dev); + sdev = TX_SUBDEV(&rte_eth_devices[txq->priv->data->port_id]); if (unlikely(fs_tx_unsafe(sdev))) return 0; sub_txq = ETH(sdev)->data->tx_queues[txq->qid]; @@ -147,7 +147,7 @@ failsafe_tx_burst_fast(void *queue, uint16_t nb_tx; txq = queue; - sdev = TX_SUBDEV(txq->priv->dev); + sdev = TX_SUBDEV(&rte_eth_devices[txq->priv->data->port_id]); RTE_ASSERT(!fs_tx_unsafe(sdev)); sub_txq = ETH(sdev)->data->tx_queues[txq->qid]; FS_ATOMIC_P(txq->refcnt[sdev->sid]); -- 2.7.4 ^ permalink raw reply [flat|nested] 33+ messages in thread
* [dpdk-dev] [PATCH v3 2/4] net/failsafe: change back-reference from sub-device 2019-03-18 16:05 ` [dpdk-dev] [PATCH v3 0/4] support secondary process for failsafe Raslan Darawsheh 2019-03-18 16:05 ` Raslan Darawsheh 2019-03-18 16:05 ` [dpdk-dev] [PATCH v3 1/4] net/failsafe: replace local device with shared data Raslan Darawsheh @ 2019-03-18 16:05 ` Raslan Darawsheh 2019-03-18 16:05 ` Raslan Darawsheh 2019-03-18 16:05 ` [dpdk-dev] [PATCH v3 4/4] net/failsafe: support secondary process Raslan Darawsheh ` (2 subsequent siblings) 5 siblings, 1 reply; 33+ messages in thread From: Raslan Darawsheh @ 2019-03-18 16:05 UTC (permalink / raw) To: gaetan.rivet; +Cc: dev, Thomas Monjalon, Raslan Darawsheh, stephen In multiprocess context, the sub-device structure is shared between processes. The reference to the failsafe device was a per process pointer. It's changed to port id which is the same for all processes. Signed-off-by: Raslan Darawsheh <rasland@mellanox.com> Signed-off-by: Thomas Monjalon <thomas@monjalon.net> --- v2: changed macro to an inline function v3: changed prefix of inline function. --- drivers/net/failsafe/failsafe_eal.c | 2 +- drivers/net/failsafe/failsafe_ether.c | 15 ++++++++------- drivers/net/failsafe/failsafe_intr.c | 10 +++++----- drivers/net/failsafe/failsafe_private.h | 15 ++++++++++----- 4 files changed, 24 insertions(+), 18 deletions(-) diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c index 8a888b1..56d1669 100644 --- a/drivers/net/failsafe/failsafe_eal.c +++ b/drivers/net/failsafe/failsafe_eal.c @@ -114,7 +114,7 @@ fs_bus_init(struct rte_eth_dev *dev) } ETH(sdev) = &rte_eth_devices[pid]; SUB_ID(sdev) = i; - sdev->fs_dev = dev; + sdev->fs_port_id = dev->data->port_id; sdev->dev = ETH(sdev)->device; sdev->state = DEV_PROBED; } diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c index 1783165..7fa209a 100644 --- a/drivers/net/failsafe/failsafe_ether.c +++ b/drivers/net/failsafe/failsafe_ether.c @@ -298,7 +298,7 @@ fs_dev_remove(struct sub_device *sdev) break; } sdev->remove = 0; - failsafe_hotplug_alarm_install(sdev->fs_dev); + failsafe_hotplug_alarm_install(fs_dev(sdev)); } static void @@ -318,8 +318,9 @@ fs_dev_stats_save(struct sub_device *sdev) WARN("Using latest snapshot taken before %"PRIu64" seconds.\n", (rte_rdtsc() - timestamp) / rte_get_tsc_hz()); } - failsafe_stats_increment(&PRIV(sdev->fs_dev)->stats_accumulator, - err ? &sdev->stats_snapshot.stats : &stats); + failsafe_stats_increment + (&PRIV(fs_dev(sdev))->stats_accumulator, + err ? &sdev->stats_snapshot.stats : &stats); memset(&sdev->stats_snapshot, 0, sizeof(sdev->stats_snapshot)); } @@ -566,17 +567,17 @@ failsafe_eth_rmv_event_callback(uint16_t port_id __rte_unused, { struct sub_device *sdev = cb_arg; - fs_lock(sdev->fs_dev, 0); + fs_lock(fs_dev(sdev), 0); /* Switch as soon as possible tx_dev. */ - fs_switch_dev(sdev->fs_dev, sdev); + fs_switch_dev(fs_dev(sdev), sdev); /* Use safe bursts in any case. */ - failsafe_set_burst_fn(sdev->fs_dev, 1); + failsafe_set_burst_fn(fs_dev(sdev), 1); /* * Async removal, the sub-PMD will try to unregister * the callback at the source of the current thread context. */ sdev->remove = 1; - fs_unlock(sdev->fs_dev, 0); + fs_unlock(fs_dev(sdev), 0); return 0; } diff --git a/drivers/net/failsafe/failsafe_intr.c b/drivers/net/failsafe/failsafe_intr.c index 09aa3c6..0f34c5b 100644 --- a/drivers/net/failsafe/failsafe_intr.c +++ b/drivers/net/failsafe/failsafe_intr.c @@ -274,14 +274,14 @@ failsafe_eth_rx_intr_ctl_subdevice(struct sub_device *sdev, int op) int rc; int ret = 0; + fsdev = fs_dev(sdev); if (sdev == NULL || (ETH(sdev) == NULL) || - sdev->fs_dev == NULL || (PRIV(sdev->fs_dev) == NULL)) { + fsdev == NULL || (PRIV(fsdev) == NULL)) { ERROR("Called with invalid arguments"); return -EINVAL; } dev = ETH(sdev); - fsdev = sdev->fs_dev; - epfd = PRIV(sdev->fs_dev)->rxp.efd; + epfd = PRIV(fsdev)->rxp.efd; pid = PORT_ID(sdev); if (epfd <= 0) { @@ -330,7 +330,7 @@ int failsafe_rx_intr_install_subdevice(struct sub_device *sdev) const struct rte_intr_conf *const intr_conf = Ð(sdev)->data->dev_conf.intr_conf; - fsdev = sdev->fs_dev; + fsdev = fs_dev(sdev); rxq = (struct rxq **)fsdev->data->rx_queues; if (intr_conf->rxq == 0) return 0; @@ -368,7 +368,7 @@ void failsafe_rx_intr_uninstall_subdevice(struct sub_device *sdev) struct rte_eth_dev *fsdev; struct rxq *fsrxq; - fsdev = sdev->fs_dev; + fsdev = fs_dev(sdev); for (qid = 0; qid < ETH(sdev)->data->nb_rx_queues; qid++) { if (qid < fsdev->data->nb_rx_queues) { fsrxq = fsdev->data->rx_queues[qid]; diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h index 29dfd40..af0c9d1 100644 --- a/drivers/net/failsafe/failsafe_private.h +++ b/drivers/net/failsafe/failsafe_private.h @@ -117,7 +117,7 @@ struct sub_device { /* Others are retrieved through a file descriptor */ char *fd_str; /* fail-safe device backreference */ - struct rte_eth_dev *fs_dev; + uint16_t fs_port_id; /* shared between processes */ /* flag calling for recollection */ volatile unsigned int remove:1; /* flow isolation state */ @@ -324,16 +324,16 @@ extern int failsafe_mac_from_arg; */ #define FS_ATOMIC_RX(s, i) \ rte_atomic64_read( \ - &((struct rxq *)((s)->fs_dev->data->rx_queues[i]))->refcnt[(s)->sid] \ - ) + &((struct rxq *) \ + (fs_dev(s)->data->rx_queues[i]))->refcnt[(s)->sid]) /** * s: (struct sub_device *) * i: uint16_t qid */ #define FS_ATOMIC_TX(s, i) \ rte_atomic64_read( \ - &((struct txq *)((s)->fs_dev->data->tx_queues[i]))->refcnt[(s)->sid] \ - ) + &((struct txq *) \ + (fs_dev(s)->data->tx_queues[i]))->refcnt[(s)->sid]) #ifdef RTE_EXEC_ENV_BSDAPP #define FS_THREADID_TYPE void* @@ -379,6 +379,11 @@ fs_find_next(struct rte_eth_dev *dev, return &subs[sid]; } +static inline struct rte_eth_dev * +fs_dev(struct sub_device *sdev) { + return &rte_eth_devices[sdev->fs_port_id]; +} + /* * Lock hot-plug mutex. * is_alarm means that the caller is, for sure, the hot-plug alarm mechanism. -- 2.7.4 ^ permalink raw reply [flat|nested] 33+ messages in thread
* [dpdk-dev] [PATCH v3 2/4] net/failsafe: change back-reference from sub-device 2019-03-18 16:05 ` [dpdk-dev] [PATCH v3 2/4] net/failsafe: change back-reference from sub-device Raslan Darawsheh @ 2019-03-18 16:05 ` Raslan Darawsheh 0 siblings, 0 replies; 33+ messages in thread From: Raslan Darawsheh @ 2019-03-18 16:05 UTC (permalink / raw) To: gaetan.rivet; +Cc: dev, Thomas Monjalon, Raslan Darawsheh, stephen In multiprocess context, the sub-device structure is shared between processes. The reference to the failsafe device was a per process pointer. It's changed to port id which is the same for all processes. Signed-off-by: Raslan Darawsheh <rasland@mellanox.com> Signed-off-by: Thomas Monjalon <thomas@monjalon.net> --- v2: changed macro to an inline function v3: changed prefix of inline function. --- drivers/net/failsafe/failsafe_eal.c | 2 +- drivers/net/failsafe/failsafe_ether.c | 15 ++++++++------- drivers/net/failsafe/failsafe_intr.c | 10 +++++----- drivers/net/failsafe/failsafe_private.h | 15 ++++++++++----- 4 files changed, 24 insertions(+), 18 deletions(-) diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c index 8a888b1..56d1669 100644 --- a/drivers/net/failsafe/failsafe_eal.c +++ b/drivers/net/failsafe/failsafe_eal.c @@ -114,7 +114,7 @@ fs_bus_init(struct rte_eth_dev *dev) } ETH(sdev) = &rte_eth_devices[pid]; SUB_ID(sdev) = i; - sdev->fs_dev = dev; + sdev->fs_port_id = dev->data->port_id; sdev->dev = ETH(sdev)->device; sdev->state = DEV_PROBED; } diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c index 1783165..7fa209a 100644 --- a/drivers/net/failsafe/failsafe_ether.c +++ b/drivers/net/failsafe/failsafe_ether.c @@ -298,7 +298,7 @@ fs_dev_remove(struct sub_device *sdev) break; } sdev->remove = 0; - failsafe_hotplug_alarm_install(sdev->fs_dev); + failsafe_hotplug_alarm_install(fs_dev(sdev)); } static void @@ -318,8 +318,9 @@ fs_dev_stats_save(struct sub_device *sdev) WARN("Using latest snapshot taken before %"PRIu64" seconds.\n", (rte_rdtsc() - timestamp) / rte_get_tsc_hz()); } - failsafe_stats_increment(&PRIV(sdev->fs_dev)->stats_accumulator, - err ? &sdev->stats_snapshot.stats : &stats); + failsafe_stats_increment + (&PRIV(fs_dev(sdev))->stats_accumulator, + err ? &sdev->stats_snapshot.stats : &stats); memset(&sdev->stats_snapshot, 0, sizeof(sdev->stats_snapshot)); } @@ -566,17 +567,17 @@ failsafe_eth_rmv_event_callback(uint16_t port_id __rte_unused, { struct sub_device *sdev = cb_arg; - fs_lock(sdev->fs_dev, 0); + fs_lock(fs_dev(sdev), 0); /* Switch as soon as possible tx_dev. */ - fs_switch_dev(sdev->fs_dev, sdev); + fs_switch_dev(fs_dev(sdev), sdev); /* Use safe bursts in any case. */ - failsafe_set_burst_fn(sdev->fs_dev, 1); + failsafe_set_burst_fn(fs_dev(sdev), 1); /* * Async removal, the sub-PMD will try to unregister * the callback at the source of the current thread context. */ sdev->remove = 1; - fs_unlock(sdev->fs_dev, 0); + fs_unlock(fs_dev(sdev), 0); return 0; } diff --git a/drivers/net/failsafe/failsafe_intr.c b/drivers/net/failsafe/failsafe_intr.c index 09aa3c6..0f34c5b 100644 --- a/drivers/net/failsafe/failsafe_intr.c +++ b/drivers/net/failsafe/failsafe_intr.c @@ -274,14 +274,14 @@ failsafe_eth_rx_intr_ctl_subdevice(struct sub_device *sdev, int op) int rc; int ret = 0; + fsdev = fs_dev(sdev); if (sdev == NULL || (ETH(sdev) == NULL) || - sdev->fs_dev == NULL || (PRIV(sdev->fs_dev) == NULL)) { + fsdev == NULL || (PRIV(fsdev) == NULL)) { ERROR("Called with invalid arguments"); return -EINVAL; } dev = ETH(sdev); - fsdev = sdev->fs_dev; - epfd = PRIV(sdev->fs_dev)->rxp.efd; + epfd = PRIV(fsdev)->rxp.efd; pid = PORT_ID(sdev); if (epfd <= 0) { @@ -330,7 +330,7 @@ int failsafe_rx_intr_install_subdevice(struct sub_device *sdev) const struct rte_intr_conf *const intr_conf = Ð(sdev)->data->dev_conf.intr_conf; - fsdev = sdev->fs_dev; + fsdev = fs_dev(sdev); rxq = (struct rxq **)fsdev->data->rx_queues; if (intr_conf->rxq == 0) return 0; @@ -368,7 +368,7 @@ void failsafe_rx_intr_uninstall_subdevice(struct sub_device *sdev) struct rte_eth_dev *fsdev; struct rxq *fsrxq; - fsdev = sdev->fs_dev; + fsdev = fs_dev(sdev); for (qid = 0; qid < ETH(sdev)->data->nb_rx_queues; qid++) { if (qid < fsdev->data->nb_rx_queues) { fsrxq = fsdev->data->rx_queues[qid]; diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h index 29dfd40..af0c9d1 100644 --- a/drivers/net/failsafe/failsafe_private.h +++ b/drivers/net/failsafe/failsafe_private.h @@ -117,7 +117,7 @@ struct sub_device { /* Others are retrieved through a file descriptor */ char *fd_str; /* fail-safe device backreference */ - struct rte_eth_dev *fs_dev; + uint16_t fs_port_id; /* shared between processes */ /* flag calling for recollection */ volatile unsigned int remove:1; /* flow isolation state */ @@ -324,16 +324,16 @@ extern int failsafe_mac_from_arg; */ #define FS_ATOMIC_RX(s, i) \ rte_atomic64_read( \ - &((struct rxq *)((s)->fs_dev->data->rx_queues[i]))->refcnt[(s)->sid] \ - ) + &((struct rxq *) \ + (fs_dev(s)->data->rx_queues[i]))->refcnt[(s)->sid]) /** * s: (struct sub_device *) * i: uint16_t qid */ #define FS_ATOMIC_TX(s, i) \ rte_atomic64_read( \ - &((struct txq *)((s)->fs_dev->data->tx_queues[i]))->refcnt[(s)->sid] \ - ) + &((struct txq *) \ + (fs_dev(s)->data->tx_queues[i]))->refcnt[(s)->sid]) #ifdef RTE_EXEC_ENV_BSDAPP #define FS_THREADID_TYPE void* @@ -379,6 +379,11 @@ fs_find_next(struct rte_eth_dev *dev, return &subs[sid]; } +static inline struct rte_eth_dev * +fs_dev(struct sub_device *sdev) { + return &rte_eth_devices[sdev->fs_port_id]; +} + /* * Lock hot-plug mutex. * is_alarm means that the caller is, for sure, the hot-plug alarm mechanism. -- 2.7.4 ^ permalink raw reply [flat|nested] 33+ messages in thread
* [dpdk-dev] [PATCH v3 4/4] net/failsafe: support secondary process 2019-03-18 16:05 ` [dpdk-dev] [PATCH v3 0/4] support secondary process for failsafe Raslan Darawsheh ` (2 preceding siblings ...) 2019-03-18 16:05 ` [dpdk-dev] [PATCH v3 2/4] net/failsafe: change back-reference from sub-device Raslan Darawsheh @ 2019-03-18 16:05 ` Raslan Darawsheh 2019-03-18 16:05 ` Raslan Darawsheh 2019-03-18 16:05 ` [dpdk-dev] [PATCH v3 3/4] net/failsafe: replace sub-device pointer with port id Raslan Darawsheh 2019-03-18 16:16 ` [dpdk-dev] [PATCH v3 0/4] support secondary process for failsafe Gaëtan Rivet 5 siblings, 1 reply; 33+ messages in thread From: Raslan Darawsheh @ 2019-03-18 16:05 UTC (permalink / raw) To: gaetan.rivet; +Cc: dev, Thomas Monjalon, Raslan Darawsheh, stephen Add implementation for probe in secondary. Failsafe will attempt to attach all the sub-devices in secondary process. Signed-off-by: Raslan Darawsheh <rasland@mellanox.com> Signed-off-by: Thomas Monjalon <thomas@monjalon.net> --- v2: changed devargs_alread_listed return value to be bool. --- drivers/net/failsafe/failsafe.c | 45 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c index e53a89d..42dfaca 100644 --- a/drivers/net/failsafe/failsafe.c +++ b/drivers/net/failsafe/failsafe.c @@ -3,6 +3,8 @@ * Copyright 2017 Mellanox Technologies, Ltd */ +#include <stdbool.h> + #include <rte_alarm.h> #include <rte_malloc.h> #include <rte_ethdev_driver.h> @@ -318,11 +320,28 @@ fs_rte_eth_free(const char *name) return ret; } +static bool +devargs_already_listed(struct rte_devargs *devargs) +{ + struct rte_devargs *list_da; + + RTE_EAL_DEVARGS_FOREACH(devargs->bus->name, list_da) { + if (strcmp(list_da->name, devargs->name) == 0) + /* devargs already in the list */ + return true; + } + return false; +} + static int rte_pmd_failsafe_probe(struct rte_vdev_device *vdev) { const char *name; struct rte_eth_dev *eth_dev; + struct sub_device *sdev; + struct rte_devargs devargs; + uint8_t i; + int ret; name = rte_vdev_device_name(vdev); INFO("Initializing " FAILSAFE_DRIVER_NAME " for %s", @@ -335,9 +354,33 @@ rte_pmd_failsafe_probe(struct rte_vdev_device *vdev) ERROR("Failed to probe %s", name); return -1; } - /* TODO: request info from primary to set up Rx and Tx */ eth_dev->dev_ops = &failsafe_ops; eth_dev->device = &vdev->device; + eth_dev->rx_pkt_burst = (eth_rx_burst_t)&failsafe_rx_burst; + eth_dev->tx_pkt_burst = (eth_tx_burst_t)&failsafe_tx_burst; + /* + * Failsafe will attempt to probe all of its sub-devices. + * Any failure in sub-devices is not a fatal error. + * A sub-device can be plugged later. + */ + FOREACH_SUBDEV(sdev, i, eth_dev) { + /* rebuild devargs to be able to get the bus name. */ + ret = rte_devargs_parse(&devargs, + sdev->devargs.name); + if (ret != 0) { + ERROR("Failed to parse devargs %s", + devargs.name); + continue; + } + if (!devargs_already_listed(&devargs)) { + ret = rte_dev_probe(devargs.name); + if (ret != 0) { + ERROR("Failed to probe devargs %s", + devargs.name); + continue; + } + } + } rte_eth_dev_probing_finish(eth_dev); return 0; } -- 2.7.4 ^ permalink raw reply [flat|nested] 33+ messages in thread
* [dpdk-dev] [PATCH v3 4/4] net/failsafe: support secondary process 2019-03-18 16:05 ` [dpdk-dev] [PATCH v3 4/4] net/failsafe: support secondary process Raslan Darawsheh @ 2019-03-18 16:05 ` Raslan Darawsheh 0 siblings, 0 replies; 33+ messages in thread From: Raslan Darawsheh @ 2019-03-18 16:05 UTC (permalink / raw) To: gaetan.rivet; +Cc: dev, Thomas Monjalon, Raslan Darawsheh, stephen Add implementation for probe in secondary. Failsafe will attempt to attach all the sub-devices in secondary process. Signed-off-by: Raslan Darawsheh <rasland@mellanox.com> Signed-off-by: Thomas Monjalon <thomas@monjalon.net> --- v2: changed devargs_alread_listed return value to be bool. --- drivers/net/failsafe/failsafe.c | 45 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c index e53a89d..42dfaca 100644 --- a/drivers/net/failsafe/failsafe.c +++ b/drivers/net/failsafe/failsafe.c @@ -3,6 +3,8 @@ * Copyright 2017 Mellanox Technologies, Ltd */ +#include <stdbool.h> + #include <rte_alarm.h> #include <rte_malloc.h> #include <rte_ethdev_driver.h> @@ -318,11 +320,28 @@ fs_rte_eth_free(const char *name) return ret; } +static bool +devargs_already_listed(struct rte_devargs *devargs) +{ + struct rte_devargs *list_da; + + RTE_EAL_DEVARGS_FOREACH(devargs->bus->name, list_da) { + if (strcmp(list_da->name, devargs->name) == 0) + /* devargs already in the list */ + return true; + } + return false; +} + static int rte_pmd_failsafe_probe(struct rte_vdev_device *vdev) { const char *name; struct rte_eth_dev *eth_dev; + struct sub_device *sdev; + struct rte_devargs devargs; + uint8_t i; + int ret; name = rte_vdev_device_name(vdev); INFO("Initializing " FAILSAFE_DRIVER_NAME " for %s", @@ -335,9 +354,33 @@ rte_pmd_failsafe_probe(struct rte_vdev_device *vdev) ERROR("Failed to probe %s", name); return -1; } - /* TODO: request info from primary to set up Rx and Tx */ eth_dev->dev_ops = &failsafe_ops; eth_dev->device = &vdev->device; + eth_dev->rx_pkt_burst = (eth_rx_burst_t)&failsafe_rx_burst; + eth_dev->tx_pkt_burst = (eth_tx_burst_t)&failsafe_tx_burst; + /* + * Failsafe will attempt to probe all of its sub-devices. + * Any failure in sub-devices is not a fatal error. + * A sub-device can be plugged later. + */ + FOREACH_SUBDEV(sdev, i, eth_dev) { + /* rebuild devargs to be able to get the bus name. */ + ret = rte_devargs_parse(&devargs, + sdev->devargs.name); + if (ret != 0) { + ERROR("Failed to parse devargs %s", + devargs.name); + continue; + } + if (!devargs_already_listed(&devargs)) { + ret = rte_dev_probe(devargs.name); + if (ret != 0) { + ERROR("Failed to probe devargs %s", + devargs.name); + continue; + } + } + } rte_eth_dev_probing_finish(eth_dev); return 0; } -- 2.7.4 ^ permalink raw reply [flat|nested] 33+ messages in thread
* [dpdk-dev] [PATCH v3 3/4] net/failsafe: replace sub-device pointer with port id 2019-03-18 16:05 ` [dpdk-dev] [PATCH v3 0/4] support secondary process for failsafe Raslan Darawsheh ` (3 preceding siblings ...) 2019-03-18 16:05 ` [dpdk-dev] [PATCH v3 4/4] net/failsafe: support secondary process Raslan Darawsheh @ 2019-03-18 16:05 ` Raslan Darawsheh 2019-03-18 16:05 ` Raslan Darawsheh 2019-03-18 16:16 ` [dpdk-dev] [PATCH v3 0/4] support secondary process for failsafe Gaëtan Rivet 5 siblings, 1 reply; 33+ messages in thread From: Raslan Darawsheh @ 2019-03-18 16:05 UTC (permalink / raw) To: gaetan.rivet; +Cc: dev, Thomas Monjalon, Raslan Darawsheh, stephen In multiprocess context, the pointer to sub-device is shared between processes. Previously, it was a pointer to per process eth_dev so it's needed to replace this dependency. Signed-off-by: Thomas Monjalon <thomas@monjalon.net> Signed-off-by: Raslan Darawsheh <rasland@mellanox.com> --- v2: - moved comment in fs_sdev about subs to this commit - added parenthesis around macro arguments. v3: - replaced shared data with port id for sub dev. - update comment on the sub_dev struct --- drivers/net/failsafe/failsafe.c | 6 ++++++ drivers/net/failsafe/failsafe_eal.c | 2 +- drivers/net/failsafe/failsafe_ether.c | 1 + drivers/net/failsafe/failsafe_private.h | 17 +++++++++++------ 4 files changed, 19 insertions(+), 7 deletions(-) diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c index 68926ca..e53a89d 100644 --- a/drivers/net/failsafe/failsafe.c +++ b/drivers/net/failsafe/failsafe.c @@ -30,6 +30,8 @@ fs_sub_device_alloc(struct rte_eth_dev *dev, uint8_t nb_subs; int ret; int i; + struct sub_device *sdev; + uint8_t sdev_iterator; ret = failsafe_args_count_subdevice(dev, params); if (ret) @@ -51,6 +53,10 @@ fs_sub_device_alloc(struct rte_eth_dev *dev, for (i = 1; i < nb_subs; i++) PRIV(dev)->subs[i - 1].next = PRIV(dev)->subs + i; PRIV(dev)->subs[i - 1].next = PRIV(dev)->subs; + + FOREACH_SUBDEV(sdev, sdev_iterator, dev) { + sdev->sdev_port_id = RTE_MAX_ETHPORTS; + } return 0; } diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c index 56d1669..74fd8e9 100644 --- a/drivers/net/failsafe/failsafe_eal.c +++ b/drivers/net/failsafe/failsafe_eal.c @@ -112,7 +112,7 @@ fs_bus_init(struct rte_eth_dev *dev) continue; } } - ETH(sdev) = &rte_eth_devices[pid]; + sdev->sdev_port_id = pid; SUB_ID(sdev) = i; sdev->fs_port_id = dev->data->port_id; sdev->dev = ETH(sdev)->device; diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c index 7fa209a..7ac23d4 100644 --- a/drivers/net/failsafe/failsafe_ether.c +++ b/drivers/net/failsafe/failsafe_ether.c @@ -294,6 +294,7 @@ fs_dev_remove(struct sub_device *sdev) case DEV_PARSED: case DEV_UNDEFINED: sdev->state = DEV_UNDEFINED; + sdev->sdev_port_id = RTE_MAX_ETHPORTS; /* the end */ break; } diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h index af0c9d1..2a632d8 100644 --- a/drivers/net/failsafe/failsafe_private.h +++ b/drivers/net/failsafe/failsafe_private.h @@ -100,13 +100,15 @@ struct fs_stats { uint64_t timestamp; }; +/* + * Allocated in shared memory. + */ struct sub_device { /* Exhaustive DPDK device description */ struct sub_device *next; struct rte_devargs devargs; - struct rte_bus *bus; - struct rte_device *dev; - struct rte_eth_dev *edev; + struct rte_bus *bus; /* for primary process only. */ + struct rte_device *dev; /* for primary process only. */ uint8_t sid; /* Device state machine */ enum dev_state state; @@ -118,6 +120,8 @@ struct sub_device { char *fd_str; /* fail-safe device backreference */ uint16_t fs_port_id; /* shared between processes */ + /* sub device port id*/ + uint16_t sdev_port_id; /* shared between processes */ /* flag calling for recollection */ volatile unsigned int remove:1; /* flow isolation state */ @@ -139,7 +143,7 @@ struct fs_priv { * subs[0] is the preferred device * any other is just another slave */ - struct sub_device *subs; + struct sub_device *subs; /* shared between processes */ uint8_t subs_head; /* if head == tail, no subs */ uint8_t subs_tail; /* first invalid */ uint8_t subs_tx; /* current emitting device */ @@ -254,11 +258,12 @@ extern int failsafe_mac_from_arg; /* sdev: (struct sub_device *) */ #define ETH(sdev) \ - ((sdev)->edev) + ((sdev)->sdev_port_id == RTE_MAX_ETHPORTS ? \ + NULL : &rte_eth_devices[(sdev)->sdev_port_id]) /* sdev: (struct sub_device *) */ #define PORT_ID(sdev) \ - (ETH(sdev)->data->port_id) + ((sdev)->sdev_port_id) /* sdev: (struct sub_device *) */ #define SUB_ID(sdev) \ -- 2.7.4 ^ permalink raw reply [flat|nested] 33+ messages in thread
* [dpdk-dev] [PATCH v3 3/4] net/failsafe: replace sub-device pointer with port id 2019-03-18 16:05 ` [dpdk-dev] [PATCH v3 3/4] net/failsafe: replace sub-device pointer with port id Raslan Darawsheh @ 2019-03-18 16:05 ` Raslan Darawsheh 0 siblings, 0 replies; 33+ messages in thread From: Raslan Darawsheh @ 2019-03-18 16:05 UTC (permalink / raw) To: gaetan.rivet; +Cc: dev, Thomas Monjalon, Raslan Darawsheh, stephen In multiprocess context, the pointer to sub-device is shared between processes. Previously, it was a pointer to per process eth_dev so it's needed to replace this dependency. Signed-off-by: Thomas Monjalon <thomas@monjalon.net> Signed-off-by: Raslan Darawsheh <rasland@mellanox.com> --- v2: - moved comment in fs_sdev about subs to this commit - added parenthesis around macro arguments. v3: - replaced shared data with port id for sub dev. - update comment on the sub_dev struct --- drivers/net/failsafe/failsafe.c | 6 ++++++ drivers/net/failsafe/failsafe_eal.c | 2 +- drivers/net/failsafe/failsafe_ether.c | 1 + drivers/net/failsafe/failsafe_private.h | 17 +++++++++++------ 4 files changed, 19 insertions(+), 7 deletions(-) diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c index 68926ca..e53a89d 100644 --- a/drivers/net/failsafe/failsafe.c +++ b/drivers/net/failsafe/failsafe.c @@ -30,6 +30,8 @@ fs_sub_device_alloc(struct rte_eth_dev *dev, uint8_t nb_subs; int ret; int i; + struct sub_device *sdev; + uint8_t sdev_iterator; ret = failsafe_args_count_subdevice(dev, params); if (ret) @@ -51,6 +53,10 @@ fs_sub_device_alloc(struct rte_eth_dev *dev, for (i = 1; i < nb_subs; i++) PRIV(dev)->subs[i - 1].next = PRIV(dev)->subs + i; PRIV(dev)->subs[i - 1].next = PRIV(dev)->subs; + + FOREACH_SUBDEV(sdev, sdev_iterator, dev) { + sdev->sdev_port_id = RTE_MAX_ETHPORTS; + } return 0; } diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c index 56d1669..74fd8e9 100644 --- a/drivers/net/failsafe/failsafe_eal.c +++ b/drivers/net/failsafe/failsafe_eal.c @@ -112,7 +112,7 @@ fs_bus_init(struct rte_eth_dev *dev) continue; } } - ETH(sdev) = &rte_eth_devices[pid]; + sdev->sdev_port_id = pid; SUB_ID(sdev) = i; sdev->fs_port_id = dev->data->port_id; sdev->dev = ETH(sdev)->device; diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c index 7fa209a..7ac23d4 100644 --- a/drivers/net/failsafe/failsafe_ether.c +++ b/drivers/net/failsafe/failsafe_ether.c @@ -294,6 +294,7 @@ fs_dev_remove(struct sub_device *sdev) case DEV_PARSED: case DEV_UNDEFINED: sdev->state = DEV_UNDEFINED; + sdev->sdev_port_id = RTE_MAX_ETHPORTS; /* the end */ break; } diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h index af0c9d1..2a632d8 100644 --- a/drivers/net/failsafe/failsafe_private.h +++ b/drivers/net/failsafe/failsafe_private.h @@ -100,13 +100,15 @@ struct fs_stats { uint64_t timestamp; }; +/* + * Allocated in shared memory. + */ struct sub_device { /* Exhaustive DPDK device description */ struct sub_device *next; struct rte_devargs devargs; - struct rte_bus *bus; - struct rte_device *dev; - struct rte_eth_dev *edev; + struct rte_bus *bus; /* for primary process only. */ + struct rte_device *dev; /* for primary process only. */ uint8_t sid; /* Device state machine */ enum dev_state state; @@ -118,6 +120,8 @@ struct sub_device { char *fd_str; /* fail-safe device backreference */ uint16_t fs_port_id; /* shared between processes */ + /* sub device port id*/ + uint16_t sdev_port_id; /* shared between processes */ /* flag calling for recollection */ volatile unsigned int remove:1; /* flow isolation state */ @@ -139,7 +143,7 @@ struct fs_priv { * subs[0] is the preferred device * any other is just another slave */ - struct sub_device *subs; + struct sub_device *subs; /* shared between processes */ uint8_t subs_head; /* if head == tail, no subs */ uint8_t subs_tail; /* first invalid */ uint8_t subs_tx; /* current emitting device */ @@ -254,11 +258,12 @@ extern int failsafe_mac_from_arg; /* sdev: (struct sub_device *) */ #define ETH(sdev) \ - ((sdev)->edev) + ((sdev)->sdev_port_id == RTE_MAX_ETHPORTS ? \ + NULL : &rte_eth_devices[(sdev)->sdev_port_id]) /* sdev: (struct sub_device *) */ #define PORT_ID(sdev) \ - (ETH(sdev)->data->port_id) + ((sdev)->sdev_port_id) /* sdev: (struct sub_device *) */ #define SUB_ID(sdev) \ -- 2.7.4 ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [dpdk-dev] [PATCH v3 0/4] support secondary process for failsafe 2019-03-18 16:05 ` [dpdk-dev] [PATCH v3 0/4] support secondary process for failsafe Raslan Darawsheh ` (4 preceding siblings ...) 2019-03-18 16:05 ` [dpdk-dev] [PATCH v3 3/4] net/failsafe: replace sub-device pointer with port id Raslan Darawsheh @ 2019-03-18 16:16 ` Gaëtan Rivet 2019-03-18 16:16 ` Gaëtan Rivet 2019-03-27 14:08 ` Ferruh Yigit 5 siblings, 2 replies; 33+ messages in thread From: Gaëtan Rivet @ 2019-03-18 16:16 UTC (permalink / raw) To: Raslan Darawsheh; +Cc: dev, Thomas Monjalon, stephen Hello Raslan, It all seems okay, thank you for the reworks and the added features :). For the series: Acked-by: Gaetan Rivet <gaetan.rivet@6wind.com> On Mon, Mar 18, 2019 at 04:05:24PM +0000, Raslan Darawsheh wrote: > this set of patches are intended to support secondary process for failsafe. > > v3 integrates changes after review from Gaetan > There is no functional change. > > Raslan Darawsheh (4): > net/failsafe: replace local device with shared data > net/failsafe: change back-reference from sub-device > net/failsafe: replace sub-device pointer with port id > net/failsafe: support secondary process > > drivers/net/failsafe/failsafe.c | 53 +++++++++++++++++++++++++++++++-- > drivers/net/failsafe/failsafe_eal.c | 4 +-- > drivers/net/failsafe/failsafe_ether.c | 16 +++++----- > drivers/net/failsafe/failsafe_intr.c | 20 ++++++------- > drivers/net/failsafe/failsafe_ops.c | 4 +-- > drivers/net/failsafe/failsafe_private.h | 38 +++++++++++++++-------- > drivers/net/failsafe/failsafe_rxtx.c | 4 +-- > 7 files changed, 102 insertions(+), 37 deletions(-) > > -- > 2.7.4 > -- Gaëtan Rivet 6WIND ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [dpdk-dev] [PATCH v3 0/4] support secondary process for failsafe 2019-03-18 16:16 ` [dpdk-dev] [PATCH v3 0/4] support secondary process for failsafe Gaëtan Rivet @ 2019-03-18 16:16 ` Gaëtan Rivet 2019-03-27 14:08 ` Ferruh Yigit 1 sibling, 0 replies; 33+ messages in thread From: Gaëtan Rivet @ 2019-03-18 16:16 UTC (permalink / raw) To: Raslan Darawsheh; +Cc: dev, Thomas Monjalon, stephen [-- Warning: decoded text below may be mangled, UTF-8 assumed --] [-- Attachment #1: Type: text/plain; charset="UTF-8", Size: 1156 bytes --] Hello Raslan, It all seems okay, thank you for the reworks and the added features :). For the series: Acked-by: Gaetan Rivet <gaetan.rivet@6wind.com> On Mon, Mar 18, 2019 at 04:05:24PM +0000, Raslan Darawsheh wrote: > this set of patches are intended to support secondary process for failsafe. > > v3 integrates changes after review from Gaetan > There is no functional change. > > Raslan Darawsheh (4): > net/failsafe: replace local device with shared data > net/failsafe: change back-reference from sub-device > net/failsafe: replace sub-device pointer with port id > net/failsafe: support secondary process > > drivers/net/failsafe/failsafe.c | 53 +++++++++++++++++++++++++++++++-- > drivers/net/failsafe/failsafe_eal.c | 4 +-- > drivers/net/failsafe/failsafe_ether.c | 16 +++++----- > drivers/net/failsafe/failsafe_intr.c | 20 ++++++------- > drivers/net/failsafe/failsafe_ops.c | 4 +-- > drivers/net/failsafe/failsafe_private.h | 38 +++++++++++++++-------- > drivers/net/failsafe/failsafe_rxtx.c | 4 +-- > 7 files changed, 102 insertions(+), 37 deletions(-) > > -- > 2.7.4 > -- Gaëtan Rivet 6WIND ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [dpdk-dev] [PATCH v3 0/4] support secondary process for failsafe 2019-03-18 16:16 ` [dpdk-dev] [PATCH v3 0/4] support secondary process for failsafe Gaëtan Rivet 2019-03-18 16:16 ` Gaëtan Rivet @ 2019-03-27 14:08 ` Ferruh Yigit 2019-03-27 14:08 ` Ferruh Yigit 1 sibling, 1 reply; 33+ messages in thread From: Ferruh Yigit @ 2019-03-27 14:08 UTC (permalink / raw) To: Gaëtan Rivet, Raslan Darawsheh; +Cc: dev, Thomas Monjalon, stephen <...> > On Mon, Mar 18, 2019 at 04:05:24PM +0000, Raslan Darawsheh wrote: >> this set of patches are intended to support secondary process for failsafe. >> >> v3 integrates changes after review from Gaetan >> There is no functional change. >> >> Raslan Darawsheh (4): >> net/failsafe: replace local device with shared data >> net/failsafe: change back-reference from sub-device >> net/failsafe: replace sub-device pointer with port id >> net/failsafe: support secondary process >> > For the series: > Acked-by: Gaetan Rivet <gaetan.rivet@6wind.com> Series applied to dpdk-next-net/master, thanks. ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [dpdk-dev] [PATCH v3 0/4] support secondary process for failsafe 2019-03-27 14:08 ` Ferruh Yigit @ 2019-03-27 14:08 ` Ferruh Yigit 0 siblings, 0 replies; 33+ messages in thread From: Ferruh Yigit @ 2019-03-27 14:08 UTC (permalink / raw) To: Gaëtan Rivet, Raslan Darawsheh; +Cc: dev, Thomas Monjalon, stephen <...> > On Mon, Mar 18, 2019 at 04:05:24PM +0000, Raslan Darawsheh wrote: >> this set of patches are intended to support secondary process for failsafe. >> >> v3 integrates changes after review from Gaetan >> There is no functional change. >> >> Raslan Darawsheh (4): >> net/failsafe: replace local device with shared data >> net/failsafe: change back-reference from sub-device >> net/failsafe: replace sub-device pointer with port id >> net/failsafe: support secondary process >> > For the series: > Acked-by: Gaetan Rivet <gaetan.rivet@6wind.com> Series applied to dpdk-next-net/master, thanks. ^ permalink raw reply [flat|nested] 33+ messages in thread
end of thread, other threads:[~2019-03-27 14:08 UTC | newest] Thread overview: 33+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2019-03-05 9:52 [dpdk-dev] [PATCH v2 1/4] net/failsafe: replace local device with shared data Raslan Darawsheh 2019-03-05 9:52 ` [dpdk-dev] [PATCH v2 3/4] net/failsafe: replace local sub-device " Raslan Darawsheh 2019-03-05 9:59 ` Thomas Monjalon 2019-03-05 17:38 ` Gaëtan Rivet 2019-03-05 17:58 ` Thomas Monjalon 2019-03-06 10:46 ` Gaëtan Rivet 2019-03-06 18:02 ` Thomas Monjalon 2019-03-07 8:43 ` Raslan Darawsheh 2019-03-07 9:47 ` Gaëtan Rivet 2019-03-07 11:34 ` Raslan Darawsheh 2019-03-07 11:50 ` Gaëtan Rivet 2019-03-05 9:52 ` [dpdk-dev] [PATCH v2 2/4] net/failsafe: change back-reference from sub-device Raslan Darawsheh 2019-03-05 16:48 ` Gaëtan Rivet 2019-03-07 9:01 ` Raslan Darawsheh 2019-03-07 9:43 ` Gaëtan Rivet 2019-03-05 9:52 ` [dpdk-dev] [PATCH v2 4/4] net/failsafe: support secondary process Raslan Darawsheh 2019-03-05 16:43 ` [dpdk-dev] [PATCH v2 1/4] net/failsafe: replace local device with shared data Gaëtan Rivet 2019-03-05 17:40 ` Gaëtan Rivet 2019-03-05 17:41 ` Thomas Monjalon 2019-03-18 16:05 ` [dpdk-dev] [PATCH v3 0/4] support secondary process for failsafe Raslan Darawsheh 2019-03-18 16:05 ` Raslan Darawsheh 2019-03-18 16:05 ` [dpdk-dev] [PATCH v3 1/4] net/failsafe: replace local device with shared data Raslan Darawsheh 2019-03-18 16:05 ` Raslan Darawsheh 2019-03-18 16:05 ` [dpdk-dev] [PATCH v3 2/4] net/failsafe: change back-reference from sub-device Raslan Darawsheh 2019-03-18 16:05 ` Raslan Darawsheh 2019-03-18 16:05 ` [dpdk-dev] [PATCH v3 4/4] net/failsafe: support secondary process Raslan Darawsheh 2019-03-18 16:05 ` Raslan Darawsheh 2019-03-18 16:05 ` [dpdk-dev] [PATCH v3 3/4] net/failsafe: replace sub-device pointer with port id Raslan Darawsheh 2019-03-18 16:05 ` Raslan Darawsheh 2019-03-18 16:16 ` [dpdk-dev] [PATCH v3 0/4] support secondary process for failsafe Gaëtan Rivet 2019-03-18 16:16 ` Gaëtan Rivet 2019-03-27 14:08 ` Ferruh Yigit 2019-03-27 14:08 ` Ferruh Yigit
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).