DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH] net/failsafe: fix calling device during RMV events
@ 2017-09-09 19:27 Ophir Munk
  2017-09-11  8:31 ` Gaëtan Rivet
  0 siblings, 1 reply; 36+ messages in thread
From: Ophir Munk @ 2017-09-09 19:27 UTC (permalink / raw)
  To: Gaetan Rivet; +Cc: Adrien Mazarguil, dev, Thomas Monjalon, Olga Shern, stable

This commit prevents control path operations from failing after a sub
device has informed failsafe it has been removed.

Before this commit if a device was removed and then a control path
operation was initiated on failsafe - in some cases failsafe called the
sub device operation instead of avoiding it. Such cases could lead to
operation failures.

This commit fixes failsafe criteria to determine when the device is removed
such that it will avoid calling the sub device operations during that time
and will only call them otherwise.

Fixes: a46f8d584eb8 ("net/failsafe: add fail-safe PMD")
Cc: stable@dpdk.org

Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
---
 drivers/net/failsafe/failsafe_ether.c |  1 +
 drivers/net/failsafe/failsafe_ops.c   | 52 +++++++++++++++++++++++++++++------
 2 files changed, 45 insertions(+), 8 deletions(-)

diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
index a3a8cce..1def110 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -378,6 +378,7 @@
 				      i);
 				goto err_remove;
 			}
+			sdev->remove = 0;
 		}
 	}
 	/*
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
index ff9ad15..314d53d 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -232,7 +232,6 @@
 			dev->data->dev_conf.intr_conf.lsc = 0;
 		}
 		DEBUG("Configuring sub-device %d", i);
-		sdev->remove = 0;
 		ret = rte_eth_dev_configure(PORT_ID(sdev),
 					dev->data->nb_rx_queues,
 					dev->data->nb_tx_queues,
@@ -311,6 +310,8 @@
 	int ret;
 
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+		if (sdev->remove)
+			continue;
 		DEBUG("Calling rte_eth_dev_set_link_up on sub_device %d", i);
 		ret = rte_eth_dev_set_link_up(PORT_ID(sdev));
 		if (ret) {
@@ -330,6 +331,8 @@
 	int ret;
 
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+		if (sdev->remove)
+			continue;
 		DEBUG("Calling rte_eth_dev_set_link_down on sub_device %d", i);
 		ret = rte_eth_dev_set_link_down(PORT_ID(sdev));
 		if (ret) {
@@ -517,8 +520,11 @@
 	struct sub_device *sdev;
 	uint8_t i;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
+	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+		if (sdev->remove)
+			continue;
 		rte_eth_promiscuous_enable(PORT_ID(sdev));
+	}
 }
 
 static void
@@ -527,8 +533,11 @@
 	struct sub_device *sdev;
 	uint8_t i;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
+	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+		if (sdev->remove)
+			continue;
 		rte_eth_promiscuous_disable(PORT_ID(sdev));
+	}
 }
 
 static void
@@ -537,8 +546,11 @@
 	struct sub_device *sdev;
 	uint8_t i;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
+	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+		if (sdev->remove)
+			continue;
 		rte_eth_allmulticast_enable(PORT_ID(sdev));
+	}
 }
 
 static void
@@ -547,8 +559,11 @@
 	struct sub_device *sdev;
 	uint8_t i;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
+	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+		if (sdev->remove)
+			continue;
 		rte_eth_allmulticast_disable(PORT_ID(sdev));
+	}
 }
 
 static int
@@ -560,6 +575,8 @@
 	int ret;
 
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+		if (sdev->remove)
+			continue;
 		DEBUG("Calling link_update on sub_device %d", i);
 		ret = (SUBOPS(sdev, link_update))(ETH(sdev), wait_to_complete);
 		if (ret && ret != -1) {
@@ -597,8 +614,11 @@
 	struct sub_device *sdev;
 	uint8_t i;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
+	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+		if (sdev->remove)
+			continue;
 		rte_eth_stats_reset(PORT_ID(sdev));
+	}
 }
 
 /**
@@ -693,6 +713,8 @@
 	int ret;
 
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+		if (sdev->remove)
+			continue;
 		DEBUG("Calling rte_eth_dev_set_mtu on sub_device %d", i);
 		ret = rte_eth_dev_set_mtu(PORT_ID(sdev), mtu);
 		if (ret) {
@@ -712,6 +734,8 @@
 	int ret;
 
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+		if (sdev->remove)
+			continue;
 		DEBUG("Calling rte_eth_dev_vlan_filter on sub_device %d", i);
 		ret = rte_eth_dev_vlan_filter(PORT_ID(sdev), vlan_id, on);
 		if (ret) {
@@ -746,6 +770,8 @@
 	int ret;
 
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+		if (sdev->remove)
+			continue;
 		DEBUG("Calling rte_eth_dev_flow_ctrl_set on sub_device %d", i);
 		ret = rte_eth_dev_flow_ctrl_set(PORT_ID(sdev), fc_conf);
 		if (ret) {
@@ -766,9 +792,12 @@
 	/* No check: already done within the rte_eth_dev_mac_addr_remove
 	 * call for the fail-safe device.
 	 */
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
+	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+		if (sdev->remove)
+			continue;
 		rte_eth_dev_mac_addr_remove(PORT_ID(sdev),
 				&dev->data->mac_addrs[index]);
+	}
 	PRIV(dev)->mac_addr_pool[index] = 0;
 }
 
@@ -784,6 +813,8 @@
 
 	RTE_ASSERT(index < FAILSAFE_MAX_ETHADDR);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+		if (sdev->remove)
+			continue;
 		ret = rte_eth_dev_mac_addr_add(PORT_ID(sdev), mac_addr, vmdq);
 		if (ret) {
 			ERROR("Operation rte_eth_dev_mac_addr_add failed for sub_device %"
@@ -805,8 +836,11 @@
 	struct sub_device *sdev;
 	uint8_t i;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
+	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+		if (sdev->remove)
+			continue;
 		rte_eth_dev_default_mac_addr_set(PORT_ID(sdev), mac_addr);
+	}
 }
 
 static int
@@ -825,6 +859,8 @@
 		return 0;
 	}
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+		if (sdev->remove)
+			continue;
 		DEBUG("Calling rte_eth_dev_filter_ctrl on sub_device %d", i);
 		ret = rte_eth_dev_filter_ctrl(PORT_ID(sdev), type, op, arg);
 		if (ret) {
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [dpdk-dev] [PATCH] net/failsafe: fix calling device during RMV events
  2017-09-09 19:27 [dpdk-dev] [PATCH] net/failsafe: fix calling device during RMV events Ophir Munk
@ 2017-09-11  8:31 ` Gaëtan Rivet
  2017-09-23 21:57   ` Ophir Munk
  0 siblings, 1 reply; 36+ messages in thread
From: Gaëtan Rivet @ 2017-09-11  8:31 UTC (permalink / raw)
  To: Ophir Munk; +Cc: Adrien Mazarguil, dev, Thomas Monjalon, Olga Shern, stable

Hi Ophir,

On Sat, Sep 09, 2017 at 07:27:11PM +0000, Ophir Munk wrote:
> This commit prevents control path operations from failing after a sub
> device has informed failsafe it has been removed.
> 
> Before this commit if a device was removed and then a control path

Here are the steps if I understood correctly:

0. The physical device is removed
1. The interrupt thread flags the device
2. A control lcore initiates a control operation
3. The alarm triggers, waking up the eal-intr-thread,
   initiating the actual device removal.
4. Race condition occurs between control lcore and interrupt thread.

"if a device was removed" is ambiguous I think (are we speaking about
the physical port? Is it only flagged? Is it after the removal of the
device itself?). From the context I gather that you mean the device is
flagged to be removed, but it won't be as clear in a few months when we
revisit this bug :) .

Could you please rephrase this so that the whole context of the issue
is available?

> operations was initiated on failsafe - in some cases failsafe called the
> sub device operation instead of avoiding it. Such cases could lead to
> operations failures.
> 
> This commit fixes failsafe criteria to determine when the device is removed
> such that it will avoid calling the sub device operations during that time
> and will only call them otherwise.
> 

This commit mitigates the race condition, reducing the probability for
it to have an effect. It does not, however, remove this race condition,
which is inherent to the DPDK architecture at the moment.

A proper fix, a more detailed workaround and additional documentation
warning users writing applications to mind their threads could be
interesting.

But let's focus on this patch for the time being.

> Fixes: a46f8d584eb8 ("net/failsafe: add fail-safe PMD")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
> ---
>  drivers/net/failsafe/failsafe_ether.c |  1 +
>  drivers/net/failsafe/failsafe_ops.c   | 52 +++++++++++++++++++++++++++++------
>  2 files changed, 45 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
> index a3a8cce..1def110 100644
> --- a/drivers/net/failsafe/failsafe_ether.c
> +++ b/drivers/net/failsafe/failsafe_ether.c
> @@ -378,6 +378,7 @@

Could you please generate your patches with the function name in the
diff?

>  				      i);
>  				goto err_remove;
>  			}
> +			sdev->remove = 0;

You are adding this here, within failsafe_eth_dev_state_sync,
and removing it from the dev_configure ops.

10 lines above, the call to dev_configure is done, meaning that the
remove flag was reset at this point.

Can you explain why you prefer resetting the flag here?

The position of this flag reset will be dependent upon my subsequent
remarks anyway, so hold that thought :) .

>  		}
>  	}
>  	/*
> diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
> index ff9ad15..314d53d 100644
> --- a/drivers/net/failsafe/failsafe_ops.c
> +++ b/drivers/net/failsafe/failsafe_ops.c
> @@ -232,7 +232,6 @@
>  			dev->data->dev_conf.intr_conf.lsc = 0;
>  		}
>  		DEBUG("Configuring sub-device %d", i);
> -		sdev->remove = 0;
>  		ret = rte_eth_dev_configure(PORT_ID(sdev),
>  					dev->data->nb_rx_queues,
>  					dev->data->nb_tx_queues,
> @@ -311,6 +310,8 @@
>  	int ret;
>  
>  	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
> +		if (sdev->remove)
> +			continue;
>  		DEBUG("Calling rte_eth_dev_set_link_up on sub_device %d", i);
>  		ret = rte_eth_dev_set_link_up(PORT_ID(sdev));
>  		if (ret) {
> @@ -330,6 +331,8 @@
>  	int ret;
>  
>  	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
> +		if (sdev->remove)
> +			continue;

For this change and all the others:

I think it might be best to have this check added to fs_find_next
directly.

Most of the calls to the iterators are done within dev_ops, so it makes
sense I think to have it there.

But then there'd be an issue with the sub-EAL iterations done on
previously-removed ports, as the removed flag is precisely reset too
late. The function failsafe_dev_remove would also need to have a manual
iteration upon the sub-devices instead of using the macro.

I think you can actually reset this flag within fs_dev_remove, instead
of the next plug-in, then having this check within fs_find_next *should*
not be a problem.

I think you should break up those changes in two: first move the flag
reset to fs_dev_remove instead of fs_dev_configure, then add this check
to the iterator.

This way, a git bisect should allow us to pinpoint more easily any new bug
as both changes have the potential to introduce subtle ones.

>  		DEBUG("Calling rte_eth_dev_set_link_down on sub_device %d", i);
>  		ret = rte_eth_dev_set_link_down(PORT_ID(sdev));
>  		if (ret) {
> @@ -517,8 +520,11 @@
>  	struct sub_device *sdev;
>  	uint8_t i;
>  
> -	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
> +	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
> +		if (sdev->remove)
> +			continue;
>  		rte_eth_promiscuous_enable(PORT_ID(sdev));
> +	}
>  }
>  
>  static void

<snip>

> -- 
> 1.8.3.1
> 

Thanks,
-- 
Gaëtan Rivet
6WIND

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [dpdk-dev] [PATCH] net/failsafe: fix calling device during RMV events
  2017-09-11  8:31 ` Gaëtan Rivet
@ 2017-09-23 21:57   ` Ophir Munk
  2017-10-05 22:42     ` [dpdk-dev] [PATCH v3] " Ophir Munk
  0 siblings, 1 reply; 36+ messages in thread
From: Ophir Munk @ 2017-09-23 21:57 UTC (permalink / raw)
  To: Gaetan Rivet
  Cc: Adrien Mazarguil, dev, Thomas Monjalon, Olga Shern, Ophir Munk, stable

This commit prevents control path operations from failing after a sub
device removal.

Following are the failure steps:
1. The physical device is removed due to change in one of PF parameters
(e.g. MTU)
2. The interrupt thread flags the device
3. Within 2 seconds the interrupt thread initiates the actual device removal,
then every 2 seconds it tries to re-sync (plug in) the device. The attempts
fail as long as the VF parameter mismatches the PF parameter.
4. A control thread initiates a control operation on failsafe which
initiates this operation on the device.
5. A race condition occurs between the control thread and interrupt thread
when accessing the device data structures.

This commit prevents the race condition in step 5. Before this commit if a
device was removed and then a control thread operation was initiated on
failsafe - in some cases failsafe called the sub device operation instead
of avoiding it. Such cases could lead to operation failures.

This commit fixes failsafe criteria to determine when the device is removed
such that it will avoid calling the sub device operations during that time
and will only call them otherwise.

Fixes: a46f8d584eb8 ("net/failsafe: add fail-safe PMD")
Cc: stable@dpdk.org

Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
---
This V2 patch is in reply to <20170911083117.GM21444@bidouze.vm.6wind.com>

 drivers/net/failsafe/failsafe_ether.c   |  1 +
 drivers/net/failsafe/failsafe_ops.c     | 31 +++++++++++++++----------------
 drivers/net/failsafe/failsafe_private.h | 26 +++++++++++++++++++++-----
 3 files changed, 37 insertions(+), 21 deletions(-)

diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
index a3a8cce..1def110 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -378,6 +378,7 @@ failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
 				      i);
 				goto err_remove;
 			}
+			sdev->remove = 0;
 		}
 	}
 	/*
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
index ff9ad15..721a48a 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -232,7 +232,6 @@ fs_dev_configure(struct rte_eth_dev *dev)
 			dev->data->dev_conf.intr_conf.lsc = 0;
 		}
 		DEBUG("Configuring sub-device %d", i);
-		sdev->remove = 0;
 		ret = rte_eth_dev_configure(PORT_ID(sdev),
 					dev->data->nb_rx_queues,
 					dev->data->nb_tx_queues,
@@ -310,7 +309,7 @@ fs_dev_set_link_up(struct rte_eth_dev *dev)
 	uint8_t i;
 	int ret;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+	FOREACH_SUBDEV_ACTIVE_SAFE(sdev, i, dev) {
 		DEBUG("Calling rte_eth_dev_set_link_up on sub_device %d", i);
 		ret = rte_eth_dev_set_link_up(PORT_ID(sdev));
 		if (ret) {
@@ -329,7 +328,7 @@ fs_dev_set_link_down(struct rte_eth_dev *dev)
 	uint8_t i;
 	int ret;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+	FOREACH_SUBDEV_ACTIVE_SAFE(sdev, i, dev) {
 		DEBUG("Calling rte_eth_dev_set_link_down on sub_device %d", i);
 		ret = rte_eth_dev_set_link_down(PORT_ID(sdev));
 		if (ret) {
@@ -517,7 +516,7 @@ fs_promiscuous_enable(struct rte_eth_dev *dev)
 	struct sub_device *sdev;
 	uint8_t i;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
+	FOREACH_SUBDEV_ACTIVE_SAFE(sdev, i, dev)
 		rte_eth_promiscuous_enable(PORT_ID(sdev));
 }
 
@@ -527,7 +526,7 @@ fs_promiscuous_disable(struct rte_eth_dev *dev)
 	struct sub_device *sdev;
 	uint8_t i;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
+	FOREACH_SUBDEV_ACTIVE_SAFE(sdev, i, dev)
 		rte_eth_promiscuous_disable(PORT_ID(sdev));
 }
 
@@ -537,7 +536,7 @@ fs_allmulticast_enable(struct rte_eth_dev *dev)
 	struct sub_device *sdev;
 	uint8_t i;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
+	FOREACH_SUBDEV_ACTIVE_SAFE(sdev, i, dev)
 		rte_eth_allmulticast_enable(PORT_ID(sdev));
 }
 
@@ -547,7 +546,7 @@ fs_allmulticast_disable(struct rte_eth_dev *dev)
 	struct sub_device *sdev;
 	uint8_t i;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
+	FOREACH_SUBDEV_ACTIVE_SAFE(sdev, i, dev)
 		rte_eth_allmulticast_disable(PORT_ID(sdev));
 }
 
@@ -559,7 +558,7 @@ fs_link_update(struct rte_eth_dev *dev,
 	uint8_t i;
 	int ret;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+	FOREACH_SUBDEV_ACTIVE_SAFE(sdev, i, dev) {
 		DEBUG("Calling link_update on sub_device %d", i);
 		ret = (SUBOPS(sdev, link_update))(ETH(sdev), wait_to_complete);
 		if (ret && ret != -1) {
@@ -597,7 +596,7 @@ fs_stats_reset(struct rte_eth_dev *dev)
 	struct sub_device *sdev;
 	uint8_t i;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
+	FOREACH_SUBDEV_ACTIVE_SAFE(sdev, i, dev)
 		rte_eth_stats_reset(PORT_ID(sdev));
 }
 
@@ -692,7 +691,7 @@ fs_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
 	uint8_t i;
 	int ret;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+	FOREACH_SUBDEV_ACTIVE_SAFE(sdev, i, dev) {
 		DEBUG("Calling rte_eth_dev_set_mtu on sub_device %d", i);
 		ret = rte_eth_dev_set_mtu(PORT_ID(sdev), mtu);
 		if (ret) {
@@ -711,7 +710,7 @@ fs_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
 	uint8_t i;
 	int ret;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+	FOREACH_SUBDEV_ACTIVE_SAFE(sdev, i, dev) {
 		DEBUG("Calling rte_eth_dev_vlan_filter on sub_device %d", i);
 		ret = rte_eth_dev_vlan_filter(PORT_ID(sdev), vlan_id, on);
 		if (ret) {
@@ -745,7 +744,7 @@ fs_flow_ctrl_set(struct rte_eth_dev *dev,
 	uint8_t i;
 	int ret;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+	FOREACH_SUBDEV_ACTIVE_SAFE(sdev, i, dev) {
 		DEBUG("Calling rte_eth_dev_flow_ctrl_set on sub_device %d", i);
 		ret = rte_eth_dev_flow_ctrl_set(PORT_ID(sdev), fc_conf);
 		if (ret) {
@@ -766,7 +765,7 @@ fs_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
 	/* No check: already done within the rte_eth_dev_mac_addr_remove
 	 * call for the fail-safe device.
 	 */
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
+	FOREACH_SUBDEV_ACTIVE_SAFE(sdev, i, dev)
 		rte_eth_dev_mac_addr_remove(PORT_ID(sdev),
 				&dev->data->mac_addrs[index]);
 	PRIV(dev)->mac_addr_pool[index] = 0;
@@ -783,7 +782,7 @@ fs_mac_addr_add(struct rte_eth_dev *dev,
 	uint8_t i;
 
 	RTE_ASSERT(index < FAILSAFE_MAX_ETHADDR);
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+	FOREACH_SUBDEV_ACTIVE_SAFE(sdev, i, dev) {
 		ret = rte_eth_dev_mac_addr_add(PORT_ID(sdev), mac_addr, vmdq);
 		if (ret) {
 			ERROR("Operation rte_eth_dev_mac_addr_add failed for sub_device %"
@@ -805,7 +804,7 @@ fs_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
 	struct sub_device *sdev;
 	uint8_t i;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
+	FOREACH_SUBDEV_ACTIVE_SAFE(sdev, i, dev)
 		rte_eth_dev_default_mac_addr_set(PORT_ID(sdev), mac_addr);
 }
 
@@ -824,7 +823,7 @@ fs_filter_ctrl(struct rte_eth_dev *dev,
 		*(const void **)arg = &fs_flow_ops;
 		return 0;
 	}
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+	FOREACH_SUBDEV_ACTIVE_SAFE(sdev, i, dev) {
 		DEBUG("Calling rte_eth_dev_filter_ctrl on sub_device %d", i);
 		ret = rte_eth_dev_filter_ctrl(PORT_ID(sdev), type, op, arg);
 		if (ret) {
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index 0361cf4..fda1606 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -221,9 +221,21 @@ extern int mac_from_arg;
  * state: (enum dev_state), minimum acceptable device state
  */
 #define FOREACH_SUBDEV_STATE(s, i, dev, state)				\
-	for (i = fs_find_next((dev), 0, state);				\
+	for (i = fs_find_next((dev), 0, state, 0);			\
 	     i < PRIV(dev)->subs_tail && (s = &PRIV(dev)->subs[i]);	\
-	     i = fs_find_next((dev), i + 1, state))
+	     i = fs_find_next((dev), i + 1, state, 0))
+
+/**
+ * Stateful iterator construct over fail-safe sub-devices
+ * in ACTIVE state and not removed due to RMV event
+ * s:     (struct sub_device *), iterator
+ * i:     (uint8_t), increment
+ * dev:   (struct rte_eth_dev *), fail-safe ethdev
+ */
+#define FOREACH_SUBDEV_ACTIVE_SAFE(s, i, dev)				\
+	for (i = fs_find_next((dev), 0, DEV_ACTIVE, 1);			\
+	     i < PRIV(dev)->subs_tail && (s = &PRIV(dev)->subs[i]);	\
+	     i = fs_find_next((dev), i + 1, DEV_ACTIVE, 1))
 
 /**
  * Iterator construct over fail-safe sub-devices:
@@ -296,11 +308,15 @@ extern int mac_from_arg;
 
 static inline uint8_t
 fs_find_next(struct rte_eth_dev *dev, uint8_t sid,
-		enum dev_state min_state)
+		enum dev_state min_state, int check_remove)
 {
 	while (sid < PRIV(dev)->subs_tail) {
-		if (PRIV(dev)->subs[sid].state >= min_state)
-			break;
+		if (PRIV(dev)->subs[sid].state >= min_state) {
+			if (check_remove == 0)
+				break;
+			if (PRIV(dev)->subs[sid].remove == 0)
+				break;
+		}
 		sid++;
 	}
 	if (sid >= PRIV(dev)->subs_tail)
-- 
2.7.4

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [dpdk-dev] [PATCH v3] net/failsafe: fix calling device during RMV events
  2017-09-23 21:57   ` Ophir Munk
@ 2017-10-05 22:42     ` Ophir Munk
  2017-10-20 10:35       ` Gaëtan Rivet
  2018-02-08 12:20       ` [dpdk-dev] [PATCH v4 0/2] failsafe: " Matan Azrad
  0 siblings, 2 replies; 36+ messages in thread
From: Ophir Munk @ 2017-10-05 22:42 UTC (permalink / raw)
  To: Gaetan Rivet; +Cc: dev, Thomas Monjalon, Olga Shern, Ophir Munk, stable

This commit prevents control path operations from failing after a sub
device removal.

Following are the failure steps:
1. The physical device is removed due to change in one of PF parameters
(e.g. MTU)
2. The interrupt thread flags the device
3. Within 2 seconds the interrupt thread initiates the actual device removal,
then every 2 seconds it tries to re-sync (plug in) the device. The attempts
fail as long as the VF parameter mismatches the PF parameter.
4. A control thread initiates a control operation on failsafe which
initiates this operation on the device.
5. A race condition occurs between the control thread and interrupt thread
when accessing the device data structures.

This commit prevents the race condition in step 5. Before this commit if a
device was removed and then a control thread operation was initiated on
failsafe - in some cases failsafe called the sub device operation instead
of avoiding it. Such cases could lead to operation failures.

This commit fixes failsafe criteria to determine when the device is removed
such that it will avoid calling the sub device operations during that time
and will only call them otherwise.

Fixes: a46f8d584eb8 ("net/failsafe: add fail-safe PMD")
Cc: stable@dpdk.org

Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
---
v3:
1. Rebase v2

2. Please ignore checkpatch checks on arguments re-usage - they are confirmed.
	CHECK:MACRO_ARG_REUSE: Macro argument reuse ... possible side-effects?
	#217: FILE: drivers/net/failsafe/failsafe_private.h:241:

3. Add rationales (copy from an email which accompanied v2):

On Monday, September 11, 2017 11:31 AM, Gaetan Rivet wrote:
> 
> Hi Ophir,
> 
> On Sat, Sep 09, 2017 at 07:27:11PM +0000, Ophir Munk wrote:
> > This commit prevents control path operations from failing after a 
> > sub device has informed failsafe it has been removed.
> >
> > Before this commit if a device was removed and then a control path
> 
> Here are the steps if I understood correctly:
> 
> 0. The physical device is removed
> 1. The interrupt thread flags the device 2. A control lcore initiates 
> a control operation 3. The alarm triggers, waking up the eal-intr-thread,
>    initiating the actual device removal.
> 4. Race condition occurs between control lcore and interrupt thread.
> 
> "if a device was removed" is ambiguous I think (are we speaking about 
> the physical port? Is it only flagged? Is it after the removal of the device itself?).
> From the context I gather that you mean the device is flagged to be 
> removed, but it won't be as clear in a few month when we revisit this bug :) .
> 
> Could you please rephrase this so that the whole context of the issue 
> is available?
> 

Done. Commit message was rephrased based on your comments 

> > operations was initiated on failsafe - in some cases failsafe called 
> > the sub device operation instead of avoiding it. Such cases could 
> > lead to operations failures.
> >
> > This commit fixes failsafe criteria to determine when the device is 
> > removed such that it will avoid calling the sub device operations 
> > during that time and will only call them otherwise.
> >
> 
> This commit mitigates the race condition, reducing the probability for 
> it to have an effect. It does not, however, remove this race 
> condition, which is inherent to the DPDK architecture at the moment.
> 
> A proper fix, a more detailed workaround and additional documentation 
> warning users writing applications to mind their threads could be interesting.
> 

The race condition occurs in the last step and may lead to segmentation faults (accessing data structures
of the same device by 2 threads). The previous steps ("the physical device is removed", etc.) were not
recreated and tested but probably cannot lead to a segmentation fault.

> But let's focus on this patch for the time being.
> 
> > Fixes: a46f8d584eb8 ("net/failsafe: add fail-safe PMD")
> > Cc: stable@dpdk.org
> >
> > Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
> > ---
> >  drivers/net/failsafe/failsafe_ether.c |  1 +
> >  drivers/net/failsafe/failsafe_ops.c   | 52
> +++++++++++++++++++++++++++++------
> >  2 files changed, 45 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/net/failsafe/failsafe_ether.c
> > b/drivers/net/failsafe/failsafe_ether.c
> > index a3a8cce..1def110 100644
> > --- a/drivers/net/failsafe/failsafe_ether.c
> > +++ b/drivers/net/failsafe/failsafe_ether.c
> > @@ -378,6 +378,7 @@
> 
> Could you please generate your patches with the function name in the diff?

Done 

> 
> >  				      i);
> >  				goto err_remove;
> >  			}
> > +			sdev->remove = 0;
> 
> You are adding this here, within failsafe_eth_dev_state_sync, and 
> removing it from the dev_configure ops.
> 
> 10 lines above, the call to dev_configure is done, meaning that the 
> remove flag was resetted at this point.
> 
> Can you explain why you prefer resetting the flag here?
> 
> The position of this flag reset will be dependent upon my subsequent 
> remarks anyway, so hold that thought :) .
> 

The motivation for resetting the "remove" flag within failsafe_eth_dev_state_sync is as follows:
Prior to this patch the "remove" flag was designed to signal the need to remove the sub device.
Once the sub device was removed and before being reconfigured the "remove" flag was reset.

After this patch the scope of the "remove" flag was *extended* to indicate the sub device status as 
being "plugged out" by resetting this flag only after a successful call to failsafe_eth_dev_state_sync(). 
The "plug out" status could last a very long time (seconds, minutes, days, weeks, ...).

Prior to this patch failsafe based the "plugged out" status on the sub device state as being below
ACTIVE; however, every 2 seconds dev_configure() was called where the sub device was assigned
sdev->state = DEV_ACTIVE; therefore the sub device state became ACTIVE for some time every 2 seconds.
This is where the race condition occurred: failsafe considered the sub device as "Plugged in" for some
time every 2 seconds (based on its ACTIVE state) while it was actually plugged out.

After this patch the "Plugged out" status is based on the "remove" flag.

> >  		}
> >  	}
> >  	/*
> > diff --git a/drivers/net/failsafe/failsafe_ops.c
> > b/drivers/net/failsafe/failsafe_ops.c
> > index ff9ad15..314d53d 100644
> > --- a/drivers/net/failsafe/failsafe_ops.c
> > +++ b/drivers/net/failsafe/failsafe_ops.c
> > @@ -232,7 +232,6 @@
> >  			dev->data->dev_conf.intr_conf.lsc = 0;
> >  		}
> >  		DEBUG("Configuring sub-device %d", i);
> > -		sdev->remove = 0;
> >  		ret = rte_eth_dev_configure(PORT_ID(sdev),
> >  					dev->data->nb_rx_queues,
> >  					dev->data->nb_tx_queues,
> > @@ -311,6 +310,8 @@
> >  	int ret;
> >
> >  	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
> > +		if (sdev->remove)
> > +			continue;
> >  		DEBUG("Calling rte_eth_dev_set_link_up on sub_device %d",
> i);
> >  		ret = rte_eth_dev_set_link_up(PORT_ID(sdev));
> >  		if (ret) {
> > @@ -330,6 +331,8 @@
> >  	int ret;
> >
> >  	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
> > +		if (sdev->remove)
> > +			continue;
> 
> For this change and all the others:
> 
> I think it might be best to have this check added to fs_find_next directly.
> 
> Most of the call to the iterators are done within dev_ops, so it makes 
> sense I think to have it there.
> 
> But then there'd be an issue with the sub-EAL iterations done on 
> previously- removed ports, as the removed flag is precisely resetted 
> too late. The function failsafe_dev_remove would also need to have a 
> manual iteration upon the sub-devices instead of using the macro.
> 
> I think you can actually reset this flag within fs_dev_remove, instead 
> of the next plug-in, then having this check within fs_find_next 
> *should* not be a problem.
> 

With the new scope of "remove" flag (remaining set to 1 as long as the sub device is "plugged out" 
which may last for a very long time) we cannot reset it in fs_dev_remove which is called every 2 
seconds.

> I think you should break up those changes in two: first move the flag 
> reset to fs_dev_remove instead of fs_dev_configure, then add this 
> check to the iterator.
> 
> This way, a git bisect should allow us to pinpoint more easily any new 
> bug as both changes have the potential to introduce subtle ones.
> 

I suggest defining a new macro 

FOREACH_SUBDEV_ACTIVE(sdev, i, dev)  { ...

that will replace all cases of:

FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
		if (sdev->remove)
			continue;

In order to support the new macro I added a "check_remove" flag to fs_find_next (which is based on 
your idea above: "I think it might be best to have this check added to fs_find_next directly"). 

> >  		DEBUG("Calling rte_eth_dev_set_link_down on sub_device
> %d", i);
> >  		ret = rte_eth_dev_set_link_down(PORT_ID(sdev));
> >  		if (ret) {
> > @@ -517,8 +520,11 @@
> >  	struct sub_device *sdev;
> >  	uint8_t i;
> >
> > -	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
> > +	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
> > +		if (sdev->remove)
> > +			continue;
> >  		rte_eth_promiscuous_enable(PORT_ID(sdev));
> > +	}
> >  }
> >
> >  static void
> 
> <snip>
> 
> > --
> > 1.8.3.1
> >
> 
> Thanks,
> --
> Gaetan Rivet
> 6WIND

 drivers/net/failsafe/failsafe_ether.c   |  1 +
 drivers/net/failsafe/failsafe_ops.c     | 31 +++++++++++++++----------------
 drivers/net/failsafe/failsafe_private.h | 26 ++++++++++++++++++++++----
 3 files changed, 38 insertions(+), 20 deletions(-)

diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
index 0c0748f..42e9808 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -389,6 +389,7 @@ failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
 				      i);
 				goto err_remove;
 			}
+			sdev->remove = 0;
 		}
 	}
 	/*
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
index e0f1b0b..b3cac40 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -232,7 +232,6 @@ fs_dev_configure(struct rte_eth_dev *dev)
 			dev->data->dev_conf.intr_conf.lsc = 0;
 		}
 		DEBUG("Configuring sub-device %d", i);
-		sdev->remove = 0;
 		ret = rte_eth_dev_configure(PORT_ID(sdev),
 					dev->data->nb_rx_queues,
 					dev->data->nb_tx_queues,
@@ -310,7 +309,7 @@ fs_dev_set_link_up(struct rte_eth_dev *dev)
 	uint8_t i;
 	int ret;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+	FOREACH_SUBDEV_ACTIVE(sdev, i, dev) {
 		DEBUG("Calling rte_eth_dev_set_link_up on sub_device %d", i);
 		ret = rte_eth_dev_set_link_up(PORT_ID(sdev));
 		if (ret) {
@@ -329,7 +328,7 @@ fs_dev_set_link_down(struct rte_eth_dev *dev)
 	uint8_t i;
 	int ret;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+	FOREACH_SUBDEV_ACTIVE(sdev, i, dev) {
 		DEBUG("Calling rte_eth_dev_set_link_down on sub_device %d", i);
 		ret = rte_eth_dev_set_link_down(PORT_ID(sdev));
 		if (ret) {
@@ -517,7 +516,7 @@ fs_promiscuous_enable(struct rte_eth_dev *dev)
 	struct sub_device *sdev;
 	uint8_t i;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
+	FOREACH_SUBDEV_ACTIVE(sdev, i, dev)
 		rte_eth_promiscuous_enable(PORT_ID(sdev));
 }
 
@@ -527,7 +526,7 @@ fs_promiscuous_disable(struct rte_eth_dev *dev)
 	struct sub_device *sdev;
 	uint8_t i;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
+	FOREACH_SUBDEV_ACTIVE(sdev, i, dev)
 		rte_eth_promiscuous_disable(PORT_ID(sdev));
 }
 
@@ -537,7 +536,7 @@ fs_allmulticast_enable(struct rte_eth_dev *dev)
 	struct sub_device *sdev;
 	uint8_t i;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
+	FOREACH_SUBDEV_ACTIVE(sdev, i, dev)
 		rte_eth_allmulticast_enable(PORT_ID(sdev));
 }
 
@@ -547,7 +546,7 @@ fs_allmulticast_disable(struct rte_eth_dev *dev)
 	struct sub_device *sdev;
 	uint8_t i;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
+	FOREACH_SUBDEV_ACTIVE(sdev, i, dev)
 		rte_eth_allmulticast_disable(PORT_ID(sdev));
 }
 
@@ -559,7 +558,7 @@ fs_link_update(struct rte_eth_dev *dev,
 	uint8_t i;
 	int ret;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+	FOREACH_SUBDEV_ACTIVE(sdev, i, dev) {
 		DEBUG("Calling link_update on sub_device %d", i);
 		ret = (SUBOPS(sdev, link_update))(ETH(sdev), wait_to_complete);
 		if (ret && ret != -1) {
@@ -602,7 +601,7 @@ fs_stats_reset(struct rte_eth_dev *dev)
 	struct sub_device *sdev;
 	uint8_t i;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+	FOREACH_SUBDEV_ACTIVE(sdev, i, dev) {
 		rte_eth_stats_reset(PORT_ID(sdev));
 		memset(&sdev->stats_snapshot, 0, sizeof(struct rte_eth_stats));
 	}
@@ -700,7 +699,7 @@ fs_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
 	uint8_t i;
 	int ret;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+	FOREACH_SUBDEV_ACTIVE(sdev, i, dev) {
 		DEBUG("Calling rte_eth_dev_set_mtu on sub_device %d", i);
 		ret = rte_eth_dev_set_mtu(PORT_ID(sdev), mtu);
 		if (ret) {
@@ -719,7 +718,7 @@ fs_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
 	uint8_t i;
 	int ret;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+	FOREACH_SUBDEV_ACTIVE(sdev, i, dev) {
 		DEBUG("Calling rte_eth_dev_vlan_filter on sub_device %d", i);
 		ret = rte_eth_dev_vlan_filter(PORT_ID(sdev), vlan_id, on);
 		if (ret) {
@@ -753,7 +752,7 @@ fs_flow_ctrl_set(struct rte_eth_dev *dev,
 	uint8_t i;
 	int ret;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+	FOREACH_SUBDEV_ACTIVE(sdev, i, dev) {
 		DEBUG("Calling rte_eth_dev_flow_ctrl_set on sub_device %d", i);
 		ret = rte_eth_dev_flow_ctrl_set(PORT_ID(sdev), fc_conf);
 		if (ret) {
@@ -774,7 +773,7 @@ fs_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
 	/* No check: already done within the rte_eth_dev_mac_addr_remove
 	 * call for the fail-safe device.
 	 */
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
+	FOREACH_SUBDEV_ACTIVE(sdev, i, dev)
 		rte_eth_dev_mac_addr_remove(PORT_ID(sdev),
 				&dev->data->mac_addrs[index]);
 	PRIV(dev)->mac_addr_pool[index] = 0;
@@ -791,7 +790,7 @@ fs_mac_addr_add(struct rte_eth_dev *dev,
 	uint8_t i;
 
 	RTE_ASSERT(index < FAILSAFE_MAX_ETHADDR);
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+	FOREACH_SUBDEV_ACTIVE(sdev, i, dev) {
 		ret = rte_eth_dev_mac_addr_add(PORT_ID(sdev), mac_addr, vmdq);
 		if (ret) {
 			ERROR("Operation rte_eth_dev_mac_addr_add failed for sub_device %"
@@ -813,7 +812,7 @@ fs_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
 	struct sub_device *sdev;
 	uint8_t i;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
+	FOREACH_SUBDEV_ACTIVE(sdev, i, dev)
 		rte_eth_dev_default_mac_addr_set(PORT_ID(sdev), mac_addr);
 }
 
@@ -832,7 +831,7 @@ fs_filter_ctrl(struct rte_eth_dev *dev,
 		*(const void **)arg = &fs_flow_ops;
 		return 0;
 	}
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+	FOREACH_SUBDEV_ACTIVE(sdev, i, dev) {
 		DEBUG("Calling rte_eth_dev_filter_ctrl on sub_device %d", i);
 		ret = rte_eth_dev_filter_ctrl(PORT_ID(sdev), type, op, arg);
 		if (ret) {
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index d2d92af..03e1f58 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -225,10 +225,23 @@ extern int mac_from_arg;
  * dev:   (struct rte_eth_dev *), fail-safe ethdev
  * state: (enum dev_state), minimum acceptable device state
  */
+
 #define FOREACH_SUBDEV_STATE(s, i, dev, state)		\
-	for (s = fs_find_next((dev), 0, state, &i);	\
+	for (s = fs_find_next((dev), 0, state, 0, &i);	\
 	     s != NULL;					\
-	     s = fs_find_next((dev), i + 1, state, &i))
+	     s = fs_find_next((dev), i + 1, state, 0, &i))
+
+/**
+ * Stateful iterator construct over fail-safe sub-devices
+ * in ACTIVE state and not removed due to RMV event
+ * s:     (struct sub_device *), iterator
+ * i:     (uint8_t), increment
+ * dev:   (struct rte_eth_dev *), fail-safe ethdev
+ */
+#define FOREACH_SUBDEV_ACTIVE(s, i, dev)				\
+	for (s = fs_find_next((dev), 0, DEV_ACTIVE, 1, &i);	\
+	     s != NULL;						\
+	     s = fs_find_next((dev), i + 1, DEV_ACTIVE, 1, &i))
 
 /**
  * Iterator construct over fail-safe sub-devices:
@@ -303,6 +316,7 @@ static inline struct sub_device *
 fs_find_next(struct rte_eth_dev *dev,
 	     uint8_t sid,
 	     enum dev_state min_state,
+		 uint8_t check_remove,
 	     uint8_t *sid_out)
 {
 	struct sub_device *subs;
@@ -311,8 +325,12 @@ fs_find_next(struct rte_eth_dev *dev,
 	subs = PRIV(dev)->subs;
 	tail = PRIV(dev)->subs_tail;
 	while (sid < tail) {
-		if (subs[sid].state >= min_state)
-			break;
+		if (subs[sid].state >= min_state) {
+			if (check_remove == 0)
+				break;
+			if (PRIV(dev)->subs[sid].remove == 0)
+				break;
+		}
 		sid++;
 	}
 	*sid_out = sid;
-- 
2.7.4

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [dpdk-dev] [PATCH v3] net/failsafe: fix calling device during RMV events
  2017-10-05 22:42     ` [dpdk-dev] [PATCH v3] " Ophir Munk
@ 2017-10-20 10:35       ` Gaëtan Rivet
  2017-10-23  7:17         ` Ophir Munk
  2018-02-08 12:20       ` [dpdk-dev] [PATCH v4 0/2] failsafe: " Matan Azrad
  1 sibling, 1 reply; 36+ messages in thread
From: Gaëtan Rivet @ 2017-10-20 10:35 UTC (permalink / raw)
  To: Ophir Munk; +Cc: dev, Thomas Monjalon, Olga Shern, stable

Hi Ophir,

Sorry about the delay,
I have a few remarks, I think this patch could be simpler.

First, about the commit logline:
"calling device" is not descriptive enough. I'd suggest

    net/failsafe: fix device configuration during RMV events

But I'm not a native speaker either, so use it if you think it is
better, or don't, it's only a suggestion :).

On Thu, Oct 05, 2017 at 10:42:08PM +0000, Ophir Munk wrote:
> This commit prevents control path operations from failing after a sub
> device removal.
> 
> Following are the failure steps:
> 1. The physical device is removed due to change in one of PF parameters
> (e.g. MTU)
> 2. The interrupt thread flags the device
> 3. Within 2 seconds Interrupt thread initializes the actual device removal,
> then every 2 seconds it tries to re-sync (plug in) the device. The trials
> fail as long as VF parameter mismatches the PF parameter.
> 4. A control thread initiates a control operation on failsafe which
> initiates this operation on the device.
> 5. A race condition occurs between the control thread and interrupt thread
> when accessing the device data structures.
> 
> This commit prevents the race condition in step 5. Before this commit if a
> device was removed and then a control thread operation was initiated on
> failsafe - in some cases failsafe called the sub device operation instead
> of avoiding it. Such cases could lead to operations failures.
> 

This is a nitpick, but as said earlier, this is not preventing the race
condition. This race is still present and can still wreak havoc on
unsuspecting users.

If an application has a weak threading model, it will be subject to this
race condition still. It is possible to prevent it fully with proper
care from the application standpoint, but this is not specific to
fail-safe and does not concern us here.

Anyway, it's really a nitpick, I just wanted to point it out. This is
not too important for this patch.

> This commit fixes failsafe criteria to determine when the device is removed
> such that it will avoid calling the sub device operations during that time
> and will only call them otherwise.
> 
> Fixes: a46f8d584eb8 ("net/failsafe: add fail-safe PMD")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
> ---
> v3:
> 1. Rebase v2
> 
> 2. Please ignore checkpatch checks on arguments re-usage - they are confirmed.
> 	CHECK:MACRO_ARG_REUSE: Macro argument reuse ... possible side-effects?
> 	#217: FILE: drivers/net/failsafe/failsafe_private.h:241:
> 
> 3. Add rationales (copy from an email which accompanied v2):
> 
> On Monday, September 11, 2017 11:31 AM, Gaetan Rivet wrote:
> > 
> > Hi Ophir,
> > 
> > On Sat, Sep 09, 2017 at 07:27:11PM +0000, Ophir Munk wrote:
> > > This commit prevents control path operations from failing after a 
> > > sub device has informed failsafe it has been removed.
> > >
> > > Before this commit if a device was removed and then a control path
> > 
> > Here are the steps if I understood correctly:
> > 
> > 0. The physical device is removed
> > 1. The interrupt thread flags the device 2. A control lcore initiates 
> > a control operation 3. The alarm triggers, waking up the eal-intr-thread,
> >    initiating the actual device removal.
> > 4. Race condition occurs between control lcore and interrupt thread.
> > 
> > "if a device was removed" is ambiguous I think (are we speaking about 
> > the physical port? Is it only flagged? Is it after the removal of the device itself?).
> > From the context I gather that you mean the device is flagged to be 
> > removed, but it won't be as clear in a few month when we revisit this bug :) .
> > 
> > Could you please rephrase this so that the whole context of the issue 
> > is available?
> > 
> 
> Done. Commit message was rephrased based on your comments 
> 
> > > operations was initiated on failsafe - in some cases failsafe called 
> > > the sub device operation instead of avoiding it. Such cases could 
> > > lead to operations failures.
> > >
> > > This commit fixes failsafe criteria to determine when the device is 
> > > removed such that it will avoid calling the sub device operations 
> > > during that time and will only call them otherwise.
> > >
> > 
> > This commit mitigates the race condition, reducing the probability for 
> > it to have an effect. It does not, however, remove this race 
> > condition, which is inherent to the DPDK architecture at the moment.
> > 
> > A proper fix, a more detailed workaround and additional documentation 
> > warning users writing applications to mind their threads could be interesting.
> > 
> 
> The race condition occurs in the last step and may lead to segmentation faults (accessing data structures 
> of the same device by 2 threads) The previous steps ("the physical device is removed", etc) were not 
> recreated and tested but probably cannot lead to segmentation fault. 
> 
> > But let's focus on this patch for the time being.
> > 
> > > Fixes: a46f8d584eb8 ("net/failsafe: add fail-safe PMD")
> > > Cc: stable@dpdk.org
> > >
> > > Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
> > > ---
> > >  drivers/net/failsafe/failsafe_ether.c |  1 +
> > >  drivers/net/failsafe/failsafe_ops.c   | 52
> > +++++++++++++++++++++++++++++------
> > >  2 files changed, 45 insertions(+), 8 deletions(-)
> > >
> > > diff --git a/drivers/net/failsafe/failsafe_ether.c
> > > b/drivers/net/failsafe/failsafe_ether.c
> > > index a3a8cce..1def110 100644
> > > --- a/drivers/net/failsafe/failsafe_ether.c
> > > +++ b/drivers/net/failsafe/failsafe_ether.c
> > > @@ -378,6 +378,7 @@
> > 
> > Could you please generate your patches with the function name in the diff?
> 
> Done 
> 
> > 
> > >  				      i);
> > >  				goto err_remove;
> > >  			}
> > > +			sdev->remove = 0;
> > 
> > You are adding this here, within failsafe_eth_dev_state_sync, and 
> > removing it from the dev_configure ops.
> > 
> > 10 lines above, the call to dev_configure is done, meaning that the 
> > remove flag was resetted at this point.
> > 
> > Can you explain why you prefer resetting the flag here?
> > 
> > The position of this flag reset will be dependent upon my subsequent 
> > remarks anyway, so hold that thought :) .
> > 
> 
> The motivation for resetting the "remove" flag within failsafe_eth_dev_state_sync is as follows:
> Previously to this patch the "remove" flag was designed to signal the need to remove the sub device. 
> Once the sub device was removed and before being reconfigured the "remove" flag was reset. 
> 
> After this patch the scope of the "remove" flag was *extended* to indicate the sub device status as 
> being "plugged out" by resetting this flag only after a successful call to failsafe_eth_dev_state_sync(). 
> The "plug out" status could last a very long time (seconds, minutes, days, weeks, ...).
> 
> Previously to this patch failsafe based the "plugged out" status on the sub device state as being below 
> ACTIVE however every 2 seconds dev_configure() was called where the sub device was assigned sdev-
> >state = DEV_ACTIVE; therefore the sub device state became ACTIVE for some time every 2 seconds. 
> This is where the race condition occurred: failsafe considered the sub device as "Plugged in" for some 
> time every 2 seconds (based on its ACTIVE state) while it was actually plugged out. 
> 
> After this patch the "Plugged out" status is based on the "remove" flag.
> 

Sorry, I do not agree with this semantical change on the "remove" flag.
You are essentially adding a new device state, which could be fine per
se, but should not be done here.

The enum dev_state is there for this purpose.

The flag dev->remove, calls for an operation to be done upon the
concerned device. It is not meant to become a new device state.

A point about the work methodology here: if you wanted to change this
semantic, which could be legitimate and sometimes called for, you should
have proposed it either during a discussion in a response to my previous
email, or introducing the change as a separate patch. This point is
important enough for it to have its own patch, meaning we would have a
whole thread dedicated to it instead of having to interleave
commentaries between related-but-separate diffs on the code.

But anyway, if you think you need to express a PLUGOUT state, I'd
suggest adding a state between DEV_UNDEFINED and DEV_PARSED.
DEV_UNDEFINED means that the device is in limbo and has no existence per
se (its parsing failed for example, it is not clear whether the
parameters are correct, etc...). DEV_PLUGOUT could mean then that the
device has been successfully probed at least once, meaning that it could
possibly have residuals from this probing still there, or specific care
to be taken when manipulating it.

However, I'm not yet convinced that this new state is necessary. I think
you can mitigate this race condition without having to add it. If you
insist in introducing this state, please do so in a separate patch, with
proper definition about the meaning of this state:

  + When it should be valid for a device to be in this state.
  + Which operation corresponds to getting into and out of this state.
  + Why this state is interesting and what could not be expressed before
    that is thus being fixed by introducing this state.

But please verify twice whether you absolutely need to complexify the
current fail-safe internals before going all in and basing your work
upon it :)

> > >  		}
> > >  	}
> > >  	/*
> > > diff --git a/drivers/net/failsafe/failsafe_ops.c
> > > b/drivers/net/failsafe/failsafe_ops.c
> > > index ff9ad15..314d53d 100644
> > > --- a/drivers/net/failsafe/failsafe_ops.c
> > > +++ b/drivers/net/failsafe/failsafe_ops.c
> > > @@ -232,7 +232,6 @@
> > >  			dev->data->dev_conf.intr_conf.lsc = 0;
> > >  		}
> > >  		DEBUG("Configuring sub-device %d", i);
> > > -		sdev->remove = 0;
> > >  		ret = rte_eth_dev_configure(PORT_ID(sdev),
> > >  					dev->data->nb_rx_queues,
> > >  					dev->data->nb_tx_queues,
> > > @@ -311,6 +310,8 @@
> > >  	int ret;
> > >
> > >  	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
> > > +		if (sdev->remove)
> > > +			continue;
> > >  		DEBUG("Calling rte_eth_dev_set_link_up on sub_device %d",
> > i);
> > >  		ret = rte_eth_dev_set_link_up(PORT_ID(sdev));
> > >  		if (ret) {
> > > @@ -330,6 +331,8 @@
> > >  	int ret;
> > >
> > >  	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
> > > +		if (sdev->remove)
> > > +			continue;
> > 
> > For this change and all the others:
> > 
> > I think it might be best to have this check added to fs_find_next directly.
> > 
> > Most of the call to the iterators are done within dev_ops, so it makes 
> > sense I think to have it there.
> > 
> > But then there'd be an issue with the sub-EAL iterations done on 
> > previously- removed ports, as the removed flag is precisely resetted 
> > too late. The function failsafe_dev_remove would also need to have a 
> > manual iteration upon the sub-devices instead of using the macro.
> > 
> > I think you can actually reset this flag within fs_dev_remove, instead 
> > of the next plug-in, then having this check within fs_find_next 
> > *should* not be a problem.
> > 
> 
> With the new scope of "remove" flag (remaining set to 1 as long as the sub device is "plugged out" 
> which may last for a very long time) we cannot reset it in fs_dev_remove which is called every 2 
> seconds.
> 

With the remove flag staying as it is, I think it should thus be
reset within fs_dev_remove. Actually I think it both helps you write
your fix, and clarifies the meaning and intended purpose of this flag.

> > I think you should break up those changes in two: first move the flag 
> > reset to fs_dev_remove instead of fs_dev_configure, then add this 
> > check to the iterator.
> > 

Please, do this fix this way. I think moving the dev->remove flag can
have subtle consequences, and I'd like to have a specific commit to
trace back which one is responsible.

> > This way, a git bisect should allow us to pinpoint more easily any new 
> > bug as both changes have the potential to introduce subtle ones.
> > 

Well, like I said :).

> 
> I suggest defining a new macro 
> 
> FOREACH_SUBDEV_ACTIVE(sdev, i, dev)  { ...
> 
> that will replace all cases of:
> 
> FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
> 		if (sdev->remove)
> 			continue;
> 
> In order to support the new macro I added a "check_remove" flag to fs_find_next (which is based on 
> your idea above: "I think it might be best to have this check added to fs_find_next directly"). 
> 

I'd prefer avoiding multiplying the macros.
There are already two iterators. You add one, which now means that there
are two ways of iterating upon active devices: using your new macro, and
using the old one. The difference between the two would be difficult to
know, without profound knowledge of the rest of the code: that in one
place the flag is checked, and in the other it is not.

As such, I suggest you check in all cases that the flag is not set. This
simplifies the use of these macros and the conditions in which their use
is correct.

This means that you have to manually iterate in places where this flag
should be ignored. I listed these places in my previous email, but I may
have missed some, please be careful.

Thanks,
-- 
Gaëtan Rivet
6WIND

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [dpdk-dev] [PATCH v3] net/failsafe: fix calling device during RMV events
  2017-10-20 10:35       ` Gaëtan Rivet
@ 2017-10-23  7:17         ` Ophir Munk
  2017-10-23  8:36           ` Gaëtan Rivet
  0 siblings, 1 reply; 36+ messages in thread
From: Ophir Munk @ 2017-10-23  7:17 UTC (permalink / raw)
  To: Gaëtan Rivet
  Cc: dev, Thomas Monjalon, Olga Shern, stable, Ophir Munk, Matan Azrad

Hi Gaetan,
Thanks for your quick reply. Please see comments inline.

Regards,
Ophir

> -----Original Message-----
> From: Gaëtan Rivet [mailto:gaetan.rivet@6wind.com]
> Sent: Friday, October 20, 2017 1:35 PM
> To: Ophir Munk <ophirmu@mellanox.com>
> Cc: dev@dpdk.org; Thomas Monjalon <thomas@monjalon.net>; Olga Shern
> <olgas@mellanox.com>; stable@dpdk.org
> Subject: Re: [PATCH v3] net/failsafe: fix calling device during RMV events
> 
> Hi Ophir,
> 
> Sorry about the delay,
> I have a few remarks, I think this patch could be simpler.
> 
> First, about the commit logline:
> "calling device" is not descriptive enough. I'd suggest
> 
>     net/failsafe: fix device configuration during RMV events
> 
> But I'm not a native speaker either, so use it if you think it is better, or don't,
> it's only a suggestion :).
> 
> On Thu, Oct 05, 2017 at 10:42:08PM +0000, Ophir Munk wrote:
> > This commit prevents control path operations from failing after a sub
> > device removal.
> >
> > Following are the failure steps:
> > 1. The physical device is removed due to change in one of PF
> > parameters (e.g. MTU) 2. The interrupt thread flags the device 3.
> > Within 2 seconds Interrupt thread initializes the actual device
> > removal, then every 2 seconds it tries to re-sync (plug in) the
> > device. The trials fail as long as VF parameter mismatches the PF
> parameter.
> > 4. A control thread initiates a control operation on failsafe which
> > initiates this operation on the device.
> > 5. A race condition occurs between the control thread and interrupt
> > thread when accessing the device data structures.
> >
> > This commit prevents the race condition in step 5. Before this commit
> > if a device was removed and then a control thread operation was
> > initiated on failsafe - in some cases failsafe called the sub device
> > operation instead of avoiding it. Such cases could lead to operations
> failures.
> >
> 
> This is a nitpick, but as said earlier, this is not preventing the race condition.
> This race is still present and can still wreak havoc on unsuspecting users.
> 
> If an application has a weak threading model, it will be subject to this race
> condition still. It is possible to prevent it fully with proper care from the
> application standpoint, but this is not specific to fail-safe and does not
> concern us here.
> 
> Anyway, it's really a nitpick, I just wanted to point it out. This is not too
> important for this patch.
> 
> > This commit fixes failsafe criteria to determine when the device is
> > removed such that it will avoid calling the sub device operations
> > during that time and will only call them otherwise.
> >
> > Fixes: a46f8d584eb8 ("net/failsafe: add fail-safe PMD")
> > Cc: stable@dpdk.org
> >
> > Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
> > ---
> > v3:
> > 1. Rebase v2
> >
> > 2. Please ignore checkpatch checks on arguments re-usage - they are
> confirmed.
> > 	CHECK:MACRO_ARG_REUSE: Macro argument reuse ... possible side-
> effects?
> > 	#217: FILE: drivers/net/failsafe/failsafe_private.h:241:
> >
> > 3. Add rationales (copy from an email which accompanied v2):
> >
> > On Monday, September 11, 2017 11:31 AM, Gaetan Rivet wrote:
> > >
> > > Hi Ophir,
> > >
> > > On Sat, Sep 09, 2017 at 07:27:11PM +0000, Ophir Munk wrote:
> > > > This commit prevents control path operations from failing after a
> > > > sub device has informed failsafe it has been removed.
> > > >
> > > > Before this commit if a device was removed and then a control path
> > >
> > > Here are the steps if I understood correctly:
> > >
> > > 0. The physical device is removed
> > > 1. The interrupt thread flags the device 2. A control lcore
> > > initiates a control operation 3. The alarm triggers, waking up the eal-intr-
> thread,
> > >    initiating the actual device removal.
> > > 4. Race condition occurs between control lcore and interrupt thread.
> > >
> > > "if a device was removed" is ambiguous I think (are we speaking
> > > about the physical port? Is it only flagged? Is it after the removal of the
> device itself?).
> > > From the context I gather that you mean the device is flagged to be
> > > removed, but it won't be as clear in a few month when we revisit this bug
> :) .
> > >
> > > Could you please rephrase this so that the whole context of the
> > > issue is available?
> > >
> >
> > Done. Commit message was rephrased based on your comments
> >
> > > > operations was initiated on failsafe - in some cases failsafe
> > > > called the sub device operation instead of avoiding it. Such cases
> > > > could lead to operations failures.
> > > >
> > > > This commit fixes failsafe criteria to determine when the device
> > > > is removed such that it will avoid calling the sub device
> > > > operations during that time and will only call them otherwise.
> > > >
> > >
> > > This commit mitigates the race condition, reducing the probability
> > > for it to have an effect. It does not, however, remove this race
> > > condition, which is inherent to the DPDK architecture at the moment.
> > >
> > > A proper fix, a more detailed workaround and additional
> > > documentation warning users writing applications to mind their threads
> could be interesting.
> > >
> >
> > The race condition occurs in the last step and may lead to
> > segmentation faults (accessing data structures of the same device by 2
> > threads) The previous steps ("the physical device is removed", etc) were not
> recreated and tested but probably cannot lead to segmentation fault.
> >
> > > But let's focus on this patch for the time being.
> > >
> > > > Fixes: a46f8d584eb8 ("net/failsafe: add fail-safe PMD")
> > > > Cc: stable@dpdk.org
> > > >
> > > > Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
> > > > ---
> > > >  drivers/net/failsafe/failsafe_ether.c |  1 +
> > > >  drivers/net/failsafe/failsafe_ops.c   | 52
> > > +++++++++++++++++++++++++++++------
> > > >  2 files changed, 45 insertions(+), 8 deletions(-)
> > > >
> > > > diff --git a/drivers/net/failsafe/failsafe_ether.c
> > > > b/drivers/net/failsafe/failsafe_ether.c
> > > > index a3a8cce..1def110 100644
> > > > --- a/drivers/net/failsafe/failsafe_ether.c
> > > > +++ b/drivers/net/failsafe/failsafe_ether.c
> > > > @@ -378,6 +378,7 @@
> > >
> > > Could you please generate your patches with the function name in the
> diff?
> >
> > Done
> >
> > >
> > > >  				      i);
> > > >  				goto err_remove;
> > > >  			}
> > > > +			sdev->remove = 0;
> > >
> > > You are adding this here, within failsafe_eth_dev_state_sync, and
> > > removing it from the dev_configure ops.
> > >
> > > 10 lines above, the call to dev_configure is done, meaning that the
> > > remove flag was resetted at this point.
> > >
> > > Can you explain why you prefer resetting the flag here?
> > >
> > > The position of this flag reset will be dependent upon my subsequent
> > > remarks anyway, so hold that thought :) .
> > >
> >
> > The motivation for resetting the "remove" flag within
> failsafe_eth_dev_state_sync is as follows:
> > Previously to this patch the "remove" flag was designed to signal the need
> to remove the sub device.
> > Once the sub device was removed and before being reconfigured the
> "remove" flag was reset.
> >
> > After this patch the scope of the "remove" flag was *extended* to
> > indicate the sub device status as being "plugged out" by resetting this flag
> only after a successful call to failsafe_eth_dev_state_sync().
> > The "plug out" status could last a very long time (seconds, minutes, days,
> weeks, ...).
> >
> > Previously to this patch failsafe based the "plugged out" status on
> > the sub device state as being below ACTIVE however every 2 seconds
> > dev_configure() was called where the sub device was assigned sdev-
> > >state = DEV_ACTIVE; therefore the sub device state became ACTIVE for
> some time every 2 seconds.
> > This is where the race condition occurred: failsafe considered the sub
> > device as "Plugged in" for some time every 2 seconds (based on its ACTIVE
> state) while it was actually plugged out.
> >
> > After this patch the "Plugged out" status is based on the "remove" flag.
> >
> 
> Sorry, I do not agree with this semantical change on the "remove" flag.
> You are essentially adding a new device state, which could be fine per se, but
> should not be done here.
> 
> The enum dev_state is there for this purpose.
> 
> The flag dev->remove, calls for an operation to be done upon the concerned
> device. It is not meant to become a new device state.
> 
> A point about the work methodology here: if you wanted to change this
> semantic, which could be legitimate and sometimes called for, you should
> have proposed it either during a discussion in a response to my previous
> email, or introducing the change as a separate patch. This point is important
> enough for it to have its own patch, meaning we would have a whole thread
> dedicated to it instead of having to interleave commentaries between
> related-but-separate diffs on the code.
> 
> But anyway, if you think you need to express a PLUGOUT state, I'd suggest
> adding a state between DEV_UNDEFINED and DEV_PARSED.
> DEV_UNDEFINED means that the device is in limbo and has no existence per
> se (its parsing failed for example, it is not clear whether the parameters are
> correct, etc...). DEV_PLUGOUT could mean then that the device has been
> successfully probed at least once, meaning that it could possibly have
> residuals from this probing still there, or specific care to be taken when
> manipulating it.
> 
> However, I'm not yet convinced that this new state is necessary. I think you
> can mitigate this race condition without having to add it. If you insist in
> introducing this state, please do so in a separate patch, with proper
> definition about the meaning of this state:
> 
>   + When it should be valid for a device to be in this state.
>   + Which operation corresponds to getting into and out of this state.
>   + Why this state is interesting and what could not be expressed before
>     that is thus being fixed by introducing this state.
> 
> But please verify twice whether you absolutely need to complexify the
> current fail-safe internals before going all in and basing your work upon it :)
> 

Indeed what I am currently missing in failsafe is knowing the device is in a PLUGOUT state.
Your suggestion to add a new state DEV_PLUGOUT cannot be used with the current implementation
as the device states are modified during an alarm hotplug handling every 2 seconds.
In fs_hotplug_alarm() we call failsafe_eth_dev_state_sync(dev) which eventually calls ->dev_configure(dev) 
where we assign: sdev->state = DEV_ACTIVE; 
then when sync fails fs_hotplug_alarm() calls failsafe_dev_remove(dev) which will call fs_dev_remove(sdev); where the sub devices
states are changed from ACTIVE down to DEV_UNDEFINED.
Having said that it means that during a PLUGOUT event - the states are modified with each invocation of the fs_hotplug_alarm
every 2 seconds. So even if we added DEV_PLUGOUT state - it will not remain fixed during the hotplug alarm handling.
I have also verified all of this with printouts.
When seeing a sub device in state "DEV_ACTIVE" - we cannot tell whether the device is currently in "PLUGOUT situation" or "PLUGIN situation"
This allows operations such as fs_mtu_set() on sub-devices which are in "PLUGOUT situation" while their state is 
DEV_ACTIVE to be manipulated, which I think should have been avoided.

Please note fs_mtu_set() implementation:
static int fs_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
{
  ..
        FOREACH_SUBDEV_ACTIVE(sdev, i, dev) {
	// ***** We are here while the device can be in a "PLUGOUT situation" ***

To summarize:
I am missing a way to know in failsafe that a sub-device is currently plugged out
1. I suggested extending the "remove" flag scope for this purpose. It has minimal changes with current failsafe implementation. You prefer not using "remove".
2. You suggested adding a new state DEV_PLUGOUT. I don't think it will work with current implementation (as explained above) or may require a redesign of current implementation.

Can you suggest another way?

> > > >  		}
> > > >  	}
> > > >  	/*
> > > > diff --git a/drivers/net/failsafe/failsafe_ops.c
> > > > b/drivers/net/failsafe/failsafe_ops.c
> > > > index ff9ad15..314d53d 100644
> > > > --- a/drivers/net/failsafe/failsafe_ops.c
> > > > +++ b/drivers/net/failsafe/failsafe_ops.c
> > > > @@ -232,7 +232,6 @@
> > > >  			dev->data->dev_conf.intr_conf.lsc = 0;
> > > >  		}
> > > >  		DEBUG("Configuring sub-device %d", i);
> > > > -		sdev->remove = 0;
> > > >  		ret = rte_eth_dev_configure(PORT_ID(sdev),
> > > >  					dev->data->nb_rx_queues,
> > > >  					dev->data->nb_tx_queues,
> > > > @@ -311,6 +310,8 @@
> > > >  	int ret;
> > > >
> > > >  	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
> > > > +		if (sdev->remove)
> > > > +			continue;
> > > >  		DEBUG("Calling rte_eth_dev_set_link_up on sub_device %d",
> > > i);
> > > >  		ret = rte_eth_dev_set_link_up(PORT_ID(sdev));
> > > >  		if (ret) {
> > > > @@ -330,6 +331,8 @@
> > > >  	int ret;
> > > >
> > > >  	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
> > > > +		if (sdev->remove)
> > > > +			continue;
> > >
> > > For this change and all the others:
> > >
> > > I think it might be best to have this check added to fs_find_next directly.
> > >
> > > Most of the call to the iterators are done within dev_ops, so it
> > > makes sense I think to have it there.
> > >
> > > But then there'd be an issue with the sub-EAL iterations done on
> > > previously- removed ports, as the removed flag is precisely resetted
> > > too late. The function failsafe_dev_remove would also need to have a
> > > manual iteration upon the sub-devices instead of using the macro.
> > >
> > > I think you can actually reset this flag within fs_dev_remove,
> > > instead of the next plug-in, then having this check within
> > > fs_find_next
> > > *should* not be a problem.
> > >
> >
> > With the new scope of "remove" flag (remaining set to 1 as long as the sub
> device is "plugged out"
> > which may last for a very long time) we cannot reset it in
> > fs_dev_remove which is called every 2 seconds.
> >
> 
> With the remove flag staying as it is, I think it should thus be reset within
> fs_dev_remove. Actually I think it both helps you write your fix, and clarifies the
> meaning and intended purpose of this flag.
> 
> > > I think you should break up those changes in two: first move the
> > > flag reset to fs_dev_remove instead of fs_dev_configure, then add
> > > this check to the iterator.
> > >
> 
> Please, do this fix this way. I think moving the dev->remove flag can have
> subtle consequences, and I'd like to have a specific commit to trace back
> which one is responsible.
> 
> > > This way, a git bisect should allow us to pinpoint more easily any
> > > new bug as both changes have the potential to introduce subtle ones.
> > >
> 
> Well, like I said :).
> 
> >
> > I suggest defining a new macro
> >
> > FOREACH_SUBDEV_ACTIVE(sdev, i, dev)  { ...
> >
> > that will replace all cases of:
> >
> > FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
> > 		if (sdev->remove)
> > 			continue;
> >
> > In order to support the new macro I added a "check_remove" flag to
> > fs_find_next (which is based on your idea above: "I think it might be best to
> have this check added to fs_find_next directly").
> >
> 
> I'd prefer avoiding multiplying the macros.

I agree. Should be avoided.

> There are already two iterators. You add one, which now means that there
> are two ways of iterating upon active devices: using your new macro, and
> using the old one. The difference between the two would be difficult to
> know, without profound knowledge of the rest of the code: that in one
> place the flag is checked, and in the other it is not.
> 
> As such, I suggest you check in all cases that the flag is not set. This
> simplifies the use of these macros and the conditions in which their use
> is correct.
> 
> This means that you have to manually iterate in places where this flag
> should be ignored. I listed these places in my previous email, but I may
> have missed some, please be careful.
> 

I already did it in v1 then changed it in V2/3 based on reviews (probably my misunderstanding)

> Thanks,
> --
> Gaëtan Rivet
> 6WIND

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [dpdk-dev] [PATCH v3] net/failsafe: fix calling device during RMV events
  2017-10-23  7:17         ` Ophir Munk
@ 2017-10-23  8:36           ` Gaëtan Rivet
  2017-11-29 19:17             ` [dpdk-dev] [dpdk-stable] " Ferruh Yigit
  0 siblings, 1 reply; 36+ messages in thread
From: Gaëtan Rivet @ 2017-10-23  8:36 UTC (permalink / raw)
  To: Ophir Munk; +Cc: dev, Thomas Monjalon, Olga Shern, stable, Matan Azrad

On Mon, Oct 23, 2017 at 07:17:41AM +0000, Ophir Munk wrote:
> Hi Gaetan,
> Thanks for your quick reply. Please see comments inline.
> 
> Regards,
> Ophir
> 
> > -----Original Message-----
> > From: Gaëtan Rivet [mailto:gaetan.rivet@6wind.com]
> > Sent: Friday, October 20, 2017 1:35 PM
> > To: Ophir Munk <ophirmu@mellanox.com>
> > Cc: dev@dpdk.org; Thomas Monjalon <thomas@monjalon.net>; Olga Shern
> > <olgas@mellanox.com>; stable@dpdk.org
> > Subject: Re: [PATCH v3] net/failsafe: fix calling device during RMV events
> > 
> > Hi Ophir,
> > 
> > Sorry about the delay,
> > I have a few remarks, I think this patch could be simpler.
> > 
> > First, about the commit logline:
> > "calling device" is not descriptive enough. I'd suggest
> > 
> >     net/failsafe: fix device configuration during RMV events
> > 
> > But I'm not a native speaker either, so use it if you think it is better, or don't,
> > it's only a suggestion :).
> > 
> > On Thu, Oct 05, 2017 at 10:42:08PM +0000, Ophir Munk wrote:
> > > This commit prevents control path operations from failing after a sub
> > > device removal.
> > >
> > > Following are the failure steps:
> > > 1. The physical device is removed due to change in one of PF
> > > parameters (e.g. MTU) 2. The interrupt thread flags the device 3.
> > > Within 2 seconds Interrupt thread initializes the actual device
> > > removal, then every 2 seconds it tries to re-sync (plug in) the
> > > device. The trials fail as long as VF parameter mismatches the PF
> > parameter.
> > > 4. A control thread initiates a control operation on failsafe which
> > > initiates this operation on the device.
> > > 5. A race condition occurs between the control thread and interrupt
> > > thread when accessing the device data structures.
> > >
> > > This commit prevents the race condition in step 5. Before this commit
> > > if a device was removed and then a control thread operation was
> > > initiated on failsafe - in some cases failsafe called the sub device
> > > operation instead of avoiding it. Such cases could lead to operations
> > failures.
> > >
> > 
> > This is a nitpick, but as said earlier, this is not preventing the race condition.
> > This race is still present and can still wreak havoc on unsuspecting users.
> > 
> > If an application has a weak threading model, it will be subject to this race
> > condition still. It is possible to prevent it fully with proper care from the
> > application standpoint, but this is not specific to fail-safe and does not
> > concern us here.
> > 
> > Anyway, it's really a nitpick, I just wanted to point it out. This is not too
> > important for this patch.
> > 
> > > This commit fixes failsafe criteria to determine when the device is
> > > removed such that it will avoid calling the sub device operations
> > > during that time and will only call them otherwise.
> > >
> > > Fixes: a46f8d584eb8 ("net/failsafe: add fail-safe PMD")
> > > Cc: stable@dpdk.org
> > >
> > > Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
> > > ---
> > > v3:
> > > 1. Rebase v2
> > >
> > > 2. Please ignore checkpatch checks on arguments re-usage - they are
> > confirmed.
> > > 	CHECK:MACRO_ARG_REUSE: Macro argument reuse ... possible side-
> > effects?
> > > 	#217: FILE: drivers/net/failsafe/failsafe_private.h:241:
> > >
> > > 3. Add rationales (copy from an email which accompanied v2):
> > >
> > > On Monday, September 11, 2017 11:31 AM, Gaetan Rivet wrote:
> > > >
> > > > Hi Ophir,
> > > >
> > > > On Sat, Sep 09, 2017 at 07:27:11PM +0000, Ophir Munk wrote:
> > > > > This commit prevents control path operations from failing after a
> > > > > sub device has informed failsafe it has been removed.
> > > > >
> > > > > Before this commit if a device was removed and then a control path
> > > >
> > > > Here are the steps if I understood correctly:
> > > >
> > > > 0. The physical device is removed
> > > > 1. The interrupt thread flags the device 2. A control lcore
> > > > initiates a control operation 3. The alarm triggers, waking up the eal-intr-
> > thread,
> > > >    initiating the actual device removal.
> > > > 4. Race condition occurs between control lcore and interrupt thread.
> > > >
> > > > "if a device was removed" is ambiguous I think (are we speaking
> > > > about the physical port? Is it only flagged? Is it after the removal of the
> > device itself?).
> > > > From the context I gather that you mean the device is flagged to be
> > > > removed, but it won't be as clear in a few month when we revisit this bug
> > :) .
> > > >
> > > > Could you please rephrase this so that the whole context of the
> > > > issue is available?
> > > >
> > >
> > > Done. Commit message was rephrased based on your comments
> > >
> > > > > operations was initiated on failsafe - in some cases failsafe
> > > > > called the sub device operation instead of avoiding it. Such cases
> > > > > could lead to operations failures.
> > > > >
> > > > > This commit fixes failsafe criteria to determine when the device
> > > > > is removed such that it will avoid calling the sub device
> > > > > operations during that time and will only call them otherwise.
> > > > >
> > > >
> > > > This commit mitigates the race condition, reducing the probability
> > > > for it to have an effect. It does not, however, remove this race
> > > > condition, which is inherent to the DPDK architecture at the moment.
> > > >
> > > > A proper fix, a more detailed workaround and additional
> > > > documentation warning users writing applications to mind their threads
> > could be interesting.
> > > >
> > >
> > > The race condition occurs in the last step and may lead to
> > > segmentation faults (accessing data structures of the same device by 2
> > > threads) The previous steps ("the physical device is removed", etc) were not
> > recreated and tested but probably cannot lead to segmentation fault.
> > >
> > > > But let's focus on this patch for the time being.
> > > >
> > > > > Fixes: a46f8d584eb8 ("net/failsafe: add fail-safe PMD")
> > > > > Cc: stable@dpdk.org
> > > > >
> > > > > Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
> > > > > ---
> > > > >  drivers/net/failsafe/failsafe_ether.c |  1 +
> > > > >  drivers/net/failsafe/failsafe_ops.c   | 52
> > > > +++++++++++++++++++++++++++++------
> > > > >  2 files changed, 45 insertions(+), 8 deletions(-)
> > > > >
> > > > > diff --git a/drivers/net/failsafe/failsafe_ether.c
> > > > > b/drivers/net/failsafe/failsafe_ether.c
> > > > > index a3a8cce..1def110 100644
> > > > > --- a/drivers/net/failsafe/failsafe_ether.c
> > > > > +++ b/drivers/net/failsafe/failsafe_ether.c
> > > > > @@ -378,6 +378,7 @@
> > > >
> > > > Could you please generate your patches with the function name in the
> > diff?
> > >
> > > Done
> > >
> > > >
> > > > >  				      i);
> > > > >  				goto err_remove;
> > > > >  			}
> > > > > +			sdev->remove = 0;
> > > >
> > > > You are adding this here, within failsafe_eth_dev_state_sync, and
> > > > removing it from the dev_configure ops.
> > > >
> > > > 10 lines above, the call to dev_configure is done, meaning that the
> > > > remove flag was resetted at this point.
> > > >
> > > > Can you explain why you prefer resetting the flag here?
> > > >
> > > > The position of this flag reset will be dependent upon my subsequent
> > > > remarks anyway, so hold that thought :) .
> > > >
> > >
> > > The motivation for resetting the "remove" flag within
> > failsafe_eth_dev_state_sync is as follows:
> > > Previously to this patch the "remove" flag was designed to signal the need
> > to remove the sub device.
> > > Once the sub device was removed and before being reconfigured the
> > "remove" flag was reset.
> > >
> > > After this patch the scope of the "remove" flag was *extended* to
> > > indicate the sub device status as being "plugged out" by resetting this flag
> > only after a successful call to failsafe_eth_dev_state_sync().
> > > The "plug out" status could last a very long time (seconds, minutes, days,
> > weeks, ...).
> > >
> > > Previously to this patch failsafe based the "plugged out" status on
> > > the sub device state as being below ACTIVE however every 2 seconds
> > > dev_configure() was called where the sub device was assigned sdev-
> > > >state = DEV_ACTIVE; therefore the sub device state became ACTIVE for
> > some time every 2 seconds.
> > > This is where the race condition occurred: failsafe considered the sub
> > > device as "Plugged in" for some time every 2 seconds (based on its ACTIVE
> > state) while it was actually plugged out.
> > >
> > > After this patch the "Plugged out" status is based on the "remove" flag.
> > >
> > 
> > Sorry, I do not agree with this semantical change on the "remove" flag.
> > You are essentially adding a new device state, which could be fine per se, but
> > should not be done here.
> > 
> > The enum dev_state is there for this purpose.
> > 
> > The flag dev->remove, calls for an operation to be done upon the concerned
> > device. It is not meant to become a new device state.
> > 
> > A point about the work methodology here: if you wanted to change this
> > semantic, which could be legitimate and sometimes called for, you should
> > have proposed it either during a discussion in a response to my previous
> > email, or introducing the change as a separate patch. This point is important
> > enough for it to have its own patch, meaning we would have a whole thread
> > dedicated to it instead of having to interleave commentaries between
> > related-but-separate diffs on the code.
> > 
> > But anyway, if you think you need to express a PLUGOUT state, I'd suggest
> > adding a state between DEV_UNDEFINED and DEV_PARSED.
> > DEV_UNDEFINED means that the device is in limbo and has no existence per
> > se (its parsing failed for example, it is not clear whether the parameters are
> > correct, etc...). DEV_PLUGOUT could mean then that the device has been
> > successfully probed at least once, meaning that it could possibly have
> > residuals from this probing still there, or specific care to be taken when
> > manipulating it.
> > 
> > However, I'm not yet convinced that this new state is necessary. I think you
> > can mitigate this race condition without having to add it. If you insist in
> > introducing this state, please do so in a separate patch, with proper
> > definition about the meaning of this state:
> > 
> >   + When it should be valid for a device to be in this state.
> >   + Which operation corresponds to getting into and out of this state.
> >   + Why this state is interesting and what could not be expressed before
> >     that is thus being fixed by introducing this state.
> > 
> > But please verify twice whether you absolutely need to complexify the
> > current fail-safe internals before going all in and basing your work upon it :)
> > 
> 
> Indeed what I am currently missing in failsafe is knowing the device is in a PLUGOUT state.
> Your suggestion to add a new state DEV_PLUGOUT cannot be used with the current implementation
> as the device states are modified during an alarm hotplug handling every 2 seconds.
> In fs_hotplug_alarm() we call failsafe_eth_dev_state_sync(dev) which eventually calls ->dev_configure(dev) 
> where we assign: sdev->state = DEV_ACTIVE; 
> then when sync fails fs_hotplug_alarm() calls failsafe_dev_remove(dev) which will call fs_dev_remove(sdev); where the sub devices
> states are changed from ACTIVE down to DEV_UNDEFINED.
> Having said that it means that during a PLUGOUT event - the states are modified with each invocation of the fs_hotplug_alarm
> every 2 seconds. So even if we added DEV_PLUGOUT state - it will not remain fixed during the hotplug alarm handling.
> I have also verified all of this with printouts.
> When seeing a sub device in state "DEV_ACTIVE" - we cannot tell whether the device is currently in "PLUGOUT situation" or "PLUGIN situation"
> This allows operations such as fs_mtu_set() on sub-devices which are in "PLUGOUT situation" while their state is 
> DEV_ACTIVE to be manipulated, which I think should have been avoided.
> 
> Please note fs_mtu_set() implementation:
> static int fs_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
> {
>   ..
>         FOREACH_SUBDEV_ACTIVE(sdev, i, dev) {
> 	// ***** We are here while the device can be in a "PLUGOUT situation" ***
> 
> To summarize:
> I am missing a way to know in failsafe that a sub-device is currently plugged out

(sdev->state < DEV_ACTIVE && !sdev->remove) means that the device is
plugged out.

> 1. I suggested extending the "remove" flag scope for this purpose. It has minimal changes with current failsafe implementation. You prefer not using "remove".

I prefer using it, but as a flag, not as a device state.

> 2. You suggested adding a new state DEV_PLUGOUT. I don't think it will work with current implementation (as explained above) or may require a redesign of current implementation.
> 

I do not suggest adding a new state DEV_PLUGOUT.
I suggest using sdev->remove properly.

> Can you suggest another way?
> 

0. In a separate commit, move the
      sdev->remove = 0;
   from fs_dev_configure, into the case DEV_UNDEFINED of the switch
   within fs_dev_remove. This is cleaner and more logical anyway.

1. Check that sdev->remove is not set in fs_find_next.
   If sdev->remove is set, then the device should be skipped.

2. In failsafe_dev_remove, do not use the FOREACH_SUBDEV_STATE iterator,
   but manually iterate over all sub-devices using the subs_tail and
   subs_head values.
   As the generic iterator would skip over devices which have
   sdev->remove set, this function would not work anymore.

3. Find the places I have missed that need this manual iterator to be
   used instead of the FOREACH_SUBDEV{,_STATE} ones. I think there is at
   least one other place that I cannot recall, there might be more.

If you think this does not work, please tell me why, because then it
means that I have misunderstood something about the race condition you
are trying to fix.

-- 
Gaëtan Rivet
6WIND

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [dpdk-dev] [dpdk-stable] [PATCH v3] net/failsafe: fix calling device during RMV events
  2017-10-23  8:36           ` Gaëtan Rivet
@ 2017-11-29 19:17             ` Ferruh Yigit
  2018-01-18 22:22               ` Thomas Monjalon
  0 siblings, 1 reply; 36+ messages in thread
From: Ferruh Yigit @ 2017-11-29 19:17 UTC (permalink / raw)
  To: Gaëtan Rivet, Ophir Munk
  Cc: dev, Thomas Monjalon, Olga Shern, stable, Matan Azrad

On 10/23/2017 1:36 AM, Gaëtan Rivet wrote:
> On Mon, Oct 23, 2017 at 07:17:41AM +0000, Ophir Munk wrote:
>> Hi Gaetan,
>> Thanks for your quick reply. Please see comments inline.
>>
>> Regards,
>> Ophir
>>
>>> -----Original Message-----
>>> From: Gaëtan Rivet [mailto:gaetan.rivet@6wind.com]
>>> Sent: Friday, October 20, 2017 1:35 PM
>>> To: Ophir Munk <ophirmu@mellanox.com>
>>> Cc: dev@dpdk.org; Thomas Monjalon <thomas@monjalon.net>; Olga Shern
>>> <olgas@mellanox.com>; stable@dpdk.org
>>> Subject: Re: [PATCH v3] net/failsafe: fix calling device during RMV events
>>>
>>> Hi Ophir,
>>>
>>> Sorry about the delay,
>>> I have a few remarks, I think this patch could be simpler.
>>>
>>> First, about the commit logline:
>>> "calling device" is not descriptive enough. I'd suggest
>>>
>>>     net/failsafe: fix device configuration during RMV events
>>>
>>> But I'm not a native speaker either, so use it if you think it is better, or don't,
>>> it's only a suggestion :).
>>>
>>> On Thu, Oct 05, 2017 at 10:42:08PM +0000, Ophir Munk wrote:
>>>> This commit prevents control path operations from failing after a sub
>>>> device removal.
>>>>
>>>> Following are the failure steps:
>>>> 1. The physical device is removed due to change in one of PF
>>>> parameters (e.g. MTU) 2. The interrupt thread flags the device 3.
>>>> Within 2 seconds Interrupt thread initializes the actual device
>>>> removal, then every 2 seconds it tries to re-sync (plug in) the
>>>> device. The trials fail as long as VF parameter mismatches the PF
>>> parameter.
>>>> 4. A control thread initiates a control operation on failsafe which
>>>> initiates this operation on the device.
>>>> 5. A race condition occurs between the control thread and interrupt
>>>> thread when accessing the device data structures.
>>>>
>>>> This commit prevents the race condition in step 5. Before this commit
>>>> if a device was removed and then a control thread operation was
>>>> initiated on failsafe - in some cases failsafe called the sub device
>>>> operation instead of avoiding it. Such cases could lead to operations
>>> failures.
>>>>
>>>
>>> This is a nitpick, but as said earlier, this is not preventing the race condition.
>>> This race is still present and can still wreak havok on unsuspecting users.
>>>
>>> If an application has a weak threading model, it will be subject to this race
>>> condition still. It is possible to prevent it fully with proper care from the
>>> application standpoint, but this is not specific to fail-safe and does not
>>> concern us here.
>>>
>>> Anyway, it's really a nitpick, I just wanted to point it out. This is not too
>>> important for this patch.
>>>
>>>> This commit fixes failsafe criteria to determine when the device is
>>>> removed such that it will avoid calling the sub device operations
>>>> during that time and will only call them otherwise.
>>>>
>>>> Fixes: a46f8d584eb8 ("net/failsafe: add fail-safe PMD")
>>>> Cc: stable@dpdk.org
>>>>
>>>> Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
>>>> ---
>>>> v3:
>>>> 1. Rebase v2
>>>>
>>>> 2. Please ignore checkpatch checks on arguments re-usage - they are
>>> confirmed.
>>>> 	CHECK:MACRO_ARG_REUSE: Macro argument reuse ... possible side-
>>> effects?
>>>> 	#217: FILE: drivers/net/failsafe/failsafe_private.h:241:
>>>>
>>>> 3. Add rationales (copy from an email which accompanied v2):
>>>>
>>>> On Monday, September 11, 2017 11:31 AM, Gaetan Rivet wrote:
>>>>>
>>>>> Hi Ophir,
>>>>>
>>>>> On Sat, Sep 09, 2017 at 07:27:11PM +0000, Ophir Munk wrote:
>>>>>> This commit prevents control path operations from failing after a
>>>>>> sub device has informed failsafe it has been removed.
>>>>>>
>>>>>> Before this commit if a device was removed and then a control path
>>>>>
>>>>> Here are the steps if I understood correctly:
>>>>>
>>>>> 0. The physical device is removed
>>>>> 1. The interrupt thread flags the device 2. A control lcore
>>>>> initiates a control operation 3. The alarm triggers, waking up the eal-intr-
>>> thread,
>>>>>    initiating the actual device removal.
>>>>> 4. Race condition occurs between control lcore and interrupt thread.
>>>>>
>>>>> "if a device was removed" is ambiguous I think (are we speaking
>>>>> about the physical port? Is it only flagged? Is it after the removal of the
>>> device itself?).
>>>>> From the context I gather that you mean the device is flagged to be
>>>>> removed, but it won't be as clear in a few month when we revisit this bug
>>> :) .
>>>>>
>>>>> Could you please rephrase this so that the whole context of the
>>>>> issue is available?
>>>>>
>>>>
>>>> Done. Commit message was rephrased based on your comments
>>>>
>>>>>> operations was initiated on failsafe - in some cases failsafe
>>>>>> called the sub device operation instead of avoiding it. Such cases
>>>>>> could lead to operations failures.
>>>>>>
>>>>>> This commit fixes failsafe criteria to determine when the device
>>>>>> is removed such that it will avoid calling the sub device
>>>>>> operations during that time and will only call them otherwise.
>>>>>>
>>>>>
>>>>> This commit mitigates the race condition, reducing the probability
>>>>> for it to have an effect. It does not, however, remove this race
>>>>> condition, which is inherent to the DPDK architecture at the moment.
>>>>>
>>>>> A proper fix, a more detailed workaround and additional
>>>>> documentation warning users writing applications to mind their threads
>>> could be interesting.
>>>>>
>>>>
>>>> The race condition occurs in the last step and may lead to
>>>> segmentation faults (accessing data structures of the same device by 2
>>>> threads) The previous steps ("the physical device is removed", etc) were not
>>> recreated and tested but probably cannot lead to segmentation fault.
>>>>
>>>>> But let's focus on this patch for the time being.
>>>>>
>>>>>> Fixes: a46f8d584eb8 ("net/failsafe: add fail-safe PMD")
>>>>>> Cc: stable@dpdk.org
>>>>>>
>>>>>> Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
>>>>>> ---
>>>>>>  drivers/net/failsafe/failsafe_ether.c |  1 +
>>>>>>  drivers/net/failsafe/failsafe_ops.c   | 52
>>>>> +++++++++++++++++++++++++++++------
>>>>>>  2 files changed, 45 insertions(+), 8 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/net/failsafe/failsafe_ether.c
>>>>>> b/drivers/net/failsafe/failsafe_ether.c
>>>>>> index a3a8cce..1def110 100644
>>>>>> --- a/drivers/net/failsafe/failsafe_ether.c
>>>>>> +++ b/drivers/net/failsafe/failsafe_ether.c
>>>>>> @@ -378,6 +378,7 @@
>>>>>
>>>>> Could you please generate your patches with the function name in the
>>> diff?
>>>>
>>>> Done
>>>>
>>>>>
>>>>>>  				      i);
>>>>>>  				goto err_remove;
>>>>>>  			}
>>>>>> +			sdev->remove = 0;
>>>>>
>>>>> You are adding this here, within failsafe_eth_dev_state_sync, and
>>>>> removing it from the dev_configure ops.
>>>>>
>>>>> 10 lines above, the call to dev_configure is done, meaning that the
>>>>> remove flag was resetted at this point.
>>>>>
>>>>> Can you explain why you prefer resetting the flag here?
>>>>>
>>>>> The position of this flag reset will be dependent upon my subsequent
>>>>> remarks anyway, so hold that thought :) .
>>>>>
>>>>
>>>> The motivation for resetting the "remove" flag within
>>> failsafe_eth_dev_state_sync is as follows:
>>>> Previously to this patch the "remove" flag was designed to signal the need
>>> to remove the sub device.
>>>> Once the sub device was removed and before being reconfigured the
>>> "remove" flag was reset.
>>>>
>>>> After this patch the scope of the "remove" flag was *extended* to
>>>> indicate the sub device status as being "plugged out" by resetting this flag
>>> only after a successful call to failsafe_eth_dev_state_sync().
>>>> The "plug out" status could last a very long time (seconds, minutes, days,
>>> weeks, ...).
>>>>
>>>> Previously to this patch failsafe based the "plugged out" status on
>>>> the sub device state as being below ACTIVE however every 2 seconds
>>>> dev_configure() was called where the sub device was assigned sdev-
>>>>> state = DEV_ACTIVE; therefore the sub device state became ACTIVE for
>>> some time every 2 seconds.
>>>> This is where the race condition occurred: failsafe considered the sub
>>>> device as "Plugged in" for some time every 2 seconds (based on its ACTIVE
>>> state) while it was actually plugged out.
>>>>
>>>> After this patch the "Plugged out" status is based on the "remove" flag.
>>>>
>>>
>>> Sorry, I do not agree with this semantical change on the "remove" flag.
>>> You are essentially adding a new device state, which could be fine per se, but
>>> should not be done here.
>>>
>>> The enum dev_state is there for this purpose.
>>>
>>> The flag dev->remove, calls for an operation to be done upon the concerned
>>> device. It is not meant to become a new device state.
>>>
>>> A point about the work methodoly here: if you wanted to change this
>>> semantic, which could be legitimate and sometimes called for, you should
>>> have proposed it either during a discussion in a response to my previous
>>> email, or introducing the change as a separate patch. This point is important
>>> enough for it to have its own patch, meaning we would have a whole thread
>>> dedicated to it instead of having to interleave commentaries between
>>> related-but-separate diffs on the code.
>>>
>>> But anyway, if you think you need to express a PLUGOUT state, I'd suggest
>>> adding a state between DEV_UNDEFINED and DEV_PARSED.
>>> DEV_UNDEFINED means that the device is in limbo and has no existence per
>>> se (its parsing failed for example, it is not clear whether the parameters are
>>> correct, etc...). DEV_PLUGOUT could mean then that the device has been
>>> successfully probed at least once, meaning that it could possibly have
>>> residuals from this probing still there, or specific care to be taken when
>>> manipulating it.
>>>
>>> However, I'm not yet convinced that this new state is necessary. I think you
>>> can mitigate this race condition without having to add it. If you insist in
>>> introducing this state, please do so in a separate patch, with proper
>>> definition about the meaning of this state:
>>>
>>>   + When it should be valid for a device to be in this state.
>>>   + Which operation corresponds to getting into and out of this state.
>>>   + Why this state is interesting and what could not be expressed before
>>>     that is thus being fixed by introducing this state.
>>>
>>> But please verify twice whether you absolutely need to complexify the
>>> current fail-safe internals before going all in and basing your work upon it :)
>>>
>>
>> Indeed what I am currently missing in failsafe is knowing the device is in a PLUGOUT state.
>> Your suggestion to add a new state DEV_PLUGOUT cannot be used with the current implementation
>> as the device states are modified during an alarm hotplug handling every 2 seconds.
>> In fs_hotplug_alarm() we call failsafe_eth_dev_state_sync(dev) which eventually calls ->dev_configure(dev) 
>> where we assign: sdev->state = DEV_ACTIVE; 
>> then when sync fails fs_hotplug_alarm() calls failsafe_dev_remove(dev) which will call fs_dev_remove(sdev); where the sub devices
>> states are changed from ACTIVE down to DEV_UNDEFINED.
>> Having said that it means that during a PLUGOUT event - the states are modified with each invocation of the fs_hotplug_alarm
>> every 2 seconds. So even if we added DEV_PLUGOUT state - it will not remain fixed during the hotplug alarm handling.
>> I have also verified all of this with printouts.
>> When seeing a sub device in state "DEV_ACTIVE" - we cannot tell whether the device is currently in "PLUGOUT situation" or "PLUGIN situation"
>> This allows operations such as fs_mtu_set() on sub-devices which are in "PLUGOUT situation" while their state is 
>> DEV_ACTIVE to be manipulated, which I think should have been avoided.
>>
>> Please note fs_mtu_set() implementation:
>> static int fs_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
>> {
>>   ..
>>         FOREACH_SUBDEV_ACTIVE(sdev, i, dev) {
>> 	// ***** We are here while the device can be in a "PLUGOUT situation" ***
>>
>> To summarize:
>> I am missing a way to know in failsafe that a sub-device is currently plugged out
> 
> (sdev->state < DEV_ACTIVE && !sdev->remove) means that the device is
> plugged out.
> 
>> 1. I suggested extending the "remove" flag scope for this purpose. It has minimal changes with current failsafe implementation. You prefer not using "remove".
> 
> I prefer using it, but as a flag, not as a device state.
> 
>> 2. You suggested adding a new state DEV_PLUGOUT. I don't think it will work with current implementation (as explained above) or may require a redesign of current implementation.
>>
> 
> I do not suggest adding a new state DEV_PLUGOUT.
> I suggest using sdev->remove properly.
> 
>> Can you suggest another way?
>>
> 
> 0. In a separate commit, move the
>       sdev->remove = 0;
>    from fs_dev_configure, into the case DEV_UNDEFINED of the switch
>    within fs_dev_remove. This is cleaner and more logical anyway.
> 
> 1. Check that sdev->remove is not set in fs_find_next.
>    If sdev->remove is set, then the device should be skipped.
> 
> 2. In failsafe_dev_remove, do not use the FOREACH_SUBDEV_STATE iterator,
>    but manually iterate over all sub-devices using the subs_tail and
>    subs_head values.
>    As the generic iterator would skip over devices which have
>    sdev->remove set, this function would not work anymore.
> 
> 3. Find the places I have missed that need this manual iterator to be
>    used instead of the FOREACH_SUBDEV{,_STATE} ones. I think there is at
>    least one other place that I cannot recall, there might be more.
> 
> If you think this does not work, please tell me why, because then it
> means that I have misunderstood something about the race condition you
> are trying to fix.

Reminder of this patch remaining from previous release.

> 

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [dpdk-dev] [dpdk-stable] [PATCH v3] net/failsafe: fix calling device during RMV events
  2017-11-29 19:17             ` [dpdk-dev] [dpdk-stable] " Ferruh Yigit
@ 2018-01-18 22:22               ` Thomas Monjalon
  2018-01-18 23:35                 ` Gaëtan Rivet
  0 siblings, 1 reply; 36+ messages in thread
From: Thomas Monjalon @ 2018-01-18 22:22 UTC (permalink / raw)
  To: Gaëtan Rivet; +Cc: Ferruh Yigit, Ophir Munk, dev, Olga Shern, Matan Azrad

29/11/2017 20:17, Ferruh Yigit:
> >>> On Thu, Oct 05, 2017 at 10:42:08PM +0000, Ophir Munk wrote:
> >>>> This commit prevents control path operations from failing after a sub
> >>>> device removal.
> >>>>
> >>>> Following are the failure steps:
> >>>> 1. The physical device is removed due to change in one of PF
> >>>> parameters (e.g. MTU) 2. The interrupt thread flags the device 3.
> >>>> Within 2 seconds Interrupt thread initializes the actual device
> >>>> removal, then every 2 seconds it tries to re-sync (plug in) the
> >>>> device. The trials fail as long as VF parameter mismatches the PF
> >>> parameter.
> >>>> 4. A control thread initiates a control operation on failsafe which
> >>>> initiates this operation on the device.
> >>>> 5. A race condition occurs between the control thread and interrupt
> >>>> thread when accessing the device data structures.
> >>>>
> >>>> This commit prevents the race condition in step 5. Before this commit
> >>>> if a device was removed and then a control thread operation was
> >>>> initiated on failsafe - in some cases failsafe called the sub device
> >>>> operation instead of avoiding it. Such cases could lead to operations
> >>> failures.
[...]
> 
> Reminder of this patch remaining from previous release.

Gaetan, what is the decision for this possible race condition?
Can we try to fix it in 18.02?

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [dpdk-dev] [dpdk-stable] [PATCH v3] net/failsafe: fix calling device during RMV events
  2018-01-18 22:22               ` Thomas Monjalon
@ 2018-01-18 23:35                 ` Gaëtan Rivet
  0 siblings, 0 replies; 36+ messages in thread
From: Gaëtan Rivet @ 2018-01-18 23:35 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: Ferruh Yigit, Ophir Munk, dev, Olga Shern, Matan Azrad

On Thu, Jan 18, 2018 at 11:22:51PM +0100, Thomas Monjalon wrote:
> 29/11/2017 20:17, Ferruh Yigit:
> > >>> On Thu, Oct 05, 2017 at 10:42:08PM +0000, Ophir Munk wrote:
> > >>>> This commit prevents control path operations from failing after a sub
> > >>>> device removal.
> > >>>>
> > >>>> Following are the failure steps:
> > >>>> 1. The physical device is removed due to change in one of PF
> > >>>> parameters (e.g. MTU) 2. The interrupt thread flags the device 3.
> > >>>> Within 2 seconds Interrupt thread initializes the actual device
> > >>>> removal, then every 2 seconds it tries to re-sync (plug in) the
> > >>>> device. The trials fail as long as VF parameter mismatches the PF
> > >>> parameter.
> > >>>> 4. A control thread initiates a control operation on failsafe which
> > >>>> initiates this operation on the device.
> > >>>> 5. A race condition occurs between the control thread and interrupt
> > >>>> thread when accessing the device data structures.
> > >>>>
> > >>>> This commit prevents the race condition in step 5. Before this commit
> > >>>> if a device was removed and then a control thread operation was
> > >>>> initiated on failsafe - in some cases failsafe called the sub device
> > >>>> operation instead of avoiding it. Such cases could lead to operations
> > >>> failures.
> [...]
> > 
> > Reminder of this patch remaining from previous release.
> 
> Gaetan, what is the decision for this possible race condition?

This patchset had several issues that I outlined.

> Can we try to fix it in 18.02?

These patches could go in with a rework. If you feel like it I can
review those fixes in the coming weeks if new versions are submitted.

-- 
Gaëtan Rivet
6WIND

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [dpdk-dev] [PATCH v4 0/2] failsafe: fix calling device during RMV events
  2017-10-05 22:42     ` [dpdk-dev] [PATCH v3] " Ophir Munk
  2017-10-20 10:35       ` Gaëtan Rivet
@ 2018-02-08 12:20       ` Matan Azrad
  2018-02-08 12:20         ` [dpdk-dev] [PATCH v4 1/2] net/failsafe: fix hotplug alarm cancel Matan Azrad
                           ` (2 more replies)
  1 sibling, 3 replies; 36+ messages in thread
From: Matan Azrad @ 2018-02-08 12:20 UTC (permalink / raw)
  To: Gaetan Rivet; +Cc: dev


This series tries to mitigate the fail-safe race between control commands
and the asynchronous plug-out/plug-in processes.

A full fix is required and will be sent later.

v4(Matan):
Rebase on top of 18.02-rc3.
Extend the fix for other control commands.
Fix hotplug alarm cancel.

V3(Ophir):
Rebase v2.
Add rationales (copy from an email which accompanied v2).


Matan Azrad (1):
  net/failsafe: fix hotplug alarm cancel

Ophir Munk (1):
  net/failsafe: fix calling device during RMV events

 drivers/net/failsafe/failsafe.c         | 18 ++++++------
 drivers/net/failsafe/failsafe_ether.c   |  2 ++
 drivers/net/failsafe/failsafe_flow.c    |  8 +++---
 drivers/net/failsafe/failsafe_ops.c     | 50 ++++++++++++++++++++-------------
 drivers/net/failsafe/failsafe_private.h | 26 ++++++++++++++---
 5 files changed, 66 insertions(+), 38 deletions(-)

-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [dpdk-dev] [PATCH v4 1/2] net/failsafe: fix hotplug alarm cancel
  2018-02-08 12:20       ` [dpdk-dev] [PATCH v4 0/2] failsafe: " Matan Azrad
@ 2018-02-08 12:20         ` Matan Azrad
  2018-02-08 12:20         ` [dpdk-dev] [PATCH v4 2/2] net/failsafe: fix calling device during RMV events Matan Azrad
  2018-02-08 16:34         ` [dpdk-dev] [PATCH v5 0/3] failsafe: " Matan Azrad
  2 siblings, 0 replies; 36+ messages in thread
From: Matan Azrad @ 2018-02-08 12:20 UTC (permalink / raw)
  To: Gaetan Rivet; +Cc: dev, stable

The hot-plug alarm mechanism of the fail-safe PMD is responsible for
handling removed devices during a plug-out event and for restoring them
to activity following a plug-in event.

Fail-safe sets a flag called "pending_alarm" to validate that only one
alarm callback is pending at any time. While this flag is required to
avoid simultaneous initiations of the alarm thread - it should not be
considered during alarm thread cancellation.

So, when failsafe_hotplug_alarm_cancel() was called while the alarm
callback was being executed the alarm mechanism was not stopped.

Skip checking the "pending_alarm" flag to allow alarm thread
cancellation at all times.

Fixes: ebea83f899d8 ("net/failsafe: add plug-in support")
Cc: stable@dpdk.org

Signed-off-by: Matan Azrad <matan@mellanox.com>
---
 drivers/net/failsafe/failsafe.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
index 2665a39..7b2cdbb 100644
--- a/drivers/net/failsafe/failsafe.c
+++ b/drivers/net/failsafe/failsafe.c
@@ -85,16 +85,14 @@
 {
 	int ret = 0;
 
-	if (PRIV(dev)->pending_alarm) {
-		rte_errno = 0;
-		rte_eal_alarm_cancel(fs_hotplug_alarm, dev);
-		if (rte_errno) {
-			ERROR("rte_eal_alarm_cancel failed (errno: %s)",
-			      strerror(rte_errno));
-			ret = -rte_errno;
-		} else {
-			PRIV(dev)->pending_alarm = 0;
-		}
+	rte_errno = 0;
+	rte_eal_alarm_cancel(fs_hotplug_alarm, dev);
+	if (rte_errno) {
+		ERROR("rte_eal_alarm_cancel failed (errno: %s)",
+		      strerror(rte_errno));
+		ret = -rte_errno;
+	} else {
+		PRIV(dev)->pending_alarm = 0;
 	}
 	return ret;
 }
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [dpdk-dev] [PATCH v4 2/2] net/failsafe: fix calling device during RMV events
  2018-02-08 12:20       ` [dpdk-dev] [PATCH v4 0/2] failsafe: " Matan Azrad
  2018-02-08 12:20         ` [dpdk-dev] [PATCH v4 1/2] net/failsafe: fix hotplug alarm cancel Matan Azrad
@ 2018-02-08 12:20         ` Matan Azrad
  2018-02-08 16:34         ` [dpdk-dev] [PATCH v5 0/3] failsafe: " Matan Azrad
  2 siblings, 0 replies; 36+ messages in thread
From: Matan Azrad @ 2018-02-08 12:20 UTC (permalink / raw)
  To: Gaetan Rivet; +Cc: dev, Ophir Munk, stable

From: Ophir Munk <ophirmu@mellanox.com>

This commit prevents control path operations from failing after a sub
device removal.

Following are the failure steps:
1. The physical device is removed due to a change in one of the PF
parameters (e.g. MTU).
2. The interrupt thread flags the device.
3. Within 2 seconds the interrupt thread initializes the actual device
removal, then every 2 seconds it tries to re-sync (plug in) the device.
The attempts fail as long as the VF parameter does not match the PF
parameter.
4. A control thread initiates a control operation on failsafe which
initiates this operation on the device.
5. A race condition occurs between the control thread and interrupt thread
when accessing the device data structures.

This commit mitigates the race condition in step 5.

This commit fixes the criteria fail-safe uses to determine when a device is
removed, so that it avoids calling the sub-device operations while the
device is removed and calls them only otherwise.

Fixes: a46f8d584eb8 ("net/failsafe: add fail-safe PMD")
Cc: stable@dpdk.org

Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
Signed-off-by: Matan Azrad <matan@mellanox.com>
---
 drivers/net/failsafe/failsafe_ether.c   |  2 ++
 drivers/net/failsafe/failsafe_flow.c    |  8 +++---
 drivers/net/failsafe/failsafe_ops.c     | 50 ++++++++++++++++++++-------------
 drivers/net/failsafe/failsafe_private.h | 26 ++++++++++++++---
 4 files changed, 58 insertions(+), 28 deletions(-)

diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
index 4c6e938..ca42376 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -377,6 +377,8 @@
 				      i);
 				goto err_remove;
 			}
+			if (PRIV(dev)->state < DEV_STARTED)
+				sdev->remove = 0;
 		}
 	}
 	/*
diff --git a/drivers/net/failsafe/failsafe_flow.c b/drivers/net/failsafe/failsafe_flow.c
index 4d18e8e..d4a69cf 100644
--- a/drivers/net/failsafe/failsafe_flow.c
+++ b/drivers/net/failsafe/failsafe_flow.c
@@ -55,7 +55,7 @@
 	uint8_t i;
 	int ret;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+	FOREACH_SUBDEV_STATE_SAFE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_flow_validate on sub_device %d", i);
 		ret = rte_flow_validate(PORT_ID(sdev),
 				attr, patterns, actions, error);
@@ -80,7 +80,7 @@
 	uint8_t i;
 
 	flow = fs_flow_allocate(attr, patterns, actions);
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+	FOREACH_SUBDEV_STATE_SAFE(sdev, i, dev, DEV_ACTIVE) {
 		flow->flows[i] = rte_flow_create(PORT_ID(sdev),
 				attr, patterns, actions, error);
 		if (flow->flows[i] == NULL && fs_err(sdev, -rte_errno)) {
@@ -115,7 +115,7 @@
 		return -EINVAL;
 	}
 	ret = 0;
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+	FOREACH_SUBDEV_STATE_SAFE(sdev, i, dev, DEV_ACTIVE) {
 		int local_ret;
 
 		if (flow->flows[i] == NULL)
@@ -144,7 +144,7 @@
 	uint8_t i;
 	int ret;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+	FOREACH_SUBDEV_STATE_SAFE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_flow_flush on sub_device %d", i);
 		ret = rte_flow_flush(PORT_ID(sdev), error);
 		if ((ret = fs_err(sdev, ret))) {
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
index 7a67e16..3312cb2 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -131,7 +131,6 @@
 			dev->data->dev_conf.intr_conf.lsc = 0;
 		}
 		DEBUG("Configuring sub-device %d", i);
-		sdev->remove = 0;
 		ret = rte_eth_dev_configure(PORT_ID(sdev),
 					dev->data->nb_rx_queues,
 					dev->data->nb_tx_queues,
@@ -182,6 +181,9 @@
 	FOREACH_SUBDEV(sdev, i, dev) {
 		if (sdev->state != DEV_ACTIVE)
 			continue;
+		if (sdev->remove == 1 && PRIV(dev)->state < DEV_STARTED)
+			/* Application shouldn't start removed sub-devices. */
+			continue;
 		DEBUG("Starting sub_device %d", i);
 		ret = rte_eth_dev_start(PORT_ID(sdev));
 		if (ret) {
@@ -197,6 +199,7 @@
 			return ret;
 		}
 		sdev->state = DEV_STARTED;
+		sdev->remove = 0;
 	}
 	if (PRIV(dev)->state < DEV_STARTED)
 		PRIV(dev)->state = DEV_STARTED;
@@ -211,7 +214,7 @@
 	uint8_t i;
 
 	PRIV(dev)->state = DEV_STARTED - 1;
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_STARTED) {
+	FOREACH_SUBDEV_STATE_SAFE(sdev, i, dev, DEV_STARTED) {
 		rte_eth_dev_stop(PORT_ID(sdev));
 		failsafe_rx_intr_uninstall_subdevice(sdev);
 		sdev->state = DEV_STARTED - 1;
@@ -226,7 +229,7 @@
 	uint8_t i;
 	int ret;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+	FOREACH_SUBDEV_STATE_SAFE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_eth_dev_set_link_up on sub_device %d", i);
 		ret = rte_eth_dev_set_link_up(PORT_ID(sdev));
 		if ((ret = fs_err(sdev, ret))) {
@@ -245,7 +248,7 @@
 	uint8_t i;
 	int ret;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+	FOREACH_SUBDEV_STATE_SAFE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_eth_dev_set_link_down on sub_device %d", i);
 		ret = rte_eth_dev_set_link_down(PORT_ID(sdev));
 		if ((ret = fs_err(sdev, ret))) {
@@ -265,8 +268,15 @@
 	uint8_t i;
 
 	failsafe_hotplug_alarm_cancel(dev);
-	if (PRIV(dev)->state == DEV_STARTED)
+	if (PRIV(dev)->state == DEV_STARTED) {
+		/*
+		 * Clean remove flags to allow stop for all sub-devices because
+		 * there is not hot-plug alarm to stop the removed sub-devices.
+		 */
+		FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_STARTED)
+			sdev->remove = 0;
 		dev->dev_ops->dev_stop(dev);
+	}
 	PRIV(dev)->state = DEV_ACTIVE - 1;
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Closing sub_device %d", i);
@@ -417,7 +427,7 @@
 		return -rte_errno;
 	}
 	rxq->enable_events = 1;
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+	FOREACH_SUBDEV_STATE_SAFE(sdev, i, dev, DEV_ACTIVE) {
 		ret = rte_eth_dev_rx_intr_enable(PORT_ID(sdev), idx);
 		ret = fs_err(sdev, ret);
 		if (ret)
@@ -448,7 +458,7 @@
 		return -rte_errno;
 	}
 	rxq->enable_events = 0;
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+	FOREACH_SUBDEV_STATE_SAFE(sdev, i, dev, DEV_ACTIVE) {
 		ret = rte_eth_dev_rx_intr_disable(PORT_ID(sdev), idx);
 		ret = fs_err(sdev, ret);
 		if (ret)
@@ -587,7 +597,7 @@
 	struct sub_device *sdev;
 	uint8_t i;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
+	FOREACH_SUBDEV_STATE_SAFE(sdev, i, dev, DEV_ACTIVE)
 		rte_eth_promiscuous_enable(PORT_ID(sdev));
 }
 
@@ -597,7 +607,7 @@
 	struct sub_device *sdev;
 	uint8_t i;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
+	FOREACH_SUBDEV_STATE_SAFE(sdev, i, dev, DEV_ACTIVE)
 		rte_eth_promiscuous_disable(PORT_ID(sdev));
 }
 
@@ -607,7 +617,7 @@
 	struct sub_device *sdev;
 	uint8_t i;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
+	FOREACH_SUBDEV_STATE_SAFE(sdev, i, dev, DEV_ACTIVE)
 		rte_eth_allmulticast_enable(PORT_ID(sdev));
 }
 
@@ -617,7 +627,7 @@
 	struct sub_device *sdev;
 	uint8_t i;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
+	FOREACH_SUBDEV_STATE_SAFE(sdev, i, dev, DEV_ACTIVE)
 		rte_eth_allmulticast_disable(PORT_ID(sdev));
 }
 
@@ -629,7 +639,7 @@
 	uint8_t i;
 	int ret;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+	FOREACH_SUBDEV_STATE_SAFE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling link_update on sub_device %d", i);
 		ret = (SUBOPS(sdev, link_update))(ETH(sdev), wait_to_complete);
 		if (ret && ret != -1 && sdev->remove == 0 &&
@@ -692,7 +702,7 @@
 	struct sub_device *sdev;
 	uint8_t i;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+	FOREACH_SUBDEV_STATE_SAFE(sdev, i, dev, DEV_ACTIVE) {
 		rte_eth_stats_reset(PORT_ID(sdev));
 		memset(&sdev->stats_snapshot, 0, sizeof(struct rte_eth_stats));
 	}
@@ -797,7 +807,7 @@
 	uint8_t i;
 	int ret;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+	FOREACH_SUBDEV_STATE_SAFE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_eth_dev_set_mtu on sub_device %d", i);
 		ret = rte_eth_dev_set_mtu(PORT_ID(sdev), mtu);
 		if ((ret = fs_err(sdev, ret))) {
@@ -816,7 +826,7 @@
 	uint8_t i;
 	int ret;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+	FOREACH_SUBDEV_STATE_SAFE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_eth_dev_vlan_filter on sub_device %d", i);
 		ret = rte_eth_dev_vlan_filter(PORT_ID(sdev), vlan_id, on);
 		if ((ret = fs_err(sdev, ret))) {
@@ -850,7 +860,7 @@
 	uint8_t i;
 	int ret;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+	FOREACH_SUBDEV_STATE_SAFE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_eth_dev_flow_ctrl_set on sub_device %d", i);
 		ret = rte_eth_dev_flow_ctrl_set(PORT_ID(sdev), fc_conf);
 		if ((ret = fs_err(sdev, ret))) {
@@ -871,7 +881,7 @@
 	/* No check: already done within the rte_eth_dev_mac_addr_remove
 	 * call for the fail-safe device.
 	 */
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
+	FOREACH_SUBDEV_STATE_SAFE(sdev, i, dev, DEV_ACTIVE)
 		rte_eth_dev_mac_addr_remove(PORT_ID(sdev),
 				&dev->data->mac_addrs[index]);
 	PRIV(dev)->mac_addr_pool[index] = 0;
@@ -888,7 +898,7 @@
 	uint8_t i;
 
 	RTE_ASSERT(index < FAILSAFE_MAX_ETHADDR);
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+	FOREACH_SUBDEV_STATE_SAFE(sdev, i, dev, DEV_ACTIVE) {
 		ret = rte_eth_dev_mac_addr_add(PORT_ID(sdev), mac_addr, vmdq);
 		if ((ret = fs_err(sdev, ret))) {
 			ERROR("Operation rte_eth_dev_mac_addr_add failed for sub_device %"
@@ -910,7 +920,7 @@
 	struct sub_device *sdev;
 	uint8_t i;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
+	FOREACH_SUBDEV_STATE_SAFE(sdev, i, dev, DEV_ACTIVE)
 		rte_eth_dev_default_mac_addr_set(PORT_ID(sdev), mac_addr);
 }
 
@@ -929,7 +939,7 @@
 		*(const void **)arg = &fs_flow_ops;
 		return 0;
 	}
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+	FOREACH_SUBDEV_STATE_SAFE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_eth_dev_filter_ctrl on sub_device %d", i);
 		ret = rte_eth_dev_filter_ctrl(PORT_ID(sdev), type, op, arg);
 		if ((ret = fs_err(sdev, ret))) {
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index f3be152..0f3b543 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -250,10 +250,23 @@ int failsafe_eth_lsc_event_callback(uint16_t port_id,
  * dev:   (struct rte_eth_dev *), fail-safe ethdev
  * state: (enum dev_state), minimum acceptable device state
  */
+
 #define FOREACH_SUBDEV_STATE(s, i, dev, state)		\
-	for (s = fs_find_next((dev), 0, state, &i);	\
+	for (s = fs_find_next((dev), 0, state, 0, &i);	\
 	     s != NULL;					\
-	     s = fs_find_next((dev), i + 1, state, &i))
+	     s = fs_find_next((dev), i + 1, state, 0, &i))
+
+/**
+ * Stateful iterator construct over fail-safe safe sub-devices:
+ * s:     (struct sub_device *), iterator
+ * i:     (uint8_t), increment
+ * dev:   (struct rte_eth_dev *), fail-safe ethdev
+ * state: (enum dev_state), minimum acceptable device state
+ */
+#define FOREACH_SUBDEV_STATE_SAFE(s, i, dev, state)		\
+	for (s = fs_find_next((dev), 0, state, 1, &i);		\
+	     s != NULL;						\
+	     s = fs_find_next((dev), i + 1, state, 1, &i))
 
 /**
  * Iterator construct over fail-safe sub-devices:
@@ -328,6 +341,7 @@ int failsafe_eth_lsc_event_callback(uint16_t port_id,
 fs_find_next(struct rte_eth_dev *dev,
 	     uint8_t sid,
 	     enum dev_state min_state,
+	     uint8_t check_remove,
 	     uint8_t *sid_out)
 {
 	struct sub_device *subs;
@@ -336,8 +350,12 @@ int failsafe_eth_lsc_event_callback(uint16_t port_id,
 	subs = PRIV(dev)->subs;
 	tail = PRIV(dev)->subs_tail;
 	while (sid < tail) {
-		if (subs[sid].state >= min_state)
-			break;
+		if (subs[sid].state >= min_state) {
+			if (check_remove == 0)
+				break;
+			if (PRIV(dev)->subs[sid].remove == 0)
+				break;
+		}
 		sid++;
 	}
 	*sid_out = sid;
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [dpdk-dev] [PATCH v5 0/3] failsafe: fix calling device during RMV events
  2018-02-08 12:20       ` [dpdk-dev] [PATCH v4 0/2] failsafe: " Matan Azrad
  2018-02-08 12:20         ` [dpdk-dev] [PATCH v4 1/2] net/failsafe: fix hotplug alarm cancel Matan Azrad
  2018-02-08 12:20         ` [dpdk-dev] [PATCH v4 2/2] net/failsafe: fix calling device during RMV events Matan Azrad
@ 2018-02-08 16:34         ` Matan Azrad
  2018-02-08 16:34           ` [dpdk-dev] [PATCH v5 1/3] net/failsafe: fix hotplug alarm cancel Matan Azrad
                             ` (3 more replies)
  2 siblings, 4 replies; 36+ messages in thread
From: Matan Azrad @ 2018-02-08 16:34 UTC (permalink / raw)
  To: Gaetan Rivet; +Cc: dev

This series tries to mitigate the fail-safe race between control commands and the asynchronous plug-out/plug-in processes.

A full fix is required and will be sent later.

v5(Matan):
Rename the defines to follow the fail-safe convention (UNSAFE).
Split a fix into a separate patch.

v4(Matan):
Rebase on top of 18.02-rc3.
Extend the fix for other control commands.
Fix hotplug alarm cancel.

V3(Ophir):
Rebase v2.
Add rationales (copy from an email which accompanied v2).


Matan Azrad (3):
  net/failsafe: fix hotplug alarm cancel
  net/failsafe: fix removal scope
  net/failsafe: fix calling device during RMV events

 drivers/net/failsafe/failsafe.c         | 20 +++++++++----------
 drivers/net/failsafe/failsafe_eal.c     |  2 +-
 drivers/net/failsafe/failsafe_ether.c   |  4 +++-
 drivers/net/failsafe/failsafe_ops.c     | 28 ++++++++++++++++++---------
 drivers/net/failsafe/failsafe_private.h | 34 ++++++++++++++++++++++++++-------
 5 files changed, 59 insertions(+), 29 deletions(-)

-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [dpdk-dev] [PATCH v5 1/3] net/failsafe: fix hotplug alarm cancel
  2018-02-08 16:34         ` [dpdk-dev] [PATCH v5 0/3] failsafe: " Matan Azrad
@ 2018-02-08 16:34           ` Matan Azrad
  2018-02-08 16:34           ` [dpdk-dev] [PATCH v5 2/3] net/failsafe: fix removal scope Matan Azrad
                             ` (2 subsequent siblings)
  3 siblings, 0 replies; 36+ messages in thread
From: Matan Azrad @ 2018-02-08 16:34 UTC (permalink / raw)
  To: Gaetan Rivet; +Cc: dev, stable

The hot-plug alarm mechanism of the fail-safe PMD is responsible for
handling removed devices during a plug-out event and for restoring them
to activity following a plug-in event.

Fail-safe sets a flag called "pending_alarm" to validate that only one
alarm callback is pending at any time. While this flag is required to
avoid simultaneous initiations of the alarm thread - it should not be
considered during alarm thread cancellation.

So, when failsafe_hotplug_alarm_cancel() was called while the alarm
callback was being executed the alarm mechanism was not stopped.

Skip checking the "pending_alarm" flag to allow alarm thread
cancellation at all times.

Fixes: ebea83f899d8 ("net/failsafe: add plug-in support")
Cc: stable@dpdk.org

Signed-off-by: Matan Azrad <matan@mellanox.com>
---
 drivers/net/failsafe/failsafe.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
index 2665a39..7b2cdbb 100644
--- a/drivers/net/failsafe/failsafe.c
+++ b/drivers/net/failsafe/failsafe.c
@@ -85,16 +85,14 @@
 {
 	int ret = 0;
 
-	if (PRIV(dev)->pending_alarm) {
-		rte_errno = 0;
-		rte_eal_alarm_cancel(fs_hotplug_alarm, dev);
-		if (rte_errno) {
-			ERROR("rte_eal_alarm_cancel failed (errno: %s)",
-			      strerror(rte_errno));
-			ret = -rte_errno;
-		} else {
-			PRIV(dev)->pending_alarm = 0;
-		}
+	rte_errno = 0;
+	rte_eal_alarm_cancel(fs_hotplug_alarm, dev);
+	if (rte_errno) {
+		ERROR("rte_eal_alarm_cancel failed (errno: %s)",
+		      strerror(rte_errno));
+		ret = -rte_errno;
+	} else {
+		PRIV(dev)->pending_alarm = 0;
 	}
 	return ret;
 }
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [dpdk-dev] [PATCH v5 2/3] net/failsafe: fix removal scope
  2018-02-08 16:34         ` [dpdk-dev] [PATCH v5 0/3] failsafe: " Matan Azrad
  2018-02-08 16:34           ` [dpdk-dev] [PATCH v5 1/3] net/failsafe: fix hotplug alarm cancel Matan Azrad
@ 2018-02-08 16:34           ` Matan Azrad
  2018-02-08 17:19             ` Gaëtan Rivet
  2018-02-08 16:34           ` [dpdk-dev] [PATCH v5 3/3] net/failsafe: fix calling device during RMV events Matan Azrad
  2018-02-11 17:24           ` [dpdk-dev] [PATCH v6 0/3] failsafe: fix hotplug races Matan Azrad
  3 siblings, 1 reply; 36+ messages in thread
From: Matan Azrad @ 2018-02-08 16:34 UTC (permalink / raw)
  To: Gaetan Rivet; +Cc: dev, stable

The fail-safe PMD uses a per-sub-device flag called "remove" to indicate
the scope in which the sub-device isn't synchronized with the fail-safe
state.

This flag is set when fail-safe gets an RMV notification about the
physical removal of the sub-device, and should be unset once the
sub-device has completed all the configurations that bring it to the
fail-safe state.

The previous code wrongly unset the flag after calling the sub-device
PMD dev_configure() operation, before all the configurations were
done.

Unset the remove flag only after the sub-device has successfully
reached the fail-safe state.

Fixes: a46f8d5 ("net/failsafe: add fail-safe PMD")
Cc: stable@dpdk.org

Signed-off-by: Matan Azrad <matan@mellanox.com>
---
 drivers/net/failsafe/failsafe_ether.c | 2 ++
 drivers/net/failsafe/failsafe_ops.c   | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
index 4c6e938..ca42376 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -377,6 +377,8 @@
 				      i);
 				goto err_remove;
 			}
+			if (PRIV(dev)->state < DEV_STARTED)
+				sdev->remove = 0;
 		}
 	}
 	/*
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
index 7a67e16..a7c2dba 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -131,7 +131,6 @@
 			dev->data->dev_conf.intr_conf.lsc = 0;
 		}
 		DEBUG("Configuring sub-device %d", i);
-		sdev->remove = 0;
 		ret = rte_eth_dev_configure(PORT_ID(sdev),
 					dev->data->nb_rx_queues,
 					dev->data->nb_tx_queues,
@@ -197,6 +196,7 @@
 			return ret;
 		}
 		sdev->state = DEV_STARTED;
+		sdev->remove = 0;
 	}
 	if (PRIV(dev)->state < DEV_STARTED)
 		PRIV(dev)->state = DEV_STARTED;
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [dpdk-dev] [PATCH v5 3/3] net/failsafe: fix calling device during RMV events
  2018-02-08 16:34         ` [dpdk-dev] [PATCH v5 0/3] failsafe: " Matan Azrad
  2018-02-08 16:34           ` [dpdk-dev] [PATCH v5 1/3] net/failsafe: fix hotplug alarm cancel Matan Azrad
  2018-02-08 16:34           ` [dpdk-dev] [PATCH v5 2/3] net/failsafe: fix removal scope Matan Azrad
@ 2018-02-08 16:34           ` Matan Azrad
  2018-02-08 18:11             ` Gaëtan Rivet
  2018-02-11 17:24           ` [dpdk-dev] [PATCH v6 0/3] failsafe: fix hotplug races Matan Azrad
  3 siblings, 1 reply; 36+ messages in thread
From: Matan Azrad @ 2018-02-08 16:34 UTC (permalink / raw)
  To: Gaetan Rivet; +Cc: dev, stable

Following are the failure steps:
1. The physical device is removed due to a change in one of the PF
parameters (e.g. MTU).
2. The interrupt thread flags the device.
3. Within 2 seconds the interrupt thread initializes the actual device
removal, then every 2 seconds it tries to re-sync (plug in) the device.
The attempts fail as long as the VF parameter does not match the PF
parameter.
4. A control thread initiates a control operation on failsafe which
initiates this operation on the device.
5. A race condition occurs between the control thread and interrupt
thread when accessing the device data structures.

This patch mitigates the race condition in step 5.

Fixes: a46f8d584eb8 ("net/failsafe: add fail-safe PMD")
Cc: stable@dpdk.org

Signed-off-by: Matan Azrad <matan@mellanox.com>
---
 drivers/net/failsafe/failsafe.c         |  2 +-
 drivers/net/failsafe/failsafe_eal.c     |  2 +-
 drivers/net/failsafe/failsafe_ether.c   |  2 +-
 drivers/net/failsafe/failsafe_ops.c     | 26 +++++++++++++++++--------
 drivers/net/failsafe/failsafe_private.h | 34 ++++++++++++++++++++++++++-------
 5 files changed, 48 insertions(+), 18 deletions(-)

diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
index 7b2cdbb..6cdefd0 100644
--- a/drivers/net/failsafe/failsafe.c
+++ b/drivers/net/failsafe/failsafe.c
@@ -187,7 +187,7 @@
 		 * If MAC address was provided as a parameter,
 		 * apply to all probed slaves.
 		 */
-		FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_PROBED) {
+		FOREACH_SUBDEV_STATE_UNSAFE(sdev, i, dev, DEV_PROBED) {
 			ret = rte_eth_dev_default_mac_addr_set(PORT_ID(sdev),
 							       mac);
 			if (ret) {
diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c
index c3d6731..b3b9c32 100644
--- a/drivers/net/failsafe/failsafe_eal.c
+++ b/drivers/net/failsafe/failsafe_eal.c
@@ -126,7 +126,7 @@
 	int sdev_ret;
 	int ret = 0;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_PROBED) {
+	FOREACH_SUBDEV_STATE_UNSAFE(sdev, i, dev, DEV_PROBED) {
 		sdev_ret = rte_eal_hotplug_remove(sdev->bus->name,
 							sdev->dev->name);
 		if (sdev_ret) {
diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
index ca42376..f2a52c9 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -325,7 +325,7 @@
 	struct sub_device *sdev;
 	uint8_t i;
 
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
+	FOREACH_SUBDEV_STATE_UNSAFE(sdev, i, dev, DEV_ACTIVE)
 		if (sdev->remove && fs_rxtx_clean(sdev)) {
 			fs_dev_stats_save(sdev);
 			fs_dev_remove(sdev);
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
index a7c2dba..3d2cb32 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -181,6 +181,9 @@
 	FOREACH_SUBDEV(sdev, i, dev) {
 		if (sdev->state != DEV_ACTIVE)
 			continue;
+		if (sdev->remove == 1 && PRIV(dev)->state < DEV_STARTED)
+			/* Application shouldn't start removed sub-devices. */
+			continue;
 		DEBUG("Starting sub_device %d", i);
 		ret = rte_eth_dev_start(PORT_ID(sdev));
 		if (ret) {
@@ -265,10 +268,17 @@
 	uint8_t i;
 
 	failsafe_hotplug_alarm_cancel(dev);
-	if (PRIV(dev)->state == DEV_STARTED)
+	if (PRIV(dev)->state == DEV_STARTED) {
+		/*
+		 * Clean remove flags to allow stop for all sub-devices because
+		 * there is not hot-plug alarm to stop the removed sub-devices.
+		 */
+		FOREACH_SUBDEV_STATE_UNSAFE(sdev, i, dev, DEV_STARTED)
+			sdev->remove = 0;
 		dev->dev_ops->dev_stop(dev);
+	}
 	PRIV(dev)->state = DEV_ACTIVE - 1;
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+	FOREACH_SUBDEV_STATE_UNSAFE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Closing sub_device %d", i);
 		rte_eth_dev_close(PORT_ID(sdev));
 		sdev->state = DEV_ACTIVE - 1;
@@ -309,7 +319,7 @@
 	if (rxq->event_fd > 0)
 		close(rxq->event_fd);
 	dev = rxq->priv->dev;
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
+	FOREACH_SUBDEV_STATE_UNSAFE(sdev, i, dev, DEV_ACTIVE)
 		SUBOPS(sdev, rx_queue_release)
 			(ETH(sdev)->data->rx_queues[rxq->qid]);
 	dev->data->rx_queues[rxq->qid] = NULL;
@@ -376,7 +386,7 @@
 		return ret;
 	rxq->event_fd = intr_handle.efds[0];
 	dev->data->rx_queues[rx_queue_id] = rxq;
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+	FOREACH_SUBDEV_STATE_UNSAFE(sdev, i, dev, DEV_ACTIVE) {
 		ret = rte_eth_rx_queue_setup(PORT_ID(sdev),
 				rx_queue_id,
 				nb_rx_desc, socket_id,
@@ -493,7 +503,7 @@
 		return;
 	txq = queue;
 	dev = txq->priv->dev;
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
+	FOREACH_SUBDEV_STATE_UNSAFE(sdev, i, dev, DEV_ACTIVE)
 		SUBOPS(sdev, tx_queue_release)
 			(ETH(sdev)->data->tx_queues[txq->qid]);
 	dev->data->tx_queues[txq->qid] = NULL;
@@ -548,7 +558,7 @@
 	txq->info.nb_desc = nb_tx_desc;
 	txq->priv = PRIV(dev);
 	dev->data->tx_queues[tx_queue_id] = txq;
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+	FOREACH_SUBDEV_STATE_UNSAFE(sdev, i, dev, DEV_ACTIVE) {
 		ret = rte_eth_tx_queue_setup(PORT_ID(sdev),
 				tx_queue_id,
 				nb_tx_desc, socket_id,
@@ -663,7 +673,7 @@
 	int ret;
 
 	rte_memcpy(stats, &PRIV(dev)->stats_accumulator, sizeof(*stats));
-	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+	FOREACH_SUBDEV_STATE_UNSAFE(sdev, i, dev, DEV_ACTIVE) {
 		struct rte_eth_stats *snapshot = &sdev->stats_snapshot.stats;
 		uint64_t *timestamp = &sdev->stats_snapshot.timestamp;
 
@@ -746,7 +756,7 @@
 
 		rx_offload_capa = default_infos.rx_offload_capa;
 		rxq_offload_capa = default_infos.rx_queue_offload_capa;
-		FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_PROBED) {
+		FOREACH_SUBDEV_STATE_UNSAFE(sdev, i, dev, DEV_PROBED) {
 			rte_eth_dev_info_get(PORT_ID(sdev),
 					&PRIV(dev)->infos);
 			rx_offload_capa &= PRIV(dev)->infos.rx_offload_capa;
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index f3be152..7ddd63a 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -244,16 +244,31 @@ int failsafe_eth_lsc_event_callback(uint16_t port_id,
 	((sdev)->sid)
 
 /**
- * Stateful iterator construct over fail-safe sub-devices:
+ * Stateful iterator construct over fail-safe sub-devices,
+ * including the removed sub-devices:
+ * s:     (struct sub_device *), iterator
+ * i:     (uint8_t), increment
+ * dev:   (struct rte_eth_dev *), fail-safe ethdev
+ * state: (enum dev_state), minimum acceptable device state
+ */
+
+#define FOREACH_SUBDEV_STATE_UNSAFE(s, i, dev, state)	\
+	for (s = fs_find_next((dev), 0, state, 0, &i);	\
+	     s != NULL;					\
+	     s = fs_find_next((dev), i + 1, state, 0, &i))
+
+/**
+ * Stateful iterator construct over fail-safe sub-devices,
+ * except the removed sub-devices:
  * s:     (struct sub_device *), iterator
  * i:     (uint8_t), increment
  * dev:   (struct rte_eth_dev *), fail-safe ethdev
  * state: (enum dev_state), minimum acceptable device state
  */
 #define FOREACH_SUBDEV_STATE(s, i, dev, state)		\
-	for (s = fs_find_next((dev), 0, state, &i);	\
+	for (s = fs_find_next((dev), 0, state, 1, &i);	\
 	     s != NULL;					\
-	     s = fs_find_next((dev), i + 1, state, &i))
+	     s = fs_find_next((dev), i + 1, state, 1, &i))
 
 /**
  * Iterator construct over fail-safe sub-devices:
@@ -262,7 +277,7 @@ int failsafe_eth_lsc_event_callback(uint16_t port_id,
  * dev: (struct rte_eth_dev *), fail-safe ethdev
  */
 #define FOREACH_SUBDEV(s, i, dev)			\
-	FOREACH_SUBDEV_STATE(s, i, dev, DEV_UNDEFINED)
+	FOREACH_SUBDEV_STATE_UNSAFE(s, i, dev, DEV_UNDEFINED)
 
 /* dev: (struct rte_eth_dev *) fail-safe device */
 #define PREFERRED_SUBDEV(dev) \
@@ -328,6 +343,7 @@ int failsafe_eth_lsc_event_callback(uint16_t port_id,
 fs_find_next(struct rte_eth_dev *dev,
 	     uint8_t sid,
 	     enum dev_state min_state,
+	     uint8_t check_remove,
 	     uint8_t *sid_out)
 {
 	struct sub_device *subs;
@@ -336,8 +352,12 @@ int failsafe_eth_lsc_event_callback(uint16_t port_id,
 	subs = PRIV(dev)->subs;
 	tail = PRIV(dev)->subs_tail;
 	while (sid < tail) {
-		if (subs[sid].state >= min_state)
-			break;
+		if (subs[sid].state >= min_state) {
+			if (check_remove == 0)
+				break;
+			if (PRIV(dev)->subs[sid].remove == 0)
+				break;
+		}
 		sid++;
 	}
 	*sid_out = sid;
@@ -376,7 +396,7 @@ int failsafe_eth_lsc_event_callback(uint16_t port_id,
 		uint8_t i;
 
 		/* Using acceptable device */
-		FOREACH_SUBDEV_STATE(sdev, i, dev, req_state) {
+		FOREACH_SUBDEV_STATE_UNSAFE(sdev, i, dev, req_state) {
 			if (sdev == banned)
 				continue;
 			DEBUG("Switching tx_dev to sub_device %d",
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [dpdk-dev] [PATCH v5 2/3] net/failsafe: fix removal scope
  2018-02-08 16:34           ` [dpdk-dev] [PATCH v5 2/3] net/failsafe: fix removal scope Matan Azrad
@ 2018-02-08 17:19             ` Gaëtan Rivet
  2018-02-08 19:03               ` Matan Azrad
  0 siblings, 1 reply; 36+ messages in thread
From: Gaëtan Rivet @ 2018-02-08 17:19 UTC (permalink / raw)
  To: Matan Azrad; +Cc: dev, stable

Hi Matan,

Thanks for dealing with this.

On Thu, Feb 08, 2018 at 04:34:12PM +0000, Matan Azrad wrote:
> Fail-safe PMD uses per sub-device flag called "remove" to indicate the
> scope where the sub-device isn't synchronized with the fail-safe state.
> 
> This flag is set when fail-safe gets an RMV notification about the
> physical removal of the sub-device, and should be unset when the
> sub-device completes all the configurations that bring it to the
> fail-safe state.
> 
> The previous code wrongly unsets the flag after calling to the
> sub-device PMD dev_configure() operation and before all the
> configurations were done.
> 
> Change the code to unset the remove flag only after the sub-device
> succeeds in reaching the fail-safe state.
> 

I'm not sure this is the right way to do this.
I think it's clear that it was a mistake to set sdev->remove to 0
only during fs_dev_configure.

The flag itself only means "there is something to be done on this
device, please clean up".

Once the clean-up has happened, then the flag is not necessary anymore
and should be reset.

So I thought that this fix would actually put the flag reset within
fs_dev_remove, right before reinstalling the hotplug alarm.

At this point, the device state would have been set back to
DEV_UNDEFINED, so the remove flag is unnecessary for any operation
trying to avoid unplugged slaves.

The "remove" flag is initialized at 0 when sub-devices are allocated
(during fail-safe init). This means that there would be a difference in
the state of the slave between its first initialization and any
subsequent init, after one successful plugout.

> Fixes: a46f8d5 ("net/failsafe: add fail-safe PMD")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Matan Azrad <matan@mellanox.com>
> ---
>  drivers/net/failsafe/failsafe_ether.c | 2 ++
>  drivers/net/failsafe/failsafe_ops.c   | 2 +-
>  2 files changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
> index 4c6e938..ca42376 100644
> --- a/drivers/net/failsafe/failsafe_ether.c
> +++ b/drivers/net/failsafe/failsafe_ether.c
> @@ -377,6 +377,8 @@
>  				      i);
>  				goto err_remove;
>  			}
> +			if (PRIV(dev)->state < DEV_STARTED)
> +				sdev->remove = 0;

Here the remove flag should already be 0. If it isn't, this is a
(logical) bug, which should be properly addressed instead of patched
in this way.

>  		}
>  	}
>  	/*
> diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
> index 7a67e16..a7c2dba 100644
> --- a/drivers/net/failsafe/failsafe_ops.c
> +++ b/drivers/net/failsafe/failsafe_ops.c
> @@ -131,7 +131,6 @@
>  			dev->data->dev_conf.intr_conf.lsc = 0;
>  		}
>  		DEBUG("Configuring sub-device %d", i);
> -		sdev->remove = 0;

This is correct.

>  		ret = rte_eth_dev_configure(PORT_ID(sdev),
>  					dev->data->nb_rx_queues,
>  					dev->data->nb_tx_queues,
> @@ -197,6 +196,7 @@
>  			return ret;
>  		}
>  		sdev->state = DEV_STARTED;
> +		sdev->remove = 0;

This seems unnecessary, if this operation was already performed once the
device has been properly removed.

>  	}
>  	if (PRIV(dev)->state < DEV_STARTED)
>  		PRIV(dev)->state = DEV_STARTED;
> -- 
> 1.8.3.1
> 

-- 
Gaëtan Rivet
6WIND

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [dpdk-dev] [PATCH v5 3/3] net/failsafe: fix calling device during RMV events
  2018-02-08 16:34           ` [dpdk-dev] [PATCH v5 3/3] net/failsafe: fix calling device during RMV events Matan Azrad
@ 2018-02-08 18:11             ` Gaëtan Rivet
  2018-02-08 19:24               ` Matan Azrad
  0 siblings, 1 reply; 36+ messages in thread
From: Gaëtan Rivet @ 2018-02-08 18:11 UTC (permalink / raw)
  To: Matan Azrad; +Cc: dev, stable

On Thu, Feb 08, 2018 at 04:34:13PM +0000, Matan Azrad wrote:
> Following are the failure steps:
> 1. The physical device is removed due to a change in one of the PF
> parameters (e.g. MTU).
> 2. The interrupt thread flags the device.
> 3. Within 2 seconds the interrupt thread initiates the actual device
> removal, then every 2 seconds it tries to re-sync (plug in) the
> device. The attempts fail as long as the VF parameter mismatches the
> PF parameter.
> 4. A control thread initiates a control operation on failsafe which
> initiates this operation on the device.
> 5. A race condition occurs between the control thread and interrupt
> thread when accessing the device data structures.
> 
> This patch mitigates the race condition in step 5.
> 
> Fixes: a46f8d584eb8 ("net/failsafe: add fail-safe PMD")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Matan Azrad <matan@mellanox.com>
> ---
>  drivers/net/failsafe/failsafe.c         |  2 +-
>  drivers/net/failsafe/failsafe_eal.c     |  2 +-
>  drivers/net/failsafe/failsafe_ether.c   |  2 +-
>  drivers/net/failsafe/failsafe_ops.c     | 26 +++++++++++++++++--------
>  drivers/net/failsafe/failsafe_private.h | 34 ++++++++++++++++++++++++++-------
>  5 files changed, 48 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
> index 7b2cdbb..6cdefd0 100644
> --- a/drivers/net/failsafe/failsafe.c
> +++ b/drivers/net/failsafe/failsafe.c
> @@ -187,7 +187,7 @@
>  		 * If MAC address was provided as a parameter,
>  		 * apply to all probed slaves.
>  		 */
> -		FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_PROBED) {
> +		FOREACH_SUBDEV_STATE_UNSAFE(sdev, i, dev, DEV_PROBED) {

No need for the UNSAFE here. The ports should have been just
initialized, and sdev->remove should be 0.

If sdev->remove is 1, then it means it has been set already by a plugout
event, meaning that rte_eth_dev_default_mac_addr_set should not even be
called on it.

>  			ret = rte_eth_dev_default_mac_addr_set(PORT_ID(sdev),
>  							       mac);
>  			if (ret) {
> diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c
> index c3d6731..b3b9c32 100644
> --- a/drivers/net/failsafe/failsafe_eal.c
> +++ b/drivers/net/failsafe/failsafe_eal.c
> @@ -126,7 +126,7 @@
>  	int sdev_ret;
>  	int ret = 0;
>  
> -	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_PROBED) {
> +	FOREACH_SUBDEV_STATE_UNSAFE(sdev, i, dev, DEV_PROBED) {
>  		sdev_ret = rte_eal_hotplug_remove(sdev->bus->name,
>  							sdev->dev->name);
>  		if (sdev_ret) {
> diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
> index ca42376..f2a52c9 100644
> --- a/drivers/net/failsafe/failsafe_ether.c
> +++ b/drivers/net/failsafe/failsafe_ether.c
> @@ -325,7 +325,7 @@
>  	struct sub_device *sdev;
>  	uint8_t i;
>  
> -	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
> +	FOREACH_SUBDEV_STATE_UNSAFE(sdev, i, dev, DEV_ACTIVE)
>  		if (sdev->remove && fs_rxtx_clean(sdev)) {
>  			fs_dev_stats_save(sdev);
>  			fs_dev_remove(sdev);
> diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
> index a7c2dba..3d2cb32 100644
> --- a/drivers/net/failsafe/failsafe_ops.c
> +++ b/drivers/net/failsafe/failsafe_ops.c
> @@ -181,6 +181,9 @@
>  	FOREACH_SUBDEV(sdev, i, dev) {
>  		if (sdev->state != DEV_ACTIVE)
>  			continue;
> +		if (sdev->remove == 1 && PRIV(dev)->state < DEV_STARTED)
> +			/* Application shouldn't start removed sub-devices. */
> +			continue;

FOREACH_SUBDEV should already have avoided sub-devices whose remove flag
is 1.

If not, then the fs_err(sdev, ret) stanza right after will let the loop
continue, and the port should be handled by the next slave cleanup.

>  		DEBUG("Starting sub_device %d", i);
>  		ret = rte_eth_dev_start(PORT_ID(sdev));
>  		if (ret) {
> @@ -265,10 +268,17 @@
>  	uint8_t i;
>  
>  	failsafe_hotplug_alarm_cancel(dev);
> -	if (PRIV(dev)->state == DEV_STARTED)
> +	if (PRIV(dev)->state == DEV_STARTED) {
> +		/*
> +		 * Clean remove flags to allow stop for all sub-devices because
> +		 * there is not hot-plug alarm to stop the removed sub-devices.
> +		 */
> +		FOREACH_SUBDEV_STATE_UNSAFE(sdev, i, dev, DEV_STARTED)
> +			sdev->remove = 0;

Why make this conditional to state == DEV_STARTED?

>  		dev->dev_ops->dev_stop(dev);
> +	}
>  	PRIV(dev)->state = DEV_ACTIVE - 1;
> -	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
> +	FOREACH_SUBDEV_STATE_UNSAFE(sdev, i, dev, DEV_ACTIVE) {
>  		DEBUG("Closing sub_device %d", i);
>  		rte_eth_dev_close(PORT_ID(sdev));
>  		sdev->state = DEV_ACTIVE - 1;

-->

	/*
	 * Clean remove flags to allow stop for all sub-devices because
	 * there is no hot-plug alarm to clean the removed sub-devices.
	 */
	FOREACH_SUBDEV_STATE_UNSAFE(sdev, i, dev, DEV_ACTIVE)
		sdev->remove = 0;
	if (PRIV(dev)->state == DEV_STARTED)
		dev->dev_ops->dev_stop(dev);
	PRIV(dev)->state = DEV_ACTIVE - 1;
	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
		DEBUG("Closing sub_device %d", i);
		rte_eth_dev_close(PORT_ID(sdev));
		sdev->state = DEV_ACTIVE - 1;

> @@ -309,7 +319,7 @@
>  	if (rxq->event_fd > 0)
>  		close(rxq->event_fd);
>  	dev = rxq->priv->dev;
> -	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
> +	FOREACH_SUBDEV_STATE_UNSAFE(sdev, i, dev, DEV_ACTIVE)

No need here, as you would have reset sdev->remove if the port was
closing, or it would be dealt with by fs_dev_remove if the alarm is
still running.

>  		SUBOPS(sdev, rx_queue_release)
>  			(ETH(sdev)->data->rx_queues[rxq->qid]);
>  	dev->data->rx_queues[rxq->qid] = NULL;
> @@ -376,7 +386,7 @@

You really should update your git; it is difficult to verify these
changes without the function contexts.

>  		return ret;
>  	rxq->event_fd = intr_handle.efds[0];
>  	dev->data->rx_queues[rx_queue_id] = rxq;
> -	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
> +	FOREACH_SUBDEV_STATE_UNSAFE(sdev, i, dev, DEV_ACTIVE) {

Why should we setup queues on ports marked for removal?

>  		ret = rte_eth_rx_queue_setup(PORT_ID(sdev),
>  				rx_queue_id,
>  				nb_rx_desc, socket_id,
> @@ -493,7 +503,7 @@
>  		return;
>  	txq = queue;
>  	dev = txq->priv->dev;
> -	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
> +	FOREACH_SUBDEV_STATE_UNSAFE(sdev, i, dev, DEV_ACTIVE)

Same as for rx_queue_release: either the device is closing, and the flag
has been reset, or the alarm is still running and will take care of
this.

>  		SUBOPS(sdev, tx_queue_release)
>  			(ETH(sdev)->data->tx_queues[txq->qid]);

Actually, now that I think about it, there seems to be an issue with
queues not released on plugout?

In fs_dev_remove, we only do the general dev_stop and dev_close on the
sub_device.

shouldn't we call tx_queue_release as well before?

>  	dev->data->tx_queues[txq->qid] = NULL;
> @@ -548,7 +558,7 @@
>  	txq->info.nb_desc = nb_tx_desc;
>  	txq->priv = PRIV(dev);
>  	dev->data->tx_queues[tx_queue_id] = txq;
> -	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
> +	FOREACH_SUBDEV_STATE_UNSAFE(sdev, i, dev, DEV_ACTIVE) {

Why use the UNSAFE operator for a setup operation? (Same as for
rx_queue_setup.)

>  		ret = rte_eth_tx_queue_setup(PORT_ID(sdev),
>  				tx_queue_id,
>  				nb_tx_desc, socket_id,
> @@ -663,7 +673,7 @@
>  	int ret;
>  
>  	rte_memcpy(stats, &PRIV(dev)->stats_accumulator, sizeof(*stats));
> -	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
> +	FOREACH_SUBDEV_STATE_UNSAFE(sdev, i, dev, DEV_ACTIVE) {

Why do you want to attempt a stat read on a port bound for removal?

>  		struct rte_eth_stats *snapshot = &sdev->stats_snapshot.stats;
>  		uint64_t *timestamp = &sdev->stats_snapshot.timestamp;
>  
> @@ -746,7 +756,7 @@
>  
>  		rx_offload_capa = default_infos.rx_offload_capa;
>  		rxq_offload_capa = default_infos.rx_queue_offload_capa;
> -		FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_PROBED) {
> +		FOREACH_SUBDEV_STATE_UNSAFE(sdev, i, dev, DEV_PROBED) {

same here.

>  			rte_eth_dev_info_get(PORT_ID(sdev),
>  					&PRIV(dev)->infos);
>  			rx_offload_capa &= PRIV(dev)->infos.rx_offload_capa;
> diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
> index f3be152..7ddd63a 100644
> --- a/drivers/net/failsafe/failsafe_private.h
> +++ b/drivers/net/failsafe/failsafe_private.h
> @@ -244,16 +244,31 @@ int failsafe_eth_lsc_event_callback(uint16_t port_id,
>  	((sdev)->sid)
>  
>  /**
> - * Stateful iterator construct over fail-safe sub-devices:
> + * Stateful iterator construct over fail-safe sub-devices,
> + * including the removed sub-devices:

"including sub-devices marked for removal" is more correct here, as the
device is not actually removed yet, only scheduled for.

> + * s:     (struct sub_device *), iterator
> + * i:     (uint8_t), increment
> + * dev:   (struct rte_eth_dev *), fail-safe ethdev
> + * state: (enum dev_state), minimum acceptable device state
> + */
> +

Here the same documentation as for other macros: parameters type, quick
description of what it does.

> +#define FOREACH_SUBDEV_STATE_UNSAFE(s, i, dev, state)	\
> +	for (s = fs_find_next((dev), 0, state, 0, &i);	\
> +	     s != NULL;					\
> +	     s = fs_find_next((dev), i + 1, state, 0, &i))
> +
> +/**
> + * Stateful iterator construct over fail-safe sub-devices,
> + * except the removed sub-devices:
>   * s:     (struct sub_device *), iterator
>   * i:     (uint8_t), increment
>   * dev:   (struct rte_eth_dev *), fail-safe ethdev
>   * state: (enum dev_state), minimum acceptable device state
>   */
>  #define FOREACH_SUBDEV_STATE(s, i, dev, state)		\
> -	for (s = fs_find_next((dev), 0, state, &i);	\
> +	for (s = fs_find_next((dev), 0, state, 1, &i);	\
>  	     s != NULL;					\
> -	     s = fs_find_next((dev), i + 1, state, &i))
> +	     s = fs_find_next((dev), i + 1, state, 1, &i))
>  
>  /**
>   * Iterator construct over fail-safe sub-devices:
> @@ -262,7 +277,7 @@ int failsafe_eth_lsc_event_callback(uint16_t port_id,
>   * dev: (struct rte_eth_dev *), fail-safe ethdev
>   */
>  #define FOREACH_SUBDEV(s, i, dev)			\
> -	FOREACH_SUBDEV_STATE(s, i, dev, DEV_UNDEFINED)
> +	FOREACH_SUBDEV_STATE_UNSAFE(s, i, dev, DEV_UNDEFINED)

No actually, the default case should be using the "SAFE" iterator, so no
change needed here.

>  
>  /* dev: (struct rte_eth_dev *) fail-safe device */
>  #define PREFERRED_SUBDEV(dev) \
> @@ -328,6 +343,7 @@ int failsafe_eth_lsc_event_callback(uint16_t port_id,
>  fs_find_next(struct rte_eth_dev *dev,
>  	     uint8_t sid,
>  	     enum dev_state min_state,
> +	     uint8_t check_remove,

skip_remove? Seems more descriptive.

>  	     uint8_t *sid_out)
>  {
>  	struct sub_device *subs;
> @@ -336,8 +352,12 @@ int failsafe_eth_lsc_event_callback(uint16_t port_id,
>  	subs = PRIV(dev)->subs;
>  	tail = PRIV(dev)->subs_tail;
>  	while (sid < tail) {
> -		if (subs[sid].state >= min_state)
> -			break;
> +		if (subs[sid].state >= min_state) {
> +			if (check_remove == 0)
> +				break;
> +			if (PRIV(dev)->subs[sid].remove == 0)
> +				break;
> +		}
>  		sid++;
>  	}
>  	*sid_out = sid;
> @@ -376,7 +396,7 @@ int failsafe_eth_lsc_event_callback(uint16_t port_id,
>  		uint8_t i;
>  
>  		/* Using acceptable device */
> -		FOREACH_SUBDEV_STATE(sdev, i, dev, req_state) {
> +		FOREACH_SUBDEV_STATE_UNSAFE(sdev, i, dev, req_state) {

Why should we switch emitting device to one marked for removal?

>  			if (sdev == banned)
>  				continue;
>  			DEBUG("Switching tx_dev to sub_device %d",
> -- 
> 1.8.3.1
> 

Regards,
-- 
Gaëtan Rivet
6WIND

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [dpdk-dev] [PATCH v5 2/3] net/failsafe: fix removal scope
  2018-02-08 17:19             ` Gaëtan Rivet
@ 2018-02-08 19:03               ` Matan Azrad
  0 siblings, 0 replies; 36+ messages in thread
From: Matan Azrad @ 2018-02-08 19:03 UTC (permalink / raw)
  To: Gaëtan Rivet; +Cc: dev, stable, Ophir Munk

Hi Gaetan

From: Gaëtan Rivet, Thursday, February 8, 2018 7:20 PM
> Hi Matan,
> 
> Thanks for dealing with this.
> 
> On Thu, Feb 08, 2018 at 04:34:12PM +0000, Matan Azrad wrote:
> > Fail-safe PMD uses per sub-device flag called "remove" to indicate the
> > scope where the sub-device isn't synchronized with the fail-safe state.
> >
> > This flag is set when fail-safe gets an RMV notification about the
> > physical removal of the sub-device, and should be unset when the
> > sub-device completes all the configurations that bring it to the
> > fail-safe state.
> >
> > The previous code wrongly unsets the flag after calling to the
> > sub-device PMD dev_configure() operation and before all the
> > configurations were done.
> >
> > Change the code to unset the remove flag only after the sub-device
> > succeeds in reaching the fail-safe state.
> >
> 
> I'm not sure this is the right way to do this.
> I think it's clear that it was a mistake to set sdev->remove to 0 only during
> fs_dev_configure.
> 
> The flag itself only means "there is something to be done on this device,
> please clean up".
> 
> Once the clean-up has happened, then the flag is not necessary anymore
> and should be reset.
> 
> So I thought that this fix would actually put the flag reset within
> fs_dev_remove, right before reinstalling the hotplug alarm.
> 
> At this point, the device state would have been set back to DEV_UNDEFINED,
> so the remove flag is unnecessary for any operation trying to avoid
> unplugged slaves.
> 
> The "remove" flag is initialized at 0 when sub-devices are allocated (during
> fail-safe init). This means that there would be a difference in the state of the
> slave between its first initialization and any subsequent init, after one
> successful plugout.
> 

But what about the plug-in process?
Do you want to allow control commands for a sub-device while it is plugging in?

Unsetting the remove flag in fs_dev_remove allows control commands to occur in parallel with the plug-in process.

Maybe the name of the flag should be changed to "unsynchronized".

> > Fixes: a46f8d5 ("net/failsafe: add fail-safe PMD")
> > Cc: stable@dpdk.org
> >
> > Signed-off-by: Matan Azrad <matan@mellanox.com>
> > ---
> >  drivers/net/failsafe/failsafe_ether.c | 2 ++
> >  drivers/net/failsafe/failsafe_ops.c   | 2 +-
> >  2 files changed, 3 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/net/failsafe/failsafe_ether.c
> > b/drivers/net/failsafe/failsafe_ether.c
> > index 4c6e938..ca42376 100644
> > --- a/drivers/net/failsafe/failsafe_ether.c
> > +++ b/drivers/net/failsafe/failsafe_ether.c
> > @@ -377,6 +377,8 @@
> >  				      i);
> >  				goto err_remove;
> >  			}
> > +			if (PRIV(dev)->state < DEV_STARTED)
> > +				sdev->remove = 0;
> 
> Here the remove flag should already be 0. If it isn't, this is a
> (logical) bug, which should be properly addressed instead of patched in this
> way.

Same answer as above.

> >  		}
> >  	}
> >  	/*
> > diff --git a/drivers/net/failsafe/failsafe_ops.c
> > b/drivers/net/failsafe/failsafe_ops.c
> > index 7a67e16..a7c2dba 100644
> > --- a/drivers/net/failsafe/failsafe_ops.c
> > +++ b/drivers/net/failsafe/failsafe_ops.c
> > @@ -131,7 +131,6 @@
> >  			dev->data->dev_conf.intr_conf.lsc = 0;
> >  		}
> >  		DEBUG("Configuring sub-device %d", i);
> > -		sdev->remove = 0;
> 
> This is correct.
> 
> >  		ret = rte_eth_dev_configure(PORT_ID(sdev),
> >  					dev->data->nb_rx_queues,
> >  					dev->data->nb_tx_queues,
> > @@ -197,6 +196,7 @@
> >  			return ret;
> >  		}
> >  		sdev->state = DEV_STARTED;
> > +		sdev->remove = 0;
> 
> This seems unnecessary, if this operation was already performed once the
> device has been properly removed.

Same answer as above.
 
> >  	}
> >  	if (PRIV(dev)->state < DEV_STARTED)
> >  		PRIV(dev)->state = DEV_STARTED;
> > --
> > 1.8.3.1
> >
> 
> --
> Gaëtan Rivet
> 6WIND

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [dpdk-dev] [PATCH v5 3/3] net/failsafe: fix calling device during RMV events
  2018-02-08 18:11             ` Gaëtan Rivet
@ 2018-02-08 19:24               ` Matan Azrad
  0 siblings, 0 replies; 36+ messages in thread
From: Matan Azrad @ 2018-02-08 19:24 UTC (permalink / raw)
  To: Gaëtan Rivet; +Cc: dev, stable, Ophir Munk

Hi Gaetan

> From: Gaëtan Rivet, Thursday, February 8, 2018 8:11 PM
> On Thu, Feb 08, 2018 at 04:34:13PM +0000, Matan Azrad wrote:
> > Following are the failure steps:
> > 1. The physical device is removed due to change in one of PF
> > parameters (e.g. MTU) 2. The interrupt thread flags the device 3.
> > Within 2 seconds Interrupt thread initializes the actual device
> > removal, then every 2 seconds it tries to re-sync (plug in) the
> > device. The trials fail as long as VF parameter mismatches the PF
> parameter.
> > 4. A control thread initiates a control operation on failsafe which
> > initiates this operation on the device.
> > 5. A race condition occurs between the control thread and interrupt
> > thread when accessing the device data structures.
> >
> > This patch mitigates the race condition in step 5.
> >
> > Fixes: a46f8d584eb8 ("net/failsafe: add fail-safe PMD")
> > Cc: stable@dpdk.org
> >
> > Signed-off-by: Matan Azrad <matan@mellanox.com>
> > ---
> >  drivers/net/failsafe/failsafe.c         |  2 +-
> >  drivers/net/failsafe/failsafe_eal.c     |  2 +-
> >  drivers/net/failsafe/failsafe_ether.c   |  2 +-
> >  drivers/net/failsafe/failsafe_ops.c     | 26 +++++++++++++++++--------
> >  drivers/net/failsafe/failsafe_private.h | 34
> > ++++++++++++++++++++++++++-------
> >  5 files changed, 48 insertions(+), 18 deletions(-)
> >
> > diff --git a/drivers/net/failsafe/failsafe.c
> > b/drivers/net/failsafe/failsafe.c index 7b2cdbb..6cdefd0 100644
> > --- a/drivers/net/failsafe/failsafe.c
> > +++ b/drivers/net/failsafe/failsafe.c
> > @@ -187,7 +187,7 @@
> >  		 * If MAC address was provided as a parameter,
> >  		 * apply to all probed slaves.
> >  		 */
> > -		FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_PROBED) {
> > +		FOREACH_SUBDEV_STATE_UNSAFE(sdev, i, dev,
> DEV_PROBED) {
> 
> No need for the UNSAFE here. The ports should have been just initialized,
> and sdev->remove should be 0.

So the check is not relevant; why do it? The UNSAFE iterator skips the check.

> If sdev->remove is 1, then it means it has been set already by a plugout
> event, meaning that rte_eth_dev_default_mac_addr_set should not even
> be called on it.
> 
> >  			ret =
> rte_eth_dev_default_mac_addr_set(PORT_ID(sdev),
> >  							       mac);
> >  			if (ret) {
> > diff --git a/drivers/net/failsafe/failsafe_eal.c
> > b/drivers/net/failsafe/failsafe_eal.c
> > index c3d6731..b3b9c32 100644
> > --- a/drivers/net/failsafe/failsafe_eal.c
> > +++ b/drivers/net/failsafe/failsafe_eal.c
> > @@ -126,7 +126,7 @@
> >  	int sdev_ret;
> >  	int ret = 0;
> >
> > -	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_PROBED) {
> > +	FOREACH_SUBDEV_STATE_UNSAFE(sdev, i, dev, DEV_PROBED) {
> >  		sdev_ret = rte_eal_hotplug_remove(sdev->bus->name,
> >  							sdev->dev->name);
> >  		if (sdev_ret) {
> > diff --git a/drivers/net/failsafe/failsafe_ether.c
> > b/drivers/net/failsafe/failsafe_ether.c
> > index ca42376..f2a52c9 100644
> > --- a/drivers/net/failsafe/failsafe_ether.c
> > +++ b/drivers/net/failsafe/failsafe_ether.c
> > @@ -325,7 +325,7 @@
> >  	struct sub_device *sdev;
> >  	uint8_t i;
> >
> > -	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
> > +	FOREACH_SUBDEV_STATE_UNSAFE(sdev, i, dev, DEV_ACTIVE)
> >  		if (sdev->remove && fs_rxtx_clean(sdev)) {
> >  			fs_dev_stats_save(sdev);
> >  			fs_dev_remove(sdev);
> > diff --git a/drivers/net/failsafe/failsafe_ops.c
> > b/drivers/net/failsafe/failsafe_ops.c
> > index a7c2dba..3d2cb32 100644
> > --- a/drivers/net/failsafe/failsafe_ops.c
> > +++ b/drivers/net/failsafe/failsafe_ops.c
> > @@ -181,6 +181,9 @@
> >  	FOREACH_SUBDEV(sdev, i, dev) {
> >  		if (sdev->state != DEV_ACTIVE)
> >  			continue;
> > +		if (sdev->remove == 1 && PRIV(dev)->state < DEV_STARTED)
> > +			/* Application shouldn't start removed sub-devices.
> */
> > +			continue;
> 
> FOREACH_SUBDEV should already have avoided sub-devices which remove
> flag is 1.

fs_dev_start() is also called by the alarm thread to restart a removed device (marked by the remove flag), so it should not be conditioned on the remove flag.

> If not, then the fs_err(sdev, ret) stanza right after will let the loop continue,
> and the port should be handled by the next slave cleanup.
> 
> >  		DEBUG("Starting sub_device %d", i);
> >  		ret = rte_eth_dev_start(PORT_ID(sdev));
> >  		if (ret) {
> > @@ -265,10 +268,17 @@
> >  	uint8_t i;
> >
> >  	failsafe_hotplug_alarm_cancel(dev);
> > -	if (PRIV(dev)->state == DEV_STARTED)
> > +	if (PRIV(dev)->state == DEV_STARTED) {
> > +		/*
> > +		 * Clean remove flags to allow stop for all sub-devices
> because
> > +		 * there is not hot-plug alarm to stop the removed sub-
> devices.
> > +		 */
> > +		FOREACH_SUBDEV_STATE_UNSAFE(sdev, i, dev,
> DEV_STARTED)
> > +			sdev->remove = 0;

> Why make this conditional to state == DEV_STARTED?
> >  		dev->dev_ops->dev_stop(dev);
> > +	}
> >  	PRIV(dev)->state = DEV_ACTIVE - 1;
> > -	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
> > +	FOREACH_SUBDEV_STATE_UNSAFE(sdev, i, dev, DEV_ACTIVE) {
> >  		DEBUG("Closing sub_device %d", i);
> >  		rte_eth_dev_close(PORT_ID(sdev));
> >  		sdev->state = DEV_ACTIVE - 1;
> 
> -->
> 
> 	/*
> 	 * Clean remove flags to allow stop for all sub-devices because
> 	 * there is no hot-plug alarm to clean the removed sub-devices.
> 	 */
> 	FOREACH_SUBDEV_STATE_UNSAFE(sdev, i, dev, DEV_ACTIVE)
> 		sdev->remove = 0;
> 	if (PRIV(dev)->state == DEV_STARTED)
> 		dev->dev_ops->dev_stop(dev);
> 	PRIV(dev)->state = DEV_ACTIVE - 1;
> 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
> 		DEBUG("Closing sub_device %d", i);
> 		rte_eth_dev_close(PORT_ID(sdev));
> 		sdev->state = DEV_ACTIVE - 1;
>

Agree.
 
> > @@ -309,7 +319,7 @@
> >  	if (rxq->event_fd > 0)
> >  		close(rxq->event_fd);
> >  	dev = rxq->priv->dev;
> > -	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
> > +	FOREACH_SUBDEV_STATE_UNSAFE(sdev, i, dev, DEV_ACTIVE)
> 
> No need here, as you would have reset sdev->remove if the port was
> closing, or it would be dealt with by fs_dev_remove if the alarm is still
> running.

Agree.

> 
> >  		SUBOPS(sdev, rx_queue_release)
> >  			(ETH(sdev)->data->rx_queues[rxq->qid]);
> >  	dev->data->rx_queues[rxq->qid] = NULL; @@ -376,7 +386,7 @@
> 
> you really should update your git, it is difficult to verify these changes
> without the function contexts.
>
Agree :) sorry.
 
> >  		return ret;
> >  	rxq->event_fd = intr_handle.efds[0];
> >  	dev->data->rx_queues[rx_queue_id] = rxq;
> > -	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
> > +	FOREACH_SUBDEV_STATE_UNSAFE(sdev, i, dev, DEV_ACTIVE) {
> 
> Why should we setup queues on ports marked for removal?
> 

Need to change it. 

> >  		ret = rte_eth_rx_queue_setup(PORT_ID(sdev),
> >  				rx_queue_id,
> >  				nb_rx_desc, socket_id,
> > @@ -493,7 +503,7 @@
> >  		return;
> >  	txq = queue;
> >  	dev = txq->priv->dev;
> > -	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
> > +	FOREACH_SUBDEV_STATE_UNSAFE(sdev, i, dev, DEV_ACTIVE)
> 
> Same as for rx_queue_release: either the device is closing, and the flag has
> been reset, or the alarm is still running and will take care of this.
> 

Agree.

> >  		SUBOPS(sdev, tx_queue_release)
> >  			(ETH(sdev)->data->tx_queues[txq->qid]);
> 
> Actually, now that I think about it, there seems to be an issue with queues
> not released on plugout?
> 
> In fs_dev_remove, we only do the general dev_stop and dev_close on the
> sub_device.
> 
> shouldn't we call tx_queue_release as well before?

Isn't it done by dev_close()?

> 
> >  	dev->data->tx_queues[txq->qid] = NULL; @@ -548,7 +558,7 @@
> >  	txq->info.nb_desc = nb_tx_desc;
> >  	txq->priv = PRIV(dev);
> >  	dev->data->tx_queues[tx_queue_id] = txq;
> > -	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
> > +	FOREACH_SUBDEV_STATE_UNSAFE(sdev, i, dev, DEV_ACTIVE) {
> 
> Why using the UNSAFE operator for a setup operation? (Same as for
> rx_queue_setup.)
> 
No need, you're right; all the queue operations should be safe too.

> >  		ret = rte_eth_tx_queue_setup(PORT_ID(sdev),
> >  				tx_queue_id,
> >  				nb_tx_desc, socket_id,
> > @@ -663,7 +673,7 @@
> >  	int ret;
> >
> >  	rte_memcpy(stats, &PRIV(dev)->stats_accumulator, sizeof(*stats));
> > -	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
> > +	FOREACH_SUBDEV_STATE_UNSAFE(sdev, i, dev, DEV_ACTIVE) {
> 
> Why do you want to attempt a stat read on port bound for removal?

SW counters may succeed; this function deals with the removal case.

> >  		struct rte_eth_stats *snapshot = &sdev-
> >stats_snapshot.stats;
> >  		uint64_t *timestamp = &sdev->stats_snapshot.timestamp;
> >
> > @@ -746,7 +756,7 @@
> >
> >  		rx_offload_capa = default_infos.rx_offload_capa;
> >  		rxq_offload_capa = default_infos.rx_queue_offload_capa;
> > -		FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_PROBED) {
> > +		FOREACH_SUBDEV_STATE_UNSAFE(sdev, i, dev,
> DEV_PROBED) {
> 
> same here.

There is no need for the check here, so why do it?

> 
> >  			rte_eth_dev_info_get(PORT_ID(sdev),
> >  					&PRIV(dev)->infos);
> >  			rx_offload_capa &= PRIV(dev)-
> >infos.rx_offload_capa; diff --git
> > a/drivers/net/failsafe/failsafe_private.h
> > b/drivers/net/failsafe/failsafe_private.h
> > index f3be152..7ddd63a 100644
> > --- a/drivers/net/failsafe/failsafe_private.h
> > +++ b/drivers/net/failsafe/failsafe_private.h
> > @@ -244,16 +244,31 @@ int failsafe_eth_lsc_event_callback(uint16_t
> port_id,
> >  	((sdev)->sid)
> >
> >  /**
> > - * Stateful iterator construct over fail-safe sub-devices:
> > + * Stateful iterator construct over fail-safe sub-devices,
> > + * including the removed sub-devices:
> 
> "including sub-devices marked for removal" is more correct here, as the
> device is not actually removed yet, only scheduled for.
> 

Maybe "including unsynchronized sub-devices"? 

> > + * s:     (struct sub_device *), iterator
> > + * i:     (uint8_t), increment
> > + * dev:   (struct rte_eth_dev *), fail-safe ethdev
> > + * state: (enum dev_state), minimum acceptable device state */
> > +
> 
> Here the same documentation as for other macros: parameters type, quick
> description of what it does.
>

Don't understand you here.
 
> > +#define FOREACH_SUBDEV_STATE_UNSAFE(s, i, dev, state)	\
> > +	for (s = fs_find_next((dev), 0, state, 0, &i);	\
> > +	     s != NULL;					\
> > +	     s = fs_find_next((dev), i + 1, state, 0, &i))
> > +
> > +/**
> > + * Stateful iterator construct over fail-safe sub-devices,
> > + * except the removed sub-devices:
> >   * s:     (struct sub_device *), iterator
> >   * i:     (uint8_t), increment
> >   * dev:   (struct rte_eth_dev *), fail-safe ethdev
> >   * state: (enum dev_state), minimum acceptable device state
> >   */
> >  #define FOREACH_SUBDEV_STATE(s, i, dev, state)		\
> > -	for (s = fs_find_next((dev), 0, state, &i);	\
> > +	for (s = fs_find_next((dev), 0, state, 1, &i);	\
> >  	     s != NULL;					\
> > -	     s = fs_find_next((dev), i + 1, state, &i))
> > +	     s = fs_find_next((dev), i + 1, state, 1, &i))
> >
> >  /**
> >   * Iterator construct over fail-safe sub-devices:
> > @@ -262,7 +277,7 @@ int failsafe_eth_lsc_event_callback(uint16_t
> port_id,
> >   * dev: (struct rte_eth_dev *), fail-safe ethdev
> >   */
> >  #define FOREACH_SUBDEV(s, i, dev)			\
> > -	FOREACH_SUBDEV_STATE(s, i, dev, DEV_UNDEFINED)
> > +	FOREACH_SUBDEV_STATE_UNSAFE(s, i, dev, DEV_UNDEFINED)
> 
> No actually, the default case should be using the "SAFE" iterator, so no
> change needed here.

Also here, I think the check is unnecessary, so using UNSAFE skips it.

> >
> >  /* dev: (struct rte_eth_dev *) fail-safe device */  #define
> > PREFERRED_SUBDEV(dev) \ @@ -328,6 +343,7 @@ int
> > failsafe_eth_lsc_event_callback(uint16_t port_id,  fs_find_next(struct
> > rte_eth_dev *dev,
> >  	     uint8_t sid,
> >  	     enum dev_state min_state,
> > +	     uint8_t check_remove,
> 
> skip_remove? Seems more descriptive.
> 
Agree.

> >  	     uint8_t *sid_out)
> >  {
> >  	struct sub_device *subs;
> > @@ -336,8 +352,12 @@ int failsafe_eth_lsc_event_callback(uint16_t
> port_id,
> >  	subs = PRIV(dev)->subs;
> >  	tail = PRIV(dev)->subs_tail;
> >  	while (sid < tail) {
> > -		if (subs[sid].state >= min_state)
> > -			break;
> > +		if (subs[sid].state >= min_state) {
> > +			if (check_remove == 0)
> > +				break;
> > +			if (PRIV(dev)->subs[sid].remove == 0)
> > +				break;
> > +		}
> >  		sid++;
> >  	}
> >  	*sid_out = sid;
> > @@ -376,7 +396,7 @@ int failsafe_eth_lsc_event_callback(uint16_t
> port_id,
> >  		uint8_t i;
> >
> >  		/* Using acceptable device */
> > -		FOREACH_SUBDEV_STATE(sdev, i, dev, req_state) {
> > +		FOREACH_SUBDEV_STATE_UNSAFE(sdev, i, dev, req_state) {
> 
> Why should we switch emitting device to one marked for removal?

Agree, should be changed.

> >  			if (sdev == banned)
> >  				continue;
> >  			DEBUG("Switching tx_dev to sub_device %d",
> > --
> > 1.8.3.1
> >
> 
> Regards,
> --
> Gaëtan Rivet
> 6WIND

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [dpdk-dev] [PATCH v6 0/3] failsafe: fix hotplug races
  2018-02-08 16:34         ` [dpdk-dev] [PATCH v5 0/3] failsafe: " Matan Azrad
                             ` (2 preceding siblings ...)
  2018-02-08 16:34           ` [dpdk-dev] [PATCH v5 3/3] net/failsafe: fix calling device during RMV events Matan Azrad
@ 2018-02-11 17:24           ` Matan Azrad
  2018-02-11 17:24             ` [dpdk-dev] [PATCH v6 1/3] net/failsafe: fix hotplug alarm cancel Matan Azrad
                               ` (3 more replies)
  3 siblings, 4 replies; 36+ messages in thread
From: Matan Azrad @ 2018-02-11 17:24 UTC (permalink / raw)
  To: Gaetan Rivet; +Cc: dev

This series fixes failsafe races between control commands and the asynchronous plug-out/plug-in processes.

V6(matan):
Full lock based fix.
Change the remove flag scope until SW resources release. 

v5(Matan):
Change defines names to failsafe convention (UNSAFE).
split a fix patch.

v4(Matan):
Rebase on top of 18.02-rc3.
Extend the fix for other control commands.
Fix hotplug alarm cancel.

V3(Ophir):
Rebase v2.
Add rationales (copy from an email which accompanied v2).


Matan Azrad (3):
  net/failsafe: fix hotplug alarm cancel
  net/failsafe: fix removal scope
  net/failsafe: fix hotplug races

 drivers/net/failsafe/Makefile           |   1 +
 drivers/net/failsafe/failsafe.c         |  53 +++++++++---
 drivers/net/failsafe/failsafe_ether.c   |   7 +-
 drivers/net/failsafe/failsafe_flow.c    |  20 ++++-
 drivers/net/failsafe/failsafe_ops.c     | 149 ++++++++++++++++++++++++++------
 drivers/net/failsafe/failsafe_private.h |  62 +++++++++++--
 6 files changed, 248 insertions(+), 44 deletions(-)

-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [dpdk-dev] [PATCH v6 1/3] net/failsafe: fix hotplug alarm cancel
  2018-02-11 17:24           ` [dpdk-dev] [PATCH v6 0/3] failsafe: fix hotplug races Matan Azrad
@ 2018-02-11 17:24             ` Matan Azrad
  2018-02-11 17:24             ` [dpdk-dev] [PATCH v6 2/3] net/failsafe: fix removal scope Matan Azrad
                               ` (2 subsequent siblings)
  3 siblings, 0 replies; 36+ messages in thread
From: Matan Azrad @ 2018-02-11 17:24 UTC (permalink / raw)
  To: Gaetan Rivet; +Cc: dev, stable

The hot-plug alarm mechanism of fail-safe PMD is responsible for
handling removed devices during a plug-out event and to restore them
back to activity following a plug-in event.

Fail-safe sets a flag called "pending_alarm" to validate that only one
alarm callback is pending at any time. While this flag is required to
avoid simultaneous initiations of the alarm thread - it should not be
considered during alarm thread cancellation.

So, when failsafe_hotplug_alarm_cancel() was called while the alarm
callback was being executed the alarm mechanism was not stopped.

Skip checking the "pending_alarm" flag to allow alarm thread
cancellation at all times.

Fixes: ebea83f899d8 ("net/failsafe: add plug-in support")
Cc: stable@dpdk.org

Signed-off-by: Matan Azrad <matan@mellanox.com>
---
 drivers/net/failsafe/failsafe.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
index 2665a39..7b2cdbb 100644
--- a/drivers/net/failsafe/failsafe.c
+++ b/drivers/net/failsafe/failsafe.c
@@ -85,16 +85,14 @@
 {
 	int ret = 0;
 
-	if (PRIV(dev)->pending_alarm) {
-		rte_errno = 0;
-		rte_eal_alarm_cancel(fs_hotplug_alarm, dev);
-		if (rte_errno) {
-			ERROR("rte_eal_alarm_cancel failed (errno: %s)",
-			      strerror(rte_errno));
-			ret = -rte_errno;
-		} else {
-			PRIV(dev)->pending_alarm = 0;
-		}
+	rte_errno = 0;
+	rte_eal_alarm_cancel(fs_hotplug_alarm, dev);
+	if (rte_errno) {
+		ERROR("rte_eal_alarm_cancel failed (errno: %s)",
+		      strerror(rte_errno));
+		ret = -rte_errno;
+	} else {
+		PRIV(dev)->pending_alarm = 0;
 	}
 	return ret;
 }
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [dpdk-dev] [PATCH v6 2/3] net/failsafe: fix removal scope
  2018-02-11 17:24           ` [dpdk-dev] [PATCH v6 0/3] failsafe: fix hotplug races Matan Azrad
  2018-02-11 17:24             ` [dpdk-dev] [PATCH v6 1/3] net/failsafe: fix hotplug alarm cancel Matan Azrad
@ 2018-02-11 17:24             ` Matan Azrad
  2018-02-11 17:24             ` [dpdk-dev] [PATCH v6 3/3] net/failsafe: fix hotplug races Matan Azrad
  2018-02-12 20:51             ` [dpdk-dev] [PATCH v7 0/3] failsafe: " Matan Azrad
  3 siblings, 0 replies; 36+ messages in thread
From: Matan Azrad @ 2018-02-11 17:24 UTC (permalink / raw)
  To: Gaetan Rivet; +Cc: dev, stable

Fail-safe PMD uses a per sub-device flag called "remove" to indicate the
scope in which the sub-device was physically removed and its SW resources
should be released.

This flag is set when fail-safe gets an RMV notification about the
physical removal of the sub-device and should be unset when all the
sub-device resources are released.

The previous code wrongly unset the flag in dev_configure() instead of
at the point where the release of all the SW resources is completed.

Move the unsetting of the remove flag to the end of the SW resources
release.

Fixes: a46f8d5 ("net/failsafe: add fail-safe PMD")
Cc: stable@dpdk.org

Signed-off-by: Matan Azrad <matan@mellanox.com>
---
 drivers/net/failsafe/failsafe_ether.c | 1 +
 drivers/net/failsafe/failsafe_ops.c   | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
index 4c6e938..d820faf 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -280,6 +280,7 @@
 		/* the end */
 		break;
 	}
+	sdev->remove = 0;
 	failsafe_hotplug_alarm_install(sdev->fs_dev);
 }
 
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
index 7a67e16..f0e48c1 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -131,7 +131,6 @@
 			dev->data->dev_conf.intr_conf.lsc = 0;
 		}
 		DEBUG("Configuring sub-device %d", i);
-		sdev->remove = 0;
 		ret = rte_eth_dev_configure(PORT_ID(sdev),
 					dev->data->nb_rx_queues,
 					dev->data->nb_tx_queues,
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [dpdk-dev] [PATCH v6 3/3] net/failsafe: fix hotplug races
  2018-02-11 17:24           ` [dpdk-dev] [PATCH v6 0/3] failsafe: fix hotplug races Matan Azrad
  2018-02-11 17:24             ` [dpdk-dev] [PATCH v6 1/3] net/failsafe: fix hotplug alarm cancel Matan Azrad
  2018-02-11 17:24             ` [dpdk-dev] [PATCH v6 2/3] net/failsafe: fix removal scope Matan Azrad
@ 2018-02-11 17:24             ` Matan Azrad
  2018-02-12 18:33               ` Gaëtan Rivet
  2018-02-12 20:51             ` [dpdk-dev] [PATCH v7 0/3] failsafe: " Matan Azrad
  3 siblings, 1 reply; 36+ messages in thread
From: Matan Azrad @ 2018-02-11 17:24 UTC (permalink / raw)
  To: Gaetan Rivet; +Cc: dev, stable

Fail-safe uses a periodic alarm mechanism, running from the host thread,
to manage the hot-plug events of its sub-devices. This management
requires a lot of sub-device PMD operations (stop, close, start, etc.).

While the hot-plug alarm runs in the host thread, the application may
call fail-safe operations which directly trigger the sub-device PMD
operations too. Such a call may occur from any thread chosen by the
application (probably the master thread).

So, more than one operation can execute on a sub-device at the same time,
which can cause a lot of races in the sub-PMDs.

Moreover, some control operations update the fail-safe internal
databases, which can be used by the alarm mechanism at the same
time; this can also cause races and crashes.

Fail-safe is the owner of its sub-devices and must synchronize their
use according to the ETHDEV ownership rules.

Synchronize hot-plug management with a new lock mechanism that uses a
mutex to atomically protect each critical section in the fail-safe
hot-plug mechanism and control operations, preventing any races between
them.

Fixes: a46f8d5 ("net/failsafe: add fail-safe PMD")
Cc: stable@dpdk.org

Signed-off-by: Matan Azrad <matan@mellanox.com>
---
 drivers/net/failsafe/Makefile           |   1 +
 drivers/net/failsafe/failsafe.c         |  35 ++++++++
 drivers/net/failsafe/failsafe_ether.c   |   6 +-
 drivers/net/failsafe/failsafe_flow.c    |  20 ++++-
 drivers/net/failsafe/failsafe_ops.c     | 148 ++++++++++++++++++++++++++------
 drivers/net/failsafe/failsafe_private.h |  62 +++++++++++--
 6 files changed, 239 insertions(+), 33 deletions(-)

diff --git a/drivers/net/failsafe/Makefile b/drivers/net/failsafe/Makefile
index d1ae899..bd2f019 100644
--- a/drivers/net/failsafe/Makefile
+++ b/drivers/net/failsafe/Makefile
@@ -68,5 +68,6 @@ CFLAGS += -pedantic
 LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
 LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
 LDLIBS += -lrte_bus_vdev
+LDLIBS += -lpthread
 
 include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
index 7b2cdbb..c499bfb 100644
--- a/drivers/net/failsafe/failsafe.c
+++ b/drivers/net/failsafe/failsafe.c
@@ -113,17 +113,46 @@
 			break;
 	/* if we have non-probed device */
 	if (i != PRIV(dev)->subs_tail) {
+		if (fs_lock(dev, 1) != 0)
+			goto reinstall;
 		ret = failsafe_eth_dev_state_sync(dev);
+		fs_unlock(dev, 1);
 		if (ret)
 			ERROR("Unable to synchronize sub_device state");
 	}
 	failsafe_dev_remove(dev);
+reinstall:
 	ret = failsafe_hotplug_alarm_install(dev);
 	if (ret)
 		ERROR("Unable to set up next alarm");
 }
 
 static int
+fs_mutex_init(struct fs_priv *priv)
+{
+	int ret;
+	pthread_mutexattr_t attr;
+
+	ret = pthread_mutexattr_init(&attr);
+	if (ret) {
+		ERROR("Cannot initiate mutex attributes - %s", strerror(ret));
+		return ret;
+	}
+	/* Allow mutex relocks for the thread holding the mutex. */
+	ret = pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
+	if (ret) {
+		ERROR("Cannot set mutex type - %s", strerror(ret));
+		return ret;
+	}
+	ret = pthread_mutex_init(&priv->hotplug_mutex, &attr);
+	if (ret) {
+		ERROR("Cannot initiate mutex - %s", strerror(ret));
+		return ret;
+	}
+	return 0;
+}
+
+static int
 fs_eth_dev_create(struct rte_vdev_device *vdev)
 {
 	struct rte_eth_dev *dev;
@@ -176,6 +205,9 @@
 	ret = failsafe_eal_init(dev);
 	if (ret)
 		goto free_args;
+	ret = fs_mutex_init(priv);
+	if (ret)
+		goto free_args;
 	ret = failsafe_hotplug_alarm_install(dev);
 	if (ret) {
 		ERROR("Could not set up plug-in event detection");
@@ -250,6 +282,9 @@
 		ERROR("Error while uninitializing sub-EAL");
 	failsafe_args_free(dev);
 	fs_sub_device_free(dev);
+	ret = pthread_mutex_destroy(&PRIV(dev)->hotplug_mutex);
+	if (ret)
+		ERROR("Error while destroying hotplug mutex");
 	rte_free(PRIV(dev));
 	rte_eth_dev_release_port(dev);
 	return ret;
diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
index d820faf..8672819 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -328,8 +328,11 @@
 
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
 		if (sdev->remove && fs_rxtx_clean(sdev)) {
+			if (fs_lock(dev, 1) != 0)
+				return;
 			fs_dev_stats_save(sdev);
 			fs_dev_remove(sdev);
+			fs_unlock(dev, 1);
 		}
 }
 
@@ -428,7 +431,7 @@
 				void *cb_arg, void *out __rte_unused)
 {
 	struct sub_device *sdev = cb_arg;
-
+	fs_lock(sdev->fs_dev, 0);
 	/* Switch as soon as possible tx_dev. */
 	fs_switch_dev(sdev->fs_dev, sdev);
 	/* Use safe bursts in any case. */
@@ -438,6 +441,7 @@
 	 * the callback at the source of the current thread context.
 	 */
 	sdev->remove = 1;
+	fs_unlock(sdev->fs_dev, 0);
 	return 0;
 }
 
diff --git a/drivers/net/failsafe/failsafe_flow.c b/drivers/net/failsafe/failsafe_flow.c
index 4d18e8e..ec8c909 100644
--- a/drivers/net/failsafe/failsafe_flow.c
+++ b/drivers/net/failsafe/failsafe_flow.c
@@ -55,6 +55,7 @@
 	uint8_t i;
 	int ret;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_flow_validate on sub_device %d", i);
 		ret = rte_flow_validate(PORT_ID(sdev),
@@ -62,9 +63,11 @@
 		if ((ret = fs_err(sdev, ret))) {
 			ERROR("Operation rte_flow_validate failed for sub_device %d"
 			      " with error %d", i, ret);
+			fs_unlock(dev, 0);
 			return ret;
 		}
 	}
+	fs_unlock(dev, 0);
 	return 0;
 }
 
@@ -79,6 +82,7 @@
 	struct rte_flow *flow;
 	uint8_t i;
 
+	fs_lock(dev, 0);
 	flow = fs_flow_allocate(attr, patterns, actions);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		flow->flows[i] = rte_flow_create(PORT_ID(sdev),
@@ -90,6 +94,7 @@
 		}
 	}
 	TAILQ_INSERT_TAIL(&PRIV(dev)->flow_list, flow, next);
+	fs_unlock(dev, 0);
 	return flow;
 err:
 	FOREACH_SUBDEV(sdev, i, dev) {
@@ -98,6 +103,7 @@
 				flow->flows[i], error);
 	}
 	fs_flow_release(&flow);
+	fs_unlock(dev, 0);
 	return NULL;
 }
 
@@ -115,6 +121,7 @@
 		return -EINVAL;
 	}
 	ret = 0;
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		int local_ret;
 
@@ -131,6 +138,7 @@
 	}
 	TAILQ_REMOVE(&PRIV(dev)->flow_list, flow, next);
 	fs_flow_release(&flow);
+	fs_unlock(dev, 0);
 	return ret;
 }
 
@@ -144,12 +152,14 @@
 	uint8_t i;
 	int ret;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_flow_flush on sub_device %d", i);
 		ret = rte_flow_flush(PORT_ID(sdev), error);
 		if ((ret = fs_err(sdev, ret))) {
 			ERROR("Operation rte_flow_flush failed for sub_device %d"
 			      " with error %d", i, ret);
+			fs_unlock(dev, 0);
 			return ret;
 		}
 	}
@@ -157,6 +167,7 @@
 		TAILQ_REMOVE(&PRIV(dev)->flow_list, flow, next);
 		fs_flow_release(&flow);
 	}
+	fs_unlock(dev, 0);
 	return 0;
 }
 
@@ -169,15 +180,19 @@
 {
 	struct sub_device *sdev;
 
+	fs_lock(dev, 0);
 	sdev = TX_SUBDEV(dev);
 	if (sdev != NULL) {
 		int ret = rte_flow_query(PORT_ID(sdev),
 					 flow->flows[SUB_ID(sdev)],
 					 type, arg, error);
 
-		if ((ret = fs_err(sdev, ret)))
+		if ((ret = fs_err(sdev, ret))) {
+			fs_unlock(dev, 0);
 			return ret;
+		}
 	}
+	fs_unlock(dev, 0);
 	WARN("No active sub_device to query about its flow");
 	return -1;
 }
@@ -191,6 +206,7 @@
 	uint8_t i;
 	int ret;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV(sdev, i, dev) {
 		if (sdev->state < DEV_PROBED)
 			continue;
@@ -202,11 +218,13 @@
 		if ((ret = fs_err(sdev, ret))) {
 			ERROR("Operation rte_flow_isolate failed for sub_device %d"
 			      " with error %d", i, ret);
+			fs_unlock(dev, 0);
 			return ret;
 		}
 		sdev->flow_isolated = set;
 	}
 	PRIV(dev)->flow_isolated = set;
+	fs_unlock(dev, 0);
 	return 0;
 }
 
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
index f0e48c1..fe64c68 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -94,6 +94,7 @@
 	uint8_t i;
 	int ret;
 
+	fs_lock(dev, 0);
 	supp_tx_offloads = PRIV(dev)->infos.tx_offload_capa;
 	tx_offloads = dev->data->dev_conf.txmode.offloads;
 	if ((tx_offloads & supp_tx_offloads) != tx_offloads) {
@@ -101,6 +102,7 @@
 		ERROR("Some Tx offloads are not supported, "
 		      "requested 0x%" PRIx64 " supported 0x%" PRIx64,
 		      tx_offloads, supp_tx_offloads);
+		fs_unlock(dev, 0);
 		return -rte_errno;
 	}
 	FOREACH_SUBDEV(sdev, i, dev) {
@@ -139,6 +141,7 @@
 			if (!fs_err(sdev, ret))
 				continue;
 			ERROR("Could not configure sub_device %d", i);
+			fs_unlock(dev, 0);
 			return ret;
 		}
 		if (rmv_interrupt) {
@@ -165,6 +168,7 @@
 	}
 	if (PRIV(dev)->state < DEV_ACTIVE)
 		PRIV(dev)->state = DEV_ACTIVE;
+	fs_unlock(dev, 0);
 	return 0;
 }
 
@@ -175,9 +179,12 @@
 	uint8_t i;
 	int ret;
 
+	fs_lock(dev, 0);
 	ret = failsafe_rx_intr_install(dev);
-	if (ret)
+	if (ret) {
+		fs_unlock(dev, 0);
 		return ret;
+	}
 	FOREACH_SUBDEV(sdev, i, dev) {
 		if (sdev->state != DEV_ACTIVE)
 			continue;
@@ -186,6 +193,7 @@
 		if (ret) {
 			if (!fs_err(sdev, ret))
 				continue;
+			fs_unlock(dev, 0);
 			return ret;
 		}
 		ret = failsafe_rx_intr_install_subdevice(sdev);
@@ -193,6 +201,7 @@
 			if (!fs_err(sdev, ret))
 				continue;
 			rte_eth_dev_stop(PORT_ID(sdev));
+			fs_unlock(dev, 0);
 			return ret;
 		}
 		sdev->state = DEV_STARTED;
@@ -200,6 +209,7 @@
 	if (PRIV(dev)->state < DEV_STARTED)
 		PRIV(dev)->state = DEV_STARTED;
 	fs_switch_dev(dev, NULL);
+	fs_unlock(dev, 0);
 	return 0;
 }
 
@@ -209,6 +219,7 @@
 	struct sub_device *sdev;
 	uint8_t i;
 
+	fs_lock(dev, 0);
 	PRIV(dev)->state = DEV_STARTED - 1;
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_STARTED) {
 		rte_eth_dev_stop(PORT_ID(sdev));
@@ -216,6 +227,7 @@
 		sdev->state = DEV_STARTED - 1;
 	}
 	failsafe_rx_intr_uninstall(dev);
+	fs_unlock(dev, 0);
 }
 
 static int
@@ -225,15 +237,18 @@
 	uint8_t i;
 	int ret;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_eth_dev_set_link_up on sub_device %d", i);
 		ret = rte_eth_dev_set_link_up(PORT_ID(sdev));
 		if ((ret = fs_err(sdev, ret))) {
 			ERROR("Operation rte_eth_dev_set_link_up failed for sub_device %d"
 			      " with error %d", i, ret);
+			fs_unlock(dev, 0);
 			return ret;
 		}
 	}
+	fs_unlock(dev, 0);
 	return 0;
 }
 
@@ -244,15 +259,18 @@
 	uint8_t i;
 	int ret;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_eth_dev_set_link_down on sub_device %d", i);
 		ret = rte_eth_dev_set_link_down(PORT_ID(sdev));
 		if ((ret = fs_err(sdev, ret))) {
 			ERROR("Operation rte_eth_dev_set_link_down failed for sub_device %d"
 			      " with error %d", i, ret);
+			fs_unlock(dev, 0);
 			return ret;
 		}
 	}
+	fs_unlock(dev, 0);
 	return 0;
 }
 
@@ -263,6 +281,7 @@
 	struct sub_device *sdev;
 	uint8_t i;
 
+	fs_lock(dev, 0);
 	failsafe_hotplug_alarm_cancel(dev);
 	if (PRIV(dev)->state == DEV_STARTED)
 		dev->dev_ops->dev_stop(dev);
@@ -273,6 +292,7 @@
 		sdev->state = DEV_ACTIVE - 1;
 	}
 	fs_dev_free_queues(dev);
+	fs_unlock(dev, 0);
 }
 
 static bool
@@ -305,14 +325,16 @@
 	if (queue == NULL)
 		return;
 	rxq = queue;
+	dev = rxq->priv->dev;
+	fs_lock(dev, 0);
 	if (rxq->event_fd > 0)
 		close(rxq->event_fd);
-	dev = rxq->priv->dev;
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
 		SUBOPS(sdev, rx_queue_release)
 			(ETH(sdev)->data->rx_queues[rxq->qid]);
 	dev->data->rx_queues[rxq->qid] = NULL;
 	rte_free(rxq);
+	fs_unlock(dev, 0);
 }
 
 static int
@@ -338,6 +360,7 @@
 	uint8_t i;
 	int ret;
 
+	fs_lock(dev, 0);
 	rxq = dev->data->rx_queues[rx_queue_id];
 	if (rxq != NULL) {
 		fs_rx_queue_release(rxq);
@@ -353,14 +376,17 @@
 		      dev->data->dev_conf.rxmode.offloads,
 		      PRIV(dev)->infos.rx_offload_capa |
 		      PRIV(dev)->infos.rx_queue_offload_capa);
+		fs_unlock(dev, 0);
 		return -rte_errno;
 	}
 	rxq = rte_zmalloc(NULL,
 			  sizeof(*rxq) +
 			  sizeof(rte_atomic64_t) * PRIV(dev)->subs_tail,
 			  RTE_CACHE_LINE_SIZE);
-	if (rxq == NULL)
+	if (rxq == NULL) {
+		fs_unlock(dev, 0);
 		return -ENOMEM;
+	}
 	FOREACH_SUBDEV(sdev, i, dev)
 		rte_atomic64_init(&rxq->refcnt[i]);
 	rxq->qid = rx_queue_id;
@@ -371,8 +397,10 @@
 	rxq->priv = PRIV(dev);
 	rxq->sdev = PRIV(dev)->subs;
 	ret = rte_intr_efd_enable(&intr_handle, 1);
-	if (ret < 0)
+	if (ret < 0) {
+		fs_unlock(dev, 0);
 		return ret;
+	}
 	rxq->event_fd = intr_handle.efds[0];
 	dev->data->rx_queues[rx_queue_id] = rxq;
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
@@ -385,9 +413,11 @@
 			goto free_rxq;
 		}
 	}
+	fs_unlock(dev, 0);
 	return 0;
 free_rxq:
 	fs_rx_queue_release(rxq);
+	fs_unlock(dev, 0);
 	return ret;
 }
 
@@ -400,20 +430,21 @@
 	int ret;
 	int rc = 0;
 
+	fs_lock(dev, 0);
 	if (idx >= dev->data->nb_rx_queues) {
-		rte_errno = EINVAL;
-		return -rte_errno;
+		rc = -EINVAL;
+		goto unlock;
 	}
 	rxq = dev->data->rx_queues[idx];
 	if (rxq == NULL || rxq->event_fd <= 0) {
-		rte_errno = EINVAL;
-		return -rte_errno;
+		rc = -EINVAL;
+		goto unlock;
 	}
 	/* Fail if proxy service is nor running. */
 	if (PRIV(dev)->rxp.sstate != SS_RUNNING) {
 		ERROR("failsafe interrupt services are not running");
-		rte_errno = EAGAIN;
-		return -rte_errno;
+		rc = -EAGAIN;
+		goto unlock;
 	}
 	rxq->enable_events = 1;
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
@@ -422,6 +453,8 @@
 		if (ret)
 			rc = ret;
 	}
+unlock:
+	fs_unlock(dev, 0);
 	if (rc)
 		rte_errno = -rc;
 	return rc;
@@ -437,14 +470,15 @@
 	int rc = 0;
 	int ret;
 
+	fs_lock(dev, 0);
 	if (idx >= dev->data->nb_rx_queues) {
-		rte_errno = EINVAL;
-		return -rte_errno;
+		rc = -EINVAL;
+		goto unlock;
 	}
 	rxq = dev->data->rx_queues[idx];
 	if (rxq == NULL || rxq->event_fd <= 0) {
-		rte_errno = EINVAL;
-		return -rte_errno;
+		rc = -EINVAL;
+		goto unlock;
 	}
 	rxq->enable_events = 0;
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
@@ -456,6 +490,8 @@
 	/* Clear pending events */
 	while (read(rxq->event_fd, &u64, sizeof(uint64_t)) >  0)
 		;
+unlock:
+	fs_unlock(dev, 0);
 	if (rc)
 		rte_errno = -rc;
 	return rc;
@@ -492,11 +528,13 @@
 		return;
 	txq = queue;
 	dev = txq->priv->dev;
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
 		SUBOPS(sdev, tx_queue_release)
 			(ETH(sdev)->data->tx_queues[txq->qid]);
 	dev->data->tx_queues[txq->qid] = NULL;
 	rte_free(txq);
+	fs_unlock(dev, 0);
 }
 
 static int
@@ -511,6 +549,7 @@
 	uint8_t i;
 	int ret;
 
+	fs_lock(dev, 0);
 	txq = dev->data->tx_queues[tx_queue_id];
 	if (txq != NULL) {
 		fs_tx_queue_release(txq);
@@ -531,14 +570,17 @@
 		      dev->data->dev_conf.txmode.offloads,
 		      PRIV(dev)->infos.tx_offload_capa |
 		      PRIV(dev)->infos.tx_queue_offload_capa);
+		fs_unlock(dev, 0);
 		return -rte_errno;
 	}
 	txq = rte_zmalloc("ethdev TX queue",
 			  sizeof(*txq) +
 			  sizeof(rte_atomic64_t) * PRIV(dev)->subs_tail,
 			  RTE_CACHE_LINE_SIZE);
-	if (txq == NULL)
+	if (txq == NULL) {
+		fs_unlock(dev, 0);
 		return -ENOMEM;
+	}
 	FOREACH_SUBDEV(sdev, i, dev)
 		rte_atomic64_init(&txq->refcnt[i]);
 	txq->qid = tx_queue_id;
@@ -557,9 +599,11 @@
 			goto free_txq;
 		}
 	}
+	fs_unlock(dev, 0);
 	return 0;
 free_txq:
 	fs_tx_queue_release(txq);
+	fs_unlock(dev, 0);
 	return ret;
 }
 
@@ -586,8 +630,10 @@
 	struct sub_device *sdev;
 	uint8_t i;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
 		rte_eth_promiscuous_enable(PORT_ID(sdev));
+	fs_unlock(dev, 0);
 }
 
 static void
@@ -596,8 +642,10 @@
 	struct sub_device *sdev;
 	uint8_t i;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
 		rte_eth_promiscuous_disable(PORT_ID(sdev));
+	fs_unlock(dev, 0);
 }
 
 static void
@@ -606,8 +654,10 @@
 	struct sub_device *sdev;
 	uint8_t i;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
 		rte_eth_allmulticast_enable(PORT_ID(sdev));
+	fs_unlock(dev, 0);
 }
 
 static void
@@ -616,8 +666,10 @@
 	struct sub_device *sdev;
 	uint8_t i;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
 		rte_eth_allmulticast_disable(PORT_ID(sdev));
+	fs_unlock(dev, 0);
 }
 
 static int
@@ -628,6 +680,7 @@
 	uint8_t i;
 	int ret;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling link_update on sub_device %d", i);
 		ret = (SUBOPS(sdev, link_update))(ETH(sdev), wait_to_complete);
@@ -635,6 +688,7 @@
 		    rte_eth_dev_is_removed(PORT_ID(sdev)) == 0) {
 			ERROR("Link update failed for sub_device %d with error %d",
 			      i, ret);
+			fs_unlock(dev, 0);
 			return ret;
 		}
 	}
@@ -646,9 +700,11 @@
 		l2 = &ETH(TX_SUBDEV(dev))->data->dev_link;
 		if (memcmp(l1, l2, sizeof(*l1))) {
 			*l1 = *l2;
+			fs_unlock(dev, 0);
 			return 0;
 		}
 	}
+	fs_unlock(dev, 0);
 	return -1;
 }
 
@@ -661,6 +717,7 @@
 	uint8_t i;
 	int ret;
 
+	fs_lock(dev, 0);
 	rte_memcpy(stats, &PRIV(dev)->stats_accumulator, sizeof(*stats));
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		struct rte_eth_stats *snapshot = &sdev->stats_snapshot.stats;
@@ -676,12 +733,14 @@
 			ERROR("Operation rte_eth_stats_get failed for sub_device %d with error %d",
 				  i, ret);
 			*timestamp = 0;
+			fs_unlock(dev, 0);
 			return ret;
 		}
 		*timestamp = rte_rdtsc();
 inc:
 		failsafe_stats_increment(stats, snapshot);
 	}
+	fs_unlock(dev, 0);
 	return 0;
 }
 
@@ -691,11 +750,13 @@
 	struct sub_device *sdev;
 	uint8_t i;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		rte_eth_stats_reset(PORT_ID(sdev));
 		memset(&sdev->stats_snapshot, 0, sizeof(struct rte_eth_stats));
 	}
 	memset(&PRIV(dev)->stats_accumulator, 0, sizeof(struct rte_eth_stats));
+	fs_unlock(dev, 0);
 }
 
 /**
@@ -771,14 +832,20 @@
 {
 	struct sub_device *sdev;
 	struct rte_eth_dev *edev;
+	const uint32_t *ret;
 
+	fs_lock(dev, 0);
 	sdev = TX_SUBDEV(dev);
-	if (sdev == NULL)
-		return NULL;
+	if (sdev == NULL) {
+		ret = NULL;
+		goto unlock;
+	}
 	edev = ETH(sdev);
 	/* ENOTSUP: counts as no supported ptypes */
-	if (SUBOPS(sdev, dev_supported_ptypes_get) == NULL)
-		return NULL;
+	if (SUBOPS(sdev, dev_supported_ptypes_get) == NULL) {
+		ret = NULL;
+		goto unlock;
+	}
 	/*
 	 * The API does not permit to do a clean AND of all ptypes,
 	 * It is also incomplete by design and we do not really care
@@ -786,7 +853,10 @@
 	 * We just return the ptypes of the device of highest
 	 * priority, usually the PREFERRED device.
 	 */
-	return SUBOPS(sdev, dev_supported_ptypes_get)(edev);
+	ret = SUBOPS(sdev, dev_supported_ptypes_get)(edev);
+unlock:
+	fs_unlock(dev, 0);
+	return ret;
 }
 
 static int
@@ -796,15 +866,18 @@
 	uint8_t i;
 	int ret;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_eth_dev_set_mtu on sub_device %d", i);
 		ret = rte_eth_dev_set_mtu(PORT_ID(sdev), mtu);
 		if ((ret = fs_err(sdev, ret))) {
 			ERROR("Operation rte_eth_dev_set_mtu failed for sub_device %d with error %d",
 			      i, ret);
+			fs_unlock(dev, 0);
 			return ret;
 		}
 	}
+	fs_unlock(dev, 0);
 	return 0;
 }
 
@@ -815,15 +888,18 @@
 	uint8_t i;
 	int ret;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_eth_dev_vlan_filter on sub_device %d", i);
 		ret = rte_eth_dev_vlan_filter(PORT_ID(sdev), vlan_id, on);
 		if ((ret = fs_err(sdev, ret))) {
 			ERROR("Operation rte_eth_dev_vlan_filter failed for sub_device %d"
 			      " with error %d", i, ret);
+			fs_unlock(dev, 0);
 			return ret;
 		}
 	}
+	fs_unlock(dev, 0);
 	return 0;
 }
 
@@ -832,13 +908,22 @@
 		struct rte_eth_fc_conf *fc_conf)
 {
 	struct sub_device *sdev;
+	int ret;
 
+	fs_lock(dev, 0);
 	sdev = TX_SUBDEV(dev);
-	if (sdev == NULL)
-		return 0;
-	if (SUBOPS(sdev, flow_ctrl_get) == NULL)
-		return -ENOTSUP;
-	return SUBOPS(sdev, flow_ctrl_get)(ETH(sdev), fc_conf);
+	if (sdev == NULL) {
+		ret = 0;
+		goto unlock;
+	}
+	if (SUBOPS(sdev, flow_ctrl_get) == NULL) {
+		ret = -ENOTSUP;
+		goto unlock;
+	}
+	ret = SUBOPS(sdev, flow_ctrl_get)(ETH(sdev), fc_conf);
+unlock:
+	fs_unlock(dev, 0);
+	return ret;
 }
 
 static int
@@ -849,15 +934,18 @@
 	uint8_t i;
 	int ret;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_eth_dev_flow_ctrl_set on sub_device %d", i);
 		ret = rte_eth_dev_flow_ctrl_set(PORT_ID(sdev), fc_conf);
 		if ((ret = fs_err(sdev, ret))) {
 			ERROR("Operation rte_eth_dev_flow_ctrl_set failed for sub_device %d"
 			      " with error %d", i, ret);
+			fs_unlock(dev, 0);
 			return ret;
 		}
 	}
+	fs_unlock(dev, 0);
 	return 0;
 }
 
@@ -867,6 +955,7 @@
 	struct sub_device *sdev;
 	uint8_t i;
 
+	fs_lock(dev, 0);
 	/* No check: already done within the rte_eth_dev_mac_addr_remove
 	 * call for the fail-safe device.
 	 */
@@ -874,6 +963,7 @@
 		rte_eth_dev_mac_addr_remove(PORT_ID(sdev),
 				&dev->data->mac_addrs[index]);
 	PRIV(dev)->mac_addr_pool[index] = 0;
+	fs_unlock(dev, 0);
 }
 
 static int
@@ -887,11 +977,13 @@
 	uint8_t i;
 
 	RTE_ASSERT(index < FAILSAFE_MAX_ETHADDR);
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		ret = rte_eth_dev_mac_addr_add(PORT_ID(sdev), mac_addr, vmdq);
 		if ((ret = fs_err(sdev, ret))) {
 			ERROR("Operation rte_eth_dev_mac_addr_add failed for sub_device %"
 			      PRIu8 " with error %d", i, ret);
+			fs_unlock(dev, 0);
 			return ret;
 		}
 	}
@@ -900,6 +992,7 @@
 		PRIV(dev)->nb_mac_addr = index;
 	}
 	PRIV(dev)->mac_addr_pool[index] = vmdq;
+	fs_unlock(dev, 0);
 	return 0;
 }
 
@@ -909,8 +1002,10 @@
 	struct sub_device *sdev;
 	uint8_t i;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
 		rte_eth_dev_default_mac_addr_set(PORT_ID(sdev), mac_addr);
+	fs_unlock(dev, 0);
 }
 
 static int
@@ -928,15 +1023,18 @@
 		*(const void **)arg = &fs_flow_ops;
 		return 0;
 	}
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_eth_dev_filter_ctrl on sub_device %d", i);
 		ret = rte_eth_dev_filter_ctrl(PORT_ID(sdev), type, op, arg);
 		if ((ret = fs_err(sdev, ret))) {
 			ERROR("Operation rte_eth_dev_filter_ctrl failed for sub_device %d"
 			      " with error %d", i, ret);
+			fs_unlock(dev, 0);
 			return ret;
 		}
 	}
+	fs_unlock(dev, 0);
 	return 0;
 }
 
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index f3be152..ef1f63b 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -7,6 +7,7 @@
 #define _RTE_ETH_FAILSAFE_PRIVATE_H_
 
 #include <sys/queue.h>
+#include <pthread.h>
 
 #include <rte_atomic.h>
 #include <rte_dev.h>
@@ -161,6 +162,9 @@ struct fs_priv {
 	 * appropriate failsafe Rx queue.
 	 */
 	struct rx_proxy rxp;
+	pthread_mutex_t hotplug_mutex;
+	/* Hot-plug mutex is locked by the alarm mechanism. */
+	volatile unsigned int alarm_lock:1;
 	unsigned int pending_alarm:1; /* An alarm is pending */
 	/* flow isolation state */
 	int flow_isolated:1;
@@ -255,12 +259,6 @@ int failsafe_eth_lsc_event_callback(uint16_t port_id,
 	     s != NULL;					\
 	     s = fs_find_next((dev), i + 1, state, &i))
 
-/**
- * Iterator construct over fail-safe sub-devices:
- * s:   (struct sub_device *), iterator
- * i:   (uint8_t), increment
- * dev: (struct rte_eth_dev *), fail-safe ethdev
- */
 #define FOREACH_SUBDEV(s, i, dev)			\
 	FOREACH_SUBDEV_STATE(s, i, dev, DEV_UNDEFINED)
 
@@ -347,6 +345,58 @@ int failsafe_eth_lsc_event_callback(uint16_t port_id,
 }
 
 /*
+ * Lock hot-plug mutex.
+ * is_alarm means that the caller is, for sure, the hot-plug alarm mechanism.
+ */
+static inline int
+fs_lock(struct rte_eth_dev *dev, unsigned int is_alarm)
+{
+	int ret;
+
+	if (is_alarm) {
+		ret = pthread_mutex_trylock(&PRIV(dev)->hotplug_mutex);
+		if (ret) {
+			DEBUG("Hot-plug mutex lock trying failed(%s), will try"
+			      " again later...", strerror(ret));
+			return ret;
+		}
+		PRIV(dev)->alarm_lock = 1;
+	} else {
+		ret = pthread_mutex_lock(&PRIV(dev)->hotplug_mutex);
+		if (ret) {
+			ERROR("Cannot lock mutex(%s)", strerror(ret));
+			return ret;
+		}
+	}
+	DEBUG("Hot-plug mutex was locked by thread %lu%s", pthread_self(),
+	      PRIV(dev)->alarm_lock ? " by the hot-plug alarm" : "");
+	return ret;
+}
+
+/*
+ * Unlock hot-plug mutex.
+ * is_alarm means that the caller is, for sure, the hot-plug alarm mechanism.
+ */
+static inline void
+fs_unlock(struct rte_eth_dev *dev, unsigned int is_alarm)
+{
+	int ret;
+	unsigned int prev_alarm_lock = PRIV(dev)->alarm_lock;
+
+	if (is_alarm) {
+		RTE_ASSERT(PRIV(dev)->alarm_lock == 1);
+		PRIV(dev)->alarm_lock = 0;
+	}
+	ret = pthread_mutex_unlock(&PRIV(dev)->hotplug_mutex);
+	if (ret)
+		ERROR("Cannot unlock hot-plug mutex(%s)", strerror(ret));
+	else
+		DEBUG("Hot-plug mutex was unlocked by thread %lu%s",
+		      pthread_self(),
+		      prev_alarm_lock ? " by the hot-plug alarm" : "");
+}
+
+/*
  * Switch emitting device.
  * If banned is set, banned must not be considered for
  * the role of emitting device.
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [dpdk-dev] [PATCH v6 3/3] net/failsafe: fix hotplug races
  2018-02-11 17:24             ` [dpdk-dev] [PATCH v6 3/3] net/failsafe: fix hotplug races Matan Azrad
@ 2018-02-12 18:33               ` Gaëtan Rivet
  2018-02-12 20:35                 ` Matan Azrad
  0 siblings, 1 reply; 36+ messages in thread
From: Gaëtan Rivet @ 2018-02-12 18:33 UTC (permalink / raw)
  To: Matan Azrad; +Cc: dev, stable

Hi Matan,

On Sun, Feb 11, 2018 at 05:24:32PM +0000, Matan Azrad wrote:
> Fail-safe uses periodic alarm mechanism, running from the host thread,
> to manage the hot-plug events of its sub-devices. This management
> requires a lot of sub-devices PMDs operations (stop,close,start,etc).
> 
> While the hot-plug alarm runs in the host thread, the application may
> call fail-safe operations which directly trigger the sub-devices PMDs
> operations too, This call may occur from any thread decided by the
> application (probably the master thread).
> 
> So, more than one operation can execute to a sub-device in same time
> what can cause a lot of races in the sub-PMDs.
> 
> Moreover, some control operations update the fail-safe internal
> databases which can be used by the alarm mechanism in the same
> time, what also can cause to races and crashes.
> 
> Fail-safe is the owner of its sub-devices and must to synchronize their
> use according to the ETHDEV ownership rules.
> 
> Synchronize hot-plug management by a new lock mechanism uses a mutex to
> atomically defend each critical section in the fail-safe hot-plug
> mechanism and control operations to prevent any races between them.
> 
> Fixes: a46f8d5 ("net/failsafe: add fail-safe PMD")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Matan Azrad <matan@mellanox.com>
> ---
>  drivers/net/failsafe/Makefile           |   1 +
>  drivers/net/failsafe/failsafe.c         |  35 ++++++++
>  drivers/net/failsafe/failsafe_ether.c   |   6 +-
>  drivers/net/failsafe/failsafe_flow.c    |  20 ++++-
>  drivers/net/failsafe/failsafe_ops.c     | 148 ++++++++++++++++++++++++++------
>  drivers/net/failsafe/failsafe_private.h |  62 +++++++++++--
>  6 files changed, 239 insertions(+), 33 deletions(-)
> 
> diff --git a/drivers/net/failsafe/Makefile b/drivers/net/failsafe/Makefile
> index d1ae899..bd2f019 100644
> --- a/drivers/net/failsafe/Makefile
> +++ b/drivers/net/failsafe/Makefile
> @@ -68,5 +68,6 @@ CFLAGS += -pedantic
>  LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
>  LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
>  LDLIBS += -lrte_bus_vdev
> +LDLIBS += -lpthread
>  
>  include $(RTE_SDK)/mk/rte.lib.mk
> diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
> index 7b2cdbb..c499bfb 100644
> --- a/drivers/net/failsafe/failsafe.c
> +++ b/drivers/net/failsafe/failsafe.c
> @@ -113,17 +113,46 @@
>  			break;
>  	/* if we have non-probed device */
>  	if (i != PRIV(dev)->subs_tail) {
> +		if (fs_lock(dev, 1) != 0)
> +			goto reinstall;

You have left a few operations unlocked further down the call stack.
With these discrepancies fixed, the RECURSIVE attribute could be
removed, and this lock as well.

>  		ret = failsafe_eth_dev_state_sync(dev);
> +		fs_unlock(dev, 1);

Compared to the first version of these changes, I much prefer having a
wrapper for locking. However, I dislike having the arguably unnecessary
additional argument (alarm_lock).

I guess you added this for debugging purpose, but in the end either the
design is simple and clear, and you have a proper model, or you don't,
and that's an issue.

And having the RECURSIVE attribute "just in case", is not reassuring.

>  		if (ret)
>  			ERROR("Unable to synchronize sub_device state");
>  	}
>  	failsafe_dev_remove(dev);
> +reinstall:
>  	ret = failsafe_hotplug_alarm_install(dev);
>  	if (ret)
>  		ERROR("Unable to set up next alarm");
>  }
>  
>  static int
> +fs_mutex_init(struct fs_priv *priv)
> +{
> +	int ret;
> +	pthread_mutexattr_t attr;
> +
> +	ret = pthread_mutexattr_init(&attr);
> +	if (ret) {
> +		ERROR("Cannot initiate mutex attributes - %s", strerror(ret));
> +		return ret;
> +	}
> +	/* Allow mutex relocks for the thread holding the mutex. */
> +	ret = pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
> +	if (ret) {

Just to emphasize, I think this should be removed.

Please explain why you thought it was necessary.

> +		ERROR("Cannot set mutex type - %s", strerror(ret));
> +		return ret;
> +	}
> +	ret = pthread_mutex_init(&priv->hotplug_mutex, &attr);
> +	if (ret) {
> +		ERROR("Cannot initiate mutex - %s", strerror(ret));
> +		return ret;
> +	}
> +	return 0;
> +}
> +
> +static int
>  fs_eth_dev_create(struct rte_vdev_device *vdev)
>  {
>  	struct rte_eth_dev *dev;
> @@ -176,6 +205,9 @@
>  	ret = failsafe_eal_init(dev);
>  	if (ret)
>  		goto free_args;
> +	ret = fs_mutex_init(priv);
> +	if (ret)
> +		goto free_args;
>  	ret = failsafe_hotplug_alarm_install(dev);
>  	if (ret) {
>  		ERROR("Could not set up plug-in event detection");
> @@ -250,6 +282,9 @@
>  		ERROR("Error while uninitializing sub-EAL");
>  	failsafe_args_free(dev);
>  	fs_sub_device_free(dev);
> +	ret = pthread_mutex_destroy(&PRIV(dev)->hotplug_mutex);
> +	if (ret)
> +		ERROR("Error while destroying hotplug mutex");
>  	rte_free(PRIV(dev));
>  	rte_eth_dev_release_port(dev);
>  	return ret;
> diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
> index d820faf..8672819 100644
> --- a/drivers/net/failsafe/failsafe_ether.c
> +++ b/drivers/net/failsafe/failsafe_ether.c

Locking fs_eth_dev_conf_apply should allow to remove the lock in
fs_hotplug_alarm, as long as we make sure only public rte_ether API is
used in fs_eth_dev_conf_apply and its callee.

> @@ -328,8 +328,11 @@
>  
>  	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
>  		if (sdev->remove && fs_rxtx_clean(sdev)) {
> +			if (fs_lock(dev, 1) != 0)
> +				return;
>  			fs_dev_stats_save(sdev);
>  			fs_dev_remove(sdev);
> +			fs_unlock(dev, 1);
>  		}
>  }
>  
> @@ -428,7 +431,7 @@
>  				void *cb_arg, void *out __rte_unused)
>  {
>  	struct sub_device *sdev = cb_arg;
> -

This line should remain.

> +	fs_lock(sdev->fs_dev, 0);
>  	/* Switch as soon as possible tx_dev. */
>  	fs_switch_dev(sdev->fs_dev, sdev);
>  	/* Use safe bursts in any case. */
> @@ -438,6 +441,7 @@
>  	 * the callback at the source of the current thread context.
>  	 */
>  	sdev->remove = 1;
> +	fs_unlock(sdev->fs_dev, 0);
>  	return 0;
>  }
>  

<snip>

> diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
> index f3be152..ef1f63b 100644
> --- a/drivers/net/failsafe/failsafe_private.h
> +++ b/drivers/net/failsafe/failsafe_private.h
> @@ -7,6 +7,7 @@
>  #define _RTE_ETH_FAILSAFE_PRIVATE_H_
>  
>  #include <sys/queue.h>
> +#include <pthread.h>
>  
>  #include <rte_atomic.h>
>  #include <rte_dev.h>
> @@ -161,6 +162,9 @@ struct fs_priv {
>  	 * appropriate failsafe Rx queue.
>  	 */
>  	struct rx_proxy rxp;
> +	pthread_mutex_t hotplug_mutex;
> +	/* Hot-plug mutex is locked by the alarm mechanism. */
> +	volatile unsigned int alarm_lock:1;

Without the RECURSIVE attribute, I believe this becomes unnecessary.

>  	unsigned int pending_alarm:1; /* An alarm is pending */
>  	/* flow isolation state */
>  	int flow_isolated:1;
> @@ -255,12 +259,6 @@ int failsafe_eth_lsc_event_callback(uint16_t port_id,
>  	     s != NULL;					\
>  	     s = fs_find_next((dev), i + 1, state, &i))
>  
> -/**
> - * Iterator construct over fail-safe sub-devices:
> - * s:   (struct sub_device *), iterator
> - * i:   (uint8_t), increment
> - * dev: (struct rte_eth_dev *), fail-safe ethdev
> - */

Editing mistake I think here.

>  #define FOREACH_SUBDEV(s, i, dev)			\
>  	FOREACH_SUBDEV_STATE(s, i, dev, DEV_UNDEFINED)
>  
> @@ -347,6 +345,58 @@ int failsafe_eth_lsc_event_callback(uint16_t port_id,
>  }
>  
>  /*
> + * Lock hot-plug mutex.
> + * is_alarm means that the caller is, for sure, the hot-plug alarm mechanism.
> + */
> +static inline int
> +fs_lock(struct rte_eth_dev *dev, unsigned int is_alarm)

The "is_alarm" should be removed without RECURSIVE.

> +{
> +	int ret;
> +
> +	if (is_alarm) {
> +		ret = pthread_mutex_trylock(&PRIV(dev)->hotplug_mutex);
> +		if (ret) {
> +			DEBUG("Hot-plug mutex lock trying failed(%s), will try"
> +			      " again later...", strerror(ret));
> +			return ret;
> +		}
> +		PRIV(dev)->alarm_lock = 1;
> +	} else {
> +		ret = pthread_mutex_lock(&PRIV(dev)->hotplug_mutex);
> +		if (ret) {
> +			ERROR("Cannot lock mutex(%s)", strerror(ret));
> +			return ret;
> +		}
> +	}
> +	DEBUG("Hot-plug mutex was locked by thread %lu%s", pthread_self(),
> +	      PRIV(dev)->alarm_lock ? " by the hot-plug alarm" : "");
> +	return ret;
> +}
> +
> +/*
> + * Unlock hot-plug mutex.
> + * is_alarm means that the caller is, for sure, the hot-plug alarm mechanism.
> + */
> +static inline void
> +fs_unlock(struct rte_eth_dev *dev, unsigned int is_alarm)

ditto

> +{
> +	int ret;
> +	unsigned int prev_alarm_lock = PRIV(dev)->alarm_lock;
> +
> +	if (is_alarm) {
> +		RTE_ASSERT(PRIV(dev)->alarm_lock == 1);
> +		PRIV(dev)->alarm_lock = 0;
> +	}
> +	ret = pthread_mutex_unlock(&PRIV(dev)->hotplug_mutex);
> +	if (ret)
> +		ERROR("Cannot unlock hot-plug mutex(%s)", strerror(ret));
> +	else
> +		DEBUG("Hot-plug mutex was unlocked by thread %lu%s",
> +		      pthread_self(),
> +		      prev_alarm_lock ? " by the hot-plug alarm" : "");
> +}

I know that using a RECURSIVE lock allows you having an implementation
quicker.

So this choice of implementation is only done due to the impending
release, not because it is the right one. I think it should work, and I
heard that it was heavily tested internally.

So I guess this patch can go in with the few other nits (removed blank line,
removed macro doc), as long as it is reworked soon after.

On this matter, I do not think that blindly testing implementations that
all either copied each other or weren't too complicated does the trick
regarding concurrency issues.

You were thinking about an example app for your ownership library, in order
to validate its implementation. I think this could work nicely as a
torture instrument for this patchset as well, with some care.

Regards,
-- 
Gaëtan Rivet
6WIND

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [dpdk-dev] [PATCH v6 3/3] net/failsafe: fix hotplug races
  2018-02-12 18:33               ` Gaëtan Rivet
@ 2018-02-12 20:35                 ` Matan Azrad
  0 siblings, 0 replies; 36+ messages in thread
From: Matan Azrad @ 2018-02-12 20:35 UTC (permalink / raw)
  To: Gaëtan Rivet; +Cc: dev, stable

Hi Gaetan

From: Gaëtan Rivet, Sent: Monday, February 12, 2018 8:33 PM
> Hi Matan,
> 
> On Sun, Feb 11, 2018 at 05:24:32PM +0000, Matan Azrad wrote:
> > Fail-safe uses periodic alarm mechanism, running from the host thread,
> > to manage the hot-plug events of its sub-devices. This management
> > requires a lot of sub-devices PMDs operations (stop,close,start,etc).
> >
> > While the hot-plug alarm runs in the host thread, the application may
> > call fail-safe operations which directly trigger the sub-devices PMDs
> > operations too, This call may occur from any thread decided by the
> > application (probably the master thread).
> >
> > So, more than one operation can execute to a sub-device in same time
> > what can cause a lot of races in the sub-PMDs.
> >
> > Moreover, some control operations update the fail-safe internal
> > databases which can be used by the alarm mechanism in the same time,
> > what also can cause to races and crashes.
> >
> > Fail-safe is the owner of its sub-devices and must to synchronize
> > their use according to the ETHDEV ownership rules.
> >
> > Synchronize hot-plug management by a new lock mechanism uses a mutex
> > to atomically defend each critical section in the fail-safe hot-plug
> > mechanism and control operations to prevent any races between them.
> >
> > Fixes: a46f8d5 ("net/failsafe: add fail-safe PMD")
> > Cc: stable@dpdk.org
> >
> > Signed-off-by: Matan Azrad <matan@mellanox.com>
> > ---
> >  drivers/net/failsafe/Makefile           |   1 +
> >  drivers/net/failsafe/failsafe.c         |  35 ++++++++
> >  drivers/net/failsafe/failsafe_ether.c   |   6 +-
> >  drivers/net/failsafe/failsafe_flow.c    |  20 ++++-
> >  drivers/net/failsafe/failsafe_ops.c     | 148
> ++++++++++++++++++++++++++------
> >  drivers/net/failsafe/failsafe_private.h |  62 +++++++++++--
> >  6 files changed, 239 insertions(+), 33 deletions(-)
> >
> > diff --git a/drivers/net/failsafe/Makefile
> > b/drivers/net/failsafe/Makefile index d1ae899..bd2f019 100644
> > --- a/drivers/net/failsafe/Makefile
> > +++ b/drivers/net/failsafe/Makefile
> > @@ -68,5 +68,6 @@ CFLAGS += -pedantic
> >  LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring  LDLIBS +=
> > -lrte_ethdev -lrte_net -lrte_kvargs  LDLIBS += -lrte_bus_vdev
> > +LDLIBS += -lpthread
> >
> >  include $(RTE_SDK)/mk/rte.lib.mk
> > diff --git a/drivers/net/failsafe/failsafe.c
> > b/drivers/net/failsafe/failsafe.c index 7b2cdbb..c499bfb 100644
> > --- a/drivers/net/failsafe/failsafe.c
> > +++ b/drivers/net/failsafe/failsafe.c
> > @@ -113,17 +113,46 @@
> >  			break;
> >  	/* if we have non-probed device */
> >  	if (i != PRIV(dev)->subs_tail) {
> > +		if (fs_lock(dev, 1) != 0)
> > +			goto reinstall;
> 
> You have left a few operations unlocked further down the call stack.
> With these discrepancies fixed, the RECURSIVE attribute could be removed,
> and this lock as well.
> 
> >  		ret = failsafe_eth_dev_state_sync(dev);
> > +		fs_unlock(dev, 1);
> 
> Compared to the first version of these changes, I much prefer having a
> wrapper for locking. However, I dislike having the arguably unnecessary
> additional argument (alarm_lock).
> 
> I guess you added this for debugging purpose, but in the end either the
> design is simple and clear, and you have a proper model, or you don't, and
> that's an issue.

Not for debug, the debug is by the way,
Actually, it would just be nice to know whether the alarm is running in the critical sections, and this may be used in the future.
Actually, following patch "fix reconfiguration" is using it. 

> And having the RECURSIVE attribute "just in case", is not reassuring.

You know, there are a lot of pros and cons to the RECURSIVE usage and I can understand your concern.
Just not to create a bigger patch this time I think it can be changed in the next release.

> >  		if (ret)
> >  			ERROR("Unable to synchronize sub_device state");
> >  	}
> >  	failsafe_dev_remove(dev);
> > +reinstall:
> >  	ret = failsafe_hotplug_alarm_install(dev);
> >  	if (ret)
> >  		ERROR("Unable to set up next alarm");  }
> >
> >  static int
> > +fs_mutex_init(struct fs_priv *priv)
> > +{
> > +	int ret;
> > +	pthread_mutexattr_t attr;
> > +
> > +	ret = pthread_mutexattr_init(&attr);
> > +	if (ret) {
> > +		ERROR("Cannot initiate mutex attributes - %s", strerror(ret));
> > +		return ret;
> > +	}
> > +	/* Allow mutex relocks for the thread holding the mutex. */
> > +	ret = pthread_mutexattr_settype(&attr,
> PTHREAD_MUTEX_RECURSIVE);
> > +	if (ret) {
> 
> Just to emphasize, I think this should be removed.
> 
> Please explain why you thought it was necessary.
> 

To simplify the code:
1. Allow to use less calls to lock operations.
2. Allow to easily differentiate between alarm time and app time in the shared code (dev_configure(), dev_start()).
3. Allow easily to use try_lock in the alarm thread.
4. Defend from some kinds of deadlock.

Actually the alternative way to use simple lock is more complicated.

> > +		ERROR("Cannot set mutex type - %s", strerror(ret));
> > +		return ret;
> > +	}
> > +	ret = pthread_mutex_init(&priv->hotplug_mutex, &attr);
> > +	if (ret) {
> > +		ERROR("Cannot initiate mutex - %s", strerror(ret));
> > +		return ret;
> > +	}
> > +	return 0;
> > +}
> > +
> > +static int
> >  fs_eth_dev_create(struct rte_vdev_device *vdev)  {
> >  	struct rte_eth_dev *dev;
> > @@ -176,6 +205,9 @@
> >  	ret = failsafe_eal_init(dev);
> >  	if (ret)
> >  		goto free_args;
> > +	ret = fs_mutex_init(priv);
> > +	if (ret)
> > +		goto free_args;
> >  	ret = failsafe_hotplug_alarm_install(dev);
> >  	if (ret) {
> >  		ERROR("Could not set up plug-in event detection"); @@ -
> 250,6 +282,9
> > @@
> >  		ERROR("Error while uninitializing sub-EAL");
> >  	failsafe_args_free(dev);
> >  	fs_sub_device_free(dev);
> > +	ret = pthread_mutex_destroy(&PRIV(dev)->hotplug_mutex);
> > +	if (ret)
> > +		ERROR("Error while destroying hotplug mutex");
> >  	rte_free(PRIV(dev));
> >  	rte_eth_dev_release_port(dev);
> >  	return ret;
> > diff --git a/drivers/net/failsafe/failsafe_ether.c
> > b/drivers/net/failsafe/failsafe_ether.c
> > index d820faf..8672819 100644
> > --- a/drivers/net/failsafe/failsafe_ether.c
> > +++ b/drivers/net/failsafe/failsafe_ether.c
> 
> Locking fs_eth_dev_conf_apply should allow to remove the lock in
> fs_hotplug_alarm, as long as we make sure only public rte_ether API is used
> in fs_eth_dev_conf_apply and its callee.

No, all the state updates (failsafe state and the sub-devices states) in the fs_hotplug_alarm stack should be protected by a lock (or any other atomic mechanism).

> > @@ -328,8 +328,11 @@
> >
> >  	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
> >  		if (sdev->remove && fs_rxtx_clean(sdev)) {
> > +			if (fs_lock(dev, 1) != 0)
> > +				return;
> >  			fs_dev_stats_save(sdev);
> >  			fs_dev_remove(sdev);
> > +			fs_unlock(dev, 1);
> >  		}
> >  }
> >
> > @@ -428,7 +431,7 @@
> >  				void *cb_arg, void *out __rte_unused)  {
> >  	struct sub_device *sdev = cb_arg;
> > -
> 
> This line should remain.

Sure.
 
> > +	fs_lock(sdev->fs_dev, 0);
> >  	/* Switch as soon as possible tx_dev. */
> >  	fs_switch_dev(sdev->fs_dev, sdev);
> >  	/* Use safe bursts in any case. */
> > @@ -438,6 +441,7 @@
> >  	 * the callback at the source of the current thread context.
> >  	 */
> >  	sdev->remove = 1;
> > +	fs_unlock(sdev->fs_dev, 0);
> >  	return 0;
> >  }
> >
> 
> <snip>
> 
> > diff --git a/drivers/net/failsafe/failsafe_private.h
> > b/drivers/net/failsafe/failsafe_private.h
> > index f3be152..ef1f63b 100644
> > --- a/drivers/net/failsafe/failsafe_private.h
> > +++ b/drivers/net/failsafe/failsafe_private.h
> > @@ -7,6 +7,7 @@
> >  #define _RTE_ETH_FAILSAFE_PRIVATE_H_
> >
> >  #include <sys/queue.h>
> > +#include <pthread.h>
> >
> >  #include <rte_atomic.h>
> >  #include <rte_dev.h>
> > @@ -161,6 +162,9 @@ struct fs_priv {
> >  	 * appropriate failsafe Rx queue.
> >  	 */
> >  	struct rx_proxy rxp;
> > +	pthread_mutex_t hotplug_mutex;
> > +	/* Hot-plug mutex is locked by the alarm mechanism. */
> > +	volatile unsigned int alarm_lock:1;
> 
> Without the RECURSIVE attribute, I believe this becomes unnecessary.

I explained the potential usage above.

> 
> >  	unsigned int pending_alarm:1; /* An alarm is pending */
> >  	/* flow isolation state */
> >  	int flow_isolated:1;
> > @@ -255,12 +259,6 @@ int failsafe_eth_lsc_event_callback(uint16_t
> port_id,
> >  	     s != NULL;					\
> >  	     s = fs_find_next((dev), i + 1, state, &i))
> >
> > -/**
> > - * Iterator construct over fail-safe sub-devices:
> > - * s:   (struct sub_device *), iterator
> > - * i:   (uint8_t), increment
> > - * dev: (struct rte_eth_dev *), fail-safe ethdev
> > - */
> 
> Editing mistake I think here.

Sure.

> >  #define FOREACH_SUBDEV(s, i, dev)			\
> >  	FOREACH_SUBDEV_STATE(s, i, dev, DEV_UNDEFINED)
> >
> > @@ -347,6 +345,58 @@ int failsafe_eth_lsc_event_callback(uint16_t
> > port_id,  }
> >
> >  /*
> > + * Lock hot-plug mutex.
> > + * is_alarm means that the caller is, for sure, the hot-plug alarm
> mechanism.
> > + */
> > +static inline int
> > +fs_lock(struct rte_eth_dev *dev, unsigned int is_alarm)
> 
> The "is_alarm" should be removed without RECURSIVE.

Not sure.
 
> > +{
> > +	int ret;
> > +
> > +	if (is_alarm) {
> > +		ret = pthread_mutex_trylock(&PRIV(dev)->hotplug_mutex);
> > +		if (ret) {
> > +			DEBUG("Hot-plug mutex lock trying failed(%s), will
> try"
> > +			      " again later...", strerror(ret));
> > +			return ret;
> > +		}
> > +		PRIV(dev)->alarm_lock = 1;
> > +	} else {
> > +		ret = pthread_mutex_lock(&PRIV(dev)->hotplug_mutex);
> > +		if (ret) {
> > +			ERROR("Cannot lock mutex(%s)", strerror(ret));
> > +			return ret;
> > +		}
> > +	}
> > +	DEBUG("Hot-plug mutex was locked by thread %lu%s",
> pthread_self(),
> > +	      PRIV(dev)->alarm_lock ? " by the hot-plug alarm" : "");
> > +	return ret;
> > +}
> > +
> > +/*
> > + * Unlock hot-plug mutex.
> > + * is_alarm means that the caller is, for sure, the hot-plug alarm
> mechanism.
> > + */
> > +static inline void
> > +fs_unlock(struct rte_eth_dev *dev, unsigned int is_alarm)
> 
> ditto

Ditto

> > +{
> > +	int ret;
> > +	unsigned int prev_alarm_lock = PRIV(dev)->alarm_lock;
> > +
> > +	if (is_alarm) {
> > +		RTE_ASSERT(PRIV(dev)->alarm_lock == 1);
> > +		PRIV(dev)->alarm_lock = 0;
> > +	}
> > +	ret = pthread_mutex_unlock(&PRIV(dev)->hotplug_mutex);
> > +	if (ret)
> > +		ERROR("Cannot unlock hot-plug mutex(%s)", strerror(ret));
> > +	else
> > +		DEBUG("Hot-plug mutex was unlocked by thread %lu%s",
> > +		      pthread_self(),
> > +		      prev_alarm_lock ? " by the hot-plug alarm" : ""); }
> 
> I know that using a RECURSIVE lock allows you having an implementation
> quicker.

Yes.
 
> So this choice of implementation is only done due to the impending release,
> not because it is the right one. I think it should work, and I heard that it was
> heavily tested internally.

It is right, but it can be implemented in another way that has other pros and cons.

> So I guess this patch can go in with the few other nits (removed blank line,
> removed macro doc), as long as it is reworked soon after.

Sure will send V7 for it.

> On this matter, I do not think that blindly testing implementations that all
> either copied each other or weren't too complicated does the trick regarding
> concurrency issues.
> 
> You were thinking about an example app for your ownership library, in order
> to validate its implementation. I think this could work nicely as a torture
> instrument for this patchset as well, with some care.

Can be, Yes.

Thanks.
> Regards,
> --
> Gaëtan Rivet
> 6WIND

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [dpdk-dev] [PATCH v7 0/3] failsafe: fix hotplug races
  2018-02-11 17:24           ` [dpdk-dev] [PATCH v6 0/3] failsafe: fix hotplug races Matan Azrad
                               ` (2 preceding siblings ...)
  2018-02-11 17:24             ` [dpdk-dev] [PATCH v6 3/3] net/failsafe: fix hotplug races Matan Azrad
@ 2018-02-12 20:51             ` Matan Azrad
  2018-02-12 20:51               ` [dpdk-dev] [PATCH v7 1/3] net/failsafe: fix hotplug alarm cancel Matan Azrad
                                 ` (3 more replies)
  3 siblings, 4 replies; 36+ messages in thread
From: Matan Azrad @ 2018-02-12 20:51 UTC (permalink / raw)
  To: Gaetan Rivet; +Cc: dev

This series fixes the failsafe races between control commands and the asynchronous plug-out/plug-in processes.

V7(matan):
Improve commit logs.
Restore the empty line that was removed.
Restore the description that was wrongly removed.

V6(matan):
Full lock based fix.
Change the remove flag scope until SW resources release. 

v5(Matan):
Change defines names to failsafe convention (UNSAFE).
split a fix patch.

v4(Matan):
Rebase on top of 18.02-rc3.
Extend the fix for other control commands.
Fix hotplug alarm cancel.

V3(Ophir):
Rebase v2.
Add rationales (copy from an email which accompanied v2).



Matan Azrad (3):
  net/failsafe: fix hotplug alarm cancel
  net/failsafe: fix removal scope
  net/failsafe: fix hotplug races

 drivers/net/failsafe/Makefile           |   1 +
 drivers/net/failsafe/failsafe.c         |  53 +++++++++---
 drivers/net/failsafe/failsafe_ether.c   |   6 ++
 drivers/net/failsafe/failsafe_flow.c    |  20 ++++-
 drivers/net/failsafe/failsafe_ops.c     | 149 ++++++++++++++++++++++++++------
 drivers/net/failsafe/failsafe_private.h |  56 ++++++++++++
 6 files changed, 248 insertions(+), 37 deletions(-)

-- 
1.9.5

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [dpdk-dev] [PATCH v7 1/3] net/failsafe: fix hotplug alarm cancel
  2018-02-12 20:51             ` [dpdk-dev] [PATCH v7 0/3] failsafe: " Matan Azrad
@ 2018-02-12 20:51               ` Matan Azrad
  2018-02-12 20:51               ` [dpdk-dev] [PATCH v7 2/3] net/failsafe: fix removal scope Matan Azrad
                                 ` (2 subsequent siblings)
  3 siblings, 0 replies; 36+ messages in thread
From: Matan Azrad @ 2018-02-12 20:51 UTC (permalink / raw)
  To: Gaetan Rivet; +Cc: dev, stable

The hot-plug alarm mechanism of fail-safe PMD is responsible for
handling removed devices during a plug-out event and to restore them
back to activity following a plug-in event.

Fail-safe sets a flag called "pending_alarm" to validate that only one
alarm callback is pending at any time. While this flag is required to
avoid simultaneous initiations of the alarm thread - it should not be
considered during alarm thread cancellation.

So, when failsafe_hotplug_alarm_cancel() was called while the alarm
callback was being executed the alarm mechanism was not stopped.

Skip checking the "pending_alarm" flag to allow alarm thread
cancellation at all times.

Fixes: ebea83f899d8 ("net/failsafe: add plug-in support")
Cc: stable@dpdk.org

Signed-off-by: Matan Azrad <matan@mellanox.com>
---
 drivers/net/failsafe/failsafe.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
index 2665a39..7b2cdbb 100644
--- a/drivers/net/failsafe/failsafe.c
+++ b/drivers/net/failsafe/failsafe.c
@@ -85,16 +85,14 @@ failsafe_hotplug_alarm_cancel(struct rte_eth_dev *dev)
 {
 	int ret = 0;
 
-	if (PRIV(dev)->pending_alarm) {
-		rte_errno = 0;
-		rte_eal_alarm_cancel(fs_hotplug_alarm, dev);
-		if (rte_errno) {
-			ERROR("rte_eal_alarm_cancel failed (errno: %s)",
-			      strerror(rte_errno));
-			ret = -rte_errno;
-		} else {
-			PRIV(dev)->pending_alarm = 0;
-		}
+	rte_errno = 0;
+	rte_eal_alarm_cancel(fs_hotplug_alarm, dev);
+	if (rte_errno) {
+		ERROR("rte_eal_alarm_cancel failed (errno: %s)",
+		      strerror(rte_errno));
+		ret = -rte_errno;
+	} else {
+		PRIV(dev)->pending_alarm = 0;
 	}
 	return ret;
 }
-- 
1.9.5

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [dpdk-dev] [PATCH v7 2/3] net/failsafe: fix removal scope
  2018-02-12 20:51             ` [dpdk-dev] [PATCH v7 0/3] failsafe: " Matan Azrad
  2018-02-12 20:51               ` [dpdk-dev] [PATCH v7 1/3] net/failsafe: fix hotplug alarm cancel Matan Azrad
@ 2018-02-12 20:51               ` Matan Azrad
  2018-02-12 20:51               ` [dpdk-dev] [PATCH v7 3/3] net/failsafe: fix hotplug races Matan Azrad
  2018-02-13 13:31               ` [dpdk-dev] [PATCH v7 0/3] failsafe: " Gaëtan Rivet
  3 siblings, 0 replies; 36+ messages in thread
From: Matan Azrad @ 2018-02-12 20:51 UTC (permalink / raw)
  To: Gaetan Rivet; +Cc: dev, stable

The fail-safe PMD uses a per sub-device flag called "remove" to
indicate the scope where the sub-device was removed physically and
whether its software resources should be released.

This flag is set when the fail-safe receives an RMV notification
about the physical removal of the sub-device, and should be unset when
all the sub-device resources are released.

The previous code wrongly unsets the flag in dev_configure(), instead
of when the software resources release is completed.

Move the unsetting of the remove flag to the end of the software
resources release.

Fixes: a46f8d5 ("net/failsafe: add fail-safe PMD")
Cc: stable@dpdk.org

Signed-off-by: Matan Azrad <matan@mellanox.com>
---
 drivers/net/failsafe/failsafe_ether.c | 1 +
 drivers/net/failsafe/failsafe_ops.c   | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
index 4c6e938..d820faf 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -280,6 +280,7 @@ fs_dev_remove(struct sub_device *sdev)
 		/* the end */
 		break;
 	}
+	sdev->remove = 0;
 	failsafe_hotplug_alarm_install(sdev->fs_dev);
 }
 
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
index 7a67e16..f0e48c1 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -131,7 +131,6 @@ fs_dev_configure(struct rte_eth_dev *dev)
 			dev->data->dev_conf.intr_conf.lsc = 0;
 		}
 		DEBUG("Configuring sub-device %d", i);
-		sdev->remove = 0;
 		ret = rte_eth_dev_configure(PORT_ID(sdev),
 					dev->data->nb_rx_queues,
 					dev->data->nb_tx_queues,
-- 
1.9.5

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [dpdk-dev] [PATCH v7 3/3] net/failsafe: fix hotplug races
  2018-02-12 20:51             ` [dpdk-dev] [PATCH v7 0/3] failsafe: " Matan Azrad
  2018-02-12 20:51               ` [dpdk-dev] [PATCH v7 1/3] net/failsafe: fix hotplug alarm cancel Matan Azrad
  2018-02-12 20:51               ` [dpdk-dev] [PATCH v7 2/3] net/failsafe: fix removal scope Matan Azrad
@ 2018-02-12 20:51               ` Matan Azrad
  2018-02-13 13:31               ` [dpdk-dev] [PATCH v7 0/3] failsafe: " Gaëtan Rivet
  3 siblings, 0 replies; 36+ messages in thread
From: Matan Azrad @ 2018-02-12 20:51 UTC (permalink / raw)
  To: Gaetan Rivet; +Cc: dev, stable

Fail-safe uses a periodic alarm mechanism, running from the host
thread, to manage the hot-plug events of its sub-devices. This
management requires a lot of sub-devices PMDs operations
(stop, close, start, configure, etc.).

While the hot-plug alarm runs in the host thread, the application may
call fail-safe operations, which directly trigger the sub-devices PMDs
operations as well. This call may occur from any thread decided by the
application (probably the master thread).

Thus, more than one operation can be executed to a sub-device at the
same time. This can initiate a lot of races in the sub-PMDs.

Moreover, some control operations update the fail-safe internal
databases, which can be used by the alarm mechanism at the same time.
This can also initiate races and crashes.

Fail-safe is the owner of its sub-devices and must synchronize their
use according to the ETHDEV ownership rules.

Synchronize hot-plug management with a new lock mechanism that uses a
mutex to protect each critical section in the fail-safe hot-plug
mechanism and control operations, preventing any races between them.

Fixes: a46f8d5 ("net/failsafe: add fail-safe PMD")
Cc: stable@dpdk.org

Signed-off-by: Matan Azrad <matan@mellanox.com>
---
 drivers/net/failsafe/Makefile           |   1 +
 drivers/net/failsafe/failsafe.c         |  35 ++++++++
 drivers/net/failsafe/failsafe_ether.c   |   5 ++
 drivers/net/failsafe/failsafe_flow.c    |  20 ++++-
 drivers/net/failsafe/failsafe_ops.c     | 148 ++++++++++++++++++++++++++------
 drivers/net/failsafe/failsafe_private.h |  56 ++++++++++++
 6 files changed, 239 insertions(+), 26 deletions(-)

diff --git a/drivers/net/failsafe/Makefile b/drivers/net/failsafe/Makefile
index d1ae899..bd2f019 100644
--- a/drivers/net/failsafe/Makefile
+++ b/drivers/net/failsafe/Makefile
@@ -68,5 +68,6 @@ CFLAGS += -pedantic
 LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
 LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
 LDLIBS += -lrte_bus_vdev
+LDLIBS += -lpthread
 
 include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
index 7b2cdbb..c499bfb 100644
--- a/drivers/net/failsafe/failsafe.c
+++ b/drivers/net/failsafe/failsafe.c
@@ -113,17 +113,46 @@ fs_hotplug_alarm(void *arg)
 			break;
 	/* if we have non-probed device */
 	if (i != PRIV(dev)->subs_tail) {
+		if (fs_lock(dev, 1) != 0)
+			goto reinstall;
 		ret = failsafe_eth_dev_state_sync(dev);
+		fs_unlock(dev, 1);
 		if (ret)
 			ERROR("Unable to synchronize sub_device state");
 	}
 	failsafe_dev_remove(dev);
+reinstall:
 	ret = failsafe_hotplug_alarm_install(dev);
 	if (ret)
 		ERROR("Unable to set up next alarm");
 }
 
 static int
+fs_mutex_init(struct fs_priv *priv)
+{
+	int ret;
+	pthread_mutexattr_t attr;
+
+	ret = pthread_mutexattr_init(&attr);
+	if (ret) {
+		ERROR("Cannot initiate mutex attributes - %s", strerror(ret));
+		return ret;
+	}
+	/* Allow mutex relocks for the thread holding the mutex. */
+	ret = pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
+	if (ret) {
+		ERROR("Cannot set mutex type - %s", strerror(ret));
+		return ret;
+	}
+	ret = pthread_mutex_init(&priv->hotplug_mutex, &attr);
+	if (ret) {
+		ERROR("Cannot initiate mutex - %s", strerror(ret));
+		return ret;
+	}
+	return 0;
+}
+
+static int
 fs_eth_dev_create(struct rte_vdev_device *vdev)
 {
 	struct rte_eth_dev *dev;
@@ -176,6 +205,9 @@ fs_eth_dev_create(struct rte_vdev_device *vdev)
 	ret = failsafe_eal_init(dev);
 	if (ret)
 		goto free_args;
+	ret = fs_mutex_init(priv);
+	if (ret)
+		goto free_args;
 	ret = failsafe_hotplug_alarm_install(dev);
 	if (ret) {
 		ERROR("Could not set up plug-in event detection");
@@ -250,6 +282,9 @@ fs_rte_eth_free(const char *name)
 		ERROR("Error while uninitializing sub-EAL");
 	failsafe_args_free(dev);
 	fs_sub_device_free(dev);
+	ret = pthread_mutex_destroy(&PRIV(dev)->hotplug_mutex);
+	if (ret)
+		ERROR("Error while destroying hotplug mutex");
 	rte_free(PRIV(dev));
 	rte_eth_dev_release_port(dev);
 	return ret;
diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
index d820faf..2c0bf93 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -328,8 +328,11 @@ failsafe_dev_remove(struct rte_eth_dev *dev)
 
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
 		if (sdev->remove && fs_rxtx_clean(sdev)) {
+			if (fs_lock(dev, 1) != 0)
+				return;
 			fs_dev_stats_save(sdev);
 			fs_dev_remove(sdev);
+			fs_unlock(dev, 1);
 		}
 }
 
@@ -429,6 +432,7 @@ failsafe_eth_rmv_event_callback(uint16_t port_id __rte_unused,
 {
 	struct sub_device *sdev = cb_arg;
 
+	fs_lock(sdev->fs_dev, 0);
 	/* Switch as soon as possible tx_dev. */
 	fs_switch_dev(sdev->fs_dev, sdev);
 	/* Use safe bursts in any case. */
@@ -438,6 +442,7 @@ failsafe_eth_rmv_event_callback(uint16_t port_id __rte_unused,
 	 * the callback at the source of the current thread context.
 	 */
 	sdev->remove = 1;
+	fs_unlock(sdev->fs_dev, 0);
 	return 0;
 }
 
diff --git a/drivers/net/failsafe/failsafe_flow.c b/drivers/net/failsafe/failsafe_flow.c
index 4d18e8e..ec8c909 100644
--- a/drivers/net/failsafe/failsafe_flow.c
+++ b/drivers/net/failsafe/failsafe_flow.c
@@ -55,6 +55,7 @@ fs_flow_validate(struct rte_eth_dev *dev,
 	uint8_t i;
 	int ret;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_flow_validate on sub_device %d", i);
 		ret = rte_flow_validate(PORT_ID(sdev),
@@ -62,9 +63,11 @@ fs_flow_validate(struct rte_eth_dev *dev,
 		if ((ret = fs_err(sdev, ret))) {
 			ERROR("Operation rte_flow_validate failed for sub_device %d"
 			      " with error %d", i, ret);
+			fs_unlock(dev, 0);
 			return ret;
 		}
 	}
+	fs_unlock(dev, 0);
 	return 0;
 }
 
@@ -79,6 +82,7 @@ fs_flow_create(struct rte_eth_dev *dev,
 	struct rte_flow *flow;
 	uint8_t i;
 
+	fs_lock(dev, 0);
 	flow = fs_flow_allocate(attr, patterns, actions);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		flow->flows[i] = rte_flow_create(PORT_ID(sdev),
@@ -90,6 +94,7 @@ fs_flow_create(struct rte_eth_dev *dev,
 		}
 	}
 	TAILQ_INSERT_TAIL(&PRIV(dev)->flow_list, flow, next);
+	fs_unlock(dev, 0);
 	return flow;
 err:
 	FOREACH_SUBDEV(sdev, i, dev) {
@@ -98,6 +103,7 @@ err:
 				flow->flows[i], error);
 	}
 	fs_flow_release(&flow);
+	fs_unlock(dev, 0);
 	return NULL;
 }
 
@@ -115,6 +121,7 @@ fs_flow_destroy(struct rte_eth_dev *dev,
 		return -EINVAL;
 	}
 	ret = 0;
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		int local_ret;
 
@@ -131,6 +138,7 @@ fs_flow_destroy(struct rte_eth_dev *dev,
 	}
 	TAILQ_REMOVE(&PRIV(dev)->flow_list, flow, next);
 	fs_flow_release(&flow);
+	fs_unlock(dev, 0);
 	return ret;
 }
 
@@ -144,12 +152,14 @@ fs_flow_flush(struct rte_eth_dev *dev,
 	uint8_t i;
 	int ret;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_flow_flush on sub_device %d", i);
 		ret = rte_flow_flush(PORT_ID(sdev), error);
 		if ((ret = fs_err(sdev, ret))) {
 			ERROR("Operation rte_flow_flush failed for sub_device %d"
 			      " with error %d", i, ret);
+			fs_unlock(dev, 0);
 			return ret;
 		}
 	}
@@ -157,6 +167,7 @@ fs_flow_flush(struct rte_eth_dev *dev,
 		TAILQ_REMOVE(&PRIV(dev)->flow_list, flow, next);
 		fs_flow_release(&flow);
 	}
+	fs_unlock(dev, 0);
 	return 0;
 }
 
@@ -169,15 +180,19 @@ fs_flow_query(struct rte_eth_dev *dev,
 {
 	struct sub_device *sdev;
 
+	fs_lock(dev, 0);
 	sdev = TX_SUBDEV(dev);
 	if (sdev != NULL) {
 		int ret = rte_flow_query(PORT_ID(sdev),
 					 flow->flows[SUB_ID(sdev)],
 					 type, arg, error);
 
-		if ((ret = fs_err(sdev, ret)))
+		if ((ret = fs_err(sdev, ret))) {
+			fs_unlock(dev, 0);
 			return ret;
+		}
 	}
+	fs_unlock(dev, 0);
 	WARN("No active sub_device to query about its flow");
 	return -1;
 }
@@ -191,6 +206,7 @@ fs_flow_isolate(struct rte_eth_dev *dev,
 	uint8_t i;
 	int ret;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV(sdev, i, dev) {
 		if (sdev->state < DEV_PROBED)
 			continue;
@@ -202,11 +218,13 @@ fs_flow_isolate(struct rte_eth_dev *dev,
 		if ((ret = fs_err(sdev, ret))) {
 			ERROR("Operation rte_flow_isolate failed for sub_device %d"
 			      " with error %d", i, ret);
+			fs_unlock(dev, 0);
 			return ret;
 		}
 		sdev->flow_isolated = set;
 	}
 	PRIV(dev)->flow_isolated = set;
+	fs_unlock(dev, 0);
 	return 0;
 }
 
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
index f0e48c1..fe64c68 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -94,6 +94,7 @@ fs_dev_configure(struct rte_eth_dev *dev)
 	uint8_t i;
 	int ret;
 
+	fs_lock(dev, 0);
 	supp_tx_offloads = PRIV(dev)->infos.tx_offload_capa;
 	tx_offloads = dev->data->dev_conf.txmode.offloads;
 	if ((tx_offloads & supp_tx_offloads) != tx_offloads) {
@@ -101,6 +102,7 @@ fs_dev_configure(struct rte_eth_dev *dev)
 		ERROR("Some Tx offloads are not supported, "
 		      "requested 0x%" PRIx64 " supported 0x%" PRIx64,
 		      tx_offloads, supp_tx_offloads);
+		fs_unlock(dev, 0);
 		return -rte_errno;
 	}
 	FOREACH_SUBDEV(sdev, i, dev) {
@@ -139,6 +141,7 @@ fs_dev_configure(struct rte_eth_dev *dev)
 			if (!fs_err(sdev, ret))
 				continue;
 			ERROR("Could not configure sub_device %d", i);
+			fs_unlock(dev, 0);
 			return ret;
 		}
 		if (rmv_interrupt) {
@@ -165,6 +168,7 @@ fs_dev_configure(struct rte_eth_dev *dev)
 	}
 	if (PRIV(dev)->state < DEV_ACTIVE)
 		PRIV(dev)->state = DEV_ACTIVE;
+	fs_unlock(dev, 0);
 	return 0;
 }
 
@@ -175,9 +179,12 @@ fs_dev_start(struct rte_eth_dev *dev)
 	uint8_t i;
 	int ret;
 
+	fs_lock(dev, 0);
 	ret = failsafe_rx_intr_install(dev);
-	if (ret)
+	if (ret) {
+		fs_unlock(dev, 0);
 		return ret;
+	}
 	FOREACH_SUBDEV(sdev, i, dev) {
 		if (sdev->state != DEV_ACTIVE)
 			continue;
@@ -186,6 +193,7 @@ fs_dev_start(struct rte_eth_dev *dev)
 		if (ret) {
 			if (!fs_err(sdev, ret))
 				continue;
+			fs_unlock(dev, 0);
 			return ret;
 		}
 		ret = failsafe_rx_intr_install_subdevice(sdev);
@@ -193,6 +201,7 @@ fs_dev_start(struct rte_eth_dev *dev)
 			if (!fs_err(sdev, ret))
 				continue;
 			rte_eth_dev_stop(PORT_ID(sdev));
+			fs_unlock(dev, 0);
 			return ret;
 		}
 		sdev->state = DEV_STARTED;
@@ -200,6 +209,7 @@ fs_dev_start(struct rte_eth_dev *dev)
 	if (PRIV(dev)->state < DEV_STARTED)
 		PRIV(dev)->state = DEV_STARTED;
 	fs_switch_dev(dev, NULL);
+	fs_unlock(dev, 0);
 	return 0;
 }
 
@@ -209,6 +219,7 @@ fs_dev_stop(struct rte_eth_dev *dev)
 	struct sub_device *sdev;
 	uint8_t i;
 
+	fs_lock(dev, 0);
 	PRIV(dev)->state = DEV_STARTED - 1;
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_STARTED) {
 		rte_eth_dev_stop(PORT_ID(sdev));
@@ -216,6 +227,7 @@ fs_dev_stop(struct rte_eth_dev *dev)
 		sdev->state = DEV_STARTED - 1;
 	}
 	failsafe_rx_intr_uninstall(dev);
+	fs_unlock(dev, 0);
 }
 
 static int
@@ -225,15 +237,18 @@ fs_dev_set_link_up(struct rte_eth_dev *dev)
 	uint8_t i;
 	int ret;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_eth_dev_set_link_up on sub_device %d", i);
 		ret = rte_eth_dev_set_link_up(PORT_ID(sdev));
 		if ((ret = fs_err(sdev, ret))) {
 			ERROR("Operation rte_eth_dev_set_link_up failed for sub_device %d"
 			      " with error %d", i, ret);
+			fs_unlock(dev, 0);
 			return ret;
 		}
 	}
+	fs_unlock(dev, 0);
 	return 0;
 }
 
@@ -244,15 +259,18 @@ fs_dev_set_link_down(struct rte_eth_dev *dev)
 	uint8_t i;
 	int ret;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_eth_dev_set_link_down on sub_device %d", i);
 		ret = rte_eth_dev_set_link_down(PORT_ID(sdev));
 		if ((ret = fs_err(sdev, ret))) {
 			ERROR("Operation rte_eth_dev_set_link_down failed for sub_device %d"
 			      " with error %d", i, ret);
+			fs_unlock(dev, 0);
 			return ret;
 		}
 	}
+	fs_unlock(dev, 0);
 	return 0;
 }
 
@@ -263,6 +281,7 @@ fs_dev_close(struct rte_eth_dev *dev)
 	struct sub_device *sdev;
 	uint8_t i;
 
+	fs_lock(dev, 0);
 	failsafe_hotplug_alarm_cancel(dev);
 	if (PRIV(dev)->state == DEV_STARTED)
 		dev->dev_ops->dev_stop(dev);
@@ -273,6 +292,7 @@ fs_dev_close(struct rte_eth_dev *dev)
 		sdev->state = DEV_ACTIVE - 1;
 	}
 	fs_dev_free_queues(dev);
+	fs_unlock(dev, 0);
 }
 
 static bool
@@ -305,14 +325,16 @@ fs_rx_queue_release(void *queue)
 	if (queue == NULL)
 		return;
 	rxq = queue;
+	dev = rxq->priv->dev;
+	fs_lock(dev, 0);
 	if (rxq->event_fd > 0)
 		close(rxq->event_fd);
-	dev = rxq->priv->dev;
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
 		SUBOPS(sdev, rx_queue_release)
 			(ETH(sdev)->data->rx_queues[rxq->qid]);
 	dev->data->rx_queues[rxq->qid] = NULL;
 	rte_free(rxq);
+	fs_unlock(dev, 0);
 }
 
 static int
@@ -338,6 +360,7 @@ fs_rx_queue_setup(struct rte_eth_dev *dev,
 	uint8_t i;
 	int ret;
 
+	fs_lock(dev, 0);
 	rxq = dev->data->rx_queues[rx_queue_id];
 	if (rxq != NULL) {
 		fs_rx_queue_release(rxq);
@@ -353,14 +376,17 @@ fs_rx_queue_setup(struct rte_eth_dev *dev,
 		      dev->data->dev_conf.rxmode.offloads,
 		      PRIV(dev)->infos.rx_offload_capa |
 		      PRIV(dev)->infos.rx_queue_offload_capa);
+		fs_unlock(dev, 0);
 		return -rte_errno;
 	}
 	rxq = rte_zmalloc(NULL,
 			  sizeof(*rxq) +
 			  sizeof(rte_atomic64_t) * PRIV(dev)->subs_tail,
 			  RTE_CACHE_LINE_SIZE);
-	if (rxq == NULL)
+	if (rxq == NULL) {
+		fs_unlock(dev, 0);
 		return -ENOMEM;
+	}
 	FOREACH_SUBDEV(sdev, i, dev)
 		rte_atomic64_init(&rxq->refcnt[i]);
 	rxq->qid = rx_queue_id;
@@ -371,8 +397,10 @@ fs_rx_queue_setup(struct rte_eth_dev *dev,
 	rxq->priv = PRIV(dev);
 	rxq->sdev = PRIV(dev)->subs;
 	ret = rte_intr_efd_enable(&intr_handle, 1);
-	if (ret < 0)
+	if (ret < 0) {
+		fs_unlock(dev, 0);
 		return ret;
+	}
 	rxq->event_fd = intr_handle.efds[0];
 	dev->data->rx_queues[rx_queue_id] = rxq;
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
@@ -385,9 +413,11 @@ fs_rx_queue_setup(struct rte_eth_dev *dev,
 			goto free_rxq;
 		}
 	}
+	fs_unlock(dev, 0);
 	return 0;
 free_rxq:
 	fs_rx_queue_release(rxq);
+	fs_unlock(dev, 0);
 	return ret;
 }
 
@@ -400,20 +430,21 @@ fs_rx_intr_enable(struct rte_eth_dev *dev, uint16_t idx)
 	int ret;
 	int rc = 0;
 
+	fs_lock(dev, 0);
 	if (idx >= dev->data->nb_rx_queues) {
-		rte_errno = EINVAL;
-		return -rte_errno;
+		rc = -EINVAL;
+		goto unlock;
 	}
 	rxq = dev->data->rx_queues[idx];
 	if (rxq == NULL || rxq->event_fd <= 0) {
-		rte_errno = EINVAL;
-		return -rte_errno;
+		rc = -EINVAL;
+		goto unlock;
 	}
 	/* Fail if proxy service is nor running. */
 	if (PRIV(dev)->rxp.sstate != SS_RUNNING) {
 		ERROR("failsafe interrupt services are not running");
-		rte_errno = EAGAIN;
-		return -rte_errno;
+		rc = -EAGAIN;
+		goto unlock;
 	}
 	rxq->enable_events = 1;
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
@@ -422,6 +453,8 @@ fs_rx_intr_enable(struct rte_eth_dev *dev, uint16_t idx)
 		if (ret)
 			rc = ret;
 	}
+unlock:
+	fs_unlock(dev, 0);
 	if (rc)
 		rte_errno = -rc;
 	return rc;
@@ -437,14 +470,15 @@ fs_rx_intr_disable(struct rte_eth_dev *dev, uint16_t idx)
 	int rc = 0;
 	int ret;
 
+	fs_lock(dev, 0);
 	if (idx >= dev->data->nb_rx_queues) {
-		rte_errno = EINVAL;
-		return -rte_errno;
+		rc = -EINVAL;
+		goto unlock;
 	}
 	rxq = dev->data->rx_queues[idx];
 	if (rxq == NULL || rxq->event_fd <= 0) {
-		rte_errno = EINVAL;
-		return -rte_errno;
+		rc = -EINVAL;
+		goto unlock;
 	}
 	rxq->enable_events = 0;
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
@@ -456,6 +490,8 @@ fs_rx_intr_disable(struct rte_eth_dev *dev, uint16_t idx)
 	/* Clear pending events */
 	while (read(rxq->event_fd, &u64, sizeof(uint64_t)) >  0)
 		;
+unlock:
+	fs_unlock(dev, 0);
 	if (rc)
 		rte_errno = -rc;
 	return rc;
@@ -492,11 +528,13 @@ fs_tx_queue_release(void *queue)
 		return;
 	txq = queue;
 	dev = txq->priv->dev;
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
 		SUBOPS(sdev, tx_queue_release)
 			(ETH(sdev)->data->tx_queues[txq->qid]);
 	dev->data->tx_queues[txq->qid] = NULL;
 	rte_free(txq);
+	fs_unlock(dev, 0);
 }
 
 static int
@@ -511,6 +549,7 @@ fs_tx_queue_setup(struct rte_eth_dev *dev,
 	uint8_t i;
 	int ret;
 
+	fs_lock(dev, 0);
 	txq = dev->data->tx_queues[tx_queue_id];
 	if (txq != NULL) {
 		fs_tx_queue_release(txq);
@@ -531,14 +570,17 @@ fs_tx_queue_setup(struct rte_eth_dev *dev,
 		      dev->data->dev_conf.txmode.offloads,
 		      PRIV(dev)->infos.tx_offload_capa |
 		      PRIV(dev)->infos.tx_queue_offload_capa);
+		fs_unlock(dev, 0);
 		return -rte_errno;
 	}
 	txq = rte_zmalloc("ethdev TX queue",
 			  sizeof(*txq) +
 			  sizeof(rte_atomic64_t) * PRIV(dev)->subs_tail,
 			  RTE_CACHE_LINE_SIZE);
-	if (txq == NULL)
+	if (txq == NULL) {
+		fs_unlock(dev, 0);
 		return -ENOMEM;
+	}
 	FOREACH_SUBDEV(sdev, i, dev)
 		rte_atomic64_init(&txq->refcnt[i]);
 	txq->qid = tx_queue_id;
@@ -557,9 +599,11 @@ fs_tx_queue_setup(struct rte_eth_dev *dev,
 			goto free_txq;
 		}
 	}
+	fs_unlock(dev, 0);
 	return 0;
 free_txq:
 	fs_tx_queue_release(txq);
+	fs_unlock(dev, 0);
 	return ret;
 }
 
@@ -586,8 +630,10 @@ fs_promiscuous_enable(struct rte_eth_dev *dev)
 	struct sub_device *sdev;
 	uint8_t i;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
 		rte_eth_promiscuous_enable(PORT_ID(sdev));
+	fs_unlock(dev, 0);
 }
 
 static void
@@ -596,8 +642,10 @@ fs_promiscuous_disable(struct rte_eth_dev *dev)
 	struct sub_device *sdev;
 	uint8_t i;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
 		rte_eth_promiscuous_disable(PORT_ID(sdev));
+	fs_unlock(dev, 0);
 }
 
 static void
@@ -606,8 +654,10 @@ fs_allmulticast_enable(struct rte_eth_dev *dev)
 	struct sub_device *sdev;
 	uint8_t i;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
 		rte_eth_allmulticast_enable(PORT_ID(sdev));
+	fs_unlock(dev, 0);
 }
 
 static void
@@ -616,8 +666,10 @@ fs_allmulticast_disable(struct rte_eth_dev *dev)
 	struct sub_device *sdev;
 	uint8_t i;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
 		rte_eth_allmulticast_disable(PORT_ID(sdev));
+	fs_unlock(dev, 0);
 }
 
 static int
@@ -628,6 +680,7 @@ fs_link_update(struct rte_eth_dev *dev,
 	uint8_t i;
 	int ret;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling link_update on sub_device %d", i);
 		ret = (SUBOPS(sdev, link_update))(ETH(sdev), wait_to_complete);
@@ -635,6 +688,7 @@ fs_link_update(struct rte_eth_dev *dev,
 		    rte_eth_dev_is_removed(PORT_ID(sdev)) == 0) {
 			ERROR("Link update failed for sub_device %d with error %d",
 			      i, ret);
+			fs_unlock(dev, 0);
 			return ret;
 		}
 	}
@@ -646,9 +700,11 @@ fs_link_update(struct rte_eth_dev *dev,
 		l2 = &ETH(TX_SUBDEV(dev))->data->dev_link;
 		if (memcmp(l1, l2, sizeof(*l1))) {
 			*l1 = *l2;
+			fs_unlock(dev, 0);
 			return 0;
 		}
 	}
+	fs_unlock(dev, 0);
 	return -1;
 }
 
@@ -661,6 +717,7 @@ fs_stats_get(struct rte_eth_dev *dev,
 	uint8_t i;
 	int ret;
 
+	fs_lock(dev, 0);
 	rte_memcpy(stats, &PRIV(dev)->stats_accumulator, sizeof(*stats));
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		struct rte_eth_stats *snapshot = &sdev->stats_snapshot.stats;
@@ -676,12 +733,14 @@ fs_stats_get(struct rte_eth_dev *dev,
 			ERROR("Operation rte_eth_stats_get failed for sub_device %d with error %d",
 				  i, ret);
 			*timestamp = 0;
+			fs_unlock(dev, 0);
 			return ret;
 		}
 		*timestamp = rte_rdtsc();
 inc:
 		failsafe_stats_increment(stats, snapshot);
 	}
+	fs_unlock(dev, 0);
 	return 0;
 }
 
@@ -691,11 +750,13 @@ fs_stats_reset(struct rte_eth_dev *dev)
 	struct sub_device *sdev;
 	uint8_t i;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		rte_eth_stats_reset(PORT_ID(sdev));
 		memset(&sdev->stats_snapshot, 0, sizeof(struct rte_eth_stats));
 	}
 	memset(&PRIV(dev)->stats_accumulator, 0, sizeof(struct rte_eth_stats));
+	fs_unlock(dev, 0);
 }
 
 /**
@@ -771,14 +832,20 @@ fs_dev_supported_ptypes_get(struct rte_eth_dev *dev)
 {
 	struct sub_device *sdev;
 	struct rte_eth_dev *edev;
+	const uint32_t *ret;
 
+	fs_lock(dev, 0);
 	sdev = TX_SUBDEV(dev);
-	if (sdev == NULL)
-		return NULL;
+	if (sdev == NULL) {
+		ret = NULL;
+		goto unlock;
+	}
 	edev = ETH(sdev);
 	/* ENOTSUP: counts as no supported ptypes */
-	if (SUBOPS(sdev, dev_supported_ptypes_get) == NULL)
-		return NULL;
+	if (SUBOPS(sdev, dev_supported_ptypes_get) == NULL) {
+		ret = NULL;
+		goto unlock;
+	}
 	/*
 	 * The API does not permit to do a clean AND of all ptypes,
 	 * It is also incomplete by design and we do not really care
@@ -786,7 +853,10 @@ fs_dev_supported_ptypes_get(struct rte_eth_dev *dev)
 	 * We just return the ptypes of the device of highest
 	 * priority, usually the PREFERRED device.
 	 */
-	return SUBOPS(sdev, dev_supported_ptypes_get)(edev);
+	ret = SUBOPS(sdev, dev_supported_ptypes_get)(edev);
+unlock:
+	fs_unlock(dev, 0);
+	return ret;
 }
 
 static int
@@ -796,15 +866,18 @@ fs_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
 	uint8_t i;
 	int ret;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_eth_dev_set_mtu on sub_device %d", i);
 		ret = rte_eth_dev_set_mtu(PORT_ID(sdev), mtu);
 		if ((ret = fs_err(sdev, ret))) {
 			ERROR("Operation rte_eth_dev_set_mtu failed for sub_device %d with error %d",
 			      i, ret);
+			fs_unlock(dev, 0);
 			return ret;
 		}
 	}
+	fs_unlock(dev, 0);
 	return 0;
 }
 
@@ -815,15 +888,18 @@ fs_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
 	uint8_t i;
 	int ret;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_eth_dev_vlan_filter on sub_device %d", i);
 		ret = rte_eth_dev_vlan_filter(PORT_ID(sdev), vlan_id, on);
 		if ((ret = fs_err(sdev, ret))) {
 			ERROR("Operation rte_eth_dev_vlan_filter failed for sub_device %d"
 			      " with error %d", i, ret);
+			fs_unlock(dev, 0);
 			return ret;
 		}
 	}
+	fs_unlock(dev, 0);
 	return 0;
 }
 
@@ -832,13 +908,22 @@ fs_flow_ctrl_get(struct rte_eth_dev *dev,
 		struct rte_eth_fc_conf *fc_conf)
 {
 	struct sub_device *sdev;
+	int ret;
 
+	fs_lock(dev, 0);
 	sdev = TX_SUBDEV(dev);
-	if (sdev == NULL)
-		return 0;
-	if (SUBOPS(sdev, flow_ctrl_get) == NULL)
-		return -ENOTSUP;
-	return SUBOPS(sdev, flow_ctrl_get)(ETH(sdev), fc_conf);
+	if (sdev == NULL) {
+		ret = 0;
+		goto unlock;
+	}
+	if (SUBOPS(sdev, flow_ctrl_get) == NULL) {
+		ret = -ENOTSUP;
+		goto unlock;
+	}
+	ret = SUBOPS(sdev, flow_ctrl_get)(ETH(sdev), fc_conf);
+unlock:
+	fs_unlock(dev, 0);
+	return ret;
 }
 
 static int
@@ -849,15 +934,18 @@ fs_flow_ctrl_set(struct rte_eth_dev *dev,
 	uint8_t i;
 	int ret;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_eth_dev_flow_ctrl_set on sub_device %d", i);
 		ret = rte_eth_dev_flow_ctrl_set(PORT_ID(sdev), fc_conf);
 		if ((ret = fs_err(sdev, ret))) {
 			ERROR("Operation rte_eth_dev_flow_ctrl_set failed for sub_device %d"
 			      " with error %d", i, ret);
+			fs_unlock(dev, 0);
 			return ret;
 		}
 	}
+	fs_unlock(dev, 0);
 	return 0;
 }
 
@@ -867,6 +955,7 @@ fs_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
 	struct sub_device *sdev;
 	uint8_t i;
 
+	fs_lock(dev, 0);
 	/* No check: already done within the rte_eth_dev_mac_addr_remove
 	 * call for the fail-safe device.
 	 */
@@ -874,6 +963,7 @@ fs_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
 		rte_eth_dev_mac_addr_remove(PORT_ID(sdev),
 				&dev->data->mac_addrs[index]);
 	PRIV(dev)->mac_addr_pool[index] = 0;
+	fs_unlock(dev, 0);
 }
 
 static int
@@ -887,11 +977,13 @@ fs_mac_addr_add(struct rte_eth_dev *dev,
 	uint8_t i;
 
 	RTE_ASSERT(index < FAILSAFE_MAX_ETHADDR);
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		ret = rte_eth_dev_mac_addr_add(PORT_ID(sdev), mac_addr, vmdq);
 		if ((ret = fs_err(sdev, ret))) {
 			ERROR("Operation rte_eth_dev_mac_addr_add failed for sub_device %"
 			      PRIu8 " with error %d", i, ret);
+			fs_unlock(dev, 0);
 			return ret;
 		}
 	}
@@ -900,6 +992,7 @@ fs_mac_addr_add(struct rte_eth_dev *dev,
 		PRIV(dev)->nb_mac_addr = index;
 	}
 	PRIV(dev)->mac_addr_pool[index] = vmdq;
+	fs_unlock(dev, 0);
 	return 0;
 }
 
@@ -909,8 +1002,10 @@ fs_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
 	struct sub_device *sdev;
 	uint8_t i;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
 		rte_eth_dev_default_mac_addr_set(PORT_ID(sdev), mac_addr);
+	fs_unlock(dev, 0);
 }
 
 static int
@@ -928,15 +1023,18 @@ fs_filter_ctrl(struct rte_eth_dev *dev,
 		*(const void **)arg = &fs_flow_ops;
 		return 0;
 	}
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_eth_dev_filter_ctrl on sub_device %d", i);
 		ret = rte_eth_dev_filter_ctrl(PORT_ID(sdev), type, op, arg);
 		if ((ret = fs_err(sdev, ret))) {
 			ERROR("Operation rte_eth_dev_filter_ctrl failed for sub_device %d"
 			      " with error %d", i, ret);
+			fs_unlock(dev, 0);
 			return ret;
 		}
 	}
+	fs_unlock(dev, 0);
 	return 0;
 }
 
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index f3be152..5b84db9 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -7,6 +7,7 @@
 #define _RTE_ETH_FAILSAFE_PRIVATE_H_
 
 #include <sys/queue.h>
+#include <pthread.h>
 
 #include <rte_atomic.h>
 #include <rte_dev.h>
@@ -161,6 +162,9 @@ struct fs_priv {
 	 * appropriate failsafe Rx queue.
 	 */
 	struct rx_proxy rxp;
+	pthread_mutex_t hotplug_mutex;
+	/* Hot-plug mutex is locked by the alarm mechanism. */
+	volatile unsigned int alarm_lock:1;
 	unsigned int pending_alarm:1; /* An alarm is pending */
 	/* flow isolation state */
 	int flow_isolated:1;
@@ -347,6 +351,58 @@ fs_find_next(struct rte_eth_dev *dev,
 }
 
 /*
+ * Lock hot-plug mutex.
+ * is_alarm means that the caller is, for sure, the hot-plug alarm mechanism.
+ */
+static inline int
+fs_lock(struct rte_eth_dev *dev, unsigned int is_alarm)
+{
+	int ret;
+
+	if (is_alarm) {
+		ret = pthread_mutex_trylock(&PRIV(dev)->hotplug_mutex);
+		if (ret) {
+			DEBUG("Hot-plug mutex lock trying failed(%s), will try"
+			      " again later...", strerror(ret));
+			return ret;
+		}
+		PRIV(dev)->alarm_lock = 1;
+	} else {
+		ret = pthread_mutex_lock(&PRIV(dev)->hotplug_mutex);
+		if (ret) {
+			ERROR("Cannot lock mutex(%s)", strerror(ret));
+			return ret;
+		}
+	}
+	DEBUG("Hot-plug mutex was locked by thread %lu%s", pthread_self(),
+	      PRIV(dev)->alarm_lock ? " by the hot-plug alarm" : "");
+	return ret;
+}
+
+/*
+ * Unlock hot-plug mutex.
+ * is_alarm means that the caller is, for sure, the hot-plug alarm mechanism.
+ */
+static inline void
+fs_unlock(struct rte_eth_dev *dev, unsigned int is_alarm)
+{
+	int ret;
+	unsigned int prev_alarm_lock = PRIV(dev)->alarm_lock;
+
+	if (is_alarm) {
+		RTE_ASSERT(PRIV(dev)->alarm_lock == 1);
+		PRIV(dev)->alarm_lock = 0;
+	}
+	ret = pthread_mutex_unlock(&PRIV(dev)->hotplug_mutex);
+	if (ret)
+		ERROR("Cannot unlock hot-plug mutex(%s)", strerror(ret));
+	else
+		DEBUG("Hot-plug mutex was unlocked by thread %lu%s",
+		      pthread_self(),
+		      prev_alarm_lock ? " by the hot-plug alarm" : "");
+}
+
+/*
  * Switch emitting device.
  * If banned is set, banned must not be considered for
  * the role of emitting device.
-- 
1.9.5

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [dpdk-dev] [PATCH v7 0/3] failsafe: fix hotplug races
  2018-02-12 20:51             ` [dpdk-dev] [PATCH v7 0/3] failsafe: " Matan Azrad
                                 ` (2 preceding siblings ...)
  2018-02-12 20:51               ` [dpdk-dev] [PATCH v7 3/3] net/failsafe: fix hotplug races Matan Azrad
@ 2018-02-13 13:31               ` Gaëtan Rivet
  2018-02-13 16:12                 ` Thomas Monjalon
  3 siblings, 1 reply; 36+ messages in thread
From: Gaëtan Rivet @ 2018-02-13 13:31 UTC (permalink / raw)
  To: Matan Azrad; +Cc: dev

Hi Matan,

On Mon, Feb 12, 2018 at 08:51:39PM +0000, Matan Azrad wrote:
> This series fixes failsafe race between control commands to the asynchronic plug-out\in processes.
> 

Thanks for tackling this complicated issue.
For the series:

Acked-by: Gaetan Rivet <gaetan.rivet@6wind.com>

I hope there will be more work on this matter soon.

> V7(matan):
> improve commit logs.
> return back empty line.
> return back description wrongly removed.
> 
> V6(matan):
> Full lock based fix.
> Change the remove flag scope until SW resources release. 
> 
> v5(Matan):
> Change defines names to failsafe convention (UNSAFE).
> split a fix patch.
> 
> v4(Matan):
> Rebase on top of 18.02-rc3.
> Extend the fix for other control commands.
> Fix hotplug alarm cancel.
> 
> V3(Ophir):
> Rebase v2.
> Add rationales (copy from an email which accompanied v2).
> 
> 
> 
> Matan Azrad (3):
>   net/failsafe: fix hotplug alarm cancel
>   net/failsafe: fix removal scope
>   net/failsafe: fix hotplug races
> 
>  drivers/net/failsafe/Makefile           |   1 +
>  drivers/net/failsafe/failsafe.c         |  53 +++++++++---
>  drivers/net/failsafe/failsafe_ether.c   |   6 ++
>  drivers/net/failsafe/failsafe_flow.c    |  20 ++++-
>  drivers/net/failsafe/failsafe_ops.c     | 149 ++++++++++++++++++++++++++------
>  drivers/net/failsafe/failsafe_private.h |  56 ++++++++++++
>  6 files changed, 248 insertions(+), 37 deletions(-)
> 
> -- 
> 1.9.5
> 

-- 
Gaëtan Rivet
6WIND

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [dpdk-dev] [PATCH v7 0/3] failsafe: fix hotplug races
  2018-02-13 13:31               ` [dpdk-dev] [PATCH v7 0/3] failsafe: " Gaëtan Rivet
@ 2018-02-13 16:12                 ` Thomas Monjalon
  2018-02-13 20:58                   ` De Lara Guarch, Pablo
  0 siblings, 1 reply; 36+ messages in thread
From: Thomas Monjalon @ 2018-02-13 16:12 UTC (permalink / raw)
  To: Matan Azrad; +Cc: dev, Gaëtan Rivet

13/02/2018 14:31, Gaëtan Rivet:
> Hi Matan,
> 
> On Mon, Feb 12, 2018 at 08:51:39PM +0000, Matan Azrad wrote:
> > This series fixes failsafe race between control commands to the asynchronic plug-out\in processes.
> > 
> 
> Thanks for tackling this complicated issue.
> For the series:
> 
> Acked-by: Gaetan Rivet <gaetan.rivet@6wind.com>

Applied, thanks

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [dpdk-dev] [PATCH v7 0/3] failsafe: fix hotplug races
  2018-02-13 16:12                 ` Thomas Monjalon
@ 2018-02-13 20:58                   ` De Lara Guarch, Pablo
  2018-02-13 21:13                     ` Matan Azrad
  0 siblings, 1 reply; 36+ messages in thread
From: De Lara Guarch, Pablo @ 2018-02-13 20:58 UTC (permalink / raw)
  To: Thomas Monjalon, Matan Azrad; +Cc: dev, Gaëtan Rivet

Hi,

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Thomas Monjalon
> Sent: Tuesday, February 13, 2018 4:12 PM
> To: Matan Azrad <matan@mellanox.com>
> Cc: dev@dpdk.org; Gaëtan Rivet <gaetan.rivet@6wind.com>
> Subject: Re: [dpdk-dev] [PATCH v7 0/3] failsafe: fix hotplug races
> 
> 13/02/2018 14:31, Gaëtan Rivet:
> > Hi Matan,
> >
> > On Mon, Feb 12, 2018 at 08:51:39PM +0000, Matan Azrad wrote:
> > > > This series fixes the failsafe race between control commands and the
> > > > asynchronous plug-out/in processes.
> > >
> >
> > Thanks for tackling this complicated issue.
> > For the series:
> >
> > Acked-by: Gaetan Rivet <gaetan.rivet@6wind.com>
> 
> Applied, thanks

There is a compilation error due to this patch on FreeBSD:

drivers/net/failsafe/failsafe_private.h:377:53: error: format specifies
type 'unsigned long' but the argument has type 'pthread_t' (aka 'struct pthread *') [-Werror,-Wformat]
        DEBUG("Hot-plug mutex was locked by thread %lu%s", pthread_self(),


I am not sure how to print a pthread_t, so I can just report the issue.

Thanks,
Pablo

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [dpdk-dev] [PATCH v7 0/3] failsafe: fix hotplug races
  2018-02-13 20:58                   ` De Lara Guarch, Pablo
@ 2018-02-13 21:13                     ` Matan Azrad
  2018-02-13 21:21                       ` Thomas Monjalon
  0 siblings, 1 reply; 36+ messages in thread
From: Matan Azrad @ 2018-02-13 21:13 UTC (permalink / raw)
  To: De Lara Guarch, Pablo, Thomas Monjalon; +Cc: dev, Gaëtan Rivet



 From: De Lara Guarch, Pablo [mailto:pablo.de.lara.guarch@intel.com]
> Hi,
> 
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Thomas Monjalon
> > Sent: Tuesday, February 13, 2018 4:12 PM
> > To: Matan Azrad <matan@mellanox.com>
> > Cc: dev@dpdk.org; Gaëtan Rivet <gaetan.rivet@6wind.com>
> > Subject: Re: [dpdk-dev] [PATCH v7 0/3] failsafe: fix hotplug races
> >
> > 13/02/2018 14:31, Gaëtan Rivet:
> > > Hi Matan,
> > >
> > > On Mon, Feb 12, 2018 at 08:51:39PM +0000, Matan Azrad wrote:
> > > > > This series fixes the failsafe race between control commands and the
> > > > > asynchronous plug-out/in processes.
> > > >
> > >
> > > Thanks for tackling this complicated issue.
> > > For the series:
> > >
> > > Acked-by: Gaetan Rivet <gaetan.rivet@6wind.com>
> >
> > Applied, thanks
> 
> There is a compilation error due to this patch on FreeBSD:
> 
> drivers/net/failsafe/failsafe_private.h:377:53: error: format specifies type
> 'unsigned long' but the argument has type 'pthread_t' (aka 'struct pthread *')
> [-Werror,-Wformat]
>         DEBUG("Hot-plug mutex was locked by thread %lu%s", pthread_self(),
> 
> 
> I am not sure how to print a pthread_t, so I can just report the issue.
>
Can you check with (unsigned long int) conversion?

 
> Thanks,
> Pablo

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [dpdk-dev] [PATCH v7 0/3] failsafe: fix hotplug races
  2018-02-13 21:13                     ` Matan Azrad
@ 2018-02-13 21:21                       ` Thomas Monjalon
  0 siblings, 0 replies; 36+ messages in thread
From: Thomas Monjalon @ 2018-02-13 21:21 UTC (permalink / raw)
  To: Matan Azrad; +Cc: De Lara Guarch, Pablo, dev, Gaëtan Rivet

13/02/2018 22:13, Matan Azrad:
>  From: De Lara Guarch, Pablo [mailto:pablo.de.lara.guarch@intel.com]
> > There is a compilation error due to this patch on FreeBSD:
> > 
> > drivers/net/failsafe/failsafe_private.h:377:53: error: format specifies type
> > 'unsigned long' but the argument has type 'pthread_t' (aka 'struct pthread *')
> > [-Werror,-Wformat]
> >         DEBUG("Hot-plug mutex was locked by thread %lu%s", pthread_self(),
> > 
> > 
> > I am not sure how to print a pthread_t, so I can just report the issue.
> >
> Can you check with (unsigned long int) conversion?

On FreeBSD, pthread_t is:
typedef struct  pthread *pthread_t;

pthread_t is not portable and should not be printed.
I am preparing a patch to enable pthread_t debugging only in Linux.

^ permalink raw reply	[flat|nested] 36+ messages in thread

end of thread, other threads:[~2018-02-13 21:21 UTC | newest]

Thread overview: 36+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-09-09 19:27 [dpdk-dev] [PATCH] net/failsafe: fix calling device during RMV events Ophir Munk
2017-09-11  8:31 ` Gaëtan Rivet
2017-09-23 21:57   ` Ophir Munk
2017-10-05 22:42     ` [dpdk-dev] [PATCH v3] " Ophir Munk
2017-10-20 10:35       ` Gaëtan Rivet
2017-10-23  7:17         ` Ophir Munk
2017-10-23  8:36           ` Gaëtan Rivet
2017-11-29 19:17             ` [dpdk-dev] [dpdk-stable] " Ferruh Yigit
2018-01-18 22:22               ` Thomas Monjalon
2018-01-18 23:35                 ` Gaëtan Rivet
2018-02-08 12:20       ` [dpdk-dev] [PATCH v4 0/2] failsafe: " Matan Azrad
2018-02-08 12:20         ` [dpdk-dev] [PATCH v4 1/2] net/failsafe: fix hotplug alarm cancel Matan Azrad
2018-02-08 12:20         ` [dpdk-dev] [PATCH v4 2/2] net/failsafe: fix calling device during RMV events Matan Azrad
2018-02-08 16:34         ` [dpdk-dev] [PATCH v5 0/3] failsafe: " Matan Azrad
2018-02-08 16:34           ` [dpdk-dev] [PATCH v5 1/3] net/failsafe: fix hotplug alarm cancel Matan Azrad
2018-02-08 16:34           ` [dpdk-dev] [PATCH v5 2/3] net/failsafe: fix removal scope Matan Azrad
2018-02-08 17:19             ` Gaëtan Rivet
2018-02-08 19:03               ` Matan Azrad
2018-02-08 16:34           ` [dpdk-dev] [PATCH v5 3/3] net/failsafe: fix calling device during RMV events Matan Azrad
2018-02-08 18:11             ` Gaëtan Rivet
2018-02-08 19:24               ` Matan Azrad
2018-02-11 17:24           ` [dpdk-dev] [PATCH v6 0/3] failsafe: fix hotplug races Matan Azrad
2018-02-11 17:24             ` [dpdk-dev] [PATCH v6 1/3] net/failsafe: fix hotplug alarm cancel Matan Azrad
2018-02-11 17:24             ` [dpdk-dev] [PATCH v6 2/3] net/failsafe: fix removal scope Matan Azrad
2018-02-11 17:24             ` [dpdk-dev] [PATCH v6 3/3] net/failsafe: fix hotplug races Matan Azrad
2018-02-12 18:33               ` Gaëtan Rivet
2018-02-12 20:35                 ` Matan Azrad
2018-02-12 20:51             ` [dpdk-dev] [PATCH v7 0/3] failsafe: " Matan Azrad
2018-02-12 20:51               ` [dpdk-dev] [PATCH v7 1/3] net/failsafe: fix hotplug alarm cancel Matan Azrad
2018-02-12 20:51               ` [dpdk-dev] [PATCH v7 2/3] net/failsafe: fix removal scope Matan Azrad
2018-02-12 20:51               ` [dpdk-dev] [PATCH v7 3/3] net/failsafe: fix hotplug races Matan Azrad
2018-02-13 13:31               ` [dpdk-dev] [PATCH v7 0/3] failsafe: " Gaëtan Rivet
2018-02-13 16:12                 ` Thomas Monjalon
2018-02-13 20:58                   ` De Lara Guarch, Pablo
2018-02-13 21:13                     ` Matan Azrad
2018-02-13 21:21                       ` Thomas Monjalon

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).