DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH] kni: fix rtnl deadlocks and race conditions
@ 2020-11-26 14:46 Elad Nachman
  2021-02-19 18:41 ` Ferruh Yigit
                   ` (5 more replies)
  0 siblings, 6 replies; 42+ messages in thread
From: Elad Nachman @ 2020-11-26 14:46 UTC (permalink / raw)
  To: Ferruh Yigit; +Cc: dev, Elad Nachman

This patch leverages on Stephen Hemminger's 64106 patch from Dec 2019,
and fixes the issues reported by Ferruh and Igor:

A. KNI sync lock is being locked while rtnl is held. 
If two threads are calling kni_net_process_request() ,
then the first one will take the sync lock, release rtnl lock then sleep.
The second thread will try to lock sync lock while holding rtnl.
The first thread will wake, and try to lock rtnl, resulting in a deadlock.
The remedy is to release rtnl before locking the KNI sync lock.
Since in between nothing is accessing Linux network-wise,
no rtnl locking is needed.

B. There is a race condition in __dev_close_many() processing the
close_list while the application terminates.
It looks like if two vEth devices are terminating,
and one releases the rtnl lock, the other takes it,
updating the close_list in an unstable state,
causing the close_list to become a circular linked list,
hence list_for_each_entry() will endlessly loop inside
__dev_close_many() . 
Since the description for the original patch indicates the
original motivation was bringing the device up,
I have changed kni_net_process_request() to hold the rtnl mutex
in case of bringing the device down since this is the path called
from __dev_close_many() , causing the corruption of the close_list. 



Signed-off-by: Elad Nachman <eladv6@gmail.com>
---
 kernel/linux/kni/kni_net.c | 47 +++++++++++++++++++++++++-------------
 1 file changed, 31 insertions(+), 16 deletions(-)

diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c
index 4b752083d..cf5b0845d 100644
--- a/kernel/linux/kni/kni_net.c
+++ b/kernel/linux/kni/kni_net.c
@@ -17,6 +17,7 @@
 #include <linux/skbuff.h>
 #include <linux/kthread.h>
 #include <linux/delay.h>
+#include <linux/rtnetlink.h>
 
 #include <rte_kni_common.h>
 #include <kni_fifo.h>
@@ -102,18 +103,26 @@ get_data_kva(struct kni_dev *kni, void *pkt_kva)
  * It can be called to process the request.
  */
 static int
-kni_net_process_request(struct kni_dev *kni, struct rte_kni_request *req)
+kni_net_process_request(struct net_device *dev, struct rte_kni_request *req)
 {
+	struct kni_dev *kni = netdev_priv(dev);
 	int ret = -1;
 	void *resp_va;
 	uint32_t num;
 	int ret_val;
+	int req_is_dev_stop = 0;
 
-	if (!kni || !req) {
-		pr_err("No kni instance or request\n");
-		return -EINVAL;
-	}
+	if (req->req_id == RTE_KNI_REQ_CFG_NETWORK_IF &&
+			req->if_up == 0)
+		req_is_dev_stop = 1;
 
+	ASSERT_RTNL();
+
+	if (!req_is_dev_stop) {
+		dev_hold(dev);
+		rtnl_unlock();
+	}
+
 	mutex_lock(&kni->sync_lock);
 
 	/* Construct data */
@@ -125,8 +134,13 @@ kni_net_process_request(struct kni_dev *kni, struct rte_kni_request *req)
 		goto fail;
 	}
 
+	/* Since we need to wait and RTNL mutex is held
+	 * drop the mutex and hold refernce to keep device
+	 */
+
 	ret_val = wait_event_interruptible_timeout(kni->wq,
 			kni_fifo_count(kni->resp_q), 3 * HZ);
+
 	if (signal_pending(current) || ret_val <= 0) {
 		ret = -ETIME;
 		goto fail;
@@ -144,6 +158,13 @@ kni_net_process_request(struct kni_dev *kni, struct rte_kni_request *req)
 
 fail:
 	mutex_unlock(&kni->sync_lock);
+
+
+	if (!req_is_dev_stop) {
+		rtnl_lock();
+		dev_put(dev);
+	}
+
 	return ret;
 }
 
@@ -155,7 +176,6 @@ kni_net_open(struct net_device *dev)
 {
 	int ret;
 	struct rte_kni_request req;
-	struct kni_dev *kni = netdev_priv(dev);
 
 	netif_start_queue(dev);
 	if (kni_dflt_carrier == 1)
@@ -168,7 +188,7 @@ kni_net_open(struct net_device *dev)
 
 	/* Setting if_up to non-zero means up */
 	req.if_up = 1;
-	ret = kni_net_process_request(kni, &req);
+	ret = kni_net_process_request(dev, &req);
 
 	return (ret == 0) ? req.result : ret;
 }
@@ -178,7 +198,6 @@ kni_net_release(struct net_device *dev)
 {
 	int ret;
 	struct rte_kni_request req;
-	struct kni_dev *kni = netdev_priv(dev);
 
 	netif_stop_queue(dev); /* can't transmit any more */
 	netif_carrier_off(dev);
@@ -188,7 +207,7 @@ kni_net_release(struct net_device *dev)
 
 	/* Setting if_up to 0 means down */
 	req.if_up = 0;
-	ret = kni_net_process_request(kni, &req);
+	ret = kni_net_process_request(dev, &req);
 
 	return (ret == 0) ? req.result : ret;
 }
@@ -643,14 +662,13 @@ kni_net_change_mtu(struct net_device *dev, int new_mtu)
 {
 	int ret;
 	struct rte_kni_request req;
-	struct kni_dev *kni = netdev_priv(dev);
 
 	pr_debug("kni_net_change_mtu new mtu %d to be set\n", new_mtu);
 
 	memset(&req, 0, sizeof(req));
 	req.req_id = RTE_KNI_REQ_CHANGE_MTU;
 	req.new_mtu = new_mtu;
-	ret = kni_net_process_request(kni, &req);
+	ret = kni_net_process_request(dev, &req);
 	if (ret == 0 && req.result == 0)
 		dev->mtu = new_mtu;
 
@@ -661,7 +679,6 @@ static void
 kni_net_change_rx_flags(struct net_device *netdev, int flags)
 {
 	struct rte_kni_request req;
-	struct kni_dev *kni = netdev_priv(netdev);
 
 	memset(&req, 0, sizeof(req));
 
@@ -683,7 +700,7 @@ kni_net_change_rx_flags(struct net_device *netdev, int flags)
 			req.promiscusity = 0;
 	}
 
-	kni_net_process_request(kni, &req);
+	kni_net_process_request(netdev, &req);
 }
 
 /*
@@ -742,7 +759,6 @@ kni_net_set_mac(struct net_device *netdev, void *p)
 {
 	int ret;
 	struct rte_kni_request req;
-	struct kni_dev *kni;
 	struct sockaddr *addr = p;
 
 	memset(&req, 0, sizeof(req));
@@ -754,8 +770,7 @@ kni_net_set_mac(struct net_device *netdev, void *p)
 	memcpy(req.mac_addr, addr->sa_data, netdev->addr_len);
 	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
 
-	kni = netdev_priv(netdev);
-	ret = kni_net_process_request(kni, &req);
+	ret = kni_net_process_request(netdev, &req);
 
 	return (ret == 0 ? req.result : ret);
 }
-- 
2.17.1


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [dpdk-dev] [PATCH] kni: fix rtnl deadlocks and race conditions
  2020-11-26 14:46 [dpdk-dev] [PATCH] kni: fix rtnl deadlocks and race conditions Elad Nachman
@ 2021-02-19 18:41 ` Ferruh Yigit
  2021-02-21  8:03   ` Elad Nachman
  2021-02-23 12:05 ` [dpdk-dev] [PATCH V2] kni: fix rtnl deadlocks and race conditions v2 Elad Nachman
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 42+ messages in thread
From: Ferruh Yigit @ 2021-02-19 18:41 UTC (permalink / raw)
  To: Elad Nachman; +Cc: dev, Igor Ryzhov, Stephen Hemminger

On 11/26/2020 2:46 PM, Elad Nachman wrote:
> This patch leverages on Stephen Hemminger's 64106 patch from Dec 2019,
> and fixes the issues reported by Ferruh and Igor:
> 
> A. KNI sync lock is being locked while rtnl is held.
> If two threads are calling kni_net_process_request() ,
> then the first one wil take the sync lock, release rtnl lock then sleep.
> The second thread will try to lock sync lock while holding rtnl.
> The first thread will wake, and try to lock rtnl, resulting in a deadlock.
> The remedy is to release rtnl before locking the KNI sync lock.
> Since in between nothing is accessing Linux network-wise,
> no rtnl locking is needed.

Hi Elad,

Thanks for explanation, that clarifies the issue.
Also I confirm I don't see the hang, at least as much as I test.

> 
> B. There is a race condition in __dev_close_many() processing the
> close_list while the application terminates.
> It looks like if two vEth devices are terminating,
> and one releases the rtnl lock, the other takes it,
> updating the close_list in an unstable state,
> causing the close_list to become a circular linked list,
> hence list_for_each_entry() will endlessly loop inside
> __dev_close_many() .
> Since the description for the original patch indicate the
> original motivation was bringing the device up,
> I have changed kni_net_process_request() to hold the rtnl mutex
> in case of bringing the device down since this is the path called
> from __dev_close_many() , causing the corruption of the close_list.
> 

I can't reproduce this case, I see the protection in the code, but better to get 
confirmation from Igor.



Overall the issue seems to be calling a function pointed to by 'rte_kni_ops' which 
requires acquiring the rtnl lock.
So I wonder if this can't be handled in the ops function, by processing the 
request asynchronously,
like recording the request, returning from 'rte_kni_ops', and processing the request 
afterwards?

I assume the application we mention is not kni sample application.

> 
> 
> Signed-off-by: Elad Nachman <eladv6@gmail.com>
> ---
>   kernel/linux/kni/kni_net.c | 47 +++++++++++++++++++++++++-------------
>   1 file changed, 31 insertions(+), 16 deletions(-)
> 
> diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c
> index 4b752083d..cf5b0845d 100644
> --- a/kernel/linux/kni/kni_net.c
> +++ b/kernel/linux/kni/kni_net.c
> @@ -17,6 +17,7 @@
>   #include <linux/skbuff.h>
>   #include <linux/kthread.h>
>   #include <linux/delay.h>
> +#include <linux/rtnetlink.h>
>   
>   #include <rte_kni_common.h>
>   #include <kni_fifo.h>
> @@ -102,18 +103,26 @@ get_data_kva(struct kni_dev *kni, void *pkt_kva)
>    * It can be called to process the request.
>    */
>   static int
> -kni_net_process_request(struct kni_dev *kni, struct rte_kni_request *req)
> +kni_net_process_request(struct net_device *dev, struct rte_kni_request *req)
>   {
> +	struct kni_dev *kni = netdev_priv(dev);
>   	int ret = -1;
>   	void *resp_va;
>   	uint32_t num;
>   	int ret_val;
> +	int req_is_dev_stop = 0;
>   
> -	if (!kni || !req) {
> -		pr_err("No kni instance or request\n");
> -		return -EINVAL;
> -	}
> +	if (req->req_id == RTE_KNI_REQ_CFG_NETWORK_IF &&
> +			req->if_up == 0)
> +		req_is_dev_stop = 1;
>   
> +	ASSERT_RTNL();
> +
> +	if (!req_is_dev_stop) {
> +		dev_hold(dev);
> +		rtnl_unlock();
> +	}
> +
>   	mutex_lock(&kni->sync_lock);
>   
>   	/* Construct data */
> @@ -125,8 +134,13 @@ kni_net_process_request(struct kni_dev *kni, struct rte_kni_request *req)
>   		goto fail;
>   	}
>   
> +	/* Since we need to wait and RTNL mutex is held
> +	 * drop the mutex and hold refernce to keep device
> +	 */
> +

Comment seems left here, need to go up. s/refernce/reference

>   	ret_val = wait_event_interruptible_timeout(kni->wq,
>   			kni_fifo_count(kni->resp_q), 3 * HZ);
> +
>   	if (signal_pending(current) || ret_val <= 0) {
>   		ret = -ETIME;
>   		goto fail;
> @@ -144,6 +158,13 @@ kni_net_process_request(struct kni_dev *kni, struct rte_kni_request *req)
>   
>   fail:
>   	mutex_unlock(&kni->sync_lock);
> +
> +

extra empty line

> +	if (!req_is_dev_stop) {
> +		rtnl_lock();
> +		dev_put(dev);
> +	}
> +
>   	return ret;
>   }
>   
> @@ -155,7 +176,6 @@ kni_net_open(struct net_device *dev)
>   {
>   	int ret;
>   	struct rte_kni_request req;
> -	struct kni_dev *kni = netdev_priv(dev);
>   
>   	netif_start_queue(dev);
>   	if (kni_dflt_carrier == 1)
> @@ -168,7 +188,7 @@ kni_net_open(struct net_device *dev)
>   
>   	/* Setting if_up to non-zero means up */
>   	req.if_up = 1;
> -	ret = kni_net_process_request(kni, &req);
> +	ret = kni_net_process_request(dev, &req);
>   

Although it is not so confusing, these lines and the following ones are noise for 
this patch; they are just for the 'kni_net_process_request' parameter change.

What do you think about doing the 'kni_net_process_request' parameter change in a first 
patch, and fixing the issue in a second? This way the second patch can contain only the 
actual changes required for the fix.

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [dpdk-dev] [PATCH] kni: fix rtnl deadlocks and race conditions
  2021-02-19 18:41 ` Ferruh Yigit
@ 2021-02-21  8:03   ` Elad Nachman
  2021-02-22 15:58     ` Ferruh Yigit
  0 siblings, 1 reply; 42+ messages in thread
From: Elad Nachman @ 2021-02-21  8:03 UTC (permalink / raw)
  To: Ferruh Yigit; +Cc: dev, Igor Ryzhov, Stephen Hemminger

Hi,

Regarding the asynchronous call - thought about it, but then the
request will always return OK to user-space and I will have no way to
return failure error codes back to user-space.

If the above explanation is acceptable, per your other comments - I
can send a new patch without the parameter change , without the empty
line, and with the comment moved to the proper place in the code.

Waiting for your decision,

Elad.

On Fri, Feb 19, 2021 at 8:42 PM Ferruh Yigit <ferruh.yigit@intel.com> wrote:
>
> On 11/26/2020 2:46 PM, Elad Nachman wrote:
> > This patch leverages on Stephen Hemminger's 64106 patch from Dec 2019,
> > and fixes the issues reported by Ferruh and Igor:
> >
> > A. KNI sync lock is being locked while rtnl is held.
> > If two threads are calling kni_net_process_request() ,
> > then the first one wil take the sync lock, release rtnl lock then sleep.
> > The second thread will try to lock sync lock while holding rtnl.
> > The first thread will wake, and try to lock rtnl, resulting in a deadlock.
> > The remedy is to release rtnl before locking the KNI sync lock.
> > Since in between nothing is accessing Linux network-wise,
> > no rtnl locking is needed.
>
> Hi Elad,
>
> Thanks for explanation, that clarifies the issue.
> Also I confirm I don't see the hang, at least as much as I test.
>
> >
> > B. There is a race condition in __dev_close_many() processing the
> > close_list while the application terminates.
> > It looks like if two vEth devices are terminating,
> > and one releases the rtnl lock, the other takes it,
> > updating the close_list in an unstable state,
> > causing the close_list to become a circular linked list,
> > hence list_for_each_entry() will endlessly loop inside
> > __dev_close_many() .
> > Since the description for the original patch indicate the
> > original motivation was bringing the device up,
> > I have changed kni_net_process_request() to hold the rtnl mutex
> > in case of bringing the device down since this is the path called
> > from __dev_close_many() , causing the corruption of the close_list.
> >
>
> I can't reproduce this case, I see the protection in the code, but better to get
> confirmation from Igor.
>
>
>
> Overall the issue seems calling a function pointed by 'rte_kni_ops' which
> requires to acquire the rtnl lock.
> So I wonder if this can't be handled in the ops function, by processing the
> request asynchronously,
> like recording the request, return from 'rte_kni_ops', and process the request
> afterwards?
>
> I assume the application we mention is not kni sample application.
>
> >
> >
> > Signed-off-by: Elad Nachman <eladv6@gmail.com>
> > ---
> >   kernel/linux/kni/kni_net.c | 47 +++++++++++++++++++++++++-------------
> >   1 file changed, 31 insertions(+), 16 deletions(-)
> >
> > diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c
> > index 4b752083d..cf5b0845d 100644
> > --- a/kernel/linux/kni/kni_net.c
> > +++ b/kernel/linux/kni/kni_net.c
> > @@ -17,6 +17,7 @@
> >   #include <linux/skbuff.h>
> >   #include <linux/kthread.h>
> >   #include <linux/delay.h>
> > +#include <linux/rtnetlink.h>
> >
> >   #include <rte_kni_common.h>
> >   #include <kni_fifo.h>
> > @@ -102,18 +103,26 @@ get_data_kva(struct kni_dev *kni, void *pkt_kva)
> >    * It can be called to process the request.
> >    */
> >   static int
> > -kni_net_process_request(struct kni_dev *kni, struct rte_kni_request *req)
> > +kni_net_process_request(struct net_device *dev, struct rte_kni_request *req)
> >   {
> > +     struct kni_dev *kni = netdev_priv(dev);
> >       int ret = -1;
> >       void *resp_va;
> >       uint32_t num;
> >       int ret_val;
> > +     int req_is_dev_stop = 0;
> >
> > -     if (!kni || !req) {
> > -             pr_err("No kni instance or request\n");
> > -             return -EINVAL;
> > -     }
> > +     if (req->req_id == RTE_KNI_REQ_CFG_NETWORK_IF &&
> > +                     req->if_up == 0)
> > +             req_is_dev_stop = 1;
> >
> > +     ASSERT_RTNL();
> > +
> > +     if (!req_is_dev_stop) {
> > +             dev_hold(dev);
> > +             rtnl_unlock();
> > +     }
> > +
> >       mutex_lock(&kni->sync_lock);
> >
> >       /* Construct data */
> > @@ -125,8 +134,13 @@ kni_net_process_request(struct kni_dev *kni, struct rte_kni_request *req)
> >               goto fail;
> >       }
> >
> > +     /* Since we need to wait and RTNL mutex is held
> > +      * drop the mutex and hold refernce to keep device
> > +      */
> > +
>
> Comment seems left here, need to go up. s/refernce/reference
>
> >       ret_val = wait_event_interruptible_timeout(kni->wq,
> >                       kni_fifo_count(kni->resp_q), 3 * HZ);
> > +
> >       if (signal_pending(current) || ret_val <= 0) {
> >               ret = -ETIME;
> >               goto fail;
> > @@ -144,6 +158,13 @@ kni_net_process_request(struct kni_dev *kni, struct rte_kni_request *req)
> >
> >   fail:
> >       mutex_unlock(&kni->sync_lock);
> > +
> > +
>
> extra empty line
>
> > +     if (!req_is_dev_stop) {
> > +             rtnl_lock();
> > +             dev_put(dev);
> > +     }
> > +
> >       return ret;
> >   }
> >
> > @@ -155,7 +176,6 @@ kni_net_open(struct net_device *dev)
> >   {
> >       int ret;
> >       struct rte_kni_request req;
> > -     struct kni_dev *kni = netdev_priv(dev);
> >
> >       netif_start_queue(dev);
> >       if (kni_dflt_carrier == 1)
> > @@ -168,7 +188,7 @@ kni_net_open(struct net_device *dev)
> >
> >       /* Setting if_up to non-zero means up */
> >       req.if_up = 1;
> > -     ret = kni_net_process_request(kni, &req);
> > +     ret = kni_net_process_request(dev, &req);
> >
>
> Althoug it is not soo confusing, these lines and following ones are noise for
> this patch, they are just for 'kni_net_process_request' paramter change.
>
> What do you think do the 'kni_net_process_request' parameter change in first
> patch, and fix the issue in second, this way second patch can contain only the
> actual changes required for fix.

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [dpdk-dev] [PATCH] kni: fix rtnl deadlocks and race conditions
  2021-02-21  8:03   ` Elad Nachman
@ 2021-02-22 15:58     ` Ferruh Yigit
  0 siblings, 0 replies; 42+ messages in thread
From: Ferruh Yigit @ 2021-02-22 15:58 UTC (permalink / raw)
  To: Elad Nachman; +Cc: dev, Igor Ryzhov, Stephen Hemminger

On 2/21/2021 8:03 AM, Elad Nachman wrote:
> Hi,
> 
> Regarding the asynchronous call - thought about it, but then the
> request will always return OK to user-space and I will have no way to
> return failure error codes back to user-space.
> 

Right, let's continue with this patch. Can you please send a new version with 
updates mentioned below?

> If the above explanation is acceptable, per your other comments - I
> can send a new patch without the parameter change , without the empty
> line, and with the comment moved to the proper place in the code.
> 
> Waiting for your decision,
> 
> Elad.
> 
> On Fri, Feb 19, 2021 at 8:42 PM Ferruh Yigit <ferruh.yigit@intel.com> wrote:
>>
>> On 11/26/2020 2:46 PM, Elad Nachman wrote:
>>> This patch leverages on Stephen Hemminger's 64106 patch from Dec 2019,
>>> and fixes the issues reported by Ferruh and Igor:
>>>
>>> A. KNI sync lock is being locked while rtnl is held.
>>> If two threads are calling kni_net_process_request() ,
>>> then the first one wil take the sync lock, release rtnl lock then sleep.
>>> The second thread will try to lock sync lock while holding rtnl.
>>> The first thread will wake, and try to lock rtnl, resulting in a deadlock.
>>> The remedy is to release rtnl before locking the KNI sync lock.
>>> Since in between nothing is accessing Linux network-wise,
>>> no rtnl locking is needed.
>>
>> Hi Elad,
>>
>> Thanks for explanation, that clarifies the issue.
>> Also I confirm I don't see the hang, at least as much as I test.
>>
>>>
>>> B. There is a race condition in __dev_close_many() processing the
>>> close_list while the application terminates.
>>> It looks like if two vEth devices are terminating,
>>> and one releases the rtnl lock, the other takes it,
>>> updating the close_list in an unstable state,
>>> causing the close_list to become a circular linked list,
>>> hence list_for_each_entry() will endlessly loop inside
>>> __dev_close_many() .
>>> Since the description for the original patch indicate the
>>> original motivation was bringing the device up,
>>> I have changed kni_net_process_request() to hold the rtnl mutex
>>> in case of bringing the device down since this is the path called
>>> from __dev_close_many() , causing the corruption of the close_list.
>>>
>>
>> I can't reproduce this case, I see the protection in the code, but better to get
>> confirmation from Igor.
>>
>>
>>
>> Overall the issue seems calling a function pointed by 'rte_kni_ops' which
>> requires to acquire the rtnl lock.
>> So I wonder if this can't be handled in the ops function, by processing the
>> request asynchronously,
>> like recording the request, return from 'rte_kni_ops', and process the request
>> afterwards?
>>
>> I assume the application we mention is not kni sample application.
>>
>>>
>>>
>>> Signed-off-by: Elad Nachman <eladv6@gmail.com>
>>> ---
>>>    kernel/linux/kni/kni_net.c | 47 +++++++++++++++++++++++++-------------
>>>    1 file changed, 31 insertions(+), 16 deletions(-)
>>>
>>> diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c
>>> index 4b752083d..cf5b0845d 100644
>>> --- a/kernel/linux/kni/kni_net.c
>>> +++ b/kernel/linux/kni/kni_net.c
>>> @@ -17,6 +17,7 @@
>>>    #include <linux/skbuff.h>
>>>    #include <linux/kthread.h>
>>>    #include <linux/delay.h>
>>> +#include <linux/rtnetlink.h>
>>>
>>>    #include <rte_kni_common.h>
>>>    #include <kni_fifo.h>
>>> @@ -102,18 +103,26 @@ get_data_kva(struct kni_dev *kni, void *pkt_kva)
>>>     * It can be called to process the request.
>>>     */
>>>    static int
>>> -kni_net_process_request(struct kni_dev *kni, struct rte_kni_request *req)
>>> +kni_net_process_request(struct net_device *dev, struct rte_kni_request *req)
>>>    {
>>> +     struct kni_dev *kni = netdev_priv(dev);
>>>        int ret = -1;
>>>        void *resp_va;
>>>        uint32_t num;
>>>        int ret_val;
>>> +     int req_is_dev_stop = 0;
>>>
>>> -     if (!kni || !req) {
>>> -             pr_err("No kni instance or request\n");
>>> -             return -EINVAL;
>>> -     }
>>> +     if (req->req_id == RTE_KNI_REQ_CFG_NETWORK_IF &&
>>> +                     req->if_up == 0)
>>> +             req_is_dev_stop = 1;
>>>
>>> +     ASSERT_RTNL();
>>> +
>>> +     if (!req_is_dev_stop) {
>>> +             dev_hold(dev);
>>> +             rtnl_unlock();
>>> +     }
>>> +
>>>        mutex_lock(&kni->sync_lock);
>>>
>>>        /* Construct data */
>>> @@ -125,8 +134,13 @@ kni_net_process_request(struct kni_dev *kni, struct rte_kni_request *req)
>>>                goto fail;
>>>        }
>>>
>>> +     /* Since we need to wait and RTNL mutex is held
>>> +      * drop the mutex and hold refernce to keep device
>>> +      */
>>> +
>>
>> Comment seems left here, need to go up. s/refernce/reference
>>
>>>        ret_val = wait_event_interruptible_timeout(kni->wq,
>>>                        kni_fifo_count(kni->resp_q), 3 * HZ);
>>> +
>>>        if (signal_pending(current) || ret_val <= 0) {
>>>                ret = -ETIME;
>>>                goto fail;
>>> @@ -144,6 +158,13 @@ kni_net_process_request(struct kni_dev *kni, struct rte_kni_request *req)
>>>
>>>    fail:
>>>        mutex_unlock(&kni->sync_lock);
>>> +
>>> +
>>
>> extra empty line
>>
>>> +     if (!req_is_dev_stop) {
>>> +             rtnl_lock();
>>> +             dev_put(dev);
>>> +     }
>>> +
>>>        return ret;
>>>    }
>>>
>>> @@ -155,7 +176,6 @@ kni_net_open(struct net_device *dev)
>>>    {
>>>        int ret;
>>>        struct rte_kni_request req;
>>> -     struct kni_dev *kni = netdev_priv(dev);
>>>
>>>        netif_start_queue(dev);
>>>        if (kni_dflt_carrier == 1)
>>> @@ -168,7 +188,7 @@ kni_net_open(struct net_device *dev)
>>>
>>>        /* Setting if_up to non-zero means up */
>>>        req.if_up = 1;
>>> -     ret = kni_net_process_request(kni, &req);
>>> +     ret = kni_net_process_request(dev, &req);
>>>
>>
>> Althoug it is not soo confusing, these lines and following ones are noise for
>> this patch, they are just for 'kni_net_process_request' paramter change.
>>
>> What do you think do the 'kni_net_process_request' parameter change in first
>> patch, and fix the issue in second, this way second patch can contain only the
>> actual changes required for fix.


^ permalink raw reply	[flat|nested] 42+ messages in thread

* [dpdk-dev] [PATCH V2] kni: fix rtnl deadlocks and race conditions v2
  2020-11-26 14:46 [dpdk-dev] [PATCH] kni: fix rtnl deadlocks and race conditions Elad Nachman
  2021-02-19 18:41 ` Ferruh Yigit
@ 2021-02-23 12:05 ` Elad Nachman
  2021-02-23 12:53   ` Ferruh Yigit
  2021-02-23 13:44 ` [dpdk-dev] [PATCH 1/2] kni: fix rtnl deadlocks and race conditions v3 Elad Nachman
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 42+ messages in thread
From: Elad Nachman @ 2021-02-23 12:05 UTC (permalink / raw)
  To: ferruh.yigit; +Cc: iryzhov, stephen, dev, eladv6

This version 2 of the patch leverages on Stephen Hemminger's 64106 
patch from Dec 2019, 
and fixes the issues reported by Ferruh and Igor:

A. KNI sync lock is being locked while rtnl is held. 
If two threads are calling kni_net_process_request() ,
then the first one will take the sync lock, release rtnl lock then sleep.
The second thread will try to lock sync lock while holding rtnl.
The first thread will wake, and try to lock rtnl, resulting in a deadlock.
The remedy is to release rtnl before locking the KNI sync lock.
Since in between nothing is accessing Linux network-wise,
no rtnl locking is needed.

B. There is a race condition in __dev_close_many() processing the
close_list while the application terminates.
It looks like if two vEth devices are terminating,
and one releases the rtnl lock, the other takes it,
updating the close_list in an unstable state,
causing the close_list to become a circular linked list,
hence list_for_each_entry() will endlessly loop inside
__dev_close_many() . 
Since the description for the original patch indicate the
original motivation was bringing the device up,
I have changed kni_net_process_request() to hold the rtnl mutex
in case of bringing the device down since this is the path called
from __dev_close_many() , causing the corruption of the close_list. 

Depends-on: patch-64106 ("kni: fix kernel deadlock when using mlx devices")

Signed-off-by: Elad Nachman <eladv6@gmail.com>
---
V2:
* rebuild the patch as increment from patch 64106
* fix comment and blank lines

---
 kernel/linux/kni/kni_net.c | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c
index f0b6e9a8d..b41360220 100644
--- a/kernel/linux/kni/kni_net.c
+++ b/kernel/linux/kni/kni_net.c
@@ -110,9 +110,22 @@ kni_net_process_request(struct net_device *dev, struct rte_kni_request *req)
 	void *resp_va;
 	uint32_t num;
 	int ret_val;
+	int req_is_dev_stop = 0;
+
+	if (req->req_id == RTE_KNI_REQ_CFG_NETWORK_IF &&
+			req->if_up == 0)
+		req_is_dev_stop = 1;
 
 	ASSERT_RTNL();
 
+	/* Since we need to wait and RTNL mutex is held
+	 * drop the mutex and hold reference to keep device
+	 */
+	if (!req_is_dev_stop) {
+		dev_hold(dev);
+		rtnl_unlock();
+	}
+
 	mutex_lock(&kni->sync_lock);
 
 	/* Construct data */
@@ -124,16 +137,8 @@ kni_net_process_request(struct net_device *dev, struct rte_kni_request *req)
 		goto fail;
 	}
 
-	/* Since we need to wait and RTNL mutex is held
-	 * drop the mutex and hold refernce to keep device
-	 */
-	dev_hold(dev);
-	rtnl_unlock();
-
 	ret_val = wait_event_interruptible_timeout(kni->wq,
 			kni_fifo_count(kni->resp_q), 3 * HZ);
-	rtnl_lock();
-	dev_put(dev);
 
 	if (signal_pending(current) || ret_val <= 0) {
 		ret = -ETIME;
@@ -152,6 +157,10 @@ kni_net_process_request(struct net_device *dev, struct rte_kni_request *req)
 
 fail:
 	mutex_unlock(&kni->sync_lock);
+	if (!req_is_dev_stop) {
+		rtnl_lock();
+		dev_put(dev);
+	}
 	return ret;
 }
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [dpdk-dev] [PATCH V2] kni: fix rtnl deadlocks and race conditions v2
  2021-02-23 12:05 ` [dpdk-dev] [PATCH V2] kni: fix rtnl deadlocks and race conditions v2 Elad Nachman
@ 2021-02-23 12:53   ` Ferruh Yigit
  0 siblings, 0 replies; 42+ messages in thread
From: Ferruh Yigit @ 2021-02-23 12:53 UTC (permalink / raw)
  To: Elad Nachman; +Cc: iryzhov, stephen, dev

On 2/23/2021 12:05 PM, Elad Nachman wrote:
> This version 2 of the patch leverages on Stephen Hemminger's 64106
> patch from Dec 2019,
> and fixes the issues reported by Ferruh and Igor:
> 
> A. KNI sync lock is being locked while rtnl is held.
> If two threads are calling kni_net_process_request() ,
> then the first one will take the sync lock, release rtnl lock then sleep.
> The second thread will try to lock sync lock while holding rtnl.
> The first thread will wake, and try to lock rtnl, resulting in a deadlock.
> The remedy is to release rtnl before locking the KNI sync lock.
> Since in between nothing is accessing Linux network-wise,
> no rtnl locking is needed.
> 
> B. There is a race condition in __dev_close_many() processing the
> close_list while the application terminates.
> It looks like if two vEth devices are terminating,
> and one releases the rtnl lock, the other takes it,
> updating the close_list in an unstable state,
> causing the close_list to become a circular linked list,
> hence list_for_each_entry() will endlessly loop inside
> __dev_close_many() .
> Since the description for the original patch indicate the
> original motivation was bringing the device up,
> I have changed kni_net_process_request() to hold the rtnl mutex
> in case of bringing the device down since this is the path called
> from __dev_close_many() , causing the corruption of the close_list.
> 
> Depends-on: patch-64106 ("kni: fix kernel deadlock when using mlx devices")
 >

Can you please make the new version of the patches on top of the latest git head, not 
existing patches? We don't support incremental updates.

> 
> Signed-off-by: Elad Nachman <eladv6@gmail.com>
> ---
> V2:
> * rebuild the patch as increment from patch 64106
> * fix comment and blank lines
> 
> ---
>   kernel/linux/kni/kni_net.c | 25 +++++++++++++++++--------
>   1 file changed, 17 insertions(+), 8 deletions(-)
> 
> diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c
> index f0b6e9a8d..b41360220 100644
> --- a/kernel/linux/kni/kni_net.c
> +++ b/kernel/linux/kni/kni_net.c
> @@ -110,9 +110,22 @@ kni_net_process_request(struct net_device *dev, struct rte_kni_request *req)
>   	void *resp_va;
>   	uint32_t num;
>   	int ret_val;
> +	int req_is_dev_stop = 0;
> +

One more thing, can you please add a comment to the code on why the "stop" request is 
special? You have it in the commit log, but a short description in the code can also 
be helpful.

^ permalink raw reply	[flat|nested] 42+ messages in thread

* [dpdk-dev] [PATCH 1/2] kni: fix rtnl deadlocks and race conditions v3
  2020-11-26 14:46 [dpdk-dev] [PATCH] kni: fix rtnl deadlocks and race conditions Elad Nachman
  2021-02-19 18:41 ` Ferruh Yigit
  2021-02-23 12:05 ` [dpdk-dev] [PATCH V2] kni: fix rtnl deadlocks and race conditions v2 Elad Nachman
@ 2021-02-23 13:44 ` Elad Nachman
  2021-02-23 13:45 ` [dpdk-dev] [PATCH 2/2] " Elad Nachman
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 42+ messages in thread
From: Elad Nachman @ 2021-02-23 13:44 UTC (permalink / raw)
  To: ferruh.yigit; +Cc: iryzhov, stephen, dev, eladv6

This first part of v3 of the patch re-introduces Stephen Hemminger's
patch 64106 . This part changes the parameter kni_net_process_request()
gets and introduces the initial rtnl unlocking mechanism.

Signed-off-by: Elad Nachman <eladv6@gmail.com>
---
v3: 
* Include original patch and new patch as a series of patches, added a
  comment to the new patch
v2:
* rebuild the patch as increment from patch 64106
* fix comment and blank lines

---
 kernel/linux/kni/kni_net.c | 34 ++++++++++++++++++----------------
 1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c
index 4b752083d..f0b6e9a8d 100644
--- a/kernel/linux/kni/kni_net.c
+++ b/kernel/linux/kni/kni_net.c
@@ -17,6 +17,7 @@
 #include <linux/skbuff.h>
 #include <linux/kthread.h>
 #include <linux/delay.h>
+#include <linux/rtnetlink.h>
 
 #include <rte_kni_common.h>
 #include <kni_fifo.h>
@@ -102,17 +103,15 @@ get_data_kva(struct kni_dev *kni, void *pkt_kva)
  * It can be called to process the request.
  */
 static int
-kni_net_process_request(struct kni_dev *kni, struct rte_kni_request *req)
+kni_net_process_request(struct net_device *dev, struct rte_kni_request *req)
 {
+	struct kni_dev *kni = netdev_priv(dev);
 	int ret = -1;
 	void *resp_va;
 	uint32_t num;
 	int ret_val;
 
-	if (!kni || !req) {
-		pr_err("No kni instance or request\n");
-		return -EINVAL;
-	}
+	ASSERT_RTNL();
 
 	mutex_lock(&kni->sync_lock);
 
@@ -125,8 +124,17 @@ kni_net_process_request(struct kni_dev *kni, struct rte_kni_request *req)
 		goto fail;
 	}
 
+	/* Since we need to wait and RTNL mutex is held
+	 * drop the mutex and hold reference to keep device
+	 */
+	dev_hold(dev);
+	rtnl_unlock();
+
 	ret_val = wait_event_interruptible_timeout(kni->wq,
 			kni_fifo_count(kni->resp_q), 3 * HZ);
+	rtnl_lock();
+	dev_put(dev);
+
 	if (signal_pending(current) || ret_val <= 0) {
 		ret = -ETIME;
 		goto fail;
@@ -155,7 +163,6 @@ kni_net_open(struct net_device *dev)
 {
 	int ret;
 	struct rte_kni_request req;
-	struct kni_dev *kni = netdev_priv(dev);
 
 	netif_start_queue(dev);
 	if (kni_dflt_carrier == 1)
@@ -168,7 +175,7 @@ kni_net_open(struct net_device *dev)
 
 	/* Setting if_up to non-zero means up */
 	req.if_up = 1;
-	ret = kni_net_process_request(kni, &req);
+	ret = kni_net_process_request(dev, &req);
 
 	return (ret == 0) ? req.result : ret;
 }
@@ -178,7 +185,6 @@ kni_net_release(struct net_device *dev)
 {
 	int ret;
 	struct rte_kni_request req;
-	struct kni_dev *kni = netdev_priv(dev);
 
 	netif_stop_queue(dev); /* can't transmit any more */
 	netif_carrier_off(dev);
@@ -188,7 +194,7 @@ kni_net_release(struct net_device *dev)
 
 	/* Setting if_up to 0 means down */
 	req.if_up = 0;
-	ret = kni_net_process_request(kni, &req);
+	ret = kni_net_process_request(dev, &req);
 
 	return (ret == 0) ? req.result : ret;
 }
@@ -643,14 +649,13 @@ kni_net_change_mtu(struct net_device *dev, int new_mtu)
 {
 	int ret;
 	struct rte_kni_request req;
-	struct kni_dev *kni = netdev_priv(dev);
 
 	pr_debug("kni_net_change_mtu new mtu %d to be set\n", new_mtu);
 
 	memset(&req, 0, sizeof(req));
 	req.req_id = RTE_KNI_REQ_CHANGE_MTU;
 	req.new_mtu = new_mtu;
-	ret = kni_net_process_request(kni, &req);
+	ret = kni_net_process_request(dev, &req);
 	if (ret == 0 && req.result == 0)
 		dev->mtu = new_mtu;
 
@@ -661,7 +666,6 @@ static void
 kni_net_change_rx_flags(struct net_device *netdev, int flags)
 {
 	struct rte_kni_request req;
-	struct kni_dev *kni = netdev_priv(netdev);
 
 	memset(&req, 0, sizeof(req));
 
@@ -683,7 +687,7 @@ kni_net_change_rx_flags(struct net_device *netdev, int flags)
 			req.promiscusity = 0;
 	}
 
-	kni_net_process_request(kni, &req);
+	kni_net_process_request(netdev, &req);
 }
 
 /*
@@ -742,7 +746,6 @@ kni_net_set_mac(struct net_device *netdev, void *p)
 {
 	int ret;
 	struct rte_kni_request req;
-	struct kni_dev *kni;
 	struct sockaddr *addr = p;
 
 	memset(&req, 0, sizeof(req));
@@ -754,8 +757,7 @@ kni_net_set_mac(struct net_device *netdev, void *p)
 	memcpy(req.mac_addr, addr->sa_data, netdev->addr_len);
 	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
 
-	kni = netdev_priv(netdev);
-	ret = kni_net_process_request(kni, &req);
+	ret = kni_net_process_request(netdev, &req);
 
 	return (ret == 0 ? req.result : ret);
 }
-- 
2.17.1


^ permalink raw reply	[flat|nested] 42+ messages in thread

* [dpdk-dev] [PATCH 2/2] kni: fix rtnl deadlocks and race conditions v3
  2020-11-26 14:46 [dpdk-dev] [PATCH] kni: fix rtnl deadlocks and race conditions Elad Nachman
                   ` (2 preceding siblings ...)
  2021-02-23 13:44 ` [dpdk-dev] [PATCH 1/2] kni: fix rtnl deadlocks and race conditions v3 Elad Nachman
@ 2021-02-23 13:45 ` Elad Nachman
  2021-02-24 12:49   ` Igor Ryzhov
  2021-02-25 14:32 ` [dpdk-dev] [PATCH 1/2] kni: fix kernel deadlock when using mlx devices Elad Nachman
  2021-03-29 14:36 ` [dpdk-dev] [PATCH v5 1/3] kni: refactor user request processing Ferruh Yigit
  5 siblings, 1 reply; 42+ messages in thread
From: Elad Nachman @ 2021-02-23 13:45 UTC (permalink / raw)
  To: ferruh.yigit; +Cc: iryzhov, stephen, dev, eladv6

This part of the series includes my fixes for the issues reported
by Ferruh and Igor on top of part 1 of the patch series:

A. KNI sync lock is being locked while rtnl is held.
If two threads are calling kni_net_process_request() ,
then the first one will take the sync lock, release rtnl lock then sleep.
The second thread will try to lock sync lock while holding rtnl.
The first thread will wake, and try to lock rtnl, resulting in a deadlock.
The remedy is to release rtnl before locking the KNI sync lock.
Since in between nothing is accessing Linux network-wise,
no rtnl locking is needed.

B. There is a race condition in __dev_close_many() processing the
close_list while the application terminates.
It looks like if two vEth devices are terminating,
and one releases the rtnl lock, the other takes it,
updating the close_list in an unstable state,
causing the close_list to become a circular linked list,
hence list_for_each_entry() will endlessly loop inside
__dev_close_many() .
Since the description for the original patch indicates the
original motivation was bringing the device up,
I have changed kni_net_process_request() to hold the rtnl mutex
in case of bringing the device down since this is the path called
from __dev_close_many() , causing the corruption of the close_list. 

Signed-off-by: Elad Nachman <eladv6@gmail.com>
---
v3: 
* Include original patch and new patch as a series of patches, added a
  comment to the new patch
v2:
* rebuild the patch as increment from patch 64106
* fix comment and blank lines
---
 kernel/linux/kni/kni_net.c | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c
index f0b6e9a8d..017e44812 100644
--- a/kernel/linux/kni/kni_net.c
+++ b/kernel/linux/kni/kni_net.c
@@ -110,9 +110,26 @@ kni_net_process_request(struct net_device *dev, struct rte_kni_request *req)
 	void *resp_va;
 	uint32_t num;
 	int ret_val;
+	int req_is_dev_stop = 0;
+
+	/* For configuring the interface to down,
+	 * rtnl must be held all the way to prevent race condition
+	 * inside __dev_close_many() between two netdev instances of KNI
+	 */
+	if (req->req_id == RTE_KNI_REQ_CFG_NETWORK_IF &&
+			req->if_up == 0)
+		req_is_dev_stop = 1;
 
 	ASSERT_RTNL();
 
+	/* Since we need to wait and RTNL mutex is held
+	 * drop the mutex and hold reference to keep device
+	 */
+	if (!req_is_dev_stop) {
+		dev_hold(dev);
+		rtnl_unlock();
+	}
+
 	mutex_lock(&kni->sync_lock);
 
 	/* Construct data */
@@ -124,16 +141,8 @@ kni_net_process_request(struct net_device *dev, struct rte_kni_request *req)
 		goto fail;
 	}
 
-	/* Since we need to wait and RTNL mutex is held
-	 * drop the mutex and hold refernce to keep device
-	 */
-	dev_hold(dev);
-	rtnl_unlock();
-
 	ret_val = wait_event_interruptible_timeout(kni->wq,
 			kni_fifo_count(kni->resp_q), 3 * HZ);
-	rtnl_lock();
-	dev_put(dev);
 
 	if (signal_pending(current) || ret_val <= 0) {
 		ret = -ETIME;
@@ -152,6 +161,10 @@ kni_net_process_request(struct net_device *dev, struct rte_kni_request *req)
 
 fail:
 	mutex_unlock(&kni->sync_lock);
+	if (!req_is_dev_stop) {
+		rtnl_lock();
+		dev_put(dev);
+	}
 	return ret;
 }
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [dpdk-dev] [PATCH 2/2] kni: fix rtnl deadlocks and race conditions v3
  2021-02-23 13:45 ` [dpdk-dev] [PATCH 2/2] " Elad Nachman
@ 2021-02-24 12:49   ` Igor Ryzhov
  2021-02-24 13:33     ` Elad Nachman
  2021-02-24 15:54     ` Stephen Hemminger
  0 siblings, 2 replies; 42+ messages in thread
From: Igor Ryzhov @ 2021-02-24 12:49 UTC (permalink / raw)
  To: Elad Nachman; +Cc: Ferruh Yigit, Stephen Hemminger, dev

This looks more like a hack than an actual fix to me.

After this commit:
"ip link set up" is sent to the userspace with unlocked rtnl_lock
"ip link set down" is sent to the userspace with locked rtnl_lock

How is this really fixing anything? IMHO it only complicates the code.
If talking with userspace under rtnl_lock is a problem, then we should fix
all such requests, not only part of them.
If it is not a problem, then I don't see any point in merging this.

On Tue, Feb 23, 2021 at 4:45 PM Elad Nachman <eladv6@gmail.com> wrote:

> This part of the series includes my fixes for the issues reported
> by Ferruh and Igor on top of part 1 of the patch series:
>
> A. KNI sync lock is being locked while rtnl is held.
> If two threads are calling kni_net_process_request() ,
> then the first one will take the sync lock, release rtnl lock then sleep.
> The second thread will try to lock sync lock while holding rtnl.
> The first thread will wake, and try to lock rtnl, resulting in a deadlock.
> The remedy is to release rtnl before locking the KNI sync lock.
> Since in between nothing is accessing Linux network-wise,
> no rtnl locking is needed.
>
> B. There is a race condition in __dev_close_many() processing the
> close_list while the application terminates.
> It looks like if two vEth devices are terminating,
> and one releases the rtnl lock, the other takes it,
> updating the close_list in an unstable state,
> causing the close_list to become a circular linked list,
> hence list_for_each_entry() will endlessly loop inside
> __dev_close_many() .
> Since the description for the original patch indicate the
> original motivation was bringing the device up,
> I have changed kni_net_process_request() to hold the rtnl mutex
> in case of bringing the device down since this is the path called
> from __dev_close_many() , causing the corruption of the close_list.
>
> Signed-off-by: Elad Nachman <eladv6@gmail.com>
> ---
> v3:
> * Include original patch and new patch as a series of patch, added a
>   comment to the new patch
> v2:
> * rebuild the patch as increment from patch 64106
> * fix comment and blank lines
> ---
>  kernel/linux/kni/kni_net.c | 29 +++++++++++++++++++++--------
>  1 file changed, 21 insertions(+), 8 deletions(-)
>
> diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c
> index f0b6e9a8d..017e44812 100644
> --- a/kernel/linux/kni/kni_net.c
> +++ b/kernel/linux/kni/kni_net.c
> @@ -110,9 +110,26 @@ kni_net_process_request(struct net_device *dev,
> struct rte_kni_request *req)
>         void *resp_va;
>         uint32_t num;
>         int ret_val;
> +       int req_is_dev_stop = 0;
> +
> +       /* For configuring the interface to down,
> +        * rtnl must be held all the way to prevent race condition
> +        * inside __dev_close_many() between two netdev instances of KNI
> +        */
> +       if (req->req_id == RTE_KNI_REQ_CFG_NETWORK_IF &&
> +                       req->if_up == 0)
> +               req_is_dev_stop = 1;
>
>         ASSERT_RTNL();
>
> +       /* Since we need to wait and RTNL mutex is held
> +        * drop the mutex and hold reference to keep device
> +        */
> +       if (!req_is_dev_stop) {
> +               dev_hold(dev);
> +               rtnl_unlock();
> +       }
> +
>         mutex_lock(&kni->sync_lock);
>
>         /* Construct data */
> @@ -124,16 +141,8 @@ kni_net_process_request(struct net_device *dev,
> struct rte_kni_request *req)
>                 goto fail;
>         }
>
> -       /* Since we need to wait and RTNL mutex is held
> -        * drop the mutex and hold refernce to keep device
> -        */
> -       dev_hold(dev);
> -       rtnl_unlock();
> -
>         ret_val = wait_event_interruptible_timeout(kni->wq,
>                         kni_fifo_count(kni->resp_q), 3 * HZ);
> -       rtnl_lock();
> -       dev_put(dev);
>
>         if (signal_pending(current) || ret_val <= 0) {
>                 ret = -ETIME;
> @@ -152,6 +161,10 @@ kni_net_process_request(struct net_device *dev,
> struct rte_kni_request *req)
>
>  fail:
>         mutex_unlock(&kni->sync_lock);
> +       if (!req_is_dev_stop) {
> +               rtnl_lock();
> +               dev_put(dev);
> +       }
>         return ret;
>  }
>
> --
> 2.17.1
>
>

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [dpdk-dev] [PATCH 2/2] kni: fix rtnl deadlocks and race conditions v3
  2021-02-24 12:49   ` Igor Ryzhov
@ 2021-02-24 13:33     ` Elad Nachman
  2021-02-24 14:04       ` Igor Ryzhov
  2021-02-24 15:54     ` Stephen Hemminger
  1 sibling, 1 reply; 42+ messages in thread
From: Elad Nachman @ 2021-02-24 13:33 UTC (permalink / raw)
  To: Igor Ryzhov; +Cc: Ferruh Yigit, Stephen Hemminger, dev

Currently KNI has a lot of issues with deadlocks locking the code,
after this commit, they are gone, and the code runs properly without
crashing.
That was tested with over 100 restarts of the application, which
previously required a hard reset of the board.

I think this benefit outweighs the complication of the code.

The function is called with rtnl locked because this is how the Linux
kernel is designed to work - it is not designed to work with deferral
to user-space mid-function.

To fix all such requests you need to reach an agreement with Linux
netdev, which is unlikely.

Calling user-space can be done asynchronously, as Ferruh asked, but
then you will always have to return success, even on failure, as Linux
kernel does not have a mechanism to asynchronously report on failure
for such system calls.

IMHO - weighing the non-reporting of failure against how the code
looks (given that it functions perfectly OK), I decided to go with
functionality.

FYI,

Elad.

On Wed, Feb 24, 2021 at 2:50 PM Igor Ryzhov <iryzhov@nfware.com> wrote:
>
> This looks more like a hack than an actual fix to me.
>
> After this commit:
> "ip link set up" is sent to the userspace with unlocked rtnl_lock
> "ip link set down" is sent to the userspace with locked rtnl_lock
>
> How is this really fixing anything? IMHO it only complicates the code.
> If talking with userspace under rtnl_lock is a problem, then we should fix all such requests, not only part of them.
> If it is not a problem, then I don't see any point in merging this.
>
> On Tue, Feb 23, 2021 at 4:45 PM Elad Nachman <eladv6@gmail.com> wrote:
>>
>> This part of the series includes my fixes for the issues reported
>> by Ferruh and Igor on top of part 1 of the patch series:
>>
>> A. KNI sync lock is being locked while rtnl is held.
>> If two threads are calling kni_net_process_request() ,
>> then the first one will take the sync lock, release rtnl lock then sleep.
>> The second thread will try to lock sync lock while holding rtnl.
>> The first thread will wake, and try to lock rtnl, resulting in a deadlock.
>> The remedy is to release rtnl before locking the KNI sync lock.
>> Since in between nothing is accessing Linux network-wise,
>> no rtnl locking is needed.
>>
>> B. There is a race condition in __dev_close_many() processing the
>> close_list while the application terminates.
>> It looks like if two vEth devices are terminating,
>> and one releases the rtnl lock, the other takes it,
>> updating the close_list in an unstable state,
>> causing the close_list to become a circular linked list,
>> hence list_for_each_entry() will endlessly loop inside
>> __dev_close_many() .
>> Since the description for the original patch indicate the
>> original motivation was bringing the device up,
>> I have changed kni_net_process_request() to hold the rtnl mutex
>> in case of bringing the device down since this is the path called
>> from __dev_close_many() , causing the corruption of the close_list.
>>
>> Signed-off-by: Elad Nachman <eladv6@gmail.com>
>> ---
>> v3:
>> * Include original patch and new patch as a series of patch, added a
>>   comment to the new patch
>> v2:
>> * rebuild the patch as increment from patch 64106
>> * fix comment and blank lines
>> ---
>>  kernel/linux/kni/kni_net.c | 29 +++++++++++++++++++++--------
>>  1 file changed, 21 insertions(+), 8 deletions(-)
>>
>> diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c
>> index f0b6e9a8d..017e44812 100644
>> --- a/kernel/linux/kni/kni_net.c
>> +++ b/kernel/linux/kni/kni_net.c
>> @@ -110,9 +110,26 @@ kni_net_process_request(struct net_device *dev, struct rte_kni_request *req)
>>         void *resp_va;
>>         uint32_t num;
>>         int ret_val;
>> +       int req_is_dev_stop = 0;
>> +
>> +       /* For configuring the interface to down,
>> +        * rtnl must be held all the way to prevent race condition
>> +        * inside __dev_close_many() between two netdev instances of KNI
>> +        */
>> +       if (req->req_id == RTE_KNI_REQ_CFG_NETWORK_IF &&
>> +                       req->if_up == 0)
>> +               req_is_dev_stop = 1;
>>
>>         ASSERT_RTNL();
>>
>> +       /* Since we need to wait and RTNL mutex is held
>> +        * drop the mutex and hold reference to keep device
>> +        */
>> +       if (!req_is_dev_stop) {
>> +               dev_hold(dev);
>> +               rtnl_unlock();
>> +       }
>> +
>>         mutex_lock(&kni->sync_lock);
>>
>>         /* Construct data */
>> @@ -124,16 +141,8 @@ kni_net_process_request(struct net_device *dev, struct rte_kni_request *req)
>>                 goto fail;
>>         }
>>
>> -       /* Since we need to wait and RTNL mutex is held
>> -        * drop the mutex and hold refernce to keep device
>> -        */
>> -       dev_hold(dev);
>> -       rtnl_unlock();
>> -
>>         ret_val = wait_event_interruptible_timeout(kni->wq,
>>                         kni_fifo_count(kni->resp_q), 3 * HZ);
>> -       rtnl_lock();
>> -       dev_put(dev);
>>
>>         if (signal_pending(current) || ret_val <= 0) {
>>                 ret = -ETIME;
>> @@ -152,6 +161,10 @@ kni_net_process_request(struct net_device *dev, struct rte_kni_request *req)
>>
>>  fail:
>>         mutex_unlock(&kni->sync_lock);
>> +       if (!req_is_dev_stop) {
>> +               rtnl_lock();
>> +               dev_put(dev);
>> +       }
>>         return ret;
>>  }
>>
>> --
>> 2.17.1
>>

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [dpdk-dev] [PATCH 2/2] kni: fix rtnl deadlocks and race conditions v3
  2021-02-24 13:33     ` Elad Nachman
@ 2021-02-24 14:04       ` Igor Ryzhov
  2021-02-24 14:06         ` Elad Nachman
  0 siblings, 1 reply; 42+ messages in thread
From: Igor Ryzhov @ 2021-02-24 14:04 UTC (permalink / raw)
  To: Elad Nachman; +Cc: Ferruh Yigit, Stephen Hemminger, dev

Elad,

I understand your point.
But the fact that this fix works for you doesn't mean that it will work for
all DPDK users.

For example, I provided two simple commands: "ip link set up" and "ip link
set down".
Your fix works for only one of them. For me, this is not a proper fix.
It may work for you because you don't disable interfaces, but it will fail
for users who do.

On Wed, Feb 24, 2021 at 4:33 PM Elad Nachman <eladv6@gmail.com> wrote:

> Currently KNI has a lot of issues with deadlocks locking the code,
> after this commit, they are gone, and the code runs properly without
> crashing.
> That was tested with over 100 restarts of the application, which
> previously required a hard reset of the board.
>
> I think this benefit overweights the complication of the code.
>
> The function is called with rtnl locked because this is how the Linux
> kernel is designed to work - it is not designed to work with deferral
> to user-space mid-function.
>
> To fix all such requests you need to reach an agreement with Linux
> netdev, which is unlikely.
>
> Calling user-space can be done asynchronously, as Ferruh asked, but
> then you will always have to return success, even on failure, as Linux
> kernel does not have a mechanism to asynchronously report on failure
> for such system calls.
>
> IMHO - weighting the non-reporting of failure versus how the code
> looks (as it functions perfectly OK), I decided to go with
> functionality.
>
> FYI,
>
> Elad.
>
> On Wed, Feb 24, 2021 at 2:50 PM Igor Ryzhov <iryzhov@nfware.com> wrote:
> >
> > This looks more like a hack than an actual fix to me.
> >
> > After this commit:
> > "ip link set up" is sent to the userspace with unlocked rtnl_lock
> > "ip link set down" is sent to the userspace with locked rtnl_lock
> >
> > How is this really fixing anything? IMHO it only complicates the code.
> > If talking with userspace under rtnl_lock is a problem, then we should
> fix all such requests, not only part of them.
> > If it is not a problem, then I don't see any point in merging this.
> >
> > On Tue, Feb 23, 2021 at 4:45 PM Elad Nachman <eladv6@gmail.com> wrote:
> >>
> >> This part of the series includes my fixes for the issues reported
> >> by Ferruh and Igor on top of part 1 of the patch series:
> >>
> >> A. KNI sync lock is being locked while rtnl is held.
> >> If two threads are calling kni_net_process_request() ,
> >> then the first one will take the sync lock, release rtnl lock then
> sleep.
> >> The second thread will try to lock sync lock while holding rtnl.
> >> The first thread will wake, and try to lock rtnl, resulting in a
> deadlock.
> >> The remedy is to release rtnl before locking the KNI sync lock.
> >> Since in between nothing is accessing Linux network-wise,
> >> no rtnl locking is needed.
> >>
> >> B. There is a race condition in __dev_close_many() processing the
> >> close_list while the application terminates.
> >> It looks like if two vEth devices are terminating,
> >> and one releases the rtnl lock, the other takes it,
> >> updating the close_list in an unstable state,
> >> causing the close_list to become a circular linked list,
> >> hence list_for_each_entry() will endlessly loop inside
> >> __dev_close_many() .
> >> Since the description for the original patch indicate the
> >> original motivation was bringing the device up,
> >> I have changed kni_net_process_request() to hold the rtnl mutex
> >> in case of bringing the device down since this is the path called
> >> from __dev_close_many() , causing the corruption of the close_list.
> >>
> >> Signed-off-by: Elad Nachman <eladv6@gmail.com>
> >> ---
> >> v3:
> >> * Include original patch and new patch as a series of patch, added a
> >>   comment to the new patch
> >> v2:
> >> * rebuild the patch as increment from patch 64106
> >> * fix comment and blank lines
> >> ---
> >>  kernel/linux/kni/kni_net.c | 29 +++++++++++++++++++++--------
> >>  1 file changed, 21 insertions(+), 8 deletions(-)
> >>
> >> diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c
> >> index f0b6e9a8d..017e44812 100644
> >> --- a/kernel/linux/kni/kni_net.c
> >> +++ b/kernel/linux/kni/kni_net.c
> >> @@ -110,9 +110,26 @@ kni_net_process_request(struct net_device *dev,
> struct rte_kni_request *req)
> >>         void *resp_va;
> >>         uint32_t num;
> >>         int ret_val;
> >> +       int req_is_dev_stop = 0;
> >> +
> >> +       /* For configuring the interface to down,
> >> +        * rtnl must be held all the way to prevent race condition
> >> +        * inside __dev_close_many() between two netdev instances of KNI
> >> +        */
> >> +       if (req->req_id == RTE_KNI_REQ_CFG_NETWORK_IF &&
> >> +                       req->if_up == 0)
> >> +               req_is_dev_stop = 1;
> >>
> >>         ASSERT_RTNL();
> >>
> >> +       /* Since we need to wait and RTNL mutex is held
> >> +        * drop the mutex and hold reference to keep device
> >> +        */
> >> +       if (!req_is_dev_stop) {
> >> +               dev_hold(dev);
> >> +               rtnl_unlock();
> >> +       }
> >> +
> >>         mutex_lock(&kni->sync_lock);
> >>
> >>         /* Construct data */
> >> @@ -124,16 +141,8 @@ kni_net_process_request(struct net_device *dev,
> struct rte_kni_request *req)
> >>                 goto fail;
> >>         }
> >>
> >> -       /* Since we need to wait and RTNL mutex is held
> >> -        * drop the mutex and hold refernce to keep device
> >> -        */
> >> -       dev_hold(dev);
> >> -       rtnl_unlock();
> >> -
> >>         ret_val = wait_event_interruptible_timeout(kni->wq,
> >>                         kni_fifo_count(kni->resp_q), 3 * HZ);
> >> -       rtnl_lock();
> >> -       dev_put(dev);
> >>
> >>         if (signal_pending(current) || ret_val <= 0) {
> >>                 ret = -ETIME;
> >> @@ -152,6 +161,10 @@ kni_net_process_request(struct net_device *dev,
> struct rte_kni_request *req)
> >>
> >>  fail:
> >>         mutex_unlock(&kni->sync_lock);
> >> +       if (!req_is_dev_stop) {
> >> +               rtnl_lock();
> >> +               dev_put(dev);
> >> +       }
> >>         return ret;
> >>  }
> >>
> >> --
> >> 2.17.1
> >>
>

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [dpdk-dev] [PATCH 2/2] kni: fix rtnl deadlocks and race conditions v3
  2021-02-24 14:04       ` Igor Ryzhov
@ 2021-02-24 14:06         ` Elad Nachman
  2021-02-24 14:41           ` Igor Ryzhov
  0 siblings, 1 reply; 42+ messages in thread
From: Elad Nachman @ 2021-02-24 14:06 UTC (permalink / raw)
  To: Igor Ryzhov; +Cc: Ferruh Yigit, Stephen Hemminger, dev

I tested both link up and link down many times without any problems on
100 restarts of the application.

Having KNI deadlock frequently for real life applications is far worse, IMHO.

FYI

Elad.

On Wed, Feb 24, 2021 at 4:04 PM Igor Ryzhov <iryzhov@nfware.com> wrote:
>
> Elad,
>
> I understand your point.
> But the fact that this fix works for you doesn't mean that it will work for all DPDK users.
>
> For example, I provided two simple commands: "ip link set up" and "ip link set down".
> Your fix works for only one of them. For me, this is not a proper fix.
> It may work for you because you don't disable interfaces, but it will fail for users who do.
>
> On Wed, Feb 24, 2021 at 4:33 PM Elad Nachman <eladv6@gmail.com> wrote:
>>
>> Currently KNI has a lot of issues with deadlocks locking the code,
>> after this commit, they are gone, and the code runs properly without
>> crashing.
>> That was tested with over 100 restarts of the application, which
>> previously required a hard reset of the board.
>>
>> I think this benefit overweights the complication of the code.
>>
>> The function is called with rtnl locked because this is how the Linux
>> kernel is designed to work - it is not designed to work with deferral
>> to user-space mid-function.
>>
>> To fix all such requests you need to reach an agreement with Linux
>> netdev, which is unlikely.
>>
>> Calling user-space can be done asynchronously, as Ferruh asked, but
>> then you will always have to return success, even on failure, as Linux
>> kernel does not have a mechanism to asynchronously report on failure
>> for such system calls.
>>
>> IMHO - weighting the non-reporting of failure versus how the code
>> looks (as it functions perfectly OK), I decided to go with
>> functionality.
>>
>> FYI,
>>
>> Elad.
>>
>> On Wed, Feb 24, 2021 at 2:50 PM Igor Ryzhov <iryzhov@nfware.com> wrote:
>> >
>> > This looks more like a hack than an actual fix to me.
>> >
>> > After this commit:
>> > "ip link set up" is sent to the userspace with unlocked rtnl_lock
>> > "ip link set down" is sent to the userspace with locked rtnl_lock
>> >
>> > How is this really fixing anything? IMHO it only complicates the code.
>> > If talking with userspace under rtnl_lock is a problem, then we should fix all such requests, not only part of them.
>> > If it is not a problem, then I don't see any point in merging this.
>> >
>> > On Tue, Feb 23, 2021 at 4:45 PM Elad Nachman <eladv6@gmail.com> wrote:
>> >>
>> >> This part of the series includes my fixes for the issues reported
>> >> by Ferruh and Igor on top of part 1 of the patch series:
>> >>
>> >> A. KNI sync lock is being locked while rtnl is held.
>> >> If two threads are calling kni_net_process_request() ,
>> >> then the first one will take the sync lock, release rtnl lock then sleep.
>> >> The second thread will try to lock sync lock while holding rtnl.
>> >> The first thread will wake, and try to lock rtnl, resulting in a deadlock.
>> >> The remedy is to release rtnl before locking the KNI sync lock.
>> >> Since in between nothing is accessing Linux network-wise,
>> >> no rtnl locking is needed.
>> >>
>> >> B. There is a race condition in __dev_close_many() processing the
>> >> close_list while the application terminates.
>> >> It looks like if two vEth devices are terminating,
>> >> and one releases the rtnl lock, the other takes it,
>> >> updating the close_list in an unstable state,
>> >> causing the close_list to become a circular linked list,
>> >> hence list_for_each_entry() will endlessly loop inside
>> >> __dev_close_many() .
>> >> Since the description for the original patch indicate the
>> >> original motivation was bringing the device up,
>> >> I have changed kni_net_process_request() to hold the rtnl mutex
>> >> in case of bringing the device down since this is the path called
>> >> from __dev_close_many() , causing the corruption of the close_list.
>> >>
>> >> Signed-off-by: Elad Nachman <eladv6@gmail.com>
>> >> ---
>> >> v3:
>> >> * Include original patch and new patch as a series of patch, added a
>> >>   comment to the new patch
>> >> v2:
>> >> * rebuild the patch as increment from patch 64106
>> >> * fix comment and blank lines
>> >> ---
>> >>  kernel/linux/kni/kni_net.c | 29 +++++++++++++++++++++--------
>> >>  1 file changed, 21 insertions(+), 8 deletions(-)
>> >>
>> >> diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c
>> >> index f0b6e9a8d..017e44812 100644
>> >> --- a/kernel/linux/kni/kni_net.c
>> >> +++ b/kernel/linux/kni/kni_net.c
>> >> @@ -110,9 +110,26 @@ kni_net_process_request(struct net_device *dev, struct rte_kni_request *req)
>> >>         void *resp_va;
>> >>         uint32_t num;
>> >>         int ret_val;
>> >> +       int req_is_dev_stop = 0;
>> >> +
>> >> +       /* For configuring the interface to down,
>> >> +        * rtnl must be held all the way to prevent race condition
>> >> +        * inside __dev_close_many() between two netdev instances of KNI
>> >> +        */
>> >> +       if (req->req_id == RTE_KNI_REQ_CFG_NETWORK_IF &&
>> >> +                       req->if_up == 0)
>> >> +               req_is_dev_stop = 1;
>> >>
>> >>         ASSERT_RTNL();
>> >>
>> >> +       /* Since we need to wait and RTNL mutex is held
>> >> +        * drop the mutex and hold reference to keep device
>> >> +        */
>> >> +       if (!req_is_dev_stop) {
>> >> +               dev_hold(dev);
>> >> +               rtnl_unlock();
>> >> +       }
>> >> +
>> >>         mutex_lock(&kni->sync_lock);
>> >>
>> >>         /* Construct data */
>> >> @@ -124,16 +141,8 @@ kni_net_process_request(struct net_device *dev, struct rte_kni_request *req)
>> >>                 goto fail;
>> >>         }
>> >>
>> >> -       /* Since we need to wait and RTNL mutex is held
>> >> -        * drop the mutex and hold refernce to keep device
>> >> -        */
>> >> -       dev_hold(dev);
>> >> -       rtnl_unlock();
>> >> -
>> >>         ret_val = wait_event_interruptible_timeout(kni->wq,
>> >>                         kni_fifo_count(kni->resp_q), 3 * HZ);
>> >> -       rtnl_lock();
>> >> -       dev_put(dev);
>> >>
>> >>         if (signal_pending(current) || ret_val <= 0) {
>> >>                 ret = -ETIME;
>> >> @@ -152,6 +161,10 @@ kni_net_process_request(struct net_device *dev, struct rte_kni_request *req)
>> >>
>> >>  fail:
>> >>         mutex_unlock(&kni->sync_lock);
>> >> +       if (!req_is_dev_stop) {
>> >> +               rtnl_lock();
>> >> +               dev_put(dev);
>> >> +       }
>> >>         return ret;
>> >>  }
>> >>
>> >> --
>> >> 2.17.1
>> >>

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [dpdk-dev] [PATCH 2/2] kni: fix rtnl deadlocks and race conditions v3
  2021-02-24 14:06         ` Elad Nachman
@ 2021-02-24 14:41           ` Igor Ryzhov
  2021-02-24 14:56             ` Elad Nachman
  0 siblings, 1 reply; 42+ messages in thread
From: Igor Ryzhov @ 2021-02-24 14:41 UTC (permalink / raw)
  To: Elad Nachman; +Cc: Ferruh Yigit, Stephen Hemminger, dev

Both link up and link down also work for me without this patch.
So what's the point in merging it?

Just to clarify - I am not against the idea of this patch.
Talking to userspace under rtnl_lock is a bad idea.
I just think that any patch should fix some specified problem.

If this patch is trying to solve the overall "userspace request under
rtnl_lock" problem,
then it doesn't solve it correctly, because we still send link down
requests under the lock.

If this patch is trying to solve some other issue, for example, some "KNI
deadlocks"
you're talking about, then you should explain what these deadlocks are, how
to reproduce
them and why this patch solves the issue.

On Wed, Feb 24, 2021 at 5:07 PM Elad Nachman <eladv6@gmail.com> wrote:

> I tested both link up and link down many times without any problems on
> 100 restarts of the application.
>
> Having KNI deadlock frequently for real life applications is far worse,
> IMHO.
>
> FYI
>
> Elad.
>
> On Wed, Feb 24, 2021 at 4:04 PM Igor Ryzhov <iryzhov@nfware.com> wrote:
> >
> > Elad,
> >
> > I understand your point.
> > But the fact that this fix works for you doesn't mean that it will work
> for all DPDK users.
> >
> > For example, I provided two simple commands: "ip link set up" and "ip
> link set down".
> > Your fix works for only one of them. For me, this is not a proper fix.
> > It may work for you because you don't disable interfaces, but it will
> fail for users who do.
> >
> > On Wed, Feb 24, 2021 at 4:33 PM Elad Nachman <eladv6@gmail.com> wrote:
> >>
> >> Currently KNI has a lot of issues with deadlocks locking the code,
> >> after this commit, they are gone, and the code runs properly without
> >> crashing.
> >> That was tested with over 100 restarts of the application, which
> >> previously required a hard reset of the board.
> >>
> >> I think this benefit outweighs the complication of the code.
> >>
> >> The function is called with rtnl locked because this is how the Linux
> >> kernel is designed to work - it is not designed to work with deferral
> >> to user-space mid-function.
> >>
> >> To fix all such requests you need to reach an agreement with Linux
> >> netdev, which is unlikely.
> >>
> >> Calling user-space can be done asynchronously, as Ferruh asked, but
> >> then you will always have to return success, even on failure, as Linux
> >> kernel does not have a mechanism to asynchronously report on failure
> >> for such system calls.
> >>
> >> IMHO - weighing the non-reporting of failure versus how the code
> >> looks (as it functions perfectly OK), I decided to go with
> >> functionality.
> >>
> >> FYI,
> >>
> >> Elad.
> >>
> >> On Wed, Feb 24, 2021 at 2:50 PM Igor Ryzhov <iryzhov@nfware.com> wrote:
> >> >
> >> > This looks more like a hack than an actual fix to me.
> >> >
> >> > After this commit:
> >> > "ip link set up" is sent to the userspace with unlocked rtnl_lock
> >> > "ip link set down" is sent to the userspace with locked rtnl_lock
> >> >
> >> > How is this really fixing anything? IMHO it only complicates the code.
> >> > If talking with userspace under rtnl_lock is a problem, then we
> should fix all such requests, not only part of them.
> >> > If it is not a problem, then I don't see any point in merging this.
> >> >
> >> > On Tue, Feb 23, 2021 at 4:45 PM Elad Nachman <eladv6@gmail.com>
> wrote:
> >> >>
> >> >> This part of the series includes my fixes for the issues reported
> >> >> by Ferruh and Igor on top of part 1 of the patch series:
> >> >>
> >> >> A. KNI sync lock is being locked while rtnl is held.
> >> >> If two threads are calling kni_net_process_request() ,
> >> >> then the first one will take the sync lock, release rtnl lock then
> sleep.
> >> >> The second thread will try to lock sync lock while holding rtnl.
> >> >> The first thread will wake, and try to lock rtnl, resulting in a
> deadlock.
> >> >> The remedy is to release rtnl before locking the KNI sync lock.
> >> >> Since in between nothing is accessing Linux network-wise,
> >> >> no rtnl locking is needed.
> >> >>
> >> >> B. There is a race condition in __dev_close_many() processing the
> >> >> close_list while the application terminates.
> >> >> It looks like if two vEth devices are terminating,
> >> >> and one releases the rtnl lock, the other takes it,
> >> >> updating the close_list in an unstable state,
> >> >> causing the close_list to become a circular linked list,
> >> >> hence list_for_each_entry() will endlessly loop inside
> >> >> __dev_close_many() .
> >> >> Since the description for the original patch indicates the
> >> >> original motivation was bringing the device up,
> >> >> I have changed kni_net_process_request() to hold the rtnl mutex
> >> >> in case of bringing the device down since this is the path called
> >> >> from __dev_close_many() , causing the corruption of the close_list.
> >> >>
> >> >> Signed-off-by: Elad Nachman <eladv6@gmail.com>
> >> >> ---
> >> >> v3:
> >> >> * Include original patch and new patch as a series of patch, added a
> >> >>   comment to the new patch
> >> >> v2:
> >> >> * rebuild the patch as increment from patch 64106
> >> >> * fix comment and blank lines
> >> >> ---
> >> >>  kernel/linux/kni/kni_net.c | 29 +++++++++++++++++++++--------
> >> >>  1 file changed, 21 insertions(+), 8 deletions(-)
> >> >>
> >> >> diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c
> >> >> index f0b6e9a8d..017e44812 100644
> >> >> --- a/kernel/linux/kni/kni_net.c
> >> >> +++ b/kernel/linux/kni/kni_net.c
> >> >> @@ -110,9 +110,26 @@ kni_net_process_request(struct net_device *dev,
> struct rte_kni_request *req)
> >> >>         void *resp_va;
> >> >>         uint32_t num;
> >> >>         int ret_val;
> >> >> +       int req_is_dev_stop = 0;
> >> >> +
> >> >> +       /* For configuring the interface to down,
> >> >> +        * rtnl must be held all the way to prevent race condition
> >> >> +        * inside __dev_close_many() between two netdev instances of
> KNI
> >> >> +        */
> >> >> +       if (req->req_id == RTE_KNI_REQ_CFG_NETWORK_IF &&
> >> >> +                       req->if_up == 0)
> >> >> +               req_is_dev_stop = 1;
> >> >>
> >> >>         ASSERT_RTNL();
> >> >>
> >> >> +       /* Since we need to wait and RTNL mutex is held
> >> >> +        * drop the mutex and hold reference to keep device
> >> >> +        */
> >> >> +       if (!req_is_dev_stop) {
> >> >> +               dev_hold(dev);
> >> >> +               rtnl_unlock();
> >> >> +       }
> >> >> +
> >> >>         mutex_lock(&kni->sync_lock);
> >> >>
> >> >>         /* Construct data */
> >> >> @@ -124,16 +141,8 @@ kni_net_process_request(struct net_device *dev,
> struct rte_kni_request *req)
> >> >>                 goto fail;
> >> >>         }
> >> >>
> >> >> -       /* Since we need to wait and RTNL mutex is held
> >> >> -        * drop the mutex and hold refernce to keep device
> >> >> -        */
> >> >> -       dev_hold(dev);
> >> >> -       rtnl_unlock();
> >> >> -
> >> >>         ret_val = wait_event_interruptible_timeout(kni->wq,
> >> >>                         kni_fifo_count(kni->resp_q), 3 * HZ);
> >> >> -       rtnl_lock();
> >> >> -       dev_put(dev);
> >> >>
> >> >>         if (signal_pending(current) || ret_val <= 0) {
> >> >>                 ret = -ETIME;
> >> >> @@ -152,6 +161,10 @@ kni_net_process_request(struct net_device *dev,
> struct rte_kni_request *req)
> >> >>
> >> >>  fail:
> >> >>         mutex_unlock(&kni->sync_lock);
> >> >> +       if (!req_is_dev_stop) {
> >> >> +               rtnl_lock();
> >> >> +               dev_put(dev);
> >> >> +       }
> >> >>         return ret;
> >> >>  }
> >> >>
> >> >> --
> >> >> 2.17.1
> >> >>
>

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [dpdk-dev] [PATCH 2/2] kni: fix rtnl deadlocks and race conditions v3
  2021-02-24 14:41           ` Igor Ryzhov
@ 2021-02-24 14:56             ` Elad Nachman
  2021-02-24 15:18               ` Igor Ryzhov
  0 siblings, 1 reply; 42+ messages in thread
From: Elad Nachman @ 2021-02-24 14:56 UTC (permalink / raw)
  To: Igor Ryzhov; +Cc: Ferruh Yigit, Stephen Hemminger, dev

The deadlock scenarios are explained below.

One is described in Stephen Hemminger's original patch:

"

This fixes a deadlock when using KNI with bifurcated drivers.
Bringing kni device up always times out when using Mellanox
devices.

The kernel KNI driver sends message to userspace to complete
the request. For the case of bifurcated driver, this may involve
an additional request to kernel to change state. This request
would deadlock because KNI was holding the RTNL mutex.

"

And also in my patch:

"
KNI sync lock is being locked while rtnl is held.
If two threads are calling kni_net_process_request() ,
then the first one will take the sync lock, release rtnl lock then sleep.
The second thread will try to lock sync lock while holding rtnl.
The first thread will wake, and try to lock rtnl, resulting in a deadlock.
The remedy is to release rtnl before locking the KNI sync lock.
Since in between nothing is accessing Linux network-wise,
no rtnl locking is needed.
"

FYI,

Elad.

On Wed, Feb 24, 2021 at 4:41 PM Igor Ryzhov <iryzhov@nfware.com> wrote:
>
> Both link up and link down also work for me without this patch.
> So what's the point in merging it?
>
> Just to clarify - I am not against the idea of this patch.
> Talking to userspace under rtnl_lock is a bad idea.
> I just think that any patch should fix some specified problem.
>
> If this patch is trying to solve the overall "userspace request under rtnl_lock" problem,
> then it doesn't solve it correctly, because we still send link down requests under the lock.
>
> If this patch is trying to solve some other issue, for example, some "KNI deadlocks"
> you're talking about, then you should explain what these deadlocks are, how to reproduce
> them and why this patch solves the issue.
>
> On Wed, Feb 24, 2021 at 5:07 PM Elad Nachman <eladv6@gmail.com> wrote:
>>
>> I tested both link up and link down many times without any problems on
>> 100 restarts of the application.
>>
>> Having KNI deadlock frequently for real life applications is far worse, IMHO.
>>
>> FYI
>>
>> Elad.
>>
>> On Wed, Feb 24, 2021 at 4:04 PM Igor Ryzhov <iryzhov@nfware.com> wrote:
>> >
>> > Elad,
>> >
>> > I understand your point.
>> > But the fact that this fix works for you doesn't mean that it will work for all DPDK users.
>> >
>> > For example, I provided two simple commands: "ip link set up" and "ip link set down".
>> > Your fix works for only one of them. For me, this is not a proper fix.
>> > It may work for you because you don't disable interfaces, but it will fail for users who do.
>> >
>> > On Wed, Feb 24, 2021 at 4:33 PM Elad Nachman <eladv6@gmail.com> wrote:
>> >>
>> >> Currently KNI has a lot of issues with deadlocks locking the code,
>> >> after this commit, they are gone, and the code runs properly without
>> >> crashing.
>> >> That was tested with over 100 restarts of the application, which
>> >> previously required a hard reset of the board.
>> >>
>> >> I think this benefit outweighs the complication of the code.
>> >>
>> >> The function is called with rtnl locked because this is how the Linux
>> >> kernel is designed to work - it is not designed to work with deferral
>> >> to user-space mid-function.
>> >>
>> >> To fix all such requests you need to reach an agreement with Linux
>> >> netdev, which is unlikely.
>> >>
>> >> Calling user-space can be done asynchronously, as Ferruh asked, but
>> >> then you will always have to return success, even on failure, as Linux
>> >> kernel does not have a mechanism to asynchronously report on failure
>> >> for such system calls.
>> >>
>> >> IMHO - weighing the non-reporting of failure versus how the code
>> >> looks (as it functions perfectly OK), I decided to go with
>> >> functionality.
>> >>
>> >> FYI,
>> >>
>> >> Elad.
>> >>
>> >> On Wed, Feb 24, 2021 at 2:50 PM Igor Ryzhov <iryzhov@nfware.com> wrote:
>> >> >
>> >> > This looks more like a hack than an actual fix to me.
>> >> >
>> >> > After this commit:
>> >> > "ip link set up" is sent to the userspace with unlocked rtnl_lock
>> >> > "ip link set down" is sent to the userspace with locked rtnl_lock
>> >> >
>> >> > How is this really fixing anything? IMHO it only complicates the code.
>> >> > If talking with userspace under rtnl_lock is a problem, then we should fix all such requests, not only part of them.
>> >> > If it is not a problem, then I don't see any point in merging this.
>> >> >
>> >> > On Tue, Feb 23, 2021 at 4:45 PM Elad Nachman <eladv6@gmail.com> wrote:
>> >> >>
>> >> >> This part of the series includes my fixes for the issues reported
>> >> >> by Ferruh and Igor on top of part 1 of the patch series:
>> >> >>
>> >> >> A. KNI sync lock is being locked while rtnl is held.
>> >> >> If two threads are calling kni_net_process_request() ,
>> >> >> then the first one will take the sync lock, release rtnl lock then sleep.
>> >> >> The second thread will try to lock sync lock while holding rtnl.
>> >> >> The first thread will wake, and try to lock rtnl, resulting in a deadlock.
>> >> >> The remedy is to release rtnl before locking the KNI sync lock.
>> >> >> Since in between nothing is accessing Linux network-wise,
>> >> >> no rtnl locking is needed.
>> >> >>
>> >> >> B. There is a race condition in __dev_close_many() processing the
>> >> >> close_list while the application terminates.
>> >> >> It looks like if two vEth devices are terminating,
>> >> >> and one releases the rtnl lock, the other takes it,
>> >> >> updating the close_list in an unstable state,
>> >> >> causing the close_list to become a circular linked list,
>> >> >> hence list_for_each_entry() will endlessly loop inside
>> >> >> __dev_close_many() .
>> >> >> Since the description for the original patch indicates the
>> >> >> original motivation was bringing the device up,
>> >> >> I have changed kni_net_process_request() to hold the rtnl mutex
>> >> >> in case of bringing the device down since this is the path called
>> >> >> from __dev_close_many() , causing the corruption of the close_list.
>> >> >>
>> >> >> Signed-off-by: Elad Nachman <eladv6@gmail.com>
>> >> >> ---
>> >> >> v3:
>> >> >> * Include original patch and new patch as a series of patch, added a
>> >> >>   comment to the new patch
>> >> >> v2:
>> >> >> * rebuild the patch as increment from patch 64106
>> >> >> * fix comment and blank lines
>> >> >> ---
>> >> >>  kernel/linux/kni/kni_net.c | 29 +++++++++++++++++++++--------
>> >> >>  1 file changed, 21 insertions(+), 8 deletions(-)
>> >> >>
>> >> >> diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c
>> >> >> index f0b6e9a8d..017e44812 100644
>> >> >> --- a/kernel/linux/kni/kni_net.c
>> >> >> +++ b/kernel/linux/kni/kni_net.c
>> >> >> @@ -110,9 +110,26 @@ kni_net_process_request(struct net_device *dev, struct rte_kni_request *req)
>> >> >>         void *resp_va;
>> >> >>         uint32_t num;
>> >> >>         int ret_val;
>> >> >> +       int req_is_dev_stop = 0;
>> >> >> +
>> >> >> +       /* For configuring the interface to down,
>> >> >> +        * rtnl must be held all the way to prevent race condition
>> >> >> +        * inside __dev_close_many() between two netdev instances of KNI
>> >> >> +        */
>> >> >> +       if (req->req_id == RTE_KNI_REQ_CFG_NETWORK_IF &&
>> >> >> +                       req->if_up == 0)
>> >> >> +               req_is_dev_stop = 1;
>> >> >>
>> >> >>         ASSERT_RTNL();
>> >> >>
>> >> >> +       /* Since we need to wait and RTNL mutex is held
>> >> >> +        * drop the mutex and hold reference to keep device
>> >> >> +        */
>> >> >> +       if (!req_is_dev_stop) {
>> >> >> +               dev_hold(dev);
>> >> >> +               rtnl_unlock();
>> >> >> +       }
>> >> >> +
>> >> >>         mutex_lock(&kni->sync_lock);
>> >> >>
>> >> >>         /* Construct data */
>> >> >> @@ -124,16 +141,8 @@ kni_net_process_request(struct net_device *dev, struct rte_kni_request *req)
>> >> >>                 goto fail;
>> >> >>         }
>> >> >>
>> >> >> -       /* Since we need to wait and RTNL mutex is held
>> >> >> -        * drop the mutex and hold refernce to keep device
>> >> >> -        */
>> >> >> -       dev_hold(dev);
>> >> >> -       rtnl_unlock();
>> >> >> -
>> >> >>         ret_val = wait_event_interruptible_timeout(kni->wq,
>> >> >>                         kni_fifo_count(kni->resp_q), 3 * HZ);
>> >> >> -       rtnl_lock();
>> >> >> -       dev_put(dev);
>> >> >>
>> >> >>         if (signal_pending(current) || ret_val <= 0) {
>> >> >>                 ret = -ETIME;
>> >> >> @@ -152,6 +161,10 @@ kni_net_process_request(struct net_device *dev, struct rte_kni_request *req)
>> >> >>
>> >> >>  fail:
>> >> >>         mutex_unlock(&kni->sync_lock);
>> >> >> +       if (!req_is_dev_stop) {
>> >> >> +               rtnl_lock();
>> >> >> +               dev_put(dev);
>> >> >> +       }
>> >> >>         return ret;
>> >> >>  }
>> >> >>
>> >> >> --
>> >> >> 2.17.1
>> >> >>

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [dpdk-dev] [PATCH 2/2] kni: fix rtnl deadlocks and race conditions v3
  2021-02-24 14:56             ` Elad Nachman
@ 2021-02-24 15:18               ` Igor Ryzhov
       [not found]                 ` <CACXF7qkhkzFc-=v=iiBzh2V7rLjk1U34VUfPbNrnYJND_0TKHQ@mail.gmail.com>
  0 siblings, 1 reply; 42+ messages in thread
From: Igor Ryzhov @ 2021-02-24 15:18 UTC (permalink / raw)
  To: Elad Nachman; +Cc: Ferruh Yigit, Stephen Hemminger, dev

Stephen's idea was to fix the deadlock when working with the bifurcated
driver.
Your rework breaks this because you still send link down requests under
rtnl_lock.
Did you test your patch with Mellanox devices?

On Wed, Feb 24, 2021 at 5:56 PM Elad Nachman <eladv6@gmail.com> wrote:

> The deadlock scenarios are explained below:
>
> It is described in Stephen Hemminger's original patch:
>
> "
>
> This fixes a deadlock when using KNI with bifurcated drivers.
> Bringing kni device up always times out when using Mellanox
> devices.
>
> The kernel KNI driver sends message to userspace to complete
> the request. For the case of bifurcated driver, this may involve
> an additional request to kernel to change state. This request
> would deadlock because KNI was holding the RTNL mutex.
>
> "
>
> And also in my patch:
>
> "
> KNI sync lock is being locked while rtnl is held.
> If two threads are calling kni_net_process_request() ,
> then the first one will take the sync lock, release rtnl lock then sleep.
> The second thread will try to lock sync lock while holding rtnl.
> The first thread will wake, and try to lock rtnl, resulting in a deadlock.
> The remedy is to release rtnl before locking the KNI sync lock.
> Since in between nothing is accessing Linux network-wise,
> no rtnl locking is needed.
> "
>
> FYI,
>
> Elad.
>
> On Wed, Feb 24, 2021 at 4:41 PM Igor Ryzhov <iryzhov@nfware.com> wrote:
> >
> > Both link up and link down also work for me without this patch.
> > So what's the point in merging it?
> >
> > Just to clarify - I am not against the idea of this patch.
> > Talking to userspace under rtnl_lock is a bad idea.
> > I just think that any patch should fix some specified problem.
> >
> > If this patch is trying to solve the overall "userspace request under
> rtnl_lock" problem,
> > then it doesn't solve it correctly, because we still send link down
> requests under the lock.
> >
> > If this patch is trying to solve some other issue, for example, some
> "KNI deadlocks"
> > you're talking about, then you should explain what these deadlocks are,
> how to reproduce
> > them and why this patch solves the issue.
> >
> > On Wed, Feb 24, 2021 at 5:07 PM Elad Nachman <eladv6@gmail.com> wrote:
> >>
> >> I tested both link up and link down many times without any problems on
> >> 100 restarts of the application.
> >>
> >> Having KNI deadlock frequently for real life applications is far worse,
> IMHO.
> >>
> >> FYI
> >>
> >> Elad.
> >>
> >> On Wed, Feb 24, 2021 at 4:04 PM Igor Ryzhov <iryzhov@nfware.com> wrote:
> >> >
> >> > Elad,
> >> >
> >> > I understand your point.
> >> > But the fact that this fix works for you doesn't mean that it will
> work for all DPDK users.
> >> >
> >> > For example, I provided two simple commands: "ip link set up" and "ip
> link set down".
> >> > Your fix works for only one of them. For me, this is not a proper fix.
> >> > It may work for you because you don't disable interfaces, but it will
> fail for users who do.
> >> >
> >> > On Wed, Feb 24, 2021 at 4:33 PM Elad Nachman <eladv6@gmail.com>
> wrote:
> >> >>
> >> >> Currently KNI has a lot of issues with deadlocks locking the code,
> >> >> after this commit, they are gone, and the code runs properly without
> >> >> crashing.
> >> >> That was tested with over 100 restarts of the application, which
> >> >> previously required a hard reset of the board.
> >> >>
> >> >> I think this benefit outweighs the complication of the code.
> >> >>
> >> >> The function is called with rtnl locked because this is how the Linux
> >> >> kernel is designed to work - it is not designed to work with deferral
> >> >> to user-space mid-function.
> >> >>
> >> >> To fix all such requests you need to reach an agreement with Linux
> >> >> netdev, which is unlikely.
> >> >>
> >> >> Calling user-space can be done asynchronously, as Ferruh asked, but
> >> >> then you will always have to return success, even on failure, as
> Linux
> >> >> kernel does not have a mechanism to asynchronously report on failure
> >> >> for such system calls.
> >> >>
> >> >> IMHO - weighing the non-reporting of failure versus how the code
> >> >> looks (as it functions perfectly OK), I decided to go with
> >> >> functionality.
> >> >>
> >> >> FYI,
> >> >>
> >> >> Elad.
> >> >>
> >> >> On Wed, Feb 24, 2021 at 2:50 PM Igor Ryzhov <iryzhov@nfware.com>
> wrote:
> >> >> >
> >> >> > This looks more like a hack than an actual fix to me.
> >> >> >
> >> >> > After this commit:
> >> >> > "ip link set up" is sent to the userspace with unlocked rtnl_lock
> >> >> > "ip link set down" is sent to the userspace with locked rtnl_lock
> >> >> >
> >> >> > How is this really fixing anything? IMHO it only complicates the
> code.
> >> >> > If talking with userspace under rtnl_lock is a problem, then we
> should fix all such requests, not only part of them.
> >> >> > If it is not a problem, then I don't see any point in merging this.
> >> >> >
> >> >> > On Tue, Feb 23, 2021 at 4:45 PM Elad Nachman <eladv6@gmail.com>
> wrote:
> >> >> >>
> >> >> >> This part of the series includes my fixes for the issues reported
> >> >> >> by Ferruh and Igor on top of part 1 of the patch series:
> >> >> >>
> >> >> >> A. KNI sync lock is being locked while rtnl is held.
> >> >> >> If two threads are calling kni_net_process_request() ,
> >> >> >> then the first one will take the sync lock, release rtnl lock
> then sleep.
> >> >> >> The second thread will try to lock sync lock while holding rtnl.
> >> >> >> The first thread will wake, and try to lock rtnl, resulting in a
> deadlock.
> >> >> >> The remedy is to release rtnl before locking the KNI sync lock.
> >> >> >> Since in between nothing is accessing Linux network-wise,
> >> >> >> no rtnl locking is needed.
> >> >> >>
> >> >> >> B. There is a race condition in __dev_close_many() processing the
> >> >> >> close_list while the application terminates.
> >> >> >> It looks like if two vEth devices are terminating,
> >> >> >> and one releases the rtnl lock, the other takes it,
> >> >> >> updating the close_list in an unstable state,
> >> >> >> causing the close_list to become a circular linked list,
> >> >> >> hence list_for_each_entry() will endlessly loop inside
> >> >> >> __dev_close_many() .
> >> >> >> Since the description for the original patch indicates the
> >> >> >> original motivation was bringing the device up,
> >> >> >> I have changed kni_net_process_request() to hold the rtnl mutex
> >> >> >> in case of bringing the device down since this is the path called
> >> >> >> from __dev_close_many() , causing the corruption of the
> close_list.
> >> >> >>
> >> >> >> Signed-off-by: Elad Nachman <eladv6@gmail.com>
> >> >> >> ---
> >> >> >> v3:
> >> >> >> * Include original patch and new patch as a series of patch,
> added a
> >> >> >>   comment to the new patch
> >> >> >> v2:
> >> >> >> * rebuild the patch as increment from patch 64106
> >> >> >> * fix comment and blank lines
> >> >> >> ---
> >> >> >>  kernel/linux/kni/kni_net.c | 29 +++++++++++++++++++++--------
> >> >> >>  1 file changed, 21 insertions(+), 8 deletions(-)
> >> >> >>
> >> >> >> diff --git a/kernel/linux/kni/kni_net.c
> b/kernel/linux/kni/kni_net.c
> >> >> >> index f0b6e9a8d..017e44812 100644
> >> >> >> --- a/kernel/linux/kni/kni_net.c
> >> >> >> +++ b/kernel/linux/kni/kni_net.c
> >> >> >> @@ -110,9 +110,26 @@ kni_net_process_request(struct net_device
> *dev, struct rte_kni_request *req)
> >> >> >>         void *resp_va;
> >> >> >>         uint32_t num;
> >> >> >>         int ret_val;
> >> >> >> +       int req_is_dev_stop = 0;
> >> >> >> +
> >> >> >> +       /* For configuring the interface to down,
> >> >> >> +        * rtnl must be held all the way to prevent race condition
> >> >> >> +        * inside __dev_close_many() between two netdev instances
> of KNI
> >> >> >> +        */
> >> >> >> +       if (req->req_id == RTE_KNI_REQ_CFG_NETWORK_IF &&
> >> >> >> +                       req->if_up == 0)
> >> >> >> +               req_is_dev_stop = 1;
> >> >> >>
> >> >> >>         ASSERT_RTNL();
> >> >> >>
> >> >> >> +       /* Since we need to wait and RTNL mutex is held
> >> >> >> +        * drop the mutex and hold reference to keep device
> >> >> >> +        */
> >> >> >> +       if (!req_is_dev_stop) {
> >> >> >> +               dev_hold(dev);
> >> >> >> +               rtnl_unlock();
> >> >> >> +       }
> >> >> >> +
> >> >> >>         mutex_lock(&kni->sync_lock);
> >> >> >>
> >> >> >>         /* Construct data */
> >> >> >> @@ -124,16 +141,8 @@ kni_net_process_request(struct net_device
> *dev, struct rte_kni_request *req)
> >> >> >>                 goto fail;
> >> >> >>         }
> >> >> >>
> >> >> >> -       /* Since we need to wait and RTNL mutex is held
> >> >> >> -        * drop the mutex and hold refernce to keep device
> >> >> >> -        */
> >> >> >> -       dev_hold(dev);
> >> >> >> -       rtnl_unlock();
> >> >> >> -
> >> >> >>         ret_val = wait_event_interruptible_timeout(kni->wq,
> >> >> >>                         kni_fifo_count(kni->resp_q), 3 * HZ);
> >> >> >> -       rtnl_lock();
> >> >> >> -       dev_put(dev);
> >> >> >>
> >> >> >>         if (signal_pending(current) || ret_val <= 0) {
> >> >> >>                 ret = -ETIME;
> >> >> >> @@ -152,6 +161,10 @@ kni_net_process_request(struct net_device
> *dev, struct rte_kni_request *req)
> >> >> >>
> >> >> >>  fail:
> >> >> >>         mutex_unlock(&kni->sync_lock);
> >> >> >> +       if (!req_is_dev_stop) {
> >> >> >> +               rtnl_lock();
> >> >> >> +               dev_put(dev);
> >> >> >> +       }
> >> >> >>         return ret;
> >> >> >>  }
> >> >> >>
> >> >> >> --
> >> >> >> 2.17.1
> >> >> >>
>

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [dpdk-dev] [PATCH 2/2] kni: fix rtnl deadlocks and race conditions v3
  2021-02-24 12:49   ` Igor Ryzhov
  2021-02-24 13:33     ` Elad Nachman
@ 2021-02-24 15:54     ` Stephen Hemminger
  1 sibling, 0 replies; 42+ messages in thread
From: Stephen Hemminger @ 2021-02-24 15:54 UTC (permalink / raw)
  To: Igor Ryzhov; +Cc: Elad Nachman, Ferruh Yigit, dev

On Wed, 24 Feb 2021 15:49:49 +0300
Igor Ryzhov <iryzhov@nfware.com> wrote:

> This looks more like a hack than an actual fix to me.
> 
> After this commit:
> "ip link set up" is sent to the userspace with unlocked rtnl_lock
> "ip link set down" is sent to the userspace with locked rtnl_lock

Calling userspace with rtnl held is a recipe for disaster

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [dpdk-dev] [PATCH 2/2] kni: fix rtnl deadlocks and race conditions v3
       [not found]                 ` <CACXF7qkhkzFc-=v=iiBzh2V7rLjk1U34VUfPbNrnYJND_0TKHQ@mail.gmail.com>
@ 2021-02-24 16:31                   ` Igor Ryzhov
  0 siblings, 0 replies; 42+ messages in thread
From: Igor Ryzhov @ 2021-02-24 16:31 UTC (permalink / raw)
  To: Elad Nachman; +Cc: Ferruh Yigit, Stephen Hemminger, dev

Elad,

To make it work on Mellanox NIC, you need to find a way to send
ALL requests to userspace without rtnl_lock held, including link down.
As I understand, the race condition in __dev_close_many must be
solved somehow.

I can't provide remote access, but I am happy to test on Mellanox NICs,
if you find a way to fix link down requests.

On Wed, Feb 24, 2021 at 7:11 PM Elad Nachman <eladv6@gmail.com> wrote:

> Sorry, don't have Mellanox NIC currently. Will have one in 8-12 weeks.
> Will be happy to test it remotely if anyone can provide remote HW or
> VM (Azure, for example).
>
> Elad.
>
> On Wed, Feb 24, 2021 at 5:18 PM Igor Ryzhov <iryzhov@nfware.com> wrote:
> >
> > Stephen's idea was to fix the deadlock when working with the bifurcated
> driver.
> > Your rework breaks this because you still send link down requests under
> rtnl_lock.
> > Did you test your patch with Mellanox devices?
> >
> > On Wed, Feb 24, 2021 at 5:56 PM Elad Nachman <eladv6@gmail.com> wrote:
> >>
> >> The deadlock scenarios are explained below:
> >>
> >> It is described in Stephen Hemminger's original patch:
> >>
> >> "
> >>
> >> This fixes a deadlock when using KNI with bifurcated drivers.
> >> Bringing kni device up always times out when using Mellanox
> >> devices.
> >>
> >> The kernel KNI driver sends message to userspace to complete
> >> the request. For the case of bifurcated driver, this may involve
> >> an additional request to kernel to change state. This request
> >> would deadlock because KNI was holding the RTNL mutex.
> >>
> >> "
> >>
> >> And also in my patch:
> >>
> >> "
> >> KNI sync lock is being locked while rtnl is held.
> >> If two threads are calling kni_net_process_request() ,
> >> then the first one will take the sync lock, release rtnl lock then
> sleep.
> >> The second thread will try to lock sync lock while holding rtnl.
> >> The first thread will wake, and try to lock rtnl, resulting in a
> deadlock.
> >> The remedy is to release rtnl before locking the KNI sync lock.
> >> Since in between nothing is accessing Linux network-wise,
> >> no rtnl locking is needed.
> >> "
> >>
> >> FYI,
> >>
> >> Elad.
> >>
> >> On Wed, Feb 24, 2021 at 4:41 PM Igor Ryzhov <iryzhov@nfware.com> wrote:
> >> >
> >> > Both link up and link down also work for me without this patch.
> >> > So what's the point in merging it?
> >> >
> >> > Just to clarify - I am not against the idea of this patch.
> >> > Talking to userspace under rtnl_lock is a bad idea.
> >> > I just think that any patch should fix some specified problem.
> >> >
> >> > If this patch is trying to solve the overall "userspace request under
> rtnl_lock" problem,
> >> > then it doesn't solve it correctly, because we still send link down
> requests under the lock.
> >> >
> >> > If this patch is trying to solve some other issue, for example, some
> "KNI deadlocks"
> >> > you're talking about, then you should explain what these deadlocks
> are, how to reproduce
> >> > them and why this patch solves the issue.
> >> >
> >> > On Wed, Feb 24, 2021 at 5:07 PM Elad Nachman <eladv6@gmail.com>
> wrote:
> >> >>
> >> >> I tested both link up and link down many times without any problems
> on
> >> >> 100 restarts of the application.
> >> >>
> >> >> Having KNI deadlock frequently for real life applications is far
> worst, IMHO.
> >> >>
> >> >> FYI
> >> >>
> >> >> Elad.
> >> >>
> >> >> On Wed, Feb 24, 2021 at 4:04 PM Igor Ryzhov <iryzhov@nfware.com>
> wrote:
> >> >> >
> >> >> > Elad,
> >> >> >
> >> >> > I understand your point.
> >> >> > But the fact that this fix works for you doesn't mean that it will
> work for all DPDK users.
> >> >> >
> >> >> > For example, I provided two simple commands: "ip link set up" and
> "ip link set down".
> >> >> > Your fix works for only one of them. For me, this is not a proper
> fix.
> >> >> > It may work for you because you don't disable interfaces, but it
> will fail for users who do.
> >> >> >
> >> >> > On Wed, Feb 24, 2021 at 4:33 PM Elad Nachman <eladv6@gmail.com>
> wrote:
> >> >> >>
> >> >> >> Currently KNI has a lot of issues with deadlocks locking the code,
> >> >> >> after this commit, they are gone, and the code runs properly
> without
> >> >> >> crashing.
> >> >> >> That was tested with over 100 restarts of the application, which
> >> >> >> previously required a hard reset of the board.
> >> >> >>
> >> >> >> I think this benefit overweights the complication of the code.
> >> >> >>
> >> >> >> The function is called with rtnl locked because this is how the
> Linux
> >> >> >> kernel is designed to work - it is not designed to work with
> deferral
> >> >> >> to user-space mid-function.
> >> >> >>
> >> >> >> To fix all such requests you need to reach an agreement with Linux
> >> >> >> netdev, which is unlikely.
> >> >> >>
> >> >> >> Calling user-space can be done asynchronously, as Ferruh asked,
> but
> >> >> >> then you will always have to return success, even on failure, as
> Linux
> >> >> >> kernel does not have a mechanism to asynchronously report on
> failure
> >> >> >> for such system calls.
> >> >> >>
> >> >> >> IMHO - weighting the non-reporting of failure versus how the code
> >> >> >> looks (as it functions perfectly OK), I decided to go with
> >> >> >> functionality.
> >> >> >>
> >> >> >> FYI,
> >> >> >>
> >> >> >> Elad.
> >> >> >>
> >> >> >> On Wed, Feb 24, 2021 at 2:50 PM Igor Ryzhov <iryzhov@nfware.com>
> wrote:
> >> >> >> >
> >> >> >> > This looks more like a hack than an actual fix to me.
> >> >> >> >
> >> >> >> > After this commit:
> >> >> >> > "ip link set up" is sent to the userspace with unlocked
> rtnl_lock
> >> >> >> > "ip link set down" is sent to the userspace with locked
> rtnl_lock
> >> >> >> >
> >> >> >> > How is this really fixing anything? IMHO it only complicates
> the code.
> >> >> >> > If talking with userspace under rtnl_lock is a problem, then we
> should fix all such requests, not only part of them.
> >> >> >> > If it is not a problem, then I don't see any point in merging
> this.
> >> >> >> >
> >> >> >> > On Tue, Feb 23, 2021 at 4:45 PM Elad Nachman <eladv6@gmail.com>
> wrote:
> >> >> >> >>
> >> >> >> >> This part of the series includes my fixes for the issues
> reported
> >> >> >> >> by Ferruh and Igor on top of part 1 of the patch series:
> >> >> >> >>
> >> >> >> >> A. KNI sync lock is being locked while rtnl is held.
> >> >> >> >> If two threads are calling kni_net_process_request() ,
> >> >> >> >> then the first one will take the sync lock, release rtnl lock
> then sleep.
> >> >> >> >> The second thread will try to lock sync lock while holding
> rtnl.
> >> >> >> >> The first thread will wake, and try to lock rtnl, resulting in
> a deadlock.
> >> >> >> >> The remedy is to release rtnl before locking the KNI sync lock.
> >> >> >> >> Since in between nothing is accessing Linux network-wise,
> >> >> >> >> no rtnl locking is needed.
> >> >> >> >>
> >> >> >> >> B. There is a race condition in __dev_close_many() processing
> the
> >> >> >> >> close_list while the application terminates.
> >> >> >> >> It looks like if two vEth devices are terminating,
> >> >> >> >> and one releases the rtnl lock, the other takes it,
> >> >> >> >> updating the close_list in an unstable state,
> >> >> >> >> causing the close_list to become a circular linked list,
> >> >> >> >> hence list_for_each_entry() will endlessly loop inside
> >> >> >> >> __dev_close_many() .
> >> >> >> >> Since the description for the original patch indicate the
> >> >> >> >> original motivation was bringing the device up,
> >> >> >> >> I have changed kni_net_process_request() to hold the rtnl mutex
> >> >> >> >> in case of bringing the device down since this is the path
> called
> >> >> >> >> from __dev_close_many() , causing the corruption of the
> close_list.
> >> >> >> >>
> >> >> >> >> Signed-off-by: Elad Nachman <eladv6@gmail.com>
> >> >> >> >> ---
> >> >> >> >> v3:
> >> >> >> >> * Include original patch and new patch as a series of patch,
> added a
> >> >> >> >>   comment to the new patch
> >> >> >> >> v2:
> >> >> >> >> * rebuild the patch as increment from patch 64106
> >> >> >> >> * fix comment and blank lines
> >> >> >> >> ---
> >> >> >> >>  kernel/linux/kni/kni_net.c | 29 +++++++++++++++++++++--------
> >> >> >> >>  1 file changed, 21 insertions(+), 8 deletions(-)
> >> >> >> >>
> >> >> >> >> diff --git a/kernel/linux/kni/kni_net.c
> b/kernel/linux/kni/kni_net.c
> >> >> >> >> index f0b6e9a8d..017e44812 100644
> >> >> >> >> --- a/kernel/linux/kni/kni_net.c
> >> >> >> >> +++ b/kernel/linux/kni/kni_net.c
> >> >> >> >> @@ -110,9 +110,26 @@ kni_net_process_request(struct net_device
> *dev, struct rte_kni_request *req)
> >> >> >> >>         void *resp_va;
> >> >> >> >>         uint32_t num;
> >> >> >> >>         int ret_val;
> >> >> >> >> +       int req_is_dev_stop = 0;
> >> >> >> >> +
> >> >> >> >> +       /* For configuring the interface to down,
> >> >> >> >> +        * rtnl must be held all the way to prevent race
> condition
> >> >> >> >> +        * inside __dev_close_many() between two netdev
> instances of KNI
> >> >> >> >> +        */
> >> >> >> >> +       if (req->req_id == RTE_KNI_REQ_CFG_NETWORK_IF &&
> >> >> >> >> +                       req->if_up == 0)
> >> >> >> >> +               req_is_dev_stop = 1;
> >> >> >> >>
> >> >> >> >>         ASSERT_RTNL();
> >> >> >> >>
> >> >> >> >> +       /* Since we need to wait and RTNL mutex is held
> >> >> >> >> +        * drop the mutex and hold reference to keep device
> >> >> >> >> +        */
> >> >> >> >> +       if (!req_is_dev_stop) {
> >> >> >> >> +               dev_hold(dev);
> >> >> >> >> +               rtnl_unlock();
> >> >> >> >> +       }
> >> >> >> >> +
> >> >> >> >>         mutex_lock(&kni->sync_lock);
> >> >> >> >>
> >> >> >> >>         /* Construct data */
> >> >> >> >> @@ -124,16 +141,8 @@ kni_net_process_request(struct net_device
> *dev, struct rte_kni_request *req)
> >> >> >> >>                 goto fail;
> >> >> >> >>         }
> >> >> >> >>
> >> >> >> >> -       /* Since we need to wait and RTNL mutex is held
> >> >> >> >> -        * drop the mutex and hold refernce to keep device
> >> >> >> >> -        */
> >> >> >> >> -       dev_hold(dev);
> >> >> >> >> -       rtnl_unlock();
> >> >> >> >> -
> >> >> >> >>         ret_val = wait_event_interruptible_timeout(kni->wq,
> >> >> >> >>                         kni_fifo_count(kni->resp_q), 3 * HZ);
> >> >> >> >> -       rtnl_lock();
> >> >> >> >> -       dev_put(dev);
> >> >> >> >>
> >> >> >> >>         if (signal_pending(current) || ret_val <= 0) {
> >> >> >> >>                 ret = -ETIME;
> >> >> >> >> @@ -152,6 +161,10 @@ kni_net_process_request(struct net_device
> *dev, struct rte_kni_request *req)
> >> >> >> >>
> >> >> >> >>  fail:
> >> >> >> >>         mutex_unlock(&kni->sync_lock);
> >> >> >> >> +       if (!req_is_dev_stop) {
> >> >> >> >> +               rtnl_lock();
> >> >> >> >> +               dev_put(dev);
> >> >> >> >> +       }
> >> >> >> >>         return ret;
> >> >> >> >>  }
> >> >> >> >>
> >> >> >> >> --
> >> >> >> >> 2.17.1
> >> >> >> >>
>

^ permalink raw reply	[flat|nested] 42+ messages in thread

* [dpdk-dev] [PATCH 1/2] kni: fix kernel deadlock when using mlx devices
  2020-11-26 14:46 [dpdk-dev] [PATCH] kni: fix rtnl deadlocks and race conditions Elad Nachman
                   ` (3 preceding siblings ...)
  2021-02-23 13:45 ` [dpdk-dev] [PATCH 2/2] " Elad Nachman
@ 2021-02-25 14:32 ` Elad Nachman
  2021-02-25 14:32   ` [dpdk-dev] [PATCH 2/2] kni: fix rtnl deadlocks and race conditions v4 Elad Nachman
  2021-03-15 17:17   ` [dpdk-dev] [PATCH 1/2] kni: fix kernel deadlock when using mlx devices Ferruh Yigit
  2021-03-29 14:36 ` [dpdk-dev] [PATCH v5 1/3] kni: refactor user request processing Ferruh Yigit
  5 siblings, 2 replies; 42+ messages in thread
From: Elad Nachman @ 2021-02-25 14:32 UTC (permalink / raw)
  To: ferruh.yigit; +Cc: iryzhov, stephen, dev, eladv6

This first part of v4 of the patch series re-introduces Stephen Hemminger's
patch 64106. It changes the parameter that kni_net_process_request()
takes and introduces the initial rtnl unlocking mechanism.

Signed-off-by: Elad Nachman <eladv6@gmail.com>
---
v4:
* for if down case, send asynchronously with rtnl locked and without
  wait, returning immediately to avoid both kernel race conditions
  and deadlock in user-space
v3:
* Include original patch and new patch as a series of patch, added a
  comment to the new patch
v2:
* rebuild the patch as increment from patch 64106
* fix comment and blank lines
---
 kernel/linux/kni/kni_net.c | 34 ++++++++++++++++++----------------
 1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c
index 4b752083d..f0b6e9a8d 100644
--- a/kernel/linux/kni/kni_net.c
+++ b/kernel/linux/kni/kni_net.c
@@ -17,6 +17,7 @@
 #include <linux/skbuff.h>
 #include <linux/kthread.h>
 #include <linux/delay.h>
+#include <linux/rtnetlink.h>
 
 #include <rte_kni_common.h>
 #include <kni_fifo.h>
@@ -102,17 +103,15 @@ get_data_kva(struct kni_dev *kni, void *pkt_kva)
  * It can be called to process the request.
  */
 static int
-kni_net_process_request(struct kni_dev *kni, struct rte_kni_request *req)
+kni_net_process_request(struct net_device *dev, struct rte_kni_request *req)
 {
+	struct kni_dev *kni = netdev_priv(dev);
 	int ret = -1;
 	void *resp_va;
 	uint32_t num;
 	int ret_val;
 
-	if (!kni || !req) {
-		pr_err("No kni instance or request\n");
-		return -EINVAL;
-	}
+	ASSERT_RTNL();
 
 	mutex_lock(&kni->sync_lock);
 
@@ -125,8 +124,17 @@ kni_net_process_request(struct kni_dev *kni, struct rte_kni_request *req)
 		goto fail;
 	}
 
+	/* Since we need to wait and RTNL mutex is held
+	 * drop the mutex and hold reference to keep device
+	 */
+	dev_hold(dev);
+	rtnl_unlock();
+
 	ret_val = wait_event_interruptible_timeout(kni->wq,
 			kni_fifo_count(kni->resp_q), 3 * HZ);
+	rtnl_lock();
+	dev_put(dev);
+
 	if (signal_pending(current) || ret_val <= 0) {
 		ret = -ETIME;
 		goto fail;
@@ -155,7 +163,6 @@ kni_net_open(struct net_device *dev)
 {
 	int ret;
 	struct rte_kni_request req;
-	struct kni_dev *kni = netdev_priv(dev);
 
 	netif_start_queue(dev);
 	if (kni_dflt_carrier == 1)
@@ -168,7 +175,7 @@ kni_net_open(struct net_device *dev)
 
 	/* Setting if_up to non-zero means up */
 	req.if_up = 1;
-	ret = kni_net_process_request(kni, &req);
+	ret = kni_net_process_request(dev, &req);
 
 	return (ret == 0) ? req.result : ret;
 }
@@ -178,7 +185,6 @@ kni_net_release(struct net_device *dev)
 {
 	int ret;
 	struct rte_kni_request req;
-	struct kni_dev *kni = netdev_priv(dev);
 
 	netif_stop_queue(dev); /* can't transmit any more */
 	netif_carrier_off(dev);
@@ -188,7 +194,7 @@ kni_net_release(struct net_device *dev)
 
 	/* Setting if_up to 0 means down */
 	req.if_up = 0;
-	ret = kni_net_process_request(kni, &req);
+	ret = kni_net_process_request(dev, &req);
 
 	return (ret == 0) ? req.result : ret;
 }
@@ -643,14 +649,13 @@ kni_net_change_mtu(struct net_device *dev, int new_mtu)
 {
 	int ret;
 	struct rte_kni_request req;
-	struct kni_dev *kni = netdev_priv(dev);
 
 	pr_debug("kni_net_change_mtu new mtu %d to be set\n", new_mtu);
 
 	memset(&req, 0, sizeof(req));
 	req.req_id = RTE_KNI_REQ_CHANGE_MTU;
 	req.new_mtu = new_mtu;
-	ret = kni_net_process_request(kni, &req);
+	ret = kni_net_process_request(dev, &req);
 	if (ret == 0 && req.result == 0)
 		dev->mtu = new_mtu;
 
@@ -661,7 +666,6 @@ static void
 kni_net_change_rx_flags(struct net_device *netdev, int flags)
 {
 	struct rte_kni_request req;
-	struct kni_dev *kni = netdev_priv(netdev);
 
 	memset(&req, 0, sizeof(req));
 
@@ -683,7 +687,7 @@ kni_net_change_rx_flags(struct net_device *netdev, int flags)
 			req.promiscusity = 0;
 	}
 
-	kni_net_process_request(kni, &req);
+	kni_net_process_request(netdev, &req);
 }
 
 /*
@@ -742,7 +746,6 @@ kni_net_set_mac(struct net_device *netdev, void *p)
 {
 	int ret;
 	struct rte_kni_request req;
-	struct kni_dev *kni;
 	struct sockaddr *addr = p;
 
 	memset(&req, 0, sizeof(req));
@@ -754,8 +757,7 @@ kni_net_set_mac(struct net_device *netdev, void *p)
 	memcpy(req.mac_addr, addr->sa_data, netdev->addr_len);
 	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
 
-	kni = netdev_priv(netdev);
-	ret = kni_net_process_request(kni, &req);
+	ret = kni_net_process_request(netdev, &req);
 
 	return (ret == 0 ? req.result : ret);
 }
-- 
2.17.1


^ permalink raw reply	[flat|nested] 42+ messages in thread

* [dpdk-dev] [PATCH 2/2] kni: fix rtnl deadlocks and race conditions v4
  2021-02-25 14:32 ` [dpdk-dev] [PATCH 1/2] kni: fix kernel deadlock when using mlx devices Elad Nachman
@ 2021-02-25 14:32   ` Elad Nachman
  2021-02-25 21:01     ` Igor Ryzhov
  2021-03-15 17:17     ` Ferruh Yigit
  2021-03-15 17:17   ` [dpdk-dev] [PATCH 1/2] kni: fix kernel deadlock when using mlx devices Ferruh Yigit
  1 sibling, 2 replies; 42+ messages in thread
From: Elad Nachman @ 2021-02-25 14:32 UTC (permalink / raw)
  To: ferruh.yigit; +Cc: iryzhov, stephen, dev, eladv6

This part of the series includes my fixes for the issues reported
by Ferruh and Igor (and Igor comments for v3 of the patch)
on top of part 1 of the patch series:

A. KNI sync lock is being locked while rtnl is held.
If two threads are calling kni_net_process_request() ,
then the first one will take the sync lock, release rtnl lock then sleep.
The second thread will try to lock sync lock while holding rtnl.
The first thread will wake, and try to lock rtnl, resulting in a deadlock.
The remedy is to release rtnl before locking the KNI sync lock.
Since in between nothing is accessing Linux network-wise,
no rtnl locking is needed.

B. There is a race condition in __dev_close_many() processing the
close_list while the application terminates.
It looks like if two vEth devices are terminating,
and one releases the rtnl lock, the other takes it,
updating the close_list in an unstable state,
causing the close_list to become a circular linked list,
hence list_for_each_entry() will endlessly loop inside
__dev_close_many() .
Since the description of the original patch indicates that the
original motivation was bringing the device up,
I have changed kni_net_process_request() to hold the rtnl mutex
when bringing the device down, since this is the path called
from __dev_close_many(), causing the corruption of the close_list.
To prevent a deadlock with Mellanox devices in this case, the
code has been modified not to wait for user-space while holding
the rtnl lock.
Instead, after the request has been sent, all locks are relinquished
and the function exits immediately with a return code of zero (success).

To summarize:
request != interface down : unlock rtnl, send request to user-space,
wait for response, send the response error code to caller in user-space.

request == interface down: send request to user-space, return immediately
with error code of 0 (success) to user-space.

Signed-off-by: Elad Nachman <eladv6@gmail.com>


---
v4:
* for if down case, send asynchronously with rtnl locked and without
  wait, returning immediately to avoid both kernel race conditions
  and deadlock in user-space
v3:
* Include original patch and new patch as a series of patch, added a
  comment to the new patch
v2:
* rebuild the patch as increment from patch 64106
* fix comment and blank lines
---
 kernel/linux/kni/kni_net.c      | 41 +++++++++++++++++++++++++++------
 lib/librte_kni/rte_kni.c        |  7 ++++--
 lib/librte_kni/rte_kni_common.h |  1 +
 3 files changed, 40 insertions(+), 9 deletions(-)

diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c
index f0b6e9a8d..ba991802b 100644
--- a/kernel/linux/kni/kni_net.c
+++ b/kernel/linux/kni/kni_net.c
@@ -110,12 +110,34 @@ kni_net_process_request(struct net_device *dev, struct rte_kni_request *req)
 	void *resp_va;
 	uint32_t num;
 	int ret_val;
+	int req_is_dev_stop = 0;
+
+	/* For configuring the interface to down,
+	 * rtnl must be held all the way to prevent race condition
+	 * inside __dev_close_many() between two netdev instances of KNI
+	 */
+	if (req->req_id == RTE_KNI_REQ_CFG_NETWORK_IF &&
+			req->if_up == 0)
+		req_is_dev_stop = 1;
 
 	ASSERT_RTNL();
 
+	/* Since we need to wait and RTNL mutex is held
+	 * drop the mutex and hold reference to keep device
+	 */
+	if (!req_is_dev_stop) {
+		dev_hold(dev);
+		rtnl_unlock();
+	}
+
 	mutex_lock(&kni->sync_lock);
 
-	/* Construct data */
+	/* Construct data, for dev stop send asynchronously
+	 * so instruct user-space not to sent response as no
+	 * one will be waiting for it.
+	 */
+	if (req_is_dev_stop)
+		req->skip_post_resp_to_q = 1;
 	memcpy(kni->sync_kva, req, sizeof(struct rte_kni_request));
 	num = kni_fifo_put(kni->req_q, &kni->sync_va, 1);
 	if (num < 1) {
@@ -124,16 +146,16 @@ kni_net_process_request(struct net_device *dev, struct rte_kni_request *req)
 		goto fail;
 	}
 
-	/* Since we need to wait and RTNL mutex is held
-	 * drop the mutex and hold refernce to keep device
+	/* No result available since request is handled
+	 * asynchronously. set response to success.
 	 */
-	dev_hold(dev);
-	rtnl_unlock();
+	if (req_is_dev_stop) {
+		req->result = 0;
+		goto async;
+	}
 
 	ret_val = wait_event_interruptible_timeout(kni->wq,
 			kni_fifo_count(kni->resp_q), 3 * HZ);
-	rtnl_lock();
-	dev_put(dev);
 
 	if (signal_pending(current) || ret_val <= 0) {
 		ret = -ETIME;
@@ -148,10 +170,15 @@ kni_net_process_request(struct net_device *dev, struct rte_kni_request *req)
 	}
 
 	memcpy(req, kni->sync_kva, sizeof(struct rte_kni_request));
+async:
 	ret = 0;
 
 fail:
 	mutex_unlock(&kni->sync_lock);
+	if (!req_is_dev_stop) {
+		rtnl_lock();
+		dev_put(dev);
+	}
 	return ret;
 }
 
diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c
index 837d0217d..6d777266d 100644
--- a/lib/librte_kni/rte_kni.c
+++ b/lib/librte_kni/rte_kni.c
@@ -591,8 +591,11 @@ rte_kni_handle_request(struct rte_kni *kni)
 		break;
 	}
 
-	/* Construct response mbuf and put it back to resp_q */
-	ret = kni_fifo_put(kni->resp_q, (void **)&req, 1);
+	/* if needed, construct response mbuf and put it back to resp_q */
+	if (!req->skip_post_resp_to_q)
+		ret = kni_fifo_put(kni->resp_q, (void **)&req, 1);
+	else
+		ret = 1;
 	if (ret != 1) {
 		RTE_LOG(ERR, KNI, "Fail to put the muf back to resp_q\n");
 		return -1; /* It is an error of can't putting the mbuf back */
diff --git a/lib/librte_kni/rte_kni_common.h b/lib/librte_kni/rte_kni_common.h
index ffb318273..3b5d06850 100644
--- a/lib/librte_kni/rte_kni_common.h
+++ b/lib/librte_kni/rte_kni_common.h
@@ -48,6 +48,7 @@ struct rte_kni_request {
 		uint8_t promiscusity;/**< 1: promisc mode enable, 0: disable */
 		uint8_t allmulti;    /**< 1: all-multicast mode enable, 0: disable */
 	};
+	int32_t skip_post_resp_to_q; /**< 1: skip queue response 0: disable */
 	int32_t result;               /**< Result for processing request */
 } __attribute__((__packed__));
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [dpdk-dev] [PATCH 2/2] kni: fix rtnl deadlocks and race conditions v4
  2021-02-25 14:32   ` [dpdk-dev] [PATCH 2/2] kni: fix rtnl deadlocks and race conditions v4 Elad Nachman
@ 2021-02-25 21:01     ` Igor Ryzhov
  2021-02-26 15:48       ` Stephen Hemminger
  2021-03-15 17:17     ` Ferruh Yigit
  1 sibling, 1 reply; 42+ messages in thread
From: Igor Ryzhov @ 2021-02-25 21:01 UTC (permalink / raw)
  To: Elad Nachman; +Cc: Ferruh Yigit, Stephen Hemminger, dev

Hi Elad,

Thanks for the patch, but this is still NACK from me.

The only real advantage of KNI over other exceptional-path techniques
like virtio-user is the ability to configure DPDK-managed interfaces directly
from the kernel using well-known utils like iproute2. A very important part
of this is getting responses from the DPDK app and knowing the actual
result of command execution.
If you're making async requests to the application and you don't know
the result, then what's the point of using KNI at all?

Igor

On Thu, Feb 25, 2021 at 5:32 PM Elad Nachman <eladv6@gmail.com> wrote:

> This part of the series includes my fixes for the issues reported
> by Ferruh and Igor (and Igor comments for v3 of the patch)
> on top of part 1 of the patch series:
>
> A. KNI sync lock is being locked while rtnl is held.
> If two threads are calling kni_net_process_request() ,
> then the first one will take the sync lock, release rtnl lock then sleep.
> The second thread will try to lock sync lock while holding rtnl.
> The first thread will wake, and try to lock rtnl, resulting in a deadlock.
> The remedy is to release rtnl before locking the KNI sync lock.
> Since in between nothing is accessing Linux network-wise,
> no rtnl locking is needed.
>
> B. There is a race condition in __dev_close_many() processing the
> close_list while the application terminates.
> It looks like if two vEth devices are terminating,
> and one releases the rtnl lock, the other takes it,
> updating the close_list in an unstable state,
> causing the close_list to become a circular linked list,
> hence list_for_each_entry() will endlessly loop inside
> __dev_close_many() .
> Since the description for the original patch indicate the
> original motivation was bringing the device up,
> I have changed kni_net_process_request() to hold the rtnl mutex
> in case of bringing the device down since this is the path called
> from __dev_close_many() , causing the corruption of the close_list.
> In order to prevent deadlock in Mellanox device in this case, the
> code has been modified not to wait for user-space while holding
> the rtnl lock.
> Instead, after the request has been sent, all locks are relinquished
> and the function exits immediately with return code of zero (success).
>
> To summarize:
> request != interface down : unlock rtnl, send request to user-space,
> wait for response, send the response error code to caller in user-space.
>
> request == interface down: send request to user-space, return immediately
> with error code of 0 (success) to user-space.
>
> Signed-off-by: Elad Nachman <eladv6@gmail.com>
>
>
> ---
> v4:
> * for if down case, send asynchronously with rtnl locked and without
>   wait, returning immediately to avoid both kernel race conditions
>   and deadlock in user-space
> v3:
> * Include original patch and new patch as a series of patch, added a
>   comment to the new patch
> v2:
> * rebuild the patch as increment from patch 64106
> * fix comment and blank lines
> ---
>  kernel/linux/kni/kni_net.c      | 41 +++++++++++++++++++++++++++------
>  lib/librte_kni/rte_kni.c        |  7 ++++--
>  lib/librte_kni/rte_kni_common.h |  1 +
>  3 files changed, 40 insertions(+), 9 deletions(-)
>
> diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c
> index f0b6e9a8d..ba991802b 100644
> --- a/kernel/linux/kni/kni_net.c
> +++ b/kernel/linux/kni/kni_net.c
> @@ -110,12 +110,34 @@ kni_net_process_request(struct net_device *dev,
> struct rte_kni_request *req)
>         void *resp_va;
>         uint32_t num;
>         int ret_val;
> +       int req_is_dev_stop = 0;
> +
> +       /* For configuring the interface to down,
> +        * rtnl must be held all the way to prevent race condition
> +        * inside __dev_close_many() between two netdev instances of KNI
> +        */
> +       if (req->req_id == RTE_KNI_REQ_CFG_NETWORK_IF &&
> +                       req->if_up == 0)
> +               req_is_dev_stop = 1;
>
>         ASSERT_RTNL();
>
> +       /* Since we need to wait and RTNL mutex is held
> +        * drop the mutex and hold reference to keep device
> +        */
> +       if (!req_is_dev_stop) {
> +               dev_hold(dev);
> +               rtnl_unlock();
> +       }
> +
>         mutex_lock(&kni->sync_lock);
>
> -       /* Construct data */
> +       /* Construct data, for dev stop send asynchronously
> +        * so instruct user-space not to sent response as no
> +        * one will be waiting for it.
> +        */
> +       if (req_is_dev_stop)
> +               req->skip_post_resp_to_q = 1;
>         memcpy(kni->sync_kva, req, sizeof(struct rte_kni_request));
>         num = kni_fifo_put(kni->req_q, &kni->sync_va, 1);
>         if (num < 1) {
> @@ -124,16 +146,16 @@ kni_net_process_request(struct net_device *dev,
> struct rte_kni_request *req)
>                 goto fail;
>         }
>
> -       /* Since we need to wait and RTNL mutex is held
> -        * drop the mutex and hold refernce to keep device
> +       /* No result available since request is handled
> +        * asynchronously. set response to success.
>          */
> -       dev_hold(dev);
> -       rtnl_unlock();
> +       if (req_is_dev_stop) {
> +               req->result = 0;
> +               goto async;
> +       }
>
>         ret_val = wait_event_interruptible_timeout(kni->wq,
>                         kni_fifo_count(kni->resp_q), 3 * HZ);
> -       rtnl_lock();
> -       dev_put(dev);
>
>         if (signal_pending(current) || ret_val <= 0) {
>                 ret = -ETIME;
> @@ -148,10 +170,15 @@ kni_net_process_request(struct net_device *dev,
> struct rte_kni_request *req)
>         }
>
>         memcpy(req, kni->sync_kva, sizeof(struct rte_kni_request));
> +async:
>         ret = 0;
>
>  fail:
>         mutex_unlock(&kni->sync_lock);
> +       if (!req_is_dev_stop) {
> +               rtnl_lock();
> +               dev_put(dev);
> +       }
>         return ret;
>  }
>
> diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c
> index 837d0217d..6d777266d 100644
> --- a/lib/librte_kni/rte_kni.c
> +++ b/lib/librte_kni/rte_kni.c
> @@ -591,8 +591,11 @@ rte_kni_handle_request(struct rte_kni *kni)
>                 break;
>         }
>
> -       /* Construct response mbuf and put it back to resp_q */
> -       ret = kni_fifo_put(kni->resp_q, (void **)&req, 1);
> +       /* if needed, construct response mbuf and put it back to resp_q */
> +       if (!req->skip_post_resp_to_q)
> +               ret = kni_fifo_put(kni->resp_q, (void **)&req, 1);
> +       else
> +               ret = 1;
>         if (ret != 1) {
>                 RTE_LOG(ERR, KNI, "Fail to put the muf back to resp_q\n");
>                 return -1; /* It is an error of can't putting the mbuf
> back */
> diff --git a/lib/librte_kni/rte_kni_common.h
> b/lib/librte_kni/rte_kni_common.h
> index ffb318273..3b5d06850 100644
> --- a/lib/librte_kni/rte_kni_common.h
> +++ b/lib/librte_kni/rte_kni_common.h
> @@ -48,6 +48,7 @@ struct rte_kni_request {
>                 uint8_t promiscusity;/**< 1: promisc mode enable, 0:
> disable */
>                 uint8_t allmulti;    /**< 1: all-multicast mode enable, 0:
> disable */
>         };
> +       int32_t skip_post_resp_to_q; /**< 1: skip queue response 0:
> disable */
>         int32_t result;               /**< Result for processing request */
>  } __attribute__((__packed__));
>
> --
> 2.17.1
>
>

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [dpdk-dev] [PATCH 2/2] kni: fix rtnl deadlocks and race conditions v4
  2021-02-25 21:01     ` Igor Ryzhov
@ 2021-02-26 15:48       ` Stephen Hemminger
  2021-02-26 17:43         ` Elad Nachman
  0 siblings, 1 reply; 42+ messages in thread
From: Stephen Hemminger @ 2021-02-26 15:48 UTC (permalink / raw)
  To: Igor Ryzhov; +Cc: Elad Nachman, Ferruh Yigit, dev

On Fri, 26 Feb 2021 00:01:01 +0300
Igor Ryzhov <iryzhov@nfware.com> wrote:

> Hi Elad,
> 
> Thanks for the patch, but this is still NACK from me.
> 
> The only real advantage of KNI over other exceptional-path techniques
> like virtio-user is the ability to configure DPDK-managed interfaces
> directly
> from the kernel using well-known utils like iproute2. A very important part
> of this is getting responses from the DPDK app and knowing the actual
> result of command execution.
> If you're making async requests to the application and you don't know
> the result, then what's the point of using KNI at all?
> 
> Igor

Do you have a better proposal that keeps the request result but does not
call userspace with the lock held?

PS: I also have a strong dislike of KNI; as designed, it would have been rejected
by Linux kernel developers.  A better solution would be a userspace version of
something like devlink devices. But doing control operations by proxy is
a locking nightmare.

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [dpdk-dev] [PATCH 2/2] kni: fix rtnl deadlocks and race conditions v4
  2021-02-26 15:48       ` Stephen Hemminger
@ 2021-02-26 17:43         ` Elad Nachman
  2021-03-01  8:10           ` Igor Ryzhov
  0 siblings, 1 reply; 42+ messages in thread
From: Elad Nachman @ 2021-02-26 17:43 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: Igor Ryzhov, Ferruh Yigit, dev

Given the way the kernel handles its locks and lists for the
__dev_close_many() path, there is no way to get around this with rtnl unlocked:
"

There is a race condition in __dev_close_many() processing the
close_list while the application terminates.
It looks like if two vEth devices are terminating,
and one releases the rtnl lock, the other takes it,
updating the close_list in an unstable state,
causing the close_list to become a circular linked list,
hence list_for_each_entry() will endlessly loop inside
__dev_close_many() .

"
And I don't expect David Miller will bend the kernel networking for DPDK or KNI.

But - Stephen - if you can personally convince David to accept a
kernel patch which will separate the close_list locking mechanism to a
separate (RCU?) lock, then I can introduce first a patch to the kernel
which will add a lock for the close_list, this way rtnl can be
unlocked for the if down case.

After that kernel patch, your original patch + relocation of the sync
mutex locking will do the job .

Otherwise, rtnl has to be kept locked all of the way for the if down
case in order to prevent corruption causing a circular linked list out
of the close_list, causing a hang in the kernel.

Currently, the rtnl lock is the only thing keeping the close_list from
corruption.

If you doubt that rtnl cannot be unlocked for the dev close path, you can
consult David for his opinion, as I think it is critical to understand
what the kernel can or cannot do, or expects to be done, before we can
unlock its locks as we wish inside rte_kni.ko .

Otherwise, if we are still in disagreement on how to patch this set of
problems, I think the responsible way around it is to completely
remove kni from the main dpdk tree and move it to dpdk-kmods
repository.

I know BSD style open-source does not carry legal responsibility from
the developers, but I think when a bunch of developers know a piece of
code is highly buggy, they should not leave it for countless new users
to bounce their head desperately against, if they cannot agree on a
correct way to solve the bunch of problems, of which I think we all
agree exist (we just do not agree on the proper solution or patch)...

That's my two cents,

Elad.

On Fri, Feb 26, 2021 at 5:49 PM Stephen Hemminger
<stephen@networkplumber.org> wrote:
>
> On Fri, 26 Feb 2021 00:01:01 +0300
> Igor Ryzhov <iryzhov@nfware.com> wrote:
>
> > Hi Elad,
> >
> > Thanks for the patch, but this is still NACK from me.
> >
> > The only real advantage of KNI over other exceptional-path techniques
> > like virtio-user is the ability to configure DPDK-managed interfaces
> > directly
> > from the kernel using well-known utils like iproute2. A very important part
> > of this is getting responses from the DPDK app and knowing the actual
> > result of command execution.
> > If you're making async requests to the application and you don't know
> > the result, then what's the point of using KNI at all?
> >
> > Igor
>
> Do you have a better proposal that keeps the request result but does not
> call userspace with lock held.
>
> PS: I also have strong dislike of KNI, as designed it would have been rejected
> by Linux kernel developers.  A better solution would be userspace version of
> something like devlink devices. But doing control operations by proxy is
> a locking nightmare.

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [dpdk-dev] [PATCH 2/2] kni: fix rtnl deadlocks and race conditions v4
  2021-02-26 17:43         ` Elad Nachman
@ 2021-03-01  8:10           ` Igor Ryzhov
  2021-03-01 16:38             ` Stephen Hemminger
  2021-03-01 20:27             ` Dan Gora
  0 siblings, 2 replies; 42+ messages in thread
From: Igor Ryzhov @ 2021-03-01  8:10 UTC (permalink / raw)
  To: Elad Nachman; +Cc: Stephen Hemminger, Ferruh Yigit, dev

Stephen,

No, I don't have a better proposal, but I think it is not correct to change
the behavior of KNI (making link down without a real response).
Even though we know that communicating with userspace under rtnl_lock is a
bad idea, it has worked as it is for many years already.

Elad,

I agree with you that KNI should be removed from the main tree if it is not
possible to fix this __dev_close_many issue.
There were discussions about this multiple times already, but no one is
working on this AFAIK.
Last time the discussion was a month ago:
https://www.mail-archive.com/dev@dpdk.org/msg196033.html

Igor

On Fri, Feb 26, 2021 at 8:43 PM Elad Nachman <eladv6@gmail.com> wrote:

> The way the kernel handles its locks and lists for the dev close many
> path, there is no way you can go around this with rtnl unlocked :
> "
>
> There is a race condition in __dev_close_many() processing the
> close_list while the application terminates.
> It looks like if two vEth devices are terminating,
> and one releases the rtnl lock, the other takes it,
> updating the close_list in an unstable state,
> causing the close_list to become a circular linked list,
> hence list_for_each_entry() will endlessly loop inside
> __dev_close_many() .
>
> "
> And I don't expect David Miller will bend the kernel networking for DPDK
> or KNI.
>
> But - Stephen - if you can personally convince David to accept a
> kernel patch which will separate the close_list locking mechanism to a
> separate (RCU?) lock, then I can introduce first a patch to the kernel
> which will add a lock for the close_list, this way rtnl can be
> unlocked for the if down case.
>
> After that kernel patch, your original patch + relocation of the sync
> mutex locking will do the job .
>
> Otherwise, rtnl has to be kept locked all of the way for the if down
> case in order to prevent corruption causing a circular linked list out
> of the close_list, causing a hang in the kernel.
>
> Currently, the rtnl lock is the only thing keeping the close_list from
> corruption.
>
> If you doubt rtnl cannot be unlocked for dev close path, you can
> consult David for his opinion, as I think it is critical to understand
> what the kernel can or cannot do, or expects to be done before we can
> unlock its locks as we wish inside rte_kni.ko .
>
> Otherwise, if we are still in disagreement on how to patch this set of
> problems, I think the responsible way around it is to completely
> remove kni from the main dpdk tree and move it to dpdk-kmods
> repository.
>
> I know BSD style open-source does not carry legal responsibility from
> the developers, but I think when a bunch of developers know a piece of
> code is highly buggy, they should not leave it for countless new users
> to bounce their head desperately against, if they cannot agree on a
> correct way to solve the bunch of problems, of which I think we all
> agree exist (we just do not agree on the proper solution or patch)...
>
> That's my two cents,
>
> Elad.
>
> On Fri, Feb 26, 2021 at 5:49 PM Stephen Hemminger
> <stephen@networkplumber.org> wrote:
> >
> > On Fri, 26 Feb 2021 00:01:01 +0300
> > Igor Ryzhov <iryzhov@nfware.com> wrote:
> >
> > > Hi Elad,
> > >
> > > Thanks for the patch, but this is still NACK from me.
> > >
> > > The only real advantage of KNI over other exceptional-path techniques
> > > like virtio-user is the ability to configure DPDK-managed interfaces
> > > directly
> > > from the kernel using well-known utils like iproute2. A very important
> part
> > > of this is getting responses from the DPDK app and knowing the actual
> > > result of command execution.
> > > If you're making async requests to the application and you don't know
> > > the result, then what's the point of using KNI at all?
> > >
> > > Igor
> >
> > Do you have a better proposal that keeps the request result but does not
> > call userspace with lock held.
> >
> > PS: I also have strong dislike of KNI, as designed it would have been
> rejected
> > by Linux kernel developers.  A better solution would be userspace
> version of
> > something like devlink devices. But doing control operations by proxy is
> > a locking nightmare.
>

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [dpdk-dev] [PATCH 2/2] kni: fix rtnl deadlocks and race conditions v4
  2021-03-01  8:10           ` Igor Ryzhov
@ 2021-03-01 16:38             ` Stephen Hemminger
  2021-03-15 16:58               ` Ferruh Yigit
  2021-03-01 20:27             ` Dan Gora
  1 sibling, 1 reply; 42+ messages in thread
From: Stephen Hemminger @ 2021-03-01 16:38 UTC (permalink / raw)
  To: Igor Ryzhov; +Cc: Elad Nachman, Ferruh Yigit, dev

On Mon, 1 Mar 2021 11:10:01 +0300
Igor Ryzhov <iryzhov@nfware.com> wrote:

> Stephen,
> 
> No, I don't have a better proposal, but I think it is not correct to change
> the behavior of KNI (making link down without a real response).
> Even though we know that communicating with userspace under rtnl_lock is a
> bad idea, it works as it is for many years already.
> 
> Elad,
> 
> I agree with you that KNI should be removed from the main tree if it is not
> possible to fix this __dev_close_many issue.
> There were discussions about this multiple times already, but no one is
> working on this AFAIK.
> Last time the discussion was a month ago:
> https://www.mail-archive.com/dev@dpdk.org/msg196033.html
> 
> Igor

The better proposal would be to make DPDK virtio smarter.
There already are virtio devices that must handle this (VDPA, etc.).
And when you can control link through virtio, then put a big warning
in KNI that says "Don't use this".

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [dpdk-dev] [PATCH 2/2] kni: fix rtnl deadlocks and race conditions v4
  2021-03-01  8:10           ` Igor Ryzhov
  2021-03-01 16:38             ` Stephen Hemminger
@ 2021-03-01 20:27             ` Dan Gora
  2021-03-01 21:26               ` Dan Gora
  1 sibling, 1 reply; 42+ messages in thread
From: Dan Gora @ 2021-03-01 20:27 UTC (permalink / raw)
  To: Igor Ryzhov; +Cc: Elad Nachman, Stephen Hemminger, Ferruh Yigit, dev

Hi All,

Sorry to butt in on this, but I fixed this same issue about 3 years
ago in my application, but I was never able to get the changes
integrated and eventually just gave up trying.

The rule with KNI is:
1) The app should have a separate control thread per rte_kni which
just spins calling rte_kni_handle_request().  This ensures that other
threads calling rte_kni_XXX functions will always get a response.

2) In order to deal with lockups and timeouts when closing the device, I sent
patches which separated the closing process into two steps:
rte_kni_release(), which would unregister the underlying netdev, and then
rte_kni_free(), which would free the KNI portions of the KNI device.
When rte_kni_release() is called, the kernel netdev is unregistered and
a response is sent back to the application. The control thread calling
rte_kni_handle_request() is still running, so the application will
still get a response back from the kernel and not lock up. The
application then kills the control thread so that
rte_kni_handle_request() is not called again, and then it
calls rte_kni_free(), which frees all of the FIFOs and closes the
device.

If anyone is interested the patches are probably still floating around
patchwork.  If not you can check them out here:

https://github.com/danielgora/dpdk.git

thanks-
dan

On Mon, Mar 1, 2021 at 5:10 AM Igor Ryzhov <iryzhov@nfware.com> wrote:
>
> Stephen,
>
> No, I don't have a better proposal, but I think it is not correct to change
> the behavior of KNI (making link down without a real response).
> Even though we know that communicating with userspace under rtnl_lock is a
> bad idea, it works as it is for many years already.
>
> Elad,
>
> I agree with you that KNI should be removed from the main tree if it is not
> possible to fix this __dev_close_many issue.
> There were discussions about this multiple times already, but no one is
> working on this AFAIK.
> Last time the discussion was a month ago:
> https://www.mail-archive.com/dev@dpdk.org/msg196033.html
>
> Igor
>
> On Fri, Feb 26, 2021 at 8:43 PM Elad Nachman <eladv6@gmail.com> wrote:
>
> > The way the kernel handles its locks and lists for the dev close many
> > path, there is no way you can go around this with rtnl unlocked :
> > "
> >
> > There is a race condition in __dev_close_many() processing the
> > close_list while the application terminates.
> > It looks like if two vEth devices are terminating,
> > and one releases the rtnl lock, the other takes it,
> > updating the close_list in an unstable state,
> > causing the close_list to become a circular linked list,
> > hence list_for_each_entry() will endlessly loop inside
> > __dev_close_many() .
> >
> > "
> > And I don't expect David Miller will bend the kernel networking for DPDK
> > or KNI.
> >
> > But - Stephen - if you can personally convince David to accept a
> > kernel patch which will separate the close_list locking mechanism to a
> > separate (RCU?) lock, then I can introduce first a patch to the kernel
> > which will add a lock for the close_list, this way rtnl can be
> > unlocked for the if down case.
> >
> > After that kernel patch, your original patch + relocation of the sync
> > mutex locking will do the job .
> > >
> > Otherwise, rtnl has to be kept locked all of the way for the if down
> > case in order to prevent corruption causing a circular linked list out
> > of the close_list, causing a hang in the kernel.
> > >
> > Currently, the rtnl lock is the only thing keeping the close_list from
> > corruption.
> >
> > If you doubt rtnl cannot be unlocked for dev close path, you can
> > consult David for his opinion, as I think it is critical to understand
> > what the kernel can or cannot do, or expects to be done before we can
> > unlock its locks as we wish inside rte_kni.ko .
> >
> > Otherwise, if we are still in disagreement on how to patch this set of
> > problems, I think the responsible way around it is to completely
> > remove kni from the main dpdk tree and move it to dpdk-kmods
> > repository.
> >
> > I know BSD style open-source does not carry legal responsibility from
> > the developers, but I think when a bunch of developers know a piece of
> > code is highly buggy, they should not leave it for countless new users
> > to bounce their head desperately against, if they cannot agree on a
> > correct way to solve the bunch of problems, of which I think we all
> > agree exist (we just do not agree on the proper solution or patch)...
> >
> > That's my two cents,
> >
> > Elad.
> >
> > On Fri, Feb 26, 2021 at 5:49 PM Stephen Hemminger
> > <stephen@networkplumber.org> wrote:
> > >
> > > On Fri, 26 Feb 2021 00:01:01 +0300
> > > Igor Ryzhov <iryzhov@nfware.com> wrote:
> > >
> > > > Hi Elad,
> > > >
> > > > Thanks for the patch, but this is still NACK from me.
> > > >
> > > > The only real advantage of KNI over other exceptional-path techniques
> > > > like virtio-user is the ability to configure DPDK-managed interfaces
> > > > directly
> > > > from the kernel using well-known utils like iproute2. A very important
> > part
> > > > of this is getting responses from the DPDK app and knowing the actual
> > > > result of command execution.
> > > > If you're making async requests to the application and you don't know
> > > > the result, then what's the point of using KNI at all?
> > > >
> > > > Igor
> > >
> > > Do you have a better proposal that keeps the request result but does not
> > > call userspace with lock held.
> > >
> > > PS: I also have strong dislike of KNI, as designed it would have been
> > rejected
> > > by Linux kernel developers.  A better solution would be userspace
> > version of
> > > something like devlink devices. But doing control operations by proxy is
> > > a locking nightmare.
> >

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [dpdk-dev] [PATCH 2/2] kni: fix rtnl deadlocks and race conditions v4
  2021-03-01 20:27             ` Dan Gora
@ 2021-03-01 21:26               ` Dan Gora
  2021-03-02 16:44                 ` Elad Nachman
  0 siblings, 1 reply; 42+ messages in thread
From: Dan Gora @ 2021-03-01 21:26 UTC (permalink / raw)
  To: Igor Ryzhov; +Cc: Elad Nachman, Stephen Hemminger, Ferruh Yigit, dev

This is from my git commit fixing this:

kni: separate releasing netdev from freeing KNI interface

    Currently the rte_kni kernel driver suffers from a problem where
    when the interface is released, it generates a callback to the DPDK
    application to change the interface state to Down.  However, after the
    DPDK application handles the callback and generates a response back to
    the kernel, the rte_kni driver cannot wake the thread which is asleep
    waiting for the response, because it is holding the kni_link_lock
    semaphore and it has already removed the 'struct kni_dev' from the
    list of interfaces to poll for responses.

    This means that if the KNI interface is in the Up state when
    rte_kni_release() is called, it will always sleep for three seconds
    until kni_net_release gives up waiting for a response from the DPDK
    application.

    To fix this, we must separate the step to release the kernel network
    interface from the steps to remove the KNI interface from the list
    of interfaces to poll.

    When the kernel network interface is removed with unregister_netdev(),
    if the interface is up, it will generate a callback to mark the
    interface down, which calls kni_net_release().  kni_net_release() will
    block waiting for the DPDK application to call rte_kni_handle_request()
    to handle the callback, but it also needs the thread in the KNI driver
    (either the per-dev thread for multi-thread or the per-driver thread)
    to call kni_net_poll_resp() in order to wake the thread sleeping in
    kni_net_release (actually kni_net_process_request()).

    So now, KNI interfaces should be removed as such:

    1) The user calls rte_kni_release().  This only unregisters the
    netdev in the kernel, but touches nothing else.  This allows all the
    threads to run which are necessary to handle the callback into the
    DPDK application to mark the interface down.

    2) The user stops the thread running rte_kni_handle_request().
    After rte_kni_release() has been called, there will be no more
    callbacks for that interface so it is not necessary.  It cannot be
    running at the same time that rte_kni_free() frees all of the FIFOs
    and DPDK memory for that KNI interface.

    3) The user calls rte_kni_free().  This performs the RTE_KNI_IOCTL_FREE
    ioctl which calls kni_ioctl_free().  This function removes the struct
    kni_dev from the list of interfaces to poll (and kills the per-dev
    kthread, if configured for multi-thread), then frees the memory in
    the FIFOs.

    Signed-off-by: Dan Gora <dg@adax.com>

I'm not sure that this is exactly the problem that you're seeing, but
it sounds like it to me.

thanks
dan

On Mon, Mar 1, 2021 at 5:27 PM Dan Gora <dg@adax.com> wrote:
>
> Hi All,
>
> Sorry to butt in on this, but I fixed this same issue about 3 years
> ago in my application, but I was never able to get the changes
> integrated and eventually just gave up trying.
>
> The rule with KNI is:
> 1) The app should have a separate control thread per rte_kni which
> just spins calling rte_kni_handle_request().  This ensures that other
> threads calling rte_kni_XXX functions will always get a response.
>
> 2) In order to deal with lockups and timeouts when closing the device, I sent
> patches which separated the closing process into two steps:
> rte_kni_release() which would unregister the underlying netdev, then
> rte_kni_free() which would free the KNI portions of the KNI device.
> When rte_kni_release() is called the kernel netdev is unregistered and
> a response is sent back to the application, the control thread calling
> rte_kni_handle_request() is still running, so the application will
> still get a response back from the kernel and not lock up, the
> application then kills the control thread so that
> rte_kni_handle_request() is not called again, then the application
> calls rte_kni_free() which frees all of the FIFOs and closes the
> device.
>
> If anyone is interested the patches are probably still floating around
> patchwork.  If not you can check them out here:
>
> https://github.com/danielgora/dpdk.git
>
> thanks-
> dan
>
> On Mon, Mar 1, 2021 at 5:10 AM Igor Ryzhov <iryzhov@nfware.com> wrote:
> >
> > Stephen,
> >
> > No, I don't have a better proposal, but I think it is not correct to change
> > the behavior of KNI (making link down without a real response).
> > Even though we know that communicating with userspace under rtnl_lock is a
> > bad idea, it works as it is for many years already.
> >
> > Elad,
> >
> > I agree with you that KNI should be removed from the main tree if it is not
> > possible to fix this __dev_close_many issue.
> > There were discussions about this multiple times already, but no one is
> > working on this AFAIK.
> > Last time the discussion was a month ago:
> > https://www.mail-archive.com/dev@dpdk.org/msg196033.html
> >
> > Igor
> >
> > On Fri, Feb 26, 2021 at 8:43 PM Elad Nachman <eladv6@gmail.com> wrote:
> >
> > > The way the kernel handles its locks and lists for the dev close many
> > > path, there is no way you can go around this with rtnl unlocked :
> > > "
> > >
> > > There is a race condition in __dev_close_many() processing the
> > > close_list while the application terminates.
> > > It looks like if two vEth devices are terminating,
> > > and one releases the rtnl lock, the other takes it,
> > > updating the close_list in an unstable state,
> > > causing the close_list to become a circular linked list,
> > > hence list_for_each_entry() will endlessly loop inside
> > > __dev_close_many() .
> > >
> > > "
> > > And I don't expect David Miller will bend the kernel networking for DPDK
> > > or KNI.
> > >
> > > But - Stephen - if you can personally convince David to accept a
> > > kernel patch which will separate the close_list locking mechanism to a
> > > separate (RCU?) lock, then I can introduce first a patch to the kernel
> > > which will add a lock for the close_list, this way rtnl can be
> > > unlocked for the if down case.
> > >
> > > After that kernel patch, your original patch + relocation of the sync
> > > mutex locking will do the job .
> > > >
> > > Otherwise, rtnl has to be kept locked all of the way for the if down
> > > case in order to prevent corruption causing a circular linked list out
> > > of the close_list, causing a hang in the kernel.
> > > >
> > > Currently, the rtnl lock is the only thing keeping the close_list from
> > > corruption.
> > >
> > > If you doubt rtnl cannot be unlocked for dev close path, you can
> > > consult David for his opinion, as I think it is critical to understand
> > > what the kernel can or cannot do, or expects to be done before we can
> > > unlock its locks as we wish inside rte_kni.ko .
> > >
> > > Otherwise, if we are still in disagreement on how to patch this set of
> > > problems, I think the responsible way around it is to completely
> > > remove kni from the main dpdk tree and move it to dpdk-kmods
> > > repository.
> > >
> > > I know BSD style open-source does not carry legal responsibility from
> > > the developers, but I think when a bunch of developers know a piece of
> > > code is highly buggy, they should not leave it for countless new users
> > > to bounce their head desperately against, if they cannot agree on a
> > > correct way to solve the bunch of problems, of which I think we all
> > > agree exist (we just do not agree on the proper solution or patch)...
> > >
> > > That's my two cents,
> > >
> > > Elad.
> > >
> > > On Fri, Feb 26, 2021 at 5:49 PM Stephen Hemminger
> > > <stephen@networkplumber.org> wrote:
> > > >
> > > > On Fri, 26 Feb 2021 00:01:01 +0300
> > > > Igor Ryzhov <iryzhov@nfware.com> wrote:
> > > >
> > > > > Hi Elad,
> > > > >
> > > > > Thanks for the patch, but this is still NACK from me.
> > > > >
> > > > > The only real advantage of KNI over other exceptional-path techniques
> > > > > like virtio-user is the ability to configure DPDK-managed interfaces
> > > > > directly
> > > > > from the kernel using well-known utils like iproute2. A very important
> > > part
> > > > > of this is getting responses from the DPDK app and knowing the actual
> > > > > result of command execution.
> > > > > If you're making async requests to the application and you don't know
> > > > > the result, then what's the point of using KNI at all?
> > > > >
> > > > > Igor
> > > >
> > > > Do you have a better proposal that keeps the request result but does not
> > > > call userspace with lock held.
> > > >
> > > > PS: I also have strong dislike of KNI, as designed it would have been
> > > rejected
> > > > by Linux kernel developers.  A better solution would be userspace
> > > version of
> > > > something like devlink devices. But doing control operations by proxy is
> > > > a locking nightmare.
> > >



-- 
Dan Gora
Software Engineer

Adax, Inc.
Rua Dona Maria Alves, 1070 Casa 5
Centro
Ubatuba, SP
CEP 11680-000
Brasil

Tel: +55 (12) 3833-1021  (Brazil and outside of US)
    : +1 (510) 859-4801  (Inside of US)
    : dan_gora (Skype)

email: dg@adax.com

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [dpdk-dev] [PATCH 2/2] kni: fix rtnl deadlocks and race conditions v4
  2021-03-01 21:26               ` Dan Gora
@ 2021-03-02 16:44                 ` Elad Nachman
  0 siblings, 0 replies; 42+ messages in thread
From: Elad Nachman @ 2021-03-02 16:44 UTC (permalink / raw)
  To: Dan Gora; +Cc: Igor Ryzhov, Stephen Hemminger, Ferruh Yigit, dev

Hi Dan,

Thanks for the information but you are addressing a different problem. The
problem discussed here is making ifconfig up or down while the DPDK
application is running.

Elad.

בתאריך יום ב׳, 1 במרץ 2021, 23:26, מאת Dan Gora ‏<dg@adax.com>:

> This is from my git commit fixing this:
>
> kni: separate releasing netdev from freeing KNI interface
>
>     Currently the rte_kni kernel driver suffers from a problem where
>     when the interface is released, it generates a callback to the DPDK
>     application to change the interface state to Down.  However, after the
>     DPDK application handles the callback and generates a response back to
>     the kernel, the rte_kni driver cannot wake the thread which is asleep
>     waiting for the response, because it is holding the kni_link_lock
>     semaphore and it has already removed the 'struct kni_dev' from the
>     list of interfaces to poll for responses.
>
>     This means that if the KNI interface is in the Up state when
>     rte_kni_release() is called, it will always sleep for three seconds
>     until kni_net_release gives up waiting for a response from the DPDK
>     application.
>
>     To fix this, we must separate the step to release the kernel network
>     interface from the steps to remove the KNI interface from the list
>     of interfaces to poll.
>
>     When the kernel network interface is removed with unregister_netdev(),
>     if the interface is up, it will generate a callback to mark the
>     interface down, which calls kni_net_release().  kni_net_release() will
>     block waiting for the DPDK application to call rte_kni_handle_request()
>     to handle the callback, but it also needs the thread in the KNI driver
>     (either the per-dev thread for multi-thread or the per-driver thread)
>     to call kni_net_poll_resp() in order to wake the thread sleeping in
>     kni_net_release (actually kni_net_process_request()).
>
>     So now, KNI interfaces should be removed as such:
>
>     1) The user calls rte_kni_release().  This only unregisters the
>     netdev in the kernel, but touches nothing else.  This allows all the
>     threads to run which are necessary to handle the callback into the
>     DPDK application to mark the interface down.
>
>     2) The user stops the thread running rte_kni_handle_request().
>     After rte_kni_release() has been called, there will be no more
>     callbacks for that interface so it is not necessary.  It cannot be
>     running at the same time that rte_kni_free() frees all of the FIFOs
>     and DPDK memory for that KNI interface.
>
>     3) The user calls rte_kni_free().  This performs the RTE_KNI_IOCTL_FREE
>     ioctl which calls kni_ioctl_free().  This function removes the struct
>     kni_dev from the list of interfaces to poll (and kills the per-dev
>     kthread, if configured for multi-thread), then frees the memory in
>     the FIFOs.
>
>     Signed-off-by: Dan Gora <dg@adax.com>
>
> I'm not sure that this is exactly the problem that you're seeing, but
> it sounds like it to me.
>
> thanks
> dan
>
> On Mon, Mar 1, 2021 at 5:27 PM Dan Gora <dg@adax.com> wrote:
> >
> > Hi All,
> >
> > Sorry to butt in on this, but I fixed this same issue about 3 years
> > ago in my application, but I was never able to get the changes
> > integrated and eventually just gave up trying.
> >
> > The rule with KNI is:
> > 1) The app should have a separate control thread per rte_kni which
> > just spins calling rte_kni_handle_request().  This ensures that other
> > threads calling rte_kni_XXX functions will always get a response.
> >
> > 2) In order to deal with lockups and timeouts when closing the device, I
> sent
> > patches which separated the closing process into two steps:
> > rte_kni_release() which would unregister the underlying netdev, then
> > rte_kni_free() which would free the KNI portions of the KNI device.
> > When rte_kni_release() is called the kernel netdev is unregistered and
> > a response is sent back to the application, the control thread calling
> > rte_kni_handle_request() is still running, so the application will
> > still get a response back from the kernel and not lock up, the
> > application then kills the control thread so that
> > rte_kni_handle_request() is not called again, then the application
> > calls rte_kni_free() which frees all of the FIFOs and closes the
> > device.
> >
> > If anyone is interested the patches are probably still floating around
> > patchwork.  If not you can check them out here:
> >
> > https://github.com/danielgora/dpdk.git
> >
> > thanks-
> > dan
> >
> > On Mon, Mar 1, 2021 at 5:10 AM Igor Ryzhov <iryzhov@nfware.com> wrote:
> > >
> > > Stephen,
> > >
> > > No, I don't have a better proposal, but I think it is not correct to
> change
> > > the behavior of KNI (making link down without a real response).
> > > Even though we know that communicating with userspace under rtnl_lock
> is a
> > > bad idea, it works as it is for many years already.
> > >
> > > Elad,
> > >
> > > I agree with you that KNI should be removed from the main tree if it
> is not
> > > possible to fix this __dev_close_many issue.
> > > There were discussions about this multiple times already, but no one is
> > > working on this AFAIK.
> > > Last time the discussion was a month ago:
> > > https://www.mail-archive.com/dev@dpdk.org/msg196033.html
> > >
> > > Igor
> > >
> > > On Fri, Feb 26, 2021 at 8:43 PM Elad Nachman <eladv6@gmail.com> wrote:
> > >
> > > > The way the kernel handles its locks and lists for the dev close many
> > > > path, there is no way you can go around this with rtnl unlocked :
> > > > "
> > > >
> > > > There is a race condition in __dev_close_many() processing the
> > > > close_list while the application terminates.
> > > > It looks like if two vEth devices are terminating,
> > > > and one releases the rtnl lock, the other takes it,
> > > > updating the close_list in an unstable state,
> > > > causing the close_list to become a circular linked list,
> > > > hence list_for_each_entry() will endlessly loop inside
> > > > __dev_close_many() .
> > > >
> > > > "
> > > > And I don't expect David Miller will bend the kernel networking for
> DPDK
> > > > or KNI.
> > > >
> > > > But - Stephen - if you can personally convince David to accept a
> > > > kernel patch which will separate the close_list locking mechanism to
> a
> > > > separate (RCU?) lock, then I can introduce first a patch to the
> kernel
> > > > which will add a lock for the close_list, this way rtnl can be
> > > > unlocked for the if down case.
> > > >
> > > > After that kernel patch, your original patch + relocation of the sync
> > > > mutex locking will do the job .
> > > > >
> > > > Otherwise, rtnl has to be kept locked all of the way for the if down
> > > > case in order to prevent corruption causing a circular linked list
> out
> > > > of the close_list, causing a hang in the kernel.
> > > > >
> > > > Currently, the rtnl lock is the only thing keeping the close_list
> from
> > > > corruption.
> > > >
> > > > If you doubt rtnl cannot be unlocked for dev close path, you can
> > > > consult David for his opinion, as I think it is critical to
> understand
> > > > what the kernel can or cannot do, or expects to be done before we can
> > > > unlock its locks as we wish inside rte_kni.ko .
> > > >
> > > > Otherwise, if we are still in disagreement on how to patch this set
> of
> > > > problems, I think the responsible way around it is to completely
> > > > remove kni from the main dpdk tree and move it to dpdk-kmods
> > > > repository.
> > > >
> > > > I know BSD style open-source does not carry legal responsibility from
> > > > the developers, but I think when a bunch of developers know a piece
> of
> > > > code is highly buggy, they should not leave it for countless new
> users
> > > > to bounce their head desperately against, if they cannot agree on a
> > > > correct way to solve the bunch of problems, of which I think we all
> > > > agree exist (we just do not agree on the proper solution or patch)...
> > > >
> > > > That's my two cents,
> > > >
> > > > Elad.
> > > >
> > > > On Fri, Feb 26, 2021 at 5:49 PM Stephen Hemminger
> > > > <stephen@networkplumber.org> wrote:
> > > > >
> > > > > On Fri, 26 Feb 2021 00:01:01 +0300
> > > > > Igor Ryzhov <iryzhov@nfware.com> wrote:
> > > > >
> > > > > > Hi Elad,
> > > > > >
> > > > > > Thanks for the patch, but this is still NACK from me.
> > > > > >
> > > > > > The only real advantage of KNI over other exceptional-path
> techniques
> > > > > > like virtio-user is the ability to configure DPDK-managed
> interfaces
> > > > > > directly
> > > > > > from the kernel using well-known utils like iproute2. A very
> important
> > > > part
> > > > > > of this is getting responses from the DPDK app and knowing the
> actual
> > > > > > result of command execution.
> > > > > > If you're making async requests to the application and you don't
> know
> > > > > > the result, then what's the point of using KNI at all?
> > > > > >
> > > > > > Igor
> > > > >
> > > > > Do you have a better proposal that keeps the request result but
> does not
> > > > > call userspace with lock held.
> > > > >
> > > > > PS: I also have strong dislike of KNI, as designed it would have
> been
> > > > rejected
> > > > > by Linux kernel developers.  A better solution would be userspace
> > > > version of
> > > > > something like devlink devices. But doing control operations by
> proxy is
> > > > > a locking nightmare.
> > > >
>
>
>
> --
> Dan Gora
> Software Engineer
>
> Adax, Inc.
> Rua Dona Maria Alves, 1070 Casa 5
> Centro
> Ubatuba, SP
> CEP 11680-000
> Brasil
>
> Tel: +55 (12) 3833-1021  (Brazil and outside of US)
>     : +1 (510) 859-4801  (Inside of US)
>     : dan_gora (Skype)
>
> email: dg@adax.com
>

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [dpdk-dev] [PATCH 2/2] kni: fix rtnl deadlocks and race conditions v4
  2021-03-01 16:38             ` Stephen Hemminger
@ 2021-03-15 16:58               ` Ferruh Yigit
  0 siblings, 0 replies; 42+ messages in thread
From: Ferruh Yigit @ 2021-03-15 16:58 UTC (permalink / raw)
  To: Igor Ryzhov, Elad Nachman; +Cc: dev, Dan Gora, Stephen Hemminger

On 3/1/2021 4:38 PM, Stephen Hemminger wrote:
> On Mon, 1 Mar 2021 11:10:01 +0300
> Igor Ryzhov <iryzhov@nfware.com> wrote:
> 
>> Stephen,
>>
>> No, I don't have a better proposal, but I think it is not correct to change
>> the behavior of KNI (making link down without a real response).
>> Even though we know that communicating with userspace under rtnl_lock is a
>> bad idea, it works as it is for many years already.
>>
>> Elad,
>>
>> I agree with you that KNI should be removed from the main tree if it is not
>> possible to fix this __dev_close_many issue.
>> There were discussions about this multiple times already, but no one is
>> working on this AFAIK.
>> Last time the discussion was a month ago:
>> https://www.mail-archive.com/dev@dpdk.org/msg196033.html
>>
>> Igor
> 
> The better proposal would be to make DPDK virtio smarter.
> There already is virtio devices that must handle this (VDPA) etc.
> And when you can control link through virtio, then put a big warning
> in KNI that says "Don't use this"
> 

Hi Igor, Elad,

I think it is reasonable to make the ifdown request async to solve the problem;
we can still keep sync as the default and enable async with a kernel parameter, to cover both cases.

I will put more details on the patches.

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [dpdk-dev] [PATCH 1/2] kni: fix kernel deadlock when using mlx devices
  2021-02-25 14:32 ` [dpdk-dev] [PATCH 1/2] kni: fix kernel deadlock when using mlx devices Elad Nachman
  2021-02-25 14:32   ` [dpdk-dev] [PATCH 2/2] kni: fix rtnl deadlocks and race conditions v4 Elad Nachman
@ 2021-03-15 17:17   ` Ferruh Yigit
  1 sibling, 0 replies; 42+ messages in thread
From: Ferruh Yigit @ 2021-03-15 17:17 UTC (permalink / raw)
  To: Elad Nachman; +Cc: iryzhov, stephen, dev

On 2/25/2021 2:32 PM, Elad Nachman wrote:
> This first part of v4 of the patch re-introduces Stephen Hemminger's
> patch 64106 . This part changes the parameter kni_net_process_request()
> gets and introduces the initial rtnl unlocking mechanism.
> 
> Signed-off-by: Elad Nachman <eladv6@gmail.com>
> ---
> v4:
> * for if down case, send asynchronously with rtnl locked and without
>    wait, returning immediately to avoid both kernel race conditions
>    and deadlock in user-space
> v3:
> * Include original patch and new patch as a series of patch, added a
>    comment to the new patch
> v2:
> * rebuild the patch as increment from patch 64106
> * fix comment and blank lines

<...>

>   
> +	/* Since we need to wait and RTNL mutex is held
> +	 * drop the mutex and hold reference to keep device
> +	 */
> +	dev_hold(dev);
> +	rtnl_unlock();
> +
>   	ret_val = wait_event_interruptible_timeout(kni->wq,
>   			kni_fifo_count(kni->resp_q), 3 * HZ);
> +	rtnl_lock();
> +	dev_put(dev);
> +

Hi Elad,

Let's drop the rtnl lock/unlock from this patch, since it will be changed in the next patch
anyway, and make this patch only the function parameter change, as preparation for the
actual change.


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [dpdk-dev] [PATCH 2/2] kni: fix rtnl deadlocks and race conditions v4
  2021-02-25 14:32   ` [dpdk-dev] [PATCH 2/2] kni: fix rtnl deadlocks and race conditions v4 Elad Nachman
  2021-02-25 21:01     ` Igor Ryzhov
@ 2021-03-15 17:17     ` Ferruh Yigit
  2021-03-16 18:35       ` Elad Nachman
  1 sibling, 1 reply; 42+ messages in thread
From: Ferruh Yigit @ 2021-03-15 17:17 UTC (permalink / raw)
  To: Elad Nachman; +Cc: iryzhov, stephen, dev, Dan Gora

On 2/25/2021 2:32 PM, Elad Nachman wrote:
> This part of the series includes my fixes for the issues reported
> by Ferruh and Igor (and Igor comments for v3 of the patch)
> on top of part 1 of the patch series:
> 
> A. KNI sync lock is being locked while rtnl is held.
> If two threads are calling kni_net_process_request() ,
> then the first one will take the sync lock, release rtnl lock then sleep.
> The second thread will try to lock sync lock while holding rtnl.
> The first thread will wake, and try to lock rtnl, resulting in a deadlock.
> The remedy is to release rtnl before locking the KNI sync lock.
> Since in between nothing is accessing Linux network-wise,
> no rtnl locking is needed.
> 
> B. There is a race condition in __dev_close_many() processing the
> close_list while the application terminates.
> It looks like if two vEth devices are terminating,
> and one releases the rtnl lock, the other takes it,
> updating the close_list in an unstable state,
> causing the close_list to become a circular linked list,
> hence list_for_each_entry() will endlessly loop inside
> __dev_close_many() .
> Since the description for the original patch indicate the
> original motivation was bringing the device up,
> I have changed kni_net_process_request() to hold the rtnl mutex
> in case of bringing the device down since this is the path called
> from __dev_close_many() , causing the corruption of the close_list.
> In order to prevent deadlock in Mellanox device in this case, the
> code has been modified not to wait for user-space while holding
> the rtnl lock.
> Instead, after the request has been sent, all locks are relinquished
> and the function exits immediately with return code of zero (success).
> 
> To summarize:
> request != interface down : unlock rtnl, send request to user-space,
> wait for response, send the response error code to caller in user-space.
> 
> request == interface down: send request to user-space, return immediately
> with error code of 0 (success) to user-space.
> 
> Signed-off-by: Elad Nachman <eladv6@gmail.com>
> 
> 
> ---
> v4:
> * for if down case, send asynchronously with rtnl locked and without
>    wait, returning immediately to avoid both kernel race conditions
>    and deadlock in user-space
> v3:
> * Include original patch and new patch as a series of patch, added a
>    comment to the new patch
> v2:
> * rebuild the patch as increment from patch 64106
> * fix comment and blank lines
> ---
>   kernel/linux/kni/kni_net.c      | 41 +++++++++++++++++++++++++++------
>   lib/librte_kni/rte_kni.c        |  7 ++++--
>   lib/librte_kni/rte_kni_common.h |  1 +
>   3 files changed, 40 insertions(+), 9 deletions(-)
> 
> diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c
> index f0b6e9a8d..ba991802b 100644
> --- a/kernel/linux/kni/kni_net.c
> +++ b/kernel/linux/kni/kni_net.c
> @@ -110,12 +110,34 @@ kni_net_process_request(struct net_device *dev, struct rte_kni_request *req)
>   	void *resp_va;
>   	uint32_t num;
>   	int ret_val;
> +	int req_is_dev_stop = 0;
> +
> +	/* For configuring the interface to down,
> +	 * rtnl must be held all the way to prevent race condition
> +	 * inside __dev_close_many() between two netdev instances of KNI
> +	 */
> +	if (req->req_id == RTE_KNI_REQ_CFG_NETWORK_IF &&
> +			req->if_up == 0)
> +		req_is_dev_stop = 1;

Having these request type checks in the 'kni_net_process_request()' function
looks like a hack.
Since a new field is being added to "struct rte_kni_request" anyway, it can be a more
generic 'async' field, and the requesting function, like 'kni_net_release()', can
set it to support async requests.

And can you please split out the more generic async request
support into its own patch, which should do:
- add new 'async' field to "struct rte_kni_request"
- in 'kni_net_process_request()', if 'req->async' set, do not wait for response
- in library, 'lib/librte_kni/rte_kni.c', in 'rte_kni_handle_request()' 
function, if the request is async don't put the response
(These are already done in this patch)

Overall it can be a three-patch set:
1) Function parameter change
2) Add generic async request support (with documentation update)
3) rtnl unlock and make 'kni_net_release()' request async (actual fix)
(We can discuss more if to make 'kni_net_release()' async with a kernel 
parameter or not)

What do you think, does it make sense?


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [dpdk-dev] [PATCH 2/2] kni: fix rtnl deadlocks and race conditions v4
  2021-03-15 17:17     ` Ferruh Yigit
@ 2021-03-16 18:35       ` Elad Nachman
  2021-03-16 18:42         ` Ferruh Yigit
  0 siblings, 1 reply; 42+ messages in thread
From: Elad Nachman @ 2021-03-16 18:35 UTC (permalink / raw)
  To: Ferruh Yigit; +Cc: Igor Ryzhov, Stephen Hemminger, dev, Dan Gora

Hi,

Owing to my current development schedule and obligations, I see no
opportunity to make this set of changes in the near future.

Sorry,

Elad.

בתאריך יום ב׳, 15 במרץ 2021, 19:17, מאת Ferruh Yigit ‏<
ferruh.yigit@intel.com>:

> On 2/25/2021 2:32 PM, Elad Nachman wrote:
> > This part of the series includes my fixes for the issues reported
> > by Ferruh and Igor (and Igor comments for v3 of the patch)
> > on top of part 1 of the patch series:
> >
> > A. KNI sync lock is being locked while rtnl is held.
> > If two threads are calling kni_net_process_request() ,
> > then the first one will take the sync lock, release rtnl lock then sleep.
> > The second thread will try to lock sync lock while holding rtnl.
> > The first thread will wake, and try to lock rtnl, resulting in a
> deadlock.
> > The remedy is to release rtnl before locking the KNI sync lock.
> > Since in between nothing is accessing Linux network-wise,
> > no rtnl locking is needed.
> >
> > B. There is a race condition in __dev_close_many() processing the
> > close_list while the application terminates.
> > It looks like if two vEth devices are terminating,
> > and one releases the rtnl lock, the other takes it,
> > updating the close_list in an unstable state,
> > causing the close_list to become a circular linked list,
> > hence list_for_each_entry() will endlessly loop inside
> > __dev_close_many() .
> > Since the description for the original patch indicate the
> > original motivation was bringing the device up,
> > I have changed kni_net_process_request() to hold the rtnl mutex
> > in case of bringing the device down since this is the path called
> > from __dev_close_many() , causing the corruption of the close_list.
> > In order to prevent deadlock in Mellanox device in this case, the
> > code has been modified not to wait for user-space while holding
> > the rtnl lock.
> > Instead, after the request has been sent, all locks are relinquished
> > and the function exits immediately with return code of zero (success).
> >
> > To summarize:
> > request != interface down : unlock rtnl, send request to user-space,
> > wait for response, send the response error code to caller in user-space.
> >
> > request == interface down: send request to user-space, return immediately
> > with error code of 0 (success) to user-space.
> >
> > Signed-off-by: Elad Nachman <eladv6@gmail.com>
> >
> >
> > ---
> > v4:
> > * for if down case, send asynchronously with rtnl locked and without
> >    wait, returning immediately to avoid both kernel race conditions
> >    and deadlock in user-space
> > v3:
> > * Include original patch and new patch as a series of patch, added a
> >    comment to the new patch
> > v2:
> > * rebuild the patch as increment from patch 64106
> > * fix comment and blank lines
> > ---
> >   kernel/linux/kni/kni_net.c      | 41 +++++++++++++++++++++++++++------
> >   lib/librte_kni/rte_kni.c        |  7 ++++--
> >   lib/librte_kni/rte_kni_common.h |  1 +
> >   3 files changed, 40 insertions(+), 9 deletions(-)
> >
> > diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c
> > index f0b6e9a8d..ba991802b 100644
> > --- a/kernel/linux/kni/kni_net.c
> > +++ b/kernel/linux/kni/kni_net.c
> > @@ -110,12 +110,34 @@ kni_net_process_request(struct net_device *dev,
> struct rte_kni_request *req)
> >       void *resp_va;
> >       uint32_t num;
> >       int ret_val;
> > +     int req_is_dev_stop = 0;
> > +
> > +     /* For configuring the interface to down,
> > +      * rtnl must be held all the way to prevent race condition
> > +      * inside __dev_close_many() between two netdev instances of KNI
> > +      */
> > +     if (req->req_id == RTE_KNI_REQ_CFG_NETWORK_IF &&
> > +                     req->if_up == 0)
> > +             req_is_dev_stop = 1;
>
> Having this request type checks in the 'kni_net_process_request()'
> function
> looks like hack.
> Since adding a new field into the "struct rte_kni_request", that can be a
> more
> generic 'asnyc' field, and the requested function, like
> 'kni_net_release()' can
> set it to support async requests.
>
> And can you please separate the function to add a more generic async
> request
> support on its patch, which should do:
> - add new 'asnyc' field to "struct rte_kni_request"
> - in 'kni_net_process_request()', if 'req->async' set, do not wait for
> response
> - in library, 'lib/librte_kni/rte_kni.c', in 'rte_kni_handle_request()'
> function, if the request is async don't put the response
> (These are already done in this patch)
>
> Overall it can be three patch set:
> 1) Function parameter change
> 2) Add generic async request support (with documentation update)
> 3) rtnl unlock and make 'kni_net_release()' request async (actual fix)
> (We can discuss more if to make 'kni_net_release()' async with a kernel
> parameter or not)
>
> What do you think, does it make sense?
>
>

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [dpdk-dev] [PATCH 2/2] kni: fix rtnl deadlocks and race conditions v4
  2021-03-16 18:35       ` Elad Nachman
@ 2021-03-16 18:42         ` Ferruh Yigit
  0 siblings, 0 replies; 42+ messages in thread
From: Ferruh Yigit @ 2021-03-16 18:42 UTC (permalink / raw)
  To: Elad Nachman; +Cc: Igor Ryzhov, Stephen Hemminger, dev, Dan Gora

On 3/16/2021 6:35 PM, Elad Nachman wrote:
> Hi,
> 
> Owing to my current development schedule and obligations, I see no opportunity 
> to make this set of changes in the near future.
> 

I can do on top of your work if you don't mind?

> Sorry,
> 
> Elad.
> 
> בתאריך יום ב׳, 15 במרץ 2021, 19:17, מאת Ferruh Yigit ‏<ferruh.yigit@intel.com 
> <mailto:ferruh.yigit@intel.com>>:
> 
>     On 2/25/2021 2:32 PM, Elad Nachman wrote:
>      > This part of the series includes my fixes for the issues reported
>      > by Ferruh and Igor (and Igor comments for v3 of the patch)
>      > on top of part 1 of the patch series:
>      >
>      > A. KNI sync lock is being locked while rtnl is held.
>      > If two threads are calling kni_net_process_request() ,
>      > then the first one will take the sync lock, release rtnl lock then sleep.
>      > The second thread will try to lock sync lock while holding rtnl.
>      > The first thread will wake, and try to lock rtnl, resulting in a deadlock.
>      > The remedy is to release rtnl before locking the KNI sync lock.
>      > Since in between nothing is accessing Linux network-wise,
>      > no rtnl locking is needed.
>      >
>      > B. There is a race condition in __dev_close_many() processing the
>      > close_list while the application terminates.
>      > It looks like if two vEth devices are terminating,
>      > and one releases the rtnl lock, the other takes it,
>      > updating the close_list in an unstable state,
>      > causing the close_list to become a circular linked list,
>      > hence list_for_each_entry() will endlessly loop inside
>      > __dev_close_many() .
>      > Since the description for the original patch indicate the
>      > original motivation was bringing the device up,
>      > I have changed kni_net_process_request() to hold the rtnl mutex
>      > in case of bringing the device down since this is the path called
>      > from __dev_close_many() , causing the corruption of the close_list.
>      > In order to prevent deadlock in Mellanox device in this case, the
>      > code has been modified not to wait for user-space while holding
>      > the rtnl lock.
>      > Instead, after the request has been sent, all locks are relinquished
>      > and the function exits immediately with return code of zero (success).
>      >
>      > To summarize:
>      > request != interface down : unlock rtnl, send request to user-space,
>      > wait for response, send the response error code to caller in user-space.
>      >
>      > request == interface down: send request to user-space, return immediately
>      > with error code of 0 (success) to user-space.
>      >
>      > Signed-off-by: Elad Nachman <eladv6@gmail.com <mailto:eladv6@gmail.com>>
>      >
>      >
>      > ---
>      > v4:
>      > * for if down case, send asynchronously with rtnl locked and without
>      >    wait, returning immediately to avoid both kernel race conditions
>      >    and deadlock in user-space
>      > v3:
>      > * Include original patch and new patch as a series of patch, added a
>      >    comment to the new patch
>      > v2:
>      > * rebuild the patch as increment from patch 64106
>      > * fix comment and blank lines
>      > ---
>      >   kernel/linux/kni/kni_net.c      | 41 +++++++++++++++++++++++++++------
>      >   lib/librte_kni/rte_kni.c        |  7 ++++--
>      >   lib/librte_kni/rte_kni_common.h |  1 +
>      >   3 files changed, 40 insertions(+), 9 deletions(-)
>      >
>      > diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c
>      > index f0b6e9a8d..ba991802b 100644
>      > --- a/kernel/linux/kni/kni_net.c
>      > +++ b/kernel/linux/kni/kni_net.c
>      > @@ -110,12 +110,34 @@ kni_net_process_request(struct net_device *dev,
>     struct rte_kni_request *req)
>      >       void *resp_va;
>      >       uint32_t num;
>      >       int ret_val;
>      > +     int req_is_dev_stop = 0;
>      > +
>      > +     /* For configuring the interface to down,
>      > +      * rtnl must be held all the way to prevent race condition
>      > +      * inside __dev_close_many() between two netdev instances of KNI
>      > +      */
>      > +     if (req->req_id == RTE_KNI_REQ_CFG_NETWORK_IF &&
>      > +                     req->if_up == 0)
>      > +             req_is_dev_stop = 1;
> 
>     Having this request type checks in the 'kni_net_process_request()' function
>     looks like hack.
>     Since adding a new field into the "struct rte_kni_request", that can be a more
>     generic 'asnyc' field, and the requested function, like 'kni_net_release()' can
>     set it to support async requests.
> 
>     And can you please separate the function to add a more generic async request
>     support on its patch, which should do:
>     - add new 'asnyc' field to "struct rte_kni_request"
>     - in 'kni_net_process_request()', if 'req->async' set, do not wait for response
>     - in library, 'lib/librte_kni/rte_kni.c', in 'rte_kni_handle_request()'
>     function, if the request is async don't put the response
>     (These are already done in this patch)
> 
>     Overall it can be three patch set:
>     1) Function parameter change
>     2) Add generic async request support (with documentation update)
>     3) rtnl unlock and make 'kni_net_release()' request async (actual fix)
>     (We can discuss more if to make 'kni_net_release()' async with a kernel
>     parameter or not)
> 
>     What do you think, does it make sense?
> 


^ permalink raw reply	[flat|nested] 42+ messages in thread

* [dpdk-dev] [PATCH v5 1/3] kni: refactor user request processing
  2020-11-26 14:46 [dpdk-dev] [PATCH] kni: fix rtnl deadlocks and race conditions Elad Nachman
                   ` (4 preceding siblings ...)
  2021-02-25 14:32 ` [dpdk-dev] [PATCH 1/2] kni: fix kernel deadlock when using mlx devices Elad Nachman
@ 2021-03-29 14:36 ` Ferruh Yigit
  2021-03-29 14:36   ` [dpdk-dev] [PATCH v5 2/3] kni: support async user request Ferruh Yigit
  2021-03-29 14:36   ` [dpdk-dev] [PATCH v5 3/3] kni: fix kernel deadlock when using mlx devices Ferruh Yigit
  5 siblings, 2 replies; 42+ messages in thread
From: Ferruh Yigit @ 2021-03-29 14:36 UTC (permalink / raw)
  To: dev; +Cc: Elad Nachman, Stephen Hemminger

From: Elad Nachman <eladv6@gmail.com>

Refactor the parameter kni_net_process_request() gets, this is
preparation for addressing a user request processing deadlock problem.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: Elad Nachman <eladv6@gmail.com>
---
 kernel/linux/kni/kni_net.c | 25 +++++++++----------------
 1 file changed, 9 insertions(+), 16 deletions(-)

diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c
index 4b752083da28..b830054c7491 100644
--- a/kernel/linux/kni/kni_net.c
+++ b/kernel/linux/kni/kni_net.c
@@ -17,6 +17,7 @@
 #include <linux/skbuff.h>
 #include <linux/kthread.h>
 #include <linux/delay.h>
+#include <linux/rtnetlink.h>
 
 #include <rte_kni_common.h>
 #include <kni_fifo.h>
@@ -102,17 +103,15 @@ get_data_kva(struct kni_dev *kni, void *pkt_kva)
  * It can be called to process the request.
  */
 static int
-kni_net_process_request(struct kni_dev *kni, struct rte_kni_request *req)
+kni_net_process_request(struct net_device *dev, struct rte_kni_request *req)
 {
+	struct kni_dev *kni = netdev_priv(dev);
 	int ret = -1;
 	void *resp_va;
 	uint32_t num;
 	int ret_val;
 
-	if (!kni || !req) {
-		pr_err("No kni instance or request\n");
-		return -EINVAL;
-	}
+	ASSERT_RTNL();
 
 	mutex_lock(&kni->sync_lock);
 
@@ -155,7 +154,6 @@ kni_net_open(struct net_device *dev)
 {
 	int ret;
 	struct rte_kni_request req;
-	struct kni_dev *kni = netdev_priv(dev);
 
 	netif_start_queue(dev);
 	if (kni_dflt_carrier == 1)
@@ -168,7 +166,7 @@ kni_net_open(struct net_device *dev)
 
 	/* Setting if_up to non-zero means up */
 	req.if_up = 1;
-	ret = kni_net_process_request(kni, &req);
+	ret = kni_net_process_request(dev, &req);
 
 	return (ret == 0) ? req.result : ret;
 }
@@ -178,7 +176,6 @@ kni_net_release(struct net_device *dev)
 {
 	int ret;
 	struct rte_kni_request req;
-	struct kni_dev *kni = netdev_priv(dev);
 
 	netif_stop_queue(dev); /* can't transmit any more */
 	netif_carrier_off(dev);
@@ -188,7 +185,7 @@ kni_net_release(struct net_device *dev)
 
 	/* Setting if_up to 0 means down */
 	req.if_up = 0;
-	ret = kni_net_process_request(kni, &req);
+	ret = kni_net_process_request(dev, &req);
 
 	return (ret == 0) ? req.result : ret;
 }
@@ -643,14 +640,13 @@ kni_net_change_mtu(struct net_device *dev, int new_mtu)
 {
 	int ret;
 	struct rte_kni_request req;
-	struct kni_dev *kni = netdev_priv(dev);
 
 	pr_debug("kni_net_change_mtu new mtu %d to be set\n", new_mtu);
 
 	memset(&req, 0, sizeof(req));
 	req.req_id = RTE_KNI_REQ_CHANGE_MTU;
 	req.new_mtu = new_mtu;
-	ret = kni_net_process_request(kni, &req);
+	ret = kni_net_process_request(dev, &req);
 	if (ret == 0 && req.result == 0)
 		dev->mtu = new_mtu;
 
@@ -661,7 +657,6 @@ static void
 kni_net_change_rx_flags(struct net_device *netdev, int flags)
 {
 	struct rte_kni_request req;
-	struct kni_dev *kni = netdev_priv(netdev);
 
 	memset(&req, 0, sizeof(req));
 
@@ -683,7 +678,7 @@ kni_net_change_rx_flags(struct net_device *netdev, int flags)
 			req.promiscusity = 0;
 	}
 
-	kni_net_process_request(kni, &req);
+	kni_net_process_request(netdev, &req);
 }
 
 /*
@@ -742,7 +737,6 @@ kni_net_set_mac(struct net_device *netdev, void *p)
 {
 	int ret;
 	struct rte_kni_request req;
-	struct kni_dev *kni;
 	struct sockaddr *addr = p;
 
 	memset(&req, 0, sizeof(req));
@@ -754,8 +748,7 @@ kni_net_set_mac(struct net_device *netdev, void *p)
 	memcpy(req.mac_addr, addr->sa_data, netdev->addr_len);
 	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
 
-	kni = netdev_priv(netdev);
-	ret = kni_net_process_request(kni, &req);
+	ret = kni_net_process_request(netdev, &req);
 
 	return (ret == 0 ? req.result : ret);
 }
-- 
2.30.2


^ permalink raw reply	[flat|nested] 42+ messages in thread

* [dpdk-dev] [PATCH v5 2/3] kni: support async user request
  2021-03-29 14:36 ` [dpdk-dev] [PATCH v5 1/3] kni: refactor user request processing Ferruh Yigit
@ 2021-03-29 14:36   ` Ferruh Yigit
  2021-03-29 14:36   ` [dpdk-dev] [PATCH v5 3/3] kni: fix kernel deadlock when using mlx devices Ferruh Yigit
  1 sibling, 0 replies; 42+ messages in thread
From: Ferruh Yigit @ 2021-03-29 14:36 UTC (permalink / raw)
  To: dev; +Cc: Elad Nachman

Adding async userspace requests which don't wait for the userspace
response and always return success. This is preparation to address a
regression in KNI.

Signed-off-by: Elad Nachman <eladv6@gmail.com>
Signed-off-by: Ferruh Yigit <ferruh.yigit@intel.com>
---
 kernel/linux/kni/kni_net.c      | 9 +++++++++
 lib/librte_kni/rte_kni.c        | 7 +++++--
 lib/librte_kni/rte_kni_common.h | 1 +
 3 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c
index b830054c7491..6cf99da0dc92 100644
--- a/kernel/linux/kni/kni_net.c
+++ b/kernel/linux/kni/kni_net.c
@@ -124,6 +124,14 @@ kni_net_process_request(struct net_device *dev, struct rte_kni_request *req)
 		goto fail;
 	}
 
+	/* No result available since request is handled
+	 * asynchronously. set response to success.
+	 */
+	if (req->async != 0) {
+		req->result = 0;
+		goto async;
+	}
+
 	ret_val = wait_event_interruptible_timeout(kni->wq,
 			kni_fifo_count(kni->resp_q), 3 * HZ);
 	if (signal_pending(current) || ret_val <= 0) {
@@ -139,6 +147,7 @@ kni_net_process_request(struct net_device *dev, struct rte_kni_request *req)
 	}
 
 	memcpy(req, kni->sync_kva, sizeof(struct rte_kni_request));
+async:
 	ret = 0;
 
 fail:
diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c
index 837d0217d2d1..9dae6a8d7c0c 100644
--- a/lib/librte_kni/rte_kni.c
+++ b/lib/librte_kni/rte_kni.c
@@ -591,8 +591,11 @@ rte_kni_handle_request(struct rte_kni *kni)
 		break;
 	}
 
-	/* Construct response mbuf and put it back to resp_q */
-	ret = kni_fifo_put(kni->resp_q, (void **)&req, 1);
+	/* if needed, construct response buffer and put it back to resp_q */
+	if (!req->async)
+		ret = kni_fifo_put(kni->resp_q, (void **)&req, 1);
+	else
+		ret = 1;
 	if (ret != 1) {
 		RTE_LOG(ERR, KNI, "Fail to put the muf back to resp_q\n");
 		return -1; /* It is an error of can't putting the mbuf back */
diff --git a/lib/librte_kni/rte_kni_common.h b/lib/librte_kni/rte_kni_common.h
index ffb3182731a0..b547ea550171 100644
--- a/lib/librte_kni/rte_kni_common.h
+++ b/lib/librte_kni/rte_kni_common.h
@@ -48,6 +48,7 @@ struct rte_kni_request {
 		uint8_t promiscusity;/**< 1: promisc mode enable, 0: disable */
 		uint8_t allmulti;    /**< 1: all-multicast mode enable, 0: disable */
 	};
+	int32_t async : 1;            /**< 1: request is asynchronous */
 	int32_t result;               /**< Result for processing request */
 } __attribute__((__packed__));
 
-- 
2.30.2


^ permalink raw reply	[flat|nested] 42+ messages in thread

* [dpdk-dev] [PATCH v5 3/3] kni: fix kernel deadlock when using mlx devices
  2021-03-29 14:36 ` [dpdk-dev] [PATCH v5 1/3] kni: refactor user request processing Ferruh Yigit
  2021-03-29 14:36   ` [dpdk-dev] [PATCH v5 2/3] kni: support async user request Ferruh Yigit
@ 2021-03-29 14:36   ` Ferruh Yigit
  2021-04-09 14:56     ` [dpdk-dev] [dpdk-stable] " Ferruh Yigit
  1 sibling, 1 reply; 42+ messages in thread
From: Ferruh Yigit @ 2021-03-29 14:36 UTC (permalink / raw)
  To: dev; +Cc: stable, Elad Nachman, Stephen Hemminger, Igor Ryzhov, Dan Gora

KNI runs the userspace callback with the rtnl lock held; this does not work
well with some devices that need to interact with the kernel interface in
the callback, like Mellanox devices.

The solution is releasing the rtnl lock before calling the userspace
callback. But it requires two considerations:

1. The rtnl lock needs to be released before taking 'kni->sync_lock', otherwise
   it causes a deadlock with multiple KNI devices; see A. below
   for the details of the deadlock condition.

2. When the rtnl lock is released for the interface down event, it causes a
   regression and deadlock, so the rtnl lock can't be released for the
   interface down event; see B. below for the details.

As a solution, interface down event is handled asynchronously and for
all other events rtnl lock is released before processing the callback.

A. KNI sync lock is being locked while rtnl is held.
If two threads are calling kni_net_process_request() ,
then the first one will take the sync lock, release rtnl lock then sleep.
The second thread will try to lock sync lock while holding rtnl.
The first thread will wake, and try to lock rtnl, resulting in a
deadlock.  The remedy is to release rtnl before locking the KNI sync
lock.
Since in between nothing is accessing Linux network-wise, no rtnl
locking is needed.

B. There is a race condition in __dev_close_many() processing the
close_list while the application terminates.
It looks like if two KNI interfaces are terminating,
and one releases the rtnl lock, the other takes it,
updating the close_list in an unstable state,
causing the close_list to become a circular linked list,
hence list_for_each_entry() will endlessly loop inside
__dev_close_many() .

To summarize:
request != interface down : unlock rtnl, send request to user-space,
wait for response, send the response error code to caller in user-space.

request == interface down: send request to user-space, return immediately
with error code of 0 (success) to user-space.

Fixes: 3fc5ca2f6352 ("kni: initial import")
Cc: stable@dpdk.org

Signed-off-by: Elad Nachman <eladv6@gmail.com>
---
Cc: Stephen Hemminger <stephen@networkplumber.org>
Cc: Igor Ryzhov <iryzhov@nfware.com>
Cc: Dan Gora <dg@adax.com>

 #	kernel/linux/kni/kni_net.c.rej
---
 kernel/linux/kni/kni_net.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c
index 6cf99da0dc92..f259327954b2 100644
--- a/kernel/linux/kni/kni_net.c
+++ b/kernel/linux/kni/kni_net.c
@@ -113,6 +113,14 @@ kni_net_process_request(struct net_device *dev, struct rte_kni_request *req)
 
 	ASSERT_RTNL();
 
+	/* If we need to wait and RTNL mutex is held
+	 * drop the mutex and hold reference to keep device
+	 */
+	if (req->async == 0) {
+		dev_hold(dev);
+		rtnl_unlock();
+	}
+
 	mutex_lock(&kni->sync_lock);
 
 	/* Construct data */
@@ -152,6 +160,10 @@ kni_net_process_request(struct net_device *dev, struct rte_kni_request *req)
 
 fail:
 	mutex_unlock(&kni->sync_lock);
+	if (req->async == 0) {
+		rtnl_lock();
+		dev_put(dev);
+	}
 	return ret;
 }
 
@@ -194,6 +206,10 @@ kni_net_release(struct net_device *dev)
 
 	/* Setting if_up to 0 means down */
 	req.if_up = 0;
+
+	/* request async because of the deadlock problem */
+	req.async = 1;
+
 	ret = kni_net_process_request(dev, &req);
 
 	return (ret == 0) ? req.result : ret;
-- 
2.30.2


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [dpdk-dev] [dpdk-stable] [PATCH v5 3/3] kni: fix kernel deadlock when using mlx devices
  2021-03-29 14:36   ` [dpdk-dev] [PATCH v5 3/3] kni: fix kernel deadlock when using mlx devices Ferruh Yigit
@ 2021-04-09 14:56     ` Ferruh Yigit
  2021-04-12 14:35       ` Elad Nachman
  0 siblings, 1 reply; 42+ messages in thread
From: Ferruh Yigit @ 2021-04-09 14:56 UTC (permalink / raw)
  To: Elad Nachman, Igor Ryzhov; +Cc: stable, Stephen Hemminger, Dan Gora, dev

On 3/29/2021 3:36 PM, Ferruh Yigit wrote:
> KNI runs userspace callback with rtnl lock held, this is not working
> fine with some devices that need to interact with kernel interface in
> the callback, like Mellanox devices.
> 
> The solution is releasing the rtnl lock before calling the userspace
> callback. But it requires two considerations:
> 
> 1. The rtnl lock needs to be released before 'kni->sync_lock', otherwise it
>     causes deadlock with multiple KNI devices, please check below the A.
>     for the details of the deadlock condition.
> 
> 2. When rtnl lock is released for interface down event, it causes a
>     regression and deadlock, so can't release the rtnl lock for interface
>     down event, please check below B. for the details.
> 
> As a solution, interface down event is handled asynchronously and for
> all other events rtnl lock is released before processing the callback.
> 
> A. KNI sync lock is being locked while rtnl is held.
> If two threads are calling kni_net_process_request() ,
> then the first one will take the sync lock, release rtnl lock then sleep.
> The second thread will try to lock sync lock while holding rtnl.
> The first thread will wake, and try to lock rtnl, resulting in a
> deadlock.  The remedy is to release rtnl before locking the KNI sync
> lock.
> Since in between nothing is accessing Linux network-wise, no rtnl
> locking is needed.
> 
> B. There is a race condition in __dev_close_many() processing the
> close_list while the application terminates.
> It looks like if two KNI interfaces are terminating,
> and one releases the rtnl lock, the other takes it,
> updating the close_list in an unstable state,
> causing the close_list to become a circular linked list,
> hence list_for_each_entry() will endlessly loop inside
> __dev_close_many() .
> 
> To summarize:
> request != interface down : unlock rtnl, send request to user-space,
> wait for response, send the response error code to caller in user-space.
> 
> request == interface down: send request to user-space, return immediately
> with error code of 0 (success) to user-space.
> 
> Fixes: 3fc5ca2f6352 ("kni: initial import")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Elad Nachman <eladv6@gmail.com>
> ---
> Cc: Stephen Hemminger <stephen@networkplumber.org>
> Cc: Igor Ryzhov <iryzhov@nfware.com>
> Cc: Dan Gora <dg@adax.com>
> 

Hi Elad, Igor,

Can you please review/test this set when you have time?

Thanks,
ferruh


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [dpdk-dev] [dpdk-stable] [PATCH v5 3/3] kni: fix kernel deadlock when using mlx devices
  2021-04-09 14:56     ` [dpdk-dev] [dpdk-stable] " Ferruh Yigit
@ 2021-04-12 14:35       ` Elad Nachman
  2021-04-20 23:07         ` Thomas Monjalon
  0 siblings, 1 reply; 42+ messages in thread
From: Elad Nachman @ 2021-04-12 14:35 UTC (permalink / raw)
  To: Ferruh Yigit; +Cc: Igor Ryzhov, stable, Stephen Hemminger, Dan Gora, dev

Hi,

The new patch is fine by me.

Tested several dozen restarts of our proprietary application without
apparent problem.

FYI,

Elad.

בתאריך יום ו׳, 9 באפר׳ 2021, 17:56, מאת Ferruh Yigit ‏<
ferruh.yigit@intel.com>:

> On 3/29/2021 3:36 PM, Ferruh Yigit wrote:
> > KNI runs userspace callback with rtnl lock held, this is not working
> > fine with some devices that needs to interact with kernel interface in
> > the callback, like Mellanox devices.
> >
> > The solution is releasing the rtnl lock before calling the userspace
> > callback. But it requires two consideration:
> >
> > 1. The rtnl lock needs to released before 'kni->sync_lock', otherwise it
> >     causes deadlock with multiple KNI devices, please check below the A.
> >     for the details of the deadlock condition.
> >
> > 2. When rtnl lock is released for interface down event, it cause a
> >     regression and deadlock, so can't release the rtnl lock for interface
> >     down event, please check below B. for the details.
> >
> > As a solution, interface down event is handled asynchronously and for
> > all other events rtnl lock is released before processing the callback.
> >
> > A. KNI sync lock is being locked while rtnl is held.
> > If two threads are calling kni_net_process_request() ,
> > then the first one will take the sync lock, release rtnl lock then sleep.
> > The second thread will try to lock sync lock while holding rtnl.
> > The first thread will wake, and try to lock rtnl, resulting in a
> > deadlock.  The remedy is to release rtnl before locking the KNI sync
> > lock.
> > Since in between nothing is accessing Linux network-wise, no rtnl
> > locking is needed.
> >
> > B. There is a race condition in __dev_close_many() processing the
> > close_list while the application terminates.
> > It looks like if two KNI interfaces are terminating,
> > and one releases the rtnl lock, the other takes it,
> > updating the close_list in an unstable state,
> > causing the close_list to become a circular linked list,
> > hence list_for_each_entry() will endlessly loop inside
> > __dev_close_many() .
> >
> > To summarize:
> > request != interface down : unlock rtnl, send request to user-space,
> > wait for response, send the response error code to caller in user-space.
> >
> > request == interface down: send request to user-space, return immediately
> > with error code of 0 (success) to user-space.
> >
> > Fixes: 3fc5ca2f6352 ("kni: initial import")
> > Cc: stable@dpdk.org
> >
> > Signed-off-by: Elad Nachman <eladv6@gmail.com>
> > ---
> > Cc: Stephen Hemminger <stephen@networkplumber.org>
> > Cc: Igor Ryzhov <iryzhov@nfware.com>
> > Cc: Dan Gora <dg@adax.com>
> >
>
> Hi Elad, Igor,
>
> Can you please review/test this set when you have time?
>
> Thanks,
> ferruh
>
>

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [dpdk-dev] [dpdk-stable] [PATCH v5 3/3] kni: fix kernel deadlock when using mlx devices
  2021-04-12 14:35       ` Elad Nachman
@ 2021-04-20 23:07         ` Thomas Monjalon
  2021-04-23  8:41           ` Igor Ryzhov
  0 siblings, 1 reply; 42+ messages in thread
From: Thomas Monjalon @ 2021-04-20 23:07 UTC (permalink / raw)
  To: Ferruh Yigit, Stephen Hemminger, Elad Nachman
  Cc: dev, Igor Ryzhov, stable, Dan Gora

12/04/2021 16:35, Elad Nachman:
> Hi,
> 
> The new patch is fine by me.
> 
> Tested several dozens restarts of our proprietary application without
> apparent problem.

Series applied, thanks.



^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [dpdk-dev] [dpdk-stable] [PATCH v5 3/3] kni: fix kernel deadlock when using mlx devices
  2021-04-20 23:07         ` Thomas Monjalon
@ 2021-04-23  8:41           ` Igor Ryzhov
  2021-04-23  8:59             ` Ferruh Yigit
  0 siblings, 1 reply; 42+ messages in thread
From: Igor Ryzhov @ 2021-04-23  8:41 UTC (permalink / raw)
  To: Thomas Monjalon
  Cc: Ferruh Yigit, Stephen Hemminger, Elad Nachman, dev, dpdk stable,
	Dan Gora

This patch changes the behavior for KNI interface shutdown.
Previously we would receive a real response from the driver, now we
always receive success.
I think this should be reflected in the docs/release notes.

Igor

On Wed, Apr 21, 2021 at 2:07 AM Thomas Monjalon <thomas@monjalon.net> wrote:

> 12/04/2021 16:35, Elad Nachman:
> > Hi,
> >
> > The new patch is fine by me.
> >
> > Tested several dozens restarts of our proprietary application without
> > apparent problem.
>
> Series applied, thanks.
>
>
>

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [dpdk-dev] [dpdk-stable] [PATCH v5 3/3] kni: fix kernel deadlock when using mlx devices
  2021-04-23  8:41           ` Igor Ryzhov
@ 2021-04-23  8:59             ` Ferruh Yigit
  2021-04-23 12:43               ` Igor Ryzhov
  0 siblings, 1 reply; 42+ messages in thread
From: Ferruh Yigit @ 2021-04-23  8:59 UTC (permalink / raw)
  To: Igor Ryzhov, Thomas Monjalon
  Cc: Stephen Hemminger, Elad Nachman, dev, dpdk stable, Dan Gora

On 4/23/2021 9:41 AM, Igor Ryzhov wrote:
> This patch changes the behavior for KNI interface shutdown.
> Previously we would receive a real response from the driver, now we 
> always receive success.
> I think this should be reflected in the docs/release notes.
> 

Hi Igor,

Makes sense, I can add it.

Meanwhile, do you think there is a benefit in making shutdown behavior configurable?
Async/Sync shutdown based on a module param?

> Igor
> 
> On Wed, Apr 21, 2021 at 2:07 AM Thomas Monjalon <thomas@monjalon.net 
> <mailto:thomas@monjalon.net>> wrote:
> 
>     12/04/2021 16:35, Elad Nachman:
>      > Hi,
>      >
>      > The new patch is fine by me.
>      >
>      > Tested several dozens restarts of our proprietary application without
>      > apparent problem.
> 
>     Series applied, thanks.
> 
> 


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [dpdk-dev] [dpdk-stable] [PATCH v5 3/3] kni: fix kernel deadlock when using mlx devices
  2021-04-23  8:59             ` Ferruh Yigit
@ 2021-04-23 12:43               ` Igor Ryzhov
  2021-04-23 12:58                 ` Igor Ryzhov
  0 siblings, 1 reply; 42+ messages in thread
From: Igor Ryzhov @ 2021-04-23 12:43 UTC (permalink / raw)
  To: Ferruh Yigit
  Cc: Thomas Monjalon, Stephen Hemminger, Elad Nachman, dev,
	dpdk stable, Dan Gora

Hi Ferruh,

Thanks. I think it would be great to make this configurable, and maybe even
make shutdown synchronous by default to preserve the old behavior.

I would be grateful if you could spend time on the work and I am ready to
review it.

Igor

On Fri, Apr 23, 2021 at 11:59 AM Ferruh Yigit <ferruh.yigit@intel.com>
wrote:

> On 4/23/2021 9:41 AM, Igor Ryzhov wrote:
> > This patch changes the behavior for KNI interface shutdown.
> > Previously we would receive a real response from the driver, now we
> > always receive success.
> > I think this should be reflected in the docs/release notes.
> >
>
> Hi Igor,
>
> Make sense, I can add it.
>
> Meanwhile do you think has a benefit to make shutdown behavior
> configurable?
> Async/Sync shutdown based on module param?
>
> > Igor
> >
> > On Wed, Apr 21, 2021 at 2:07 AM Thomas Monjalon <thomas@monjalon.net
> > <mailto:thomas@monjalon.net>> wrote:
> >
> >     12/04/2021 16:35, Elad Nachman:
> >      > Hi,
> >      >
> >      > The new patch is fine by me.
> >      >
> >      > Tested several dozens restarts of our proprietary application
> without
> >      > apparent problem.
> >
> >     Series applied, thanks.
> >
> >
>
>

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [dpdk-dev] [dpdk-stable] [PATCH v5 3/3] kni: fix kernel deadlock when using mlx devices
  2021-04-23 12:43               ` Igor Ryzhov
@ 2021-04-23 12:58                 ` Igor Ryzhov
  0 siblings, 0 replies; 42+ messages in thread
From: Igor Ryzhov @ 2021-04-23 12:58 UTC (permalink / raw)
  To: Ferruh Yigit
  Cc: Thomas Monjalon, Stephen Hemminger, Elad Nachman, dev,
	dpdk stable, Dan Gora

Sorry, I remembered the problem with the deadlock.

We can't just make the shutdown command synchronous, because
we can't release the rtnl_lock anyway. So regardless of the process
mode (sync/async), we have to preserve the lock when processing
the shutdown. It looks like two different settings...

On Fri, Apr 23, 2021 at 3:43 PM Igor Ryzhov <iryzhov@nfware.com> wrote:

> Hi Ferruh,
>
> Thanks. I think it would be great to make this configurable, and maybe even
> make shutdown synchronous by default to preserve the old behavior.
>
> I would be grateful if you could spend time on the work and I am ready to
> review it.
>
> Igor
>
> On Fri, Apr 23, 2021 at 11:59 AM Ferruh Yigit <ferruh.yigit@intel.com>
> wrote:
>
>> On 4/23/2021 9:41 AM, Igor Ryzhov wrote:
>> > This patch changes the behavior for KNI interface shutdown.
>> > Previously we would receive a real response from the driver, now we
>> > always receive success.
>> > I think this should be reflected in the docs/release notes.
>> >
>>
>> Hi Igor,
>>
>> Make sense, I can add it.
>>
>> Meanwhile do you think has a benefit to make shutdown behavior
>> configurable?
>> Async/Sync shutdown based on module param?
>>
>> > Igor
>> >
>> > On Wed, Apr 21, 2021 at 2:07 AM Thomas Monjalon <thomas@monjalon.net
>> > <mailto:thomas@monjalon.net>> wrote:
>> >
>> >     12/04/2021 16:35, Elad Nachman:
>> >      > Hi,
>> >      >
>> >      > The new patch is fine by me.
>> >      >
>> >      > Tested several dozens restarts of our proprietary application
>> without
>> >      > apparent problem.
>> >
>> >     Series applied, thanks.
>> >
>> >
>>
>>

^ permalink raw reply	[flat|nested] 42+ messages in thread

end of thread, other threads:[~2021-04-23 12:58 UTC | newest]

Thread overview: 42+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-11-26 14:46 [dpdk-dev] [PATCH] kni: fix rtnl deadlocks and race conditions Elad Nachman
2021-02-19 18:41 ` Ferruh Yigit
2021-02-21  8:03   ` Elad Nachman
2021-02-22 15:58     ` Ferruh Yigit
2021-02-23 12:05 ` [dpdk-dev] [PATCH V2] kni: fix rtnl deadlocks and race conditions v2 Elad Nachman
2021-02-23 12:53   ` Ferruh Yigit
2021-02-23 13:44 ` [dpdk-dev] [PATCH 1/2] kni: fix rtnl deadlocks and race conditions v3 Elad Nachman
2021-02-23 13:45 ` [dpdk-dev] [PATCH 2/2] " Elad Nachman
2021-02-24 12:49   ` Igor Ryzhov
2021-02-24 13:33     ` Elad Nachman
2021-02-24 14:04       ` Igor Ryzhov
2021-02-24 14:06         ` Elad Nachman
2021-02-24 14:41           ` Igor Ryzhov
2021-02-24 14:56             ` Elad Nachman
2021-02-24 15:18               ` Igor Ryzhov
     [not found]                 ` <CACXF7qkhkzFc-=v=iiBzh2V7rLjk1U34VUfPbNrnYJND_0TKHQ@mail.gmail.com>
2021-02-24 16:31                   ` Igor Ryzhov
2021-02-24 15:54     ` Stephen Hemminger
2021-02-25 14:32 ` [dpdk-dev] [PATCH 1/2] kni: fix kernel deadlock when using mlx devices Elad Nachman
2021-02-25 14:32   ` [dpdk-dev] [PATCH 2/2] kni: fix rtnl deadlocks and race conditions v4 Elad Nachman
2021-02-25 21:01     ` Igor Ryzhov
2021-02-26 15:48       ` Stephen Hemminger
2021-02-26 17:43         ` Elad Nachman
2021-03-01  8:10           ` Igor Ryzhov
2021-03-01 16:38             ` Stephen Hemminger
2021-03-15 16:58               ` Ferruh Yigit
2021-03-01 20:27             ` Dan Gora
2021-03-01 21:26               ` Dan Gora
2021-03-02 16:44                 ` Elad Nachman
2021-03-15 17:17     ` Ferruh Yigit
2021-03-16 18:35       ` Elad Nachman
2021-03-16 18:42         ` Ferruh Yigit
2021-03-15 17:17   ` [dpdk-dev] [PATCH 1/2] kni: fix kernel deadlock when using mlx devices Ferruh Yigit
2021-03-29 14:36 ` [dpdk-dev] [PATCH v5 1/3] kni: refactor user request processing Ferruh Yigit
2021-03-29 14:36   ` [dpdk-dev] [PATCH v5 2/3] kni: support async user request Ferruh Yigit
2021-03-29 14:36   ` [dpdk-dev] [PATCH v5 3/3] kni: fix kernel deadlock when using mlx devices Ferruh Yigit
2021-04-09 14:56     ` [dpdk-dev] [dpdk-stable] " Ferruh Yigit
2021-04-12 14:35       ` Elad Nachman
2021-04-20 23:07         ` Thomas Monjalon
2021-04-23  8:41           ` Igor Ryzhov
2021-04-23  8:59             ` Ferruh Yigit
2021-04-23 12:43               ` Igor Ryzhov
2021-04-23 12:58                 ` Igor Ryzhov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).