DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH 1/2] net/mlx5: add kernel version function
@ 2018-02-15  8:47 Nelio Laranjeiro
  2018-02-15  8:47 ` [dpdk-dev] [PATCH 2/2] net/mlx5: fix link status to use wait to complete Nelio Laranjeiro
                   ` (6 more replies)
  0 siblings, 7 replies; 18+ messages in thread
From: Nelio Laranjeiro @ 2018-02-15  8:47 UTC (permalink / raw)
  To: dev; +Cc: Adrien Mazarguil, Yongseok Koh

Use a function to retrieve the version of the kernel.

Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5_ethdev.c | 13 +++++--------
 drivers/net/mlx5/mlx5_utils.h  | 26 ++++++++++++++++++++++++++
 2 files changed, 31 insertions(+), 8 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index b73cb53df..0ce9f438a 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -18,11 +18,9 @@
 #include <net/if.h>
 #include <sys/ioctl.h>
 #include <sys/socket.h>
-#include <sys/utsname.h>
 #include <netinet/in.h>
 #include <linux/ethtool.h>
 #include <linux/sockios.h>
-#include <linux/version.h>
 #include <fcntl.h>
 #include <stdalign.h>
 #include <sys/un.h>
@@ -715,15 +713,14 @@ int
 priv_link_update(struct priv *priv, int wait_to_complete)
 {
 	struct rte_eth_dev *dev = priv->dev;
-	struct utsname utsname;
-	int ver[3];
 	int ret;
 	struct rte_eth_link dev_link = dev->data->dev_link;
+	unsigned int current_version = mlx5_kernel_version();
+	int use_new_api = current_version > 0 ?
+		current_version >= KERNEL_VERSION(4, 9, 0) :
+		0;
 
-	if (uname(&utsname) == -1 ||
-	    sscanf(utsname.release, "%d.%d.%d",
-		   &ver[0], &ver[1], &ver[2]) != 3 ||
-	    KERNEL_VERSION(ver[0], ver[1], ver[2]) < KERNEL_VERSION(4, 9, 0))
+	if (use_new_api)
 		ret = mlx5_link_update_unlocked_gset(dev, wait_to_complete);
 	else
 		ret = mlx5_link_update_unlocked_gs(dev, wait_to_complete);
diff --git a/drivers/net/mlx5/mlx5_utils.h b/drivers/net/mlx5/mlx5_utils.h
index e1bfb9cd9..bd179ed21 100644
--- a/drivers/net/mlx5/mlx5_utils.h
+++ b/drivers/net/mlx5/mlx5_utils.h
@@ -12,6 +12,8 @@
 #include <limits.h>
 #include <assert.h>
 #include <errno.h>
+#include <sys/utsname.h>
+#include <linux/version.h>
 
 #include "mlx5_defs.h"
 
@@ -159,4 +161,28 @@ log2above(unsigned int v)
 	return l + r;
 }
 
+/**
+ * retrieve the current kernel version.
+ *
+ * @return
+ *   the current kernel version or negative errno on error.
+ */
+static inline unsigned int
+mlx5_kernel_version(void)
+{
+	struct utsname utsname;
+	int ver[3];
+	int ret;
+
+	ret = uname(&utsname);
+	if (ret == -1)
+		goto error;
+	ret = sscanf(utsname.release, "%d.%d.%d", &ver[0], &ver[1], &ver[2]);
+	if (ret != 3)
+		goto error;
+	return KERNEL_VERSION(ver[0], ver[1], ver[2]);
+error:
+	return -errno;
+}
+
 #endif /* RTE_PMD_MLX5_UTILS_H_ */
-- 
2.11.0

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [dpdk-dev] [PATCH 2/2] net/mlx5: fix link status to use wait to complete
  2018-02-15  8:47 [dpdk-dev] [PATCH 1/2] net/mlx5: add kernel version function Nelio Laranjeiro
@ 2018-02-15  8:47 ` Nelio Laranjeiro
  2018-02-16 10:49   ` Adrien Mazarguil
  2018-02-16 10:48 ` [dpdk-dev] [PATCH 1/2] net/mlx5: add kernel version function Adrien Mazarguil
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 18+ messages in thread
From: Nelio Laranjeiro @ 2018-02-15  8:47 UTC (permalink / raw)
  To: dev; +Cc: Adrien Mazarguil, Yongseok Koh

Wait to complete is present to let the application get a correct status
when it requires it, it should not be ignored.

Fixes: cb8faed7dde8 ("mlx5: support link status update")
Cc: adrien.mazarguil@6wind.com

Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5_ethdev.c | 35 +++++++++++++++++++++--------------
 1 file changed, 21 insertions(+), 14 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 0ce9f438a..112cc2a40 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -502,11 +502,12 @@ mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev)
  *
  * @param dev
  *   Pointer to Ethernet device structure.
- * @param wait_to_complete
- *   Wait for request completion (ignored).
+ *
+ * @return
+ *   0 on success, -1 otherwise.
  */
 static int
-mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, int wait_to_complete)
+mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev)
 {
 	struct priv *priv = dev->data->dev_private;
 	struct ethtool_cmd edata = {
@@ -518,7 +519,6 @@ mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, int wait_to_complete)
 
 	/* priv_lock() is not taken to allow concurrent calls. */
 
-	(void)wait_to_complete;
 	if (priv_ifreq(priv, SIOCGIFFLAGS, &ifr)) {
 		WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(errno));
 		return -1;
@@ -568,11 +568,9 @@ mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, int wait_to_complete)
  *
  * @param dev
  *   Pointer to Ethernet device structure.
- * @param wait_to_complete
- *   Wait for request completion (ignored).
  */
 static int
-mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete)
+mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev)
 {
 	struct priv *priv = dev->data->dev_private;
 	struct ethtool_link_settings gcmd = { .cmd = ETHTOOL_GLINKSETTINGS };
@@ -580,7 +578,6 @@ mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete)
 	struct rte_eth_link dev_link;
 	uint64_t sc;
 
-	(void)wait_to_complete;
 	if (priv_ifreq(priv, SIOCGIFFLAGS, &ifr)) {
 		WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(errno));
 		return -1;
@@ -708,6 +705,9 @@ priv_link_stop(struct priv *priv)
  *   Pointer to private structure.
  * @param wait_to_complete
  *   Wait for request completion (ignored).
+ *
+ * @return
+ *   0 on success, negative value otherwise.
  */
 int
 priv_link_update(struct priv *priv, int wait_to_complete)
@@ -720,10 +720,16 @@ priv_link_update(struct priv *priv, int wait_to_complete)
 		current_version >= KERNEL_VERSION(4, 9, 0) :
 		0;
 
-	if (use_new_api)
-		ret = mlx5_link_update_unlocked_gset(dev, wait_to_complete);
-	else
-		ret = mlx5_link_update_unlocked_gs(dev, wait_to_complete);
+	do {
+		if (use_new_api)
+			ret = mlx5_link_update_unlocked_gset(dev);
+		else
+			ret = mlx5_link_update_unlocked_gs(dev);
+		if (ret == -1)
+			return -EAGAIN;
+	} while(wait_to_complete && !ret);
+	if (ret == -EAGAIN)
+		return ret;
 	/* If lsc interrupt is disabled, should always be ready for traffic. */
 	if (!dev->data->dev_conf.intr_conf.lsc) {
 		priv_link_start(priv);
@@ -755,10 +761,11 @@ int
 priv_force_link_status_change(struct priv *priv, int status)
 {
 	int try = 0;
+	int ret = 0;
 
 	while (try < MLX5_MAX_LINK_QUERY_ATTEMPTS) {
-		priv_link_update(priv, 0);
-		if (priv->dev->data->dev_link.link_status == status)
+		ret = priv_link_update(priv, 0);
+		if (!ret && priv->dev->data->dev_link.link_status == status)
 			return 0;
 		try++;
 		sleep(1);
-- 
2.11.0

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [dpdk-dev] [PATCH 1/2] net/mlx5: add kernel version function
  2018-02-15  8:47 [dpdk-dev] [PATCH 1/2] net/mlx5: add kernel version function Nelio Laranjeiro
  2018-02-15  8:47 ` [dpdk-dev] [PATCH 2/2] net/mlx5: fix link status to use wait to complete Nelio Laranjeiro
@ 2018-02-16 10:48 ` Adrien Mazarguil
  2018-02-16 18:03 ` Stephen Hemminger
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 18+ messages in thread
From: Adrien Mazarguil @ 2018-02-16 10:48 UTC (permalink / raw)
  To: Nelio Laranjeiro; +Cc: dev, Yongseok Koh

On Thu, Feb 15, 2018 at 09:47:27AM +0100, Nelio Laranjeiro wrote:
> Use a function to retrieve the version of the kernel.
> 
> Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>

A couple of nits, please see below.

> ---
>  drivers/net/mlx5/mlx5_ethdev.c | 13 +++++--------
>  drivers/net/mlx5/mlx5_utils.h  | 26 ++++++++++++++++++++++++++
>  2 files changed, 31 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
> index b73cb53df..0ce9f438a 100644
> --- a/drivers/net/mlx5/mlx5_ethdev.c
> +++ b/drivers/net/mlx5/mlx5_ethdev.c
> @@ -18,11 +18,9 @@
>  #include <net/if.h>
>  #include <sys/ioctl.h>
>  #include <sys/socket.h>
> -#include <sys/utsname.h>
>  #include <netinet/in.h>
>  #include <linux/ethtool.h>
>  #include <linux/sockios.h>
> -#include <linux/version.h>

Looks like this one should remain; this file still uses KERNEL_VERSION().

>  #include <fcntl.h>
>  #include <stdalign.h>
>  #include <sys/un.h>
> @@ -715,15 +713,14 @@ int
>  priv_link_update(struct priv *priv, int wait_to_complete)
>  {
>  	struct rte_eth_dev *dev = priv->dev;
> -	struct utsname utsname;
> -	int ver[3];
>  	int ret;
>  	struct rte_eth_link dev_link = dev->data->dev_link;
> +	unsigned int current_version = mlx5_kernel_version();
> +	int use_new_api = current_version > 0 ?
> +		current_version >= KERNEL_VERSION(4, 9, 0) :
> +		0;
>  
> -	if (uname(&utsname) == -1 ||
> -	    sscanf(utsname.release, "%d.%d.%d",
> -		   &ver[0], &ver[1], &ver[2]) != 3 ||
> -	    KERNEL_VERSION(ver[0], ver[1], ver[2]) < KERNEL_VERSION(4, 9, 0))
> +	if (use_new_api)
>  		ret = mlx5_link_update_unlocked_gset(dev, wait_to_complete);
>  	else
>  		ret = mlx5_link_update_unlocked_gs(dev, wait_to_complete);
> diff --git a/drivers/net/mlx5/mlx5_utils.h b/drivers/net/mlx5/mlx5_utils.h
> index e1bfb9cd9..bd179ed21 100644
> --- a/drivers/net/mlx5/mlx5_utils.h
> +++ b/drivers/net/mlx5/mlx5_utils.h
> @@ -12,6 +12,8 @@
>  #include <limits.h>
>  #include <assert.h>
>  #include <errno.h>
> +#include <sys/utsname.h>
> +#include <linux/version.h>
>  
>  #include "mlx5_defs.h"
>  
> @@ -159,4 +161,28 @@ log2above(unsigned int v)
>  	return l + r;
>  }
>  
> +/**
> + * retrieve the current kernel version.
> + *
> + * @return
> + *   the current kernel version or negative errno on error.
> + */

Missing caps!

> +static inline unsigned int
> +mlx5_kernel_version(void)
> +{
> +	struct utsname utsname;
> +	int ver[3];
> +	int ret;
> +
> +	ret = uname(&utsname);
> +	if (ret == -1)
> +		goto error;
> +	ret = sscanf(utsname.release, "%d.%d.%d", &ver[0], &ver[1], &ver[2]);
> +	if (ret != 3)
> +		goto error;
> +	return KERNEL_VERSION(ver[0], ver[1], ver[2]);
> +error:
> +	return -errno;
> +}
> +
>  #endif /* RTE_PMD_MLX5_UTILS_H_ */
> -- 
> 2.11.0
> 

-- 
Adrien Mazarguil
6WIND

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [dpdk-dev] [PATCH 2/2] net/mlx5: fix link status to use wait to complete
  2018-02-15  8:47 ` [dpdk-dev] [PATCH 2/2] net/mlx5: fix link status to use wait to complete Nelio Laranjeiro
@ 2018-02-16 10:49   ` Adrien Mazarguil
  2018-02-16 12:08     ` Nélio Laranjeiro
  0 siblings, 1 reply; 18+ messages in thread
From: Adrien Mazarguil @ 2018-02-16 10:49 UTC (permalink / raw)
  To: Nelio Laranjeiro; +Cc: dev, Yongseok Koh

On Thu, Feb 15, 2018 at 09:47:28AM +0100, Nelio Laranjeiro wrote:
> Wait to complete is present to let the application get a correct status
> when it requires it, it should not be ignored.
> 
> Fixes: cb8faed7dde8 ("mlx5: support link status update")
> Cc: adrien.mazarguil@6wind.com
> 
> Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>

Several comments, please see below.

> ---
>  drivers/net/mlx5/mlx5_ethdev.c | 35 +++++++++++++++++++++--------------
>  1 file changed, 21 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
> index 0ce9f438a..112cc2a40 100644
> --- a/drivers/net/mlx5/mlx5_ethdev.c
> +++ b/drivers/net/mlx5/mlx5_ethdev.c
> @@ -502,11 +502,12 @@ mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev)
>   *
>   * @param dev
>   *   Pointer to Ethernet device structure.
> - * @param wait_to_complete
> - *   Wait for request completion (ignored).
> + *
> + * @return
> + *   0 on success, -1 otherwise.

See below regarding the return value.

>   */
>  static int
> -mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, int wait_to_complete)
> +mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev)
>  {
>  	struct priv *priv = dev->data->dev_private;
>  	struct ethtool_cmd edata = {
> @@ -518,7 +519,6 @@ mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, int wait_to_complete)
>  
>  	/* priv_lock() is not taken to allow concurrent calls. */
>  
> -	(void)wait_to_complete;
>  	if (priv_ifreq(priv, SIOCGIFFLAGS, &ifr)) {
>  		WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(errno));
>  		return -1;
> @@ -568,11 +568,9 @@ mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, int wait_to_complete)
>   *
>   * @param dev
>   *   Pointer to Ethernet device structure.
> - * @param wait_to_complete
> - *   Wait for request completion (ignored).

You should use this opportunity to document its return value as well.

>   */
>  static int
> -mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete)
> +mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev)
>  {
>  	struct priv *priv = dev->data->dev_private;
>  	struct ethtool_link_settings gcmd = { .cmd = ETHTOOL_GLINKSETTINGS };
> @@ -580,7 +578,6 @@ mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete)
>  	struct rte_eth_link dev_link;
>  	uint64_t sc;
>  
> -	(void)wait_to_complete;
>  	if (priv_ifreq(priv, SIOCGIFFLAGS, &ifr)) {
>  		WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(errno));
>  		return -1;
> @@ -708,6 +705,9 @@ priv_link_stop(struct priv *priv)
>   *   Pointer to private structure.
>   * @param wait_to_complete
>   *   Wait for request completion (ignored).
> + *
> + * @return
> + *   0 on success, negative value otherwise.

How about "0 on success, negative errno value otherwise" according to
subsequent comments.

>   */
>  int
>  priv_link_update(struct priv *priv, int wait_to_complete)
> @@ -720,10 +720,16 @@ priv_link_update(struct priv *priv, int wait_to_complete)
>  		current_version >= KERNEL_VERSION(4, 9, 0) :
>  		0;
>  
> -	if (use_new_api)
> -		ret = mlx5_link_update_unlocked_gset(dev, wait_to_complete);
> -	else
> -		ret = mlx5_link_update_unlocked_gs(dev, wait_to_complete);
> +	do {
> +		if (use_new_api)
> +			ret = mlx5_link_update_unlocked_gset(dev);
> +		else
> +			ret = mlx5_link_update_unlocked_gs(dev);
> +		if (ret == -1)
> +			return -EAGAIN;

Like for system calls, EAGAIN isn't an acceptable error in case
wait_to_complete (blocking mode) is requested. It must be some other reason.

This check should be replaced with my next suggestion.

> +	} while(wait_to_complete && !ret);

Missing space after "while".

One issue is when wait_to_complete is enabled and link status never settles
down due to bad cabling or buggy SFP. I think this function should give up
and return an error after a while (not -EAGAIN in this case but -EIO, -EBUSY
or even -EINTR to reflect the call had to be interrupted due to HW trouble).

You could use MLX5_MAX_LINK_QUERY_ATTEMPTS for that, e.g.:

 int attempt = MLX5_MAX_LINK_QUERY_ATTEMPTS;
 [...]
 while (1) {
    [...]
    if (!wait_to_complete || ret != -EAGAIN || !attempt--)
        break;
    sleep(1);  
 }

> +	if (ret == -EAGAIN)
> +		return ret;

Since neither function may return anything other than -1 in case of error at
the moment and since their wait_to_complete argument is being removed, I
suggest to make them properly non-blocking by default (i.e. O_NONBLOCK on
their ioctl() FD), then return -errno in case of error on intermediate
system calls, then the above check will make sense.

>  	/* If lsc interrupt is disabled, should always be ready for traffic. */
>  	if (!dev->data->dev_conf.intr_conf.lsc) {
>  		priv_link_start(priv);
> @@ -755,10 +761,11 @@ int
>  priv_force_link_status_change(struct priv *priv, int status)
>  {
>  	int try = 0;
> +	int ret = 0;
>  
>  	while (try < MLX5_MAX_LINK_QUERY_ATTEMPTS) {
> -		priv_link_update(priv, 0);
> -		if (priv->dev->data->dev_link.link_status == status)
> +		ret = priv_link_update(priv, 0);
> +		if (!ret && priv->dev->data->dev_link.link_status == status)
>  			return 0;
>  		try++;
>  		sleep(1);

I think this function could be removed in the same patch (with e313ef4c2fe8
"net/mlx5: fix link state on device start" partially reverted) since a call
to priv_link_update(priv, 1) would now result in the same behavior.

-- 
Adrien Mazarguil
6WIND

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [dpdk-dev] [PATCH 2/2] net/mlx5: fix link status to use wait to complete
  2018-02-16 10:49   ` Adrien Mazarguil
@ 2018-02-16 12:08     ` Nélio Laranjeiro
  0 siblings, 0 replies; 18+ messages in thread
From: Nélio Laranjeiro @ 2018-02-16 12:08 UTC (permalink / raw)
  To: Adrien Mazarguil; +Cc: dev, Yongseok Koh

On Fri, Feb 16, 2018 at 11:49:00AM +0100, Adrien Mazarguil wrote:
> On Thu, Feb 15, 2018 at 09:47:28AM +0100, Nelio Laranjeiro wrote:
> > Wait to complete is present to let the application get a correct status
> > when it requires it, it should not be ignored.
> > 
> > Fixes: cb8faed7dde8 ("mlx5: support link status update")
> > Cc: adrien.mazarguil@6wind.com
> > 
> > Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
> 
> Several comments, please see below.
> 
> > ---
> >  drivers/net/mlx5/mlx5_ethdev.c | 35 +++++++++++++++++++++--------------
> >  1 file changed, 21 insertions(+), 14 deletions(-)
> > 
> > diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
> > index 0ce9f438a..112cc2a40 100644
> > --- a/drivers/net/mlx5/mlx5_ethdev.c
> > +++ b/drivers/net/mlx5/mlx5_ethdev.c
> > @@ -502,11 +502,12 @@ mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev)
> >   *
> >   * @param dev
> >   *   Pointer to Ethernet device structure.
> > - * @param wait_to_complete
> > - *   Wait for request completion (ignored).
> > + *
> > + * @return
> > + *   0 on success, -1 otherwise.
> 
> See below regarding the return value.
> 
> >   */
> >  static int
> > -mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, int wait_to_complete)
> > +mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev)
> >  {
> >  	struct priv *priv = dev->data->dev_private;
> >  	struct ethtool_cmd edata = {
> > @@ -518,7 +519,6 @@ mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, int wait_to_complete)
> >  
> >  	/* priv_lock() is not taken to allow concurrent calls. */
> >  
> > -	(void)wait_to_complete;
> >  	if (priv_ifreq(priv, SIOCGIFFLAGS, &ifr)) {
> >  		WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(errno));
> >  		return -1;
> > @@ -568,11 +568,9 @@ mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, int wait_to_complete)
> >   *
> >   * @param dev
> >   *   Pointer to Ethernet device structure.
> > - * @param wait_to_complete
> > - *   Wait for request completion (ignored).
> 
> You should use this opportunity to document its return value as well.
> 
> >   */
> >  static int
> > -mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete)
> > +mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev)
> >  {
> >  	struct priv *priv = dev->data->dev_private;
> >  	struct ethtool_link_settings gcmd = { .cmd = ETHTOOL_GLINKSETTINGS };
> > @@ -580,7 +578,6 @@ mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete)
> >  	struct rte_eth_link dev_link;
> >  	uint64_t sc;
> >  
> > -	(void)wait_to_complete;
> >  	if (priv_ifreq(priv, SIOCGIFFLAGS, &ifr)) {
> >  		WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(errno));
> >  		return -1;
> > @@ -708,6 +705,9 @@ priv_link_stop(struct priv *priv)
> >   *   Pointer to private structure.
> >   * @param wait_to_complete
> >   *   Wait for request completion (ignored).
> > + *
> > + * @return
> > + *   0 on success, negative value otherwise.
> 
> How about "0 on success, negative errno value otherwise" according to
> subsequent comments.
> 
> >   */
> >  int
> >  priv_link_update(struct priv *priv, int wait_to_complete)
> > @@ -720,10 +720,16 @@ priv_link_update(struct priv *priv, int wait_to_complete)
> >  		current_version >= KERNEL_VERSION(4, 9, 0) :
> >  		0;
> >  
> > -	if (use_new_api)
> > -		ret = mlx5_link_update_unlocked_gset(dev, wait_to_complete);
> > -	else
> > -		ret = mlx5_link_update_unlocked_gs(dev, wait_to_complete);
> > +	do {
> > +		if (use_new_api)
> > +			ret = mlx5_link_update_unlocked_gset(dev);
> > +		else
> > +			ret = mlx5_link_update_unlocked_gs(dev);
> > +		if (ret == -1)
> > +			return -EAGAIN;
> 
> Like for system calls, EAGAIN isn't an acceptable error in case
> wait_to_complete (blocking mode) is requested. It must be some other reason.
> 
> This check should be replaced with my next suggestion.
> 
> > +	} while(wait_to_complete && !ret);
> 
> Missing space after "while".
> 
> One issue is when wait_to_complete is enabled and link status never settles
> down due to bad cabling or buggy SFP. I think this function should give up
> and return an error after a while (not -EAGAIN in this case but -EIO, -EBUSY
> or even -EINTR to reflect the call had to be interrupted due to HW trouble).
> 
> You could use MLX5_MAX_LINK_QUERY_ATTEMPTS for that, e.g.:
> 
>  int attempt = MLX5_MAX_LINK_QUERY_ATTEMPTS;
>  [...]
>  while (1) {
>     [...]
>     if (!wait_to_complete || ret != -EAGAIN || !attempt--)
>         break;
>     sleep(1);  
>  }
> 
> > +	if (ret == -EAGAIN)
> > +		return ret;
> 
> Since neither function may return anything other than -1 in case of error at
> the moment and since their wait_to_complete argument is being removed, I
> suggest to make them properly non-blocking by default (i.e. O_NONBLOCK on
> their ioctl() FD), then return -errno in case of error on intermediate
> system calls, then the above check will make sense.
> 
> >  	/* If lsc interrupt is disabled, should always be ready for traffic. */
> >  	if (!dev->data->dev_conf.intr_conf.lsc) {
> >  		priv_link_start(priv);
> > @@ -755,10 +761,11 @@ int
> >  priv_force_link_status_change(struct priv *priv, int status)
> >  {
> >  	int try = 0;
> > +	int ret = 0;
> >  
> >  	while (try < MLX5_MAX_LINK_QUERY_ATTEMPTS) {
> > -		priv_link_update(priv, 0);
> > -		if (priv->dev->data->dev_link.link_status == status)
> > +		ret = priv_link_update(priv, 0);
> > +		if (!ret && priv->dev->data->dev_link.link_status == status)
> >  			return 0;
> >  		try++;
> >  		sleep(1);
> 
> I think this function could be removed in the same patch (with e313ef4c2fe8
> "net/mlx5: fix link state on device start" partially reverted) since a call
> to priv_link_update(priv, 1) would now result in the same behavior.
 
Agreed, I'll re-work it in a v2.

Thanks,

-- 
Nélio Laranjeiro
6WIND

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [dpdk-dev] [PATCH 1/2] net/mlx5: add kernel version function
  2018-02-15  8:47 [dpdk-dev] [PATCH 1/2] net/mlx5: add kernel version function Nelio Laranjeiro
  2018-02-15  8:47 ` [dpdk-dev] [PATCH 2/2] net/mlx5: fix link status to use wait to complete Nelio Laranjeiro
  2018-02-16 10:48 ` [dpdk-dev] [PATCH 1/2] net/mlx5: add kernel version function Adrien Mazarguil
@ 2018-02-16 18:03 ` Stephen Hemminger
  2018-02-19  7:42   ` Nélio Laranjeiro
  2018-03-12 13:43 ` [dpdk-dev] [PATCH v2 0/3] net/mlx5: cleanup link status Nelio Laranjeiro
                   ` (3 subsequent siblings)
  6 siblings, 1 reply; 18+ messages in thread
From: Stephen Hemminger @ 2018-02-16 18:03 UTC (permalink / raw)
  To: Nelio Laranjeiro; +Cc: dev, Adrien Mazarguil, Yongseok Koh

On Thu, 15 Feb 2018 09:47:27 +0100
Nelio Laranjeiro <nelio.laranjeiro@6wind.com> wrote:

> Use a function to retrieve the version of the kernel.
> 
> Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>

This type of logic will run into problems with Enterprise and vendor kernels.
What about users using older kernels with OFED? What about case where kernel
MLX drivers have been backported?

In general, it is better to adapt to the environment the code is running
in. Try the new feature, if it fails then log a message (at info level)
and fallback to the other strategy.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [dpdk-dev] [PATCH 1/2] net/mlx5: add kernel version function
  2018-02-16 18:03 ` Stephen Hemminger
@ 2018-02-19  7:42   ` Nélio Laranjeiro
  0 siblings, 0 replies; 18+ messages in thread
From: Nélio Laranjeiro @ 2018-02-19  7:42 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dev, Adrien Mazarguil, Yongseok Koh

On Fri, Feb 16, 2018 at 10:03:04AM -0800, Stephen Hemminger wrote:
> On Thu, 15 Feb 2018 09:47:27 +0100
> Nelio Laranjeiro <nelio.laranjeiro@6wind.com> wrote:
> 
> > Use a function to retrieve the version of the kernel.
> > 
> > Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
> 
> This type of logic will run into problems with Enterprise and vendor kernels.
> What about users using older kernels with OFED? What about case where kernel
> MLX drivers have been backported?
> 
> In general, it is better to adapt to the environment the code is running
> in. Try the new feature, if it fails then log a message (at info level)
> and fallback to the other strategy.

We have already discussed it, this new API in the kernel is buggee
between v4.5 to v4.9.  As there is now way in between those version to
detect if the API is the buggee one or not, we don't have any other
solution.

It is better to not use at all this new API than using a buggee one.

-- 
Nélio Laranjeiro
6WIND

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [dpdk-dev] [PATCH v2 0/3] net/mlx5: cleanup link status
  2018-02-15  8:47 [dpdk-dev] [PATCH 1/2] net/mlx5: add kernel version function Nelio Laranjeiro
                   ` (2 preceding siblings ...)
  2018-02-16 18:03 ` Stephen Hemminger
@ 2018-03-12 13:43 ` Nelio Laranjeiro
  2018-03-19 19:15   ` Shahaf Shuler
  2018-03-12 13:43 ` [dpdk-dev] [PATCH v2 1/3] net/mlx5: remove kernel version check Nelio Laranjeiro
                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 18+ messages in thread
From: Nelio Laranjeiro @ 2018-03-12 13:43 UTC (permalink / raw)
  To: dev; +Cc: Adrien Mazarguil, Yongseok Koh

This series applies on top of [1] and cleans up the DPDK API implementation
for the link status.

[1] https://dpdk.org/dev/patchwork/patch/35653/

Changes in v2:
- Removes kernel version verification, the bug it tried to detected was fixed
  several commit after in the PMD.  Implementation in mlx5 kernel driver is
  only available since v4.9.
- Removes the alarm handler as this can be worked around by not acknowledging
  the event until the link becomes stable.
- Clean-up the API implementation by letting the application handle the
  interrupt and decide by itself to start/stop the device.

Nelio Laranjeiro (3):
  net/mlx5: remove kernel version check
  net/mlx5: fix link status behavior
  net/mlx5: fix link status to use wait to complete

 drivers/net/mlx5/mlx5.c         |   2 +-
 drivers/net/mlx5/mlx5.h         |   1 -
 drivers/net/mlx5/mlx5_defs.h    |   4 +-
 drivers/net/mlx5/mlx5_ethdev.c  | 244 +++++++++-------------------------------
 drivers/net/mlx5/mlx5_trigger.c |  15 ++-
 5 files changed, 69 insertions(+), 197 deletions(-)

-- 
2.11.0

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [dpdk-dev] [PATCH v2 1/3] net/mlx5: remove kernel version check
  2018-02-15  8:47 [dpdk-dev] [PATCH 1/2] net/mlx5: add kernel version function Nelio Laranjeiro
                   ` (3 preceding siblings ...)
  2018-03-12 13:43 ` [dpdk-dev] [PATCH v2 0/3] net/mlx5: cleanup link status Nelio Laranjeiro
@ 2018-03-12 13:43 ` Nelio Laranjeiro
  2018-03-12 13:43 ` [dpdk-dev] [PATCH v2 2/3] net/mlx5: fix link status behavior Nelio Laranjeiro
  2018-03-12 13:43 ` [dpdk-dev] [PATCH v2 3/3] net/mlx5: fix link status to use wait to complete Nelio Laranjeiro
  6 siblings, 0 replies; 18+ messages in thread
From: Nelio Laranjeiro @ 2018-03-12 13:43 UTC (permalink / raw)
  To: dev; +Cc: Adrien Mazarguil, Yongseok Koh

Kernel version check was introduced in
commit 3a49ffe38a95 ("net/mlx5: fix link status query")
due to a bug fixed by
commit ef09a7fc7620 ("net/mlx5: fix inconsistent link status query")

This patch restore the previous behavior as described in Linux API.

Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
---
 drivers/net/mlx5/mlx5_ethdev.c | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index dd46124ec..26f13fb1b 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -18,11 +18,9 @@
 #include <net/if.h>
 #include <sys/ioctl.h>
 #include <sys/socket.h>
-#include <sys/utsname.h>
 #include <netinet/in.h>
 #include <linux/ethtool.h>
 #include <linux/sockios.h>
-#include <linux/version.h>
 #include <fcntl.h>
 #include <stdalign.h>
 #include <sys/un.h>
@@ -734,20 +732,15 @@ mlx5_force_link_status_change(struct rte_eth_dev *dev, int status)
 int
 mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete __rte_unused)
 {
-	struct utsname utsname;
-	int ver[3];
 	int ret;
 	struct rte_eth_link dev_link = dev->data->dev_link;
 
-	if (uname(&utsname) == -1 ||
-	    sscanf(utsname.release, "%d.%d.%d",
-		   &ver[0], &ver[1], &ver[2]) != 3 ||
-	    KERNEL_VERSION(ver[0], ver[1], ver[2]) < KERNEL_VERSION(4, 9, 0))
-		ret = mlx5_link_update_unlocked_gset(dev);
-	else
+	ret = mlx5_link_update_unlocked_gset(dev);
+	if (ret) {
 		ret = mlx5_link_update_unlocked_gs(dev);
-	if (ret)
-		return ret;
+		if (ret)
+			return ret;
+	}
 	/* If lsc interrupt is disabled, should always be ready for traffic. */
 	if (!dev->data->dev_conf.intr_conf.lsc) {
 		mlx5_link_start(dev);
-- 
2.11.0

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [dpdk-dev] [PATCH v2 2/3] net/mlx5: fix link status behavior
  2018-02-15  8:47 [dpdk-dev] [PATCH 1/2] net/mlx5: add kernel version function Nelio Laranjeiro
                   ` (4 preceding siblings ...)
  2018-03-12 13:43 ` [dpdk-dev] [PATCH v2 1/3] net/mlx5: remove kernel version check Nelio Laranjeiro
@ 2018-03-12 13:43 ` Nelio Laranjeiro
  2018-03-13 21:54   ` Yongseok Koh
  2018-03-15 18:08   ` Yongseok Koh
  2018-03-12 13:43 ` [dpdk-dev] [PATCH v2 3/3] net/mlx5: fix link status to use wait to complete Nelio Laranjeiro
  6 siblings, 2 replies; 18+ messages in thread
From: Nelio Laranjeiro @ 2018-03-12 13:43 UTC (permalink / raw)
  To: dev; +Cc: Adrien Mazarguil, Yongseok Koh, shahafs, stable

This behavior is mixed between what should be handled by the application
and what is under PMD responsibility.

According to DPDK API:
- link_update() should only query the link status [1]
- link_set_{up,down}() should only set the link to the according status [1]
- dev_{start,stop}() should enable/disable traffic reception/emission [2]

On this PMD, the link status is retrieved from the net device associated
owned by the Linux Kernel, it does not means that even when this interface
is down, the PMD cannot send/receive traffic from the NIC those two
information are unrelated, until the physical port is active and has a
link, the PMD can receive/send traffic on the wire.

According to DPDK API, calling the rte_eth_dev_start() even when the Linux
interface link is down is then possible and allowed, as the traffic will
flow between the DPDK application and the Physical port.

This also means that a synchronisation between the Linux interface and the
DPDK application remains under the DPDK application responsibility.

To handle such synchronisation the application should behave as the
following scheme, to start:

 rte_eth_get_link(port_id, &link);
 if (link.link_status == ETH_DOWN)
	rte_eth_dev_set_link_up(port_id);
 rte_eth_dev_start(port_id);

Taking in account the possible returned values fro each function.

and to stop:

 rte_eth_dev_stop(port_id);
 rte_eth_dev_set_link_down(port_id);

The application should also set the LSC interrupt callbacks to catch and
behave accordingly when the administrator set the Linux device down/up.
The same callbacks are called when the link on the medium falls/raise.

Fixes: c7bf62255edf ("net/mlx5: fix handling link status event")
Fixes: e313ef4c2fe8 ("net/mlx5: fix link state on device start")
Cc: yskoh@mellanox.com
Cc: shahafs@mellanox.com
Cc: stable@dpdk.org

[1] https://dpdk.org/browse/dpdk/tree/lib/librte_ether/rte_ethdev_core.h
[2] https://dpdk.org/browse/dpdk/tree/lib/librte_ether/rte_ethdev.h#n1677

Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
---
 drivers/net/mlx5/mlx5.c         |  2 +-
 drivers/net/mlx5/mlx5_ethdev.c  | 92 +----------------------------------------
 drivers/net/mlx5/mlx5_trigger.c | 15 ++++---
 3 files changed, 12 insertions(+), 97 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 4300bafb7..35a018758 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -962,7 +962,7 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 		/* Bring Ethernet device up. */
 		DRV_LOG(DEBUG, "port %u forcing Ethernet interface up",
 			eth_dev->data->port_id);
-		mlx5_set_flags(eth_dev, ~IFF_UP, IFF_UP);
+		mlx5_set_link_up(eth_dev);
 		/* Store device configuration on private structure. */
 		priv->config = config;
 		continue;
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 26f13fb1b..10ba27c79 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -645,80 +645,6 @@ mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev)
 }
 
 /**
- * Enable receiving and transmitting traffic.
- *
- * @param dev
- *   Pointer to Ethernet device.
- */
-static void
-mlx5_link_start(struct rte_eth_dev *dev)
-{
-	struct priv *priv = dev->data->dev_private;
-	int ret;
-
-	dev->tx_pkt_burst = mlx5_select_tx_function(dev);
-	dev->rx_pkt_burst = mlx5_select_rx_function(dev);
-	ret = mlx5_traffic_enable(dev);
-	if (ret) {
-		DRV_LOG(ERR,
-			"port %u error occurred while configuring control"
-			" flows: %s",
-			dev->data->port_id, strerror(rte_errno));
-		return;
-	}
-	ret = mlx5_flow_start(dev, &priv->flows);
-	if (ret)
-		DRV_LOG(ERR,
-			"port %u error occurred while configuring flows: %s",
-			dev->data->port_id, strerror(rte_errno));
-}
-
-/**
- * Disable receiving and transmitting traffic.
- *
- * @param dev
- *   Pointer to Ethernet device.
- */
-static void
-mlx5_link_stop(struct rte_eth_dev *dev)
-{
-	struct priv *priv = dev->data->dev_private;
-
-	mlx5_flow_stop(dev, &priv->flows);
-	mlx5_traffic_disable(dev);
-	dev->rx_pkt_burst = removed_rx_burst;
-	dev->tx_pkt_burst = removed_tx_burst;
-}
-
-/**
- * Querying the link status till it changes to the desired state.
- * Number of query attempts is bounded by MLX5_MAX_LINK_QUERY_ATTEMPTS.
- *
- * @param dev
- *   Pointer to Ethernet device.
- * @param status
- *   Link desired status.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-int
-mlx5_force_link_status_change(struct rte_eth_dev *dev, int status)
-{
-	int try = 0;
-
-	while (try < MLX5_MAX_LINK_QUERY_ATTEMPTS) {
-		mlx5_link_update(dev, 0);
-		if (dev->data->dev_link.link_status == status)
-			return 0;
-		try++;
-		sleep(1);
-	}
-	rte_errno = EAGAIN;
-	return -rte_errno;
-}
-
-/**
  * DPDK callback to retrieve physical link information.
  *
  * @param dev
@@ -733,26 +659,10 @@ int
 mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete __rte_unused)
 {
 	int ret;
-	struct rte_eth_link dev_link = dev->data->dev_link;
 
 	ret = mlx5_link_update_unlocked_gset(dev);
-	if (ret) {
+	if (ret)
 		ret = mlx5_link_update_unlocked_gs(dev);
-		if (ret)
-			return ret;
-	}
-	/* If lsc interrupt is disabled, should always be ready for traffic. */
-	if (!dev->data->dev_conf.intr_conf.lsc) {
-		mlx5_link_start(dev);
-		return 0;
-	}
-	/* Re-select burst callbacks only if link status has been changed. */
-	if (!ret && dev_link.link_status != dev->data->dev_link.link_status) {
-		if (dev->data->dev_link.link_status == ETH_LINK_UP)
-			mlx5_link_start(dev);
-		else
-			mlx5_link_stop(dev);
-	}
 	return 0;
 }
 
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 28770b8eb..6bb4ffb14 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -176,15 +176,20 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 		goto error;
 	}
 	mlx5_xstats_init(dev);
-	/* Update link status and Tx/Rx callbacks for the first time. */
-	memset(&dev->data->dev_link, 0, sizeof(struct rte_eth_link));
-	DRV_LOG(INFO, "forcing port %u link to be up", dev->data->port_id);
-	ret = mlx5_force_link_status_change(dev, ETH_LINK_UP);
+	ret = mlx5_traffic_enable(dev);
 	if (ret) {
-		DRV_LOG(DEBUG, "failed to set port %u link to be up",
+		DRV_LOG(DEBUG, "port %u failed to set defaults flows",
 			dev->data->port_id);
 		goto error;
 	}
+	ret = mlx5_flow_start(dev, &priv->flows);
+	if (ret) {
+		DRV_LOG(DEBUG, "port %u failed to set flows",
+			dev->data->port_id);
+		goto error;
+	}
+	dev->tx_pkt_burst = mlx5_select_tx_function(dev);
+	dev->rx_pkt_burst = mlx5_select_rx_function(dev);
 	mlx5_dev_interrupt_handler_install(dev);
 	return 0;
 error:
-- 
2.11.0

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [dpdk-dev] [PATCH v2 3/3] net/mlx5: fix link status to use wait to complete
  2018-02-15  8:47 [dpdk-dev] [PATCH 1/2] net/mlx5: add kernel version function Nelio Laranjeiro
                   ` (5 preceding siblings ...)
  2018-03-12 13:43 ` [dpdk-dev] [PATCH v2 2/3] net/mlx5: fix link status behavior Nelio Laranjeiro
@ 2018-03-12 13:43 ` Nelio Laranjeiro
  6 siblings, 0 replies; 18+ messages in thread
From: Nelio Laranjeiro @ 2018-03-12 13:43 UTC (permalink / raw)
  To: dev; +Cc: Adrien Mazarguil, Yongseok Koh, shahafs, stable

Wait to complete is present to let the application get a correct status
when it requires it, it should not be ignored.

Fixes: e313ef4c2fe8 ("net/mlx5: fix link state on device start")
Fixes: cb8faed7dde8 ("mlx5: support link status update")
Cc: shahafs@mellanox.com
Cc: adrien.mazarguil@6wind.com
Cc: stable@dpdk.org

Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
---
 drivers/net/mlx5/mlx5.h        |   1 -
 drivers/net/mlx5/mlx5_defs.h   |   4 +-
 drivers/net/mlx5/mlx5_ethdev.c | 147 ++++++++++++++++-------------------------
 3 files changed, 58 insertions(+), 94 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 86310404a..faacfd9d6 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -124,7 +124,6 @@ struct priv {
 	/* Device properties. */
 	uint16_t mtu; /* Configured MTU. */
 	uint8_t port; /* Physical port number. */
-	unsigned int pending_alarm:1; /* An alarm is pending. */
 	unsigned int isolated:1; /* Whether isolated mode is enabled. */
 	/* RX/TX queues. */
 	unsigned int rxqs_n; /* RX queues array size. */
diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h
index c3334ca30..6401588ee 100644
--- a/drivers/net/mlx5/mlx5_defs.h
+++ b/drivers/net/mlx5/mlx5_defs.h
@@ -82,8 +82,8 @@
 /* Supported RSS */
 #define MLX5_RSS_HF_MASK (~(ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP))
 
-/* Maximum number of attempts to query link status before giving up. */
-#define MLX5_MAX_LINK_QUERY_ATTEMPTS 5
+/* Timeout in seconds to get a valid link status. */
+#define MLX5_LINK_STATUS_TIMEOUT 10
 
 /* Reserved address space for UAR mapping. */
 #define MLX5_UAR_SIZE (1ULL << 32)
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 10ba27c79..f219f2ccd 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -24,6 +24,7 @@
 #include <fcntl.h>
 #include <stdalign.h>
 #include <sys/un.h>
+#include <time.h>
 
 #include <rte_atomic.h>
 #include <rte_ethdev_driver.h>
@@ -31,7 +32,6 @@
 #include <rte_mbuf.h>
 #include <rte_common.h>
 #include <rte_interrupts.h>
-#include <rte_alarm.h>
 #include <rte_malloc.h>
 
 #include "mlx5.h"
@@ -473,12 +473,15 @@ mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev)
  *
  * @param dev
  *   Pointer to Ethernet device structure.
+ * @param[out] link
+ *   Storage for current link status.
  *
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev)
+mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev,
+			       struct rte_eth_link *link)
 {
 	struct priv *priv = dev->data->dev_private;
 	struct ethtool_cmd edata = {
@@ -528,14 +531,13 @@ mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev)
 				ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
 	dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
 			ETH_LINK_SPEED_FIXED);
-	if (memcmp(&dev_link, &dev->data->dev_link, sizeof(dev_link))) {
-		/* Link status changed. */
-		dev->data->dev_link = dev_link;
-		return 0;
+	if ((dev_link.link_speed && !dev_link.link_status) ||
+	    (!dev_link.link_speed && dev_link.link_status)) {
+		rte_errno = EAGAIN;
+		return -rte_errno;
 	}
-	/* Link status is still the same. */
-	rte_errno = EAGAIN;
-	return -rte_errno;
+	*link = dev_link;
+	return 0;
 }
 
 /**
@@ -543,12 +545,16 @@ mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev)
  *
  * @param dev
  *   Pointer to Ethernet device structure.
+ * @param[out] link
+ *   Storage for current link status.
  *
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev)
+mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev,
+			     struct rte_eth_link *link)
+
 {
 	struct priv *priv = dev->data->dev_private;
 	struct ethtool_link_settings gcmd = { .cmd = ETHTOOL_GLINKSETTINGS };
@@ -634,14 +640,13 @@ mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev)
 				ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
 	dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
 				  ETH_LINK_SPEED_FIXED);
-	if (memcmp(&dev_link, &dev->data->dev_link, sizeof(dev_link))) {
-		/* Link status changed. */
-		dev->data->dev_link = dev_link;
-		return 0;
+	if ((dev_link.link_speed && !dev_link.link_status) ||
+	    (!dev_link.link_speed && dev_link.link_status)) {
+		rte_errno = EAGAIN;
+		return -rte_errno;
 	}
-	/* Link status is still the same. */
-	rte_errno = EAGAIN;
-	return -rte_errno;
+	*link = dev_link;
+	return 0;
 }
 
 /**
@@ -650,20 +655,43 @@ mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev)
  * @param dev
  *   Pointer to Ethernet device structure.
  * @param wait_to_complete
- *   Wait for request completion (ignored).
+ *   Wait for request completion.
  *
  * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
+ *   0 if link status was not updated, positive if it was, a negative errno
+ *   value otherwise and rte_errno is set.*
  */
 int
-mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete __rte_unused)
+mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete)
 {
 	int ret;
+	struct rte_eth_link dev_link;
+	time_t start_time = time(NULL);
 
-	ret = mlx5_link_update_unlocked_gset(dev);
-	if (ret)
-		ret = mlx5_link_update_unlocked_gs(dev);
-	return 0;
+	do {
+		ret = mlx5_link_update_unlocked_gset(dev, &dev_link);
+		if (ret)
+			ret = mlx5_link_update_unlocked_gs(dev, &dev_link);
+		if (ret == 0)
+			break;
+		/* Handle wait to complete situation. */
+		if (wait_to_complete && ret == -EAGAIN) {
+			if (abs((int)difftime(time(NULL), start_time)) <
+			    MLX5_LINK_STATUS_TIMEOUT) {
+				usleep(0);
+				continue;
+			} else {
+				rte_errno = EBUSY;
+				return -rte_errno;
+			}
+		} else if (ret < 0) {
+			return ret;
+		}
+	} while (wait_to_complete);
+	ret = !!memcmp(&dev->data->dev_link, &dev_link,
+		       sizeof(struct rte_eth_link));
+	dev->data->dev_link = dev_link;
+	return ret;
 }
 
 /**
@@ -842,47 +870,6 @@ mlx5_ibv_device_to_pci_addr(const struct ibv_device *device,
 }
 
 /**
- * Update the link status.
- *
- * @param dev
- *   Pointer to Ethernet device.
- *
- * @return
- *   Zero if the callback process can be called immediately, negative errno
- *   value otherwise and rte_errno is set.
- */
-static int
-mlx5_link_status_update(struct rte_eth_dev *dev)
-{
-	struct priv *priv = dev->data->dev_private;
-	struct rte_eth_link *link = &dev->data->dev_link;
-	int ret;
-
-	ret = mlx5_link_update(dev, 0);
-	if (ret)
-		return ret;
-	if (((link->link_speed == 0) && link->link_status) ||
-		((link->link_speed != 0) && !link->link_status)) {
-		/*
-		 * Inconsistent status. Event likely occurred before the
-		 * kernel netdevice exposes the new status.
-		 */
-		if (!priv->pending_alarm) {
-			priv->pending_alarm = 1;
-			rte_eal_alarm_set(MLX5_ALARM_TIMEOUT_US,
-					  mlx5_dev_link_status_handler,
-					  priv->dev);
-		}
-		return 1;
-	} else if (unlikely(priv->pending_alarm)) {
-		/* Link interrupt occurred while alarm is already scheduled. */
-		priv->pending_alarm = 0;
-		rte_eal_alarm_cancel(mlx5_dev_link_status_handler, priv->dev);
-	}
-	return 0;
-}
-
-/**
  * Device status handler.
  *
  * @param dev
@@ -900,6 +887,10 @@ mlx5_dev_status_handler(struct rte_eth_dev *dev)
 	struct ibv_async_event event;
 	uint32_t ret = 0;
 
+	if (mlx5_link_update(dev, 0) == -EAGAIN) {
+		usleep(0);
+		return 0;
+	}
 	/* Read all message and acknowledge them. */
 	for (;;) {
 		if (mlx5_glue->get_async_event(priv->ctx, &event))
@@ -917,32 +908,10 @@ mlx5_dev_status_handler(struct rte_eth_dev *dev)
 				dev->data->port_id, event.event_type);
 		mlx5_glue->ack_async_event(&event);
 	}
-	if (ret & (1 << RTE_ETH_EVENT_INTR_LSC))
-		if (mlx5_link_status_update(dev))
-			ret &= ~(1 << RTE_ETH_EVENT_INTR_LSC);
 	return ret;
 }
 
 /**
- * Handle delayed link status event.
- *
- * @param arg
- *   Registered argument.
- */
-void
-mlx5_dev_link_status_handler(void *arg)
-{
-	struct rte_eth_dev *dev = arg;
-	struct priv *priv = dev->data->dev_private;
-	int ret;
-
-	priv->pending_alarm = 0;
-	ret = mlx5_link_status_update(dev);
-	if (!ret)
-		_rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
-}
-
-/**
  * Handle interrupts from the NIC.
  *
  * @param[in] intr_handle
@@ -995,10 +964,6 @@ mlx5_dev_interrupt_handler_uninstall(struct rte_eth_dev *dev)
 	if (priv->primary_socket)
 		rte_intr_callback_unregister(&priv->intr_handle_socket,
 					     mlx5_dev_handler_socket, dev);
-	if (priv->pending_alarm) {
-		priv->pending_alarm = 0;
-		rte_eal_alarm_cancel(mlx5_dev_link_status_handler, dev);
-	}
 	priv->intr_handle.fd = 0;
 	priv->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
 	priv->intr_handle_socket.fd = 0;
-- 
2.11.0

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [dpdk-dev] [PATCH v2 2/3] net/mlx5: fix link status behavior
  2018-03-12 13:43 ` [dpdk-dev] [PATCH v2 2/3] net/mlx5: fix link status behavior Nelio Laranjeiro
@ 2018-03-13 21:54   ` Yongseok Koh
  2018-03-14 12:18     ` Adrien Mazarguil
  2018-03-14 12:22     ` Nélio Laranjeiro
  2018-03-15 18:08   ` Yongseok Koh
  1 sibling, 2 replies; 18+ messages in thread
From: Yongseok Koh @ 2018-03-13 21:54 UTC (permalink / raw)
  To: Nelio Laranjeiro; +Cc: dev, Adrien Mazarguil, shahafs, stable

On Mon, Mar 12, 2018 at 02:43:18PM +0100, Nelio Laranjeiro wrote:
> This behavior is mixed between what should be handled by the application
> and what is under PMD responsibility.
> 
> According to DPDK API:
> - link_update() should only query the link status [1]
> - link_set_{up,down}() should only set the link to the according status [1]
> - dev_{start,stop}() should enable/disable traffic reception/emission [2]

The description of rte_eth_dev_set_link_up() is [1] :
	The device rx/tx functionality will be disabled if success, and it can
	be re-enabled with a call to rte_eth_dev_set_link_up()

This means, if user runs "set link-down port 0" on testpmd, traffic should stop
by disabling Rx/Tx on device. But unfortunately, mlx5 doesn't have a way to stop
device but it rather relies on kernel implementation - e.g. SIOCSIFFLAGS. So,
even if the command is run, traffic goes on. I guess the original
implementation might be needed to workaround this situation.

Shall we talk to HW and driver people regarding how to access dev (or PHY) from
user-level?

[1] http://dpdk.org/doc/api/rte__ethdev_8h.html#a51d7a0d2bb4202f9ebf9f174ba1f6e5c

Thanks,
Yongseok

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [dpdk-dev] [PATCH v2 2/3] net/mlx5: fix link status behavior
  2018-03-13 21:54   ` Yongseok Koh
@ 2018-03-14 12:18     ` Adrien Mazarguil
  2018-03-14 17:40       ` Yongseok Koh
  2018-03-14 12:22     ` Nélio Laranjeiro
  1 sibling, 1 reply; 18+ messages in thread
From: Adrien Mazarguil @ 2018-03-14 12:18 UTC (permalink / raw)
  To: Yongseok Koh; +Cc: Nelio Laranjeiro, dev, shahafs, stable

On Tue, Mar 13, 2018 at 02:54:44PM -0700, Yongseok Koh wrote:
> On Mon, Mar 12, 2018 at 02:43:18PM +0100, Nelio Laranjeiro wrote:
> > This behavior is mixed between what should be handled by the application
> > and what is under PMD responsibility.
> > 
> > According to DPDK API:
> > - link_update() should only query the link status [1]
> > - link_set_{up,down}() should only set the link to the according status [1]
> > - dev_{start,stop}() should enable/disable traffic reception/emission [2]
> 
> The description of rte_eth_dev_set_link_up() is [1] :
> 	The device rx/tx functionality will be disabled if success, and it can
> 	be re-enabled with a call to rte_eth_dev_set_link_up()
> 
> This means, if user runs "set link-down port 0" on testpmd, traffic should stop
> by disabling Rx/Tx on device. But unfortunately, mlx5 doesn't have a way to stop
> device but it rather relies on kernel implementation - e.g. SIOCSIFFLAGS. So,
> even if the command is run, traffic goes on. I guess the original
> implementation might be needed to workaround this situation.
> 
> Shall we talk to HW and driver people regarding how to access dev (or PHY) from
> user-level?
> 
> [1] http://dpdk.org/doc/api/rte__ethdev_8h.html#a51d7a0d2bb4202f9ebf9f174ba1f6e5c

As you mentioned, since the mlx5 PMD doesn't really own the device, it
doesn't have the final say on whether traffic still flows after putting the
link down at the DPDK level. It has been worked around by replacing burst
callbacks with no-ops since up/down ethops were added [3].

Problem is that updating burst callback pointers while traffic is flowing
has always been more or less unsafe. It's not necessarily atomic and only
really safe to do when traffic is guaranteed to be stopped (i.e. after
dev_stop() was called by the application). Moreover these no-ops don't
prevent device RX queues from still getting filled up.

Looking at the original implementation [4][5], other PMDs simply have to
turn off the laser or some such which doesn't prevent RX/TX functions from
working as before except traffic happens to be lost instead of ending up
rejected by dedicated burst callbacks.

The main purpose of up/down callbacks and the reason they were implemented
in mlx5 is that customers want to see something happen at the carrier level
on the remote end (as with other PMDs) when a DPDK port is brought up or
down. This is why they are seldom implemented in other PMDs for VF
eth_dev_ops given those can't control PHY.

Actively preventing traffic is secondary and either has a performance impact
(permanent status check in the data plane) or is somewhat unsafe (live
replacement of burst callbacks).

Given the above, I'm in favor of removing the no-ops. Applications are the
ones performing up/down calls, they manage the administrative status of
interfaces and should refrain from calling TX/RX burst functions
afterward. Carrier status is left to PMDs and can't necessarily be modified.

[3] 62072098b54e ("mlx5: support setting link up or down")
[4] 915e67837586 ("ethdev: API for link up and down")
[5] c38f4f83edc0 ("ixgbe: link up and down")

-- 
Adrien Mazarguil
6WIND

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [dpdk-dev] [PATCH v2 2/3] net/mlx5: fix link status behavior
  2018-03-13 21:54   ` Yongseok Koh
  2018-03-14 12:18     ` Adrien Mazarguil
@ 2018-03-14 12:22     ` Nélio Laranjeiro
  1 sibling, 0 replies; 18+ messages in thread
From: Nélio Laranjeiro @ 2018-03-14 12:22 UTC (permalink / raw)
  To: Yongseok Koh; +Cc: dev, Adrien Mazarguil, shahafs, stable

On Tue, Mar 13, 2018 at 02:54:44PM -0700, Yongseok Koh wrote:
> On Mon, Mar 12, 2018 at 02:43:18PM +0100, Nelio Laranjeiro wrote:
> > This behavior is mixed between what should be handled by the application
> > and what is under PMD responsibility.
> > 
> > According to DPDK API:
> > - link_update() should only query the link status [1]
> > - link_set_{up,down}() should only set the link to the according status [1]
> > - dev_{start,stop}() should enable/disable traffic reception/emission [2]
> 
> The description of rte_eth_dev_set_link_up() is [1] :
> 	The device rx/tx functionality will be disabled if success, and it can
> 	be re-enabled with a call to rte_eth_dev_set_link_up()
> 
> This means, if user runs "set link-down port 0" on testpmd, traffic should stop
> by disabling Rx/Tx on device. But unfortunately, mlx5 doesn't have a way to stop
> device but it rather relies on kernel implementation - e.g. SIOCSIFFLAGS. So,
> even if the command is run, traffic goes on. I guess the original
> implementation might be needed to workaround this situation.

As you mention the traffic is not disabled on the hardware, which also
means that replacing the burst functions does not solve anything, it
just moves the issue.
The fact is, the queues can still send/receive traffic even if the link
is down.  Not polling them won't solve the fact that Rx queues will
still receive traffic addressed to the application.

Considering an application should not try to send nor poll traffic once
it has set the link down and this, until it sets the link up, the
behavior is identical to the "original" code i.e. at the first poll, the
application will receive enqueued packets in the Rx queues while the
link was down.

> Shall we talk to HW and driver people regarding how to access dev (or PHY) from
> user-level?

We can.

> [1] http://dpdk.org/doc/api/rte__ethdev_8h.html#a51d7a0d2bb4202f9ebf9f174ba1f6e5c

Regards,

-- 
Nélio Laranjeiro
6WIND

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [dpdk-dev] [PATCH v2 2/3] net/mlx5: fix link status behavior
  2018-03-14 12:18     ` Adrien Mazarguil
@ 2018-03-14 17:40       ` Yongseok Koh
  2018-03-14 19:00         ` Adrien Mazarguil
  0 siblings, 1 reply; 18+ messages in thread
From: Yongseok Koh @ 2018-03-14 17:40 UTC (permalink / raw)
  To: Adrien Mazarguil, Nelio Laranjeiro; +Cc: dev, shahafs, stable

On Wed, Mar 14, 2018 at 01:18:56PM +0100, Adrien Mazarguil wrote:
> On Tue, Mar 13, 2018 at 02:54:44PM -0700, Yongseok Koh wrote:
> > On Mon, Mar 12, 2018 at 02:43:18PM +0100, Nelio Laranjeiro wrote:
> > > This behavior is mixed between what should be handled by the application
> > > and what is under PMD responsibility.
> > > 
> > > According to DPDK API:
> > > - link_update() should only query the link status [1]
> > > - link_set_{up,down}() should only set the link to the according status [1]
> > > - dev_{start,stop}() should enable/disable traffic reception/emission [2]
> > 
> > The description of rte_eth_dev_set_link_up() is [1] :
> > 	The device rx/tx functionality will be disabled if success, and it can
> > 	be re-enabled with a call to rte_eth_dev_set_link_up()
> > 
> > This means, if user runs "set link-down port 0" on testpmd, traffic should stop
> > by disabling Rx/Tx on device. But unfortunately, mlx5 doesn't have a way to stop
> > device but it rather relies on kernel implementation - e.g. SIOCSIFFLAGS. So,
> > even if the command is run, traffic goes on. I guess the original
> > implementation might be needed to workaround this situation.
> > 
> > Shall we talk to HW and driver people regarding how to access dev (or PHY) from
> > user-level?
> > 
> > [1] https://emea01.safelinks.protection.outlook.com/?url=http%3A%2F%2Fdpdk.org%2Fdoc%2Fapi%2Frte__ethdev_8h.html%23a51d7a0d2bb4202f9ebf9f174ba1f6e5c&data=02%7C01%7Cyskoh%40mellanox.com%7C346b9914b7664dcf0e7008d589a5cb53%7Ca652971c7d2e4d9ba6a4d149256f461b%7C0%7C0%7C636566267555398866&sdata=Ad1%2FyQqyXeifXFJjxMMRxq81YGpF7nEFHvaX28nncl8%3D&reserved=0
> 
> As you mentioned, since the mlx5 PMD doesn't really own the device, it
> doesn't have the final say on whether traffic still flows after putting the
> link down at the DPDK level. It has been worked around by replacing burst
> callbacks with no-ops since up/down ethops were added [3].
> 
> Problem is that updating burst callback pointers while traffic is flowing
> has always been more or less unsafe. It's not necessarily atomic and only
> really safe to do when traffic is guaranteed to be stopped (i.e. after
> dev_stop() was called by the application). Moreover these no-ops don't
> prevent device RX queues from still getting filled up.
> 
> Looking at the original implementation [4][5], other PMDs simply have to
> turn off the laser or some such which doesn't prevent RX/TX functions from
> working as before except traffic happens to be lost instead of ending up
> rejected by dedicated burst callbacks.
> 
> The main purpose of up/down callbacks and the reason they were implemented
> in mlx5 is that customers want to see something happen at the carrier level
> on the remote end (as with other PMDs) when a DPDK port is brought up or
> down. This is why they are seldom implemented in other PMDs for VF
> eth_dev_ops given those can't control PHY.
> 
> Actively preventing traffic is secondary and either has a performance impact
> (permanent status check in the data plane) or is somewhat unsafe (live
> replacement of burst callbacks).
> 
> Given the above, I'm in favor of removing the no-ops. Applications are the
> ones performing up/down calls, they manage the administrative status of
> interfaces and should refrain from calling TX/RX burst functions
> afterward. Carrier status is left to PMDs and can't necessarily be modified.
> 
> [3] 62072098b54e ("mlx5: support setting link up or down")
> [4] 915e67837586 ("ethdev: API for link up and down")
> [5] c38f4f83edc0 ("ixgbe: link up and down")

Adrien, Nelio

Please don't get me wrong. I didn't mean to defend the status quo. I didn't like
the null burst function either since I firstly joined this project. I was just
mentioning it was anyway non-compliant to the document and suggesting to find
out a better way if any, e.g. accessing PHY. Even if you don't think it is a
critical matter, there's no need to change the kernel flag and we just can make
dev_set_link_down/up() return without doing anything. If we can't/don't change
carrier status in the functions and those funcs have no effect, how about not
changing the kernel interface flag? Or, if you still insist no change is needed
in this patch, that is also fine to me as this isn't a critical path and doesn't
have any erroneous behavior.

Thanks,
Yongseok

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [dpdk-dev] [PATCH v2 2/3] net/mlx5: fix link status behavior
  2018-03-14 17:40       ` Yongseok Koh
@ 2018-03-14 19:00         ` Adrien Mazarguil
  0 siblings, 0 replies; 18+ messages in thread
From: Adrien Mazarguil @ 2018-03-14 19:00 UTC (permalink / raw)
  To: Yongseok Koh; +Cc: Nelio Laranjeiro, dev, shahafs, stable

On Wed, Mar 14, 2018 at 10:40:59AM -0700, Yongseok Koh wrote:
> On Wed, Mar 14, 2018 at 01:18:56PM +0100, Adrien Mazarguil wrote:
> > On Tue, Mar 13, 2018 at 02:54:44PM -0700, Yongseok Koh wrote:
> > > On Mon, Mar 12, 2018 at 02:43:18PM +0100, Nelio Laranjeiro wrote:
> > > > This behavior is mixed between what should be handled by the application
> > > > and what is under PMD responsibility.
> > > > 
> > > > According to DPDK API:
> > > > - link_update() should only query the link status [1]
> > > > - link_set_{up,down}() should only set the link to the according status [1]
> > > > - dev_{start,stop}() should enable/disable traffic reception/emission [2]
> > > 
> > > The description of rte_eth_dev_set_link_up() is [1] :
> > > 	The device rx/tx functionality will be disabled if success, and it can
> > > 	be re-enabled with a call to rte_eth_dev_set_link_up()
> > > 
> > > This means, if user runs "set link-down port 0" on testpmd, traffic should stop
> > > by disabling Rx/Tx on device. But unfortunately, mlx5 doesn't have a way to stop
> > > device but it rather relies on kernel implementation - e.g. SIOCSIFFLAGS. So,
> > > even if the command is run, traffic goes on. I guess the original
> > > implementation might be needed to workaround this situation.
> > > 
> > > Shall we talk to HW and driver people regarding how to access dev (or PHY) from
> > > user-level?
> > > 
> > > [1] https://emea01.safelinks.protection.outlook.com/?url=http%3A%2F%2Fdpdk.org%2Fdoc%2Fapi%2Frte__ethdev_8h.html%23a51d7a0d2bb4202f9ebf9f174ba1f6e5c&data=02%7C01%7Cyskoh%40mellanox.com%7C346b9914b7664dcf0e7008d589a5cb53%7Ca652971c7d2e4d9ba6a4d149256f461b%7C0%7C0%7C636566267555398866&sdata=Ad1%2FyQqyXeifXFJjxMMRxq81YGpF7nEFHvaX28nncl8%3D&reserved=0
> > 
> > As you mentioned, since the mlx5 PMD doesn't really own the device, it
> > doesn't have the final say on whether traffic still flows after putting the
> > link down at the DPDK level. It has been worked around by replacing burst
> > callbacks with no-ops since up/down ethops were added [3].
> > 
> > Problem is that updating burst callback pointers while traffic is flowing
> > has always been more or less unsafe. It's not necessarily atomic and only
> > really safe to do when traffic is guaranteed to be stopped (i.e. after
> > dev_stop() was called by the application). Moreover these no-ops don't
> > prevent device RX queues from still getting filled up.
> > 
> > Looking at the original implementation [4][5], other PMDs simply have to
> > turn off the laser or some such which doesn't prevent RX/TX functions from
> > working as before except traffic happens to be lost instead of ending up
> > rejected by dedicated burst callbacks.
> > 
> > The main purpose of up/down callbacks and the reason they were implemented
> > in mlx5 is that customers want to see something happen at the carrier level
> > on the remote end (as with other PMDs) when a DPDK port is brought up or
> > down. This is why they are seldom implemented in other PMDs for VF
> > eth_dev_ops given those can't control PHY.
> > 
> > Actively preventing traffic is secondary and either has a performance impact
> > (permanent status check in the data plane) or is somewhat unsafe (live
> > replacement of burst callbacks).
> > 
> > Given the above, I'm in favor of removing the no-ops. Applications are the
> > ones performing up/down calls, they manage the administrative status of
> > interfaces and should refrain from calling TX/RX burst functions
> > afterward. Carrier status is left to PMDs and can't necessarily be modified.
> > 
> > [3] 62072098b54e ("mlx5: support setting link up or down")
> > [4] 915e67837586 ("ethdev: API for link up and down")
> > [5] c38f4f83edc0 ("ixgbe: link up and down")
> 
> Adrien, Nelio
> 
> Please don't get me wrong. I didn't mean to defend the status quo. I didn't like
> the null burst function either since I firstly joined this project. I was just
> mentioning it was anyway non-compliant to the document and suggesting to find
> out a better way if any, e.g. accessing PHY. Even if you don't think it is a
> critical matter, there's no need to change the kernel flag and we just can make
> dev_set_link_down/up() return without doing anything. If we can't/don't change
> carrier status in the functions and those funcs have no effect, how about not
> changing the kernel interface flag? Or, if you still insist no change is needed
> in this patch, that is also fine to me as this isn't a critical path and doesn't
> have any erroneous behavior.

Heh, all right, I felt obligated to describe how it ended up like that.

I agree that somehow controlling PHY should be OK assuming there was a way
to do it. Currently bringing the netdevice down achieves the desired effect
with PF devices (well, at least it should, according to memory).

The code is the same for VF devices though it has no effect on them, perhaps
we could do like other PMDs by providing a separate set of eth_dev_ops
without up/down capabilities.

One problem will always remain though: an external application can always
re-enable PHY through other interfaces, resuming traffic by doing so.

-- 
Adrien Mazarguil
6WIND

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [dpdk-dev] [PATCH v2 2/3] net/mlx5: fix link status behavior
  2018-03-12 13:43 ` [dpdk-dev] [PATCH v2 2/3] net/mlx5: fix link status behavior Nelio Laranjeiro
  2018-03-13 21:54   ` Yongseok Koh
@ 2018-03-15 18:08   ` Yongseok Koh
  1 sibling, 0 replies; 18+ messages in thread
From: Yongseok Koh @ 2018-03-15 18:08 UTC (permalink / raw)
  To: Nélio Laranjeiro; +Cc: dev, Adrien Mazarguil, Shahaf Shuler, stable


> On Mar 12, 2018, at 6:43 AM, Nelio Laranjeiro <nelio.laranjeiro@6wind.com> wrote:
> 
> This behavior is mixed between what should be handled by the application
> and what is under PMD responsibility.
> 
> According to DPDK API:
> - link_update() should only query the link status [1]
> - link_set_{up,down}() should only set the link to the according status [1]
> - dev_{start,stop}() should enable/disable traffic reception/emission [2]
> 
> On this PMD, the link status is retrieved from the net device associated
> owned by the Linux Kernel, it does not means that even when this interface
> is down, the PMD cannot send/receive traffic from the NIC those two
> information are unrelated, until the physical port is active and has a
> link, the PMD can receive/send traffic on the wire.
> 
> According to DPDK API, calling the rte_eth_dev_start() even when the Linux
> interface link is down is then possible and allowed, as the traffic will
> flow between the DPDK application and the Physical port.
> 
> This also means that a synchronisation between the Linux interface and the
> DPDK application remains under the DPDK application responsibility.
> 
> To handle such synchronisation the application should behave as the
> following scheme, to start:
> 
> rte_eth_get_link(port_id, &link);
> if (link.link_status == ETH_DOWN)
> 	rte_eth_dev_set_link_up(port_id);
> rte_eth_dev_start(port_id);
> 
> Taking in account the possible returned values fro each function.
> 
> and to stop:
> 
> rte_eth_dev_stop(port_id);
> rte_eth_dev_set_link_down(port_id);
> 
> The application should also set the LSC interrupt callbacks to catch and
> behave accordingly when the administrator set the Linux device down/up.
> The same callbacks are called when the link on the medium falls/raise.
> 
> Fixes: c7bf62255edf ("net/mlx5: fix handling link status event")
> Fixes: e313ef4c2fe8 ("net/mlx5: fix link state on device start")
> Cc: yskoh@mellanox.com
> Cc: shahafs@mellanox.com
> Cc: stable@dpdk.org
> 
> [1] https://emea01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fdpdk.org%2Fbrowse%2Fdpdk%2Ftree%2Flib%2Flibrte_ether%2Frte_ethdev_core.h&data=02%7C01%7Cyskoh%40mellanox.com%7Cf3d27421af9541c0d74608d5881f67af%7Ca652971c7d2e4d9ba6a4d149256f461b%7C0%7C0%7C636564590830553144&sdata=dn%2BQOq9IG2O4eYC7aSAMjvQ%2BT9rkVW%2BRo7t9RxLODTk%3D&reserved=0
> [2] https://emea01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fdpdk.org%2Fbrowse%2Fdpdk%2Ftree%2Flib%2Flibrte_ether%2Frte_ethdev.h%23n1677&data=02%7C01%7Cyskoh%40mellanox.com%7Cf3d27421af9541c0d74608d5881f67af%7Ca652971c7d2e4d9ba6a4d149256f461b%7C0%7C0%7C636564590830553144&sdata=2zY%2F9gpIRcjz1mo4442u9uHTJPj5GVRftxHW8oVi6Ug%3D&reserved=0
> 
> Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
> Acked-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
> ---
Acked-by: Yongseok Koh <yskoh@mellanox.com>
 
Thanks

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [dpdk-dev] [PATCH v2 0/3] net/mlx5: cleanup link status
  2018-03-12 13:43 ` [dpdk-dev] [PATCH v2 0/3] net/mlx5: cleanup link status Nelio Laranjeiro
@ 2018-03-19 19:15   ` Shahaf Shuler
  0 siblings, 0 replies; 18+ messages in thread
From: Shahaf Shuler @ 2018-03-19 19:15 UTC (permalink / raw)
  To: Nélio Laranjeiro, dev; +Cc: Adrien Mazarguil, Yongseok Koh

Monday, March 12, 2018 3:43 PM, Nelio Laranjeiro:
> This series applies on top of [1] and cleans up the DPDK API implementation
> for the link status.
> 
> [1]
> https://emea01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fdp
> dk.org%2Fdev%2Fpatchwork%2Fpatch%2F35653%2F&data=02%7C01%7Csha
> hafs%40mellanox.com%7C53adbbb3cbac4c7036f508d5881f6948%7Ca652971c
> 7d2e4d9ba6a4d149256f461b%7C0%7C0%7C636564590863924988&sdata=ic9Y
> UOQLHq719ebOXxX%2FlS%2BgzWX7R7CFa7qXrOEG7xs%3D&reserved=0
> 
> Changes in v2:
> - Removes kernel version verification, the bug it tried to detected was fixed
>   several commit after in the PMD.  Implementation in mlx5 kernel driver is
>   only available since v4.9.
> - Removes the alarm handler as this can be worked around by not
> acknowledging
>   the event until the link becomes stable.
> - Clean-up the API implementation by letting the application handle the
>   interrupt and decide by itself to start/stop the device.
> 
> Nelio Laranjeiro (3):
>   net/mlx5: remove kernel version check
>   net/mlx5: fix link status behavior
>   net/mlx5: fix link status to use wait to complete

Series applied to next-net-mlx, thanks.

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2018-03-19 19:15 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-02-15  8:47 [dpdk-dev] [PATCH 1/2] net/mlx5: add kernel version function Nelio Laranjeiro
2018-02-15  8:47 ` [dpdk-dev] [PATCH 2/2] net/mlx5: fix link status to use wait to complete Nelio Laranjeiro
2018-02-16 10:49   ` Adrien Mazarguil
2018-02-16 12:08     ` Nélio Laranjeiro
2018-02-16 10:48 ` [dpdk-dev] [PATCH 1/2] net/mlx5: add kernel version function Adrien Mazarguil
2018-02-16 18:03 ` Stephen Hemminger
2018-02-19  7:42   ` Nélio Laranjeiro
2018-03-12 13:43 ` [dpdk-dev] [PATCH v2 0/3] net/mlx5: cleanup link status Nelio Laranjeiro
2018-03-19 19:15   ` Shahaf Shuler
2018-03-12 13:43 ` [dpdk-dev] [PATCH v2 1/3] net/mlx5: remove kernel version check Nelio Laranjeiro
2018-03-12 13:43 ` [dpdk-dev] [PATCH v2 2/3] net/mlx5: fix link status behavior Nelio Laranjeiro
2018-03-13 21:54   ` Yongseok Koh
2018-03-14 12:18     ` Adrien Mazarguil
2018-03-14 17:40       ` Yongseok Koh
2018-03-14 19:00         ` Adrien Mazarguil
2018-03-14 12:22     ` Nélio Laranjeiro
2018-03-15 18:08   ` Yongseok Koh
2018-03-12 13:43 ` [dpdk-dev] [PATCH v2 3/3] net/mlx5: fix link status to use wait to complete Nelio Laranjeiro

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).