patches for DPDK stable branches
 help / color / mirror / Atom feed
* [dpdk-stable] [PATCH 1/2] net/mlx4: fix device detach
       [not found] <1610555333-18961-1-git-send-email-michaelba@nvidia.com>
@ 2021-01-13 16:28 ` Michael Baum
  2021-01-14  8:32   ` [dpdk-stable] [dpdk-dev] " David Marchand
       [not found]   ` <1611130491-19129-1-git-send-email-michaelba@nvidia.com>
  2021-01-13 16:28 ` [dpdk-stable] [PATCH " Michael Baum
  1 sibling, 2 replies; 6+ messages in thread
From: Michael Baum @ 2021-01-13 16:28 UTC (permalink / raw)
  To: dev; +Cc: Matan Azrad, Raslan Darawsheh, David Marchand, stable

When an mlx4 device is probed, 2 different ethdev ports may be created for
the 2 physical ports of the device.

Wrongly, when the device is removed, the created ports are not released.

Close and release the ethdev ports in the remove process.

Fixes: 7fae69eeff13 ("mlx4: new poll mode driver")
Cc: stable@dpdk.org

Reported-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Michael Baum <michaelba@nvidia.com>
Acked-by: Matan Azrad <matan@nvidia.com>
---
 drivers/net/mlx4/mlx4.c | 35 ++++++++++++++++++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index d5d8c96..7460afa 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -375,8 +375,10 @@ struct mlx4_conf {
 	struct mlx4_priv *priv = dev->data->dev_private;
 	unsigned int i;
 
-	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+		rte_eth_dev_release_port(dev);
 		return 0;
+	}
 	DEBUG("%p: closing device \"%s\"",
 	      (void *)dev,
 	      ((priv->ctx != NULL) ? priv->ctx->device->name : ""));
@@ -1123,6 +1125,36 @@ struct mlx4_conf {
 	return -err;
 }
 
+/**
+ * DPDK callback to remove a PCI device.
+ *
+ * This function removes all Ethernet devices belonging to a given PCI device.
+ *
+ * @param[in] pci_dev
+ *   Pointer to the PCI device.
+ *
+ * @return
+ *   0 on success, -EIO if closing any of the ports reported an error.
+ */
+static int
+mlx4_pci_remove(struct rte_pci_device *pci_dev)
+{
+	uint16_t port_id;
+	int ret = 0;
+
+	RTE_ETH_FOREACH_DEV_OF(port_id, &pci_dev->device) {
+		/*
+		 * mlx4_dev_close() is not registered to secondary process,
+		 * call the close function explicitly for secondary process.
+		 */
+		if (rte_eal_process_type() == RTE_PROC_SECONDARY)
+			ret |= mlx4_dev_close(&rte_eth_devices[port_id]);
+		else
+			ret |= rte_eth_dev_close(port_id);
+	}
+	return ret == 0 ? 0 : -EIO;
+}
+
 static const struct rte_pci_id mlx4_pci_id_map[] = {
 	{
 		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
@@ -1147,6 +1179,7 @@ struct mlx4_conf {
 	},
 	.id_table = mlx4_pci_id_map,
 	.probe = mlx4_pci_probe,
+	.remove = mlx4_pci_remove,
 	.drv_flags = RTE_PCI_DRV_INTR_LSC | RTE_PCI_DRV_INTR_RMV,
 };
 
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [dpdk-stable] [PATCH 2/2] net/mlx4: fix PCI probe error flow
       [not found] <1610555333-18961-1-git-send-email-michaelba@nvidia.com>
  2021-01-13 16:28 ` [dpdk-stable] [PATCH 1/2] net/mlx4: fix device detach Michael Baum
@ 2021-01-13 16:28 ` Michael Baum
  1 sibling, 0 replies; 6+ messages in thread
From: Michael Baum @ 2021-01-13 16:28 UTC (permalink / raw)
  To: dev; +Cc: Matan Azrad, Raslan Darawsheh, David Marchand, stable

In mlx4 PCI probing, there are some validations for the Ethernet device
configuration.
From each PCI device the function creates one or two Ethernet devices.

When one of the validations fails during the creation of the second device,
the first device is not freed, which causes a memory leak.

Free it.

Fixes: 7fae69eeff13 ("mlx4: new poll mode driver")
Cc: stable@dpdk.org

Signed-off-by: Michael Baum <michaelba@nvidia.com>
Acked-by: Matan Azrad <matan@nvidia.com>
---
 drivers/net/mlx4/mlx4.c | 27 ++++++++++++++++-----------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 7460afa..92b0427 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -860,6 +860,7 @@ struct mlx4_conf {
 		struct ibv_pd *pd = NULL;
 		struct mlx4_priv *priv = NULL;
 		struct rte_eth_dev *eth_dev = NULL;
+		struct rte_eth_dev *prev_dev = NULL;
 		struct rte_ether_addr mac;
 		char name[RTE_ETH_NAME_MAX_LEN];
 
@@ -880,7 +881,7 @@ struct mlx4_conf {
 				ERROR("can not attach rte ethdev");
 				rte_errno = ENOMEM;
 				err = rte_errno;
-				goto error;
+				goto err_secondary;
 			}
 			priv = eth_dev->data->dev_private;
 			if (!priv->verbs_alloc_ctx.enabled) {
@@ -889,24 +890,24 @@ struct mlx4_conf {
 				      " from Verbs");
 				rte_errno = ENOTSUP;
 				err = rte_errno;
-				goto error;
+				goto err_secondary;
 			}
 			eth_dev->device = &pci_dev->device;
 			eth_dev->dev_ops = &mlx4_dev_sec_ops;
 			err = mlx4_proc_priv_init(eth_dev);
 			if (err)
-				goto error;
+				goto err_secondary;
 			/* Receive command fd from primary process. */
 			err = mlx4_mp_req_verbs_cmd_fd(eth_dev);
 			if (err < 0) {
 				err = rte_errno;
-				goto error;
+				goto err_secondary;
 			}
 			/* Remap UAR for Tx queues. */
 			err = mlx4_tx_uar_init_secondary(eth_dev, err);
 			if (err) {
 				err = rte_errno;
-				goto error;
+				goto err_secondary;
 			}
 			/*
 			 * Ethdev pointer is still required as input since
@@ -918,7 +919,14 @@ struct mlx4_conf {
 			claim_zero(mlx4_glue->close_device(ctx));
 			rte_eth_copy_pci_info(eth_dev, pci_dev);
 			rte_eth_dev_probing_finish(eth_dev);
+			prev_dev = eth_dev;
 			continue;
+err_secondary:
+			claim_zero(mlx4_glue->close_device(ctx));
+			rte_eth_dev_release_port(eth_dev);
+			if (prev_dev)
+				rte_eth_dev_release_port(prev_dev);
+			break;
 		}
 		/* Check port status. */
 		err = mlx4_glue->query_port(ctx, port, &port_attr);
@@ -1093,6 +1101,7 @@ struct mlx4_conf {
 				 priv, mem_event_cb);
 		rte_rwlock_write_unlock(&mlx4_shared_data->mem_event_rwlock);
 		rte_eth_dev_probing_finish(eth_dev);
+		prev_dev = eth_dev;
 		continue;
 port_error:
 		rte_free(priv);
@@ -1107,14 +1116,10 @@ struct mlx4_conf {
 			eth_dev->data->mac_addrs = NULL;
 			rte_eth_dev_release_port(eth_dev);
 		}
+		if (prev_dev)
+			mlx4_dev_close(prev_dev);
 		break;
 	}
-	/*
-	 * XXX if something went wrong in the loop above, there is a resource
-	 * leak (ctx, pd, priv, dpdk ethdev) but we can do nothing about it as
-	 * long as the dpdk does not provide a way to deallocate a ethdev and a
-	 * way to enumerate the registered ethdevs to free the previous ones.
-	 */
 error:
 	if (attr_ctx)
 		claim_zero(mlx4_glue->close_device(attr_ctx));
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [dpdk-stable] [dpdk-dev] [PATCH 1/2] net/mlx4: fix device detach
  2021-01-13 16:28 ` [dpdk-stable] [PATCH 1/2] net/mlx4: fix device detach Michael Baum
@ 2021-01-14  8:32   ` David Marchand
  2021-01-14 10:41     ` David Marchand
       [not found]   ` <1611130491-19129-1-git-send-email-michaelba@nvidia.com>
  1 sibling, 1 reply; 6+ messages in thread
From: David Marchand @ 2021-01-14  8:32 UTC (permalink / raw)
  To: Michael Baum; +Cc: dev, Matan Azrad, Raslan Darawsheh, dpdk stable

On Wed, Jan 13, 2021 at 5:29 PM Michael Baum <michaelba@nvidia.com> wrote:
>
> When mlx4 device is probed, 2 different ethdev ports may be created for
> the 2 physical ports of the device.
>
> Wrongly, when the device is removed, the created ports are not released.
>
> Close and release the ethdev ports in remove process.
>

Missing a reference to bug 488.

> Fixes: 7fae69eeff13 ("mlx4: new poll mode driver")

Odd that it never worked, but if Matan acked, I guess this is ok.

> Cc: stable@dpdk.org
>
> Reported-by: David Marchand <david.marchand@redhat.com>
> Signed-off-by: Michael Baum <michaelba@nvidia.com>
> Acked-by: Matan Azrad <matan@nvidia.com>


-- 
David Marchand


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [dpdk-stable] [dpdk-dev] [PATCH 1/2] net/mlx4: fix device detach
  2021-01-14  8:32   ` [dpdk-stable] [dpdk-dev] " David Marchand
@ 2021-01-14 10:41     ` David Marchand
  0 siblings, 0 replies; 6+ messages in thread
From: David Marchand @ 2021-01-14 10:41 UTC (permalink / raw)
  To: Michael Baum; +Cc: dev, Matan Azrad, Raslan Darawsheh, dpdk stable

On Thu, Jan 14, 2021 at 9:32 AM David Marchand
<david.marchand@redhat.com> wrote:
>
> On Wed, Jan 13, 2021 at 5:29 PM Michael Baum <michaelba@nvidia.com> wrote:
> >
> > When mlx4 device is probed, 2 different ethdev ports may be created for
> > the 2 physical ports of the device.
> >
> > Wrongly, when the device is removed, the created ports are not released.
> >
> > Close and release the ethdev ports in remove process.
> >
>
> Missing a reference to bug 488.
>
> > Fixes: 7fae69eeff13 ("mlx4: new poll mode driver")
>
> Odd that it never worked, but if Matan acked, I guess this is ok.
>
> > Cc: stable@dpdk.org
> >
> > Reported-by: David Marchand <david.marchand@redhat.com>
> > Signed-off-by: Michael Baum <michaelba@nvidia.com>
> > Acked-by: Matan Azrad <matan@nvidia.com>

Forgot to add my tag..
Tested-by: David Marchand <david.marchand@redhat.com>

Thanks for fixing.


-- 
David Marchand


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [dpdk-stable] [PATCH v2 1/2] net/mlx4: fix device detach
       [not found]   ` <1611130491-19129-1-git-send-email-michaelba@nvidia.com>
@ 2021-01-20  8:14     ` Michael Baum
  2021-01-20  8:14     ` [dpdk-stable] [PATCH v2 2/2] net/mlx4: fix PCI probe error flow Michael Baum
  1 sibling, 0 replies; 6+ messages in thread
From: Michael Baum @ 2021-01-20  8:14 UTC (permalink / raw)
  To: dev; +Cc: Matan Azrad, Raslan Darawsheh, David Marchand, stable

When an mlx4 device is probed, 2 different ethdev ports may be created for
the 2 physical ports of the device.

Wrongly, when the device is removed, the created ports are not released.

Close and release the ethdev ports in the remove process.

Fixes: 7fae69eeff13 ("mlx4: new poll mode driver")
Cc: stable@dpdk.org

Reported-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Michael Baum <michaelba@nvidia.com>
Acked-by: Matan Azrad <matan@nvidia.com>
---
 drivers/net/mlx4/mlx4.c | 35 ++++++++++++++++++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index d5d8c96..7460afa 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -375,8 +375,10 @@ struct mlx4_conf {
 	struct mlx4_priv *priv = dev->data->dev_private;
 	unsigned int i;
 
-	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+		rte_eth_dev_release_port(dev);
 		return 0;
+	}
 	DEBUG("%p: closing device \"%s\"",
 	      (void *)dev,
 	      ((priv->ctx != NULL) ? priv->ctx->device->name : ""));
@@ -1123,6 +1125,36 @@ struct mlx4_conf {
 	return -err;
 }
 
+/**
+ * DPDK callback to remove a PCI device.
+ *
+ * This function removes all Ethernet devices belonging to a given PCI device.
+ *
+ * @param[in] pci_dev
+ *   Pointer to the PCI device.
+ *
+ * @return
+ *   0 on success, -EIO if closing any of the ports reported an error.
+ */
+static int
+mlx4_pci_remove(struct rte_pci_device *pci_dev)
+{
+	uint16_t port_id;
+	int ret = 0;
+
+	RTE_ETH_FOREACH_DEV_OF(port_id, &pci_dev->device) {
+		/*
+		 * mlx4_dev_close() is not registered to secondary process,
+		 * call the close function explicitly for secondary process.
+		 */
+		if (rte_eal_process_type() == RTE_PROC_SECONDARY)
+			ret |= mlx4_dev_close(&rte_eth_devices[port_id]);
+		else
+			ret |= rte_eth_dev_close(port_id);
+	}
+	return ret == 0 ? 0 : -EIO;
+}
+
 static const struct rte_pci_id mlx4_pci_id_map[] = {
 	{
 		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
@@ -1147,6 +1179,7 @@ struct mlx4_conf {
 	},
 	.id_table = mlx4_pci_id_map,
 	.probe = mlx4_pci_probe,
+	.remove = mlx4_pci_remove,
 	.drv_flags = RTE_PCI_DRV_INTR_LSC | RTE_PCI_DRV_INTR_RMV,
 };
 
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [dpdk-stable] [PATCH v2 2/2] net/mlx4: fix PCI probe error flow
       [not found]   ` <1611130491-19129-1-git-send-email-michaelba@nvidia.com>
  2021-01-20  8:14     ` [dpdk-stable] [PATCH v2 " Michael Baum
@ 2021-01-20  8:14     ` Michael Baum
  1 sibling, 0 replies; 6+ messages in thread
From: Michael Baum @ 2021-01-20  8:14 UTC (permalink / raw)
  To: dev; +Cc: Matan Azrad, Raslan Darawsheh, David Marchand, stable

In mlx4 PCI probing, there are some validations for the Ethernet device
configuration.
From each PCI device the function creates one or two Ethernet devices.

When one of the validations fails during the creation of the second device,
the first device is not freed, which causes a memory leak.

Free it.

Fixes: 7fae69eeff13 ("mlx4: new poll mode driver")
Cc: stable@dpdk.org

Signed-off-by: Michael Baum <michaelba@nvidia.com>
Acked-by: Matan Azrad <matan@nvidia.com>
---
 drivers/net/mlx4/mlx4.c | 27 ++++++++++++++++-----------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 7460afa..495b4fc 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -766,6 +766,7 @@ struct mlx4_conf {
 	struct ibv_context *attr_ctx = NULL;
 	struct ibv_device_attr device_attr;
 	struct ibv_device_attr_ex device_attr_ex;
+	struct rte_eth_dev *prev_dev = NULL;
 	struct mlx4_conf conf = {
 		.ports.present = 0,
 		.mr_ext_memseg_en = 1,
@@ -880,7 +881,7 @@ struct mlx4_conf {
 				ERROR("can not attach rte ethdev");
 				rte_errno = ENOMEM;
 				err = rte_errno;
-				goto error;
+				goto err_secondary;
 			}
 			priv = eth_dev->data->dev_private;
 			if (!priv->verbs_alloc_ctx.enabled) {
@@ -889,24 +890,24 @@ struct mlx4_conf {
 				      " from Verbs");
 				rte_errno = ENOTSUP;
 				err = rte_errno;
-				goto error;
+				goto err_secondary;
 			}
 			eth_dev->device = &pci_dev->device;
 			eth_dev->dev_ops = &mlx4_dev_sec_ops;
 			err = mlx4_proc_priv_init(eth_dev);
 			if (err)
-				goto error;
+				goto err_secondary;
 			/* Receive command fd from primary process. */
 			err = mlx4_mp_req_verbs_cmd_fd(eth_dev);
 			if (err < 0) {
 				err = rte_errno;
-				goto error;
+				goto err_secondary;
 			}
 			/* Remap UAR for Tx queues. */
 			err = mlx4_tx_uar_init_secondary(eth_dev, err);
 			if (err) {
 				err = rte_errno;
-				goto error;
+				goto err_secondary;
 			}
 			/*
 			 * Ethdev pointer is still required as input since
@@ -918,7 +919,14 @@ struct mlx4_conf {
 			claim_zero(mlx4_glue->close_device(ctx));
 			rte_eth_copy_pci_info(eth_dev, pci_dev);
 			rte_eth_dev_probing_finish(eth_dev);
+			prev_dev = eth_dev;
 			continue;
+err_secondary:
+			claim_zero(mlx4_glue->close_device(ctx));
+			rte_eth_dev_release_port(eth_dev);
+			if (prev_dev)
+				rte_eth_dev_release_port(prev_dev);
+			break;
 		}
 		/* Check port status. */
 		err = mlx4_glue->query_port(ctx, port, &port_attr);
@@ -1093,6 +1101,7 @@ struct mlx4_conf {
 				 priv, mem_event_cb);
 		rte_rwlock_write_unlock(&mlx4_shared_data->mem_event_rwlock);
 		rte_eth_dev_probing_finish(eth_dev);
+		prev_dev = eth_dev;
 		continue;
 port_error:
 		rte_free(priv);
@@ -1107,14 +1116,10 @@ struct mlx4_conf {
 			eth_dev->data->mac_addrs = NULL;
 			rte_eth_dev_release_port(eth_dev);
 		}
+		if (prev_dev)
+			mlx4_dev_close(prev_dev);
 		break;
 	}
-	/*
-	 * XXX if something went wrong in the loop above, there is a resource
-	 * leak (ctx, pd, priv, dpdk ethdev) but we can do nothing about it as
-	 * long as the dpdk does not provide a way to deallocate a ethdev and a
-	 * way to enumerate the registered ethdevs to free the previous ones.
-	 */
 error:
 	if (attr_ctx)
 		claim_zero(mlx4_glue->close_device(attr_ctx));
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2021-01-20  8:15 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <1610555333-18961-1-git-send-email-michaelba@nvidia.com>
2021-01-13 16:28 ` [dpdk-stable] [PATCH 1/2] net/mlx4: fix device detach Michael Baum
2021-01-14  8:32   ` [dpdk-stable] [dpdk-dev] " David Marchand
2021-01-14 10:41     ` David Marchand
     [not found]   ` <1611130491-19129-1-git-send-email-michaelba@nvidia.com>
2021-01-20  8:14     ` [dpdk-stable] [PATCH v2 " Michael Baum
2021-01-20  8:14     ` [dpdk-stable] [PATCH v2 2/2] net/mlx4: fix PCI probe error flow Michael Baum
2021-01-13 16:28 ` [dpdk-stable] [PATCH " Michael Baum

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).