patches for DPDK stable branches
 help / color / mirror / Atom feed
From: Kevin Traynor <ktraynor@redhat.com>
To: Long Li <longli@microsoft.com>
Cc: dpdk stable <stable@dpdk.org>
Subject: patch 'net/mana: fix multi-process tracking' has been queued to stable release 24.11.2
Date: Fri,  7 Mar 2025 12:46:43 +0000	[thread overview]
Message-ID: <20250307124726.475001-10-ktraynor@redhat.com> (raw)
In-Reply-To: <20250307124726.475001-1-ktraynor@redhat.com>

Hi,

FYI, your patch has been queued to stable release 24.11.2

Note it hasn't been pushed to http://dpdk.org/browse/dpdk-stable yet.
It will be pushed if I get no objections before 03/12/25. So please
shout if anyone has objections.

Also note that after the patch there's a diff of the upstream commit vs the
patch applied to the branch. This will indicate if there was any rebasing
needed to apply to the stable branch. If there were code changes for rebasing
(i.e. not only metadata diffs), please double check that the rebase was
correctly done.

Queued patches are on a temporary branch at:
https://github.com/kevintraynor/dpdk-stable

This queued commit can be viewed at:
https://github.com/kevintraynor/dpdk-stable/commit/bb87bcb247f6420f5de662b855c2671f498b13ee

Thanks.

Kevin

---
From bb87bcb247f6420f5de662b855c2671f498b13ee Mon Sep 17 00:00:00 2001
From: Long Li <longli@microsoft.com>
Date: Thu, 20 Feb 2025 15:32:02 -0800
Subject: [PATCH] net/mana: fix multi-process tracking

[ upstream commit 57aa3ec91ecf13ab2f11e4dc0dc74c50a2afa0cc ]

The driver uses mana_shared_data to track the usage count for the primary
process. This is incorrect: mana_shared_data is allocated by the primary
and is meant to let the primary track usage by secondary processes. It
also creates a race condition when the device is removed, because the
counter is no longer available once this shared memory is freed.

Move the usage count tracking to mana_local_data and fix the race
condition in mana_pci_remove().

Fixes: 517ed6e2d590 ("net/mana: add basic driver with build environment")

Signed-off-by: Long Li <longli@microsoft.com>
---
 drivers/net/mana/mana.c | 103 +++++++++++++++++++++++-----------------
 drivers/net/mana/mana.h |   6 +--
 drivers/net/mana/mp.c   |   2 +-
 3 files changed, 61 insertions(+), 50 deletions(-)

diff --git a/drivers/net/mana/mana.c b/drivers/net/mana/mana.c
index c37c4e3444..2934da29f7 100644
--- a/drivers/net/mana/mana.c
+++ b/drivers/net/mana/mana.c
@@ -24,7 +24,12 @@
 
 /* Shared memory between primary/secondary processes, per driver */
-/* Data to track primary/secondary usage */
 struct mana_shared_data *mana_shared_data;
-static struct mana_shared_data mana_local_data;
+
+/* Local data to track device instance usage for primary/secondary processes */
+static struct mana_local_data {
+	int init_done;
+	unsigned int primary_cnt;
+	unsigned int secondary_cnt;
+} mana_local_data;
 
 /* The memory region for the above data */
@@ -1168,6 +1173,10 @@ mana_init_shared_data(void)
 
 	/* Skip if shared data is already initialized */
-	if (mana_shared_data)
+	if (mana_shared_data) {
+		DRV_LOG(INFO, "shared data is already initialized");
 		goto exit;
+	}
+
+	memset(&mana_local_data, 0, sizeof(mana_local_data));
 
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
@@ -1182,6 +1191,6 @@ mana_init_shared_data(void)
 
 		mana_shared_data = mana_shared_mz->addr;
-		memset(mana_shared_data, 0, sizeof(*mana_shared_data));
-		rte_spinlock_init(&mana_shared_data->lock);
+		rte_atomic_store_explicit(&mana_shared_data->secondary_cnt, 0,
+					  rte_memory_order_relaxed);
 	} else {
 		secondary_mz = rte_memzone_lookup(MZ_MANA_SHARED_DATA);
@@ -1193,5 +1202,4 @@ mana_init_shared_data(void)
 
 		mana_shared_data = secondary_mz->addr;
-		memset(&mana_local_data, 0, sizeof(mana_local_data));
 	}
 
@@ -1214,9 +1222,9 @@ mana_init_once(void)
 		return ret;
 
-	rte_spinlock_lock(&mana_shared_data->lock);
+	rte_spinlock_lock(&mana_shared_data_lock);
 
 	switch (rte_eal_process_type()) {
 	case RTE_PROC_PRIMARY:
-		if (mana_shared_data->init_done)
+		if (mana_local_data.init_done)
 			break;
 
@@ -1226,5 +1234,5 @@ mana_init_once(void)
 		DRV_LOG(ERR, "MP INIT PRIMARY");
 
-		mana_shared_data->init_done = 1;
+		mana_local_data.init_done = 1;
 		break;
 
@@ -1249,5 +1257,5 @@ mana_init_once(void)
 	}
 
-	rte_spinlock_unlock(&mana_shared_data->lock);
+	rte_spinlock_unlock(&mana_shared_data_lock);
 
 	return ret;
@@ -1320,9 +1328,4 @@ mana_probe_port(struct ibv_device *ibdev, struct ibv_device_attr_ex *dev_attr,
 		eth_dev->rx_pkt_burst = mana_rx_burst;
 
-		rte_spinlock_lock(&mana_shared_data->lock);
-		mana_shared_data->secondary_cnt++;
-		mana_local_data.secondary_cnt++;
-		rte_spinlock_unlock(&mana_shared_data->lock);
-
 		rte_eth_copy_pci_info(eth_dev, pci_dev);
 		rte_eth_dev_probing_finish(eth_dev);
@@ -1407,8 +1410,4 @@ mana_probe_port(struct ibv_device *ibdev, struct ibv_device_attr_ex *dev_attr,
 	}
 
-	rte_spinlock_lock(&mana_shared_data->lock);
-	mana_shared_data->primary_cnt++;
-	rte_spinlock_unlock(&mana_shared_data->lock);
-
 	eth_dev->device = &pci_dev->device;
 
@@ -1553,11 +1552,36 @@ mana_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 	}
 
+	/* If no device is found, clean up resources if this is the last one */
 	if (!count) {
-		rte_memzone_free(mana_shared_mz);
-		mana_shared_mz = NULL;
-		ret = -ENODEV;
+		rte_spinlock_lock(&mana_shared_data_lock);
+		if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+			if (!mana_local_data.primary_cnt) {
+				mana_mp_uninit_primary();
+				rte_memzone_free(mana_shared_mz);
+				mana_shared_mz = NULL;
+				mana_shared_data = NULL;
+			}
+		} else {
+			if (!mana_local_data.secondary_cnt) {
+				mana_mp_uninit_secondary();
+				mana_shared_data = NULL;
+			}
+		}
+		rte_spinlock_unlock(&mana_shared_data_lock);
+		return -ENODEV;
 	}
 
-	return ret;
+	/* At least one eth_dev is probed, increase counter for shared data */
+	rte_spinlock_lock(&mana_shared_data_lock);
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		mana_local_data.primary_cnt++;
+	} else {
+		rte_atomic_fetch_add_explicit(&mana_shared_data->secondary_cnt, 1,
+					      rte_memory_order_relaxed);
+		mana_local_data.secondary_cnt++;
+	}
+	rte_spinlock_unlock(&mana_shared_data_lock);
+
+	return 0;
 }
 
@@ -1574,33 +1598,24 @@ static int
 mana_pci_remove(struct rte_pci_device *pci_dev)
 {
+	rte_spinlock_lock(&mana_shared_data_lock);
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
-		rte_spinlock_lock(&mana_shared_data_lock);
+		RTE_VERIFY(mana_local_data.primary_cnt > 0);
+		mana_local_data.primary_cnt--;
 
-		rte_spinlock_lock(&mana_shared_data->lock);
-
-		RTE_VERIFY(mana_shared_data->primary_cnt > 0);
-		mana_shared_data->primary_cnt--;
-		if (!mana_shared_data->primary_cnt) {
+		if (!mana_local_data.primary_cnt) {
 			DRV_LOG(DEBUG, "mp uninit primary");
 			mana_mp_uninit_primary();
-		}
 
-		rte_spinlock_unlock(&mana_shared_data->lock);
-
-		/* Also free the shared memory if this is the last */
-		if (!mana_shared_data->primary_cnt) {
+			/* Also free the shared memory if this is the last */
 			DRV_LOG(DEBUG, "free shared memezone data");
 			rte_memzone_free(mana_shared_mz);
 			mana_shared_mz = NULL;
+			mana_shared_data = NULL;
 		}
-
-		rte_spinlock_unlock(&mana_shared_data_lock);
 	} else {
-		rte_spinlock_lock(&mana_shared_data_lock);
-
-		rte_spinlock_lock(&mana_shared_data->lock);
-		RTE_VERIFY(mana_shared_data->secondary_cnt > 0);
-		mana_shared_data->secondary_cnt--;
-		rte_spinlock_unlock(&mana_shared_data->lock);
+		RTE_VERIFY(rte_atomic_load_explicit(&mana_shared_data->secondary_cnt,
+						    rte_memory_order_relaxed) > 0);
+		rte_atomic_fetch_sub_explicit(&mana_shared_data->secondary_cnt, 1,
+					      rte_memory_order_relaxed);
 
 		RTE_VERIFY(mana_local_data.secondary_cnt > 0);
@@ -1609,8 +1624,8 @@ mana_pci_remove(struct rte_pci_device *pci_dev)
 			DRV_LOG(DEBUG, "mp uninit secondary");
 			mana_mp_uninit_secondary();
+			mana_shared_data = NULL;
 		}
-
-		rte_spinlock_unlock(&mana_shared_data_lock);
 	}
+	rte_spinlock_unlock(&mana_shared_data_lock);
 
 	return rte_eth_dev_pci_generic_remove(pci_dev, mana_dev_uninit);
diff --git a/drivers/net/mana/mana.h b/drivers/net/mana/mana.h
index 41a0ca6dfe..855d98911b 100644
--- a/drivers/net/mana/mana.h
+++ b/drivers/net/mana/mana.h
@@ -9,10 +9,6 @@
 #define PCI_DEVICE_ID_MICROSOFT_MANA	0x00ba
 
-/* Shared data between primary/secondary processes */
 struct mana_shared_data {
-	rte_spinlock_t lock;
-	int init_done;
-	unsigned int primary_cnt;
-	unsigned int secondary_cnt;
+	RTE_ATOMIC(uint32_t) secondary_cnt;
 };
 
diff --git a/drivers/net/mana/mp.c b/drivers/net/mana/mp.c
index 34b45ed832..5467d385ce 100644
--- a/drivers/net/mana/mp.c
+++ b/drivers/net/mana/mp.c
@@ -307,5 +307,5 @@ mana_mp_req_on_rxtx(struct rte_eth_dev *dev, enum mana_mp_req_type type)
 	}
 
-	if (!mana_shared_data->secondary_cnt)
+	if (rte_atomic_load_explicit(&mana_shared_data->secondary_cnt, rte_memory_order_relaxed) == 0)
 		return;
 
-- 
2.48.1

---
  Diff of the applied patch vs upstream commit (please double-check if non-empty):
---
--- -	2025-03-07 11:02:57.276511092 +0000
+++ 0010-net-mana-fix-multi-process-tracking.patch	2025-03-07 11:02:56.840335570 +0000
@@ -1 +1 @@
-From 57aa3ec91ecf13ab2f11e4dc0dc74c50a2afa0cc Mon Sep 17 00:00:00 2001
+From bb87bcb247f6420f5de662b855c2671f498b13ee Mon Sep 17 00:00:00 2001
@@ -5,0 +6,2 @@
+[ upstream commit 57aa3ec91ecf13ab2f11e4dc0dc74c50a2afa0cc ]
+
@@ -17 +18,0 @@
-Cc: stable@dpdk.org


  parent reply	other threads:[~2025-03-07 12:47 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-03-07 12:46 patch 'ci: build with MSVC in GHA' " Kevin Traynor
2025-03-07 12:46 ` patch 'net/netvsc: scan all net devices under the PCI device' " Kevin Traynor
2025-03-07 12:46 ` patch 'net/netvsc: remove device if its net devices removed' " Kevin Traynor
2025-03-07 12:46 ` patch 'doc: fix feature flags for queue start/stop' " Kevin Traynor
2025-03-07 12:46 ` patch 'doc: add VXLAN matching requirement in mlx5 guide' " Kevin Traynor
2025-03-07 12:46 ` patch 'app/testpmd: use VXLAN flow flags from user if set' " Kevin Traynor
2025-03-07 12:46 ` patch 'ethdev: fix registers info request' " Kevin Traynor
2025-03-07 12:46 ` patch 'app/testpmd: show all DCB priority TC map' " Kevin Traynor
2025-03-07 12:46 ` patch 'app/testpmd: avoid crash in DCB config' " Kevin Traynor
2025-03-07 12:46 ` Kevin Traynor [this message]
2025-03-07 12:46 ` patch 'app/testpmd: fix out-of-bound reference in offload " Kevin Traynor
2025-03-07 12:46 ` patch 'net/ngbe: fix WOL and NCSI capabilities' " Kevin Traynor
2025-03-07 12:46 ` patch 'net/txgbe: remove useless condition for SW-FW sync' " Kevin Traynor
2025-03-07 12:46 ` patch 'net/nfp: fix representor port statistics' " Kevin Traynor
2025-03-07 12:46 ` patch 'bus/pci: fix registered device name' " Kevin Traynor
2025-03-07 12:46 ` patch 'net/nfp: fix init failure handling' " Kevin Traynor
2025-03-07 12:46 ` patch 'vhost/crypto: skip fetch before vring init' " Kevin Traynor
2025-03-07 12:46 ` patch 'examples/vhost_crypto: fix user callbacks' " Kevin Traynor
2025-03-07 12:46 ` patch 'vhost: check descriptor chains length' " Kevin Traynor
2025-03-07 12:46 ` patch 'test/bbdev: update FFT test vectors' " Kevin Traynor
2025-03-07 12:46 ` patch 'event/dlb2: fix event weight handling in SSE code path' " Kevin Traynor
2025-03-07 12:46 ` patch 'test/event: fix number of queues in eventdev conf' " Kevin Traynor
2025-03-07 12:46 ` patch 'net/e1000: fix crashes in secondary processes' " Kevin Traynor
2025-03-07 12:46 ` patch 'net/ixgbe: " Kevin Traynor
2025-03-07 12:46 ` patch 'net/ixgbe: fix minimum Rx/Tx descriptors' " Kevin Traynor
2025-03-07 12:46 ` patch 'net/mlx5/hws: fix fragmented packet type matching' " Kevin Traynor
2025-03-07 12:47 ` patch 'net/mlx5: fix leak in HWS flow counter action' " Kevin Traynor
2025-03-07 12:47 ` patch 'net/mlx5: fix crash with null flow list creation' " Kevin Traynor
2025-03-07 12:47 ` patch 'net/mlx5: fix actions translation error overwrite' " Kevin Traynor
2025-03-07 12:47 ` patch 'net/mlx5: fix hardware packet type translation' " Kevin Traynor
2025-03-07 12:47 ` patch 'common/cnxk: fix inbound IPsec SA setup' " Kevin Traynor
2025-03-07 12:47 ` patch 'stack: fix pop in C11 implementation' " Kevin Traynor
2025-03-07 12:47 ` patch 'test/crypto: fix AES-ECB test lengths' " Kevin Traynor
2025-03-07 12:47 ` patch 'examples/ipsec-secgw: fix IV length in CTR 192/256' " Kevin Traynor
2025-03-07 12:47 ` patch 'crypto/cnxk: fix asymmetric operation status code' " Kevin Traynor
2025-03-07 12:47 ` patch 'test/crypto: remove unused variable' " Kevin Traynor
2025-03-07 12:47 ` patch 'crypto/openssl: validate incorrect RSA signature' " Kevin Traynor
2025-03-07 12:47 ` patch 'test/crypto: fix check for OOP header data' " Kevin Traynor
2025-03-07 12:47 ` patch 'crypto/qat: fix SM3 state size' " Kevin Traynor
2025-03-07 12:47 ` patch 'test/dma: fix pointers in IOVA as PA mode' " Kevin Traynor
2025-03-07 12:47 ` patch 'doc: update ionic driver guide' " Kevin Traynor
2025-03-07 12:47 ` patch 'ci: point at GitHub mirror' " Kevin Traynor
2025-03-07 12:47 ` patch 'ci: fix ccache for Ubuntu 22.04' " Kevin Traynor

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250307124726.475001-10-ktraynor@redhat.com \
    --to=ktraynor@redhat.com \
    --cc=longli@microsoft.com \
    --cc=stable@dpdk.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).