DPDK patches and discussions
 help / color / mirror / Atom feed
* [PATCH 1/2] common/cnxk: fix mailbox timeout issues
@ 2024-11-04 13:07 Harman Kalra
  2024-11-04 13:08 ` [PATCH 2/2] common/cnxk: add mailbox debug trace Harman Kalra
  2024-11-14  9:31 ` [PATCH v2 1/2] common/cnxk: fix mailbox timeout issues Harman Kalra
  0 siblings, 2 replies; 6+ messages in thread
From: Harman Kalra @ 2024-11-04 13:07 UTC (permalink / raw)
  To: Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao,
	Harman Kalra
  Cc: dev

Found couple of reasons causing mailbox timeout:
* msgs_acked value by interrupt thread was not getting synced to thread
polling it.
* mbox_data value sent by AF was seen 0 causing neither down nor up msg
processing.

Fixes: 9bd368ca311a ("common/cnxk: enable PF VF mailbox")
Fixes: 61deac72abbf ("common/cnxk: add CN20KA mbox support")

Signed-off-by: Harman Kalra <hkalra@marvell.com>
---
 drivers/common/cnxk/roc_dev.c      | 106 ++++++++++++++++++++++++-----
 drivers/common/cnxk/roc_dev_priv.h |  11 +++
 drivers/common/cnxk/roc_mbox.c     |  10 ++-
 3 files changed, 104 insertions(+), 23 deletions(-)

diff --git a/drivers/common/cnxk/roc_dev.c b/drivers/common/cnxk/roc_dev.c
index 32409f2ef3..1127d8185c 100644
--- a/drivers/common/cnxk/roc_dev.c
+++ b/drivers/common/cnxk/roc_dev.c
@@ -401,8 +401,7 @@ vf_pf_process_up_msgs(struct dev *dev, uint16_t vf)
 		offset = mbox->rx_start + msg->next_msgoff;
 	}
 	mbox_reset(mbox, vf);
-	mdev->msgs_acked = msgs_acked;
-	plt_wmb();
+	__atomic_store_n(&mdev->msgs_acked, msgs_acked, __ATOMIC_RELEASE);
 
 	return i;
 }
@@ -542,8 +541,7 @@ process_msgs(struct dev *dev, struct mbox *mbox)
 
 	mbox_reset(mbox, 0);
 	/* Update acked if someone is waiting a message - mbox_wait is waiting */
-	mdev->msgs_acked = msgs_acked;
-	plt_wmb();
+	__atomic_store_n(&mdev->msgs_acked, msgs_acked, __ATOMIC_RELEASE);
 }
 
 /* Copies the message received from AF and sends it to VF */
@@ -920,12 +918,59 @@ process_msgs_up(struct dev *dev, struct mbox *mbox)
 	}
 }
 
+/* IRQ to VF from PF - VF context (interrupt thread) */
+static void
+roc_pf_vf_mbox_irq_cn20k(void *param)
+{
+	struct dev *dev = param;
+	uint64_t intr;
+
+	intr = plt_read64(dev->mbox_reg_base + RVU_VF_INT);
+	if (intr == 0)
+		plt_base_dbg("Proceeding to check mbox UP messages if any");
+
+	plt_write64(intr, dev->mbox_reg_base + RVU_VF_INT);
+	plt_base_dbg("Irq 0x%" PRIx64 "(pf:%d,vf:%d)", intr, dev->pf, dev->vf);
+
+	/* If interrupt occurred for down message */
+	if (intr & BIT_ULL(1))
+		/* First process all configuration messages */
+		process_msgs(dev, dev->mbox);
+
+	/* If interrupt occurred for UP message */
+	if (intr & BIT_ULL(0))
+		process_msgs_up(dev, &dev->mbox_up);
+}
+
+/* IRQ to PF from AF - PF context (interrupt thread) */
+static void
+roc_af_pf_mbox_irq_cn20k(void *param)
+{
+	struct dev *dev = param;
+	uint64_t intr;
+
+	intr = plt_read64(dev->mbox_reg_base + RVU_PF_INT);
+	if (intr == 0)
+		plt_base_dbg("Proceeding to check mbox UP messages if any");
+
+	plt_write64(intr, dev->mbox_reg_base + RVU_PF_INT);
+	plt_base_dbg("Irq 0x%" PRIx64 "(pf:%d,vf:%d)", intr, dev->pf, dev->vf);
+
+	/* If interrupt occurred for down message */
+	if (intr & BIT_ULL(1))
+		process_msgs(dev, dev->mbox);
+
+	/* If interrupt occurred for up message */
+	if (intr & BIT_ULL(0))
+		process_msgs_up(dev, &dev->mbox_up);
+}
+
 /* IRQ to VF from PF - VF context (interrupt thread) */
 static void
 roc_pf_vf_mbox_irq(void *param)
 {
 	struct dev *dev = param;
-	uint64_t mbox_data;
+	uint64_t mbox_data = 0;
 	uint64_t intr;
 
 	intr = plt_read64(dev->mbox_reg_base + RVU_VF_INT);
@@ -940,7 +985,7 @@ roc_pf_vf_mbox_irq(void *param)
 	 */
 	mbox_data = plt_read64(dev->mbox_reg_base + RVU_VF_VFPF_MBOX0);
 	/* If interrupt occurred for down message */
-	if (mbox_data & MBOX_DOWN_MSG || intr & BIT_ULL(1)) {
+	if (mbox_data & MBOX_DOWN_MSG) {
 		mbox_data &= ~MBOX_DOWN_MSG;
 		plt_write64(mbox_data, dev->mbox_reg_base + RVU_VF_VFPF_MBOX0);
 
@@ -948,7 +993,7 @@ roc_pf_vf_mbox_irq(void *param)
 		process_msgs(dev, dev->mbox);
 	}
 	/* If interrupt occurred for UP message */
-	if (mbox_data & MBOX_UP_MSG || intr & BIT_ULL(0)) {
+	if (mbox_data & MBOX_UP_MSG) {
 		mbox_data &= ~MBOX_UP_MSG;
 		plt_write64(mbox_data, dev->mbox_reg_base + RVU_VF_VFPF_MBOX0);
 
@@ -962,7 +1007,7 @@ static void
 roc_af_pf_mbox_irq(void *param)
 {
 	struct dev *dev = param;
-	uint64_t mbox_data;
+	uint64_t mbox_data = 0;
 	uint64_t intr;
 
 	intr = plt_read64(dev->mbox_reg_base + RVU_PF_INT);
@@ -977,7 +1022,7 @@ roc_af_pf_mbox_irq(void *param)
 	 */
 	mbox_data = plt_read64(dev->mbox_reg_base + RVU_PF_PFAF_MBOX0);
 	/* If interrupt occurred for down message */
-	if (mbox_data & MBOX_DOWN_MSG || intr & BIT_ULL(1)) {
+	if (mbox_data & MBOX_DOWN_MSG) {
 		mbox_data &= ~MBOX_DOWN_MSG;
 		plt_write64(mbox_data, dev->mbox_reg_base + RVU_PF_PFAF_MBOX0);
 
@@ -985,7 +1030,7 @@ roc_af_pf_mbox_irq(void *param)
 		process_msgs(dev, dev->mbox);
 	}
 	/* If interrupt occurred for up message */
-	if (mbox_data & MBOX_UP_MSG || intr & BIT_ULL(0)) {
+	if (mbox_data & MBOX_UP_MSG) {
 		mbox_data &= ~MBOX_UP_MSG;
 		plt_write64(mbox_data, dev->mbox_reg_base + RVU_PF_PFAF_MBOX0);
 
@@ -1045,7 +1090,8 @@ mbox_register_pf_irq(struct plt_pci_device *pci_dev, struct dev *dev)
 		}
 	}
 	/* MBOX interrupt AF <-> PF */
-	rc = dev_irq_register(intr_handle, roc_af_pf_mbox_irq, dev, dev->mbox_plat->pfaf_vec);
+	rc = dev_irq_register(intr_handle, dev->mbox_plat->ops->af_pf_mbox_irq, dev,
+			      dev->mbox_plat->pfaf_vec);
 	if (rc) {
 		plt_err("Fail to register AF<->PF mbox irq");
 		return rc;
@@ -1073,7 +1119,8 @@ mbox_register_vf_irq(struct plt_pci_device *pci_dev, struct dev *dev)
 	plt_write64(~0ull, dev->mbox_reg_base + RVU_VF_INT_ENA_W1C);
 
 	/* MBOX interrupt PF <-> VF */
-	rc = dev_irq_register(intr_handle, roc_pf_vf_mbox_irq, dev, RVU_VF_INT_VEC_MBOX);
+	rc = dev_irq_register(intr_handle, dev->mbox_plat->ops->pf_vf_mbox_irq, dev,
+			      RVU_VF_INT_VEC_MBOX);
 	if (rc) {
 		plt_err("Fail to register PF<->VF mbox irq");
 		return rc;
@@ -1127,7 +1174,8 @@ mbox_unregister_pf_irq(struct plt_pci_device *pci_dev, struct dev *dev)
 	}
 
 	/* MBOX interrupt AF <-> PF */
-	dev_irq_unregister(intr_handle, roc_af_pf_mbox_irq, dev, dev->mbox_plat->pfaf_vec);
+	dev_irq_unregister(intr_handle, dev->mbox_plat->ops->af_pf_mbox_irq, dev,
+			   dev->mbox_plat->pfaf_vec);
 }
 
 static void
@@ -1139,7 +1187,8 @@ mbox_unregister_vf_irq(struct plt_pci_device *pci_dev, struct dev *dev)
 	plt_write64(~0ull, dev->mbox_reg_base + RVU_VF_INT_ENA_W1C);
 
 	/* Unregister the interrupt handler */
-	dev_irq_unregister(intr_handle, roc_pf_vf_mbox_irq, dev, RVU_VF_INT_VEC_MBOX);
+	dev_irq_unregister(intr_handle, dev->mbox_plat->ops->pf_vf_mbox_irq, dev,
+			   RVU_VF_INT_VEC_MBOX);
 }
 
 void
@@ -1599,10 +1648,17 @@ dev_cache_line_size_valid(void)
 	return true;
 }
 
-static void
+static int
 mbox_platform_changes(struct mbox_platform *mbox_plat, uintptr_t bar2, uintptr_t bar4, bool is_vf)
 {
-	int i;
+	int i, rc = 0;
+
+	/* Allocate memory for device ops */
+	mbox_plat->ops = plt_zmalloc(sizeof(struct mbox_ops), 0);
+	if (mbox_plat->ops == NULL) {
+		rc = -ENOMEM;
+		goto fail;
+	}
 
 	if (roc_model_is_cn20k()) {
 		/* For CN20K, AF allocates mbox memory in DRAM and writes PF
@@ -1613,6 +1669,9 @@ mbox_platform_changes(struct mbox_platform *mbox_plat, uintptr_t bar2, uintptr_t
 		mbox_plat->mbox_region_base =
 			bar2 + (RVU_PFX_FUNC_PFAF_MBOX +
 				((uint64_t)RVU_BLOCK_ADDR_MBOX << RVU_FUNC_BLKADDR_SHIFT));
+		/* Mbox operations */
+		mbox_plat->ops->af_pf_mbox_irq = roc_af_pf_mbox_irq_cn20k;
+		mbox_plat->ops->pf_vf_mbox_irq = roc_pf_vf_mbox_irq_cn20k;
 		/* Interrupt vectors */
 		mbox_plat->pfaf_vec = RVU_MBOX_PF_INT_VEC_AFPF_MBOX;
 		mbox_plat->pfvf_mbox0_vec = RVU_MBOX_PF_INT_VEC_VFPF_MBOX0;
@@ -1630,6 +1689,9 @@ mbox_platform_changes(struct mbox_platform *mbox_plat, uintptr_t bar2, uintptr_t
 	} else {
 		mbox_plat->mbox_reg_base = bar2;
 		mbox_plat->mbox_region_base = bar4;
+		/* Mbox operations */
+		mbox_plat->ops->af_pf_mbox_irq = roc_af_pf_mbox_irq;
+		mbox_plat->ops->pf_vf_mbox_irq = roc_pf_vf_mbox_irq;
 		mbox_plat->pfaf_vec = RVU_PF_INT_VEC_AFPF_MBOX;
 		mbox_plat->pfvf_mbox0_vec = RVU_PF_INT_VEC_VFPF_MBOX0;
 		mbox_plat->pfvf_mbox1_vec = RVU_PF_INT_VEC_VFPF_MBOX1;
@@ -1647,6 +1709,8 @@ mbox_platform_changes(struct mbox_platform *mbox_plat, uintptr_t bar2, uintptr_t
 		if (roc_model_is_cn10k())
 			mbox_plat->mbox_region_base = bar2 + RVU_VF_MBOX_REGION;
 	}
+fail:
+	return rc;
 }
 
 int
@@ -1678,7 +1742,12 @@ dev_init(struct dev *dev, struct plt_pci_device *pci_dev)
 		rc = -ENOMEM;
 		goto fail;
 	}
-	mbox_platform_changes(dev->mbox_plat, bar2, bar4, is_vf);
+
+	if (mbox_platform_changes(dev->mbox_plat, bar2, bar4, is_vf)) {
+		plt_err("Failed to populate platform specific changes");
+		rc = -ENOMEM;
+		goto mbox_plat_free;
+	}
 
 	mbox_reg_base = dev->mbox_plat->mbox_reg_base;
 	mbox_region_base = dev->mbox_plat->mbox_region_base;
@@ -1824,6 +1893,8 @@ dev_init(struct dev *dev, struct plt_pci_device *pci_dev)
 	mbox_fini(dev->mbox);
 	mbox_fini(&dev->mbox_up);
 error:
+	plt_free(dev->mbox_plat->ops);
+mbox_plat_free:
 	plt_free(dev->mbox_plat);
 fail:
 	return rc;
@@ -1883,6 +1954,7 @@ dev_fini(struct dev *dev, struct plt_pci_device *pci_dev)
 	mbox_fini(mbox);
 	dev->mbox_active = 0;
 
+	plt_free(dev->mbox_plat->ops);
 	plt_free(dev->mbox_plat);
 	/* Disable MSIX vectors */
 	dev_irqs_disable(intr_handle);
diff --git a/drivers/common/cnxk/roc_dev_priv.h b/drivers/common/cnxk/roc_dev_priv.h
index c766183196..a8e40fa9d5 100644
--- a/drivers/common/cnxk/roc_dev_priv.h
+++ b/drivers/common/cnxk/roc_dev_priv.h
@@ -106,6 +106,15 @@ struct mbox_sync {
 	pthread_mutex_t mutex;
 };
 
+/* AF PF VF mbox interrupt callbacks */
+typedef void (*af_pf_mbox_irq_t)(void *param);
+typedef void (*pf_vf_mbox_irq_t)(void *param);
+
+struct mbox_ops {
+	af_pf_mbox_irq_t af_pf_mbox_irq;
+	pf_vf_mbox_irq_t pf_vf_mbox_irq;
+};
+
 struct mbox_platform {
 	uint8_t pfaf_vec;
 	uint8_t pfvf_mbox0_vec;
@@ -120,6 +129,8 @@ struct mbox_platform {
 	uint64_t pfvf1_mbox_int_ena_w1c[MAX_VFPF_DWORD_BITS];
 	uintptr_t mbox_reg_base;
 	uintptr_t mbox_region_base;
+	/* Mbox operations */
+	struct mbox_ops *ops;
 };
 
 struct dev {
diff --git a/drivers/common/cnxk/roc_mbox.c b/drivers/common/cnxk/roc_mbox.c
index db77babfdb..eb5bd771fe 100644
--- a/drivers/common/cnxk/roc_mbox.c
+++ b/drivers/common/cnxk/roc_mbox.c
@@ -275,7 +275,7 @@ mbox_msg_send_data(struct mbox *mbox, int devid, uint8_t data)
 	tx_hdr->msg_size = mdev->msg_size;
 	mdev->msg_size = 0;
 	mdev->rsp_size = 0;
-	mdev->msgs_acked = 0;
+	__atomic_store_n(&mdev->msgs_acked, 0, __ATOMIC_RELEASE);
 
 	/* num_msgs != 0 signals to the peer that the buffer has a number of
 	 * messages. So this should be written after copying txmem
@@ -417,7 +417,7 @@ mbox_wait(struct mbox *mbox, int devid, uint32_t rst_timo)
 	 * mdev->msgs_acked are incremented at process_msgs() in interrupt
 	 * thread context.
 	 */
-	while (mdev->num_msgs > mdev->msgs_acked) {
+	while (mdev->num_msgs > __atomic_load_n(&mdev->msgs_acked, __ATOMIC_ACQUIRE)) {
 		plt_delay_us(sleep);
 		timeout += sleep;
 		if (timeout >= rst_timo) {
@@ -433,13 +433,11 @@ mbox_wait(struct mbox *mbox, int devid, uint32_t rst_timo)
 				"(tx/rx num_msgs: %d/%d), msg_size: %d, "
 				"rsp_size: %d",
 				devid, timeout, mdev->num_msgs,
-				mdev->msgs_acked, tx_hdr->num_msgs,
-				rx_hdr->num_msgs, mdev->msg_size,
-				mdev->rsp_size);
+				__atomic_load_n(&mdev->msgs_acked, __ATOMIC_ACQUIRE),
+				tx_hdr->num_msgs, rx_hdr->num_msgs, mdev->msg_size, mdev->rsp_size);
 
 			return -EIO;
 		}
-		plt_rmb();
 	}
 	return 0;
 }
-- 
2.46.0.469.g4590f2e941


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2024-11-14 16:38 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-11-04 13:07 [PATCH 1/2] common/cnxk: fix mailbox timeout issues Harman Kalra
2024-11-04 13:08 ` [PATCH 2/2] common/cnxk: add mailbox debug trace Harman Kalra
2024-11-07  8:04   ` [EXTERNAL] " Jerin Jacob
2024-11-14 16:38   ` Stephen Hemminger
2024-11-14  9:31 ` [PATCH v2 1/2] common/cnxk: fix mailbox timeout issues Harman Kalra
2024-11-14  9:31   ` [PATCH v2 2/2] common/cnxk: add mailbox debug trace Harman Kalra

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).