* [PATCH 1/2] common/cnxk: fix mailbox timeout issues
@ 2024-11-04 13:07 Harman Kalra
2024-11-04 13:08 ` [PATCH 2/2] common/cnxk: add mailbox debug trace Harman Kalra
2024-11-14 9:31 ` [PATCH v2 1/2] common/cnxk: fix mailbox timeout issues Harman Kalra
0 siblings, 2 replies; 7+ messages in thread
From: Harman Kalra @ 2024-11-04 13:07 UTC (permalink / raw)
To: Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao,
Harman Kalra
Cc: dev
Found couple of reasons causing mailbox timeout:
* msgs_acked value by interrupt thread was not getting synced to thread
polling it.
* mbox_data value sent by AF was seen 0 causing neither down nor up msg
processing.
Fixes: 9bd368ca311a ("common/cnxk: enable PF VF mailbox")
Fixes: 61deac72abbf ("common/cnxk: add CN20KA mbox support")
Signed-off-by: Harman Kalra <hkalra@marvell.com>
---
drivers/common/cnxk/roc_dev.c | 106 ++++++++++++++++++++++++-----
drivers/common/cnxk/roc_dev_priv.h | 11 +++
drivers/common/cnxk/roc_mbox.c | 10 ++-
3 files changed, 104 insertions(+), 23 deletions(-)
diff --git a/drivers/common/cnxk/roc_dev.c b/drivers/common/cnxk/roc_dev.c
index 32409f2ef3..1127d8185c 100644
--- a/drivers/common/cnxk/roc_dev.c
+++ b/drivers/common/cnxk/roc_dev.c
@@ -401,8 +401,7 @@ vf_pf_process_up_msgs(struct dev *dev, uint16_t vf)
offset = mbox->rx_start + msg->next_msgoff;
}
mbox_reset(mbox, vf);
- mdev->msgs_acked = msgs_acked;
- plt_wmb();
+ __atomic_store_n(&mdev->msgs_acked, msgs_acked, __ATOMIC_RELEASE);
return i;
}
@@ -542,8 +541,7 @@ process_msgs(struct dev *dev, struct mbox *mbox)
mbox_reset(mbox, 0);
/* Update acked if someone is waiting a message - mbox_wait is waiting */
- mdev->msgs_acked = msgs_acked;
- plt_wmb();
+ __atomic_store_n(&mdev->msgs_acked, msgs_acked, __ATOMIC_RELEASE);
}
/* Copies the message received from AF and sends it to VF */
@@ -920,12 +918,59 @@ process_msgs_up(struct dev *dev, struct mbox *mbox)
}
}
+/* IRQ to VF from PF - VF context (interrupt thread) */
+static void
+roc_pf_vf_mbox_irq_cn20k(void *param)
+{
+ struct dev *dev = param;
+ uint64_t intr;
+
+ intr = plt_read64(dev->mbox_reg_base + RVU_VF_INT);
+ if (intr == 0)
+ plt_base_dbg("Proceeding to check mbox UP messages if any");
+
+ plt_write64(intr, dev->mbox_reg_base + RVU_VF_INT);
+ plt_base_dbg("Irq 0x%" PRIx64 "(pf:%d,vf:%d)", intr, dev->pf, dev->vf);
+
+ /* If interrupt occurred for down message */
+ if (intr & BIT_ULL(1))
+ /* First process all configuration messages */
+ process_msgs(dev, dev->mbox);
+
+ /* If interrupt occurred for UP message */
+ if (intr & BIT_ULL(0))
+ process_msgs_up(dev, &dev->mbox_up);
+}
+
+/* IRQ to PF from AF - PF context (interrupt thread) */
+static void
+roc_af_pf_mbox_irq_cn20k(void *param)
+{
+ struct dev *dev = param;
+ uint64_t intr;
+
+ intr = plt_read64(dev->mbox_reg_base + RVU_PF_INT);
+ if (intr == 0)
+ plt_base_dbg("Proceeding to check mbox UP messages if any");
+
+ plt_write64(intr, dev->mbox_reg_base + RVU_PF_INT);
+ plt_base_dbg("Irq 0x%" PRIx64 "(pf:%d,vf:%d)", intr, dev->pf, dev->vf);
+
+ /* If interrupt occurred for down message */
+ if (intr & BIT_ULL(1))
+ process_msgs(dev, dev->mbox);
+
+ /* If interrupt occurred for up message */
+ if (intr & BIT_ULL(0))
+ process_msgs_up(dev, &dev->mbox_up);
+}
+
/* IRQ to VF from PF - VF context (interrupt thread) */
static void
roc_pf_vf_mbox_irq(void *param)
{
struct dev *dev = param;
- uint64_t mbox_data;
+ uint64_t mbox_data = 0;
uint64_t intr;
intr = plt_read64(dev->mbox_reg_base + RVU_VF_INT);
@@ -940,7 +985,7 @@ roc_pf_vf_mbox_irq(void *param)
*/
mbox_data = plt_read64(dev->mbox_reg_base + RVU_VF_VFPF_MBOX0);
/* If interrupt occurred for down message */
- if (mbox_data & MBOX_DOWN_MSG || intr & BIT_ULL(1)) {
+ if (mbox_data & MBOX_DOWN_MSG) {
mbox_data &= ~MBOX_DOWN_MSG;
plt_write64(mbox_data, dev->mbox_reg_base + RVU_VF_VFPF_MBOX0);
@@ -948,7 +993,7 @@ roc_pf_vf_mbox_irq(void *param)
process_msgs(dev, dev->mbox);
}
/* If interrupt occurred for UP message */
- if (mbox_data & MBOX_UP_MSG || intr & BIT_ULL(0)) {
+ if (mbox_data & MBOX_UP_MSG) {
mbox_data &= ~MBOX_UP_MSG;
plt_write64(mbox_data, dev->mbox_reg_base + RVU_VF_VFPF_MBOX0);
@@ -962,7 +1007,7 @@ static void
roc_af_pf_mbox_irq(void *param)
{
struct dev *dev = param;
- uint64_t mbox_data;
+ uint64_t mbox_data = 0;
uint64_t intr;
intr = plt_read64(dev->mbox_reg_base + RVU_PF_INT);
@@ -977,7 +1022,7 @@ roc_af_pf_mbox_irq(void *param)
*/
mbox_data = plt_read64(dev->mbox_reg_base + RVU_PF_PFAF_MBOX0);
/* If interrupt occurred for down message */
- if (mbox_data & MBOX_DOWN_MSG || intr & BIT_ULL(1)) {
+ if (mbox_data & MBOX_DOWN_MSG) {
mbox_data &= ~MBOX_DOWN_MSG;
plt_write64(mbox_data, dev->mbox_reg_base + RVU_PF_PFAF_MBOX0);
@@ -985,7 +1030,7 @@ roc_af_pf_mbox_irq(void *param)
process_msgs(dev, dev->mbox);
}
/* If interrupt occurred for up message */
- if (mbox_data & MBOX_UP_MSG || intr & BIT_ULL(0)) {
+ if (mbox_data & MBOX_UP_MSG) {
mbox_data &= ~MBOX_UP_MSG;
plt_write64(mbox_data, dev->mbox_reg_base + RVU_PF_PFAF_MBOX0);
@@ -1045,7 +1090,8 @@ mbox_register_pf_irq(struct plt_pci_device *pci_dev, struct dev *dev)
}
}
/* MBOX interrupt AF <-> PF */
- rc = dev_irq_register(intr_handle, roc_af_pf_mbox_irq, dev, dev->mbox_plat->pfaf_vec);
+ rc = dev_irq_register(intr_handle, dev->mbox_plat->ops->af_pf_mbox_irq, dev,
+ dev->mbox_plat->pfaf_vec);
if (rc) {
plt_err("Fail to register AF<->PF mbox irq");
return rc;
@@ -1073,7 +1119,8 @@ mbox_register_vf_irq(struct plt_pci_device *pci_dev, struct dev *dev)
plt_write64(~0ull, dev->mbox_reg_base + RVU_VF_INT_ENA_W1C);
/* MBOX interrupt PF <-> VF */
- rc = dev_irq_register(intr_handle, roc_pf_vf_mbox_irq, dev, RVU_VF_INT_VEC_MBOX);
+ rc = dev_irq_register(intr_handle, dev->mbox_plat->ops->pf_vf_mbox_irq, dev,
+ RVU_VF_INT_VEC_MBOX);
if (rc) {
plt_err("Fail to register PF<->VF mbox irq");
return rc;
@@ -1127,7 +1174,8 @@ mbox_unregister_pf_irq(struct plt_pci_device *pci_dev, struct dev *dev)
}
/* MBOX interrupt AF <-> PF */
- dev_irq_unregister(intr_handle, roc_af_pf_mbox_irq, dev, dev->mbox_plat->pfaf_vec);
+ dev_irq_unregister(intr_handle, dev->mbox_plat->ops->af_pf_mbox_irq, dev,
+ dev->mbox_plat->pfaf_vec);
}
static void
@@ -1139,7 +1187,8 @@ mbox_unregister_vf_irq(struct plt_pci_device *pci_dev, struct dev *dev)
plt_write64(~0ull, dev->mbox_reg_base + RVU_VF_INT_ENA_W1C);
/* Unregister the interrupt handler */
- dev_irq_unregister(intr_handle, roc_pf_vf_mbox_irq, dev, RVU_VF_INT_VEC_MBOX);
+ dev_irq_unregister(intr_handle, dev->mbox_plat->ops->pf_vf_mbox_irq, dev,
+ RVU_VF_INT_VEC_MBOX);
}
void
@@ -1599,10 +1648,17 @@ dev_cache_line_size_valid(void)
return true;
}
-static void
+static int
mbox_platform_changes(struct mbox_platform *mbox_plat, uintptr_t bar2, uintptr_t bar4, bool is_vf)
{
- int i;
+ int i, rc = 0;
+
+ /* Allocate memory for device ops */
+ mbox_plat->ops = plt_zmalloc(sizeof(struct mbox_ops), 0);
+ if (mbox_plat->ops == NULL) {
+ rc = -ENOMEM;
+ goto fail;
+ }
if (roc_model_is_cn20k()) {
/* For CN20K, AF allocates mbox memory in DRAM and writes PF
@@ -1613,6 +1669,9 @@ mbox_platform_changes(struct mbox_platform *mbox_plat, uintptr_t bar2, uintptr_t
mbox_plat->mbox_region_base =
bar2 + (RVU_PFX_FUNC_PFAF_MBOX +
((uint64_t)RVU_BLOCK_ADDR_MBOX << RVU_FUNC_BLKADDR_SHIFT));
+ /* Mbox operations */
+ mbox_plat->ops->af_pf_mbox_irq = roc_af_pf_mbox_irq_cn20k;
+ mbox_plat->ops->pf_vf_mbox_irq = roc_pf_vf_mbox_irq_cn20k;
/* Interrupt vectors */
mbox_plat->pfaf_vec = RVU_MBOX_PF_INT_VEC_AFPF_MBOX;
mbox_plat->pfvf_mbox0_vec = RVU_MBOX_PF_INT_VEC_VFPF_MBOX0;
@@ -1630,6 +1689,9 @@ mbox_platform_changes(struct mbox_platform *mbox_plat, uintptr_t bar2, uintptr_t
} else {
mbox_plat->mbox_reg_base = bar2;
mbox_plat->mbox_region_base = bar4;
+ /* Mbox operations */
+ mbox_plat->ops->af_pf_mbox_irq = roc_af_pf_mbox_irq;
+ mbox_plat->ops->pf_vf_mbox_irq = roc_pf_vf_mbox_irq;
mbox_plat->pfaf_vec = RVU_PF_INT_VEC_AFPF_MBOX;
mbox_plat->pfvf_mbox0_vec = RVU_PF_INT_VEC_VFPF_MBOX0;
mbox_plat->pfvf_mbox1_vec = RVU_PF_INT_VEC_VFPF_MBOX1;
@@ -1647,6 +1709,8 @@ mbox_platform_changes(struct mbox_platform *mbox_plat, uintptr_t bar2, uintptr_t
if (roc_model_is_cn10k())
mbox_plat->mbox_region_base = bar2 + RVU_VF_MBOX_REGION;
}
+fail:
+ return rc;
}
int
@@ -1678,7 +1742,12 @@ dev_init(struct dev *dev, struct plt_pci_device *pci_dev)
rc = -ENOMEM;
goto fail;
}
- mbox_platform_changes(dev->mbox_plat, bar2, bar4, is_vf);
+
+ if (mbox_platform_changes(dev->mbox_plat, bar2, bar4, is_vf)) {
+ plt_err("Failed to populate platform specific changes");
+ rc = -ENOMEM;
+ goto mbox_plat_free;
+ }
mbox_reg_base = dev->mbox_plat->mbox_reg_base;
mbox_region_base = dev->mbox_plat->mbox_region_base;
@@ -1824,6 +1893,8 @@ dev_init(struct dev *dev, struct plt_pci_device *pci_dev)
mbox_fini(dev->mbox);
mbox_fini(&dev->mbox_up);
error:
+ plt_free(dev->mbox_plat->ops);
+mbox_plat_free:
plt_free(dev->mbox_plat);
fail:
return rc;
@@ -1883,6 +1954,7 @@ dev_fini(struct dev *dev, struct plt_pci_device *pci_dev)
mbox_fini(mbox);
dev->mbox_active = 0;
+ plt_free(dev->mbox_plat->ops);
plt_free(dev->mbox_plat);
/* Disable MSIX vectors */
dev_irqs_disable(intr_handle);
diff --git a/drivers/common/cnxk/roc_dev_priv.h b/drivers/common/cnxk/roc_dev_priv.h
index c766183196..a8e40fa9d5 100644
--- a/drivers/common/cnxk/roc_dev_priv.h
+++ b/drivers/common/cnxk/roc_dev_priv.h
@@ -106,6 +106,15 @@ struct mbox_sync {
pthread_mutex_t mutex;
};
+/* AF PF VF mbox interrupt callbacks */
+typedef void (*af_pf_mbox_irq_t)(void *param);
+typedef void (*pf_vf_mbox_irq_t)(void *param);
+
+struct mbox_ops {
+ af_pf_mbox_irq_t af_pf_mbox_irq;
+ pf_vf_mbox_irq_t pf_vf_mbox_irq;
+};
+
struct mbox_platform {
uint8_t pfaf_vec;
uint8_t pfvf_mbox0_vec;
@@ -120,6 +129,8 @@ struct mbox_platform {
uint64_t pfvf1_mbox_int_ena_w1c[MAX_VFPF_DWORD_BITS];
uintptr_t mbox_reg_base;
uintptr_t mbox_region_base;
+ /* Mbox operations */
+ struct mbox_ops *ops;
};
struct dev {
diff --git a/drivers/common/cnxk/roc_mbox.c b/drivers/common/cnxk/roc_mbox.c
index db77babfdb..eb5bd771fe 100644
--- a/drivers/common/cnxk/roc_mbox.c
+++ b/drivers/common/cnxk/roc_mbox.c
@@ -275,7 +275,7 @@ mbox_msg_send_data(struct mbox *mbox, int devid, uint8_t data)
tx_hdr->msg_size = mdev->msg_size;
mdev->msg_size = 0;
mdev->rsp_size = 0;
- mdev->msgs_acked = 0;
+ __atomic_store_n(&mdev->msgs_acked, 0, __ATOMIC_RELEASE);
/* num_msgs != 0 signals to the peer that the buffer has a number of
* messages. So this should be written after copying txmem
@@ -417,7 +417,7 @@ mbox_wait(struct mbox *mbox, int devid, uint32_t rst_timo)
* mdev->msgs_acked are incremented at process_msgs() in interrupt
* thread context.
*/
- while (mdev->num_msgs > mdev->msgs_acked) {
+ while (mdev->num_msgs > __atomic_load_n(&mdev->msgs_acked, __ATOMIC_ACQUIRE)) {
plt_delay_us(sleep);
timeout += sleep;
if (timeout >= rst_timo) {
@@ -433,13 +433,11 @@ mbox_wait(struct mbox *mbox, int devid, uint32_t rst_timo)
"(tx/rx num_msgs: %d/%d), msg_size: %d, "
"rsp_size: %d",
devid, timeout, mdev->num_msgs,
- mdev->msgs_acked, tx_hdr->num_msgs,
- rx_hdr->num_msgs, mdev->msg_size,
- mdev->rsp_size);
+ __atomic_load_n(&mdev->msgs_acked, __ATOMIC_ACQUIRE),
+ tx_hdr->num_msgs, rx_hdr->num_msgs, mdev->msg_size, mdev->rsp_size);
return -EIO;
}
- plt_rmb();
}
return 0;
}
--
2.46.0.469.g4590f2e941
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH 2/2] common/cnxk: add mailbox debug trace
2024-11-04 13:07 [PATCH 1/2] common/cnxk: fix mailbox timeout issues Harman Kalra
@ 2024-11-04 13:08 ` Harman Kalra
2024-11-07 8:04 ` [EXTERNAL] " Jerin Jacob
2024-11-14 16:38 ` Stephen Hemminger
2024-11-14 9:31 ` [PATCH v2 1/2] common/cnxk: fix mailbox timeout issues Harman Kalra
1 sibling, 2 replies; 7+ messages in thread
From: Harman Kalra @ 2024-11-04 13:08 UTC (permalink / raw)
To: Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao,
Harman Kalra
Cc: dev
Implementing mailbox debug trace points to trace function calls.
Signed-off-by: Harman Kalra <hkalra@marvell.com>
---
drivers/common/cnxk/roc_dev.c | 89 ++++++++++++++++--------------
drivers/common/cnxk/roc_mbox.c | 34 ++++++++++++
drivers/common/cnxk/roc_mbox.h | 2 +
drivers/common/cnxk/roc_platform.c | 9 +++
drivers/common/cnxk/roc_platform.h | 87 +++++++++++++++++++++++++++--
5 files changed, 175 insertions(+), 46 deletions(-)
diff --git a/drivers/common/cnxk/roc_dev.c b/drivers/common/cnxk/roc_dev.c
index 1127d8185c..e9bf8c01b4 100644
--- a/drivers/common/cnxk/roc_dev.c
+++ b/drivers/common/cnxk/roc_dev.c
@@ -26,7 +26,6 @@
/* RVU PF interrupt status as received from AF*/
#define RVU_PF_INTR_STATUS 0x3
-
static void *
mbox_mem_map(off_t off, size_t size)
{
@@ -83,6 +82,7 @@ pf_af_sync_msg(struct dev *dev, struct mbox_msghdr **rsp)
timeout += sleep;
if (timeout >= mbox->rsp_tmo) {
plt_err("Message timeout: %dms", mbox->rsp_tmo);
+ roc_trace_mbox_error("AFPF_SYNC", "RSP_TMO", -EIO);
rc = -EIO;
break;
}
@@ -137,6 +137,7 @@ af_pf_wait_msg(struct dev *dev, uint16_t vf, int num_msg)
timeout++;
if (timeout >= mbox->rsp_tmo) {
plt_err("Routed messages %d timeout: %dms", num_msg, mbox->rsp_tmo);
+ roc_trace_mbox_error("AFPF_WAIT", "RTD_MSG_TMO", -EIO);
break;
}
int_status = plt_read64(dev->mbox_reg_base + RVU_PF_INT);
@@ -149,16 +150,18 @@ af_pf_wait_msg(struct dev *dev, uint16_t vf, int num_msg)
plt_write64(~0ull, dev->mbox_reg_base + RVU_PF_INT_ENA_W1S);
req_hdr = (struct mbox_hdr *)((uintptr_t)mdev->mbase + mbox->rx_start);
- if (req_hdr->num_msgs != num_msg)
- plt_err("Routed messages: %d received: %d", num_msg,
- req_hdr->num_msgs);
+ if (req_hdr->num_msgs != num_msg) {
+ plt_err("Routed messages: %d received: %d", num_msg, req_hdr->num_msgs);
+ roc_trace_mbox_error("AFPF_WAIT", "RTD_INV_CNT", num_msg);
+ }
/* Get messages from mbox */
- offset = mbox->rx_start +
- PLT_ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN);
+ offset = mbox->rx_start + PLT_ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN);
for (i = 0; i < req_hdr->num_msgs; i++) {
msg = (struct mbox_msghdr *)((uintptr_t)mdev->mbase + offset);
size = mbox->rx_start + msg->next_msgoff - offset;
+ roc_trace_mbox_process("AFPF_WAIT", mbox_id2name(msg->id), req_hdr->num_msgs,
+ msg->pcifunc);
/* Reserve PF/VF mbox message */
size = PLT_ALIGN(size, MBOX_MSG_ALIGN);
@@ -288,6 +291,8 @@ vf_pf_process_msgs(struct dev *dev, uint16_t vf)
/* RVU_PF_FUNC_S */
msg->pcifunc = dev_pf_func(dev->pf, vf);
+ roc_trace_mbox_process("VFPF_MSG", mbox_id2name(msg->id), req_hdr->num_msgs,
+ msg->pcifunc);
if (msg->id == MBOX_MSG_READY) {
struct ready_msg_rsp *rsp;
@@ -333,8 +338,7 @@ vf_pf_process_msgs(struct dev *dev, uint16_t vf)
}
if (routed > 0) {
- plt_base_dbg("pf:%d routed %d messages from vf:%d to AF",
- dev->pf, routed, vf);
+ roc_trace_mbox_vf_pf_handle("RTG_AF", dev_pf_func(dev->pf, vf), routed);
/* PF will send the messages to AF and wait for responses */
af_pf_wait_msg(dev, vf, routed);
mbox_reset(dev->mbox, 0);
@@ -343,8 +347,7 @@ vf_pf_process_msgs(struct dev *dev, uint16_t vf)
/* Send mbox responses to VF */
if (mdev->num_msgs) {
- plt_base_dbg("pf:%d reply %d messages to vf:%d", dev->pf,
- mdev->num_msgs, vf);
+ roc_trace_mbox_vf_pf_handle("RSP_VF", dev_pf_func(dev->pf, vf), mdev->num_msgs);
mbox_msg_send(mbox, vf);
}
@@ -378,25 +381,24 @@ vf_pf_process_up_msgs(struct dev *dev, uint16_t vf)
switch (msg->id) {
case MBOX_MSG_CGX_LINK_EVENT:
- plt_base_dbg("PF: Msg 0x%x (%s) fn:0x%x (pf:%d,vf:%d)",
- msg->id, mbox_id2name(msg->id),
- msg->pcifunc, dev_get_pf(msg->pcifunc),
- dev_get_vf(msg->pcifunc));
+ roc_trace_mbox_process("VFPF_UPMSG_CGX", mbox_id2name(msg->id),
+ req_hdr->num_msgs, msg->pcifunc);
break;
case MBOX_MSG_CGX_PTP_RX_INFO:
- plt_base_dbg("PF: Msg 0x%x (%s) fn:0x%x (pf:%d,vf:%d)",
- msg->id, mbox_id2name(msg->id),
- msg->pcifunc, dev_get_pf(msg->pcifunc),
- dev_get_vf(msg->pcifunc));
+ roc_trace_mbox_process("VFPF_UPMSG_PTP", mbox_id2name(msg->id),
+ req_hdr->num_msgs, msg->pcifunc);
break;
default:
if (roc_rvu_lf_msg_id_range_check(dev->roc_rvu_lf, msg->id))
plt_base_dbg("PF: Msg 0x%x fn:0x%x (pf:%d,vf:%d)",
msg->id, msg->pcifunc, dev_get_pf(msg->pcifunc),
dev_get_vf(msg->pcifunc));
- else
- plt_err("Not handled UP msg 0x%x (%s) func:0x%x",
- msg->id, mbox_id2name(msg->id), msg->pcifunc);
+ else {
+ roc_trace_mbox_process("UNHND_UPMSG", mbox_id2name(msg->id),
+ req_hdr->num_msgs, msg->pcifunc);
+ plt_err("Not handled UP msg 0x%x (%s) func:0x%x", msg->id,
+ mbox_id2name(msg->id), msg->pcifunc);
+ }
}
offset = mbox->rx_start + msg->next_msgoff;
}
@@ -446,8 +448,7 @@ roc_vf_pf_mbox_irq(void *param)
if (!intr)
continue;
- plt_base_dbg("vfpf: %d intr: 0x%" PRIx64 " (pf:%d, vf:%d)", vfpf, intr, dev->pf,
- dev->vf);
+ roc_trace_mbox_interrupt("VFPF_IRQ", dev_pf_func(dev->pf, dev->vf), intr, 0);
/* Save and clear intr bits */
intrb.bits[vfpf] |= intr;
@@ -489,9 +490,8 @@ process_msgs(struct dev *dev, struct mbox *mbox)
msg = (struct mbox_msghdr *)((uintptr_t)mdev->mbase + offset);
msgs_acked++;
- plt_base_dbg("Message 0x%x (%s) pf:%d/vf:%d", msg->id,
- mbox_id2name(msg->id), dev_get_pf(msg->pcifunc),
- dev_get_vf(msg->pcifunc));
+ roc_trace_mbox_process("PRC_MSG", mbox_id2name(msg->id), req_hdr->num_msgs,
+ msg->pcifunc);
switch (msg->id) {
/* Add message id's that are handled here */
@@ -514,6 +514,8 @@ process_msgs(struct dev *dev, struct mbox *mbox)
} else {
plt_err("Message (%s) response has err=%d",
mbox_id2name(msg->id), msg->rc);
+ roc_trace_mbox_error("PRC_MSG_ERR", mbox_id2name(msg->id),
+ msg->rc);
}
}
break;
@@ -526,14 +528,19 @@ process_msgs(struct dev *dev, struct mbox *mbox)
} else {
plt_err("Message (%s) response has err=%d",
mbox_id2name(msg->id), msg->rc);
+ roc_trace_mbox_error("PRC_MSG_ERR", mbox_id2name(msg->id),
+ msg->rc);
}
}
break;
default:
- if (msg->rc)
+ if (msg->rc) {
plt_err("Message (%s) response has err=%d (%s)",
mbox_id2name(msg->id), msg->rc, roc_error_msg_get(msg->rc));
+ roc_trace_mbox_error("PRC_MSG_INV_RSP", mbox_id2name(msg->id),
+ msg->rc);
+ }
break;
}
offset = mbox->rx_start + msg->next_msgoff;
@@ -564,8 +571,8 @@ pf_vf_mbox_send_up_msg(struct dev *dev, void *rec_msg)
if (!(dev->active_vfs[vf / max_bits] & (BIT_ULL(vf))))
continue;
- plt_base_dbg("(%s) size: %zx to VF: %d",
- mbox_id2name(msg->hdr.id), size, vf);
+ roc_trace_mbox_process("PFVF_SND_UPMSG", mbox_id2name(msg->hdr.id), 1,
+ (uint16_t)dev_pf_func(dev->pf, vf));
/* Reserve PF/VF mbox message */
vf_msg = mbox_alloc_msg(vf_mbox, vf, size);
@@ -895,9 +902,9 @@ process_msgs_up(struct dev *dev, struct mbox *mbox)
for (i = 0; i < req_hdr->num_msgs; i++) {
msg = (struct mbox_msghdr *)((uintptr_t)mdev->mbase + offset);
- plt_base_dbg("Message 0x%x (%s) pf:%d/vf:%d", msg->id,
- mbox_id2name(msg->id), dev_get_pf(msg->pcifunc),
- dev_get_vf(msg->pcifunc));
+ plt_base_dbg("Message 0x%x (%s) pf:%d/vf:%d", msg->id, mbox_id2name(msg->id),
+ dev_get_pf(msg->pcifunc), dev_get_vf(msg->pcifunc));
+ roc_trace_mbox_process("PRC_UPMSG", mbox_id2name(msg->id), req_hdr->num_msgs, msg->pcifunc);
if (roc_rvu_lf_msg_id_range_check(dev->roc_rvu_lf, msg->id)) {
size = mbox->rx_start + msg->next_msgoff - offset;
err = process_rvu_lf_msgs_up(dev, msg, size);
@@ -905,9 +912,10 @@ process_msgs_up(struct dev *dev, struct mbox *mbox)
plt_err("Error %d handling 0x%x RVU_LF up msg", err, msg->id);
} else {
err = mbox_process_msgs_up(dev, msg);
- if (err)
- plt_err("Error %d handling 0x%x (%s)", err, msg->id,
- mbox_id2name(msg->id));
+ if (err) {
+ roc_trace_mbox_error("PRC_UPMSG_ERR", mbox_id2name(msg->id), err);
+ plt_err("Error %d handling 0x%x (%s)", err, msg->id, mbox_id2name(msg->id));
+ }
}
offset = mbox->rx_start + msg->next_msgoff;
}
@@ -930,7 +938,7 @@ roc_pf_vf_mbox_irq_cn20k(void *param)
plt_base_dbg("Proceeding to check mbox UP messages if any");
plt_write64(intr, dev->mbox_reg_base + RVU_VF_INT);
- plt_base_dbg("Irq 0x%" PRIx64 "(pf:%d,vf:%d)", intr, dev->pf, dev->vf);
+ roc_trace_mbox_interrupt("PFVF_IRQ_20K", dev_pf_func(dev->pf, dev->vf), intr, 0);
/* If interrupt occurred for down message */
if (intr & BIT_ULL(1))
@@ -954,7 +962,7 @@ roc_af_pf_mbox_irq_cn20k(void *param)
plt_base_dbg("Proceeding to check mbox UP messages if any");
plt_write64(intr, dev->mbox_reg_base + RVU_PF_INT);
- plt_base_dbg("Irq 0x%" PRIx64 "(pf:%d,vf:%d)", intr, dev->pf, dev->vf);
+ roc_trace_mbox_interrupt("AFPF_IRQ_20K", dev_pf_func(dev->pf, dev->vf), intr, 0);
/* If interrupt occurred for down message */
if (intr & BIT_ULL(1))
@@ -978,12 +986,12 @@ roc_pf_vf_mbox_irq(void *param)
plt_base_dbg("Proceeding to check mbox UP messages if any");
plt_write64(intr, dev->mbox_reg_base + RVU_VF_INT);
- plt_base_dbg("Irq 0x%" PRIx64 "(pf:%d,vf:%d)", intr, dev->pf, dev->vf);
/* Reading for UP/DOWN message, next message sending will be delayed
* by 1ms until this region is zeroed mbox_wait_for_zero()
*/
mbox_data = plt_read64(dev->mbox_reg_base + RVU_VF_VFPF_MBOX0);
+ roc_trace_mbox_interrupt("PFVF_IRQ", dev_pf_func(dev->pf, dev->vf), intr, mbox_data);
/* If interrupt occurred for down message */
if (mbox_data & MBOX_DOWN_MSG) {
mbox_data &= ~MBOX_DOWN_MSG;
@@ -1015,12 +1023,12 @@ roc_af_pf_mbox_irq(void *param)
plt_base_dbg("Proceeding to check mbox UP messages if any");
plt_write64(intr, dev->mbox_reg_base + RVU_PF_INT);
- plt_base_dbg("Irq 0x%" PRIx64 "(pf:%d,vf:%d)", intr, dev->pf, dev->vf);
/* Reading for UP/DOWN message, next message sending will be delayed
* by 1ms until this region is zeroed mbox_wait_for_zero()
*/
mbox_data = plt_read64(dev->mbox_reg_base + RVU_PF_PFAF_MBOX0);
+ roc_trace_mbox_interrupt("AFPF_IRQ", dev_pf_func(dev->pf, dev->vf), intr, mbox_data);
/* If interrupt occurred for down message */
if (mbox_data & MBOX_DOWN_MSG) {
mbox_data &= ~MBOX_DOWN_MSG;
@@ -1327,8 +1335,7 @@ vf_flr_handle_msg(void *param, dev_intr_t *flr)
for (vf = 0; vf < max_vf; vf++) {
if (flr->bits[vf / max_bits] & BIT_ULL(vf % max_bits)) {
- plt_base_dbg("Process FLR vf:%d request (pf:%d, vf:%d)",
- vf, dev->pf, dev->vf);
+ roc_trace_mbox_vf_flr("PRC_VFFLR", vf, dev_pf_func(dev->pf, dev->vf));
/* Inform AF about VF reset */
vf_flr_send_msg(dev, vf);
flr->bits[vf / max_bits] &= ~(BIT_ULL(vf % max_bits));
diff --git a/drivers/common/cnxk/roc_mbox.c b/drivers/common/cnxk/roc_mbox.c
index eb5bd771fe..e1b0f2fa63 100644
--- a/drivers/common/cnxk/roc_mbox.c
+++ b/drivers/common/cnxk/roc_mbox.c
@@ -263,6 +263,34 @@ mbox_wait_for_zero(struct mbox *mbox, int devid)
return data == 0;
}
+static inline void
+mbox_debug_region(const char *ops, struct mbox *mbox, int devid, uint64_t intr_val, int rc)
+{
+ struct mbox_dev *mdev = &mbox->dev[devid];
+ struct mbox_msghdr *msg_rx, *msg_tx;
+ uint64_t offset_rx, offset_tx;
+ char buf[BUFSIZ];
+
+ if (!plt_trace_point_is_enabled(&__cnxk_trace_mbox_region))
+ return;
+
+ /* Get messages from mbox */
+ offset_tx = mbox->tx_start + PLT_ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN);
+
+ msg_tx = (struct mbox_msghdr *)((uintptr_t)mdev->mbase + offset_tx);
+ snprintf(buf, BUFSIZ, "%s_TX", ops);
+ /* Only while sending message choose a rand identifier for the msg */
+ if (intr_val)
+ msg_tx->cookie = rand();
+ roc_trace_mbox_region(buf, mbox_id2name(msg_tx->id), msg_tx->pcifunc, (int)intr_val,
+ msg_tx->cookie);
+
+ offset_rx = mbox->rx_start + PLT_ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN);
+ msg_rx = (struct mbox_msghdr *)((uintptr_t)mdev->mbase + offset_rx);
+ snprintf(buf, BUFSIZ, "%s_RX", ops);
+ roc_trace_mbox_region(buf, mbox_id2name(msg_rx->id), msg_rx->pcifunc, rc, msg_rx->cookie);
+}
+
static void
mbox_msg_send_data(struct mbox *mbox, int devid, uint8_t data)
{
@@ -291,6 +319,8 @@ mbox_msg_send_data(struct mbox *mbox, int devid, uint8_t data)
(volatile void *)(mbox->reg_base + (mbox->trigger | (devid << mbox->tr_shift))));
intr_val |= (uint64_t)data;
+
+ mbox_debug_region("SND_MSG", mbox, devid, intr_val, 0);
/* The interrupt should be fired after num_msgs is written
* to the shared memory
*/
@@ -342,6 +372,8 @@ mbox_get_rsp(struct mbox *mbox, int devid, void **msg)
if (msg != NULL)
*msg = msghdr;
+ mbox_debug_region("GET_RSP", mbox, devid, 0, msghdr->rc);
+
return msghdr->rc;
}
@@ -436,6 +468,8 @@ mbox_wait(struct mbox *mbox, int devid, uint32_t rst_timo)
__atomic_load_n(&mdev->msgs_acked, __ATOMIC_ACQUIRE),
tx_hdr->num_msgs, rx_hdr->num_msgs, mdev->msg_size, mdev->rsp_size);
+ mbox_debug_region("MBX_TMO", mbox, devid, 0, -EIO);
+
return -EIO;
}
}
diff --git a/drivers/common/cnxk/roc_mbox.h b/drivers/common/cnxk/roc_mbox.h
index e7a916d61c..3ca8442b55 100644
--- a/drivers/common/cnxk/roc_mbox.h
+++ b/drivers/common/cnxk/roc_mbox.h
@@ -32,6 +32,8 @@ struct mbox_msghdr {
uint16_t __io ver;
/* Offset of next msg within mailbox region */
uint16_t __io next_msgoff;
+ /* Unique ID for trace debug */
+ uint16_t __io cookie;
int __io rc; /* Msg processed response code */
};
diff --git a/drivers/common/cnxk/roc_platform.c b/drivers/common/cnxk/roc_platform.c
index f1e0a93d97..f7678280f5 100644
--- a/drivers/common/cnxk/roc_platform.c
+++ b/drivers/common/cnxk/roc_platform.c
@@ -3,6 +3,7 @@
*/
#include <rte_log.h>
+#include <rte_trace_point_register.h>
#include "roc_api.h"
@@ -85,6 +86,14 @@ roc_plt_init(void)
return 0;
}
+/* ROC trace points */
+RTE_TRACE_POINT_REGISTER(cnxk_trace_mbox_region, cnxk.mbox.region)
+RTE_TRACE_POINT_REGISTER(cnxk_trace_mbox_process, cnxk.mbox.process)
+RTE_TRACE_POINT_REGISTER(cnxk_trace_mbox_interrupt, cnxk.mbox.interrupt)
+RTE_TRACE_POINT_REGISTER(cnxk_trace_mbox_vf_flr, cnxk.mbox.vf.flr)
+RTE_TRACE_POINT_REGISTER(cnxk_trace_mbox_vf_pf_handle, cnxk.mbox.vf.pf.handle)
+RTE_TRACE_POINT_REGISTER(cnxk_trace_mbox_error, cnxk.mbox.error)
+
RTE_LOG_REGISTER_SUFFIX(cnxk_logtype_base, base, INFO);
RTE_LOG_REGISTER_SUFFIX(cnxk_logtype_mbox, mbox, NOTICE);
RTE_LOG_REGISTER_SUFFIX(cnxk_logtype_cpt, crypto, NOTICE);
diff --git a/drivers/common/cnxk/roc_platform.h b/drivers/common/cnxk/roc_platform.h
index df4f88f288..ef2aaa312f 100644
--- a/drivers/common/cnxk/roc_platform.h
+++ b/drivers/common/cnxk/roc_platform.h
@@ -5,12 +5,12 @@
#ifndef _ROC_PLATFORM_H_
#define _ROC_PLATFORM_H_
-#include <rte_compat.h>
+#include <bus_pci_driver.h>
#include <rte_alarm.h>
#include <rte_bitmap.h>
-#include <bus_pci_driver.h>
#include <rte_byteorder.h>
#include <rte_common.h>
+#include <rte_compat.h>
#include <rte_cycles.h>
#include <rte_ether.h>
#include <rte_interrupts.h>
@@ -25,6 +25,7 @@
#include <rte_string_fns.h>
#include <rte_tailq.h>
#include <rte_telemetry.h>
+#include <rte_trace_point.h>
#include "eal_filesystem.h"
@@ -260,6 +261,9 @@ plt_thread_is_valid(plt_thread_t thr)
#define plt_tel_data_add_dict_u64 rte_tel_data_add_dict_uint
#define plt_telemetry_register_cmd rte_telemetry_register_cmd
+/* Trace */
+#define plt_trace_point_is_enabled rte_trace_point_is_enabled
+
/* Log */
extern int cnxk_logtype_base;
#define RTE_LOGTYPE_base cnxk_logtype_base
@@ -374,9 +378,82 @@ plt_lmt_region_reserve_aligned(const char *name, size_t len, uint32_t align)
/* To ensure returned memory is physically contiguous, bounding
* the start and end address in 2M range.
*/
- return rte_memzone_reserve_bounded(name, len, SOCKET_ID_ANY,
- RTE_MEMZONE_IOVA_CONTIG,
- align, RTE_PGSIZE_2M);
+ return rte_memzone_reserve_bounded(name, len, SOCKET_ID_ANY, RTE_MEMZONE_IOVA_CONTIG, align,
+ RTE_PGSIZE_2M);
+}
+
+/* ROC trace points */
+RTE_TRACE_POINT(cnxk_trace_mbox_region,
+ RTE_TRACE_POINT_ARGS(const char *func, const char *msg, uint16_t pcifunc, int data,
+ uint16_t cookie),
+ rte_trace_point_emit_string(func);
+ rte_trace_point_emit_string(msg); rte_trace_point_emit_u16(pcifunc);
+ rte_trace_point_emit_int(data); rte_trace_point_emit_u16(cookie);)
+
+RTE_TRACE_POINT(cnxk_trace_mbox_process,
+ RTE_TRACE_POINT_ARGS(const char *func, const char *msg, uint16_t num_msgs,
+ uint16_t pcifunc),
+ rte_trace_point_emit_string(func);
+ rte_trace_point_emit_string(msg); rte_trace_point_emit_u16(num_msgs);
+ rte_trace_point_emit_u16(pcifunc);)
+
+RTE_TRACE_POINT(cnxk_trace_mbox_interrupt,
+ RTE_TRACE_POINT_ARGS(const char *func, int pcifunc, uint64_t intr,
+ uint64_t mbox_data),
+ rte_trace_point_emit_string(func);
+ rte_trace_point_emit_int(pcifunc); rte_trace_point_emit_u64(intr);
+ rte_trace_point_emit_u64(mbox_data);)
+
+RTE_TRACE_POINT(cnxk_trace_mbox_vf_flr,
+ RTE_TRACE_POINT_ARGS(const char *func, uint16_t from_vf, uint16_t pcifunc),
+ rte_trace_point_emit_string(func);
+ rte_trace_point_emit_u16(from_vf); rte_trace_point_emit_u16(pcifunc);)
+
+RTE_TRACE_POINT(cnxk_trace_mbox_vf_pf_handle,
+ RTE_TRACE_POINT_ARGS(const char *ops, uint16_t pcifunc, int data),
+ rte_trace_point_emit_string(ops);
+ rte_trace_point_emit_u16(pcifunc); rte_trace_point_emit_int(data);)
+
+RTE_TRACE_POINT(cnxk_trace_mbox_error,
+ RTE_TRACE_POINT_ARGS(const char *func, const char *msg, int data),
+ rte_trace_point_emit_string(func);
+ rte_trace_point_emit_string(msg); rte_trace_point_emit_int(data);)
+
+static inline void
+roc_trace_mbox_region(const char *func, const char *msg, uint16_t pcifunc, int data,
+ uint16_t cookie)
+{
+ cnxk_trace_mbox_region(func, msg, pcifunc, data, cookie);
+}
+
+static inline void
+roc_trace_mbox_process(const char *func, const char *msg, uint16_t num_msgs, uint16_t pcifunc)
+{
+ cnxk_trace_mbox_process(func, msg, num_msgs, pcifunc);
+}
+
+static inline void
+roc_trace_mbox_interrupt(const char *func, int pcifunc, uint64_t intr, uint64_t mbox_data)
+{
+ cnxk_trace_mbox_interrupt(func, pcifunc, intr, mbox_data);
+}
+
+static inline void
+roc_trace_mbox_vf_pf_handle(const char *func, uint16_t pcifunc, int data)
+{
+ cnxk_trace_mbox_vf_pf_handle(func, pcifunc, data);
+}
+
+static inline void
+roc_trace_mbox_error(const char *func, const char *msg, int data)
+{
+ cnxk_trace_mbox_error(func, msg, data);
+}
+
+static inline void
+roc_trace_mbox_vf_flr(const char *func, uint16_t from_vf, uint16_t pcifunc)
+{
+ cnxk_trace_mbox_vf_flr(func, from_vf, pcifunc);
}
#endif /* _ROC_PLATFORM_H_ */
--
2.46.0.469.g4590f2e941
^ permalink raw reply [flat|nested] 7+ messages in thread
* RE: [EXTERNAL] [PATCH 2/2] common/cnxk: add mailbox debug trace
2024-11-04 13:08 ` [PATCH 2/2] common/cnxk: add mailbox debug trace Harman Kalra
@ 2024-11-07 8:04 ` Jerin Jacob
2024-11-14 16:38 ` Stephen Hemminger
1 sibling, 0 replies; 7+ messages in thread
From: Jerin Jacob @ 2024-11-07 8:04 UTC (permalink / raw)
To: Harman Kalra, Nithin Kumar Dabilpuram, Kiran Kumar Kokkilagadda,
Sunil Kumar Kori, Satha Koteswara Rao Kottidi, Harman Kalra
Cc: dev
[-- Attachment #1: Type: text/plain, Size: 2348 bytes --]
> -----Original Message-----
> From: Harman Kalra <hkalra@marvell.com>
> Sent: Monday, November 4, 2024 6:38 PM
> To: Nithin Kumar Dabilpuram <ndabilpuram@marvell.com>; Kiran Kumar
> Kokkilagadda <kirankumark@marvell.com>; Sunil Kumar Kori
> <skori@marvell.com>; Satha Koteswara Rao Kottidi
> <skoteshwar@marvell.com>; Harman Kalra <hkalra@marvell.com>
> Cc: dev@dpdk.org
> Subject: [EXTERNAL] [PATCH 2/2] common/cnxk: add mailbox debug trace
>
> Implementing mailbox debug trace points to trace function calls. Signed-off-by:
> Harman Kalra <hkalra@ marvell. com> --- drivers/common/cnxk/roc_dev. c | 89
> ++++++++++++++++-------------- drivers/common/cnxk/roc_mbox. c | 34
> ++++++++++++ drivers/common/cnxk/roc_mbox. h
> Implementing mailbox debug trace points to trace function calls.
>
> Signed-off-by: Harman Kalra <hkalra@marvell.com>
> ---
> drivers/common/cnxk/roc_dev.c | 89 ++++++++++++++++--------------
> drivers/common/cnxk/roc_mbox.c | 34 ++++++++++++
> drivers/common/cnxk/roc_mbox.h | 2 +
> drivers/common/cnxk/roc_platform.c | 9 +++
> drivers/common/cnxk/roc_platform.h | 87 +++++++++++++++++++++++++++--
> 5 files changed, 175 insertions(+), 46 deletions(-)
>
> diff --git a/drivers/common/cnxk/roc_dev.c b/drivers/common/cnxk/roc_dev.c
> index 1127d8185c..e9bf8c01b4 100644
> --- a/drivers/common/cnxk/roc_dev.c
> +++ b/drivers/common/cnxk/roc_dev.c
> @@ -26,7 +26,6 @@
>
> /* RVU PF interrupt status as received from AF*/ #define
> RVU_PF_INTR_STATUS 0x3
> -
This change may not be needed.
Also, Fix following issues
### [PATCH] common/cnxk: fix mailbox timeout issues
Warning in drivers/common/cnxk/roc_dev.c:
Warning in drivers/common/cnxk/roc_mbox.c:
Using __atomic_xxx/__ATOMIC_XXX built-ins, prefer rte_atomic_xxx/rte_memory_order_xxx
### [PATCH] common/cnxk: add mailbox debug trace
WARNING:LONG_LINE: line length of 108 exceeds 100 columns
#199: FILE: drivers/common/cnxk/roc_dev.c:907:
+ roc_trace_mbox_process("PRC_UPMSG", mbox_id2name(msg->id), req_hdr->num_msgs, msg->pcifunc);
WARNING:LONG_LINE: line length of 108 exceeds 100 columns
#212: FILE: drivers/common/cnxk/roc_dev.c:917:
+ plt_err("Error %d handling 0x%x (%s)", err, msg->id, mbox_id2name(msg->id));
);
[-- Attachment #2: winmail.dat --]
[-- Type: application/ms-tnef, Size: 15716 bytes --]
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH v2 1/2] common/cnxk: fix mailbox timeout issues
2024-11-04 13:07 [PATCH 1/2] common/cnxk: fix mailbox timeout issues Harman Kalra
2024-11-04 13:08 ` [PATCH 2/2] common/cnxk: add mailbox debug trace Harman Kalra
@ 2024-11-14 9:31 ` Harman Kalra
2024-11-14 9:31 ` [PATCH v2 2/2] common/cnxk: add mailbox debug trace Harman Kalra
2025-01-22 12:36 ` [EXTERNAL] [PATCH v2 1/2] common/cnxk: fix mailbox timeout issues Jerin Jacob
1 sibling, 2 replies; 7+ messages in thread
From: Harman Kalra @ 2024-11-14 9:31 UTC (permalink / raw)
To: Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao,
Harman Kalra
Cc: dev
Found couple of reasons causing mailbox timeout:
* msgs_acked value by interrupt thread was not getting synced to thread
polling it.
* mbox_data value sent by AF was seen 0 causing neither down nor up msg
processing.
Fixes: 9bd368ca311a ("common/cnxk: enable PF VF mailbox")
Fixes: 61deac72abbf ("common/cnxk: add CN20KA mbox support")
Signed-off-by: Harman Kalra <hkalra@marvell.com>
---
V2:
* Replaced gcc builtin __atomic_xxx intrinsics with corresponding
rte_atomic_xxx stdatomic API
drivers/common/cnxk/roc_dev.c | 106 ++++++++++++++++++++++++-----
drivers/common/cnxk/roc_dev_priv.h | 11 +++
drivers/common/cnxk/roc_mbox.c | 12 ++--
drivers/common/cnxk/roc_platform.h | 5 ++
4 files changed, 111 insertions(+), 23 deletions(-)
diff --git a/drivers/common/cnxk/roc_dev.c b/drivers/common/cnxk/roc_dev.c
index 32409f2ef3..0d1f9c4564 100644
--- a/drivers/common/cnxk/roc_dev.c
+++ b/drivers/common/cnxk/roc_dev.c
@@ -401,8 +401,7 @@ vf_pf_process_up_msgs(struct dev *dev, uint16_t vf)
offset = mbox->rx_start + msg->next_msgoff;
}
mbox_reset(mbox, vf);
- mdev->msgs_acked = msgs_acked;
- plt_wmb();
+ plt_atomic_store_explicit(&mdev->msgs_acked, msgs_acked, plt_memory_order_release);
return i;
}
@@ -542,8 +541,7 @@ process_msgs(struct dev *dev, struct mbox *mbox)
mbox_reset(mbox, 0);
/* Update acked if someone is waiting a message - mbox_wait is waiting */
- mdev->msgs_acked = msgs_acked;
- plt_wmb();
+ plt_atomic_store_explicit(&mdev->msgs_acked, msgs_acked, plt_memory_order_release);
}
/* Copies the message received from AF and sends it to VF */
@@ -920,12 +918,59 @@ process_msgs_up(struct dev *dev, struct mbox *mbox)
}
}
+/* IRQ to VF from PF - VF context (interrupt thread) */
+static void
+roc_pf_vf_mbox_irq_cn20k(void *param)
+{
+ struct dev *dev = param;
+ uint64_t intr;
+
+ intr = plt_read64(dev->mbox_reg_base + RVU_VF_INT);
+ if (intr == 0)
+ plt_base_dbg("Proceeding to check mbox UP messages if any");
+
+ plt_write64(intr, dev->mbox_reg_base + RVU_VF_INT);
+ plt_base_dbg("Irq 0x%" PRIx64 "(pf:%d,vf:%d)", intr, dev->pf, dev->vf);
+
+ /* If interrupt occurred for down message */
+ if (intr & BIT_ULL(1))
+ /* First process all configuration messages */
+ process_msgs(dev, dev->mbox);
+
+ /* If interrupt occurred for UP message */
+ if (intr & BIT_ULL(0))
+ process_msgs_up(dev, &dev->mbox_up);
+}
+
+/* IRQ to PF from AF - PF context (interrupt thread) */
+static void
+roc_af_pf_mbox_irq_cn20k(void *param)
+{
+ struct dev *dev = param;
+ uint64_t intr;
+
+ intr = plt_read64(dev->mbox_reg_base + RVU_PF_INT);
+ if (intr == 0)
+ plt_base_dbg("Proceeding to check mbox UP messages if any");
+
+ plt_write64(intr, dev->mbox_reg_base + RVU_PF_INT);
+ plt_base_dbg("Irq 0x%" PRIx64 "(pf:%d,vf:%d)", intr, dev->pf, dev->vf);
+
+ /* If interrupt occurred for down message */
+ if (intr & BIT_ULL(1))
+ process_msgs(dev, dev->mbox);
+
+ /* If interrupt occurred for up message */
+ if (intr & BIT_ULL(0))
+ process_msgs_up(dev, &dev->mbox_up);
+}
+
/* IRQ to VF from PF - VF context (interrupt thread) */
static void
roc_pf_vf_mbox_irq(void *param)
{
struct dev *dev = param;
- uint64_t mbox_data;
+ uint64_t mbox_data = 0;
uint64_t intr;
intr = plt_read64(dev->mbox_reg_base + RVU_VF_INT);
@@ -940,7 +985,7 @@ roc_pf_vf_mbox_irq(void *param)
*/
mbox_data = plt_read64(dev->mbox_reg_base + RVU_VF_VFPF_MBOX0);
/* If interrupt occurred for down message */
- if (mbox_data & MBOX_DOWN_MSG || intr & BIT_ULL(1)) {
+ if (mbox_data & MBOX_DOWN_MSG) {
mbox_data &= ~MBOX_DOWN_MSG;
plt_write64(mbox_data, dev->mbox_reg_base + RVU_VF_VFPF_MBOX0);
@@ -948,7 +993,7 @@ roc_pf_vf_mbox_irq(void *param)
process_msgs(dev, dev->mbox);
}
/* If interrupt occurred for UP message */
- if (mbox_data & MBOX_UP_MSG || intr & BIT_ULL(0)) {
+ if (mbox_data & MBOX_UP_MSG) {
mbox_data &= ~MBOX_UP_MSG;
plt_write64(mbox_data, dev->mbox_reg_base + RVU_VF_VFPF_MBOX0);
@@ -962,7 +1007,7 @@ static void
roc_af_pf_mbox_irq(void *param)
{
struct dev *dev = param;
- uint64_t mbox_data;
+ uint64_t mbox_data = 0;
uint64_t intr;
intr = plt_read64(dev->mbox_reg_base + RVU_PF_INT);
@@ -977,7 +1022,7 @@ roc_af_pf_mbox_irq(void *param)
*/
mbox_data = plt_read64(dev->mbox_reg_base + RVU_PF_PFAF_MBOX0);
/* If interrupt occurred for down message */
- if (mbox_data & MBOX_DOWN_MSG || intr & BIT_ULL(1)) {
+ if (mbox_data & MBOX_DOWN_MSG) {
mbox_data &= ~MBOX_DOWN_MSG;
plt_write64(mbox_data, dev->mbox_reg_base + RVU_PF_PFAF_MBOX0);
@@ -985,7 +1030,7 @@ roc_af_pf_mbox_irq(void *param)
process_msgs(dev, dev->mbox);
}
/* If interrupt occurred for up message */
- if (mbox_data & MBOX_UP_MSG || intr & BIT_ULL(0)) {
+ if (mbox_data & MBOX_UP_MSG) {
mbox_data &= ~MBOX_UP_MSG;
plt_write64(mbox_data, dev->mbox_reg_base + RVU_PF_PFAF_MBOX0);
@@ -1045,7 +1090,8 @@ mbox_register_pf_irq(struct plt_pci_device *pci_dev, struct dev *dev)
}
}
/* MBOX interrupt AF <-> PF */
- rc = dev_irq_register(intr_handle, roc_af_pf_mbox_irq, dev, dev->mbox_plat->pfaf_vec);
+ rc = dev_irq_register(intr_handle, dev->mbox_plat->ops->af_pf_mbox_irq, dev,
+ dev->mbox_plat->pfaf_vec);
if (rc) {
plt_err("Fail to register AF<->PF mbox irq");
return rc;
@@ -1073,7 +1119,8 @@ mbox_register_vf_irq(struct plt_pci_device *pci_dev, struct dev *dev)
plt_write64(~0ull, dev->mbox_reg_base + RVU_VF_INT_ENA_W1C);
/* MBOX interrupt PF <-> VF */
- rc = dev_irq_register(intr_handle, roc_pf_vf_mbox_irq, dev, RVU_VF_INT_VEC_MBOX);
+ rc = dev_irq_register(intr_handle, dev->mbox_plat->ops->pf_vf_mbox_irq, dev,
+ RVU_VF_INT_VEC_MBOX);
if (rc) {
plt_err("Fail to register PF<->VF mbox irq");
return rc;
@@ -1127,7 +1174,8 @@ mbox_unregister_pf_irq(struct plt_pci_device *pci_dev, struct dev *dev)
}
/* MBOX interrupt AF <-> PF */
- dev_irq_unregister(intr_handle, roc_af_pf_mbox_irq, dev, dev->mbox_plat->pfaf_vec);
+ dev_irq_unregister(intr_handle, dev->mbox_plat->ops->af_pf_mbox_irq, dev,
+ dev->mbox_plat->pfaf_vec);
}
static void
@@ -1139,7 +1187,8 @@ mbox_unregister_vf_irq(struct plt_pci_device *pci_dev, struct dev *dev)
plt_write64(~0ull, dev->mbox_reg_base + RVU_VF_INT_ENA_W1C);
/* Unregister the interrupt handler */
- dev_irq_unregister(intr_handle, roc_pf_vf_mbox_irq, dev, RVU_VF_INT_VEC_MBOX);
+ dev_irq_unregister(intr_handle, dev->mbox_plat->ops->pf_vf_mbox_irq, dev,
+ RVU_VF_INT_VEC_MBOX);
}
void
@@ -1599,10 +1648,17 @@ dev_cache_line_size_valid(void)
return true;
}
-static void
+static int
mbox_platform_changes(struct mbox_platform *mbox_plat, uintptr_t bar2, uintptr_t bar4, bool is_vf)
{
- int i;
+ int i, rc = 0;
+
+ /* Allocate memory for device ops */
+ mbox_plat->ops = plt_zmalloc(sizeof(struct mbox_ops), 0);
+ if (mbox_plat->ops == NULL) {
+ rc = -ENOMEM;
+ goto fail;
+ }
if (roc_model_is_cn20k()) {
/* For CN20K, AF allocates mbox memory in DRAM and writes PF
@@ -1613,6 +1669,9 @@ mbox_platform_changes(struct mbox_platform *mbox_plat, uintptr_t bar2, uintptr_t
mbox_plat->mbox_region_base =
bar2 + (RVU_PFX_FUNC_PFAF_MBOX +
((uint64_t)RVU_BLOCK_ADDR_MBOX << RVU_FUNC_BLKADDR_SHIFT));
+ /* Mbox operations */
+ mbox_plat->ops->af_pf_mbox_irq = roc_af_pf_mbox_irq_cn20k;
+ mbox_plat->ops->pf_vf_mbox_irq = roc_pf_vf_mbox_irq_cn20k;
/* Interrupt vectors */
mbox_plat->pfaf_vec = RVU_MBOX_PF_INT_VEC_AFPF_MBOX;
mbox_plat->pfvf_mbox0_vec = RVU_MBOX_PF_INT_VEC_VFPF_MBOX0;
@@ -1630,6 +1689,9 @@ mbox_platform_changes(struct mbox_platform *mbox_plat, uintptr_t bar2, uintptr_t
} else {
mbox_plat->mbox_reg_base = bar2;
mbox_plat->mbox_region_base = bar4;
+ /* Mbox operations */
+ mbox_plat->ops->af_pf_mbox_irq = roc_af_pf_mbox_irq;
+ mbox_plat->ops->pf_vf_mbox_irq = roc_pf_vf_mbox_irq;
mbox_plat->pfaf_vec = RVU_PF_INT_VEC_AFPF_MBOX;
mbox_plat->pfvf_mbox0_vec = RVU_PF_INT_VEC_VFPF_MBOX0;
mbox_plat->pfvf_mbox1_vec = RVU_PF_INT_VEC_VFPF_MBOX1;
@@ -1647,6 +1709,8 @@ mbox_platform_changes(struct mbox_platform *mbox_plat, uintptr_t bar2, uintptr_t
if (roc_model_is_cn10k())
mbox_plat->mbox_region_base = bar2 + RVU_VF_MBOX_REGION;
}
+fail:
+ return rc;
}
int
@@ -1678,7 +1742,12 @@ dev_init(struct dev *dev, struct plt_pci_device *pci_dev)
rc = -ENOMEM;
goto fail;
}
- mbox_platform_changes(dev->mbox_plat, bar2, bar4, is_vf);
+
+ if (mbox_platform_changes(dev->mbox_plat, bar2, bar4, is_vf)) {
+ plt_err("Failed to populate platform specific changes");
+ rc = -ENOMEM;
+ goto mbox_plat_free;
+ }
mbox_reg_base = dev->mbox_plat->mbox_reg_base;
mbox_region_base = dev->mbox_plat->mbox_region_base;
@@ -1824,6 +1893,8 @@ dev_init(struct dev *dev, struct plt_pci_device *pci_dev)
mbox_fini(dev->mbox);
mbox_fini(&dev->mbox_up);
error:
+ plt_free(dev->mbox_plat->ops);
+mbox_plat_free:
plt_free(dev->mbox_plat);
fail:
return rc;
@@ -1883,6 +1954,7 @@ dev_fini(struct dev *dev, struct plt_pci_device *pci_dev)
mbox_fini(mbox);
dev->mbox_active = 0;
+ plt_free(dev->mbox_plat->ops);
plt_free(dev->mbox_plat);
/* Disable MSIX vectors */
dev_irqs_disable(intr_handle);
diff --git a/drivers/common/cnxk/roc_dev_priv.h b/drivers/common/cnxk/roc_dev_priv.h
index c766183196..a8e40fa9d5 100644
--- a/drivers/common/cnxk/roc_dev_priv.h
+++ b/drivers/common/cnxk/roc_dev_priv.h
@@ -106,6 +106,15 @@ struct mbox_sync {
pthread_mutex_t mutex;
};
+/* AF PF VF mbox interrupt callbacks */
+typedef void (*af_pf_mbox_irq_t)(void *param);
+typedef void (*pf_vf_mbox_irq_t)(void *param);
+
+struct mbox_ops {
+ af_pf_mbox_irq_t af_pf_mbox_irq;
+ pf_vf_mbox_irq_t pf_vf_mbox_irq;
+};
+
struct mbox_platform {
uint8_t pfaf_vec;
uint8_t pfvf_mbox0_vec;
@@ -120,6 +129,8 @@ struct mbox_platform {
uint64_t pfvf1_mbox_int_ena_w1c[MAX_VFPF_DWORD_BITS];
uintptr_t mbox_reg_base;
uintptr_t mbox_region_base;
+ /* Mbox operations */
+ struct mbox_ops *ops;
};
struct dev {
diff --git a/drivers/common/cnxk/roc_mbox.c b/drivers/common/cnxk/roc_mbox.c
index db77babfdb..a3b96e6e2b 100644
--- a/drivers/common/cnxk/roc_mbox.c
+++ b/drivers/common/cnxk/roc_mbox.c
@@ -275,7 +275,7 @@ mbox_msg_send_data(struct mbox *mbox, int devid, uint8_t data)
tx_hdr->msg_size = mdev->msg_size;
mdev->msg_size = 0;
mdev->rsp_size = 0;
- mdev->msgs_acked = 0;
+ plt_atomic_store_explicit(&mdev->msgs_acked, 0, plt_memory_order_release);
/* num_msgs != 0 signals to the peer that the buffer has a number of
* messages. So this should be written after copying txmem
@@ -417,7 +417,8 @@ mbox_wait(struct mbox *mbox, int devid, uint32_t rst_timo)
* mdev->msgs_acked are incremented at process_msgs() in interrupt
* thread context.
*/
- while (mdev->num_msgs > mdev->msgs_acked) {
+ while (mdev->num_msgs > plt_atomic_load_explicit(&mdev->msgs_acked,
+ plt_memory_order_acquire)) {
plt_delay_us(sleep);
timeout += sleep;
if (timeout >= rst_timo) {
@@ -433,13 +434,12 @@ mbox_wait(struct mbox *mbox, int devid, uint32_t rst_timo)
"(tx/rx num_msgs: %d/%d), msg_size: %d, "
"rsp_size: %d",
devid, timeout, mdev->num_msgs,
- mdev->msgs_acked, tx_hdr->num_msgs,
- rx_hdr->num_msgs, mdev->msg_size,
- mdev->rsp_size);
+ plt_atomic_load_explicit(&mdev->msgs_acked,
+ plt_memory_order_acquire),
+ tx_hdr->num_msgs, rx_hdr->num_msgs, mdev->msg_size, mdev->rsp_size);
return -EIO;
}
- plt_rmb();
}
return 0;
}
diff --git a/drivers/common/cnxk/roc_platform.h b/drivers/common/cnxk/roc_platform.h
index df4f88f288..4076dbe94d 100644
--- a/drivers/common/cnxk/roc_platform.h
+++ b/drivers/common/cnxk/roc_platform.h
@@ -211,6 +211,11 @@ plt_thread_is_valid(plt_thread_t thr)
#define plt_io_rmb() rte_io_rmb()
#define plt_atomic_thread_fence rte_atomic_thread_fence
+#define plt_atomic_store_explicit rte_atomic_store_explicit
+#define plt_atomic_load_explicit rte_atomic_load_explicit
+#define plt_memory_order_release rte_memory_order_release
+#define plt_memory_order_acquire rte_memory_order_acquire
+
#define plt_bit_relaxed_get32 rte_bit_relaxed_get32
#define plt_bit_relaxed_set32 rte_bit_relaxed_set32
#define plt_bit_relaxed_clear32 rte_bit_relaxed_clear32
--
2.25.1
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH v2 2/2] common/cnxk: add mailbox debug trace
2024-11-14 9:31 ` [PATCH v2 1/2] common/cnxk: fix mailbox timeout issues Harman Kalra
@ 2024-11-14 9:31 ` Harman Kalra
2025-01-22 12:36 ` [EXTERNAL] [PATCH v2 1/2] common/cnxk: fix mailbox timeout issues Jerin Jacob
1 sibling, 0 replies; 7+ messages in thread
From: Harman Kalra @ 2024-11-14 9:31 UTC (permalink / raw)
To: Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao,
Harman Kalra
Cc: dev
Implementing mailbox debug trace points to trace function calls.
Signed-off-by: Harman Kalra <hkalra@marvell.com>
---
V2:
* Fixed line length exceeding 100 columns
drivers/common/cnxk/roc_dev.c | 87 ++++++++++++++++--------------
drivers/common/cnxk/roc_mbox.c | 34 ++++++++++++
drivers/common/cnxk/roc_mbox.h | 2 +
drivers/common/cnxk/roc_platform.c | 9 ++++
drivers/common/cnxk/roc_platform.h | 87 ++++++++++++++++++++++++++++--
5 files changed, 175 insertions(+), 44 deletions(-)
diff --git a/drivers/common/cnxk/roc_dev.c b/drivers/common/cnxk/roc_dev.c
index 0d1f9c4564..2dc437c491 100644
--- a/drivers/common/cnxk/roc_dev.c
+++ b/drivers/common/cnxk/roc_dev.c
@@ -26,7 +26,6 @@
/* RVU PF interrupt status as received from AF*/
#define RVU_PF_INTR_STATUS 0x3
-
static void *
mbox_mem_map(off_t off, size_t size)
{
@@ -83,6 +82,7 @@ pf_af_sync_msg(struct dev *dev, struct mbox_msghdr **rsp)
timeout += sleep;
if (timeout >= mbox->rsp_tmo) {
plt_err("Message timeout: %dms", mbox->rsp_tmo);
+ roc_trace_mbox_error("AFPF_SYNC", "RSP_TMO", -EIO);
rc = -EIO;
break;
}
@@ -137,6 +137,7 @@ af_pf_wait_msg(struct dev *dev, uint16_t vf, int num_msg)
timeout++;
if (timeout >= mbox->rsp_tmo) {
plt_err("Routed messages %d timeout: %dms", num_msg, mbox->rsp_tmo);
+ roc_trace_mbox_error("AFPF_WAIT", "RTD_MSG_TMO", -EIO);
break;
}
int_status = plt_read64(dev->mbox_reg_base + RVU_PF_INT);
@@ -149,16 +150,18 @@ af_pf_wait_msg(struct dev *dev, uint16_t vf, int num_msg)
plt_write64(~0ull, dev->mbox_reg_base + RVU_PF_INT_ENA_W1S);
req_hdr = (struct mbox_hdr *)((uintptr_t)mdev->mbase + mbox->rx_start);
- if (req_hdr->num_msgs != num_msg)
- plt_err("Routed messages: %d received: %d", num_msg,
- req_hdr->num_msgs);
+ if (req_hdr->num_msgs != num_msg) {
+ plt_err("Routed messages: %d received: %d", num_msg, req_hdr->num_msgs);
+ roc_trace_mbox_error("AFPF_WAIT", "RTD_INV_CNT", num_msg);
+ }
/* Get messages from mbox */
- offset = mbox->rx_start +
- PLT_ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN);
+ offset = mbox->rx_start + PLT_ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN);
for (i = 0; i < req_hdr->num_msgs; i++) {
msg = (struct mbox_msghdr *)((uintptr_t)mdev->mbase + offset);
size = mbox->rx_start + msg->next_msgoff - offset;
+ roc_trace_mbox_process("AFPF_WAIT", mbox_id2name(msg->id), req_hdr->num_msgs,
+ msg->pcifunc);
/* Reserve PF/VF mbox message */
size = PLT_ALIGN(size, MBOX_MSG_ALIGN);
@@ -288,6 +291,8 @@ vf_pf_process_msgs(struct dev *dev, uint16_t vf)
/* RVU_PF_FUNC_S */
msg->pcifunc = dev_pf_func(dev->pf, vf);
+ roc_trace_mbox_process("VFPF_MSG", mbox_id2name(msg->id), req_hdr->num_msgs,
+ msg->pcifunc);
if (msg->id == MBOX_MSG_READY) {
struct ready_msg_rsp *rsp;
@@ -333,8 +338,7 @@ vf_pf_process_msgs(struct dev *dev, uint16_t vf)
}
if (routed > 0) {
- plt_base_dbg("pf:%d routed %d messages from vf:%d to AF",
- dev->pf, routed, vf);
+ roc_trace_mbox_vf_pf_handle("RTG_AF", dev_pf_func(dev->pf, vf), routed);
/* PF will send the messages to AF and wait for responses */
af_pf_wait_msg(dev, vf, routed);
mbox_reset(dev->mbox, 0);
@@ -343,8 +347,7 @@ vf_pf_process_msgs(struct dev *dev, uint16_t vf)
/* Send mbox responses to VF */
if (mdev->num_msgs) {
- plt_base_dbg("pf:%d reply %d messages to vf:%d", dev->pf,
- mdev->num_msgs, vf);
+ roc_trace_mbox_vf_pf_handle("RSP_VF", dev_pf_func(dev->pf, vf), mdev->num_msgs);
mbox_msg_send(mbox, vf);
}
@@ -378,25 +381,24 @@ vf_pf_process_up_msgs(struct dev *dev, uint16_t vf)
switch (msg->id) {
case MBOX_MSG_CGX_LINK_EVENT:
- plt_base_dbg("PF: Msg 0x%x (%s) fn:0x%x (pf:%d,vf:%d)",
- msg->id, mbox_id2name(msg->id),
- msg->pcifunc, dev_get_pf(msg->pcifunc),
- dev_get_vf(msg->pcifunc));
+ roc_trace_mbox_process("VFPF_UPMSG_CGX", mbox_id2name(msg->id),
+ req_hdr->num_msgs, msg->pcifunc);
break;
case MBOX_MSG_CGX_PTP_RX_INFO:
- plt_base_dbg("PF: Msg 0x%x (%s) fn:0x%x (pf:%d,vf:%d)",
- msg->id, mbox_id2name(msg->id),
- msg->pcifunc, dev_get_pf(msg->pcifunc),
- dev_get_vf(msg->pcifunc));
+ roc_trace_mbox_process("VFPF_UPMSG_PTP", mbox_id2name(msg->id),
+ req_hdr->num_msgs, msg->pcifunc);
break;
default:
if (roc_rvu_lf_msg_id_range_check(dev->roc_rvu_lf, msg->id))
plt_base_dbg("PF: Msg 0x%x fn:0x%x (pf:%d,vf:%d)",
msg->id, msg->pcifunc, dev_get_pf(msg->pcifunc),
dev_get_vf(msg->pcifunc));
- else
- plt_err("Not handled UP msg 0x%x (%s) func:0x%x",
- msg->id, mbox_id2name(msg->id), msg->pcifunc);
+ else {
+ roc_trace_mbox_process("UNHND_UPMSG", mbox_id2name(msg->id),
+ req_hdr->num_msgs, msg->pcifunc);
+ plt_err("Not handled UP msg 0x%x (%s) func:0x%x", msg->id,
+ mbox_id2name(msg->id), msg->pcifunc);
+ }
}
offset = mbox->rx_start + msg->next_msgoff;
}
@@ -446,8 +448,7 @@ roc_vf_pf_mbox_irq(void *param)
if (!intr)
continue;
- plt_base_dbg("vfpf: %d intr: 0x%" PRIx64 " (pf:%d, vf:%d)", vfpf, intr, dev->pf,
- dev->vf);
+ roc_trace_mbox_interrupt("VFPF_IRQ", dev_pf_func(dev->pf, dev->vf), intr, 0);
/* Save and clear intr bits */
intrb.bits[vfpf] |= intr;
@@ -489,9 +490,8 @@ process_msgs(struct dev *dev, struct mbox *mbox)
msg = (struct mbox_msghdr *)((uintptr_t)mdev->mbase + offset);
msgs_acked++;
- plt_base_dbg("Message 0x%x (%s) pf:%d/vf:%d", msg->id,
- mbox_id2name(msg->id), dev_get_pf(msg->pcifunc),
- dev_get_vf(msg->pcifunc));
+ roc_trace_mbox_process("PRC_MSG", mbox_id2name(msg->id), req_hdr->num_msgs,
+ msg->pcifunc);
switch (msg->id) {
/* Add message id's that are handled here */
@@ -514,6 +514,8 @@ process_msgs(struct dev *dev, struct mbox *mbox)
} else {
plt_err("Message (%s) response has err=%d",
mbox_id2name(msg->id), msg->rc);
+ roc_trace_mbox_error("PRC_MSG_ERR", mbox_id2name(msg->id),
+ msg->rc);
}
}
break;
@@ -526,14 +528,19 @@ process_msgs(struct dev *dev, struct mbox *mbox)
} else {
plt_err("Message (%s) response has err=%d",
mbox_id2name(msg->id), msg->rc);
+ roc_trace_mbox_error("PRC_MSG_ERR", mbox_id2name(msg->id),
+ msg->rc);
}
}
break;
default:
- if (msg->rc)
+ if (msg->rc) {
plt_err("Message (%s) response has err=%d (%s)",
mbox_id2name(msg->id), msg->rc, roc_error_msg_get(msg->rc));
+ roc_trace_mbox_error("PRC_MSG_INV_RSP", mbox_id2name(msg->id),
+ msg->rc);
+ }
break;
}
offset = mbox->rx_start + msg->next_msgoff;
@@ -564,8 +571,8 @@ pf_vf_mbox_send_up_msg(struct dev *dev, void *rec_msg)
if (!(dev->active_vfs[vf / max_bits] & (BIT_ULL(vf))))
continue;
- plt_base_dbg("(%s) size: %zx to VF: %d",
- mbox_id2name(msg->hdr.id), size, vf);
+ roc_trace_mbox_process("PFVF_SND_UPMSG", mbox_id2name(msg->hdr.id), 1,
+ (uint16_t)dev_pf_func(dev->pf, vf));
/* Reserve PF/VF mbox message */
vf_msg = mbox_alloc_msg(vf_mbox, vf, size);
@@ -895,9 +902,10 @@ process_msgs_up(struct dev *dev, struct mbox *mbox)
for (i = 0; i < req_hdr->num_msgs; i++) {
msg = (struct mbox_msghdr *)((uintptr_t)mdev->mbase + offset);
- plt_base_dbg("Message 0x%x (%s) pf:%d/vf:%d", msg->id,
- mbox_id2name(msg->id), dev_get_pf(msg->pcifunc),
- dev_get_vf(msg->pcifunc));
+ plt_base_dbg("Message 0x%x (%s) pf:%d/vf:%d", msg->id, mbox_id2name(msg->id),
+ dev_get_pf(msg->pcifunc), dev_get_vf(msg->pcifunc));
+ roc_trace_mbox_process("PRC_UPMSG", mbox_id2name(msg->id), req_hdr->num_msgs,
+ msg->pcifunc);
if (roc_rvu_lf_msg_id_range_check(dev->roc_rvu_lf, msg->id)) {
size = mbox->rx_start + msg->next_msgoff - offset;
err = process_rvu_lf_msgs_up(dev, msg, size);
@@ -905,9 +913,11 @@ process_msgs_up(struct dev *dev, struct mbox *mbox)
plt_err("Error %d handling 0x%x RVU_LF up msg", err, msg->id);
} else {
err = mbox_process_msgs_up(dev, msg);
- if (err)
+ if (err) {
+ roc_trace_mbox_error("PRC_UPMSG_ERR", mbox_id2name(msg->id), err);
plt_err("Error %d handling 0x%x (%s)", err, msg->id,
mbox_id2name(msg->id));
+ }
}
offset = mbox->rx_start + msg->next_msgoff;
}
@@ -930,7 +940,7 @@ roc_pf_vf_mbox_irq_cn20k(void *param)
plt_base_dbg("Proceeding to check mbox UP messages if any");
plt_write64(intr, dev->mbox_reg_base + RVU_VF_INT);
- plt_base_dbg("Irq 0x%" PRIx64 "(pf:%d,vf:%d)", intr, dev->pf, dev->vf);
+ roc_trace_mbox_interrupt("PFVF_IRQ_20K", dev_pf_func(dev->pf, dev->vf), intr, 0);
/* If interrupt occurred for down message */
if (intr & BIT_ULL(1))
@@ -954,7 +964,7 @@ roc_af_pf_mbox_irq_cn20k(void *param)
plt_base_dbg("Proceeding to check mbox UP messages if any");
plt_write64(intr, dev->mbox_reg_base + RVU_PF_INT);
- plt_base_dbg("Irq 0x%" PRIx64 "(pf:%d,vf:%d)", intr, dev->pf, dev->vf);
+ roc_trace_mbox_interrupt("AFPF_IRQ_20K", dev_pf_func(dev->pf, dev->vf), intr, 0);
/* If interrupt occurred for down message */
if (intr & BIT_ULL(1))
@@ -978,12 +988,12 @@ roc_pf_vf_mbox_irq(void *param)
plt_base_dbg("Proceeding to check mbox UP messages if any");
plt_write64(intr, dev->mbox_reg_base + RVU_VF_INT);
- plt_base_dbg("Irq 0x%" PRIx64 "(pf:%d,vf:%d)", intr, dev->pf, dev->vf);
/* Reading for UP/DOWN message, next message sending will be delayed
* by 1ms until this region is zeroed mbox_wait_for_zero()
*/
mbox_data = plt_read64(dev->mbox_reg_base + RVU_VF_VFPF_MBOX0);
+ roc_trace_mbox_interrupt("PFVF_IRQ", dev_pf_func(dev->pf, dev->vf), intr, mbox_data);
/* If interrupt occurred for down message */
if (mbox_data & MBOX_DOWN_MSG) {
mbox_data &= ~MBOX_DOWN_MSG;
@@ -1015,12 +1025,12 @@ roc_af_pf_mbox_irq(void *param)
plt_base_dbg("Proceeding to check mbox UP messages if any");
plt_write64(intr, dev->mbox_reg_base + RVU_PF_INT);
- plt_base_dbg("Irq 0x%" PRIx64 "(pf:%d,vf:%d)", intr, dev->pf, dev->vf);
/* Reading for UP/DOWN message, next message sending will be delayed
* by 1ms until this region is zeroed mbox_wait_for_zero()
*/
mbox_data = plt_read64(dev->mbox_reg_base + RVU_PF_PFAF_MBOX0);
+ roc_trace_mbox_interrupt("AFPF_IRQ", dev_pf_func(dev->pf, dev->vf), intr, mbox_data);
/* If interrupt occurred for down message */
if (mbox_data & MBOX_DOWN_MSG) {
mbox_data &= ~MBOX_DOWN_MSG;
@@ -1327,8 +1337,7 @@ vf_flr_handle_msg(void *param, dev_intr_t *flr)
for (vf = 0; vf < max_vf; vf++) {
if (flr->bits[vf / max_bits] & BIT_ULL(vf % max_bits)) {
- plt_base_dbg("Process FLR vf:%d request (pf:%d, vf:%d)",
- vf, dev->pf, dev->vf);
+ roc_trace_mbox_vf_flr("PRC_VFFLR", vf, dev_pf_func(dev->pf, dev->vf));
/* Inform AF about VF reset */
vf_flr_send_msg(dev, vf);
flr->bits[vf / max_bits] &= ~(BIT_ULL(vf % max_bits));
diff --git a/drivers/common/cnxk/roc_mbox.c b/drivers/common/cnxk/roc_mbox.c
index a3b96e6e2b..c07202b964 100644
--- a/drivers/common/cnxk/roc_mbox.c
+++ b/drivers/common/cnxk/roc_mbox.c
@@ -263,6 +263,34 @@ mbox_wait_for_zero(struct mbox *mbox, int devid)
return data == 0;
}
+static inline void
+mbox_debug_region(const char *ops, struct mbox *mbox, int devid, uint64_t intr_val, int rc)
+{
+ struct mbox_dev *mdev = &mbox->dev[devid];
+ struct mbox_msghdr *msg_rx, *msg_tx;
+ uint64_t offset_rx, offset_tx;
+ char buf[BUFSIZ];
+
+ if (!plt_trace_point_is_enabled(&__cnxk_trace_mbox_region))
+ return;
+
+ /* Get messages from mbox */
+ offset_tx = mbox->tx_start + PLT_ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN);
+
+ msg_tx = (struct mbox_msghdr *)((uintptr_t)mdev->mbase + offset_tx);
+ snprintf(buf, BUFSIZ, "%s_TX", ops);
+ /* Only while sending message choose a rand identifier for the msg */
+ if (intr_val)
+ msg_tx->cookie = rand();
+ roc_trace_mbox_region(buf, mbox_id2name(msg_tx->id), msg_tx->pcifunc, (int)intr_val,
+ msg_tx->cookie);
+
+ offset_rx = mbox->rx_start + PLT_ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN);
+ msg_rx = (struct mbox_msghdr *)((uintptr_t)mdev->mbase + offset_rx);
+ snprintf(buf, BUFSIZ, "%s_RX", ops);
+ roc_trace_mbox_region(buf, mbox_id2name(msg_rx->id), msg_rx->pcifunc, rc, msg_rx->cookie);
+}
+
static void
mbox_msg_send_data(struct mbox *mbox, int devid, uint8_t data)
{
@@ -291,6 +319,8 @@ mbox_msg_send_data(struct mbox *mbox, int devid, uint8_t data)
(volatile void *)(mbox->reg_base + (mbox->trigger | (devid << mbox->tr_shift))));
intr_val |= (uint64_t)data;
+
+ mbox_debug_region("SND_MSG", mbox, devid, intr_val, 0);
/* The interrupt should be fired after num_msgs is written
* to the shared memory
*/
@@ -342,6 +372,8 @@ mbox_get_rsp(struct mbox *mbox, int devid, void **msg)
if (msg != NULL)
*msg = msghdr;
+ mbox_debug_region("GET_RSP", mbox, devid, 0, msghdr->rc);
+
return msghdr->rc;
}
@@ -438,6 +470,8 @@ mbox_wait(struct mbox *mbox, int devid, uint32_t rst_timo)
plt_memory_order_acquire),
tx_hdr->num_msgs, rx_hdr->num_msgs, mdev->msg_size, mdev->rsp_size);
+ mbox_debug_region("MBX_TMO", mbox, devid, 0, -EIO);
+
return -EIO;
}
}
diff --git a/drivers/common/cnxk/roc_mbox.h b/drivers/common/cnxk/roc_mbox.h
index e7a916d61c..3ca8442b55 100644
--- a/drivers/common/cnxk/roc_mbox.h
+++ b/drivers/common/cnxk/roc_mbox.h
@@ -32,6 +32,8 @@ struct mbox_msghdr {
uint16_t __io ver;
/* Offset of next msg within mailbox region */
uint16_t __io next_msgoff;
+ /* Unique ID for trace debug */
+ uint16_t __io cookie;
int __io rc; /* Msg processed response code */
};
diff --git a/drivers/common/cnxk/roc_platform.c b/drivers/common/cnxk/roc_platform.c
index f1e0a93d97..f7678280f5 100644
--- a/drivers/common/cnxk/roc_platform.c
+++ b/drivers/common/cnxk/roc_platform.c
@@ -3,6 +3,7 @@
*/
#include <rte_log.h>
+#include <rte_trace_point_register.h>
#include "roc_api.h"
@@ -85,6 +86,14 @@ roc_plt_init(void)
return 0;
}
+/* ROC trace points */
+RTE_TRACE_POINT_REGISTER(cnxk_trace_mbox_region, cnxk.mbox.region)
+RTE_TRACE_POINT_REGISTER(cnxk_trace_mbox_process, cnxk.mbox.process)
+RTE_TRACE_POINT_REGISTER(cnxk_trace_mbox_interrupt, cnxk.mbox.interrupt)
+RTE_TRACE_POINT_REGISTER(cnxk_trace_mbox_vf_flr, cnxk.mbox.vf.flr)
+RTE_TRACE_POINT_REGISTER(cnxk_trace_mbox_vf_pf_handle, cnxk.mbox.vf.pf.handle)
+RTE_TRACE_POINT_REGISTER(cnxk_trace_mbox_error, cnxk.mbox.error)
+
RTE_LOG_REGISTER_SUFFIX(cnxk_logtype_base, base, INFO);
RTE_LOG_REGISTER_SUFFIX(cnxk_logtype_mbox, mbox, NOTICE);
RTE_LOG_REGISTER_SUFFIX(cnxk_logtype_cpt, crypto, NOTICE);
diff --git a/drivers/common/cnxk/roc_platform.h b/drivers/common/cnxk/roc_platform.h
index 4076dbe94d..d62d1db8d1 100644
--- a/drivers/common/cnxk/roc_platform.h
+++ b/drivers/common/cnxk/roc_platform.h
@@ -5,12 +5,12 @@
#ifndef _ROC_PLATFORM_H_
#define _ROC_PLATFORM_H_
-#include <rte_compat.h>
+#include <bus_pci_driver.h>
#include <rte_alarm.h>
#include <rte_bitmap.h>
-#include <bus_pci_driver.h>
#include <rte_byteorder.h>
#include <rte_common.h>
+#include <rte_compat.h>
#include <rte_cycles.h>
#include <rte_ether.h>
#include <rte_interrupts.h>
@@ -25,6 +25,7 @@
#include <rte_string_fns.h>
#include <rte_tailq.h>
#include <rte_telemetry.h>
+#include <rte_trace_point.h>
#include "eal_filesystem.h"
@@ -265,6 +266,9 @@ plt_thread_is_valid(plt_thread_t thr)
#define plt_tel_data_add_dict_u64 rte_tel_data_add_dict_uint
#define plt_telemetry_register_cmd rte_telemetry_register_cmd
+/* Trace */
+#define plt_trace_point_is_enabled rte_trace_point_is_enabled
+
/* Log */
extern int cnxk_logtype_base;
#define RTE_LOGTYPE_base cnxk_logtype_base
@@ -379,9 +383,82 @@ plt_lmt_region_reserve_aligned(const char *name, size_t len, uint32_t align)
/* To ensure returned memory is physically contiguous, bounding
* the start and end address in 2M range.
*/
- return rte_memzone_reserve_bounded(name, len, SOCKET_ID_ANY,
- RTE_MEMZONE_IOVA_CONTIG,
- align, RTE_PGSIZE_2M);
+ return rte_memzone_reserve_bounded(name, len, SOCKET_ID_ANY, RTE_MEMZONE_IOVA_CONTIG, align,
+ RTE_PGSIZE_2M);
+}
+
+/* ROC trace points */
+RTE_TRACE_POINT(cnxk_trace_mbox_region,
+ RTE_TRACE_POINT_ARGS(const char *func, const char *msg, uint16_t pcifunc, int data,
+ uint16_t cookie),
+ rte_trace_point_emit_string(func);
+ rte_trace_point_emit_string(msg); rte_trace_point_emit_u16(pcifunc);
+ rte_trace_point_emit_int(data); rte_trace_point_emit_u16(cookie);)
+
+RTE_TRACE_POINT(cnxk_trace_mbox_process,
+ RTE_TRACE_POINT_ARGS(const char *func, const char *msg, uint16_t num_msgs,
+ uint16_t pcifunc),
+ rte_trace_point_emit_string(func);
+ rte_trace_point_emit_string(msg); rte_trace_point_emit_u16(num_msgs);
+ rte_trace_point_emit_u16(pcifunc);)
+
+RTE_TRACE_POINT(cnxk_trace_mbox_interrupt,
+ RTE_TRACE_POINT_ARGS(const char *func, int pcifunc, uint64_t intr,
+ uint64_t mbox_data),
+ rte_trace_point_emit_string(func);
+ rte_trace_point_emit_int(pcifunc); rte_trace_point_emit_u64(intr);
+ rte_trace_point_emit_u64(mbox_data);)
+
+RTE_TRACE_POINT(cnxk_trace_mbox_vf_flr,
+ RTE_TRACE_POINT_ARGS(const char *func, uint16_t from_vf, uint16_t pcifunc),
+ rte_trace_point_emit_string(func);
+ rte_trace_point_emit_u16(from_vf); rte_trace_point_emit_u16(pcifunc);)
+
+RTE_TRACE_POINT(cnxk_trace_mbox_vf_pf_handle,
+ RTE_TRACE_POINT_ARGS(const char *ops, uint16_t pcifunc, int data),
+ rte_trace_point_emit_string(ops);
+ rte_trace_point_emit_u16(pcifunc); rte_trace_point_emit_int(data);)
+
+RTE_TRACE_POINT(cnxk_trace_mbox_error,
+ RTE_TRACE_POINT_ARGS(const char *func, const char *msg, int data),
+ rte_trace_point_emit_string(func);
+ rte_trace_point_emit_string(msg); rte_trace_point_emit_int(data);)
+
+static inline void
+roc_trace_mbox_region(const char *func, const char *msg, uint16_t pcifunc, int data,
+ uint16_t cookie)
+{
+ cnxk_trace_mbox_region(func, msg, pcifunc, data, cookie);
+}
+
+static inline void
+roc_trace_mbox_process(const char *func, const char *msg, uint16_t num_msgs, uint16_t pcifunc)
+{
+ cnxk_trace_mbox_process(func, msg, num_msgs, pcifunc);
+}
+
+static inline void
+roc_trace_mbox_interrupt(const char *func, int pcifunc, uint64_t intr, uint64_t mbox_data)
+{
+ cnxk_trace_mbox_interrupt(func, pcifunc, intr, mbox_data);
+}
+
+static inline void
+roc_trace_mbox_vf_pf_handle(const char *func, uint16_t pcifunc, int data)
+{
+ cnxk_trace_mbox_vf_pf_handle(func, pcifunc, data);
+}
+
+static inline void
+roc_trace_mbox_error(const char *func, const char *msg, int data)
+{
+ cnxk_trace_mbox_error(func, msg, data);
+}
+
+static inline void
+roc_trace_mbox_vf_flr(const char *func, uint16_t from_vf, uint16_t pcifunc)
+{
+ cnxk_trace_mbox_vf_flr(func, from_vf, pcifunc);
}
#endif /* _ROC_PLATFORM_H_ */
--
2.25.1
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 2/2] common/cnxk: add mailbox debug trace
2024-11-04 13:08 ` [PATCH 2/2] common/cnxk: add mailbox debug trace Harman Kalra
2024-11-07 8:04 ` [EXTERNAL] " Jerin Jacob
@ 2024-11-14 16:38 ` Stephen Hemminger
1 sibling, 0 replies; 7+ messages in thread
From: Stephen Hemminger @ 2024-11-14 16:38 UTC (permalink / raw)
To: Harman Kalra
Cc: Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao, dev
On Mon, 4 Nov 2024 18:38:00 +0530
Harman Kalra <hkalra@marvell.com> wrote:
> Implementing mailbox debug trace points to trace function calls.
>
> Signed-off-by: Harman Kalra <hkalra@marvell.com>
Missing use of RTE_ATOMIC() to flag atomic variables.
-----------------------Summary of failed steps-----------------------
"ubuntu-22.04-clang-stdatomic" failed at step Build and test
----------------------End summary of failed steps--------------------
-------------------------------BEGIN LOGS----------------------------
####################################################################################
#### [Begin job log] "ubuntu-22.04-clang-stdatomic" at step Build and test
####################################################################################
atomic_store_explicit(ptr, val, memorder)
^ ~~~
/usr/lib/llvm-14/lib/clang/14.0.0/include/stdatomic.h:127:31: note: expanded from macro 'atomic_store_explicit'
#define atomic_store_explicit __c11_atomic_store
^
../drivers/common/cnxk/roc_dev.c:551:2: error: address argument to atomic operation must be a pointer to _Atomic type ('uint16_t *' (aka 'unsigned short *') invalid)
plt_atomic_store_explicit(&mdev->msgs_acked, msgs_acked, plt_memory_order_release);
^ ~~~~~~~~~~~~~~~~~
../drivers/common/cnxk/roc_platform.h:215:35: note: expanded from macro 'plt_atomic_store_explicit'
#define plt_atomic_store_explicit rte_atomic_store_explicit
^
../lib/eal/include/rte_stdatomic.h:72:2: note: expanded from macro 'rte_atomic_store_explicit'
atomic_store_explicit(ptr, val, memorder)
^ ~~~
/usr/lib/llvm-14/lib/clang/14.0.0/include/stdatomic.h:127:31: note: expanded from macro 'atomic_store_explicit'
#define atomic_store_explicit __c11_atomic_store
^
2 errors generated.
[749/3145] Compiling C object drivers/libtmp_rte_common_cnxk.a.p/common_cnxk_roc_cpt.c.o
[750/3145] Compiling C object drivers/libtmp_rte_common_cnxk.a.p/common_cnxk_roc_cpt_debug.c.o
[751/3145] Compiling C object drivers/libtmp_rte_common_cnxk.a.p/common_cnxk_roc_dpi.c.o
[752/3145] Compiling C object drivers/libtmp_rte_common_cnxk.a.p/common_cnxk_roc_eswitch.c.o
[753/3145] Compiling C object lib/librte_pipeline.a.p/pipeline_rte_table_action.c.o
ninja: build stopped: subcommand failed.
##[error]Process completed with exit code 1.
####################################################################################
#### [End job log] "ubuntu-22.04-clang-stdatomic" at step Build and test
##############################################################################
^ permalink raw reply [flat|nested] 7+ messages in thread
* RE: [EXTERNAL] [PATCH v2 1/2] common/cnxk: fix mailbox timeout issues
2024-11-14 9:31 ` [PATCH v2 1/2] common/cnxk: fix mailbox timeout issues Harman Kalra
2024-11-14 9:31 ` [PATCH v2 2/2] common/cnxk: add mailbox debug trace Harman Kalra
@ 2025-01-22 12:36 ` Jerin Jacob
1 sibling, 0 replies; 7+ messages in thread
From: Jerin Jacob @ 2025-01-22 12:36 UTC (permalink / raw)
To: Harman Kalra, Nithin Kumar Dabilpuram, Kiran Kumar Kokkilagadda,
Sunil Kumar Kori, Satha Koteswara Rao Kottidi, Harman Kalra
Cc: dev
[-- Attachment #1: Type: text/plain, Size: 745 bytes --]
> -----Original Message-----
> From: Harman Kalra <hkalra@marvell.com>
> Sent: Thursday, November 14, 2024 3:02 PM
> To: Nithin Kumar Dabilpuram <ndabilpuram@marvell.com>; Kiran Kumar
> Kokkilagadda <kirankumark@marvell.com>; Sunil Kumar Kori
> <skori@marvell.com>; Satha Koteswara Rao Kottidi
> <skoteshwar@marvell.com>; Harman Kalra <hkalra@marvell.com>
> Cc: dev@dpdk.org
> Subject: [EXTERNAL] [PATCH v2 1/2] common/cnxk: fix mailbox timeout issues
>
> Found couple of reasons causing mailbox timeout: * msgs_acked value by
1)Two issues - please make it as two patches
2)fix https://mails.dpdk.org/archives/test-report/2024-November/825162.html
3)fix https://mails.dpdk.org/archives/test-report/2024-November/825257.html
[-- Attachment #2: winmail.dat --]
[-- Type: application/ms-tnef, Size: 14930 bytes --]
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2025-01-22 12:37 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-11-04 13:07 [PATCH 1/2] common/cnxk: fix mailbox timeout issues Harman Kalra
2024-11-04 13:08 ` [PATCH 2/2] common/cnxk: add mailbox debug trace Harman Kalra
2024-11-07 8:04 ` [EXTERNAL] " Jerin Jacob
2024-11-14 16:38 ` Stephen Hemminger
2024-11-14 9:31 ` [PATCH v2 1/2] common/cnxk: fix mailbox timeout issues Harman Kalra
2024-11-14 9:31 ` [PATCH v2 2/2] common/cnxk: add mailbox debug trace Harman Kalra
2025-01-22 12:36 ` [EXTERNAL] [PATCH v2 1/2] common/cnxk: fix mailbox timeout issues Jerin Jacob
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).