* [PATCH 1/2] common/cnxk: fix mailbox timeout issues @ 2024-11-04 13:07 Harman Kalra 2024-11-04 13:08 ` [PATCH 2/2] common/cnxk: add mailbox debug trace Harman Kalra 2024-11-14 9:31 ` [PATCH v2 1/2] common/cnxk: fix mailbox timeout issues Harman Kalra 0 siblings, 2 replies; 7+ messages in thread From: Harman Kalra @ 2024-11-04 13:07 UTC (permalink / raw) To: Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao, Harman Kalra Cc: dev Found couple of reasons causing mailbox timeout: * msgs_acked value by interrupt thread was not getting synced to thread polling it. * mbox_data value sent by AF was seen 0 causing neither down nor up msg processing. Fixes: 9bd368ca311a ("common/cnxk: enable PF VF mailbox") Fixes: 61deac72abbf ("common/cnxk: add CN20KA mbox support") Signed-off-by: Harman Kalra <hkalra@marvell.com> --- drivers/common/cnxk/roc_dev.c | 106 ++++++++++++++++++++++++----- drivers/common/cnxk/roc_dev_priv.h | 11 +++ drivers/common/cnxk/roc_mbox.c | 10 ++- 3 files changed, 104 insertions(+), 23 deletions(-) diff --git a/drivers/common/cnxk/roc_dev.c b/drivers/common/cnxk/roc_dev.c index 32409f2ef3..1127d8185c 100644 --- a/drivers/common/cnxk/roc_dev.c +++ b/drivers/common/cnxk/roc_dev.c @@ -401,8 +401,7 @@ vf_pf_process_up_msgs(struct dev *dev, uint16_t vf) offset = mbox->rx_start + msg->next_msgoff; } mbox_reset(mbox, vf); - mdev->msgs_acked = msgs_acked; - plt_wmb(); + __atomic_store_n(&mdev->msgs_acked, msgs_acked, __ATOMIC_RELEASE); return i; } @@ -542,8 +541,7 @@ process_msgs(struct dev *dev, struct mbox *mbox) mbox_reset(mbox, 0); /* Update acked if someone is waiting a message - mbox_wait is waiting */ - mdev->msgs_acked = msgs_acked; - plt_wmb(); + __atomic_store_n(&mdev->msgs_acked, msgs_acked, __ATOMIC_RELEASE); } /* Copies the message received from AF and sends it to VF */ @@ -920,12 +918,59 @@ process_msgs_up(struct dev *dev, struct mbox *mbox) } } +/* IRQ to VF from PF - VF context (interrupt thread) */ +static void +roc_pf_vf_mbox_irq_cn20k(void *param) +{ + struct dev *dev = param; + uint64_t intr; + + intr = plt_read64(dev->mbox_reg_base + RVU_VF_INT); + if (intr == 0) + plt_base_dbg("Proceeding to check mbox UP messages if any"); + + plt_write64(intr, dev->mbox_reg_base + RVU_VF_INT); + plt_base_dbg("Irq 0x%" PRIx64 "(pf:%d,vf:%d)", intr, dev->pf, dev->vf); + + /* If interrupt occurred for down message */ + if (intr & BIT_ULL(1)) + /* First process all configuration messages */ + process_msgs(dev, dev->mbox); + + /* If interrupt occurred for UP message */ + if (intr & BIT_ULL(0)) + process_msgs_up(dev, &dev->mbox_up); +} + +/* IRQ to PF from AF - PF context (interrupt thread) */ +static void +roc_af_pf_mbox_irq_cn20k(void *param) +{ + struct dev *dev = param; + uint64_t intr; + + intr = plt_read64(dev->mbox_reg_base + RVU_PF_INT); + if (intr == 0) + plt_base_dbg("Proceeding to check mbox UP messages if any"); + + plt_write64(intr, dev->mbox_reg_base + RVU_PF_INT); + plt_base_dbg("Irq 0x%" PRIx64 "(pf:%d,vf:%d)", intr, dev->pf, dev->vf); + + /* If interrupt occurred for down message */ + if (intr & BIT_ULL(1)) + process_msgs(dev, dev->mbox); + + /* If interrupt occurred for up message */ + if (intr & BIT_ULL(0)) + process_msgs_up(dev, &dev->mbox_up); +} + /* IRQ to VF from PF - VF context (interrupt thread) */ static void roc_pf_vf_mbox_irq(void *param) { struct dev *dev = param; - uint64_t mbox_data; + uint64_t mbox_data = 0; uint64_t intr; intr = plt_read64(dev->mbox_reg_base + RVU_VF_INT); @@ -940,7 +985,7 @@ roc_pf_vf_mbox_irq(void *param) */ mbox_data = plt_read64(dev->mbox_reg_base + RVU_VF_VFPF_MBOX0); /* If interrupt occurred for down message */ - if (mbox_data & MBOX_DOWN_MSG || intr & BIT_ULL(1)) { + if (mbox_data & MBOX_DOWN_MSG) { mbox_data &= ~MBOX_DOWN_MSG; plt_write64(mbox_data, dev->mbox_reg_base + RVU_VF_VFPF_MBOX0); @@ -948,7 +993,7 @@ roc_pf_vf_mbox_irq(void *param) process_msgs(dev, dev->mbox); } /* If interrupt occurred for UP message */ - if (mbox_data & MBOX_UP_MSG || intr & BIT_ULL(0)) { + if (mbox_data & MBOX_UP_MSG) { mbox_data &= ~MBOX_UP_MSG; plt_write64(mbox_data, dev->mbox_reg_base + RVU_VF_VFPF_MBOX0); @@ -962,7 +1007,7 @@ static void roc_af_pf_mbox_irq(void *param) { struct dev *dev = param; - uint64_t mbox_data; + uint64_t mbox_data = 0; uint64_t intr; intr = plt_read64(dev->mbox_reg_base + RVU_PF_INT); @@ -977,7 +1022,7 @@ roc_af_pf_mbox_irq(void *param) */ mbox_data = plt_read64(dev->mbox_reg_base + RVU_PF_PFAF_MBOX0); /* If interrupt occurred for down message */ - if (mbox_data & MBOX_DOWN_MSG || intr & BIT_ULL(1)) { + if (mbox_data & MBOX_DOWN_MSG) { mbox_data &= ~MBOX_DOWN_MSG; plt_write64(mbox_data, dev->mbox_reg_base + RVU_PF_PFAF_MBOX0); @@ -985,7 +1030,7 @@ roc_af_pf_mbox_irq(void *param) process_msgs(dev, dev->mbox); } /* If interrupt occurred for up message */ - if (mbox_data & MBOX_UP_MSG || intr & BIT_ULL(0)) { + if (mbox_data & MBOX_UP_MSG) { mbox_data &= ~MBOX_UP_MSG; plt_write64(mbox_data, dev->mbox_reg_base + RVU_PF_PFAF_MBOX0); @@ -1045,7 +1090,8 @@ mbox_register_pf_irq(struct plt_pci_device *pci_dev, struct dev *dev) } } /* MBOX interrupt AF <-> PF */ - rc = dev_irq_register(intr_handle, roc_af_pf_mbox_irq, dev, dev->mbox_plat->pfaf_vec); + rc = dev_irq_register(intr_handle, dev->mbox_plat->ops->af_pf_mbox_irq, dev, + dev->mbox_plat->pfaf_vec); if (rc) { plt_err("Fail to register AF<->PF mbox irq"); return rc; @@ -1073,7 +1119,8 @@ mbox_register_vf_irq(struct plt_pci_device *pci_dev, struct dev *dev) plt_write64(~0ull, dev->mbox_reg_base + RVU_VF_INT_ENA_W1C); /* MBOX interrupt PF <-> VF */ - rc = dev_irq_register(intr_handle, roc_pf_vf_mbox_irq, dev, RVU_VF_INT_VEC_MBOX); + rc = dev_irq_register(intr_handle, dev->mbox_plat->ops->pf_vf_mbox_irq, dev, + RVU_VF_INT_VEC_MBOX); if (rc) { plt_err("Fail to register PF<->VF mbox irq"); return rc; @@ -1127,7 +1174,8 @@ mbox_unregister_pf_irq(struct plt_pci_device *pci_dev, struct dev *dev) } /* MBOX interrupt AF <-> PF */ - dev_irq_unregister(intr_handle, roc_af_pf_mbox_irq, dev, dev->mbox_plat->pfaf_vec); + dev_irq_unregister(intr_handle, dev->mbox_plat->ops->af_pf_mbox_irq, dev, + dev->mbox_plat->pfaf_vec); } static void @@ -1139,7 +1187,8 @@ mbox_unregister_vf_irq(struct plt_pci_device *pci_dev, struct dev *dev) plt_write64(~0ull, dev->mbox_reg_base + RVU_VF_INT_ENA_W1C); /* Unregister the interrupt handler */ - dev_irq_unregister(intr_handle, roc_pf_vf_mbox_irq, dev, RVU_VF_INT_VEC_MBOX); + dev_irq_unregister(intr_handle, dev->mbox_plat->ops->pf_vf_mbox_irq, dev, + RVU_VF_INT_VEC_MBOX); } void @@ -1599,10 +1648,17 @@ dev_cache_line_size_valid(void) return true; } -static void +static int mbox_platform_changes(struct mbox_platform *mbox_plat, uintptr_t bar2, uintptr_t bar4, bool is_vf) { - int i; + int i, rc = 0; + + /* Allocate memory for device ops */ + mbox_plat->ops = plt_zmalloc(sizeof(struct mbox_ops), 0); + if (mbox_plat->ops == NULL) { + rc = -ENOMEM; + goto fail; + } if (roc_model_is_cn20k()) { /* For CN20K, AF allocates mbox memory in DRAM and writes PF @@ -1613,6 +1669,9 @@ mbox_platform_changes(struct mbox_platform *mbox_plat, uintptr_t bar2, uintptr_t mbox_plat->mbox_region_base = bar2 + (RVU_PFX_FUNC_PFAF_MBOX + ((uint64_t)RVU_BLOCK_ADDR_MBOX << RVU_FUNC_BLKADDR_SHIFT)); + /* Mbox operations */ + mbox_plat->ops->af_pf_mbox_irq = roc_af_pf_mbox_irq_cn20k; + mbox_plat->ops->pf_vf_mbox_irq = roc_pf_vf_mbox_irq_cn20k; /* Interrupt vectors */ mbox_plat->pfaf_vec = RVU_MBOX_PF_INT_VEC_AFPF_MBOX; mbox_plat->pfvf_mbox0_vec = RVU_MBOX_PF_INT_VEC_VFPF_MBOX0; @@ -1630,6 +1689,9 @@ mbox_platform_changes(struct mbox_platform *mbox_plat, uintptr_t bar2, uintptr_t } else { mbox_plat->mbox_reg_base = bar2; mbox_plat->mbox_region_base = bar4; + /* Mbox operations */ + mbox_plat->ops->af_pf_mbox_irq = roc_af_pf_mbox_irq; + mbox_plat->ops->pf_vf_mbox_irq = roc_pf_vf_mbox_irq; mbox_plat->pfaf_vec = RVU_PF_INT_VEC_AFPF_MBOX; mbox_plat->pfvf_mbox0_vec = RVU_PF_INT_VEC_VFPF_MBOX0; mbox_plat->pfvf_mbox1_vec = RVU_PF_INT_VEC_VFPF_MBOX1; @@ -1647,6 +1709,8 @@ mbox_platform_changes(struct mbox_platform *mbox_plat, uintptr_t bar2, uintptr_t if (roc_model_is_cn10k()) mbox_plat->mbox_region_base = bar2 + RVU_VF_MBOX_REGION; } +fail: + return rc; } int @@ -1678,7 +1742,12 @@ dev_init(struct dev *dev, struct plt_pci_device *pci_dev) rc = -ENOMEM; goto fail; } - mbox_platform_changes(dev->mbox_plat, bar2, bar4, is_vf); + + if (mbox_platform_changes(dev->mbox_plat, bar2, bar4, is_vf)) { + plt_err("Failed to populate platform specific changes"); + rc = -ENOMEM; + goto mbox_plat_free; + } mbox_reg_base = dev->mbox_plat->mbox_reg_base; mbox_region_base = dev->mbox_plat->mbox_region_base; @@ -1824,6 +1893,8 @@ dev_init(struct dev *dev, struct plt_pci_device *pci_dev) mbox_fini(dev->mbox); mbox_fini(&dev->mbox_up); error: + plt_free(dev->mbox_plat->ops); +mbox_plat_free: plt_free(dev->mbox_plat); fail: return rc; @@ -1883,6 +1954,7 @@ dev_fini(struct dev *dev, struct plt_pci_device *pci_dev) mbox_fini(mbox); dev->mbox_active = 0; + plt_free(dev->mbox_plat->ops); plt_free(dev->mbox_plat); /* Disable MSIX vectors */ dev_irqs_disable(intr_handle); diff --git a/drivers/common/cnxk/roc_dev_priv.h b/drivers/common/cnxk/roc_dev_priv.h index c766183196..a8e40fa9d5 100644 --- a/drivers/common/cnxk/roc_dev_priv.h +++ b/drivers/common/cnxk/roc_dev_priv.h @@ -106,6 +106,15 @@ struct mbox_sync { pthread_mutex_t mutex; }; +/* AF PF VF mbox interrupt callbacks */ +typedef void (*af_pf_mbox_irq_t)(void *param); +typedef void (*pf_vf_mbox_irq_t)(void *param); + +struct mbox_ops { + af_pf_mbox_irq_t af_pf_mbox_irq; + pf_vf_mbox_irq_t pf_vf_mbox_irq; +}; + struct mbox_platform { uint8_t pfaf_vec; uint8_t pfvf_mbox0_vec; @@ -120,6 +129,8 @@ struct mbox_platform { uint64_t pfvf1_mbox_int_ena_w1c[MAX_VFPF_DWORD_BITS]; uintptr_t mbox_reg_base; uintptr_t mbox_region_base; + /* Mbox operations */ + struct mbox_ops *ops; }; struct dev { diff --git a/drivers/common/cnxk/roc_mbox.c b/drivers/common/cnxk/roc_mbox.c index db77babfdb..eb5bd771fe 100644 --- a/drivers/common/cnxk/roc_mbox.c +++ b/drivers/common/cnxk/roc_mbox.c @@ -275,7 +275,7 @@ mbox_msg_send_data(struct mbox *mbox, int devid, uint8_t data) tx_hdr->msg_size = mdev->msg_size; mdev->msg_size = 0; mdev->rsp_size = 0; - mdev->msgs_acked = 0; + __atomic_store_n(&mdev->msgs_acked, 0, __ATOMIC_RELEASE); /* num_msgs != 0 signals to the peer that the buffer has a number of * messages. So this should be written after copying txmem @@ -417,7 +417,7 @@ mbox_wait(struct mbox *mbox, int devid, uint32_t rst_timo) * mdev->msgs_acked are incremented at process_msgs() in interrupt * thread context. */ - while (mdev->num_msgs > mdev->msgs_acked) { + while (mdev->num_msgs > __atomic_load_n(&mdev->msgs_acked, __ATOMIC_ACQUIRE)) { plt_delay_us(sleep); timeout += sleep; if (timeout >= rst_timo) { @@ -433,13 +433,11 @@ mbox_wait(struct mbox *mbox, int devid, uint32_t rst_timo) "(tx/rx num_msgs: %d/%d), msg_size: %d, " "rsp_size: %d", devid, timeout, mdev->num_msgs, - mdev->msgs_acked, tx_hdr->num_msgs, - rx_hdr->num_msgs, mdev->msg_size, - mdev->rsp_size); + __atomic_load_n(&mdev->msgs_acked, __ATOMIC_ACQUIRE), + tx_hdr->num_msgs, rx_hdr->num_msgs, mdev->msg_size, mdev->rsp_size); return -EIO; } - plt_rmb(); } return 0; } -- 2.46.0.469.g4590f2e941 ^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH 2/2] common/cnxk: add mailbox debug trace 2024-11-04 13:07 [PATCH 1/2] common/cnxk: fix mailbox timeout issues Harman Kalra @ 2024-11-04 13:08 ` Harman Kalra 2024-11-07 8:04 ` [EXTERNAL] " Jerin Jacob 2024-11-14 16:38 ` Stephen Hemminger 2024-11-14 9:31 ` [PATCH v2 1/2] common/cnxk: fix mailbox timeout issues Harman Kalra 1 sibling, 2 replies; 7+ messages in thread From: Harman Kalra @ 2024-11-04 13:08 UTC (permalink / raw) To: Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao, Harman Kalra Cc: dev Implementing mailbox debug trace points to trace function calls. Signed-off-by: Harman Kalra <hkalra@marvell.com> --- drivers/common/cnxk/roc_dev.c | 89 ++++++++++++++++-------------- drivers/common/cnxk/roc_mbox.c | 34 ++++++++++++ drivers/common/cnxk/roc_mbox.h | 2 + drivers/common/cnxk/roc_platform.c | 9 +++ drivers/common/cnxk/roc_platform.h | 87 +++++++++++++++++++++++++++-- 5 files changed, 175 insertions(+), 46 deletions(-) diff --git a/drivers/common/cnxk/roc_dev.c b/drivers/common/cnxk/roc_dev.c index 1127d8185c..e9bf8c01b4 100644 --- a/drivers/common/cnxk/roc_dev.c +++ b/drivers/common/cnxk/roc_dev.c @@ -26,7 +26,6 @@ /* RVU PF interrupt status as received from AF*/ #define RVU_PF_INTR_STATUS 0x3 - static void * mbox_mem_map(off_t off, size_t size) { @@ -83,6 +82,7 @@ pf_af_sync_msg(struct dev *dev, struct mbox_msghdr **rsp) timeout += sleep; if (timeout >= mbox->rsp_tmo) { plt_err("Message timeout: %dms", mbox->rsp_tmo); + roc_trace_mbox_error("AFPF_SYNC", "RSP_TMO", -EIO); rc = -EIO; break; } @@ -137,6 +137,7 @@ af_pf_wait_msg(struct dev *dev, uint16_t vf, int num_msg) timeout++; if (timeout >= mbox->rsp_tmo) { plt_err("Routed messages %d timeout: %dms", num_msg, mbox->rsp_tmo); + roc_trace_mbox_error("AFPF_WAIT", "RTD_MSG_TMO", -EIO); break; } int_status = plt_read64(dev->mbox_reg_base + RVU_PF_INT); @@ -149,16 +150,18 @@ af_pf_wait_msg(struct dev *dev, uint16_t vf, int num_msg) plt_write64(~0ull, dev->mbox_reg_base + RVU_PF_INT_ENA_W1S); req_hdr = (struct mbox_hdr *)((uintptr_t)mdev->mbase + mbox->rx_start); - if (req_hdr->num_msgs != num_msg) - plt_err("Routed messages: %d received: %d", num_msg, - req_hdr->num_msgs); + if (req_hdr->num_msgs != num_msg) { + plt_err("Routed messages: %d received: %d", num_msg, req_hdr->num_msgs); + roc_trace_mbox_error("AFPF_WAIT", "RTD_INV_CNT", num_msg); + } /* Get messages from mbox */ - offset = mbox->rx_start + - PLT_ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN); + offset = mbox->rx_start + PLT_ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN); for (i = 0; i < req_hdr->num_msgs; i++) { msg = (struct mbox_msghdr *)((uintptr_t)mdev->mbase + offset); size = mbox->rx_start + msg->next_msgoff - offset; + roc_trace_mbox_process("AFPF_WAIT", mbox_id2name(msg->id), req_hdr->num_msgs, + msg->pcifunc); /* Reserve PF/VF mbox message */ size = PLT_ALIGN(size, MBOX_MSG_ALIGN); @@ -288,6 +291,8 @@ vf_pf_process_msgs(struct dev *dev, uint16_t vf) /* RVU_PF_FUNC_S */ msg->pcifunc = dev_pf_func(dev->pf, vf); + roc_trace_mbox_process("VFPF_MSG", mbox_id2name(msg->id), req_hdr->num_msgs, + msg->pcifunc); if (msg->id == MBOX_MSG_READY) { struct ready_msg_rsp *rsp; @@ -333,8 +338,7 @@ vf_pf_process_msgs(struct dev *dev, uint16_t vf) } if (routed > 0) { - plt_base_dbg("pf:%d routed %d messages from vf:%d to AF", - dev->pf, routed, vf); + roc_trace_mbox_vf_pf_handle("RTG_AF", dev_pf_func(dev->pf, vf), routed); /* PF will send the messages to AF and wait for responses */ af_pf_wait_msg(dev, vf, routed); mbox_reset(dev->mbox, 0); @@ -343,8 +347,7 @@ vf_pf_process_msgs(struct dev *dev, uint16_t vf) /* Send mbox responses to VF */ if (mdev->num_msgs) { - plt_base_dbg("pf:%d reply %d messages to vf:%d", dev->pf, - mdev->num_msgs, vf); + roc_trace_mbox_vf_pf_handle("RSP_VF", dev_pf_func(dev->pf, vf), mdev->num_msgs); mbox_msg_send(mbox, vf); } @@ -378,25 +381,24 @@ vf_pf_process_up_msgs(struct dev *dev, uint16_t vf) switch (msg->id) { case MBOX_MSG_CGX_LINK_EVENT: - plt_base_dbg("PF: Msg 0x%x (%s) fn:0x%x (pf:%d,vf:%d)", - msg->id, mbox_id2name(msg->id), - msg->pcifunc, dev_get_pf(msg->pcifunc), - dev_get_vf(msg->pcifunc)); + roc_trace_mbox_process("VFPF_UPMSG_CGX", mbox_id2name(msg->id), + req_hdr->num_msgs, msg->pcifunc); break; case MBOX_MSG_CGX_PTP_RX_INFO: - plt_base_dbg("PF: Msg 0x%x (%s) fn:0x%x (pf:%d,vf:%d)", - msg->id, mbox_id2name(msg->id), - msg->pcifunc, dev_get_pf(msg->pcifunc), - dev_get_vf(msg->pcifunc)); + roc_trace_mbox_process("VFPF_UPMSG_PTP", mbox_id2name(msg->id), + req_hdr->num_msgs, msg->pcifunc); break; default: if (roc_rvu_lf_msg_id_range_check(dev->roc_rvu_lf, msg->id)) plt_base_dbg("PF: Msg 0x%x fn:0x%x (pf:%d,vf:%d)", msg->id, msg->pcifunc, dev_get_pf(msg->pcifunc), dev_get_vf(msg->pcifunc)); - else - plt_err("Not handled UP msg 0x%x (%s) func:0x%x", - msg->id, mbox_id2name(msg->id), msg->pcifunc); + else { + roc_trace_mbox_process("UNHND_UPMSG", mbox_id2name(msg->id), + req_hdr->num_msgs, msg->pcifunc); + plt_err("Not handled UP msg 0x%x (%s) func:0x%x", msg->id, + mbox_id2name(msg->id), msg->pcifunc); + } } offset = mbox->rx_start + msg->next_msgoff; } @@ -446,8 +448,7 @@ roc_vf_pf_mbox_irq(void *param) if (!intr) continue; - plt_base_dbg("vfpf: %d intr: 0x%" PRIx64 " (pf:%d, vf:%d)", vfpf, intr, dev->pf, - dev->vf); + roc_trace_mbox_interrupt("VFPF_IRQ", dev_pf_func(dev->pf, dev->vf), intr, 0); /* Save and clear intr bits */ intrb.bits[vfpf] |= intr; @@ -489,9 +490,8 @@ process_msgs(struct dev *dev, struct mbox *mbox) msg = (struct mbox_msghdr *)((uintptr_t)mdev->mbase + offset); msgs_acked++; - plt_base_dbg("Message 0x%x (%s) pf:%d/vf:%d", msg->id, - mbox_id2name(msg->id), dev_get_pf(msg->pcifunc), - dev_get_vf(msg->pcifunc)); + roc_trace_mbox_process("PRC_MSG", mbox_id2name(msg->id), req_hdr->num_msgs, + msg->pcifunc); switch (msg->id) { /* Add message id's that are handled here */ @@ -514,6 +514,8 @@ process_msgs(struct dev *dev, struct mbox *mbox) } else { plt_err("Message (%s) response has err=%d", mbox_id2name(msg->id), msg->rc); + roc_trace_mbox_error("PRC_MSG_ERR", mbox_id2name(msg->id), + msg->rc); } } break; @@ -526,14 +528,19 @@ process_msgs(struct dev *dev, struct mbox *mbox) } else { plt_err("Message (%s) response has err=%d", mbox_id2name(msg->id), msg->rc); + roc_trace_mbox_error("PRC_MSG_ERR", mbox_id2name(msg->id), + msg->rc); } } break; default: - if (msg->rc) + if (msg->rc) { plt_err("Message (%s) response has err=%d (%s)", mbox_id2name(msg->id), msg->rc, roc_error_msg_get(msg->rc)); + roc_trace_mbox_error("PRC_MSG_INV_RSP", mbox_id2name(msg->id), + msg->rc); + } break; } offset = mbox->rx_start + msg->next_msgoff; @@ -564,8 +571,8 @@ pf_vf_mbox_send_up_msg(struct dev *dev, void *rec_msg) if (!(dev->active_vfs[vf / max_bits] & (BIT_ULL(vf)))) continue; - plt_base_dbg("(%s) size: %zx to VF: %d", - mbox_id2name(msg->hdr.id), size, vf); + roc_trace_mbox_process("PFVF_SND_UPMSG", mbox_id2name(msg->hdr.id), 1, + (uint16_t)dev_pf_func(dev->pf, vf)); /* Reserve PF/VF mbox message */ vf_msg = mbox_alloc_msg(vf_mbox, vf, size); @@ -895,9 +902,9 @@ process_msgs_up(struct dev *dev, struct mbox *mbox) for (i = 0; i < req_hdr->num_msgs; i++) { msg = (struct mbox_msghdr *)((uintptr_t)mdev->mbase + offset); - plt_base_dbg("Message 0x%x (%s) pf:%d/vf:%d", msg->id, - mbox_id2name(msg->id), dev_get_pf(msg->pcifunc), - dev_get_vf(msg->pcifunc)); + plt_base_dbg("Message 0x%x (%s) pf:%d/vf:%d", msg->id, mbox_id2name(msg->id), + dev_get_pf(msg->pcifunc), dev_get_vf(msg->pcifunc)); + roc_trace_mbox_process("PRC_UPMSG", mbox_id2name(msg->id), req_hdr->num_msgs, msg->pcifunc); if (roc_rvu_lf_msg_id_range_check(dev->roc_rvu_lf, msg->id)) { size = mbox->rx_start + msg->next_msgoff - offset; err = process_rvu_lf_msgs_up(dev, msg, size); @@ -905,9 +912,10 @@ process_msgs_up(struct dev *dev, struct mbox *mbox) plt_err("Error %d handling 0x%x RVU_LF up msg", err, msg->id); } else { err = mbox_process_msgs_up(dev, msg); - if (err) - plt_err("Error %d handling 0x%x (%s)", err, msg->id, - mbox_id2name(msg->id)); + if (err) { + roc_trace_mbox_error("PRC_UPMSG_ERR", mbox_id2name(msg->id), err); + plt_err("Error %d handling 0x%x (%s)", err, msg->id, mbox_id2name(msg->id)); + } } offset = mbox->rx_start + msg->next_msgoff; } @@ -930,7 +938,7 @@ roc_pf_vf_mbox_irq_cn20k(void *param) plt_base_dbg("Proceeding to check mbox UP messages if any"); plt_write64(intr, dev->mbox_reg_base + RVU_VF_INT); - plt_base_dbg("Irq 0x%" PRIx64 "(pf:%d,vf:%d)", intr, dev->pf, dev->vf); + roc_trace_mbox_interrupt("PFVF_IRQ_20K", dev_pf_func(dev->pf, dev->vf), intr, 0); /* If interrupt occurred for down message */ if (intr & BIT_ULL(1)) @@ -954,7 +962,7 @@ roc_af_pf_mbox_irq_cn20k(void *param) plt_base_dbg("Proceeding to check mbox UP messages if any"); plt_write64(intr, dev->mbox_reg_base + RVU_PF_INT); - plt_base_dbg("Irq 0x%" PRIx64 "(pf:%d,vf:%d)", intr, dev->pf, dev->vf); + roc_trace_mbox_interrupt("AFPF_IRQ_20K", dev_pf_func(dev->pf, dev->vf), intr, 0); /* If interrupt occurred for down message */ if (intr & BIT_ULL(1)) @@ -978,12 +986,12 @@ roc_pf_vf_mbox_irq(void *param) plt_base_dbg("Proceeding to check mbox UP messages if any"); plt_write64(intr, dev->mbox_reg_base + RVU_VF_INT); - plt_base_dbg("Irq 0x%" PRIx64 "(pf:%d,vf:%d)", intr, dev->pf, dev->vf); /* Reading for UP/DOWN message, next message sending will be delayed * by 1ms until this region is zeroed mbox_wait_for_zero() */ mbox_data = plt_read64(dev->mbox_reg_base + RVU_VF_VFPF_MBOX0); + roc_trace_mbox_interrupt("PFVF_IRQ", dev_pf_func(dev->pf, dev->vf), intr, mbox_data); /* If interrupt occurred for down message */ if (mbox_data & MBOX_DOWN_MSG) { mbox_data &= ~MBOX_DOWN_MSG; @@ -1015,12 +1023,12 @@ roc_af_pf_mbox_irq(void *param) plt_base_dbg("Proceeding to check mbox UP messages if any"); plt_write64(intr, dev->mbox_reg_base + RVU_PF_INT); - plt_base_dbg("Irq 0x%" PRIx64 "(pf:%d,vf:%d)", intr, dev->pf, dev->vf); /* Reading for UP/DOWN message, next message sending will be delayed * by 1ms until this region is zeroed mbox_wait_for_zero() */ mbox_data = plt_read64(dev->mbox_reg_base + RVU_PF_PFAF_MBOX0); + roc_trace_mbox_interrupt("AFPF_IRQ", dev_pf_func(dev->pf, dev->vf), intr, mbox_data); /* If interrupt occurred for down message */ if (mbox_data & MBOX_DOWN_MSG) { mbox_data &= ~MBOX_DOWN_MSG; @@ -1327,8 +1335,7 @@ vf_flr_handle_msg(void *param, dev_intr_t *flr) for (vf = 0; vf < max_vf; vf++) { if (flr->bits[vf / max_bits] & BIT_ULL(vf % max_bits)) { - plt_base_dbg("Process FLR vf:%d request (pf:%d, vf:%d)", - vf, dev->pf, dev->vf); + roc_trace_mbox_vf_flr("PRC_VFFLR", vf, dev_pf_func(dev->pf, dev->vf)); /* Inform AF about VF reset */ vf_flr_send_msg(dev, vf); flr->bits[vf / max_bits] &= ~(BIT_ULL(vf % max_bits)); diff --git a/drivers/common/cnxk/roc_mbox.c b/drivers/common/cnxk/roc_mbox.c index eb5bd771fe..e1b0f2fa63 100644 --- a/drivers/common/cnxk/roc_mbox.c +++ b/drivers/common/cnxk/roc_mbox.c @@ -263,6 +263,34 @@ mbox_wait_for_zero(struct mbox *mbox, int devid) return data == 0; } +static inline void +mbox_debug_region(const char *ops, struct mbox *mbox, int devid, uint64_t intr_val, int rc) +{ + struct mbox_dev *mdev = &mbox->dev[devid]; + struct mbox_msghdr *msg_rx, *msg_tx; + uint64_t offset_rx, offset_tx; + char buf[BUFSIZ]; + + if (!plt_trace_point_is_enabled(&__cnxk_trace_mbox_region)) + return; + + /* Get messages from mbox */ + offset_tx = mbox->tx_start + PLT_ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN); + + msg_tx = (struct mbox_msghdr *)((uintptr_t)mdev->mbase + offset_tx); + snprintf(buf, BUFSIZ, "%s_TX", ops); + /* Only while sending message choose a rand identifier for the msg */ + if (intr_val) + msg_tx->cookie = rand(); + roc_trace_mbox_region(buf, mbox_id2name(msg_tx->id), msg_tx->pcifunc, (int)intr_val, + msg_tx->cookie); + + offset_rx = mbox->rx_start + PLT_ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN); + msg_rx = (struct mbox_msghdr *)((uintptr_t)mdev->mbase + offset_rx); + snprintf(buf, BUFSIZ, "%s_RX", ops); + roc_trace_mbox_region(buf, mbox_id2name(msg_rx->id), msg_rx->pcifunc, rc, msg_rx->cookie); +} + static void mbox_msg_send_data(struct mbox *mbox, int devid, uint8_t data) { @@ -291,6 +319,8 @@ mbox_msg_send_data(struct mbox *mbox, int devid, uint8_t data) (volatile void *)(mbox->reg_base + (mbox->trigger | (devid << mbox->tr_shift)))); intr_val |= (uint64_t)data; + + mbox_debug_region("SND_MSG", mbox, devid, intr_val, 0); /* The interrupt should be fired after num_msgs is written * to the shared memory */ @@ -342,6 +372,8 @@ mbox_get_rsp(struct mbox *mbox, int devid, void **msg) if (msg != NULL) *msg = msghdr; + mbox_debug_region("GET_RSP", mbox, devid, 0, msghdr->rc); + return msghdr->rc; } @@ -436,6 +468,8 @@ mbox_wait(struct mbox *mbox, int devid, uint32_t rst_timo) __atomic_load_n(&mdev->msgs_acked, __ATOMIC_ACQUIRE), tx_hdr->num_msgs, rx_hdr->num_msgs, mdev->msg_size, mdev->rsp_size); + mbox_debug_region("MBX_TMO", mbox, devid, 0, -EIO); + return -EIO; } } diff --git a/drivers/common/cnxk/roc_mbox.h b/drivers/common/cnxk/roc_mbox.h index e7a916d61c..3ca8442b55 100644 --- a/drivers/common/cnxk/roc_mbox.h +++ b/drivers/common/cnxk/roc_mbox.h @@ -32,6 +32,8 @@ struct mbox_msghdr { uint16_t __io ver; /* Offset of next msg within mailbox region */ uint16_t __io next_msgoff; + /* Unique ID for trace debug */ + uint16_t __io cookie; int __io rc; /* Msg processed response code */ }; diff --git a/drivers/common/cnxk/roc_platform.c b/drivers/common/cnxk/roc_platform.c index f1e0a93d97..f7678280f5 100644 --- a/drivers/common/cnxk/roc_platform.c +++ b/drivers/common/cnxk/roc_platform.c @@ -3,6 +3,7 @@ */ #include <rte_log.h> +#include <rte_trace_point_register.h> #include "roc_api.h" @@ -85,6 +86,14 @@ roc_plt_init(void) return 0; } +/* ROC trace points */ +RTE_TRACE_POINT_REGISTER(cnxk_trace_mbox_region, cnxk.mbox.region) +RTE_TRACE_POINT_REGISTER(cnxk_trace_mbox_process, cnxk.mbox.process) +RTE_TRACE_POINT_REGISTER(cnxk_trace_mbox_interrupt, cnxk.mbox.interrupt) +RTE_TRACE_POINT_REGISTER(cnxk_trace_mbox_vf_flr, cnxk.mbox.vf.flr) +RTE_TRACE_POINT_REGISTER(cnxk_trace_mbox_vf_pf_handle, cnxk.mbox.vf.pf.handle) +RTE_TRACE_POINT_REGISTER(cnxk_trace_mbox_error, cnxk.mbox.error) + RTE_LOG_REGISTER_SUFFIX(cnxk_logtype_base, base, INFO); RTE_LOG_REGISTER_SUFFIX(cnxk_logtype_mbox, mbox, NOTICE); RTE_LOG_REGISTER_SUFFIX(cnxk_logtype_cpt, crypto, NOTICE); diff --git a/drivers/common/cnxk/roc_platform.h b/drivers/common/cnxk/roc_platform.h index df4f88f288..ef2aaa312f 100644 --- a/drivers/common/cnxk/roc_platform.h +++ b/drivers/common/cnxk/roc_platform.h @@ -5,12 +5,12 @@ #ifndef _ROC_PLATFORM_H_ #define _ROC_PLATFORM_H_ -#include <rte_compat.h> +#include <bus_pci_driver.h> #include <rte_alarm.h> #include <rte_bitmap.h> -#include <bus_pci_driver.h> #include <rte_byteorder.h> #include <rte_common.h> +#include <rte_compat.h> #include <rte_cycles.h> #include <rte_ether.h> #include <rte_interrupts.h> @@ -25,6 +25,7 @@ #include <rte_string_fns.h> #include <rte_tailq.h> #include <rte_telemetry.h> +#include <rte_trace_point.h> #include "eal_filesystem.h" @@ -260,6 +261,9 @@ plt_thread_is_valid(plt_thread_t thr) #define plt_tel_data_add_dict_u64 rte_tel_data_add_dict_uint #define plt_telemetry_register_cmd rte_telemetry_register_cmd +/* Trace */ +#define plt_trace_point_is_enabled rte_trace_point_is_enabled + /* Log */ extern int cnxk_logtype_base; #define RTE_LOGTYPE_base cnxk_logtype_base @@ -374,9 +378,82 @@ plt_lmt_region_reserve_aligned(const char *name, size_t len, uint32_t align) /* To ensure returned memory is physically contiguous, bounding * the start and end address in 2M range. */ - return rte_memzone_reserve_bounded(name, len, SOCKET_ID_ANY, - RTE_MEMZONE_IOVA_CONTIG, - align, RTE_PGSIZE_2M); + return rte_memzone_reserve_bounded(name, len, SOCKET_ID_ANY, RTE_MEMZONE_IOVA_CONTIG, align, + RTE_PGSIZE_2M); +} + +/* ROC trace points */ +RTE_TRACE_POINT(cnxk_trace_mbox_region, + RTE_TRACE_POINT_ARGS(const char *func, const char *msg, uint16_t pcifunc, int data, + uint16_t cookie), + rte_trace_point_emit_string(func); + rte_trace_point_emit_string(msg); rte_trace_point_emit_u16(pcifunc); + rte_trace_point_emit_int(data); rte_trace_point_emit_u16(cookie);) + +RTE_TRACE_POINT(cnxk_trace_mbox_process, + RTE_TRACE_POINT_ARGS(const char *func, const char *msg, uint16_t num_msgs, + uint16_t pcifunc), + rte_trace_point_emit_string(func); + rte_trace_point_emit_string(msg); rte_trace_point_emit_u16(num_msgs); + rte_trace_point_emit_u16(pcifunc);) + +RTE_TRACE_POINT(cnxk_trace_mbox_interrupt, + RTE_TRACE_POINT_ARGS(const char *func, int pcifunc, uint64_t intr, + uint64_t mbox_data), + rte_trace_point_emit_string(func); + rte_trace_point_emit_int(pcifunc); rte_trace_point_emit_u64(intr); + rte_trace_point_emit_u64(mbox_data);) + +RTE_TRACE_POINT(cnxk_trace_mbox_vf_flr, + RTE_TRACE_POINT_ARGS(const char *func, uint16_t from_vf, uint16_t pcifunc), + rte_trace_point_emit_string(func); + rte_trace_point_emit_u16(from_vf); rte_trace_point_emit_u16(pcifunc);) + +RTE_TRACE_POINT(cnxk_trace_mbox_vf_pf_handle, + RTE_TRACE_POINT_ARGS(const char *ops, uint16_t pcifunc, int data), + rte_trace_point_emit_string(ops); + rte_trace_point_emit_u16(pcifunc); rte_trace_point_emit_int(data);) + +RTE_TRACE_POINT(cnxk_trace_mbox_error, + RTE_TRACE_POINT_ARGS(const char *func, const char *msg, int data), + rte_trace_point_emit_string(func); + rte_trace_point_emit_string(msg); rte_trace_point_emit_int(data);) + +static inline void +roc_trace_mbox_region(const char *func, const char *msg, uint16_t pcifunc, int data, + uint16_t cookie) +{ + cnxk_trace_mbox_region(func, msg, pcifunc, data, cookie); +} + +static inline void +roc_trace_mbox_process(const char *func, const char *msg, uint16_t num_msgs, uint16_t pcifunc) +{ + cnxk_trace_mbox_process(func, msg, num_msgs, pcifunc); +} + +static inline void +roc_trace_mbox_interrupt(const char *func, int pcifunc, uint64_t intr, uint64_t mbox_data) +{ + cnxk_trace_mbox_interrupt(func, pcifunc, intr, mbox_data); +} + +static inline void +roc_trace_mbox_vf_pf_handle(const char *func, uint16_t pcifunc, int data) +{ + cnxk_trace_mbox_vf_pf_handle(func, pcifunc, data); +} + +static inline void +roc_trace_mbox_error(const char *func, const char *msg, int data) +{ + cnxk_trace_mbox_error(func, msg, data); +} + +static inline void +roc_trace_mbox_vf_flr(const char *func, uint16_t from_vf, uint16_t pcifunc) +{ + cnxk_trace_mbox_vf_flr(func, from_vf, pcifunc); } #endif /* _ROC_PLATFORM_H_ */ -- 2.46.0.469.g4590f2e941 ^ permalink raw reply [flat|nested] 7+ messages in thread
* RE: [EXTERNAL] [PATCH 2/2] common/cnxk: add mailbox debug trace 2024-11-04 13:08 ` [PATCH 2/2] common/cnxk: add mailbox debug trace Harman Kalra @ 2024-11-07 8:04 ` Jerin Jacob 2024-11-14 16:38 ` Stephen Hemminger 1 sibling, 0 replies; 7+ messages in thread From: Jerin Jacob @ 2024-11-07 8:04 UTC (permalink / raw) To: Harman Kalra, Nithin Kumar Dabilpuram, Kiran Kumar Kokkilagadda, Sunil Kumar Kori, Satha Koteswara Rao Kottidi, Harman Kalra Cc: dev [-- Attachment #1: Type: text/plain, Size: 2348 bytes --] > -----Original Message----- > From: Harman Kalra <hkalra@marvell.com> > Sent: Monday, November 4, 2024 6:38 PM > To: Nithin Kumar Dabilpuram <ndabilpuram@marvell.com>; Kiran Kumar > Kokkilagadda <kirankumark@marvell.com>; Sunil Kumar Kori > <skori@marvell.com>; Satha Koteswara Rao Kottidi > <skoteshwar@marvell.com>; Harman Kalra <hkalra@marvell.com> > Cc: dev@dpdk.org > Subject: [EXTERNAL] [PATCH 2/2] common/cnxk: add mailbox debug trace > > Implementing mailbox debug trace points to trace function calls. Signed-off-by: > Harman Kalra <hkalra@ marvell. com> --- drivers/common/cnxk/roc_dev. c | 89 > ++++++++++++++++-------------- drivers/common/cnxk/roc_mbox. c | 34 > ++++++++++++ drivers/common/cnxk/roc_mbox. h > Implementing mailbox debug trace points to trace function calls. > > Signed-off-by: Harman Kalra <hkalra@marvell.com> > --- > drivers/common/cnxk/roc_dev.c | 89 ++++++++++++++++-------------- > drivers/common/cnxk/roc_mbox.c | 34 ++++++++++++ > drivers/common/cnxk/roc_mbox.h | 2 + > drivers/common/cnxk/roc_platform.c | 9 +++ > drivers/common/cnxk/roc_platform.h | 87 +++++++++++++++++++++++++++-- > 5 files changed, 175 insertions(+), 46 deletions(-) > > diff --git a/drivers/common/cnxk/roc_dev.c b/drivers/common/cnxk/roc_dev.c > index 1127d8185c..e9bf8c01b4 100644 > --- a/drivers/common/cnxk/roc_dev.c > +++ b/drivers/common/cnxk/roc_dev.c > @@ -26,7 +26,6 @@ > > /* RVU PF interrupt status as received from AF*/ #define > RVU_PF_INTR_STATUS 0x3 > - This change may not be needed. Also, Fix following issues ### [PATCH] common/cnxk: fix mailbox timeout issues Warning in drivers/common/cnxk/roc_dev.c: Warning in drivers/common/cnxk/roc_mbox.c: Using __atomic_xxx/__ATOMIC_XXX built-ins, prefer rte_atomic_xxx/rte_memory_order_xxx ### [PATCH] common/cnxk: add mailbox debug trace WARNING:LONG_LINE: line length of 108 exceeds 100 columns #199: FILE: drivers/common/cnxk/roc_dev.c:907: + roc_trace_mbox_process("PRC_UPMSG", mbox_id2name(msg->id), req_hdr->num_msgs, msg->pcifunc); WARNING:LONG_LINE: line length of 108 exceeds 100 columns #212: FILE: drivers/common/cnxk/roc_dev.c:917: + plt_err("Error %d handling 0x%x (%s)", err, msg->id, mbox_id2name(msg->id)); ); [-- Attachment #2: winmail.dat --] [-- Type: application/ms-tnef, Size: 15716 bytes --] ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 2/2] common/cnxk: add mailbox debug trace 2024-11-04 13:08 ` [PATCH 2/2] common/cnxk: add mailbox debug trace Harman Kalra 2024-11-07 8:04 ` [EXTERNAL] " Jerin Jacob @ 2024-11-14 16:38 ` Stephen Hemminger 1 sibling, 0 replies; 7+ messages in thread From: Stephen Hemminger @ 2024-11-14 16:38 UTC (permalink / raw) To: Harman Kalra Cc: Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao, dev On Mon, 4 Nov 2024 18:38:00 +0530 Harman Kalra <hkalra@marvell.com> wrote: > Implementing mailbox debug trace points to trace function calls. > > Signed-off-by: Harman Kalra <hkalra@marvell.com> Missing use of RTE_ATOMIC() to flag atomic variables. -----------------------Summary of failed steps----------------------- "ubuntu-22.04-clang-stdatomic" failed at step Build and test ----------------------End summary of failed steps-------------------- -------------------------------BEGIN LOGS---------------------------- #################################################################################### #### [Begin job log] "ubuntu-22.04-clang-stdatomic" at step Build and test #################################################################################### atomic_store_explicit(ptr, val, memorder) ^ ~~~ /usr/lib/llvm-14/lib/clang/14.0.0/include/stdatomic.h:127:31: note: expanded from macro 'atomic_store_explicit' #define atomic_store_explicit __c11_atomic_store ^ ../drivers/common/cnxk/roc_dev.c:551:2: error: address argument to atomic operation must be a pointer to _Atomic type ('uint16_t *' (aka 'unsigned short *') invalid) plt_atomic_store_explicit(&mdev->msgs_acked, msgs_acked, plt_memory_order_release); ^ ~~~~~~~~~~~~~~~~~ ../drivers/common/cnxk/roc_platform.h:215:35: note: expanded from macro 'plt_atomic_store_explicit' #define plt_atomic_store_explicit rte_atomic_store_explicit ^ ../lib/eal/include/rte_stdatomic.h:72:2: note: expanded from macro 'rte_atomic_store_explicit' atomic_store_explicit(ptr, val, memorder) ^ ~~~ /usr/lib/llvm-14/lib/clang/14.0.0/include/stdatomic.h:127:31: note: expanded from macro 'atomic_store_explicit' #define atomic_store_explicit __c11_atomic_store ^ 2 errors generated. [749/3145] Compiling C object drivers/libtmp_rte_common_cnxk.a.p/common_cnxk_roc_cpt.c.o [750/3145] Compiling C object drivers/libtmp_rte_common_cnxk.a.p/common_cnxk_roc_cpt_debug.c.o [751/3145] Compiling C object drivers/libtmp_rte_common_cnxk.a.p/common_cnxk_roc_dpi.c.o [752/3145] Compiling C object drivers/libtmp_rte_common_cnxk.a.p/common_cnxk_roc_eswitch.c.o [753/3145] Compiling C object lib/librte_pipeline.a.p/pipeline_rte_table_action.c.o ninja: build stopped: subcommand failed. ##[error]Process completed with exit code 1. #################################################################################### #### [End job log] "ubuntu-22.04-clang-stdatomic" at step Build and test ############################################################################## ^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH v2 1/2] common/cnxk: fix mailbox timeout issues 2024-11-04 13:07 [PATCH 1/2] common/cnxk: fix mailbox timeout issues Harman Kalra 2024-11-04 13:08 ` [PATCH 2/2] common/cnxk: add mailbox debug trace Harman Kalra @ 2024-11-14 9:31 ` Harman Kalra 2024-11-14 9:31 ` [PATCH v2 2/2] common/cnxk: add mailbox debug trace Harman Kalra 2025-01-22 12:36 ` [EXTERNAL] [PATCH v2 1/2] common/cnxk: fix mailbox timeout issues Jerin Jacob 1 sibling, 2 replies; 7+ messages in thread From: Harman Kalra @ 2024-11-14 9:31 UTC (permalink / raw) To: Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao, Harman Kalra Cc: dev Found couple of reasons causing mailbox timeout: * msgs_acked value by interrupt thread was not getting synced to thread polling it. * mbox_data value sent by AF was seen 0 causing neither down nor up msg processing. Fixes: 9bd368ca311a ("common/cnxk: enable PF VF mailbox") Fixes: 61deac72abbf ("common/cnxk: add CN20KA mbox support") Signed-off-by: Harman Kalra <hkalra@marvell.com> --- V2: * Replaced gcc builtin __atomic_xxx intrinsics with corresponding rte_atomic_xxx stdatomic API drivers/common/cnxk/roc_dev.c | 106 ++++++++++++++++++++++++----- drivers/common/cnxk/roc_dev_priv.h | 11 +++ drivers/common/cnxk/roc_mbox.c | 12 ++-- drivers/common/cnxk/roc_platform.h | 5 ++ 4 files changed, 111 insertions(+), 23 deletions(-) diff --git a/drivers/common/cnxk/roc_dev.c b/drivers/common/cnxk/roc_dev.c index 32409f2ef3..0d1f9c4564 100644 --- a/drivers/common/cnxk/roc_dev.c +++ b/drivers/common/cnxk/roc_dev.c @@ -401,8 +401,7 @@ vf_pf_process_up_msgs(struct dev *dev, uint16_t vf) offset = mbox->rx_start + msg->next_msgoff; } mbox_reset(mbox, vf); - mdev->msgs_acked = msgs_acked; - plt_wmb(); + plt_atomic_store_explicit(&mdev->msgs_acked, msgs_acked, plt_memory_order_release); return i; } @@ -542,8 +541,7 @@ process_msgs(struct dev *dev, struct mbox *mbox) mbox_reset(mbox, 0); /* Update acked if someone is waiting a message - mbox_wait is waiting */ - mdev->msgs_acked = msgs_acked; - plt_wmb(); + plt_atomic_store_explicit(&mdev->msgs_acked, msgs_acked, plt_memory_order_release); } /* Copies the message received from AF and sends it to VF */ @@ -920,12 +918,59 @@ process_msgs_up(struct dev *dev, struct mbox *mbox) } } +/* IRQ to VF from PF - VF context (interrupt thread) */ +static void +roc_pf_vf_mbox_irq_cn20k(void *param) +{ + struct dev *dev = param; + uint64_t intr; + + intr = plt_read64(dev->mbox_reg_base + RVU_VF_INT); + if (intr == 0) + plt_base_dbg("Proceeding to check mbox UP messages if any"); + + plt_write64(intr, dev->mbox_reg_base + RVU_VF_INT); + plt_base_dbg("Irq 0x%" PRIx64 "(pf:%d,vf:%d)", intr, dev->pf, dev->vf); + + /* If interrupt occurred for down message */ + if (intr & BIT_ULL(1)) + /* First process all configuration messages */ + process_msgs(dev, dev->mbox); + + /* If interrupt occurred for UP message */ + if (intr & BIT_ULL(0)) + process_msgs_up(dev, &dev->mbox_up); +} + +/* IRQ to PF from AF - PF context (interrupt thread) */ +static void +roc_af_pf_mbox_irq_cn20k(void *param) +{ + struct dev *dev = param; + uint64_t intr; + + intr = plt_read64(dev->mbox_reg_base + RVU_PF_INT); + if (intr == 0) + plt_base_dbg("Proceeding to check mbox UP messages if any"); + + plt_write64(intr, dev->mbox_reg_base + RVU_PF_INT); + plt_base_dbg("Irq 0x%" PRIx64 "(pf:%d,vf:%d)", intr, dev->pf, dev->vf); + + /* If interrupt occurred for down message */ + if (intr & BIT_ULL(1)) + process_msgs(dev, dev->mbox); + + /* If interrupt occurred for up message */ + if (intr & BIT_ULL(0)) + process_msgs_up(dev, &dev->mbox_up); +} + /* IRQ to VF from PF - VF context (interrupt thread) */ static void roc_pf_vf_mbox_irq(void *param) { struct dev *dev = param; - uint64_t mbox_data; + uint64_t mbox_data = 0; uint64_t intr; intr = plt_read64(dev->mbox_reg_base + RVU_VF_INT); @@ -940,7 +985,7 @@ roc_pf_vf_mbox_irq(void *param) */ mbox_data = plt_read64(dev->mbox_reg_base + RVU_VF_VFPF_MBOX0); /* If interrupt occurred for down message */ - if (mbox_data & MBOX_DOWN_MSG || intr & BIT_ULL(1)) { + if (mbox_data & MBOX_DOWN_MSG) { mbox_data &= ~MBOX_DOWN_MSG; plt_write64(mbox_data, dev->mbox_reg_base + RVU_VF_VFPF_MBOX0); @@ -948,7 +993,7 @@ roc_pf_vf_mbox_irq(void *param) process_msgs(dev, dev->mbox); } /* If interrupt occurred for UP message */ - if (mbox_data & MBOX_UP_MSG || intr & BIT_ULL(0)) { + if (mbox_data & MBOX_UP_MSG) { mbox_data &= ~MBOX_UP_MSG; plt_write64(mbox_data, dev->mbox_reg_base + RVU_VF_VFPF_MBOX0); @@ -962,7 +1007,7 @@ static void roc_af_pf_mbox_irq(void *param) { struct dev *dev = param; - uint64_t mbox_data; + uint64_t mbox_data = 0; uint64_t intr; intr = plt_read64(dev->mbox_reg_base + RVU_PF_INT); @@ -977,7 +1022,7 @@ roc_af_pf_mbox_irq(void *param) */ mbox_data = plt_read64(dev->mbox_reg_base + RVU_PF_PFAF_MBOX0); /* If interrupt occurred for down message */ - if (mbox_data & MBOX_DOWN_MSG || intr & BIT_ULL(1)) { + if (mbox_data & MBOX_DOWN_MSG) { mbox_data &= ~MBOX_DOWN_MSG; plt_write64(mbox_data, dev->mbox_reg_base + RVU_PF_PFAF_MBOX0); @@ -985,7 +1030,7 @@ roc_af_pf_mbox_irq(void *param) process_msgs(dev, dev->mbox); } /* If interrupt occurred for up message */ - if (mbox_data & MBOX_UP_MSG || intr & BIT_ULL(0)) { + if (mbox_data & MBOX_UP_MSG) { mbox_data &= ~MBOX_UP_MSG; plt_write64(mbox_data, dev->mbox_reg_base + RVU_PF_PFAF_MBOX0); @@ -1045,7 +1090,8 @@ mbox_register_pf_irq(struct plt_pci_device *pci_dev, struct dev *dev) } } /* MBOX interrupt AF <-> PF */ - rc = dev_irq_register(intr_handle, roc_af_pf_mbox_irq, dev, dev->mbox_plat->pfaf_vec); + rc = dev_irq_register(intr_handle, dev->mbox_plat->ops->af_pf_mbox_irq, dev, + dev->mbox_plat->pfaf_vec); if (rc) { plt_err("Fail to register AF<->PF mbox irq"); return rc; @@ -1073,7 +1119,8 @@ mbox_register_vf_irq(struct plt_pci_device *pci_dev, struct dev *dev) plt_write64(~0ull, dev->mbox_reg_base + RVU_VF_INT_ENA_W1C); /* MBOX interrupt PF <-> VF */ - rc = dev_irq_register(intr_handle, roc_pf_vf_mbox_irq, dev, RVU_VF_INT_VEC_MBOX); + rc = dev_irq_register(intr_handle, dev->mbox_plat->ops->pf_vf_mbox_irq, dev, + RVU_VF_INT_VEC_MBOX); if (rc) { plt_err("Fail to register PF<->VF mbox irq"); return rc; @@ -1127,7 +1174,8 @@ mbox_unregister_pf_irq(struct plt_pci_device *pci_dev, struct dev *dev) } /* MBOX interrupt AF <-> PF */ - dev_irq_unregister(intr_handle, roc_af_pf_mbox_irq, dev, dev->mbox_plat->pfaf_vec); + dev_irq_unregister(intr_handle, dev->mbox_plat->ops->af_pf_mbox_irq, dev, + dev->mbox_plat->pfaf_vec); } static void @@ -1139,7 +1187,8 @@ mbox_unregister_vf_irq(struct plt_pci_device *pci_dev, struct dev *dev) plt_write64(~0ull, dev->mbox_reg_base + RVU_VF_INT_ENA_W1C); /* Unregister the interrupt handler */ - dev_irq_unregister(intr_handle, roc_pf_vf_mbox_irq, dev, RVU_VF_INT_VEC_MBOX); + dev_irq_unregister(intr_handle, dev->mbox_plat->ops->pf_vf_mbox_irq, dev, + RVU_VF_INT_VEC_MBOX); } void @@ -1599,10 +1648,17 @@ dev_cache_line_size_valid(void) return true; } -static void +static int mbox_platform_changes(struct mbox_platform *mbox_plat, uintptr_t bar2, uintptr_t bar4, bool is_vf) { - int i; + int i, rc = 0; + + /* Allocate memory for device ops */ + mbox_plat->ops = plt_zmalloc(sizeof(struct mbox_ops), 0); + if (mbox_plat->ops == NULL) { + rc = -ENOMEM; + goto fail; + } if (roc_model_is_cn20k()) { /* For CN20K, AF allocates mbox memory in DRAM and writes PF @@ -1613,6 +1669,9 @@ mbox_platform_changes(struct mbox_platform *mbox_plat, uintptr_t bar2, uintptr_t mbox_plat->mbox_region_base = bar2 + (RVU_PFX_FUNC_PFAF_MBOX + ((uint64_t)RVU_BLOCK_ADDR_MBOX << RVU_FUNC_BLKADDR_SHIFT)); + /* Mbox operations */ + mbox_plat->ops->af_pf_mbox_irq = roc_af_pf_mbox_irq_cn20k; + mbox_plat->ops->pf_vf_mbox_irq = roc_pf_vf_mbox_irq_cn20k; /* Interrupt vectors */ mbox_plat->pfaf_vec = RVU_MBOX_PF_INT_VEC_AFPF_MBOX; mbox_plat->pfvf_mbox0_vec = RVU_MBOX_PF_INT_VEC_VFPF_MBOX0; @@ -1630,6 +1689,9 @@ mbox_platform_changes(struct mbox_platform *mbox_plat, uintptr_t bar2, uintptr_t } else { mbox_plat->mbox_reg_base = bar2; mbox_plat->mbox_region_base = bar4; + /* Mbox operations */ + mbox_plat->ops->af_pf_mbox_irq = roc_af_pf_mbox_irq; + mbox_plat->ops->pf_vf_mbox_irq = roc_pf_vf_mbox_irq; mbox_plat->pfaf_vec = RVU_PF_INT_VEC_AFPF_MBOX; mbox_plat->pfvf_mbox0_vec = RVU_PF_INT_VEC_VFPF_MBOX0; mbox_plat->pfvf_mbox1_vec = RVU_PF_INT_VEC_VFPF_MBOX1; @@ -1647,6 +1709,8 @@ mbox_platform_changes(struct mbox_platform *mbox_plat, uintptr_t bar2, uintptr_t if (roc_model_is_cn10k()) mbox_plat->mbox_region_base = bar2 + RVU_VF_MBOX_REGION; } +fail: + return rc; } int @@ -1678,7 +1742,12 @@ dev_init(struct dev *dev, struct plt_pci_device *pci_dev) rc = -ENOMEM; goto fail; } - mbox_platform_changes(dev->mbox_plat, bar2, bar4, is_vf); + + if (mbox_platform_changes(dev->mbox_plat, bar2, bar4, is_vf)) { + plt_err("Failed to populate platform specific changes"); + rc = -ENOMEM; + goto mbox_plat_free; + } mbox_reg_base = dev->mbox_plat->mbox_reg_base; mbox_region_base = dev->mbox_plat->mbox_region_base; @@ -1824,6 +1893,8 @@ dev_init(struct dev *dev, struct plt_pci_device *pci_dev) mbox_fini(dev->mbox); mbox_fini(&dev->mbox_up); error: + plt_free(dev->mbox_plat->ops); +mbox_plat_free: plt_free(dev->mbox_plat); fail: return rc; @@ -1883,6 +1954,7 @@ dev_fini(struct dev *dev, struct plt_pci_device *pci_dev) mbox_fini(mbox); dev->mbox_active = 0; + plt_free(dev->mbox_plat->ops); plt_free(dev->mbox_plat); /* Disable MSIX vectors */ dev_irqs_disable(intr_handle); diff --git a/drivers/common/cnxk/roc_dev_priv.h b/drivers/common/cnxk/roc_dev_priv.h index c766183196..a8e40fa9d5 100644 --- a/drivers/common/cnxk/roc_dev_priv.h +++ b/drivers/common/cnxk/roc_dev_priv.h @@ -106,6 +106,15 @@ struct mbox_sync { pthread_mutex_t mutex; }; +/* AF PF VF mbox interrupt callbacks */ +typedef void (*af_pf_mbox_irq_t)(void *param); +typedef void (*pf_vf_mbox_irq_t)(void *param); + +struct mbox_ops { + af_pf_mbox_irq_t af_pf_mbox_irq; + pf_vf_mbox_irq_t pf_vf_mbox_irq; +}; + struct mbox_platform { uint8_t pfaf_vec; uint8_t pfvf_mbox0_vec; @@ -120,6 +129,8 @@ struct mbox_platform { uint64_t pfvf1_mbox_int_ena_w1c[MAX_VFPF_DWORD_BITS]; uintptr_t mbox_reg_base; uintptr_t mbox_region_base; + /* Mbox operations */ + struct mbox_ops *ops; }; struct dev { diff --git a/drivers/common/cnxk/roc_mbox.c b/drivers/common/cnxk/roc_mbox.c index db77babfdb..a3b96e6e2b 100644 --- a/drivers/common/cnxk/roc_mbox.c +++ b/drivers/common/cnxk/roc_mbox.c @@ -275,7 +275,7 @@ mbox_msg_send_data(struct mbox *mbox, int devid, uint8_t data) tx_hdr->msg_size = mdev->msg_size; mdev->msg_size = 0; mdev->rsp_size = 0; - mdev->msgs_acked = 0; + plt_atomic_store_explicit(&mdev->msgs_acked, 0, plt_memory_order_release); /* num_msgs != 0 signals to the peer that the buffer has a number of * messages. So this should be written after copying txmem @@ -417,7 +417,8 @@ mbox_wait(struct mbox *mbox, int devid, uint32_t rst_timo) * mdev->msgs_acked are incremented at process_msgs() in interrupt * thread context. */ - while (mdev->num_msgs > mdev->msgs_acked) { + while (mdev->num_msgs > plt_atomic_load_explicit(&mdev->msgs_acked, + plt_memory_order_acquire)) { plt_delay_us(sleep); timeout += sleep; if (timeout >= rst_timo) { @@ -433,13 +434,12 @@ mbox_wait(struct mbox *mbox, int devid, uint32_t rst_timo) "(tx/rx num_msgs: %d/%d), msg_size: %d, " "rsp_size: %d", devid, timeout, mdev->num_msgs, - mdev->msgs_acked, tx_hdr->num_msgs, - rx_hdr->num_msgs, mdev->msg_size, - mdev->rsp_size); + plt_atomic_load_explicit(&mdev->msgs_acked, + plt_memory_order_acquire), + tx_hdr->num_msgs, rx_hdr->num_msgs, mdev->msg_size, mdev->rsp_size); return -EIO; } - plt_rmb(); } return 0; } diff --git a/drivers/common/cnxk/roc_platform.h b/drivers/common/cnxk/roc_platform.h index df4f88f288..4076dbe94d 100644 --- a/drivers/common/cnxk/roc_platform.h +++ b/drivers/common/cnxk/roc_platform.h @@ -211,6 +211,11 @@ plt_thread_is_valid(plt_thread_t thr) #define plt_io_rmb() rte_io_rmb() #define plt_atomic_thread_fence rte_atomic_thread_fence +#define plt_atomic_store_explicit rte_atomic_store_explicit +#define plt_atomic_load_explicit rte_atomic_load_explicit +#define plt_memory_order_release rte_memory_order_release +#define plt_memory_order_acquire rte_memory_order_acquire + #define plt_bit_relaxed_get32 rte_bit_relaxed_get32 #define plt_bit_relaxed_set32 rte_bit_relaxed_set32 #define plt_bit_relaxed_clear32 rte_bit_relaxed_clear32 -- 2.25.1 ^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH v2 2/2] common/cnxk: add mailbox debug trace 2024-11-14 9:31 ` [PATCH v2 1/2] common/cnxk: fix mailbox timeout issues Harman Kalra @ 2024-11-14 9:31 ` Harman Kalra 2025-01-22 12:36 ` [EXTERNAL] [PATCH v2 1/2] common/cnxk: fix mailbox timeout issues Jerin Jacob 1 sibling, 0 replies; 7+ messages in thread From: Harman Kalra @ 2024-11-14 9:31 UTC (permalink / raw) To: Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao, Harman Kalra Cc: dev Implementing mailbox debug trace points to trace function calls. Signed-off-by: Harman Kalra <hkalra@marvell.com> --- V2: * Fixed line length exceeding 100 columns drivers/common/cnxk/roc_dev.c | 87 ++++++++++++++++-------------- drivers/common/cnxk/roc_mbox.c | 34 ++++++++++++ drivers/common/cnxk/roc_mbox.h | 2 + drivers/common/cnxk/roc_platform.c | 9 ++++ drivers/common/cnxk/roc_platform.h | 87 ++++++++++++++++++++++++++++-- 5 files changed, 175 insertions(+), 44 deletions(-) diff --git a/drivers/common/cnxk/roc_dev.c b/drivers/common/cnxk/roc_dev.c index 0d1f9c4564..2dc437c491 100644 --- a/drivers/common/cnxk/roc_dev.c +++ b/drivers/common/cnxk/roc_dev.c @@ -26,7 +26,6 @@ /* RVU PF interrupt status as received from AF*/ #define RVU_PF_INTR_STATUS 0x3 - static void * mbox_mem_map(off_t off, size_t size) { @@ -83,6 +82,7 @@ pf_af_sync_msg(struct dev *dev, struct mbox_msghdr **rsp) timeout += sleep; if (timeout >= mbox->rsp_tmo) { plt_err("Message timeout: %dms", mbox->rsp_tmo); + roc_trace_mbox_error("AFPF_SYNC", "RSP_TMO", -EIO); rc = -EIO; break; } @@ -137,6 +137,7 @@ af_pf_wait_msg(struct dev *dev, uint16_t vf, int num_msg) timeout++; if (timeout >= mbox->rsp_tmo) { plt_err("Routed messages %d timeout: %dms", num_msg, mbox->rsp_tmo); + roc_trace_mbox_error("AFPF_WAIT", "RTD_MSG_TMO", -EIO); break; } int_status = plt_read64(dev->mbox_reg_base + RVU_PF_INT); @@ -149,16 +150,18 @@ af_pf_wait_msg(struct dev *dev, uint16_t vf, int num_msg) plt_write64(~0ull, dev->mbox_reg_base + RVU_PF_INT_ENA_W1S); req_hdr = (struct mbox_hdr *)((uintptr_t)mdev->mbase + mbox->rx_start); - if (req_hdr->num_msgs != num_msg) - plt_err("Routed messages: %d received: %d", num_msg, - req_hdr->num_msgs); + if (req_hdr->num_msgs != num_msg) { + plt_err("Routed messages: %d received: %d", num_msg, req_hdr->num_msgs); + roc_trace_mbox_error("AFPF_WAIT", "RTD_INV_CNT", num_msg); + } /* Get messages from mbox */ - offset = mbox->rx_start + - PLT_ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN); + offset = mbox->rx_start + PLT_ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN); for (i = 0; i < req_hdr->num_msgs; i++) { msg = (struct mbox_msghdr *)((uintptr_t)mdev->mbase + offset); size = mbox->rx_start + msg->next_msgoff - offset; + roc_trace_mbox_process("AFPF_WAIT", mbox_id2name(msg->id), req_hdr->num_msgs, + msg->pcifunc); /* Reserve PF/VF mbox message */ size = PLT_ALIGN(size, MBOX_MSG_ALIGN); @@ -288,6 +291,8 @@ vf_pf_process_msgs(struct dev *dev, uint16_t vf) /* RVU_PF_FUNC_S */ msg->pcifunc = dev_pf_func(dev->pf, vf); + roc_trace_mbox_process("VFPF_MSG", mbox_id2name(msg->id), req_hdr->num_msgs, + msg->pcifunc); if (msg->id == MBOX_MSG_READY) { struct ready_msg_rsp *rsp; @@ -333,8 +338,7 @@ vf_pf_process_msgs(struct dev *dev, uint16_t vf) } if (routed > 0) { - plt_base_dbg("pf:%d routed %d messages from vf:%d to AF", - dev->pf, routed, vf); + roc_trace_mbox_vf_pf_handle("RTG_AF", dev_pf_func(dev->pf, vf), routed); /* PF will send the messages to AF and wait for responses */ af_pf_wait_msg(dev, vf, routed); mbox_reset(dev->mbox, 0); @@ -343,8 +347,7 @@ vf_pf_process_msgs(struct dev *dev, uint16_t vf) /* Send mbox responses to VF */ if (mdev->num_msgs) { - plt_base_dbg("pf:%d reply %d messages to vf:%d", dev->pf, - mdev->num_msgs, vf); + roc_trace_mbox_vf_pf_handle("RSP_VF", dev_pf_func(dev->pf, vf), mdev->num_msgs); mbox_msg_send(mbox, vf); } @@ -378,25 +381,24 @@ vf_pf_process_up_msgs(struct dev *dev, uint16_t vf) switch (msg->id) { case MBOX_MSG_CGX_LINK_EVENT: - plt_base_dbg("PF: Msg 0x%x (%s) fn:0x%x (pf:%d,vf:%d)", - msg->id, mbox_id2name(msg->id), - msg->pcifunc, dev_get_pf(msg->pcifunc), - dev_get_vf(msg->pcifunc)); + roc_trace_mbox_process("VFPF_UPMSG_CGX", mbox_id2name(msg->id), + req_hdr->num_msgs, msg->pcifunc); break; case MBOX_MSG_CGX_PTP_RX_INFO: - plt_base_dbg("PF: Msg 0x%x (%s) fn:0x%x (pf:%d,vf:%d)", - msg->id, mbox_id2name(msg->id), - msg->pcifunc, dev_get_pf(msg->pcifunc), - dev_get_vf(msg->pcifunc)); + roc_trace_mbox_process("VFPF_UPMSG_PTP", mbox_id2name(msg->id), + req_hdr->num_msgs, msg->pcifunc); break; default: if (roc_rvu_lf_msg_id_range_check(dev->roc_rvu_lf, msg->id)) plt_base_dbg("PF: Msg 0x%x fn:0x%x (pf:%d,vf:%d)", msg->id, msg->pcifunc, dev_get_pf(msg->pcifunc), dev_get_vf(msg->pcifunc)); - else - plt_err("Not handled UP msg 0x%x (%s) func:0x%x", - msg->id, mbox_id2name(msg->id), msg->pcifunc); + else { + roc_trace_mbox_process("UNHND_UPMSG", mbox_id2name(msg->id), + req_hdr->num_msgs, msg->pcifunc); + plt_err("Not handled UP msg 0x%x (%s) func:0x%x", msg->id, + mbox_id2name(msg->id), msg->pcifunc); + } } offset = mbox->rx_start + msg->next_msgoff; } @@ -446,8 +448,7 @@ roc_vf_pf_mbox_irq(void *param) if (!intr) continue; - plt_base_dbg("vfpf: %d intr: 0x%" PRIx64 " (pf:%d, vf:%d)", vfpf, intr, dev->pf, - dev->vf); + roc_trace_mbox_interrupt("VFPF_IRQ", dev_pf_func(dev->pf, dev->vf), intr, 0); /* Save and clear intr bits */ intrb.bits[vfpf] |= intr; @@ -489,9 +490,8 @@ process_msgs(struct dev *dev, struct mbox *mbox) msg = (struct mbox_msghdr *)((uintptr_t)mdev->mbase + offset); msgs_acked++; - plt_base_dbg("Message 0x%x (%s) pf:%d/vf:%d", msg->id, - mbox_id2name(msg->id), dev_get_pf(msg->pcifunc), - dev_get_vf(msg->pcifunc)); + roc_trace_mbox_process("PRC_MSG", mbox_id2name(msg->id), req_hdr->num_msgs, + msg->pcifunc); switch (msg->id) { /* Add message id's that are handled here */ @@ -514,6 +514,8 @@ process_msgs(struct dev *dev, struct mbox *mbox) } else { plt_err("Message (%s) response has err=%d", mbox_id2name(msg->id), msg->rc); + roc_trace_mbox_error("PRC_MSG_ERR", mbox_id2name(msg->id), + msg->rc); } } break; @@ -526,14 +528,19 @@ process_msgs(struct dev *dev, struct mbox *mbox) } else { plt_err("Message (%s) response has err=%d", mbox_id2name(msg->id), msg->rc); + roc_trace_mbox_error("PRC_MSG_ERR", mbox_id2name(msg->id), + msg->rc); } } break; default: - if (msg->rc) + if (msg->rc) { plt_err("Message (%s) response has err=%d (%s)", mbox_id2name(msg->id), msg->rc, roc_error_msg_get(msg->rc)); + roc_trace_mbox_error("PRC_MSG_INV_RSP", mbox_id2name(msg->id), + msg->rc); + } break; } offset = mbox->rx_start + msg->next_msgoff; @@ -564,8 +571,8 @@ pf_vf_mbox_send_up_msg(struct dev *dev, void *rec_msg) if (!(dev->active_vfs[vf / max_bits] & (BIT_ULL(vf)))) continue; - plt_base_dbg("(%s) size: %zx to VF: %d", - mbox_id2name(msg->hdr.id), size, vf); + roc_trace_mbox_process("PFVF_SND_UPMSG", mbox_id2name(msg->hdr.id), 1, + (uint16_t)dev_pf_func(dev->pf, vf)); /* Reserve PF/VF mbox message */ vf_msg = mbox_alloc_msg(vf_mbox, vf, size); @@ -895,9 +902,10 @@ process_msgs_up(struct dev *dev, struct mbox *mbox) for (i = 0; i < req_hdr->num_msgs; i++) { msg = (struct mbox_msghdr *)((uintptr_t)mdev->mbase + offset); - plt_base_dbg("Message 0x%x (%s) pf:%d/vf:%d", msg->id, - mbox_id2name(msg->id), dev_get_pf(msg->pcifunc), - dev_get_vf(msg->pcifunc)); + plt_base_dbg("Message 0x%x (%s) pf:%d/vf:%d", msg->id, mbox_id2name(msg->id), + dev_get_pf(msg->pcifunc), dev_get_vf(msg->pcifunc)); + roc_trace_mbox_process("PRC_UPMSG", mbox_id2name(msg->id), req_hdr->num_msgs, + msg->pcifunc); if (roc_rvu_lf_msg_id_range_check(dev->roc_rvu_lf, msg->id)) { size = mbox->rx_start + msg->next_msgoff - offset; err = process_rvu_lf_msgs_up(dev, msg, size); @@ -905,9 +913,11 @@ process_msgs_up(struct dev *dev, struct mbox *mbox) plt_err("Error %d handling 0x%x RVU_LF up msg", err, msg->id); } else { err = mbox_process_msgs_up(dev, msg); - if (err) + if (err) { + roc_trace_mbox_error("PRC_UPMSG_ERR", mbox_id2name(msg->id), err); plt_err("Error %d handling 0x%x (%s)", err, msg->id, mbox_id2name(msg->id)); + } } offset = mbox->rx_start + msg->next_msgoff; } @@ -930,7 +940,7 @@ roc_pf_vf_mbox_irq_cn20k(void *param) plt_base_dbg("Proceeding to check mbox UP messages if any"); plt_write64(intr, dev->mbox_reg_base + RVU_VF_INT); - plt_base_dbg("Irq 0x%" PRIx64 "(pf:%d,vf:%d)", intr, dev->pf, dev->vf); + roc_trace_mbox_interrupt("PFVF_IRQ_20K", dev_pf_func(dev->pf, dev->vf), intr, 0); /* If interrupt occurred for down message */ if (intr & BIT_ULL(1)) @@ -954,7 +964,7 @@ roc_af_pf_mbox_irq_cn20k(void *param) plt_base_dbg("Proceeding to check mbox UP messages if any"); plt_write64(intr, dev->mbox_reg_base + RVU_PF_INT); - plt_base_dbg("Irq 0x%" PRIx64 "(pf:%d,vf:%d)", intr, dev->pf, dev->vf); + roc_trace_mbox_interrupt("AFPF_IRQ_20K", dev_pf_func(dev->pf, dev->vf), intr, 0); /* If interrupt occurred for down message */ if (intr & BIT_ULL(1)) @@ -978,12 +988,12 @@ roc_pf_vf_mbox_irq(void *param) plt_base_dbg("Proceeding to check mbox UP messages if any"); plt_write64(intr, dev->mbox_reg_base + RVU_VF_INT); - plt_base_dbg("Irq 0x%" PRIx64 "(pf:%d,vf:%d)", intr, dev->pf, dev->vf); /* Reading for UP/DOWN message, next message sending will be delayed * by 1ms until this region is zeroed mbox_wait_for_zero() */ mbox_data = plt_read64(dev->mbox_reg_base + RVU_VF_VFPF_MBOX0); + roc_trace_mbox_interrupt("PFVF_IRQ", dev_pf_func(dev->pf, dev->vf), intr, mbox_data); /* If interrupt occurred for down message */ if (mbox_data & MBOX_DOWN_MSG) { mbox_data &= ~MBOX_DOWN_MSG; @@ -1015,12 +1025,12 @@ roc_af_pf_mbox_irq(void *param) plt_base_dbg("Proceeding to check mbox UP messages if any"); plt_write64(intr, dev->mbox_reg_base + RVU_PF_INT); - plt_base_dbg("Irq 0x%" PRIx64 "(pf:%d,vf:%d)", intr, dev->pf, dev->vf); /* Reading for UP/DOWN message, next message sending will be delayed * by 1ms until this region is zeroed mbox_wait_for_zero() */ mbox_data = plt_read64(dev->mbox_reg_base + RVU_PF_PFAF_MBOX0); + roc_trace_mbox_interrupt("AFPF_IRQ", dev_pf_func(dev->pf, dev->vf), intr, mbox_data); /* If interrupt occurred for down message */ if (mbox_data & MBOX_DOWN_MSG) { mbox_data &= ~MBOX_DOWN_MSG; @@ -1327,8 +1337,7 @@ vf_flr_handle_msg(void *param, dev_intr_t *flr) for (vf = 0; vf < max_vf; vf++) { if (flr->bits[vf / max_bits] & BIT_ULL(vf % max_bits)) { - plt_base_dbg("Process FLR vf:%d request (pf:%d, vf:%d)", - vf, dev->pf, dev->vf); + roc_trace_mbox_vf_flr("PRC_VFFLR", vf, dev_pf_func(dev->pf, dev->vf)); /* Inform AF about VF reset */ vf_flr_send_msg(dev, vf); flr->bits[vf / max_bits] &= ~(BIT_ULL(vf % max_bits)); diff --git a/drivers/common/cnxk/roc_mbox.c b/drivers/common/cnxk/roc_mbox.c index a3b96e6e2b..c07202b964 100644 --- a/drivers/common/cnxk/roc_mbox.c +++ b/drivers/common/cnxk/roc_mbox.c @@ -263,6 +263,34 @@ mbox_wait_for_zero(struct mbox *mbox, int devid) return data == 0; } +static inline void +mbox_debug_region(const char *ops, struct mbox *mbox, int devid, uint64_t intr_val, int rc) +{ + struct mbox_dev *mdev = &mbox->dev[devid]; + struct mbox_msghdr *msg_rx, *msg_tx; + uint64_t offset_rx, offset_tx; + char buf[BUFSIZ]; + + if (!plt_trace_point_is_enabled(&__cnxk_trace_mbox_region)) + return; + + /* Get messages from mbox */ + offset_tx = mbox->tx_start + PLT_ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN); + + msg_tx = (struct mbox_msghdr *)((uintptr_t)mdev->mbase + offset_tx); + snprintf(buf, BUFSIZ, "%s_TX", ops); + /* Only while sending message choose a rand identifier for the msg */ + if (intr_val) + msg_tx->cookie = rand(); + roc_trace_mbox_region(buf, mbox_id2name(msg_tx->id), msg_tx->pcifunc, (int)intr_val, + msg_tx->cookie); + + offset_rx = mbox->rx_start + PLT_ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN); + msg_rx = (struct mbox_msghdr *)((uintptr_t)mdev->mbase + offset_rx); + snprintf(buf, BUFSIZ, "%s_RX", ops); + roc_trace_mbox_region(buf, mbox_id2name(msg_rx->id), msg_rx->pcifunc, rc, msg_rx->cookie); +} + static void mbox_msg_send_data(struct mbox *mbox, int devid, uint8_t data) { @@ -291,6 +319,8 @@ mbox_msg_send_data(struct mbox *mbox, int devid, uint8_t data) (volatile void *)(mbox->reg_base + (mbox->trigger | (devid << mbox->tr_shift)))); intr_val |= (uint64_t)data; + + mbox_debug_region("SND_MSG", mbox, devid, intr_val, 0); /* The interrupt should be fired after num_msgs is written * to the shared memory */ @@ -342,6 +372,8 @@ mbox_get_rsp(struct mbox *mbox, int devid, void **msg) if (msg != NULL) *msg = msghdr; + mbox_debug_region("GET_RSP", mbox, devid, 0, msghdr->rc); + return msghdr->rc; } @@ -438,6 +470,8 @@ mbox_wait(struct mbox *mbox, int devid, uint32_t rst_timo) plt_memory_order_acquire), tx_hdr->num_msgs, rx_hdr->num_msgs, mdev->msg_size, mdev->rsp_size); + mbox_debug_region("MBX_TMO", mbox, devid, 0, -EIO); + return -EIO; } } diff --git a/drivers/common/cnxk/roc_mbox.h b/drivers/common/cnxk/roc_mbox.h index e7a916d61c..3ca8442b55 100644 --- a/drivers/common/cnxk/roc_mbox.h +++ b/drivers/common/cnxk/roc_mbox.h @@ -32,6 +32,8 @@ struct mbox_msghdr { uint16_t __io ver; /* Offset of next msg within mailbox region */ uint16_t __io next_msgoff; + /* Unique ID for trace debug */ + uint16_t __io cookie; int __io rc; /* Msg processed response code */ }; diff --git a/drivers/common/cnxk/roc_platform.c b/drivers/common/cnxk/roc_platform.c index f1e0a93d97..f7678280f5 100644 --- a/drivers/common/cnxk/roc_platform.c +++ b/drivers/common/cnxk/roc_platform.c @@ -3,6 +3,7 @@ */ #include <rte_log.h> +#include <rte_trace_point_register.h> #include "roc_api.h" @@ -85,6 +86,14 @@ roc_plt_init(void) return 0; } +/* ROC trace points */ +RTE_TRACE_POINT_REGISTER(cnxk_trace_mbox_region, cnxk.mbox.region) +RTE_TRACE_POINT_REGISTER(cnxk_trace_mbox_process, cnxk.mbox.process) +RTE_TRACE_POINT_REGISTER(cnxk_trace_mbox_interrupt, cnxk.mbox.interrupt) +RTE_TRACE_POINT_REGISTER(cnxk_trace_mbox_vf_flr, cnxk.mbox.vf.flr) +RTE_TRACE_POINT_REGISTER(cnxk_trace_mbox_vf_pf_handle, cnxk.mbox.vf.pf.handle) +RTE_TRACE_POINT_REGISTER(cnxk_trace_mbox_error, cnxk.mbox.error) + RTE_LOG_REGISTER_SUFFIX(cnxk_logtype_base, base, INFO); RTE_LOG_REGISTER_SUFFIX(cnxk_logtype_mbox, mbox, NOTICE); RTE_LOG_REGISTER_SUFFIX(cnxk_logtype_cpt, crypto, NOTICE); diff --git a/drivers/common/cnxk/roc_platform.h b/drivers/common/cnxk/roc_platform.h index 4076dbe94d..d62d1db8d1 100644 --- a/drivers/common/cnxk/roc_platform.h +++ b/drivers/common/cnxk/roc_platform.h @@ -5,12 +5,12 @@ #ifndef _ROC_PLATFORM_H_ #define _ROC_PLATFORM_H_ -#include <rte_compat.h> +#include <bus_pci_driver.h> #include <rte_alarm.h> #include <rte_bitmap.h> -#include <bus_pci_driver.h> #include <rte_byteorder.h> #include <rte_common.h> +#include <rte_compat.h> #include <rte_cycles.h> #include <rte_ether.h> #include <rte_interrupts.h> @@ -25,6 +25,7 @@ #include <rte_string_fns.h> #include <rte_tailq.h> #include <rte_telemetry.h> +#include <rte_trace_point.h> #include "eal_filesystem.h" @@ -265,6 +266,9 @@ plt_thread_is_valid(plt_thread_t thr) #define plt_tel_data_add_dict_u64 rte_tel_data_add_dict_uint #define plt_telemetry_register_cmd rte_telemetry_register_cmd +/* Trace */ +#define plt_trace_point_is_enabled rte_trace_point_is_enabled + /* Log */ extern int cnxk_logtype_base; #define RTE_LOGTYPE_base cnxk_logtype_base @@ -379,9 +383,82 @@ plt_lmt_region_reserve_aligned(const char *name, size_t len, uint32_t align) /* To ensure returned memory is physically contiguous, bounding * the start and end address in 2M range. */ - return rte_memzone_reserve_bounded(name, len, SOCKET_ID_ANY, - RTE_MEMZONE_IOVA_CONTIG, - align, RTE_PGSIZE_2M); + return rte_memzone_reserve_bounded(name, len, SOCKET_ID_ANY, RTE_MEMZONE_IOVA_CONTIG, align, + RTE_PGSIZE_2M); +} + +/* ROC trace points */ +RTE_TRACE_POINT(cnxk_trace_mbox_region, + RTE_TRACE_POINT_ARGS(const char *func, const char *msg, uint16_t pcifunc, int data, + uint16_t cookie), + rte_trace_point_emit_string(func); + rte_trace_point_emit_string(msg); rte_trace_point_emit_u16(pcifunc); + rte_trace_point_emit_int(data); rte_trace_point_emit_u16(cookie);) + +RTE_TRACE_POINT(cnxk_trace_mbox_process, + RTE_TRACE_POINT_ARGS(const char *func, const char *msg, uint16_t num_msgs, + uint16_t pcifunc), + rte_trace_point_emit_string(func); + rte_trace_point_emit_string(msg); rte_trace_point_emit_u16(num_msgs); + rte_trace_point_emit_u16(pcifunc);) + +RTE_TRACE_POINT(cnxk_trace_mbox_interrupt, + RTE_TRACE_POINT_ARGS(const char *func, int pcifunc, uint64_t intr, + uint64_t mbox_data), + rte_trace_point_emit_string(func); + rte_trace_point_emit_int(pcifunc); rte_trace_point_emit_u64(intr); + rte_trace_point_emit_u64(mbox_data);) + +RTE_TRACE_POINT(cnxk_trace_mbox_vf_flr, + RTE_TRACE_POINT_ARGS(const char *func, uint16_t from_vf, uint16_t pcifunc), + rte_trace_point_emit_string(func); + rte_trace_point_emit_u16(from_vf); rte_trace_point_emit_u16(pcifunc);) + +RTE_TRACE_POINT(cnxk_trace_mbox_vf_pf_handle, + RTE_TRACE_POINT_ARGS(const char *ops, uint16_t pcifunc, int data), + rte_trace_point_emit_string(ops); + rte_trace_point_emit_u16(pcifunc); rte_trace_point_emit_int(data);) + +RTE_TRACE_POINT(cnxk_trace_mbox_error, + RTE_TRACE_POINT_ARGS(const char *func, const char *msg, int data), + rte_trace_point_emit_string(func); + rte_trace_point_emit_string(msg); rte_trace_point_emit_int(data);) + +static inline void +roc_trace_mbox_region(const char *func, const char *msg, uint16_t pcifunc, int data, + uint16_t cookie) +{ + cnxk_trace_mbox_region(func, msg, pcifunc, data, cookie); +} + +static inline void +roc_trace_mbox_process(const char *func, const char *msg, uint16_t num_msgs, uint16_t pcifunc) +{ + cnxk_trace_mbox_process(func, msg, num_msgs, pcifunc); +} + +static inline void +roc_trace_mbox_interrupt(const char *func, int pcifunc, uint64_t intr, uint64_t mbox_data) +{ + cnxk_trace_mbox_interrupt(func, pcifunc, intr, mbox_data); +} + +static inline void +roc_trace_mbox_vf_pf_handle(const char *func, uint16_t pcifunc, int data) +{ + cnxk_trace_mbox_vf_pf_handle(func, pcifunc, data); +} + +static inline void +roc_trace_mbox_error(const char *func, const char *msg, int data) +{ + cnxk_trace_mbox_error(func, msg, data); +} + +static inline void +roc_trace_mbox_vf_flr(const char *func, uint16_t from_vf, uint16_t pcifunc) +{ + cnxk_trace_mbox_vf_flr(func, from_vf, pcifunc); } #endif /* _ROC_PLATFORM_H_ */ -- 2.25.1 ^ permalink raw reply [flat|nested] 7+ messages in thread
* RE: [EXTERNAL] [PATCH v2 1/2] common/cnxk: fix mailbox timeout issues 2024-11-14 9:31 ` [PATCH v2 1/2] common/cnxk: fix mailbox timeout issues Harman Kalra 2024-11-14 9:31 ` [PATCH v2 2/2] common/cnxk: add mailbox debug trace Harman Kalra @ 2025-01-22 12:36 ` Jerin Jacob 1 sibling, 0 replies; 7+ messages in thread From: Jerin Jacob @ 2025-01-22 12:36 UTC (permalink / raw) To: Harman Kalra, Nithin Kumar Dabilpuram, Kiran Kumar Kokkilagadda, Sunil Kumar Kori, Satha Koteswara Rao Kottidi, Harman Kalra Cc: dev [-- Attachment #1: Type: text/plain, Size: 745 bytes --] > -----Original Message----- > From: Harman Kalra <hkalra@marvell.com> > Sent: Thursday, November 14, 2024 3:02 PM > To: Nithin Kumar Dabilpuram <ndabilpuram@marvell.com>; Kiran Kumar > Kokkilagadda <kirankumark@marvell.com>; Sunil Kumar Kori > <skori@marvell.com>; Satha Koteswara Rao Kottidi > <skoteshwar@marvell.com>; Harman Kalra <hkalra@marvell.com> > Cc: dev@dpdk.org > Subject: [EXTERNAL] [PATCH v2 1/2] common/cnxk: fix mailbox timeout issues > > Found couple of reasons causing mailbox timeout: * msgs_acked value by 1)Two issues - please make it as two patches 2)fix https://mails.dpdk.org/archives/test-report/2024-November/825162.html 3)fix https://mails.dpdk.org/archives/test-report/2024-November/825257.html [-- Attachment #2: winmail.dat --] [-- Type: application/ms-tnef, Size: 14930 bytes --] ^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2025-01-22 12:37 UTC | newest] Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2024-11-04 13:07 [PATCH 1/2] common/cnxk: fix mailbox timeout issues Harman Kalra 2024-11-04 13:08 ` [PATCH 2/2] common/cnxk: add mailbox debug trace Harman Kalra 2024-11-07 8:04 ` [EXTERNAL] " Jerin Jacob 2024-11-14 16:38 ` Stephen Hemminger 2024-11-14 9:31 ` [PATCH v2 1/2] common/cnxk: fix mailbox timeout issues Harman Kalra 2024-11-14 9:31 ` [PATCH v2 2/2] common/cnxk: add mailbox debug trace Harman Kalra 2025-01-22 12:36 ` [EXTERNAL] [PATCH v2 1/2] common/cnxk: fix mailbox timeout issues Jerin Jacob
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).