From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id 0CF924365B; Mon, 4 Dec 2023 07:45:54 +0100 (CET) Received: from mails.dpdk.org (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 90EEE402E0; Mon, 4 Dec 2023 07:45:53 +0100 (CET) Received: from mgamail.intel.com (mgamail.intel.com [192.198.163.11]) by mails.dpdk.org (Postfix) with ESMTP id 56C14402AE for ; Mon, 4 Dec 2023 07:45:51 +0100 (CET) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1701672352; x=1733208352; h=from:to:cc:subject:date:message-id:mime-version: content-transfer-encoding; bh=dpXdxMouCvPlpMiKr9O6z/8iHzbCrJtb7aPmOof8oVs=; b=HPAtRVssUJ5NfWQLHmH920P6E+8CjjxQQW27NKaAH6T2ZE1Dx7F+UXZg 6id1N3pBKK3hl3bETmnxDcPGOneEBK9tWvmsxIhUwO+YH9UR8hhM3EgQV F8NyPQB9Hth8uEZJDUjtA0kDchfDTHC/Wgb8nFDmFykCm5W4VtCB8evV3 gnKmVjFhRwBf7YMYKg95QuBxNE53CsuqZR45mSIqsxhPSLz0MXBBlsbxL GyN3Q0vsWjABX2BE0kkJ6bLvFJrqGp143WL/vNpexetCr/2aQMy1cHlgl j1T5My7FhHkVB0poDYq9+RM4iHX+HE50NLMlC8tJHOXADkiUKesGRlNWE w==; X-IronPort-AV: E=McAfee;i="6600,9927,10913"; a="540275" X-IronPort-AV: E=Sophos;i="6.04,249,1695711600"; d="scan'208";a="540275" Received: from fmsmga006.fm.intel.com ([10.253.24.20]) by fmvoesa105.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 03 Dec 2023 22:45:49 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=McAfee;i="6600,9927,10913"; a="1017737313" X-IronPort-AV: E=Sophos;i="6.04,249,1695711600"; d="scan'208";a="1017737313" Received: from unknown (HELO dpdk..) ([10.239.252.115]) by fmsmga006-auth.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 03 Dec 2023 22:45:47 -0800 From: Shiyang He To: dev@dpdk.org Cc: yidingx.zhou@intel.com, Shiyang He , Jingjing Wu , Beilei Xing Subject: [PATCH] net/iavf: fix vf startup coredump Date: Mon, 4 Dec 2023 14:09:40 +0000 Message-Id: <20231204140940.3166836-1-shiyangx.he@intel.com> X-Mailer: git-send-email 2.34.1 MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org When the vf starts to request multiple queues, the pf sends a reset command to the vf. During the reset process, adminq sends an abnormal message to pf for an unknown reason, and the resource request fails resulting in a coredump. This patch fixes the issue by checking the reset state before resetting and creating a separate thread to handle reset event. Signed-off-by: Shiyang He --- drivers/net/iavf/iavf.h | 4 +- drivers/net/iavf/iavf_ethdev.c | 172 +++++++++++++++++++++++++-------- drivers/net/iavf/iavf_vchnl.c | 13 +-- 3 files changed, 139 insertions(+), 50 deletions(-) diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h index 10868f2c30..e1cb1e0bc3 100644 --- a/drivers/net/iavf/iavf.h +++ b/drivers/net/iavf/iavf.h @@ -19,6 +19,7 @@ #define IAVF_AQ_LEN 32 #define IAVF_AQ_BUF_SZ 4096 #define IAVF_RESET_WAIT_CNT 500 +#define IAVF_RESET_DETECTED_CNT 100 #define IAVF_BUF_SIZE_MIN 1024 #define IAVF_FRAME_SIZE_MAX 9728 #define IAVF_QUEUE_BASE_ADDR_UNIT 128 @@ -32,6 +33,7 @@ #define IAVF_NUM_MACADDR_MAX 64 #define IAVF_DEV_WATCHDOG_PERIOD 2000 /* microseconds, set 0 to disable*/ +#define IAVF_MAX_VF_COUNT 256 #define IAVF_DEFAULT_RX_PTHRESH 8 #define IAVF_DEFAULT_RX_HTHRESH 8 @@ -511,5 +513,5 @@ int iavf_flow_sub_check(struct iavf_adapter *adapter, struct iavf_fsub_conf *filter); void iavf_dev_watchdog_enable(struct iavf_adapter *adapter); void iavf_dev_watchdog_disable(struct iavf_adapter *adapter); -int iavf_handle_hw_reset(struct rte_eth_dev *dev); +int iavf_reset_enqueue(struct rte_eth_dev *dev); #endif /* _IAVF_ETHDEV_H_ */ diff --git a/drivers/net/iavf/iavf_ethdev.c b/drivers/net/iavf/iavf_ethdev.c index d1edb0dd5c..63848f7f6e 100644 --- a/drivers/net/iavf/iavf_ethdev.c +++ b/drivers/net/iavf/iavf_ethdev.c @@ -90,6 +90,23 @@ static struct iavf_proto_xtr_ol iavf_proto_xtr_params[] = { &rte_pmd_ifd_dynflag_proto_xtr_ipsec_crypto_said_mask }, }; +/* struct holding pending PF-to-VF reset */ +struct iavf_reset_info { + pthread_mutex_t lock; + rte_thread_t reset_tid; + uint32_t pending; + uint32_t head; + uint32_t tail; + struct rte_eth_dev *dev[IAVF_MAX_VF_COUNT]; +}; + +static struct iavf_reset_info reset_info = { + .lock = PTHREAD_MUTEX_INITIALIZER, + .head = 0, + .tail = 0, + .pending = 0, +}; + static int iavf_dev_configure(struct rte_eth_dev *dev); static int iavf_dev_start(struct rte_eth_dev *dev); static int iavf_dev_stop(struct rte_eth_dev *dev); @@ -310,8 +327,10 @@ iavf_dev_watchdog(void *cb_arg) adapter->vf.vf_reset = true; adapter->vf.link_up = false; - iavf_dev_event_post(adapter->vf.eth_dev, RTE_ETH_EVENT_INTR_RESET, - NULL, 0); + adapter->devargs.auto_reset ? + iavf_reset_enqueue(adapter->vf.eth_dev) : + iavf_dev_event_post(adapter->vf.eth_dev, + RTE_ETH_EVENT_INTR_RESET, NULL, 0); } } @@ -1086,9 +1105,6 @@ iavf_dev_stop(struct rte_eth_dev *dev) PMD_INIT_FUNC_TRACE(); - if (vf->vf_reset) - return 0; - if (adapter->closed) return -1; @@ -1986,14 +2002,11 @@ iavf_check_vf_reset_done(struct iavf_hw *hw) reset = reset >> IAVF_VFGEN_RSTAT_VFR_STATE_SHIFT; if (reset == VIRTCHNL_VFR_VFACTIVE || reset == VIRTCHNL_VFR_COMPLETED) - break; + return 0; rte_delay_ms(20); } - if (i >= IAVF_RESET_WAIT_CNT) - return -1; - - return 0; + return -1; } static int @@ -2928,15 +2941,12 @@ iavf_dev_close(struct rte_eth_dev *dev) static int iavf_dev_uninit(struct rte_eth_dev *dev) { - struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); - if (rte_eal_process_type() != RTE_PROC_PRIMARY) return -EPERM; iavf_dev_close(dev); - if (!vf->in_reset_recovery) - iavf_dev_event_handler_fini(); + iavf_dev_event_handler_fini(); return 0; } @@ -2949,7 +2959,6 @@ iavf_dev_reset(struct rte_eth_dev *dev) { int ret; struct iavf_hw *hw = IAVF_DEV_PRIVATE_TO_HW(dev->data->dev_private); - struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); /* * Check whether the VF reset has been done and inform application, @@ -2961,7 +2970,6 @@ iavf_dev_reset(struct rte_eth_dev *dev) PMD_DRV_LOG(ERR, "Wait too long for reset done!\n"); return ret; } - vf->vf_reset = false; PMD_DRV_LOG(DEBUG, "Start dev_reset ...\n"); ret = iavf_dev_uninit(dev); @@ -2971,41 +2979,127 @@ iavf_dev_reset(struct rte_eth_dev *dev) return iavf_dev_init(dev); } +static struct rte_eth_dev * +iavf_reset_dequeue(void) +{ + struct rte_eth_dev *dev; + + pthread_mutex_lock(&reset_info.lock); + if (reset_info.head == reset_info.tail) { + pthread_mutex_unlock(&reset_info.lock); + return NULL; + } + dev = reset_info.dev[reset_info.head]; + reset_info.head = (reset_info.head + 1) % IAVF_MAX_VF_COUNT; + reset_info.pending--; + pthread_mutex_unlock(&reset_info.lock); + + return dev; +} + +static inline bool +iavf_is_reset(struct iavf_hw *hw) +{ + return !(IAVF_READ_REG(hw, IAVF_VF_ARQLEN1) & + IAVF_VF_ARQLEN1_ARQENABLE_MASK); +} + +static bool +iavf_is_reset_detected(struct iavf_adapter *adapter) +{ + struct iavf_hw *hw = IAVF_DEV_PRIVATE_TO_HW(adapter); + int i; + + /* poll until we see the reset actually happen */ + for (i = 0; i < IAVF_RESET_DETECTED_CNT; i++) { + if (iavf_is_reset(hw)) + return true; + rte_delay_ms(20); + } + + return false; +} + /* * Handle hardware reset */ -int -iavf_handle_hw_reset(struct rte_eth_dev *dev) +static uint32_t +iavf_hw_reset_handler(void *arg __rte_unused) { - struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); - int ret; + struct rte_eth_dev *dev; + int ret, i; - vf->in_reset_recovery = true; + for (dev = iavf_reset_dequeue(); dev != NULL; dev = iavf_reset_dequeue()) { + struct iavf_adapter *adapter = dev->data->dev_private; + struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); - ret = iavf_dev_reset(dev); - if (ret) - goto error; + if (!iavf_is_reset_detected(adapter)) + return 0; - /* VF states restore */ - ret = iavf_dev_configure(dev); - if (ret) - goto error; + vf->in_reset_recovery = true; - iavf_dev_xstats_reset(dev); + ret = iavf_dev_reset(dev); + if (ret) + goto error; - /* start the device */ - ret = iavf_dev_start(dev); - if (ret) - goto error; - dev->data->dev_started = 1; + /* VF states restore */ + ret = iavf_dev_configure(dev); + if (ret) + goto error; - vf->in_reset_recovery = false; - return 0; + iavf_dev_xstats_reset(dev); + + /* start the device */ + ret = iavf_dev_start(dev); + if (ret) + goto error; + + /* Restore queue parameters */ + for (i = 0; i < dev->data->nb_rx_queues; i++) { + struct iavf_rx_queue *rxq = dev->data->rx_queues[i]; + rxq->vsi = &vf->vsi; + } + + for (i = 0; i < dev->data->nb_tx_queues; i++) { + struct iavf_tx_queue *txq = dev->data->tx_queues[i]; + txq->vsi = &vf->vsi; + } + dev->data->dev_started = 1; error: - PMD_DRV_LOG(DEBUG, "RESET recover with error code=%d\n", ret); - vf->in_reset_recovery = false; - return ret; + vf->in_reset_recovery = false; + } + + return -1; +} + +int +iavf_reset_enqueue(struct rte_eth_dev *dev) +{ + int first_entry; + + pthread_mutex_lock(&reset_info.lock); + if ((reset_info.tail + 1) % IAVF_MAX_VF_COUNT == reset_info.head) { + pthread_mutex_unlock(&reset_info.lock); + return -1; + } + first_entry = reset_info.pending == 0; + reset_info.dev[reset_info.tail] = dev; + reset_info.tail = (reset_info.tail + 1) % IAVF_MAX_VF_COUNT; + reset_info.pending++; + if (first_entry) { + PMD_DRV_LOG(DEBUG, "Create reset thread !!!\n"); + if (rte_thread_create_internal_control(&reset_info.reset_tid, + "iavf-reset-thread", iavf_hw_reset_handler, NULL)) { + pthread_mutex_unlock(&reset_info.lock); + PMD_DRV_LOG(ERR, "Fail to kick off iavf-reset-thread\n"); + return -1; + } + rte_thread_detach(reset_info.reset_tid); + } + pthread_mutex_unlock(&reset_info.lock); + + return 0; } static int diff --git a/drivers/net/iavf/iavf_vchnl.c b/drivers/net/iavf/iavf_vchnl.c index 0a3e1d082c..cccc9a57e3 100644 --- a/drivers/net/iavf/iavf_vchnl.c +++ b/drivers/net/iavf/iavf_vchnl.c @@ -80,14 +80,6 @@ iavf_dev_event_handle(void *param __rte_unused) TAILQ_FOREACH_SAFE(pos, &pending, next, save_next) { TAILQ_REMOVE(&pending, pos, next); - struct iavf_adapter *adapter = pos->dev->data->dev_private; - if (pos->event == RTE_ETH_EVENT_INTR_RESET && - adapter->devargs.auto_reset) { - iavf_handle_hw_reset(pos->dev); - rte_free(pos); - continue; - } - rte_eth_dev_callback_process(pos->dev, pos->event, pos->param); rte_free(pos); } @@ -462,8 +454,9 @@ iavf_handle_pf_event_msg(struct rte_eth_dev *dev, uint8_t *msg, vf->link_up = false; if (!vf->vf_reset) { vf->vf_reset = true; - iavf_dev_event_post(dev, RTE_ETH_EVENT_INTR_RESET, - NULL, 0); + adapter->devargs.auto_reset ? + iavf_reset_enqueue(dev) : + iavf_dev_event_post(dev, RTE_ETH_EVENT_INTR_RESET, NULL, 0); } break; case VIRTCHNL_EVENT_LINK_CHANGE: -- 2.34.1