From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga03.intel.com (mga03.intel.com [134.134.136.65]) by dpdk.org (Postfix) with ESMTP id 037421B52A for ; Fri, 29 Jun 2018 12:27:09 +0200 (CEST) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga005.fm.intel.com ([10.253.24.32]) by orsmga103.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 29 Jun 2018 03:27:09 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.51,285,1526367600"; d="scan'208";a="241324451" Received: from jeffguo-z170x-ud5.sh.intel.com (HELO localhost.localdomain) ([10.67.104.10]) by fmsmga005.fm.intel.com with ESMTP; 29 Jun 2018 03:27:06 -0700 From: Jeff Guo To: stephen@networkplumber.org, bruce.richardson@intel.com, ferruh.yigit@intel.com, konstantin.ananyev@intel.com, gaetan.rivet@6wind.com, jingjing.wu@intel.com, thomas@monjalon.net, motih@mellanox.com, matan@mellanox.com, harry.van.haaren@intel.com, qi.z.zhang@intel.com, shaopeng.he@intel.com, bernard.iremonger@intel.com Cc: jblunck@infradead.org, shreyansh.jain@nxp.com, dev@dpdk.org, jia.guo@intel.com, helin.zhang@intel.com Date: Fri, 29 Jun 2018 18:24:28 +0800 Message-Id: <1530267871-7161-7-git-send-email-jia.guo@intel.com> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1530267871-7161-1-git-send-email-jia.guo@intel.com> References: <1530267871-7161-1-git-send-email-jia.guo@intel.com> Subject: [dpdk-dev] [PATCH V4 6/9] eal: add failure handle mechanism for hot plug X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 29 Jun 2018 10:27:10 -0000 This patch introduces a failure handler mechanism to handle device hot plug removal event. First register sigbus handler, once sigbus error be captured, will check the failure address and accordingly remap the invalid memory for the corresponding device. Bese on this mechanism, it could guaranty the application not to be crash when hot unplug devices. Signed-off-by: Jeff Guo --- v4->v3: split patches to be small and clear. --- lib/librte_eal/linuxapp/eal/eal_dev.c | 88 ++++++++++++++++++++++++++++++++++- 1 file changed, 87 insertions(+), 1 deletion(-) diff --git a/lib/librte_eal/linuxapp/eal/eal_dev.c b/lib/librte_eal/linuxapp/eal/eal_dev.c index 1cf6aeb..c9dddab 100644 --- a/lib/librte_eal/linuxapp/eal/eal_dev.c +++ b/lib/librte_eal/linuxapp/eal/eal_dev.c @@ -4,6 +4,8 @@ #include #include +#include +#include #include #include @@ -14,15 +16,24 @@ #include #include #include +#include +#include +#include +#include #include "eal_private.h" static struct rte_intr_handle intr_handle = {.fd = -1 }; static bool monitor_started; +extern struct rte_bus_list rte_bus_list; + #define EAL_UEV_MSG_LEN 4096 #define EAL_UEV_MSG_ELEM_LEN 128 +/* spinlock for device failure process */ +static rte_spinlock_t dev_failure_lock = RTE_SPINLOCK_INITIALIZER; + static void dev_uev_handler(__rte_unused void *param); /* identify the system layer which reports this event. */ @@ -33,6 +44,34 @@ enum eal_dev_event_subsystem { EAL_DEV_EVENT_SUBSYSTEM_MAX }; +static void sigbus_handler(int signum __rte_unused, siginfo_t *info, + void *ctx __rte_unused) +{ + int ret; + + RTE_LOG(DEBUG, EAL, "Thread[%d] catch SIGBUS, fault address:%p\n", + (int)pthread_self(), info->si_addr); + + rte_spinlock_lock(&dev_failure_lock); + ret = rte_bus_sigbus_handler(info->si_addr); + rte_spinlock_unlock(&dev_failure_lock); + if (!ret) + RTE_LOG(INFO, EAL, + "Success to handle SIGBUS error for hotplug!\n"); + else + rte_exit(EXIT_FAILURE, + "A generic SIGBUS error, (rte_errno: %s)!", + strerror(rte_errno)); +} + +static int cmp_dev_name(const struct rte_device *dev, + const void *_name) +{ + const char *name = _name; + + return strcmp(dev->name, name); +} + static int dev_uev_socket_fd_create(void) { @@ -147,6 +186,9 @@ dev_uev_handler(__rte_unused void *param) struct rte_dev_event uevent; int ret; char buf[EAL_UEV_MSG_LEN]; + struct rte_bus *bus; + struct rte_device *dev; + const char *busname; memset(&uevent, 0, sizeof(struct rte_dev_event)); memset(buf, 0, EAL_UEV_MSG_LEN); @@ -171,13 +213,48 @@ dev_uev_handler(__rte_unused void *param) RTE_LOG(DEBUG, EAL, "receive uevent(name:%s, type:%d, subsystem:%d)\n", uevent.devname, uevent.type, uevent.subsystem); - if (uevent.devname) + switch (uevent.subsystem) { + case EAL_DEV_EVENT_SUBSYSTEM_PCI: + case EAL_DEV_EVENT_SUBSYSTEM_UIO: + busname = "pci"; + break; + default: + break; + } + + if (uevent.devname) { + if (uevent.type == RTE_DEV_EVENT_REMOVE) { + bus = rte_bus_find_by_name(busname); + if (bus == NULL) { + RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n", + busname); + return; + } + dev = bus->find_device(NULL, cmp_dev_name, + uevent.devname); + if (dev == NULL) { + RTE_LOG(ERR, EAL, "Cannot find device (%s) on " + "bus (%s)\n", uevent.devname, busname); + return; + } + rte_spinlock_lock(&dev_failure_lock); + ret = bus->hotplug_handler(dev); + rte_spinlock_unlock(&dev_failure_lock); + if (ret) { + RTE_LOG(ERR, EAL, "Can not handle hotplug for " + "device (%s)\n", dev->name); + return; + } + } dev_callback_process(uevent.devname, uevent.type); + } } int __rte_experimental rte_dev_event_monitor_start(void) { + sigset_t mask; + struct sigaction action; int ret; if (monitor_started) @@ -197,6 +274,14 @@ rte_dev_event_monitor_start(void) return -1; } + /* register sigbus handler */ + sigemptyset(&mask); + sigaddset(&mask, SIGBUS); + action.sa_flags = SA_SIGINFO; + action.sa_mask = mask; + action.sa_sigaction = sigbus_handler; + sigaction(SIGBUS, &action, NULL); + monitor_started = true; return 0; @@ -220,5 +305,6 @@ rte_dev_event_monitor_stop(void) close(intr_handle.fd); intr_handle.fd = -1; monitor_started = false; + return 0; } -- 2.7.4